bloby-bot 0.70.13 → 0.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +223 -45
- package/dist-bloby/assets/{bloby-CU9KhQdP.js → bloby-es6cZJzs.js} +6 -6
- package/dist-bloby/assets/globals-DBqwNiJV.css +2 -0
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-D0Tm_wgU.js → highlighted-body-OFNGDK62-8PiOHw9p.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-BJWX8urU.js +1 -0
- package/dist-bloby/assets/{onboard-GfjHF9nm.js → onboard-BKgy17OU.js} +1 -1
- package/dist-bloby/bloby.html +3 -3
- package/dist-bloby/onboard.html +3 -3
- package/package.json +2 -3
- package/scripts/install +141 -34
- package/scripts/install.ps1 +111 -15
- package/scripts/install.sh +141 -34
- package/shared/config.ts +37 -2
- package/supervisor/channels/manager.ts +68 -33
- package/supervisor/channels/telegram.ts +57 -16
- package/supervisor/channels/types.ts +4 -1
- package/supervisor/channels/whatsapp.ts +57 -10
- package/supervisor/chat/src/components/Chat/AudioBubble.tsx +1 -1
- package/supervisor/chat/src/components/Chat/AuthedImage.tsx +16 -3
- package/supervisor/chat/src/components/Chat/BlobyImageCard.tsx +2 -2
- package/supervisor/chat/src/components/Chat/ImageLightbox.tsx +25 -8
- package/supervisor/chat/src/components/Chat/InputBar.tsx +62 -7
- package/supervisor/chat/src/components/Chat/MessageBubble.tsx +37 -18
- package/supervisor/chat/src/components/Chat/MessageList.tsx +3 -3
- package/supervisor/chat/src/hooks/useChat.ts +52 -0
- package/supervisor/chat/src/lib/authedFile.ts +24 -12
- package/supervisor/file-saver.ts +92 -19
- package/supervisor/harnesses/attachment-policy.ts +111 -0
- package/supervisor/harnesses/claude.ts +62 -15
- package/supervisor/harnesses/codex.ts +69 -43
- package/supervisor/harnesses/pi/index.ts +84 -49
- package/supervisor/harnesses/pi/providers/humanize-error.ts +25 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +8 -0
- package/supervisor/harnesses/pi/providers/stream-google.ts +5 -0
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +15 -6
- package/supervisor/harnesses/pi/providers/types.ts +18 -1
- package/supervisor/harnesses/pi/session.ts +28 -1
- package/supervisor/index.ts +57 -16
- package/supervisor/widget.js +19 -5
- package/worker/db.ts +2 -0
- package/dist-bloby/assets/globals-DlPtwiZL.css +0 -2
- package/dist-bloby/assets/mermaid-GHXKKRXX-B95J3s3s.js +0 -1
- package/supervisor/public/headphones_spritesheet.webp +0 -0
- package/supervisor/public/spritesheet.webp +0 -0
- /package/dist-bloby/assets/{globals-mGpojCOe.js → globals-DN3F0CQE.js} +0 -0
|
@@ -21,6 +21,15 @@ import { assembleSystemPrompt } from '../../worker/prompts/prompt-assembler.js';
|
|
|
21
21
|
import { buildAgents } from '../agents/index.js';
|
|
22
22
|
import { preWarm, claimWarmup, discardWarmup } from '../cli-warmup.js';
|
|
23
23
|
import { mirrorSkillsInto } from './skills.js';
|
|
24
|
+
import {
|
|
25
|
+
routeAttachment,
|
|
26
|
+
normalizeImageMediaType,
|
|
27
|
+
approxBase64Bytes,
|
|
28
|
+
buildSavedFilesNote,
|
|
29
|
+
INLINE_TEXT_PER_FILE_CHARS,
|
|
30
|
+
INLINE_TEXT_TOTAL_CHARS,
|
|
31
|
+
MAX_INLINE_IMAGE_BYTES,
|
|
32
|
+
} from './attachment-policy.js';
|
|
24
33
|
|
|
25
34
|
// ── Types ──────────────────────────────────────────────────────────────────
|
|
26
35
|
|
|
@@ -157,30 +166,68 @@ function loadMcpServers(): Record<string, any> | undefined {
|
|
|
157
166
|
return undefined;
|
|
158
167
|
}
|
|
159
168
|
|
|
160
|
-
/** Build an SDKUserMessage from text + optional attachments
|
|
169
|
+
/** Build an SDKUserMessage from text + optional attachments.
|
|
170
|
+
* Routing is delegated to the shared attachment-policy so all three harnesses
|
|
171
|
+
* ingest identically. The Anthropic Messages API base64 document source accepts
|
|
172
|
+
* ONLY application/pdf — handing it a docx/xlsx/csv/markdown/octet-stream 400s
|
|
173
|
+
* the whole turn — so non-PDF binaries are NOT emitted as provider blocks; they
|
|
174
|
+
* ride on the saved-files disk pointer instead. Blocks stay MEDIA-FIRST, TEXT-last. */
|
|
161
175
|
function buildUserMessage(text: string, attachments?: AgentAttachment[], savedFiles?: SavedFile[]): SDKUserMessage {
|
|
162
176
|
const content: any[] = [];
|
|
163
177
|
|
|
164
178
|
if (attachments?.length) {
|
|
179
|
+
// Running budget so the cross-file inline-text total never exceeds the cap.
|
|
180
|
+
let inlineTextBudget = INLINE_TEXT_TOTAL_CHARS;
|
|
181
|
+
|
|
165
182
|
for (const att of attachments) {
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
183
|
+
// Claude natively renders PDF document blocks (vision over the rendered pages).
|
|
184
|
+
const route = routeAttachment(att, { canNativeDocument: true });
|
|
185
|
+
switch (route) {
|
|
186
|
+
case 'image': {
|
|
187
|
+
// Drop the inline copy when it would bloat every stateless resend — the
|
|
188
|
+
// file is on disk and buildSavedFilesNote points the file tools at it.
|
|
189
|
+
if (approxBase64Bytes(att.data) > MAX_INLINE_IMAGE_BYTES) break;
|
|
190
|
+
content.push({
|
|
191
|
+
type: 'image',
|
|
192
|
+
source: { type: 'base64', media_type: normalizeImageMediaType(att.mediaType), data: att.data },
|
|
193
|
+
});
|
|
194
|
+
break;
|
|
195
|
+
}
|
|
196
|
+
case 'native-document': {
|
|
197
|
+
content.push({
|
|
198
|
+
type: 'document',
|
|
199
|
+
source: { type: 'base64', media_type: 'application/pdf', data: att.data },
|
|
200
|
+
});
|
|
201
|
+
break;
|
|
202
|
+
}
|
|
203
|
+
case 'inline-text': {
|
|
204
|
+
if (inlineTextBudget <= 0) break;
|
|
205
|
+
let decoded = '';
|
|
206
|
+
try {
|
|
207
|
+
decoded = Buffer.from(att.data, 'base64').toString('utf-8');
|
|
208
|
+
} catch {
|
|
209
|
+
break; // undecodable → rely on the saved-files note
|
|
210
|
+
}
|
|
211
|
+
const cap = Math.min(INLINE_TEXT_PER_FILE_CHARS, inlineTextBudget);
|
|
212
|
+
const slice = decoded.slice(0, cap);
|
|
213
|
+
inlineTextBudget -= slice.length;
|
|
214
|
+
// text/csv/markdown also 400 as document sources, so inline as a text note.
|
|
215
|
+
content.push({ type: 'text', text: `--- ${att.name} ---\n${slice}` });
|
|
216
|
+
break;
|
|
217
|
+
}
|
|
218
|
+
case 'reference-only':
|
|
219
|
+
default:
|
|
220
|
+
// Binary we can't inline (docx/xlsx/zip/…) or an unexpected route — no
|
|
221
|
+
// provider block; the saved-files note below carries the disk pointer.
|
|
222
|
+
break;
|
|
176
223
|
}
|
|
177
224
|
}
|
|
178
225
|
}
|
|
179
226
|
|
|
180
227
|
let promptText = text || '(attached files)';
|
|
181
228
|
if (savedFiles?.length) {
|
|
182
|
-
const
|
|
183
|
-
promptText += `\n\n
|
|
229
|
+
const note = buildSavedFilesNote(savedFiles);
|
|
230
|
+
if (note) promptText += `\n\n${note}`;
|
|
184
231
|
}
|
|
185
232
|
|
|
186
233
|
content.push({ type: 'text', text: promptText });
|
|
@@ -663,8 +710,8 @@ export async function startBlobyAgentQuery(
|
|
|
663
710
|
|
|
664
711
|
let plainPrompt = prompt;
|
|
665
712
|
if (savedFiles?.length && !attachments?.length) {
|
|
666
|
-
const
|
|
667
|
-
plainPrompt += `\n\n
|
|
713
|
+
const note = buildSavedFilesNote(savedFiles);
|
|
714
|
+
if (note) plainPrompt += `\n\n${note}`;
|
|
668
715
|
}
|
|
669
716
|
|
|
670
717
|
const sdkPrompt: string | AsyncIterable<SDKUserMessage> =
|
|
@@ -44,6 +44,15 @@ import type { SavedFile } from '../file-saver.js';
|
|
|
44
44
|
import { getCodexAccessToken } from '../../worker/codex-auth.js';
|
|
45
45
|
import { assembleSystemPrompt } from '../../worker/prompts/prompt-assembler.js';
|
|
46
46
|
import { mirrorSkillsInto } from './skills.js';
|
|
47
|
+
import {
|
|
48
|
+
routeAttachment,
|
|
49
|
+
normalizeImageMediaType,
|
|
50
|
+
approxBase64Bytes,
|
|
51
|
+
buildSavedFilesNote,
|
|
52
|
+
INLINE_TEXT_PER_FILE_CHARS,
|
|
53
|
+
INLINE_TEXT_TOTAL_CHARS,
|
|
54
|
+
MAX_INLINE_IMAGE_BYTES,
|
|
55
|
+
} from './attachment-policy.js';
|
|
47
56
|
import type { OnAgentMessage, RecentMessage, AgentAttachment, AgentQueryRequest, AgentQueryResult } from './types.js';
|
|
48
57
|
export type { RecentMessage, AgentAttachment };
|
|
49
58
|
|
|
@@ -555,42 +564,24 @@ function emitDone(conv: CodexConversation): void {
|
|
|
555
564
|
|
|
556
565
|
/* ── Input building ────────────────────────────────────────────────────── */
|
|
557
566
|
|
|
558
|
-
/**
|
|
559
|
-
*
|
|
560
|
-
*
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
567
|
+
/**
|
|
568
|
+
* Build codex `UserInput` blocks from the user text + saved files + raw
|
|
569
|
+
* attachments. Routing is delegated to the shared attachment-policy so codex
|
|
570
|
+
* stays byte-for-byte consistent with the Claude/PI harnesses. Codex's UserInput
|
|
571
|
+
* has NO native document type (verified against 0.138), so canNativeDocument is
|
|
572
|
+
* FALSE: PDFs/binaries become a disk-pointer note and the agent opens them with
|
|
573
|
+
* its file tools.
|
|
574
|
+
*
|
|
575
|
+
* Block order is MEDIA-first then TEXT (matching Claude/PI): the inline-text
|
|
576
|
+
* notes and the saved-files pointer are folded into the trailing text block.
|
|
577
|
+
*/
|
|
565
578
|
function buildUserInput(text: string, savedFiles?: SavedFile[], attachments?: AgentAttachment[]): Array<Record<string, any>> {
|
|
566
579
|
const input: Array<Record<string, any>> = [];
|
|
567
580
|
|
|
568
|
-
let promptText = text || '(attached files)';
|
|
569
|
-
if (savedFiles?.length) {
|
|
570
|
-
const lines = savedFiles.map((f) => `- ${f.name} -> ${f.relPath}`);
|
|
571
|
-
promptText += `\n\n[Attached files saved to disk]\n${lines.join('\n')}\nYou can read or reference these files using the paths above (relative to your cwd).`;
|
|
572
|
-
}
|
|
573
|
-
|
|
574
|
-
// Inline text-like attachments (size-capped) so the model can answer about
|
|
575
|
-
// their contents immediately instead of shelling out to read them from disk.
|
|
576
|
-
if (attachments?.length) {
|
|
577
|
-
let budget = INLINE_TEXT_TOTAL_BUDGET;
|
|
578
|
-
for (const att of attachments) {
|
|
579
|
-
if (att.type !== 'file' || !INLINE_TEXT_MEDIA.test(att.mediaType || '')) continue;
|
|
580
|
-
const approxBytes = Math.floor((att.data?.length || 0) * 0.75);
|
|
581
|
-
if (approxBytes === 0 || approxBytes > INLINE_TEXT_MAX_BYTES || approxBytes > budget) continue;
|
|
582
|
-
try {
|
|
583
|
-
const decoded = Buffer.from(att.data, 'base64').toString('utf-8');
|
|
584
|
-
budget -= approxBytes;
|
|
585
|
-
promptText += `\n\n[Attached file content: ${att.name}]\n\`\`\`\n${decoded}\n\`\`\``;
|
|
586
|
-
} catch {}
|
|
587
|
-
}
|
|
588
|
-
}
|
|
589
|
-
|
|
590
|
-
input.push({ type: 'text', text: promptText });
|
|
591
|
-
|
|
592
581
|
// Codex understands `localImage` (path on disk) — Bloby's file-saver already
|
|
593
|
-
// wrote attachments to disk, so we
|
|
582
|
+
// wrote image attachments to disk, so we point at the absolute path. Track a
|
|
583
|
+
// per-name COUNT (not presence): WhatsApp multi-image pushes share one
|
|
584
|
+
// attachment name and each saved file covers exactly one of them.
|
|
594
585
|
const savedImageCounts = new Map<string, number>();
|
|
595
586
|
if (savedFiles?.length) {
|
|
596
587
|
for (const f of savedFiles) {
|
|
@@ -601,23 +592,58 @@ function buildUserInput(text: string, savedFiles?: SavedFile[], attachments?: Ag
|
|
|
601
592
|
}
|
|
602
593
|
}
|
|
603
594
|
|
|
604
|
-
//
|
|
605
|
-
//
|
|
606
|
-
//
|
|
607
|
-
//
|
|
608
|
-
|
|
595
|
+
// Route every attachment through the shared policy. Inline-text notes are
|
|
596
|
+
// accumulated into `inlineNotes` (appended to the trailing text block);
|
|
597
|
+
// images become data-URL blocks (with the localImage path already covering
|
|
598
|
+
// disk-saved copies); everything else falls back to the saved-files pointer.
|
|
599
|
+
let promptText = text || '(attached files)';
|
|
600
|
+
const inlineNotes: string[] = [];
|
|
601
|
+
let inlineBudget = INLINE_TEXT_TOTAL_CHARS;
|
|
609
602
|
if (attachments?.length) {
|
|
610
603
|
for (const att of attachments) {
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
604
|
+
switch (routeAttachment(att, { canNativeDocument: false })) {
|
|
605
|
+
case 'image': {
|
|
606
|
+
if (!att.data) break;
|
|
607
|
+
// Skip data-URL inlining when a disk copy exists (localImage already
|
|
608
|
+
// points codex at it) or when the payload is too big to resend on
|
|
609
|
+
// every stateless turn — the saved-files pointer covers it instead.
|
|
610
|
+
const remaining = savedImageCounts.get(att.name) || 0;
|
|
611
|
+
if (remaining > 0) {
|
|
612
|
+
savedImageCounts.set(att.name, remaining - 1);
|
|
613
|
+
break;
|
|
614
|
+
}
|
|
615
|
+
if (approxBase64Bytes(att.data) > MAX_INLINE_IMAGE_BYTES) break;
|
|
616
|
+
const mediaType = normalizeImageMediaType(att.mediaType);
|
|
617
|
+
input.push({ type: 'image', url: `data:${mediaType};base64,${att.data}` });
|
|
618
|
+
break;
|
|
619
|
+
}
|
|
620
|
+
case 'inline-text': {
|
|
621
|
+
if (!att.data) break;
|
|
622
|
+
try {
|
|
623
|
+
let decoded = Buffer.from(att.data, 'base64').toString('utf-8');
|
|
624
|
+
if (decoded.length > INLINE_TEXT_PER_FILE_CHARS) decoded = decoded.slice(0, INLINE_TEXT_PER_FILE_CHARS);
|
|
625
|
+
if (decoded.length > inlineBudget) decoded = decoded.slice(0, inlineBudget);
|
|
626
|
+
if (!decoded.length) break;
|
|
627
|
+
inlineBudget -= decoded.length;
|
|
628
|
+
inlineNotes.push(`\n\n[Attached file content: ${att.name}]\n\`\`\`\n${decoded}\n\`\`\``);
|
|
629
|
+
} catch {}
|
|
630
|
+
break;
|
|
631
|
+
}
|
|
632
|
+
// 'native-document' cannot occur (canNativeDocument:false); it and
|
|
633
|
+
// 'reference-only' both rely on the saved-files disk pointer below.
|
|
634
|
+
default:
|
|
635
|
+
break;
|
|
616
636
|
}
|
|
617
|
-
input.push({ type: 'image', url: `data:${att.mediaType};base64,${att.data}` });
|
|
618
637
|
}
|
|
619
638
|
}
|
|
620
639
|
|
|
640
|
+
for (const note of inlineNotes) promptText += note;
|
|
641
|
+
|
|
642
|
+
const savedNote = buildSavedFilesNote(savedFiles || []);
|
|
643
|
+
if (savedNote) promptText += `\n\n${savedNote}`;
|
|
644
|
+
|
|
645
|
+
input.push({ type: 'text', text: promptText });
|
|
646
|
+
|
|
621
647
|
return input;
|
|
622
648
|
}
|
|
623
649
|
|
|
@@ -29,11 +29,20 @@ import type {
|
|
|
29
29
|
export type { RecentMessage, AgentAttachment };
|
|
30
30
|
|
|
31
31
|
import { buildSkillsIndex } from '../skills.js';
|
|
32
|
+
import {
|
|
33
|
+
routeAttachment,
|
|
34
|
+
buildSavedFilesNote,
|
|
35
|
+
normalizeImageMediaType,
|
|
36
|
+
approxBase64Bytes,
|
|
37
|
+
MAX_INLINE_IMAGE_BYTES,
|
|
38
|
+
INLINE_TEXT_PER_FILE_CHARS,
|
|
39
|
+
INLINE_TEXT_TOTAL_CHARS,
|
|
40
|
+
} from '../attachment-policy.js';
|
|
32
41
|
import { createAsyncQueue, type AsyncQueue } from './async-queue.js';
|
|
33
42
|
import { createPiSession, type PiSessionEvent, type PiSessionAuth } from './session.js';
|
|
34
|
-
import { getPiSubProvider, getCatalogModel } from './sub-providers.js';
|
|
43
|
+
import { getPiSubProvider, getCatalogModel, type PiApiFlavor } from './sub-providers.js';
|
|
35
44
|
import { readPiAuth } from './auth-storage.js';
|
|
36
|
-
import type { PiMessage } from './providers/types.js';
|
|
45
|
+
import type { PiMessage, PiContentBlock } from './providers/types.js';
|
|
37
46
|
import { toolDefsForProvider } from './tools/registry.js';
|
|
38
47
|
import type { PiTaskHost } from './tools/types.js';
|
|
39
48
|
|
|
@@ -495,62 +504,82 @@ function recentToPiMessages(messages: RecentMessage[] | undefined): PiMessage[]
|
|
|
495
504
|
}));
|
|
496
505
|
}
|
|
497
506
|
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
mt.includes('json') ||
|
|
506
|
-
mt.includes('xml') ||
|
|
507
|
-
mt.includes('yaml') ||
|
|
508
|
-
mt.includes('csv') ||
|
|
509
|
-
mt.includes('javascript') ||
|
|
510
|
-
mt.includes('typescript') ||
|
|
511
|
-
mt === 'application/x-sh'
|
|
512
|
-
);
|
|
507
|
+
/** Native PDF document blocks reach only the flavors that render them — the
|
|
508
|
+
* Anthropic Messages API and Gemini both ingest application/pdf inline
|
|
509
|
+
* (base64 document source / inlineData). openai-completions has no document
|
|
510
|
+
* type, so a PDF there falls back to the saved-files disk pointer. Matches the
|
|
511
|
+
* shared attachment-policy routing rule. */
|
|
512
|
+
function canNativeDocumentForFlavor(flavor: PiApiFlavor): boolean {
|
|
513
|
+
return flavor === 'anthropic-messages' || flavor === 'google-gemini';
|
|
513
514
|
}
|
|
514
515
|
|
|
515
|
-
/**
|
|
516
|
-
|
|
517
|
-
|
|
516
|
+
/** Build a PiContentBlock[] from raw text + attachments, MEDIA-FIRST then the
|
|
517
|
+
* prompt text last (parity with claude.ts and the other pi providers). Routing
|
|
518
|
+
* is delegated to the shared attachment-policy so all three harnesses ingest
|
|
519
|
+
* identically; canNativeDocument is the active provider's PDF capability. */
|
|
520
|
+
function buildAttachmentBlocks(
|
|
521
|
+
text: string,
|
|
522
|
+
canNativeDocument: boolean,
|
|
523
|
+
attachments?: AgentAttachment[],
|
|
524
|
+
savedFiles?: SavedFile[],
|
|
525
|
+
): PiContentBlock[] {
|
|
526
|
+
const content: PiContentBlock[] = [];
|
|
518
527
|
if (attachments?.length) {
|
|
528
|
+
// Running budget so the cross-file inline-text total never exceeds the cap.
|
|
529
|
+
let inlineTextBudget = INLINE_TEXT_TOTAL_CHARS;
|
|
519
530
|
for (const att of attachments) {
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
type: 'text',
|
|
542
|
-
|
|
543
|
-
}
|
|
531
|
+
switch (routeAttachment(att, { canNativeDocument })) {
|
|
532
|
+
case 'image': {
|
|
533
|
+
// Drop the inline copy when it would bloat every stateless resend —
|
|
534
|
+
// the file is on disk and buildSavedFilesNote points the tools at it.
|
|
535
|
+
if (approxBase64Bytes(att.data) > MAX_INLINE_IMAGE_BYTES) break;
|
|
536
|
+
content.push({ type: 'image', mediaType: normalizeImageMediaType(att.mediaType), data: att.data });
|
|
537
|
+
break;
|
|
538
|
+
}
|
|
539
|
+
case 'native-document': {
|
|
540
|
+
// PDF on a flavor that renders it natively (anthropic / gemini).
|
|
541
|
+
content.push({ type: 'document', mediaType: 'application/pdf', data: att.data, name: att.name });
|
|
542
|
+
break;
|
|
543
|
+
}
|
|
544
|
+
case 'inline-text': {
|
|
545
|
+
if (inlineTextBudget <= 0) break;
|
|
546
|
+
let decoded = '';
|
|
547
|
+
try { decoded = Buffer.from(att.data, 'base64').toString('utf-8'); }
|
|
548
|
+
catch { break; } // undecodable → rely on the saved-files note
|
|
549
|
+
const cap = Math.min(INLINE_TEXT_PER_FILE_CHARS, inlineTextBudget);
|
|
550
|
+
const slice = decoded.slice(0, cap);
|
|
551
|
+
inlineTextBudget -= slice.length;
|
|
552
|
+
content.push({ type: 'text', text: `--- ${att.name} ---\n${slice}` });
|
|
553
|
+
break;
|
|
554
|
+
}
|
|
555
|
+
case 'reference-only':
|
|
556
|
+
default:
|
|
557
|
+
// Binary we can't inline (docx/xlsx/zip/…), a PDF on a flavor without
|
|
558
|
+
// native documents, or an unexpected route — no provider block; the
|
|
559
|
+
// saved-files note below carries the disk pointer. Never emit a
|
|
560
|
+
// malformed block (defensive default, review PI-E).
|
|
561
|
+
break;
|
|
544
562
|
}
|
|
545
563
|
}
|
|
546
564
|
}
|
|
565
|
+
|
|
547
566
|
let prompt = text || '(attached files)';
|
|
548
567
|
if (savedFiles?.length) {
|
|
549
|
-
const
|
|
550
|
-
prompt += `\n\n
|
|
568
|
+
const note = buildSavedFilesNote(savedFiles);
|
|
569
|
+
if (note) prompt += `\n\n${note}`;
|
|
551
570
|
}
|
|
552
571
|
content.push({ type: 'text', text: prompt });
|
|
553
|
-
return
|
|
572
|
+
return content;
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
/** Wrap a raw user input into a PiMessage with text + optional media blocks. */
|
|
576
|
+
function buildUserMessage(
|
|
577
|
+
text: string,
|
|
578
|
+
canNativeDocument: boolean,
|
|
579
|
+
attachments?: AgentAttachment[],
|
|
580
|
+
savedFiles?: SavedFile[],
|
|
581
|
+
): PiMessage {
|
|
582
|
+
return { role: 'user', content: buildAttachmentBlocks(text, canNativeDocument, attachments, savedFiles) };
|
|
554
583
|
}
|
|
555
584
|
|
|
556
585
|
// ── Live Conversation API ──────────────────────────────────────────────────
|
|
@@ -771,7 +800,13 @@ export function pushMessage(
|
|
|
771
800
|
conv.busy = true;
|
|
772
801
|
conv.pendingCount += 1;
|
|
773
802
|
conv.turnOrigins.push('user');
|
|
774
|
-
|
|
803
|
+
// Resolve the active flavor at push time (the session re-resolves auth every
|
|
804
|
+
// round, so a wizard provider switch mid-session is honored). Unreadable auth
|
|
805
|
+
// ⇒ no native documents — the conservative route sends a PDF to the disk
|
|
806
|
+
// pointer rather than emitting a block the provider can't render.
|
|
807
|
+
const resolved = resolveAuth();
|
|
808
|
+
const canNativeDocument = resolved.ok ? canNativeDocumentForFlavor(resolved.auth.flavor) : false;
|
|
809
|
+
conv.inputQueue.push(buildUserMessage(content, canNativeDocument, attachments, savedFiles));
|
|
775
810
|
conv.onMessage('bot:typing', { conversationId });
|
|
776
811
|
return true;
|
|
777
812
|
}
|
|
@@ -981,7 +1016,7 @@ export async function startBlobyAgentQuery(
|
|
|
981
1016
|
});
|
|
982
1017
|
|
|
983
1018
|
const queue = createAsyncQueue<PiMessage>();
|
|
984
|
-
queue.push(buildUserMessage(prompt, attachments, savedFiles));
|
|
1019
|
+
queue.push(buildUserMessage(prompt, canNativeDocumentForFlavor(resolved.auth.flavor), attachments, savedFiles));
|
|
985
1020
|
queue.end();
|
|
986
1021
|
await session.run(queue);
|
|
987
1022
|
|
|
@@ -48,6 +48,18 @@ const AUTH_RE =
|
|
|
48
48
|
const BILLING_RE =
|
|
49
49
|
/insufficient_quota|credit balance is too low|payment required|purchase more credits/i;
|
|
50
50
|
|
|
51
|
+
// A text-only model rejecting an attached image. Vendors phrase it many ways:
|
|
52
|
+
// OpenAI "Invalid content type. image_url is only supported by certain models",
|
|
53
|
+
// OpenRouter "No endpoints found that support image input", others mention
|
|
54
|
+
// "image input" / "does not support images" / "unsupported content type".
|
|
55
|
+
// Only EXPLICIT image-naming phrases — the bare tokens "vision"/"multimodal"/
|
|
56
|
+
// "modality" were removed because the provider body routinely echoes the model id
|
|
57
|
+
// (e.g. "gpt-4-vision-preview", "llama-3.2-90b-vision-instruct"), which would
|
|
58
|
+
// mis-classify an unrelated 400 from a vision-capable model and wrongly disable
|
|
59
|
+
// vision for the rest of the session. Paired with a 400/415/422 status below.
|
|
60
|
+
const IMAGE_UNSUPPORTED_RE =
|
|
61
|
+
/image[_ ]?url|image input|images?(?: are| is)? not supported|does not support images?|no endpoints? .*support image|unsupported content type/i;
|
|
62
|
+
|
|
51
63
|
export function classifyPiError(
|
|
52
64
|
providerLabel: string,
|
|
53
65
|
status: number | undefined,
|
|
@@ -85,6 +97,19 @@ export function classifyPiError(
|
|
|
85
97
|
message: `${providerLabel} rejected your API key. Update it from the dashboard (Bloby provider settings).${suffix}`,
|
|
86
98
|
};
|
|
87
99
|
}
|
|
100
|
+
// A text-only model that the catalog couldn't flag up front (dynamic/unknown
|
|
101
|
+
// sub-providers) 400/415/422s on the attached image. The session reacts by
|
|
102
|
+
// disabling vision for the rest of the session and re-running the round with
|
|
103
|
+
// images downgraded — self-healing so a single screenshot can't permanently
|
|
104
|
+
// 400-poison the conversation (it rides every stateless resend otherwise).
|
|
105
|
+
if ((status === 400 || status === 415 || status === 422) && IMAGE_UNSUPPORTED_RE.test(body)) {
|
|
106
|
+
return {
|
|
107
|
+
kind: 'image-unsupported',
|
|
108
|
+
retryable: false,
|
|
109
|
+
status,
|
|
110
|
+
message: `${providerLabel} rejected the attached image — this model appears to be text-only. Retrying without the image; switch to a vision-capable model to send images.${suffix}`,
|
|
111
|
+
};
|
|
112
|
+
}
|
|
88
113
|
if (status === 429) {
|
|
89
114
|
return {
|
|
90
115
|
kind: 'rate-limit',
|
|
@@ -89,6 +89,14 @@ function toAnthropicContent(blocks: PiContentBlock[]): any[] {
|
|
|
89
89
|
type: 'image',
|
|
90
90
|
source: { type: 'base64', media_type: b.mediaType, data: b.data },
|
|
91
91
|
});
|
|
92
|
+
} else if (b.type === 'document') {
|
|
93
|
+
// Native PDF document block — the Messages API renders the pages and the
|
|
94
|
+
// model reads them as vision. The base64 document source accepts ONLY
|
|
95
|
+
// application/pdf (buildUserMessage gates it on canNativeDocument).
|
|
96
|
+
out.push({
|
|
97
|
+
type: 'document',
|
|
98
|
+
source: { type: 'base64', media_type: b.mediaType, data: b.data },
|
|
99
|
+
});
|
|
92
100
|
} else if (b.type === 'tool_use') {
|
|
93
101
|
out.push({
|
|
94
102
|
type: 'tool_use',
|
|
@@ -102,6 +102,11 @@ function toGeminiParts(content: PiContentBlock[]): any[] {
|
|
|
102
102
|
parts.push({ text: b.text });
|
|
103
103
|
} else if (b.type === 'image') {
|
|
104
104
|
parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
|
|
105
|
+
} else if (b.type === 'document') {
|
|
106
|
+
// Gemini ingests application/pdf inline via the same inlineData shape as
|
|
107
|
+
// images (it OCRs/renders the document). buildUserMessage only routes a
|
|
108
|
+
// document block here when the flavor supports it.
|
|
109
|
+
parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
|
|
105
110
|
} else if (b.type === 'tool_use') {
|
|
106
111
|
// Assistant turn: the model asked to invoke a tool. Thinking-capable
|
|
107
112
|
// Gemini 3.x rejects (HTTP 400) any echoed functionCall whose
|
|
@@ -119,24 +119,33 @@ function toOpenAIMessages(pi: PiMessage[]): any[] {
|
|
|
119
119
|
out.push(msg);
|
|
120
120
|
continue;
|
|
121
121
|
}
|
|
122
|
-
// role === 'user' with non-tool-result content (text + optional images)
|
|
122
|
+
// role === 'user' with non-tool-result content (text + optional images).
|
|
123
|
+
// Media parts go first; text is appended last (parity with the other
|
|
124
|
+
// providers and pi/index's media-first block ordering).
|
|
123
125
|
const contentBlocks: any[] = [];
|
|
124
126
|
let plainText = '';
|
|
125
|
-
let
|
|
127
|
+
let hasMedia = false;
|
|
126
128
|
for (const b of m.content) {
|
|
127
129
|
if (b.type === 'text') {
|
|
128
130
|
plainText += (plainText ? '\n' : '') + b.text;
|
|
129
131
|
} else if (b.type === 'image') {
|
|
130
|
-
|
|
132
|
+
hasMedia = true;
|
|
131
133
|
contentBlocks.push({
|
|
132
134
|
type: 'image_url',
|
|
133
135
|
image_url: { url: `data:${b.mediaType};base64,${b.data}` },
|
|
134
136
|
});
|
|
137
|
+
} else if (b.type === 'document') {
|
|
138
|
+
// The Chat Completions schema has no document part — degrade to a text
|
|
139
|
+
// note rather than crashing. The file is also on disk (saved-files
|
|
140
|
+
// note), so the agent can open it with its tools. This shouldn't
|
|
141
|
+
// normally happen: buildUserMessage gates documents on canNativeDocument
|
|
142
|
+
// (false for this flavor), so a PDF here rides as the disk pointer.
|
|
143
|
+
plainText += (plainText ? '\n' : '') +
|
|
144
|
+
`[Attached document${b.name ? ` "${b.name}"` : ''} (${b.mediaType}) could not be inlined for this model — it is saved to disk; open it with your file tools.]`;
|
|
135
145
|
}
|
|
136
146
|
}
|
|
137
|
-
if (
|
|
138
|
-
|
|
139
|
-
if (plainText) contentBlocks.unshift({ type: 'text', text: plainText });
|
|
147
|
+
if (hasMedia) {
|
|
148
|
+
if (plainText) contentBlocks.push({ type: 'text', text: plainText });
|
|
140
149
|
out.push({ role: 'user', content: contentBlocks });
|
|
141
150
|
} else {
|
|
142
151
|
out.push({ role: 'user', content: plainText });
|
|
@@ -17,6 +17,11 @@ export type PiRole = 'user' | 'assistant' | 'tool';
|
|
|
17
17
|
export type PiContentBlock =
|
|
18
18
|
| { type: 'text'; text: string }
|
|
19
19
|
| { type: 'image'; mediaType: string; data: string } // base64
|
|
20
|
+
// Native document block (PDF). Only the flavors with native document support
|
|
21
|
+
// (anthropic-messages, google-gemini) ever receive one — buildUserMessage
|
|
22
|
+
// gates it on canNativeDocument; openai-completions degrades it to a text
|
|
23
|
+
// note rather than crashing if one ever reaches it.
|
|
24
|
+
| { type: 'document'; mediaType: string; data: string; name?: string } // base64
|
|
20
25
|
// `thoughtSignature` is a Gemini 3.x thinking-model field. Pi-flavored
|
|
21
26
|
// providers that emit reasoning attach it to function-call parts; the API
|
|
22
27
|
// rejects the next turn with HTTP 400 if we don't echo it back verbatim.
|
|
@@ -74,7 +79,19 @@ export type PiStopReason = 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | 'a
|
|
|
74
79
|
* string-matching: retry transient rounds, tear down on auth/overflow, and
|
|
75
80
|
* show actionable messages instead of raw provider JSON.
|
|
76
81
|
*/
|
|
77
|
-
export type PiErrorKind =
|
|
82
|
+
export type PiErrorKind =
|
|
83
|
+
| 'auth'
|
|
84
|
+
| 'context-overflow'
|
|
85
|
+
| 'rate-limit'
|
|
86
|
+
| 'billing'
|
|
87
|
+
| 'transient'
|
|
88
|
+
/** The model rejected an image/vision/modality block (a text-only model 400/
|
|
89
|
+
* 415/422s on the attached image). The session reacts by disabling vision
|
|
90
|
+
* for the rest of the session and re-running the round with images
|
|
91
|
+
* downgraded to placeholders — self-healing for dynamic/unknown models whose
|
|
92
|
+
* catalog can't tell us up front whether they see images. */
|
|
93
|
+
| 'image-unsupported'
|
|
94
|
+
| 'other';
|
|
78
95
|
|
|
79
96
|
export type PiStreamEvent =
|
|
80
97
|
| { type: 'text_delta'; delta: string }
|
|
@@ -181,6 +181,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
181
181
|
let lastUsage: PiUsage | undefined;
|
|
182
182
|
let lastContextWindow: number | undefined;
|
|
183
183
|
|
|
184
|
+
// Self-healing vision (audit D rank 12): when a model the catalog couldn't
|
|
185
|
+
// classify (dynamic/unknown sub-providers ⇒ supportsImages undefined) rejects
|
|
186
|
+
// an image with an 'image-unsupported' error, latch this for the rest of the
|
|
187
|
+
// session and downgrade images on every subsequent send. The IMAGE stays in
|
|
188
|
+
// history (downgradeImages is transform-on-send only), so switching to a
|
|
189
|
+
// vision-capable model later restores it.
|
|
190
|
+
let visionDisabled = false;
|
|
191
|
+
|
|
184
192
|
/** One stream round — collect the assistant blocks the model emits this pass. */
|
|
185
193
|
interface RoundResult {
|
|
186
194
|
text: string;
|
|
@@ -204,7 +212,11 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
204
212
|
baseUrl: auth.baseUrl,
|
|
205
213
|
apiKey: auth.apiKey,
|
|
206
214
|
systemPrompt: init.systemPrompt,
|
|
207
|
-
|
|
215
|
+
// Downgrade images when the catalog says text-only (supportsImages
|
|
216
|
+
// false) OR a prior round in THIS session learned it the hard way via
|
|
217
|
+
// an 'image-unsupported' error (visionDisabled). The stored history
|
|
218
|
+
// keeps the image so a later vision-capable model still restores it.
|
|
219
|
+
messages: auth.supportsImages === false || visionDisabled ? downgradeImages(messages) : messages,
|
|
208
220
|
tools: init.tools,
|
|
209
221
|
toolChoice: opts?.wrapUp ? 'none' : undefined,
|
|
210
222
|
maxOutputTokens: auth.maxOutputTokens,
|
|
@@ -330,6 +342,21 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
330
342
|
res = await runOneRound(needsSeparator);
|
|
331
343
|
}
|
|
332
344
|
|
|
345
|
+
// Self-healing vision (audit D rank 12): a model the catalog couldn't
|
|
346
|
+
// classify just 400/415/422'd on an attached image. Latch visionDisabled
|
|
347
|
+
// and re-run the round ONCE — runOneRound now downgrades images on send,
|
|
348
|
+
// so the resend succeeds. Guarded by !visionDisabled so it fires at most
|
|
349
|
+
// once per session; an image rides every stateless resend, so without
|
|
350
|
+
// this the whole conversation would keep re-400ing.
|
|
351
|
+
if (
|
|
352
|
+
res.errored && res.errorKind === 'image-unsupported' && !visionDisabled &&
|
|
353
|
+
!init.abortController.signal.aborted
|
|
354
|
+
) {
|
|
355
|
+
log.info('[pi/session] model rejected image — disabling vision for this session and retrying without it');
|
|
356
|
+
visionDisabled = true;
|
|
357
|
+
res = await runOneRound(needsSeparator);
|
|
358
|
+
}
|
|
359
|
+
|
|
333
360
|
const { text, toolUses, errored } = res;
|
|
334
361
|
|
|
335
362
|
// Append whatever the model produced this round to history so subsequent
|