bloby-bot 0.70.12 → 0.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/bin/cli.js +234 -48
  2. package/dist-bloby/assets/{bloby-DSNB0g4w.js → bloby-es6cZJzs.js} +6 -6
  3. package/dist-bloby/assets/globals-DBqwNiJV.css +2 -0
  4. package/dist-bloby/assets/{globals-B3cTbITX.js → globals-DN3F0CQE.js} +1 -1
  5. package/dist-bloby/assets/{highlighted-body-OFNGDK62-BLforpkr.js → highlighted-body-OFNGDK62-8PiOHw9p.js} +1 -1
  6. package/dist-bloby/assets/mermaid-GHXKKRXX-BJWX8urU.js +1 -0
  7. package/dist-bloby/assets/{onboard-Dn2Ws_G2.js → onboard-BKgy17OU.js} +1 -1
  8. package/dist-bloby/bloby.html +3 -3
  9. package/dist-bloby/onboard.html +3 -3
  10. package/package.json +3 -4
  11. package/scripts/install +156 -41
  12. package/scripts/install.ps1 +146 -29
  13. package/scripts/install.sh +156 -41
  14. package/shared/config.ts +37 -2
  15. package/shared/relay.ts +3 -1
  16. package/supervisor/channels/manager.ts +84 -44
  17. package/supervisor/channels/telegram.ts +57 -16
  18. package/supervisor/channels/types.ts +4 -1
  19. package/supervisor/channels/whatsapp.ts +57 -10
  20. package/supervisor/chat/OnboardWizard.tsx +0 -15
  21. package/supervisor/chat/src/components/Chat/AudioBubble.tsx +1 -1
  22. package/supervisor/chat/src/components/Chat/AuthedImage.tsx +16 -3
  23. package/supervisor/chat/src/components/Chat/BlobyImageCard.tsx +2 -2
  24. package/supervisor/chat/src/components/Chat/ImageLightbox.tsx +25 -8
  25. package/supervisor/chat/src/components/Chat/InputBar.tsx +62 -7
  26. package/supervisor/chat/src/components/Chat/MessageBubble.tsx +37 -18
  27. package/supervisor/chat/src/components/Chat/MessageList.tsx +3 -3
  28. package/supervisor/chat/src/hooks/useChat.ts +52 -0
  29. package/supervisor/chat/src/lib/authedFile.ts +24 -12
  30. package/supervisor/file-saver.ts +92 -19
  31. package/supervisor/harnesses/attachment-policy.ts +111 -0
  32. package/supervisor/harnesses/claude.ts +62 -15
  33. package/supervisor/harnesses/codex.ts +69 -43
  34. package/supervisor/harnesses/pi/index.ts +367 -112
  35. package/supervisor/harnesses/pi/providers/humanize-error.ts +27 -2
  36. package/supervisor/harnesses/pi/providers/retry.ts +31 -0
  37. package/supervisor/harnesses/pi/providers/stream-anthropic.ts +31 -3
  38. package/supervisor/harnesses/pi/providers/stream-google.ts +26 -3
  39. package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +32 -9
  40. package/supervisor/harnesses/pi/providers/types.ts +29 -1
  41. package/supervisor/harnesses/pi/session.ts +143 -3
  42. package/supervisor/harnesses/pi/test-completion.ts +56 -0
  43. package/supervisor/harnesses/pi/tools/bash.ts +198 -22
  44. package/supervisor/harnesses/pi/tools/glob.ts +79 -0
  45. package/supervisor/harnesses/pi/tools/grep.ts +0 -0
  46. package/supervisor/harnesses/pi/tools/registry.ts +18 -6
  47. package/supervisor/harnesses/pi/tools/todo-write.ts +45 -0
  48. package/supervisor/harnesses/pi/tools/web-fetch.ts +129 -0
  49. package/supervisor/index.ts +93 -18
  50. package/supervisor/widget.js +19 -5
  51. package/worker/db.ts +2 -0
  52. package/worker/index.ts +18 -1
  53. package/worker/prompts/bloby-system-prompt-codex.txt +1 -1
  54. package/worker/prompts/bloby-system-prompt-pi.txt +6 -24
  55. package/worker/prompts/bloby-system-prompt.txt +1 -1
  56. package/workspace/client/src/components/Dashboard/DashboardPage.tsx +4 -117
  57. package/workspace/client/src/components/Dashboard/deleteme_placeholders.tsx +194 -0
  58. package/workspace/client/src/components/Layout/Sidebar.tsx +52 -30
  59. package/workspace/client/src/components/deleteme_onboarding/WorkspaceTour.tsx +25 -15
  60. package/workspace/client/src/components/deleteme_onboarding/tour-theme.css +24 -0
  61. package/workspace/skills/mac/SKILL.md +13 -4
  62. package/dist-bloby/assets/globals-DyeW509Y.css +0 -2
  63. package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +0 -1
  64. package/supervisor/public/headphones_spritesheet.webp +0 -0
  65. package/supervisor/public/spritesheet.webp +0 -0
@@ -11,6 +11,7 @@ import BlobyTextCard from './BlobyTextCard';
11
11
  import NotchCard from './NotchCard';
12
12
  import MorphyActionCard from './MorphyActionCard';
13
13
  import type { StoredAttachment } from '../../hooks/useChat';
14
+ import type { LightboxImage } from './ImageLightbox';
14
15
 
15
16
  interface Props {
16
17
  role: 'user' | 'assistant';
@@ -19,7 +20,7 @@ interface Props {
19
20
  hasAttachments?: boolean;
20
21
  audioData?: string;
21
22
  attachments?: StoredAttachment[];
22
- onImageClick?: (images: string[], index: number) => void;
23
+ onImageClick?: (images: LightboxImage[], index: number) => void;
23
24
  transcribing?: boolean;
24
25
  }
25
26
 
@@ -261,16 +262,27 @@ export default function MessageBubble({ role, content, timestamp, hasAttachments
261
262
  // Strip channel tag (and [voice] prefix / echoed tags in assistant replies) from BOTH sides
262
263
  const { tag: channelTag, body: displayContent } = parseChannelTag(content);
263
264
 
264
- // Separate image and document attachments
265
- const imageAtts = attachments?.filter((a) => a.mediaType?.startsWith('image/')) || [];
266
- const docAtts = attachments?.filter((a) => !a.mediaType?.startsWith('image/')) || [];
267
-
268
- // Resolve image URLs
269
- const imageUrls = imageAtts
270
- .filter((a) => a.filePath)
271
- .map((a) =>
272
- a.filePath.startsWith('data:') ? a.filePath : `/api/files/${a.filePath}`
273
- );
265
+ // Separate image and document attachments. An attachment counts as an image whenever
266
+ // its mediaType says so, its type is 'image', OR its name/path has an image extension —
267
+ // Mac/channel attachments often arrive with a missing or odd mediaType.
268
+ const isImageAtt = (a: StoredAttachment) =>
269
+ a.mediaType?.startsWith('image/') ||
270
+ a.type === 'image' ||
271
+ /\.(png|jpe?g|gif|webp|avif|bmp|svg|heic|heif)$/i.test(a.name || a.filePath || '');
272
+ const imageAtts = attachments?.filter(isImageAtt) || [];
273
+ const docAtts = attachments?.filter((a) => !isImageAtt(a)) || [];
274
+
275
+ // Resolve image URLs (keeping them aligned 1:1 with imageAtts so names thread through)
276
+ const imageAttsWithUrl = imageAtts.filter((a) => a.filePath);
277
+ const imageUrls = imageAttsWithUrl.map((a) =>
278
+ a.filePath.startsWith('data:') ? a.filePath : `/api/files/${a.filePath}`
279
+ );
280
+ // Lightbox data model: thread the human filename alongside each URL so downloads/alt
281
+ // text use the real name rather than a data:-URL or random stamp.
282
+ const imageItems: LightboxImage[] = imageUrls.map((url, i) => ({
283
+ url,
284
+ name: imageAttsWithUrl[i]?.name,
285
+ }));
274
286
 
275
287
  if (isUser) {
276
288
  return (
@@ -285,19 +297,26 @@ export default function MessageBubble({ role, content, timestamp, hasAttachments
285
297
  <AuthedImage
286
298
  key={i}
287
299
  src={url}
288
- alt={imageAtts[i]?.name || 'attachment'}
300
+ alt={imageAttsWithUrl[i]?.name || 'attachment'}
289
301
  className="w-28 h-28 rounded-lg object-cover cursor-pointer border border-white/10 hover:opacity-80 transition-opacity"
290
- onClick={() => onImageClick?.(imageUrls, i)}
302
+ onClick={() => onImageClick?.(imageItems, i)}
291
303
  />
292
304
  ))}
293
305
  </div>
294
306
  )}
295
- {/* Document attachments */}
307
+ {/* Document attachments — one row per file with paperclip + filename */}
296
308
  {docAtts.length > 0 && (
297
- <span className="inline-flex items-center gap-1 text-primary-foreground/60 mr-1.5">
298
- <Paperclip className="h-3 w-3" />
299
- {docAtts.length > 1 && <span className="text-xs">{docAtts.length}</span>}
300
- </span>
309
+ <div className="flex flex-col gap-1 mb-2">
310
+ {docAtts.map((a, i) => {
311
+ const label = a.name || a.filePath?.split('/').pop() || 'file';
312
+ return (
313
+ <span key={i} className="inline-flex items-center gap-1 text-primary-foreground/80 text-xs" title={label}>
314
+ <Paperclip className="h-3 w-3 shrink-0" />
315
+ <span className="truncate max-w-[14rem]">{label}</span>
316
+ </span>
317
+ );
318
+ })}
319
+ </div>
301
320
  )}
302
321
  {/* Fallback paperclip for legacy messages with no parsed attachments */}
303
322
  {!attachments?.length && hasAttachments && (
@@ -2,7 +2,7 @@ import { useCallback, useEffect, useRef, useState } from 'react';
2
2
  import type { ChatMessage, ToolActivity } from '../../hooks/useChat';
3
3
  import MessageBubble from './MessageBubble';
4
4
  import TypingIndicator from './TypingIndicator';
5
- import ImageLightbox from './ImageLightbox';
5
+ import ImageLightbox, { type LightboxImage } from './ImageLightbox';
6
6
 
7
7
  interface Props {
8
8
  messages: ChatMessage[];
@@ -19,9 +19,9 @@ export default function MessageList({ messages, streaming, streamBuffer, tools,
19
19
  const sentinelRef = useRef<HTMLDivElement>(null);
20
20
  const isInitialLoad = useRef(true);
21
21
  const [loadingOlder, setLoadingOlder] = useState(false);
22
- const [lightbox, setLightbox] = useState<{ images: string[]; index: number } | null>(null);
22
+ const [lightbox, setLightbox] = useState<{ images: LightboxImage[]; index: number } | null>(null);
23
23
 
24
- const handleImageClick = useCallback((images: string[], index: number) => {
24
+ const handleImageClick = useCallback((images: LightboxImage[], index: number) => {
25
25
  setLightbox({ images, index });
26
26
  }, []);
27
27
 
@@ -39,9 +39,14 @@ export function useChat(ws: WsClient | null) {
39
39
  const [streamBuffer, setStreamBuffer] = useState('');
40
40
  const [tools, setTools] = useState<ToolActivity[]>([]);
41
41
  const loaded = useRef(false);
42
+ /** Ref to current conversationId (avoids stale closures in the once-registered WS callbacks) */
43
+ const conversationIdRef = useRef<string | null>(null);
42
44
  /** Ref to current streamBuffer (avoids stale closures in callbacks) */
43
45
  const streamBufferRef = useRef('');
44
46
 
47
+ // Keep conversationIdRef in sync with state
48
+ useEffect(() => { conversationIdRef.current = conversationId; }, [conversationId]);
49
+
45
50
  // Load current conversation from DB on mount
46
51
  useEffect(() => {
47
52
  if (loaded.current) return;
@@ -185,6 +190,53 @@ export function useChat(ws: WsClient | null) {
185
190
  },
186
191
  ]);
187
192
  }),
193
+ // Cross-device / channel sync: append a message broadcast by the server.
194
+ // Covers channel-inbound (WhatsApp/Telegram), peer-client, scheduler, and
195
+ // workspace messages — none of which produce an optimistic bubble here, and
196
+ // the server skips the sender, so a plain append is correct (no de-dup needed).
197
+ ws.on('chat:sync', (data: { conversationId: string; message: { role: string; content: string; timestamp?: string; attachments?: StoredAttachment[]; audio_data?: string } }) => {
198
+ if (data.conversationId !== conversationIdRef.current) return;
199
+
200
+ // Resolve audioData the same way the DB loader does (usually absent on sync)
201
+ let audioData: string | undefined;
202
+ const raw = data.message.audio_data;
203
+ if (raw) {
204
+ if (raw.startsWith('data:')) {
205
+ audioData = raw;
206
+ } else if (raw.includes('/')) {
207
+ audioData = `/api/files/${raw}`;
208
+ } else {
209
+ audioData = `data:audio/webm;base64,${raw}`;
210
+ }
211
+ }
212
+
213
+ setMessages((msgs) => [
214
+ ...msgs,
215
+ {
216
+ id: Date.now().toString(),
217
+ role: data.message.role as 'user' | 'assistant',
218
+ content: data.message.content,
219
+ timestamp: data.message.timestamp || new Date().toISOString(),
220
+ attachments: data.message.attachments,
221
+ hasAttachments: !!(data.message.attachments?.length),
222
+ audioData,
223
+ },
224
+ ]);
225
+ }),
226
+ // Server created a new conversation (first message of a fresh context)
227
+ ws.on('chat:conversation-created', (data: { conversationId: string }) => {
228
+ setConversationId(data.conversationId);
229
+ }),
230
+ // Context cleared from any client
231
+ ws.on('chat:cleared', () => {
232
+ setMessages([]);
233
+ setConversationId(null);
234
+ setStreamBuffer('');
235
+ streamBufferRef.current = '';
236
+ setStreaming(false);
237
+ setTools([]);
238
+ loaded.current = false;
239
+ }),
188
240
  ];
189
241
 
190
242
  return () => unsubs.forEach((u) => u());
@@ -13,37 +13,49 @@ import { authFetch } from './auth';
13
13
  * surface) and handing back a `blob:` object URL is the secure way to render it.
14
14
  *
15
15
  * `data:`, `blob:`, and other non-`/api/files` inputs (and `undefined`) pass through
16
- * unchanged. The created object URL is revoked when the input changes or the component
17
- * unmounts. Returns `undefined` while the fetch is in flight or if it failed, so callers
18
- * can show a placeholder.
16
+ * unchanged with status `'ready'`. The created object URL is revoked when the input
17
+ * changes or the component unmounts. Returns `{ url, status }` so callers can tell a
18
+ * still-loading fetch (`'loading'`) apart from a failed one (`'error'`, on a non-ok
19
+ * response or a network throw) and render a distinct broken-image affordance.
19
20
  */
20
- export function useAuthedFileUrl(rawUrl: string | undefined): string | undefined {
21
+ export interface AuthedFile {
22
+ url: string | undefined;
23
+ status: 'loading' | 'ready' | 'error';
24
+ }
25
+
26
+ export function useAuthedFileUrl(rawUrl: string | undefined): AuthedFile {
21
27
  const requiresAuthedFetch = !!rawUrl && rawUrl.startsWith('/api/files/');
22
- const [resolvedUrl, setResolvedUrl] = useState<string | undefined>(
23
- requiresAuthedFetch ? undefined : rawUrl,
28
+ const [state, setState] = useState<AuthedFile>(
29
+ requiresAuthedFetch
30
+ ? { url: undefined, status: 'loading' }
31
+ : { url: rawUrl, status: 'ready' },
24
32
  );
25
33
 
26
34
  useEffect(() => {
27
35
  // Pass through anything that isn't a protected file path (data:/blob:/undefined).
28
36
  if (!rawUrl || !rawUrl.startsWith('/api/files/')) {
29
- setResolvedUrl(rawUrl);
37
+ setState({ url: rawUrl, status: 'ready' });
30
38
  return;
31
39
  }
32
40
 
33
41
  let createdObjectUrl: string | null = null;
34
42
  let cancelled = false;
35
- setResolvedUrl(undefined);
43
+ setState({ url: undefined, status: 'loading' });
36
44
 
37
45
  (async () => {
38
46
  try {
39
47
  const response = await authFetch(rawUrl);
40
- if (!response.ok) return; // leave undefined → caller shows a placeholder
48
+ if (cancelled) return;
49
+ if (!response.ok) {
50
+ setState({ url: undefined, status: 'error' }); // caller shows a broken-image affordance
51
+ return;
52
+ }
41
53
  const blob = await response.blob();
42
54
  if (cancelled) return;
43
55
  createdObjectUrl = URL.createObjectURL(blob);
44
- setResolvedUrl(createdObjectUrl);
56
+ setState({ url: createdObjectUrl, status: 'ready' });
45
57
  } catch {
46
- /* network error leave undefined so the caller renders its placeholder */
58
+ if (!cancelled) setState({ url: undefined, status: 'error' }); // network error → broken-image affordance
47
59
  }
48
60
  })();
49
61
 
@@ -53,5 +65,5 @@ export function useAuthedFileUrl(rawUrl: string | undefined): string | undefined
53
65
  };
54
66
  }, [rawUrl]);
55
67
 
56
- return resolvedUrl;
68
+ return state;
57
69
  }
@@ -10,41 +10,114 @@ export interface SavedFile {
10
10
  absPath: string;
11
11
  }
12
12
 
13
+ /** Per-file decoded-byte ceiling. Anything larger is rejected at the save chokepoint
14
+ * so a single message can't write an unbounded blob to disk. */
15
+ export const MAX_ATTACHMENT_BYTES = 12 * 1024 * 1024;
16
+ /** Per-message guards (enforced by callers around the save loop). */
17
+ export const MAX_ATTACHMENTS_PER_MESSAGE = 12;
18
+ export const MAX_TOTAL_ATTACHMENT_BYTES = 48 * 1024 * 1024;
19
+
13
20
  export function ensureFileDirs(): void {
14
21
  fs.mkdirSync(paths.filesAudio, { recursive: true });
15
22
  fs.mkdirSync(paths.filesImages, { recursive: true });
16
23
  fs.mkdirSync(paths.filesDocuments, { recursive: true });
17
24
  }
18
25
 
19
- export function saveAttachment(att: { type: 'image' | 'file'; name: string; mediaType: string; data: string }): SavedFile {
20
- const category = att.type === 'image' ? 'images' : 'documents';
26
+ const EXT_FROM_MIME: Record<string, string> = {
27
+ 'image/png': 'png', 'image/jpeg': 'jpg', 'image/gif': 'gif', 'image/webp': 'webp',
28
+ 'image/avif': 'avif', 'image/bmp': 'bmp', 'image/heic': 'heic', 'image/heif': 'heif', 'image/svg+xml': 'svg',
29
+ 'application/pdf': 'pdf', 'text/plain': 'txt', 'text/markdown': 'md', 'text/csv': 'csv',
30
+ 'application/json': 'json', 'application/xml': 'xml', 'text/html': 'html',
31
+ 'application/zip': 'zip', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
32
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
33
+ };
34
+
35
+ /** Dependency-free magic-byte sniff for the common binary types. Returns the detected
36
+ * media type or undefined when the bytes aren't a recognized signature. We trust this
37
+ * over the client-claimed mediaType when it fires — the client controls both name and
38
+ * mediaType, so the on-disk extension / served Content-Type must derive from content. */
39
+ function sniffMediaType(buf: Buffer): string | undefined {
40
+ if (buf.length >= 8 && buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4e && buf[3] === 0x47) return 'image/png';
41
+ if (buf.length >= 3 && buf[0] === 0xff && buf[1] === 0xd8 && buf[2] === 0xff) return 'image/jpeg';
42
+ if (buf.length >= 6) { const s = buf.toString('latin1', 0, 6); if (s === 'GIF87a' || s === 'GIF89a') return 'image/gif'; }
43
+ if (buf.length >= 12 && buf.toString('latin1', 0, 4) === 'RIFF' && buf.toString('latin1', 8, 12) === 'WEBP') return 'image/webp';
44
+ if (buf.length >= 5 && buf.toString('latin1', 0, 5) === '%PDF-') return 'application/pdf';
45
+ if (buf.length >= 4 && buf[0] === 0x50 && buf[1] === 0x4b && (buf[2] === 0x03 || buf[2] === 0x05 || buf[2] === 0x07)) return 'application/zip';
46
+ return undefined;
47
+ }
48
+
49
+ /** Strip path separators / control chars from a client-supplied display name and bound
50
+ * its length. Never used to build the on-disk filename (that is random) — only for the
51
+ * persisted/displayed name. */
52
+ function sanitizeName(name?: string): string {
53
+ if (!name) return '';
54
+ return name.replace(/[/\\\u0000-\u001f]/g, '_').replace(/\s+/g, ' ').trim().slice(0, 200);
55
+ }
21
56
 
57
+ function stampPrefix(): string {
22
58
  const now = new Date();
23
59
  const ts = now.toISOString().replace(/[-:T]/g, '').slice(0, 14);
24
- const stamp = `${ts.slice(0, 8)}_${ts.slice(8, 14)}`;
25
- const rand = crypto.randomBytes(3).toString('hex');
26
-
27
- // Extract extension from original name or mediaType
28
- const extFromName = att.name?.includes('.') ? att.name.split('.').pop()! : '';
29
- const extFromMime: Record<string, string> = {
30
- 'image/png': 'png', 'image/jpeg': 'jpg', 'image/gif': 'gif', 'image/webp': 'webp',
31
- 'application/pdf': 'pdf', 'text/plain': 'txt', 'text/csv': 'csv',
32
- };
33
- const ext = extFromName || extFromMime[att.mediaType] || 'bin';
60
+ return `${ts.slice(0, 8)}_${ts.slice(8, 14)}_${crypto.randomBytes(3).toString('hex')}`;
61
+ }
34
62
 
35
- const filename = `${stamp}_${rand}.${ext}`;
36
- const relPath = `${category}/${filename}`;
63
+ export function saveAttachment(att: { type: 'image' | 'file'; name?: string; mediaType: string; data: string }): SavedFile {
64
+ const buf = Buffer.from(att.data || '', 'base64');
65
+ if (buf.length === 0) throw new Error('empty attachment payload');
66
+ if (buf.length > MAX_ATTACHMENT_BYTES) {
67
+ throw new Error(`attachment too large: ${buf.length} bytes (max ${MAX_ATTACHMENT_BYTES})`);
68
+ }
37
69
 
38
- const dir = category === 'images' ? paths.filesImages : paths.filesDocuments;
70
+ // Content is authoritative. When we recognize the bytes, the sniffed type drives the
71
+ // media type, the on-disk category, and the rendered image/doc classification — so a
72
+ // client can't mislabel a PDF as image/png (or vice-versa) to land it in the wrong bucket.
73
+ const sniffed = sniffMediaType(buf);
74
+ const claimedImage = att.type === 'image' || (att.mediaType || '').toLowerCase().startsWith('image/');
75
+ const isImage = sniffed ? sniffed.startsWith('image/') : claimedImage;
76
+ const effectiveMediaType = sniffed || att.mediaType || (isImage ? 'image/jpeg' : 'application/octet-stream');
77
+
78
+ const category = isImage ? 'images' : 'documents';
79
+ const cleanName = sanitizeName(att.name);
80
+
81
+ // Extension: prefer the validated/sniffed media type, then a sanitized client extension,
82
+ // then a generic fallback. (Never trust the raw client name for the on-disk path.)
83
+ const extFromName = cleanName.includes('.') ? cleanName.split('.').pop()!.toLowerCase().replace(/[^a-z0-9]/g, '').slice(0, 8) : '';
84
+ const ext = EXT_FROM_MIME[effectiveMediaType] || extFromName || 'bin';
85
+
86
+ const filename = `${stampPrefix()}.${ext}`;
87
+ const relPath = `${category}/${filename}`;
88
+ const dir = isImage ? paths.filesImages : paths.filesDocuments;
39
89
  const absPath = `${dir}/${filename}`;
40
90
 
41
- fs.writeFileSync(absPath, Buffer.from(att.data, 'base64'));
91
+ fs.writeFileSync(absPath, buf);
42
92
 
43
93
  return {
44
- type: att.type === 'image' ? 'image' : 'document',
45
- name: att.name,
46
- mediaType: att.mediaType,
94
+ type: isImage ? 'image' : 'document',
95
+ name: cleanName,
96
+ mediaType: effectiveMediaType,
47
97
  relPath,
48
98
  absPath,
49
99
  };
50
100
  }
101
+
102
+ /** Persist a voice/audio clip (raw base64) to files/audio and return its served path.
103
+ * Audio rides on a message as meta.audio_data = relPath (not in the attachments array),
104
+ * so the chat can replay it after a refresh. */
105
+ export function saveAudio(base64: string, mediaType = 'audio/webm'): { relPath: string; absPath: string; mediaType: string } {
106
+ const buf = Buffer.from(base64 || '', 'base64');
107
+ if (buf.length === 0) throw new Error('empty audio payload');
108
+ if (buf.length > MAX_ATTACHMENT_BYTES) throw new Error(`audio too large: ${buf.length} bytes`);
109
+
110
+ const mt = (mediaType || 'audio/webm').toLowerCase();
111
+ const ext = mt.includes('webm') ? 'webm'
112
+ : (mt.includes('mp4') || mt.includes('m4a') || mt.includes('aac')) ? 'm4a'
113
+ : (mt.includes('mpeg') || mt.includes('mp3')) ? 'mp3'
114
+ : mt.includes('wav') ? 'wav'
115
+ : mt.includes('ogg') ? 'ogg'
116
+ : 'webm';
117
+
118
+ const filename = `${stampPrefix()}.${ext}`;
119
+ const relPath = `audio/${filename}`;
120
+ const absPath = `${paths.filesAudio}/${filename}`;
121
+ fs.writeFileSync(absPath, buf);
122
+ return { relPath, absPath, mediaType: mediaType || 'audio/webm' };
123
+ }
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Shared attachment-ingestion policy — the SINGLE source of truth all three
3
+ * harnesses (Claude Agent SDK, Codex app-server, PI) consult when turning an
4
+ * AgentAttachment into provider content. Centralizing it here is what makes the
5
+ * three harnesses behave consistently instead of drifting (different text
6
+ * allowlists, different size caps, different "saved to disk" wording, etc.).
7
+ *
8
+ * Routing rule (per the canonical attachment contract):
9
+ * • image/* → native image content block (vision)
10
+ * • application/pdf → native document block WHERE the provider supports it
11
+ * (Claude always; PI-anthropic; PI-gemini). Otherwise the
12
+ * file is on disk and the model reads it via its file tools.
13
+ * • text-like → decode + cap + inline as a text note
14
+ * • everything else → a disk-pointer note (the agent opens it with Read/Bash)
15
+ *
16
+ * The constraint on what is ingested is the MODEL's capability, never an arbitrary
17
+ * blocklist of ours — anything that cannot be inlined is still saved to disk and
18
+ * surfaced to the agent's file tools, so nothing is silently dropped.
19
+ */
20
+
21
+ import type { SavedFile } from '../file-saver.js';
22
+
23
+ /** Per-file / cross-file budgets for INLINING text-like documents as model-visible text.
24
+ * Both expressed in characters of decoded text so all harnesses agree on the unit
25
+ * (Codex previously capped on bytes, PI on chars — they diverged). The file is also
26
+ * on disk, so truncating the inline copy is lossless for a tool-capable agent. */
27
+ export const INLINE_TEXT_PER_FILE_CHARS = 48_000;
28
+ export const INLINE_TEXT_TOTAL_CHARS = 96_000;
29
+
30
+ /** Hard ceiling on a single inlined base64 image (decoded bytes). Over this we drop the
31
+ * inline image and fall back to the saved-files disk pointer rather than bloat every
32
+ * stateless resend with a multi-MB payload. */
33
+ export const MAX_INLINE_IMAGE_BYTES = 5 * 1024 * 1024;
34
+
35
+ /** Media types whose bytes are safe/useful to decode and inline as plain text.
36
+ * Anchored, union of the lists Codex and PI carried separately. */
37
+ const INLINE_TEXT_RE =
38
+ /^(?:text\/[\w.+-]+|application\/(?:json|xml|x-ndjson|ld\+json|yaml|x-yaml|toml|x-sh|javascript|ecmascript|x-www-form-urlencoded|csv))$/i;
39
+
40
+ /** Image media types we will hand to a provider as a native image block. */
41
+ const INLINE_IMAGE_RE = /^image\/(?:png|jpe?g|gif|webp|avif|bmp|heic|heif)$/i;
42
+
43
+ export function isInlineTextMediaType(mt?: string): boolean {
44
+ if (!mt) return false;
45
+ const base = mt.split(';')[0].trim().toLowerCase();
46
+ return INLINE_TEXT_RE.test(base);
47
+ }
48
+
49
+ export function isInlinePdf(mt?: string): boolean {
50
+ if (!mt) return false;
51
+ return mt.split(';')[0].trim().toLowerCase() === 'application/pdf';
52
+ }
53
+
54
+ export function isImageMediaType(mt?: string): boolean {
55
+ if (!mt) return false;
56
+ return INLINE_IMAGE_RE.test(mt.split(';')[0].trim().toLowerCase());
57
+ }
58
+
59
+ /** Coerce an image media type to one every provider accepts. Unknown/garbage
60
+ * (undefined, "", "application/octet-stream", "img/png", …) → image/jpeg so we
61
+ * never emit `data:undefined;base64,` or a provider-rejected document type. */
62
+ export function normalizeImageMediaType(mt?: string): string {
63
+ if (!mt) return 'image/jpeg';
64
+ const base = mt.split(';')[0].trim().toLowerCase();
65
+ if (base === 'image/jpg') return 'image/jpeg';
66
+ return INLINE_IMAGE_RE.test(base) ? base : 'image/jpeg';
67
+ }
68
+
69
+ /** Approximate decoded byte length of a base64 string without allocating a Buffer. */
70
+ export function approxBase64Bytes(data: string): number {
71
+ if (!data) return 0;
72
+ const len = data.length;
73
+ let padding = 0;
74
+ if (data.endsWith('==')) padding = 2;
75
+ else if (data.endsWith('=')) padding = 1;
76
+ return Math.max(0, Math.floor((len * 3) / 4) - padding);
77
+ }
78
+
79
+ export type AttachmentRoute = 'image' | 'native-document' | 'inline-text' | 'reference-only';
80
+
81
+ /**
82
+ * Decide how a non-image / document attachment should reach the model, given the
83
+ * active provider's native-document capability.
84
+ * - canNativeDocument: true for Claude, PI-anthropic, PI-gemini (PDF via document block).
85
+ */
86
+ export function routeAttachment(
87
+ att: { type: 'image' | 'file'; mediaType: string; data?: string },
88
+ opts: { canNativeDocument: boolean },
89
+ ): AttachmentRoute {
90
+ // An empty/undefined payload must never become an inline provider block: a
91
+ // `data:''`/empty base64 image or document source 400s the entire turn on
92
+ // Anthropic/Gemini. Degrade to reference-only (there's no SavedFile for it
93
+ // either, so it's simply skipped). Mirrors Codex's `if (!att.data) break;`.
94
+ if (!att.data) return 'reference-only';
95
+ if (att.type === 'image' || isImageMediaType(att.mediaType)) return 'image';
96
+ if (isInlinePdf(att.mediaType) && opts.canNativeDocument) return 'native-document';
97
+ if (isInlineTextMediaType(att.mediaType)) return 'inline-text';
98
+ return 'reference-only';
99
+ }
100
+
101
+ /**
102
+ * The ONE canonical "files are on disk, read them with your tools" note. Every
103
+ * harness emits byte-identical wording so the agent's behavior doesn't depend on
104
+ * which provider is active. Cites the ABSOLUTE path because the persisted relPath
105
+ * (`<category>/<file>`) omits the `files/` segment and isn't openable as-is.
106
+ */
107
+ export function buildSavedFilesNote(savedFiles: SavedFile[]): string {
108
+ if (!savedFiles?.length) return '';
109
+ const lines = savedFiles.map((f) => `- ${f.name || f.relPath} (${f.mediaType}) → ${f.absPath}`);
110
+ return `[attached files saved to disk — open them with your file tools (Read/Bash) if you need their contents]\n${lines.join('\n')}`;
111
+ }
@@ -21,6 +21,15 @@ import { assembleSystemPrompt } from '../../worker/prompts/prompt-assembler.js';
21
21
  import { buildAgents } from '../agents/index.js';
22
22
  import { preWarm, claimWarmup, discardWarmup } from '../cli-warmup.js';
23
23
  import { mirrorSkillsInto } from './skills.js';
24
+ import {
25
+ routeAttachment,
26
+ normalizeImageMediaType,
27
+ approxBase64Bytes,
28
+ buildSavedFilesNote,
29
+ INLINE_TEXT_PER_FILE_CHARS,
30
+ INLINE_TEXT_TOTAL_CHARS,
31
+ MAX_INLINE_IMAGE_BYTES,
32
+ } from './attachment-policy.js';
24
33
 
25
34
  // ── Types ──────────────────────────────────────────────────────────────────
26
35
 
@@ -157,30 +166,68 @@ function loadMcpServers(): Record<string, any> | undefined {
157
166
  return undefined;
158
167
  }
159
168
 
160
- /** Build an SDKUserMessage from text + optional attachments */
169
+ /** Build an SDKUserMessage from text + optional attachments.
170
+ * Routing is delegated to the shared attachment-policy so all three harnesses
171
+ * ingest identically. The Anthropic Messages API base64 document source accepts
172
+ * ONLY application/pdf — handing it a docx/xlsx/csv/markdown/octet-stream 400s
173
+ * the whole turn — so non-PDF binaries are NOT emitted as provider blocks; they
174
+ * ride on the saved-files disk pointer instead. Blocks stay MEDIA-FIRST, TEXT-last. */
161
175
  function buildUserMessage(text: string, attachments?: AgentAttachment[], savedFiles?: SavedFile[]): SDKUserMessage {
162
176
  const content: any[] = [];
163
177
 
164
178
  if (attachments?.length) {
179
+ // Running budget so the cross-file inline-text total never exceeds the cap.
180
+ let inlineTextBudget = INLINE_TEXT_TOTAL_CHARS;
181
+
165
182
  for (const att of attachments) {
166
- if (att.type === 'image') {
167
- content.push({
168
- type: 'image',
169
- source: { type: 'base64', media_type: att.mediaType, data: att.data },
170
- });
171
- } else {
172
- content.push({
173
- type: 'document',
174
- source: { type: 'base64', media_type: att.mediaType, data: att.data },
175
- });
183
+ // Claude natively renders PDF document blocks (vision over the rendered pages).
184
+ const route = routeAttachment(att, { canNativeDocument: true });
185
+ switch (route) {
186
+ case 'image': {
187
+ // Drop the inline copy when it would bloat every stateless resend — the
188
+ // file is on disk and buildSavedFilesNote points the file tools at it.
189
+ if (approxBase64Bytes(att.data) > MAX_INLINE_IMAGE_BYTES) break;
190
+ content.push({
191
+ type: 'image',
192
+ source: { type: 'base64', media_type: normalizeImageMediaType(att.mediaType), data: att.data },
193
+ });
194
+ break;
195
+ }
196
+ case 'native-document': {
197
+ content.push({
198
+ type: 'document',
199
+ source: { type: 'base64', media_type: 'application/pdf', data: att.data },
200
+ });
201
+ break;
202
+ }
203
+ case 'inline-text': {
204
+ if (inlineTextBudget <= 0) break;
205
+ let decoded = '';
206
+ try {
207
+ decoded = Buffer.from(att.data, 'base64').toString('utf-8');
208
+ } catch {
209
+ break; // undecodable → rely on the saved-files note
210
+ }
211
+ const cap = Math.min(INLINE_TEXT_PER_FILE_CHARS, inlineTextBudget);
212
+ const slice = decoded.slice(0, cap);
213
+ inlineTextBudget -= slice.length;
214
+ // text/csv/markdown also 400 as document sources, so inline as a text note.
215
+ content.push({ type: 'text', text: `--- ${att.name} ---\n${slice}` });
216
+ break;
217
+ }
218
+ case 'reference-only':
219
+ default:
220
+ // Binary we can't inline (docx/xlsx/zip/…) or an unexpected route — no
221
+ // provider block; the saved-files note below carries the disk pointer.
222
+ break;
176
223
  }
177
224
  }
178
225
  }
179
226
 
180
227
  let promptText = text || '(attached files)';
181
228
  if (savedFiles?.length) {
182
- const lines = savedFiles.map((f) => `- ${f.name} -> ${f.relPath}`);
183
- promptText += `\n\n[Attached files saved to disk]\n${lines.join('\n')}\nYou can read or reference these files using the paths above (relative to your cwd).`;
229
+ const note = buildSavedFilesNote(savedFiles);
230
+ if (note) promptText += `\n\n${note}`;
184
231
  }
185
232
 
186
233
  content.push({ type: 'text', text: promptText });
@@ -663,8 +710,8 @@ export async function startBlobyAgentQuery(
663
710
 
664
711
  let plainPrompt = prompt;
665
712
  if (savedFiles?.length && !attachments?.length) {
666
- const lines = savedFiles.map((f) => `- ${f.name} -> ${f.relPath}`);
667
- plainPrompt += `\n\n[Attached files saved to disk]\n${lines.join('\n')}\nYou can read or reference these files using the paths above (relative to your cwd).`;
713
+ const note = buildSavedFilesNote(savedFiles);
714
+ if (note) plainPrompt += `\n\n${note}`;
668
715
  }
669
716
 
670
717
  const sdkPrompt: string | AsyncIterable<SDKUserMessage> =