@spinabot/brigade 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/README.md +12 -10
  2. package/dist/agents/agent-loop.d.ts +55 -0
  3. package/dist/agents/agent-loop.d.ts.map +1 -1
  4. package/dist/agents/agent-loop.js +90 -1
  5. package/dist/agents/agent-loop.js.map +1 -1
  6. package/dist/agents/channels/inbound-pipeline.d.ts +22 -0
  7. package/dist/agents/channels/inbound-pipeline.d.ts.map +1 -1
  8. package/dist/agents/channels/inbound-pipeline.js +31 -1
  9. package/dist/agents/channels/inbound-pipeline.js.map +1 -1
  10. package/dist/agents/channels/media-capture.d.ts +69 -6
  11. package/dist/agents/channels/media-capture.d.ts.map +1 -1
  12. package/dist/agents/channels/media-capture.js +125 -8
  13. package/dist/agents/channels/media-capture.js.map +1 -1
  14. package/dist/agents/channels/telegram/media.d.ts.map +1 -1
  15. package/dist/agents/channels/telegram/media.js +16 -4
  16. package/dist/agents/channels/telegram/media.js.map +1 -1
  17. package/dist/agents/channels/whatsapp/media.d.ts +19 -0
  18. package/dist/agents/channels/whatsapp/media.d.ts.map +1 -1
  19. package/dist/agents/channels/whatsapp/media.js +37 -2
  20. package/dist/agents/channels/whatsapp/media.js.map +1 -1
  21. package/dist/agents/media-understanding/anthropic-adapter.d.ts +49 -0
  22. package/dist/agents/media-understanding/anthropic-adapter.d.ts.map +1 -0
  23. package/dist/agents/media-understanding/anthropic-adapter.js +162 -0
  24. package/dist/agents/media-understanding/anthropic-adapter.js.map +1 -0
  25. package/dist/agents/media-understanding/config.d.ts +57 -0
  26. package/dist/agents/media-understanding/config.d.ts.map +1 -0
  27. package/dist/agents/media-understanding/config.js +289 -0
  28. package/dist/agents/media-understanding/config.js.map +1 -0
  29. package/dist/agents/media-understanding/gemini-adapter.d.ts +57 -0
  30. package/dist/agents/media-understanding/gemini-adapter.d.ts.map +1 -0
  31. package/dist/agents/media-understanding/gemini-adapter.js +343 -0
  32. package/dist/agents/media-understanding/gemini-adapter.js.map +1 -0
  33. package/dist/agents/media-understanding/index.d.ts +58 -0
  34. package/dist/agents/media-understanding/index.d.ts.map +1 -0
  35. package/dist/agents/media-understanding/index.js +275 -0
  36. package/dist/agents/media-understanding/index.js.map +1 -0
  37. package/dist/agents/media-understanding/pi-adapter.d.ts +72 -0
  38. package/dist/agents/media-understanding/pi-adapter.d.ts.map +1 -0
  39. package/dist/agents/media-understanding/pi-adapter.js +160 -0
  40. package/dist/agents/media-understanding/pi-adapter.js.map +1 -0
  41. package/dist/agents/media-understanding/types.d.ts +189 -0
  42. package/dist/agents/media-understanding/types.d.ts.map +1 -0
  43. package/dist/agents/media-understanding/types.js +51 -0
  44. package/dist/agents/media-understanding/types.js.map +1 -0
  45. package/dist/agents/session-wiring.d.ts +11 -0
  46. package/dist/agents/session-wiring.d.ts.map +1 -1
  47. package/dist/agents/session-wiring.js +1 -0
  48. package/dist/agents/session-wiring.js.map +1 -1
  49. package/dist/agents/tools/analyze-media-tool.d.ts +263 -0
  50. package/dist/agents/tools/analyze-media-tool.d.ts.map +1 -0
  51. package/dist/agents/tools/analyze-media-tool.js +2321 -0
  52. package/dist/agents/tools/analyze-media-tool.js.map +1 -0
  53. package/dist/agents/tools/doc-shared.d.ts +187 -0
  54. package/dist/agents/tools/doc-shared.d.ts.map +1 -0
  55. package/dist/agents/tools/doc-shared.js +484 -0
  56. package/dist/agents/tools/doc-shared.js.map +1 -0
  57. package/dist/agents/tools/edit-document-tool.d.ts +133 -0
  58. package/dist/agents/tools/edit-document-tool.d.ts.map +1 -0
  59. package/dist/agents/tools/edit-document-tool.js +815 -0
  60. package/dist/agents/tools/edit-document-tool.js.map +1 -0
  61. package/dist/agents/tools/image-downscale.d.ts +93 -0
  62. package/dist/agents/tools/image-downscale.d.ts.map +1 -0
  63. package/dist/agents/tools/image-downscale.js +257 -0
  64. package/dist/agents/tools/image-downscale.js.map +1 -0
  65. package/dist/agents/tools/make-document-tool.d.ts +114 -0
  66. package/dist/agents/tools/make-document-tool.d.ts.map +1 -0
  67. package/dist/agents/tools/make-document-tool.js +542 -0
  68. package/dist/agents/tools/make-document-tool.js.map +1 -0
  69. package/dist/agents/tools/media-cache.d.ts +56 -0
  70. package/dist/agents/tools/media-cache.d.ts.map +1 -0
  71. package/dist/agents/tools/media-cache.js +133 -0
  72. package/dist/agents/tools/media-cache.js.map +1 -0
  73. package/dist/agents/tools/ooxml-images.d.ts +107 -0
  74. package/dist/agents/tools/ooxml-images.d.ts.map +1 -0
  75. package/dist/agents/tools/ooxml-images.js +308 -0
  76. package/dist/agents/tools/ooxml-images.js.map +1 -0
  77. package/dist/agents/tools/registry.d.ts +12 -0
  78. package/dist/agents/tools/registry.d.ts.map +1 -1
  79. package/dist/agents/tools/registry.js +47 -0
  80. package/dist/agents/tools/registry.js.map +1 -1
  81. package/dist/buildstamp.json +1 -1
  82. package/dist/cli/commands/doctor.d.ts.map +1 -1
  83. package/dist/cli/commands/doctor.js +41 -0
  84. package/dist/cli/commands/doctor.js.map +1 -1
  85. package/dist/core/console-stream.d.ts.map +1 -1
  86. package/dist/core/console-stream.js +7 -5
  87. package/dist/core/console-stream.js.map +1 -1
  88. package/dist/core/server.js +6 -1
  89. package/dist/core/server.js.map +1 -1
  90. package/dist/system-prompt/assembler.d.ts.map +1 -1
  91. package/dist/system-prompt/assembler.js +25 -1
  92. package/dist/system-prompt/assembler.js.map +1 -1
  93. package/dist/system-prompt/guidance.d.ts +30 -0
  94. package/dist/system-prompt/guidance.d.ts.map +1 -1
  95. package/dist/system-prompt/guidance.js +50 -0
  96. package/dist/system-prompt/guidance.js.map +1 -1
  97. package/package.json +9 -1
@@ -0,0 +1,2321 @@
1
+ /**
2
+ * `analyze_media` tool — comprehensive media + document understanding.
3
+ *
4
+ * The model hands this tool a local file PATH or a URL (+ an optional
5
+ * `question`) and the tool RESOLVES the input into content the CURRENT turn's
6
+ * model can reason about against that question. It auto-detects the kind by
7
+ * extension / MIME and dispatches per-format.
8
+ *
9
+ * ─────────────────────────────────────────────────────────────────────────
10
+ * WHY THIS DESIGN (STEP-0 investigation findings — read before changing)
11
+ * ─────────────────────────────────────────────────────────────────────────
12
+ * 1. TOOL-RESULT CONTENT SHAPE. Pi types a tool's `AgentToolResult.content`
13
+ * as `(TextContent | ImageContent)[]` — TEXT or IMAGE only. There is NO
14
+ * `document` / `pdf` / `video` content-block type anywhere in the Pi SDK,
15
+ * and `Model.input` is `("text" | "image")[]` — the whole SDK content model
16
+ * is text + image. `ImageContent` is `{ type:"image"; data:<base64>;
17
+ * mimeType }`. So an IMAGE can flow to the model as a real multimodal block
18
+ * (the same shape `payload-mutators.ts` prunes from history, proving image
19
+ * blocks reach the provider); a PDF/DOCX/PPTX/XLSX/HTML/VIDEO can NOT be
20
+ * returned as a native non-text block. They must become TEXT.
21
+ *
22
+ * 2. DIRECT-PROVIDER UNDERSTANDING (the gap-closer). For modalities Pi can't
23
+ * carry — VIDEO, native/scanned PDF, and images on a text-only current
24
+ * model — the tool calls a provider REST API DIRECTLY via the
25
+ * media-understanding subsystem (`agents/media-understanding/`): it ships
26
+ * the media bytes + the question to Gemini (video → Files API; image/pdf →
27
+ * inline) or Anthropic (pdf → native `document` block with OCR; image →
28
+ * image block) and gets back TEXT, which it returns for the current model.
29
+ * Keys are resolved through Brigade's existing credential store
30
+ * (`readBrigadeCredentials`), never invented here. This bypasses Pi's
31
+ * text+image content cap WITHOUT needing a Pi aux-model runtime.
32
+ *
33
+ * 3. REUSE. HTML → markdown reuses the existing readability/linkedom extractor
34
+ * (`web-fetch-utils.ts`); URL fetches route through the SSRF guard
35
+ * (`guardedFetch`, `infra/net/fetch-guard.ts`) with size + content-type
36
+ * caps; local paths reuse the outbound media-path guard
37
+ * (`security/media-path-guard.ts`) PLUS a workspace/cwd/cache root scoping
38
+ * so secrets/system files outside allowed roots are refused (the same
39
+ * posture the `read`/path-write guards enforce). Untrusted bytes are
40
+ * wrapped in the external-content envelope (`security/external-content.ts`).
41
+ *
42
+ * ─────────────────────────────────────────────────────────────────────────
43
+ * PER-FORMAT BEHAVIOUR
44
+ * ─────────────────────────────────────────────────────────────────────────
45
+ * • image (png/jpg/jpeg/webp/gif/bmp/heic/heif): when the CURRENT model is
46
+ * vision-capable, returned as an IMAGE block so the model sees it (cheap —
47
+ * no extra call). When the current model is text-only, the tool routes the
48
+ * image to a vision-capable provider and returns the resulting TEXT — via
49
+ * the Pi SDK against ANY keyed provider with an image-capable model
50
+ * (OpenAI / OpenRouter / Groq / xAI / Mistral / Ollama / …), or the bespoke
51
+ * google/anthropic REST adapters — so vision works on any model + any
52
+ * configured provider. HEIC/HEIF cannot be transcoded without a native dep,
53
+ * so they are passed through with their declared mime — most providers
54
+ * reject HEIC, so the tool warns. Capped by `maxBytes`.
55
+ * • audio (mp3/wav/m4a/ogg/oga/flac/aac/opus): routed to the media-
56
+ * understanding subsystem (Gemini inline — audio is GEMINI-ONLY because Pi's
57
+ * content model is text + image, so no Pi-drivable provider can ingest an
58
+ * audio block) and the TEXT transcription / summary is returned, so voice
59
+ * notes work. Needs a Google/Gemini key; with none the tool returns a clear
60
+ * "configure a Gemini key" message (NOT a provider 400).
61
+ * • pdf: when an understanding provider key is configured, the PDF is sent
62
+ * NATIVELY (Anthropic `document` block — OCRs scanned pages + reads layout;
63
+ * or Gemini inline) and the provider's TEXT answer is returned, so scanned
64
+ * / no-text-layer PDFs now work. With no key (or `mode:"text"`) it falls
65
+ * back to per-page text extraction (`unpdf`, zero native deps) honoring a
66
+ * `pages` range. `mode:"provider"` forces the provider path.
67
+ * • docx: unzip (`fflate`) → concatenate `word/document.xml` text runs.
68
+ * • pptx: unzip → per-slide text (`ppt/slides/slideN.xml`), slide-numbered,
69
+ * honoring `pages` as a slide range.
70
+ * • xlsx: unzip → `xl/sharedStrings.xml` + each `xl/worksheets/sheetN.xml`
71
+ * → CSV-ish per-sheet text.
72
+ * • html (or a URL returning HTML): readability/linkedom → markdown.
73
+ * • video (mp4/webm/mov/…): always routed to the media-understanding
74
+ * subsystem (Gemini via the Files API: upload → poll ACTIVE →
75
+ * generateContent with a fileData part), and the model's TEXT description
76
+ * is returned. Needs a Google/Gemini key; with none the tool returns a
77
+ * clear "configure a Gemini key" message.
78
+ *
79
+ * The user's `question` is ALWAYS echoed back as a leading text block so the
80
+ * model knows what to do with the resolved content.
81
+ *
82
+ * SECURITY POSTURE: read capability — NOT owner-only — but it MUST honour the
83
+ * path guard (local) + SSRF guard (URL). Registered for every sender; no
84
+ * mutation, no spend.
85
+ */
86
+ import fs from "node:fs";
87
+ import fsp from "node:fs/promises";
88
+ import os from "node:os";
89
+ import path from "node:path";
90
+ import { Type } from "typebox";
91
+ import { guardedFetch, SsrfBlockedError } from "../../infra/net/fetch-guard.js";
92
+ import { validateOutboundMediaPath } from "../../security/media-path-guard.js";
93
+ import { wrapWebContent } from "../../security/external-content.js";
94
+ import { downscaleImageToBudget, isDownscalableImageMime, } from "./image-downscale.js";
95
+ import { extractOoxmlImages, resolveSlideOrder, } from "./ooxml-images.js";
96
+ import { mediaCacheKey, readMediaCache, writeMediaCache, } from "./media-cache.js";
97
+ import { resolveCacheDir, resolveOsCacheDir, resolveStateDir, DEFAULT_AGENT_ID, } from "../../config/paths.js";
98
+ import { runMediaUnderstanding as defaultRunMediaUnderstanding, resolvePiModel, MediaUnderstandingUnavailableError, } from "../media-understanding/index.js";
99
+ import { buildMediaUnderstandingConfig } from "../media-understanding/config.js";
100
+ import { composeFetchBody, extractBasicHtmlContent, extractReadableContent, } from "./web-fetch-utils.js";
101
+ import { truncateText } from "./web-shared.js";
102
+ import { BrigadeToolInputError, jsonResult } from "./common.js";
103
+ /* ─────────────────────────── tunables ─────────────────────────── */
104
/** Fallback byte cap applied to ANY source read (image bytes, document bytes, fetched body): 12 MiB. */
const DEFAULT_MAX_BYTES = 12 * 2 ** 20;
/** Hard upper bound: an explicit `maxBytes` is still clamped to this value (48 MiB). */
const MAX_BYTES_CEILING = 48 * 2 ** 20;
/** Tighter default for image blocks, which are the most token-expensive content (8 MiB). */
const DEFAULT_IMAGE_MAX_BYTES = 8 * 2 ** 20;
/** Upper bound on characters of extracted text handed back to the model, so the turn stays bounded. */
const DEFAULT_MAX_CHARS = 60000;
/** HTTP timeout, in milliseconds, applied per request when the source is a URL. */
const FETCH_TIMEOUT_MS = 45000;
/** Largest number of images accepted in a single `sources[]` batch. Matches the field cap. */
const MAX_BATCH_IMAGES = 20;
/** Largest number of non-image (document / text) sources accepted in a single batch. */
const MAX_BATCH_DOCS = 10;
/**
 * Cap on EMBEDDED images surfaced from one OOXML document when `includeImages`
 * is set. Real decks can carry many pictures (the motivating failure had 35), so
 * the count is bounded by reusing the batch cap, and a truncated result is
 * reported as "showing N of M". Per-image and total byte budgets go through the
 * existing image downscale path.
 */
const MAX_EMBEDDED_IMAGES = MAX_BATCH_IMAGES;
/**
 * Combined byte budget for ALL embedded image blocks taken from one document
 * (24 MiB), so many large pictures cannot blow the turn even under the count
 * cap. Every image is first downscaled to the per-image budget; once the running
 * total would pass this ceiling the remaining images are dropped and reported.
 */
const EMBEDDED_IMAGES_TOTAL_BYTES = 24 * 2 ** 20;
/**
 * Image MIME types the understanding providers reliably accept on an image
 * block. Anthropic's Messages API takes ONLY jpeg / png / gif / webp and answers
 * 400 for anything else (e.g. image/bmp, image/tiff); Gemini and the Pi-driven
 * providers are similarly conservative. Any other decodable raster format is
 * therefore re-encoded to JPEG through the downscale path before being routed to
 * a provider. `image/heic` / `image/heif` are deliberately absent: they cannot
 * be decoded without a native dependency, so they pass through with their
 * declared mime plus a warning.
 */
const PROVIDER_SAFE_IMAGE_MIME = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"]);
149
/**
 * Extension → kind lookup table. Keys are lowercase with no leading dot, which
 * is the form `extensionOf` produces; values are the kind strings the rest of
 * the tool dispatches on.
 */
const EXT_KIND = {
    // images
    png: "image",
    jpg: "image",
    jpeg: "image",
    webp: "image",
    gif: "image",
    bmp: "image",
    heic: "image",
    heif: "image",
    // documents
    pdf: "pdf",
    docx: "docx",
    pptx: "pptx",
    xlsx: "xlsx",
    // OpenDocument + e-book + rich-text + notebook (broader than either rival)
    odt: "odt",
    ods: "ods",
    odp: "odp",
    epub: "epub",
    rtf: "rtf",
    ipynb: "ipynb",
    // markup
    html: "html",
    htm: "html",
    // video
    mp4: "video",
    webm: "video",
    mov: "video",
    m4v: "video",
    mkv: "video",
    avi: "video",
    mpeg: "video",
    mpg: "video",
    // audio (voice notes + clips). `.webm` is ambiguous (audio OR video) and maps
    // to video above; `.ogg` is likewise ambiguous but maps to audio below. The
    // model can pass an explicit `kind` override to force the other reading.
    // NOTE(review): `detectKind` consults this table before the MIME, so a
    // `.webm` URL served as `audio/*` still resolves to video unless the caller
    // re-checks the MIME separately — confirm.
    mp3: "audio",
    wav: "audio",
    m4a: "audio",
    oga: "audio",
    ogg: "audio",
    flac: "audio",
    aac: "audio",
    opus: "audio",
    // plain / structured text + common source-code files. Read as UTF-8, wrapped
    // in the untrusted-content envelope, returned as text. (Both rival tools
    // accept these; Brigade used to reject them outright.)
    txt: "text",
    text: "text",
    log: "text",
    csv: "text",
    tsv: "text",
    json: "text",
    jsonl: "text",
    ndjson: "text",
    json5: "text",
    xml: "text",
    yaml: "text",
    yml: "text",
    toml: "text",
    ini: "text",
    cfg: "text",
    conf: "text",
    env: "text",
    properties: "text",
    md: "text",
    markdown: "text",
    mdx: "text",
    rst: "text",
    tex: "text",
    srt: "text",
    vtt: "text",
    // source code
    js: "text",
    mjs: "text",
    cjs: "text",
    jsx: "text",
    ts: "text",
    tsx: "text",
    mts: "text",
    cts: "text",
    py: "text",
    rb: "text",
    go: "text",
    rs: "text",
    java: "text",
    kt: "text",
    kts: "text",
    c: "text",
    h: "text",
    cc: "text",
    cpp: "text",
    cxx: "text",
    hpp: "text",
    cs: "text",
    php: "text",
    swift: "text",
    scala: "text",
    sh: "text",
    bash: "text",
    zsh: "text",
    fish: "text",
    ps1: "text",
    bat: "text",
    sql: "text",
    r: "text",
    lua: "text",
    pl: "text",
    dart: "text",
    ex: "text",
    exs: "text",
    clj: "text",
    hs: "text",
    css: "text",
    scss: "text",
    sass: "text",
    less: "text",
    // SVG is XML markup, so it is read as text rather than shown as an image block.
    svg: "text",
};
270
/**
 * MIME prefix/exact → kind, consulted when the extension is ambiguous (URLs).
 *
 * The value is normalized first: parameters after `;` are dropped, whitespace is
 * trimmed and the type is lowercased, so `"Text/HTML; charset=utf-8"` matches
 * `text/html`.
 *
 * Ordering matters. The exact types that a broader rule would otherwise swallow
 * are tested FIRST:
 *   - `image/svg+xml` is markup and must read as "text", not hit `image/*`;
 *   - `text/rtf` must read as "rtf", not hit the generic `text/*` rule;
 *   - `application/x-ipynb+json` must read as "ipynb", not hit the `+json` suffix rule.
 *
 * @param {string | undefined | null} mime Raw MIME / Content-Type value.
 * @returns {string | undefined} The kind, or `undefined` when nothing matches.
 */
function kindFromMime(mime) {
    if (!mime)
        return undefined;
    const m = mime.split(";")[0]?.trim().toLowerCase() ?? "";
    // Specific types that the prefix / suffix rules below would shadow.
    if (m === "image/svg+xml")
        return "text";
    if (m === "application/rtf" || m === "text/rtf")
        return "rtf";
    if (m === "application/x-ipynb+json")
        return "ipynb";
    if (m.startsWith("image/"))
        return "image";
    if (m.startsWith("video/"))
        return "video";
    if (m.startsWith("audio/"))
        return "audio";
    if (m === "application/pdf")
        return "pdf";
    if (m === "text/html" || m === "application/xhtml+xml")
        return "html";
    // Structured-text content types — JSON / XML / YAML / CSV / source. Checked
    // AFTER html so an HTML page still routes to the readability extractor.
    if (m.startsWith("text/") ||
        m === "application/json" ||
        m === "application/ld+json" ||
        m === "application/xml" ||
        m === "application/x-ndjson" ||
        m === "application/x-yaml" ||
        m === "application/yaml" ||
        m === "application/toml" ||
        m === "application/x-sh" ||
        /\+json$/.test(m) ||
        /\+xml$/.test(m)) {
        return "text";
    }
    if (m === "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
        return "docx";
    if (m === "application/vnd.openxmlformats-officedocument.presentationml.presentation")
        return "pptx";
    if (m === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
        return "xlsx";
    if (m === "application/vnd.oasis.opendocument.text")
        return "odt";
    if (m === "application/vnd.oasis.opendocument.spreadsheet")
        return "ods";
    if (m === "application/vnd.oasis.opendocument.presentation")
        return "odp";
    if (m === "application/epub+zip")
        return "epub";
    return undefined;
}
321
/**
 * Return the lowercase file extension (without the dot) of a local path or of
 * an http(s) URL's pathname. Query strings and fragments on a URL are ignored
 * because only the parsed pathname is inspected.
 *
 * @param {string} source Local path or http(s) URL.
 * @returns {string} The extension, or `""` when there is none.
 */
export function extensionOf(source) {
    let target = source;
    try {
        const looksLikeHttpUrl = /^https?:\/\//i.test(source);
        if (looksLikeHttpUrl) {
            target = new URL(source).pathname;
        }
    }
    catch {
        /* unparsable URL — fall back to treating the raw string as a path */
    }
    // `path.extname` yields either "" or a string that begins with ".".
    const dotted = path.extname(target).toLowerCase();
    return dotted.startsWith(".") ? dotted.slice(1) : dotted;
}
334
/**
 * Map an image extension to its MIME type. The result is a bare MIME string
 * (no `data:` prefix), since Pi's ImageContent wants raw base64 + mimeType.
 *
 * @param {string} ext Lowercase extension without the dot.
 * @returns {string} The MIME type; anything unrecognized falls back to `image/png`.
 */
function imageMimeFromExt(ext) {
    if (ext === "jpg" || ext === "jpeg")
        return "image/jpeg";
    if (ext === "webp")
        return "image/webp";
    if (ext === "gif")
        return "image/gif";
    if (ext === "bmp")
        return "image/bmp";
    if (ext === "heic")
        return "image/heic";
    if (ext === "heif")
        return "image/heif";
    return "image/png";
}
354
/**
 * Map a video extension to its MIME type — used when a local video has no
 * declared MIME.
 *
 * @param {string} ext Lowercase extension without the dot.
 * @returns {string} The MIME type; anything unrecognized falls back to `video/mp4`.
 */
function videoMimeFromExt(ext) {
    if (ext === "webm")
        return "video/webm";
    if (ext === "mov")
        return "video/quicktime";
    if (ext === "m4v")
        return "video/x-m4v";
    if (ext === "mkv")
        return "video/x-matroska";
    if (ext === "avi")
        return "video/x-msvideo";
    if (ext === "mpeg" || ext === "mpg")
        return "video/mpeg";
    return "video/mp4";
}
374
/**
 * Map an audio extension to its MIME type — used when a local audio file has no
 * declared MIME.
 *
 * @param {string} ext Lowercase extension without the dot.
 * @returns {string} The MIME type; anything unrecognized falls back to `audio/mpeg`.
 */
function audioMimeFromExt(ext) {
    if (ext === "wav")
        return "audio/wav";
    if (ext === "m4a")
        return "audio/mp4";
    if (ext === "aac")
        return "audio/aac";
    if (ext === "flac")
        return "audio/flac";
    if (ext === "oga" || ext === "ogg")
        return "audio/ogg";
    if (ext === "opus")
        return "audio/opus";
    return "audio/mpeg";
}
394
/**
 * Resolve the kind of a source. Precedence:
 *   1. an explicit `override` (case-insensitive) when it names a supported kind;
 *   2. the source's extension, looked up in `EXT_KIND`;
 *   3. the MIME type (URL responses), via `kindFromMime`.
 *
 * An unrecognized `override` is ignored rather than rejected, so detection
 * falls through to the extension / MIME.
 *
 * @param {{ source: string, mime?: string, override?: string }} args
 * @returns {string | undefined} The kind, or `undefined` when nothing matches (unsupported).
 */
export function detectKind(args) {
    if (args.override) {
        const requested = args.override.toLowerCase();
        const accepted = [
            "image",
            "pdf",
            "docx",
            "pptx",
            "xlsx",
            "html",
            "video",
            "audio",
            "text",
            "odt",
            "ods",
            "odp",
            "epub",
            "rtf",
            "ipynb",
        ];
        if (accepted.includes(requested)) {
            return requested;
        }
    }
    const ext = extensionOf(args.source);
    // Own-property check: `EXT_KIND` is a plain object, so a bare `EXT_KIND[ext]`
    // would also match inherited members (a file named `x.constructor` would
    // yield the `Object` function instead of a kind string).
    if (ext && Object.prototype.hasOwnProperty.call(EXT_KIND, ext))
        return EXT_KIND[ext];
    return kindFromMime(args.mime);
}
424
+ /* ─────────────────────────── params ─────────────────────────── */
425
/**
 * Parameter schema for the `analyze_media` tool, built with typebox's `Type`.
 *
 * Every field is optional at the schema level. The `description` strings are
 * the text attached to each field of the schema, so they are runtime content,
 * not commentary — edit them with care.
 *
 * Relationships worth knowing when editing:
 *   - `sources` documents caps of 20 images / 10 documents, the same numbers as
 *     `MAX_BATCH_IMAGES` / `MAX_BATCH_DOCS`; keep the prose in sync with them.
 *   - `maxBytes` interpolates `DEFAULT_MAX_BYTES` and `MAX_BYTES_CEILING` into
 *     its description.
 *   - `kind` lists the same literals that `detectKind` accepts as an override.
 */
const AnalyzeMediaParams = Type.Object({
    source: Type.Optional(Type.String({
        description: "Local file PATH or http(s) URL to analyze. Images, PDF, DOCX, PPTX, XLSX, HTML, plain/structured text, audio (voice notes), and video are auto-detected by extension/MIME. For a single file. Use `sources` to analyze several at once.",
    })),
    sources: Type.Optional(Type.Array(Type.String(), {
        description: "Several local PATHs / http(s) URLs to analyze together in ONE call (e.g. compare photos, or read many files). Images are shown as multiple image blocks; documents/text are concatenated under per-file labels. Caps: 20 images / 10 documents per call. When set, takes precedence over `source`.",
    })),
    question: Type.Optional(Type.String({
        description: "What to analyze / extract / answer about the media. Optional but strongly encouraged — it is echoed to the model alongside the resolved content.",
    })),
    prompt: Type.Optional(Type.String({
        description: "Alias for `question`. Use one or the other.",
    })),
    pages: Type.Optional(Type.String({
        description: 'Page (PDF) or slide (PPTX) range to limit extraction, e.g. "1-5", "3", or "2-". 1-indexed. Ignored for other kinds.',
    })),
    includeImages: Type.Optional(Type.Boolean({
        description: "For an Office document (PPTX/DOCX/XLSX): whether to ALSO extract the embedded images (wireframes / screenshots / diagrams / charts inside the file) and show them to the model alongside the text — so you SEE the visuals, not just read titles. DEFAULT TRUE: analyzing an Office doc on a vision model returns its embedded images automatically, no flag needed. For a PPTX, `pages` scopes which slides' images come back (e.g. pages:\"8-13\"). Images are labeled by slide where known and capped (≈20); use `pages` to scope a big deck. Set `false` to skip images (text only, cheaper). NEVER unzip the file with bash/python to get its images — this tool already does it.",
    })),
    language: Type.Optional(Type.String({
        description: 'Optional spoken-language hint for AUDIO transcription (e.g. "es", "Spanish", "en-US"). Improves accuracy for non-English voice notes; ignored for non-audio kinds.',
    })),
    provider: Type.Optional(Type.Union([Type.Literal("google"), Type.Literal("anthropic")], {
        description: "Optional provider override for understanding video / native-PDF / text-only-model images (else auto-selected from configured keys). google = Gemini.",
    })),
    model: Type.Optional(Type.String({
        description: "Optional provider model id override for the understanding call (e.g. gemini-2.5-pro, claude-sonnet-4-5). Ignored for the local text-extraction path.",
    })),
    mode: Type.Optional(Type.Union([Type.Literal("auto"), Type.Literal("provider"), Type.Literal("text")], {
        description: 'PDF handling: "auto" (default — provider when a key is configured, else local text extraction), "provider" (force the native provider path), or "text" (force local unpdf text extraction).',
    })),
    maxBytes: Type.Optional(Type.Integer({
        description: `Optional cap on bytes read from the source (default ${DEFAULT_MAX_BYTES}, ceiling ${MAX_BYTES_CEILING}).`,
        minimum: 1024,
    })),
    maxTokens: Type.Optional(Type.Integer({
        description: "Optional cap on the provider answer length (output tokens) for the understanding call (image-via-provider / PDF / audio / video). Default ~4096; clamped to a sane window. Ignored for the local text-extraction path.",
        minimum: 64,
    })),
    kind: Type.Optional(Type.Union([
        Type.Literal("image"),
        Type.Literal("pdf"),
        Type.Literal("docx"),
        Type.Literal("pptx"),
        Type.Literal("xlsx"),
        Type.Literal("html"),
        Type.Literal("video"),
        Type.Literal("audio"),
        Type.Literal("text"),
        // OpenDocument + e-book + rich-text + notebook — `detectKind` already
        // routes these, so the override must accept them too (lets the model
        // force e.g. kind:"epub" to rescue a mis-detected / extension-less file).
        Type.Literal("odt"),
        Type.Literal("ods"),
        Type.Literal("odp"),
        Type.Literal("epub"),
        Type.Literal("rtf"),
        Type.Literal("ipynb"),
    ], {
        description: "Optional override of the auto-detected kind (use when the extension/MIME is wrong or missing). Use \"audio\" for a voice note whose extension is ambiguous (e.g. .ogg/.webm); \"text\" to force plain/structured-text reading; odt/ods/odp/epub/rtf/ipynb to force an OpenDocument / e-book / rich-text / notebook read.",
    })),
});
487
/**
 * Estimate whether the current model can consume an IMAGE block.
 *
 * An explicit boolean `ctx.imageInput` is trusted as-is. Otherwise the answer is
 * inferred from `ctx.modelId` alone, using a deliberately conservative,
 * self-contained heuristic (no heavy model resolution on the hot path):
 *   - known text-only / no-vision markers → `false`;
 *   - the major multimodal families → `true`;
 *   - anything else → `undefined` ("assume yes, note it" — the caller decides).
 *
 * @param {{ imageInput?: boolean, modelId?: string } | undefined | null} ctx
 * @returns {boolean | undefined}
 */
export function modelLikelySeesImages(ctx) {
    if (!ctx) {
        return undefined;
    }
    const declared = ctx.imageInput;
    if (declared === true || declared === false) {
        return declared;
    }
    const rawId = ctx.modelId ?? "";
    const id = rawId.toLowerCase();
    if (id === "") {
        return undefined;
    }
    // Text-only markers are tested before the multimodal families so that an id
    // such as "gpt-4-text-only" is reported as non-vision.
    const textOnlyMarkers = [
        /\b(text-only|no-?vision)\b/,
        /(^|[/-])(o1-mini|o3-mini)([-/]|$)/,
        /(^|[/-])gpt-3\.5/,
    ];
    if (textOnlyMarkers.some((marker) => marker.test(id))) {
        return false;
    }
    const multimodalFamilies = /(claude|gpt-4|gpt-5|gemini|llava|pixtral|qwen.*vl|grok-(?:2|3|4)|gpt-4o)/;
    // No match means "unknown": report the uncertainty instead of guessing.
    return multimodalFamilies.test(id) ? true : undefined;
}
517
/**
 * Build the list of directories a local source path may live under: the
 * workspace, the cwd, cache / temp dirs, selected state-dir subtrees, the OS
 * cache, the macOS Messages attachments dir and — for owner-local turns only —
 * the operator's home directory.
 *
 * Every candidate is normalized with `path.resolve` and de-duplicated; the
 * result keeps first-insertion order. Lookups that throw are skipped on a
 * best-effort basis, except `resolveCacheDir()`, whose failure propagates.
 *
 * @param {{ workspaceDir?: string, cwd?: string, ownerLocalAccess?: boolean }} opts
 * @returns {string[]} Absolute root directories.
 */
function allowedLocalRoots(opts) {
    const collected = new Set();
    const add = (candidate) => {
        if (!candidate) {
            return;
        }
        try {
            collected.add(path.resolve(candidate));
        }
        catch {
            /* unresolvable candidate — skip it */
        }
    };
    add(opts.workspaceDir);
    add(opts.cwd);
    add(resolveCacheDir());
    add(process.env.TMPDIR || process.env.TEMP || process.env.TMP || "");
    try {
        add(os.tmpdir());
    }
    catch {
        /* no temp dir available — skip it */
    }
    // FILESYSTEM mode: inbound attachments and generated media land under these
    // state-dir subtrees, so the model must be able to analyze a file it has
    // just received.
    try {
        for (const subtree of ["channels", "cache", "captures", "workspace"]) {
            add(path.join(resolveStateDir(), subtree));
        }
    }
    catch {
        /* state dir unavailable — skip the remaining subtrees */
    }
    // CONVEX mode moves inbound channel media OUT of ~/.brigade into
    // `resolveOsCacheDir()/channels/<id>/...` (see channels/whatsapp/media.ts;
    // other channels mirror it), and BlueBubbles writes inbound media to
    // `resolveOsCacheDir()/bluebubbles/<acct>/inbound-media` in BOTH modes
    // (connection.ts). `resolveCacheDir()` already yields the OS cache root in
    // convex mode; listing the OS cache and its two channel subtrees explicitly
    // also covers filesystem-mode BlueBubbles and any pre-context window where
    // the mode peek has not settled. This widening is safe because
    // `validateOutboundMediaPath` independently refuses secrets, system files
    // and credential dirs.
    try {
        const osCacheRoot = resolveOsCacheDir();
        add(osCacheRoot);
        add(path.join(osCacheRoot, "channels"));
        add(path.join(osCacheRoot, "bluebubbles"));
    }
    catch {
        /* OS cache dir unavailable — skip it */
    }
    // macOS Messages attachments. The NATIVE iMessage adapter hands over the
    // bridge's on-disk attachment path as-is instead of copying bytes into a
    // cache dir (see channels/imessage/media.ts resolveInboundAttachments), so
    // an inbound iMessage document sits under `~/Library/Messages/Attachments`.
    // Without this root, analyzing such a file throws "outside the allowed
    // roots". The adapter already restricts inbound paths to its own
    // attachmentRoots allow-list and `validateOutboundMediaPath` still applies,
    // so admitting this read-only directory is safe. (A REMOTE bridge copies
    // bytes into an OS temp dir, which is covered above.) Harmless off macOS:
    // the path simply never exists.
    try {
        add(path.join(os.homedir(), "Library", "Messages", "Attachments"));
    }
    catch {
        /* home dir unavailable — skip it */
    }
    // OWNER local turns only (TUI / desktop / the operator's own channel
    // messages): the operator routinely points at absolute paths in personal
    // directories (Downloads / Desktop / Documents) that sit outside the
    // workspace and cwd, and refusing those is the wrong default for the
    // trusted operator on their own machine. The gate on `ownerLocalAccess`
    // means an UNTRUSTED remote sender can never make Brigade read the
    // operator's home — a remote turn threads senderIsOwner:false, so this stays
    // off. Even for the owner, `validateOutboundMediaPath` keeps ~/.ssh, ~/.aws,
    // .env, brigade.json and other secrets denied.
    if (opts.ownerLocalAccess) {
        try {
            add(os.homedir());
        }
        catch {
            /* home dir unavailable — skip it */
        }
    }
    return Array.from(collected).filter((root) => root.length > 0);
}
613
/**
 * True when `resolved` is one of `roots` or lives beneath one of them.
 *
 * Containment is decided with `path.relative`: the path is outside a root when
 * the relative path is absolute (different drive / root) or climbs out through
 * a leading `..` SEGMENT. The segment check is exact on purpose — a plain
 * `startsWith("..")` would also reject a legitimate child whose name merely
 * begins with two dots (e.g. `<root>/..notes.txt`).
 *
 * @param {string} resolved Absolute (ideally symlink-resolved) path to test.
 * @param {Iterable<string>} roots Allowed root directories.
 * @returns {boolean} Whether `resolved` is inside at least one root.
 */
function isInsideAnyRoot(resolved, roots) {
    const parentPrefix = `..${path.sep}`;
    for (const root of roots) {
        const rel = path.relative(root, resolved);
        // Empty relative path: `resolved` IS the root itself.
        if (rel === "")
            return true;
        const escapesRoot = rel === ".." || rel.startsWith(parentPrefix) || path.isAbsolute(rel);
        if (!escapesRoot)
            return true;
    }
    return false;
}
622
/**
 * Read a LOCAL file with the same safety posture as `read` / outbound media:
 *   1. media-path guard (`validateOutboundMediaPath`) — a non-ok verdict is
 *      rethrown as a tool-input error carrying the guard's reason.
 *   2. allowed-root scoping — the symlink-resolved path must sit inside one of
 *      the roots returned by `allowedLocalRoots(opts)`.
 *   3. bounded read — at most `opts.maxBytes` bytes are ever loaded, so pointing
 *      the tool at a multi-gigabyte file cannot exhaust memory (the previous
 *      read-everything-then-slice approach could, and failed outright on files
 *      larger than Node's single-read limit).
 *
 * @param {string} source Local path as supplied by the caller.
 * @param {object} opts Root-scoping options plus `maxBytes` (byte cap; a
 *   missing / non-finite / negative value means "no cap").
 * @returns {Promise<{bytes: Buffer, truncated: boolean}>} The (possibly
 *   prefix-truncated) file bytes and whether the file exceeded the cap.
 * @throws {BrigadeToolInputError} On a refused path, a path outside the allowed
 *   roots, a missing file, a non-file, or an empty file.
 */
async function acquireLocalBytes(source, opts) {
    const verdict = validateOutboundMediaPath(source);
    if (!verdict.ok) {
        throw new BrigadeToolInputError(verdict.reason ?? "refusing to read that path");
    }
    let resolved;
    try {
        resolved = fs.realpathSync(path.resolve(source));
    }
    catch {
        // realpath fails for a non-existent path; fall back to the lexical
        // resolution so the root check and the "file not found" error still run.
        resolved = path.resolve(source);
    }
    const roots = allowedLocalRoots(opts);
    if (!isInsideAnyRoot(resolved, roots)) {
        throw new BrigadeToolInputError("refusing to read a path outside the allowed roots (workspace / current dir / cache / temp). " +
            "Move the file into the workspace, or pass a URL.");
    }
    let stat;
    try {
        stat = await fsp.stat(resolved);
    }
    catch {
        throw new BrigadeToolInputError(`file not found: ${source}`);
    }
    if (!stat.isFile())
        throw new BrigadeToolInputError(`not a file: ${source}`);
    if (stat.size === 0)
        throw new BrigadeToolInputError(`file is empty: ${source}`);
    const cap = Number.isFinite(opts.maxBytes) && opts.maxBytes >= 0 ? Math.floor(opts.maxBytes) : stat.size;
    const wanted = Math.min(stat.size, cap);
    const truncated = stat.size > cap;
    const handle = await fsp.open(resolved, "r");
    try {
        const buf = Buffer.alloc(wanted);
        let filled = 0;
        // A single read() may return fewer bytes than requested; loop until the
        // prefix is complete or the file ends early (it shrank after stat()).
        while (filled < wanted) {
            const { bytesRead } = await handle.read(buf, filled, wanted - filled, filled);
            if (bytesRead === 0)
                break;
            filled += bytesRead;
        }
        return { bytes: filled < wanted ? buf.subarray(0, filled) : buf, truncated };
    }
    finally {
        await handle.close();
    }
}
663
/**
 * Download `source` via `guardedFetch` (the SSRF-guarded fetch) using the
 * module's fetch timeout and, when supplied, the caller's abort signal. Any
 * HTTP status of 400 or above is surfaced as a tool-input error. The body is
 * consumed through `readBodyCapped`, so no more than `opts.maxBytes` bytes are
 * retained.
 *
 * @param {string} source The http(s) URL to fetch.
 * @param {object} opts `maxBytes` (byte cap) and optional `signal`.
 * @returns {Promise<{bytes: Buffer, mime: (string|undefined), truncated: boolean}>}
 *   Body bytes, the raw `content-type` header (if any), and the truncation flag.
 */
async function acquireUrlBytes(source, opts) {
    const request = {
        method: "GET",
        headers: {
            accept: "*/*",
            "user-agent": "Mozilla/5.0 (compatible; Brigade/1.0; +https://brigade.spinabot.com)",
        },
        timeoutMs: FETCH_TIMEOUT_MS,
    };
    // Only thread a signal through when the caller actually provided one.
    if (opts.signal) {
        request.signal = opts.signal;
    }
    const { response } = await guardedFetch(source, request);
    if (response.status >= 400) {
        throw new BrigadeToolInputError(`fetch failed: HTTP ${response.status} for ${source}`);
    }
    const contentType = response.headers.get("content-type");
    const capped = await readBodyCapped(response, opts.maxBytes);
    return {
        bytes: capped.buf,
        mime: contentType ?? undefined,
        truncated: capped.truncated,
    };
}
685
/**
 * Collect a Response body into one Buffer without keeping more than
 * `maxBytes` bytes. When the response exposes no stream, the whole body is
 * buffered via `arrayBuffer()` and sliced; otherwise chunks are read one at a
 * time and the reader is cancelled as soon as the cap is crossed.
 *
 * @param {Response} response Fetch-style response.
 * @param {number} maxBytes Maximum number of bytes to keep.
 * @returns {Promise<{buf: Buffer, truncated: boolean}>} Kept bytes and whether
 *   the body was longer than the cap.
 */
async function readBodyCapped(response, maxBytes) {
    const stream = response.body;
    if (!stream) {
        const whole = Buffer.from(await response.arrayBuffer());
        const over = whole.length > maxBytes;
        return { buf: over ? whole.subarray(0, maxBytes) : whole, truncated: over };
    }
    const reader = stream.getReader();
    const pieces = [];
    let received = 0;
    let truncated = false;
    while (!truncated) {
        const step = await reader.read();
        if (step.done) {
            break;
        }
        if (!step.value) {
            continue;
        }
        const piece = Buffer.from(step.value);
        const room = maxBytes - received;
        if (piece.length > room) {
            // Keep only what still fits, then stop pulling from the network.
            pieces.push(piece.subarray(0, room));
            truncated = true;
            try {
                await reader.cancel();
            }
            catch {
                /* ignore */
            }
        }
        else {
            pieces.push(piece);
            received += piece.length;
        }
    }
    return { buf: Buffer.concat(pieces), truncated };
}
720
+ /* ─────────────────────────── page-range parsing ─────────────────────────── */
721
/**
 * Build a predicate over 1-indexed page/slide numbers from a range spec.
 *
 * Accepted forms: `"a-b"`, open-ended `"a-"` / `"-b"`, and a single number
 * `"n"`. The lower bound is raised to 1 and the upper bound lowered to
 * `total`. A blank or unrecognised spec yields a predicate that accepts every
 * page — this function is best-effort and never throws on bad specs.
 * Exported for tests.
 *
 * @param {string|undefined|null} spec The range text.
 * @param {number} total Total number of pages/slides.
 * @returns {(n: number) => boolean} Membership test for page `n`.
 */
export function parsePageRange(spec, total) {
    const acceptAll = () => true;
    const trimmed = spec ? spec.trim() : "";
    if (!trimmed) {
        return acceptAll;
    }
    const range = /^(\d+)?\s*-\s*(\d+)?$/.exec(trimmed);
    if (range) {
        const [, first, last] = range;
        const from = first ? Math.max(1, parseInt(first, 10)) : 1;
        const to = last ? Math.min(total, parseInt(last, 10)) : total;
        return (n) => n >= from && n <= to;
    }
    if (/^\d+$/.test(trimmed)) {
        const only = parseInt(trimmed, 10);
        if (Number.isFinite(only)) {
            return (n) => n === only;
        }
    }
    return acceptAll;
}
741
+ /* ─────────────────────────── XML text helpers (docx/pptx/xlsx) ─────────────────────────── */
742
/**
 * Decode the 5 predefined XML entities plus decimal / hex character
 * references.
 *
 * Decoding happens in ONE pass: each reference is replaced exactly once and
 * the replacement text is never re-scanned. Chained replaces would decode
 * twice — e.g. `&#x26;#60;` (an escaped `&` followed by the literal text
 * `#60;`) would wrongly collapse to `<` instead of `&#60;`.
 * Unknown named entities are left untouched.
 *
 * @param {string} s XML text content.
 * @returns {string} Decoded text.
 */
function decodeXmlEntities(s) {
    return s.replace(/&(?:#x([0-9a-fA-F]+)|#(\d+)|(lt|gt|quot|apos|amp));/g, (_m, hex, dec, named) => {
        if (hex !== undefined)
            return safeCodePoint(parseInt(hex, 16));
        if (dec !== undefined)
            return safeCodePoint(parseInt(dec, 10));
        switch (named) {
            case "lt":
                return "<";
            case "gt":
                return ">";
            case "quot":
                return '"';
            case "apos":
                return "'";
            default:
                return "&";
        }
    });
}
/**
 * Convert a numeric character reference to its character, returning an empty
 * string for values outside the Unicode range (`String.fromCodePoint` would
 * throw on those).
 *
 * @param {number} code Parsed code point.
 * @returns {string} The character, or "" when `code` is not a valid code point.
 */
function safeCodePoint(code) {
    return Number.isFinite(code) && code >= 0 && code <= 0x10ffff ? String.fromCodePoint(code) : "";
}
/**
 * Pull text from OOXML `<a:t>` / `<w:t>` / `<t>` run elements in document
 * order. Works for Word (`w:t`), PowerPoint (`a:t`), and Excel shared strings
 * (`t`). Paragraph ends (`</w:p>`, `</a:p>`) and line breaks (`<w:br/>`,
 * `<a:br/>`) become newlines so the text stays readable.
 *
 * Runs and boundaries are tokenised by a SINGLE regex so the newlines land
 * between the runs they separate. (Inserting newlines into the XML first and
 * then extracting only the run bodies silently discards every newline, because
 * the boundaries sit outside the `<t>` elements.)
 *
 * @param {string} xml Raw part XML (document / slide / shared-string item).
 * @returns {string} Concatenated, entity-decoded run text.
 */
function ooxmlRunsToText(xml) {
    // Alternatives: paragraph end | line break | text run (capture 1 = body).
    const token = /<\/(?:w|a):p>|<(?:w|a):br\s*\/?>|<(?:[a-zA-Z]+:)?t(?:\s[^>]*)?>([\s\S]*?)<\/(?:[a-zA-Z]+:)?t>/g;
    const out = [];
    let m;
    while ((m = token.exec(xml)) !== null) {
        // Capture 1 is only defined for the text-run alternative.
        out.push(m[1] === undefined ? "\n" : decodeXmlEntities(m[1]));
    }
    return out.join("");
}
778
/**
 * Unzip an archive held in memory with fflate's synchronous `unzipSync`.
 * fflate is imported on demand, which keeps its load cost off the cold-start
 * path. Any failure while unzipping is reported as a tool-input error with a
 * message the model can act on, rather than the raw library exception.
 *
 * @param {Uint8Array|Buffer} bytes Raw archive bytes.
 * @returns {Promise<object>} Whatever `unzipSync` returns (callers index it by
 *   entry name).
 * @throws {BrigadeToolInputError} When the bytes cannot be unzipped.
 */
async function unzipEntries(bytes) {
    const { unzipSync: inflateArchive } = await import("fflate");
    const input = new Uint8Array(bytes);
    try {
        return inflateArchive(input);
    }
    catch {
        // fflate throws "invalid zip data" on a corrupt / non-OOXML file.
        throw new BrigadeToolInputError("could not read the file as an Office document (corrupt, password-protected, or not a real .docx/.pptx/.xlsx)");
    }
}
791
/**
 * Return one archive entry as a string, converted with fflate's `strFromU8`.
 *
 * @param {object} entries Entry map as produced by `unzipEntries`.
 * @param {string} name Entry path inside the archive.
 * @returns {Promise<string|undefined>} The entry text, or `undefined` when the
 *   entry is absent (fflate is not even loaded in that case).
 */
async function entryText(entries, name) {
    const raw = entries[name];
    if (raw) {
        const { strFromU8: toText } = await import("fflate");
        return toText(raw);
    }
    return undefined;
}
798
+ /* ─────────────────────────── per-format extractors ─────────────────────────── */
799
/**
 * Word (.docx) → plain text taken from the `word/document.xml` part.
 * Runs of three or more newlines are collapsed to one blank line and the
 * result is trimmed.
 *
 * @param {Uint8Array|Buffer} bytes Raw .docx bytes.
 * @returns {Promise<string>} Extracted text.
 * @throws {BrigadeToolInputError} When the main document part is missing or
 *   yields no text.
 */
async function extractDocx(bytes) {
    const archive = await unzipEntries(bytes);
    const documentXml = await entryText(archive, "word/document.xml");
    if (!documentXml) {
        throw new BrigadeToolInputError("not a valid .docx (missing word/document.xml)");
    }
    const collapsed = ooxmlRunsToText(documentXml).replace(/\n{3,}/g, "\n\n");
    const body = collapsed.trim();
    if (!body) {
        throw new BrigadeToolInputError("no extractable text in the .docx");
    }
    return body;
}
809
/**
 * PowerPoint (.pptx) → text, one labelled section per slide.
 * Slide parts (`ppt/slides/slideN.xml`) are ordered by their numeric suffix and
 * numbered 1..k in that order; `pages` filters on that 1-based position.
 *
 * @param {Uint8Array|Buffer} bytes Raw .pptx bytes.
 * @param {string|undefined} pages Optional range spec understood by
 *   `parsePageRange`.
 * @returns {Promise<string>} Sections of the form `--- Slide N ---` + text.
 * @throws {BrigadeToolInputError} When no slide parts exist or nothing is
 *   produced for the requested range.
 */
async function extractPptx(bytes, pages) {
    const archive = await unzipEntries(bytes);
    const slidePattern = /^ppt\/slides\/slide\d+\.xml$/;
    const slideFiles = Object.keys(archive).filter((entry) => slidePattern.test(entry));
    slideFiles.sort((left, right) => slideNum(left) - slideNum(right));
    if (slideFiles.length === 0) {
        throw new BrigadeToolInputError("not a valid .pptx (no slides found)");
    }
    const wanted = parsePageRange(pages, slideFiles.length);
    const sections = [];
    for (const [index, file] of slideFiles.entries()) {
        const position = index + 1;
        if (!wanted(position)) {
            continue;
        }
        const xml = await entryText(archive, file);
        let body = "";
        if (xml) {
            body = ooxmlRunsToText(xml).replace(/\n{3,}/g, "\n\n").trim();
        }
        sections.push(`--- Slide ${position} ---\n${body}`);
    }
    const combined = sections.join("\n\n").trim();
    if (!combined) {
        throw new BrigadeToolInputError("no extractable text in the .pptx");
    }
    return combined;
}
832
/**
 * Numeric suffix of a slide part name (`.../slide12.xml` → 12).
 *
 * @param {string} name Archive entry name.
 * @returns {number} The slide number, or 0 when the name does not end in
 *   `slide<digits>.xml`.
 */
function slideNum(name) {
    const hit = name.match(/slide(\d+)\.xml$/);
    if (!hit) {
        return 0;
    }
    return parseInt(hit[1], 10);
}
836
/**
 * Excel (.xlsx) → CSV-like text, one labelled section per worksheet.
 * The shared-string table (`xl/sharedStrings.xml`) is loaded first because
 * cells refer to its entries by index; worksheets (`xl/worksheets/sheetN.xml`)
 * are then rendered in numeric order through `sheetXmlToCsv`.
 *
 * @param {Uint8Array|Buffer} bytes Raw .xlsx bytes.
 * @returns {Promise<string>} Sections of the form `--- Sheet N ---` + rows.
 * @throws {BrigadeToolInputError} When no worksheet parts exist or the result
 *   is empty.
 */
async function extractXlsx(bytes) {
    const archive = await unzipEntries(bytes);
    const sharedStrings = [];
    const sharedXml = await entryText(archive, "xl/sharedStrings.xml");
    if (sharedXml) {
        // One <si> per shared string; an <si> may hold several <t> runs.
        for (const item of sharedXml.matchAll(/<si\b[^>]*>([\s\S]*?)<\/si>/g)) {
            sharedStrings.push(ooxmlRunsToText(item[1] ?? ""));
        }
    }
    const sheetPattern = /^xl\/worksheets\/sheet\d+\.xml$/;
    const sheetFiles = Object.keys(archive).filter((entry) => sheetPattern.test(entry));
    sheetFiles.sort((left, right) => sheetNum(left) - sheetNum(right));
    if (sheetFiles.length === 0) {
        throw new BrigadeToolInputError("not a valid .xlsx (no worksheets found)");
    }
    const lines = [];
    for (const [index, file] of sheetFiles.entries()) {
        const xml = await entryText(archive, file);
        if (xml) {
            lines.push(`--- Sheet ${index + 1} ---`, sheetXmlToCsv(xml, sharedStrings));
        }
    }
    const combined = lines.join("\n").replace(/\n{3,}/g, "\n\n").trim();
    if (!combined) {
        throw new BrigadeToolInputError("no extractable data in the .xlsx");
    }
    return combined;
}
867
/**
 * Numeric suffix of a worksheet part name (`.../sheet3.xml` → 3).
 *
 * @param {string} name Archive entry name.
 * @returns {number} The sheet number, or 0 when the name does not end in
 *   `sheet<digits>.xml`.
 */
function sheetNum(name) {
    const hit = name.match(/sheet(\d+)\.xml$/);
    if (!hit) {
        return 0;
    }
    return parseInt(hit[1], 10);
}
871
/**
 * Turn a worksheet XML into CSV-ish rows. Each `<row>` becomes a line; each
 * `<c>` cell is resolved — `t="s"` cells index into the shared-string table,
 * inline / numeric cells use their inline `<t>` (or `<v>`). Best-effort: cells
 * are emitted in document order separated by commas (column gaps are not
 * reconstructed — text fidelity over grid fidelity).
 *
 * Self-closing elements are matched BEFORE the open/close form. Otherwise an
 * empty styled cell such as `<c r="A1" s="3"/>` is taken for an opening tag
 * and swallows the next cell up to its `</c>`: the empty cell disappears and
 * the swallowed cell is read with the wrong attributes (a shared-string cell
 * then prints its raw index). The same applies to self-closing `<row .../>`.
 *
 * @param {string} xml Worksheet part XML.
 * @param {string[]} shared Shared-string table (already decoded).
 * @returns {string} Rows joined by "\n"; a row without cells is an empty line.
 */
function sheetXmlToCsv(xml, shared) {
    const rows = [];
    // Alternative 1: self-closing row (no capture). Alternative 2: row body.
    const rowRe = /<row\b[^>]*\/>|<row\b[^>]*>([\s\S]*?)<\/row>/g;
    let rm;
    while ((rm = rowRe.exec(xml)) !== null) {
        const rowXml = rm[1] ?? "";
        const cells = [];
        // Captures: 1 = attrs of a self-closing cell; 2/3 = attrs/body otherwise.
        const cellRe = /<c\b([^>]*)\/>|<c\b([^>]*)>([\s\S]*?)<\/c>/g;
        let cm;
        while ((cm = cellRe.exec(rowXml)) !== null) {
            const attrs = cm[1] ?? cm[2] ?? "";
            const inner = cm[3] ?? "";
            const isShared = /\bt="s"/.test(attrs);
            const vMatch = /<v\b[^>]*>([\s\S]*?)<\/v>/.exec(inner);
            const inlineT = /<t\b[^>]*>([\s\S]*?)<\/t>/.exec(inner);
            let value = "";
            if (isShared && vMatch) {
                const idx = parseInt(vMatch[1] ?? "", 10);
                value = Number.isFinite(idx) ? shared[idx] ?? "" : "";
            }
            else if (inlineT) {
                value = decodeXmlEntities(inlineT[1] ?? "");
            }
            else if (vMatch) {
                value = decodeXmlEntities(vMatch[1] ?? "");
            }
            // CSV-escape: wrap in quotes when it contains a comma / quote / newline.
            if (/[",\n]/.test(value))
                value = `"${value.replace(/"/g, '""')}"`;
            cells.push(value);
        }
        rows.push(cells.join(","));
    }
    return rows.join("\n");
}
914
/**
 * PDF → per-page text using unpdf (loaded on demand). Every page accepted by
 * the `pages` range becomes a `--- Page N ---` section holding that page's
 * trimmed text.
 *
 * @param {Uint8Array|Buffer} bytes Raw PDF bytes.
 * @param {string|undefined} pages Optional range spec understood by
 *   `parsePageRange`.
 * @returns {Promise<{text: string, totalPages: number}>} Joined sections and
 *   the page count reported by unpdf.
 * @throws {BrigadeToolInputError} When the document cannot be opened.
 */
async function extractPdf(bytes, pages) {
    const { getDocumentProxy: openDocument, extractText: readPages } = await import("unpdf");
    let pdf;
    try {
        pdf = await openDocument(new Uint8Array(bytes));
    }
    catch {
        throw new BrigadeToolInputError("could not parse the PDF (corrupt or password-protected?)");
    }
    const extracted = await readPages(pdf, { mergePages: false });
    const totalPages = extracted.totalPages;
    // Normalise to an array so a merged (single string) result still works.
    const pageTexts = Array.isArray(extracted.text) ? extracted.text : [String(extracted.text)];
    const wanted = parsePageRange(pages, totalPages);
    const sections = [];
    for (const [index, raw] of pageTexts.entries()) {
        const pageNo = index + 1;
        if (wanted(pageNo)) {
            sections.push(`--- Page ${pageNo} ---\n${(raw ?? "").trim()}`);
        }
    }
    return { text: sections.join("\n\n").trim(), totalPages };
}
938
/**
 * HTML bytes → markdown. The readability extractor is tried first; when it
 * rejects or returns null/undefined, the basic HTML extractor is used instead.
 * The chosen content is rendered by `composeFetchBody` in markdown mode, capped
 * at `DEFAULT_MAX_CHARS`.
 *
 * @param {Buffer} bytes Raw HTML bytes (decoded as UTF-8).
 * @param {string|undefined} baseUrl Passed through to the readability extractor.
 * @returns {Promise<string>} The composed text.
 */
async function extractHtml(bytes, baseUrl) {
    const markup = bytes.toString("utf8");
    const preferred = await extractReadableContent(markup, baseUrl).catch(() => null);
    const content = preferred ?? extractBasicHtmlContent(markup);
    const composed = composeFetchBody(content, {
        extractMode: "markdown",
        maxChars: DEFAULT_MAX_CHARS,
    });
    return composed.text;
}
949
+ /* ── extra document formats (ODF / EPUB / RTF / IPYNB) — broader than rivals ── */
950
/**
 * Reduce OpenDocument `content.xml` markup to plain text.
 *
 * Block and separator elements are first turned into whitespace — line breaks,
 * paragraph / heading ends and table-row ends into newlines, tabs and
 * table-cell ends into tab characters. Every remaining tag is then removed,
 * entities are decoded, trailing blanks before a newline are dropped and runs
 * of three or more newlines collapse to a single blank line.
 *
 * @param {string} xml The `content.xml` text.
 * @returns {string} Trimmed plain text.
 */
function odfXmlToText(xml) {
    // Applied in order: [pattern, replacement].
    const boundaryRules = [
        [/<text:line-break\s*\/?>/g, "\n"],
        [/<text:tab\s*\/?>/g, "\t"],
        [/<\/text:p>/g, "\n"],
        [/<\/text:h>/g, "\n"],
        [/<\/table:table-row>/g, "\n"],
        [/<\/table:table-cell>/g, "\t"],
    ];
    let text = xml;
    for (const [pattern, replacement] of boundaryRules) {
        text = text.replace(pattern, replacement);
    }
    // Whatever markup is left carries no text of its own.
    text = text.replace(/<[^>]+>/g, "");
    const decoded = decodeXmlEntities(text);
    const tidied = decoded.replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n");
    return tidied.trim();
}
972
/**
 * OpenDocument (odt / ods / odp) → text extracted from the archive's
 * `content.xml` part.
 *
 * @param {Uint8Array|Buffer} bytes Raw archive bytes.
 * @param {string} kind Extension label used in error messages.
 * @returns {Promise<string>} Extracted text.
 * @throws {BrigadeToolInputError} When `content.xml` is missing or yields no
 *   text.
 */
async function extractOpenDocument(bytes, kind) {
    const archive = await unzipEntries(bytes);
    const contentXml = await entryText(archive, "content.xml");
    if (!contentXml) {
        throw new BrigadeToolInputError(`not a valid .${kind} (missing content.xml — corrupt or not an OpenDocument file)`);
    }
    const body = odfXmlToText(contentXml);
    if (!body) {
        throw new BrigadeToolInputError(`no extractable text in the .${kind}`);
    }
    return body;
}
983
/**
 * EPUB → concatenated readable text. An EPUB is a zip of XHTML "chapters"; we
 * read them in spine order (from the OPF manifest) when resolvable, else fall
 * back to every `.x?html` entry sorted by name. Each chapter's markup is run
 * through the basic HTML extractor so only the readable text survives.
 *
 * Manifest hrefs are percent-decoded defensively: `decodeURIComponent` throws
 * `URIError` on a malformed escape (e.g. a literal `100%.xhtml`), which would
 * otherwise abort the whole extraction with a raw error — in that case the
 * href is used as written. Extraction stops once the accumulated text exceeds
 * `DEFAULT_MAX_CHARS`; the running length is tracked incrementally instead of
 * re-joining every chapter on each iteration.
 *
 * @param {Uint8Array|Buffer} bytes Raw .epub bytes.
 * @returns {Promise<string>} Chapter texts joined by blank lines.
 * @throws {BrigadeToolInputError} When no chapter yields any text.
 */
async function extractEpub(bytes) {
    const entries = await unzipEntries(bytes);
    const names = Object.keys(entries);
    // Resolve spine order via the OPF (content.opf) when present.
    const opfName = names.find((n) => /\.opf$/i.test(n));
    let ordered = [];
    if (opfName) {
        const opf = (await entryText(entries, opfName)) ?? "";
        // Manifest hrefs are relative to the directory holding the OPF.
        const opfDir = opfName.includes("/") ? opfName.slice(0, opfName.lastIndexOf("/") + 1) : "";
        // manifest: id → href
        const idToHref = new Map();
        const itemRe = /<item\b[^>]*\bid="([^"]+)"[^>]*\bhref="([^"]+)"[^>]*\/?>/g;
        let im;
        while ((im = itemRe.exec(opf)) !== null) {
            idToHref.set(im[1], im[2]);
        }
        // also handle href-before-id ordering
        const itemRe2 = /<item\b[^>]*\bhref="([^"]+)"[^>]*\bid="([^"]+)"[^>]*\/?>/g;
        while ((im = itemRe2.exec(opf)) !== null) {
            if (!idToHref.has(im[2]))
                idToHref.set(im[2], im[1]);
        }
        const spineRe = /<itemref\b[^>]*\bidref="([^"]+)"/g;
        let sm;
        while ((sm = spineRe.exec(opf)) !== null) {
            const href = idToHref.get(sm[1]);
            if (!href)
                continue;
            const rawPath = opfDir + href;
            let decoded;
            try {
                decoded = decodeURIComponent(rawPath);
            }
            catch {
                // Malformed percent-escape: keep the href exactly as written.
                decoded = rawPath;
            }
            const full = decoded.replace(/^\.\//, "");
            if (entries[full])
                ordered.push(full);
        }
    }
    if (ordered.length === 0) {
        ordered = names.filter((n) => /\.x?html?$/i.test(n)).sort();
    }
    const parts = [];
    // Length of `parts.join("\n\n")`, maintained as parts are appended.
    let joinedLength = 0;
    for (const name of ordered) {
        const html = (await entryText(entries, name)) ?? "";
        if (!html.trim())
            continue;
        const extracted = extractBasicHtmlContent(html);
        const { text } = composeFetchBody(extracted, { extractMode: "markdown", maxChars: DEFAULT_MAX_CHARS });
        const chapter = text.trim();
        if (chapter) {
            joinedLength += chapter.length + (parts.length > 0 ? 2 : 0);
            parts.push(chapter);
        }
        if (joinedLength > DEFAULT_MAX_CHARS)
            break; // bound the work
    }
    const joined = parts.join("\n\n").trim();
    if (!joined)
        throw new BrigadeToolInputError("no extractable text in the .epub");
    return joined;
}
1042
+ /**
1043
+ * RTF → plain text. A small control-word stripper: drops `{\\*\\...}` groups
1044
+ * (fonts/colour tables/pictures), decodes `\\'hh` hex + `\\uN` unicode escapes,
1045
+ * maps `\\par`/`\\line`/`\\tab` to whitespace, and removes the remaining
1046
+ * `\\control` words and group braces. Best-effort — fidelity is text, not layout.
1047
+ */
1048
+ function extractRtf(bytes) {
1049
+ let rtf = bytes.toString("latin1");
1050
+ if (!/^\s*{\\rtf/i.test(rtf)) {
1051
+ throw new BrigadeToolInputError("not a valid .rtf (missing the {\\rtf header)");
1052
+ }
1053
+ // Remove destination groups that carry no body text (font/colour/info/pict…).
1054
+ rtf = rtf.replace(/\{\\\*?\\(?:fonttbl|colortbl|stylesheet|info|pict|object|themedata|colorschememapping|latentstyles|datastore|generator)[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/gi, " ");
1055
+ // Line / paragraph / tab control words → whitespace.
1056
+ rtf = rtf.replace(/\\par[d]?\b/g, "\n").replace(/\\line\b/g, "\n").replace(/\\tab\b/g, "\t");
1057
+ // Hex escapes \'hh → the byte (latin1).
1058
+ rtf = rtf.replace(/\\'([0-9a-fA-F]{2})/g, (_m, h) => {
1059
+ const code = parseInt(h, 16);
1060
+ return Number.isFinite(code) ? String.fromCharCode(code) : "";
1061
+ });
1062
+ // Unicode escapes \uNNNN (followed by a fallback char we drop).
1063
+ rtf = rtf.replace(/\\u(-?\d+)\??/g, (_m, n) => {
1064
+ let code = parseInt(n, 10);
1065
+ if (code < 0)
1066
+ code += 65536; // RTF emits negative for >32767
1067
+ return Number.isFinite(code) ? String.fromCodePoint(code) : "";
1068
+ });
1069
+ // Escaped literals.
1070
+ rtf = rtf.replace(/\\([{}\\])/g, "$1");
1071
+ // Remaining control words / symbols.
1072
+ rtf = rtf.replace(/\\[a-zA-Z]+-?\d* ?/g, "").replace(/\\[^a-zA-Z]/g, "");
1073
+ // Group braces.
1074
+ rtf = rtf.replace(/[{}]/g, "");
1075
+ return rtf.replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n").trim();
1076
+ }
1077
/**
 * Jupyter notebook (.ipynb) → text. Walks `cells[]`, joining each cell's
 * `source` (string or string[]) under a per-cell label. Markdown and raw cells
 * are emitted as-is; every other cell type is treated as code and wrapped in a
 * fenced block so the model can tell code from prose. Cell OUTPUTS are skipped
 * (often huge / binary image data) — only the authored source is returned.
 *
 * Tolerates malformed notebooks: a top-level JSON `null` or a `null` entry in
 * `cells` is handled (reported as "no cells" / skipped) rather than crashing
 * with a raw `TypeError` on property access. Cell numbers in the labels still
 * count every entry, including skipped ones.
 *
 * @param {Buffer} bytes Raw notebook bytes (decoded as UTF-8 JSON).
 * @returns {string} Labelled cell sources joined by blank lines.
 * @throws {BrigadeToolInputError} On unparseable JSON, a notebook without
 *   cells, or cells without any source text.
 */
function extractIpynb(bytes) {
    let nb;
    try {
        nb = JSON.parse(bytes.toString("utf8"));
    }
    catch {
        throw new BrigadeToolInputError("not a valid .ipynb (could not parse the notebook JSON)");
    }
    // `nb` may legitimately parse to null (the JSON literal) — guard the access.
    const cells = Array.isArray(nb?.cells) ? nb.cells : [];
    if (cells.length === 0)
        throw new BrigadeToolInputError("the notebook has no cells");
    const parts = [];
    let n = 0;
    for (const cell of cells) {
        n += 1;
        const cellType = cell?.cell_type;
        const rawSource = cell?.source;
        const type = typeof cellType === "string" ? cellType : "code";
        const src = Array.isArray(rawSource)
            ? rawSource.join("")
            : typeof rawSource === "string"
                ? rawSource
                : "";
        if (!src.trim())
            continue;
        if (type === "markdown" || type === "raw") {
            parts.push(`--- Cell ${n} (${type}) ---\n${src.trim()}`);
        }
        else {
            parts.push(`--- Cell ${n} (code) ---\n\`\`\`\n${src.trim()}\n\`\`\``);
        }
    }
    const joined = parts.join("\n\n").trim();
    if (!joined)
        throw new BrigadeToolInputError("no source text found in the notebook cells");
    return joined;
}
1118
+ export function makeAnalyzeMediaTool(opts = {}) {
1119
+ const acquireUrl = opts.acquireUrl ?? acquireUrlBytes;
1120
+ const acquireLocal = opts.acquireLocal ?? acquireLocalBytes;
1121
+ const runUnderstanding = opts.runMediaUnderstanding ?? defaultRunMediaUnderstanding;
1122
+ const downscaleImage = opts.downscaleImage ?? downscaleImageToBudget;
1123
+ // Result cache: ON by default. A test-injected read/write seam overrides the
1124
+ // disk implementation; `resultCache:false` disables it entirely.
1125
+ const cacheEnabled = opts.resultCache !== false;
1126
+ const readCache = opts.readCache ?? readMediaCache;
1127
+ const writeCache = opts.writeCache ?? writeMediaCache;
1128
+ const agentId = opts.agentId ?? DEFAULT_AGENT_ID;
1129
+ // Lazily resolve the media-understanding config (key resolution + per-kind
1130
+ // defaults) from Brigade's credential store the first time it is needed, so
1131
+ // constructing the tool never touches the auth store. A test-injected config
1132
+ // short-circuits this.
1133
+ let muConfig = opts.mediaUnderstandingConfig;
1134
+ const getMuConfig = () => {
1135
+ if (!muConfig)
1136
+ muConfig = buildMediaUnderstandingConfig(agentId);
1137
+ return muConfig;
1138
+ };
1139
+ return {
1140
+ name: "analyze_media",
1141
+ label: "Analyze Media",
1142
+ displaySummary: "analyzing media",
1143
+ // Read capability — NOT owner-only. It reads a file/URL the operator
1144
+ // pointed at and hands content to the model; it never mutates state or
1145
+ // spends. The path guard + SSRF guard are the real safety boundary, and
1146
+ // they run for EVERY sender regardless of owner status.
1147
+ ownerOnly: false,
1148
+ description: [
1149
+ "Understand a local file or URL: images, PDF, DOCX, PPTX, XLSX, ODT/ODS/ODP, EPUB, RTF, Jupyter (.ipynb), HTML, plain/structured text (txt/csv/json/xml/yaml/md/log/source code), audio (voice notes), and video (auto-detected by extension/MIME).",
1150
+ "Pass `source` (a single local path or http(s) URL) — or `sources` (an array) to analyze several at once — and a `question` describing what to analyze.",
1151
+ "Images are shown to a vision model (or, on a text-only model, understood via any configured provider with an image-capable model) and oversize images are DOWNSCALED to fit (never truncated); PDF is read natively when a provider key is configured (scanned PDFs work) else extracted to text; office/e-book/notebook/text files are extracted to text; AUDIO is transcribed/summarized via a Google/Gemini key (with an optional `language` hint); VIDEO is understood via a Google/Gemini key.",
1152
+ "Use `pages` to limit a PDF/PPTX range (e.g. \"1-5\"). Use this instead of bash/curl — it applies the SSRF guard for URLs and the path guard for local files.",
1153
+ ].join(" "),
1154
+ parameters: AnalyzeMediaParams,
1155
+ execute: async (_toolCallId, args, signal) => {
1156
+ // Resolve the source LIST. `sources[]` (new, batch) wins; else the single
1157
+ // `source` (back-compat) becomes a one-element list. De-dupe blanks.
1158
+ const list = (Array.isArray(args.sources) && args.sources.length > 0
1159
+ ? args.sources
1160
+ : args.source
1161
+ ? [args.source]
1162
+ : [])
1163
+ .map((s) => (s ?? "").trim())
1164
+ .filter((s) => s.length > 0);
1165
+ if (list.length === 0)
1166
+ throw new BrigadeToolInputError("source required");
1167
+ // Single source → the exact existing behaviour (one result, image block
1168
+ // or text). Multiple → the batch merge.
1169
+ if (list.length === 1)
1170
+ return analyzeOne(list[0], args, signal);
1171
+ return analyzeBatch(list, args, signal);
1172
+ },
1173
+ };
1174
+ /* ── single-source pipeline (the original per-source path) ── */
1175
+ /** Analyze ONE source end-to-end → a complete tool result (image or text). */
1176
+ async function analyzeOne(source, args, signal) {
1177
+ {
1178
+ const question = (args.question ?? args.prompt ?? "").trim();
1179
+ const isUrl = /^https?:\/\//i.test(source);
1180
+ const sourceType = isUrl ? "url" : "path";
1181
+ // Image blocks are the most token-expensive to ship, so when the
1182
+ // source LOOKS like an image (by extension or explicit kind) apply
1183
+ // the tighter image budget unless the caller raised maxBytes
1184
+ // explicitly. Documents/HTML keep the larger default.
1185
+ const looksImage = (args.kind ? args.kind === "image" : false) ||
1186
+ EXT_KIND[extensionOf(source)] === "image";
1187
+ // The byte BUDGET an image must fit into (downscaled if larger).
1188
+ const imageBudget = clampBytes(args.maxBytes, true);
1189
+ const maxBytes = clampBytes(args.maxBytes, looksImage);
1190
+ // For an image we want the WHOLE file (up to the absolute ceiling) so it
1191
+ // can be DOWNSCALED to a valid image — truncating it mid-stream corrupts
1192
+ // the only copy. So read images at the ceiling and let the image handler
1193
+ // resize to `imageBudget`. Non-image sources keep the existing cap
1194
+ // (a byte prefix is fine for text/doc bytes).
1195
+ const readCap = looksImage ? MAX_BYTES_CEILING : maxBytes;
1196
+ // Acquire bytes (with the right guard for the source type).
1197
+ let acquired;
1198
+ try {
1199
+ acquired = isUrl
1200
+ ? await acquireUrl(source, {
1201
+ maxBytes: readCap,
1202
+ ...(signal ? { signal } : {}),
1203
+ })
1204
+ : await acquireLocal(source, {
1205
+ ...(opts.workspaceDir ? { workspaceDir: opts.workspaceDir } : {}),
1206
+ ...(opts.cwd ? { cwd: opts.cwd } : {}),
1207
+ ...(opts.ownerLocalAccess ? { ownerLocalAccess: true } : {}),
1208
+ maxBytes: readCap,
1209
+ });
1210
+ }
1211
+ catch (err) {
1212
+ if (err instanceof SsrfBlockedError) {
1213
+ throw new BrigadeToolInputError(`refused to fetch the URL: ${err.reason}`);
1214
+ }
1215
+ throw err;
1216
+ }
1217
+ // Detect kind (override → ext → MIME).
1218
+ const kind = detectKind({
1219
+ source,
1220
+ ...(args.kind ? { override: args.kind } : {}),
1221
+ ...(acquired.mime ? { mime: acquired.mime } : {}),
1222
+ });
1223
+ if (!kind) {
1224
+ // Last-resort: an unknown extension/MIME whose bytes decode as UTF-8
1225
+ // text is handled as the `text` kind (structured text / source code /
1226
+ // logs), so a `.toml`/unknown-but-textual file is read rather than
1227
+ // rejected. Binary that is not a known kind stays unsupported.
1228
+ if (looksLikeUtf8Text(acquired.bytes)) {
1229
+ return handleTextPlain({
1230
+ source,
1231
+ sourceType,
1232
+ bytes: acquired.bytes,
1233
+ truncated: acquired.truncated,
1234
+ ...(acquired.mime ? { mime: acquired.mime } : {}),
1235
+ question,
1236
+ });
1237
+ }
1238
+ return failure({
1239
+ source,
1240
+ sourceType,
1241
+ ...(acquired.mime ? { mimeType: acquired.mime } : {}),
1242
+ bytes: acquired.bytes.length,
1243
+ message: "Unsupported or undetectable media type. Supported: image (png/jpg/jpeg/webp/gif/bmp/heic), pdf, docx, pptx, xlsx, html, text (txt/csv/json/xml/md/yaml/log/source), audio, video. " +
1244
+ "Pass an explicit `kind` if the extension/MIME is missing.",
1245
+ });
1246
+ }
1247
+ // Dispatch per kind.
1248
+ switch (kind) {
1249
+ case "image":
1250
+ return handleImage({
1251
+ source,
1252
+ sourceType,
1253
+ bytes: acquired.bytes,
1254
+ truncated: acquired.truncated,
1255
+ mime: acquired.mime,
1256
+ question,
1257
+ imageBudget,
1258
+ modelContext: opts.modelContext,
1259
+ ...(args.provider ? { provider: args.provider } : {}),
1260
+ ...(args.model ? { model: args.model } : {}),
1261
+ ...(args.maxTokens !== undefined ? { maxTokens: args.maxTokens } : {}),
1262
+ ...(signal ? { signal } : {}),
1263
+ });
1264
+ case "video":
1265
+ return handleVideo({
1266
+ source,
1267
+ sourceType,
1268
+ bytes: acquired.bytes,
1269
+ mime: acquired.mime,
1270
+ question,
1271
+ ...(args.provider ? { provider: args.provider } : {}),
1272
+ ...(args.model ? { model: args.model } : {}),
1273
+ ...(args.maxTokens !== undefined ? { maxTokens: args.maxTokens } : {}),
1274
+ ...(signal ? { signal } : {}),
1275
+ });
1276
+ case "audio":
1277
+ return handleAudio({
1278
+ source,
1279
+ sourceType,
1280
+ bytes: acquired.bytes,
1281
+ mime: acquired.mime,
1282
+ question,
1283
+ ...(args.language ? { language: args.language } : {}),
1284
+ ...(args.provider ? { provider: args.provider } : {}),
1285
+ ...(args.model ? { model: args.model } : {}),
1286
+ ...(args.maxTokens !== undefined ? { maxTokens: args.maxTokens } : {}),
1287
+ ...(signal ? { signal } : {}),
1288
+ });
1289
+ case "pdf":
1290
+ return handlePdf({
1291
+ source,
1292
+ sourceType,
1293
+ bytes: acquired.bytes,
1294
+ truncated: acquired.truncated,
1295
+ mime: acquired.mime,
1296
+ question,
1297
+ pages: args.pages,
1298
+ mode: args.mode ?? "auto",
1299
+ ...(args.provider ? { provider: args.provider } : {}),
1300
+ ...(args.model ? { model: args.model } : {}),
1301
+ ...(args.maxTokens !== undefined ? { maxTokens: args.maxTokens } : {}),
1302
+ ...(signal ? { signal } : {}),
1303
+ });
1304
+ case "text":
1305
+ return handleTextPlain({
1306
+ source,
1307
+ sourceType,
1308
+ bytes: acquired.bytes,
1309
+ truncated: acquired.truncated,
1310
+ ...(acquired.mime ? { mime: acquired.mime } : {}),
1311
+ question,
1312
+ });
1313
+ case "docx":
1314
+ case "pptx":
1315
+ case "xlsx":
1316
+ case "html":
1317
+ case "odt":
1318
+ case "ods":
1319
+ case "odp":
1320
+ case "epub":
1321
+ case "rtf":
1322
+ case "ipynb":
1323
+ return handleTextExtract({
1324
+ kind,
1325
+ source,
1326
+ sourceType,
1327
+ bytes: acquired.bytes,
1328
+ truncated: acquired.truncated,
1329
+ mime: acquired.mime,
1330
+ question,
1331
+ pages: args.pages,
1332
+ // Embedded-image surfacing (OOXML only). DEFAULT-ON: when the
1333
+ // caller didn't say otherwise, an Office doc on a vision model
1334
+ // also returns its embedded pictures. `includeImages:false`
1335
+ // opts out (text-only, cheap).
1336
+ includeImages: args.includeImages !== false,
1337
+ modelContext: opts.modelContext,
1338
+ imageBudget,
1339
+ ...(args.provider ? { provider: args.provider } : {}),
1340
+ ...(args.model ? { model: args.model } : {}),
1341
+ ...(args.maxTokens !== undefined ? { maxTokens: args.maxTokens } : {}),
1342
+ ...(signal ? { signal } : {}),
1343
+ });
1344
+ }
1345
+ }
1346
+ }
1347
+ /* ── batch (multi-source) pipeline ── */
/**
 * Analyze MULTIPLE sources in one call.
 *
 * Sources are partitioned by the cheap up-front signal (explicit `args.kind`,
 * else the file extension). Each image source contributes a label plus its
 * image block; every other source contributes its first text block under a
 * per-file label. At most {@link MAX_BATCH_IMAGES} images and
 * {@link MAX_BATCH_DOCS} non-image sources are processed; anything beyond the
 * caps is reported in a trailing note instead of being analyzed.
 *
 * @param {string[]} sources - Paths / URLs to analyze.
 * @param {object} args - Tool arguments (`question` / `prompt`, optional `kind`,
 *   and whatever `analyzeOne` reads).
 * @param {AbortSignal} [signal] - Forwarded to every `analyzeOne` call.
 * @returns {Promise<{content: object[], details: object}>} One combined tool
 *   result. `details.ok` is true when at least one source succeeded.
 */
async function analyzeBatch(sources, args, signal) {
    const question = (args.question ?? args.prompt ?? "").trim();
    // Partition by the cheap up-front signal (explicit kind / extension).
    // MIME-only images are treated as docs/text here because nothing is
    // pre-fetched to classify them.
    const imageSources = [];
    const otherSources = [];
    for (const s of sources) {
        const k = args.kind ?? EXT_KIND[extensionOf(s)];
        if (k === "image")
            imageSources.push(s);
        else
            otherSources.push(s);
    }
    const cappedImages = imageSources.slice(0, MAX_BATCH_IMAGES);
    const cappedOthers = otherSources.slice(0, MAX_BATCH_DOCS);
    const overflow = [];
    if (imageSources.length > MAX_BATCH_IMAGES)
        overflow.push(`${imageSources.length - MAX_BATCH_IMAGES} image(s)`);
    if (otherSources.length > MAX_BATCH_DOCS)
        overflow.push(`${otherSources.length - MAX_BATCH_DOCS} document(s)`);
    const content = [];
    let anyOk = false;
    let imageCount = 0;
    let textCount = 0;
    const lead = question
        ? `Analyze the ${sources.length} attached sources and answer this:\n${question}`
        : `Analyze the ${sources.length} attached sources and describe / summarize what they contain.`;
    content.push({ type: "text", text: lead });
    // Images first → each becomes its own labeled text + image block.
    for (let i = 0; i < cappedImages.length; i++) {
        const src = cappedImages[i];
        const label = `--- Image ${i + 1}: ${basenameOf(src)} ---`;
        const one = await analyzeOne(src, args, signal);
        const img = one.content.find((b) => b.type === "image");
        if (img) {
            content.push({ type: "text", text: label });
            content.push(img);
            imageCount += 1;
            anyOk = anyOk || one.details.ok;
        }
        else {
            // No image block came back: either a failure / "switch model"
            // explanation, or a SUCCESSFUL text description (the image was
            // understood by a provider on behalf of a text-only model). Carry
            // the text either way, and count a success so the batch does not
            // report `ok:false` / `returned:"none"` for described images.
            content.push({ type: "text", text: `${label}\n${firstText(one)}` });
            if (one.details.ok) {
                textCount += 1;
                anyOk = true;
            }
        }
    }
    // Non-image sources → concatenated labeled text extractions.
    for (let i = 0; i < cappedOthers.length; i++) {
        const src = cappedOthers[i];
        const label = `--- File ${i + 1}: ${basenameOf(src)} ---`;
        const one = await analyzeOne(src, args, signal);
        content.push({ type: "text", text: `${label}\n${firstText(one)}` });
        textCount += 1;
        anyOk = anyOk || one.details.ok;
    }
    if (overflow.length > 0) {
        content.push({
            type: "text",
            text: `(Note: ${overflow.join(" and ")} beyond the per-call cap of ${MAX_BATCH_IMAGES} images / ${MAX_BATCH_DOCS} documents were skipped. Split into multiple calls.)`,
        });
    }
    return {
        content,
        details: {
            ok: anyOk,
            source: sources.join(", "),
            // "url" only when every source is http(s); otherwise reported as "path".
            sourceType: sources.every((s) => /^https?:\/\//i.test(s)) ? "url" : "path",
            returned: imageCount > 0 ? "image" : textCount > 0 ? "text" : "none",
            bytes: 0,
            message: `Batch of ${sources.length} sources: ${imageCount} image block(s), ${textCount} text extraction(s).`,
        },
    };
}
1432
+ /* ── media-understanding helpers (shared by image/video/pdf provider paths) ── */
/**
 * Run the media-understanding subsystem for `p.kind` and shape its TEXT answer
 * into a tool result.
 *
 * Resolves to one of three shapes:
 *   - `{ ok: true, result }` — provider (or cache) produced text;
 *   - `{ ok: false, unavailable: true, message }` — the subsystem raised
 *     `MediaUnderstandingUnavailableError`, so the caller can fall back;
 *   - `{ ok: false, unavailable: false, result }` — any other error, converted
 *     into a `failure(...)` result instead of being re-thrown.
 *
 * @param {object} p - Request: `kind`, `source`, `sourceType`, `bytes`,
 *   `mimeType`, `question`, plus optional `provider`, `model`, `maxTokens`,
 *   `signal`, `note` and `failureGuidance`.
 */
async function understandViaProvider(p) {
    const cfg = getMuConfig();
    // Shared by the cache-hit and fresh-call paths so both return the same shape.
    function buildOk(text, resolvedProvider, resolvedModel, fromCache) {
        const promptText = buildPromptText(p.question, p.kind);
        // The provider's answer can echo instructions embedded in the media,
        // so it goes inside the untrusted-content envelope.
        const wrapped = wrapWebContent(text, "web_fetch", { includeWarning: true });
        const notes = [];
        if (p.note)
            notes.push(p.note);
        if (fromCache)
            notes.push("cached result");
        const lead = notes.length > 0 ? `${promptText}\n\n(${notes.join("; ")})` : promptText;
        const details = {
            ok: true,
            source: p.source,
            sourceType: p.sourceType,
            kind: p.kind,
            mimeType: p.mimeType,
            bytes: p.bytes.length,
            returned: "text",
            provider: resolvedProvider,
            providerModel: resolvedModel,
        };
        return {
            ok: true,
            result: {
                content: [{ type: "text", text: `${lead}\n\n${wrapped}` }],
                details,
            },
        };
    }
    // The cache key uses the REQUEST identity (override provider / model /
    // maxTokens); the RESOLVED provider/model are stored in the cached value.
    let cacheKey = "";
    if (cacheEnabled) {
        const keyInput = {
            bytes: p.bytes,
            question: p.question,
            provider: p.provider ?? "auto",
            kind: p.kind,
        };
        if (p.model)
            keyInput.model = p.model;
        if (p.maxTokens !== undefined)
            keyInput.maxTokens = p.maxTokens;
        cacheKey = mediaCacheKey(keyInput);
        // A cache read error is treated as a miss.
        const hit = await readCache(cacheKey).catch(() => undefined);
        if (hit)
            return buildOk(hit.text, hit.provider, hit.model, true);
    }
    try {
        const request = {
            kind: p.kind,
            bytes: p.bytes,
            mimeType: p.mimeType,
            cfg,
        };
        if (p.question)
            request.prompt = p.question;
        if (p.maxTokens !== undefined)
            request.maxTokens = p.maxTokens;
        if (p.provider)
            request.provider = p.provider;
        if (p.model)
            request.model = p.model;
        if (p.signal)
            request.signal = p.signal;
        const res = await runUnderstanding(request);
        if (cacheEnabled) {
            // Fire-and-forget: a failed cache write must not affect the result.
            const value = { text: res.text, provider: res.provider, model: res.model };
            void writeCache(cacheKey, value).catch(() => { });
        }
        return buildOk(res.text, res.provider, res.model, false);
    }
    catch (err) {
        if (err instanceof MediaUnderstandingUnavailableError) {
            return { ok: false, unavailable: true, message: err.message };
        }
        // Anything else becomes a failure result rather than a throw.
        const msg = err instanceof Error ? err.message : String(err);
        const guidance = p.failureGuidance ? ` ${p.failureGuidance}` : "";
        const result = failure({
            source: p.source,
            sourceType: p.sourceType,
            kind: p.kind,
            mimeType: p.mimeType,
            bytes: p.bytes.length,
            message: `Provider media-understanding call failed: ${msg}.${guidance}`,
        });
        return { ok: false, unavailable: false, result };
    }
}
1527
+ /* ── handlers (closures so they share `opts`) ── */
/**
 * Image handler.
 *
 * Flow:
 *   1. Resolve the MIME type (declared MIME, else from the extension).
 *   2. If the image is over `p.imageBudget` or arrived truncated, try to
 *      re-encode it via `downscaleImage`; HEIC/HEIF and non-downscalable MIME
 *      types are passed through untouched.
 *   3. If the model is known NOT to accept images, route the bytes through
 *      `understandViaProvider` and return its text (or an explanatory message
 *      when no provider is available).
 *   4. Otherwise return a text block plus a base64 image block.
 *
 * @param {object} p - `source`, `sourceType`, `bytes`, `truncated`, `mime`,
 *   `question`, `imageBudget`, `modelContext`, and optional `provider`,
 *   `model`, `maxTokens`, `signal`.
 */
async function handleImage(p) {
    const ext = extensionOf(p.source);
    let mimeType = (p.mime?.split(";")[0]?.trim() || imageMimeFromExt(ext)).toLowerCase();
    const isHeic = /heic|heif/.test(mimeType) || ext === "heic" || ext === "heif";
    const sees = modelLikelySeesImages(p.modelContext);
    const promptText = buildPromptText(p.question, "image");
    const warnings = [];
    let bytes = p.bytes;
    let imageTruncated = p.truncated;
    // Re-encode the current bytes under the byte budget. On success the working
    // bytes/MIME are swapped and the truncation flag cleared (the output is a
    // valid image); on any error the originals are kept and `undefined` is
    // returned.
    const reencode = async () => {
        try {
            const ds = await downscaleImage(bytes, {
                maxBytes: p.imageBudget,
                sourceMime: mimeType,
            });
            bytes = ds.bytes;
            mimeType = ds.mimeType;
            imageTruncated = false;
            return ds;
        }
        catch {
            // Could not decode — keep the original bytes.
            return undefined;
        }
    };
    // Only pay for decode/encode when the image is over budget or truncated;
    // a small image is shipped untouched.
    if (!isHeic && isDownscalableImageMime(mimeType) && (bytes.length > p.imageBudget || imageTruncated)) {
        const ds = await reencode();
        if (ds?.resized) {
            warnings.push(`The image exceeded the byte budget, so it was downscaled to ${ds.width}×${ds.height} (re-encoded as JPEG) to fit — detail may be reduced. Raise \`maxBytes\` for a higher-resolution pass.`);
        }
    }
    if (isHeic) {
        warnings.push("This is a HEIC/HEIF image. Brigade cannot transcode it without a native dependency, so it is passed through as-is — many models reject HEIC. If the model cannot read it, ask the operator to convert it to JPEG/PNG.");
    }
    if (sees === false) {
        // Text-only model: have a provider describe the image instead of
        // attaching a block the model would reject. A MIME outside
        // PROVIDER_SAFE_IMAGE_MIME is re-encoded first when it is decodable.
        if (!isHeic && !PROVIDER_SAFE_IMAGE_MIME.has(mimeType) && isDownscalableImageMime(mimeType)) {
            await reencode();
        }
        const overrides = {
            ...(p.maxTokens !== undefined ? { maxTokens: p.maxTokens } : {}),
            ...(p.provider ? { provider: p.provider } : {}),
            ...(p.model ? { model: p.model } : {}),
            ...(p.signal ? { signal: p.signal } : {}),
        };
        const viaProvider = await understandViaProvider({
            kind: "image",
            source: p.source,
            sourceType: p.sourceType,
            bytes,
            mimeType,
            question: p.question,
            ...overrides,
            note: "The current model is text-only, so the image was understood by a vision-capable provider and the description is below.",
            // Gives the model a next step when the provider call itself fails.
            failureGuidance: "To read this image, the turn needs either a vision-capable model (e.g. a Claude / GPT-4o / Gemini model) or a working media-understanding provider key — check the configured key/quota and retry, or switch models.",
        });
        // Success and provider failure both already carry a finished result.
        if (viaProvider.ok || !viaProvider.unavailable)
            return viaProvider.result;
        // No provider available — say so plainly.
        warnings.push("The current model does not appear to accept images, so the image is NOT being attached. Switch to a vision-capable model (e.g. a Claude / GPT-4o / Gemini model), or configure a Google/Anthropic key so Brigade can understand images on any model.");
        return {
            content: [{ type: "text", text: `${promptText}\n\n${warnings.join("\n\n")}` }],
            details: {
                ok: false,
                source: p.source,
                sourceType: p.sourceType,
                kind: "image",
                mimeType,
                bytes: p.bytes.length,
                returned: "none",
                warning: warnings.join(" "),
            },
        };
    }
    if (sees === undefined) {
        warnings.push("Note: Brigade could not confirm this model is vision-capable. If you cannot see the image, switch to a vision-capable model.");
    }
    if (imageTruncated) {
        // Still set only when the truncated image was not re-encoded.
        warnings.push("The image was truncated at the byte cap and could not be re-encoded, so it may be corrupt — raise `maxBytes` if it does not render.");
    }
    const hasWarnings = warnings.length > 0;
    const text = hasWarnings ? `${promptText}\n\n${warnings.join("\n\n")}` : promptText;
    const details = {
        ok: true,
        source: p.source,
        sourceType: p.sourceType,
        kind: "image",
        mimeType,
        bytes: bytes.length,
        returned: "image",
        ...(imageTruncated ? { truncated: true } : {}),
        ...(hasWarnings ? { warning: warnings.join(" ") } : {}),
    };
    return {
        // The image block carries raw base64 (no `data:` prefix).
        content: [
            { type: "text", text },
            { type: "image", data: bytes.toString("base64"), mimeType },
        ],
        details,
    };
}
/**
 * Video handler. Video is never attached to the turn; the bytes are sent
 * through `understandViaProvider` and its TEXT answer is returned.
 *
 * An explicit `provider: "anthropic"` override is rejected up front with a
 * dedicated message. When no provider is available, the subsystem's own
 * message is returned as a non-ok result.
 *
 * @param {object} p - `source`, `sourceType`, `bytes`, `mime`, `question`, and
 *   optional `provider`, `model`, `maxTokens`, `signal`.
 */
async function handleVideo(p) {
    const mimeType = p.mime?.split(";")[0]?.trim().toLowerCase() || videoMimeFromExt(extensionOf(p.source));
    // Shared shape for both "could not analyze" outcomes below.
    const notAnalyzed = (message) => {
        const promptText = buildPromptText(p.question, "video");
        return {
            content: [{ type: "text", text: `${promptText}\n\n${message}` }],
            details: {
                ok: false,
                source: p.source,
                sourceType: p.sourceType,
                kind: "video",
                mimeType,
                bytes: p.bytes.length,
                returned: "none",
                message,
            },
        };
    };
    if (p.provider === "anthropic") {
        // Explicit override that cannot serve video — answer without a provider call.
        return notAnalyzed("Anthropic cannot analyze video — it has no video ingestion. Video understanding needs a Google/Gemini key. " +
            "Drop the `provider` override (or set it to \"google\") and configure a Gemini key.");
    }
    const overrides = {
        ...(p.maxTokens !== undefined ? { maxTokens: p.maxTokens } : {}),
        ...(p.provider ? { provider: p.provider } : {}),
        ...(p.model ? { model: p.model } : {}),
        ...(p.signal ? { signal: p.signal } : {}),
    };
    const viaProvider = await understandViaProvider({
        kind: "video",
        source: p.source,
        sourceType: p.sourceType,
        bytes: p.bytes,
        mimeType,
        question: p.question,
        ...overrides,
    });
    // Success and provider failure both carry a ready-made result.
    if (viaProvider.ok || !viaProvider.unavailable)
        return viaProvider.result;
    // Unavailable — relay the subsystem's message.
    return notAnalyzed(viaProvider.message);
}
/**
 * Audio handler (voice notes + clips). Audio is never attached to the turn;
 * the bytes are routed through `understandViaProvider` and its TEXT
 * transcription / summary is returned. When no provider is available, the
 * subsystem's message is returned as a non-ok result.
 *
 * @param {object} p - `source`, `sourceType`, `bytes`, `mime`, `question`,
 *   optional `language`, and optional `provider`, `model`, `maxTokens`,
 *   `signal`.
 */
async function handleAudio(p) {
    const mimeType = p.mime?.split(";")[0]?.trim().toLowerCase() || audioMimeFromExt(extensionOf(p.source));
    // The language hint travels inside the prompt text together with the question.
    const audioPrompt = buildAudioPrompt(p.question, p.language);
    const overrides = {
        ...(p.maxTokens !== undefined ? { maxTokens: p.maxTokens } : {}),
        ...(p.provider ? { provider: p.provider } : {}),
        ...(p.model ? { model: p.model } : {}),
        ...(p.signal ? { signal: p.signal } : {}),
    };
    const viaProvider = await understandViaProvider({
        kind: "audio",
        source: p.source,
        sourceType: p.sourceType,
        bytes: p.bytes,
        mimeType,
        question: audioPrompt,
        ...overrides,
    });
    // Success and provider failure both carry a ready-made result.
    if (viaProvider.ok || !viaProvider.unavailable)
        return viaProvider.result;
    // Unavailable — relay the subsystem's message under the normal prompt lead.
    const promptText = buildPromptText(p.question, "audio");
    const { message } = viaProvider;
    return {
        content: [{ type: "text", text: `${promptText}\n\n${message}` }],
        details: {
            ok: false,
            source: p.source,
            sourceType: p.sourceType,
            kind: "audio",
            mimeType,
            bytes: p.bytes.length,
            returned: "none",
            message,
        },
    };
}
/**
 * PDF handler.
 *
 * - `mode: "text"` → always local text extraction (`handleTextExtract`).
 * - `mode: "provider"` → always the provider; an unavailable or failed call is
 *   returned as a failure, with no local fallback.
 * - otherwise → the provider is tried only when a key resolves for the
 *   requested provider (or for "anthropic" / "google" when none is
 *   requested); anything other than success falls back to local extraction.
 *
 * The `pages` range is honoured on the local path only.
 *
 * @param {object} p - `source`, `sourceType`, `bytes`, `truncated`, `mime`,
 *   `question`, `pages`, `mode`, and optional `provider`, `model`,
 *   `maxTokens`, `signal`.
 */
async function handlePdf(p) {
    const extractLocally = () => handleTextExtract({
        kind: "pdf",
        source: p.source,
        sourceType: p.sourceType,
        bytes: p.bytes,
        truncated: p.truncated,
        ...(p.mime ? { mime: p.mime } : {}),
        question: p.question,
        ...(p.pages ? { pages: p.pages } : {}),
    });
    if (p.mode === "text")
        return extractLocally();
    const forceProvider = p.mode === "provider";
    const cfg = getMuConfig();
    // Key lookup only; the provider itself is not contacted here.
    const providerAvailable = p.provider
        ? Boolean(safeResolveKey(cfg, p.provider))
        : Boolean(safeResolveKey(cfg, "anthropic")) || Boolean(safeResolveKey(cfg, "google"));
    if (!forceProvider && !providerAvailable)
        return extractLocally();
    const pagesCaveat = p.pages ? " The `pages` range is not applied on the native path." : "";
    const viaProvider = await understandViaProvider({
        kind: "pdf",
        source: p.source,
        sourceType: p.sourceType,
        bytes: p.bytes,
        mimeType: "application/pdf",
        question: p.question,
        ...(p.maxTokens !== undefined ? { maxTokens: p.maxTokens } : {}),
        ...(p.provider ? { provider: p.provider } : {}),
        ...(p.model ? { model: p.model } : {}),
        ...(p.signal ? { signal: p.signal } : {}),
        note: "This PDF was read natively by a provider (handles scanned pages + layout)." + pagesCaveat,
    });
    if (viaProvider.ok)
        return viaProvider.result;
    // Auto mode: any provider problem falls back to local extraction.
    if (!forceProvider)
        return extractLocally();
    // Forced provider mode: surface the problem instead of silently extracting.
    if (!viaProvider.unavailable)
        return viaProvider.result;
    return failure({
        source: p.source,
        sourceType: p.sourceType,
        kind: "pdf",
        ...(p.mime ? { mimeType: p.mime } : {}),
        bytes: p.bytes.length,
        message: viaProvider.message,
    });
}
/**
 * Plain / structured-text handler. Decodes the bytes as UTF-8, drops a leading
 * BOM, clamps the text to `DEFAULT_MAX_CHARS`, wraps it in the
 * untrusted-content envelope and returns it as a single text block. No
 * provider call is made. A file with no non-whitespace text yields a failure
 * result.
 *
 * @param {object} p - `source`, `sourceType`, `bytes`, `truncated`, optional
 *   `mime`, and `question`.
 */
async function handleTextPlain(p) {
    const mimePart = p.mime ? { mimeType: p.mime } : {};
    // `toString("utf8")` substitutes U+FFFD for invalid sequences instead of
    // throwing, so near-text binary degrades rather than erroring.
    const decoded = p.bytes.toString("utf8");
    const raw = decoded.charCodeAt(0) === 0xfeff ? decoded.slice(1) : decoded;
    if (!raw.trim()) {
        return failure({
            source: p.source,
            sourceType: p.sourceType,
            kind: "text",
            ...mimePart,
            bytes: p.bytes.length,
            message: "The file is empty or contains no readable text.",
        });
    }
    const clampResult = truncateText(raw, DEFAULT_MAX_CHARS);
    const wrapped = wrapWebContent(clampResult.text, "web_fetch", { includeWarning: true });
    const promptText = buildPromptText(p.question, "text");
    // Truncated if either the byte fetch or the character clamp cut content.
    const truncated = p.truncated || clampResult.truncated;
    let note = "";
    if (truncated) {
        note = "\n\n(Content was truncated to fit the turn — raise `maxBytes` for more.)";
    }
    return {
        content: [{ type: "text", text: `${promptText}${note}\n\n${wrapped}` }],
        details: {
            ok: true,
            source: p.source,
            sourceType: p.sourceType,
            kind: "text",
            ...mimePart,
            bytes: p.bytes.length,
            returned: "text",
            ...(truncated ? { truncated: true } : {}),
        },
    };
}
/**
 * Text-extraction handler for document kinds (pdf / docx / pptx / xlsx / html /
 * odt / ods / odp / epub / rtf / ipynb).
 *
 * Extracts text with the kind-specific extractor, clamps it to
 * `DEFAULT_MAX_CHARS`, wraps it in the untrusted-content envelope and returns
 * it as a text block. For docx / pptx / xlsx with `p.includeImages` set, the
 * blocks produced by `attachEmbeddedImages` are appended.
 *
 * A `BrigadeToolInputError` from an extractor, or empty extracted text, yields
 * a failure result; any other extractor error is re-thrown.
 *
 * @param {object} p - `kind`, `source`, `sourceType`, `bytes`, `truncated`,
 *   `mime`, `question`, `pages`, `includeImages`, `imageBudget`,
 *   `modelContext`, and optional `provider`, `model`, `maxTokens`, `signal`.
 */
async function handleTextExtract(p) {
    // Identity fields shared by every failure and by the success details.
    const ident = {
        source: p.source,
        sourceType: p.sourceType,
        kind: p.kind,
        ...(p.mime ? { mimeType: p.mime } : {}),
        bytes: p.bytes.length,
    };
    let totalPages;
    const openDocument = () => extractOpenDocument(p.bytes, p.kind);
    const extractors = new Map([
        ["pdf", async () => {
                const r = await extractPdf(p.bytes, p.pages);
                totalPages = r.totalPages;
                return r.text;
            }],
        ["docx", () => extractDocx(p.bytes)],
        ["pptx", () => extractPptx(p.bytes, p.pages)],
        ["xlsx", () => extractXlsx(p.bytes)],
        // Base URL for HTML is the source URL, or about:blank for local files.
        ["html", () => extractHtml(p.bytes, p.sourceType === "url" ? p.source : "about:blank")],
        ["odt", openDocument],
        ["ods", openDocument],
        ["odp", openDocument],
        ["epub", () => extractEpub(p.bytes)],
        ["rtf", () => extractRtf(p.bytes)],
        ["ipynb", () => extractIpynb(p.bytes)],
    ]);
    let rawText = "";
    try {
        const extract = extractors.get(p.kind);
        // An unrecognized kind leaves the text empty and is reported below.
        if (extract)
            rawText = await extract();
    }
    catch (err) {
        if (!(err instanceof BrigadeToolInputError))
            throw err;
        return failure({ ...ident, message: err.message });
    }
    if (!rawText.trim()) {
        return failure({
            ...ident,
            message: p.kind === "pdf"
                ? "No selectable text found — the PDF may be a scanned image. Image-only PDFs need OCR, which this tool does not perform."
                : `No extractable text found in the ${p.kind}.`,
        });
    }
    const clampResult = truncateText(rawText, DEFAULT_MAX_CHARS);
    // Extracted document text can carry injected instructions, so it is wrapped
    // in the untrusted-content envelope.
    const wrapped = wrapWebContent(clampResult.text, "web_fetch", { includeWarning: true });
    const promptText = buildPromptText(p.question, p.kind);
    const truncated = p.truncated || clampResult.truncated;
    const notes = [];
    if (totalPages !== undefined) {
        notes.push(`PDF total pages: ${totalPages}.`);
    }
    const pagedKind = p.kind === "pdf" || p.kind === "pptx";
    if (p.pages && pagedKind) {
        const unit = p.kind === "pdf" ? "pages" : "slides";
        notes.push(`Limited to ${unit} "${p.pages}".`);
    }
    if (truncated) {
        notes.push("Content was truncated to fit the turn — raise `maxBytes` / narrow `pages` for more.");
    }
    const noteBlock = notes.length > 0 ? `\n\n(${notes.join(" ")})` : "";
    // Embedded images are surfaced for OOXML kinds only, and only when requested.
    const isOoxml = p.kind === "docx" || p.kind === "pptx" || p.kind === "xlsx";
    let embedResult;
    if (isOoxml && p.includeImages) {
        embedResult = await attachEmbeddedImages({
            kind: p.kind,
            bytes: p.bytes,
            pages: p.pages,
            imageBudget: p.imageBudget ?? DEFAULT_IMAGE_MAX_BYTES,
            ...(p.modelContext ? { modelContext: p.modelContext } : {}),
            question: p.question,
            source: p.source,
            sourceType: p.sourceType,
            ...(p.provider ? { provider: p.provider } : {}),
            ...(p.model ? { model: p.model } : {}),
            ...(p.maxTokens !== undefined ? { maxTokens: p.maxTokens } : {}),
            ...(p.signal ? { signal: p.signal } : {}),
        });
    }
    const content = [
        { type: "text", text: `${promptText}${noteBlock}\n\n${wrapped}` },
    ];
    if (embedResult) {
        content.push(...embedResult.content);
    }
    const returned = embedResult && embedResult.imageCount > 0 ? "image" : "text";
    return {
        content,
        details: {
            ok: true,
            ...ident,
            returned,
            ...(p.pages ? { pages: p.pages } : {}),
            ...(truncated ? { truncated: true } : {}),
        },
    };
}
2004
+ /**
2005
+ * Surface a doc's EMBEDDED images. Unzips the OOXML buffer, maps images to
2006
+ * slides (PPTX) / collects them (DOCX/XLSX) via `extractOoxmlImages`, honoring
2007
+ * `pages` over the PPTX slide order and the per-call count cap. Each selected
2008
+ * image is DOWNSCALED to the per-image budget (small is fine — these are
2009
+ * context), and a running TOTAL-byte budget drops any image that would blow the
2010
+ * turn (reported, not thrown). Then:
2011
+ * • vision model → emits the image blocks (each preceded by a label).
2012
+ * • text-only → routes EACH embed through the understanding provider and
2013
+ * emits the resulting TEXT; with NO provider key, a single "images present,
2014
+ * switch to a vision model" note (NEVER a raw block, NEVER a throw).
2015
+ * Undecodable embeds (`.wdp`/`.emf`/`.wmf`/…) and over-cap images are reported
2016
+ * as short notes. Returns the extra content blocks + how many image blocks were
2017
+ * emitted (so the caller can set `returned`). On ANY failure (corrupt zip,
2018
+ * decode error) it returns `undefined` — the text result already stands; the
2019
+ * embedded images are a best-effort add-on that must never break the text path.
2020
+ */
2021
async function attachEmbeddedImages(e) {
    // Step 1 — open the OOXML container. A corrupt/locked zip is not an error
    // here: the text path owns that report, so the image add-on just bows out.
    let archive;
    try {
        archive = await unzipEntries(e.bytes);
    }
    catch {
        return undefined;
    }
    // Step 2 — PPTX only: restrict images to the requested `pages` slide range.
    // Other kinds pass no range filter at all.
    const slideFilter = e.kind === "pptx" && e.pages
        ? parsePageRange(e.pages, countPptxSlides(archive))
        : undefined;
    // Step 3 — pull the (count-capped) embedded images out of the archive.
    let found;
    try {
        const extractOptions = { cap: MAX_EMBEDDED_IMAGES };
        if (slideFilter) {
            extractOptions.inRange = slideFilter;
        }
        found = extractOoxmlImages(archive, e.kind, extractOptions);
    }
    catch {
        return undefined;
    }
    const nothingFound = found.images.length === 0 && found.skipped === 0;
    if (nothingFound) {
        return undefined;
    }
    const content = [];
    const sees = modelLikelySeesImages(e.modelContext);
    // Step 4 — shrink every image to the per-image budget and keep a running
    // byte total. The first image is always kept; later ones that would push
    // the total past the ceiling are counted as dropped instead of attached.
    const kept = [];
    let byteTotal = 0;
    let overBudget = 0;
    for (const candidate of found.images) {
        let { bytes, mime } = candidate;
        if (isDownscalableImageMime(mime)) {
            try {
                const shrunk = await downscaleImage(bytes, {
                    maxBytes: e.imageBudget,
                    sourceMime: mime,
                });
                bytes = shrunk.bytes;
                mime = shrunk.mimeType;
            }
            catch {
                // Downscale failed — carry the original bytes/mime forward unchanged.
            }
        }
        const exceedsCeiling = byteTotal + bytes.length > EMBEDDED_IMAGES_TOTAL_BYTES;
        if (exceedsCeiling && kept.length > 0) {
            overBudget += 1;
            continue;
        }
        byteTotal += bytes.length;
        kept.push({ ...candidate, bytes, mime });
    }
    // Step 5 — assemble the summary fragments (count, "N of M", dropped, skipped).
    const shown = kept.length;
    const notes = [];
    if (shown > 0) {
        const plural = shown === 1 ? "" : "s";
        notes.push(`${shown} embedded image${plural} from this ${e.kind}`);
    }
    if (found.matched > shown + overBudget) {
        const scopeHint = e.kind === "pptx" ? " to specific slides" : "";
        notes.push(`showing ${shown} of ${found.matched} embedded images — pass \`pages=\` to scope${scopeHint}`);
    }
    if (overBudget > 0) {
        notes.push(`${overBudget} omitted to stay within the size budget`);
    }
    if (found.skipped > 0) {
        notes.push(`${found.skipped} skipped (unsupported format)`);
    }
    const summary = notes.join("; ");
    // Nothing attachable: surface the skip/drop summary as a note (if any).
    if (shown === 0) {
        if (notes.length > 0) {
            content.push({ type: "text", text: `(Embedded images: ${summary}.)` });
        }
        return { content, imageCount: 0 };
    }
    // Step 6a — vision-capable (or unconfirmed) model: emit label + image block
    // pairs. An unconfirmed model gets one extra caveat sentence in the lead.
    if (sees !== false) {
        const closing = sees === undefined
            ? " Note: Brigade could not confirm this model is vision-capable; if you cannot see them, switch models.)"
            : ")";
        content.push({
            type: "text",
            text: `(Embedded images from this ${e.kind}: ${summary}.` + closing,
        });
        for (const picture of kept) {
            content.push({ type: "text", text: `--- ${picture.label} ---` });
            content.push({
                type: "image",
                data: picture.bytes.toString("base64"),
                mimeType: picture.mime,
            });
        }
        return { content, imageCount: shown };
    }
    // Step 6b — text-only model: never emit raw image blocks. Either describe
    // each image through an understanding provider, or leave a single note.
    const cfg = getMuConfig();
    let providerAvailable;
    if (e.provider) {
        providerAvailable = Boolean(safeResolveKey(cfg, e.provider));
    }
    else {
        providerAvailable =
            ["anthropic", "google"].some((name) => Boolean(safeResolveKey(cfg, name))) ||
                Boolean(safeResolvePiImageModel(cfg));
    }
    if (!providerAvailable) {
        content.push({
            type: "text",
            text: `(This ${e.kind} has ${summary}, but the current model is text-only so they are not attached. ` +
                "Switch to a vision-capable model to SEE them, or configure a Google/Anthropic key so Brigade can describe embedded images on any model.)",
        });
        return { content, imageCount: 0 };
    }
    content.push({
        type: "text",
        text: `(The current model is text-only, so the ${summary} were understood by a vision-capable provider; descriptions follow.)`,
    });
    for (const picture of kept) {
        let { bytes, mime } = picture;
        // A raster outside the provider-safe set gets one more re-encode attempt
        // before it is sent to the provider.
        const needsReencode = !PROVIDER_SAFE_IMAGE_MIME.has(mime) && isDownscalableImageMime(mime);
        if (needsReencode) {
            try {
                const reencoded = await downscaleImage(bytes, { maxBytes: e.imageBudget, sourceMime: mime });
                bytes = reencoded.bytes;
                mime = reencoded.mimeType;
            }
            catch {
                // Send it as-is; a provider rejection becomes the per-image note below.
            }
        }
        const request = {
            kind: "image",
            source: `${e.source}#${picture.label.replace(/\s+/g, "-")}`,
            sourceType: e.sourceType,
            bytes,
            mimeType: mime,
            question: e.question,
        };
        if (e.maxTokens !== undefined) {
            request.maxTokens = e.maxTokens;
        }
        if (e.provider) {
            request.provider = e.provider;
        }
        if (e.model) {
            request.model = e.model;
        }
        if (e.signal) {
            request.signal = e.signal;
        }
        const outcome = await understandViaProvider(request);
        let description;
        if (outcome.ok) {
            description = firstText(outcome.result);
        }
        else {
            const reason = outcome.unavailable ? outcome.message : firstText(outcome.result);
            description = `(Could not understand this embedded image: ${reason})`;
        }
        content.push({ type: "text", text: `--- ${picture.label} ---\n${description}` });
    }
    return { content, imageCount: 0 };
}
2183
+ }
2184
+ /* ─────────────────────────── small helpers ─────────────────────────── */
2185
/**
 * Non-throwing key probe: asks `cfg.resolveKey(provider)` for a key and
 * returns it, or the empty string when the lookup throws or yields a falsy
 * value.
 */
function safeResolveKey(cfg, provider) {
    let key;
    try {
        key = cfg.resolveKey(provider);
    }
    catch {
        return "";
    }
    return key || "";
}
2194
/**
 * Non-throwing probe: reports whether `resolvePiModel("image", cfg)` yields a
 * truthy result. Any exception from the resolver is treated as "no image
 * model available" and reported as `false`.
 */
function safeResolvePiImageModel(cfg) {
    let resolved;
    try {
        resolved = resolvePiModel("image", cfg);
    }
    catch {
        return false;
    }
    return Boolean(resolved);
}
2208
/**
 * Slide count for an already-unzipped PPTX, used to clamp a `pages` range.
 * The length of `resolveSlideOrder(entries)` wins when it is positive; if the
 * resolver throws or reports nothing, the count falls back to the number of
 * entry names shaped like `ppt/slides/slideN.xml`.
 */
function countPptxSlides(entries) {
    let orderedCount;
    try {
        orderedCount = resolveSlideOrder(entries).length;
    }
    catch {
        // Presentation order unavailable — use the filename tally below.
        orderedCount = 0;
    }
    if (orderedCount > 0) {
        return orderedCount;
    }
    const slideFilePattern = /^ppt\/slides\/slide\d+\.xml$/;
    let tally = 0;
    for (const entryName of Object.keys(entries)) {
        if (slideFilePattern.test(entryName)) {
            tally += 1;
        }
    }
    return tally;
}
2225
/**
 * Normalize a caller-supplied byte limit.
 *
 * A missing, non-numeric or non-finite `requested` selects a default
 * (`DEFAULT_IMAGE_MAX_BYTES` when `looksImage` is set, else
 * `DEFAULT_MAX_BYTES`). A finite number is floored and then confined to the
 * range 1024 … `MAX_BYTES_CEILING`.
 */
function clampBytes(requested, looksImage = false) {
    const isUsableNumber = typeof requested === "number" && Number.isFinite(requested);
    if (!isUsableNumber) {
        if (looksImage) {
            return DEFAULT_IMAGE_MAX_BYTES;
        }
        return DEFAULT_MAX_BYTES;
    }
    const whole = Math.floor(requested);
    const capped = Math.min(MAX_BYTES_CEILING, whole);
    return Math.max(1024, capped);
}
2231
/**
 * Build the leading instruction text the model reads before the content.
 *
 * @param question Optional caller question. A missing, empty or
 *   whitespace-only question selects the generic describe/summarize prompt;
 *   otherwise the trimmed question is embedded.
 * @param kind Media kind. `image`, `video`, `audio` and `text` have dedicated
 *   phrasings; any other value is described as "the extracted <kind> content".
 * @returns The instruction string.
 */
function buildPromptText(question, kind) {
    const subjectByKind = new Map([
        ["image", "the image below"],
        ["video", "the video referenced below"],
        ["audio", "the audio referenced below"],
        ["text", "the text content below"],
    ]);
    const what = subjectByKind.get(kind) ?? `the extracted ${kind} content below`;
    // Trim first so a blank question ("   ") is treated as "no question"
    // instead of producing an "answer this:" prompt with nothing to answer.
    const asked = question == null ? "" : String(question).trim();
    if (asked) {
        return `Analyze ${what} and answer this:\n${asked}`;
    }
    return `Analyze ${what} and describe / summarize what it contains.`;
}
2246
/**
 * Build the provider prompt for an AUDIO call, folding in an optional spoken-
 * language hint and the caller's question/context. The language hint is
 * expressed in the instruction text rather than as a separate field.
 *
 * @param question Caller question/context. Missing (`undefined`/`null`),
 *   empty or whitespace-only values select the default
 *   transcribe-then-summarize instruction.
 * @param language Optional spoken-language hint; blank values add nothing.
 * @returns The prompt string: the question (or the default) followed by the
 *   language clause when a language was given.
 */
export function buildAudioPrompt(question, language) {
    const lang = String(language ?? "").trim();
    const langClause = lang
        ? ` The spoken language is ${lang} — transcribe in ${lang} and preserve it.`
        : "";
    // Guard the question the same way as the language: an absent question must
    // fall back to the default rather than throw on `.trim()`.
    const asked = String(question ?? "").trim();
    const base = asked || "Transcribe this audio, then briefly summarize what is said.";
    return `${base}${langClause}`;
}
2262
/**
 * Heuristic: do these bytes look like UTF-8 text (so an unknown extension/MIME
 * can be read as the `text` kind rather than rejected)?
 *
 * Only the first 4096 bytes are inspected. The sample is rejected when it
 * contains a NUL byte, when more than 5% of it is C0 control bytes other than
 * tab/LF/FF/CR, or when decoding it as UTF-8 produces replacement characters
 * (U+FFFD) amounting to more than 1% of the decoded length. Empty input is
 * never text.
 *
 * @param bytes A Buffer or any Uint8Array view.
 * @returns `true` when the sample passes all checks.
 */
export function looksLikeUtf8Text(bytes) {
    if (bytes.length === 0) {
        return false;
    }
    const head = bytes.subarray(0, Math.min(bytes.length, 4096));
    // `toString("utf8")` only decodes on a Buffer; on a plain Uint8Array it
    // would return a comma-joined number list and the UTF-8 check below would
    // silently pass. Wrap non-Buffer views (zero-copy) so both are decoded.
    const sample = Buffer.isBuffer(head)
        ? head
        : Buffer.from(head.buffer, head.byteOffset, head.byteLength);
    let control = 0;
    for (const b of sample) {
        if (b === 0) {
            return false; // NUL → binary
        }
        // Allow tab(9), LF(10), CR(13), FF(12); count other C0 controls.
        if (b < 0x20 && b !== 9 && b !== 10 && b !== 13 && b !== 12) {
            control += 1;
        }
    }
    if (control / sample.length > 0.05) {
        return false;
    }
    // Invalid UTF-8 decodes to U+FFFD. A small share is tolerated (the sample
    // may genuinely contain the character); a large share means binary data.
    // The escape form keeps the pattern intact if this file is ever re-encoded.
    const decoded = sample.toString("utf8");
    const replacements = (decoded.match(/\uFFFD/g) ?? []).length;
    if (replacements > 0 && replacements / decoded.length > 0.01) {
        return false;
    }
    return true;
}
2291
/**
 * Wrap failure details in the standard JSON tool result. The payload starts
 * from `ok: false` / `returned: "none"` and then takes every field of `d`
 * (fields of `d` win on a name clash).
 */
function failure(d) {
    const payload = {
        ok: false,
        returned: "none",
        ...d,
    };
    return jsonResult(payload);
}
2294
/**
 * Short display name for a source.
 *
 * For an http(s) URL this is the last non-empty path segment, or the hostname
 * when the path has none. For anything else (or a URL that fails to parse) it
 * is the final `/`- or `\`-separated component after trailing separators are
 * removed, falling back to the source itself when that component is empty.
 */
function basenameOf(source) {
    try {
        if (/^https?:\/\//i.test(source)) {
            const { pathname, hostname } = new URL(source);
            const segments = pathname.split("/").filter((segment) => segment !== "");
            return segments.length > 0 ? segments[segments.length - 1] : hostname;
        }
    }
    catch {
        // Unparseable URL — treat it like a filesystem path below.
    }
    const withoutTrailingSeparators = source.replace(/[\\/]+$/, "");
    const lastSeparator = Math.max(
        withoutTrailingSeparators.lastIndexOf("/"),
        withoutTrailingSeparators.lastIndexOf("\\"),
    );
    const leaf = withoutTrailingSeparators.slice(lastSeparator + 1);
    return leaf === "" ? source : leaf;
}
2310
/**
 * Collect the `text` of every block in `r.content` whose `type` is "text",
 * join them with newlines, and trim the combined string.
 */
function firstText(r) {
    const pieces = [];
    for (const block of r.content) {
        if (block.type === "text") {
            pieces.push(block.text);
        }
    }
    return pieces.join("\n").trim();
}
2318
+ // Image byte cap is applied where the image handler runs; export the constant
2319
+ // so callers/tests can reference the tighter image default.
2320
+ export { DEFAULT_IMAGE_MAX_BYTES, DEFAULT_MAX_BYTES, DEFAULT_MAX_CHARS };
2321
+ //# sourceMappingURL=analyze-media-tool.js.map