@spinabot/brigade 1.11.2 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +36 -0
  2. package/dist/agents/tools/edge-tts.d.ts +44 -0
  3. package/dist/agents/tools/edge-tts.d.ts.map +1 -0
  4. package/dist/agents/tools/edge-tts.js +142 -0
  5. package/dist/agents/tools/edge-tts.js.map +1 -0
  6. package/dist/agents/tools/generate-music-tool.d.ts +61 -0
  7. package/dist/agents/tools/generate-music-tool.d.ts.map +1 -0
  8. package/dist/agents/tools/generate-music-tool.js +286 -0
  9. package/dist/agents/tools/generate-music-tool.js.map +1 -0
  10. package/dist/agents/tools/generate-speech-tool.d.ts +69 -0
  11. package/dist/agents/tools/generate-speech-tool.d.ts.map +1 -0
  12. package/dist/agents/tools/generate-speech-tool.js +331 -0
  13. package/dist/agents/tools/generate-speech-tool.js.map +1 -0
  14. package/dist/agents/tools/generate-video-tool.d.ts +111 -0
  15. package/dist/agents/tools/generate-video-tool.d.ts.map +1 -0
  16. package/dist/agents/tools/generate-video-tool.js +1028 -0
  17. package/dist/agents/tools/generate-video-tool.js.map +1 -0
  18. package/dist/agents/tools/media-command.d.ts +47 -0
  19. package/dist/agents/tools/media-command.d.ts.map +1 -0
  20. package/dist/agents/tools/media-command.js +93 -0
  21. package/dist/agents/tools/media-command.js.map +1 -0
  22. package/dist/agents/tools/registry.d.ts.map +1 -1
  23. package/dist/agents/tools/registry.js +27 -0
  24. package/dist/agents/tools/registry.js.map +1 -1
  25. package/dist/agents/tools/transcribe-audio-tool.d.ts +96 -0
  26. package/dist/agents/tools/transcribe-audio-tool.d.ts.map +1 -0
  27. package/dist/agents/tools/transcribe-audio-tool.js +577 -0
  28. package/dist/agents/tools/transcribe-audio-tool.js.map +1 -0
  29. package/dist/buildstamp.json +1 -1
  30. package/package.json +1 -1
package/README.md CHANGED
@@ -119,6 +119,7 @@ backend — all under one `~/.brigade/` directory you fully own.
119
119
  | 📅 **Always-on** | Run as a headless WebSocket gateway with a crash supervisor, cron jobs, and OS service install. |
120
120
  | 💬 **Channels** | Talk to your crew from WhatsApp and Telegram today; the adapter contract is built for more. |
121
121
  | 🔗 **1,000+ connectors** | Gmail, Slack, GitHub, Notion, Calendar, Linear… via the built-in Composio tool. |
122
+ | 📄 **Reads & writes documents** | Send a PDF, Office doc, image, audio, or video and your crew *understands* it — then have it **create or edit** Word, Excel, PowerPoint, and PDF files and hand them back. |
122
123
  | 🧩 **MCP** | Expose your long-term memory to any MCP client (`brigade mcp`), or connect MCP servers in. |
123
124
  | 🗄️ **Your storage** | Default filesystem mode, or an optional **fully self-hosted Convex** backend with at-rest encryption. |
124
125
  | 🔐 **Yours** | Everything lives under `~/.brigade/`. Keys are stored locally at mode `0600`. `rm -rf ~/.brigade` wipes it clean. |
@@ -283,6 +284,17 @@ to **PLATFORM**, open **Settings → API Keys**, and copy the `ak_…` key. Hand
283
284
  Brigade once (*"set my Composio key to `ak_…`"*) and it's verified and stored encrypted.
284
285
  Full guide: **[docs/composio.md](docs/composio.md)**.
285
286
 
287
+ ### 📄 Documents & media
288
+ Send your crew a file and it actually **reads** it: the `analyze_media` tool
289
+ understands PDFs (text or scanned), Word / Excel / PowerPoint (including the images
290
+ embedded inside them), plain images, audio, and video — auto-selecting the right
291
+ provider and caching the result. The write side, `make_document` and
292
+ `edit_document`, **creates and edits** Word / Excel / PowerPoint / PDF in place:
293
+ fill templates and form fields, set cells and formulas, add charts, and
294
+ merge / split / stamp / watermark PDFs — all with pure-JS libraries, no sandbox.
295
+ Drop a document into a channel and Brigade sees it; ask for a report and it hands
296
+ one back with `send_media`.
297
+
286
298
  ### 🧩 MCP
287
299
  Run `brigade mcp` to expose your long-term memory to any MCP client (Claude Desktop,
288
300
  editors, etc.) as add/search/context tools over stdio, owner-bound.
@@ -342,6 +354,7 @@ principle is the same: *independent verification, never the agent judging itself
342
354
  | `brigade status` | Snapshot config, sessions, and gateway state (`--json`) |
343
355
  | `brigade doctor` | Health-check Node, config, providers, prompts, logs, gateway (`--json`, `--strict`, `--gateway <url>`) |
344
356
  | `brigade logs` | Tail today's gateway log (`--follow`) |
357
+ | `brigade update` · `upgrade` | Update Brigade to the latest code and restart the gateway (`--check`, `--no-restart`) |
345
358
 
346
359
  ### Gateway
347
360
 
@@ -351,6 +364,7 @@ principle is the same: *independent verification, never the agent judging itself
351
364
  | `brigade gateway status` · `stop` · `restart` | Inspect / stop / restart the running gateway |
352
365
  | `brigade gateway install` · `uninstall` | Install/remove as a system service (launchd / systemd / Task Scheduler) |
353
366
  | `brigade gateway supervise` | Out-of-process crash watchdog (respawns a wedged gateway) |
367
+ | `brigade expose` · `expose stop` | Publish the gateway to the public internet via a secure, token-gated tunnel (alias: `bloody benchmark`) |
354
368
 
355
369
  ### Agents
356
370
 
@@ -485,6 +499,27 @@ brigade doctor --json # machine-readable
485
499
  brigade doctor --strict # exit 1 on warnings (CI mode)
486
500
  ```
487
501
 
502
+ ### `brigade update`
503
+
504
+ Bring Brigade up to date and reload the gateway. It auto-detects how Brigade is
505
+ installed and does the right thing:
506
+
507
+ - **npm global** — `npm i -g @spinabot/brigade@latest`, then restart.
507
+ - **source checkout** — `git pull` (fast-forward, only when your tree is clean and
508
+ behind upstream — a dirty tree is left untouched and rebuilt as-is), then
510
+ `npm install`, `npm run build`, then restart.
511
+
512
+ ```bash
513
+ brigade update # update + restart the gateway
514
+ brigade upgrade # alias of update
515
+ brigade update --check # report whether newer code is available; change nothing
516
+ brigade update --no-restart
517
+ ```
518
+
519
+ If Brigade is installed as a background service (`brigade gateway install`) the
520
+ restart is automatic; if you run the gateway in the foreground, restart it yourself
521
+ to load the new code. Same behavior on macOS, Linux, and Windows.
522
+
488
523
  ### `brigade config`
489
524
 
490
525
  Read and write the local config without opening the TUI.
@@ -605,6 +640,7 @@ Every agent gets a curated toolset. Mutating/privileged tools are owner-gated
605
640
  - **Web:** `web_search`, `fetch_url`, `browser` (when a provider is configured)
606
641
  - **Connectors:** `composio` (1,000+ apps), `oauth_authorize`
607
642
  - **Generation:** `generate_image`
643
+ - **Documents & media:** `analyze_media` (read/understand PDF · Office · image · audio · video), `make_document` · `edit_document` (create & edit Word/Excel/PowerPoint/PDF)
608
644
  - **Channels:** `send_message`, `send_media` (when a channel is linked)
609
645
 
610
646
  ---
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Microsoft Edge "Read Aloud" text-to-speech over WebSocket — FREE, no API key.
3
+ *
4
+ * This is the same free endpoint the `node-edge-tts` package uses (the Bing /
5
+ * "Read Aloud" TTS WebSocket). Auth is an embedded TrustedClientToken plus a
6
+ * computed `Sec-MS-GEC` token (a SHA-256 of the current Windows file-time ticks,
7
+ * floored to 5 minutes, concatenated with the token). The socket sends a
8
+ * `speech.config` frame then an `ssml` frame; audio arrives as binary WS frames
9
+ * (after a `Path:audio` header) and the turn ends on a `Path:turn.end` text
10
+ * frame. Returns MP3 bytes.
11
+ *
12
+ * Re-implemented self-contained (no new dependency) over the `ws` package that
13
+ * Brigade already ships for the gateway/TUI.
14
+ */
15
+ /** Minimal WebSocket surface edge-tts uses — lets tests inject a fake socket. */
16
+ export interface EdgeWebSocketLike {
17
+ on(event: "open" | "message" | "error" | "close", cb: (...args: unknown[]) => void): void; // synthesizeEdge subscribes to all four events
18
+ send(data: string): void; // only string frames are sent: `speech.config`, then `ssml`
19
+ close(): void; // invoked during cleanup once the request has settled
20
+ }
21
+ export interface EdgeTtsOptions {
22
+ text: string; // text to speak; XML-escaped before it is placed in the SSML frame
23
+ voice: string; // voice name written into the SSML `<voice name='...'>` attribute
24
+ outputFormat?: string; // defaults to "audio-24khz-48kbitrate-mono-mp3"
25
+ signal?: AbortSignal; // aborting rejects the synthesis with Error("aborted")
26
+ timeoutMs?: number; // defaults to 30_000 ms; on expiry rejects with "Edge TTS timed out"
27
+ /** Test seam: inject a WebSocket factory instead of opening a real socket. */
28
+ wsFactory?: (url: string, headers: Record<string, string>) => EdgeWebSocketLike;
29
+ }
30
+ /** Synthesize speech via the free Edge endpoint. Resolves with MP3 bytes. */
31
+ export declare function synthesizeEdge(opts: EdgeTtsOptions): Promise<Buffer>;
32
+ /**
33
+ * The `Sec-MS-GEC` auth token: uppercase SHA-256 hex of `${ticks}${TrustedClientToken}`,
34
+ * where `ticks` = Windows file time (100-ns intervals since 1601-01-01) floored to
35
+ * a 5-minute boundary. BigInt math — the tick count exceeds Number.MAX_SAFE_INTEGER.
36
+ */
37
+ export declare function secMsGec(nowMs?: number): string;
38
+ /** The opening `speech.config` frame carrying the requested output format. */
39
+ export declare function configFrame(outputFormat: string): string;
40
+ /** The `ssml` frame carrying the voice + escaped text. */
41
+ export declare function ssmlFrame(text: string, voice: string): string;
42
+ /** Minimal XML escaping for SSML text content. */
43
+ export declare function escapeXml(s: string): string;
44
+ //# sourceMappingURL=edge-tts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"edge-tts.d.ts","sourceRoot":"","sources":["../../../src/agents/tools/edge-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAYH,iFAAiF;AACjF,MAAM,WAAW,iBAAiB;IACjC,EAAE,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,OAAO,EAAE,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,GAAG,IAAI,CAAC;IAC1F,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,KAAK,IAAI,IAAI,CAAC;CACd;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8EAA8E;IAC9E,SAAS,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,iBAAiB,CAAC;CAChF;AAED,6EAA6E;AAC7E,wBAAsB,cAAc,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAsE1E;AAED;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,KAAK,GAAE,MAAmB,GAAG,MAAM,CAK3D;AAED,8EAA8E;AAC9E,wBAAgB,WAAW,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,CAYxD;AAED,0DAA0D;AAC1D,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAM7D;AAED,kDAAkD;AAClD,wBAAgB,SAAS,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAO3C"}
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Microsoft Edge "Read Aloud" text-to-speech over WebSocket — FREE, no API key.
3
+ *
4
+ * This is the same free endpoint the `node-edge-tts` package uses (the Bing /
5
+ * "Read Aloud" TTS WebSocket). Auth is an embedded TrustedClientToken plus a
6
+ * computed `Sec-MS-GEC` token (a SHA-256 of the current Windows file-time ticks,
7
+ * floored to 5 minutes, concatenated with the token). The socket sends a
8
+ * `speech.config` frame then an `ssml` frame; audio arrives as binary WS frames
9
+ * (after a `Path:audio` header) and the turn ends on a `Path:turn.end` text
10
+ * frame. Returns MP3 bytes.
11
+ *
12
+ * Re-implemented self-contained (no new dependency) over the `ws` package that
13
+ * Brigade already ships for the gateway/TUI.
14
+ */
15
+ import crypto from "node:crypto";
16
+ import { WebSocket } from "ws";
17
+ const TRUSTED_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"; // sent as the TrustedClientToken query parameter and hashed into Sec-MS-GEC
18
+ const GEC_VERSION = "1-131.0.2903.86"; // sent as the Sec-MS-GEC-Version query parameter
19
+ const WSS_BASE = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1"; // endpoint base; synthesizeEdge appends the query string
20
+ const CHROME_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"; // sent as the User-Agent header
21
/**
 * Synthesize speech via the free Edge endpoint.
 *
 * Opens the socket, sends the `speech.config` frame followed by the `ssml`
 * frame, collects the payload of every binary `Path:audio` frame, and settles
 * when a `Path:turn.end` text frame arrives or the socket closes.
 *
 * @param {object} opts
 * @param {string} opts.text Text to speak.
 * @param {string} opts.voice Voice name placed in the SSML `<voice>` element.
 * @param {string} [opts.outputFormat] Defaults to "audio-24khz-48kbitrate-mono-mp3".
 * @param {AbortSignal} [opts.signal] Aborting rejects with `Error("aborted")`.
 * @param {number} [opts.timeoutMs] Overall deadline in ms; defaults to 30_000.
 * @param {Function} [opts.wsFactory] Test seam: `(url, headers) => socket`, used
 *   instead of constructing a real `WebSocket`.
 * @returns {Promise<Buffer>} The concatenated audio payloads (MP3 with the
 *   default output format).
 * @throws {Error} "aborted", "Edge TTS timed out", "Edge TTS produced no audio",
 *   or the error reported by the socket.
 */
export async function synthesizeEdge(opts) {
    // An already-aborted signal never dispatches another "abort" event, so the
    // listener registered below would not fire. Reject before opening a socket.
    if (opts.signal?.aborted)
        throw new Error("aborted");
    const outputFormat = opts.outputFormat ?? "audio-24khz-48kbitrate-mono-mp3";
    const url = `${WSS_BASE}?TrustedClientToken=${TRUSTED_TOKEN}&Sec-MS-GEC=${secMsGec()}&Sec-MS-GEC-Version=${GEC_VERSION}`;
    const headers = {
        "User-Agent": CHROME_UA,
        Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
        "Accept-Language": "en-US,en;q=0.9",
    };
    const ws = opts.wsFactory
        ? opts.wsFactory(url, headers)
        : new WebSocket(url, { headers });
    return await new Promise((resolve, reject) => {
        const chunks = [];
        let settled = false;
        const timer = setTimeout(() => fail(new Error("Edge TTS timed out")), opts.timeoutMs ?? 30_000);
        const onAbort = () => fail(new Error("aborted"));
        opts.signal?.addEventListener("abort", onAbort, { once: true });
        // Releases the timer and the abort listener, then closes the socket.
        const cleanup = () => {
            clearTimeout(timer);
            opts.signal?.removeEventListener("abort", onAbort);
            try {
                ws.close();
            }
            catch {
                /* best-effort close: the promise outcome is already decided */
            }
        };
        // Reject once; later calls are no-ops.
        function fail(err) {
            if (settled)
                return;
            settled = true;
            cleanup();
            reject(err);
        }
        // Resolve once with the collected audio, or reject if none arrived.
        function done() {
            if (settled)
                return;
            settled = true;
            cleanup();
            if (chunks.length === 0)
                reject(new Error("Edge TTS produced no audio"));
            else
                resolve(Buffer.concat(chunks));
        }
        ws.on("open", () => {
            // Route a synchronous send failure into the promise instead of letting
            // it escape from the event handler.
            try {
                ws.send(configFrame(outputFormat));
                ws.send(ssmlFrame(opts.text, opts.voice));
            }
            catch (err) {
                fail(err instanceof Error ? err : new Error(String(err)));
            }
        });
        ws.on("message", (...args) => {
            const data = args[0];
            const isBinary = args[1] === true;
            const buf = Buffer.isBuffer(data)
                ? data
                : data instanceof ArrayBuffer
                    ? Buffer.from(data)
                    : Buffer.from(String(data), "utf8");
            if (isBinary) {
                // Binary frame: first 2 bytes = big-endian header length; audio follows.
                if (buf.length < 2)
                    return;
                const headerLen = buf.readUInt16BE(0);
                const audioStart = 2 + headerLen;
                const header = buf.subarray(2, audioStart).toString("utf8");
                // Skip frames with no payload (or a truncated header) so an empty
                // frame cannot make a silent response look like audio.
                if (header.includes("Path:audio") && buf.length > audioStart)
                    chunks.push(buf.subarray(audioStart));
            }
            else if (buf.toString("utf8").includes("Path:turn.end")) {
                done();
            }
        });
        ws.on("error", (...args) => {
            const e = args[0];
            fail(e instanceof Error ? e : new Error(String(e)));
        });
        // A close without `turn.end` still settles with whatever audio arrived.
        ws.on("close", () => {
            done();
        });
    });
}
101
/**
 * Compute the `Sec-MS-GEC` auth token for a given instant.
 *
 * The instant is converted to seconds since 1601-01-01 (the Windows file-time
 * epoch), floored to a 5-minute boundary, and expressed in 100-ns ticks. The
 * token is the uppercase SHA-256 hex digest of `${ticks}${TRUSTED_TOKEN}`.
 * BigInt is used because the tick count exceeds Number.MAX_SAFE_INTEGER.
 *
 * @param {number} [nowMs] Unix time in milliseconds; defaults to `Date.now()`.
 * @returns {string} 64-character uppercase hex digest.
 */
export function secMsGec(nowMs = Date.now()) {
    const unixSeconds = Math.floor(nowMs / 1000);
    // 11_644_473_600 s separate 1601-01-01 from the Unix epoch.
    const windowsSeconds = BigInt(unixSeconds + 11_644_473_600);
    const flooredSeconds = windowsSeconds - (windowsSeconds % 300n);
    const ticks = flooredSeconds * 10000000n;
    const hasher = crypto.createHash("sha256");
    hasher.update(`${ticks}${TRUSTED_TOKEN}`);
    return hasher.digest("hex").toUpperCase();
}
112
/**
 * Build the opening `speech.config` text frame.
 *
 * The frame is three CRLF-separated header lines (`X-Timestamp`,
 * `Content-Type`, `Path`), a blank line, and a JSON body that disables sentence
 * and word boundary metadata and carries the requested output format.
 *
 * @param {string} outputFormat Audio format identifier placed in the JSON body.
 * @returns {string} The complete frame text.
 */
export function configFrame(outputFormat) {
    const metadataoptions = { sentenceBoundaryEnabled: false, wordBoundaryEnabled: false };
    const body = JSON.stringify({
        context: { synthesis: { audio: { metadataoptions, outputFormat } } },
    });
    const headerLines = [
        `X-Timestamp:${new Date().toString()}`,
        "Content-Type:application/json; charset=utf-8",
        "Path:speech.config",
    ];
    return `${headerLines.join("\r\n")}\r\n\r\n${body}`;
}
126
/**
 * Build the `ssml` text frame carrying the voice and the text to speak.
 *
 * Both `voice` and `text` are passed through `escapeXml`: the voice name sits
 * inside a single-quoted attribute, so an unescaped quote or `<` would break
 * the document or inject markup.
 *
 * @param {string} text Text to speak.
 * @param {string} voice Voice name for the `<voice name='...'>` attribute.
 * @returns {string} CRLF-separated headers, a blank line, then the SSML document.
 */
export function ssmlFrame(text, voice) {
    const id = crypto.randomUUID().replace(/-/g, "");
    const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>` +
        `<voice name='${escapeXml(voice)}'><prosody pitch='+0Hz' rate='+0%' volume='+0%'>${escapeXml(text)}</prosody></voice></speak>`;
    // NOTE(review): the trailing "Z" after a local-time Date string is kept
    // byte-for-byte; presumably it mirrors what the endpoint's reference client
    // sends. Confirm before "fixing" it.
    return `X-RequestId:${id}\r\nContent-Type:application/ssml+xml\r\nX-Timestamp:${new Date().toString()}Z\r\nPath:ssml\r\n\r\n${ssml}`;
}
/**
 * Minimal XML escaping for SSML text and attribute content.
 *
 * Replaces the C0 control characters that XML 1.0 does not allow (everything
 * below U+0020 except tab, LF and CR) with a space, then escapes the five XML
 * special characters. `&` is escaped first so the entities produced by the
 * later replacements are not double-escaped.
 *
 * @param {string} s Raw text.
 * @returns {string} Text safe to embed in an XML element or quoted attribute.
 */
export function escapeXml(s) {
    return s
        .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, " ")
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/'/g, "&apos;")
        .replace(/"/g, "&quot;");
}
142
+ //# sourceMappingURL=edge-tts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"edge-tts.js","sourceRoot":"","sources":["../../../src/agents/tools/edge-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,MAAM,MAAM,aAAa,CAAC;AAEjC,OAAO,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAE/B,MAAM,aAAa,GAAG,kCAAkC,CAAC;AACzD,MAAM,WAAW,GAAG,iBAAiB,CAAC;AACtC,MAAM,QAAQ,GAAG,6EAA6E,CAAC;AAC/F,MAAM,SAAS,GACd,+HAA+H,CAAC;AAmBjI,6EAA6E;AAC7E,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAoB;IACxD,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,iCAAiC,CAAC;IAC5E,MAAM,GAAG,GAAG,GAAG,QAAQ,uBAAuB,aAAa,eAAe,QAAQ,EAAE,uBAAuB,WAAW,EAAE,CAAC;IACzH,MAAM,OAAO,GAA2B;QACvC,YAAY,EAAE,SAAS;QACvB,MAAM,EAAE,qDAAqD;QAC7D,iBAAiB,EAAE,gBAAgB;KACnC,CAAC;IACF,MAAM,EAAE,GAAsB,IAAI,CAAC,SAAS;QAC3C,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC;QAC9B,CAAC,CAAE,IAAI,SAAS,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAkC,CAAC;IAErE,OAAO,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACpD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,IAAI,MAAM,CAAC,CAAC;QAChG,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;QACjD,IAAI,CAAC,MAAM,EAAE,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QAChE,MAAM,OAAO,GAAG,GAAG,EAAE;YACpB,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,IAAI,CAAC,MAAM,EAAE,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YACnD,IAAI,CAAC;gBACJ,EAAE,CAAC,KAAK,EAAE,CAAC;YACZ,CAAC;YAAC,MAAM,CAAC;gBACR,YAAY;YACb,CAAC;QACF,CAAC,CAAC;QACF,SAAS,IAAI,CAAC,GAAU;YACvB,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,OAAO,EAAE,CAAC;YACV,MAAM,CAAC,GAAG,CAAC,CAAC;QACb,CAAC;QACD,SAAS,IAAI;YACZ,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,OAAO,EAAE,CAAC;YACV,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM,CAAC,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC,CAAC;;gBACpE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACrC,CAAC;QACD,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;YAClB,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC,CAAC;YACnC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,I
AAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAG,IAAe,EAAE,EAAE;YACvC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC;YAClC,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC;gBAChC,CAAC,CAAC,IAAI;gBACN,CAAC,CAAC,IAAI,YAAY,WAAW;oBAC5B,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;oBACnB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC,CAAC;YACtC,IAAI,QAAQ,EAAE,CAAC;gBACd,yEAAyE;gBACzE,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;oBAAE,OAAO;gBAC3B,MAAM,SAAS,GAAG,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;gBACtC,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC/D,IAAI,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC;oBAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;YAC7E,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBAC3D,IAAI,EAAE,CAAC;YACR,CAAC;QACF,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,IAAe,EAAE,EAAE;YACrC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,IAAI,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACnB,IAAI,CAAC,OAAO;gBAAE,IAAI,EAAE,CAAC;QACtB,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,QAAQ,CAAC,QAAgB,IAAI,CAAC,GAAG,EAAE;IAClD,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,cAAc,CAAC,CAAC;IAC3E,MAAM,cAAc,GAAG,CAAC,gBAAgB,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;IACxD,MAAM,KAAK,GAAG,cAAc,GAAG,SAAW,CAAC;IAC3C,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,KAAK,GAAG,aAAa,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;AACnG,CAAC;AAED,8EAA8E;AAC9E,MAAM,UAAU,WAAW,CAAC,YAAoB;IAC/C,MAAM,GAAG,GAAG;QACX,OAAO,EAAE;YACR,SAAS,EAAE;gBACV,KAAK,EAAE;oBACN,eAAe,EAAE,EAAE,uBAAuB,EAAE,KAAK,EAAE,mBAAmB,EAAE,KAAK,EAAE;oBAC/E,YAAY;iBACZ;aACD;SACD;KACD,CAAC;IACF,OAAO,eAAe,IAAI,IAAI,EAAE,CAAC,QAAQ,EAAE,iFAAiF,IAAI,CA
AC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC;AACnJ,CAAC;AAED,0DAA0D;AAC1D,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,KAAa;IACpD,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACjD,MAAM,IAAI,GACT,oFAAoF;QACpF,gBAAgB,KAAK,mDAAmD,SAAS,CAAC,IAAI,CAAC,4BAA4B,CAAC;IACrH,OAAO,eAAe,EAAE,wDAAwD,IAAI,IAAI,EAAE,CAAC,QAAQ,EAAE,yBAAyB,IAAI,EAAE,CAAC;AACtI,CAAC;AAED,kDAAkD;AAClD,MAAM,UAAU,SAAS,CAAC,CAAS;IAClC,OAAO,CAAC;SACN,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC3B,CAAC"}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * `generate_music` tool — text-to-music generation, modeled on the proven
3
+ * `generate_speech` self-contained pattern.
4
+ *
5
+ * Why this tool exists
6
+ * --------------------
7
+ * Same reasoning as `generate_speech`/`generate_image`: without a first-class
8
+ * tool, "make a song" / "compose background music" sends the model to raw
9
+ * `curl` against a music API — the key flows through a shell, the (binary or
10
+ * base64) audio response gets mangled by a text-only parser, and a billed
11
+ * generation is dropped. This tool owns the call in-process: stored auth,
12
+ * validated params, a parser that understands each provider's audio shape, and
13
+ * a saved file the model hands to `send_media`.
14
+ *
15
+ * Providers (auto-selected by which key is configured, preference order):
16
+ * • google — Lyria via Gemini generateContent (AUDIO modality) → base64
17
+ * audio (mp3). Single POST, no poll.
18
+ * • minimax — Music generation → URL or inline (hex/base64) audio (mp3).
19
+ * • elevenlabs — Music endpoint → raw mp3 bytes.
20
+ * Keys resolve through `resolveMediaProviderKey` (the same credential-store +
21
+ * env path the media-understanding subsystem uses), so music generation works
22
+ * for whichever provider the operator already configured — no bespoke auth.
23
+ *
24
+ * Flow: generate → bytes saved under `<cache>/audio/` → result text carries a
25
+ * `MEDIA:<saved-path>` line → the model delivers with `send_media({path})`.
26
+ */
27
+ import { Type } from "typebox";
28
+ import type { BrigadeTool } from "./types.js";
29
+ type MusicProviderId = "google" | "minimax" | "elevenlabs"; // provider ids accepted by the `provider` parameter and by `resolveKey`
30
+ declare const GenerateMusicParams: Type.TObject<{ // TypeBox object schema for the tool's parameters; every property is optional
31
+ action: Type.TOptional<Type.TUnion<[Type.TLiteral<"generate">, Type.TLiteral<"list">]>>;
32
+ prompt: Type.TOptional<Type.TString>;
33
+ lyrics: Type.TOptional<Type.TString>;
34
+ instrumental: Type.TOptional<Type.TBoolean>;
35
+ provider: Type.TOptional<Type.TUnion<[Type.TLiteral<"google">, Type.TLiteral<"minimax">, Type.TLiteral<"elevenlabs">]>>;
36
+ model: Type.TOptional<Type.TString>;
37
+ durationSeconds: Type.TOptional<Type.TInteger>;
38
+ filename: Type.TOptional<Type.TString>;
39
+ }>;
40
+ interface GenerateMusicDetails { // details type of the BrigadeTool returned by makeGenerateMusicTool
41
+ action: "generate" | "list";
42
+ provider?: string;
43
+ model?: string;
44
+ path?: string; // NOTE(review): presumably the saved audio file path — confirm against the implementation
45
+ providers?: string[];
46
+ ok: boolean;
47
+ message?: string;
48
+ }
49
+ export interface MakeGenerateMusicToolOptions {
50
+ /** Caller's agent id — drives which credential store backs the key. */
51
+ agentId?: string;
52
+ /** Test seam: replaces global fetch. */
53
+ fetchFn?: typeof fetch;
54
+ /** Test seam: output directory override. Default `<cache>/audio`. */
55
+ outDirOverride?: string;
56
+ /** Test seam: per-provider API-key resolver override. */
57
+ resolveKey?: (provider: MusicProviderId) => string;
58
+ }
59
+ export declare function makeGenerateMusicTool(opts?: MakeGenerateMusicToolOptions): BrigadeTool<typeof GenerateMusicParams, GenerateMusicDetails>; // factory for the `generate_music` tool; `opts` is optional
60
+ export {};
61
+ //# sourceMappingURL=generate-music-tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generate-music-tool.d.ts","sourceRoot":"","sources":["../../../src/agents/tools/generate-music-tool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAKH,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAM/B,OAAO,KAAK,EAAmB,WAAW,EAAE,MAAM,YAAY,CAAC;AAO/D,KAAK,eAAe,GAAG,QAAQ,GAAG,SAAS,GAAG,YAAY,CAAC;AAW3D,QAAA,MAAM,mBAAmB;;;;;;;;;EAwBvB,CAAC;AAEH,UAAU,oBAAoB;IAC7B,MAAM,EAAE,UAAU,GAAG,MAAM,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,EAAE,EAAE,OAAO,CAAC;IACZ,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,4BAA4B;IAC5C,uEAAuE;IACvE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wCAAwC;IACxC,OAAO,CAAC,EAAE,OAAO,KAAK,CAAC;IACvB,qEAAqE;IACrE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,yDAAyD;IACzD,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,eAAe,KAAK,MAAM,CAAC;CACnD;AAED,wBAAgB,qBAAqB,CACpC,IAAI,GAAE,4BAAiC,GACrC,WAAW,CAAC,OAAO,mBAAmB,EAAE,oBAAoB,CAAC,CAwG/D"}
@@ -0,0 +1,286 @@
1
+ /**
2
+ * `generate_music` tool — text-to-music generation, modeled on the proven
3
+ * `generate_speech` self-contained pattern.
4
+ *
5
+ * Why this tool exists
6
+ * --------------------
7
+ * Same reasoning as `generate_speech`/`generate_image`: without a first-class
8
+ * tool, "make a song" / "compose background music" sends the model to raw
9
+ * `curl` against a music API — the key flows through a shell, the (binary or
10
+ * base64) audio response gets mangled by a text-only parser, and a billed
11
+ * generation is dropped. This tool owns the call in-process: stored auth,
12
+ * validated params, a parser that understands each provider's audio shape, and
13
+ * a saved file the model hands to `send_media`.
14
+ *
15
+ * Providers (auto-selected by which key is configured, preference order):
16
+ * • google — Lyria via Gemini generateContent (AUDIO modality) → base64
17
+ * audio (mp3). Single POST, no poll.
18
+ * • minimax — Music generation → URL or inline (hex/base64) audio (mp3).
19
+ * • elevenlabs — Music endpoint → raw mp3 bytes.
20
+ * Keys resolve through `resolveMediaProviderKey` (the same credential-store +
21
+ * env path the media-understanding subsystem uses), so music generation works
22
+ * for whichever provider the operator already configured — no bespoke auth.
23
+ *
24
+ * Flow: generate → bytes saved under `<cache>/audio/` → result text carries a
25
+ * `MEDIA:<saved-path>` line → the model delivers with `send_media({path})`.
26
+ */
27
+ import fs from "node:fs";
28
+ import path from "node:path";
29
+ import { Type } from "typebox";
30
+ import { resolveCacheDir, DEFAULT_AGENT_ID } from "../../config/paths.js";
31
+ import { loadConfig } from "../../core/config.js";
32
+ import { resolveMediaProviderKey } from "../media-understanding/config.js";
33
+ import { jsonResult } from "./common.js";
34
/** Per-request HTTP timeout in milliseconds; generous because music generation is slow. */
const REQUEST_TIMEOUT_MS = 180_000;

/** Maximum accepted prompt length in characters; longer prompts are rejected with a clear error. */
const MAX_PROMPT_CHARS = 8_000;

/** Auto-selection order when no provider is pinned: the first one with a key wins. */
const PROVIDER_PREFERENCE = ["google", "minimax", "elevenlabs"];

/** Built-in fallback model per provider (an empty string means no model name). */
const DEFAULTS = {
    google: { model: "lyria-3-clip-preview" },
    minimax: { model: "music-2.5+" },
    elevenlabs: { model: "" },
};

/** Parameter schema for the `generate_music` tool; every field is optional at the schema level. */
const GenerateMusicParams = Type.Object({
    action: Type.Optional(
        Type.Union([Type.Literal("generate"), Type.Literal("list")], {
            description: 'Optional: "generate" (default) or "list" to see which music providers are configured.',
        }),
    ),
    prompt: Type.Optional(
        Type.String({
            description: "The style/description of the music to generate (e.g. 'upbeat lo-fi hip hop with mellow piano').",
        }),
    ),
    lyrics: Type.Optional(
        Type.String({ description: "Optional lyrics for vocal tracks (ignored when instrumental)." }),
    ),
    instrumental: Type.Optional(
        Type.Boolean({ description: "Optional: when true, generate instrumental-only (no vocals)." }),
    ),
    provider: Type.Optional(
        // The accepted literals are exactly the ids in the preference list, in the same order.
        Type.Union(
            PROVIDER_PREFERENCE.map((id) => Type.Literal(id)),
            { description: "Optional music provider override. Default: the first one with a configured key." },
        ),
    ),
    model: Type.Optional(
        Type.String({ description: "Optional model override for the chosen provider." }),
    ),
    durationSeconds: Type.Optional(
        Type.Integer({
            description: "Optional target length in seconds (where the provider supports it, e.g. ElevenLabs).",
        }),
    ),
    filename: Type.Optional(
        Type.String({
            description: "Optional output filename hint (basename preserved, saved under the managed audio dir).",
        }),
    ),
});
57
/**
 * Build the `generate_music` tool definition.
 *
 * The returned tool supports two actions:
 *  - "list": reports which providers in `PROVIDER_PREFERENCE` have a non-empty key.
 *  - "generate" (default): validates the arguments, picks a provider, calls it,
 *    writes the audio to disk and returns the saved path in a `MEDIA:<path>` line.
 *
 * Every failure, including a failure to save the generated audio, is returned
 * as a structured `fail(...)` result rather than thrown.
 *
 * @param {object} [opts]
 * @param {string} [opts.agentId] Agent id passed to the key resolver (default `DEFAULT_AGENT_ID`).
 * @param {typeof fetch} [opts.fetchFn] Replacement for the global `fetch` (test seam).
 * @param {string} [opts.outDirOverride] Output directory (default `<cache>/audio`).
 * @param {(provider: string) => string} [opts.resolveKey] Per-provider key resolver (test seam).
 * @returns {object} The tool definition (name, description, parameters, execute).
 */
export function makeGenerateMusicTool(opts = {}) {
    const agentId = opts.agentId ?? DEFAULT_AGENT_ID;
    const fetchFn = opts.fetchFn ?? fetch;
    const resolveKey = opts.resolveKey ?? ((p) => resolveMediaProviderKey(p, agentId));
    return {
        name: "generate_music",
        label: "Generate Music",
        displaySummary: "generating music",
        // Billed per call (cloud music generation) — owner-gated like generate_speech.
        ownerOnly: true,
        description: [
            "Generate music from a text description (text-to-music). USE THIS — never call a music API with bash/curl: the key must not flow through a shell, and the binary/base64 audio response is parsed here.",
            'action="generate" (default): requires `prompt` (the style/description). Saves an audio file and returns its REAL path as a `MEDIA:<path>` line — reference that path exactly; never invent one.',
            "Optional `lyrics`, `instrumental`, `durationSeconds`. Auto-selects the first configured provider (Google → MiniMax → ElevenLabs); override with `provider`/`model`.",
            "To play it for the operator on a chat surface, follow up with `send_media({path})` — generation does NOT auto-send.",
            'action="list": show which music providers have a configured key.',
        ].join(" "),
        parameters: GenerateMusicParams,
        execute: async (_id, args, signal) => {
            const action = args.action ?? "generate";
            if (action === "list") {
                const providers = PROVIDER_PREFERENCE.filter((p) => resolveKey(p).length > 0);
                return jsonResult({
                    action,
                    providers,
                    ok: true,
                    message: providers.length > 0
                        ? `${providers.length} music provider(s) configured: ${providers.join(", ")}.`
                        : "No music provider configured. Add a Google, MiniMax, or ElevenLabs key with `brigade onboard`.",
                });
            }
            const prompt = (args.prompt ?? "").trim();
            if (!prompt) {
                return fail(action, "`prompt` is required for action=generate.");
            }
            if (prompt.length > MAX_PROMPT_CHARS) {
                return fail(action, `\`prompt\` is too long (${prompt.length} chars; max ${MAX_PROMPT_CHARS}). Shorten it.`);
            }
            const instrumental = args.instrumental === true;
            const lyrics = args.lyrics?.trim() || undefined;
            // Instrumental + lyrics is contradictory — refuse rather than silently drop one.
            if (instrumental && lyrics) {
                return fail(action, "`instrumental` and `lyrics` cannot both be set — pick one (instrumental = no vocals).");
            }
            // Resolve the provider: explicit override (must be keyed) else first keyed.
            let provider;
            if (args.provider) {
                if (resolveKey(args.provider).length === 0) {
                    return fail(action, `Provider "${args.provider}" has no configured key. Add one with \`brigade onboard\`, or omit \`provider\` to auto-select.`);
                }
                provider = args.provider;
            }
            else {
                provider = PROVIDER_PREFERENCE.find((p) => resolveKey(p).length > 0);
            }
            if (!provider) {
                return fail(action, "No music provider is configured. Add a Google, MiniMax, or ElevenLabs API key with `brigade onboard` (then this tool auto-selects it).");
            }
            const apiKey = resolveKey(provider);
            // Model precedence: explicit argument, then configured model, then built-in default.
            const model = args.model?.trim() || resolveConfiguredModel(provider) || DEFAULTS[provider].model;
            // Only a positive, finite duration is forwarded; anything else means "provider default".
            const durationSeconds = typeof args.durationSeconds === "number" && Number.isFinite(args.durationSeconds) && args.durationSeconds > 0
                ? Math.trunc(args.durationSeconds)
                : undefined;
            let audio;
            try {
                audio = await generate({ provider, fetchFn, apiKey, model, prompt, lyrics, instrumental, durationSeconds, signal });
            }
            catch (err) {
                return fail(action, `Music generation via ${provider} failed: ${err instanceof Error ? err.message : String(err)}`, {
                    provider,
                    model,
                });
            }
            // A missing or zero-length payload must not be reported as a successful generation.
            if (!audio?.bytes || audio.bytes.length === 0) {
                return fail(action, `Music generation via ${provider} failed: provider returned empty audio.`, {
                    provider,
                    model,
                });
            }
            // Saving can fail (permissions, full disk). Report it as a structured
            // failure instead of letting the exception escape the tool.
            let outPath;
            try {
                const outDir = opts.outDirOverride ?? path.join(resolveCacheDir(), "audio");
                fs.mkdirSync(outDir, { recursive: true });
                outPath = path.join(outDir, buildFileName(args.filename, audio.extension));
                fs.writeFileSync(outPath, audio.bytes);
            }
            catch (err) {
                return fail(action, `Music was generated via ${provider} but could not be saved: ${err instanceof Error ? err.message : String(err)}`, {
                    provider,
                    model,
                });
            }
            return {
                content: [
                    {
                        type: "text",
                        text: [
                            `Generated music with ${model ? `${provider}/${model}` : provider}.`,
                            `MEDIA:${outPath}`,
                            "Deliver with send_media({path}) — generation does not auto-send.",
                        ].join("\n"),
                    },
                ],
                details: { action, provider, model, path: outPath, ok: true },
            };
        },
    };
}
150
/**
 * Dispatch a generation request to the provider-specific implementation.
 *
 * @param {object} params Request parameters; `params.provider` selects the backend
 *   and the whole object is forwarded unchanged.
 * @returns {Promise<{bytes: Buffer, extension: string}>} The generated audio.
 * @throws {Error} When `params.provider` is not a supported provider id, or when
 *   the selected backend fails.
 */
async function generate(params) {
    switch (params.provider) {
        case "google":
            return generateGoogle(params);
        case "minimax":
            return generateMiniMax(params);
        case "elevenlabs":
            return generateElevenLabs(params);
        default:
            // Without this branch an unknown id would resolve to `undefined`
            // and surface later as an unrelated TypeError in the caller.
            throw new Error(`Unsupported music provider: ${String(params.provider)}`);
    }
}
160
/**
 * Generate music with Google Lyria through the Gemini `generateContent` endpoint.
 *
 * The API takes a single text part, so the instrumental hint and the lyrics are
 * folded into the prompt text. The API key is sent in the `x-goog-api-key`
 * header rather than the URL query string, so it does not end up in request
 * URLs that may be logged.
 *
 * @param {object} p Request parameters: `prompt`, `model`, `apiKey`, `fetchFn`,
 *   and optional `lyrics`, `instrumental`, `signal`.
 * @returns {Promise<{bytes: Buffer, extension: string}>} Decoded audio and file extension.
 * @throws {Error} On a non-2xx response or when the response carries no inline audio.
 */
async function generateGoogle(p) {
    let text = p.prompt;
    if (p.instrumental)
        text += "\n\nInstrumental only. No vocals.";
    if (p.lyrics)
        text += `\n\nLyrics:\n${p.lyrics}`;
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(p.model)}:generateContent`;
    const res = await p.fetchFn(url, {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-goog-api-key": p.apiKey },
        body: JSON.stringify({
            contents: [{ parts: [{ text }] }],
            generationConfig: { responseModalities: ["AUDIO", "TEXT"] },
        }),
        signal: withTimeout(p.signal, REQUEST_TIMEOUT_MS),
    });
    if (!res.ok)
        throw new Error(`HTTP ${res.status} ${(await safeText(res)).slice(0, 200)}`);
    const body = (await res.json());
    // The first part carrying inline data is taken as the audio payload.
    const part = body.candidates?.[0]?.content?.parts?.find((x) => x.inlineData?.data);
    const data = part?.inlineData?.data;
    if (!data)
        throw new Error("Lyria returned no audio data.");
    const bytes = Buffer.from(data, "base64");
    // Honour the reported container when it is a known one; otherwise keep the
    // previous default of mp3. (Raw PCM/L16 would still not be playable as-is.)
    const mime = String(part.inlineData.mimeType ?? "").toLowerCase();
    const extension = ["wav", "ogg", "flac"].find((ext) => mime.includes(ext)) ?? "mp3";
    return { bytes, extension };
}
190
/**
 * Generate music with the MiniMax music-generation endpoint.
 *
 * The response may carry the audio as a URL (`audio_url`, or a URL inside
 * `audio`) or inline as hex or base64 text. The first non-empty string among
 * `data.audio_url`, `audio_url`, `data.audio`, `audio` is used.
 *
 * @param {object} p Request parameters: `prompt`, `model`, `apiKey`, `fetchFn`,
 *   and optional `lyrics`, `instrumental`, `signal`.
 * @returns {Promise<{bytes: Buffer, extension: string}>} Audio bytes, always labelled "mp3".
 * @throws {Error} On a non-2xx response, a non-zero `base_resp.status_code`,
 *   a failed download, or when no audio field is present.
 */
async function generateMiniMax(p) {
    const reqBody = {
        model: p.model || "music-2.5+",
        prompt: p.prompt,
        output_format: "url",
        audio_setting: { sample_rate: 44100, bitrate: 256000, format: "mp3" },
    };
    if (p.instrumental)
        reqBody.is_instrumental = true;
    if (p.lyrics)
        reqBody.lyrics = p.lyrics;
    const res = await p.fetchFn("https://api.minimax.io/v1/music_generation", {
        method: "POST",
        headers: { Authorization: `Bearer ${p.apiKey}`, "Content-Type": "application/json" },
        body: JSON.stringify(reqBody),
        signal: withTimeout(p.signal, REQUEST_TIMEOUT_MS),
    });
    if (!res.ok)
        throw new Error(`HTTP ${res.status} ${(await safeText(res)).slice(0, 200)}`);
    const body = (await res.json());
    if (body.base_resp && body.base_resp.status_code !== 0) {
        throw new Error(`MiniMax error ${body.base_resp.status_code}: ${body.base_resp.status_msg ?? ""}`);
    }
    // Prefer an explicit URL field, then the audio field. Empty strings and
    // non-string values are skipped so they cannot mask a usable field.
    const candidate = [body.data?.audio_url, body.audio_url, body.data?.audio, body.audio]
        .find((value) => typeof value === "string" && value.length > 0);
    if (!candidate)
        throw new Error("MiniMax returned no audio.");
    if (/^https?:\/\//.test(candidate)) {
        const bytes = await downloadBytes(p.fetchFn, candidate, p.signal);
        return { bytes, extension: "mp3" };
    }
    // Inline: even-length text made only of hex digits (either case) is hex; else base64.
    const isHex = candidate.length % 2 === 0 && /^[0-9a-f]+$/i.test(candidate);
    const bytes = Buffer.from(candidate, isHex ? "hex" : "base64");
    return { bytes, extension: "mp3" };
}
228
/**
 * Generate music with the ElevenLabs music endpoint, which answers with raw
 * audio bytes.
 *
 * Only `prompt` and, when truthy, `durationSeconds` (sent as milliseconds in
 * `music_length_ms`) are forwarded to the API.
 *
 * @param {object} p Request parameters: `prompt`, `apiKey`, `fetchFn`,
 *   and optional `durationSeconds`, `signal`.
 * @returns {Promise<{bytes: Buffer, extension: string}>} Response bytes labelled "mp3".
 * @throws {Error} On a non-2xx response.
 */
async function generateElevenLabs(p) {
    const payload = p.durationSeconds
        ? { prompt: p.prompt, music_length_ms: p.durationSeconds * 1000 }
        : { prompt: p.prompt };
    const headers = {
        "xi-api-key": p.apiKey,
        "Content-Type": "application/json",
        Accept: "audio/mpeg",
    };
    const response = await p.fetchFn("https://api.elevenlabs.io/v1/music", {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: withTimeout(p.signal, REQUEST_TIMEOUT_MS),
    });
    if (response.ok) {
        const raw = await response.arrayBuffer();
        return { bytes: Buffer.from(raw), extension: "mp3" };
    }
    const detail = (await safeText(response)).slice(0, 200);
    throw new Error(`HTTP ${response.status} ${detail}`);
}
242
+ /* ───────────────────────── helpers ───────────────────────── */
243
/**
 * Fetch a URL with GET and return the response body as a Buffer.
 *
 * @param {typeof fetch} fetchFn Fetch implementation to use.
 * @param {string} url Address to download.
 * @param {AbortSignal} [signal] Caller's abort signal, combined with the request timeout.
 * @returns {Promise<Buffer>} The downloaded bytes.
 * @throws {Error} On a non-2xx response.
 */
async function downloadBytes(fetchFn, url, signal) {
    const response = await fetchFn(url, {
        method: "GET",
        signal: withTimeout(signal, REQUEST_TIMEOUT_MS),
    });
    if (response.ok) {
        const raw = await response.arrayBuffer();
        return Buffer.from(raw);
    }
    const detail = (await safeText(response)).slice(0, 200);
    throw new Error(`download HTTP ${response.status} ${detail}`);
}
249
/**
 * Look up the model configured for a provider under `tools.music.models`.
 *
 * Best effort: any error while loading the config is ignored and treated as
 * "nothing configured".
 *
 * @param {string} provider Provider id used as the lookup key.
 * @returns {string | undefined} The trimmed model name, or `undefined` when the
 *   entry is missing, not a string, or blank.
 */
function resolveConfiguredModel(provider) {
    let configured;
    try {
        configured = loadConfig().tools?.music?.models?.[provider];
    }
    catch {
        // Unreadable config: the caller falls back to its built-in default.
        return undefined;
    }
    if (typeof configured !== "string") {
        return undefined;
    }
    const trimmed = configured.trim();
    return trimmed ? trimmed : undefined;
}
261
/**
 * Build the output file name for a generated track.
 *
 * With a hint: take its basename, drop a trailing extension, replace every
 * character outside `[a-zA-Z0-9._-]` with `_`, and cap the result at 48
 * characters. Without a hint, or when the hint reduces to nothing usable
 * (empty or only dots, e.g. ".mp3", "..", "/"), fall back to a timestamped
 * `music-<stamp>` name so the result is never a hidden or dot-only file name.
 *
 * @param {string | undefined} hint Caller-supplied file name hint.
 * @param {string} extension Extension to append, without the leading dot.
 * @returns {string} File name of the form `<base>.<extension>`.
 */
function buildFileName(hint, extension) {
    const fallback = `music-${Date.now().toString(36)}`;
    if (!hint) {
        return `${fallback}.${extension}`;
    }
    const cleaned = path
        .basename(hint)
        .replace(/\.[a-z0-9]+$/i, "")
        .replace(/[^a-zA-Z0-9._-]/g, "_")
        .slice(0, 48);
    // An empty or dots-only base carries no name at all; use the default instead.
    const base = /^\.*$/.test(cleaned) ? fallback : cleaned;
    return `${base}.${extension}`;
}
268
/**
 * Build a failure result for the given action.
 *
 * @param {string} action Action the failure belongs to.
 * @param {string} message Human-readable explanation.
 * @param {object} [extra] Additional fields merged in last (they can override the base fields).
 * @returns {*} Whatever `jsonResult` returns for the assembled payload.
 */
function fail(action, message, extra = {}) {
    const payload = Object.assign({ action, ok: false, message }, extra);
    return jsonResult(payload);
}
271
/**
 * Read a response body as text without ever throwing.
 *
 * @param {{ text: () => Promise<string> }} res Response-like object.
 * @returns {Promise<string>} The body text, or "" when reading it fails.
 */
async function safeText(res) {
    let body = "";
    try {
        body = await res.text();
    }
    catch {
        // Best effort: an unreadable body is reported as empty text.
    }
    return body;
}
279
/**
 * Combine the caller's abort signal with a hard per-request timeout.
 *
 * @param {AbortSignal | undefined} signal Caller's signal, if any.
 * @param {number} ms Timeout in milliseconds.
 * @returns {AbortSignal} The timeout signal alone when no caller signal is given;
 *   otherwise a signal that aborts as soon as either one does.
 */
function withTimeout(signal, ms) {
    const deadline = AbortSignal.timeout(ms);
    return signal ? AbortSignal.any([signal, deadline]) : deadline;
}
286
+ //# sourceMappingURL=generate-music-tool.js.map