@spinabot/brigade 1.11.1 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -0
- package/dist/agents/tools/edge-tts.d.ts +44 -0
- package/dist/agents/tools/edge-tts.d.ts.map +1 -0
- package/dist/agents/tools/edge-tts.js +142 -0
- package/dist/agents/tools/edge-tts.js.map +1 -0
- package/dist/agents/tools/generate-music-tool.d.ts +61 -0
- package/dist/agents/tools/generate-music-tool.d.ts.map +1 -0
- package/dist/agents/tools/generate-music-tool.js +286 -0
- package/dist/agents/tools/generate-music-tool.js.map +1 -0
- package/dist/agents/tools/generate-speech-tool.d.ts +69 -0
- package/dist/agents/tools/generate-speech-tool.d.ts.map +1 -0
- package/dist/agents/tools/generate-speech-tool.js +331 -0
- package/dist/agents/tools/generate-speech-tool.js.map +1 -0
- package/dist/agents/tools/generate-video-tool.d.ts +111 -0
- package/dist/agents/tools/generate-video-tool.d.ts.map +1 -0
- package/dist/agents/tools/generate-video-tool.js +1028 -0
- package/dist/agents/tools/generate-video-tool.js.map +1 -0
- package/dist/agents/tools/media-command.d.ts +47 -0
- package/dist/agents/tools/media-command.d.ts.map +1 -0
- package/dist/agents/tools/media-command.js +93 -0
- package/dist/agents/tools/media-command.js.map +1 -0
- package/dist/agents/tools/registry.d.ts.map +1 -1
- package/dist/agents/tools/registry.js +27 -0
- package/dist/agents/tools/registry.js.map +1 -1
- package/dist/agents/tools/transcribe-audio-tool.d.ts +96 -0
- package/dist/agents/tools/transcribe-audio-tool.d.ts.map +1 -0
- package/dist/agents/tools/transcribe-audio-tool.js +577 -0
- package/dist/agents/tools/transcribe-audio-tool.js.map +1 -0
- package/dist/buildstamp.json +1 -1
- package/dist/cli/commands/update.d.ts.map +1 -1
- package/dist/cli/commands/update.js +4 -3
- package/dist/cli/commands/update.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -119,6 +119,7 @@ backend — all under one `~/.brigade/` directory you fully own.
|
|
|
119
119
|
| π
**Always-on** | Run as a headless WebSocket gateway with a crash supervisor, cron jobs, and OS service install. |
|
|
120
120
|
| π¬ **Channels** | Talk to your crew from WhatsApp and Telegram today; the adapter contract is built for more. |
|
|
121
121
|
| π **1,000+ connectors** | Gmail, Slack, GitHub, Notion, Calendar, Linear… via the built-in Composio tool. |
|
|
122
|
+
| π **Reads & writes documents** | Send a PDF, Office doc, image, audio, or video and your crew *understands* it — then have it **create or edit** Word, Excel, PowerPoint, and PDF files and hand them back. |
|
|
122
123
|
| π§© **MCP** | Expose your long-term memory to any MCP client (`brigade mcp`), or connect MCP servers in. |
|
|
123
124
|
| ποΈ **Your storage** | Default filesystem mode, or an optional **fully self-hosted Convex** backend with at-rest encryption. |
|
|
124
125
|
| π **Yours** | Everything lives under `~/.brigade/`. Keys are stored locally at mode `0600`. `rm -rf ~/.brigade` wipes it clean. |
|
|
@@ -283,6 +284,17 @@ to **PLATFORM**, open **Settings → API Keys**, and copy the `ak_…` key. Hand
|
|
|
283
284
|
Brigade once (*"set my Composio key to `ak_…`"*) and it's verified and stored encrypted.
|
|
284
285
|
Full guide: **[docs/composio.md](docs/composio.md)**.
|
|
285
286
|
|
|
287
|
+
### π Documents & media
|
|
288
|
+
Send your crew a file and it actually **reads** it: the `analyze_media` tool
|
|
289
|
+
understands PDFs (text or scanned), Word / Excel / PowerPoint (including the images
|
|
290
|
+
embedded inside them), plain images, audio, and video — auto-selecting the right
|
|
291
|
+
provider and caching the result. The write side, `make_document` and
|
|
292
|
+
`edit_document`, **creates and edits** Word / Excel / PowerPoint / PDF in place:
|
|
293
|
+
fill templates and form fields, set cells and formulas, add charts, and
|
|
294
|
+
merge / split / stamp / watermark PDFs — all with pure-JS libraries, no sandbox.
|
|
295
|
+
Drop a document into a channel and Brigade sees it; ask for a report and it hands
|
|
296
|
+
one back with `send_media`.
|
|
297
|
+
|
|
286
298
|
### π§© MCP
|
|
287
299
|
Run `brigade mcp` to expose your long-term memory to any MCP client (Claude Desktop,
|
|
288
300
|
editors, etc.) as add/search/context tools over stdio, owner-bound.
|
|
@@ -342,6 +354,7 @@ principle is the same: *independent verification, never the agent judging itself
|
|
|
342
354
|
| `brigade status` | Snapshot config, sessions, and gateway state (`--json`) |
|
|
343
355
|
| `brigade doctor` | Health-check Node, config, providers, prompts, logs, gateway (`--json`, `--strict`, `--gateway <url>`) |
|
|
344
356
|
| `brigade logs` | Tail today's gateway log (`--follow`) |
|
|
357
|
+
| `brigade update` · `upgrade` | Update Brigade to the latest code and restart the gateway (`--check`, `--no-restart`) |
|
|
345
358
|
|
|
346
359
|
### Gateway
|
|
347
360
|
|
|
@@ -351,6 +364,7 @@ principle is the same: *independent verification, never the agent judging itself
|
|
|
351
364
|
| `brigade gateway status` · `stop` · `restart` | Inspect / stop / restart the running gateway |
|
|
352
365
|
| `brigade gateway install` · `uninstall` | Install/remove as a system service (launchd / systemd / Task Scheduler) |
|
|
353
366
|
| `brigade gateway supervise` | Out-of-process crash watchdog (respawns a wedged gateway) |
|
|
367
|
+
| `brigade expose` · `expose stop` | Publish the gateway to the public internet via a secure, token-gated tunnel (alias: `bloody benchmark`) |
|
|
354
368
|
|
|
355
369
|
### Agents
|
|
356
370
|
|
|
@@ -485,6 +499,27 @@ brigade doctor --json # machine-readable
|
|
|
485
499
|
brigade doctor --strict # exit 1 on warnings (CI mode)
|
|
486
500
|
```
|
|
487
501
|
|
|
502
|
+
### `brigade update`
|
|
503
|
+
|
|
504
|
+
Bring Brigade up to date and reload the gateway. It auto-detects how Brigade is
|
|
505
|
+
installed and does the right thing:
|
|
506
|
+
|
|
507
|
+
- **npm global** → `npm i -g @spinabot/brigade@latest`, then restart.
|
|
508
|
+
- **source checkout** → `git pull` (fast-forward, only when your tree is clean and
|
|
509
|
+
behind upstream — a dirty tree is left untouched and rebuilt as-is), then
|
|
510
|
+
`npm install`, `npm run build`, then restart.
|
|
511
|
+
|
|
512
|
+
```bash
|
|
513
|
+
brigade update # update + restart the gateway
|
|
514
|
+
brigade upgrade # alias of update
|
|
515
|
+
brigade update --check # report whether newer code is available; change nothing
|
|
516
|
+
brigade update --no-restart
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
If Brigade is installed as a background service (`brigade gateway install`) the
|
|
520
|
+
restart is automatic; if you run the gateway in the foreground, restart it yourself
|
|
521
|
+
to load the new code. Same behavior on macOS, Linux, and Windows.
|
|
522
|
+
|
|
488
523
|
### `brigade config`
|
|
489
524
|
|
|
490
525
|
Read and write the local config without opening the TUI.
|
|
@@ -605,6 +640,7 @@ Every agent gets a curated toolset. Mutating/privileged tools are owner-gated
|
|
|
605
640
|
- **Web:** `web_search`, `fetch_url`, `browser` (when a provider is configured)
|
|
606
641
|
- **Connectors:** `composio` (1,000+ apps), `oauth_authorize`
|
|
607
642
|
- **Generation:** `generate_image`
|
|
643
|
+
- **Documents & media:** `analyze_media` (read/understand PDF · Office · image · audio · video), `make_document` · `edit_document` (create & edit Word/Excel/PowerPoint/PDF)
|
|
608
644
|
- **Channels:** `send_message`, `send_media` (when a channel is linked)
|
|
609
645
|
|
|
610
646
|
---
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Microsoft Edge "Read Aloud" text-to-speech over WebSocket — FREE, no API key.
|
|
3
|
+
*
|
|
4
|
+
* This is the same free endpoint the `node-edge-tts` package uses (the Bing /
|
|
5
|
+
* "Read Aloud" TTS WebSocket). Auth is an embedded TrustedClientToken plus a
|
|
6
|
+
* computed `Sec-MS-GEC` token (a SHA-256 of the current Windows file-time ticks,
|
|
7
|
+
* floored to 5 minutes, concatenated with the token). The socket sends a
|
|
8
|
+
* `speech.config` frame then an `ssml` frame; audio arrives as binary WS frames
|
|
9
|
+
* (after a `Path:audio` header) and the turn ends on a `Path:turn.end` text
|
|
10
|
+
* frame. Returns MP3 bytes.
|
|
11
|
+
*
|
|
12
|
+
* Re-implemented self-contained (no new dependency) over the `ws` package that
|
|
13
|
+
* Brigade already ships for the gateway/TUI.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * The minimal WebSocket surface edge-tts relies on — kept this narrow so tests
 * can inject a fake socket.
 */
export interface EdgeWebSocketLike {
    /** Register a callback for one of the four socket events. */
    on(event: "open" | "message" | "error" | "close", cb: (...args: unknown[]) => void): void;
    /** Send one string frame. */
    send(data: string): void;
    /** Close the socket. */
    close(): void;
}
|
|
21
|
+
/** Options accepted by `synthesizeEdge`. */
export interface EdgeTtsOptions {
    /** Text to synthesize. */
    text: string;
    /** Voice name placed in the SSML `<voice>` element. */
    voice: string;
    /** Requested audio output format (optional). */
    outputFormat?: string;
    /** Optional abort signal for cancelling the request. */
    signal?: AbortSignal;
    /** Optional overall timeout in milliseconds. */
    timeoutMs?: number;
    /** Test seam: inject a WebSocket factory instead of opening a real socket. */
    wsFactory?: (url: string, headers: Record<string, string>) => EdgeWebSocketLike;
}
|
|
30
|
+
/** Synthesize speech via the free Edge endpoint. Resolves with MP3 bytes. */
export declare function synthesizeEdge(opts: EdgeTtsOptions): Promise<Buffer>;
/**
 * The `Sec-MS-GEC` auth token: uppercase SHA-256 hex of `${ticks}${TrustedClientToken}`,
 * where `ticks` = Windows file time (100-ns intervals since 1601-01-01) floored to
 * the nearest 5 minutes. BigInt math — the tick count exceeds Number.MAX_SAFE_INTEGER.
 */
export declare function secMsGec(nowMs?: number): string;
/** The opening `speech.config` frame carrying the requested output format. */
export declare function configFrame(outputFormat: string): string;
/** The `ssml` frame carrying the voice + escaped text. */
export declare function ssmlFrame(text: string, voice: string): string;
/** Minimal XML escaping for SSML text content. */
export declare function escapeXml(s: string): string;
|
|
44
|
+
//# sourceMappingURL=edge-tts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"edge-tts.d.ts","sourceRoot":"","sources":["../../../src/agents/tools/edge-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAYH,iFAAiF;AACjF,MAAM,WAAW,iBAAiB;IACjC,EAAE,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,OAAO,EAAE,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,GAAG,IAAI,CAAC;IAC1F,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,KAAK,IAAI,IAAI,CAAC;CACd;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8EAA8E;IAC9E,SAAS,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,iBAAiB,CAAC;CAChF;AAED,6EAA6E;AAC7E,wBAAsB,cAAc,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAsE1E;AAED;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,KAAK,GAAE,MAAmB,GAAG,MAAM,CAK3D;AAED,8EAA8E;AAC9E,wBAAgB,WAAW,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,CAYxD;AAED,0DAA0D;AAC1D,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAM7D;AAED,kDAAkD;AAClD,wBAAgB,SAAS,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAO3C"}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Microsoft Edge "Read Aloud" text-to-speech over WebSocket — FREE, no API key.
|
|
3
|
+
*
|
|
4
|
+
* This is the same free endpoint the `node-edge-tts` package uses (the Bing /
|
|
5
|
+
* "Read Aloud" TTS WebSocket). Auth is an embedded TrustedClientToken plus a
|
|
6
|
+
* computed `Sec-MS-GEC` token (a SHA-256 of the current Windows file-time ticks,
|
|
7
|
+
* floored to 5 minutes, concatenated with the token). The socket sends a
|
|
8
|
+
* `speech.config` frame then an `ssml` frame; audio arrives as binary WS frames
|
|
9
|
+
* (after a `Path:audio` header) and the turn ends on a `Path:turn.end` text
|
|
10
|
+
* frame. Returns MP3 bytes.
|
|
11
|
+
*
|
|
12
|
+
* Re-implemented self-contained (no new dependency) over the `ws` package that
|
|
13
|
+
* Brigade already ships for the gateway/TUI.
|
|
14
|
+
*/
|
|
15
|
+
import crypto from "node:crypto";
|
|
16
|
+
import { WebSocket } from "ws";
|
|
17
|
+
const TRUSTED_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
|
|
18
|
+
const GEC_VERSION = "1-131.0.2903.86";
|
|
19
|
+
const WSS_BASE = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1";
|
|
20
|
+
const CHROME_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0";
|
|
21
|
+
/**
 * Synthesize speech via the free Edge "Read Aloud" endpoint.
 *
 * Opens the socket (or the fake returned by `opts.wsFactory`), sends the
 * `speech.config` frame followed by the `ssml` frame, and collects the payload
 * of every binary `Path:audio` frame until a `Path:turn.end` text frame arrives
 * or the socket closes.
 *
 * @param {object} opts
 * @param {string} opts.text - Text to speak.
 * @param {string} opts.voice - Voice name for the SSML `<voice>` element.
 * @param {string} [opts.outputFormat] - Defaults to "audio-24khz-48kbitrate-mono-mp3".
 * @param {AbortSignal} [opts.signal] - Aborting rejects with `Error("aborted")`.
 * @param {number} [opts.timeoutMs] - Overall deadline in ms; defaults to 30 000.
 * @param {(url: string, headers: Record<string, string>) => object} [opts.wsFactory]
 *   Test seam: builds the socket instead of `new WebSocket(...)`.
 * @returns {Promise<Buffer>} The concatenated audio bytes (MP3 with the default format).
 * @throws {Error} On abort, timeout, socket error, or when no audio bytes were received.
 */
export async function synthesizeEdge(opts) {
    // An already-aborted signal never dispatches "abort" again, so without this
    // check the call would open a socket and hang until the timeout fires.
    if (opts.signal?.aborted) {
        throw new Error("aborted");
    }
    const outputFormat = opts.outputFormat ?? "audio-24khz-48kbitrate-mono-mp3";
    const url = `${WSS_BASE}?TrustedClientToken=${TRUSTED_TOKEN}&Sec-MS-GEC=${secMsGec()}&Sec-MS-GEC-Version=${GEC_VERSION}`;
    const headers = {
        "User-Agent": CHROME_UA,
        Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
        "Accept-Language": "en-US,en;q=0.9",
    };
    const ws = opts.wsFactory
        ? opts.wsFactory(url, headers)
        : new WebSocket(url, { headers });
    return await new Promise((resolve, reject) => {
        const chunks = [];
        let settled = false;
        const timer = setTimeout(() => fail(new Error("Edge TTS timed out")), opts.timeoutMs ?? 30_000);
        const onAbort = () => fail(new Error("aborted"));
        opts.signal?.addEventListener("abort", onAbort, { once: true });

        // Release the timer, the abort listener and the socket exactly once per call.
        const cleanup = () => {
            clearTimeout(timer);
            opts.signal?.removeEventListener("abort", onAbort);
            try {
                ws.close();
            }
            catch {
                /* best-effort: the promise outcome is already decided */
            }
        };
        function fail(err) {
            if (settled)
                return;
            settled = true;
            cleanup();
            reject(err);
        }
        function done() {
            if (settled)
                return;
            settled = true;
            cleanup();
            if (chunks.length === 0)
                reject(new Error("Edge TTS produced no audio"));
            else
                resolve(Buffer.concat(chunks));
        }
        // Normalize whatever the socket delivers into a Buffer.
        function toBuffer(data) {
            if (Buffer.isBuffer(data))
                return data;
            if (data instanceof ArrayBuffer)
                return Buffer.from(data);
            // Typed arrays from fake sockets must not be stringified ("1,2,3").
            if (ArrayBuffer.isView(data))
                return Buffer.from(data.buffer, data.byteOffset, data.byteLength);
            return Buffer.from(String(data), "utf8");
        }

        ws.on("open", () => {
            // A throwing send() inside an event handler would escape the promise;
            // route it through fail() instead.
            try {
                ws.send(configFrame(outputFormat));
                ws.send(ssmlFrame(opts.text, opts.voice));
            }
            catch (err) {
                fail(err instanceof Error ? err : new Error(String(err)));
            }
        });
        ws.on("message", (...args) => {
            const buf = toBuffer(args[0]);
            const isBinary = args[1] === true;
            if (isBinary) {
                // Binary frame: first 2 bytes = big-endian header length; audio follows.
                if (buf.length < 2)
                    return;
                const headerLen = buf.readUInt16BE(0);
                const header = buf.subarray(2, 2 + headerLen).toString("utf8");
                const payload = buf.subarray(2 + headerLen);
                // Skip empty payloads so "produced no audio" reflects real bytes.
                if (header.includes("Path:audio") && payload.length > 0)
                    chunks.push(payload);
            }
            else if (buf.toString("utf8").includes("Path:turn.end")) {
                done();
            }
        });
        ws.on("error", (...args) => {
            const e = args[0];
            fail(e instanceof Error ? e : new Error(String(e)));
        });
        // A close without `turn.end` still resolves with whatever audio arrived.
        ws.on("close", () => {
            if (!settled)
                done();
        });
    });
}
|
|
101
|
+
/**
 * Compute the `Sec-MS-GEC` auth token: the uppercase SHA-256 hex digest of
 * `${ticks}${TrustedClientToken}`, where `ticks` is the Windows file time
 * (100-ns intervals since 1601-01-01) floored to a 5-minute boundary.
 * BigInt math — the tick count exceeds Number.MAX_SAFE_INTEGER.
 *
 * @param {number} [nowMs=Date.now()] - Unix time in milliseconds.
 * @returns {string} 64 uppercase hex characters.
 */
export function secMsGec(nowMs = Date.now()) {
    const unixSeconds = Math.floor(nowMs / 1000);
    // 11 644 473 600 s separate the 1601 file-time epoch from the 1970 Unix epoch.
    const fileTimeSeconds = BigInt(unixSeconds + 11_644_473_600);
    const windowStart = fileTimeSeconds - (fileTimeSeconds % 300n);
    const ticks = windowStart * 10_000_000n;
    const hasher = crypto.createHash("sha256");
    hasher.update(`${ticks}${TRUSTED_TOKEN}`);
    return hasher.digest("hex").toUpperCase();
}
|
|
112
|
+
/**
 * Build the opening `speech.config` text frame carrying the requested output
 * format. Sentence- and word-boundary metadata are both disabled.
 *
 * The frame is three CRLF-separated header lines (`X-Timestamp`, `Content-Type`,
 * `Path`), a blank line, then the JSON configuration.
 *
 * @param {string} outputFormat - Audio format identifier placed in the config JSON.
 * @returns {string} The complete frame text.
 */
export function configFrame(outputFormat) {
    const cfg = {
        context: {
            synthesis: {
                audio: {
                    metadataoptions: { sentenceBoundaryEnabled: false, wordBoundaryEnabled: false },
                    outputFormat,
                },
            },
        },
    };
    const headerLines = [
        `X-Timestamp:${new Date().toString()}`,
        "Content-Type:application/json; charset=utf-8",
        "Path:speech.config",
    ];
    return `${headerLines.join("\r\n")}\r\n\r\n${JSON.stringify(cfg)}`;
}
|
|
126
|
+
/**
 * Build the `ssml` text frame carrying the voice and the escaped text.
 *
 * Both `text` and `voice` are XML-escaped: `voice` lands inside a single-quoted
 * attribute, so an unescaped quote or `<` would break (or inject into) the SSML.
 * Ordinary voice names contain none of the escaped characters and are unchanged.
 *
 * @param {string} text - Text content to speak.
 * @param {string} voice - Voice name for the `<voice name='…'>` attribute.
 * @returns {string} Header lines (`X-RequestId`, `Content-Type`, `X-Timestamp`,
 *   `Path`), a blank line, then the SSML document.
 */
export function ssmlFrame(text, voice) {
    // Request id is a UUID with the dashes removed (32 hex characters).
    const id = crypto.randomUUID().replace(/-/g, "");
    // NOTE(review): xml:lang is fixed to en-US regardless of the voice's locale.
    const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>` +
        `<voice name='${escapeXml(voice)}'><prosody pitch='+0Hz' rate='+0%' volume='+0%'>${escapeXml(text)}</prosody></voice></speak>`;
    // NOTE(review): the literal "Z" after Date#toString() is kept exactly as found;
    // presumably it mirrors what the Edge client sends — confirm before changing.
    return `X-RequestId:${id}\r\nContent-Type:application/ssml+xml\r\nX-Timestamp:${new Date().toString()}Z\r\nPath:ssml\r\n\r\n${ssml}`;
}
/**
 * Minimal XML escaping for SSML text and attribute content.
 *
 * Replaces the five XML special characters with their predefined entities in
 * a single pass, so an `&` produced by one replacement is never escaped twice.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text safe to embed in element content or a quoted attribute.
 */
export function escapeXml(s) {
    const entities = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "'": "&apos;",
        '"': "&quot;",
    };
    return s.replace(/[&<>'"]/g, (ch) => entities[ch]);
}
|
|
142
|
+
//# sourceMappingURL=edge-tts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"edge-tts.js","sourceRoot":"","sources":["../../../src/agents/tools/edge-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,MAAM,MAAM,aAAa,CAAC;AAEjC,OAAO,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAE/B,MAAM,aAAa,GAAG,kCAAkC,CAAC;AACzD,MAAM,WAAW,GAAG,iBAAiB,CAAC;AACtC,MAAM,QAAQ,GAAG,6EAA6E,CAAC;AAC/F,MAAM,SAAS,GACd,+HAA+H,CAAC;AAmBjI,6EAA6E;AAC7E,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAoB;IACxD,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,iCAAiC,CAAC;IAC5E,MAAM,GAAG,GAAG,GAAG,QAAQ,uBAAuB,aAAa,eAAe,QAAQ,EAAE,uBAAuB,WAAW,EAAE,CAAC;IACzH,MAAM,OAAO,GAA2B;QACvC,YAAY,EAAE,SAAS;QACvB,MAAM,EAAE,qDAAqD;QAC7D,iBAAiB,EAAE,gBAAgB;KACnC,CAAC;IACF,MAAM,EAAE,GAAsB,IAAI,CAAC,SAAS;QAC3C,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC;QAC9B,CAAC,CAAE,IAAI,SAAS,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAkC,CAAC;IAErE,OAAO,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACpD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,IAAI,MAAM,CAAC,CAAC;QAChG,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;QACjD,IAAI,CAAC,MAAM,EAAE,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QAChE,MAAM,OAAO,GAAG,GAAG,EAAE;YACpB,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,IAAI,CAAC,MAAM,EAAE,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YACnD,IAAI,CAAC;gBACJ,EAAE,CAAC,KAAK,EAAE,CAAC;YACZ,CAAC;YAAC,MAAM,CAAC;gBACR,YAAY;YACb,CAAC;QACF,CAAC,CAAC;QACF,SAAS,IAAI,CAAC,GAAU;YACvB,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,OAAO,EAAE,CAAC;YACV,MAAM,CAAC,GAAG,CAAC,CAAC;QACb,CAAC;QACD,SAAS,IAAI;YACZ,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,OAAO,EAAE,CAAC;YACV,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM,CAAC,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC,CAAC;;gBACpE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACrC,CAAC;QACD,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;YAClB,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC,CAAC;YACnC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,IAA
I,CAAC,KAAK,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAG,IAAe,EAAE,EAAE;YACvC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC;YAClC,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC;gBAChC,CAAC,CAAC,IAAI;gBACN,CAAC,CAAC,IAAI,YAAY,WAAW;oBAC5B,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;oBACnB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC,CAAC;YACtC,IAAI,QAAQ,EAAE,CAAC;gBACd,yEAAyE;gBACzE,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;oBAAE,OAAO;gBAC3B,MAAM,SAAS,GAAG,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;gBACtC,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC/D,IAAI,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC;oBAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;YAC7E,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBAC3D,IAAI,EAAE,CAAC;YACR,CAAC;QACF,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,IAAe,EAAE,EAAE;YACrC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,IAAI,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACnB,IAAI,CAAC,OAAO;gBAAE,IAAI,EAAE,CAAC;QACtB,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,QAAQ,CAAC,QAAgB,IAAI,CAAC,GAAG,EAAE;IAClD,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,cAAc,CAAC,CAAC;IAC3E,MAAM,cAAc,GAAG,CAAC,gBAAgB,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;IACxD,MAAM,KAAK,GAAG,cAAc,GAAG,SAAW,CAAC;IAC3C,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,KAAK,GAAG,aAAa,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;AACnG,CAAC;AAED,8EAA8E;AAC9E,MAAM,UAAU,WAAW,CAAC,YAAoB;IAC/C,MAAM,GAAG,GAAG;QACX,OAAO,EAAE;YACR,SAAS,EAAE;gBACV,KAAK,EAAE;oBACN,eAAe,EAAE,EAAE,uBAAuB,EAAE,KAAK,EAAE,mBAAmB,EAAE,KAAK,EAAE;oBAC/E,YAAY;iBACZ;aACD;SACD;KACD,CAAC;IACF,OAAO,eAAe,IAAI,IAAI,EAAE,CAAC,QAAQ,EAAE,iFAAiF,IAAI,CAAC
,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC;AACnJ,CAAC;AAED,0DAA0D;AAC1D,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,KAAa;IACpD,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACjD,MAAM,IAAI,GACT,oFAAoF;QACpF,gBAAgB,KAAK,mDAAmD,SAAS,CAAC,IAAI,CAAC,4BAA4B,CAAC;IACrH,OAAO,eAAe,EAAE,wDAAwD,IAAI,IAAI,EAAE,CAAC,QAAQ,EAAE,yBAAyB,IAAI,EAAE,CAAC;AACtI,CAAC;AAED,kDAAkD;AAClD,MAAM,UAAU,SAAS,CAAC,CAAS;IAClC,OAAO,CAAC;SACN,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC3B,CAAC"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `generate_music` tool — text-to-music generation, modeled on the proven
|
|
3
|
+
* `generate_speech` self-contained pattern.
|
|
4
|
+
*
|
|
5
|
+
* Why this tool exists
|
|
6
|
+
* --------------------
|
|
7
|
+
* Same reasoning as `generate_speech`/`generate_image`: without a first-class
|
|
8
|
+
* tool, "make a song" / "compose background music" sends the model to raw
|
|
9
|
+
* `curl` against a music API — the key flows through a shell, the (binary or
|
|
10
|
+
* base64) audio response gets mangled by a text-only parser, and a billed
|
|
11
|
+
* generation is dropped. This tool owns the call in-process: stored auth,
|
|
12
|
+
* validated params, a parser that understands each provider's audio shape, and
|
|
13
|
+
* a saved file the model hands to `send_media`.
|
|
14
|
+
*
|
|
15
|
+
* Providers (auto-selected by which key is configured, preference order):
|
|
16
|
+
* • google — Lyria via Gemini generateContent (AUDIO modality) → base64
|
|
17
|
+
* audio (mp3). Single POST, no poll.
|
|
18
|
+
* • minimax — Music generation → URL or inline (hex/base64) audio (mp3).
|
|
19
|
+
* • elevenlabs — Music endpoint → raw mp3 bytes.
|
|
20
|
+
* Keys resolve through `resolveMediaProviderKey` (the same credential-store +
|
|
21
|
+
* env path the media-understanding subsystem uses), so music generation works
|
|
22
|
+
* for whichever provider the operator already configured — no bespoke auth.
|
|
23
|
+
*
|
|
24
|
+
* Flow: generate → bytes saved under `<cache>/audio/` → result text carries a
|
|
25
|
+
* `MEDIA:<saved-path>` line → the model delivers with `send_media({path})`.
|
|
26
|
+
*/
|
|
27
|
+
import { Type } from "typebox";
|
|
28
|
+
import type { BrigadeTool } from "./types.js";
|
|
29
|
+
/** Identifier of a supported music provider: one of the three literal ids below. */
type MusicProviderId = "google" | "minimax" | "elevenlabs";
|
|
30
|
+
/** TypeBox schema for the `generate_music` tool parameters; every field is optional. */
declare const GenerateMusicParams: Type.TObject<{
    action: Type.TOptional<Type.TUnion<[Type.TLiteral<"generate">, Type.TLiteral<"list">]>>;
    prompt: Type.TOptional<Type.TString>;
    lyrics: Type.TOptional<Type.TString>;
    instrumental: Type.TOptional<Type.TBoolean>;
    provider: Type.TOptional<Type.TUnion<[Type.TLiteral<"google">, Type.TLiteral<"minimax">, Type.TLiteral<"elevenlabs">]>>;
    model: Type.TOptional<Type.TString>;
    durationSeconds: Type.TOptional<Type.TInteger>;
    filename: Type.TOptional<Type.TString>;
}>;
|
|
40
|
+
/** Structured details returned alongside a `generate_music` result. */
interface GenerateMusicDetails {
    /** Which action produced this result. */
    action: "generate" | "list";
    provider?: string;
    model?: string;
    /** Presumably the saved audio file path for action="generate" — TODO confirm. */
    path?: string;
    /** Providers with a configured key (populated by action="list"). */
    providers?: string[];
    ok: boolean;
    message?: string;
}
|
|
49
|
+
/** Options for `makeGenerateMusicTool`; all fields are optional. */
export interface MakeGenerateMusicToolOptions {
    /** Caller's agent id — drives which credential store backs the key. */
    agentId?: string;
    /** Test seam: replaces global fetch. */
    fetchFn?: typeof fetch;
    /** Test seam: output directory override. Default `<cache>/audio`. */
    outDirOverride?: string;
    /** Test seam: per-provider API-key resolver override. */
    resolveKey?: (provider: MusicProviderId) => string;
}
|
|
59
|
+
/**
 * Create the `generate_music` tool.
 *
 * @param opts Optional agent id and test seams (fetch, output directory, key resolver).
 */
export declare function makeGenerateMusicTool(opts?: MakeGenerateMusicToolOptions): BrigadeTool<typeof GenerateMusicParams, GenerateMusicDetails>;
|
|
60
|
+
export {};
|
|
61
|
+
//# sourceMappingURL=generate-music-tool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generate-music-tool.d.ts","sourceRoot":"","sources":["../../../src/agents/tools/generate-music-tool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAKH,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAM/B,OAAO,KAAK,EAAmB,WAAW,EAAE,MAAM,YAAY,CAAC;AAO/D,KAAK,eAAe,GAAG,QAAQ,GAAG,SAAS,GAAG,YAAY,CAAC;AAW3D,QAAA,MAAM,mBAAmB;;;;;;;;;EAwBvB,CAAC;AAEH,UAAU,oBAAoB;IAC7B,MAAM,EAAE,UAAU,GAAG,MAAM,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,EAAE,EAAE,OAAO,CAAC;IACZ,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,4BAA4B;IAC5C,uEAAuE;IACvE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wCAAwC;IACxC,OAAO,CAAC,EAAE,OAAO,KAAK,CAAC;IACvB,qEAAqE;IACrE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,yDAAyD;IACzD,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,eAAe,KAAK,MAAM,CAAC;CACnD;AAED,wBAAgB,qBAAqB,CACpC,IAAI,GAAE,4BAAiC,GACrC,WAAW,CAAC,OAAO,mBAAmB,EAAE,oBAAoB,CAAC,CAwG/D"}
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `generate_music` tool — text-to-music generation, modeled on the proven
|
|
3
|
+
* `generate_speech` self-contained pattern.
|
|
4
|
+
*
|
|
5
|
+
* Why this tool exists
|
|
6
|
+
* --------------------
|
|
7
|
+
* Same reasoning as `generate_speech`/`generate_image`: without a first-class
|
|
8
|
+
* tool, "make a song" / "compose background music" sends the model to raw
|
|
9
|
+
* `curl` against a music API — the key flows through a shell, the (binary or
|
|
10
|
+
* base64) audio response gets mangled by a text-only parser, and a billed
|
|
11
|
+
* generation is dropped. This tool owns the call in-process: stored auth,
|
|
12
|
+
* validated params, a parser that understands each provider's audio shape, and
|
|
13
|
+
* a saved file the model hands to `send_media`.
|
|
14
|
+
*
|
|
15
|
+
* Providers (auto-selected by which key is configured, preference order):
|
|
16
|
+
* • google — Lyria via Gemini generateContent (AUDIO modality) → base64
|
|
17
|
+
* audio (mp3). Single POST, no poll.
|
|
18
|
+
* • minimax — Music generation → URL or inline (hex/base64) audio (mp3).
|
|
19
|
+
* • elevenlabs — Music endpoint → raw mp3 bytes.
|
|
20
|
+
* Keys resolve through `resolveMediaProviderKey` (the same credential-store +
|
|
21
|
+
* env path the media-understanding subsystem uses), so music generation works
|
|
22
|
+
* for whichever provider the operator already configured — no bespoke auth.
|
|
23
|
+
*
|
|
24
|
+
* Flow: generate → bytes saved under `<cache>/audio/` → result text carries a
|
|
25
|
+
* `MEDIA:<saved-path>` line → the model delivers with `send_media({path})`.
|
|
26
|
+
*/
|
|
27
|
+
import fs from "node:fs";
|
|
28
|
+
import path from "node:path";
|
|
29
|
+
import { Type } from "typebox";
|
|
30
|
+
import { resolveCacheDir, DEFAULT_AGENT_ID } from "../../config/paths.js";
|
|
31
|
+
import { loadConfig } from "../../core/config.js";
|
|
32
|
+
import { resolveMediaProviderKey } from "../media-understanding/config.js";
|
|
33
|
+
import { jsonResult } from "./common.js";
|
|
34
|
+
/** Music generation can take a while; bound each HTTP call generously. */
const REQUEST_TIMEOUT_MS = 180_000;
/** Hard cap on prompt length — providers reject very long prompts; fail clearly. */
const MAX_PROMPT_CHARS = 8_000;
/** Preference order when no provider is pinned: first keyed one wins. */
const PROVIDER_PREFERENCE = ["google", "minimax", "elevenlabs"];
/**
 * Default model per provider.
 * NOTE(review): the empty ElevenLabs model presumably means "no model name is
 * sent" — confirm against the request builder.
 */
const DEFAULTS = {
    google: { model: "lyria-3-clip-preview" },
    minimax: { model: "music-2.5+" },
    elevenlabs: { model: "" },
};
|
|
45
|
+
/** Parameter schema for the `generate_music` tool; every field is optional. */
const GenerateMusicParams = Type.Object({
    action: Type.Optional(
        Type.Union(
            [Type.Literal("generate"), Type.Literal("list")],
            {
                description: 'Optional: "generate" (default) or "list" to see which music providers are configured.',
            },
        ),
    ),
    prompt: Type.Optional(
        Type.String({
            description: "The style/description of the music to generate (e.g. 'upbeat lo-fi hip hop with mellow piano').",
        }),
    ),
    lyrics: Type.Optional(
        Type.String({
            description: "Optional lyrics for vocal tracks (ignored when instrumental).",
        }),
    ),
    instrumental: Type.Optional(
        Type.Boolean({
            description: "Optional: when true, generate instrumental-only (no vocals).",
        }),
    ),
    provider: Type.Optional(
        Type.Union(
            [Type.Literal("google"), Type.Literal("minimax"), Type.Literal("elevenlabs")],
            {
                description: "Optional music provider override. Default: the first one with a configured key.",
            },
        ),
    ),
    model: Type.Optional(
        Type.String({
            description: "Optional model override for the chosen provider.",
        }),
    ),
    durationSeconds: Type.Optional(
        Type.Integer({
            description: "Optional target length in seconds (where the provider supports it, e.g. ElevenLabs).",
        }),
    ),
    filename: Type.Optional(
        Type.String({
            description: "Optional output filename hint (basename preserved, saved under the managed audio dir).",
        }),
    ),
});
|
|
57
|
+
/**
 * Build the `generate_music` agent tool.
 *
 * @param {object} [opts]
 * @param {string} [opts.agentId] - Agent whose provider keys are looked up (defaults to DEFAULT_AGENT_ID).
 * @param {Function} [opts.fetchFn] - fetch-compatible function used for all HTTP calls (defaults to global fetch).
 * @param {(provider: string) => string} [opts.resolveKey] - Returns the API key for a provider, or an empty value when none is configured.
 * @param {string} [opts.outDirOverride] - Directory to save audio into instead of `<cache dir>/audio`.
 * @returns {object} Tool definition whose `execute` never throws for expected failures; it returns a `fail(...)` result instead.
 */
export function makeGenerateMusicTool(opts = {}) {
    const agentId = opts.agentId ?? DEFAULT_AGENT_ID;
    const fetchFn = opts.fetchFn ?? fetch;
    const resolveKey = opts.resolveKey ?? ((p) => resolveMediaProviderKey(p, agentId));
    // A resolver that returns null/undefined means "no key"; normalise so `.length` is always safe.
    const keyFor = (p) => resolveKey(p) ?? "";
    const hasKey = (p) => keyFor(p).length > 0;
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    return {
        name: "generate_music",
        label: "Generate Music",
        displaySummary: "generating music",
        // Billed per call (cloud music generation) — owner-gated like generate_speech.
        ownerOnly: true,
        description: [
            "Generate music from a text description (text-to-music). USE THIS — never call a music API with bash/curl: the key must not flow through a shell, and the binary/base64 audio response is parsed here.",
            'action="generate" (default): requires `prompt` (the style/description). Saves an audio file and returns its REAL path as a `MEDIA:<path>` line — reference that path exactly; never invent one.',
            "Optional `lyrics`, `instrumental`, `durationSeconds`. Auto-selects the first configured provider (Google → MiniMax → ElevenLabs); override with `provider`/`model`.",
            "To play it for the operator on a chat surface, follow up with `send_media({path})` — generation does NOT auto-send.",
            'action="list": show which music providers have a configured key.',
        ].join(" "),
        parameters: GenerateMusicParams,
        execute: async (_id, args, signal) => {
            const action = args.action ?? "generate";
            if (action === "list") {
                const providers = PROVIDER_PREFERENCE.filter(hasKey);
                return jsonResult({
                    action,
                    providers,
                    ok: true,
                    message: providers.length > 0
                        ? `${providers.length} music provider(s) configured: ${providers.join(", ")}.`
                        : "No music provider configured. Add a Google, MiniMax, or ElevenLabs key with `brigade onboard`.",
                });
            }
            const prompt = (args.prompt ?? "").trim();
            if (!prompt) {
                return fail(action, "`prompt` is required for action=generate.");
            }
            if (prompt.length > MAX_PROMPT_CHARS) {
                return fail(action, `\`prompt\` is too long (${prompt.length} chars; max ${MAX_PROMPT_CHARS}). Shorten it.`);
            }
            const instrumental = args.instrumental === true;
            const lyrics = args.lyrics?.trim() || undefined;
            // Instrumental + lyrics is contradictory — refuse rather than silently drop one.
            if (instrumental && lyrics) {
                return fail(action, "`instrumental` and `lyrics` cannot both be set — pick one (instrumental = no vocals).");
            }
            // Resolve the provider: explicit override (must be supported and keyed) else first keyed.
            let provider;
            if (args.provider) {
                // Guard before indexing DEFAULTS below: an unsupported name would otherwise crash there.
                if (!PROVIDER_PREFERENCE.includes(args.provider)) {
                    return fail(action, `Unknown provider "${args.provider}". Supported providers: ${PROVIDER_PREFERENCE.join(", ")}.`);
                }
                if (!hasKey(args.provider)) {
                    return fail(action, `Provider "${args.provider}" has no configured key. Add one with \`brigade onboard\`, or omit \`provider\` to auto-select.`);
                }
                provider = args.provider;
            }
            else {
                provider = PROVIDER_PREFERENCE.find(hasKey);
            }
            if (!provider) {
                return fail(action, "No music provider is configured. Add a Google, MiniMax, or ElevenLabs API key with `brigade onboard` (then this tool auto-selects it).");
            }
            const apiKey = keyFor(provider);
            // Model precedence: call argument, then configuration, then the built-in default.
            const model = args.model?.trim() || resolveConfiguredModel(provider) || DEFAULTS[provider].model;
            const durationSeconds = typeof args.durationSeconds === "number" && Number.isFinite(args.durationSeconds) && args.durationSeconds > 0
                ? Math.trunc(args.durationSeconds)
                : undefined;
            let audio;
            try {
                audio = await generate({ provider, fetchFn, apiKey, model, prompt, lyrics, instrumental, durationSeconds, signal });
            }
            catch (err) {
                return fail(action, `Music generation via ${provider} failed: ${describeError(err)}`, {
                    provider,
                    model,
                });
            }
            // Never save (and advertise) a zero-byte file as a successful generation.
            if (!audio?.bytes || audio.bytes.length === 0) {
                return fail(action, `Music generation via ${provider} failed: the provider returned no audio data.`, {
                    provider,
                    model,
                });
            }
            let outPath;
            try {
                const outDir = opts.outDirOverride ?? path.join(resolveCacheDir(), "audio");
                fs.mkdirSync(outDir, { recursive: true });
                outPath = path.join(outDir, buildFileName(args.filename, audio.extension));
                fs.writeFileSync(outPath, audio.bytes);
            }
            catch (err) {
                // Disk problems are reported like provider failures instead of escaping execute().
                return fail(action, `Music was generated via ${provider} but could not be saved: ${describeError(err)}`, {
                    provider,
                    model,
                });
            }
            return {
                content: [
                    {
                        type: "text",
                        text: [
                            `Generated music with ${model ? `${provider}/${model}` : provider}.`,
                            `MEDIA:${outPath}`,
                            "Deliver with send_media({path}) — generation does not auto-send.",
                        ].join("\n"),
                    },
                ],
                details: { action, provider, model, path: outPath, ok: true },
            };
        },
    };
}
|
|
150
|
+
/**
 * Dispatch a generation request to the implementation for `params.provider`.
 *
 * @param {object} params - Request object, passed through unchanged to the provider function.
 * @returns {Promise<{bytes: Buffer, extension: string}>} The generated audio.
 * @throws {Error} When `params.provider` is not one of the supported providers.
 */
async function generate(params) {
    switch (params.provider) {
        case "google":
            return generateGoogle(params);
        case "minimax":
            return generateMiniMax(params);
        case "elevenlabs":
            return generateElevenLabs(params);
        default:
            // Without this the function resolved to undefined and the caller crashed on `audio.extension`.
            throw new Error(`Unsupported music provider: ${String(params.provider)}`);
    }
}
|
|
160
|
+
/**
 * Generate music with Google Lyria through the Gemini `generateContent` endpoint.
 *
 * @param {object} p - Request: `fetchFn`, `apiKey`, `model`, `prompt`, plus optional `lyrics`, `instrumental`, `signal`.
 * @returns {Promise<{bytes: Buffer, extension: string}>} Decoded audio and a file extension for it.
 * @throws {Error} On a non-2xx response or when the response carries no inline audio.
 */
async function generateGoogle(p) {
    // The API takes a single text part, so the instrumental hint and lyrics are folded into the prompt.
    const sections = [p.prompt];
    if (p.instrumental) {
        sections.push("Instrumental only. No vocals.");
    }
    if (p.lyrics) {
        sections.push(`Lyrics:\n${p.lyrics}`);
    }
    const text = sections.join("\n\n");
    // The key travels in a header rather than the query string so it cannot leak through
    // logged URLs or error messages that echo the request URL.
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(p.model)}:generateContent`;
    const res = await p.fetchFn(url, {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-goog-api-key": p.apiKey },
        body: JSON.stringify({
            contents: [{ parts: [{ text }] }],
            generationConfig: { responseModalities: ["AUDIO", "TEXT"] },
        }),
        signal: withTimeout(p.signal, REQUEST_TIMEOUT_MS),
    });
    if (!res.ok) {
        throw new Error(`HTTP ${res.status} ${(await safeText(res)).slice(0, 200)}`);
    }
    const body = await res.json();
    const part = body?.candidates?.[0]?.content?.parts?.find((x) => x?.inlineData?.data);
    const data = part?.inlineData?.data;
    if (!data) {
        throw new Error("Lyria returned no audio data.");
    }
    const bytes = Buffer.from(data, "base64");
    // Honour a recognisable container mimeType; anything else keeps the previous "mp3" default.
    // NOTE(review): raw PCM (e.g. audio/L16) would still not be playable as-is — confirm what Lyria emits.
    const mime = String(part.inlineData.mimeType ?? "").toLowerCase();
    const knownContainers = ["wav", "ogg", "flac"];
    const extension = knownContainers.find((name) => mime.includes(name)) ?? "mp3";
    return { bytes, extension };
}
|
|
190
|
+
/**
 * Generate music with MiniMax `music_generation`.
 *
 * @param {object} p - Request: `fetchFn`, `apiKey`, `model`, `prompt`, plus optional `lyrics`, `instrumental`, `signal`.
 * @returns {Promise<{bytes: Buffer, extension: string}>} MP3 bytes (the request asks for mp3 output).
 * @throws {Error} On a non-2xx response, a non-zero `base_resp.status_code`, or a missing/undecodable audio payload.
 */
async function generateMiniMax(p) {
    const reqBody = {
        model: p.model || "music-2.5+",
        prompt: p.prompt,
        output_format: "url",
        audio_setting: { sample_rate: 44100, bitrate: 256000, format: "mp3" },
    };
    if (p.instrumental) {
        reqBody.is_instrumental = true;
    }
    if (p.lyrics) {
        reqBody.lyrics = p.lyrics;
    }
    const res = await p.fetchFn("https://api.minimax.io/v1/music_generation", {
        method: "POST",
        headers: { Authorization: `Bearer ${p.apiKey}`, "Content-Type": "application/json" },
        body: JSON.stringify(reqBody),
        signal: withTimeout(p.signal, REQUEST_TIMEOUT_MS),
    });
    if (!res.ok) {
        throw new Error(`HTTP ${res.status} ${(await safeText(res)).slice(0, 200)}`);
    }
    const body = await res.json();
    if (body?.base_resp && body.base_resp.status_code !== 0) {
        throw new Error(`MiniMax error ${body.base_resp.status_code}: ${body.base_resp.status_msg ?? ""}`);
    }
    // Prefer an explicit URL field; otherwise the audio field may be a URL or inline data.
    // Empty strings and non-string values are skipped so a blank `audio_url` cannot mask real audio.
    const candidate = [body?.data?.audio_url, body?.audio_url, body?.data?.audio, body?.audio]
        .find((value) => typeof value === "string" && value.trim().length > 0)
        ?.trim();
    if (!candidate) {
        throw new Error("MiniMax returned no audio.");
    }
    if (/^https?:\/\//.test(candidate)) {
        const downloaded = await downloadBytes(p.fetchFn, candidate, p.signal);
        return { bytes: downloaded, extension: "mp3" };
    }
    // Inline payload: an even-length run of hex digits (either case) is hex; anything else is base64.
    const isHex = candidate.length % 2 === 0 && /^[0-9a-f]+$/i.test(candidate);
    const bytes = Buffer.from(candidate, isHex ? "hex" : "base64");
    if (bytes.length === 0) {
        throw new Error("MiniMax returned no audio.");
    }
    return { bytes, extension: "mp3" };
}
|
|
228
|
+
/**
 * Generate music with the ElevenLabs music endpoint.
 *
 * @param {object} p - Request: `fetchFn`, `apiKey`, `prompt`, plus optional `model`, `lyrics`,
 *   `instrumental`, `durationSeconds`, `signal`.
 * @returns {Promise<{bytes: Buffer, extension: string}>} The response body as MP3 bytes (the request asks for audio/mpeg).
 * @throws {Error} On a non-2xx response.
 */
async function generateElevenLabs(p) {
    // Lyrics are folded into the prompt text, the same way the Google path does it.
    const prompt = p.lyrics ? `${p.prompt}\n\nLyrics:\n${p.lyrics}` : p.prompt;
    const reqBody = { prompt };
    if (p.durationSeconds) {
        reqBody.music_length_ms = p.durationSeconds * 1000;
    }
    // Previously `model` and `instrumental` were silently dropped for this provider.
    // NOTE(review): `model_id` / `force_instrumental` follow the ElevenLabs compose-music API reference — confirm.
    if (p.model) {
        reqBody.model_id = p.model;
    }
    if (p.instrumental) {
        reqBody.force_instrumental = true;
    }
    const res = await p.fetchFn("https://api.elevenlabs.io/v1/music", {
        method: "POST",
        headers: { "xi-api-key": p.apiKey, "Content-Type": "application/json", Accept: "audio/mpeg" },
        body: JSON.stringify(reqBody),
        signal: withTimeout(p.signal, REQUEST_TIMEOUT_MS),
    });
    if (!res.ok) {
        throw new Error(`HTTP ${res.status} ${(await safeText(res)).slice(0, 200)}`);
    }
    return { bytes: Buffer.from(await res.arrayBuffer()), extension: "mp3" };
}
|
|
242
|
+
/* ───────────────────────── helpers ───────────────────────── */
|
|
243
|
+
/**
 * Download a URL and return its body as a Buffer.
 *
 * @param {Function} fetchFn - fetch-compatible function.
 * @param {string} url - Address to GET.
 * @param {AbortSignal} [signal] - Caller's cancellation signal; combined with the per-request timeout.
 * @returns {Promise<Buffer>} The response body.
 * @throws {Error} On a non-2xx response or an empty body.
 */
async function downloadBytes(fetchFn, url, signal) {
    const res = await fetchFn(url, { method: "GET", signal: withTimeout(signal, REQUEST_TIMEOUT_MS) });
    if (!res.ok) {
        throw new Error(`download HTTP ${res.status} ${(await safeText(res)).slice(0, 200)}`);
    }
    const bytes = Buffer.from(await res.arrayBuffer());
    // An empty body is never valid audio; fail here rather than let a zero-byte file be saved.
    if (bytes.length === 0) {
        throw new Error("download returned an empty body");
    }
    return bytes;
}
|
|
249
|
+
/**
 * Look up the model configured for a provider under `tools.music.models` in the loaded config.
 *
 * @param {string} provider - Provider name used as the key into `tools.music.models`.
 * @returns {string|undefined} The trimmed model name, or undefined when none is set or the config cannot be read.
 */
function resolveConfiguredModel(provider) {
    try {
        const configured = loadConfig().tools?.music?.models?.[provider];
        if (typeof configured === "string") {
            const trimmed = configured.trim();
            if (trimmed) {
                return trimmed;
            }
        }
    }
    catch {
        // Best-effort lookup: an unreadable config means "nothing configured".
    }
    return undefined;
}
|
|
261
|
+
/**
 * Build the output file name for a generated track.
 *
 * @param {string} [hint] - Optional caller-supplied name. Only its basename is used; the extension
 *   is dropped, characters outside `[a-zA-Z0-9._-]` become `_`, and it is capped at 48 characters.
 * @param {string} extension - Extension to append (without the dot).
 * @returns {string} `<base>.<extension>`; `music-<timestamp>` is the base when the hint is missing
 *   or sanitises to nothing.
 */
function buildFileName(hint, extension) {
    const fallback = `music-${Date.now().toString(36)}`;
    if (typeof hint !== "string" || hint.length === 0) {
        return `${fallback}.${extension}`;
    }
    const base = path.basename(hint)
        .replace(/\.[a-z0-9]+$/i, "")
        .replace(/[^a-zA-Z0-9._-]/g, "_")
        // Leading dots would yield a hidden or dot-only name such as "...mp3".
        .replace(/^\.+/, "")
        .slice(0, 48);
    // Hints like ".mp3" or ".." sanitise to an empty base; use the generated name instead of ".mp3".
    return `${base || fallback}.${extension}`;
}
|
|
268
|
+
/**
 * Build a failed tool result.
 *
 * @param {string} action - The action that was being performed.
 * @param {string} message - Explanation of the failure.
 * @param {object} [extra] - Additional fields merged into the payload; spread last, so they override the base fields.
 * @returns {*} Whatever `jsonResult` returns for `{ action, ok: false, message, ...extra }`.
 */
function fail(action, message, extra = {}) {
    const payload = { action, ok: false, message, ...extra };
    return jsonResult(payload);
}
|
|
271
|
+
/**
 * Read a response body as text without ever throwing.
 *
 * @param {{text: Function}} res - Response-like object with a `text()` method.
 * @returns {Promise<string>} The body text, or an empty string when reading it fails.
 */
async function safeText(res) {
    try {
        return await res.text();
    }
    catch {
        // Used only to enrich error messages, so a failed read degrades to "".
        return "";
    }
}
|
|
279
|
+
/**
 * Compose the caller's signal with a hard per-request timeout.
 *
 * @param {AbortSignal} [signal] - Caller's cancellation signal, if any.
 * @param {number} ms - Timeout in milliseconds.
 * @returns {AbortSignal} A signal that aborts when either the caller's signal aborts or the timeout elapses.
 */
function withTimeout(signal, ms) {
    const timeoutSignal = AbortSignal.timeout(ms);
    if (!signal) {
        return timeoutSignal;
    }
    if (typeof AbortSignal.any === "function") {
        return AbortSignal.any([signal, timeoutSignal]);
    }
    // Fallback for runtimes without AbortSignal.any: relay whichever source aborts first.
    const controller = new AbortController();
    for (const source of [signal, timeoutSignal]) {
        if (source.aborted) {
            controller.abort(source.reason);
            break;
        }
        source.addEventListener("abort", () => controller.abort(source.reason), { once: true });
    }
    return controller.signal;
}
|
|
286
|
+
//# sourceMappingURL=generate-music-tool.js.map
|