@ziplayer/plugin 0.1.33 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/TTSPlugin.ts CHANGED
@@ -1,361 +1,361 @@
1
- import { BasePlugin, Track, SearchResult, StreamInfo } from "ziplayer";
2
- import { Readable } from "stream";
3
- import { getTTSUrls } from "@zibot/zitts";
4
- import axios from "axios";
5
-
6
- /**
7
- * Configuration options for the TTSPlugin.
8
- */
9
- export interface TTSPluginOptions {
10
- /** Default language code for TTS (e.g., "vi", "en", "en-US") */
11
- defaultLang?: string;
12
- /** Whether to use slow speech rate */
13
- slow?: boolean;
14
- /**
15
- * Optional custom TTS hook. If provided, it will be used to
16
- * create the audio stream for the given text instead of the
17
- * built-in Google TTS wrapper.
18
- *
19
- * @param text - The text to convert to speech
20
- * @param ctx - Context information including language, speed, and track
21
- * @returns One of:
22
- * - Node Readable (preferred)
23
- * - HTTP(S) URL string or URL object
24
- * - Buffer / Uint8Array / ArrayBuffer
25
- * - Or an object with { stream, type } | { url, type }
26
- */
27
- createStream?: (
28
- text: string,
29
- ctx?: { lang: string; slow: boolean; track?: Track },
30
- ) =>
31
- | Promise<Readable | string | URL | Buffer | Uint8Array | ArrayBuffer>
32
- | Readable
33
- | string
34
- | URL
35
- | Buffer
36
- | Uint8Array
37
- | ArrayBuffer;
38
- }
39
-
40
- /**
41
- * Internal configuration for TTS processing.
42
- */
43
- interface TTSConfig {
44
- /** The text to convert to speech */
45
- text: string;
46
- /** The language code for TTS */
47
- lang: string;
48
- /** Whether to use slow speech rate */
49
- slow: boolean;
50
- }
51
-
52
- /**
53
- * A plugin for Text-to-Speech (TTS) functionality.
54
- *
55
- * This plugin provides support for:
56
- * - Converting text to speech using Google TTS
57
- * - Custom TTS providers via the createStream hook
58
- * - Multiple language support
59
- * - Configurable speech rate (normal/slow)
60
- * - TTS query parsing with language and speed options
61
- *
62
- * @example
63
- * const ttsPlugin = new TTSPlugin({
64
- * defaultLang: "en",
65
- * slow: false
66
- * });
67
- *
68
- * // Add to PlayerManager
69
- * const manager = new PlayerManager({
70
- * plugins: [ttsPlugin]
71
- * });
72
- *
73
- * // Search for TTS content
74
- * const result = await ttsPlugin.search("tts:Hello world", "user123");
75
- * const stream = await ttsPlugin.getStream(result.tracks[0]);
76
- *
77
- * @example
78
- * // Custom TTS provider
79
- * const customTTSPlugin = new TTSPlugin({
80
- * defaultLang: "en",
81
- * createStream: async (text, ctx) => {
82
- * // Custom TTS implementation
83
- * return customTTSProvider.synthesize(text, ctx.lang);
84
- * }
85
- * });
86
- *
87
- * @since 1.0.0
88
- */
89
- export class TTSPlugin extends BasePlugin {
90
- name = "tts";
91
- version = "1.0.0";
92
- private opts: { defaultLang: string; slow: boolean; createStream?: TTSPluginOptions["createStream"] };
93
-
94
- /**
95
- * Creates a new TTSPlugin instance.
96
- *
97
- * @param opts - Configuration options for the TTS plugin
98
- * @param opts.defaultLang - Default language code for TTS (default: "vi")
99
- * @param opts.slow - Whether to use slow speech rate (default: false)
100
- * @param opts.createStream - Optional custom TTS provider function
101
- *
102
- * @example
103
- * // Basic TTS with Vietnamese as default
104
- * const ttsPlugin = new TTSPlugin();
105
- *
106
- * // TTS with English as default and slow speech
107
- * const slowTTSPlugin = new TTSPlugin({
108
- * defaultLang: "en",
109
- * slow: true
110
- * });
111
- *
112
- * // TTS with custom provider
113
- * const customTTSPlugin = new TTSPlugin({
114
- * defaultLang: "en",
115
- * createStream: async (text, ctx) => {
116
- * return await myCustomTTS.synthesize(text, ctx.lang);
117
- * }
118
- * });
119
- */
120
- constructor(opts?: TTSPluginOptions) {
121
- super();
122
- this.opts = {
123
- defaultLang: opts?.defaultLang || "vi",
124
- slow: !!opts?.slow,
125
- createStream: opts?.createStream,
126
- };
127
- }
128
-
129
- /**
130
- * Determines if this plugin can handle the given query.
131
- *
132
- * @param query - The search query to check
133
- * @returns `true` if the query starts with "tts:" or "say ", `false` otherwise
134
- *
135
- * @example
136
- * plugin.canHandle("tts:Hello world"); // true
137
- * plugin.canHandle("say Hello world"); // true
138
- * plugin.canHandle("youtube.com/watch?v=123"); // false
139
- */
140
- canHandle(query: string): boolean {
141
- if (!query) return false;
142
- const q = query.trim().toLowerCase();
143
- return q.startsWith("tts:") || q.startsWith("say ");
144
- }
145
-
146
- /**
147
- * Creates a TTS track from the given query.
148
- *
149
- * This method parses TTS queries and creates a track that can be played as audio.
150
- * It supports various query formats including language and speed specifications.
151
- *
152
- * @param query - The TTS query to process
153
- * @param requestedBy - The user ID who requested the TTS
154
- * @returns A SearchResult containing a single TTS track
155
- *
156
- * @example
157
- * // Basic TTS
158
- * const result = await plugin.search("tts:Hello world", "user123");
159
- *
160
- * // TTS with specific language
161
- * const result2 = await plugin.search("tts:en:Hello world", "user123");
162
- *
163
- * // TTS with language and slow speed
164
- * const result3 = await plugin.search("tts:en:true:Hello world", "user123");
165
- *
166
- * // Using "say" prefix
167
- * const result4 = await plugin.search("say Hello world", "user123");
168
- */
169
- async search(query: string, requestedBy: string): Promise<SearchResult> {
170
- if (!this.canHandle(query)) {
171
- return { tracks: [] };
172
- }
173
- const { text, lang, slow } = this.parseQuery(query);
174
- const config: TTSConfig = { text, lang, slow };
175
- const url = this.encodeConfig(config);
176
- const title = `TTS (${lang}${slow ? ", slow" : ""}): ${text.slice(0, 64)}${text.length > 64 ? "…" : ""}`;
177
- const estimatedSeconds = Math.max(1, Math.min(60, Math.ceil(text.length / 12)));
178
-
179
- const track: Track = {
180
- id: `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
181
- title,
182
- url,
183
- duration: estimatedSeconds,
184
- requestedBy,
185
- source: this.name,
186
- metadata: { tts: config },
187
- };
188
-
189
- return { tracks: [track] };
190
- }
191
-
192
- /**
193
- * Generates an audio stream for a TTS track.
194
- *
195
- * This method converts the text in the track to speech using either the custom
196
- * TTS provider (if configured) or the built-in Google TTS service. It handles
197
- * various return types from custom providers and ensures proper stream formatting.
198
- *
199
- * @param track - The TTS track to convert to audio
200
- * @returns A StreamInfo object containing the audio stream
201
- * @throws {Error} If TTS generation fails or no audio URLs are returned
202
- *
203
- * @example
204
- * const track = { id: "tts-123", title: "TTS: Hello world", ... };
205
- * const streamInfo = await plugin.getStream(track);
206
- * console.log(streamInfo.type); // "arbitrary"
207
- * console.log(streamInfo.stream); // Readable stream with audio
208
- */
209
- async getStream(track: Track): Promise<StreamInfo> {
210
- const cfg = this.extractConfig(track);
211
- if (track.source !== this.name) return {stream:null as any, type:"arbitrary" };
212
- if (this.opts.createStream && typeof this.opts.createStream === "function") {
213
- const out = await this.opts.createStream(cfg.text, { lang: cfg.lang, slow: cfg.slow, track });
214
- let type: StreamInfo["type"] | undefined;
215
- let metadata: Record<string, any> | undefined;
216
- let stream: Readable | null = null;
217
-
218
- const normType = (t?: any): StreamInfo["type"] | undefined => {
219
- if (!t || typeof t !== "string") return undefined;
220
- const v = t.toLowerCase();
221
- if (v.includes("webm") && v.includes("opus")) return "webm/opus";
222
- if (v.includes("ogg") && v.includes("opus")) return "ogg/opus";
223
- return undefined;
224
- };
225
-
226
- if (out && typeof out === "object") {
227
- // If it's already a Readable/Buffer/Uint8Array/ArrayBuffer/URL, let toReadable handle it
228
- if (
229
- out instanceof Readable ||
230
- out instanceof Buffer ||
231
- out instanceof Uint8Array ||
232
- out instanceof ArrayBuffer ||
233
- out instanceof URL
234
- ) {
235
- stream = await this.toReadable(out as any);
236
- } else if ((out as any).stream) {
237
- const o = out as any;
238
- stream = o.stream as Readable;
239
- type = normType(o.type);
240
- metadata = o.metadata;
241
- } else if ((out as any).url) {
242
- const o = out as any;
243
- const urlStr = o.url.toString();
244
- try {
245
- type =
246
- normType(o.type) ||
247
- (urlStr.endsWith(".webm") ? "webm/opus"
248
- : urlStr.endsWith(".ogg") ? "ogg/opus"
249
- : undefined);
250
- const res = await axios.get(urlStr, { responseType: "stream" });
251
- stream = res.data as unknown as Readable;
252
- metadata = o.metadata;
253
- } catch (e) {
254
- throw new Error(`Failed to fetch custom TTS URL: ${e}`);
255
- }
256
- }
257
- }
258
-
259
- if (!stream) {
260
- stream = await this.toReadable(out as any);
261
- }
262
- return { stream, type: type || "arbitrary", metadata: { provider: "custom", ...(metadata || {}) } };
263
- }
264
-
265
- const urls = getTTSUrls(cfg.text, { lang: cfg.lang, slow: cfg.slow });
266
- if (!urls || urls.length === 0) {
267
- throw new Error("TTS returned no audio URLs");
268
- }
269
-
270
- const parts = await Promise.all(
271
- urls.map((u) => axios.get<ArrayBuffer>(u, { responseType: "arraybuffer" }).then((r) => Buffer.from(r.data))),
272
- );
273
-
274
- const merged = Buffer.concat(parts);
275
- const stream = Readable.from([merged]);
276
- return { stream, type: "arbitrary", metadata: { size: merged.length } };
277
- }
278
-
279
- private async toReadable(out: Readable | string | URL | Buffer | Uint8Array | ArrayBuffer): Promise<Readable> {
280
- if (out instanceof Readable) return out;
281
- if (typeof out === "string" || out instanceof URL) {
282
- const url = out instanceof URL ? out.toString() : out;
283
- if (/^https?:\/\//i.test(url)) {
284
- const res = await axios.get(url, { responseType: "stream" });
285
- return res.data as unknown as Readable;
286
- }
287
- return Readable.from([Buffer.from(url)]);
288
- }
289
- if (out instanceof Buffer) return Readable.from([out]);
290
- if (out instanceof Uint8Array) return Readable.from([Buffer.from(out)]);
291
- if (out instanceof ArrayBuffer) return Readable.from([Buffer.from(out)]);
292
- throw new Error("Unsupported return type from createStream");
293
- }
294
-
295
- private parseQuery(query: string): TTSConfig {
296
- const isLangCode = (s: string) => /^[a-z]{2,3}(?:-[A-Z]{2})?$/.test(s);
297
-
298
- const raw = query.trim();
299
- let text = raw;
300
- let lang = this.opts.defaultLang;
301
- let slow = this.opts.slow;
302
-
303
- const lower = raw.toLowerCase();
304
- if (lower.startsWith("say ")) {
305
- text = raw.slice(4).trim();
306
- } else if (lower.startsWith("tts:")) {
307
- const body = raw.slice(4).trim();
308
- // Supported:
309
- // - "tts: <text>" (text may contain colons)
310
- // - "tts:<lang>:<text>"
311
- // - "tts:<lang>:<slow>:<text>" where slow in {0,1,true,false}
312
- const firstSep = body.indexOf(":");
313
- if (firstSep === -1) {
314
- text = body;
315
- } else {
316
- const maybeLang = body.slice(0, firstSep).trim();
317
- const rest = body.slice(firstSep + 1).trim();
318
- if (isLangCode(maybeLang)) {
319
- lang = maybeLang;
320
- const secondSep = rest.indexOf(":");
321
- if (secondSep !== -1) {
322
- const maybeSlow = rest.slice(0, secondSep).trim().toLowerCase();
323
- const remaining = rest.slice(secondSep + 1).trim();
324
- if (["0", "1", "true", "false"].includes(maybeSlow)) {
325
- slow = maybeSlow === "1" || maybeSlow === "true";
326
- text = remaining;
327
- } else {
328
- text = rest;
329
- }
330
- } else {
331
- text = rest;
332
- }
333
- } else {
334
- text = body;
335
- }
336
- }
337
- }
338
-
339
- text = (text || "").trim();
340
- if (!text) throw new Error("No text provided for TTS");
341
- return { text, lang, slow };
342
- }
343
-
344
- private encodeConfig(cfg: TTSConfig): string {
345
- const payload = encodeURIComponent(JSON.stringify(cfg));
346
- return `tts://${payload}`;
347
- }
348
-
349
- private extractConfig(track: Track): TTSConfig {
350
- const meta = (track.metadata as any)?.tts as TTSConfig | undefined;
351
- if (meta && meta.text) return meta;
352
- try {
353
- const url = track.url || "";
354
- const encoded = url.startsWith("tts://") ? url.slice("tts://".length) : url;
355
- const cfg = JSON.parse(decodeURIComponent(encoded));
356
- return { text: cfg.text, lang: cfg.lang || this.opts.defaultLang, slow: !!cfg.slow };
357
- } catch {
358
- return { text: track.title || "", lang: this.opts.defaultLang, slow: this.opts.slow };
359
- }
360
- }
361
- }
1
+ import { BasePlugin, Track, SearchResult, StreamInfo } from "ziplayer";
2
+ import { Readable } from "stream";
3
+ import { getTTSUrls } from "@zibot/zitts";
4
+ import axios from "axios";
5
+
6
/**
 * A raw audio source that the plugin can turn into a Node `Readable` on its own:
 * an existing stream, an HTTP(S) URL (string or `URL`), or in-memory audio bytes.
 */
export type TTSStreamSource = Readable | string | URL | Buffer | Uint8Array | ArrayBuffer;

/**
 * Everything a custom `createStream` hook may hand back.
 *
 * Besides a raw {@link TTSStreamSource}, the hook may return a small wrapper object
 * so it can also report the container type and attach metadata:
 * - `{ stream, type?, metadata? }` - an already opened stream
 * - `{ url, type?, metadata? }` - a URL that the plugin downloads as a stream
 *
 * `type` is matched loosely: a string containing both "webm" and "opus" maps to
 * `"webm/opus"`, one containing both "ogg" and "opus" maps to `"ogg/opus"`;
 * anything else is reported as `"arbitrary"`.
 */
export type TTSStreamResult =
	| TTSStreamSource
	| { stream: Readable; type?: string; metadata?: Record<string, any> }
	| { url: string | URL; type?: string; metadata?: Record<string, any> };

/**
 * Configuration options for the TTSPlugin.
 */
export interface TTSPluginOptions {
	/** Default language code for TTS (e.g., "vi", "en", "en-US") */
	defaultLang?: string;
	/** Whether to use slow speech rate */
	slow?: boolean;
	/**
	 * Optional custom TTS hook. If provided, it will be used to
	 * create the audio stream for the given text instead of the
	 * built-in Google TTS wrapper.
	 *
	 * @param text - The text to convert to speech
	 * @param ctx - Context information including language, speed, and track.
	 *   Declared optional, so guard it (`ctx?.lang`) when compiling in strict mode.
	 * @returns One of (directly or wrapped in a Promise):
	 *  - Node Readable (preferred)
	 *  - HTTP(S) URL string or URL object
	 *  - Buffer / Uint8Array / ArrayBuffer
	 *  - Or an object with { stream, type } | { url, type }
	 */
	createStream?: (
		text: string,
		ctx?: { lang: string; slow: boolean; track?: Track },
	) => Promise<TTSStreamResult> | TTSStreamResult;
}
39
+
40
/**
 * Fully resolved settings for a single TTS request.
 *
 * An object of this shape is stored on the track (in its metadata and, JSON-encoded,
 * in its `tts://` URL) when the track is created and is read back when the track
 * is streamed.
 */
interface TTSConfig {
	/** Text that will be spoken. */
	text: string;
	/** Language code handed to the TTS backend. */
	lang: string;
	/** `true` to request the slow speech rate. */
	slow: boolean;
}
51
+
52
/**
 * A plugin for Text-to-Speech (TTS) functionality.
 *
 * This plugin provides support for:
 * - Converting text to speech using Google TTS
 * - Custom TTS providers via the createStream hook
 * - Multiple language support
 * - Configurable speech rate (normal/slow)
 * - TTS query parsing with language and speed options
 *
 * @example
 * const ttsPlugin = new TTSPlugin({
 *   defaultLang: "en",
 *   slow: false
 * });
 *
 * // Add to PlayerManager
 * const manager = new PlayerManager({
 *   plugins: [ttsPlugin]
 * });
 *
 * // Search for TTS content
 * const result = await ttsPlugin.search("tts:Hello world", "user123");
 * const stream = await ttsPlugin.getStream(result.tracks[0]);
 *
 * @example
 * // Custom TTS provider
 * const customTTSPlugin = new TTSPlugin({
 *   defaultLang: "en",
 *   createStream: async (text, ctx) => {
 *     // Custom TTS implementation (ctx is declared optional, so guard it)
 *     return customTTSProvider.synthesize(text, ctx?.lang);
 *   }
 * });
 *
 * @since 1.0.0
 */
export class TTSPlugin extends BasePlugin {
	/** Matches "ll" / "lll" optionally followed by "-CC" (e.g. "vi", "en-US"). Case-sensitive. */
	private static readonly LANG_CODE = /^[a-z]{2,3}(?:-[A-Z]{2})?$/;

	name = "tts";
	version = "1.0.0";
	private opts: { defaultLang: string; slow: boolean; createStream?: TTSPluginOptions["createStream"] };

	/**
	 * Creates a new TTSPlugin instance.
	 *
	 * @param opts - Configuration options for the TTS plugin
	 * @param opts.defaultLang - Default language code for TTS (default: "vi")
	 * @param opts.slow - Whether to use slow speech rate (default: false)
	 * @param opts.createStream - Optional custom TTS provider function
	 *
	 * @example
	 * // Basic TTS with Vietnamese as default
	 * const ttsPlugin = new TTSPlugin();
	 *
	 * // TTS with English as default and slow speech
	 * const slowTTSPlugin = new TTSPlugin({
	 *   defaultLang: "en",
	 *   slow: true
	 * });
	 *
	 * // TTS with custom provider
	 * const customTTSPlugin = new TTSPlugin({
	 *   defaultLang: "en",
	 *   createStream: async (text, ctx) => {
	 *     return await myCustomTTS.synthesize(text, ctx?.lang);
	 *   }
	 * });
	 */
	constructor(opts?: TTSPluginOptions) {
		super();
		this.opts = {
			// `||` on purpose: an empty-string language falls back to the default as well.
			defaultLang: opts?.defaultLang || "vi",
			slow: !!opts?.slow,
			createStream: opts?.createStream,
		};
	}

	/**
	 * Determines if this plugin can handle the given query.
	 *
	 * @param query - The search query to check
	 * @returns `true` if the trimmed query starts with "tts:" or "say " (case-insensitive),
	 *   `false` otherwise, including for empty or non-string input
	 *
	 * @example
	 * plugin.canHandle("tts:Hello world"); // true
	 * plugin.canHandle("say Hello world"); // true
	 * plugin.canHandle("youtube.com/watch?v=123"); // false
	 */
	canHandle(query: string): boolean {
		// Runtime guard: untyped callers may pass something that is not a string.
		if (typeof query !== "string" || !query) return false;
		const q = query.trim().toLowerCase();
		return q.startsWith("tts:") || q.startsWith("say ");
	}

	/**
	 * Creates a TTS track from the given query.
	 *
	 * This method parses TTS queries and creates a track that can be played as audio.
	 * It supports various query formats including language and speed specifications.
	 *
	 * @param query - The TTS query to process
	 * @param requestedBy - The user ID who requested the TTS
	 * @returns A SearchResult containing a single TTS track, or no tracks when the
	 *   query is not a TTS query
	 * @throws {Error} If the query has a TTS prefix but no text to speak
	 *
	 * @example
	 * // Basic TTS
	 * const result = await plugin.search("tts:Hello world", "user123");
	 *
	 * // TTS with specific language
	 * const result2 = await plugin.search("tts:en:Hello world", "user123");
	 *
	 * // TTS with language and slow speed
	 * const result3 = await plugin.search("tts:en:true:Hello world", "user123");
	 *
	 * // Using "say" prefix
	 * const result4 = await plugin.search("say Hello world", "user123");
	 */
	async search(query: string, requestedBy: string): Promise<SearchResult> {
		if (!this.canHandle(query)) {
			return { tracks: [] };
		}

		const config = this.parseQuery(query);
		const { text, lang, slow } = config;
		const title = `TTS (${lang}${slow ? ", slow" : ""}): ${this.truncate(text, 64)}`;
		// Rough estimate: one second per 12 characters, clamped to the range 1..60.
		const estimatedSeconds = Math.max(1, Math.min(60, Math.ceil(text.length / 12)));

		const track: Track = {
			id: `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
			title,
			url: this.encodeConfig(config),
			duration: estimatedSeconds,
			requestedBy,
			source: this.name,
			metadata: { tts: config },
		};

		return { tracks: [track] };
	}

	/**
	 * Generates an audio stream for a TTS track.
	 *
	 * This method converts the text in the track to speech using either the custom
	 * TTS provider (if configured) or the built-in Google TTS service. It handles
	 * various return types from custom providers and ensures proper stream formatting.
	 *
	 * @param track - The TTS track to convert to audio
	 * @returns A StreamInfo object containing the audio stream. For a track whose
	 *   `source` is not this plugin, the result carries a `null` stream.
	 * @throws {Error} If TTS generation fails or no audio URLs are returned
	 *
	 * @example
	 * const track = { id: "tts-123", title: "TTS: Hello world", ... };
	 * const streamInfo = await plugin.getStream(track);
	 * console.log(streamInfo.type); // "arbitrary"
	 * console.log(streamInfo.stream); // Readable stream with audio
	 */
	async getStream(track: Track): Promise<StreamInfo> {
		// Foreign track: keep the historical "no stream" answer instead of throwing,
		// and do not bother decoding a config for a track we will not play.
		if (track.source !== this.name) return { stream: null as any, type: "arbitrary" };

		const cfg = this.extractConfig(track);

		if (typeof this.opts.createStream === "function") {
			const out: unknown = await this.opts.createStream(cfg.text, { lang: cfg.lang, slow: cfg.slow, track });
			return this.normalizeCustomOutput(out);
		}

		const urls = getTTSUrls(cfg.text, { lang: cfg.lang, slow: cfg.slow });
		if (!urls || urls.length === 0) {
			throw new Error("TTS returned no audio URLs");
		}

		// Download every chunk in parallel; Promise.all keeps them in request order.
		const parts = await Promise.all(
			urls.map(async (u) => {
				const res = await axios.get<ArrayBuffer>(u, { responseType: "arraybuffer" });
				return Buffer.from(res.data);
			}),
		);

		const merged = Buffer.concat(parts);
		return { stream: Readable.from([merged]), type: "arbitrary", metadata: { size: merged.length } };
	}

	/**
	 * Turns whatever the custom `createStream` hook returned into a StreamInfo.
	 *
	 * @param out - The (already awaited) return value of the hook
	 * @returns StreamInfo tagged with `metadata.provider = "custom"`
	 * @throws {Error} If a wrapped URL cannot be fetched or the value is unsupported
	 */
	private async normalizeCustomOutput(out: unknown): Promise<StreamInfo> {
		let type: StreamInfo["type"] | undefined;
		let metadata: Record<string, any> | undefined;
		let stream: Readable | null = null;

		// Only plain wrapper objects are inspected here; raw sources go to toReadable below.
		if (out && typeof out === "object" && !(out instanceof Readable) && !this.isRawSource(out)) {
			const wrapped = out as { stream?: unknown; url?: unknown; type?: unknown; metadata?: Record<string, any> };
			if (wrapped.stream) {
				// Bytes or a URL inside the wrapper are converted; anything else is passed
				// through untouched so stream-like objects from other libraries keep working.
				stream = this.isRawSource(wrapped.stream) ? await this.toReadable(wrapped.stream) : (wrapped.stream as Readable);
				type = this.normalizeType(wrapped.type);
				metadata = wrapped.metadata;
			} else if (wrapped.url) {
				const urlStr = String(wrapped.url);
				type = this.normalizeType(wrapped.type) || this.guessTypeFromUrl(urlStr);
				try {
					const res = await axios.get(urlStr, { responseType: "stream" });
					stream = res.data as unknown as Readable;
				} catch (e) {
					throw new Error(`Failed to fetch custom TTS URL: ${e}`);
				}
				metadata = wrapped.metadata;
			}
		}

		if (!stream) {
			// Raw source, or a wrapper with neither `stream` nor `url` (the latter throws).
			stream = await this.toReadable(out);
		}
		return { stream, type: type || "arbitrary", metadata: { provider: "custom", ...(metadata || {}) } };
	}

	/** Maps a free-form type hint to a known StreamInfo type, or `undefined` if unrecognised. */
	private normalizeType(t: unknown): StreamInfo["type"] | undefined {
		if (typeof t !== "string" || !t) return undefined;
		const v = t.toLowerCase();
		if (v.includes("webm") && v.includes("opus")) return "webm/opus";
		if (v.includes("ogg") && v.includes("opus")) return "ogg/opus";
		return undefined;
	}

	/**
	 * Guesses the container from a URL's extension. Both the full string and, when the
	 * URL parses, its pathname are checked, so "a.ogg?token=x" is recognised too.
	 */
	private guessTypeFromUrl(urlStr: string): StreamInfo["type"] | undefined {
		const candidates = [urlStr];
		try {
			candidates.push(new URL(urlStr).pathname);
		} catch {
			// Not an absolute URL: only the raw string is checked.
		}
		if (candidates.some((c) => c.endsWith(".webm"))) return "webm/opus";
		if (candidates.some((c) => c.endsWith(".ogg"))) return "ogg/opus";
		return undefined;
	}

	/** True for values toReadable can convert that are not already a Readable. */
	private isRawSource(value: unknown): value is string | URL | Uint8Array | ArrayBuffer {
		return (
			typeof value === "string" || value instanceof URL || value instanceof Uint8Array || value instanceof ArrayBuffer
		);
	}

	/**
	 * Converts a raw source into a Node Readable.
	 *
	 * - Readable: returned as-is
	 * - http(s) URL (string or URL object): downloaded as a stream via axios
	 * - any other string: emitted as a one-chunk stream of the string's own bytes
	 * - Buffer / Uint8Array / ArrayBuffer: emitted as a one-chunk stream
	 *
	 * @throws {Error} If the value is none of the above
	 */
	private async toReadable(out: unknown): Promise<Readable> {
		if (out instanceof Readable) return out;
		if (typeof out === "string" || out instanceof URL) {
			const url = out.toString();
			if (/^https?:\/\//i.test(url)) {
				const res = await axios.get(url, { responseType: "stream" });
				return res.data as unknown as Readable;
			}
			return Readable.from([Buffer.from(url)]);
		}
		if (Buffer.isBuffer(out)) return Readable.from([out]);
		if (out instanceof Uint8Array) return Readable.from([Buffer.from(out)]);
		if (out instanceof ArrayBuffer) return Readable.from([Buffer.from(out)]);
		throw new Error("Unsupported return type from createStream");
	}

	/**
	 * Parses a TTS query into text, language and speed.
	 *
	 * Supported forms (prefix is case-insensitive):
	 *  - "say <text>"
	 *  - "tts:<text>"                 (text may contain colons)
	 *  - "tts:<lang>:<text>"
	 *  - "tts:<lang>:<slow>:<text>"   where slow is one of 0, 1, true, false
	 *
	 * A leading segment only counts as a language when it looks like a language code;
	 * otherwise the whole body is spoken with the default language.
	 *
	 * @throws {Error} If no text remains after parsing
	 */
	private parseQuery(query: string): TTSConfig {
		const raw = query.trim();
		let text = raw;
		let lang = this.opts.defaultLang;
		let slow = this.opts.slow;

		const lower = raw.toLowerCase();
		if (lower.startsWith("say ")) {
			text = raw.slice(4);
		} else if (lower.startsWith("tts:")) {
			text = raw.slice(4).trim();
			const firstSep = text.indexOf(":");
			const maybeLang = firstSep === -1 ? "" : text.slice(0, firstSep).trim();
			if (TTSPlugin.LANG_CODE.test(maybeLang)) {
				lang = maybeLang;
				text = text.slice(firstSep + 1).trim();
				const secondSep = text.indexOf(":");
				const maybeSlow = secondSep === -1 ? "" : text.slice(0, secondSep).trim().toLowerCase();
				if (["0", "1", "true", "false"].includes(maybeSlow)) {
					slow = maybeSlow === "1" || maybeSlow === "true";
					text = text.slice(secondSep + 1);
				}
			}
		}

		text = text.trim();
		if (!text) throw new Error("No text provided for TTS");
		return { text, lang, slow };
	}

	/** Serialises a config into the `tts://<url-encoded JSON>` form stored in `track.url`. */
	private encodeConfig(cfg: TTSConfig): string {
		const payload = encodeURIComponent(JSON.stringify(cfg));
		return `tts://${payload}`;
	}

	/**
	 * Recovers the TTS config for a track.
	 *
	 * Sources are tried in order: `track.metadata.tts`, then the JSON payload of the
	 * `tts://` URL, then a last-resort fallback that speaks the track title with the
	 * plugin defaults. A candidate is only accepted when it carries a non-empty string
	 * `text`, so the result always has a string `text`.
	 */
	private extractConfig(track: Track): TTSConfig {
		const fromMeta = this.coerceConfig((track.metadata as any)?.tts);
		if (fromMeta) return fromMeta;

		try {
			const url = track.url || "";
			const encoded = url.startsWith("tts://") ? url.slice("tts://".length) : url;
			const fromUrl = this.coerceConfig(JSON.parse(decodeURIComponent(encoded)));
			if (fromUrl) return fromUrl;
		} catch {
			// Malformed percent-encoding or JSON: fall through to the title fallback.
		}

		return { text: track.title || "", lang: this.opts.defaultLang, slow: this.opts.slow };
	}

	/**
	 * Validates an untrusted value as a TTSConfig.
	 *
	 * @returns A normalised config, or `undefined` when `text` is missing or not a
	 *   non-empty string. A missing `lang` falls back to the default language.
	 */
	private coerceConfig(value: unknown): TTSConfig | undefined {
		if (!value || typeof value !== "object") return undefined;
		const { text, lang, slow } = value as Partial<Record<keyof TTSConfig, unknown>>;
		if (typeof text !== "string" || !text) return undefined;
		return {
			text,
			lang: typeof lang === "string" && lang ? lang : this.opts.defaultLang,
			slow: !!slow,
		};
	}

	/**
	 * Shortens `text` to at most `max` UTF-16 units and appends "…" when something
	 * was cut. A trailing unpaired high surrogate is dropped so the result never
	 * ends in half of an astral character (e.g. an emoji).
	 */
	private truncate(text: string, max: number): string {
		if (text.length <= max) return text;
		let head = text.slice(0, max);
		const last = head.charCodeAt(head.length - 1);
		if (last >= 0xd800 && last <= 0xdbff) head = head.slice(0, -1);
		return `${head}…`;
	}
}