@ziplayer/plugin 0.1.33 → 0.1.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +180 -180
- package/dist/TTSPlugin.js.map +1 -1
- package/dist/YTSRPlugin.d.ts.map +1 -1
- package/dist/YTSRPlugin.js +17 -1
- package/dist/YTSRPlugin.js.map +1 -1
- package/dist/YouTubePlugin.d.ts +15 -6
- package/dist/YouTubePlugin.d.ts.map +1 -1
- package/dist/YouTubePlugin.js +122 -47
- package/dist/YouTubePlugin.js.map +1 -1
- package/dist/utils/progress-bar.d.ts +8 -0
- package/dist/utils/progress-bar.d.ts.map +1 -0
- package/dist/utils/progress-bar.js +24 -0
- package/dist/utils/progress-bar.js.map +1 -0
- package/dist/utils/sabr-stream-factory.d.ts +25 -0
- package/dist/utils/sabr-stream-factory.d.ts.map +1 -0
- package/dist/utils/sabr-stream-factory.js +83 -0
- package/dist/utils/sabr-stream-factory.js.map +1 -0
- package/dist/utils/stream-converter.d.ts +10 -0
- package/dist/utils/stream-converter.d.ts.map +1 -0
- package/dist/utils/stream-converter.js +71 -0
- package/dist/utils/stream-converter.js.map +1 -0
- package/package.json +44 -42
- package/src/SoundCloudPlugin.ts +368 -368
- package/src/SpotifyPlugin.ts +312 -312
- package/src/TTSPlugin.ts +361 -361
- package/src/YTSRPlugin.ts +596 -583
- package/src/YouTubePlugin.ts +620 -528
- package/src/index.ts +103 -103
- package/src/types/googlevideo.d.ts +45 -0
- package/src/utils/sabr-stream-factory.ts +96 -0
- package/src/utils/stream-converter.ts +79 -0
- package/tsconfig.json +23 -23
- package/YTSR_README.md +0 -310
package/src/TTSPlugin.ts
CHANGED
|
@@ -1,361 +1,361 @@
|
|
|
1
|
-
import { BasePlugin, Track, SearchResult, StreamInfo } from "ziplayer";
|
|
2
|
-
import { Readable } from "stream";
|
|
3
|
-
import { getTTSUrls } from "@zibot/zitts";
|
|
4
|
-
import axios from "axios";
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Configuration options for the TTSPlugin.
|
|
8
|
-
*/
|
|
9
|
-
export interface TTSPluginOptions {
|
|
10
|
-
/** Default language code for TTS (e.g., "vi", "en", "en-US") */
|
|
11
|
-
defaultLang?: string;
|
|
12
|
-
/** Whether to use slow speech rate */
|
|
13
|
-
slow?: boolean;
|
|
14
|
-
/**
|
|
15
|
-
* Optional custom TTS hook. If provided, it will be used to
|
|
16
|
-
* create the audio stream for the given text instead of the
|
|
17
|
-
* built-in Google TTS wrapper.
|
|
18
|
-
*
|
|
19
|
-
* @param text - The text to convert to speech
|
|
20
|
-
* @param ctx - Context information including language, speed, and track
|
|
21
|
-
* @returns One of:
|
|
22
|
-
* - Node Readable (preferred)
|
|
23
|
-
* - HTTP(S) URL string or URL object
|
|
24
|
-
* - Buffer / Uint8Array / ArrayBuffer
|
|
25
|
-
* - Or an object with { stream, type } | { url, type }
|
|
26
|
-
*/
|
|
27
|
-
createStream?: (
|
|
28
|
-
text: string,
|
|
29
|
-
ctx?: { lang: string; slow: boolean; track?: Track },
|
|
30
|
-
) =>
|
|
31
|
-
| Promise<Readable | string | URL | Buffer | Uint8Array | ArrayBuffer>
|
|
32
|
-
| Readable
|
|
33
|
-
| string
|
|
34
|
-
| URL
|
|
35
|
-
| Buffer
|
|
36
|
-
| Uint8Array
|
|
37
|
-
| ArrayBuffer;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
/**
|
|
41
|
-
* Internal configuration for TTS processing.
|
|
42
|
-
*/
|
|
43
|
-
interface TTSConfig {
|
|
44
|
-
/** The text to convert to speech */
|
|
45
|
-
text: string;
|
|
46
|
-
/** The language code for TTS */
|
|
47
|
-
lang: string;
|
|
48
|
-
/** Whether to use slow speech rate */
|
|
49
|
-
slow: boolean;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* A plugin for Text-to-Speech (TTS) functionality.
|
|
54
|
-
*
|
|
55
|
-
* This plugin provides support for:
|
|
56
|
-
* - Converting text to speech using Google TTS
|
|
57
|
-
* - Custom TTS providers via the createStream hook
|
|
58
|
-
* - Multiple language support
|
|
59
|
-
* - Configurable speech rate (normal/slow)
|
|
60
|
-
* - TTS query parsing with language and speed options
|
|
61
|
-
*
|
|
62
|
-
* @example
|
|
63
|
-
* const ttsPlugin = new TTSPlugin({
|
|
64
|
-
* defaultLang: "en",
|
|
65
|
-
* slow: false
|
|
66
|
-
* });
|
|
67
|
-
*
|
|
68
|
-
* // Add to PlayerManager
|
|
69
|
-
* const manager = new PlayerManager({
|
|
70
|
-
* plugins: [ttsPlugin]
|
|
71
|
-
* });
|
|
72
|
-
*
|
|
73
|
-
* // Search for TTS content
|
|
74
|
-
* const result = await ttsPlugin.search("tts:Hello world", "user123");
|
|
75
|
-
* const stream = await ttsPlugin.getStream(result.tracks[0]);
|
|
76
|
-
*
|
|
77
|
-
* @example
|
|
78
|
-
* // Custom TTS provider
|
|
79
|
-
* const customTTSPlugin = new TTSPlugin({
|
|
80
|
-
* defaultLang: "en",
|
|
81
|
-
* createStream: async (text, ctx) => {
|
|
82
|
-
* // Custom TTS implementation
|
|
83
|
-
* return customTTSProvider.synthesize(text, ctx.lang);
|
|
84
|
-
* }
|
|
85
|
-
* });
|
|
86
|
-
*
|
|
87
|
-
* @since 1.0.0
|
|
88
|
-
*/
|
|
89
|
-
export class TTSPlugin extends BasePlugin {
|
|
90
|
-
name = "tts";
|
|
91
|
-
version = "1.0.0";
|
|
92
|
-
private opts: { defaultLang: string; slow: boolean; createStream?: TTSPluginOptions["createStream"] };
|
|
93
|
-
|
|
94
|
-
/**
|
|
95
|
-
* Creates a new TTSPlugin instance.
|
|
96
|
-
*
|
|
97
|
-
* @param opts - Configuration options for the TTS plugin
|
|
98
|
-
* @param opts.defaultLang - Default language code for TTS (default: "vi")
|
|
99
|
-
* @param opts.slow - Whether to use slow speech rate (default: false)
|
|
100
|
-
* @param opts.createStream - Optional custom TTS provider function
|
|
101
|
-
*
|
|
102
|
-
* @example
|
|
103
|
-
* // Basic TTS with Vietnamese as default
|
|
104
|
-
* const ttsPlugin = new TTSPlugin();
|
|
105
|
-
*
|
|
106
|
-
* // TTS with English as default and slow speech
|
|
107
|
-
* const slowTTSPlugin = new TTSPlugin({
|
|
108
|
-
* defaultLang: "en",
|
|
109
|
-
* slow: true
|
|
110
|
-
* });
|
|
111
|
-
*
|
|
112
|
-
* // TTS with custom provider
|
|
113
|
-
* const customTTSPlugin = new TTSPlugin({
|
|
114
|
-
* defaultLang: "en",
|
|
115
|
-
* createStream: async (text, ctx) => {
|
|
116
|
-
* return await myCustomTTS.synthesize(text, ctx.lang);
|
|
117
|
-
* }
|
|
118
|
-
* });
|
|
119
|
-
*/
|
|
120
|
-
constructor(opts?: TTSPluginOptions) {
|
|
121
|
-
super();
|
|
122
|
-
this.opts = {
|
|
123
|
-
defaultLang: opts?.defaultLang || "vi",
|
|
124
|
-
slow: !!opts?.slow,
|
|
125
|
-
createStream: opts?.createStream,
|
|
126
|
-
};
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
/**
|
|
130
|
-
* Determines if this plugin can handle the given query.
|
|
131
|
-
*
|
|
132
|
-
* @param query - The search query to check
|
|
133
|
-
* @returns `true` if the query starts with "tts:" or "say ", `false` otherwise
|
|
134
|
-
*
|
|
135
|
-
* @example
|
|
136
|
-
* plugin.canHandle("tts:Hello world"); // true
|
|
137
|
-
* plugin.canHandle("say Hello world"); // true
|
|
138
|
-
* plugin.canHandle("youtube.com/watch?v=123"); // false
|
|
139
|
-
*/
|
|
140
|
-
canHandle(query: string): boolean {
|
|
141
|
-
if (!query) return false;
|
|
142
|
-
const q = query.trim().toLowerCase();
|
|
143
|
-
return q.startsWith("tts:") || q.startsWith("say ");
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
/**
|
|
147
|
-
* Creates a TTS track from the given query.
|
|
148
|
-
*
|
|
149
|
-
* This method parses TTS queries and creates a track that can be played as audio.
|
|
150
|
-
* It supports various query formats including language and speed specifications.
|
|
151
|
-
*
|
|
152
|
-
* @param query - The TTS query to process
|
|
153
|
-
* @param requestedBy - The user ID who requested the TTS
|
|
154
|
-
* @returns A SearchResult containing a single TTS track
|
|
155
|
-
*
|
|
156
|
-
* @example
|
|
157
|
-
* // Basic TTS
|
|
158
|
-
* const result = await plugin.search("tts:Hello world", "user123");
|
|
159
|
-
*
|
|
160
|
-
* // TTS with specific language
|
|
161
|
-
* const result2 = await plugin.search("tts:en:Hello world", "user123");
|
|
162
|
-
*
|
|
163
|
-
* // TTS with language and slow speed
|
|
164
|
-
* const result3 = await plugin.search("tts:en:true:Hello world", "user123");
|
|
165
|
-
*
|
|
166
|
-
* // Using "say" prefix
|
|
167
|
-
* const result4 = await plugin.search("say Hello world", "user123");
|
|
168
|
-
*/
|
|
169
|
-
async search(query: string, requestedBy: string): Promise<SearchResult> {
|
|
170
|
-
if (!this.canHandle(query)) {
|
|
171
|
-
return { tracks: [] };
|
|
172
|
-
}
|
|
173
|
-
const { text, lang, slow } = this.parseQuery(query);
|
|
174
|
-
const config: TTSConfig = { text, lang, slow };
|
|
175
|
-
const url = this.encodeConfig(config);
|
|
176
|
-
const title = `TTS (${lang}${slow ? ", slow" : ""}): ${text.slice(0, 64)}${text.length > 64 ? "…" : ""}`;
|
|
177
|
-
const estimatedSeconds = Math.max(1, Math.min(60, Math.ceil(text.length / 12)));
|
|
178
|
-
|
|
179
|
-
const track: Track = {
|
|
180
|
-
id: `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
|
181
|
-
title,
|
|
182
|
-
url,
|
|
183
|
-
duration: estimatedSeconds,
|
|
184
|
-
requestedBy,
|
|
185
|
-
source: this.name,
|
|
186
|
-
metadata: { tts: config },
|
|
187
|
-
};
|
|
188
|
-
|
|
189
|
-
return { tracks: [track] };
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
/**
|
|
193
|
-
* Generates an audio stream for a TTS track.
|
|
194
|
-
*
|
|
195
|
-
* This method converts the text in the track to speech using either the custom
|
|
196
|
-
* TTS provider (if configured) or the built-in Google TTS service. It handles
|
|
197
|
-
* various return types from custom providers and ensures proper stream formatting.
|
|
198
|
-
*
|
|
199
|
-
* @param track - The TTS track to convert to audio
|
|
200
|
-
* @returns A StreamInfo object containing the audio stream
|
|
201
|
-
* @throws {Error} If TTS generation fails or no audio URLs are returned
|
|
202
|
-
*
|
|
203
|
-
* @example
|
|
204
|
-
* const track = { id: "tts-123", title: "TTS: Hello world", ... };
|
|
205
|
-
* const streamInfo = await plugin.getStream(track);
|
|
206
|
-
* console.log(streamInfo.type); // "arbitrary"
|
|
207
|
-
* console.log(streamInfo.stream); // Readable stream with audio
|
|
208
|
-
*/
|
|
209
|
-
async getStream(track: Track): Promise<StreamInfo> {
|
|
210
|
-
const cfg = this.extractConfig(track);
|
|
211
|
-
if (track.source !== this.name) return {stream:null as any, type:"arbitrary" };
|
|
212
|
-
if (this.opts.createStream && typeof this.opts.createStream === "function") {
|
|
213
|
-
const out = await this.opts.createStream(cfg.text, { lang: cfg.lang, slow: cfg.slow, track });
|
|
214
|
-
let type: StreamInfo["type"] | undefined;
|
|
215
|
-
let metadata: Record<string, any> | undefined;
|
|
216
|
-
let stream: Readable | null = null;
|
|
217
|
-
|
|
218
|
-
const normType = (t?: any): StreamInfo["type"] | undefined => {
|
|
219
|
-
if (!t || typeof t !== "string") return undefined;
|
|
220
|
-
const v = t.toLowerCase();
|
|
221
|
-
if (v.includes("webm") && v.includes("opus")) return "webm/opus";
|
|
222
|
-
if (v.includes("ogg") && v.includes("opus")) return "ogg/opus";
|
|
223
|
-
return undefined;
|
|
224
|
-
};
|
|
225
|
-
|
|
226
|
-
if (out && typeof out === "object") {
|
|
227
|
-
// If it's already a Readable/Buffer/Uint8Array/ArrayBuffer/URL, let toReadable handle it
|
|
228
|
-
if (
|
|
229
|
-
out instanceof Readable ||
|
|
230
|
-
out instanceof Buffer ||
|
|
231
|
-
out instanceof Uint8Array ||
|
|
232
|
-
out instanceof ArrayBuffer ||
|
|
233
|
-
out instanceof URL
|
|
234
|
-
) {
|
|
235
|
-
stream = await this.toReadable(out as any);
|
|
236
|
-
} else if ((out as any).stream) {
|
|
237
|
-
const o = out as any;
|
|
238
|
-
stream = o.stream as Readable;
|
|
239
|
-
type = normType(o.type);
|
|
240
|
-
metadata = o.metadata;
|
|
241
|
-
} else if ((out as any).url) {
|
|
242
|
-
const o = out as any;
|
|
243
|
-
const urlStr = o.url.toString();
|
|
244
|
-
try {
|
|
245
|
-
type =
|
|
246
|
-
normType(o.type) ||
|
|
247
|
-
(urlStr.endsWith(".webm") ? "webm/opus"
|
|
248
|
-
: urlStr.endsWith(".ogg") ? "ogg/opus"
|
|
249
|
-
: undefined);
|
|
250
|
-
const res = await axios.get(urlStr, { responseType: "stream" });
|
|
251
|
-
stream = res.data as unknown as Readable;
|
|
252
|
-
metadata = o.metadata;
|
|
253
|
-
} catch (e) {
|
|
254
|
-
throw new Error(`Failed to fetch custom TTS URL: ${e}`);
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
|
|
259
|
-
if (!stream) {
|
|
260
|
-
stream = await this.toReadable(out as any);
|
|
261
|
-
}
|
|
262
|
-
return { stream, type: type || "arbitrary", metadata: { provider: "custom", ...(metadata || {}) } };
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
const urls = getTTSUrls(cfg.text, { lang: cfg.lang, slow: cfg.slow });
|
|
266
|
-
if (!urls || urls.length === 0) {
|
|
267
|
-
throw new Error("TTS returned no audio URLs");
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
const parts = await Promise.all(
|
|
271
|
-
urls.map((u) => axios.get<ArrayBuffer>(u, { responseType: "arraybuffer" }).then((r) => Buffer.from(r.data))),
|
|
272
|
-
);
|
|
273
|
-
|
|
274
|
-
const merged = Buffer.concat(parts);
|
|
275
|
-
const stream = Readable.from([merged]);
|
|
276
|
-
return { stream, type: "arbitrary", metadata: { size: merged.length } };
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
private async toReadable(out: Readable | string | URL | Buffer | Uint8Array | ArrayBuffer): Promise<Readable> {
|
|
280
|
-
if (out instanceof Readable) return out;
|
|
281
|
-
if (typeof out === "string" || out instanceof URL) {
|
|
282
|
-
const url = out instanceof URL ? out.toString() : out;
|
|
283
|
-
if (/^https?:\/\//i.test(url)) {
|
|
284
|
-
const res = await axios.get(url, { responseType: "stream" });
|
|
285
|
-
return res.data as unknown as Readable;
|
|
286
|
-
}
|
|
287
|
-
return Readable.from([Buffer.from(url)]);
|
|
288
|
-
}
|
|
289
|
-
if (out instanceof Buffer) return Readable.from([out]);
|
|
290
|
-
if (out instanceof Uint8Array) return Readable.from([Buffer.from(out)]);
|
|
291
|
-
if (out instanceof ArrayBuffer) return Readable.from([Buffer.from(out)]);
|
|
292
|
-
throw new Error("Unsupported return type from createStream");
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
private parseQuery(query: string): TTSConfig {
|
|
296
|
-
const isLangCode = (s: string) => /^[a-z]{2,3}(?:-[A-Z]{2})?$/.test(s);
|
|
297
|
-
|
|
298
|
-
const raw = query.trim();
|
|
299
|
-
let text = raw;
|
|
300
|
-
let lang = this.opts.defaultLang;
|
|
301
|
-
let slow = this.opts.slow;
|
|
302
|
-
|
|
303
|
-
const lower = raw.toLowerCase();
|
|
304
|
-
if (lower.startsWith("say ")) {
|
|
305
|
-
text = raw.slice(4).trim();
|
|
306
|
-
} else if (lower.startsWith("tts:")) {
|
|
307
|
-
const body = raw.slice(4).trim();
|
|
308
|
-
// Supported:
|
|
309
|
-
// - "tts: <text>" (text may contain colons)
|
|
310
|
-
// - "tts:<lang>:<text>"
|
|
311
|
-
// - "tts:<lang>:<slow>:<text>" where slow in {0,1,true,false}
|
|
312
|
-
const firstSep = body.indexOf(":");
|
|
313
|
-
if (firstSep === -1) {
|
|
314
|
-
text = body;
|
|
315
|
-
} else {
|
|
316
|
-
const maybeLang = body.slice(0, firstSep).trim();
|
|
317
|
-
const rest = body.slice(firstSep + 1).trim();
|
|
318
|
-
if (isLangCode(maybeLang)) {
|
|
319
|
-
lang = maybeLang;
|
|
320
|
-
const secondSep = rest.indexOf(":");
|
|
321
|
-
if (secondSep !== -1) {
|
|
322
|
-
const maybeSlow = rest.slice(0, secondSep).trim().toLowerCase();
|
|
323
|
-
const remaining = rest.slice(secondSep + 1).trim();
|
|
324
|
-
if (["0", "1", "true", "false"].includes(maybeSlow)) {
|
|
325
|
-
slow = maybeSlow === "1" || maybeSlow === "true";
|
|
326
|
-
text = remaining;
|
|
327
|
-
} else {
|
|
328
|
-
text = rest;
|
|
329
|
-
}
|
|
330
|
-
} else {
|
|
331
|
-
text = rest;
|
|
332
|
-
}
|
|
333
|
-
} else {
|
|
334
|
-
text = body;
|
|
335
|
-
}
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
|
|
339
|
-
text = (text || "").trim();
|
|
340
|
-
if (!text) throw new Error("No text provided for TTS");
|
|
341
|
-
return { text, lang, slow };
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
private encodeConfig(cfg: TTSConfig): string {
|
|
345
|
-
const payload = encodeURIComponent(JSON.stringify(cfg));
|
|
346
|
-
return `tts://${payload}`;
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
private extractConfig(track: Track): TTSConfig {
|
|
350
|
-
const meta = (track.metadata as any)?.tts as TTSConfig | undefined;
|
|
351
|
-
if (meta && meta.text) return meta;
|
|
352
|
-
try {
|
|
353
|
-
const url = track.url || "";
|
|
354
|
-
const encoded = url.startsWith("tts://") ? url.slice("tts://".length) : url;
|
|
355
|
-
const cfg = JSON.parse(decodeURIComponent(encoded));
|
|
356
|
-
return { text: cfg.text, lang: cfg.lang || this.opts.defaultLang, slow: !!cfg.slow };
|
|
357
|
-
} catch {
|
|
358
|
-
return { text: track.title || "", lang: this.opts.defaultLang, slow: this.opts.slow };
|
|
359
|
-
}
|
|
360
|
-
}
|
|
361
|
-
}
|
|
1
|
+
import { BasePlugin, Track, SearchResult, StreamInfo } from "ziplayer";
|
|
2
|
+
import { Readable } from "stream";
|
|
3
|
+
import { getTTSUrls } from "@zibot/zitts";
|
|
4
|
+
import axios from "axios";
|
|
5
|
+
|
|
6
|
+
/**
 * Raw audio payloads a custom TTS hook may return directly.
 *
 * Strings and `URL` objects are fetched over HTTP(S) when they start with
 * `http://` or `https://`; binary values are wrapped into a stream.
 */
export type TTSStreamPayload = Readable | string | URL | Buffer | Uint8Array | ArrayBuffer;

/**
 * Wrapped results a custom TTS hook may return when it also wants to pass a
 * container hint (`type`, e.g. "webm/opus" or "ogg/opus") and/or extra
 * `metadata` that is merged into the resulting stream info.
 */
export type TTSWrappedStreamResult =
	| { stream: Readable; type?: string; metadata?: Record<string, unknown> }
	| { url: string | URL; type?: string; metadata?: Record<string, unknown> };

/** Everything `createStream` is allowed to resolve to. */
export type TTSStreamResult = TTSStreamPayload | TTSWrappedStreamResult;

/**
 * Configuration options for the TTSPlugin.
 */
export interface TTSPluginOptions {
	/** Default language code for TTS (e.g., "vi", "en", "en-US") */
	defaultLang?: string;
	/** Whether to use slow speech rate */
	slow?: boolean;
	/**
	 * Optional custom TTS hook. If provided, it is used to create the audio
	 * stream for the given text instead of the built-in TTS wrapper.
	 *
	 * @param text - The text to convert to speech
	 * @param ctx - Context information including language, speed, and track.
	 *   Declared optional, so guard it (`ctx?.lang`) inside the hook.
	 * @returns One of (directly or via a Promise):
	 * - Node Readable (preferred)
	 * - HTTP(S) URL string or URL object
	 * - Buffer / Uint8Array / ArrayBuffer
	 * - An object `{ stream, type?, metadata? }` or `{ url, type?, metadata? }`
	 */
	createStream?: (
		text: string,
		ctx?: { lang: string; slow: boolean; track?: Track },
	) => Promise<TTSStreamResult> | TTSStreamResult;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Fully resolved settings for a single TTS request.
 *
 * Unlike the public plugin options, every field is required here: defaults
 * have already been applied by the time a value of this shape is built.
 */
interface TTSConfig {
	/** Text that will be synthesized. */
	text: string;
	/** Language code passed to the TTS provider. */
	lang: string;
	/** `true` to request the slow speech rate. */
	slow: boolean;
}
|
|
51
|
+
|
|
52
|
+
/**
 * A plugin for Text-to-Speech (TTS) functionality.
 *
 * What it does:
 * - Claims queries that start with "tts:" or "say " (case-insensitive).
 * - Parses an optional language code and slow flag out of "tts:" queries.
 * - Produces audio either through the optional `createStream` hook or, when
 *   no hook is configured, by downloading every URL returned by
 *   `getTTSUrls` and concatenating the responses.
 *
 * @example
 * const ttsPlugin = new TTSPlugin({
 *   defaultLang: "en",
 *   slow: false
 * });
 *
 * // Add to PlayerManager
 * const manager = new PlayerManager({
 *   plugins: [ttsPlugin]
 * });
 *
 * // Search for TTS content
 * const result = await ttsPlugin.search("tts:Hello world", "user123");
 * const stream = await ttsPlugin.getStream(result.tracks[0]);
 *
 * @example
 * // Custom TTS provider (ctx is declared optional, so guard it)
 * const customTTSPlugin = new TTSPlugin({
 *   defaultLang: "en",
 *   createStream: async (text, ctx) => {
 *     return customTTSProvider.synthesize(text, ctx?.lang ?? "en");
 *   }
 * });
 *
 * @since 1.0.0
 */
export class TTSPlugin extends BasePlugin {
	name = "tts";
	version = "1.0.0";
	private opts: { defaultLang: string; slow: boolean; createStream?: TTSPluginOptions["createStream"] };

	/** Shape accepted as a language prefix in "tts:<lang>:..." (e.g. "en", "vi", "en-US"). */
	private static readonly LANG_CODE = /^[a-z]{2,3}(?:-[A-Z]{2})?$/;
	/** Tokens accepted as the slow flag in "tts:<lang>:<slow>:...". */
	private static readonly SLOW_FLAGS: readonly string[] = ["0", "1", "true", "false"];
	/** Maximum number of characters of the spoken text shown in a track title. */
	private static readonly TITLE_PREVIEW_LENGTH = 64;

	/**
	 * Creates a new TTSPlugin instance.
	 *
	 * @param opts - Configuration options for the TTS plugin
	 * @param opts.defaultLang - Default language code for TTS (default: "vi")
	 * @param opts.slow - Whether to use slow speech rate (default: false)
	 * @param opts.createStream - Optional custom TTS provider function
	 *
	 * @example
	 * // Basic TTS with Vietnamese as default
	 * const ttsPlugin = new TTSPlugin();
	 *
	 * // TTS with English as default and slow speech
	 * const slowTTSPlugin = new TTSPlugin({
	 *   defaultLang: "en",
	 *   slow: true
	 * });
	 */
	constructor(opts?: TTSPluginOptions) {
		super();
		this.opts = {
			// `||` on purpose: an empty string is not a usable language code.
			defaultLang: opts?.defaultLang || "vi",
			slow: !!opts?.slow,
			createStream: opts?.createStream,
		};
	}

	/**
	 * Determines if this plugin can handle the given query.
	 *
	 * @param query - The search query to check
	 * @returns `true` if the trimmed query starts with "tts:" or "say "
	 *   (case-insensitive), `false` otherwise
	 *
	 * @example
	 * plugin.canHandle("tts:Hello world"); // true
	 * plugin.canHandle("say Hello world"); // true
	 * plugin.canHandle("youtube.com/watch?v=123"); // false
	 */
	canHandle(query: string): boolean {
		if (!query) return false;
		const q = query.trim().toLowerCase();
		return q.startsWith("tts:") || q.startsWith("say ");
	}

	/**
	 * Creates a TTS track from the given query.
	 *
	 * The parsed configuration is stored twice on the track: in
	 * `metadata.tts` and, URL-encoded, in the `tts://` track URL, so that
	 * `getStream` can recover it from either place.
	 *
	 * @param query - The TTS query to process
	 * @param requestedBy - The user ID who requested the TTS
	 * @returns A SearchResult with a single TTS track, or no tracks when the
	 *   query is not a TTS query
	 * @throws {Error} If the query contains no text to speak
	 *
	 * @example
	 * const result = await plugin.search("tts:Hello world", "user123");
	 * const result2 = await plugin.search("tts:en:Hello world", "user123");
	 * const result3 = await plugin.search("tts:en:true:Hello world", "user123");
	 * const result4 = await plugin.search("say Hello world", "user123");
	 */
	async search(query: string, requestedBy: string): Promise<SearchResult> {
		if (!this.canHandle(query)) {
			return { tracks: [] };
		}

		const config = this.parseQuery(query);
		const { text, lang, slow } = config;

		// Truncate by code point so an emoji or other astral character at the
		// cut-off is never split into a lone surrogate.
		const chars = Array.from(text);
		const limit = TTSPlugin.TITLE_PREVIEW_LENGTH;
		const preview = chars.length > limit ? `${chars.slice(0, limit).join("")}…` : text;
		const title = `TTS (${lang}${slow ? ", slow" : ""}): ${preview}`;

		// Rough estimate: 12 characters per second, clamped to 1..60 seconds.
		const estimatedSeconds = Math.max(1, Math.min(60, Math.ceil(text.length / 12)));

		const track: Track = {
			id: `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
			title,
			url: this.encodeConfig(config),
			duration: estimatedSeconds,
			requestedBy,
			source: this.name,
			metadata: { tts: config },
		};

		return { tracks: [track] };
	}

	/**
	 * Generates an audio stream for a TTS track.
	 *
	 * Uses the `createStream` hook when one is configured; otherwise
	 * downloads and concatenates the audio parts returned by `getTTSUrls`.
	 *
	 * @param track - The TTS track to convert to audio
	 * @returns A StreamInfo object containing the audio stream. For a track
	 *   whose `source` is not this plugin, the returned `stream` is `null`.
	 * @throws {Error} If no audio URLs are returned, a custom URL cannot be
	 *   fetched, or the hook returns an unsupported value
	 *
	 * @example
	 * const streamInfo = await plugin.getStream(track);
	 * console.log(streamInfo.type); // "arbitrary"
	 */
	async getStream(track: Track): Promise<StreamInfo> {
		// Reject foreign tracks before doing any decoding work. The null stream
		// is the pre-existing signal for "not mine" and is kept for callers.
		if (track.source !== this.name) return { stream: null as any, type: "arbitrary" };

		const cfg = this.extractConfig(track);
		const hook = this.opts.createStream;
		if (typeof hook === "function") {
			return this.streamFromCustomProvider(hook, cfg, track);
		}
		return this.streamFromBuiltInProvider(cfg);
	}

	/** Runs the user-supplied hook and normalizes whatever it returned into a StreamInfo. */
	private async streamFromCustomProvider(
		hook: NonNullable<TTSPluginOptions["createStream"]>,
		cfg: TTSConfig,
		track: Track,
	): Promise<StreamInfo> {
		// Treated as `unknown` because user code may return anything at runtime.
		const out: unknown = await hook(cfg.text, { lang: cfg.lang, slow: cfg.slow, track });
		const wrapped = TTSPlugin.asWrappedResult(out);

		let stream: Readable;
		let type: StreamInfo["type"] | undefined;
		let metadata: Record<string, unknown> | undefined;

		if (wrapped?.stream) {
			// { stream, type?, metadata? } - the stream is used as provided.
			stream = wrapped.stream as Readable;
			type = TTSPlugin.normalizeType(wrapped.type);
			metadata = TTSPlugin.asMetadata(wrapped.metadata);
		} else if (wrapped?.url) {
			// { url, type?, metadata? } - fetch the URL as a stream.
			const urlStr = String(wrapped.url);
			try {
				type = TTSPlugin.normalizeType(wrapped.type) ?? TTSPlugin.typeFromUrl(urlStr);
				const res = await axios.get<Readable>(urlStr, { responseType: "stream" });
				stream = res.data;
				metadata = TTSPlugin.asMetadata(wrapped.metadata);
			} catch (e) {
				throw new Error(`Failed to fetch custom TTS URL: ${e}`);
			}
		} else {
			// Direct payloads (Readable, URL, string, binary); anything else throws.
			stream = await this.toReadable(out);
		}

		return { stream, type: type ?? "arbitrary", metadata: { provider: "custom", ...(metadata ?? {}) } };
	}

	/** Downloads every audio part reported by `getTTSUrls` and serves them as one buffer-backed stream. */
	private async streamFromBuiltInProvider(cfg: TTSConfig): Promise<StreamInfo> {
		const urls = getTTSUrls(cfg.text, { lang: cfg.lang, slow: cfg.slow });
		if (!urls || urls.length === 0) {
			throw new Error("TTS returned no audio URLs");
		}

		// Parts are fetched in parallel; Promise.all keeps them in URL order.
		const parts = await Promise.all(
			urls.map(async (u) => {
				const res = await axios.get<ArrayBuffer>(u, { responseType: "arraybuffer" });
				return Buffer.from(res.data);
			}),
		);

		const merged = Buffer.concat(parts);
		return { stream: Readable.from([merged]), type: "arbitrary", metadata: { size: merged.length } };
	}

	/**
	 * Returns `out` as a property bag when it is a wrapper object, i.e. an
	 * object that is not one of the directly supported payload classes.
	 */
	private static asWrappedResult(out: unknown): Record<string, unknown> | undefined {
		if (typeof out !== "object" || out === null) return undefined;
		if (
			out instanceof Readable ||
			out instanceof Buffer ||
			out instanceof Uint8Array ||
			out instanceof ArrayBuffer ||
			out instanceof URL
		) {
			return undefined;
		}
		return out as Record<string, unknown>;
	}

	/** Keeps hook-provided metadata only when it is an object that can be spread. */
	private static asMetadata(value: unknown): Record<string, unknown> | undefined {
		return typeof value === "object" && value !== null ? (value as Record<string, unknown>) : undefined;
	}

	/** Maps a free-form type hint (e.g. a MIME type) onto the stream types this plugin reports. */
	private static normalizeType(hint: unknown): StreamInfo["type"] | undefined {
		if (!hint || typeof hint !== "string") return undefined;
		const v = hint.toLowerCase();
		if (v.includes("webm") && v.includes("opus")) return "webm/opus";
		if (v.includes("ogg") && v.includes("opus")) return "ogg/opus";
		return undefined;
	}

	/**
	 * Guesses the container from a URL's file extension. Only the path is
	 * inspected, so "https://host/a.webm?token=x" is still recognised.
	 */
	private static typeFromUrl(url: string): StreamInfo["type"] | undefined {
		let path: string;
		try {
			path = new URL(url).pathname;
		} catch {
			// Not an absolute URL: strip the query/fragment by hand.
			path = url.split(/[?#]/, 1)[0] ?? url;
		}
		const lower = path.toLowerCase();
		if (lower.endsWith(".webm")) return "webm/opus";
		if (lower.endsWith(".ogg")) return "ogg/opus";
		return undefined;
	}

	/**
	 * Converts a direct hook payload into a Readable.
	 *
	 * @param out - Value returned by the `createStream` hook
	 * @returns A Readable serving the payload
	 * @throws {Error} If `out` is none of the supported payload kinds
	 */
	private async toReadable(out: unknown): Promise<Readable> {
		if (out instanceof Readable) return out;
		if (typeof out === "string" || out instanceof URL) {
			const url = out.toString();
			if (/^https?:\/\//i.test(url)) {
				const res = await axios.get<Readable>(url, { responseType: "stream" });
				return res.data;
			}
			// NOTE(review): a non-HTTP string is streamed as its own UTF-8 bytes.
			// Kept for backward compatibility; confirm whether rejecting it would be safer.
			return Readable.from([Buffer.from(url)]);
		}
		if (out instanceof Buffer) return Readable.from([out]);
		if (out instanceof Uint8Array) return Readable.from([Buffer.from(out)]);
		if (out instanceof ArrayBuffer) return Readable.from([Buffer.from(out)]);
		throw new Error("Unsupported return type from createStream");
	}

	/**
	 * Splits a query into text, language and speed.
	 *
	 * Supported forms:
	 * - "say <text>"
	 * - "tts:<text>" (text may contain colons)
	 * - "tts:<lang>:<text>"
	 * - "tts:<lang>:<slow>:<text>" where slow is one of 0, 1, true, false
	 *
	 * A prefix is only treated as a language when it matches `LANG_CODE`, and
	 * the following token is only treated as the slow flag when it is one of
	 * `SLOW_FLAGS`; otherwise it stays part of the spoken text.
	 *
	 * @throws {Error} If no text remains after parsing
	 */
	private parseQuery(query: string): TTSConfig {
		const raw = query.trim();
		const lower = raw.toLowerCase();

		let text = raw;
		let lang = this.opts.defaultLang;
		let slow = this.opts.slow;

		if (lower.startsWith("say ")) {
			text = raw.slice(4);
		} else if (lower.startsWith("tts:")) {
			text = raw.slice(4).trim();

			const langSep = text.indexOf(":");
			const maybeLang = langSep === -1 ? "" : text.slice(0, langSep).trim();
			if (TTSPlugin.LANG_CODE.test(maybeLang)) {
				lang = maybeLang;
				text = text.slice(langSep + 1).trim();

				const slowSep = text.indexOf(":");
				const maybeSlow = slowSep === -1 ? "" : text.slice(0, slowSep).trim().toLowerCase();
				if (TTSPlugin.SLOW_FLAGS.includes(maybeSlow)) {
					slow = maybeSlow === "1" || maybeSlow === "true";
					text = text.slice(slowSep + 1);
				}
			}
		}

		text = text.trim();
		if (!text) throw new Error("No text provided for TTS");
		return { text, lang, slow };
	}

	/** Serializes a config into the `tts://<url-encoded JSON>` form used as the track URL. */
	private encodeConfig(cfg: TTSConfig): string {
		const payload = encodeURIComponent(JSON.stringify(cfg));
		return `tts://${payload}`;
	}

	/**
	 * Recovers the TTS config for a track.
	 *
	 * Sources are tried in order: `track.metadata.tts`, the JSON payload in
	 * the track URL, and finally the track title with the plugin defaults.
	 * A source is skipped unless it yields a non-empty string `text`.
	 */
	private extractConfig(track: Track): TTSConfig {
		const fromMeta = this.normalizeConfig((track.metadata as { tts?: unknown } | undefined)?.tts);
		if (fromMeta) return fromMeta;

		try {
			const url = track.url || "";
			const encoded = url.startsWith("tts://") ? url.slice("tts://".length) : url;
			const fromUrl = this.normalizeConfig(JSON.parse(decodeURIComponent(encoded)));
			if (fromUrl) return fromUrl;
		} catch {
			// Malformed percent-encoding or JSON: fall through to the title.
		}

		return { text: track.title || "", lang: this.opts.defaultLang, slow: this.opts.slow };
	}

	/**
	 * Validates an untrusted config-like value.
	 *
	 * @returns A complete config, or `undefined` when `raw` has no usable text.
	 *   A missing language falls back to the plugin default; `slow` is
	 *   coerced to a boolean.
	 */
	private normalizeConfig(raw: unknown): TTSConfig | undefined {
		if (typeof raw !== "object" || raw === null) return undefined;
		const { text, lang, slow } = raw as { text?: unknown; lang?: unknown; slow?: unknown };
		if (typeof text !== "string" || !text) return undefined;
		return {
			text,
			lang: typeof lang === "string" && lang ? lang : this.opts.defaultLang,
			slow: !!slow,
		};
	}
}
|