univoice 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,947 @@
1
+ import {
2
+ __name
3
+ } from "./chunk-7QVYU63E.js";
4
+
5
+ // src/tts/providers/doubao.ts
6
+ import { Buffer as Buffer3 } from "buffer";
7
+ import { randomUUID } from "crypto";
8
+
9
+ // src/tts/base.ts
10
/**
 * Shared base class for text-to-speech providers.
 *
 * Holds the common synthesis settings and offers helpers that concrete
 * providers build on. Providers in this file extend it and add their own
 * `synthesize(request)` method.
 */
var BaseTTS = class {
  static {
    __name(this, "BaseTTS");
  }
  /** Credential sent to the provider (empty string when not supplied). */
  apiKey;
  /** Service endpoint (empty string when not supplied). */
  baseUrl;
  /** Model identifier. */
  model;
  /** Voice identifier. */
  voice;
  /** Speech rate setting. */
  speed;
  /** Volume setting. */
  volume;
  /** Pitch setting. */
  pitch;
  /** Audio container/codec name, e.g. "mp3". */
  format;
  /** Language tag, e.g. "zh-CN". */
  language;
  /**
   * @param {object} [options] Provider settings; every field is optional.
   */
  constructor(options = {}) {
    this.apiKey = options.apiKey || "";
    this.baseUrl = options.baseUrl || "";
    this.model = options.model || "default";
    this.voice = options.voice || "default";
    // Numeric settings use `??` so that an explicit 0 (for example
    // `volume: 0`) is honoured instead of being replaced by the default.
    this.speed = options.speed ?? 1;
    this.volume = options.volume ?? 1;
    this.pitch = options.pitch ?? 1;
    this.format = options.format || "mp3";
    this.language = options.language || "zh-CN";
  }
  /**
   * List the voices offered by the provider.
   * @returns {Promise<Array>} The base implementation always yields `[]`.
   */
  async listVoices() {
    return [];
  }
  /**
   * Merge the instance defaults with per-request overrides.
   * @param {object} [request] Synthesis request; `request.options` wins over
   *   the instance settings key by key.
   * @returns {object} Flat options object including a `provider` entry taken
   *   from the constructor name.
   */
  buildRequestOptions(request) {
    return {
      provider: this.constructor.name,
      apiKey: this.apiKey,
      baseUrl: this.baseUrl,
      model: this.model,
      voice: this.voice,
      speed: this.speed,
      volume: this.volume,
      pitch: this.pitch,
      format: this.format,
      language: this.language,
      // Optional chaining tolerates a missing request; spreading
      // `undefined` contributes no keys.
      ...request?.options
    };
  }
};
53
+
54
+ // src/tts/factory.ts
55
// Registry of provider constructors, keyed by provider type name.
var providers = /* @__PURE__ */ new Map();
/**
 * Register the constructor used for a provider type, replacing any
 * previous registration under the same key.
 * @param {string} type Provider type name.
 * @param {Function} provider Provider class.
 */
function registerTTSProvider(type, provider) {
  const registry = providers;
  registry.set(type, provider);
}
59
+ __name(registerTTSProvider, "registerTTSProvider");
60
/**
 * Instantiate the provider registered under `options.provider`.
 * @param {object} options Provider options; passed unchanged to the constructor.
 * @returns {object} A new provider instance.
 * @throws {Error} When no provider is registered under that name.
 */
function createTTS(options) {
  const Provider = providers.get(options.provider);
  if (Provider) {
    return new Provider(options);
  }
  throw new Error(`TTS provider "${options.provider}" not found`);
}
67
+ __name(createTTS, "createTTS");
68
/**
 * @returns {string[]} The registered provider type names, in registration order.
 */
function getTTSProviders() {
  return [...providers.keys()];
}
71
+ __name(getTTSProviders, "getTTSProviders");
72
/**
 * One-shot helper: create the provider described by `options` and ask it
 * to synthesize `text`. The same `options` object is forwarded as the
 * request's `options`.
 * @param {string} text Text to speak.
 * @param {object} options Provider options (must include `provider`).
 * @returns {Promise<object>} Whatever the provider's `synthesize` resolves to.
 */
async function synthesize(text, options) {
  const engine = createTTS(options);
  const request = { text, options };
  return engine.synthesize(request);
}
76
+ __name(synthesize, "synthesize");
77
+
78
+ // src/tts/protocols/volcengine.ts
79
+ import { Buffer as Buffer2 } from "buffer";
80
// Two-way lookup of protocol event codes: EventType.Name -> code and
// EventType[code] -> "Name".
var EventType = /* @__PURE__ */ ((target) => {
  const table = [
    ["None", 0],
    ["StartConnection", 1],
    ["FinishConnection", 2],
    ["ConnectionStarted", 50],
    ["ConnectionFailed", 51],
    ["ConnectionFinished", 52],
    ["StartSession", 100],
    ["CancelSession", 101],
    ["FinishSession", 102],
    ["SessionStarted", 150],
    ["SessionCanceled", 151],
    ["SessionFinished", 152],
    ["SessionFailed", 153],
    ["UsageResponse", 154],
    ["TaskRequest", 200],
    ["UpdateConfig", 201],
    ["AudioMuted", 250],
    ["SayHello", 300],
    ["TTSSentenceStart", 350],
    ["TTSSentenceEnd", 351],
    ["TTSResponse", 352],
    ["TTSEnded", 359],
    ["PodcastRoundStart", 360],
    ["PodcastRoundResponse", 361],
    ["PodcastRoundEnd", 362],
    ["ASRInfo", 450],
    ["ASRResponse", 451],
    ["ASREnded", 459],
    ["ChatTTSText", 500],
    ["ChatResponse", 550],
    ["ChatEnded", 559],
    ["SourceSubtitleStart", 650],
    ["SourceSubtitleResponse", 651],
    ["SourceSubtitleEnd", 652],
    ["TranslationSubtitleStart", 653],
    ["TranslationSubtitleResponse", 654],
    ["TranslationSubtitleEnd", 655]
  ];
  for (const [label, code] of table) {
    target[label] = code;
    target[code] = label;
  }
  return target;
})(EventType || {});
120
// Two-way lookup of protocol message-type codes: MsgType.Name -> code and
// MsgType[code] -> "Name".
var MsgType = /* @__PURE__ */ ((target) => {
  const table = [
    ["Invalid", 0],
    ["FullClientRequest", 1],
    ["AudioOnlyClient", 2],
    ["FullServerResponse", 9],
    ["AudioOnlyServer", 11],
    ["FrontEndResultServer", 12],
    ["Error", 15]
  ];
  for (const [label, code] of table) {
    target[label] = code;
    target[code] = label;
  }
  return target;
})(MsgType || {});
130
/**
 * Look up the display name of an event code.
 * @param {number} eventType Event code.
 * @returns {string} The `EventType` entry for the code, or an
 *   "invalid event type" message when the lookup yields a falsy value.
 */
function getEventTypeName(eventType) {
  const label = EventType[eventType];
  if (label) {
    return label;
  }
  return `invalid event type: ${eventType}`;
}
133
+ __name(getEventTypeName, "getEventTypeName");
134
/**
 * Look up the display name of a message-type code.
 * @param {number} msgType Message-type code.
 * @returns {string} The `MsgType` entry for the code, or an
 *   "invalid message type" message when the lookup yields a falsy value.
 */
function getMsgTypeName(msgType) {
  const label = MsgType[msgType];
  if (label) {
    return label;
  }
  return `invalid message type: ${msgType}`;
}
137
+ __name(getMsgTypeName, "getMsgTypeName");
138
/**
 * Render a protocol message as a single diagnostic line.
 *
 * Audio messages report only the payload size, error messages include the
 * error code, and every other message has its payload decoded as text.
 * The sequence number is included for the two sequence-carrying flags.
 * @param {object} msg Protocol message.
 * @returns {string} Human-readable summary.
 */
function messageToString(msg) {
  const eventLabel = msg.event === void 0 ? "NoEvent" : getEventTypeName(msg.event);
  const typeLabel = getMsgTypeName(msg.type);
  const prefix = `MsgType: ${typeLabel}, EventType: ${eventLabel}`;
  const payloadText = () => new TextDecoder().decode(msg.payload);
  if (msg.type === 15 /* Error */) {
    return `${prefix}, ErrorCode: ${msg.errorCode}, Payload: ${payloadText()}`;
  }
  const isAudio = msg.type === 11 /* AudioOnlyServer */ || msg.type === 2 /* AudioOnlyClient */;
  const hasSequence = msg.flag === 1 /* PositiveSeq */ || msg.flag === 3 /* NegativeSeq */;
  const sequencePart = hasSequence ? `, Sequence: ${msg.sequence}` : "";
  const payloadPart = isAudio ? `PayloadSize: ${msg.payload.length}` : `Payload: ${payloadText()}`;
  return `${prefix}${sequencePart}, ${payloadPart}`;
}
157
+ __name(messageToString, "messageToString");
158
/**
 * Build an empty protocol message with the default header settings
 * (version 1, one 4-byte header word, JSON serialization, no compression).
 * @param {number} msgType Message-type code.
 * @param {number} flag Message flag bits.
 * @returns {object} The message, with a non-enumerable `toString` that
 *   delegates to `messageToString`.
 */
function createMessage(msgType, flag) {
  const describe = /* @__PURE__ */ __name(function() {
    return messageToString(this);
  }, "value");
  const fields = {
    type: msgType,
    flag,
    version: 1 /* Version1 */,
    headerSize: 1 /* HeaderSize4 */,
    serialization: 1 /* JSON */,
    compression: 0 /* None */,
    payload: new Uint8Array(0)
  };
  // Non-enumerable so the method does not show up when the message is
  // spread or iterated.
  return Object.defineProperty(fields, "toString", {
    value: describe,
    writable: true,
    configurable: true,
    enumerable: false
  });
}
178
+ __name(createMessage, "createMessage");
179
/**
 * Serialize a protocol message into one binary frame: the packed header
 * followed by the output of every writer selected for the message.
 * @param {object} msg Protocol message.
 * @returns {Uint8Array} Encoded frame.
 */
function marshalMessage(msg) {
  // Header is `headerSize` 4-byte words; only the first three bytes are set.
  const header = new Uint8Array(4 * msg.headerSize);
  header[0] = msg.version << 4 | msg.headerSize;
  header[1] = msg.type << 4 | msg.flag;
  header[2] = msg.serialization << 4 | msg.compression;
  const parts = [header];
  for (const write of getWriters(msg)) {
    const chunk = write(msg);
    // Writers return null when their field is absent from the message.
    if (chunk) parts.push(chunk);
  }
  let total = 0;
  for (const part of parts) {
    total += part.length;
  }
  const frame = new Uint8Array(total);
  let cursor = 0;
  for (const part of parts) {
    frame.set(part, cursor);
    cursor += part.length;
  }
  return frame;
}
201
+ __name(marshalMessage, "marshalMessage");
202
/**
 * Decode one binary frame into a protocol message.
 * @param {Uint8Array} data Raw frame bytes.
 * @returns {object} Decoded message with a non-enumerable `toString`.
 * @throws {Error} When the frame is shorter than three bytes, has an
 *   unsupported type, or a field runs past the end of the data.
 */
function unmarshalMessage(data) {
  if (data.length < 3) {
    throw new Error(`data too short: expected at least 3 bytes, got ${data.length}`);
  }
  // Each header byte packs two 4-bit fields: high nibble, then low nibble.
  const versionAndHeaderSize = data[0];
  const typeAndFlag = data[1];
  const serializationAndCompression = data[2];
  const describe = /* @__PURE__ */ __name(function() {
    return messageToString(this);
  }, "value");
  const msg = Object.defineProperty({
    version: versionAndHeaderSize >> 4,
    headerSize: versionAndHeaderSize & 15,
    type: typeAndFlag >> 4,
    flag: typeAndFlag & 15,
    serialization: serializationAndCompression >> 4,
    compression: serializationAndCompression & 15,
    payload: new Uint8Array(0)
  }, "toString", {
    value: describe,
    writable: true,
    configurable: true,
    enumerable: false
  });
  // Body fields start right after the header words.
  let cursor = 4 * msg.headerSize;
  for (const read of getReaders(msg)) {
    cursor = read(msg, data, cursor);
  }
  return msg;
}
234
+ __name(unmarshalMessage, "unmarshalMessage");
235
/**
 * Choose the field writers for a message, in the order they are emitted:
 * event and session id (WithEvent flag), then sequence or error code
 * depending on the message type, then the payload.
 *
 * NOTE(review): for an Error message with the WithEvent flag this order
 * (event before error code) differs from the order used by getReaders
 * (error code before event) - confirm which matches the wire format.
 * @param {object} msg Protocol message.
 * @returns {Function[]} Writers, each `(msg) => Uint8Array | null`.
 * @throws {Error} For an unsupported message type.
 */
function getWriters(msg) {
  const { type, flag } = msg;
  const chain = flag === 4 /* WithEvent */ ? [writeEvent, writeSessionId] : [];
  // AudioOnlyClient, AudioOnlyServer, FrontEndResultServer,
  // FullClientRequest, FullServerResponse
  const sequencedTypes = [2, 11, 12, 1, 9];
  if (type === 15 /* Error */) {
    chain.push(writeErrorCode);
  } else if (sequencedTypes.includes(type)) {
    if (flag === 1 /* PositiveSeq */ || flag === 3 /* NegativeSeq */) {
      chain.push(writeSequence);
    }
  } else {
    throw new Error(`unsupported message type: ${msg.type}`);
  }
  chain.push(writePayload);
  return chain;
}
259
+ __name(getWriters, "getWriters");
260
/**
 * Choose the field readers for a message, in the order they are applied:
 * sequence or error code depending on the message type, then event,
 * session id and connect id (WithEvent flag), then the payload.
 * @param {object} msg Message whose header fields are already decoded.
 * @returns {Function[]} Readers, each `(msg, data, offset) => newOffset`.
 * @throws {Error} For an unsupported message type.
 */
function getReaders(msg) {
  const { type, flag } = msg;
  const chain = [];
  // AudioOnlyClient, AudioOnlyServer, FrontEndResultServer,
  // FullClientRequest, FullServerResponse
  const sequencedTypes = [2, 11, 12, 1, 9];
  if (type === 15 /* Error */) {
    chain.push(readErrorCode);
  } else if (sequencedTypes.includes(type)) {
    if (flag === 1 /* PositiveSeq */ || flag === 3 /* NegativeSeq */) {
      chain.push(readSequence);
    }
  } else {
    throw new Error(`unsupported message type: ${msg.type}`);
  }
  if (flag === 4 /* WithEvent */) {
    chain.push(readEvent, readSessionId, readConnectId);
  }
  chain.push(readPayload);
  return chain;
}
284
+ __name(getReaders, "getReaders");
285
/**
 * Encode the event code as a big-endian signed 32-bit integer.
 * @param {object} msg Protocol message.
 * @returns {Uint8Array|null} Four bytes, or null when `msg.event` is undefined.
 */
function writeEvent(msg) {
  if (msg.event === void 0) return null;
  const bytes = new Uint8Array(4);
  new DataView(bytes.buffer).setInt32(0, msg.event, false);
  return bytes;
}
292
+ __name(writeEvent, "writeEvent");
293
/**
 * Encode the session id as a big-endian uint32 byte length followed by the
 * UTF-8 bytes. Nothing is written when the message has no event or the
 * event is StartConnection, FinishConnection, ConnectionStarted or
 * ConnectionFailed.
 * @param {object} msg Protocol message.
 * @returns {Uint8Array|null} Length-prefixed id bytes, or null when skipped.
 */
function writeSessionId(msg) {
  const { event } = msg;
  if (event === void 0) return null;
  const isConnectionEvent = event === 1 /* StartConnection */ || event === 2 /* FinishConnection */ || event === 50 /* ConnectionStarted */ || event === 51 /* ConnectionFailed */;
  if (isConnectionEvent) return null;
  const idBytes = Buffer2.from(msg.sessionId || "", "utf8");
  const out = new Uint8Array(4 + idBytes.length);
  new DataView(out.buffer).setUint32(0, idBytes.length, false);
  out.set(idBytes, 4);
  return out;
}
312
+ __name(writeSessionId, "writeSessionId");
313
/**
 * Encode the sequence number as a big-endian signed 32-bit integer.
 * @param {object} msg Protocol message.
 * @returns {Uint8Array|null} Four bytes, or null when `msg.sequence` is undefined.
 */
function writeSequence(msg) {
  if (msg.sequence === void 0) return null;
  const bytes = new Uint8Array(4);
  new DataView(bytes.buffer).setInt32(0, msg.sequence, false);
  return bytes;
}
320
+ __name(writeSequence, "writeSequence");
321
/**
 * Encode the error code as a big-endian unsigned 32-bit integer.
 * @param {object} msg Protocol message.
 * @returns {Uint8Array|null} Four bytes, or null when `msg.errorCode` is undefined.
 */
function writeErrorCode(msg) {
  if (msg.errorCode === void 0) return null;
  const bytes = new Uint8Array(4);
  new DataView(bytes.buffer).setUint32(0, msg.errorCode, false);
  return bytes;
}
328
+ __name(writeErrorCode, "writeErrorCode");
329
/**
 * Encode the payload as a big-endian uint32 byte length followed by the
 * payload bytes.
 * @param {object} msg Protocol message with a `payload` byte array.
 * @returns {Uint8Array} Length-prefixed payload.
 */
function writePayload(msg) {
  const { payload } = msg;
  const out = new Uint8Array(4 + payload.length);
  new DataView(out.buffer).setUint32(0, payload.length, false);
  out.set(payload, 4);
  return out;
}
339
+ __name(writePayload, "writePayload");
340
/**
 * Read the event code (big-endian int32) at `offset` into `msg.event`.
 * @param {object} msg Message being populated.
 * @param {Uint8Array} data Frame bytes.
 * @param {number} offset Position of the field.
 * @returns {number} Offset just past the field.
 * @throws {Error} When fewer than four bytes remain.
 */
function readEvent(msg, data, offset) {
  const end = offset + 4;
  if (end > data.length) {
    throw new Error("insufficient data for event");
  }
  // byteOffset keeps this correct when `data` is a view into a larger buffer.
  msg.event = new DataView(data.buffer, data.byteOffset + offset, 4).getInt32(0, false);
  return end;
}
348
+ __name(readEvent, "readEvent");
349
/**
 * Read the length-prefixed session id into `msg.sessionId`.
 *
 * Skipped (offset returned unchanged) when the message has no event or the
 * event is one of the connection-level events. A zero length leaves
 * `msg.sessionId` untouched.
 * @param {object} msg Message being populated.
 * @param {Uint8Array} data Frame bytes.
 * @param {number} initialOffset Position of the field.
 * @returns {number} Offset just past the field.
 * @throws {Error} When the length prefix or the id runs past the data.
 */
function readSessionId(msg, data, initialOffset) {
  const { event } = msg;
  if (event === void 0) return initialOffset;
  const isConnectionEvent = event === 1 /* StartConnection */ || event === 2 /* FinishConnection */ || event === 50 /* ConnectionStarted */ || event === 51 /* ConnectionFailed */ || event === 52 /* ConnectionFinished */;
  if (isConnectionEvent) return initialOffset;
  const bodyStart = initialOffset + 4;
  if (bodyStart > data.length) {
    throw new Error("insufficient data for session ID size");
  }
  const size = new DataView(data.buffer, data.byteOffset + initialOffset, 4).getUint32(0, false);
  if (size === 0) return bodyStart;
  const bodyEnd = bodyStart + size;
  if (bodyEnd > data.length) {
    throw new Error("insufficient data for session ID");
  }
  msg.sessionId = new TextDecoder().decode(data.slice(bodyStart, bodyEnd));
  return bodyEnd;
}
375
+ __name(readSessionId, "readSessionId");
376
/**
 * Read the length-prefixed connect id into `msg.connectId`.
 *
 * Only applies to ConnectionStarted, ConnectionFailed and
 * ConnectionFinished; for any other event (or none) the offset is returned
 * unchanged. A zero length leaves `msg.connectId` untouched.
 * @param {object} msg Message being populated.
 * @param {Uint8Array} data Frame bytes.
 * @param {number} initialOffset Position of the field.
 * @returns {number} Offset just past the field.
 * @throws {Error} When the length prefix or the id runs past the data.
 */
function readConnectId(msg, data, initialOffset) {
  const { event } = msg;
  const carriesConnectId = event === 50 /* ConnectionStarted */ || event === 51 /* ConnectionFailed */ || event === 52 /* ConnectionFinished */;
  if (!carriesConnectId) return initialOffset;
  const bodyStart = initialOffset + 4;
  if (bodyStart > data.length) {
    throw new Error("insufficient data for connect ID size");
  }
  const size = new DataView(data.buffer, data.byteOffset + initialOffset, 4).getUint32(0, false);
  if (size === 0) return bodyStart;
  const bodyEnd = bodyStart + size;
  if (bodyEnd > data.length) {
    throw new Error("insufficient data for connect ID");
  }
  msg.connectId = new TextDecoder().decode(data.slice(bodyStart, bodyEnd));
  return bodyEnd;
}
402
+ __name(readConnectId, "readConnectId");
403
/**
 * Read the sequence number (big-endian int32) at `offset` into `msg.sequence`.
 * @param {object} msg Message being populated.
 * @param {Uint8Array} data Frame bytes.
 * @param {number} offset Position of the field.
 * @returns {number} Offset just past the field.
 * @throws {Error} When fewer than four bytes remain.
 */
function readSequence(msg, data, offset) {
  const end = offset + 4;
  if (end > data.length) {
    throw new Error("insufficient data for sequence");
  }
  msg.sequence = new DataView(data.buffer, data.byteOffset + offset, 4).getInt32(0, false);
  return end;
}
411
+ __name(readSequence, "readSequence");
412
/**
 * Read the error code (big-endian uint32) at `offset` into `msg.errorCode`.
 * @param {object} msg Message being populated.
 * @param {Uint8Array} data Frame bytes.
 * @param {number} offset Position of the field.
 * @returns {number} Offset just past the field.
 * @throws {Error} When fewer than four bytes remain.
 */
function readErrorCode(msg, data, offset) {
  const end = offset + 4;
  if (end > data.length) {
    throw new Error("insufficient data for error code");
  }
  msg.errorCode = new DataView(data.buffer, data.byteOffset + offset, 4).getUint32(0, false);
  return end;
}
420
+ __name(readErrorCode, "readErrorCode");
421
/**
 * Read the length-prefixed payload into `msg.payload` (a copy of the bytes).
 * A zero length leaves `msg.payload` untouched.
 * @param {object} msg Message being populated.
 * @param {Uint8Array} data Frame bytes.
 * @param {number} initialOffset Position of the field.
 * @returns {number} Offset just past the field.
 * @throws {Error} When the length prefix or the payload runs past the data.
 */
function readPayload(msg, data, initialOffset) {
  const bodyStart = initialOffset + 4;
  if (bodyStart > data.length) {
    throw new Error("insufficient data for payload size");
  }
  const size = new DataView(data.buffer, data.byteOffset + initialOffset, 4).getUint32(0, false);
  if (size === 0) return bodyStart;
  const bodyEnd = bodyStart + size;
  if (bodyEnd > data.length) {
    throw new Error("insufficient data for payload");
  }
  msg.payload = data.slice(bodyStart, bodyEnd);
  return bodyEnd;
}
438
+ __name(readPayload, "readPayload");
439
// Per-socket receive state: decoded messages nobody has asked for yet
// (`queue`) and resolvers still waiting for a message (`callbacks`).
var wsStates = /* @__PURE__ */ new Map();
/**
 * Return the receive state for a socket, creating and registering an empty
 * one on first use.
 * @param {object} ws Socket used as the map key.
 * @returns {{queue: Array, callbacks: Array}} The socket's state.
 */
function getOrCreateState(ws) {
  const existing = wsStates.get(ws);
  if (existing) {
    return existing;
  }
  const created = { queue: [], callbacks: [] };
  wsStates.set(ws, created);
  return created;
}
448
+ __name(getOrCreateState, "getOrCreateState");
449
/**
 * Attach the message and close listeners to a socket, once per socket.
 *
 * Every incoming frame is decoded and handed to the oldest waiting
 * resolver, or queued when nobody is waiting. The socket's state is
 * dropped when the socket closes.
 *
 * A frame that cannot be decoded is reported through the socket's "error"
 * event instead of being thrown from inside the listener. A throw there
 * would become an uncaught exception and leave the waiting receiver
 * unsettled; emitting "error" lets listeners registered by the receiver
 * reject. If no "error" listener exists, `emit` itself throws, which
 * matches the previous behaviour.
 * @param {object} ws Socket exposing `on` and `emit`.
 */
function setupMessageHandler(ws) {
  if (wsStates.has(ws)) {
    return;
  }
  const state = getOrCreateState(ws);
  ws.on("message", (data) => {
    let msg;
    try {
      let uint8Data;
      if (Buffer2.isBuffer(data) || data instanceof ArrayBuffer) {
        uint8Data = new Uint8Array(data);
      } else if (data instanceof Uint8Array) {
        uint8Data = data;
      } else {
        throw new Error(`Unexpected WebSocket message type: ${typeof data}`);
      }
      msg = unmarshalMessage(uint8Data);
    } catch (error) {
      // Keep the original message text; `cause` preserves the stack.
      ws.emit("error", new Error(`Error processing message: ${error}`, { cause: error }));
      return;
    }
    if (state.callbacks.length > 0) {
      const callback = state.callbacks.shift();
      if (callback) callback(msg);
    } else {
      state.queue.push(msg);
    }
  });
  ws.on("close", () => {
    wsStates.delete(ws);
  });
}
480
+ __name(setupMessageHandler, "setupMessageHandler");
481
/**
 * Wait for the next decoded protocol message on a socket.
 *
 * Resolves immediately with a queued message when one is available;
 * otherwise registers a resolver that the message handler will call.
 *
 * The promise rejects when the socket emits "error" and also when the
 * socket closes before a message arrives. Without the close handling, a
 * server-side disconnect left the caller waiting forever.
 * @param {object} ws Socket (an event emitter; `readyState` is consulted
 *   when present).
 * @returns {Promise<object>} The next message.
 */
async function receiveMessage(ws) {
  setupMessageHandler(ws);
  return new Promise((resolve, reject) => {
    const state = wsStates.get(ws);
    if (!state) {
      reject(new Error("WebSocket state not found"));
      return;
    }
    if (state.queue.length > 0) {
      const msg = state.queue.shift();
      if (msg) {
        resolve(msg);
        return;
      }
    }
    // 3 is the CLOSED ready state: no further message can arrive, so
    // waiting would never finish. Sockets without `readyState` skip this.
    if (ws.readyState === 3) {
      reject(new Error("WebSocket is closed"));
      return;
    }
    // Remove every trace of this pending receive so that settling through
    // one path cannot leave stale listeners or resolvers behind.
    const detach = () => {
      const index = state.callbacks.indexOf(resolver);
      if (index !== -1) {
        state.callbacks.splice(index, 1);
      }
      ws.removeListener("error", errorHandler);
      ws.removeListener("close", closeHandler);
    };
    const errorHandler = (error) => {
      detach();
      reject(error);
    };
    const closeHandler = () => {
      detach();
      reject(new Error("WebSocket closed before a message was received"));
    };
    const resolver = (msg) => {
      detach();
      resolve(msg);
    };
    state.callbacks.push(resolver);
    ws.once("error", errorHandler);
    ws.once("close", closeHandler);
  });
}
511
+ __name(receiveMessage, "receiveMessage");
512
/**
 * Receive the next message and require it to have the given type and event.
 *
 * On a mismatch the error keeps the original "Unexpected message" prefix
 * and additionally states what was expected. For non-audio messages it
 * also includes the error code (if any) and the decoded payload, so that a
 * server-side rejection is visible to the caller instead of being reduced
 * to a bare type/event pair.
 * @param {object} ws Socket to read from.
 * @param {number} msgType Expected message-type code.
 * @param {number} eventType Expected event code.
 * @returns {Promise<object>} The matching message.
 * @throws {Error} When the next message has a different type or event.
 */
async function waitForEvent(ws, msgType, eventType) {
  const msg = await receiveMessage(ws);
  if (msg.type === msgType && msg.event === eventType) {
    return msg;
  }
  let detail = `Unexpected message: type=${getMsgTypeName(msg.type)}, event=${getEventTypeName(msg.event || 0)}`;
  detail += ` (expected type=${getMsgTypeName(msgType)}, event=${getEventTypeName(eventType)})`;
  if (msg.errorCode !== void 0) {
    detail += `, errorCode=${msg.errorCode}`;
  }
  // Audio payloads are binary; only decode payloads of other message types.
  const isAudio = msg.type === 11 /* AudioOnlyServer */ || msg.type === 2 /* AudioOnlyClient */;
  if (!isAudio && msg.payload && msg.payload.length > 0) {
    detail += `, payload=${new TextDecoder().decode(msg.payload)}`;
  }
  throw new Error(detail);
}
521
+ __name(waitForEvent, "waitForEvent");
522
/**
 * Send the StartConnection request (empty JSON payload).
 * @param {object} ws Socket whose `send(data, callback)` is used.
 * @returns {Promise<void>} Settles with the outcome of the send callback.
 */
async function startConnection(ws) {
  const request = createMessage(1 /* FullClientRequest */, 4 /* WithEvent */);
  request.event = 1 /* StartConnection */;
  request.payload = new TextEncoder().encode("{}");
  const frame = marshalMessage(request);
  return new Promise((resolve, reject) => {
    ws.send(frame, (error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
}
534
+ __name(startConnection, "startConnection");
535
/**
 * Send the FinishConnection request (empty JSON payload).
 * @param {object} ws Socket whose `send(data, callback)` is used.
 * @returns {Promise<void>} Settles with the outcome of the send callback.
 */
async function finishConnection(ws) {
  const request = createMessage(1 /* FullClientRequest */, 4 /* WithEvent */);
  request.event = 2 /* FinishConnection */;
  request.payload = new TextEncoder().encode("{}");
  const frame = marshalMessage(request);
  return new Promise((resolve, reject) => {
    ws.send(frame, (error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
}
547
+ __name(finishConnection, "finishConnection");
548
/**
 * Send the StartSession request for a session.
 * @param {object} ws Socket whose `send(data, callback)` is used.
 * @param {Uint8Array} payload Encoded session parameters.
 * @param {string} sessionId Session identifier.
 * @returns {Promise<void>} Settles with the outcome of the send callback.
 */
async function startSession(ws, payload, sessionId) {
  const request = createMessage(1 /* FullClientRequest */, 4 /* WithEvent */);
  request.event = 100 /* StartSession */;
  request.sessionId = sessionId;
  request.payload = payload;
  const frame = marshalMessage(request);
  return new Promise((resolve, reject) => {
    ws.send(frame, (error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
}
561
+ __name(startSession, "startSession");
562
/**
 * Send the FinishSession request (empty JSON payload) for a session.
 * @param {object} ws Socket whose `send(data, callback)` is used.
 * @param {string} sessionId Session identifier.
 * @returns {Promise<void>} Settles with the outcome of the send callback.
 */
async function finishSession(ws, sessionId) {
  const request = createMessage(1 /* FullClientRequest */, 4 /* WithEvent */);
  request.event = 102 /* FinishSession */;
  request.sessionId = sessionId;
  request.payload = new TextEncoder().encode("{}");
  const frame = marshalMessage(request);
  return new Promise((resolve, reject) => {
    ws.send(frame, (error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
}
575
+ __name(finishSession, "finishSession");
576
/**
 * Send a TaskRequest for a session.
 * @param {object} ws Socket whose `send(data, callback)` is used.
 * @param {Uint8Array} payload Encoded task parameters.
 * @param {string} sessionId Session identifier.
 * @returns {Promise<void>} Settles with the outcome of the send callback.
 */
async function taskRequest(ws, payload, sessionId) {
  const request = createMessage(1 /* FullClientRequest */, 4 /* WithEvent */);
  request.event = 200 /* TaskRequest */;
  request.sessionId = sessionId;
  request.payload = payload;
  const frame = marshalMessage(request);
  return new Promise((resolve, reject) => {
    ws.send(frame, (error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
}
589
+ __name(taskRequest, "taskRequest");
590
+
591
+ // src/tts/providers/doubao.ts
592
+ import WebSocket from "ws";
593
/**
 * Doubao (Volcengine) text-to-speech provider using the bidirectional
 * WebSocket protocol implemented by the helpers in this file.
 */
var DoubaoTTS = class extends BaseTTS {
  static {
    __name(this, "DoubaoTTS");
  }
  name = "doubao";
  /** Volcengine App ID */
  appId;
  /** Volcengine Access Token */
  accessToken;
  /** Volcengine Resource ID */
  resourceId;
  /** Sample rate */
  sampleRate;
  /** Whether timestamps are enabled */
  enableTimestamp;
  /**
   * @param {object} options Provider options; on top of the base settings
   *   it reads `appId`, `accessToken`, `resourceId`, `sampleRate` and
   *   `enableTimestamp`.
   */
  constructor(options) {
    super(options);
    this.appId = options.appId || "";
    this.accessToken = options.accessToken || "";
    this.resourceId = options.resourceId || "seed-tts-2.0";
    this.sampleRate = options.sampleRate || 24e3;
    this.enableTimestamp = options.enableTimestamp ?? false;
    this.baseUrl = options.baseUrl || "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
    this.voice = options.voice || "zh_female_tianmeixiaoyuan_moon_bigtts";
    this.format = options.format || "mp3";
  }
  /**
   * Build the authentication request headers.
   * @returns {object} Header map; a fresh connect id is generated per call.
   */
  buildAuthHeaders() {
    return {
      "X-Api-App-Key": this.appId,
      "X-Api-Access-Key": this.accessToken,
      "X-Api-Resource-Id": this.resourceId,
      "X-Api-Connect-Id": randomUUID()
    };
  }
  /**
   * Build the session request payload.
   * @returns {Uint8Array} UTF-8 encoded JSON.
   */
  buildSessionPayload() {
    const payload = {
      user: {
        uid: randomUUID()
      },
      req_params: {
        speaker: this.voice,
        audio_params: {
          format: this.format,
          sample_rate: this.sampleRate,
          enable_timestamp: this.enableTimestamp
        },
        additions: JSON.stringify({
          disable_markdown_filter: true
        })
      },
      event: 100 /* StartSession */
    };
    return new TextEncoder().encode(JSON.stringify(payload));
  }
  /**
   * Build the task request payload.
   * @param {string} text Text to synthesize.
   * @returns {Uint8Array} UTF-8 encoded JSON.
   */
  buildTaskPayload(text) {
    const payload = {
      user: {
        uid: randomUUID()
      },
      req_params: {
        speaker: this.voice,
        audio_params: {
          format: this.format,
          sample_rate: this.sampleRate,
          enable_timestamp: this.enableTimestamp
        },
        additions: JSON.stringify({
          disable_markdown_filter: true
        }),
        text
      },
      event: 200 /* TaskRequest */
    };
    return new TextEncoder().encode(JSON.stringify(payload));
  }
  /**
   * Merge multiple Uint8Arrays into one.
   * @param {Uint8Array[]} arrays Chunks, in order.
   * @returns {Uint8Array} The concatenation.
   */
  concatArrays(arrays) {
    const totalLength = arrays.reduce((sum, arr) => sum + arr.length, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const arr of arrays) {
      result.set(arr, offset);
      offset += arr.length;
    }
    return result;
  }
  /**
   * Synthesize speech.
   *
   * Opens a socket, runs the connection and session handshakes, sends the
   * text, collects audio chunks until the session finishes, then closes
   * the connection. The socket is always closed, including when opening it
   * fails.
   * @param {object} request Request; only `request.text` is read.
   * @returns {Promise<{audio: Buffer, format: string}>} The collected audio.
   * @throws {Error} On a protocol error message, a failed or canceled
   *   session, an unexpected message type, or when no audio was received.
   */
  async synthesize(request) {
    const text = request.text;
    const ws = new WebSocket(this.baseUrl, {
      headers: this.buildAuthHeaders(),
      skipUTF8Validation: true
    });
    try {
      await new Promise((resolve, reject) => {
        ws.on("open", resolve);
        // Left attached for the socket's lifetime: after the promise has
        // settled it is a no-op, but it keeps later "error" events from
        // being treated as unhandled.
        ws.on("error", reject);
      });
      await startConnection(ws);
      await waitForEvent(ws, 9 /* FullServerResponse */, 50 /* ConnectionStarted */);
      const sessionId = randomUUID();
      const sessionPayload = this.buildSessionPayload();
      await startSession(ws, sessionPayload, sessionId);
      await waitForEvent(ws, 9 /* FullServerResponse */, 150 /* SessionStarted */);
      const taskPayload = this.buildTaskPayload(text);
      await taskRequest(ws, taskPayload, sessionId);
      await finishSession(ws, sessionId);
      const audioChunks = [];
      let sessionFinished = false;
      while (!sessionFinished) {
        const msg = await receiveMessage(ws);
        switch (msg.type) {
          case 11 /* AudioOnlyServer */:
            audioChunks.push(msg.payload);
            break;
          case 9 /* FullServerResponse */:
            break;
          case 15 /* Error */:
            throw new Error(
              `TTS error: ${msg.errorCode}, ${new TextDecoder().decode(msg.payload)}`
            );
          default:
            throw new Error(`Unexpected message type: ${msg.type}`);
        }
        // A failed or canceled session is never followed by
        // SessionFinished, so stop here instead of waiting indefinitely.
        if (msg.event === 153 /* SessionFailed */ || msg.event === 151 /* SessionCanceled */) {
          throw new Error(
            `TTS session ended early: ${getEventTypeName(msg.event)}, ${new TextDecoder().decode(msg.payload)}`
          );
        }
        sessionFinished = msg.event === 152 /* SessionFinished */;
      }
      await finishConnection(ws);
      await waitForEvent(ws, 9 /* FullServerResponse */, 52 /* ConnectionFinished */);
      const audio = this.concatArrays(audioChunks);
      if (audio.length === 0) {
        throw new Error("No audio received from TTS service");
      }
      return {
        audio: Buffer3.from(audio),
        format: this.format
      };
    } finally {
      ws.close();
    }
  }
};
748
+ registerTTSProvider("doubao", DoubaoTTS);
749
+
750
+ // src/tts/providers/minimax.ts
751
/**
 * MiniMax provider. As written it performs no request: `synthesize` always
 * resolves to an empty audio buffer.
 */
var MinimaxTTS = class extends BaseTTS {
  static {
    __name(this, "MinimaxTTS");
  }
  name = "minimax";
  /**
   * @param {object} options Provider options; `baseUrl` and `model` fall
   *   back to MiniMax defaults.
   */
  constructor(options) {
    super(options);
    const { baseUrl, model } = options;
    this.baseUrl = baseUrl || "https://api.minimax.chat/v1";
    this.model = model || "speech-01-turbo";
  }
  /**
   * @param {object} request Synthesis request.
   * @returns {Promise<{audio: Uint8Array, format: string, duration: number}>}
   *   Empty audio with the resolved format and a duration of 0.
   */
  async synthesize(request) {
    const { format } = this.buildRequestOptions(request);
    const audio = new Uint8Array(0);
    return { audio, format: format || "mp3", duration: 0 };
  }
};
770
+ registerTTSProvider("minimax", MinimaxTTS);
771
+
772
+ // src/tts/providers/qwen.ts
773
/**
 * Qwen (DashScope) provider. As written it performs no request:
 * `synthesize` always resolves to an empty audio buffer.
 */
var QwenTTS = class extends BaseTTS {
  static {
    __name(this, "QwenTTS");
  }
  name = "qwen";
  /**
   * @param {object} options Provider options; `baseUrl` and `model` fall
   *   back to DashScope defaults.
   */
  constructor(options) {
    super(options);
    const { baseUrl, model } = options;
    this.baseUrl = baseUrl || "https://dashscope.aliyuncs.com/api/v1";
    // NOTE(review): this default looks like a speech-recognition model
    // name rather than a TTS one - confirm against the DashScope catalogue.
    this.model = model || "paraformer-realtime-v2";
  }
  /**
   * @param {object} request Synthesis request.
   * @returns {Promise<{audio: Uint8Array, format: string, duration: number}>}
   *   Empty audio with the resolved format and a duration of 0.
   */
  async synthesize(request) {
    const { format } = this.buildRequestOptions(request);
    const audio = new Uint8Array(0);
    return { audio, format: format || "mp3", duration: 0 };
  }
};
792
+ registerTTSProvider("qwen", QwenTTS);
793
+
794
+ // src/tts/providers/openai.ts
795
/**
 * OpenAI provider. As written it performs no request: `synthesize` always
 * resolves to an empty audio buffer.
 */
var TTS1 = class extends BaseTTS {
  static {
    __name(this, "TTS1");
  }
  name = "openai";
  /**
   * @param {object} options Provider options; `baseUrl` and `model` fall
   *   back to OpenAI defaults.
   */
  constructor(options) {
    super(options);
    const { baseUrl, model } = options;
    this.baseUrl = baseUrl || "https://api.openai.com/v1";
    this.model = model || "tts-1";
  }
  /**
   * @param {object} request Synthesis request.
   * @returns {Promise<{audio: Uint8Array, format: string, duration: number}>}
   *   Empty audio with the resolved format and a duration of 0.
   */
  async synthesize(request) {
    const { format } = this.buildRequestOptions(request);
    const audio = new Uint8Array(0);
    return { audio, format: format || "mp3", duration: 0 };
  }
};
814
+ registerTTSProvider("openai", TTS1);
815
+
816
+ // src/tts/providers/gemini.ts
817
/**
 * Gemini provider. As written it performs no request: `synthesize` always
 * resolves to an empty audio buffer.
 */
var GeminiTTS = class extends BaseTTS {
  static {
    __name(this, "GeminiTTS");
  }
  name = "gemini";
  /**
   * @param {object} options Provider options; `baseUrl` and `model` fall
   *   back to Gemini defaults.
   */
  constructor(options) {
    super(options);
    const { baseUrl, model } = options;
    this.baseUrl = baseUrl || "https://generativelanguage.googleapis.com/v1beta";
    this.model = model || "gemini-tts";
  }
  /**
   * @param {object} request Synthesis request.
   * @returns {Promise<{audio: Uint8Array, format: string, duration: number}>}
   *   Empty audio with the resolved format and a duration of 0.
   */
  async synthesize(request) {
    const { format } = this.buildRequestOptions(request);
    const audio = new Uint8Array(0);
    return { audio, format: format || "mp3", duration: 0 };
  }
};
836
+ registerTTSProvider("gemini", GeminiTTS);
837
+
838
+ // src/tts/utils/collect.ts
839
+ import { Buffer as Buffer4 } from "buffer";
840
/**
 * Return the response's audio as a standalone Uint8Array.
 *
 * Buffer is a subclass of Uint8Array, so a single `instanceof` check
 * covers both; the separate Buffer branch that used to follow it could
 * never run. Audio of any other kind yields an empty array, as before.
 * @param {object} response Synthesis response carrying `audio`.
 * @param {object} [options]
 * @param {Function} [options.onComplete] Called with the collected bytes.
 * @returns {Promise<Uint8Array>} A copy of the audio bytes.
 */
async function collectAudio(response, options = {}) {
  const { audio } = response;
  // Passing a typed array to the constructor copies its elements, so the
  // result never aliases the caller's buffer.
  const result = audio instanceof Uint8Array ? new Uint8Array(audio) : new Uint8Array(0);
  if (options.onComplete) {
    options.onComplete(result);
  }
  return result;
}
854
+ __name(collectAudio, "collectAudio");
855
/**
 * @param {*} value Value to test.
 * @returns {boolean} True when `value` is a Uint8Array (or a subclass
 *   instance such as a Node Buffer).
 */
function isUint8Array(value) {
  const matches = value instanceof Uint8Array;
  return matches;
}
858
+ __name(isUint8Array, "isUint8Array");
859
/**
 * @param {*} value Value to test.
 * @returns {boolean} True when `value` is a Node Buffer.
 */
function isBuffer(value) {
  const matches = Buffer4.isBuffer(value);
  return matches;
}
862
+ __name(isBuffer, "isBuffer");
863
/**
 * Concatenate byte arrays into one newly allocated Uint8Array.
 * @param {Uint8Array[]} arrays Chunks, in order.
 * @returns {Uint8Array} All chunks back to back.
 */
function concatUint8Arrays(arrays) {
  let totalLength = 0;
  for (const part of arrays) {
    totalLength += part.length;
  }
  const joined = new Uint8Array(totalLength);
  // The accumulator is the write position for the next chunk.
  arrays.reduce((cursor, part) => {
    joined.set(part, cursor);
    return cursor + part.length;
  }, 0);
  return joined;
}
873
+ __name(concatUint8Arrays, "concatUint8Arrays");
874
+
875
+ // src/tts/utils/save.ts
876
+ import { writeFile } from "fs/promises";
877
/**
 * Write the response's audio to disk.
 *
 * The file name defaults to `tts_<timestamp>.<format>`; when a directory
 * is given the two are joined with "/".
 * @param {object} response Synthesis response with `audio` and `format`.
 * @param {object} [options]
 * @param {string} [options.filename] File name to use instead of the default.
 * @param {string} [options.directory] Directory to write into.
 * @returns {Promise<string>} The path that was written.
 * @throws {Error} "Invalid audio data" when `audio` is not a Buffer or Uint8Array.
 */
async function saveAudio(response, options = {}) {
  const { format, audio } = response;
  const stamp = Date.now();
  const filename = options.filename || `tts_${stamp}.${format}`;
  const { directory } = options;
  const filepath = directory ? `${directory}/${filename}` : filename;
  // Buffer extends Uint8Array, so this guard rejects everything the
  // conversion below cannot handle.
  if (!(audio instanceof Uint8Array)) {
    throw new Error("Invalid audio data");
  }
  const buffer = audio instanceof Buffer ? audio : Buffer.from(audio);
  await writeFile(filepath, buffer);
  return filepath;
}
893
+ __name(saveAudio, "saveAudio");
894
+
895
+ // src/tts/utils/play.ts
896
+ import { spawn } from "child_process";
897
/**
 * Play the response's audio by piping it to an external player's stdin.
 *
 * NOTE(review): the default player is "afplay"; confirm that it can read
 * audio from stdin. `options.args` is available for players that need an
 * explicit argument to do so.
 * @param {object} response Synthesis response carrying `audio`.
 * @param {object} [options]
 * @param {string} [options.player] Executable to spawn (default "afplay").
 * @param {string[]} [options.args] Arguments for the player (default none).
 * @returns {Promise<void>} Resolves when the player exits with code 0.
 * @throws {Error} "Invalid audio data" when `audio` is not a Buffer or
 *   Uint8Array; rejects when the player cannot be started or exits with a
 *   non-zero code.
 */
async function playAudio(response, options = {}) {
  const player = options.player || "afplay";
  const args = options.args ?? [];
  const { audio } = response;
  if (!(audio instanceof Uint8Array)) {
    throw new Error("Invalid audio data");
  }
  const buffer = audio instanceof Buffer ? audio : Buffer.from(audio);
  return new Promise((resolve, reject) => {
    const proc = spawn(player, args, { stdio: ["pipe", "inherit", "inherit"] });
    // Listeners go on before any data is written so no event is missed.
    proc.on("error", reject);
    proc.on("close", (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`Player exited with code ${code}`));
      }
    });
    // A player that fails to start or exits before reading everything
    // breaks the pipe. Without this listener the resulting stream error
    // would be an uncaught exception; the outcome is already reported
    // through the "error" and "close" events above.
    proc.stdin.on("error", () => {});
    proc.stdin.end(buffer);
  });
}
921
+ __name(playAudio, "playAudio");
922
+
923
+ // src/tts/utils/tee.ts
924
/**
 * Collect the response's audio once, then optionally save it and/or play
 * it (in that order, one after the other) before handing it back.
 * @param {object} response Synthesis response.
 * @param {object} [options]
 * @param {object} [options.save] Options for `saveAudio`; saving is skipped when falsy.
 * @param {object} [options.play] Options for `playAudio`; playback is skipped when falsy.
 * @returns {Promise<object>} A copy of `response` whose `audio` is the collected bytes.
 */
async function teeAudio(response, options = {}) {
  const audio = await collectAudio(response);
  // Each consumer receives its own shallow copy of the response.
  const withAudio = () => ({ ...response, audio });
  const { save, play } = options;
  if (save) {
    await saveAudio(withAudio(), save);
  }
  if (play) {
    await playAudio(withAudio(), play);
  }
  return withAudio();
}
934
+ __name(teeAudio, "teeAudio");
935
+
936
+ export {
937
+ BaseTTS,
938
+ registerTTSProvider,
939
+ createTTS,
940
+ getTTSProviders,
941
+ synthesize,
942
+ collectAudio,
943
+ saveAudio,
944
+ playAudio,
945
+ teeAudio
946
+ };
947
+ //# sourceMappingURL=chunk-HONGPTUH.js.map