@amaster.ai/tts-client 1.1.8 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +959 -130
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +106 -46
- package/dist/index.d.ts +106 -46
- package/dist/index.js +955 -129
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -20,184 +20,1013 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
-
createTTSClient: () =>
|
|
23
|
+
createTTSClient: () => createTTSClient,
|
|
24
|
+
createTTSSpeakController: () => createTTSSpeakController,
|
|
25
|
+
preprocessTTSContent: () => preprocessTTSContent,
|
|
26
|
+
splitTextIntoFragments: () => splitTextIntoFragments
|
|
24
27
|
});
|
|
25
28
|
module.exports = __toCommonJS(index_exports);
|
|
26
29
|
|
|
27
30
|
// src/tts-client.ts
|
|
28
31
|
var TTS_PATH = "/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime";
|
|
29
|
-
|
|
32
|
+
var TTS_MAX_FRAGMENT_LENGTH = 2e3;
|
|
33
|
+
function isBrowserEnvironment() {
|
|
34
|
+
return typeof window !== "undefined";
|
|
35
|
+
}
|
|
36
|
+
function getWeightedTextLength(text) {
|
|
37
|
+
let length = 0;
|
|
38
|
+
for (const char of text) {
|
|
39
|
+
length += /[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]/.test(char) ? 2 : 1;
|
|
40
|
+
}
|
|
41
|
+
return length;
|
|
42
|
+
}
|
|
43
|
+
function splitOversizedSegment(segment, maxLength) {
|
|
44
|
+
const fragments = [];
|
|
45
|
+
let current = "";
|
|
46
|
+
let currentLength = 0;
|
|
47
|
+
for (const char of segment) {
|
|
48
|
+
const charLength = getWeightedTextLength(char);
|
|
49
|
+
if (current && currentLength + charLength > maxLength) {
|
|
50
|
+
fragments.push(current);
|
|
51
|
+
current = char;
|
|
52
|
+
currentLength = charLength;
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
current += char;
|
|
56
|
+
currentLength += charLength;
|
|
57
|
+
}
|
|
58
|
+
if (current) {
|
|
59
|
+
fragments.push(current);
|
|
60
|
+
}
|
|
61
|
+
return fragments;
|
|
62
|
+
}
|
|
63
|
+
function splitTextIntoFragments(text, maxLength = TTS_MAX_FRAGMENT_LENGTH) {
|
|
64
|
+
const fragments = [];
|
|
65
|
+
const segments = text.match(/.+?(?:\r?\n+|$)/gs) ?? [];
|
|
66
|
+
const softBreakPattern = /(?<=[。!?;.!?;])|(?<=[,、,::])|(?<=\s)/;
|
|
67
|
+
let current = "";
|
|
68
|
+
const pushFragment = (fragment) => {
|
|
69
|
+
const trimmed = fragment.trim();
|
|
70
|
+
if (trimmed) {
|
|
71
|
+
fragments.push(trimmed);
|
|
72
|
+
}
|
|
73
|
+
};
|
|
74
|
+
const appendPart = (part) => {
|
|
75
|
+
if (!part.trim()) {
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
if (getWeightedTextLength(part) > maxLength) {
|
|
79
|
+
for (const fragment of splitOversizedSegment(part, maxLength)) {
|
|
80
|
+
pushFragment(fragment);
|
|
81
|
+
}
|
|
82
|
+
current = "";
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
const next = current ? `${current}${part}` : part;
|
|
86
|
+
if (getWeightedTextLength(next) <= maxLength) {
|
|
87
|
+
current = next;
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
90
|
+
pushFragment(current);
|
|
91
|
+
current = part;
|
|
92
|
+
};
|
|
93
|
+
for (const segment of segments) {
|
|
94
|
+
const parts = segment.split(softBreakPattern).filter((part) => part.trim());
|
|
95
|
+
if (!parts.length) {
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
for (const part of parts) {
|
|
99
|
+
appendPart(part);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
pushFragment(current);
|
|
103
|
+
return fragments;
|
|
104
|
+
}
|
|
105
|
+
function normalizeWhitespace(text) {
|
|
106
|
+
return text.replace(/\r\n/g, "\n").replace(/\n{3,}/g, "\n\n").replace(/[ \t]{2,}/g, " ");
|
|
107
|
+
}
|
|
108
|
+
function stripMarkdownTables(text) {
|
|
109
|
+
return text.replace(
|
|
110
|
+
/^\|(.+)\|$/gm,
|
|
111
|
+
(_, row) => row.split("|").map((cell) => cell.trim()).filter(Boolean).join("\uFF0C")
|
|
112
|
+
);
|
|
113
|
+
}
|
|
114
|
+
function stripMarkdown(text) {
|
|
115
|
+
return text.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1").replace(/\[([^\]]+)\]\(([^)]+)\)/g, "$1").replace(/^#{1,6}\s+/gm, "").replace(/^\s*>\s?/gm, "").replace(/^\s*[-*+]\s+/gm, "").replace(/^\s*\d+\.\s+/gm, "").replace(/[*_~#>]+/g, "");
|
|
116
|
+
}
|
|
117
|
+
function stripUrls(text) {
|
|
118
|
+
return text.replace(/https?:\/\/\S+/gi, " ");
|
|
119
|
+
}
|
|
120
|
+
function stripEmojiAndSymbols(text) {
|
|
121
|
+
return text.replace(/[\u{1F000}-\u{1FAFF}\u{2600}-\u{27BF}]/gu, " ");
|
|
122
|
+
}
|
|
123
|
+
function preprocessTTSContent(text) {
|
|
124
|
+
const normalized = normalizeWhitespace(text);
|
|
125
|
+
const withoutTables = stripMarkdownTables(normalized);
|
|
126
|
+
const withoutMarkdown = stripMarkdown(withoutTables);
|
|
127
|
+
const withoutUrls = stripUrls(withoutMarkdown);
|
|
128
|
+
const withoutEmoji = stripEmojiAndSymbols(withoutUrls);
|
|
129
|
+
return withoutEmoji.replace(/[|]/g, "\uFF0C").replace(/[ \t]+\n/g, "\n").replace(/\n+/g, "\n").replace(/[ ]{2,}/g, " ").trim();
|
|
130
|
+
}
|
|
131
|
+
function decodeBase64Chunk(chunk) {
|
|
132
|
+
const binaryString = atob(chunk);
|
|
133
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
134
|
+
for (let index = 0; index < binaryString.length; index += 1) {
|
|
135
|
+
bytes[index] = binaryString.charCodeAt(index);
|
|
136
|
+
}
|
|
137
|
+
return bytes;
|
|
138
|
+
}
|
|
139
|
+
function concatUint8Arrays(items) {
|
|
140
|
+
let totalLength = 0;
|
|
141
|
+
for (const item of items) {
|
|
142
|
+
totalLength += item.length;
|
|
143
|
+
}
|
|
144
|
+
const result = new Uint8Array(totalLength);
|
|
145
|
+
let offset = 0;
|
|
146
|
+
for (const item of items) {
|
|
147
|
+
result.set(item, offset);
|
|
148
|
+
offset += item.length;
|
|
149
|
+
}
|
|
150
|
+
return result;
|
|
151
|
+
}
|
|
152
|
+
function pcmToWav(pcmBytes, sampleRate) {
|
|
153
|
+
const header = new ArrayBuffer(44);
|
|
154
|
+
const view = new DataView(header);
|
|
155
|
+
const dataSize = pcmBytes.byteLength;
|
|
156
|
+
const writeString = (offset, value) => {
|
|
157
|
+
for (let index = 0; index < value.length; index += 1) {
|
|
158
|
+
view.setUint8(offset + index, value.charCodeAt(index));
|
|
159
|
+
}
|
|
160
|
+
};
|
|
161
|
+
writeString(0, "RIFF");
|
|
162
|
+
view.setUint32(4, 36 + dataSize, true);
|
|
163
|
+
writeString(8, "WAVE");
|
|
164
|
+
writeString(12, "fmt ");
|
|
165
|
+
view.setUint32(16, 16, true);
|
|
166
|
+
view.setUint16(20, 1, true);
|
|
167
|
+
view.setUint16(22, 1, true);
|
|
168
|
+
view.setUint32(24, sampleRate, true);
|
|
169
|
+
view.setUint32(28, sampleRate * 2, true);
|
|
170
|
+
view.setUint16(32, 2, true);
|
|
171
|
+
view.setUint16(34, 16, true);
|
|
172
|
+
writeString(36, "data");
|
|
173
|
+
view.setUint32(40, dataSize, true);
|
|
174
|
+
return concatUint8Arrays([new Uint8Array(header), pcmBytes]);
|
|
175
|
+
}
|
|
176
|
+
function bytesToDataUri(bytes, mimeType) {
|
|
177
|
+
let binary = "";
|
|
178
|
+
for (const byte of bytes) {
|
|
179
|
+
binary += String.fromCharCode(byte);
|
|
180
|
+
}
|
|
181
|
+
return `data:${mimeType};base64,${btoa(binary)}`;
|
|
182
|
+
}
|
|
183
|
+
function createBrowserPlaybackBackend(config) {
|
|
184
|
+
const { sampleRate, onAudioStart, onAudioEnd, onError } = config;
|
|
185
|
+
let audioContext = null;
|
|
186
|
+
const audioSources = /* @__PURE__ */ new Set();
|
|
187
|
+
let nextScheduleTime = 0;
|
|
188
|
+
let streamEndHandled = false;
|
|
189
|
+
let streamingStarted = false;
|
|
190
|
+
const ensureAudioContext = () => {
|
|
191
|
+
if (!audioContext && typeof AudioContext !== "undefined") {
|
|
192
|
+
audioContext = new AudioContext({ sampleRate });
|
|
193
|
+
}
|
|
194
|
+
return audioContext;
|
|
195
|
+
};
|
|
196
|
+
const createAudioBufferFromPCM = (bytes) => {
|
|
197
|
+
const numSamples = Math.floor(bytes.length / 2);
|
|
198
|
+
const buffer = new AudioBuffer({ length: numSamples, sampleRate });
|
|
199
|
+
const channelData = buffer.getChannelData(0);
|
|
200
|
+
const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
201
|
+
for (let index = 0; index < numSamples; index += 1) {
|
|
202
|
+
const int16 = dataView.getInt16(index * 2, true);
|
|
203
|
+
channelData[index] = int16 / 32768;
|
|
204
|
+
}
|
|
205
|
+
return buffer;
|
|
206
|
+
};
|
|
207
|
+
const stop = () => {
|
|
208
|
+
for (const source of audioSources) {
|
|
209
|
+
try {
|
|
210
|
+
source.onended = null;
|
|
211
|
+
source.stop();
|
|
212
|
+
} catch {
|
|
213
|
+
}
|
|
214
|
+
source.disconnect();
|
|
215
|
+
}
|
|
216
|
+
audioSources.clear();
|
|
217
|
+
nextScheduleTime = 0;
|
|
218
|
+
streamEndHandled = false;
|
|
219
|
+
streamingStarted = false;
|
|
220
|
+
};
|
|
221
|
+
return {
|
|
222
|
+
kind: "browser",
|
|
223
|
+
hasStreamingPlayback: true,
|
|
224
|
+
isPlaying() {
|
|
225
|
+
return audioSources.size > 0;
|
|
226
|
+
},
|
|
227
|
+
stop,
|
|
228
|
+
async playBuffered(input) {
|
|
229
|
+
if (input.audioFormat !== "pcm") {
|
|
230
|
+
onError?.(new Error(`Built-in playback only supports pcm, got ${input.audioFormat}`));
|
|
231
|
+
return;
|
|
232
|
+
}
|
|
233
|
+
const context = ensureAudioContext();
|
|
234
|
+
if (!context || !input.chunks.length) {
|
|
235
|
+
return;
|
|
236
|
+
}
|
|
237
|
+
stop();
|
|
238
|
+
const chunkBytes = input.chunks.map((chunk) => decodeBase64Chunk(chunk));
|
|
239
|
+
const combined = concatUint8Arrays(chunkBytes);
|
|
240
|
+
const buffer = createAudioBufferFromPCM(combined);
|
|
241
|
+
const source = context.createBufferSource();
|
|
242
|
+
if (context.state === "suspended") {
|
|
243
|
+
await context.resume();
|
|
244
|
+
}
|
|
245
|
+
source.buffer = buffer;
|
|
246
|
+
source.connect(context.destination);
|
|
247
|
+
audioSources.add(source);
|
|
248
|
+
source.onended = () => {
|
|
249
|
+
audioSources.delete(source);
|
|
250
|
+
source.disconnect();
|
|
251
|
+
onAudioEnd?.();
|
|
252
|
+
};
|
|
253
|
+
onAudioStart?.();
|
|
254
|
+
source.start(0);
|
|
255
|
+
},
|
|
256
|
+
async scheduleStreamingChunk(input) {
|
|
257
|
+
if (input.audioFormat !== "pcm") {
|
|
258
|
+
onError?.(new Error(`Built-in playback only supports pcm, got ${input.audioFormat}`));
|
|
259
|
+
return;
|
|
260
|
+
}
|
|
261
|
+
const context = ensureAudioContext();
|
|
262
|
+
if (!context) {
|
|
263
|
+
return;
|
|
264
|
+
}
|
|
265
|
+
if (context.state === "suspended") {
|
|
266
|
+
await context.resume();
|
|
267
|
+
}
|
|
268
|
+
const chunkBytes = decodeBase64Chunk(input.chunk);
|
|
269
|
+
const chunkBuffer = createAudioBufferFromPCM(chunkBytes);
|
|
270
|
+
const source = context.createBufferSource();
|
|
271
|
+
const leadTime = 0.05;
|
|
272
|
+
if (!nextScheduleTime) {
|
|
273
|
+
nextScheduleTime = Math.max(context.currentTime + leadTime, leadTime);
|
|
274
|
+
}
|
|
275
|
+
source.buffer = chunkBuffer;
|
|
276
|
+
source.connect(context.destination);
|
|
277
|
+
audioSources.add(source);
|
|
278
|
+
source.onended = () => {
|
|
279
|
+
audioSources.delete(source);
|
|
280
|
+
source.disconnect();
|
|
281
|
+
if (streamEndHandled && audioSources.size === 0) {
|
|
282
|
+
nextScheduleTime = 0;
|
|
283
|
+
streamEndHandled = false;
|
|
284
|
+
streamingStarted = false;
|
|
285
|
+
onAudioEnd?.();
|
|
286
|
+
}
|
|
287
|
+
};
|
|
288
|
+
if (!streamingStarted) {
|
|
289
|
+
streamingStarted = true;
|
|
290
|
+
onAudioStart?.();
|
|
291
|
+
}
|
|
292
|
+
source.start(nextScheduleTime);
|
|
293
|
+
nextScheduleTime += chunkBuffer.duration;
|
|
294
|
+
},
|
|
295
|
+
finalizeStreaming() {
|
|
296
|
+
streamEndHandled = true;
|
|
297
|
+
if (audioSources.size === 0 && streamingStarted) {
|
|
298
|
+
nextScheduleTime = 0;
|
|
299
|
+
streamEndHandled = false;
|
|
300
|
+
streamingStarted = false;
|
|
301
|
+
onAudioEnd?.();
|
|
302
|
+
}
|
|
303
|
+
},
|
|
304
|
+
close() {
|
|
305
|
+
stop();
|
|
306
|
+
if (audioContext) {
|
|
307
|
+
void audioContext.close().catch(() => {
|
|
308
|
+
});
|
|
309
|
+
audioContext = null;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
};
|
|
313
|
+
}
|
|
314
|
+
function createMiniProgramPlaybackBackend(config) {
|
|
315
|
+
const { runtime, onAudioStart, onAudioEnd, onError } = config;
|
|
316
|
+
const taro = runtime?.Taro;
|
|
317
|
+
const createInnerAudioContext = taro?.createInnerAudioContext;
|
|
318
|
+
const getFileSystemManager = taro?.getFileSystemManager;
|
|
319
|
+
const userDataPath = taro?.env?.USER_DATA_PATH;
|
|
320
|
+
let innerAudio = createInnerAudioContext?.() ?? null;
|
|
321
|
+
let currentTempFile = null;
|
|
322
|
+
let playing = false;
|
|
323
|
+
const bindEvents = () => {
|
|
324
|
+
innerAudio?.onPlay?.(() => {
|
|
325
|
+
playing = true;
|
|
326
|
+
onAudioStart?.();
|
|
327
|
+
});
|
|
328
|
+
innerAudio?.onEnded?.(() => {
|
|
329
|
+
playing = false;
|
|
330
|
+
onAudioEnd?.();
|
|
331
|
+
});
|
|
332
|
+
innerAudio?.onStop?.(() => {
|
|
333
|
+
playing = false;
|
|
334
|
+
});
|
|
335
|
+
innerAudio?.onError?.((error) => {
|
|
336
|
+
playing = false;
|
|
337
|
+
onError?.(new Error(error?.errMsg || "Mini-program audio playback failed"));
|
|
338
|
+
});
|
|
339
|
+
};
|
|
340
|
+
bindEvents();
|
|
341
|
+
const cleanupTempFile = () => {
|
|
342
|
+
if (!currentTempFile) {
|
|
343
|
+
return;
|
|
344
|
+
}
|
|
345
|
+
const pathToDelete = currentTempFile;
|
|
346
|
+
currentTempFile = null;
|
|
347
|
+
getFileSystemManager?.()?.unlink?.({
|
|
348
|
+
filePath: pathToDelete,
|
|
349
|
+
fail: () => {
|
|
350
|
+
}
|
|
351
|
+
});
|
|
352
|
+
};
|
|
353
|
+
const writeTempAudioFile = async (bytes) => {
|
|
354
|
+
if (!userDataPath || !getFileSystemManager) {
|
|
355
|
+
return bytesToDataUri(bytes, "audio/wav");
|
|
356
|
+
}
|
|
357
|
+
const filePath = `${userDataPath}/amaster-tts-${Date.now()}-${Math.random().toString(16).slice(2)}.wav`;
|
|
358
|
+
const fsManager = getFileSystemManager();
|
|
359
|
+
if (!fsManager?.writeFile) {
|
|
360
|
+
return bytesToDataUri(bytes, "audio/wav");
|
|
361
|
+
}
|
|
362
|
+
await new Promise((resolve, reject) => {
|
|
363
|
+
fsManager.writeFile?.({
|
|
364
|
+
filePath,
|
|
365
|
+
data: bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength),
|
|
366
|
+
success: () => resolve(),
|
|
367
|
+
fail: (error) => reject(error)
|
|
368
|
+
});
|
|
369
|
+
});
|
|
370
|
+
cleanupTempFile();
|
|
371
|
+
currentTempFile = filePath;
|
|
372
|
+
return filePath;
|
|
373
|
+
};
|
|
374
|
+
return {
|
|
375
|
+
kind: "mini-program",
|
|
376
|
+
hasStreamingPlayback: false,
|
|
377
|
+
isPlaying() {
|
|
378
|
+
return playing;
|
|
379
|
+
},
|
|
380
|
+
stop() {
|
|
381
|
+
innerAudio?.stop?.();
|
|
382
|
+
playing = false;
|
|
383
|
+
},
|
|
384
|
+
async playBuffered(input) {
|
|
385
|
+
if (input.audioFormat !== "pcm") {
|
|
386
|
+
onError?.(new Error(`Mini-program built-in playback only supports pcm, got ${input.audioFormat}`));
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
389
|
+
const pcmChunks = input.chunks.map((chunk) => decodeBase64Chunk(chunk));
|
|
390
|
+
const wavBytes = pcmToWav(concatUint8Arrays(pcmChunks), input.sampleRate);
|
|
391
|
+
const source = await writeTempAudioFile(wavBytes);
|
|
392
|
+
if (!innerAudio && createInnerAudioContext) {
|
|
393
|
+
innerAudio = createInnerAudioContext();
|
|
394
|
+
bindEvents();
|
|
395
|
+
}
|
|
396
|
+
if (!innerAudio?.play) {
|
|
397
|
+
onError?.(new Error("Mini-program audio context is unavailable"));
|
|
398
|
+
return;
|
|
399
|
+
}
|
|
400
|
+
innerAudio.src = source;
|
|
401
|
+
innerAudio.autoplay = false;
|
|
402
|
+
innerAudio.obeyMuteSwitch = false;
|
|
403
|
+
innerAudio.play();
|
|
404
|
+
},
|
|
405
|
+
close() {
|
|
406
|
+
innerAudio?.stop?.();
|
|
407
|
+
innerAudio?.destroy?.();
|
|
408
|
+
innerAudio = null;
|
|
409
|
+
playing = false;
|
|
410
|
+
cleanupTempFile();
|
|
411
|
+
}
|
|
412
|
+
};
|
|
413
|
+
}
|
|
414
|
+
function resolvePlaybackBackend(config) {
|
|
415
|
+
if (config.runtime?.Taro?.createInnerAudioContext && !isBrowserEnvironment()) {
|
|
416
|
+
return createMiniProgramPlaybackBackend(config);
|
|
417
|
+
}
|
|
418
|
+
if (typeof AudioContext !== "undefined") {
|
|
419
|
+
return createBrowserPlaybackBackend(config);
|
|
420
|
+
}
|
|
421
|
+
return null;
|
|
422
|
+
}
|
|
423
|
+
function createRawTTSClient(config) {
|
|
30
424
|
const {
|
|
31
425
|
getAccessToken,
|
|
32
426
|
voice = "Cherry",
|
|
33
427
|
autoPlay = true,
|
|
34
428
|
audioFormat = "pcm",
|
|
35
429
|
sampleRate = 24e3,
|
|
430
|
+
runtime,
|
|
36
431
|
onReady,
|
|
37
432
|
onAudioStart,
|
|
38
433
|
onAudioEnd,
|
|
39
434
|
onAudioChunk,
|
|
40
|
-
onError
|
|
435
|
+
onError,
|
|
436
|
+
onClose
|
|
41
437
|
} = config;
|
|
438
|
+
void getAccessToken;
|
|
42
439
|
let ws = null;
|
|
440
|
+
let connected = false;
|
|
43
441
|
let audioChunks = [];
|
|
44
|
-
let
|
|
45
|
-
let
|
|
442
|
+
let responseDone = false;
|
|
443
|
+
let autoPlayed = false;
|
|
444
|
+
let playbackSuppressed = false;
|
|
445
|
+
let playbackBackend = resolvePlaybackBackend({
|
|
446
|
+
runtime,
|
|
447
|
+
sampleRate,
|
|
448
|
+
onAudioStart,
|
|
449
|
+
onAudioEnd,
|
|
450
|
+
onError
|
|
451
|
+
});
|
|
452
|
+
function buildWsUrl() {
|
|
453
|
+
let path = TTS_PATH;
|
|
454
|
+
return path;
|
|
455
|
+
}
|
|
456
|
+
function play() {
|
|
457
|
+
playbackSuppressed = false;
|
|
458
|
+
if (!audioChunks.length || !playbackBackend) {
|
|
459
|
+
return;
|
|
460
|
+
}
|
|
461
|
+
if (!responseDone && playbackBackend.hasStreamingPlayback) {
|
|
462
|
+
return;
|
|
463
|
+
}
|
|
464
|
+
void playbackBackend.playBuffered({
|
|
465
|
+
chunks: [...audioChunks],
|
|
466
|
+
sampleRate,
|
|
467
|
+
audioFormat
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
function stop() {
|
|
471
|
+
playbackSuppressed = true;
|
|
472
|
+
playbackBackend?.stop();
|
|
473
|
+
}
|
|
474
|
+
function close() {
|
|
475
|
+
stop();
|
|
476
|
+
if (ws) {
|
|
477
|
+
ws.close();
|
|
478
|
+
ws = null;
|
|
479
|
+
}
|
|
480
|
+
connected = false;
|
|
481
|
+
playbackBackend?.close();
|
|
482
|
+
playbackBackend = resolvePlaybackBackend({
|
|
483
|
+
runtime,
|
|
484
|
+
sampleRate,
|
|
485
|
+
onAudioStart,
|
|
486
|
+
onAudioEnd,
|
|
487
|
+
onError
|
|
488
|
+
});
|
|
489
|
+
onClose?.();
|
|
490
|
+
}
|
|
46
491
|
async function connect() {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
492
|
+
if (connected && ws?.readyState === WebSocket.OPEN) {
|
|
493
|
+
return;
|
|
494
|
+
}
|
|
495
|
+
await new Promise((resolve, reject) => {
|
|
496
|
+
const socket = new WebSocket(buildWsUrl());
|
|
497
|
+
ws = socket;
|
|
498
|
+
let settled = false;
|
|
499
|
+
const settleResolve = () => {
|
|
500
|
+
if (settled) {
|
|
501
|
+
return;
|
|
54
502
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
503
|
+
settled = true;
|
|
504
|
+
connected = true;
|
|
505
|
+
resolve();
|
|
58
506
|
};
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
ws.send(
|
|
63
|
-
JSON.stringify({
|
|
64
|
-
type: "session.update",
|
|
65
|
-
session: {
|
|
66
|
-
mode: "server_commit",
|
|
67
|
-
voice,
|
|
68
|
-
response_format: audioFormat,
|
|
69
|
-
sample_rate: sampleRate
|
|
70
|
-
}
|
|
71
|
-
})
|
|
72
|
-
);
|
|
507
|
+
const settleReject = (error) => {
|
|
508
|
+
if (settled) {
|
|
509
|
+
return;
|
|
73
510
|
}
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
511
|
+
settled = true;
|
|
512
|
+
connected = false;
|
|
513
|
+
reject(error);
|
|
514
|
+
};
|
|
515
|
+
socket.onmessage = async (event) => {
|
|
516
|
+
try {
|
|
517
|
+
const data = JSON.parse(event.data);
|
|
518
|
+
if (data.type === "session.created") {
|
|
519
|
+
socket.send(
|
|
520
|
+
JSON.stringify({
|
|
521
|
+
type: "session.update",
|
|
522
|
+
session: {
|
|
523
|
+
mode: "server_commit",
|
|
524
|
+
voice,
|
|
525
|
+
response_format: audioFormat,
|
|
526
|
+
sample_rate: sampleRate
|
|
527
|
+
}
|
|
528
|
+
})
|
|
529
|
+
);
|
|
86
530
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
531
|
+
if (data.type === "session.updated") {
|
|
532
|
+
onReady?.();
|
|
533
|
+
settleResolve();
|
|
534
|
+
return;
|
|
535
|
+
}
|
|
536
|
+
if (data.type === "response.audio.delta") {
|
|
537
|
+
audioChunks.push(data.delta);
|
|
538
|
+
onAudioChunk?.([...audioChunks]);
|
|
539
|
+
if (autoPlay && !playbackSuppressed && playbackBackend?.hasStreamingPlayback) {
|
|
540
|
+
autoPlayed = true;
|
|
541
|
+
await playbackBackend.scheduleStreamingChunk?.({
|
|
542
|
+
chunk: data.delta,
|
|
543
|
+
sampleRate,
|
|
544
|
+
audioFormat
|
|
545
|
+
});
|
|
546
|
+
}
|
|
547
|
+
return;
|
|
548
|
+
}
|
|
549
|
+
if (data.type === "response.audio.done") {
|
|
550
|
+
responseDone = true;
|
|
551
|
+
onAudioChunk?.([...audioChunks]);
|
|
552
|
+
if (playbackBackend?.hasStreamingPlayback) {
|
|
553
|
+
playbackBackend.finalizeStreaming?.();
|
|
554
|
+
return;
|
|
555
|
+
}
|
|
556
|
+
if (autoPlay && !playbackSuppressed && !autoPlayed) {
|
|
557
|
+
autoPlayed = true;
|
|
558
|
+
await playbackBackend?.playBuffered({
|
|
559
|
+
chunks: [...audioChunks],
|
|
560
|
+
sampleRate,
|
|
561
|
+
audioFormat
|
|
562
|
+
});
|
|
563
|
+
}
|
|
564
|
+
return;
|
|
565
|
+
}
|
|
566
|
+
if (data.type === "error") {
|
|
567
|
+
const error = new Error(data.error?.message || "Unknown TTS error");
|
|
568
|
+
onError?.(error);
|
|
569
|
+
settleReject(error);
|
|
570
|
+
}
|
|
571
|
+
} catch (error) {
|
|
572
|
+
const parsedError = error instanceof Error ? error : new Error(String(error));
|
|
573
|
+
onError?.(parsedError);
|
|
574
|
+
settleReject(parsedError);
|
|
95
575
|
}
|
|
96
576
|
};
|
|
97
|
-
|
|
98
|
-
const
|
|
99
|
-
onError?.(
|
|
100
|
-
|
|
577
|
+
socket.onerror = () => {
|
|
578
|
+
const error = new Error("WebSocket connection error");
|
|
579
|
+
onError?.(error);
|
|
580
|
+
settleReject(error);
|
|
101
581
|
};
|
|
102
|
-
|
|
582
|
+
socket.onclose = () => {
|
|
583
|
+
connected = false;
|
|
103
584
|
ws = null;
|
|
104
585
|
};
|
|
105
586
|
});
|
|
106
587
|
}
|
|
107
|
-
|
|
588
|
+
function resetSynthesisState() {
|
|
589
|
+
stop();
|
|
590
|
+
audioChunks = [];
|
|
591
|
+
responseDone = false;
|
|
592
|
+
autoPlayed = false;
|
|
593
|
+
playbackSuppressed = false;
|
|
594
|
+
}
|
|
595
|
+
function ensureSocketReady() {
|
|
108
596
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
109
597
|
throw new Error("WebSocket not connected");
|
|
110
598
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
599
|
+
}
|
|
600
|
+
function appendText(text) {
|
|
601
|
+
const normalizedText = preprocessTTSContent(text);
|
|
602
|
+
const fragments = splitTextIntoFragments(normalizedText);
|
|
603
|
+
if (!fragments.length) {
|
|
604
|
+
return;
|
|
605
|
+
}
|
|
606
|
+
ensureSocketReady();
|
|
607
|
+
const socket = ws;
|
|
608
|
+
for (const fragment of fragments) {
|
|
609
|
+
socket?.send(
|
|
120
610
|
JSON.stringify({
|
|
121
|
-
type: "input_text_buffer.
|
|
611
|
+
type: "input_text_buffer.append",
|
|
612
|
+
text: fragment
|
|
122
613
|
})
|
|
123
614
|
);
|
|
124
|
-
}, 100);
|
|
125
|
-
}
|
|
126
|
-
function playAudio() {
|
|
127
|
-
let chunks = audioChunks;
|
|
128
|
-
if (typeof window === "undefined") return;
|
|
129
|
-
try {
|
|
130
|
-
if (!audioContext) {
|
|
131
|
-
audioContext = new AudioContext();
|
|
132
|
-
}
|
|
133
|
-
onAudioStart?.();
|
|
134
|
-
let totalBytes = 0;
|
|
135
|
-
const allBytes = [];
|
|
136
|
-
for (const chunk of chunks) {
|
|
137
|
-
const binaryString = atob(chunk);
|
|
138
|
-
const bytes = new Uint8Array(binaryString.length);
|
|
139
|
-
for (let i = 0; i < binaryString.length; i++) {
|
|
140
|
-
bytes[i] = binaryString.charCodeAt(i);
|
|
141
|
-
}
|
|
142
|
-
allBytes.push(bytes);
|
|
143
|
-
totalBytes += bytes.length;
|
|
144
|
-
}
|
|
145
|
-
const combined = new Uint8Array(totalBytes);
|
|
146
|
-
let offset = 0;
|
|
147
|
-
for (const bytes of allBytes) {
|
|
148
|
-
combined.set(bytes, offset);
|
|
149
|
-
offset += bytes.length;
|
|
150
|
-
}
|
|
151
|
-
const numSamples = combined.length / 2;
|
|
152
|
-
const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);
|
|
153
|
-
const channelData = audioBuffer.getChannelData(0);
|
|
154
|
-
const dataView = new DataView(combined.buffer);
|
|
155
|
-
for (let i = 0; i < numSamples; i++) {
|
|
156
|
-
const int16 = dataView.getInt16(i * 2, true);
|
|
157
|
-
channelData[i] = int16 / 32768;
|
|
158
|
-
}
|
|
159
|
-
const source = audioContext.createBufferSource();
|
|
160
|
-
source.buffer = audioBuffer;
|
|
161
|
-
source.connect(audioContext.destination);
|
|
162
|
-
source.onended = () => onAudioEnd?.();
|
|
163
|
-
source.start(0);
|
|
164
|
-
audioSource = source;
|
|
165
|
-
} catch (err) {
|
|
166
|
-
onError?.(err);
|
|
167
615
|
}
|
|
168
616
|
}
|
|
169
|
-
function
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
}
|
|
617
|
+
function commitText() {
|
|
618
|
+
ensureSocketReady();
|
|
619
|
+
const socket = ws;
|
|
620
|
+
socket?.send(
|
|
621
|
+
JSON.stringify({
|
|
622
|
+
type: "input_text_buffer.commit"
|
|
623
|
+
})
|
|
624
|
+
);
|
|
178
625
|
}
|
|
179
|
-
function
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
626
|
+
function startStream() {
|
|
627
|
+
resetSynthesisState();
|
|
628
|
+
}
|
|
629
|
+
async function speak(text) {
|
|
630
|
+
startStream();
|
|
631
|
+
appendText(text);
|
|
632
|
+
commitText();
|
|
185
633
|
}
|
|
186
634
|
return {
|
|
187
635
|
connect,
|
|
188
636
|
speak,
|
|
637
|
+
startStream,
|
|
638
|
+
appendText,
|
|
639
|
+
commitText,
|
|
640
|
+
play,
|
|
641
|
+
stop,
|
|
189
642
|
close,
|
|
190
|
-
|
|
191
|
-
|
|
643
|
+
isConnected() {
|
|
644
|
+
return connected && ws?.readyState === WebSocket.OPEN;
|
|
645
|
+
},
|
|
646
|
+
hasAudio() {
|
|
647
|
+
return audioChunks.length > 0;
|
|
648
|
+
},
|
|
649
|
+
isResponseDone() {
|
|
650
|
+
return responseDone;
|
|
651
|
+
},
|
|
652
|
+
isPlaying() {
|
|
653
|
+
return playbackBackend?.isPlaying() ?? false;
|
|
654
|
+
},
|
|
655
|
+
isStreamingPlayback() {
|
|
656
|
+
return playbackBackend?.hasStreamingPlayback ?? false;
|
|
657
|
+
}
|
|
192
658
|
};
|
|
193
659
|
}
|
|
194
|
-
|
|
195
|
-
return
|
|
196
|
-
|
|
660
|
+
function defaultSnapshot(voice) {
|
|
661
|
+
return {
|
|
662
|
+
status: "idle",
|
|
663
|
+
activeId: null,
|
|
664
|
+
error: null,
|
|
665
|
+
requestId: 0,
|
|
666
|
+
text: null,
|
|
667
|
+
voice,
|
|
668
|
+
fallbackMode: "none"
|
|
197
669
|
};
|
|
198
|
-
}
|
|
670
|
+
}
|
|
671
|
+
function canUseSystemSpeech() {
|
|
672
|
+
return isBrowserEnvironment() && "speechSynthesis" in window && "SpeechSynthesisUtterance" in window;
|
|
673
|
+
}
|
|
674
|
+
function systemSpeak(text, options) {
|
|
675
|
+
if (!canUseSystemSpeech()) {
|
|
676
|
+
throw new Error("SpeechSynthesis is not supported");
|
|
677
|
+
}
|
|
678
|
+
const utterance = new SpeechSynthesisUtterance(text);
|
|
679
|
+
utterance.onstart = () => {
|
|
680
|
+
options.onStart?.();
|
|
681
|
+
};
|
|
682
|
+
utterance.onerror = (event) => {
|
|
683
|
+
options.onError?.(new Error(event.error || "Speech synthesis failed"));
|
|
684
|
+
};
|
|
685
|
+
utterance.onend = () => {
|
|
686
|
+
options.onEnd?.();
|
|
687
|
+
};
|
|
688
|
+
window.speechSynthesis.cancel();
|
|
689
|
+
window.speechSynthesis.speak(utterance);
|
|
690
|
+
}
|
|
691
|
+
function stopSystemSpeech() {
|
|
692
|
+
if (canUseSystemSpeech()) {
|
|
693
|
+
window.speechSynthesis.cancel();
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
function createTTSSpeakController(createClient, options = {}) {
|
|
697
|
+
const listeners = /* @__PURE__ */ new Set();
|
|
698
|
+
const persistedVoice = options.voiceStorageKey && options.storage ? options.storage.getItem(options.voiceStorageKey) || null : null;
|
|
699
|
+
let client = null;
|
|
700
|
+
let snapshot = defaultSnapshot(persistedVoice);
|
|
701
|
+
let streamActive = false;
|
|
702
|
+
let streamId = null;
|
|
703
|
+
const emit = () => {
|
|
704
|
+
for (const listener of listeners) {
|
|
705
|
+
listener(snapshot);
|
|
706
|
+
}
|
|
707
|
+
};
|
|
708
|
+
const setSnapshot = (next) => {
|
|
709
|
+
snapshot = {
|
|
710
|
+
...snapshot,
|
|
711
|
+
...next
|
|
712
|
+
};
|
|
713
|
+
emit();
|
|
714
|
+
};
|
|
715
|
+
const persistVoice = (voice) => {
|
|
716
|
+
if (!options.voiceStorageKey || !options.storage) {
|
|
717
|
+
return;
|
|
718
|
+
}
|
|
719
|
+
if (!voice) {
|
|
720
|
+
options.storage.removeItem?.(options.voiceStorageKey);
|
|
721
|
+
return;
|
|
722
|
+
}
|
|
723
|
+
options.storage.setItem(options.voiceStorageKey, voice);
|
|
724
|
+
};
|
|
725
|
+
const reset = (requestId, preserved) => {
|
|
726
|
+
snapshot = {
|
|
727
|
+
status: "idle",
|
|
728
|
+
activeId: null,
|
|
729
|
+
error: null,
|
|
730
|
+
requestId,
|
|
731
|
+
text: preserved?.text ?? null,
|
|
732
|
+
voice: preserved?.voice ?? snapshot.voice,
|
|
733
|
+
fallbackMode: "none"
|
|
734
|
+
};
|
|
735
|
+
emit();
|
|
736
|
+
};
|
|
737
|
+
const stop = (stopOptions) => {
|
|
738
|
+
const preserveClient = stopOptions?.preserveClient ?? true;
|
|
739
|
+
const nextRequestId = snapshot.requestId + 1;
|
|
740
|
+
streamActive = false;
|
|
741
|
+
streamId = null;
|
|
742
|
+
client?.stop();
|
|
743
|
+
stopSystemSpeech();
|
|
744
|
+
if (!preserveClient || !client || !snapshot.text) {
|
|
745
|
+
client?.close();
|
|
746
|
+
client = null;
|
|
747
|
+
reset(nextRequestId, { voice: snapshot.voice });
|
|
748
|
+
return;
|
|
749
|
+
}
|
|
750
|
+
reset(nextRequestId, {
|
|
751
|
+
text: snapshot.text,
|
|
752
|
+
voice: snapshot.voice
|
|
753
|
+
});
|
|
754
|
+
};
|
|
755
|
+
const createManagedClient = (input, requestId, content) => {
|
|
756
|
+
const nextVoice = input.voice ?? snapshot.voice ?? void 0;
|
|
757
|
+
const nextClient = createClient({
|
|
758
|
+
voice: nextVoice,
|
|
759
|
+
autoPlay: true,
|
|
760
|
+
audioFormat: input.audioFormat,
|
|
761
|
+
sampleRate: input.sampleRate,
|
|
762
|
+
runtime: options.runtime,
|
|
763
|
+
onReady: () => {
|
|
764
|
+
if (client !== nextClient) {
|
|
765
|
+
return;
|
|
766
|
+
}
|
|
767
|
+
setSnapshot({
|
|
768
|
+
status: "connecting",
|
|
769
|
+
error: null
|
|
770
|
+
});
|
|
771
|
+
},
|
|
772
|
+
onAudioStart: () => {
|
|
773
|
+
if (client !== nextClient) {
|
|
774
|
+
return;
|
|
775
|
+
}
|
|
776
|
+
setSnapshot({
|
|
777
|
+
status: "speaking",
|
|
778
|
+
error: null,
|
|
779
|
+
fallbackMode: "none"
|
|
780
|
+
});
|
|
781
|
+
},
|
|
782
|
+
onAudioEnd: () => {
|
|
783
|
+
if (client !== nextClient) {
|
|
784
|
+
return;
|
|
785
|
+
}
|
|
786
|
+
setSnapshot({
|
|
787
|
+
status: streamActive ? "connecting" : "idle",
|
|
788
|
+
activeId: streamActive ? streamId : null,
|
|
789
|
+
error: null,
|
|
790
|
+
fallbackMode: "none"
|
|
791
|
+
});
|
|
792
|
+
},
|
|
793
|
+
onError: (error) => {
|
|
794
|
+
if (client !== nextClient) {
|
|
795
|
+
return;
|
|
796
|
+
}
|
|
797
|
+
streamActive = false;
|
|
798
|
+
streamId = null;
|
|
799
|
+
setSnapshot({
|
|
800
|
+
status: "error",
|
|
801
|
+
error: error.message,
|
|
802
|
+
fallbackMode: "none"
|
|
803
|
+
});
|
|
804
|
+
},
|
|
805
|
+
onClose: () => {
|
|
806
|
+
if (client !== nextClient) {
|
|
807
|
+
return;
|
|
808
|
+
}
|
|
809
|
+
client = null;
|
|
810
|
+
}
|
|
811
|
+
});
|
|
812
|
+
client = nextClient;
|
|
813
|
+
setSnapshot({
|
|
814
|
+
status: "connecting",
|
|
815
|
+
activeId: input.id ?? null,
|
|
816
|
+
error: null,
|
|
817
|
+
requestId,
|
|
818
|
+
text: content,
|
|
819
|
+
voice: nextVoice ?? null,
|
|
820
|
+
fallbackMode: "none"
|
|
821
|
+
});
|
|
822
|
+
return {
|
|
823
|
+
nextClient,
|
|
824
|
+
nextVoice
|
|
825
|
+
};
|
|
826
|
+
};
|
|
827
|
+
// Guarantees a connected streaming client for incremental text playback.
// Fast path: if a client already exists AND the requested voice matches the
// snapshot's voice, reuse it — just mark the stream active and restart the
// stream on the existing connection. Otherwise tear down and rebuild via
// `createManagedClient`, then connect and open a fresh stream.
// `streamOptions`: { id?, voice?, audioFormat?, sampleRate? }.
const ensureStreamClient = async (streamOptions) => {
  const nextRequestId = snapshot.requestId + 1;
  const nextVoice = streamOptions.voice ?? snapshot.voice ?? void 0;
  // Voice change forces a rebuild: the comparison normalizes undefined to null
  // to match how `voice` is stored in the snapshot.
  if (client && snapshot.voice === (nextVoice ?? null)) {
    streamActive = true;
    streamId = streamOptions.id ?? null;
    setSnapshot({
      // If audio is already playing we stay "speaking"; otherwise we are
      // waiting for the first chunk of the new stream.
      status: client.isPlaying() ? "speaking" : "connecting",
      activeId: streamId,
      error: null,
      requestId: nextRequestId,
      voice: nextVoice ?? null,
      fallbackMode: "none"
    });
    client.startStream();
    return;
  }
  // Rebuild path: fully dispose the old client (preserveClient: false) first.
  stop({ preserveClient: false });
  const { nextClient } = createManagedClient(
    {
      id: streamOptions.id,
      voice: streamOptions.voice,
      audioFormat: streamOptions.audioFormat,
      sampleRate: streamOptions.sampleRate
    },
    nextRequestId,
    // Stream mode starts with no accumulated text in the snapshot.
    ""
  );
  // Mark the stream active *before* awaiting connect so concurrent
  // appendStreamText calls see the stream as in-progress.
  streamActive = true;
  streamId = streamOptions.id ?? null;
  await nextClient.connect();
  nextClient.startStream();
};
|
|
860
|
+
// One-shot text-to-speech. Behavior in order:
// 1. Empty/whitespace-only text (after preprocessing) → stop everything.
// 2. Cache replay: same text + same voice + client still holds buffered audio
//    → replay locally without a new network round trip.
// 3. Otherwise rebuild the client, connect, and synthesize.
// On synthesis failure, optionally falls back to the platform speech engine
// (`systemSpeak`) unless `options.fallbackToSystemSpeech === false`.
// `speakOptions`: { text, id?, voice?, audioFormat?, sampleRate? }.
const speak = async (speakOptions) => {
  const content = preprocessTTSContent(speakOptions.text);
  if (!content) {
    stop({ preserveClient: false });
    return;
  }
  const nextRequestId = snapshot.requestId + 1;
  const nextVoice = speakOptions.voice ?? snapshot.voice ?? void 0;
  // Replay path: identical request and the audio buffer is still available.
  if (client && snapshot.text === content && snapshot.voice === (nextVoice ?? null) && client.hasAudio()) {
    setSnapshot({
      status: "speaking",
      activeId: speakOptions.id ?? null,
      error: null,
      requestId: nextRequestId,
      text: content,
      voice: nextVoice ?? null,
      fallbackMode: "none"
    });
    client.play();
    return;
  }
  // Fresh synthesis: dispose old client and cancel any in-flight stream.
  stop({ preserveClient: false });
  streamActive = false;
  streamId = null;
  const { nextClient } = createManagedClient(
    {
      id: speakOptions.id,
      voice: speakOptions.voice,
      audioFormat: speakOptions.audioFormat,
      sampleRate: speakOptions.sampleRate
    },
    nextRequestId,
    content
  );
  try {
    await nextClient.connect();
    await nextClient.speak(content);
  } catch (error) {
    // A newer request replaced us while we were awaiting — its handlers own
    // the snapshot now, so silently bow out.
    if (client !== nextClient) {
      return;
    }
    // Fallback is opt-out (`!== false` keeps it on when the flag is unset).
    // NOTE(review): `canUseSystemSpeech`/`systemSpeak` are defined outside this
    // view — presumably wrappers over the Web Speech API; verify there.
    if (options.fallbackToSystemSpeech !== false && canUseSystemSpeech()) {
      client?.close();
      client = null;
      systemSpeak(content, {
        onStart: () => {
          setSnapshot({
            status: "speaking",
            error: null,
            activeId: speakOptions.id ?? null,
            requestId: nextRequestId,
            text: content,
            voice: nextVoice ?? null,
            // Subscribers can tell system speech from network TTS via this flag.
            fallbackMode: "system"
          });
        },
        onEnd: () => {
          setSnapshot({
            status: "idle",
            activeId: null,
            error: null,
            fallbackMode: "none"
          });
        },
        onError: (fallbackError) => {
          setSnapshot({
            status: "error",
            error: fallbackError.message,
            fallbackMode: "none"
          });
        }
      });
      return;
    }
    // No fallback available: surface the original error.
    client?.close();
    client = null;
    setSnapshot({
      status: "error",
      error: error instanceof Error ? error.message : String(error),
      fallbackMode: "none"
    });
  }
};
|
|
943
|
+
// Public controller surface. All methods close over the factory-scoped state
// (`snapshot`, `listeners`, `client`, `streamActive`, `streamId`) defined
// above; the snapshot/subscribe pair follows the external-store contract
// (stable getSnapshot + subscribe returning an unsubscribe function).
return {
  // Current immutable state object for subscribers.
  getSnapshot() {
    return snapshot;
  },
  // Registers a change listener; returns the matching unsubscribe function.
  subscribe(listener) {
    listeners.add(listener);
    return () => {
      listeners.delete(listener);
    };
  },
  speak,
  // Opens (or reuses) a streaming session; see ensureStreamClient above.
  async startStream(streamOptions) {
    await ensureStreamClient(streamOptions);
  },
  // Appends preprocessed text to the active stream, lazily (re)creating the
  // stream when the id changed or no client exists. Accumulates the raw text
  // into snapshot.text for consumers.
  async appendStreamText(streamOptions) {
    const content = preprocessTTSContent(streamOptions.text);
    if (!content) {
      return;
    }
    if (!streamActive || streamId !== (streamOptions.id ?? null) || !client) {
      await ensureStreamClient(streamOptions);
    }
    client?.appendText(content);
    setSnapshot({
      status: snapshot.status === "speaking" ? "speaking" : "connecting",
      activeId: streamOptions.id ?? null,
      error: null,
      text: `${snapshot.text || ""}${content}`,
      fallbackMode: "none"
    });
  },
  // Flushes buffered stream text to the synthesizer without closing the stream.
  commitStream() {
    if (!client || !streamActive) {
      return;
    }
    client.commitText();
    setSnapshot({
      status: snapshot.status === "speaking" ? "speaking" : "connecting",
      activeId: streamId,
      error: null,
      fallbackMode: "none"
    });
  },
  // Ends the stream; playback of already-queued audio may continue, so the
  // status/activeId are derived from whether the client is still playing.
  finishStream() {
    streamActive = false;
    streamId = null;
    setSnapshot({
      status: client?.isPlaying() ? "speaking" : "idle",
      activeId: client?.isPlaying() ? snapshot.activeId : null,
      error: null,
      fallbackMode: "none"
    });
  },
  stop,
  // Full teardown: stop without preserving the underlying client.
  release() {
    stop({ preserveClient: false });
  },
  // Stop-if-active-else-speak convenience. NOTE: uses `this.isActive`, so it
  // must be invoked as a method on this object (do not detach/destructure it).
  async toggle(toggleOptions) {
    if (this.isActive(toggleOptions.id)) {
      stop();
      return;
    }
    await speak(toggleOptions);
  },
  // With no id: is the controller busy at all? With an id: is that specific
  // utterance the one currently connecting/speaking?
  isActive(id) {
    if (!id) {
      return snapshot.status === "connecting" || snapshot.status === "speaking";
    }
    return snapshot.activeId === id && (snapshot.status === "connecting" || snapshot.status === "speaking");
  },
  // Updates the preferred voice and persists it (persistVoice is defined in
  // the enclosing scope — presumably storage-backed; takes effect on the next
  // client creation, not on the currently playing one).
  setVoice(voice) {
    setSnapshot({ voice });
    persistVoice(voice);
  },
  getVoice() {
    return snapshot.voice;
  }
};
}
|
|
1022
|
+
/**
 * Creates a TTS client factory pre-bound to the given auth configuration.
 * The returned function overlays per-call config on top of `authConfig`
 * (per-call keys win on conflict) and hands the merged object to the raw
 * client constructor.
 *
 * @param {object} authConfig - Shared authentication/connection settings.
 * @returns {(config: object) => object} Factory producing a raw TTS client.
 */
function createTTSClient(authConfig) {
  return (config) => {
    // Later spread wins: per-call `config` overrides shared `authConfig` keys.
    const mergedConfig = { ...authConfig, ...config };
    return createRawTTSClient(mergedConfig);
  };
}
|
|
199
1025
|
// Annotate the CommonJS export names for ESM import in node:
// NOTE: `0 && (...)` is never executed — this is bundler-emitted dead code
// whose only purpose is to make the named exports statically detectable
// (e.g. by Node's CJS named-export lexer). Generated output; do not edit.
0 && (module.exports = {
  createTTSClient,
  createTTSSpeakController,
  preprocessTTSContent,
  splitTextIntoFragments
});
//# sourceMappingURL=index.cjs.map