@lee-zg/melange 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/chunk-3RM45M64.js +1637 -0
  2. package/dist/chunk-3RM45M64.js.map +1 -0
  3. package/dist/{chunk-ALBD5XC5.js → chunk-GXFWPL5M.js} +4 -8
  4. package/dist/chunk-GXFWPL5M.js.map +1 -0
  5. package/dist/{chunk-Q73NOVWX.cjs → chunk-KUJARU3D.cjs} +7 -24
  6. package/dist/chunk-KUJARU3D.cjs.map +1 -0
  7. package/dist/{chunk-2PXWQDZC.js → chunk-MBBJMHTS.js} +3 -5
  8. package/dist/chunk-MBBJMHTS.js.map +1 -0
  9. package/dist/{chunk-ZT6HVG4G.cjs → chunk-UYJUSNDI.cjs} +4 -8
  10. package/dist/chunk-UYJUSNDI.cjs.map +1 -0
  11. package/dist/{chunk-Q7XG6YN6.cjs → chunk-V5THPEB2.cjs} +3 -5
  12. package/dist/chunk-V5THPEB2.cjs.map +1 -0
  13. package/dist/{chunk-352XNR3C.js → chunk-VGZMISJD.js} +7 -24
  14. package/dist/chunk-VGZMISJD.js.map +1 -0
  15. package/dist/chunk-YZVCK6VZ.cjs +1646 -0
  16. package/dist/chunk-YZVCK6VZ.cjs.map +1 -0
  17. package/dist/core/index.cjs +23 -23
  18. package/dist/core/index.js +1 -1
  19. package/dist/fp/index.cjs +45 -45
  20. package/dist/fp/index.js +1 -1
  21. package/dist/index.cjs +147 -147
  22. package/dist/index.js +4 -4
  23. package/dist/plugins/index.cjs +9 -9
  24. package/dist/plugins/index.d.cts +287 -133
  25. package/dist/plugins/index.d.ts +287 -133
  26. package/dist/plugins/index.js +1 -1
  27. package/dist/utils/index.cjs +73 -73
  28. package/dist/utils/index.js +1 -1
  29. package/package.json +5 -2
  30. package/dist/chunk-2PXWQDZC.js.map +0 -1
  31. package/dist/chunk-352XNR3C.js.map +0 -1
  32. package/dist/chunk-ALBD5XC5.js.map +0 -1
  33. package/dist/chunk-O7K662J5.cjs +0 -842
  34. package/dist/chunk-O7K662J5.cjs.map +0 -1
  35. package/dist/chunk-Q73NOVWX.cjs.map +0 -1
  36. package/dist/chunk-Q7XG6YN6.cjs.map +0 -1
  37. package/dist/chunk-YGMBCZJQ.js +0 -833
  38. package/dist/chunk-YGMBCZJQ.js.map +0 -1
  39. package/dist/chunk-ZT6HVG4G.cjs.map +0 -1
@@ -0,0 +1,1637 @@
1
+ import { __name } from './chunk-7QVYU63E.js';
2
+
3
+ // src/plugins/speech/synthesis.ts
4
var SynthesisAudioUtils = {
  /**
   * Create an AudioContext (falls back to the webkit-prefixed constructor).
   * @throws {Error} when running outside a browser or AudioContext is unsupported
   */
  createAudioContext() {
    const AudioCtor = typeof window !== "undefined" ? window.AudioContext || window.webkitAudioContext : null;
    if (!AudioCtor) {
      throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 AudioContext");
    }
    return new AudioCtor();
  },
  /**
   * Convert an ArrayBuffer to a Base64 string.
   * Uses btoa in browsers; in Node the fallback decodes the byte string as
   * latin1 ("binary") — Buffer.from(str) without an encoding would re-encode
   * bytes >= 0x80 as UTF-8 and corrupt the output.
   */
  arrayBufferToBase64(buffer) {
    let binary = "";
    const bytes = new Uint8Array(buffer);
    for (let i = 0; i < bytes.byteLength; i++) {
      binary += String.fromCharCode(bytes[i] ?? 0);
    }
    return typeof btoa !== "undefined" ? btoa(binary) : Buffer.from(binary, "binary").toString("base64");
  },
  /**
   * Convert a Base64 string back to an ArrayBuffer.
   */
  base64ToArrayBuffer(base64) {
    const binary = typeof atob !== "undefined" ? atob(base64) : Buffer.from(base64, "base64").toString("binary");
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return bytes.buffer;
  },
  /**
   * Roughly estimate audio duration in milliseconds from the encoded size.
   * Unknown formats fall back to 128 kbps instead of producing NaN
   * (previously bitRates[format] could be undefined).
   * @param byteLength Encoded audio size in bytes
   * @param format "mp3" | "wav" | "ogg" | "pcm" (others use the fallback)
   */
  estimateDuration(byteLength, format) {
    const bitRates = {
      mp3: 128e3,
      // 128 kbps
      wav: 256e3,
      // 16bit 16kHz mono
      ogg: 96e3,
      // 96 kbps
      pcm: 256e3
      // 16bit 16kHz mono
    };
    const bitRate = bitRates[format] ?? 128e3;
    return Math.ceil(byteLength * 8 / bitRate * 1e3);
  }
};
54
var BaseSynthesisStrategy = class {
  constructor(config) {
    this.config = config;
  }
  static {
    __name(this, "BaseSynthesisStrategy");
  }
  // Per-event handler registries.
  listeners = {
    start: [],
    end: [],
    pause: [],
    resume: [],
    boundary: [],
    mark: [],
    error: [],
    state: []
  };
  _status = "IDLE" /* IDLE */;
  /**
   * Current lifecycle status of the strategy.
   */
  get status() {
    return this._status;
  }
  /**
   * Transition to a new status; "state" listeners fire only on change.
   */
  setStatus(status) {
    if (status === this._status) return;
    this._status = status;
    this.emit("state", status);
  }
  /**
   * Subscribe a handler to an event.
   */
  on(event, fn) {
    this.listeners[event].push(fn);
  }
  /**
   * Unsubscribe a previously registered handler.
   */
  off(event, fn) {
    const bucket = this.listeners[event];
    const pos = bucket.indexOf(fn);
    if (pos !== -1) {
      bucket.splice(pos, 1);
    }
  }
  /**
   * Invoke every handler for an event; handler errors are logged, never thrown.
   */
  emit(event, data) {
    this.listeners[event].forEach((fn) => {
      try {
        fn(data);
      } catch (e) {
        console.error(`[Synthesis] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF (${event}):`, e);
      }
    });
  }
  /**
   * Cancel playback and drop every registered listener.
   */
  dispose() {
    this.cancel();
    for (const key of Object.keys(this.listeners)) {
      this.listeners[key] = [];
    }
  }
};
126
var NativeSynthesisStrategy = class extends BaseSynthesisStrategy {
  static {
    __name(this, "NativeSynthesisStrategy");
  }
  // window.speechSynthesis handle; stays null when the API is unavailable.
  synthesis = null;
  // Utterance currently queued/speaking. Read by isSpeaking(); keeping the
  // reference also prevents the utterance from being garbage-collected
  // mid-playback (which silences its events in some browsers).
  currentUtterance = null;
  // Set once a non-empty voice list has been observed.
  voicesLoaded = false;
  // Cached promise so concurrent getVoices() calls share one load.
  voicesPromise = null;
  constructor(config) {
    super(config);
    if (this.isAvailable()) {
      this.synthesis = window.speechSynthesis;
    }
  }
  /**
   * Whether the native Web Speech synthesis API exists in this environment.
   */
  isAvailable() {
    return typeof window !== "undefined" && "speechSynthesis" in window;
  }
  /**
   * Fetch available voices, normalized to the plugin's voice shape.
   * Browsers may populate voices asynchronously, so this also listens for
   * "voiceschanged" and gives up with [] after a 3-second timeout.
   * NOTE(review): a timed-out empty result stays cached in voicesPromise,
   * so later calls never retry — confirm this is intended.
   */
  async getVoices() {
    if (!this.synthesis) return [];
    if (this.voicesPromise) {
      return this.voicesPromise;
    }
    this.voicesPromise = new Promise((resolve) => {
      const loadVoices = /* @__PURE__ */ __name(() => {
        const voices = this.synthesis.getVoices();
        if (voices.length > 0) {
          this.voicesLoaded = true;
          resolve(
            voices.map((voice) => ({
              id: voice.voiceURI,
              name: voice.name,
              lang: voice.lang,
              localService: voice.localService,
              default: voice.default,
              provider: "browser"
            }))
          );
        }
      }, "loadVoices");
      loadVoices();
      if (!this.voicesLoaded) {
        this.synthesis.addEventListener("voiceschanged", loadVoices, { once: true });
        // Give up after 3 seconds so callers are not left hanging.
        setTimeout(() => {
          if (!this.voicesLoaded) {
            resolve([]);
          }
        }, 3e3);
      }
    });
    return this.voicesPromise;
  }
  /**
   * Speak `text` with the native engine. Cancels any in-flight utterance,
   * applies the merged config (lang / volume / rate / pitch / voice) and
   * resolves when playback ends; rejects on a synthesis error.
   * @throws {{code: "NOT_SUPPORTED"}} when the API is unavailable
   */
  async speak(text, config) {
    if (!this.synthesis) {
      throw { code: "NOT_SUPPORTED", message: "\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u8BED\u97F3\u5408\u6210" };
    }
    this.cancel();
    this.setStatus("LOADING" /* LOADING */);
    return new Promise((resolve, reject) => {
      // Per-call config overrides the instance-level defaults.
      const mergedConfig = { ...this.config, ...config };
      const utterance = new SpeechSynthesisUtterance(text);
      this.currentUtterance = utterance;
      if (mergedConfig.lang) utterance.lang = mergedConfig.lang;
      if (mergedConfig.volume !== void 0) utterance.volume = mergedConfig.volume;
      if (mergedConfig.rate !== void 0) utterance.rate = mergedConfig.rate;
      if (mergedConfig.pitch !== void 0) utterance.pitch = mergedConfig.pitch;
      if (mergedConfig.voice) {
        // Accept either a voice name/URI string or a voice object with an id.
        const voices = this.synthesis.getVoices();
        const voiceConfig = mergedConfig.voice;
        const targetVoice = typeof voiceConfig === "string" ? voices.find((v) => v.name === voiceConfig || v.voiceURI === voiceConfig) : voices.find((v) => v.voiceURI === voiceConfig.id);
        if (targetVoice) {
          utterance.voice = targetVoice;
        }
      }
      utterance.onstart = () => {
        this.setStatus("SPEAKING" /* SPEAKING */);
        this.emit("start");
      };
      utterance.onend = () => {
        this.setStatus("IDLE" /* IDLE */);
        this.currentUtterance = null;
        this.emit("end");
        resolve();
      };
      utterance.onpause = () => {
        this.setStatus("PAUSED" /* PAUSED */);
        this.emit("pause");
      };
      utterance.onresume = () => {
        this.setStatus("SPEAKING" /* SPEAKING */);
        this.emit("resume");
      };
      // Word/sentence boundary progress notifications.
      utterance.onboundary = (event) => {
        this.emit("boundary", {
          charIndex: event.charIndex,
          charLength: event.charLength,
          elapsedTime: event.elapsedTime,
          name: event.name
        });
      };
      utterance.onmark = (event) => {
        this.emit("mark", { name: event.name });
      };
      utterance.onerror = (event) => {
        this.setStatus("IDLE" /* IDLE */);
        this.currentUtterance = null;
        const error = {
          code: "UNKNOWN",
          message: `\u8BED\u97F3\u5408\u6210\u9519\u8BEF: ${event.error}`,
          originalError: event
        };
        this.emit("error", error);
        reject(error);
      };
      this.synthesis.speak(utterance);
    });
  }
  /** Pause the engine (no-op unless this strategy is actively speaking). */
  pause() {
    if (this.synthesis && this.isSpeaking()) {
      this.synthesis.pause();
    }
  }
  /** Resume a paused utterance (no-op unless paused). */
  resume() {
    if (this.synthesis && this.isPaused()) {
      this.synthesis.resume();
    }
  }
  /** Stop playback, drop the tracked utterance and return to IDLE. */
  cancel() {
    if (this.synthesis) {
      this.synthesis.cancel();
      this.currentUtterance = null;
      this.setStatus("IDLE" /* IDLE */);
    }
  }
  /** True while an utterance started by this strategy is being spoken. */
  isSpeaking() {
    return this.currentUtterance !== null && (this.synthesis?.speaking ?? false);
  }
  /** True when the engine reports a paused state. */
  isPaused() {
    return this.synthesis?.paused ?? false;
  }
};
267
var CloudSynthesisStrategy = class extends BaseSynthesisStrategy {
  static {
    __name(this, "CloudSynthesisStrategy");
  }
  // User-supplied cloud TTS adapter (required by the constructor).
  adapter;
  audioContext = null;
  sourceNode = null;
  gainNode = null;
  // AudioContext time at which the current playback (logically) started.
  startTime = 0;
  // Seconds already played when paused; used as the resume offset.
  pauseTime = 0;
  // Decoded audio for the most recent speak() call.
  audioBuffer = null;
  constructor(config) {
    super(config);
    if (!config.cloudAdapter) {
      throw new Error("[CloudStrategy] \u9700\u8981\u63D0\u4F9B cloudAdapter");
    }
    this.adapter = config.cloudAdapter;
  }
  /** Cloud playback requires the Web Audio API. */
  isAvailable() {
    return typeof window !== "undefined" && "AudioContext" in window;
  }
  /**
   * List voices from the adapter, normalized to the plugin's voice shape.
   * Returns [] when the adapter has no getVoices or the call fails.
   */
  async getVoices() {
    if (!this.adapter.getVoices) return [];
    try {
      const cloudVoices = await this.adapter.getVoices();
      return cloudVoices.map((v) => ({
        id: v.id,
        name: v.name,
        lang: v.lang,
        localService: false,
        default: false,
        provider: "custom"
      }));
    } catch {
      // Best-effort: adapter failures are treated as "no voices".
      return [];
    }
  }
  /**
   * Synthesize `text` through the adapter and play the decoded audio.
   * @throws {{code: "INVALID_TEXT"}} on blank input
   * @throws {{code: "ADAPTER_ERROR"}} when synthesis or decoding fails
   */
  async speak(text, config) {
    if (!text.trim()) {
      throw { code: "INVALID_TEXT", message: "\u6587\u672C\u5185\u5BB9\u4E3A\u7A7A" };
    }
    this.cancel();
    const mergedConfig = { ...this.config, ...config };
    this.setStatus("LOADING" /* LOADING */);
    try {
      // "start" fires at request time, before the network round-trip —
      // not at first audible output.
      this.emit("start");
      const result = await this.adapter.synthesize(text, mergedConfig);
      if (!this.audioContext) {
        this.audioContext = SynthesisAudioUtils.createAudioContext();
      }
      // Autoplay policies can leave the context suspended until resumed.
      if (this.audioContext.state === "suspended") {
        await this.audioContext.resume();
      }
      // slice(0): decodeAudioData detaches its input, so decode a copy.
      this.audioBuffer = await this.audioContext.decodeAudioData(result.audioData.slice(0));
      await this.playBuffer();
    } catch (e) {
      const error = e;
      this.setStatus("IDLE" /* IDLE */);
      const synthError = {
        code: "ADAPTER_ERROR",
        message: error.message || "\u5408\u6210\u5931\u8D25",
        originalError: e
      };
      this.emit("error", synthError);
      throw synthError;
    }
  }
  /**
   * Start (or restart after pause) playback of this.audioBuffer.
   * Resolves when playback runs to completion.
   * NOTE(review): pause() stops the source without settling this promise,
   * so a speak() awaiting it stays pending after a pause — confirm intended.
   */
  async playBuffer() {
    if (!this.audioContext || !this.audioBuffer) return;
    return new Promise((resolve, reject) => {
      try {
        this.sourceNode = this.audioContext.createBufferSource();
        this.sourceNode.buffer = this.audioBuffer;
        this.gainNode = this.audioContext.createGain();
        this.gainNode.gain.value = this.config.volume ?? 1;
        this.sourceNode.connect(this.gainNode);
        this.gainNode.connect(this.audioContext.destination);
        this.sourceNode.onended = () => {
          // onended also fires on pause()/cancel(); only a SPEAKING status
          // means the buffer actually finished.
          if (this._status === "SPEAKING" /* SPEAKING */) {
            this.setStatus("IDLE" /* IDLE */);
            this.emit("end");
            resolve();
          }
        };
        // Resume from the recorded pause offset, if any.
        const offset = this.pauseTime > 0 ? this.pauseTime : 0;
        this.sourceNode.start(0, offset);
        this.startTime = this.audioContext.currentTime - offset;
        this.pauseTime = 0;
        this.setStatus("SPEAKING" /* SPEAKING */);
      } catch (e) {
        reject(e);
      }
    });
  }
  /** Pause playback: stop the source and record the elapsed offset. */
  pause() {
    if (this._status === "SPEAKING" /* SPEAKING */ && this.audioContext && this.sourceNode) {
      this.pauseTime = this.audioContext.currentTime - this.startTime;
      this.sourceNode.stop();
      this.sourceNode.disconnect();
      this.sourceNode = null;
      this.setStatus("PAUSED" /* PAUSED */);
      this.emit("pause");
    }
  }
  /** Resume paused playback from the recorded offset. */
  resume() {
    if (this._status === "PAUSED" /* PAUSED */ && this.audioBuffer) {
      this.emit("resume");
      void this.playBuffer();
    }
  }
  /** Stop playback and reset all per-utterance state. */
  cancel() {
    if (this.sourceNode) {
      try {
        this.sourceNode.stop();
        this.sourceNode.disconnect();
      } catch {
        // Source may already be stopped; ignore.
      }
      this.sourceNode = null;
    }
    this.audioBuffer = null;
    this.pauseTime = 0;
    this.startTime = 0;
    this.setStatus("IDLE" /* IDLE */);
  }
  /** True while the strategy reports SPEAKING. */
  isSpeaking() {
    return this._status === "SPEAKING" /* SPEAKING */;
  }
  /** True while the strategy reports PAUSED. */
  isPaused() {
    return this._status === "PAUSED" /* PAUSED */;
  }
  /** Release listeners, stop playback and close the AudioContext. */
  dispose() {
    // super.dispose() already calls cancel(); the second call is harmless.
    super.dispose();
    this.cancel();
    if (this.audioContext) {
      void this.audioContext.close();
      this.audioContext = null;
    }
  }
};
406
var SpeechSynthesizerImpl = class {
  static {
    __name(this, "SpeechSynthesizerImpl");
  }
  // Active synthesis strategy (native or cloud); null until initialize().
  strategy = null;
  config = {};
  _currentProvider = "browser";
  // Lifecycle: "idle" -> "loading" -> "ready" | "error".
  _status = "idle";
  eventHandlers = /* @__PURE__ */ new Map();
  // Providers registered via registerProvider(); stored but not consulted
  // by initializeStrategy() in this version.
  customProviders = /* @__PURE__ */ new Map();
  get currentProvider() {
    return this._currentProvider;
  }
  get status() {
    return this._status;
  }
  /**
   * Current synthesis status reported by the active strategy.
   */
  get synthesisStatus() {
    return this.strategy?.status ?? "IDLE" /* IDLE */;
  }
  /**
   * Initialize the synthesizer: merge defaults with `config` and build a
   * strategy. Resolves when ready; rejects (status "error") on failure.
   */
  initialize(config) {
    this._status = "loading";
    this.config = {
      lang: "zh-CN",
      volume: 1,
      rate: 1,
      pitch: 1,
      preferredProvider: "browser",
      autoFallback: true,
      fallbackProviders: ["azure", "google", "aws"],
      mode: "auto",
      audioFormat: "mp3",
      ...config
    };
    try {
      this.initializeStrategy();
      this._status = "ready";
      return Promise.resolve();
    } catch (error) {
      this._status = "error";
      return Promise.reject(error);
    }
  }
  /**
   * Select and construct the synthesis strategy from config.mode:
   * "native" (or "auto" with native support and no adapter) -> browser
   * speechSynthesis; "cloud" (or "auto" with an adapter) -> cloud strategy.
   * @throws {Error} when no usable strategy exists
   */
  initializeStrategy() {
    const advConfig = this.config;
    const mode = advConfig.mode ?? "auto";
    const hasNative = typeof window !== "undefined" && "speechSynthesis" in window;
    if (mode === "native" || mode === "auto" && hasNative && !advConfig.cloudAdapter) {
      const nativeStrategy = new NativeSynthesisStrategy(advConfig);
      if (!nativeStrategy.isAvailable()) {
        throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u539F\u751F\u8BED\u97F3\u5408\u6210");
      }
      this.strategy = nativeStrategy;
      this._currentProvider = "browser";
    } else if (mode === "cloud" || mode === "auto" && advConfig.cloudAdapter) {
      if (!advConfig.cloudAdapter) {
        throw new Error("\u4E91\u7AEF\u6A21\u5F0F\u9700\u8981\u63D0\u4F9B cloudAdapter");
      }
      this.strategy = new CloudSynthesisStrategy(advConfig);
      this._currentProvider = "custom";
    } else {
      throw new Error("\u6CA1\u6709\u53EF\u7528\u7684\u5408\u6210\u7B56\u7565");
    }
    this.forwardStrategyEvents();
  }
  /**
   * Re-emit strategy-level events as typed synthesizer events, copying only
   * the fields each payload actually carries.
   */
  forwardStrategyEvents() {
    if (!this.strategy) return;
    this.strategy.on("start", () => this.emit("start", { type: "start" }));
    this.strategy.on("end", () => this.emit("end", { type: "end" }));
    this.strategy.on("pause", () => this.emit("pause", { type: "pause" }));
    this.strategy.on("resume", () => this.emit("resume", { type: "resume" }));
    this.strategy.on("boundary", (data) => {
      const event = { type: "boundary" };
      if (data.charIndex !== void 0) event.charIndex = data.charIndex;
      if (data.charLength !== void 0) event.charLength = data.charLength;
      if (data.elapsedTime !== void 0) event.elapsedTime = data.elapsedTime;
      if (data.name !== void 0) event.name = data.name;
      this.emit("boundary", event);
    });
    this.strategy.on("mark", (data) => {
      const event = { type: "mark" };
      if (data.name !== void 0) event.name = data.name;
      this.emit("mark", event);
    });
    this.strategy.on("error", (err) => {
      const errorEvent = {
        type: "error",
        error: {
          code: err.code,
          message: err.message
        }
      };
      if (err.originalError) {
        errorEvent.error.originalError = err.originalError;
      }
      this.emit("error", errorEvent);
    });
  }
  /**
   * List available voices from the active strategy.
   * @throws {Error} when called before initialize()
   */
  async getVoices() {
    if (!this.strategy) {
      throw new Error("\u8BED\u97F3\u5408\u6210\u5668\u672A\u521D\u59CB\u5316");
    }
    return this.strategy.getVoices();
  }
  /**
   * Speak `text`; resolves when the strategy finishes.
   * @throws {Error} when the synthesizer is not in the "ready" state
   */
  async speak(text, config) {
    if (this._status !== "ready") {
      throw new Error("\u8BED\u97F3\u5408\u6210\u5668\u672A\u5C31\u7EEA");
    }
    return this.strategy?.speak(text, config);
  }
  /** Pause speaking (delegates to the strategy). */
  pause() {
    this.strategy?.pause();
  }
  /** Resume speaking (delegates to the strategy). */
  resume() {
    this.strategy?.resume();
  }
  /** Cancel speaking (delegates to the strategy). */
  cancel() {
    this.strategy?.cancel();
  }
  /** Whether the strategy is currently speaking. */
  isSpeaking() {
    return this.strategy?.isSpeaking() ?? false;
  }
  /** Whether the strategy is currently paused. */
  isPaused() {
    return this.strategy?.isPaused() ?? false;
  }
  /** Add an event listener for a synthesizer event type. */
  on(event, handler) {
    if (!this.eventHandlers.has(event)) {
      this.eventHandlers.set(event, /* @__PURE__ */ new Set());
    }
    this.eventHandlers.get(event).add(handler);
  }
  /** Remove a previously added event listener. */
  off(event, handler) {
    this.eventHandlers.get(event)?.delete(handler);
  }
  /** Dispatch an event to all handlers; handler errors are logged, not thrown. */
  emit(type, event) {
    this.eventHandlers.get(type)?.forEach((handler) => {
      try {
        handler(event);
      } catch (e) {
        console.error("[SpeechSynthesizer] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF:", e);
      }
    });
  }
  /** Dispose the strategy and reset to the idle state. */
  dispose() {
    this.strategy?.dispose();
    this.strategy = null;
    this.eventHandlers.clear();
    this._status = "idle";
  }
  /** Register a custom provider implementation (stored, not yet activated). */
  registerProvider(type, provider) {
    this.customProviders.set(type, provider);
  }
  /**
   * Switch configuration to cloud mode with the given adapter.
   * Takes effect on the next initialize()/initializeStrategy() call.
   */
  useCloudAdapter(adapter) {
    this.config.cloudAdapter = adapter;
    this.config.mode = "cloud";
  }
};
613
/**
 * Build and initialize a SpeechSynthesizerImpl in one step.
 * @param config Configuration forwarded to initialize()
 * @returns The ready-to-use synthesizer instance
 */
async function createSpeechSynthesizer(config) {
  const instance = new SpeechSynthesizerImpl();
  await instance.initialize(config);
  return instance;
}
__name(createSpeechSynthesizer, "createSpeechSynthesizer");
619
/**
 * Feature-detect native speech synthesis support.
 * @returns true when window.speechSynthesis exists
 */
function isSpeechSynthesisSupported() {
  if (typeof window === "undefined") return false;
  return "speechSynthesis" in window;
}
__name(isSpeechSynthesisSupported, "isSpeechSynthesisSupported");
623
/**
 * One-shot convenience helper: create a synthesizer, speak the text, then
 * dispose the synthesizer even when speaking fails.
 * @param text Text to speak
 * @param config Optional synthesizer/speak configuration
 */
async function speak(text, config) {
  const synth = await createSpeechSynthesizer(config);
  try {
    await synth.speak(text, config);
  } finally {
    synth.dispose();
  }
}
__name(speak, "speak");
632
+
633
+ // src/plugins/speech/recognition.ts
634
var AudioUtils = {
  /**
   * Resample audio with nearest-sample (zero-order hold) interpolation.
   * @param data Input samples
   * @param inputRate Input sample rate (Hz)
   * @param outputRate Output sample rate (Hz)
   * @returns Resampled Float32Array (the original array when rates match)
   */
  resample(data, inputRate, outputRate) {
    if (inputRate === outputRate) return data;
    const compression = inputRate / outputRate;
    const length = Math.ceil(data.length / compression);
    const result = new Float32Array(length);
    for (let i = 0; i < length; i++) {
      result[i] = data[Math.floor(i * compression)] ?? 0;
    }
    return result;
  },
  /**
   * Convert Float32 samples in [-1, 1] to 16-bit signed PCM.
   * Out-of-range values are clamped first.
   * @param input Float32 samples
   * @returns Int16 PCM samples
   */
  floatTo16BitPCM(input) {
    const output = new Int16Array(input.length);
    for (let i = 0; i < input.length; i++) {
      const s = Math.max(-1, Math.min(1, input[i] ?? 0));
      // Asymmetric scaling maps -1 -> -32768 and +1 -> +32767.
      output[i] = s < 0 ? s * 32768 : s * 32767;
    }
    return output;
  },
  /**
   * Root-mean-square level of the samples.
   * Returns 0 for empty input (previously produced NaN via 0/0).
   * @param data Audio samples
   * @returns RMS value
   */
  calculateRMS(data) {
    if (data.length === 0) return 0;
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      const sample = data[i] ?? 0;
      sum += sample * sample;
    }
    return Math.sqrt(sum / data.length);
  },
  /**
   * Concatenate Int16 PCM chunks into one array.
   * @param buffers ArrayBuffers, each holding Int16 samples
   * @param totalLength Total number of samples across all chunks
   * @returns Merged Int16Array
   */
  mergeBuffers(buffers, totalLength) {
    const result = new Int16Array(totalLength);
    let offset = 0;
    for (const buffer of buffers) {
      const view = new Int16Array(buffer);
      result.set(view, offset);
      offset += view.length;
    }
    return result;
  },
  /**
   * Wrap raw Int16 PCM samples in a WAV (RIFF) container.
   * @param samples PCM samples
   * @param sampleRate Sample rate (Hz), default 16 kHz
   * @param channels Channel count, default mono
   * @returns WAV file contents as an ArrayBuffer
   */
  encodeWAV(samples, sampleRate = 16e3, channels = 1) {
    const buffer = new ArrayBuffer(44 + samples.length * 2);
    const view = new DataView(buffer);
    const writeString = /* @__PURE__ */ __name((offset, str) => {
      for (let i = 0; i < str.length; i++) {
        view.setUint8(offset + i, str.charCodeAt(i));
      }
    }, "writeString");
    // RIFF header.
    writeString(0, "RIFF");
    view.setUint32(4, 36 + samples.length * 2, true);
    writeString(8, "WAVE");
    // "fmt " sub-chunk: PCM format, channels, rates and block alignment.
    writeString(12, "fmt ");
    view.setUint32(16, 16, true);
    view.setUint16(20, 1, true);
    view.setUint16(22, channels, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * channels * 2, true);
    view.setUint16(32, channels * 2, true);
    view.setUint16(34, 16, true);
    // "data" sub-chunk followed by the little-endian samples.
    writeString(36, "data");
    view.setUint32(40, samples.length * 2, true);
    const dataOffset = 44;
    for (let i = 0; i < samples.length; i++) {
      view.setInt16(dataOffset + i * 2, samples[i] ?? 0, true);
    }
    return buffer;
  },
  /**
   * Convert an ArrayBuffer to a Base64 string.
   * Node fallback decodes the byte string as latin1 ("binary") so bytes
   * >= 0x80 round-trip correctly (Buffer.from(str) without an encoding
   * would re-encode them as UTF-8 and corrupt the output).
   * @param buffer ArrayBuffer
   * @returns Base64 string
   */
  arrayBufferToBase64(buffer) {
    let binary = "";
    const bytes = new Uint8Array(buffer);
    const len = bytes.byteLength;
    for (let i = 0; i < len; i++) {
      binary += String.fromCharCode(bytes[i] ?? 0);
    }
    return typeof btoa !== "undefined" ? btoa(binary) : Buffer.from(binary, "binary").toString("base64");
  }
};
743
// Inlined AudioWorkletProcessor source, evaluated inside the AudioWorklet
// (typically via a Blob URL). The processor: (1) runs RMS-based voice
// activity detection and posts VAD_TIMEOUT after a configured stretch of
// silence, (2) naively downsamples to targetRate by nearest-sample picking,
// and (3) converts buffered samples to Int16 PCM, posting AUDIO_DATA chunks
// (transferred, zero-copy) once ~2048 samples have accumulated.
// NOTE: this is string data shipped to the worklet at runtime — do not edit
// its contents without re-testing the worklet protocol.
var WORKLET_CODE = `
class SpeechProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.buffer = [];
    this.targetRate = 16000;
    this.currentRate = 44100;
    this.silenceFrames = 0;
    this.maxSilenceFrames = 0;
    this.vadThreshold = 0.02;
    this.isRecording = false;
    this.port.onmessage = this.handleMessage.bind(this);
  }

  static get parameterDescriptors() {
    return [];
  }

  process(inputs) {
    const input = inputs[0];
    if (!input || !input.length || !this.isRecording) return true;
    const channelData = input[0];

    // VAD \u68C0\u6D4B
    let sum = 0;
    for (let i = 0; i < channelData.length; i++) {
      sum += channelData[i] * channelData[i];
    }
    const rms = Math.sqrt(sum / channelData.length);

    if (rms < this.vadThreshold) {
      this.silenceFrames++;
      if (this.maxSilenceFrames > 0 && this.silenceFrames > this.maxSilenceFrames) {
        this.port.postMessage({ type: 'VAD_TIMEOUT' });
        this.silenceFrames = 0;
      }
    } else {
      this.silenceFrames = 0;
    }

    // \u91CD\u91C7\u6837
    if (this.targetRate < this.currentRate) {
      const compression = this.currentRate / this.targetRate;
      for (let i = 0; i < channelData.length; i += compression) {
        this.buffer.push(channelData[Math.floor(i)]);
      }
    } else {
      this.buffer.push(...channelData);
    }

    // \u5B9A\u671F\u5237\u65B0 (~128ms)
    if (this.buffer.length >= 2048) {
      this.flush();
    }
    return true;
  }

  flush() {
    if (this.buffer.length === 0) return;
    const pcmData = new Int16Array(this.buffer.length);
    for (let i = 0; i < this.buffer.length; i++) {
      let s = Math.max(-1, Math.min(1, this.buffer[i]));
      s = s < 0 ? s * 0x8000 : s * 0x7FFF;
      pcmData[i] = s;
    }
    this.port.postMessage({ type: 'AUDIO_DATA', payload: pcmData.buffer }, [pcmData.buffer]);
    this.buffer = [];
  }

  handleMessage(event) {
    const { type, payload } = event.data;
    if (type === 'CONFIG') {
      this.targetRate = payload.targetRate || 16000;
      this.currentRate = payload.currentRate || 44100;
      this.vadThreshold = payload.vadThreshold || 0.02;
      const secondsPerBlock = 128 / this.currentRate;
      this.maxSilenceFrames = (payload.vadDuration / 1000) / secondsPerBlock;
    }
    if (type === 'SET_RECORDING') {
      this.isRecording = payload;
      if (!payload) this.flush();
    }
  }
}
registerProcessor('speech-processor', SpeechProcessor);
`;
829
var BaseRecognitionStrategy = class {
  constructor(config) {
    this.config = config;
  }
  static {
    __name(this, "BaseRecognitionStrategy");
  }
  // Per-event handler registries.
  listeners = {
    result: [],
    error: [],
    state: [],
    start: [],
    end: [],
    soundstart: [],
    soundend: [],
    speechstart: [],
    speechend: [],
    audiostart: [],
    audioend: []
  };
  _status = "IDLE" /* IDLE */;
  /**
   * Current recognition status.
   */
  get status() {
    return this._status;
  }
  /**
   * Transition to a new status; "state" listeners fire only on change.
   */
  setStatus(status) {
    if (status === this._status) return;
    this._status = status;
    this.emit("state", status);
  }
  /**
   * Subscribe a handler to an event.
   */
  on(event, fn) {
    this.listeners[event].push(fn);
  }
  /**
   * Unsubscribe a previously registered handler.
   */
  off(event, fn) {
    const bucket = this.listeners[event];
    const pos = bucket.indexOf(fn);
    if (pos !== -1) {
      bucket.splice(pos, 1);
    }
  }
  /**
   * Invoke every handler for an event; handler errors are logged, never thrown.
   */
  emit(event, data) {
    this.listeners[event].forEach((fn) => {
      try {
        fn(data);
      } catch (e) {
        console.error(`[Recognition] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF (${event}):`, e);
      }
    });
  }
  /**
   * Abort recognition and drop every registered listener.
   */
  dispose() {
    this.abort();
    for (const key of Object.keys(this.listeners)) {
      this.listeners[key] = [];
    }
  }
};
904
var NativeRecognitionStrategy = class extends BaseRecognitionStrategy {
  static {
    __name(this, "NativeRecognitionStrategy");
  }
  // Active SpeechRecognition instance; rebuilt on every start().
  recognition = null;
  // Constructor resolved at build time (standard or webkit-prefixed);
  // null when the browser lacks the API.
  SpeechRecognitionClass = null;
  constructor(config) {
    super(config);
    const srClass = typeof window !== "undefined" ? window.SpeechRecognition ?? window.webkitSpeechRecognition : void 0;
    this.SpeechRecognitionClass = srClass ?? null;
  }
  /**
   * Whether the native Web Speech recognition API is available.
   */
  isAvailable() {
    return this.SpeechRecognitionClass !== null;
  }
  /**
   * Start a recognition session. Resolves once recording has actually begun
   * (onstart). Rejects on unrecoverable errors (permission / audio-capture)
   * or when recognition.start() itself throws; other errors are only
   * surfaced through the "error" event. No-op unless currently IDLE.
   * @throws {{code: "NOT_SUPPORTED"}} when the API is unavailable
   */
  async start() {
    if (!this.SpeechRecognitionClass) {
      throw { code: "NOT_SUPPORTED", message: "\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u8BED\u97F3\u8BC6\u522B" };
    }
    if (this._status !== "IDLE" /* IDLE */) {
      return;
    }
    this.setStatus("CONNECTING" /* CONNECTING */);
    return new Promise((resolve, reject) => {
      this.recognition = new this.SpeechRecognitionClass();
      this.recognition.lang = this.config.lang ?? "zh-CN";
      this.recognition.continuous = this.config.continuous ?? false;
      this.recognition.interimResults = this.config.interimResults ?? true;
      this.recognition.maxAlternatives = this.config.maxAlternatives ?? 1;
      this.recognition.onstart = () => {
        this.setStatus("RECORDING" /* RECORDING */);
        this.emit("start");
        this.emit("audiostart");
        resolve();
      };
      this.recognition.onend = () => {
        this.setStatus("IDLE" /* IDLE */);
        this.emit("audioend");
        this.emit("end");
      };
      this.recognition.onresult = (e) => {
        // Forward only the newest result chunk (e.resultIndex).
        const results = e.results[e.resultIndex];
        if (results?.[0]) {
          this.emit("result", {
            transcript: results[0].transcript,
            isFinal: results.isFinal,
            confidence: results[0].confidence,
            original: e
          });
        }
      };
      this.recognition.onerror = (e) => {
        // Unmapped native codes fall back to UNKNOWN.
        const errorMap = {
          "not-allowed": "NOT_ALLOWED",
          "no-speech": "NO_SPEECH",
          network: "NETWORK"
        };
        const error = {
          code: errorMap[e.error] ?? "UNKNOWN",
          message: this.getErrorMessage(e.error),
          originalError: e
        };
        this.setStatus("IDLE" /* IDLE */);
        this.emit("error", error);
        // Reject only on unrecoverable errors; recoverable ones (e.g.
        // no-speech) are reported via the "error" event alone.
        if (e.error === "not-allowed" || e.error === "audio-capture") {
          reject(error);
        }
      };
      this.recognition.onsoundstart = () => this.emit("soundstart");
      this.recognition.onsoundend = () => this.emit("soundend");
      this.recognition.onspeechstart = () => this.emit("speechstart");
      this.recognition.onspeechend = () => this.emit("speechend");
      try {
        this.recognition.start();
      } catch (e) {
        this.setStatus("IDLE" /* IDLE */);
        reject({ code: "UNKNOWN", message: "\u542F\u52A8\u8BC6\u522B\u5931\u8D25", originalError: e });
      }
    });
  }
  /** Gracefully stop recording; pending final results may still arrive. */
  stop() {
    if (this.recognition && this._status === "RECORDING" /* RECORDING */) {
      this.recognition.stop();
    }
  }
  /** Abort immediately, discarding pending results. */
  abort() {
    if (this.recognition) {
      this.recognition.abort();
      this.setStatus("IDLE" /* IDLE */);
    }
  }
  /** True while actively recording. */
  isListening() {
    return this._status === "RECORDING" /* RECORDING */;
  }
  /** Map a native SpeechRecognition error code to a readable message. */
  getErrorMessage(errorCode) {
    const messages = {
      "no-speech": "\u672A\u68C0\u6D4B\u5230\u8BED\u97F3\u8F93\u5165",
      "audio-capture": "\u65E0\u6CD5\u8BBF\u95EE\u9EA6\u514B\u98CE",
      "not-allowed": "\u9EA6\u514B\u98CE\u6743\u9650\u88AB\u62D2\u7EDD",
      network: "\u7F51\u7EDC\u9519\u8BEF",
      aborted: "\u8BC6\u522B\u88AB\u4E2D\u6B62",
      "language-not-supported": "\u4E0D\u652F\u6301\u7684\u8BED\u8A00",
      "service-not-allowed": "\u8BED\u97F3\u670D\u52A1\u4E0D\u53EF\u7528"
    };
    return messages[errorCode] ?? `\u8BED\u97F3\u8BC6\u522B\u9519\u8BEF: ${errorCode}`;
  }
};
1013
var CloudRecognitionStrategy = class extends BaseRecognitionStrategy {
  static {
    __name(this, "CloudRecognitionStrategy");
  }
  // ---- audio pipeline handles ----
  audioContext = null;
  mediaStream = null;
  workletNode = null;
  scriptProcessor = null;
  socket = null;
  adapter;
  transport;
  // HTTP-mode buffering: PCM chunks merged into one WAV when stop() is called
  pcmChunks = [];
  totalPCMLength = 0; // running 16-bit sample count (byteLength / 2 per chunk)
  // Listener that stops recording when the page goes to the background
  visibilityHandler = null;
  // Bounded queue for audio frames produced while the socket is still CONNECTING
  msgQueue = [];
  MSG_QUEUE_MAX = 50;
  // ScriptProcessor fallback VAD counters
  spSilenceCount = 0;
  spMaxSilence = 0;
  isRecordingFlag = false;
  // Reconnect bookkeeping
  reconnectAttempts = 0;
  reconnectTimer = null;
  // Whether the AudioWorklet module has been added to this instance's context
  workletLoaded = false;
  /**
   * @param {Object} config - advanced recognition config. `cloudAdapter` is
   *   required; `transport` selects "websocket" (streaming) or "http"
   *   (buffer-then-upload short audio), defaulting to "websocket".
   * @throws {Error} if `cloudAdapter` is missing.
   */
  constructor(config) {
    super(config);
    if (!config.cloudAdapter) {
      throw new Error("[CloudStrategy] \u9700\u8981\u63D0\u4F9B cloudAdapter");
    }
    this.adapter = config.cloudAdapter;
    this.transport = config.transport ?? "websocket";
  }
  /**
   * Start cloud recognition: in websocket mode open the socket first, then
   * acquire the microphone and begin feeding PCM frames.
   * Emits "error" and rethrows (after cleanup) on any failure.
   */
  async start() {
    if (this._status !== "IDLE" /* IDLE */) {
      return;
    }
    this.setStatus("CONNECTING" /* CONNECTING */);
    this.pcmChunks = [];
    this.totalPCMLength = 0;
    this.reconnectAttempts = 0;
    try {
      if (this.transport === "websocket") {
        if (!this.adapter.getConnectUrl) {
          throw new Error("\u9002\u914D\u5668\u7F3A\u5C11 getConnectUrl \u65B9\u6CD5");
        }
        // getConnectUrl may be sync or async; normalize via Promise.resolve
        const url = await Promise.resolve(this.adapter.getConnectUrl());
        await this.initWebSocket(url);
      }
      const audioConfig = this.config.audioConfig ?? {};
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: audioConfig.echoCancellation ?? true,
          noiseSuppression: audioConfig.noiseSuppression ?? true,
          autoGainControl: audioConfig.autoGainControl ?? true
        }
      });
      await this.initAudioContext();
      if (this.transport === "http") {
        // No socket handshake in HTTP mode; recording starts immediately.
        this.setStatus("RECORDING" /* RECORDING */);
      }
      this.setupVisibilityListener();
      this.emit("start");
      this.emit("audiostart");
    } catch (err) {
      const error = err;
      this.emit("error", {
        code: "NOT_ALLOWED",
        message: error.message || "\u542F\u52A8\u5931\u8D25",
        originalError: err
      });
      this.cleanup();
      throw err;
    }
  }
  /**
   * Open the WebSocket to the recognition service.
   * Resolves once the connection is open (and the optional handshake message
   * has been sent); rejects on connect error or a 10s timeout.
   * @param {string} url - connection URL supplied by the adapter
   * @returns {Promise<void>}
   */
  initWebSocket(url) {
    return new Promise((resolve, reject) => {
      this.socket = new WebSocket(url);
      this.socket.binaryType = "arraybuffer";
      const timeout = setTimeout(() => {
        reject(new Error("WebSocket \u8FDE\u63A5\u8D85\u65F6"));
      }, 1e4);
      this.socket.onopen = () => {
        clearTimeout(timeout);
        this.setStatus("RECORDING" /* RECORDING */);
        this.reconnectAttempts = 0;
        if (this.adapter.getHandshakeParams) {
          const params = this.adapter.getHandshakeParams();
          if (params) {
            const msg = typeof params === "string" ? params : JSON.stringify(params);
            this.socket?.send(msg);
          }
        }
        resolve();
      };
      this.socket.onerror = (e) => {
        clearTimeout(timeout);
        reject(e);
      };
      this.socket.onmessage = (e) => {
        try {
          const data = e.data;
          const raw = typeof data === "string" ? data : new TextDecoder().decode(data);
          const res = this.adapter.parseResult(JSON.parse(raw));
          if (res) {
            this.emit("result", res);
          }
        } catch {
          // Malformed / non-JSON frames from the service are ignored by design.
        }
      };
      this.socket.onclose = () => {
        // Only an unexpected close during recording triggers reconnection;
        // cleanup() detaches this handler before closing deliberately.
        if (this._status === "RECORDING" /* RECORDING */) {
          this.handleReconnect();
        }
      };
    });
  }
  /**
   * Retry the WebSocket connection with a fixed interval until the configured
   * attempt budget is exhausted, then emit a NETWORK error and stop.
   */
  handleReconnect() {
    const maxAttempts = this.config.maxReconnectAttempts ?? 3;
    const interval = this.config.reconnectInterval ?? 2e3;
    if (this.config.autoReconnect && this.reconnectAttempts < maxAttempts) {
      this.reconnectAttempts++;
      console.log(`[CloudStrategy] \u5C1D\u8BD5\u91CD\u8FDE (${this.reconnectAttempts}/${maxAttempts})`);
      this.reconnectTimer = setTimeout(() => {
        void (async () => {
          try {
            if (this.adapter.getConnectUrl) {
              const url = await Promise.resolve(this.adapter.getConnectUrl());
              await this.initWebSocket(url);
            }
          } catch {
            // This attempt failed; schedule the next one (bounded above).
            this.handleReconnect();
          }
        })();
      }, interval);
    } else {
      this.emit("error", {
        code: "NETWORK",
        message: "\u8FDE\u63A5\u5DF2\u65AD\u5F00"
      });
      void this.stop();
    }
  }
  /**
   * Create the AudioContext and wire the microphone source into either an
   * AudioWorklet (preferred) or a ScriptProcessor fallback.
   * @throws {Error} when the environment has no AudioContext.
   */
  async initAudioContext() {
    const AudioCtor = typeof window !== "undefined" && (window.AudioContext || window.webkitAudioContext);
    if (!AudioCtor) {
      throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 AudioContext");
    }
    this.audioContext = new AudioCtor();
    if (this.audioContext.state === "suspended") {
      // Autoplay policies can start the context suspended; resume explicitly.
      await this.audioContext.resume();
    }
    const source = this.audioContext.createMediaStreamSource(this.mediaStream);
    if (this.audioContext.audioWorklet && !this.workletLoaded) {
      const blob = new Blob([WORKLET_CODE], { type: "application/javascript" });
      // FIX: keep a handle on the object URL and revoke it after addModule
      // settles — the original leaked one blob URL per context.
      const workletUrl = URL.createObjectURL(blob);
      try {
        await this.audioContext.audioWorklet.addModule(workletUrl);
        this.workletLoaded = true;
        this.setupWorklet(source);
        console.log("[CloudStrategy] \u4F7F\u7528 AudioWorklet \u6A21\u5F0F");
        return;
      } catch (e) {
        console.warn("[CloudStrategy] AudioWorklet \u52A0\u8F7D\u5931\u8D25\uFF0C\u964D\u7EA7\u5230 ScriptProcessor:", e);
      } finally {
        URL.revokeObjectURL(workletUrl);
      }
    }
    this.setupScriptProcessor(source);
    console.log("[CloudStrategy] \u4F7F\u7528 ScriptProcessor \u964D\u7EA7\u6A21\u5F0F");
  }
  /**
   * Attach the AudioWorklet node: configure resampling/VAD, forward PCM
   * frames to handlePCM, and stop on VAD silence timeout.
   * @param {MediaStreamAudioSourceNode} source
   */
  setupWorklet(source) {
    this.workletNode = new AudioWorkletNode(this.audioContext, "speech-processor");
    const audioConfig = this.config.audioConfig ?? {};
    this.workletNode.port.postMessage({
      type: "CONFIG",
      payload: {
        currentRate: this.audioContext.sampleRate,
        targetRate: audioConfig.sampleRate ?? 16e3,
        vadThreshold: audioConfig.vadThreshold ?? 0.02,
        vadDuration: audioConfig.vadDuration ?? 3e3
      }
    });
    this.workletNode.port.onmessage = (e) => {
      const { type, payload } = e.data;
      if (type === "AUDIO_DATA" && payload) {
        this.handlePCM(payload);
      }
      if (type === "VAD_TIMEOUT") {
        this.emit("speechend");
        void this.stop();
      }
    };
    this.workletNode.port.postMessage({ type: "SET_RECORDING", payload: true });
    source.connect(this.workletNode);
    this.workletNode.connect(this.audioContext.destination);
    this.emit("speechstart");
  }
  /**
   * ScriptProcessor fallback: per-buffer RMS-based VAD, resampling to the
   * target rate and 16-bit PCM conversion on the main thread.
   * @param {MediaStreamAudioSourceNode} source
   */
  setupScriptProcessor(source) {
    const bufferSize = 4096;
    this.scriptProcessor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
    const audioConfig = this.config.audioConfig ?? {};
    const targetRate = audioConfig.sampleRate ?? 16e3;
    const currentRate = this.audioContext.sampleRate;
    // Convert the VAD duration (ms) into a count of silent buffers.
    const secondsPerBuf = bufferSize / currentRate;
    this.spMaxSilence = (audioConfig.vadDuration ?? 3e3) / 1e3 / secondsPerBuf;
    this.spSilenceCount = 0;
    this.isRecordingFlag = true;
    this.scriptProcessor.onaudioprocess = (e) => {
      if (!this.isRecordingFlag) return;
      const input = e.inputBuffer.getChannelData(0);
      const rms = AudioUtils.calculateRMS(input);
      if (rms < (audioConfig.vadThreshold ?? 0.02)) {
        this.spSilenceCount++;
        if (this.spSilenceCount > this.spMaxSilence) {
          this.emit("speechend");
          void this.stop();
          return;
        }
      } else {
        this.spSilenceCount = 0;
      }
      const resampled = AudioUtils.resample(input, currentRate, targetRate);
      const pcm = AudioUtils.floatTo16BitPCM(resampled);
      // slice(0) detaches the payload from the reusable processing buffer
      this.handlePCM(pcm.buffer.slice(0));
    };
    source.connect(this.scriptProcessor);
    this.scriptProcessor.connect(this.audioContext.destination);
    this.emit("speechstart");
  }
  /**
   * Route one PCM frame: stream it over the socket (queueing while the socket
   * is still connecting) or accumulate it for the HTTP short-audio upload.
   * @param {ArrayBuffer} buffer - 16-bit PCM frame
   */
  handlePCM(buffer) {
    if (this.transport === "websocket") {
      let payload = buffer;
      if (this.adapter.transformAudioData) {
        payload = this.adapter.transformAudioData(buffer);
      }
      if (this.socket && this.socket.readyState === WebSocket.OPEN) {
        this.flushMsgQueue();
        this.socket.send(payload);
      } else if (this.socket && this.socket.readyState === WebSocket.CONNECTING) {
        // Buffer frames during (re)connection, bounded to avoid unbounded growth.
        if (payload instanceof ArrayBuffer && this.msgQueue.length < this.MSG_QUEUE_MAX) {
          this.msgQueue.push(payload);
        }
      }
    } else {
      const chunk = buffer.slice(0);
      this.pcmChunks.push(chunk);
      this.totalPCMLength += chunk.byteLength / 2;
    }
  }
  /**
   * Drain any frames queued while the socket was connecting.
   */
  flushMsgQueue() {
    while (this.msgQueue.length > 0 && this.socket?.readyState === WebSocket.OPEN) {
      const data = this.msgQueue.shift();
      if (data) {
        this.socket.send(data);
      }
    }
  }
  /**
   * Stop recording (with an error notification) when the page is hidden,
   * since background tabs cannot reliably keep capturing audio.
   */
  setupVisibilityListener() {
    if (typeof document === "undefined") return;
    this.visibilityHandler = () => {
      if (document.hidden && this._status === "RECORDING" /* RECORDING */) {
        this.emit("error", {
          code: "NOT_ALLOWED",
          message: "\u9875\u9762\u5DF2\u8FDB\u5165\u540E\u53F0\uFF0C\u5F55\u97F3\u5DF2\u6682\u505C"
        });
        void this.stop();
      }
    };
    document.addEventListener("visibilitychange", this.visibilityHandler);
  }
  /**
   * Stop recording. In HTTP mode, merge the buffered PCM into a WAV file and
   * run the adapter's short-audio recognition before tearing down resources.
   */
  async stop() {
    if (this._status === "IDLE" /* IDLE */) {
      return;
    }
    this.isRecordingFlag = false;
    this.workletNode?.port.postMessage({ type: "SET_RECORDING", payload: false });
    this.mediaStream?.getTracks().forEach((t) => t.stop());
    if (this.transport === "http" && this.pcmChunks.length > 0 && this.adapter.recognizeShortAudio) {
      try {
        this.setStatus("PROCESSING" /* PROCESSING */);
        const mergedPCM = AudioUtils.mergeBuffers(this.pcmChunks, this.totalPCMLength);
        const sampleRate = this.config.audioConfig?.sampleRate ?? 16e3;
        const wavBuffer = AudioUtils.encodeWAV(mergedPCM, sampleRate);
        const result = await this.adapter.recognizeShortAudio(wavBuffer);
        this.emit("result", result);
      } catch (e) {
        const error = e;
        this.emit("error", {
          code: "ADAPTER_ERROR",
          message: error.message || "\u8BC6\u522B\u5931\u8D25",
          originalError: e
        });
      }
    }
    this.cleanup();
    this.emit("audioend");
    this.emit("end");
  }
  /**
   * Abort immediately: tear everything down without submitting buffered audio.
   */
  abort() {
    this.isRecordingFlag = false;
    this.cleanup();
    this.emit("end");
  }
  /**
   * @returns {boolean} true while actively capturing speech
   */
  isListening() {
    return this._status === "RECORDING" /* RECORDING */;
  }
  /**
   * Release every resource (timer, socket, audio nodes, context, stream,
   * buffers, listeners) and return to the IDLE state. Safe to call repeatedly.
   */
  cleanup() {
    if (this.reconnectTimer) {
      clearTimeout(this.reconnectTimer);
      this.reconnectTimer = null;
    }
    if (this.socket) {
      // FIX: detach handlers before closing so a late close/message event
      // cannot trigger a reconnect or emit results after teardown.
      this.socket.onopen = null;
      this.socket.onmessage = null;
      this.socket.onerror = null;
      this.socket.onclose = null;
      this.socket.close();
      this.socket = null;
    }
    this.workletNode?.disconnect();
    this.scriptProcessor?.disconnect();
    void this.audioContext?.close();
    if (this.visibilityHandler && typeof document !== "undefined") {
      document.removeEventListener("visibilitychange", this.visibilityHandler);
      this.visibilityHandler = null;
    }
    this.workletNode = null;
    this.scriptProcessor = null;
    this.audioContext = null;
    this.mediaStream = null;
    this.pcmChunks = [];
    this.totalPCMLength = 0;
    this.msgQueue = [];
    this.setStatus("IDLE" /* IDLE */);
  }
};
1373
var SpeechRecognizerImpl = class {
  static {
    __name(this, "SpeechRecognizerImpl");
  }
  strategy = null;
  config = {};
  _currentProvider = "browser";
  _status = "idle";
  eventHandlers = /* @__PURE__ */ new Map();
  customProviders = /* @__PURE__ */ new Map();
  get currentProvider() {
    return this._currentProvider;
  }
  get status() {
    return this._status;
  }
  /**
   * 获取当前识别状态 (current strategy-level recognition status)
   */
  get recognitionStatus() {
    return this.strategy?.status ?? "IDLE" /* IDLE */;
  }
  /**
   * Initialize the recognizer: merge user config over defaults and build the
   * underlying strategy.
   * @param {Object} [config] user overrides
   * @returns {Promise<void>} resolves when ready, rejects on strategy failure
   */
  initialize(config) {
    this._status = "loading";
    const defaultAudio = {
      sampleRate: 16e3,
      vadThreshold: 0.02,
      vadDuration: 3e3,
      echoCancellation: true,
      noiseSuppression: true,
      autoGainControl: true
    };
    const { audioConfig, ...rest } = config ?? {};
    this.config = {
      lang: "zh-CN",
      continuous: false,
      interimResults: true,
      maxAlternatives: 1,
      mode: "auto",
      autoReconnect: true,
      maxReconnectAttempts: 3,
      reconnectInterval: 2e3,
      ...rest,
      // FIX: merge audioConfig one level deep — the original shallow spread
      // let a partial user audioConfig wholesale replace the defaults,
      // silently dropping sampleRate / VAD / constraint defaults.
      audioConfig: { ...defaultAudio, ...audioConfig }
    };
    try {
      this.initializeStrategy();
      this._status = "ready";
      return Promise.resolve();
    } catch (error) {
      this._status = "error";
      return Promise.reject(error);
    }
  }
  /**
   * Pick and construct the recognition strategy from `config.mode`:
   * native browser recognition when available (and no cloud adapter is
   * forced), otherwise the cloud strategy.
   * @throws {Error} when no usable strategy exists for the configuration
   */
  initializeStrategy() {
    const advConfig = this.config;
    const mode = advConfig.mode ?? "auto";
    const hasNative = typeof window !== "undefined" && ("SpeechRecognition" in window || "webkitSpeechRecognition" in window);
    if (mode === "native" || (mode === "auto" && hasNative && !advConfig.cloudAdapter)) {
      const nativeStrategy = new NativeRecognitionStrategy(advConfig);
      if (!nativeStrategy.isAvailable()) {
        throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u539F\u751F\u8BED\u97F3\u8BC6\u522B");
      }
      this.strategy = nativeStrategy;
      this._currentProvider = "browser";
    } else if (mode === "cloud" || (mode === "auto" && advConfig.cloudAdapter)) {
      if (!advConfig.cloudAdapter) {
        throw new Error("\u4E91\u7AEF\u6A21\u5F0F\u9700\u8981\u63D0\u4F9B cloudAdapter");
      }
      this.strategy = new CloudRecognitionStrategy(advConfig);
      this._currentProvider = "custom";
    } else {
      throw new Error("\u6CA1\u6709\u53EF\u7528\u7684\u8BC6\u522B\u7B56\u7565");
    }
    this.forwardStrategyEvents();
  }
  /**
   * Re-emit every strategy-level event through this recognizer's own
   * event channel, converting results/errors to the public event shape.
   */
  forwardStrategyEvents() {
    if (!this.strategy) return;
    this.strategy.on("result", (res) => {
      this.emit("result", {
        type: "result",
        result: this.convertResult(res)
      });
    });
    this.strategy.on("error", (err) => {
      const errorEvent = {
        type: "error",
        error: {
          code: err.code,
          message: err.message
        }
      };
      if (err.originalError) {
        errorEvent.error.originalError = err.originalError;
      }
      this.emit("error", errorEvent);
    });
    this.strategy.on("start", () => this.emit("start", { type: "start" }));
    this.strategy.on("end", () => this.emit("end", { type: "end" }));
    this.strategy.on("soundstart", () => this.emit("soundstart", { type: "soundstart" }));
    this.strategy.on("soundend", () => this.emit("soundend", { type: "soundend" }));
    this.strategy.on("speechstart", () => this.emit("speechstart", { type: "speechstart" }));
    this.strategy.on("speechend", () => this.emit("speechend", { type: "speechend" }));
    this.strategy.on("audiostart", () => this.emit("audiostart", { type: "audiostart" }));
    this.strategy.on("audioend", () => this.emit("audioend", { type: "audioend" }));
  }
  /**
   * Convert a strategy-level result into the public result shape
   * (single alternative plus "best" convenience fields).
   */
  convertResult(res) {
    return {
      results: [
        {
          transcript: res.transcript,
          confidence: res.confidence,
          isFinal: res.isFinal
        }
      ],
      bestTranscript: res.transcript,
      bestConfidence: res.confidence,
      isFinal: res.isFinal
    };
  }
  /**
   * Start recognition. An optional config is merged into the current one
   * (audioConfig merged one level deep) and the strategy is rebuilt.
   * @throws {Error} when the recognizer has not been initialized
   */
  async start(config) {
    if (this._status !== "ready") {
      throw new Error("\u8BC6\u522B\u5668\u672A\u5C31\u7EEA");
    }
    if (config) {
      const { audioConfig, ...rest } = config;
      this.config = {
        ...this.config,
        ...rest,
        // Same deep-merge fix as initialize(): keep unspecified audio defaults.
        audioConfig: { ...this.config.audioConfig, ...audioConfig }
      };
      // NOTE(review): the previous strategy is replaced without being disposed
      // here — preserved from the original behavior; confirm callers stop
      // before restarting with a new config.
      this.initializeStrategy();
    }
    return this.strategy?.start();
  }
  /**
   * 停止识别 (graceful stop; pending finals may still be delivered)
   */
  stop() {
    this.strategy?.stop();
  }
  /**
   * 中止识别 (immediate abort, discarding pending results)
   */
  abort() {
    this.strategy?.abort();
  }
  /**
   * @returns {boolean} whether the underlying strategy is capturing speech
   */
  isListening() {
    return this.strategy?.isListening() ?? false;
  }
  /**
   * Register an event handler (duplicates are ignored via Set semantics).
   */
  on(event, handler) {
    if (!this.eventHandlers.has(event)) {
      this.eventHandlers.set(event, /* @__PURE__ */ new Set());
    }
    this.eventHandlers.get(event).add(handler);
  }
  /**
   * Remove a previously registered event handler.
   */
  off(event, handler) {
    this.eventHandlers.get(event)?.delete(handler);
  }
  /**
   * Dispatch an event to all handlers; a throwing handler is logged and
   * does not prevent the remaining handlers from running.
   */
  emit(type, event) {
    this.eventHandlers.get(type)?.forEach((handler) => {
      try {
        handler(event);
      } catch (e) {
        console.error("[SpeechRecognizer] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF:", e);
      }
    });
  }
  /**
   * Dispose the strategy, drop all handlers, and return to the idle state.
   */
  dispose() {
    this.strategy?.dispose();
    this.strategy = null;
    this.eventHandlers.clear();
    this._status = "idle";
  }
  /**
   * 注册自定义提供商 (register a custom provider implementation by type)
   */
  registerProvider(type, provider) {
    this.customProviders.set(type, provider);
  }
  /**
   * Attach a cloud adapter and force cloud mode for subsequent starts.
   */
  useCloudAdapter(adapter) {
    this.config.cloudAdapter = adapter;
    this.config.mode = "cloud";
  }
};
1584
/**
 * Resolve the environment's SpeechRecognition constructor, if any.
 * Prefers the standard global and falls back to the WebKit-prefixed one.
 * @returns {Function|null} the constructor, or null outside a browser
 *   or when the API is unavailable
 */
function getSpeechRecognitionConstructor() {
  const w = typeof window === "undefined" ? null : window;
  if (!w) {
    return null;
  }
  return w.SpeechRecognition ?? w.webkitSpeechRecognition ?? null;
}
1590
+ __name(getSpeechRecognitionConstructor, "getSpeechRecognitionConstructor");
1591
/**
 * Create and initialize a speech recognizer in one step.
 * @param {Object} [config] recognition configuration overrides
 * @returns {Promise<SpeechRecognizerImpl>} a ready-to-use recognizer
 */
async function createSpeechRecognizer(config) {
  const instance = new SpeechRecognizerImpl();
  await instance.initialize(config);
  return instance;
}
1596
+ __name(createSpeechRecognizer, "createSpeechRecognizer");
1597
/**
 * @returns {boolean} whether this environment exposes a SpeechRecognition API
 */
function isSpeechRecognitionSupported() {
  const ctor = getSpeechRecognitionConstructor();
  return ctor !== null;
}
1600
+ __name(isSpeechRecognitionSupported, "isSpeechRecognitionSupported");
1601
/**
 * One-shot convenience helper: recognize a single utterance.
 * Resolves with the first final result; resolves with an empty result when
 * recognition ends without one; rejects on recognition or startup errors.
 * The recognizer is disposed in every terminal path.
 * @param {Object} [config] recognition configuration overrides
 * @returns {Promise<Object>} the final recognition result
 */
async function listen(config) {
  const recognizer = await createSpeechRecognizer({
    ...config,
    continuous: false
  });
  return new Promise((resolve, reject) => {
    let gotFinal = false;
    const settle = (fn, value) => {
      recognizer.dispose();
      fn(value);
    };
    recognizer.on("result", (event) => {
      if (event.result && event.result.isFinal) {
        gotFinal = true;
        settle(resolve, event.result);
      }
    });
    recognizer.on("error", (event) => settle(reject, event.error));
    recognizer.on("end", () => {
      if (gotFinal) return;
      settle(resolve, {
        results: [],
        bestTranscript: "",
        bestConfidence: 0,
        isFinal: true
      });
    });
    recognizer.start().catch(reject);
  });
}
1633
+ __name(listen, "listen");
1634
+
1635
+ export { SpeechRecognizerImpl, SpeechSynthesizerImpl, createSpeechRecognizer, createSpeechSynthesizer, isSpeechRecognitionSupported, isSpeechSynthesisSupported, listen, speak };
1636
+ //# sourceMappingURL=chunk-3RM45M64.js.map
1637
+ //# sourceMappingURL=chunk-3RM45M64.js.map