@lee-zg/melange 1.0.0 → 1.2.0

This diff reflects the changes between publicly released versions of the package as they appear in the public registry, and is provided for informational purposes only.
Files changed (39)
  1. package/dist/chunk-3RM45M64.js +1637 -0
  2. package/dist/chunk-3RM45M64.js.map +1 -0
  3. package/dist/{chunk-ALBD5XC5.js → chunk-GXFWPL5M.js} +4 -8
  4. package/dist/chunk-GXFWPL5M.js.map +1 -0
  5. package/dist/{chunk-Q73NOVWX.cjs → chunk-KUJARU3D.cjs} +7 -24
  6. package/dist/chunk-KUJARU3D.cjs.map +1 -0
  7. package/dist/{chunk-2PXWQDZC.js → chunk-MBBJMHTS.js} +3 -5
  8. package/dist/chunk-MBBJMHTS.js.map +1 -0
  9. package/dist/{chunk-ZT6HVG4G.cjs → chunk-UYJUSNDI.cjs} +4 -8
  10. package/dist/chunk-UYJUSNDI.cjs.map +1 -0
  11. package/dist/{chunk-Q7XG6YN6.cjs → chunk-V5THPEB2.cjs} +3 -5
  12. package/dist/chunk-V5THPEB2.cjs.map +1 -0
  13. package/dist/{chunk-352XNR3C.js → chunk-VGZMISJD.js} +7 -24
  14. package/dist/chunk-VGZMISJD.js.map +1 -0
  15. package/dist/chunk-YZVCK6VZ.cjs +1646 -0
  16. package/dist/chunk-YZVCK6VZ.cjs.map +1 -0
  17. package/dist/core/index.cjs +23 -23
  18. package/dist/core/index.js +1 -1
  19. package/dist/fp/index.cjs +45 -45
  20. package/dist/fp/index.js +1 -1
  21. package/dist/index.cjs +147 -147
  22. package/dist/index.js +4 -4
  23. package/dist/plugins/index.cjs +9 -9
  24. package/dist/plugins/index.d.cts +287 -133
  25. package/dist/plugins/index.d.ts +287 -133
  26. package/dist/plugins/index.js +1 -1
  27. package/dist/utils/index.cjs +73 -73
  28. package/dist/utils/index.js +1 -1
  29. package/package.json +5 -2
  30. package/dist/chunk-2PXWQDZC.js.map +0 -1
  31. package/dist/chunk-352XNR3C.js.map +0 -1
  32. package/dist/chunk-ALBD5XC5.js.map +0 -1
  33. package/dist/chunk-O7K662J5.cjs +0 -842
  34. package/dist/chunk-O7K662J5.cjs.map +0 -1
  35. package/dist/chunk-Q73NOVWX.cjs.map +0 -1
  36. package/dist/chunk-Q7XG6YN6.cjs.map +0 -1
  37. package/dist/chunk-YGMBCZJQ.js +0 -833
  38. package/dist/chunk-YGMBCZJQ.js.map +0 -1
  39. package/dist/chunk-ZT6HVG4G.cjs.map +0 -1
@@ -0,0 +1,1646 @@
1
+ 'use strict';
2
+
3
+ var chunkPK6SKIKE_cjs = require('./chunk-PK6SKIKE.cjs');
4
+
5
+ // src/plugins/speech/synthesis.ts
6
+ var SynthesisAudioUtils = {
7
+ /**
8
+ * 创建 AudioContext
9
+ */
10
+ createAudioContext() {
11
+ const AudioCtor = typeof window !== "undefined" ? window.AudioContext || window.webkitAudioContext : null;
12
+ if (!AudioCtor) {
13
+ throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 AudioContext");
14
+ }
15
+ return new AudioCtor();
16
+ },
17
+ /**
18
+ * ArrayBuffer 转 Base64
19
+ */
20
+ arrayBufferToBase64(buffer) {
21
+ let binary = "";
22
+ const bytes = new Uint8Array(buffer);
23
+ for (let i = 0; i < bytes.byteLength; i++) {
24
+ binary += String.fromCharCode(bytes[i] ?? 0);
25
+ }
26
+ return typeof btoa !== "undefined" ? btoa(binary) : Buffer.from(binary).toString("base64");
27
+ },
28
+ /**
29
+ * Base64 转 ArrayBuffer
30
+ */
31
+ base64ToArrayBuffer(base64) {
32
+ const binary = typeof atob !== "undefined" ? atob(base64) : Buffer.from(base64, "base64").toString("binary");
33
+ const bytes = new Uint8Array(binary.length);
34
+ for (let i = 0; i < binary.length; i++) {
35
+ bytes[i] = binary.charCodeAt(i);
36
+ }
37
+ return bytes.buffer;
38
+ },
39
+ /**
40
+ * 计算音频时长(粗略估算)
41
+ */
42
+ estimateDuration(byteLength, format) {
43
+ const bitRates = {
44
+ mp3: 128e3,
45
+ // 128 kbps
46
+ wav: 256e3,
47
+ // 16bit 16kHz mono
48
+ ogg: 96e3,
49
+ // 96 kbps
50
+ pcm: 256e3
51
+ // 16bit 16kHz mono
52
+ };
53
+ return Math.ceil(byteLength * 8 / bitRates[format] * 1e3);
54
+ }
55
+ };
56
+ var BaseSynthesisStrategy = class {
57
+ constructor(config) {
58
+ this.config = config;
59
+ }
60
+ static {
61
+ chunkPK6SKIKE_cjs.__name(this, "BaseSynthesisStrategy");
62
+ }
63
+ listeners = {
64
+ start: [],
65
+ end: [],
66
+ pause: [],
67
+ resume: [],
68
+ boundary: [],
69
+ mark: [],
70
+ error: [],
71
+ state: []
72
+ };
73
+ _status = "IDLE" /* IDLE */;
74
+ /**
75
+ * 获取当前状态
76
+ */
77
+ get status() {
78
+ return this._status;
79
+ }
80
+ /**
81
+ * 设置状态
82
+ */
83
+ setStatus(status) {
84
+ if (this._status !== status) {
85
+ this._status = status;
86
+ this.emit("state", status);
87
+ }
88
+ }
89
+ /**
90
+ * 添加事件监听
91
+ */
92
+ on(event, fn) {
93
+ this.listeners[event].push(fn);
94
+ }
95
+ /**
96
+ * 移除事件监听
97
+ */
98
+ off(event, fn) {
99
+ const listeners = this.listeners[event];
100
+ const index = listeners.indexOf(fn);
101
+ if (index > -1) {
102
+ listeners.splice(index, 1);
103
+ }
104
+ }
105
+ /**
106
+ * 触发事件
107
+ */
108
+ emit(event, data) {
109
+ const listeners = this.listeners[event];
110
+ listeners.forEach((fn) => {
111
+ try {
112
+ fn(data);
113
+ } catch (e) {
114
+ console.error(`[Synthesis] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF (${event}):`, e);
115
+ }
116
+ });
117
+ }
118
+ /**
119
+ * 销毁资源
120
+ */
121
+ dispose() {
122
+ this.cancel();
123
+ Object.keys(this.listeners).forEach((key) => {
124
+ this.listeners[key] = [];
125
+ });
126
+ }
127
+ };
128
+ var NativeSynthesisStrategy = class extends BaseSynthesisStrategy {
129
+ static {
130
+ chunkPK6SKIKE_cjs.__name(this, "NativeSynthesisStrategy");
131
+ }
132
+ synthesis = null;
133
+ // Internal state tracking for current utterance (write-only, used for GC reference)
134
+ currentUtterance = null;
135
+ voicesLoaded = false;
136
+ voicesPromise = null;
137
+ constructor(config) {
138
+ super(config);
139
+ if (this.isAvailable()) {
140
+ this.synthesis = window.speechSynthesis;
141
+ }
142
+ }
143
+ isAvailable() {
144
+ return typeof window !== "undefined" && "speechSynthesis" in window;
145
+ }
146
+ async getVoices() {
147
+ if (!this.synthesis) return [];
148
+ if (this.voicesPromise) {
149
+ return this.voicesPromise;
150
+ }
151
+ this.voicesPromise = new Promise((resolve) => {
152
+ const loadVoices = /* @__PURE__ */ chunkPK6SKIKE_cjs.__name(() => {
153
+ const voices = this.synthesis.getVoices();
154
+ if (voices.length > 0) {
155
+ this.voicesLoaded = true;
156
+ resolve(
157
+ voices.map((voice) => ({
158
+ id: voice.voiceURI,
159
+ name: voice.name,
160
+ lang: voice.lang,
161
+ localService: voice.localService,
162
+ default: voice.default,
163
+ provider: "browser"
164
+ }))
165
+ );
166
+ }
167
+ }, "loadVoices");
168
+ loadVoices();
169
+ if (!this.voicesLoaded) {
170
+ this.synthesis.addEventListener("voiceschanged", loadVoices, { once: true });
171
+ setTimeout(() => {
172
+ if (!this.voicesLoaded) {
173
+ resolve([]);
174
+ }
175
+ }, 3e3);
176
+ }
177
+ });
178
+ return this.voicesPromise;
179
+ }
180
+ async speak(text, config) {
181
+ if (!this.synthesis) {
182
+ throw { code: "NOT_SUPPORTED", message: "\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u8BED\u97F3\u5408\u6210" };
183
+ }
184
+ this.cancel();
185
+ this.setStatus("LOADING" /* LOADING */);
186
+ return new Promise((resolve, reject) => {
187
+ const mergedConfig = { ...this.config, ...config };
188
+ const utterance = new SpeechSynthesisUtterance(text);
189
+ this.currentUtterance = utterance;
190
+ if (mergedConfig.lang) utterance.lang = mergedConfig.lang;
191
+ if (mergedConfig.volume !== void 0) utterance.volume = mergedConfig.volume;
192
+ if (mergedConfig.rate !== void 0) utterance.rate = mergedConfig.rate;
193
+ if (mergedConfig.pitch !== void 0) utterance.pitch = mergedConfig.pitch;
194
+ if (mergedConfig.voice) {
195
+ const voices = this.synthesis.getVoices();
196
+ const voiceConfig = mergedConfig.voice;
197
+ const targetVoice = typeof voiceConfig === "string" ? voices.find((v) => v.name === voiceConfig || v.voiceURI === voiceConfig) : voices.find((v) => v.voiceURI === voiceConfig.id);
198
+ if (targetVoice) {
199
+ utterance.voice = targetVoice;
200
+ }
201
+ }
202
+ utterance.onstart = () => {
203
+ this.setStatus("SPEAKING" /* SPEAKING */);
204
+ this.emit("start");
205
+ };
206
+ utterance.onend = () => {
207
+ this.setStatus("IDLE" /* IDLE */);
208
+ this.currentUtterance = null;
209
+ this.emit("end");
210
+ resolve();
211
+ };
212
+ utterance.onpause = () => {
213
+ this.setStatus("PAUSED" /* PAUSED */);
214
+ this.emit("pause");
215
+ };
216
+ utterance.onresume = () => {
217
+ this.setStatus("SPEAKING" /* SPEAKING */);
218
+ this.emit("resume");
219
+ };
220
+ utterance.onboundary = (event) => {
221
+ this.emit("boundary", {
222
+ charIndex: event.charIndex,
223
+ charLength: event.charLength,
224
+ elapsedTime: event.elapsedTime,
225
+ name: event.name
226
+ });
227
+ };
228
+ utterance.onmark = (event) => {
229
+ this.emit("mark", { name: event.name });
230
+ };
231
+ utterance.onerror = (event) => {
232
+ this.setStatus("IDLE" /* IDLE */);
233
+ this.currentUtterance = null;
234
+ const error = {
235
+ code: "UNKNOWN",
236
+ message: `\u8BED\u97F3\u5408\u6210\u9519\u8BEF: ${event.error}`,
237
+ originalError: event
238
+ };
239
+ this.emit("error", error);
240
+ reject(error);
241
+ };
242
+ this.synthesis.speak(utterance);
243
+ });
244
+ }
245
+ pause() {
246
+ if (this.synthesis && this.isSpeaking()) {
247
+ this.synthesis.pause();
248
+ }
249
+ }
250
+ resume() {
251
+ if (this.synthesis && this.isPaused()) {
252
+ this.synthesis.resume();
253
+ }
254
+ }
255
+ cancel() {
256
+ if (this.synthesis) {
257
+ this.synthesis.cancel();
258
+ this.currentUtterance = null;
259
+ this.setStatus("IDLE" /* IDLE */);
260
+ }
261
+ }
262
+ isSpeaking() {
263
+ return this.currentUtterance !== null && (this.synthesis?.speaking ?? false);
264
+ }
265
+ isPaused() {
266
+ return this.synthesis?.paused ?? false;
267
+ }
268
+ };
269
+ var CloudSynthesisStrategy = class extends BaseSynthesisStrategy {
270
+ static {
271
+ chunkPK6SKIKE_cjs.__name(this, "CloudSynthesisStrategy");
272
+ }
273
+ adapter;
274
+ audioContext = null;
275
+ sourceNode = null;
276
+ gainNode = null;
277
+ startTime = 0;
278
+ pauseTime = 0;
279
+ audioBuffer = null;
280
+ constructor(config) {
281
+ super(config);
282
+ if (!config.cloudAdapter) {
283
+ throw new Error("[CloudStrategy] \u9700\u8981\u63D0\u4F9B cloudAdapter");
284
+ }
285
+ this.adapter = config.cloudAdapter;
286
+ }
287
+ isAvailable() {
288
+ return typeof window !== "undefined" && "AudioContext" in window;
289
+ }
290
+ async getVoices() {
291
+ if (!this.adapter.getVoices) return [];
292
+ try {
293
+ const cloudVoices = await this.adapter.getVoices();
294
+ return cloudVoices.map((v) => ({
295
+ id: v.id,
296
+ name: v.name,
297
+ lang: v.lang,
298
+ localService: false,
299
+ default: false,
300
+ provider: "custom"
301
+ }));
302
+ } catch {
303
+ return [];
304
+ }
305
+ }
306
+ async speak(text, config) {
307
+ if (!text.trim()) {
308
+ throw { code: "INVALID_TEXT", message: "\u6587\u672C\u5185\u5BB9\u4E3A\u7A7A" };
309
+ }
310
+ this.cancel();
311
+ const mergedConfig = { ...this.config, ...config };
312
+ this.setStatus("LOADING" /* LOADING */);
313
+ try {
314
+ this.emit("start");
315
+ const result = await this.adapter.synthesize(text, mergedConfig);
316
+ if (!this.audioContext) {
317
+ this.audioContext = SynthesisAudioUtils.createAudioContext();
318
+ }
319
+ if (this.audioContext.state === "suspended") {
320
+ await this.audioContext.resume();
321
+ }
322
+ this.audioBuffer = await this.audioContext.decodeAudioData(result.audioData.slice(0));
323
+ await this.playBuffer();
324
+ } catch (e) {
325
+ const error = e;
326
+ this.setStatus("IDLE" /* IDLE */);
327
+ const synthError = {
328
+ code: "ADAPTER_ERROR",
329
+ message: error.message || "\u5408\u6210\u5931\u8D25",
330
+ originalError: e
331
+ };
332
+ this.emit("error", synthError);
333
+ throw synthError;
334
+ }
335
+ }
336
+ async playBuffer() {
337
+ if (!this.audioContext || !this.audioBuffer) return;
338
+ return new Promise((resolve, reject) => {
339
+ try {
340
+ this.sourceNode = this.audioContext.createBufferSource();
341
+ this.sourceNode.buffer = this.audioBuffer;
342
+ this.gainNode = this.audioContext.createGain();
343
+ this.gainNode.gain.value = this.config.volume ?? 1;
344
+ this.sourceNode.connect(this.gainNode);
345
+ this.gainNode.connect(this.audioContext.destination);
346
+ this.sourceNode.onended = () => {
347
+ if (this._status === "SPEAKING" /* SPEAKING */) {
348
+ this.setStatus("IDLE" /* IDLE */);
349
+ this.emit("end");
350
+ resolve();
351
+ }
352
+ };
353
+ const offset = this.pauseTime > 0 ? this.pauseTime : 0;
354
+ this.sourceNode.start(0, offset);
355
+ this.startTime = this.audioContext.currentTime - offset;
356
+ this.pauseTime = 0;
357
+ this.setStatus("SPEAKING" /* SPEAKING */);
358
+ } catch (e) {
359
+ reject(e);
360
+ }
361
+ });
362
+ }
363
+ pause() {
364
+ if (this._status === "SPEAKING" /* SPEAKING */ && this.audioContext && this.sourceNode) {
365
+ this.pauseTime = this.audioContext.currentTime - this.startTime;
366
+ this.sourceNode.stop();
367
+ this.sourceNode.disconnect();
368
+ this.sourceNode = null;
369
+ this.setStatus("PAUSED" /* PAUSED */);
370
+ this.emit("pause");
371
+ }
372
+ }
373
+ resume() {
374
+ if (this._status === "PAUSED" /* PAUSED */ && this.audioBuffer) {
375
+ this.emit("resume");
376
+ void this.playBuffer();
377
+ }
378
+ }
379
+ cancel() {
380
+ if (this.sourceNode) {
381
+ try {
382
+ this.sourceNode.stop();
383
+ this.sourceNode.disconnect();
384
+ } catch {
385
+ }
386
+ this.sourceNode = null;
387
+ }
388
+ this.audioBuffer = null;
389
+ this.pauseTime = 0;
390
+ this.startTime = 0;
391
+ this.setStatus("IDLE" /* IDLE */);
392
+ }
393
+ isSpeaking() {
394
+ return this._status === "SPEAKING" /* SPEAKING */;
395
+ }
396
+ isPaused() {
397
+ return this._status === "PAUSED" /* PAUSED */;
398
+ }
399
+ dispose() {
400
+ super.dispose();
401
+ this.cancel();
402
+ if (this.audioContext) {
403
+ void this.audioContext.close();
404
+ this.audioContext = null;
405
+ }
406
+ }
407
+ };
408
+ var SpeechSynthesizerImpl = class {
409
+ static {
410
+ chunkPK6SKIKE_cjs.__name(this, "SpeechSynthesizerImpl");
411
+ }
412
+ strategy = null;
413
+ config = {};
414
+ _currentProvider = "browser";
415
+ _status = "idle";
416
+ eventHandlers = /* @__PURE__ */ new Map();
417
+ customProviders = /* @__PURE__ */ new Map();
418
+ get currentProvider() {
419
+ return this._currentProvider;
420
+ }
421
+ get status() {
422
+ return this._status;
423
+ }
424
+ /**
425
+ * 获取当前合成状态
426
+ */
427
+ get synthesisStatus() {
428
+ return this.strategy?.status ?? "IDLE" /* IDLE */;
429
+ }
430
+ /**
431
+ * 初始化语音合成器
432
+ */
433
+ initialize(config) {
434
+ this._status = "loading";
435
+ this.config = {
436
+ lang: "zh-CN",
437
+ volume: 1,
438
+ rate: 1,
439
+ pitch: 1,
440
+ preferredProvider: "browser",
441
+ autoFallback: true,
442
+ fallbackProviders: ["azure", "google", "aws"],
443
+ mode: "auto",
444
+ audioFormat: "mp3",
445
+ ...config
446
+ };
447
+ try {
448
+ this.initializeStrategy();
449
+ this._status = "ready";
450
+ return Promise.resolve();
451
+ } catch (error) {
452
+ this._status = "error";
453
+ return Promise.reject(error);
454
+ }
455
+ }
456
+ /**
457
+ * 初始化合成策略
458
+ */
459
+ initializeStrategy() {
460
+ const advConfig = this.config;
461
+ const mode = advConfig.mode ?? "auto";
462
+ const hasNative = typeof window !== "undefined" && "speechSynthesis" in window;
463
+ if (mode === "native" || mode === "auto" && hasNative && !advConfig.cloudAdapter) {
464
+ const nativeStrategy = new NativeSynthesisStrategy(advConfig);
465
+ if (!nativeStrategy.isAvailable()) {
466
+ throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u539F\u751F\u8BED\u97F3\u5408\u6210");
467
+ }
468
+ this.strategy = nativeStrategy;
469
+ this._currentProvider = "browser";
470
+ } else if (mode === "cloud" || mode === "auto" && advConfig.cloudAdapter) {
471
+ if (!advConfig.cloudAdapter) {
472
+ throw new Error("\u4E91\u7AEF\u6A21\u5F0F\u9700\u8981\u63D0\u4F9B cloudAdapter");
473
+ }
474
+ this.strategy = new CloudSynthesisStrategy(advConfig);
475
+ this._currentProvider = "custom";
476
+ } else {
477
+ throw new Error("\u6CA1\u6709\u53EF\u7528\u7684\u5408\u6210\u7B56\u7565");
478
+ }
479
+ this.forwardStrategyEvents();
480
+ }
481
+ /**
482
+ * 转发策略事件
483
+ */
484
+ forwardStrategyEvents() {
485
+ if (!this.strategy) return;
486
+ this.strategy.on("start", () => this.emit("start", { type: "start" }));
487
+ this.strategy.on("end", () => this.emit("end", { type: "end" }));
488
+ this.strategy.on("pause", () => this.emit("pause", { type: "pause" }));
489
+ this.strategy.on("resume", () => this.emit("resume", { type: "resume" }));
490
+ this.strategy.on("boundary", (data) => {
491
+ const event = { type: "boundary" };
492
+ if (data.charIndex !== void 0) event.charIndex = data.charIndex;
493
+ if (data.charLength !== void 0) event.charLength = data.charLength;
494
+ if (data.elapsedTime !== void 0) event.elapsedTime = data.elapsedTime;
495
+ if (data.name !== void 0) event.name = data.name;
496
+ this.emit("boundary", event);
497
+ });
498
+ this.strategy.on("mark", (data) => {
499
+ const event = { type: "mark" };
500
+ if (data.name !== void 0) event.name = data.name;
501
+ this.emit("mark", event);
502
+ });
503
+ this.strategy.on("error", (err) => {
504
+ const errorEvent = {
505
+ type: "error",
506
+ error: {
507
+ code: err.code,
508
+ message: err.message
509
+ }
510
+ };
511
+ if (err.originalError) {
512
+ errorEvent.error.originalError = err.originalError;
513
+ }
514
+ this.emit("error", errorEvent);
515
+ });
516
+ }
517
+ /**
518
+ * 获取可用语音列表
519
+ */
520
+ async getVoices() {
521
+ if (!this.strategy) {
522
+ throw new Error("\u8BED\u97F3\u5408\u6210\u5668\u672A\u521D\u59CB\u5316");
523
+ }
524
+ return this.strategy.getVoices();
525
+ }
526
+ /**
527
+ * 朗读文本
528
+ */
529
+ async speak(text, config) {
530
+ if (this._status !== "ready") {
531
+ throw new Error("\u8BED\u97F3\u5408\u6210\u5668\u672A\u5C31\u7EEA");
532
+ }
533
+ return this.strategy?.speak(text, config);
534
+ }
535
+ /**
536
+ * 暂停朗读
537
+ */
538
+ pause() {
539
+ this.strategy?.pause();
540
+ }
541
+ /**
542
+ * 继续朗读
543
+ */
544
+ resume() {
545
+ this.strategy?.resume();
546
+ }
547
+ /**
548
+ * 取消朗读
549
+ */
550
+ cancel() {
551
+ this.strategy?.cancel();
552
+ }
553
+ /**
554
+ * 是否正在朗读
555
+ */
556
+ isSpeaking() {
557
+ return this.strategy?.isSpeaking() ?? false;
558
+ }
559
+ /**
560
+ * 是否已暂停
561
+ */
562
+ isPaused() {
563
+ return this.strategy?.isPaused() ?? false;
564
+ }
565
+ /**
566
+ * 添加事件监听
567
+ */
568
+ on(event, handler) {
569
+ if (!this.eventHandlers.has(event)) {
570
+ this.eventHandlers.set(event, /* @__PURE__ */ new Set());
571
+ }
572
+ this.eventHandlers.get(event).add(handler);
573
+ }
574
+ /**
575
+ * 移除事件监听
576
+ */
577
+ off(event, handler) {
578
+ this.eventHandlers.get(event)?.delete(handler);
579
+ }
580
+ /**
581
+ * 触发事件
582
+ */
583
+ emit(type, event) {
584
+ this.eventHandlers.get(type)?.forEach((handler) => {
585
+ try {
586
+ handler(event);
587
+ } catch (e) {
588
+ console.error("[SpeechSynthesizer] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF:", e);
589
+ }
590
+ });
591
+ }
592
+ /**
593
+ * 销毁实例
594
+ */
595
+ dispose() {
596
+ this.strategy?.dispose();
597
+ this.strategy = null;
598
+ this.eventHandlers.clear();
599
+ this._status = "idle";
600
+ }
601
+ /**
602
+ * 注册自定义提供商
603
+ */
604
+ registerProvider(type, provider) {
605
+ this.customProviders.set(type, provider);
606
+ }
607
+ /**
608
+ * 使用云端适配器
609
+ */
610
+ useCloudAdapter(adapter) {
611
+ this.config.cloudAdapter = adapter;
612
+ this.config.mode = "cloud";
613
+ }
614
+ };
615
+ async function createSpeechSynthesizer(config) {
616
+ const synthesizer = new SpeechSynthesizerImpl();
617
+ await synthesizer.initialize(config);
618
+ return synthesizer;
619
+ }
620
+ chunkPK6SKIKE_cjs.__name(createSpeechSynthesizer, "createSpeechSynthesizer");
621
+ function isSpeechSynthesisSupported() {
622
+ return typeof window !== "undefined" && "speechSynthesis" in window;
623
+ }
624
+ chunkPK6SKIKE_cjs.__name(isSpeechSynthesisSupported, "isSpeechSynthesisSupported");
625
+ async function speak(text, config) {
626
+ const synthesizer = await createSpeechSynthesizer(config);
627
+ try {
628
+ await synthesizer.speak(text, config);
629
+ } finally {
630
+ synthesizer.dispose();
631
+ }
632
+ }
633
+ chunkPK6SKIKE_cjs.__name(speak, "speak");
634
+
635
+ // src/plugins/speech/recognition.ts
636
+ var AudioUtils = {
637
+ /**
638
+ * 重采样音频数据
639
+ * @param data 原始音频数据
640
+ * @param inputRate 输入采样率
641
+ * @param outputRate 输出采样率
642
+ * @returns 重采样后的数据
643
+ */
644
+ resample(data, inputRate, outputRate) {
645
+ if (inputRate === outputRate) return data;
646
+ const compression = inputRate / outputRate;
647
+ const length = Math.ceil(data.length / compression);
648
+ const result = new Float32Array(length);
649
+ for (let i = 0; i < length; i++) {
650
+ result[i] = data[Math.floor(i * compression)] ?? 0;
651
+ }
652
+ return result;
653
+ },
654
+ /**
655
+ * Float32 转 Int16 PCM
656
+ * @param input Float32 数据
657
+ * @returns Int16 PCM 数据
658
+ */
659
+ floatTo16BitPCM(input) {
660
+ const output = new Int16Array(input.length);
661
+ for (let i = 0; i < input.length; i++) {
662
+ const s = Math.max(-1, Math.min(1, input[i] ?? 0));
663
+ output[i] = s < 0 ? s * 32768 : s * 32767;
664
+ }
665
+ return output;
666
+ },
667
+ /**
668
+ * 计算音量 RMS
669
+ * @param data 音频数据
670
+ * @returns RMS 值
671
+ */
672
+ calculateRMS(data) {
673
+ let sum = 0;
674
+ for (let i = 0; i < data.length; i++) {
675
+ const sample = data[i] ?? 0;
676
+ sum += sample * sample;
677
+ }
678
+ return Math.sqrt(sum / data.length);
679
+ },
680
+ /**
681
+ * 合并 PCM 片段
682
+ * @param buffers PCM 缓冲区数组
683
+ * @param totalLength 总采样数
684
+ * @returns 合并后的 Int16 数组
685
+ */
686
+ mergeBuffers(buffers, totalLength) {
687
+ const result = new Int16Array(totalLength);
688
+ let offset = 0;
689
+ for (const buffer of buffers) {
690
+ const view = new Int16Array(buffer);
691
+ result.set(view, offset);
692
+ offset += view.length;
693
+ }
694
+ return result;
695
+ },
696
+ /**
697
+ * PCM 转 WAV 封装
698
+ * @param samples PCM 采样数据
699
+ * @param sampleRate 采样率
700
+ * @param channels 声道数
701
+ * @returns WAV 格式 ArrayBuffer
702
+ */
703
+ encodeWAV(samples, sampleRate = 16e3, channels = 1) {
704
+ const buffer = new ArrayBuffer(44 + samples.length * 2);
705
+ const view = new DataView(buffer);
706
+ const writeString = /* @__PURE__ */ chunkPK6SKIKE_cjs.__name((offset, str) => {
707
+ for (let i = 0; i < str.length; i++) {
708
+ view.setUint8(offset + i, str.charCodeAt(i));
709
+ }
710
+ }, "writeString");
711
+ writeString(0, "RIFF");
712
+ view.setUint32(4, 36 + samples.length * 2, true);
713
+ writeString(8, "WAVE");
714
+ writeString(12, "fmt ");
715
+ view.setUint32(16, 16, true);
716
+ view.setUint16(20, 1, true);
717
+ view.setUint16(22, channels, true);
718
+ view.setUint32(24, sampleRate, true);
719
+ view.setUint32(28, sampleRate * channels * 2, true);
720
+ view.setUint16(32, channels * 2, true);
721
+ view.setUint16(34, 16, true);
722
+ writeString(36, "data");
723
+ view.setUint32(40, samples.length * 2, true);
724
+ const dataOffset = 44;
725
+ for (let i = 0; i < samples.length; i++) {
726
+ view.setInt16(dataOffset + i * 2, samples[i] ?? 0, true);
727
+ }
728
+ return buffer;
729
+ },
730
+ /**
731
+ * ArrayBuffer 转 Base64
732
+ * @param buffer ArrayBuffer
733
+ * @returns Base64 字符串
734
+ */
735
+ arrayBufferToBase64(buffer) {
736
+ let binary = "";
737
+ const bytes = new Uint8Array(buffer);
738
+ const len = bytes.byteLength;
739
+ for (let i = 0; i < len; i++) {
740
+ binary += String.fromCharCode(bytes[i] ?? 0);
741
+ }
742
+ return typeof btoa !== "undefined" ? btoa(binary) : Buffer.from(binary).toString("base64");
743
+ }
744
+ };
745
+ var WORKLET_CODE = `
746
+ class SpeechProcessor extends AudioWorkletProcessor {
747
+ constructor() {
748
+ super();
749
+ this.buffer = [];
750
+ this.targetRate = 16000;
751
+ this.currentRate = 44100;
752
+ this.silenceFrames = 0;
753
+ this.maxSilenceFrames = 0;
754
+ this.vadThreshold = 0.02;
755
+ this.isRecording = false;
756
+ this.port.onmessage = this.handleMessage.bind(this);
757
+ }
758
+
759
+ static get parameterDescriptors() {
760
+ return [];
761
+ }
762
+
763
+ process(inputs) {
764
+ const input = inputs[0];
765
+ if (!input || !input.length || !this.isRecording) return true;
766
+ const channelData = input[0];
767
+
768
+ // VAD \u68C0\u6D4B
769
+ let sum = 0;
770
+ for (let i = 0; i < channelData.length; i++) {
771
+ sum += channelData[i] * channelData[i];
772
+ }
773
+ const rms = Math.sqrt(sum / channelData.length);
774
+
775
+ if (rms < this.vadThreshold) {
776
+ this.silenceFrames++;
777
+ if (this.maxSilenceFrames > 0 && this.silenceFrames > this.maxSilenceFrames) {
778
+ this.port.postMessage({ type: 'VAD_TIMEOUT' });
779
+ this.silenceFrames = 0;
780
+ }
781
+ } else {
782
+ this.silenceFrames = 0;
783
+ }
784
+
785
+ // \u91CD\u91C7\u6837
786
+ if (this.targetRate < this.currentRate) {
787
+ const compression = this.currentRate / this.targetRate;
788
+ for (let i = 0; i < channelData.length; i += compression) {
789
+ this.buffer.push(channelData[Math.floor(i)]);
790
+ }
791
+ } else {
792
+ this.buffer.push(...channelData);
793
+ }
794
+
795
+ // \u5B9A\u671F\u5237\u65B0 (~128ms)
796
+ if (this.buffer.length >= 2048) {
797
+ this.flush();
798
+ }
799
+ return true;
800
+ }
801
+
802
+ flush() {
803
+ if (this.buffer.length === 0) return;
804
+ const pcmData = new Int16Array(this.buffer.length);
805
+ for (let i = 0; i < this.buffer.length; i++) {
806
+ let s = Math.max(-1, Math.min(1, this.buffer[i]));
807
+ s = s < 0 ? s * 0x8000 : s * 0x7FFF;
808
+ pcmData[i] = s;
809
+ }
810
+ this.port.postMessage({ type: 'AUDIO_DATA', payload: pcmData.buffer }, [pcmData.buffer]);
811
+ this.buffer = [];
812
+ }
813
+
814
+ handleMessage(event) {
815
+ const { type, payload } = event.data;
816
+ if (type === 'CONFIG') {
817
+ this.targetRate = payload.targetRate || 16000;
818
+ this.currentRate = payload.currentRate || 44100;
819
+ this.vadThreshold = payload.vadThreshold || 0.02;
820
+ const secondsPerBlock = 128 / this.currentRate;
821
+ this.maxSilenceFrames = (payload.vadDuration / 1000) / secondsPerBlock;
822
+ }
823
+ if (type === 'SET_RECORDING') {
824
+ this.isRecording = payload;
825
+ if (!payload) this.flush();
826
+ }
827
+ }
828
+ }
829
+ registerProcessor('speech-processor', SpeechProcessor);
830
+ `;
831
+ var BaseRecognitionStrategy = class {
832
+ constructor(config) {
833
+ this.config = config;
834
+ }
835
+ static {
836
+ chunkPK6SKIKE_cjs.__name(this, "BaseRecognitionStrategy");
837
+ }
838
+ listeners = {
839
+ result: [],
840
+ error: [],
841
+ state: [],
842
+ start: [],
843
+ end: [],
844
+ soundstart: [],
845
+ soundend: [],
846
+ speechstart: [],
847
+ speechend: [],
848
+ audiostart: [],
849
+ audioend: []
850
+ };
851
+ _status = "IDLE" /* IDLE */;
852
+ /**
853
+ * 获取当前状态
854
+ */
855
+ get status() {
856
+ return this._status;
857
+ }
858
+ /**
859
+ * 设置状态
860
+ */
861
+ setStatus(status) {
862
+ if (this._status !== status) {
863
+ this._status = status;
864
+ this.emit("state", status);
865
+ }
866
+ }
867
+ /**
868
+ * 添加事件监听
869
+ */
870
+ on(event, fn) {
871
+ this.listeners[event].push(fn);
872
+ }
873
+ /**
874
+ * 移除事件监听
875
+ */
876
+ off(event, fn) {
877
+ const listeners = this.listeners[event];
878
+ const index = listeners.indexOf(fn);
879
+ if (index > -1) {
880
+ listeners.splice(index, 1);
881
+ }
882
+ }
883
+ /**
884
+ * 触发事件
885
+ */
886
+ emit(event, data) {
887
+ const listeners = this.listeners[event];
888
+ listeners.forEach((fn) => {
889
+ try {
890
+ fn(data);
891
+ } catch (e) {
892
+ console.error(`[Recognition] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF (${event}):`, e);
893
+ }
894
+ });
895
+ }
896
+ /**
897
+ * 销毁资源
898
+ */
899
+ dispose() {
900
+ this.abort();
901
+ Object.keys(this.listeners).forEach((key) => {
902
+ this.listeners[key] = [];
903
+ });
904
+ }
905
+ };
906
+ var NativeRecognitionStrategy = class extends BaseRecognitionStrategy {
907
+ static {
908
+ chunkPK6SKIKE_cjs.__name(this, "NativeRecognitionStrategy");
909
+ }
910
+ recognition = null;
911
+ SpeechRecognitionClass = null;
912
+ constructor(config) {
913
+ super(config);
914
+ const srClass = typeof window !== "undefined" ? window.SpeechRecognition ?? window.webkitSpeechRecognition : void 0;
915
+ this.SpeechRecognitionClass = srClass ?? null;
916
+ }
917
+ /**
918
+ * 检查是否可用
919
+ */
920
+ isAvailable() {
921
+ return this.SpeechRecognitionClass !== null;
922
+ }
923
+ async start() {
924
+ if (!this.SpeechRecognitionClass) {
925
+ throw { code: "NOT_SUPPORTED", message: "\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u8BED\u97F3\u8BC6\u522B" };
926
+ }
927
+ if (this._status !== "IDLE" /* IDLE */) {
928
+ return;
929
+ }
930
+ this.setStatus("CONNECTING" /* CONNECTING */);
931
+ return new Promise((resolve, reject) => {
932
+ this.recognition = new this.SpeechRecognitionClass();
933
+ this.recognition.lang = this.config.lang ?? "zh-CN";
934
+ this.recognition.continuous = this.config.continuous ?? false;
935
+ this.recognition.interimResults = this.config.interimResults ?? true;
936
+ this.recognition.maxAlternatives = this.config.maxAlternatives ?? 1;
937
+ this.recognition.onstart = () => {
938
+ this.setStatus("RECORDING" /* RECORDING */);
939
+ this.emit("start");
940
+ this.emit("audiostart");
941
+ resolve();
942
+ };
943
+ this.recognition.onend = () => {
944
+ this.setStatus("IDLE" /* IDLE */);
945
+ this.emit("audioend");
946
+ this.emit("end");
947
+ };
948
+ this.recognition.onresult = (e) => {
949
+ const results = e.results[e.resultIndex];
950
+ if (results?.[0]) {
951
+ this.emit("result", {
952
+ transcript: results[0].transcript,
953
+ isFinal: results.isFinal,
954
+ confidence: results[0].confidence,
955
+ original: e
956
+ });
957
+ }
958
+ };
959
+ this.recognition.onerror = (e) => {
960
+ const errorMap = {
961
+ "not-allowed": "NOT_ALLOWED",
962
+ "no-speech": "NO_SPEECH",
963
+ network: "NETWORK"
964
+ };
965
+ const error = {
966
+ code: errorMap[e.error] ?? "UNKNOWN",
967
+ message: this.getErrorMessage(e.error),
968
+ originalError: e
969
+ };
970
+ this.setStatus("IDLE" /* IDLE */);
971
+ this.emit("error", error);
972
+ if (e.error === "not-allowed" || e.error === "audio-capture") {
973
+ reject(error);
974
+ }
975
+ };
976
+ this.recognition.onsoundstart = () => this.emit("soundstart");
977
+ this.recognition.onsoundend = () => this.emit("soundend");
978
+ this.recognition.onspeechstart = () => this.emit("speechstart");
979
+ this.recognition.onspeechend = () => this.emit("speechend");
980
+ try {
981
+ this.recognition.start();
982
+ } catch (e) {
983
+ this.setStatus("IDLE" /* IDLE */);
984
+ reject({ code: "UNKNOWN", message: "\u542F\u52A8\u8BC6\u522B\u5931\u8D25", originalError: e });
985
+ }
986
+ });
987
+ }
988
+ stop() {
989
+ if (this.recognition && this._status === "RECORDING" /* RECORDING */) {
990
+ this.recognition.stop();
991
+ }
992
+ }
993
+ abort() {
994
+ if (this.recognition) {
995
+ this.recognition.abort();
996
+ this.setStatus("IDLE" /* IDLE */);
997
+ }
998
+ }
999
+ isListening() {
1000
+ return this._status === "RECORDING" /* RECORDING */;
1001
+ }
1002
+ getErrorMessage(errorCode) {
1003
+ const messages = {
1004
+ "no-speech": "\u672A\u68C0\u6D4B\u5230\u8BED\u97F3\u8F93\u5165",
1005
+ "audio-capture": "\u65E0\u6CD5\u8BBF\u95EE\u9EA6\u514B\u98CE",
1006
+ "not-allowed": "\u9EA6\u514B\u98CE\u6743\u9650\u88AB\u62D2\u7EDD",
1007
+ network: "\u7F51\u7EDC\u9519\u8BEF",
1008
+ aborted: "\u8BC6\u522B\u88AB\u4E2D\u6B62",
1009
+ "language-not-supported": "\u4E0D\u652F\u6301\u7684\u8BED\u8A00",
1010
+ "service-not-allowed": "\u8BED\u97F3\u670D\u52A1\u4E0D\u53EF\u7528"
1011
+ };
1012
+ return messages[errorCode] ?? `\u8BED\u97F3\u8BC6\u522B\u9519\u8BEF: ${errorCode}`;
1013
+ }
1014
+ };
1015
+ var CloudRecognitionStrategy = class extends BaseRecognitionStrategy {
1016
+ static {
1017
+ chunkPK6SKIKE_cjs.__name(this, "CloudRecognitionStrategy");
1018
+ }
1019
+ audioContext = null;
1020
+ mediaStream = null;
1021
+ workletNode = null;
1022
+ scriptProcessor = null;
1023
+ socket = null;
1024
+ adapter;
1025
+ transport;
1026
+ // HTTP 模式缓冲
1027
+ pcmChunks = [];
1028
+ totalPCMLength = 0;
1029
+ // 页面可见性监听器
1030
+ visibilityHandler = null;
1031
+ // 弱网消息队列
1032
+ msgQueue = [];
1033
+ MSG_QUEUE_MAX = 50;
1034
+ // ScriptProcessor 降级参数
1035
+ spSilenceCount = 0;
1036
+ spMaxSilence = 0;
1037
+ isRecordingFlag = false;
1038
+ // 重连参数
1039
+ reconnectAttempts = 0;
1040
+ reconnectTimer = null;
1041
+ // Worklet 加载标记
1042
+ workletLoaded = false;
1043
+ constructor(config) {
1044
+ super(config);
1045
+ if (!config.cloudAdapter) {
1046
+ throw new Error("[CloudStrategy] \u9700\u8981\u63D0\u4F9B cloudAdapter");
1047
+ }
1048
+ this.adapter = config.cloudAdapter;
1049
+ this.transport = config.transport ?? "websocket";
1050
+ }
1051
+ async start() {
1052
+ if (this._status !== "IDLE" /* IDLE */) {
1053
+ return;
1054
+ }
1055
+ this.setStatus("CONNECTING" /* CONNECTING */);
1056
+ this.pcmChunks = [];
1057
+ this.totalPCMLength = 0;
1058
+ this.reconnectAttempts = 0;
1059
+ try {
1060
+ if (this.transport === "websocket") {
1061
+ if (!this.adapter.getConnectUrl) {
1062
+ throw new Error("\u9002\u914D\u5668\u7F3A\u5C11 getConnectUrl \u65B9\u6CD5");
1063
+ }
1064
+ const url = await Promise.resolve(this.adapter.getConnectUrl());
1065
+ await this.initWebSocket(url);
1066
+ }
1067
+ const audioConfig = this.config.audioConfig ?? {};
1068
+ this.mediaStream = await navigator.mediaDevices.getUserMedia({
1069
+ audio: {
1070
+ echoCancellation: audioConfig.echoCancellation ?? true,
1071
+ noiseSuppression: audioConfig.noiseSuppression ?? true,
1072
+ autoGainControl: audioConfig.autoGainControl ?? true
1073
+ }
1074
+ });
1075
+ await this.initAudioContext();
1076
+ if (this.transport === "http") {
1077
+ this.setStatus("RECORDING" /* RECORDING */);
1078
+ }
1079
+ this.setupVisibilityListener();
1080
+ this.emit("start");
1081
+ this.emit("audiostart");
1082
+ } catch (err) {
1083
+ const error = err;
1084
+ this.emit("error", {
1085
+ code: "NOT_ALLOWED",
1086
+ message: error.message || "\u542F\u52A8\u5931\u8D25",
1087
+ originalError: err
1088
+ });
1089
+ this.cleanup();
1090
+ throw err;
1091
+ }
1092
+ }
1093
+ /**
1094
+ * 初始化 WebSocket 连接
1095
+ */
1096
+ initWebSocket(url) {
1097
+ return new Promise((resolve, reject) => {
1098
+ this.socket = new WebSocket(url);
1099
+ this.socket.binaryType = "arraybuffer";
1100
+ const timeout = setTimeout(() => {
1101
+ reject(new Error("WebSocket \u8FDE\u63A5\u8D85\u65F6"));
1102
+ }, 1e4);
1103
+ this.socket.onopen = () => {
1104
+ clearTimeout(timeout);
1105
+ this.setStatus("RECORDING" /* RECORDING */);
1106
+ this.reconnectAttempts = 0;
1107
+ if (this.adapter.getHandshakeParams) {
1108
+ const params = this.adapter.getHandshakeParams();
1109
+ if (params) {
1110
+ const msg = typeof params === "string" ? params : JSON.stringify(params);
1111
+ this.socket?.send(msg);
1112
+ }
1113
+ }
1114
+ resolve();
1115
+ };
1116
+ this.socket.onerror = (e) => {
1117
+ clearTimeout(timeout);
1118
+ reject(e);
1119
+ };
1120
+ this.socket.onmessage = (e) => {
1121
+ try {
1122
+ const data = e.data;
1123
+ const raw = typeof data === "string" ? data : new TextDecoder().decode(data);
1124
+ const res = this.adapter.parseResult(JSON.parse(raw));
1125
+ if (res) {
1126
+ this.emit("result", res);
1127
+ }
1128
+ } catch {
1129
+ }
1130
+ };
1131
+ this.socket.onclose = () => {
1132
+ if (this._status === "RECORDING" /* RECORDING */) {
1133
+ this.handleReconnect();
1134
+ }
1135
+ };
1136
+ });
1137
+ }
1138
+ /**
1139
+ * 处理重连逻辑
1140
+ */
1141
+ handleReconnect() {
1142
+ const maxAttempts = this.config.maxReconnectAttempts ?? 3;
1143
+ const interval = this.config.reconnectInterval ?? 2e3;
1144
+ if (this.config.autoReconnect && this.reconnectAttempts < maxAttempts) {
1145
+ this.reconnectAttempts++;
1146
+ console.log(`[CloudStrategy] \u5C1D\u8BD5\u91CD\u8FDE (${this.reconnectAttempts}/${maxAttempts})`);
1147
+ this.reconnectTimer = setTimeout(() => {
1148
+ void (async () => {
1149
+ try {
1150
+ if (this.adapter.getConnectUrl) {
1151
+ const url = await Promise.resolve(this.adapter.getConnectUrl());
1152
+ await this.initWebSocket(url);
1153
+ }
1154
+ } catch {
1155
+ this.handleReconnect();
1156
+ }
1157
+ })();
1158
+ }, interval);
1159
+ } else {
1160
+ this.emit("error", {
1161
+ code: "NETWORK",
1162
+ message: "\u8FDE\u63A5\u5DF2\u65AD\u5F00"
1163
+ });
1164
+ void this.stop();
1165
+ }
1166
+ }
1167
+ /**
1168
+ * 初始化音频上下文
1169
+ */
1170
+ async initAudioContext() {
1171
+ const AudioCtor = typeof window !== "undefined" && (window.AudioContext || window.webkitAudioContext);
1172
+ if (!AudioCtor) {
1173
+ throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 AudioContext");
1174
+ }
1175
+ this.audioContext = new AudioCtor();
1176
+ if (this.audioContext.state === "suspended") {
1177
+ await this.audioContext.resume();
1178
+ }
1179
+ const source = this.audioContext.createMediaStreamSource(this.mediaStream);
1180
+ if (this.audioContext.audioWorklet && !this.workletLoaded) {
1181
+ try {
1182
+ const blob = new Blob([WORKLET_CODE], { type: "application/javascript" });
1183
+ await this.audioContext.audioWorklet.addModule(URL.createObjectURL(blob));
1184
+ this.workletLoaded = true;
1185
+ this.setupWorklet(source);
1186
+ console.log("[CloudStrategy] \u4F7F\u7528 AudioWorklet \u6A21\u5F0F");
1187
+ return;
1188
+ } catch (e) {
1189
+ console.warn("[CloudStrategy] AudioWorklet \u52A0\u8F7D\u5931\u8D25\uFF0C\u964D\u7EA7\u5230 ScriptProcessor:", e);
1190
+ }
1191
+ }
1192
+ this.setupScriptProcessor(source);
1193
+ console.log("[CloudStrategy] \u4F7F\u7528 ScriptProcessor \u964D\u7EA7\u6A21\u5F0F");
1194
+ }
1195
+ /**
1196
+ * 设置 AudioWorklet
1197
+ */
1198
+ setupWorklet(source) {
1199
+ this.workletNode = new AudioWorkletNode(this.audioContext, "speech-processor");
1200
+ const audioConfig = this.config.audioConfig ?? {};
1201
+ this.workletNode.port.postMessage({
1202
+ type: "CONFIG",
1203
+ payload: {
1204
+ currentRate: this.audioContext.sampleRate,
1205
+ targetRate: audioConfig.sampleRate ?? 16e3,
1206
+ vadThreshold: audioConfig.vadThreshold ?? 0.02,
1207
+ vadDuration: audioConfig.vadDuration ?? 3e3
1208
+ }
1209
+ });
1210
+ this.workletNode.port.onmessage = (e) => {
1211
+ const { type, payload } = e.data;
1212
+ if (type === "AUDIO_DATA" && payload) {
1213
+ this.handlePCM(payload);
1214
+ }
1215
+ if (type === "VAD_TIMEOUT") {
1216
+ this.emit("speechend");
1217
+ void this.stop();
1218
+ }
1219
+ };
1220
+ this.workletNode.port.postMessage({ type: "SET_RECORDING", payload: true });
1221
+ source.connect(this.workletNode);
1222
+ this.workletNode.connect(this.audioContext.destination);
1223
+ this.emit("speechstart");
1224
+ }
1225
+ /**
1226
+ * 设置 ScriptProcessor (降级方案)
1227
+ */
1228
+ setupScriptProcessor(source) {
1229
+ const bufferSize = 4096;
1230
+ this.scriptProcessor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
1231
+ const audioConfig = this.config.audioConfig ?? {};
1232
+ const targetRate = audioConfig.sampleRate ?? 16e3;
1233
+ const currentRate = this.audioContext.sampleRate;
1234
+ const secondsPerBuf = bufferSize / currentRate;
1235
+ this.spMaxSilence = (audioConfig.vadDuration ?? 3e3) / 1e3 / secondsPerBuf;
1236
+ this.spSilenceCount = 0;
1237
+ this.isRecordingFlag = true;
1238
+ this.scriptProcessor.onaudioprocess = (e) => {
1239
+ if (!this.isRecordingFlag) return;
1240
+ const input = e.inputBuffer.getChannelData(0);
1241
+ const rms = AudioUtils.calculateRMS(input);
1242
+ if (rms < (audioConfig.vadThreshold ?? 0.02)) {
1243
+ this.spSilenceCount++;
1244
+ if (this.spSilenceCount > this.spMaxSilence) {
1245
+ this.emit("speechend");
1246
+ void this.stop();
1247
+ return;
1248
+ }
1249
+ } else {
1250
+ this.spSilenceCount = 0;
1251
+ }
1252
+ const resampled = AudioUtils.resample(input, currentRate, targetRate);
1253
+ const pcm = AudioUtils.floatTo16BitPCM(resampled);
1254
+ this.handlePCM(pcm.buffer.slice(0));
1255
+ };
1256
+ source.connect(this.scriptProcessor);
1257
+ this.scriptProcessor.connect(this.audioContext.destination);
1258
+ this.emit("speechstart");
1259
+ }
1260
+ /**
1261
+ * 处理 PCM 数据
1262
+ */
1263
+ handlePCM(buffer) {
1264
+ if (this.transport === "websocket") {
1265
+ let payload = buffer;
1266
+ if (this.adapter.transformAudioData) {
1267
+ payload = this.adapter.transformAudioData(buffer);
1268
+ }
1269
+ if (this.socket && this.socket.readyState === WebSocket.OPEN) {
1270
+ this.flushMsgQueue();
1271
+ this.socket.send(payload);
1272
+ } else if (this.socket && this.socket.readyState === WebSocket.CONNECTING) {
1273
+ if (payload instanceof ArrayBuffer && this.msgQueue.length < this.MSG_QUEUE_MAX) {
1274
+ this.msgQueue.push(payload);
1275
+ }
1276
+ }
1277
+ } else {
1278
+ const chunk = buffer.slice(0);
1279
+ this.pcmChunks.push(chunk);
1280
+ this.totalPCMLength += chunk.byteLength / 2;
1281
+ }
1282
+ }
1283
+ /**
1284
+ * 刷新消息队列
1285
+ */
1286
+ flushMsgQueue() {
1287
+ while (this.msgQueue.length > 0 && this.socket?.readyState === WebSocket.OPEN) {
1288
+ const data = this.msgQueue.shift();
1289
+ if (data) {
1290
+ this.socket.send(data);
1291
+ }
1292
+ }
1293
+ }
1294
+ /**
1295
+ * 设置页面可见性监听
1296
+ */
1297
+ setupVisibilityListener() {
1298
+ if (typeof document === "undefined") return;
1299
+ this.visibilityHandler = () => {
1300
+ if (document.hidden && this._status === "RECORDING" /* RECORDING */) {
1301
+ this.emit("error", {
1302
+ code: "NOT_ALLOWED",
1303
+ message: "\u9875\u9762\u5DF2\u8FDB\u5165\u540E\u53F0\uFF0C\u5F55\u97F3\u5DF2\u6682\u505C"
1304
+ });
1305
+ void this.stop();
1306
+ }
1307
+ };
1308
+ document.addEventListener("visibilitychange", this.visibilityHandler);
1309
+ }
1310
+ async stop() {
1311
+ if (this._status === "IDLE" /* IDLE */) {
1312
+ return;
1313
+ }
1314
+ this.isRecordingFlag = false;
1315
+ this.workletNode?.port.postMessage({ type: "SET_RECORDING", payload: false });
1316
+ this.mediaStream?.getTracks().forEach((t) => t.stop());
1317
+ if (this.transport === "http" && this.pcmChunks.length > 0 && this.adapter.recognizeShortAudio) {
1318
+ try {
1319
+ this.setStatus("PROCESSING" /* PROCESSING */);
1320
+ const mergedPCM = AudioUtils.mergeBuffers(this.pcmChunks, this.totalPCMLength);
1321
+ const sampleRate = this.config.audioConfig?.sampleRate ?? 16e3;
1322
+ const wavBuffer = AudioUtils.encodeWAV(mergedPCM, sampleRate);
1323
+ const result = await this.adapter.recognizeShortAudio(wavBuffer);
1324
+ this.emit("result", result);
1325
+ } catch (e) {
1326
+ const error = e;
1327
+ this.emit("error", {
1328
+ code: "ADAPTER_ERROR",
1329
+ message: error.message || "\u8BC6\u522B\u5931\u8D25",
1330
+ originalError: e
1331
+ });
1332
+ }
1333
+ }
1334
+ this.cleanup();
1335
+ this.emit("audioend");
1336
+ this.emit("end");
1337
+ }
1338
+ abort() {
1339
+ this.isRecordingFlag = false;
1340
+ this.cleanup();
1341
+ this.emit("end");
1342
+ }
1343
+ isListening() {
1344
+ return this._status === "RECORDING" /* RECORDING */;
1345
+ }
1346
+ /**
1347
+ * 清理资源
1348
+ */
1349
+ cleanup() {
1350
+ if (this.reconnectTimer) {
1351
+ clearTimeout(this.reconnectTimer);
1352
+ this.reconnectTimer = null;
1353
+ }
1354
+ if (this.socket) {
1355
+ this.socket.close();
1356
+ this.socket = null;
1357
+ }
1358
+ this.workletNode?.disconnect();
1359
+ this.scriptProcessor?.disconnect();
1360
+ void this.audioContext?.close();
1361
+ if (this.visibilityHandler && typeof document !== "undefined") {
1362
+ document.removeEventListener("visibilitychange", this.visibilityHandler);
1363
+ this.visibilityHandler = null;
1364
+ }
1365
+ this.workletNode = null;
1366
+ this.scriptProcessor = null;
1367
+ this.audioContext = null;
1368
+ this.mediaStream = null;
1369
+ this.pcmChunks = [];
1370
+ this.totalPCMLength = 0;
1371
+ this.msgQueue = [];
1372
+ this.setStatus("IDLE" /* IDLE */);
1373
+ }
1374
+ };
1375
+ var SpeechRecognizerImpl = class {
1376
+ static {
1377
+ chunkPK6SKIKE_cjs.__name(this, "SpeechRecognizerImpl");
1378
+ }
1379
+ strategy = null;
1380
+ config = {};
1381
+ _currentProvider = "browser";
1382
+ _status = "idle";
1383
+ eventHandlers = /* @__PURE__ */ new Map();
1384
+ customProviders = /* @__PURE__ */ new Map();
1385
+ get currentProvider() {
1386
+ return this._currentProvider;
1387
+ }
1388
+ get status() {
1389
+ return this._status;
1390
+ }
1391
+ /**
1392
+ * 获取当前识别状态
1393
+ */
1394
+ get recognitionStatus() {
1395
+ return this.strategy?.status ?? "IDLE" /* IDLE */;
1396
+ }
1397
+ /**
1398
+ * 初始化语音识别器
1399
+ */
1400
+ initialize(config) {
1401
+ this._status = "loading";
1402
+ this.config = {
1403
+ lang: "zh-CN",
1404
+ continuous: false,
1405
+ interimResults: true,
1406
+ maxAlternatives: 1,
1407
+ mode: "auto",
1408
+ autoReconnect: true,
1409
+ maxReconnectAttempts: 3,
1410
+ reconnectInterval: 2e3,
1411
+ audioConfig: {
1412
+ sampleRate: 16e3,
1413
+ vadThreshold: 0.02,
1414
+ vadDuration: 3e3,
1415
+ echoCancellation: true,
1416
+ noiseSuppression: true,
1417
+ autoGainControl: true
1418
+ },
1419
+ ...config
1420
+ };
1421
+ try {
1422
+ this.initializeStrategy();
1423
+ this._status = "ready";
1424
+ return Promise.resolve();
1425
+ } catch (error) {
1426
+ this._status = "error";
1427
+ return Promise.reject(error);
1428
+ }
1429
+ }
1430
+ /**
1431
+ * 初始化识别策略
1432
+ */
1433
+ initializeStrategy() {
1434
+ const advConfig = this.config;
1435
+ const mode = advConfig.mode ?? "auto";
1436
+ const hasNative = typeof window !== "undefined" && ("SpeechRecognition" in window || "webkitSpeechRecognition" in window);
1437
+ if (mode === "native" || mode === "auto" && hasNative && !advConfig.cloudAdapter) {
1438
+ const nativeStrategy = new NativeRecognitionStrategy(advConfig);
1439
+ if (!nativeStrategy.isAvailable()) {
1440
+ throw new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301\u539F\u751F\u8BED\u97F3\u8BC6\u522B");
1441
+ }
1442
+ this.strategy = nativeStrategy;
1443
+ this._currentProvider = "browser";
1444
+ } else if (mode === "cloud" || mode === "auto" && advConfig.cloudAdapter) {
1445
+ if (!advConfig.cloudAdapter) {
1446
+ throw new Error("\u4E91\u7AEF\u6A21\u5F0F\u9700\u8981\u63D0\u4F9B cloudAdapter");
1447
+ }
1448
+ this.strategy = new CloudRecognitionStrategy(advConfig);
1449
+ this._currentProvider = "custom";
1450
+ } else {
1451
+ throw new Error("\u6CA1\u6709\u53EF\u7528\u7684\u8BC6\u522B\u7B56\u7565");
1452
+ }
1453
+ this.forwardStrategyEvents();
1454
+ }
1455
+ /**
1456
+ * 转发策略事件
1457
+ */
1458
+ forwardStrategyEvents() {
1459
+ if (!this.strategy) return;
1460
+ this.strategy.on("result", (res) => {
1461
+ this.emit("result", {
1462
+ type: "result",
1463
+ result: this.convertResult(res)
1464
+ });
1465
+ });
1466
+ this.strategy.on("error", (err) => {
1467
+ const errorEvent = {
1468
+ type: "error",
1469
+ error: {
1470
+ code: err.code,
1471
+ message: err.message
1472
+ }
1473
+ };
1474
+ if (err.originalError) {
1475
+ errorEvent.error.originalError = err.originalError;
1476
+ }
1477
+ this.emit("error", errorEvent);
1478
+ });
1479
+ this.strategy.on("start", () => this.emit("start", { type: "start" }));
1480
+ this.strategy.on("end", () => this.emit("end", { type: "end" }));
1481
+ this.strategy.on("soundstart", () => this.emit("soundstart", { type: "soundstart" }));
1482
+ this.strategy.on("soundend", () => this.emit("soundend", { type: "soundend" }));
1483
+ this.strategy.on("speechstart", () => this.emit("speechstart", { type: "speechstart" }));
1484
+ this.strategy.on("speechend", () => this.emit("speechend", { type: "speechend" }));
1485
+ this.strategy.on("audiostart", () => this.emit("audiostart", { type: "audiostart" }));
1486
+ this.strategy.on("audioend", () => this.emit("audioend", { type: "audioend" }));
1487
+ }
1488
+ /**
1489
+ * 转换识别结果格式
1490
+ */
1491
+ convertResult(res) {
1492
+ return {
1493
+ results: [
1494
+ {
1495
+ transcript: res.transcript,
1496
+ confidence: res.confidence,
1497
+ isFinal: res.isFinal
1498
+ }
1499
+ ],
1500
+ bestTranscript: res.transcript,
1501
+ bestConfidence: res.confidence,
1502
+ isFinal: res.isFinal
1503
+ };
1504
+ }
1505
+ /**
1506
+ * 开始识别
1507
+ */
1508
+ async start(config) {
1509
+ if (this._status !== "ready") {
1510
+ throw new Error("\u8BC6\u522B\u5668\u672A\u5C31\u7EEA");
1511
+ }
1512
+ if (config) {
1513
+ this.config = { ...this.config, ...config };
1514
+ this.initializeStrategy();
1515
+ }
1516
+ return this.strategy?.start();
1517
+ }
1518
+ /**
1519
+ * 停止识别
1520
+ */
1521
+ stop() {
1522
+ this.strategy?.stop();
1523
+ }
1524
+ /**
1525
+ * 中止识别
1526
+ */
1527
+ abort() {
1528
+ this.strategy?.abort();
1529
+ }
1530
+ /**
1531
+ * 是否正在识别
1532
+ */
1533
+ isListening() {
1534
+ return this.strategy?.isListening() ?? false;
1535
+ }
1536
+ /**
1537
+ * 添加事件监听
1538
+ */
1539
+ on(event, handler) {
1540
+ if (!this.eventHandlers.has(event)) {
1541
+ this.eventHandlers.set(event, /* @__PURE__ */ new Set());
1542
+ }
1543
+ this.eventHandlers.get(event).add(handler);
1544
+ }
1545
+ /**
1546
+ * 移除事件监听
1547
+ */
1548
+ off(event, handler) {
1549
+ this.eventHandlers.get(event)?.delete(handler);
1550
+ }
1551
+ /**
1552
+ * 触发事件
1553
+ */
1554
+ emit(type, event) {
1555
+ this.eventHandlers.get(type)?.forEach((handler) => {
1556
+ try {
1557
+ handler(event);
1558
+ } catch (e) {
1559
+ console.error("[SpeechRecognizer] \u4E8B\u4EF6\u5904\u7406\u5668\u9519\u8BEF:", e);
1560
+ }
1561
+ });
1562
+ }
1563
+ /**
1564
+ * 销毁实例
1565
+ */
1566
+ dispose() {
1567
+ this.strategy?.dispose();
1568
+ this.strategy = null;
1569
+ this.eventHandlers.clear();
1570
+ this._status = "idle";
1571
+ }
1572
+ /**
1573
+ * 注册自定义提供商
1574
+ */
1575
+ registerProvider(type, provider) {
1576
+ this.customProviders.set(type, provider);
1577
+ }
1578
+ /**
1579
+ * 使用云端适配器
1580
+ */
1581
+ useCloudAdapter(adapter) {
1582
+ this.config.cloudAdapter = adapter;
1583
+ this.config.mode = "cloud";
1584
+ }
1585
+ };
1586
+ function getSpeechRecognitionConstructor() {
1587
+ if (typeof window === "undefined") {
1588
+ return null;
1589
+ }
1590
+ return window.SpeechRecognition ?? window.webkitSpeechRecognition ?? null;
1591
+ }
1592
+ chunkPK6SKIKE_cjs.__name(getSpeechRecognitionConstructor, "getSpeechRecognitionConstructor");
1593
+ async function createSpeechRecognizer(config) {
1594
+ const recognizer = new SpeechRecognizerImpl();
1595
+ await recognizer.initialize(config);
1596
+ return recognizer;
1597
+ }
1598
+ chunkPK6SKIKE_cjs.__name(createSpeechRecognizer, "createSpeechRecognizer");
1599
+ function isSpeechRecognitionSupported() {
1600
+ return getSpeechRecognitionConstructor() !== null;
1601
+ }
1602
+ chunkPK6SKIKE_cjs.__name(isSpeechRecognitionSupported, "isSpeechRecognitionSupported");
1603
+ async function listen(config) {
1604
+ const recognizer = await createSpeechRecognizer({
1605
+ ...config,
1606
+ continuous: false
1607
+ });
1608
+ return new Promise((resolve, reject) => {
1609
+ let hasResult = false;
1610
+ recognizer.on("result", (event) => {
1611
+ if (event.result && event.result.isFinal) {
1612
+ hasResult = true;
1613
+ recognizer.dispose();
1614
+ resolve(event.result);
1615
+ }
1616
+ });
1617
+ recognizer.on("error", (event) => {
1618
+ recognizer.dispose();
1619
+ reject(event.error);
1620
+ });
1621
+ recognizer.on("end", () => {
1622
+ if (!hasResult) {
1623
+ recognizer.dispose();
1624
+ resolve({
1625
+ results: [],
1626
+ bestTranscript: "",
1627
+ bestConfidence: 0,
1628
+ isFinal: true
1629
+ });
1630
+ }
1631
+ });
1632
+ recognizer.start().catch(reject);
1633
+ });
1634
+ }
1635
+ chunkPK6SKIKE_cjs.__name(listen, "listen");
1636
+
1637
+ exports.SpeechRecognizerImpl = SpeechRecognizerImpl;
1638
+ exports.SpeechSynthesizerImpl = SpeechSynthesizerImpl;
1639
+ exports.createSpeechRecognizer = createSpeechRecognizer;
1640
+ exports.createSpeechSynthesizer = createSpeechSynthesizer;
1641
+ exports.isSpeechRecognitionSupported = isSpeechRecognitionSupported;
1642
+ exports.isSpeechSynthesisSupported = isSpeechSynthesisSupported;
1643
+ exports.listen = listen;
1644
+ exports.speak = speak;
1645
+ //# sourceMappingURL=chunk-YZVCK6VZ.cjs.map
1646
+ //# sourceMappingURL=chunk-YZVCK6VZ.cjs.map
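
For context, the new chunk (src/plugins/speech) adds browser speech synthesis and recognition, with native Web Speech API strategies and optional cloud-adapter strategies, and exports helpers such as speak, listen, createSpeechSynthesizer and createSpeechRecognizer. The sketch below is illustrative only: the import path @lee-zg/melange/plugins is an assumption based on the plugins entry point growing in this release, and the config values are placeholders, neither is confirmed by the diff.

// Hedged sketch: exercising the helpers exported by the new speech chunk.
// Assumption: they are re-exported from the package's plugins entry point.
const {
  isSpeechSynthesisSupported,
  isSpeechRecognitionSupported,
  speak,
  listen,
} = require('@lee-zg/melange/plugins');

async function demo() {
  if (isSpeechSynthesisSupported()) {
    // One-shot helper: creates a synthesizer, speaks the text, then disposes it.
    await speak('Hello from melange', { lang: 'en-US', rate: 1, pitch: 1, volume: 1 });
  }
  if (isSpeechRecognitionSupported()) {
    // Single-utterance recognition: resolves with the final result (continuous is forced off).
    const { bestTranscript, bestConfidence } = await listen({ lang: 'en-US' });
    console.log(bestTranscript, bestConfidence);
  }
}

demo().catch(console.error);

For streaming or third-party providers, both createSpeechSynthesizer and createSpeechRecognizer also accept a cloudAdapter together with mode: 'cloud' in their config, handled by the CloudSynthesisStrategy and CloudRecognitionStrategy added in this chunk.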