univoice 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +78 -16
  2. package/dist/{base--a8Bo2f0.d.ts → base-Bw2Puefv.d.ts} +22 -2
  3. package/dist/{base-Bae_riVx.d.ts → base-DhiS1mCx.d.ts} +17 -2
  4. package/dist/{chunk-RRXTYGBU.js → chunk-2MN6RZSS.js} +375 -4
  5. package/dist/chunk-2MN6RZSS.js.map +1 -0
  6. package/dist/{chunk-EHSTFTRI.js → chunk-FUVQN5PE.js} +3 -3
  7. package/dist/{chunk-EHSTFTRI.js.map → chunk-FUVQN5PE.js.map} +1 -1
  8. package/dist/{chunk-TY2HDS4F.js → chunk-NYM7PZUP.js} +3 -3
  9. package/dist/{chunk-TY2HDS4F.js.map → chunk-NYM7PZUP.js.map} +1 -1
  10. package/dist/chunk-R66GH6Y5.js +105 -0
  11. package/dist/chunk-R66GH6Y5.js.map +1 -0
  12. package/dist/{chunk-CM7VAOIV.js → chunk-U5KAFNFM.js} +287 -8
  13. package/dist/chunk-U5KAFNFM.js.map +1 -0
  14. package/dist/{save-DGQVjHM1.d.ts → save-lNS0YZU4.d.ts} +1 -1
  15. package/dist/src/asr/index.d.ts +3 -3
  16. package/dist/src/asr/index.js +3 -3
  17. package/dist/src/asr/providers/index.d.ts +2 -2
  18. package/dist/src/asr/providers/index.js +4 -3
  19. package/dist/src/asr/providers/index.js.map +1 -1
  20. package/dist/src/index.d.ts +4 -4
  21. package/dist/src/index.js +5 -5
  22. package/dist/src/tts/index.d.ts +3 -3
  23. package/dist/src/tts/index.js +3 -3
  24. package/dist/src/tts/providers/index.d.ts +2 -2
  25. package/dist/src/tts/providers/index.js +2 -2
  26. package/dist/{tee-BufkUu6s.d.ts → tee-CRWCx7JR.d.ts} +1 -1
  27. package/dist/{qwen-0GGX_nkP.d.ts → xfyun-ciu6L1-M.d.ts} +23 -2
  28. package/dist/{qwen-DzZEciEh.d.ts → xfyun-iV-ra5ZL.d.ts} +30 -2
  29. package/examples/asr/providers/xfyun/README.md +77 -0
  30. package/examples/asr/providers/xfyun/pcm-stream-in-stream-out.ts +98 -0
  31. package/examples/asr/providers/xfyun/stream-in-stream-out.ts +104 -0
  32. package/examples/tts/providers/doubao/seed-tts-1.0/README.md +7 -0
  33. package/examples/tts/providers/doubao/seed-tts-1.0/stream-in-stream-out-ogg-opus.ts +94 -0
  34. package/examples/tts/providers/doubao/seed-tts-2.0/README.md +9 -2
  35. package/examples/tts/providers/doubao/seed-tts-2.0/stream-in-stream-out-ogg-opus.ts +94 -0
  36. package/examples/tts/providers/xfyun/super-human/direct-instance.ts +86 -0
  37. package/examples/tts/providers/xfyun/super-human/non-stream-in-non-stream-out.ts +77 -0
  38. package/examples/tts/providers/xfyun/super-human/stream-in-stream-out.ts +85 -0
  39. package/examples/utils/common.ts +44 -0
  40. package/package.json +8 -8
  41. package/dist/chunk-7QVYU63E.js +0 -6
  42. package/dist/chunk-7QVYU63E.js.map +0 -1
  43. package/dist/chunk-CM7VAOIV.js.map +0 -1
  44. package/dist/chunk-RRXTYGBU.js.map +0 -1
package/README.md CHANGED
@@ -269,7 +269,7 @@ const response = await tts.synthesize({ text: '你好' });
269
269
  import { createTTS } from 'univoice';
270
270
 
271
271
  const tts = createTTS({
272
- provider: 'doubao' | 'openai' | 'minimax' | 'qwen' | 'gemini',
272
+ provider: 'doubao' | 'openai' | 'minimax' | 'qwen' | 'qwen-realtime' | 'gemini' | 'glm' | 'xfyun',
273
273
  // 通用配置
274
274
  apiKey?: string,
275
275
  baseUrl?: string,
@@ -314,7 +314,7 @@ const tts = createTTS({
314
314
  import { createASR } from 'univoice';
315
315
 
316
316
  const asr = createASR({
317
- provider: 'doubao' | 'openai' | 'minimax' | 'qwen' | 'gemini',
317
+ provider: 'doubao' | 'openai' | 'minimax' | 'qwen' | 'gemini' | 'glm' | 'xfyun',
318
318
  apiKey?: string,
319
319
  baseUrl?: string,
320
320
  model?: string,
@@ -355,6 +355,7 @@ const asr = createASR({
355
355
  | OpenAI | `openai` | 待实现 | 待实现 | 待实现 | 待实现 |
356
356
  | MiniMax | `minimax` | - | - | - | - |
357
357
  | Gemini | `gemini` | 待实现 | 待实现 | 待实现 | 待实现 |
358
+ | 科大讯飞 | `xfyun` | ✅ | ✅ | ✅ | ✅ |
358
359
 
359
360
  #### TTS 能力矩阵
360
361
 
@@ -366,6 +367,7 @@ const asr = createASR({
366
367
  | OpenAI | `openai` | 待实现 | 待实现 | 待实现 | 待实现 |
367
368
  | MiniMax | `minimax` | ✅ | ✅ | ✅ | ✅ |
368
369
  | Gemini | `gemini` | 待实现 | 待实现 | 待实现 | 待实现 |
370
+ | 科大讯飞 | `xfyun` | ✅ | ✅ | ✅ | ✅ |
369
371
 
370
372
  #### 能力说明
371
373
 
@@ -474,6 +476,29 @@ const asr = createASR({
474
476
  });
475
477
  ```
476
478
 
479
+ #### 科大讯飞
480
+
481
+ ```typescript
482
+ const tts = createTTS({
483
+ provider: 'xfyun',
484
+ appId: process.env.XFYUN_APP_ID,
485
+ apiSecret: process.env.XFYUN_API_SECRET,
486
+ apiKey: process.env.XFYUN_API_KEY,
487
+ voice: 'x5_lingxiaoxuan_flow',
488
+ model: 'super-human-tts',
489
+ format: 'pcm',
490
+ sampleRate: 16000,
491
+ });
492
+
493
+ const asr = createASR({
494
+ provider: 'xfyun',
495
+ appId: process.env.XFYUN_APP_ID,
496
+ apiSecret: process.env.XFYUN_API_SECRET,
497
+ apiKey: process.env.XFYUN_API_KEY,
498
+ language: 'zh-CN',
499
+ });
500
+ ```
501
+
477
502
  ---
478
503
 
479
504
  <!-- PERFORMANCE_TABLE_START -->
@@ -493,7 +518,7 @@ const asr = createASR({
493
518
  >
494
519
  > 如需评估服务商的真实性能,建议直接使用服务商官方 SDK 进行测试。
495
520
 
496
- > 生成时间: 2026/3/29 21:20:30
521
+ > 生成时间: 2026/4/8 17:39:11
497
522
 
498
523
  > 环境: Node.js v24.14.0, darwin arm64
499
524
 
@@ -588,7 +613,7 @@ const asr = createASR({
588
613
  | 豆包 | seed-tts-1.0 | zh_male_lengkugege_emo_v2_mars_bigtts | ogg_opus | 16000 | 3 | 653 | 62 | 3254 | 3461 | 231 | 88.9 |
589
614
  | 豆包 | seed-tts-1.0 | zh_male_lengkugege_emo_v2_mars_bigtts | ogg_opus | 24000 | 3 | 670 | 102 | 3429 | 7919 | 2209 | 59.3 |
590
615
  | 豆包 | seed-tts-1.0 | zh_male_lengkugege_emo_v2_mars_bigtts | ogg_opus | 48000 | 3 | 698 | 65 | 3217 | 3603 | 200 | 85.8 |
591
- | 豆包 | seed-tts-2.0 | zh_female_vv_uranus_bigtts | pcm | 8000 | 3 | 491 | 65 | 3810 | 3859 | **29 🏆** | 74.6 |
616
+ | 豆包 | seed-tts-2.0 | zh_female_vv_uranus_bigtts | pcm | 8000 | 3 | 491 | 65 | 3810 | 3859 | 29 | 74.6 |
592
617
  | 豆包 | seed-tts-2.0 | zh_female_vv_uranus_bigtts | pcm | 16000 | 3 | 500 | 66 | 3752 | 3832 | 43 | 75.5 |
593
618
  | 豆包 | seed-tts-2.0 | zh_female_vv_uranus_bigtts | pcm | 24000 | 3 | 510 | 65 | 3760 | 3783 | 50 | 76.3 |
594
619
  | 豆包 | seed-tts-2.0 | zh_female_vv_uranus_bigtts | pcm | 48000 | 3 | 560 | 64 | 3770 | 3835 | 43 | 75.4 |
@@ -645,6 +670,9 @@ const asr = createASR({
645
670
  | MiniMax | speech-01-turbo | male-qn-qingse | pcm | 32000 | 3 | 490 | 2 | 2101 | 2148 | 99 | 138.6 |
646
671
  | MiniMax | speech-01-turbo | male-qn-qingse | pcm | 44100 | 3 | 448 | **2 🏆** | 1920 | 2100 | 99 | 145.2 |
647
672
  | 智谱 GLM | glm-tts | tongtong | pcm | 24000 | 3 | 861 | *542* | 5037 | 5614 | 296 | 54.8 |
673
+ | 科大讯飞 | super-human-tts | x5_lingxiaoxuan_flow | pcm | 8000 | 3 | 543 | 17 | 2704 | 2897 | 119 | 104.1 |
674
+ | 科大讯飞 | super-human-tts | x5_lingxiaoxuan_flow | pcm | 16000 | 3 | 511 | 17 | 2629 | 2721 | 51 | 107.5 |
675
+ | 科大讯飞 | super-human-tts | x5_lingxiaoxuan_flow | pcm | 24000 | 3 | 592 | 16 | 2690 | 2709 | **28 🏆** | 106.3 |
648
676
 
649
677
  ## ASR 性能指标
650
678
 
@@ -675,13 +703,14 @@ const asr = createASR({
675
703
 
676
704
  | 服务商 | 模型 | 语言 | 输入格式 | 采样率 (Hz) | 测试次数 | 首包延迟 (ms) | 平均间隔 (ms) | P50 (ms) | P95 (ms) | 标准差 (ms) | RTF |
677
705
  |--------|------|------|----------|-------------|----------|---------------|---------------|----------|----------|-------------|-----|
678
- | 通义千问 | paraformer-realtime-v2 | zh-CN | pcm | 16000 | 3 | *978* | *82* | 685 | *2085* | *666* | *1.32* |
679
- | 通义千问 | paraformer-realtime-v1 | zh-CN | pcm | 16000 | 3 | **439 🏆** | **29 🏆** | **498 🏆** | **509 🏆** | **10 🏆** | **0.57 🏆** |
680
- | 豆包 | bigmodel | zh-CN | pcm | 16000 | 3 | 513 | 69 | *904* | 960 | 107 | 0.99 |
706
+ | 通义千问 | paraformer-realtime-v2 | zh-CN | pcm | 16000 | 3 | 978 | 82 | 685 | 2085 | *666* | *1.32* |
707
+ | 通义千问 | paraformer-realtime-v1 | zh-CN | pcm | 16000 | 3 | **439 🏆** | **29 🏆** | **498 🏆** | **509 🏆** | **10 🏆** | 0.57 |
708
+ | 豆包 | bigmodel | zh-CN | pcm | 16000 | 3 | 513 | 69 | 904 | 960 | 107 | 0.99 |
709
+ | 科大讯飞 | iat | zh-CN | pcm | 16000 | 3 | *1551* | *927* | *2835* | *2948* | 587 | **0.12 🏆** |
681
710
 
682
711
  ---
683
712
 
684
- *数据更新于: 2026-03-29*
713
+ *数据更新于: 2026-04-08*
685
714
 
686
715
  <!-- PERFORMANCE_TABLE_END -->
687
716
 
@@ -756,26 +785,58 @@ src/
756
785
  ├── tts/ # TTS 模块
757
786
  │ ├── base.ts # BaseTTS 抽象类
758
787
  │ ├── factory.ts # 工厂函数
788
+ │ ├── protocols/ # 协议实现
789
+ │ │ ├── volcengine.ts
790
+ │ │ ├── dashscope.ts
791
+ │ │ ├── dashscope-realtime.ts
792
+ │ │ ├── minimax.ts
793
+ │ │ └── xfyun.ts
759
794
  │ ├── utils/ # 工具函数
760
- │ │ ├── save.ts # 保存音频
761
- │ │ ├── collect.ts # 收集音频
762
- │ │ ├── play.ts # 播放音频
763
- │ │ └── tee.ts # 同时保存和播放
795
+ │ │ ├── save.ts
796
+ │ │ ├── save-audio.ts
797
+ │ │ ├── collect.ts
798
+ │ │ ├── play.ts
799
+ │ │ └── tee.ts
764
800
  │ └── providers/ # 提供商实现
765
801
  │ ├── doubao.ts
766
802
  │ ├── openai.ts
767
803
  │ ├── minimax.ts
768
804
  │ ├── qwen.ts
769
- └── gemini.ts
805
+ ├── qwen-realtime.ts
806
+ │ ├── gemini.ts
807
+ │ ├── glm.ts
808
+ │ └── xfyun.ts
770
809
  ├── asr/ # ASR 模块
771
810
  │ ├── base.ts # BaseASR 抽象类
772
811
  │ ├── factory.ts # 工厂函数
812
+ │ ├── protocols/ # 协议实现
813
+ │ │ ├── dashscope.ts
814
+ │ │ ├── sauc.ts
815
+ │ │ └── xfyun.ts
773
816
  │ ├── utils/ # 工具函数
817
+ │ │ ├── audio.ts
818
+ │ │ ├── collect.ts
819
+ │ │ ├── save.ts
820
+ │ │ ├── ogg-muxer.ts
821
+ │ │ └── opus-decode.ts
774
822
  │ └── providers/ # 提供商实现
823
+ │ ├── doubao.ts
824
+ │ ├── openai.ts
825
+ │ ├── minimax.ts
826
+ │ ├── qwen.ts
827
+ │ ├── gemini.ts
828
+ │ ├── glm.ts
829
+ │ └── xfyun.ts
775
830
  └── types/ # 类型定义
776
- ├── tts.ts # TTS 相关类型
777
- ├── asr.ts # ASR 相关类型
778
- └── llm-stream.ts # LLM 流式输出类型
831
+ ├── index.ts
832
+ ├── tts.ts
833
+ ├── asr.ts
834
+ ├── llm-stream.ts
835
+ └── voices/
836
+ ├── doubao.ts
837
+ ├── minimax.ts
838
+ ├── qwen.ts
839
+ └── glm.ts
779
840
  ```
780
841
 
781
842
  ---
@@ -801,3 +862,4 @@ src/
801
862
  - [阿里云通义千问](https://tongyi.aliyun.com/)
802
863
  - [Google Gemini](https://ai.google.dev/)
803
864
  - [智谱 AI](https://open.bigmodel.cn/)
865
+ - [科大讯飞](https://www.xfyun.cn/)
package/dist/{base--a8Bo2f0.d.ts → base-Bw2Puefv.d.ts} CHANGED
@@ -39,6 +39,24 @@ interface GlmASROptions extends BaseASROptions {
39
39
  hotwords?: string[];
40
40
  context?: string;
41
41
  }
42
+ interface XfyunASROptions extends BaseASROptions {
43
+ appId?: string;
44
+ apiSecret?: string;
45
+ sampleRate?: number;
46
+ domain?: string;
47
+ accent?: string;
48
+ eos?: number;
49
+ dwa?: string;
50
+ ltc?: number;
51
+ dhw?: string;
52
+ ptt?: number;
53
+ rlang?: string;
54
+ vinfo?: number;
55
+ nunum?: number;
56
+ nbest?: number;
57
+ wbest?: number;
58
+ sendInterval?: number;
59
+ }
42
60
  type ASROptions = ({
43
61
  provider: 'doubao';
44
62
  } & DoubaoASROptions) | ({
@@ -52,6 +70,8 @@ type ASROptions = ({
52
70
  } & BaseASROptions) | ({
53
71
  provider: 'gemini';
54
72
  } & BaseASROptions) | ({
73
+ provider: 'xfyun';
74
+ } & XfyunASROptions) | ({
55
75
  provider: string;
56
76
  } & BaseASROptions);
57
77
  interface ListenInstanceOptions {
@@ -85,7 +105,7 @@ interface ASRProvider {
85
105
  name: string;
86
106
  listenStream(audio: AudioStream): AsyncIterable<ASRStreamChunk>;
87
107
  }
88
- type ASRProviderType = 'doubao' | 'minimax' | 'qwen' | 'openai' | 'gemini' | string;
108
+ type ASRProviderType = 'doubao' | 'minimax' | 'qwen' | 'openai' | 'gemini' | 'xfyun' | string;
89
109
  type AudioStream = AsyncIterable<Buffer | Uint8Array>;
90
110
  type AudioStreamInput = AudioStream | Buffer | Uint8Array | string;
91
111
  type ASRConnectionState = 'connected' | 'closed' | 'error';
@@ -131,4 +151,4 @@ declare abstract class BaseASR {
131
151
  }): Promise<ASRResponse>;
132
152
  }
133
153
 
134
- export { type ASRConnectOptions as A, BaseASR as B, type DoubaoASROptions as D, type GlmASROptions as G, type ListenInstanceOptions as L, type QwenASROptions as Q, type ASRConnection as a, type ASRConnectionState as b, type ASROptions as c, type ASRProvider as d, type ASRProviderType as e, type ASRRequest as f, type ASRResponse as g, type ASRSegment as h, type ASRStreamChunk as i, type AudioCodecFormat as j, type AudioContainerFormat as k, type AudioFormat as l, type AudioStream as m, type AudioStreamInput as n, type BaseASROptions as o };
154
+ export { type ASRConnectOptions as A, BaseASR as B, type DoubaoASROptions as D, type GlmASROptions as G, type ListenInstanceOptions as L, type QwenASROptions as Q, type XfyunASROptions as X, type ASRConnection as a, type ASRConnectionState as b, type ASROptions as c, type ASRProvider as d, type ASRProviderType as e, type ASRRequest as f, type ASRResponse as g, type ASRSegment as h, type ASRStreamChunk as i, type AudioCodecFormat as j, type AudioContainerFormat as k, type AudioFormat as l, type AudioStream as m, type AudioStreamInput as n, type BaseASROptions as o };
package/dist/{base-Bae_riVx.d.ts → base-DhiS1mCx.d.ts} CHANGED
@@ -96,6 +96,19 @@ interface QwenRealtimeTTSOptions extends BaseTTSOptions {
96
96
  interface GlmTTSOptions extends BaseTTSOptions {
97
97
  voice?: AcceptAnyString<GlmVoice>;
98
98
  }
99
+ interface XfyunTTSOptions extends BaseTTSOptions {
100
+ appId?: string;
101
+ apiSecret?: string;
102
+ sampleRate?: number;
103
+ oralLevel?: 'high' | 'mid' | 'low';
104
+ sparkAssist?: number;
105
+ stopSplit?: number;
106
+ remain?: number;
107
+ reg?: number;
108
+ rdn?: number;
109
+ rhy?: number;
110
+ bgs?: number;
111
+ }
99
112
  type TTSOptions = ({
100
113
  provider: 'doubao';
101
114
  } & DoubaoTTSOptions) | ({
@@ -111,6 +124,8 @@ type TTSOptions = ({
111
124
  } & BaseTTSOptions) | ({
112
125
  provider: 'glm';
113
126
  } & GlmTTSOptions) | ({
127
+ provider: 'xfyun';
128
+ } & XfyunTTSOptions) | ({
114
129
  provider: string;
115
130
  } & BaseTTSOptions);
116
131
  interface TTSRequest {
@@ -141,7 +156,7 @@ interface TTSVoice {
141
156
  language: string;
142
157
  gender?: 'male' | 'female' | 'neutral';
143
158
  }
144
- type TTSProviderType = 'doubao' | 'minimax' | 'qwen' | 'openai' | 'gemini' | string;
159
+ type TTSProviderType = 'doubao' | 'minimax' | 'qwen' | 'openai' | 'gemini' | 'xfyun' | string;
145
160
  type TTSConnectionState = 'connected' | 'closed' | 'error';
146
161
  interface TTSConnectOptions {
147
162
  timeout?: number;
@@ -188,4 +203,4 @@ declare abstract class BaseTTS implements TTSProvider {
188
203
  };
189
204
  }
190
205
 
191
- export { BaseTTS as B, type CosyVoiceV1Voice as C, type DoubaoTTSOptions as D, type GlmTTSOptions as G, type MinimaxTTSOptions as M, type OpenAIChatCompletionChunk as O, type QwenRealtimeOptions as Q, type SpeakInstanceOptions as S, type TTSConnectOptions as T, type BaseTTSOptions as a, type QwenRealtimeTTSOptions as b, type QwenTTSOptions as c, type TTSConnection as d, type TTSConnectionState as e, type TTSOptions as f, type TTSProvider as g, type TTSProviderType as h, type TTSRequest as i, type TTSResponse as j, type TTSStreamChunk as k, type TTSVoice as l, type TextStream as m, type CosyVoiceV2Voice as n, type CosyVoiceV3FlashVoice as o, type CosyVoiceV3PlusVoice as p, type CosyVoiceVoice as q, type DoubaoJupiterVoice as r, type DoubaoV1Voice as s, type DoubaoV2Voice as t, type DoubaoVoice as u, type MinimaxVoice as v, type OpenAIStream as w, type QwenRealtimeVoice as x, type QwenTTSModel as y };
206
+ export { BaseTTS as B, type CosyVoiceV1Voice as C, type DoubaoTTSOptions as D, type GlmTTSOptions as G, type MinimaxTTSOptions as M, type OpenAIChatCompletionChunk as O, type QwenRealtimeOptions as Q, type SpeakInstanceOptions as S, type TTSConnectOptions as T, type XfyunTTSOptions as X, type BaseTTSOptions as a, type QwenRealtimeTTSOptions as b, type QwenTTSOptions as c, type TTSConnection as d, type TTSConnectionState as e, type TTSOptions as f, type TTSProvider as g, type TTSProviderType as h, type TTSRequest as i, type TTSResponse as j, type TTSStreamChunk as k, type TTSVoice as l, type TextStream as m, type CosyVoiceV2Voice as n, type CosyVoiceV3FlashVoice as o, type CosyVoiceV3PlusVoice as p, type CosyVoiceVoice as q, type DoubaoJupiterVoice as r, type DoubaoV1Voice as s, type DoubaoV2Voice as t, type DoubaoVoice as u, type MinimaxVoice as v, type OpenAIStream as w, type QwenRealtimeVoice as x, type QwenTTSModel as y };
package/dist/{chunk-RRXTYGBU.js → chunk-2MN6RZSS.js} CHANGED
@@ -1,4 +1,4 @@
1
- import { __name } from './chunk-7QVYU63E.js';
1
+ import { __name, buildAuthUrl } from './chunk-R66GH6Y5.js';
2
2
  import { Buffer } from 'buffer';
3
3
  import { randomUUID } from 'crypto';
4
4
  import WebSocket from 'ws';
@@ -3142,6 +3142,376 @@ var QwenRealtimeTTSConnection = class {
3142
3142
  return this.provider.synthesizeOnConnection(this.ws, textChunks.join(""));
3143
3143
  }
3144
3144
  };
3145
+ function buildTTSAuthUrl(apiKey, apiSecret) {
3146
+ return buildAuthUrl("cbm01.cn-huabei-1.xf-yun.com", "/v1/private/mcd9m97e6", apiKey, apiSecret);
3147
+ }
3148
+ __name(buildTTSAuthUrl, "buildTTSAuthUrl");
3149
+ function mapAudioEncoding(format) {
3150
+ const encodingMap = {
3151
+ mp3: "lame",
3152
+ pcm: "raw",
3153
+ opus: "opus"
3154
+ };
3155
+ return encodingMap[format] || "lame";
3156
+ }
3157
+ __name(mapAudioEncoding, "mapAudioEncoding");
3158
+ function createRequestPayload(options, text, status, seq) {
3159
+ const payload = {
3160
+ header: {
3161
+ app_id: options.appId,
3162
+ status
3163
+ },
3164
+ parameter: {
3165
+ tts: {
3166
+ vcn: options.vcn,
3167
+ speed: options.speed,
3168
+ volume: options.volume,
3169
+ pitch: options.pitch,
3170
+ bgs: options.bgs ?? 0,
3171
+ reg: options.reg ?? 0,
3172
+ rdn: options.rdn ?? 0,
3173
+ rhy: options.rhy ?? 0,
3174
+ audio: {
3175
+ encoding: options.encoding,
3176
+ sample_rate: options.sampleRate,
3177
+ channels: 1,
3178
+ bit_depth: 16,
3179
+ frame_size: 0
3180
+ }
3181
+ }
3182
+ },
3183
+ payload: {
3184
+ text: {
3185
+ encoding: "utf8",
3186
+ compress: "raw",
3187
+ format: "plain",
3188
+ status,
3189
+ seq,
3190
+ text: Buffer.from(text).toString("base64")
3191
+ }
3192
+ }
3193
+ };
3194
+ if (options.oralLevel || options.sparkAssist != null || options.stopSplit != null || options.remain != null) {
3195
+ payload.parameter.oral = {
3196
+ ...options.oralLevel ? { oral_level: options.oralLevel } : {},
3197
+ ...options.sparkAssist != null ? { spark_assist: options.sparkAssist } : {},
3198
+ ...options.stopSplit != null ? { stop_split: options.stopSplit } : {},
3199
+ ...options.remain != null ? { remain: options.remain } : {}
3200
+ };
3201
+ }
3202
+ return JSON.stringify(payload);
3203
+ }
3204
+ __name(createRequestPayload, "createRequestPayload");
3205
+ function parseTTSResponse(data) {
3206
+ let text;
3207
+ if (Buffer.isBuffer(data)) {
3208
+ text = data.toString("utf8");
3209
+ } else if (data instanceof ArrayBuffer) {
3210
+ text = new TextDecoder().decode(data);
3211
+ } else if (Array.isArray(data)) {
3212
+ text = Buffer.concat(data).toString("utf8");
3213
+ } else {
3214
+ text = String(data);
3215
+ }
3216
+ return JSON.parse(text);
3217
+ }
3218
+ __name(parseTTSResponse, "parseTTSResponse");
3219
+ function extractAudioFromResponse(response) {
3220
+ return response.payload?.audio?.audio ?? null;
3221
+ }
3222
+ __name(extractAudioFromResponse, "extractAudioFromResponse");
3223
+ function isTTSSuccessResponse(response) {
3224
+ return response.header.code === 0;
3225
+ }
3226
+ __name(isTTSSuccessResponse, "isTTSSuccessResponse");
3227
+ function isTTSFinishedResponse(response) {
3228
+ return response.header.status === 2;
3229
+ }
3230
+ __name(isTTSFinishedResponse, "isTTSFinishedResponse");
3231
+
3232
+ // src/tts/providers/xfyun.ts
3233
+ var XfyunTTS = class extends BaseTTS {
3234
+ static {
3235
+ __name(this, "XfyunTTS");
3236
+ }
3237
+ name = "xfyun";
3238
+ /** 讯飞 AppID */
3239
+ appId;
3240
+ /** 讯飞 APISecret(用于 HMAC-SHA256 签名) */
3241
+ apiSecret;
3242
+ /** 音频采样率 */
3243
+ sampleRate;
3244
+ /** 口语化等级(仅 x4 系列发音人支持) */
3245
+ oralLevel;
3246
+ /** 是否通过大模型进行口语化(仅 x4 系列发音人支持) */
3247
+ sparkAssist;
3248
+ /** 是否关闭服务端拆句(仅 x4 系列发音人支持) */
3249
+ stopSplit;
3250
+ /** 是否保留原书面语(仅 x4 系列发音人支持) */
3251
+ remain;
3252
+ /** 英文发音方式 */
3253
+ reg;
3254
+ /** 数字发音方式 */
3255
+ rdn;
3256
+ /** 是否返回拼音标注 */
3257
+ rhy;
3258
+ /** 背景音 */
3259
+ bgs;
3260
+ constructor(options) {
3261
+ super(options);
3262
+ this.appId = options.appId || "";
3263
+ this.apiSecret = options.apiSecret || "";
3264
+ this.sampleRate = options.sampleRate ?? 24e3;
3265
+ this.oralLevel = options.oralLevel;
3266
+ this.sparkAssist = options.sparkAssist;
3267
+ this.stopSplit = options.stopSplit;
3268
+ this.remain = options.remain;
3269
+ this.reg = options.reg;
3270
+ this.rdn = options.rdn;
3271
+ this.rhy = options.rhy;
3272
+ this.bgs = options.bgs;
3273
+ this.voice = options.voice || "x5_lingxiaoxuan_flow";
3274
+ this.format = options.format || "mp3";
3275
+ }
3276
+ /**
3277
+ * 将 BaseTTS 的 speed/volume/pitch (0-2 范围) 映射为讯飞的 0-100 范围
3278
+ * BaseTTS 默认 1.0 → xfyun 50
3279
+ */
3280
+ mapParam(value) {
3281
+ return Math.round(value * 50);
3282
+ }
3283
+ /**
3284
+ * 构建协议配置选项
3285
+ */
3286
+ buildProtocolOptions() {
3287
+ return {
3288
+ appId: this.appId,
3289
+ vcn: this.voice,
3290
+ speed: this.mapParam(this.speed),
3291
+ volume: this.mapParam(this.volume),
3292
+ pitch: this.mapParam(this.pitch),
3293
+ encoding: mapAudioEncoding(this.format),
3294
+ sampleRate: this.sampleRate,
3295
+ oralLevel: this.oralLevel,
3296
+ sparkAssist: this.sparkAssist,
3297
+ stopSplit: this.stopSplit,
3298
+ remain: this.remain,
3299
+ reg: this.reg,
3300
+ rdn: this.rdn,
3301
+ rhy: this.rhy,
3302
+ bgs: this.bgs
3303
+ };
3304
+ }
3305
+ /**
3306
+ * 合并多个 Uint8Array
3307
+ */
3308
+ concatArrays(arrays) {
3309
+ const totalLength = arrays.reduce((sum, arr) => sum + arr.length, 0);
3310
+ const result = new Uint8Array(totalLength);
3311
+ let offset = 0;
3312
+ for (const arr of arrays) {
3313
+ result.set(arr, offset);
3314
+ offset += arr.length;
3315
+ }
3316
+ return result;
3317
+ }
3318
+ /**
3319
+ * 合成语音(非流式)
3320
+ * 建立 WebSocket → 发送请求(status=2 一次性发送)→ 收集所有音频块 → 合并返回
3321
+ */
3322
+ async synthesize(request) {
3323
+ if (!this.appId) {
3324
+ throw new Error("appId is required for Xfyun TTS");
3325
+ }
3326
+ if (!this.apiKey) {
3327
+ throw new Error("apiKey is required for Xfyun TTS");
3328
+ }
3329
+ if (!this.apiSecret) {
3330
+ throw new Error("apiSecret is required for Xfyun TTS");
3331
+ }
3332
+ const protocolOptions = this.buildProtocolOptions();
3333
+ const url = buildTTSAuthUrl(this.apiKey, this.apiSecret);
3334
+ const ws = new WebSocket(url);
3335
+ await new Promise((resolve, reject) => {
3336
+ ws.on("open", resolve);
3337
+ ws.on("error", reject);
3338
+ });
3339
+ try {
3340
+ const payload = createRequestPayload(protocolOptions, request.text, 2, 0);
3341
+ ws.send(payload);
3342
+ const audioChunks = [];
3343
+ await new Promise((resolve, reject) => {
3344
+ ws.on("message", (data) => {
3345
+ try {
3346
+ const response = parseTTSResponse(data);
3347
+ if (!isTTSSuccessResponse(response)) {
3348
+ reject(
3349
+ new Error(`Xfyun TTS error: ${response.header.code} - ${response.header.message}`)
3350
+ );
3351
+ return;
3352
+ }
3353
+ const audioBase64 = extractAudioFromResponse(response);
3354
+ if (audioBase64) {
3355
+ audioChunks.push(Buffer.from(audioBase64, "base64"));
3356
+ }
3357
+ if (isTTSFinishedResponse(response)) {
3358
+ resolve();
3359
+ }
3360
+ } catch (err) {
3361
+ reject(err instanceof Error ? err : new Error(String(err)));
3362
+ }
3363
+ });
3364
+ ws.on("error", reject);
3365
+ ws.on("close", () => resolve());
3366
+ });
3367
+ const audio = this.concatArrays(audioChunks);
3368
+ if (audio.length === 0) {
3369
+ throw new Error("No audio received from Xfyun TTS service");
3370
+ }
3371
+ return {
3372
+ audio: Buffer.from(audio),
3373
+ format: this.format
3374
+ };
3375
+ } finally {
3376
+ ws.close();
3377
+ }
3378
+ }
3379
+ /**
3380
+ * 流式语音合成(内部实现方法)
3381
+ * 支持双向流式:流式文本输入,流式音频输出
3382
+ *
3383
+ * @param input 文本输入,可以是字符串或文本流
3384
+ * @returns 流式音频块
3385
+ * @internal
3386
+ */
3387
+ async *speakStream(input) {
3388
+ if (!this.appId) {
3389
+ throw new Error("appId is required for Xfyun TTS");
3390
+ }
3391
+ if (!this.apiKey) {
3392
+ throw new Error("apiKey is required for Xfyun TTS");
3393
+ }
3394
+ if (!this.apiSecret) {
3395
+ throw new Error("apiSecret is required for Xfyun TTS");
3396
+ }
3397
+ const textStream = normalizeTextStream(input);
3398
+ const protocolOptions = this.buildProtocolOptions();
3399
+ const url = buildTTSAuthUrl(this.apiKey, this.apiSecret);
3400
+ const queue = [];
3401
+ const syncState = { resolveWait: null, finished: false };
3402
+ const enqueue = /* @__PURE__ */ __name((item) => {
3403
+ queue.push(item);
3404
+ syncState.resolveWait?.();
3405
+ syncState.resolveWait = null;
3406
+ }, "enqueue");
3407
+ const ws = new WebSocket(url);
3408
+ await new Promise((resolve, reject) => {
3409
+ ws.on("open", resolve);
3410
+ ws.on("error", reject);
3411
+ });
3412
+ const processPromise = (async () => {
3413
+ try {
3414
+ await Promise.all([
3415
+ sendTextStream(ws, protocolOptions, textStream),
3416
+ receiveAudioToQueue(ws, enqueue)
3417
+ ]);
3418
+ } catch (error) {
3419
+ enqueue({
3420
+ type: "error",
3421
+ error: error instanceof Error ? error : new Error(String(error))
3422
+ });
3423
+ } finally {
3424
+ syncState.finished = true;
3425
+ syncState.resolveWait?.();
3426
+ syncState.resolveWait = null;
3427
+ ws.close();
3428
+ }
3429
+ })();
3430
+ try {
3431
+ while (!syncState.finished || queue.length > 0) {
3432
+ while (queue.length === 0 && !syncState.finished) {
3433
+ await new Promise((resolve) => {
3434
+ syncState.resolveWait = resolve;
3435
+ });
3436
+ }
3437
+ if (queue.length === 0) break;
3438
+ const item = queue.shift();
3439
+ if (!item) break;
3440
+ switch (item.type) {
3441
+ case "audio":
3442
+ yield { audioChunk: item.chunk };
3443
+ break;
3444
+ case "error":
3445
+ throw item.error;
3446
+ case "end":
3447
+ return;
3448
+ }
3449
+ }
3450
+ } finally {
3451
+ await processPromise.catch(() => {
3452
+ });
3453
+ }
3454
+ }
3455
+ };
3456
+ async function sendTextStream(ws, protocolOptions, textStream) {
3457
+ let seq = 0;
3458
+ let isFirst = true;
3459
+ for await (const chunk of textStream) {
3460
+ if (!chunk) continue;
3461
+ if (isFirst) {
3462
+ const payload = createRequestPayload(protocolOptions, chunk, 0, seq);
3463
+ ws.send(payload);
3464
+ isFirst = false;
3465
+ } else {
3466
+ const payload = createRequestPayload(protocolOptions, chunk, 1, seq);
3467
+ ws.send(payload);
3468
+ }
3469
+ seq++;
3470
+ }
3471
+ const endPayload = createRequestPayload(protocolOptions, "", 2, seq);
3472
+ ws.send(endPayload);
3473
+ }
3474
+ __name(sendTextStream, "sendTextStream");
3475
+ async function receiveAudioToQueue(ws, enqueue) {
3476
+ return new Promise((resolve, reject) => {
3477
+ ws.on("message", (data) => {
3478
+ try {
3479
+ const response = parseTTSResponse(data);
3480
+ if (!isTTSSuccessResponse(response)) {
3481
+ enqueue({
3482
+ type: "error",
3483
+ error: new Error(
3484
+ `Xfyun TTS error: ${response.header.code} - ${response.header.message}`
3485
+ )
3486
+ });
3487
+ resolve();
3488
+ return;
3489
+ }
3490
+ const audioBase64 = extractAudioFromResponse(response);
3491
+ if (audioBase64) {
3492
+ enqueue({ type: "audio", chunk: Buffer.from(audioBase64, "base64") });
3493
+ }
3494
+ if (isTTSFinishedResponse(response)) {
3495
+ enqueue({ type: "end" });
3496
+ resolve();
3497
+ }
3498
+ } catch (err) {
3499
+ enqueue({
3500
+ type: "error",
3501
+ error: err instanceof Error ? err : new Error(String(err))
3502
+ });
3503
+ resolve();
3504
+ }
3505
+ });
3506
+ ws.on("error", (err) => {
3507
+ reject(err);
3508
+ });
3509
+ ws.on("close", () => {
3510
+ resolve();
3511
+ });
3512
+ });
3513
+ }
3514
+ __name(receiveAudioToQueue, "receiveAudioToQueue");
3145
3515
 
3146
3516
  // src/tts/providers/index.ts
3147
3517
  registerTTSProvider("doubao", DoubaoTTS);
@@ -3151,7 +3521,8 @@ registerTTSProvider("qwen", QwenTTS);
3151
3521
  registerTTSProvider("qwen-realtime", QwenRealtimeTTS);
3152
3522
  registerTTSProvider("openai", TTS1);
3153
3523
  registerTTSProvider("gemini", GeminiTTS);
3524
+ registerTTSProvider("xfyun", XfyunTTS);
3154
3525
 
3155
- export { BaseTTS, DoubaoTTS, GeminiTTS, GlmTTS, MinimaxTTS, QwenRealtimeTTS, QwenTTS, TTS1, createTTS, getTTSProviders, registerTTSProvider };
3156
- //# sourceMappingURL=chunk-RRXTYGBU.js.map
3157
- //# sourceMappingURL=chunk-RRXTYGBU.js.map
3526
+ export { BaseTTS, DoubaoTTS, GeminiTTS, GlmTTS, MinimaxTTS, QwenRealtimeTTS, QwenTTS, TTS1, XfyunTTS, createTTS, getTTSProviders, registerTTSProvider };
3527
+ //# sourceMappingURL=chunk-2MN6RZSS.js.map
3528
+ //# sourceMappingURL=chunk-2MN6RZSS.js.map