univoice 0.2.0 → 0.3.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,6 +30,7 @@
30
30
  - 🚀 **边发边收** - LLM 流式输出可直接转换为语音,显著降低首字延迟
31
31
  - 🔌 **插件化架构** - 轻松扩展支持新的语音服务提供商
32
32
  - 📦 **TypeScript 优先** - 完整的类型定义支持
33
+ - 🌳 **Tree-Shaking 支持** - 按需加载,减少打包体积
33
34
 
34
35
  ### 适用场景
35
36
 
@@ -201,6 +202,63 @@ for await (const chunk of asr.listen(audioBuffer)) {
201
202
 
202
203
  ---
203
204
 
205
+ ## 按需加载(Tree-Shaking)
206
+
207
+ univoice 支持 tree-shaking,你可以按需加载所需的 provider,减少打包体积。
208
+
209
+ ### 方式一:自动注册全部 Provider
210
+
211
+ 适合需要使用多个 provider 的场景:
212
+
213
+ ```typescript
214
+ import 'univoice/tts/providers'; // 注册所有 TTS provider
215
+ import { createTTS } from 'univoice/tts';
216
+
217
+ const tts = createTTS({ provider: 'doubao', /* ... */ });
218
+ ```
219
+
220
+ ### 方式二:手动注册单个 Provider(推荐)
221
+
222
+ 只打包需要的 provider,最小化打包体积:
223
+
224
+ ```typescript
225
+ import { createTTS, registerTTSProvider } from 'univoice/tts';
226
+ import { DoubaoTTS } from 'univoice/tts/providers/doubao';
227
+
228
+ // 只注册需要的 provider
229
+ registerTTSProvider('doubao', DoubaoTTS);
230
+
231
+ const tts = createTTS({ provider: 'doubao', /* ... */ });
232
+ ```
233
+
234
+ ### 方式三:直接使用 Provider 类
235
+
236
+ 最精简的方式,不使用工厂函数:
237
+
238
+ ```typescript
239
+ import { DoubaoTTS } from 'univoice/tts/providers/doubao';
240
+
241
+ const tts = new DoubaoTTS({
242
+ appId: 'your-app-id',
243
+ accessToken: 'your-access-token',
244
+ // ...
245
+ });
246
+
247
+ const response = await tts.synthesize({ text: '你好' });
248
+ ```
249
+
250
+ ### 可用导入路径
251
+
252
+ | 路径 | 说明 |
253
+ |------|------|
254
+ | `univoice` | 主入口,导出所有 API(不自动注册 provider) |
255
+ | `univoice/tts` | TTS 模块入口 |
256
+ | `univoice/tts/providers` | 自动注册所有 TTS provider |
257
+ | `univoice/asr` | ASR 模块入口 |
258
+ | `univoice/asr/providers` | 自动注册所有 ASR provider |
259
+
260
+ ---
261
+
204
262
  ## API 文档
205
263
 
206
264
  ### TTS API
@@ -405,14 +463,14 @@ pnpm format
405
463
  1. 在 `src/tts/providers/` 或 `src/asr/providers/` 创建新文件
406
464
  2. 继承 `BaseTTS` 或 `BaseASR` 类
407
465
  3. 实现必要的方法
408
- 4. 在文件末尾调用 `registerTTSProvider()` 或 `registerASRProvider()`
466
+ 4. 导出 Provider 类(注册改在 `providers/index.ts` 中完成,见下文)
409
467
 
410
468
  ```typescript
411
469
  // src/tts/providers/my-provider.ts
412
- import { BaseTTS, registerTTSProvider } from '@/tts/index';
470
+ import { BaseTTS } from '@/tts/index';
413
471
  import type { TTSOptions, TTSRequest, TTSResponse } from '@/types/tts';
414
472
 
415
- class MyTTS extends BaseTTS {
473
+ export class MyTTS extends BaseTTS {
416
474
  constructor(options: TTSOptions) {
417
475
  super(options);
418
476
  }
@@ -425,8 +483,15 @@ class MyTTS extends BaseTTS {
425
483
  };
426
484
  }
427
485
  }
486
+ ```
487
+
488
+ 然后在 `src/tts/providers/index.ts` 中添加自动注册:
489
+
490
+ ```typescript
491
+ import { MyTTS } from './my-provider';
492
+ import { registerTTSProvider } from '../index';
428
493
 
429
- registerTTSProvider('my-provider', (options) => new MyTTS(options));
494
+ registerTTSProvider('my-provider', MyTTS);
430
495
  ```
431
496
 
432
497
  ### 项目结构
@@ -0,0 +1,85 @@
1
+ interface OpenAIChatCompletionChunk {
2
+ choices?: Array<{
3
+ delta?: {
4
+ content?: string | null;
5
+ reasoning_content?: string | null;
6
+ };
7
+ }>;
8
+ }
9
+ type OpenAIStream = AsyncIterable<OpenAIChatCompletionChunk>;
10
+
11
+ interface TTSOptions {
12
+ provider: string;
13
+ apiKey?: string;
14
+ baseUrl?: string;
15
+ model?: string;
16
+ voice?: string;
17
+ speed?: number;
18
+ volume?: number;
19
+ pitch?: number;
20
+ format?: 'mp3' | 'wav' | 'ogg' | 'flac' | 'pcm';
21
+ language?: string;
22
+ appId?: string;
23
+ accessToken?: string;
24
+ resourceId?: string;
25
+ sampleRate?: number;
26
+ enableTimestamp?: boolean;
27
+ }
28
+ interface TTSRequest {
29
+ text: string;
30
+ options?: Partial<TTSOptions>;
31
+ }
32
+ interface TTSResponse {
33
+ audio: Buffer | Uint8Array;
34
+ format: string;
35
+ duration?: number;
36
+ }
37
+ interface SpeakInstanceOptions {
38
+ stream?: boolean;
39
+ }
40
+ interface TTSProvider {
41
+ name: string;
42
+ synthesize(request: TTSRequest): Promise<TTSResponse>;
43
+ speak?(input: string | TextStream, options?: SpeakInstanceOptions): AsyncIterable<TTSStreamChunk> | Promise<TTSResponse>;
44
+ listVoices?(): Promise<TTSVoice[]>;
45
+ }
46
+ interface TTSStreamChunk {
47
+ audioChunk: Uint8Array;
48
+ }
49
+ type TextStream = AsyncIterable<string> | AsyncGenerator<string> | OpenAIStream;
50
+ interface TTSVoice {
51
+ id: string;
52
+ name: string;
53
+ language: string;
54
+ gender?: 'male' | 'female' | 'neutral';
55
+ }
56
+ type TTSProviderType = 'doubao' | 'minimax' | 'qwen' | 'openai' | 'gemini' | string;
57
+
58
+ declare abstract class BaseTTS implements TTSProvider {
59
+ abstract name: string;
60
+ apiKey: string;
61
+ baseUrl: string;
62
+ model: string;
63
+ voice: string;
64
+ speed: number;
65
+ volume: number;
66
+ pitch: number;
67
+ format: 'mp3' | 'wav' | 'ogg' | 'flac' | 'pcm';
68
+ language: string;
69
+ constructor(options: TTSOptions);
70
+ abstract synthesize(request: TTSRequest): Promise<TTSResponse>;
71
+ speak(input: string | TextStream): Promise<TTSResponse>;
72
+ speak(input: string | TextStream, options: SpeakInstanceOptions & {
73
+ stream: true;
74
+ }): AsyncIterable<TTSStreamChunk>;
75
+ speak(input: string | TextStream, options: SpeakInstanceOptions & {
76
+ stream: false;
77
+ }): Promise<TTSResponse>;
78
+ private createSpeakStreamIterable;
79
+ private synthesizeFromInput;
80
+ protected speakStream(_input: string | TextStream): AsyncIterable<TTSStreamChunk>;
81
+ listVoices(): Promise<TTSVoice[]>;
82
+ buildRequestOptions(request: TTSRequest): TTSOptions;
83
+ }
84
+
85
+ export { BaseTTS as B, type OpenAIChatCompletionChunk as O, type SpeakInstanceOptions as S, type TTSOptions as T, type TTSProvider as a, type TTSProviderType as b, type TTSRequest as c, type TTSResponse as d, type TTSStreamChunk as e, type TTSVoice as f, type TextStream as g, type OpenAIStream as h };
@@ -0,0 +1,84 @@
1
+ interface AudioFormat {
2
+ sampleRate?: number;
3
+ bits?: number;
4
+ channel?: number;
5
+ }
6
+ interface ASROptions {
7
+ provider: string;
8
+ apiKey?: string;
9
+ baseUrl?: string;
10
+ model?: string;
11
+ language?: string;
12
+ prompt?: string;
13
+ responseFormat?: 'json' | 'text' | 'srt' | 'vtt' | 'verbose_json';
14
+ appKey?: string;
15
+ accessKey?: string;
16
+ resourceId?: string;
17
+ mode?: 'streaming' | 'nostream' | 'async';
18
+ audioFormat?: AudioFormat;
19
+ segmentDuration?: number;
20
+ enableItn?: boolean;
21
+ enablePunc?: boolean;
22
+ enableDdc?: boolean;
23
+ showUtterances?: boolean;
24
+ }
25
+ interface ListenInstanceOptions {
26
+ stream?: boolean;
27
+ }
28
+ interface ASRRequest {
29
+ audio: Buffer | Uint8Array | string;
30
+ options?: Partial<ASROptions>;
31
+ }
32
+ interface ASRResponse {
33
+ text: string;
34
+ language?: string;
35
+ duration?: number;
36
+ segments?: ASRSegment[];
37
+ }
38
+ interface ASRSegment {
39
+ id: number;
40
+ start: number;
41
+ end: number;
42
+ text: string;
43
+ speaker?: string;
44
+ confidence?: number;
45
+ }
46
+ interface ASRStreamChunk {
47
+ text: string;
48
+ isFinal: boolean;
49
+ confidence?: number;
50
+ segment?: ASRSegment;
51
+ }
52
+ interface ASRProvider {
53
+ name: string;
54
+ listenStream(audio: AudioStream): AsyncIterable<ASRStreamChunk>;
55
+ }
56
+ type ASRProviderType = 'doubao' | 'minimax' | 'qwen' | 'openai' | 'gemini' | string;
57
+ type AudioStream = AsyncIterable<Buffer | Uint8Array>;
58
+ type AudioStreamInput = AudioStream | Buffer | Uint8Array | string;
59
+
60
+ declare abstract class BaseASR {
61
+ abstract name: string;
62
+ apiKey: string;
63
+ baseUrl: string;
64
+ model: string;
65
+ language: string;
66
+ prompt: string;
67
+ responseFormat: 'json' | 'text' | 'srt' | 'vtt' | 'verbose_json';
68
+ constructor(options: ASROptions);
69
+ abstract listenStream(audio: AudioStream): AsyncIterable<ASRStreamChunk>;
70
+ private isAudioStream;
71
+ private isString;
72
+ private adaptAudioInput;
73
+ private fileToPcmAudioStream;
74
+ private createStreamIterable;
75
+ private collectASRResponse;
76
+ listen(audio: AudioStreamInput, options: ListenInstanceOptions & {
77
+ stream: true;
78
+ }): AsyncIterable<ASRStreamChunk>;
79
+ listen(audio: AudioStreamInput, options?: ListenInstanceOptions & {
80
+ stream?: false;
81
+ }): Promise<ASRResponse>;
82
+ }
83
+
84
+ export { type ASROptions as A, BaseASR as B, type ListenInstanceOptions as L, type ASRProvider as a, type ASRProviderType as b, type ASRRequest as c, type ASRResponse as d, type ASRSegment as e, type ASRStreamChunk as f, type AudioFormat as g, type AudioStream as h, type AudioStreamInput as i };
@@ -1,7 +1,6 @@
1
1
  var __defProp = Object.defineProperty;
2
2
  var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
3
3
 
4
- export {
5
- __name
6
- };
4
+ export { __name };
5
+ //# sourceMappingURL=chunk-7QVYU63E.js.map
7
6
  //# sourceMappingURL=chunk-7QVYU63E.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"chunk-7QVYU63E.js"}