@amaster.ai/tts-client 1.0.0-beta.6 → 1.0.0-beta.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +220 -94
- package/dist/index.cjs +61 -28
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +35 -3
- package/dist/index.d.ts +35 -3
- package/dist/index.js +60 -26
- package/dist/index.js.map +1 -1
- package/package.json +8 -3
package/README.md
CHANGED
|
@@ -1,143 +1,269 @@
|
|
|
1
|
-
#
|
|
1
|
+
# TTS Realtime WebSocket Client SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
基于 WebSocket 的实时文本转语音(TTS)客户端 SDK,用于对接 `qwen-tts` 实时语音合成服务,支持流式音频返回与自动播放。
|
|
4
|
+
|
|
5
|
+
---
|
|
4
6
|
|
|
5
7
|
## 特性
|
|
6
8
|
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
9
|
+
- 🔊 WebSocket 实时 TTS
|
|
10
|
+
- 🎵 支持多种音频格式(`pcm` / `mp3` / `wav` / `opus`)
|
|
11
|
+
- ▶️ 支持自动播放或手动播放
|
|
12
|
+
- 📦 支持音频流分片回调
|
|
13
|
+
- 🎧 浏览器原生 `AudioContext` 播放(PCM)
|
|
14
|
+
- 🎭 多种音色可选
|
|
15
|
+
|
|
16
|
+
---
|
|
12
17
|
|
|
13
18
|
## 安装
|
|
14
19
|
|
|
15
20
|
```bash
|
|
21
|
+
# npm
|
|
16
22
|
npm install @amaster.ai/tts-client
|
|
17
|
-
```
|
|
18
23
|
|
|
19
|
-
|
|
24
|
+
# pnpm
|
|
25
|
+
pnpm add @amaster.ai/tts-client
|
|
20
26
|
|
|
21
|
-
|
|
22
|
-
|
|
27
|
+
# yarn
|
|
28
|
+
yarn add @amaster.ai/tts-client
|
|
29
|
+
```
|
|
23
30
|
|
|
24
|
-
|
|
25
|
-
const tts = createTTSClient({
|
|
26
|
-
gatewayUrl: 'ws://www.appok.ai/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime',
|
|
27
|
-
voice: 'Cherry',
|
|
28
|
-
autoPlay: true, // 自动播放音频
|
|
29
|
-
});
|
|
31
|
+
---
|
|
30
32
|
|
|
31
|
-
|
|
32
|
-
tts.on('session-created', (session) => {
|
|
33
|
-
console.log('会话创建:', session.id, session.model);
|
|
34
|
-
});
|
|
33
|
+
## 快速开始
|
|
35
34
|
|
|
36
|
-
|
|
37
|
-
console.log('接收音频片段:', count);
|
|
38
|
-
});
|
|
35
|
+
### 基础用法
|
|
39
36
|
|
|
40
|
-
|
|
41
|
-
|
|
37
|
+
```ts
|
|
38
|
+
import { createTTSClient } from "@amaster.ai/tts-client";
|
|
39
|
+
|
|
40
|
+
const tts = createTTSClient({
|
|
41
|
+
voice: "Cherry",
|
|
42
|
+
autoPlay: true,
|
|
43
|
+
audioFormat: "pcm",
|
|
44
|
+
sampleRate: 24000,
|
|
45
|
+
onReady() {
|
|
46
|
+
console.log("TTS 已就绪");
|
|
47
|
+
},
|
|
48
|
+
onAudioStart() {
|
|
49
|
+
console.log("开始播放");
|
|
50
|
+
},
|
|
51
|
+
onAudioEnd() {
|
|
52
|
+
console.log("播放结束");
|
|
53
|
+
},
|
|
54
|
+
onAudioChunk(chunks) {
|
|
55
|
+
console.log("收到音频片段:", chunks.length);
|
|
56
|
+
},
|
|
57
|
+
onError(err) {
|
|
58
|
+
console.error("TTS 错误:", err);
|
|
59
|
+
},
|
|
42
60
|
});
|
|
43
61
|
|
|
44
|
-
//
|
|
62
|
+
// 建立连接
|
|
45
63
|
await tts.connect();
|
|
46
|
-
await tts.speak('你好,欢迎使用通义千问实时语音合成服务。');
|
|
47
|
-
```
|
|
48
64
|
|
|
49
|
-
|
|
65
|
+
// 合成并播放语音
|
|
66
|
+
await tts.speak("你好,欢迎使用实时语音合成服务。");
|
|
50
67
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
**参数**:
|
|
56
|
-
- `gatewayUrl`: Gateway WebSocket URL(会自动追加 model 参数)
|
|
57
|
-
- `voice`: 音色名称,默认 `'Cherry'`
|
|
58
|
-
- `autoPlay`: 是否自动播放,默认 `true`
|
|
59
|
-
- `audioFormat`: 音频格式,默认 `'pcm'`
|
|
60
|
-
- `sampleRate`: 采样率,默认 `24000`
|
|
61
|
-
|
|
62
|
-
**返回**:`TTSClient` 实例
|
|
63
|
-
|
|
64
|
-
### TTSClient
|
|
68
|
+
// 关闭连接
|
|
69
|
+
// tts.close();
|
|
70
|
+
```
|
|
65
71
|
|
|
66
|
-
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
### React 完整示例
|
|
75
|
+
|
|
76
|
+
```tsx
|
|
77
|
+
import { useRef, useState } from "react";
|
|
78
|
+
import { createTTSClient, type TTSClient } from "@amaster.ai/tts-client";
|
|
79
|
+
|
|
80
|
+
const VoiceTypes = {
|
|
81
|
+
Cherry: "Cherry - 甜美女声",
|
|
82
|
+
Serena: "苏瑶 - 温柔小姐姐",
|
|
83
|
+
Ethan: "晨煦 - 标准普通话",
|
|
84
|
+
Chelsie: "千雪 - 二次元虚拟女友",
|
|
85
|
+
Peter: "天津话",
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
function TTSPlayer() {
|
|
89
|
+
const [voice, setVoice] = useState("Cherry");
|
|
90
|
+
const [connected, setConnected] = useState(false);
|
|
91
|
+
const [status, setStatus] = useState("disconnected");
|
|
92
|
+
const [text, setText] = useState("你好,欢迎使用通义千问实时语音合成服务。");
|
|
93
|
+
const clientRef = useRef<TTSClient | null>(null);
|
|
94
|
+
|
|
95
|
+
const connectTTS = () => {
|
|
96
|
+
if (clientRef.current) return;
|
|
97
|
+
|
|
98
|
+
const ttsClient = createTTSClient({
|
|
99
|
+
voice,
|
|
100
|
+
autoPlay: true,
|
|
101
|
+
audioFormat: "pcm",
|
|
102
|
+
sampleRate: 24000,
|
|
103
|
+
onReady: () => {
|
|
104
|
+
setConnected(true);
|
|
105
|
+
setStatus("connected");
|
|
106
|
+
},
|
|
107
|
+
onAudioStart: () => setStatus("playing"),
|
|
108
|
+
onAudioEnd: () => setStatus("connected"),
|
|
109
|
+
onAudioChunk: (chunks) => {
|
|
110
|
+
console.log("音频片段数:", chunks.length);
|
|
111
|
+
},
|
|
112
|
+
onError: (err) => {
|
|
113
|
+
console.error("TTS Error:", err);
|
|
114
|
+
setStatus("error");
|
|
115
|
+
setConnected(false);
|
|
116
|
+
},
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
ttsClient.connect();
|
|
120
|
+
clientRef.current = ttsClient;
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
const sendTTS = () => {
|
|
124
|
+
if (!text || !clientRef.current) return;
|
|
125
|
+
clientRef.current.speak(text);
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
const disconnectTTS = () => {
|
|
129
|
+
clientRef.current?.close();
|
|
130
|
+
clientRef.current = null;
|
|
131
|
+
setConnected(false);
|
|
132
|
+
setStatus("disconnected");
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
return (
|
|
136
|
+
<div>
|
|
137
|
+
<h3>🔊 实时语音合成(TTS)</h3>
|
|
138
|
+
|
|
139
|
+
<div>状态: {status}</div>
|
|
140
|
+
|
|
141
|
+
<div>
|
|
142
|
+
<label>音色:</label>
|
|
143
|
+
<select value={voice} onChange={(e) => setVoice(e.target.value)}>
|
|
144
|
+
{Object.entries(VoiceTypes).map(([key, label]) => (
|
|
145
|
+
<option key={key} value={key}>
|
|
146
|
+
{label}
|
|
147
|
+
</option>
|
|
148
|
+
))}
|
|
149
|
+
</select>
|
|
150
|
+
</div>
|
|
151
|
+
|
|
152
|
+
<div>
|
|
153
|
+
<label>合成文本:</label>
|
|
154
|
+
<textarea rows={4} value={text} onChange={(e) => setText(e.target.value)} />
|
|
155
|
+
</div>
|
|
156
|
+
|
|
157
|
+
<div>
|
|
158
|
+
<button onClick={connectTTS} disabled={connected}>
|
|
159
|
+
1. 连接
|
|
160
|
+
</button>
|
|
161
|
+
<button onClick={sendTTS} disabled={!connected}>
|
|
162
|
+
2. 合成语音
|
|
163
|
+
</button>
|
|
164
|
+
<button onClick={disconnectTTS} disabled={!connected}>
|
|
165
|
+
断开
|
|
166
|
+
</button>
|
|
167
|
+
</div>
|
|
168
|
+
</div>
|
|
169
|
+
);
|
|
170
|
+
}
|
|
171
|
+
```
|
|
67
172
|
|
|
68
|
-
|
|
173
|
+
---
|
|
69
174
|
|
|
70
|
-
|
|
175
|
+
## API 说明
|
|
71
176
|
|
|
72
|
-
|
|
177
|
+
### `createTTSClient(config)`
|
|
73
178
|
|
|
74
|
-
|
|
179
|
+
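创建一个 TTS 客户端实例。注意:在本版本的构建产物中,包导出的 `createTTSClient` 是一个两段式工厂:先传入认证配置(`getAccessToken`)得到创建函数,再以 `TTSClientConfig` 调用该函数才会返回客户端实例,即 `createTTSClient(authConfig)(config)`。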
|
|
75
180
|
|
|
76
|
-
|
|
77
|
-
- `text`: 要合成的文本
|
|
181
|
+
#### `TTSClientConfig`
|
|
78
182
|
|
|
79
|
-
|
|
183
|
+
| 参数 | 类型 | 默认值 | 说明 |
|
|
184
|
+
| ---------------- | ----------------------------------- | ---------- | ---------------------------------------------------------------------- |
|
|
185
|
+
| `voice` | `string` | `"Cherry"` | 发音人名称,可选值:`Cherry`, `Serena`, `Ethan`, `Chelsie`, `Peter` 等 |
|
|
186
|
+
| `autoPlay` | `boolean` | `true` | 是否在音频接收完成后自动播放 |
|
|
187
|
+
| `audioFormat` | `"pcm" \| "mp3" \| "wav" \| "opus"` | `"pcm"` | 音频格式,**注意:内置播放仅支持 `pcm`** |
|
|
188
|
+
| `sampleRate` | `number` | `24000` | 采样率 |
|
|
189
|
+
| `getAccessToken` | `() => string \| null` | - | 获取访问令牌(用于 WebSocket 认证);返回非空值时会以 `token` 查询参数(经 URL 编码)附加到连接地址 |
|
|
190
|
+
| `onReady` | `() => void` | - | 会话初始化完成回调 |
|
|
191
|
+
| `onAudioStart` | `() => void` | - | 音频开始播放回调 |
|
|
192
|
+
| `onAudioEnd` | `() => void` | - | 音频播放结束回调 |
|
|
193
|
+
| `onAudioChunk` | `(chunks: string[]) => void` | - | 每收到一个音频分片时回调,并在音频接收完成时再回调一次;参数为本次 `speak()` 至今累积的全部 base64 分片 |
|
|
194
|
+
| `onError` | `(error: Error) => void` | - | 错误回调 |
|
|
80
195
|
|
|
81
|
-
####
|
|
196
|
+
#### `TTSClient`
|
|
82
197
|
|
|
83
|
-
|
|
198
|
+
```ts
|
|
199
|
+
interface TTSClient {
|
|
200
|
+
connect(): Promise<void>; // 建立 WebSocket 连接
|
|
201
|
+
speak(text: string): Promise<void>; // 发送文本进行语音合成
|
|
202
|
+
play(): void; // 手动播放(autoPlay=false 时使用)
|
|
203
|
+
close(): void; // 关闭连接并释放资源
|
|
204
|
+
}
|
|
205
|
+
```
|
|
84
206
|
|
|
85
|
-
|
|
86
|
-
- `connected`: WebSocket 连接建立
|
|
87
|
-
- `session-created`: 会话创建成功
|
|
88
|
-
- `session-updated`: 会话配置完成
|
|
89
|
-
- `audio-chunk`: 收到音频片段
|
|
90
|
-
- `audio-done`: 音频生成完成
|
|
91
|
-
- `completed`: 响应完成
|
|
92
|
-
- `error`: 发生错误
|
|
93
|
-
- `closed`: 连接关闭
|
|
207
|
+
---
|
|
94
208
|
|
|
95
|
-
|
|
209
|
+
## 配合统一客户端使用
|
|
96
210
|
|
|
97
|
-
|
|
211
|
+
推荐与 `@amaster.ai/client` 统一客户端一起使用,自动处理认证:
|
|
98
212
|
|
|
99
|
-
|
|
213
|
+
```tsx
|
|
214
|
+
import { createClient } from "@amaster.ai/client";
|
|
100
215
|
|
|
101
|
-
|
|
216
|
+
const client = createClient({
|
|
217
|
+
baseURL: "https://api.amaster.ai",
|
|
218
|
+
});
|
|
102
219
|
|
|
103
|
-
|
|
220
|
+
const ttsClient = client.tts({
|
|
221
|
+
voice: "Cherry",
|
|
222
|
+
autoPlay: true,
|
|
223
|
+
onReady() {
|
|
224
|
+
console.log("TTS 已就绪");
|
|
225
|
+
},
|
|
226
|
+
onAudioStart() {
|
|
227
|
+
console.log("开始播放");
|
|
228
|
+
},
|
|
229
|
+
onAudioEnd() {
|
|
230
|
+
console.log("播放结束");
|
|
231
|
+
},
|
|
232
|
+
});
|
|
104
233
|
|
|
105
|
-
|
|
234
|
+
await ttsClient.connect();
|
|
235
|
+
await ttsClient.speak("你好,欢迎使用 Amaster AI!");
|
|
236
|
+
```
|
|
106
237
|
|
|
107
|
-
|
|
238
|
+
---
|
|
108
239
|
|
|
109
|
-
|
|
110
|
-
- 解码:自动转换为 Float32 AudioBuffer
|
|
111
|
-
- 播放:使用 Web Audio API
|
|
112
|
-
- 采样率:24kHz(高清音质)
|
|
240
|
+
## 音频说明
|
|
113
241
|
|
|
114
|
-
|
|
242
|
+
- 当前内置播放逻辑 **仅支持 `pcm` 格式**
|
|
243
|
+
- `pcm` 数据为 **16-bit little-endian 单声道**
|
|
244
|
+
- 播放基于 `AudioContext`,仅支持浏览器环境
|
|
245
|
+
- 若使用 `mp3 / wav / opus`,需自行实现解码与播放逻辑
|
|
115
246
|
|
|
116
|
-
|
|
117
|
-
- `session.update` - 配置会话
|
|
118
|
-
- `input_text_buffer.append` - 发送文本
|
|
119
|
-
- `input_text_buffer.commit` - 提交文本
|
|
120
|
-
- `session.finish` - 结束会话
|
|
247
|
+
---
|
|
121
248
|
|
|
122
|
-
|
|
249
|
+
## 运行环境
|
|
123
250
|
|
|
124
|
-
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
251
|
+
- 浏览器(Chrome / Edge / Safari)
|
|
252
|
+
- 需支持:
|
|
253
|
+
- WebSocket
|
|
254
|
+
- AudioContext
|
|
255
|
+
- atob
|
|
128
256
|
|
|
129
|
-
|
|
257
|
+
---
|
|
130
258
|
|
|
131
|
-
|
|
132
|
-
# 构建
|
|
133
|
-
npm run build
|
|
259
|
+
## 注意事项
|
|
134
260
|
|
|
135
|
-
|
|
136
|
-
|
|
261
|
+
- 必须在 `connect()` 成功后才能调用 `speak()`;连接未建立(或已关闭)时调用会抛出 `WebSocket not connected` 错误
|
|
262
|
+
- 多次 `speak()` 会覆盖之前的音频缓存
|
|
263
|
+
- 自动播放依赖浏览器的自动播放策略,部分场景可能需要用户交互触发
|
|
264
|
+
- 建议在组件卸载时调用 `close()` 释放资源
|
|
137
265
|
|
|
138
|
-
|
|
139
|
-
npm run type-check
|
|
140
|
-
```
|
|
266
|
+
---
|
|
141
267
|
|
|
142
268
|
## License
|
|
143
269
|
|
package/dist/index.cjs
CHANGED
|
@@ -20,8 +20,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
-
createTTSClient: () =>
|
|
24
|
-
createTtsClient: () => createTTSClient
|
|
23
|
+
createTTSClient: () => tts_client_default
|
|
25
24
|
});
|
|
26
25
|
module.exports = __toCommonJS(index_exports);
|
|
27
26
|
|
|
@@ -29,6 +28,7 @@ module.exports = __toCommonJS(index_exports);
|
|
|
29
28
|
var TTS_PATH = "/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime";
|
|
30
29
|
function createTTSClient(config) {
|
|
31
30
|
const {
|
|
31
|
+
getAccessToken,
|
|
32
32
|
voice = "Cherry",
|
|
33
33
|
autoPlay = true,
|
|
34
34
|
audioFormat = "pcm",
|
|
@@ -36,28 +36,40 @@ function createTTSClient(config) {
|
|
|
36
36
|
onReady,
|
|
37
37
|
onAudioStart,
|
|
38
38
|
onAudioEnd,
|
|
39
|
+
onAudioChunk,
|
|
39
40
|
onError
|
|
40
41
|
} = config;
|
|
41
42
|
let ws = null;
|
|
42
43
|
let audioChunks = [];
|
|
43
44
|
let audioContext = null;
|
|
45
|
+
let audioSource = null;
|
|
44
46
|
async function connect() {
|
|
45
47
|
return new Promise((resolve, reject) => {
|
|
46
|
-
|
|
48
|
+
let wsUrl = TTS_PATH;
|
|
49
|
+
if (getAccessToken) {
|
|
50
|
+
const token = getAccessToken();
|
|
51
|
+
if (token) {
|
|
52
|
+
const separator = wsUrl.includes("?") ? "&" : "?";
|
|
53
|
+
wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
ws = new WebSocket(wsUrl);
|
|
47
57
|
ws.onopen = () => {
|
|
48
58
|
};
|
|
49
59
|
ws.onmessage = (event) => {
|
|
50
60
|
const data = JSON.parse(event.data);
|
|
51
61
|
if (data.type === "session.created") {
|
|
52
|
-
ws.send(
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
62
|
+
ws.send(
|
|
63
|
+
JSON.stringify({
|
|
64
|
+
type: "session.update",
|
|
65
|
+
session: {
|
|
66
|
+
mode: "server_commit",
|
|
67
|
+
voice,
|
|
68
|
+
response_format: audioFormat,
|
|
69
|
+
sample_rate: sampleRate
|
|
70
|
+
}
|
|
71
|
+
})
|
|
72
|
+
);
|
|
61
73
|
}
|
|
62
74
|
if (data.type === "session.updated") {
|
|
63
75
|
onReady?.();
|
|
@@ -65,10 +77,12 @@ function createTTSClient(config) {
|
|
|
65
77
|
}
|
|
66
78
|
if (data.type === "response.audio.delta") {
|
|
67
79
|
audioChunks.push(data.delta);
|
|
80
|
+
onAudioChunk?.(audioChunks);
|
|
68
81
|
}
|
|
69
82
|
if (data.type === "response.audio.done") {
|
|
83
|
+
onAudioChunk?.(audioChunks);
|
|
70
84
|
if (autoPlay && typeof window !== "undefined") {
|
|
71
|
-
playAudio(
|
|
85
|
+
playAudio();
|
|
72
86
|
}
|
|
73
87
|
}
|
|
74
88
|
if (data.type === "response.done") {
|
|
@@ -95,17 +109,22 @@ function createTTSClient(config) {
|
|
|
95
109
|
throw new Error("WebSocket not connected");
|
|
96
110
|
}
|
|
97
111
|
audioChunks = [];
|
|
98
|
-
ws.send(
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
112
|
+
ws.send(
|
|
113
|
+
JSON.stringify({
|
|
114
|
+
type: "input_text_buffer.append",
|
|
115
|
+
text
|
|
116
|
+
})
|
|
117
|
+
);
|
|
102
118
|
setTimeout(() => {
|
|
103
|
-
ws.send(
|
|
104
|
-
|
|
105
|
-
|
|
119
|
+
ws.send(
|
|
120
|
+
JSON.stringify({
|
|
121
|
+
type: "input_text_buffer.commit"
|
|
122
|
+
})
|
|
123
|
+
);
|
|
106
124
|
}, 100);
|
|
107
125
|
}
|
|
108
|
-
function playAudio(
|
|
126
|
+
function playAudio() {
|
|
127
|
+
let chunks = audioChunks;
|
|
109
128
|
if (typeof window === "undefined") return;
|
|
110
129
|
try {
|
|
111
130
|
if (!audioContext) {
|
|
@@ -142,29 +161,43 @@ function createTTSClient(config) {
|
|
|
142
161
|
source.connect(audioContext.destination);
|
|
143
162
|
source.onended = () => onAudioEnd?.();
|
|
144
163
|
source.start(0);
|
|
164
|
+
audioSource = source;
|
|
145
165
|
} catch (err) {
|
|
146
166
|
onError?.(err);
|
|
147
167
|
}
|
|
148
168
|
}
|
|
149
|
-
function
|
|
150
|
-
if (
|
|
151
|
-
|
|
152
|
-
|
|
169
|
+
function stopAudio() {
|
|
170
|
+
if (audioSource) {
|
|
171
|
+
audioSource.stop();
|
|
172
|
+
audioSource = null;
|
|
153
173
|
}
|
|
154
174
|
if (audioContext) {
|
|
155
175
|
audioContext.close();
|
|
156
176
|
audioContext = null;
|
|
157
177
|
}
|
|
158
178
|
}
|
|
179
|
+
function close() {
|
|
180
|
+
if (ws) {
|
|
181
|
+
ws.close();
|
|
182
|
+
ws = null;
|
|
183
|
+
}
|
|
184
|
+
stopAudio();
|
|
185
|
+
}
|
|
159
186
|
return {
|
|
160
187
|
connect,
|
|
161
188
|
speak,
|
|
162
|
-
close
|
|
189
|
+
close,
|
|
190
|
+
play: playAudio,
|
|
191
|
+
stop: stopAudio
|
|
163
192
|
};
|
|
164
193
|
}
|
|
194
|
+
var tts_client_default = (authConfig) => {
|
|
195
|
+
return (config) => {
|
|
196
|
+
return createTTSClient({ ...authConfig, ...config });
|
|
197
|
+
};
|
|
198
|
+
};
|
|
165
199
|
// Annotate the CommonJS export names for ESM import in node:
|
|
166
200
|
0 && (module.exports = {
|
|
167
|
-
createTTSClient
|
|
168
|
-
createTtsClient
|
|
201
|
+
createTTSClient
|
|
169
202
|
});
|
|
170
203
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/tts-client.ts"],"sourcesContent":["export type { TTSClient, TTSClientConfig } from './tts-client';\nexport { createTTSClient } from './tts-client';\nexport { createTTSClient as createTtsClient } from './tts-client';\n","/**\n * TTS Realtime WebSocket Client\n */\n\nconst TTS_PATH = '/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime';\n\nexport interface TTSClientConfig {\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: 'pcm' | 'mp3' | 'wav' | 'opus';\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Close connection */\n close(): void;\n}\n\nexport function createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n voice = 'Cherry',\n autoPlay = true,\n audioFormat = 'pcm',\n sampleRate = 24000,\n onReady,\n onAudioStart,\n onAudioEnd,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n ws = new WebSocket(TTS_PATH);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === 'session.created') {\n ws!.send(JSON.stringify({\n type: 'session.update',\n session: {\n mode: 'server_commit',\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n }));\n }\n\n if (data.type === 
'session.updated') {\n onReady?.();\n resolve();\n }\n\n if (data.type === 'response.audio.delta') {\n audioChunks.push(data.delta);\n }\n\n if (data.type === 'response.audio.done') {\n if (autoPlay && typeof window !== 'undefined') {\n playAudio(audioChunks);\n }\n }\n\n if (data.type === 'response.done') {\n ws!.send(JSON.stringify({ type: 'session.finish' }));\n }\n\n if (data.type === 'error') {\n const err = new Error(data.error?.message || 'Unknown error');\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error('WebSocket connection error');\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error('WebSocket not connected');\n }\n\n audioChunks = [];\n\n ws.send(JSON.stringify({\n type: 'input_text_buffer.append',\n text,\n }));\n\n setTimeout(() => {\n ws!.send(JSON.stringify({\n type: 'input_text_buffer.commit',\n }));\n }, 100);\n }\n\n function playAudio(chunks: string[]) {\n if (typeof window === 'undefined') return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n 
const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => onAudioEnd?.();\n source.start(0);\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n return {\n connect,\n speak,\n close,\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,IAAM,WAAW;AA8BV,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AAExC,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,UAAU,QAAQ;AAE3B,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI,KAAK,KAAK,UAAU;AAAA,YACtB,MAAM;AAAA,YACN,SAAS;AAAA,cACP,MAAM;AAAA,cACN;AAAA,cACA,iBAAiB;AAAA,cACjB,aAAa;AAAA,YACf;AAAA,UACF,CAAC,CAAC;AAAA,QACJ;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAAA,QAC7B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU,WAAW;AAAA,UACvB;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,MAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG,KAAK,KAAK,UAAU;AAAA,MACrB,MAAM;AAAA,MACN;AAAA,IACF,CAAC,CAAC;AAEF,eAAW,MAAM;AACf,SAAI,KAAK,KAAK,UAAU;AAAA,QAC
tB,MAAM;AAAA,MACR,CAAC,CAAC;AAAA,IACJ,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,UAAU,QAAkB;AACnC,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WAAyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/tts-client.ts"],"sourcesContent":["import type { TTSClient, TTSClientConfig } from \"./tts-client\";\nimport createTTSClient from \"./tts-client\";\nexport { createTTSClient, type TTSClient, type TTSClientConfig };\n","/**\n * TTS Realtime WebSocket Client\n *\n * WebSocket-based real-time text-to-speech synthesis with multiple voice options.\n * Built-in playback only supports PCM format.\n *\n * @example\n * ```typescript\n * const client = createTTSClient({\n * voice: \"Cherry\",\n * autoPlay: true,\n * onReady() {\n * console.log(\"TTS ready\");\n * },\n * onAudioStart() {\n * console.log(\"Playing audio\");\n * },\n * onAudioEnd() {\n * console.log(\"Playback ended\");\n * },\n * });\n *\n * await client.connect();\n * await client.speak(\"Hello, this is a test.\");\n * // client.close();\n * ```\n */\n\nconst TTS_PATH = \"/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime\";\n\nexport interface TTSClientConfig {\n /** Get access token for WebSocket authentication */\n getAccessToken?: () => string | null;\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: \"pcm\" | \"mp3\" | \"wav\" | \"opus\";\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on each audio chunk received */\n onAudioChunk?: (chunk: string[]) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Play audio from chunks */\n play(): void;\n /** Stop audio playback */\n stop(): void;\n /** Close connection */\n 
close(): void;\n}\n\nfunction createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n getAccessToken,\n voice = \"Cherry\",\n autoPlay = true,\n audioFormat = \"pcm\",\n sampleRate = 24000,\n onReady,\n onAudioStart,\n onAudioEnd,\n onAudioChunk,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n let audioSource: AudioBufferSourceNode | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n // Build WebSocket URL with optional token parameter\n let wsUrl = TTS_PATH;\n if (getAccessToken) {\n const token = getAccessToken();\n if (token) {\n const separator = wsUrl.includes(\"?\") ? \"&\" : \"?\";\n wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;\n }\n }\n\n ws = new WebSocket(wsUrl);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n ws!.send(\n JSON.stringify({\n type: \"session.update\",\n session: {\n mode: \"server_commit\",\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n })\n );\n }\n\n if (data.type === \"session.updated\") {\n onReady?.();\n resolve();\n }\n\n if (data.type === \"response.audio.delta\") {\n audioChunks.push(data.delta);\n onAudioChunk?.(audioChunks);\n }\n\n if (data.type === \"response.audio.done\") {\n onAudioChunk?.(audioChunks);\n if (autoPlay && typeof window !== \"undefined\") {\n playAudio();\n }\n }\n\n if (data.type === \"response.done\") {\n ws!.send(JSON.stringify({ type: \"session.finish\" }));\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"Unknown error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error(\"WebSocket connection error\");\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: 
string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n audioChunks = [];\n\n ws.send(\n JSON.stringify({\n type: \"input_text_buffer.append\",\n text,\n })\n );\n\n setTimeout(() => {\n ws!.send(\n JSON.stringify({\n type: \"input_text_buffer.commit\",\n })\n );\n }, 100);\n }\n\n function playAudio() {\n let chunks: string[] = audioChunks;\n if (typeof window === \"undefined\") return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => onAudioEnd?.();\n source.start(0);\n audioSource = source;\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function stopAudio() {\n if (audioSource) {\n audioSource.stop();\n audioSource = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n stopAudio();\n }\n\n return {\n connect,\n speak,\n close,\n play: 
playAudio,\n stop: stopAudio,\n };\n}\n\nexport default (\n authConfig: Pick<TTSClientConfig, \"getAccessToken\">\n): ((config: TTSClientConfig) => TTSClient) => {\n return (config: TTSClientConfig) => {\n return createTTSClient({ ...authConfig, ...config });\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC4BA,IAAM,WAAW;AAsCjB,SAAS,gBAAgB,QAAoC;AAC3D,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AACxC,MAAI,cAA4C;AAEhD,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AAEtC,UAAI,QAAQ;AACZ,UAAI,gBAAgB;AAClB,cAAM,QAAQ,eAAe;AAC7B,YAAI,OAAO;AACT,gBAAM,YAAY,MAAM,SAAS,GAAG,IAAI,MAAM;AAC9C,kBAAQ,GAAG,KAAK,GAAG,SAAS,SAAS,mBAAmB,KAAK,CAAC;AAAA,QAChE;AAAA,MACF;AAEA,WAAK,IAAI,UAAU,KAAK;AAExB,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI;AAAA,YACF,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,MAAM;AAAA,gBACN;AAAA,gBACA,iBAAiB;AAAA,gBACjB,aAAa;AAAA,cACf;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAC3B,yBAAe,WAAW;AAAA,QAC5B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,yBAAe,WAAW;AAC1B,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU;AAAA,UACZ;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,MAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN;AAAA,MACF,CAAC;AAAA,IACH;AAEA,eAAW,MAAM;AACf,SAAI;AAAA,Q
ACF,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,YAAY;AACnB,QAAI,SAAmB;AACvB,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WAAyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AACd,oBAAc;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,YAAY;AACnB,QAAI,aAAa;AACf,kBAAY,KAAK;AACjB,oBAAc;AAAA,IAChB;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,cAAU;AAAA,EACZ;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM;AAAA,IACN,MAAM;AAAA,EACR;AACF;AAEA,IAAO,qBAAQ,CACb,eAC6C;AAC7C,SAAO,CAAC,WAA4B;AAClC,WAAO,gBAAgB,EAAE,GAAG,YAAY,GAAG,OAAO,CAAC;AAAA,EACrD;AACF;","names":[]}
|
package/dist/index.d.cts
CHANGED
|
@@ -1,13 +1,39 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* TTS Realtime WebSocket Client
|
|
3
|
+
*
|
|
4
|
+
* WebSocket-based real-time text-to-speech synthesis with multiple voice options.
|
|
5
|
+
* Built-in playback only supports PCM format.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* const client = createTTSClient({
|
|
10
|
+
* voice: "Cherry",
|
|
11
|
+
* autoPlay: true,
|
|
12
|
+
* onReady() {
|
|
13
|
+
* console.log("TTS ready");
|
|
14
|
+
* },
|
|
15
|
+
* onAudioStart() {
|
|
16
|
+
* console.log("Playing audio");
|
|
17
|
+
* },
|
|
18
|
+
* onAudioEnd() {
|
|
19
|
+
* console.log("Playback ended");
|
|
20
|
+
* },
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* await client.connect();
|
|
24
|
+
* await client.speak("Hello, this is a test.");
|
|
25
|
+
* // client.close();
|
|
26
|
+
* ```
|
|
3
27
|
*/
|
|
4
28
|
interface TTSClientConfig {
|
|
29
|
+
/** Get access token for WebSocket authentication */
|
|
30
|
+
getAccessToken?: () => string | null;
|
|
5
31
|
/** Voice name, default 'Cherry' */
|
|
6
32
|
voice?: string;
|
|
7
33
|
/** Auto play audio, default true */
|
|
8
34
|
autoPlay?: boolean;
|
|
9
35
|
/** Audio format, default 'pcm' */
|
|
10
|
-
audioFormat?: 'pcm' | 'mp3' | 'wav' | 'opus';
|
|
36
|
+
audioFormat?: "pcm" | "mp3" | "wav" | "opus";
|
|
11
37
|
/** Sample rate, default 24000 */
|
|
12
38
|
sampleRate?: number;
|
|
13
39
|
/** Called when connection is ready */
|
|
@@ -16,6 +42,8 @@ interface TTSClientConfig {
|
|
|
16
42
|
onAudioStart?: () => void;
|
|
17
43
|
/** Called when audio playback ends */
|
|
18
44
|
onAudioEnd?: () => void;
|
|
45
|
+
/** Called on each audio chunk received */
|
|
46
|
+
onAudioChunk?: (chunk: string[]) => void;
|
|
19
47
|
/** Called on error */
|
|
20
48
|
onError?: (error: Error) => void;
|
|
21
49
|
}
|
|
@@ -24,9 +52,13 @@ interface TTSClient {
|
|
|
24
52
|
connect(): Promise<void>;
|
|
25
53
|
/** Synthesize speech from text */
|
|
26
54
|
speak(text: string): Promise<void>;
|
|
55
|
+
/** Play audio from chunks */
|
|
56
|
+
play(): void;
|
|
57
|
+
/** Stop audio playback */
|
|
58
|
+
stop(): void;
|
|
27
59
|
/** Close connection */
|
|
28
60
|
close(): void;
|
|
29
61
|
}
|
|
30
|
-
declare function createTTSClient(config: TTSClientConfig): TTSClient;
|
|
62
|
+
declare const _default: (authConfig: Pick<TTSClientConfig, "getAccessToken">) => ((config: TTSClientConfig) => TTSClient);
|
|
31
63
|
|
|
32
|
-
export { type TTSClient, type TTSClientConfig,
|
|
64
|
+
export { type TTSClient, type TTSClientConfig, _default as createTTSClient };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,13 +1,39 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* TTS Realtime WebSocket Client
|
|
3
|
+
*
|
|
4
|
+
* WebSocket-based real-time text-to-speech synthesis with multiple voice options.
|
|
5
|
+
* Built-in playback only supports PCM format.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* const client = createTTSClient({
|
|
10
|
+
* voice: "Cherry",
|
|
11
|
+
* autoPlay: true,
|
|
12
|
+
* onReady() {
|
|
13
|
+
* console.log("TTS ready");
|
|
14
|
+
* },
|
|
15
|
+
* onAudioStart() {
|
|
16
|
+
* console.log("Playing audio");
|
|
17
|
+
* },
|
|
18
|
+
* onAudioEnd() {
|
|
19
|
+
* console.log("Playback ended");
|
|
20
|
+
* },
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* await client.connect();
|
|
24
|
+
* await client.speak("Hello, this is a test.");
|
|
25
|
+
* // client.close();
|
|
26
|
+
* ```
|
|
3
27
|
*/
|
|
4
28
|
interface TTSClientConfig {
|
|
29
|
+
/** Get access token for WebSocket authentication */
|
|
30
|
+
getAccessToken?: () => string | null;
|
|
5
31
|
/** Voice name, default 'Cherry' */
|
|
6
32
|
voice?: string;
|
|
7
33
|
/** Auto play audio, default true */
|
|
8
34
|
autoPlay?: boolean;
|
|
9
35
|
/** Audio format, default 'pcm' */
|
|
10
|
-
audioFormat?: 'pcm' | 'mp3' | 'wav' | 'opus';
|
|
36
|
+
audioFormat?: "pcm" | "mp3" | "wav" | "opus";
|
|
11
37
|
/** Sample rate, default 24000 */
|
|
12
38
|
sampleRate?: number;
|
|
13
39
|
/** Called when connection is ready */
|
|
@@ -16,6 +42,8 @@ interface TTSClientConfig {
|
|
|
16
42
|
onAudioStart?: () => void;
|
|
17
43
|
/** Called when audio playback ends */
|
|
18
44
|
onAudioEnd?: () => void;
|
|
45
|
+
/** Called on each audio chunk received */
|
|
46
|
+
onAudioChunk?: (chunk: string[]) => void;
|
|
19
47
|
/** Called on error */
|
|
20
48
|
onError?: (error: Error) => void;
|
|
21
49
|
}
|
|
@@ -24,9 +52,13 @@ interface TTSClient {
|
|
|
24
52
|
connect(): Promise<void>;
|
|
25
53
|
/** Synthesize speech from text */
|
|
26
54
|
speak(text: string): Promise<void>;
|
|
55
|
+
/** Play audio from chunks */
|
|
56
|
+
play(): void;
|
|
57
|
+
/** Stop audio playback */
|
|
58
|
+
stop(): void;
|
|
27
59
|
/** Close connection */
|
|
28
60
|
close(): void;
|
|
29
61
|
}
|
|
30
|
-
declare function createTTSClient(config: TTSClientConfig): TTSClient;
|
|
62
|
+
declare const _default: (authConfig: Pick<TTSClientConfig, "getAccessToken">) => ((config: TTSClientConfig) => TTSClient);
|
|
31
63
|
|
|
32
|
-
export { type TTSClient, type TTSClientConfig,
|
|
64
|
+
export { type TTSClient, type TTSClientConfig, _default as createTTSClient };
|
package/dist/index.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
var TTS_PATH = "/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime";
|
|
3
3
|
function createTTSClient(config) {
|
|
4
4
|
const {
|
|
5
|
+
getAccessToken,
|
|
5
6
|
voice = "Cherry",
|
|
6
7
|
autoPlay = true,
|
|
7
8
|
audioFormat = "pcm",
|
|
@@ -9,28 +10,40 @@ function createTTSClient(config) {
|
|
|
9
10
|
onReady,
|
|
10
11
|
onAudioStart,
|
|
11
12
|
onAudioEnd,
|
|
13
|
+
onAudioChunk,
|
|
12
14
|
onError
|
|
13
15
|
} = config;
|
|
14
16
|
let ws = null;
|
|
15
17
|
let audioChunks = [];
|
|
16
18
|
let audioContext = null;
|
|
19
|
+
let audioSource = null;
|
|
17
20
|
async function connect() {
|
|
18
21
|
return new Promise((resolve, reject) => {
|
|
19
|
-
|
|
22
|
+
let wsUrl = TTS_PATH;
|
|
23
|
+
if (getAccessToken) {
|
|
24
|
+
const token = getAccessToken();
|
|
25
|
+
if (token) {
|
|
26
|
+
const separator = wsUrl.includes("?") ? "&" : "?";
|
|
27
|
+
wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
ws = new WebSocket(wsUrl);
|
|
20
31
|
ws.onopen = () => {
|
|
21
32
|
};
|
|
22
33
|
ws.onmessage = (event) => {
|
|
23
34
|
const data = JSON.parse(event.data);
|
|
24
35
|
if (data.type === "session.created") {
|
|
25
|
-
ws.send(
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
36
|
+
ws.send(
|
|
37
|
+
JSON.stringify({
|
|
38
|
+
type: "session.update",
|
|
39
|
+
session: {
|
|
40
|
+
mode: "server_commit",
|
|
41
|
+
voice,
|
|
42
|
+
response_format: audioFormat,
|
|
43
|
+
sample_rate: sampleRate
|
|
44
|
+
}
|
|
45
|
+
})
|
|
46
|
+
);
|
|
34
47
|
}
|
|
35
48
|
if (data.type === "session.updated") {
|
|
36
49
|
onReady?.();
|
|
@@ -38,10 +51,12 @@ function createTTSClient(config) {
|
|
|
38
51
|
}
|
|
39
52
|
if (data.type === "response.audio.delta") {
|
|
40
53
|
audioChunks.push(data.delta);
|
|
54
|
+
onAudioChunk?.(audioChunks);
|
|
41
55
|
}
|
|
42
56
|
if (data.type === "response.audio.done") {
|
|
57
|
+
onAudioChunk?.(audioChunks);
|
|
43
58
|
if (autoPlay && typeof window !== "undefined") {
|
|
44
|
-
playAudio(audioChunks);
|
|
59
|
+
playAudio();
|
|
45
60
|
}
|
|
46
61
|
}
|
|
47
62
|
if (data.type === "response.done") {
|
|
@@ -68,17 +83,22 @@ function createTTSClient(config) {
|
|
|
68
83
|
throw new Error("WebSocket not connected");
|
|
69
84
|
}
|
|
70
85
|
audioChunks = [];
|
|
71
|
-
ws.send(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
86
|
+
ws.send(
|
|
87
|
+
JSON.stringify({
|
|
88
|
+
type: "input_text_buffer.append",
|
|
89
|
+
text
|
|
90
|
+
})
|
|
91
|
+
);
|
|
75
92
|
setTimeout(() => {
|
|
76
|
-
ws.send(
|
|
77
|
-
|
|
78
|
-
|
|
93
|
+
ws.send(
|
|
94
|
+
JSON.stringify({
|
|
95
|
+
type: "input_text_buffer.commit"
|
|
96
|
+
})
|
|
97
|
+
);
|
|
79
98
|
}, 100);
|
|
80
99
|
}
|
|
81
|
-
function playAudio(chunks) {
|
|
100
|
+
function playAudio() {
|
|
101
|
+
let chunks = audioChunks;
|
|
82
102
|
if (typeof window === "undefined") return;
|
|
83
103
|
try {
|
|
84
104
|
if (!audioContext) {
|
|
@@ -115,28 +135,42 @@ function createTTSClient(config) {
|
|
|
115
135
|
source.connect(audioContext.destination);
|
|
116
136
|
source.onended = () => onAudioEnd?.();
|
|
117
137
|
source.start(0);
|
|
138
|
+
audioSource = source;
|
|
118
139
|
} catch (err) {
|
|
119
140
|
onError?.(err);
|
|
120
141
|
}
|
|
121
142
|
}
|
|
122
|
-
function
|
|
123
|
-
if (
|
|
124
|
-
|
|
125
|
-
|
|
143
|
+
function stopAudio() {
|
|
144
|
+
if (audioSource) {
|
|
145
|
+
audioSource.stop();
|
|
146
|
+
audioSource = null;
|
|
126
147
|
}
|
|
127
148
|
if (audioContext) {
|
|
128
149
|
audioContext.close();
|
|
129
150
|
audioContext = null;
|
|
130
151
|
}
|
|
131
152
|
}
|
|
153
|
+
function close() {
|
|
154
|
+
if (ws) {
|
|
155
|
+
ws.close();
|
|
156
|
+
ws = null;
|
|
157
|
+
}
|
|
158
|
+
stopAudio();
|
|
159
|
+
}
|
|
132
160
|
return {
|
|
133
161
|
connect,
|
|
134
162
|
speak,
|
|
135
|
-
close
|
|
163
|
+
close,
|
|
164
|
+
play: playAudio,
|
|
165
|
+
stop: stopAudio
|
|
136
166
|
};
|
|
137
167
|
}
|
|
168
|
+
var tts_client_default = (authConfig) => {
|
|
169
|
+
return (config) => {
|
|
170
|
+
return createTTSClient({ ...authConfig, ...config });
|
|
171
|
+
};
|
|
172
|
+
};
|
|
138
173
|
export {
|
|
139
|
-
createTTSClient
|
|
140
|
-
createTTSClient as createTtsClient
|
|
174
|
+
tts_client_default as createTTSClient
|
|
141
175
|
};
|
|
142
176
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts-client.ts"],"sourcesContent":["/**\n * TTS Realtime WebSocket Client\n */\n\nconst TTS_PATH = '/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime';\n\nexport interface TTSClientConfig {\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: 'pcm' | 'mp3' | 'wav' | 'opus';\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Close connection */\n close(): void;\n}\n\nexport function createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n voice = 'Cherry',\n autoPlay = true,\n audioFormat = 'pcm',\n sampleRate = 24000,\n onReady,\n onAudioStart,\n onAudioEnd,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n ws = new WebSocket(TTS_PATH);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === 'session.created') {\n ws!.send(JSON.stringify({\n type: 'session.update',\n session: {\n mode: 'server_commit',\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n }));\n }\n\n if (data.type === 'session.updated') {\n onReady?.();\n resolve();\n }\n\n if (data.type === 'response.audio.delta') {\n audioChunks.push(data.delta);\n }\n\n if (data.type === 'response.audio.done') {\n if (autoPlay && typeof window 
!== 'undefined') {\n playAudio(audioChunks);\n }\n }\n\n if (data.type === 'response.done') {\n ws!.send(JSON.stringify({ type: 'session.finish' }));\n }\n\n if (data.type === 'error') {\n const err = new Error(data.error?.message || 'Unknown error');\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error('WebSocket connection error');\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error('WebSocket not connected');\n }\n\n audioChunks = [];\n\n ws.send(JSON.stringify({\n type: 'input_text_buffer.append',\n text,\n }));\n\n setTimeout(() => {\n ws!.send(JSON.stringify({\n type: 'input_text_buffer.commit',\n }));\n }, 100);\n }\n\n function playAudio(chunks: string[]) {\n if (typeof window === 'undefined') return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n 
source.onended = () => onAudioEnd?.();\n source.start(0);\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n return {\n connect,\n speak,\n close,\n };\n}\n"],"mappings":";AAIA,IAAM,WAAW;AA8BV,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AAExC,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,UAAU,QAAQ;AAE3B,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI,KAAK,KAAK,UAAU;AAAA,YACtB,MAAM;AAAA,YACN,SAAS;AAAA,cACP,MAAM;AAAA,cACN;AAAA,cACA,iBAAiB;AAAA,cACjB,aAAa;AAAA,YACf;AAAA,UACF,CAAC,CAAC;AAAA,QACJ;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAAA,QAC7B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU,WAAW;AAAA,UACvB;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,MAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG,KAAK,KAAK,UAAU;AAAA,MACrB,MAAM;AAAA,MACN;AAAA,IACF,CAAC,CAAC;AAEF,eAAW,MAAM;AACf,SAAI,KAAK,KAAK,UAAU;AAAA,QACtB,MAAM;AAAA,MACR,CAAC,CAAC;AAAA,IACJ,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,UAAU,QAAkB;AACnC,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WAAyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC
/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/tts-client.ts"],"sourcesContent":["/**\n * TTS Realtime WebSocket Client\n *\n * WebSocket-based real-time text-to-speech synthesis with multiple voice options.\n * Built-in playback only supports PCM format.\n *\n * @example\n * ```typescript\n * const client = createTTSClient({\n * voice: \"Cherry\",\n * autoPlay: true,\n * onReady() {\n * console.log(\"TTS ready\");\n * },\n * onAudioStart() {\n * console.log(\"Playing audio\");\n * },\n * onAudioEnd() {\n * console.log(\"Playback ended\");\n * },\n * });\n *\n * await client.connect();\n * await client.speak(\"Hello, this is a test.\");\n * // client.close();\n * ```\n */\n\nconst TTS_PATH = \"/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime\";\n\nexport interface TTSClientConfig {\n /** Get access token for WebSocket authentication */\n getAccessToken?: () => string | null;\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: \"pcm\" | \"mp3\" | \"wav\" | \"opus\";\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on each audio chunk received */\n onAudioChunk?: (chunk: string[]) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Play audio from chunks */\n play(): void;\n /** Stop audio playback */\n stop(): void;\n /** Close connection */\n close(): void;\n}\n\nfunction createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n getAccessToken,\n voice = \"Cherry\",\n autoPlay = true,\n audioFormat = \"pcm\",\n sampleRate = 24000,\n 
onReady,\n onAudioStart,\n onAudioEnd,\n onAudioChunk,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n let audioSource: AudioBufferSourceNode | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n // Build WebSocket URL with optional token parameter\n let wsUrl = TTS_PATH;\n if (getAccessToken) {\n const token = getAccessToken();\n if (token) {\n const separator = wsUrl.includes(\"?\") ? \"&\" : \"?\";\n wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;\n }\n }\n\n ws = new WebSocket(wsUrl);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n ws!.send(\n JSON.stringify({\n type: \"session.update\",\n session: {\n mode: \"server_commit\",\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n })\n );\n }\n\n if (data.type === \"session.updated\") {\n onReady?.();\n resolve();\n }\n\n if (data.type === \"response.audio.delta\") {\n audioChunks.push(data.delta);\n onAudioChunk?.(audioChunks);\n }\n\n if (data.type === \"response.audio.done\") {\n onAudioChunk?.(audioChunks);\n if (autoPlay && typeof window !== \"undefined\") {\n playAudio();\n }\n }\n\n if (data.type === \"response.done\") {\n ws!.send(JSON.stringify({ type: \"session.finish\" }));\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"Unknown error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error(\"WebSocket connection error\");\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n audioChunks = [];\n\n ws.send(\n JSON.stringify({\n type: 
\"input_text_buffer.append\",\n text,\n })\n );\n\n setTimeout(() => {\n ws!.send(\n JSON.stringify({\n type: \"input_text_buffer.commit\",\n })\n );\n }, 100);\n }\n\n function playAudio() {\n let chunks: string[] = audioChunks;\n if (typeof window === \"undefined\") return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => onAudioEnd?.();\n source.start(0);\n audioSource = source;\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function stopAudio() {\n if (audioSource) {\n audioSource.stop();\n audioSource = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n stopAudio();\n }\n\n return {\n connect,\n speak,\n close,\n play: playAudio,\n stop: stopAudio,\n };\n}\n\nexport default (\n authConfig: Pick<TTSClientConfig, \"getAccessToken\">\n): ((config: TTSClientConfig) => TTSClient) => {\n return (config: TTSClientConfig) 
=> {\n return createTTSClient({ ...authConfig, ...config });\n };\n};\n"],"mappings":";AA4BA,IAAM,WAAW;AAsCjB,SAAS,gBAAgB,QAAoC;AAC3D,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AACxC,MAAI,cAA4C;AAEhD,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AAEtC,UAAI,QAAQ;AACZ,UAAI,gBAAgB;AAClB,cAAM,QAAQ,eAAe;AAC7B,YAAI,OAAO;AACT,gBAAM,YAAY,MAAM,SAAS,GAAG,IAAI,MAAM;AAC9C,kBAAQ,GAAG,KAAK,GAAG,SAAS,SAAS,mBAAmB,KAAK,CAAC;AAAA,QAChE;AAAA,MACF;AAEA,WAAK,IAAI,UAAU,KAAK;AAExB,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI;AAAA,YACF,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,MAAM;AAAA,gBACN;AAAA,gBACA,iBAAiB;AAAA,gBACjB,aAAa;AAAA,cACf;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAC3B,yBAAe,WAAW;AAAA,QAC5B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,yBAAe,WAAW;AAC1B,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU;AAAA,UACZ;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,MAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN;AAAA,MACF,CAAC;AAAA,IACH;AAEA,eAAW,MAAM;AACf,SAAI;AAAA,QACF,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,YAAY;AACnB,QAAI,SAAmB;AACvB,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WA
AyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AACd,oBAAc;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,YAAY;AACnB,QAAI,aAAa;AACf,kBAAY,KAAK;AACjB,oBAAc;AAAA,IAChB;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,cAAU;AAAA,EACZ;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM;AAAA,IACN,MAAM;AAAA,EACR;AACF;AAEA,IAAO,qBAAQ,CACb,eAC6C;AAC7C,SAAO,CAAC,WAA4B;AAClC,WAAO,gBAAgB,EAAE,GAAG,YAAY,GAAG,OAAO,CAAC;AAAA,EACrD;AACF;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@amaster.ai/tts-client",
|
|
3
|
-
"version": "1.0.0-beta.6",
|
|
3
|
+
"version": "1.0.0-beta.72",
|
|
4
4
|
"description": "Qwen TTS Realtime WebSocket client with audio playback",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -33,13 +33,18 @@
|
|
|
33
33
|
"registry": "https://registry.npmjs.org/"
|
|
34
34
|
},
|
|
35
35
|
"devDependencies": {
|
|
36
|
+
"jsdom": "^23.2.0",
|
|
36
37
|
"tsup": "^8.3.5",
|
|
37
|
-
"typescript": "~5.7.2"
|
|
38
|
+
"typescript": "~5.7.2",
|
|
39
|
+
"vitest": "^1.6.1"
|
|
38
40
|
},
|
|
39
41
|
"scripts": {
|
|
40
42
|
"build": "tsup",
|
|
41
43
|
"dev": "tsup --watch",
|
|
42
44
|
"clean": "rm -rf dist *.tsbuildinfo",
|
|
43
|
-
"type-check": "tsc --noEmit"
|
|
45
|
+
"type-check": "tsc --noEmit",
|
|
46
|
+
"test": "vitest run",
|
|
47
|
+
"test:watch": "vitest",
|
|
48
|
+
"test:ui": "vitest --ui"
|
|
44
49
|
}
|
|
45
50
|
}
|