@amaster.ai/tts-client 1.0.0-beta.7 → 1.0.0-beta.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +220 -94
- package/dist/index.cjs +63 -122
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +35 -8
- package/dist/index.d.ts +35 -8
- package/dist/index.js +62 -119
- package/dist/index.js.map +1 -1
- package/package.json +50 -45
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Amaster Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,143 +1,269 @@
|
|
|
1
|
-
#
|
|
1
|
+
# TTS Realtime WebSocket Client SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
基于 WebSocket 的实时文本转语音(TTS)客户端 SDK,用于对接 `qwen-tts` 实时语音合成服务,支持流式音频返回与自动播放。
|
|
4
|
+
|
|
5
|
+
---
|
|
4
6
|
|
|
5
7
|
## 特性
|
|
6
8
|
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
9
|
+
- 🔊 WebSocket 实时 TTS
|
|
10
|
+
- 🎵 支持多种音频格式(`pcm` / `mp3` / `wav` / `opus`)
|
|
11
|
+
- ▶️ 支持自动播放或手动播放
|
|
12
|
+
- 📦 支持音频流分片回调
|
|
13
|
+
- 🎧 浏览器原生 `AudioContext` 播放(PCM)
|
|
14
|
+
- 🎭 多种音色可选
|
|
15
|
+
|
|
16
|
+
---
|
|
12
17
|
|
|
13
18
|
## 安装
|
|
14
19
|
|
|
15
20
|
```bash
|
|
21
|
+
# npm
|
|
16
22
|
npm install @amaster.ai/tts-client
|
|
17
|
-
```
|
|
18
23
|
|
|
19
|
-
|
|
24
|
+
# pnpm
|
|
25
|
+
pnpm add @amaster.ai/tts-client
|
|
20
26
|
|
|
21
|
-
|
|
22
|
-
|
|
27
|
+
# yarn
|
|
28
|
+
yarn add @amaster.ai/tts-client
|
|
29
|
+
```
|
|
23
30
|
|
|
24
|
-
|
|
25
|
-
const tts = createTTSClient({
|
|
26
|
-
gatewayUrl: 'ws://www.appok.ai/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime',
|
|
27
|
-
voice: 'Cherry',
|
|
28
|
-
autoPlay: true, // 自动播放音频
|
|
29
|
-
});
|
|
31
|
+
---
|
|
30
32
|
|
|
31
|
-
|
|
32
|
-
tts.on('session-created', (session) => {
|
|
33
|
-
console.log('会话创建:', session.id, session.model);
|
|
34
|
-
});
|
|
33
|
+
## 快速开始
|
|
35
34
|
|
|
36
|
-
|
|
37
|
-
console.log('接收音频片段:', count);
|
|
38
|
-
});
|
|
35
|
+
### 基础用法
|
|
39
36
|
|
|
40
|
-
|
|
41
|
-
|
|
37
|
+
```ts
|
|
38
|
+
import { createTTSClient } from "@amaster.ai/tts-client";
|
|
39
|
+
|
|
40
|
+
const tts = createTTSClient({
|
|
41
|
+
voice: "Cherry",
|
|
42
|
+
autoPlay: true,
|
|
43
|
+
audioFormat: "pcm",
|
|
44
|
+
sampleRate: 24000,
|
|
45
|
+
onReady() {
|
|
46
|
+
console.log("TTS 已就绪");
|
|
47
|
+
},
|
|
48
|
+
onAudioStart() {
|
|
49
|
+
console.log("开始播放");
|
|
50
|
+
},
|
|
51
|
+
onAudioEnd() {
|
|
52
|
+
console.log("播放结束");
|
|
53
|
+
},
|
|
54
|
+
onAudioChunk(chunks) {
|
|
55
|
+
console.log("收到音频片段:", chunks.length);
|
|
56
|
+
},
|
|
57
|
+
onError(err) {
|
|
58
|
+
console.error("TTS 错误:", err);
|
|
59
|
+
},
|
|
42
60
|
});
|
|
43
61
|
|
|
44
|
-
//
|
|
62
|
+
// 建立连接
|
|
45
63
|
await tts.connect();
|
|
46
|
-
await tts.speak('你好,欢迎使用通义千问实时语音合成服务。');
|
|
47
|
-
```
|
|
48
64
|
|
|
49
|
-
|
|
65
|
+
// 合成并播放语音
|
|
66
|
+
await tts.speak("你好,欢迎使用实时语音合成服务。");
|
|
50
67
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
**参数**:
|
|
56
|
-
- `gatewayUrl`: Gateway WebSocket URL(会自动追加 model 参数)
|
|
57
|
-
- `voice`: 音色名称,默认 `'Cherry'`
|
|
58
|
-
- `autoPlay`: 是否自动播放,默认 `true`
|
|
59
|
-
- `audioFormat`: 音频格式,默认 `'pcm'`
|
|
60
|
-
- `sampleRate`: 采样率,默认 `24000`
|
|
61
|
-
|
|
62
|
-
**返回**:`TTSClient` 实例
|
|
63
|
-
|
|
64
|
-
### TTSClient
|
|
68
|
+
// 关闭连接
|
|
69
|
+
// tts.close();
|
|
70
|
+
```
|
|
65
71
|
|
|
66
|
-
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
### React 完整示例
|
|
75
|
+
|
|
76
|
+
```tsx
|
|
77
|
+
import { useRef, useState } from "react";
|
|
78
|
+
import { createTTSClient, type TTSClient } from "@amaster.ai/tts-client";
|
|
79
|
+
|
|
80
|
+
const VoiceTypes = {
|
|
81
|
+
Cherry: "Cherry - 甜美女声",
|
|
82
|
+
Serena: "苏瑶 - 温柔小姐姐",
|
|
83
|
+
Ethan: "晨煦 - 标准普通话",
|
|
84
|
+
Chelsie: "千雪 - 二次元虚拟女友",
|
|
85
|
+
Peter: "天津话",
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
function TTSPlayer() {
|
|
89
|
+
const [voice, setVoice] = useState("Cherry");
|
|
90
|
+
const [connected, setConnected] = useState(false);
|
|
91
|
+
const [status, setStatus] = useState("disconnected");
|
|
92
|
+
const [text, setText] = useState("你好,欢迎使用通义千问实时语音合成服务。");
|
|
93
|
+
const clientRef = useRef<TTSClient | null>(null);
|
|
94
|
+
|
|
95
|
+
const connectTTS = () => {
|
|
96
|
+
if (clientRef.current) return;
|
|
97
|
+
|
|
98
|
+
const ttsClient = createTTSClient({
|
|
99
|
+
voice,
|
|
100
|
+
autoPlay: true,
|
|
101
|
+
audioFormat: "pcm",
|
|
102
|
+
sampleRate: 24000,
|
|
103
|
+
onReady: () => {
|
|
104
|
+
setConnected(true);
|
|
105
|
+
setStatus("connected");
|
|
106
|
+
},
|
|
107
|
+
onAudioStart: () => setStatus("playing"),
|
|
108
|
+
onAudioEnd: () => setStatus("connected"),
|
|
109
|
+
onAudioChunk: (chunks) => {
|
|
110
|
+
console.log("音频片段数:", chunks.length);
|
|
111
|
+
},
|
|
112
|
+
onError: (err) => {
|
|
113
|
+
console.error("TTS Error:", err);
|
|
114
|
+
setStatus("error");
|
|
115
|
+
setConnected(false);
|
|
116
|
+
},
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
ttsClient.connect();
|
|
120
|
+
clientRef.current = ttsClient;
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
const sendTTS = () => {
|
|
124
|
+
if (!text || !clientRef.current) return;
|
|
125
|
+
clientRef.current.speak(text);
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
const disconnectTTS = () => {
|
|
129
|
+
clientRef.current?.close();
|
|
130
|
+
clientRef.current = null;
|
|
131
|
+
setConnected(false);
|
|
132
|
+
setStatus("disconnected");
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
return (
|
|
136
|
+
<div>
|
|
137
|
+
<h3>🔊 实时语音合成(TTS)</h3>
|
|
138
|
+
|
|
139
|
+
<div>状态: {status}</div>
|
|
140
|
+
|
|
141
|
+
<div>
|
|
142
|
+
<label>音色:</label>
|
|
143
|
+
<select value={voice} onChange={(e) => setVoice(e.target.value)}>
|
|
144
|
+
{Object.entries(VoiceTypes).map(([key, label]) => (
|
|
145
|
+
<option key={key} value={key}>
|
|
146
|
+
{label}
|
|
147
|
+
</option>
|
|
148
|
+
))}
|
|
149
|
+
</select>
|
|
150
|
+
</div>
|
|
151
|
+
|
|
152
|
+
<div>
|
|
153
|
+
<label>合成文本:</label>
|
|
154
|
+
<textarea rows={4} value={text} onChange={(e) => setText(e.target.value)} />
|
|
155
|
+
</div>
|
|
156
|
+
|
|
157
|
+
<div>
|
|
158
|
+
<button onClick={connectTTS} disabled={connected}>
|
|
159
|
+
1. 连接
|
|
160
|
+
</button>
|
|
161
|
+
<button onClick={sendTTS} disabled={!connected}>
|
|
162
|
+
2. 合成语音
|
|
163
|
+
</button>
|
|
164
|
+
<button onClick={disconnectTTS} disabled={!connected}>
|
|
165
|
+
断开
|
|
166
|
+
</button>
|
|
167
|
+
</div>
|
|
168
|
+
</div>
|
|
169
|
+
);
|
|
170
|
+
}
|
|
171
|
+
```
|
|
67
172
|
|
|
68
|
-
|
|
173
|
+
---
|
|
69
174
|
|
|
70
|
-
|
|
175
|
+
## API 说明
|
|
71
176
|
|
|
72
|
-
|
|
177
|
+
### `createTTSClient(config)`
|
|
73
178
|
|
|
74
|
-
|
|
179
|
+
创建一个 TTS 客户端实例。
|
|
75
180
|
|
|
76
|
-
|
|
77
|
-
- `text`: 要合成的文本
|
|
181
|
+
#### `TTSClientConfig`
|
|
78
182
|
|
|
79
|
-
|
|
183
|
+
| 参数 | 类型 | 默认值 | 说明 |
|
|
184
|
+
| ---------------- | ----------------------------------- | ---------- | ---------------------------------------------------------------------- |
|
|
185
|
+
| `voice` | `string` | `"Cherry"` | 发音人名称,可选值:`Cherry`, `Serena`, `Ethan`, `Chelsie`, `Peter` 等 |
|
|
186
|
+
| `autoPlay` | `boolean` | `true` | 是否在音频接收完成后自动播放 |
|
|
187
|
+
| `audioFormat` | `"pcm" \| "mp3" \| "wav" \| "opus"` | `"pcm"` | 音频格式,**注意:内置播放仅支持 `pcm`** |
|
|
188
|
+
| `sampleRate` | `number` | `24000` | 采样率 |
|
|
189
|
+
| `getAccessToken` | `() => string \| null` | - | 获取访问令牌(用于 WebSocket 认证) |
|
|
190
|
+
| `onReady` | `() => void` | - | 会话初始化完成回调 |
|
|
191
|
+
| `onAudioStart` | `() => void` | - | 音频开始播放回调 |
|
|
192
|
+
| `onAudioEnd` | `() => void` | - | 音频播放结束回调 |
|
|
193
|
+
| `onAudioChunk` | `(chunks: string[]) => void` | - | 接收到音频分片回调 |
|
|
194
|
+
| `onError` | `(error: Error) => void` | - | 错误回调 |
|
|
80
195
|
|
|
81
|
-
####
|
|
196
|
+
#### `TTSClient`
|
|
82
197
|
|
|
83
|
-
|
|
198
|
+
```ts
|
|
199
|
+
interface TTSClient {
|
|
200
|
+
connect(): Promise<void>; // 建立 WebSocket 连接
|
|
201
|
+
speak(text: string): Promise<void>; // 发送文本进行语音合成
|
|
202
|
+
play(): void; // 手动播放(autoPlay=false 时使用)
|
|
203
|
+
close(): void; // 关闭连接并释放资源
|
|
204
|
+
}
|
|
205
|
+
```
|
|
84
206
|
|
|
85
|
-
|
|
86
|
-
- `connected`: WebSocket 连接建立
|
|
87
|
-
- `session-created`: 会话创建成功
|
|
88
|
-
- `session-updated`: 会话配置完成
|
|
89
|
-
- `audio-chunk`: 收到音频片段
|
|
90
|
-
- `audio-done`: 音频生成完成
|
|
91
|
-
- `completed`: 响应完成
|
|
92
|
-
- `error`: 发生错误
|
|
93
|
-
- `closed`: 连接关闭
|
|
207
|
+
---
|
|
94
208
|
|
|
95
|
-
|
|
209
|
+
## 配合统一客户端使用
|
|
96
210
|
|
|
97
|
-
|
|
211
|
+
推荐与 `@amaster.ai/client` 统一客户端一起使用,自动处理认证:
|
|
98
212
|
|
|
99
|
-
|
|
213
|
+
```tsx
|
|
214
|
+
import { createClient } from "@amaster.ai/client";
|
|
100
215
|
|
|
101
|
-
|
|
216
|
+
const client = createClient({
|
|
217
|
+
baseURL: "https://api.amaster.ai",
|
|
218
|
+
});
|
|
102
219
|
|
|
103
|
-
|
|
220
|
+
const ttsClient = client.tts({
|
|
221
|
+
voice: "Cherry",
|
|
222
|
+
autoPlay: true,
|
|
223
|
+
onReady() {
|
|
224
|
+
console.log("TTS 已就绪");
|
|
225
|
+
},
|
|
226
|
+
onAudioStart() {
|
|
227
|
+
console.log("开始播放");
|
|
228
|
+
},
|
|
229
|
+
onAudioEnd() {
|
|
230
|
+
console.log("播放结束");
|
|
231
|
+
},
|
|
232
|
+
});
|
|
104
233
|
|
|
105
|
-
|
|
234
|
+
await ttsClient.connect();
|
|
235
|
+
await ttsClient.speak("你好,欢迎使用 Amaster AI!");
|
|
236
|
+
```
|
|
106
237
|
|
|
107
|
-
|
|
238
|
+
---
|
|
108
239
|
|
|
109
|
-
|
|
110
|
-
- 解码:自动转换为 Float32 AudioBuffer
|
|
111
|
-
- 播放:使用 Web Audio API
|
|
112
|
-
- 采样率:24kHz(高清音质)
|
|
240
|
+
## 音频说明
|
|
113
241
|
|
|
114
|
-
|
|
242
|
+
- 当前内置播放逻辑 **仅支持 `pcm` 格式**
|
|
243
|
+
- `pcm` 数据为 **16-bit little-endian 单声道**
|
|
244
|
+
- 播放基于 `AudioContext`,仅支持浏览器环境
|
|
245
|
+
- 若使用 `mp3 / wav / opus`,需自行实现解码与播放逻辑
|
|
115
246
|
|
|
116
|
-
|
|
117
|
-
- `session.update` - 配置会话
|
|
118
|
-
- `input_text_buffer.append` - 发送文本
|
|
119
|
-
- `input_text_buffer.commit` - 提交文本
|
|
120
|
-
- `session.finish` - 结束会话
|
|
247
|
+
---
|
|
121
248
|
|
|
122
|
-
|
|
249
|
+
## 运行环境
|
|
123
250
|
|
|
124
|
-
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
251
|
+
- 浏览器(Chrome / Edge / Safari)
|
|
252
|
+
- 需支持:
|
|
253
|
+
- WebSocket
|
|
254
|
+
- AudioContext
|
|
255
|
+
- atob
|
|
128
256
|
|
|
129
|
-
|
|
257
|
+
---
|
|
130
258
|
|
|
131
|
-
|
|
132
|
-
# 构建
|
|
133
|
-
npm run build
|
|
259
|
+
## 注意事项
|
|
134
260
|
|
|
135
|
-
|
|
136
|
-
|
|
261
|
+
- WebSocket 必须在 `connect()` 成功后才能调用 `speak()`
|
|
262
|
+
- 多次 `speak()` 会覆盖之前的音频缓存
|
|
263
|
+
- 自动播放依赖浏览器的自动播放策略,部分场景可能需要用户交互触发
|
|
264
|
+
- 建议在组件卸载时调用 `close()` 释放资源
|
|
137
265
|
|
|
138
|
-
|
|
139
|
-
npm run type-check
|
|
140
|
-
```
|
|
266
|
+
---
|
|
141
267
|
|
|
142
268
|
## License
|
|
143
269
|
|
package/dist/index.cjs
CHANGED
|
@@ -20,106 +20,15 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
-
createTTSClient: () =>
|
|
24
|
-
createTtsClient: () => createTTSClient,
|
|
25
|
-
speak: () => speak
|
|
23
|
+
createTTSClient: () => tts_client_default
|
|
26
24
|
});
|
|
27
25
|
module.exports = __toCommonJS(index_exports);
|
|
28
26
|
|
|
29
27
|
// src/tts-client.ts
|
|
30
28
|
var TTS_PATH = "/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime";
|
|
31
|
-
async function speak(text, voice = "Cherry") {
|
|
32
|
-
return new Promise((resolve, reject) => {
|
|
33
|
-
const ws = new WebSocket(TTS_PATH);
|
|
34
|
-
const audioChunks = [];
|
|
35
|
-
let resolved = false;
|
|
36
|
-
ws.onmessage = (event) => {
|
|
37
|
-
const data = JSON.parse(event.data);
|
|
38
|
-
if (data.type === "session.created") {
|
|
39
|
-
ws.send(JSON.stringify({
|
|
40
|
-
type: "session.update",
|
|
41
|
-
session: { mode: "server_commit", voice, response_format: "pcm", sample_rate: 24e3 }
|
|
42
|
-
}));
|
|
43
|
-
}
|
|
44
|
-
if (data.type === "session.updated") {
|
|
45
|
-
ws.send(JSON.stringify({ type: "input_text_buffer.append", text }));
|
|
46
|
-
ws.send(JSON.stringify({ type: "input_text_buffer.commit" }));
|
|
47
|
-
}
|
|
48
|
-
if (data.type === "response.audio.delta") {
|
|
49
|
-
audioChunks.push(data.delta);
|
|
50
|
-
}
|
|
51
|
-
if (data.type === "response.audio.done") {
|
|
52
|
-
playPcmAudio(audioChunks, 24e3).then(() => {
|
|
53
|
-
ws.send(JSON.stringify({ type: "session.finish" }));
|
|
54
|
-
});
|
|
55
|
-
}
|
|
56
|
-
if (data.type === "session.finished") {
|
|
57
|
-
ws.close();
|
|
58
|
-
if (!resolved) {
|
|
59
|
-
resolved = true;
|
|
60
|
-
resolve();
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
if (data.type === "error") {
|
|
64
|
-
ws.close();
|
|
65
|
-
if (!resolved) {
|
|
66
|
-
resolved = true;
|
|
67
|
-
reject(new Error(data.error?.message || "TTS error"));
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
};
|
|
71
|
-
ws.onerror = () => {
|
|
72
|
-
if (!resolved) {
|
|
73
|
-
resolved = true;
|
|
74
|
-
reject(new Error("TTS connection failed"));
|
|
75
|
-
}
|
|
76
|
-
};
|
|
77
|
-
ws.onclose = () => {
|
|
78
|
-
if (!resolved) {
|
|
79
|
-
resolved = true;
|
|
80
|
-
resolve();
|
|
81
|
-
}
|
|
82
|
-
};
|
|
83
|
-
});
|
|
84
|
-
}
|
|
85
|
-
async function playPcmAudio(chunks, sampleRate) {
|
|
86
|
-
if (typeof window === "undefined" || chunks.length === 0) return;
|
|
87
|
-
const audioContext = new AudioContext();
|
|
88
|
-
let totalBytes = 0;
|
|
89
|
-
const allBytes = [];
|
|
90
|
-
for (const chunk of chunks) {
|
|
91
|
-
const binary = atob(chunk);
|
|
92
|
-
const bytes = new Uint8Array(binary.length);
|
|
93
|
-
for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
|
|
94
|
-
allBytes.push(bytes);
|
|
95
|
-
totalBytes += bytes.length;
|
|
96
|
-
}
|
|
97
|
-
const combined = new Uint8Array(totalBytes);
|
|
98
|
-
let offset = 0;
|
|
99
|
-
for (const bytes of allBytes) {
|
|
100
|
-
combined.set(bytes, offset);
|
|
101
|
-
offset += bytes.length;
|
|
102
|
-
}
|
|
103
|
-
const numSamples = combined.length / 2;
|
|
104
|
-
const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);
|
|
105
|
-
const channelData = audioBuffer.getChannelData(0);
|
|
106
|
-
const dataView = new DataView(combined.buffer);
|
|
107
|
-
for (let i = 0; i < numSamples; i++) {
|
|
108
|
-
channelData[i] = dataView.getInt16(i * 2, true) / 32768;
|
|
109
|
-
}
|
|
110
|
-
return new Promise((resolve) => {
|
|
111
|
-
const source = audioContext.createBufferSource();
|
|
112
|
-
source.buffer = audioBuffer;
|
|
113
|
-
source.connect(audioContext.destination);
|
|
114
|
-
source.onended = () => {
|
|
115
|
-
audioContext.close();
|
|
116
|
-
resolve();
|
|
117
|
-
};
|
|
118
|
-
source.start(0);
|
|
119
|
-
});
|
|
120
|
-
}
|
|
121
29
|
function createTTSClient(config) {
|
|
122
30
|
const {
|
|
31
|
+
getAccessToken,
|
|
123
32
|
voice = "Cherry",
|
|
124
33
|
autoPlay = true,
|
|
125
34
|
audioFormat = "pcm",
|
|
@@ -127,28 +36,40 @@ function createTTSClient(config) {
|
|
|
127
36
|
onReady,
|
|
128
37
|
onAudioStart,
|
|
129
38
|
onAudioEnd,
|
|
39
|
+
onAudioChunk,
|
|
130
40
|
onError
|
|
131
41
|
} = config;
|
|
132
42
|
let ws = null;
|
|
133
43
|
let audioChunks = [];
|
|
134
44
|
let audioContext = null;
|
|
45
|
+
let audioSource = null;
|
|
135
46
|
async function connect() {
|
|
136
47
|
return new Promise((resolve, reject) => {
|
|
137
|
-
|
|
48
|
+
let wsUrl = TTS_PATH;
|
|
49
|
+
if (getAccessToken) {
|
|
50
|
+
const token = getAccessToken();
|
|
51
|
+
if (token) {
|
|
52
|
+
const separator = wsUrl.includes("?") ? "&" : "?";
|
|
53
|
+
wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
ws = new WebSocket(wsUrl);
|
|
138
57
|
ws.onopen = () => {
|
|
139
58
|
};
|
|
140
59
|
ws.onmessage = (event) => {
|
|
141
60
|
const data = JSON.parse(event.data);
|
|
142
61
|
if (data.type === "session.created") {
|
|
143
|
-
ws.send(
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
62
|
+
ws.send(
|
|
63
|
+
JSON.stringify({
|
|
64
|
+
type: "session.update",
|
|
65
|
+
session: {
|
|
66
|
+
mode: "server_commit",
|
|
67
|
+
voice,
|
|
68
|
+
response_format: audioFormat,
|
|
69
|
+
sample_rate: sampleRate
|
|
70
|
+
}
|
|
71
|
+
})
|
|
72
|
+
);
|
|
152
73
|
}
|
|
153
74
|
if (data.type === "session.updated") {
|
|
154
75
|
onReady?.();
|
|
@@ -156,10 +77,12 @@ function createTTSClient(config) {
|
|
|
156
77
|
}
|
|
157
78
|
if (data.type === "response.audio.delta") {
|
|
158
79
|
audioChunks.push(data.delta);
|
|
80
|
+
onAudioChunk?.(audioChunks);
|
|
159
81
|
}
|
|
160
82
|
if (data.type === "response.audio.done") {
|
|
83
|
+
onAudioChunk?.(audioChunks);
|
|
161
84
|
if (autoPlay && typeof window !== "undefined") {
|
|
162
|
-
playAudio(
|
|
85
|
+
playAudio();
|
|
163
86
|
}
|
|
164
87
|
}
|
|
165
88
|
if (data.type === "response.done") {
|
|
@@ -181,22 +104,27 @@ function createTTSClient(config) {
|
|
|
181
104
|
};
|
|
182
105
|
});
|
|
183
106
|
}
|
|
184
|
-
async function
|
|
107
|
+
async function speak(text) {
|
|
185
108
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
186
109
|
throw new Error("WebSocket not connected");
|
|
187
110
|
}
|
|
188
111
|
audioChunks = [];
|
|
189
|
-
ws.send(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
112
|
+
ws.send(
|
|
113
|
+
JSON.stringify({
|
|
114
|
+
type: "input_text_buffer.append",
|
|
115
|
+
text
|
|
116
|
+
})
|
|
117
|
+
);
|
|
193
118
|
setTimeout(() => {
|
|
194
|
-
ws.send(
|
|
195
|
-
|
|
196
|
-
|
|
119
|
+
ws.send(
|
|
120
|
+
JSON.stringify({
|
|
121
|
+
type: "input_text_buffer.commit"
|
|
122
|
+
})
|
|
123
|
+
);
|
|
197
124
|
}, 100);
|
|
198
125
|
}
|
|
199
|
-
function playAudio(
|
|
126
|
+
function playAudio() {
|
|
127
|
+
let chunks = audioChunks;
|
|
200
128
|
if (typeof window === "undefined") return;
|
|
201
129
|
try {
|
|
202
130
|
if (!audioContext) {
|
|
@@ -233,30 +161,43 @@ function createTTSClient(config) {
|
|
|
233
161
|
source.connect(audioContext.destination);
|
|
234
162
|
source.onended = () => onAudioEnd?.();
|
|
235
163
|
source.start(0);
|
|
164
|
+
audioSource = source;
|
|
236
165
|
} catch (err) {
|
|
237
166
|
onError?.(err);
|
|
238
167
|
}
|
|
239
168
|
}
|
|
240
|
-
function
|
|
241
|
-
if (
|
|
242
|
-
|
|
243
|
-
|
|
169
|
+
function stopAudio() {
|
|
170
|
+
if (audioSource) {
|
|
171
|
+
audioSource.stop();
|
|
172
|
+
audioSource = null;
|
|
244
173
|
}
|
|
245
174
|
if (audioContext) {
|
|
246
175
|
audioContext.close();
|
|
247
176
|
audioContext = null;
|
|
248
177
|
}
|
|
249
178
|
}
|
|
179
|
+
function close() {
|
|
180
|
+
if (ws) {
|
|
181
|
+
ws.close();
|
|
182
|
+
ws = null;
|
|
183
|
+
}
|
|
184
|
+
stopAudio();
|
|
185
|
+
}
|
|
250
186
|
return {
|
|
251
187
|
connect,
|
|
252
|
-
speak
|
|
253
|
-
close
|
|
188
|
+
speak,
|
|
189
|
+
close,
|
|
190
|
+
play: playAudio,
|
|
191
|
+
stop: stopAudio
|
|
254
192
|
};
|
|
255
193
|
}
|
|
194
|
+
var tts_client_default = (authConfig) => {
|
|
195
|
+
return (config) => {
|
|
196
|
+
return createTTSClient({ ...authConfig, ...config });
|
|
197
|
+
};
|
|
198
|
+
};
|
|
256
199
|
// Annotate the CommonJS export names for ESM import in node:
|
|
257
200
|
0 && (module.exports = {
|
|
258
|
-
createTTSClient
|
|
259
|
-
createTtsClient,
|
|
260
|
-
speak
|
|
201
|
+
createTTSClient
|
|
261
202
|
});
|
|
262
203
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/tts-client.ts"],"sourcesContent":["export { speak } from './tts-client';\nexport type { TTSClient, TTSClientConfig } from './tts-client';\nexport { createTTSClient, createTTSClient as createTtsClient } from './tts-client';\n","/**\n * TTS Realtime WebSocket Client\n */\n\nconst TTS_PATH = '/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime';\n\n/**\n * One-line TTS: speak text and play audio\n * @example await speak('Hello world')\n */\nexport async function speak(text: string, voice: string = 'Cherry'): Promise<void> {\n return new Promise((resolve, reject) => {\n const ws = new WebSocket(TTS_PATH);\n const audioChunks: string[] = [];\n let resolved = false;\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === 'session.created') {\n ws.send(JSON.stringify({\n type: 'session.update',\n session: { mode: 'server_commit', voice, response_format: 'pcm', sample_rate: 24000 },\n }));\n }\n\n if (data.type === 'session.updated') {\n ws.send(JSON.stringify({ type: 'input_text_buffer.append', text }));\n ws.send(JSON.stringify({ type: 'input_text_buffer.commit' }));\n }\n\n if (data.type === 'response.audio.delta') {\n audioChunks.push(data.delta);\n }\n\n if (data.type === 'response.audio.done') {\n playPcmAudio(audioChunks, 24000).then(() => {\n ws.send(JSON.stringify({ type: 'session.finish' }));\n });\n }\n\n if (data.type === 'session.finished') {\n ws.close();\n if (!resolved) { resolved = true; resolve(); }\n }\n\n if (data.type === 'error') {\n ws.close();\n if (!resolved) { resolved = true; reject(new Error(data.error?.message || 'TTS error')); }\n }\n };\n\n ws.onerror = () => {\n if (!resolved) { resolved = true; reject(new Error('TTS connection failed')); }\n };\n\n ws.onclose = () => {\n if (!resolved) { resolved = true; resolve(); }\n };\n });\n}\n\n/** Play PCM audio data */\nasync function playPcmAudio(chunks: string[], sampleRate: number): Promise<void> {\n if (typeof window === 'undefined' || chunks.length === 0) return;\n\n const audioContext = new AudioContext();\n \n // Decode base64 chunks\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n for (const chunk of chunks) {\n const binary = atob(chunk);\n const bytes = new Uint8Array(binary.length);\n for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n // Combine\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) { combined.set(bytes, offset); offset += bytes.length; }\n\n // Int16 PCM to Float32\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n channelData[i] = dataView.getInt16(i * 2, true) / 32768.0;\n }\n\n // Play\n return new Promise((resolve) => {\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => { audioContext.close(); resolve(); };\n source.start(0);\n });\n}\n\nexport interface TTSClientConfig {\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: 'pcm' | 'mp3' | 'wav' | 'opus';\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Close connection */\n close(): void;\n}\n\nexport function createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n voice = 'Cherry',\n autoPlay = true,\n audioFormat = 'pcm',\n sampleRate = 24000,\n onReady,\n onAudioStart,\n onAudioEnd,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n ws = new WebSocket(TTS_PATH);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === 'session.created') {\n ws!.send(JSON.stringify({\n type: 'session.update',\n session: {\n mode: 'server_commit',\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n }));\n }\n\n if (data.type === 'session.updated') {\n onReady?.();\n resolve();\n }\n\n if (data.type === 'response.audio.delta') {\n audioChunks.push(data.delta);\n }\n\n if (data.type === 'response.audio.done') {\n if (autoPlay && typeof window !== 'undefined') {\n playAudio(audioChunks);\n }\n }\n\n if (data.type === 'response.done') {\n ws!.send(JSON.stringify({ type: 'session.finish' }));\n }\n\n if (data.type === 'error') {\n const err = new Error(data.error?.message || 'Unknown error');\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error('WebSocket connection error');\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error('WebSocket not connected');\n }\n\n audioChunks = [];\n\n ws.send(JSON.stringify({\n type: 'input_text_buffer.append',\n text,\n }));\n\n setTimeout(() => {\n ws!.send(JSON.stringify({\n type: 'input_text_buffer.commit',\n }));\n }, 100);\n }\n\n function playAudio(chunks: string[]) {\n if (typeof window === 'undefined') return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => onAudioEnd?.();\n source.start(0);\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n return {\n connect,\n speak,\n close,\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,IAAM,WAAW;AAMjB,eAAsB,MAAM,MAAc,QAAgB,UAAyB;AACjF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,KAAK,IAAI,UAAU,QAAQ;AACjC,UAAM,cAAwB,CAAC;AAC/B,QAAI,WAAW;AAEf,OAAG,YAAY,CAAC,UAAU;AACxB,YAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,UAAI,KAAK,SAAS,mBAAmB;AACnC,WAAG,KAAK,KAAK,UAAU;AAAA,UACrB,MAAM;AAAA,UACN,SAAS,EAAE,MAAM,iBAAiB,OAAO,iBAAiB,OAAO,aAAa,KAAM;AAAA,QACtF,CAAC,CAAC;AAAA,MACJ;AAEA,UAAI,KAAK,SAAS,mBAAmB;AACnC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,KAAK,CAAC,CAAC;AAClE,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,2BAA2B,CAAC,CAAC;AAAA,MAC9D;AAEA,UAAI,KAAK,SAAS,wBAAwB;AACxC,oBAAY,KAAK,KAAK,KAAK;AAAA,MAC7B;AAEA,UAAI,KAAK,SAAS,uBAAuB;AACvC,qBAAa,aAAa,IAAK,EAAE,KAAK,MAAM;AAC1C,aAAG,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACpD,CAAC;AAAA,MACH;AAEA,UAAI,KAAK,SAAS,oBAAoB;AACpC,WAAG,MAAM;AACT,YAAI,CAAC,UAAU;AAAE,qBAAW;AAAM,kBAAQ;AAAA,QAAG;AAAA,MAC/C;AAEA,UAAI,KAAK,SAAS,SAAS;AACzB,WAAG,MAAM;AACT,YAAI,CAAC,UAAU;AAAE,qBAAW;AAAM,iBAAO,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW,CAAC;AAAA,QAAG;AAAA,MAC3F;AAAA,IACF;AAEA,OAAG,UAAU,MAAM;AACjB,UAAI,CAAC,UAAU;AAAE,mBAAW;AAAM,eAAO,IAAI,MAAM,uBAAuB,CAAC;AAAA,MAAG;AAAA,IAChF;AAEA,OAAG,UAAU,MAAM;AACjB,UAAI,CAAC,UAAU;AAAE,mBAAW;AAAM,gBAAQ;AAAA,MAAG;AAAA,IAC/C;AAAA,EACF,CAAC;AACH;AAGA,eAAe,aAAa,QAAkB,YAAmC;AAC/E,MAAI,OAAO,WAAW,eAAe,OAAO,WAAW,EAAG;AAE1D,QAAM,eAAe,IAAI,aAAa;AAGtC,MAAI,aAAa;AACjB,QAAM,WAAyB,CAAC;AAChC,aAAW,SAAS,QAAQ;AAC1B,UAAM,SAAS,KAAK,KAAK;AACzB,UAAM,QAAQ,IAAI,WAAW,OAAO,MAAM;AAC1C,aAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,IAAK,OAAM,CAAC,IAAI,OAAO,WAAW,CAAC;AACtE,aAAS,KAAK,KAAK;AACnB,kBAAc,MAAM;AAAA,EACtB;AAGA,QAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,MAAI,SAAS;AACb,aAAW,SAAS,UAAU;AAAE,aAAS,IAAI,OAAO,MAAM;AAAG,cAAU,MAAM;AAAA,EAAQ;AAGrF,QAAM,aAAa,SAAS,SAAS;AACrC,QAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,QAAM,cAAc,YAAY,eAAe,CAAC;AAChD,QAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,gBAAY,CAAC,IAAI,SAAS,SAAS,IAAI,GAAG,IAAI,IAAI;AAAA,EACpD;AAGA,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAM,SAAS,aAAa,mBAAmB;AAC/C,WAAO,SAAS;AAChB,WAAO,QAAQ,aAAa,WAAW;AACvC,WAAO,UAAU,MAAM;AAAE,mBAAa,MAAM;AAAG,cAAQ;AAAA,IAAG;AAC1D,WAAO,MAAM,CAAC;AAAA,EAChB,CAAC;AACH;AA8BO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AAExC,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,UAAU,QAAQ;AAE3B,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI,KAAK,KAAK,UAAU;AAAA,YACtB,MAAM;AAAA,YACN,SAAS;AAAA,cACP,MAAM;AAAA,cACN;AAAA,cACA,iBAAiB;AAAA,cACjB,aAAa;AAAA,YACf;AAAA,UACF,CAAC,CAAC;AAAA,QACJ;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAAA,QAC7B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU,WAAW;AAAA,UACvB;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAeA,OAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG,KAAK,KAAK,UAAU;AAAA,MACrB,MAAM;AAAA,MACN;AAAA,IACF,CAAC,CAAC;AAEF,eAAW,MAAM;AACf,SAAI,KAAK,KAAK,UAAU;AAAA,QACtB,MAAM;AAAA,MACR,CAAC,CAAC;AAAA,IACJ,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,UAAU,QAAkB;AACnC,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WAAyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,OAAAA;AAAA,IACA;AAAA,EACF;AACF;","names":["speak"]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/tts-client.ts"],"sourcesContent":["import type { TTSClient, TTSClientConfig } from \"./tts-client\";\nimport createTTSClient from \"./tts-client\";\nexport { createTTSClient, type TTSClient, type TTSClientConfig };\n","/**\n * TTS Realtime WebSocket Client\n *\n * WebSocket-based real-time text-to-speech synthesis with multiple voice options.\n * Built-in playback only supports PCM format.\n *\n * @example\n * ```typescript\n * const client = createTTSClient({\n * voice: \"Cherry\",\n * autoPlay: true,\n * onReady() {\n * console.log(\"TTS ready\");\n * },\n * onAudioStart() {\n * console.log(\"Playing audio\");\n * },\n * onAudioEnd() {\n * console.log(\"Playback ended\");\n * },\n * });\n *\n * await client.connect();\n * await client.speak(\"Hello, this is a test.\");\n * // client.close();\n * ```\n */\n\nconst TTS_PATH = \"/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime\";\n\nexport interface TTSClientConfig {\n /** Get access token for WebSocket authentication */\n getAccessToken?: () => string | null;\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: \"pcm\" | \"mp3\" | \"wav\" | \"opus\";\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on each audio chunk received */\n onAudioChunk?: (chunk: string[]) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Play audio from chunks */\n play(): void;\n /** Stop audio playback */\n stop(): void;\n /** Close connection */\n close(): void;\n}\n\nfunction createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n getAccessToken,\n voice = \"Cherry\",\n autoPlay = true,\n audioFormat = \"pcm\",\n sampleRate = 24000,\n onReady,\n onAudioStart,\n onAudioEnd,\n onAudioChunk,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n let audioSource: AudioBufferSourceNode | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n // Build WebSocket URL with optional token parameter\n let wsUrl = TTS_PATH;\n if (getAccessToken) {\n const token = getAccessToken();\n if (token) {\n const separator = wsUrl.includes(\"?\") ? \"&\" : \"?\";\n wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;\n }\n }\n\n ws = new WebSocket(wsUrl);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n ws!.send(\n JSON.stringify({\n type: \"session.update\",\n session: {\n mode: \"server_commit\",\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n })\n );\n }\n\n if (data.type === \"session.updated\") {\n onReady?.();\n resolve();\n }\n\n if (data.type === \"response.audio.delta\") {\n audioChunks.push(data.delta);\n onAudioChunk?.(audioChunks);\n }\n\n if (data.type === \"response.audio.done\") {\n onAudioChunk?.(audioChunks);\n if (autoPlay && typeof window !== \"undefined\") {\n playAudio();\n }\n }\n\n if (data.type === \"response.done\") {\n ws!.send(JSON.stringify({ type: \"session.finish\" }));\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"Unknown error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error(\"WebSocket connection error\");\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n audioChunks = [];\n\n ws.send(\n JSON.stringify({\n type: \"input_text_buffer.append\",\n text,\n })\n );\n\n setTimeout(() => {\n ws!.send(\n JSON.stringify({\n type: \"input_text_buffer.commit\",\n })\n );\n }, 100);\n }\n\n function playAudio() {\n let chunks: string[] = audioChunks;\n if (typeof window === \"undefined\") return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => onAudioEnd?.();\n source.start(0);\n audioSource = source;\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function stopAudio() {\n if (audioSource) {\n audioSource.stop();\n audioSource = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n stopAudio();\n }\n\n return {\n connect,\n speak,\n close,\n play: playAudio,\n stop: stopAudio,\n };\n}\n\nexport default (\n authConfig: Pick<TTSClientConfig, \"getAccessToken\">\n): ((config: TTSClientConfig) => TTSClient) => {\n return (config: TTSClientConfig) => {\n return createTTSClient({ ...authConfig, ...config });\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;AC4BA,IAAM,WAAW;AAsCjB,SAAS,gBAAgB,QAAoC;AAC3D,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AACxC,MAAI,cAA4C;AAEhD,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AAEtC,UAAI,QAAQ;AACZ,UAAI,gBAAgB;AAClB,cAAM,QAAQ,eAAe;AAC7B,YAAI,OAAO;AACT,gBAAM,YAAY,MAAM,SAAS,GAAG,IAAI,MAAM;AAC9C,kBAAQ,GAAG,KAAK,GAAG,SAAS,SAAS,mBAAmB,KAAK,CAAC;AAAA,QAChE;AAAA,MACF;AAEA,WAAK,IAAI,UAAU,KAAK;AAExB,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI;AAAA,YACF,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,MAAM;AAAA,gBACN;AAAA,gBACA,iBAAiB;AAAA,gBACjB,aAAa;AAAA,cACf;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAC3B,yBAAe,WAAW;AAAA,QAC5B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,yBAAe,WAAW;AAC1B,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU;AAAA,UACZ;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,MAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN;AAAA,MACF,CAAC;AAAA,IACH;AAEA,eAAW,MAAM;AACf,SAAI;AAAA,QACF,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,YAAY;AACnB,QAAI,SAAmB;AACvB,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WAAyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AACd,oBAAc;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,YAAY;AACnB,QAAI,aAAa;AACf,kBAAY,KAAK;AACjB,oBAAc;AAAA,IAChB;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,cAAU;AAAA,EACZ;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM;AAAA,IACN,MAAM;AAAA,EACR;AACF;AAEA,IAAO,qBAAQ,CACb,eAC6C;AAC7C,SAAO,CAAC,WAA4B;AAClC,WAAO,gBAAgB,EAAE,GAAG,YAAY,GAAG,OAAO,CAAC;AAAA,EACrD;AACF;","names":[]}
|
package/dist/index.d.cts
CHANGED
|
@@ -1,18 +1,39 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* TTS Realtime WebSocket Client
|
|
3
|
+
*
|
|
4
|
+
* WebSocket-based real-time text-to-speech synthesis with multiple voice options.
|
|
5
|
+
* Built-in playback only supports PCM format.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* const client = createTTSClient({
|
|
10
|
+
* voice: "Cherry",
|
|
11
|
+
* autoPlay: true,
|
|
12
|
+
* onReady() {
|
|
13
|
+
* console.log("TTS ready");
|
|
14
|
+
* },
|
|
15
|
+
* onAudioStart() {
|
|
16
|
+
* console.log("Playing audio");
|
|
17
|
+
* },
|
|
18
|
+
* onAudioEnd() {
|
|
19
|
+
* console.log("Playback ended");
|
|
20
|
+
* },
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* await client.connect();
|
|
24
|
+
* await client.speak("Hello, this is a test.");
|
|
25
|
+
* // client.close();
|
|
26
|
+
* ```
|
|
3
27
|
*/
|
|
4
|
-
/**
|
|
5
|
-
* One-line TTS: speak text and play audio
|
|
6
|
-
* @example await speak('Hello world')
|
|
7
|
-
*/
|
|
8
|
-
declare function speak(text: string, voice?: string): Promise<void>;
|
|
9
28
|
interface TTSClientConfig {
|
|
29
|
+
/** Get access token for WebSocket authentication */
|
|
30
|
+
getAccessToken?: () => string | null;
|
|
10
31
|
/** Voice name, default 'Cherry' */
|
|
11
32
|
voice?: string;
|
|
12
33
|
/** Auto play audio, default true */
|
|
13
34
|
autoPlay?: boolean;
|
|
14
35
|
/** Audio format, default 'pcm' */
|
|
15
|
-
audioFormat?:
|
|
36
|
+
audioFormat?: "pcm" | "mp3" | "wav" | "opus";
|
|
16
37
|
/** Sample rate, default 24000 */
|
|
17
38
|
sampleRate?: number;
|
|
18
39
|
/** Called when connection is ready */
|
|
@@ -21,6 +42,8 @@ interface TTSClientConfig {
|
|
|
21
42
|
onAudioStart?: () => void;
|
|
22
43
|
/** Called when audio playback ends */
|
|
23
44
|
onAudioEnd?: () => void;
|
|
45
|
+
/** Called on each audio chunk received */
|
|
46
|
+
onAudioChunk?: (chunk: string[]) => void;
|
|
24
47
|
/** Called on error */
|
|
25
48
|
onError?: (error: Error) => void;
|
|
26
49
|
}
|
|
@@ -29,9 +52,13 @@ interface TTSClient {
|
|
|
29
52
|
connect(): Promise<void>;
|
|
30
53
|
/** Synthesize speech from text */
|
|
31
54
|
speak(text: string): Promise<void>;
|
|
55
|
+
/** Play audio from chunks */
|
|
56
|
+
play(): void;
|
|
57
|
+
/** Stop audio playback */
|
|
58
|
+
stop(): void;
|
|
32
59
|
/** Close connection */
|
|
33
60
|
close(): void;
|
|
34
61
|
}
|
|
35
|
-
declare
|
|
62
|
+
declare const _default: (authConfig: Pick<TTSClientConfig, "getAccessToken">) => ((config: TTSClientConfig) => TTSClient);
|
|
36
63
|
|
|
37
|
-
export { type TTSClient, type TTSClientConfig,
|
|
64
|
+
export { type TTSClient, type TTSClientConfig, _default as createTTSClient };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,18 +1,39 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* TTS Realtime WebSocket Client
|
|
3
|
+
*
|
|
4
|
+
* WebSocket-based real-time text-to-speech synthesis with multiple voice options.
|
|
5
|
+
* Built-in playback only supports PCM format.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* const client = createTTSClient({
|
|
10
|
+
* voice: "Cherry",
|
|
11
|
+
* autoPlay: true,
|
|
12
|
+
* onReady() {
|
|
13
|
+
* console.log("TTS ready");
|
|
14
|
+
* },
|
|
15
|
+
* onAudioStart() {
|
|
16
|
+
* console.log("Playing audio");
|
|
17
|
+
* },
|
|
18
|
+
* onAudioEnd() {
|
|
19
|
+
* console.log("Playback ended");
|
|
20
|
+
* },
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* await client.connect();
|
|
24
|
+
* await client.speak("Hello, this is a test.");
|
|
25
|
+
* // client.close();
|
|
26
|
+
* ```
|
|
3
27
|
*/
|
|
4
|
-
/**
|
|
5
|
-
* One-line TTS: speak text and play audio
|
|
6
|
-
* @example await speak('Hello world')
|
|
7
|
-
*/
|
|
8
|
-
declare function speak(text: string, voice?: string): Promise<void>;
|
|
9
28
|
interface TTSClientConfig {
|
|
29
|
+
/** Get access token for WebSocket authentication */
|
|
30
|
+
getAccessToken?: () => string | null;
|
|
10
31
|
/** Voice name, default 'Cherry' */
|
|
11
32
|
voice?: string;
|
|
12
33
|
/** Auto play audio, default true */
|
|
13
34
|
autoPlay?: boolean;
|
|
14
35
|
/** Audio format, default 'pcm' */
|
|
15
|
-
audioFormat?:
|
|
36
|
+
audioFormat?: "pcm" | "mp3" | "wav" | "opus";
|
|
16
37
|
/** Sample rate, default 24000 */
|
|
17
38
|
sampleRate?: number;
|
|
18
39
|
/** Called when connection is ready */
|
|
@@ -21,6 +42,8 @@ interface TTSClientConfig {
|
|
|
21
42
|
onAudioStart?: () => void;
|
|
22
43
|
/** Called when audio playback ends */
|
|
23
44
|
onAudioEnd?: () => void;
|
|
45
|
+
/** Called on each audio chunk received */
|
|
46
|
+
onAudioChunk?: (chunk: string[]) => void;
|
|
24
47
|
/** Called on error */
|
|
25
48
|
onError?: (error: Error) => void;
|
|
26
49
|
}
|
|
@@ -29,9 +52,13 @@ interface TTSClient {
|
|
|
29
52
|
connect(): Promise<void>;
|
|
30
53
|
/** Synthesize speech from text */
|
|
31
54
|
speak(text: string): Promise<void>;
|
|
55
|
+
/** Play audio from chunks */
|
|
56
|
+
play(): void;
|
|
57
|
+
/** Stop audio playback */
|
|
58
|
+
stop(): void;
|
|
32
59
|
/** Close connection */
|
|
33
60
|
close(): void;
|
|
34
61
|
}
|
|
35
|
-
declare
|
|
62
|
+
declare const _default: (authConfig: Pick<TTSClientConfig, "getAccessToken">) => ((config: TTSClientConfig) => TTSClient);
|
|
36
63
|
|
|
37
|
-
export { type TTSClient, type TTSClientConfig,
|
|
64
|
+
export { type TTSClient, type TTSClientConfig, _default as createTTSClient };
|
package/dist/index.js
CHANGED
|
@@ -1,97 +1,8 @@
|
|
|
1
1
|
// src/tts-client.ts
|
|
2
2
|
var TTS_PATH = "/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime";
|
|
3
|
-
async function speak(text, voice = "Cherry") {
|
|
4
|
-
return new Promise((resolve, reject) => {
|
|
5
|
-
const ws = new WebSocket(TTS_PATH);
|
|
6
|
-
const audioChunks = [];
|
|
7
|
-
let resolved = false;
|
|
8
|
-
ws.onmessage = (event) => {
|
|
9
|
-
const data = JSON.parse(event.data);
|
|
10
|
-
if (data.type === "session.created") {
|
|
11
|
-
ws.send(JSON.stringify({
|
|
12
|
-
type: "session.update",
|
|
13
|
-
session: { mode: "server_commit", voice, response_format: "pcm", sample_rate: 24e3 }
|
|
14
|
-
}));
|
|
15
|
-
}
|
|
16
|
-
if (data.type === "session.updated") {
|
|
17
|
-
ws.send(JSON.stringify({ type: "input_text_buffer.append", text }));
|
|
18
|
-
ws.send(JSON.stringify({ type: "input_text_buffer.commit" }));
|
|
19
|
-
}
|
|
20
|
-
if (data.type === "response.audio.delta") {
|
|
21
|
-
audioChunks.push(data.delta);
|
|
22
|
-
}
|
|
23
|
-
if (data.type === "response.audio.done") {
|
|
24
|
-
playPcmAudio(audioChunks, 24e3).then(() => {
|
|
25
|
-
ws.send(JSON.stringify({ type: "session.finish" }));
|
|
26
|
-
});
|
|
27
|
-
}
|
|
28
|
-
if (data.type === "session.finished") {
|
|
29
|
-
ws.close();
|
|
30
|
-
if (!resolved) {
|
|
31
|
-
resolved = true;
|
|
32
|
-
resolve();
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
if (data.type === "error") {
|
|
36
|
-
ws.close();
|
|
37
|
-
if (!resolved) {
|
|
38
|
-
resolved = true;
|
|
39
|
-
reject(new Error(data.error?.message || "TTS error"));
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
};
|
|
43
|
-
ws.onerror = () => {
|
|
44
|
-
if (!resolved) {
|
|
45
|
-
resolved = true;
|
|
46
|
-
reject(new Error("TTS connection failed"));
|
|
47
|
-
}
|
|
48
|
-
};
|
|
49
|
-
ws.onclose = () => {
|
|
50
|
-
if (!resolved) {
|
|
51
|
-
resolved = true;
|
|
52
|
-
resolve();
|
|
53
|
-
}
|
|
54
|
-
};
|
|
55
|
-
});
|
|
56
|
-
}
|
|
57
|
-
async function playPcmAudio(chunks, sampleRate) {
|
|
58
|
-
if (typeof window === "undefined" || chunks.length === 0) return;
|
|
59
|
-
const audioContext = new AudioContext();
|
|
60
|
-
let totalBytes = 0;
|
|
61
|
-
const allBytes = [];
|
|
62
|
-
for (const chunk of chunks) {
|
|
63
|
-
const binary = atob(chunk);
|
|
64
|
-
const bytes = new Uint8Array(binary.length);
|
|
65
|
-
for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
|
|
66
|
-
allBytes.push(bytes);
|
|
67
|
-
totalBytes += bytes.length;
|
|
68
|
-
}
|
|
69
|
-
const combined = new Uint8Array(totalBytes);
|
|
70
|
-
let offset = 0;
|
|
71
|
-
for (const bytes of allBytes) {
|
|
72
|
-
combined.set(bytes, offset);
|
|
73
|
-
offset += bytes.length;
|
|
74
|
-
}
|
|
75
|
-
const numSamples = combined.length / 2;
|
|
76
|
-
const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);
|
|
77
|
-
const channelData = audioBuffer.getChannelData(0);
|
|
78
|
-
const dataView = new DataView(combined.buffer);
|
|
79
|
-
for (let i = 0; i < numSamples; i++) {
|
|
80
|
-
channelData[i] = dataView.getInt16(i * 2, true) / 32768;
|
|
81
|
-
}
|
|
82
|
-
return new Promise((resolve) => {
|
|
83
|
-
const source = audioContext.createBufferSource();
|
|
84
|
-
source.buffer = audioBuffer;
|
|
85
|
-
source.connect(audioContext.destination);
|
|
86
|
-
source.onended = () => {
|
|
87
|
-
audioContext.close();
|
|
88
|
-
resolve();
|
|
89
|
-
};
|
|
90
|
-
source.start(0);
|
|
91
|
-
});
|
|
92
|
-
}
|
|
93
3
|
function createTTSClient(config) {
|
|
94
4
|
const {
|
|
5
|
+
getAccessToken,
|
|
95
6
|
voice = "Cherry",
|
|
96
7
|
autoPlay = true,
|
|
97
8
|
audioFormat = "pcm",
|
|
@@ -99,28 +10,40 @@ function createTTSClient(config) {
|
|
|
99
10
|
onReady,
|
|
100
11
|
onAudioStart,
|
|
101
12
|
onAudioEnd,
|
|
13
|
+
onAudioChunk,
|
|
102
14
|
onError
|
|
103
15
|
} = config;
|
|
104
16
|
let ws = null;
|
|
105
17
|
let audioChunks = [];
|
|
106
18
|
let audioContext = null;
|
|
19
|
+
let audioSource = null;
|
|
107
20
|
async function connect() {
|
|
108
21
|
return new Promise((resolve, reject) => {
|
|
109
|
-
|
|
22
|
+
let wsUrl = TTS_PATH;
|
|
23
|
+
if (getAccessToken) {
|
|
24
|
+
const token = getAccessToken();
|
|
25
|
+
if (token) {
|
|
26
|
+
const separator = wsUrl.includes("?") ? "&" : "?";
|
|
27
|
+
wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
ws = new WebSocket(wsUrl);
|
|
110
31
|
ws.onopen = () => {
|
|
111
32
|
};
|
|
112
33
|
ws.onmessage = (event) => {
|
|
113
34
|
const data = JSON.parse(event.data);
|
|
114
35
|
if (data.type === "session.created") {
|
|
115
|
-
ws.send(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
36
|
+
ws.send(
|
|
37
|
+
JSON.stringify({
|
|
38
|
+
type: "session.update",
|
|
39
|
+
session: {
|
|
40
|
+
mode: "server_commit",
|
|
41
|
+
voice,
|
|
42
|
+
response_format: audioFormat,
|
|
43
|
+
sample_rate: sampleRate
|
|
44
|
+
}
|
|
45
|
+
})
|
|
46
|
+
);
|
|
124
47
|
}
|
|
125
48
|
if (data.type === "session.updated") {
|
|
126
49
|
onReady?.();
|
|
@@ -128,10 +51,12 @@ function createTTSClient(config) {
|
|
|
128
51
|
}
|
|
129
52
|
if (data.type === "response.audio.delta") {
|
|
130
53
|
audioChunks.push(data.delta);
|
|
54
|
+
onAudioChunk?.(audioChunks);
|
|
131
55
|
}
|
|
132
56
|
if (data.type === "response.audio.done") {
|
|
57
|
+
onAudioChunk?.(audioChunks);
|
|
133
58
|
if (autoPlay && typeof window !== "undefined") {
|
|
134
|
-
playAudio(
|
|
59
|
+
playAudio();
|
|
135
60
|
}
|
|
136
61
|
}
|
|
137
62
|
if (data.type === "response.done") {
|
|
@@ -153,22 +78,27 @@ function createTTSClient(config) {
|
|
|
153
78
|
};
|
|
154
79
|
});
|
|
155
80
|
}
|
|
156
|
-
async function
|
|
81
|
+
async function speak(text) {
|
|
157
82
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
158
83
|
throw new Error("WebSocket not connected");
|
|
159
84
|
}
|
|
160
85
|
audioChunks = [];
|
|
161
|
-
ws.send(
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
86
|
+
ws.send(
|
|
87
|
+
JSON.stringify({
|
|
88
|
+
type: "input_text_buffer.append",
|
|
89
|
+
text
|
|
90
|
+
})
|
|
91
|
+
);
|
|
165
92
|
setTimeout(() => {
|
|
166
|
-
ws.send(
|
|
167
|
-
|
|
168
|
-
|
|
93
|
+
ws.send(
|
|
94
|
+
JSON.stringify({
|
|
95
|
+
type: "input_text_buffer.commit"
|
|
96
|
+
})
|
|
97
|
+
);
|
|
169
98
|
}, 100);
|
|
170
99
|
}
|
|
171
|
-
function playAudio(
|
|
100
|
+
function playAudio() {
|
|
101
|
+
let chunks = audioChunks;
|
|
172
102
|
if (typeof window === "undefined") return;
|
|
173
103
|
try {
|
|
174
104
|
if (!audioContext) {
|
|
@@ -205,29 +135,42 @@ function createTTSClient(config) {
|
|
|
205
135
|
source.connect(audioContext.destination);
|
|
206
136
|
source.onended = () => onAudioEnd?.();
|
|
207
137
|
source.start(0);
|
|
138
|
+
audioSource = source;
|
|
208
139
|
} catch (err) {
|
|
209
140
|
onError?.(err);
|
|
210
141
|
}
|
|
211
142
|
}
|
|
212
|
-
function
|
|
213
|
-
if (
|
|
214
|
-
|
|
215
|
-
|
|
143
|
+
function stopAudio() {
|
|
144
|
+
if (audioSource) {
|
|
145
|
+
audioSource.stop();
|
|
146
|
+
audioSource = null;
|
|
216
147
|
}
|
|
217
148
|
if (audioContext) {
|
|
218
149
|
audioContext.close();
|
|
219
150
|
audioContext = null;
|
|
220
151
|
}
|
|
221
152
|
}
|
|
153
|
+
function close() {
|
|
154
|
+
if (ws) {
|
|
155
|
+
ws.close();
|
|
156
|
+
ws = null;
|
|
157
|
+
}
|
|
158
|
+
stopAudio();
|
|
159
|
+
}
|
|
222
160
|
return {
|
|
223
161
|
connect,
|
|
224
|
-
speak
|
|
225
|
-
close
|
|
162
|
+
speak,
|
|
163
|
+
close,
|
|
164
|
+
play: playAudio,
|
|
165
|
+
stop: stopAudio
|
|
226
166
|
};
|
|
227
167
|
}
|
|
168
|
+
var tts_client_default = (authConfig) => {
|
|
169
|
+
return (config) => {
|
|
170
|
+
return createTTSClient({ ...authConfig, ...config });
|
|
171
|
+
};
|
|
172
|
+
};
|
|
228
173
|
export {
|
|
229
|
-
createTTSClient
|
|
230
|
-
createTTSClient as createTtsClient,
|
|
231
|
-
speak
|
|
174
|
+
tts_client_default as createTTSClient
|
|
232
175
|
};
|
|
233
176
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts-client.ts"],"sourcesContent":["/**\n * TTS Realtime WebSocket Client\n */\n\nconst TTS_PATH = '/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime';\n\n/**\n * One-line TTS: speak text and play audio\n * @example await speak('Hello world')\n */\nexport async function speak(text: string, voice: string = 'Cherry'): Promise<void> {\n return new Promise((resolve, reject) => {\n const ws = new WebSocket(TTS_PATH);\n const audioChunks: string[] = [];\n let resolved = false;\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === 'session.created') {\n ws.send(JSON.stringify({\n type: 'session.update',\n session: { mode: 'server_commit', voice, response_format: 'pcm', sample_rate: 24000 },\n }));\n }\n\n if (data.type === 'session.updated') {\n ws.send(JSON.stringify({ type: 'input_text_buffer.append', text }));\n ws.send(JSON.stringify({ type: 'input_text_buffer.commit' }));\n }\n\n if (data.type === 'response.audio.delta') {\n audioChunks.push(data.delta);\n }\n\n if (data.type === 'response.audio.done') {\n playPcmAudio(audioChunks, 24000).then(() => {\n ws.send(JSON.stringify({ type: 'session.finish' }));\n });\n }\n\n if (data.type === 'session.finished') {\n ws.close();\n if (!resolved) { resolved = true; resolve(); }\n }\n\n if (data.type === 'error') {\n ws.close();\n if (!resolved) { resolved = true; reject(new Error(data.error?.message || 'TTS error')); }\n }\n };\n\n ws.onerror = () => {\n if (!resolved) { resolved = true; reject(new Error('TTS connection failed')); }\n };\n\n ws.onclose = () => {\n if (!resolved) { resolved = true; resolve(); }\n };\n });\n}\n\n/** Play PCM audio data */\nasync function playPcmAudio(chunks: string[], sampleRate: number): Promise<void> {\n if (typeof window === 'undefined' || chunks.length === 0) return;\n\n const audioContext = new AudioContext();\n \n // Decode base64 chunks\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n for (const chunk of chunks) {\n const binary = atob(chunk);\n const bytes = new Uint8Array(binary.length);\n for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n // Combine\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) { combined.set(bytes, offset); offset += bytes.length; }\n\n // Int16 PCM to Float32\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n channelData[i] = dataView.getInt16(i * 2, true) / 32768.0;\n }\n\n // Play\n return new Promise((resolve) => {\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => { audioContext.close(); resolve(); };\n source.start(0);\n });\n}\n\nexport interface TTSClientConfig {\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: 'pcm' | 'mp3' | 'wav' | 'opus';\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Close connection */\n close(): void;\n}\n\nexport function createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n voice = 'Cherry',\n autoPlay = true,\n audioFormat = 'pcm',\n sampleRate = 24000,\n onReady,\n onAudioStart,\n onAudioEnd,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n ws = new WebSocket(TTS_PATH);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === 'session.created') {\n ws!.send(JSON.stringify({\n type: 'session.update',\n session: {\n mode: 'server_commit',\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n }));\n }\n\n if (data.type === 'session.updated') {\n onReady?.();\n resolve();\n }\n\n if (data.type === 'response.audio.delta') {\n audioChunks.push(data.delta);\n }\n\n if (data.type === 'response.audio.done') {\n if (autoPlay && typeof window !== 'undefined') {\n playAudio(audioChunks);\n }\n }\n\n if (data.type === 'response.done') {\n ws!.send(JSON.stringify({ type: 'session.finish' }));\n }\n\n if (data.type === 'error') {\n const err = new Error(data.error?.message || 'Unknown error');\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error('WebSocket connection error');\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error('WebSocket not connected');\n }\n\n audioChunks = [];\n\n ws.send(JSON.stringify({\n type: 'input_text_buffer.append',\n text,\n }));\n\n setTimeout(() => {\n ws!.send(JSON.stringify({\n type: 'input_text_buffer.commit',\n }));\n }, 100);\n }\n\n function playAudio(chunks: string[]) {\n if (typeof window === 'undefined') return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => onAudioEnd?.();\n source.start(0);\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n return {\n connect,\n speak,\n close,\n };\n}\n"],"mappings":";AAIA,IAAM,WAAW;AAMjB,eAAsB,MAAM,MAAc,QAAgB,UAAyB;AACjF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,KAAK,IAAI,UAAU,QAAQ;AACjC,UAAM,cAAwB,CAAC;AAC/B,QAAI,WAAW;AAEf,OAAG,YAAY,CAAC,UAAU;AACxB,YAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,UAAI,KAAK,SAAS,mBAAmB;AACnC,WAAG,KAAK,KAAK,UAAU;AAAA,UACrB,MAAM;AAAA,UACN,SAAS,EAAE,MAAM,iBAAiB,OAAO,iBAAiB,OAAO,aAAa,KAAM;AAAA,QACtF,CAAC,CAAC;AAAA,MACJ;AAEA,UAAI,KAAK,SAAS,mBAAmB;AACnC,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,KAAK,CAAC,CAAC;AAClE,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,2BAA2B,CAAC,CAAC;AAAA,MAC9D;AAEA,UAAI,KAAK,SAAS,wBAAwB;AACxC,oBAAY,KAAK,KAAK,KAAK;AAAA,MAC7B;AAEA,UAAI,KAAK,SAAS,uBAAuB;AACvC,qBAAa,aAAa,IAAK,EAAE,KAAK,MAAM;AAC1C,aAAG,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACpD,CAAC;AAAA,MACH;AAEA,UAAI,KAAK,SAAS,oBAAoB;AACpC,WAAG,MAAM;AACT,YAAI,CAAC,UAAU;AAAE,qBAAW;AAAM,kBAAQ;AAAA,QAAG;AAAA,MAC/C;AAEA,UAAI,KAAK,SAAS,SAAS;AACzB,WAAG,MAAM;AACT,YAAI,CAAC,UAAU;AAAE,qBAAW;AAAM,iBAAO,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW,CAAC;AAAA,QAAG;AAAA,MAC3F;AAAA,IACF;AAEA,OAAG,UAAU,MAAM;AACjB,UAAI,CAAC,UAAU;AAAE,mBAAW;AAAM,eAAO,IAAI,MAAM,uBAAuB,CAAC;AAAA,MAAG;AAAA,IAChF;AAEA,OAAG,UAAU,MAAM;AACjB,UAAI,CAAC,UAAU;AAAE,mBAAW;AAAM,gBAAQ;AAAA,MAAG;AAAA,IAC/C;AAAA,EACF,CAAC;AACH;AAGA,eAAe,aAAa,QAAkB,YAAmC;AAC/E,MAAI,OAAO,WAAW,eAAe,OAAO,WAAW,EAAG;AAE1D,QAAM,eAAe,IAAI,aAAa;AAGtC,MAAI,aAAa;AACjB,QAAM,WAAyB,CAAC;AAChC,aAAW,SAAS,QAAQ;AAC1B,UAAM,SAAS,KAAK,KAAK;AACzB,UAAM,QAAQ,IAAI,WAAW,OAAO,MAAM;AAC1C,aAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,IAAK,OAAM,CAAC,IAAI,OAAO,WAAW,CAAC;AACtE,aAAS,KAAK,KAAK;AACnB,kBAAc,MAAM;AAAA,EACtB;AAGA,QAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,MAAI,SAAS;AACb,aAAW,SAAS,UAAU;AAAE,aAAS,IAAI,OAAO,MAAM;AAAG,cAAU,MAAM;AAAA,EAAQ;AAGrF,QAAM,aAAa,SAAS,SAAS;AACrC,QAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,QAAM,cAAc,YAAY,eAAe,CAAC;AAChD,QAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,gBAAY,CAAC,IAAI,SAAS,SAAS,IAAI,GAAG,IAAI,IAAI;AAAA,EACpD;AAGA,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAM,SAAS,aAAa,mBAAmB;AAC/C,WAAO,SAAS;AAChB,WAAO,QAAQ,aAAa,WAAW;AACvC,WAAO,UAAU,MAAM;AAAE,mBAAa,MAAM;AAAG,cAAQ;AAAA,IAAG;AAC1D,WAAO,MAAM,CAAC;AAAA,EAChB,CAAC;AACH;AA8BO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AAExC,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,UAAU,QAAQ;AAE3B,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI,KAAK,KAAK,UAAU;AAAA,YACtB,MAAM;AAAA,YACN,SAAS;AAAA,cACP,MAAM;AAAA,cACN;AAAA,cACA,iBAAiB;AAAA,cACjB,aAAa;AAAA,YACf;AAAA,UACF,CAAC,CAAC;AAAA,QACJ;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAAA,QAC7B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU,WAAW;AAAA,UACvB;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAeA,OAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG,KAAK,KAAK,UAAU;AAAA,MACrB,MAAM;AAAA,MACN;AAAA,IACF,CAAC,CAAC;AAEF,eAAW,MAAM;AACf,SAAI,KAAK,KAAK,UAAU;AAAA,QACtB,MAAM;AAAA,MACR,CAAC,CAAC;AAAA,IACJ,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,UAAU,QAAkB;AACnC,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WAAyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,OAAAA;AAAA,IACA;AAAA,EACF;AACF;","names":["speak"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts-client.ts"],"sourcesContent":["/**\n * TTS Realtime WebSocket Client\n *\n * WebSocket-based real-time text-to-speech synthesis with multiple voice options.\n * Built-in playback only supports PCM format.\n *\n * @example\n * ```typescript\n * const client = createTTSClient({\n * voice: \"Cherry\",\n * autoPlay: true,\n * onReady() {\n * console.log(\"TTS ready\");\n * },\n * onAudioStart() {\n * console.log(\"Playing audio\");\n * },\n * onAudioEnd() {\n * console.log(\"Playback ended\");\n * },\n * });\n *\n * await client.connect();\n * await client.speak(\"Hello, this is a test.\");\n * // client.close();\n * ```\n */\n\nconst TTS_PATH = \"/api/proxy/builtin/platform/qwen-tts/api-ws/v1/realtime\";\n\nexport interface TTSClientConfig {\n /** Get access token for WebSocket authentication */\n getAccessToken?: () => string | null;\n /** Voice name, default 'Cherry' */\n voice?: string;\n /** Auto play audio, default true */\n autoPlay?: boolean;\n /** Audio format, default 'pcm' */\n audioFormat?: \"pcm\" | \"mp3\" | \"wav\" | \"opus\";\n /** Sample rate, default 24000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when audio playback starts */\n onAudioStart?: () => void;\n /** Called when audio playback ends */\n onAudioEnd?: () => void;\n /** Called on each audio chunk received */\n onAudioChunk?: (chunk: string[]) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n}\n\nexport interface TTSClient {\n /** Connect to TTS service */\n connect(): Promise<void>;\n /** Synthesize speech from text */\n speak(text: string): Promise<void>;\n /** Play audio from chunks */\n play(): void;\n /** Stop audio playback */\n stop(): void;\n /** Close connection */\n close(): void;\n}\n\nfunction createTTSClient(config: TTSClientConfig): TTSClient {\n const {\n getAccessToken,\n voice = \"Cherry\",\n autoPlay = true,\n audioFormat = \"pcm\",\n sampleRate = 24000,\n onReady,\n onAudioStart,\n onAudioEnd,\n onAudioChunk,\n onError,\n } = config;\n\n let ws: WebSocket | null = null;\n let audioChunks: string[] = [];\n let audioContext: AudioContext | null = null;\n let audioSource: AudioBufferSourceNode | null = null;\n\n async function connect(): Promise<void> {\n return new Promise((resolve, reject) => {\n // Build WebSocket URL with optional token parameter\n let wsUrl = TTS_PATH;\n if (getAccessToken) {\n const token = getAccessToken();\n if (token) {\n const separator = wsUrl.includes(\"?\") ? \"&\" : \"?\";\n wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;\n }\n }\n\n ws = new WebSocket(wsUrl);\n\n ws.onopen = () => {};\n\n ws.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n ws!.send(\n JSON.stringify({\n type: \"session.update\",\n session: {\n mode: \"server_commit\",\n voice,\n response_format: audioFormat,\n sample_rate: sampleRate,\n },\n })\n );\n }\n\n if (data.type === \"session.updated\") {\n onReady?.();\n resolve();\n }\n\n if (data.type === \"response.audio.delta\") {\n audioChunks.push(data.delta);\n onAudioChunk?.(audioChunks);\n }\n\n if (data.type === \"response.audio.done\") {\n onAudioChunk?.(audioChunks);\n if (autoPlay && typeof window !== \"undefined\") {\n playAudio();\n }\n }\n\n if (data.type === \"response.done\") {\n ws!.send(JSON.stringify({ type: \"session.finish\" }));\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"Unknown error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws.onerror = () => {\n const err = new Error(\"WebSocket connection error\");\n onError?.(err);\n reject(err);\n };\n\n ws.onclose = () => {\n ws = null;\n };\n });\n }\n\n async function speak(text: string): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n audioChunks = [];\n\n ws.send(\n JSON.stringify({\n type: \"input_text_buffer.append\",\n text,\n })\n );\n\n setTimeout(() => {\n ws!.send(\n JSON.stringify({\n type: \"input_text_buffer.commit\",\n })\n );\n }, 100);\n }\n\n function playAudio() {\n let chunks: string[] = audioChunks;\n if (typeof window === \"undefined\") return;\n\n try {\n if (!audioContext) {\n audioContext = new AudioContext();\n }\n\n onAudioStart?.();\n\n let totalBytes = 0;\n const allBytes: Uint8Array[] = [];\n\n for (const chunk of chunks) {\n const binaryString = atob(chunk);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n allBytes.push(bytes);\n totalBytes += bytes.length;\n }\n\n const combined = new Uint8Array(totalBytes);\n let offset = 0;\n for (const bytes of allBytes) {\n combined.set(bytes, offset);\n offset += bytes.length;\n }\n\n const numSamples = combined.length / 2;\n const audioBuffer = audioContext.createBuffer(1, numSamples, sampleRate);\n const channelData = audioBuffer.getChannelData(0);\n\n const dataView = new DataView(combined.buffer);\n for (let i = 0; i < numSamples; i++) {\n const int16 = dataView.getInt16(i * 2, true);\n channelData[i] = int16 / 32768.0;\n }\n\n const source = audioContext.createBufferSource();\n source.buffer = audioBuffer;\n source.connect(audioContext.destination);\n source.onended = () => onAudioEnd?.();\n source.start(0);\n audioSource = source;\n } catch (err) {\n onError?.(err as Error);\n }\n }\n\n function stopAudio() {\n if (audioSource) {\n audioSource.stop();\n audioSource = null;\n }\n if (audioContext) {\n audioContext.close();\n audioContext = null;\n }\n }\n\n function close() {\n if (ws) {\n ws.close();\n ws = null;\n }\n stopAudio();\n }\n\n return {\n connect,\n speak,\n close,\n play: playAudio,\n stop: stopAudio,\n };\n}\n\nexport default (\n authConfig: Pick<TTSClientConfig, \"getAccessToken\">\n): ((config: TTSClientConfig) => TTSClient) => {\n return (config: TTSClientConfig) => {\n return createTTSClient({ ...authConfig, ...config });\n };\n};\n"],"mappings":";AA4BA,IAAM,WAAW;AAsCjB,SAAS,gBAAgB,QAAoC;AAC3D,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR,WAAW;AAAA,IACX,cAAc;AAAA,IACd,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAwB,CAAC;AAC7B,MAAI,eAAoC;AACxC,MAAI,cAA4C;AAEhD,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AAEtC,UAAI,QAAQ;AACZ,UAAI,gBAAgB;AAClB,cAAM,QAAQ,eAAe;AAC7B,YAAI,OAAO;AACT,gBAAM,YAAY,MAAM,SAAS,GAAG,IAAI,MAAM;AAC9C,kBAAQ,GAAG,KAAK,GAAG,SAAS,SAAS,mBAAmB,KAAK,CAAC;AAAA,QAChE;AAAA,MACF;AAEA,WAAK,IAAI,UAAU,KAAK;AAExB,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,aAAI;AAAA,YACF,KAAK,UAAU;AAAA,cACb,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,MAAM;AAAA,gBACN;AAAA,gBACA,iBAAiB;AAAA,gBACjB,aAAa;AAAA,cACf;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,wBAAwB;AACxC,sBAAY,KAAK,KAAK,KAAK;AAC3B,yBAAe,WAAW;AAAA,QAC5B;AAEA,YAAI,KAAK,SAAS,uBAAuB;AACvC,yBAAe,WAAW;AAC1B,cAAI,YAAY,OAAO,WAAW,aAAa;AAC7C,sBAAU;AAAA,UACZ;AAAA,QACF;AAEA,YAAI,KAAK,SAAS,iBAAiB;AACjC,aAAI,KAAK,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC,CAAC;AAAA,QACrD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,MAAM,MAA6B;AAChD,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,kBAAc,CAAC;AAEf,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN;AAAA,MACF,CAAC;AAAA,IACH;AAEA,eAAW,MAAM;AACf,SAAI;AAAA,QACF,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF,GAAG,GAAG;AAAA,EACR;AAEA,WAAS,YAAY;AACnB,QAAI,SAAmB;AACvB,QAAI,OAAO,WAAW,YAAa;AAEnC,QAAI;AACF,UAAI,CAAC,cAAc;AACjB,uBAAe,IAAI,aAAa;AAAA,MAClC;AAEA,qBAAe;AAEf,UAAI,aAAa;AACjB,YAAM,WAAyB,CAAC;AAEhC,iBAAW,SAAS,QAAQ;AAC1B,cAAM,eAAe,KAAK,KAAK;AAC/B,cAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,iBAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,gBAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,QACtC;AACA,iBAAS,KAAK,KAAK;AACnB,sBAAc,MAAM;AAAA,MACtB;AAEA,YAAM,WAAW,IAAI,WAAW,UAAU;AAC1C,UAAI,SAAS;AACb,iBAAW,SAAS,UAAU;AAC5B,iBAAS,IAAI,OAAO,MAAM;AAC1B,kBAAU,MAAM;AAAA,MAClB;AAEA,YAAM,aAAa,SAAS,SAAS;AACrC,YAAM,cAAc,aAAa,aAAa,GAAG,YAAY,UAAU;AACvE,YAAM,cAAc,YAAY,eAAe,CAAC;AAEhD,YAAM,WAAW,IAAI,SAAS,SAAS,MAAM;AAC7C,eAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,cAAM,QAAQ,SAAS,SAAS,IAAI,GAAG,IAAI;AAC3C,oBAAY,CAAC,IAAI,QAAQ;AAAA,MAC3B;AAEA,YAAM,SAAS,aAAa,mBAAmB;AAC/C,aAAO,SAAS;AAChB,aAAO,QAAQ,aAAa,WAAW;AACvC,aAAO,UAAU,MAAM,aAAa;AACpC,aAAO,MAAM,CAAC;AACd,oBAAc;AAAA,IAChB,SAAS,KAAK;AACZ,gBAAU,GAAY;AAAA,IACxB;AAAA,EACF;AAEA,WAAS,YAAY;AACnB,QAAI,aAAa;AACf,kBAAY,KAAK;AACjB,oBAAc;AAAA,IAChB;AACA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AACA,cAAU;AAAA,EACZ;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM;AAAA,IACN,MAAM;AAAA,EACR;AACF;AAEA,IAAO,qBAAQ,CACb,eAC6C;AAC7C,SAAO,CAAC,WAA4B;AAClC,WAAO,gBAAgB,EAAE,GAAG,YAAY,GAAG,OAAO,CAAC;AAAA,EACrD;AACF;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,45 +1,50 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@amaster.ai/tts-client",
|
|
3
|
-
"version": "1.0.0-beta.
|
|
4
|
-
"description": "Qwen TTS Realtime WebSocket client with audio playback",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "./dist/index.cjs",
|
|
7
|
-
"module": "./dist/index.js",
|
|
8
|
-
"types": "./dist/index.d.ts",
|
|
9
|
-
"exports": {
|
|
10
|
-
".": {
|
|
11
|
-
"types": "./dist/index.d.ts",
|
|
12
|
-
"import": "./dist/index.js",
|
|
13
|
-
"require": "./dist/index.cjs"
|
|
14
|
-
}
|
|
15
|
-
},
|
|
16
|
-
"files": [
|
|
17
|
-
"dist",
|
|
18
|
-
"README.md"
|
|
19
|
-
],
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
|
|
35
|
-
"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
},
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
"
|
|
44
|
-
|
|
45
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "@amaster.ai/tts-client",
|
|
3
|
+
"version": "1.0.0-beta.73",
|
|
4
|
+
"description": "Qwen TTS Realtime WebSocket client with audio playback",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.cjs",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"require": "./dist/index.cjs"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist",
|
|
18
|
+
"README.md"
|
|
19
|
+
],
|
|
20
|
+
"keywords": [
|
|
21
|
+
"tts",
|
|
22
|
+
"text-to-speech",
|
|
23
|
+
"qwen",
|
|
24
|
+
"realtime",
|
|
25
|
+
"websocket",
|
|
26
|
+
"audio",
|
|
27
|
+
"speech-synthesis"
|
|
28
|
+
],
|
|
29
|
+
"author": "Amaster Team",
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"publishConfig": {
|
|
32
|
+
"access": "public",
|
|
33
|
+
"registry": "https://registry.npmjs.org/"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"jsdom": "^23.2.0",
|
|
37
|
+
"tsup": "^8.3.5",
|
|
38
|
+
"typescript": "~5.7.2",
|
|
39
|
+
"vitest": "^1.6.1"
|
|
40
|
+
},
|
|
41
|
+
"scripts": {
|
|
42
|
+
"build": "tsup",
|
|
43
|
+
"dev": "tsup --watch",
|
|
44
|
+
"clean": "rm -rf dist *.tsbuildinfo",
|
|
45
|
+
"type-check": "tsc --noEmit",
|
|
46
|
+
"test": "vitest run",
|
|
47
|
+
"test:watch": "vitest",
|
|
48
|
+
"test:ui": "vitest --ui"
|
|
49
|
+
}
|
|
50
|
+
}
|