@baishuyun/chat-sdk 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/chat-sdk.js +10842 -10678
- package/dist/chat-sdk.js.map +1 -1
- package/dist/chat-sdk.umd.cjs +154 -154
- package/dist/chat-sdk.umd.cjs.map +1 -1
- package/dist/index.css +1 -1
- package/package.json +1 -1
- package/src/chat.tsx +5 -0
- package/src/components/biz-comp/multi-modal-input/index.tsx +47 -0
- package/src/components/biz-comp/multi-modal-input/voice-btn.tsx +36 -0
- package/src/components/bs-ui/bs-icons.tsx +13 -0
- package/src/hooks/use-voice-input.ts +254 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import { useCallback, useRef, useState } from 'react';
|
|
2
|
+
|
|
3
|
+
export interface UseVoiceInputOptions {
  /** ASR WebSocket endpoint, e.g. ws://localhost:3001/web/api/asr. Optional: when absent, start() reports an error. */
  endpoint?: string;
  /** Called with each chunk of recognized text received from the backend. */
  onText: (text: string) => void;
  /** Called once when recognition has fully completed. */
  onDone: () => void;
  /** Called with a human-readable message when an error occurs. */
  onError: (msg: string) => void;
}
|
|
13
|
+
|
|
14
|
+
export interface UseVoiceInputReturn {
  /** True while recording (microphone is capturing audio). */
  isRecording: boolean;
  /** True while connecting (requesting mic permission / WebSocket handshake). */
  isConnecting: boolean;
  /** True while recognizing (recording stopped, waiting for the backend result). */
  isRecognizing: boolean;
  /** Start recording. */
  start: () => Promise<void>;
  /** Stop recording. */
  stop: () => void;
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* 语音输入 Hook
|
|
29
|
+
*
|
|
30
|
+
* 使用浏览器原生 Web Audio API(AudioContext + ScriptProcessorNode)
|
|
31
|
+
* 直接录制 pcm_s16le 格式音频,通过 WebSocket 发送到后端
|
|
32
|
+
* 无第三方语音识别 SDK,只与 Node 后端通信
|
|
33
|
+
*/
|
|
34
|
+
export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputReturn {
|
|
35
|
+
const { endpoint, onText, onDone, onError } = options;
|
|
36
|
+
|
|
37
|
+
const [isRecording, setIsRecording] = useState(false);
|
|
38
|
+
const [isConnecting, setIsConnecting] = useState(false);
|
|
39
|
+
const [isRecognizing, setIsRecognizing] = useState(false);
|
|
40
|
+
|
|
41
|
+
const audioContextRef = useRef<AudioContext | null>(null);
|
|
42
|
+
const processorRef = useRef<ScriptProcessorNode | null>(null);
|
|
43
|
+
const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
|
|
44
|
+
const wsRef = useRef<WebSocket | null>(null);
|
|
45
|
+
const streamRef = useRef<MediaStream | null>(null);
|
|
46
|
+
|
|
47
|
+
const cleanup = useCallback(() => {
|
|
48
|
+
if (processorRef.current) {
|
|
49
|
+
try {
|
|
50
|
+
processorRef.current.disconnect();
|
|
51
|
+
} catch {
|
|
52
|
+
// ignore
|
|
53
|
+
}
|
|
54
|
+
processorRef.current = null;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
if (sourceRef.current) {
|
|
58
|
+
try {
|
|
59
|
+
sourceRef.current.disconnect();
|
|
60
|
+
} catch {
|
|
61
|
+
// ignore
|
|
62
|
+
}
|
|
63
|
+
sourceRef.current = null;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
|
|
67
|
+
try {
|
|
68
|
+
audioContextRef.current.close();
|
|
69
|
+
} catch {
|
|
70
|
+
// ignore
|
|
71
|
+
}
|
|
72
|
+
audioContextRef.current = null;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
if (wsRef.current) {
|
|
76
|
+
try {
|
|
77
|
+
wsRef.current.close();
|
|
78
|
+
} catch {
|
|
79
|
+
// ignore
|
|
80
|
+
}
|
|
81
|
+
wsRef.current = null;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
if (streamRef.current) {
|
|
85
|
+
streamRef.current.getTracks().forEach((track) => track.stop());
|
|
86
|
+
streamRef.current = null;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
setIsRecording(false);
|
|
90
|
+
setIsConnecting(false);
|
|
91
|
+
setIsRecognizing(false);
|
|
92
|
+
}, []);
|
|
93
|
+
|
|
94
|
+
const start = useCallback(async () => {
|
|
95
|
+
if (!endpoint) {
|
|
96
|
+
onError('ASR 服务未配置');
|
|
97
|
+
return;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// 清理之前的状态
|
|
101
|
+
cleanup();
|
|
102
|
+
|
|
103
|
+
setIsConnecting(true);
|
|
104
|
+
|
|
105
|
+
try {
|
|
106
|
+
// 1. 获取麦克风权限
|
|
107
|
+
const stream = await navigator.mediaDevices.getUserMedia({
|
|
108
|
+
audio: {
|
|
109
|
+
sampleRate: 16000,
|
|
110
|
+
channelCount: 1,
|
|
111
|
+
echoCancellation: true,
|
|
112
|
+
noiseSuppression: true,
|
|
113
|
+
},
|
|
114
|
+
});
|
|
115
|
+
streamRef.current = stream;
|
|
116
|
+
|
|
117
|
+
// 2. 创建 AudioContext(16kHz 单声道,匹配火山引擎要求)
|
|
118
|
+
const audioContext = new AudioContext({ sampleRate: 16000 });
|
|
119
|
+
audioContextRef.current = audioContext;
|
|
120
|
+
|
|
121
|
+
const source = audioContext.createMediaStreamSource(stream);
|
|
122
|
+
sourceRef.current = source;
|
|
123
|
+
|
|
124
|
+
// 3. 创建 ScriptProcessorNode(bufferSize=2048,约 128ms @ 16kHz,符合火山引擎建议的 100-200ms)
|
|
125
|
+
const processor = audioContext.createScriptProcessor(2048, 1, 1);
|
|
126
|
+
processorRef.current = processor;
|
|
127
|
+
|
|
128
|
+
// 4. 创建 WebSocket 连接
|
|
129
|
+
const ws = new WebSocket(endpoint);
|
|
130
|
+
ws.binaryType = 'arraybuffer';
|
|
131
|
+
wsRef.current = ws;
|
|
132
|
+
|
|
133
|
+
let hasReceivedResult = false;
|
|
134
|
+
|
|
135
|
+
ws.onopen = () => {
|
|
136
|
+
// WebSocket 连接成功后,连接音频处理节点开始录音
|
|
137
|
+
source.connect(processor);
|
|
138
|
+
processor.connect(audioContext.destination);
|
|
139
|
+
|
|
140
|
+
processor.onaudioprocess = (e) => {
|
|
141
|
+
if (ws.readyState !== WebSocket.OPEN) return;
|
|
142
|
+
|
|
143
|
+
const inputData = e.inputBuffer.getChannelData(0); // Float32Array
|
|
144
|
+
const pcmData = float32ToInt16(inputData);
|
|
145
|
+
ws.send(pcmData.buffer);
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
setIsConnecting(false);
|
|
149
|
+
setIsRecording(true);
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
ws.onmessage = (event) => {
|
|
153
|
+
try {
|
|
154
|
+
const data = JSON.parse(event.data as string);
|
|
155
|
+
if (data.type === 'result' || data.type === 'final') {
|
|
156
|
+
hasReceivedResult = true;
|
|
157
|
+
onText(data.text || '');
|
|
158
|
+
}
|
|
159
|
+
if (data.type === 'done') {
|
|
160
|
+
setIsRecognizing(false);
|
|
161
|
+
onDone();
|
|
162
|
+
}
|
|
163
|
+
if (data.type === 'error') {
|
|
164
|
+
setIsRecognizing(false);
|
|
165
|
+
onError(data.message || '语音识别出错');
|
|
166
|
+
}
|
|
167
|
+
} catch {
|
|
168
|
+
// 忽略非 JSON 消息
|
|
169
|
+
}
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
ws.onerror = () => {
|
|
173
|
+
onError('语音识别连接失败');
|
|
174
|
+
cleanup();
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
ws.onclose = () => {
|
|
178
|
+
// 如果已经收到过结果但还没收到 done,可能是连接异常关闭
|
|
179
|
+
if (hasReceivedResult) {
|
|
180
|
+
setIsRecognizing(false);
|
|
181
|
+
}
|
|
182
|
+
setIsRecording(false);
|
|
183
|
+
setIsConnecting(false);
|
|
184
|
+
};
|
|
185
|
+
} catch (error) {
|
|
186
|
+
setIsConnecting(false);
|
|
187
|
+
const msg = error instanceof Error ? error.message : '录音启动失败';
|
|
188
|
+
if (msg.includes('Permission denied') || msg.includes('NotAllowedError')) {
|
|
189
|
+
onError('麦克风权限被拒绝');
|
|
190
|
+
} else if (msg.includes('NotFoundError')) {
|
|
191
|
+
onError('未找到麦克风设备');
|
|
192
|
+
} else {
|
|
193
|
+
onError(msg);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
}, [endpoint, onText, onDone, onError, cleanup]);
|
|
197
|
+
|
|
198
|
+
const stop = useCallback(() => {
|
|
199
|
+
// 断开音频处理节点,停止录音
|
|
200
|
+
if (processorRef.current) {
|
|
201
|
+
processorRef.current.onaudioprocess = null;
|
|
202
|
+
try {
|
|
203
|
+
processorRef.current.disconnect();
|
|
204
|
+
} catch {
|
|
205
|
+
// ignore
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
if (sourceRef.current) {
|
|
210
|
+
try {
|
|
211
|
+
sourceRef.current.disconnect();
|
|
212
|
+
} catch {
|
|
213
|
+
// ignore
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
|
|
218
|
+
try {
|
|
219
|
+
audioContextRef.current.close();
|
|
220
|
+
} catch {
|
|
221
|
+
// ignore
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
// 停止麦克风轨道
|
|
226
|
+
if (streamRef.current) {
|
|
227
|
+
streamRef.current.getTracks().forEach((track) => track.stop());
|
|
228
|
+
streamRef.current = null;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
// 发送结束信号
|
|
232
|
+
if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
|
|
233
|
+
wsRef.current.send('end');
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// 停止录音,进入识别中状态
|
|
237
|
+
setIsRecording(false);
|
|
238
|
+
setIsRecognizing(true);
|
|
239
|
+
}, []);
|
|
240
|
+
|
|
241
|
+
return { isRecording, isConnecting, isRecognizing, start, stop };
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/**
|
|
245
|
+
* Float32Array (-1.0 ~ 1.0) 转换为 Int16Array (pcm_s16le)
|
|
246
|
+
*/
|
|
247
|
+
function float32ToInt16(float32Array: Float32Array): Int16Array {
|
|
248
|
+
const int16Array = new Int16Array(float32Array.length);
|
|
249
|
+
for (let i = 0; i < float32Array.length; i++) {
|
|
250
|
+
const s = Math.max(-1, Math.min(1, float32Array[i]));
|
|
251
|
+
int16Array[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
|
|
252
|
+
}
|
|
253
|
+
return int16Array;
|
|
254
|
+
}
|