@baishuyun/chat-sdk 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,254 @@
1
+ import { useCallback, useRef, useState } from 'react';
2
+
3
+ export interface UseVoiceInputOptions {
4
+ /** ASR WebSocket endpoint, e.g. ws://localhost:3001/web/api/asr */
5
+ endpoint?: string;
6
+ /** Invoked with each piece of recognized text received from the backend */
7
+ onText: (text: string) => void;
8
+ /** Invoked when recognition has completed */
9
+ onDone: () => void;
10
+ /** Invoked when an error occurs */
11
+ onError: (msg: string) => void;
12
+ }
13
+
14
+ export interface UseVoiceInputReturn {
15
+ /** True while recording (microphone is capturing audio) */
16
+ isRecording: boolean;
17
+ /** True while connecting (acquiring mic permission / WebSocket handshake) */
18
+ isConnecting: boolean;
19
+ /** True while recognizing (recording stopped, awaiting backend results) */
20
+ isRecognizing: boolean;
21
+ /** Start recording */
22
+ start: () => Promise<void>;
23
+ /** Stop recording */
24
+ stop: () => void;
25
+ }
26
+
27
+ /**
28
+ * 语音输入 Hook
29
+ *
30
+ * 使用浏览器原生 Web Audio API(AudioContext + ScriptProcessorNode)
31
+ * 直接录制 pcm_s16le 格式音频,通过 WebSocket 发送到后端
32
+ * 无第三方语音识别 SDK,只与 Node 后端通信
33
+ */
34
+ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputReturn {
35
+ const { endpoint, onText, onDone, onError } = options;
36
+
37
+ const [isRecording, setIsRecording] = useState(false);
38
+ const [isConnecting, setIsConnecting] = useState(false);
39
+ const [isRecognizing, setIsRecognizing] = useState(false);
40
+
41
+ const audioContextRef = useRef<AudioContext | null>(null);
42
+ const processorRef = useRef<ScriptProcessorNode | null>(null);
43
+ const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
44
+ const wsRef = useRef<WebSocket | null>(null);
45
+ const streamRef = useRef<MediaStream | null>(null);
46
+
47
+ const cleanup = useCallback(() => {
48
+ if (processorRef.current) {
49
+ try {
50
+ processorRef.current.disconnect();
51
+ } catch {
52
+ // ignore
53
+ }
54
+ processorRef.current = null;
55
+ }
56
+
57
+ if (sourceRef.current) {
58
+ try {
59
+ sourceRef.current.disconnect();
60
+ } catch {
61
+ // ignore
62
+ }
63
+ sourceRef.current = null;
64
+ }
65
+
66
+ if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
67
+ try {
68
+ audioContextRef.current.close();
69
+ } catch {
70
+ // ignore
71
+ }
72
+ audioContextRef.current = null;
73
+ }
74
+
75
+ if (wsRef.current) {
76
+ try {
77
+ wsRef.current.close();
78
+ } catch {
79
+ // ignore
80
+ }
81
+ wsRef.current = null;
82
+ }
83
+
84
+ if (streamRef.current) {
85
+ streamRef.current.getTracks().forEach((track) => track.stop());
86
+ streamRef.current = null;
87
+ }
88
+
89
+ setIsRecording(false);
90
+ setIsConnecting(false);
91
+ setIsRecognizing(false);
92
+ }, []);
93
+
94
+ const start = useCallback(async () => {
95
+ if (!endpoint) {
96
+ onError('ASR 服务未配置');
97
+ return;
98
+ }
99
+
100
+ // 清理之前的状态
101
+ cleanup();
102
+
103
+ setIsConnecting(true);
104
+
105
+ try {
106
+ // 1. 获取麦克风权限
107
+ const stream = await navigator.mediaDevices.getUserMedia({
108
+ audio: {
109
+ sampleRate: 16000,
110
+ channelCount: 1,
111
+ echoCancellation: true,
112
+ noiseSuppression: true,
113
+ },
114
+ });
115
+ streamRef.current = stream;
116
+
117
+ // 2. 创建 AudioContext(16kHz 单声道,匹配火山引擎要求)
118
+ const audioContext = new AudioContext({ sampleRate: 16000 });
119
+ audioContextRef.current = audioContext;
120
+
121
+ const source = audioContext.createMediaStreamSource(stream);
122
+ sourceRef.current = source;
123
+
124
+ // 3. 创建 ScriptProcessorNode(bufferSize=2048,约 128ms @ 16kHz,符合火山引擎建议的 100-200ms)
125
+ const processor = audioContext.createScriptProcessor(2048, 1, 1);
126
+ processorRef.current = processor;
127
+
128
+ // 4. 创建 WebSocket 连接
129
+ const ws = new WebSocket(endpoint);
130
+ ws.binaryType = 'arraybuffer';
131
+ wsRef.current = ws;
132
+
133
+ let hasReceivedResult = false;
134
+
135
+ ws.onopen = () => {
136
+ // WebSocket 连接成功后,连接音频处理节点开始录音
137
+ source.connect(processor);
138
+ processor.connect(audioContext.destination);
139
+
140
+ processor.onaudioprocess = (e) => {
141
+ if (ws.readyState !== WebSocket.OPEN) return;
142
+
143
+ const inputData = e.inputBuffer.getChannelData(0); // Float32Array
144
+ const pcmData = float32ToInt16(inputData);
145
+ ws.send(pcmData.buffer);
146
+ };
147
+
148
+ setIsConnecting(false);
149
+ setIsRecording(true);
150
+ };
151
+
152
+ ws.onmessage = (event) => {
153
+ try {
154
+ const data = JSON.parse(event.data as string);
155
+ if (data.type === 'result' || data.type === 'final') {
156
+ hasReceivedResult = true;
157
+ onText(data.text || '');
158
+ }
159
+ if (data.type === 'done') {
160
+ setIsRecognizing(false);
161
+ onDone();
162
+ }
163
+ if (data.type === 'error') {
164
+ setIsRecognizing(false);
165
+ onError(data.message || '语音识别出错');
166
+ }
167
+ } catch {
168
+ // 忽略非 JSON 消息
169
+ }
170
+ };
171
+
172
+ ws.onerror = () => {
173
+ onError('语音识别连接失败');
174
+ cleanup();
175
+ };
176
+
177
+ ws.onclose = () => {
178
+ // 如果已经收到过结果但还没收到 done,可能是连接异常关闭
179
+ if (hasReceivedResult) {
180
+ setIsRecognizing(false);
181
+ }
182
+ setIsRecording(false);
183
+ setIsConnecting(false);
184
+ };
185
+ } catch (error) {
186
+ setIsConnecting(false);
187
+ const msg = error instanceof Error ? error.message : '录音启动失败';
188
+ if (msg.includes('Permission denied') || msg.includes('NotAllowedError')) {
189
+ onError('麦克风权限被拒绝');
190
+ } else if (msg.includes('NotFoundError')) {
191
+ onError('未找到麦克风设备');
192
+ } else {
193
+ onError(msg);
194
+ }
195
+ }
196
+ }, [endpoint, onText, onDone, onError, cleanup]);
197
+
198
+ const stop = useCallback(() => {
199
+ // 断开音频处理节点,停止录音
200
+ if (processorRef.current) {
201
+ processorRef.current.onaudioprocess = null;
202
+ try {
203
+ processorRef.current.disconnect();
204
+ } catch {
205
+ // ignore
206
+ }
207
+ }
208
+
209
+ if (sourceRef.current) {
210
+ try {
211
+ sourceRef.current.disconnect();
212
+ } catch {
213
+ // ignore
214
+ }
215
+ }
216
+
217
+ if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
218
+ try {
219
+ audioContextRef.current.close();
220
+ } catch {
221
+ // ignore
222
+ }
223
+ }
224
+
225
+ // 停止麦克风轨道
226
+ if (streamRef.current) {
227
+ streamRef.current.getTracks().forEach((track) => track.stop());
228
+ streamRef.current = null;
229
+ }
230
+
231
+ // 发送结束信号
232
+ if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
233
+ wsRef.current.send('end');
234
+ }
235
+
236
+ // 停止录音,进入识别中状态
237
+ setIsRecording(false);
238
+ setIsRecognizing(true);
239
+ }, []);
240
+
241
+ return { isRecording, isConnecting, isRecognizing, start, stop };
242
+ }
243
+
244
+ /**
245
+ * Float32Array (-1.0 ~ 1.0) 转换为 Int16Array (pcm_s16le)
246
+ */
247
+ function float32ToInt16(float32Array: Float32Array): Int16Array {
248
+ const int16Array = new Int16Array(float32Array.length);
249
+ for (let i = 0; i < float32Array.length; i++) {
250
+ const s = Math.max(-1, Math.min(1, float32Array[i]));
251
+ int16Array[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
252
+ }
253
+ return int16Array;
254
+ }
package/src/lib/utils.ts CHANGED
@@ -419,17 +419,87 @@ export const formatJSONIfValid = (str: string): string | null => {
419
419
  }
420
420
  };
421
421
 
422
+ // Extension → MIME type table, used as a fallback when the server's
+ // Content-Type header is missing or the generic octet-stream.
+ const MIME_MAP: Record<string, string> = {
423
+ // Documents
424
+ xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
425
+ xls: 'application/vnd.ms-excel',
426
+ docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
427
+ pdf: 'application/pdf',
428
+ txt: 'text/plain',
429
+ // Images
430
+ png: 'image/png',
431
+ jpg: 'image/jpeg',
432
+ jpeg: 'image/jpeg',
433
+ gif: 'image/gif',
434
+ webp: 'image/webp',
435
+ svg: 'image/svg+xml',
436
+ // Audio / video
437
+ mp4: 'video/mp4',
438
+ mp3: 'audio/mpeg',
439
+ // Archives
440
+ zip: 'application/zip',
441
+ rar: 'application/x-rar-compressed',
442
+ '7z': 'application/x-7z-compressed',
443
+ // Code / data
444
+ json: 'application/json',
445
+ js: 'application/javascript',
446
+ ts: 'application/typescript',
447
+ csv: 'text/csv',
448
+ xml: 'application/xml',
449
+ // Default fallback
450
+ default: 'application/octet-stream',
451
+ };
452
+
453
+ const getMimeType = (filename: string, serverType: string | null): string => {
454
+ // 如果服务器给了具体类型(不是 octet-stream),就信任它
455
+ if (serverType && serverType !== 'application/octet-stream') {
456
+ return serverType;
457
+ }
458
+
459
+ // 否则从扩展名推断
460
+ const ext = filename.split('.').pop()?.toLowerCase() || '';
461
+ return MIME_MAP[ext] || MIME_MAP.default;
462
+ };
463
+
422
464
  /**
423
465
  * 从 URL 下载文件,返回 File 对象
424
466
  */
425
467
  export const downloadFile = async (url: string): Promise<File> => {
426
- const response = await fetch(url);
468
+ const response = await fetch(url, {
469
+ credentials: 'include', // 必须携带,因为 Allow-Credentials: true
470
+ mode: 'cors',
471
+ });
472
+
427
473
  if (!response.ok) {
428
- throw new Error(`Failed to download file from ${url}: ${response.status}`);
474
+ throw new Error(`Failed to download: ${response.status}`);
429
475
  }
476
+
430
477
  const blob = await response.blob();
431
- const contentType = response.headers.get('content-type') || 'application/octet-stream';
432
- const filename = new URL(url).pathname.split('/').pop() || 'attachment';
478
+
479
+ // 1. Content-Disposition 解析文件名(优先)
480
+ const disposition = response.headers.get('content-disposition');
481
+ let filename = 'attachment';
482
+
483
+ if (disposition) {
484
+ // 尝试 filename*=UTF-8'' 格式(RFC 5987)
485
+ const utf8Match = disposition.match(/filename\*=UTF-8''([^;]+)/i);
486
+ // 尝试 filename="..." 或 filename=... 格式(需要解码 MinIO 的 URL 编码)
487
+ const asciiMatch = disposition.match(/filename=["']?([^"';]+)["']?/i);
488
+
489
+ if (utf8Match) {
490
+ filename = decodeURIComponent(utf8Match[1]);
491
+ } else if (asciiMatch) {
492
+ // MinIO 特殊处理:filename 可能是 URL 编码的
493
+ filename = decodeURIComponent(asciiMatch[1]);
494
+ }
495
+ }
496
+
497
+ // 2. 修正 MIME 类型(如果服务器返回的是 octet-stream)
498
+ let contentType = response.headers.get('content-type') || 'application/octet-stream';
499
+ if (contentType === 'application/octet-stream') {
500
+ contentType = getMimeType(filename, contentType);
501
+ }
502
+
433
503
  return new File([blob], filename, { type: contentType });
434
504
  };
435
505
 
@@ -468,12 +538,18 @@ export const urls2fileParts = async (attachmentsUrl: string[], uploadEndpoint: s
468
538
  return [];
469
539
  }
470
540
 
471
- const results = await Promise.all(
472
- attachmentsUrl.map(async (url) => {
473
- const file = await downloadFile(url);
474
- return uploadFile(file, uploadEndpoint);
475
- })
476
- );
541
+ let results: (Attachment | undefined)[];
542
+ try {
543
+ results = await Promise.all(
544
+ attachmentsUrl.map(async (url) => {
545
+ const file = await downloadFile(url);
546
+ return uploadFile(file, uploadEndpoint);
547
+ })
548
+ );
549
+ } catch (error) {
550
+ console.error('Error processing attachments:', error);
551
+ return [];
552
+ }
477
553
 
478
554
  return results
479
555
  .filter((attachment): attachment is Attachment => attachment !== undefined)
package/src/sdk.impl.tsx CHANGED
@@ -133,16 +133,19 @@ export class ChatSDK implements IChatSDK {
133
133
  let fileParts: ChatMessage['parts'] = await urls2fileParts(attachmentsUrl || [], uploadEndpoint);
134
134
  this.Store.getState().clearGlobalFakeLoadingMessage();
135
135
 
136
+ const textParts = text.trim() ? [{ type: 'text' as const, text }] : [];
137
+
136
138
  const msg = fileParts.length > 0
137
139
  ? {
138
140
  role: 'user',
139
141
  parts: [
140
142
  ...fileParts,
141
- { type: 'text' as const, text },
143
+ ...textParts,
142
144
  ],
143
145
  } as ChatMessage
144
146
  : { text };
145
147
 
148
+
146
149
  this._sendMessage?.(
147
150
  msg,
148
151
  {