yt-chat-components 1.1.9 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "yt-chat-components",
-  "version": "1.1.9",
+  "version": "1.2.1",
   "main": "build/static/js/bundle.min.js",
   "module": "build/static/js/bundle.min.js",
   "types": "build/static/js/index.d.ts",
@@ -42,9 +42,10 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
 
   // VAD-related state
   const isSpeakingRef = useRef<boolean>(false);
-  const [isListening, setIsListening] = useState(false);
   const SILENCE_THRESHOLD = 35; // Silence threshold; adjust for the environment
   const SPEECH_DELAY = 1500; // How long after speech stops before an utterance is considered finished (ms)
+  // Holds the id of the last sent message; while it is non-null, any further speech is ignored
+  const msgIdRef = useRef<string | null>(null);
 
   // WebRTC-related state and refs
   const peerConnectionRef = useRef<RTCPeerConnection | null>(null);
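The new msgIdRef replaces the removed isListening flag: once an utterance has been sent, the ref holds the outgoing message id until the backend replies, and the VAD processor ignores any speech captured in the meantime (see the onaudioprocess and complete-audio hunks below). A minimal sketch of that in-flight gate as a standalone hook; the hook and helper names are illustrative, only msgIdRef and the timestamp id scheme come from the package:

import { useRef } from 'react';

function useInFlightGate() {
  const msgIdRef = useRef<string | null>(null); // null means no request is awaiting a reply

  const markSent = (): string => {
    const id = new Date().getTime() + ''; // same id scheme the diff uses
    msgIdRef.current = id;
    return id;
  };
  const markAnswered = () => { msgIdRef.current = null; };
  const shouldSkipSpeech = () => msgIdRef.current !== null; // drop VAD frames while waiting

  return { msgIdRef, markSent, markAnswered, shouldSkipSpeech };
}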
@@ -61,7 +62,8 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
   const speechTimeoutRef = useRef<NodeJS.Timeout | null>(null);
   const vadProcessorRef = useRef<ScriptProcessorNode | null>(null);
   const audioContextRef = useRef<AudioContext | null>(null);
-
+  const [isSpeakingNow,setIsSpeakingNow] = useState(false);
+
 
   // Unified WebSocket message sending helper
   const sendWebSocketMessage = (type: string, data: any) => {
@@ -73,7 +75,6 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
   };
 
   const handleAudioData = (audioData: string) => {
-    setIsListening(true);
     setWorkStatus('正在回复');
 
     // Convert the base64 audio to a Blob
@@ -90,33 +91,24 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
     const audio = new Audio(audioUrl);
     responseAudio = audio;
 
-    // Listen for the playback-ended event
-    audio.onended = () => {
-      URL.revokeObjectURL(audioUrl);
-      setIsListening(false);
-      setWorkStatus('正在聆听');
-    };
-
     // Listen for user interruption
     audio.onplay = () => {
-      // Set up interruption detection
+      // Set up interruption detection; it only kicks in after 3 seconds
       const interruptionCheck = setInterval(() => {
         if (isSpeakingRef.current) {
           // The user started speaking; interrupt playback
-          console.log('用户打断播放');
+          // console.log('用户打断播放');
           audio.pause();
           URL.revokeObjectURL(audioUrl);
-          setIsListening(false);
-          setWorkStatus('正在聆听');
           clearInterval(interruptionCheck);
+          setWorkStatus('正在聆听');
         }
-      }, 100);
+      }, 3000);
 
       // Clear the detection when playback ends
       audio.onended = () => {
         clearInterval(interruptionCheck);
         URL.revokeObjectURL(audioUrl);
-        setIsListening(false);
         setWorkStatus('正在聆听');
       };
     };
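Two behavioural changes land in this hunk: the duplicate onended handler that used to run before onplay is gone, and the interruption poll now fires every 3000 ms instead of every 100 ms, so playback is only cut off if the user is still flagged as speaking when a 3-second tick comes around. A rough sketch of the new polling shape; apart from the 3000 ms interval, the names here are assumed for illustration:

const INTERRUPTION_POLL_MS = 3000; // was 100 in 1.1.9

function watchForInterruption(
  audio: HTMLAudioElement,
  isUserSpeaking: () => boolean,
  onInterrupted: () => void,
): void {
  const timer = setInterval(() => {
    if (isUserSpeaking()) {
      audio.pause();         // stop the assistant's reply
      clearInterval(timer);
      onInterrupted();       // e.g. revoke the object URL and switch back to listening
    }
  }, INTERRUPTION_POLL_MS);

  // When playback finishes on its own, stop polling as well.
  audio.addEventListener('ended', () => clearInterval(timer));
}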
@@ -124,7 +116,6 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
     // Start playback
     audio.play().catch(err => {
       console.error('播放音频失败:', err);
-      setIsListening(false);
       setWorkStatus('正在聆听');
     });
   };
@@ -145,7 +136,7 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
     const {code} = userInfo;
     const opeartorId = code || sessionId;
     // Create the WebSocket connection
-    console.log('开始连接WebSocket', flowId, sessionId, api_key);
+    console.log('开始连接WebSocket');
     const ws = new WebSocket(`${hostUrl.replace('http', 'ws')}/api/v1/ws/webrtc?flow_id=${flowId}&session_id=${sessionId}&api_key=${api_key}&operator_id=${opeartorId}`);
     wsRef.current = ws;
 
@@ -210,7 +201,14 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
     vadProcessorRef.current = vadProcessor;
 
     vadProcessor.onaudioprocess = (e) => {
-      if (isListening) return; // The other side is speaking; don't process
+      // The other side is speaking or thinking; don't process
+      if (workStatus === '正在回复'){
+        console.log("--------------------- 正在回复")
+        return
+      }else if(msgIdRef.current !== null){
+        console.log("--------------------- 正在思考")
+        return;
+      }
 
       analyser.getByteFrequencyData(dataArray);
 
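In place of the removed isListening check, the processor now bails out in two situations: the assistant's reply is playing (workStatus === '正在回复', "replying") or a request is still in flight (msgIdRef.current !== null, i.e. "thinking"). Restated as a standalone predicate for illustration only:

// The three workStatus values the component uses: listening / thinking / replying.
type WorkStatus = '正在聆听' | '正在思考' | '正在回复';

function shouldRunVad(status: WorkStatus, pendingMsgId: string | null): boolean {
  if (status === '正在回复') return false;  // assistant audio is currently playing
  if (pendingMsgId !== null) return false;  // an utterance was sent and no reply has arrived yet
  return true;                              // otherwise analyse this audio frame
}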
@@ -223,12 +221,12 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
 
       // Detect whether speech is present
       const isSpeakingNow = average > SILENCE_THRESHOLD;
-
+
       if (isSpeakingNow && !isSpeakingRef.current) {
+        setIsSpeakingNow(true)
         // The user started speaking
-        console.log('开始说话', isSpeakingRef.current);
         isSpeakingRef.current = true;
-
+
         // Start recording
         if (!recorderRef.current && localStreamRef.current) {
           console.log('准备录制');
@@ -257,21 +255,20 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
           });
           recorder.start();
         }
-
+
         // Clear the previous timeout
        if (speechTimeoutRef.current) {
          clearTimeout(speechTimeoutRef.current);
          speechTimeoutRef.current = null;
        }
-      } else if (!isSpeakingNow && isSpeakingRef.current) {
-        console.log('准备停止说话');
+      }
+      else if (!isSpeakingNow && isSpeakingRef.current) {
        // May have stopped speaking; set a timeout
        if (!speechTimeoutRef.current) {
          speechTimeoutRef.current = setTimeout(() => {
            // Confirmed the user has stopped speaking
-            console.log('停止说话');
            isSpeakingRef.current = false;
-
+
            // Stop recording
            if (recorderRef.current) {
              recorderRef.current.stop((blob: Blob, duration:any, mime:any) => {
@@ -281,16 +278,15 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
                reader.readAsDataURL(blob);
                reader.onloadend = () => {
                  const base64Audio = reader.result as string;
-                  console.log('准备发送');
                  // Send the complete speech segment to the server
                  if (wsRef.current) {
-                    console.log('发送数据');
+                    msgIdRef.current = new Date().getTime() + ""
                    sendWebSocketMessage('complete-audio', {
                      audioData: base64Audio.split(',')[1], // strip the data URL prefix
                      userMessage: true,
                      audioFormat:"mp3",
                      sampleRate:16000,
-                      voice:"longxiang"
+                      msgId: msgIdRef.current,
                    });
                    console.log('发送完毕');
 
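The outgoing 'complete-audio' message now carries a client-generated msgId (the send timestamp rendered as a string) instead of the hard-coded voice: "longxiang" field. An assumed shape for the payload, with the field names taken from the diff:

interface CompleteAudioPayload {
  audioData: string;    // base64 body with the data URL prefix stripped
  userMessage: boolean;
  audioFormat: 'mp3';
  sampleRate: 16000;
  msgId: string;        // new Date().getTime() + '' at send time; cleared when the reply arrives
}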
@@ -301,9 +297,8 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
                    //   timestamp: new Date().toISOString()
                    // });
 
-                    // Switch to listening mode
-                    setIsListening(true);
                    setWorkStatus('正在思考');
+                    setIsSpeakingNow(false)
                  }
                };
 
@@ -316,11 +311,12 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
              recorderRef.current=null;
            });
          }
-
+
          speechTimeoutRef.current = null;
        }, SPEECH_DELAY);
      }
-      } else if (isSpeakingNow && isSpeakingRef.current) {
+      }
+      else if (isSpeakingNow && isSpeakingRef.current) {
        console.log('继续说话');
        // Still speaking; reset the timeout
        if (speechTimeoutRef.current) {
@@ -352,11 +348,12 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
       }
     } else if (message.type === 'complete-audio') {
       // Handle the complete audio response returned by the backend
+      msgIdRef.current = null
       if (message.audioData) {
         handleAudioData(message.audioData);
+      }else {
+        setWorkStatus('正在聆听');
       }
-    } else if (message.type === 'busy') {
-      message.info('对方正在思考,请稍等');
     }
   };
 
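On the receiving side, a 'complete-audio' message first clears the pending id (which re-enables VAD), then either plays the returned audio or, when the reply carries no audio, drops straight back to listening; the old 'busy' branch is removed entirely. A standalone approximation of the state transitions this release introduces (not the component itself):

type WorkStatus = '正在聆听' | '正在思考' | '正在回复';

interface GateState {
  pendingMsgId: string | null;
  status: WorkStatus;
}

// Utterance sent: remember the msgId and show "thinking".
function onUtteranceSent(msgId: string): GateState {
  return { pendingMsgId: msgId, status: '正在思考' };
}

// Reply received: release the gate; play audio if present, otherwise resume listening.
function onCompleteAudio(hasAudio: boolean): GateState {
  return { pendingMsgId: null, status: hasAudio ? '正在回复' : '正在聆听' };
}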
@@ -520,8 +517,8 @@ const CallInterface: React.FC<CallInterfaceProps> = ({
       {/* <div className='recwave' style={{width: 120, height: 50, display: workStatus === '正在聆听' ? 'block' : 'none'}}></div>*/}
       {/* }*/}
       {/*</div>*/}
-      <div className='recwave' style={{width: 120, height: 50}}>{
-        workStatus === '正在聆听'|| workStatus === '正在回复' ? <img src={ai_call_working}/>:<img src={ai_call_thinking}/>
+      <div className='recwave' style={{width: 120, height: 65}}>{
+        (isSpeakingNow && workStatus === '正在聆听')|| workStatus === '正在回复' ? <img src={ai_call_working}/>:<img src={ai_call_thinking}/>
       }</div>
       <p className="call-status">
         {workStatus}
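The waveform area grows to 65 px and the "working" image now requires actual user speech while listening (or an assistant reply in progress); otherwise the "thinking" image is shown. The new condition, restated as an illustrative predicate:

const showWorkingWave = (isSpeakingNow: boolean, workStatus: string): boolean =>
  (isSpeakingNow && workStatus === '正在聆听') || workStatus === '正在回复';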