npm - @apteva/apteva-kit - Versions diffs - 0.1.137 → 0.1.138 - Mend

@apteva/apteva-kit 0.1.137 → 0.1.138

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.mjs CHANGED Viewed

@@ -4457,22 +4457,6 @@ function base64ToFloat32(base64) {
   }
   return float32Array;
 }
-function resampleAudio(inputData, inputSampleRate, outputSampleRate) {
-  if (inputSampleRate === outputSampleRate) {
-    return inputData;
-  }
-  const ratio = inputSampleRate / outputSampleRate;
-  const outputLength = Math.floor(inputData.length / ratio);
-  const output = new Float32Array(outputLength);
-  for (let i = 0; i < outputLength; i++) {
-    const srcIndex = i * ratio;
-    const srcIndexFloor = Math.floor(srcIndex);
-    const srcIndexCeil = Math.min(srcIndexFloor + 1, inputData.length - 1);
-    const t = srcIndex - srcIndexFloor;
-    output[i] = inputData[srcIndexFloor] * (1 - t) + inputData[srcIndexCeil] * t;
-  }
-  return output;
-}
 // src/hooks/useVoiceSession.ts
 function useVoiceSession(config) {
@@ -4491,8 +4475,10 @@ function useVoiceSession(config) {
   const mutedRef = useRef9(false);
   const configRef = useRef9(config);
   configRef.current = config;
-  const agentSpeakingRef = useRef9(false);
-  const agentSpeakingTimeoutRef = useRef9(null);
+  const activeSourcesRef = useRef9([]);
+  const responseStartTimeRef = useRef9(0);
+  const totalAudioDurationMsRef = useRef9(0);
+  const interruptedRef = useRef9(false);
   const cleanup = useCallback4(() => {
     if (durationIntervalRef.current) {
       clearInterval(durationIntervalRef.current);
@@ -4529,15 +4515,26 @@ function useVoiceSession(config) {
     }
     nextPlayTimeRef.current = 0;
     mutedRef.current = false;
-    agentSpeakingRef.current = false;
-    if (agentSpeakingTimeoutRef.current) {
-      clearTimeout(agentSpeakingTimeoutRef.current);
-      agentSpeakingTimeoutRef.current = null;
-    }
+    activeSourcesRef.current = [];
+    responseStartTimeRef.current = 0;
+    totalAudioDurationMsRef.current = 0;
+    interruptedRef.current = false;
     setMuted(false);
     setPartialTranscript("");
     setDuration(0);
   }, []);
+  const resetPlayback = useCallback4(() => {
+    activeSourcesRef.current.forEach((source) => {
+      try {
+        source.stop();
+      } catch (_) {
+      }
+    });
+    activeSourcesRef.current = [];
+    nextPlayTimeRef.current = 0;
+    responseStartTimeRef.current = 0;
+    totalAudioDurationMsRef.current = 0;
+  }, []);
   useEffect9(() => {
     return () => {
       cleanup();
@@ -4557,18 +4554,18 @@ function useVoiceSession(config) {
     const source = ctx.createBufferSource();
     source.buffer = audioBuffer;
     source.connect(ctx.destination);
+    activeSourcesRef.current.push(source);
+    source.onended = () => {
+      activeSourcesRef.current = activeSourcesRef.current.filter((s) => s !== source);
+    };
     const currentTime = ctx.currentTime;
     const startTime = Math.max(currentTime, nextPlayTimeRef.current);
     source.start(startTime);
     nextPlayTimeRef.current = startTime + audioBuffer.duration;
-    agentSpeakingRef.current = true;
-    if (agentSpeakingTimeoutRef.current) {
-      clearTimeout(agentSpeakingTimeoutRef.current);
-    }
-    const remainingMs = (nextPlayTimeRef.current - currentTime) * 1e3 + 150;
-    agentSpeakingTimeoutRef.current = setTimeout(() => {
-      agentSpeakingRef.current = false;
-    }, remainingMs);
+    if (responseStartTimeRef.current === 0) {
+      responseStartTimeRef.current = startTime;
+    }
+    totalAudioDurationMsRef.current += Math.floor(audioBuffer.duration * 1e3);
   }, []);
   const startCaptureRef = useRef9(() => {
   });
@@ -4584,10 +4581,43 @@ function useVoiceSession(config) {
         startCaptureRef.current();
         break;
       case "audio_delta":
+        if (interruptedRef.current) break;
         if (msg.data?.chunk) {
           playAudioChunk(msg.data.chunk);
         }
         break;
+      case "audio_complete":
+        interruptedRef.current = false;
+        break;
+      case "audio_interrupt": {
+        if (activeSourcesRef.current.length === 0) break;
+        let audioEndMs = 0;
+        if (playbackCtxRef.current && responseStartTimeRef.current > 0) {
+          const elapsedMs = Math.max(0, Math.floor(
+            (playbackCtxRef.current.currentTime - responseStartTimeRef.current) * 1e3
+          ));
+          audioEndMs = Math.min(elapsedMs, totalAudioDurationMsRef.current);
+        }
+        const itemId = msg.data?.item_id;
+        const contentIndex = msg.data?.content_index || 0;
+        resetPlayback();
+        if (itemId) {
+          interruptedRef.current = true;
+        }
+        const ws = wsRef.current;
+        if (ws && ws.readyState === WebSocket.OPEN && itemId) {
+          ws.send(JSON.stringify({
+            type: "control",
+            data: {
+              action: "truncate",
+              item_id: itemId,
+              content_index: contentIndex,
+              audio_end_ms: audioEndMs
+            }
+          }));
+        }
+        break;
+      }
       case "transcript":
         if (msg.data) {
           if (msg.data.partial) {
@@ -4606,7 +4636,7 @@ function useVoiceSession(config) {
         break;
       case "tool_call":
         if (msg.data) {
-          nextPlayTimeRef.current = 0;
+          resetPlayback();
           cfg.onTranscript?.({
             id: `vt-tool-${Date.now()}`,
             role: "system",
@@ -4618,24 +4648,48 @@ function useVoiceSession(config) {
         break;
       case "tool_result":
         if (msg.data) {
-          nextPlayTimeRef.current = 0;
+          const status = msg.data.error ? "failed" : "completed";
+          cfg.onTranscript?.({
+            id: `vt-toolresult-${Date.now()}`,
+            role: "system",
+            content: `Tool ${status}: ${msg.data.name || msg.data.call_id}`,
+            partial: false,
+            timestamp: /* @__PURE__ */ new Date()
+          });
         }
         break;
+      case "turn_end":
+        interruptedRef.current = false;
+        break;
       case "error":
         setState("error");
         cfg.onError?.(new Error(msg.data?.message || "Voice session error"));
         break;
     }
-  }, [playAudioChunk]);
+  }, [playAudioChunk, resetPlayback]);
   const startCapture = useCallback4(async () => {
     const ws = wsRef.current;
     if (!ws) return;
+    if (processorRef.current) {
+      processorRef.current.disconnect();
+      processorRef.current = null;
+    }
+    if (mediaStreamRef.current) {
+      mediaStreamRef.current.getTracks().forEach((t) => t.stop());
+      mediaStreamRef.current = null;
+    }
+    if (captureCtxRef.current) {
+      try {
+        captureCtxRef.current.close();
+      } catch (_) {
+      }
+      captureCtxRef.current = null;
+    }
     try {
-      captureCtxRef.current = new AudioContext();
+      captureCtxRef.current = new AudioContext({ sampleRate: 24e3 });
       if (captureCtxRef.current.state === "suspended") {
         await captureCtxRef.current.resume();
       }
-      const nativeSampleRate = captureCtxRef.current.sampleRate;
       mediaStreamRef.current = await navigator.mediaDevices.getUserMedia({
         audio: {
           echoCancellation: true,
@@ -4644,27 +4698,28 @@ function useVoiceSession(config) {
         }
       });
       const source = captureCtxRef.current.createMediaStreamSource(mediaStreamRef.current);
-      processorRef.current = captureCtxRef.current.createScriptProcessor(2048, 1, 1);
+      processorRef.current = captureCtxRef.current.createScriptProcessor(4096, 1, 1);
       processorRef.current.onaudioprocess = (e) => {
         if (!ws || ws.readyState !== WebSocket.OPEN) return;
         if (mutedRef.current) return;
-        if (agentSpeakingRef.current) return;
         const inputData = e.inputBuffer.getChannelData(0);
-        const resampledData = resampleAudio(inputData, nativeSampleRate, 16e3);
-        const int16Data = float32ToInt16(resampledData);
+        const int16Data = float32ToInt16(inputData);
         const base64Data = int16ToBase64(int16Data);
         ws.send(JSON.stringify({
           type: "audio",
-          data: { chunk: base64Data }
+          data: { chunk: base64Data, sample_rate: 24e3 }
         }));
       };
       source.connect(processorRef.current);
-      processorRef.current.connect(captureCtxRef.current.destination);
+      const silentGain = captureCtxRef.current.createGain();
+      silentGain.gain.value = 0;
+      processorRef.current.connect(silentGain);
+      silentGain.connect(captureCtxRef.current.destination);
     } catch (e) {
       console.warn("Microphone access denied:", e);
       configRef.current.onError?.(new Error("Microphone access denied"));
     }
-  }, [cleanup]);
+  }, []);
   startCaptureRef.current = startCapture;
   const start = useCallback4(() => {
     if (state !== "idle") return;
@@ -4710,7 +4765,7 @@ function useVoiceSession(config) {
       cleanup();
       setState("idle");
     };
-  }, [state, config.apiUrl, handleMessage, cleanup]);
+  }, [state, config.apiUrl, config.apiKey, handleMessage, cleanup]);
   const stop = useCallback4(() => {
     cleanup();
     setState("idle");