npm - @ottocode/web-sdk - Versions diffs - 0.1.315 → 0.1.316 - Mend

@ottocode/web-sdk 0.1.315 → 0.1.316

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (8)

package/dist/components/index.js +70 -33
package/dist/components/index.js.map +4 -4
package/dist/hooks/index.js +69 -32
package/dist/hooks/index.js.map +3 -3
package/dist/hooks/useVoiceInput.d.ts.map +1 -1
package/dist/index.js +70 -33
package/dist/index.js.map +4 -4
package/package.json +3 -3

package/dist/hooks/index.js CHANGED Viewed

@@ -8142,6 +8142,7 @@ function useEdgeHover({
 import { useCallback as useCallback14, useEffect as useEffect22, useRef as useRef11, useState as useState10 } from "react";
 var TARGET_SAMPLE_RATE = 16000;
 var PCM_FRAME_BYTES = 3200;
+var PROCESSOR_BUFFER_SIZE = 4096;
 function getAudioContextConstructor() {
   if (typeof window === "undefined")
     return null;
@@ -8286,8 +8287,7 @@ function useVoiceInput({
   }, []);
   const handleAudioProcess = useCallback14((event) => {
     const audioContext = audioContextRef.current;
-    const socket = socketRef.current;
-    if (!audioContext || !socket || socket.readyState !== WebSocket.OPEN || stoppingRef.current) {
+    if (!audioContext || stoppingRef.current) {
       return;
     }
     const input = event.inputBuffer.getChannelData(0);
@@ -8319,9 +8319,53 @@ function useVoiceInput({
     setIsTranscribing(false);
     stoppingRef.current = false;
     try {
-      const status = await apiClient.getDictationStatus();
+      const streamPromise = navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          autoGainControl: true
+        }
+      });
+      const statusPromise = apiClient.getDictationStatus().then((status2) => ({ status: status2 }), (error2) => ({ error: error2 }));
+      const stream = await streamPromise;
+      if (stoppingRef.current) {
+        for (const track of stream.getTracks())
+          track.stop();
+        return;
+      }
+      streamRef.current = stream;
+      const AudioContextCtor = getAudioContextConstructor();
+      if (!AudioContextCtor)
+        throw new Error("AudioContext is unavailable");
+      const audioContext = new AudioContextCtor;
+      audioContextRef.current = audioContext;
+      const source = audioContext.createMediaStreamSource(stream);
+      const analyserNode = audioContext.createAnalyser();
+      analyserNode.fftSize = 256;
+      analyserNode.smoothingTimeConstant = 0.55;
+      const processor = audioContext.createScriptProcessor(PROCESSOR_BUFFER_SIZE, 1, 1);
+      processor.onaudioprocess = handleAudioProcess;
+      source.connect(analyserNode);
+      source.connect(processor);
+      processor.connect(audioContext.destination);
+      sourceRef.current = source;
+      processorRef.current = processor;
+      if (audioContext.state === "suspended") {
+        await audioContext.resume();
+      }
+      if (stoppingRef.current)
+        return;
+      setAnalyser(analyserNode);
+      setIsListening(true);
+      const statusResult = await statusPromise;
+      if ("error" in statusResult)
+        throw statusResult.error;
+      const { status } = statusResult;
+      if (stoppingRef.current)
+        return;
       const model = status.models.find((item) => item.id === status.defaultModel);
       if (!model?.installed) {
+        cleanup();
         handleMissingModel();
         return;
       }
@@ -8329,7 +8373,10 @@ function useVoiceInput({
         model: status.defaultModel,
         language: toLanguageCode(lang)
       });
+      if (stoppingRef.current)
+        return;
       if (!session.modelInstalled) {
+        cleanup();
         handleMissingModel();
         return;
       }
@@ -8342,7 +8389,6 @@ function useVoiceInput({
           reject(new Error("Timed out connecting to local dictation"));
         }, 5000);
         socket.onopen = () => {
-          window.clearTimeout(timeout);
           socket.send(JSON.stringify({
             type: "start",
             model: session.model,
@@ -8354,7 +8400,23 @@ function useVoiceInput({
             },
             partialResults: false
           }));
-          resolve();
+        };
+        socket.onmessage = (event) => {
+          if (typeof event.data !== "string")
+            return;
+          const payload = parseServerEvent(event.data);
+          if (!payload)
+            return;
+          if (payload.type === "ready") {
+            window.clearTimeout(timeout);
+            flushFrameBuffer(false);
+            resolve();
+            return;
+          }
+          if (payload.type === "error") {
+            window.clearTimeout(timeout);
+            reject(new Error(payload.message));
+          }
         };
         socket.onerror = () => {
           window.clearTimeout(timeout);
@@ -8382,32 +8444,6 @@ function useVoiceInput({
           setIsListening(false);
         setIsTranscribing(false);
       };
-      const stream = await navigator.mediaDevices.getUserMedia({
-        audio: {
-          echoCancellation: true,
-          noiseSuppression: true,
-          autoGainControl: true
-        }
-      });
-      streamRef.current = stream;
-      const AudioContextCtor = getAudioContextConstructor();
-      if (!AudioContextCtor)
-        throw new Error("AudioContext is unavailable");
-      const audioContext = new AudioContextCtor;
-      const source = audioContext.createMediaStreamSource(stream);
-      const analyserNode = audioContext.createAnalyser();
-      analyserNode.fftSize = 256;
-      analyserNode.smoothingTimeConstant = 0.55;
-      const processor = audioContext.createScriptProcessor(4096, 1, 1);
-      processor.onaudioprocess = handleAudioProcess;
-      source.connect(analyserNode);
-      source.connect(processor);
-      processor.connect(audioContext.destination);
-      audioContextRef.current = audioContext;
-      sourceRef.current = source;
-      processorRef.current = processor;
-      setAnalyser(analyserNode);
-      setIsListening(true);
     } catch (err) {
       const name = err instanceof Error ? err.name : "";
       const msg = name === "NotAllowedError" ? "Microphone permission denied" : err instanceof Error ? err.message : "Could not start voice input";
@@ -8419,6 +8455,7 @@ function useVoiceInput({
     emitError,
     handleAudioProcess,
     handleMissingModel,
+    flushFrameBuffer,
     isSupported,
     lang
   ]);
@@ -8664,4 +8701,4 @@ export {
   getAgentToolCount
 };
-//# debugId=EA09E996DD4075CD64756E2164756E21
+//# debugId=43D698DB1EF766A364756E2164756E21