@ottocode/web-sdk 0.1.315 → 0.1.316

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -8142,6 +8142,7 @@ function useEdgeHover({
8142
8142
  import { useCallback as useCallback14, useEffect as useEffect22, useRef as useRef11, useState as useState10 } from "react";
8143
8143
  var TARGET_SAMPLE_RATE = 16000;
8144
8144
  var PCM_FRAME_BYTES = 3200;
8145
+ var PROCESSOR_BUFFER_SIZE = 4096;
8145
8146
  function getAudioContextConstructor() {
8146
8147
  if (typeof window === "undefined")
8147
8148
  return null;
@@ -8286,8 +8287,7 @@ function useVoiceInput({
8286
8287
  }, []);
8287
8288
  const handleAudioProcess = useCallback14((event) => {
8288
8289
  const audioContext = audioContextRef.current;
8289
- const socket = socketRef.current;
8290
- if (!audioContext || !socket || socket.readyState !== WebSocket.OPEN || stoppingRef.current) {
8290
+ if (!audioContext || stoppingRef.current) {
8291
8291
  return;
8292
8292
  }
8293
8293
  const input = event.inputBuffer.getChannelData(0);
@@ -8319,9 +8319,53 @@ function useVoiceInput({
8319
8319
  setIsTranscribing(false);
8320
8320
  stoppingRef.current = false;
8321
8321
  try {
8322
- const status = await apiClient.getDictationStatus();
8322
+ const streamPromise = navigator.mediaDevices.getUserMedia({
8323
+ audio: {
8324
+ echoCancellation: true,
8325
+ noiseSuppression: true,
8326
+ autoGainControl: true
8327
+ }
8328
+ });
8329
+ const statusPromise = apiClient.getDictationStatus().then((status2) => ({ status: status2 }), (error2) => ({ error: error2 }));
8330
+ const stream = await streamPromise;
8331
+ if (stoppingRef.current) {
8332
+ for (const track of stream.getTracks())
8333
+ track.stop();
8334
+ return;
8335
+ }
8336
+ streamRef.current = stream;
8337
+ const AudioContextCtor = getAudioContextConstructor();
8338
+ if (!AudioContextCtor)
8339
+ throw new Error("AudioContext is unavailable");
8340
+ const audioContext = new AudioContextCtor;
8341
+ audioContextRef.current = audioContext;
8342
+ const source = audioContext.createMediaStreamSource(stream);
8343
+ const analyserNode = audioContext.createAnalyser();
8344
+ analyserNode.fftSize = 256;
8345
+ analyserNode.smoothingTimeConstant = 0.55;
8346
+ const processor = audioContext.createScriptProcessor(PROCESSOR_BUFFER_SIZE, 1, 1);
8347
+ processor.onaudioprocess = handleAudioProcess;
8348
+ source.connect(analyserNode);
8349
+ source.connect(processor);
8350
+ processor.connect(audioContext.destination);
8351
+ sourceRef.current = source;
8352
+ processorRef.current = processor;
8353
+ if (audioContext.state === "suspended") {
8354
+ await audioContext.resume();
8355
+ }
8356
+ if (stoppingRef.current)
8357
+ return;
8358
+ setAnalyser(analyserNode);
8359
+ setIsListening(true);
8360
+ const statusResult = await statusPromise;
8361
+ if ("error" in statusResult)
8362
+ throw statusResult.error;
8363
+ const { status } = statusResult;
8364
+ if (stoppingRef.current)
8365
+ return;
8323
8366
  const model = status.models.find((item) => item.id === status.defaultModel);
8324
8367
  if (!model?.installed) {
8368
+ cleanup();
8325
8369
  handleMissingModel();
8326
8370
  return;
8327
8371
  }
@@ -8329,7 +8373,10 @@ function useVoiceInput({
8329
8373
  model: status.defaultModel,
8330
8374
  language: toLanguageCode(lang)
8331
8375
  });
8376
+ if (stoppingRef.current)
8377
+ return;
8332
8378
  if (!session.modelInstalled) {
8379
+ cleanup();
8333
8380
  handleMissingModel();
8334
8381
  return;
8335
8382
  }
@@ -8342,7 +8389,6 @@ function useVoiceInput({
8342
8389
  reject(new Error("Timed out connecting to local dictation"));
8343
8390
  }, 5000);
8344
8391
  socket.onopen = () => {
8345
- window.clearTimeout(timeout);
8346
8392
  socket.send(JSON.stringify({
8347
8393
  type: "start",
8348
8394
  model: session.model,
@@ -8354,7 +8400,23 @@ function useVoiceInput({
8354
8400
  },
8355
8401
  partialResults: false
8356
8402
  }));
8357
- resolve();
8403
+ };
8404
+ socket.onmessage = (event) => {
8405
+ if (typeof event.data !== "string")
8406
+ return;
8407
+ const payload = parseServerEvent(event.data);
8408
+ if (!payload)
8409
+ return;
8410
+ if (payload.type === "ready") {
8411
+ window.clearTimeout(timeout);
8412
+ flushFrameBuffer(false);
8413
+ resolve();
8414
+ return;
8415
+ }
8416
+ if (payload.type === "error") {
8417
+ window.clearTimeout(timeout);
8418
+ reject(new Error(payload.message));
8419
+ }
8358
8420
  };
8359
8421
  socket.onerror = () => {
8360
8422
  window.clearTimeout(timeout);
@@ -8382,32 +8444,6 @@ function useVoiceInput({
8382
8444
  setIsListening(false);
8383
8445
  setIsTranscribing(false);
8384
8446
  };
8385
- const stream = await navigator.mediaDevices.getUserMedia({
8386
- audio: {
8387
- echoCancellation: true,
8388
- noiseSuppression: true,
8389
- autoGainControl: true
8390
- }
8391
- });
8392
- streamRef.current = stream;
8393
- const AudioContextCtor = getAudioContextConstructor();
8394
- if (!AudioContextCtor)
8395
- throw new Error("AudioContext is unavailable");
8396
- const audioContext = new AudioContextCtor;
8397
- const source = audioContext.createMediaStreamSource(stream);
8398
- const analyserNode = audioContext.createAnalyser();
8399
- analyserNode.fftSize = 256;
8400
- analyserNode.smoothingTimeConstant = 0.55;
8401
- const processor = audioContext.createScriptProcessor(4096, 1, 1);
8402
- processor.onaudioprocess = handleAudioProcess;
8403
- source.connect(analyserNode);
8404
- source.connect(processor);
8405
- processor.connect(audioContext.destination);
8406
- audioContextRef.current = audioContext;
8407
- sourceRef.current = source;
8408
- processorRef.current = processor;
8409
- setAnalyser(analyserNode);
8410
- setIsListening(true);
8411
8447
  } catch (err) {
8412
8448
  const name = err instanceof Error ? err.name : "";
8413
8449
  const msg = name === "NotAllowedError" ? "Microphone permission denied" : err instanceof Error ? err.message : "Could not start voice input";
@@ -8419,6 +8455,7 @@ function useVoiceInput({
8419
8455
  emitError,
8420
8456
  handleAudioProcess,
8421
8457
  handleMissingModel,
8458
+ flushFrameBuffer,
8422
8459
  isSupported,
8423
8460
  lang
8424
8461
  ]);
@@ -8664,4 +8701,4 @@ export {
8664
8701
  getAgentToolCount
8665
8702
  };
8666
8703
 
8667
- //# debugId=EA09E996DD4075CD64756E2164756E21
8704
+ //# debugId=43D698DB1EF766A364756E2164756E21