npm - @omote/core - Versions diffs - 0.2.0 → 0.2.2 - Mend

@omote/core 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.mts CHANGED Viewed

@@ -366,12 +366,13 @@ declare function isSafari(): boolean;
 /**
  * Recommend using CPU-optimized lip sync model (wav2arkit_cpu)
  *
- * Safari (macOS + iOS) has WebGPU JSEP bugs that crash ONNX Runtime,
- * and the 384MB LAM model stack-overflows in WASM mode.
+ * All WebKit browsers (Safari macOS, Safari iOS, Chrome iOS, Firefox iOS)
+ * have ONNX Runtime WebGPU JSEP bugs that crash session creation, and the
+ * 384MB LAM model stack-overflows in WASM mode.
  * The wav2arkit_cpu model (1.8MB) provides identical 52 ARKit blendshape
  * output at 22x real-time on CPU/WASM.
  *
- * @returns true if on Safari (should use CPU lip sync model)
+ * @returns true if on Safari or any iOS browser (should use CPU lip sync model)
  */
 declare function shouldUseCpuLipSync(): boolean;
 /**
@@ -1366,6 +1367,7 @@ declare class SileroVADInference {
     private inferenceQueue;
     private preSpeechBuffer;
     private wasSpeaking;
+    private srTensor;
     constructor(config: SileroVADConfig);
     get backend(): RuntimeBackend | null;
     get isLoaded(): boolean;

package/dist/index.d.ts CHANGED Viewed

@@ -366,12 +366,13 @@ declare function isSafari(): boolean;
 /**
  * Recommend using CPU-optimized lip sync model (wav2arkit_cpu)
  *
- * Safari (macOS + iOS) has WebGPU JSEP bugs that crash ONNX Runtime,
- * and the 384MB LAM model stack-overflows in WASM mode.
+ * All WebKit browsers (Safari macOS, Safari iOS, Chrome iOS, Firefox iOS)
+ * have ONNX Runtime WebGPU JSEP bugs that crash session creation, and the
+ * 384MB LAM model stack-overflows in WASM mode.
  * The wav2arkit_cpu model (1.8MB) provides identical 52 ARKit blendshape
  * output at 22x real-time on CPU/WASM.
  *
- * @returns true if on Safari (should use CPU lip sync model)
+ * @returns true if on Safari or any iOS browser (should use CPU lip sync model)
  */
 declare function shouldUseCpuLipSync(): boolean;
 /**
@@ -1366,6 +1367,7 @@ declare class SileroVADInference {
     private inferenceQueue;
     private preSpeechBuffer;
     private wasSpeaking;
+    private srTensor;
     constructor(config: SileroVADConfig);
     get backend(): RuntimeBackend | null;
     get isLoaded(): boolean;

package/dist/index.js CHANGED Viewed

@@ -28970,7 +28970,7 @@ function isSafari() {
   return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
 }
 function shouldUseCpuLipSync() {
-  return isSafari();
+  return isSafari() || isIOS();
 }
 function isSpeechRecognitionAvailable() {
   if (typeof window === "undefined") return false;
@@ -30138,7 +30138,7 @@ function createLipSync(config) {
     useCpu = false;
     logger6.info("Forcing GPU lip sync model (Wav2Vec2)");
   } else {
-    useCpu = isSafari();
+    useCpu = shouldUseCpuLipSync();
     logger6.info("Auto-detected lip sync model", {
       useCpu,
       isSafari: isSafari()
@@ -30216,6 +30216,8 @@ var SileroVADInference = class {
     // Pre-speech buffer for capturing beginning of speech
     this.preSpeechBuffer = [];
     this.wasSpeaking = false;
+    // Cached sample rate tensor (int64 scalar, never changes per instance)
+    this.srTensor = null;
     const sampleRate = config.sampleRate ?? 16e3;
     if (sampleRate !== 8e3 && sampleRate !== 16e3) {
       throw new Error("Silero VAD only supports 8000 or 16000 Hz sample rates");
@@ -30346,6 +30348,24 @@ var SileroVADInference = class {
     this.context = new Float32Array(this.contextSize);
     this.preSpeechBuffer = [];
     this.wasSpeaking = false;
+    if (!this.srTensor) {
+      try {
+        this.srTensor = new this.ort.Tensor(
+          "int64",
+          new BigInt64Array([BigInt(this.config.sampleRate)]),
+          []
+        );
+      } catch (e) {
+        logger7.warn("BigInt64Array not available, using bigint array fallback", {
+          error: e instanceof Error ? e.message : String(e)
+        });
+        this.srTensor = new this.ort.Tensor(
+          "int64",
+          [BigInt(this.config.sampleRate)],
+          []
+        );
+      }
+    }
   }
   /**
    * Process a single audio chunk
@@ -30477,7 +30497,7 @@ var SileroVADInference = class {
           inputBuffer.set(audioChunkCopy, this.contextSize);
           const inputBufferCopy = new Float32Array(inputBuffer);
           const inputTensor = new this.ort.Tensor("float32", inputBufferCopy, [1, inputSize]);
-          const srTensor = new this.ort.Tensor("int64", BigInt64Array.from([BigInt(this.config.sampleRate)]), []);
+          const srTensor = this.srTensor;
           const stateCopy = new Float32Array(this.state.data);
           const stateTensor = new this.ort.Tensor("float32", stateCopy, this.state.dims);
           const feeds = {
@@ -30566,6 +30586,7 @@ var SileroVADInference = class {
       this.session = null;
     }
     this.state = null;
+    this.srTensor = null;
   }
 };
 /**
@@ -30670,7 +30691,14 @@ async function runInference(audio, state, context) {
   // Create tensors
   const inputTensor = new ort.Tensor('float32', new Float32Array(inputBuffer), [1, inputSize]);
   const stateTensor = new ort.Tensor('float32', new Float32Array(state), [2, 1, 128]);
-  const srTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(sampleRate)]), []);
+  // Use BigInt64Array constructor (not .from()) for broader compatibility
+  let srTensor;
+  try {
+    srTensor = new ort.Tensor('int64', new BigInt64Array([BigInt(sampleRate)]), []);
+  } catch (e) {
+    // Fallback for environments without BigInt64Array support
+    srTensor = new ort.Tensor('int64', [BigInt(sampleRate)], []);
+  }
   const feeds = {
     'input': inputTensor,