@omote/core 0.2.0 → 0.2.2

This diff compares publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
package/dist/index.d.mts CHANGED
@@ -366,12 +366,13 @@ declare function isSafari(): boolean;
 /**
  * Recommend using CPU-optimized lip sync model (wav2arkit_cpu)
  *
- * Safari (macOS + iOS) has WebGPU JSEP bugs that crash ONNX Runtime,
- * and the 384MB LAM model stack-overflows in WASM mode.
+ * All WebKit browsers (Safari macOS, Safari iOS, Chrome iOS, Firefox iOS)
+ * have ONNX Runtime WebGPU JSEP bugs that crash session creation, and the
+ * 384MB LAM model stack-overflows in WASM mode.
  * The wav2arkit_cpu model (1.8MB) provides identical 52 ARKit blendshape
  * output at 22x real-time on CPU/WASM.
  *
- * @returns true if on Safari (should use CPU lip sync model)
+ * @returns true if on Safari or any iOS browser (should use CPU lip sync model)
  */
 declare function shouldUseCpuLipSync(): boolean;
 /**
@@ -1366,6 +1367,7 @@ declare class SileroVADInference {
     private inferenceQueue;
     private preSpeechBuffer;
     private wasSpeaking;
+    private srTensor;
     constructor(config: SileroVADConfig);
     get backend(): RuntimeBackend | null;
     get isLoaded(): boolean;
package/dist/index.d.ts CHANGED
@@ -366,12 +366,13 @@ declare function isSafari(): boolean;
 /**
  * Recommend using CPU-optimized lip sync model (wav2arkit_cpu)
  *
- * Safari (macOS + iOS) has WebGPU JSEP bugs that crash ONNX Runtime,
- * and the 384MB LAM model stack-overflows in WASM mode.
+ * All WebKit browsers (Safari macOS, Safari iOS, Chrome iOS, Firefox iOS)
+ * have ONNX Runtime WebGPU JSEP bugs that crash session creation, and the
+ * 384MB LAM model stack-overflows in WASM mode.
  * The wav2arkit_cpu model (1.8MB) provides identical 52 ARKit blendshape
  * output at 22x real-time on CPU/WASM.
  *
- * @returns true if on Safari (should use CPU lip sync model)
+ * @returns true if on Safari or any iOS browser (should use CPU lip sync model)
  */
 declare function shouldUseCpuLipSync(): boolean;
 /**
@@ -1366,6 +1367,7 @@ declare class SileroVADInference {
     private inferenceQueue;
     private preSpeechBuffer;
     private wasSpeaking;
+    private srTensor;
     constructor(config: SileroVADConfig);
     get backend(): RuntimeBackend | null;
     get isLoaded(): boolean;
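
The declaration change above widens the recommendation from Safari alone to every iOS browser: Chrome and Firefox on iOS are WebKit shells (the crios and fxios tokens that isSafari() filters out in the index.js hunk below), so they hit the same ONNX Runtime WebGPU JSEP crashes. The isIOS() helper the runtime code now calls is not included in this diff; the following is only a hypothetical sketch of the kind of user-agent check such a helper typically performs.

// Hypothetical sketch only; the package's actual isIOS() is not shown in this diff.
function looksLikeIOS(): boolean {
  if (typeof navigator === "undefined") return false;
  const ua = navigator.userAgent.toLowerCase();
  // iPadOS 13+ reports a macOS user agent but still exposes multi-touch.
  const iPadAsMac = /macintosh/.test(ua) && navigator.maxTouchPoints > 1;
  return /iphone|ipad|ipod/.test(ua) || iPadAsMac;
}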
package/dist/index.js CHANGED
@@ -28970,7 +28970,7 @@ function isSafari() {
   return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
 }
 function shouldUseCpuLipSync() {
-  return isSafari();
+  return isSafari() || isIOS();
 }
 function isSpeechRecognitionAvailable() {
   if (typeof window === "undefined") return false;
@@ -30138,7 +30138,7 @@ function createLipSync(config) {
     useCpu = false;
     logger6.info("Forcing GPU lip sync model (Wav2Vec2)");
   } else {
-    useCpu = isSafari();
+    useCpu = shouldUseCpuLipSync();
     logger6.info("Auto-detected lip sync model", {
       useCpu,
       isSafari: isSafari()
@@ -30216,6 +30216,8 @@ var SileroVADInference = class {
     // Pre-speech buffer for capturing beginning of speech
     this.preSpeechBuffer = [];
     this.wasSpeaking = false;
+    // Cached sample rate tensor (int64 scalar, never changes per instance)
+    this.srTensor = null;
     const sampleRate = config.sampleRate ?? 16e3;
     if (sampleRate !== 8e3 && sampleRate !== 16e3) {
       throw new Error("Silero VAD only supports 8000 or 16000 Hz sample rates");
@@ -30346,6 +30348,24 @@ var SileroVADInference = class {
     this.context = new Float32Array(this.contextSize);
     this.preSpeechBuffer = [];
     this.wasSpeaking = false;
+    if (!this.srTensor) {
+      try {
+        this.srTensor = new this.ort.Tensor(
+          "int64",
+          new BigInt64Array([BigInt(this.config.sampleRate)]),
+          []
+        );
+      } catch (e) {
+        logger7.warn("BigInt64Array not available, using bigint array fallback", {
+          error: e instanceof Error ? e.message : String(e)
+        });
+        this.srTensor = new this.ort.Tensor(
+          "int64",
+          [BigInt(this.config.sampleRate)],
+          []
+        );
+      }
+    }
   }
   /**
    * Process a single audio chunk
@@ -30477,7 +30497,7 @@ var SileroVADInference = class {
     inputBuffer.set(audioChunkCopy, this.contextSize);
     const inputBufferCopy = new Float32Array(inputBuffer);
     const inputTensor = new this.ort.Tensor("float32", inputBufferCopy, [1, inputSize]);
-    const srTensor = new this.ort.Tensor("int64", BigInt64Array.from([BigInt(this.config.sampleRate)]), []);
+    const srTensor = this.srTensor;
     const stateCopy = new Float32Array(this.state.data);
     const stateTensor = new this.ort.Tensor("float32", stateCopy, this.state.dims);
     const feeds = {
@@ -30566,6 +30586,7 @@ var SileroVADInference = class {
       this.session = null;
     }
     this.state = null;
+    this.srTensor = null;
   }
 };
 /**
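
The SileroVADInference hunks above replace a per-inference int64 allocation with a sample-rate tensor that is created once in reset(), reused on every run, and released in dispose(). Below is a minimal consumer-side sketch of the same reuse pattern against onnxruntime-web; the feed names input, state, and sr follow the public Silero VAD ONNX graph and the [2, 1, 128] state shape used later in this bundle, and anything not shown in the diff is an assumption.

import * as ort from "onnxruntime-web";

// Build the constant int64 sample-rate scalar once and share it across calls;
// only the tensors whose data actually changes are recreated per frame.
const SAMPLE_RATE = 16000;
const srTensor = new ort.Tensor("int64", new BigInt64Array([BigInt(SAMPLE_RATE)]), []);

async function runVadFrame(
  session: ort.InferenceSession,
  frame: Float32Array,  // one model-sized audio chunk (context already prepended)
  state: Float32Array   // recurrent state, shape [2, 1, 128]
) {
  const feeds: Record<string, ort.Tensor> = {
    input: new ort.Tensor("float32", frame, [1, frame.length]),
    state: new ort.Tensor("float32", state, [2, 1, 128]),
    sr: srTensor, // cached scalar, identical for the lifetime of the session
  };
  return session.run(feeds);
}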
@@ -30670,7 +30691,14 @@ async function runInference(audio, state, context) {
   // Create tensors
   const inputTensor = new ort.Tensor('float32', new Float32Array(inputBuffer), [1, inputSize]);
   const stateTensor = new ort.Tensor('float32', new Float32Array(state), [2, 1, 128]);
-  const srTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(sampleRate)]), []);
+  // Use BigInt64Array constructor (not .from()) for broader compatibility
+  let srTensor;
+  try {
+    srTensor = new ort.Tensor('int64', new BigInt64Array([BigInt(sampleRate)]), []);
+  } catch (e) {
+    // Fallback for environments without BigInt64Array support
+    srTensor = new ort.Tensor('int64', [BigInt(sampleRate)], []);
+  }
 
   const feeds = {
     'input': inputTensor,
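
The runInference hunk above swaps BigInt64Array.from() for the constructor and, per its comments, falls back to a plain bigint array when BigInt64Array is missing entirely. A guarded helper in the same spirit, sketched under the assumption that the installed onnxruntime-web build accepts the bigint-array data at runtime (the cast only quiets the typings, mirroring what the bundled fallback relies on):

import * as ort from "onnxruntime-web";

// Sketch of the guarded int64-scalar construction shown above. Whether the
// fallback branch is accepted depends on the onnxruntime-web version in use.
function int64Scalar(value: number): ort.Tensor {
  if (typeof BigInt64Array !== "undefined") {
    return new ort.Tensor("int64", new BigInt64Array([BigInt(value)]), []);
  }
  return new ort.Tensor("int64", [BigInt(value)] as unknown as BigInt64Array, []);
}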