npm - @omote/core - Versions diffs - 0.3.1 → 0.4.1 - Mend

@omote/core 0.3.1 → 0.4.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (25)

package/dist/events/index.mjs +0 -1
package/dist/index.d.mts +287 -304
package/dist/index.d.ts +287 -304
package/dist/index.js +883 -40000
package/dist/index.js.map +1 -1
package/dist/index.mjs +847 -949
package/dist/index.mjs.map +1 -1
package/dist/logging/index.mjs +0 -1
package/package.json +1 -3
package/dist/chunk-6W7G6WE7.mjs +0 -13
package/dist/chunk-6W7G6WE7.mjs.map +0 -1
package/dist/chunk-C3Y37HKD.mjs +0 -26378
package/dist/chunk-C3Y37HKD.mjs.map +0 -1
package/dist/chunk-NSSMTXJJ.mjs +0 -8
package/dist/chunk-NSSMTXJJ.mjs.map +0 -1
package/dist/chunk-RI6UQ7WF.mjs +0 -26378
package/dist/chunk-RI6UQ7WF.mjs.map +0 -1
package/dist/chunk-T465MTDX.mjs +0 -38869
package/dist/chunk-T465MTDX.mjs.map +0 -1
package/dist/transformers.web-4C62MDO6.mjs +0 -1724
package/dist/transformers.web-4C62MDO6.mjs.map +0 -1
package/dist/transformers.web-ALDLCPHT.mjs +0 -1725
package/dist/transformers.web-ALDLCPHT.mjs.map +0 -1
package/dist/transformers.web-MHLR33H6.mjs +0 -1718
package/dist/transformers.web-MHLR33H6.mjs.map +0 -1

package/dist/index.mjs CHANGED Viewed

@@ -12,11 +12,6 @@ import {
   setLogLevel,
   setLoggingEnabled
 } from "./chunk-ESU52TDS.mjs";
-import {
-  __webpack_exports__env,
-  __webpack_exports__pipeline
-} from "./chunk-T465MTDX.mjs";
-import "./chunk-6W7G6WE7.mjs";
 // src/audio/MicrophoneCapture.ts
 var MicrophoneCapture = class {
@@ -28,6 +23,8 @@ var MicrophoneCapture = class {
     this.buffer = new Float32Array(0);
     this._isRecording = false;
     this._loggedFirstChunk = false;
+    /** Actual AudioContext sample rate (may differ from target on Firefox) */
+    this._nativeSampleRate = 0;
     this.config = {
       sampleRate: config.sampleRate ?? 16e3,
       chunkSize: config.chunkSize ?? 1600
@@ -62,10 +59,29 @@ var MicrophoneCapture = class {
       if (this.context.state === "suspended") {
         await this.context.resume();
       }
-      const source = this.context.createMediaStreamSource(this.stream);
+      let source;
+      try {
+        source = this.context.createMediaStreamSource(this.stream);
+        this._nativeSampleRate = this.context.sampleRate;
+      } catch (sourceErr) {
+        console.warn(
+          "[MicrophoneCapture] Cannot connect stream at",
+          this.config.sampleRate + "Hz, falling back to native rate:",
+          sourceErr.message
+        );
+        await this.context.close();
+        this.context = new AudioContext();
+        if (this.context.state === "suspended") {
+          await this.context.resume();
+        }
+        source = this.context.createMediaStreamSource(this.stream);
+        this._nativeSampleRate = this.context.sampleRate;
+        console.log("[MicrophoneCapture] Using native rate:", this._nativeSampleRate, "Hz \u2192 resampling to", this.config.sampleRate, "Hz");
+      }
       this.processor = this.context.createScriptProcessor(4096, 1, 1);
       this.processor.onaudioprocess = (e) => {
-        const input = e.inputBuffer.getChannelData(0);
+        const raw = e.inputBuffer.getChannelData(0);
+        const input = this._nativeSampleRate !== this.config.sampleRate ? this.resample(raw, this._nativeSampleRate, this.config.sampleRate) : raw;
         let rms = 0;
         let peak = 0;
         for (let i = 0; i < input.length; i++) {
@@ -123,6 +139,25 @@ var MicrophoneCapture = class {
     this.buffer = new Float32Array(0);
     this._isRecording = false;
   }
+  /**
+   * Resample audio using linear interpolation.
+   * Used when the AudioContext runs at the device's native rate (e.g. 48kHz)
+   * and we need to downsample to the target rate (e.g. 16kHz).
+   */
+  resample(input, fromRate, toRate) {
+    if (fromRate === toRate) return input;
+    const ratio = fromRate / toRate;
+    const outputLength = Math.floor(input.length / ratio);
+    const output = new Float32Array(outputLength);
+    for (let i = 0; i < outputLength; i++) {
+      const srcIdx = i * ratio;
+      const lo = Math.floor(srcIdx);
+      const hi = Math.min(lo + 1, input.length - 1);
+      const frac = srcIdx - lo;
+      output[i] = input[lo] * (1 - frac) + input[hi] * frac;
+    }
+    return output;
+  }
   floatToPCM16(float32) {
     const pcm = new Int16Array(float32.length);
     for (let i = 0; i < float32.length; i++) {
@@ -263,7 +298,8 @@ var AudioScheduler = class {
     const ctx = await this.ensureContext();
     const channels = this.options.channels ?? 1;
     if (!this.isPlaying) {
-      this.nextPlayTime = ctx.currentTime + (this.options.initialDelayS ?? 0.05);
+      const lookahead = this.options.initialLookaheadSec ?? 0.05;
+      this.nextPlayTime = ctx.currentTime + lookahead;
       this.isPlaying = true;
     }
     const audioBuffer = ctx.createBuffer(channels, audioData.length, ctx.sampleRate);
@@ -446,8 +482,8 @@ var AudioChunkCoalescer = class {
 var LAMPipeline = class {
   constructor(options = {}) {
     this.options = options;
-    this.DEFAULT_CHUNK_SAMPLES = 16e3;
-    // 1.0s at 16kHz (Wav2Vec2 requirement)
+    this.REQUIRED_SAMPLES = 16e3;
+    // 1.0s at 16kHz (LAM requirement)
     this.FRAME_RATE = 30;
     // LAM outputs 30fps
     this.buffer = new Float32Array(0);
@@ -477,20 +513,22 @@ var LAMPipeline = class {
     newBuffer.set(this.buffer, 0);
     newBuffer.set(samples, this.buffer.length);
     this.buffer = newBuffer;
-    const chunkSize = lam.chunkSamples ?? this.DEFAULT_CHUNK_SAMPLES;
-    while (this.buffer.length >= chunkSize) {
-      await this.processBuffer(lam, chunkSize);
+    while (this.buffer.length >= this.REQUIRED_SAMPLES) {
+      await this.processBuffer(lam);
+      if (this.buffer.length >= this.REQUIRED_SAMPLES) {
+        await new Promise((r) => setTimeout(r, 0));
+      }
     }
   }
   /**
    * Process accumulated buffer through LAM inference
    */
-  async processBuffer(lam, chunkSize) {
+  async processBuffer(lam) {
     try {
-      const toProcess = this.buffer.slice(0, chunkSize);
+      const toProcess = this.buffer.slice(0, this.REQUIRED_SAMPLES);
       const processedStartTime = this.bufferStartTime;
-      this.buffer = this.buffer.slice(chunkSize);
-      const processedDuration = chunkSize / (this.options.sampleRate ?? 16e3);
+      this.buffer = this.buffer.slice(this.REQUIRED_SAMPLES);
+      const processedDuration = this.REQUIRED_SAMPLES / (this.options.sampleRate ?? 16e3);
       this.bufferStartTime = processedStartTime + processedDuration;
       const result = await lam.infer(toProcess);
       const frameDuration = 1 / this.FRAME_RATE;
@@ -509,22 +547,35 @@ var LAMPipeline = class {
   /**
    * Get the frame that should be displayed at the current time
    *
-   * Timestamp-synced playback for all backends. Audio playback is delayed
-   * for slow backends (WASM gets 1s head start via AudioScheduler) so
-   * frames are ready by the time their corresponding audio plays.
+   * Automatically removes frames that have already been displayed.
+   * This prevents memory leaks from accumulating old frames.
    *
-   * Discard window is generous for WASM to handle inference jitter.
-   * Late frames play at RAF rate (~60fps) until caught up, then settle
-   * to natural 30fps pacing via timestamp gating.
+   * Discard Window (prevents premature frame discarding):
+   * - WebGPU: 0.5s (LAM inference 20-100ms + RAF jitter + React stalls)
+   * - WASM: 1.0s (LAM inference 50-500ms + higher variability)
+   *
+   * Last-Frame-Hold: Returns last valid frame instead of null to prevent
+   * avatar freezing when between frames (RAF at 60fps vs LAM at 30fps).
    *
    * @param currentTime - Current AudioContext time
    * @param lam - LAM inference engine (optional, for backend detection)
    * @returns Current frame, or last frame as fallback, or null if no frames yet
    */
   getFrameForTime(currentTime, lam) {
-    const discardWindow = lam?.backend === "wasm" ? 10 : 0.5;
+    const discardWindow = lam?.backend === "wasm" ? 1 : 0.5;
+    let discardedCount = 0;
     while (this.frameQueue.length > 0 && this.frameQueue[0].timestamp < currentTime - discardWindow) {
-      this.frameQueue.shift();
+      const discarded = this.frameQueue.shift();
+      discardedCount++;
+      if (discardedCount === 1) {
+        const ageMs = ((currentTime - discarded.timestamp) * 1e3).toFixed(0);
+        console.warn("[LAM] Frame(s) discarded as too old", {
+          ageMs,
+          discardWindowMs: discardWindow * 1e3,
+          queueLength: this.frameQueue.length,
+          backend: lam?.backend ?? "unknown"
+        });
+      }
     }
     if (this.frameQueue.length > 0 && this.frameQueue[0].timestamp <= currentTime) {
       const { frame } = this.frameQueue.shift();
@@ -543,7 +594,7 @@ var LAMPipeline = class {
    * Get current buffer fill level (0-1)
    */
   get fillLevel() {
-    return Math.min(1, this.buffer.length / this.DEFAULT_CHUNK_SAMPLES);
+    return Math.min(1, this.buffer.length / this.REQUIRED_SAMPLES);
   }
   /**
    * Get number of frames queued
@@ -560,7 +611,7 @@ var LAMPipeline = class {
   /**
    * Flush remaining buffered audio
    *
-   * Processes any remaining audio in the buffer, even if less than the chunk size.
+   * Processes any remaining audio in the buffer, even if less than REQUIRED_SAMPLES.
    * This ensures the final audio chunk generates blendshape frames.
    *
    * Should be called when audio stream ends to prevent losing the last 0-1 seconds.
@@ -571,17 +622,12 @@ var LAMPipeline = class {
     if (this.buffer.length === 0) {
       return;
     }
+    const padded = new Float32Array(this.REQUIRED_SAMPLES);
+    padded.set(this.buffer, 0);
     const processedStartTime = this.bufferStartTime;
-    const sampleRate = this.options.sampleRate ?? 16e3;
-    const minSize = lam.chunkSamples ?? this.DEFAULT_CHUNK_SAMPLES;
-    const audioToInfer = this.buffer.length >= minSize ? this.buffer : (() => {
-      const padded = new Float32Array(minSize);
-      padded.set(this.buffer, 0);
-      return padded;
-    })();
     try {
-      const result = await lam.infer(audioToInfer);
-      const actualDuration = this.buffer.length / sampleRate;
+      const result = await lam.infer(padded);
+      const actualDuration = this.buffer.length / (this.options.sampleRate ?? 16e3);
       const frameDuration = 1 / this.FRAME_RATE;
       const actualFrameCount = Math.ceil(actualDuration * this.FRAME_RATE);
       for (let i = 0; i < Math.min(actualFrameCount, result.blendshapes.length); i++) {
@@ -640,13 +686,12 @@ var SyncedAudioPipeline = class extends EventEmitter {
     this.monitorInterval = null;
     this.frameAnimationId = null;
     const sampleRate = options.sampleRate ?? 16e3;
-    if (!options.lam.isLoaded) {
-      throw new Error(
-        "LipSyncBackend must be loaded before constructing SyncedAudioPipeline. Call lam.load() first so backend type is known for timing configuration."
-      );
-    }
-    const initialDelayS = options.lam.backend === "wasm" ? 1 : 0.05;
-    this.scheduler = new AudioScheduler({ sampleRate, initialDelayS });
+    const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
+    const audioDelayMs = options.audioDelayMs ?? autoDelay;
+    this.scheduler = new AudioScheduler({
+      sampleRate,
+      initialLookaheadSec: audioDelayMs / 1e3
+    });
     this.coalescer = new AudioChunkCoalescer({
       sampleRate,
       targetDurationMs: options.chunkTargetMs ?? 200
@@ -2014,9 +2059,7 @@ function formatBytes(bytes) {
 function isIOSSafari() {
   if (typeof navigator === "undefined") return false;
   const ua = navigator.userAgent.toLowerCase();
-  return /iphone|ipad|ipod/.test(ua) || // Safari on macOS could also have issues, but less severe
-  // Only force WASM on actual iOS devices
-  /safari/.test(ua) && /mobile/.test(ua) && !/chrome|crios|fxios/.test(ua);
+  return /iphone|ipad|ipod/.test(ua) && /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
 }
 function isIOS() {
   if (typeof navigator === "undefined") return false;
@@ -2074,10 +2117,7 @@ function getOptimalWasmThreads() {
   return 4;
 }
 function shouldEnableWasmProxy() {
-  if (isMobile()) {
-    return false;
-  }
-  return true;
+  return false;
 }
 function isSafari() {
   if (typeof navigator === "undefined") return false;
@@ -2092,7 +2132,7 @@ function isSpeechRecognitionAvailable() {
   return "SpeechRecognition" in window || "webkitSpeechRecognition" in window;
 }
 function shouldUseNativeASR() {
-  return isIOS() && isSpeechRecognitionAvailable();
+  return (isIOS() || isSafari()) && isSpeechRecognitionAvailable();
 }
 function shouldUseServerLipSync() {
   return isIOS();
@@ -2105,11 +2145,13 @@ var loadedBackend = null;
 var WASM_CDN_PATH = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
 async function isWebGPUAvailable() {
   if (isIOS()) {
-    logger.debug("WebGPU check: iOS detected, returning false");
+    logger.debug("WebGPU check: disabled on iOS (asyncify bundle crashes WebKit)");
     return false;
   }
   if (!hasWebGPUApi()) {
-    logger.debug("WebGPU check: navigator.gpu not available");
+    logger.debug("WebGPU check: navigator.gpu not available", {
+      isSecureContext: typeof window !== "undefined" ? window.isSecureContext : "N/A"
+    });
     return false;
   }
   try {
@@ -2133,14 +2175,20 @@ async function isWebGPUAvailable() {
 }
 var iosWasmPatched = false;
 function applyIOSWasmMemoryPatch() {
-  if (iosWasmPatched || !isIOS()) return;
+  if (iosWasmPatched || !isIOSSafari()) return;
   iosWasmPatched = true;
   const OrigMemory = WebAssembly.Memory;
-  const MAX_IOS_PAGES = 16384;
-  logger.info("Applying iOS WASM memory patch (max capped to 1GB, shared preserved)");
+  const MAX_IOS_PAGES = 32768;
+  logger.info("Applying iOS WASM memory patch (max\u21922GB, shared preserved)");
   WebAssembly.Memory = function IOSPatchedMemory(descriptor) {
     const patched = { ...descriptor };
     if (patched.maximum !== void 0 && patched.maximum > MAX_IOS_PAGES) {
+      logger.info("iOS memory patch: capping maximum", {
+        original: patched.maximum,
+        capped: MAX_IOS_PAGES,
+        shared: patched.shared,
+        initial: patched.initial
+      });
       patched.maximum = MAX_IOS_PAGES;
     }
     return new OrigMemory(patched);
@@ -2174,7 +2222,10 @@ async function getOnnxRuntime(backend) {
   logger.info(`Loading ONNX Runtime with ${backend} backend...`);
   applyIOSWasmMemoryPatch();
   try {
-    if (backend === "wasm") {
+    if (backend === "wasm" && (isIOS() || isSafari())) {
+      const module = await import("onnxruntime-web/wasm");
+      ortInstance = module.default || module;
+    } else if (backend === "wasm") {
       const module = await import("onnxruntime-web");
       ortInstance = module.default || module;
     } else {
@@ -2218,6 +2269,14 @@ function getSessionOptions(backend) {
       graphOptimizationLevel: "all"
     };
   }
+  if (isIOS()) {
+    return {
+      executionProviders: ["wasm"],
+      graphOptimizationLevel: "basic",
+      enableCpuMemArena: false,
+      enableMemPattern: false
+    };
+  }
   return {
     executionProviders: ["wasm"],
     graphOptimizationLevel: "all"
@@ -2249,6 +2308,16 @@ function getLoadedBackend() {
 function isOnnxRuntimeLoaded() {
   return ortInstance !== null;
 }
+async function preloadOnnxRuntime(preference = "auto") {
+  if (ortInstance) {
+    logger.info("ONNX Runtime already preloaded", { backend: loadedBackend });
+    return loadedBackend;
+  }
+  logger.info("Preloading ONNX Runtime...", { preference });
+  const { backend } = await getOnnxRuntimeForPreference(preference);
+  logger.info("ONNX Runtime preloaded", { backend });
+  return backend;
+}
 // src/inference/blendshapeUtils.ts
 var LAM_BLENDSHAPES = [
@@ -2444,6 +2513,7 @@ var CTC_VOCAB = [
 ];
 var Wav2Vec2Inference = class {
   constructor(config) {
+    this.modelId = "wav2vec2";
     this.session = null;
     this.ort = null;
     this._backend = "wasm";
@@ -2482,38 +2552,108 @@ var Wav2Vec2Inference = class {
       this.ort = ort;
       this._backend = backend;
       logger2.info("ONNX Runtime loaded", { backend: this._backend });
-      const cache = getModelCache();
       const modelUrl = this.config.modelUrl;
-      const isCached = await cache.has(modelUrl);
-      let modelBuffer;
-      if (isCached) {
-        logger2.debug("Loading model from cache", { modelUrl });
-        modelBuffer = await cache.get(modelUrl);
-        if (!modelBuffer) {
-          logger2.warn("Cache corruption detected, clearing and retrying", { modelUrl });
-          await cache.delete(modelUrl);
-          logger2.info("Corrupted cache entry deleted, fetching fresh model", { modelUrl });
-          modelBuffer = await fetchWithCache(modelUrl);
+      const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
+      const sessionOptions = getSessionOptions(this._backend);
+      let isCached = false;
+      if (isIOS()) {
+        logger2.info("iOS: passing model URLs directly to ORT (low-memory path)", {
+          modelUrl,
+          dataUrl
+        });
+        if (dataUrl) {
+          const dataFilename = dataUrl.split("/").pop();
+          logger2.info("iOS: setting externalData", { dataFilename, dataUrl });
+          sessionOptions.externalData = [{
+            path: dataFilename,
+            data: dataUrl
+            // URL string — ORT fetches directly into WASM
+          }];
         }
+        logger2.info("iOS: calling InferenceSession.create() with URL string", {
+          modelUrl,
+          sessionOptions: JSON.stringify(
+            sessionOptions,
+            (_, v) => typeof v === "string" && v.length > 100 ? v.slice(0, 100) + "..." : v
+          )
+        });
+        try {
+          this.session = await this.ort.InferenceSession.create(modelUrl, sessionOptions);
+        } catch (sessionErr) {
+          logger2.error("iOS: InferenceSession.create() failed", {
+            error: sessionErr instanceof Error ? sessionErr.message : String(sessionErr),
+            errorType: sessionErr?.constructor?.name,
+            stack: sessionErr instanceof Error ? sessionErr.stack : void 0
+          });
+          throw sessionErr;
+        }
+        logger2.info("iOS: session created successfully", {
+          inputNames: this.session.inputNames,
+          outputNames: this.session.outputNames
+        });
       } else {
-        logger2.debug("Fetching and caching model", { modelUrl });
-        modelBuffer = await fetchWithCache(modelUrl);
-      }
-      if (!modelBuffer) {
-        const errorMsg = `Failed to load model: ${modelUrl}. Model buffer is null or undefined even after retry.`;
-        logger2.error(errorMsg, { modelUrl, isCached });
-        throw new Error(errorMsg);
+        const cache = getModelCache();
+        isCached = await cache.has(modelUrl);
+        let modelBuffer;
+        if (isCached) {
+          logger2.debug("Loading model from cache", { modelUrl });
+          modelBuffer = await cache.get(modelUrl);
+          if (!modelBuffer) {
+            logger2.warn("Cache corruption detected, clearing and retrying", { modelUrl });
+            await cache.delete(modelUrl);
+            modelBuffer = await fetchWithCache(modelUrl);
+          }
+        } else {
+          logger2.debug("Fetching and caching model", { modelUrl });
+          modelBuffer = await fetchWithCache(modelUrl);
+        }
+        if (!modelBuffer) {
+          throw new Error(`Failed to load model: ${modelUrl}`);
+        }
+        let externalDataBuffer = null;
+        if (dataUrl) {
+          try {
+            const isDataCached = await cache.has(dataUrl);
+            if (isDataCached) {
+              logger2.debug("Loading external data from cache", { dataUrl });
+              externalDataBuffer = await cache.get(dataUrl);
+              if (!externalDataBuffer) {
+                logger2.warn("Cache corruption for external data, retrying", { dataUrl });
+                await cache.delete(dataUrl);
+                externalDataBuffer = await fetchWithCache(dataUrl);
+              }
+            } else {
+              logger2.info("Fetching external model data", {
+                dataUrl,
+                note: "This may be a large download (383MB+)"
+              });
+              externalDataBuffer = await fetchWithCache(dataUrl);
+            }
+            logger2.info("External data loaded", {
+              size: formatBytes(externalDataBuffer.byteLength)
+            });
+          } catch (err) {
+            logger2.debug("No external data file found (single-file model)", {
+              dataUrl,
+              error: err.message
+            });
+          }
+        }
+        logger2.debug("Creating ONNX session", {
+          graphSize: formatBytes(modelBuffer.byteLength),
+          externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
+          backend: this._backend
+        });
+        if (externalDataBuffer) {
+          const dataFilename = dataUrl.split("/").pop();
+          sessionOptions.externalData = [{
+            path: dataFilename,
+            data: new Uint8Array(externalDataBuffer)
+          }];
+        }
+        const modelData = new Uint8Array(modelBuffer);
+        this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
       }
-      logger2.debug("Creating ONNX session", {
-        size: formatBytes(modelBuffer.byteLength),
-        backend: this._backend
-      });
-      const sessionOptions = getSessionOptions(this._backend);
-      logger2.info("Creating session with execution provider", {
-        executionProvider: this._backend
-      });
-      const modelData = new Uint8Array(modelBuffer);
-      this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
       logger2.info("ONNX session created successfully", {
         executionProvider: this._backend,
         backend: this._backend
@@ -2528,7 +2668,7 @@ var Wav2Vec2Inference = class {
       span?.setAttributes({
         "model.backend": this._backend,
         "model.load_time_ms": loadTimeMs,
-        "model.cached": isCached
+        "model.cached": !isIOS() && isCached
       });
       span?.end();
       telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
@@ -2731,319 +2871,550 @@ LAM_BLENDSHAPES.forEach((name, index) => {
 });
 var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
-// src/inference/WhisperInference.ts
-var logger4 = createLogger("Whisper");
-var WhisperInference = class _WhisperInference {
-  constructor(config = {}) {
-    this.pipeline = null;
-    this.currentModel = null;
+// src/inference/kaldiFbank.ts
+function fft(re, im) {
+  const n = re.length;
+  for (let i = 1, j = 0; i < n; i++) {
+    let bit = n >> 1;
+    while (j & bit) {
+      j ^= bit;
+      bit >>= 1;
+    }
+    j ^= bit;
+    if (i < j) {
+      let tmp = re[i];
+      re[i] = re[j];
+      re[j] = tmp;
+      tmp = im[i];
+      im[i] = im[j];
+      im[j] = tmp;
+    }
+  }
+  for (let len = 2; len <= n; len *= 2) {
+    const halfLen = len / 2;
+    const angle = -2 * Math.PI / len;
+    const wRe = Math.cos(angle);
+    const wIm = Math.sin(angle);
+    for (let i = 0; i < n; i += len) {
+      let curRe = 1;
+      let curIm = 0;
+      for (let j = 0; j < halfLen; j++) {
+        const a = i + j;
+        const b = a + halfLen;
+        const tRe = curRe * re[b] - curIm * im[b];
+        const tIm = curRe * im[b] + curIm * re[b];
+        re[b] = re[a] - tRe;
+        im[b] = im[a] - tIm;
+        re[a] += tRe;
+        im[a] += tIm;
+        const nextRe = curRe * wRe - curIm * wIm;
+        curIm = curRe * wIm + curIm * wRe;
+        curRe = nextRe;
+      }
+    }
+  }
+}
+function htkMel(freq) {
+  return 1127 * Math.log(1 + freq / 700);
+}
+function htkMelInverse(mel) {
+  return 700 * (Math.exp(mel / 1127) - 1);
+}
+function buildMelFilterbank(numBins, fftSize, sampleRate, lowFreq, highFreq) {
+  const numFftBins = fftSize / 2 + 1;
+  const lowMel = htkMel(lowFreq);
+  const highMel = htkMel(highFreq);
+  const melPoints = new Float64Array(numBins + 2);
+  for (let i = 0; i < numBins + 2; i++) {
+    melPoints[i] = lowMel + (highMel - lowMel) * i / (numBins + 1);
+  }
+  const binFreqs = new Float64Array(numBins + 2);
+  for (let i = 0; i < numBins + 2; i++) {
+    binFreqs[i] = htkMelInverse(melPoints[i]) * fftSize / sampleRate;
+  }
+  const filters = [];
+  for (let m = 0; m < numBins; m++) {
+    const left = binFreqs[m];
+    const center = binFreqs[m + 1];
+    const right = binFreqs[m + 2];
+    const startBin = Math.max(0, Math.ceil(left));
+    const endBin = Math.min(numFftBins - 1, Math.floor(right));
+    const weights = new Float32Array(endBin - startBin + 1);
+    for (let k = startBin; k <= endBin; k++) {
+      if (k <= center) {
+        weights[k - startBin] = center - left > 0 ? (k - left) / (center - left) : 0;
+      } else {
+        weights[k - startBin] = right - center > 0 ? (right - k) / (right - center) : 0;
+      }
+    }
+    filters.push({ startBin, weights });
+  }
+  return filters;
+}
+function createHammingWindow(length) {
+  const window2 = new Float32Array(length);
+  for (let i = 0; i < length; i++) {
+    window2[i] = 0.54 - 0.46 * Math.cos(2 * Math.PI * i / (length - 1));
+  }
+  return window2;
+}
+function computeKaldiFbank(audio, sampleRate, numMelBins, opts) {
+  const frameLengthMs = opts?.frameLengthMs ?? 25;
+  const frameShiftMs = opts?.frameShiftMs ?? 10;
+  const lowFreq = opts?.lowFreq ?? 20;
+  const highFreq = opts?.highFreq ?? sampleRate / 2;
+  const dither = opts?.dither ?? 0;
+  const preemphasis = opts?.preemphasis ?? 0.97;
+  const frameLengthSamples = Math.round(sampleRate * frameLengthMs / 1e3);
+  const frameShiftSamples = Math.round(sampleRate * frameShiftMs / 1e3);
+  const scaled = new Float32Array(audio.length);
+  for (let i = 0; i < audio.length; i++) {
+    scaled[i] = audio[i] * 32768;
+  }
+  if (dither > 0) {
+    for (let i = 0; i < scaled.length; i++) {
+      const u1 = Math.random();
+      const u2 = Math.random();
+      scaled[i] += dither * Math.sqrt(-2 * Math.log(u1 + 1e-10)) * Math.cos(2 * Math.PI * u2);
+    }
+  }
+  const numFrames = Math.max(0, Math.floor((scaled.length - frameLengthSamples) / frameShiftSamples) + 1);
+  if (numFrames === 0) {
+    return new Float32Array(0);
+  }
+  let fftSize = 1;
+  while (fftSize < frameLengthSamples) fftSize *= 2;
+  const numFftBins = fftSize / 2 + 1;
+  const window2 = createHammingWindow(frameLengthSamples);
+  const filters = buildMelFilterbank(numMelBins, fftSize, sampleRate, lowFreq, highFreq);
+  const output = new Float32Array(numFrames * numMelBins);
+  const fftRe = new Float64Array(fftSize);
+  const fftIm = new Float64Array(fftSize);
+  for (let f = 0; f < numFrames; f++) {
+    const offset = f * frameShiftSamples;
+    fftRe.fill(0);
+    fftIm.fill(0);
+    for (let i = 0; i < frameLengthSamples; i++) {
+      let sample = scaled[offset + i];
+      if (preemphasis > 0 && i > 0) {
+        sample -= preemphasis * scaled[offset + i - 1];
+      } else if (preemphasis > 0 && i === 0 && offset > 0) {
+        sample -= preemphasis * scaled[offset - 1];
+      }
+      fftRe[i] = sample * window2[i];
+    }
+    fft(fftRe, fftIm);
+    const outOffset = f * numMelBins;
+    for (let m = 0; m < numMelBins; m++) {
+      const filter = filters[m];
+      let energy = 0;
+      for (let k = 0; k < filter.weights.length; k++) {
+        const bin = filter.startBin + k;
+        if (bin < numFftBins) {
+          const powerSpec = fftRe[bin] * fftRe[bin] + fftIm[bin] * fftIm[bin];
+          energy += filter.weights[k] * powerSpec;
+        }
+      }
+      output[outOffset + m] = Math.log(Math.max(energy, 1e-10));
+    }
+  }
+  return output;
+}
+function applyLFR(features, featureDim, lfrM = 7, lfrN = 6) {
+  const numFrames = features.length / featureDim;
+  if (numFrames === 0) return new Float32Array(0);
+  const leftPad = Math.floor((lfrM - 1) / 2);
+  const paddedLen = numFrames + leftPad;
+  const numOutputFrames = Math.ceil(paddedLen / lfrN);
+  const outputDim = featureDim * lfrM;
+  const output = new Float32Array(numOutputFrames * outputDim);
+  for (let i = 0; i < numOutputFrames; i++) {
+    const startFrame = i * lfrN - leftPad;
+    for (let j = 0; j < lfrM; j++) {
+      let srcFrame = startFrame + j;
+      if (srcFrame < 0) srcFrame = 0;
+      if (srcFrame >= numFrames) srcFrame = numFrames - 1;
+      const srcOffset = srcFrame * featureDim;
+      const dstOffset = i * outputDim + j * featureDim;
+      for (let k = 0; k < featureDim; k++) {
+        output[dstOffset + k] = features[srcOffset + k];
+      }
+    }
+  }
+  return output;
+}
+function applyCMVN(features, dim, negMean, invStddev) {
+  for (let i = 0; i < features.length; i++) {
+    const d = i % dim;
+    features[i] = (features[i] + negMean[d]) * invStddev[d];
+  }
+  return features;
+}
+function parseCMVNFromMetadata(negMeanStr, invStddevStr) {
+  const negMean = new Float32Array(
+    negMeanStr.split(",").map((s) => parseFloat(s.trim()))
+  );
+  const invStddev = new Float32Array(
+    invStddevStr.split(",").map((s) => parseFloat(s.trim()))
+  );
+  return { negMean, invStddev };
+}
+// src/inference/ctcDecoder.ts
+function resolveLanguageId(language) {
+  const map = {
+    auto: 0,
+    zh: 3,
+    en: 4,
+    yue: 7,
+    ja: 11,
+    ko: 12
+  };
+  return map[language] ?? 0;
+}
+function resolveTextNormId(textNorm) {
+  return textNorm === "without_itn" ? 15 : 14;
+}
+function parseTokensFile(content) {
+  const map = /* @__PURE__ */ new Map();
+  const lines = content.split("\n");
+  for (const line of lines) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    const lastSpace = trimmed.lastIndexOf(" ");
+    if (lastSpace === -1) continue;
+    const token = trimmed.substring(0, lastSpace);
+    const id = parseInt(trimmed.substring(lastSpace + 1), 10);
+    if (!isNaN(id)) {
+      map.set(id, token);
+    }
+  }
+  return map;
+}
+function parseStructuredToken(token) {
+  const match = token.match(/^<\|(.+)\|>$/);
+  if (!match) return null;
+  const value = match[1];
+  if (value === "zh" || value === "en" || value === "ja" || value === "ko" || value === "yue" || value === "nospeech") {
+    return { type: "language", value };
+  }
+  const emotions = ["HAPPY", "SAD", "ANGRY", "NEUTRAL", "FEARFUL", "DISGUSTED", "SURPRISED", "EMO_UNKNOWN"];
+  if (emotions.includes(value)) {
+    return { type: "emotion", value };
+  }
+  const events = ["Speech", "BGM", "Applause", "Laughter", "Crying", "Coughing", "Sneezing", "EVENT_UNKNOWN"];
+  if (events.includes(value)) {
+    return { type: "event", value };
+  }
+  if (value === "withitn" || value === "woitn" || value === "with_itn" || value === "without_itn") {
+    return { type: "textnorm", value };
+  }
+  return null;
+}
+function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) { // Per-frame argmax over logits, collapse consecutive repeats, drop ids 0/1/2, then split marker tokens from text tokens.
+  const tokenIds = [];
+  for (let t = 0; t < seqLen; t++) {
+    const offset = t * vocabSize; // logits is read as a flat array: frame t occupies [t * vocabSize, (t + 1) * vocabSize)
+    let maxIdx = 0;
+    let maxVal = logits[offset];
+    for (let v = 1; v < vocabSize; v++) {
+      if (logits[offset + v] > maxVal) { // strict comparison: on ties the lowest index wins
+        maxVal = logits[offset + v];
+        maxIdx = v;
+      }
+    }
+    tokenIds.push(maxIdx);
+  }
+  const collapsed = [];
+  let prev = -1; // -1 is never a valid index, so the first frame is always kept
+  for (const id of tokenIds) {
+    if (id !== prev) { // keep one entry per run of identical ids
+      collapsed.push(id);
+      prev = id;
+    }
+  }
+  const filtered = collapsed.filter((id) => id !== 0 && id !== 1 && id !== 2); // removed AFTER collapsing, so [5, 0, 5] becomes [5, 5]; ids 0-2 are presumably blank/special tokens - confirm against tokens.txt
+  let language;
+  let emotion;
+  let event;
+  const textTokens = [];
+  for (const id of filtered) {
+    const token = tokenMap.get(id);
+    if (!token) continue; // id missing from tokenMap, or mapped to an empty string
+    const structured = parseStructuredToken(token);
+    if (structured) {
+      if (structured.type === "language") language = structured.value; // if several markers of one type occur, the last one wins
+      else if (structured.type === "emotion") emotion = structured.value;
+      else if (structured.type === "event") event = structured.value; // "textnorm" markers match no branch here and are discarded
+    } else {
+      textTokens.push(token); // also receives <|...|> markers that parseStructuredToken did not recognize
+    }
+  }
+  let text = textTokens.join(""); // tokens are concatenated without a separator; the next line turns U+2581 into spaces (presumably the SentencePiece word-boundary marker) and trims
+  text = text.replace(/\u2581/g, " ").trim();
+  return { text, language, emotion, event }; // language, emotion and event stay undefined when no such marker was decoded
+}
+// src/inference/SenseVoiceInference.ts
+var logger4 = createLogger("SenseVoice");
+var SenseVoiceInference = class {
+  constructor(config) { // Resolves config defaults and precomputes the language / text-norm ids; no network or model work happens until load().
+    this.session = null;
+    this.ort = null;
+    this._backend = "wasm"; // placeholder; load() overwrites it with the backend actually selected
     this.isLoading = false;
-    this.actualBackend = "unknown";
+    this.inferenceQueue = Promise.resolve(); // tail of the promise chain that queueInference() appends to
+    // Preprocessing state (loaded once)
+    this.tokenMap = null;
+    this.negMean = null;
+    this.invStddev = null;
+    this.languageId = 0;
+    this.textNormId = 14;
+    const modelDir = config.modelUrl.substring(0, config.modelUrl.lastIndexOf("/")); // everything before the last slash; with no slash in modelUrl this is "" and the default below becomes "/tokens.txt"
+    const tokensUrl = config.tokensUrl ?? `${modelDir}/tokens.txt`; // default: tokens.txt in the same directory as the model
     this.config = {
-      model: config.model || "tiny",
-      multilingual: config.multilingual || false,
-      language: config.language || "en",
-      task: config.task || "transcribe",
-      dtype: config.dtype || "q8",
-      device: config.device || "auto",
-      localModelPath: config.localModelPath,
-      token: config.token,
-      suppressNonSpeech: config.suppressNonSpeech !== false
-      // Default true
+      modelUrl: config.modelUrl,
+      tokensUrl,
+      language: config.language ?? "auto",
+      textNorm: config.textNorm ?? "with_itn",
+      backend: config.backend ?? "auto"
     };
+    this.languageId = resolveLanguageId(this.config.language); // replaces the initial 0 assigned above
+    this.textNormId = resolveTextNormId(this.config.textNorm); // replaces the initial 14 assigned above
   }
-  /**
-   * Check if WebGPU is available in this browser
-   */
-  static async isWebGPUAvailable() {
-    return isWebGPUAvailable();
+  get backend() { // Backend recorded by load(), or null while no session exists.
+    return this.session ? this._backend : null; // _backend holds a value before the session is created, hence the session check
   }
-  /**
-   * Load the Whisper model pipeline
-   */
+  get isLoaded() { // True from the moment load() has created the session until dispose() clears it.
+    return this.session !== null;
+  }
+  // ─── Load ───────────────────────────────────────────────────────────────
   async load(onProgress) { // Loads ONNX Runtime, the token vocabulary and the model, creates the session, and returns a summary object; throws if already loading or loaded.
     if (this.isLoading) {
-      logger4.debug("Already loading model, waiting...");
-      while (this.isLoading) {
-        await new Promise((resolve) => setTimeout(resolve, 100));
-      }
-      return;
+      throw new Error("Model is already loading"); // a concurrent load() call is rejected instead of waiting for the first one
     }
-    const modelName = this.getModelName();
-    if (this.pipeline !== null && this.currentModel === modelName) {
-      logger4.debug("Model already loaded", { model: modelName });
-      return;
+    if (this.session) {
+      throw new Error("Model already loaded. Call dispose() first.");
     }
     this.isLoading = true;
+    const startTime = performance.now();
     const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("whisper.load", {
-      "whisper.model": modelName,
-      "whisper.dtype": this.config.dtype,
-      "whisper.device": this.config.device
+    const span = telemetry?.startSpan("SenseVoice.load", {
+      "model.url": this.config.modelUrl,
+      "model.backend_requested": this.config.backend
     });
     try {
-      const loadStart = performance.now();
-      logger4.info("Loading model", {
-        model: modelName,
-        dtype: this.config.dtype,
-        device: this.config.device,
-        multilingual: this.config.multilingual
-      });
-      if (this.pipeline !== null && this.currentModel !== modelName) {
-        logger4.debug("Disposing old model", { oldModel: this.currentModel });
-        await this.pipeline.dispose();
-        this.pipeline = null;
+      logger4.info("Loading ONNX Runtime...", { preference: this.config.backend });
+      const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
+      this.ort = ort;
+      this._backend = backend; // the backend actually selected, which may differ from the requested preference
+      logger4.info("ONNX Runtime loaded", { backend: this._backend });
+      logger4.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
+      const tokensResponse = await fetch(this.config.tokensUrl); // plain fetch: the vocabulary does not go through the model cache used below
+      if (!tokensResponse.ok) {
+        throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
       }
-      const hasWebGPU = await _WhisperInference.isWebGPUAvailable();
-      const device = this.config.device === "auto" ? hasWebGPU ? "webgpu" : "wasm" : this.config.device;
-      logger4.info("Creating pipeline", { device, hasWebGPU });
-      __webpack_exports__env.allowLocalModels = false;
-      __webpack_exports__env.allowRemoteModels = true;
-      __webpack_exports__env.useBrowserCache = false;
-      __webpack_exports__env.useCustomCache = false;
-      __webpack_exports__env.useWasmCache = false;
-      if (__webpack_exports__env.backends.onnx.wasm) {
-        __webpack_exports__env.backends.onnx.wasm.proxy = false;
-        __webpack_exports__env.backends.onnx.wasm.numThreads = 1;
+      const tokensText = await tokensResponse.text();
+      this.tokenMap = parseTokensFile(tokensText);
+      logger4.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
+      const sessionOptions = getSessionOptions(this._backend);
+      if (this._backend === "webgpu") {
+        sessionOptions.graphOptimizationLevel = "basic"; // writes to the object returned by getSessionOptions(); presumably a fresh object per call - confirm
       }
-      logger4.info("Configured transformers.js env", {
-        allowLocalModels: __webpack_exports__env.allowLocalModels,
-        useBrowserCache: __webpack_exports__env.useBrowserCache,
-        useWasmCache: __webpack_exports__env.useWasmCache
-      });
-      const pipelineOptions = {
-        dtype: this.config.dtype,
-        device,
-        progress_callback: onProgress,
-        // For medium models, use no_attentions revision to save memory
-        revision: modelName.includes("whisper-medium") ? "no_attentions" : "main",
-        // Pass HuggingFace token to bypass rate limits
-        ...this.config.token && { token: this.config.token }
-      };
-      if (device === "webgpu") {
-        pipelineOptions.session_options = {
-          executionProviders: ["webgpu"]
-        };
-        logger4.info("Forcing WebGPU execution providers");
+      let isCached = false;
+      if (isIOS()) { // NOTE(review): on this path the model cache is bypassed and onProgress is never called
+        logger4.info("iOS: passing model URL directly to ORT (low-memory path)", {
+          modelUrl: this.config.modelUrl
+        });
+        this.session = await this.ort.InferenceSession.create(
+          this.config.modelUrl,
+          sessionOptions
+        );
+      } else {
+        const cache = getModelCache();
+        isCached = await cache.has(this.config.modelUrl);
+        let modelBuffer;
+        if (isCached) {
+          logger4.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
+          modelBuffer = await cache.get(this.config.modelUrl); // NOTE(review): no check for an empty cache result here; the next line would throw on a null buffer
+          onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength); // cache hit: progress is reported once, already complete
+        } else {
+          logger4.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
+          modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
+        }
+        logger4.debug("Creating ONNX session", {
+          size: formatBytes(modelBuffer.byteLength),
+          backend: this._backend
+        });
+        const modelData = new Uint8Array(modelBuffer);
+        this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
       }
-      this.pipeline = await __webpack_exports__pipeline(
-        "automatic-speech-recognition",
-        modelName,
-        pipelineOptions
-      );
-      this.actualBackend = device;
-      this.currentModel = modelName;
-      const loadTimeMs = performance.now() - loadStart;
-      logger4.info("Model loaded successfully", {
-        model: modelName,
-        loadTimeMs: Math.round(loadTimeMs)
+      try {
+        const metadata = this.session.handler?.metadata; // NOTE(review): handler looks like an ORT-internal field - confirm it is stable across onnxruntime-web versions
+        if (metadata?.neg_mean && metadata?.inv_stddev) { // both entries must be present, otherwise CMVN stays disabled
+          const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
+          this.negMean = cmvn.negMean;
+          this.invStddev = cmvn.invStddev;
+          logger4.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
+        } else {
+          logger4.warn("CMVN not found in model metadata \u2014 features will not be normalized");
+        }
+      } catch (cmvnErr) { // a CMVN read failure is logged and loading continues without normalization
+        logger4.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
+      }
+      const loadTimeMs = performance.now() - startTime; // covers runtime init, tokens fetch, model fetch and session creation
+      logger4.info("SenseVoice model loaded", {
+        backend: this._backend,
+        loadTimeMs: Math.round(loadTimeMs),
+        vocabSize: this.tokenMap.size,
+        inputs: this.session.inputNames,
+        outputs: this.session.outputNames,
+        hasCMVN: this.negMean !== null
       });
       span?.setAttributes({
-        "whisper.load_time_ms": loadTimeMs
+        "model.backend": this._backend,
+        "model.load_time_ms": loadTimeMs,
+        "model.cached": !isIOS() && isCached, // isCached is only ever set on the non-iOS path
+        "model.vocab_size": this.tokenMap.size
       });
       span?.end();
-    } catch (error) {
-      const errorDetails = {
-        message: error instanceof Error ? error.message : String(error),
-        stack: error instanceof Error ? error.stack : void 0,
-        name: error instanceof Error ? error.name : void 0,
-        error
+      telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
+        model: "sensevoice",
+        backend: this._backend
+      });
+      return {
+        backend: this._backend,
+        loadTimeMs,
+        inputNames: [...this.session.inputNames], // copies, so callers cannot mutate the session arrays
+        outputNames: [...this.session.outputNames],
+        vocabSize: this.tokenMap.size
       };
-      logger4.error("Failed to load model", errorDetails);
-      span?.endWithError(error);
+    } catch (error) { // NOTE(review): this.ort and this.tokenMap assigned above are left in place on failure; only dispose() clears them
+      span?.endWithError(error instanceof Error ? error : new Error(String(error)));
+      telemetry?.incrementCounter("omote.errors.total", 1, {
+        model: "sensevoice",
+        error_type: "load_failed"
+      });
       throw error;
     } finally {
       this.isLoading = false;
     }
   }
+  // ─── Transcribe ─────────────────────────────────────────────────────────
   /**
-   * Transcribe audio to text
+   * Transcribe audio samples to text
    *
-   * @param audio Audio samples (Float32Array, 16kHz mono)
-   * @param options Transcription options
+   * @param audioSamples Float32Array of audio samples at 16kHz, [-1, 1] range
+   * @returns Transcription result with text, emotion, language, and event
    */
-  async transcribe(audio, options) {
-    if (!this.pipeline) {
+  async transcribe(audioSamples) { // Checks that load() has completed, then queues a private copy of the samples for inference and returns the queued promise.
+    if (!this.session || !this.ort || !this.tokenMap) {
       throw new Error("Model not loaded. Call load() first.");
     }
-    const audioCopy = new Float32Array(audio);
-    const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("whisper.transcribe", {
-      "audio.samples": audioCopy.length,
-      "audio.duration_s": audioCopy.length / 16e3,
-      "whisper.model": this.currentModel
-    });
-    try {
-      const inferStart = performance.now();
-      const audioDurationSec = audioCopy.length / 16e3;
-      const isShortAudio = audioDurationSec < 10;
-      logger4.debug("Starting transcription", {
-        audioSamples: audioCopy.length,
-        durationSeconds: audioDurationSec.toFixed(2),
-        isShortAudio
-      });
-      const transcribeOptions = {
-        // Decoding strategy
-        top_k: 0,
-        do_sample: false,
-        // Adaptive chunking: Disable for short audio, enable for long audio
-        chunk_length_s: options?.chunkLengthS || (isShortAudio ? audioDurationSec : 30),
-        stride_length_s: options?.strideLengthS || (isShortAudio ? 0 : 5),
-        // Timestamps
-        return_timestamps: options?.returnTimestamps || false,
-        force_full_sequences: false
-      };
-      if (this.config.multilingual) {
-        transcribeOptions.language = options?.language || this.config.language;
-        transcribeOptions.task = options?.task || this.config.task;
-      }
-      const rawResult = await this.pipeline(audioCopy, transcribeOptions);
-      const result = Array.isArray(rawResult) ? rawResult[0] : rawResult;
-      const inferenceTimeMs = performance.now() - inferStart;
-      let cleanedText = result.text;
-      if (this.config.suppressNonSpeech) {
-        cleanedText = this.removeNonSpeechTokens(cleanedText);
-      }
-      const transcription = {
-        text: cleanedText,
-        language: this.config.language,
-        inferenceTimeMs,
-        chunks: result.chunks
-      };
-      logger4.debug("Transcription complete", {
-        text: transcription.text,
-        inferenceTimeMs: Math.round(inferenceTimeMs),
-        chunksCount: result.chunks?.length || 0
-      });
-      span?.setAttributes({
-        "whisper.inference_time_ms": inferenceTimeMs,
-        "whisper.text_length": transcription.text.length
-      });
-      span?.end();
-      return transcription;
-    } catch (error) {
-      logger4.error("Transcribe error", { error });
-      span?.endWithError(error);
-      throw new Error(`Whisper transcription failed: ${error}`);
-    }
+    const audio = new Float32Array(audioSamples); // copy, so later writes to the caller buffer cannot affect the queued run
+    return this.queueInference(audio);
   }
-  /**
-   * Transcribe with streaming chunks (progressive results)
-   *
-   * @param audio Audio samples
-   * @param onChunk Called when each chunk is finalized
-   * @param onUpdate Called after each generation step (optional)
-   */
-  async transcribeStreaming(audio, onChunk, onUpdate, options) {
-    if (!this.pipeline) {
-      throw new Error("Model not loaded. Call load() first.");
-    }
-    const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("whisper.transcribe_streaming", {
-      "audio.samples": audio.length,
-      "audio.duration_s": audio.length / 16e3
-    });
-    try {
-      const inferStart = performance.now();
-      logger4.debug("Starting streaming transcription", {
-        audioSamples: audio.length,
-        durationSeconds: (audio.length / 16e3).toFixed(2)
-      });
-      const transcribeOptions = {
-        top_k: 0,
-        do_sample: false,
-        chunk_length_s: options?.chunkLengthS || 30,
-        stride_length_s: options?.strideLengthS || 5,
-        return_timestamps: true,
-        force_full_sequences: false
-      };
-      if (this.config.multilingual) {
-        transcribeOptions.language = options?.language || this.config.language;
-        transcribeOptions.task = options?.task || this.config.task;
-      }
-      const rawResult = await this.pipeline(audio, transcribeOptions);
-      const result = Array.isArray(rawResult) ? rawResult[0] : rawResult;
-      const inferenceTimeMs = performance.now() - inferStart;
-      if (result.chunks && onChunk) {
-        for (const chunk of result.chunks) {
-          onChunk({
-            text: chunk.text,
-            timestamp: chunk.timestamp
+  queueInference(audio) { // Appends one transcription to this.inferenceQueue so runs execute one after another in call order; resolves with the result object.
+    return new Promise((resolve, reject) => {
+      this.inferenceQueue = this.inferenceQueue.then(async () => { // errors raised inside the try below go to reject(), so later queued runs still execute
+        const telemetry = getTelemetry();
+        const span = telemetry?.startSpan("SenseVoice.transcribe", {
+          "inference.backend": this._backend,
+          "inference.input_samples": audio.length
+        });
+        try {
+          const startTime = performance.now();
+          const preprocessStart = performance.now();
+          const fbank = computeKaldiFbank(audio, 16e3, 80); // presumably sample rate 16000 and 80 values per frame (numFrames below divides by 80) - confirm against the helper
+          const numFrames = fbank.length / 80;
+          if (numFrames === 0) { // NOTE(review): this early return never calls span?.end() and records no inference counter, so the span opened above stays open
+            resolve({
+              text: "",
+              inferenceTimeMs: performance.now() - startTime,
+              preprocessTimeMs: performance.now() - preprocessStart
+            });
+            return;
+          }
+          const lfrFeatures = applyLFR(fbank, 80, 7, 6); // output rows are 560 = 7 * 80 values wide (see the divisor below); presumably 7 stacked frames with a shift of 6 - confirm
+          const numLfrFrames = lfrFeatures.length / 560;
+          if (this.negMean && this.invStddev) { // skipped when load() found no CMVN statistics in the model metadata
+            applyCMVN(lfrFeatures, 560, this.negMean, this.invStddev); // return value unused - presumably normalizes lfrFeatures in place
+          }
+          const preprocessTimeMs = performance.now() - preprocessStart;
+          const ort = this.ort;
+          const feeds = {
+            x: new ort.Tensor("float32", lfrFeatures, [1, numLfrFrames, 560]),
+            x_length: new ort.Tensor("int32", new Int32Array([numLfrFrames]), [1]),
+            language: new ort.Tensor("int32", new Int32Array([this.languageId]), [1]),
+            text_norm: new ort.Tensor("int32", new Int32Array([this.textNormId]), [1])
+          };
+          const results = await this.session.run(feeds); // throws a TypeError if dispose() cleared the session while this run was queued
+          const logitsOutput = results["logits"];
+          if (!logitsOutput) {
+            throw new Error('Model output missing "logits" tensor');
+          }
+          const logitsData = logitsOutput.data;
+          const logitsDims = logitsOutput.dims;
+          const seqLen = logitsDims[1]; // dims are read as [batch, seqLen, vocabSize]; only the first batch entry is decoded
+          const vocabSize = logitsDims[2];
+          const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
+          const inferenceTimeMs = performance.now() - startTime; // measured from startTime, so it includes preprocessing and decoding
+          logger4.trace("Transcription complete", {
+            text: decoded.text.substring(0, 50),
+            language: decoded.language,
+            emotion: decoded.emotion,
+            event: decoded.event,
+            preprocessTimeMs: Math.round(preprocessTimeMs * 100) / 100,
+            inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
+            numFrames,
+            numLfrFrames
+          });
+          span?.setAttributes({
+            "inference.duration_ms": inferenceTimeMs,
+            "inference.preprocess_ms": preprocessTimeMs,
+            "inference.num_frames": numFrames,
+            "inference.text_length": decoded.text.length
+          });
+          span?.end();
+          telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
+            model: "sensevoice",
+            backend: this._backend
+          });
+          telemetry?.incrementCounter("omote.inference.total", 1, {
+            model: "sensevoice",
+            backend: this._backend,
+            status: "success"
+          });
+          resolve({
+            text: decoded.text,
+            language: decoded.language,
+            emotion: decoded.emotion,
+            event: decoded.event,
+            inferenceTimeMs,
+            preprocessTimeMs
           });
+        } catch (err) {
+          span?.endWithError(err instanceof Error ? err : new Error(String(err)));
+          telemetry?.incrementCounter("omote.inference.total", 1, {
+            model: "sensevoice",
+            backend: this._backend,
+            status: "error"
+          });
+          reject(err); // the original error value is passed through unwrapped
         }
-      }
-      if (onUpdate) {
-        onUpdate(result.text);
-      }
-      logger4.debug("Streaming transcription complete", {
-        text: result.text,
-        inferenceTimeMs: Math.round(inferenceTimeMs),
-        chunksCount: result.chunks?.length || 0
-      });
-      span?.setAttributes({
-        "whisper.inference_time_ms": inferenceTimeMs,
-        "whisper.chunks_count": result.chunks?.length || 0
       });
-      span?.end();
-      return {
-        text: result.text,
-        language: this.config.language,
-        inferenceTimeMs,
-        chunks: result.chunks
-      };
-    } catch (error) {
-      logger4.error("Streaming transcribe error", { error });
-      span?.endWithError(error);
-      throw new Error(`Whisper streaming transcription failed: ${error}`);
-    }
+    });
   }
-  /**
-   * Dispose of the model and free resources
-   */
+  // ─── Dispose ──────────────────────────────────────────────────────────
   async dispose() { // Releases the ONNX session (if any) and clears the state loaded by load(); config, languageId and textNormId are kept.
-    if (this.pipeline) {
-      logger4.debug("Disposing model", { model: this.currentModel });
-      await this.pipeline.dispose();
-      this.pipeline = null;
-      this.currentModel = null;
-    }
-  }
-  /**
-   * Check if model is loaded
-   */
-  get isLoaded() {
-    return this.pipeline !== null;
-  }
-  /**
-   * Get the backend being used (webgpu or wasm)
-   */
-  get backend() {
-    return this.actualBackend;
-  }
-  /**
-   * Get the full model name used by transformers.js
-   */
-  getModelName() {
-    if (this.config.localModelPath) {
-      return this.config.localModelPath;
-    }
-    let modelName = `onnx-community/whisper-${this.config.model}`;
-    if (!this.config.multilingual) {
-      modelName += ".en";
+    if (this.session) { // NOTE(review): does not wait for this.inferenceQueue; a run still queued will find a null session and reject
+      await this.session.release();
+      this.session = null;
     }
-    return modelName;
-  }
-  /**
-   * Remove non-speech event tokens from transcription
-   *
-   * Whisper outputs special tokens for non-speech events like:
-   * [LAUGHTER], [APPLAUSE], [MUSIC], [BLANK_AUDIO], [CLICKING], etc.
-   *
-   * This method strips these tokens and cleans up extra whitespace.
-   */
-  removeNonSpeechTokens(text) {
-    const cleaned = text.replace(/\[[\w\s_]+\]/g, "");
-    return cleaned.replace(/\s+/g, " ").trim();
+    this.ort = null; // cleared even when no session existed, e.g. after a load() that failed part-way
+    this.tokenMap = null;
+    this.negMean = null;
+    this.invStddev = null;
   }
 };
@@ -3051,18 +3422,13 @@ var WhisperInference = class _WhisperInference {
 var logger5 = createLogger("Wav2ArkitCpu");
 var Wav2ArkitCpuInference = class {
   constructor(config) {
+    this.modelId = "wav2arkit_cpu";
     this.session = null;
     this.ort = null;
     this._backend = "wasm";
     this.isLoading = false;
     // Inference queue for handling concurrent calls
     this.inferenceQueue = Promise.resolve();
-    /**
-     * Preferred chunk size: 4000 samples (250ms at 16kHz).
-     * wav2arkit_cpu accepts variable-length input, so we use smaller chunks
-     * for lower latency on WASM (vs 16000 for Wav2Vec2's fixed requirement).
-     */
-    this.chunkSamples = 4e3;
     this.config = config;
   }
   get backend() {
@@ -3096,23 +3462,25 @@ var Wav2ArkitCpuInference = class {
       this._backend = backend;
       logger5.info("ONNX Runtime loaded", { backend: this._backend });
       const modelUrl = this.config.modelUrl;
-      const sessionOptions = { ...getSessionOptions(this._backend) };
-      let isCached = false;
-      if (isIOS() && this.config.modelDataUrl) {
-        const dataFilename = this.config.modelDataUrl.split("/").pop();
-        sessionOptions.externalData = [{
-          path: dataFilename,
-          data: this.config.modelDataUrl
-        }];
-        logger5.info("iOS: URL-based session creation (ORT handles fetch internally)", {
+      const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
+      const sessionOptions = getSessionOptions(this._backend);
+      if (isIOS()) {
+        logger5.info("iOS: passing model URLs directly to ORT (low-memory path)", {
           modelUrl,
-          dataFile: dataFilename,
-          dataUrl: this.config.modelDataUrl
+          dataUrl
         });
+        if (dataUrl) {
+          const dataFilename = dataUrl.split("/").pop();
+          sessionOptions.externalData = [{
+            path: dataFilename,
+            data: dataUrl
+            // URL string — ORT fetches directly into WASM
+          }];
+        }
         this.session = await this.ort.InferenceSession.create(modelUrl, sessionOptions);
       } else {
         const cache = getModelCache();
-        isCached = await cache.has(modelUrl);
+        const isCached = await cache.has(modelUrl);
         let modelBuffer;
         if (isCached) {
           logger5.debug("Loading model from cache", { modelUrl });
@@ -3123,42 +3491,48 @@ var Wav2ArkitCpuInference = class {
             modelBuffer = await fetchWithCache(modelUrl);
           }
         } else {
-          logger5.debug("Fetching and caching model", { modelUrl });
+          logger5.debug("Fetching and caching model graph", { modelUrl });
           modelBuffer = await fetchWithCache(modelUrl);
         }
         if (!modelBuffer) {
           throw new Error(`Failed to load model: ${modelUrl}`);
         }
-        let externalDataBuffer;
-        if (this.config.modelDataUrl) {
-          const dataUrl = this.config.modelDataUrl;
-          const isDataCached = await cache.has(dataUrl);
-          if (isDataCached) {
-            logger5.debug("Loading external data from cache", { dataUrl });
-            externalDataBuffer = await cache.get(dataUrl);
-            if (!externalDataBuffer) {
-              logger5.warn("External data cache corruption, re-fetching", { dataUrl });
-              await cache.delete(dataUrl);
+        let externalDataBuffer = null;
+        if (dataUrl) {
+          try {
+            const isDataCached = await cache.has(dataUrl);
+            if (isDataCached) {
+              logger5.debug("Loading external data from cache", { dataUrl });
+              externalDataBuffer = await cache.get(dataUrl);
+              if (!externalDataBuffer) {
+                logger5.warn("Cache corruption for external data, retrying", { dataUrl });
+                await cache.delete(dataUrl);
+                externalDataBuffer = await fetchWithCache(dataUrl);
+              }
+            } else {
+              logger5.info("Fetching external model data", {
+                dataUrl,
+                note: "This may be a large download (400MB+)"
+              });
               externalDataBuffer = await fetchWithCache(dataUrl);
             }
-          } else {
-            logger5.info("Fetching external data (this may take a while on first load)", {
-              dataUrl
+            logger5.info("External data loaded", {
+              size: formatBytes(externalDataBuffer.byteLength)
+            });
+          } catch (err) {
+            logger5.debug("No external data file found (single-file model)", {
+              dataUrl,
+              error: err.message
             });
-            externalDataBuffer = await fetchWithCache(dataUrl);
           }
-          logger5.debug("External data loaded", {
-            size: formatBytes(externalDataBuffer.byteLength)
-          });
         }
         logger5.debug("Creating ONNX session", {
-          size: formatBytes(modelBuffer.byteLength),
-          hasExternalData: !!externalDataBuffer,
-          externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : void 0,
+          graphSize: formatBytes(modelBuffer.byteLength),
+          externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
           backend: this._backend
         });
         if (externalDataBuffer) {
-          const dataFilename = this.config.modelDataUrl.split("/").pop();
+          const dataFilename = dataUrl.split("/").pop();
           sessionOptions.externalData = [{
             path: dataFilename,
             data: new Uint8Array(externalDataBuffer)
@@ -3177,7 +3551,7 @@ var Wav2ArkitCpuInference = class {
       span?.setAttributes({
         "model.backend": this._backend,
         "model.load_time_ms": loadTimeMs,
-        "model.cached": isCached
+        "model.cached": !isIOS()
       });
       span?.end();
       telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
@@ -3258,11 +3632,11 @@ var Wav2ArkitCpuInference = class {
           const blendshapes = [];
           for (let f = 0; f < numFrames; f++) {
             const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
-            const remapped = remapWav2ArkitToLam(rawFrame);
-            blendshapes.push(symmetrizeBlendshapes(remapped));
+            const symmetrized = symmetrizeBlendshapes(rawFrame);
+            blendshapes.push(symmetrized);
           }
           logger5.trace("Inference completed", {
-            inferenceTimeMs: Math.round(inferenceTimeMs),
+            inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
             numFrames,
             inputSamples
           });
@@ -3328,14 +3702,14 @@ function createLipSync(config) {
     });
   }
   if (useCpu) {
-    logger6.info("Creating Wav2ArkitCpuInference (WASM)");
+    logger6.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
     return new Wav2ArkitCpuInference({
-      modelUrl: config.cpuModelUrl,
-      modelDataUrl: config.cpuModelDataUrl
+      modelUrl: config.cpuModelUrl
     });
   }
   const gpuInstance = new Wav2Vec2Inference({
     modelUrl: config.gpuModelUrl,
+    externalDataUrl: config.gpuExternalDataUrl,
     backend: config.gpuBackend ?? "auto",
     numIdentityClasses: config.numIdentityClasses
   });
@@ -3352,15 +3726,15 @@ var LipSyncWithFallback = class {
     this.implementation = gpuInstance;
     this.config = config;
   }
+  get modelId() {
+    return this.implementation.modelId;
+  }
   get backend() {
     return this.implementation.backend;
   }
   get isLoaded() {
     return this.implementation.isLoaded;
   }
-  get chunkSamples() {
-    return this.implementation.chunkSamples;
-  }
   async load() {
     try {
       return await this.implementation.load();
@@ -3373,8 +3747,7 @@ var LipSyncWithFallback = class {
       } catch {
       }
       this.implementation = new Wav2ArkitCpuInference({
-        modelUrl: this.config.cpuModelUrl,
-        modelDataUrl: this.config.cpuModelDataUrl
+        modelUrl: this.config.cpuModelUrl
       });
       this.hasFallenBack = true;
       logger6.info("Fallback to Wav2ArkitCpuInference successful");
@@ -3404,6 +3777,8 @@ var SileroVADInference = class {
     // Pre-speech buffer for capturing beginning of speech
     this.preSpeechBuffer = [];
     this.wasSpeaking = false;
+    // Cached sample rate tensor (int64 scalar, never changes per instance)
+    this.srTensor = null;
     const sampleRate = config.sampleRate ?? 16e3;
     if (sampleRate !== 8e3 && sampleRate !== 16e3) {
       throw new Error("Silero VAD only supports 8000 or 16000 Hz sample rates");
@@ -3534,6 +3909,24 @@ var SileroVADInference = class {
     this.context = new Float32Array(this.contextSize);
     this.preSpeechBuffer = [];
     this.wasSpeaking = false;
+    if (!this.srTensor) {
+      try {
+        this.srTensor = new this.ort.Tensor(
+          "int64",
+          new BigInt64Array([BigInt(this.config.sampleRate)]),
+          []
+        );
+      } catch (e) {
+        logger7.warn("BigInt64Array not available, using bigint array fallback", {
+          error: e instanceof Error ? e.message : String(e)
+        });
+        this.srTensor = new this.ort.Tensor(
+          "int64",
+          [BigInt(this.config.sampleRate)],
+          []
+        );
+      }
+    }
   }
   /**
    * Process a single audio chunk
@@ -3665,20 +4058,7 @@ var SileroVADInference = class {
           inputBuffer.set(audioChunkCopy, this.contextSize);
           const inputBufferCopy = new Float32Array(inputBuffer);
           const inputTensor = new this.ort.Tensor("float32", inputBufferCopy, [1, inputSize]);
-          let srTensor;
-          try {
-            srTensor = new this.ort.Tensor(
-              "int64",
-              new BigInt64Array([BigInt(this.config.sampleRate)]),
-              []
-            );
-          } catch {
-            srTensor = new this.ort.Tensor(
-              "int64",
-              [BigInt(this.config.sampleRate)],
-              []
-            );
-          }
+          const srTensor = this.srTensor;
           const stateCopy = new Float32Array(this.state.data);
           const stateTensor = new this.ort.Tensor("float32", stateCopy, this.state.dims);
           const feeds = {
@@ -3767,6 +4147,7 @@ var SileroVADInference = class {
       this.session = null;
     }
     this.state = null;
+    this.srTensor = null;
   }
 };
 /**
@@ -4429,268 +4810,8 @@ var VADWorkerWithFallback = class {
   }
 };
-// src/inference/Emotion2VecInference.ts
-var logger10 = createLogger("Emotion2Vec");
-var EMOTION2VEC_LABELS = ["neutral", "happy", "angry", "sad"];
-var Emotion2VecInference = class {
-  constructor(config) {
-    this.session = null;
-    this.ort = null;
-    this._backend = "wasm";
-    this.isLoading = false;
-    this.inferenceQueue = Promise.resolve();
-    this.config = {
-      modelUrl: config.modelUrl,
-      backend: config.backend ?? "auto",
-      sampleRate: config.sampleRate ?? 16e3
-    };
-  }
-  get backend() {
-    return this.session ? this._backend : null;
-  }
-  get isLoaded() {
-    return this.session !== null;
-  }
-  get sampleRate() {
-    return this.config.sampleRate;
-  }
-  /**
-   * Load the ONNX model
-   */
-  async load() {
-    if (this.isLoading) {
-      throw new Error("Model is already loading");
-    }
-    if (this.session) {
-      throw new Error("Model already loaded. Call dispose() first.");
-    }
-    this.isLoading = true;
-    const startTime = performance.now();
-    const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("Emotion2Vec.load", {
-      "model.url": this.config.modelUrl,
-      "model.backend_requested": this.config.backend
-    });
-    try {
-      logger10.info("Loading ONNX Runtime...", { preference: this.config.backend });
-      const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
-      this.ort = ort;
-      this._backend = backend;
-      logger10.info("ONNX Runtime loaded", { backend: this._backend });
-      logger10.info("Checking model cache...");
-      const cache = getModelCache();
-      const modelUrl = this.config.modelUrl;
-      const isCached = await cache.has(modelUrl);
-      logger10.info("Cache check complete", { modelUrl, isCached });
-      let modelBuffer;
-      if (isCached) {
-        logger10.info("Loading model from cache...", { modelUrl });
-        modelBuffer = await cache.get(modelUrl);
-        logger10.info("Model loaded from cache", { size: formatBytes(modelBuffer.byteLength) });
-      } else {
-        logger10.info("Fetching model (not cached)...", { modelUrl });
-        modelBuffer = await fetchWithCache(modelUrl);
-        logger10.info("Model fetched and cached", { size: formatBytes(modelBuffer.byteLength) });
-      }
-      logger10.info("Creating ONNX session (this may take a while for large models)...");
-      logger10.debug("Creating ONNX session", {
-        size: formatBytes(modelBuffer.byteLength),
-        backend: this._backend
-      });
-      const sessionOptions = getSessionOptions(this._backend);
-      const modelData = new Uint8Array(modelBuffer);
-      this.session = await ort.InferenceSession.create(modelData, sessionOptions);
-      const loadTimeMs = performance.now() - startTime;
-      logger10.info("Model loaded successfully", {
-        backend: this._backend,
-        loadTimeMs: Math.round(loadTimeMs),
-        sampleRate: this.config.sampleRate,
-        inputNames: [...this.session.inputNames],
-        outputNames: [...this.session.outputNames]
-      });
-      span?.setAttributes({
-        "model.backend": this._backend,
-        "model.load_time_ms": loadTimeMs,
-        "model.cached": isCached
-      });
-      span?.end();
-      telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
-        model: "emotion2vec",
-        backend: this._backend
-      });
-      return {
-        backend: this._backend,
-        loadTimeMs,
-        inputNames: [...this.session.inputNames],
-        outputNames: [...this.session.outputNames],
-        sampleRate: this.config.sampleRate
-      };
-    } catch (error) {
-      span?.endWithError(error instanceof Error ? error : new Error(String(error)));
-      telemetry?.incrementCounter("omote.errors.total", 1, {
-        model: "emotion2vec",
-        error_type: "load_failed"
-      });
-      throw error;
-    } finally {
-      this.isLoading = false;
-    }
-  }
-  /**
-   * Run emotion inference on audio samples
-   *
-   * @param audio - Float32Array of 16kHz audio samples
-   * @returns Frame-level emotion results at 50Hz
-   */
-  async infer(audio) {
-    if (!this.session) {
-      throw new Error("Model not loaded. Call load() first.");
-    }
-    return this.queueInference(audio);
-  }
-  queueInference(audio) {
-    const audioCopy = new Float32Array(audio);
-    return new Promise((resolve, reject) => {
-      this.inferenceQueue = this.inferenceQueue.then(async () => {
-        const telemetry = getTelemetry();
-        const span = telemetry?.startSpan("Emotion2Vec.infer", {
-          "inference.backend": this._backend,
-          "inference.audio_samples": audioCopy.length
-        });
-        try {
-          const startTime = performance.now();
-          const inputTensor = new this.ort.Tensor("float32", audioCopy, [1, audioCopy.length]);
-          const results = await this.session.run({ audio: inputTensor });
-          const logitsTensor = results["logits"];
-          const embeddingsTensor = results["layer_norm_25"];
-          if (!logitsTensor) {
-            throw new Error(
-              `Missing logits tensor from SUPERB model. Got outputs: ${Object.keys(results).join(", ")}`
-            );
-          }
-          const logitsData = logitsTensor.data;
-          const logits = new Float32Array(logitsData);
-          const probs = this.softmax(logits);
-          const probabilities = {
-            neutral: probs[0],
-            happy: probs[1],
-            angry: probs[2],
-            sad: probs[3]
-          };
-          let maxIdx = 0;
-          let maxProb = probs[0];
-          for (let i = 1; i < probs.length; i++) {
-            if (probs[i] > maxProb) {
-              maxProb = probs[i];
-              maxIdx = i;
-            }
-          }
-          const dominant = {
-            emotion: EMOTION2VEC_LABELS[maxIdx],
-            confidence: maxProb,
-            probabilities
-          };
-          let embeddings = [];
-          let numFrames = 1;
-          if (embeddingsTensor) {
-            const embeddingData = embeddingsTensor.data;
-            const dims = embeddingsTensor.dims;
-            if (dims.length === 3) {
-              numFrames = dims[1];
-              const embeddingDim = dims[2];
-              for (let i = 0; i < numFrames; i++) {
-                const start = i * embeddingDim;
-                embeddings.push(new Float32Array(embeddingData.slice(start, start + embeddingDim)));
-              }
-            }
-          }
-          const frames = [];
-          for (let i = 0; i < numFrames; i++) {
-            frames.push({
-              emotion: dominant.emotion,
-              confidence: dominant.confidence,
-              probabilities: { ...probabilities }
-            });
-          }
-          const inferenceTimeMs = performance.now() - startTime;
-          logger10.debug("Emotion inference completed", {
-            numFrames,
-            dominant: dominant.emotion,
-            confidence: Math.round(dominant.confidence * 100),
-            inferenceTimeMs: Math.round(inferenceTimeMs)
-          });
-          span?.setAttributes({
-            "inference.duration_ms": inferenceTimeMs,
-            "inference.num_frames": numFrames,
-            "inference.dominant_emotion": dominant.emotion
-          });
-          span?.end();
-          telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
-            model: "emotion2vec",
-            backend: this._backend
-          });
-          telemetry?.incrementCounter("omote.inference.total", 1, {
-            model: "emotion2vec",
-            backend: this._backend,
-            status: "success"
-          });
-          resolve({
-            frames,
-            dominant,
-            embeddings,
-            logits,
-            inferenceTimeMs
-          });
-        } catch (err) {
-          span?.endWithError(err instanceof Error ? err : new Error(String(err)));
-          telemetry?.incrementCounter("omote.inference.total", 1, {
-            model: "emotion2vec",
-            backend: this._backend,
-            status: "error"
-          });
-          reject(err);
-        }
-      });
-    });
-  }
-  /**
-   * Apply softmax to convert logits to probabilities
-   */
-  softmax(logits) {
-    let max = logits[0];
-    for (let i = 1; i < logits.length; i++) {
-      if (logits[i] > max) max = logits[i];
-    }
-    const exp = new Float32Array(logits.length);
-    let sum = 0;
-    for (let i = 0; i < logits.length; i++) {
-      exp[i] = Math.exp(logits[i] - max);
-      sum += exp[i];
-    }
-    const probs = new Float32Array(logits.length);
-    for (let i = 0; i < logits.length; i++) {
-      probs[i] = exp[i] / sum;
-    }
-    return probs;
-  }
-  /**
-   * Dispose of the model and free resources
-   */
-  async dispose() {
-    if (this.session) {
-      await this.session.release();
-      this.session = null;
-    }
-  }
-};
-/**
- * Check if WebGPU is available and working
- * (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
- */
-Emotion2VecInference.isWebGPUAvailable = isWebGPUAvailable;
 // src/inference/SafariSpeechRecognition.ts
-var logger11 = createLogger("SafariSpeech");
+var logger10 = createLogger("SafariSpeech");
 var SafariSpeechRecognition = class _SafariSpeechRecognition {
   constructor(config = {}) {
     this.recognition = null;
@@ -4709,7 +4830,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       interimResults: config.interimResults ?? true,
       maxAlternatives: config.maxAlternatives ?? 1
     };
-    logger11.debug("SafariSpeechRecognition created", {
+    logger10.debug("SafariSpeechRecognition created", {
       language: this.config.language,
       continuous: this.config.continuous
     });
@@ -4770,7 +4891,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
    */
   async start() {
     if (this.isListening) {
-      logger11.warn("Already listening");
+      logger10.warn("Already listening");
       return;
     }
     if (!_SafariSpeechRecognition.isAvailable()) {
@@ -4800,7 +4921,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       this.isListening = true;
       this.startTime = performance.now();
       this.accumulatedText = "";
-      logger11.info("Speech recognition started", {
+      logger10.info("Speech recognition started", {
         language: this.config.language
       });
       span?.end();
@@ -4815,7 +4936,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
    */
   async stop() {
     if (!this.isListening || !this.recognition) {
-      logger11.warn("Not currently listening");
+      logger10.warn("Not currently listening");
       return {
         text: this.accumulatedText,
         language: this.config.language,
@@ -4844,7 +4965,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
     if (this.recognition && this.isListening) {
       this.recognition.abort();
       this.isListening = false;
-      logger11.info("Speech recognition aborted");
+      logger10.info("Speech recognition aborted");
     }
   }
   /**
@@ -4875,7 +4996,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
     this.isListening = false;
     this.resultCallbacks = [];
     this.errorCallbacks = [];
-    logger11.debug("SafariSpeechRecognition disposed");
+    logger10.debug("SafariSpeechRecognition disposed");
   }
   /**
    * Set up event handlers for the recognition instance
@@ -4903,7 +5024,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
               confidence: alternative.confidence
             };
             this.emitResult(speechResult);
-            logger11.trace("Speech result", {
+            logger10.trace("Speech result", {
               text: text.substring(0, 50),
               isFinal,
               confidence: alternative.confidence
@@ -4913,12 +5034,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
         span?.end();
       } catch (error) {
         span?.endWithError(error instanceof Error ? error : new Error(String(error)));
-        logger11.error("Error processing speech result", { error });
+        logger10.error("Error processing speech result", { error });
       }
     };
     this.recognition.onerror = (event) => {
       const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
-      logger11.error("Speech recognition error", { error: event.error, message: event.message });
+      logger10.error("Speech recognition error", { error: event.error, message: event.message });
       this.emitError(error);
       if (this.stopRejecter) {
         this.stopRejecter(error);
@@ -4928,7 +5049,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
     };
     this.recognition.onend = () => {
       this.isListening = false;
-      logger11.info("Speech recognition ended", {
+      logger10.info("Speech recognition ended", {
         totalText: this.accumulatedText.length,
         durationMs: performance.now() - this.startTime
       });
@@ -4945,13 +5066,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       }
     };
     this.recognition.onstart = () => {
-      logger11.debug("Speech recognition started by browser");
+      logger10.debug("Speech recognition started by browser");
     };
     this.recognition.onspeechstart = () => {
-      logger11.debug("Speech detected");
+      logger10.debug("Speech detected");
     };
     this.recognition.onspeechend = () => {
-      logger11.debug("Speech ended");
+      logger10.debug("Speech ended");
     };
   }
   /**
@@ -4962,7 +5083,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       try {
         callback(result);
       } catch (error) {
-        logger11.error("Error in result callback", { error });
+        logger10.error("Error in result callback", { error });
       }
     }
   }
@@ -4974,7 +5095,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       try {
         callback(error);
       } catch (callbackError) {
-        logger11.error("Error in error callback", { error: callbackError });
+        logger10.error("Error in error callback", { error: callbackError });
       }
     }
   }
@@ -5148,7 +5269,7 @@ var AgentCoreAdapter = class extends EventEmitter {
     this._sessionId = null;
     this._isConnected = false;
     // Sub-components
-    this.whisper = null;
+    this.asr = null;
     this.vad = null;
     this.lam = null;
     this.pipeline = null;
@@ -5187,7 +5308,7 @@ var AgentCoreAdapter = class extends EventEmitter {
     try {
       const authToken = await this.getAuthToken(config.tenant);
       await Promise.all([
-        this.initWhisper(),
+        this.initASR(),
         this.initLAM()
       ]);
       await this.connectWebSocket(authToken, config);
@@ -5217,7 +5338,7 @@ var AgentCoreAdapter = class extends EventEmitter {
       this.ws = null;
     }
     await Promise.all([
-      this.whisper?.dispose(),
+      this.asr?.dispose(),
       this.vad?.dispose(),
       this.lam?.dispose()
     ]);
@@ -5349,16 +5470,15 @@ var AgentCoreAdapter = class extends EventEmitter {
     });
     return token;
   }
-  async initWhisper() {
+  async initASR() {
     await Promise.all([
-      // Whisper ASR
+      // SenseVoice ASR
       (async () => {
-        this.whisper = new WhisperInference({
-          model: "tiny",
-          device: "auto",
-          language: "en"
+        this.asr = new SenseVoiceInference({
+          modelUrl: "/models/sensevoice/model.int8.onnx",
+          language: "auto"
         });
-        await this.whisper.load();
+        await this.asr.load();
       })(),
       // Silero VAD for accurate voice activity detection
       (async () => {
@@ -5544,17 +5664,17 @@ var AgentCoreAdapter = class extends EventEmitter {
       console.debug("[AgentCore] Skipping silent audio", { rms, samples: audio.length });
       return;
     }
-    if (this.whisper) {
+    if (this.asr) {
       this.setState("listening");
       this.emit("user.speech.start", { timestamp: Date.now() });
-      this.whisper.transcribe(audio).then((result) => {
+      this.asr.transcribe(audio).then((result) => {
         this.emit("user.transcript.final", {
           text: result.text,
           confidence: 1
         });
         this.emit("user.speech.end", { timestamp: Date.now(), durationMs: result.inferenceTimeMs });
         const cleanText = result.text.trim();
-        if (cleanText && !cleanText.includes("[BLANK_AUDIO]")) {
+        if (cleanText) {
           this.sendText(cleanText).catch((error) => {
             console.error("[AgentCore] Send text error:", error);
           });
@@ -6368,228 +6488,6 @@ var InterruptionHandler = class extends EventEmitter {
   }
 };
-// src/cache/huggingFaceCDN.ts
-var HF_CDN_TEST_URL = "https://huggingface.co/Xenova/whisper-tiny/resolve/main/config.json";
-function parseHuggingFaceUrl(url) {
-  const pattern = /^https:\/\/huggingface\.co\/([^/]+)\/([^/]+)\/resolve\/([^/]+)\/(.+)$/;
-  const match = url.match(pattern);
-  if (!match) {
-    return null;
-  }
-  return {
-    org: match[1],
-    model: match[2],
-    branch: match[3],
-    file: match[4]
-  };
-}
-async function isHuggingFaceCDNReachable(testUrl = HF_CDN_TEST_URL) {
-  try {
-    const response = await fetch(testUrl, {
-      method: "HEAD",
-      cache: "no-store"
-      // Don't use cached response for reachability check
-    });
-    return response.ok;
-  } catch {
-    return false;
-  }
-}
-// src/utils/transformersCacheClear.ts
-var logger12 = createLogger("TransformersCache");
-async function clearTransformersCache(options) {
-  const verbose = options?.verbose ?? true;
-  const additionalPatterns = options?.additionalPatterns ?? [];
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return [];
-  }
-  try {
-    const cacheNames = await caches.keys();
-    const deletedCaches = [];
-    const patterns = [
-      "transformers",
-      "huggingface",
-      "onnx",
-      ...additionalPatterns
-    ];
-    for (const cacheName of cacheNames) {
-      const shouldDelete = patterns.some(
-        (pattern) => cacheName.toLowerCase().includes(pattern.toLowerCase())
-      );
-      if (shouldDelete) {
-        if (verbose) {
-          logger12.info("Deleting cache", { cacheName });
-        }
-        const deleted = await caches.delete(cacheName);
-        if (deleted) {
-          deletedCaches.push(cacheName);
-        } else if (verbose) {
-          logger12.warn("Failed to delete cache", { cacheName });
-        }
-      }
-    }
-    if (verbose) {
-      logger12.info("Cache clearing complete", {
-        totalCaches: cacheNames.length,
-        deletedCount: deletedCaches.length,
-        deletedCaches
-      });
-    }
-    return deletedCaches;
-  } catch (error) {
-    logger12.error("Error clearing caches", { error });
-    throw error;
-  }
-}
-async function clearSpecificCache(cacheName) {
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return false;
-  }
-  try {
-    const deleted = await caches.delete(cacheName);
-    logger12.info("Cache deletion attempt", { cacheName, deleted });
-    return deleted;
-  } catch (error) {
-    logger12.error("Error deleting cache", { cacheName, error });
-    return false;
-  }
-}
-async function listCaches() {
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return [];
-  }
-  try {
-    const cacheNames = await caches.keys();
-    logger12.debug("Available caches", { cacheNames });
-    return cacheNames;
-  } catch (error) {
-    logger12.error("Error listing caches", { error });
-    return [];
-  }
-}
-async function validateCachedResponse(cacheName, requestUrl) {
-  if (!("caches" in window)) {
-    return {
-      exists: false,
-      valid: false,
-      contentType: null,
-      isHtml: false,
-      reason: "Cache API not available"
-    };
-  }
-  try {
-    const cache = await caches.open(cacheName);
-    const response = await cache.match(requestUrl);
-    if (!response) {
-      return {
-        exists: false,
-        valid: false,
-        contentType: null,
-        isHtml: false,
-        reason: "Not in cache"
-      };
-    }
-    const contentType = response.headers.get("content-type");
-    const isHtml = contentType?.includes("text/html") || contentType?.includes("text/plain");
-    const clonedResponse = response.clone();
-    const text = await clonedResponse.text();
-    const looksLikeHtml = text.trim().startsWith("<") || text.includes("<!DOCTYPE");
-    const valid = Boolean(
-      response.status === 200 && !isHtml && !looksLikeHtml && contentType && (contentType.includes("application/json") || contentType.includes("application/octet-stream") || contentType.includes("binary"))
-    );
-    return {
-      exists: true,
-      valid,
-      contentType,
-      isHtml: isHtml || looksLikeHtml,
-      reason: valid ? "Valid response" : `Invalid: status=${response.status}, contentType=${contentType}, isHtml=${isHtml || looksLikeHtml}`
-    };
-  } catch (error) {
-    logger12.error("Error validating cached response", { cacheName, requestUrl, error });
-    return {
-      exists: false,
-      valid: false,
-      contentType: null,
-      isHtml: false,
-      reason: `Error: ${error}`
-    };
-  }
-}
-async function scanForInvalidCaches() {
-  if (!("caches" in window)) {
-    return { totalCaches: 0, scannedEntries: 0, invalidEntries: [] };
-  }
-  const invalidEntries = [];
-  let scannedEntries = 0;
-  try {
-    const cacheNames = await caches.keys();
-    for (const cacheName of cacheNames) {
-      if (!cacheName.toLowerCase().includes("transformers")) {
-        continue;
-      }
-      const cache = await caches.open(cacheName);
-      const requests = await cache.keys();
-      for (const request of requests) {
-        scannedEntries++;
-        const url = request.url;
-        const validation = await validateCachedResponse(cacheName, url);
-        if (validation.exists && !validation.valid) {
-          invalidEntries.push({
-            cacheName,
-            url,
-            reason: validation.reason || "Unknown"
-          });
-        }
-      }
-    }
-    logger12.info("Cache scan complete", {
-      totalCaches: cacheNames.length,
-      scannedEntries,
-      invalidCount: invalidEntries.length
-    });
-    return {
-      totalCaches: cacheNames.length,
-      scannedEntries,
-      invalidEntries
-    };
-  } catch (error) {
-    logger12.error("Error scanning caches", { error });
-    throw error;
-  }
-}
-async function nukeBrowserCaches(preventRecreation = false) {
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return 0;
-  }
-  try {
-    const cacheNames = await caches.keys();
-    let deletedCount = 0;
-    for (const cacheName of cacheNames) {
-      const deleted = await caches.delete(cacheName);
-      if (deleted) {
-        deletedCount++;
-      }
-    }
-    logger12.info("All browser caches cleared", {
-      totalDeleted: deletedCount
-    });
-    if (preventRecreation) {
-      const { env } = await import("./transformers.web-MHLR33H6.mjs");
-      env.useBrowserCache = false;
-      logger12.warn("Browser cache creation disabled (env.useBrowserCache = false)");
-    }
-    return deletedCount;
-  } catch (error) {
-    logger12.error("Error nuking caches", { error });
-    throw error;
-  }
-}
 // src/animation/types.ts
 var DEFAULT_ANIMATION_CONFIG = {
   initialState: "idle",
@@ -7129,7 +7027,6 @@ export {
   EmotionPresets,
   EmphasisDetector,
   EventEmitter,
-  HF_CDN_TEST_URL,
   INFERENCE_LATENCY_BUCKETS,
   InterruptionHandler,
   LAMPipeline,
@@ -7143,6 +7040,7 @@ export {
   OmoteTelemetry,
   RingBuffer,
   SafariSpeechRecognition,
+  SenseVoiceInference,
   SileroVADInference,
   SileroVADWorker,
   SyncedAudioPipeline,
@@ -7150,12 +7048,12 @@ export {
   WAV2ARKIT_BLENDSHAPES,
   Wav2ArkitCpuInference,
   Wav2Vec2Inference,
-  WhisperInference,
+  applyCMVN,
+  applyLFR,
   blendEmotions,
   calculatePeak,
   calculateRMS,
-  clearSpecificCache,
-  clearTransformersCache,
+  computeKaldiFbank,
   configureCacheLimit,
   configureLogging,
   configureTelemetry,
@@ -7164,6 +7062,7 @@ export {
   createLogger,
   createSessionWithFallback,
   createSileroVAD,
+  ctcGreedyDecode,
   fetchWithCache,
   formatBytes,
   getCacheConfig,
@@ -7180,7 +7079,6 @@ export {
   getTelemetry,
   hasWebGPUApi,
   isAndroid,
-  isHuggingFaceCDNReachable,
   isIOS,
   isIOSSafari,
   isMobile,
@@ -7189,15 +7087,16 @@ export {
   isSpeechRecognitionAvailable,
   isWebGPUAvailable,
   lerpEmotion,
-  listCaches,
   noopLogger,
-  nukeBrowserCaches,
-  parseHuggingFaceUrl,
+  parseCMVNFromMetadata,
+  parseTokensFile,
   preloadModels,
+  preloadOnnxRuntime,
   remapWav2ArkitToLam,
   resetLoggingConfig,
   resolveBackend,
-  scanForInvalidCaches,
+  resolveLanguageId,
+  resolveTextNormId,
   setLogLevel,
   setLoggingEnabled,
   shouldEnableWasmProxy,
@@ -7205,7 +7104,6 @@ export {
   shouldUseNativeASR,
   shouldUseServerLipSync,
   supportsVADWorker,
-  symmetrizeBlendshapes,
-  validateCachedResponse
+  symmetrizeBlendshapes
 };
 //# sourceMappingURL=index.mjs.map