npm - @omote/core - Versions diffs - 0.3.25 → 0.4.2 - Mend

@omote/core 0.3.25 → 0.4.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/events/index.mjs +0 -1
package/dist/index.d.mts +201 -259
package/dist/index.d.ts +201 -259
package/dist/index.js +706 -38696
package/dist/index.js.map +1 -1
package/dist/index.mjs +723 -930
package/dist/index.mjs.map +1 -1
package/dist/logging/index.mjs +0 -1
package/package.json +1 -2
package/dist/chunk-B6TIE56N.mjs +0 -37779
package/dist/chunk-B6TIE56N.mjs.map +0 -1
package/dist/chunk-NSSMTXJJ.mjs +0 -8
package/dist/chunk-NSSMTXJJ.mjs.map +0 -1
package/dist/transformers.web-T5LWC34T.mjs +0 -1718
package/dist/transformers.web-T5LWC34T.mjs.map +0 -1

package/dist/index.mjs CHANGED Viewed

@@ -12,11 +12,6 @@ import {
   setLogLevel,
   setLoggingEnabled
 } from "./chunk-ESU52TDS.mjs";
-import {
-  __webpack_exports__env,
-  __webpack_exports__pipeline
-} from "./chunk-B6TIE56N.mjs";
-import "./chunk-NSSMTXJJ.mjs";
 // src/audio/MicrophoneCapture.ts
 var MicrophoneCapture = class {
@@ -2274,6 +2269,14 @@ function getSessionOptions(backend) {
       graphOptimizationLevel: "all"
     };
   }
+  if (isIOS()) {
+    return {
+      executionProviders: ["wasm"],
+      graphOptimizationLevel: "basic",
+      enableCpuMemArena: false,
+      enableMemPattern: false
+    };
+  }
   return {
     executionProviders: ["wasm"],
     graphOptimizationLevel: "all"
@@ -2549,77 +2552,108 @@ var Wav2Vec2Inference = class {
       this.ort = ort;
       this._backend = backend;
       logger2.info("ONNX Runtime loaded", { backend: this._backend });
-      const cache = getModelCache();
       const modelUrl = this.config.modelUrl;
-      const isCached = await cache.has(modelUrl);
-      let modelBuffer;
-      if (isCached) {
-        logger2.debug("Loading model from cache", { modelUrl });
-        modelBuffer = await cache.get(modelUrl);
-        if (!modelBuffer) {
-          logger2.warn("Cache corruption detected, clearing and retrying", { modelUrl });
-          await cache.delete(modelUrl);
-          logger2.info("Corrupted cache entry deleted, fetching fresh model", { modelUrl });
-          modelBuffer = await fetchWithCache(modelUrl);
+      const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
+      const sessionOptions = getSessionOptions(this._backend);
+      let isCached = false;
+      if (isIOS()) {
+        logger2.info("iOS: passing model URLs directly to ORT (low-memory path)", {
+          modelUrl,
+          dataUrl
+        });
+        if (dataUrl) {
+          const dataFilename = dataUrl.split("/").pop();
+          logger2.info("iOS: setting externalData", { dataFilename, dataUrl });
+          sessionOptions.externalData = [{
+            path: dataFilename,
+            data: dataUrl
+            // URL string — ORT fetches directly into WASM
+          }];
         }
-      } else {
-        logger2.debug("Fetching and caching model", { modelUrl });
-        modelBuffer = await fetchWithCache(modelUrl);
-      }
-      if (!modelBuffer) {
-        const errorMsg = `Failed to load model: ${modelUrl}. Model buffer is null or undefined even after retry.`;
-        logger2.error(errorMsg, { modelUrl, isCached });
-        throw new Error(errorMsg);
-      }
-      let externalDataBuffer = null;
-      if (this.config.externalDataUrl !== false) {
-        const dataUrl = typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data`;
+        logger2.info("iOS: calling InferenceSession.create() with URL string", {
+          modelUrl,
+          sessionOptions: JSON.stringify(
+            sessionOptions,
+            (_, v) => typeof v === "string" && v.length > 100 ? v.slice(0, 100) + "..." : v
+          )
+        });
         try {
-          const isDataCached = await cache.has(dataUrl);
-          if (isDataCached) {
-            logger2.debug("Loading external data from cache", { dataUrl });
-            externalDataBuffer = await cache.get(dataUrl);
-            if (!externalDataBuffer) {
-              logger2.warn("Cache corruption for external data, retrying", { dataUrl });
-              await cache.delete(dataUrl);
+          this.session = await this.ort.InferenceSession.create(modelUrl, sessionOptions);
+        } catch (sessionErr) {
+          logger2.error("iOS: InferenceSession.create() failed", {
+            error: sessionErr instanceof Error ? sessionErr.message : String(sessionErr),
+            errorType: sessionErr?.constructor?.name,
+            stack: sessionErr instanceof Error ? sessionErr.stack : void 0
+          });
+          throw sessionErr;
+        }
+        logger2.info("iOS: session created successfully", {
+          inputNames: this.session.inputNames,
+          outputNames: this.session.outputNames
+        });
+      } else {
+        const cache = getModelCache();
+        isCached = await cache.has(modelUrl);
+        let modelBuffer;
+        if (isCached) {
+          logger2.debug("Loading model from cache", { modelUrl });
+          modelBuffer = await cache.get(modelUrl);
+          if (!modelBuffer) {
+            logger2.warn("Cache corruption detected, clearing and retrying", { modelUrl });
+            await cache.delete(modelUrl);
+            modelBuffer = await fetchWithCache(modelUrl);
+          }
+        } else {
+          logger2.debug("Fetching and caching model", { modelUrl });
+          modelBuffer = await fetchWithCache(modelUrl);
+        }
+        if (!modelBuffer) {
+          throw new Error(`Failed to load model: ${modelUrl}`);
+        }
+        let externalDataBuffer = null;
+        if (dataUrl) {
+          try {
+            const isDataCached = await cache.has(dataUrl);
+            if (isDataCached) {
+              logger2.debug("Loading external data from cache", { dataUrl });
+              externalDataBuffer = await cache.get(dataUrl);
+              if (!externalDataBuffer) {
+                logger2.warn("Cache corruption for external data, retrying", { dataUrl });
+                await cache.delete(dataUrl);
+                externalDataBuffer = await fetchWithCache(dataUrl);
+              }
+            } else {
+              logger2.info("Fetching external model data", {
+                dataUrl,
+                note: "This may be a large download (383MB+)"
+              });
               externalDataBuffer = await fetchWithCache(dataUrl);
             }
-          } else {
-            logger2.info("Fetching external model data", {
+            logger2.info("External data loaded", {
+              size: formatBytes(externalDataBuffer.byteLength)
+            });
+          } catch (err) {
+            logger2.debug("No external data file found (single-file model)", {
               dataUrl,
-              note: "This may be a large download (383MB+)"
+              error: err.message
             });
-            externalDataBuffer = await fetchWithCache(dataUrl);
           }
-          logger2.info("External data loaded", {
-            size: formatBytes(externalDataBuffer.byteLength)
-          });
-        } catch (err) {
-          logger2.debug("No external data file found (single-file model)", {
-            dataUrl,
-            error: err.message
-          });
         }
+        logger2.debug("Creating ONNX session", {
+          graphSize: formatBytes(modelBuffer.byteLength),
+          externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
+          backend: this._backend
+        });
+        if (externalDataBuffer) {
+          const dataFilename = dataUrl.split("/").pop();
+          sessionOptions.externalData = [{
+            path: dataFilename,
+            data: new Uint8Array(externalDataBuffer)
+          }];
+        }
+        const modelData = new Uint8Array(modelBuffer);
+        this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
       }
-      logger2.debug("Creating ONNX session", {
-        graphSize: formatBytes(modelBuffer.byteLength),
-        externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
-        backend: this._backend
-      });
-      const sessionOptions = getSessionOptions(this._backend);
-      if (externalDataBuffer) {
-        const dataFilename = (typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data`).split("/").pop();
-        sessionOptions.externalData = [{
-          path: dataFilename,
-          data: new Uint8Array(externalDataBuffer)
-        }];
-      }
-      logger2.info("Creating session with execution provider", {
-        executionProvider: this._backend,
-        hasExternalData: !!externalDataBuffer
-      });
-      const modelData = new Uint8Array(modelBuffer);
-      this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
       logger2.info("ONNX session created successfully", {
         executionProvider: this._backend,
         backend: this._backend
@@ -2634,7 +2668,7 @@ var Wav2Vec2Inference = class {
       span?.setAttributes({
         "model.backend": this._backend,
         "model.load_time_ms": loadTimeMs,
-        "model.cached": isCached
+        "model.cached": !isIOS() && isCached
       });
       span?.end();
       telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
@@ -2644,12 +2678,23 @@ var Wav2Vec2Inference = class {
       logger2.debug("Running warmup inference to initialize GPU context");
       const warmupStart = performance.now();
       const silentAudio = new Float32Array(16e3);
-      await this.infer(silentAudio, 0);
+      const WARMUP_TIMEOUT_MS = 15e3;
+      const warmupResult = await Promise.race([
+        this.infer(silentAudio, 0).then(() => "ok"),
+        new Promise((r) => setTimeout(() => r("timeout"), WARMUP_TIMEOUT_MS))
+      ]);
       const warmupTimeMs = performance.now() - warmupStart;
-      logger2.info("Warmup inference complete", {
-        warmupTimeMs: Math.round(warmupTimeMs),
-        backend: this._backend
-      });
+      if (warmupResult === "timeout") {
+        logger2.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
+          timeoutMs: WARMUP_TIMEOUT_MS,
+          backend: this._backend
+        });
+      } else {
+        logger2.info("Warmup inference complete", {
+          warmupTimeMs: Math.round(warmupTimeMs),
+          backend: this._backend
+        });
+      }
       telemetry?.recordHistogram("omote.model.warmup_time", warmupTimeMs, {
         model: "wav2vec2",
         backend: this._backend
@@ -2837,334 +2882,316 @@ LAM_BLENDSHAPES.forEach((name, index) => {
 });
 var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
-// src/inference/WhisperInference.ts
-var logger4 = createLogger("Whisper");
-var WhisperInference = class _WhisperInference {
-  constructor(config = {}) {
-    this.pipeline = null;
-    this.currentModel = null;
-    this.isLoading = false;
-    this.actualBackend = "unknown";
-    this.config = {
-      model: config.model || "tiny",
-      multilingual: config.multilingual || false,
-      language: config.language || "en",
-      task: config.task || "transcribe",
-      dtype: config.dtype || "q8",
-      device: config.device || "auto",
-      localModelPath: config.localModelPath,
-      token: config.token,
-      suppressNonSpeech: config.suppressNonSpeech !== false
-      // Default true
-    };
-  }
-  /**
-   * Check if WebGPU is available in this browser
-   */
-  static async isWebGPUAvailable() {
-    return "gpu" in navigator;
-  }
-  /**
-   * Load the Whisper model pipeline
-   */
-  async load(onProgress) {
-    if (this.isLoading) {
-      logger4.debug("Already loading model, waiting...");
-      while (this.isLoading) {
-        await new Promise((resolve) => setTimeout(resolve, 100));
-      }
-      return;
-    }
-    const modelName = this.getModelName();
-    if (this.pipeline !== null && this.currentModel === modelName) {
-      logger4.debug("Model already loaded", { model: modelName });
-      return;
-    }
-    this.isLoading = true;
-    const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("whisper.load", {
-      "whisper.model": modelName,
-      "whisper.dtype": this.config.dtype,
-      "whisper.device": this.config.device
-    });
-    try {
-      const loadStart = performance.now();
-      logger4.info("Loading model", {
-        model: modelName,
-        dtype: this.config.dtype,
-        device: this.config.device,
-        multilingual: this.config.multilingual
-      });
-      if (this.pipeline !== null && this.currentModel !== modelName) {
-        logger4.debug("Disposing old model", { oldModel: this.currentModel });
-        await this.pipeline.dispose();
-        this.pipeline = null;
-      }
-      const hasWebGPU = await _WhisperInference.isWebGPUAvailable();
-      const device = this.config.device === "auto" ? hasWebGPU ? "webgpu" : "wasm" : this.config.device;
-      logger4.info("Creating pipeline", { device, hasWebGPU });
-      __webpack_exports__env.allowLocalModels = false;
-      __webpack_exports__env.allowRemoteModels = true;
-      __webpack_exports__env.useBrowserCache = false;
-      __webpack_exports__env.useCustomCache = false;
-      __webpack_exports__env.useWasmCache = false;
-      if (__webpack_exports__env.backends.onnx.wasm) {
-        __webpack_exports__env.backends.onnx.wasm.proxy = false;
-        __webpack_exports__env.backends.onnx.wasm.numThreads = 1;
-      }
-      logger4.info("Configured transformers.js env", {
-        allowLocalModels: __webpack_exports__env.allowLocalModels,
-        useBrowserCache: __webpack_exports__env.useBrowserCache,
-        useWasmCache: __webpack_exports__env.useWasmCache
-      });
-      const pipelineOptions = {
-        dtype: this.config.dtype,
-        device,
-        progress_callback: onProgress,
-        // For medium models, use no_attentions revision to save memory
-        revision: modelName.includes("whisper-medium") ? "no_attentions" : "main",
-        // Pass HuggingFace token to bypass rate limits
-        ...this.config.token && { token: this.config.token }
-      };
-      if (device === "webgpu") {
-        pipelineOptions.session_options = {
-          executionProviders: ["webgpu"]
-        };
-        logger4.info("Forcing WebGPU execution providers");
+// src/inference/kaldiFbank.ts
+function fft(re, im) {
+  const n = re.length;
+  for (let i = 1, j = 0; i < n; i++) {
+    let bit = n >> 1;
+    while (j & bit) {
+      j ^= bit;
+      bit >>= 1;
+    }
+    j ^= bit;
+    if (i < j) {
+      let tmp = re[i];
+      re[i] = re[j];
+      re[j] = tmp;
+      tmp = im[i];
+      im[i] = im[j];
+      im[j] = tmp;
+    }
+  }
+  for (let len = 2; len <= n; len *= 2) {
+    const halfLen = len / 2;
+    const angle = -2 * Math.PI / len;
+    const wRe = Math.cos(angle);
+    const wIm = Math.sin(angle);
+    for (let i = 0; i < n; i += len) {
+      let curRe = 1;
+      let curIm = 0;
+      for (let j = 0; j < halfLen; j++) {
+        const a = i + j;
+        const b = a + halfLen;
+        const tRe = curRe * re[b] - curIm * im[b];
+        const tIm = curRe * im[b] + curIm * re[b];
+        re[b] = re[a] - tRe;
+        im[b] = im[a] - tIm;
+        re[a] += tRe;
+        im[a] += tIm;
+        const nextRe = curRe * wRe - curIm * wIm;
+        curIm = curRe * wIm + curIm * wRe;
+        curRe = nextRe;
       }
-      this.pipeline = await __webpack_exports__pipeline(
-        "automatic-speech-recognition",
-        modelName,
-        pipelineOptions
-      );
-      this.actualBackend = device;
-      this.currentModel = modelName;
-      const loadTimeMs = performance.now() - loadStart;
-      logger4.info("Model loaded successfully", {
-        model: modelName,
-        loadTimeMs: Math.round(loadTimeMs)
-      });
-      span?.setAttributes({
-        "whisper.load_time_ms": loadTimeMs
-      });
-      span?.end();
-    } catch (error) {
-      const errorDetails = {
-        message: error instanceof Error ? error.message : String(error),
-        stack: error instanceof Error ? error.stack : void 0,
-        name: error instanceof Error ? error.name : void 0,
-        error
-      };
-      logger4.error("Failed to load model", errorDetails);
-      span?.endWithError(error);
-      throw error;
-    } finally {
-      this.isLoading = false;
     }
   }
-  /**
-   * Transcribe audio to text
-   *
-   * @param audio Audio samples (Float32Array, 16kHz mono)
-   * @param options Transcription options
-   */
-  async transcribe(audio, options) {
-    if (!this.pipeline) {
-      throw new Error("Model not loaded. Call load() first.");
-    }
-    const audioCopy = new Float32Array(audio);
-    const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("whisper.transcribe", {
-      "audio.samples": audioCopy.length,
-      "audio.duration_s": audioCopy.length / 16e3,
-      "whisper.model": this.currentModel
-    });
-    try {
-      const inferStart = performance.now();
-      const audioDurationSec = audioCopy.length / 16e3;
-      const isShortAudio = audioDurationSec < 10;
-      logger4.debug("Starting transcription", {
-        audioSamples: audioCopy.length,
-        durationSeconds: audioDurationSec.toFixed(2),
-        isShortAudio
-      });
-      const transcribeOptions = {
-        // Decoding strategy
-        top_k: 0,
-        do_sample: false,
-        // Adaptive chunking: Disable for short audio, enable for long audio
-        chunk_length_s: options?.chunkLengthS || (isShortAudio ? audioDurationSec : 30),
-        stride_length_s: options?.strideLengthS || (isShortAudio ? 0 : 5),
-        // Timestamps
-        return_timestamps: options?.returnTimestamps || false,
-        force_full_sequences: false
-      };
-      if (this.config.multilingual) {
-        transcribeOptions.language = options?.language || this.config.language;
-        transcribeOptions.task = options?.task || this.config.task;
-      }
-      const rawResult = await this.pipeline(audioCopy, transcribeOptions);
-      const result = Array.isArray(rawResult) ? rawResult[0] : rawResult;
-      const inferenceTimeMs = performance.now() - inferStart;
-      let cleanedText = result.text;
-      if (this.config.suppressNonSpeech) {
-        cleanedText = this.removeNonSpeechTokens(cleanedText);
+}
+function htkMel(freq) {
+  return 1127 * Math.log(1 + freq / 700);
+}
+function htkMelInverse(mel) {
+  return 700 * (Math.exp(mel / 1127) - 1);
+}
+function buildMelFilterbank(numBins, fftSize, sampleRate, lowFreq, highFreq) {
+  const numFftBins = fftSize / 2 + 1;
+  const lowMel = htkMel(lowFreq);
+  const highMel = htkMel(highFreq);
+  const melPoints = new Float64Array(numBins + 2);
+  for (let i = 0; i < numBins + 2; i++) {
+    melPoints[i] = lowMel + (highMel - lowMel) * i / (numBins + 1);
+  }
+  const binFreqs = new Float64Array(numBins + 2);
+  for (let i = 0; i < numBins + 2; i++) {
+    binFreqs[i] = htkMelInverse(melPoints[i]) * fftSize / sampleRate;
+  }
+  const filters = [];
+  for (let m = 0; m < numBins; m++) {
+    const left = binFreqs[m];
+    const center = binFreqs[m + 1];
+    const right = binFreqs[m + 2];
+    const startBin = Math.max(0, Math.ceil(left));
+    const endBin = Math.min(numFftBins - 1, Math.floor(right));
+    const weights = new Float32Array(endBin - startBin + 1);
+    for (let k = startBin; k <= endBin; k++) {
+      if (k <= center) {
+        weights[k - startBin] = center - left > 0 ? (k - left) / (center - left) : 0;
+      } else {
+        weights[k - startBin] = right - center > 0 ? (right - k) / (right - center) : 0;
       }
-      const transcription = {
-        text: cleanedText,
-        language: this.config.language,
-        inferenceTimeMs,
-        chunks: result.chunks
-      };
-      logger4.debug("Transcription complete", {
-        text: transcription.text,
-        inferenceTimeMs: Math.round(inferenceTimeMs),
-        chunksCount: result.chunks?.length || 0
-      });
-      span?.setAttributes({
-        "whisper.inference_time_ms": inferenceTimeMs,
-        "whisper.text_length": transcription.text.length
-      });
-      span?.end();
-      return transcription;
-    } catch (error) {
-      logger4.error("Transcribe error", { error });
-      span?.endWithError(error);
-      throw new Error(`Whisper transcription failed: ${error}`);
     }
+    filters.push({ startBin, weights });
   }
-  /**
-   * Transcribe with streaming chunks (progressive results)
-   *
-   * @param audio Audio samples
-   * @param onChunk Called when each chunk is finalized
-   * @param onUpdate Called after each generation step (optional)
-   */
-  async transcribeStreaming(audio, onChunk, onUpdate, options) {
-    if (!this.pipeline) {
-      throw new Error("Model not loaded. Call load() first.");
-    }
-    const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("whisper.transcribe_streaming", {
-      "audio.samples": audio.length,
-      "audio.duration_s": audio.length / 16e3
-    });
-    try {
-      const inferStart = performance.now();
-      logger4.debug("Starting streaming transcription", {
-        audioSamples: audio.length,
-        durationSeconds: (audio.length / 16e3).toFixed(2)
-      });
-      const transcribeOptions = {
-        top_k: 0,
-        do_sample: false,
-        chunk_length_s: options?.chunkLengthS || 30,
-        stride_length_s: options?.strideLengthS || 5,
-        return_timestamps: true,
-        force_full_sequences: false
-      };
-      if (this.config.multilingual) {
-        transcribeOptions.language = options?.language || this.config.language;
-        transcribeOptions.task = options?.task || this.config.task;
+  return filters;
+}
+function createHammingWindow(length) {
+  const window2 = new Float32Array(length);
+  for (let i = 0; i < length; i++) {
+    window2[i] = 0.54 - 0.46 * Math.cos(2 * Math.PI * i / (length - 1));
+  }
+  return window2;
+}
+function computeKaldiFbank(audio, sampleRate, numMelBins, opts) {
+  const frameLengthMs = opts?.frameLengthMs ?? 25;
+  const frameShiftMs = opts?.frameShiftMs ?? 10;
+  const lowFreq = opts?.lowFreq ?? 20;
+  const highFreq = opts?.highFreq ?? sampleRate / 2;
+  const dither = opts?.dither ?? 0;
+  const preemphasis = opts?.preemphasis ?? 0.97;
+  const frameLengthSamples = Math.round(sampleRate * frameLengthMs / 1e3);
+  const frameShiftSamples = Math.round(sampleRate * frameShiftMs / 1e3);
+  const scaled = new Float32Array(audio.length);
+  for (let i = 0; i < audio.length; i++) {
+    scaled[i] = audio[i] * 32768;
+  }
+  if (dither > 0) {
+    for (let i = 0; i < scaled.length; i++) {
+      const u1 = Math.random();
+      const u2 = Math.random();
+      scaled[i] += dither * Math.sqrt(-2 * Math.log(u1 + 1e-10)) * Math.cos(2 * Math.PI * u2);
+    }
+  }
+  const numFrames = Math.max(0, Math.floor((scaled.length - frameLengthSamples) / frameShiftSamples) + 1);
+  if (numFrames === 0) {
+    return new Float32Array(0);
+  }
+  let fftSize = 1;
+  while (fftSize < frameLengthSamples) fftSize *= 2;
+  const numFftBins = fftSize / 2 + 1;
+  const window2 = createHammingWindow(frameLengthSamples);
+  const filters = buildMelFilterbank(numMelBins, fftSize, sampleRate, lowFreq, highFreq);
+  const output = new Float32Array(numFrames * numMelBins);
+  const fftRe = new Float64Array(fftSize);
+  const fftIm = new Float64Array(fftSize);
+  for (let f = 0; f < numFrames; f++) {
+    const offset = f * frameShiftSamples;
+    fftRe.fill(0);
+    fftIm.fill(0);
+    for (let i = 0; i < frameLengthSamples; i++) {
+      let sample = scaled[offset + i];
+      if (preemphasis > 0 && i > 0) {
+        sample -= preemphasis * scaled[offset + i - 1];
+      } else if (preemphasis > 0 && i === 0 && offset > 0) {
+        sample -= preemphasis * scaled[offset - 1];
       }
-      const rawResult = await this.pipeline(audio, transcribeOptions);
-      const result = Array.isArray(rawResult) ? rawResult[0] : rawResult;
-      const inferenceTimeMs = performance.now() - inferStart;
-      if (result.chunks && onChunk) {
-        for (const chunk of result.chunks) {
-          onChunk({
-            text: chunk.text,
-            timestamp: chunk.timestamp
-          });
+      fftRe[i] = sample * window2[i];
+    }
+    fft(fftRe, fftIm);
+    const outOffset = f * numMelBins;
+    for (let m = 0; m < numMelBins; m++) {
+      const filter = filters[m];
+      let energy = 0;
+      for (let k = 0; k < filter.weights.length; k++) {
+        const bin = filter.startBin + k;
+        if (bin < numFftBins) {
+          const powerSpec = fftRe[bin] * fftRe[bin] + fftIm[bin] * fftIm[bin];
+          energy += filter.weights[k] * powerSpec;
         }
       }
-      if (onUpdate) {
-        onUpdate(result.text);
-      }
-      logger4.debug("Streaming transcription complete", {
-        text: result.text,
-        inferenceTimeMs: Math.round(inferenceTimeMs),
-        chunksCount: result.chunks?.length || 0
-      });
-      span?.setAttributes({
-        "whisper.inference_time_ms": inferenceTimeMs,
-        "whisper.chunks_count": result.chunks?.length || 0
-      });
-      span?.end();
-      return {
-        text: result.text,
-        language: this.config.language,
-        inferenceTimeMs,
-        chunks: result.chunks
-      };
-    } catch (error) {
-      logger4.error("Streaming transcribe error", { error });
-      span?.endWithError(error);
-      throw new Error(`Whisper streaming transcription failed: ${error}`);
+      output[outOffset + m] = Math.log(Math.max(energy, 1e-10));
     }
   }
-  /**
-   * Dispose of the model and free resources
-   */
-  async dispose() {
-    if (this.pipeline) {
-      logger4.debug("Disposing model", { model: this.currentModel });
-      await this.pipeline.dispose();
-      this.pipeline = null;
-      this.currentModel = null;
+  return output;
+}
+function applyLFR(features, featureDim, lfrM = 7, lfrN = 6) {
+  const numFrames = features.length / featureDim;
+  if (numFrames === 0) return new Float32Array(0);
+  const leftPad = Math.floor((lfrM - 1) / 2);
+  const paddedLen = numFrames + leftPad;
+  const numOutputFrames = Math.ceil(paddedLen / lfrN);
+  const outputDim = featureDim * lfrM;
+  const output = new Float32Array(numOutputFrames * outputDim);
+  for (let i = 0; i < numOutputFrames; i++) {
+    const startFrame = i * lfrN - leftPad;
+    for (let j = 0; j < lfrM; j++) {
+      let srcFrame = startFrame + j;
+      if (srcFrame < 0) srcFrame = 0;
+      if (srcFrame >= numFrames) srcFrame = numFrames - 1;
+      const srcOffset = srcFrame * featureDim;
+      const dstOffset = i * outputDim + j * featureDim;
+      for (let k = 0; k < featureDim; k++) {
+        output[dstOffset + k] = features[srcOffset + k];
+      }
     }
   }
-  /**
-   * Check if model is loaded
-   */
-  get isLoaded() {
-    return this.pipeline !== null;
+  return output;
+}
+function applyCMVN(features, dim, negMean, invStddev) {
+  for (let i = 0; i < features.length; i++) {
+    const d = i % dim;
+    features[i] = (features[i] + negMean[d]) * invStddev[d];
   }
-  /**
-   * Get the backend being used (webgpu or wasm)
-   */
-  get backend() {
-    return this.actualBackend;
+  return features;
+}
+function parseCMVNFromMetadata(negMeanStr, invStddevStr) {
+  const negMean = new Float32Array(
+    negMeanStr.split(",").map((s) => parseFloat(s.trim()))
+  );
+  const invStddev = new Float32Array(
+    invStddevStr.split(",").map((s) => parseFloat(s.trim()))
+  );
+  return { negMean, invStddev };
+}
+// src/inference/ctcDecoder.ts
+function resolveLanguageId(language) {
+  const map = {
+    auto: 0,
+    zh: 3,
+    en: 4,
+    yue: 7,
+    ja: 11,
+    ko: 12
+  };
+  return map[language] ?? 0;
+}
+function resolveTextNormId(textNorm) {
+  return textNorm === "without_itn" ? 15 : 14;
+}
+function parseTokensFile(content) {
+  const map = /* @__PURE__ */ new Map();
+  const lines = content.split("\n");
+  for (const line of lines) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    const lastSpace = trimmed.lastIndexOf(" ");
+    if (lastSpace === -1) continue;
+    const token = trimmed.substring(0, lastSpace);
+    const id = parseInt(trimmed.substring(lastSpace + 1), 10);
+    if (!isNaN(id)) {
+      map.set(id, token);
+    }
+  }
+  return map;
+}
+function parseStructuredToken(token) {
+  const match = token.match(/^<\|(.+)\|>$/);
+  if (!match) return null;
+  const value = match[1];
+  if (value === "zh" || value === "en" || value === "ja" || value === "ko" || value === "yue" || value === "nospeech") {
+    return { type: "language", value };
   }
-  /**
-   * Get the full model name used by transformers.js
-   */
-  getModelName() {
-    if (this.config.localModelPath) {
-      return this.config.localModelPath;
+  const emotions = ["HAPPY", "SAD", "ANGRY", "NEUTRAL", "FEARFUL", "DISGUSTED", "SURPRISED", "EMO_UNKNOWN"];
+  if (emotions.includes(value)) {
+    return { type: "emotion", value };
+  }
+  const events = ["Speech", "BGM", "Applause", "Laughter", "Crying", "Coughing", "Sneezing", "EVENT_UNKNOWN"];
+  if (events.includes(value)) {
+    return { type: "event", value };
+  }
+  if (value === "withitn" || value === "woitn" || value === "with_itn" || value === "without_itn") {
+    return { type: "textnorm", value };
+  }
+  return null;
+}
+function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
+  const tokenIds = [];
+  for (let t = 0; t < seqLen; t++) {
+    const offset = t * vocabSize;
+    let maxIdx = 0;
+    let maxVal = logits[offset];
+    for (let v = 1; v < vocabSize; v++) {
+      if (logits[offset + v] > maxVal) {
+        maxVal = logits[offset + v];
+        maxIdx = v;
+      }
     }
-    let modelName = `onnx-community/whisper-${this.config.model}`;
-    if (!this.config.multilingual) {
-      modelName += ".en";
+    tokenIds.push(maxIdx);
+  }
+  const collapsed = [];
+  let prev = -1;
+  for (const id of tokenIds) {
+    if (id !== prev) {
+      collapsed.push(id);
+      prev = id;
+    }
+  }
+  const filtered = collapsed.filter((id) => id !== 0 && id !== 1 && id !== 2);
+  let language;
+  let emotion;
+  let event;
+  const textTokens = [];
+  for (const id of filtered) {
+    const token = tokenMap.get(id);
+    if (!token) continue;
+    const structured = parseStructuredToken(token);
+    if (structured) {
+      if (structured.type === "language") language = structured.value;
+      else if (structured.type === "emotion") emotion = structured.value;
+      else if (structured.type === "event") event = structured.value;
+    } else {
+      textTokens.push(token);
     }
-    return modelName;
-  }
-  /**
-   * Remove non-speech event tokens from transcription
-   *
-   * Whisper outputs special tokens for non-speech events like:
-   * [LAUGHTER], [APPLAUSE], [MUSIC], [BLANK_AUDIO], [CLICKING], etc.
-   *
-   * This method strips these tokens and cleans up extra whitespace.
-   */
-  removeNonSpeechTokens(text) {
-    const cleaned = text.replace(/\[[\w\s_]+\]/g, "");
-    return cleaned.replace(/\s+/g, " ").trim();
   }
-};
+  let text = textTokens.join("");
+  text = text.replace(/\u2581/g, " ").trim();
+  return { text, language, emotion, event };
+}
-// src/inference/Wav2ArkitCpuInference.ts
-var logger5 = createLogger("Wav2ArkitCpu");
-var Wav2ArkitCpuInference = class {
+// src/inference/SenseVoiceInference.ts
+var logger4 = createLogger("SenseVoice");
+var SenseVoiceInference = class {
   constructor(config) {
-    this.modelId = "wav2arkit_cpu";
     this.session = null;
     this.ort = null;
     this._backend = "wasm";
     this.isLoading = false;
-    // Inference queue for handling concurrent calls
     this.inferenceQueue = Promise.resolve();
-    this.config = config;
+    // Preprocessing state (loaded once)
+    this.tokenMap = null;
+    this.negMean = null;
+    this.invStddev = null;
+    this.languageId = 0;
+    this.textNormId = 14;
+    const modelDir = config.modelUrl.substring(0, config.modelUrl.lastIndexOf("/"));
+    const tokensUrl = config.tokensUrl ?? `${modelDir}/tokens.txt`;
+    this.config = {
+      modelUrl: config.modelUrl,
+      tokensUrl,
+      language: config.language ?? "auto",
+      textNorm: config.textNorm ?? "with_itn",
+      backend: config.backend ?? "auto"
+    };
+    this.languageId = resolveLanguageId(this.config.language);
+    this.textNormId = resolveTextNormId(this.config.textNorm);
   }
   get backend() {
     return this.session ? this._backend : null;
@@ -3172,10 +3199,8 @@ var Wav2ArkitCpuInference = class {
   get isLoaded() {
     return this.session !== null;
   }
-  /**
-   * Load the ONNX model
-   */
-  async load() {
+  // ─── Load ───────────────────────────────────────────────────────────────
+  async load(onProgress) {
     if (this.isLoading) {
       throw new Error("Model is already loading");
     }
@@ -3185,30 +3210,281 @@ var Wav2ArkitCpuInference = class {
     this.isLoading = true;
     const startTime = performance.now();
     const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("Wav2ArkitCpu.load", {
+    const span = telemetry?.startSpan("SenseVoice.load", {
       "model.url": this.config.modelUrl,
-      "model.backend_requested": this.config.backend || "wasm"
+      "model.backend_requested": this.config.backend
     });
     try {
-      const preference = this.config.backend || "wasm";
-      logger5.info("Loading ONNX Runtime...", { preference });
-      const { ort, backend } = await getOnnxRuntimeForPreference(preference);
+      logger4.info("Loading ONNX Runtime...", { preference: this.config.backend });
+      const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
       this.ort = ort;
       this._backend = backend;
-      logger5.info("ONNX Runtime loaded", { backend: this._backend });
-      const modelUrl = this.config.modelUrl;
-      const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
+      logger4.info("ONNX Runtime loaded", { backend: this._backend });
+      logger4.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
+      const tokensResponse = await fetch(this.config.tokensUrl);
+      if (!tokensResponse.ok) {
+        throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
+      }
+      const tokensText = await tokensResponse.text();
+      this.tokenMap = parseTokensFile(tokensText);
+      logger4.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
       const sessionOptions = getSessionOptions(this._backend);
+      if (this._backend === "webgpu") {
+        sessionOptions.graphOptimizationLevel = "basic";
+      }
+      let isCached = false;
       if (isIOS()) {
-        logger5.info("iOS: passing model URLs directly to ORT (low-memory path)", {
-          modelUrl,
-          dataUrl
+        logger4.info("iOS: passing model URL directly to ORT (low-memory path)", {
+          modelUrl: this.config.modelUrl
         });
-        if (dataUrl) {
-          const dataFilename = dataUrl.split("/").pop();
-          sessionOptions.externalData = [{
-            path: dataFilename,
-            data: dataUrl
+        this.session = await this.ort.InferenceSession.create(
+          this.config.modelUrl,
+          sessionOptions
+        );
+      } else {
+        const cache = getModelCache();
+        isCached = await cache.has(this.config.modelUrl);
+        let modelBuffer;
+        if (isCached) {
+          logger4.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
+          modelBuffer = await cache.get(this.config.modelUrl);
+          onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
+        } else {
+          logger4.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
+          modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
+        }
+        logger4.debug("Creating ONNX session", {
+          size: formatBytes(modelBuffer.byteLength),
+          backend: this._backend
+        });
+        const modelData = new Uint8Array(modelBuffer);
+        this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
+      }
+      try {
+        const metadata = this.session.handler?.metadata;
+        if (metadata?.neg_mean && metadata?.inv_stddev) {
+          const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
+          this.negMean = cmvn.negMean;
+          this.invStddev = cmvn.invStddev;
+          logger4.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
+        } else {
+          logger4.warn("CMVN not found in model metadata \u2014 features will not be normalized");
+        }
+      } catch (cmvnErr) {
+        logger4.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
+      }
+      const loadTimeMs = performance.now() - startTime;
+      logger4.info("SenseVoice model loaded", {
+        backend: this._backend,
+        loadTimeMs: Math.round(loadTimeMs),
+        vocabSize: this.tokenMap.size,
+        inputs: this.session.inputNames,
+        outputs: this.session.outputNames,
+        hasCMVN: this.negMean !== null
+      });
+      span?.setAttributes({
+        "model.backend": this._backend,
+        "model.load_time_ms": loadTimeMs,
+        "model.cached": !isIOS() && isCached,
+        "model.vocab_size": this.tokenMap.size
+      });
+      span?.end();
+      telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
+        model: "sensevoice",
+        backend: this._backend
+      });
+      return {
+        backend: this._backend,
+        loadTimeMs,
+        inputNames: [...this.session.inputNames],
+        outputNames: [...this.session.outputNames],
+        vocabSize: this.tokenMap.size
+      };
+    } catch (error) {
+      span?.endWithError(error instanceof Error ? error : new Error(String(error)));
+      telemetry?.incrementCounter("omote.errors.total", 1, {
+        model: "sensevoice",
+        error_type: "load_failed"
+      });
+      throw error;
+    } finally {
+      this.isLoading = false;
+    }
+  }
+  // ─── Transcribe ─────────────────────────────────────────────────────────
+  /**
+   * Transcribe audio samples to text.
+   *
+   * The samples are copied into a fresh Float32Array before being queued, so
+   * the caller's buffer is not referenced while the request waits its turn.
+   * The actual work is delegated to queueInference(), which runs requests
+   * one after another.
+   *
+   * @param audioSamples Float32Array of audio samples at 16kHz, [-1, 1] range
+   * @returns Promise resolving to the transcription result: text, language,
+   *   emotion and event, plus inferenceTimeMs and preprocessTimeMs. When the
+   *   audio produces no feature frames only text ("") and the two timings
+   *   are present.
+   * @throws {Error} "Model not loaded. Call load() first." when the session,
+   *   the ONNX runtime or the token map is missing
+   */
+  async transcribe(audioSamples) {
+    if (!this.session || !this.ort || !this.tokenMap) {
+      throw new Error("Model not loaded. Call load() first.");
+    }
+    const audio = new Float32Array(audioSamples);
+    return this.queueInference(audio);
+  }
+  queueInference(audio) { // Runs one transcription after every previously queued one and returns a promise for its result.
+    return new Promise((resolve, reject) => {
+      this.inferenceQueue = this.inferenceQueue.then(async () => { // chain onto the previous job so inferences run strictly one at a time
+        const telemetry = getTelemetry();
+        const span = telemetry?.startSpan("SenseVoice.transcribe", {
+          "inference.backend": this._backend,
+          "inference.input_samples": audio.length
+        });
+        try {
+          const startTime = performance.now();
+          const preprocessStart = performance.now();
+          const fbank = computeKaldiFbank(audio, 16e3, 80); // presumably (samples, sample rate in Hz, mel bins) — helper is defined outside this chunk, confirm
+          const numFrames = fbank.length / 80; // fbank is treated as a flat array holding 80 values per frame
+          if (numFrames === 0) { // no feature frames: answer with empty text and skip the model entirely
+            resolve({
+              text: "",
+              inferenceTimeMs: performance.now() - startTime,
+              preprocessTimeMs: performance.now() - preprocessStart
+            });
+            return; // NOTE(review): this early exit never calls span?.end() and records no metrics — confirm that is intended
+          }
+          const lfrFeatures = applyLFR(fbank, 80, 7, 6); // presumably stacks 7 frames with a stride of 6 (80 * 7 = 560 values per output frame) — confirm against applyLFR
+          const numLfrFrames = lfrFeatures.length / 560;
+          if (this.negMean && this.invStddev) { // CMVN statistics are only set when load() found them in the model metadata
+            applyCMVN(lfrFeatures, 560, this.negMean, this.invStddev); // return value is ignored, so this presumably normalises lfrFeatures in place
+          }
+          const preprocessTimeMs = performance.now() - preprocessStart;
+          const ort = this.ort;
+          const feeds = { // the four named inputs passed to session.run(); all have a leading batch dimension of 1
+            x: new ort.Tensor("float32", lfrFeatures, [1, numLfrFrames, 560]),
+            x_length: new ort.Tensor("int32", new Int32Array([numLfrFrames]), [1]),
+            language: new ort.Tensor("int32", new Int32Array([this.languageId]), [1]),
+            text_norm: new ort.Tensor("int32", new Int32Array([this.textNormId]), [1])
+          };
+          const results = await this.session.run(feeds);
+          const logitsOutput = results["logits"];
+          if (!logitsOutput) {
+            throw new Error('Model output missing "logits" tensor');
+          }
+          const logitsData = logitsOutput.data;
+          const logitsDims = logitsOutput.dims; // read below as [batch, seqLen, vocabSize]
+          const seqLen = logitsDims[1];
+          const vocabSize = logitsDims[2];
+          const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
+          const inferenceTimeMs = performance.now() - startTime; // measured from startTime, so it includes preprocessTimeMs
+          logger4.trace("Transcription complete", {
+            text: decoded.text.substring(0, 50), // only the first 50 characters of the transcript are logged
+            language: decoded.language,
+            emotion: decoded.emotion,
+            event: decoded.event,
+            preprocessTimeMs: Math.round(preprocessTimeMs * 100) / 100, // rounded to two decimals for the log only
+            inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
+            numFrames,
+            numLfrFrames
+          });
+          span?.setAttributes({
+            "inference.duration_ms": inferenceTimeMs,
+            "inference.preprocess_ms": preprocessTimeMs,
+            "inference.num_frames": numFrames,
+            "inference.text_length": decoded.text.length
+          });
+          span?.end();
+          telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
+            model: "sensevoice",
+            backend: this._backend
+          });
+          telemetry?.incrementCounter("omote.inference.total", 1, {
+            model: "sensevoice",
+            backend: this._backend,
+            status: "success"
+          });
+          resolve({
+            text: decoded.text,
+            language: decoded.language,
+            emotion: decoded.emotion,
+            event: decoded.event,
+            inferenceTimeMs,
+            preprocessTimeMs
+          });
+        } catch (err) { // the error is handed to this caller only; it is not rethrown, so the queue promise resolves and later jobs still run
+          span?.endWithError(err instanceof Error ? err : new Error(String(err)));
+          telemetry?.incrementCounter("omote.inference.total", 1, {
+            model: "sensevoice",
+            backend: this._backend,
+            status: "error"
+          });
+          reject(err);
+        }
+      });
+    });
+  }
+  // ─── Dispose ──────────────────────────────────────────────────────────
+  async dispose() { // Releases the ONNX session (if one exists) and clears the runtime, vocabulary and CMVN references.
+    if (this.session) { // nothing to release when load() never succeeded or dispose() already ran
+      await this.session.release();
+      this.session = null;
+    }
+    this.ort = null; // NOTE(review): inferenceQueue is not awaited here; a job still queued would appear to fail on these nulled fields and reject — confirm
+    this.tokenMap = null;
+    this.negMean = null;
+    this.invStddev = null;
+  }
+};
+// src/inference/Wav2ArkitCpuInference.ts
+var logger5 = createLogger("Wav2ArkitCpu");
+var Wav2ArkitCpuInference = class {
+  constructor(config) { // Sets up an unloaded instance; only fields are assigned here, the model is loaded by load().
+    this.modelId = "wav2arkit_cpu";
+    this.session = null;
+    this.ort = null;
+    this._backend = "wasm"; // initial value; load() overwrites it with the backend it actually obtained
+    this.isLoading = false;
+    // Inference queue for handling concurrent calls
+    this.inferenceQueue = Promise.resolve();
+    this.config = config; // kept by reference without defaults; load() applies fallbacks such as backend "wasm" when reading it
+  }
+  get backend() { // Backend in use, or null while no session exists (even though _backend already holds a value).
+    return this.session ? this._backend : null;
+  }
+  get isLoaded() { // True exactly when a session object is currently held.
+    return this.session !== null;
+  }
+  /**
+   * Load the ONNX model
+   */
+  async load() {
+    if (this.isLoading) {
+      throw new Error("Model is already loading");
+    }
+    if (this.session) {
+      throw new Error("Model already loaded. Call dispose() first.");
+    }
+    this.isLoading = true;
+    const startTime = performance.now();
+    const telemetry = getTelemetry();
+    const span = telemetry?.startSpan("Wav2ArkitCpu.load", {
+      "model.url": this.config.modelUrl,
+      "model.backend_requested": this.config.backend || "wasm"
+    });
+    try {
+      const preference = this.config.backend || "wasm";
+      logger5.info("Loading ONNX Runtime...", { preference });
+      const { ort, backend } = await getOnnxRuntimeForPreference(preference);
+      this.ort = ort;
+      this._backend = backend;
+      logger5.info("ONNX Runtime loaded", { backend: this._backend });
+      const modelUrl = this.config.modelUrl;
+      const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
+      const sessionOptions = getSessionOptions(this._backend);
+      if (isIOS()) {
+        logger5.info("iOS: passing model URLs directly to ORT (low-memory path)", {
+          modelUrl,
+          dataUrl
+        });
+        if (dataUrl) {
+          const dataFilename = dataUrl.split("/").pop();
+          sessionOptions.externalData = [{
+            path: dataFilename,
+            data: dataUrl
             // URL string — ORT fetches directly into WASM
           }];
         }
@@ -3474,21 +3750,22 @@ var LipSyncWithFallback = class {
     try {
       return await this.implementation.load();
     } catch (error) {
-      logger6.warn("GPU model load failed, falling back to CPU model", {
-        error: error instanceof Error ? error.message : String(error)
-      });
-      try {
-        await this.implementation.dispose();
-      } catch {
-      }
-      this.implementation = new Wav2ArkitCpuInference({
-        modelUrl: this.config.cpuModelUrl
-      });
-      this.hasFallenBack = true;
-      logger6.info("Fallback to Wav2ArkitCpuInference successful");
-      return await this.implementation.load();
+      return this.fallbackToCpu(error instanceof Error ? error.message : String(error));
     }
   }
+  async fallbackToCpu(reason) { // Replaces the current implementation with Wav2ArkitCpuInference and returns the result of its load(); reason is only logged.
+    logger6.warn("GPU model load failed, falling back to CPU model", { reason });
+    try {
+      await this.implementation.dispose();
+    } catch { // best-effort cleanup: a failing dispose() is ignored so it cannot block the fallback
+    }
+    this.implementation = new Wav2ArkitCpuInference({
+      modelUrl: this.config.cpuModelUrl
+    });
+    this.hasFallenBack = true; // set before the CPU model is loaded, so it stays true even if that load fails
+    logger6.info("Fallback to Wav2ArkitCpuInference successful"); // NOTE(review): logged before load() below has run, so "successful" only covers the swap
+    return await this.implementation.load(); // a failure of the CPU load propagates to the caller; there is no further fallback
+  }
   async infer(audioSamples, identityIndex) {
     return this.implementation.infer(audioSamples, identityIndex);
   }
@@ -4545,268 +4822,8 @@ var VADWorkerWithFallback = class {
   }
 };
-// src/inference/Emotion2VecInference.ts
-var logger10 = createLogger("Emotion2Vec");
-var EMOTION2VEC_LABELS = ["neutral", "happy", "angry", "sad"];
-var Emotion2VecInference = class {
-  constructor(config) {
-    this.session = null;
-    this.ort = null;
-    this._backend = "wasm";
-    this.isLoading = false;
-    this.inferenceQueue = Promise.resolve();
-    this.config = {
-      modelUrl: config.modelUrl,
-      backend: config.backend ?? "auto",
-      sampleRate: config.sampleRate ?? 16e3
-    };
-  }
-  get backend() {
-    return this.session ? this._backend : null;
-  }
-  get isLoaded() {
-    return this.session !== null;
-  }
-  get sampleRate() {
-    return this.config.sampleRate;
-  }
-  /**
-   * Load the ONNX model
-   */
-  async load() {
-    if (this.isLoading) {
-      throw new Error("Model is already loading");
-    }
-    if (this.session) {
-      throw new Error("Model already loaded. Call dispose() first.");
-    }
-    this.isLoading = true;
-    const startTime = performance.now();
-    const telemetry = getTelemetry();
-    const span = telemetry?.startSpan("Emotion2Vec.load", {
-      "model.url": this.config.modelUrl,
-      "model.backend_requested": this.config.backend
-    });
-    try {
-      logger10.info("Loading ONNX Runtime...", { preference: this.config.backend });
-      const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
-      this.ort = ort;
-      this._backend = backend;
-      logger10.info("ONNX Runtime loaded", { backend: this._backend });
-      logger10.info("Checking model cache...");
-      const cache = getModelCache();
-      const modelUrl = this.config.modelUrl;
-      const isCached = await cache.has(modelUrl);
-      logger10.info("Cache check complete", { modelUrl, isCached });
-      let modelBuffer;
-      if (isCached) {
-        logger10.info("Loading model from cache...", { modelUrl });
-        modelBuffer = await cache.get(modelUrl);
-        logger10.info("Model loaded from cache", { size: formatBytes(modelBuffer.byteLength) });
-      } else {
-        logger10.info("Fetching model (not cached)...", { modelUrl });
-        modelBuffer = await fetchWithCache(modelUrl);
-        logger10.info("Model fetched and cached", { size: formatBytes(modelBuffer.byteLength) });
-      }
-      logger10.info("Creating ONNX session (this may take a while for large models)...");
-      logger10.debug("Creating ONNX session", {
-        size: formatBytes(modelBuffer.byteLength),
-        backend: this._backend
-      });
-      const sessionOptions = getSessionOptions(this._backend);
-      const modelData = new Uint8Array(modelBuffer);
-      this.session = await ort.InferenceSession.create(modelData, sessionOptions);
-      const loadTimeMs = performance.now() - startTime;
-      logger10.info("Model loaded successfully", {
-        backend: this._backend,
-        loadTimeMs: Math.round(loadTimeMs),
-        sampleRate: this.config.sampleRate,
-        inputNames: [...this.session.inputNames],
-        outputNames: [...this.session.outputNames]
-      });
-      span?.setAttributes({
-        "model.backend": this._backend,
-        "model.load_time_ms": loadTimeMs,
-        "model.cached": isCached
-      });
-      span?.end();
-      telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
-        model: "emotion2vec",
-        backend: this._backend
-      });
-      return {
-        backend: this._backend,
-        loadTimeMs,
-        inputNames: [...this.session.inputNames],
-        outputNames: [...this.session.outputNames],
-        sampleRate: this.config.sampleRate
-      };
-    } catch (error) {
-      span?.endWithError(error instanceof Error ? error : new Error(String(error)));
-      telemetry?.incrementCounter("omote.errors.total", 1, {
-        model: "emotion2vec",
-        error_type: "load_failed"
-      });
-      throw error;
-    } finally {
-      this.isLoading = false;
-    }
-  }
-  /**
-   * Run emotion inference on audio samples
-   *
-   * @param audio - Float32Array of 16kHz audio samples
-   * @returns Frame-level emotion results at 50Hz
-   */
-  async infer(audio) {
-    if (!this.session) {
-      throw new Error("Model not loaded. Call load() first.");
-    }
-    return this.queueInference(audio);
-  }
-  queueInference(audio) {
-    const audioCopy = new Float32Array(audio);
-    return new Promise((resolve, reject) => {
-      this.inferenceQueue = this.inferenceQueue.then(async () => {
-        const telemetry = getTelemetry();
-        const span = telemetry?.startSpan("Emotion2Vec.infer", {
-          "inference.backend": this._backend,
-          "inference.audio_samples": audioCopy.length
-        });
-        try {
-          const startTime = performance.now();
-          const inputTensor = new this.ort.Tensor("float32", audioCopy, [1, audioCopy.length]);
-          const results = await this.session.run({ audio: inputTensor });
-          const logitsTensor = results["logits"];
-          const embeddingsTensor = results["layer_norm_25"];
-          if (!logitsTensor) {
-            throw new Error(
-              `Missing logits tensor from SUPERB model. Got outputs: ${Object.keys(results).join(", ")}`
-            );
-          }
-          const logitsData = logitsTensor.data;
-          const logits = new Float32Array(logitsData);
-          const probs = this.softmax(logits);
-          const probabilities = {
-            neutral: probs[0],
-            happy: probs[1],
-            angry: probs[2],
-            sad: probs[3]
-          };
-          let maxIdx = 0;
-          let maxProb = probs[0];
-          for (let i = 1; i < probs.length; i++) {
-            if (probs[i] > maxProb) {
-              maxProb = probs[i];
-              maxIdx = i;
-            }
-          }
-          const dominant = {
-            emotion: EMOTION2VEC_LABELS[maxIdx],
-            confidence: maxProb,
-            probabilities
-          };
-          let embeddings = [];
-          let numFrames = 1;
-          if (embeddingsTensor) {
-            const embeddingData = embeddingsTensor.data;
-            const dims = embeddingsTensor.dims;
-            if (dims.length === 3) {
-              numFrames = dims[1];
-              const embeddingDim = dims[2];
-              for (let i = 0; i < numFrames; i++) {
-                const start = i * embeddingDim;
-                embeddings.push(new Float32Array(embeddingData.slice(start, start + embeddingDim)));
-              }
-            }
-          }
-          const frames = [];
-          for (let i = 0; i < numFrames; i++) {
-            frames.push({
-              emotion: dominant.emotion,
-              confidence: dominant.confidence,
-              probabilities: { ...probabilities }
-            });
-          }
-          const inferenceTimeMs = performance.now() - startTime;
-          logger10.debug("Emotion inference completed", {
-            numFrames,
-            dominant: dominant.emotion,
-            confidence: Math.round(dominant.confidence * 100),
-            inferenceTimeMs: Math.round(inferenceTimeMs)
-          });
-          span?.setAttributes({
-            "inference.duration_ms": inferenceTimeMs,
-            "inference.num_frames": numFrames,
-            "inference.dominant_emotion": dominant.emotion
-          });
-          span?.end();
-          telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
-            model: "emotion2vec",
-            backend: this._backend
-          });
-          telemetry?.incrementCounter("omote.inference.total", 1, {
-            model: "emotion2vec",
-            backend: this._backend,
-            status: "success"
-          });
-          resolve({
-            frames,
-            dominant,
-            embeddings,
-            logits,
-            inferenceTimeMs
-          });
-        } catch (err) {
-          span?.endWithError(err instanceof Error ? err : new Error(String(err)));
-          telemetry?.incrementCounter("omote.inference.total", 1, {
-            model: "emotion2vec",
-            backend: this._backend,
-            status: "error"
-          });
-          reject(err);
-        }
-      });
-    });
-  }
-  /**
-   * Apply softmax to convert logits to probabilities
-   */
-  softmax(logits) {
-    let max = logits[0];
-    for (let i = 1; i < logits.length; i++) {
-      if (logits[i] > max) max = logits[i];
-    }
-    const exp = new Float32Array(logits.length);
-    let sum = 0;
-    for (let i = 0; i < logits.length; i++) {
-      exp[i] = Math.exp(logits[i] - max);
-      sum += exp[i];
-    }
-    const probs = new Float32Array(logits.length);
-    for (let i = 0; i < logits.length; i++) {
-      probs[i] = exp[i] / sum;
-    }
-    return probs;
-  }
-  /**
-   * Dispose of the model and free resources
-   */
-  async dispose() {
-    if (this.session) {
-      await this.session.release();
-      this.session = null;
-    }
-  }
-};
-/**
- * Check if WebGPU is available and working
- * (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
- */
-Emotion2VecInference.isWebGPUAvailable = isWebGPUAvailable;
 // src/inference/SafariSpeechRecognition.ts
-var logger11 = createLogger("SafariSpeech");
+var logger10 = createLogger("SafariSpeech");
 var SafariSpeechRecognition = class _SafariSpeechRecognition {
   constructor(config = {}) {
     this.recognition = null;
@@ -4825,7 +4842,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       interimResults: config.interimResults ?? true,
       maxAlternatives: config.maxAlternatives ?? 1
     };
-    logger11.debug("SafariSpeechRecognition created", {
+    logger10.debug("SafariSpeechRecognition created", {
       language: this.config.language,
       continuous: this.config.continuous
     });
@@ -4886,7 +4903,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
    */
   async start() {
     if (this.isListening) {
-      logger11.warn("Already listening");
+      logger10.warn("Already listening");
       return;
     }
     if (!_SafariSpeechRecognition.isAvailable()) {
@@ -4916,7 +4933,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       this.isListening = true;
       this.startTime = performance.now();
       this.accumulatedText = "";
-      logger11.info("Speech recognition started", {
+      logger10.info("Speech recognition started", {
         language: this.config.language
       });
       span?.end();
@@ -4931,7 +4948,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
    */
   async stop() {
     if (!this.isListening || !this.recognition) {
-      logger11.warn("Not currently listening");
+      logger10.warn("Not currently listening");
       return {
         text: this.accumulatedText,
         language: this.config.language,
@@ -4960,7 +4977,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
     if (this.recognition && this.isListening) {
       this.recognition.abort();
       this.isListening = false;
-      logger11.info("Speech recognition aborted");
+      logger10.info("Speech recognition aborted");
     }
   }
   /**
@@ -4991,7 +5008,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
     this.isListening = false;
     this.resultCallbacks = [];
     this.errorCallbacks = [];
-    logger11.debug("SafariSpeechRecognition disposed");
+    logger10.debug("SafariSpeechRecognition disposed");
   }
   /**
    * Set up event handlers for the recognition instance
@@ -5019,7 +5036,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
               confidence: alternative.confidence
             };
             this.emitResult(speechResult);
-            logger11.trace("Speech result", {
+            logger10.trace("Speech result", {
               text: text.substring(0, 50),
               isFinal,
               confidence: alternative.confidence
@@ -5029,12 +5046,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
         span?.end();
       } catch (error) {
         span?.endWithError(error instanceof Error ? error : new Error(String(error)));
-        logger11.error("Error processing speech result", { error });
+        logger10.error("Error processing speech result", { error });
       }
     };
     this.recognition.onerror = (event) => {
       const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
-      logger11.error("Speech recognition error", { error: event.error, message: event.message });
+      logger10.error("Speech recognition error", { error: event.error, message: event.message });
       this.emitError(error);
       if (this.stopRejecter) {
         this.stopRejecter(error);
@@ -5044,7 +5061,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
     };
     this.recognition.onend = () => {
       this.isListening = false;
-      logger11.info("Speech recognition ended", {
+      logger10.info("Speech recognition ended", {
         totalText: this.accumulatedText.length,
         durationMs: performance.now() - this.startTime
       });
@@ -5061,13 +5078,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       }
     };
     this.recognition.onstart = () => {
-      logger11.debug("Speech recognition started by browser");
+      logger10.debug("Speech recognition started by browser");
     };
     this.recognition.onspeechstart = () => {
-      logger11.debug("Speech detected");
+      logger10.debug("Speech detected");
     };
     this.recognition.onspeechend = () => {
-      logger11.debug("Speech ended");
+      logger10.debug("Speech ended");
     };
   }
   /**
@@ -5078,7 +5095,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       try {
         callback(result);
       } catch (error) {
-        logger11.error("Error in result callback", { error });
+        logger10.error("Error in result callback", { error });
       }
     }
   }
@@ -5090,7 +5107,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
       try {
         callback(error);
       } catch (callbackError) {
-        logger11.error("Error in error callback", { error: callbackError });
+        logger10.error("Error in error callback", { error: callbackError });
       }
     }
   }
@@ -5264,7 +5281,7 @@ var AgentCoreAdapter = class extends EventEmitter {
     this._sessionId = null;
     this._isConnected = false;
     // Sub-components
-    this.whisper = null;
+    this.asr = null;
     this.vad = null;
     this.lam = null;
     this.pipeline = null;
@@ -5303,7 +5320,7 @@ var AgentCoreAdapter = class extends EventEmitter {
     try {
       const authToken = await this.getAuthToken(config.tenant);
       await Promise.all([
-        this.initWhisper(),
+        this.initASR(),
         this.initLAM()
       ]);
       await this.connectWebSocket(authToken, config);
@@ -5333,7 +5350,7 @@ var AgentCoreAdapter = class extends EventEmitter {
       this.ws = null;
     }
     await Promise.all([
-      this.whisper?.dispose(),
+      this.asr?.dispose(),
       this.vad?.dispose(),
       this.lam?.dispose()
     ]);
@@ -5465,16 +5482,15 @@ var AgentCoreAdapter = class extends EventEmitter {
     });
     return token;
   }
-  async initWhisper() {
+  async initASR() {
     await Promise.all([
-      // Whisper ASR
+      // SenseVoice ASR
       (async () => {
-        this.whisper = new WhisperInference({
-          model: "tiny",
-          device: "auto",
-          language: "en"
+        this.asr = new SenseVoiceInference({
+          modelUrl: "/models/sensevoice/model.int8.onnx",
+          language: "auto"
         });
-        await this.whisper.load();
+        await this.asr.load();
       })(),
       // Silero VAD for accurate voice activity detection
       (async () => {
@@ -5660,17 +5676,17 @@ var AgentCoreAdapter = class extends EventEmitter {
       console.debug("[AgentCore] Skipping silent audio", { rms, samples: audio.length });
       return;
     }
-    if (this.whisper) {
+    if (this.asr) {
       this.setState("listening");
       this.emit("user.speech.start", { timestamp: Date.now() });
-      this.whisper.transcribe(audio).then((result) => {
+      this.asr.transcribe(audio).then((result) => {
         this.emit("user.transcript.final", {
           text: result.text,
           confidence: 1
         });
         this.emit("user.speech.end", { timestamp: Date.now(), durationMs: result.inferenceTimeMs });
         const cleanText = result.text.trim();
-        if (cleanText && !cleanText.includes("[BLANK_AUDIO]")) {
+        if (cleanText) {
           this.sendText(cleanText).catch((error) => {
             console.error("[AgentCore] Send text error:", error);
           });
@@ -6484,228 +6500,6 @@ var InterruptionHandler = class extends EventEmitter {
   }
 };
-// src/cache/huggingFaceCDN.ts
-var HF_CDN_TEST_URL = "https://huggingface.co/Xenova/whisper-tiny/resolve/main/config.json";
-function parseHuggingFaceUrl(url) {
-  const pattern = /^https:\/\/huggingface\.co\/([^/]+)\/([^/]+)\/resolve\/([^/]+)\/(.+)$/;
-  const match = url.match(pattern);
-  if (!match) {
-    return null;
-  }
-  return {
-    org: match[1],
-    model: match[2],
-    branch: match[3],
-    file: match[4]
-  };
-}
-async function isHuggingFaceCDNReachable(testUrl = HF_CDN_TEST_URL) {
-  try {
-    const response = await fetch(testUrl, {
-      method: "HEAD",
-      cache: "no-store"
-      // Don't use cached response for reachability check
-    });
-    return response.ok;
-  } catch {
-    return false;
-  }
-}
-// src/utils/transformersCacheClear.ts
-var logger12 = createLogger("TransformersCache");
-async function clearTransformersCache(options) {
-  const verbose = options?.verbose ?? true;
-  const additionalPatterns = options?.additionalPatterns ?? [];
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return [];
-  }
-  try {
-    const cacheNames = await caches.keys();
-    const deletedCaches = [];
-    const patterns = [
-      "transformers",
-      "huggingface",
-      "onnx",
-      ...additionalPatterns
-    ];
-    for (const cacheName of cacheNames) {
-      const shouldDelete = patterns.some(
-        (pattern) => cacheName.toLowerCase().includes(pattern.toLowerCase())
-      );
-      if (shouldDelete) {
-        if (verbose) {
-          logger12.info("Deleting cache", { cacheName });
-        }
-        const deleted = await caches.delete(cacheName);
-        if (deleted) {
-          deletedCaches.push(cacheName);
-        } else if (verbose) {
-          logger12.warn("Failed to delete cache", { cacheName });
-        }
-      }
-    }
-    if (verbose) {
-      logger12.info("Cache clearing complete", {
-        totalCaches: cacheNames.length,
-        deletedCount: deletedCaches.length,
-        deletedCaches
-      });
-    }
-    return deletedCaches;
-  } catch (error) {
-    logger12.error("Error clearing caches", { error });
-    throw error;
-  }
-}
-async function clearSpecificCache(cacheName) {
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return false;
-  }
-  try {
-    const deleted = await caches.delete(cacheName);
-    logger12.info("Cache deletion attempt", { cacheName, deleted });
-    return deleted;
-  } catch (error) {
-    logger12.error("Error deleting cache", { cacheName, error });
-    return false;
-  }
-}
-async function listCaches() {
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return [];
-  }
-  try {
-    const cacheNames = await caches.keys();
-    logger12.debug("Available caches", { cacheNames });
-    return cacheNames;
-  } catch (error) {
-    logger12.error("Error listing caches", { error });
-    return [];
-  }
-}
-async function validateCachedResponse(cacheName, requestUrl) {
-  if (!("caches" in window)) {
-    return {
-      exists: false,
-      valid: false,
-      contentType: null,
-      isHtml: false,
-      reason: "Cache API not available"
-    };
-  }
-  try {
-    const cache = await caches.open(cacheName);
-    const response = await cache.match(requestUrl);
-    if (!response) {
-      return {
-        exists: false,
-        valid: false,
-        contentType: null,
-        isHtml: false,
-        reason: "Not in cache"
-      };
-    }
-    const contentType = response.headers.get("content-type");
-    const isHtml = contentType?.includes("text/html") || contentType?.includes("text/plain");
-    const clonedResponse = response.clone();
-    const text = await clonedResponse.text();
-    const looksLikeHtml = text.trim().startsWith("<") || text.includes("<!DOCTYPE");
-    const valid = Boolean(
-      response.status === 200 && !isHtml && !looksLikeHtml && contentType && (contentType.includes("application/json") || contentType.includes("application/octet-stream") || contentType.includes("binary"))
-    );
-    return {
-      exists: true,
-      valid,
-      contentType,
-      isHtml: isHtml || looksLikeHtml,
-      reason: valid ? "Valid response" : `Invalid: status=${response.status}, contentType=${contentType}, isHtml=${isHtml || looksLikeHtml}`
-    };
-  } catch (error) {
-    logger12.error("Error validating cached response", { cacheName, requestUrl, error });
-    return {
-      exists: false,
-      valid: false,
-      contentType: null,
-      isHtml: false,
-      reason: `Error: ${error}`
-    };
-  }
-}
-async function scanForInvalidCaches() {
-  if (!("caches" in window)) {
-    return { totalCaches: 0, scannedEntries: 0, invalidEntries: [] };
-  }
-  const invalidEntries = [];
-  let scannedEntries = 0;
-  try {
-    const cacheNames = await caches.keys();
-    for (const cacheName of cacheNames) {
-      if (!cacheName.toLowerCase().includes("transformers")) {
-        continue;
-      }
-      const cache = await caches.open(cacheName);
-      const requests = await cache.keys();
-      for (const request of requests) {
-        scannedEntries++;
-        const url = request.url;
-        const validation = await validateCachedResponse(cacheName, url);
-        if (validation.exists && !validation.valid) {
-          invalidEntries.push({
-            cacheName,
-            url,
-            reason: validation.reason || "Unknown"
-          });
-        }
-      }
-    }
-    logger12.info("Cache scan complete", {
-      totalCaches: cacheNames.length,
-      scannedEntries,
-      invalidCount: invalidEntries.length
-    });
-    return {
-      totalCaches: cacheNames.length,
-      scannedEntries,
-      invalidEntries
-    };
-  } catch (error) {
-    logger12.error("Error scanning caches", { error });
-    throw error;
-  }
-}
-async function nukeBrowserCaches(preventRecreation = false) {
-  if (!("caches" in window)) {
-    logger12.warn("Cache API not available in this environment");
-    return 0;
-  }
-  try {
-    const cacheNames = await caches.keys();
-    let deletedCount = 0;
-    for (const cacheName of cacheNames) {
-      const deleted = await caches.delete(cacheName);
-      if (deleted) {
-        deletedCount++;
-      }
-    }
-    logger12.info("All browser caches cleared", {
-      totalDeleted: deletedCount
-    });
-    if (preventRecreation) {
-      const { env } = await import("./transformers.web-T5LWC34T.mjs");
-      env.useBrowserCache = false;
-      logger12.warn("Browser cache creation disabled (env.useBrowserCache = false)");
-    }
-    return deletedCount;
-  } catch (error) {
-    logger12.error("Error nuking caches", { error });
-    throw error;
-  }
-}
 // src/animation/types.ts
 var DEFAULT_ANIMATION_CONFIG = {
   initialState: "idle",
@@ -7245,7 +7039,6 @@ export {
   EmotionPresets,
   EmphasisDetector,
   EventEmitter,
-  HF_CDN_TEST_URL,
   INFERENCE_LATENCY_BUCKETS,
   InterruptionHandler,
   LAMPipeline,
@@ -7259,6 +7052,7 @@ export {
   OmoteTelemetry,
   RingBuffer,
   SafariSpeechRecognition,
+  SenseVoiceInference,
   SileroVADInference,
   SileroVADWorker,
   SyncedAudioPipeline,
@@ -7266,12 +7060,12 @@ export {
   WAV2ARKIT_BLENDSHAPES,
   Wav2ArkitCpuInference,
   Wav2Vec2Inference,
-  WhisperInference,
+  applyCMVN,
+  applyLFR,
   blendEmotions,
   calculatePeak,
   calculateRMS,
-  clearSpecificCache,
-  clearTransformersCache,
+  computeKaldiFbank,
   configureCacheLimit,
   configureLogging,
   configureTelemetry,
@@ -7280,6 +7074,7 @@ export {
   createLogger,
   createSessionWithFallback,
   createSileroVAD,
+  ctcGreedyDecode,
   fetchWithCache,
   formatBytes,
   getCacheConfig,
@@ -7296,7 +7091,6 @@ export {
   getTelemetry,
   hasWebGPUApi,
   isAndroid,
-  isHuggingFaceCDNReachable,
   isIOS,
   isIOSSafari,
   isMobile,
@@ -7305,16 +7099,16 @@ export {
   isSpeechRecognitionAvailable,
   isWebGPUAvailable,
   lerpEmotion,
-  listCaches,
   noopLogger,
-  nukeBrowserCaches,
-  parseHuggingFaceUrl,
+  parseCMVNFromMetadata,
+  parseTokensFile,
   preloadModels,
   preloadOnnxRuntime,
   remapWav2ArkitToLam,
   resetLoggingConfig,
   resolveBackend,
-  scanForInvalidCaches,
+  resolveLanguageId,
+  resolveTextNormId,
   setLogLevel,
   setLoggingEnabled,
   shouldEnableWasmProxy,
@@ -7322,7 +7116,6 @@ export {
   shouldUseNativeASR,
   shouldUseServerLipSync,
   supportsVADWorker,
-  symmetrizeBlendshapes,
-  validateCachedResponse
+  symmetrizeBlendshapes
 };
 //# sourceMappingURL=index.mjs.map