@omote/core 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -220,6 +220,19 @@ var AudioScheduler = class {
  async initialize() {
  console.log("[AudioScheduler] Ready for lazy initialization");
  }
+ /**
+ * Eagerly create and warm up the AudioContext
+ *
+ * Call this when a playback session starts (e.g., when AI response begins).
+ * The AudioContext needs time to initialize the audio hardware — on Windows
+ * this can take 50-100ms. By warming up early (before audio data arrives),
+ * the context is fully ready when schedule() is first called.
+ *
+ * Must be called after a user gesture (click/tap) for autoplay policy.
+ */
+ async warmup() {
+ await this.ensureContext();
+ }
  /**
  * Ensure AudioContext is created and ready
  * Called lazily on first schedule() - requires user gesture
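A minimal usage sketch for the new warmup() method. The AudioScheduler import and options shape are assumptions (not confirmed by this diff); the button id is a placeholder:

```ts
// Hypothetical usage: warm the AudioContext inside a user-gesture handler so
// the 50-100ms hardware spin-up finishes before the first audio chunk arrives.
import { AudioScheduler } from "@omote/core"; // assumed export name

const scheduler = new AudioScheduler({ sampleRate: 16000, channels: 1 }); // assumed options shape

document.getElementById("start")?.addEventListener("click", async () => {
  await scheduler.warmup(); // eagerly creates/resumes the AudioContext (user gesture satisfied)
  // ...begin streaming the AI response; schedule() will then find a ready context
});
```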
@@ -250,7 +263,7 @@ var AudioScheduler = class {
  const ctx = await this.ensureContext();
  const channels = this.options.channels ?? 1;
  if (!this.isPlaying) {
- this.nextPlayTime = ctx.currentTime;
+ this.nextPlayTime = ctx.currentTime + 0.05;
  this.isPlaying = true;
  }
  const audioBuffer = ctx.createBuffer(channels, audioData.length, ctx.sampleRate);
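The added 50 ms offset gives the very first buffer scheduling headroom. A generic Web Audio sketch of the same idea (illustrative only, not the package's code):

```ts
// Start the first source slightly ahead of currentTime so it cannot land in
// the past while the scheduling code is still executing on the main thread.
function scheduleFirstChunk(ctx: AudioContext, buffer: AudioBuffer): number {
  const startAt = ctx.currentTime + 0.05; // same 50 ms headroom as nextPlayTime above
  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);
  source.start(startAt);
  return startAt + buffer.duration; // subsequent chunks chain from this time
}
```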
@@ -324,8 +337,19 @@ var AudioScheduler = class {
  }
  /**
  * Reset scheduler state for new playback session
+ * Stops any orphaned sources that weren't cleaned up by cancelAll()
  */
  reset() {
+ if (this.context) {
+ const now = this.context.currentTime;
+ for (const { source, gainNode } of this.scheduledSources) {
+ try {
+ gainNode.gain.setValueAtTime(0, now);
+ source.stop(now);
+ } catch {
+ }
+ }
+ }
  this.nextPlayTime = 0;
  this.isPlaying = false;
  this.scheduledSources = [];
@@ -453,7 +477,7 @@ var LAMPipeline = class {
  newBuffer.set(this.buffer, 0);
  newBuffer.set(samples, this.buffer.length);
  this.buffer = newBuffer;
- if (this.buffer.length >= this.REQUIRED_SAMPLES) {
+ while (this.buffer.length >= this.REQUIRED_SAMPLES) {
  await this.processBuffer(lam);
  }
  }
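Switching `if` to `while` matters when a single push appends more than one full analysis window. A hedged sketch with illustrative numbers (the real REQUIRED_SAMPLES value is internal to the pipeline, and processBuffer() is assumed to consume one window per call):

```ts
// Illustrative numbers only: a 20,000-sample push against an 8,000-sample
// window. `while` drains both complete windows; `if` would have processed one
// and left a full window sitting in the buffer until the next push arrived.
const REQUIRED_SAMPLES = 8000; // placeholder, not the pipeline's actual value
let buffered = 20000;
let windowsProcessed = 0;
while (buffered >= REQUIRED_SAMPLES) {
  buffered -= REQUIRED_SAMPLES; // stands in for processBuffer() consuming a window
  windowsProcessed++;
}
// windowsProcessed === 2, buffered === 4000
```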
@@ -606,12 +630,20 @@ var LAMPipeline = class {
  };
 
  // src/audio/SyncedAudioPipeline.ts
+ function pcm16ToFloat32(buffer) {
+ const byteLen = buffer.byteLength & ~1;
+ const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
+ const float32 = new Float32Array(int16.length);
+ for (let i = 0; i < int16.length; i++) {
+ float32[i] = int16[i] / 32768;
+ }
+ return float32;
+ }
  var SyncedAudioPipeline = class extends EventEmitter {
  constructor(options) {
  super();
  this.options = options;
- this.waitingForFirstLAM = false;
- this.bufferedChunks = [];
+ this.playbackStarted = false;
  this.monitorInterval = null;
  this.frameAnimationId = null;
  const sampleRate = options.sampleRate ?? 16e3;
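The pcm16ToFloat32 helper added in this hunk is internal to the bundle; a worked example of what it computes (byte values are illustrative):

```ts
// Little-endian Int16 PCM: 0x4000 -> 0.5, 0xC000 -> -0.5. The trailing odd
// byte is dropped by the `& ~1` guard instead of throwing on misaligned chunks.
const bytes = new Uint8Array([0x00, 0x40, 0x00, 0xc0, 0x7f]);
const samples = pcm16ToFloat32(bytes.buffer);
// samples => Float32Array [0.5, -0.5]
```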
@@ -622,11 +654,6 @@ var SyncedAudioPipeline = class extends EventEmitter {
  });
  this.lamPipeline = new LAMPipeline({
  sampleRate,
- onInference: (frameCount) => {
- if (this.waitingForFirstLAM) {
- this.onFirstLAMComplete();
- }
- },
  onError: (error) => {
  this.emit("error", error);
  }
@@ -642,25 +669,24 @@ var SyncedAudioPipeline = class extends EventEmitter {
  * Start a new playback session
  *
  * Resets all state and prepares for incoming audio chunks.
- * Enables wait-for-first-LAM synchronization.
+ * Audio will be scheduled immediately as chunks arrive (no buffering).
  */
  start() {
+ this.stopMonitoring();
  this.scheduler.reset();
  this.coalescer.reset();
  this.lamPipeline.reset();
- this.bufferedChunks = [];
- this.waitingForFirstLAM = true;
+ this.playbackStarted = false;
+ this.scheduler.warmup();
  this.startFrameLoop();
  this.startMonitoring();
  }
  /**
  * Receive audio chunk from network
  *
- * Implements wait-for-first-LAM pattern:
- * - Chunks are coalesced into optimal buffers
- * - Buffers are sent to LAM for processing
- * - Audio scheduling waits until first LAM completes
- * - Then all buffered audio is scheduled together with LAM frames
+ * Audio-first design: schedules audio immediately, LAM runs in background.
+ * This prevents LAM inference (50-300ms) from blocking audio scheduling,
+ * which caused audible stuttering with continuous audio streams.
  *
  * @param chunk - Uint8Array containing Int16 PCM audio
  */
@@ -669,51 +695,15 @@ var SyncedAudioPipeline = class extends EventEmitter {
  if (!combined) {
  return;
  }
- const int16 = new Int16Array(combined);
- const float32 = new Float32Array(int16.length);
- for (let i = 0; i < int16.length; i++) {
- float32[i] = int16[i] / 32768;
- }
- if (this.waitingForFirstLAM) {
- this.bufferedChunks.push(combined);
- const estimatedTime = this.scheduler.getCurrentTime();
- await this.lamPipeline.push(float32, estimatedTime, this.options.lam);
- } else {
- const scheduleTime = await this.scheduler.schedule(float32);
- await this.lamPipeline.push(float32, scheduleTime, this.options.lam);
+ const float32 = pcm16ToFloat32(combined);
+ const scheduleTime = await this.scheduler.schedule(float32);
+ if (!this.playbackStarted) {
+ this.playbackStarted = true;
+ this.emit("playback_start", scheduleTime);
  }
- }
- /**
- * Handle first LAM inference completion
- *
- * This is the critical synchronization point:
- * - LAM frames are now ready in the queue
- * - Schedule all buffered audio chunks
- * - Adjust LAM frame timestamps to match actual schedule time
- * - Audio and LAM start playing together, perfectly synchronized
- */
- async onFirstLAMComplete() {
- this.waitingForFirstLAM = false;
- const beforeSchedule = this.scheduler.getCurrentTime();
- let actualStartTime = beforeSchedule;
- for (let i = 0; i < this.bufferedChunks.length; i++) {
- const buffer = this.bufferedChunks[i];
- const int16 = new Int16Array(buffer);
- const float32 = new Float32Array(int16.length);
- for (let j = 0; j < int16.length; j++) {
- float32[j] = int16[j] / 32768;
- }
- const scheduleTime = await this.scheduler.schedule(float32);
- if (i === 0) {
- actualStartTime = scheduleTime;
- }
- }
- const timeOffset = actualStartTime - beforeSchedule;
- if (timeOffset !== 0) {
- this.lamPipeline.adjustTimestamps(timeOffset);
- }
- this.bufferedChunks = [];
- this.emit("playback_start", actualStartTime);
+ this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
+ this.emit("error", err);
+ });
  }
  /**
  * End of audio stream
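A hedged consumer-side sketch of the new audio-first flow. The event names and the SyncedAudioPipeline export come from this diff; the socket URL, options, and the chunk-handler method name are placeholders:

```ts
import { SyncedAudioPipeline } from "@omote/core";

// Placeholder wiring: audio is scheduled as soon as each chunk arrives, and
// LAM inference runs in the background, surfacing failures via "error".
const pipeline = new SyncedAudioPipeline({ sampleRate: 16000 });

pipeline.on("playback_start", (t) => console.log("first chunk scheduled at", t));
pipeline.on("error", (err) => console.error("background LAM inference failed", err));

pipeline.start(); // resets state and warms up the scheduler
const socket = new WebSocket("wss://example.invalid/audio"); // placeholder URL
socket.binaryType = "arraybuffer";
socket.onmessage = (msg) => {
  pipeline.receiveChunk(new Uint8Array(msg.data as ArrayBuffer)); // placeholder method name
};
```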
@@ -745,10 +735,9 @@ var SyncedAudioPipeline = class extends EventEmitter {
  async stop(fadeOutMs = 50) {
  this.stopMonitoring();
  await this.scheduler.cancelAll(fadeOutMs);
- this.bufferedChunks = [];
  this.coalescer.reset();
  this.lamPipeline.reset();
- this.waitingForFirstLAM = false;
+ this.playbackStarted = false;
  this.emit("playback_complete", void 0);
  }
  /**
@@ -805,8 +794,7 @@ var SyncedAudioPipeline = class extends EventEmitter {
  */
  getState() {
  return {
- waitingForFirstLAM: this.waitingForFirstLAM,
- bufferedChunks: this.bufferedChunks.length,
+ playbackStarted: this.playbackStarted,
  coalescerFill: this.coalescer.fillLevel,
  lamFill: this.lamPipeline.fillLevel,
  queuedFrames: this.lamPipeline.queuedFrameCount,
@@ -822,7 +810,6 @@ var SyncedAudioPipeline = class extends EventEmitter {
  this.scheduler.dispose();
  this.coalescer.reset();
  this.lamPipeline.reset();
- this.bufferedChunks = [];
  }
  };
 
@@ -2049,7 +2036,7 @@ function hasWebGPUApi() {
  return "gpu" in navigator && navigator.gpu !== void 0;
  }
  function getRecommendedBackend() {
- if (isIOS()) {
+ if (isSafari() || isIOS()) {
  return "wasm";
  }
  return "webgpu";
@@ -2093,6 +2080,14 @@ function shouldEnableWasmProxy() {
  }
  return true;
  }
+ function isSafari() {
+ if (typeof navigator === "undefined") return false;
+ const ua = navigator.userAgent.toLowerCase();
+ return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
+ }
+ function shouldUseCpuLipSync() {
+ return isSafari();
+ }
  function isSpeechRecognitionAvailable() {
  if (typeof window === "undefined") return false;
  return "SpeechRecognition" in window || "webkitSpeechRecognition" in window;
@@ -2137,13 +2132,13 @@ async function isWebGPUAvailable() {
2137
2132
  return false;
2138
2133
  }
2139
2134
  }
2140
- function configureWasm(ort2) {
2141
- ort2.env.wasm.wasmPaths = WASM_CDN_PATH;
2135
+ function configureWasm(ort) {
2136
+ ort.env.wasm.wasmPaths = WASM_CDN_PATH;
2142
2137
  const numThreads = getOptimalWasmThreads();
2143
2138
  const enableProxy = shouldEnableWasmProxy();
2144
- ort2.env.wasm.numThreads = numThreads;
2145
- ort2.env.wasm.simd = true;
2146
- ort2.env.wasm.proxy = enableProxy;
2139
+ ort.env.wasm.numThreads = numThreads;
2140
+ ort.env.wasm.simd = true;
2141
+ ort.env.wasm.proxy = enableProxy;
2147
2142
  logger.info("WASM configured", {
2148
2143
  numThreads,
2149
2144
  simd: true,
@@ -2191,8 +2186,8 @@ async function getOnnxRuntimeForPreference(preference = "auto") {
2191
2186
  webgpuAvailable,
2192
2187
  resolvedBackend: backend
2193
2188
  });
2194
- const ort2 = await getOnnxRuntime(backend);
2195
- return { ort: ort2, backend };
2189
+ const ort = await getOnnxRuntime(backend);
2190
+ return { ort, backend };
2196
2191
  }
2197
2192
  function getSessionOptions(backend) {
2198
2193
  if (backend === "webgpu") {
@@ -2213,12 +2208,12 @@ function getSessionOptions(backend) {
2213
2208
  };
2214
2209
  }
2215
2210
  async function createSessionWithFallback(modelBuffer, preferredBackend) {
2216
- const ort2 = await getOnnxRuntime(preferredBackend);
2211
+ const ort = await getOnnxRuntime(preferredBackend);
2217
2212
  const modelData = new Uint8Array(modelBuffer);
2218
2213
  if (preferredBackend === "webgpu") {
2219
2214
  try {
2220
2215
  const options2 = getSessionOptions("webgpu");
2221
- const session2 = await ort2.InferenceSession.create(modelData, options2);
2216
+ const session2 = await ort.InferenceSession.create(modelData, options2);
2222
2217
  logger.info("Session created with WebGPU backend");
2223
2218
  return { session: session2, backend: "webgpu" };
2224
2219
  } catch (err) {
@@ -2228,7 +2223,7 @@ async function createSessionWithFallback(modelBuffer, preferredBackend) {
2228
2223
  }
2229
2224
  }
2230
2225
  const options = getSessionOptions("wasm");
2231
- const session = await ort2.InferenceSession.create(modelData, options);
2226
+ const session = await ort.InferenceSession.create(modelData, options);
2232
2227
  logger.info("Session created with WASM backend");
2233
2228
  return { session, backend: "wasm" };
2234
2229
  }
@@ -2239,8 +2234,7 @@ function isOnnxRuntimeLoaded() {
2239
2234
  return ortInstance !== null;
2240
2235
  }
2241
2236
 
2242
- // src/inference/Wav2Vec2Inference.ts
2243
- var logger2 = createLogger("Wav2Vec2");
2237
+ // src/inference/blendshapeUtils.ts
2244
2238
  var LAM_BLENDSHAPES = [
2245
2239
  "browDownLeft",
2246
2240
  "browDownRight",
@@ -2295,40 +2289,7 @@ var LAM_BLENDSHAPES = [
2295
2289
  "noseSneerRight",
2296
2290
  "tongueOut"
2297
2291
  ];
2298
- var CTC_VOCAB = [
2299
- "<pad>",
2300
- "<s>",
2301
- "</s>",
2302
- "<unk>",
2303
- "|",
2304
- "E",
2305
- "T",
2306
- "A",
2307
- "O",
2308
- "N",
2309
- "I",
2310
- "H",
2311
- "S",
2312
- "R",
2313
- "D",
2314
- "L",
2315
- "U",
2316
- "M",
2317
- "W",
2318
- "C",
2319
- "F",
2320
- "G",
2321
- "Y",
2322
- "P",
2323
- "B",
2324
- "V",
2325
- "K",
2326
- "'",
2327
- "X",
2328
- "J",
2329
- "Q",
2330
- "Z"
2331
- ];
2292
+ var ARKIT_BLENDSHAPES = LAM_BLENDSHAPES;
2332
2293
  var ARKIT_SYMMETRIC_PAIRS = [
2333
2294
  ["jawLeft", "jawRight"],
2334
2295
  ["mouthLeft", "mouthRight"],
@@ -2364,6 +2325,107 @@ function symmetrizeBlendshapes(frame) {
2364
2325
  }
2365
2326
  return result;
2366
2327
  }
2328
+ var WAV2ARKIT_BLENDSHAPES = [
2329
+ "browDownLeft",
2330
+ "browDownRight",
2331
+ "browInnerUp",
2332
+ "browOuterUpLeft",
2333
+ "browOuterUpRight",
2334
+ "cheekPuff",
2335
+ "cheekSquintLeft",
2336
+ "cheekSquintRight",
2337
+ "eyeBlinkLeft",
2338
+ "eyeBlinkRight",
2339
+ "eyeLookDownLeft",
2340
+ "eyeLookDownRight",
2341
+ "eyeLookInLeft",
2342
+ "eyeLookInRight",
2343
+ "eyeLookOutLeft",
2344
+ "eyeLookOutRight",
2345
+ "eyeLookUpLeft",
2346
+ "eyeLookUpRight",
2347
+ "eyeSquintLeft",
2348
+ "eyeSquintRight",
2349
+ "eyeWideLeft",
2350
+ "eyeWideRight",
2351
+ "jawForward",
2352
+ "jawLeft",
2353
+ "jawOpen",
2354
+ "mouthFrownLeft",
2355
+ "mouthFrownRight",
2356
+ "mouthFunnel",
2357
+ "mouthLeft",
2358
+ "mouthLowerDownLeft",
2359
+ "mouthLowerDownRight",
2360
+ "mouthPressLeft",
2361
+ "mouthPressRight",
2362
+ "mouthPucker",
2363
+ "mouthRight",
2364
+ "mouthRollLower",
2365
+ "mouthRollUpper",
2366
+ "mouthShrugLower",
2367
+ "mouthShrugUpper",
2368
+ "mouthSmileLeft",
2369
+ "mouthSmileRight",
2370
+ "mouthStretchLeft",
2371
+ "mouthStretchRight",
2372
+ "mouthUpperUpLeft",
2373
+ "mouthUpperUpRight",
2374
+ "noseSneerLeft",
2375
+ "noseSneerRight",
2376
+ "tongueOut",
2377
+ "mouthClose",
2378
+ "mouthDimpleLeft",
2379
+ "mouthDimpleRight",
2380
+ "jawRight"
2381
+ ];
2382
+ var REMAP_WAV2ARKIT_TO_LAM = WAV2ARKIT_BLENDSHAPES.map(
2383
+ (name) => LAM_BLENDSHAPES.indexOf(name)
2384
+ );
2385
+ function remapWav2ArkitToLam(frame) {
2386
+ const result = new Float32Array(52);
2387
+ for (let i = 0; i < 52; i++) {
2388
+ result[REMAP_WAV2ARKIT_TO_LAM[i]] = frame[i];
2389
+ }
2390
+ return result;
2391
+ }
2392
+
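remapWav2ArkitToLam, WAV2ARKIT_BLENDSHAPES, and ARKIT_BLENDSHAPES are exported further down in this diff; a worked example of the index remap, using jawOpen, which sits at index 24 of WAV2ARKIT_BLENDSHAPES:

```ts
import { ARKIT_BLENDSHAPES, remapWav2ArkitToLam } from "@omote/core";

// The CPU model emits frames in WAV2ARKIT order; the rest of the pipeline
// expects LAM/ARKit order, so each value is moved to its LAM index.
const frame = new Float32Array(52);
frame[24] = 0.8; // "jawOpen" in the model's native ordering
const lamFrame = remapWav2ArkitToLam(frame);
// lamFrame[ARKIT_BLENDSHAPES.indexOf("jawOpen")] === 0.8
```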
2393
+ // src/inference/Wav2Vec2Inference.ts
2394
+ var logger2 = createLogger("Wav2Vec2");
2395
+ var CTC_VOCAB = [
2396
+ "<pad>",
2397
+ "<s>",
2398
+ "</s>",
2399
+ "<unk>",
2400
+ "|",
2401
+ "E",
2402
+ "T",
2403
+ "A",
2404
+ "O",
2405
+ "N",
2406
+ "I",
2407
+ "H",
2408
+ "S",
2409
+ "R",
2410
+ "D",
2411
+ "L",
2412
+ "U",
2413
+ "M",
2414
+ "W",
2415
+ "C",
2416
+ "F",
2417
+ "G",
2418
+ "Y",
2419
+ "P",
2420
+ "B",
2421
+ "V",
2422
+ "K",
2423
+ "'",
2424
+ "X",
2425
+ "J",
2426
+ "Q",
2427
+ "Z"
2428
+ ];
2367
2429
  var Wav2Vec2Inference = class {
2368
2430
  constructor(config) {
2369
2431
  this.session = null;
@@ -2400,8 +2462,8 @@ var Wav2Vec2Inference = class {
2400
2462
  });
2401
2463
  try {
2402
2464
  logger2.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
2403
- const { ort: ort2, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
2404
- this.ort = ort2;
2465
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
2466
+ this.ort = ort;
2405
2467
  this._backend = backend;
2406
2468
  logger2.info("ONNX Runtime loaded", { backend: this._backend });
2407
2469
  const cache = getModelCache();
@@ -2602,6 +2664,7 @@ var Wav2Vec2Inference = class {
2602
2664
  blendshapes,
2603
2665
  asrLogits,
2604
2666
  text,
2667
+ numFrames: numA2EFrames,
2605
2668
  numA2EFrames,
2606
2669
  numASRFrames,
2607
2670
  inferenceTimeMs
@@ -2968,8 +3031,293 @@ var WhisperInference = class _WhisperInference {
2968
3031
  }
2969
3032
  };
2970
3033
 
3034
+ // src/inference/Wav2ArkitCpuInference.ts
3035
+ var logger5 = createLogger("Wav2ArkitCpu");
3036
+ var Wav2ArkitCpuInference = class {
3037
+ constructor(config) {
3038
+ this.session = null;
3039
+ this.ort = null;
3040
+ this._backend = "wasm";
3041
+ this.isLoading = false;
3042
+ // Inference queue for handling concurrent calls
3043
+ this.inferenceQueue = Promise.resolve();
3044
+ this.config = config;
3045
+ }
3046
+ get backend() {
3047
+ return this.session ? this._backend : null;
3048
+ }
3049
+ get isLoaded() {
3050
+ return this.session !== null;
3051
+ }
3052
+ /**
3053
+ * Load the ONNX model
3054
+ */
3055
+ async load() {
3056
+ if (this.isLoading) {
3057
+ throw new Error("Model is already loading");
3058
+ }
3059
+ if (this.session) {
3060
+ throw new Error("Model already loaded. Call dispose() first.");
3061
+ }
3062
+ this.isLoading = true;
3063
+ const startTime = performance.now();
3064
+ const telemetry = getTelemetry();
3065
+ const span = telemetry?.startSpan("Wav2ArkitCpu.load", {
3066
+ "model.url": this.config.modelUrl,
3067
+ "model.backend_requested": this.config.backend || "wasm"
3068
+ });
3069
+ try {
3070
+ const preference = this.config.backend || "wasm";
3071
+ logger5.info("Loading ONNX Runtime...", { preference });
3072
+ const { ort, backend } = await getOnnxRuntimeForPreference(preference);
3073
+ this.ort = ort;
3074
+ this._backend = backend;
3075
+ logger5.info("ONNX Runtime loaded", { backend: this._backend });
3076
+ const cache = getModelCache();
3077
+ const modelUrl = this.config.modelUrl;
3078
+ const isCached = await cache.has(modelUrl);
3079
+ let modelBuffer;
3080
+ if (isCached) {
3081
+ logger5.debug("Loading model from cache", { modelUrl });
3082
+ modelBuffer = await cache.get(modelUrl);
3083
+ if (!modelBuffer) {
3084
+ logger5.warn("Cache corruption detected, clearing and retrying", { modelUrl });
3085
+ await cache.delete(modelUrl);
3086
+ modelBuffer = await fetchWithCache(modelUrl);
3087
+ }
3088
+ } else {
3089
+ logger5.debug("Fetching and caching model", { modelUrl });
3090
+ modelBuffer = await fetchWithCache(modelUrl);
3091
+ }
3092
+ if (!modelBuffer) {
3093
+ throw new Error(`Failed to load model: ${modelUrl}`);
3094
+ }
3095
+ logger5.debug("Creating ONNX session", {
3096
+ size: formatBytes(modelBuffer.byteLength),
3097
+ backend: this._backend
3098
+ });
3099
+ const sessionOptions = getSessionOptions(this._backend);
3100
+ const modelData = new Uint8Array(modelBuffer);
3101
+ this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
3102
+ const loadTimeMs = performance.now() - startTime;
3103
+ logger5.info("Model loaded successfully", {
3104
+ backend: this._backend,
3105
+ loadTimeMs: Math.round(loadTimeMs),
3106
+ inputs: this.session.inputNames,
3107
+ outputs: this.session.outputNames
3108
+ });
3109
+ span?.setAttributes({
3110
+ "model.backend": this._backend,
3111
+ "model.load_time_ms": loadTimeMs,
3112
+ "model.cached": isCached
3113
+ });
3114
+ span?.end();
3115
+ telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
3116
+ model: "wav2arkit_cpu",
3117
+ backend: this._backend
3118
+ });
3119
+ logger5.debug("Running warmup inference");
3120
+ const warmupStart = performance.now();
3121
+ const silentAudio = new Float32Array(16e3);
3122
+ await this.infer(silentAudio);
3123
+ const warmupTimeMs = performance.now() - warmupStart;
3124
+ logger5.info("Warmup inference complete", {
3125
+ warmupTimeMs: Math.round(warmupTimeMs),
3126
+ backend: this._backend
3127
+ });
3128
+ telemetry?.recordHistogram("omote.model.warmup_time", warmupTimeMs, {
3129
+ model: "wav2arkit_cpu",
3130
+ backend: this._backend
3131
+ });
3132
+ return {
3133
+ backend: this._backend,
3134
+ loadTimeMs,
3135
+ inputNames: [...this.session.inputNames],
3136
+ outputNames: [...this.session.outputNames]
3137
+ };
3138
+ } catch (error) {
3139
+ span?.endWithError(error instanceof Error ? error : new Error(String(error)));
3140
+ telemetry?.incrementCounter("omote.errors.total", 1, {
3141
+ model: "wav2arkit_cpu",
3142
+ error_type: "load_failed"
3143
+ });
3144
+ throw error;
3145
+ } finally {
3146
+ this.isLoading = false;
3147
+ }
3148
+ }
3149
+ /**
3150
+ * Run inference on raw audio
3151
+ *
3152
+ * Accepts variable-length audio (not fixed to 16000 samples).
3153
+ * Output frames = ceil(30 * numSamples / 16000).
3154
+ *
3155
+ * @param audioSamples - Float32Array of raw audio at 16kHz
3156
+ * @param _identityIndex - Ignored (identity 11 is baked into the model)
3157
+ */
3158
+ async infer(audioSamples, _identityIndex) {
3159
+ if (!this.session) {
3160
+ throw new Error("Model not loaded. Call load() first.");
3161
+ }
3162
+ const audioCopy = new Float32Array(audioSamples);
3163
+ const feeds = {
3164
+ "audio_waveform": new this.ort.Tensor("float32", audioCopy, [1, audioCopy.length])
3165
+ };
3166
+ return this.queueInference(feeds, audioCopy.length);
3167
+ }
3168
+ /**
3169
+ * Queue inference to serialize ONNX session calls
3170
+ */
3171
+ queueInference(feeds, inputSamples) {
3172
+ return new Promise((resolve, reject) => {
3173
+ this.inferenceQueue = this.inferenceQueue.then(async () => {
3174
+ const telemetry = getTelemetry();
3175
+ const span = telemetry?.startSpan("Wav2ArkitCpu.infer", {
3176
+ "inference.backend": this._backend,
3177
+ "inference.input_samples": inputSamples
3178
+ });
3179
+ try {
3180
+ const startTime = performance.now();
3181
+ const results = await this.session.run(feeds);
3182
+ const inferenceTimeMs = performance.now() - startTime;
3183
+ const blendshapeOutput = results["blendshapes"];
3184
+ if (!blendshapeOutput) {
3185
+ throw new Error("Missing blendshapes output from model");
3186
+ }
3187
+ const blendshapeData = blendshapeOutput.data;
3188
+ const numFrames = blendshapeOutput.dims[1];
3189
+ const numBlendshapes = blendshapeOutput.dims[2];
3190
+ const blendshapes = [];
3191
+ for (let f = 0; f < numFrames; f++) {
3192
+ const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
3193
+ const remapped = remapWav2ArkitToLam(rawFrame);
3194
+ blendshapes.push(symmetrizeBlendshapes(remapped));
3195
+ }
3196
+ logger5.trace("Inference completed", {
3197
+ inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
3198
+ numFrames,
3199
+ inputSamples
3200
+ });
3201
+ span?.setAttributes({
3202
+ "inference.duration_ms": inferenceTimeMs,
3203
+ "inference.frames": numFrames
3204
+ });
3205
+ span?.end();
3206
+ telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
3207
+ model: "wav2arkit_cpu",
3208
+ backend: this._backend
3209
+ });
3210
+ telemetry?.incrementCounter("omote.inference.total", 1, {
3211
+ model: "wav2arkit_cpu",
3212
+ backend: this._backend,
3213
+ status: "success"
3214
+ });
3215
+ resolve({
3216
+ blendshapes,
3217
+ numFrames,
3218
+ inferenceTimeMs
3219
+ });
3220
+ } catch (err) {
3221
+ span?.endWithError(err instanceof Error ? err : new Error(String(err)));
3222
+ telemetry?.incrementCounter("omote.inference.total", 1, {
3223
+ model: "wav2arkit_cpu",
3224
+ backend: this._backend,
3225
+ status: "error"
3226
+ });
3227
+ reject(err);
3228
+ }
3229
+ });
3230
+ });
3231
+ }
3232
+ /**
3233
+ * Dispose of the model and free resources
3234
+ */
3235
+ async dispose() {
3236
+ if (this.session) {
3237
+ await this.session.release();
3238
+ this.session = null;
3239
+ }
3240
+ }
3241
+ };
3242
+
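A worked instance of the frame-count formula documented on infer() above, frames = ceil(30 * numSamples / 16000), i.e. 30 fps for 16 kHz input:

```ts
const numSamples = 8000; // 0.5 s of 16 kHz audio
const frames = Math.ceil((30 * numSamples) / 16000); // = 15 blendshape frames
```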
3243
+ // src/inference/createLipSync.ts
3244
+ var logger6 = createLogger("createLipSync");
3245
+ function createLipSync(config) {
3246
+ const mode = config.mode ?? "auto";
3247
+ const fallbackOnError = config.fallbackOnError ?? true;
3248
+ let useCpu;
3249
+ if (mode === "cpu") {
3250
+ useCpu = true;
3251
+ logger6.info("Forcing CPU lip sync model (wav2arkit_cpu)");
3252
+ } else if (mode === "gpu") {
3253
+ useCpu = false;
3254
+ logger6.info("Forcing GPU lip sync model (Wav2Vec2)");
3255
+ } else {
3256
+ useCpu = isSafari();
3257
+ logger6.info("Auto-detected lip sync model", {
3258
+ useCpu,
3259
+ isSafari: isSafari()
3260
+ });
3261
+ }
3262
+ if (useCpu) {
3263
+ logger6.info("Creating Wav2ArkitCpuInference (1.8MB, WASM)");
3264
+ return new Wav2ArkitCpuInference({
3265
+ modelUrl: config.cpuModelUrl
3266
+ });
3267
+ }
3268
+ const gpuInstance = new Wav2Vec2Inference({
3269
+ modelUrl: config.gpuModelUrl,
3270
+ backend: config.gpuBackend ?? "auto",
3271
+ numIdentityClasses: config.numIdentityClasses
3272
+ });
3273
+ if (fallbackOnError) {
3274
+ logger6.info("Creating Wav2Vec2Inference with CPU fallback");
3275
+ return new LipSyncWithFallback(gpuInstance, config);
3276
+ }
3277
+ logger6.info("Creating Wav2Vec2Inference (no fallback)");
3278
+ return gpuInstance;
3279
+ }
3280
+ var LipSyncWithFallback = class {
3281
+ constructor(gpuInstance, config) {
3282
+ this.hasFallenBack = false;
3283
+ this.implementation = gpuInstance;
3284
+ this.config = config;
3285
+ }
3286
+ get backend() {
3287
+ return this.implementation.backend;
3288
+ }
3289
+ get isLoaded() {
3290
+ return this.implementation.isLoaded;
3291
+ }
3292
+ async load() {
3293
+ try {
3294
+ return await this.implementation.load();
3295
+ } catch (error) {
3296
+ logger6.warn("GPU model load failed, falling back to CPU model", {
3297
+ error: error instanceof Error ? error.message : String(error)
3298
+ });
3299
+ try {
3300
+ await this.implementation.dispose();
3301
+ } catch {
3302
+ }
3303
+ this.implementation = new Wav2ArkitCpuInference({
3304
+ modelUrl: this.config.cpuModelUrl
3305
+ });
3306
+ this.hasFallenBack = true;
3307
+ logger6.info("Fallback to Wav2ArkitCpuInference successful");
3308
+ return await this.implementation.load();
3309
+ }
3310
+ }
3311
+ async infer(audioSamples, identityIndex) {
3312
+ return this.implementation.infer(audioSamples, identityIndex);
3313
+ }
3314
+ async dispose() {
3315
+ return this.implementation.dispose();
3316
+ }
3317
+ };
3318
+
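createLipSync is added to the public exports at the end of this diff; a hedged usage sketch using only the option names visible above (the model URLs are placeholders, not real paths):

```ts
import { createLipSync } from "@omote/core";

const lipSync = createLipSync({
  mode: "auto",                               // "cpu" | "gpu" | "auto"; auto picks CPU on Safari
  fallbackOnError: true,                      // GPU load failures fall back to the CPU model
  cpuModelUrl: "/models/wav2arkit_cpu.onnx",  // placeholder path
  gpuModelUrl: "/models/wav2vec2.onnx",       // placeholder path
});

await lipSync.load();
const result = await lipSync.infer(new Float32Array(16000)); // 1 s of silence
console.log(result.numFrames, result.inferenceTimeMs);
```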
2971
3319
  // src/inference/SileroVADInference.ts
2972
- var logger5 = createLogger("SileroVAD");
3320
+ var logger7 = createLogger("SileroVAD");
2973
3321
  var SileroVADInference = class {
2974
3322
  constructor(config) {
2975
3323
  this.session = null;
@@ -3041,32 +3389,32 @@ var SileroVADInference = class {
3041
3389
  "model.sample_rate": this.config.sampleRate
3042
3390
  });
3043
3391
  try {
3044
- logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
3045
- const { ort: ort2, backend } = await getOnnxRuntimeForPreference(this.config.backend);
3046
- this.ort = ort2;
3392
+ logger7.info("Loading ONNX Runtime...", { preference: this.config.backend });
3393
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
3394
+ this.ort = ort;
3047
3395
  this._backend = backend;
3048
- logger5.info("ONNX Runtime loaded", { backend: this._backend });
3396
+ logger7.info("ONNX Runtime loaded", { backend: this._backend });
3049
3397
  const cache = getModelCache();
3050
3398
  const modelUrl = this.config.modelUrl;
3051
3399
  const isCached = await cache.has(modelUrl);
3052
3400
  let modelBuffer;
3053
3401
  if (isCached) {
3054
- logger5.debug("Loading model from cache", { modelUrl });
3402
+ logger7.debug("Loading model from cache", { modelUrl });
3055
3403
  modelBuffer = await cache.get(modelUrl);
3056
3404
  } else {
3057
- logger5.debug("Fetching and caching model", { modelUrl });
3405
+ logger7.debug("Fetching and caching model", { modelUrl });
3058
3406
  modelBuffer = await fetchWithCache(modelUrl);
3059
3407
  }
3060
- logger5.debug("Creating ONNX session", {
3408
+ logger7.debug("Creating ONNX session", {
3061
3409
  size: formatBytes(modelBuffer.byteLength),
3062
3410
  backend: this._backend
3063
3411
  });
3064
3412
  const sessionOptions = getSessionOptions(this._backend);
3065
3413
  const modelData = new Uint8Array(modelBuffer);
3066
- this.session = await ort2.InferenceSession.create(modelData, sessionOptions);
3414
+ this.session = await ort.InferenceSession.create(modelData, sessionOptions);
3067
3415
  this.reset();
3068
3416
  const loadTimeMs = performance.now() - startTime;
3069
- logger5.info("Model loaded successfully", {
3417
+ logger7.info("Model loaded successfully", {
3070
3418
  backend: this._backend,
3071
3419
  loadTimeMs: Math.round(loadTimeMs),
3072
3420
  sampleRate: this.config.sampleRate,
@@ -3219,7 +3567,7 @@ var SileroVADInference = class {
3219
3567
  this.preSpeechBuffer.shift();
3220
3568
  }
3221
3569
  }
3222
- logger5.trace("Skipping VAD inference - audio too quiet", {
3570
+ logger7.trace("Skipping VAD inference - audio too quiet", {
3223
3571
  rms: Math.round(rms * 1e4) / 1e4,
3224
3572
  threshold: MIN_ENERGY_THRESHOLD
3225
3573
  });
@@ -3273,7 +3621,7 @@ var SileroVADInference = class {
3273
3621
  if (isSpeech && !this.wasSpeaking) {
3274
3622
  preSpeechChunks = [...this.preSpeechBuffer];
3275
3623
  this.preSpeechBuffer = [];
3276
- logger5.debug("Speech started with pre-speech buffer", {
3624
+ logger7.debug("Speech started with pre-speech buffer", {
3277
3625
  preSpeechChunks: preSpeechChunks.length,
3278
3626
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
3279
3627
  });
@@ -3286,7 +3634,7 @@ var SileroVADInference = class {
3286
3634
  this.preSpeechBuffer = [];
3287
3635
  }
3288
3636
  this.wasSpeaking = isSpeech;
3289
- logger5.trace("VAD inference completed", {
3637
+ logger7.trace("VAD inference completed", {
3290
3638
  probability: Math.round(probability * 1e3) / 1e3,
3291
3639
  isSpeech,
3292
3640
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -3342,7 +3690,7 @@ var SileroVADInference = class {
3342
3690
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
3343
3691
 
3344
3692
  // src/inference/SileroVADWorker.ts
3345
- var logger6 = createLogger("SileroVADWorker");
3693
+ var logger8 = createLogger("SileroVADWorker");
3346
3694
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
3347
3695
  var LOAD_TIMEOUT_MS = 1e4;
3348
3696
  var INFERENCE_TIMEOUT_MS = 1e3;
@@ -3605,7 +3953,7 @@ var SileroVADWorker = class {
3605
3953
  this.handleWorkerMessage(event.data);
3606
3954
  };
3607
3955
  worker.onerror = (error) => {
3608
- logger6.error("Worker error", { error: error.message });
3956
+ logger8.error("Worker error", { error: error.message });
3609
3957
  for (const [, resolver] of this.pendingResolvers) {
3610
3958
  resolver.reject(new Error(`Worker error: ${error.message}`));
3611
3959
  }
@@ -3681,9 +4029,9 @@ var SileroVADWorker = class {
3681
4029
  "model.sample_rate": this.config.sampleRate
3682
4030
  });
3683
4031
  try {
3684
- logger6.info("Creating VAD worker...");
4032
+ logger8.info("Creating VAD worker...");
3685
4033
  this.worker = this.createWorker();
3686
- logger6.info("Loading model in worker...", {
4034
+ logger8.info("Loading model in worker...", {
3687
4035
  modelUrl: this.config.modelUrl,
3688
4036
  sampleRate: this.config.sampleRate
3689
4037
  });
@@ -3699,7 +4047,7 @@ var SileroVADWorker = class {
3699
4047
  );
3700
4048
  this._isLoaded = true;
3701
4049
  const loadTimeMs = performance.now() - startTime;
3702
- logger6.info("VAD worker loaded successfully", {
4050
+ logger8.info("VAD worker loaded successfully", {
3703
4051
  backend: "wasm",
3704
4052
  loadTimeMs: Math.round(loadTimeMs),
3705
4053
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -3806,7 +4154,7 @@ var SileroVADWorker = class {
3806
4154
  if (isSpeech && !this.wasSpeaking) {
3807
4155
  preSpeechChunks = [...this.preSpeechBuffer];
3808
4156
  this.preSpeechBuffer = [];
3809
- logger6.debug("Speech started with pre-speech buffer", {
4157
+ logger8.debug("Speech started with pre-speech buffer", {
3810
4158
  preSpeechChunks: preSpeechChunks.length,
3811
4159
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
3812
4160
  });
@@ -3819,7 +4167,7 @@ var SileroVADWorker = class {
3819
4167
  this.preSpeechBuffer = [];
3820
4168
  }
3821
4169
  this.wasSpeaking = isSpeech;
3822
- logger6.trace("VAD worker inference completed", {
4170
+ logger8.trace("VAD worker inference completed", {
3823
4171
  probability: Math.round(result.probability * 1e3) / 1e3,
3824
4172
  isSpeech,
3825
4173
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -3887,18 +4235,18 @@ var SileroVADWorker = class {
3887
4235
  };
3888
4236
 
3889
4237
  // src/inference/createSileroVAD.ts
3890
- var logger7 = createLogger("createSileroVAD");
4238
+ var logger9 = createLogger("createSileroVAD");
3891
4239
  function supportsVADWorker() {
3892
4240
  if (typeof Worker === "undefined") {
3893
- logger7.debug("Worker not supported: Worker constructor undefined");
4241
+ logger9.debug("Worker not supported: Worker constructor undefined");
3894
4242
  return false;
3895
4243
  }
3896
4244
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
3897
- logger7.debug("Worker not supported: URL.createObjectURL unavailable");
4245
+ logger9.debug("Worker not supported: URL.createObjectURL unavailable");
3898
4246
  return false;
3899
4247
  }
3900
4248
  if (typeof Blob === "undefined") {
3901
- logger7.debug("Worker not supported: Blob constructor unavailable");
4249
+ logger9.debug("Worker not supported: Blob constructor unavailable");
3902
4250
  return false;
3903
4251
  }
3904
4252
  return true;
@@ -3908,19 +4256,19 @@ function createSileroVAD(config) {
3908
4256
  let useWorker;
3909
4257
  if (config.useWorker !== void 0) {
3910
4258
  useWorker = config.useWorker;
3911
- logger7.debug("Worker preference explicitly set", { useWorker });
4259
+ logger9.debug("Worker preference explicitly set", { useWorker });
3912
4260
  } else {
3913
4261
  const workerSupported = supportsVADWorker();
3914
4262
  const onMobile = isMobile();
3915
4263
  useWorker = workerSupported && !onMobile;
3916
- logger7.debug("Auto-detected Worker preference", {
4264
+ logger9.debug("Auto-detected Worker preference", {
3917
4265
  useWorker,
3918
4266
  workerSupported,
3919
4267
  onMobile
3920
4268
  });
3921
4269
  }
3922
4270
  if (useWorker) {
3923
- logger7.info("Creating SileroVADWorker (off-main-thread)");
4271
+ logger9.info("Creating SileroVADWorker (off-main-thread)");
3924
4272
  const worker = new SileroVADWorker({
3925
4273
  modelUrl: config.modelUrl,
3926
4274
  sampleRate: config.sampleRate,
@@ -3932,7 +4280,7 @@ function createSileroVAD(config) {
3932
4280
  }
3933
4281
  return worker;
3934
4282
  }
3935
- logger7.info("Creating SileroVADInference (main thread)");
4283
+ logger9.info("Creating SileroVADInference (main thread)");
3936
4284
  return new SileroVADInference(config);
3937
4285
  }
3938
4286
  var VADWorkerWithFallback = class {
@@ -3958,7 +4306,7 @@ var VADWorkerWithFallback = class {
3958
4306
  try {
3959
4307
  return await this.implementation.load();
3960
4308
  } catch (error) {
3961
- logger7.warn("Worker load failed, falling back to main thread", {
4309
+ logger9.warn("Worker load failed, falling back to main thread", {
3962
4310
  error: error instanceof Error ? error.message : String(error)
3963
4311
  });
3964
4312
  try {
@@ -3967,7 +4315,7 @@ var VADWorkerWithFallback = class {
3967
4315
  }
3968
4316
  this.implementation = new SileroVADInference(this.config);
3969
4317
  this.hasFallenBack = true;
3970
- logger7.info("Fallback to SileroVADInference successful");
4318
+ logger9.info("Fallback to SileroVADInference successful");
3971
4319
  return await this.implementation.load();
3972
4320
  }
3973
4321
  }
@@ -3989,7 +4337,7 @@ var VADWorkerWithFallback = class {
3989
4337
  };
3990
4338
 
3991
4339
  // src/inference/Emotion2VecInference.ts
3992
- var logger8 = createLogger("Emotion2Vec");
4340
+ var logger10 = createLogger("Emotion2Vec");
3993
4341
  var EMOTION2VEC_LABELS = ["neutral", "happy", "angry", "sad"];
3994
4342
  var Emotion2VecInference = class {
3995
4343
  constructor(config) {
@@ -4031,36 +4379,36 @@ var Emotion2VecInference = class {
4031
4379
  "model.backend_requested": this.config.backend
4032
4380
  });
4033
4381
  try {
4034
- logger8.info("Loading ONNX Runtime...", { preference: this.config.backend });
4035
- const { ort: ort2, backend } = await getOnnxRuntimeForPreference(this.config.backend);
4036
- this.ort = ort2;
4382
+ logger10.info("Loading ONNX Runtime...", { preference: this.config.backend });
4383
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
4384
+ this.ort = ort;
4037
4385
  this._backend = backend;
4038
- logger8.info("ONNX Runtime loaded", { backend: this._backend });
4039
- logger8.info("Checking model cache...");
4386
+ logger10.info("ONNX Runtime loaded", { backend: this._backend });
4387
+ logger10.info("Checking model cache...");
4040
4388
  const cache = getModelCache();
4041
4389
  const modelUrl = this.config.modelUrl;
4042
4390
  const isCached = await cache.has(modelUrl);
4043
- logger8.info("Cache check complete", { modelUrl, isCached });
4391
+ logger10.info("Cache check complete", { modelUrl, isCached });
4044
4392
  let modelBuffer;
4045
4393
  if (isCached) {
4046
- logger8.info("Loading model from cache...", { modelUrl });
4394
+ logger10.info("Loading model from cache...", { modelUrl });
4047
4395
  modelBuffer = await cache.get(modelUrl);
4048
- logger8.info("Model loaded from cache", { size: formatBytes(modelBuffer.byteLength) });
4396
+ logger10.info("Model loaded from cache", { size: formatBytes(modelBuffer.byteLength) });
4049
4397
  } else {
4050
- logger8.info("Fetching model (not cached)...", { modelUrl });
4398
+ logger10.info("Fetching model (not cached)...", { modelUrl });
4051
4399
  modelBuffer = await fetchWithCache(modelUrl);
4052
- logger8.info("Model fetched and cached", { size: formatBytes(modelBuffer.byteLength) });
4400
+ logger10.info("Model fetched and cached", { size: formatBytes(modelBuffer.byteLength) });
4053
4401
  }
4054
- logger8.info("Creating ONNX session (this may take a while for large models)...");
4055
- logger8.debug("Creating ONNX session", {
4402
+ logger10.info("Creating ONNX session (this may take a while for large models)...");
4403
+ logger10.debug("Creating ONNX session", {
4056
4404
  size: formatBytes(modelBuffer.byteLength),
4057
4405
  backend: this._backend
4058
4406
  });
4059
4407
  const sessionOptions = getSessionOptions(this._backend);
4060
4408
  const modelData = new Uint8Array(modelBuffer);
4061
- this.session = await ort2.InferenceSession.create(modelData, sessionOptions);
4409
+ this.session = await ort.InferenceSession.create(modelData, sessionOptions);
4062
4410
  const loadTimeMs = performance.now() - startTime;
4063
- logger8.info("Model loaded successfully", {
4411
+ logger10.info("Model loaded successfully", {
4064
4412
  backend: this._backend,
4065
4413
  loadTimeMs: Math.round(loadTimeMs),
4066
4414
  sampleRate: this.config.sampleRate,
@@ -4172,7 +4520,7 @@ var Emotion2VecInference = class {
4172
4520
  });
4173
4521
  }
4174
4522
  const inferenceTimeMs = performance.now() - startTime;
4175
- logger8.debug("Emotion inference completed", {
4523
+ logger10.debug("Emotion inference completed", {
4176
4524
  numFrames,
4177
4525
  dominant: dominant.emotion,
4178
4526
  confidence: Math.round(dominant.confidence * 100),
@@ -4248,14 +4596,8 @@ var Emotion2VecInference = class {
4248
4596
  */
4249
4597
  Emotion2VecInference.isWebGPUAvailable = isWebGPUAvailable;
4250
4598
 
4251
- // src/inference/ChatterboxTurboInference.ts
4252
- import ort from "onnxruntime-web/webgpu";
4253
- var logger9 = createLogger("ChatterboxTurbo");
4254
- env.allowLocalModels = true;
4255
- ort.env.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
4256
-
4257
4599
  // src/inference/SafariSpeechRecognition.ts
4258
- var logger10 = createLogger("SafariSpeech");
4600
+ var logger11 = createLogger("SafariSpeech");
4259
4601
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
4260
4602
  constructor(config = {}) {
4261
4603
  this.recognition = null;
@@ -4274,7 +4616,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4274
4616
  interimResults: config.interimResults ?? true,
4275
4617
  maxAlternatives: config.maxAlternatives ?? 1
4276
4618
  };
4277
- logger10.debug("SafariSpeechRecognition created", {
4619
+ logger11.debug("SafariSpeechRecognition created", {
4278
4620
  language: this.config.language,
4279
4621
  continuous: this.config.continuous
4280
4622
  });
@@ -4335,7 +4677,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4335
4677
  */
4336
4678
  async start() {
4337
4679
  if (this.isListening) {
4338
- logger10.warn("Already listening");
4680
+ logger11.warn("Already listening");
4339
4681
  return;
4340
4682
  }
4341
4683
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -4365,7 +4707,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4365
4707
  this.isListening = true;
4366
4708
  this.startTime = performance.now();
4367
4709
  this.accumulatedText = "";
4368
- logger10.info("Speech recognition started", {
4710
+ logger11.info("Speech recognition started", {
4369
4711
  language: this.config.language
4370
4712
  });
4371
4713
  span?.end();
@@ -4380,7 +4722,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4380
4722
  */
4381
4723
  async stop() {
4382
4724
  if (!this.isListening || !this.recognition) {
4383
- logger10.warn("Not currently listening");
4725
+ logger11.warn("Not currently listening");
4384
4726
  return {
4385
4727
  text: this.accumulatedText,
4386
4728
  language: this.config.language,
@@ -4409,7 +4751,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4409
4751
  if (this.recognition && this.isListening) {
4410
4752
  this.recognition.abort();
4411
4753
  this.isListening = false;
4412
- logger10.info("Speech recognition aborted");
4754
+ logger11.info("Speech recognition aborted");
4413
4755
  }
4414
4756
  }
4415
4757
  /**
@@ -4440,7 +4782,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4440
4782
  this.isListening = false;
4441
4783
  this.resultCallbacks = [];
4442
4784
  this.errorCallbacks = [];
4443
- logger10.debug("SafariSpeechRecognition disposed");
4785
+ logger11.debug("SafariSpeechRecognition disposed");
4444
4786
  }
4445
4787
  /**
4446
4788
  * Set up event handlers for the recognition instance
@@ -4468,7 +4810,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4468
4810
  confidence: alternative.confidence
4469
4811
  };
4470
4812
  this.emitResult(speechResult);
4471
- logger10.trace("Speech result", {
4813
+ logger11.trace("Speech result", {
4472
4814
  text: text.substring(0, 50),
4473
4815
  isFinal,
4474
4816
  confidence: alternative.confidence
@@ -4478,12 +4820,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4478
4820
  span?.end();
4479
4821
  } catch (error) {
4480
4822
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
4481
- logger10.error("Error processing speech result", { error });
4823
+ logger11.error("Error processing speech result", { error });
4482
4824
  }
4483
4825
  };
4484
4826
  this.recognition.onerror = (event) => {
4485
4827
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
4486
- logger10.error("Speech recognition error", { error: event.error, message: event.message });
4828
+ logger11.error("Speech recognition error", { error: event.error, message: event.message });
4487
4829
  this.emitError(error);
4488
4830
  if (this.stopRejecter) {
4489
4831
  this.stopRejecter(error);
@@ -4493,7 +4835,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4493
4835
  };
4494
4836
  this.recognition.onend = () => {
4495
4837
  this.isListening = false;
4496
- logger10.info("Speech recognition ended", {
4838
+ logger11.info("Speech recognition ended", {
4497
4839
  totalText: this.accumulatedText.length,
4498
4840
  durationMs: performance.now() - this.startTime
4499
4841
  });
@@ -4510,13 +4852,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4510
4852
  }
4511
4853
  };
4512
4854
  this.recognition.onstart = () => {
4513
- logger10.debug("Speech recognition started by browser");
4855
+ logger11.debug("Speech recognition started by browser");
4514
4856
  };
4515
4857
  this.recognition.onspeechstart = () => {
4516
- logger10.debug("Speech detected");
4858
+ logger11.debug("Speech detected");
4517
4859
  };
4518
4860
  this.recognition.onspeechend = () => {
4519
- logger10.debug("Speech ended");
4861
+ logger11.debug("Speech ended");
4520
4862
  };
4521
4863
  }
4522
4864
  /**
@@ -4527,7 +4869,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4527
4869
  try {
4528
4870
  callback(result);
4529
4871
  } catch (error) {
4530
- logger10.error("Error in result callback", { error });
4872
+ logger11.error("Error in result callback", { error });
4531
4873
  }
4532
4874
  }
4533
4875
  }
@@ -4539,7 +4881,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
4539
4881
  try {
4540
4882
  callback(error);
4541
4883
  } catch (callbackError) {
4542
- logger10.error("Error in error callback", { error: callbackError });
4884
+ logger11.error("Error in error callback", { error: callbackError });
4543
4885
  }
4544
4886
  }
4545
4887
  }
@@ -5962,12 +6304,12 @@ async function isHuggingFaceCDNReachable(testUrl = HF_CDN_TEST_URL) {
5962
6304
  }
5963
6305
 
5964
6306
  // src/utils/transformersCacheClear.ts
5965
- var logger11 = createLogger("TransformersCache");
6307
+ var logger12 = createLogger("TransformersCache");
5966
6308
  async function clearTransformersCache(options) {
5967
6309
  const verbose = options?.verbose ?? true;
5968
6310
  const additionalPatterns = options?.additionalPatterns ?? [];
5969
6311
  if (!("caches" in window)) {
5970
- logger11.warn("Cache API not available in this environment");
6312
+ logger12.warn("Cache API not available in this environment");
5971
6313
  return [];
5972
6314
  }
5973
6315
  try {
@@ -5985,18 +6327,18 @@ async function clearTransformersCache(options) {
5985
6327
  );
5986
6328
  if (shouldDelete) {
5987
6329
  if (verbose) {
5988
- logger11.info("Deleting cache", { cacheName });
6330
+ logger12.info("Deleting cache", { cacheName });
5989
6331
  }
5990
6332
  const deleted = await caches.delete(cacheName);
5991
6333
  if (deleted) {
5992
6334
  deletedCaches.push(cacheName);
5993
6335
  } else if (verbose) {
5994
- logger11.warn("Failed to delete cache", { cacheName });
6336
+ logger12.warn("Failed to delete cache", { cacheName });
5995
6337
  }
5996
6338
  }
5997
6339
  }
5998
6340
  if (verbose) {
5999
- logger11.info("Cache clearing complete", {
6341
+ logger12.info("Cache clearing complete", {
6000
6342
  totalCaches: cacheNames.length,
6001
6343
  deletedCount: deletedCaches.length,
6002
6344
  deletedCaches
@@ -6004,35 +6346,35 @@ async function clearTransformersCache(options) {
6004
6346
  }
6005
6347
  return deletedCaches;
6006
6348
  } catch (error) {
6007
- logger11.error("Error clearing caches", { error });
6349
+ logger12.error("Error clearing caches", { error });
6008
6350
  throw error;
6009
6351
  }
6010
6352
  }
6011
6353
  async function clearSpecificCache(cacheName) {
6012
6354
  if (!("caches" in window)) {
6013
- logger11.warn("Cache API not available in this environment");
6355
+ logger12.warn("Cache API not available in this environment");
6014
6356
  return false;
6015
6357
  }
6016
6358
  try {
6017
6359
  const deleted = await caches.delete(cacheName);
6018
- logger11.info("Cache deletion attempt", { cacheName, deleted });
6360
+ logger12.info("Cache deletion attempt", { cacheName, deleted });
6019
6361
  return deleted;
6020
6362
  } catch (error) {
6021
- logger11.error("Error deleting cache", { cacheName, error });
6363
+ logger12.error("Error deleting cache", { cacheName, error });
6022
6364
  return false;
6023
6365
  }
6024
6366
  }
6025
6367
  async function listCaches() {
6026
6368
  if (!("caches" in window)) {
6027
- logger11.warn("Cache API not available in this environment");
6369
+ logger12.warn("Cache API not available in this environment");
6028
6370
  return [];
6029
6371
  }
6030
6372
  try {
6031
6373
  const cacheNames = await caches.keys();
6032
- logger11.debug("Available caches", { cacheNames });
6374
+ logger12.debug("Available caches", { cacheNames });
6033
6375
  return cacheNames;
6034
6376
  } catch (error) {
6035
- logger11.error("Error listing caches", { error });
6377
+ logger12.error("Error listing caches", { error });
6036
6378
  return [];
6037
6379
  }
6038
6380
  }
@@ -6074,7 +6416,7 @@ async function validateCachedResponse(cacheName, requestUrl) {
6074
6416
  reason: valid ? "Valid response" : `Invalid: status=${response.status}, contentType=${contentType}, isHtml=${isHtml || looksLikeHtml}`
6075
6417
  };
6076
6418
  } catch (error) {
6077
- logger11.error("Error validating cached response", { cacheName, requestUrl, error });
6419
+ logger12.error("Error validating cached response", { cacheName, requestUrl, error });
6078
6420
  return {
6079
6421
  exists: false,
6080
6422
  valid: false,
@@ -6111,7 +6453,7 @@ async function scanForInvalidCaches() {
6111
6453
  }
6112
6454
  }
6113
6455
  }
6114
- logger11.info("Cache scan complete", {
6456
+ logger12.info("Cache scan complete", {
6115
6457
  totalCaches: cacheNames.length,
6116
6458
  scannedEntries,
6117
6459
  invalidCount: invalidEntries.length
@@ -6122,13 +6464,13 @@ async function scanForInvalidCaches() {
6122
6464
  invalidEntries
6123
6465
  };
6124
6466
  } catch (error) {
6125
- logger11.error("Error scanning caches", { error });
6467
+ logger12.error("Error scanning caches", { error });
6126
6468
  throw error;
6127
6469
  }
6128
6470
  }
6129
6471
  async function nukeBrowserCaches(preventRecreation = false) {
6130
6472
  if (!("caches" in window)) {
6131
- logger11.warn("Cache API not available in this environment");
6473
+ logger12.warn("Cache API not available in this environment");
6132
6474
  return 0;
6133
6475
  }
6134
6476
  try {
@@ -6140,17 +6482,17 @@ async function nukeBrowserCaches(preventRecreation = false) {
6140
6482
  deletedCount++;
6141
6483
  }
6142
6484
  }
6143
- logger11.info("All browser caches cleared", {
6485
+ logger12.info("All browser caches cleared", {
6144
6486
  totalDeleted: deletedCount
6145
6487
  });
6146
6488
  if (preventRecreation) {
6147
6489
  const { env: env2 } = await import("./transformers.web-ALDLCPHT.mjs");
6148
6490
  env2.useBrowserCache = false;
6149
- logger11.warn("Browser cache creation disabled (env.useBrowserCache = false)");
6491
+ logger12.warn("Browser cache creation disabled (env.useBrowserCache = false)");
6150
6492
  }
6151
6493
  return deletedCount;
6152
6494
  } catch (error) {
6153
- logger11.error("Error nuking caches", { error });
6495
+ logger12.error("Error nuking caches", { error });
6154
6496
  throw error;
6155
6497
  }
6156
6498
  }
@@ -6676,6 +7018,7 @@ var EmphasisDetector = class {
6676
7018
  }
6677
7019
  };
6678
7020
  export {
7021
+ ARKIT_BLENDSHAPES,
6679
7022
  AgentCoreAdapter,
6680
7023
  AnimationGraph,
6681
7024
  AudioChunkCoalescer,
@@ -6711,6 +7054,8 @@ export {
6711
7054
  SileroVADWorker,
6712
7055
  SyncedAudioPipeline,
6713
7056
  TenantManager,
7057
+ WAV2ARKIT_BLENDSHAPES,
7058
+ Wav2ArkitCpuInference,
6714
7059
  Wav2Vec2Inference,
6715
7060
  WhisperInference,
6716
7061
  blendEmotions,
@@ -6722,6 +7067,7 @@ export {
6722
7067
  configureLogging,
6723
7068
  configureTelemetry,
6724
7069
  createEmotionVector,
7070
+ createLipSync,
6725
7071
  createLogger,
6726
7072
  createSessionWithFallback,
6727
7073
  createSileroVAD,
@@ -6746,6 +7092,7 @@ export {
6746
7092
  isIOSSafari,
6747
7093
  isMobile,
6748
7094
  isOnnxRuntimeLoaded,
7095
+ isSafari,
6749
7096
  isSpeechRecognitionAvailable,
6750
7097
  isWebGPUAvailable,
6751
7098
  lerpEmotion,
@@ -6754,15 +7101,18 @@ export {
6754
7101
  nukeBrowserCaches,
6755
7102
  parseHuggingFaceUrl,
6756
7103
  preloadModels,
7104
+ remapWav2ArkitToLam,
6757
7105
  resetLoggingConfig,
6758
7106
  resolveBackend,
6759
7107
  scanForInvalidCaches,
6760
7108
  setLogLevel,
6761
7109
  setLoggingEnabled,
6762
7110
  shouldEnableWasmProxy,
7111
+ shouldUseCpuLipSync,
6763
7112
  shouldUseNativeASR,
6764
7113
  shouldUseServerLipSync,
6765
7114
  supportsVADWorker,
7115
+ symmetrizeBlendshapes,
6766
7116
  validateCachedResponse
6767
7117
  };
6768
7118
  //# sourceMappingURL=index.mjs.map