@omote/core 0.5.7 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -762,6 +762,24 @@ var A2EProcessor = class {
762
762
  }
763
763
  };
764
764
 
765
+ // src/audio/audioUtils.ts
766
+ function pcm16ToFloat32(buffer) {
767
+ const byteLen = buffer.byteLength & ~1;
768
+ const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
769
+ const float32 = new Float32Array(int16.length);
770
+ for (let i = 0; i < int16.length; i++) {
771
+ float32[i] = int16[i] / 32768;
772
+ }
773
+ return float32;
774
+ }
775
+ function int16ToFloat32(int16) {
776
+ const float32 = new Float32Array(int16.length);
777
+ for (let i = 0; i < int16.length; i++) {
778
+ float32[i] = int16[i] / 32768;
779
+ }
780
+ return float32;
781
+ }
782
+
765
783
  // src/telemetry/exporters/console.ts
766
784
  var ConsoleExporter = class {
767
785
  constructor(options = {}) {
@@ -2534,7 +2552,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2534
2552
  } else {
2535
2553
  logger3.info("Fetching external model data", {
2536
2554
  dataUrl,
2537
- note: "This may be a large download (383MB+)"
2555
+ note: "This may be a large download"
2538
2556
  });
2539
2557
  externalDataBuffer = await fetchWithCache(dataUrl);
2540
2558
  }
@@ -2542,6 +2560,9 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2542
2560
  size: formatBytes(externalDataBuffer.byteLength)
2543
2561
  });
2544
2562
  } catch (err) {
2563
+ if (typeof this.config.externalDataUrl === "string") {
2564
+ throw new Error(`Failed to fetch external data: ${dataUrl} \u2014 ${err.message}`);
2565
+ }
2545
2566
  logger3.debug("No external data file found (single-file model)", {
2546
2567
  dataUrl,
2547
2568
  error: err.message
@@ -2665,28 +2686,6 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2665
2686
  };
2666
2687
  return this.queueInference(feeds);
2667
2688
  }
2668
- /**
2669
- * Decode CTC logits to text using greedy decoding
2670
- */
2671
- decodeCTC(logits) {
2672
- const tokens = [];
2673
- let prevToken = -1;
2674
- for (const frame of logits) {
2675
- let maxIdx = 0;
2676
- let maxVal = frame[0];
2677
- for (let i = 1; i < frame.length; i++) {
2678
- if (frame[i] > maxVal) {
2679
- maxVal = frame[i];
2680
- maxIdx = i;
2681
- }
2682
- }
2683
- if (maxIdx !== prevToken && maxIdx !== 0) {
2684
- tokens.push(maxIdx);
2685
- }
2686
- prevToken = maxIdx;
2687
- }
2688
- return tokens.map((t) => CTC_VOCAB[t] === "|" ? " " : CTC_VOCAB[t]).join("");
2689
- }
2690
2689
  /**
2691
2690
  * Queue inference to serialize ONNX session calls
2692
2691
  */
@@ -2714,37 +2713,25 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2714
2713
  })
2715
2714
  ]);
2716
2715
  const inferenceTimeMs = performance.now() - startTime;
2717
- const asrOutput = results["asr_logits"];
2718
2716
  const blendshapeOutput = results["blendshapes"];
2719
- if (!asrOutput || !blendshapeOutput) {
2720
- throw new Error("Missing outputs from model");
2717
+ if (!blendshapeOutput) {
2718
+ throw new Error("Missing blendshapes output from model");
2721
2719
  }
2722
- const asrData = asrOutput.data;
2723
2720
  const blendshapeData = blendshapeOutput.data;
2724
- const numASRFrames = asrOutput.dims[1];
2725
2721
  const numA2EFrames = blendshapeOutput.dims[1];
2726
- const asrVocabSize = asrOutput.dims[2];
2727
2722
  const numBlendshapes = blendshapeOutput.dims[2];
2728
- const asrLogits = [];
2729
2723
  const blendshapes = [];
2730
- for (let f = 0; f < numASRFrames; f++) {
2731
- asrLogits.push(asrData.slice(f * asrVocabSize, (f + 1) * asrVocabSize));
2732
- }
2733
2724
  for (let f = 0; f < numA2EFrames; f++) {
2734
2725
  const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
2735
2726
  blendshapes.push(symmetrizeBlendshapes(rawFrame));
2736
2727
  }
2737
- const text = this.decodeCTC(asrLogits);
2738
2728
  logger3.trace("Inference completed", {
2739
2729
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
2740
- numA2EFrames,
2741
- numASRFrames,
2742
- textLength: text.length
2730
+ numA2EFrames
2743
2731
  });
2744
2732
  span?.setAttributes({
2745
2733
  "inference.duration_ms": inferenceTimeMs,
2746
- "inference.a2e_frames": numA2EFrames,
2747
- "inference.asr_frames": numASRFrames
2734
+ "inference.a2e_frames": numA2EFrames
2748
2735
  });
2749
2736
  span?.end();
2750
2737
  telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
@@ -2758,11 +2745,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2758
2745
  });
2759
2746
  resolve({
2760
2747
  blendshapes,
2761
- asrLogits,
2762
- text,
2763
2748
  numFrames: numA2EFrames,
2764
- numA2EFrames,
2765
- numASRFrames,
2766
2749
  inferenceTimeMs
2767
2750
  });
2768
2751
  } catch (err) {
@@ -2815,19 +2798,7 @@ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
2815
2798
  _Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
2816
2799
  var Wav2Vec2Inference = _Wav2Vec2Inference;
2817
2800
 
2818
- // src/audio/audioUtils.ts
2819
- function pcm16ToFloat32(buffer) {
2820
- const byteLen = buffer.byteLength & ~1;
2821
- const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
2822
- const float32 = new Float32Array(int16.length);
2823
- for (let i = 0; i < int16.length; i++) {
2824
- float32[i] = int16[i] / 32768;
2825
- }
2826
- return float32;
2827
- }
2828
-
2829
- // src/audio/FullFacePipeline.ts
2830
- var logger4 = createLogger("FullFacePipeline");
2801
+ // src/audio/expressionProfile.ts
2831
2802
  var BLENDSHAPE_TO_GROUP = /* @__PURE__ */ new Map();
2832
2803
  for (const name of LAM_BLENDSHAPES) {
2833
2804
  if (name.startsWith("eye")) {
@@ -2846,6 +2817,24 @@ for (const name of LAM_BLENDSHAPES) {
2846
2817
  BLENDSHAPE_TO_GROUP.set(name, "tongue");
2847
2818
  }
2848
2819
  }
2820
+ function applyProfile(raw, profile) {
2821
+ const scaled = new Float32Array(52);
2822
+ for (let i = 0; i < 52; i++) {
2823
+ const name = LAM_BLENDSHAPES[i];
2824
+ let scaler;
2825
+ if (profile.overrides && profile.overrides[name] !== void 0) {
2826
+ scaler = profile.overrides[name];
2827
+ } else {
2828
+ const group = BLENDSHAPE_TO_GROUP.get(name);
2829
+ scaler = group ? profile[group] ?? 1 : 1;
2830
+ }
2831
+ scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
2832
+ }
2833
+ return scaled;
2834
+ }
2835
+
2836
+ // src/audio/FullFacePipeline.ts
2837
+ var logger4 = createLogger("FullFacePipeline");
2849
2838
  var FullFacePipeline = class extends EventEmitter {
2850
2839
  constructor(options) {
2851
2840
  super();
@@ -2910,25 +2899,10 @@ var FullFacePipeline = class extends EventEmitter {
2910
2899
  /**
2911
2900
  * Apply ExpressionProfile scaling to raw A2E blendshapes.
2912
2901
  *
2913
- * For each blendshape:
2914
- * 1. If an override exists for the blendshape name, use override as scaler
2915
- * 2. Otherwise, use the group scaler (default 1.0)
2916
- * 3. Clamp result to [0, 1]
2902
+ * Delegates to the standalone applyProfile() utility from expressionProfile.ts.
2917
2903
  */
2918
2904
  applyProfile(raw) {
2919
- const scaled = new Float32Array(52);
2920
- for (let i = 0; i < 52; i++) {
2921
- const name = LAM_BLENDSHAPES[i];
2922
- let scaler;
2923
- if (this.profile.overrides && this.profile.overrides[name] !== void 0) {
2924
- scaler = this.profile.overrides[name];
2925
- } else {
2926
- const group = BLENDSHAPE_TO_GROUP.get(name);
2927
- scaler = group ? this.profile[group] ?? 1 : 1;
2928
- }
2929
- scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
2930
- }
2931
- return scaled;
2905
+ return applyProfile(raw, this.profile);
2932
2906
  }
2933
2907
  /**
2934
2908
  * Start a new playback session
@@ -3113,6 +3087,329 @@ var FullFacePipeline = class extends EventEmitter {
3113
3087
  }
3114
3088
  };
3115
3089
 
3090
+ // src/audio/PlaybackPipeline.ts
3091
+ var logger5 = createLogger("PlaybackPipeline");
3092
+ var PlaybackPipeline = class extends EventEmitter {
3093
+ constructor(config) {
3094
+ super();
3095
+ this.config = config;
3096
+ this._state = "idle";
3097
+ this.playbackStarted = false;
3098
+ this.monitorInterval = null;
3099
+ this.frameAnimationId = null;
3100
+ // Stale frame detection
3101
+ this.lastNewFrameTime = 0;
3102
+ this.lastKnownLamFrame = null;
3103
+ this.staleWarningEmitted = false;
3104
+ // Diagnostic counter
3105
+ this.frameLoopCount = 0;
3106
+ this.neutralTransitionFrame = null;
3107
+ this.neutralTransitionStart = 0;
3108
+ this.neutralAnimationId = null;
3109
+ // Current frame refs
3110
+ this._currentFrame = null;
3111
+ this._currentRawFrame = null;
3112
+ this.sampleRate = config.sampleRate ?? 16e3;
3113
+ this.profile = config.profile ?? {};
3114
+ this.staleThresholdMs = config.staleThresholdMs ?? 2e3;
3115
+ this.neutralTransitionEnabled = config.neutralTransitionEnabled ?? false;
3116
+ this.neutralTransitionMs = config.neutralTransitionMs ?? 250;
3117
+ const isCpuModel = config.lam.modelId === "wav2arkit_cpu";
3118
+ const chunkSize = config.chunkSize ?? config.lam.chunkSize ?? 16e3;
3119
+ const chunkAccumulationMs = chunkSize / this.sampleRate * 1e3;
3120
+ const inferenceEstimateMs = isCpuModel ? 300 : config.lam.backend === "wasm" ? 250 : 80;
3121
+ const marginMs = 100;
3122
+ const autoDelay = Math.ceil(chunkAccumulationMs + inferenceEstimateMs + marginMs);
3123
+ const audioDelayMs = config.audioDelayMs ?? autoDelay;
3124
+ logger5.info("PlaybackPipeline config", {
3125
+ chunkSize,
3126
+ audioDelayMs,
3127
+ autoDelay,
3128
+ backend: config.lam.backend,
3129
+ modelId: config.lam.modelId,
3130
+ neutralTransitionEnabled: this.neutralTransitionEnabled
3131
+ });
3132
+ this.scheduler = new AudioScheduler({
3133
+ sampleRate: this.sampleRate,
3134
+ initialLookaheadSec: audioDelayMs / 1e3
3135
+ });
3136
+ this.coalescer = new AudioChunkCoalescer({
3137
+ sampleRate: this.sampleRate,
3138
+ targetDurationMs: config.chunkTargetMs ?? 200
3139
+ });
3140
+ this.processor = new A2EProcessor({
3141
+ backend: config.lam,
3142
+ sampleRate: this.sampleRate,
3143
+ chunkSize,
3144
+ identityIndex: config.identityIndex,
3145
+ onError: (error) => {
3146
+ logger5.error("A2E inference error", { message: error.message, stack: error.stack });
3147
+ this.emit("error", error);
3148
+ }
3149
+ });
3150
+ }
3151
+ /** Current pipeline state */
3152
+ get state() {
3153
+ return this._state;
3154
+ }
3155
+ /** Current scaled blendshapes (updated in-place for perf) */
3156
+ get currentFrame() {
3157
+ return this._currentFrame;
3158
+ }
3159
+ /** Raw A2E blendshapes (before profile scaling) */
3160
+ get currentRawFrame() {
3161
+ return this._currentRawFrame;
3162
+ }
3163
+ // ---------------------------------------------------------------------------
3164
+ // Lifecycle
3165
+ // ---------------------------------------------------------------------------
3166
+ /** Initialize AudioContext (lazy, call after user gesture) */
3167
+ async initialize() {
3168
+ await this.scheduler.initialize();
3169
+ }
3170
+ /** Update ExpressionProfile at runtime */
3171
+ setProfile(profile) {
3172
+ this.profile = profile;
3173
+ }
3174
+ // ---------------------------------------------------------------------------
3175
+ // Async mode (streaming TTS)
3176
+ // ---------------------------------------------------------------------------
3177
+ /**
3178
+ * Start a new playback session.
3179
+ * Idempotent — calling during playback resets cleanly without emitting
3180
+ * spurious playback:complete.
3181
+ */
3182
+ start() {
3183
+ this.stopInternal(false);
3184
+ this.scheduler.reset();
3185
+ this.coalescer.reset();
3186
+ this.processor.reset();
3187
+ this.playbackStarted = false;
3188
+ this.lastNewFrameTime = 0;
3189
+ this.lastKnownLamFrame = null;
3190
+ this.staleWarningEmitted = false;
3191
+ this.frameLoopCount = 0;
3192
+ this._currentFrame = null;
3193
+ this._currentRawFrame = null;
3194
+ this.cancelNeutralTransition();
3195
+ this.scheduler.warmup();
3196
+ this.startFrameLoop();
3197
+ this.startMonitoring();
3198
+ this.setState("playing");
3199
+ }
3200
+ /** Feed a streaming audio chunk (PCM16 Uint8Array) */
3201
+ async onAudioChunk(chunk) {
3202
+ const combined = this.coalescer.add(chunk);
3203
+ if (!combined) return;
3204
+ const float32 = pcm16ToFloat32(combined);
3205
+ const scheduleTime = await this.scheduler.schedule(float32);
3206
+ if (!this.playbackStarted) {
3207
+ this.playbackStarted = true;
3208
+ this.emit("playback:start", { time: scheduleTime });
3209
+ this.emit("playback_start", scheduleTime);
3210
+ }
3211
+ this.processor.pushAudio(float32, scheduleTime);
3212
+ }
3213
+ /** Signal end of audio stream (flushes remaining audio) */
3214
+ async end() {
3215
+ const remaining = this.coalescer.flush();
3216
+ if (remaining) {
3217
+ const chunk = new Uint8Array(remaining);
3218
+ await this.onAudioChunk(chunk);
3219
+ }
3220
+ await this.processor.flush();
3221
+ }
3222
+ // ---------------------------------------------------------------------------
3223
+ // Sync mode (full buffer)
3224
+ // ---------------------------------------------------------------------------
3225
+ /**
3226
+ * Feed a complete audio buffer. Chunks into 200ms pieces, schedules each
3227
+ * for playback, runs A2E inference, then waits for completion.
3228
+ */
3229
+ async feedBuffer(audio) {
3230
+ const float32 = audio instanceof Float32Array ? audio : pcm16ToFloat32(audio);
3231
+ this.start();
3232
+ const chunkSamples = Math.floor(this.sampleRate * 0.2);
3233
+ for (let i = 0; i < float32.length; i += chunkSamples) {
3234
+ const chunk = float32.subarray(i, Math.min(i + chunkSamples, float32.length));
3235
+ const scheduleTime = await this.scheduler.schedule(chunk);
3236
+ this.processor.pushAudio(chunk, scheduleTime);
3237
+ if (!this.playbackStarted) {
3238
+ this.playbackStarted = true;
3239
+ this.emit("playback:start", { time: scheduleTime });
3240
+ this.emit("playback_start", scheduleTime);
3241
+ }
3242
+ }
3243
+ await this.processor.flush();
3244
+ return new Promise((resolve) => {
3245
+ const unsub = this.on("playback:complete", () => {
3246
+ unsub();
3247
+ resolve();
3248
+ });
3249
+ });
3250
+ }
3251
+ // ---------------------------------------------------------------------------
3252
+ // Control
3253
+ // ---------------------------------------------------------------------------
3254
+ /** Stop playback immediately with fade-out */
3255
+ async stop(fadeOutMs = 50) {
3256
+ this.setState("stopping");
3257
+ this.stopInternal(true);
3258
+ await this.scheduler.cancelAll(fadeOutMs);
3259
+ this.coalescer.reset();
3260
+ this.processor.reset();
3261
+ this.playbackStarted = false;
3262
+ this._currentFrame = null;
3263
+ this._currentRawFrame = null;
3264
+ this.emit("playback:stop", void 0);
3265
+ this.setState("idle");
3266
+ }
3267
+ /** Cleanup all resources */
3268
+ dispose() {
3269
+ this.stopInternal(true);
3270
+ this.cancelNeutralTransition();
3271
+ this.scheduler.dispose();
3272
+ this.coalescer.reset();
3273
+ this.processor.dispose();
3274
+ this._state = "idle";
3275
+ }
3276
+ /** Get pipeline debug state */
3277
+ getDebugState() {
3278
+ return {
3279
+ state: this._state,
3280
+ playbackStarted: this.playbackStarted,
3281
+ coalescerFill: this.coalescer.fillLevel,
3282
+ processorFill: this.processor.fillLevel,
3283
+ queuedFrames: this.processor.queuedFrameCount,
3284
+ currentTime: this.scheduler.getCurrentTime(),
3285
+ playbackEndTime: this.scheduler.getPlaybackEndTime()
3286
+ };
3287
+ }
3288
+ // ---------------------------------------------------------------------------
3289
+ // Internal: Frame loop
3290
+ // ---------------------------------------------------------------------------
3291
+ startFrameLoop() {
3292
+ const updateFrame = () => {
3293
+ this.frameLoopCount++;
3294
+ const currentTime = this.scheduler.getCurrentTime();
3295
+ const lamFrame = this.processor.getFrameForTime(currentTime);
3296
+ if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
3297
+ this.lastNewFrameTime = performance.now();
3298
+ this.lastKnownLamFrame = lamFrame;
3299
+ this.staleWarningEmitted = false;
3300
+ }
3301
+ if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
3302
+ if (!this.staleWarningEmitted) {
3303
+ this.staleWarningEmitted = true;
3304
+ logger5.warn("A2E stalled \u2014 no new inference frames", {
3305
+ staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
3306
+ queuedFrames: this.processor.queuedFrameCount
3307
+ });
3308
+ }
3309
+ }
3310
+ if (lamFrame) {
3311
+ const scaled = applyProfile(lamFrame, this.profile);
3312
+ this._currentFrame = scaled;
3313
+ this._currentRawFrame = lamFrame;
3314
+ const fullFrame = {
3315
+ blendshapes: scaled,
3316
+ rawBlendshapes: lamFrame,
3317
+ timestamp: currentTime
3318
+ };
3319
+ this.emit("frame", fullFrame);
3320
+ this.emit("frame:raw", lamFrame);
3321
+ this.emit("full_frame_ready", fullFrame);
3322
+ this.emit("lam_frame_ready", lamFrame);
3323
+ }
3324
+ this.frameAnimationId = requestAnimationFrame(updateFrame);
3325
+ };
3326
+ this.frameAnimationId = requestAnimationFrame(updateFrame);
3327
+ }
3328
+ // ---------------------------------------------------------------------------
3329
+ // Internal: Playback monitoring
3330
+ // ---------------------------------------------------------------------------
3331
+ startMonitoring() {
3332
+ if (this.monitorInterval) {
3333
+ clearInterval(this.monitorInterval);
3334
+ }
3335
+ this.monitorInterval = setInterval(() => {
3336
+ if (this.scheduler.isComplete() && this.processor.queuedFrameCount === 0) {
3337
+ this.onPlaybackComplete();
3338
+ }
3339
+ }, 100);
3340
+ }
3341
+ onPlaybackComplete() {
3342
+ this.stopInternal(false);
3343
+ this.playbackStarted = false;
3344
+ this.emit("playback:complete", void 0);
3345
+ this.emit("playback_complete", void 0);
3346
+ if (this.neutralTransitionEnabled && this._currentFrame) {
3347
+ this.startNeutralTransition(this._currentFrame);
3348
+ } else {
3349
+ this.setState("idle");
3350
+ }
3351
+ }
3352
+ // ---------------------------------------------------------------------------
3353
+ // Internal: Neutral transition (opt-in)
3354
+ // ---------------------------------------------------------------------------
3355
+ startNeutralTransition(fromFrame) {
3356
+ this.neutralTransitionFrame = new Float32Array(fromFrame);
3357
+ this.neutralTransitionStart = performance.now();
3358
+ const animate = () => {
3359
+ const elapsed = performance.now() - this.neutralTransitionStart;
3360
+ const t = Math.min(1, elapsed / this.neutralTransitionMs);
3361
+ const eased = 1 - Math.pow(1 - t, 3);
3362
+ const blendshapes = new Float32Array(52);
3363
+ for (let i = 0; i < 52; i++) {
3364
+ blendshapes[i] = this.neutralTransitionFrame[i] * (1 - eased);
3365
+ }
3366
+ this._currentFrame = blendshapes;
3367
+ const frame = {
3368
+ blendshapes,
3369
+ rawBlendshapes: blendshapes,
3370
+ // raw = scaled during transition
3371
+ timestamp: performance.now() / 1e3
3372
+ };
3373
+ this.emit("frame", frame);
3374
+ this.emit("full_frame_ready", frame);
3375
+ if (t >= 1) {
3376
+ this.neutralTransitionFrame = null;
3377
+ this._currentFrame = null;
3378
+ this._currentRawFrame = null;
3379
+ this.setState("idle");
3380
+ return;
3381
+ }
3382
+ this.neutralAnimationId = requestAnimationFrame(animate);
3383
+ };
3384
+ this.neutralAnimationId = requestAnimationFrame(animate);
3385
+ }
3386
+ cancelNeutralTransition() {
3387
+ if (this.neutralAnimationId) {
3388
+ cancelAnimationFrame(this.neutralAnimationId);
3389
+ this.neutralAnimationId = null;
3390
+ }
3391
+ this.neutralTransitionFrame = null;
3392
+ }
3393
+ // ---------------------------------------------------------------------------
3394
+ // Internal: Helpers
3395
+ // ---------------------------------------------------------------------------
3396
+ stopInternal(emitEvents) {
3397
+ if (this.monitorInterval) {
3398
+ clearInterval(this.monitorInterval);
3399
+ this.monitorInterval = null;
3400
+ }
3401
+ if (this.frameAnimationId) {
3402
+ cancelAnimationFrame(this.frameAnimationId);
3403
+ this.frameAnimationId = null;
3404
+ }
3405
+ }
3406
+ setState(state) {
3407
+ if (this._state === state) return;
3408
+ this._state = state;
3409
+ this.emit("state", state);
3410
+ }
3411
+ };
3412
+
3116
3413
  // src/audio/InterruptionHandler.ts
3117
3414
  var InterruptionHandler = class extends EventEmitter {
3118
3415
  constructor(config = {}) {
@@ -3500,7 +3797,7 @@ function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
3500
3797
  }
3501
3798
 
3502
3799
  // src/inference/SenseVoiceInference.ts
3503
- var logger5 = createLogger("SenseVoice");
3800
+ var logger6 = createLogger("SenseVoice");
3504
3801
  var _SenseVoiceInference = class _SenseVoiceInference {
3505
3802
  constructor(config) {
3506
3803
  this.session = null;
@@ -3553,26 +3850,26 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3553
3850
  "model.backend_requested": this.config.backend
3554
3851
  });
3555
3852
  try {
3556
- logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
3853
+ logger6.info("Loading ONNX Runtime...", { preference: this.config.backend });
3557
3854
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
3558
3855
  this.ort = ort;
3559
3856
  this._backend = backend;
3560
- logger5.info("ONNX Runtime loaded", { backend: this._backend });
3561
- logger5.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
3857
+ logger6.info("ONNX Runtime loaded", { backend: this._backend });
3858
+ logger6.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
3562
3859
  const tokensResponse = await fetch(this.config.tokensUrl);
3563
3860
  if (!tokensResponse.ok) {
3564
3861
  throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
3565
3862
  }
3566
3863
  const tokensText = await tokensResponse.text();
3567
3864
  this.tokenMap = parseTokensFile(tokensText);
3568
- logger5.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
3865
+ logger6.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
3569
3866
  const sessionOptions = getSessionOptions(this._backend);
3570
3867
  if (this._backend === "webgpu") {
3571
3868
  sessionOptions.graphOptimizationLevel = "basic";
3572
3869
  }
3573
3870
  let isCached = false;
3574
3871
  if (isIOS()) {
3575
- logger5.info("iOS: passing model URL directly to ORT (low-memory path)", {
3872
+ logger6.info("iOS: passing model URL directly to ORT (low-memory path)", {
3576
3873
  modelUrl: this.config.modelUrl
3577
3874
  });
3578
3875
  this.session = await withTimeout(
@@ -3585,14 +3882,14 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3585
3882
  isCached = await cache.has(this.config.modelUrl);
3586
3883
  let modelBuffer;
3587
3884
  if (isCached) {
3588
- logger5.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
3885
+ logger6.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
3589
3886
  modelBuffer = await cache.get(this.config.modelUrl);
3590
3887
  onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
3591
3888
  } else {
3592
- logger5.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
3889
+ logger6.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
3593
3890
  modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
3594
3891
  }
3595
- logger5.debug("Creating ONNX session", {
3892
+ logger6.debug("Creating ONNX session", {
3596
3893
  size: formatBytes(modelBuffer.byteLength),
3597
3894
  backend: this._backend
3598
3895
  });
@@ -3605,15 +3902,15 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3605
3902
  const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
3606
3903
  this.negMean = cmvn.negMean;
3607
3904
  this.invStddev = cmvn.invStddev;
3608
- logger5.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
3905
+ logger6.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
3609
3906
  } else {
3610
- logger5.warn("CMVN not found in model metadata \u2014 features will not be normalized");
3907
+ logger6.warn("CMVN not found in model metadata \u2014 features will not be normalized");
3611
3908
  }
3612
3909
  } catch (cmvnErr) {
3613
- logger5.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
3910
+ logger6.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
3614
3911
  }
3615
3912
  const loadTimeMs = performance.now() - startTime;
3616
- logger5.info("SenseVoice model loaded", {
3913
+ logger6.info("SenseVoice model loaded", {
3617
3914
  backend: this._backend,
3618
3915
  loadTimeMs: Math.round(loadTimeMs),
3619
3916
  vocabSize: this.tokenMap.size,
@@ -3724,7 +4021,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3724
4021
  const vocabSize = logitsDims[2];
3725
4022
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
3726
4023
  const inferenceTimeMs = performance.now() - startTime;
3727
- logger5.trace("Transcription complete", {
4024
+ logger6.trace("Transcription complete", {
3728
4025
  text: decoded.text.substring(0, 50),
3729
4026
  language: decoded.language,
3730
4027
  emotion: decoded.emotion,
@@ -3762,7 +4059,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3762
4059
  const errMsg = err instanceof Error ? err.message : String(err);
3763
4060
  if (errMsg.includes("timed out")) {
3764
4061
  this.poisoned = true;
3765
- logger5.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4062
+ logger6.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
3766
4063
  backend: this._backend,
3767
4064
  timeoutMs: _SenseVoiceInference.INFERENCE_TIMEOUT_MS
3768
4065
  });
@@ -3770,7 +4067,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3770
4067
  const oomError = new Error(
3771
4068
  `SenseVoice inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
3772
4069
  );
3773
- logger5.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4070
+ logger6.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
3774
4071
  pointer: `0x${err.toString(16)}`,
3775
4072
  backend: this._backend
3776
4073
  });
@@ -3783,7 +4080,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3783
4080
  reject(oomError);
3784
4081
  return;
3785
4082
  } else {
3786
- logger5.error("Inference failed", { error: errMsg, backend: this._backend });
4083
+ logger6.error("Inference failed", { error: errMsg, backend: this._backend });
3787
4084
  }
3788
4085
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
3789
4086
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -3812,7 +4109,7 @@ _SenseVoiceInference.INFERENCE_TIMEOUT_MS = 1e4;
3812
4109
  var SenseVoiceInference = _SenseVoiceInference;
3813
4110
 
3814
4111
  // src/inference/SenseVoiceWorker.ts
3815
- var logger6 = createLogger("SenseVoiceWorker");
4112
+ var logger7 = createLogger("SenseVoiceWorker");
3816
4113
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
3817
4114
  var LOAD_TIMEOUT_MS = 3e5;
3818
4115
  var INFERENCE_TIMEOUT_MS = 1e4;
@@ -4551,7 +4848,7 @@ var SenseVoiceWorker = class {
4551
4848
  this.handleWorkerMessage(event.data);
4552
4849
  };
4553
4850
  worker.onerror = (error) => {
4554
- logger6.error("Worker error", { error: error.message });
4851
+ logger7.error("Worker error", { error: error.message });
4555
4852
  for (const [, resolver] of this.pendingResolvers) {
4556
4853
  resolver.reject(new Error(`Worker error: ${error.message}`));
4557
4854
  }
@@ -4631,9 +4928,9 @@ var SenseVoiceWorker = class {
4631
4928
  "model.language": this.config.language
4632
4929
  });
4633
4930
  try {
4634
- logger6.info("Creating SenseVoice worker...");
4931
+ logger7.info("Creating SenseVoice worker...");
4635
4932
  this.worker = this.createWorker();
4636
- logger6.info("Loading model in worker...", {
4933
+ logger7.info("Loading model in worker...", {
4637
4934
  modelUrl: this.config.modelUrl,
4638
4935
  tokensUrl: this.config.tokensUrl,
4639
4936
  language: this.config.language,
@@ -4655,7 +4952,7 @@ var SenseVoiceWorker = class {
4655
4952
  this._isLoaded = true;
4656
4953
  const loadTimeMs = performance.now() - startTime;
4657
4954
  onProgress?.(1, 1);
4658
- logger6.info("SenseVoice worker loaded successfully", {
4955
+ logger7.info("SenseVoice worker loaded successfully", {
4659
4956
  backend: "wasm",
4660
4957
  loadTimeMs: Math.round(loadTimeMs),
4661
4958
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -4734,7 +5031,7 @@ var SenseVoiceWorker = class {
4734
5031
  INFERENCE_TIMEOUT_MS
4735
5032
  );
4736
5033
  const totalTimeMs = performance.now() - startTime;
4737
- logger6.trace("Worker transcription complete", {
5034
+ logger7.trace("Worker transcription complete", {
4738
5035
  text: result.text.substring(0, 50),
4739
5036
  language: result.language,
4740
5037
  emotion: result.emotion,
@@ -4770,11 +5067,11 @@ var SenseVoiceWorker = class {
4770
5067
  } catch (err) {
4771
5068
  const errMsg = err instanceof Error ? err.message : String(err);
4772
5069
  if (errMsg.includes("timed out")) {
4773
- logger6.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5070
+ logger7.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
4774
5071
  timeoutMs: INFERENCE_TIMEOUT_MS
4775
5072
  });
4776
5073
  } else {
4777
- logger6.error("Worker inference failed", { error: errMsg });
5074
+ logger7.error("Worker inference failed", { error: errMsg });
4778
5075
  }
4779
5076
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
4780
5077
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -4811,8 +5108,53 @@ var SenseVoiceWorker = class {
4811
5108
  }
4812
5109
  };
4813
5110
 
5111
+ // src/inference/defaultModelUrls.ts
5112
+ var HF = "https://huggingface.co";
5113
+ var HF_MODEL_URLS = {
5114
+ /** LAM A2E model — fp16 external data (385KB graph + 192MB weights, WebGPU) — 52 ARKit blendshapes */
5115
+ lam: `${HF}/omote-ai/lam-a2e/resolve/main/model_fp16.onnx`,
5116
+ /** wav2arkit_cpu A2E model graph (1.86MB, WASM) — Safari/iOS fallback */
5117
+ wav2arkitCpu: `${HF}/myned-ai/wav2arkit_cpu/resolve/main/wav2arkit_cpu.onnx`,
5118
+ /** SenseVoice ASR model (228MB int8, WASM) — speech recognition + emotion + language */
5119
+ senseVoice: `${HF}/omote-ai/sensevoice-asr/resolve/main/model.int8.onnx`,
5120
+ /** Silero VAD model (~2MB, WASM) — voice activity detection */
5121
+ sileroVad: `${HF}/deepghs/silero-vad-onnx/resolve/main/silero_vad.onnx`
5122
+ };
5123
+ var _overrides = {};
5124
+ var DEFAULT_MODEL_URLS = new Proxy(
5125
+ {},
5126
+ {
5127
+ get(_target, prop) {
5128
+ const key = prop;
5129
+ return _overrides[key] ?? HF_MODEL_URLS[key];
5130
+ },
5131
+ ownKeys() {
5132
+ return Object.keys(HF_MODEL_URLS);
5133
+ },
5134
+ getOwnPropertyDescriptor(_target, prop) {
5135
+ if (prop in HF_MODEL_URLS) {
5136
+ return { configurable: true, enumerable: true, value: this.get(_target, prop, _target) };
5137
+ }
5138
+ return void 0;
5139
+ }
5140
+ }
5141
+ );
5142
+ function configureModelUrls(urls) {
5143
+ for (const [key, url] of Object.entries(urls)) {
5144
+ if (key in HF_MODEL_URLS && typeof url === "string") {
5145
+ _overrides[key] = url;
5146
+ }
5147
+ }
5148
+ }
5149
+ function resetModelUrls() {
5150
+ for (const key of Object.keys(_overrides)) {
5151
+ delete _overrides[key];
5152
+ }
5153
+ }
5154
+ var HF_CDN_URLS = HF_MODEL_URLS;
5155
+
4814
5156
  // src/inference/UnifiedInferenceWorker.ts
4815
- var logger7 = createLogger("UnifiedInferenceWorker");
5157
+ var logger8 = createLogger("UnifiedInferenceWorker");
4816
5158
  var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
4817
5159
  var INIT_TIMEOUT_MS = 6e4;
4818
5160
  var SV_LOAD_TIMEOUT_MS = 3e5;
@@ -5514,7 +5856,7 @@ var UnifiedInferenceWorker = class {
5514
5856
  const telemetry = getTelemetry();
5515
5857
  const span = telemetry?.startSpan("UnifiedInferenceWorker.init");
5516
5858
  try {
5517
- logger7.info("Creating unified inference worker...");
5859
+ logger8.info("Creating unified inference worker...");
5518
5860
  this.worker = this.createWorker();
5519
5861
  await this.sendMessage(
5520
5862
  { type: "init", wasmPaths: WASM_CDN_PATH3, isIOS: isIOS() },
@@ -5523,7 +5865,7 @@ var UnifiedInferenceWorker = class {
5523
5865
  );
5524
5866
  this.initialized = true;
5525
5867
  const loadTimeMs = performance.now() - startTime;
5526
- logger7.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
5868
+ logger8.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
5527
5869
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs });
5528
5870
  span?.end();
5529
5871
  } catch (error) {
@@ -5697,7 +6039,7 @@ var UnifiedInferenceWorker = class {
5697
6039
  this.handleWorkerMessage(event.data);
5698
6040
  };
5699
6041
  worker.onerror = (error) => {
5700
- logger7.error("Unified worker error", { error: error.message });
6042
+ logger8.error("Unified worker error", { error: error.message });
5701
6043
  this.rejectAllPending(`Worker error: ${error.message}`);
5702
6044
  };
5703
6045
  return worker;
@@ -5711,7 +6053,7 @@ var UnifiedInferenceWorker = class {
5711
6053
  this.pendingRequests.delete(requestId);
5712
6054
  pending.reject(new Error(data.error));
5713
6055
  } else {
5714
- logger7.error("Worker broadcast error", { error: data.error });
6056
+ logger8.error("Worker broadcast error", { error: data.error });
5715
6057
  this.rejectAllPending(data.error);
5716
6058
  }
5717
6059
  return;
@@ -5733,7 +6075,7 @@ var UnifiedInferenceWorker = class {
5733
6075
  const timeout = setTimeout(() => {
5734
6076
  this.pendingRequests.delete(requestId);
5735
6077
  this.poisoned = true;
5736
- logger7.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
6078
+ logger8.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
5737
6079
  type: message.type,
5738
6080
  timeoutMs
5739
6081
  });
@@ -5799,7 +6141,7 @@ var SenseVoiceUnifiedAdapter = class {
5799
6141
  });
5800
6142
  this._isLoaded = true;
5801
6143
  onProgress?.(1, 1);
5802
- logger7.info("SenseVoice loaded via unified worker", {
6144
+ logger8.info("SenseVoice loaded via unified worker", {
5803
6145
  backend: "wasm",
5804
6146
  loadTimeMs: Math.round(result.loadTimeMs),
5805
6147
  vocabSize: result.vocabSize
@@ -5864,7 +6206,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
5864
6206
  externalDataUrl: externalDataUrl || null
5865
6207
  });
5866
6208
  this._isLoaded = true;
5867
- logger7.info("Wav2ArkitCpu loaded via unified worker", {
6209
+ logger8.info("Wav2ArkitCpu loaded via unified worker", {
5868
6210
  backend: "wasm",
5869
6211
  loadTimeMs: Math.round(result.loadTimeMs)
5870
6212
  });
@@ -5970,7 +6312,7 @@ var SileroVADUnifiedAdapter = class {
5970
6312
  sampleRate: this.config.sampleRate
5971
6313
  });
5972
6314
  this._isLoaded = true;
5973
- logger7.info("SileroVAD loaded via unified worker", {
6315
+ logger8.info("SileroVAD loaded via unified worker", {
5974
6316
  backend: "wasm",
5975
6317
  loadTimeMs: Math.round(result.loadTimeMs),
5976
6318
  sampleRate: this.config.sampleRate,
@@ -6051,12 +6393,13 @@ var SileroVADUnifiedAdapter = class {
6051
6393
  };
6052
6394
 
6053
6395
  // src/inference/createSenseVoice.ts
6054
- var logger8 = createLogger("createSenseVoice");
6055
- function createSenseVoice(config) {
6396
+ var logger9 = createLogger("createSenseVoice");
6397
+ function createSenseVoice(config = {}) {
6398
+ const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.senseVoice;
6056
6399
  if (config.unifiedWorker) {
6057
- logger8.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6400
+ logger9.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6058
6401
  return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
6059
- modelUrl: config.modelUrl,
6402
+ modelUrl,
6060
6403
  tokensUrl: config.tokensUrl,
6061
6404
  language: config.language,
6062
6405
  textNorm: config.textNorm
@@ -6067,37 +6410,37 @@ function createSenseVoice(config) {
6067
6410
  if (!SenseVoiceWorker.isSupported()) {
6068
6411
  throw new Error("Web Workers are not supported in this environment");
6069
6412
  }
6070
- logger8.info("Creating SenseVoiceWorker (off-main-thread)");
6413
+ logger9.info("Creating SenseVoiceWorker (off-main-thread)");
6071
6414
  return new SenseVoiceWorker({
6072
- modelUrl: config.modelUrl,
6415
+ modelUrl,
6073
6416
  tokensUrl: config.tokensUrl,
6074
6417
  language: config.language,
6075
6418
  textNorm: config.textNorm
6076
6419
  });
6077
6420
  }
6078
6421
  if (useWorker === false) {
6079
- logger8.info("Creating SenseVoiceInference (main thread)");
6422
+ logger9.info("Creating SenseVoiceInference (main thread)");
6080
6423
  return new SenseVoiceInference({
6081
- modelUrl: config.modelUrl,
6424
+ modelUrl,
6082
6425
  tokensUrl: config.tokensUrl,
6083
6426
  language: config.language,
6084
6427
  textNorm: config.textNorm
6085
6428
  });
6086
6429
  }
6087
6430
  if (SenseVoiceWorker.isSupported() && !isIOS()) {
6088
- logger8.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6431
+ logger9.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6089
6432
  return new SenseVoiceWorker({
6090
- modelUrl: config.modelUrl,
6433
+ modelUrl,
6091
6434
  tokensUrl: config.tokensUrl,
6092
6435
  language: config.language,
6093
6436
  textNorm: config.textNorm
6094
6437
  });
6095
6438
  }
6096
- logger8.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6439
+ logger9.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6097
6440
  reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
6098
6441
  });
6099
6442
  return new SenseVoiceInference({
6100
- modelUrl: config.modelUrl,
6443
+ modelUrl,
6101
6444
  tokensUrl: config.tokensUrl,
6102
6445
  language: config.language,
6103
6446
  textNorm: config.textNorm
@@ -6105,7 +6448,7 @@ function createSenseVoice(config) {
6105
6448
  }
6106
6449
 
6107
6450
  // src/inference/Wav2ArkitCpuInference.ts
6108
- var logger9 = createLogger("Wav2ArkitCpu");
6451
+ var logger10 = createLogger("Wav2ArkitCpu");
6109
6452
  var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6110
6453
  constructor(config) {
6111
6454
  this.modelId = "wav2arkit_cpu";
@@ -6147,16 +6490,16 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6147
6490
  });
6148
6491
  try {
6149
6492
  const preference = this.config.backend || "wasm";
6150
- logger9.info("Loading ONNX Runtime...", { preference });
6493
+ logger10.info("Loading ONNX Runtime...", { preference });
6151
6494
  const { ort, backend } = await getOnnxRuntimeForPreference(preference);
6152
6495
  this.ort = ort;
6153
6496
  this._backend = backend;
6154
- logger9.info("ONNX Runtime loaded", { backend: this._backend });
6497
+ logger10.info("ONNX Runtime loaded", { backend: this._backend });
6155
6498
  const modelUrl = this.config.modelUrl;
6156
6499
  const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
6157
6500
  const sessionOptions = getSessionOptions(this._backend);
6158
6501
  if (isIOS()) {
6159
- logger9.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6502
+ logger10.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6160
6503
  modelUrl,
6161
6504
  dataUrl
6162
6505
  });
@@ -6178,15 +6521,15 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6178
6521
  const isCached = await cache.has(modelUrl);
6179
6522
  let modelBuffer;
6180
6523
  if (isCached) {
6181
- logger9.debug("Loading model from cache", { modelUrl });
6524
+ logger10.debug("Loading model from cache", { modelUrl });
6182
6525
  modelBuffer = await cache.get(modelUrl);
6183
6526
  if (!modelBuffer) {
6184
- logger9.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6527
+ logger10.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6185
6528
  await cache.delete(modelUrl);
6186
6529
  modelBuffer = await fetchWithCache(modelUrl);
6187
6530
  }
6188
6531
  } else {
6189
- logger9.debug("Fetching and caching model graph", { modelUrl });
6532
+ logger10.debug("Fetching and caching model graph", { modelUrl });
6190
6533
  modelBuffer = await fetchWithCache(modelUrl);
6191
6534
  }
6192
6535
  if (!modelBuffer) {
@@ -6197,31 +6540,31 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6197
6540
  try {
6198
6541
  const isDataCached = await cache.has(dataUrl);
6199
6542
  if (isDataCached) {
6200
- logger9.debug("Loading external data from cache", { dataUrl });
6543
+ logger10.debug("Loading external data from cache", { dataUrl });
6201
6544
  externalDataBuffer = await cache.get(dataUrl);
6202
6545
  if (!externalDataBuffer) {
6203
- logger9.warn("Cache corruption for external data, retrying", { dataUrl });
6546
+ logger10.warn("Cache corruption for external data, retrying", { dataUrl });
6204
6547
  await cache.delete(dataUrl);
6205
6548
  externalDataBuffer = await fetchWithCache(dataUrl);
6206
6549
  }
6207
6550
  } else {
6208
- logger9.info("Fetching external model data", {
6551
+ logger10.info("Fetching external model data", {
6209
6552
  dataUrl,
6210
6553
  note: "This may be a large download (400MB+)"
6211
6554
  });
6212
6555
  externalDataBuffer = await fetchWithCache(dataUrl);
6213
6556
  }
6214
- logger9.info("External data loaded", {
6557
+ logger10.info("External data loaded", {
6215
6558
  size: formatBytes(externalDataBuffer.byteLength)
6216
6559
  });
6217
6560
  } catch (err) {
6218
- logger9.debug("No external data file found (single-file model)", {
6561
+ logger10.debug("No external data file found (single-file model)", {
6219
6562
  dataUrl,
6220
6563
  error: err.message
6221
6564
  });
6222
6565
  }
6223
6566
  }
6224
- logger9.debug("Creating ONNX session", {
6567
+ logger10.debug("Creating ONNX session", {
6225
6568
  graphSize: formatBytes(modelBuffer.byteLength),
6226
6569
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
6227
6570
  backend: this._backend
@@ -6237,7 +6580,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6237
6580
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
6238
6581
  }
6239
6582
  const loadTimeMs = performance.now() - startTime;
6240
- logger9.info("Model loaded successfully", {
6583
+ logger10.info("Model loaded successfully", {
6241
6584
  backend: this._backend,
6242
6585
  loadTimeMs: Math.round(loadTimeMs),
6243
6586
  inputs: this.session.inputNames,
@@ -6253,12 +6596,12 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6253
6596
  model: "wav2arkit_cpu",
6254
6597
  backend: this._backend
6255
6598
  });
6256
- logger9.debug("Running warmup inference");
6599
+ logger10.debug("Running warmup inference");
6257
6600
  const warmupStart = performance.now();
6258
6601
  const silentAudio = new Float32Array(16e3);
6259
6602
  await this.infer(silentAudio);
6260
6603
  const warmupTimeMs = performance.now() - warmupStart;
6261
- logger9.info("Warmup inference complete", {
6604
+ logger10.info("Warmup inference complete", {
6262
6605
  warmupTimeMs: Math.round(warmupTimeMs),
6263
6606
  backend: this._backend
6264
6607
  });
@@ -6345,7 +6688,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6345
6688
  const symmetrized = symmetrizeBlendshapes(rawFrame);
6346
6689
  blendshapes.push(symmetrized);
6347
6690
  }
6348
- logger9.trace("Inference completed", {
6691
+ logger10.trace("Inference completed", {
6349
6692
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
6350
6693
  numFrames,
6351
6694
  inputSamples
@@ -6373,7 +6716,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6373
6716
  const errMsg = err instanceof Error ? err.message : String(err);
6374
6717
  if (errMsg.includes("timed out")) {
6375
6718
  this.poisoned = true;
6376
- logger9.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6719
+ logger10.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6377
6720
  backend: this._backend,
6378
6721
  timeoutMs: _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS
6379
6722
  });
@@ -6381,7 +6724,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6381
6724
  const oomError = new Error(
6382
6725
  `Wav2ArkitCpu inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
6383
6726
  );
6384
- logger9.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6727
+ logger10.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6385
6728
  pointer: `0x${err.toString(16)}`,
6386
6729
  backend: this._backend
6387
6730
  });
@@ -6394,7 +6737,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6394
6737
  reject(oomError);
6395
6738
  return;
6396
6739
  } else {
6397
- logger9.error("Inference failed", { error: errMsg, backend: this._backend });
6740
+ logger10.error("Inference failed", { error: errMsg, backend: this._backend });
6398
6741
  }
6399
6742
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
6400
6743
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -6421,7 +6764,7 @@ _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS = 5e3;
6421
6764
  var Wav2ArkitCpuInference = _Wav2ArkitCpuInference;
6422
6765
 
6423
6766
  // src/inference/Wav2ArkitCpuWorker.ts
6424
- var logger10 = createLogger("Wav2ArkitCpuWorker");
6767
+ var logger11 = createLogger("Wav2ArkitCpuWorker");
6425
6768
  var WASM_CDN_PATH4 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
6426
6769
  var LOAD_TIMEOUT_MS2 = 42e4;
6427
6770
  var INFERENCE_TIMEOUT_MS2 = 5e3;
@@ -6708,7 +7051,7 @@ var Wav2ArkitCpuWorker = class {
6708
7051
  this.handleWorkerMessage(event.data);
6709
7052
  };
6710
7053
  worker.onerror = (error) => {
6711
- logger10.error("Worker error", { error: error.message });
7054
+ logger11.error("Worker error", { error: error.message });
6712
7055
  for (const [, resolver] of this.pendingResolvers) {
6713
7056
  resolver.reject(new Error(`Worker error: ${error.message}`));
6714
7057
  }
@@ -6784,10 +7127,10 @@ var Wav2ArkitCpuWorker = class {
6784
7127
  "model.backend_requested": "wasm"
6785
7128
  });
6786
7129
  try {
6787
- logger10.info("Creating wav2arkit_cpu worker...");
7130
+ logger11.info("Creating wav2arkit_cpu worker...");
6788
7131
  this.worker = this.createWorker();
6789
7132
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
6790
- logger10.info("Loading model in worker...", {
7133
+ logger11.info("Loading model in worker...", {
6791
7134
  modelUrl: this.config.modelUrl,
6792
7135
  externalDataUrl,
6793
7136
  isIOS: isIOS()
@@ -6805,7 +7148,7 @@ var Wav2ArkitCpuWorker = class {
6805
7148
  );
6806
7149
  this._isLoaded = true;
6807
7150
  const loadTimeMs = performance.now() - startTime;
6808
- logger10.info("Wav2ArkitCpu worker loaded successfully", {
7151
+ logger11.info("Wav2ArkitCpu worker loaded successfully", {
6809
7152
  backend: "wasm",
6810
7153
  loadTimeMs: Math.round(loadTimeMs),
6811
7154
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -6890,7 +7233,7 @@ var Wav2ArkitCpuWorker = class {
6890
7233
  for (let f = 0; f < numFrames; f++) {
6891
7234
  blendshapes.push(flatBuffer.slice(f * numBlendshapes, (f + 1) * numBlendshapes));
6892
7235
  }
6893
- logger10.trace("Worker inference completed", {
7236
+ logger11.trace("Worker inference completed", {
6894
7237
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
6895
7238
  workerTimeMs: Math.round(result.inferenceTimeMs * 100) / 100,
6896
7239
  numFrames,
@@ -6920,12 +7263,12 @@ var Wav2ArkitCpuWorker = class {
6920
7263
  const errMsg = err instanceof Error ? err.message : String(err);
6921
7264
  if (errMsg.includes("timed out")) {
6922
7265
  this.poisoned = true;
6923
- logger10.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7266
+ logger11.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
6924
7267
  backend: "wasm",
6925
7268
  timeoutMs: INFERENCE_TIMEOUT_MS2
6926
7269
  });
6927
7270
  } else {
6928
- logger10.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7271
+ logger11.error("Worker inference failed", { error: errMsg, backend: "wasm" });
6929
7272
  }
6930
7273
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
6931
7274
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -6963,53 +7306,56 @@ var Wav2ArkitCpuWorker = class {
6963
7306
  };
6964
7307
 
6965
7308
  // src/inference/createA2E.ts
6966
- var logger11 = createLogger("createA2E");
6967
- function createA2E(config) {
7309
+ var logger12 = createLogger("createA2E");
7310
+ function createA2E(config = {}) {
6968
7311
  const mode = config.mode ?? "auto";
6969
7312
  const fallbackOnError = config.fallbackOnError ?? true;
7313
+ const gpuModelUrl = config.gpuModelUrl ?? DEFAULT_MODEL_URLS.lam;
7314
+ const cpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
6970
7315
  let useCpu;
6971
7316
  if (mode === "cpu") {
6972
7317
  useCpu = true;
6973
- logger11.info("Forcing CPU A2E model (wav2arkit_cpu)");
7318
+ logger12.info("Forcing CPU A2E model (wav2arkit_cpu)");
6974
7319
  } else if (mode === "gpu") {
6975
7320
  useCpu = false;
6976
- logger11.info("Forcing GPU A2E model (Wav2Vec2)");
7321
+ logger12.info("Forcing GPU A2E model (Wav2Vec2)");
6977
7322
  } else {
6978
7323
  useCpu = shouldUseCpuA2E();
6979
- logger11.info("Auto-detected A2E model", {
7324
+ logger12.info("Auto-detected A2E model", {
6980
7325
  useCpu,
6981
7326
  isSafari: isSafari()
6982
7327
  });
6983
7328
  }
6984
7329
  if (useCpu) {
6985
7330
  if (config.unifiedWorker) {
6986
- logger11.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7331
+ logger12.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
6987
7332
  return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
6988
- modelUrl: config.cpuModelUrl
7333
+ modelUrl: cpuModelUrl
6989
7334
  });
6990
7335
  }
6991
7336
  if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
6992
- logger11.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7337
+ logger12.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
6993
7338
  return new Wav2ArkitCpuWorker({
6994
- modelUrl: config.cpuModelUrl
7339
+ modelUrl: cpuModelUrl
6995
7340
  });
6996
7341
  }
6997
- logger11.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7342
+ logger12.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
6998
7343
  return new Wav2ArkitCpuInference({
6999
- modelUrl: config.cpuModelUrl
7344
+ modelUrl: cpuModelUrl
7000
7345
  });
7001
7346
  }
7347
+ const gpuExternalDataUrl = config.gpuExternalDataUrl !== void 0 ? config.gpuExternalDataUrl : void 0;
7002
7348
  const gpuInstance = new Wav2Vec2Inference({
7003
- modelUrl: config.gpuModelUrl,
7004
- externalDataUrl: config.gpuExternalDataUrl,
7349
+ modelUrl: gpuModelUrl,
7350
+ externalDataUrl: gpuExternalDataUrl,
7005
7351
  backend: config.gpuBackend ?? "auto",
7006
7352
  numIdentityClasses: config.numIdentityClasses
7007
7353
  });
7008
7354
  if (fallbackOnError) {
7009
- logger11.info("Creating Wav2Vec2Inference with CPU fallback");
7355
+ logger12.info("Creating Wav2Vec2Inference with CPU fallback");
7010
7356
  return new A2EWithFallback(gpuInstance, config);
7011
7357
  }
7012
- logger11.info("Creating Wav2Vec2Inference (no fallback)");
7358
+ logger12.info("Creating Wav2Vec2Inference (no fallback)");
7013
7359
  return gpuInstance;
7014
7360
  }
7015
7361
  var A2EWithFallback = class {
@@ -7017,6 +7363,7 @@ var A2EWithFallback = class {
7017
7363
  this.hasFallenBack = false;
7018
7364
  this.implementation = gpuInstance;
7019
7365
  this.config = config;
7366
+ this.resolvedCpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
7020
7367
  }
7021
7368
  get modelId() {
7022
7369
  return this.implementation.modelId;
@@ -7038,26 +7385,26 @@ var A2EWithFallback = class {
7038
7385
  }
7039
7386
  }
7040
7387
  async fallbackToCpu(reason) {
7041
- logger11.warn("GPU model load failed, falling back to CPU model", { reason });
7388
+ logger12.warn("GPU model load failed, falling back to CPU model", { reason });
7042
7389
  try {
7043
7390
  await this.implementation.dispose();
7044
7391
  } catch {
7045
7392
  }
7046
7393
  if (this.config.unifiedWorker) {
7047
7394
  this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
7048
- modelUrl: this.config.cpuModelUrl
7395
+ modelUrl: this.resolvedCpuModelUrl
7049
7396
  });
7050
- logger11.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7397
+ logger12.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7051
7398
  } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7052
7399
  this.implementation = new Wav2ArkitCpuWorker({
7053
- modelUrl: this.config.cpuModelUrl
7400
+ modelUrl: this.resolvedCpuModelUrl
7054
7401
  });
7055
- logger11.info("Fallback to Wav2ArkitCpuWorker successful");
7402
+ logger12.info("Fallback to Wav2ArkitCpuWorker successful");
7056
7403
  } else {
7057
7404
  this.implementation = new Wav2ArkitCpuInference({
7058
- modelUrl: this.config.cpuModelUrl
7405
+ modelUrl: this.resolvedCpuModelUrl
7059
7406
  });
7060
- logger11.info("Fallback to Wav2ArkitCpuInference successful");
7407
+ logger12.info("Fallback to Wav2ArkitCpuInference successful");
7061
7408
  }
7062
7409
  this.hasFallenBack = true;
7063
7410
  return await this.implementation.load();
@@ -7261,7 +7608,7 @@ var EmphasisDetector = class {
7261
7608
  };
7262
7609
 
7263
7610
  // src/inference/SileroVADInference.ts
7264
- var logger12 = createLogger("SileroVAD");
7611
+ var logger13 = createLogger("SileroVAD");
7265
7612
  var SileroVADInference = class {
7266
7613
  constructor(config) {
7267
7614
  this.session = null;
@@ -7335,23 +7682,23 @@ var SileroVADInference = class {
7335
7682
  "model.sample_rate": this.config.sampleRate
7336
7683
  });
7337
7684
  try {
7338
- logger12.info("Loading ONNX Runtime...", { preference: this.config.backend });
7685
+ logger13.info("Loading ONNX Runtime...", { preference: this.config.backend });
7339
7686
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
7340
7687
  this.ort = ort;
7341
7688
  this._backend = backend;
7342
- logger12.info("ONNX Runtime loaded", { backend: this._backend });
7689
+ logger13.info("ONNX Runtime loaded", { backend: this._backend });
7343
7690
  const cache = getModelCache();
7344
7691
  const modelUrl = this.config.modelUrl;
7345
7692
  const isCached = await cache.has(modelUrl);
7346
7693
  let modelBuffer;
7347
7694
  if (isCached) {
7348
- logger12.debug("Loading model from cache", { modelUrl });
7695
+ logger13.debug("Loading model from cache", { modelUrl });
7349
7696
  modelBuffer = await cache.get(modelUrl);
7350
7697
  } else {
7351
- logger12.debug("Fetching and caching model", { modelUrl });
7698
+ logger13.debug("Fetching and caching model", { modelUrl });
7352
7699
  modelBuffer = await fetchWithCache(modelUrl);
7353
7700
  }
7354
- logger12.debug("Creating ONNX session", {
7701
+ logger13.debug("Creating ONNX session", {
7355
7702
  size: formatBytes(modelBuffer.byteLength),
7356
7703
  backend: this._backend
7357
7704
  });
@@ -7360,7 +7707,7 @@ var SileroVADInference = class {
7360
7707
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
7361
7708
  this.reset();
7362
7709
  const loadTimeMs = performance.now() - startTime;
7363
- logger12.info("Model loaded successfully", {
7710
+ logger13.info("Model loaded successfully", {
7364
7711
  backend: this._backend,
7365
7712
  loadTimeMs: Math.round(loadTimeMs),
7366
7713
  sampleRate: this.config.sampleRate,
@@ -7415,7 +7762,7 @@ var SileroVADInference = class {
7415
7762
  []
7416
7763
  );
7417
7764
  } catch (e) {
7418
- logger12.warn("BigInt64Array not available, using bigint array fallback", {
7765
+ logger13.warn("BigInt64Array not available, using bigint array fallback", {
7419
7766
  error: e instanceof Error ? e.message : String(e)
7420
7767
  });
7421
7768
  this.srTensor = new this.ort.Tensor(
@@ -7521,7 +7868,7 @@ var SileroVADInference = class {
7521
7868
  this.preSpeechBuffer.shift();
7522
7869
  }
7523
7870
  }
7524
- logger12.trace("Skipping VAD inference - audio too quiet", {
7871
+ logger13.trace("Skipping VAD inference - audio too quiet", {
7525
7872
  rms: Math.round(rms * 1e4) / 1e4,
7526
7873
  threshold: MIN_ENERGY_THRESHOLD
7527
7874
  });
@@ -7575,7 +7922,7 @@ var SileroVADInference = class {
7575
7922
  if (isSpeech && !this.wasSpeaking) {
7576
7923
  preSpeechChunks = [...this.preSpeechBuffer];
7577
7924
  this.preSpeechBuffer = [];
7578
- logger12.debug("Speech started with pre-speech buffer", {
7925
+ logger13.debug("Speech started with pre-speech buffer", {
7579
7926
  preSpeechChunks: preSpeechChunks.length,
7580
7927
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
7581
7928
  });
@@ -7588,7 +7935,7 @@ var SileroVADInference = class {
7588
7935
  this.preSpeechBuffer = [];
7589
7936
  }
7590
7937
  this.wasSpeaking = isSpeech;
7591
- logger12.trace("VAD inference completed", {
7938
+ logger13.trace("VAD inference completed", {
7592
7939
  probability: Math.round(probability * 1e3) / 1e3,
7593
7940
  isSpeech,
7594
7941
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -7619,7 +7966,7 @@ var SileroVADInference = class {
7619
7966
  const oomError = new Error(
7620
7967
  `SileroVAD inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reducing concurrent model sessions or reloading the page.`
7621
7968
  );
7622
- logger12.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7969
+ logger13.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7623
7970
  pointer: `0x${err.toString(16)}`,
7624
7971
  backend: this._backend
7625
7972
  });
@@ -7662,7 +8009,7 @@ var SileroVADInference = class {
7662
8009
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
7663
8010
 
7664
8011
  // src/inference/SileroVADWorker.ts
7665
- var logger13 = createLogger("SileroVADWorker");
8012
+ var logger14 = createLogger("SileroVADWorker");
7666
8013
  var WASM_CDN_PATH5 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
7667
8014
  var LOAD_TIMEOUT_MS3 = 12e4;
7668
8015
  var INFERENCE_TIMEOUT_MS3 = 1e3;
@@ -7947,7 +8294,7 @@ var SileroVADWorker = class {
7947
8294
  this.handleWorkerMessage(event.data);
7948
8295
  };
7949
8296
  worker.onerror = (error) => {
7950
- logger13.error("Worker error", { error: error.message });
8297
+ logger14.error("Worker error", { error: error.message });
7951
8298
  for (const [, resolver] of this.pendingResolvers) {
7952
8299
  resolver.reject(new Error(`Worker error: ${error.message}`));
7953
8300
  }
@@ -8023,9 +8370,9 @@ var SileroVADWorker = class {
8023
8370
  "model.sample_rate": this.config.sampleRate
8024
8371
  });
8025
8372
  try {
8026
- logger13.info("Creating VAD worker...");
8373
+ logger14.info("Creating VAD worker...");
8027
8374
  this.worker = this.createWorker();
8028
- logger13.info("Loading model in worker...", {
8375
+ logger14.info("Loading model in worker...", {
8029
8376
  modelUrl: this.config.modelUrl,
8030
8377
  sampleRate: this.config.sampleRate
8031
8378
  });
@@ -8041,7 +8388,7 @@ var SileroVADWorker = class {
8041
8388
  );
8042
8389
  this._isLoaded = true;
8043
8390
  const loadTimeMs = performance.now() - startTime;
8044
- logger13.info("VAD worker loaded successfully", {
8391
+ logger14.info("VAD worker loaded successfully", {
8045
8392
  backend: "wasm",
8046
8393
  loadTimeMs: Math.round(loadTimeMs),
8047
8394
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -8148,7 +8495,7 @@ var SileroVADWorker = class {
8148
8495
  if (isSpeech && !this.wasSpeaking) {
8149
8496
  preSpeechChunks = [...this.preSpeechBuffer];
8150
8497
  this.preSpeechBuffer = [];
8151
- logger13.debug("Speech started with pre-speech buffer", {
8498
+ logger14.debug("Speech started with pre-speech buffer", {
8152
8499
  preSpeechChunks: preSpeechChunks.length,
8153
8500
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8154
8501
  });
@@ -8161,7 +8508,7 @@ var SileroVADWorker = class {
8161
8508
  this.preSpeechBuffer = [];
8162
8509
  }
8163
8510
  this.wasSpeaking = isSpeech;
8164
- logger13.trace("VAD worker inference completed", {
8511
+ logger14.trace("VAD worker inference completed", {
8165
8512
  probability: Math.round(result.probability * 1e3) / 1e3,
8166
8513
  isSpeech,
8167
8514
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -8229,63 +8576,65 @@ var SileroVADWorker = class {
8229
8576
  };
8230
8577
 
8231
8578
  // src/inference/createSileroVAD.ts
8232
- var logger14 = createLogger("createSileroVAD");
8579
+ var logger15 = createLogger("createSileroVAD");
8233
8580
  function supportsVADWorker() {
8234
8581
  if (typeof Worker === "undefined") {
8235
- logger14.debug("Worker not supported: Worker constructor undefined");
8582
+ logger15.debug("Worker not supported: Worker constructor undefined");
8236
8583
  return false;
8237
8584
  }
8238
8585
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
8239
- logger14.debug("Worker not supported: URL.createObjectURL unavailable");
8586
+ logger15.debug("Worker not supported: URL.createObjectURL unavailable");
8240
8587
  return false;
8241
8588
  }
8242
8589
  if (typeof Blob === "undefined") {
8243
- logger14.debug("Worker not supported: Blob constructor unavailable");
8590
+ logger15.debug("Worker not supported: Blob constructor unavailable");
8244
8591
  return false;
8245
8592
  }
8246
8593
  return true;
8247
8594
  }
8248
- function createSileroVAD(config) {
8595
+ function createSileroVAD(config = {}) {
8596
+ const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.sileroVad;
8597
+ const resolvedConfig = { ...config, modelUrl };
8249
8598
  if (config.unifiedWorker) {
8250
- logger14.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8251
- return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
8599
+ logger15.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8600
+ return new SileroVADUnifiedAdapter(config.unifiedWorker, resolvedConfig);
8252
8601
  }
8253
8602
  const fallbackOnError = config.fallbackOnError ?? true;
8254
8603
  let useWorker;
8255
8604
  if (config.useWorker !== void 0) {
8256
8605
  useWorker = config.useWorker;
8257
- logger14.debug("Worker preference explicitly set", { useWorker });
8606
+ logger15.debug("Worker preference explicitly set", { useWorker });
8258
8607
  } else {
8259
8608
  const workerSupported = supportsVADWorker();
8260
8609
  const onMobile = isMobile();
8261
8610
  useWorker = workerSupported && !onMobile;
8262
- logger14.debug("Auto-detected Worker preference", {
8611
+ logger15.debug("Auto-detected Worker preference", {
8263
8612
  useWorker,
8264
8613
  workerSupported,
8265
8614
  onMobile
8266
8615
  });
8267
8616
  }
8268
8617
  if (useWorker) {
8269
- logger14.info("Creating SileroVADWorker (off-main-thread)");
8618
+ logger15.info("Creating SileroVADWorker (off-main-thread)");
8270
8619
  const worker = new SileroVADWorker({
8271
- modelUrl: config.modelUrl,
8620
+ modelUrl,
8272
8621
  sampleRate: config.sampleRate,
8273
8622
  threshold: config.threshold,
8274
8623
  preSpeechBufferChunks: config.preSpeechBufferChunks
8275
8624
  });
8276
8625
  if (fallbackOnError) {
8277
- return new VADWorkerWithFallback(worker, config);
8626
+ return new VADWorkerWithFallback(worker, resolvedConfig);
8278
8627
  }
8279
8628
  return worker;
8280
8629
  }
8281
- logger14.info("Creating SileroVADInference (main thread)");
8282
- return new SileroVADInference(config);
8630
+ logger15.info("Creating SileroVADInference (main thread)");
8631
+ return new SileroVADInference(resolvedConfig);
8283
8632
  }
8284
8633
  var VADWorkerWithFallback = class {
8285
- constructor(worker, config) {
8634
+ constructor(worker, resolvedConfig) {
8286
8635
  this.hasFallenBack = false;
8287
8636
  this.implementation = worker;
8288
- this.config = config;
8637
+ this.resolvedConfig = resolvedConfig;
8289
8638
  }
8290
8639
  get backend() {
8291
8640
  if (!this.isLoaded) return null;
@@ -8304,16 +8653,16 @@ var VADWorkerWithFallback = class {
8304
8653
  try {
8305
8654
  return await this.implementation.load();
8306
8655
  } catch (error) {
8307
- logger14.warn("Worker load failed, falling back to main thread", {
8656
+ logger15.warn("Worker load failed, falling back to main thread", {
8308
8657
  error: error instanceof Error ? error.message : String(error)
8309
8658
  });
8310
8659
  try {
8311
8660
  await this.implementation.dispose();
8312
8661
  } catch {
8313
8662
  }
8314
- this.implementation = new SileroVADInference(this.config);
8663
+ this.implementation = new SileroVADInference(this.resolvedConfig);
8315
8664
  this.hasFallenBack = true;
8316
- logger14.info("Fallback to SileroVADInference successful");
8665
+ logger15.info("Fallback to SileroVADInference successful");
8317
8666
  return await this.implementation.load();
8318
8667
  }
8319
8668
  }
@@ -8335,7 +8684,7 @@ var VADWorkerWithFallback = class {
8335
8684
  };
8336
8685
 
8337
8686
  // src/inference/A2EOrchestrator.ts
8338
- var logger15 = createLogger("A2EOrchestrator");
8687
+ var logger16 = createLogger("A2EOrchestrator");
8339
8688
  var A2EOrchestrator = class {
8340
8689
  constructor(config) {
8341
8690
  this.a2e = null;
@@ -8376,7 +8725,7 @@ var A2EOrchestrator = class {
8376
8725
  */
8377
8726
  async load() {
8378
8727
  if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
8379
- logger15.info("Loading A2E model...");
8728
+ logger16.info("Loading A2E model...");
8380
8729
  this.a2e = createA2E({
8381
8730
  gpuModelUrl: this.config.gpuModelUrl,
8382
8731
  gpuExternalDataUrl: this.config.gpuExternalDataUrl,
@@ -8393,7 +8742,7 @@ var A2EOrchestrator = class {
8393
8742
  onError: this.config.onError
8394
8743
  });
8395
8744
  this._isReady = true;
8396
- logger15.info("A2E model loaded", {
8745
+ logger16.info("A2E model loaded", {
8397
8746
  backend: info.backend,
8398
8747
  loadTimeMs: info.loadTimeMs,
8399
8748
  modelId: this.a2e.modelId
@@ -8448,10 +8797,10 @@ var A2EOrchestrator = class {
8448
8797
  this.scriptProcessor.connect(this.audioContext.destination);
8449
8798
  this._isStreaming = true;
8450
8799
  this.processor.startDrip();
8451
- logger15.info("Mic capture started", { sampleRate: this.nativeSampleRate });
8800
+ logger16.info("Mic capture started", { sampleRate: this.nativeSampleRate });
8452
8801
  } catch (err) {
8453
8802
  const error = err instanceof Error ? err : new Error(String(err));
8454
- logger15.error("Failed to start mic capture", { error: error.message });
8803
+ logger16.error("Failed to start mic capture", { error: error.message });
8455
8804
  this.config.onError?.(error);
8456
8805
  throw error;
8457
8806
  }
@@ -8479,7 +8828,7 @@ var A2EOrchestrator = class {
8479
8828
  });
8480
8829
  this.audioContext = null;
8481
8830
  }
8482
- logger15.info("Mic capture stopped");
8831
+ logger16.info("Mic capture stopped");
8483
8832
  }
8484
8833
  /**
8485
8834
  * Dispose of all resources
@@ -8502,7 +8851,7 @@ var A2EOrchestrator = class {
8502
8851
  };
8503
8852
 
8504
8853
  // src/inference/SafariSpeechRecognition.ts
8505
- var logger16 = createLogger("SafariSpeech");
8854
+ var logger17 = createLogger("SafariSpeech");
8506
8855
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
8507
8856
  constructor(config = {}) {
8508
8857
  this.recognition = null;
@@ -8521,7 +8870,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8521
8870
  interimResults: config.interimResults ?? true,
8522
8871
  maxAlternatives: config.maxAlternatives ?? 1
8523
8872
  };
8524
- logger16.debug("SafariSpeechRecognition created", {
8873
+ logger17.debug("SafariSpeechRecognition created", {
8525
8874
  language: this.config.language,
8526
8875
  continuous: this.config.continuous
8527
8876
  });
@@ -8582,7 +8931,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8582
8931
  */
8583
8932
  async start() {
8584
8933
  if (this.isListening) {
8585
- logger16.warn("Already listening");
8934
+ logger17.warn("Already listening");
8586
8935
  return;
8587
8936
  }
8588
8937
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -8612,7 +8961,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8612
8961
  this.isListening = true;
8613
8962
  this.startTime = performance.now();
8614
8963
  this.accumulatedText = "";
8615
- logger16.info("Speech recognition started", {
8964
+ logger17.info("Speech recognition started", {
8616
8965
  language: this.config.language
8617
8966
  });
8618
8967
  span?.end();
@@ -8627,7 +8976,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8627
8976
  */
8628
8977
  async stop() {
8629
8978
  if (!this.isListening || !this.recognition) {
8630
- logger16.warn("Not currently listening");
8979
+ logger17.warn("Not currently listening");
8631
8980
  return {
8632
8981
  text: this.accumulatedText,
8633
8982
  language: this.config.language,
@@ -8656,7 +9005,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8656
9005
  if (this.recognition && this.isListening) {
8657
9006
  this.recognition.abort();
8658
9007
  this.isListening = false;
8659
- logger16.info("Speech recognition aborted");
9008
+ logger17.info("Speech recognition aborted");
8660
9009
  }
8661
9010
  }
8662
9011
  /**
@@ -8687,7 +9036,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8687
9036
  this.isListening = false;
8688
9037
  this.resultCallbacks = [];
8689
9038
  this.errorCallbacks = [];
8690
- logger16.debug("SafariSpeechRecognition disposed");
9039
+ logger17.debug("SafariSpeechRecognition disposed");
8691
9040
  }
8692
9041
  /**
8693
9042
  * Set up event handlers for the recognition instance
@@ -8715,7 +9064,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8715
9064
  confidence: alternative.confidence
8716
9065
  };
8717
9066
  this.emitResult(speechResult);
8718
- logger16.trace("Speech result", {
9067
+ logger17.trace("Speech result", {
8719
9068
  text: text.substring(0, 50),
8720
9069
  isFinal,
8721
9070
  confidence: alternative.confidence
@@ -8725,12 +9074,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8725
9074
  span?.end();
8726
9075
  } catch (error) {
8727
9076
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
8728
- logger16.error("Error processing speech result", { error });
9077
+ logger17.error("Error processing speech result", { error });
8729
9078
  }
8730
9079
  };
8731
9080
  this.recognition.onerror = (event) => {
8732
9081
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
8733
- logger16.error("Speech recognition error", { error: event.error, message: event.message });
9082
+ logger17.error("Speech recognition error", { error: event.error, message: event.message });
8734
9083
  this.emitError(error);
8735
9084
  if (this.stopRejecter) {
8736
9085
  this.stopRejecter(error);
@@ -8740,7 +9089,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8740
9089
  };
8741
9090
  this.recognition.onend = () => {
8742
9091
  this.isListening = false;
8743
- logger16.info("Speech recognition ended", {
9092
+ logger17.info("Speech recognition ended", {
8744
9093
  totalText: this.accumulatedText.length,
8745
9094
  durationMs: performance.now() - this.startTime
8746
9095
  });
@@ -8757,13 +9106,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8757
9106
  }
8758
9107
  };
8759
9108
  this.recognition.onstart = () => {
8760
- logger16.debug("Speech recognition started by browser");
9109
+ logger17.debug("Speech recognition started by browser");
8761
9110
  };
8762
9111
  this.recognition.onspeechstart = () => {
8763
- logger16.debug("Speech detected");
9112
+ logger17.debug("Speech detected");
8764
9113
  };
8765
9114
  this.recognition.onspeechend = () => {
8766
- logger16.debug("Speech ended");
9115
+ logger17.debug("Speech ended");
8767
9116
  };
8768
9117
  }
8769
9118
  /**
@@ -8774,7 +9123,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8774
9123
  try {
8775
9124
  callback(result);
8776
9125
  } catch (error) {
8777
- logger16.error("Error in result callback", { error });
9126
+ logger17.error("Error in result callback", { error });
8778
9127
  }
8779
9128
  }
8780
9129
  }
@@ -8786,7 +9135,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8786
9135
  try {
8787
9136
  callback(error);
8788
9137
  } catch (callbackError) {
8789
- logger16.error("Error in error callback", { error: callbackError });
9138
+ logger17.error("Error in error callback", { error: callbackError });
8790
9139
  }
8791
9140
  }
8792
9141
  }
@@ -9356,338 +9705,32 @@ var AnimationGraph = class extends EventEmitter {
9356
9705
  }
9357
9706
  };
9358
9707
 
9359
- // src/animation/simplex2d.ts
9360
- var perm = new Uint8Array(512);
9361
- var grad2 = [
9362
- [1, 1],
9363
- [-1, 1],
9364
- [1, -1],
9365
- [-1, -1],
9366
- [1, 0],
9367
- [-1, 0],
9368
- [0, 1],
9369
- [0, -1]
9370
- ];
9371
- var p = [
9372
- 151,
9373
- 160,
9374
- 137,
9375
- 91,
9376
- 90,
9377
- 15,
9378
- 131,
9379
- 13,
9380
- 201,
9381
- 95,
9382
- 96,
9383
- 53,
9384
- 194,
9385
- 233,
9386
- 7,
9387
- 225,
9388
- 140,
9389
- 36,
9390
- 103,
9391
- 30,
9392
- 69,
9393
- 142,
9394
- 8,
9395
- 99,
9396
- 37,
9397
- 240,
9398
- 21,
9399
- 10,
9400
- 23,
9401
- 190,
9402
- 6,
9403
- 148,
9404
- 247,
9405
- 120,
9406
- 234,
9407
- 75,
9408
- 0,
9409
- 26,
9410
- 197,
9411
- 62,
9412
- 94,
9413
- 252,
9414
- 219,
9415
- 203,
9416
- 117,
9417
- 35,
9418
- 11,
9419
- 32,
9420
- 57,
9421
- 177,
9422
- 33,
9423
- 88,
9424
- 237,
9425
- 149,
9426
- 56,
9427
- 87,
9428
- 174,
9429
- 20,
9430
- 125,
9431
- 136,
9432
- 171,
9433
- 168,
9434
- 68,
9435
- 175,
9436
- 74,
9437
- 165,
9438
- 71,
9439
- 134,
9440
- 139,
9441
- 48,
9442
- 27,
9443
- 166,
9444
- 77,
9445
- 146,
9446
- 158,
9447
- 231,
9448
- 83,
9449
- 111,
9450
- 229,
9451
- 122,
9452
- 60,
9453
- 211,
9454
- 133,
9455
- 230,
9456
- 220,
9457
- 105,
9458
- 92,
9459
- 41,
9460
- 55,
9461
- 46,
9462
- 245,
9463
- 40,
9464
- 244,
9465
- 102,
9466
- 143,
9467
- 54,
9468
- 65,
9469
- 25,
9470
- 63,
9471
- 161,
9472
- 1,
9473
- 216,
9474
- 80,
9475
- 73,
9476
- 209,
9477
- 76,
9478
- 132,
9479
- 187,
9480
- 208,
9481
- 89,
9482
- 18,
9483
- 169,
9484
- 200,
9485
- 196,
9486
- 135,
9487
- 130,
9488
- 116,
9489
- 188,
9490
- 159,
9491
- 86,
9492
- 164,
9493
- 100,
9494
- 109,
9495
- 198,
9496
- 173,
9497
- 186,
9498
- 3,
9499
- 64,
9500
- 52,
9501
- 217,
9502
- 226,
9503
- 250,
9504
- 124,
9505
- 123,
9506
- 5,
9507
- 202,
9508
- 38,
9509
- 147,
9510
- 118,
9511
- 126,
9512
- 255,
9513
- 82,
9514
- 85,
9515
- 212,
9516
- 207,
9517
- 206,
9518
- 59,
9519
- 227,
9520
- 47,
9521
- 16,
9522
- 58,
9523
- 17,
9524
- 182,
9525
- 189,
9526
- 28,
9527
- 42,
9528
- 223,
9529
- 183,
9530
- 170,
9531
- 213,
9532
- 119,
9533
- 248,
9534
- 152,
9535
- 2,
9536
- 44,
9537
- 154,
9538
- 163,
9539
- 70,
9540
- 221,
9541
- 153,
9542
- 101,
9543
- 155,
9544
- 167,
9545
- 43,
9546
- 172,
9547
- 9,
9548
- 129,
9549
- 22,
9550
- 39,
9551
- 253,
9552
- 19,
9553
- 98,
9554
- 108,
9555
- 110,
9556
- 79,
9557
- 113,
9558
- 224,
9559
- 232,
9560
- 178,
9561
- 185,
9562
- 112,
9563
- 104,
9564
- 218,
9565
- 246,
9566
- 97,
9567
- 228,
9568
- 251,
9569
- 34,
9570
- 242,
9571
- 193,
9572
- 238,
9573
- 210,
9574
- 144,
9575
- 12,
9576
- 191,
9577
- 179,
9578
- 162,
9579
- 241,
9580
- 81,
9581
- 51,
9582
- 145,
9583
- 235,
9584
- 249,
9585
- 14,
9586
- 239,
9587
- 107,
9588
- 49,
9589
- 192,
9590
- 214,
9591
- 31,
9592
- 181,
9593
- 199,
9594
- 106,
9595
- 157,
9596
- 184,
9597
- 84,
9598
- 204,
9599
- 176,
9600
- 115,
9601
- 121,
9602
- 50,
9603
- 45,
9604
- 127,
9605
- 4,
9606
- 150,
9607
- 254,
9608
- 138,
9609
- 236,
9610
- 205,
9611
- 93,
9612
- 222,
9613
- 114,
9614
- 67,
9615
- 29,
9616
- 24,
9617
- 72,
9618
- 243,
9619
- 141,
9620
- 128,
9621
- 195,
9622
- 78,
9623
- 66,
9624
- 215,
9625
- 61,
9626
- 156,
9627
- 180
9628
- ];
9629
- for (let i = 0; i < 256; i++) {
9630
- perm[i] = p[i];
9631
- perm[i + 256] = p[i];
9632
- }
9633
- var F2 = 0.5 * (Math.sqrt(3) - 1);
9634
- var G2 = (3 - Math.sqrt(3)) / 6;
9635
- function dot2(g, x, y) {
9636
- return g[0] * x + g[1] * y;
9637
- }
9638
- function simplex2d(x, y) {
9639
- const s = (x + y) * F2;
9640
- const i = Math.floor(x + s);
9641
- const j = Math.floor(y + s);
9642
- const t = (i + j) * G2;
9643
- const X0 = i - t;
9644
- const Y0 = j - t;
9645
- const x0 = x - X0;
9646
- const y0 = y - Y0;
9647
- const i1 = x0 > y0 ? 1 : 0;
9648
- const j1 = x0 > y0 ? 0 : 1;
9649
- const x1 = x0 - i1 + G2;
9650
- const y1 = y0 - j1 + G2;
9651
- const x2 = x0 - 1 + 2 * G2;
9652
- const y2 = y0 - 1 + 2 * G2;
9653
- const ii = i & 255;
9654
- const jj = j & 255;
9655
- const gi0 = perm[ii + perm[jj]] % 8;
9656
- const gi1 = perm[ii + i1 + perm[jj + j1]] % 8;
9657
- const gi2 = perm[ii + 1 + perm[jj + 1]] % 8;
9658
- let n0 = 0;
9659
- let t0 = 0.5 - x0 * x0 - y0 * y0;
9660
- if (t0 >= 0) {
9661
- t0 *= t0;
9662
- n0 = t0 * t0 * dot2(grad2[gi0], x0, y0);
9663
- }
9664
- let n1 = 0;
9665
- let t1 = 0.5 - x1 * x1 - y1 * y1;
9666
- if (t1 >= 0) {
9667
- t1 *= t1;
9668
- n1 = t1 * t1 * dot2(grad2[gi1], x1, y1);
9669
- }
9670
- let n2 = 0;
9671
- let t2 = 0.5 - x2 * x2 - y2 * y2;
9672
- if (t2 >= 0) {
9673
- t2 *= t2;
9674
- n2 = t2 * t2 * dot2(grad2[gi2], x2, y2);
9675
- }
9676
- return 70 * (n0 + n1 + n2);
9677
- }
9678
-
9679
9708
  // src/animation/ProceduralLifeLayer.ts
9709
+ import { createNoise2D } from "simplex-noise";
9710
+ var simplex2d = createNoise2D();
9711
+ var LIFE_BS_INDEX = /* @__PURE__ */ new Map();
9712
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
9713
+ LIFE_BS_INDEX.set(LAM_BLENDSHAPES[i], i);
9714
+ }
9680
9715
  var PHASE_OPEN = 0;
9681
9716
  var PHASE_CLOSING = 1;
9682
9717
  var PHASE_CLOSED = 2;
9683
9718
  var PHASE_OPENING = 3;
9684
- var BLINK_CLOSE_DURATION = 0.06;
9719
+ var BLINK_CLOSE_DURATION = 0.092;
9685
9720
  var BLINK_HOLD_DURATION = 0.04;
9686
- var BLINK_OPEN_DURATION = 0.15;
9721
+ var BLINK_OPEN_DURATION = 0.242;
9687
9722
  var BLINK_ASYMMETRY_DELAY = 8e-3;
9723
+ var BLINK_IBI_MU = Math.log(5.97);
9724
+ var BLINK_IBI_SIGMA = 0.89;
9688
9725
  var GAZE_BREAK_DURATION = 0.12;
9689
9726
  var GAZE_BREAK_HOLD_DURATION = 0.3;
9690
9727
  var GAZE_BREAK_RETURN_DURATION = 0.15;
9728
+ var GAZE_STATE_PARAMS = {
9729
+ idle: { interval: [2, 5], amplitude: [0.15, 0.4] },
9730
+ listening: { interval: [4, 10], amplitude: [0.1, 0.25] },
9731
+ thinking: { interval: [1, 3], amplitude: [0.2, 0.5] },
9732
+ speaking: { interval: [2, 6], amplitude: [0.15, 0.35] }
9733
+ };
9691
9734
  var EYE_NOISE_X_FREQ = 0.8;
9692
9735
  var EYE_NOISE_Y_FREQ = 0.6;
9693
9736
  var EYE_NOISE_X_PHASE = 73.1;
@@ -9715,6 +9758,12 @@ function smoothStep(t) {
9715
9758
  function softClamp(v, max) {
9716
9759
  return Math.tanh(v / max) * max;
9717
9760
  }
9761
+ function sampleLogNormal(mu, sigma) {
9762
+ const u1 = Math.random();
9763
+ const u2 = Math.random();
9764
+ const z = Math.sqrt(-2 * Math.log(u1 || 1e-10)) * Math.cos(2 * Math.PI * u2);
9765
+ return Math.exp(mu + sigma * z);
9766
+ }
9718
9767
  var ProceduralLifeLayer = class {
9719
9768
  constructor(config) {
9720
9769
  // Blink state
@@ -9727,7 +9776,7 @@ var ProceduralLifeLayer = class {
9727
9776
  // Eye contact (smoothed)
9728
9777
  this.smoothedEyeX = 0;
9729
9778
  this.smoothedEyeY = 0;
9730
- // Eye micro-motion (continuous simplex noise, no discrete events)
9779
+ // Eye micro-motion
9731
9780
  this.eyeNoiseTime = 0;
9732
9781
  // Gaze break state
9733
9782
  this.gazeBreakTimer = 0;
@@ -9737,6 +9786,8 @@ var ProceduralLifeLayer = class {
9737
9786
  this.gazeBreakTargetY = 0;
9738
9787
  this.gazeBreakCurrentX = 0;
9739
9788
  this.gazeBreakCurrentY = 0;
9789
+ // Conversational state for gaze
9790
+ this.currentState = null;
9740
9791
  // Breathing / postural sway
9741
9792
  this.microMotionTime = 0;
9742
9793
  this.breathingPhase = 0;
@@ -9745,6 +9796,7 @@ var ProceduralLifeLayer = class {
9745
9796
  this.previousEnergy = 0;
9746
9797
  this.emphasisLevel = 0;
9747
9798
  this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
9799
+ this.useLogNormalBlinks = !config?.blinkIntervalRange;
9748
9800
  this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
9749
9801
  this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
9750
9802
  this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;
@@ -9754,7 +9806,7 @@ var ProceduralLifeLayer = class {
9754
9806
  this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
9755
9807
  this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
9756
9808
  this.eyeSmoothing = config?.eyeSmoothing ?? 15;
9757
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
9809
+ this.blinkInterval = this.nextBlinkInterval();
9758
9810
  this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
9759
9811
  }
9760
9812
  /**
@@ -9769,6 +9821,7 @@ var ProceduralLifeLayer = class {
9769
9821
  const eyeTargetY = input?.eyeTargetY ?? 0;
9770
9822
  const audioEnergy = input?.audioEnergy ?? 0;
9771
9823
  const isSpeaking = input?.isSpeaking ?? false;
9824
+ this.currentState = input?.state ?? null;
9772
9825
  const safeDelta = Math.min(delta, 0.1);
9773
9826
  const blendshapes = {};
9774
9827
  this.updateBlinks(delta);
@@ -9807,6 +9860,12 @@ var ProceduralLifeLayer = class {
9807
9860
  const swayAmp = this.posturalSwayAmplitude;
9808
9861
  const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
9809
9862
  const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
9863
+ const breathVal = Math.sin(this.breathingPhase);
9864
+ if (breathVal > 0) {
9865
+ blendshapes["jawOpen"] = breathVal * 0.015;
9866
+ blendshapes["noseSneerLeft"] = breathVal * 8e-3;
9867
+ blendshapes["noseSneerRight"] = breathVal * 8e-3;
9868
+ }
9810
9869
  return {
9811
9870
  blendshapes,
9812
9871
  headDelta: {
@@ -9815,12 +9874,35 @@ var ProceduralLifeLayer = class {
9815
9874
  }
9816
9875
  };
9817
9876
  }
9877
+ /**
9878
+ * Write life layer output directly to a Float32Array[52] in LAM_BLENDSHAPES order.
9879
+ *
9880
+ * Includes micro-jitter (0.4% amplitude simplex noise on all channels) to
9881
+ * break uncanny stillness on undriven channels.
9882
+ *
9883
+ * @param delta - Time since last frame in seconds
9884
+ * @param input - Per-frame input
9885
+ * @param out - Pre-allocated Float32Array(52) to write into
9886
+ */
9887
+ updateToArray(delta, input, out) {
9888
+ out.fill(0);
9889
+ const result = this.update(delta, input);
9890
+ for (const [name, value] of Object.entries(result.blendshapes)) {
9891
+ const idx = LIFE_BS_INDEX.get(name);
9892
+ if (idx !== void 0) {
9893
+ out[idx] = value;
9894
+ }
9895
+ }
9896
+ for (let i = 0; i < 52; i++) {
9897
+ out[i] += simplex2d(this.noiseTime * 0.3, i * 7.13) * 4e-3;
9898
+ }
9899
+ }
9818
9900
  /**
9819
9901
  * Reset all internal state to initial values.
9820
9902
  */
9821
9903
  reset() {
9822
9904
  this.blinkTimer = 0;
9823
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
9905
+ this.blinkInterval = this.nextBlinkInterval();
9824
9906
  this.blinkPhase = PHASE_OPEN;
9825
9907
  this.blinkProgress = 0;
9826
9908
  this.asymmetryRight = 0.97;
@@ -9837,6 +9919,7 @@ var ProceduralLifeLayer = class {
9837
9919
  this.gazeBreakTargetY = 0;
9838
9920
  this.gazeBreakCurrentX = 0;
9839
9921
  this.gazeBreakCurrentY = 0;
9922
+ this.currentState = null;
9840
9923
  this.microMotionTime = 0;
9841
9924
  this.breathingPhase = 0;
9842
9925
  this.noiseTime = 0;
@@ -9844,6 +9927,21 @@ var ProceduralLifeLayer = class {
9844
9927
  this.emphasisLevel = 0;
9845
9928
  }
9846
9929
  // =====================================================================
9930
+ // PRIVATE: Blink interval sampling
9931
+ // =====================================================================
9932
+ /**
9933
+ * Sample next blink interval.
9934
+ * Uses log-normal distribution (PMC3565584) when using default config,
9935
+ * or uniform random when custom blinkIntervalRange is provided.
9936
+ */
9937
+ nextBlinkInterval() {
9938
+ if (this.useLogNormalBlinks) {
9939
+ const sample = sampleLogNormal(BLINK_IBI_MU, BLINK_IBI_SIGMA);
9940
+ return clamp(sample, 1.5, 12);
9941
+ }
9942
+ return randomRange(...this.blinkIntervalRange);
9943
+ }
9944
+ // =====================================================================
9847
9945
  // PRIVATE: Blink system
9848
9946
  // =====================================================================
9849
9947
  updateBlinks(delta) {
@@ -9852,7 +9950,7 @@ var ProceduralLifeLayer = class {
9852
9950
  this.blinkPhase = PHASE_CLOSING;
9853
9951
  this.blinkProgress = 0;
9854
9952
  this.blinkTimer = 0;
9855
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
9953
+ this.blinkInterval = this.nextBlinkInterval();
9856
9954
  this.asymmetryRight = 0.95 + Math.random() * 0.08;
9857
9955
  }
9858
9956
  if (this.blinkPhase > PHASE_OPEN) {
@@ -9908,18 +10006,32 @@ var ProceduralLifeLayer = class {
9908
10006
  return { x, y };
9909
10007
  }
9910
10008
  // =====================================================================
9911
- // PRIVATE: Gaze breaks
10009
+ // PRIVATE: Gaze breaks (state-dependent)
9912
10010
  // =====================================================================
10011
+ /**
10012
+ * Get active gaze parameters — uses state-dependent params when
10013
+ * conversational state is provided, otherwise falls back to config ranges.
10014
+ */
10015
+ getActiveGazeParams() {
10016
+ if (this.currentState && GAZE_STATE_PARAMS[this.currentState]) {
10017
+ return GAZE_STATE_PARAMS[this.currentState];
10018
+ }
10019
+ return {
10020
+ interval: this.gazeBreakIntervalRange,
10021
+ amplitude: this.gazeBreakAmplitudeRange
10022
+ };
10023
+ }
9913
10024
  updateGazeBreaks(delta) {
9914
10025
  this.gazeBreakTimer += delta;
9915
10026
  if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
9916
10027
  this.gazeBreakPhase = PHASE_CLOSING;
9917
10028
  this.gazeBreakProgress = 0;
9918
10029
  this.gazeBreakTimer = 0;
9919
- const amp = randomRange(...this.gazeBreakAmplitudeRange);
10030
+ const params = this.getActiveGazeParams();
10031
+ const amp = randomRange(...params.amplitude);
9920
10032
  this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
9921
10033
  this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
9922
- this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
10034
+ this.gazeBreakInterval = randomRange(...params.interval);
9923
10035
  }
9924
10036
  if (this.gazeBreakPhase > PHASE_OPEN) {
9925
10037
  this.gazeBreakProgress += delta;
@@ -9984,6 +10096,971 @@ var ProceduralLifeLayer = class {
9984
10096
  }
9985
10097
  };
9986
10098
 
10099
+ // src/face/FACSMapping.ts
10100
+ var EMOTION_TO_AU = {
10101
+ joy: [
10102
+ { au: "AU6", intensity: 0.7, region: "upper" },
10103
+ // cheek raise (Duchenne)
10104
+ { au: "AU12", intensity: 0.8, region: "lower" }
10105
+ // lip corner pull (smile)
10106
+ ],
10107
+ anger: [
10108
+ { au: "AU4", intensity: 0.8, region: "upper" },
10109
+ // brow lower
10110
+ { au: "AU5", intensity: 0.4, region: "upper" },
10111
+ // upper lid raise
10112
+ { au: "AU7", intensity: 0.3, region: "upper" },
10113
+ // lid tighten
10114
+ { au: "AU23", intensity: 0.6, region: "lower" }
10115
+ // lip tighten
10116
+ ],
10117
+ sadness: [
10118
+ { au: "AU1", intensity: 0.7, region: "upper" },
10119
+ // inner brow raise
10120
+ { au: "AU4", intensity: 0.3, region: "upper" },
10121
+ // brow lower (furrow)
10122
+ { au: "AU15", intensity: 0.5, region: "lower" }
10123
+ // lip corner depress
10124
+ ],
10125
+ fear: [
10126
+ { au: "AU1", intensity: 0.6, region: "upper" },
10127
+ // inner brow raise
10128
+ { au: "AU2", intensity: 0.5, region: "upper" },
10129
+ // outer brow raise
10130
+ { au: "AU4", intensity: 0.3, region: "upper" },
10131
+ // brow lower
10132
+ { au: "AU5", intensity: 0.5, region: "upper" },
10133
+ // upper lid raise
10134
+ { au: "AU20", intensity: 0.4, region: "lower" }
10135
+ // lip stretch
10136
+ ],
10137
+ disgust: [
10138
+ { au: "AU9", intensity: 0.7, region: "upper" },
10139
+ // nose wrinkle
10140
+ { au: "AU10", intensity: 0.5, region: "lower" },
10141
+ // upper lip raise
10142
+ { au: "AU15", intensity: 0.4, region: "lower" }
10143
+ // lip corner depress
10144
+ ],
10145
+ amazement: [
10146
+ { au: "AU1", intensity: 0.6, region: "upper" },
10147
+ // inner brow raise
10148
+ { au: "AU2", intensity: 0.7, region: "upper" },
10149
+ // outer brow raise
10150
+ { au: "AU5", intensity: 0.6, region: "upper" },
10151
+ // upper lid raise
10152
+ { au: "AU26", intensity: 0.4, region: "lower" }
10153
+ // jaw drop
10154
+ ],
10155
+ grief: [
10156
+ { au: "AU1", intensity: 0.8, region: "upper" },
10157
+ // inner brow raise
10158
+ { au: "AU4", intensity: 0.5, region: "upper" },
10159
+ // brow lower
10160
+ { au: "AU6", intensity: 0.3, region: "upper" },
10161
+ // cheek raise (grief cry)
10162
+ { au: "AU15", intensity: 0.6, region: "lower" }
10163
+ // lip corner depress
10164
+ ],
10165
+ cheekiness: [
10166
+ { au: "AU2", intensity: 0.4, region: "upper" },
10167
+ // outer brow raise
10168
+ { au: "AU6", intensity: 0.4, region: "upper" },
10169
+ // cheek raise
10170
+ { au: "AU12", intensity: 0.6, region: "lower" }
10171
+ // lip corner pull (smirk)
10172
+ ],
10173
+ pain: [
10174
+ { au: "AU4", intensity: 0.7, region: "upper" },
10175
+ // brow lower
10176
+ { au: "AU6", intensity: 0.4, region: "upper" },
10177
+ // cheek raise (orbicularis)
10178
+ { au: "AU7", intensity: 0.7, region: "upper" },
10179
+ // lid tighten (squint)
10180
+ { au: "AU9", intensity: 0.5, region: "upper" }
10181
+ // nose wrinkle
10182
+ ],
10183
+ outofbreath: [
10184
+ { au: "AU1", intensity: 0.3, region: "upper" },
10185
+ // inner brow raise
10186
+ { au: "AU25", intensity: 0.3, region: "lower" },
10187
+ // lips part
10188
+ { au: "AU26", intensity: 0.5, region: "lower" }
10189
+ // jaw drop
10190
+ ]
10191
+ };
10192
+ var AU_TO_ARKIT = {
10193
+ "AU1": [{ blendshape: "browInnerUp", weight: 1 }],
10194
+ "AU2": [{ blendshape: "browOuterUpLeft", weight: 1 }, { blendshape: "browOuterUpRight", weight: 1 }],
10195
+ "AU4": [{ blendshape: "browDownLeft", weight: 1 }, { blendshape: "browDownRight", weight: 1 }],
10196
+ "AU5": [{ blendshape: "eyeWideLeft", weight: 1 }, { blendshape: "eyeWideRight", weight: 1 }],
10197
+ "AU6": [{ blendshape: "cheekSquintLeft", weight: 1 }, { blendshape: "cheekSquintRight", weight: 1 }],
10198
+ "AU7": [{ blendshape: "eyeSquintLeft", weight: 1 }, { blendshape: "eyeSquintRight", weight: 1 }],
10199
+ "AU9": [{ blendshape: "noseSneerLeft", weight: 1 }, { blendshape: "noseSneerRight", weight: 1 }],
10200
+ "AU10": [{ blendshape: "mouthUpperUpLeft", weight: 1 }, { blendshape: "mouthUpperUpRight", weight: 1 }],
10201
+ "AU12": [{ blendshape: "mouthSmileLeft", weight: 1 }, { blendshape: "mouthSmileRight", weight: 1 }],
10202
+ "AU15": [{ blendshape: "mouthFrownLeft", weight: 1 }, { blendshape: "mouthFrownRight", weight: 1 }],
10203
+ "AU20": [{ blendshape: "mouthStretchLeft", weight: 1 }, { blendshape: "mouthStretchRight", weight: 1 }],
10204
+ "AU23": [{ blendshape: "mouthPressLeft", weight: 1 }, { blendshape: "mouthPressRight", weight: 1 }],
10205
+ "AU25": [{ blendshape: "jawOpen", weight: 0.3 }],
10206
+ "AU26": [{ blendshape: "jawOpen", weight: 1 }]
10207
+ };
10208
+ var ALL_AUS = [...new Set(
10209
+ Object.values(EMOTION_TO_AU).flatMap((activations) => activations.map((a) => a.au))
10210
+ )];
10211
+
10212
+ // src/face/EmotionResolver.ts
10213
+ var BS_INDEX = /* @__PURE__ */ new Map();
10214
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
10215
+ BS_INDEX.set(LAM_BLENDSHAPES[i], i);
10216
+ }
10217
+ var EmotionResolver = class {
10218
+ constructor() {
10219
+ this.upperBuffer = new Float32Array(52);
10220
+ this.lowerBuffer = new Float32Array(52);
10221
+ }
10222
+ /**
10223
+ * Resolve emotion weights to upper/lower face blendshape contributions.
10224
+ *
10225
+ * @param weights - Emotion channel weights from EmotionController
10226
+ * @param intensity - Global intensity multiplier (0-2). Default: 1.0
10227
+ * @returns Upper and lower face blendshape arrays (52 channels each)
10228
+ */
10229
+ resolve(weights, intensity = 1) {
10230
+ const upper = this.upperBuffer;
10231
+ const lower = this.lowerBuffer;
10232
+ upper.fill(0);
10233
+ lower.fill(0);
10234
+ for (const emotionName of EMOTION_NAMES) {
10235
+ const emotionWeight = weights[emotionName];
10236
+ if (!emotionWeight || emotionWeight < 0.01) continue;
10237
+ const auActivations = EMOTION_TO_AU[emotionName];
10238
+ if (!auActivations) continue;
10239
+ for (const activation of auActivations) {
10240
+ const arkitMappings = AU_TO_ARKIT[activation.au];
10241
+ if (!arkitMappings) continue;
10242
+ const target = activation.region === "upper" ? upper : lower;
10243
+ const scale = emotionWeight * activation.intensity * intensity;
10244
+ for (const mapping of arkitMappings) {
10245
+ const idx = BS_INDEX.get(mapping.blendshape);
10246
+ if (idx !== void 0) {
10247
+ target[idx] += mapping.weight * scale;
10248
+ }
10249
+ }
10250
+ }
10251
+ }
10252
+ for (let i = 0; i < 52; i++) {
10253
+ if (upper[i] > 1) upper[i] = 1;
10254
+ if (lower[i] > 1) lower[i] = 1;
10255
+ }
10256
+ return {
10257
+ upper: new Float32Array(upper),
10258
+ lower: new Float32Array(lower)
10259
+ };
10260
+ }
10261
+ };
10262
+
10263
+ // src/face/FaceCompositor.ts
10264
+ function smoothstep(t) {
10265
+ return t * t * (3 - 2 * t);
10266
+ }
10267
+ var BS_INDEX2 = /* @__PURE__ */ new Map();
10268
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
10269
+ BS_INDEX2.set(LAM_BLENDSHAPES[i], i);
10270
+ }
10271
+ var IDX_MOUTH_CLOSE = BS_INDEX2.get("mouthClose");
10272
+ var IS_EYE_CHANNEL = new Array(52).fill(false);
10273
+ for (const name of LAM_BLENDSHAPES) {
10274
+ if (name.startsWith("eyeBlink") || name.startsWith("eyeLook")) {
10275
+ IS_EYE_CHANNEL[BS_INDEX2.get(name)] = true;
10276
+ }
10277
+ }
10278
/**
 * FaceCompositor — composes one 52-channel blendshape output frame per call.
 *
 * Signal chain (see compose()):
 *   1. copy the raw A2E base frame
 *   2. add the smoothed "upper" emotion layer (additive)
 *   3. scale by the smoothed "lower" emotion layer (multiplicative,
 *      suppressed while mouthClose is high)
 *   4. mix in the procedural life layer (eye channels are REPLACED,
 *      all other channels are added)
 *   5. apply per-character profile (multiplier/offset) and clamp to [0, 1]
 */
var FaceCompositor = class {
  constructor(config) {
    this.emotionResolver = new EmotionResolver();
    // Pre-allocated buffers
    this.smoothedUpper = new Float32Array(52);
    this.smoothedLower = new Float32Array(52);
    this.lifeBuffer = new Float32Array(52);
    // Profile arrays (pre-expanded to 52 channels)
    this.multiplier = new Float32Array(52).fill(1);
    this.offset = new Float32Array(52);
    this.lifeLayer = config?.lifeLayer ?? new ProceduralLifeLayer();
    // Smoothing coefficient for the per-frame EMA toward emotion targets.
    this.emotionSmoothing = config?.emotionSmoothing ?? 0.12;
    if (config?.profile) {
      this.applyProfileArrays(config.profile);
    }
  }
  /**
   * Compose a single output frame from the 5-stage signal chain.
   *
   * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
   * @param input - Per-frame input (deltaTime, emotion, life layer params)
   * @returns Float32Array[52] with all values clamped to [0, 1]
   */
  compose(base, input) {
    const out = new Float32Array(52);
    out.set(base);
    // Per-frame emotion overrides the sticky emotion set via setEmotion().
    const emotion = input.emotion ?? this.stickyEmotion;
    if (emotion) {
      const resolved = this.emotionResolver.resolve(
        emotion,
        input.emotionIntensity ?? 1
      );
      // One-pole low-pass toward the resolved emotion targets; state
      // persists across frames so transitions fade in/out smoothly.
      const k = this.emotionSmoothing;
      for (let i = 0; i < 52; i++) {
        this.smoothedUpper[i] += (resolved.upper[i] - this.smoothedUpper[i]) * k;
        this.smoothedLower[i] += (resolved.lower[i] - this.smoothedLower[i]) * k;
      }
      // Suppress the lower-face layer as mouthClose rises: full strength at
      // mc <= 0.3, down to 0.1 at mc >= 0.7, smoothstep-blended in between
      // (presumably to keep bilabial closures intact — confirm intent).
      const mc = base[IDX_MOUTH_CLOSE];
      const bilabialSuppress = mc <= 0.3 ? 1 : mc >= 0.7 ? 0.1 : 1 - 0.9 * smoothstep((mc - 0.3) * 2.5);
      // Upper layer is additive…
      for (let i = 0; i < 52; i++) {
        out[i] += this.smoothedUpper[i];
      }
      // …lower layer is multiplicative (scales whatever is already there).
      for (let i = 0; i < 52; i++) {
        out[i] *= 1 + this.smoothedLower[i] * bilabialSuppress;
      }
    }
    // Life layer: eye channels (blink/gaze) fully replace the signal,
    // everything else is added on top.
    this.lifeLayer.updateToArray(input.deltaTime, input, this.lifeBuffer);
    for (let i = 0; i < 52; i++) {
      if (IS_EYE_CHANNEL[i]) {
        out[i] = this.lifeBuffer[i];
      } else {
        out[i] += this.lifeBuffer[i];
      }
    }
    // Character profile: per-channel gain + bias.
    for (let i = 0; i < 52; i++) {
      out[i] = out[i] * this.multiplier[i] + this.offset[i];
    }
    // Final clamp to the valid blendshape range.
    for (let i = 0; i < 52; i++) {
      if (out[i] < 0) out[i] = 0;
      else if (out[i] > 1) out[i] = 1;
    }
    return out;
  }
  /**
   * Set sticky emotion (used when input.emotion is not provided).
   */
  setEmotion(weights) {
    this.stickyEmotion = weights;
  }
  /**
   * Update character profile at runtime.
   */
  setProfile(profile) {
    // Reset to identity first so channels absent from the new profile
    // fall back to multiplier=1 / offset=0.
    this.multiplier.fill(1);
    this.offset.fill(0);
    this.applyProfileArrays(profile);
  }
  /**
   * Reset all smoothing state and life layer.
   */
  reset() {
    this.smoothedUpper.fill(0);
    this.smoothedLower.fill(0);
    this.lifeBuffer.fill(0);
    this.stickyEmotion = void 0;
    this.lifeLayer.reset();
  }
  /** Expand partial profile maps into dense Float32Arrays */
  applyProfileArrays(profile) {
    // Unknown blendshape names and undefined values are silently ignored.
    if (profile.multiplier) {
      for (const [name, value] of Object.entries(profile.multiplier)) {
        const idx = BS_INDEX2.get(name);
        if (idx !== void 0 && value !== void 0) {
          this.multiplier[idx] = value;
        }
      }
    }
    if (profile.offset) {
      for (const [name, value] of Object.entries(profile.offset)) {
        const idx = BS_INDEX2.get(name);
        if (idx !== void 0 && value !== void 0) {
          this.offset[idx] = value;
        }
      }
    }
  }
};
10385
+
10386
// src/orchestration/MicLipSync.ts
var logger18 = createLogger("MicLipSync");
/**
 * MicLipSync — microphone-driven lip-sync orchestrator.
 *
 * Wires MicrophoneCapture → A2EProcessor: each PCM chunk is converted to
 * Float32 and pushed into the A2E inference loop; resulting blendshape
 * frames are profile-scaled and re-emitted as "frame" events. When a VAD
 * is supplied, chunks are also accumulated into fixed-size VAD windows to
 * emit "speech:start" / "speech:end".
 *
 * States: "idle" → start() → "active" ⇄ pause()/resume() ⇄ "paused".
 */
var MicLipSync = class extends EventEmitter {
  constructor(config) {
    super();
    // Internal bus shared with MicrophoneCapture (not the public emitter).
    this.omoteEvents = new EventEmitter();
    this._state = "idle";
    this._isSpeaking = false;
    this._currentFrame = null;
    this._currentRawFrame = null;
    // VAD state
    this.speechStartTime = 0;
    this.vadChunkSize = 0;
    this.vadBuffer = null;
    this.vadBufferOffset = 0;
    this.profile = config.profile ?? {};
    this.vad = config.vad;
    this.mic = new MicrophoneCapture(this.omoteEvents, {
      sampleRate: config.sampleRate ?? 16e3,
      chunkSize: config.micChunkSize ?? 512
    });
    this.processor = new A2EProcessor({
      backend: config.lam,
      sampleRate: config.sampleRate ?? 16e3,
      identityIndex: config.identityIndex,
      onFrame: (raw) => {
        // Keep both raw and profile-scaled frames available to callers.
        const scaled = applyProfile(raw, this.profile);
        this._currentFrame = scaled;
        this._currentRawFrame = raw;
        this.emit("frame", { blendshapes: scaled, rawBlendshapes: raw });
      },
      onError: (error) => {
        logger18.error("A2E inference error", { message: error.message });
        this.emit("error", error);
      }
    });
    this.omoteEvents.on("audio.chunk", ({ pcm }) => {
      const float32 = int16ToFloat32(pcm);
      this.processor.pushAudio(float32);
      if (this.vad) {
        // Fire-and-forget: processVAD is async but intentionally not
        // awaited here, so it never blocks the audio path. NOTE(review):
        // overlapping calls could interleave on vadBuffer — confirm chunk
        // cadence makes this safe in practice.
        this.processVAD(float32);
      }
    });
    this.omoteEvents.on("audio.level", (level) => {
      this.emit("audio:level", level);
    });
    if (this.vad) {
      // VAD models expect a fixed window size; chunks are re-buffered below.
      this.vadChunkSize = this.vad.getChunkSize();
      this.vadBuffer = new Float32Array(this.vadChunkSize);
      this.vadBufferOffset = 0;
    }
  }
  /** Current state */
  get state() {
    return this._state;
  }
  /** Latest blendshape frame (null before first inference) */
  get currentFrame() {
    return this._currentFrame;
  }
  /** Whether speech is currently detected (requires VAD) */
  get isSpeaking() {
    return this._isSpeaking;
  }
  /** Current backend type */
  get backend() {
    return this.processor ? "active" : null;
  }
  // ---------------------------------------------------------------------------
  // Public API
  // ---------------------------------------------------------------------------
  /** Start microphone capture and inference loop */
  async start() {
    if (this._state === "active") return;
    await this.mic.start();
    this.processor.startDrip();
    this.emit("mic:start", void 0);
    this.setState("active");
  }
  /** Stop microphone and inference */
  stop() {
    if (this._state === "idle") return;
    this.processor.stopDrip();
    this.mic.stop();
    this._isSpeaking = false;
    this.emit("mic:stop", void 0);
    this.setState("idle");
  }
  /** Pause inference (mic stays open for faster resume) */
  pause() {
    if (this._state !== "active") return;
    this.processor.stopDrip();
    this.setState("paused");
  }
  /** Resume inference after pause */
  resume() {
    if (this._state !== "paused") return;
    this.processor.startDrip();
    this.setState("active");
  }
  /** Update ExpressionProfile at runtime */
  setProfile(profile) {
    this.profile = profile;
  }
  /** Dispose of all resources */
  async dispose() {
    this.stop();
    this.processor.dispose();
  }
  // ---------------------------------------------------------------------------
  // Internal: VAD processing
  // ---------------------------------------------------------------------------
  /**
   * Re-buffer incoming samples into fixed-size VAD windows; each full
   * window is scored and speech start/end transitions are emitted.
   * VAD errors are logged and swallowed (best-effort detection).
   */
  async processVAD(samples) {
    if (!this.vad || !this.vadBuffer) return;
    for (let i = 0; i < samples.length; i++) {
      this.vadBuffer[this.vadBufferOffset++] = samples[i];
      if (this.vadBufferOffset >= this.vadChunkSize) {
        try {
          const result = await this.vad.process(this.vadBuffer);
          const wasSpeaking = this._isSpeaking;
          this._isSpeaking = result.isSpeech;
          if (!wasSpeaking && result.isSpeech) {
            this.speechStartTime = performance.now();
            this.emit("speech:start", void 0);
          } else if (wasSpeaking && !result.isSpeech) {
            const durationMs = performance.now() - this.speechStartTime;
            this.emit("speech:end", { durationMs });
          }
        } catch (err) {
          logger18.warn("VAD process error", { error: String(err) });
        }
        this.vadBufferOffset = 0;
      }
    }
  }
  // ---------------------------------------------------------------------------
  // Internal: State management
  // ---------------------------------------------------------------------------
  setState(state) {
    if (this._state === state) return;
    this._state = state;
    this.emit("state", state);
  }
};
10530
+
10531
// src/orchestration/VoicePipeline.ts
var logger19 = createLogger("VoicePipeline");
/**
 * VoicePipeline — full voice-conversation orchestrator.
 *
 * Lifecycle: loadModels() → "ready" → start() → "listening" →
 * (VAD speech → silence) → "thinking" (ASR) → "speaking" (onResponse +
 * playback) → back to "listening". stop() returns to "ready".
 *
 * `epoch` is a monotonically increasing token captured by async work;
 * any stage whose captured epoch no longer matches discards its result
 * (used for stop(), interruption, and overlapping transcriptions).
 */
var VoicePipeline = class extends EventEmitter {
  constructor(config) {
    super();
    // State
    this._state = "idle";
    this.stopped = false;
    this.epoch = 0;
    this._sessionId = null;
    // Models
    this.asr = null;
    this.lam = null;
    this.vad = null;
    this.unifiedWorker = null;
    // Pipelines
    this.playback = null;
    this.interruption = null;
    this.omoteEvents = new EventEmitter();
    this.mic = null;
    // Audio accumulation
    this.audioBuffer = [];
    this.audioBufferSamples = 0;
    this.speechStartTime = 0;
    this.silenceTimer = null;
    this.isSpeaking = false;
    // Progressive transcription
    this.progressiveTimer = null;
    this.progressivePromise = null;
    this.lastProgressiveResult = null;
    this.lastProgressiveSamples = 0;
    // ASR error recovery
    this.asrErrorCount = 0;
    // Response abort
    this.responseAbortController = null;
    // Frame refs
    this._currentFrame = null;
    this.config = config;
  }
  /** Current pipeline state */
  get state() {
    return this._state;
  }
  /** Latest blendshape frame */
  get currentFrame() {
    return this._currentFrame;
  }
  /** Whether user is currently speaking */
  get isSpeechActive() {
    return this.isSpeaking;
  }
  /** Session ID (generated on start(), null before) */
  get sessionId() {
    return this._sessionId;
  }
  // ---------------------------------------------------------------------------
  // Model loading
  // ---------------------------------------------------------------------------
  /**
   * Load ASR, LAM (lip-sync) and VAD models, then build the playback and
   * interruption pipelines. Emits "loading:progress" along the way; on
   * failure emits "error", enters the "error" state and rethrows.
   */
  async loadModels() {
    this.setState("loading");
    const timeoutMs = this.config.lamLoadTimeoutMs ?? 3e4;
    try {
      // iOS routes all inference through a single unified worker.
      if (isIOS()) {
        this.unifiedWorker = new UnifiedInferenceWorker();
        await this.unifiedWorker.init();
      }
      this.emitProgress("Speech recognition", 0, 3, 0);
      this.asr = createSenseVoice({
        modelUrl: this.config.models.senseVoice.modelUrl,
        tokensUrl: this.config.models.senseVoice.tokensUrl,
        language: this.config.models.senseVoice.language,
        unifiedWorker: this.unifiedWorker ?? void 0
      });
      await this.asr.load();
      this.emitProgress("Speech recognition", 45, 3, 1);
      this.emitProgress("Lip sync", 45, 3, 1);
      let lam = createA2E({
        gpuModelUrl: this.config.models.lam.gpuModelUrl,
        gpuExternalDataUrl: this.config.models.lam.gpuExternalDataUrl,
        cpuModelUrl: this.config.models.lam.cpuModelUrl,
        mode: this.config.models.lam.mode,
        unifiedWorker: this.unifiedWorker ?? void 0
      });
      // Synthetic progress ticks (asymptotic toward 85%) while LAM loads.
      let lamProgress = 45;
      const lamTickInterval = setInterval(() => {
        const remaining = 85 - lamProgress;
        lamProgress += Math.max(0.5, remaining * 0.08);
        this.emitProgress("Lip sync", Math.round(lamProgress), 3, 1);
      }, 300);
      try {
        const lamLoadResult = await Promise.race([
          lam.load().then(() => "ok"),
          new Promise((r) => setTimeout(() => r("timeout"), timeoutMs))
        ]);
        if (lamLoadResult === "timeout") {
          // GPU backend hung — dispose and retry in CPU mode.
          logger19.warn(`LAM GPU load timed out after ${timeoutMs}ms, falling back to CPU`);
          await lam.dispose();
          lam = createA2E({
            gpuModelUrl: this.config.models.lam.gpuModelUrl,
            cpuModelUrl: this.config.models.lam.cpuModelUrl,
            mode: "cpu",
            unifiedWorker: this.unifiedWorker ?? void 0
          });
          await lam.load();
        }
      } finally {
        clearInterval(lamTickInterval);
      }
      this.lam = lam;
      this.emitProgress("Lip sync", 85, 3, 2);
      this.emitProgress("Voice detection", 85, 3, 2);
      this.vad = createSileroVAD({
        modelUrl: this.config.models.vad.modelUrl,
        threshold: this.config.models.vad.threshold,
        unifiedWorker: this.unifiedWorker ?? void 0
      });
      await this.vad.load();
      this.emitProgress("Voice detection", 100, 3, 3);
      this.playback = new PlaybackPipeline({
        lam: this.lam,
        profile: this.config.profile,
        identityIndex: this.config.identityIndex,
        neutralTransitionEnabled: this.config.neutralTransitionEnabled ?? true,
        neutralTransitionMs: this.config.neutralTransitionMs,
        audioDelayMs: this.config.audioDelayMs,
        chunkTargetMs: this.config.chunkTargetMs
      });
      await this.playback.initialize();
      this.playback.on("frame", (f) => {
        this._currentFrame = f.blendshapes;
        this.emit("frame", f);
      });
      this.playback.on("frame:raw", (f) => this.emit("frame:raw", f));
      this.playback.on("playback:start", (t) => this.emit("playback:start", t));
      this.playback.on("playback:complete", () => {
        if (this.stopped) return;
        this.emit("playback:complete", void 0);
        // Reset VAD and bump epoch so stale async work is discarded.
        this.vad?.reset();
        this.epoch++;
        this.setState("listening");
      });
      this.playback.on("error", (e) => this.emit("error", e));
      this.interruption = new InterruptionHandler({
        enabled: this.config.interruptionEnabled ?? true,
        minSpeechDurationMs: this.config.interruptionMinSpeechMs ?? 200
      });
      this.interruption.on("interruption.triggered", () => {
        this.handleInterruption();
      });
      this.setState("ready");
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      logger19.error("Model loading failed", { message: err.message });
      this.emit("error", err);
      this.setState("error");
      throw err;
    }
  }
  // ---------------------------------------------------------------------------
  // Conversation lifecycle
  // ---------------------------------------------------------------------------
  /**
   * Start a conversation session: open the microphone and begin listening.
   * @throws Error if the pipeline is not in the "ready" state.
   */
  async start() {
    if (this._state !== "ready") {
      throw new Error(`Cannot start: state is '${this._state}', expected 'ready'`);
    }
    this.stopped = false;
    this.epoch++;
    this._sessionId = crypto.randomUUID();
    this.asrErrorCount = 0;
    // FIX: use a fresh event bus per session. Previously the handlers below
    // were added to the long-lived emitter on every start(), so a
    // stop()/start() cycle processed each audio chunk once per past session.
    this.omoteEvents = new EventEmitter();
    this.mic = new MicrophoneCapture(this.omoteEvents, {
      sampleRate: 16e3,
      chunkSize: 512
    });
    this.omoteEvents.on("audio.chunk", ({ pcm }) => {
      const float32 = int16ToFloat32(pcm);
      // Fire-and-forget; errors are handled inside processAudioChunk.
      this.processAudioChunk(float32);
    });
    this.omoteEvents.on("audio.level", (level) => {
      this.emit("audio:level", level);
    });
    await this.mic.start();
    this.setState("listening");
  }
  /** Stop the session, abort any in-flight response, and return to "ready". */
  stop() {
    this.stopped = true;
    this.epoch++;
    this.clearSilenceTimer();
    this.stopProgressiveTranscription();
    this.responseAbortController?.abort();
    this.responseAbortController = null;
    this.vad?.reset();
    this.playback?.stop();
    this.mic?.stop();
    this.mic = null;
    this.isSpeaking = false;
    this.audioBuffer = [];
    this.audioBufferSamples = 0;
    this._currentFrame = null;
    this.interruption?.setAISpeaking(false);
    if (this._state !== "idle") {
      this.setState("ready");
    }
  }
  /** Update the ExpressionProfile at runtime (applies to future frames). */
  setProfile(profile) {
    this.config.profile = profile;
    this.playback?.setProfile(profile);
  }
  /** Release all models and pipelines; the instance cannot be restarted. */
  async dispose() {
    this.stop();
    this.epoch++;
    await this.playback?.dispose();
    await this.asr?.dispose();
    await this.lam?.dispose();
    await this.vad?.dispose();
    this.playback = null;
    this.asr = null;
    this.lam = null;
    this.vad = null;
    this._state = "idle";
  }
  // ---------------------------------------------------------------------------
  // Audio processing
  // ---------------------------------------------------------------------------
  /**
   * Per-chunk handler: run VAD, then either feed the interruption detector
   * (while the AI is speaking) or accumulate speech audio and arm the
   * silence timer (while listening). VAD failures are logged and skipped.
   */
  async processAudioChunk(samples) {
    if (!this.vad) return;
    try {
      const result = await this.vad.process(samples);
      if (this._state === "speaking" && this.interruption) {
        this.interruption.processVADResult(result.probability);
        return;
      }
      if (this._state !== "listening" && this._state !== "thinking") return;
      const wasSpeaking = this.isSpeaking;
      if (result.isSpeech) {
        if (!wasSpeaking) {
          // Speech onset: reset the utterance buffer and progressive ASR.
          this.isSpeaking = true;
          this.speechStartTime = performance.now();
          this.audioBuffer = [];
          this.audioBufferSamples = 0;
          this.lastProgressiveResult = null;
          this.lastProgressiveSamples = 0;
          this.emit("speech:start", void 0);
          this.startProgressiveTranscription();
        }
        this.audioBuffer.push(new Float32Array(samples));
        this.audioBufferSamples += samples.length;
        this.clearSilenceTimer();
      } else if (wasSpeaking) {
        // Trailing silence still belongs to the utterance; keep buffering
        // and arm the end-of-speech timer once.
        this.audioBuffer.push(new Float32Array(samples));
        this.audioBufferSamples += samples.length;
        if (!this.silenceTimer) {
          const timeoutMs = this.getSilenceTimeout();
          this.silenceTimer = setTimeout(() => {
            this.onSilenceDetected();
          }, timeoutMs);
        }
      }
    } catch (err) {
      logger19.warn("VAD error", { error: String(err) });
    }
  }
  // ---------------------------------------------------------------------------
  // Silence detection
  // ---------------------------------------------------------------------------
  /**
   * Silence timeout in ms; when adaptive, longer utterances (>3 s) get the
   * extended timeout to avoid cutting off slow speakers mid-sentence.
   */
  getSilenceTimeout() {
    const base = this.config.silenceTimeoutMs ?? 500;
    const extended = this.config.silenceTimeoutExtendedMs ?? 700;
    const adaptive = this.config.adaptiveTimeout ?? true;
    if (!adaptive) return base;
    const speechDurationMs = performance.now() - this.speechStartTime;
    return speechDurationMs > 3e3 ? extended : base;
  }
  /** End-of-utterance: emit speech:end and kick off transcription. */
  onSilenceDetected() {
    const capturedEpoch = this.epoch;
    this.isSpeaking = false;
    const durationMs = performance.now() - this.speechStartTime;
    this.emit("speech:end", { durationMs });
    this.clearSilenceTimer();
    this.processEndOfSpeech(capturedEpoch).catch((err) => {
      logger19.error("End of speech processing failed", { error: String(err) });
      if (this.epoch === capturedEpoch && !this.stopped) {
        this.emit("error", err instanceof Error ? err : new Error(String(err)));
        this.setState("listening");
      }
    });
  }
  // ---------------------------------------------------------------------------
  // End of speech → transcription → response
  // ---------------------------------------------------------------------------
  /**
   * Assemble the buffered utterance, gate it by duration/energy, transcribe
   * it (reusing a progressive result when it covers enough of the audio),
   * and hand the transcript to the response handler.
   */
  async processEndOfSpeech(capturedEpoch) {
    // Let any in-flight progressive transcription finish first so its
    // result can be reused below.
    if (this.progressivePromise) {
      try {
        await this.progressivePromise;
      } catch {
      }
    }
    this.stopProgressiveTranscription();
    if (this.epoch !== capturedEpoch || this.stopped) return;
    const totalSamples = this.audioBufferSamples;
    const fullAudio = new Float32Array(totalSamples);
    let offset = 0;
    for (const chunk of this.audioBuffer) {
      fullAudio.set(chunk, offset);
      offset += chunk.length;
    }
    this.audioBuffer = [];
    this.audioBufferSamples = 0;
    const minDuration = this.config.minAudioDurationSec ?? 0.3;
    const minEnergy = this.config.minAudioEnergy ?? 0.02;
    const durationSec = totalSamples / 16e3;
    if (durationSec < minDuration) {
      logger19.info("Audio too short, discarding", { durationSec });
      this.setState("listening");
      return;
    }
    // RMS energy gate to drop breath noise / silence-only captures.
    // (A previously computed peak value here was dead code and was removed;
    // peak normalization lives in normalizeAudio().)
    let rms = 0;
    for (let i = 0; i < fullAudio.length; i++) {
      rms += fullAudio[i] * fullAudio[i];
    }
    rms = Math.sqrt(rms / fullAudio.length);
    if (rms < minEnergy) {
      logger19.info("Audio too quiet, discarding", { rms });
      this.setState("listening");
      return;
    }
    const normalizedAudio = this.normalizeAudio(fullAudio);
    this.setState("thinking");
    let transcript = null;
    // Reuse the last progressive transcription when it already covers most
    // of the utterance (avoids re-running ASR on nearly identical audio).
    const coverageThreshold = this.config.progressiveCoverageThreshold ?? 0.8;
    if (this.lastProgressiveResult && this.lastProgressiveResult.text.trim().length > 0 && this.lastProgressiveSamples >= totalSamples * coverageThreshold) {
      transcript = { ...this.lastProgressiveResult, isFinal: true };
      logger19.info("Using progressive result", {
        coverage: (this.lastProgressiveSamples / totalSamples).toFixed(2),
        text: transcript.text
      });
    } else {
      this.lastProgressiveResult = null;
      transcript = await this.transcribeWithTimeout(normalizedAudio);
      if (transcript) {
        transcript.isFinal = true;
      }
    }
    if (this.epoch !== capturedEpoch || this.stopped) return;
    if (!transcript || !transcript.text.trim()) {
      logger19.info("No transcript, resuming listening");
      this.setState("listening");
      return;
    }
    this.emit("transcript", transcript);
    await this.callResponseHandler(transcript, capturedEpoch);
  }
  // ---------------------------------------------------------------------------
  // Response handler
  // ---------------------------------------------------------------------------
  /**
   * Invoke the app-supplied onResponse callback, streaming its audio into
   * the playback pipeline. The AbortController lets stop()/interruption
   * cancel mid-response; an aborted response is not treated as an error.
   */
  async callResponseHandler(transcript, capturedEpoch) {
    if (this.epoch !== capturedEpoch || this.stopped) return;
    this.setState("speaking");
    this.interruption?.setAISpeaking(true);
    const abortController = new AbortController();
    this.responseAbortController = abortController;
    try {
      this.playback.start();
      await this.config.onResponse({
        text: transcript.text,
        emotion: transcript.emotion,
        event: transcript.event,
        send: async (chunk) => {
          if (abortController.signal.aborted) return;
          await this.playback.onAudioChunk(chunk);
        },
        done: async () => {
          if (abortController.signal.aborted) return;
          await this.playback.end();
        },
        signal: abortController.signal,
        sessionId: this._sessionId
      });
    } catch (error) {
      if (abortController.signal.aborted) return;
      const err = error instanceof Error ? error : new Error(String(error));
      logger19.error("Response handler error", { message: err.message });
      this.emit("error", err);
      if (this.epoch === capturedEpoch && !this.stopped) {
        this.interruption?.setAISpeaking(false);
        this.setState("listening");
      }
    } finally {
      this.responseAbortController = null;
    }
  }
  // ---------------------------------------------------------------------------
  // Interruption handling
  // ---------------------------------------------------------------------------
  /** User barged in while the AI was speaking: abort playback and re-listen. */
  handleInterruption() {
    if (this._state !== "speaking") return;
    logger19.info("Interruption triggered");
    this.epoch++;
    this.responseAbortController?.abort();
    this.playback?.stop();
    this.interruption?.setAISpeaking(false);
    this.emit("interruption", void 0);
    if (!this.stopped) {
      this.setState("listening");
    }
  }
  // ---------------------------------------------------------------------------
  // Progressive transcription
  // ---------------------------------------------------------------------------
  /**
   * Periodically transcribe the partial utterance buffer and emit interim
   * (isFinal: false) transcripts; the last result may be promoted to final
   * in processEndOfSpeech when it covers enough of the audio.
   */
  startProgressiveTranscription() {
    this.stopProgressiveTranscription();
    const intervalMs = isIOS() ? this.config.progressiveIntervalIosMs ?? 800 : this.config.progressiveIntervalMs ?? 500;
    const minSamples = this.config.progressiveMinSamples ?? 8e3;
    this.progressiveTimer = setInterval(() => {
      if (this.audioBufferSamples < minSamples) return;
      if (!this.asr) return;
      const capturedEpoch = this.epoch;
      // Snapshot the buffer so ASR runs on a stable copy while new chunks
      // keep arriving.
      const snapshot = new Float32Array(this.audioBufferSamples);
      let offset = 0;
      for (const chunk of this.audioBuffer) {
        snapshot.set(chunk, offset);
        offset += chunk.length;
      }
      const snapshotSamples = this.audioBufferSamples;
      this.progressivePromise = (async () => {
        try {
          const result = await this.transcribeWithTimeout(snapshot);
          if (this.epoch !== capturedEpoch) return;
          if (result && result.text.trim()) {
            this.lastProgressiveResult = result;
            this.lastProgressiveSamples = snapshotSamples;
            this.emit("transcript", { ...result, isFinal: false });
          }
        } catch {
        }
      })();
    }, intervalMs);
  }
  /** Cancel the progressive-transcription interval if armed. */
  stopProgressiveTranscription() {
    if (this.progressiveTimer) {
      clearInterval(this.progressiveTimer);
      this.progressiveTimer = null;
    }
  }
  // ---------------------------------------------------------------------------
  // Transcription with timeout + ASR error recovery
  // ---------------------------------------------------------------------------
  /**
   * Run ASR with a hard timeout. Returns null on failure; after 3
   * consecutive failures the ASR session is disposed and recreated.
   */
  async transcribeWithTimeout(audio) {
    if (!this.asr) return null;
    const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
    const startTime = performance.now();
    try {
      const result = await Promise.race([
        this.asr.transcribe(audio),
        new Promise(
          (_, reject) => setTimeout(() => reject(new Error(`Transcription timed out after ${timeoutMs}ms`)), timeoutMs)
        )
      ]);
      this.asrErrorCount = 0;
      return {
        text: result.text,
        emotion: result.emotion,
        language: result.language,
        isFinal: false,
        inferenceTimeMs: performance.now() - startTime
      };
    } catch (error) {
      this.asrErrorCount++;
      logger19.warn("Transcription failed", {
        attempt: this.asrErrorCount,
        error: String(error)
      });
      if (this.asrErrorCount >= 3) {
        logger19.warn("3 consecutive ASR errors, recreating session");
        try {
          await this.asr.dispose();
          this.asr = createSenseVoice({
            modelUrl: this.config.models.senseVoice.modelUrl,
            tokensUrl: this.config.models.senseVoice.tokensUrl,
            language: this.config.models.senseVoice.language,
            unifiedWorker: this.unifiedWorker ?? void 0
          });
          await this.asr.load();
          this.asrErrorCount = 0;
        } catch (recreateErr) {
          logger19.error("ASR session recreation failed", { error: String(recreateErr) });
        }
      }
      return null;
    }
  }
  // ---------------------------------------------------------------------------
  // Audio normalization
  // ---------------------------------------------------------------------------
  /**
   * Boost quiet recordings (peak < 0.1) to a 0.5 peak before ASR; loud or
   * silent audio is returned unchanged. Disabled via config.normalizeAudio.
   */
  normalizeAudio(audio) {
    if (!(this.config.normalizeAudio ?? true)) return audio;
    let maxAbs = 0;
    for (let i = 0; i < audio.length; i++) {
      const abs = Math.abs(audio[i]);
      if (abs > maxAbs) maxAbs = abs;
    }
    if (maxAbs >= 0.1 || maxAbs === 0) return audio;
    const gain = 0.5 / maxAbs;
    const normalized = new Float32Array(audio.length);
    for (let i = 0; i < audio.length; i++) {
      normalized[i] = audio[i] * gain;
    }
    return normalized;
  }
  // ---------------------------------------------------------------------------
  // Helpers
  // ---------------------------------------------------------------------------
  setState(state) {
    if (this._state === state) return;
    logger19.info("State transition", { from: this._state, to: state });
    this._state = state;
    this.emit("state", state);
  }
  emitProgress(currentModel, progress, totalModels, modelsLoaded) {
    this.emit("loading:progress", { currentModel, progress, totalModels, modelsLoaded });
  }
  clearSilenceTimer() {
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
  }
};
11063
+
9987
11064
  // ../types/dist/index.mjs
9988
11065
  var PROTOCOL_VERSION = 1;
9989
11066
  function isProtocolEvent(obj) {
@@ -9992,7 +11069,9 @@ function isProtocolEvent(obj) {
9992
11069
  export {
9993
11070
  A2EOrchestrator,
9994
11071
  A2EProcessor,
11072
+ ALL_AUS,
9995
11073
  ARKIT_BLENDSHAPES,
11074
+ AU_TO_ARKIT,
9996
11075
  AnimationGraph,
9997
11076
  AudioChunkCoalescer,
9998
11077
  AudioEnergyAnalyzer,
@@ -10003,24 +11082,31 @@ export {
10003
11082
  ConsoleExporter,
10004
11083
  DEFAULT_ANIMATION_CONFIG,
10005
11084
  DEFAULT_LOGGING_CONFIG,
11085
+ DEFAULT_MODEL_URLS,
10006
11086
  EMOTION_NAMES,
11087
+ EMOTION_TO_AU,
10007
11088
  EMOTION_VECTOR_SIZE,
10008
11089
  EmotionController,
10009
11090
  EmotionPresets,
11091
+ EmotionResolver,
10010
11092
  EmphasisDetector,
10011
11093
  EventEmitter,
11094
+ FaceCompositor,
10012
11095
  FullFacePipeline,
11096
+ HF_CDN_URLS,
10013
11097
  INFERENCE_LATENCY_BUCKETS,
10014
11098
  InterruptionHandler,
10015
11099
  LAM_BLENDSHAPES,
10016
11100
  LOG_LEVEL_PRIORITY,
10017
11101
  MODEL_LOAD_TIME_BUCKETS,
10018
11102
  MetricNames,
11103
+ MicLipSync,
10019
11104
  MicrophoneCapture,
10020
11105
  ModelCache,
10021
11106
  OTLPExporter,
10022
11107
  OmoteTelemetry,
10023
11108
  PROTOCOL_VERSION,
11109
+ PlaybackPipeline,
10024
11110
  ProceduralLifeLayer,
10025
11111
  RingBuffer,
10026
11112
  SafariSpeechRecognition,
@@ -10031,15 +11117,18 @@ export {
10031
11117
  SileroVADUnifiedAdapter,
10032
11118
  SileroVADWorker,
10033
11119
  UnifiedInferenceWorker,
11120
+ VoicePipeline,
10034
11121
  Wav2ArkitCpuInference,
10035
11122
  Wav2ArkitCpuUnifiedAdapter,
10036
11123
  Wav2ArkitCpuWorker,
10037
11124
  Wav2Vec2Inference,
11125
+ applyProfile,
10038
11126
  blendEmotions,
10039
11127
  calculatePeak,
10040
11128
  calculateRMS,
10041
11129
  configureCacheLimit,
10042
11130
  configureLogging,
11131
+ configureModelUrls,
10043
11132
  configureTelemetry,
10044
11133
  createA2E,
10045
11134
  createEmotionVector,
@@ -10070,6 +11159,7 @@ export {
10070
11159
  noopLogger,
10071
11160
  preloadModels,
10072
11161
  resetLoggingConfig,
11162
+ resetModelUrls,
10073
11163
  resolveBackend,
10074
11164
  setLogLevel,
10075
11165
  setLoggingEnabled,