@omote/core 0.5.7 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -32,7 +32,9 @@ var index_exports = {};
  __export(index_exports, {
  A2EOrchestrator: () => A2EOrchestrator,
  A2EProcessor: () => A2EProcessor,
+ ALL_AUS: () => ALL_AUS,
  ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
+ AU_TO_ARKIT: () => AU_TO_ARKIT,
  AnimationGraph: () => AnimationGraph,
  AudioChunkCoalescer: () => AudioChunkCoalescer,
  AudioEnergyAnalyzer: () => AudioEnergyAnalyzer,
@@ -43,24 +45,31 @@ __export(index_exports, {
  ConsoleExporter: () => ConsoleExporter,
  DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
  DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
+ DEFAULT_MODEL_URLS: () => DEFAULT_MODEL_URLS,
  EMOTION_NAMES: () => EMOTION_NAMES,
+ EMOTION_TO_AU: () => EMOTION_TO_AU,
  EMOTION_VECTOR_SIZE: () => EMOTION_VECTOR_SIZE,
  EmotionController: () => EmotionController,
  EmotionPresets: () => EmotionPresets,
+ EmotionResolver: () => EmotionResolver,
  EmphasisDetector: () => EmphasisDetector,
  EventEmitter: () => EventEmitter,
+ FaceCompositor: () => FaceCompositor,
  FullFacePipeline: () => FullFacePipeline,
+ HF_CDN_URLS: () => HF_CDN_URLS,
  INFERENCE_LATENCY_BUCKETS: () => INFERENCE_LATENCY_BUCKETS,
  InterruptionHandler: () => InterruptionHandler,
  LAM_BLENDSHAPES: () => LAM_BLENDSHAPES,
  LOG_LEVEL_PRIORITY: () => LOG_LEVEL_PRIORITY,
  MODEL_LOAD_TIME_BUCKETS: () => MODEL_LOAD_TIME_BUCKETS,
  MetricNames: () => MetricNames,
+ MicLipSync: () => MicLipSync,
  MicrophoneCapture: () => MicrophoneCapture,
  ModelCache: () => ModelCache,
  OTLPExporter: () => OTLPExporter,
  OmoteTelemetry: () => OmoteTelemetry,
  PROTOCOL_VERSION: () => PROTOCOL_VERSION,
+ PlaybackPipeline: () => PlaybackPipeline,
  ProceduralLifeLayer: () => ProceduralLifeLayer,
  RingBuffer: () => RingBuffer,
  SafariSpeechRecognition: () => SafariSpeechRecognition,
@@ -71,15 +80,18 @@ __export(index_exports, {
  SileroVADUnifiedAdapter: () => SileroVADUnifiedAdapter,
  SileroVADWorker: () => SileroVADWorker,
  UnifiedInferenceWorker: () => UnifiedInferenceWorker,
+ VoicePipeline: () => VoicePipeline,
  Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
  Wav2ArkitCpuUnifiedAdapter: () => Wav2ArkitCpuUnifiedAdapter,
  Wav2ArkitCpuWorker: () => Wav2ArkitCpuWorker,
  Wav2Vec2Inference: () => Wav2Vec2Inference,
+ applyProfile: () => applyProfile,
  blendEmotions: () => blendEmotions,
  calculatePeak: () => calculatePeak,
  calculateRMS: () => calculateRMS,
  configureCacheLimit: () => configureCacheLimit,
  configureLogging: () => configureLogging,
+ configureModelUrls: () => configureModelUrls,
  configureTelemetry: () => configureTelemetry,
  createA2E: () => createA2E,
  createEmotionVector: () => createEmotionVector,
@@ -110,6 +122,7 @@ __export(index_exports, {
  noopLogger: () => noopLogger,
  preloadModels: () => preloadModels,
  resetLoggingConfig: () => resetLoggingConfig,
+ resetModelUrls: () => resetModelUrls,
  resolveBackend: () => resolveBackend,
  setLogLevel: () => setLogLevel,
  setLoggingEnabled: () => setLoggingEnabled,
@@ -867,12 +880,12 @@ var Logger = class _Logger {
  };
  var loggerCache = /* @__PURE__ */ new Map();
  function createLogger(module2) {
- let logger17 = loggerCache.get(module2);
- if (!logger17) {
- logger17 = new Logger(module2);
- loggerCache.set(module2, logger17);
+ let logger20 = loggerCache.get(module2);
+ if (!logger20) {
+ logger20 = new Logger(module2);
+ loggerCache.set(module2, logger20);
  }
- return logger17;
+ return logger20;
  }
  var noopLogger = {
  module: "noop",
@@ -1168,6 +1181,24 @@ var A2EProcessor = class {
  }
  };

+ // src/audio/audioUtils.ts
+ function pcm16ToFloat32(buffer) {
+ const byteLen = buffer.byteLength & ~1;
+ const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
+ const float32 = new Float32Array(int16.length);
+ for (let i = 0; i < int16.length; i++) {
+ float32[i] = int16[i] / 32768;
+ }
+ return float32;
+ }
+ function int16ToFloat32(int16) {
+ const float32 = new Float32Array(int16.length);
+ for (let i = 0; i < int16.length; i++) {
+ float32[i] = int16[i] / 32768;
+ }
+ return float32;
+ }
+
  // src/telemetry/exporters/console.ts
  var ConsoleExporter = class {
  constructor(options = {}) {
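The helpers added above are plain PCM16 to Float32 converters: each 16-bit sample is divided by 32768, and a trailing odd byte is dropped. A minimal usage sketch in TypeScript, assuming pcm16ToFloat32 is reachable from the package entry point (only part of the export map is visible in this diff); the chunk source is illustrative:

import { pcm16ToFloat32 } from "@omote/core";

// `chunk` is a Uint8Array view over little-endian 16-bit PCM, for example from
// a streaming TTS source (illustrative). pcm16ToFloat32 takes an ArrayBuffer,
// so copy out exactly this chunk's bytes before converting.
function chunkToFloat32(chunk: Uint8Array): Float32Array {
  const buffer = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength);
  return pcm16ToFloat32(buffer); // maps [-32768, 32767] to roughly [-1, 1)
}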
@@ -2940,7 +2971,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
  } else {
  logger3.info("Fetching external model data", {
  dataUrl,
- note: "This may be a large download (383MB+)"
+ note: "This may be a large download"
  });
  externalDataBuffer = await fetchWithCache(dataUrl);
  }
@@ -2948,6 +2979,9 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
  size: formatBytes(externalDataBuffer.byteLength)
  });
  } catch (err) {
+ if (typeof this.config.externalDataUrl === "string") {
+ throw new Error(`Failed to fetch external data: ${dataUrl} \u2014 ${err.message}`);
+ }
  logger3.debug("No external data file found (single-file model)", {
  dataUrl,
  error: err.message
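The new guard changes failure semantics: when externalDataUrl is explicitly configured as a string, a failed fetch now throws instead of being silently treated as a single-file model. A sketch of the strict path, with illustrative URLs (the constructor options match what createA2E passes below):

import { Wav2Vec2Inference } from "@omote/core";

const gpu = new Wav2Vec2Inference({
  modelUrl: "https://models.example.com/lam-a2e/model_fp16.onnx",             // illustrative
  externalDataUrl: "https://models.example.com/lam-a2e/model_fp16.onnx.data", // illustrative
  backend: "auto",
});
// load() is now expected to reject with "Failed to fetch external data: ..."
// if the .data file is unreachable, rather than proceeding as single-file.
await gpu.load();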
@@ -3071,28 +3105,6 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
  };
  return this.queueInference(feeds);
  }
- /**
- * Decode CTC logits to text using greedy decoding
- */
- decodeCTC(logits) {
- const tokens = [];
- let prevToken = -1;
- for (const frame of logits) {
- let maxIdx = 0;
- let maxVal = frame[0];
- for (let i = 1; i < frame.length; i++) {
- if (frame[i] > maxVal) {
- maxVal = frame[i];
- maxIdx = i;
- }
- }
- if (maxIdx !== prevToken && maxIdx !== 0) {
- tokens.push(maxIdx);
- }
- prevToken = maxIdx;
- }
- return tokens.map((t) => CTC_VOCAB[t] === "|" ? " " : CTC_VOCAB[t]).join("");
- }
  /**
  * Queue inference to serialize ONNX session calls
  */
@@ -3120,37 +3132,25 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
  })
  ]);
  const inferenceTimeMs = performance.now() - startTime;
- const asrOutput = results["asr_logits"];
  const blendshapeOutput = results["blendshapes"];
- if (!asrOutput || !blendshapeOutput) {
- throw new Error("Missing outputs from model");
+ if (!blendshapeOutput) {
+ throw new Error("Missing blendshapes output from model");
  }
- const asrData = asrOutput.data;
  const blendshapeData = blendshapeOutput.data;
- const numASRFrames = asrOutput.dims[1];
  const numA2EFrames = blendshapeOutput.dims[1];
- const asrVocabSize = asrOutput.dims[2];
  const numBlendshapes = blendshapeOutput.dims[2];
- const asrLogits = [];
  const blendshapes = [];
- for (let f = 0; f < numASRFrames; f++) {
- asrLogits.push(asrData.slice(f * asrVocabSize, (f + 1) * asrVocabSize));
- }
  for (let f = 0; f < numA2EFrames; f++) {
  const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
  blendshapes.push(symmetrizeBlendshapes(rawFrame));
  }
- const text = this.decodeCTC(asrLogits);
  logger3.trace("Inference completed", {
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
- numA2EFrames,
- numASRFrames,
- textLength: text.length
+ numA2EFrames
  });
  span?.setAttributes({
  "inference.duration_ms": inferenceTimeMs,
- "inference.a2e_frames": numA2EFrames,
- "inference.asr_frames": numASRFrames
+ "inference.a2e_frames": numA2EFrames
  });
  span?.end();
  telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
@@ -3164,11 +3164,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
  });
  resolve({
  blendshapes,
- asrLogits,
- text,
  numFrames: numA2EFrames,
- numA2EFrames,
- numASRFrames,
  inferenceTimeMs
  });
  } catch (err) {
@@ -3221,19 +3217,7 @@ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
  _Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
  var Wav2Vec2Inference = _Wav2Vec2Inference;

- // src/audio/audioUtils.ts
- function pcm16ToFloat32(buffer) {
- const byteLen = buffer.byteLength & ~1;
- const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
- const float32 = new Float32Array(int16.length);
- for (let i = 0; i < int16.length; i++) {
- float32[i] = int16[i] / 32768;
- }
- return float32;
- }
-
- // src/audio/FullFacePipeline.ts
- var logger4 = createLogger("FullFacePipeline");
+ // src/audio/expressionProfile.ts
  var BLENDSHAPE_TO_GROUP = /* @__PURE__ */ new Map();
  for (const name of LAM_BLENDSHAPES) {
  if (name.startsWith("eye")) {
@@ -3252,6 +3236,24 @@ for (const name of LAM_BLENDSHAPES) {
  BLENDSHAPE_TO_GROUP.set(name, "tongue");
  }
  }
+ function applyProfile(raw, profile) {
+ const scaled = new Float32Array(52);
+ for (let i = 0; i < 52; i++) {
+ const name = LAM_BLENDSHAPES[i];
+ let scaler;
+ if (profile.overrides && profile.overrides[name] !== void 0) {
+ scaler = profile.overrides[name];
+ } else {
+ const group = BLENDSHAPE_TO_GROUP.get(name);
+ scaler = group ? profile[group] ?? 1 : 1;
+ }
+ scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
+ }
+ return scaled;
+ }
+
+ // src/audio/FullFacePipeline.ts
+ var logger4 = createLogger("FullFacePipeline");
  var FullFacePipeline = class extends EventEmitter {
  constructor(options) {
  super();
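Extracting applyProfile into expressionProfile.ts makes the scaling reusable outside FullFacePipeline (the new PlaybackPipeline below calls the same function). A small sketch, assuming the package export shown in the export map above; the group key other than the eye/tongue prefixes visible in this hunk is an assumption about the ExpressionProfile shape:

import { applyProfile } from "@omote/core";

// Scale a raw 52-value A2E frame: per-blendshape overrides win, otherwise the
// group scaler applies, and every result is clamped to [0, 1].
const profile = {
  mouth: 1.25,                  // assumed group key: exaggerate mouth shapes
  overrides: { jawOpen: 0.8 },  // but damp jawOpen specifically (ARKit name)
};

declare const raw: Float32Array; // one 52-blendshape frame from the A2E model
const scaled = applyProfile(raw, profile);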
@@ -3316,25 +3318,10 @@ var FullFacePipeline = class extends EventEmitter {
  /**
  * Apply ExpressionProfile scaling to raw A2E blendshapes.
  *
- * For each blendshape:
- * 1. If an override exists for the blendshape name, use override as scaler
- * 2. Otherwise, use the group scaler (default 1.0)
- * 3. Clamp result to [0, 1]
+ * Delegates to the standalone applyProfile() utility from expressionProfile.ts.
  */
  applyProfile(raw) {
- const scaled = new Float32Array(52);
- for (let i = 0; i < 52; i++) {
- const name = LAM_BLENDSHAPES[i];
- let scaler;
- if (this.profile.overrides && this.profile.overrides[name] !== void 0) {
- scaler = this.profile.overrides[name];
- } else {
- const group = BLENDSHAPE_TO_GROUP.get(name);
- scaler = group ? this.profile[group] ?? 1 : 1;
- }
- scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
- }
- return scaled;
+ return applyProfile(raw, this.profile);
  }
  /**
  * Start a new playback session
@@ -3519,6 +3506,329 @@ var FullFacePipeline = class extends EventEmitter {
  }
  };

+ // src/audio/PlaybackPipeline.ts
+ var logger5 = createLogger("PlaybackPipeline");
+ var PlaybackPipeline = class extends EventEmitter {
+ constructor(config) {
+ super();
+ this.config = config;
+ this._state = "idle";
+ this.playbackStarted = false;
+ this.monitorInterval = null;
+ this.frameAnimationId = null;
+ // Stale frame detection
+ this.lastNewFrameTime = 0;
+ this.lastKnownLamFrame = null;
+ this.staleWarningEmitted = false;
+ // Diagnostic counter
+ this.frameLoopCount = 0;
+ this.neutralTransitionFrame = null;
+ this.neutralTransitionStart = 0;
+ this.neutralAnimationId = null;
+ // Current frame refs
+ this._currentFrame = null;
+ this._currentRawFrame = null;
+ this.sampleRate = config.sampleRate ?? 16e3;
+ this.profile = config.profile ?? {};
+ this.staleThresholdMs = config.staleThresholdMs ?? 2e3;
+ this.neutralTransitionEnabled = config.neutralTransitionEnabled ?? false;
+ this.neutralTransitionMs = config.neutralTransitionMs ?? 250;
+ const isCpuModel = config.lam.modelId === "wav2arkit_cpu";
+ const chunkSize = config.chunkSize ?? config.lam.chunkSize ?? 16e3;
+ const chunkAccumulationMs = chunkSize / this.sampleRate * 1e3;
+ const inferenceEstimateMs = isCpuModel ? 300 : config.lam.backend === "wasm" ? 250 : 80;
+ const marginMs = 100;
+ const autoDelay = Math.ceil(chunkAccumulationMs + inferenceEstimateMs + marginMs);
+ const audioDelayMs = config.audioDelayMs ?? autoDelay;
+ logger5.info("PlaybackPipeline config", {
+ chunkSize,
+ audioDelayMs,
+ autoDelay,
+ backend: config.lam.backend,
+ modelId: config.lam.modelId,
+ neutralTransitionEnabled: this.neutralTransitionEnabled
+ });
+ this.scheduler = new AudioScheduler({
+ sampleRate: this.sampleRate,
+ initialLookaheadSec: audioDelayMs / 1e3
+ });
+ this.coalescer = new AudioChunkCoalescer({
+ sampleRate: this.sampleRate,
+ targetDurationMs: config.chunkTargetMs ?? 200
+ });
+ this.processor = new A2EProcessor({
+ backend: config.lam,
+ sampleRate: this.sampleRate,
+ chunkSize,
+ identityIndex: config.identityIndex,
+ onError: (error) => {
+ logger5.error("A2E inference error", { message: error.message, stack: error.stack });
+ this.emit("error", error);
+ }
+ });
+ }
+ /** Current pipeline state */
+ get state() {
+ return this._state;
+ }
+ /** Current scaled blendshapes (updated in-place for perf) */
+ get currentFrame() {
+ return this._currentFrame;
+ }
+ /** Raw A2E blendshapes (before profile scaling) */
+ get currentRawFrame() {
+ return this._currentRawFrame;
+ }
+ // ---------------------------------------------------------------------------
+ // Lifecycle
+ // ---------------------------------------------------------------------------
+ /** Initialize AudioContext (lazy, call after user gesture) */
+ async initialize() {
+ await this.scheduler.initialize();
+ }
+ /** Update ExpressionProfile at runtime */
+ setProfile(profile) {
+ this.profile = profile;
+ }
+ // ---------------------------------------------------------------------------
+ // Async mode (streaming TTS)
+ // ---------------------------------------------------------------------------
+ /**
+ * Start a new playback session.
+ * Idempotent — calling during playback resets cleanly without emitting
+ * spurious playback:complete.
+ */
+ start() {
+ this.stopInternal(false);
+ this.scheduler.reset();
+ this.coalescer.reset();
+ this.processor.reset();
+ this.playbackStarted = false;
+ this.lastNewFrameTime = 0;
+ this.lastKnownLamFrame = null;
+ this.staleWarningEmitted = false;
+ this.frameLoopCount = 0;
+ this._currentFrame = null;
+ this._currentRawFrame = null;
+ this.cancelNeutralTransition();
+ this.scheduler.warmup();
+ this.startFrameLoop();
+ this.startMonitoring();
+ this.setState("playing");
+ }
+ /** Feed a streaming audio chunk (PCM16 Uint8Array) */
+ async onAudioChunk(chunk) {
+ const combined = this.coalescer.add(chunk);
+ if (!combined) return;
+ const float32 = pcm16ToFloat32(combined);
+ const scheduleTime = await this.scheduler.schedule(float32);
+ if (!this.playbackStarted) {
+ this.playbackStarted = true;
+ this.emit("playback:start", { time: scheduleTime });
+ this.emit("playback_start", scheduleTime);
+ }
+ this.processor.pushAudio(float32, scheduleTime);
+ }
+ /** Signal end of audio stream (flushes remaining audio) */
+ async end() {
+ const remaining = this.coalescer.flush();
+ if (remaining) {
+ const chunk = new Uint8Array(remaining);
+ await this.onAudioChunk(chunk);
+ }
+ await this.processor.flush();
+ }
+ // ---------------------------------------------------------------------------
+ // Sync mode (full buffer)
+ // ---------------------------------------------------------------------------
+ /**
+ * Feed a complete audio buffer. Chunks into 200ms pieces, schedules each
+ * for playback, runs A2E inference, then waits for completion.
+ */
+ async feedBuffer(audio) {
+ const float32 = audio instanceof Float32Array ? audio : pcm16ToFloat32(audio);
+ this.start();
+ const chunkSamples = Math.floor(this.sampleRate * 0.2);
+ for (let i = 0; i < float32.length; i += chunkSamples) {
+ const chunk = float32.subarray(i, Math.min(i + chunkSamples, float32.length));
+ const scheduleTime = await this.scheduler.schedule(chunk);
+ this.processor.pushAudio(chunk, scheduleTime);
+ if (!this.playbackStarted) {
+ this.playbackStarted = true;
+ this.emit("playback:start", { time: scheduleTime });
+ this.emit("playback_start", scheduleTime);
+ }
+ }
+ await this.processor.flush();
+ return new Promise((resolve) => {
+ const unsub = this.on("playback:complete", () => {
+ unsub();
+ resolve();
+ });
+ });
+ }
+ // ---------------------------------------------------------------------------
+ // Control
+ // ---------------------------------------------------------------------------
+ /** Stop playback immediately with fade-out */
+ async stop(fadeOutMs = 50) {
+ this.setState("stopping");
+ this.stopInternal(true);
+ await this.scheduler.cancelAll(fadeOutMs);
+ this.coalescer.reset();
+ this.processor.reset();
+ this.playbackStarted = false;
+ this._currentFrame = null;
+ this._currentRawFrame = null;
+ this.emit("playback:stop", void 0);
+ this.setState("idle");
+ }
+ /** Cleanup all resources */
+ dispose() {
+ this.stopInternal(true);
+ this.cancelNeutralTransition();
+ this.scheduler.dispose();
+ this.coalescer.reset();
+ this.processor.dispose();
+ this._state = "idle";
+ }
+ /** Get pipeline debug state */
+ getDebugState() {
+ return {
+ state: this._state,
+ playbackStarted: this.playbackStarted,
+ coalescerFill: this.coalescer.fillLevel,
+ processorFill: this.processor.fillLevel,
+ queuedFrames: this.processor.queuedFrameCount,
+ currentTime: this.scheduler.getCurrentTime(),
+ playbackEndTime: this.scheduler.getPlaybackEndTime()
+ };
+ }
+ // ---------------------------------------------------------------------------
+ // Internal: Frame loop
+ // ---------------------------------------------------------------------------
+ startFrameLoop() {
+ const updateFrame = () => {
+ this.frameLoopCount++;
+ const currentTime = this.scheduler.getCurrentTime();
+ const lamFrame = this.processor.getFrameForTime(currentTime);
+ if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
+ this.lastNewFrameTime = performance.now();
+ this.lastKnownLamFrame = lamFrame;
+ this.staleWarningEmitted = false;
+ }
+ if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
+ if (!this.staleWarningEmitted) {
+ this.staleWarningEmitted = true;
+ logger5.warn("A2E stalled \u2014 no new inference frames", {
+ staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
+ queuedFrames: this.processor.queuedFrameCount
+ });
+ }
+ }
+ if (lamFrame) {
+ const scaled = applyProfile(lamFrame, this.profile);
+ this._currentFrame = scaled;
+ this._currentRawFrame = lamFrame;
+ const fullFrame = {
+ blendshapes: scaled,
+ rawBlendshapes: lamFrame,
+ timestamp: currentTime
+ };
+ this.emit("frame", fullFrame);
+ this.emit("frame:raw", lamFrame);
+ this.emit("full_frame_ready", fullFrame);
+ this.emit("lam_frame_ready", lamFrame);
+ }
+ this.frameAnimationId = requestAnimationFrame(updateFrame);
+ };
+ this.frameAnimationId = requestAnimationFrame(updateFrame);
+ }
+ // ---------------------------------------------------------------------------
+ // Internal: Playback monitoring
+ // ---------------------------------------------------------------------------
+ startMonitoring() {
+ if (this.monitorInterval) {
+ clearInterval(this.monitorInterval);
+ }
+ this.monitorInterval = setInterval(() => {
+ if (this.scheduler.isComplete() && this.processor.queuedFrameCount === 0) {
+ this.onPlaybackComplete();
+ }
+ }, 100);
+ }
+ onPlaybackComplete() {
+ this.stopInternal(false);
+ this.playbackStarted = false;
+ this.emit("playback:complete", void 0);
+ this.emit("playback_complete", void 0);
+ if (this.neutralTransitionEnabled && this._currentFrame) {
+ this.startNeutralTransition(this._currentFrame);
+ } else {
+ this.setState("idle");
+ }
+ }
+ // ---------------------------------------------------------------------------
+ // Internal: Neutral transition (opt-in)
+ // ---------------------------------------------------------------------------
+ startNeutralTransition(fromFrame) {
+ this.neutralTransitionFrame = new Float32Array(fromFrame);
+ this.neutralTransitionStart = performance.now();
+ const animate = () => {
+ const elapsed = performance.now() - this.neutralTransitionStart;
+ const t = Math.min(1, elapsed / this.neutralTransitionMs);
+ const eased = 1 - Math.pow(1 - t, 3);
+ const blendshapes = new Float32Array(52);
+ for (let i = 0; i < 52; i++) {
+ blendshapes[i] = this.neutralTransitionFrame[i] * (1 - eased);
+ }
+ this._currentFrame = blendshapes;
+ const frame = {
+ blendshapes,
+ rawBlendshapes: blendshapes,
+ // raw = scaled during transition
+ timestamp: performance.now() / 1e3
+ };
+ this.emit("frame", frame);
+ this.emit("full_frame_ready", frame);
+ if (t >= 1) {
+ this.neutralTransitionFrame = null;
+ this._currentFrame = null;
+ this._currentRawFrame = null;
+ this.setState("idle");
+ return;
+ }
+ this.neutralAnimationId = requestAnimationFrame(animate);
+ };
+ this.neutralAnimationId = requestAnimationFrame(animate);
+ }
+ cancelNeutralTransition() {
+ if (this.neutralAnimationId) {
+ cancelAnimationFrame(this.neutralAnimationId);
+ this.neutralAnimationId = null;
+ }
+ this.neutralTransitionFrame = null;
+ }
+ // ---------------------------------------------------------------------------
+ // Internal: Helpers
+ // ---------------------------------------------------------------------------
+ stopInternal(emitEvents) {
+ if (this.monitorInterval) {
+ clearInterval(this.monitorInterval);
+ this.monitorInterval = null;
+ }
+ if (this.frameAnimationId) {
+ cancelAnimationFrame(this.frameAnimationId);
+ this.frameAnimationId = null;
+ }
+ }
+ setState(state) {
+ if (this._state === state) return;
+ this._state = state;
+ this.emit("state", state);
+ }
+ };
+
  // src/audio/InterruptionHandler.ts
  var InterruptionHandler = class extends EventEmitter {
  constructor(config = {}) {
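Taken together, the new class wires audio coalescing, scheduled playback, and A2E inference behind one streaming interface. A usage sketch built only from the methods and events visible above; the lam config shape, ttsStream(), and renderAvatar() are assumptions:

import { PlaybackPipeline } from "@omote/core";

const pipeline = new PlaybackPipeline({
  lam: { modelId: "wav2arkit_cpu", backend: "wasm" }, // assumed A2EProcessor backend config
  sampleRate: 16000,
  neutralTransitionEnabled: true, // ease the face back to neutral on completion
});

await pipeline.initialize(); // lazy AudioContext setup (call after a user gesture)
pipeline.on("frame", ({ blendshapes }) => renderAvatar(blendshapes)); // renderAvatar: hypothetical

pipeline.start();
for await (const chunk of ttsStream()) { // ttsStream(): hypothetical PCM16 Uint8Array source
  await pipeline.onAudioChunk(chunk);
}
await pipeline.end(); // flush; "playback:complete" fires once audio and frames drain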
@@ -3906,7 +4216,7 @@ function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
  }

  // src/inference/SenseVoiceInference.ts
- var logger5 = createLogger("SenseVoice");
+ var logger6 = createLogger("SenseVoice");
  var _SenseVoiceInference = class _SenseVoiceInference {
  constructor(config) {
  this.session = null;
@@ -3959,26 +4269,26 @@ var _SenseVoiceInference = class _SenseVoiceInference {
  "model.backend_requested": this.config.backend
  });
  try {
- logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
+ logger6.info("Loading ONNX Runtime...", { preference: this.config.backend });
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
  this.ort = ort;
  this._backend = backend;
- logger5.info("ONNX Runtime loaded", { backend: this._backend });
- logger5.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
+ logger6.info("ONNX Runtime loaded", { backend: this._backend });
+ logger6.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
  const tokensResponse = await fetch(this.config.tokensUrl);
  if (!tokensResponse.ok) {
  throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
  }
  const tokensText = await tokensResponse.text();
  this.tokenMap = parseTokensFile(tokensText);
- logger5.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
+ logger6.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
  const sessionOptions = getSessionOptions(this._backend);
  if (this._backend === "webgpu") {
  sessionOptions.graphOptimizationLevel = "basic";
  }
  let isCached = false;
  if (isIOS()) {
- logger5.info("iOS: passing model URL directly to ORT (low-memory path)", {
+ logger6.info("iOS: passing model URL directly to ORT (low-memory path)", {
  modelUrl: this.config.modelUrl
  });
  this.session = await withTimeout(
@@ -3991,14 +4301,14 @@ var _SenseVoiceInference = class _SenseVoiceInference {
  isCached = await cache.has(this.config.modelUrl);
  let modelBuffer;
  if (isCached) {
- logger5.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
+ logger6.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
  modelBuffer = await cache.get(this.config.modelUrl);
  onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
  } else {
- logger5.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
+ logger6.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
  modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
  }
- logger5.debug("Creating ONNX session", {
+ logger6.debug("Creating ONNX session", {
  size: formatBytes(modelBuffer.byteLength),
  backend: this._backend
  });
@@ -4011,15 +4321,15 @@ var _SenseVoiceInference = class _SenseVoiceInference {
  const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
  this.negMean = cmvn.negMean;
  this.invStddev = cmvn.invStddev;
- logger5.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
+ logger6.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
  } else {
- logger5.warn("CMVN not found in model metadata \u2014 features will not be normalized");
+ logger6.warn("CMVN not found in model metadata \u2014 features will not be normalized");
  }
  } catch (cmvnErr) {
- logger5.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
+ logger6.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
  }
  const loadTimeMs = performance.now() - startTime;
- logger5.info("SenseVoice model loaded", {
+ logger6.info("SenseVoice model loaded", {
  backend: this._backend,
  loadTimeMs: Math.round(loadTimeMs),
  vocabSize: this.tokenMap.size,
@@ -4130,7 +4440,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
  const vocabSize = logitsDims[2];
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
  const inferenceTimeMs = performance.now() - startTime;
- logger5.trace("Transcription complete", {
+ logger6.trace("Transcription complete", {
  text: decoded.text.substring(0, 50),
  language: decoded.language,
  emotion: decoded.emotion,
@@ -4168,7 +4478,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
  const errMsg = err instanceof Error ? err.message : String(err);
  if (errMsg.includes("timed out")) {
  this.poisoned = true;
- logger5.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
+ logger6.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
  backend: this._backend,
  timeoutMs: _SenseVoiceInference.INFERENCE_TIMEOUT_MS
  });
@@ -4176,7 +4486,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
  const oomError = new Error(
  `SenseVoice inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
  );
- logger5.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
+ logger6.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
  pointer: `0x${err.toString(16)}`,
  backend: this._backend
  });
@@ -4189,7 +4499,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
  reject(oomError);
  return;
  } else {
- logger5.error("Inference failed", { error: errMsg, backend: this._backend });
+ logger6.error("Inference failed", { error: errMsg, backend: this._backend });
  }
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -4218,7 +4528,7 @@ _SenseVoiceInference.INFERENCE_TIMEOUT_MS = 1e4;
  var SenseVoiceInference = _SenseVoiceInference;

  // src/inference/SenseVoiceWorker.ts
- var logger6 = createLogger("SenseVoiceWorker");
+ var logger7 = createLogger("SenseVoiceWorker");
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
  var LOAD_TIMEOUT_MS = 3e5;
  var INFERENCE_TIMEOUT_MS = 1e4;
@@ -4957,7 +5267,7 @@ var SenseVoiceWorker = class {
  this.handleWorkerMessage(event.data);
  };
  worker.onerror = (error) => {
- logger6.error("Worker error", { error: error.message });
+ logger7.error("Worker error", { error: error.message });
  for (const [, resolver] of this.pendingResolvers) {
  resolver.reject(new Error(`Worker error: ${error.message}`));
  }
@@ -5037,9 +5347,9 @@ var SenseVoiceWorker = class {
  "model.language": this.config.language
  });
  try {
- logger6.info("Creating SenseVoice worker...");
+ logger7.info("Creating SenseVoice worker...");
  this.worker = this.createWorker();
- logger6.info("Loading model in worker...", {
+ logger7.info("Loading model in worker...", {
  modelUrl: this.config.modelUrl,
  tokensUrl: this.config.tokensUrl,
  language: this.config.language,
@@ -5061,7 +5371,7 @@ var SenseVoiceWorker = class {
  this._isLoaded = true;
  const loadTimeMs = performance.now() - startTime;
  onProgress?.(1, 1);
- logger6.info("SenseVoice worker loaded successfully", {
+ logger7.info("SenseVoice worker loaded successfully", {
  backend: "wasm",
  loadTimeMs: Math.round(loadTimeMs),
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -5140,7 +5450,7 @@ var SenseVoiceWorker = class {
  INFERENCE_TIMEOUT_MS
  );
  const totalTimeMs = performance.now() - startTime;
- logger6.trace("Worker transcription complete", {
+ logger7.trace("Worker transcription complete", {
  text: result.text.substring(0, 50),
  language: result.language,
  emotion: result.emotion,
@@ -5176,11 +5486,11 @@ var SenseVoiceWorker = class {
  } catch (err) {
  const errMsg = err instanceof Error ? err.message : String(err);
  if (errMsg.includes("timed out")) {
- logger6.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
+ logger7.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
  timeoutMs: INFERENCE_TIMEOUT_MS
  });
  } else {
- logger6.error("Worker inference failed", { error: errMsg });
+ logger7.error("Worker inference failed", { error: errMsg });
  }
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -5217,8 +5527,53 @@ var SenseVoiceWorker = class {
  }
  };

+ // src/inference/defaultModelUrls.ts
+ var HF = "https://huggingface.co";
+ var HF_MODEL_URLS = {
+ /** LAM A2E model — fp16 external data (385KB graph + 192MB weights, WebGPU) — 52 ARKit blendshapes */
+ lam: `${HF}/omote-ai/lam-a2e/resolve/main/model_fp16.onnx`,
+ /** wav2arkit_cpu A2E model graph (1.86MB, WASM) — Safari/iOS fallback */
+ wav2arkitCpu: `${HF}/myned-ai/wav2arkit_cpu/resolve/main/wav2arkit_cpu.onnx`,
+ /** SenseVoice ASR model (228MB int8, WASM) — speech recognition + emotion + language */
+ senseVoice: `${HF}/omote-ai/sensevoice-asr/resolve/main/model.int8.onnx`,
+ /** Silero VAD model (~2MB, WASM) — voice activity detection */
+ sileroVad: `${HF}/deepghs/silero-vad-onnx/resolve/main/silero_vad.onnx`
+ };
+ var _overrides = {};
+ var DEFAULT_MODEL_URLS = new Proxy(
+ {},
+ {
+ get(_target, prop) {
+ const key = prop;
+ return _overrides[key] ?? HF_MODEL_URLS[key];
+ },
+ ownKeys() {
+ return Object.keys(HF_MODEL_URLS);
+ },
+ getOwnPropertyDescriptor(_target, prop) {
+ if (prop in HF_MODEL_URLS) {
+ return { configurable: true, enumerable: true, value: this.get(_target, prop, _target) };
+ }
+ return void 0;
+ }
+ }
+ );
+ function configureModelUrls(urls) {
+ for (const [key, url] of Object.entries(urls)) {
+ if (key in HF_MODEL_URLS && typeof url === "string") {
+ _overrides[key] = url;
+ }
+ }
+ }
+ function resetModelUrls() {
+ for (const key of Object.keys(_overrides)) {
+ delete _overrides[key];
+ }
+ }
+ var HF_CDN_URLS = HF_MODEL_URLS;
+
  // src/inference/UnifiedInferenceWorker.ts
- var logger7 = createLogger("UnifiedInferenceWorker");
+ var logger8 = createLogger("UnifiedInferenceWorker");
  var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
  var INIT_TIMEOUT_MS = 6e4;
  var SV_LOAD_TIMEOUT_MS = 3e5;
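DEFAULT_MODEL_URLS is a Proxy over the Hugging Face defaults, so overrides registered later are visible through previously imported references. A sketch of redirecting downloads to a self-hosted mirror; the mirror host is illustrative:

import { configureModelUrls, resetModelUrls, DEFAULT_MODEL_URLS } from "@omote/core";

configureModelUrls({
  lam: "https://models.example.com/lam-a2e/model_fp16.onnx",
  senseVoice: "https://models.example.com/sensevoice/model.int8.onnx",
});

// Reads resolve overrides first, then fall back to the Hugging Face defaults.
console.log(DEFAULT_MODEL_URLS.lam);       // mirror URL
console.log(DEFAULT_MODEL_URLS.sileroVad); // unchanged HF URL

resetModelUrls(); // drop every override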
@@ -5920,7 +6275,7 @@ var UnifiedInferenceWorker = class {
  const telemetry = getTelemetry();
  const span = telemetry?.startSpan("UnifiedInferenceWorker.init");
  try {
- logger7.info("Creating unified inference worker...");
+ logger8.info("Creating unified inference worker...");
  this.worker = this.createWorker();
  await this.sendMessage(
  { type: "init", wasmPaths: WASM_CDN_PATH3, isIOS: isIOS() },
@@ -5929,7 +6284,7 @@ var UnifiedInferenceWorker = class {
  );
  this.initialized = true;
  const loadTimeMs = performance.now() - startTime;
- logger7.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
+ logger8.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs });
  span?.end();
  } catch (error) {
@@ -6103,7 +6458,7 @@ var UnifiedInferenceWorker = class {
  this.handleWorkerMessage(event.data);
  };
  worker.onerror = (error) => {
- logger7.error("Unified worker error", { error: error.message });
+ logger8.error("Unified worker error", { error: error.message });
  this.rejectAllPending(`Worker error: ${error.message}`);
  };
  return worker;
@@ -6117,7 +6472,7 @@ var UnifiedInferenceWorker = class {
  this.pendingRequests.delete(requestId);
  pending.reject(new Error(data.error));
  } else {
- logger7.error("Worker broadcast error", { error: data.error });
+ logger8.error("Worker broadcast error", { error: data.error });
  this.rejectAllPending(data.error);
  }
  return;
@@ -6139,7 +6494,7 @@ var UnifiedInferenceWorker = class {
  const timeout = setTimeout(() => {
  this.pendingRequests.delete(requestId);
  this.poisoned = true;
- logger7.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
+ logger8.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
  type: message.type,
  timeoutMs
  });
@@ -6205,7 +6560,7 @@ var SenseVoiceUnifiedAdapter = class {
  });
  this._isLoaded = true;
  onProgress?.(1, 1);
- logger7.info("SenseVoice loaded via unified worker", {
+ logger8.info("SenseVoice loaded via unified worker", {
  backend: "wasm",
  loadTimeMs: Math.round(result.loadTimeMs),
  vocabSize: result.vocabSize
@@ -6270,7 +6625,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
  externalDataUrl: externalDataUrl || null
  });
  this._isLoaded = true;
- logger7.info("Wav2ArkitCpu loaded via unified worker", {
+ logger8.info("Wav2ArkitCpu loaded via unified worker", {
  backend: "wasm",
  loadTimeMs: Math.round(result.loadTimeMs)
  });
@@ -6376,7 +6731,7 @@ var SileroVADUnifiedAdapter = class {
  sampleRate: this.config.sampleRate
  });
  this._isLoaded = true;
- logger7.info("SileroVAD loaded via unified worker", {
+ logger8.info("SileroVAD loaded via unified worker", {
  backend: "wasm",
  loadTimeMs: Math.round(result.loadTimeMs),
  sampleRate: this.config.sampleRate,
@@ -6457,12 +6812,13 @@ var SileroVADUnifiedAdapter = class {
  };

  // src/inference/createSenseVoice.ts
- var logger8 = createLogger("createSenseVoice");
- function createSenseVoice(config) {
+ var logger9 = createLogger("createSenseVoice");
+ function createSenseVoice(config = {}) {
+ const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.senseVoice;
  if (config.unifiedWorker) {
- logger8.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
+ logger9.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
  return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
- modelUrl: config.modelUrl,
+ modelUrl,
  tokensUrl: config.tokensUrl,
  language: config.language,
  textNorm: config.textNorm
@@ -6473,37 +6829,37 @@ function createSenseVoice(config) {
  });
  if (!SenseVoiceWorker.isSupported()) {
  throw new Error("Web Workers are not supported in this environment");
  }
- logger8.info("Creating SenseVoiceWorker (off-main-thread)");
+ logger9.info("Creating SenseVoiceWorker (off-main-thread)");
  return new SenseVoiceWorker({
- modelUrl: config.modelUrl,
+ modelUrl,
  tokensUrl: config.tokensUrl,
  language: config.language,
  textNorm: config.textNorm
  });
  }
  if (useWorker === false) {
- logger8.info("Creating SenseVoiceInference (main thread)");
+ logger9.info("Creating SenseVoiceInference (main thread)");
  return new SenseVoiceInference({
- modelUrl: config.modelUrl,
+ modelUrl,
  tokensUrl: config.tokensUrl,
  language: config.language,
  textNorm: config.textNorm
  });
  }
  if (SenseVoiceWorker.isSupported() && !isIOS()) {
- logger8.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
+ logger9.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
  return new SenseVoiceWorker({
- modelUrl: config.modelUrl,
+ modelUrl,
  tokensUrl: config.tokensUrl,
  language: config.language,
  textNorm: config.textNorm
  });
  }
- logger8.info("Auto-detected: creating SenseVoiceInference (main thread)", {
+ logger9.info("Auto-detected: creating SenseVoiceInference (main thread)", {
  reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
  });
  return new SenseVoiceInference({
- modelUrl: config.modelUrl,
+ modelUrl,
  tokensUrl: config.tokensUrl,
  language: config.language,
@@ -6511,7 +6867,7 @@ function createSenseVoice(config) {
  }

  // src/inference/Wav2ArkitCpuInference.ts
- var logger9 = createLogger("Wav2ArkitCpu");
+ var logger10 = createLogger("Wav2ArkitCpu");
  var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
  constructor(config) {
  this.modelId = "wav2arkit_cpu";
@@ -6553,16 +6909,16 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6553
6909
  });
6554
6910
  try {
6555
6911
  const preference = this.config.backend || "wasm";
6556
- logger9.info("Loading ONNX Runtime...", { preference });
6912
+ logger10.info("Loading ONNX Runtime...", { preference });
6557
6913
  const { ort, backend } = await getOnnxRuntimeForPreference(preference);
6558
6914
  this.ort = ort;
6559
6915
  this._backend = backend;
6560
- logger9.info("ONNX Runtime loaded", { backend: this._backend });
6916
+ logger10.info("ONNX Runtime loaded", { backend: this._backend });
6561
6917
  const modelUrl = this.config.modelUrl;
6562
6918
  const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
6563
6919
  const sessionOptions = getSessionOptions(this._backend);
6564
6920
  if (isIOS()) {
6565
- logger9.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6921
+ logger10.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6566
6922
  modelUrl,
6567
6923
  dataUrl
6568
6924
  });
@@ -6584,15 +6940,15 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6584
6940
  const isCached = await cache.has(modelUrl);
6585
6941
  let modelBuffer;
6586
6942
  if (isCached) {
6587
- logger9.debug("Loading model from cache", { modelUrl });
6943
+ logger10.debug("Loading model from cache", { modelUrl });
6588
6944
  modelBuffer = await cache.get(modelUrl);
6589
6945
  if (!modelBuffer) {
6590
- logger9.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6946
+ logger10.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6591
6947
  await cache.delete(modelUrl);
6592
6948
  modelBuffer = await fetchWithCache(modelUrl);
6593
6949
  }
6594
6950
  } else {
6595
- logger9.debug("Fetching and caching model graph", { modelUrl });
6951
+ logger10.debug("Fetching and caching model graph", { modelUrl });
6596
6952
  modelBuffer = await fetchWithCache(modelUrl);
6597
6953
  }
6598
6954
  if (!modelBuffer) {
@@ -6603,31 +6959,31 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6603
6959
  try {
6604
6960
  const isDataCached = await cache.has(dataUrl);
6605
6961
  if (isDataCached) {
6606
- logger9.debug("Loading external data from cache", { dataUrl });
6962
+ logger10.debug("Loading external data from cache", { dataUrl });
6607
6963
  externalDataBuffer = await cache.get(dataUrl);
6608
6964
  if (!externalDataBuffer) {
6609
- logger9.warn("Cache corruption for external data, retrying", { dataUrl });
6965
+ logger10.warn("Cache corruption for external data, retrying", { dataUrl });
6610
6966
  await cache.delete(dataUrl);
6611
6967
  externalDataBuffer = await fetchWithCache(dataUrl);
6612
6968
  }
6613
6969
  } else {
6614
- logger9.info("Fetching external model data", {
6970
+ logger10.info("Fetching external model data", {
6615
6971
  dataUrl,
6616
6972
  note: "This may be a large download (400MB+)"
6617
6973
  });
6618
6974
  externalDataBuffer = await fetchWithCache(dataUrl);
6619
6975
  }
6620
- logger9.info("External data loaded", {
6976
+ logger10.info("External data loaded", {
6621
6977
  size: formatBytes(externalDataBuffer.byteLength)
6622
6978
  });
6623
6979
  } catch (err) {
6624
- logger9.debug("No external data file found (single-file model)", {
6980
+ logger10.debug("No external data file found (single-file model)", {
6625
6981
  dataUrl,
6626
6982
  error: err.message
6627
6983
  });
6628
6984
  }
6629
6985
  }
6630
- logger9.debug("Creating ONNX session", {
6986
+ logger10.debug("Creating ONNX session", {
6631
6987
  graphSize: formatBytes(modelBuffer.byteLength),
6632
6988
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
6633
6989
  backend: this._backend
@@ -6643,7 +6999,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6643
6999
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
6644
7000
  }
6645
7001
  const loadTimeMs = performance.now() - startTime;
6646
- logger9.info("Model loaded successfully", {
7002
+ logger10.info("Model loaded successfully", {
6647
7003
  backend: this._backend,
6648
7004
  loadTimeMs: Math.round(loadTimeMs),
6649
7005
  inputs: this.session.inputNames,
@@ -6659,12 +7015,12 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6659
7015
  model: "wav2arkit_cpu",
6660
7016
  backend: this._backend
6661
7017
  });
6662
- logger9.debug("Running warmup inference");
7018
+ logger10.debug("Running warmup inference");
6663
7019
  const warmupStart = performance.now();
6664
7020
  const silentAudio = new Float32Array(16e3);
6665
7021
  await this.infer(silentAudio);
6666
7022
  const warmupTimeMs = performance.now() - warmupStart;
6667
- logger9.info("Warmup inference complete", {
7023
+ logger10.info("Warmup inference complete", {
6668
7024
  warmupTimeMs: Math.round(warmupTimeMs),
6669
7025
  backend: this._backend
6670
7026
  });
@@ -6751,7 +7107,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6751
7107
  const symmetrized = symmetrizeBlendshapes(rawFrame);
6752
7108
  blendshapes.push(symmetrized);
6753
7109
  }
6754
- logger9.trace("Inference completed", {
7110
+ logger10.trace("Inference completed", {
6755
7111
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
6756
7112
  numFrames,
6757
7113
  inputSamples
@@ -6779,7 +7135,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6779
7135
  const errMsg = err instanceof Error ? err.message : String(err);
6780
7136
  if (errMsg.includes("timed out")) {
6781
7137
  this.poisoned = true;
6782
- logger9.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
7138
+ logger10.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6783
7139
  backend: this._backend,
6784
7140
  timeoutMs: _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS
6785
7141
  });
@@ -6787,7 +7143,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6787
7143
  const oomError = new Error(
6788
7144
  `Wav2ArkitCpu inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
6789
7145
  );
6790
- logger9.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7146
+ logger10.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6791
7147
  pointer: `0x${err.toString(16)}`,
6792
7148
  backend: this._backend
6793
7149
  });
@@ -6800,7 +7156,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6800
7156
  reject(oomError);
6801
7157
  return;
6802
7158
  } else {
6803
- logger9.error("Inference failed", { error: errMsg, backend: this._backend });
7159
+ logger10.error("Inference failed", { error: errMsg, backend: this._backend });
6804
7160
  }
6805
7161
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
6806
7162
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -6827,7 +7183,7 @@ _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS = 5e3;
6827
7183
  var Wav2ArkitCpuInference = _Wav2ArkitCpuInference;
6828
7184
 
6829
7185
  // src/inference/Wav2ArkitCpuWorker.ts
6830
- var logger10 = createLogger("Wav2ArkitCpuWorker");
7186
+ var logger11 = createLogger("Wav2ArkitCpuWorker");
6831
7187
  var WASM_CDN_PATH4 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
6832
7188
  var LOAD_TIMEOUT_MS2 = 42e4;
6833
7189
  var INFERENCE_TIMEOUT_MS2 = 5e3;
@@ -7114,7 +7470,7 @@ var Wav2ArkitCpuWorker = class {
7114
7470
  this.handleWorkerMessage(event.data);
7115
7471
  };
7116
7472
  worker.onerror = (error) => {
7117
- logger10.error("Worker error", { error: error.message });
7473
+ logger11.error("Worker error", { error: error.message });
7118
7474
  for (const [, resolver] of this.pendingResolvers) {
7119
7475
  resolver.reject(new Error(`Worker error: ${error.message}`));
7120
7476
  }
@@ -7190,10 +7546,10 @@ var Wav2ArkitCpuWorker = class {
7190
7546
  "model.backend_requested": "wasm"
7191
7547
  });
7192
7548
  try {
7193
- logger10.info("Creating wav2arkit_cpu worker...");
7549
+ logger11.info("Creating wav2arkit_cpu worker...");
7194
7550
  this.worker = this.createWorker();
7195
7551
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
7196
- logger10.info("Loading model in worker...", {
7552
+ logger11.info("Loading model in worker...", {
7197
7553
  modelUrl: this.config.modelUrl,
7198
7554
  externalDataUrl,
7199
7555
  isIOS: isIOS()
@@ -7211,7 +7567,7 @@ var Wav2ArkitCpuWorker = class {
7211
7567
  );
7212
7568
  this._isLoaded = true;
7213
7569
  const loadTimeMs = performance.now() - startTime;
7214
- logger10.info("Wav2ArkitCpu worker loaded successfully", {
7570
+ logger11.info("Wav2ArkitCpu worker loaded successfully", {
7215
7571
  backend: "wasm",
7216
7572
  loadTimeMs: Math.round(loadTimeMs),
7217
7573
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -7296,7 +7652,7 @@ var Wav2ArkitCpuWorker = class {
7296
7652
  for (let f = 0; f < numFrames; f++) {
7297
7653
  blendshapes.push(flatBuffer.slice(f * numBlendshapes, (f + 1) * numBlendshapes));
7298
7654
  }
7299
- logger10.trace("Worker inference completed", {
7655
+ logger11.trace("Worker inference completed", {
7300
7656
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
7301
7657
  workerTimeMs: Math.round(result.inferenceTimeMs * 100) / 100,
7302
7658
  numFrames,
@@ -7326,12 +7682,12 @@ var Wav2ArkitCpuWorker = class {
7326
7682
  const errMsg = err instanceof Error ? err.message : String(err);
7327
7683
  if (errMsg.includes("timed out")) {
7328
7684
  this.poisoned = true;
7329
- logger10.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7685
+ logger11.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7330
7686
  backend: "wasm",
7331
7687
  timeoutMs: INFERENCE_TIMEOUT_MS2
7332
7688
  });
7333
7689
  } else {
7334
- logger10.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7690
+ logger11.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7335
7691
  }
7336
7692
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
7337
7693
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -7369,53 +7725,56 @@ var Wav2ArkitCpuWorker = class {
7369
7725
  };
7370
7726
 
7371
7727
  // src/inference/createA2E.ts
7372
- var logger11 = createLogger("createA2E");
7373
- function createA2E(config) {
7728
+ var logger12 = createLogger("createA2E");
7729
+ function createA2E(config = {}) {
7374
7730
  const mode = config.mode ?? "auto";
7375
7731
  const fallbackOnError = config.fallbackOnError ?? true;
7732
+ const gpuModelUrl = config.gpuModelUrl ?? DEFAULT_MODEL_URLS.lam;
7733
+ const cpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
7376
7734
  let useCpu;
7377
7735
  if (mode === "cpu") {
7378
7736
  useCpu = true;
7379
- logger11.info("Forcing CPU A2E model (wav2arkit_cpu)");
7737
+ logger12.info("Forcing CPU A2E model (wav2arkit_cpu)");
7380
7738
  } else if (mode === "gpu") {
7381
7739
  useCpu = false;
7382
- logger11.info("Forcing GPU A2E model (Wav2Vec2)");
7740
+ logger12.info("Forcing GPU A2E model (Wav2Vec2)");
7383
7741
  } else {
7384
7742
  useCpu = shouldUseCpuA2E();
7385
- logger11.info("Auto-detected A2E model", {
7743
+ logger12.info("Auto-detected A2E model", {
7386
7744
  useCpu,
7387
7745
  isSafari: isSafari()
7388
7746
  });
7389
7747
  }
7390
7748
  if (useCpu) {
7391
7749
  if (config.unifiedWorker) {
7392
- logger11.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7750
+ logger12.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7393
7751
  return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
7394
- modelUrl: config.cpuModelUrl
7752
+ modelUrl: cpuModelUrl
7395
7753
  });
7396
7754
  }
7397
7755
  if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7398
- logger11.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7756
+ logger12.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7399
7757
  return new Wav2ArkitCpuWorker({
7400
- modelUrl: config.cpuModelUrl
7758
+ modelUrl: cpuModelUrl
7401
7759
  });
7402
7760
  }
7403
- logger11.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7761
+ logger12.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7404
7762
  return new Wav2ArkitCpuInference({
7405
- modelUrl: config.cpuModelUrl
7763
+ modelUrl: cpuModelUrl
7406
7764
  });
7407
7765
  }
7766
+ const gpuExternalDataUrl = config.gpuExternalDataUrl !== void 0 ? config.gpuExternalDataUrl : void 0;
7408
7767
  const gpuInstance = new Wav2Vec2Inference({
7409
- modelUrl: config.gpuModelUrl,
7410
- externalDataUrl: config.gpuExternalDataUrl,
7768
+ modelUrl: gpuModelUrl,
7769
+ externalDataUrl: gpuExternalDataUrl,
7411
7770
  backend: config.gpuBackend ?? "auto",
7412
7771
  numIdentityClasses: config.numIdentityClasses
7413
7772
  });
7414
7773
  if (fallbackOnError) {
7415
- logger11.info("Creating Wav2Vec2Inference with CPU fallback");
7774
+ logger12.info("Creating Wav2Vec2Inference with CPU fallback");
7416
7775
  return new A2EWithFallback(gpuInstance, config);
7417
7776
  }
7418
- logger11.info("Creating Wav2Vec2Inference (no fallback)");
7777
+ logger12.info("Creating Wav2Vec2Inference (no fallback)");
7419
7778
  return gpuInstance;
7420
7779
  }
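
createA2E now accepts an empty config: mode defaults to "auto", fallbackOnError to true, and the model URLs fall back to DEFAULT_MODEL_URLS. A minimal zero-config sketch (assuming the package's public exports; names like initLipSync are illustrative):

  const { createA2E } = require("@omote/core");

  async function initLipSync() {
    // "auto" picks the CPU wav2arkit model or the GPU Wav2Vec2 model via
    // shouldUseCpuA2E(); with fallbackOnError left true, the returned
    // instance is wrapped in A2EWithFallback and degrades to CPU on load failure.
    const a2e = createA2E();   // zero-config: URLs come from DEFAULT_MODEL_URLS
    await a2e.load();
    return a2e;
  }
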
7421
7780
  var A2EWithFallback = class {
@@ -7423,6 +7782,7 @@ var A2EWithFallback = class {
7423
7782
  this.hasFallenBack = false;
7424
7783
  this.implementation = gpuInstance;
7425
7784
  this.config = config;
7785
+ this.resolvedCpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
7426
7786
  }
7427
7787
  get modelId() {
7428
7788
  return this.implementation.modelId;
@@ -7444,26 +7804,26 @@ var A2EWithFallback = class {
7444
7804
  }
7445
7805
  }
7446
7806
  async fallbackToCpu(reason) {
7447
- logger11.warn("GPU model load failed, falling back to CPU model", { reason });
7807
+ logger12.warn("GPU model load failed, falling back to CPU model", { reason });
7448
7808
  try {
7449
7809
  await this.implementation.dispose();
7450
7810
  } catch {
7451
7811
  }
7452
7812
  if (this.config.unifiedWorker) {
7453
7813
  this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
7454
- modelUrl: this.config.cpuModelUrl
7814
+ modelUrl: this.resolvedCpuModelUrl
7455
7815
  });
7456
- logger11.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7816
+ logger12.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7457
7817
  } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7458
7818
  this.implementation = new Wav2ArkitCpuWorker({
7459
- modelUrl: this.config.cpuModelUrl
7819
+ modelUrl: this.resolvedCpuModelUrl
7460
7820
  });
7461
- logger11.info("Fallback to Wav2ArkitCpuWorker successful");
7821
+ logger12.info("Fallback to Wav2ArkitCpuWorker successful");
7462
7822
  } else {
7463
7823
  this.implementation = new Wav2ArkitCpuInference({
7464
- modelUrl: this.config.cpuModelUrl
7824
+ modelUrl: this.resolvedCpuModelUrl
7465
7825
  });
7466
- logger11.info("Fallback to Wav2ArkitCpuInference successful");
7826
+ logger12.info("Fallback to Wav2ArkitCpuInference successful");
7467
7827
  }
7468
7828
  this.hasFallenBack = true;
7469
7829
  return await this.implementation.load();
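
The fallback path resolves its CPU model URL once in the constructor (resolvedCpuModelUrl), so a GPU failure always falls back to the URL captured at creation time. The selection order it tries, restated as a hypothetical helper (pickCpuBackend is not a package export):

  // Assumed sketch of the order fallbackToCpu() tries, top to bottom:
  function pickCpuBackend(config, resolvedCpuModelUrl) {
    if (config.unifiedWorker)   // shared unified worker, when one was provided
      return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, { modelUrl: resolvedCpuModelUrl });
    if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS())
      return new Wav2ArkitCpuWorker({ modelUrl: resolvedCpuModelUrl });   // dedicated worker
    return new Wav2ArkitCpuInference({ modelUrl: resolvedCpuModelUrl });  // main-thread WASM
  }
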
@@ -7667,7 +8027,7 @@ var EmphasisDetector = class {
7667
8027
  };
7668
8028
 
7669
8029
  // src/inference/SileroVADInference.ts
7670
- var logger12 = createLogger("SileroVAD");
8030
+ var logger13 = createLogger("SileroVAD");
7671
8031
  var SileroVADInference = class {
7672
8032
  constructor(config) {
7673
8033
  this.session = null;
@@ -7741,23 +8101,23 @@ var SileroVADInference = class {
7741
8101
  "model.sample_rate": this.config.sampleRate
7742
8102
  });
7743
8103
  try {
7744
- logger12.info("Loading ONNX Runtime...", { preference: this.config.backend });
8104
+ logger13.info("Loading ONNX Runtime...", { preference: this.config.backend });
7745
8105
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
7746
8106
  this.ort = ort;
7747
8107
  this._backend = backend;
7748
- logger12.info("ONNX Runtime loaded", { backend: this._backend });
8108
+ logger13.info("ONNX Runtime loaded", { backend: this._backend });
7749
8109
  const cache = getModelCache();
7750
8110
  const modelUrl = this.config.modelUrl;
7751
8111
  const isCached = await cache.has(modelUrl);
7752
8112
  let modelBuffer;
7753
8113
  if (isCached) {
7754
- logger12.debug("Loading model from cache", { modelUrl });
8114
+ logger13.debug("Loading model from cache", { modelUrl });
7755
8115
  modelBuffer = await cache.get(modelUrl);
7756
8116
  } else {
7757
- logger12.debug("Fetching and caching model", { modelUrl });
8117
+ logger13.debug("Fetching and caching model", { modelUrl });
7758
8118
  modelBuffer = await fetchWithCache(modelUrl);
7759
8119
  }
7760
- logger12.debug("Creating ONNX session", {
8120
+ logger13.debug("Creating ONNX session", {
7761
8121
  size: formatBytes(modelBuffer.byteLength),
7762
8122
  backend: this._backend
7763
8123
  });
@@ -7766,7 +8126,7 @@ var SileroVADInference = class {
7766
8126
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
7767
8127
  this.reset();
7768
8128
  const loadTimeMs = performance.now() - startTime;
7769
- logger12.info("Model loaded successfully", {
8129
+ logger13.info("Model loaded successfully", {
7770
8130
  backend: this._backend,
7771
8131
  loadTimeMs: Math.round(loadTimeMs),
7772
8132
  sampleRate: this.config.sampleRate,
@@ -7821,7 +8181,7 @@ var SileroVADInference = class {
7821
8181
  []
7822
8182
  );
7823
8183
  } catch (e) {
7824
- logger12.warn("BigInt64Array not available, using bigint array fallback", {
8184
+ logger13.warn("BigInt64Array not available, using bigint array fallback", {
7825
8185
  error: e instanceof Error ? e.message : String(e)
7826
8186
  });
7827
8187
  this.srTensor = new this.ort.Tensor(
@@ -7927,7 +8287,7 @@ var SileroVADInference = class {
7927
8287
  this.preSpeechBuffer.shift();
7928
8288
  }
7929
8289
  }
7930
- logger12.trace("Skipping VAD inference - audio too quiet", {
8290
+ logger13.trace("Skipping VAD inference - audio too quiet", {
7931
8291
  rms: Math.round(rms * 1e4) / 1e4,
7932
8292
  threshold: MIN_ENERGY_THRESHOLD
7933
8293
  });
@@ -7981,7 +8341,7 @@ var SileroVADInference = class {
7981
8341
  if (isSpeech && !this.wasSpeaking) {
7982
8342
  preSpeechChunks = [...this.preSpeechBuffer];
7983
8343
  this.preSpeechBuffer = [];
7984
- logger12.debug("Speech started with pre-speech buffer", {
8344
+ logger13.debug("Speech started with pre-speech buffer", {
7985
8345
  preSpeechChunks: preSpeechChunks.length,
7986
8346
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
7987
8347
  });
@@ -7994,7 +8354,7 @@ var SileroVADInference = class {
7994
8354
  this.preSpeechBuffer = [];
7995
8355
  }
7996
8356
  this.wasSpeaking = isSpeech;
7997
- logger12.trace("VAD inference completed", {
8357
+ logger13.trace("VAD inference completed", {
7998
8358
  probability: Math.round(probability * 1e3) / 1e3,
7999
8359
  isSpeech,
8000
8360
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -8025,7 +8385,7 @@ var SileroVADInference = class {
8025
8385
  const oomError = new Error(
8026
8386
  `SileroVAD inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reducing concurrent model sessions or reloading the page.`
8027
8387
  );
8028
- logger12.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
8388
+ logger13.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
8029
8389
  pointer: `0x${err.toString(16)}`,
8030
8390
  backend: this._backend
8031
8391
  });
@@ -8068,7 +8428,7 @@ var SileroVADInference = class {
8068
8428
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
8069
8429
 
8070
8430
  // src/inference/SileroVADWorker.ts
8071
- var logger13 = createLogger("SileroVADWorker");
8431
+ var logger14 = createLogger("SileroVADWorker");
8072
8432
  var WASM_CDN_PATH5 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
8073
8433
  var LOAD_TIMEOUT_MS3 = 12e4;
8074
8434
  var INFERENCE_TIMEOUT_MS3 = 1e3;
@@ -8353,7 +8713,7 @@ var SileroVADWorker = class {
8353
8713
  this.handleWorkerMessage(event.data);
8354
8714
  };
8355
8715
  worker.onerror = (error) => {
8356
- logger13.error("Worker error", { error: error.message });
8716
+ logger14.error("Worker error", { error: error.message });
8357
8717
  for (const [, resolver] of this.pendingResolvers) {
8358
8718
  resolver.reject(new Error(`Worker error: ${error.message}`));
8359
8719
  }
@@ -8429,9 +8789,9 @@ var SileroVADWorker = class {
8429
8789
  "model.sample_rate": this.config.sampleRate
8430
8790
  });
8431
8791
  try {
8432
- logger13.info("Creating VAD worker...");
8792
+ logger14.info("Creating VAD worker...");
8433
8793
  this.worker = this.createWorker();
8434
- logger13.info("Loading model in worker...", {
8794
+ logger14.info("Loading model in worker...", {
8435
8795
  modelUrl: this.config.modelUrl,
8436
8796
  sampleRate: this.config.sampleRate
8437
8797
  });
@@ -8447,7 +8807,7 @@ var SileroVADWorker = class {
8447
8807
  );
8448
8808
  this._isLoaded = true;
8449
8809
  const loadTimeMs = performance.now() - startTime;
8450
- logger13.info("VAD worker loaded successfully", {
8810
+ logger14.info("VAD worker loaded successfully", {
8451
8811
  backend: "wasm",
8452
8812
  loadTimeMs: Math.round(loadTimeMs),
8453
8813
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -8554,7 +8914,7 @@ var SileroVADWorker = class {
8554
8914
  if (isSpeech && !this.wasSpeaking) {
8555
8915
  preSpeechChunks = [...this.preSpeechBuffer];
8556
8916
  this.preSpeechBuffer = [];
8557
- logger13.debug("Speech started with pre-speech buffer", {
8917
+ logger14.debug("Speech started with pre-speech buffer", {
8558
8918
  preSpeechChunks: preSpeechChunks.length,
8559
8919
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8560
8920
  });
@@ -8567,7 +8927,7 @@ var SileroVADWorker = class {
8567
8927
  this.preSpeechBuffer = [];
8568
8928
  }
8569
8929
  this.wasSpeaking = isSpeech;
8570
- logger13.trace("VAD worker inference completed", {
8930
+ logger14.trace("VAD worker inference completed", {
8571
8931
  probability: Math.round(result.probability * 1e3) / 1e3,
8572
8932
  isSpeech,
8573
8933
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -8635,63 +8995,65 @@ var SileroVADWorker = class {
8635
8995
  };
8636
8996
 
8637
8997
  // src/inference/createSileroVAD.ts
8638
- var logger14 = createLogger("createSileroVAD");
8998
+ var logger15 = createLogger("createSileroVAD");
8639
8999
  function supportsVADWorker() {
8640
9000
  if (typeof Worker === "undefined") {
8641
- logger14.debug("Worker not supported: Worker constructor undefined");
9001
+ logger15.debug("Worker not supported: Worker constructor undefined");
8642
9002
  return false;
8643
9003
  }
8644
9004
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
8645
- logger14.debug("Worker not supported: URL.createObjectURL unavailable");
9005
+ logger15.debug("Worker not supported: URL.createObjectURL unavailable");
8646
9006
  return false;
8647
9007
  }
8648
9008
  if (typeof Blob === "undefined") {
8649
- logger14.debug("Worker not supported: Blob constructor unavailable");
9009
+ logger15.debug("Worker not supported: Blob constructor unavailable");
8650
9010
  return false;
8651
9011
  }
8652
9012
  return true;
8653
9013
  }
8654
- function createSileroVAD(config) {
9014
+ function createSileroVAD(config = {}) {
9015
+ const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.sileroVad;
9016
+ const resolvedConfig = { ...config, modelUrl };
8655
9017
  if (config.unifiedWorker) {
8656
- logger14.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8657
- return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
9018
+ logger15.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
9019
+ return new SileroVADUnifiedAdapter(config.unifiedWorker, resolvedConfig);
8658
9020
  }
8659
9021
  const fallbackOnError = config.fallbackOnError ?? true;
8660
9022
  let useWorker;
8661
9023
  if (config.useWorker !== void 0) {
8662
9024
  useWorker = config.useWorker;
8663
- logger14.debug("Worker preference explicitly set", { useWorker });
9025
+ logger15.debug("Worker preference explicitly set", { useWorker });
8664
9026
  } else {
8665
9027
  const workerSupported = supportsVADWorker();
8666
9028
  const onMobile = isMobile();
8667
9029
  useWorker = workerSupported && !onMobile;
8668
- logger14.debug("Auto-detected Worker preference", {
9030
+ logger15.debug("Auto-detected Worker preference", {
8669
9031
  useWorker,
8670
9032
  workerSupported,
8671
9033
  onMobile
8672
9034
  });
8673
9035
  }
8674
9036
  if (useWorker) {
8675
- logger14.info("Creating SileroVADWorker (off-main-thread)");
9037
+ logger15.info("Creating SileroVADWorker (off-main-thread)");
8676
9038
  const worker = new SileroVADWorker({
8677
- modelUrl: config.modelUrl,
9039
+ modelUrl,
8678
9040
  sampleRate: config.sampleRate,
8679
9041
  threshold: config.threshold,
8680
9042
  preSpeechBufferChunks: config.preSpeechBufferChunks
8681
9043
  });
8682
9044
  if (fallbackOnError) {
8683
- return new VADWorkerWithFallback(worker, config);
9045
+ return new VADWorkerWithFallback(worker, resolvedConfig);
8684
9046
  }
8685
9047
  return worker;
8686
9048
  }
8687
- logger14.info("Creating SileroVADInference (main thread)");
8688
- return new SileroVADInference(config);
9049
+ logger15.info("Creating SileroVADInference (main thread)");
9050
+ return new SileroVADInference(resolvedConfig);
8689
9051
  }
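
createSileroVAD likewise defaults modelUrl to DEFAULT_MODEL_URLS.sileroVad and auto-selects a dedicated worker on desktop (main-thread WASM on mobile, per the isMobile() check above). Usage sketch, inside an async context (field access follows the process() result shape used by MicLipSync below):

  const vad = createSileroVAD();   // zero-config: default model URL, auto worker choice
  await vad.load();
  const chunk = new Float32Array(vad.getChunkSize());   // fill with mono PCM at the configured sample rate
  const result = await vad.process(chunk);
  if (result.isSpeech) { /* speech detected */ }
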
8690
9052
  var VADWorkerWithFallback = class {
8691
- constructor(worker, config) {
9053
+ constructor(worker, resolvedConfig) {
8692
9054
  this.hasFallenBack = false;
8693
9055
  this.implementation = worker;
8694
- this.config = config;
9056
+ this.resolvedConfig = resolvedConfig;
8695
9057
  }
8696
9058
  get backend() {
8697
9059
  if (!this.isLoaded) return null;
@@ -8710,16 +9072,16 @@ var VADWorkerWithFallback = class {
8710
9072
  try {
8711
9073
  return await this.implementation.load();
8712
9074
  } catch (error) {
8713
- logger14.warn("Worker load failed, falling back to main thread", {
9075
+ logger15.warn("Worker load failed, falling back to main thread", {
8714
9076
  error: error instanceof Error ? error.message : String(error)
8715
9077
  });
8716
9078
  try {
8717
9079
  await this.implementation.dispose();
8718
9080
  } catch {
8719
9081
  }
8720
- this.implementation = new SileroVADInference(this.config);
9082
+ this.implementation = new SileroVADInference(this.resolvedConfig);
8721
9083
  this.hasFallenBack = true;
8722
- logger14.info("Fallback to SileroVADInference successful");
9084
+ logger15.info("Fallback to SileroVADInference successful");
8723
9085
  return await this.implementation.load();
8724
9086
  }
8725
9087
  }
@@ -8741,7 +9103,7 @@ var VADWorkerWithFallback = class {
8741
9103
  };
8742
9104
 
8743
9105
  // src/inference/A2EOrchestrator.ts
8744
- var logger15 = createLogger("A2EOrchestrator");
9106
+ var logger16 = createLogger("A2EOrchestrator");
8745
9107
  var A2EOrchestrator = class {
8746
9108
  constructor(config) {
8747
9109
  this.a2e = null;
@@ -8782,7 +9144,7 @@ var A2EOrchestrator = class {
8782
9144
  */
8783
9145
  async load() {
8784
9146
  if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
8785
- logger15.info("Loading A2E model...");
9147
+ logger16.info("Loading A2E model...");
8786
9148
  this.a2e = createA2E({
8787
9149
  gpuModelUrl: this.config.gpuModelUrl,
8788
9150
  gpuExternalDataUrl: this.config.gpuExternalDataUrl,
@@ -8799,7 +9161,7 @@ var A2EOrchestrator = class {
8799
9161
  onError: this.config.onError
8800
9162
  });
8801
9163
  this._isReady = true;
8802
- logger15.info("A2E model loaded", {
9164
+ logger16.info("A2E model loaded", {
8803
9165
  backend: info.backend,
8804
9166
  loadTimeMs: info.loadTimeMs,
8805
9167
  modelId: this.a2e.modelId
@@ -8854,10 +9216,10 @@ var A2EOrchestrator = class {
8854
9216
  this.scriptProcessor.connect(this.audioContext.destination);
8855
9217
  this._isStreaming = true;
8856
9218
  this.processor.startDrip();
8857
- logger15.info("Mic capture started", { sampleRate: this.nativeSampleRate });
9219
+ logger16.info("Mic capture started", { sampleRate: this.nativeSampleRate });
8858
9220
  } catch (err) {
8859
9221
  const error = err instanceof Error ? err : new Error(String(err));
8860
- logger15.error("Failed to start mic capture", { error: error.message });
9222
+ logger16.error("Failed to start mic capture", { error: error.message });
8861
9223
  this.config.onError?.(error);
8862
9224
  throw error;
8863
9225
  }
@@ -8885,7 +9247,7 @@ var A2EOrchestrator = class {
8885
9247
  });
8886
9248
  this.audioContext = null;
8887
9249
  }
8888
- logger15.info("Mic capture stopped");
9250
+ logger16.info("Mic capture stopped");
8889
9251
  }
8890
9252
  /**
8891
9253
  * Dispose of all resources
@@ -8908,7 +9270,7 @@ var A2EOrchestrator = class {
8908
9270
  };
8909
9271
 
8910
9272
  // src/inference/SafariSpeechRecognition.ts
8911
- var logger16 = createLogger("SafariSpeech");
9273
+ var logger17 = createLogger("SafariSpeech");
8912
9274
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
8913
9275
  constructor(config = {}) {
8914
9276
  this.recognition = null;
@@ -8927,7 +9289,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8927
9289
  interimResults: config.interimResults ?? true,
8928
9290
  maxAlternatives: config.maxAlternatives ?? 1
8929
9291
  };
8930
- logger16.debug("SafariSpeechRecognition created", {
9292
+ logger17.debug("SafariSpeechRecognition created", {
8931
9293
  language: this.config.language,
8932
9294
  continuous: this.config.continuous
8933
9295
  });
@@ -8988,7 +9350,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8988
9350
  */
8989
9351
  async start() {
8990
9352
  if (this.isListening) {
8991
- logger16.warn("Already listening");
9353
+ logger17.warn("Already listening");
8992
9354
  return;
8993
9355
  }
8994
9356
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -9018,7 +9380,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9018
9380
  this.isListening = true;
9019
9381
  this.startTime = performance.now();
9020
9382
  this.accumulatedText = "";
9021
- logger16.info("Speech recognition started", {
9383
+ logger17.info("Speech recognition started", {
9022
9384
  language: this.config.language
9023
9385
  });
9024
9386
  span?.end();
@@ -9033,7 +9395,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9033
9395
  */
9034
9396
  async stop() {
9035
9397
  if (!this.isListening || !this.recognition) {
9036
- logger16.warn("Not currently listening");
9398
+ logger17.warn("Not currently listening");
9037
9399
  return {
9038
9400
  text: this.accumulatedText,
9039
9401
  language: this.config.language,
@@ -9062,7 +9424,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9062
9424
  if (this.recognition && this.isListening) {
9063
9425
  this.recognition.abort();
9064
9426
  this.isListening = false;
9065
- logger16.info("Speech recognition aborted");
9427
+ logger17.info("Speech recognition aborted");
9066
9428
  }
9067
9429
  }
9068
9430
  /**
@@ -9093,7 +9455,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9093
9455
  this.isListening = false;
9094
9456
  this.resultCallbacks = [];
9095
9457
  this.errorCallbacks = [];
9096
- logger16.debug("SafariSpeechRecognition disposed");
9458
+ logger17.debug("SafariSpeechRecognition disposed");
9097
9459
  }
9098
9460
  /**
9099
9461
  * Set up event handlers for the recognition instance
@@ -9121,7 +9483,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9121
9483
  confidence: alternative.confidence
9122
9484
  };
9123
9485
  this.emitResult(speechResult);
9124
- logger16.trace("Speech result", {
9486
+ logger17.trace("Speech result", {
9125
9487
  text: text.substring(0, 50),
9126
9488
  isFinal,
9127
9489
  confidence: alternative.confidence
@@ -9131,12 +9493,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9131
9493
  span?.end();
9132
9494
  } catch (error) {
9133
9495
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
9134
- logger16.error("Error processing speech result", { error });
9496
+ logger17.error("Error processing speech result", { error });
9135
9497
  }
9136
9498
  };
9137
9499
  this.recognition.onerror = (event) => {
9138
9500
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
9139
- logger16.error("Speech recognition error", { error: event.error, message: event.message });
9501
+ logger17.error("Speech recognition error", { error: event.error, message: event.message });
9140
9502
  this.emitError(error);
9141
9503
  if (this.stopRejecter) {
9142
9504
  this.stopRejecter(error);
@@ -9146,7 +9508,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9146
9508
  };
9147
9509
  this.recognition.onend = () => {
9148
9510
  this.isListening = false;
9149
- logger16.info("Speech recognition ended", {
9511
+ logger17.info("Speech recognition ended", {
9150
9512
  totalText: this.accumulatedText.length,
9151
9513
  durationMs: performance.now() - this.startTime
9152
9514
  });
@@ -9163,13 +9525,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9163
9525
  }
9164
9526
  };
9165
9527
  this.recognition.onstart = () => {
9166
- logger16.debug("Speech recognition started by browser");
9528
+ logger17.debug("Speech recognition started by browser");
9167
9529
  };
9168
9530
  this.recognition.onspeechstart = () => {
9169
- logger16.debug("Speech detected");
9531
+ logger17.debug("Speech detected");
9170
9532
  };
9171
9533
  this.recognition.onspeechend = () => {
9172
- logger16.debug("Speech ended");
9534
+ logger17.debug("Speech ended");
9173
9535
  };
9174
9536
  }
9175
9537
  /**
@@ -9180,7 +9542,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9180
9542
  try {
9181
9543
  callback(result);
9182
9544
  } catch (error) {
9183
- logger16.error("Error in result callback", { error });
9545
+ logger17.error("Error in result callback", { error });
9184
9546
  }
9185
9547
  }
9186
9548
  }
@@ -9192,7 +9554,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9192
9554
  try {
9193
9555
  callback(error);
9194
9556
  } catch (callbackError) {
9195
- logger16.error("Error in error callback", { error: callbackError });
9557
+ logger17.error("Error in error callback", { error: callbackError });
9196
9558
  }
9197
9559
  }
9198
9560
  }
@@ -9762,338 +10124,32 @@ var AnimationGraph = class extends EventEmitter {
9762
10124
  }
9763
10125
  };
9764
10126
 
9765
- // src/animation/simplex2d.ts
9766
- var perm = new Uint8Array(512);
9767
- var grad2 = [
9768
- [1, 1],
9769
- [-1, 1],
9770
- [1, -1],
9771
- [-1, -1],
9772
- [1, 0],
9773
- [-1, 0],
9774
- [0, 1],
9775
- [0, -1]
9776
- ];
9777
- var p = [
9778
- 151,
9779
- 160,
9780
- 137,
9781
- 91,
9782
- 90,
9783
- 15,
9784
- 131,
9785
- 13,
9786
- 201,
9787
- 95,
9788
- 96,
9789
- 53,
9790
- 194,
9791
- 233,
9792
- 7,
9793
- 225,
9794
- 140,
9795
- 36,
9796
- 103,
9797
- 30,
9798
- 69,
9799
- 142,
9800
- 8,
9801
- 99,
9802
- 37,
9803
- 240,
9804
- 21,
9805
- 10,
9806
- 23,
9807
- 190,
9808
- 6,
9809
- 148,
9810
- 247,
9811
- 120,
9812
- 234,
9813
- 75,
9814
- 0,
9815
- 26,
9816
- 197,
9817
- 62,
9818
- 94,
9819
- 252,
9820
- 219,
9821
- 203,
9822
- 117,
9823
- 35,
9824
- 11,
9825
- 32,
9826
- 57,
9827
- 177,
9828
- 33,
9829
- 88,
9830
- 237,
9831
- 149,
9832
- 56,
9833
- 87,
9834
- 174,
9835
- 20,
9836
- 125,
9837
- 136,
9838
- 171,
9839
- 168,
9840
- 68,
9841
- 175,
9842
- 74,
9843
- 165,
9844
- 71,
9845
- 134,
9846
- 139,
9847
- 48,
9848
- 27,
9849
- 166,
9850
- 77,
9851
- 146,
9852
- 158,
9853
- 231,
9854
- 83,
9855
- 111,
9856
- 229,
9857
- 122,
9858
- 60,
9859
- 211,
9860
- 133,
9861
- 230,
9862
- 220,
9863
- 105,
9864
- 92,
9865
- 41,
9866
- 55,
9867
- 46,
9868
- 245,
9869
- 40,
9870
- 244,
9871
- 102,
9872
- 143,
9873
- 54,
9874
- 65,
9875
- 25,
9876
- 63,
9877
- 161,
9878
- 1,
9879
- 216,
9880
- 80,
9881
- 73,
9882
- 209,
9883
- 76,
9884
- 132,
9885
- 187,
9886
- 208,
9887
- 89,
9888
- 18,
9889
- 169,
9890
- 200,
9891
- 196,
9892
- 135,
9893
- 130,
9894
- 116,
9895
- 188,
9896
- 159,
9897
- 86,
9898
- 164,
9899
- 100,
9900
- 109,
9901
- 198,
9902
- 173,
9903
- 186,
9904
- 3,
9905
- 64,
9906
- 52,
9907
- 217,
9908
- 226,
9909
- 250,
9910
- 124,
9911
- 123,
9912
- 5,
9913
- 202,
9914
- 38,
9915
- 147,
9916
- 118,
9917
- 126,
9918
- 255,
9919
- 82,
9920
- 85,
9921
- 212,
9922
- 207,
9923
- 206,
9924
- 59,
9925
- 227,
9926
- 47,
9927
- 16,
9928
- 58,
9929
- 17,
9930
- 182,
9931
- 189,
9932
- 28,
9933
- 42,
9934
- 223,
9935
- 183,
9936
- 170,
9937
- 213,
9938
- 119,
9939
- 248,
9940
- 152,
9941
- 2,
9942
- 44,
9943
- 154,
9944
- 163,
9945
- 70,
9946
- 221,
9947
- 153,
9948
- 101,
9949
- 155,
9950
- 167,
9951
- 43,
9952
- 172,
9953
- 9,
9954
- 129,
9955
- 22,
9956
- 39,
9957
- 253,
9958
- 19,
9959
- 98,
9960
- 108,
9961
- 110,
9962
- 79,
9963
- 113,
9964
- 224,
9965
- 232,
9966
- 178,
9967
- 185,
9968
- 112,
9969
- 104,
9970
- 218,
9971
- 246,
9972
- 97,
9973
- 228,
9974
- 251,
9975
- 34,
9976
- 242,
9977
- 193,
9978
- 238,
9979
- 210,
9980
- 144,
9981
- 12,
9982
- 191,
9983
- 179,
9984
- 162,
9985
- 241,
9986
- 81,
9987
- 51,
9988
- 145,
9989
- 235,
9990
- 249,
9991
- 14,
9992
- 239,
9993
- 107,
9994
- 49,
9995
- 192,
9996
- 214,
9997
- 31,
9998
- 181,
9999
- 199,
10000
- 106,
10001
- 157,
10002
- 184,
10003
- 84,
10004
- 204,
10005
- 176,
10006
- 115,
10007
- 121,
10008
- 50,
10009
- 45,
10010
- 127,
10011
- 4,
10012
- 150,
10013
- 254,
10014
- 138,
10015
- 236,
10016
- 205,
10017
- 93,
10018
- 222,
10019
- 114,
10020
- 67,
10021
- 29,
10022
- 24,
10023
- 72,
10024
- 243,
10025
- 141,
10026
- 128,
10027
- 195,
10028
- 78,
10029
- 66,
10030
- 215,
10031
- 61,
10032
- 156,
10033
- 180
10034
- ];
10035
- for (let i = 0; i < 256; i++) {
10036
- perm[i] = p[i];
10037
- perm[i + 256] = p[i];
10038
- }
10039
- var F2 = 0.5 * (Math.sqrt(3) - 1);
10040
- var G2 = (3 - Math.sqrt(3)) / 6;
10041
- function dot2(g, x, y) {
10042
- return g[0] * x + g[1] * y;
10043
- }
10044
- function simplex2d(x, y) {
10045
- const s = (x + y) * F2;
10046
- const i = Math.floor(x + s);
10047
- const j = Math.floor(y + s);
10048
- const t = (i + j) * G2;
10049
- const X0 = i - t;
10050
- const Y0 = j - t;
10051
- const x0 = x - X0;
10052
- const y0 = y - Y0;
10053
- const i1 = x0 > y0 ? 1 : 0;
10054
- const j1 = x0 > y0 ? 0 : 1;
10055
- const x1 = x0 - i1 + G2;
10056
- const y1 = y0 - j1 + G2;
10057
- const x2 = x0 - 1 + 2 * G2;
10058
- const y2 = y0 - 1 + 2 * G2;
10059
- const ii = i & 255;
10060
- const jj = j & 255;
10061
- const gi0 = perm[ii + perm[jj]] % 8;
10062
- const gi1 = perm[ii + i1 + perm[jj + j1]] % 8;
10063
- const gi2 = perm[ii + 1 + perm[jj + 1]] % 8;
10064
- let n0 = 0;
10065
- let t0 = 0.5 - x0 * x0 - y0 * y0;
10066
- if (t0 >= 0) {
10067
- t0 *= t0;
10068
- n0 = t0 * t0 * dot2(grad2[gi0], x0, y0);
10069
- }
10070
- let n1 = 0;
10071
- let t1 = 0.5 - x1 * x1 - y1 * y1;
10072
- if (t1 >= 0) {
10073
- t1 *= t1;
10074
- n1 = t1 * t1 * dot2(grad2[gi1], x1, y1);
10075
- }
10076
- let n2 = 0;
10077
- let t2 = 0.5 - x2 * x2 - y2 * y2;
10078
- if (t2 >= 0) {
10079
- t2 *= t2;
10080
- n2 = t2 * t2 * dot2(grad2[gi2], x2, y2);
10081
- }
10082
- return 70 * (n0 + n1 + n2);
10083
- }
10084
-
10085
10127
  // src/animation/ProceduralLifeLayer.ts
10128
+ var import_simplex_noise = require("simplex-noise");
10129
+ var simplex2d = (0, import_simplex_noise.createNoise2D)();
10130
+ var LIFE_BS_INDEX = /* @__PURE__ */ new Map();
10131
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
10132
+ LIFE_BS_INDEX.set(LAM_BLENDSHAPES[i], i);
10133
+ }
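
The inline simplex implementation is gone; the layer now calls createNoise2D() from the simplex-noise dependency. Call sites are unchanged because the returned function has the same shape, (x, y) => value in roughly [-1, 1]. A sketch of the swapped-in API (assuming simplex-noise v4):

  const { createNoise2D } = require("simplex-noise");
  const noise2D = createNoise2D();          // unseeded: uses Math.random internally
  const n = noise2D(t * 0.8 + 73.1, 0.5);   // smooth pseudo-random drift, ~[-1, 1]
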
10086
10134
  var PHASE_OPEN = 0;
10087
10135
  var PHASE_CLOSING = 1;
10088
10136
  var PHASE_CLOSED = 2;
10089
10137
  var PHASE_OPENING = 3;
10090
- var BLINK_CLOSE_DURATION = 0.06;
10138
+ var BLINK_CLOSE_DURATION = 0.092;
10091
10139
  var BLINK_HOLD_DURATION = 0.04;
10092
- var BLINK_OPEN_DURATION = 0.15;
10140
+ var BLINK_OPEN_DURATION = 0.242;
10093
10141
  var BLINK_ASYMMETRY_DELAY = 8e-3;
10142
+ var BLINK_IBI_MU = Math.log(5.97);
10143
+ var BLINK_IBI_SIGMA = 0.89;
10094
10144
  var GAZE_BREAK_DURATION = 0.12;
10095
10145
  var GAZE_BREAK_HOLD_DURATION = 0.3;
10096
10146
  var GAZE_BREAK_RETURN_DURATION = 0.15;
10147
+ var GAZE_STATE_PARAMS = {
10148
+ idle: { interval: [2, 5], amplitude: [0.15, 0.4] },
10149
+ listening: { interval: [4, 10], amplitude: [0.1, 0.25] },
10150
+ thinking: { interval: [1, 3], amplitude: [0.2, 0.5] },
10151
+ speaking: { interval: [2, 6], amplitude: [0.15, 0.35] }
10152
+ };
10097
10153
  var EYE_NOISE_X_FREQ = 0.8;
10098
10154
  var EYE_NOISE_Y_FREQ = 0.6;
10099
10155
  var EYE_NOISE_X_PHASE = 73.1;
@@ -10121,6 +10177,12 @@ function smoothStep(t) {
10121
10177
  function softClamp(v, max) {
10122
10178
  return Math.tanh(v / max) * max;
10123
10179
  }
10180
+ function sampleLogNormal(mu, sigma) {
10181
+ const u1 = Math.random();
10182
+ const u2 = Math.random();
10183
+ const z = Math.sqrt(-2 * Math.log(u1 || 1e-10)) * Math.cos(2 * Math.PI * u2);
10184
+ return Math.exp(mu + sigma * z);
10185
+ }
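
Blink scheduling moves from uniform sampling to log-normal inter-blink intervals (BLINK_IBI_MU = ln(5.97), BLINK_IBI_SIGMA = 0.89, per the study cited at nextBlinkInterval below). sampleLogNormal is a Box-Muller transform: two uniforms give a standard normal z, and exp(mu + sigma * z) is log-normal with median e^mu = 5.97 s. A worked check (values illustrative):

  const mu = Math.log(5.97), sigma = 0.89;
  // z ~ N(0, 1) via Box-Muller; the || 1e-10 guard avoids log(0)
  const z = Math.sqrt(-2 * Math.log(Math.random() || 1e-10)) * Math.cos(2 * Math.PI * Math.random());
  const blinkGapSec = Math.min(12, Math.max(1.5, Math.exp(mu + sigma * z)));  // clamp trims the heavy tail
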
10124
10186
  var ProceduralLifeLayer = class {
10125
10187
  constructor(config) {
10126
10188
  // Blink state
@@ -10133,7 +10195,7 @@ var ProceduralLifeLayer = class {
10133
10195
  // Eye contact (smoothed)
10134
10196
  this.smoothedEyeX = 0;
10135
10197
  this.smoothedEyeY = 0;
10136
- // Eye micro-motion (continuous simplex noise, no discrete events)
10198
+ // Eye micro-motion
10137
10199
  this.eyeNoiseTime = 0;
10138
10200
  // Gaze break state
10139
10201
  this.gazeBreakTimer = 0;
@@ -10143,6 +10205,8 @@ var ProceduralLifeLayer = class {
10143
10205
  this.gazeBreakTargetY = 0;
10144
10206
  this.gazeBreakCurrentX = 0;
10145
10207
  this.gazeBreakCurrentY = 0;
10208
+ // Conversational state for gaze
10209
+ this.currentState = null;
10146
10210
  // Breathing / postural sway
10147
10211
  this.microMotionTime = 0;
10148
10212
  this.breathingPhase = 0;
@@ -10151,6 +10215,7 @@ var ProceduralLifeLayer = class {
10151
10215
  this.previousEnergy = 0;
10152
10216
  this.emphasisLevel = 0;
10153
10217
  this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
10218
+ this.useLogNormalBlinks = !config?.blinkIntervalRange;
10154
10219
  this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
10155
10220
  this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
10156
10221
  this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;
@@ -10160,7 +10225,7 @@ var ProceduralLifeLayer = class {
10160
10225
  this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
10161
10226
  this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
10162
10227
  this.eyeSmoothing = config?.eyeSmoothing ?? 15;
10163
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
10228
+ this.blinkInterval = this.nextBlinkInterval();
10164
10229
  this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
10165
10230
  }
10166
10231
  /**
@@ -10175,6 +10240,7 @@ var ProceduralLifeLayer = class {
10175
10240
  const eyeTargetY = input?.eyeTargetY ?? 0;
10176
10241
  const audioEnergy = input?.audioEnergy ?? 0;
10177
10242
  const isSpeaking = input?.isSpeaking ?? false;
10243
+ this.currentState = input?.state ?? null;
10178
10244
  const safeDelta = Math.min(delta, 0.1);
10179
10245
  const blendshapes = {};
10180
10246
  this.updateBlinks(delta);
@@ -10213,6 +10279,12 @@ var ProceduralLifeLayer = class {
10213
10279
  const swayAmp = this.posturalSwayAmplitude;
10214
10280
  const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
10215
10281
  const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
10282
+ const breathVal = Math.sin(this.breathingPhase);
10283
+ if (breathVal > 0) {
10284
+ blendshapes["jawOpen"] = breathVal * 0.015;
10285
+ blendshapes["noseSneerLeft"] = breathVal * 8e-3;
10286
+ blendshapes["noseSneerRight"] = breathVal * 8e-3;
10287
+ }
10216
10288
  return {
10217
10289
  blendshapes,
10218
10290
  headDelta: {
@@ -10221,12 +10293,35 @@ var ProceduralLifeLayer = class {
10221
10293
  }
10222
10294
  };
10223
10295
  }
10296
+ /**
10297
+ * Write life layer output directly to a Float32Array[52] in LAM_BLENDSHAPES order.
10298
+ *
10299
+ * Includes micro-jitter (0.4% amplitude simplex noise on all channels) to
10300
+ * break uncanny stillness on undriven channels.
10301
+ *
10302
+ * @param delta - Time since last frame in seconds
10303
+ * @param input - Per-frame input
10304
+ * @param out - Pre-allocated Float32Array(52) to write into
10305
+ */
10306
+ updateToArray(delta, input, out) {
10307
+ out.fill(0);
10308
+ const result = this.update(delta, input);
10309
+ for (const [name, value] of Object.entries(result.blendshapes)) {
10310
+ const idx = LIFE_BS_INDEX.get(name);
10311
+ if (idx !== void 0) {
10312
+ out[idx] = value;
10313
+ }
10314
+ }
10315
+ for (let i = 0; i < 52; i++) {
10316
+ out[i] += simplex2d(this.noiseTime * 0.3, i * 7.13) * 4e-3;
10317
+ }
10318
+ }
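
updateToArray is the allocation-free entry point consumed by FaceCompositor below: it writes the 52-channel life output into a caller-owned buffer, then adds 0.4% simplex jitter to every channel. Per-frame usage sketch (the life and dt names are illustrative):

  const life = new ProceduralLifeLayer();
  const lifeOut = new Float32Array(52);   // reused across frames, no per-frame allocation
  life.updateToArray(dt, { state: "listening" }, lifeOut);
  // lifeOut now holds blinks + gaze + micro-jitter in LAM_BLENDSHAPES order
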
10224
10319
  /**
10225
10320
  * Reset all internal state to initial values.
10226
10321
  */
10227
10322
  reset() {
10228
10323
  this.blinkTimer = 0;
10229
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
10324
+ this.blinkInterval = this.nextBlinkInterval();
10230
10325
  this.blinkPhase = PHASE_OPEN;
10231
10326
  this.blinkProgress = 0;
10232
10327
  this.asymmetryRight = 0.97;
@@ -10243,6 +10338,7 @@ var ProceduralLifeLayer = class {
10243
10338
  this.gazeBreakTargetY = 0;
10244
10339
  this.gazeBreakCurrentX = 0;
10245
10340
  this.gazeBreakCurrentY = 0;
10341
+ this.currentState = null;
10246
10342
  this.microMotionTime = 0;
10247
10343
  this.breathingPhase = 0;
10248
10344
  this.noiseTime = 0;
@@ -10250,6 +10346,21 @@ var ProceduralLifeLayer = class {
10250
10346
  this.emphasisLevel = 0;
10251
10347
  }
10252
10348
  // =====================================================================
10349
+ // PRIVATE: Blink interval sampling
10350
+ // =====================================================================
10351
+ /**
10352
+ * Sample next blink interval.
10353
+ * Uses log-normal distribution (PMC3565584) when using default config,
10354
+ * or uniform random when custom blinkIntervalRange is provided.
10355
+ */
10356
+ nextBlinkInterval() {
10357
+ if (this.useLogNormalBlinks) {
10358
+ const sample = sampleLogNormal(BLINK_IBI_MU, BLINK_IBI_SIGMA);
10359
+ return clamp(sample, 1.5, 12);
10360
+ }
10361
+ return randomRange(...this.blinkIntervalRange);
10362
+ }
10363
+ // =====================================================================
10253
10364
  // PRIVATE: Blink system
10254
10365
  // =====================================================================
10255
10366
  updateBlinks(delta) {
@@ -10258,7 +10369,7 @@ var ProceduralLifeLayer = class {
10258
10369
  this.blinkPhase = PHASE_CLOSING;
10259
10370
  this.blinkProgress = 0;
10260
10371
  this.blinkTimer = 0;
10261
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
10372
+ this.blinkInterval = this.nextBlinkInterval();
10262
10373
  this.asymmetryRight = 0.95 + Math.random() * 0.08;
10263
10374
  }
10264
10375
  if (this.blinkPhase > PHASE_OPEN) {
@@ -10314,18 +10425,32 @@ var ProceduralLifeLayer = class {
10314
10425
  return { x, y };
10315
10426
  }
10316
10427
  // =====================================================================
10317
- // PRIVATE: Gaze breaks
10428
+ // PRIVATE: Gaze breaks (state-dependent)
10318
10429
  // =====================================================================
10430
+ /**
10431
+ * Get active gaze parameters — uses state-dependent params when
10432
+ * conversational state is provided, otherwise falls back to config ranges.
10433
+ */
10434
+ getActiveGazeParams() {
10435
+ if (this.currentState && GAZE_STATE_PARAMS[this.currentState]) {
10436
+ return GAZE_STATE_PARAMS[this.currentState];
10437
+ }
10438
+ return {
10439
+ interval: this.gazeBreakIntervalRange,
10440
+ amplitude: this.gazeBreakAmplitudeRange
10441
+ };
10442
+ }
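
When the per-frame input carries a conversational state, GAZE_STATE_PARAMS overrides the configured ranges: "thinking" breaks gaze every 1-3 s at amplitude 0.2-0.5, while "listening" holds eye contact longer (4-10 s, amplitude 0.1-0.25). Sketch (instance name illustrative):

  life.update(dt, { state: "thinking" });    // frequent, wide gaze breaks
  life.update(dt, { state: "listening" });   // rare, subtle gaze breaks
  life.update(dt, {});                       // no state: falls back to gazeBreakIntervalRange
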
10319
10443
  updateGazeBreaks(delta) {
10320
10444
  this.gazeBreakTimer += delta;
10321
10445
  if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
10322
10446
  this.gazeBreakPhase = PHASE_CLOSING;
10323
10447
  this.gazeBreakProgress = 0;
10324
10448
  this.gazeBreakTimer = 0;
10325
- const amp = randomRange(...this.gazeBreakAmplitudeRange);
10449
+ const params = this.getActiveGazeParams();
10450
+ const amp = randomRange(...params.amplitude);
10326
10451
  this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
10327
10452
  this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
10328
- this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
10453
+ this.gazeBreakInterval = randomRange(...params.interval);
10329
10454
  }
10330
10455
  if (this.gazeBreakPhase > PHASE_OPEN) {
10331
10456
  this.gazeBreakProgress += delta;
@@ -10390,6 +10515,971 @@ var ProceduralLifeLayer = class {
10390
10515
  }
10391
10516
  };
10392
10517
 
10518
+ // src/face/FACSMapping.ts
10519
+ var EMOTION_TO_AU = {
10520
+ joy: [
10521
+ { au: "AU6", intensity: 0.7, region: "upper" },
10522
+ // cheek raise (Duchenne)
10523
+ { au: "AU12", intensity: 0.8, region: "lower" }
10524
+ // lip corner pull (smile)
10525
+ ],
10526
+ anger: [
10527
+ { au: "AU4", intensity: 0.8, region: "upper" },
10528
+ // brow lower
10529
+ { au: "AU5", intensity: 0.4, region: "upper" },
10530
+ // upper lid raise
10531
+ { au: "AU7", intensity: 0.3, region: "upper" },
10532
+ // lid tighten
10533
+ { au: "AU23", intensity: 0.6, region: "lower" }
10534
+ // lip tighten
10535
+ ],
10536
+ sadness: [
10537
+ { au: "AU1", intensity: 0.7, region: "upper" },
10538
+ // inner brow raise
10539
+ { au: "AU4", intensity: 0.3, region: "upper" },
10540
+ // brow lower (furrow)
10541
+ { au: "AU15", intensity: 0.5, region: "lower" }
10542
+ // lip corner depress
10543
+ ],
10544
+ fear: [
10545
+ { au: "AU1", intensity: 0.6, region: "upper" },
10546
+ // inner brow raise
10547
+ { au: "AU2", intensity: 0.5, region: "upper" },
10548
+ // outer brow raise
10549
+ { au: "AU4", intensity: 0.3, region: "upper" },
10550
+ // brow lower
10551
+ { au: "AU5", intensity: 0.5, region: "upper" },
10552
+ // upper lid raise
10553
+ { au: "AU20", intensity: 0.4, region: "lower" }
10554
+ // lip stretch
10555
+ ],
10556
+ disgust: [
10557
+ { au: "AU9", intensity: 0.7, region: "upper" },
10558
+ // nose wrinkle
10559
+ { au: "AU10", intensity: 0.5, region: "lower" },
10560
+ // upper lip raise
10561
+ { au: "AU15", intensity: 0.4, region: "lower" }
10562
+ // lip corner depress
10563
+ ],
10564
+ amazement: [
10565
+ { au: "AU1", intensity: 0.6, region: "upper" },
10566
+ // inner brow raise
10567
+ { au: "AU2", intensity: 0.7, region: "upper" },
10568
+ // outer brow raise
10569
+ { au: "AU5", intensity: 0.6, region: "upper" },
10570
+ // upper lid raise
10571
+ { au: "AU26", intensity: 0.4, region: "lower" }
10572
+ // jaw drop
10573
+ ],
10574
+ grief: [
10575
+ { au: "AU1", intensity: 0.8, region: "upper" },
10576
+ // inner brow raise
10577
+ { au: "AU4", intensity: 0.5, region: "upper" },
10578
+ // brow lower
10579
+ { au: "AU6", intensity: 0.3, region: "upper" },
10580
+ // cheek raise (grief cry)
10581
+ { au: "AU15", intensity: 0.6, region: "lower" }
10582
+ // lip corner depress
10583
+ ],
10584
+ cheekiness: [
10585
+ { au: "AU2", intensity: 0.4, region: "upper" },
10586
+ // outer brow raise
10587
+ { au: "AU6", intensity: 0.4, region: "upper" },
10588
+ // cheek raise
10589
+ { au: "AU12", intensity: 0.6, region: "lower" }
10590
+ // lip corner pull (smirk)
10591
+ ],
10592
+ pain: [
10593
+ { au: "AU4", intensity: 0.7, region: "upper" },
10594
+ // brow lower
10595
+ { au: "AU6", intensity: 0.4, region: "upper" },
10596
+ // cheek raise (orbicularis)
10597
+ { au: "AU7", intensity: 0.7, region: "upper" },
10598
+ // lid tighten (squint)
10599
+ { au: "AU9", intensity: 0.5, region: "upper" }
10600
+ // nose wrinkle
10601
+ ],
10602
+ outofbreath: [
10603
+ { au: "AU1", intensity: 0.3, region: "upper" },
10604
+ // inner brow raise
10605
+ { au: "AU25", intensity: 0.3, region: "lower" },
10606
+ // lips part
10607
+ { au: "AU26", intensity: 0.5, region: "lower" }
10608
+ // jaw drop
10609
+ ]
10610
+ };
10611
+ var AU_TO_ARKIT = {
10612
+ "AU1": [{ blendshape: "browInnerUp", weight: 1 }],
10613
+ "AU2": [{ blendshape: "browOuterUpLeft", weight: 1 }, { blendshape: "browOuterUpRight", weight: 1 }],
10614
+ "AU4": [{ blendshape: "browDownLeft", weight: 1 }, { blendshape: "browDownRight", weight: 1 }],
10615
+ "AU5": [{ blendshape: "eyeWideLeft", weight: 1 }, { blendshape: "eyeWideRight", weight: 1 }],
10616
+ "AU6": [{ blendshape: "cheekSquintLeft", weight: 1 }, { blendshape: "cheekSquintRight", weight: 1 }],
10617
+ "AU7": [{ blendshape: "eyeSquintLeft", weight: 1 }, { blendshape: "eyeSquintRight", weight: 1 }],
10618
+ "AU9": [{ blendshape: "noseSneerLeft", weight: 1 }, { blendshape: "noseSneerRight", weight: 1 }],
10619
+ "AU10": [{ blendshape: "mouthUpperUpLeft", weight: 1 }, { blendshape: "mouthUpperUpRight", weight: 1 }],
10620
+ "AU12": [{ blendshape: "mouthSmileLeft", weight: 1 }, { blendshape: "mouthSmileRight", weight: 1 }],
10621
+ "AU15": [{ blendshape: "mouthFrownLeft", weight: 1 }, { blendshape: "mouthFrownRight", weight: 1 }],
10622
+ "AU20": [{ blendshape: "mouthStretchLeft", weight: 1 }, { blendshape: "mouthStretchRight", weight: 1 }],
10623
+ "AU23": [{ blendshape: "mouthPressLeft", weight: 1 }, { blendshape: "mouthPressRight", weight: 1 }],
10624
+ "AU25": [{ blendshape: "jawOpen", weight: 0.3 }],
10625
+ "AU26": [{ blendshape: "jawOpen", weight: 1 }]
10626
+ };
10627
+ var ALL_AUS = [...new Set(
10628
+ Object.values(EMOTION_TO_AU).flatMap((activations) => activations.map((a) => a.au))
10629
+ )];
10630
+
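
The two tables compose emotion -> AU -> ARKit blendshape, with the region tag deciding whether an activation lands in the upper- or lower-face buffer. Worked example: joy at weight 0.5 and intensity 1.0 yields mouthSmileLeft/Right = 0.5 x 0.8 x 1 = 0.4 and cheekSquintLeft/Right = 0.5 x 0.7 x 1 = 0.35. The hop as a standalone sketch (emotionToBlendshapes is illustrative, not a package export):

  function emotionToBlendshapes(emotion, weight, intensity = 1) {
    const out = {};
    for (const { au, intensity: auIntensity } of EMOTION_TO_AU[emotion] ?? []) {
      for (const { blendshape, weight: w } of AU_TO_ARKIT[au] ?? []) {
        out[blendshape] = (out[blendshape] ?? 0) + weight * auIntensity * intensity * w;
      }
    }
    return out;  // emotionToBlendshapes("joy", 0.5).mouthSmileLeft === 0.4
  }
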
10631
+ // src/face/EmotionResolver.ts
10632
+ var BS_INDEX = /* @__PURE__ */ new Map();
10633
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
10634
+ BS_INDEX.set(LAM_BLENDSHAPES[i], i);
10635
+ }
10636
+ var EmotionResolver = class {
10637
+ constructor() {
10638
+ this.upperBuffer = new Float32Array(52);
10639
+ this.lowerBuffer = new Float32Array(52);
10640
+ }
10641
+ /**
10642
+ * Resolve emotion weights to upper/lower face blendshape contributions.
10643
+ *
10644
+ * @param weights - Emotion channel weights from EmotionController
10645
+ * @param intensity - Global intensity multiplier (0-2). Default: 1.0
10646
+ * @returns Upper and lower face blendshape arrays (52 channels each)
10647
+ */
10648
+ resolve(weights, intensity = 1) {
10649
+ const upper = this.upperBuffer;
10650
+ const lower = this.lowerBuffer;
10651
+ upper.fill(0);
10652
+ lower.fill(0);
10653
+ for (const emotionName of EMOTION_NAMES) {
10654
+ const emotionWeight = weights[emotionName];
10655
+ if (!emotionWeight || emotionWeight < 0.01) continue;
10656
+ const auActivations = EMOTION_TO_AU[emotionName];
10657
+ if (!auActivations) continue;
10658
+ for (const activation of auActivations) {
10659
+ const arkitMappings = AU_TO_ARKIT[activation.au];
10660
+ if (!arkitMappings) continue;
10661
+ const target = activation.region === "upper" ? upper : lower;
10662
+ const scale = emotionWeight * activation.intensity * intensity;
10663
+ for (const mapping of arkitMappings) {
10664
+ const idx = BS_INDEX.get(mapping.blendshape);
10665
+ if (idx !== void 0) {
10666
+ target[idx] += mapping.weight * scale;
10667
+ }
10668
+ }
10669
+ }
10670
+ }
10671
+ for (let i = 0; i < 52; i++) {
10672
+ if (upper[i] > 1) upper[i] = 1;
10673
+ if (lower[i] > 1) lower[i] = 1;
10674
+ }
10675
+ return {
10676
+ upper: new Float32Array(upper),
10677
+ lower: new Float32Array(lower)
10678
+ };
10679
+ }
10680
+ };
10681
+
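
EmotionResolver applies that mapping across all EMOTION_NAMES channels at once, accumulating into pre-allocated buffers and clamping each channel to 1 before returning copies. Sketch (the weights object shape is assumed from the weights[emotionName] lookup above):

  const resolver = new EmotionResolver();
  const { upper, lower } = resolver.resolve({ joy: 0.5 }, 1.0);
  // upper/lower: fresh Float32Array(52)s in LAM_BLENDSHAPES order
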
10682
+ // src/face/FaceCompositor.ts
10683
+ function smoothstep(t) {
10684
+ return t * t * (3 - 2 * t);
10685
+ }
10686
+ var BS_INDEX2 = /* @__PURE__ */ new Map();
10687
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
10688
+ BS_INDEX2.set(LAM_BLENDSHAPES[i], i);
10689
+ }
10690
+ var IDX_MOUTH_CLOSE = BS_INDEX2.get("mouthClose");
10691
+ var IS_EYE_CHANNEL = new Array(52).fill(false);
10692
+ for (const name of LAM_BLENDSHAPES) {
10693
+ if (name.startsWith("eyeBlink") || name.startsWith("eyeLook")) {
10694
+ IS_EYE_CHANNEL[BS_INDEX2.get(name)] = true;
10695
+ }
10696
+ }
10697
+ var FaceCompositor = class {
10698
+ constructor(config) {
10699
+ this.emotionResolver = new EmotionResolver();
10700
+ // Pre-allocated buffers
10701
+ this.smoothedUpper = new Float32Array(52);
10702
+ this.smoothedLower = new Float32Array(52);
10703
+ this.lifeBuffer = new Float32Array(52);
10704
+ // Profile arrays (pre-expanded to 52 channels)
10705
+ this.multiplier = new Float32Array(52).fill(1);
10706
+ this.offset = new Float32Array(52);
10707
+ this.lifeLayer = config?.lifeLayer ?? new ProceduralLifeLayer();
10708
+ this.emotionSmoothing = config?.emotionSmoothing ?? 0.12;
10709
+ if (config?.profile) {
10710
+ this.applyProfileArrays(config.profile);
10711
+ }
10712
+ }
10713
+ /**
10714
+ * Compose a single output frame from the 5-stage signal chain.
10715
+ *
10716
+ * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
10717
+ * @param input - Per-frame input (deltaTime, emotion, life layer params)
10718
+ * @returns Float32Array[52] with all values clamped to [0, 1]
10719
+ */
10720
+ compose(base, input) {
10721
+ const out = new Float32Array(52);
10722
+ out.set(base);
10723
+ const emotion = input.emotion ?? this.stickyEmotion;
10724
+ if (emotion) {
10725
+ const resolved = this.emotionResolver.resolve(
10726
+ emotion,
10727
+ input.emotionIntensity ?? 1
10728
+ );
10729
+ const k = this.emotionSmoothing;
10730
+ for (let i = 0; i < 52; i++) {
10731
+ this.smoothedUpper[i] += (resolved.upper[i] - this.smoothedUpper[i]) * k;
10732
+ this.smoothedLower[i] += (resolved.lower[i] - this.smoothedLower[i]) * k;
10733
+ }
10734
+ const mc = base[IDX_MOUTH_CLOSE];
10735
+ const bilabialSuppress = mc <= 0.3 ? 1 : mc >= 0.7 ? 0.1 : 1 - 0.9 * smoothstep((mc - 0.3) * 2.5);
10736
+ for (let i = 0; i < 52; i++) {
10737
+ out[i] += this.smoothedUpper[i];
10738
+ }
10739
+ for (let i = 0; i < 52; i++) {
10740
+ out[i] *= 1 + this.smoothedLower[i] * bilabialSuppress;
10741
+ }
10742
+ }
10743
+ this.lifeLayer.updateToArray(input.deltaTime, input, this.lifeBuffer);
10744
+ for (let i = 0; i < 52; i++) {
10745
+ if (IS_EYE_CHANNEL[i]) {
10746
+ out[i] = this.lifeBuffer[i];
10747
+ } else {
10748
+ out[i] += this.lifeBuffer[i];
10749
+ }
10750
+ }
10751
+ for (let i = 0; i < 52; i++) {
10752
+ out[i] = out[i] * this.multiplier[i] + this.offset[i];
10753
+ }
10754
+ for (let i = 0; i < 52; i++) {
10755
+ if (out[i] < 0) out[i] = 0;
10756
+ else if (out[i] > 1) out[i] = 1;
10757
+ }
10758
+ return out;
10759
+ }
10760
+ /**
10761
+ * Set sticky emotion (used when input.emotion is not provided).
10762
+ */
10763
+ setEmotion(weights) {
10764
+ this.stickyEmotion = weights;
10765
+ }
10766
+ /**
10767
+ * Update character profile at runtime.
10768
+ */
10769
+ setProfile(profile) {
10770
+ this.multiplier.fill(1);
10771
+ this.offset.fill(0);
10772
+ this.applyProfileArrays(profile);
10773
+ }
10774
+ /**
10775
+ * Reset all smoothing state and life layer.
10776
+ */
10777
+ reset() {
10778
+ this.smoothedUpper.fill(0);
10779
+ this.smoothedLower.fill(0);
10780
+ this.lifeBuffer.fill(0);
10781
+ this.stickyEmotion = void 0;
10782
+ this.lifeLayer.reset();
10783
+ }
10784
+ /** Expand partial profile maps into dense Float32Arrays */
10785
+ applyProfileArrays(profile) {
10786
+ if (profile.multiplier) {
10787
+ for (const [name, value] of Object.entries(profile.multiplier)) {
10788
+ const idx = BS_INDEX2.get(name);
10789
+ if (idx !== void 0 && value !== void 0) {
10790
+ this.multiplier[idx] = value;
10791
+ }
10792
+ }
10793
+ }
10794
+ if (profile.offset) {
10795
+ for (const [name, value] of Object.entries(profile.offset)) {
10796
+ const idx = BS_INDEX2.get(name);
10797
+ if (idx !== void 0 && value !== void 0) {
10798
+ this.offset[idx] = value;
10799
+ }
10800
+ }
10801
+ }
10802
+ }
10803
+ };
10804
+
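
FaceCompositor chains the stages in a fixed order: A2E base, smoothed emotion (additive on the upper face, multiplicative on the lower face and gated by mouthClose so bilabials survive), life layer (eye channels replaced, the rest added), per-character multiplier/offset profile, then a [0, 1] clamp. Render-loop sketch (profile values illustrative):

  const compositor = new FaceCompositor({
    profile: { multiplier: { jawOpen: 1.2 }, offset: { mouthSmileLeft: 0.05 } }
  });
  compositor.setEmotion({ joy: 0.6 });   // sticky until overridden per frame
  function onA2EFrame(base, dt) {        // base: Float32Array(52) from A2E
    return compositor.compose(base, { deltaTime: dt, isSpeaking: true });
  }
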
10805
+ // src/orchestration/MicLipSync.ts
10806
+ var logger18 = createLogger("MicLipSync");
10807
+ var MicLipSync = class extends EventEmitter {
10808
+ constructor(config) {
10809
+ super();
10810
+ this.omoteEvents = new EventEmitter();
10811
+ this._state = "idle";
10812
+ this._isSpeaking = false;
10813
+ this._currentFrame = null;
10814
+ this._currentRawFrame = null;
10815
+ // VAD state
10816
+ this.speechStartTime = 0;
10817
+ this.vadChunkSize = 0;
10818
+ this.vadBuffer = null;
10819
+ this.vadBufferOffset = 0;
10820
+ this.profile = config.profile ?? {};
10821
+ this.vad = config.vad;
10822
+ this.mic = new MicrophoneCapture(this.omoteEvents, {
10823
+ sampleRate: config.sampleRate ?? 16e3,
10824
+ chunkSize: config.micChunkSize ?? 512
10825
+ });
10826
+ this.processor = new A2EProcessor({
10827
+ backend: config.lam,
10828
+ sampleRate: config.sampleRate ?? 16e3,
10829
+ identityIndex: config.identityIndex,
10830
+ onFrame: (raw) => {
10831
+ const scaled = applyProfile(raw, this.profile);
10832
+ this._currentFrame = scaled;
10833
+ this._currentRawFrame = raw;
10834
+ this.emit("frame", { blendshapes: scaled, rawBlendshapes: raw });
10835
+ },
10836
+ onError: (error) => {
10837
+ logger18.error("A2E inference error", { message: error.message });
10838
+ this.emit("error", error);
10839
+ }
10840
+ });
10841
+ this.omoteEvents.on("audio.chunk", ({ pcm }) => {
10842
+ const float32 = int16ToFloat32(pcm);
10843
+ this.processor.pushAudio(float32);
10844
+ if (this.vad) {
10845
+ this.processVAD(float32);
10846
+ }
10847
+ });
10848
+ this.omoteEvents.on("audio.level", (level) => {
10849
+ this.emit("audio:level", level);
10850
+ });
10851
+ if (this.vad) {
10852
+ this.vadChunkSize = this.vad.getChunkSize();
10853
+ this.vadBuffer = new Float32Array(this.vadChunkSize);
10854
+ this.vadBufferOffset = 0;
10855
+ }
10856
+ }
10857
+ /** Current state */
10858
+ get state() {
10859
+ return this._state;
10860
+ }
10861
+ /** Latest blendshape frame (null before first inference) */
10862
+ get currentFrame() {
10863
+ return this._currentFrame;
10864
+ }
10865
+ /** Whether speech is currently detected (requires VAD) */
10866
+ get isSpeaking() {
10867
+ return this._isSpeaking;
10868
+ }
10869
+ /** Current backend type */
10870
+ get backend() {
10871
+ return this.processor ? "active" : null;
10872
+ }
10873
+ // ---------------------------------------------------------------------------
10874
+ // Public API
10875
+ // ---------------------------------------------------------------------------
10876
+ /** Start microphone capture and inference loop */
10877
+ async start() {
10878
+ if (this._state === "active") return;
10879
+ await this.mic.start();
10880
+ this.processor.startDrip();
10881
+ this.emit("mic:start", void 0);
10882
+ this.setState("active");
10883
+ }
10884
+ /** Stop microphone and inference */
10885
+ stop() {
10886
+ if (this._state === "idle") return;
10887
+ this.processor.stopDrip();
10888
+ this.mic.stop();
10889
+ this._isSpeaking = false;
10890
+ this.emit("mic:stop", void 0);
10891
+ this.setState("idle");
10892
+ }
10893
+ /** Pause inference (mic stays open for faster resume) */
10894
+ pause() {
10895
+ if (this._state !== "active") return;
10896
+ this.processor.stopDrip();
10897
+ this.setState("paused");
10898
+ }
10899
+ /** Resume inference after pause */
10900
+ resume() {
10901
+ if (this._state !== "paused") return;
10902
+ this.processor.startDrip();
10903
+ this.setState("active");
10904
+ }
10905
+ /** Update ExpressionProfile at runtime */
10906
+ setProfile(profile) {
10907
+ this.profile = profile;
10908
+ }
10909
+ /** Dispose of all resources */
10910
+ async dispose() {
10911
+ this.stop();
10912
+ this.processor.dispose();
10913
+ }
10914
+ // ---------------------------------------------------------------------------
10915
+ // Internal: VAD processing
10916
+ // ---------------------------------------------------------------------------
10917
+ async processVAD(samples) {
10918
+ if (!this.vad || !this.vadBuffer) return;
10919
+ for (let i = 0; i < samples.length; i++) {
10920
+ this.vadBuffer[this.vadBufferOffset++] = samples[i];
10921
+ if (this.vadBufferOffset >= this.vadChunkSize) {
10922
+ try {
10923
+ const result = await this.vad.process(this.vadBuffer);
10924
+ const wasSpeaking = this._isSpeaking;
10925
+ this._isSpeaking = result.isSpeech;
10926
+ if (!wasSpeaking && result.isSpeech) {
10927
+ this.speechStartTime = performance.now();
10928
+ this.emit("speech:start", void 0);
10929
+ } else if (wasSpeaking && !result.isSpeech) {
10930
+ const durationMs = performance.now() - this.speechStartTime;
10931
+ this.emit("speech:end", { durationMs });
10932
+ }
10933
+ } catch (err) {
10934
+ logger18.warn("VAD process error", { error: String(err) });
10935
+ }
10936
+ this.vadBufferOffset = 0;
10937
+ }
10938
+ }
10939
+ }
10940
+ // ---------------------------------------------------------------------------
10941
+ // Internal: State management
10942
+ // ---------------------------------------------------------------------------
10943
+ setState(state) {
10944
+ if (this._state === state) return;
10945
+ this._state = state;
10946
+ this.emit("state", state);
10947
+ }
10948
+ };
10949
+
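
MicLipSync glues the pieces above together: mic capture feeds A2EProcessor, applyProfile scales each frame, and an optional VAD drives speech:start / speech:end. Wiring sketch using the factories above (the avatar callback is illustrative):

  async function startMicLipSync(avatar) {
    const lam = createA2E();
    const vad = createSileroVAD();
    await Promise.all([lam.load(), vad.load()]);
    const lipSync = new MicLipSync({ lam, vad });
    lipSync.on("frame", ({ blendshapes }) => avatar.apply(blendshapes));
    lipSync.on("speech:start", () => console.log("speech started"));
    await lipSync.start();   // opens the mic and starts the inference drip
    return lipSync;
  }
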
10950
+ // src/orchestration/VoicePipeline.ts
10951
+ var logger19 = createLogger("VoicePipeline");
10952
+ var VoicePipeline = class extends EventEmitter {
10953
+ constructor(config) {
10954
+ super();
10955
+ // State
10956
+ this._state = "idle";
10957
+ this.stopped = false;
10958
+ this.epoch = 0;
10959
+ this._sessionId = null;
10960
+ // Models
10961
+ this.asr = null;
10962
+ this.lam = null;
10963
+ this.vad = null;
10964
+ this.unifiedWorker = null;
10965
+ // Pipelines
10966
+ this.playback = null;
10967
+ this.interruption = null;
10968
+ this.omoteEvents = new EventEmitter();
10969
+ this.mic = null;
10970
+ // Audio accumulation
10971
+ this.audioBuffer = [];
10972
+ this.audioBufferSamples = 0;
10973
+ this.speechStartTime = 0;
10974
+ this.silenceTimer = null;
10975
+ this.isSpeaking = false;
10976
+ // Progressive transcription
10977
+ this.progressiveTimer = null;
10978
+ this.progressivePromise = null;
10979
+ this.lastProgressiveResult = null;
10980
+ this.lastProgressiveSamples = 0;
10981
+ // ASR error recovery
10982
+ this.asrErrorCount = 0;
10983
+ // Response abort
10984
+ this.responseAbortController = null;
10985
+ // Frame refs
10986
+ this._currentFrame = null;
10987
+ this.config = config;
10988
+ }
10989
+ /** Current pipeline state */
10990
+ get state() {
10991
+ return this._state;
10992
+ }
10993
+ /** Latest blendshape frame */
10994
+ get currentFrame() {
10995
+ return this._currentFrame;
10996
+ }
10997
+ /** Whether user is currently speaking */
10998
+ get isSpeechActive() {
10999
+ return this.isSpeaking;
11000
+ }
11001
+ /** Session ID (generated on start(), null before) */
11002
+ get sessionId() {
11003
+ return this._sessionId;
11004
+ }
11005
+ // ---------------------------------------------------------------------------
11006
+ // Model loading
11007
+ // ---------------------------------------------------------------------------
11008
+ async loadModels() {
11009
+ this.setState("loading");
11010
+ const timeoutMs = this.config.lamLoadTimeoutMs ?? 3e4;
11011
+ try {
11012
+ if (isIOS()) {
11013
+ this.unifiedWorker = new UnifiedInferenceWorker();
11014
+ await this.unifiedWorker.init();
11015
+ }
11016
+ this.emitProgress("Speech recognition", 0, 3, 0);
11017
+ this.asr = createSenseVoice({
11018
+ modelUrl: this.config.models.senseVoice.modelUrl,
11019
+ tokensUrl: this.config.models.senseVoice.tokensUrl,
11020
+ language: this.config.models.senseVoice.language,
11021
+ unifiedWorker: this.unifiedWorker ?? void 0
11022
+ });
11023
+ await this.asr.load();
11024
+ this.emitProgress("Speech recognition", 45, 3, 1);
11025
+ this.emitProgress("Lip sync", 45, 3, 1);
11026
+ let lam = createA2E({
11027
+ gpuModelUrl: this.config.models.lam.gpuModelUrl,
11028
+ gpuExternalDataUrl: this.config.models.lam.gpuExternalDataUrl,
11029
+ cpuModelUrl: this.config.models.lam.cpuModelUrl,
11030
+ mode: this.config.models.lam.mode,
11031
+ unifiedWorker: this.unifiedWorker ?? void 0
11032
+ });
11033
+ let lamProgress = 45;
11034
+ const lamTickInterval = setInterval(() => {
11035
+ const remaining = 85 - lamProgress;
11036
+ lamProgress += Math.max(0.5, remaining * 0.08);
11037
+ this.emitProgress("Lip sync", Math.round(lamProgress), 3, 1);
11038
+ }, 300);
11039
+ try {
11040
+ const lamLoadResult = await Promise.race([
11041
+ lam.load().then(() => "ok"),
11042
+ new Promise((r) => setTimeout(() => r("timeout"), timeoutMs))
11043
+ ]);
11044
+ if (lamLoadResult === "timeout") {
11045
+ logger19.warn(`LAM GPU load timed out after ${timeoutMs}ms, falling back to CPU`);
11046
+ await lam.dispose();
11047
+ lam = createA2E({
11048
+ gpuModelUrl: this.config.models.lam.gpuModelUrl,
11049
+ cpuModelUrl: this.config.models.lam.cpuModelUrl,
11050
+ mode: "cpu",
11051
+ unifiedWorker: this.unifiedWorker ?? void 0
11052
+ });
11053
+ await lam.load();
11054
+ }
11055
+ } finally {
11056
+ clearInterval(lamTickInterval);
11057
+ }
11058
+ this.lam = lam;
11059
+ this.emitProgress("Lip sync", 85, 3, 2);
11060
+ this.emitProgress("Voice detection", 85, 3, 2);
11061
+ this.vad = createSileroVAD({
11062
+ modelUrl: this.config.models.vad.modelUrl,
11063
+ threshold: this.config.models.vad.threshold,
11064
+ unifiedWorker: this.unifiedWorker ?? void 0
11065
+ });
11066
+ await this.vad.load();
11067
+ this.emitProgress("Voice detection", 100, 3, 3);
11068
+ this.playback = new PlaybackPipeline({
11069
+ lam: this.lam,
11070
+ profile: this.config.profile,
11071
+ identityIndex: this.config.identityIndex,
11072
+ neutralTransitionEnabled: this.config.neutralTransitionEnabled ?? true,
11073
+ neutralTransitionMs: this.config.neutralTransitionMs,
11074
+ audioDelayMs: this.config.audioDelayMs,
11075
+ chunkTargetMs: this.config.chunkTargetMs
11076
+ });
11077
+ await this.playback.initialize();
11078
+ this.playback.on("frame", (f) => {
11079
+ this._currentFrame = f.blendshapes;
11080
+ this.emit("frame", f);
11081
+ });
11082
+ this.playback.on("frame:raw", (f) => this.emit("frame:raw", f));
11083
+ this.playback.on("playback:start", (t) => this.emit("playback:start", t));
11084
+ this.playback.on("playback:complete", () => {
11085
+ if (this.stopped) return;
11086
+ this.emit("playback:complete", void 0);
11087
+ this.vad?.reset();
11088
+ this.epoch++;
11089
+ this.setState("listening");
11090
+ });
11091
+ this.playback.on("error", (e) => this.emit("error", e));
11092
+ this.interruption = new InterruptionHandler({
11093
+ enabled: this.config.interruptionEnabled ?? true,
11094
+ minSpeechDurationMs: this.config.interruptionMinSpeechMs ?? 200
11095
+ });
11096
+ this.interruption.on("interruption.triggered", () => {
11097
+ this.handleInterruption();
11098
+ });
11099
+ this.setState("ready");
11100
+ } catch (error) {
11101
+ const err = error instanceof Error ? error : new Error(String(error));
11102
+ logger19.error("Model loading failed", { message: err.message });
11103
+ this.emit("error", err);
11104
+ this.setState("error");
11105
+ throw err;
11106
+ }
11107
+ }
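
loadModels() brings up the three models in sequence (speech recognition, then lip sync with a GPU-load timeout that falls back to CPU, then voice detection) and wires the playback and interruption plumbing. A typical call site, assuming the emitter-style on() this class exposes alongside its internal emit():

pipeline.on("loading:progress", ({ currentModel, progress, modelsLoaded, totalModels }) => {
  console.log(`${currentModel}: ${progress}% (${modelsLoaded}/${totalModels} models)`);
});
await pipeline.loadModels(); // state: "loading" -> "ready", or "error" + rethrow on failure
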
11108
+ // ---------------------------------------------------------------------------
11109
+ // Conversation lifecycle
11110
+ // ---------------------------------------------------------------------------
11111
+ async start() {
11112
+ if (this._state !== "ready") {
11113
+ throw new Error(`Cannot start: state is '${this._state}', expected 'ready'`);
11114
+ }
11115
+ this.stopped = false;
11116
+ this.epoch++;
11117
+ this._sessionId = crypto.randomUUID();
11118
+ this.asrErrorCount = 0;
11119
+ this.mic = new MicrophoneCapture(this.omoteEvents, {
11120
+ sampleRate: 16e3,
11121
+ chunkSize: 512
11122
+ });
11123
+ this.omoteEvents.on("audio.chunk", ({ pcm }) => {
11124
+ const float32 = int16ToFloat32(pcm);
11125
+ this.processAudioChunk(float32);
11126
+ });
11127
+ this.omoteEvents.on("audio.level", (level) => {
11128
+ this.emit("audio:level", level);
11129
+ });
11130
+ await this.mic.start();
11131
+ this.setState("listening");
11132
+ }
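
start() is only legal from "ready": it bumps the epoch, mints a session UUID, and opens the microphone at 16 kHz in 512-sample chunks, converting each chunk to Float32 before VAD processing. Sketch (meter is a hypothetical UI object):

await pipeline.loadModels();
await pipeline.start();          // throws unless state === "ready"
console.log(pipeline.sessionId); // fresh UUID for this conversation
pipeline.on("audio:level", (level) => meter.update(level));
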
11133
+ stop() {
11134
+ this.stopped = true;
11135
+ this.epoch++;
11136
+ this.clearSilenceTimer();
11137
+ this.stopProgressiveTranscription();
11138
+ this.responseAbortController?.abort();
11139
+ this.responseAbortController = null;
11140
+ this.vad?.reset();
11141
+ this.playback?.stop();
11142
+ this.mic?.stop();
11143
+ this.mic = null;
11144
+ this.isSpeaking = false;
11145
+ this.audioBuffer = [];
11146
+ this.audioBufferSamples = 0;
11147
+ this._currentFrame = null;
11148
+ this.interruption?.setAISpeaking(false);
11149
+ if (this._state !== "idle") {
11150
+ this.setState("ready");
11151
+ }
11152
+ }
11153
+ setProfile(profile) {
11154
+ this.config.profile = profile;
11155
+ this.playback?.setProfile(profile);
11156
+ }
11157
+ async dispose() {
11158
+ this.stop();
11159
+ this.epoch++;
11160
+ await this.playback?.dispose();
11161
+ await this.asr?.dispose();
11162
+ await this.lam?.dispose();
11163
+ await this.vad?.dispose();
11164
+ this.playback = null;
11165
+ this.asr = null;
11166
+ this.lam = null;
11167
+ this.vad = null;
11168
+ this._state = "idle";
11169
+ }
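
Teardown comes in two strengths: stop() is synchronous, silences the mic and timers, and returns to "ready" so start() can be called again; dispose() additionally releases the playback, ASR, lip-sync, and VAD sessions and ends at "idle".

pipeline.stop();          // conversation over; models stay loaded
await pipeline.dispose(); // full teardown; state ends at "idle"
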
11170
+ // ---------------------------------------------------------------------------
11171
+ // Audio processing
11172
+ // ---------------------------------------------------------------------------
11173
+ async processAudioChunk(samples) {
11174
+ if (!this.vad) return;
11175
+ try {
11176
+ const result = await this.vad.process(samples);
11177
+ if (this._state === "speaking" && this.interruption) {
11178
+ this.interruption.processVADResult(result.probability);
11179
+ return;
11180
+ }
11181
+ if (this._state !== "listening" && this._state !== "thinking") return;
11182
+ const wasSpeaking = this.isSpeaking;
11183
+ if (result.isSpeech) {
11184
+ if (!wasSpeaking) {
11185
+ this.isSpeaking = true;
11186
+ this.speechStartTime = performance.now();
11187
+ this.audioBuffer = [];
11188
+ this.audioBufferSamples = 0;
11189
+ this.lastProgressiveResult = null;
11190
+ this.lastProgressiveSamples = 0;
11191
+ this.emit("speech:start", void 0);
11192
+ this.startProgressiveTranscription();
11193
+ }
11194
+ this.audioBuffer.push(new Float32Array(samples));
11195
+ this.audioBufferSamples += samples.length;
11196
+ this.clearSilenceTimer();
11197
+ } else if (wasSpeaking) {
11198
+ this.audioBuffer.push(new Float32Array(samples));
11199
+ this.audioBufferSamples += samples.length;
11200
+ if (!this.silenceTimer) {
11201
+ const timeoutMs = this.getSilenceTimeout();
11202
+ this.silenceTimer = setTimeout(() => {
11203
+ this.onSilenceDetected();
11204
+ }, timeoutMs);
11205
+ }
11206
+ }
11207
+ } catch (err) {
11208
+ logger19.warn("VAD error", { error: String(err) });
11209
+ }
11210
+ }
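
processAudioChunk() routes every VAD result by state: while "speaking", the speech probability feeds the interruption handler; while "listening" or "thinking", speech frames accumulate in the buffer and trailing silence arms the end-of-turn timer. The event sequence a consumer observes:

pipeline.on("speech:start", () => {});             // first speech frame while listening
pipeline.on("speech:end", ({ durationMs }) => {}); // silence timer fired; turn complete
pipeline.on("interruption", () => {});             // user spoke over the AI's playback
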
11211
+ // ---------------------------------------------------------------------------
11212
+ // Silence detection
11213
+ // ---------------------------------------------------------------------------
11214
+ getSilenceTimeout() {
11215
+ const base = this.config.silenceTimeoutMs ?? 500;
11216
+ const extended = this.config.silenceTimeoutExtendedMs ?? 700;
11217
+ const adaptive = this.config.adaptiveTimeout ?? true;
11218
+ if (!adaptive) return base;
11219
+ const speechDurationMs = performance.now() - this.speechStartTime;
11220
+ return speechDurationMs > 3e3 ? extended : base;
11221
+ }
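
With the defaults above, a turn ends after 500 ms of silence, stretched to 700 ms once the user has been speaking for more than 3 s. A tuning sketch (field names are exactly the ones read above; values are illustrative):

const config = {
  silenceTimeoutMs: 400,          // snappier end-of-turn for short utterances
  silenceTimeoutExtendedMs: 900,  // more patience after 3 s of continuous speech
  adaptiveTimeout: true,          // false => always use silenceTimeoutMs
};
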
11222
+ onSilenceDetected() {
11223
+ const capturedEpoch = this.epoch;
11224
+ this.isSpeaking = false;
11225
+ const durationMs = performance.now() - this.speechStartTime;
11226
+ this.emit("speech:end", { durationMs });
11227
+ this.clearSilenceTimer();
11228
+ this.processEndOfSpeech(capturedEpoch).catch((err) => {
11229
+ logger19.error("End of speech processing failed", { error: String(err) });
11230
+ if (this.epoch === capturedEpoch && !this.stopped) {
11231
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
11232
+ this.setState("listening");
11233
+ }
11234
+ });
11235
+ }
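
Note the capturedEpoch idiom here, used throughout this class: start(), stop(), dispose(), interruption, and turn completion all bump this.epoch, so any async continuation snapshots it first and discards its result if a newer turn has superseded it. Distilled (someSlowWork is a stand-in):

const capturedEpoch = this.epoch;    // snapshot before going async
const result = await someSlowWork();
if (this.epoch !== capturedEpoch || this.stopped) return; // stale; drop the result
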
11236
+ // ---------------------------------------------------------------------------
11237
+ // End of speech → transcription → response
11238
+ // ---------------------------------------------------------------------------
11239
+ async processEndOfSpeech(capturedEpoch) {
11240
+ if (this.progressivePromise) {
11241
+ try {
11242
+ await this.progressivePromise;
11243
+ } catch {
11244
+ }
11245
+ }
11246
+ this.stopProgressiveTranscription();
11247
+ if (this.epoch !== capturedEpoch || this.stopped) return;
11248
+ const totalSamples = this.audioBufferSamples;
11249
+ const fullAudio = new Float32Array(totalSamples);
11250
+ let offset = 0;
11251
+ for (const chunk of this.audioBuffer) {
11252
+ fullAudio.set(chunk, offset);
11253
+ offset += chunk.length;
11254
+ }
11255
+ this.audioBuffer = [];
11256
+ this.audioBufferSamples = 0;
11257
+ const minDuration = this.config.minAudioDurationSec ?? 0.3;
11258
+ const minEnergy = this.config.minAudioEnergy ?? 0.02;
11259
+ const durationSec = totalSamples / 16e3;
11260
+ if (durationSec < minDuration) {
11261
+ logger19.info("Audio too short, discarding", { durationSec });
11262
+ this.setState("listening");
11263
+ return;
11264
+ }
11265
+ let maxAbs = 0;
11266
+ for (let i = 0; i < fullAudio.length; i++) {
11267
+ const abs = Math.abs(fullAudio[i]);
11268
+ if (abs > maxAbs) maxAbs = abs;
11269
+ }
11270
+ let rms = 0;
11271
+ for (let i = 0; i < fullAudio.length; i++) {
11272
+ rms += fullAudio[i] * fullAudio[i];
11273
+ }
11274
+ rms = Math.sqrt(rms / fullAudio.length);
11275
+ if (rms < minEnergy) {
11276
+ logger19.info("Audio too quiet, discarding", { rms });
11277
+ this.setState("listening");
11278
+ return;
11279
+ }
11280
+ const normalizedAudio = this.normalizeAudio(fullAudio);
11281
+ this.setState("thinking");
11282
+ let transcript = null;
11283
+ const coverageThreshold = this.config.progressiveCoverageThreshold ?? 0.8;
11284
+ if (this.lastProgressiveResult && this.lastProgressiveResult.text.trim().length > 0 && this.lastProgressiveSamples >= totalSamples * coverageThreshold) {
11285
+ transcript = { ...this.lastProgressiveResult, isFinal: true };
11286
+ logger19.info("Using progressive result", {
11287
+ coverage: (this.lastProgressiveSamples / totalSamples).toFixed(2),
11288
+ text: transcript.text
11289
+ });
11290
+ } else {
11291
+ this.lastProgressiveResult = null;
11292
+ transcript = await this.transcribeWithTimeout(normalizedAudio);
11293
+ if (transcript) {
11294
+ transcript.isFinal = true;
11295
+ }
11296
+ }
11297
+ if (this.epoch !== capturedEpoch || this.stopped) return;
11298
+ if (!transcript || !transcript.text.trim()) {
11299
+ logger19.info("No transcript, resuming listening");
11300
+ this.setState("listening");
11301
+ return;
11302
+ }
11303
+ this.emit("transcript", transcript);
11304
+ await this.callResponseHandler(transcript, capturedEpoch);
11305
+ }
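
Before any final transcription, the concatenated buffer must pass two gates: at least minAudioDurationSec long (default 0.3 s) and at least minAudioEnergy loud (default 0.02 RMS). The same checks in isolation:

function passesGates(audio, sampleRate = 16000) {
  if (audio.length / sampleRate < 0.3) return false;   // too short: discarded
  let sumSquares = 0;
  for (let i = 0; i < audio.length; i++) sumSquares += audio[i] * audio[i];
  return Math.sqrt(sumSquares / audio.length) >= 0.02; // too quiet: discarded
}

A passing buffer is then normalized, and a progressive partial is promoted to the final transcript whenever it already covers at least 80% of the buffered samples (progressiveCoverageThreshold), skipping a redundant full-buffer ASR pass.
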
11306
+ // ---------------------------------------------------------------------------
11307
+ // Response handler
11308
+ // ---------------------------------------------------------------------------
11309
+ async callResponseHandler(transcript, capturedEpoch) {
11310
+ if (this.epoch !== capturedEpoch || this.stopped) return;
11311
+ this.setState("speaking");
11312
+ this.interruption?.setAISpeaking(true);
11313
+ const abortController = new AbortController();
11314
+ this.responseAbortController = abortController;
11315
+ try {
11316
+ this.playback.start();
11317
+ await this.config.onResponse({
11318
+ text: transcript.text,
11319
+ emotion: transcript.emotion,
11320
+ event: transcript.event,
11321
+ send: async (chunk) => {
11322
+ if (abortController.signal.aborted) return;
11323
+ await this.playback.onAudioChunk(chunk);
11324
+ },
11325
+ done: async () => {
11326
+ if (abortController.signal.aborted) return;
11327
+ await this.playback.end();
11328
+ },
11329
+ signal: abortController.signal,
11330
+ sessionId: this._sessionId
11331
+ });
11332
+ } catch (error) {
11333
+ if (abortController.signal.aborted) return;
11334
+ const err = error instanceof Error ? error : new Error(String(error));
11335
+ logger19.error("Response handler error", { message: err.message });
11336
+ this.emit("error", err);
11337
+ if (this.epoch === capturedEpoch && !this.stopped) {
11338
+ this.interruption?.setAISpeaking(false);
11339
+ this.setState("listening");
11340
+ }
11341
+ } finally {
11342
+ this.responseAbortController = null;
11343
+ }
11344
+ }
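
callResponseHandler() is the application integration point: config.onResponse receives the transcript plus a send/done/signal triple, and everything passed to send() is forwarded to the playback pipeline. A minimal handler (generateReply and synthesizeStream are hypothetical app-side functions; only the callback shape comes from this file):

async function onResponse({ text, send, done, signal, sessionId }) {
  const reply = await generateReply(text, sessionId);     // e.g. an LLM call
  for await (const pcmChunk of synthesizeStream(reply)) { // e.g. streaming TTS
    if (signal.aborted) return; // an interruption aborted this turn; stop streaming
    await send(pcmChunk);       // forwarded to PlaybackPipeline.onAudioChunk
  }
  await done(); // flush playback; "playback:complete" returns the state to "listening"
}
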
11345
+ // ---------------------------------------------------------------------------
11346
+ // Interruption handling
11347
+ // ---------------------------------------------------------------------------
11348
+ handleInterruption() {
11349
+ if (this._state !== "speaking") return;
11350
+ logger19.info("Interruption triggered");
11351
+ this.epoch++;
11352
+ this.responseAbortController?.abort();
11353
+ this.playback?.stop();
11354
+ this.interruption?.setAISpeaking(false);
11355
+ this.emit("interruption", void 0);
11356
+ if (!this.stopped) {
11357
+ this.setState("listening");
11358
+ }
11359
+ }
11360
+ // ---------------------------------------------------------------------------
11361
+ // Progressive transcription
11362
+ // ---------------------------------------------------------------------------
11363
+ startProgressiveTranscription() {
11364
+ this.stopProgressiveTranscription();
11365
+ const intervalMs = isIOS() ? this.config.progressiveIntervalIosMs ?? 800 : this.config.progressiveIntervalMs ?? 500;
11366
+ const minSamples = this.config.progressiveMinSamples ?? 8e3;
11367
+ this.progressiveTimer = setInterval(() => {
11368
+ if (this.audioBufferSamples < minSamples) return;
11369
+ if (!this.asr) return;
11370
+ const capturedEpoch = this.epoch;
11371
+ const snapshot = new Float32Array(this.audioBufferSamples);
11372
+ let offset = 0;
11373
+ for (const chunk of this.audioBuffer) {
11374
+ snapshot.set(chunk, offset);
11375
+ offset += chunk.length;
11376
+ }
11377
+ const snapshotSamples = this.audioBufferSamples;
11378
+ this.progressivePromise = (async () => {
11379
+ try {
11380
+ const result = await this.transcribeWithTimeout(snapshot);
11381
+ if (this.epoch !== capturedEpoch) return;
11382
+ if (result && result.text.trim()) {
11383
+ this.lastProgressiveResult = result;
11384
+ this.lastProgressiveSamples = snapshotSamples;
11385
+ this.emit("transcript", { ...result, isFinal: false });
11386
+ }
11387
+ } catch {
11388
+ }
11389
+ })();
11390
+ }, intervalMs);
11391
+ }
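
Progressive transcription re-runs ASR over a snapshot of the growing buffer every progressiveIntervalMs (default 500 ms, 800 ms on iOS) once progressiveMinSamples are buffered (default 8000, i.e. 0.5 s at 16 kHz), emitting partials with isFinal: false. Both partials and finals arrive on the same event (showLivePreview and commitMessage are hypothetical):

pipeline.on("transcript", (t) => {
  if (t.isFinal) commitMessage(t.text); // end-of-turn result
  else showLivePreview(t.text);         // partial; may be revised by the next snapshot
});
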
11392
+ stopProgressiveTranscription() {
11393
+ if (this.progressiveTimer) {
11394
+ clearInterval(this.progressiveTimer);
11395
+ this.progressiveTimer = null;
11396
+ }
11397
+ }
11398
+ // ---------------------------------------------------------------------------
11399
+ // Transcription with timeout + ASR error recovery
11400
+ // ---------------------------------------------------------------------------
11401
+ async transcribeWithTimeout(audio) {
11402
+ if (!this.asr) return null;
11403
+ const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
11404
+ const startTime = performance.now();
11405
+ try {
11406
+ const result = await Promise.race([
11407
+ this.asr.transcribe(audio),
11408
+ new Promise(
11409
+ (_, reject) => setTimeout(() => reject(new Error(`Transcription timed out after ${timeoutMs}ms`)), timeoutMs)
11410
+ )
11411
+ ]);
11412
+ this.asrErrorCount = 0;
11413
+ return {
11414
+ text: result.text,
11415
+ emotion: result.emotion,
11416
+ language: result.language,
11417
+ isFinal: false,
11418
+ inferenceTimeMs: performance.now() - startTime
11419
+ };
11420
+ } catch (error) {
11421
+ this.asrErrorCount++;
11422
+ logger19.warn("Transcription failed", {
11423
+ attempt: this.asrErrorCount,
11424
+ error: String(error)
11425
+ });
11426
+ if (this.asrErrorCount >= 3) {
11427
+ logger19.warn("3 consecutive ASR errors, recreating session");
11428
+ try {
11429
+ await this.asr.dispose();
11430
+ this.asr = createSenseVoice({
11431
+ modelUrl: this.config.models.senseVoice.modelUrl,
11432
+ tokensUrl: this.config.models.senseVoice.tokensUrl,
11433
+ language: this.config.models.senseVoice.language,
11434
+ unifiedWorker: this.unifiedWorker ?? void 0
11435
+ });
11436
+ await this.asr.load();
11437
+ this.asrErrorCount = 0;
11438
+ } catch (recreateErr) {
11439
+ logger19.error("ASR session recreation failed", { error: String(recreateErr) });
11440
+ }
11441
+ }
11442
+ return null;
11443
+ }
11444
+ }
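
Each transcription attempt is raced against a transcriptionTimeoutMs watchdog (default 10 s), and three consecutive failures tear down and recreate the ASR session. The race itself, distilled into a helper (note that Promise.race rejects on timeout but does not cancel the losing inference, which keeps running to completion):

function withTimeout(promise, ms) {
  return Promise.race([
    promise,
    new Promise((_, reject) =>
      setTimeout(() => reject(new Error(`timed out after ${ms}ms`)), ms)
    ),
  ]);
}
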
11445
+ // ---------------------------------------------------------------------------
11446
+ // Audio normalization
11447
+ // ---------------------------------------------------------------------------
11448
+ normalizeAudio(audio) {
11449
+ if (!(this.config.normalizeAudio ?? true)) return audio;
11450
+ let maxAbs = 0;
11451
+ for (let i = 0; i < audio.length; i++) {
11452
+ const abs = Math.abs(audio[i]);
11453
+ if (abs > maxAbs) maxAbs = abs;
11454
+ }
11455
+ if (maxAbs >= 0.1 || maxAbs === 0) return audio;
11456
+ const gain = 0.5 / maxAbs;
11457
+ const normalized = new Float32Array(audio.length);
11458
+ for (let i = 0; i < audio.length; i++) {
11459
+ normalized[i] = audio[i] * gain;
11460
+ }
11461
+ return normalized;
11462
+ }
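
normalizeAudio() only ever boosts: audio peaking at or above 0.1 (or pure silence, which would otherwise divide by zero) passes through untouched, while quieter audio is scaled so its peak lands at exactly 0.5. Worked through the defaults:

// peak 0.04 => gain = 0.5 / 0.04 = 12.5, so [0.01, -0.04, 0.02] -> [0.125, -0.5, 0.25]
// peak 0.30 => returned unchanged (already >= 0.1)
// all zeros => returned unchanged (maxAbs === 0 guard)
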
11463
+ // ---------------------------------------------------------------------------
11464
+ // Helpers
11465
+ // ---------------------------------------------------------------------------
11466
+ setState(state) {
11467
+ if (this._state === state) return;
11468
+ logger19.info("State transition", { from: this._state, to: state });
11469
+ this._state = state;
11470
+ this.emit("state", state);
11471
+ }
11472
+ emitProgress(currentModel, progress, totalModels, modelsLoaded) {
11473
+ this.emit("loading:progress", { currentModel, progress, totalModels, modelsLoaded });
11474
+ }
11475
+ clearSilenceTimer() {
11476
+ if (this.silenceTimer) {
11477
+ clearTimeout(this.silenceTimer);
11478
+ this.silenceTimer = null;
11479
+ }
11480
+ }
11481
+ };
11482
+
10393
11483
  // ../types/dist/index.mjs
10394
11484
  var PROTOCOL_VERSION = 1;
10395
11485
  function isProtocolEvent(obj) {