@omote/core 0.5.7 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -56,11 +56,13 @@ __export(index_exports, {
56
56
  LOG_LEVEL_PRIORITY: () => LOG_LEVEL_PRIORITY,
57
57
  MODEL_LOAD_TIME_BUCKETS: () => MODEL_LOAD_TIME_BUCKETS,
58
58
  MetricNames: () => MetricNames,
59
+ MicLipSync: () => MicLipSync,
59
60
  MicrophoneCapture: () => MicrophoneCapture,
60
61
  ModelCache: () => ModelCache,
61
62
  OTLPExporter: () => OTLPExporter,
62
63
  OmoteTelemetry: () => OmoteTelemetry,
63
64
  PROTOCOL_VERSION: () => PROTOCOL_VERSION,
65
+ PlaybackPipeline: () => PlaybackPipeline,
64
66
  ProceduralLifeLayer: () => ProceduralLifeLayer,
65
67
  RingBuffer: () => RingBuffer,
66
68
  SafariSpeechRecognition: () => SafariSpeechRecognition,
@@ -71,10 +73,12 @@ __export(index_exports, {
71
73
  SileroVADUnifiedAdapter: () => SileroVADUnifiedAdapter,
72
74
  SileroVADWorker: () => SileroVADWorker,
73
75
  UnifiedInferenceWorker: () => UnifiedInferenceWorker,
76
+ VoicePipeline: () => VoicePipeline,
74
77
  Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
75
78
  Wav2ArkitCpuUnifiedAdapter: () => Wav2ArkitCpuUnifiedAdapter,
76
79
  Wav2ArkitCpuWorker: () => Wav2ArkitCpuWorker,
77
80
  Wav2Vec2Inference: () => Wav2Vec2Inference,
81
+ applyProfile: () => applyProfile,
78
82
  blendEmotions: () => blendEmotions,
79
83
  calculatePeak: () => calculatePeak,
80
84
  calculateRMS: () => calculateRMS,
@@ -867,12 +871,12 @@ var Logger = class _Logger {
867
871
  };
868
872
  var loggerCache = /* @__PURE__ */ new Map();
869
873
  function createLogger(module2) {
870
- let logger17 = loggerCache.get(module2);
871
- if (!logger17) {
872
- logger17 = new Logger(module2);
873
- loggerCache.set(module2, logger17);
874
+ let logger20 = loggerCache.get(module2);
875
+ if (!logger20) {
876
+ logger20 = new Logger(module2);
877
+ loggerCache.set(module2, logger20);
874
878
  }
875
- return logger17;
879
+ return logger20;
876
880
  }
877
881
  var noopLogger = {
878
882
  module: "noop",
@@ -1168,6 +1172,24 @@ var A2EProcessor = class {
1168
1172
  }
1169
1173
  };
1170
1174
 
1175
+ // src/audio/audioUtils.ts
1176
/**
 * Decode signed 16-bit PCM bytes into normalized float samples.
 *
 * Accepts either an ArrayBuffer or any ArrayBuffer view (for example a
 * Uint8Array). For a view, the underlying bytes are reinterpreted as 16-bit
 * samples; passing a typed array straight to the Int16Array constructor would
 * instead copy element by element and turn every byte into its own sample.
 * A trailing odd byte is ignored. Samples are read through Int16Array, i.e.
 * in the platform's native byte order.
 *
 * @param {ArrayBuffer|ArrayBufferView} buffer - Raw PCM16 bytes.
 * @returns {Float32Array} One float per complete 16-bit sample (sample / 32768).
 */
function pcm16ToFloat32(buffer) {
  const isView = ArrayBuffer.isView(buffer);
  let backing = isView ? buffer.buffer : buffer;
  let byteOffset = isView ? buffer.byteOffset : 0;
  const sampleCount = Math.floor(buffer.byteLength / 2);
  if (byteOffset % 2 !== 0) {
    // Int16Array requires a 2-byte aligned offset; copy the window out so it
    // starts at offset 0 of a fresh buffer.
    backing = backing.slice(byteOffset, byteOffset + sampleCount * 2);
    byteOffset = 0;
  }
  return int16ToFloat32(new Int16Array(backing, byteOffset, sampleCount));
}

/**
 * Convert 16-bit integer samples to floats by dividing each by 32768.
 *
 * @param {Int16Array} int16 - Source samples.
 * @returns {Float32Array} A new array of the same length.
 */
function int16ToFloat32(int16) {
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
1192
+
1171
1193
  // src/telemetry/exporters/console.ts
1172
1194
  var ConsoleExporter = class {
1173
1195
  constructor(options = {}) {
@@ -3221,19 +3243,7 @@ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
3221
3243
  _Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
3222
3244
  var Wav2Vec2Inference = _Wav2Vec2Inference;
3223
3245
 
3224
- // src/audio/audioUtils.ts
3225
- function pcm16ToFloat32(buffer) {
3226
- const byteLen = buffer.byteLength & ~1;
3227
- const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
3228
- const float32 = new Float32Array(int16.length);
3229
- for (let i = 0; i < int16.length; i++) {
3230
- float32[i] = int16[i] / 32768;
3231
- }
3232
- return float32;
3233
- }
3234
-
3235
- // src/audio/FullFacePipeline.ts
3236
- var logger4 = createLogger("FullFacePipeline");
3246
+ // src/audio/expressionProfile.ts
3237
3247
  var BLENDSHAPE_TO_GROUP = /* @__PURE__ */ new Map();
3238
3248
  for (const name of LAM_BLENDSHAPES) {
3239
3249
  if (name.startsWith("eye")) {
@@ -3252,6 +3262,24 @@ for (const name of LAM_BLENDSHAPES) {
3252
3262
  BLENDSHAPE_TO_GROUP.set(name, "tongue");
3253
3263
  }
3254
3264
  }
3265
/**
 * Scale a raw blendshape frame by an expression profile.
 *
 * For each of the 52 slots, the scaler is chosen as follows:
 *   1. `profile.overrides[name]` when it is defined for that blendshape name;
 *   2. otherwise the profile value for the blendshape's group, when the name
 *      has a group in BLENDSHAPE_TO_GROUP and the profile sets that group;
 *   3. otherwise 1.
 * The scaled value is clamped to [0, 1]. A NaN product (for example when
 * `raw` has fewer than 52 entries) is written as 0, because Math.min/Math.max
 * would otherwise pass the NaN straight through to the output.
 *
 * @param {ArrayLike<number>} raw - Raw weights, indexed like LAM_BLENDSHAPES.
 * @param {object} [profile] - Group scalers plus optional `overrides` map.
 *   A null/undefined profile leaves every scaler at 1.
 * @returns {Float32Array} A new 52-element array of scaled weights.
 */
function applyProfile(raw, profile) {
  const BLENDSHAPE_COUNT = 52;
  const overrides = profile?.overrides;
  const scaled = new Float32Array(BLENDSHAPE_COUNT);
  for (let i = 0; i < BLENDSHAPE_COUNT; i++) {
    const name = LAM_BLENDSHAPES[i];
    let scaler;
    if (overrides && overrides[name] !== void 0) {
      scaler = overrides[name];
    } else {
      const group = BLENDSHAPE_TO_GROUP.get(name);
      scaler = group ? (profile?.[group] ?? 1) : 1;
    }
    const value = raw[i] * scaler;
    scaled[i] = Number.isNaN(value) ? 0 : Math.min(1, Math.max(0, value));
  }
  return scaled;
}
3280
+
3281
+ // src/audio/FullFacePipeline.ts
3282
+ var logger4 = createLogger("FullFacePipeline");
3255
3283
  var FullFacePipeline = class extends EventEmitter {
3256
3284
  constructor(options) {
3257
3285
  super();
@@ -3316,25 +3344,10 @@ var FullFacePipeline = class extends EventEmitter {
3316
3344
  /**
3317
3345
  * Apply ExpressionProfile scaling to raw A2E blendshapes.
3318
3346
  *
3319
- * For each blendshape:
3320
- * 1. If an override exists for the blendshape name, use override as scaler
3321
- * 2. Otherwise, use the group scaler (default 1.0)
3322
- * 3. Clamp result to [0, 1]
3347
+ * Delegates to the standalone applyProfile() utility from expressionProfile.ts.
3323
3348
  */
3324
3349
  applyProfile(raw) {
3325
- const scaled = new Float32Array(52);
3326
- for (let i = 0; i < 52; i++) {
3327
- const name = LAM_BLENDSHAPES[i];
3328
- let scaler;
3329
- if (this.profile.overrides && this.profile.overrides[name] !== void 0) {
3330
- scaler = this.profile.overrides[name];
3331
- } else {
3332
- const group = BLENDSHAPE_TO_GROUP.get(name);
3333
- scaler = group ? this.profile[group] ?? 1 : 1;
3334
- }
3335
- scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
3336
- }
3337
- return scaled;
3350
+ return applyProfile(raw, this.profile);
3338
3351
  }
3339
3352
  /**
3340
3353
  * Start a new playback session
@@ -3519,6 +3532,329 @@ var FullFacePipeline = class extends EventEmitter {
3519
3532
  }
3520
3533
  };
3521
3534
 
3535
+ // src/audio/PlaybackPipeline.ts
3536
+ var logger5 = createLogger("PlaybackPipeline");
3537
/**
 * Audio playback pipeline that schedules PCM audio through an AudioScheduler,
 * feeds the same samples to an A2EProcessor, and emits one blendshape frame
 * per animation tick for the scheduler's current time.
 *
 * Events emitted by this class (legacy aliases in parentheses):
 *   "state", "error", "playback:start" ("playback_start"),
 *   "frame" ("full_frame_ready"), "frame:raw" ("lam_frame_ready"),
 *   "playback:complete" ("playback_complete"), "playback:stop".
 */
var PlaybackPipeline = class extends EventEmitter {
  /**
   * @param {object} config
   * @param {object} config.lam - A2E backend; `modelId`, `backend` and
   *   `chunkSize` are read here and the object is passed to A2EProcessor.
   * @param {number} [config.sampleRate=16000]
   * @param {object} [config.profile={}] - Expression profile for applyProfile().
   * @param {number} [config.staleThresholdMs=2000] - Time without a new
   *   inference frame before a stall warning is logged.
   * @param {boolean} [config.neutralTransitionEnabled=false]
   * @param {number} [config.neutralTransitionMs=250]
   * @param {number} [config.chunkSize] - Falls back to `config.lam.chunkSize`, then 16000.
   * @param {number} [config.audioDelayMs] - Overrides the computed lookahead.
   * @param {number} [config.chunkTargetMs=200] - Coalescer target duration.
   * @param {*} [config.identityIndex] - Passed through to A2EProcessor.
   */
  constructor(config) {
    super();
    this.config = config;
    this._state = "idle";
    this.playbackStarted = false;
    this.monitorInterval = null;
    this.frameAnimationId = null;
    // Stale frame detection
    this.lastNewFrameTime = 0;
    this.lastKnownLamFrame = null;
    this.staleWarningEmitted = false;
    // Diagnostic counter
    this.frameLoopCount = 0;
    // Neutral transition bookkeeping
    this.neutralTransitionFrame = null;
    this.neutralTransitionStart = 0;
    this.neutralAnimationId = null;
    // Current frame refs
    this._currentFrame = null;
    this._currentRawFrame = null;
    this.sampleRate = config.sampleRate ?? 16e3;
    this.profile = config.profile ?? {};
    this.staleThresholdMs = config.staleThresholdMs ?? 2e3;
    this.neutralTransitionEnabled = config.neutralTransitionEnabled ?? false;
    this.neutralTransitionMs = config.neutralTransitionMs ?? 250;
    // Default audio lookahead = time to accumulate one inference chunk
    // + an estimate of inference latency for the backend + a fixed margin.
    const isCpuModel = config.lam.modelId === "wav2arkit_cpu";
    const chunkSize = config.chunkSize ?? config.lam.chunkSize ?? 16e3;
    const chunkAccumulationMs = chunkSize / this.sampleRate * 1e3;
    const inferenceEstimateMs = isCpuModel ? 300 : config.lam.backend === "wasm" ? 250 : 80;
    const marginMs = 100;
    const autoDelay = Math.ceil(chunkAccumulationMs + inferenceEstimateMs + marginMs);
    const audioDelayMs = config.audioDelayMs ?? autoDelay;
    logger5.info("PlaybackPipeline config", {
      chunkSize,
      audioDelayMs,
      autoDelay,
      backend: config.lam.backend,
      modelId: config.lam.modelId,
      neutralTransitionEnabled: this.neutralTransitionEnabled
    });
    this.scheduler = new AudioScheduler({
      sampleRate: this.sampleRate,
      initialLookaheadSec: audioDelayMs / 1e3
    });
    this.coalescer = new AudioChunkCoalescer({
      sampleRate: this.sampleRate,
      targetDurationMs: config.chunkTargetMs ?? 200
    });
    this.processor = new A2EProcessor({
      backend: config.lam,
      sampleRate: this.sampleRate,
      chunkSize,
      identityIndex: config.identityIndex,
      onError: (error) => {
        logger5.error("A2E inference error", { message: error.message, stack: error.stack });
        this.emit("error", error);
      }
    });
  }
  /** Current pipeline state */
  get state() {
    return this._state;
  }
  /** Most recent scaled blendshapes, or null (a new array is assigned each frame) */
  get currentFrame() {
    return this._currentFrame;
  }
  /** Most recent raw A2E blendshapes (before profile scaling), or null */
  get currentRawFrame() {
    return this._currentRawFrame;
  }
  // ---------------------------------------------------------------------------
  // Lifecycle
  // ---------------------------------------------------------------------------
  /** Initialize the scheduler (original note: lazy AudioContext, call after a user gesture) */
  async initialize() {
    await this.scheduler.initialize();
  }
  /** Replace the ExpressionProfile used for subsequent frames */
  setProfile(profile) {
    this.profile = profile;
  }
  // ---------------------------------------------------------------------------
  // Async mode (streaming TTS)
  // ---------------------------------------------------------------------------
  /**
   * Start a new playback session.
   * Safe to call during playback: the frame loop and monitor are stopped and
   * all per-session state is reset without emitting playback:complete.
   */
  start() {
    this.stopInternal(false);
    this.scheduler.reset();
    this.coalescer.reset();
    this.processor.reset();
    this.playbackStarted = false;
    this.lastNewFrameTime = 0;
    this.lastKnownLamFrame = null;
    this.staleWarningEmitted = false;
    this.frameLoopCount = 0;
    this._currentFrame = null;
    this._currentRawFrame = null;
    this.cancelNeutralTransition();
    this.scheduler.warmup();
    this.startFrameLoop();
    this.startMonitoring();
    this.setState("playing");
  }
  /**
   * Feed a streaming audio chunk (PCM16 bytes). The chunk goes through the
   * coalescer; nothing is scheduled until the coalescer returns a combined
   * buffer.
   */
  async onAudioChunk(chunk) {
    const combined = this.coalescer.add(chunk);
    if (!combined) return;
    await this.playSamples(pcm16ToFloat32(combined));
  }
  /**
   * Signal end of the audio stream.
   *
   * The coalescer's remainder is scheduled directly instead of being passed
   * back through onAudioChunk(): that would hand it to coalescer.add() again,
   * where a short tail could simply be buffered once more and never played.
   */
  async end() {
    const remaining = this.coalescer.flush();
    if (remaining) {
      // Normalize to a zero-offset buffer before decoding (handles both an
      // ArrayBuffer and a byte view).
      const tail = pcm16ToFloat32(new Uint8Array(remaining).buffer);
      if (tail.length > 0) {
        await this.playSamples(tail);
      }
    }
    await this.processor.flush();
  }
  // ---------------------------------------------------------------------------
  // Sync mode (full buffer)
  // ---------------------------------------------------------------------------
  /**
   * Feed a complete audio buffer. Starts a new session, splits the audio into
   * 200ms pieces, schedules each for playback and pushes it to the A2E
   * processor, then flushes the processor.
   *
   * @param {Float32Array|ArrayBuffer} audio - Float samples, or PCM16 bytes
   *   which are decoded with pcm16ToFloat32().
   * @returns {Promise<void>} Resolves on "playback:complete", or on
   *   "playback:stop" so a stop() during playback does not leave the caller
   *   waiting forever. Rejects if scheduling or flushing fails.
   */
  async feedBuffer(audio) {
    const float32 = audio instanceof Float32Array ? audio : pcm16ToFloat32(audio);
    this.start();
    // Subscribe before any await so neither event can be missed.
    let release = () => {
    };
    const finished = new Promise((resolve) => {
      const unsubscribers = [];
      release = () => {
        while (unsubscribers.length > 0) {
          unsubscribers.pop()();
        }
      };
      const settle = () => {
        release();
        resolve();
      };
      unsubscribers.push(this.on("playback:complete", settle));
      unsubscribers.push(this.on("playback:stop", settle));
    });
    try {
      const chunkSamples = Math.floor(this.sampleRate * 0.2);
      for (let i = 0; i < float32.length; i += chunkSamples) {
        const chunk = float32.subarray(i, Math.min(i + chunkSamples, float32.length));
        const scheduleTime = await this.scheduler.schedule(chunk);
        this.processor.pushAudio(chunk, scheduleTime);
        this.markPlaybackStarted(scheduleTime);
      }
      await this.processor.flush();
    } catch (error) {
      // Do not leave listeners registered for a promise nobody will await.
      release();
      throw error;
    }
    return finished;
  }
  // ---------------------------------------------------------------------------
  // Control
  // ---------------------------------------------------------------------------
  /** Stop playback: halts loops and any neutral transition, cancels scheduled audio with a fade-out */
  async stop(fadeOutMs = 50) {
    this.setState("stopping");
    this.stopInternal(true);
    // Without this a running neutral transition keeps emitting frames after stop.
    this.cancelNeutralTransition();
    await this.scheduler.cancelAll(fadeOutMs);
    this.coalescer.reset();
    this.processor.reset();
    this.playbackStarted = false;
    this._currentFrame = null;
    this._currentRawFrame = null;
    this.emit("playback:stop", void 0);
    this.setState("idle");
  }
  /** Cleanup all resources. Sets state to "idle" without emitting "state". */
  dispose() {
    this.stopInternal(true);
    this.cancelNeutralTransition();
    this.scheduler.dispose();
    this.coalescer.reset();
    this.processor.dispose();
    this._state = "idle";
  }
  /** Get pipeline debug state */
  getDebugState() {
    return {
      state: this._state,
      playbackStarted: this.playbackStarted,
      coalescerFill: this.coalescer.fillLevel,
      processorFill: this.processor.fillLevel,
      queuedFrames: this.processor.queuedFrameCount,
      currentTime: this.scheduler.getCurrentTime(),
      playbackEndTime: this.scheduler.getPlaybackEndTime()
    };
  }
  // ---------------------------------------------------------------------------
  // Internal: Scheduling helpers
  // ---------------------------------------------------------------------------
  /** Schedule decoded samples for playback and push them to the A2E processor */
  async playSamples(float32) {
    const scheduleTime = await this.scheduler.schedule(float32);
    this.markPlaybackStarted(scheduleTime);
    this.processor.pushAudio(float32, scheduleTime);
  }
  /** Emit the playback-start events once per session */
  markPlaybackStarted(scheduleTime) {
    if (this.playbackStarted) return;
    this.playbackStarted = true;
    this.emit("playback:start", { time: scheduleTime });
    this.emit("playback_start", scheduleTime);
  }
  // ---------------------------------------------------------------------------
  // Internal: Frame loop
  // ---------------------------------------------------------------------------
  startFrameLoop() {
    const updateFrame = () => {
      this.frameLoopCount++;
      const currentTime = this.scheduler.getCurrentTime();
      const lamFrame = this.processor.getFrameForTime(currentTime);
      // A different frame object than last tick counts as fresh inference output.
      if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
        this.lastNewFrameTime = performance.now();
        this.lastKnownLamFrame = lamFrame;
        this.staleWarningEmitted = false;
      }
      const stalled = this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs;
      if (stalled && !this.staleWarningEmitted) {
        // Warn once per stall; the flag is cleared when a new frame arrives.
        this.staleWarningEmitted = true;
        logger5.warn("A2E stalled \u2014 no new inference frames", {
          staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
          queuedFrames: this.processor.queuedFrameCount
        });
      }
      if (lamFrame) {
        const scaled = applyProfile(lamFrame, this.profile);
        this._currentFrame = scaled;
        this._currentRawFrame = lamFrame;
        const fullFrame = {
          blendshapes: scaled,
          rawBlendshapes: lamFrame,
          timestamp: currentTime
        };
        this.emit("frame", fullFrame);
        this.emit("frame:raw", lamFrame);
        this.emit("full_frame_ready", fullFrame);
        this.emit("lam_frame_ready", lamFrame);
      }
      this.frameAnimationId = requestAnimationFrame(updateFrame);
    };
    this.frameAnimationId = requestAnimationFrame(updateFrame);
  }
  // ---------------------------------------------------------------------------
  // Internal: Playback monitoring
  // ---------------------------------------------------------------------------
  /** Poll every 100ms for "scheduler complete and no queued frames" */
  startMonitoring() {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
    }
    this.monitorInterval = setInterval(() => {
      if (this.scheduler.isComplete() && this.processor.queuedFrameCount === 0) {
        this.onPlaybackComplete();
      }
    }, 100);
  }
  onPlaybackComplete() {
    this.stopInternal(false);
    this.playbackStarted = false;
    this.emit("playback:complete", void 0);
    this.emit("playback_complete", void 0);
    if (this.neutralTransitionEnabled && this._currentFrame) {
      // State stays as-is until the transition finishes and sets "idle".
      this.startNeutralTransition(this._currentFrame);
    } else {
      this.setState("idle");
    }
  }
  // ---------------------------------------------------------------------------
  // Internal: Neutral transition (opt-in)
  // ---------------------------------------------------------------------------
  /** Fade a copy of `fromFrame` towards all-zero over neutralTransitionMs (cubic ease-out) */
  startNeutralTransition(fromFrame) {
    const snapshot = new Float32Array(fromFrame);
    this.neutralTransitionFrame = snapshot;
    this.neutralTransitionStart = performance.now();
    const animate = () => {
      const elapsed = performance.now() - this.neutralTransitionStart;
      const durationMs = this.neutralTransitionMs;
      // A non-positive duration would make t NaN or negative and the loop
      // would never reach t >= 1; treat it as an immediate finish.
      const t = durationMs > 0 ? Math.min(1, elapsed / durationMs) : 1;
      const eased = 1 - Math.pow(1 - t, 3);
      const blendshapes = new Float32Array(52);
      for (let i = 0; i < 52; i++) {
        blendshapes[i] = snapshot[i] * (1 - eased);
      }
      this._currentFrame = blendshapes;
      const frame = {
        blendshapes,
        rawBlendshapes: blendshapes,
        // raw = scaled during transition
        timestamp: performance.now() / 1e3
      };
      this.emit("frame", frame);
      this.emit("full_frame_ready", frame);
      if (t >= 1) {
        this.neutralAnimationId = null;
        this.neutralTransitionFrame = null;
        this._currentFrame = null;
        this._currentRawFrame = null;
        this.setState("idle");
        return;
      }
      this.neutralAnimationId = requestAnimationFrame(animate);
    };
    this.neutralAnimationId = requestAnimationFrame(animate);
  }
  cancelNeutralTransition() {
    if (this.neutralAnimationId) {
      cancelAnimationFrame(this.neutralAnimationId);
      this.neutralAnimationId = null;
    }
    this.neutralTransitionFrame = null;
  }
  // ---------------------------------------------------------------------------
  // Internal: Helpers
  // ---------------------------------------------------------------------------
  /**
   * Stop the completion monitor and the frame loop.
   * `emitEvents` is accepted for call-site compatibility but is not used.
   */
  stopInternal(emitEvents) {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
      this.monitorInterval = null;
    }
    if (this.frameAnimationId) {
      cancelAnimationFrame(this.frameAnimationId);
      this.frameAnimationId = null;
    }
  }
  /** Update state and emit "state" only when the value actually changes */
  setState(state) {
    if (this._state === state) return;
    this._state = state;
    this.emit("state", state);
  }
};
3857
+
3522
3858
  // src/audio/InterruptionHandler.ts
3523
3859
  var InterruptionHandler = class extends EventEmitter {
3524
3860
  constructor(config = {}) {
@@ -3906,7 +4242,7 @@ function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
3906
4242
  }
3907
4243
 
3908
4244
  // src/inference/SenseVoiceInference.ts
3909
- var logger5 = createLogger("SenseVoice");
4245
+ var logger6 = createLogger("SenseVoice");
3910
4246
  var _SenseVoiceInference = class _SenseVoiceInference {
3911
4247
  constructor(config) {
3912
4248
  this.session = null;
@@ -3959,26 +4295,26 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3959
4295
  "model.backend_requested": this.config.backend
3960
4296
  });
3961
4297
  try {
3962
- logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
4298
+ logger6.info("Loading ONNX Runtime...", { preference: this.config.backend });
3963
4299
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
3964
4300
  this.ort = ort;
3965
4301
  this._backend = backend;
3966
- logger5.info("ONNX Runtime loaded", { backend: this._backend });
3967
- logger5.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
4302
+ logger6.info("ONNX Runtime loaded", { backend: this._backend });
4303
+ logger6.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
3968
4304
  const tokensResponse = await fetch(this.config.tokensUrl);
3969
4305
  if (!tokensResponse.ok) {
3970
4306
  throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
3971
4307
  }
3972
4308
  const tokensText = await tokensResponse.text();
3973
4309
  this.tokenMap = parseTokensFile(tokensText);
3974
- logger5.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
4310
+ logger6.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
3975
4311
  const sessionOptions = getSessionOptions(this._backend);
3976
4312
  if (this._backend === "webgpu") {
3977
4313
  sessionOptions.graphOptimizationLevel = "basic";
3978
4314
  }
3979
4315
  let isCached = false;
3980
4316
  if (isIOS()) {
3981
- logger5.info("iOS: passing model URL directly to ORT (low-memory path)", {
4317
+ logger6.info("iOS: passing model URL directly to ORT (low-memory path)", {
3982
4318
  modelUrl: this.config.modelUrl
3983
4319
  });
3984
4320
  this.session = await withTimeout(
@@ -3991,14 +4327,14 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3991
4327
  isCached = await cache.has(this.config.modelUrl);
3992
4328
  let modelBuffer;
3993
4329
  if (isCached) {
3994
- logger5.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
4330
+ logger6.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
3995
4331
  modelBuffer = await cache.get(this.config.modelUrl);
3996
4332
  onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
3997
4333
  } else {
3998
- logger5.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
4334
+ logger6.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
3999
4335
  modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
4000
4336
  }
4001
- logger5.debug("Creating ONNX session", {
4337
+ logger6.debug("Creating ONNX session", {
4002
4338
  size: formatBytes(modelBuffer.byteLength),
4003
4339
  backend: this._backend
4004
4340
  });
@@ -4011,15 +4347,15 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4011
4347
  const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
4012
4348
  this.negMean = cmvn.negMean;
4013
4349
  this.invStddev = cmvn.invStddev;
4014
- logger5.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
4350
+ logger6.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
4015
4351
  } else {
4016
- logger5.warn("CMVN not found in model metadata \u2014 features will not be normalized");
4352
+ logger6.warn("CMVN not found in model metadata \u2014 features will not be normalized");
4017
4353
  }
4018
4354
  } catch (cmvnErr) {
4019
- logger5.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
4355
+ logger6.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
4020
4356
  }
4021
4357
  const loadTimeMs = performance.now() - startTime;
4022
- logger5.info("SenseVoice model loaded", {
4358
+ logger6.info("SenseVoice model loaded", {
4023
4359
  backend: this._backend,
4024
4360
  loadTimeMs: Math.round(loadTimeMs),
4025
4361
  vocabSize: this.tokenMap.size,
@@ -4130,7 +4466,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4130
4466
  const vocabSize = logitsDims[2];
4131
4467
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
4132
4468
  const inferenceTimeMs = performance.now() - startTime;
4133
- logger5.trace("Transcription complete", {
4469
+ logger6.trace("Transcription complete", {
4134
4470
  text: decoded.text.substring(0, 50),
4135
4471
  language: decoded.language,
4136
4472
  emotion: decoded.emotion,
@@ -4168,7 +4504,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4168
4504
  const errMsg = err instanceof Error ? err.message : String(err);
4169
4505
  if (errMsg.includes("timed out")) {
4170
4506
  this.poisoned = true;
4171
- logger5.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4507
+ logger6.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4172
4508
  backend: this._backend,
4173
4509
  timeoutMs: _SenseVoiceInference.INFERENCE_TIMEOUT_MS
4174
4510
  });
@@ -4176,7 +4512,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4176
4512
  const oomError = new Error(
4177
4513
  `SenseVoice inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
4178
4514
  );
4179
- logger5.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4515
+ logger6.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4180
4516
  pointer: `0x${err.toString(16)}`,
4181
4517
  backend: this._backend
4182
4518
  });
@@ -4189,7 +4525,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4189
4525
  reject(oomError);
4190
4526
  return;
4191
4527
  } else {
4192
- logger5.error("Inference failed", { error: errMsg, backend: this._backend });
4528
+ logger6.error("Inference failed", { error: errMsg, backend: this._backend });
4193
4529
  }
4194
4530
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
4195
4531
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -4218,7 +4554,7 @@ _SenseVoiceInference.INFERENCE_TIMEOUT_MS = 1e4;
4218
4554
  var SenseVoiceInference = _SenseVoiceInference;
4219
4555
 
4220
4556
  // src/inference/SenseVoiceWorker.ts
4221
- var logger6 = createLogger("SenseVoiceWorker");
4557
+ var logger7 = createLogger("SenseVoiceWorker");
4222
4558
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
4223
4559
  var LOAD_TIMEOUT_MS = 3e5;
4224
4560
  var INFERENCE_TIMEOUT_MS = 1e4;
@@ -4957,7 +5293,7 @@ var SenseVoiceWorker = class {
4957
5293
  this.handleWorkerMessage(event.data);
4958
5294
  };
4959
5295
  worker.onerror = (error) => {
4960
- logger6.error("Worker error", { error: error.message });
5296
+ logger7.error("Worker error", { error: error.message });
4961
5297
  for (const [, resolver] of this.pendingResolvers) {
4962
5298
  resolver.reject(new Error(`Worker error: ${error.message}`));
4963
5299
  }
@@ -5037,9 +5373,9 @@ var SenseVoiceWorker = class {
5037
5373
  "model.language": this.config.language
5038
5374
  });
5039
5375
  try {
5040
- logger6.info("Creating SenseVoice worker...");
5376
+ logger7.info("Creating SenseVoice worker...");
5041
5377
  this.worker = this.createWorker();
5042
- logger6.info("Loading model in worker...", {
5378
+ logger7.info("Loading model in worker...", {
5043
5379
  modelUrl: this.config.modelUrl,
5044
5380
  tokensUrl: this.config.tokensUrl,
5045
5381
  language: this.config.language,
@@ -5061,7 +5397,7 @@ var SenseVoiceWorker = class {
5061
5397
  this._isLoaded = true;
5062
5398
  const loadTimeMs = performance.now() - startTime;
5063
5399
  onProgress?.(1, 1);
5064
- logger6.info("SenseVoice worker loaded successfully", {
5400
+ logger7.info("SenseVoice worker loaded successfully", {
5065
5401
  backend: "wasm",
5066
5402
  loadTimeMs: Math.round(loadTimeMs),
5067
5403
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -5140,7 +5476,7 @@ var SenseVoiceWorker = class {
5140
5476
  INFERENCE_TIMEOUT_MS
5141
5477
  );
5142
5478
  const totalTimeMs = performance.now() - startTime;
5143
- logger6.trace("Worker transcription complete", {
5479
+ logger7.trace("Worker transcription complete", {
5144
5480
  text: result.text.substring(0, 50),
5145
5481
  language: result.language,
5146
5482
  emotion: result.emotion,
@@ -5176,11 +5512,11 @@ var SenseVoiceWorker = class {
5176
5512
  } catch (err) {
5177
5513
  const errMsg = err instanceof Error ? err.message : String(err);
5178
5514
  if (errMsg.includes("timed out")) {
5179
- logger6.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5515
+ logger7.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5180
5516
  timeoutMs: INFERENCE_TIMEOUT_MS
5181
5517
  });
5182
5518
  } else {
5183
- logger6.error("Worker inference failed", { error: errMsg });
5519
+ logger7.error("Worker inference failed", { error: errMsg });
5184
5520
  }
5185
5521
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
5186
5522
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -5218,7 +5554,7 @@ var SenseVoiceWorker = class {
5218
5554
  };
5219
5555
 
5220
5556
  // src/inference/UnifiedInferenceWorker.ts
5221
- var logger7 = createLogger("UnifiedInferenceWorker");
5557
+ var logger8 = createLogger("UnifiedInferenceWorker");
5222
5558
  var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
5223
5559
  var INIT_TIMEOUT_MS = 6e4;
5224
5560
  var SV_LOAD_TIMEOUT_MS = 3e5;
@@ -5920,7 +6256,7 @@ var UnifiedInferenceWorker = class {
5920
6256
  const telemetry = getTelemetry();
5921
6257
  const span = telemetry?.startSpan("UnifiedInferenceWorker.init");
5922
6258
  try {
5923
- logger7.info("Creating unified inference worker...");
6259
+ logger8.info("Creating unified inference worker...");
5924
6260
  this.worker = this.createWorker();
5925
6261
  await this.sendMessage(
5926
6262
  { type: "init", wasmPaths: WASM_CDN_PATH3, isIOS: isIOS() },
@@ -5929,7 +6265,7 @@ var UnifiedInferenceWorker = class {
5929
6265
  );
5930
6266
  this.initialized = true;
5931
6267
  const loadTimeMs = performance.now() - startTime;
5932
- logger7.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
6268
+ logger8.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
5933
6269
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs });
5934
6270
  span?.end();
5935
6271
  } catch (error) {
@@ -6103,7 +6439,7 @@ var UnifiedInferenceWorker = class {
6103
6439
  this.handleWorkerMessage(event.data);
6104
6440
  };
6105
6441
  worker.onerror = (error) => {
6106
- logger7.error("Unified worker error", { error: error.message });
6442
+ logger8.error("Unified worker error", { error: error.message });
6107
6443
  this.rejectAllPending(`Worker error: ${error.message}`);
6108
6444
  };
6109
6445
  return worker;
@@ -6117,7 +6453,7 @@ var UnifiedInferenceWorker = class {
6117
6453
  this.pendingRequests.delete(requestId);
6118
6454
  pending.reject(new Error(data.error));
6119
6455
  } else {
6120
- logger7.error("Worker broadcast error", { error: data.error });
6456
+ logger8.error("Worker broadcast error", { error: data.error });
6121
6457
  this.rejectAllPending(data.error);
6122
6458
  }
6123
6459
  return;
@@ -6139,7 +6475,7 @@ var UnifiedInferenceWorker = class {
6139
6475
  const timeout = setTimeout(() => {
6140
6476
  this.pendingRequests.delete(requestId);
6141
6477
  this.poisoned = true;
6142
- logger7.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
6478
+ logger8.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
6143
6479
  type: message.type,
6144
6480
  timeoutMs
6145
6481
  });
@@ -6205,7 +6541,7 @@ var SenseVoiceUnifiedAdapter = class {
6205
6541
  });
6206
6542
  this._isLoaded = true;
6207
6543
  onProgress?.(1, 1);
6208
- logger7.info("SenseVoice loaded via unified worker", {
6544
+ logger8.info("SenseVoice loaded via unified worker", {
6209
6545
  backend: "wasm",
6210
6546
  loadTimeMs: Math.round(result.loadTimeMs),
6211
6547
  vocabSize: result.vocabSize
@@ -6270,7 +6606,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
6270
6606
  externalDataUrl: externalDataUrl || null
6271
6607
  });
6272
6608
  this._isLoaded = true;
6273
- logger7.info("Wav2ArkitCpu loaded via unified worker", {
6609
+ logger8.info("Wav2ArkitCpu loaded via unified worker", {
6274
6610
  backend: "wasm",
6275
6611
  loadTimeMs: Math.round(result.loadTimeMs)
6276
6612
  });
@@ -6376,7 +6712,7 @@ var SileroVADUnifiedAdapter = class {
6376
6712
  sampleRate: this.config.sampleRate
6377
6713
  });
6378
6714
  this._isLoaded = true;
6379
- logger7.info("SileroVAD loaded via unified worker", {
6715
+ logger8.info("SileroVAD loaded via unified worker", {
6380
6716
  backend: "wasm",
6381
6717
  loadTimeMs: Math.round(result.loadTimeMs),
6382
6718
  sampleRate: this.config.sampleRate,
@@ -6457,10 +6793,10 @@ var SileroVADUnifiedAdapter = class {
6457
6793
  };
6458
6794
 
6459
6795
  // src/inference/createSenseVoice.ts
6460
- var logger8 = createLogger("createSenseVoice");
6796
+ var logger9 = createLogger("createSenseVoice");
6461
6797
  function createSenseVoice(config) {
6462
6798
  if (config.unifiedWorker) {
6463
- logger8.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6799
+ logger9.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6464
6800
  return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
6465
6801
  modelUrl: config.modelUrl,
6466
6802
  tokensUrl: config.tokensUrl,
@@ -6473,7 +6809,7 @@ function createSenseVoice(config) {
6473
6809
  if (!SenseVoiceWorker.isSupported()) {
6474
6810
  throw new Error("Web Workers are not supported in this environment");
6475
6811
  }
6476
- logger8.info("Creating SenseVoiceWorker (off-main-thread)");
6812
+ logger9.info("Creating SenseVoiceWorker (off-main-thread)");
6477
6813
  return new SenseVoiceWorker({
6478
6814
  modelUrl: config.modelUrl,
6479
6815
  tokensUrl: config.tokensUrl,
@@ -6482,7 +6818,7 @@ function createSenseVoice(config) {
6482
6818
  });
6483
6819
  }
6484
6820
  if (useWorker === false) {
6485
- logger8.info("Creating SenseVoiceInference (main thread)");
6821
+ logger9.info("Creating SenseVoiceInference (main thread)");
6486
6822
  return new SenseVoiceInference({
6487
6823
  modelUrl: config.modelUrl,
6488
6824
  tokensUrl: config.tokensUrl,
@@ -6491,7 +6827,7 @@ function createSenseVoice(config) {
6491
6827
  });
6492
6828
  }
6493
6829
  if (SenseVoiceWorker.isSupported() && !isIOS()) {
6494
- logger8.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6830
+ logger9.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6495
6831
  return new SenseVoiceWorker({
6496
6832
  modelUrl: config.modelUrl,
6497
6833
  tokensUrl: config.tokensUrl,
@@ -6499,7 +6835,7 @@ function createSenseVoice(config) {
6499
6835
  textNorm: config.textNorm
6500
6836
  });
6501
6837
  }
6502
- logger8.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6838
+ logger9.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6503
6839
  reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
6504
6840
  });
6505
6841
  return new SenseVoiceInference({
@@ -6511,7 +6847,7 @@ function createSenseVoice(config) {
6511
6847
  }
6512
6848
 
6513
6849
  // src/inference/Wav2ArkitCpuInference.ts
6514
- var logger9 = createLogger("Wav2ArkitCpu");
6850
+ var logger10 = createLogger("Wav2ArkitCpu");
6515
6851
  var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6516
6852
  constructor(config) {
6517
6853
  this.modelId = "wav2arkit_cpu";
@@ -6553,16 +6889,16 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6553
6889
  });
6554
6890
  try {
6555
6891
  const preference = this.config.backend || "wasm";
6556
- logger9.info("Loading ONNX Runtime...", { preference });
6892
+ logger10.info("Loading ONNX Runtime...", { preference });
6557
6893
  const { ort, backend } = await getOnnxRuntimeForPreference(preference);
6558
6894
  this.ort = ort;
6559
6895
  this._backend = backend;
6560
- logger9.info("ONNX Runtime loaded", { backend: this._backend });
6896
+ logger10.info("ONNX Runtime loaded", { backend: this._backend });
6561
6897
  const modelUrl = this.config.modelUrl;
6562
6898
  const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
6563
6899
  const sessionOptions = getSessionOptions(this._backend);
6564
6900
  if (isIOS()) {
6565
- logger9.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6901
+ logger10.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6566
6902
  modelUrl,
6567
6903
  dataUrl
6568
6904
  });
@@ -6584,15 +6920,15 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6584
6920
  const isCached = await cache.has(modelUrl);
6585
6921
  let modelBuffer;
6586
6922
  if (isCached) {
6587
- logger9.debug("Loading model from cache", { modelUrl });
6923
+ logger10.debug("Loading model from cache", { modelUrl });
6588
6924
  modelBuffer = await cache.get(modelUrl);
6589
6925
  if (!modelBuffer) {
6590
- logger9.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6926
+ logger10.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6591
6927
  await cache.delete(modelUrl);
6592
6928
  modelBuffer = await fetchWithCache(modelUrl);
6593
6929
  }
6594
6930
  } else {
6595
- logger9.debug("Fetching and caching model graph", { modelUrl });
6931
+ logger10.debug("Fetching and caching model graph", { modelUrl });
6596
6932
  modelBuffer = await fetchWithCache(modelUrl);
6597
6933
  }
6598
6934
  if (!modelBuffer) {
@@ -6603,31 +6939,31 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6603
6939
  try {
6604
6940
  const isDataCached = await cache.has(dataUrl);
6605
6941
  if (isDataCached) {
6606
- logger9.debug("Loading external data from cache", { dataUrl });
6942
+ logger10.debug("Loading external data from cache", { dataUrl });
6607
6943
  externalDataBuffer = await cache.get(dataUrl);
6608
6944
  if (!externalDataBuffer) {
6609
- logger9.warn("Cache corruption for external data, retrying", { dataUrl });
6945
+ logger10.warn("Cache corruption for external data, retrying", { dataUrl });
6610
6946
  await cache.delete(dataUrl);
6611
6947
  externalDataBuffer = await fetchWithCache(dataUrl);
6612
6948
  }
6613
6949
  } else {
6614
- logger9.info("Fetching external model data", {
6950
+ logger10.info("Fetching external model data", {
6615
6951
  dataUrl,
6616
6952
  note: "This may be a large download (400MB+)"
6617
6953
  });
6618
6954
  externalDataBuffer = await fetchWithCache(dataUrl);
6619
6955
  }
6620
- logger9.info("External data loaded", {
6956
+ logger10.info("External data loaded", {
6621
6957
  size: formatBytes(externalDataBuffer.byteLength)
6622
6958
  });
6623
6959
  } catch (err) {
6624
- logger9.debug("No external data file found (single-file model)", {
6960
+ logger10.debug("No external data file found (single-file model)", {
6625
6961
  dataUrl,
6626
6962
  error: err.message
6627
6963
  });
6628
6964
  }
6629
6965
  }
6630
- logger9.debug("Creating ONNX session", {
6966
+ logger10.debug("Creating ONNX session", {
6631
6967
  graphSize: formatBytes(modelBuffer.byteLength),
6632
6968
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
6633
6969
  backend: this._backend
@@ -6643,7 +6979,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6643
6979
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
6644
6980
  }
6645
6981
  const loadTimeMs = performance.now() - startTime;
6646
- logger9.info("Model loaded successfully", {
6982
+ logger10.info("Model loaded successfully", {
6647
6983
  backend: this._backend,
6648
6984
  loadTimeMs: Math.round(loadTimeMs),
6649
6985
  inputs: this.session.inputNames,
@@ -6659,12 +6995,12 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6659
6995
  model: "wav2arkit_cpu",
6660
6996
  backend: this._backend
6661
6997
  });
6662
- logger9.debug("Running warmup inference");
6998
+ logger10.debug("Running warmup inference");
6663
6999
  const warmupStart = performance.now();
6664
7000
  const silentAudio = new Float32Array(16e3);
6665
7001
  await this.infer(silentAudio);
6666
7002
  const warmupTimeMs = performance.now() - warmupStart;
6667
- logger9.info("Warmup inference complete", {
7003
+ logger10.info("Warmup inference complete", {
6668
7004
  warmupTimeMs: Math.round(warmupTimeMs),
6669
7005
  backend: this._backend
6670
7006
  });
@@ -6751,7 +7087,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6751
7087
  const symmetrized = symmetrizeBlendshapes(rawFrame);
6752
7088
  blendshapes.push(symmetrized);
6753
7089
  }
6754
- logger9.trace("Inference completed", {
7090
+ logger10.trace("Inference completed", {
6755
7091
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
6756
7092
  numFrames,
6757
7093
  inputSamples
@@ -6779,7 +7115,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6779
7115
  const errMsg = err instanceof Error ? err.message : String(err);
6780
7116
  if (errMsg.includes("timed out")) {
6781
7117
  this.poisoned = true;
6782
- logger9.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
7118
+ logger10.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6783
7119
  backend: this._backend,
6784
7120
  timeoutMs: _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS
6785
7121
  });
@@ -6787,7 +7123,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6787
7123
  const oomError = new Error(
6788
7124
  `Wav2ArkitCpu inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
6789
7125
  );
6790
- logger9.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7126
+ logger10.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6791
7127
  pointer: `0x${err.toString(16)}`,
6792
7128
  backend: this._backend
6793
7129
  });
@@ -6800,7 +7136,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6800
7136
  reject(oomError);
6801
7137
  return;
6802
7138
  } else {
6803
- logger9.error("Inference failed", { error: errMsg, backend: this._backend });
7139
+ logger10.error("Inference failed", { error: errMsg, backend: this._backend });
6804
7140
  }
6805
7141
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
6806
7142
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -6827,7 +7163,7 @@ _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS = 5e3;
6827
7163
  var Wav2ArkitCpuInference = _Wav2ArkitCpuInference;
6828
7164
 
6829
7165
  // src/inference/Wav2ArkitCpuWorker.ts
6830
- var logger10 = createLogger("Wav2ArkitCpuWorker");
7166
+ var logger11 = createLogger("Wav2ArkitCpuWorker");
6831
7167
  var WASM_CDN_PATH4 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
6832
7168
  var LOAD_TIMEOUT_MS2 = 42e4;
6833
7169
  var INFERENCE_TIMEOUT_MS2 = 5e3;
@@ -7114,7 +7450,7 @@ var Wav2ArkitCpuWorker = class {
7114
7450
  this.handleWorkerMessage(event.data);
7115
7451
  };
7116
7452
  worker.onerror = (error) => {
7117
- logger10.error("Worker error", { error: error.message });
7453
+ logger11.error("Worker error", { error: error.message });
7118
7454
  for (const [, resolver] of this.pendingResolvers) {
7119
7455
  resolver.reject(new Error(`Worker error: ${error.message}`));
7120
7456
  }
@@ -7190,10 +7526,10 @@ var Wav2ArkitCpuWorker = class {
7190
7526
  "model.backend_requested": "wasm"
7191
7527
  });
7192
7528
  try {
7193
- logger10.info("Creating wav2arkit_cpu worker...");
7529
+ logger11.info("Creating wav2arkit_cpu worker...");
7194
7530
  this.worker = this.createWorker();
7195
7531
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
7196
- logger10.info("Loading model in worker...", {
7532
+ logger11.info("Loading model in worker...", {
7197
7533
  modelUrl: this.config.modelUrl,
7198
7534
  externalDataUrl,
7199
7535
  isIOS: isIOS()
@@ -7211,7 +7547,7 @@ var Wav2ArkitCpuWorker = class {
7211
7547
  );
7212
7548
  this._isLoaded = true;
7213
7549
  const loadTimeMs = performance.now() - startTime;
7214
- logger10.info("Wav2ArkitCpu worker loaded successfully", {
7550
+ logger11.info("Wav2ArkitCpu worker loaded successfully", {
7215
7551
  backend: "wasm",
7216
7552
  loadTimeMs: Math.round(loadTimeMs),
7217
7553
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -7296,7 +7632,7 @@ var Wav2ArkitCpuWorker = class {
7296
7632
  for (let f = 0; f < numFrames; f++) {
7297
7633
  blendshapes.push(flatBuffer.slice(f * numBlendshapes, (f + 1) * numBlendshapes));
7298
7634
  }
7299
- logger10.trace("Worker inference completed", {
7635
+ logger11.trace("Worker inference completed", {
7300
7636
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
7301
7637
  workerTimeMs: Math.round(result.inferenceTimeMs * 100) / 100,
7302
7638
  numFrames,
@@ -7326,12 +7662,12 @@ var Wav2ArkitCpuWorker = class {
7326
7662
  const errMsg = err instanceof Error ? err.message : String(err);
7327
7663
  if (errMsg.includes("timed out")) {
7328
7664
  this.poisoned = true;
7329
- logger10.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7665
+ logger11.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7330
7666
  backend: "wasm",
7331
7667
  timeoutMs: INFERENCE_TIMEOUT_MS2
7332
7668
  });
7333
7669
  } else {
7334
- logger10.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7670
+ logger11.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7335
7671
  }
7336
7672
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
7337
7673
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -7369,38 +7705,38 @@ var Wav2ArkitCpuWorker = class {
7369
7705
  };
7370
7706
 
7371
7707
  // src/inference/createA2E.ts
7372
- var logger11 = createLogger("createA2E");
7708
+ var logger12 = createLogger("createA2E");
7373
7709
  function createA2E(config) {
7374
7710
  const mode = config.mode ?? "auto";
7375
7711
  const fallbackOnError = config.fallbackOnError ?? true;
7376
7712
  let useCpu;
7377
7713
  if (mode === "cpu") {
7378
7714
  useCpu = true;
7379
- logger11.info("Forcing CPU A2E model (wav2arkit_cpu)");
7715
+ logger12.info("Forcing CPU A2E model (wav2arkit_cpu)");
7380
7716
  } else if (mode === "gpu") {
7381
7717
  useCpu = false;
7382
- logger11.info("Forcing GPU A2E model (Wav2Vec2)");
7718
+ logger12.info("Forcing GPU A2E model (Wav2Vec2)");
7383
7719
  } else {
7384
7720
  useCpu = shouldUseCpuA2E();
7385
- logger11.info("Auto-detected A2E model", {
7721
+ logger12.info("Auto-detected A2E model", {
7386
7722
  useCpu,
7387
7723
  isSafari: isSafari()
7388
7724
  });
7389
7725
  }
7390
7726
  if (useCpu) {
7391
7727
  if (config.unifiedWorker) {
7392
- logger11.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7728
+ logger12.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7393
7729
  return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
7394
7730
  modelUrl: config.cpuModelUrl
7395
7731
  });
7396
7732
  }
7397
7733
  if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7398
- logger11.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7734
+ logger12.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7399
7735
  return new Wav2ArkitCpuWorker({
7400
7736
  modelUrl: config.cpuModelUrl
7401
7737
  });
7402
7738
  }
7403
- logger11.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7739
+ logger12.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7404
7740
  return new Wav2ArkitCpuInference({
7405
7741
  modelUrl: config.cpuModelUrl
7406
7742
  });
@@ -7412,10 +7748,10 @@ function createA2E(config) {
7412
7748
  numIdentityClasses: config.numIdentityClasses
7413
7749
  });
7414
7750
  if (fallbackOnError) {
7415
- logger11.info("Creating Wav2Vec2Inference with CPU fallback");
7751
+ logger12.info("Creating Wav2Vec2Inference with CPU fallback");
7416
7752
  return new A2EWithFallback(gpuInstance, config);
7417
7753
  }
7418
- logger11.info("Creating Wav2Vec2Inference (no fallback)");
7754
+ logger12.info("Creating Wav2Vec2Inference (no fallback)");
7419
7755
  return gpuInstance;
7420
7756
  }
7421
7757
  var A2EWithFallback = class {
@@ -7444,7 +7780,7 @@ var A2EWithFallback = class {
7444
7780
  }
7445
7781
  }
7446
7782
  async fallbackToCpu(reason) {
7447
- logger11.warn("GPU model load failed, falling back to CPU model", { reason });
7783
+ logger12.warn("GPU model load failed, falling back to CPU model", { reason });
7448
7784
  try {
7449
7785
  await this.implementation.dispose();
7450
7786
  } catch {
@@ -7453,17 +7789,17 @@ var A2EWithFallback = class {
7453
7789
  this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
7454
7790
  modelUrl: this.config.cpuModelUrl
7455
7791
  });
7456
- logger11.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7792
+ logger12.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7457
7793
  } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7458
7794
  this.implementation = new Wav2ArkitCpuWorker({
7459
7795
  modelUrl: this.config.cpuModelUrl
7460
7796
  });
7461
- logger11.info("Fallback to Wav2ArkitCpuWorker successful");
7797
+ logger12.info("Fallback to Wav2ArkitCpuWorker successful");
7462
7798
  } else {
7463
7799
  this.implementation = new Wav2ArkitCpuInference({
7464
7800
  modelUrl: this.config.cpuModelUrl
7465
7801
  });
7466
- logger11.info("Fallback to Wav2ArkitCpuInference successful");
7802
+ logger12.info("Fallback to Wav2ArkitCpuInference successful");
7467
7803
  }
7468
7804
  this.hasFallenBack = true;
7469
7805
  return await this.implementation.load();
@@ -7667,7 +8003,7 @@ var EmphasisDetector = class {
7667
8003
  };
7668
8004
 
7669
8005
  // src/inference/SileroVADInference.ts
7670
- var logger12 = createLogger("SileroVAD");
8006
+ var logger13 = createLogger("SileroVAD");
7671
8007
  var SileroVADInference = class {
7672
8008
  constructor(config) {
7673
8009
  this.session = null;
@@ -7741,23 +8077,23 @@ var SileroVADInference = class {
7741
8077
  "model.sample_rate": this.config.sampleRate
7742
8078
  });
7743
8079
  try {
7744
- logger12.info("Loading ONNX Runtime...", { preference: this.config.backend });
8080
+ logger13.info("Loading ONNX Runtime...", { preference: this.config.backend });
7745
8081
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
7746
8082
  this.ort = ort;
7747
8083
  this._backend = backend;
7748
- logger12.info("ONNX Runtime loaded", { backend: this._backend });
8084
+ logger13.info("ONNX Runtime loaded", { backend: this._backend });
7749
8085
  const cache = getModelCache();
7750
8086
  const modelUrl = this.config.modelUrl;
7751
8087
  const isCached = await cache.has(modelUrl);
7752
8088
  let modelBuffer;
7753
8089
  if (isCached) {
7754
- logger12.debug("Loading model from cache", { modelUrl });
8090
+ logger13.debug("Loading model from cache", { modelUrl });
7755
8091
  modelBuffer = await cache.get(modelUrl);
7756
8092
  } else {
7757
- logger12.debug("Fetching and caching model", { modelUrl });
8093
+ logger13.debug("Fetching and caching model", { modelUrl });
7758
8094
  modelBuffer = await fetchWithCache(modelUrl);
7759
8095
  }
7760
- logger12.debug("Creating ONNX session", {
8096
+ logger13.debug("Creating ONNX session", {
7761
8097
  size: formatBytes(modelBuffer.byteLength),
7762
8098
  backend: this._backend
7763
8099
  });
@@ -7766,7 +8102,7 @@ var SileroVADInference = class {
7766
8102
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
7767
8103
  this.reset();
7768
8104
  const loadTimeMs = performance.now() - startTime;
7769
- logger12.info("Model loaded successfully", {
8105
+ logger13.info("Model loaded successfully", {
7770
8106
  backend: this._backend,
7771
8107
  loadTimeMs: Math.round(loadTimeMs),
7772
8108
  sampleRate: this.config.sampleRate,
@@ -7821,7 +8157,7 @@ var SileroVADInference = class {
7821
8157
  []
7822
8158
  );
7823
8159
  } catch (e) {
7824
- logger12.warn("BigInt64Array not available, using bigint array fallback", {
8160
+ logger13.warn("BigInt64Array not available, using bigint array fallback", {
7825
8161
  error: e instanceof Error ? e.message : String(e)
7826
8162
  });
7827
8163
  this.srTensor = new this.ort.Tensor(
@@ -7927,7 +8263,7 @@ var SileroVADInference = class {
7927
8263
  this.preSpeechBuffer.shift();
7928
8264
  }
7929
8265
  }
7930
- logger12.trace("Skipping VAD inference - audio too quiet", {
8266
+ logger13.trace("Skipping VAD inference - audio too quiet", {
7931
8267
  rms: Math.round(rms * 1e4) / 1e4,
7932
8268
  threshold: MIN_ENERGY_THRESHOLD
7933
8269
  });
@@ -7981,7 +8317,7 @@ var SileroVADInference = class {
7981
8317
  if (isSpeech && !this.wasSpeaking) {
7982
8318
  preSpeechChunks = [...this.preSpeechBuffer];
7983
8319
  this.preSpeechBuffer = [];
7984
- logger12.debug("Speech started with pre-speech buffer", {
8320
+ logger13.debug("Speech started with pre-speech buffer", {
7985
8321
  preSpeechChunks: preSpeechChunks.length,
7986
8322
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
7987
8323
  });
@@ -7994,7 +8330,7 @@ var SileroVADInference = class {
7994
8330
  this.preSpeechBuffer = [];
7995
8331
  }
7996
8332
  this.wasSpeaking = isSpeech;
7997
- logger12.trace("VAD inference completed", {
8333
+ logger13.trace("VAD inference completed", {
7998
8334
  probability: Math.round(probability * 1e3) / 1e3,
7999
8335
  isSpeech,
8000
8336
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -8025,7 +8361,7 @@ var SileroVADInference = class {
8025
8361
  const oomError = new Error(
8026
8362
  `SileroVAD inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reducing concurrent model sessions or reloading the page.`
8027
8363
  );
8028
- logger12.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
8364
+ logger13.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
8029
8365
  pointer: `0x${err.toString(16)}`,
8030
8366
  backend: this._backend
8031
8367
  });
@@ -8068,7 +8404,7 @@ var SileroVADInference = class {
8068
8404
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
8069
8405
 
8070
8406
  // src/inference/SileroVADWorker.ts
8071
- var logger13 = createLogger("SileroVADWorker");
8407
+ var logger14 = createLogger("SileroVADWorker");
8072
8408
  var WASM_CDN_PATH5 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
8073
8409
  var LOAD_TIMEOUT_MS3 = 12e4;
8074
8410
  var INFERENCE_TIMEOUT_MS3 = 1e3;
@@ -8353,7 +8689,7 @@ var SileroVADWorker = class {
8353
8689
  this.handleWorkerMessage(event.data);
8354
8690
  };
8355
8691
  worker.onerror = (error) => {
8356
- logger13.error("Worker error", { error: error.message });
8692
+ logger14.error("Worker error", { error: error.message });
8357
8693
  for (const [, resolver] of this.pendingResolvers) {
8358
8694
  resolver.reject(new Error(`Worker error: ${error.message}`));
8359
8695
  }
@@ -8429,9 +8765,9 @@ var SileroVADWorker = class {
8429
8765
  "model.sample_rate": this.config.sampleRate
8430
8766
  });
8431
8767
  try {
8432
- logger13.info("Creating VAD worker...");
8768
+ logger14.info("Creating VAD worker...");
8433
8769
  this.worker = this.createWorker();
8434
- logger13.info("Loading model in worker...", {
8770
+ logger14.info("Loading model in worker...", {
8435
8771
  modelUrl: this.config.modelUrl,
8436
8772
  sampleRate: this.config.sampleRate
8437
8773
  });
@@ -8447,7 +8783,7 @@ var SileroVADWorker = class {
8447
8783
  );
8448
8784
  this._isLoaded = true;
8449
8785
  const loadTimeMs = performance.now() - startTime;
8450
- logger13.info("VAD worker loaded successfully", {
8786
+ logger14.info("VAD worker loaded successfully", {
8451
8787
  backend: "wasm",
8452
8788
  loadTimeMs: Math.round(loadTimeMs),
8453
8789
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -8554,7 +8890,7 @@ var SileroVADWorker = class {
8554
8890
  if (isSpeech && !this.wasSpeaking) {
8555
8891
  preSpeechChunks = [...this.preSpeechBuffer];
8556
8892
  this.preSpeechBuffer = [];
8557
- logger13.debug("Speech started with pre-speech buffer", {
8893
+ logger14.debug("Speech started with pre-speech buffer", {
8558
8894
  preSpeechChunks: preSpeechChunks.length,
8559
8895
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8560
8896
  });
@@ -8567,7 +8903,7 @@ var SileroVADWorker = class {
8567
8903
  this.preSpeechBuffer = [];
8568
8904
  }
8569
8905
  this.wasSpeaking = isSpeech;
8570
- logger13.trace("VAD worker inference completed", {
8906
+ logger14.trace("VAD worker inference completed", {
8571
8907
  probability: Math.round(result.probability * 1e3) / 1e3,
8572
8908
  isSpeech,
8573
8909
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -8635,44 +8971,44 @@ var SileroVADWorker = class {
8635
8971
  };
8636
8972
 
8637
8973
  // src/inference/createSileroVAD.ts
8638
- var logger14 = createLogger("createSileroVAD");
8974
+ var logger15 = createLogger("createSileroVAD");
8639
8975
  function supportsVADWorker() {
8640
8976
  if (typeof Worker === "undefined") {
8641
- logger14.debug("Worker not supported: Worker constructor undefined");
8977
+ logger15.debug("Worker not supported: Worker constructor undefined");
8642
8978
  return false;
8643
8979
  }
8644
8980
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
8645
- logger14.debug("Worker not supported: URL.createObjectURL unavailable");
8981
+ logger15.debug("Worker not supported: URL.createObjectURL unavailable");
8646
8982
  return false;
8647
8983
  }
8648
8984
  if (typeof Blob === "undefined") {
8649
- logger14.debug("Worker not supported: Blob constructor unavailable");
8985
+ logger15.debug("Worker not supported: Blob constructor unavailable");
8650
8986
  return false;
8651
8987
  }
8652
8988
  return true;
8653
8989
  }
8654
8990
  function createSileroVAD(config) {
8655
8991
  if (config.unifiedWorker) {
8656
- logger14.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8992
+ logger15.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8657
8993
  return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
8658
8994
  }
8659
8995
  const fallbackOnError = config.fallbackOnError ?? true;
8660
8996
  let useWorker;
8661
8997
  if (config.useWorker !== void 0) {
8662
8998
  useWorker = config.useWorker;
8663
- logger14.debug("Worker preference explicitly set", { useWorker });
8999
+ logger15.debug("Worker preference explicitly set", { useWorker });
8664
9000
  } else {
8665
9001
  const workerSupported = supportsVADWorker();
8666
9002
  const onMobile = isMobile();
8667
9003
  useWorker = workerSupported && !onMobile;
8668
- logger14.debug("Auto-detected Worker preference", {
9004
+ logger15.debug("Auto-detected Worker preference", {
8669
9005
  useWorker,
8670
9006
  workerSupported,
8671
9007
  onMobile
8672
9008
  });
8673
9009
  }
8674
9010
  if (useWorker) {
8675
- logger14.info("Creating SileroVADWorker (off-main-thread)");
9011
+ logger15.info("Creating SileroVADWorker (off-main-thread)");
8676
9012
  const worker = new SileroVADWorker({
8677
9013
  modelUrl: config.modelUrl,
8678
9014
  sampleRate: config.sampleRate,
@@ -8684,7 +9020,7 @@ function createSileroVAD(config) {
8684
9020
  }
8685
9021
  return worker;
8686
9022
  }
8687
- logger14.info("Creating SileroVADInference (main thread)");
9023
+ logger15.info("Creating SileroVADInference (main thread)");
8688
9024
  return new SileroVADInference(config);
8689
9025
  }
8690
9026
  var VADWorkerWithFallback = class {
@@ -8710,7 +9046,7 @@ var VADWorkerWithFallback = class {
8710
9046
  try {
8711
9047
  return await this.implementation.load();
8712
9048
  } catch (error) {
8713
- logger14.warn("Worker load failed, falling back to main thread", {
9049
+ logger15.warn("Worker load failed, falling back to main thread", {
8714
9050
  error: error instanceof Error ? error.message : String(error)
8715
9051
  });
8716
9052
  try {
@@ -8719,7 +9055,7 @@ var VADWorkerWithFallback = class {
8719
9055
  }
8720
9056
  this.implementation = new SileroVADInference(this.config);
8721
9057
  this.hasFallenBack = true;
8722
- logger14.info("Fallback to SileroVADInference successful");
9058
+ logger15.info("Fallback to SileroVADInference successful");
8723
9059
  return await this.implementation.load();
8724
9060
  }
8725
9061
  }
@@ -8741,7 +9077,7 @@ var VADWorkerWithFallback = class {
8741
9077
  };
8742
9078
 
8743
9079
  // src/inference/A2EOrchestrator.ts
8744
- var logger15 = createLogger("A2EOrchestrator");
9080
+ var logger16 = createLogger("A2EOrchestrator");
8745
9081
  var A2EOrchestrator = class {
8746
9082
  constructor(config) {
8747
9083
  this.a2e = null;
@@ -8782,7 +9118,7 @@ var A2EOrchestrator = class {
8782
9118
  */
8783
9119
  async load() {
8784
9120
  if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
8785
- logger15.info("Loading A2E model...");
9121
+ logger16.info("Loading A2E model...");
8786
9122
  this.a2e = createA2E({
8787
9123
  gpuModelUrl: this.config.gpuModelUrl,
8788
9124
  gpuExternalDataUrl: this.config.gpuExternalDataUrl,
@@ -8799,7 +9135,7 @@ var A2EOrchestrator = class {
8799
9135
  onError: this.config.onError
8800
9136
  });
8801
9137
  this._isReady = true;
8802
- logger15.info("A2E model loaded", {
9138
+ logger16.info("A2E model loaded", {
8803
9139
  backend: info.backend,
8804
9140
  loadTimeMs: info.loadTimeMs,
8805
9141
  modelId: this.a2e.modelId
@@ -8854,10 +9190,10 @@ var A2EOrchestrator = class {
8854
9190
  this.scriptProcessor.connect(this.audioContext.destination);
8855
9191
  this._isStreaming = true;
8856
9192
  this.processor.startDrip();
8857
- logger15.info("Mic capture started", { sampleRate: this.nativeSampleRate });
9193
+ logger16.info("Mic capture started", { sampleRate: this.nativeSampleRate });
8858
9194
  } catch (err) {
8859
9195
  const error = err instanceof Error ? err : new Error(String(err));
8860
- logger15.error("Failed to start mic capture", { error: error.message });
9196
+ logger16.error("Failed to start mic capture", { error: error.message });
8861
9197
  this.config.onError?.(error);
8862
9198
  throw error;
8863
9199
  }
@@ -8885,7 +9221,7 @@ var A2EOrchestrator = class {
8885
9221
  });
8886
9222
  this.audioContext = null;
8887
9223
  }
8888
- logger15.info("Mic capture stopped");
9224
+ logger16.info("Mic capture stopped");
8889
9225
  }
8890
9226
  /**
8891
9227
  * Dispose of all resources
@@ -8908,7 +9244,7 @@ var A2EOrchestrator = class {
8908
9244
  };
8909
9245
 
8910
9246
  // src/inference/SafariSpeechRecognition.ts
8911
- var logger16 = createLogger("SafariSpeech");
9247
+ var logger17 = createLogger("SafariSpeech");
8912
9248
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
8913
9249
  constructor(config = {}) {
8914
9250
  this.recognition = null;
@@ -8927,7 +9263,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8927
9263
  interimResults: config.interimResults ?? true,
8928
9264
  maxAlternatives: config.maxAlternatives ?? 1
8929
9265
  };
8930
- logger16.debug("SafariSpeechRecognition created", {
9266
+ logger17.debug("SafariSpeechRecognition created", {
8931
9267
  language: this.config.language,
8932
9268
  continuous: this.config.continuous
8933
9269
  });
@@ -8988,7 +9324,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8988
9324
  */
8989
9325
  async start() {
8990
9326
  if (this.isListening) {
8991
- logger16.warn("Already listening");
9327
+ logger17.warn("Already listening");
8992
9328
  return;
8993
9329
  }
8994
9330
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -9018,7 +9354,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9018
9354
  this.isListening = true;
9019
9355
  this.startTime = performance.now();
9020
9356
  this.accumulatedText = "";
9021
- logger16.info("Speech recognition started", {
9357
+ logger17.info("Speech recognition started", {
9022
9358
  language: this.config.language
9023
9359
  });
9024
9360
  span?.end();
@@ -9033,7 +9369,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9033
9369
  */
9034
9370
  async stop() {
9035
9371
  if (!this.isListening || !this.recognition) {
9036
- logger16.warn("Not currently listening");
9372
+ logger17.warn("Not currently listening");
9037
9373
  return {
9038
9374
  text: this.accumulatedText,
9039
9375
  language: this.config.language,
@@ -9062,7 +9398,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9062
9398
  if (this.recognition && this.isListening) {
9063
9399
  this.recognition.abort();
9064
9400
  this.isListening = false;
9065
- logger16.info("Speech recognition aborted");
9401
+ logger17.info("Speech recognition aborted");
9066
9402
  }
9067
9403
  }
9068
9404
  /**
@@ -9093,7 +9429,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9093
9429
  this.isListening = false;
9094
9430
  this.resultCallbacks = [];
9095
9431
  this.errorCallbacks = [];
9096
- logger16.debug("SafariSpeechRecognition disposed");
9432
+ logger17.debug("SafariSpeechRecognition disposed");
9097
9433
  }
9098
9434
  /**
9099
9435
  * Set up event handlers for the recognition instance
@@ -9121,7 +9457,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9121
9457
  confidence: alternative.confidence
9122
9458
  };
9123
9459
  this.emitResult(speechResult);
9124
- logger16.trace("Speech result", {
9460
+ logger17.trace("Speech result", {
9125
9461
  text: text.substring(0, 50),
9126
9462
  isFinal,
9127
9463
  confidence: alternative.confidence
@@ -9131,12 +9467,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9131
9467
  span?.end();
9132
9468
  } catch (error) {
9133
9469
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
9134
- logger16.error("Error processing speech result", { error });
9470
+ logger17.error("Error processing speech result", { error });
9135
9471
  }
9136
9472
  };
9137
9473
  this.recognition.onerror = (event) => {
9138
9474
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
9139
- logger16.error("Speech recognition error", { error: event.error, message: event.message });
9475
+ logger17.error("Speech recognition error", { error: event.error, message: event.message });
9140
9476
  this.emitError(error);
9141
9477
  if (this.stopRejecter) {
9142
9478
  this.stopRejecter(error);
@@ -9146,7 +9482,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9146
9482
  };
9147
9483
  this.recognition.onend = () => {
9148
9484
  this.isListening = false;
9149
- logger16.info("Speech recognition ended", {
9485
+ logger17.info("Speech recognition ended", {
9150
9486
  totalText: this.accumulatedText.length,
9151
9487
  durationMs: performance.now() - this.startTime
9152
9488
  });
@@ -9163,13 +9499,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9163
9499
  }
9164
9500
  };
9165
9501
  this.recognition.onstart = () => {
9166
- logger16.debug("Speech recognition started by browser");
9502
+ logger17.debug("Speech recognition started by browser");
9167
9503
  };
9168
9504
  this.recognition.onspeechstart = () => {
9169
- logger16.debug("Speech detected");
9505
+ logger17.debug("Speech detected");
9170
9506
  };
9171
9507
  this.recognition.onspeechend = () => {
9172
- logger16.debug("Speech ended");
9508
+ logger17.debug("Speech ended");
9173
9509
  };
9174
9510
  }
9175
9511
  /**
@@ -9180,7 +9516,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9180
9516
  try {
9181
9517
  callback(result);
9182
9518
  } catch (error) {
9183
- logger16.error("Error in result callback", { error });
9519
+ logger17.error("Error in result callback", { error });
9184
9520
  }
9185
9521
  }
9186
9522
  }
@@ -9192,7 +9528,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9192
9528
  try {
9193
9529
  callback(error);
9194
9530
  } catch (callbackError) {
9195
- logger16.error("Error in error callback", { error: callbackError });
9531
+ logger17.error("Error in error callback", { error: callbackError });
9196
9532
  }
9197
9533
  }
9198
9534
  }
@@ -9762,327 +10098,9 @@ var AnimationGraph = class extends EventEmitter {
9762
10098
  }
9763
10099
  };
9764
10100
 
9765
- // src/animation/simplex2d.ts
9766
- var perm = new Uint8Array(512);
9767
- var grad2 = [
9768
- [1, 1],
9769
- [-1, 1],
9770
- [1, -1],
9771
- [-1, -1],
9772
- [1, 0],
9773
- [-1, 0],
9774
- [0, 1],
9775
- [0, -1]
9776
- ];
9777
- var p = [
9778
- 151,
9779
- 160,
9780
- 137,
9781
- 91,
9782
- 90,
9783
- 15,
9784
- 131,
9785
- 13,
9786
- 201,
9787
- 95,
9788
- 96,
9789
- 53,
9790
- 194,
9791
- 233,
9792
- 7,
9793
- 225,
9794
- 140,
9795
- 36,
9796
- 103,
9797
- 30,
9798
- 69,
9799
- 142,
9800
- 8,
9801
- 99,
9802
- 37,
9803
- 240,
9804
- 21,
9805
- 10,
9806
- 23,
9807
- 190,
9808
- 6,
9809
- 148,
9810
- 247,
9811
- 120,
9812
- 234,
9813
- 75,
9814
- 0,
9815
- 26,
9816
- 197,
9817
- 62,
9818
- 94,
9819
- 252,
9820
- 219,
9821
- 203,
9822
- 117,
9823
- 35,
9824
- 11,
9825
- 32,
9826
- 57,
9827
- 177,
9828
- 33,
9829
- 88,
9830
- 237,
9831
- 149,
9832
- 56,
9833
- 87,
9834
- 174,
9835
- 20,
9836
- 125,
9837
- 136,
9838
- 171,
9839
- 168,
9840
- 68,
9841
- 175,
9842
- 74,
9843
- 165,
9844
- 71,
9845
- 134,
9846
- 139,
9847
- 48,
9848
- 27,
9849
- 166,
9850
- 77,
9851
- 146,
9852
- 158,
9853
- 231,
9854
- 83,
9855
- 111,
9856
- 229,
9857
- 122,
9858
- 60,
9859
- 211,
9860
- 133,
9861
- 230,
9862
- 220,
9863
- 105,
9864
- 92,
9865
- 41,
9866
- 55,
9867
- 46,
9868
- 245,
9869
- 40,
9870
- 244,
9871
- 102,
9872
- 143,
9873
- 54,
9874
- 65,
9875
- 25,
9876
- 63,
9877
- 161,
9878
- 1,
9879
- 216,
9880
- 80,
9881
- 73,
9882
- 209,
9883
- 76,
9884
- 132,
9885
- 187,
9886
- 208,
9887
- 89,
9888
- 18,
9889
- 169,
9890
- 200,
9891
- 196,
9892
- 135,
9893
- 130,
9894
- 116,
9895
- 188,
9896
- 159,
9897
- 86,
9898
- 164,
9899
- 100,
9900
- 109,
9901
- 198,
9902
- 173,
9903
- 186,
9904
- 3,
9905
- 64,
9906
- 52,
9907
- 217,
9908
- 226,
9909
- 250,
9910
- 124,
9911
- 123,
9912
- 5,
9913
- 202,
9914
- 38,
9915
- 147,
9916
- 118,
9917
- 126,
9918
- 255,
9919
- 82,
9920
- 85,
9921
- 212,
9922
- 207,
9923
- 206,
9924
- 59,
9925
- 227,
9926
- 47,
9927
- 16,
9928
- 58,
9929
- 17,
9930
- 182,
9931
- 189,
9932
- 28,
9933
- 42,
9934
- 223,
9935
- 183,
9936
- 170,
9937
- 213,
9938
- 119,
9939
- 248,
9940
- 152,
9941
- 2,
9942
- 44,
9943
- 154,
9944
- 163,
9945
- 70,
9946
- 221,
9947
- 153,
9948
- 101,
9949
- 155,
9950
- 167,
9951
- 43,
9952
- 172,
9953
- 9,
9954
- 129,
9955
- 22,
9956
- 39,
9957
- 253,
9958
- 19,
9959
- 98,
9960
- 108,
9961
- 110,
9962
- 79,
9963
- 113,
9964
- 224,
9965
- 232,
9966
- 178,
9967
- 185,
9968
- 112,
9969
- 104,
9970
- 218,
9971
- 246,
9972
- 97,
9973
- 228,
9974
- 251,
9975
- 34,
9976
- 242,
9977
- 193,
9978
- 238,
9979
- 210,
9980
- 144,
9981
- 12,
9982
- 191,
9983
- 179,
9984
- 162,
9985
- 241,
9986
- 81,
9987
- 51,
9988
- 145,
9989
- 235,
9990
- 249,
9991
- 14,
9992
- 239,
9993
- 107,
9994
- 49,
9995
- 192,
9996
- 214,
9997
- 31,
9998
- 181,
9999
- 199,
10000
- 106,
10001
- 157,
10002
- 184,
10003
- 84,
10004
- 204,
10005
- 176,
10006
- 115,
10007
- 121,
10008
- 50,
10009
- 45,
10010
- 127,
10011
- 4,
10012
- 150,
10013
- 254,
10014
- 138,
10015
- 236,
10016
- 205,
10017
- 93,
10018
- 222,
10019
- 114,
10020
- 67,
10021
- 29,
10022
- 24,
10023
- 72,
10024
- 243,
10025
- 141,
10026
- 128,
10027
- 195,
10028
- 78,
10029
- 66,
10030
- 215,
10031
- 61,
10032
- 156,
10033
- 180
10034
- ];
10035
- for (let i = 0; i < 256; i++) {
10036
- perm[i] = p[i];
10037
- perm[i + 256] = p[i];
10038
- }
10039
- var F2 = 0.5 * (Math.sqrt(3) - 1);
10040
- var G2 = (3 - Math.sqrt(3)) / 6;
10041
- function dot2(g, x, y) {
10042
- return g[0] * x + g[1] * y;
10043
- }
10044
- function simplex2d(x, y) {
10045
- const s = (x + y) * F2;
10046
- const i = Math.floor(x + s);
10047
- const j = Math.floor(y + s);
10048
- const t = (i + j) * G2;
10049
- const X0 = i - t;
10050
- const Y0 = j - t;
10051
- const x0 = x - X0;
10052
- const y0 = y - Y0;
10053
- const i1 = x0 > y0 ? 1 : 0;
10054
- const j1 = x0 > y0 ? 0 : 1;
10055
- const x1 = x0 - i1 + G2;
10056
- const y1 = y0 - j1 + G2;
10057
- const x2 = x0 - 1 + 2 * G2;
10058
- const y2 = y0 - 1 + 2 * G2;
10059
- const ii = i & 255;
10060
- const jj = j & 255;
10061
- const gi0 = perm[ii + perm[jj]] % 8;
10062
- const gi1 = perm[ii + i1 + perm[jj + j1]] % 8;
10063
- const gi2 = perm[ii + 1 + perm[jj + 1]] % 8;
10064
- let n0 = 0;
10065
- let t0 = 0.5 - x0 * x0 - y0 * y0;
10066
- if (t0 >= 0) {
10067
- t0 *= t0;
10068
- n0 = t0 * t0 * dot2(grad2[gi0], x0, y0);
10069
- }
10070
- let n1 = 0;
10071
- let t1 = 0.5 - x1 * x1 - y1 * y1;
10072
- if (t1 >= 0) {
10073
- t1 *= t1;
10074
- n1 = t1 * t1 * dot2(grad2[gi1], x1, y1);
10075
- }
10076
- let n2 = 0;
10077
- let t2 = 0.5 - x2 * x2 - y2 * y2;
10078
- if (t2 >= 0) {
10079
- t2 *= t2;
10080
- n2 = t2 * t2 * dot2(grad2[gi2], x2, y2);
10081
- }
10082
- return 70 * (n0 + n1 + n2);
10083
- }
10084
-
10085
10101
  // src/animation/ProceduralLifeLayer.ts
10102
+ var import_simplex_noise = require("simplex-noise");
10103
+ var simplex2d = (0, import_simplex_noise.createNoise2D)();
10086
10104
  var PHASE_OPEN = 0;
10087
10105
  var PHASE_CLOSING = 1;
10088
10106
  var PHASE_CLOSED = 2;
@@ -10390,6 +10408,684 @@ var ProceduralLifeLayer = class {
10390
10408
  }
10391
10409
  };
10392
10410
 
10411
+ // src/orchestration/MicLipSync.ts
10412
+ var logger18 = createLogger("MicLipSync");
10413
+ var MicLipSync = class extends EventEmitter {
10414
+ constructor(config) {
10415
+ super();
10416
+ this.omoteEvents = new EventEmitter();
10417
+ this._state = "idle";
10418
+ this._isSpeaking = false;
10419
+ this._currentFrame = null;
10420
+ this._currentRawFrame = null;
10421
+ // VAD state
10422
+ this.speechStartTime = 0;
10423
+ this.vadChunkSize = 0;
10424
+ this.vadBuffer = null;
10425
+ this.vadBufferOffset = 0;
10426
+ this.profile = config.profile ?? {};
10427
+ this.vad = config.vad;
10428
+ this.mic = new MicrophoneCapture(this.omoteEvents, {
10429
+ sampleRate: config.sampleRate ?? 16e3,
10430
+ chunkSize: config.micChunkSize ?? 512
10431
+ });
10432
+ this.processor = new A2EProcessor({
10433
+ backend: config.lam,
10434
+ sampleRate: config.sampleRate ?? 16e3,
10435
+ identityIndex: config.identityIndex,
10436
+ onFrame: (raw) => {
10437
+ const scaled = applyProfile(raw, this.profile);
10438
+ this._currentFrame = scaled;
10439
+ this._currentRawFrame = raw;
10440
+ this.emit("frame", { blendshapes: scaled, rawBlendshapes: raw });
10441
+ },
10442
+ onError: (error) => {
10443
+ logger18.error("A2E inference error", { message: error.message });
10444
+ this.emit("error", error);
10445
+ }
10446
+ });
10447
+ this.omoteEvents.on("audio.chunk", ({ pcm }) => {
10448
+ const float32 = int16ToFloat32(pcm);
10449
+ this.processor.pushAudio(float32);
10450
+ if (this.vad) {
10451
+ this.processVAD(float32);
10452
+ }
10453
+ });
10454
+ this.omoteEvents.on("audio.level", (level) => {
10455
+ this.emit("audio:level", level);
10456
+ });
10457
+ if (this.vad) {
10458
+ this.vadChunkSize = this.vad.getChunkSize();
10459
+ this.vadBuffer = new Float32Array(this.vadChunkSize);
10460
+ this.vadBufferOffset = 0;
10461
+ }
10462
+ }
10463
+ /** Current state */
10464
+ get state() {
10465
+ return this._state;
10466
+ }
10467
+ /** Latest blendshape frame (null before first inference) */
10468
+ get currentFrame() {
10469
+ return this._currentFrame;
10470
+ }
10471
+ /** Whether speech is currently detected (requires VAD) */
10472
+ get isSpeaking() {
10473
+ return this._isSpeaking;
10474
+ }
10475
+ /** Current backend type */
10476
+ get backend() {
10477
+ return this.processor ? "active" : null;
10478
+ }
10479
+ // ---------------------------------------------------------------------------
10480
+ // Public API
10481
+ // ---------------------------------------------------------------------------
10482
+ /** Start microphone capture and inference loop */
10483
+ async start() {
10484
+ if (this._state === "active") return;
10485
+ await this.mic.start();
10486
+ this.processor.startDrip();
10487
+ this.emit("mic:start", void 0);
10488
+ this.setState("active");
10489
+ }
10490
+ /** Stop microphone and inference */
10491
+ stop() {
10492
+ if (this._state === "idle") return;
10493
+ this.processor.stopDrip();
10494
+ this.mic.stop();
10495
+ this._isSpeaking = false;
10496
+ this.emit("mic:stop", void 0);
10497
+ this.setState("idle");
10498
+ }
10499
+ /** Pause inference (mic stays open for faster resume) */
10500
+ pause() {
10501
+ if (this._state !== "active") return;
10502
+ this.processor.stopDrip();
10503
+ this.setState("paused");
10504
+ }
10505
+ /** Resume inference after pause */
10506
+ resume() {
10507
+ if (this._state !== "paused") return;
10508
+ this.processor.startDrip();
10509
+ this.setState("active");
10510
+ }
10511
+ /** Update ExpressionProfile at runtime */
10512
+ setProfile(profile) {
10513
+ this.profile = profile;
10514
+ }
10515
+ /** Dispose of all resources */
10516
+ async dispose() {
10517
+ this.stop();
10518
+ this.processor.dispose();
10519
+ }
10520
+ // ---------------------------------------------------------------------------
10521
+ // Internal: VAD processing
10522
+ // ---------------------------------------------------------------------------
10523
+ async processVAD(samples) {
10524
+ if (!this.vad || !this.vadBuffer) return;
10525
+ for (let i = 0; i < samples.length; i++) {
10526
+ this.vadBuffer[this.vadBufferOffset++] = samples[i];
10527
+ if (this.vadBufferOffset >= this.vadChunkSize) {
10528
+ try {
10529
+ const result = await this.vad.process(this.vadBuffer);
10530
+ const wasSpeaking = this._isSpeaking;
10531
+ this._isSpeaking = result.isSpeech;
10532
+ if (!wasSpeaking && result.isSpeech) {
10533
+ this.speechStartTime = performance.now();
10534
+ this.emit("speech:start", void 0);
10535
+ } else if (wasSpeaking && !result.isSpeech) {
10536
+ const durationMs = performance.now() - this.speechStartTime;
10537
+ this.emit("speech:end", { durationMs });
10538
+ }
10539
+ } catch (err) {
10540
+ logger18.warn("VAD process error", { error: String(err) });
10541
+ }
10542
+ this.vadBufferOffset = 0;
10543
+ }
10544
+ }
10545
+ }
10546
+ // ---------------------------------------------------------------------------
10547
+ // Internal: State management
10548
+ // ---------------------------------------------------------------------------
10549
+ setState(state) {
10550
+ if (this._state === state) return;
10551
+ this._state = state;
10552
+ this.emit("state", state);
10553
+ }
10554
+ };
10555
+
10556
+ // src/orchestration/VoicePipeline.ts
10557
+ var logger19 = createLogger("VoicePipeline");
10558
+ var VoicePipeline = class extends EventEmitter {
10559
+ constructor(config) {
10560
+ super();
10561
+ // State
10562
+ this._state = "idle";
10563
+ this.stopped = false;
10564
+ this.epoch = 0;
10565
+ this._sessionId = null;
10566
+ // Models
10567
+ this.asr = null;
10568
+ this.lam = null;
10569
+ this.vad = null;
10570
+ this.unifiedWorker = null;
10571
+ // Pipelines
10572
+ this.playback = null;
10573
+ this.interruption = null;
10574
+ this.omoteEvents = new EventEmitter();
10575
+ this.mic = null;
10576
+ // Audio accumulation
10577
+ this.audioBuffer = [];
10578
+ this.audioBufferSamples = 0;
10579
+ this.speechStartTime = 0;
10580
+ this.silenceTimer = null;
10581
+ this.isSpeaking = false;
10582
+ // Progressive transcription
10583
+ this.progressiveTimer = null;
10584
+ this.progressivePromise = null;
10585
+ this.lastProgressiveResult = null;
10586
+ this.lastProgressiveSamples = 0;
10587
+ // ASR error recovery
10588
+ this.asrErrorCount = 0;
10589
+ // Response abort
10590
+ this.responseAbortController = null;
10591
+ // Frame refs
10592
+ this._currentFrame = null;
10593
+ this.config = config;
10594
+ }
10595
+ /** Current pipeline state */
10596
+ get state() {
10597
+ return this._state;
10598
+ }
10599
+ /** Latest blendshape frame */
10600
+ get currentFrame() {
10601
+ return this._currentFrame;
10602
+ }
10603
+ /** Whether user is currently speaking */
10604
+ get isSpeechActive() {
10605
+ return this.isSpeaking;
10606
+ }
10607
+ /** Session ID (generated on start(), null before) */
10608
+ get sessionId() {
10609
+ return this._sessionId;
10610
+ }
10611
+ // ---------------------------------------------------------------------------
10612
+ // Model loading
10613
+ // ---------------------------------------------------------------------------
10614
+ async loadModels() {
10615
+ this.setState("loading");
10616
+ const timeoutMs = this.config.lamLoadTimeoutMs ?? 3e4;
10617
+ try {
10618
+ if (isIOS()) {
10619
+ this.unifiedWorker = new UnifiedInferenceWorker();
10620
+ await this.unifiedWorker.init();
10621
+ }
10622
+ this.emitProgress("Speech recognition", 0, 3, 0);
10623
+ this.asr = createSenseVoice({
10624
+ modelUrl: this.config.models.senseVoice.modelUrl,
10625
+ tokensUrl: this.config.models.senseVoice.tokensUrl,
10626
+ language: this.config.models.senseVoice.language,
10627
+ unifiedWorker: this.unifiedWorker ?? void 0
10628
+ });
10629
+ await this.asr.load();
10630
+ this.emitProgress("Speech recognition", 45, 3, 1);
10631
+ this.emitProgress("Lip sync", 45, 3, 1);
10632
+ let lam = createA2E({
10633
+ gpuModelUrl: this.config.models.lam.gpuModelUrl,
10634
+ gpuExternalDataUrl: this.config.models.lam.gpuExternalDataUrl,
10635
+ cpuModelUrl: this.config.models.lam.cpuModelUrl,
10636
+ mode: this.config.models.lam.mode,
10637
+ unifiedWorker: this.unifiedWorker ?? void 0
10638
+ });
10639
+ let lamProgress = 45;
10640
+ const lamTickInterval = setInterval(() => {
10641
+ const remaining = 85 - lamProgress;
10642
+ lamProgress += Math.max(0.5, remaining * 0.08);
10643
+ this.emitProgress("Lip sync", Math.round(lamProgress), 3, 1);
10644
+ }, 300);
10645
+ try {
10646
+ const lamLoadResult = await Promise.race([
10647
+ lam.load().then(() => "ok"),
10648
+ new Promise((r) => setTimeout(() => r("timeout"), timeoutMs))
10649
+ ]);
10650
+ if (lamLoadResult === "timeout") {
10651
+ logger19.warn(`LAM GPU load timed out after ${timeoutMs}ms, falling back to CPU`);
10652
+ await lam.dispose();
10653
+ lam = createA2E({
10654
+ gpuModelUrl: this.config.models.lam.gpuModelUrl,
10655
+ cpuModelUrl: this.config.models.lam.cpuModelUrl,
10656
+ mode: "cpu",
10657
+ unifiedWorker: this.unifiedWorker ?? void 0
10658
+ });
10659
+ await lam.load();
10660
+ }
10661
+ } finally {
10662
+ clearInterval(lamTickInterval);
10663
+ }
10664
+ this.lam = lam;
10665
+ this.emitProgress("Lip sync", 85, 3, 2);
10666
+ this.emitProgress("Voice detection", 85, 3, 2);
10667
+ this.vad = createSileroVAD({
10668
+ modelUrl: this.config.models.vad.modelUrl,
10669
+ threshold: this.config.models.vad.threshold,
10670
+ unifiedWorker: this.unifiedWorker ?? void 0
10671
+ });
10672
+ await this.vad.load();
10673
+ this.emitProgress("Voice detection", 100, 3, 3);
10674
+ this.playback = new PlaybackPipeline({
10675
+ lam: this.lam,
10676
+ profile: this.config.profile,
10677
+ identityIndex: this.config.identityIndex,
10678
+ neutralTransitionEnabled: this.config.neutralTransitionEnabled ?? true,
10679
+ neutralTransitionMs: this.config.neutralTransitionMs,
10680
+ audioDelayMs: this.config.audioDelayMs,
10681
+ chunkTargetMs: this.config.chunkTargetMs
10682
+ });
10683
+ await this.playback.initialize();
10684
+ this.playback.on("frame", (f) => {
10685
+ this._currentFrame = f.blendshapes;
10686
+ this.emit("frame", f);
10687
+ });
10688
+ this.playback.on("frame:raw", (f) => this.emit("frame:raw", f));
10689
+ this.playback.on("playback:start", (t) => this.emit("playback:start", t));
10690
+ this.playback.on("playback:complete", () => {
10691
+ if (this.stopped) return;
10692
+ this.emit("playback:complete", void 0);
10693
+ this.vad?.reset();
10694
+ this.epoch++;
10695
+ this.setState("listening");
10696
+ });
10697
+ this.playback.on("error", (e) => this.emit("error", e));
10698
+ this.interruption = new InterruptionHandler({
10699
+ enabled: this.config.interruptionEnabled ?? true,
10700
+ minSpeechDurationMs: this.config.interruptionMinSpeechMs ?? 200
10701
+ });
10702
+ this.interruption.on("interruption.triggered", () => {
10703
+ this.handleInterruption();
10704
+ });
10705
+ this.setState("ready");
10706
+ } catch (error) {
10707
+ const err = error instanceof Error ? error : new Error(String(error));
10708
+ logger19.error("Model loading failed", { message: err.message });
10709
+ this.emit("error", err);
10710
+ this.setState("error");
10711
+ throw err;
10712
+ }
10713
+ }
10714
+ // ---------------------------------------------------------------------------
10715
+ // Conversation lifecycle
10716
+ // ---------------------------------------------------------------------------
10717
+ async start() {
10718
+ if (this._state !== "ready") {
10719
+ throw new Error(`Cannot start: state is '${this._state}', expected 'ready'`);
10720
+ }
10721
+ this.stopped = false;
10722
+ this.epoch++;
10723
+ this._sessionId = crypto.randomUUID();
10724
+ this.asrErrorCount = 0;
10725
+ this.mic = new MicrophoneCapture(this.omoteEvents, {
10726
+ sampleRate: 16e3,
10727
+ chunkSize: 512
10728
+ });
10729
+ this.omoteEvents.on("audio.chunk", ({ pcm }) => {
10730
+ const float32 = int16ToFloat32(pcm);
10731
+ this.processAudioChunk(float32);
10732
+ });
10733
+ this.omoteEvents.on("audio.level", (level) => {
10734
+ this.emit("audio:level", level);
10735
+ });
10736
+ await this.mic.start();
10737
+ this.setState("listening");
10738
+ }
10739
+ stop() {
10740
+ this.stopped = true;
10741
+ this.epoch++;
10742
+ this.clearSilenceTimer();
10743
+ this.stopProgressiveTranscription();
10744
+ this.responseAbortController?.abort();
10745
+ this.responseAbortController = null;
10746
+ this.vad?.reset();
10747
+ this.playback?.stop();
10748
+ this.mic?.stop();
10749
+ this.mic = null;
10750
+ this.isSpeaking = false;
10751
+ this.audioBuffer = [];
10752
+ this.audioBufferSamples = 0;
10753
+ this._currentFrame = null;
10754
+ this.interruption?.setAISpeaking(false);
10755
+ if (this._state !== "idle") {
10756
+ this.setState("ready");
10757
+ }
10758
+ }
10759
+ setProfile(profile) {
10760
+ this.config.profile = profile;
10761
+ this.playback?.setProfile(profile);
10762
+ }
10763
+ async dispose() {
10764
+ this.stop();
10765
+ this.epoch++;
10766
+ await this.playback?.dispose();
10767
+ await this.asr?.dispose();
10768
+ await this.lam?.dispose();
10769
+ await this.vad?.dispose();
10770
+ this.playback = null;
10771
+ this.asr = null;
10772
+ this.lam = null;
10773
+ this.vad = null;
10774
+ this._state = "idle";
10775
+ }
10776
+ // ---------------------------------------------------------------------------
10777
+ // Audio processing
10778
+ // ---------------------------------------------------------------------------
10779
+ async processAudioChunk(samples) {
10780
+ if (!this.vad) return;
10781
+ try {
10782
+ const result = await this.vad.process(samples);
10783
+ if (this._state === "speaking" && this.interruption) {
10784
+ this.interruption.processVADResult(result.probability);
10785
+ return;
10786
+ }
10787
+ if (this._state !== "listening" && this._state !== "thinking") return;
10788
+ const wasSpeaking = this.isSpeaking;
10789
+ if (result.isSpeech) {
10790
+ if (!wasSpeaking) {
10791
+ this.isSpeaking = true;
10792
+ this.speechStartTime = performance.now();
10793
+ this.audioBuffer = [];
10794
+ this.audioBufferSamples = 0;
10795
+ this.lastProgressiveResult = null;
10796
+ this.lastProgressiveSamples = 0;
10797
+ this.emit("speech:start", void 0);
10798
+ this.startProgressiveTranscription();
10799
+ }
10800
+ this.audioBuffer.push(new Float32Array(samples));
10801
+ this.audioBufferSamples += samples.length;
10802
+ this.clearSilenceTimer();
10803
+ } else if (wasSpeaking) {
10804
+ this.audioBuffer.push(new Float32Array(samples));
10805
+ this.audioBufferSamples += samples.length;
10806
+ if (!this.silenceTimer) {
10807
+ const timeoutMs = this.getSilenceTimeout();
10808
+ this.silenceTimer = setTimeout(() => {
10809
+ this.onSilenceDetected();
10810
+ }, timeoutMs);
10811
+ }
10812
+ }
10813
+ } catch (err) {
10814
+ logger19.warn("VAD error", { error: String(err) });
10815
+ }
10816
+ }
10817
+ // ---------------------------------------------------------------------------
10818
+ // Silence detection
10819
+ // ---------------------------------------------------------------------------
10820
+ getSilenceTimeout() {
10821
+ const base = this.config.silenceTimeoutMs ?? 500;
10822
+ const extended = this.config.silenceTimeoutExtendedMs ?? 700;
10823
+ const adaptive = this.config.adaptiveTimeout ?? true;
10824
+ if (!adaptive) return base;
10825
+ const speechDurationMs = performance.now() - this.speechStartTime;
10826
+ return speechDurationMs > 3e3 ? extended : base;
10827
+ }
10828
+ onSilenceDetected() {
10829
+ const capturedEpoch = this.epoch;
10830
+ this.isSpeaking = false;
10831
+ const durationMs = performance.now() - this.speechStartTime;
10832
+ this.emit("speech:end", { durationMs });
10833
+ this.clearSilenceTimer();
10834
+ this.processEndOfSpeech(capturedEpoch).catch((err) => {
10835
+ logger19.error("End of speech processing failed", { error: String(err) });
10836
+ if (this.epoch === capturedEpoch && !this.stopped) {
10837
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
10838
+ this.setState("listening");
10839
+ }
10840
+ });
10841
+ }
10842
+ // ---------------------------------------------------------------------------
10843
+ // End of speech → transcription → response
10844
+ // ---------------------------------------------------------------------------
10845
+ async processEndOfSpeech(capturedEpoch) {
10846
+ if (this.progressivePromise) {
10847
+ try {
10848
+ await this.progressivePromise;
10849
+ } catch {
10850
+ }
10851
+ }
10852
+ this.stopProgressiveTranscription();
10853
+ if (this.epoch !== capturedEpoch || this.stopped) return;
10854
+ const totalSamples = this.audioBufferSamples;
10855
+ const fullAudio = new Float32Array(totalSamples);
10856
+ let offset = 0;
10857
+ for (const chunk of this.audioBuffer) {
10858
+ fullAudio.set(chunk, offset);
10859
+ offset += chunk.length;
10860
+ }
10861
+ this.audioBuffer = [];
10862
+ this.audioBufferSamples = 0;
10863
+ const minDuration = this.config.minAudioDurationSec ?? 0.3;
10864
+ const minEnergy = this.config.minAudioEnergy ?? 0.02;
10865
+ const durationSec = totalSamples / 16e3;
10866
+ if (durationSec < minDuration) {
10867
+ logger19.info("Audio too short, discarding", { durationSec });
10868
+ this.setState("listening");
10869
+ return;
10870
+ }
10871
+ let maxAbs = 0;
10872
+ for (let i = 0; i < fullAudio.length; i++) {
10873
+ const abs = Math.abs(fullAudio[i]);
10874
+ if (abs > maxAbs) maxAbs = abs;
10875
+ }
10876
+ let rms = 0;
10877
+ for (let i = 0; i < fullAudio.length; i++) {
10878
+ rms += fullAudio[i] * fullAudio[i];
10879
+ }
10880
+ rms = Math.sqrt(rms / fullAudio.length);
10881
+ if (rms < minEnergy) {
10882
+ logger19.info("Audio too quiet, discarding", { rms });
10883
+ this.setState("listening");
10884
+ return;
10885
+ }
10886
+ const normalizedAudio = this.normalizeAudio(fullAudio);
10887
+ this.setState("thinking");
10888
+ let transcript = null;
10889
+ const coverageThreshold = this.config.progressiveCoverageThreshold ?? 0.8;
10890
+ if (this.lastProgressiveResult && this.lastProgressiveResult.text.trim().length > 0 && this.lastProgressiveSamples >= totalSamples * coverageThreshold) {
10891
+ transcript = { ...this.lastProgressiveResult, isFinal: true };
10892
+ logger19.info("Using progressive result", {
10893
+ coverage: (this.lastProgressiveSamples / totalSamples).toFixed(2),
10894
+ text: transcript.text
10895
+ });
10896
+ } else {
10897
+ this.lastProgressiveResult = null;
10898
+ transcript = await this.transcribeWithTimeout(normalizedAudio);
10899
+ if (transcript) {
10900
+ transcript.isFinal = true;
10901
+ }
10902
+ }
10903
+ if (this.epoch !== capturedEpoch || this.stopped) return;
10904
+ if (!transcript || !transcript.text.trim()) {
10905
+ logger19.info("No transcript, resuming listening");
10906
+ this.setState("listening");
10907
+ return;
10908
+ }
10909
+ this.emit("transcript", transcript);
10910
+ await this.callResponseHandler(transcript, capturedEpoch);
10911
+ }
10912
+ // ---------------------------------------------------------------------------
10913
+ // Response handler
10914
+ // ---------------------------------------------------------------------------
10915
+ async callResponseHandler(transcript, capturedEpoch) {
10916
+ if (this.epoch !== capturedEpoch || this.stopped) return;
10917
+ this.setState("speaking");
10918
+ this.interruption?.setAISpeaking(true);
10919
+ const abortController = new AbortController();
10920
+ this.responseAbortController = abortController;
10921
+ try {
10922
+ this.playback.start();
10923
+ await this.config.onResponse({
10924
+ text: transcript.text,
10925
+ emotion: transcript.emotion,
10926
+ event: transcript.event,
10927
+ send: async (chunk) => {
10928
+ if (abortController.signal.aborted) return;
10929
+ await this.playback.onAudioChunk(chunk);
10930
+ },
10931
+ done: async () => {
10932
+ if (abortController.signal.aborted) return;
10933
+ await this.playback.end();
10934
+ },
10935
+ signal: abortController.signal,
10936
+ sessionId: this._sessionId
10937
+ });
10938
+ } catch (error) {
10939
+ if (abortController.signal.aborted) return;
10940
+ const err = error instanceof Error ? error : new Error(String(error));
10941
+ logger19.error("Response handler error", { message: err.message });
10942
+ this.emit("error", err);
10943
+ if (this.epoch === capturedEpoch && !this.stopped) {
10944
+ this.interruption?.setAISpeaking(false);
10945
+ this.setState("listening");
10946
+ }
10947
+ } finally {
10948
+ this.responseAbortController = null;
10949
+ }
10950
+ }
10951
+ // ---------------------------------------------------------------------------
10952
+ // Interruption handling
10953
+ // ---------------------------------------------------------------------------
10954
+ handleInterruption() {
10955
+ if (this._state !== "speaking") return;
10956
+ logger19.info("Interruption triggered");
10957
+ this.epoch++;
10958
+ this.responseAbortController?.abort();
10959
+ this.playback?.stop();
10960
+ this.interruption?.setAISpeaking(false);
10961
+ this.emit("interruption", void 0);
10962
+ if (!this.stopped) {
10963
+ this.setState("listening");
10964
+ }
10965
+ }
10966
+ // ---------------------------------------------------------------------------
10967
+ // Progressive transcription
10968
+ // ---------------------------------------------------------------------------
10969
+ startProgressiveTranscription() {
10970
+ this.stopProgressiveTranscription();
10971
+ const intervalMs = isIOS() ? this.config.progressiveIntervalIosMs ?? 800 : this.config.progressiveIntervalMs ?? 500;
10972
+ const minSamples = this.config.progressiveMinSamples ?? 8e3;
10973
+ this.progressiveTimer = setInterval(() => {
10974
+ if (this.audioBufferSamples < minSamples) return;
10975
+ if (!this.asr) return;
10976
+ const capturedEpoch = this.epoch;
10977
+ const snapshot = new Float32Array(this.audioBufferSamples);
10978
+ let offset = 0;
10979
+ for (const chunk of this.audioBuffer) {
10980
+ snapshot.set(chunk, offset);
10981
+ offset += chunk.length;
10982
+ }
10983
+ const snapshotSamples = this.audioBufferSamples;
10984
+ this.progressivePromise = (async () => {
10985
+ try {
10986
+ const result = await this.transcribeWithTimeout(snapshot);
10987
+ if (this.epoch !== capturedEpoch) return;
10988
+ if (result && result.text.trim()) {
10989
+ this.lastProgressiveResult = result;
10990
+ this.lastProgressiveSamples = snapshotSamples;
10991
+ this.emit("transcript", { ...result, isFinal: false });
10992
+ }
10993
+ } catch {
10994
+ }
10995
+ })();
10996
+ }, intervalMs);
10997
+ }
10998
+ stopProgressiveTranscription() {
10999
+ if (this.progressiveTimer) {
11000
+ clearInterval(this.progressiveTimer);
11001
+ this.progressiveTimer = null;
11002
+ }
11003
+ }
11004
+ // ---------------------------------------------------------------------------
11005
+ // Transcription with timeout + ASR error recovery
11006
+ // ---------------------------------------------------------------------------
11007
+ async transcribeWithTimeout(audio) {
11008
+ if (!this.asr) return null;
11009
+ const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
11010
+ const startTime = performance.now();
11011
+ try {
11012
+ const result = await Promise.race([
11013
+ this.asr.transcribe(audio),
11014
+ new Promise(
11015
+ (_, reject) => setTimeout(() => reject(new Error(`Transcription timed out after ${timeoutMs}ms`)), timeoutMs)
11016
+ )
11017
+ ]);
11018
+ this.asrErrorCount = 0;
11019
+ return {
11020
+ text: result.text,
11021
+ emotion: result.emotion,
11022
+ language: result.language,
11023
+ isFinal: false,
11024
+ inferenceTimeMs: performance.now() - startTime
11025
+ };
11026
+ } catch (error) {
11027
+ this.asrErrorCount++;
11028
+ logger19.warn("Transcription failed", {
11029
+ attempt: this.asrErrorCount,
11030
+ error: String(error)
11031
+ });
11032
+ if (this.asrErrorCount >= 3) {
11033
+ logger19.warn("3 consecutive ASR errors, recreating session");
11034
+ try {
11035
+ await this.asr.dispose();
11036
+ this.asr = createSenseVoice({
11037
+ modelUrl: this.config.models.senseVoice.modelUrl,
11038
+ tokensUrl: this.config.models.senseVoice.tokensUrl,
11039
+ language: this.config.models.senseVoice.language,
11040
+ unifiedWorker: this.unifiedWorker ?? void 0
11041
+ });
11042
+ await this.asr.load();
11043
+ this.asrErrorCount = 0;
11044
+ } catch (recreateErr) {
11045
+ logger19.error("ASR session recreation failed", { error: String(recreateErr) });
11046
+ }
11047
+ }
11048
+ return null;
11049
+ }
11050
+ }
11051
+ // ---------------------------------------------------------------------------
11052
+ // Audio normalization
11053
+ // ---------------------------------------------------------------------------
11054
+ normalizeAudio(audio) {
11055
+ if (!(this.config.normalizeAudio ?? true)) return audio;
11056
+ let maxAbs = 0;
11057
+ for (let i = 0; i < audio.length; i++) {
11058
+ const abs = Math.abs(audio[i]);
11059
+ if (abs > maxAbs) maxAbs = abs;
11060
+ }
11061
+ if (maxAbs >= 0.1 || maxAbs === 0) return audio;
11062
+ const gain = 0.5 / maxAbs;
11063
+ const normalized = new Float32Array(audio.length);
11064
+ for (let i = 0; i < audio.length; i++) {
11065
+ normalized[i] = audio[i] * gain;
11066
+ }
11067
+ return normalized;
11068
+ }
11069
+ // ---------------------------------------------------------------------------
11070
+ // Helpers
11071
+ // ---------------------------------------------------------------------------
11072
+ setState(state) {
11073
+ if (this._state === state) return;
11074
+ logger19.info("State transition", { from: this._state, to: state });
11075
+ this._state = state;
11076
+ this.emit("state", state);
11077
+ }
11078
+ emitProgress(currentModel, progress, totalModels, modelsLoaded) {
11079
+ this.emit("loading:progress", { currentModel, progress, totalModels, modelsLoaded });
11080
+ }
11081
+ clearSilenceTimer() {
11082
+ if (this.silenceTimer) {
11083
+ clearTimeout(this.silenceTimer);
11084
+ this.silenceTimer = null;
11085
+ }
11086
+ }
11087
+ };
11088
+
10393
11089
  // ../types/dist/index.mjs
10394
11090
  var PROTOCOL_VERSION = 1;
10395
11091
  function isProtocolEvent(obj) {