@omote/core 0.5.6 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -762,6 +762,24 @@ var A2EProcessor = class {
762
762
  }
763
763
  };
764
764
 
765
+ // src/audio/audioUtils.ts
766
/**
 * Convert raw 16-bit signed PCM bytes into normalized Float32 samples.
 *
 * Each sample is divided by 32768, so the output range is [-1, 1).
 * A trailing odd byte (half a sample) is ignored.
 *
 * Accepts either an ArrayBuffer or any ArrayBuffer view (Uint8Array,
 * DataView, Node Buffer, ...). Views are reinterpreted as raw bytes: passing
 * a typed array straight to `new Int16Array(view)` would copy element by
 * element (one output sample per *byte* for a Uint8Array), which is not what
 * PCM16 decoding needs.
 *
 * Samples are read in platform byte order (Int16Array semantics).
 *
 * @param {ArrayBuffer|ArrayBufferView} buffer - PCM16 audio bytes.
 * @returns {Float32Array} One float per complete 16-bit sample.
 */
function pcm16ToFloat32(buffer) {
  let int16;
  if (ArrayBuffer.isView(buffer)) {
    const sampleCount = Math.floor(buffer.byteLength / 2);
    if (buffer.byteOffset % 2 === 0) {
      // Aligned view: reinterpret the underlying bytes without copying.
      int16 = new Int16Array(buffer.buffer, buffer.byteOffset, sampleCount);
    } else {
      // Int16Array requires a 2-byte aligned offset; copy the bytes to a
      // fresh (aligned) buffer first.
      const start = buffer.byteOffset;
      int16 = new Int16Array(buffer.buffer.slice(start, start + sampleCount * 2));
    }
  } else {
    // Plain ArrayBuffer: drop a dangling odd byte, if any.
    const byteLen = buffer.byteLength & ~1;
    int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
  }
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
775
/**
 * Normalize already-decoded 16-bit samples to floats.
 *
 * Every input element is divided by 32768 and stored in a new Float32Array
 * of the same length; the input is not modified.
 *
 * @param {Int16Array|ArrayLike<number>} int16 - Signed 16-bit sample values.
 * @returns {Float32Array} Normalized samples.
 */
function int16ToFloat32(int16) {
  return Float32Array.from(int16, (sample) => sample / 32768);
}
782
+
765
783
  // src/telemetry/exporters/console.ts
766
784
  var ConsoleExporter = class {
767
785
  constructor(options = {}) {
@@ -2815,19 +2833,7 @@ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
2815
2833
  _Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
2816
2834
  var Wav2Vec2Inference = _Wav2Vec2Inference;
2817
2835
 
2818
- // src/audio/audioUtils.ts
2819
- function pcm16ToFloat32(buffer) {
2820
- const byteLen = buffer.byteLength & ~1;
2821
- const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
2822
- const float32 = new Float32Array(int16.length);
2823
- for (let i = 0; i < int16.length; i++) {
2824
- float32[i] = int16[i] / 32768;
2825
- }
2826
- return float32;
2827
- }
2828
-
2829
- // src/audio/FullFacePipeline.ts
2830
- var logger4 = createLogger("FullFacePipeline");
2836
+ // src/audio/expressionProfile.ts
2831
2837
  var BLENDSHAPE_TO_GROUP = /* @__PURE__ */ new Map();
2832
2838
  for (const name of LAM_BLENDSHAPES) {
2833
2839
  if (name.startsWith("eye")) {
@@ -2846,6 +2852,24 @@ for (const name of LAM_BLENDSHAPES) {
2846
2852
  BLENDSHAPE_TO_GROUP.set(name, "tongue");
2847
2853
  }
2848
2854
  }
2855
/**
 * Apply ExpressionProfile scaling to a raw blendshape frame.
 *
 * For each of the 52 blendshapes:
 *  1. If `profile.overrides` has an entry for the blendshape name, that
 *     entry is the scaler.
 *  2. Otherwise the scaler is the profile value for the blendshape's group
 *     (looked up in BLENDSHAPE_TO_GROUP), defaulting to 1.
 *  3. `raw[i] * scaler` is clamped to [0, 1].
 *
 * A missing profile (null/undefined) is treated as "no scaling", and a
 * missing or NaN input value yields 0 so that NaN never escapes the clamp.
 *
 * @param {ArrayLike<number>} raw - Raw blendshape values, indexed like LAM_BLENDSHAPES.
 * @param {object} [profile] - Group scalers plus optional per-name `overrides`.
 * @returns {Float32Array} New 52-element array of scaled values in [0, 1].
 */
function applyProfile(raw, profile) {
  const overrides = profile?.overrides;
  const scaled = new Float32Array(52);
  for (let i = 0; i < 52; i++) {
    const name = LAM_BLENDSHAPES[i];
    let scaler;
    if (overrides && overrides[name] !== void 0) {
      scaler = overrides[name];
    } else {
      const group = BLENDSHAPE_TO_GROUP.get(name);
      scaler = group ? profile?.[group] ?? 1 : 1;
    }
    const value = raw[i] * scaler;
    // Math.min/Math.max propagate NaN, so handle it explicitly.
    scaled[i] = Number.isNaN(value) ? 0 : Math.min(1, Math.max(0, value));
  }
  return scaled;
}
2870
+
2871
+ // src/audio/FullFacePipeline.ts
2872
+ var logger4 = createLogger("FullFacePipeline");
2849
2873
  var FullFacePipeline = class extends EventEmitter {
2850
2874
  constructor(options) {
2851
2875
  super();
@@ -2910,25 +2934,10 @@ var FullFacePipeline = class extends EventEmitter {
2910
2934
  /**
2911
2935
  * Apply ExpressionProfile scaling to raw A2E blendshapes.
2912
2936
  *
2913
- * For each blendshape:
2914
- * 1. If an override exists for the blendshape name, use override as scaler
2915
- * 2. Otherwise, use the group scaler (default 1.0)
2916
- * 3. Clamp result to [0, 1]
2937
+ * Delegates to the standalone applyProfile() utility from expressionProfile.ts.
2917
2938
  */
2918
2939
  applyProfile(raw) {
2919
- const scaled = new Float32Array(52);
2920
- for (let i = 0; i < 52; i++) {
2921
- const name = LAM_BLENDSHAPES[i];
2922
- let scaler;
2923
- if (this.profile.overrides && this.profile.overrides[name] !== void 0) {
2924
- scaler = this.profile.overrides[name];
2925
- } else {
2926
- const group = BLENDSHAPE_TO_GROUP.get(name);
2927
- scaler = group ? this.profile[group] ?? 1 : 1;
2928
- }
2929
- scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
2930
- }
2931
- return scaled;
2940
+ return applyProfile(raw, this.profile);
2932
2941
  }
2933
2942
  /**
2934
2943
  * Start a new playback session
@@ -3113,6 +3122,329 @@ var FullFacePipeline = class extends EventEmitter {
3113
3122
  }
3114
3123
  };
3115
3124
 
3125
+ // src/audio/PlaybackPipeline.ts
3126
+ var logger5 = createLogger("PlaybackPipeline");
3127
/**
 * Audio playback pipeline that schedules audio for output while running A2E
 * inference on the same audio and emitting blendshape frames in step with
 * the scheduler clock.
 *
 * Two ways to feed audio:
 *  - streaming: start() -> onAudioChunk(...) repeatedly -> end()
 *  - whole buffer: feedBuffer(audio)
 *
 * Events emitted (see the emit() calls below):
 *  "state", "error", "playback:start", "playback:complete", "playback:stop",
 *  "frame", "frame:raw", plus the snake_case names "playback_start",
 *  "playback_complete", "full_frame_ready" and "lam_frame_ready", which carry
 *  the same payloads (presumably kept for older listeners — TODO confirm).
 */
var PlaybackPipeline = class extends EventEmitter {
  /**
   * @param {object} config
   * @param {object} config.lam - A2E backend; `modelId`, `backend` and `chunkSize` are read here.
   * @param {number} [config.sampleRate=16000]
   * @param {object} [config.profile={}] - ExpressionProfile passed to applyProfile().
   * @param {number} [config.staleThresholdMs=2000] - Time without a new frame before a stall warning is logged.
   * @param {boolean} [config.neutralTransitionEnabled=false]
   * @param {number} [config.neutralTransitionMs=250]
   * @param {number} [config.chunkSize] - Falls back to `config.lam.chunkSize`, then 16000 samples.
   * @param {number} [config.audioDelayMs] - Overrides the automatically estimated scheduler lookahead.
   * @param {number} [config.chunkTargetMs=200] - Target duration handed to the chunk coalescer.
   * @param {number} [config.identityIndex]
   */
  constructor(config) {
    super();
    this.config = config;
    this._state = "idle";
    this.playbackStarted = false;
    this.monitorInterval = null;
    this.frameAnimationId = null;
    // Stale frame detection
    this.lastNewFrameTime = 0;
    this.lastKnownLamFrame = null;
    this.staleWarningEmitted = false;
    // Diagnostic counter
    this.frameLoopCount = 0;
    // Neutral transition state
    this.neutralTransitionFrame = null;
    this.neutralTransitionStart = 0;
    this.neutralAnimationId = null;
    // Current frame refs
    this._currentFrame = null;
    this._currentRawFrame = null;
    this.sampleRate = config.sampleRate ?? 16e3;
    this.profile = config.profile ?? {};
    this.staleThresholdMs = config.staleThresholdMs ?? 2e3;
    this.neutralTransitionEnabled = config.neutralTransitionEnabled ?? false;
    this.neutralTransitionMs = config.neutralTransitionMs ?? 250;
    // Default audio delay = time to accumulate one inference chunk
    // + an estimate of inference latency for the backend + a fixed margin.
    const isCpuModel = config.lam.modelId === "wav2arkit_cpu";
    const chunkSize = config.chunkSize ?? config.lam.chunkSize ?? 16e3;
    const chunkAccumulationMs = chunkSize / this.sampleRate * 1e3;
    const inferenceEstimateMs = isCpuModel ? 300 : config.lam.backend === "wasm" ? 250 : 80;
    const marginMs = 100;
    const autoDelay = Math.ceil(chunkAccumulationMs + inferenceEstimateMs + marginMs);
    const audioDelayMs = config.audioDelayMs ?? autoDelay;
    logger5.info("PlaybackPipeline config", {
      chunkSize,
      audioDelayMs,
      autoDelay,
      backend: config.lam.backend,
      modelId: config.lam.modelId,
      neutralTransitionEnabled: this.neutralTransitionEnabled
    });
    this.scheduler = new AudioScheduler({
      sampleRate: this.sampleRate,
      initialLookaheadSec: audioDelayMs / 1e3
    });
    this.coalescer = new AudioChunkCoalescer({
      sampleRate: this.sampleRate,
      targetDurationMs: config.chunkTargetMs ?? 200
    });
    this.processor = new A2EProcessor({
      backend: config.lam,
      sampleRate: this.sampleRate,
      chunkSize,
      identityIndex: config.identityIndex,
      onError: (error) => {
        logger5.error("A2E inference error", { message: error.message, stack: error.stack });
        this.emit("error", error);
      }
    });
  }
  /** Current pipeline state */
  get state() {
    return this._state;
  }
  /**
   * Most recent scaled blendshapes, or null when no frame is available.
   * A new array is assigned for every frame (it is not updated in place).
   */
  get currentFrame() {
    return this._currentFrame;
  }
  /** Raw A2E blendshapes (before profile scaling) */
  get currentRawFrame() {
    return this._currentRawFrame;
  }
  // ---------------------------------------------------------------------------
  // Lifecycle
  // ---------------------------------------------------------------------------
  /** Initialize AudioContext (lazy, call after user gesture) */
  async initialize() {
    await this.scheduler.initialize();
  }
  /**
   * Update ExpressionProfile at runtime.
   * A nullish value resets to the empty profile, matching the constructor
   * default, so the frame loop never sees an undefined profile.
   */
  setProfile(profile) {
    this.profile = profile ?? {};
  }
  // ---------------------------------------------------------------------------
  // Async mode (streaming TTS)
  // ---------------------------------------------------------------------------
  /**
   * Start a new playback session.
   * Idempotent — calling during playback resets cleanly without emitting
   * spurious playback:complete.
   */
  start() {
    this.stopInternal(false);
    this.scheduler.reset();
    this.coalescer.reset();
    this.processor.reset();
    this.playbackStarted = false;
    this.lastNewFrameTime = 0;
    this.lastKnownLamFrame = null;
    this.staleWarningEmitted = false;
    this.frameLoopCount = 0;
    this._currentFrame = null;
    this._currentRawFrame = null;
    this.cancelNeutralTransition();
    this.scheduler.warmup();
    this.startFrameLoop();
    this.startMonitoring();
    this.setState("playing");
  }
  /**
   * Feed a streaming audio chunk (PCM16 Uint8Array).
   * The chunk goes through the coalescer first; nothing is scheduled until
   * the coalescer hands back a combined buffer.
   */
  async onAudioChunk(chunk) {
    const combined = this.coalescer.add(chunk);
    if (!combined) return;
    const float32 = pcm16ToFloat32(combined);
    const scheduleTime = await this.scheduler.schedule(float32);
    if (!this.playbackStarted) {
      this.playbackStarted = true;
      this.emit("playback:start", { time: scheduleTime });
      this.emit("playback_start", scheduleTime);
    }
    this.processor.pushAudio(float32, scheduleTime);
  }
  /** Signal end of audio stream (flushes remaining audio) */
  async end() {
    const remaining = this.coalescer.flush();
    if (remaining) {
      // NOTE(review): the flushed remainder is routed back through
      // onAudioChunk(), i.e. through coalescer.add() again — confirm the
      // coalescer emits it rather than re-buffering it.
      const chunk = new Uint8Array(remaining);
      await this.onAudioChunk(chunk);
    }
    await this.processor.flush();
  }
  // ---------------------------------------------------------------------------
  // Sync mode (full buffer)
  // ---------------------------------------------------------------------------
  /**
   * Feed a complete audio buffer. Chunks into 200ms pieces, schedules each
   * for playback, runs A2E inference, then waits for completion.
   *
   * The returned promise settles when "playback:complete" fires, or when
   * playback is aborted via stop() ("playback:stop"). The listeners are
   * registered before any audio is scheduled so that a completion event
   * fired while chunks are still being scheduled/flushed is not missed.
   *
   * @param {Float32Array|ArrayBuffer} audio - Float samples, or PCM16 bytes.
   * @returns {Promise<void>}
   */
  async feedBuffer(audio) {
    const float32 = audio instanceof Float32Array ? audio : pcm16ToFloat32(audio);
    this.start();
    let unsubscribe = () => {
    };
    const finished = new Promise((resolve) => {
      const offComplete = this.on("playback:complete", () => resolve());
      const offStop = this.on("playback:stop", () => resolve());
      unsubscribe = () => {
        offComplete();
        offStop();
      };
    });
    try {
      const chunkSamples = Math.floor(this.sampleRate * 0.2);
      for (let i = 0; i < float32.length; i += chunkSamples) {
        const chunk = float32.subarray(i, Math.min(i + chunkSamples, float32.length));
        const scheduleTime = await this.scheduler.schedule(chunk);
        this.processor.pushAudio(chunk, scheduleTime);
        if (!this.playbackStarted) {
          this.playbackStarted = true;
          this.emit("playback:start", { time: scheduleTime });
          this.emit("playback_start", scheduleTime);
        }
      }
      await this.processor.flush();
      await finished;
    } finally {
      // Always drop the listeners, including when scheduling/flush throws.
      unsubscribe();
    }
  }
  // ---------------------------------------------------------------------------
  // Control
  // ---------------------------------------------------------------------------
  /**
   * Stop playback immediately with fade-out.
   * Also cancels a running neutral transition so no further frames or state
   * changes are emitted after the stop.
   */
  async stop(fadeOutMs = 50) {
    this.setState("stopping");
    this.stopInternal(true);
    this.cancelNeutralTransition();
    await this.scheduler.cancelAll(fadeOutMs);
    this.coalescer.reset();
    this.processor.reset();
    this.playbackStarted = false;
    this._currentFrame = null;
    this._currentRawFrame = null;
    this.emit("playback:stop", void 0);
    this.setState("idle");
  }
  /** Cleanup all resources */
  dispose() {
    this.stopInternal(true);
    this.cancelNeutralTransition();
    this.scheduler.dispose();
    this.coalescer.reset();
    this.processor.dispose();
    // Set directly (no "state" event is emitted on dispose).
    this._state = "idle";
  }
  /** Get pipeline debug state */
  getDebugState() {
    return {
      state: this._state,
      playbackStarted: this.playbackStarted,
      coalescerFill: this.coalescer.fillLevel,
      processorFill: this.processor.fillLevel,
      queuedFrames: this.processor.queuedFrameCount,
      currentTime: this.scheduler.getCurrentTime(),
      playbackEndTime: this.scheduler.getPlaybackEndTime()
    };
  }
  // ---------------------------------------------------------------------------
  // Internal: Frame loop
  // ---------------------------------------------------------------------------
  /**
   * Run a requestAnimationFrame loop that looks up the A2E frame for the
   * scheduler's current time, applies the profile and emits it. Also logs a
   * one-time warning when no new frame has arrived for staleThresholdMs.
   */
  startFrameLoop() {
    const updateFrame = () => {
      this.frameLoopCount++;
      const currentTime = this.scheduler.getCurrentTime();
      const lamFrame = this.processor.getFrameForTime(currentTime);
      if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
        this.lastNewFrameTime = performance.now();
        this.lastKnownLamFrame = lamFrame;
        this.staleWarningEmitted = false;
      }
      if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
        if (!this.staleWarningEmitted) {
          this.staleWarningEmitted = true;
          logger5.warn("A2E stalled \u2014 no new inference frames", {
            staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
            queuedFrames: this.processor.queuedFrameCount
          });
        }
      }
      if (lamFrame) {
        const scaled = applyProfile(lamFrame, this.profile);
        this._currentFrame = scaled;
        this._currentRawFrame = lamFrame;
        const fullFrame = {
          blendshapes: scaled,
          rawBlendshapes: lamFrame,
          timestamp: currentTime
        };
        this.emit("frame", fullFrame);
        this.emit("frame:raw", lamFrame);
        this.emit("full_frame_ready", fullFrame);
        this.emit("lam_frame_ready", lamFrame);
      }
      this.frameAnimationId = requestAnimationFrame(updateFrame);
    };
    this.frameAnimationId = requestAnimationFrame(updateFrame);
  }
  // ---------------------------------------------------------------------------
  // Internal: Playback monitoring
  // ---------------------------------------------------------------------------
  /**
   * Poll every 100ms; playback is complete once the scheduler reports
   * completion and the processor has no queued frames left.
   */
  startMonitoring() {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
    }
    this.monitorInterval = setInterval(() => {
      if (this.scheduler.isComplete() && this.processor.queuedFrameCount === 0) {
        this.onPlaybackComplete();
      }
    }, 100);
  }
  /**
   * Stop the loops, emit the completion events, then either ease the last
   * frame to neutral (when enabled and a frame exists) or go idle directly.
   */
  onPlaybackComplete() {
    this.stopInternal(false);
    this.playbackStarted = false;
    this.emit("playback:complete", void 0);
    this.emit("playback_complete", void 0);
    if (this.neutralTransitionEnabled && this._currentFrame) {
      this.startNeutralTransition(this._currentFrame);
    } else {
      this.setState("idle");
    }
  }
  // ---------------------------------------------------------------------------
  // Internal: Neutral transition (opt-in)
  // ---------------------------------------------------------------------------
  /**
   * Animate from `fromFrame` towards all-zero blendshapes over
   * neutralTransitionMs using a cubic ease-out, emitting a frame per
   * animation tick. Switches to "idle" when the transition finishes.
   */
  startNeutralTransition(fromFrame) {
    this.neutralTransitionFrame = new Float32Array(fromFrame);
    this.neutralTransitionStart = performance.now();
    const animate = () => {
      const elapsed = performance.now() - this.neutralTransitionStart;
      // A non-positive duration means "finish immediately"; this also avoids
      // 0/0 = NaN, which would keep the animation running forever.
      const t = this.neutralTransitionMs > 0 ? Math.min(1, elapsed / this.neutralTransitionMs) : 1;
      const eased = 1 - Math.pow(1 - t, 3);
      const blendshapes = new Float32Array(52);
      for (let i = 0; i < 52; i++) {
        blendshapes[i] = this.neutralTransitionFrame[i] * (1 - eased);
      }
      this._currentFrame = blendshapes;
      const frame = {
        blendshapes,
        rawBlendshapes: blendshapes,
        // raw = scaled during transition
        timestamp: performance.now() / 1e3
      };
      this.emit("frame", frame);
      this.emit("full_frame_ready", frame);
      if (t >= 1) {
        this.neutralAnimationId = null;
        this.neutralTransitionFrame = null;
        this._currentFrame = null;
        this._currentRawFrame = null;
        this.setState("idle");
        return;
      }
      this.neutralAnimationId = requestAnimationFrame(animate);
    };
    this.neutralAnimationId = requestAnimationFrame(animate);
  }
  /** Cancel a pending neutral-transition animation frame, if any. */
  cancelNeutralTransition() {
    if (this.neutralAnimationId) {
      cancelAnimationFrame(this.neutralAnimationId);
      this.neutralAnimationId = null;
    }
    this.neutralTransitionFrame = null;
  }
  // ---------------------------------------------------------------------------
  // Internal: Helpers
  // ---------------------------------------------------------------------------
  /**
   * Stop the completion monitor and the frame loop.
   * NOTE(review): `emitEvents` is accepted but not used by this method.
   */
  stopInternal(emitEvents) {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
      this.monitorInterval = null;
    }
    if (this.frameAnimationId) {
      cancelAnimationFrame(this.frameAnimationId);
      this.frameAnimationId = null;
    }
  }
  /** Change state and emit "state", unless the state is unchanged. */
  setState(state) {
    if (this._state === state) return;
    this._state = state;
    this.emit("state", state);
  }
};
3447
+
3116
3448
  // src/audio/InterruptionHandler.ts
3117
3449
  var InterruptionHandler = class extends EventEmitter {
3118
3450
  constructor(config = {}) {
@@ -3500,7 +3832,7 @@ function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
3500
3832
  }
3501
3833
 
3502
3834
  // src/inference/SenseVoiceInference.ts
3503
- var logger5 = createLogger("SenseVoice");
3835
+ var logger6 = createLogger("SenseVoice");
3504
3836
  var _SenseVoiceInference = class _SenseVoiceInference {
3505
3837
  constructor(config) {
3506
3838
  this.session = null;
@@ -3553,26 +3885,26 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3553
3885
  "model.backend_requested": this.config.backend
3554
3886
  });
3555
3887
  try {
3556
- logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
3888
+ logger6.info("Loading ONNX Runtime...", { preference: this.config.backend });
3557
3889
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
3558
3890
  this.ort = ort;
3559
3891
  this._backend = backend;
3560
- logger5.info("ONNX Runtime loaded", { backend: this._backend });
3561
- logger5.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
3892
+ logger6.info("ONNX Runtime loaded", { backend: this._backend });
3893
+ logger6.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
3562
3894
  const tokensResponse = await fetch(this.config.tokensUrl);
3563
3895
  if (!tokensResponse.ok) {
3564
3896
  throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
3565
3897
  }
3566
3898
  const tokensText = await tokensResponse.text();
3567
3899
  this.tokenMap = parseTokensFile(tokensText);
3568
- logger5.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
3900
+ logger6.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
3569
3901
  const sessionOptions = getSessionOptions(this._backend);
3570
3902
  if (this._backend === "webgpu") {
3571
3903
  sessionOptions.graphOptimizationLevel = "basic";
3572
3904
  }
3573
3905
  let isCached = false;
3574
3906
  if (isIOS()) {
3575
- logger5.info("iOS: passing model URL directly to ORT (low-memory path)", {
3907
+ logger6.info("iOS: passing model URL directly to ORT (low-memory path)", {
3576
3908
  modelUrl: this.config.modelUrl
3577
3909
  });
3578
3910
  this.session = await withTimeout(
@@ -3585,14 +3917,14 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3585
3917
  isCached = await cache.has(this.config.modelUrl);
3586
3918
  let modelBuffer;
3587
3919
  if (isCached) {
3588
- logger5.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
3920
+ logger6.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
3589
3921
  modelBuffer = await cache.get(this.config.modelUrl);
3590
3922
  onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
3591
3923
  } else {
3592
- logger5.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
3924
+ logger6.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
3593
3925
  modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
3594
3926
  }
3595
- logger5.debug("Creating ONNX session", {
3927
+ logger6.debug("Creating ONNX session", {
3596
3928
  size: formatBytes(modelBuffer.byteLength),
3597
3929
  backend: this._backend
3598
3930
  });
@@ -3605,15 +3937,15 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3605
3937
  const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
3606
3938
  this.negMean = cmvn.negMean;
3607
3939
  this.invStddev = cmvn.invStddev;
3608
- logger5.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
3940
+ logger6.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
3609
3941
  } else {
3610
- logger5.warn("CMVN not found in model metadata \u2014 features will not be normalized");
3942
+ logger6.warn("CMVN not found in model metadata \u2014 features will not be normalized");
3611
3943
  }
3612
3944
  } catch (cmvnErr) {
3613
- logger5.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
3945
+ logger6.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
3614
3946
  }
3615
3947
  const loadTimeMs = performance.now() - startTime;
3616
- logger5.info("SenseVoice model loaded", {
3948
+ logger6.info("SenseVoice model loaded", {
3617
3949
  backend: this._backend,
3618
3950
  loadTimeMs: Math.round(loadTimeMs),
3619
3951
  vocabSize: this.tokenMap.size,
@@ -3724,7 +4056,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3724
4056
  const vocabSize = logitsDims[2];
3725
4057
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
3726
4058
  const inferenceTimeMs = performance.now() - startTime;
3727
- logger5.trace("Transcription complete", {
4059
+ logger6.trace("Transcription complete", {
3728
4060
  text: decoded.text.substring(0, 50),
3729
4061
  language: decoded.language,
3730
4062
  emotion: decoded.emotion,
@@ -3762,7 +4094,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3762
4094
  const errMsg = err instanceof Error ? err.message : String(err);
3763
4095
  if (errMsg.includes("timed out")) {
3764
4096
  this.poisoned = true;
3765
- logger5.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4097
+ logger6.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
3766
4098
  backend: this._backend,
3767
4099
  timeoutMs: _SenseVoiceInference.INFERENCE_TIMEOUT_MS
3768
4100
  });
@@ -3770,7 +4102,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3770
4102
  const oomError = new Error(
3771
4103
  `SenseVoice inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
3772
4104
  );
3773
- logger5.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4105
+ logger6.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
3774
4106
  pointer: `0x${err.toString(16)}`,
3775
4107
  backend: this._backend
3776
4108
  });
@@ -3783,7 +4115,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
3783
4115
  reject(oomError);
3784
4116
  return;
3785
4117
  } else {
3786
- logger5.error("Inference failed", { error: errMsg, backend: this._backend });
4118
+ logger6.error("Inference failed", { error: errMsg, backend: this._backend });
3787
4119
  }
3788
4120
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
3789
4121
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -3812,9 +4144,9 @@ _SenseVoiceInference.INFERENCE_TIMEOUT_MS = 1e4;
3812
4144
  var SenseVoiceInference = _SenseVoiceInference;
3813
4145
 
3814
4146
  // src/inference/SenseVoiceWorker.ts
3815
- var logger6 = createLogger("SenseVoiceWorker");
4147
+ var logger7 = createLogger("SenseVoiceWorker");
3816
4148
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
3817
- var LOAD_TIMEOUT_MS = 3e4;
4149
+ var LOAD_TIMEOUT_MS = 3e5;
3818
4150
  var INFERENCE_TIMEOUT_MS = 1e4;
3819
4151
  function resolveUrl(url) {
3820
4152
  if (/^https?:\/\//i.test(url) || /^blob:/i.test(url)) return url;
@@ -4551,7 +4883,7 @@ var SenseVoiceWorker = class {
4551
4883
  this.handleWorkerMessage(event.data);
4552
4884
  };
4553
4885
  worker.onerror = (error) => {
4554
- logger6.error("Worker error", { error: error.message });
4886
+ logger7.error("Worker error", { error: error.message });
4555
4887
  for (const [, resolver] of this.pendingResolvers) {
4556
4888
  resolver.reject(new Error(`Worker error: ${error.message}`));
4557
4889
  }
@@ -4631,9 +4963,9 @@ var SenseVoiceWorker = class {
4631
4963
  "model.language": this.config.language
4632
4964
  });
4633
4965
  try {
4634
- logger6.info("Creating SenseVoice worker...");
4966
+ logger7.info("Creating SenseVoice worker...");
4635
4967
  this.worker = this.createWorker();
4636
- logger6.info("Loading model in worker...", {
4968
+ logger7.info("Loading model in worker...", {
4637
4969
  modelUrl: this.config.modelUrl,
4638
4970
  tokensUrl: this.config.tokensUrl,
4639
4971
  language: this.config.language,
@@ -4655,7 +4987,7 @@ var SenseVoiceWorker = class {
4655
4987
  this._isLoaded = true;
4656
4988
  const loadTimeMs = performance.now() - startTime;
4657
4989
  onProgress?.(1, 1);
4658
- logger6.info("SenseVoice worker loaded successfully", {
4990
+ logger7.info("SenseVoice worker loaded successfully", {
4659
4991
  backend: "wasm",
4660
4992
  loadTimeMs: Math.round(loadTimeMs),
4661
4993
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -4734,7 +5066,7 @@ var SenseVoiceWorker = class {
4734
5066
  INFERENCE_TIMEOUT_MS
4735
5067
  );
4736
5068
  const totalTimeMs = performance.now() - startTime;
4737
- logger6.trace("Worker transcription complete", {
5069
+ logger7.trace("Worker transcription complete", {
4738
5070
  text: result.text.substring(0, 50),
4739
5071
  language: result.language,
4740
5072
  emotion: result.emotion,
@@ -4770,11 +5102,11 @@ var SenseVoiceWorker = class {
4770
5102
  } catch (err) {
4771
5103
  const errMsg = err instanceof Error ? err.message : String(err);
4772
5104
  if (errMsg.includes("timed out")) {
4773
- logger6.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5105
+ logger7.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
4774
5106
  timeoutMs: INFERENCE_TIMEOUT_MS
4775
5107
  });
4776
5108
  } else {
4777
- logger6.error("Worker inference failed", { error: errMsg });
5109
+ logger7.error("Worker inference failed", { error: errMsg });
4778
5110
  }
4779
5111
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
4780
5112
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -4812,14 +5144,14 @@ var SenseVoiceWorker = class {
4812
5144
  };
4813
5145
 
4814
5146
  // src/inference/UnifiedInferenceWorker.ts
4815
- var logger7 = createLogger("UnifiedInferenceWorker");
5147
+ var logger8 = createLogger("UnifiedInferenceWorker");
4816
5148
  var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
4817
- var INIT_TIMEOUT_MS = 15e3;
4818
- var SV_LOAD_TIMEOUT_MS = 3e4;
5149
+ var INIT_TIMEOUT_MS = 6e4;
5150
+ var SV_LOAD_TIMEOUT_MS = 3e5;
4819
5151
  var SV_INFER_TIMEOUT_MS = 1e4;
4820
- var CPU_LOAD_TIMEOUT_MS = 6e4;
5152
+ var CPU_LOAD_TIMEOUT_MS = 42e4;
4821
5153
  var CPU_INFER_TIMEOUT_MS = 5e3;
4822
- var VAD_LOAD_TIMEOUT_MS = 1e4;
5154
+ var VAD_LOAD_TIMEOUT_MS = 12e4;
4823
5155
  var VAD_INFER_TIMEOUT_MS = 1e3;
4824
5156
  var DISPOSE_TIMEOUT_MS = 5e3;
4825
5157
  function resolveUrl2(url) {
@@ -5514,7 +5846,7 @@ var UnifiedInferenceWorker = class {
5514
5846
  const telemetry = getTelemetry();
5515
5847
  const span = telemetry?.startSpan("UnifiedInferenceWorker.init");
5516
5848
  try {
5517
- logger7.info("Creating unified inference worker...");
5849
+ logger8.info("Creating unified inference worker...");
5518
5850
  this.worker = this.createWorker();
5519
5851
  await this.sendMessage(
5520
5852
  { type: "init", wasmPaths: WASM_CDN_PATH3, isIOS: isIOS() },
@@ -5523,7 +5855,7 @@ var UnifiedInferenceWorker = class {
5523
5855
  );
5524
5856
  this.initialized = true;
5525
5857
  const loadTimeMs = performance.now() - startTime;
5526
- logger7.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
5858
+ logger8.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
5527
5859
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs });
5528
5860
  span?.end();
5529
5861
  } catch (error) {
@@ -5697,7 +6029,7 @@ var UnifiedInferenceWorker = class {
5697
6029
  this.handleWorkerMessage(event.data);
5698
6030
  };
5699
6031
  worker.onerror = (error) => {
5700
- logger7.error("Unified worker error", { error: error.message });
6032
+ logger8.error("Unified worker error", { error: error.message });
5701
6033
  this.rejectAllPending(`Worker error: ${error.message}`);
5702
6034
  };
5703
6035
  return worker;
@@ -5711,7 +6043,7 @@ var UnifiedInferenceWorker = class {
5711
6043
  this.pendingRequests.delete(requestId);
5712
6044
  pending.reject(new Error(data.error));
5713
6045
  } else {
5714
- logger7.error("Worker broadcast error", { error: data.error });
6046
+ logger8.error("Worker broadcast error", { error: data.error });
5715
6047
  this.rejectAllPending(data.error);
5716
6048
  }
5717
6049
  return;
@@ -5733,7 +6065,7 @@ var UnifiedInferenceWorker = class {
5733
6065
  const timeout = setTimeout(() => {
5734
6066
  this.pendingRequests.delete(requestId);
5735
6067
  this.poisoned = true;
5736
- logger7.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
6068
+ logger8.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
5737
6069
  type: message.type,
5738
6070
  timeoutMs
5739
6071
  });
@@ -5799,7 +6131,7 @@ var SenseVoiceUnifiedAdapter = class {
5799
6131
  });
5800
6132
  this._isLoaded = true;
5801
6133
  onProgress?.(1, 1);
5802
- logger7.info("SenseVoice loaded via unified worker", {
6134
+ logger8.info("SenseVoice loaded via unified worker", {
5803
6135
  backend: "wasm",
5804
6136
  loadTimeMs: Math.round(result.loadTimeMs),
5805
6137
  vocabSize: result.vocabSize
@@ -5864,7 +6196,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
5864
6196
  externalDataUrl: externalDataUrl || null
5865
6197
  });
5866
6198
  this._isLoaded = true;
5867
- logger7.info("Wav2ArkitCpu loaded via unified worker", {
6199
+ logger8.info("Wav2ArkitCpu loaded via unified worker", {
5868
6200
  backend: "wasm",
5869
6201
  loadTimeMs: Math.round(result.loadTimeMs)
5870
6202
  });
@@ -5970,7 +6302,7 @@ var SileroVADUnifiedAdapter = class {
5970
6302
  sampleRate: this.config.sampleRate
5971
6303
  });
5972
6304
  this._isLoaded = true;
5973
- logger7.info("SileroVAD loaded via unified worker", {
6305
+ logger8.info("SileroVAD loaded via unified worker", {
5974
6306
  backend: "wasm",
5975
6307
  loadTimeMs: Math.round(result.loadTimeMs),
5976
6308
  sampleRate: this.config.sampleRate,
@@ -6051,10 +6383,10 @@ var SileroVADUnifiedAdapter = class {
6051
6383
  };
6052
6384
 
6053
6385
  // src/inference/createSenseVoice.ts
6054
- var logger8 = createLogger("createSenseVoice");
6386
+ var logger9 = createLogger("createSenseVoice");
6055
6387
  function createSenseVoice(config) {
6056
6388
  if (config.unifiedWorker) {
6057
- logger8.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6389
+ logger9.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6058
6390
  return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
6059
6391
  modelUrl: config.modelUrl,
6060
6392
  tokensUrl: config.tokensUrl,
@@ -6067,7 +6399,7 @@ function createSenseVoice(config) {
6067
6399
  if (!SenseVoiceWorker.isSupported()) {
6068
6400
  throw new Error("Web Workers are not supported in this environment");
6069
6401
  }
6070
- logger8.info("Creating SenseVoiceWorker (off-main-thread)");
6402
+ logger9.info("Creating SenseVoiceWorker (off-main-thread)");
6071
6403
  return new SenseVoiceWorker({
6072
6404
  modelUrl: config.modelUrl,
6073
6405
  tokensUrl: config.tokensUrl,
@@ -6076,7 +6408,7 @@ function createSenseVoice(config) {
6076
6408
  });
6077
6409
  }
6078
6410
  if (useWorker === false) {
6079
- logger8.info("Creating SenseVoiceInference (main thread)");
6411
+ logger9.info("Creating SenseVoiceInference (main thread)");
6080
6412
  return new SenseVoiceInference({
6081
6413
  modelUrl: config.modelUrl,
6082
6414
  tokensUrl: config.tokensUrl,
@@ -6085,7 +6417,7 @@ function createSenseVoice(config) {
6085
6417
  });
6086
6418
  }
6087
6419
  if (SenseVoiceWorker.isSupported() && !isIOS()) {
6088
- logger8.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6420
+ logger9.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6089
6421
  return new SenseVoiceWorker({
6090
6422
  modelUrl: config.modelUrl,
6091
6423
  tokensUrl: config.tokensUrl,
@@ -6093,7 +6425,7 @@ function createSenseVoice(config) {
6093
6425
  textNorm: config.textNorm
6094
6426
  });
6095
6427
  }
6096
- logger8.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6428
+ logger9.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6097
6429
  reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
6098
6430
  });
6099
6431
  return new SenseVoiceInference({
@@ -6105,7 +6437,7 @@ function createSenseVoice(config) {
6105
6437
  }
6106
6438
 
6107
6439
  // src/inference/Wav2ArkitCpuInference.ts
6108
- var logger9 = createLogger("Wav2ArkitCpu");
6440
+ var logger10 = createLogger("Wav2ArkitCpu");
6109
6441
  var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6110
6442
  constructor(config) {
6111
6443
  this.modelId = "wav2arkit_cpu";
@@ -6147,16 +6479,16 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6147
6479
  });
6148
6480
  try {
6149
6481
  const preference = this.config.backend || "wasm";
6150
- logger9.info("Loading ONNX Runtime...", { preference });
6482
+ logger10.info("Loading ONNX Runtime...", { preference });
6151
6483
  const { ort, backend } = await getOnnxRuntimeForPreference(preference);
6152
6484
  this.ort = ort;
6153
6485
  this._backend = backend;
6154
- logger9.info("ONNX Runtime loaded", { backend: this._backend });
6486
+ logger10.info("ONNX Runtime loaded", { backend: this._backend });
6155
6487
  const modelUrl = this.config.modelUrl;
6156
6488
  const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
6157
6489
  const sessionOptions = getSessionOptions(this._backend);
6158
6490
  if (isIOS()) {
6159
- logger9.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6491
+ logger10.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6160
6492
  modelUrl,
6161
6493
  dataUrl
6162
6494
  });
@@ -6178,15 +6510,15 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6178
6510
  const isCached = await cache.has(modelUrl);
6179
6511
  let modelBuffer;
6180
6512
  if (isCached) {
6181
- logger9.debug("Loading model from cache", { modelUrl });
6513
+ logger10.debug("Loading model from cache", { modelUrl });
6182
6514
  modelBuffer = await cache.get(modelUrl);
6183
6515
  if (!modelBuffer) {
6184
- logger9.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6516
+ logger10.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6185
6517
  await cache.delete(modelUrl);
6186
6518
  modelBuffer = await fetchWithCache(modelUrl);
6187
6519
  }
6188
6520
  } else {
6189
- logger9.debug("Fetching and caching model graph", { modelUrl });
6521
+ logger10.debug("Fetching and caching model graph", { modelUrl });
6190
6522
  modelBuffer = await fetchWithCache(modelUrl);
6191
6523
  }
6192
6524
  if (!modelBuffer) {
@@ -6197,31 +6529,31 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6197
6529
  try {
6198
6530
  const isDataCached = await cache.has(dataUrl);
6199
6531
  if (isDataCached) {
6200
- logger9.debug("Loading external data from cache", { dataUrl });
6532
+ logger10.debug("Loading external data from cache", { dataUrl });
6201
6533
  externalDataBuffer = await cache.get(dataUrl);
6202
6534
  if (!externalDataBuffer) {
6203
- logger9.warn("Cache corruption for external data, retrying", { dataUrl });
6535
+ logger10.warn("Cache corruption for external data, retrying", { dataUrl });
6204
6536
  await cache.delete(dataUrl);
6205
6537
  externalDataBuffer = await fetchWithCache(dataUrl);
6206
6538
  }
6207
6539
  } else {
6208
- logger9.info("Fetching external model data", {
6540
+ logger10.info("Fetching external model data", {
6209
6541
  dataUrl,
6210
6542
  note: "This may be a large download (400MB+)"
6211
6543
  });
6212
6544
  externalDataBuffer = await fetchWithCache(dataUrl);
6213
6545
  }
6214
- logger9.info("External data loaded", {
6546
+ logger10.info("External data loaded", {
6215
6547
  size: formatBytes(externalDataBuffer.byteLength)
6216
6548
  });
6217
6549
  } catch (err) {
6218
- logger9.debug("No external data file found (single-file model)", {
6550
+ logger10.debug("No external data file found (single-file model)", {
6219
6551
  dataUrl,
6220
6552
  error: err.message
6221
6553
  });
6222
6554
  }
6223
6555
  }
6224
- logger9.debug("Creating ONNX session", {
6556
+ logger10.debug("Creating ONNX session", {
6225
6557
  graphSize: formatBytes(modelBuffer.byteLength),
6226
6558
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
6227
6559
  backend: this._backend
@@ -6237,7 +6569,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6237
6569
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
6238
6570
  }
6239
6571
  const loadTimeMs = performance.now() - startTime;
6240
- logger9.info("Model loaded successfully", {
6572
+ logger10.info("Model loaded successfully", {
6241
6573
  backend: this._backend,
6242
6574
  loadTimeMs: Math.round(loadTimeMs),
6243
6575
  inputs: this.session.inputNames,
@@ -6253,12 +6585,12 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6253
6585
  model: "wav2arkit_cpu",
6254
6586
  backend: this._backend
6255
6587
  });
6256
- logger9.debug("Running warmup inference");
6588
+ logger10.debug("Running warmup inference");
6257
6589
  const warmupStart = performance.now();
6258
6590
  const silentAudio = new Float32Array(16e3);
6259
6591
  await this.infer(silentAudio);
6260
6592
  const warmupTimeMs = performance.now() - warmupStart;
6261
- logger9.info("Warmup inference complete", {
6593
+ logger10.info("Warmup inference complete", {
6262
6594
  warmupTimeMs: Math.round(warmupTimeMs),
6263
6595
  backend: this._backend
6264
6596
  });
@@ -6345,7 +6677,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6345
6677
  const symmetrized = symmetrizeBlendshapes(rawFrame);
6346
6678
  blendshapes.push(symmetrized);
6347
6679
  }
6348
- logger9.trace("Inference completed", {
6680
+ logger10.trace("Inference completed", {
6349
6681
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
6350
6682
  numFrames,
6351
6683
  inputSamples
@@ -6373,7 +6705,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6373
6705
  const errMsg = err instanceof Error ? err.message : String(err);
6374
6706
  if (errMsg.includes("timed out")) {
6375
6707
  this.poisoned = true;
6376
- logger9.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6708
+ logger10.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6377
6709
  backend: this._backend,
6378
6710
  timeoutMs: _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS
6379
6711
  });
@@ -6381,7 +6713,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6381
6713
  const oomError = new Error(
6382
6714
  `Wav2ArkitCpu inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
6383
6715
  );
6384
- logger9.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6716
+ logger10.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6385
6717
  pointer: `0x${err.toString(16)}`,
6386
6718
  backend: this._backend
6387
6719
  });
@@ -6394,7 +6726,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6394
6726
  reject(oomError);
6395
6727
  return;
6396
6728
  } else {
6397
- logger9.error("Inference failed", { error: errMsg, backend: this._backend });
6729
+ logger10.error("Inference failed", { error: errMsg, backend: this._backend });
6398
6730
  }
6399
6731
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
6400
6732
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -6421,9 +6753,9 @@ _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS = 5e3;
6421
6753
  var Wav2ArkitCpuInference = _Wav2ArkitCpuInference;
6422
6754
 
6423
6755
  // src/inference/Wav2ArkitCpuWorker.ts
6424
- var logger10 = createLogger("Wav2ArkitCpuWorker");
6756
+ var logger11 = createLogger("Wav2ArkitCpuWorker");
6425
6757
  var WASM_CDN_PATH4 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
6426
- var LOAD_TIMEOUT_MS2 = 6e4;
6758
+ var LOAD_TIMEOUT_MS2 = 42e4;
6427
6759
  var INFERENCE_TIMEOUT_MS2 = 5e3;
6428
6760
  function resolveUrl3(url) {
6429
6761
  if (/^https?:\/\//i.test(url) || /^blob:/i.test(url)) return url;
@@ -6708,7 +7040,7 @@ var Wav2ArkitCpuWorker = class {
6708
7040
  this.handleWorkerMessage(event.data);
6709
7041
  };
6710
7042
  worker.onerror = (error) => {
6711
- logger10.error("Worker error", { error: error.message });
7043
+ logger11.error("Worker error", { error: error.message });
6712
7044
  for (const [, resolver] of this.pendingResolvers) {
6713
7045
  resolver.reject(new Error(`Worker error: ${error.message}`));
6714
7046
  }
@@ -6784,10 +7116,10 @@ var Wav2ArkitCpuWorker = class {
6784
7116
  "model.backend_requested": "wasm"
6785
7117
  });
6786
7118
  try {
6787
- logger10.info("Creating wav2arkit_cpu worker...");
7119
+ logger11.info("Creating wav2arkit_cpu worker...");
6788
7120
  this.worker = this.createWorker();
6789
7121
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
6790
- logger10.info("Loading model in worker...", {
7122
+ logger11.info("Loading model in worker...", {
6791
7123
  modelUrl: this.config.modelUrl,
6792
7124
  externalDataUrl,
6793
7125
  isIOS: isIOS()
@@ -6805,7 +7137,7 @@ var Wav2ArkitCpuWorker = class {
6805
7137
  );
6806
7138
  this._isLoaded = true;
6807
7139
  const loadTimeMs = performance.now() - startTime;
6808
- logger10.info("Wav2ArkitCpu worker loaded successfully", {
7140
+ logger11.info("Wav2ArkitCpu worker loaded successfully", {
6809
7141
  backend: "wasm",
6810
7142
  loadTimeMs: Math.round(loadTimeMs),
6811
7143
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -6890,7 +7222,7 @@ var Wav2ArkitCpuWorker = class {
6890
7222
  for (let f = 0; f < numFrames; f++) {
6891
7223
  blendshapes.push(flatBuffer.slice(f * numBlendshapes, (f + 1) * numBlendshapes));
6892
7224
  }
6893
- logger10.trace("Worker inference completed", {
7225
+ logger11.trace("Worker inference completed", {
6894
7226
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
6895
7227
  workerTimeMs: Math.round(result.inferenceTimeMs * 100) / 100,
6896
7228
  numFrames,
@@ -6920,12 +7252,12 @@ var Wav2ArkitCpuWorker = class {
6920
7252
  const errMsg = err instanceof Error ? err.message : String(err);
6921
7253
  if (errMsg.includes("timed out")) {
6922
7254
  this.poisoned = true;
6923
- logger10.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7255
+ logger11.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
6924
7256
  backend: "wasm",
6925
7257
  timeoutMs: INFERENCE_TIMEOUT_MS2
6926
7258
  });
6927
7259
  } else {
6928
- logger10.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7260
+ logger11.error("Worker inference failed", { error: errMsg, backend: "wasm" });
6929
7261
  }
6930
7262
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
6931
7263
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -6963,38 +7295,38 @@ var Wav2ArkitCpuWorker = class {
6963
7295
  };
6964
7296
 
6965
7297
  // src/inference/createA2E.ts
6966
- var logger11 = createLogger("createA2E");
7298
+ var logger12 = createLogger("createA2E");
6967
7299
  function createA2E(config) {
6968
7300
  const mode = config.mode ?? "auto";
6969
7301
  const fallbackOnError = config.fallbackOnError ?? true;
6970
7302
  let useCpu;
6971
7303
  if (mode === "cpu") {
6972
7304
  useCpu = true;
6973
- logger11.info("Forcing CPU A2E model (wav2arkit_cpu)");
7305
+ logger12.info("Forcing CPU A2E model (wav2arkit_cpu)");
6974
7306
  } else if (mode === "gpu") {
6975
7307
  useCpu = false;
6976
- logger11.info("Forcing GPU A2E model (Wav2Vec2)");
7308
+ logger12.info("Forcing GPU A2E model (Wav2Vec2)");
6977
7309
  } else {
6978
7310
  useCpu = shouldUseCpuA2E();
6979
- logger11.info("Auto-detected A2E model", {
7311
+ logger12.info("Auto-detected A2E model", {
6980
7312
  useCpu,
6981
7313
  isSafari: isSafari()
6982
7314
  });
6983
7315
  }
6984
7316
  if (useCpu) {
6985
7317
  if (config.unifiedWorker) {
6986
- logger11.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7318
+ logger12.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
6987
7319
  return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
6988
7320
  modelUrl: config.cpuModelUrl
6989
7321
  });
6990
7322
  }
6991
7323
  if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
6992
- logger11.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7324
+ logger12.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
6993
7325
  return new Wav2ArkitCpuWorker({
6994
7326
  modelUrl: config.cpuModelUrl
6995
7327
  });
6996
7328
  }
6997
- logger11.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7329
+ logger12.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
6998
7330
  return new Wav2ArkitCpuInference({
6999
7331
  modelUrl: config.cpuModelUrl
7000
7332
  });
@@ -7006,10 +7338,10 @@ function createA2E(config) {
7006
7338
  numIdentityClasses: config.numIdentityClasses
7007
7339
  });
7008
7340
  if (fallbackOnError) {
7009
- logger11.info("Creating Wav2Vec2Inference with CPU fallback");
7341
+ logger12.info("Creating Wav2Vec2Inference with CPU fallback");
7010
7342
  return new A2EWithFallback(gpuInstance, config);
7011
7343
  }
7012
- logger11.info("Creating Wav2Vec2Inference (no fallback)");
7344
+ logger12.info("Creating Wav2Vec2Inference (no fallback)");
7013
7345
  return gpuInstance;
7014
7346
  }
7015
7347
  var A2EWithFallback = class {
@@ -7038,7 +7370,7 @@ var A2EWithFallback = class {
7038
7370
  }
7039
7371
  }
7040
7372
  async fallbackToCpu(reason) {
7041
- logger11.warn("GPU model load failed, falling back to CPU model", { reason });
7373
+ logger12.warn("GPU model load failed, falling back to CPU model", { reason });
7042
7374
  try {
7043
7375
  await this.implementation.dispose();
7044
7376
  } catch {
@@ -7047,17 +7379,17 @@ var A2EWithFallback = class {
7047
7379
  this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
7048
7380
  modelUrl: this.config.cpuModelUrl
7049
7381
  });
7050
- logger11.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7382
+ logger12.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7051
7383
  } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7052
7384
  this.implementation = new Wav2ArkitCpuWorker({
7053
7385
  modelUrl: this.config.cpuModelUrl
7054
7386
  });
7055
- logger11.info("Fallback to Wav2ArkitCpuWorker successful");
7387
+ logger12.info("Fallback to Wav2ArkitCpuWorker successful");
7056
7388
  } else {
7057
7389
  this.implementation = new Wav2ArkitCpuInference({
7058
7390
  modelUrl: this.config.cpuModelUrl
7059
7391
  });
7060
- logger11.info("Fallback to Wav2ArkitCpuInference successful");
7392
+ logger12.info("Fallback to Wav2ArkitCpuInference successful");
7061
7393
  }
7062
7394
  this.hasFallenBack = true;
7063
7395
  return await this.implementation.load();
@@ -7261,7 +7593,7 @@ var EmphasisDetector = class {
7261
7593
  };
7262
7594
 
7263
7595
  // src/inference/SileroVADInference.ts
7264
- var logger12 = createLogger("SileroVAD");
7596
+ var logger13 = createLogger("SileroVAD");
7265
7597
  var SileroVADInference = class {
7266
7598
  constructor(config) {
7267
7599
  this.session = null;
@@ -7335,23 +7667,23 @@ var SileroVADInference = class {
7335
7667
  "model.sample_rate": this.config.sampleRate
7336
7668
  });
7337
7669
  try {
7338
- logger12.info("Loading ONNX Runtime...", { preference: this.config.backend });
7670
+ logger13.info("Loading ONNX Runtime...", { preference: this.config.backend });
7339
7671
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
7340
7672
  this.ort = ort;
7341
7673
  this._backend = backend;
7342
- logger12.info("ONNX Runtime loaded", { backend: this._backend });
7674
+ logger13.info("ONNX Runtime loaded", { backend: this._backend });
7343
7675
  const cache = getModelCache();
7344
7676
  const modelUrl = this.config.modelUrl;
7345
7677
  const isCached = await cache.has(modelUrl);
7346
7678
  let modelBuffer;
7347
7679
  if (isCached) {
7348
- logger12.debug("Loading model from cache", { modelUrl });
7680
+ logger13.debug("Loading model from cache", { modelUrl });
7349
7681
  modelBuffer = await cache.get(modelUrl);
7350
7682
  } else {
7351
- logger12.debug("Fetching and caching model", { modelUrl });
7683
+ logger13.debug("Fetching and caching model", { modelUrl });
7352
7684
  modelBuffer = await fetchWithCache(modelUrl);
7353
7685
  }
7354
- logger12.debug("Creating ONNX session", {
7686
+ logger13.debug("Creating ONNX session", {
7355
7687
  size: formatBytes(modelBuffer.byteLength),
7356
7688
  backend: this._backend
7357
7689
  });
@@ -7360,7 +7692,7 @@ var SileroVADInference = class {
7360
7692
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
7361
7693
  this.reset();
7362
7694
  const loadTimeMs = performance.now() - startTime;
7363
- logger12.info("Model loaded successfully", {
7695
+ logger13.info("Model loaded successfully", {
7364
7696
  backend: this._backend,
7365
7697
  loadTimeMs: Math.round(loadTimeMs),
7366
7698
  sampleRate: this.config.sampleRate,
@@ -7415,7 +7747,7 @@ var SileroVADInference = class {
7415
7747
  []
7416
7748
  );
7417
7749
  } catch (e) {
7418
- logger12.warn("BigInt64Array not available, using bigint array fallback", {
7750
+ logger13.warn("BigInt64Array not available, using bigint array fallback", {
7419
7751
  error: e instanceof Error ? e.message : String(e)
7420
7752
  });
7421
7753
  this.srTensor = new this.ort.Tensor(
@@ -7521,7 +7853,7 @@ var SileroVADInference = class {
7521
7853
  this.preSpeechBuffer.shift();
7522
7854
  }
7523
7855
  }
7524
- logger12.trace("Skipping VAD inference - audio too quiet", {
7856
+ logger13.trace("Skipping VAD inference - audio too quiet", {
7525
7857
  rms: Math.round(rms * 1e4) / 1e4,
7526
7858
  threshold: MIN_ENERGY_THRESHOLD
7527
7859
  });
@@ -7575,7 +7907,7 @@ var SileroVADInference = class {
7575
7907
  if (isSpeech && !this.wasSpeaking) {
7576
7908
  preSpeechChunks = [...this.preSpeechBuffer];
7577
7909
  this.preSpeechBuffer = [];
7578
- logger12.debug("Speech started with pre-speech buffer", {
7910
+ logger13.debug("Speech started with pre-speech buffer", {
7579
7911
  preSpeechChunks: preSpeechChunks.length,
7580
7912
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
7581
7913
  });
@@ -7588,7 +7920,7 @@ var SileroVADInference = class {
7588
7920
  this.preSpeechBuffer = [];
7589
7921
  }
7590
7922
  this.wasSpeaking = isSpeech;
7591
- logger12.trace("VAD inference completed", {
7923
+ logger13.trace("VAD inference completed", {
7592
7924
  probability: Math.round(probability * 1e3) / 1e3,
7593
7925
  isSpeech,
7594
7926
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -7619,7 +7951,7 @@ var SileroVADInference = class {
7619
7951
  const oomError = new Error(
7620
7952
  `SileroVAD inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reducing concurrent model sessions or reloading the page.`
7621
7953
  );
7622
- logger12.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7954
+ logger13.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7623
7955
  pointer: `0x${err.toString(16)}`,
7624
7956
  backend: this._backend
7625
7957
  });
@@ -7662,9 +7994,9 @@ var SileroVADInference = class {
7662
7994
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
7663
7995
 
7664
7996
  // src/inference/SileroVADWorker.ts
7665
- var logger13 = createLogger("SileroVADWorker");
7997
+ var logger14 = createLogger("SileroVADWorker");
7666
7998
  var WASM_CDN_PATH5 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
7667
- var LOAD_TIMEOUT_MS3 = 1e4;
7999
+ var LOAD_TIMEOUT_MS3 = 12e4;
7668
8000
  var INFERENCE_TIMEOUT_MS3 = 1e3;
7669
8001
  function resolveUrl4(url) {
7670
8002
  if (/^https?:\/\//i.test(url) || /^blob:/i.test(url)) return url;
@@ -7947,7 +8279,7 @@ var SileroVADWorker = class {
7947
8279
  this.handleWorkerMessage(event.data);
7948
8280
  };
7949
8281
  worker.onerror = (error) => {
7950
- logger13.error("Worker error", { error: error.message });
8282
+ logger14.error("Worker error", { error: error.message });
7951
8283
  for (const [, resolver] of this.pendingResolvers) {
7952
8284
  resolver.reject(new Error(`Worker error: ${error.message}`));
7953
8285
  }
@@ -8023,9 +8355,9 @@ var SileroVADWorker = class {
8023
8355
  "model.sample_rate": this.config.sampleRate
8024
8356
  });
8025
8357
  try {
8026
- logger13.info("Creating VAD worker...");
8358
+ logger14.info("Creating VAD worker...");
8027
8359
  this.worker = this.createWorker();
8028
- logger13.info("Loading model in worker...", {
8360
+ logger14.info("Loading model in worker...", {
8029
8361
  modelUrl: this.config.modelUrl,
8030
8362
  sampleRate: this.config.sampleRate
8031
8363
  });
@@ -8041,7 +8373,7 @@ var SileroVADWorker = class {
8041
8373
  );
8042
8374
  this._isLoaded = true;
8043
8375
  const loadTimeMs = performance.now() - startTime;
8044
- logger13.info("VAD worker loaded successfully", {
8376
+ logger14.info("VAD worker loaded successfully", {
8045
8377
  backend: "wasm",
8046
8378
  loadTimeMs: Math.round(loadTimeMs),
8047
8379
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -8148,7 +8480,7 @@ var SileroVADWorker = class {
8148
8480
  if (isSpeech && !this.wasSpeaking) {
8149
8481
  preSpeechChunks = [...this.preSpeechBuffer];
8150
8482
  this.preSpeechBuffer = [];
8151
- logger13.debug("Speech started with pre-speech buffer", {
8483
+ logger14.debug("Speech started with pre-speech buffer", {
8152
8484
  preSpeechChunks: preSpeechChunks.length,
8153
8485
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8154
8486
  });
@@ -8161,7 +8493,7 @@ var SileroVADWorker = class {
8161
8493
  this.preSpeechBuffer = [];
8162
8494
  }
8163
8495
  this.wasSpeaking = isSpeech;
8164
- logger13.trace("VAD worker inference completed", {
8496
+ logger14.trace("VAD worker inference completed", {
8165
8497
  probability: Math.round(result.probability * 1e3) / 1e3,
8166
8498
  isSpeech,
8167
8499
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -8229,44 +8561,44 @@ var SileroVADWorker = class {
8229
8561
  };
8230
8562
 
8231
8563
  // src/inference/createSileroVAD.ts
8232
- var logger14 = createLogger("createSileroVAD");
8564
+ var logger15 = createLogger("createSileroVAD");
8233
8565
  function supportsVADWorker() {
8234
8566
  if (typeof Worker === "undefined") {
8235
- logger14.debug("Worker not supported: Worker constructor undefined");
8567
+ logger15.debug("Worker not supported: Worker constructor undefined");
8236
8568
  return false;
8237
8569
  }
8238
8570
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
8239
- logger14.debug("Worker not supported: URL.createObjectURL unavailable");
8571
+ logger15.debug("Worker not supported: URL.createObjectURL unavailable");
8240
8572
  return false;
8241
8573
  }
8242
8574
  if (typeof Blob === "undefined") {
8243
- logger14.debug("Worker not supported: Blob constructor unavailable");
8575
+ logger15.debug("Worker not supported: Blob constructor unavailable");
8244
8576
  return false;
8245
8577
  }
8246
8578
  return true;
8247
8579
  }
8248
8580
  function createSileroVAD(config) {
8249
8581
  if (config.unifiedWorker) {
8250
- logger14.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8582
+ logger15.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8251
8583
  return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
8252
8584
  }
8253
8585
  const fallbackOnError = config.fallbackOnError ?? true;
8254
8586
  let useWorker;
8255
8587
  if (config.useWorker !== void 0) {
8256
8588
  useWorker = config.useWorker;
8257
- logger14.debug("Worker preference explicitly set", { useWorker });
8589
+ logger15.debug("Worker preference explicitly set", { useWorker });
8258
8590
  } else {
8259
8591
  const workerSupported = supportsVADWorker();
8260
8592
  const onMobile = isMobile();
8261
8593
  useWorker = workerSupported && !onMobile;
8262
- logger14.debug("Auto-detected Worker preference", {
8594
+ logger15.debug("Auto-detected Worker preference", {
8263
8595
  useWorker,
8264
8596
  workerSupported,
8265
8597
  onMobile
8266
8598
  });
8267
8599
  }
8268
8600
  if (useWorker) {
8269
- logger14.info("Creating SileroVADWorker (off-main-thread)");
8601
+ logger15.info("Creating SileroVADWorker (off-main-thread)");
8270
8602
  const worker = new SileroVADWorker({
8271
8603
  modelUrl: config.modelUrl,
8272
8604
  sampleRate: config.sampleRate,
@@ -8278,7 +8610,7 @@ function createSileroVAD(config) {
8278
8610
  }
8279
8611
  return worker;
8280
8612
  }
8281
- logger14.info("Creating SileroVADInference (main thread)");
8613
+ logger15.info("Creating SileroVADInference (main thread)");
8282
8614
  return new SileroVADInference(config);
8283
8615
  }
8284
8616
  var VADWorkerWithFallback = class {
@@ -8304,7 +8636,7 @@ var VADWorkerWithFallback = class {
8304
8636
  try {
8305
8637
  return await this.implementation.load();
8306
8638
  } catch (error) {
8307
- logger14.warn("Worker load failed, falling back to main thread", {
8639
+ logger15.warn("Worker load failed, falling back to main thread", {
8308
8640
  error: error instanceof Error ? error.message : String(error)
8309
8641
  });
8310
8642
  try {
@@ -8313,7 +8645,7 @@ var VADWorkerWithFallback = class {
8313
8645
  }
8314
8646
  this.implementation = new SileroVADInference(this.config);
8315
8647
  this.hasFallenBack = true;
8316
- logger14.info("Fallback to SileroVADInference successful");
8648
+ logger15.info("Fallback to SileroVADInference successful");
8317
8649
  return await this.implementation.load();
8318
8650
  }
8319
8651
  }
@@ -8335,7 +8667,7 @@ var VADWorkerWithFallback = class {
8335
8667
  };
8336
8668
 
8337
8669
  // src/inference/A2EOrchestrator.ts
8338
- var logger15 = createLogger("A2EOrchestrator");
8670
+ var logger16 = createLogger("A2EOrchestrator");
8339
8671
  var A2EOrchestrator = class {
8340
8672
  constructor(config) {
8341
8673
  this.a2e = null;
@@ -8376,7 +8708,7 @@ var A2EOrchestrator = class {
8376
8708
  */
8377
8709
  async load() {
8378
8710
  if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
8379
- logger15.info("Loading A2E model...");
8711
+ logger16.info("Loading A2E model...");
8380
8712
  this.a2e = createA2E({
8381
8713
  gpuModelUrl: this.config.gpuModelUrl,
8382
8714
  gpuExternalDataUrl: this.config.gpuExternalDataUrl,
@@ -8393,7 +8725,7 @@ var A2EOrchestrator = class {
8393
8725
  onError: this.config.onError
8394
8726
  });
8395
8727
  this._isReady = true;
8396
- logger15.info("A2E model loaded", {
8728
+ logger16.info("A2E model loaded", {
8397
8729
  backend: info.backend,
8398
8730
  loadTimeMs: info.loadTimeMs,
8399
8731
  modelId: this.a2e.modelId
@@ -8448,10 +8780,10 @@ var A2EOrchestrator = class {
8448
8780
  this.scriptProcessor.connect(this.audioContext.destination);
8449
8781
  this._isStreaming = true;
8450
8782
  this.processor.startDrip();
8451
- logger15.info("Mic capture started", { sampleRate: this.nativeSampleRate });
8783
+ logger16.info("Mic capture started", { sampleRate: this.nativeSampleRate });
8452
8784
  } catch (err) {
8453
8785
  const error = err instanceof Error ? err : new Error(String(err));
8454
- logger15.error("Failed to start mic capture", { error: error.message });
8786
+ logger16.error("Failed to start mic capture", { error: error.message });
8455
8787
  this.config.onError?.(error);
8456
8788
  throw error;
8457
8789
  }
@@ -8479,7 +8811,7 @@ var A2EOrchestrator = class {
8479
8811
  });
8480
8812
  this.audioContext = null;
8481
8813
  }
8482
- logger15.info("Mic capture stopped");
8814
+ logger16.info("Mic capture stopped");
8483
8815
  }
8484
8816
  /**
8485
8817
  * Dispose of all resources
@@ -8502,7 +8834,7 @@ var A2EOrchestrator = class {
8502
8834
  };
8503
8835
 
8504
8836
  // src/inference/SafariSpeechRecognition.ts
8505
- var logger16 = createLogger("SafariSpeech");
8837
+ var logger17 = createLogger("SafariSpeech");
8506
8838
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
8507
8839
  constructor(config = {}) {
8508
8840
  this.recognition = null;
@@ -8521,7 +8853,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8521
8853
  interimResults: config.interimResults ?? true,
8522
8854
  maxAlternatives: config.maxAlternatives ?? 1
8523
8855
  };
8524
- logger16.debug("SafariSpeechRecognition created", {
8856
+ logger17.debug("SafariSpeechRecognition created", {
8525
8857
  language: this.config.language,
8526
8858
  continuous: this.config.continuous
8527
8859
  });
@@ -8582,7 +8914,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8582
8914
  */
8583
8915
  async start() {
8584
8916
  if (this.isListening) {
8585
- logger16.warn("Already listening");
8917
+ logger17.warn("Already listening");
8586
8918
  return;
8587
8919
  }
8588
8920
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -8612,7 +8944,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8612
8944
  this.isListening = true;
8613
8945
  this.startTime = performance.now();
8614
8946
  this.accumulatedText = "";
8615
- logger16.info("Speech recognition started", {
8947
+ logger17.info("Speech recognition started", {
8616
8948
  language: this.config.language
8617
8949
  });
8618
8950
  span?.end();
@@ -8627,7 +8959,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8627
8959
  */
8628
8960
  async stop() {
8629
8961
  if (!this.isListening || !this.recognition) {
8630
- logger16.warn("Not currently listening");
8962
+ logger17.warn("Not currently listening");
8631
8963
  return {
8632
8964
  text: this.accumulatedText,
8633
8965
  language: this.config.language,
@@ -8656,7 +8988,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8656
8988
  if (this.recognition && this.isListening) {
8657
8989
  this.recognition.abort();
8658
8990
  this.isListening = false;
8659
- logger16.info("Speech recognition aborted");
8991
+ logger17.info("Speech recognition aborted");
8660
8992
  }
8661
8993
  }
8662
8994
  /**
@@ -8687,7 +9019,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8687
9019
  this.isListening = false;
8688
9020
  this.resultCallbacks = [];
8689
9021
  this.errorCallbacks = [];
8690
- logger16.debug("SafariSpeechRecognition disposed");
9022
+ logger17.debug("SafariSpeechRecognition disposed");
8691
9023
  }
8692
9024
  /**
8693
9025
  * Set up event handlers for the recognition instance
@@ -8715,7 +9047,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8715
9047
  confidence: alternative.confidence
8716
9048
  };
8717
9049
  this.emitResult(speechResult);
8718
- logger16.trace("Speech result", {
9050
+ logger17.trace("Speech result", {
8719
9051
  text: text.substring(0, 50),
8720
9052
  isFinal,
8721
9053
  confidence: alternative.confidence
@@ -8725,12 +9057,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8725
9057
  span?.end();
8726
9058
  } catch (error) {
8727
9059
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
8728
- logger16.error("Error processing speech result", { error });
9060
+ logger17.error("Error processing speech result", { error });
8729
9061
  }
8730
9062
  };
8731
9063
  this.recognition.onerror = (event) => {
8732
9064
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
8733
- logger16.error("Speech recognition error", { error: event.error, message: event.message });
9065
+ logger17.error("Speech recognition error", { error: event.error, message: event.message });
8734
9066
  this.emitError(error);
8735
9067
  if (this.stopRejecter) {
8736
9068
  this.stopRejecter(error);
@@ -8740,7 +9072,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8740
9072
  };
8741
9073
  this.recognition.onend = () => {
8742
9074
  this.isListening = false;
8743
- logger16.info("Speech recognition ended", {
9075
+ logger17.info("Speech recognition ended", {
8744
9076
  totalText: this.accumulatedText.length,
8745
9077
  durationMs: performance.now() - this.startTime
8746
9078
  });
@@ -8757,13 +9089,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8757
9089
  }
8758
9090
  };
8759
9091
  this.recognition.onstart = () => {
8760
- logger16.debug("Speech recognition started by browser");
9092
+ logger17.debug("Speech recognition started by browser");
8761
9093
  };
8762
9094
  this.recognition.onspeechstart = () => {
8763
- logger16.debug("Speech detected");
9095
+ logger17.debug("Speech detected");
8764
9096
  };
8765
9097
  this.recognition.onspeechend = () => {
8766
- logger16.debug("Speech ended");
9098
+ logger17.debug("Speech ended");
8767
9099
  };
8768
9100
  }
8769
9101
  /**
@@ -8774,7 +9106,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8774
9106
  try {
8775
9107
  callback(result);
8776
9108
  } catch (error) {
8777
- logger16.error("Error in result callback", { error });
9109
+ logger17.error("Error in result callback", { error });
8778
9110
  }
8779
9111
  }
8780
9112
  }
@@ -8786,7 +9118,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8786
9118
  try {
8787
9119
  callback(error);
8788
9120
  } catch (callbackError) {
8789
- logger16.error("Error in error callback", { error: callbackError });
9121
+ logger17.error("Error in error callback", { error: callbackError });
8790
9122
  }
8791
9123
  }
8792
9124
  }
@@ -9356,327 +9688,9 @@ var AnimationGraph = class extends EventEmitter {
9356
9688
  }
9357
9689
  };
9358
9690
 
9359
- // src/animation/simplex2d.ts
9360
- var perm = new Uint8Array(512);
9361
- var grad2 = [
9362
- [1, 1],
9363
- [-1, 1],
9364
- [1, -1],
9365
- [-1, -1],
9366
- [1, 0],
9367
- [-1, 0],
9368
- [0, 1],
9369
- [0, -1]
9370
- ];
9371
- var p = [
9372
- 151,
9373
- 160,
9374
- 137,
9375
- 91,
9376
- 90,
9377
- 15,
9378
- 131,
9379
- 13,
9380
- 201,
9381
- 95,
9382
- 96,
9383
- 53,
9384
- 194,
9385
- 233,
9386
- 7,
9387
- 225,
9388
- 140,
9389
- 36,
9390
- 103,
9391
- 30,
9392
- 69,
9393
- 142,
9394
- 8,
9395
- 99,
9396
- 37,
9397
- 240,
9398
- 21,
9399
- 10,
9400
- 23,
9401
- 190,
9402
- 6,
9403
- 148,
9404
- 247,
9405
- 120,
9406
- 234,
9407
- 75,
9408
- 0,
9409
- 26,
9410
- 197,
9411
- 62,
9412
- 94,
9413
- 252,
9414
- 219,
9415
- 203,
9416
- 117,
9417
- 35,
9418
- 11,
9419
- 32,
9420
- 57,
9421
- 177,
9422
- 33,
9423
- 88,
9424
- 237,
9425
- 149,
9426
- 56,
9427
- 87,
9428
- 174,
9429
- 20,
9430
- 125,
9431
- 136,
9432
- 171,
9433
- 168,
9434
- 68,
9435
- 175,
9436
- 74,
9437
- 165,
9438
- 71,
9439
- 134,
9440
- 139,
9441
- 48,
9442
- 27,
9443
- 166,
9444
- 77,
9445
- 146,
9446
- 158,
9447
- 231,
9448
- 83,
9449
- 111,
9450
- 229,
9451
- 122,
9452
- 60,
9453
- 211,
9454
- 133,
9455
- 230,
9456
- 220,
9457
- 105,
9458
- 92,
9459
- 41,
9460
- 55,
9461
- 46,
9462
- 245,
9463
- 40,
9464
- 244,
9465
- 102,
9466
- 143,
9467
- 54,
9468
- 65,
9469
- 25,
9470
- 63,
9471
- 161,
9472
- 1,
9473
- 216,
9474
- 80,
9475
- 73,
9476
- 209,
9477
- 76,
9478
- 132,
9479
- 187,
9480
- 208,
9481
- 89,
9482
- 18,
9483
- 169,
9484
- 200,
9485
- 196,
9486
- 135,
9487
- 130,
9488
- 116,
9489
- 188,
9490
- 159,
9491
- 86,
9492
- 164,
9493
- 100,
9494
- 109,
9495
- 198,
9496
- 173,
9497
- 186,
9498
- 3,
9499
- 64,
9500
- 52,
9501
- 217,
9502
- 226,
9503
- 250,
9504
- 124,
9505
- 123,
9506
- 5,
9507
- 202,
9508
- 38,
9509
- 147,
9510
- 118,
9511
- 126,
9512
- 255,
9513
- 82,
9514
- 85,
9515
- 212,
9516
- 207,
9517
- 206,
9518
- 59,
9519
- 227,
9520
- 47,
9521
- 16,
9522
- 58,
9523
- 17,
9524
- 182,
9525
- 189,
9526
- 28,
9527
- 42,
9528
- 223,
9529
- 183,
9530
- 170,
9531
- 213,
9532
- 119,
9533
- 248,
9534
- 152,
9535
- 2,
9536
- 44,
9537
- 154,
9538
- 163,
9539
- 70,
9540
- 221,
9541
- 153,
9542
- 101,
9543
- 155,
9544
- 167,
9545
- 43,
9546
- 172,
9547
- 9,
9548
- 129,
9549
- 22,
9550
- 39,
9551
- 253,
9552
- 19,
9553
- 98,
9554
- 108,
9555
- 110,
9556
- 79,
9557
- 113,
9558
- 224,
9559
- 232,
9560
- 178,
9561
- 185,
9562
- 112,
9563
- 104,
9564
- 218,
9565
- 246,
9566
- 97,
9567
- 228,
9568
- 251,
9569
- 34,
9570
- 242,
9571
- 193,
9572
- 238,
9573
- 210,
9574
- 144,
9575
- 12,
9576
- 191,
9577
- 179,
9578
- 162,
9579
- 241,
9580
- 81,
9581
- 51,
9582
- 145,
9583
- 235,
9584
- 249,
9585
- 14,
9586
- 239,
9587
- 107,
9588
- 49,
9589
- 192,
9590
- 214,
9591
- 31,
9592
- 181,
9593
- 199,
9594
- 106,
9595
- 157,
9596
- 184,
9597
- 84,
9598
- 204,
9599
- 176,
9600
- 115,
9601
- 121,
9602
- 50,
9603
- 45,
9604
- 127,
9605
- 4,
9606
- 150,
9607
- 254,
9608
- 138,
9609
- 236,
9610
- 205,
9611
- 93,
9612
- 222,
9613
- 114,
9614
- 67,
9615
- 29,
9616
- 24,
9617
- 72,
9618
- 243,
9619
- 141,
9620
- 128,
9621
- 195,
9622
- 78,
9623
- 66,
9624
- 215,
9625
- 61,
9626
- 156,
9627
- 180
9628
- ];
9629
- for (let i = 0; i < 256; i++) {
9630
- perm[i] = p[i];
9631
- perm[i + 256] = p[i];
9632
- }
9633
- var F2 = 0.5 * (Math.sqrt(3) - 1);
9634
- var G2 = (3 - Math.sqrt(3)) / 6;
9635
- function dot2(g, x, y) {
9636
- return g[0] * x + g[1] * y;
9637
- }
9638
- function simplex2d(x, y) {
9639
- const s = (x + y) * F2;
9640
- const i = Math.floor(x + s);
9641
- const j = Math.floor(y + s);
9642
- const t = (i + j) * G2;
9643
- const X0 = i - t;
9644
- const Y0 = j - t;
9645
- const x0 = x - X0;
9646
- const y0 = y - Y0;
9647
- const i1 = x0 > y0 ? 1 : 0;
9648
- const j1 = x0 > y0 ? 0 : 1;
9649
- const x1 = x0 - i1 + G2;
9650
- const y1 = y0 - j1 + G2;
9651
- const x2 = x0 - 1 + 2 * G2;
9652
- const y2 = y0 - 1 + 2 * G2;
9653
- const ii = i & 255;
9654
- const jj = j & 255;
9655
- const gi0 = perm[ii + perm[jj]] % 8;
9656
- const gi1 = perm[ii + i1 + perm[jj + j1]] % 8;
9657
- const gi2 = perm[ii + 1 + perm[jj + 1]] % 8;
9658
- let n0 = 0;
9659
- let t0 = 0.5 - x0 * x0 - y0 * y0;
9660
- if (t0 >= 0) {
9661
- t0 *= t0;
9662
- n0 = t0 * t0 * dot2(grad2[gi0], x0, y0);
9663
- }
9664
- let n1 = 0;
9665
- let t1 = 0.5 - x1 * x1 - y1 * y1;
9666
- if (t1 >= 0) {
9667
- t1 *= t1;
9668
- n1 = t1 * t1 * dot2(grad2[gi1], x1, y1);
9669
- }
9670
- let n2 = 0;
9671
- let t2 = 0.5 - x2 * x2 - y2 * y2;
9672
- if (t2 >= 0) {
9673
- t2 *= t2;
9674
- n2 = t2 * t2 * dot2(grad2[gi2], x2, y2);
9675
- }
9676
- return 70 * (n0 + n1 + n2);
9677
- }
9678
-
9679
9691
  // src/animation/ProceduralLifeLayer.ts
9692
+ import { createNoise2D } from "simplex-noise";
9693
+ var simplex2d = createNoise2D();
9680
9694
  var PHASE_OPEN = 0;
9681
9695
  var PHASE_CLOSING = 1;
9682
9696
  var PHASE_CLOSED = 2;
@@ -9984,6 +9998,684 @@ var ProceduralLifeLayer = class {
9984
9998
  }
9985
9999
  };
9986
10000
 
10001
+ // src/orchestration/MicLipSync.ts
10002
+ var logger18 = createLogger("MicLipSync");
10003
/**
 * Drives lip-sync blendshapes from live microphone audio.
 *
 * Microphone chunks are converted to Float32 and pushed into an A2EProcessor;
 * every frame the processor produces is scaled with the current expression
 * profile and re-emitted. When a VAD instance is supplied, the same audio is
 * re-chunked to the VAD's chunk size to derive speech start/end events.
 *
 * Events emitted: "frame", "error", "audio:level", "mic:start", "mic:stop",
 * "speech:start", "speech:end", "state".
 */
var MicLipSync = class extends EventEmitter {
  /**
   * @param {object} config
   * @param {object} config.lam - Backend handed to the A2EProcessor.
   * @param {object} [config.vad] - Optional VAD exposing getChunkSize() and process().
   * @param {object} [config.profile] - Expression profile passed to applyProfile (default {}).
   * @param {number} [config.sampleRate] - Sample rate for mic and processor (default 16000).
   * @param {number} [config.micChunkSize] - Microphone chunk size (default 512).
   * @param {number} [config.identityIndex] - Forwarded to the A2EProcessor.
   */
  constructor(config) {
    super();
    this.omoteEvents = new EventEmitter();
    this._state = "idle";
    this._isSpeaking = false;
    this._currentFrame = null;
    this._currentRawFrame = null;
    // VAD state
    this.speechStartTime = 0;
    this.vadChunkSize = 0;
    this.vadBuffer = null;
    this.vadBufferOffset = 0;
    // Tail of the serialized VAD work; each full chunk is appended so that
    // inferences never overlap and results are applied in arrival order.
    this.vadQueue = Promise.resolve();
    // Bumped by stop() so results of chunks queued before the stop are dropped.
    this.vadEpoch = 0;
    this.profile = config.profile ?? {};
    this.vad = config.vad;
    this.mic = new MicrophoneCapture(this.omoteEvents, {
      sampleRate: config.sampleRate ?? 16e3,
      chunkSize: config.micChunkSize ?? 512
    });
    this.processor = new A2EProcessor({
      backend: config.lam,
      sampleRate: config.sampleRate ?? 16e3,
      identityIndex: config.identityIndex,
      onFrame: (raw) => {
        const scaled = applyProfile(raw, this.profile);
        this._currentFrame = scaled;
        this._currentRawFrame = raw;
        this.emit("frame", { blendshapes: scaled, rawBlendshapes: raw });
      },
      onError: (error) => {
        logger18.error("A2E inference error", { message: error.message });
        this.emit("error", error);
      }
    });
    this.omoteEvents.on("audio.chunk", ({ pcm }) => {
      const float32 = int16ToFloat32(pcm);
      this.processor.pushAudio(float32);
      if (this.vad) {
        this.processVAD(float32);
      }
    });
    this.omoteEvents.on("audio.level", (level) => {
      this.emit("audio:level", level);
    });
    if (this.vad) {
      this.vadChunkSize = this.vad.getChunkSize();
      this.vadBuffer = new Float32Array(this.vadChunkSize);
      this.vadBufferOffset = 0;
    }
  }
  /** Current state: "idle", "active" or "paused". */
  get state() {
    return this._state;
  }
  /** Latest profile-scaled blendshape frame (null before the first frame). */
  get currentFrame() {
    return this._currentFrame;
  }
  /** Whether the most recent VAD result reported speech (always false without a VAD). */
  get isSpeaking() {
    return this._isSpeaking;
  }
  /** Returns "active" while a processor exists, otherwise null. */
  get backend() {
    return this.processor ? "active" : null;
  }
  // ---------------------------------------------------------------------------
  // Public API
  // ---------------------------------------------------------------------------
  /**
   * Start microphone capture and the inference loop.
   * No-op when already active; when paused the microphone is still open, so
   * this resumes inference instead of starting the microphone a second time.
   */
  async start() {
    if (this._state === "active") return;
    if (this._state === "paused") {
      this.resume();
      return;
    }
    await this.mic.start();
    this.processor.startDrip();
    this.emit("mic:start", void 0);
    this.setState("active");
  }
  /** Stop microphone and inference, and discard any partially buffered VAD audio. */
  stop() {
    if (this._state === "idle") return;
    this.processor.stopDrip();
    this.mic.stop();
    this._isSpeaking = false;
    // Drop the partial chunk and invalidate VAD work that is still queued so
    // a late result cannot flip isSpeaking back on after the stop.
    this.vadBufferOffset = 0;
    this.vadEpoch++;
    this.emit("mic:stop", void 0);
    this.setState("idle");
  }
  /** Pause inference (mic stays open for faster resume). */
  pause() {
    if (this._state !== "active") return;
    this.processor.stopDrip();
    this.setState("paused");
  }
  /** Resume inference after pause. */
  resume() {
    if (this._state !== "paused") return;
    this.processor.startDrip();
    this.setState("active");
  }
  /** Replace the expression profile applied to subsequent frames. */
  setProfile(profile) {
    this.profile = profile;
  }
  /** Stop capture and dispose of the processor. */
  async dispose() {
    this.stop();
    this.processor.dispose();
  }
  // ---------------------------------------------------------------------------
  // Internal: VAD processing
  // ---------------------------------------------------------------------------
  /**
   * Re-chunk incoming samples to the VAD chunk size and queue each full chunk.
   *
   * Buffer filling is fully synchronous, so overlapping microphone callbacks
   * can no longer interleave writes across an `await`. Every full chunk is
   * copied before being queued, so the VAD never reads a buffer that is being
   * refilled.
   *
   * @param {Float32Array} samples - Audio samples from one microphone chunk.
   * @returns {Promise<void>} Resolves once all chunks queued so far are processed.
   */
  processVAD(samples) {
    // A non-positive chunk size would never let the loop below make progress.
    if (!this.vad || !this.vadBuffer || this.vadChunkSize <= 0) return this.vadQueue;
    const epoch = this.vadEpoch;
    let read = 0;
    while (read < samples.length) {
      const room = this.vadChunkSize - this.vadBufferOffset;
      const take = Math.min(room, samples.length - read);
      this.vadBuffer.set(samples.subarray(read, read + take), this.vadBufferOffset);
      this.vadBufferOffset += take;
      read += take;
      if (this.vadBufferOffset >= this.vadChunkSize) {
        const chunk = this.vadBuffer.slice();
        this.vadBufferOffset = 0;
        this.vadQueue = this.vadQueue.then(() => this.runVAD(chunk, epoch));
      }
    }
    return this.vadQueue;
  }
  /**
   * Run the VAD on one chunk and translate the result into speech events.
   * Never rejects: failures are logged so the queue keeps draining.
   *
   * @param {Float32Array} chunk - Private copy of one full VAD chunk.
   * @param {number} epoch - Value of vadEpoch when the chunk was queued.
   */
  async runVAD(chunk, epoch) {
    try {
      const result = await this.vad.process(chunk);
      if (epoch !== this.vadEpoch) return;
      const wasSpeaking = this._isSpeaking;
      this._isSpeaking = result.isSpeech;
      if (!wasSpeaking && result.isSpeech) {
        this.speechStartTime = performance.now();
        this.emit("speech:start", void 0);
      } else if (wasSpeaking && !result.isSpeech) {
        const durationMs = performance.now() - this.speechStartTime;
        this.emit("speech:end", { durationMs });
      }
    } catch (err) {
      logger18.warn("VAD process error", { error: String(err) });
    }
  }
  // ---------------------------------------------------------------------------
  // Internal: State management
  // ---------------------------------------------------------------------------
  /** Update the state and emit "state" when it actually changes. */
  setState(state) {
    if (this._state === state) return;
    this._state = state;
    this.emit("state", state);
  }
};
10145
+
10146
+ // src/orchestration/VoicePipeline.ts
10147
+ var logger19 = createLogger("VoicePipeline");
10148
/**
 * Full-duplex voice conversation pipeline: microphone -> VAD -> ASR ->
 * caller-supplied response handler -> lip-synced playback, with interruption
 * support.
 *
 * States used by this class: "idle", "loading", "ready", "listening",
 * "thinking", "speaking", "error".
 *
 * Events emitted: "state", "loading:progress", "frame", "frame:raw",
 * "playback:start", "playback:complete", "audio:level", "speech:start",
 * "speech:end", "transcript", "interruption", "error".
 *
 * `epoch` is incremented whenever in-flight asynchronous work must be
 * invalidated (start, stop, interruption, playback completion, dispose);
 * async continuations compare a captured epoch before touching state.
 */
var VoicePipeline = class extends EventEmitter {
  /**
   * @param {object} config - Pipeline configuration. Read by this class:
   *   `models.{senseVoice,lam,vad}`, `onResponse`, `profile`, `identityIndex`,
   *   and the optional tuning fields referenced in the methods below.
   */
  constructor(config) {
    super();
    // State
    this._state = "idle";
    this.stopped = false;
    this.epoch = 0;
    this._sessionId = null;
    // Models
    this.asr = null;
    this.lam = null;
    this.vad = null;
    this.unifiedWorker = null;
    // Pipelines
    this.playback = null;
    this.interruption = null;
    this.omoteEvents = new EventEmitter();
    this.mic = null;
    // omoteEvents outlives individual microphone sessions, so its listeners
    // must be attached exactly once rather than on every start().
    this.micListenersAttached = false;
    // Audio accumulation
    this.audioBuffer = [];
    this.audioBufferSamples = 0;
    this.speechStartTime = 0;
    this.silenceTimer = null;
    this.isSpeaking = false;
    // Progressive transcription
    this.progressiveTimer = null;
    this.progressivePromise = null;
    this.progressiveInFlight = false;
    this.lastProgressiveResult = null;
    this.lastProgressiveSamples = 0;
    // ASR error recovery
    this.asrErrorCount = 0;
    // Response abort
    this.responseAbortController = null;
    // Frame refs
    this._currentFrame = null;
    this.config = config;
  }
  /** Current pipeline state */
  get state() {
    return this._state;
  }
  /** Latest blendshape frame */
  get currentFrame() {
    return this._currentFrame;
  }
  /** Whether user is currently speaking */
  get isSpeechActive() {
    return this.isSpeaking;
  }
  /** Session ID (generated on start(), null before) */
  get sessionId() {
    return this._sessionId;
  }
  // ---------------------------------------------------------------------------
  // Model loading
  // ---------------------------------------------------------------------------
  /**
   * Load ASR, lip-sync (LAM) and VAD models, then build the playback and
   * interruption helpers. Emits "loading:progress" along the way and ends in
   * state "ready".
   *
   * If the LAM load does not finish within `config.lamLoadTimeoutMs`
   * (default 30000 ms) the instance is disposed and a CPU-mode instance is
   * loaded instead.
   *
   * @throws {Error} Re-throws any load failure after emitting "error" and
   *   switching to state "error".
   */
  async loadModels() {
    this.setState("loading");
    const timeoutMs = this.config.lamLoadTimeoutMs ?? 3e4;
    try {
      if (isIOS()) {
        this.unifiedWorker = new UnifiedInferenceWorker();
        await this.unifiedWorker.init();
      }
      this.emitProgress("Speech recognition", 0, 3, 0);
      this.asr = this.createAsr();
      await this.asr.load();
      this.emitProgress("Speech recognition", 45, 3, 1);
      this.emitProgress("Lip sync", 45, 3, 1);
      let lam = createA2E({
        gpuModelUrl: this.config.models.lam.gpuModelUrl,
        gpuExternalDataUrl: this.config.models.lam.gpuExternalDataUrl,
        cpuModelUrl: this.config.models.lam.cpuModelUrl,
        mode: this.config.models.lam.mode,
        unifiedWorker: this.unifiedWorker ?? void 0
      });
      // Synthetic progress between 45 and 85 while the LAM loads. Clamped so a
      // slow load cannot report more than the 85 emitted on completion.
      let lamProgress = 45;
      const lamTickInterval = setInterval(() => {
        const remaining = 85 - lamProgress;
        lamProgress = Math.min(85, lamProgress + Math.max(0.5, remaining * 0.08));
        this.emitProgress("Lip sync", Math.round(lamProgress), 3, 1);
      }, 300);
      let lamTimeoutHandle = null;
      try {
        const lamLoadResult = await Promise.race([
          lam.load().then(() => "ok"),
          new Promise((resolve) => {
            lamTimeoutHandle = setTimeout(() => resolve("timeout"), timeoutMs);
          })
        ]);
        if (lamLoadResult === "timeout") {
          logger19.warn(`LAM GPU load timed out after ${timeoutMs}ms, falling back to CPU`);
          await lam.dispose();
          lam = createA2E({
            gpuModelUrl: this.config.models.lam.gpuModelUrl,
            cpuModelUrl: this.config.models.lam.cpuModelUrl,
            mode: "cpu",
            unifiedWorker: this.unifiedWorker ?? void 0
          });
          await lam.load();
        }
      } finally {
        // Release the race timer so it does not linger after a fast load.
        clearTimeout(lamTimeoutHandle);
        clearInterval(lamTickInterval);
      }
      this.lam = lam;
      this.emitProgress("Lip sync", 85, 3, 2);
      this.emitProgress("Voice detection", 85, 3, 2);
      this.vad = createSileroVAD({
        modelUrl: this.config.models.vad.modelUrl,
        threshold: this.config.models.vad.threshold,
        unifiedWorker: this.unifiedWorker ?? void 0
      });
      await this.vad.load();
      this.emitProgress("Voice detection", 100, 3, 3);
      this.playback = new PlaybackPipeline({
        lam: this.lam,
        profile: this.config.profile,
        identityIndex: this.config.identityIndex,
        neutralTransitionEnabled: this.config.neutralTransitionEnabled ?? true,
        neutralTransitionMs: this.config.neutralTransitionMs,
        audioDelayMs: this.config.audioDelayMs,
        chunkTargetMs: this.config.chunkTargetMs
      });
      await this.playback.initialize();
      this.playback.on("frame", (f) => {
        this._currentFrame = f.blendshapes;
        this.emit("frame", f);
      });
      this.playback.on("frame:raw", (f) => this.emit("frame:raw", f));
      this.playback.on("playback:start", (t) => this.emit("playback:start", t));
      this.playback.on("playback:complete", () => {
        if (this.stopped) return;
        this.emit("playback:complete", void 0);
        this.vad?.reset();
        this.epoch++;
        this.setState("listening");
      });
      this.playback.on("error", (e) => this.emit("error", e));
      this.interruption = new InterruptionHandler({
        enabled: this.config.interruptionEnabled ?? true,
        minSpeechDurationMs: this.config.interruptionMinSpeechMs ?? 200
      });
      this.interruption.on("interruption.triggered", () => {
        this.handleInterruption();
      });
      this.setState("ready");
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      logger19.error("Model loading failed", { message: err.message });
      this.emit("error", err);
      this.setState("error");
      throw err;
    }
  }
  // ---------------------------------------------------------------------------
  // Conversation lifecycle
  // ---------------------------------------------------------------------------
  /**
   * Open the microphone and begin listening.
   * @throws {Error} When the pipeline is not in state "ready".
   */
  async start() {
    if (this._state !== "ready") {
      throw new Error(`Cannot start: state is '${this._state}', expected 'ready'`);
    }
    this.stopped = false;
    this.epoch++;
    this._sessionId = crypto.randomUUID();
    this.asrErrorCount = 0;
    this.mic = new MicrophoneCapture(this.omoteEvents, {
      sampleRate: 16e3,
      chunkSize: 512
    });
    // Attach once: re-registering on every start() would make each audio chunk
    // run through VAD (and be buffered) once per previous session.
    if (!this.micListenersAttached) {
      this.micListenersAttached = true;
      this.omoteEvents.on("audio.chunk", ({ pcm }) => {
        const float32 = int16ToFloat32(pcm);
        this.processAudioChunk(float32);
      });
      this.omoteEvents.on("audio.level", (level) => {
        this.emit("audio:level", level);
      });
    }
    await this.mic.start();
    this.setState("listening");
  }
  /**
   * Stop the conversation: cancel timers and the in-flight response, stop
   * playback and the microphone, clear buffered audio, and return to "ready"
   * (unless the pipeline is "idle").
   */
  stop() {
    this.stopped = true;
    this.epoch++;
    this.clearSilenceTimer();
    this.stopProgressiveTranscription();
    this.responseAbortController?.abort();
    this.responseAbortController = null;
    this.vad?.reset();
    this.playback?.stop();
    this.mic?.stop();
    this.mic = null;
    this.isSpeaking = false;
    this.audioBuffer = [];
    this.audioBufferSamples = 0;
    this._currentFrame = null;
    this.interruption?.setAISpeaking(false);
    if (this._state !== "idle") {
      this.setState("ready");
    }
  }
  /** Store a new expression profile and forward it to the playback pipeline. */
  setProfile(profile) {
    this.config.profile = profile;
    this.playback?.setProfile(profile);
  }
  /**
   * Stop the pipeline and dispose playback and all models; state becomes
   * "idle" without emitting a "state" event.
   */
  async dispose() {
    this.stop();
    this.epoch++;
    await this.playback?.dispose();
    await this.asr?.dispose();
    await this.lam?.dispose();
    await this.vad?.dispose();
    // NOTE(review): unifiedWorker is not released here - confirm whether
    // UnifiedInferenceWorker needs explicit disposal.
    this.playback = null;
    this.asr = null;
    this.lam = null;
    this.vad = null;
    this._state = "idle";
  }
  // ---------------------------------------------------------------------------
  // Audio processing
  // ---------------------------------------------------------------------------
  /**
   * Run VAD on one microphone chunk and update speech tracking.
   *
   * While "speaking", the speech probability is routed to the interruption
   * handler only. While "listening"/"thinking", speech chunks are buffered and
   * a silence timer is armed on the first non-speech chunk after speech.
   *
   * @param {Float32Array} samples - One chunk of microphone audio.
   */
  async processAudioChunk(samples) {
    if (!this.vad) return;
    try {
      const result = await this.vad.process(samples);
      if (this._state === "speaking" && this.interruption) {
        this.interruption.processVADResult(result.probability);
        return;
      }
      if (this._state !== "listening" && this._state !== "thinking") return;
      const wasSpeaking = this.isSpeaking;
      if (result.isSpeech) {
        if (!wasSpeaking) {
          this.isSpeaking = true;
          this.speechStartTime = performance.now();
          this.audioBuffer = [];
          this.audioBufferSamples = 0;
          this.lastProgressiveResult = null;
          this.lastProgressiveSamples = 0;
          this.emit("speech:start", void 0);
          this.startProgressiveTranscription();
        }
        this.audioBuffer.push(new Float32Array(samples));
        this.audioBufferSamples += samples.length;
        this.clearSilenceTimer();
      } else if (wasSpeaking) {
        // Keep trailing silence so the utterance is not clipped.
        this.audioBuffer.push(new Float32Array(samples));
        this.audioBufferSamples += samples.length;
        if (!this.silenceTimer) {
          const timeoutMs = this.getSilenceTimeout();
          this.silenceTimer = setTimeout(() => {
            this.onSilenceDetected();
          }, timeoutMs);
        }
      }
    } catch (err) {
      logger19.warn("VAD error", { error: String(err) });
    }
  }
  // ---------------------------------------------------------------------------
  // Silence detection
  // ---------------------------------------------------------------------------
  /**
   * Silence duration (ms) that ends an utterance. With adaptive timeout
   * enabled (default), utterances longer than 3 s use the extended timeout.
   * @returns {number}
   */
  getSilenceTimeout() {
    const base = this.config.silenceTimeoutMs ?? 500;
    const extended = this.config.silenceTimeoutExtendedMs ?? 700;
    const adaptive = this.config.adaptiveTimeout ?? true;
    if (!adaptive) return base;
    const speechDurationMs = performance.now() - this.speechStartTime;
    return speechDurationMs > 3e3 ? extended : base;
  }
  /** Silence timer fired: emit "speech:end" and kick off end-of-speech processing. */
  onSilenceDetected() {
    const capturedEpoch = this.epoch;
    this.isSpeaking = false;
    const durationMs = performance.now() - this.speechStartTime;
    this.emit("speech:end", { durationMs });
    this.clearSilenceTimer();
    this.processEndOfSpeech(capturedEpoch).catch((err) => {
      logger19.error("End of speech processing failed", { error: String(err) });
      if (this.epoch === capturedEpoch && !this.stopped) {
        this.emit("error", err instanceof Error ? err : new Error(String(err)));
        this.setState("listening");
      }
    });
  }
  // ---------------------------------------------------------------------------
  // End of speech → transcription → response
  // ---------------------------------------------------------------------------
  /**
   * Turn the buffered utterance into a final transcript and hand it to the
   * response handler. Utterances that are too short or too quiet are dropped.
   *
   * @param {number} capturedEpoch - Epoch at the time silence was detected;
   *   the method bails out if the epoch has moved on.
   */
  async processEndOfSpeech(capturedEpoch) {
    // Stop the timer first so no new progressive pass can start while the
    // in-flight one is awaited.
    this.stopProgressiveTranscription();
    if (this.progressivePromise) {
      // The progressive task handles its own errors and never rejects.
      await this.progressivePromise;
    }
    if (this.epoch !== capturedEpoch || this.stopped) return;
    const totalSamples = this.audioBufferSamples;
    const fullAudio = this.mergeAudioBuffer();
    this.audioBuffer = [];
    this.audioBufferSamples = 0;
    const minDuration = this.config.minAudioDurationSec ?? 0.3;
    const minEnergy = this.config.minAudioEnergy ?? 0.02;
    const durationSec = totalSamples / 16e3;
    // The explicit zero check keeps the RMS below from becoming NaN when the
    // minimum duration is configured as 0.
    if (totalSamples === 0 || durationSec < minDuration) {
      logger19.info("Audio too short, discarding", { durationSec });
      this.setState("listening");
      return;
    }
    let sumSquares = 0;
    for (let i = 0; i < fullAudio.length; i++) {
      sumSquares += fullAudio[i] * fullAudio[i];
    }
    const rms = Math.sqrt(sumSquares / fullAudio.length);
    if (rms < minEnergy) {
      logger19.info("Audio too quiet, discarding", { rms });
      this.setState("listening");
      return;
    }
    const normalizedAudio = this.normalizeAudio(fullAudio);
    this.setState("thinking");
    let transcript = null;
    const coverageThreshold = this.config.progressiveCoverageThreshold ?? 0.8;
    // Reuse the last progressive result when it already covers enough of the
    // utterance, saving a full transcription pass.
    if (this.lastProgressiveResult && this.lastProgressiveResult.text.trim().length > 0 && this.lastProgressiveSamples >= totalSamples * coverageThreshold) {
      transcript = { ...this.lastProgressiveResult, isFinal: true };
      logger19.info("Using progressive result", {
        coverage: (this.lastProgressiveSamples / totalSamples).toFixed(2),
        text: transcript.text
      });
    } else {
      this.lastProgressiveResult = null;
      transcript = await this.transcribeWithTimeout(normalizedAudio);
      if (transcript) {
        transcript.isFinal = true;
      }
    }
    if (this.epoch !== capturedEpoch || this.stopped) return;
    if (!transcript || !transcript.text.trim()) {
      logger19.info("No transcript, resuming listening");
      this.setState("listening");
      return;
    }
    this.emit("transcript", transcript);
    await this.callResponseHandler(transcript, capturedEpoch);
  }
  // ---------------------------------------------------------------------------
  // Response handler
  // ---------------------------------------------------------------------------
  /**
   * Invoke `config.onResponse` with the transcript and playback callbacks.
   *
   * @param {object} transcript - Final transcript (text, emotion, event).
   * @param {number} capturedEpoch - Epoch the transcript belongs to.
   */
  async callResponseHandler(transcript, capturedEpoch) {
    if (this.epoch !== capturedEpoch || this.stopped) return;
    this.setState("speaking");
    this.interruption?.setAISpeaking(true);
    const abortController = new AbortController();
    this.responseAbortController = abortController;
    try {
      this.playback.start();
      await this.config.onResponse({
        text: transcript.text,
        emotion: transcript.emotion,
        event: transcript.event,
        send: async (chunk) => {
          if (abortController.signal.aborted) return;
          await this.playback.onAudioChunk(chunk);
        },
        done: async () => {
          if (abortController.signal.aborted) return;
          await this.playback.end();
        },
        signal: abortController.signal,
        sessionId: this._sessionId
      });
    } catch (error) {
      if (abortController.signal.aborted) return;
      const err = error instanceof Error ? error : new Error(String(error));
      logger19.error("Response handler error", { message: err.message });
      this.emit("error", err);
      if (this.epoch === capturedEpoch && !this.stopped) {
        this.interruption?.setAISpeaking(false);
        this.setState("listening");
      }
    } finally {
      // An interrupted handler may settle after a newer response has started;
      // only clear the controller if it is still this call's own.
      if (this.responseAbortController === abortController) {
        this.responseAbortController = null;
      }
    }
  }
  // ---------------------------------------------------------------------------
  // Interruption handling
  // ---------------------------------------------------------------------------
  /** User spoke over the response: abort it, stop playback, resume listening. */
  handleInterruption() {
    if (this._state !== "speaking") return;
    logger19.info("Interruption triggered");
    this.epoch++;
    this.responseAbortController?.abort();
    this.playback?.stop();
    this.interruption?.setAISpeaking(false);
    this.emit("interruption", void 0);
    if (!this.stopped) {
      this.setState("listening");
    }
  }
  // ---------------------------------------------------------------------------
  // Progressive transcription
  // ---------------------------------------------------------------------------
  /**
   * Periodically transcribe the audio buffered so far and emit non-final
   * "transcript" events. A tick is skipped while the previous transcription
   * is still running so passes never overlap.
   */
  startProgressiveTranscription() {
    this.stopProgressiveTranscription();
    const intervalMs = isIOS() ? this.config.progressiveIntervalIosMs ?? 800 : this.config.progressiveIntervalMs ?? 500;
    const minSamples = this.config.progressiveMinSamples ?? 8e3;
    this.progressiveTimer = setInterval(() => {
      if (this.progressiveInFlight) return;
      if (this.audioBufferSamples < minSamples) return;
      if (!this.asr) return;
      const capturedEpoch = this.epoch;
      const snapshot = this.mergeAudioBuffer();
      const snapshotSamples = this.audioBufferSamples;
      this.progressiveInFlight = true;
      this.progressivePromise = (async () => {
        try {
          const result = await this.transcribeWithTimeout(snapshot);
          if (this.epoch !== capturedEpoch) return;
          if (result && result.text.trim()) {
            this.lastProgressiveResult = result;
            this.lastProgressiveSamples = snapshotSamples;
            this.emit("transcript", { ...result, isFinal: false });
          }
        } catch (err) {
          // Progressive results are best-effort; report and carry on.
          logger19.warn("Progressive transcription failed", { error: String(err) });
        } finally {
          this.progressiveInFlight = false;
        }
      })();
    }, intervalMs);
  }
  /** Cancel the progressive transcription timer (an in-flight pass still completes). */
  stopProgressiveTranscription() {
    if (this.progressiveTimer) {
      clearInterval(this.progressiveTimer);
      this.progressiveTimer = null;
    }
  }
  // ---------------------------------------------------------------------------
  // Transcription with timeout + ASR error recovery
  // ---------------------------------------------------------------------------
  /**
   * Transcribe audio, giving up after `config.transcriptionTimeoutMs`
   * (default 10000 ms). After three consecutive failures the ASR session is
   * disposed and recreated.
   *
   * @param {Float32Array} audio - Samples to transcribe.
   * @returns {Promise<object|null>} Transcript with `isFinal: false`, or null
   *   when there is no ASR or the transcription failed / timed out.
   */
  async transcribeWithTimeout(audio) {
    if (!this.asr) return null;
    const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
    const startTime = performance.now();
    let timeoutHandle = null;
    try {
      const result = await Promise.race([
        this.asr.transcribe(audio),
        new Promise((_, reject) => {
          timeoutHandle = setTimeout(() => reject(new Error(`Transcription timed out after ${timeoutMs}ms`)), timeoutMs);
        })
      ]);
      this.asrErrorCount = 0;
      return {
        text: result.text,
        emotion: result.emotion,
        // Forwarded because callResponseHandler passes transcript.event on;
        // stays undefined if the ASR result carries no event.
        event: result.event,
        language: result.language,
        isFinal: false,
        inferenceTimeMs: performance.now() - startTime
      };
    } catch (error) {
      this.asrErrorCount++;
      logger19.warn("Transcription failed", {
        attempt: this.asrErrorCount,
        error: String(error)
      });
      if (this.asrErrorCount >= 3) {
        logger19.warn("3 consecutive ASR errors, recreating session");
        try {
          await this.asr.dispose();
          this.asr = this.createAsr();
          await this.asr.load();
          this.asrErrorCount = 0;
        } catch (recreateErr) {
          logger19.error("ASR session recreation failed", { error: String(recreateErr) });
        }
      }
      return null;
    } finally {
      // Without this every transcription leaves a pending timer behind.
      clearTimeout(timeoutHandle);
    }
  }
  // ---------------------------------------------------------------------------
  // Audio normalization
  // ---------------------------------------------------------------------------
  /**
   * Boost quiet audio so its peak is 0.5. Audio whose peak is already >= 0.1,
   * silent audio, or any audio when `config.normalizeAudio` is false is
   * returned unchanged (same array).
   *
   * @param {Float32Array} audio
   * @returns {Float32Array}
   */
  normalizeAudio(audio) {
    if (!(this.config.normalizeAudio ?? true)) return audio;
    let maxAbs = 0;
    for (let i = 0; i < audio.length; i++) {
      const abs = Math.abs(audio[i]);
      if (abs > maxAbs) maxAbs = abs;
    }
    if (maxAbs >= 0.1 || maxAbs === 0) return audio;
    const gain = 0.5 / maxAbs;
    const normalized = new Float32Array(audio.length);
    for (let i = 0; i < audio.length; i++) {
      normalized[i] = audio[i] * gain;
    }
    return normalized;
  }
  // ---------------------------------------------------------------------------
  // Helpers
  // ---------------------------------------------------------------------------
  /** Build a SenseVoice ASR instance from the configured model settings. */
  createAsr() {
    return createSenseVoice({
      modelUrl: this.config.models.senseVoice.modelUrl,
      tokensUrl: this.config.models.senseVoice.tokensUrl,
      language: this.config.models.senseVoice.language,
      unifiedWorker: this.unifiedWorker ?? void 0
    });
  }
  /** Concatenate the buffered chunks into one Float32Array (buffer is left intact). */
  mergeAudioBuffer() {
    const merged = new Float32Array(this.audioBufferSamples);
    let offset = 0;
    for (const chunk of this.audioBuffer) {
      merged.set(chunk, offset);
      offset += chunk.length;
    }
    return merged;
  }
  /** Update the state, log the transition and emit "state" when it changes. */
  setState(state) {
    if (this._state === state) return;
    logger19.info("State transition", { from: this._state, to: state });
    this._state = state;
    this.emit("state", state);
  }
  /** Emit a "loading:progress" event. */
  emitProgress(currentModel, progress, totalModels, modelsLoaded) {
    this.emit("loading:progress", { currentModel, progress, totalModels, modelsLoaded });
  }
  /** Cancel the pending silence timer, if any. */
  clearSilenceTimer() {
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
  }
};
10678
+
9987
10679
  // ../types/dist/index.mjs
9988
10680
  var PROTOCOL_VERSION = 1;
9989
10681
  function isProtocolEvent(obj) {
@@ -10016,11 +10708,13 @@ export {
10016
10708
  LOG_LEVEL_PRIORITY,
10017
10709
  MODEL_LOAD_TIME_BUCKETS,
10018
10710
  MetricNames,
10711
+ MicLipSync,
10019
10712
  MicrophoneCapture,
10020
10713
  ModelCache,
10021
10714
  OTLPExporter,
10022
10715
  OmoteTelemetry,
10023
10716
  PROTOCOL_VERSION,
10717
+ PlaybackPipeline,
10024
10718
  ProceduralLifeLayer,
10025
10719
  RingBuffer,
10026
10720
  SafariSpeechRecognition,
@@ -10031,10 +10725,12 @@ export {
10031
10725
  SileroVADUnifiedAdapter,
10032
10726
  SileroVADWorker,
10033
10727
  UnifiedInferenceWorker,
10728
+ VoicePipeline,
10034
10729
  Wav2ArkitCpuInference,
10035
10730
  Wav2ArkitCpuUnifiedAdapter,
10036
10731
  Wav2ArkitCpuWorker,
10037
10732
  Wav2Vec2Inference,
10733
+ applyProfile,
10038
10734
  blendEmotions,
10039
10735
  calculatePeak,
10040
10736
  calculateRMS,