@omote/core 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -52,10 +52,12 @@ __export(index_exports, {
52
52
  EMOTION_TO_AU: () => EMOTION_TO_AU,
53
53
  EMOTION_VECTOR_SIZE: () => EMOTION_VECTOR_SIZE,
54
54
  EXPLICIT_EMOTION_COUNT: () => EXPLICIT_EMOTION_COUNT,
55
+ ElevenLabsTTSBackend: () => ElevenLabsTTSBackend,
55
56
  EmotionController: () => EmotionController,
56
57
  EmotionPresets: () => EmotionPresets,
57
58
  EmotionResolver: () => EmotionResolver,
58
59
  EmphasisDetector: () => EmphasisDetector,
60
+ ErrorCodes: () => ErrorCodes,
59
61
  ErrorTypes: () => ErrorTypes,
60
62
  EventEmitter: () => EventEmitter,
61
63
  FaceCompositor: () => FaceCompositor,
@@ -79,6 +81,7 @@ __export(index_exports, {
79
81
  PRESERVE_POSITION_BONES: () => PRESERVE_POSITION_BONES,
80
82
  PROTOCOL_VERSION: () => PROTOCOL_VERSION,
81
83
  PlaybackPipeline: () => PlaybackPipeline,
84
+ PollyTTSBackend: () => PollyTTSBackend,
82
85
  ProceduralLifeLayer: () => ProceduralLifeLayer,
83
86
  RingBuffer: () => RingBuffer,
84
87
  SafariSpeechRecognition: () => SafariSpeechRecognition,
@@ -102,6 +105,7 @@ __export(index_exports, {
102
105
  calculatePeak: () => calculatePeak,
103
106
  calculateRMS: () => calculateRMS,
104
107
  configureCacheLimit: () => configureCacheLimit,
108
+ configureClock: () => configureClock,
105
109
  configureLogging: () => configureLogging,
106
110
  configureModelUrls: () => configureModelUrls,
107
111
  configureOrtCdn: () => configureOrtCdn,
@@ -118,6 +122,7 @@ __export(index_exports, {
118
122
  formatBytes: () => formatBytes,
119
123
  getCacheConfig: () => getCacheConfig,
120
124
  getCacheKey: () => getCacheKey,
125
+ getClock: () => getClock,
121
126
  getEmotionPreset: () => getEmotionPreset,
122
127
  getLoggingConfig: () => getLoggingConfig,
123
128
  getModelCache: () => getModelCache,
@@ -654,6 +659,19 @@ var OTLPExporter = class {
654
659
  }
655
660
  };
656
661
 
662
+ // src/logging/Clock.ts
663
+ var defaultClock = {
664
+ now: () => performance.now(),
665
+ timestamp: () => Date.now()
666
+ };
667
+ var activeClock = defaultClock;
668
+ function configureClock(clock) {
669
+ activeClock = clock;
670
+ }
671
+ function getClock() {
672
+ return activeClock;
673
+ }
674
+
657
675
  // src/telemetry/OmoteTelemetry.ts
658
676
  function generateId(length = 16) {
659
677
  const bytes = new Uint8Array(length);
@@ -762,7 +780,7 @@ var OmoteTelemetry = class {
762
780
  const traceId = parentContext?.traceId ?? this.activeTraceId ?? generateId(16);
763
781
  const spanId = generateId(8);
764
782
  const parentSpanId = parentContext?.spanId;
765
- const startTime = performance.now();
783
+ const startTime = getClock().now();
766
784
  if (!parentContext && !this.activeTraceId) {
767
785
  this.activeTraceId = traceId;
768
786
  }
@@ -776,7 +794,7 @@ var OmoteTelemetry = class {
776
794
  ended = true;
777
795
  const idx = this.spanStack.findIndex((s) => s.spanId === spanId);
778
796
  if (idx !== -1) this.spanStack.splice(idx, 1);
779
- const endTime = performance.now();
797
+ const endTime = getClock().now();
780
798
  const durationMs = endTime - startTime;
781
799
  if (status === "error" && !sampled) {
782
800
  sampled = this.shouldSample(true);
@@ -891,7 +909,7 @@ var OmoteTelemetry = class {
891
909
  */
892
910
  flushMetrics() {
893
911
  if (!this.exporter) return;
894
- const timestamp = performance.now();
912
+ const timestamp = getClock().now();
895
913
  for (const [key, data] of this.counters) {
896
914
  if (data.value === 0) continue;
897
915
  const name = key.split("|")[0];
@@ -1012,7 +1030,7 @@ var Logger = class _Logger {
1012
1030
  log(level, message, data) {
1013
1031
  if (!shouldLog(level)) return;
1014
1032
  const entry = {
1015
- timestamp: Date.now(),
1033
+ timestamp: getClock().timestamp(),
1016
1034
  level,
1017
1035
  module: this.module,
1018
1036
  message,
@@ -1054,12 +1072,12 @@ var Logger = class _Logger {
1054
1072
  };
1055
1073
  var loggerCache = /* @__PURE__ */ new Map();
1056
1074
  function createLogger(module2) {
1057
- let logger43 = loggerCache.get(module2);
1058
- if (!logger43) {
1059
- logger43 = new Logger(module2);
1060
- loggerCache.set(module2, logger43);
1075
+ let logger45 = loggerCache.get(module2);
1076
+ if (!logger45) {
1077
+ logger45 = new Logger(module2);
1078
+ loggerCache.set(module2, logger45);
1061
1079
  }
1062
- return logger43;
1080
+ return logger45;
1063
1081
  }
1064
1082
  var noopLogger = {
1065
1083
  module: "noop",
@@ -1078,6 +1096,63 @@ var noopLogger = {
1078
1096
  child: () => noopLogger
1079
1097
  };
1080
1098
 
1099
+ // src/logging/ErrorCodes.ts
1100
+ var ErrorCodes = {
1101
+ // ── Inference ──────────────────────────────────────────────────────────
1102
+ /** Model failed to load (file not found, corrupted, unsupported format) */
1103
+ INF_LOAD_FAILED: "OMOTE_INF_001",
1104
+ /** ORT session poisoned after WebGPU device loss — must reload tab */
1105
+ INF_SESSION_POISON: "OMOTE_INF_002",
1106
+ /** Inference exceeded timeout threshold */
1107
+ INF_TIMEOUT: "OMOTE_INF_003",
1108
+ /** Out-of-memory during inference or model loading */
1109
+ INF_OOM: "OMOTE_INF_004",
1110
+ /** WebGPU unavailable, fell back to WASM */
1111
+ INF_WEBGPU_FALLBACK: "OMOTE_INF_005",
1112
+ /** Input tensor shape does not match model expectations */
1113
+ INF_SHAPE_MISMATCH: "OMOTE_INF_006",
1114
+ // ── Audio ──────────────────────────────────────────────────────────────
1115
+ /** AudioContext creation or resume failed */
1116
+ AUD_CONTEXT_FAILED: "OMOTE_AUD_001",
1117
+ /** Gap detected in audio scheduling (buffer underrun) */
1118
+ AUD_SCHEDULE_GAP: "OMOTE_AUD_002",
1119
+ /** Audio buffer decoding failed */
1120
+ AUD_DECODE_FAILED: "OMOTE_AUD_003",
1121
+ // ── Speech ─────────────────────────────────────────────────────────────
1122
+ /** Voice activity detection error */
1123
+ SPH_VAD_ERROR: "OMOTE_SPH_001",
1124
+ /** Automatic speech recognition error */
1125
+ SPH_ASR_ERROR: "OMOTE_SPH_002",
1126
+ /** Microphone access denied or unavailable */
1127
+ SPH_MIC_DENIED: "OMOTE_SPH_003",
1128
+ // ── TTS ────────────────────────────────────────────────────────────────
1129
+ /** TTS synthesis failed */
1130
+ TTS_SYNTH_FAILED: "OMOTE_TTS_001",
1131
+ /** TTS streaming error (chunk delivery failure) */
1132
+ TTS_STREAM_ERROR: "OMOTE_TTS_002",
1133
+ /** Phonemizer (eSpeak-NG WASM) ran out of memory */
1134
+ TTS_PHONEMIZER_OOM: "OMOTE_TTS_003",
1135
+ // ── Pipeline ───────────────────────────────────────────────────────────
1136
+ /** Invalid state transition in pipeline state machine */
1137
+ PIP_STATE_ERROR: "OMOTE_PIP_001",
1138
+ /** Pipeline operation aborted (user interrupt or signal) */
1139
+ PIP_ABORT: "OMOTE_PIP_002",
1140
+ // ── Cache ──────────────────────────────────────────────────────────────
1141
+ /** IndexedDB storage quota exceeded */
1142
+ CAC_QUOTA_EXCEEDED: "OMOTE_CAC_001",
1143
+ /** Cache entry evicted (LRU or manual) */
1144
+ CAC_EVICTION: "OMOTE_CAC_002",
1145
+ /** Cached model is stale (version mismatch) */
1146
+ CAC_STALE: "OMOTE_CAC_003",
1147
+ // ── Network ────────────────────────────────────────────────────────────
1148
+ /** HTTP fetch failed (model download, CDN) */
1149
+ NET_FETCH_FAILED: "OMOTE_NET_001",
1150
+ /** Network request timed out */
1151
+ NET_TIMEOUT: "OMOTE_NET_002",
1152
+ /** WebSocket connection error */
1153
+ NET_WEBSOCKET_ERROR: "OMOTE_NET_003"
1154
+ };
1155
+
1081
1156
  // src/audio/MicrophoneCapture.ts
1082
1157
  var logger = createLogger("MicrophoneCapture");
1083
1158
  var MicrophoneCapture = class {
@@ -1175,7 +1250,7 @@ var MicrophoneCapture = class {
1175
1250
  const pcm = this.floatToPCM16(chunk);
1176
1251
  this.events.emit("audio.chunk", {
1177
1252
  pcm,
1178
- timestamp: performance.now()
1253
+ timestamp: getClock().now()
1179
1254
  });
1180
1255
  chunkCount++;
1181
1256
  }
@@ -1406,11 +1481,23 @@ var AudioScheduler = class {
1406
1481
  source.connect(gainNode);
1407
1482
  const scheduleTime = this.nextPlayTime;
1408
1483
  if (scheduleTime < ctx.currentTime) {
1409
- logger2.warn("Audio gap detected", {
1410
- scheduleTime,
1411
- currentTime: ctx.currentTime,
1412
- gapSec: ctx.currentTime - scheduleTime
1413
- });
1484
+ const gap = ctx.currentTime - scheduleTime;
1485
+ const gapMs = gap * 1e3;
1486
+ if (gap > 0.5) {
1487
+ logger2.error("Critical audio scheduling gap", {
1488
+ code: ErrorCodes.AUD_SCHEDULE_GAP,
1489
+ scheduleTime,
1490
+ currentTime: ctx.currentTime,
1491
+ gapMs: Math.round(gapMs)
1492
+ });
1493
+ this.options.onError?.(new Error(`Audio scheduling gap: ${gap.toFixed(3)}s`));
1494
+ } else {
1495
+ logger2.warn("Audio gap detected", {
1496
+ scheduleTime,
1497
+ currentTime: ctx.currentTime,
1498
+ gapMs: Math.round(gapMs)
1499
+ });
1500
+ }
1414
1501
  }
1415
1502
  source.start(scheduleTime);
1416
1503
  const entry = { source, gainNode };
@@ -1604,8 +1691,8 @@ var AudioChunkCoalescer = class {
1604
1691
  var logger4 = createLogger("A2EProcessor");
1605
1692
  var FRAME_RATE = 30;
1606
1693
  var DRIP_INTERVAL_MS = 33;
1607
- var HOLD_DURATION_MS = 100;
1608
- var DECAY_DURATION_MS = 200;
1694
+ var HOLD_DURATION_MS = 400;
1695
+ var DECAY_DURATION_MS = 300;
1609
1696
  var _A2EProcessor = class _A2EProcessor {
1610
1697
  constructor(config) {
1611
1698
  this.writeOffset = 0;
@@ -1769,7 +1856,7 @@ var _A2EProcessor = class _A2EProcessor {
1769
1856
  if (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp <= currentTime) {
1770
1857
  const { frame } = this.timestampedQueue.shift();
1771
1858
  this.lastPulledFrame = frame;
1772
- this.lastDequeuedTime = performance.now();
1859
+ this.lastDequeuedTime = getClock().now();
1773
1860
  return frame;
1774
1861
  }
1775
1862
  if (this.timestampedQueue.length > 0 && this.getFrameCallCount % 60 === 0) {
@@ -1781,7 +1868,7 @@ var _A2EProcessor = class _A2EProcessor {
1781
1868
  });
1782
1869
  }
1783
1870
  if (this.lastPulledFrame) {
1784
- const elapsed = performance.now() - this.lastDequeuedTime;
1871
+ const elapsed = getClock().now() - this.lastDequeuedTime;
1785
1872
  if (elapsed < HOLD_DURATION_MS) {
1786
1873
  return this.lastPulledFrame;
1787
1874
  }
@@ -1866,9 +1953,9 @@ var _A2EProcessor = class _A2EProcessor {
1866
1953
  while (this.pendingChunks.length > 0 && !this.disposed) {
1867
1954
  const { chunk, timestamp } = this.pendingChunks.shift();
1868
1955
  try {
1869
- const t0 = performance.now();
1956
+ const t0 = getClock().now();
1870
1957
  const result = await this.backend.infer(chunk, this.identityIndex);
1871
- const inferMs = Math.round(performance.now() - t0);
1958
+ const inferMs = Math.round(getClock().now() - t0);
1872
1959
  const actualDuration = chunk.length / this.sampleRate;
1873
1960
  const actualFrameCount = Math.ceil(actualDuration * FRAME_RATE);
1874
1961
  const framesToQueue = Math.min(actualFrameCount, result.blendshapes.length);
@@ -1907,7 +1994,11 @@ var _A2EProcessor = class _A2EProcessor {
1907
1994
  }
1908
1995
  handleError(err) {
1909
1996
  const error = err instanceof Error ? err : new Error(String(err));
1910
- logger4.warn("A2EProcessor inference error", { error: error.message });
1997
+ const isOOM = typeof err === "number" || error.message && /out of memory|oom|alloc/i.test(error.message);
1998
+ logger4.warn("A2EProcessor inference error", {
1999
+ error: error.message,
2000
+ code: isOOM ? ErrorCodes.INF_OOM : ErrorCodes.INF_SESSION_POISON
2001
+ });
1911
2002
  this.onError?.(error);
1912
2003
  }
1913
2004
  };
@@ -1929,6 +2020,12 @@ var MetricNames = {
1929
2020
  CACHE_HITS: "omote.cache.hits",
1930
2021
  /** Counter: Cache misses */
1931
2022
  CACHE_MISSES: "omote.cache.misses",
2023
+ /** Counter: Cache stale (version/etag mismatch) */
2024
+ CACHE_STALE: "omote.cache.stale",
2025
+ /** Counter: Cache quota warning (>90% used) */
2026
+ CACHE_QUOTA_WARNING: "omote.cache.quota_warning",
2027
+ /** Counter: Cache eviction (LRU) */
2028
+ CACHE_EVICTION: "omote.cache.eviction",
1932
2029
  // --- Pipeline ---
1933
2030
  /** Histogram: VoicePipeline turn latency (speech end → transcript ready, excludes playback) */
1934
2031
  VOICE_TURN_LATENCY: "omote.voice.turn.latency",
@@ -2228,14 +2325,14 @@ var PlaybackPipeline = class extends EventEmitter {
2228
2325
  this._currentRawFrame = null;
2229
2326
  this.cancelNeutralTransition();
2230
2327
  this.scheduler.warmup();
2231
- this.sessionStartTime = performance.now();
2328
+ this.sessionStartTime = getClock().now();
2232
2329
  this.startFrameLoop();
2233
2330
  this.startMonitoring();
2234
2331
  this.setState("playing");
2235
2332
  }
2236
2333
  /** Feed a streaming audio chunk (PCM16 Uint8Array) */
2237
2334
  async onAudioChunk(chunk) {
2238
- const chunkStart = performance.now();
2335
+ const chunkStart = getClock().now();
2239
2336
  const combined = this.coalescer.add(chunk);
2240
2337
  if (!combined) return;
2241
2338
  const float32 = pcm16ToFloat32(combined);
@@ -2245,7 +2342,7 @@ var PlaybackPipeline = class extends EventEmitter {
2245
2342
  this.emit("playback:start", { time: scheduleTime });
2246
2343
  }
2247
2344
  this.processor.pushAudio(float32, scheduleTime);
2248
- getTelemetry()?.recordHistogram(MetricNames.PLAYBACK_CHUNK_LATENCY, performance.now() - chunkStart);
2345
+ getTelemetry()?.recordHistogram(MetricNames.PLAYBACK_CHUNK_LATENCY, getClock().now() - chunkStart);
2249
2346
  }
2250
2347
  /** Signal end of audio stream (flushes remaining audio) */
2251
2348
  async end() {
@@ -2348,15 +2445,15 @@ var PlaybackPipeline = class extends EventEmitter {
2348
2445
  const currentTime = this.scheduler.getCurrentTime();
2349
2446
  const lamFrame = this.processor.getFrameForTime(currentTime);
2350
2447
  if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
2351
- this.lastNewFrameTime = performance.now();
2448
+ this.lastNewFrameTime = getClock().now();
2352
2449
  this.lastKnownLamFrame = lamFrame;
2353
2450
  this.staleWarningEmitted = false;
2354
2451
  }
2355
- if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
2452
+ if (this.playbackStarted && this.lastNewFrameTime > 0 && getClock().now() - this.lastNewFrameTime > this.staleThresholdMs) {
2356
2453
  if (!this.staleWarningEmitted) {
2357
2454
  this.staleWarningEmitted = true;
2358
2455
  logger5.warn("A2E stalled \u2014 no new inference frames", {
2359
- staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
2456
+ staleDurationMs: Math.round(getClock().now() - this.lastNewFrameTime),
2360
2457
  queuedFrames: this.processor.queuedFrameCount
2361
2458
  });
2362
2459
  }
@@ -2396,7 +2493,7 @@ var PlaybackPipeline = class extends EventEmitter {
2396
2493
  if (this.sessionStartTime > 0) {
2397
2494
  getTelemetry()?.recordHistogram(
2398
2495
  MetricNames.PLAYBACK_SESSION_DURATION,
2399
- performance.now() - this.sessionStartTime
2496
+ getClock().now() - this.sessionStartTime
2400
2497
  );
2401
2498
  }
2402
2499
  this.stopInternal();
@@ -2414,9 +2511,9 @@ var PlaybackPipeline = class extends EventEmitter {
2414
2511
  // ---------------------------------------------------------------------------
2415
2512
  startNeutralTransition(fromFrame) {
2416
2513
  this.neutralTransitionFrame = new Float32Array(fromFrame);
2417
- this.neutralTransitionStart = performance.now();
2514
+ this.neutralTransitionStart = getClock().now();
2418
2515
  const animate = () => {
2419
- const elapsed = performance.now() - this.neutralTransitionStart;
2516
+ const elapsed = getClock().now() - this.neutralTransitionStart;
2420
2517
  const t = Math.min(1, elapsed / this.neutralTransitionMs);
2421
2518
  const eased = 1 - Math.pow(1 - t, 3);
2422
2519
  logger5.trace("neutral transition", { t: Math.round(t * 1e3) / 1e3, eased: Math.round(eased * 1e3) / 1e3 });
@@ -2429,7 +2526,7 @@ var PlaybackPipeline = class extends EventEmitter {
2429
2526
  blendshapes,
2430
2527
  rawBlendshapes: blendshapes,
2431
2528
  // raw = scaled during transition
2432
- timestamp: performance.now() / 1e3,
2529
+ timestamp: getClock().now() / 1e3,
2433
2530
  emotion: this._emotion ?? void 0
2434
2531
  };
2435
2532
  this.emit("frame", frame);
@@ -2660,7 +2757,7 @@ var ModelCache = class {
2660
2757
  logger7.warn("Failed to request persistent storage", { error: String(err) });
2661
2758
  }
2662
2759
  }
2663
- const dbOpenStart = performance.now();
2760
+ const dbOpenStart = getClock().now();
2664
2761
  this.dbPromise = new Promise((resolve, reject) => {
2665
2762
  const request = indexedDB.open(DB_NAME, DB_VERSION);
2666
2763
  request.onerror = () => {
@@ -2669,7 +2766,7 @@ var ModelCache = class {
2669
2766
  };
2670
2767
  request.onsuccess = () => {
2671
2768
  this.db = request.result;
2672
- logger7.debug("IndexedDB opened", { durationMs: Math.round(performance.now() - dbOpenStart) });
2769
+ logger7.debug("IndexedDB opened", { durationMs: Math.round(getClock().now() - dbOpenStart) });
2673
2770
  resolve(this.db);
2674
2771
  };
2675
2772
  request.onupgradeneeded = (event) => {
@@ -2743,16 +2840,16 @@ var ModelCache = class {
2743
2840
  }
2744
2841
  span?.end();
2745
2842
  if (hit) {
2746
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
2843
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
2747
2844
  } else {
2748
- telemetry?.incrementCounter("omote.cache.misses", 1, {});
2845
+ telemetry?.incrementCounter(MetricNames.CACHE_MISSES, 1, {});
2749
2846
  }
2750
2847
  resolve(cached?.data ?? null);
2751
2848
  };
2752
2849
  request.onerror = () => {
2753
2850
  span?.setAttributes({ "cache.hit": false });
2754
2851
  span?.end();
2755
- telemetry?.incrementCounter("omote.cache.misses", 1, {});
2852
+ telemetry?.incrementCounter(MetricNames.CACHE_MISSES, 1, {});
2756
2853
  resolve(null);
2757
2854
  };
2758
2855
  });
@@ -2796,14 +2893,14 @@ var ModelCache = class {
2796
2893
  if (!cached?.data) {
2797
2894
  span?.setAttributes({ "cache.hit": false });
2798
2895
  span?.end();
2799
- telemetry?.incrementCounter("omote.cache.misses", 1, {});
2896
+ telemetry?.incrementCounter(MetricNames.CACHE_MISSES, 1, {});
2800
2897
  return { data: null, stale: false };
2801
2898
  }
2802
2899
  span?.setAttributes({ "cache.hit": true, "cache.size_bytes": cached.size });
2803
2900
  if (!cached.etag) {
2804
2901
  span?.setAttributes({ "cache.validated": false, "cache.stale": false });
2805
2902
  span?.end();
2806
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
2903
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
2807
2904
  return { data: cached.data, stale: false };
2808
2905
  }
2809
2906
  const fetchUrl = originalUrl || url;
@@ -2812,7 +2909,7 @@ var ModelCache = class {
2812
2909
  if (!response.ok) {
2813
2910
  span?.setAttributes({ "cache.validated": false, "cache.stale": false });
2814
2911
  span?.end();
2815
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
2912
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
2816
2913
  return { data: cached.data, stale: false };
2817
2914
  }
2818
2915
  const serverEtag = response.headers.get("etag");
@@ -2825,17 +2922,17 @@ var ModelCache = class {
2825
2922
  });
2826
2923
  span?.end();
2827
2924
  if (isStale) {
2828
- telemetry?.incrementCounter("omote.cache.stale", 1, {});
2925
+ telemetry?.incrementCounter(MetricNames.CACHE_STALE, 1, {});
2829
2926
  logger7.debug("Stale cache detected", { url });
2830
2927
  } else {
2831
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
2928
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
2832
2929
  }
2833
2930
  return { data: cached.data, stale: isStale };
2834
2931
  } catch (fetchError) {
2835
2932
  logger7.warn("HEAD validation failed, using cached data", { error: String(fetchError) });
2836
2933
  span?.setAttributes({ "cache.validated": false, "cache.stale": false });
2837
2934
  span?.end();
2838
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
2935
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
2839
2936
  return { data: cached.data, stale: false };
2840
2937
  }
2841
2938
  } catch {
@@ -2916,7 +3013,7 @@ var ModelCache = class {
2916
3013
  const telemetry = getTelemetry();
2917
3014
  if (quota.percentUsed > 90) {
2918
3015
  logger7.warn("Storage quota warning", { percentUsed: quota.percentUsed.toFixed(1), used: formatBytes(quota.usedBytes), quota: formatBytes(quota.quotaBytes) });
2919
- telemetry?.incrementCounter("omote.cache.quota_warning", 1, {
3016
+ telemetry?.incrementCounter(MetricNames.CACHE_QUOTA_WARNING, 1, {
2920
3017
  percent_used: String(Math.round(quota.percentUsed))
2921
3018
  });
2922
3019
  if (config.onQuotaWarning) {
@@ -3058,7 +3155,7 @@ var ModelCache = class {
3058
3155
  });
3059
3156
  span?.end();
3060
3157
  if (freedBytes > 0) {
3061
- telemetry?.incrementCounter("omote.cache.eviction", evictedUrls.length, {
3158
+ telemetry?.incrementCounter(MetricNames.CACHE_EVICTION, evictedUrls.length, {
3062
3159
  bytes_freed: String(freedBytes)
3063
3160
  });
3064
3161
  }
@@ -3578,7 +3675,7 @@ var _A2EInference = class _A2EInference {
3578
3675
  throw new Error("Model already loaded. Call dispose() first.");
3579
3676
  }
3580
3677
  this.isLoading = true;
3581
- const startTime = performance.now();
3678
+ const startTime = getClock().now();
3582
3679
  const telemetry = getTelemetry();
3583
3680
  const span = telemetry?.startSpan("A2EInference.load", {
3584
3681
  "model.url": this.config.modelUrl,
@@ -3674,7 +3771,7 @@ var _A2EInference = class _A2EInference {
3674
3771
  executionProvider: this._backend,
3675
3772
  backend: this._backend
3676
3773
  });
3677
- const loadTimeMs = performance.now() - startTime;
3774
+ const loadTimeMs = getClock().now() - startTime;
3678
3775
  logger10.info("Model loaded successfully", {
3679
3776
  backend: this._backend,
3680
3777
  loadTimeMs: Math.round(loadTimeMs),
@@ -3693,7 +3790,7 @@ var _A2EInference = class _A2EInference {
3693
3790
  });
3694
3791
  await new Promise((r) => setTimeout(r, 0));
3695
3792
  logger10.debug("Running warmup inference to initialize GPU context");
3696
- const warmupStart = performance.now();
3793
+ const warmupStart = getClock().now();
3697
3794
  const warmupAudio = new Float32Array(this.chunkSize);
3698
3795
  const warmupIdentity = new Float32Array(this.numIdentityClasses);
3699
3796
  warmupIdentity[0] = 1;
@@ -3706,7 +3803,7 @@ var _A2EInference = class _A2EInference {
3706
3803
  this.session.run(warmupFeeds).then(() => "ok"),
3707
3804
  new Promise((r) => setTimeout(() => r("timeout"), WARMUP_TIMEOUT_MS))
3708
3805
  ]);
3709
- const warmupTimeMs = performance.now() - warmupStart;
3806
+ const warmupTimeMs = getClock().now() - warmupStart;
3710
3807
  if (warmupResult === "timeout") {
3711
3808
  logger10.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
3712
3809
  timeoutMs: WARMUP_TIMEOUT_MS,
@@ -3786,7 +3883,7 @@ var _A2EInference = class _A2EInference {
3786
3883
  "inference.input_samples": this.chunkSize
3787
3884
  });
3788
3885
  try {
3789
- const startTime = performance.now();
3886
+ const startTime = getClock().now();
3790
3887
  let timeoutId;
3791
3888
  const results = await Promise.race([
3792
3889
  this.session.run(feeds).then((r) => {
@@ -3800,7 +3897,7 @@ var _A2EInference = class _A2EInference {
3800
3897
  );
3801
3898
  })
3802
3899
  ]);
3803
- const inferenceTimeMs = performance.now() - startTime;
3900
+ const inferenceTimeMs = getClock().now() - startTime;
3804
3901
  const blendshapeOutput = results["blendshapes"];
3805
3902
  if (!blendshapeOutput) {
3806
3903
  throw new Error("Missing blendshapes output from model");
@@ -4207,9 +4304,9 @@ var A2EUnifiedAdapter = class {
4207
4304
  "inference.input_samples": audio.length
4208
4305
  });
4209
4306
  try {
4210
- const startTime = performance.now();
4307
+ const startTime = getClock().now();
4211
4308
  const result = await this.worker.inferLAM(audio, identityIndex);
4212
- const inferenceTimeMs = performance.now() - startTime;
4309
+ const inferenceTimeMs = getClock().now() - startTime;
4213
4310
  const flatBuffer = result.blendshapes;
4214
4311
  const { numFrames, numBlendshapes } = result;
4215
4312
  const blendshapes = [];
@@ -4860,7 +4957,7 @@ var KokoroTTSInference = class {
4860
4957
  throw new Error("KokoroTTS is already loading");
4861
4958
  }
4862
4959
  this.isLoading = true;
4863
- const startTime = performance.now();
4960
+ const startTime = getClock().now();
4864
4961
  try {
4865
4962
  const backendPref = this.config.backend ?? "wasm";
4866
4963
  const ortResult = await getOnnxRuntimeForPreference(backendPref);
@@ -4884,7 +4981,7 @@ var KokoroTTSInference = class {
4884
4981
  "KokoroTTS InferenceSession.create"
4885
4982
  );
4886
4983
  }
4887
- const loadTimeMs = performance.now() - startTime;
4984
+ const loadTimeMs = getClock().now() - startTime;
4888
4985
  logger17.info("Kokoro TTS loaded", {
4889
4986
  backend: this._backend,
4890
4987
  loadTimeMs: Math.round(loadTimeMs),
@@ -4971,7 +5068,18 @@ var KokoroTTSInference = class {
4971
5068
  logger17.debug("stream aborted");
4972
5069
  return;
4973
5070
  }
4974
- const phonemes = await phonemize(sentence, language);
5071
+ let phonemes;
5072
+ try {
5073
+ phonemes = await phonemize(sentence, language);
5074
+ } catch (phonErr) {
5075
+ logger17.error("Phonemizer failed (possible OOM)", {
5076
+ code: ErrorCodes.TTS_PHONEMIZER_OOM,
5077
+ error: String(phonErr),
5078
+ textLength: sentence.length
5079
+ });
5080
+ yield { audio: new Float32Array(0), text: sentence, phonemes: "", duration: 0 };
5081
+ continue;
5082
+ }
4975
5083
  const tokens = tokenize(phonemes);
4976
5084
  const voiceData = await this.ensureVoice(voiceName);
4977
5085
  const style = getStyleForTokenCount(voiceData, tokens.length);
@@ -5031,16 +5139,27 @@ var KokoroTTSInference = class {
5031
5139
  "tts.speed": speed
5032
5140
  });
5033
5141
  try {
5034
- const startTime = performance.now();
5142
+ const startTime = getClock().now();
5035
5143
  const language = getVoiceLanguage(voiceName);
5036
- const phonemes = await phonemize(text, language);
5144
+ let phonemes;
5145
+ try {
5146
+ phonemes = await phonemize(text, language);
5147
+ } catch (phonErr) {
5148
+ logger17.error("Phonemizer failed (possible OOM)", {
5149
+ code: ErrorCodes.TTS_PHONEMIZER_OOM,
5150
+ error: String(phonErr),
5151
+ textLength: text.length
5152
+ });
5153
+ resolve({ audio: new Float32Array(0), duration: 0, inferenceTimeMs: 0 });
5154
+ return;
5155
+ }
5037
5156
  logger17.trace("Phonemized", { text: text.substring(0, 50), phonemes: phonemes.substring(0, 50) });
5038
5157
  const tokens = tokenize(phonemes);
5039
5158
  logger17.trace("Tokenized", { numTokens: tokens.length });
5040
5159
  const voiceData = await this.ensureVoice(voiceName);
5041
5160
  const style = getStyleForTokenCount(voiceData, tokens.length);
5042
5161
  const audio = await this.runInference(tokens, style, speed);
5043
- const inferenceTimeMs = performance.now() - startTime;
5162
+ const inferenceTimeMs = getClock().now() - startTime;
5044
5163
  const duration = audio.length / SAMPLE_RATE;
5045
5164
  logger17.trace("Synthesis complete", {
5046
5165
  duration: `${duration.toFixed(2)}s`,
@@ -5159,11 +5278,11 @@ var KokoroTTSUnifiedAdapter = class {
5159
5278
  "model.url": this.modelUrl
5160
5279
  });
5161
5280
  try {
5162
- const startTime = performance.now();
5281
+ const startTime = getClock().now();
5163
5282
  await this.worker.loadKokoro({ modelUrl: this.modelUrl });
5164
5283
  this._isLoaded = true;
5165
5284
  this.loadedGeneration = this.worker.workerGeneration;
5166
- const loadTimeMs = performance.now() - startTime;
5285
+ const loadTimeMs = getClock().now() - startTime;
5167
5286
  logger18.info("Kokoro TTS loaded via unified worker", {
5168
5287
  backend: "wasm",
5169
5288
  loadTimeMs: Math.round(loadTimeMs),
@@ -5238,11 +5357,11 @@ var KokoroTTSUnifiedAdapter = class {
5238
5357
  runWorkerInference(tokens, style, speed) {
5239
5358
  return new Promise((resolve, reject) => {
5240
5359
  this.inferenceQueue = this.inferenceQueue.then(async () => {
5241
- const startTime = performance.now();
5360
+ const startTime = getClock().now();
5242
5361
  const telemetry = getTelemetry();
5243
5362
  try {
5244
5363
  const result = await this.worker.inferKokoro(tokens, style, speed);
5245
- const latencyMs = performance.now() - startTime;
5364
+ const latencyMs = getClock().now() - startTime;
5246
5365
  telemetry?.recordHistogram("omote.inference.latency", latencyMs, {
5247
5366
  model: "kokoro-tts-unified",
5248
5367
  backend: "wasm"
@@ -5357,11 +5476,11 @@ var SileroVADUnifiedAdapter = class {
5357
5476
  return new Promise((resolve, reject) => {
5358
5477
  this.inferenceQueue = this.inferenceQueue.then(async () => {
5359
5478
  try {
5360
- const startTime = performance.now();
5479
+ const startTime = getClock().now();
5361
5480
  const result = await this.worker.processVAD(audioChunkCopy, this.state, this.context);
5362
5481
  this.state = result.state;
5363
5482
  this.context = audioChunkCopy.slice(-this.contextSize);
5364
- const inferenceTimeMs = performance.now() - startTime;
5483
+ const inferenceTimeMs = getClock().now() - startTime;
5365
5484
  const isSpeech = result.probability > this.config.threshold;
5366
5485
  let preSpeechChunks;
5367
5486
  if (isSpeech && !this.wasSpeaking) {
@@ -5419,17 +5538,20 @@ var SileroVADUnifiedAdapter = class {
5419
5538
  var logger20 = createLogger("createA2E");
5420
5539
  function createA2E(config = {}) {
5421
5540
  const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.lam;
5541
+ const platformInfo = {
5542
+ modelUrl,
5543
+ isIOS: isIOS(),
5544
+ webgpu: typeof navigator !== "undefined" && "gpu" in navigator
5545
+ };
5422
5546
  if (config.unifiedWorker) {
5423
- logger20.info("Creating A2EUnifiedAdapter (via unified worker)", {
5424
- modelUrl
5425
- });
5547
+ logger20.info("Creating A2EUnifiedAdapter (via unified worker)", platformInfo);
5426
5548
  return new A2EUnifiedAdapter(config.unifiedWorker, {
5427
5549
  modelUrl,
5428
5550
  externalDataUrl: config.externalDataUrl,
5429
5551
  numIdentityClasses: config.numIdentityClasses
5430
5552
  });
5431
5553
  }
5432
- logger20.info("Creating A2EInference", { modelUrl });
5554
+ logger20.info("Creating A2EInference", platformInfo);
5433
5555
  return new A2EInference({
5434
5556
  modelUrl,
5435
5557
  externalDataUrl: config.externalDataUrl,
@@ -5805,16 +5927,28 @@ async function loadOrt(wasmPaths, isIOSDevice) {
5805
5927
  // ort.webgpu.min.js crashes WebKit's JIT compiler.
5806
5928
  var isSafariWorker = typeof navigator !== 'undefined' && /safari/i.test(navigator.userAgent) && !/chrome|crios|fxios|chromium|edg/i.test(navigator.userAgent);
5807
5929
  var hasWebGPU = false;
5808
- if (!isIOSDevice && !isSafariWorker && typeof navigator !== 'undefined' && navigator.gpu) {
5930
+ var webgpuReason = '';
5931
+ if (isIOSDevice) {
5932
+ webgpuReason = 'iOS device';
5933
+ } else if (isSafariWorker) {
5934
+ webgpuReason = 'Safari (JSEP/ASYNCIFY crash)';
5935
+ } else if (typeof navigator === 'undefined' || !navigator.gpu) {
5936
+ webgpuReason = 'navigator.gpu unavailable';
5937
+ } else {
5809
5938
  try {
5810
5939
  var adapter = await navigator.gpu.requestAdapter();
5811
5940
  if (adapter) {
5812
5941
  hasWebGPU = true;
5942
+ } else {
5943
+ webgpuReason = 'requestAdapter returned null';
5813
5944
  }
5814
5945
  } catch (e) {
5815
- // WebGPU not available \u2014 fall through to WASM
5946
+ webgpuReason = 'requestAdapter failed: ' + String(e);
5816
5947
  }
5817
5948
  }
5949
+ if (!hasWebGPU && webgpuReason) {
5950
+ console.warn('[UnifiedWorker] WebGPU unavailable: ' + webgpuReason + ', falling back to WASM');
5951
+ }
5818
5952
 
5819
5953
  var ortUrl;
5820
5954
  if (hasWebGPU) {
@@ -6299,7 +6433,12 @@ var UnifiedInferenceWorker = class {
6299
6433
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs, "worker.backend": this._workerBackend });
6300
6434
  span?.end();
6301
6435
  } catch (error) {
6302
- span?.endWithError(error instanceof Error ? error : new Error(String(error)));
6436
+ const err = error instanceof Error ? error : new Error(String(error));
6437
+ const isTimeout = err.message.includes("timed out");
6438
+ if (isTimeout) {
6439
+ logger21.error("Worker init timed out", { code: "OMOTE_INF_003", timeoutMs: INIT_TIMEOUT_MS });
6440
+ }
6441
+ span?.endWithError(err);
6303
6442
  this.cleanup();
6304
6443
  throw error;
6305
6444
  }
@@ -6683,7 +6822,7 @@ var TTSSpeaker = class {
6683
6822
  async connect(tts, config) {
6684
6823
  logger22.info("Connecting TTS...");
6685
6824
  const span = getTelemetry()?.startSpan("TTSSpeaker.connect");
6686
- const connectStart = performance.now();
6825
+ const connectStart = getClock().now();
6687
6826
  this.tts = tts;
6688
6827
  if (!tts.isLoaded) {
6689
6828
  await tts.load();
@@ -6692,7 +6831,7 @@ var TTSSpeaker = class {
6692
6831
  if (!hasLam) {
6693
6832
  this._audioOnly = true;
6694
6833
  this.scheduler = new AudioScheduler({ sampleRate: tts.sampleRate });
6695
- getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, performance.now() - connectStart);
6834
+ getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, getClock().now() - connectStart);
6696
6835
  span?.end();
6697
6836
  logger22.info("TTS connected (audio-only mode)");
6698
6837
  return;
@@ -6726,7 +6865,7 @@ var TTSSpeaker = class {
6726
6865
  neutralTransitionMs: config?.neutralTransitionMs
6727
6866
  });
6728
6867
  await this.ttsPlayback.initialize();
6729
- getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, performance.now() - connectStart);
6868
+ getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, getClock().now() - connectStart);
6730
6869
  span?.end();
6731
6870
  logger22.info("TTS connected (lip sync mode)");
6732
6871
  }
@@ -6761,7 +6900,7 @@ var TTSSpeaker = class {
6761
6900
  const span = getTelemetry()?.startSpan("TTSSpeaker.speak", {
6762
6901
  "text.length": text.length
6763
6902
  });
6764
- const speakStart = performance.now();
6903
+ const speakStart = getClock().now();
6765
6904
  try {
6766
6905
  if (this._audioOnly) {
6767
6906
  await this.speakAudioOnly(text, abort, options?.voice);
@@ -6771,7 +6910,7 @@ var TTSSpeaker = class {
6771
6910
  voice: options?.voice
6772
6911
  });
6773
6912
  }
6774
- getTelemetry()?.recordHistogram(MetricNames.TTS_SPEAK_LATENCY, performance.now() - speakStart);
6913
+ getTelemetry()?.recordHistogram(MetricNames.TTS_SPEAK_LATENCY, getClock().now() - speakStart);
6775
6914
  span?.end();
6776
6915
  } catch (err) {
6777
6916
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
@@ -6901,42 +7040,42 @@ var TTSSpeaker = class {
6901
7040
  end: async () => {
6902
7041
  if (ended) return;
6903
7042
  ended = true;
6904
- if (abort.signal.aborted) {
6905
- this._isSpeaking = false;
6906
- if (this.currentAbort === abort) this.currentAbort = null;
6907
- return;
6908
- }
6909
- if (buffer.trim()) {
6910
- enqueueSentence(buffer.trim());
6911
- buffer = "";
6912
- }
6913
- await processChain;
6914
- if (abort.signal.aborted) {
6915
- this._isSpeaking = false;
6916
- if (this.currentAbort === abort) this.currentAbort = null;
6917
- return;
6918
- }
6919
- await pipeline.end();
6920
- await new Promise((resolve) => {
6921
- let resolved = false;
6922
- const done = () => {
6923
- if (resolved) return;
6924
- resolved = true;
6925
- unsubC();
6926
- unsubS();
6927
- abort.signal.removeEventListener("abort", done);
6928
- resolve();
6929
- };
7043
+ const unsubs = [];
7044
+ try {
6930
7045
  if (abort.signal.aborted) {
6931
- resolve();
6932
7046
  return;
6933
7047
  }
6934
- const unsubC = pipeline.on("playback:complete", done);
6935
- const unsubS = pipeline.on("playback:stop", done);
6936
- abort.signal.addEventListener("abort", done);
6937
- });
6938
- this._isSpeaking = false;
6939
- if (this.currentAbort === abort) this.currentAbort = null;
7048
+ if (buffer.trim()) {
7049
+ enqueueSentence(buffer.trim());
7050
+ buffer = "";
7051
+ }
7052
+ await processChain;
7053
+ if (abort.signal.aborted) {
7054
+ return;
7055
+ }
7056
+ await pipeline.end();
7057
+ await new Promise((resolve) => {
7058
+ let resolved = false;
7059
+ const done = () => {
7060
+ if (resolved) return;
7061
+ resolved = true;
7062
+ resolve();
7063
+ };
7064
+ if (abort.signal.aborted) {
7065
+ resolve();
7066
+ return;
7067
+ }
7068
+ unsubs.push(pipeline.on("playback:complete", done));
7069
+ unsubs.push(pipeline.on("playback:stop", done));
7070
+ const onAbort = () => done();
7071
+ abort.signal.addEventListener("abort", onAbort);
7072
+ unsubs.push(() => abort.signal.removeEventListener("abort", onAbort));
7073
+ });
7074
+ } finally {
7075
+ unsubs.forEach((fn) => fn());
7076
+ this._isSpeaking = false;
7077
+ if (this.currentAbort === abort) this.currentAbort = null;
7078
+ }
6940
7079
  }
6941
7080
  };
6942
7081
  }
@@ -7627,14 +7766,14 @@ function createKokoroTTS(config = {}) {
7627
7766
  logger24.info("iOS + unified worker: creating KokoroTTSUnifiedAdapter (off-main-thread ONNX)");
7628
7767
  return new KokoroTTSUnifiedAdapter(config.unifiedWorker, config);
7629
7768
  }
7630
- logger24.info("iOS detected: creating KokoroTTSInference (main thread, shared ORT)");
7769
+ logger24.info("iOS: creating KokoroTTSInference (main thread, shared ORT)");
7631
7770
  return new KokoroTTSInference(config);
7632
7771
  }
7633
7772
  if (!KokoroTTSWorker.isSupported()) {
7634
7773
  logger24.info("Worker not supported: creating KokoroTTSInference (main thread)");
7635
7774
  return new KokoroTTSInference(config);
7636
7775
  }
7637
- logger24.info("Auto-detected: creating KokoroTTSWorker (off-main-thread)");
7776
+ logger24.info("Auto: creating KokoroTTSWorker (off-main-thread)");
7638
7777
  return new KokoroTTSWorker(config);
7639
7778
  }
7640
7779
 
@@ -7868,6 +8007,9 @@ var _SenseVoiceInference = class _SenseVoiceInference {
7868
8007
  // so all future transcribe() calls reject immediately to prevent concurrent access.
7869
8008
  this.poisoned = false;
7870
8009
  // 10s for SenseVoice (heavier preprocessing)
8010
+ // WebGPU shape change tracking (for dynamic shape warning)
8011
+ this.lastLfrFrames = 0;
8012
+ this.webgpuShapeWarned = false;
7871
8013
  // Preprocessing state (loaded once)
7872
8014
  this.tokenMap = null;
7873
8015
  this.negMean = null;
@@ -7902,7 +8044,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
7902
8044
  throw new Error("Model already loaded. Call dispose() first.");
7903
8045
  }
7904
8046
  this.isLoading = true;
7905
- const startTime = performance.now();
8047
+ const startTime = getClock().now();
7906
8048
  const telemetry = getTelemetry();
7907
8049
  const span = telemetry?.startSpan("SenseVoice.load", {
7908
8050
  "model.url": this.config.modelUrl,
@@ -7969,7 +8111,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
7969
8111
  } catch (cmvnErr) {
7970
8112
  logger25.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
7971
8113
  }
7972
- const loadTimeMs = performance.now() - startTime;
8114
+ const loadTimeMs = getClock().now() - startTime;
7973
8115
  logger25.info("SenseVoice model loaded", {
7974
8116
  backend: this._backend,
7975
8117
  loadTimeMs: Math.round(loadTimeMs),
@@ -8034,24 +8176,35 @@ var _SenseVoiceInference = class _SenseVoiceInference {
8034
8176
  "inference.input_samples": audio.length
8035
8177
  });
8036
8178
  try {
8037
- const startTime = performance.now();
8038
- const preprocessStart = performance.now();
8179
+ const startTime = getClock().now();
8180
+ const preprocessStart = getClock().now();
8039
8181
  const fbank = computeKaldiFbank(audio, 16e3, 80);
8040
8182
  const numFrames = fbank.length / 80;
8041
8183
  if (numFrames === 0) {
8042
8184
  resolve({
8043
8185
  text: "",
8044
- inferenceTimeMs: performance.now() - startTime,
8045
- preprocessTimeMs: performance.now() - preprocessStart
8186
+ inferenceTimeMs: getClock().now() - startTime,
8187
+ preprocessTimeMs: getClock().now() - preprocessStart
8046
8188
  });
8047
8189
  return;
8048
8190
  }
8049
8191
  const lfrFeatures = applyLFR(fbank, 80, 7, 6);
8050
8192
  const numLfrFrames = lfrFeatures.length / 560;
8193
+ if (this._backend === "webgpu" && this.lastLfrFrames !== 0 && numLfrFrames !== this.lastLfrFrames) {
8194
+ if (!this.webgpuShapeWarned) {
8195
+ this.webgpuShapeWarned = true;
8196
+ logger25.warn("SenseVoice running on WebGPU with variable audio shapes \u2014 risk of kernel crash", {
8197
+ code: ErrorCodes.INF_SHAPE_MISMATCH,
8198
+ previousFrames: this.lastLfrFrames,
8199
+ currentFrames: numLfrFrames
8200
+ });
8201
+ }
8202
+ }
8203
+ this.lastLfrFrames = numLfrFrames;
8051
8204
  if (this.negMean && this.invStddev) {
8052
8205
  applyCMVN(lfrFeatures, 560, this.negMean, this.invStddev);
8053
8206
  }
8054
- const preprocessTimeMs = performance.now() - preprocessStart;
8207
+ const preprocessTimeMs = getClock().now() - preprocessStart;
8055
8208
  const ort = this.ort;
8056
8209
  const feeds = {
8057
8210
  x: new ort.Tensor("float32", lfrFeatures, [1, numLfrFrames, 560]),
@@ -8081,7 +8234,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
8081
8234
  const seqLen = logitsDims[1];
8082
8235
  const vocabSize = logitsDims[2];
8083
8236
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
8084
- const inferenceTimeMs = performance.now() - startTime;
8237
+ const inferenceTimeMs = getClock().now() - startTime;
8085
8238
  logger25.trace("Transcription complete", {
8086
8239
  text: decoded.text.substring(0, 50),
8087
8240
  language: decoded.language,
@@ -9420,7 +9573,7 @@ var SileroVADInference = class {
9420
9573
  throw new Error("Model already loaded. Call dispose() first.");
9421
9574
  }
9422
9575
  this.isLoading = true;
9423
- const startTime = performance.now();
9576
+ const startTime = getClock().now();
9424
9577
  const telemetry = getTelemetry();
9425
9578
  const span = telemetry?.startSpan("SileroVAD.load", {
9426
9579
  "model.url": this.config.modelUrl,
@@ -9452,7 +9605,7 @@ var SileroVADInference = class {
9452
9605
  const modelData = new Uint8Array(modelBuffer);
9453
9606
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
9454
9607
  this.reset();
9455
- const loadTimeMs = performance.now() - startTime;
9608
+ const loadTimeMs = getClock().now() - startTime;
9456
9609
  logger28.info("Model loaded successfully", {
9457
9610
  backend: this._backend,
9458
9611
  loadTimeMs: Math.round(loadTimeMs),
@@ -9632,7 +9785,7 @@ var SileroVADInference = class {
9632
9785
  "inference.chunk_size": this.chunkSize
9633
9786
  });
9634
9787
  try {
9635
- const startTime = performance.now();
9788
+ const startTime = getClock().now();
9636
9789
  const inputSize = this.contextSize + this.chunkSize;
9637
9790
  const inputBuffer = new Float32Array(inputSize);
9638
9791
  inputBuffer.set(this.context, 0);
@@ -9662,7 +9815,7 @@ var SileroVADInference = class {
9662
9815
  );
9663
9816
  }
9664
9817
  this.context = audioChunkCopy.slice(-this.contextSize);
9665
- const inferenceTimeMs = performance.now() - startTime;
9818
+ const inferenceTimeMs = getClock().now() - startTime;
9666
9819
  const isSpeech = probability > this.config.threshold;
9667
9820
  let preSpeechChunks;
9668
9821
  if (isSpeech && !this.wasSpeaking) {
@@ -10477,6 +10630,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
10477
10630
  this.lastProgressiveSamples = 0;
10478
10631
  // ASR error recovery
10479
10632
  this.asrErrorCount = 0;
10633
+ this.progressiveErrorCount = 0;
10480
10634
  this.config = config ?? {};
10481
10635
  }
10482
10636
  /** Current listener state */
@@ -10669,7 +10823,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
10669
10823
  if (result.isSpeech) {
10670
10824
  if (!wasSpeaking) {
10671
10825
  this.isSpeechActive = true;
10672
- this.speechStartTime = performance.now();
10826
+ this.speechStartTime = getClock().now();
10673
10827
  this.audioBuffer = [];
10674
10828
  this.audioBufferSamples = 0;
10675
10829
  this.lastProgressiveResult = null;
@@ -10708,13 +10862,13 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
10708
10862
  const extended = this.config.silenceTimeoutExtendedMs ?? 700;
10709
10863
  const adaptive = this.config.adaptiveTimeout ?? true;
10710
10864
  if (!adaptive) return base;
10711
- const speechDurationMs = performance.now() - this.speechStartTime;
10865
+ const speechDurationMs = getClock().now() - this.speechStartTime;
10712
10866
  return speechDurationMs > 3e3 ? extended : base;
10713
10867
  }
10714
10868
  onSilenceDetected() {
10715
10869
  const capturedEpoch = this.epoch;
10716
10870
  this.isSpeechActive = false;
10717
- const durationMs = performance.now() - this.speechStartTime;
10871
+ const durationMs = getClock().now() - this.speechStartTime;
10718
10872
  logger31.debug("Speech end", { durationMs: Math.round(durationMs) });
10719
10873
  this.emit("speech:end", { durationMs });
10720
10874
  this.clearSilenceTimer();
@@ -10811,7 +10965,15 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
10811
10965
  this.lastProgressiveSamples = snapshotSamples;
10812
10966
  this.emit("transcript", { ...result, isFinal: false });
10813
10967
  }
10814
- } catch {
10968
+ } catch (err) {
10969
+ this.progressiveErrorCount = (this.progressiveErrorCount ?? 0) + 1;
10970
+ if (this.progressiveErrorCount % 10 === 1) {
10971
+ logger31.warn("Progressive transcription error", {
10972
+ code: ErrorCodes.SPH_ASR_ERROR,
10973
+ error: String(err),
10974
+ count: this.progressiveErrorCount
10975
+ });
10976
+ }
10815
10977
  }
10816
10978
  })();
10817
10979
  }, intervalMs);
@@ -10828,7 +10990,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
10828
10990
  async transcribeWithTimeout(audio) {
10829
10991
  if (!this.asr) return null;
10830
10992
  const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
10831
- const startTime = performance.now();
10993
+ const startTime = getClock().now();
10832
10994
  const span = getTelemetry()?.startSpan("SpeechListener.transcribe", {
10833
10995
  "inference.input_samples": audio.length,
10834
10996
  "inference.input_duration_ms": audio.length / 16e3 * 1e3
@@ -10842,7 +11004,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
10842
11004
  })
10843
11005
  ]);
10844
11006
  clearTimeout(timeoutId);
10845
- const latency = performance.now() - startTime;
11007
+ const latency = getClock().now() - startTime;
10846
11008
  this.asrErrorCount = 0;
10847
11009
  getTelemetry()?.recordHistogram(MetricNames.VOICE_TRANSCRIPTION_LATENCY, latency);
10848
11010
  getTelemetry()?.incrementCounter(MetricNames.VOICE_TRANSCRIPTIONS);
@@ -11016,11 +11178,11 @@ var InterruptionHandler = class extends EventEmitter {
11016
11178
  getState() {
11017
11179
  return {
11018
11180
  isSpeaking: this.isSpeaking,
11019
- speechDurationMs: this.isSpeaking ? performance.now() - this.speechStartTime : 0
11181
+ speechDurationMs: this.isSpeaking ? getClock().now() - this.speechStartTime : 0
11020
11182
  };
11021
11183
  }
11022
11184
  onSpeechDetected(rms) {
11023
- const now = performance.now();
11185
+ const now = getClock().now();
11024
11186
  this.lastSpeechTime = now;
11025
11187
  if (this.silenceTimer) {
11026
11188
  clearTimeout(this.silenceTimer);
@@ -11237,7 +11399,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
11237
11399
  this.setupEventHandlers();
11238
11400
  this.recognition.start();
11239
11401
  this.isListening = true;
11240
- this.startTime = performance.now();
11402
+ this.startTime = getClock().now();
11241
11403
  this.accumulatedText = "";
11242
11404
  logger33.info("Speech recognition started", {
11243
11405
  language: this.config.language
@@ -11338,7 +11500,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
11338
11500
  const speechResult = {
11339
11501
  text: isFinal ? this.accumulatedText.trim() : text,
11340
11502
  language: this.config.language,
11341
- inferenceTimeMs: performance.now() - this.startTime,
11503
+ inferenceTimeMs: getClock().now() - this.startTime,
11342
11504
  isFinal,
11343
11505
  confidence: alternative.confidence
11344
11506
  };
@@ -11370,13 +11532,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
11370
11532
  this.isListening = false;
11371
11533
  logger33.info("Speech recognition ended", {
11372
11534
  totalText: this.accumulatedText.length,
11373
- durationMs: performance.now() - this.startTime
11535
+ durationMs: getClock().now() - this.startTime
11374
11536
  });
11375
11537
  if (this.stopResolver) {
11376
11538
  const result = {
11377
11539
  text: this.accumulatedText.trim(),
11378
11540
  language: this.config.language,
11379
- inferenceTimeMs: performance.now() - this.startTime,
11541
+ inferenceTimeMs: getClock().now() - this.startTime,
11380
11542
  isFinal: true
11381
11543
  };
11382
11544
  this.stopResolver(result);
@@ -11420,6 +11582,303 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
11420
11582
  }
11421
11583
  };
11422
11584
 
11585
// src/inference/ElevenLabsTTSBackend.ts
var logger34 = createLogger("ElevenLabsTTS");
// Defaults applied when the corresponding config field is omitted.
var DEFAULT_MODEL = "eleven_multilingual_v2";
var DEFAULT_OUTPUT_FORMAT = "pcm_16000";
var DEFAULT_STABILITY = 0.5;
var DEFAULT_SIMILARITY_BOOST = 0.75;
var DEFAULT_BASE_URL = "https://api.elevenlabs.io";
// Accepted `outputFormat` values mapped to the sample rate (Hz) reported by `sampleRate`.
var FORMAT_TO_SAMPLE_RATE = {
  pcm_16000: 16e3,
  pcm_22050: 22050,
  pcm_24000: 24e3,
  pcm_44100: 44100
};
/**
 * Cloud TTS backend that requests 16-bit PCM from the ElevenLabs HTTP API and
 * yields it as Float32 audio chunks.
 */
var ElevenLabsTTSBackend = class {
  /**
   * @param config Backend configuration. `apiKey` and `voiceId` are required;
   *   `model`, `outputFormat`, `stability`, `similarityBoost` and `baseUrl`
   *   fall back to the module defaults.
   * @throws Error if `apiKey` or `voiceId` is missing, or if `outputFormat`
   *   is not one of the keys of FORMAT_TO_SAMPLE_RATE.
   */
  constructor(config) {
    this._isLoaded = false;
    if (!config.apiKey) throw new Error("ElevenLabsTTS: apiKey is required");
    if (!config.voiceId) throw new Error("ElevenLabsTTS: voiceId is required");
    this.apiKey = config.apiKey;
    this.voiceId = config.voiceId;
    this.model = config.model ?? DEFAULT_MODEL;
    this.outputFormat = config.outputFormat ?? DEFAULT_OUTPUT_FORMAT;
    this.stability = config.stability ?? DEFAULT_STABILITY;
    this.similarityBoost = config.similarityBoost ?? DEFAULT_SIMILARITY_BOOST;
    this.baseUrl = config.baseUrl ?? DEFAULT_BASE_URL;
    // Own-property lookup so inherited keys such as "constructor" are rejected.
    const rate = Object.prototype.hasOwnProperty.call(FORMAT_TO_SAMPLE_RATE, this.outputFormat) ? FORMAT_TO_SAMPLE_RATE[this.outputFormat] : void 0;
    if (!rate) {
      throw new Error(
        `ElevenLabsTTS: unsupported outputFormat "${this.outputFormat}". Supported: ${Object.keys(FORMAT_TO_SAMPLE_RATE).join(", ")}`
      );
    }
    this._sampleRate = rate;
  }
  /** Sample rate (Hz) implied by the configured output format. */
  get sampleRate() {
    return this._sampleRate;
  }
  /** True after load() and until dispose(). */
  get isLoaded() {
    return this._isLoaded;
  }
  // ─── Load ───────────────────────────────────────────────────────────────
  /**
   * No-op for cloud TTS (no model to load).
   * Marks backend as ready.
   */
  async load() {
    this._isLoaded = true;
    logger34.info("ElevenLabs TTS ready", { voiceId: this.voiceId, model: this.model });
  }
  // ─── Stream ─────────────────────────────────────────────────────────────
  /**
   * Request speech for `text` and yield it as Float32 audio chunks.
   *
   * POSTs to `{baseUrl}/v1/text-to-speech/{voiceId}` and reads the response
   * body incrementally, yielding one chunk per network read. When the
   * response exposes no readable body, the whole payload is yielded as a
   * single chunk.
   * NOTE(review): the request path has no `/stream` suffix; confirm against
   * the ElevenLabs API reference whether the dedicated streaming route is
   * wanted here.
   *
   * @param text Text to synthesize; surrounding whitespace is trimmed.
   * @param options Optional settings. Only `options.signal` (an AbortSignal)
   *   is read here.
   * @yields `{ audio, duration, text }` where `duration` is in seconds.
   * @throws Error if the backend is not loaded, the text is empty, the HTTP
   *   response is not OK, or the transfer fails. Aborts end the generator
   *   silently instead of throwing.
   */
  async *stream(text, options) {
    if (!this._isLoaded) {
      throw new Error("ElevenLabsTTS: not loaded. Call load() first.");
    }
    const trimmed = text.trim();
    if (trimmed.length === 0) {
      throw new Error("ElevenLabsTTS: text must not be empty");
    }
    const startTime = getClock().now();
    const telemetry = getTelemetry();
    const span = telemetry?.startSpan("ElevenLabsTTS.stream", {
      "tts.text_length": trimmed.length,
      "tts.voice_id": this.voiceId,
      "tts.model": this.model
    });
    // The span must be closed exactly once on every exit path, including an
    // early return() from the consumer while suspended at a yield.
    let spanOpen = true;
    const endSpan = (error) => {
      if (!spanOpen) return;
      spanOpen = false;
      if (error) {
        span?.endWithError(error);
      } else {
        span?.end();
      }
    };
    const recordSuccess = (durationS, latencyMs) => {
      span?.setAttributes({ "tts.duration_s": durationS, "tts.latency_ms": latencyMs });
      endSpan();
      telemetry?.recordHistogram("omote.inference.latency", latencyMs, {
        model: "elevenlabs-tts",
        backend: "cloud"
      });
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "elevenlabs-tts",
        backend: "cloud",
        status: "success"
      });
    };
    // voiceId is caller-supplied; encode it so it cannot alter the request path.
    const url = `${this.baseUrl}/v1/text-to-speech/${encodeURIComponent(this.voiceId)}?output_format=${this.outputFormat}`;
    try {
      const response = await fetch(url, {
        method: "POST",
        headers: {
          "xi-api-key": this.apiKey,
          "Content-Type": "application/json",
          Accept: "audio/pcm"
        },
        body: JSON.stringify({
          text: trimmed,
          model_id: this.model,
          voice_settings: {
            stability: this.stability,
            similarity_boost: this.similarityBoost
          }
        }),
        signal: options?.signal
      });
      if (!response.ok) {
        const errorText = await response.text().catch(() => "unknown");
        const msg = `ElevenLabsTTS: HTTP ${response.status} \u2014 ${this.getHttpErrorMessage(response.status, errorText)}`;
        logger34.error(msg);
        throw new Error(msg);
      }
      if (!response.body) {
        // No readable stream available: decode the whole payload at once.
        const buffer = await response.arrayBuffer();
        // 16-bit PCM needs an even byte count; ignore a dangling final byte.
        const evenLength = buffer.byteLength & ~1;
        const audio = pcm16ToFloat32(evenLength === buffer.byteLength ? buffer : buffer.slice(0, evenLength));
        const duration = audio.length / this._sampleRate;
        recordSuccess(duration, getClock().now() - startTime);
        yield { audio, duration, text: trimmed };
        return;
      }
      const reader = response.body.getReader();
      let totalSamples = 0;
      // Network chunks can split a 16-bit sample in two. The dangling low byte
      // is carried into the next chunk; dropping it would shift every
      // following sample by one byte and corrupt the audio. -1 means "none".
      let carryByte = -1;
      let drained = false;
      try {
        while (true) {
          if (options?.signal?.aborted) {
            logger34.debug("Stream aborted by signal");
            return;
          }
          const { done, value } = await reader.read();
          if (done) {
            drained = true;
            break;
          }
          if (!value || value.byteLength === 0) continue;
          let chunk = value;
          if (carryByte >= 0) {
            const joined = new Uint8Array(value.byteLength + 1);
            joined[0] = carryByte;
            joined.set(value, 1);
            chunk = joined;
            carryByte = -1;
          }
          const usableBytes = chunk.byteLength & ~1;
          if (usableBytes < chunk.byteLength) {
            carryByte = chunk[chunk.byteLength - 1];
          }
          if (usableBytes === 0) continue;
          const audio = pcm16ToFloat32(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + usableBytes));
          const duration = audio.length / this._sampleRate;
          totalSamples += audio.length;
          yield { audio, duration, text: trimmed };
        }
      } finally {
        if (!drained) {
          // Abort, read failure, or early consumer return: stop the transfer.
          // cancel() can reject on an already-errored stream; that is not
          // actionable here.
          await reader.cancel().catch(() => {
          });
        }
        reader.releaseLock();
      }
      const latency = getClock().now() - startTime;
      const totalDuration = totalSamples / this._sampleRate;
      logger34.debug("Stream complete", {
        totalDuration: `${totalDuration.toFixed(2)}s`,
        latencyMs: Math.round(latency),
        totalSamples
      });
      recordSuccess(totalDuration, latency);
    } catch (err) {
      // Match on the error name instead of `instanceof DOMException`: that
      // global is not defined in every runtime, and referencing it there
      // would throw from inside this handler and mask the real error.
      const aborted = typeof err === "object" && err !== null && err.name === "AbortError";
      if (aborted) {
        logger34.debug("Stream aborted");
        endSpan();
        return;
      }
      const errMsg = err instanceof Error ? err.message : String(err);
      logger34.error("Stream failed", { error: errMsg });
      endSpan(err instanceof Error ? err : new Error(String(err)));
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "elevenlabs-tts",
        backend: "cloud",
        status: "error"
      });
      throw err;
    } finally {
      // Covers the signal-abort return and a consumer that stops iterating early.
      endSpan();
    }
  }
  // ─── Dispose ────────────────────────────────────────────────────────────
  /** Mark the backend as not loaded; stream() throws until load() is called again. */
  async dispose() {
    this._isLoaded = false;
    logger34.info("ElevenLabs TTS disposed");
  }
  // ─── Private ────────────────────────────────────────────────────────────
  /**
   * Map an HTTP status to a short human-readable explanation.
   *
   * @param status HTTP status code of the failed response.
   * @param body Response body text, used for 400 and for unlisted statuses.
   * @returns Message fragment appended to the thrown error.
   */
  getHttpErrorMessage(status, body) {
    switch (status) {
      case 401:
        return "Unauthorized \u2014 check your API key";
      case 403:
        return "Forbidden \u2014 API key lacks required permissions";
      case 429:
        return "Rate limited \u2014 too many requests";
      case 400:
        return `Bad request \u2014 ${body}`;
      default:
        return body || `HTTP error ${status}`;
    }
  }
};
11773
+
11774
// src/inference/PollyTTSBackend.ts
var logger35 = createLogger("PollyTTS");
// Defaults applied when the corresponding config field is omitted.
var DEFAULT_VOICE = "Joanna";
var DEFAULT_SAMPLE_RATE = 16e3;
/**
 * Cloud TTS backend that delegates synthesis to a consumer-supplied function
 * and converts the returned 16-bit PCM to Float32 audio.
 */
var PollyTTSBackend = class {
  /**
   * @param config Backend configuration. `synthesizeFn` is required and is
   *   called as `synthesizeFn(text, voice, sampleRate)`; `voice`,
   *   `sampleRate` and `engine` are optional.
   * @throws Error if `synthesizeFn` is missing or not callable.
   */
  constructor(config) {
    this._isLoaded = false;
    if (typeof config.synthesizeFn !== "function") {
      throw new Error("PollyTTS: synthesizeFn is required");
    }
    this.synthesizeFn = config.synthesizeFn;
    this.voice = config.voice ?? DEFAULT_VOICE;
    this._sampleRate = config.sampleRate ?? DEFAULT_SAMPLE_RATE;
    this.engine = config.engine ?? "neural";
  }
  /** Sample rate (Hz) passed to synthesizeFn and used to compute durations. */
  get sampleRate() {
    return this._sampleRate;
  }
  /** True after load() and until dispose(). */
  get isLoaded() {
    return this._isLoaded;
  }
  // ─── Load ───────────────────────────────────────────────────────────────
  /**
   * No-op for cloud TTS (no model to load).
   * Marks backend as ready.
   */
  async load() {
    this._isLoaded = true;
    logger35.info("Polly TTS ready", { voice: this.voice, engine: this.engine, sampleRate: this._sampleRate });
  }
  // ─── Stream ─────────────────────────────────────────────────────────────
  /**
   * Synthesize audio via consumer's Polly function.
   *
   * Polly's SynthesizeSpeech is request/response (not streaming for PCM),
   * so this yields a single chunk per call. For long text, consider splitting
   * into sentences on the consumer side.
   *
   * @param text Text to synthesize; surrounding whitespace is trimmed.
   * @param options Optional settings: `voice` overrides the configured voice
   *   for this call; `signal` is checked before and after synthesis (it is
   *   not forwarded to synthesizeFn).
   * @yields `{ audio, duration, text }` where `duration` is in seconds.
   * @throws Error if the backend is not loaded, the text is empty,
   *   synthesizeFn rejects, or it resolves without audio.
   */
  async *stream(text, options) {
    if (!this._isLoaded) {
      throw new Error("PollyTTS: not loaded. Call load() first.");
    }
    const trimmed = text.trim();
    if (trimmed.length === 0) {
      throw new Error("PollyTTS: text must not be empty");
    }
    if (options?.signal?.aborted) {
      return;
    }
    const voiceName = options?.voice ?? this.voice;
    const startTime = getClock().now();
    const telemetry = getTelemetry();
    const span = telemetry?.startSpan("PollyTTS.stream", {
      "tts.text_length": trimmed.length,
      "tts.voice": voiceName,
      "tts.engine": this.engine
    });
    let chunk;
    try {
      const result = await this.synthesizeFn(trimmed, voiceName, this._sampleRate);
      if (options?.signal?.aborted) {
        span?.end();
        return;
      }
      // Fail with a clear message instead of an opaque TypeError from the decoder.
      if (result == null || result.audio == null) {
        throw new Error("PollyTTS: synthesizeFn returned no audio");
      }
      const audio = pcm16ToFloat32(result.audio);
      const duration = audio.length / this._sampleRate;
      const latency = getClock().now() - startTime;
      logger35.debug("Synthesis complete", {
        voice: voiceName,
        duration: `${duration.toFixed(2)}s`,
        latencyMs: Math.round(latency),
        numSamples: audio.length
      });
      span?.setAttributes({ "tts.duration_s": duration, "tts.latency_ms": latency });
      span?.end();
      telemetry?.recordHistogram("omote.inference.latency", latency, {
        model: "polly-tts",
        backend: "cloud"
      });
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "polly-tts",
        backend: "cloud",
        status: "success"
      });
      chunk = { audio, duration, text: trimmed };
    } catch (err) {
      // Match on the error name instead of `instanceof DOMException`: that
      // global is not defined in every runtime, and referencing it there
      // would throw from inside this handler and mask the real error.
      const aborted = typeof err === "object" && err !== null && err.name === "AbortError";
      if (aborted) {
        logger35.debug("Synthesis aborted");
        span?.end();
        return;
      }
      const errMsg = err instanceof Error ? err.message : String(err);
      logger35.error("Synthesis failed", { error: errMsg });
      span?.endWithError(err instanceof Error ? err : new Error(String(err)));
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "polly-tts",
        backend: "cloud",
        status: "error"
      });
      throw err;
    }
    // Yield outside the try so an error thrown into the generator by the
    // consumer is not recorded as a synthesis failure after success was logged.
    yield chunk;
  }
  // ─── Dispose ────────────────────────────────────────────────────────────
  /** Mark the backend as not loaded; stream() throws until load() is called again. */
  async dispose() {
    this._isLoaded = false;
    logger35.info("Polly TTS disposed");
  }
};
11881
+
11423
11882
  // src/inference/ortConfig.ts
11424
11883
  var ortCdnBase = null;
11425
11884
  function configureOrtCdn(cdnPath) {
@@ -11433,7 +11892,7 @@ function getOrtCdnBase() {
11433
11892
  }
11434
11893
 
11435
11894
  // src/emotion/Emotion.ts
11436
- var logger34 = createLogger("EmotionController");
11895
+ var logger36 = createLogger("EmotionController");
11437
11896
  var EMOTION_NAMES = [
11438
11897
  "amazement",
11439
11898
  "anger",
@@ -11455,7 +11914,7 @@ function createEmotionVector(weights = {}) {
11455
11914
  if (idx >= 0) {
11456
11915
  vector[idx] = Math.max(0, Math.min(1, value));
11457
11916
  } else {
11458
- logger34.warn(`Invalid emotion name in createEmotionVector: "${name}"`);
11917
+ logger36.warn(`Invalid emotion name in createEmotionVector: "${name}"`);
11459
11918
  }
11460
11919
  }
11461
11920
  return vector;
@@ -11538,7 +11997,7 @@ var EmotionController = class {
11538
11997
  this.targetEmotion.set(newEmotion);
11539
11998
  this.currentEmotion.set(newEmotion);
11540
11999
  this.transitionProgress = 1;
11541
- logger34.debug("set", { weights });
12000
+ logger36.debug("set", { weights });
11542
12001
  }
11543
12002
  /**
11544
12003
  * Set emotion from preset immediately
@@ -11548,7 +12007,7 @@ var EmotionController = class {
11548
12007
  this.targetEmotion.set(newEmotion);
11549
12008
  this.currentEmotion.set(newEmotion);
11550
12009
  this.transitionProgress = 1;
11551
- logger34.debug("setPreset", { preset });
12010
+ logger36.debug("setPreset", { preset });
11552
12011
  }
11553
12012
  /**
11554
12013
  * Transition to new emotion over time
@@ -11560,9 +12019,9 @@ var EmotionController = class {
11560
12019
  this.currentEmotion.set(this.emotion);
11561
12020
  this.targetEmotion.set(createEmotionVector(weights));
11562
12021
  this.transitionDuration = durationMs;
11563
- this.transitionStartTime = performance.now();
12022
+ this.transitionStartTime = getClock().now();
11564
12023
  this.transitionProgress = 0;
11565
- logger34.debug("transitionTo", { weights, durationMs });
12024
+ logger36.debug("transitionTo", { weights, durationMs });
11566
12025
  }
11567
12026
  /**
11568
12027
  * Transition to preset over time
@@ -11571,7 +12030,7 @@ var EmotionController = class {
11571
12030
  this.currentEmotion.set(this.emotion);
11572
12031
  this.targetEmotion.set(getEmotionPreset(preset));
11573
12032
  this.transitionDuration = durationMs;
11574
- this.transitionStartTime = performance.now();
12033
+ this.transitionStartTime = getClock().now();
11575
12034
  this.transitionProgress = 0;
11576
12035
  }
11577
12036
  /**
@@ -11579,7 +12038,7 @@ var EmotionController = class {
11579
12038
  */
11580
12039
  update() {
11581
12040
  if (this.transitionProgress >= 1) return;
11582
- const elapsed = performance.now() - this.transitionStartTime;
12041
+ const elapsed = getClock().now() - this.transitionStartTime;
11583
12042
  this.transitionProgress = Math.min(1, elapsed / this.transitionDuration);
11584
12043
  }
11585
12044
  /**
@@ -11595,7 +12054,7 @@ var EmotionController = class {
11595
12054
  this.currentEmotion.fill(0);
11596
12055
  this.targetEmotion.fill(0);
11597
12056
  this.transitionProgress = 1;
11598
- logger34.debug("reset");
12057
+ logger36.debug("reset");
11599
12058
  }
11600
12059
  };
11601
12060
 
@@ -11676,7 +12135,7 @@ var DEFAULT_ANIMATION_CONFIG = {
11676
12135
  };
11677
12136
 
11678
12137
  // src/animation/AnimationGraph.ts
11679
- var logger35 = createLogger("AnimationGraph");
12138
+ var logger37 = createLogger("AnimationGraph");
11680
12139
  var AnimationGraph = class extends EventEmitter {
11681
12140
  constructor(config = {}) {
11682
12141
  super();
@@ -11709,7 +12168,7 @@ var AnimationGraph = class extends EventEmitter {
11709
12168
  this.stateEnterTime = Date.now();
11710
12169
  this.lastUpdateTime = Date.now();
11711
12170
  this.cachedOutput = this.computeOutput();
11712
- logger35.info("constructor", {
12171
+ logger37.info("constructor", {
11713
12172
  initialState: this.config.initialState,
11714
12173
  stateCount: this.config.states.length,
11715
12174
  transitionCount: this.config.transitions.length
@@ -11780,7 +12239,7 @@ var AnimationGraph = class extends EventEmitter {
11780
12239
  setState(stateName, blendDuration = 300) {
11781
12240
  const targetState = this.config.states.find((s) => s.name === stateName);
11782
12241
  if (!targetState) {
11783
- logger35.warn(`State '${stateName}' not found`);
12242
+ logger37.warn(`State '${stateName}' not found`);
11784
12243
  return;
11785
12244
  }
11786
12245
  if (targetState.name === this.currentState.name && !this.isTransitioning) {
@@ -11858,7 +12317,7 @@ var AnimationGraph = class extends EventEmitter {
11858
12317
  (s) => s.name === transition.to
11859
12318
  );
11860
12319
  if (!targetState) {
11861
- logger35.warn(`Target state '${transition.to}' not found`);
12320
+ logger37.warn(`Target state '${transition.to}' not found`);
11862
12321
  return;
11863
12322
  }
11864
12323
  const fromState = this.currentState.name;
@@ -11872,7 +12331,7 @@ var AnimationGraph = class extends EventEmitter {
11872
12331
  if (!this.currentState.emotionBlendEnabled) {
11873
12332
  this.targetEmotionWeight = 0;
11874
12333
  }
11875
- logger35.debug("state transition", {
12334
+ logger37.debug("state transition", {
11876
12335
  from: fromState,
11877
12336
  to: targetState.name,
11878
12337
  trigger: event,
@@ -11909,7 +12368,7 @@ var AnimationGraph = class extends EventEmitter {
11909
12368
  if (this.currentState.timeout <= 0) return;
11910
12369
  const elapsed = now - this.stateEnterTime;
11911
12370
  if (elapsed >= this.currentState.timeout) {
11912
- logger35.debug("timeout transition", {
12371
+ logger37.debug("timeout transition", {
11913
12372
  state: this.currentState.name,
11914
12373
  elapsed,
11915
12374
  timeout: this.currentState.timeout
@@ -12023,7 +12482,7 @@ var AnimationGraph = class extends EventEmitter {
12023
12482
 
12024
12483
  // src/animation/ProceduralLifeLayer.ts
12025
12484
  var import_simplex_noise = require("simplex-noise");
12026
- var logger36 = createLogger("ProceduralLifeLayer");
12485
+ var logger38 = createLogger("ProceduralLifeLayer");
12027
12486
  var simplex2d = (0, import_simplex_noise.createNoise2D)();
12028
12487
  var LIFE_BS_INDEX = /* @__PURE__ */ new Map();
12029
12488
  for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
@@ -12129,7 +12588,7 @@ var ProceduralLifeLayer = class {
12129
12588
  }
12130
12589
  this.blinkInterval = this.nextBlinkInterval();
12131
12590
  this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
12132
- logger36.debug("constructor", {
12591
+ logger38.debug("constructor", {
12133
12592
  blinkIntervalRange: this.blinkIntervalRange,
12134
12593
  useLogNormalBlinks: this.useLogNormalBlinks,
12135
12594
  gazeBreakIntervalRange: this.gazeBreakIntervalRange,
@@ -12233,7 +12692,7 @@ var ProceduralLifeLayer = class {
12233
12692
  * Reset all internal state to initial values.
12234
12693
  */
12235
12694
  reset() {
12236
- logger36.debug("reset");
12695
+ logger38.debug("reset");
12237
12696
  this.blinkTimer = 0;
12238
12697
  this.blinkInterval = this.nextBlinkInterval();
12239
12698
  this.blinkPhase = PHASE_OPEN;
@@ -12285,7 +12744,7 @@ var ProceduralLifeLayer = class {
12285
12744
  this.blinkTimer = 0;
12286
12745
  this.blinkInterval = this.nextBlinkInterval();
12287
12746
  this.asymmetryRight = 0.95 + Math.random() * 0.08;
12288
- logger36.trace("blink", { nextInterval: this.blinkInterval });
12747
+ logger38.trace("blink", { nextInterval: this.blinkInterval });
12289
12748
  }
12290
12749
  if (this.blinkPhase > PHASE_OPEN) {
12291
12750
  this.blinkProgress += delta;
@@ -12366,7 +12825,7 @@ var ProceduralLifeLayer = class {
12366
12825
  this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
12367
12826
  this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
12368
12827
  this.gazeBreakInterval = randomRange(...params.interval);
12369
- logger36.trace("gaze break", {
12828
+ logger38.trace("gaze break", {
12370
12829
  targetX: this.gazeBreakTargetX.toFixed(3),
12371
12830
  targetY: this.gazeBreakTargetY.toFixed(3),
12372
12831
  nextInterval: this.gazeBreakInterval.toFixed(2),
@@ -12609,7 +13068,7 @@ var ALL_AUS = [...new Set(
12609
13068
  )];
12610
13069
 
12611
13070
  // src/face/EmotionResolver.ts
12612
- var logger37 = createLogger("EmotionResolver");
13071
+ var logger39 = createLogger("EmotionResolver");
12613
13072
  var BS_INDEX = /* @__PURE__ */ new Map();
12614
13073
  for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
12615
13074
  BS_INDEX.set(LAM_BLENDSHAPES[i], i);
@@ -12636,7 +13095,7 @@ var EmotionResolver = class {
12636
13095
  if (!emotionWeight || emotionWeight < 0.01) continue;
12637
13096
  const auActivations = EMOTION_TO_AU[emotionName];
12638
13097
  if (!auActivations) {
12639
- logger37.warn(`Unknown emotion name with no AU mapping: "${emotionName}"`);
13098
+ logger39.warn(`Unknown emotion name with no AU mapping: "${emotionName}"`);
12640
13099
  continue;
12641
13100
  }
12642
13101
  for (const activation of auActivations) {
@@ -12661,7 +13120,7 @@ var EmotionResolver = class {
12661
13120
  };
12662
13121
 
12663
13122
  // src/face/FaceCompositor.ts
12664
- var logger38 = createLogger("FaceCompositor");
13123
+ var logger40 = createLogger("FaceCompositor");
12665
13124
  function smoothstep(t) {
12666
13125
  return t * t * (3 - 2 * t);
12667
13126
  }
@@ -12692,7 +13151,7 @@ var FaceCompositor = class {
12692
13151
  if (config?.profile) {
12693
13152
  this.applyProfileArrays(config.profile);
12694
13153
  }
12695
- logger38.debug("constructor", {
13154
+ logger40.debug("constructor", {
12696
13155
  emotionSmoothing: this.emotionSmoothing,
12697
13156
  hasProfile: !!config?.profile,
12698
13157
  hasLifeLayer: !!config?.lifeLayer
@@ -12708,7 +13167,7 @@ var FaceCompositor = class {
12708
13167
  * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
12709
13168
  */
12710
13169
  compose(base, input, target) {
12711
- const composeStart = performance.now();
13170
+ const composeStart = getClock().now();
12712
13171
  const out = target ?? this.outputBuffer;
12713
13172
  out.set(base);
12714
13173
  const emotion = input.emotion ?? this.stickyEmotion;
@@ -12755,7 +13214,7 @@ var FaceCompositor = class {
12755
13214
  }
12756
13215
  getTelemetry()?.recordHistogram(
12757
13216
  MetricNames.COMPOSITOR_COMPOSE_LATENCY,
12758
- (performance.now() - composeStart) * 1e3
13217
+ (getClock().now() - composeStart) * 1e3
12759
13218
  // µs
12760
13219
  );
12761
13220
  return { blendshapes: out, headDelta: lifeResult.headDelta };
@@ -12765,7 +13224,7 @@ var FaceCompositor = class {
12765
13224
  */
12766
13225
  setEmotion(weights) {
12767
13226
  this.stickyEmotion = weights;
12768
- logger38.debug("setEmotion", { weights });
13227
+ logger40.debug("setEmotion", { weights });
12769
13228
  }
12770
13229
  /**
12771
13230
  * Update character profile at runtime.
@@ -12774,7 +13233,7 @@ var FaceCompositor = class {
12774
13233
  this.multiplier.fill(1);
12775
13234
  this.offset.fill(0);
12776
13235
  this.applyProfileArrays(profile);
12777
- logger38.debug("setProfile", {
13236
+ logger40.debug("setProfile", {
12778
13237
  multiplierKeys: profile.multiplier ? Object.keys(profile.multiplier).length : 0,
12779
13238
  offsetKeys: profile.offset ? Object.keys(profile.offset).length : 0
12780
13239
  });
@@ -12788,7 +13247,7 @@ var FaceCompositor = class {
12788
13247
  this.lifeBuffer.fill(0);
12789
13248
  this.stickyEmotion = void 0;
12790
13249
  this.lifeLayer.reset();
12791
- logger38.debug("reset");
13250
+ logger40.debug("reset");
12792
13251
  }
12793
13252
  /** Expand partial profile maps into dense Float32Arrays */
12794
13253
  applyProfileArrays(profile) {
@@ -12873,7 +13332,7 @@ function parseEmotionTags(text) {
12873
13332
  }
12874
13333
 
12875
13334
  // src/character/CharacterController.ts
12876
- var logger39 = createLogger("CharacterController");
13335
+ var logger41 = createLogger("CharacterController");
12877
13336
  var FRAME_BUDGET_US = 33e3;
12878
13337
  var EMOTION_MAP = {
12879
13338
  // Synced with EmotionPresets (packages/core/src/emotion/Emotion.ts)
@@ -12943,7 +13402,7 @@ var CharacterController = class {
12943
13402
  this.gazeYawInfluence = config?.gaze?.yawInfluence ?? 0.4;
12944
13403
  this.gazePitchInfluence = config?.gaze?.pitchInfluence ?? 0.3;
12945
13404
  this.gazeSmoothing = config?.gaze?.smoothing ?? 5;
12946
- logger39.debug("constructor", {
13405
+ logger41.debug("constructor", {
12947
13406
  gazeEnabled: this.gazeEnabled,
12948
13407
  gazeYawInfluence: this.gazeYawInfluence,
12949
13408
  gazePitchInfluence: this.gazePitchInfluence,
@@ -12958,7 +13417,7 @@ var CharacterController = class {
12958
13417
  * into a single output frame.
12959
13418
  */
12960
13419
  update(input) {
12961
- const frameStart = performance.now();
13420
+ const frameStart = getClock().now();
12962
13421
  const base = input.baseBlendshapes ?? this.zeroBase;
12963
13422
  const eyeTargets = this.computeEyeTargets(
12964
13423
  input.cameraWorldPos,
@@ -12985,7 +13444,7 @@ var CharacterController = class {
12985
13444
  lifeHeadDelta,
12986
13445
  input.avatarRotationY ?? 0
12987
13446
  );
12988
- const frameUs = (performance.now() - frameStart) * 1e3;
13447
+ const frameUs = (getClock().now() - frameStart) * 1e3;
12989
13448
  this.frameTimes[this.frameTimeIdx] = frameUs;
12990
13449
  this.frameTimeIdx = (this.frameTimeIdx + 1) % this.frameTimes.length;
12991
13450
  if (this.frameTimeFill < this.frameTimes.length) this.frameTimeFill++;
@@ -13007,13 +13466,13 @@ var CharacterController = class {
13007
13466
  const resolved = resolveEmotion(emotion);
13008
13467
  if (resolved) {
13009
13468
  this._compositor.setEmotion(resolved);
13010
- logger39.debug("setEmotion", { emotion, resolved });
13469
+ logger41.debug("setEmotion", { emotion, resolved });
13011
13470
  }
13012
13471
  }
13013
13472
  /** Update character profile at runtime. */
13014
13473
  setProfile(profile) {
13015
13474
  this._compositor.setProfile(profile);
13016
- logger39.debug("setProfile", {
13475
+ logger41.debug("setProfile", {
13017
13476
  multiplierKeys: profile.multiplier ? Object.keys(profile.multiplier).length : 0,
13018
13477
  offsetKeys: profile.offset ? Object.keys(profile.offset).length : 0
13019
13478
  });
@@ -13048,11 +13507,11 @@ var CharacterController = class {
13048
13507
  this._compositor.reset();
13049
13508
  this.gazeHeadYaw = 0;
13050
13509
  this.gazeHeadPitch = -0.1;
13051
- logger39.debug("reset");
13510
+ logger41.debug("reset");
13052
13511
  }
13053
13512
  dispose() {
13054
13513
  this.reset();
13055
- logger39.debug("dispose");
13514
+ logger41.debug("dispose");
13056
13515
  }
13057
13516
  // ---------------------------------------------------------------------------
13058
13517
  // Eye angle math (extracted from r3f useGazeTracking.computeEyeTargets)
@@ -13134,7 +13593,7 @@ var CharacterController = class {
13134
13593
  };
13135
13594
 
13136
13595
  // src/orchestration/MicLipSync.ts
13137
- var logger40 = createLogger("MicLipSync");
13596
+ var logger42 = createLogger("MicLipSync");
13138
13597
  var MicLipSync = class extends EventEmitter {
13139
13598
  constructor(config) {
13140
13599
  super();
@@ -13153,7 +13612,7 @@ var MicLipSync = class extends EventEmitter {
13153
13612
  this.vadChunkSize = 0;
13154
13613
  this.vadBuffer = null;
13155
13614
  this.vadBufferOffset = 0;
13156
- logger40.info("MicLipSync created", {
13615
+ logger42.info("MicLipSync created", {
13157
13616
  sampleRate: config.sampleRate ?? 16e3,
13158
13617
  micChunkSize: config.micChunkSize ?? 512,
13159
13618
  hasVAD: !!config.vad,
@@ -13175,12 +13634,12 @@ var MicLipSync = class extends EventEmitter {
13175
13634
  this._currentFrame = scaled;
13176
13635
  if (!this._firstFrameEmitted) {
13177
13636
  this._firstFrameEmitted = true;
13178
- logger40.trace("First blendshape frame emitted");
13637
+ logger42.trace("First blendshape frame emitted");
13179
13638
  }
13180
13639
  this.emit("frame", { blendshapes: scaled, rawBlendshapes: raw });
13181
13640
  },
13182
13641
  onError: (error) => {
13183
- logger40.error("A2E inference error", { message: error.message });
13642
+ logger42.error("A2E inference error", { message: error.message });
13184
13643
  this.emit("error", error);
13185
13644
  }
13186
13645
  });
@@ -13188,7 +13647,9 @@ var MicLipSync = class extends EventEmitter {
13188
13647
  const float32 = int16ToFloat32(pcm);
13189
13648
  this.processor.pushAudio(float32);
13190
13649
  if (this.vad) {
13191
- this.vadQueue = this.vadQueue.then(() => this.processVAD(float32)).catch(() => {
13650
+ this.vadQueue = this.vadQueue.then(() => this.processVAD(float32)).catch((err) => {
13651
+ logger42.warn("VAD processing error", { error: String(err), code: ErrorCodes.SPH_VAD_ERROR });
13652
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
13192
13653
  });
13193
13654
  }
13194
13655
  });
@@ -13223,7 +13684,7 @@ var MicLipSync = class extends EventEmitter {
13223
13684
  /** Start microphone capture and inference loop */
13224
13685
  async start() {
13225
13686
  if (this._state === "active") return;
13226
- logger40.info("Starting MicLipSync");
13687
+ logger42.info("Starting MicLipSync");
13227
13688
  getTelemetry()?.incrementCounter(MetricNames.MIC_SESSIONS);
13228
13689
  await this.mic.start();
13229
13690
  this.processor.startDrip();
@@ -13233,7 +13694,7 @@ var MicLipSync = class extends EventEmitter {
13233
13694
  /** Stop microphone and inference */
13234
13695
  stop() {
13235
13696
  if (this._state === "idle") return;
13236
- logger40.info("Stopping MicLipSync");
13697
+ logger42.info("Stopping MicLipSync");
13237
13698
  this.processor.stopDrip();
13238
13699
  this.mic.stop();
13239
13700
  this._isSpeaking = false;
@@ -13275,14 +13736,15 @@ var MicLipSync = class extends EventEmitter {
13275
13736
  const wasSpeaking = this._isSpeaking;
13276
13737
  this._isSpeaking = result.isSpeech;
13277
13738
  if (!wasSpeaking && result.isSpeech) {
13278
- this.speechStartTime = performance.now();
13739
+ this.speechStartTime = getClock().now();
13279
13740
  this.emit("speech:start");
13280
13741
  } else if (wasSpeaking && !result.isSpeech) {
13281
- const durationMs = performance.now() - this.speechStartTime;
13742
+ const durationMs = getClock().now() - this.speechStartTime;
13282
13743
  this.emit("speech:end", { durationMs });
13283
13744
  }
13284
13745
  } catch (err) {
13285
- logger40.warn("VAD process error", { error: String(err) });
13746
+ logger42.warn("VAD process error", { error: String(err), code: ErrorCodes.SPH_VAD_ERROR });
13747
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
13286
13748
  }
13287
13749
  this.vadBufferOffset = 0;
13288
13750
  }
@@ -13299,7 +13761,7 @@ var MicLipSync = class extends EventEmitter {
13299
13761
  };
13300
13762
 
13301
13763
  // src/orchestration/VoicePipeline.ts
13302
- var logger41 = createLogger("VoicePipeline");
13764
+ var logger43 = createLogger("VoicePipeline");
13303
13765
  var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13304
13766
  constructor(config) {
13305
13767
  super();
@@ -13331,6 +13793,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13331
13793
  this.lastProgressiveSamples = 0;
13332
13794
  // ASR error recovery
13333
13795
  this.asrErrorCount = 0;
13796
+ this.progressiveErrorCount = 0;
13334
13797
  // Response abort
13335
13798
  this.responseAbortController = null;
13336
13799
  // Listener cleanup
@@ -13374,7 +13837,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13374
13837
  if (typeof requestAnimationFrame !== "undefined") {
13375
13838
  await new Promise((r) => requestAnimationFrame(() => r()));
13376
13839
  }
13377
- logger41.debug("Creating PlaybackPipeline", {
13840
+ logger43.debug("Creating PlaybackPipeline", {
13378
13841
  neutralTransitionEnabled: this.config.neutralTransitionEnabled ?? true,
13379
13842
  audioDelayMs: this.config.audioDelayMs,
13380
13843
  chunkTargetMs: this.config.chunkTargetMs
@@ -13414,8 +13877,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13414
13877
  this.setState("ready");
13415
13878
  } catch (error) {
13416
13879
  const err = error instanceof Error ? error : new Error(String(error));
13880
+ span?.setAttributes({ "error.type": ErrorTypes.MODEL });
13417
13881
  span?.endWithError(err);
13418
- logger41.error("Model loading failed", { message: err.message });
13882
+ logger43.error("Model loading failed", { message: err.message });
13419
13883
  this.emit("error", err);
13420
13884
  this.setState("error");
13421
13885
  throw err;
@@ -13429,7 +13893,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13429
13893
  const { backends } = this.config;
13430
13894
  if (!backends) throw new Error("No backends config");
13431
13895
  this.emitProgress("Loading models", 0, 1, 0);
13432
- logger41.info("Loading from pre-built backends");
13896
+ logger43.info("Loading from pre-built backends");
13433
13897
  const toLoad = [];
13434
13898
  if (!backends.asr.isLoaded) toLoad.push(backends.asr.load());
13435
13899
  if (!backends.lam.isLoaded) toLoad.push(backends.lam.load());
@@ -13463,7 +13927,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13463
13927
  } else if (UnifiedInferenceWorker.isSupported()) {
13464
13928
  this.unifiedWorker = new UnifiedInferenceWorker();
13465
13929
  await this.unifiedWorker.init();
13466
- logger41.info("Created internal unified worker", { backend: this.unifiedWorker.backend });
13930
+ logger43.info("Created internal unified worker", { backend: this.unifiedWorker.backend });
13467
13931
  }
13468
13932
  }
13469
13933
  this.emitProgress("Loading models", 0, 3, 0);
@@ -13500,17 +13964,17 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13500
13964
  throw asrResult.reason;
13501
13965
  }
13502
13966
  this.asr = asr;
13503
- logger41.info("SenseVoice loaded");
13967
+ logger43.info("SenseVoice loaded");
13504
13968
  if (vadResult.status === "rejected") {
13505
13969
  throw vadResult.reason;
13506
13970
  }
13507
13971
  this.vad = vad;
13508
- logger41.info("Silero VAD loaded");
13972
+ logger43.info("Silero VAD loaded");
13509
13973
  if (lamResult.status === "rejected") {
13510
13974
  throw lamResult.reason;
13511
13975
  }
13512
13976
  this.lam = lam;
13513
- logger41.info("LAM loaded");
13977
+ logger43.info("LAM loaded");
13514
13978
  } finally {
13515
13979
  clearInterval(progressInterval);
13516
13980
  }
@@ -13518,7 +13982,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13518
13982
  if (this.isLocalMode) {
13519
13983
  const localConfig = this.config;
13520
13984
  if (localConfig.ttsConfig && !localConfig.tts) {
13521
- logger41.info("Creating Kokoro TTS from config", {
13985
+ logger43.info("Creating Kokoro TTS from config", {
13522
13986
  hasUnifiedWorker: !!this.unifiedWorker,
13523
13987
  voice: localConfig.ttsConfig.defaultVoice
13524
13988
  });
@@ -13528,7 +13992,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13528
13992
  });
13529
13993
  }
13530
13994
  if (localConfig.tts && !localConfig.ttsConfig && isIOS()) {
13531
- logger41.warn(
13995
+ logger43.warn(
13532
13996
  "External TTS on iOS creates a separate ORT WASM instance, risking OOM. Prefer ttsConfig for automatic unified worker integration."
13533
13997
  );
13534
13998
  }
@@ -13536,9 +14000,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13536
14000
  throw new Error("VoicePipeline local mode requires either tts or ttsConfig");
13537
14001
  }
13538
14002
  if (!localConfig.tts.isLoaded) {
13539
- logger41.info("Loading local TTS model...");
14003
+ logger43.info("Loading local TTS model...");
13540
14004
  await localConfig.tts.load();
13541
- logger41.info("Local TTS model loaded");
14005
+ logger43.info("Local TTS model loaded");
13542
14006
  }
13543
14007
  }
13544
14008
  this.emitProgress("Loading models", 100, 3, 3);
@@ -13554,8 +14018,8 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13554
14018
  this.epoch++;
13555
14019
  this._sessionId = crypto.randomUUID();
13556
14020
  this.asrErrorCount = 0;
13557
- logger41.info("Starting voice pipeline", { sessionId: this._sessionId });
13558
- logger41.debug("Pipeline mode", { mode: this.isLocalMode ? "local" : "cloud" });
14021
+ logger43.info("Starting voice pipeline", { sessionId: this._sessionId });
14022
+ logger43.debug("Pipeline mode", { mode: this.isLocalMode ? "local" : "cloud" });
13559
14023
  this.mic = new MicrophoneCapture(this.omoteEvents, {
13560
14024
  sampleRate: 16e3,
13561
14025
  chunkSize: 512
@@ -13568,11 +14032,11 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13568
14032
  this.emit("audio:level", level);
13569
14033
  });
13570
14034
  await this.mic.start();
13571
- logger41.debug("Microphone started");
14035
+ logger43.debug("Microphone started");
13572
14036
  this.setState("listening");
13573
14037
  }
13574
14038
  stop() {
13575
- logger41.info("Stopping voice pipeline", { sessionId: this._sessionId });
14039
+ logger43.info("Stopping voice pipeline", { sessionId: this._sessionId });
13576
14040
  this.stopped = true;
13577
14041
  this.epoch++;
13578
14042
  this.clearSilenceTimer();
@@ -13601,7 +14065,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13601
14065
  this.playback?.setProfile(profile);
13602
14066
  }
13603
14067
  async dispose() {
13604
- logger41.debug("Disposing VoicePipeline");
14068
+ logger43.debug("Disposing VoicePipeline");
13605
14069
  this.stop();
13606
14070
  this.epoch++;
13607
14071
  await Promise.allSettled([
@@ -13635,19 +14099,20 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13635
14099
  if (result.isSpeech) {
13636
14100
  if (!wasSpeaking) {
13637
14101
  this.isSpeaking = true;
13638
- this.speechStartTime = performance.now();
14102
+ this.speechStartTime = getClock().now();
13639
14103
  this.audioBuffer = [];
13640
14104
  this.audioBufferSamples = 0;
13641
14105
  this.lastProgressiveResult = null;
13642
14106
  this.lastProgressiveSamples = 0;
13643
- logger41.debug("VAD speech start");
14107
+ this.progressiveErrorCount = 0;
14108
+ logger43.debug("VAD speech start");
13644
14109
  this.emit("speech:start");
13645
14110
  this.startProgressiveTranscription();
13646
14111
  }
13647
14112
  this.audioBuffer.push(new Float32Array(samples));
13648
14113
  this.audioBufferSamples += samples.length;
13649
14114
  if (this.audioBufferSamples >= _VoicePipeline.MAX_AUDIO_BUFFER_SAMPLES) {
13650
- logger41.warn("Audio buffer exceeded max, forcing transcription flush");
14115
+ logger43.warn("Audio buffer exceeded max, forcing transcription flush");
13651
14116
  this.onSilenceDetected();
13652
14117
  return;
13653
14118
  }
@@ -13663,7 +14128,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13663
14128
  }
13664
14129
  }
13665
14130
  } catch (err) {
13666
- logger41.warn("VAD error", { error: String(err) });
14131
+ logger43.warn("VAD error", { error: String(err) });
13667
14132
  }
13668
14133
  }
13669
14134
  // ---------------------------------------------------------------------------
@@ -13674,18 +14139,18 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13674
14139
  const extended = this.config.silenceTimeoutExtendedMs ?? 700;
13675
14140
  const adaptive = this.config.adaptiveTimeout ?? true;
13676
14141
  if (!adaptive) return base;
13677
- const speechDurationMs = performance.now() - this.speechStartTime;
14142
+ const speechDurationMs = getClock().now() - this.speechStartTime;
13678
14143
  return speechDurationMs > 3e3 ? extended : base;
13679
14144
  }
13680
14145
  onSilenceDetected() {
13681
14146
  const capturedEpoch = this.epoch;
13682
14147
  this.isSpeaking = false;
13683
- const durationMs = performance.now() - this.speechStartTime;
13684
- logger41.debug("VAD speech end", { durationMs: Math.round(durationMs) });
14148
+ const durationMs = getClock().now() - this.speechStartTime;
14149
+ logger43.debug("VAD speech end", { durationMs: Math.round(durationMs) });
13685
14150
  this.emit("speech:end", { durationMs });
13686
14151
  this.clearSilenceTimer();
13687
14152
  this.processEndOfSpeech(capturedEpoch).catch((err) => {
13688
- logger41.error("End of speech processing failed", { error: String(err) });
14153
+ logger43.error("End of speech processing failed", { error: String(err) });
13689
14154
  if (this.epoch === capturedEpoch && !this.stopped) {
13690
14155
  this.emit("error", err instanceof Error ? err : new Error(String(err)));
13691
14156
  this.setState("listening");
@@ -13699,7 +14164,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13699
14164
  const turnSpan = getTelemetry()?.startSpan("VoicePipeline.turn", {
13700
14165
  "session.id": this._sessionId ?? ""
13701
14166
  });
13702
- const turnStart = performance.now();
14167
+ const turnStart = getClock().now();
13703
14168
  if (this.progressivePromise) {
13704
14169
  try {
13705
14170
  await this.progressivePromise;
@@ -13724,7 +14189,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13724
14189
  const minEnergy = this.config.minAudioEnergy ?? 0.02;
13725
14190
  const durationSec = totalSamples / 16e3;
13726
14191
  if (durationSec < minDuration) {
13727
- logger41.info("Audio too short, discarding", { durationSec });
14192
+ logger43.info("Audio too short, discarding", { durationSec });
13728
14193
  turnSpan?.end();
13729
14194
  this.setState("listening");
13730
14195
  return;
@@ -13735,7 +14200,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13735
14200
  }
13736
14201
  rms = Math.sqrt(rms / fullAudio.length);
13737
14202
  if (rms < minEnergy) {
13738
- logger41.info("Audio too quiet, discarding", { rms });
14203
+ logger43.info("Audio too quiet, discarding", { rms });
13739
14204
  turnSpan?.end();
13740
14205
  this.setState("listening");
13741
14206
  return;
@@ -13746,12 +14211,12 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13746
14211
  const coverageThreshold = this.config.progressiveCoverageThreshold ?? 0.8;
13747
14212
  if (this.lastProgressiveResult && this.lastProgressiveResult.text.trim().length > 0 && this.lastProgressiveSamples >= totalSamples * coverageThreshold) {
13748
14213
  transcript = { ...this.lastProgressiveResult, isFinal: true };
13749
- logger41.info("Using progressive result", {
14214
+ logger43.info("Using progressive result", {
13750
14215
  coverage: (this.lastProgressiveSamples / totalSamples).toFixed(2),
13751
14216
  text: transcript.text
13752
14217
  });
13753
14218
  } else {
13754
- logger41.debug("Progressive result insufficient, running final transcription", {
14219
+ logger43.debug("Progressive result insufficient, running final transcription", {
13755
14220
  samples: totalSamples,
13756
14221
  hadProgressive: !!this.lastProgressiveResult
13757
14222
  });
@@ -13766,7 +14231,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13766
14231
  return;
13767
14232
  }
13768
14233
  if (!transcript || !transcript.text.trim()) {
13769
- logger41.info("No transcript, resuming listening");
14234
+ logger43.info("No transcript, resuming listening");
13770
14235
  turnSpan?.end();
13771
14236
  this.setState("listening");
13772
14237
  return;
@@ -13774,7 +14239,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13774
14239
  this.emit("transcript", transcript);
13775
14240
  getTelemetry()?.recordHistogram(
13776
14241
  MetricNames.VOICE_TURN_LATENCY,
13777
- performance.now() - turnStart,
14242
+ getClock().now() - turnStart,
13778
14243
  { mode: this.isLocalMode ? "local" : "cloud" }
13779
14244
  );
13780
14245
  await this.callResponseHandler(transcript, capturedEpoch, turnSpan?.getContext());
@@ -13788,7 +14253,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13788
14253
  const span = getTelemetry()?.startSpan("VoicePipeline.response", {
13789
14254
  "text.length": transcript.text.length
13790
14255
  }, parentContext);
13791
- const responseStart = performance.now();
14256
+ const responseStart = getClock().now();
13792
14257
  this.setState("speaking");
13793
14258
  this.interruption?.setAISpeaking(true);
13794
14259
  if (transcript.emotion) {
@@ -13805,7 +14270,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13805
14270
  }
13806
14271
  getTelemetry()?.recordHistogram(
13807
14272
  MetricNames.VOICE_RESPONSE_LATENCY,
13808
- performance.now() - responseStart,
14273
+ getClock().now() - responseStart,
13809
14274
  { mode: this.isLocalMode ? "local" : "cloud" }
13810
14275
  );
13811
14276
  span?.end();
@@ -13815,8 +14280,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13815
14280
  return;
13816
14281
  }
13817
14282
  const err = error instanceof Error ? error : new Error(String(error));
14283
+ span?.setAttributes({ "error.type": ErrorTypes.RUNTIME });
13818
14284
  span?.endWithError(err);
13819
- logger41.error("Response handler error", { message: err.message });
14285
+ logger43.error("Response handler error", { message: err.message });
13820
14286
  this.emit("error", err);
13821
14287
  if (this.epoch === capturedEpoch && !this.stopped) {
13822
14288
  this.interruption?.setAISpeaking(false);
@@ -13887,11 +14353,11 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13887
14353
  // ---------------------------------------------------------------------------
13888
14354
  handleInterruption() {
13889
14355
  if (this._state !== "speaking") return;
13890
- logger41.info("Interruption triggered");
14356
+ logger43.info("Interruption triggered");
13891
14357
  getTelemetry()?.incrementCounter(MetricNames.VOICE_INTERRUPTIONS);
13892
14358
  this.epoch++;
13893
14359
  if (this.responseAbortController) {
13894
- logger41.debug("Aborting in-flight response");
14360
+ logger43.debug("Aborting in-flight response");
13895
14361
  }
13896
14362
  this.responseAbortController?.abort();
13897
14363
  this.playback?.stop();
@@ -13929,7 +14395,15 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13929
14395
  this.lastProgressiveSamples = snapshotSamples;
13930
14396
  this.emit("transcript", { ...result, isFinal: false });
13931
14397
  }
13932
- } catch {
14398
+ } catch (err) {
14399
+ this.progressiveErrorCount++;
14400
+ if (this.progressiveErrorCount % 10 === 1) {
14401
+ logger43.warn("Progressive transcription error", {
14402
+ code: ErrorCodes.SPH_ASR_ERROR,
14403
+ count: this.progressiveErrorCount,
14404
+ error: String(err)
14405
+ });
14406
+ }
13933
14407
  }
13934
14408
  })();
13935
14409
  }, intervalMs);
@@ -13945,8 +14419,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13945
14419
  // ---------------------------------------------------------------------------
13946
14420
  async transcribeWithTimeout(audio) {
13947
14421
  if (!this.asr) return null;
14422
+ const currentEpoch = this.epoch;
13948
14423
  const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
13949
- const startTime = performance.now();
14424
+ const startTime = getClock().now();
13950
14425
  const span = getTelemetry()?.startSpan("VoicePipeline.transcribe", {
13951
14426
  "inference.input_samples": audio.length,
13952
14427
  "inference.input_duration_ms": audio.length / 16e3 * 1e3
@@ -13960,7 +14435,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13960
14435
  })
13961
14436
  ]);
13962
14437
  clearTimeout(timeoutId);
13963
- const latency = performance.now() - startTime;
14438
+ const latency = getClock().now() - startTime;
13964
14439
  this.asrErrorCount = 0;
13965
14440
  getTelemetry()?.recordHistogram(MetricNames.VOICE_TRANSCRIPTION_LATENCY, latency);
13966
14441
  getTelemetry()?.incrementCounter(MetricNames.VOICE_TRANSCRIPTIONS);
@@ -13974,14 +14449,18 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13974
14449
  inferenceTimeMs: latency
13975
14450
  };
13976
14451
  } catch (error) {
14452
+ span?.setAttributes({ "error.type": ErrorTypes.INFERENCE });
13977
14453
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
13978
14454
  this.asrErrorCount++;
13979
- logger41.warn("Transcription failed", {
14455
+ logger43.warn("Transcription failed", {
13980
14456
  attempt: this.asrErrorCount,
13981
14457
  error: String(error)
13982
14458
  });
13983
14459
  if (this.asrErrorCount >= 3 && this.config.models) {
13984
- logger41.warn("3 consecutive ASR errors, recreating session");
14460
+ if (this.epoch !== currentEpoch) return null;
14461
+ logger43.warn("3 consecutive ASR errors, recreating session", {
14462
+ code: ErrorCodes.SPH_ASR_ERROR
14463
+ });
13985
14464
  try {
13986
14465
  await this.asr.dispose();
13987
14466
  this.asr = createSenseVoice({
@@ -13991,9 +14470,10 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13991
14470
  unifiedWorker: this.unifiedWorker ?? void 0
13992
14471
  });
13993
14472
  await this.asr.load();
14473
+ if (this.epoch !== currentEpoch) return null;
13994
14474
  this.asrErrorCount = 0;
13995
14475
  } catch (recreateErr) {
13996
- logger41.error("ASR session recreation failed", { error: String(recreateErr) });
14476
+ logger43.error("ASR session recreation failed", { error: String(recreateErr) });
13997
14477
  }
13998
14478
  }
13999
14479
  return null;
@@ -14022,7 +14502,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
14022
14502
  // ---------------------------------------------------------------------------
14023
14503
  setState(state) {
14024
14504
  if (this._state === state) return;
14025
- logger41.info("State transition", { from: this._state, to: state });
14505
+ logger43.info("State transition", { from: this._state, to: state });
14026
14506
  this._state = state;
14027
14507
  this.emit("state", state);
14028
14508
  }
@@ -14041,7 +14521,7 @@ _VoicePipeline.MAX_AUDIO_BUFFER_SAMPLES = 16e3 * 30;
14041
14521
  var VoicePipeline = _VoicePipeline;
14042
14522
 
14043
14523
  // src/orchestration/VoiceOrchestrator.ts
14044
- var logger42 = createLogger("VoiceOrchestrator");
14524
+ var logger44 = createLogger("VoiceOrchestrator");
14045
14525
  var VoiceOrchestrator = class extends EventEmitter {
14046
14526
  constructor() {
14047
14527
  super(...arguments);
@@ -14091,7 +14571,7 @@ var VoiceOrchestrator = class extends EventEmitter {
14091
14571
  const epoch = ++this.connectEpoch;
14092
14572
  this._mode = config.mode ?? "local";
14093
14573
  this._sessionId = crypto.randomUUID();
14094
- logger42.info("Connecting voice orchestrator", { mode: this._mode });
14574
+ logger44.info("Connecting voice orchestrator", { mode: this._mode });
14095
14575
  if (this._mode === "local") {
14096
14576
  const localCfg = config;
14097
14577
  this.ttsSpeaker = new TTSSpeaker();
@@ -14144,7 +14624,7 @@ var VoiceOrchestrator = class extends EventEmitter {
14144
14624
  } else {
14145
14625
  this.wireCloudTranscript(config);
14146
14626
  }
14147
- logger42.info("Voice orchestrator connected", { mode: this._mode });
14627
+ logger44.info("Voice orchestrator connected", { mode: this._mode });
14148
14628
  }
14149
14629
  async disconnect() {
14150
14630
  this.connectEpoch++;
@@ -14258,7 +14738,7 @@ var VoiceOrchestrator = class extends EventEmitter {
14258
14738
  await this.speak(text);
14259
14739
  }
14260
14740
  } catch (e) {
14261
- logger42.error("Voice transcript handler error", { error: String(e) });
14741
+ logger44.error("Voice transcript handler error", { error: String(e) });
14262
14742
  } finally {
14263
14743
  this.interruption?.setAISpeaking(false);
14264
14744
  this.speechListener?.resume();
@@ -14299,7 +14779,7 @@ var VoiceOrchestrator = class extends EventEmitter {
14299
14779
  });
14300
14780
  } catch (e) {
14301
14781
  if (!abortController.signal.aborted) {
14302
- logger42.error("Cloud response handler error", { error: String(e) });
14782
+ logger44.error("Cloud response handler error", { error: String(e) });
14303
14783
  }
14304
14784
  } finally {
14305
14785
  this.responseAbortController = null;
@@ -14313,7 +14793,7 @@ var VoiceOrchestrator = class extends EventEmitter {
14313
14793
  // -------------------------------------------------------------------------
14314
14794
  handleInterruption() {
14315
14795
  if (this._state !== "speaking") return;
14316
- logger42.info("Interruption triggered");
14796
+ logger44.info("Interruption triggered");
14317
14797
  this.stopSpeaking();
14318
14798
  this.speechListener?.resume();
14319
14799
  this.setState("listening");