@omote/core 0.9.1 → 0.9.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -4,19 +4,22 @@ import {
4
4
  import {
5
5
  ConsoleExporter,
6
6
  DEFAULT_LOGGING_CONFIG,
7
+ ErrorCodes,
7
8
  LOG_LEVEL_PRIORITY,
8
9
  OTLPExporter,
9
10
  OmoteTelemetry,
11
+ configureClock,
10
12
  configureLogging,
11
13
  configureTelemetry,
12
14
  createLogger,
15
+ getClock,
13
16
  getLoggingConfig,
14
17
  getTelemetry,
15
18
  noopLogger,
16
19
  resetLoggingConfig,
17
20
  setLogLevel,
18
21
  setLoggingEnabled
19
- } from "./chunk-3NDJA3I4.mjs";
22
+ } from "./chunk-X5OTUOE6.mjs";
20
23
 
21
24
  // src/audio/audioConvert.ts
22
25
  function float32ToPcm16(samples) {
@@ -168,7 +171,7 @@ var MicrophoneCapture = class {
168
171
  const pcm = this.floatToPCM16(chunk);
169
172
  this.events.emit("audio.chunk", {
170
173
  pcm,
171
- timestamp: performance.now()
174
+ timestamp: getClock().now()
172
175
  });
173
176
  chunkCount++;
174
177
  }
@@ -399,11 +402,23 @@ var AudioScheduler = class {
399
402
  source.connect(gainNode);
400
403
  const scheduleTime = this.nextPlayTime;
401
404
  if (scheduleTime < ctx.currentTime) {
402
- logger2.warn("Audio gap detected", {
403
- scheduleTime,
404
- currentTime: ctx.currentTime,
405
- gapSec: ctx.currentTime - scheduleTime
406
- });
405
+ const gap = ctx.currentTime - scheduleTime;
406
+ const gapMs = gap * 1e3;
407
+ if (gap > 0.5) {
408
+ logger2.error("Critical audio scheduling gap", {
409
+ code: ErrorCodes.AUD_SCHEDULE_GAP,
410
+ scheduleTime,
411
+ currentTime: ctx.currentTime,
412
+ gapMs: Math.round(gapMs)
413
+ });
414
+ this.options.onError?.(new Error(`Audio scheduling gap: ${gap.toFixed(3)}s`));
415
+ } else {
416
+ logger2.warn("Audio gap detected", {
417
+ scheduleTime,
418
+ currentTime: ctx.currentTime,
419
+ gapMs: Math.round(gapMs)
420
+ });
421
+ }
407
422
  }
408
423
  source.start(scheduleTime);
409
424
  const entry = { source, gainNode };
@@ -762,7 +777,7 @@ var _A2EProcessor = class _A2EProcessor {
762
777
  if (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp <= currentTime) {
763
778
  const { frame } = this.timestampedQueue.shift();
764
779
  this.lastPulledFrame = frame;
765
- this.lastDequeuedTime = performance.now();
780
+ this.lastDequeuedTime = getClock().now();
766
781
  return frame;
767
782
  }
768
783
  if (this.timestampedQueue.length > 0 && this.getFrameCallCount % 60 === 0) {
@@ -774,7 +789,7 @@ var _A2EProcessor = class _A2EProcessor {
774
789
  });
775
790
  }
776
791
  if (this.lastPulledFrame) {
777
- const elapsed = performance.now() - this.lastDequeuedTime;
792
+ const elapsed = getClock().now() - this.lastDequeuedTime;
778
793
  if (elapsed < HOLD_DURATION_MS) {
779
794
  return this.lastPulledFrame;
780
795
  }
@@ -859,9 +874,9 @@ var _A2EProcessor = class _A2EProcessor {
859
874
  while (this.pendingChunks.length > 0 && !this.disposed) {
860
875
  const { chunk, timestamp } = this.pendingChunks.shift();
861
876
  try {
862
- const t0 = performance.now();
877
+ const t0 = getClock().now();
863
878
  const result = await this.backend.infer(chunk, this.identityIndex);
864
- const inferMs = Math.round(performance.now() - t0);
879
+ const inferMs = Math.round(getClock().now() - t0);
865
880
  const actualDuration = chunk.length / this.sampleRate;
866
881
  const actualFrameCount = Math.ceil(actualDuration * FRAME_RATE);
867
882
  const framesToQueue = Math.min(actualFrameCount, result.blendshapes.length);
@@ -900,7 +915,11 @@ var _A2EProcessor = class _A2EProcessor {
900
915
  }
901
916
  handleError(err) {
902
917
  const error = err instanceof Error ? err : new Error(String(err));
903
- logger4.warn("A2EProcessor inference error", { error: error.message });
918
+ const isOOM = typeof err === "number" || error.message && /out of memory|oom|alloc/i.test(error.message);
919
+ logger4.warn("A2EProcessor inference error", {
920
+ error: error.message,
921
+ code: isOOM ? ErrorCodes.INF_OOM : ErrorCodes.INF_SESSION_POISON
922
+ });
904
923
  this.onError?.(error);
905
924
  }
906
925
  };
@@ -922,6 +941,12 @@ var MetricNames = {
922
941
  CACHE_HITS: "omote.cache.hits",
923
942
  /** Counter: Cache misses */
924
943
  CACHE_MISSES: "omote.cache.misses",
944
+ /** Counter: Cache stale (version/etag mismatch) */
945
+ CACHE_STALE: "omote.cache.stale",
946
+ /** Counter: Cache quota warning (>90% used) */
947
+ CACHE_QUOTA_WARNING: "omote.cache.quota_warning",
948
+ /** Counter: Cache eviction (LRU) */
949
+ CACHE_EVICTION: "omote.cache.eviction",
925
950
  // --- Pipeline ---
926
951
  /** Histogram: VoicePipeline turn latency (speech end → transcript ready, excludes playback) */
927
952
  VOICE_TURN_LATENCY: "omote.voice.turn.latency",
@@ -1221,14 +1246,14 @@ var PlaybackPipeline = class extends EventEmitter {
1221
1246
  this._currentRawFrame = null;
1222
1247
  this.cancelNeutralTransition();
1223
1248
  this.scheduler.warmup();
1224
- this.sessionStartTime = performance.now();
1249
+ this.sessionStartTime = getClock().now();
1225
1250
  this.startFrameLoop();
1226
1251
  this.startMonitoring();
1227
1252
  this.setState("playing");
1228
1253
  }
1229
1254
  /** Feed a streaming audio chunk (PCM16 Uint8Array) */
1230
1255
  async onAudioChunk(chunk) {
1231
- const chunkStart = performance.now();
1256
+ const chunkStart = getClock().now();
1232
1257
  const combined = this.coalescer.add(chunk);
1233
1258
  if (!combined) return;
1234
1259
  const float32 = pcm16ToFloat32(combined);
@@ -1238,7 +1263,7 @@ var PlaybackPipeline = class extends EventEmitter {
1238
1263
  this.emit("playback:start", { time: scheduleTime });
1239
1264
  }
1240
1265
  this.processor.pushAudio(float32, scheduleTime);
1241
- getTelemetry()?.recordHistogram(MetricNames.PLAYBACK_CHUNK_LATENCY, performance.now() - chunkStart);
1266
+ getTelemetry()?.recordHistogram(MetricNames.PLAYBACK_CHUNK_LATENCY, getClock().now() - chunkStart);
1242
1267
  }
1243
1268
  /** Signal end of audio stream (flushes remaining audio) */
1244
1269
  async end() {
@@ -1341,15 +1366,15 @@ var PlaybackPipeline = class extends EventEmitter {
1341
1366
  const currentTime = this.scheduler.getCurrentTime();
1342
1367
  const lamFrame = this.processor.getFrameForTime(currentTime);
1343
1368
  if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
1344
- this.lastNewFrameTime = performance.now();
1369
+ this.lastNewFrameTime = getClock().now();
1345
1370
  this.lastKnownLamFrame = lamFrame;
1346
1371
  this.staleWarningEmitted = false;
1347
1372
  }
1348
- if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
1373
+ if (this.playbackStarted && this.lastNewFrameTime > 0 && getClock().now() - this.lastNewFrameTime > this.staleThresholdMs) {
1349
1374
  if (!this.staleWarningEmitted) {
1350
1375
  this.staleWarningEmitted = true;
1351
1376
  logger5.warn("A2E stalled \u2014 no new inference frames", {
1352
- staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
1377
+ staleDurationMs: Math.round(getClock().now() - this.lastNewFrameTime),
1353
1378
  queuedFrames: this.processor.queuedFrameCount
1354
1379
  });
1355
1380
  }
@@ -1389,7 +1414,7 @@ var PlaybackPipeline = class extends EventEmitter {
1389
1414
  if (this.sessionStartTime > 0) {
1390
1415
  getTelemetry()?.recordHistogram(
1391
1416
  MetricNames.PLAYBACK_SESSION_DURATION,
1392
- performance.now() - this.sessionStartTime
1417
+ getClock().now() - this.sessionStartTime
1393
1418
  );
1394
1419
  }
1395
1420
  this.stopInternal();
@@ -1407,9 +1432,9 @@ var PlaybackPipeline = class extends EventEmitter {
1407
1432
  // ---------------------------------------------------------------------------
1408
1433
  startNeutralTransition(fromFrame) {
1409
1434
  this.neutralTransitionFrame = new Float32Array(fromFrame);
1410
- this.neutralTransitionStart = performance.now();
1435
+ this.neutralTransitionStart = getClock().now();
1411
1436
  const animate = () => {
1412
- const elapsed = performance.now() - this.neutralTransitionStart;
1437
+ const elapsed = getClock().now() - this.neutralTransitionStart;
1413
1438
  const t = Math.min(1, elapsed / this.neutralTransitionMs);
1414
1439
  const eased = 1 - Math.pow(1 - t, 3);
1415
1440
  logger5.trace("neutral transition", { t: Math.round(t * 1e3) / 1e3, eased: Math.round(eased * 1e3) / 1e3 });
@@ -1422,7 +1447,7 @@ var PlaybackPipeline = class extends EventEmitter {
1422
1447
  blendshapes,
1423
1448
  rawBlendshapes: blendshapes,
1424
1449
  // raw = scaled during transition
1425
- timestamp: performance.now() / 1e3,
1450
+ timestamp: getClock().now() / 1e3,
1426
1451
  emotion: this._emotion ?? void 0
1427
1452
  };
1428
1453
  this.emit("frame", frame);
@@ -1653,7 +1678,7 @@ var ModelCache = class {
1653
1678
  logger7.warn("Failed to request persistent storage", { error: String(err) });
1654
1679
  }
1655
1680
  }
1656
- const dbOpenStart = performance.now();
1681
+ const dbOpenStart = getClock().now();
1657
1682
  this.dbPromise = new Promise((resolve, reject) => {
1658
1683
  const request = indexedDB.open(DB_NAME, DB_VERSION);
1659
1684
  request.onerror = () => {
@@ -1662,7 +1687,7 @@ var ModelCache = class {
1662
1687
  };
1663
1688
  request.onsuccess = () => {
1664
1689
  this.db = request.result;
1665
- logger7.debug("IndexedDB opened", { durationMs: Math.round(performance.now() - dbOpenStart) });
1690
+ logger7.debug("IndexedDB opened", { durationMs: Math.round(getClock().now() - dbOpenStart) });
1666
1691
  resolve(this.db);
1667
1692
  };
1668
1693
  request.onupgradeneeded = (event) => {
@@ -1736,16 +1761,16 @@ var ModelCache = class {
1736
1761
  }
1737
1762
  span?.end();
1738
1763
  if (hit) {
1739
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
1764
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
1740
1765
  } else {
1741
- telemetry?.incrementCounter("omote.cache.misses", 1, {});
1766
+ telemetry?.incrementCounter(MetricNames.CACHE_MISSES, 1, {});
1742
1767
  }
1743
1768
  resolve(cached?.data ?? null);
1744
1769
  };
1745
1770
  request.onerror = () => {
1746
1771
  span?.setAttributes({ "cache.hit": false });
1747
1772
  span?.end();
1748
- telemetry?.incrementCounter("omote.cache.misses", 1, {});
1773
+ telemetry?.incrementCounter(MetricNames.CACHE_MISSES, 1, {});
1749
1774
  resolve(null);
1750
1775
  };
1751
1776
  });
@@ -1789,14 +1814,14 @@ var ModelCache = class {
1789
1814
  if (!cached?.data) {
1790
1815
  span?.setAttributes({ "cache.hit": false });
1791
1816
  span?.end();
1792
- telemetry?.incrementCounter("omote.cache.misses", 1, {});
1817
+ telemetry?.incrementCounter(MetricNames.CACHE_MISSES, 1, {});
1793
1818
  return { data: null, stale: false };
1794
1819
  }
1795
1820
  span?.setAttributes({ "cache.hit": true, "cache.size_bytes": cached.size });
1796
1821
  if (!cached.etag) {
1797
1822
  span?.setAttributes({ "cache.validated": false, "cache.stale": false });
1798
1823
  span?.end();
1799
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
1824
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
1800
1825
  return { data: cached.data, stale: false };
1801
1826
  }
1802
1827
  const fetchUrl = originalUrl || url;
@@ -1805,7 +1830,7 @@ var ModelCache = class {
1805
1830
  if (!response.ok) {
1806
1831
  span?.setAttributes({ "cache.validated": false, "cache.stale": false });
1807
1832
  span?.end();
1808
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
1833
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
1809
1834
  return { data: cached.data, stale: false };
1810
1835
  }
1811
1836
  const serverEtag = response.headers.get("etag");
@@ -1818,17 +1843,17 @@ var ModelCache = class {
1818
1843
  });
1819
1844
  span?.end();
1820
1845
  if (isStale) {
1821
- telemetry?.incrementCounter("omote.cache.stale", 1, {});
1846
+ telemetry?.incrementCounter(MetricNames.CACHE_STALE, 1, {});
1822
1847
  logger7.debug("Stale cache detected", { url });
1823
1848
  } else {
1824
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
1849
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
1825
1850
  }
1826
1851
  return { data: cached.data, stale: isStale };
1827
1852
  } catch (fetchError) {
1828
1853
  logger7.warn("HEAD validation failed, using cached data", { error: String(fetchError) });
1829
1854
  span?.setAttributes({ "cache.validated": false, "cache.stale": false });
1830
1855
  span?.end();
1831
- telemetry?.incrementCounter("omote.cache.hits", 1, {});
1856
+ telemetry?.incrementCounter(MetricNames.CACHE_HITS, 1, {});
1832
1857
  return { data: cached.data, stale: false };
1833
1858
  }
1834
1859
  } catch {
@@ -1909,7 +1934,7 @@ var ModelCache = class {
1909
1934
  const telemetry = getTelemetry();
1910
1935
  if (quota.percentUsed > 90) {
1911
1936
  logger7.warn("Storage quota warning", { percentUsed: quota.percentUsed.toFixed(1), used: formatBytes(quota.usedBytes), quota: formatBytes(quota.quotaBytes) });
1912
- telemetry?.incrementCounter("omote.cache.quota_warning", 1, {
1937
+ telemetry?.incrementCounter(MetricNames.CACHE_QUOTA_WARNING, 1, {
1913
1938
  percent_used: String(Math.round(quota.percentUsed))
1914
1939
  });
1915
1940
  if (config.onQuotaWarning) {
@@ -2051,7 +2076,7 @@ var ModelCache = class {
2051
2076
  });
2052
2077
  span?.end();
2053
2078
  if (freedBytes > 0) {
2054
- telemetry?.incrementCounter("omote.cache.eviction", evictedUrls.length, {
2079
+ telemetry?.incrementCounter(MetricNames.CACHE_EVICTION, evictedUrls.length, {
2055
2080
  bytes_freed: String(freedBytes)
2056
2081
  });
2057
2082
  }
@@ -2571,7 +2596,7 @@ var _A2EInference = class _A2EInference {
2571
2596
  throw new Error("Model already loaded. Call dispose() first.");
2572
2597
  }
2573
2598
  this.isLoading = true;
2574
- const startTime = performance.now();
2599
+ const startTime = getClock().now();
2575
2600
  const telemetry = getTelemetry();
2576
2601
  const span = telemetry?.startSpan("A2EInference.load", {
2577
2602
  "model.url": this.config.modelUrl,
@@ -2667,7 +2692,7 @@ var _A2EInference = class _A2EInference {
2667
2692
  executionProvider: this._backend,
2668
2693
  backend: this._backend
2669
2694
  });
2670
- const loadTimeMs = performance.now() - startTime;
2695
+ const loadTimeMs = getClock().now() - startTime;
2671
2696
  logger10.info("Model loaded successfully", {
2672
2697
  backend: this._backend,
2673
2698
  loadTimeMs: Math.round(loadTimeMs),
@@ -2686,7 +2711,7 @@ var _A2EInference = class _A2EInference {
2686
2711
  });
2687
2712
  await new Promise((r) => setTimeout(r, 0));
2688
2713
  logger10.debug("Running warmup inference to initialize GPU context");
2689
- const warmupStart = performance.now();
2714
+ const warmupStart = getClock().now();
2690
2715
  const warmupAudio = new Float32Array(this.chunkSize);
2691
2716
  const warmupIdentity = new Float32Array(this.numIdentityClasses);
2692
2717
  warmupIdentity[0] = 1;
@@ -2699,7 +2724,7 @@ var _A2EInference = class _A2EInference {
2699
2724
  this.session.run(warmupFeeds).then(() => "ok"),
2700
2725
  new Promise((r) => setTimeout(() => r("timeout"), WARMUP_TIMEOUT_MS))
2701
2726
  ]);
2702
- const warmupTimeMs = performance.now() - warmupStart;
2727
+ const warmupTimeMs = getClock().now() - warmupStart;
2703
2728
  if (warmupResult === "timeout") {
2704
2729
  logger10.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
2705
2730
  timeoutMs: WARMUP_TIMEOUT_MS,
@@ -2779,7 +2804,7 @@ var _A2EInference = class _A2EInference {
2779
2804
  "inference.input_samples": this.chunkSize
2780
2805
  });
2781
2806
  try {
2782
- const startTime = performance.now();
2807
+ const startTime = getClock().now();
2783
2808
  let timeoutId;
2784
2809
  const results = await Promise.race([
2785
2810
  this.session.run(feeds).then((r) => {
@@ -2793,7 +2818,7 @@ var _A2EInference = class _A2EInference {
2793
2818
  );
2794
2819
  })
2795
2820
  ]);
2796
- const inferenceTimeMs = performance.now() - startTime;
2821
+ const inferenceTimeMs = getClock().now() - startTime;
2797
2822
  const blendshapeOutput = results["blendshapes"];
2798
2823
  if (!blendshapeOutput) {
2799
2824
  throw new Error("Missing blendshapes output from model");
@@ -3200,9 +3225,9 @@ var A2EUnifiedAdapter = class {
3200
3225
  "inference.input_samples": audio.length
3201
3226
  });
3202
3227
  try {
3203
- const startTime = performance.now();
3228
+ const startTime = getClock().now();
3204
3229
  const result = await this.worker.inferLAM(audio, identityIndex);
3205
- const inferenceTimeMs = performance.now() - startTime;
3230
+ const inferenceTimeMs = getClock().now() - startTime;
3206
3231
  const flatBuffer = result.blendshapes;
3207
3232
  const { numFrames, numBlendshapes } = result;
3208
3233
  const blendshapes = [];
@@ -3853,7 +3878,7 @@ var KokoroTTSInference = class {
3853
3878
  throw new Error("KokoroTTS is already loading");
3854
3879
  }
3855
3880
  this.isLoading = true;
3856
- const startTime = performance.now();
3881
+ const startTime = getClock().now();
3857
3882
  try {
3858
3883
  const backendPref = this.config.backend ?? "wasm";
3859
3884
  const ortResult = await getOnnxRuntimeForPreference(backendPref);
@@ -3877,7 +3902,7 @@ var KokoroTTSInference = class {
3877
3902
  "KokoroTTS InferenceSession.create"
3878
3903
  );
3879
3904
  }
3880
- const loadTimeMs = performance.now() - startTime;
3905
+ const loadTimeMs = getClock().now() - startTime;
3881
3906
  logger17.info("Kokoro TTS loaded", {
3882
3907
  backend: this._backend,
3883
3908
  loadTimeMs: Math.round(loadTimeMs),
@@ -3964,7 +3989,18 @@ var KokoroTTSInference = class {
3964
3989
  logger17.debug("stream aborted");
3965
3990
  return;
3966
3991
  }
3967
- const phonemes = await phonemize(sentence, language);
3992
+ let phonemes;
3993
+ try {
3994
+ phonemes = await phonemize(sentence, language);
3995
+ } catch (phonErr) {
3996
+ logger17.error("Phonemizer failed (possible OOM)", {
3997
+ code: ErrorCodes.TTS_PHONEMIZER_OOM,
3998
+ error: String(phonErr),
3999
+ textLength: sentence.length
4000
+ });
4001
+ yield { audio: new Float32Array(0), text: sentence, phonemes: "", duration: 0 };
4002
+ continue;
4003
+ }
3968
4004
  const tokens = tokenize(phonemes);
3969
4005
  const voiceData = await this.ensureVoice(voiceName);
3970
4006
  const style = getStyleForTokenCount(voiceData, tokens.length);
@@ -4024,16 +4060,27 @@ var KokoroTTSInference = class {
4024
4060
  "tts.speed": speed
4025
4061
  });
4026
4062
  try {
4027
- const startTime = performance.now();
4063
+ const startTime = getClock().now();
4028
4064
  const language = getVoiceLanguage(voiceName);
4029
- const phonemes = await phonemize(text, language);
4065
+ let phonemes;
4066
+ try {
4067
+ phonemes = await phonemize(text, language);
4068
+ } catch (phonErr) {
4069
+ logger17.error("Phonemizer failed (possible OOM)", {
4070
+ code: ErrorCodes.TTS_PHONEMIZER_OOM,
4071
+ error: String(phonErr),
4072
+ textLength: text.length
4073
+ });
4074
+ resolve({ audio: new Float32Array(0), duration: 0, inferenceTimeMs: 0 });
4075
+ return;
4076
+ }
4030
4077
  logger17.trace("Phonemized", { text: text.substring(0, 50), phonemes: phonemes.substring(0, 50) });
4031
4078
  const tokens = tokenize(phonemes);
4032
4079
  logger17.trace("Tokenized", { numTokens: tokens.length });
4033
4080
  const voiceData = await this.ensureVoice(voiceName);
4034
4081
  const style = getStyleForTokenCount(voiceData, tokens.length);
4035
4082
  const audio = await this.runInference(tokens, style, speed);
4036
- const inferenceTimeMs = performance.now() - startTime;
4083
+ const inferenceTimeMs = getClock().now() - startTime;
4037
4084
  const duration = audio.length / SAMPLE_RATE;
4038
4085
  logger17.trace("Synthesis complete", {
4039
4086
  duration: `${duration.toFixed(2)}s`,
@@ -4152,11 +4199,11 @@ var KokoroTTSUnifiedAdapter = class {
4152
4199
  "model.url": this.modelUrl
4153
4200
  });
4154
4201
  try {
4155
- const startTime = performance.now();
4202
+ const startTime = getClock().now();
4156
4203
  await this.worker.loadKokoro({ modelUrl: this.modelUrl });
4157
4204
  this._isLoaded = true;
4158
4205
  this.loadedGeneration = this.worker.workerGeneration;
4159
- const loadTimeMs = performance.now() - startTime;
4206
+ const loadTimeMs = getClock().now() - startTime;
4160
4207
  logger18.info("Kokoro TTS loaded via unified worker", {
4161
4208
  backend: "wasm",
4162
4209
  loadTimeMs: Math.round(loadTimeMs),
@@ -4231,11 +4278,11 @@ var KokoroTTSUnifiedAdapter = class {
4231
4278
  runWorkerInference(tokens, style, speed) {
4232
4279
  return new Promise((resolve, reject) => {
4233
4280
  this.inferenceQueue = this.inferenceQueue.then(async () => {
4234
- const startTime = performance.now();
4281
+ const startTime = getClock().now();
4235
4282
  const telemetry = getTelemetry();
4236
4283
  try {
4237
4284
  const result = await this.worker.inferKokoro(tokens, style, speed);
4238
- const latencyMs = performance.now() - startTime;
4285
+ const latencyMs = getClock().now() - startTime;
4239
4286
  telemetry?.recordHistogram("omote.inference.latency", latencyMs, {
4240
4287
  model: "kokoro-tts-unified",
4241
4288
  backend: "wasm"
@@ -4350,11 +4397,11 @@ var SileroVADUnifiedAdapter = class {
4350
4397
  return new Promise((resolve, reject) => {
4351
4398
  this.inferenceQueue = this.inferenceQueue.then(async () => {
4352
4399
  try {
4353
- const startTime = performance.now();
4400
+ const startTime = getClock().now();
4354
4401
  const result = await this.worker.processVAD(audioChunkCopy, this.state, this.context);
4355
4402
  this.state = result.state;
4356
4403
  this.context = audioChunkCopy.slice(-this.contextSize);
4357
- const inferenceTimeMs = performance.now() - startTime;
4404
+ const inferenceTimeMs = getClock().now() - startTime;
4358
4405
  const isSpeech = result.probability > this.config.threshold;
4359
4406
  let preSpeechChunks;
4360
4407
  if (isSpeech && !this.wasSpeaking) {
@@ -4412,17 +4459,20 @@ var SileroVADUnifiedAdapter = class {
4412
4459
  var logger20 = createLogger("createA2E");
4413
4460
  function createA2E(config = {}) {
4414
4461
  const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.lam;
4462
+ const platformInfo = {
4463
+ modelUrl,
4464
+ isIOS: isIOS(),
4465
+ webgpu: typeof navigator !== "undefined" && "gpu" in navigator
4466
+ };
4415
4467
  if (config.unifiedWorker) {
4416
- logger20.info("Creating A2EUnifiedAdapter (via unified worker)", {
4417
- modelUrl
4418
- });
4468
+ logger20.info("Creating A2EUnifiedAdapter (via unified worker)", platformInfo);
4419
4469
  return new A2EUnifiedAdapter(config.unifiedWorker, {
4420
4470
  modelUrl,
4421
4471
  externalDataUrl: config.externalDataUrl,
4422
4472
  numIdentityClasses: config.numIdentityClasses
4423
4473
  });
4424
4474
  }
4425
- logger20.info("Creating A2EInference", { modelUrl });
4475
+ logger20.info("Creating A2EInference", platformInfo);
4426
4476
  return new A2EInference({
4427
4477
  modelUrl,
4428
4478
  externalDataUrl: config.externalDataUrl,
@@ -4798,16 +4848,28 @@ async function loadOrt(wasmPaths, isIOSDevice) {
4798
4848
  // ort.webgpu.min.js crashes WebKit's JIT compiler.
4799
4849
  var isSafariWorker = typeof navigator !== 'undefined' && /safari/i.test(navigator.userAgent) && !/chrome|crios|fxios|chromium|edg/i.test(navigator.userAgent);
4800
4850
  var hasWebGPU = false;
4801
- if (!isIOSDevice && !isSafariWorker && typeof navigator !== 'undefined' && navigator.gpu) {
4851
+ var webgpuReason = '';
4852
+ if (isIOSDevice) {
4853
+ webgpuReason = 'iOS device';
4854
+ } else if (isSafariWorker) {
4855
+ webgpuReason = 'Safari (JSEP/ASYNCIFY crash)';
4856
+ } else if (typeof navigator === 'undefined' || !navigator.gpu) {
4857
+ webgpuReason = 'navigator.gpu unavailable';
4858
+ } else {
4802
4859
  try {
4803
4860
  var adapter = await navigator.gpu.requestAdapter();
4804
4861
  if (adapter) {
4805
4862
  hasWebGPU = true;
4863
+ } else {
4864
+ webgpuReason = 'requestAdapter returned null';
4806
4865
  }
4807
4866
  } catch (e) {
4808
- // WebGPU not available \u2014 fall through to WASM
4867
+ webgpuReason = 'requestAdapter failed: ' + String(e);
4809
4868
  }
4810
4869
  }
4870
+ if (!hasWebGPU && webgpuReason) {
4871
+ console.warn('[UnifiedWorker] WebGPU unavailable: ' + webgpuReason + ', falling back to WASM');
4872
+ }
4811
4873
 
4812
4874
  var ortUrl;
4813
4875
  if (hasWebGPU) {
@@ -5292,7 +5354,12 @@ var UnifiedInferenceWorker = class {
5292
5354
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs, "worker.backend": this._workerBackend });
5293
5355
  span?.end();
5294
5356
  } catch (error) {
5295
- span?.endWithError(error instanceof Error ? error : new Error(String(error)));
5357
+ const err = error instanceof Error ? error : new Error(String(error));
5358
+ const isTimeout = err.message.includes("timed out");
5359
+ if (isTimeout) {
5360
+ logger21.error("Worker init timed out", { code: "OMOTE_INF_003", timeoutMs: INIT_TIMEOUT_MS });
5361
+ }
5362
+ span?.endWithError(err);
5296
5363
  this.cleanup();
5297
5364
  throw error;
5298
5365
  }
@@ -5676,7 +5743,7 @@ var TTSSpeaker = class {
5676
5743
  async connect(tts, config) {
5677
5744
  logger22.info("Connecting TTS...");
5678
5745
  const span = getTelemetry()?.startSpan("TTSSpeaker.connect");
5679
- const connectStart = performance.now();
5746
+ const connectStart = getClock().now();
5680
5747
  this.tts = tts;
5681
5748
  if (!tts.isLoaded) {
5682
5749
  await tts.load();
@@ -5685,7 +5752,7 @@ var TTSSpeaker = class {
5685
5752
  if (!hasLam) {
5686
5753
  this._audioOnly = true;
5687
5754
  this.scheduler = new AudioScheduler({ sampleRate: tts.sampleRate });
5688
- getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, performance.now() - connectStart);
5755
+ getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, getClock().now() - connectStart);
5689
5756
  span?.end();
5690
5757
  logger22.info("TTS connected (audio-only mode)");
5691
5758
  return;
@@ -5719,7 +5786,7 @@ var TTSSpeaker = class {
5719
5786
  neutralTransitionMs: config?.neutralTransitionMs
5720
5787
  });
5721
5788
  await this.ttsPlayback.initialize();
5722
- getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, performance.now() - connectStart);
5789
+ getTelemetry()?.recordHistogram(MetricNames.TTS_CONNECT_LATENCY, getClock().now() - connectStart);
5723
5790
  span?.end();
5724
5791
  logger22.info("TTS connected (lip sync mode)");
5725
5792
  }
@@ -5754,7 +5821,7 @@ var TTSSpeaker = class {
5754
5821
  const span = getTelemetry()?.startSpan("TTSSpeaker.speak", {
5755
5822
  "text.length": text.length
5756
5823
  });
5757
- const speakStart = performance.now();
5824
+ const speakStart = getClock().now();
5758
5825
  try {
5759
5826
  if (this._audioOnly) {
5760
5827
  await this.speakAudioOnly(text, abort, options?.voice);
@@ -5764,7 +5831,7 @@ var TTSSpeaker = class {
5764
5831
  voice: options?.voice
5765
5832
  });
5766
5833
  }
5767
- getTelemetry()?.recordHistogram(MetricNames.TTS_SPEAK_LATENCY, performance.now() - speakStart);
5834
+ getTelemetry()?.recordHistogram(MetricNames.TTS_SPEAK_LATENCY, getClock().now() - speakStart);
5768
5835
  span?.end();
5769
5836
  } catch (err) {
5770
5837
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
@@ -5894,42 +5961,42 @@ var TTSSpeaker = class {
5894
5961
  end: async () => {
5895
5962
  if (ended) return;
5896
5963
  ended = true;
5897
- if (abort.signal.aborted) {
5898
- this._isSpeaking = false;
5899
- if (this.currentAbort === abort) this.currentAbort = null;
5900
- return;
5901
- }
5902
- if (buffer.trim()) {
5903
- enqueueSentence(buffer.trim());
5904
- buffer = "";
5905
- }
5906
- await processChain;
5907
- if (abort.signal.aborted) {
5908
- this._isSpeaking = false;
5909
- if (this.currentAbort === abort) this.currentAbort = null;
5910
- return;
5911
- }
5912
- await pipeline.end();
5913
- await new Promise((resolve) => {
5914
- let resolved = false;
5915
- const done = () => {
5916
- if (resolved) return;
5917
- resolved = true;
5918
- unsubC();
5919
- unsubS();
5920
- abort.signal.removeEventListener("abort", done);
5921
- resolve();
5922
- };
5964
+ const unsubs = [];
5965
+ try {
5923
5966
  if (abort.signal.aborted) {
5924
- resolve();
5925
5967
  return;
5926
5968
  }
5927
- const unsubC = pipeline.on("playback:complete", done);
5928
- const unsubS = pipeline.on("playback:stop", done);
5929
- abort.signal.addEventListener("abort", done);
5930
- });
5931
- this._isSpeaking = false;
5932
- if (this.currentAbort === abort) this.currentAbort = null;
5969
+ if (buffer.trim()) {
5970
+ enqueueSentence(buffer.trim());
5971
+ buffer = "";
5972
+ }
5973
+ await processChain;
5974
+ if (abort.signal.aborted) {
5975
+ return;
5976
+ }
5977
+ await pipeline.end();
5978
+ await new Promise((resolve) => {
5979
+ let resolved = false;
5980
+ const done = () => {
5981
+ if (resolved) return;
5982
+ resolved = true;
5983
+ resolve();
5984
+ };
5985
+ if (abort.signal.aborted) {
5986
+ resolve();
5987
+ return;
5988
+ }
5989
+ unsubs.push(pipeline.on("playback:complete", done));
5990
+ unsubs.push(pipeline.on("playback:stop", done));
5991
+ const onAbort = () => done();
5992
+ abort.signal.addEventListener("abort", onAbort);
5993
+ unsubs.push(() => abort.signal.removeEventListener("abort", onAbort));
5994
+ });
5995
+ } finally {
5996
+ unsubs.forEach((fn) => fn());
5997
+ this._isSpeaking = false;
5998
+ if (this.currentAbort === abort) this.currentAbort = null;
5999
+ }
5933
6000
  }
5934
6001
  };
5935
6002
  }
@@ -6620,14 +6687,14 @@ function createKokoroTTS(config = {}) {
6620
6687
  logger24.info("iOS + unified worker: creating KokoroTTSUnifiedAdapter (off-main-thread ONNX)");
6621
6688
  return new KokoroTTSUnifiedAdapter(config.unifiedWorker, config);
6622
6689
  }
6623
- logger24.info("iOS detected: creating KokoroTTSInference (main thread, shared ORT)");
6690
+ logger24.info("iOS: creating KokoroTTSInference (main thread, shared ORT)");
6624
6691
  return new KokoroTTSInference(config);
6625
6692
  }
6626
6693
  if (!KokoroTTSWorker.isSupported()) {
6627
6694
  logger24.info("Worker not supported: creating KokoroTTSInference (main thread)");
6628
6695
  return new KokoroTTSInference(config);
6629
6696
  }
6630
- logger24.info("Auto-detected: creating KokoroTTSWorker (off-main-thread)");
6697
+ logger24.info("Auto: creating KokoroTTSWorker (off-main-thread)");
6631
6698
  return new KokoroTTSWorker(config);
6632
6699
  }
6633
6700
 
@@ -6861,6 +6928,9 @@ var _SenseVoiceInference = class _SenseVoiceInference {
6861
6928
  // so all future transcribe() calls reject immediately to prevent concurrent access.
6862
6929
  this.poisoned = false;
6863
6930
  // 10s for SenseVoice (heavier preprocessing)
6931
+ // WebGPU shape change tracking (for dynamic shape warning)
6932
+ this.lastLfrFrames = 0;
6933
+ this.webgpuShapeWarned = false;
6864
6934
  // Preprocessing state (loaded once)
6865
6935
  this.tokenMap = null;
6866
6936
  this.negMean = null;
@@ -6895,7 +6965,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
6895
6965
  throw new Error("Model already loaded. Call dispose() first.");
6896
6966
  }
6897
6967
  this.isLoading = true;
6898
- const startTime = performance.now();
6968
+ const startTime = getClock().now();
6899
6969
  const telemetry = getTelemetry();
6900
6970
  const span = telemetry?.startSpan("SenseVoice.load", {
6901
6971
  "model.url": this.config.modelUrl,
@@ -6962,7 +7032,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
6962
7032
  } catch (cmvnErr) {
6963
7033
  logger25.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
6964
7034
  }
6965
- const loadTimeMs = performance.now() - startTime;
7035
+ const loadTimeMs = getClock().now() - startTime;
6966
7036
  logger25.info("SenseVoice model loaded", {
6967
7037
  backend: this._backend,
6968
7038
  loadTimeMs: Math.round(loadTimeMs),
@@ -7027,24 +7097,35 @@ var _SenseVoiceInference = class _SenseVoiceInference {
7027
7097
  "inference.input_samples": audio.length
7028
7098
  });
7029
7099
  try {
7030
- const startTime = performance.now();
7031
- const preprocessStart = performance.now();
7100
+ const startTime = getClock().now();
7101
+ const preprocessStart = getClock().now();
7032
7102
  const fbank = computeKaldiFbank(audio, 16e3, 80);
7033
7103
  const numFrames = fbank.length / 80;
7034
7104
  if (numFrames === 0) {
7035
7105
  resolve({
7036
7106
  text: "",
7037
- inferenceTimeMs: performance.now() - startTime,
7038
- preprocessTimeMs: performance.now() - preprocessStart
7107
+ inferenceTimeMs: getClock().now() - startTime,
7108
+ preprocessTimeMs: getClock().now() - preprocessStart
7039
7109
  });
7040
7110
  return;
7041
7111
  }
7042
7112
  const lfrFeatures = applyLFR(fbank, 80, 7, 6);
7043
7113
  const numLfrFrames = lfrFeatures.length / 560;
7114
+ if (this._backend === "webgpu" && this.lastLfrFrames !== 0 && numLfrFrames !== this.lastLfrFrames) {
7115
+ if (!this.webgpuShapeWarned) {
7116
+ this.webgpuShapeWarned = true;
7117
+ logger25.warn("SenseVoice running on WebGPU with variable audio shapes \u2014 risk of kernel crash", {
7118
+ code: ErrorCodes.INF_SHAPE_MISMATCH,
7119
+ previousFrames: this.lastLfrFrames,
7120
+ currentFrames: numLfrFrames
7121
+ });
7122
+ }
7123
+ }
7124
+ this.lastLfrFrames = numLfrFrames;
7044
7125
  if (this.negMean && this.invStddev) {
7045
7126
  applyCMVN(lfrFeatures, 560, this.negMean, this.invStddev);
7046
7127
  }
7047
- const preprocessTimeMs = performance.now() - preprocessStart;
7128
+ const preprocessTimeMs = getClock().now() - preprocessStart;
7048
7129
  const ort = this.ort;
7049
7130
  const feeds = {
7050
7131
  x: new ort.Tensor("float32", lfrFeatures, [1, numLfrFrames, 560]),
@@ -7074,7 +7155,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
7074
7155
  const seqLen = logitsDims[1];
7075
7156
  const vocabSize = logitsDims[2];
7076
7157
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
7077
- const inferenceTimeMs = performance.now() - startTime;
7158
+ const inferenceTimeMs = getClock().now() - startTime;
7078
7159
  logger25.trace("Transcription complete", {
7079
7160
  text: decoded.text.substring(0, 50),
7080
7161
  language: decoded.language,
@@ -8413,7 +8494,7 @@ var SileroVADInference = class {
8413
8494
  throw new Error("Model already loaded. Call dispose() first.");
8414
8495
  }
8415
8496
  this.isLoading = true;
8416
- const startTime = performance.now();
8497
+ const startTime = getClock().now();
8417
8498
  const telemetry = getTelemetry();
8418
8499
  const span = telemetry?.startSpan("SileroVAD.load", {
8419
8500
  "model.url": this.config.modelUrl,
@@ -8445,7 +8526,7 @@ var SileroVADInference = class {
8445
8526
  const modelData = new Uint8Array(modelBuffer);
8446
8527
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
8447
8528
  this.reset();
8448
- const loadTimeMs = performance.now() - startTime;
8529
+ const loadTimeMs = getClock().now() - startTime;
8449
8530
  logger28.info("Model loaded successfully", {
8450
8531
  backend: this._backend,
8451
8532
  loadTimeMs: Math.round(loadTimeMs),
@@ -8625,7 +8706,7 @@ var SileroVADInference = class {
8625
8706
  "inference.chunk_size": this.chunkSize
8626
8707
  });
8627
8708
  try {
8628
- const startTime = performance.now();
8709
+ const startTime = getClock().now();
8629
8710
  const inputSize = this.contextSize + this.chunkSize;
8630
8711
  const inputBuffer = new Float32Array(inputSize);
8631
8712
  inputBuffer.set(this.context, 0);
@@ -8655,7 +8736,7 @@ var SileroVADInference = class {
8655
8736
  );
8656
8737
  }
8657
8738
  this.context = audioChunkCopy.slice(-this.contextSize);
8658
- const inferenceTimeMs = performance.now() - startTime;
8739
+ const inferenceTimeMs = getClock().now() - startTime;
8659
8740
  const isSpeech = probability > this.config.threshold;
8660
8741
  let preSpeechChunks;
8661
8742
  if (isSpeech && !this.wasSpeaking) {
@@ -9470,6 +9551,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
9470
9551
  this.lastProgressiveSamples = 0;
9471
9552
  // ASR error recovery
9472
9553
  this.asrErrorCount = 0;
9554
+ this.progressiveErrorCount = 0;
9473
9555
  this.config = config ?? {};
9474
9556
  }
9475
9557
  /** Current listener state */
@@ -9662,7 +9744,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
9662
9744
  if (result.isSpeech) {
9663
9745
  if (!wasSpeaking) {
9664
9746
  this.isSpeechActive = true;
9665
- this.speechStartTime = performance.now();
9747
+ this.speechStartTime = getClock().now();
9666
9748
  this.audioBuffer = [];
9667
9749
  this.audioBufferSamples = 0;
9668
9750
  this.lastProgressiveResult = null;
@@ -9701,13 +9783,13 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
9701
9783
  const extended = this.config.silenceTimeoutExtendedMs ?? 700;
9702
9784
  const adaptive = this.config.adaptiveTimeout ?? true;
9703
9785
  if (!adaptive) return base;
9704
- const speechDurationMs = performance.now() - this.speechStartTime;
9786
+ const speechDurationMs = getClock().now() - this.speechStartTime;
9705
9787
  return speechDurationMs > 3e3 ? extended : base;
9706
9788
  }
9707
9789
  onSilenceDetected() {
9708
9790
  const capturedEpoch = this.epoch;
9709
9791
  this.isSpeechActive = false;
9710
- const durationMs = performance.now() - this.speechStartTime;
9792
+ const durationMs = getClock().now() - this.speechStartTime;
9711
9793
  logger31.debug("Speech end", { durationMs: Math.round(durationMs) });
9712
9794
  this.emit("speech:end", { durationMs });
9713
9795
  this.clearSilenceTimer();
@@ -9804,7 +9886,15 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
9804
9886
  this.lastProgressiveSamples = snapshotSamples;
9805
9887
  this.emit("transcript", { ...result, isFinal: false });
9806
9888
  }
9807
- } catch {
9889
+ } catch (err) {
9890
+ this.progressiveErrorCount = (this.progressiveErrorCount ?? 0) + 1;
9891
+ if (this.progressiveErrorCount % 10 === 1) {
9892
+ logger31.warn("Progressive transcription error", {
9893
+ code: ErrorCodes.SPH_ASR_ERROR,
9894
+ error: String(err),
9895
+ count: this.progressiveErrorCount
9896
+ });
9897
+ }
9808
9898
  }
9809
9899
  })();
9810
9900
  }, intervalMs);
@@ -9821,7 +9911,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
9821
9911
  async transcribeWithTimeout(audio) {
9822
9912
  if (!this.asr) return null;
9823
9913
  const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
9824
- const startTime = performance.now();
9914
+ const startTime = getClock().now();
9825
9915
  const span = getTelemetry()?.startSpan("SpeechListener.transcribe", {
9826
9916
  "inference.input_samples": audio.length,
9827
9917
  "inference.input_duration_ms": audio.length / 16e3 * 1e3
@@ -9835,7 +9925,7 @@ var _SpeechListener = class _SpeechListener extends EventEmitter {
9835
9925
  })
9836
9926
  ]);
9837
9927
  clearTimeout(timeoutId);
9838
- const latency = performance.now() - startTime;
9928
+ const latency = getClock().now() - startTime;
9839
9929
  this.asrErrorCount = 0;
9840
9930
  getTelemetry()?.recordHistogram(MetricNames.VOICE_TRANSCRIPTION_LATENCY, latency);
9841
9931
  getTelemetry()?.incrementCounter(MetricNames.VOICE_TRANSCRIPTIONS);
@@ -10009,11 +10099,11 @@ var InterruptionHandler = class extends EventEmitter {
10009
10099
  getState() {
10010
10100
  return {
10011
10101
  isSpeaking: this.isSpeaking,
10012
- speechDurationMs: this.isSpeaking ? performance.now() - this.speechStartTime : 0
10102
+ speechDurationMs: this.isSpeaking ? getClock().now() - this.speechStartTime : 0
10013
10103
  };
10014
10104
  }
10015
10105
  onSpeechDetected(rms) {
10016
- const now = performance.now();
10106
+ const now = getClock().now();
10017
10107
  this.lastSpeechTime = now;
10018
10108
  if (this.silenceTimer) {
10019
10109
  clearTimeout(this.silenceTimer);
@@ -10230,7 +10320,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
10230
10320
  this.setupEventHandlers();
10231
10321
  this.recognition.start();
10232
10322
  this.isListening = true;
10233
- this.startTime = performance.now();
10323
+ this.startTime = getClock().now();
10234
10324
  this.accumulatedText = "";
10235
10325
  logger33.info("Speech recognition started", {
10236
10326
  language: this.config.language
@@ -10331,7 +10421,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
10331
10421
  const speechResult = {
10332
10422
  text: isFinal ? this.accumulatedText.trim() : text,
10333
10423
  language: this.config.language,
10334
- inferenceTimeMs: performance.now() - this.startTime,
10424
+ inferenceTimeMs: getClock().now() - this.startTime,
10335
10425
  isFinal,
10336
10426
  confidence: alternative.confidence
10337
10427
  };
@@ -10363,13 +10453,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
10363
10453
  this.isListening = false;
10364
10454
  logger33.info("Speech recognition ended", {
10365
10455
  totalText: this.accumulatedText.length,
10366
- durationMs: performance.now() - this.startTime
10456
+ durationMs: getClock().now() - this.startTime
10367
10457
  });
10368
10458
  if (this.stopResolver) {
10369
10459
  const result = {
10370
10460
  text: this.accumulatedText.trim(),
10371
10461
  language: this.config.language,
10372
- inferenceTimeMs: performance.now() - this.startTime,
10462
+ inferenceTimeMs: getClock().now() - this.startTime,
10373
10463
  isFinal: true
10374
10464
  };
10375
10465
  this.stopResolver(result);
@@ -10413,6 +10503,303 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
10413
10503
  }
10414
10504
  };
10415
10505
 
10506
// src/inference/ElevenLabsTTSBackend.ts
var logger34 = createLogger("ElevenLabsTTS");
var DEFAULT_MODEL = "eleven_multilingual_v2";
var DEFAULT_OUTPUT_FORMAT = "pcm_16000";
var DEFAULT_STABILITY = 0.5;
var DEFAULT_SIMILARITY_BOOST = 0.75;
var DEFAULT_BASE_URL = "https://api.elevenlabs.io";
// Raw PCM output formats accepted by this backend, mapped to their sample rate in Hz.
var FORMAT_TO_SAMPLE_RATE = {
  pcm_16000: 16e3,
  pcm_22050: 22050,
  pcm_24000: 24e3,
  pcm_44100: 44100
};
/**
 * Cloud TTS backend that requests raw PCM16 audio from the ElevenLabs HTTP API
 * and exposes it as an async stream of Float32 audio chunks.
 */
var ElevenLabsTTSBackend = class {
  /**
   * @param config Backend configuration. `apiKey` and `voiceId` are required;
   *   `model`, `outputFormat`, `stability`, `similarityBoost` and `baseUrl`
   *   fall back to the module defaults when null/undefined.
   * @throws {Error} When `apiKey`/`voiceId` is missing or `outputFormat` is not
   *   one of the keys of FORMAT_TO_SAMPLE_RATE.
   */
  constructor(config) {
    this._isLoaded = false;
    if (!config.apiKey) throw new Error("ElevenLabsTTS: apiKey is required");
    if (!config.voiceId) throw new Error("ElevenLabsTTS: voiceId is required");
    this.apiKey = config.apiKey;
    this.voiceId = config.voiceId;
    this.model = config.model ?? DEFAULT_MODEL;
    this.outputFormat = config.outputFormat ?? DEFAULT_OUTPUT_FORMAT;
    this.stability = config.stability ?? DEFAULT_STABILITY;
    this.similarityBoost = config.similarityBoost ?? DEFAULT_SIMILARITY_BOOST;
    this.baseUrl = config.baseUrl ?? DEFAULT_BASE_URL;
    // Own-property lookup only: a plain `table[key]` would accept inherited
    // names such as "toString" and store a function as the sample rate.
    const isKnownFormat = Object.prototype.hasOwnProperty.call(FORMAT_TO_SAMPLE_RATE, this.outputFormat);
    const rate = isKnownFormat ? FORMAT_TO_SAMPLE_RATE[this.outputFormat] : void 0;
    if (!rate) {
      throw new Error(
        `ElevenLabsTTS: unsupported outputFormat "${this.outputFormat}". Supported: ${Object.keys(FORMAT_TO_SAMPLE_RATE).join(", ")}`
      );
    }
    this._sampleRate = rate;
  }
  /** Sample rate (Hz) implied by the configured output format. */
  get sampleRate() {
    return this._sampleRate;
  }
  /** True once load() has been called and dispose() has not. */
  get isLoaded() {
    return this._isLoaded;
  }
  // ─── Load ───────────────────────────────────────────────────────────────
  /**
   * No-op for cloud TTS (no model to load).
   * Marks backend as ready.
   */
  async load() {
    this._isLoaded = true;
    logger34.info("ElevenLabs TTS ready", { voiceId: this.voiceId, model: this.model });
  }
  // ─── Stream ─────────────────────────────────────────────────────────────
  /**
   * Request speech for `text` and yield it as `{ audio, duration, text }` chunks.
   *
   * POSTs to `{baseUrl}/v1/text-to-speech/{voiceId}` and yields one chunk per
   * piece of response body as it arrives. When the response exposes no body
   * stream, the whole payload is read and yielded as a single chunk.
   *
   * NOTE(review): ElevenLabs also documents a dedicated `/stream` route; this
   * method targets the non-`/stream` route — confirm which one is intended.
   *
   * @param text Text to synthesize; surrounding whitespace is trimmed.
   * @param options Optional `{ signal }` used to abort the request/stream.
   * @throws {Error} When the backend is not loaded, the text is empty, the HTTP
   *   response is not ok, or the transfer fails for a reason other than abort.
   */
  async *stream(text, options) {
    if (!this._isLoaded) {
      throw new Error("ElevenLabsTTS: not loaded. Call load() first.");
    }
    const trimmed = text.trim();
    if (trimmed.length === 0) {
      throw new Error("ElevenLabsTTS: text must not be empty");
    }
    const startTime = getClock().now();
    const telemetry = getTelemetry();
    const span = telemetry?.startSpan("ElevenLabsTTS.stream", {
      "tts.text_length": trimmed.length,
      "tts.voice_id": this.voiceId,
      "tts.model": this.model
    });
    // voiceId is caller-supplied, so escape it before placing it in the path.
    // outputFormat was validated against a fixed key set in the constructor.
    const url = `${this.baseUrl}/v1/text-to-speech/${encodeURIComponent(this.voiceId)}?output_format=${this.outputFormat}`;
    try {
      const response = await fetch(url, {
        method: "POST",
        headers: {
          "xi-api-key": this.apiKey,
          "Content-Type": "application/json",
          Accept: "audio/pcm"
        },
        body: JSON.stringify({
          text: trimmed,
          model_id: this.model,
          voice_settings: {
            stability: this.stability,
            similarity_boost: this.similarityBoost
          }
        }),
        signal: options?.signal
      });
      if (!response.ok) {
        const errorText = await response.text().catch(() => "unknown");
        const msg = `ElevenLabsTTS: HTTP ${response.status} \u2014 ${this.getHttpErrorMessage(response.status, errorText)}`;
        logger34.error(msg);
        throw new Error(msg);
      }
      if (!response.body) {
        // No body stream available: decode the whole payload in one go.
        const buffer = await response.arrayBuffer();
        // PCM16 needs an even byte count; ignore a dangling final byte.
        const evenBuffer = buffer.byteLength % 2 === 0 ? buffer : buffer.slice(0, buffer.byteLength - 1);
        const audio = pcm16ToFloat32(evenBuffer);
        const duration = audio.length / this._sampleRate;
        const latency2 = getClock().now() - startTime;
        span?.setAttributes({ "tts.duration_s": duration, "tts.latency_ms": latency2 });
        span?.end();
        telemetry?.recordHistogram("omote.inference.latency", latency2, {
          model: "elevenlabs-tts",
          backend: "cloud"
        });
        // Count the success here too, as the streaming path below does.
        telemetry?.incrementCounter("omote.inference.total", 1, {
          model: "elevenlabs-tts",
          backend: "cloud",
          status: "success"
        });
        yield { audio, duration, text: trimmed };
        return;
      }
      const reader = response.body.getReader();
      let totalSamples = 0;
      // Network chunks are not aligned to 2-byte samples. When a chunk ends in
      // the middle of a sample, its last byte is kept here and prepended to the
      // next chunk; dropping it would shift every later sample by one byte.
      let carryByte = -1;
      let drained = false;
      try {
        while (true) {
          if (options?.signal?.aborted) {
            logger34.debug("Stream aborted by signal");
            span?.end();
            return;
          }
          const { done, value } = await reader.read();
          if (done) {
            drained = true;
            break;
          }
          if (!value || value.byteLength === 0) continue;
          let pending = value;
          if (carryByte >= 0) {
            pending = new Uint8Array(value.byteLength + 1);
            pending[0] = carryByte;
            pending.set(value, 1);
            carryByte = -1;
          }
          const usableBytes = pending.byteLength & ~1;
          if (usableBytes < pending.byteLength) {
            carryByte = pending[pending.byteLength - 1];
          }
          if (usableBytes === 0) continue;
          const audio = pcm16ToFloat32(pending.buffer.slice(pending.byteOffset, pending.byteOffset + usableBytes));
          const duration = audio.length / this._sampleRate;
          totalSamples += audio.length;
          yield { audio, duration, text: trimmed };
        }
        // A byte still carried at end-of-stream is half a sample and is dropped.
      } finally {
        if (!drained) {
          // Abort, failure, or the consumer stopped iterating early: stop the
          // download. Best-effort only — cancel() rejects on an already-errored
          // stream and that failure carries no new information.
          try {
            await reader.cancel();
          } catch {
          }
        }
        reader.releaseLock();
      }
      const latency = getClock().now() - startTime;
      const totalDuration = totalSamples / this._sampleRate;
      logger34.debug("Stream complete", {
        totalDuration: `${totalDuration.toFixed(2)}s`,
        latencyMs: Math.round(latency),
        totalSamples
      });
      span?.setAttributes({ "tts.duration_s": totalDuration, "tts.latency_ms": latency });
      span?.end();
      telemetry?.recordHistogram("omote.inference.latency", latency, {
        model: "elevenlabs-tts",
        backend: "cloud"
      });
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "elevenlabs-tts",
        backend: "cloud",
        status: "success"
      });
    } catch (err) {
      // An abort is an expected way to end the stream, not a failure.
      if (err instanceof DOMException && err.name === "AbortError") {
        logger34.debug("Stream aborted");
        span?.end();
        return;
      }
      const errMsg = err instanceof Error ? err.message : String(err);
      logger34.error("Stream failed", { error: errMsg });
      span?.endWithError(err instanceof Error ? err : new Error(String(err)));
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "elevenlabs-tts",
        backend: "cloud",
        status: "error"
      });
      throw err;
    }
  }
  // ─── Dispose ────────────────────────────────────────────────────────────
  /** Marks the backend as not loaded; stream() throws until load() is called again. */
  async dispose() {
    this._isLoaded = false;
    logger34.info("ElevenLabs TTS disposed");
  }
  // ─── Private ────────────────────────────────────────────────────────────
  /**
   * Map an HTTP status to a short human-readable explanation.
   * @param status HTTP status code of the failed response.
   * @param body Response body text, used for 400 and for unlisted statuses.
   * @returns Message fragment appended to the thrown/logged error.
   */
  getHttpErrorMessage(status, body) {
    switch (status) {
      case 401:
        return "Unauthorized \u2014 check your API key";
      case 403:
        return "Forbidden \u2014 API key lacks required permissions";
      case 429:
        return "Rate limited \u2014 too many requests";
      case 400:
        return `Bad request \u2014 ${body}`;
      default:
        return body || `HTTP error ${status}`;
    }
  }
};
10694
+
10695
// src/inference/PollyTTSBackend.ts
var logger35 = createLogger("PollyTTS");
var DEFAULT_VOICE = "Joanna";
var DEFAULT_SAMPLE_RATE = 16e3;
/**
 * Cloud TTS backend that delegates synthesis to a consumer-supplied function
 * and exposes the returned PCM16 audio as a single Float32 chunk.
 */
var PollyTTSBackend = class {
  /**
   * @param config Backend configuration. `synthesizeFn(text, voice, sampleRate)`
   *   is required and must resolve to an object with an `audio` property;
   *   `voice`, `sampleRate` and `engine` fall back to "Joanna", 16000 and
   *   "neural" when null/undefined.
   * @throws {Error} When `synthesizeFn` is missing.
   */
  constructor(config) {
    this._isLoaded = false;
    if (!config.synthesizeFn) {
      throw new Error("PollyTTS: synthesizeFn is required");
    }
    this.synthesizeFn = config.synthesizeFn;
    this.voice = config.voice ?? DEFAULT_VOICE;
    this._sampleRate = config.sampleRate ?? DEFAULT_SAMPLE_RATE;
    this.engine = config.engine ?? "neural";
  }
  /** Sample rate (Hz) passed to synthesizeFn and used to compute durations. */
  get sampleRate() {
    return this._sampleRate;
  }
  /** True once load() has been called and dispose() has not. */
  get isLoaded() {
    return this._isLoaded;
  }
  // ─── Load ───────────────────────────────────────────────────────────────
  /**
   * No-op for cloud TTS (no model to load).
   * Marks backend as ready.
   */
  async load() {
    this._isLoaded = true;
    logger35.info("Polly TTS ready", { voice: this.voice, engine: this.engine, sampleRate: this._sampleRate });
  }
  // ─── Stream ─────────────────────────────────────────────────────────────
  /**
   * Synthesize audio via consumer's Polly function.
   *
   * Polly's SynthesizeSpeech is request/response (not streaming for PCM),
   * so this yields a single chunk per call. For long text, consider splitting
   * into sentences on the consumer side.
   *
   * @param text Text to synthesize; surrounding whitespace is trimmed.
   * @param options Optional `{ voice, signal }`. `voice` overrides the configured
   *   voice for this call. `signal` is checked before and after synthesis; it is
   *   not passed to synthesizeFn, so an in-flight call is not interrupted.
   * @throws {Error} When the backend is not loaded, the text is empty, or
   *   synthesizeFn / decoding fails for a reason other than abort.
   */
  async *stream(text, options) {
    if (!this._isLoaded) {
      throw new Error("PollyTTS: not loaded. Call load() first.");
    }
    const trimmed = text.trim();
    if (trimmed.length === 0) {
      throw new Error("PollyTTS: text must not be empty");
    }
    if (options?.signal?.aborted) {
      return;
    }
    const voiceName = options?.voice ?? this.voice;
    const startTime = getClock().now();
    const telemetry = getTelemetry();
    const span = telemetry?.startSpan("PollyTTS.stream", {
      "tts.text_length": trimmed.length,
      "tts.voice": voiceName,
      "tts.engine": this.engine
    });
    let chunk;
    try {
      const result = await this.synthesizeFn(trimmed, voiceName, this._sampleRate);
      if (options?.signal?.aborted) {
        span?.end();
        return;
      }
      const audio = pcm16ToFloat32(this.toPcmBuffer(result.audio));
      const duration = audio.length / this._sampleRate;
      const latency = getClock().now() - startTime;
      logger35.debug("Synthesis complete", {
        voice: voiceName,
        duration: `${duration.toFixed(2)}s`,
        latencyMs: Math.round(latency),
        numSamples: audio.length
      });
      span?.setAttributes({ "tts.duration_s": duration, "tts.latency_ms": latency });
      span?.end();
      telemetry?.recordHistogram("omote.inference.latency", latency, {
        model: "polly-tts",
        backend: "cloud"
      });
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "polly-tts",
        backend: "cloud",
        status: "success"
      });
      chunk = { audio, duration, text: trimmed };
    } catch (err) {
      // An abort is an expected way to end the call, not a failure.
      if (err instanceof DOMException && err.name === "AbortError") {
        logger35.debug("Synthesis aborted");
        span?.end();
        return;
      }
      const errMsg = err instanceof Error ? err.message : String(err);
      logger35.error("Synthesis failed", { error: errMsg });
      span?.endWithError(err instanceof Error ? err : new Error(String(err)));
      telemetry?.incrementCounter("omote.inference.total", 1, {
        model: "polly-tts",
        backend: "cloud",
        status: "error"
      });
      throw err;
    }
    // Yield outside the try: anything injected at the yield point is not a
    // synthesis failure and must not be logged/counted as one after success
    // has already been recorded.
    yield chunk;
  }
  // ─── Dispose ────────────────────────────────────────────────────────────
  /** Marks the backend as not loaded; stream() throws until load() is called again. */
  async dispose() {
    this._isLoaded = false;
    logger35.info("Polly TTS disposed");
  }
  // ─── Private ────────────────────────────────────────────────────────────
  /**
   * Normalise the bytes returned by synthesizeFn before decoding.
   *
   * Typed-array / Buffer results are reduced to exactly the byte range they
   * view, and an odd trailing byte (half a PCM16 sample) is dropped. Values
   * that are neither an ArrayBuffer nor a view are passed through unchanged.
   * NOTE(review): assumes pcm16ToFloat32 takes an ArrayBuffer of 16-bit samples — confirm.
   *
   * @param audio `audio` value resolved by synthesizeFn.
   * @returns The value to hand to pcm16ToFloat32.
   */
  toPcmBuffer(audio) {
    if (ArrayBuffer.isView(audio)) {
      const evenLength = audio.byteLength & ~1;
      return audio.buffer.slice(audio.byteOffset, audio.byteOffset + evenLength);
    }
    if (audio instanceof ArrayBuffer && audio.byteLength % 2 !== 0) {
      return audio.slice(0, audio.byteLength - 1);
    }
    return audio;
  }
};
10802
+
10416
10803
  // src/inference/ortConfig.ts
10417
10804
  var ortCdnBase = null;
10418
10805
  function configureOrtCdn(cdnPath) {
@@ -10426,7 +10813,7 @@ function getOrtCdnBase() {
10426
10813
  }
10427
10814
 
10428
10815
  // src/emotion/Emotion.ts
10429
- var logger34 = createLogger("EmotionController");
10816
+ var logger36 = createLogger("EmotionController");
10430
10817
  var EMOTION_NAMES = [
10431
10818
  "amazement",
10432
10819
  "anger",
@@ -10448,7 +10835,7 @@ function createEmotionVector(weights = {}) {
10448
10835
  if (idx >= 0) {
10449
10836
  vector[idx] = Math.max(0, Math.min(1, value));
10450
10837
  } else {
10451
- logger34.warn(`Invalid emotion name in createEmotionVector: "${name}"`);
10838
+ logger36.warn(`Invalid emotion name in createEmotionVector: "${name}"`);
10452
10839
  }
10453
10840
  }
10454
10841
  return vector;
@@ -10531,7 +10918,7 @@ var EmotionController = class {
10531
10918
  this.targetEmotion.set(newEmotion);
10532
10919
  this.currentEmotion.set(newEmotion);
10533
10920
  this.transitionProgress = 1;
10534
- logger34.debug("set", { weights });
10921
+ logger36.debug("set", { weights });
10535
10922
  }
10536
10923
  /**
10537
10924
  * Set emotion from preset immediately
@@ -10541,7 +10928,7 @@ var EmotionController = class {
10541
10928
  this.targetEmotion.set(newEmotion);
10542
10929
  this.currentEmotion.set(newEmotion);
10543
10930
  this.transitionProgress = 1;
10544
- logger34.debug("setPreset", { preset });
10931
+ logger36.debug("setPreset", { preset });
10545
10932
  }
10546
10933
  /**
10547
10934
  * Transition to new emotion over time
@@ -10553,9 +10940,9 @@ var EmotionController = class {
10553
10940
  this.currentEmotion.set(this.emotion);
10554
10941
  this.targetEmotion.set(createEmotionVector(weights));
10555
10942
  this.transitionDuration = durationMs;
10556
- this.transitionStartTime = performance.now();
10943
+ this.transitionStartTime = getClock().now();
10557
10944
  this.transitionProgress = 0;
10558
- logger34.debug("transitionTo", { weights, durationMs });
10945
+ logger36.debug("transitionTo", { weights, durationMs });
10559
10946
  }
10560
10947
  /**
10561
10948
  * Transition to preset over time
@@ -10564,7 +10951,7 @@ var EmotionController = class {
10564
10951
  this.currentEmotion.set(this.emotion);
10565
10952
  this.targetEmotion.set(getEmotionPreset(preset));
10566
10953
  this.transitionDuration = durationMs;
10567
- this.transitionStartTime = performance.now();
10954
+ this.transitionStartTime = getClock().now();
10568
10955
  this.transitionProgress = 0;
10569
10956
  }
10570
10957
  /**
@@ -10572,7 +10959,7 @@ var EmotionController = class {
10572
10959
  */
10573
10960
  update() {
10574
10961
  if (this.transitionProgress >= 1) return;
10575
- const elapsed = performance.now() - this.transitionStartTime;
10962
+ const elapsed = getClock().now() - this.transitionStartTime;
10576
10963
  this.transitionProgress = Math.min(1, elapsed / this.transitionDuration);
10577
10964
  }
10578
10965
  /**
@@ -10588,7 +10975,7 @@ var EmotionController = class {
10588
10975
  this.currentEmotion.fill(0);
10589
10976
  this.targetEmotion.fill(0);
10590
10977
  this.transitionProgress = 1;
10591
- logger34.debug("reset");
10978
+ logger36.debug("reset");
10592
10979
  }
10593
10980
  };
10594
10981
 
@@ -10669,7 +11056,7 @@ var DEFAULT_ANIMATION_CONFIG = {
10669
11056
  };
10670
11057
 
10671
11058
  // src/animation/AnimationGraph.ts
10672
- var logger35 = createLogger("AnimationGraph");
11059
+ var logger37 = createLogger("AnimationGraph");
10673
11060
  var AnimationGraph = class extends EventEmitter {
10674
11061
  constructor(config = {}) {
10675
11062
  super();
@@ -10702,7 +11089,7 @@ var AnimationGraph = class extends EventEmitter {
10702
11089
  this.stateEnterTime = Date.now();
10703
11090
  this.lastUpdateTime = Date.now();
10704
11091
  this.cachedOutput = this.computeOutput();
10705
- logger35.info("constructor", {
11092
+ logger37.info("constructor", {
10706
11093
  initialState: this.config.initialState,
10707
11094
  stateCount: this.config.states.length,
10708
11095
  transitionCount: this.config.transitions.length
@@ -10773,7 +11160,7 @@ var AnimationGraph = class extends EventEmitter {
10773
11160
  setState(stateName, blendDuration = 300) {
10774
11161
  const targetState = this.config.states.find((s) => s.name === stateName);
10775
11162
  if (!targetState) {
10776
- logger35.warn(`State '${stateName}' not found`);
11163
+ logger37.warn(`State '${stateName}' not found`);
10777
11164
  return;
10778
11165
  }
10779
11166
  if (targetState.name === this.currentState.name && !this.isTransitioning) {
@@ -10851,7 +11238,7 @@ var AnimationGraph = class extends EventEmitter {
10851
11238
  (s) => s.name === transition.to
10852
11239
  );
10853
11240
  if (!targetState) {
10854
- logger35.warn(`Target state '${transition.to}' not found`);
11241
+ logger37.warn(`Target state '${transition.to}' not found`);
10855
11242
  return;
10856
11243
  }
10857
11244
  const fromState = this.currentState.name;
@@ -10865,7 +11252,7 @@ var AnimationGraph = class extends EventEmitter {
10865
11252
  if (!this.currentState.emotionBlendEnabled) {
10866
11253
  this.targetEmotionWeight = 0;
10867
11254
  }
10868
- logger35.debug("state transition", {
11255
+ logger37.debug("state transition", {
10869
11256
  from: fromState,
10870
11257
  to: targetState.name,
10871
11258
  trigger: event,
@@ -10902,7 +11289,7 @@ var AnimationGraph = class extends EventEmitter {
10902
11289
  if (this.currentState.timeout <= 0) return;
10903
11290
  const elapsed = now - this.stateEnterTime;
10904
11291
  if (elapsed >= this.currentState.timeout) {
10905
- logger35.debug("timeout transition", {
11292
+ logger37.debug("timeout transition", {
10906
11293
  state: this.currentState.name,
10907
11294
  elapsed,
10908
11295
  timeout: this.currentState.timeout
@@ -11016,7 +11403,7 @@ var AnimationGraph = class extends EventEmitter {
11016
11403
 
11017
11404
  // src/animation/ProceduralLifeLayer.ts
11018
11405
  import { createNoise2D } from "simplex-noise";
11019
- var logger36 = createLogger("ProceduralLifeLayer");
11406
+ var logger38 = createLogger("ProceduralLifeLayer");
11020
11407
  var simplex2d = createNoise2D();
11021
11408
  var LIFE_BS_INDEX = /* @__PURE__ */ new Map();
11022
11409
  for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
@@ -11122,7 +11509,7 @@ var ProceduralLifeLayer = class {
11122
11509
  }
11123
11510
  this.blinkInterval = this.nextBlinkInterval();
11124
11511
  this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
11125
- logger36.debug("constructor", {
11512
+ logger38.debug("constructor", {
11126
11513
  blinkIntervalRange: this.blinkIntervalRange,
11127
11514
  useLogNormalBlinks: this.useLogNormalBlinks,
11128
11515
  gazeBreakIntervalRange: this.gazeBreakIntervalRange,
@@ -11226,7 +11613,7 @@ var ProceduralLifeLayer = class {
11226
11613
  * Reset all internal state to initial values.
11227
11614
  */
11228
11615
  reset() {
11229
- logger36.debug("reset");
11616
+ logger38.debug("reset");
11230
11617
  this.blinkTimer = 0;
11231
11618
  this.blinkInterval = this.nextBlinkInterval();
11232
11619
  this.blinkPhase = PHASE_OPEN;
@@ -11278,7 +11665,7 @@ var ProceduralLifeLayer = class {
11278
11665
  this.blinkTimer = 0;
11279
11666
  this.blinkInterval = this.nextBlinkInterval();
11280
11667
  this.asymmetryRight = 0.95 + Math.random() * 0.08;
11281
- logger36.trace("blink", { nextInterval: this.blinkInterval });
11668
+ logger38.trace("blink", { nextInterval: this.blinkInterval });
11282
11669
  }
11283
11670
  if (this.blinkPhase > PHASE_OPEN) {
11284
11671
  this.blinkProgress += delta;
@@ -11359,7 +11746,7 @@ var ProceduralLifeLayer = class {
11359
11746
  this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
11360
11747
  this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
11361
11748
  this.gazeBreakInterval = randomRange(...params.interval);
11362
- logger36.trace("gaze break", {
11749
+ logger38.trace("gaze break", {
11363
11750
  targetX: this.gazeBreakTargetX.toFixed(3),
11364
11751
  targetY: this.gazeBreakTargetY.toFixed(3),
11365
11752
  nextInterval: this.gazeBreakInterval.toFixed(2),
@@ -11602,7 +11989,7 @@ var ALL_AUS = [...new Set(
11602
11989
  )];
11603
11990
 
11604
11991
  // src/face/EmotionResolver.ts
11605
- var logger37 = createLogger("EmotionResolver");
11992
+ var logger39 = createLogger("EmotionResolver");
11606
11993
  var BS_INDEX = /* @__PURE__ */ new Map();
11607
11994
  for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
11608
11995
  BS_INDEX.set(LAM_BLENDSHAPES[i], i);
@@ -11629,7 +12016,7 @@ var EmotionResolver = class {
11629
12016
  if (!emotionWeight || emotionWeight < 0.01) continue;
11630
12017
  const auActivations = EMOTION_TO_AU[emotionName];
11631
12018
  if (!auActivations) {
11632
- logger37.warn(`Unknown emotion name with no AU mapping: "${emotionName}"`);
12019
+ logger39.warn(`Unknown emotion name with no AU mapping: "${emotionName}"`);
11633
12020
  continue;
11634
12021
  }
11635
12022
  for (const activation of auActivations) {
@@ -11654,7 +12041,7 @@ var EmotionResolver = class {
11654
12041
  };
11655
12042
 
11656
12043
  // src/face/FaceCompositor.ts
11657
- var logger38 = createLogger("FaceCompositor");
12044
+ var logger40 = createLogger("FaceCompositor");
11658
12045
  function smoothstep(t) {
11659
12046
  return t * t * (3 - 2 * t);
11660
12047
  }
@@ -11685,7 +12072,7 @@ var FaceCompositor = class {
11685
12072
  if (config?.profile) {
11686
12073
  this.applyProfileArrays(config.profile);
11687
12074
  }
11688
- logger38.debug("constructor", {
12075
+ logger40.debug("constructor", {
11689
12076
  emotionSmoothing: this.emotionSmoothing,
11690
12077
  hasProfile: !!config?.profile,
11691
12078
  hasLifeLayer: !!config?.lifeLayer
@@ -11701,7 +12088,7 @@ var FaceCompositor = class {
11701
12088
  * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
11702
12089
  */
11703
12090
  compose(base, input, target) {
11704
- const composeStart = performance.now();
12091
+ const composeStart = getClock().now();
11705
12092
  const out = target ?? this.outputBuffer;
11706
12093
  out.set(base);
11707
12094
  const emotion = input.emotion ?? this.stickyEmotion;
@@ -11748,7 +12135,7 @@ var FaceCompositor = class {
11748
12135
  }
11749
12136
  getTelemetry()?.recordHistogram(
11750
12137
  MetricNames.COMPOSITOR_COMPOSE_LATENCY,
11751
- (performance.now() - composeStart) * 1e3
12138
+ (getClock().now() - composeStart) * 1e3
11752
12139
  // µs
11753
12140
  );
11754
12141
  return { blendshapes: out, headDelta: lifeResult.headDelta };
@@ -11758,7 +12145,7 @@ var FaceCompositor = class {
11758
12145
  */
11759
12146
  setEmotion(weights) {
11760
12147
  this.stickyEmotion = weights;
11761
- logger38.debug("setEmotion", { weights });
12148
+ logger40.debug("setEmotion", { weights });
11762
12149
  }
11763
12150
  /**
11764
12151
  * Update character profile at runtime.
@@ -11767,7 +12154,7 @@ var FaceCompositor = class {
11767
12154
  this.multiplier.fill(1);
11768
12155
  this.offset.fill(0);
11769
12156
  this.applyProfileArrays(profile);
11770
- logger38.debug("setProfile", {
12157
+ logger40.debug("setProfile", {
11771
12158
  multiplierKeys: profile.multiplier ? Object.keys(profile.multiplier).length : 0,
11772
12159
  offsetKeys: profile.offset ? Object.keys(profile.offset).length : 0
11773
12160
  });
@@ -11781,7 +12168,7 @@ var FaceCompositor = class {
11781
12168
  this.lifeBuffer.fill(0);
11782
12169
  this.stickyEmotion = void 0;
11783
12170
  this.lifeLayer.reset();
11784
- logger38.debug("reset");
12171
+ logger40.debug("reset");
11785
12172
  }
11786
12173
  /** Expand partial profile maps into dense Float32Arrays */
11787
12174
  applyProfileArrays(profile) {
@@ -11866,7 +12253,7 @@ function parseEmotionTags(text) {
11866
12253
  }
11867
12254
 
11868
12255
  // src/character/CharacterController.ts
11869
- var logger39 = createLogger("CharacterController");
12256
+ var logger41 = createLogger("CharacterController");
11870
12257
  var FRAME_BUDGET_US = 33e3;
11871
12258
  var EMOTION_MAP = {
11872
12259
  // Synced with EmotionPresets (packages/core/src/emotion/Emotion.ts)
@@ -11936,7 +12323,7 @@ var CharacterController = class {
11936
12323
  this.gazeYawInfluence = config?.gaze?.yawInfluence ?? 0.4;
11937
12324
  this.gazePitchInfluence = config?.gaze?.pitchInfluence ?? 0.3;
11938
12325
  this.gazeSmoothing = config?.gaze?.smoothing ?? 5;
11939
- logger39.debug("constructor", {
12326
+ logger41.debug("constructor", {
11940
12327
  gazeEnabled: this.gazeEnabled,
11941
12328
  gazeYawInfluence: this.gazeYawInfluence,
11942
12329
  gazePitchInfluence: this.gazePitchInfluence,
@@ -11951,7 +12338,7 @@ var CharacterController = class {
11951
12338
  * into a single output frame.
11952
12339
  */
11953
12340
  update(input) {
11954
- const frameStart = performance.now();
12341
+ const frameStart = getClock().now();
11955
12342
  const base = input.baseBlendshapes ?? this.zeroBase;
11956
12343
  const eyeTargets = this.computeEyeTargets(
11957
12344
  input.cameraWorldPos,
@@ -11978,7 +12365,7 @@ var CharacterController = class {
11978
12365
  lifeHeadDelta,
11979
12366
  input.avatarRotationY ?? 0
11980
12367
  );
11981
- const frameUs = (performance.now() - frameStart) * 1e3;
12368
+ const frameUs = (getClock().now() - frameStart) * 1e3;
11982
12369
  this.frameTimes[this.frameTimeIdx] = frameUs;
11983
12370
  this.frameTimeIdx = (this.frameTimeIdx + 1) % this.frameTimes.length;
11984
12371
  if (this.frameTimeFill < this.frameTimes.length) this.frameTimeFill++;
@@ -12000,13 +12387,13 @@ var CharacterController = class {
12000
12387
  const resolved = resolveEmotion(emotion);
12001
12388
  if (resolved) {
12002
12389
  this._compositor.setEmotion(resolved);
12003
- logger39.debug("setEmotion", { emotion, resolved });
12390
+ logger41.debug("setEmotion", { emotion, resolved });
12004
12391
  }
12005
12392
  }
12006
12393
  /** Update character profile at runtime. */
12007
12394
  setProfile(profile) {
12008
12395
  this._compositor.setProfile(profile);
12009
- logger39.debug("setProfile", {
12396
+ logger41.debug("setProfile", {
12010
12397
  multiplierKeys: profile.multiplier ? Object.keys(profile.multiplier).length : 0,
12011
12398
  offsetKeys: profile.offset ? Object.keys(profile.offset).length : 0
12012
12399
  });
@@ -12041,11 +12428,11 @@ var CharacterController = class {
12041
12428
  this._compositor.reset();
12042
12429
  this.gazeHeadYaw = 0;
12043
12430
  this.gazeHeadPitch = -0.1;
12044
- logger39.debug("reset");
12431
+ logger41.debug("reset");
12045
12432
  }
12046
12433
  dispose() {
12047
12434
  this.reset();
12048
- logger39.debug("dispose");
12435
+ logger41.debug("dispose");
12049
12436
  }
12050
12437
  // ---------------------------------------------------------------------------
12051
12438
  // Eye angle math (extracted from r3f useGazeTracking.computeEyeTargets)
@@ -12127,7 +12514,7 @@ var CharacterController = class {
12127
12514
  };
12128
12515
 
12129
12516
  // src/orchestration/MicLipSync.ts
12130
- var logger40 = createLogger("MicLipSync");
12517
+ var logger42 = createLogger("MicLipSync");
12131
12518
  var MicLipSync = class extends EventEmitter {
12132
12519
  constructor(config) {
12133
12520
  super();
@@ -12146,7 +12533,7 @@ var MicLipSync = class extends EventEmitter {
12146
12533
  this.vadChunkSize = 0;
12147
12534
  this.vadBuffer = null;
12148
12535
  this.vadBufferOffset = 0;
12149
- logger40.info("MicLipSync created", {
12536
+ logger42.info("MicLipSync created", {
12150
12537
  sampleRate: config.sampleRate ?? 16e3,
12151
12538
  micChunkSize: config.micChunkSize ?? 512,
12152
12539
  hasVAD: !!config.vad,
@@ -12168,12 +12555,12 @@ var MicLipSync = class extends EventEmitter {
12168
12555
  this._currentFrame = scaled;
12169
12556
  if (!this._firstFrameEmitted) {
12170
12557
  this._firstFrameEmitted = true;
12171
- logger40.trace("First blendshape frame emitted");
12558
+ logger42.trace("First blendshape frame emitted");
12172
12559
  }
12173
12560
  this.emit("frame", { blendshapes: scaled, rawBlendshapes: raw });
12174
12561
  },
12175
12562
  onError: (error) => {
12176
- logger40.error("A2E inference error", { message: error.message });
12563
+ logger42.error("A2E inference error", { message: error.message });
12177
12564
  this.emit("error", error);
12178
12565
  }
12179
12566
  });
@@ -12181,7 +12568,9 @@ var MicLipSync = class extends EventEmitter {
12181
12568
  const float32 = int16ToFloat32(pcm);
12182
12569
  this.processor.pushAudio(float32);
12183
12570
  if (this.vad) {
12184
- this.vadQueue = this.vadQueue.then(() => this.processVAD(float32)).catch(() => {
12571
+ this.vadQueue = this.vadQueue.then(() => this.processVAD(float32)).catch((err) => {
12572
+ logger42.warn("VAD processing error", { error: String(err), code: ErrorCodes.SPH_VAD_ERROR });
12573
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
12185
12574
  });
12186
12575
  }
12187
12576
  });
@@ -12216,7 +12605,7 @@ var MicLipSync = class extends EventEmitter {
12216
12605
  /** Start microphone capture and inference loop */
12217
12606
  async start() {
12218
12607
  if (this._state === "active") return;
12219
- logger40.info("Starting MicLipSync");
12608
+ logger42.info("Starting MicLipSync");
12220
12609
  getTelemetry()?.incrementCounter(MetricNames.MIC_SESSIONS);
12221
12610
  await this.mic.start();
12222
12611
  this.processor.startDrip();
@@ -12226,7 +12615,7 @@ var MicLipSync = class extends EventEmitter {
12226
12615
  /** Stop microphone and inference */
12227
12616
  stop() {
12228
12617
  if (this._state === "idle") return;
12229
- logger40.info("Stopping MicLipSync");
12618
+ logger42.info("Stopping MicLipSync");
12230
12619
  this.processor.stopDrip();
12231
12620
  this.mic.stop();
12232
12621
  this._isSpeaking = false;
@@ -12268,14 +12657,15 @@ var MicLipSync = class extends EventEmitter {
12268
12657
  const wasSpeaking = this._isSpeaking;
12269
12658
  this._isSpeaking = result.isSpeech;
12270
12659
  if (!wasSpeaking && result.isSpeech) {
12271
- this.speechStartTime = performance.now();
12660
+ this.speechStartTime = getClock().now();
12272
12661
  this.emit("speech:start");
12273
12662
  } else if (wasSpeaking && !result.isSpeech) {
12274
- const durationMs = performance.now() - this.speechStartTime;
12663
+ const durationMs = getClock().now() - this.speechStartTime;
12275
12664
  this.emit("speech:end", { durationMs });
12276
12665
  }
12277
12666
  } catch (err) {
12278
- logger40.warn("VAD process error", { error: String(err) });
12667
+ logger42.warn("VAD process error", { error: String(err), code: ErrorCodes.SPH_VAD_ERROR });
12668
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
12279
12669
  }
12280
12670
  this.vadBufferOffset = 0;
12281
12671
  }
@@ -12292,7 +12682,7 @@ var MicLipSync = class extends EventEmitter {
12292
12682
  };
12293
12683
 
12294
12684
  // src/orchestration/VoicePipeline.ts
12295
- var logger41 = createLogger("VoicePipeline");
12685
+ var logger43 = createLogger("VoicePipeline");
12296
12686
  var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12297
12687
  constructor(config) {
12298
12688
  super();
@@ -12324,6 +12714,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12324
12714
  this.lastProgressiveSamples = 0;
12325
12715
  // ASR error recovery
12326
12716
  this.asrErrorCount = 0;
12717
+ this.progressiveErrorCount = 0;
12327
12718
  // Response abort
12328
12719
  this.responseAbortController = null;
12329
12720
  // Listener cleanup
@@ -12367,7 +12758,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12367
12758
  if (typeof requestAnimationFrame !== "undefined") {
12368
12759
  await new Promise((r) => requestAnimationFrame(() => r()));
12369
12760
  }
12370
- logger41.debug("Creating PlaybackPipeline", {
12761
+ logger43.debug("Creating PlaybackPipeline", {
12371
12762
  neutralTransitionEnabled: this.config.neutralTransitionEnabled ?? true,
12372
12763
  audioDelayMs: this.config.audioDelayMs,
12373
12764
  chunkTargetMs: this.config.chunkTargetMs
@@ -12407,8 +12798,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12407
12798
  this.setState("ready");
12408
12799
  } catch (error) {
12409
12800
  const err = error instanceof Error ? error : new Error(String(error));
12801
+ span?.setAttributes({ "error.type": ErrorTypes.MODEL });
12410
12802
  span?.endWithError(err);
12411
- logger41.error("Model loading failed", { message: err.message });
12803
+ logger43.error("Model loading failed", { message: err.message });
12412
12804
  this.emit("error", err);
12413
12805
  this.setState("error");
12414
12806
  throw err;
@@ -12422,7 +12814,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12422
12814
  const { backends } = this.config;
12423
12815
  if (!backends) throw new Error("No backends config");
12424
12816
  this.emitProgress("Loading models", 0, 1, 0);
12425
- logger41.info("Loading from pre-built backends");
12817
+ logger43.info("Loading from pre-built backends");
12426
12818
  const toLoad = [];
12427
12819
  if (!backends.asr.isLoaded) toLoad.push(backends.asr.load());
12428
12820
  if (!backends.lam.isLoaded) toLoad.push(backends.lam.load());
@@ -12456,7 +12848,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12456
12848
  } else if (UnifiedInferenceWorker.isSupported()) {
12457
12849
  this.unifiedWorker = new UnifiedInferenceWorker();
12458
12850
  await this.unifiedWorker.init();
12459
- logger41.info("Created internal unified worker", { backend: this.unifiedWorker.backend });
12851
+ logger43.info("Created internal unified worker", { backend: this.unifiedWorker.backend });
12460
12852
  }
12461
12853
  }
12462
12854
  this.emitProgress("Loading models", 0, 3, 0);
@@ -12493,17 +12885,17 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12493
12885
  throw asrResult.reason;
12494
12886
  }
12495
12887
  this.asr = asr;
12496
- logger41.info("SenseVoice loaded");
12888
+ logger43.info("SenseVoice loaded");
12497
12889
  if (vadResult.status === "rejected") {
12498
12890
  throw vadResult.reason;
12499
12891
  }
12500
12892
  this.vad = vad;
12501
- logger41.info("Silero VAD loaded");
12893
+ logger43.info("Silero VAD loaded");
12502
12894
  if (lamResult.status === "rejected") {
12503
12895
  throw lamResult.reason;
12504
12896
  }
12505
12897
  this.lam = lam;
12506
- logger41.info("LAM loaded");
12898
+ logger43.info("LAM loaded");
12507
12899
  } finally {
12508
12900
  clearInterval(progressInterval);
12509
12901
  }
@@ -12511,7 +12903,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12511
12903
  if (this.isLocalMode) {
12512
12904
  const localConfig = this.config;
12513
12905
  if (localConfig.ttsConfig && !localConfig.tts) {
12514
- logger41.info("Creating Kokoro TTS from config", {
12906
+ logger43.info("Creating Kokoro TTS from config", {
12515
12907
  hasUnifiedWorker: !!this.unifiedWorker,
12516
12908
  voice: localConfig.ttsConfig.defaultVoice
12517
12909
  });
@@ -12521,7 +12913,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12521
12913
  });
12522
12914
  }
12523
12915
  if (localConfig.tts && !localConfig.ttsConfig && isIOS()) {
12524
- logger41.warn(
12916
+ logger43.warn(
12525
12917
  "External TTS on iOS creates a separate ORT WASM instance, risking OOM. Prefer ttsConfig for automatic unified worker integration."
12526
12918
  );
12527
12919
  }
@@ -12529,9 +12921,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12529
12921
  throw new Error("VoicePipeline local mode requires either tts or ttsConfig");
12530
12922
  }
12531
12923
  if (!localConfig.tts.isLoaded) {
12532
- logger41.info("Loading local TTS model...");
12924
+ logger43.info("Loading local TTS model...");
12533
12925
  await localConfig.tts.load();
12534
- logger41.info("Local TTS model loaded");
12926
+ logger43.info("Local TTS model loaded");
12535
12927
  }
12536
12928
  }
12537
12929
  this.emitProgress("Loading models", 100, 3, 3);
@@ -12547,8 +12939,8 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12547
12939
  this.epoch++;
12548
12940
  this._sessionId = crypto.randomUUID();
12549
12941
  this.asrErrorCount = 0;
12550
- logger41.info("Starting voice pipeline", { sessionId: this._sessionId });
12551
- logger41.debug("Pipeline mode", { mode: this.isLocalMode ? "local" : "cloud" });
12942
+ logger43.info("Starting voice pipeline", { sessionId: this._sessionId });
12943
+ logger43.debug("Pipeline mode", { mode: this.isLocalMode ? "local" : "cloud" });
12552
12944
  this.mic = new MicrophoneCapture(this.omoteEvents, {
12553
12945
  sampleRate: 16e3,
12554
12946
  chunkSize: 512
@@ -12561,11 +12953,11 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12561
12953
  this.emit("audio:level", level);
12562
12954
  });
12563
12955
  await this.mic.start();
12564
- logger41.debug("Microphone started");
12956
+ logger43.debug("Microphone started");
12565
12957
  this.setState("listening");
12566
12958
  }
12567
12959
  stop() {
12568
- logger41.info("Stopping voice pipeline", { sessionId: this._sessionId });
12960
+ logger43.info("Stopping voice pipeline", { sessionId: this._sessionId });
12569
12961
  this.stopped = true;
12570
12962
  this.epoch++;
12571
12963
  this.clearSilenceTimer();
@@ -12594,7 +12986,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12594
12986
  this.playback?.setProfile(profile);
12595
12987
  }
12596
12988
  async dispose() {
12597
- logger41.debug("Disposing VoicePipeline");
12989
+ logger43.debug("Disposing VoicePipeline");
12598
12990
  this.stop();
12599
12991
  this.epoch++;
12600
12992
  await Promise.allSettled([
@@ -12628,19 +13020,20 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12628
13020
  if (result.isSpeech) {
12629
13021
  if (!wasSpeaking) {
12630
13022
  this.isSpeaking = true;
12631
- this.speechStartTime = performance.now();
13023
+ this.speechStartTime = getClock().now();
12632
13024
  this.audioBuffer = [];
12633
13025
  this.audioBufferSamples = 0;
12634
13026
  this.lastProgressiveResult = null;
12635
13027
  this.lastProgressiveSamples = 0;
12636
- logger41.debug("VAD speech start");
13028
+ this.progressiveErrorCount = 0;
13029
+ logger43.debug("VAD speech start");
12637
13030
  this.emit("speech:start");
12638
13031
  this.startProgressiveTranscription();
12639
13032
  }
12640
13033
  this.audioBuffer.push(new Float32Array(samples));
12641
13034
  this.audioBufferSamples += samples.length;
12642
13035
  if (this.audioBufferSamples >= _VoicePipeline.MAX_AUDIO_BUFFER_SAMPLES) {
12643
- logger41.warn("Audio buffer exceeded max, forcing transcription flush");
13036
+ logger43.warn("Audio buffer exceeded max, forcing transcription flush");
12644
13037
  this.onSilenceDetected();
12645
13038
  return;
12646
13039
  }
@@ -12656,7 +13049,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12656
13049
  }
12657
13050
  }
12658
13051
  } catch (err) {
12659
- logger41.warn("VAD error", { error: String(err) });
13052
+ logger43.warn("VAD error", { error: String(err) });
12660
13053
  }
12661
13054
  }
12662
13055
  // ---------------------------------------------------------------------------
@@ -12667,18 +13060,18 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12667
13060
  const extended = this.config.silenceTimeoutExtendedMs ?? 700;
12668
13061
  const adaptive = this.config.adaptiveTimeout ?? true;
12669
13062
  if (!adaptive) return base;
12670
- const speechDurationMs = performance.now() - this.speechStartTime;
13063
+ const speechDurationMs = getClock().now() - this.speechStartTime;
12671
13064
  return speechDurationMs > 3e3 ? extended : base;
12672
13065
  }
12673
13066
  onSilenceDetected() {
12674
13067
  const capturedEpoch = this.epoch;
12675
13068
  this.isSpeaking = false;
12676
- const durationMs = performance.now() - this.speechStartTime;
12677
- logger41.debug("VAD speech end", { durationMs: Math.round(durationMs) });
13069
+ const durationMs = getClock().now() - this.speechStartTime;
13070
+ logger43.debug("VAD speech end", { durationMs: Math.round(durationMs) });
12678
13071
  this.emit("speech:end", { durationMs });
12679
13072
  this.clearSilenceTimer();
12680
13073
  this.processEndOfSpeech(capturedEpoch).catch((err) => {
12681
- logger41.error("End of speech processing failed", { error: String(err) });
13074
+ logger43.error("End of speech processing failed", { error: String(err) });
12682
13075
  if (this.epoch === capturedEpoch && !this.stopped) {
12683
13076
  this.emit("error", err instanceof Error ? err : new Error(String(err)));
12684
13077
  this.setState("listening");
@@ -12692,7 +13085,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12692
13085
  const turnSpan = getTelemetry()?.startSpan("VoicePipeline.turn", {
12693
13086
  "session.id": this._sessionId ?? ""
12694
13087
  });
12695
- const turnStart = performance.now();
13088
+ const turnStart = getClock().now();
12696
13089
  if (this.progressivePromise) {
12697
13090
  try {
12698
13091
  await this.progressivePromise;
@@ -12717,7 +13110,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12717
13110
  const minEnergy = this.config.minAudioEnergy ?? 0.02;
12718
13111
  const durationSec = totalSamples / 16e3;
12719
13112
  if (durationSec < minDuration) {
12720
- logger41.info("Audio too short, discarding", { durationSec });
13113
+ logger43.info("Audio too short, discarding", { durationSec });
12721
13114
  turnSpan?.end();
12722
13115
  this.setState("listening");
12723
13116
  return;
@@ -12728,7 +13121,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12728
13121
  }
12729
13122
  rms = Math.sqrt(rms / fullAudio.length);
12730
13123
  if (rms < minEnergy) {
12731
- logger41.info("Audio too quiet, discarding", { rms });
13124
+ logger43.info("Audio too quiet, discarding", { rms });
12732
13125
  turnSpan?.end();
12733
13126
  this.setState("listening");
12734
13127
  return;
@@ -12739,12 +13132,12 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12739
13132
  const coverageThreshold = this.config.progressiveCoverageThreshold ?? 0.8;
12740
13133
  if (this.lastProgressiveResult && this.lastProgressiveResult.text.trim().length > 0 && this.lastProgressiveSamples >= totalSamples * coverageThreshold) {
12741
13134
  transcript = { ...this.lastProgressiveResult, isFinal: true };
12742
- logger41.info("Using progressive result", {
13135
+ logger43.info("Using progressive result", {
12743
13136
  coverage: (this.lastProgressiveSamples / totalSamples).toFixed(2),
12744
13137
  text: transcript.text
12745
13138
  });
12746
13139
  } else {
12747
- logger41.debug("Progressive result insufficient, running final transcription", {
13140
+ logger43.debug("Progressive result insufficient, running final transcription", {
12748
13141
  samples: totalSamples,
12749
13142
  hadProgressive: !!this.lastProgressiveResult
12750
13143
  });
@@ -12759,7 +13152,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12759
13152
  return;
12760
13153
  }
12761
13154
  if (!transcript || !transcript.text.trim()) {
12762
- logger41.info("No transcript, resuming listening");
13155
+ logger43.info("No transcript, resuming listening");
12763
13156
  turnSpan?.end();
12764
13157
  this.setState("listening");
12765
13158
  return;
@@ -12767,7 +13160,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12767
13160
  this.emit("transcript", transcript);
12768
13161
  getTelemetry()?.recordHistogram(
12769
13162
  MetricNames.VOICE_TURN_LATENCY,
12770
- performance.now() - turnStart,
13163
+ getClock().now() - turnStart,
12771
13164
  { mode: this.isLocalMode ? "local" : "cloud" }
12772
13165
  );
12773
13166
  await this.callResponseHandler(transcript, capturedEpoch, turnSpan?.getContext());
@@ -12781,7 +13174,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12781
13174
  const span = getTelemetry()?.startSpan("VoicePipeline.response", {
12782
13175
  "text.length": transcript.text.length
12783
13176
  }, parentContext);
12784
- const responseStart = performance.now();
13177
+ const responseStart = getClock().now();
12785
13178
  this.setState("speaking");
12786
13179
  this.interruption?.setAISpeaking(true);
12787
13180
  if (transcript.emotion) {
@@ -12798,7 +13191,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12798
13191
  }
12799
13192
  getTelemetry()?.recordHistogram(
12800
13193
  MetricNames.VOICE_RESPONSE_LATENCY,
12801
- performance.now() - responseStart,
13194
+ getClock().now() - responseStart,
12802
13195
  { mode: this.isLocalMode ? "local" : "cloud" }
12803
13196
  );
12804
13197
  span?.end();
@@ -12808,8 +13201,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12808
13201
  return;
12809
13202
  }
12810
13203
  const err = error instanceof Error ? error : new Error(String(error));
13204
+ span?.setAttributes({ "error.type": ErrorTypes.RUNTIME });
12811
13205
  span?.endWithError(err);
12812
- logger41.error("Response handler error", { message: err.message });
13206
+ logger43.error("Response handler error", { message: err.message });
12813
13207
  this.emit("error", err);
12814
13208
  if (this.epoch === capturedEpoch && !this.stopped) {
12815
13209
  this.interruption?.setAISpeaking(false);
@@ -12880,11 +13274,11 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12880
13274
  // ---------------------------------------------------------------------------
12881
13275
  handleInterruption() {
12882
13276
  if (this._state !== "speaking") return;
12883
- logger41.info("Interruption triggered");
13277
+ logger43.info("Interruption triggered");
12884
13278
  getTelemetry()?.incrementCounter(MetricNames.VOICE_INTERRUPTIONS);
12885
13279
  this.epoch++;
12886
13280
  if (this.responseAbortController) {
12887
- logger41.debug("Aborting in-flight response");
13281
+ logger43.debug("Aborting in-flight response");
12888
13282
  }
12889
13283
  this.responseAbortController?.abort();
12890
13284
  this.playback?.stop();
@@ -12922,7 +13316,15 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12922
13316
  this.lastProgressiveSamples = snapshotSamples;
12923
13317
  this.emit("transcript", { ...result, isFinal: false });
12924
13318
  }
12925
- } catch {
13319
+ } catch (err) {
13320
+ this.progressiveErrorCount++;
13321
+ if (this.progressiveErrorCount % 10 === 1) {
13322
+ logger43.warn("Progressive transcription error", {
13323
+ code: ErrorCodes.SPH_ASR_ERROR,
13324
+ count: this.progressiveErrorCount,
13325
+ error: String(err)
13326
+ });
13327
+ }
12926
13328
  }
12927
13329
  })();
12928
13330
  }, intervalMs);
@@ -12938,8 +13340,9 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12938
13340
  // ---------------------------------------------------------------------------
12939
13341
  async transcribeWithTimeout(audio) {
12940
13342
  if (!this.asr) return null;
13343
+ const currentEpoch = this.epoch;
12941
13344
  const timeoutMs = this.config.transcriptionTimeoutMs ?? 1e4;
12942
- const startTime = performance.now();
13345
+ const startTime = getClock().now();
12943
13346
  const span = getTelemetry()?.startSpan("VoicePipeline.transcribe", {
12944
13347
  "inference.input_samples": audio.length,
12945
13348
  "inference.input_duration_ms": audio.length / 16e3 * 1e3
@@ -12953,7 +13356,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12953
13356
  })
12954
13357
  ]);
12955
13358
  clearTimeout(timeoutId);
12956
- const latency = performance.now() - startTime;
13359
+ const latency = getClock().now() - startTime;
12957
13360
  this.asrErrorCount = 0;
12958
13361
  getTelemetry()?.recordHistogram(MetricNames.VOICE_TRANSCRIPTION_LATENCY, latency);
12959
13362
  getTelemetry()?.incrementCounter(MetricNames.VOICE_TRANSCRIPTIONS);
@@ -12967,14 +13370,18 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12967
13370
  inferenceTimeMs: latency
12968
13371
  };
12969
13372
  } catch (error) {
13373
+ span?.setAttributes({ "error.type": ErrorTypes.INFERENCE });
12970
13374
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
12971
13375
  this.asrErrorCount++;
12972
- logger41.warn("Transcription failed", {
13376
+ logger43.warn("Transcription failed", {
12973
13377
  attempt: this.asrErrorCount,
12974
13378
  error: String(error)
12975
13379
  });
12976
13380
  if (this.asrErrorCount >= 3 && this.config.models) {
12977
- logger41.warn("3 consecutive ASR errors, recreating session");
13381
+ if (this.epoch !== currentEpoch) return null;
13382
+ logger43.warn("3 consecutive ASR errors, recreating session", {
13383
+ code: ErrorCodes.SPH_ASR_ERROR
13384
+ });
12978
13385
  try {
12979
13386
  await this.asr.dispose();
12980
13387
  this.asr = createSenseVoice({
@@ -12984,9 +13391,10 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
12984
13391
  unifiedWorker: this.unifiedWorker ?? void 0
12985
13392
  });
12986
13393
  await this.asr.load();
13394
+ if (this.epoch !== currentEpoch) return null;
12987
13395
  this.asrErrorCount = 0;
12988
13396
  } catch (recreateErr) {
12989
- logger41.error("ASR session recreation failed", { error: String(recreateErr) });
13397
+ logger43.error("ASR session recreation failed", { error: String(recreateErr) });
12990
13398
  }
12991
13399
  }
12992
13400
  return null;
@@ -13015,7 +13423,7 @@ var _VoicePipeline = class _VoicePipeline extends EventEmitter {
13015
13423
  // ---------------------------------------------------------------------------
13016
13424
  setState(state) {
13017
13425
  if (this._state === state) return;
13018
- logger41.info("State transition", { from: this._state, to: state });
13426
+ logger43.info("State transition", { from: this._state, to: state });
13019
13427
  this._state = state;
13020
13428
  this.emit("state", state);
13021
13429
  }
@@ -13034,7 +13442,7 @@ _VoicePipeline.MAX_AUDIO_BUFFER_SAMPLES = 16e3 * 30;
13034
13442
  var VoicePipeline = _VoicePipeline;
13035
13443
 
13036
13444
  // src/orchestration/VoiceOrchestrator.ts
13037
- var logger42 = createLogger("VoiceOrchestrator");
13445
+ var logger44 = createLogger("VoiceOrchestrator");
13038
13446
  var VoiceOrchestrator = class extends EventEmitter {
13039
13447
  constructor() {
13040
13448
  super(...arguments);
@@ -13084,7 +13492,7 @@ var VoiceOrchestrator = class extends EventEmitter {
13084
13492
  const epoch = ++this.connectEpoch;
13085
13493
  this._mode = config.mode ?? "local";
13086
13494
  this._sessionId = crypto.randomUUID();
13087
- logger42.info("Connecting voice orchestrator", { mode: this._mode });
13495
+ logger44.info("Connecting voice orchestrator", { mode: this._mode });
13088
13496
  if (this._mode === "local") {
13089
13497
  const localCfg = config;
13090
13498
  this.ttsSpeaker = new TTSSpeaker();
@@ -13137,7 +13545,7 @@ var VoiceOrchestrator = class extends EventEmitter {
13137
13545
  } else {
13138
13546
  this.wireCloudTranscript(config);
13139
13547
  }
13140
- logger42.info("Voice orchestrator connected", { mode: this._mode });
13548
+ logger44.info("Voice orchestrator connected", { mode: this._mode });
13141
13549
  }
13142
13550
  async disconnect() {
13143
13551
  this.connectEpoch++;
@@ -13251,7 +13659,7 @@ var VoiceOrchestrator = class extends EventEmitter {
13251
13659
  await this.speak(text);
13252
13660
  }
13253
13661
  } catch (e) {
13254
- logger42.error("Voice transcript handler error", { error: String(e) });
13662
+ logger44.error("Voice transcript handler error", { error: String(e) });
13255
13663
  } finally {
13256
13664
  this.interruption?.setAISpeaking(false);
13257
13665
  this.speechListener?.resume();
@@ -13292,7 +13700,7 @@ var VoiceOrchestrator = class extends EventEmitter {
13292
13700
  });
13293
13701
  } catch (e) {
13294
13702
  if (!abortController.signal.aborted) {
13295
- logger42.error("Cloud response handler error", { error: String(e) });
13703
+ logger44.error("Cloud response handler error", { error: String(e) });
13296
13704
  }
13297
13705
  } finally {
13298
13706
  this.responseAbortController = null;
@@ -13306,7 +13714,7 @@ var VoiceOrchestrator = class extends EventEmitter {
13306
13714
  // -------------------------------------------------------------------------
13307
13715
  handleInterruption() {
13308
13716
  if (this._state !== "speaking") return;
13309
- logger42.info("Interruption triggered");
13717
+ logger44.info("Interruption triggered");
13310
13718
  this.stopSpeaking();
13311
13719
  this.speechListener?.resume();
13312
13720
  this.setState("listening");
@@ -13352,10 +13760,12 @@ export {
13352
13760
  EMOTION_TO_AU,
13353
13761
  EMOTION_VECTOR_SIZE,
13354
13762
  EXPLICIT_EMOTION_COUNT,
13763
+ ElevenLabsTTSBackend,
13355
13764
  EmotionController,
13356
13765
  EmotionPresets,
13357
13766
  EmotionResolver,
13358
13767
  EmphasisDetector,
13768
+ ErrorCodes,
13359
13769
  ErrorTypes,
13360
13770
  EventEmitter,
13361
13771
  FaceCompositor,
@@ -13379,6 +13789,7 @@ export {
13379
13789
  PRESERVE_POSITION_BONES,
13380
13790
  PROTOCOL_VERSION,
13381
13791
  PlaybackPipeline,
13792
+ PollyTTSBackend,
13382
13793
  ProceduralLifeLayer,
13383
13794
  RingBuffer,
13384
13795
  SafariSpeechRecognition,
@@ -13402,6 +13813,7 @@ export {
13402
13813
  calculatePeak,
13403
13814
  calculateRMS,
13404
13815
  configureCacheLimit,
13816
+ configureClock,
13405
13817
  configureLogging,
13406
13818
  configureModelUrls,
13407
13819
  configureOrtCdn,
@@ -13418,6 +13830,7 @@ export {
13418
13830
  formatBytes,
13419
13831
  getCacheConfig,
13420
13832
  getCacheKey,
13833
+ getClock,
13421
13834
  getEmotionPreset,
13422
13835
  getLoggingConfig,
13423
13836
  getModelCache,