@omote/core 0.4.7 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -30,6 +30,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ A2EOrchestrator: () => A2EOrchestrator,
34
+ A2EProcessor: () => A2EProcessor,
33
35
  ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
34
36
  AgentCoreAdapter: () => AgentCoreAdapter,
35
37
  AnimationGraph: () => AnimationGraph,
@@ -37,23 +39,22 @@ __export(index_exports, {
37
39
  AudioEnergyAnalyzer: () => AudioEnergyAnalyzer,
38
40
  AudioScheduler: () => AudioScheduler,
39
41
  AudioSyncManager: () => AudioSyncManager,
42
+ BLENDSHAPE_TO_GROUP: () => BLENDSHAPE_TO_GROUP,
43
+ BlendshapeSmoother: () => BlendshapeSmoother,
40
44
  CTC_VOCAB: () => CTC_VOCAB,
41
45
  ConsoleExporter: () => ConsoleExporter,
42
46
  ConversationOrchestrator: () => ConversationOrchestrator,
43
47
  DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
44
48
  DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
45
- EMOTION_ARKIT_MAP: () => EMOTION_ARKIT_MAP,
46
49
  EMOTION_NAMES: () => EMOTION_NAMES,
47
50
  EMOTION_VECTOR_SIZE: () => EMOTION_VECTOR_SIZE,
48
51
  EmotionController: () => EmotionController,
49
52
  EmotionPresets: () => EmotionPresets,
50
- EmotionToBlendshapeMapper: () => EmotionToBlendshapeMapper,
51
53
  EmphasisDetector: () => EmphasisDetector,
52
54
  EventEmitter: () => EventEmitter,
53
55
  FullFacePipeline: () => FullFacePipeline,
54
56
  INFERENCE_LATENCY_BUCKETS: () => INFERENCE_LATENCY_BUCKETS,
55
57
  InterruptionHandler: () => InterruptionHandler,
56
- LAMPipeline: () => LAMPipeline,
57
58
  LAM_BLENDSHAPES: () => LAM_BLENDSHAPES,
58
59
  LOG_LEVEL_PRIORITY: () => LOG_LEVEL_PRIORITY,
59
60
  MODEL_LOAD_TIME_BUCKETS: () => MODEL_LOAD_TIME_BUCKETS,
@@ -72,74 +73,55 @@ __export(index_exports, {
72
73
  SileroVADInference: () => SileroVADInference,
73
74
  SileroVADUnifiedAdapter: () => SileroVADUnifiedAdapter,
74
75
  SileroVADWorker: () => SileroVADWorker,
75
- SyncedAudioPipeline: () => SyncedAudioPipeline,
76
76
  TenantManager: () => TenantManager,
77
- UPPER_FACE_BLENDSHAPES: () => UPPER_FACE_BLENDSHAPES,
78
77
  UnifiedInferenceWorker: () => UnifiedInferenceWorker,
79
- WAV2ARKIT_BLENDSHAPES: () => WAV2ARKIT_BLENDSHAPES,
80
78
  Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
81
79
  Wav2ArkitCpuUnifiedAdapter: () => Wav2ArkitCpuUnifiedAdapter,
82
80
  Wav2ArkitCpuWorker: () => Wav2ArkitCpuWorker,
83
81
  Wav2Vec2Inference: () => Wav2Vec2Inference,
84
- applyCMVN: () => applyCMVN,
85
- applyLFR: () => applyLFR,
86
82
  blendEmotions: () => blendEmotions,
87
83
  calculatePeak: () => calculatePeak,
88
84
  calculateRMS: () => calculateRMS,
89
- computeKaldiFbank: () => computeKaldiFbank,
90
85
  configureCacheLimit: () => configureCacheLimit,
91
86
  configureLogging: () => configureLogging,
92
87
  configureTelemetry: () => configureTelemetry,
88
+ createA2E: () => createA2E,
93
89
  createEmotionVector: () => createEmotionVector,
94
- createLipSync: () => createLipSync,
95
90
  createLogger: () => createLogger,
96
91
  createSenseVoice: () => createSenseVoice,
97
- createSessionWithFallback: () => createSessionWithFallback,
98
92
  createSileroVAD: () => createSileroVAD,
99
- ctcGreedyDecode: () => ctcGreedyDecode,
100
93
  fetchWithCache: () => fetchWithCache,
101
94
  formatBytes: () => formatBytes,
102
95
  getCacheConfig: () => getCacheConfig,
103
96
  getCacheKey: () => getCacheKey,
104
97
  getEmotionPreset: () => getEmotionPreset,
105
- getLoadedBackend: () => getLoadedBackend,
106
98
  getLoggingConfig: () => getLoggingConfig,
107
99
  getModelCache: () => getModelCache,
108
- getOnnxRuntime: () => getOnnxRuntime,
109
- getOnnxRuntimeForPreference: () => getOnnxRuntimeForPreference,
110
100
  getOptimalWasmThreads: () => getOptimalWasmThreads,
111
101
  getRecommendedBackend: () => getRecommendedBackend,
112
- getSessionOptions: () => getSessionOptions,
113
102
  getTelemetry: () => getTelemetry,
114
103
  hasWebGPUApi: () => hasWebGPUApi,
115
104
  isAndroid: () => isAndroid,
116
105
  isIOS: () => isIOS,
117
106
  isIOSSafari: () => isIOSSafari,
118
107
  isMobile: () => isMobile,
119
- isOnnxRuntimeLoaded: () => isOnnxRuntimeLoaded,
120
108
  isProtocolEvent: () => isProtocolEvent,
121
109
  isSafari: () => isSafari,
122
110
  isSpeechRecognitionAvailable: () => isSpeechRecognitionAvailable,
123
111
  isWebGPUAvailable: () => isWebGPUAvailable,
112
+ lerpBlendshapes: () => lerpBlendshapes,
124
113
  lerpEmotion: () => lerpEmotion,
125
114
  noopLogger: () => noopLogger,
126
- parseCMVNFromMetadata: () => parseCMVNFromMetadata,
127
- parseTokensFile: () => parseTokensFile,
128
115
  preloadModels: () => preloadModels,
129
- preloadOnnxRuntime: () => preloadOnnxRuntime,
130
- remapWav2ArkitToLam: () => remapWav2ArkitToLam,
131
116
  resetLoggingConfig: () => resetLoggingConfig,
132
117
  resolveBackend: () => resolveBackend,
133
- resolveLanguageId: () => resolveLanguageId,
134
- resolveTextNormId: () => resolveTextNormId,
135
118
  setLogLevel: () => setLogLevel,
136
119
  setLoggingEnabled: () => setLoggingEnabled,
137
120
  shouldEnableWasmProxy: () => shouldEnableWasmProxy,
138
- shouldUseCpuLipSync: () => shouldUseCpuLipSync,
121
+ shouldUseCpuA2E: () => shouldUseCpuA2E,
139
122
  shouldUseNativeASR: () => shouldUseNativeASR,
140
- shouldUseServerLipSync: () => shouldUseServerLipSync,
141
- supportsVADWorker: () => supportsVADWorker,
142
- symmetrizeBlendshapes: () => symmetrizeBlendshapes
123
+ shouldUseServerA2E: () => shouldUseServerA2E,
124
+ supportsVADWorker: () => supportsVADWorker
143
125
  });
144
126
  module.exports = __toCommonJS(index_exports);
145
127
 
@@ -649,730 +631,617 @@ var AudioChunkCoalescer = class {
649
631
  }
650
632
  };
651
633
 
652
- // src/audio/LAMPipeline.ts
653
- var LAMPipeline = class {
654
- constructor(options = {}) {
655
- this.options = options;
656
- this.REQUIRED_SAMPLES = 16e3;
657
- // 1.0s at 16kHz (LAM requirement)
658
- this.FRAME_RATE = 30;
659
- // LAM outputs 30fps
660
- this.buffer = new Float32Array(0);
661
- this.bufferStartTime = 0;
662
- this.frameQueue = [];
663
- /**
664
- * Last successfully retrieved frame
665
- * Used as fallback when no new frame is available to prevent avatar freezing
666
- */
667
- this.lastFrame = null;
668
- }
669
- /**
670
- * Push audio samples into the pipeline
671
- *
672
- * Accumulates samples and triggers LAM inference when buffer is full.
673
- * Multiple calls may be needed to accumulate enough samples.
674
- *
675
- * @param samples - Float32Array of audio samples
676
- * @param timestamp - AudioContext time when these samples start playing
677
- * @param lam - LAM inference engine
678
- */
679
- async push(samples, timestamp, lam) {
680
- if (this.buffer.length === 0) {
681
- this.bufferStartTime = timestamp;
682
- }
683
- const newBuffer = new Float32Array(this.buffer.length + samples.length);
684
- newBuffer.set(this.buffer, 0);
685
- newBuffer.set(samples, this.buffer.length);
686
- this.buffer = newBuffer;
687
- while (this.buffer.length >= this.REQUIRED_SAMPLES) {
688
- await this.processBuffer(lam);
689
- if (this.buffer.length >= this.REQUIRED_SAMPLES) {
690
- await new Promise((r) => setTimeout(r, 0));
691
- }
692
- }
693
- }
694
- /**
695
- * Process accumulated buffer through LAM inference
696
- */
697
- async processBuffer(lam) {
698
- try {
699
- const toProcess = this.buffer.slice(0, this.REQUIRED_SAMPLES);
700
- const processedStartTime = this.bufferStartTime;
701
- this.buffer = this.buffer.slice(this.REQUIRED_SAMPLES);
702
- const processedDuration = this.REQUIRED_SAMPLES / (this.options.sampleRate ?? 16e3);
703
- this.bufferStartTime = processedStartTime + processedDuration;
704
- const result = await lam.infer(toProcess);
705
- const frameDuration = 1 / this.FRAME_RATE;
706
- for (let i = 0; i < result.blendshapes.length; i++) {
707
- const frame = result.blendshapes[i];
708
- const timestamp = processedStartTime + i * frameDuration;
709
- this.frameQueue.push({ frame, timestamp });
710
- }
711
- this.options.onInference?.(result.blendshapes.length);
712
- } catch (error) {
713
- this.options.onError?.(error);
714
- this.buffer = new Float32Array(0);
715
- this.bufferStartTime = 0;
716
- }
717
- }
718
- /**
719
- * Get the frame that should be displayed at the current time
720
- *
721
- * Automatically removes frames that have already been displayed.
722
- * This prevents memory leaks from accumulating old frames.
723
- *
724
- * Discard Window (prevents premature frame discarding):
725
- * - WebGPU: 0.5s (LAM inference 20-100ms + RAF jitter + React stalls)
726
- * - WASM: 1.0s (LAM inference 50-500ms + higher variability)
727
- *
728
- * Last-Frame-Hold: Returns last valid frame instead of null to prevent
729
- * avatar freezing when between frames (RAF at 60fps vs LAM at 30fps).
730
- *
731
- * @param currentTime - Current AudioContext time
732
- * @param lam - LAM inference engine (optional, for backend detection)
733
- * @returns Current frame, or last frame as fallback, or null if no frames yet
734
- */
735
- getFrameForTime(currentTime, lam) {
736
- const discardWindow = lam?.backend === "wasm" ? 1 : 0.5;
737
- let discardedCount = 0;
738
- while (this.frameQueue.length > 0 && this.frameQueue[0].timestamp < currentTime - discardWindow) {
739
- const discarded = this.frameQueue.shift();
740
- discardedCount++;
741
- if (discardedCount === 1) {
742
- const ageMs = ((currentTime - discarded.timestamp) * 1e3).toFixed(0);
743
- console.warn("[LAM] Frame(s) discarded as too old", {
744
- ageMs,
745
- discardWindowMs: discardWindow * 1e3,
746
- queueLength: this.frameQueue.length,
747
- backend: lam?.backend ?? "unknown"
748
- });
749
- }
750
- }
751
- if (this.frameQueue.length > 0 && this.frameQueue[0].timestamp <= currentTime) {
752
- const { frame } = this.frameQueue.shift();
753
- this.lastFrame = frame;
754
- return frame;
755
- }
756
- return this.lastFrame;
757
- }
758
- /**
759
- * Get all frames in the queue (for debugging/monitoring)
760
- */
761
- getQueuedFrames() {
762
- return [...this.frameQueue];
763
- }
764
- /**
765
- * Get current buffer fill level (0-1)
766
- */
767
- get fillLevel() {
768
- return Math.min(1, this.buffer.length / this.REQUIRED_SAMPLES);
769
- }
770
- /**
771
- * Get number of frames queued
772
- */
773
- get queuedFrameCount() {
774
- return this.frameQueue.length;
775
- }
776
- /**
777
- * Get buffered audio duration in seconds
778
- */
779
- get bufferedDuration() {
780
- return this.buffer.length / (this.options.sampleRate ?? 16e3);
781
- }
782
- /**
783
- * Flush remaining buffered audio
784
- *
785
- * Processes any remaining audio in the buffer, even if less than REQUIRED_SAMPLES.
786
- * This ensures the final audio chunk generates blendshape frames.
787
- *
788
- * Should be called when audio stream ends to prevent losing the last 0-1 seconds.
789
- *
790
- * @param lam - LAM inference engine
791
- */
792
- async flush(lam) {
793
- if (this.buffer.length === 0) {
794
- return;
795
- }
796
- const padded = new Float32Array(this.REQUIRED_SAMPLES);
797
- padded.set(this.buffer, 0);
798
- const processedStartTime = this.bufferStartTime;
799
- try {
800
- const result = await lam.infer(padded);
801
- const actualDuration = this.buffer.length / (this.options.sampleRate ?? 16e3);
802
- const frameDuration = 1 / this.FRAME_RATE;
803
- const actualFrameCount = Math.ceil(actualDuration * this.FRAME_RATE);
804
- for (let i = 0; i < Math.min(actualFrameCount, result.blendshapes.length); i++) {
805
- const frame = result.blendshapes[i];
806
- const timestamp = processedStartTime + i * frameDuration;
807
- this.frameQueue.push({ frame, timestamp });
808
- }
809
- this.buffer = new Float32Array(0);
810
- this.bufferStartTime = 0;
811
- this.options.onInference?.(Math.min(actualFrameCount, result.blendshapes.length));
812
- } catch (error) {
813
- this.options.onError?.(error);
814
- this.buffer = new Float32Array(0);
815
- this.bufferStartTime = 0;
816
- }
817
- }
818
- /**
819
- * Adjust all queued frame timestamps by an offset
820
- *
821
- * Used for synchronization when audio scheduling time differs from
822
- * the estimated time used during LAM processing.
823
- *
824
- * @param offset - Time offset in seconds to add to all timestamps
825
- */
826
- adjustTimestamps(offset) {
827
- for (const frame of this.frameQueue) {
828
- frame.timestamp += offset;
829
- }
830
- }
831
- /**
832
- * Reset the pipeline
833
- */
834
- reset() {
835
- this.buffer = new Float32Array(0);
836
- this.bufferStartTime = 0;
837
- this.frameQueue = [];
838
- this.lastFrame = null;
839
- }
634
+ // src/logging/types.ts
635
+ var LOG_LEVEL_PRIORITY = {
636
+ error: 0,
637
+ warn: 1,
638
+ info: 2,
639
+ debug: 3,
640
+ trace: 4,
641
+ verbose: 5
642
+ };
643
+ var DEFAULT_LOGGING_CONFIG = {
644
+ level: "info",
645
+ enabled: true,
646
+ format: "pretty",
647
+ timestamps: true,
648
+ includeModule: true
840
649
  };
841
650
 
842
- // src/audio/audioUtils.ts
843
- function pcm16ToFloat32(buffer) {
844
- const byteLen = buffer.byteLength & ~1;
845
- const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
846
- const float32 = new Float32Array(int16.length);
847
- for (let i = 0; i < int16.length; i++) {
848
- float32[i] = int16[i] / 32768;
849
- }
850
- return float32;
851
- }
852
- function int16ToFloat32(int16) {
853
- const float32 = new Float32Array(int16.length);
854
- for (let i = 0; i < int16.length; i++) {
855
- float32[i] = int16[i] / 32768;
856
- }
857
- return float32;
651
+ // src/logging/formatters.ts
652
+ var COLORS = {
653
+ reset: "\x1B[0m",
654
+ red: "\x1B[31m",
655
+ yellow: "\x1B[33m",
656
+ blue: "\x1B[34m",
657
+ cyan: "\x1B[36m",
658
+ gray: "\x1B[90m",
659
+ white: "\x1B[37m",
660
+ magenta: "\x1B[35m"
661
+ };
662
+ var LEVEL_COLORS = {
663
+ error: COLORS.red,
664
+ warn: COLORS.yellow,
665
+ info: COLORS.blue,
666
+ debug: COLORS.cyan,
667
+ trace: COLORS.magenta,
668
+ verbose: COLORS.gray
669
+ };
670
+ var LEVEL_NAMES = {
671
+ error: "ERROR ",
672
+ warn: "WARN ",
673
+ info: "INFO ",
674
+ debug: "DEBUG ",
675
+ trace: "TRACE ",
676
+ verbose: "VERBOSE"
677
+ };
678
+ var isBrowser = typeof window !== "undefined";
679
+ function formatTimestamp(timestamp) {
680
+ const date = new Date(timestamp);
681
+ return date.toISOString().substring(11, 23);
858
682
  }
859
-
860
- // src/audio/SyncedAudioPipeline.ts
861
- var SyncedAudioPipeline = class extends EventEmitter {
862
- constructor(options) {
863
- super();
864
- this.options = options;
865
- this.playbackStarted = false;
866
- this.monitorInterval = null;
867
- this.frameAnimationId = null;
868
- const sampleRate = options.sampleRate ?? 16e3;
869
- const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
870
- const audioDelayMs = options.audioDelayMs ?? autoDelay;
871
- this.scheduler = new AudioScheduler({
872
- sampleRate,
873
- initialLookaheadSec: audioDelayMs / 1e3
874
- });
875
- this.coalescer = new AudioChunkCoalescer({
876
- sampleRate,
877
- targetDurationMs: options.chunkTargetMs ?? 200
878
- });
879
- this.lamPipeline = new LAMPipeline({
880
- sampleRate,
881
- onError: (error) => {
882
- this.emit("error", error);
683
+ function safeStringify(data) {
684
+ const seen = /* @__PURE__ */ new WeakSet();
685
+ return JSON.stringify(data, (key, value) => {
686
+ if (typeof value === "object" && value !== null) {
687
+ if (seen.has(value)) {
688
+ return "[Circular]";
883
689
  }
884
- });
885
- }
886
- /**
887
- * Initialize the pipeline
888
- */
889
- async initialize() {
890
- await this.scheduler.initialize();
891
- }
892
- /**
893
- * Start a new playback session
894
- *
895
- * Resets all state and prepares for incoming audio chunks.
896
- * Audio will be scheduled immediately as chunks arrive (no buffering).
897
- */
898
- start() {
899
- this.stopMonitoring();
900
- this.scheduler.reset();
901
- this.coalescer.reset();
902
- this.lamPipeline.reset();
903
- this.playbackStarted = false;
904
- this.scheduler.warmup();
905
- this.startFrameLoop();
906
- this.startMonitoring();
907
- }
908
- /**
909
- * Receive audio chunk from network
910
- *
911
- * Audio-first design: schedules audio immediately, LAM runs in background.
912
- * This prevents LAM inference (50-300ms) from blocking audio scheduling,
913
- * which caused audible stuttering with continuous audio streams.
914
- *
915
- * @param chunk - Uint8Array containing Int16 PCM audio
916
- */
917
- async onAudioChunk(chunk) {
918
- const combined = this.coalescer.add(chunk);
919
- if (!combined) {
920
- return;
690
+ seen.add(value);
921
691
  }
922
- const float32 = pcm16ToFloat32(combined);
923
- const scheduleTime = await this.scheduler.schedule(float32);
924
- if (!this.playbackStarted) {
925
- this.playbackStarted = true;
926
- this.emit("playback_start", scheduleTime);
692
+ if (value instanceof Error) {
693
+ return {
694
+ name: value.name,
695
+ message: value.message,
696
+ stack: value.stack
697
+ };
927
698
  }
928
- this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
929
- this.emit("error", err);
930
- });
931
- }
932
- /**
933
- * End of audio stream
934
- *
935
- * Flushes any remaining buffered data.
936
- */
937
- async end() {
938
- const remaining = this.coalescer.flush();
939
- if (remaining) {
940
- const chunk = new Uint8Array(remaining);
941
- await this.onAudioChunk(chunk);
699
+ if (value instanceof Float32Array || value instanceof Int16Array) {
700
+ return `${value.constructor.name}(${value.length})`;
942
701
  }
943
- await this.lamPipeline.flush(this.options.lam);
944
- }
945
- /**
946
- * Stop playback immediately with smooth fade-out
947
- *
948
- * Gracefully cancels all audio playback and LAM processing:
949
- * - Fades out audio over specified duration (default: 50ms)
950
- * - Cancels pending LAM inferences
951
- * - Clears all buffers and queues
952
- * - Emits 'playback_complete' event
953
- *
954
- * Use this for interruptions (e.g., user barge-in during AI speech).
955
- *
956
- * @param fadeOutMs - Fade-out duration in milliseconds (default: 50ms)
957
- * @returns Promise that resolves when fade-out completes
958
- */
959
- async stop(fadeOutMs = 50) {
960
- this.stopMonitoring();
961
- await this.scheduler.cancelAll(fadeOutMs);
962
- this.coalescer.reset();
963
- this.lamPipeline.reset();
964
- this.playbackStarted = false;
965
- this.emit("playback_complete", void 0);
702
+ if (ArrayBuffer.isView(value)) {
703
+ return `${value.constructor.name}(${value.byteLength})`;
704
+ }
705
+ return value;
706
+ });
707
+ }
708
+ var jsonFormatter = (entry) => {
709
+ const output = {
710
+ timestamp: entry.timestamp,
711
+ level: entry.level,
712
+ module: entry.module,
713
+ message: entry.message
714
+ };
715
+ if (entry.data && Object.keys(entry.data).length > 0) {
716
+ output.data = entry.data;
966
717
  }
967
- /**
968
- * Start frame animation loop
969
- *
970
- * Uses requestAnimationFrame to check for new LAM frames.
971
- * Synchronized to AudioContext clock (not visual refresh rate).
972
- *
973
- * Frame Emission Strategy:
974
- * - LAMPipeline uses last-frame-hold to prevent null returns
975
- * - Always emit frames (even repeated frames) to maintain smooth animation
976
- * - Renderer is responsible for detecting duplicate frames if needed
977
- */
978
- startFrameLoop() {
979
- const updateFrame = () => {
980
- const currentTime = this.scheduler.getCurrentTime();
981
- const frame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
982
- if (frame) {
983
- this.emit("frame_ready", frame);
984
- }
985
- this.frameAnimationId = requestAnimationFrame(updateFrame);
718
+ if (entry.error) {
719
+ output.error = {
720
+ name: entry.error.name,
721
+ message: entry.error.message,
722
+ stack: entry.error.stack
986
723
  };
987
- this.frameAnimationId = requestAnimationFrame(updateFrame);
988
724
  }
989
- /**
990
- * Start monitoring for playback completion
991
- */
992
- startMonitoring() {
993
- if (this.monitorInterval) {
994
- clearInterval(this.monitorInterval);
995
- }
996
- this.monitorInterval = window.setInterval(() => {
997
- if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
998
- this.emit("playback_complete", void 0);
999
- this.stopMonitoring();
1000
- }
1001
- }, 100);
725
+ return safeStringify(output);
726
+ };
727
+ var prettyFormatter = (entry) => {
728
+ const time = formatTimestamp(entry.timestamp);
729
+ const level = LEVEL_NAMES[entry.level];
730
+ const module2 = entry.module;
731
+ const message = entry.message;
732
+ let output;
733
+ if (isBrowser) {
734
+ output = `${time} ${level} [${module2}] ${message}`;
735
+ } else {
736
+ const color = LEVEL_COLORS[entry.level];
737
+ output = `${COLORS.gray}${time}${COLORS.reset} ${color}${level}${COLORS.reset} ${COLORS.cyan}[${module2}]${COLORS.reset} ${message}`;
1002
738
  }
1003
- /**
1004
- * Stop monitoring
1005
- */
1006
- stopMonitoring() {
1007
- if (this.monitorInterval) {
1008
- clearInterval(this.monitorInterval);
1009
- this.monitorInterval = null;
1010
- }
1011
- if (this.frameAnimationId) {
1012
- cancelAnimationFrame(this.frameAnimationId);
1013
- this.frameAnimationId = null;
739
+ if (entry.data && Object.keys(entry.data).length > 0) {
740
+ const dataStr = safeStringify(entry.data);
741
+ if (dataStr.length > 80) {
742
+ output += "\n " + JSON.stringify(entry.data, null, 2).replace(/\n/g, "\n ");
743
+ } else {
744
+ output += " " + dataStr;
1014
745
  }
1015
746
  }
1016
- /**
1017
- * Get current pipeline state (for debugging/monitoring)
1018
- */
1019
- getState() {
1020
- return {
1021
- playbackStarted: this.playbackStarted,
1022
- coalescerFill: this.coalescer.fillLevel,
1023
- lamFill: this.lamPipeline.fillLevel,
1024
- queuedFrames: this.lamPipeline.queuedFrameCount,
1025
- currentTime: this.scheduler.getCurrentTime(),
1026
- playbackEndTime: this.scheduler.getPlaybackEndTime()
1027
- };
1028
- }
1029
- /**
1030
- * Cleanup resources
1031
- */
1032
- dispose() {
1033
- this.stopMonitoring();
1034
- this.scheduler.dispose();
1035
- this.coalescer.reset();
1036
- this.lamPipeline.reset();
747
+ if (entry.error) {
748
+ output += `
749
+ ${entry.error.name}: ${entry.error.message}`;
750
+ if (entry.error.stack) {
751
+ const stackLines = entry.error.stack.split("\n").slice(1, 4);
752
+ output += "\n " + stackLines.join("\n ");
753
+ }
1037
754
  }
755
+ return output;
1038
756
  };
1039
-
1040
- // src/animation/EmotionToBlendshapeMapper.ts
1041
- var UPPER_FACE_BLENDSHAPES = [
1042
- // Brows (5)
1043
- "browDownLeft",
1044
- "browDownRight",
1045
- "browInnerUp",
1046
- "browOuterUpLeft",
1047
- "browOuterUpRight",
1048
- // Eyes (4)
1049
- "eyeSquintLeft",
1050
- "eyeSquintRight",
1051
- "eyeWideLeft",
1052
- "eyeWideRight",
1053
- // Cheeks (2)
1054
- "cheekSquintLeft",
1055
- "cheekSquintRight"
1056
- ];
1057
- var EMOTION_ARKIT_MAP = {
1058
- happy: {
1059
- // AU6 - Cheek raiser (primary Duchenne smile marker)
1060
- cheekSquintLeft: 0.5,
1061
- cheekSquintRight: 0.5,
1062
- // Slight eye squint from genuine smile (orbicularis oculi activation)
1063
- eyeSquintLeft: 0.2,
1064
- eyeSquintRight: 0.2
1065
- },
1066
- angry: {
1067
- // AU4 - Brow lowerer (intense, primary anger marker)
1068
- browDownLeft: 0.7,
1069
- browDownRight: 0.7,
1070
- // AU5 - Upper lid raiser (wide eyes, part of the "glare")
1071
- eyeWideLeft: 0.4,
1072
- eyeWideRight: 0.4,
1073
- // AU7 - Lid tightener (tense stare, combines with AU5 for angry glare)
1074
- eyeSquintLeft: 0.3,
1075
- eyeSquintRight: 0.3
1076
- },
1077
- sad: {
1078
- // AU1 - Inner brow raiser (primary sadness marker)
1079
- browInnerUp: 0.6,
1080
- // AU4 - Brow lowerer (brows drawn together)
1081
- browDownLeft: 0.3,
1082
- browDownRight: 0.3
1083
- },
1084
- neutral: {}
1085
- // All zeros - no expression overlay
1086
- };
1087
- var DEFAULT_CONFIG = {
1088
- smoothingFactor: 0.15,
1089
- confidenceThreshold: 0.3,
1090
- intensity: 1,
1091
- blendMode: "dominant",
1092
- minBlendProbability: 0.1,
1093
- energyModulation: false,
1094
- minEnergyScale: 0.3,
1095
- maxEnergyScale: 1
1096
- };
1097
- function createZeroBlendshapes() {
1098
- const result = {};
1099
- for (const name of UPPER_FACE_BLENDSHAPES) {
1100
- result[name] = 0;
757
+ function getFormatter(format) {
758
+ return format === "json" ? jsonFormatter : prettyFormatter;
759
+ }
760
+ function createBrowserConsoleArgs(entry) {
761
+ const time = formatTimestamp(entry.timestamp);
762
+ const level = entry.level.toUpperCase().padEnd(7);
763
+ const module2 = entry.module;
764
+ const message = entry.message;
765
+ const styles = {
766
+ time: "color: gray;",
767
+ error: "color: red; font-weight: bold;",
768
+ warn: "color: orange; font-weight: bold;",
769
+ info: "color: blue;",
770
+ debug: "color: cyan;",
771
+ trace: "color: magenta;",
772
+ verbose: "color: gray;",
773
+ module: "color: teal; font-weight: bold;",
774
+ message: "color: inherit;"
775
+ };
776
+ let formatStr = "%c%s %c%s %c[%s]%c %s";
777
+ const args = [
778
+ styles.time,
779
+ time,
780
+ styles[entry.level],
781
+ level,
782
+ styles.module,
783
+ module2,
784
+ styles.message,
785
+ message
786
+ ];
787
+ if (entry.data && Object.keys(entry.data).length > 0) {
788
+ formatStr += " %o";
789
+ args.push(entry.data);
1101
790
  }
1102
- return result;
791
+ return [formatStr, ...args];
1103
792
  }
1104
- function clamp01(value) {
1105
- return Math.max(0, Math.min(1, value));
793
+
794
+ // src/logging/Logger.ts
795
+ var isBrowser2 = typeof window !== "undefined";
796
+ var globalConfig = { ...DEFAULT_LOGGING_CONFIG };
797
+ function configureLogging(config) {
798
+ globalConfig = { ...globalConfig, ...config };
1106
799
  }
1107
- var EmotionToBlendshapeMapper = class {
1108
- /**
1109
- * Create a new EmotionToBlendshapeMapper
1110
- *
1111
- * @param config - Optional configuration
1112
- */
1113
- constructor(config) {
1114
- this.currentEnergy = 1;
1115
- this.config = {
1116
- ...DEFAULT_CONFIG,
1117
- ...config
1118
- };
1119
- this.targetBlendshapes = createZeroBlendshapes();
1120
- this.currentBlendshapes = createZeroBlendshapes();
800
+ function getLoggingConfig() {
801
+ return { ...globalConfig };
802
+ }
803
+ function resetLoggingConfig() {
804
+ globalConfig = { ...DEFAULT_LOGGING_CONFIG };
805
+ }
806
+ function setLogLevel(level) {
807
+ globalConfig.level = level;
808
+ }
809
+ function setLoggingEnabled(enabled) {
810
+ globalConfig.enabled = enabled;
811
+ }
812
+ var consoleSink = (entry) => {
813
+ const consoleMethod = entry.level === "error" ? "error" : entry.level === "warn" ? "warn" : "log";
814
+ if (globalConfig.format === "pretty" && isBrowser2) {
815
+ const args = createBrowserConsoleArgs(entry);
816
+ console[consoleMethod](...args);
817
+ } else {
818
+ const formatter = getFormatter(globalConfig.format);
819
+ const formatted = formatter(entry);
820
+ console[consoleMethod](formatted);
1121
821
  }
1122
- /**
1123
- * Map an emotion frame to target blendshapes
1124
- *
1125
- * This sets the target values that the mapper will smoothly interpolate
1126
- * towards. Call update() each frame to apply smoothing.
1127
- *
1128
- * @param frame - Emotion frame from Emotion2VecInference
1129
- * @param audioEnergy - Optional audio energy (0-1) for energy modulation
1130
- * @returns Target upper face blendshapes (before smoothing)
1131
- */
1132
- mapFrame(frame, audioEnergy) {
1133
- this.targetBlendshapes = createZeroBlendshapes();
1134
- if (audioEnergy !== void 0) {
1135
- this.currentEnergy = clamp01(audioEnergy);
1136
- }
1137
- if (!frame) {
1138
- return { ...this.targetBlendshapes };
1139
- }
1140
- if (this.config.blendMode === "weighted") {
1141
- this.mapFrameWeighted(frame);
1142
- } else {
1143
- this.mapFrameDominant(frame);
1144
- }
1145
- if (this.config.energyModulation) {
1146
- this.applyEnergyModulation();
1147
- }
1148
- return { ...this.targetBlendshapes };
822
+ };
823
+ function getActiveSink() {
824
+ return globalConfig.sink || consoleSink;
825
+ }
826
+ function shouldLog(level) {
827
+ if (!globalConfig.enabled) return false;
828
+ return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[globalConfig.level];
829
+ }
830
+ var Logger = class _Logger {
831
+ constructor(module2) {
832
+ this.module = module2;
1149
833
  }
1150
- /**
1151
- * Map using dominant emotion only (original behavior)
1152
- */
1153
- mapFrameDominant(frame) {
1154
- if (frame.confidence < this.config.confidenceThreshold) {
1155
- return;
1156
- }
1157
- const emotion = frame.emotion;
1158
- const mapping = EMOTION_ARKIT_MAP[emotion];
1159
- if (!mapping) {
1160
- return;
1161
- }
1162
- const scale = this.config.intensity * frame.confidence;
1163
- for (const [name, value] of Object.entries(mapping)) {
1164
- const blendshapeName = name;
1165
- if (value !== void 0) {
1166
- this.targetBlendshapes[blendshapeName] = clamp01(value * scale);
1167
- }
834
+ log(level, message, data) {
835
+ if (!shouldLog(level)) return;
836
+ const entry = {
837
+ timestamp: Date.now(),
838
+ level,
839
+ module: this.module,
840
+ message,
841
+ data
842
+ };
843
+ if (data?.error instanceof Error) {
844
+ entry.error = data.error;
845
+ const { error, ...rest } = data;
846
+ entry.data = Object.keys(rest).length > 0 ? rest : void 0;
1168
847
  }
848
+ getActiveSink()(entry);
1169
849
  }
1170
- /**
1171
- * Map using weighted blend of all emotions by probability
1172
- * Creates more nuanced expressions (e.g., bittersweet = happy + sad)
1173
- */
1174
- mapFrameWeighted(frame) {
1175
- if (!frame.probabilities) {
1176
- this.mapFrameDominant(frame);
1177
- return;
1178
- }
1179
- for (const [emotion, probability] of Object.entries(frame.probabilities)) {
1180
- if (probability < this.config.minBlendProbability) {
1181
- continue;
1182
- }
1183
- const mapping = EMOTION_ARKIT_MAP[emotion];
1184
- if (!mapping) {
1185
- continue;
1186
- }
1187
- const scale = this.config.intensity * probability;
1188
- for (const [name, value] of Object.entries(mapping)) {
1189
- const blendshapeName = name;
1190
- if (value !== void 0) {
1191
- this.targetBlendshapes[blendshapeName] += value * scale;
1192
- }
1193
- }
1194
- }
1195
- for (const name of UPPER_FACE_BLENDSHAPES) {
1196
- this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name]);
1197
- }
850
+ error(message, data) {
851
+ this.log("error", message, data);
1198
852
  }
1199
- /**
1200
- * Apply energy modulation to scale emotion intensity by audio energy
1201
- * Louder speech = stronger expressions
1202
- */
1203
- applyEnergyModulation() {
1204
- const { minEnergyScale, maxEnergyScale } = this.config;
1205
- const energyScale = minEnergyScale + this.currentEnergy * (maxEnergyScale - minEnergyScale);
1206
- for (const name of UPPER_FACE_BLENDSHAPES) {
1207
- this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name] * energyScale);
1208
- }
853
+ warn(message, data) {
854
+ this.log("warn", message, data);
1209
855
  }
1210
- /**
1211
- * Apply smoothing to interpolate current values towards target
1212
- *
1213
- * Uses exponential moving average:
1214
- * current = current + smoothingFactor * (target - current)
1215
- *
1216
- * @param _deltaMs - Delta time in milliseconds (reserved for future time-based smoothing)
1217
- */
1218
- update(_deltaMs) {
1219
- const factor = this.config.smoothingFactor;
1220
- for (const name of UPPER_FACE_BLENDSHAPES) {
1221
- const target = this.targetBlendshapes[name];
1222
- const current = this.currentBlendshapes[name];
1223
- this.currentBlendshapes[name] = clamp01(current + factor * (target - current));
1224
- }
856
+ info(message, data) {
857
+ this.log("info", message, data);
1225
858
  }
1226
- /**
1227
- * Get current smoothed blendshape values
1228
- *
1229
- * @returns Current upper face blendshapes (after smoothing)
1230
- */
1231
- getCurrentBlendshapes() {
1232
- return { ...this.currentBlendshapes };
859
+ debug(message, data) {
860
+ this.log("debug", message, data);
1233
861
  }
1234
- /**
1235
- * Reset mapper to neutral state
1236
- *
1237
- * Sets both target and current blendshapes to zero.
1238
- */
1239
- reset() {
1240
- this.targetBlendshapes = createZeroBlendshapes();
1241
- this.currentBlendshapes = createZeroBlendshapes();
1242
- this.currentEnergy = 1;
862
+ trace(message, data) {
863
+ this.log("trace", message, data);
1243
864
  }
1244
- /**
1245
- * Get current configuration
1246
- */
1247
- getConfig() {
1248
- return { ...this.config };
865
+ verbose(message, data) {
866
+ this.log("verbose", message, data);
1249
867
  }
1250
- /**
1251
- * Update configuration
1252
- *
1253
- * @param config - Partial configuration to update
1254
- */
1255
- setConfig(config) {
1256
- this.config = {
1257
- ...this.config,
1258
- ...config
1259
- };
868
+ child(subModule) {
869
+ return new _Logger(`${this.module}.${subModule}`);
1260
870
  }
1261
871
  };
1262
-
1263
- // src/animation/audioEnergy.ts
1264
- function calculateRMS(samples) {
1265
- if (samples.length === 0) return 0;
1266
- let sumSquares = 0;
1267
- for (let i = 0; i < samples.length; i++) {
1268
- sumSquares += samples[i] * samples[i];
1269
- }
1270
- return Math.sqrt(sumSquares / samples.length);
1271
- }
1272
- function calculatePeak(samples) {
1273
- let peak = 0;
1274
- for (let i = 0; i < samples.length; i++) {
1275
- const abs = Math.abs(samples[i]);
1276
- if (abs > peak) peak = abs;
872
var loggerCache = /* @__PURE__ */ new Map();

/**
 * Return the logger registered for `module2`, creating and caching it on
 * first use so repeated calls with the same name share one instance.
 *
 * @param module2 - Module name used as the cache key and passed to `Logger`.
 * @returns The cached (or newly created) logger for that module name.
 */
function createLogger(module2) {
  const cached = loggerCache.get(module2);
  if (cached) {
    return cached;
  }
  const created = new Logger(module2);
  loggerCache.set(module2, created);
  return created;
}
1280
- var AudioEnergyAnalyzer = class {
1281
- /**
1282
- * @param smoothingFactor How much to smooth (0 = no smoothing, 1 = infinite smoothing). Default 0.85
1283
- * @param noiseFloor Minimum energy threshold to consider as signal. Default 0.01
1284
- */
1285
- constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
1286
- this.smoothedRMS = 0;
1287
- this.smoothedPeak = 0;
1288
- this.smoothingFactor = Math.max(0, Math.min(0.99, smoothingFactor));
1289
- this.noiseFloor = noiseFloor;
1290
- }
1291
- /**
1292
- * Process audio samples and return smoothed energy values
1293
- * @param samples Audio samples (Float32Array)
1294
- * @returns Object with rms and peak values
881
/**
 * Logger stand-in whose level methods do nothing and return undefined.
 * `child()` hands back this same object, so nested child loggers are
 * silent as well.
 */
var noopLogger = {
  module: "noop",
  error() {
  },
  warn() {
  },
  info() {
  },
  debug() {
  },
  trace() {
  },
  verbose() {
  },
  child() {
    return noopLogger;
  }
};
897
+
898
+ // src/inference/A2EProcessor.ts
899
+ var logger = createLogger("A2EProcessor");
900
+ var FRAME_RATE = 30;
901
+ var DRIP_INTERVAL_MS = 33;
902
var A2EProcessor = class {
  /**
   * Buffers incoming audio into fixed-size chunks, runs the chunks through
   * `config.backend.infer()` one at a time, and exposes the resulting
   * blendshape frames either by timestamp (pull mode) or through a
   * drip-feed timer (push mode).
   *
   * @param config - Processor options.
   * @param config.backend - Inference backend. `infer(chunk)` must resolve to
   *   `{ blendshapes }`; its optional `chunkSize` is the fallback chunk size
   *   and its `backend` name selects the stale-frame discard window.
   * @param config.sampleRate - Audio sample rate in Hz (default 16000).
   * @param config.chunkSize - Samples per inference chunk (default
   *   `backend.chunkSize`, else 16000).
   * @param config.onFrame - Called with every frame released by the drip timer.
   * @param config.onError - Called with any error raised while inferring.
   */
  constructor(config) {
    this.writeOffset = 0;
    this.bufferStartTime = 0;
    // True once a caller-supplied timestamp has been recorded for the current
    // stream. Kept separately from bufferStartTime so that a stream starting
    // at timestamp 0 is still recognised as timestamped by flush().
    this.hasBufferTimestamp = false;
    // Frame queues (timestamped for pull mode, plain for drip mode)
    this.timestampedQueue = [];
    this.plainQueue = [];
    // Push mode state
    this._latestFrame = null;
    this.dripInterval = null;
    // Last-frame-hold for pull mode (prevents avatar freezing between frames)
    this.lastPulledFrame = null;
    // Inference serialization
    this.inferenceRunning = false;
    this.pendingChunks = [];
    // Bumped by reset(); a drain loop started under an older generation
    // abandons its work instead of touching the state of the new session.
    this.generation = 0;
    // Diagnostic: track getFrameForTime calls
    this.getFrameCallCount = 0;
    this.disposed = false;
    this.backend = config.backend;
    this.sampleRate = config.sampleRate ?? 16e3;
    this.chunkSize = config.chunkSize ?? config.backend.chunkSize ?? 16e3;
    this.onFrame = config.onFrame;
    this.onError = config.onError;
    this.bufferCapacity = this.chunkSize * 2;
    this.buffer = new Float32Array(this.bufferCapacity);
  }

  // -----------------------------------------------------------------------
  // Audio Input
  // -----------------------------------------------------------------------

  /**
   * Push audio samples for inference (any source: mic, TTS, file).
   *
   * - With `timestamp`: frames stored with timestamps (pull mode)
   * - Without `timestamp`: frames stored in plain queue (drip/push mode)
   *
   * Fire-and-forget: returns immediately, inference runs async.
   *
   * @param samples - Audio samples to append to the internal buffer.
   * @param timestamp - Optional start time (seconds) of `samples`; only
   *   recorded when the internal buffer is empty.
   */
  pushAudio(samples, timestamp) {
    if (this.disposed) return;
    const hasTimestamp = timestamp !== void 0;
    if (this.writeOffset === 0 && hasTimestamp) {
      this.bufferStartTime = timestamp;
      this.hasBufferTimestamp = true;
    }
    // Grow the staging buffer when the incoming samples do not fit.
    if (this.writeOffset + samples.length > this.bufferCapacity) {
      this.bufferCapacity = (this.writeOffset + samples.length) * 2;
      const grown = new Float32Array(this.bufferCapacity);
      grown.set(this.buffer.subarray(0, this.writeOffset));
      this.buffer = grown;
    }
    this.buffer.set(samples, this.writeOffset);
    this.writeOffset += samples.length;
    logger.debug("pushAudio", {
      samplesIn: samples.length,
      writeOffset: this.writeOffset,
      chunkSize: this.chunkSize,
      willExtract: this.writeOffset >= this.chunkSize,
      inferenceRunning: this.inferenceRunning,
      pendingChunks: this.pendingChunks.length,
      queuedFrames: this.timestampedQueue.length + this.plainQueue.length
    });
    // Peel off every complete chunk and shift the remainder to the front.
    while (this.writeOffset >= this.chunkSize) {
      const chunk = this.buffer.slice(0, this.chunkSize);
      this.buffer.copyWithin(0, this.chunkSize, this.writeOffset);
      this.writeOffset -= this.chunkSize;
      const chunkTimestamp = hasTimestamp ? this.bufferStartTime : void 0;
      this.pendingChunks.push({ chunk, timestamp: chunkTimestamp });
      logger.info("Chunk queued for inference", {
        chunkSize: chunk.length,
        chunkTimestamp,
        pendingChunks: this.pendingChunks.length,
        remainderOffset: this.writeOffset
      });
      if (hasTimestamp) {
        this.bufferStartTime += this.chunkSize / this.sampleRate;
        this.hasBufferTimestamp = true;
      }
    }
    this.drainPendingChunks();
  }

  /**
   * Flush remaining buffered audio (pads to chunkSize).
   * Call at end of stream to process final partial chunk.
   *
   * Routes through the serialized pendingChunks pipeline to maintain
   * correct frame ordering. Without this, flush() could push frames
   * with the latest timestamp to the queue before drainPendingChunks()
   * finishes pushing frames with earlier timestamps, causing
   * getFrameForTime() to see out-of-order timestamps and stall.
   *
   * @returns Promise that resolves once the padded chunk has been queued
   *   (inference itself continues in the background).
   */
  async flush() {
    if (this.disposed || this.writeOffset === 0) return;
    const padded = new Float32Array(this.chunkSize);
    padded.set(this.buffer.subarray(0, this.writeOffset), 0);
    // Use the explicit flag rather than `bufferStartTime > 0`, otherwise a
    // tail chunk that legitimately starts at time 0 loses its timestamp and
    // lands in the plain queue where pull mode never sees it.
    const chunkTimestamp = this.hasBufferTimestamp ? this.bufferStartTime : void 0;
    logger.info("flush: routing through drain pipeline", {
      actualSamples: this.writeOffset,
      chunkTimestamp: chunkTimestamp?.toFixed(3),
      pendingChunks: this.pendingChunks.length,
      inferenceRunning: this.inferenceRunning
    });
    this.writeOffset = 0;
    this.bufferStartTime = 0;
    this.hasBufferTimestamp = false;
    this.pendingChunks.push({ chunk: padded, timestamp: chunkTimestamp });
    this.drainPendingChunks();
  }

  /**
   * Reset buffer and frame queues.
   *
   * Also invalidates any inference that is still in flight: its result is
   * dropped when it completes instead of being queued into the new session.
   */
  reset() {
    this.generation++;
    this.writeOffset = 0;
    this.bufferStartTime = 0;
    this.hasBufferTimestamp = false;
    this.timestampedQueue = [];
    this.plainQueue = [];
    this._latestFrame = null;
    this.lastPulledFrame = null;
    this.pendingChunks = [];
    this.inferenceRunning = false;
    this.getFrameCallCount = 0;
  }

  // -----------------------------------------------------------------------
  // Frame Output - Pull Mode (TTS playback)
  // -----------------------------------------------------------------------

  /**
   * Get frame synced to external clock (e.g. AudioContext.currentTime).
   *
   * Discards frames that are too old, returns the current frame,
   * or holds last frame as fallback to prevent avatar freezing.
   *
   * @param currentTime - Current playback time (seconds)
   * @returns Blendshape frame, or null if no frames yet
   */
  getFrameForTime(currentTime) {
    this.getFrameCallCount++;
    // The "wasm" backend gets a wider tolerance before frames count as stale.
    const discardWindow = this.backend.backend === "wasm" ? 1 : 0.5;
    let discardCount = 0;
    while (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp < currentTime - discardWindow) {
      this.timestampedQueue.shift();
      discardCount++;
    }
    if (discardCount > 0) {
      logger.warn("getFrameForTime DISCARDED stale frames", {
        discardCount,
        currentTime: currentTime.toFixed(3),
        discardWindow,
        remainingFrames: this.timestampedQueue.length,
        nextFrameTs: this.timestampedQueue.length > 0 ? this.timestampedQueue[0].timestamp.toFixed(3) : "none"
      });
    }
    if (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp <= currentTime) {
      const { frame } = this.timestampedQueue.shift();
      this.lastPulledFrame = frame;
      return frame;
    }
    // Throttled diagnostic: only every 60th call while frames sit in the future.
    if (this.timestampedQueue.length > 0 && this.getFrameCallCount % 60 === 0) {
      logger.warn("getFrameForTime: frames in queue but NOT consumable", {
        queueLen: this.timestampedQueue.length,
        frontTimestamp: this.timestampedQueue[0].timestamp.toFixed(4),
        currentTime: currentTime.toFixed(4),
        delta: (this.timestampedQueue[0].timestamp - currentTime).toFixed(4),
        callCount: this.getFrameCallCount
      });
    }
    return this.lastPulledFrame;
  }

  // -----------------------------------------------------------------------
  // Frame Output - Push Mode (live mic, game loop)
  // -----------------------------------------------------------------------

  /** Latest frame from drip-feed (live mic, game loop) */
  get latestFrame() {
    return this._latestFrame;
  }

  /** Start the drip-feed timer (push mode); no-op when already running. */
  startDrip() {
    if (this.dripInterval) return;
    this.dripInterval = setInterval(() => {
      const frame = this.plainQueue.shift();
      if (frame) {
        this._latestFrame = frame;
        this.onFrame?.(frame);
      }
    }, DRIP_INTERVAL_MS);
  }

  /** Stop drip-feed timer */
  stopDrip() {
    if (this.dripInterval) {
      clearInterval(this.dripInterval);
      this.dripInterval = null;
    }
  }

  // -----------------------------------------------------------------------
  // State
  // -----------------------------------------------------------------------

  /** Number of frames waiting in queue (both modes combined) */
  get queuedFrameCount() {
    return this.timestampedQueue.length + this.plainQueue.length;
  }

  /** Buffer fill level as fraction of chunkSize (0-1) */
  get fillLevel() {
    return Math.min(1, this.writeOffset / this.chunkSize);
  }

  /** Dispose resources: stops the drip timer and clears all state. Idempotent. */
  dispose() {
    if (this.disposed) return;
    this.disposed = true;
    this.stopDrip();
    this.reset();
  }

  // -----------------------------------------------------------------------
  // Private
  // -----------------------------------------------------------------------

  /**
   * Process pending chunks sequentially.
   * Fire-and-forget: called from pushAudio()/flush() without awaiting.
   *
   * The loop remembers the generation it was started under. If reset() runs
   * while it is suspended on an await, the loop exits without queueing frames
   * and without touching `inferenceRunning`, which by then belongs to the
   * new session.
   */
  drainPendingChunks() {
    if (this.inferenceRunning || this.pendingChunks.length === 0) {
      if (this.inferenceRunning && this.pendingChunks.length > 0) {
        logger.debug("drainPendingChunks skipped (inference running)", {
          pendingChunks: this.pendingChunks.length
        });
      }
      return;
    }
    this.inferenceRunning = true;
    const generation = this.generation;
    const isStale = () => generation !== this.generation;
    logger.info("drainPendingChunks starting", { pendingChunks: this.pendingChunks.length });
    const processNext = async () => {
      while (this.pendingChunks.length > 0 && !this.disposed) {
        const { chunk, timestamp } = this.pendingChunks.shift();
        try {
          const t0 = performance.now();
          const result = await this.backend.infer(chunk);
          // reset() happened during inference: this result belongs to the
          // previous session and must not leak into the new queues.
          if (isStale()) return;
          const inferMs = Math.round(performance.now() - t0);
          const actualDuration = chunk.length / this.sampleRate;
          const actualFrameCount = Math.ceil(actualDuration * FRAME_RATE);
          // Never queue more frames than the chunk's audio duration covers.
          const framesToQueue = Math.min(actualFrameCount, result.blendshapes.length);
          logger.info("Inference complete", {
            inferMs,
            modelFrames: result.blendshapes.length,
            framesToQueue,
            timestamp,
            totalQueued: this.queuedFrameCount + framesToQueue,
            remainingPending: this.pendingChunks.length
          });
          for (let i = 0; i < framesToQueue; i++) {
            if (timestamp !== void 0) {
              this.timestampedQueue.push({
                frame: result.blendshapes[i],
                timestamp: timestamp + i / FRAME_RATE
              });
            } else {
              this.plainQueue.push(result.blendshapes[i]);
            }
          }
        } catch (err) {
          this.handleError(err);
        }
        if (isStale()) return;
        if (this.pendingChunks.length > 0) {
          // Yield to the event loop between chunks so timers and rendering run.
          await new Promise((r) => setTimeout(r, 0));
          if (isStale()) return;
        }
      }
      this.inferenceRunning = false;
      if (this.pendingChunks.length > 0) {
        this.drainPendingChunks();
      }
    };
    processNext().catch((err) => this.handleError(err));
  }

  /**
   * Normalise `err` to an Error, log it, and forward it to `onError`.
   *
   * @param err - Anything thrown during inference.
   */
  handleError(err) {
    const error = err instanceof Error ? err : new Error(String(err));
    logger.warn("A2EProcessor inference error", { error: error.message });
    this.onError?.(error);
  }
};
1339
- var EmphasisDetector = class {
1173
+
1174
+ // src/inference/BlendshapeSmoother.ts
1175
var NUM_BLENDSHAPES = 52;

var BlendshapeSmoother = class {
  /**
   * Smooths a stream of 52-value blendshape frames with one damped spring
   * per channel.
   *
   * @param config - Optional settings.
   * @param config.halflife - Spring halflife in seconds (default 0.06).
   *   A value <= 0 disables smoothing: update() snaps straight to the target.
   */
  constructor(config) {
    /** Whether any target has been set */
    this._hasTarget = false;
    this.halflife = config?.halflife ?? 0.06;
    this.values = new Float32Array(NUM_BLENDSHAPES);
    this.velocities = new Float32Array(NUM_BLENDSHAPES);
    this.targets = new Float32Array(NUM_BLENDSHAPES);
  }

  /** Whether a target frame has been set (false until first setTarget call) */
  get hasTarget() {
    return this._hasTarget;
  }

  /**
   * Set new target frame from inference output.
   * Springs will converge toward these values on subsequent update() calls.
   *
   * @param frame - Target blendshape values, copied into the internal
   *   target buffer.
   */
  setTarget(frame) {
    this.targets.set(frame);
    this._hasTarget = true;
  }

  /**
   * Advance all 52 springs by `dt` seconds and return the smoothed frame.
   *
   * Call this every render frame (e.g., inside requestAnimationFrame).
   * Returns the internal values buffer: do NOT mutate the returned array.
   *
   * @param dt - Time step in seconds (e.g., 1/60 for 60fps). A value that
   *   is zero, negative or NaN leaves the state untouched.
   * @returns Smoothed blendshape values (Float32Array of 52)
   */
  update(dt) {
    if (!this._hasTarget) {
      return this.values;
    }
    if (this.halflife <= 0) {
      this.values.set(this.targets);
      this.velocities.fill(0);
      return this.values;
    }
    // A NaN step would write NaN into every value and velocity, and the
    // clamp below cannot remove it; a negative step would run the spring
    // backwards. dt === 0 is already a mathematical no-op, so treat all
    // three the same way.
    if (!(dt > 0)) {
      return this.values;
    }
    const damping = Math.LN2 / this.halflife;
    const eydt = Math.exp(-damping * dt);
    for (let i = 0; i < NUM_BLENDSHAPES; i++) {
      const j0 = this.values[i] - this.targets[i];
      const j1 = this.velocities[i] + j0 * damping;
      this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
      this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
      // Keep the output inside the valid blendshape range.
      this.values[i] = Math.max(0, Math.min(1, this.values[i]));
    }
    return this.values;
  }

  /**
   * Decay all spring targets to neutral (0).
   *
   * Call when inference stalls (no new frames for threshold duration).
   * The springs will smoothly close the mouth / relax the face over
   * the halflife period rather than freezing.
   */
  decayToNeutral() {
    this.targets.fill(0);
  }

  /**
   * Reset all state (values, velocities, targets).
   * Call when starting a new playback session.
   */
  reset() {
    this.values.fill(0);
    this.velocities.fill(0);
    this.targets.fill(0);
    this._hasTarget = false;
  }
};
1378
1247
 
@@ -2485,340 +2354,76 @@ async function fetchWithCache(url, optionsOrProgress) {
2485
2354
  if (!response.ok) {
2486
2355
  throw new Error(`Failed to fetch ${url}: ${response.status}`);
2487
2356
  }
2488
- const contentLength = response.headers.get("content-length");
2489
- const total = contentLength ? parseInt(contentLength, 10) : 0;
2490
- const etag = response.headers.get("etag") ?? void 0;
2491
- const tooLargeForCache = total > MAX_CACHE_SIZE_BYTES;
2492
- if (tooLargeForCache) {
2493
- console.log(`[ModelCache] File too large for IndexedDB (${(total / 1024 / 1024).toFixed(0)}MB > 500MB), using HTTP cache only`);
2494
- }
2495
- if (!response.body) {
2496
- const data2 = await response.arrayBuffer();
2497
- if (!tooLargeForCache) {
2498
- await cache.set(cacheKey, data2, etag, version);
2499
- }
2500
- span?.setAttributes({
2501
- "fetch.size_bytes": data2.byteLength,
2502
- "fetch.cached_to_indexeddb": !tooLargeForCache
2503
- });
2504
- span?.end();
2505
- return data2;
2506
- }
2507
- const reader = response.body.getReader();
2508
- const chunks = [];
2509
- let loaded = 0;
2510
- while (true) {
2511
- const { done, value } = await reader.read();
2512
- if (done) break;
2513
- chunks.push(value);
2514
- loaded += value.length;
2515
- onProgress?.(loaded, total || loaded);
2516
- }
2517
- const data = new Uint8Array(loaded);
2518
- let offset = 0;
2519
- for (const chunk of chunks) {
2520
- data.set(chunk, offset);
2521
- offset += chunk.length;
2522
- }
2523
- const buffer = data.buffer;
2524
- if (!tooLargeForCache) {
2525
- await cache.set(cacheKey, buffer, etag, version);
2526
- console.log(`[ModelCache] Cached: ${url} (${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB)`);
2527
- }
2528
- span?.setAttributes({
2529
- "fetch.size_bytes": buffer.byteLength,
2530
- "fetch.cached_to_indexeddb": !tooLargeForCache
2531
- });
2532
- span?.end();
2533
- return buffer;
2534
- } catch (error) {
2535
- span?.endWithError(error instanceof Error ? error : new Error(String(error)));
2536
- throw error;
2537
- }
2538
- }
2539
- async function preloadModels(urls, onProgress) {
2540
- const cache = getModelCache();
2541
- for (let i = 0; i < urls.length; i++) {
2542
- const url = urls[i];
2543
- onProgress?.(i, urls.length, url);
2544
- if (await cache.has(url)) {
2545
- console.log(`[ModelCache] Already cached: ${url}`);
2546
- continue;
2547
- }
2548
- await fetchWithCache(url);
2549
- }
2550
- onProgress?.(urls.length, urls.length, "done");
2551
- }
2552
- function formatBytes(bytes) {
2553
- if (bytes < 1024) return `${bytes} B`;
2554
- if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
2555
- if (bytes < 1024 * 1024 * 1024) return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
2556
- return `${(bytes / 1024 / 1024 / 1024).toFixed(1)} GB`;
2557
- }
2558
-
2559
- // src/logging/types.ts
2560
- var LOG_LEVEL_PRIORITY = {
2561
- error: 0,
2562
- warn: 1,
2563
- info: 2,
2564
- debug: 3,
2565
- trace: 4,
2566
- verbose: 5
2567
- };
2568
- var DEFAULT_LOGGING_CONFIG = {
2569
- level: "info",
2570
- enabled: true,
2571
- format: "pretty",
2572
- timestamps: true,
2573
- includeModule: true
2574
- };
2575
-
2576
- // src/logging/formatters.ts
2577
- var COLORS = {
2578
- reset: "\x1B[0m",
2579
- red: "\x1B[31m",
2580
- yellow: "\x1B[33m",
2581
- blue: "\x1B[34m",
2582
- cyan: "\x1B[36m",
2583
- gray: "\x1B[90m",
2584
- white: "\x1B[37m",
2585
- magenta: "\x1B[35m"
2586
- };
2587
- var LEVEL_COLORS = {
2588
- error: COLORS.red,
2589
- warn: COLORS.yellow,
2590
- info: COLORS.blue,
2591
- debug: COLORS.cyan,
2592
- trace: COLORS.magenta,
2593
- verbose: COLORS.gray
2594
- };
2595
- var LEVEL_NAMES = {
2596
- error: "ERROR ",
2597
- warn: "WARN ",
2598
- info: "INFO ",
2599
- debug: "DEBUG ",
2600
- trace: "TRACE ",
2601
- verbose: "VERBOSE"
2602
- };
2603
- var isBrowser = typeof window !== "undefined";
2604
- function formatTimestamp(timestamp) {
2605
- const date = new Date(timestamp);
2606
- return date.toISOString().substring(11, 23);
2607
- }
2608
- function safeStringify(data) {
2609
- const seen = /* @__PURE__ */ new WeakSet();
2610
- return JSON.stringify(data, (key, value) => {
2611
- if (typeof value === "object" && value !== null) {
2612
- if (seen.has(value)) {
2613
- return "[Circular]";
2614
- }
2615
- seen.add(value);
2616
- }
2617
- if (value instanceof Error) {
2618
- return {
2619
- name: value.name,
2620
- message: value.message,
2621
- stack: value.stack
2622
- };
2357
+ const contentLength = response.headers.get("content-length");
2358
+ const total = contentLength ? parseInt(contentLength, 10) : 0;
2359
+ const etag = response.headers.get("etag") ?? void 0;
2360
+ const tooLargeForCache = total > MAX_CACHE_SIZE_BYTES;
2361
+ if (tooLargeForCache) {
2362
+ console.log(`[ModelCache] File too large for IndexedDB (${(total / 1024 / 1024).toFixed(0)}MB > 500MB), using HTTP cache only`);
2623
2363
  }
2624
- if (value instanceof Float32Array || value instanceof Int16Array) {
2625
- return `${value.constructor.name}(${value.length})`;
2364
+ if (!response.body) {
2365
+ const data2 = await response.arrayBuffer();
2366
+ if (!tooLargeForCache) {
2367
+ await cache.set(cacheKey, data2, etag, version);
2368
+ }
2369
+ span?.setAttributes({
2370
+ "fetch.size_bytes": data2.byteLength,
2371
+ "fetch.cached_to_indexeddb": !tooLargeForCache
2372
+ });
2373
+ span?.end();
2374
+ return data2;
2626
2375
  }
2627
- if (ArrayBuffer.isView(value)) {
2628
- return `${value.constructor.name}(${value.byteLength})`;
2376
+ const reader = response.body.getReader();
2377
+ const chunks = [];
2378
+ let loaded = 0;
2379
+ while (true) {
2380
+ const { done, value } = await reader.read();
2381
+ if (done) break;
2382
+ chunks.push(value);
2383
+ loaded += value.length;
2384
+ onProgress?.(loaded, total || loaded);
2629
2385
  }
2630
- return value;
2631
- });
2632
- }
2633
- var jsonFormatter = (entry) => {
2634
- const output = {
2635
- timestamp: entry.timestamp,
2636
- level: entry.level,
2637
- module: entry.module,
2638
- message: entry.message
2639
- };
2640
- if (entry.data && Object.keys(entry.data).length > 0) {
2641
- output.data = entry.data;
2642
- }
2643
- if (entry.error) {
2644
- output.error = {
2645
- name: entry.error.name,
2646
- message: entry.error.message,
2647
- stack: entry.error.stack
2648
- };
2649
- }
2650
- return safeStringify(output);
2651
- };
2652
- var prettyFormatter = (entry) => {
2653
- const time = formatTimestamp(entry.timestamp);
2654
- const level = LEVEL_NAMES[entry.level];
2655
- const module2 = entry.module;
2656
- const message = entry.message;
2657
- let output;
2658
- if (isBrowser) {
2659
- output = `${time} ${level} [${module2}] ${message}`;
2660
- } else {
2661
- const color = LEVEL_COLORS[entry.level];
2662
- output = `${COLORS.gray}${time}${COLORS.reset} ${color}${level}${COLORS.reset} ${COLORS.cyan}[${module2}]${COLORS.reset} ${message}`;
2663
- }
2664
- if (entry.data && Object.keys(entry.data).length > 0) {
2665
- const dataStr = safeStringify(entry.data);
2666
- if (dataStr.length > 80) {
2667
- output += "\n " + JSON.stringify(entry.data, null, 2).replace(/\n/g, "\n ");
2668
- } else {
2669
- output += " " + dataStr;
2386
+ const data = new Uint8Array(loaded);
2387
+ let offset = 0;
2388
+ for (const chunk of chunks) {
2389
+ data.set(chunk, offset);
2390
+ offset += chunk.length;
2670
2391
  }
2671
- }
2672
- if (entry.error) {
2673
- output += `
2674
- ${entry.error.name}: ${entry.error.message}`;
2675
- if (entry.error.stack) {
2676
- const stackLines = entry.error.stack.split("\n").slice(1, 4);
2677
- output += "\n " + stackLines.join("\n ");
2392
+ const buffer = data.buffer;
2393
+ if (!tooLargeForCache) {
2394
+ await cache.set(cacheKey, buffer, etag, version);
2395
+ console.log(`[ModelCache] Cached: ${url} (${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB)`);
2678
2396
  }
2397
+ span?.setAttributes({
2398
+ "fetch.size_bytes": buffer.byteLength,
2399
+ "fetch.cached_to_indexeddb": !tooLargeForCache
2400
+ });
2401
+ span?.end();
2402
+ return buffer;
2403
+ } catch (error) {
2404
+ span?.endWithError(error instanceof Error ? error : new Error(String(error)));
2405
+ throw error;
2679
2406
  }
2680
- return output;
2681
- };
2682
- function getFormatter(format) {
2683
- return format === "json" ? jsonFormatter : prettyFormatter;
2684
- }
2685
- function createBrowserConsoleArgs(entry) {
2686
- const time = formatTimestamp(entry.timestamp);
2687
- const level = entry.level.toUpperCase().padEnd(7);
2688
- const module2 = entry.module;
2689
- const message = entry.message;
2690
- const styles = {
2691
- time: "color: gray;",
2692
- error: "color: red; font-weight: bold;",
2693
- warn: "color: orange; font-weight: bold;",
2694
- info: "color: blue;",
2695
- debug: "color: cyan;",
2696
- trace: "color: magenta;",
2697
- verbose: "color: gray;",
2698
- module: "color: teal; font-weight: bold;",
2699
- message: "color: inherit;"
2700
- };
2701
- let formatStr = "%c%s %c%s %c[%s]%c %s";
2702
- const args = [
2703
- styles.time,
2704
- time,
2705
- styles[entry.level],
2706
- level,
2707
- styles.module,
2708
- module2,
2709
- styles.message,
2710
- message
2711
- ];
2712
- if (entry.data && Object.keys(entry.data).length > 0) {
2713
- formatStr += " %o";
2714
- args.push(entry.data);
2715
- }
2716
- return [formatStr, ...args];
2717
- }
2718
-
2719
- // src/logging/Logger.ts
2720
- var isBrowser2 = typeof window !== "undefined";
2721
- var globalConfig = { ...DEFAULT_LOGGING_CONFIG };
2722
- function configureLogging(config) {
2723
- globalConfig = { ...globalConfig, ...config };
2724
- }
2725
- function getLoggingConfig() {
2726
- return { ...globalConfig };
2727
- }
2728
- function resetLoggingConfig() {
2729
- globalConfig = { ...DEFAULT_LOGGING_CONFIG };
2730
- }
2731
- function setLogLevel(level) {
2732
- globalConfig.level = level;
2733
- }
2734
- function setLoggingEnabled(enabled) {
2735
- globalConfig.enabled = enabled;
2736
- }
2737
- var consoleSink = (entry) => {
2738
- const consoleMethod = entry.level === "error" ? "error" : entry.level === "warn" ? "warn" : "log";
2739
- if (globalConfig.format === "pretty" && isBrowser2) {
2740
- const args = createBrowserConsoleArgs(entry);
2741
- console[consoleMethod](...args);
2742
- } else {
2743
- const formatter = getFormatter(globalConfig.format);
2744
- const formatted = formatter(entry);
2745
- console[consoleMethod](formatted);
2746
- }
2747
- };
2748
- function getActiveSink() {
2749
- return globalConfig.sink || consoleSink;
2750
- }
2751
- function shouldLog(level) {
2752
- if (!globalConfig.enabled) return false;
2753
- return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[globalConfig.level];
2754
2407
  }
2755
- var Logger = class _Logger {
2756
- constructor(module2) {
2757
- this.module = module2;
2758
- }
2759
- log(level, message, data) {
2760
- if (!shouldLog(level)) return;
2761
- const entry = {
2762
- timestamp: Date.now(),
2763
- level,
2764
- module: this.module,
2765
- message,
2766
- data
2767
- };
2768
- if (data?.error instanceof Error) {
2769
- entry.error = data.error;
2770
- const { error, ...rest } = data;
2771
- entry.data = Object.keys(rest).length > 0 ? rest : void 0;
2408
/**
 * Fetch each URL in order so it ends up in the model cache, skipping URLs
 * the cache already reports as present.
 *
 * @param urls - Model URLs to preload, processed one after another.
 * @param onProgress - Optional callback `(index, total, url)` invoked before
 *   each URL is handled, and once more as `(total, total, "done")` at the end.
 * @returns Promise that resolves when every URL has been handled.
 */
async function preloadModels(urls, onProgress) {
  const cache = getModelCache();
  for (const [index, url] of urls.entries()) {
    onProgress?.(index, urls.length, url);
    const alreadyCached = await cache.has(url);
    if (alreadyCached) {
      console.log(`[ModelCache] Already cached: ${url}`);
    } else {
      await fetchWithCache(url);
    }
  }
  onProgress?.(urls.length, urls.length, "done");
}
2421
/**
 * Format a byte count as a short human-readable string using 1024-based
 * units: whole bytes below 1 KB, otherwise one decimal in KB, MB or GB.
 *
 * The unit is chosen after rounding, so a value just under a boundary is
 * promoted (1048575 -> "1.0 MB") instead of printing "1024.0 KB".
 *
 * @param bytes - Size in bytes.
 * @returns Formatted size, e.g. "512 B", "1.5 KB", "20.0 MB", "2.3 GB".
 */
function formatBytes(bytes) {
  if (bytes < 1024) return `${bytes} B`;
  const units = ["KB", "MB", "GB"];
  let value = bytes / 1024;
  let unitIndex = 0;
  // Promote while the rounded text would read "1024.0" or more. The negated
  // comparison also sends NaN to the last unit, as the original chain did.
  while (unitIndex < units.length - 1 && !(Number(value.toFixed(1)) < 1024)) {
    value /= 1024;
    unitIndex++;
  }
  return `${value.toFixed(1)} ${units[unitIndex]}`;
}
2806
- var noopLogger = {
2807
- module: "noop",
2808
- error: () => {
2809
- },
2810
- warn: () => {
2811
- },
2812
- info: () => {
2813
- },
2814
- debug: () => {
2815
- },
2816
- trace: () => {
2817
- },
2818
- verbose: () => {
2819
- },
2820
- child: () => noopLogger
2821
- };
2822
2427
 
2823
2428
  // src/utils/runtime.ts
2824
2429
  function isIOSSafari() {
@@ -2889,7 +2494,7 @@ function isSafari() {
2889
2494
  const ua = navigator.userAgent.toLowerCase();
2890
2495
  return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
2891
2496
  }
2892
- function shouldUseCpuLipSync() {
2497
+ function shouldUseCpuA2E() {
2893
2498
  return isSafari() || isIOS();
2894
2499
  }
2895
2500
  function isSpeechRecognitionAvailable() {
@@ -2899,22 +2504,22 @@ function isSpeechRecognitionAvailable() {
2899
2504
  function shouldUseNativeASR() {
2900
2505
  return (isIOS() || isSafari()) && isSpeechRecognitionAvailable();
2901
2506
  }
2902
- function shouldUseServerLipSync() {
2507
+ function shouldUseServerA2E() {
2903
2508
  return isIOS();
2904
2509
  }
2905
2510
 
2906
2511
  // src/inference/onnxLoader.ts
2907
- var logger = createLogger("OnnxLoader");
2512
+ var logger2 = createLogger("OnnxLoader");
2908
2513
  var ortInstance = null;
2909
2514
  var loadedBackend = null;
2910
2515
  var WASM_CDN_PATH = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
2911
2516
  async function isWebGPUAvailable() {
2912
2517
  if (isIOS()) {
2913
- logger.debug("WebGPU check: disabled on iOS (asyncify bundle crashes WebKit)");
2518
+ logger2.debug("WebGPU check: disabled on iOS (asyncify bundle crashes WebKit)");
2914
2519
  return false;
2915
2520
  }
2916
2521
  if (!hasWebGPUApi()) {
2917
- logger.debug("WebGPU check: navigator.gpu not available", {
2522
+ logger2.debug("WebGPU check: navigator.gpu not available", {
2918
2523
  isSecureContext: typeof window !== "undefined" ? window.isSecureContext : "N/A"
2919
2524
  });
2920
2525
  return false;
@@ -2922,19 +2527,19 @@ async function isWebGPUAvailable() {
2922
2527
  try {
2923
2528
  const adapter = await navigator.gpu.requestAdapter();
2924
2529
  if (!adapter) {
2925
- logger.debug("WebGPU check: No adapter available");
2530
+ logger2.debug("WebGPU check: No adapter available");
2926
2531
  return false;
2927
2532
  }
2928
2533
  const device = await adapter.requestDevice();
2929
2534
  if (!device) {
2930
- logger.debug("WebGPU check: Could not create device");
2535
+ logger2.debug("WebGPU check: Could not create device");
2931
2536
  return false;
2932
2537
  }
2933
2538
  device.destroy();
2934
- logger.debug("WebGPU check: Available and working");
2539
+ logger2.debug("WebGPU check: Available and working");
2935
2540
  return true;
2936
2541
  } catch (err) {
2937
- logger.debug("WebGPU check: Error during availability check", { error: err });
2542
+ logger2.debug("WebGPU check: Error during availability check", { error: err });
2938
2543
  return false;
2939
2544
  }
2940
2545
  }
@@ -2944,11 +2549,11 @@ function applyIOSWasmMemoryPatch() {
2944
2549
  iosWasmPatched = true;
2945
2550
  const OrigMemory = WebAssembly.Memory;
2946
2551
  const MAX_IOS_PAGES = 32768;
2947
- logger.info("Applying iOS WASM memory patch (max\u21922GB, shared preserved)");
2552
+ logger2.info("Applying iOS WASM memory patch (max\u21922GB, shared preserved)");
2948
2553
  WebAssembly.Memory = function IOSPatchedMemory(descriptor) {
2949
2554
  const patched = { ...descriptor };
2950
2555
  if (patched.maximum !== void 0 && patched.maximum > MAX_IOS_PAGES) {
2951
- logger.info("iOS memory patch: capping maximum", {
2556
+ logger2.info("iOS memory patch: capping maximum", {
2952
2557
  original: patched.maximum,
2953
2558
  capped: MAX_IOS_PAGES,
2954
2559
  shared: patched.shared,
@@ -2967,7 +2572,7 @@ function configureWasm(ort) {
2967
2572
  ort.env.wasm.numThreads = numThreads;
2968
2573
  ort.env.wasm.simd = true;
2969
2574
  ort.env.wasm.proxy = enableProxy;
2970
- logger.info("WASM configured", {
2575
+ logger2.info("WASM configured", {
2971
2576
  numThreads,
2972
2577
  simd: true,
2973
2578
  proxy: enableProxy,
@@ -2979,12 +2584,12 @@ async function getOnnxRuntime(backend) {
2979
2584
  return ortInstance;
2980
2585
  }
2981
2586
  if (ortInstance && loadedBackend !== backend) {
2982
- logger.warn(
2587
+ logger2.warn(
2983
2588
  `ONNX Runtime already loaded with ${loadedBackend} backend. Cannot switch to ${backend}. Returning existing instance.`
2984
2589
  );
2985
2590
  return ortInstance;
2986
2591
  }
2987
- logger.info(`Loading ONNX Runtime with ${backend} backend...`);
2592
+ logger2.info(`Loading ONNX Runtime with ${backend} backend...`);
2988
2593
  applyIOSWasmMemoryPatch();
2989
2594
  try {
2990
2595
  if (backend === "wasm" && (isIOS() || isSafari())) {
@@ -2999,10 +2604,10 @@ async function getOnnxRuntime(backend) {
2999
2604
  }
3000
2605
  loadedBackend = backend;
3001
2606
  configureWasm(ortInstance);
3002
- logger.info(`ONNX Runtime loaded successfully`, { backend });
2607
+ logger2.info(`ONNX Runtime loaded successfully`, { backend });
3003
2608
  return ortInstance;
3004
2609
  } catch (err) {
3005
- logger.error(`Failed to load ONNX Runtime with ${backend} backend`, {
2610
+ logger2.error(`Failed to load ONNX Runtime with ${backend} backend`, {
3006
2611
  error: err
3007
2612
  });
3008
2613
  throw new Error(
@@ -3013,7 +2618,7 @@ async function getOnnxRuntime(backend) {
3013
2618
  async function getOnnxRuntimeForPreference(preference = "auto") {
3014
2619
  const webgpuAvailable = await isWebGPUAvailable();
3015
2620
  const backend = resolveBackend(preference, webgpuAvailable);
3016
- logger.info("Resolved backend preference", {
2621
+ logger2.info("Resolved backend preference", {
3017
2622
  preference,
3018
2623
  webgpuAvailable,
3019
2624
  resolvedBackend: backend
@@ -3047,42 +2652,6 @@ function getSessionOptions(backend) {
3047
2652
  graphOptimizationLevel: "all"
3048
2653
  };
3049
2654
  }
3050
- async function createSessionWithFallback(modelBuffer, preferredBackend) {
3051
- const ort = await getOnnxRuntime(preferredBackend);
3052
- const modelData = new Uint8Array(modelBuffer);
3053
- if (preferredBackend === "webgpu") {
3054
- try {
3055
- const options2 = getSessionOptions("webgpu");
3056
- const session2 = await ort.InferenceSession.create(modelData, options2);
3057
- logger.info("Session created with WebGPU backend");
3058
- return { session: session2, backend: "webgpu" };
3059
- } catch (err) {
3060
- logger.warn("WebGPU session creation failed, falling back to WASM", {
3061
- error: err instanceof Error ? err.message : String(err)
3062
- });
3063
- }
3064
- }
3065
- const options = getSessionOptions("wasm");
3066
- const session = await ort.InferenceSession.create(modelData, options);
3067
- logger.info("Session created with WASM backend");
3068
- return { session, backend: "wasm" };
3069
- }
3070
- function getLoadedBackend() {
3071
- return loadedBackend;
3072
- }
3073
- function isOnnxRuntimeLoaded() {
3074
- return ortInstance !== null;
3075
- }
3076
- async function preloadOnnxRuntime(preference = "auto") {
3077
- if (ortInstance) {
3078
- logger.info("ONNX Runtime already preloaded", { backend: loadedBackend });
3079
- return loadedBackend;
3080
- }
3081
- logger.info("Preloading ONNX Runtime...", { preference });
3082
- const { backend } = await getOnnxRuntimeForPreference(preference);
3083
- logger.info("ONNX Runtime preloaded", { backend });
3084
- return backend;
3085
- }
3086
2655
 
3087
2656
  // src/inference/blendshapeUtils.ts
3088
2657
  var LAM_BLENDSHAPES = [
@@ -3232,16 +2801,19 @@ var WAV2ARKIT_BLENDSHAPES = [
3232
2801
  var REMAP_WAV2ARKIT_TO_LAM = WAV2ARKIT_BLENDSHAPES.map(
3233
2802
  (name) => LAM_BLENDSHAPES.indexOf(name)
3234
2803
  );
3235
- function remapWav2ArkitToLam(frame) {
3236
- const result = new Float32Array(52);
3237
- for (let i = 0; i < 52; i++) {
3238
- result[REMAP_WAV2ARKIT_TO_LAM[i]] = frame[i];
2804
/**
 * Linearly interpolate from one blendshape frame towards another.
 *
 * The result has the length of the longer input; entries missing from the
 * shorter input are treated as 0.
 *
 * @param {ArrayLike<number>} current - Frame to start from.
 * @param {ArrayLike<number>} target - Frame to move towards.
 * @param {number} [factor=0.3] - Interpolation weight (0 keeps `current`, 1 yields `target`).
 * @returns {number[]} New array of interpolated values.
 */
function lerpBlendshapes(current, target, factor = 0.3) {
  const length = Math.max(current.length, target.length);
  return Array.from({ length }, (_, index) => {
    const from = current[index] ?? 0;
    const to = target[index] ?? 0;
    return from + (to - from) * factor;
  });
}
3242
2814
 
3243
2815
  // src/inference/Wav2Vec2Inference.ts
3244
- var logger2 = createLogger("Wav2Vec2");
2816
+ var logger3 = createLogger("Wav2Vec2");
3245
2817
  var CTC_VOCAB = [
3246
2818
  "<pad>",
3247
2819
  "<s>",
@@ -3291,6 +2863,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3291
2863
  this.poisoned = false;
3292
2864
  this.config = config;
3293
2865
  this.numIdentityClasses = config.numIdentityClasses ?? 12;
2866
+ this.chunkSize = config.chunkSize ?? 16e3;
3294
2867
  }
3295
2868
  get backend() {
3296
2869
  return this.session ? this._backend : null;
@@ -3320,30 +2893,30 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3320
2893
  "model.backend_requested": this.config.backend || "auto"
3321
2894
  });
3322
2895
  try {
3323
- logger2.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
2896
+ logger3.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
3324
2897
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
3325
2898
  this.ort = ort;
3326
2899
  this._backend = backend;
3327
- logger2.info("ONNX Runtime loaded", { backend: this._backend });
2900
+ logger3.info("ONNX Runtime loaded", { backend: this._backend });
3328
2901
  const modelUrl = this.config.modelUrl;
3329
2902
  const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
3330
2903
  const sessionOptions = getSessionOptions(this._backend);
3331
2904
  let isCached = false;
3332
2905
  if (isIOS()) {
3333
- logger2.info("iOS: passing model URLs directly to ORT (low-memory path)", {
2906
+ logger3.info("iOS: passing model URLs directly to ORT (low-memory path)", {
3334
2907
  modelUrl,
3335
2908
  dataUrl
3336
2909
  });
3337
2910
  if (dataUrl) {
3338
2911
  const dataFilename = dataUrl.split("/").pop();
3339
- logger2.info("iOS: setting externalData", { dataFilename, dataUrl });
2912
+ logger3.info("iOS: setting externalData", { dataFilename, dataUrl });
3340
2913
  sessionOptions.externalData = [{
3341
2914
  path: dataFilename,
3342
2915
  data: dataUrl
3343
2916
  // URL string — ORT fetches directly into WASM
3344
2917
  }];
3345
2918
  }
3346
- logger2.info("iOS: calling InferenceSession.create() with URL string", {
2919
+ logger3.info("iOS: calling InferenceSession.create() with URL string", {
3347
2920
  modelUrl,
3348
2921
  sessionOptions: JSON.stringify(
3349
2922
  sessionOptions,
@@ -3353,14 +2926,14 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3353
2926
  try {
3354
2927
  this.session = await this.ort.InferenceSession.create(modelUrl, sessionOptions);
3355
2928
  } catch (sessionErr) {
3356
- logger2.error("iOS: InferenceSession.create() failed", {
2929
+ logger3.error("iOS: InferenceSession.create() failed", {
3357
2930
  error: sessionErr instanceof Error ? sessionErr.message : String(sessionErr),
3358
2931
  errorType: sessionErr?.constructor?.name,
3359
2932
  stack: sessionErr instanceof Error ? sessionErr.stack : void 0
3360
2933
  });
3361
2934
  throw sessionErr;
3362
2935
  }
3363
- logger2.info("iOS: session created successfully", {
2936
+ logger3.info("iOS: session created successfully", {
3364
2937
  inputNames: this.session.inputNames,
3365
2938
  outputNames: this.session.outputNames
3366
2939
  });
@@ -3369,15 +2942,15 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3369
2942
  isCached = await cache.has(modelUrl);
3370
2943
  let modelBuffer;
3371
2944
  if (isCached) {
3372
- logger2.debug("Loading model from cache", { modelUrl });
2945
+ logger3.debug("Loading model from cache", { modelUrl });
3373
2946
  modelBuffer = await cache.get(modelUrl);
3374
2947
  if (!modelBuffer) {
3375
- logger2.warn("Cache corruption detected, clearing and retrying", { modelUrl });
2948
+ logger3.warn("Cache corruption detected, clearing and retrying", { modelUrl });
3376
2949
  await cache.delete(modelUrl);
3377
2950
  modelBuffer = await fetchWithCache(modelUrl);
3378
2951
  }
3379
2952
  } else {
3380
- logger2.debug("Fetching and caching model", { modelUrl });
2953
+ logger3.debug("Fetching and caching model", { modelUrl });
3381
2954
  modelBuffer = await fetchWithCache(modelUrl);
3382
2955
  }
3383
2956
  if (!modelBuffer) {
@@ -3388,31 +2961,31 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3388
2961
  try {
3389
2962
  const isDataCached = await cache.has(dataUrl);
3390
2963
  if (isDataCached) {
3391
- logger2.debug("Loading external data from cache", { dataUrl });
2964
+ logger3.debug("Loading external data from cache", { dataUrl });
3392
2965
  externalDataBuffer = await cache.get(dataUrl);
3393
2966
  if (!externalDataBuffer) {
3394
- logger2.warn("Cache corruption for external data, retrying", { dataUrl });
2967
+ logger3.warn("Cache corruption for external data, retrying", { dataUrl });
3395
2968
  await cache.delete(dataUrl);
3396
2969
  externalDataBuffer = await fetchWithCache(dataUrl);
3397
2970
  }
3398
2971
  } else {
3399
- logger2.info("Fetching external model data", {
2972
+ logger3.info("Fetching external model data", {
3400
2973
  dataUrl,
3401
2974
  note: "This may be a large download (383MB+)"
3402
2975
  });
3403
2976
  externalDataBuffer = await fetchWithCache(dataUrl);
3404
2977
  }
3405
- logger2.info("External data loaded", {
2978
+ logger3.info("External data loaded", {
3406
2979
  size: formatBytes(externalDataBuffer.byteLength)
3407
2980
  });
3408
2981
  } catch (err) {
3409
- logger2.debug("No external data file found (single-file model)", {
2982
+ logger3.debug("No external data file found (single-file model)", {
3410
2983
  dataUrl,
3411
2984
  error: err.message
3412
2985
  });
3413
2986
  }
3414
2987
  }
3415
- logger2.debug("Creating ONNX session", {
2988
+ logger3.debug("Creating ONNX session", {
3416
2989
  graphSize: formatBytes(modelBuffer.byteLength),
3417
2990
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
3418
2991
  backend: this._backend
@@ -3427,12 +3000,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3427
3000
  const modelData = new Uint8Array(modelBuffer);
3428
3001
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
3429
3002
  }
3430
- logger2.info("ONNX session created successfully", {
3003
+ logger3.info("ONNX session created successfully", {
3431
3004
  executionProvider: this._backend,
3432
3005
  backend: this._backend
3433
3006
  });
3434
3007
  const loadTimeMs = performance.now() - startTime;
3435
- logger2.info("Model loaded successfully", {
3008
+ logger3.info("Model loaded successfully", {
3436
3009
  backend: this._backend,
3437
3010
  loadTimeMs: Math.round(loadTimeMs),
3438
3011
  inputs: this.session.inputNames,
@@ -3448,13 +3021,13 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3448
3021
  model: "wav2vec2",
3449
3022
  backend: this._backend
3450
3023
  });
3451
- logger2.debug("Running warmup inference to initialize GPU context");
3024
+ logger3.debug("Running warmup inference to initialize GPU context");
3452
3025
  const warmupStart = performance.now();
3453
- const warmupAudio = new Float32Array(16e3);
3026
+ const warmupAudio = new Float32Array(this.chunkSize);
3454
3027
  const warmupIdentity = new Float32Array(this.numIdentityClasses);
3455
3028
  warmupIdentity[0] = 1;
3456
3029
  const warmupFeeds = {
3457
- "audio": new this.ort.Tensor("float32", warmupAudio, [1, 16e3]),
3030
+ "audio": new this.ort.Tensor("float32", warmupAudio, [1, this.chunkSize]),
3458
3031
  "identity": new this.ort.Tensor("float32", warmupIdentity, [1, this.numIdentityClasses])
3459
3032
  };
3460
3033
  const WARMUP_TIMEOUT_MS = 15e3;
@@ -3464,12 +3037,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3464
3037
  ]);
3465
3038
  const warmupTimeMs = performance.now() - warmupStart;
3466
3039
  if (warmupResult === "timeout") {
3467
- logger2.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
3040
+ logger3.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
3468
3041
  timeoutMs: WARMUP_TIMEOUT_MS,
3469
3042
  backend: this._backend
3470
3043
  });
3471
3044
  } else {
3472
- logger2.info("Warmup inference complete", {
3045
+ logger3.info("Warmup inference complete", {
3473
3046
  warmupTimeMs: Math.round(warmupTimeMs),
3474
3047
  backend: this._backend
3475
3048
  });
@@ -3497,11 +3070,10 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3497
3070
  }
3498
3071
  /**
3499
3072
  * Run inference on raw audio
3500
- * @param audioSamples - Float32Array of raw audio at 16kHz (16000 samples = 1 second)
3073
+ * @param audioSamples - Float32Array of raw audio at 16kHz
3501
3074
  * @param identityIndex - Optional identity index (0-11, default 0 = neutral)
3502
3075
  *
3503
- * Note: Model expects 1-second chunks (16000 samples) for optimal performance.
3504
- * Audio will be zero-padded or truncated to 16000 samples.
3076
+ * Audio will be zero-padded or truncated to chunkSize samples.
3505
3077
  */
3506
3078
  async infer(audioSamples, identityIndex = 0) {
3507
3079
  if (!this.session) {
@@ -3512,20 +3084,20 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3512
3084
  }
3513
3085
  const audioSamplesCopy = new Float32Array(audioSamples);
3514
3086
  let audio;
3515
- if (audioSamplesCopy.length === 16e3) {
3087
+ if (audioSamplesCopy.length === this.chunkSize) {
3516
3088
  audio = audioSamplesCopy;
3517
- } else if (audioSamplesCopy.length < 16e3) {
3518
- audio = new Float32Array(16e3);
3089
+ } else if (audioSamplesCopy.length < this.chunkSize) {
3090
+ audio = new Float32Array(this.chunkSize);
3519
3091
  audio.set(audioSamplesCopy, 0);
3520
3092
  } else {
3521
- audio = audioSamplesCopy.slice(0, 16e3);
3093
+ audio = audioSamplesCopy.slice(0, this.chunkSize);
3522
3094
  }
3523
3095
  const identity = new Float32Array(this.numIdentityClasses);
3524
3096
  identity[Math.max(0, Math.min(identityIndex, this.numIdentityClasses - 1))] = 1;
3525
3097
  const audioCopy = new Float32Array(audio);
3526
3098
  const identityCopy = new Float32Array(identity);
3527
3099
  const feeds = {
3528
- "audio": new this.ort.Tensor("float32", audioCopy, [1, 16e3]),
3100
+ "audio": new this.ort.Tensor("float32", audioCopy, [1, this.chunkSize]),
3529
3101
  "identity": new this.ort.Tensor("float32", identityCopy, [1, this.numIdentityClasses])
3530
3102
  };
3531
3103
  return this.queueInference(feeds);
@@ -3561,7 +3133,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3561
3133
  const telemetry = getTelemetry();
3562
3134
  const span = telemetry?.startSpan("Wav2Vec2.infer", {
3563
3135
  "inference.backend": this._backend,
3564
- "inference.input_samples": 16e3
3136
+ "inference.input_samples": this.chunkSize
3565
3137
  });
3566
3138
  try {
3567
3139
  const startTime = performance.now();
@@ -3600,7 +3172,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3600
3172
  blendshapes.push(symmetrizeBlendshapes(rawFrame));
3601
3173
  }
3602
3174
  const text = this.decodeCTC(asrLogits);
3603
- logger2.trace("Inference completed", {
3175
+ logger3.trace("Inference completed", {
3604
3176
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
3605
3177
  numA2EFrames,
3606
3178
  numASRFrames,
@@ -3634,12 +3206,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3634
3206
  const errMsg = err instanceof Error ? err.message : String(err);
3635
3207
  if (errMsg.includes("timed out")) {
3636
3208
  this.poisoned = true;
3637
- logger2.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
3209
+ logger3.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
3638
3210
  backend: this._backend,
3639
3211
  timeoutMs: _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
3640
3212
  });
3641
3213
  } else {
3642
- logger2.error("Inference failed", { error: errMsg, backend: this._backend });
3214
+ logger3.error("Inference failed", { error: errMsg, backend: this._backend });
3643
3215
  }
3644
3216
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
3645
3217
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -3680,56 +3252,79 @@ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
3680
3252
  _Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
3681
3253
  var Wav2Vec2Inference = _Wav2Vec2Inference;
3682
3254
 
3255
+ // src/audio/audioUtils.ts
3256
/**
 * Convert 16-bit signed PCM bytes to Float32 samples in [-1, 1).
 *
 * Accepts either a raw ArrayBuffer or any ArrayBuffer view (Uint8Array,
 * Node Buffer, DataView, ...). For views, only the bytes the view covers are
 * read (its byteOffset/byteLength are honoured) instead of converting each
 * element of the view into a sample. A trailing odd byte is ignored.
 *
 * Samples are read in platform byte order, as with any Int16Array.
 *
 * @param {ArrayBuffer|ArrayBufferView} buffer - PCM16 audio bytes.
 * @returns {Float32Array} One float per 16-bit sample, scaled by 1/32768.
 */
function pcm16ToFloat32(buffer) {
  // Whole samples only: drop a dangling odd byte.
  const sampleCount = Math.floor(buffer.byteLength / 2);
  let int16;
  if (ArrayBuffer.isView(buffer)) {
    if (buffer.byteOffset % 2 === 0) {
      int16 = new Int16Array(buffer.buffer, buffer.byteOffset, sampleCount);
    } else {
      // Int16Array requires a 2-byte aligned offset, so copy the covered
      // bytes into a fresh, aligned buffer first.
      const bytes = new Uint8Array(buffer.buffer, buffer.byteOffset, sampleCount * 2);
      int16 = new Int16Array(bytes.slice().buffer);
    }
  } else {
    int16 = new Int16Array(buffer, 0, sampleCount);
  }
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
3265
/**
 * Convert 16-bit signed integer samples to Float32 samples.
 *
 * Each sample is divided by 32768, so the Int16 range maps to [-1, 1).
 *
 * @param {ArrayLike<number>} int16 - Samples, typically an Int16Array.
 * @returns {Float32Array} New array of the same length with scaled samples.
 */
function int16ToFloat32(int16) {
  const INT16_SCALE = 32768;
  return Float32Array.from(int16, (sample) => sample / INT16_SCALE);
}
3272
+
3683
3273
  // src/audio/FullFacePipeline.ts
3684
- var logger3 = createLogger("FullFacePipeline");
3685
- var BLENDSHAPE_INDEX_MAP = /* @__PURE__ */ new Map();
3686
- LAM_BLENDSHAPES.forEach((name, index) => {
3687
- BLENDSHAPE_INDEX_MAP.set(name, index);
3688
- });
3689
- var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
3690
- var EMOTION_LABEL_MAP = {
3691
- // Direct labels
3692
- happy: "happy",
3693
- sad: "sad",
3694
- angry: "angry",
3695
- neutral: "neutral",
3696
- // Natural language synonyms
3697
- excited: "happy",
3698
- joyful: "happy",
3699
- cheerful: "happy",
3700
- delighted: "happy",
3701
- amused: "happy",
3702
- melancholic: "sad",
3703
- sorrowful: "sad",
3704
- disappointed: "sad",
3705
- frustrated: "angry",
3706
- irritated: "angry",
3707
- furious: "angry",
3708
- annoyed: "angry",
3709
- // SenseVoice labels
3710
- fearful: "sad",
3711
- disgusted: "angry",
3712
- surprised: "happy"
3713
- };
3714
- var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3274
+ var logger4 = createLogger("FullFacePipeline");
3275
// Lookup from blendshape name to the facial-region group it belongs to.
// Membership is decided purely by the name's prefix; names matching none of
// the prefixes below are left out of the map.
var BLENDSHAPE_TO_GROUP = /* @__PURE__ */ new Map();
{
  // [name prefix, group key] pairs, checked in order.
  const prefixToGroup = [
    ["eye", "eyes"],
    ["brow", "brows"],
    ["jaw", "jaw"],
    ["mouth", "mouth"],
    ["cheek", "cheeks"],
    ["nose", "nose"],
    ["tongue", "tongue"]
  ];
  for (const name of LAM_BLENDSHAPES) {
    const match = prefixToGroup.find(([prefix]) => name.startsWith(prefix));
    if (match) {
      BLENDSHAPE_TO_GROUP.set(name, match[1]);
    }
  }
}
3293
+ var FullFacePipeline = class extends EventEmitter {
3715
3294
  constructor(options) {
3716
3295
  super();
3717
3296
  this.options = options;
3718
3297
  this.playbackStarted = false;
3719
3298
  this.monitorInterval = null;
3720
3299
  this.frameAnimationId = null;
3721
- // Emotion state
3722
- this.lastEmotionFrame = null;
3723
- this.currentAudioEnergy = 0;
3724
3300
  // Stale frame detection
3725
3301
  this.lastNewFrameTime = 0;
3726
3302
  this.lastKnownLamFrame = null;
3727
3303
  this.staleWarningEmitted = false;
3304
+ // Frame loop timing (for dt calculation)
3305
+ this.lastFrameLoopTime = 0;
3306
+ // Diagnostic logging counter
3307
+ this.frameLoopCount = 0;
3728
3308
  const sampleRate = options.sampleRate ?? 16e3;
3729
- this.emotionBlendFactor = options.emotionBlendFactor ?? 0.8;
3730
- this.lamBlendFactor = options.lamBlendFactor ?? 0.2;
3731
- const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
3309
+ this.profile = options.profile ?? {};
3310
+ this.staleThresholdMs = options.staleThresholdMs ?? 2e3;
3311
+ this.smoother = new BlendshapeSmoother({
3312
+ halflife: options.smoothingHalflife ?? 0.06
3313
+ });
3314
+ const isCpuModel = options.lam.modelId === "wav2arkit_cpu";
3315
+ const chunkSize = options.chunkSize ?? options.lam.chunkSize ?? 16e3;
3316
+ const chunkAccumulationMs = chunkSize / sampleRate * 1e3;
3317
+ const inferenceEstimateMs = isCpuModel ? 300 : options.lam.backend === "wasm" ? 250 : 80;
3318
+ const marginMs = 100;
3319
+ const autoDelay = Math.ceil(chunkAccumulationMs + inferenceEstimateMs + marginMs);
3732
3320
  const audioDelayMs = options.audioDelayMs ?? autoDelay;
3321
+ logger4.info("FullFacePipeline config", {
3322
+ chunkSize,
3323
+ audioDelayMs,
3324
+ autoDelay,
3325
+ backend: options.lam.backend,
3326
+ modelId: options.lam.modelId
3327
+ });
3733
3328
  this.scheduler = new AudioScheduler({
3734
3329
  sampleRate,
3735
3330
  initialLookaheadSec: audioDelayMs / 1e3
@@ -3738,20 +3333,15 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3738
3333
  sampleRate,
3739
3334
  targetDurationMs: options.chunkTargetMs ?? 200
3740
3335
  });
3741
- this.lamPipeline = new LAMPipeline({
3336
+ this.processor = new A2EProcessor({
3337
+ backend: options.lam,
3742
3338
  sampleRate,
3339
+ chunkSize,
3743
3340
  onError: (error) => {
3744
- logger3.error("LAM inference error", { message: error.message, stack: error.stack });
3341
+ logger4.error("A2E inference error", { message: error.message, stack: error.stack });
3745
3342
  this.emit("error", error);
3746
3343
  }
3747
3344
  });
3748
- this.emotionMapper = new EmotionToBlendshapeMapper({
3749
- smoothingFactor: 0.15,
3750
- confidenceThreshold: 0.3,
3751
- intensity: 1,
3752
- energyModulation: true
3753
- });
3754
- this.energyAnalyzer = new AudioEnergyAnalyzer();
3755
3345
  }
3756
3346
  /**
3757
3347
  * Initialize the pipeline
@@ -3760,40 +3350,33 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3760
3350
  await this.scheduler.initialize();
3761
3351
  }
3762
3352
  /**
3763
- * Set emotion label from backend (e.g., LLM response emotion).
3764
- *
3765
- * Converts a natural language emotion label into an EmotionFrame
3766
- * that drives upper face blendshapes for the duration of the utterance.
3767
- *
3768
- * Supported labels: happy, excited, joyful, sad, melancholic, angry,
3769
- * frustrated, neutral, etc.
3770
- *
3771
- * @param label - Emotion label string (case-insensitive)
3772
- */
3773
- setEmotionLabel(label) {
3774
- const normalized = label.toLowerCase();
3775
- const mapped = EMOTION_LABEL_MAP[normalized] ?? "neutral";
3776
- const probabilities = {
3777
- neutral: 0.1,
3778
- happy: 0.1,
3779
- angry: 0.1,
3780
- sad: 0.1
3781
- };
3782
- probabilities[mapped] = 0.7;
3783
- const frame = {
3784
- emotion: mapped,
3785
- confidence: 0.7,
3786
- probabilities
3787
- };
3788
- this.lastEmotionFrame = frame;
3789
- logger3.info("Emotion label set", { label, mapped });
3353
+ * Update the ExpressionProfile at runtime (e.g., character switch).
3354
+ */
3355
+ setProfile(profile) {
3356
+ this.profile = profile;
3790
3357
  }
3791
3358
  /**
3792
- * Clear any set emotion label.
3793
- * Falls back to prosody-only upper face animation.
3359
+ * Apply ExpressionProfile scaling to raw A2E blendshapes.
3360
+ *
3361
+ * For each blendshape:
3362
+ * 1. If an override exists for the blendshape name, use override as scaler
3363
+ * 2. Otherwise, use the group scaler (default 1.0)
3364
+ * 3. Clamp result to [0, 1]
3794
3365
  */
3795
- clearEmotionLabel() {
3796
- this.lastEmotionFrame = null;
3366
+ applyProfile(raw) {
3367
+ const scaled = new Float32Array(52);
3368
+ for (let i = 0; i < 52; i++) {
3369
+ const name = LAM_BLENDSHAPES[i];
3370
+ let scaler;
3371
+ if (this.profile.overrides && this.profile.overrides[name] !== void 0) {
3372
+ scaler = this.profile.overrides[name];
3373
+ } else {
3374
+ const group = BLENDSHAPE_TO_GROUP.get(name);
3375
+ scaler = group ? this.profile[group] ?? 1 : 1;
3376
+ }
3377
+ scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
3378
+ }
3379
+ return scaled;
3797
3380
  }
3798
3381
  /**
3799
3382
  * Start a new playback session
@@ -3805,15 +3388,14 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3805
3388
  this.stopMonitoring();
3806
3389
  this.scheduler.reset();
3807
3390
  this.coalescer.reset();
3808
- this.lamPipeline.reset();
3391
+ this.processor.reset();
3809
3392
  this.playbackStarted = false;
3810
- this.lastEmotionFrame = null;
3811
- this.currentAudioEnergy = 0;
3812
- this.emotionMapper.reset();
3813
- this.energyAnalyzer.reset();
3814
3393
  this.lastNewFrameTime = 0;
3815
3394
  this.lastKnownLamFrame = null;
3816
3395
  this.staleWarningEmitted = false;
3396
+ this.lastFrameLoopTime = 0;
3397
+ this.frameLoopCount = 0;
3398
+ this.smoother.reset();
3817
3399
  this.scheduler.warmup();
3818
3400
  this.startFrameLoop();
3819
3401
  this.startMonitoring();
@@ -3821,8 +3403,8 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3821
3403
  /**
3822
3404
  * Receive audio chunk from network
3823
3405
  *
3824
- * Audio-first design: schedules audio immediately, LAM runs in background.
3825
- * This prevents LAM inference (50-300ms) from blocking audio scheduling.
3406
+ * Audio-first design: schedules audio immediately, A2E runs in background.
3407
+ * This prevents A2E inference (50-300ms) from blocking audio scheduling.
3826
3408
  *
3827
3409
  * @param chunk - Uint8Array containing Int16 PCM audio
3828
3410
  */
@@ -3837,100 +3419,77 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3837
3419
  this.playbackStarted = true;
3838
3420
  this.emit("playback_start", scheduleTime);
3839
3421
  }
3840
- const { energy } = this.energyAnalyzer.process(float32);
3841
- this.currentAudioEnergy = energy;
3842
- this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
3843
- this.emit("error", err);
3422
+ logger4.info("onAudioChunk \u2192 pushAudio", {
3423
+ float32Samples: float32.length,
3424
+ scheduleTime: scheduleTime.toFixed(3),
3425
+ currentTime: this.scheduler.getCurrentTime().toFixed(3),
3426
+ deltaToPlayback: (scheduleTime - this.scheduler.getCurrentTime()).toFixed(3)
3844
3427
  });
3845
- }
3846
- /**
3847
- * Get emotion frame for current animation.
3848
- *
3849
- * Priority:
3850
- * 1. Explicit emotion label from setEmotionLabel()
3851
- * 2. Prosody fallback: subtle brow movement from audio energy
3852
- */
3853
- getEmotionFrame() {
3854
- if (this.lastEmotionFrame) {
3855
- return { frame: this.lastEmotionFrame, energy: this.currentAudioEnergy };
3856
- }
3857
- return { frame: null, energy: this.currentAudioEnergy };
3858
- }
3859
- /**
3860
- * Merge LAM blendshapes with emotion upper face blendshapes
3861
- */
3862
- mergeBlendshapes(lamFrame, emotionFrame, audioEnergy) {
3863
- const merged = new Float32Array(52);
3864
- let emotionBlendshapes;
3865
- if (emotionFrame) {
3866
- this.emotionMapper.mapFrame(emotionFrame, audioEnergy);
3867
- this.emotionMapper.update(33);
3868
- emotionBlendshapes = this.emotionMapper.getCurrentBlendshapes();
3869
- } else {
3870
- emotionBlendshapes = {};
3871
- for (const name of UPPER_FACE_BLENDSHAPES) {
3872
- emotionBlendshapes[name] = 0;
3873
- }
3874
- }
3875
- for (let i = 0; i < 52; i++) {
3876
- const name = LAM_BLENDSHAPES[i];
3877
- if (UPPER_FACE_SET.has(name)) {
3878
- const emotionValue = emotionBlendshapes[name] ?? 0;
3879
- const lamValue = lamFrame[i];
3880
- merged[i] = emotionValue * this.emotionBlendFactor + lamValue * this.lamBlendFactor;
3881
- } else {
3882
- merged[i] = lamFrame[i];
3883
- }
3884
- }
3885
- return { merged, emotionBlendshapes };
3428
+ this.processor.pushAudio(float32, scheduleTime);
3886
3429
  }
3887
3430
  /**
3888
3431
  * Start frame animation loop
3432
+ *
3433
+ * Uses critically damped spring smoother to produce continuous output
3434
+ * at render rate (60fps), even between inference batches (~30fps bursts).
3435
+ * Springs interpolate toward the latest inference target, and decay
3436
+ * to neutral when inference stalls.
3889
3437
  */
3890
3438
  startFrameLoop() {
3439
+ this.lastFrameLoopTime = 0;
3891
3440
  const updateFrame = () => {
3441
+ const now = performance.now() / 1e3;
3442
+ const dt = this.lastFrameLoopTime > 0 ? now - this.lastFrameLoopTime : 1 / 60;
3443
+ this.lastFrameLoopTime = now;
3444
+ this.frameLoopCount++;
3892
3445
  const currentTime = this.scheduler.getCurrentTime();
3893
- const lamFrame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
3894
- if (lamFrame) {
3895
- if (lamFrame !== this.lastKnownLamFrame) {
3896
- this.lastNewFrameTime = performance.now();
3897
- this.lastKnownLamFrame = lamFrame;
3898
- this.staleWarningEmitted = false;
3446
+ const lamFrame = this.processor.getFrameForTime(currentTime);
3447
+ if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
3448
+ this.smoother.setTarget(lamFrame);
3449
+ this.lastNewFrameTime = performance.now();
3450
+ this.lastKnownLamFrame = lamFrame;
3451
+ this.staleWarningEmitted = false;
3452
+ logger4.info("New A2E frame", {
3453
+ jawOpen: lamFrame[24]?.toFixed(3),
3454
+ mouthClose: lamFrame[26]?.toFixed(3),
3455
+ browInnerUp: lamFrame[2]?.toFixed(3),
3456
+ browDownL: lamFrame[0]?.toFixed(3),
3457
+ browOuterUpL: lamFrame[3]?.toFixed(3),
3458
+ currentTime: currentTime.toFixed(3),
3459
+ queuedFrames: this.processor.queuedFrameCount
3460
+ });
3461
+ }
3462
+ if (this.frameLoopCount % 60 === 0) {
3463
+ logger4.info("Frame loop heartbeat", {
3464
+ frameLoopCount: this.frameLoopCount,
3465
+ currentTime: currentTime.toFixed(3),
3466
+ playbackEndTime: this.scheduler.getPlaybackEndTime().toFixed(3),
3467
+ queuedFrames: this.processor.queuedFrameCount,
3468
+ hasTarget: this.smoother.hasTarget,
3469
+ playbackStarted: this.playbackStarted,
3470
+ msSinceNewFrame: this.lastNewFrameTime > 0 ? Math.round(performance.now() - this.lastNewFrameTime) : -1,
3471
+ processorFill: this.processor.fillLevel.toFixed(2)
3472
+ });
3473
+ }
3474
+ if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
3475
+ this.smoother.decayToNeutral();
3476
+ if (!this.staleWarningEmitted) {
3477
+ this.staleWarningEmitted = true;
3478
+ logger4.warn("A2E stalled \u2014 decaying to neutral", {
3479
+ staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
3480
+ queuedFrames: this.processor.queuedFrameCount
3481
+ });
3899
3482
  }
3900
- const { frame: emotionFrame, energy } = this.getEmotionFrame();
3901
- const { merged, emotionBlendshapes } = this.mergeBlendshapes(lamFrame, emotionFrame, energy);
3483
+ }
3484
+ if (lamFrame) {
3485
+ const scaled = this.applyProfile(lamFrame);
3902
3486
  const fullFrame = {
3903
- blendshapes: merged,
3904
- lamBlendshapes: lamFrame,
3905
- emotionBlendshapes,
3906
- emotion: emotionFrame,
3487
+ blendshapes: scaled,
3488
+ rawBlendshapes: lamFrame,
3907
3489
  timestamp: currentTime
3908
3490
  };
3909
3491
  this.emit("full_frame_ready", fullFrame);
3910
3492
  this.emit("lam_frame_ready", lamFrame);
3911
- if (emotionFrame) {
3912
- this.emit("emotion_frame_ready", emotionFrame);
3913
- }
3914
- } else if (this.playbackStarted && !this.lastKnownLamFrame) {
3915
- const { frame: emotionFrame, energy } = this.getEmotionFrame();
3916
- if (emotionFrame && energy > 0.05) {
3917
- const startupFrame = new Float32Array(52);
3918
- const { merged, emotionBlendshapes } = this.mergeBlendshapes(startupFrame, emotionFrame, energy);
3919
- this.emit("full_frame_ready", {
3920
- blendshapes: merged,
3921
- lamBlendshapes: startupFrame,
3922
- emotionBlendshapes,
3923
- emotion: emotionFrame,
3924
- timestamp: currentTime
3925
- });
3926
- }
3927
- }
3928
- if (this.playbackStarted && this.lastNewFrameTime > 0 && !this.staleWarningEmitted && performance.now() - this.lastNewFrameTime > _FullFacePipeline.STALE_FRAME_THRESHOLD_MS) {
3929
- this.staleWarningEmitted = true;
3930
- logger3.warn("LAM appears stalled \u2014 no new frames for 3+ seconds during playback", {
3931
- staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
3932
- queuedFrames: this.lamPipeline.queuedFrameCount
3933
- });
3934
3493
  }
3935
3494
  this.frameAnimationId = requestAnimationFrame(updateFrame);
3936
3495
  };
@@ -3945,7 +3504,7 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3945
3504
  const chunk = new Uint8Array(remaining);
3946
3505
  await this.onAudioChunk(chunk);
3947
3506
  }
3948
- await this.lamPipeline.flush(this.options.lam);
3507
+ await this.processor.flush();
3949
3508
  }
3950
3509
  /**
3951
3510
  * Stop playback immediately with smooth fade-out
@@ -3954,15 +3513,13 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3954
3513
  this.stopMonitoring();
3955
3514
  await this.scheduler.cancelAll(fadeOutMs);
3956
3515
  this.coalescer.reset();
3957
- this.lamPipeline.reset();
3516
+ this.processor.reset();
3517
+ this.smoother.reset();
3958
3518
  this.playbackStarted = false;
3959
- this.lastEmotionFrame = null;
3960
- this.currentAudioEnergy = 0;
3961
- this.emotionMapper.reset();
3962
- this.energyAnalyzer.reset();
3963
3519
  this.lastNewFrameTime = 0;
3964
3520
  this.lastKnownLamFrame = null;
3965
3521
  this.staleWarningEmitted = false;
3522
+ this.lastFrameLoopTime = 0;
3966
3523
  this.emit("playback_complete", void 0);
3967
3524
  }
3968
3525
  /**
@@ -3973,7 +3530,7 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3973
3530
  clearInterval(this.monitorInterval);
3974
3531
  }
3975
3532
  this.monitorInterval = setInterval(() => {
3976
- if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
3533
+ if (this.scheduler.isComplete() && this.processor.queuedFrameCount === 0) {
3977
3534
  this.emit("playback_complete", void 0);
3978
3535
  this.stopMonitoring();
3979
3536
  }
@@ -3999,20 +3556,12 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3999
3556
  return {
4000
3557
  playbackStarted: this.playbackStarted,
4001
3558
  coalescerFill: this.coalescer.fillLevel,
4002
- lamFill: this.lamPipeline.fillLevel,
4003
- queuedLAMFrames: this.lamPipeline.queuedFrameCount,
4004
- emotionLabel: this.lastEmotionFrame?.emotion ?? null,
4005
- currentAudioEnergy: this.currentAudioEnergy,
3559
+ processorFill: this.processor.fillLevel,
3560
+ queuedFrames: this.processor.queuedFrameCount,
4006
3561
  currentTime: this.scheduler.getCurrentTime(),
4007
3562
  playbackEndTime: this.scheduler.getPlaybackEndTime()
4008
3563
  };
4009
3564
  }
4010
- /**
4011
- * Check if an explicit emotion label is currently set
4012
- */
4013
- get hasEmotionLabel() {
4014
- return this.lastEmotionFrame !== null;
4015
- }
4016
3565
  /**
4017
3566
  * Cleanup resources
4018
3567
  */
@@ -4020,13 +3569,9 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
4020
3569
  this.stopMonitoring();
4021
3570
  this.scheduler.dispose();
4022
3571
  this.coalescer.reset();
4023
- this.lamPipeline.reset();
4024
- this.lastEmotionFrame = null;
4025
- this.currentAudioEnergy = 0;
3572
+ this.processor.dispose();
4026
3573
  }
4027
3574
  };
4028
- _FullFacePipeline.STALE_FRAME_THRESHOLD_MS = 3e3;
4029
- var FullFacePipeline = _FullFacePipeline;
4030
3575
 
4031
3576
  // src/inference/kaldiFbank.ts
4032
3577
  function fft(re, im) {
@@ -4313,7 +3858,7 @@ function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
4313
3858
  }
4314
3859
 
4315
3860
  // src/inference/SenseVoiceInference.ts
4316
- var logger4 = createLogger("SenseVoice");
3861
+ var logger5 = createLogger("SenseVoice");
4317
3862
  var _SenseVoiceInference = class _SenseVoiceInference {
4318
3863
  constructor(config) {
4319
3864
  this.session = null;
@@ -4366,26 +3911,26 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4366
3911
  "model.backend_requested": this.config.backend
4367
3912
  });
4368
3913
  try {
4369
- logger4.info("Loading ONNX Runtime...", { preference: this.config.backend });
3914
+ logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
4370
3915
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
4371
3916
  this.ort = ort;
4372
3917
  this._backend = backend;
4373
- logger4.info("ONNX Runtime loaded", { backend: this._backend });
4374
- logger4.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
3918
+ logger5.info("ONNX Runtime loaded", { backend: this._backend });
3919
+ logger5.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
4375
3920
  const tokensResponse = await fetch(this.config.tokensUrl);
4376
3921
  if (!tokensResponse.ok) {
4377
3922
  throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
4378
3923
  }
4379
3924
  const tokensText = await tokensResponse.text();
4380
3925
  this.tokenMap = parseTokensFile(tokensText);
4381
- logger4.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
3926
+ logger5.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
4382
3927
  const sessionOptions = getSessionOptions(this._backend);
4383
3928
  if (this._backend === "webgpu") {
4384
3929
  sessionOptions.graphOptimizationLevel = "basic";
4385
3930
  }
4386
3931
  let isCached = false;
4387
3932
  if (isIOS()) {
4388
- logger4.info("iOS: passing model URL directly to ORT (low-memory path)", {
3933
+ logger5.info("iOS: passing model URL directly to ORT (low-memory path)", {
4389
3934
  modelUrl: this.config.modelUrl
4390
3935
  });
4391
3936
  this.session = await this.ort.InferenceSession.create(
@@ -4397,14 +3942,14 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4397
3942
  isCached = await cache.has(this.config.modelUrl);
4398
3943
  let modelBuffer;
4399
3944
  if (isCached) {
4400
- logger4.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
3945
+ logger5.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
4401
3946
  modelBuffer = await cache.get(this.config.modelUrl);
4402
3947
  onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
4403
3948
  } else {
4404
- logger4.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
3949
+ logger5.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
4405
3950
  modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
4406
3951
  }
4407
- logger4.debug("Creating ONNX session", {
3952
+ logger5.debug("Creating ONNX session", {
4408
3953
  size: formatBytes(modelBuffer.byteLength),
4409
3954
  backend: this._backend
4410
3955
  });
@@ -4417,15 +3962,15 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4417
3962
  const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
4418
3963
  this.negMean = cmvn.negMean;
4419
3964
  this.invStddev = cmvn.invStddev;
4420
- logger4.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
3965
+ logger5.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
4421
3966
  } else {
4422
- logger4.warn("CMVN not found in model metadata \u2014 features will not be normalized");
3967
+ logger5.warn("CMVN not found in model metadata \u2014 features will not be normalized");
4423
3968
  }
4424
3969
  } catch (cmvnErr) {
4425
- logger4.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
3970
+ logger5.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
4426
3971
  }
4427
3972
  const loadTimeMs = performance.now() - startTime;
4428
- logger4.info("SenseVoice model loaded", {
3973
+ logger5.info("SenseVoice model loaded", {
4429
3974
  backend: this._backend,
4430
3975
  loadTimeMs: Math.round(loadTimeMs),
4431
3976
  vocabSize: this.tokenMap.size,
@@ -4536,7 +4081,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4536
4081
  const vocabSize = logitsDims[2];
4537
4082
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
4538
4083
  const inferenceTimeMs = performance.now() - startTime;
4539
- logger4.trace("Transcription complete", {
4084
+ logger5.trace("Transcription complete", {
4540
4085
  text: decoded.text.substring(0, 50),
4541
4086
  language: decoded.language,
4542
4087
  emotion: decoded.emotion,
@@ -4574,7 +4119,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4574
4119
  const errMsg = err instanceof Error ? err.message : String(err);
4575
4120
  if (errMsg.includes("timed out")) {
4576
4121
  this.poisoned = true;
4577
- logger4.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4122
+ logger5.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4578
4123
  backend: this._backend,
4579
4124
  timeoutMs: _SenseVoiceInference.INFERENCE_TIMEOUT_MS
4580
4125
  });
@@ -4582,7 +4127,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4582
4127
  const oomError = new Error(
4583
4128
  `SenseVoice inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
4584
4129
  );
4585
- logger4.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4130
+ logger5.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4586
4131
  pointer: `0x${err.toString(16)}`,
4587
4132
  backend: this._backend
4588
4133
  });
@@ -4595,7 +4140,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4595
4140
  reject(oomError);
4596
4141
  return;
4597
4142
  } else {
4598
- logger4.error("Inference failed", { error: errMsg, backend: this._backend });
4143
+ logger5.error("Inference failed", { error: errMsg, backend: this._backend });
4599
4144
  }
4600
4145
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
4601
4146
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -4624,7 +4169,7 @@ _SenseVoiceInference.INFERENCE_TIMEOUT_MS = 1e4;
4624
4169
  var SenseVoiceInference = _SenseVoiceInference;
4625
4170
 
4626
4171
  // src/inference/SenseVoiceWorker.ts
4627
- var logger5 = createLogger("SenseVoiceWorker");
4172
+ var logger6 = createLogger("SenseVoiceWorker");
4628
4173
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
4629
4174
  var LOAD_TIMEOUT_MS = 3e4;
4630
4175
  var INFERENCE_TIMEOUT_MS = 1e4;
@@ -5357,7 +4902,7 @@ var SenseVoiceWorker = class {
5357
4902
  this.handleWorkerMessage(event.data);
5358
4903
  };
5359
4904
  worker.onerror = (error) => {
5360
- logger5.error("Worker error", { error: error.message });
4905
+ logger6.error("Worker error", { error: error.message });
5361
4906
  for (const [, resolver] of this.pendingResolvers) {
5362
4907
  resolver.reject(new Error(`Worker error: ${error.message}`));
5363
4908
  }
@@ -5437,9 +4982,9 @@ var SenseVoiceWorker = class {
5437
4982
  "model.language": this.config.language
5438
4983
  });
5439
4984
  try {
5440
- logger5.info("Creating SenseVoice worker...");
4985
+ logger6.info("Creating SenseVoice worker...");
5441
4986
  this.worker = this.createWorker();
5442
- logger5.info("Loading model in worker...", {
4987
+ logger6.info("Loading model in worker...", {
5443
4988
  modelUrl: this.config.modelUrl,
5444
4989
  tokensUrl: this.config.tokensUrl,
5445
4990
  language: this.config.language,
@@ -5461,7 +5006,7 @@ var SenseVoiceWorker = class {
5461
5006
  this._isLoaded = true;
5462
5007
  const loadTimeMs = performance.now() - startTime;
5463
5008
  onProgress?.(1, 1);
5464
- logger5.info("SenseVoice worker loaded successfully", {
5009
+ logger6.info("SenseVoice worker loaded successfully", {
5465
5010
  backend: "wasm",
5466
5011
  loadTimeMs: Math.round(loadTimeMs),
5467
5012
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -5540,7 +5085,7 @@ var SenseVoiceWorker = class {
5540
5085
  INFERENCE_TIMEOUT_MS
5541
5086
  );
5542
5087
  const totalTimeMs = performance.now() - startTime;
5543
- logger5.trace("Worker transcription complete", {
5088
+ logger6.trace("Worker transcription complete", {
5544
5089
  text: result.text.substring(0, 50),
5545
5090
  language: result.language,
5546
5091
  emotion: result.emotion,
@@ -5576,11 +5121,11 @@ var SenseVoiceWorker = class {
5576
5121
  } catch (err) {
5577
5122
  const errMsg = err instanceof Error ? err.message : String(err);
5578
5123
  if (errMsg.includes("timed out")) {
5579
- logger5.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5124
+ logger6.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5580
5125
  timeoutMs: INFERENCE_TIMEOUT_MS
5581
5126
  });
5582
5127
  } else {
5583
- logger5.error("Worker inference failed", { error: errMsg });
5128
+ logger6.error("Worker inference failed", { error: errMsg });
5584
5129
  }
5585
5130
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
5586
5131
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -5618,7 +5163,7 @@ var SenseVoiceWorker = class {
5618
5163
  };
5619
5164
 
5620
5165
  // src/inference/UnifiedInferenceWorker.ts
5621
- var logger6 = createLogger("UnifiedInferenceWorker");
5166
+ var logger7 = createLogger("UnifiedInferenceWorker");
5622
5167
  var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
5623
5168
  var INIT_TIMEOUT_MS = 15e3;
5624
5169
  var SV_LOAD_TIMEOUT_MS = 3e4;
@@ -6314,7 +5859,7 @@ var UnifiedInferenceWorker = class {
6314
5859
  const telemetry = getTelemetry();
6315
5860
  const span = telemetry?.startSpan("UnifiedInferenceWorker.init");
6316
5861
  try {
6317
- logger6.info("Creating unified inference worker...");
5862
+ logger7.info("Creating unified inference worker...");
6318
5863
  this.worker = this.createWorker();
6319
5864
  await this.sendMessage(
6320
5865
  { type: "init", wasmPaths: WASM_CDN_PATH3, isIOS: isIOS() },
@@ -6323,7 +5868,7 @@ var UnifiedInferenceWorker = class {
6323
5868
  );
6324
5869
  this.initialized = true;
6325
5870
  const loadTimeMs = performance.now() - startTime;
6326
- logger6.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
5871
+ logger7.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
6327
5872
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs });
6328
5873
  span?.end();
6329
5874
  } catch (error) {
@@ -6377,8 +5922,8 @@ var UnifiedInferenceWorker = class {
6377
5922
  if (!this.worker) return;
6378
5923
  await this.sendMessage({ type: "sv:dispose" }, "sv:disposed", DISPOSE_TIMEOUT_MS);
6379
5924
  }
6380
- // ── Wav2ArkitCpu (Lip Sync) ──────────────────────────────────────────
6381
- async loadLipSync(config) {
5925
+ // ── Wav2ArkitCpu (A2E) ──────────────────────────────────────────────
5926
+ async loadA2E(config) {
6382
5927
  this.assertReady();
6383
5928
  const startTime = performance.now();
6384
5929
  const result = await this.sendMessage(
@@ -6399,7 +5944,7 @@ var UnifiedInferenceWorker = class {
6399
5944
  outputNames: result.outputNames
6400
5945
  };
6401
5946
  }
6402
- async inferLipSync(audio) {
5947
+ async inferA2E(audio) {
6403
5948
  this.assertReady();
6404
5949
  return this.sendMessage(
6405
5950
  { type: "cpu:infer", audio },
@@ -6407,7 +5952,7 @@ var UnifiedInferenceWorker = class {
6407
5952
  CPU_INFER_TIMEOUT_MS
6408
5953
  );
6409
5954
  }
6410
- async disposeLipSync() {
5955
+ async disposeA2E() {
6411
5956
  if (!this.worker) return;
6412
5957
  await this.sendMessage({ type: "cpu:dispose" }, "cpu:disposed", DISPOSE_TIMEOUT_MS);
6413
5958
  }
@@ -6497,7 +6042,7 @@ var UnifiedInferenceWorker = class {
6497
6042
  this.handleWorkerMessage(event.data);
6498
6043
  };
6499
6044
  worker.onerror = (error) => {
6500
- logger6.error("Unified worker error", { error: error.message });
6045
+ logger7.error("Unified worker error", { error: error.message });
6501
6046
  this.rejectAllPending(`Worker error: ${error.message}`);
6502
6047
  };
6503
6048
  return worker;
@@ -6511,7 +6056,7 @@ var UnifiedInferenceWorker = class {
6511
6056
  this.pendingRequests.delete(requestId);
6512
6057
  pending.reject(new Error(data.error));
6513
6058
  } else {
6514
- logger6.error("Worker broadcast error", { error: data.error });
6059
+ logger7.error("Worker broadcast error", { error: data.error });
6515
6060
  this.rejectAllPending(data.error);
6516
6061
  }
6517
6062
  return;
@@ -6533,7 +6078,7 @@ var UnifiedInferenceWorker = class {
6533
6078
  const timeout = setTimeout(() => {
6534
6079
  this.pendingRequests.delete(requestId);
6535
6080
  this.poisoned = true;
6536
- logger6.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
6081
+ logger7.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
6537
6082
  type: message.type,
6538
6083
  timeoutMs
6539
6084
  });
@@ -6599,7 +6144,7 @@ var SenseVoiceUnifiedAdapter = class {
6599
6144
  });
6600
6145
  this._isLoaded = true;
6601
6146
  onProgress?.(1, 1);
6602
- logger6.info("SenseVoice loaded via unified worker", {
6147
+ logger7.info("SenseVoice loaded via unified worker", {
6603
6148
  backend: "wasm",
6604
6149
  loadTimeMs: Math.round(result.loadTimeMs),
6605
6150
  vocabSize: result.vocabSize
@@ -6640,6 +6185,7 @@ var SenseVoiceUnifiedAdapter = class {
6640
6185
  var Wav2ArkitCpuUnifiedAdapter = class {
6641
6186
  constructor(worker, config) {
6642
6187
  this.modelId = "wav2arkit_cpu";
6188
+ this.chunkSize = 16e3;
6643
6189
  this._isLoaded = false;
6644
6190
  this.inferenceQueue = Promise.resolve();
6645
6191
  this.worker = worker;
@@ -6658,12 +6204,12 @@ var Wav2ArkitCpuUnifiedAdapter = class {
6658
6204
  });
6659
6205
  try {
6660
6206
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
6661
- const result = await this.worker.loadLipSync({
6207
+ const result = await this.worker.loadA2E({
6662
6208
  modelUrl: this.config.modelUrl,
6663
6209
  externalDataUrl: externalDataUrl || null
6664
6210
  });
6665
6211
  this._isLoaded = true;
6666
- logger6.info("Wav2ArkitCpu loaded via unified worker", {
6212
+ logger7.info("Wav2ArkitCpu loaded via unified worker", {
6667
6213
  backend: "wasm",
6668
6214
  loadTimeMs: Math.round(result.loadTimeMs)
6669
6215
  });
@@ -6690,7 +6236,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
6690
6236
  });
6691
6237
  try {
6692
6238
  const startTime = performance.now();
6693
- const result = await this.worker.inferLipSync(audioCopy);
6239
+ const result = await this.worker.inferA2E(audioCopy);
6694
6240
  const inferenceTimeMs = performance.now() - startTime;
6695
6241
  const flatBuffer = result.blendshapes;
6696
6242
  const { numFrames, numBlendshapes } = result;
@@ -6713,7 +6259,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
6713
6259
  }
6714
6260
  async dispose() {
6715
6261
  if (this._isLoaded) {
6716
- await this.worker.disposeLipSync();
6262
+ await this.worker.disposeA2E();
6717
6263
  this._isLoaded = false;
6718
6264
  }
6719
6265
  }
@@ -6769,7 +6315,7 @@ var SileroVADUnifiedAdapter = class {
6769
6315
  sampleRate: this.config.sampleRate
6770
6316
  });
6771
6317
  this._isLoaded = true;
6772
- logger6.info("SileroVAD loaded via unified worker", {
6318
+ logger7.info("SileroVAD loaded via unified worker", {
6773
6319
  backend: "wasm",
6774
6320
  loadTimeMs: Math.round(result.loadTimeMs),
6775
6321
  sampleRate: this.config.sampleRate,
@@ -6850,10 +6396,10 @@ var SileroVADUnifiedAdapter = class {
6850
6396
  };
6851
6397
 
6852
6398
  // src/inference/createSenseVoice.ts
6853
- var logger7 = createLogger("createSenseVoice");
6399
+ var logger8 = createLogger("createSenseVoice");
6854
6400
  function createSenseVoice(config) {
6855
6401
  if (config.unifiedWorker) {
6856
- logger7.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6402
+ logger8.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6857
6403
  return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
6858
6404
  modelUrl: config.modelUrl,
6859
6405
  tokensUrl: config.tokensUrl,
@@ -6866,7 +6412,7 @@ function createSenseVoice(config) {
6866
6412
  if (!SenseVoiceWorker.isSupported()) {
6867
6413
  throw new Error("Web Workers are not supported in this environment");
6868
6414
  }
6869
- logger7.info("Creating SenseVoiceWorker (off-main-thread)");
6415
+ logger8.info("Creating SenseVoiceWorker (off-main-thread)");
6870
6416
  return new SenseVoiceWorker({
6871
6417
  modelUrl: config.modelUrl,
6872
6418
  tokensUrl: config.tokensUrl,
@@ -6875,7 +6421,7 @@ function createSenseVoice(config) {
6875
6421
  });
6876
6422
  }
6877
6423
  if (useWorker === false) {
6878
- logger7.info("Creating SenseVoiceInference (main thread)");
6424
+ logger8.info("Creating SenseVoiceInference (main thread)");
6879
6425
  return new SenseVoiceInference({
6880
6426
  modelUrl: config.modelUrl,
6881
6427
  tokensUrl: config.tokensUrl,
@@ -6884,7 +6430,7 @@ function createSenseVoice(config) {
6884
6430
  });
6885
6431
  }
6886
6432
  if (SenseVoiceWorker.isSupported() && !isIOS()) {
6887
- logger7.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6433
+ logger8.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6888
6434
  return new SenseVoiceWorker({
6889
6435
  modelUrl: config.modelUrl,
6890
6436
  tokensUrl: config.tokensUrl,
@@ -6892,7 +6438,7 @@ function createSenseVoice(config) {
6892
6438
  textNorm: config.textNorm
6893
6439
  });
6894
6440
  }
6895
- logger7.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6441
+ logger8.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6896
6442
  reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
6897
6443
  });
6898
6444
  return new SenseVoiceInference({
@@ -6904,10 +6450,11 @@ function createSenseVoice(config) {
6904
6450
  }
6905
6451
 
6906
6452
  // src/inference/Wav2ArkitCpuInference.ts
6907
- var logger8 = createLogger("Wav2ArkitCpu");
6453
+ var logger9 = createLogger("Wav2ArkitCpu");
6908
6454
  var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6909
6455
  constructor(config) {
6910
6456
  this.modelId = "wav2arkit_cpu";
6457
+ this.chunkSize = 16e3;
6911
6458
  this.session = null;
6912
6459
  this.ort = null;
6913
6460
  this._backend = "wasm";
@@ -6945,16 +6492,16 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6945
6492
  });
6946
6493
  try {
6947
6494
  const preference = this.config.backend || "wasm";
6948
- logger8.info("Loading ONNX Runtime...", { preference });
6495
+ logger9.info("Loading ONNX Runtime...", { preference });
6949
6496
  const { ort, backend } = await getOnnxRuntimeForPreference(preference);
6950
6497
  this.ort = ort;
6951
6498
  this._backend = backend;
6952
- logger8.info("ONNX Runtime loaded", { backend: this._backend });
6499
+ logger9.info("ONNX Runtime loaded", { backend: this._backend });
6953
6500
  const modelUrl = this.config.modelUrl;
6954
6501
  const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
6955
6502
  const sessionOptions = getSessionOptions(this._backend);
6956
6503
  if (isIOS()) {
6957
- logger8.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6504
+ logger9.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6958
6505
  modelUrl,
6959
6506
  dataUrl
6960
6507
  });
@@ -6972,15 +6519,15 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6972
6519
  const isCached = await cache.has(modelUrl);
6973
6520
  let modelBuffer;
6974
6521
  if (isCached) {
6975
- logger8.debug("Loading model from cache", { modelUrl });
6522
+ logger9.debug("Loading model from cache", { modelUrl });
6976
6523
  modelBuffer = await cache.get(modelUrl);
6977
6524
  if (!modelBuffer) {
6978
- logger8.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6525
+ logger9.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6979
6526
  await cache.delete(modelUrl);
6980
6527
  modelBuffer = await fetchWithCache(modelUrl);
6981
6528
  }
6982
6529
  } else {
6983
- logger8.debug("Fetching and caching model graph", { modelUrl });
6530
+ logger9.debug("Fetching and caching model graph", { modelUrl });
6984
6531
  modelBuffer = await fetchWithCache(modelUrl);
6985
6532
  }
6986
6533
  if (!modelBuffer) {
@@ -6991,31 +6538,31 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6991
6538
  try {
6992
6539
  const isDataCached = await cache.has(dataUrl);
6993
6540
  if (isDataCached) {
6994
- logger8.debug("Loading external data from cache", { dataUrl });
6541
+ logger9.debug("Loading external data from cache", { dataUrl });
6995
6542
  externalDataBuffer = await cache.get(dataUrl);
6996
6543
  if (!externalDataBuffer) {
6997
- logger8.warn("Cache corruption for external data, retrying", { dataUrl });
6544
+ logger9.warn("Cache corruption for external data, retrying", { dataUrl });
6998
6545
  await cache.delete(dataUrl);
6999
6546
  externalDataBuffer = await fetchWithCache(dataUrl);
7000
6547
  }
7001
6548
  } else {
7002
- logger8.info("Fetching external model data", {
6549
+ logger9.info("Fetching external model data", {
7003
6550
  dataUrl,
7004
6551
  note: "This may be a large download (400MB+)"
7005
6552
  });
7006
6553
  externalDataBuffer = await fetchWithCache(dataUrl);
7007
6554
  }
7008
- logger8.info("External data loaded", {
6555
+ logger9.info("External data loaded", {
7009
6556
  size: formatBytes(externalDataBuffer.byteLength)
7010
6557
  });
7011
6558
  } catch (err) {
7012
- logger8.debug("No external data file found (single-file model)", {
6559
+ logger9.debug("No external data file found (single-file model)", {
7013
6560
  dataUrl,
7014
6561
  error: err.message
7015
6562
  });
7016
6563
  }
7017
6564
  }
7018
- logger8.debug("Creating ONNX session", {
6565
+ logger9.debug("Creating ONNX session", {
7019
6566
  graphSize: formatBytes(modelBuffer.byteLength),
7020
6567
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
7021
6568
  backend: this._backend
@@ -7031,7 +6578,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7031
6578
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
7032
6579
  }
7033
6580
  const loadTimeMs = performance.now() - startTime;
7034
- logger8.info("Model loaded successfully", {
6581
+ logger9.info("Model loaded successfully", {
7035
6582
  backend: this._backend,
7036
6583
  loadTimeMs: Math.round(loadTimeMs),
7037
6584
  inputs: this.session.inputNames,
@@ -7047,12 +6594,12 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7047
6594
  model: "wav2arkit_cpu",
7048
6595
  backend: this._backend
7049
6596
  });
7050
- logger8.debug("Running warmup inference");
6597
+ logger9.debug("Running warmup inference");
7051
6598
  const warmupStart = performance.now();
7052
6599
  const silentAudio = new Float32Array(16e3);
7053
6600
  await this.infer(silentAudio);
7054
6601
  const warmupTimeMs = performance.now() - warmupStart;
7055
- logger8.info("Warmup inference complete", {
6602
+ logger9.info("Warmup inference complete", {
7056
6603
  warmupTimeMs: Math.round(warmupTimeMs),
7057
6604
  backend: this._backend
7058
6605
  });
@@ -7139,7 +6686,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7139
6686
  const symmetrized = symmetrizeBlendshapes(rawFrame);
7140
6687
  blendshapes.push(symmetrized);
7141
6688
  }
7142
- logger8.trace("Inference completed", {
6689
+ logger9.trace("Inference completed", {
7143
6690
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
7144
6691
  numFrames,
7145
6692
  inputSamples
@@ -7167,7 +6714,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7167
6714
  const errMsg = err instanceof Error ? err.message : String(err);
7168
6715
  if (errMsg.includes("timed out")) {
7169
6716
  this.poisoned = true;
7170
- logger8.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6717
+ logger9.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
7171
6718
  backend: this._backend,
7172
6719
  timeoutMs: _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS
7173
6720
  });
@@ -7175,7 +6722,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7175
6722
  const oomError = new Error(
7176
6723
  `Wav2ArkitCpu inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
7177
6724
  );
7178
- logger8.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6725
+ logger9.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7179
6726
  pointer: `0x${err.toString(16)}`,
7180
6727
  backend: this._backend
7181
6728
  });
@@ -7188,7 +6735,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7188
6735
  reject(oomError);
7189
6736
  return;
7190
6737
  } else {
7191
- logger8.error("Inference failed", { error: errMsg, backend: this._backend });
6738
+ logger9.error("Inference failed", { error: errMsg, backend: this._backend });
7192
6739
  }
7193
6740
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
7194
6741
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -7215,7 +6762,7 @@ _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS = 5e3;
7215
6762
  var Wav2ArkitCpuInference = _Wav2ArkitCpuInference;
7216
6763
 
7217
6764
  // src/inference/Wav2ArkitCpuWorker.ts
7218
- var logger9 = createLogger("Wav2ArkitCpuWorker");
6765
+ var logger10 = createLogger("Wav2ArkitCpuWorker");
7219
6766
  var WASM_CDN_PATH4 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
7220
6767
  var LOAD_TIMEOUT_MS2 = 6e4;
7221
6768
  var INFERENCE_TIMEOUT_MS2 = 5e3;
@@ -7461,6 +7008,7 @@ self.onerror = function(err) {
7461
7008
  var Wav2ArkitCpuWorker = class {
7462
7009
  constructor(config) {
7463
7010
  this.modelId = "wav2arkit_cpu";
7011
+ this.chunkSize = 16e3;
7464
7012
  this.worker = null;
7465
7013
  this.isLoading = false;
7466
7014
  this._isLoaded = false;
@@ -7495,7 +7043,7 @@ var Wav2ArkitCpuWorker = class {
7495
7043
  this.handleWorkerMessage(event.data);
7496
7044
  };
7497
7045
  worker.onerror = (error) => {
7498
- logger9.error("Worker error", { error: error.message });
7046
+ logger10.error("Worker error", { error: error.message });
7499
7047
  for (const [, resolver] of this.pendingResolvers) {
7500
7048
  resolver.reject(new Error(`Worker error: ${error.message}`));
7501
7049
  }
@@ -7571,10 +7119,10 @@ var Wav2ArkitCpuWorker = class {
7571
7119
  "model.backend_requested": "wasm"
7572
7120
  });
7573
7121
  try {
7574
- logger9.info("Creating wav2arkit_cpu worker...");
7122
+ logger10.info("Creating wav2arkit_cpu worker...");
7575
7123
  this.worker = this.createWorker();
7576
7124
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
7577
- logger9.info("Loading model in worker...", {
7125
+ logger10.info("Loading model in worker...", {
7578
7126
  modelUrl: this.config.modelUrl,
7579
7127
  externalDataUrl,
7580
7128
  isIOS: isIOS()
@@ -7592,7 +7140,7 @@ var Wav2ArkitCpuWorker = class {
7592
7140
  );
7593
7141
  this._isLoaded = true;
7594
7142
  const loadTimeMs = performance.now() - startTime;
7595
- logger9.info("Wav2ArkitCpu worker loaded successfully", {
7143
+ logger10.info("Wav2ArkitCpu worker loaded successfully", {
7596
7144
  backend: "wasm",
7597
7145
  loadTimeMs: Math.round(loadTimeMs),
7598
7146
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -7677,7 +7225,7 @@ var Wav2ArkitCpuWorker = class {
7677
7225
  for (let f = 0; f < numFrames; f++) {
7678
7226
  blendshapes.push(flatBuffer.slice(f * numBlendshapes, (f + 1) * numBlendshapes));
7679
7227
  }
7680
- logger9.trace("Worker inference completed", {
7228
+ logger10.trace("Worker inference completed", {
7681
7229
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
7682
7230
  workerTimeMs: Math.round(result.inferenceTimeMs * 100) / 100,
7683
7231
  numFrames,
@@ -7707,12 +7255,12 @@ var Wav2ArkitCpuWorker = class {
7707
7255
  const errMsg = err instanceof Error ? err.message : String(err);
7708
7256
  if (errMsg.includes("timed out")) {
7709
7257
  this.poisoned = true;
7710
- logger9.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7258
+ logger10.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7711
7259
  backend: "wasm",
7712
7260
  timeoutMs: INFERENCE_TIMEOUT_MS2
7713
7261
  });
7714
7262
  } else {
7715
- logger9.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7263
+ logger10.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7716
7264
  }
7717
7265
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
7718
7266
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -7749,39 +7297,39 @@ var Wav2ArkitCpuWorker = class {
7749
7297
  }
7750
7298
  };
7751
7299
 
7752
- // src/inference/createLipSync.ts
7753
- var logger10 = createLogger("createLipSync");
7754
- function createLipSync(config) {
7300
+ // src/inference/createA2E.ts
7301
+ var logger11 = createLogger("createA2E");
7302
+ function createA2E(config) {
7755
7303
  const mode = config.mode ?? "auto";
7756
7304
  const fallbackOnError = config.fallbackOnError ?? true;
7757
7305
  let useCpu;
7758
7306
  if (mode === "cpu") {
7759
7307
  useCpu = true;
7760
- logger10.info("Forcing CPU lip sync model (wav2arkit_cpu)");
7308
+ logger11.info("Forcing CPU A2E model (wav2arkit_cpu)");
7761
7309
  } else if (mode === "gpu") {
7762
7310
  useCpu = false;
7763
- logger10.info("Forcing GPU lip sync model (Wav2Vec2)");
7311
+ logger11.info("Forcing GPU A2E model (Wav2Vec2)");
7764
7312
  } else {
7765
- useCpu = shouldUseCpuLipSync();
7766
- logger10.info("Auto-detected lip sync model", {
7313
+ useCpu = shouldUseCpuA2E();
7314
+ logger11.info("Auto-detected A2E model", {
7767
7315
  useCpu,
7768
7316
  isSafari: isSafari()
7769
7317
  });
7770
7318
  }
7771
7319
  if (useCpu) {
7772
7320
  if (config.unifiedWorker) {
7773
- logger10.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7321
+ logger11.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7774
7322
  return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
7775
7323
  modelUrl: config.cpuModelUrl
7776
7324
  });
7777
7325
  }
7778
7326
  if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7779
- logger10.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7327
+ logger11.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7780
7328
  return new Wav2ArkitCpuWorker({
7781
7329
  modelUrl: config.cpuModelUrl
7782
7330
  });
7783
7331
  }
7784
- logger10.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7332
+ logger11.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7785
7333
  return new Wav2ArkitCpuInference({
7786
7334
  modelUrl: config.cpuModelUrl
7787
7335
  });
@@ -7793,13 +7341,13 @@ function createLipSync(config) {
7793
7341
  numIdentityClasses: config.numIdentityClasses
7794
7342
  });
7795
7343
  if (fallbackOnError) {
7796
- logger10.info("Creating Wav2Vec2Inference with CPU fallback");
7797
- return new LipSyncWithFallback(gpuInstance, config);
7344
+ logger11.info("Creating Wav2Vec2Inference with CPU fallback");
7345
+ return new A2EWithFallback(gpuInstance, config);
7798
7346
  }
7799
- logger10.info("Creating Wav2Vec2Inference (no fallback)");
7347
+ logger11.info("Creating Wav2Vec2Inference (no fallback)");
7800
7348
  return gpuInstance;
7801
7349
  }
7802
- var LipSyncWithFallback = class {
7350
+ var A2EWithFallback = class {
7803
7351
  constructor(gpuInstance, config) {
7804
7352
  this.hasFallenBack = false;
7805
7353
  this.implementation = gpuInstance;
@@ -7808,6 +7356,9 @@ var LipSyncWithFallback = class {
7808
7356
  get modelId() {
7809
7357
  return this.implementation.modelId;
7810
7358
  }
7359
+ get chunkSize() {
7360
+ return this.implementation.chunkSize;
7361
+ }
7811
7362
  get backend() {
7812
7363
  return this.implementation.backend;
7813
7364
  }
@@ -7822,7 +7373,7 @@ var LipSyncWithFallback = class {
7822
7373
  }
7823
7374
  }
7824
7375
  async fallbackToCpu(reason) {
7825
- logger10.warn("GPU model load failed, falling back to CPU model", { reason });
7376
+ logger11.warn("GPU model load failed, falling back to CPU model", { reason });
7826
7377
  try {
7827
7378
  await this.implementation.dispose();
7828
7379
  } catch {
@@ -7831,17 +7382,17 @@ var LipSyncWithFallback = class {
7831
7382
  this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
7832
7383
  modelUrl: this.config.cpuModelUrl
7833
7384
  });
7834
- logger10.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7385
+ logger11.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7835
7386
  } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7836
7387
  this.implementation = new Wav2ArkitCpuWorker({
7837
7388
  modelUrl: this.config.cpuModelUrl
7838
7389
  });
7839
- logger10.info("Fallback to Wav2ArkitCpuWorker successful");
7390
+ logger11.info("Fallback to Wav2ArkitCpuWorker successful");
7840
7391
  } else {
7841
7392
  this.implementation = new Wav2ArkitCpuInference({
7842
7393
  modelUrl: this.config.cpuModelUrl
7843
7394
  });
7844
- logger10.info("Fallback to Wav2ArkitCpuInference successful");
7395
+ logger11.info("Fallback to Wav2ArkitCpuInference successful");
7845
7396
  }
7846
7397
  this.hasFallenBack = true;
7847
7398
  return await this.implementation.load();
@@ -7854,8 +7405,124 @@ var LipSyncWithFallback = class {
7854
7405
  }
7855
7406
  };
7856
7407
 
7408
+ // src/animation/audioEnergy.ts
7409
/**
 * Root-mean-square level of a sample buffer.
 *
 * @param samples Indexable sequence of numeric samples (must expose `length`)
 * @returns Square root of the mean of the squared samples; 0 for an empty buffer
 */
function calculateRMS(samples) {
  const count = samples.length;
  // Guard the division below: an empty buffer has no energy.
  if (count === 0) return 0;
  let energy = 0;
  for (let idx = 0; idx < count; idx += 1) {
    const value = samples[idx];
    energy += value * value;
  }
  return Math.sqrt(energy / count);
}
7417
/**
 * Largest absolute sample value in a buffer.
 *
 * @param samples Indexable sequence of numeric samples (must expose `length`)
 * @returns Maximum of |sample|; 0 for an empty buffer
 */
function calculatePeak(samples) {
  let loudest = 0;
  let idx = 0;
  while (idx < samples.length) {
    const magnitude = Math.abs(samples[idx]);
    // Strict comparison (rather than Math.max) so a NaN sample never replaces the running peak.
    loudest = magnitude > loudest ? magnitude : loudest;
    idx += 1;
  }
  return loudest;
}
7425
var AudioEnergyAnalyzer = class {
  /**
   * Tracks smoothed RMS and peak levels of successive audio buffers, rising
   * quickly when the level goes up and decaying by `smoothingFactor` when it drops.
   *
   * @param smoothingFactor Weight kept from the previous value while the level is
   *   falling (0 = no smoothing). Clamped to the range [0, 0.99]. Default 0.85
   * @param noiseFloor Instantaneous levels that do not exceed this are treated as 0. Default 0.01
   */
  constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
    const clamped = Math.max(0, Math.min(0.99, smoothingFactor));
    this.smoothedRMS = 0;
    this.smoothedPeak = 0;
    this.smoothingFactor = clamped;
    this.noiseFloor = noiseFloor;
  }
  /**
   * Feed one buffer of samples and update the smoothed levels.
   *
   * @param samples Audio samples (Float32Array)
   * @returns `{ rms, peak, energy }` where `rms` and `peak` are the smoothed
   *   values and `energy` is `(0.7 * rms + 0.3 * peak) * 2`, capped at 1
   */
  process(samples) {
    const floor = this.noiseFloor;
    const release = this.smoothingFactor;
    // Noise gate: anything not above the floor counts as silence.
    const gate = (level) => (level > floor ? level : 0);
    const rmsIn = gate(calculateRMS(samples));
    const peakIn = gate(calculatePeak(samples));

    // Attack uses fixed weights; release uses the configured smoothing factor.
    const prevRMS = this.smoothedRMS;
    this.smoothedRMS = rmsIn > prevRMS
      ? prevRMS * 0.5 + rmsIn * 0.5
      : prevRMS * release + rmsIn * (1 - release);

    const prevPeak = this.smoothedPeak;
    this.smoothedPeak = peakIn > prevPeak
      ? prevPeak * 0.3 + peakIn * 0.7
      : prevPeak * release + peakIn * (1 - release);

    const blended = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
    return {
      rms: this.smoothedRMS,
      peak: this.smoothedPeak,
      // Scale up and clamp
      energy: Math.min(1, blended * 2)
    };
  }
  /**
   * Clear the smoothed RMS and peak back to 0.
   */
  reset() {
    this.smoothedPeak = 0;
    this.smoothedRMS = 0;
  }
  /**
   * Current smoothed RMS value.
   */
  get rms() {
    return this.smoothedRMS;
  }
  /**
   * Current smoothed peak value.
   */
  get peak() {
    return this.smoothedPeak;
  }
};
7484
var EmphasisDetector = class {
  /**
   * Flags frames whose energy jumps above the average of the preceding frames.
   *
   * @param historySize Number of frames to track. Default 10. Detection needs at
   *   least 3 frames of history, so smaller values are raised to 3; a value that
   *   is not a finite number falls back to 10 so the history stays bounded.
   * @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
   */
  constructor(historySize = 10, emphasisThreshold = 0.15) {
    const requested = Number(historySize);
    this.energyHistory = [];
    // A window below 3 could never pass the warm-up check in process(), and a
    // NaN/Infinity window would never trim the history.
    this.historySize = Number.isFinite(requested) ? Math.max(3, Math.floor(requested)) : 10;
    this.emphasisThreshold = emphasisThreshold;
  }
  /**
   * Process energy value and detect emphasis
   * @param energy Current energy value (0-1)
   * @returns Object with isEmphasis flag and emphasisStrength. The strength is
   *   `increase / 0.3` capped at 1 when emphasis is detected, otherwise 0.
   */
  process(energy) {
    const history = this.energyHistory;
    history.push(energy);
    if (history.length > this.historySize) {
      history.shift();
    }
    // Warm-up: need at least two earlier frames to form a baseline.
    if (history.length < 3) {
      return { isEmphasis: false, emphasisStrength: 0 };
    }
    // Average every frame except the one just pushed (no per-frame slice allocation).
    const prevCount = history.length - 1;
    let prevSum = 0;
    for (let i = 0; i < prevCount; i++) {
      prevSum += history[i];
    }
    const increase = energy - prevSum / prevCount;
    const isEmphasis = increase > this.emphasisThreshold;
    return {
      isEmphasis,
      emphasisStrength: isEmphasis ? Math.min(1, increase / 0.3) : 0
    };
  }
  /**
   * Reset detector state
   */
  reset() {
    this.energyHistory = [];
  }
};
7523
+
7857
7524
  // src/inference/SileroVADInference.ts
7858
- var logger11 = createLogger("SileroVAD");
7525
+ var logger12 = createLogger("SileroVAD");
7859
7526
  var SileroVADInference = class {
7860
7527
  constructor(config) {
7861
7528
  this.session = null;
@@ -7929,23 +7596,23 @@ var SileroVADInference = class {
7929
7596
  "model.sample_rate": this.config.sampleRate
7930
7597
  });
7931
7598
  try {
7932
- logger11.info("Loading ONNX Runtime...", { preference: this.config.backend });
7599
+ logger12.info("Loading ONNX Runtime...", { preference: this.config.backend });
7933
7600
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
7934
7601
  this.ort = ort;
7935
7602
  this._backend = backend;
7936
- logger11.info("ONNX Runtime loaded", { backend: this._backend });
7603
+ logger12.info("ONNX Runtime loaded", { backend: this._backend });
7937
7604
  const cache = getModelCache();
7938
7605
  const modelUrl = this.config.modelUrl;
7939
7606
  const isCached = await cache.has(modelUrl);
7940
7607
  let modelBuffer;
7941
7608
  if (isCached) {
7942
- logger11.debug("Loading model from cache", { modelUrl });
7609
+ logger12.debug("Loading model from cache", { modelUrl });
7943
7610
  modelBuffer = await cache.get(modelUrl);
7944
7611
  } else {
7945
- logger11.debug("Fetching and caching model", { modelUrl });
7612
+ logger12.debug("Fetching and caching model", { modelUrl });
7946
7613
  modelBuffer = await fetchWithCache(modelUrl);
7947
7614
  }
7948
- logger11.debug("Creating ONNX session", {
7615
+ logger12.debug("Creating ONNX session", {
7949
7616
  size: formatBytes(modelBuffer.byteLength),
7950
7617
  backend: this._backend
7951
7618
  });
@@ -7954,7 +7621,7 @@ var SileroVADInference = class {
7954
7621
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
7955
7622
  this.reset();
7956
7623
  const loadTimeMs = performance.now() - startTime;
7957
- logger11.info("Model loaded successfully", {
7624
+ logger12.info("Model loaded successfully", {
7958
7625
  backend: this._backend,
7959
7626
  loadTimeMs: Math.round(loadTimeMs),
7960
7627
  sampleRate: this.config.sampleRate,
@@ -8009,7 +7676,7 @@ var SileroVADInference = class {
8009
7676
  []
8010
7677
  );
8011
7678
  } catch (e) {
8012
- logger11.warn("BigInt64Array not available, using bigint array fallback", {
7679
+ logger12.warn("BigInt64Array not available, using bigint array fallback", {
8013
7680
  error: e instanceof Error ? e.message : String(e)
8014
7681
  });
8015
7682
  this.srTensor = new this.ort.Tensor(
@@ -8115,7 +7782,7 @@ var SileroVADInference = class {
8115
7782
  this.preSpeechBuffer.shift();
8116
7783
  }
8117
7784
  }
8118
- logger11.trace("Skipping VAD inference - audio too quiet", {
7785
+ logger12.trace("Skipping VAD inference - audio too quiet", {
8119
7786
  rms: Math.round(rms * 1e4) / 1e4,
8120
7787
  threshold: MIN_ENERGY_THRESHOLD
8121
7788
  });
@@ -8169,7 +7836,7 @@ var SileroVADInference = class {
8169
7836
  if (isSpeech && !this.wasSpeaking) {
8170
7837
  preSpeechChunks = [...this.preSpeechBuffer];
8171
7838
  this.preSpeechBuffer = [];
8172
- logger11.debug("Speech started with pre-speech buffer", {
7839
+ logger12.debug("Speech started with pre-speech buffer", {
8173
7840
  preSpeechChunks: preSpeechChunks.length,
8174
7841
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8175
7842
  });
@@ -8182,7 +7849,7 @@ var SileroVADInference = class {
8182
7849
  this.preSpeechBuffer = [];
8183
7850
  }
8184
7851
  this.wasSpeaking = isSpeech;
8185
- logger11.trace("VAD inference completed", {
7852
+ logger12.trace("VAD inference completed", {
8186
7853
  probability: Math.round(probability * 1e3) / 1e3,
8187
7854
  isSpeech,
8188
7855
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -8213,7 +7880,7 @@ var SileroVADInference = class {
8213
7880
  const oomError = new Error(
8214
7881
  `SileroVAD inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reducing concurrent model sessions or reloading the page.`
8215
7882
  );
8216
- logger11.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7883
+ logger12.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
8217
7884
  pointer: `0x${err.toString(16)}`,
8218
7885
  backend: this._backend
8219
7886
  });
@@ -8256,7 +7923,7 @@ var SileroVADInference = class {
8256
7923
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
8257
7924
 
8258
7925
  // src/inference/SileroVADWorker.ts
8259
- var logger12 = createLogger("SileroVADWorker");
7926
+ var logger13 = createLogger("SileroVADWorker");
8260
7927
  var WASM_CDN_PATH5 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
8261
7928
  var LOAD_TIMEOUT_MS3 = 1e4;
8262
7929
  var INFERENCE_TIMEOUT_MS3 = 1e3;
@@ -8534,7 +8201,7 @@ var SileroVADWorker = class {
8534
8201
  this.handleWorkerMessage(event.data);
8535
8202
  };
8536
8203
  worker.onerror = (error) => {
8537
- logger12.error("Worker error", { error: error.message });
8204
+ logger13.error("Worker error", { error: error.message });
8538
8205
  for (const [, resolver] of this.pendingResolvers) {
8539
8206
  resolver.reject(new Error(`Worker error: ${error.message}`));
8540
8207
  }
@@ -8610,9 +8277,9 @@ var SileroVADWorker = class {
8610
8277
  "model.sample_rate": this.config.sampleRate
8611
8278
  });
8612
8279
  try {
8613
- logger12.info("Creating VAD worker...");
8280
+ logger13.info("Creating VAD worker...");
8614
8281
  this.worker = this.createWorker();
8615
- logger12.info("Loading model in worker...", {
8282
+ logger13.info("Loading model in worker...", {
8616
8283
  modelUrl: this.config.modelUrl,
8617
8284
  sampleRate: this.config.sampleRate
8618
8285
  });
@@ -8628,7 +8295,7 @@ var SileroVADWorker = class {
8628
8295
  );
8629
8296
  this._isLoaded = true;
8630
8297
  const loadTimeMs = performance.now() - startTime;
8631
- logger12.info("VAD worker loaded successfully", {
8298
+ logger13.info("VAD worker loaded successfully", {
8632
8299
  backend: "wasm",
8633
8300
  loadTimeMs: Math.round(loadTimeMs),
8634
8301
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -8735,7 +8402,7 @@ var SileroVADWorker = class {
8735
8402
  if (isSpeech && !this.wasSpeaking) {
8736
8403
  preSpeechChunks = [...this.preSpeechBuffer];
8737
8404
  this.preSpeechBuffer = [];
8738
- logger12.debug("Speech started with pre-speech buffer", {
8405
+ logger13.debug("Speech started with pre-speech buffer", {
8739
8406
  preSpeechChunks: preSpeechChunks.length,
8740
8407
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8741
8408
  });
@@ -8748,7 +8415,7 @@ var SileroVADWorker = class {
8748
8415
  this.preSpeechBuffer = [];
8749
8416
  }
8750
8417
  this.wasSpeaking = isSpeech;
8751
- logger12.trace("VAD worker inference completed", {
8418
+ logger13.trace("VAD worker inference completed", {
8752
8419
  probability: Math.round(result.probability * 1e3) / 1e3,
8753
8420
  isSpeech,
8754
8421
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -8816,44 +8483,44 @@ var SileroVADWorker = class {
8816
8483
  };
8817
8484
 
8818
8485
  // src/inference/createSileroVAD.ts
8819
- var logger13 = createLogger("createSileroVAD");
8486
+ var logger14 = createLogger("createSileroVAD");
8820
8487
  function supportsVADWorker() {
8821
8488
  if (typeof Worker === "undefined") {
8822
- logger13.debug("Worker not supported: Worker constructor undefined");
8489
+ logger14.debug("Worker not supported: Worker constructor undefined");
8823
8490
  return false;
8824
8491
  }
8825
8492
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
8826
- logger13.debug("Worker not supported: URL.createObjectURL unavailable");
8493
+ logger14.debug("Worker not supported: URL.createObjectURL unavailable");
8827
8494
  return false;
8828
8495
  }
8829
8496
  if (typeof Blob === "undefined") {
8830
- logger13.debug("Worker not supported: Blob constructor unavailable");
8497
+ logger14.debug("Worker not supported: Blob constructor unavailable");
8831
8498
  return false;
8832
8499
  }
8833
8500
  return true;
8834
8501
  }
8835
8502
  function createSileroVAD(config) {
8836
8503
  if (config.unifiedWorker) {
8837
- logger13.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8504
+ logger14.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8838
8505
  return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
8839
8506
  }
8840
8507
  const fallbackOnError = config.fallbackOnError ?? true;
8841
8508
  let useWorker;
8842
8509
  if (config.useWorker !== void 0) {
8843
8510
  useWorker = config.useWorker;
8844
- logger13.debug("Worker preference explicitly set", { useWorker });
8511
+ logger14.debug("Worker preference explicitly set", { useWorker });
8845
8512
  } else {
8846
8513
  const workerSupported = supportsVADWorker();
8847
8514
  const onMobile = isMobile();
8848
8515
  useWorker = workerSupported && !onMobile;
8849
- logger13.debug("Auto-detected Worker preference", {
8516
+ logger14.debug("Auto-detected Worker preference", {
8850
8517
  useWorker,
8851
8518
  workerSupported,
8852
8519
  onMobile
8853
8520
  });
8854
8521
  }
8855
8522
  if (useWorker) {
8856
- logger13.info("Creating SileroVADWorker (off-main-thread)");
8523
+ logger14.info("Creating SileroVADWorker (off-main-thread)");
8857
8524
  const worker = new SileroVADWorker({
8858
8525
  modelUrl: config.modelUrl,
8859
8526
  sampleRate: config.sampleRate,
@@ -8865,7 +8532,7 @@ function createSileroVAD(config) {
8865
8532
  }
8866
8533
  return worker;
8867
8534
  }
8868
- logger13.info("Creating SileroVADInference (main thread)");
8535
+ logger14.info("Creating SileroVADInference (main thread)");
8869
8536
  return new SileroVADInference(config);
8870
8537
  }
8871
8538
  var VADWorkerWithFallback = class {
@@ -8891,7 +8558,7 @@ var VADWorkerWithFallback = class {
8891
8558
  try {
8892
8559
  return await this.implementation.load();
8893
8560
  } catch (error) {
8894
- logger13.warn("Worker load failed, falling back to main thread", {
8561
+ logger14.warn("Worker load failed, falling back to main thread", {
8895
8562
  error: error instanceof Error ? error.message : String(error)
8896
8563
  });
8897
8564
  try {
@@ -8900,7 +8567,7 @@ var VADWorkerWithFallback = class {
8900
8567
  }
8901
8568
  this.implementation = new SileroVADInference(this.config);
8902
8569
  this.hasFallenBack = true;
8903
- logger13.info("Fallback to SileroVADInference successful");
8570
+ logger14.info("Fallback to SileroVADInference successful");
8904
8571
  return await this.implementation.load();
8905
8572
  }
8906
8573
  }
@@ -8921,8 +8588,175 @@ var VADWorkerWithFallback = class {
8921
8588
  }
8922
8589
  };
8923
8590
 
8591
+ // src/inference/A2EOrchestrator.ts
8592
+ var logger15 = createLogger("A2EOrchestrator");
8593
var A2EOrchestrator = class {
  /**
   * Ties together an A2E inference backend (built by createA2E), an A2EProcessor,
   * and raw Web Audio microphone capture.
   *
   * @param config Orchestrator options. Fields read by this class: `sampleRate`
   *   (default 16000), `gpuModelUrl`, `gpuExternalDataUrl`, `cpuModelUrl`,
   *   `a2eConfig`, `chunkSize`, `onFrame`, `onError`, `onReady`.
   */
  constructor(config) {
    this.a2e = null;
    this.processor = null;
    // Mic capture state (lightweight — no dependency on MicrophoneCapture class
    // which requires an external EventEmitter. We do raw Web Audio here.)
    this.stream = null;
    this.audioContext = null;
    this.scriptProcessor = null;
    this.nativeSampleRate = 0;
    this._isReady = false;
    this._isStreaming = false;
    this._backend = null;
    this.disposed = false;
    this.config = {
      sampleRate: 16e3,
      ...config
    };
  }
  /** Latest blendshape weights from inference (null if none yet) */
  get latestWeights() {
    return this.processor?.latestFrame ?? null;
  }
  /** Whether the model is loaded and ready for inference */
  get isReady() {
    return this._isReady;
  }
  /** Whether mic is active and inference loop is running */
  get isStreaming() {
    return this._isStreaming;
  }
  /** Current backend type (webgpu, wasm, or null) */
  get backend() {
    return this._backend;
  }
  /**
   * Load the A2E model and create the processor.
   *
   * Invokes `config.onReady` once the processor exists.
   *
   * @throws Error if the orchestrator is disposed before or while the model loads;
   *   also propagates any failure from the backend's own load().
   */
  async load() {
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    logger15.info("Loading A2E model...");
    const a2e = createA2E({
      gpuModelUrl: this.config.gpuModelUrl,
      gpuExternalDataUrl: this.config.gpuExternalDataUrl,
      cpuModelUrl: this.config.cpuModelUrl ?? this.config.gpuModelUrl,
      ...this.config.a2eConfig
    });
    // Publish the backend before awaiting so a concurrent dispose() can release it.
    this.a2e = a2e;
    const info = await a2e.load();
    if (this.disposed) {
      // dispose() ran while the model was loading and already released the backend;
      // do not build a processor or report readiness on a dead instance.
      throw new Error("A2EOrchestrator has been disposed");
    }
    this._backend = info.backend;
    this.processor = new A2EProcessor({
      backend: a2e,
      sampleRate: this.config.sampleRate,
      chunkSize: this.config.chunkSize,
      onFrame: this.config.onFrame,
      onError: this.config.onError
    });
    this._isReady = true;
    logger15.info("A2E model loaded", {
      backend: info.backend,
      loadTimeMs: info.loadTimeMs,
      modelId: a2e.modelId
    });
    this.config.onReady?.();
  }
  /**
   * Start mic capture and inference loop.
   *
   * No-op when already streaming. On failure every resource acquired so far
   * (media stream, audio context, script processor) is released, the error is
   * reported through `config.onError`, and then rethrown.
   *
   * @throws Error if disposed, if load() has not completed, or if capture setup fails.
   */
  async start() {
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    if (!this._isReady || !this.processor) throw new Error("Model not loaded. Call load() first.");
    if (this._isStreaming) return;
    try {
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: { ideal: this.config.sampleRate },
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });
      // dispose() may have run while the permission prompt was pending.
      if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
      this.audioContext = new AudioContext({ sampleRate: this.config.sampleRate });
      if (this.audioContext.state === "suspended") {
        await this.audioContext.resume();
        if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
      }
      this.nativeSampleRate = this.audioContext.sampleRate;
      const source = this.audioContext.createMediaStreamSource(this.stream);
      this.scriptProcessor = this.audioContext.createScriptProcessor(4096, 1, 1);
      this.scriptProcessor.onaudioprocess = (e) => {
        if (!this._isStreaming || !this.processor) return;
        const input = e.inputBuffer.getChannelData(0);
        let samples;
        if (this.nativeSampleRate !== this.config.sampleRate) {
          // The context did not run at the requested rate: linearly interpolate
          // the buffer to config.sampleRate.
          const ratio = this.config.sampleRate / this.nativeSampleRate;
          const newLen = Math.round(input.length * ratio);
          samples = new Float32Array(newLen);
          for (let i = 0; i < newLen; i++) {
            const srcIdx = i / ratio;
            const lo = Math.floor(srcIdx);
            const hi = Math.min(lo + 1, input.length - 1);
            const frac = srcIdx - lo;
            samples[i] = input[lo] * (1 - frac) + input[hi] * frac;
          }
        } else {
          // Hand the processor its own copy of the channel data.
          samples = new Float32Array(input);
        }
        this.processor.pushAudio(samples);
      };
      source.connect(this.scriptProcessor);
      this.scriptProcessor.connect(this.audioContext.destination);
      this._isStreaming = true;
      this.processor.startDrip();
      logger15.info("Mic capture started", { sampleRate: this.nativeSampleRate });
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      // Do not leave the microphone (or a half-built audio graph) live after a failed start.
      this._isStreaming = false;
      this.releaseCaptureResources();
      logger15.error("Failed to start mic capture", { error: error.message });
      this.config.onError?.(error);
      throw error;
    }
  }
  /**
   * Stop mic capture and inference loop
   */
  stop() {
    this._isStreaming = false;
    if (this.processor) {
      this.processor.stopDrip();
      this.processor.reset();
    }
    this.releaseCaptureResources();
    logger15.info("Mic capture stopped");
  }
  /**
   * Internal: tear down the script processor, media stream tracks and audio
   * context, clearing each field. Safe to call when some or all are already null.
   */
  releaseCaptureResources() {
    if (this.scriptProcessor) {
      this.scriptProcessor.disconnect();
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor = null;
    }
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = null;
    }
    if (this.audioContext) {
      // Best-effort close; a rejection here is deliberately ignored.
      this.audioContext.close().catch(() => {
      });
      this.audioContext = null;
    }
  }
  /**
   * Dispose of all resources. Idempotent: later calls return immediately.
   */
  async dispose() {
    if (this.disposed) return;
    this.disposed = true;
    this.stop();
    if (this.processor) {
      this.processor.dispose();
      this.processor = null;
    }
    if (this.a2e) {
      await this.a2e.dispose();
      this.a2e = null;
    }
    this._isReady = false;
    this._backend = null;
  }
};
8757
+
8924
8758
  // src/inference/SafariSpeechRecognition.ts
8925
- var logger14 = createLogger("SafariSpeech");
8759
+ var logger16 = createLogger("SafariSpeech");
8926
8760
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
8927
8761
  constructor(config = {}) {
8928
8762
  this.recognition = null;
@@ -8941,7 +8775,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8941
8775
  interimResults: config.interimResults ?? true,
8942
8776
  maxAlternatives: config.maxAlternatives ?? 1
8943
8777
  };
8944
- logger14.debug("SafariSpeechRecognition created", {
8778
+ logger16.debug("SafariSpeechRecognition created", {
8945
8779
  language: this.config.language,
8946
8780
  continuous: this.config.continuous
8947
8781
  });
@@ -9002,7 +8836,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9002
8836
  */
9003
8837
  async start() {
9004
8838
  if (this.isListening) {
9005
- logger14.warn("Already listening");
8839
+ logger16.warn("Already listening");
9006
8840
  return;
9007
8841
  }
9008
8842
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -9032,7 +8866,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9032
8866
  this.isListening = true;
9033
8867
  this.startTime = performance.now();
9034
8868
  this.accumulatedText = "";
9035
- logger14.info("Speech recognition started", {
8869
+ logger16.info("Speech recognition started", {
9036
8870
  language: this.config.language
9037
8871
  });
9038
8872
  span?.end();
@@ -9047,7 +8881,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9047
8881
  */
9048
8882
  async stop() {
9049
8883
  if (!this.isListening || !this.recognition) {
9050
- logger14.warn("Not currently listening");
8884
+ logger16.warn("Not currently listening");
9051
8885
  return {
9052
8886
  text: this.accumulatedText,
9053
8887
  language: this.config.language,
@@ -9076,7 +8910,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9076
8910
  if (this.recognition && this.isListening) {
9077
8911
  this.recognition.abort();
9078
8912
  this.isListening = false;
9079
- logger14.info("Speech recognition aborted");
8913
+ logger16.info("Speech recognition aborted");
9080
8914
  }
9081
8915
  }
9082
8916
  /**
@@ -9107,7 +8941,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9107
8941
  this.isListening = false;
9108
8942
  this.resultCallbacks = [];
9109
8943
  this.errorCallbacks = [];
9110
- logger14.debug("SafariSpeechRecognition disposed");
8944
+ logger16.debug("SafariSpeechRecognition disposed");
9111
8945
  }
9112
8946
  /**
9113
8947
  * Set up event handlers for the recognition instance
@@ -9135,7 +8969,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9135
8969
  confidence: alternative.confidence
9136
8970
  };
9137
8971
  this.emitResult(speechResult);
9138
- logger14.trace("Speech result", {
8972
+ logger16.trace("Speech result", {
9139
8973
  text: text.substring(0, 50),
9140
8974
  isFinal,
9141
8975
  confidence: alternative.confidence
@@ -9145,12 +8979,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9145
8979
  span?.end();
9146
8980
  } catch (error) {
9147
8981
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
9148
- logger14.error("Error processing speech result", { error });
8982
+ logger16.error("Error processing speech result", { error });
9149
8983
  }
9150
8984
  };
9151
8985
  this.recognition.onerror = (event) => {
9152
8986
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
9153
- logger14.error("Speech recognition error", { error: event.error, message: event.message });
8987
+ logger16.error("Speech recognition error", { error: event.error, message: event.message });
9154
8988
  this.emitError(error);
9155
8989
  if (this.stopRejecter) {
9156
8990
  this.stopRejecter(error);
@@ -9160,7 +8994,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9160
8994
  };
9161
8995
  this.recognition.onend = () => {
9162
8996
  this.isListening = false;
9163
- logger14.info("Speech recognition ended", {
8997
+ logger16.info("Speech recognition ended", {
9164
8998
  totalText: this.accumulatedText.length,
9165
8999
  durationMs: performance.now() - this.startTime
9166
9000
  });
@@ -9177,13 +9011,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9177
9011
  }
9178
9012
  };
9179
9013
  this.recognition.onstart = () => {
9180
- logger14.debug("Speech recognition started by browser");
9014
+ logger16.debug("Speech recognition started by browser");
9181
9015
  };
9182
9016
  this.recognition.onspeechstart = () => {
9183
- logger14.debug("Speech detected");
9017
+ logger16.debug("Speech detected");
9184
9018
  };
9185
9019
  this.recognition.onspeechend = () => {
9186
- logger14.debug("Speech ended");
9020
+ logger16.debug("Speech ended");
9187
9021
  };
9188
9022
  }
9189
9023
  /**
@@ -9194,7 +9028,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9194
9028
  try {
9195
9029
  callback(result);
9196
9030
  } catch (error) {
9197
- logger14.error("Error in result callback", { error });
9031
+ logger16.error("Error in result callback", { error });
9198
9032
  }
9199
9033
  }
9200
9034
  }
@@ -9206,7 +9040,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9206
9040
  try {
9207
9041
  callback(error);
9208
9042
  } catch (callbackError) {
9209
- logger14.error("Error in error callback", { error: callbackError });
9043
+ logger16.error("Error in error callback", { error: callbackError });
9210
9044
  }
9211
9045
  }
9212
9046
  }
@@ -9619,13 +9453,14 @@ var AgentCoreAdapter = class extends EventEmitter {
9619
9453
  if (!this.lam) {
9620
9454
  throw new Error("LAM must be initialized before pipeline");
9621
9455
  }
9622
- this.pipeline = new SyncedAudioPipeline({
9456
+ this.pipeline = new FullFacePipeline({
9623
9457
  lam: this.lam,
9624
9458
  sampleRate: 16e3,
9625
9459
  chunkTargetMs: 200
9626
9460
  });
9627
9461
  await this.pipeline.initialize();
9628
- this.pipeline.on("frame_ready", (frame) => {
9462
+ this.pipeline.on("full_frame_ready", (fullFrame) => {
9463
+ const frame = fullFrame.blendshapes;
9629
9464
  this.emit("animation", {
9630
9465
  blendshapes: frame,
9631
9466
  get: (name) => {
@@ -9804,9 +9639,9 @@ var AgentCoreAdapter = class extends EventEmitter {
9804
9639
  });
9805
9640
  }
9806
9641
  }
9807
- // REMOVED: processAudioForAnimation() - now handled by SyncedAudioPipeline
9642
+ // REMOVED: processAudioForAnimation() - now handled by FullFacePipeline
9808
9643
  // The pipeline manages audio scheduling, LAM inference, and frame synchronization
9809
- // Frames are emitted via pipeline.on('frame_ready') event (see initPipeline())
9644
+ // Frames are emitted via pipeline.on('full_frame_ready') event (see initPipeline())
9810
9645
  /**
9811
9646
  * Detect voice activity using Silero VAD
9812
9647
  * Falls back to simple RMS if VAD not available