@omote/core 0.4.7 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -30,6 +30,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ A2EOrchestrator: () => A2EOrchestrator,
34
+ A2EProcessor: () => A2EProcessor,
33
35
  ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
34
36
  AgentCoreAdapter: () => AgentCoreAdapter,
35
37
  AnimationGraph: () => AnimationGraph,
@@ -37,23 +39,22 @@ __export(index_exports, {
37
39
  AudioEnergyAnalyzer: () => AudioEnergyAnalyzer,
38
40
  AudioScheduler: () => AudioScheduler,
39
41
  AudioSyncManager: () => AudioSyncManager,
42
+ BLENDSHAPE_TO_GROUP: () => BLENDSHAPE_TO_GROUP,
43
+ BlendshapeSmoother: () => BlendshapeSmoother,
40
44
  CTC_VOCAB: () => CTC_VOCAB,
41
45
  ConsoleExporter: () => ConsoleExporter,
42
46
  ConversationOrchestrator: () => ConversationOrchestrator,
43
47
  DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
44
48
  DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
45
- EMOTION_ARKIT_MAP: () => EMOTION_ARKIT_MAP,
46
49
  EMOTION_NAMES: () => EMOTION_NAMES,
47
50
  EMOTION_VECTOR_SIZE: () => EMOTION_VECTOR_SIZE,
48
51
  EmotionController: () => EmotionController,
49
52
  EmotionPresets: () => EmotionPresets,
50
- EmotionToBlendshapeMapper: () => EmotionToBlendshapeMapper,
51
53
  EmphasisDetector: () => EmphasisDetector,
52
54
  EventEmitter: () => EventEmitter,
53
55
  FullFacePipeline: () => FullFacePipeline,
54
56
  INFERENCE_LATENCY_BUCKETS: () => INFERENCE_LATENCY_BUCKETS,
55
57
  InterruptionHandler: () => InterruptionHandler,
56
- LAMPipeline: () => LAMPipeline,
57
58
  LAM_BLENDSHAPES: () => LAM_BLENDSHAPES,
58
59
  LOG_LEVEL_PRIORITY: () => LOG_LEVEL_PRIORITY,
59
60
  MODEL_LOAD_TIME_BUCKETS: () => MODEL_LOAD_TIME_BUCKETS,
@@ -72,74 +73,55 @@ __export(index_exports, {
72
73
  SileroVADInference: () => SileroVADInference,
73
74
  SileroVADUnifiedAdapter: () => SileroVADUnifiedAdapter,
74
75
  SileroVADWorker: () => SileroVADWorker,
75
- SyncedAudioPipeline: () => SyncedAudioPipeline,
76
76
  TenantManager: () => TenantManager,
77
- UPPER_FACE_BLENDSHAPES: () => UPPER_FACE_BLENDSHAPES,
78
77
  UnifiedInferenceWorker: () => UnifiedInferenceWorker,
79
- WAV2ARKIT_BLENDSHAPES: () => WAV2ARKIT_BLENDSHAPES,
80
78
  Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
81
79
  Wav2ArkitCpuUnifiedAdapter: () => Wav2ArkitCpuUnifiedAdapter,
82
80
  Wav2ArkitCpuWorker: () => Wav2ArkitCpuWorker,
83
81
  Wav2Vec2Inference: () => Wav2Vec2Inference,
84
- applyCMVN: () => applyCMVN,
85
- applyLFR: () => applyLFR,
86
82
  blendEmotions: () => blendEmotions,
87
83
  calculatePeak: () => calculatePeak,
88
84
  calculateRMS: () => calculateRMS,
89
- computeKaldiFbank: () => computeKaldiFbank,
90
85
  configureCacheLimit: () => configureCacheLimit,
91
86
  configureLogging: () => configureLogging,
92
87
  configureTelemetry: () => configureTelemetry,
88
+ createA2E: () => createA2E,
93
89
  createEmotionVector: () => createEmotionVector,
94
- createLipSync: () => createLipSync,
95
90
  createLogger: () => createLogger,
96
91
  createSenseVoice: () => createSenseVoice,
97
- createSessionWithFallback: () => createSessionWithFallback,
98
92
  createSileroVAD: () => createSileroVAD,
99
- ctcGreedyDecode: () => ctcGreedyDecode,
100
93
  fetchWithCache: () => fetchWithCache,
101
94
  formatBytes: () => formatBytes,
102
95
  getCacheConfig: () => getCacheConfig,
103
96
  getCacheKey: () => getCacheKey,
104
97
  getEmotionPreset: () => getEmotionPreset,
105
- getLoadedBackend: () => getLoadedBackend,
106
98
  getLoggingConfig: () => getLoggingConfig,
107
99
  getModelCache: () => getModelCache,
108
- getOnnxRuntime: () => getOnnxRuntime,
109
- getOnnxRuntimeForPreference: () => getOnnxRuntimeForPreference,
110
100
  getOptimalWasmThreads: () => getOptimalWasmThreads,
111
101
  getRecommendedBackend: () => getRecommendedBackend,
112
- getSessionOptions: () => getSessionOptions,
113
102
  getTelemetry: () => getTelemetry,
114
103
  hasWebGPUApi: () => hasWebGPUApi,
115
104
  isAndroid: () => isAndroid,
116
105
  isIOS: () => isIOS,
117
106
  isIOSSafari: () => isIOSSafari,
118
107
  isMobile: () => isMobile,
119
- isOnnxRuntimeLoaded: () => isOnnxRuntimeLoaded,
120
108
  isProtocolEvent: () => isProtocolEvent,
121
109
  isSafari: () => isSafari,
122
110
  isSpeechRecognitionAvailable: () => isSpeechRecognitionAvailable,
123
111
  isWebGPUAvailable: () => isWebGPUAvailable,
112
+ lerpBlendshapes: () => lerpBlendshapes,
124
113
  lerpEmotion: () => lerpEmotion,
125
114
  noopLogger: () => noopLogger,
126
- parseCMVNFromMetadata: () => parseCMVNFromMetadata,
127
- parseTokensFile: () => parseTokensFile,
128
115
  preloadModels: () => preloadModels,
129
- preloadOnnxRuntime: () => preloadOnnxRuntime,
130
- remapWav2ArkitToLam: () => remapWav2ArkitToLam,
131
116
  resetLoggingConfig: () => resetLoggingConfig,
132
117
  resolveBackend: () => resolveBackend,
133
- resolveLanguageId: () => resolveLanguageId,
134
- resolveTextNormId: () => resolveTextNormId,
135
118
  setLogLevel: () => setLogLevel,
136
119
  setLoggingEnabled: () => setLoggingEnabled,
137
120
  shouldEnableWasmProxy: () => shouldEnableWasmProxy,
138
- shouldUseCpuLipSync: () => shouldUseCpuLipSync,
121
+ shouldUseCpuA2E: () => shouldUseCpuA2E,
139
122
  shouldUseNativeASR: () => shouldUseNativeASR,
140
- shouldUseServerLipSync: () => shouldUseServerLipSync,
141
- supportsVADWorker: () => supportsVADWorker,
142
- symmetrizeBlendshapes: () => symmetrizeBlendshapes
123
+ shouldUseServerA2E: () => shouldUseServerA2E,
124
+ supportsVADWorker: () => supportsVADWorker
143
125
  });
144
126
  module.exports = __toCommonJS(index_exports);
145
127
 
@@ -649,730 +631,543 @@ var AudioChunkCoalescer = class {
649
631
  }
650
632
  };
651
633
 
652
- // src/audio/LAMPipeline.ts
653
- var LAMPipeline = class {
654
- constructor(options = {}) {
655
- this.options = options;
656
- this.REQUIRED_SAMPLES = 16e3;
657
- // 1.0s at 16kHz (LAM requirement)
658
- this.FRAME_RATE = 30;
659
- // LAM outputs 30fps
660
- this.buffer = new Float32Array(0);
661
- this.bufferStartTime = 0;
662
- this.frameQueue = [];
663
- /**
664
- * Last successfully retrieved frame
665
- * Used as fallback when no new frame is available to prevent avatar freezing
666
- */
667
- this.lastFrame = null;
668
- }
669
- /**
670
- * Push audio samples into the pipeline
671
- *
672
- * Accumulates samples and triggers LAM inference when buffer is full.
673
- * Multiple calls may be needed to accumulate enough samples.
674
- *
675
- * @param samples - Float32Array of audio samples
676
- * @param timestamp - AudioContext time when these samples start playing
677
- * @param lam - LAM inference engine
678
- */
679
- async push(samples, timestamp, lam) {
680
- if (this.buffer.length === 0) {
681
- this.bufferStartTime = timestamp;
682
- }
683
- const newBuffer = new Float32Array(this.buffer.length + samples.length);
684
- newBuffer.set(this.buffer, 0);
685
- newBuffer.set(samples, this.buffer.length);
686
- this.buffer = newBuffer;
687
- while (this.buffer.length >= this.REQUIRED_SAMPLES) {
688
- await this.processBuffer(lam);
689
- if (this.buffer.length >= this.REQUIRED_SAMPLES) {
690
- await new Promise((r) => setTimeout(r, 0));
634
+ // src/logging/types.ts
635
+ var LOG_LEVEL_PRIORITY = {
636
+ error: 0,
637
+ warn: 1,
638
+ info: 2,
639
+ debug: 3,
640
+ trace: 4,
641
+ verbose: 5
642
+ };
643
+ var DEFAULT_LOGGING_CONFIG = {
644
+ level: "info",
645
+ enabled: true,
646
+ format: "pretty",
647
+ timestamps: true,
648
+ includeModule: true
649
+ };
650
+
651
+ // src/logging/formatters.ts
652
+ var COLORS = {
653
+ reset: "\x1B[0m",
654
+ red: "\x1B[31m",
655
+ yellow: "\x1B[33m",
656
+ blue: "\x1B[34m",
657
+ cyan: "\x1B[36m",
658
+ gray: "\x1B[90m",
659
+ white: "\x1B[37m",
660
+ magenta: "\x1B[35m"
661
+ };
662
+ var LEVEL_COLORS = {
663
+ error: COLORS.red,
664
+ warn: COLORS.yellow,
665
+ info: COLORS.blue,
666
+ debug: COLORS.cyan,
667
+ trace: COLORS.magenta,
668
+ verbose: COLORS.gray
669
+ };
670
+ var LEVEL_NAMES = {
671
+ error: "ERROR ",
672
+ warn: "WARN ",
673
+ info: "INFO ",
674
+ debug: "DEBUG ",
675
+ trace: "TRACE ",
676
+ verbose: "VERBOSE"
677
+ };
678
+ var isBrowser = typeof window !== "undefined";
679
+ function formatTimestamp(timestamp) {
680
+ const date = new Date(timestamp);
681
+ return date.toISOString().substring(11, 23);
682
+ }
683
+ function safeStringify(data) {
684
+ const seen = /* @__PURE__ */ new WeakSet();
685
+ return JSON.stringify(data, (key, value) => {
686
+ if (typeof value === "object" && value !== null) {
687
+ if (seen.has(value)) {
688
+ return "[Circular]";
691
689
  }
690
+ seen.add(value);
692
691
  }
693
- }
694
- /**
695
- * Process accumulated buffer through LAM inference
696
- */
697
- async processBuffer(lam) {
698
- try {
699
- const toProcess = this.buffer.slice(0, this.REQUIRED_SAMPLES);
700
- const processedStartTime = this.bufferStartTime;
701
- this.buffer = this.buffer.slice(this.REQUIRED_SAMPLES);
702
- const processedDuration = this.REQUIRED_SAMPLES / (this.options.sampleRate ?? 16e3);
703
- this.bufferStartTime = processedStartTime + processedDuration;
704
- const result = await lam.infer(toProcess);
705
- const frameDuration = 1 / this.FRAME_RATE;
706
- for (let i = 0; i < result.blendshapes.length; i++) {
707
- const frame = result.blendshapes[i];
708
- const timestamp = processedStartTime + i * frameDuration;
709
- this.frameQueue.push({ frame, timestamp });
710
- }
711
- this.options.onInference?.(result.blendshapes.length);
712
- } catch (error) {
713
- this.options.onError?.(error);
714
- this.buffer = new Float32Array(0);
715
- this.bufferStartTime = 0;
692
+ if (value instanceof Error) {
693
+ return {
694
+ name: value.name,
695
+ message: value.message,
696
+ stack: value.stack
697
+ };
716
698
  }
717
- }
718
- /**
719
- * Get the frame that should be displayed at the current time
720
- *
721
- * Automatically removes frames that have already been displayed.
722
- * This prevents memory leaks from accumulating old frames.
723
- *
724
- * Discard Window (prevents premature frame discarding):
725
- * - WebGPU: 0.5s (LAM inference 20-100ms + RAF jitter + React stalls)
726
- * - WASM: 1.0s (LAM inference 50-500ms + higher variability)
727
- *
728
- * Last-Frame-Hold: Returns last valid frame instead of null to prevent
729
- * avatar freezing when between frames (RAF at 60fps vs LAM at 30fps).
730
- *
731
- * @param currentTime - Current AudioContext time
732
- * @param lam - LAM inference engine (optional, for backend detection)
733
- * @returns Current frame, or last frame as fallback, or null if no frames yet
734
- */
735
- getFrameForTime(currentTime, lam) {
736
- const discardWindow = lam?.backend === "wasm" ? 1 : 0.5;
737
- let discardedCount = 0;
738
- while (this.frameQueue.length > 0 && this.frameQueue[0].timestamp < currentTime - discardWindow) {
739
- const discarded = this.frameQueue.shift();
740
- discardedCount++;
741
- if (discardedCount === 1) {
742
- const ageMs = ((currentTime - discarded.timestamp) * 1e3).toFixed(0);
743
- console.warn("[LAM] Frame(s) discarded as too old", {
744
- ageMs,
745
- discardWindowMs: discardWindow * 1e3,
746
- queueLength: this.frameQueue.length,
747
- backend: lam?.backend ?? "unknown"
748
- });
749
- }
699
+ if (value instanceof Float32Array || value instanceof Int16Array) {
700
+ return `${value.constructor.name}(${value.length})`;
750
701
  }
751
- if (this.frameQueue.length > 0 && this.frameQueue[0].timestamp <= currentTime) {
752
- const { frame } = this.frameQueue.shift();
753
- this.lastFrame = frame;
754
- return frame;
702
+ if (ArrayBuffer.isView(value)) {
703
+ return `${value.constructor.name}(${value.byteLength})`;
755
704
  }
756
- return this.lastFrame;
757
- }
758
- /**
759
- * Get all frames in the queue (for debugging/monitoring)
760
- */
761
- getQueuedFrames() {
762
- return [...this.frameQueue];
763
- }
764
- /**
765
- * Get current buffer fill level (0-1)
766
- */
767
- get fillLevel() {
768
- return Math.min(1, this.buffer.length / this.REQUIRED_SAMPLES);
705
+ return value;
706
+ });
707
+ }
708
+ var jsonFormatter = (entry) => {
709
+ const output = {
710
+ timestamp: entry.timestamp,
711
+ level: entry.level,
712
+ module: entry.module,
713
+ message: entry.message
714
+ };
715
+ if (entry.data && Object.keys(entry.data).length > 0) {
716
+ output.data = entry.data;
769
717
  }
770
- /**
771
- * Get number of frames queued
772
- */
773
- get queuedFrameCount() {
774
- return this.frameQueue.length;
718
+ if (entry.error) {
719
+ output.error = {
720
+ name: entry.error.name,
721
+ message: entry.error.message,
722
+ stack: entry.error.stack
723
+ };
775
724
  }
776
- /**
777
- * Get buffered audio duration in seconds
778
- */
779
- get bufferedDuration() {
780
- return this.buffer.length / (this.options.sampleRate ?? 16e3);
725
+ return safeStringify(output);
726
+ };
727
+ var prettyFormatter = (entry) => {
728
+ const time = formatTimestamp(entry.timestamp);
729
+ const level = LEVEL_NAMES[entry.level];
730
+ const module2 = entry.module;
731
+ const message = entry.message;
732
+ let output;
733
+ if (isBrowser) {
734
+ output = `${time} ${level} [${module2}] ${message}`;
735
+ } else {
736
+ const color = LEVEL_COLORS[entry.level];
737
+ output = `${COLORS.gray}${time}${COLORS.reset} ${color}${level}${COLORS.reset} ${COLORS.cyan}[${module2}]${COLORS.reset} ${message}`;
781
738
  }
782
- /**
783
- * Flush remaining buffered audio
784
- *
785
- * Processes any remaining audio in the buffer, even if less than REQUIRED_SAMPLES.
786
- * This ensures the final audio chunk generates blendshape frames.
787
- *
788
- * Should be called when audio stream ends to prevent losing the last 0-1 seconds.
789
- *
790
- * @param lam - LAM inference engine
791
- */
792
- async flush(lam) {
793
- if (this.buffer.length === 0) {
794
- return;
795
- }
796
- const padded = new Float32Array(this.REQUIRED_SAMPLES);
797
- padded.set(this.buffer, 0);
798
- const processedStartTime = this.bufferStartTime;
799
- try {
800
- const result = await lam.infer(padded);
801
- const actualDuration = this.buffer.length / (this.options.sampleRate ?? 16e3);
802
- const frameDuration = 1 / this.FRAME_RATE;
803
- const actualFrameCount = Math.ceil(actualDuration * this.FRAME_RATE);
804
- for (let i = 0; i < Math.min(actualFrameCount, result.blendshapes.length); i++) {
805
- const frame = result.blendshapes[i];
806
- const timestamp = processedStartTime + i * frameDuration;
807
- this.frameQueue.push({ frame, timestamp });
808
- }
809
- this.buffer = new Float32Array(0);
810
- this.bufferStartTime = 0;
811
- this.options.onInference?.(Math.min(actualFrameCount, result.blendshapes.length));
812
- } catch (error) {
813
- this.options.onError?.(error);
814
- this.buffer = new Float32Array(0);
815
- this.bufferStartTime = 0;
739
+ if (entry.data && Object.keys(entry.data).length > 0) {
740
+ const dataStr = safeStringify(entry.data);
741
+ if (dataStr.length > 80) {
742
+ output += "\n " + JSON.stringify(entry.data, null, 2).replace(/\n/g, "\n ");
743
+ } else {
744
+ output += " " + dataStr;
816
745
  }
817
746
  }
818
- /**
819
- * Adjust all queued frame timestamps by an offset
820
- *
821
- * Used for synchronization when audio scheduling time differs from
822
- * the estimated time used during LAM processing.
823
- *
824
- * @param offset - Time offset in seconds to add to all timestamps
825
- */
826
- adjustTimestamps(offset) {
827
- for (const frame of this.frameQueue) {
828
- frame.timestamp += offset;
747
+ if (entry.error) {
748
+ output += `
749
+ ${entry.error.name}: ${entry.error.message}`;
750
+ if (entry.error.stack) {
751
+ const stackLines = entry.error.stack.split("\n").slice(1, 4);
752
+ output += "\n " + stackLines.join("\n ");
829
753
  }
830
754
  }
831
- /**
832
- * Reset the pipeline
833
- */
834
- reset() {
835
- this.buffer = new Float32Array(0);
836
- this.bufferStartTime = 0;
837
- this.frameQueue = [];
838
- this.lastFrame = null;
839
- }
755
+ return output;
840
756
  };
841
-
842
- // src/audio/audioUtils.ts
843
- function pcm16ToFloat32(buffer) {
844
- const byteLen = buffer.byteLength & ~1;
845
- const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
846
- const float32 = new Float32Array(int16.length);
847
- for (let i = 0; i < int16.length; i++) {
848
- float32[i] = int16[i] / 32768;
849
- }
850
- return float32;
757
+ function getFormatter(format) {
758
+ return format === "json" ? jsonFormatter : prettyFormatter;
851
759
  }
852
- function int16ToFloat32(int16) {
853
- const float32 = new Float32Array(int16.length);
854
- for (let i = 0; i < int16.length; i++) {
855
- float32[i] = int16[i] / 32768;
760
+ function createBrowserConsoleArgs(entry) {
761
+ const time = formatTimestamp(entry.timestamp);
762
+ const level = entry.level.toUpperCase().padEnd(7);
763
+ const module2 = entry.module;
764
+ const message = entry.message;
765
+ const styles = {
766
+ time: "color: gray;",
767
+ error: "color: red; font-weight: bold;",
768
+ warn: "color: orange; font-weight: bold;",
769
+ info: "color: blue;",
770
+ debug: "color: cyan;",
771
+ trace: "color: magenta;",
772
+ verbose: "color: gray;",
773
+ module: "color: teal; font-weight: bold;",
774
+ message: "color: inherit;"
775
+ };
776
+ let formatStr = "%c%s %c%s %c[%s]%c %s";
777
+ const args = [
778
+ styles.time,
779
+ time,
780
+ styles[entry.level],
781
+ level,
782
+ styles.module,
783
+ module2,
784
+ styles.message,
785
+ message
786
+ ];
787
+ if (entry.data && Object.keys(entry.data).length > 0) {
788
+ formatStr += " %o";
789
+ args.push(entry.data);
856
790
  }
857
- return float32;
791
+ return [formatStr, ...args];
858
792
  }
859
793
 
860
- // src/audio/SyncedAudioPipeline.ts
861
- var SyncedAudioPipeline = class extends EventEmitter {
862
- constructor(options) {
863
- super();
864
- this.options = options;
865
- this.playbackStarted = false;
866
- this.monitorInterval = null;
867
- this.frameAnimationId = null;
868
- const sampleRate = options.sampleRate ?? 16e3;
869
- const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
870
- const audioDelayMs = options.audioDelayMs ?? autoDelay;
871
- this.scheduler = new AudioScheduler({
872
- sampleRate,
873
- initialLookaheadSec: audioDelayMs / 1e3
874
- });
875
- this.coalescer = new AudioChunkCoalescer({
876
- sampleRate,
877
- targetDurationMs: options.chunkTargetMs ?? 200
878
- });
879
- this.lamPipeline = new LAMPipeline({
880
- sampleRate,
881
- onError: (error) => {
882
- this.emit("error", error);
883
- }
884
- });
885
- }
886
- /**
887
- * Initialize the pipeline
888
- */
889
- async initialize() {
890
- await this.scheduler.initialize();
891
- }
892
- /**
893
- * Start a new playback session
894
- *
895
- * Resets all state and prepares for incoming audio chunks.
896
- * Audio will be scheduled immediately as chunks arrive (no buffering).
897
- */
898
- start() {
899
- this.stopMonitoring();
900
- this.scheduler.reset();
901
- this.coalescer.reset();
902
- this.lamPipeline.reset();
903
- this.playbackStarted = false;
904
- this.scheduler.warmup();
905
- this.startFrameLoop();
906
- this.startMonitoring();
794
+ // src/logging/Logger.ts
795
+ var isBrowser2 = typeof window !== "undefined";
796
+ var globalConfig = { ...DEFAULT_LOGGING_CONFIG };
797
+ function configureLogging(config) {
798
+ globalConfig = { ...globalConfig, ...config };
799
+ }
800
+ function getLoggingConfig() {
801
+ return { ...globalConfig };
802
+ }
803
+ function resetLoggingConfig() {
804
+ globalConfig = { ...DEFAULT_LOGGING_CONFIG };
805
+ }
806
+ function setLogLevel(level) {
807
+ globalConfig.level = level;
808
+ }
809
+ function setLoggingEnabled(enabled) {
810
+ globalConfig.enabled = enabled;
811
+ }
812
+ var consoleSink = (entry) => {
813
+ const consoleMethod = entry.level === "error" ? "error" : entry.level === "warn" ? "warn" : "log";
814
+ if (globalConfig.format === "pretty" && isBrowser2) {
815
+ const args = createBrowserConsoleArgs(entry);
816
+ console[consoleMethod](...args);
817
+ } else {
818
+ const formatter = getFormatter(globalConfig.format);
819
+ const formatted = formatter(entry);
820
+ console[consoleMethod](formatted);
907
821
  }
908
- /**
909
- * Receive audio chunk from network
910
- *
911
- * Audio-first design: schedules audio immediately, LAM runs in background.
912
- * This prevents LAM inference (50-300ms) from blocking audio scheduling,
913
- * which caused audible stuttering with continuous audio streams.
914
- *
915
- * @param chunk - Uint8Array containing Int16 PCM audio
916
- */
917
- async onAudioChunk(chunk) {
918
- const combined = this.coalescer.add(chunk);
919
- if (!combined) {
920
- return;
921
- }
922
- const float32 = pcm16ToFloat32(combined);
923
- const scheduleTime = await this.scheduler.schedule(float32);
924
- if (!this.playbackStarted) {
925
- this.playbackStarted = true;
926
- this.emit("playback_start", scheduleTime);
927
- }
928
- this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
929
- this.emit("error", err);
930
- });
822
+ };
823
+ function getActiveSink() {
824
+ return globalConfig.sink || consoleSink;
825
+ }
826
+ function shouldLog(level) {
827
+ if (!globalConfig.enabled) return false;
828
+ return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[globalConfig.level];
829
+ }
830
+ var Logger = class _Logger {
831
+ constructor(module2) {
832
+ this.module = module2;
931
833
  }
932
- /**
933
- * End of audio stream
934
- *
935
- * Flushes any remaining buffered data.
936
- */
937
- async end() {
938
- const remaining = this.coalescer.flush();
939
- if (remaining) {
940
- const chunk = new Uint8Array(remaining);
941
- await this.onAudioChunk(chunk);
834
+ log(level, message, data) {
835
+ if (!shouldLog(level)) return;
836
+ const entry = {
837
+ timestamp: Date.now(),
838
+ level,
839
+ module: this.module,
840
+ message,
841
+ data
842
+ };
843
+ if (data?.error instanceof Error) {
844
+ entry.error = data.error;
845
+ const { error, ...rest } = data;
846
+ entry.data = Object.keys(rest).length > 0 ? rest : void 0;
942
847
  }
943
- await this.lamPipeline.flush(this.options.lam);
848
+ getActiveSink()(entry);
944
849
  }
945
- /**
946
- * Stop playback immediately with smooth fade-out
947
- *
948
- * Gracefully cancels all audio playback and LAM processing:
949
- * - Fades out audio over specified duration (default: 50ms)
950
- * - Cancels pending LAM inferences
951
- * - Clears all buffers and queues
952
- * - Emits 'playback_complete' event
953
- *
954
- * Use this for interruptions (e.g., user barge-in during AI speech).
955
- *
956
- * @param fadeOutMs - Fade-out duration in milliseconds (default: 50ms)
957
- * @returns Promise that resolves when fade-out completes
958
- */
959
- async stop(fadeOutMs = 50) {
960
- this.stopMonitoring();
961
- await this.scheduler.cancelAll(fadeOutMs);
962
- this.coalescer.reset();
963
- this.lamPipeline.reset();
964
- this.playbackStarted = false;
965
- this.emit("playback_complete", void 0);
850
+ error(message, data) {
851
+ this.log("error", message, data);
966
852
  }
967
- /**
968
- * Start frame animation loop
969
- *
970
- * Uses requestAnimationFrame to check for new LAM frames.
971
- * Synchronized to AudioContext clock (not visual refresh rate).
972
- *
973
- * Frame Emission Strategy:
974
- * - LAMPipeline uses last-frame-hold to prevent null returns
975
- * - Always emit frames (even repeated frames) to maintain smooth animation
976
- * - Renderer is responsible for detecting duplicate frames if needed
977
- */
978
- startFrameLoop() {
979
- const updateFrame = () => {
980
- const currentTime = this.scheduler.getCurrentTime();
981
- const frame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
982
- if (frame) {
983
- this.emit("frame_ready", frame);
984
- }
985
- this.frameAnimationId = requestAnimationFrame(updateFrame);
986
- };
987
- this.frameAnimationId = requestAnimationFrame(updateFrame);
853
+ warn(message, data) {
854
+ this.log("warn", message, data);
988
855
  }
989
- /**
990
- * Start monitoring for playback completion
991
- */
992
- startMonitoring() {
993
- if (this.monitorInterval) {
994
- clearInterval(this.monitorInterval);
995
- }
996
- this.monitorInterval = window.setInterval(() => {
997
- if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
998
- this.emit("playback_complete", void 0);
999
- this.stopMonitoring();
1000
- }
1001
- }, 100);
856
+ info(message, data) {
857
+ this.log("info", message, data);
1002
858
  }
1003
- /**
1004
- * Stop monitoring
1005
- */
1006
- stopMonitoring() {
1007
- if (this.monitorInterval) {
1008
- clearInterval(this.monitorInterval);
1009
- this.monitorInterval = null;
1010
- }
1011
- if (this.frameAnimationId) {
1012
- cancelAnimationFrame(this.frameAnimationId);
1013
- this.frameAnimationId = null;
1014
- }
859
+ debug(message, data) {
860
+ this.log("debug", message, data);
1015
861
  }
1016
- /**
1017
- * Get current pipeline state (for debugging/monitoring)
1018
- */
1019
- getState() {
1020
- return {
1021
- playbackStarted: this.playbackStarted,
1022
- coalescerFill: this.coalescer.fillLevel,
1023
- lamFill: this.lamPipeline.fillLevel,
1024
- queuedFrames: this.lamPipeline.queuedFrameCount,
1025
- currentTime: this.scheduler.getCurrentTime(),
1026
- playbackEndTime: this.scheduler.getPlaybackEndTime()
1027
- };
862
+ trace(message, data) {
863
+ this.log("trace", message, data);
1028
864
  }
1029
- /**
1030
- * Cleanup resources
1031
- */
1032
- dispose() {
1033
- this.stopMonitoring();
1034
- this.scheduler.dispose();
1035
- this.coalescer.reset();
1036
- this.lamPipeline.reset();
865
+ verbose(message, data) {
866
+ this.log("verbose", message, data);
867
+ }
868
+ child(subModule) {
869
+ return new _Logger(`${this.module}.${subModule}`);
1037
870
  }
1038
871
  };
1039
-
1040
- // src/animation/EmotionToBlendshapeMapper.ts
1041
- var UPPER_FACE_BLENDSHAPES = [
1042
- // Brows (5)
1043
- "browDownLeft",
1044
- "browDownRight",
1045
- "browInnerUp",
1046
- "browOuterUpLeft",
1047
- "browOuterUpRight",
1048
- // Eyes (4)
1049
- "eyeSquintLeft",
1050
- "eyeSquintRight",
1051
- "eyeWideLeft",
1052
- "eyeWideRight",
1053
- // Cheeks (2)
1054
- "cheekSquintLeft",
1055
- "cheekSquintRight"
1056
- ];
1057
- var EMOTION_ARKIT_MAP = {
1058
- happy: {
1059
- // AU6 - Cheek raiser (primary Duchenne smile marker)
1060
- cheekSquintLeft: 0.5,
1061
- cheekSquintRight: 0.5,
1062
- // Slight eye squint from genuine smile (orbicularis oculi activation)
1063
- eyeSquintLeft: 0.2,
1064
- eyeSquintRight: 0.2
872
+ var loggerCache = /* @__PURE__ */ new Map();
873
+ function createLogger(module2) {
874
+ let logger17 = loggerCache.get(module2);
875
+ if (!logger17) {
876
+ logger17 = new Logger(module2);
877
+ loggerCache.set(module2, logger17);
878
+ }
879
+ return logger17;
880
+ }
881
+ var noopLogger = {
882
+ module: "noop",
883
+ error: () => {
884
+ },
885
+ warn: () => {
1065
886
  },
1066
- angry: {
1067
- // AU4 - Brow lowerer (intense, primary anger marker)
1068
- browDownLeft: 0.7,
1069
- browDownRight: 0.7,
1070
- // AU5 - Upper lid raiser (wide eyes, part of the "glare")
1071
- eyeWideLeft: 0.4,
1072
- eyeWideRight: 0.4,
1073
- // AU7 - Lid tightener (tense stare, combines with AU5 for angry glare)
1074
- eyeSquintLeft: 0.3,
1075
- eyeSquintRight: 0.3
887
+ info: () => {
1076
888
  },
1077
- sad: {
1078
- // AU1 - Inner brow raiser (primary sadness marker)
1079
- browInnerUp: 0.6,
1080
- // AU4 - Brow lowerer (brows drawn together)
1081
- browDownLeft: 0.3,
1082
- browDownRight: 0.3
889
+ debug: () => {
1083
890
  },
1084
- neutral: {}
1085
- // All zeros - no expression overlay
1086
- };
1087
- var DEFAULT_CONFIG = {
1088
- smoothingFactor: 0.15,
1089
- confidenceThreshold: 0.3,
1090
- intensity: 1,
1091
- blendMode: "dominant",
1092
- minBlendProbability: 0.1,
1093
- energyModulation: false,
1094
- minEnergyScale: 0.3,
1095
- maxEnergyScale: 1
891
+ trace: () => {
892
+ },
893
+ verbose: () => {
894
+ },
895
+ child: () => noopLogger
1096
896
  };
1097
- function createZeroBlendshapes() {
1098
- const result = {};
1099
- for (const name of UPPER_FACE_BLENDSHAPES) {
1100
- result[name] = 0;
1101
- }
1102
- return result;
1103
- }
1104
- function clamp01(value) {
1105
- return Math.max(0, Math.min(1, value));
1106
- }
1107
- var EmotionToBlendshapeMapper = class {
1108
- /**
1109
- * Create a new EmotionToBlendshapeMapper
1110
- *
1111
- * @param config - Optional configuration
1112
- */
897
+
898
+ // src/inference/A2EProcessor.ts
899
+ var logger = createLogger("A2EProcessor");
900
+ var FRAME_RATE = 30;
901
+ var DRIP_INTERVAL_MS = 33;
902
+ var A2EProcessor = class {
1113
903
  constructor(config) {
1114
- this.currentEnergy = 1;
1115
- this.config = {
1116
- ...DEFAULT_CONFIG,
1117
- ...config
1118
- };
1119
- this.targetBlendshapes = createZeroBlendshapes();
1120
- this.currentBlendshapes = createZeroBlendshapes();
1121
- }
1122
- /**
1123
- * Map an emotion frame to target blendshapes
904
+ this.writeOffset = 0;
905
+ this.bufferStartTime = 0;
906
+ // Frame queues (timestamped for pull mode, plain for drip mode)
907
+ this.timestampedQueue = [];
908
+ this.plainQueue = [];
909
+ // Push mode state
910
+ this._latestFrame = null;
911
+ this.dripInterval = null;
912
+ // Last-frame-hold for pull mode (prevents avatar freezing between frames)
913
+ this.lastPulledFrame = null;
914
+ // Inference serialization
915
+ this.inferenceRunning = false;
916
+ this.pendingChunks = [];
917
+ // Diagnostic: track getFrameForTime calls
918
+ this.getFrameCallCount = 0;
919
+ this.disposed = false;
920
+ this.backend = config.backend;
921
+ this.sampleRate = config.sampleRate ?? 16e3;
922
+ this.chunkSize = config.chunkSize ?? config.backend.chunkSize ?? 16e3;
923
+ this.onFrame = config.onFrame;
924
+ this.onError = config.onError;
925
+ this.bufferCapacity = this.chunkSize * 2;
926
+ this.buffer = new Float32Array(this.bufferCapacity);
927
+ }
928
+ // ═══════════════════════════════════════════════════════════════════════
929
+ // Audio Input
930
+ // ═══════════════════════════════════════════════════════════════════════
931
+ /**
932
+ * Push audio samples for inference (any source: mic, TTS, file).
1124
933
  *
1125
- * This sets the target values that the mapper will smoothly interpolate
1126
- * towards. Call update() each frame to apply smoothing.
934
+ * - With `timestamp`: frames stored with timestamps (pull mode)
935
+ * - Without `timestamp`: frames stored in plain queue (drip/push mode)
1127
936
  *
1128
- * @param frame - Emotion frame from Emotion2VecInference
1129
- * @param audioEnergy - Optional audio energy (0-1) for energy modulation
1130
- * @returns Target upper face blendshapes (before smoothing)
937
+ * Fire-and-forget: returns immediately, inference runs async.
1131
938
  */
1132
- mapFrame(frame, audioEnergy) {
1133
- this.targetBlendshapes = createZeroBlendshapes();
1134
- if (audioEnergy !== void 0) {
1135
- this.currentEnergy = clamp01(audioEnergy);
1136
- }
1137
- if (!frame) {
1138
- return { ...this.targetBlendshapes };
1139
- }
1140
- if (this.config.blendMode === "weighted") {
1141
- this.mapFrameWeighted(frame);
1142
- } else {
1143
- this.mapFrameDominant(frame);
939
+ pushAudio(samples, timestamp) {
940
+ if (this.disposed) return;
941
+ if (this.writeOffset === 0 && timestamp !== void 0) {
942
+ this.bufferStartTime = timestamp;
1144
943
  }
1145
- if (this.config.energyModulation) {
1146
- this.applyEnergyModulation();
944
+ if (this.writeOffset + samples.length > this.bufferCapacity) {
945
+ this.bufferCapacity = (this.writeOffset + samples.length) * 2;
946
+ const grown = new Float32Array(this.bufferCapacity);
947
+ grown.set(this.buffer.subarray(0, this.writeOffset));
948
+ this.buffer = grown;
949
+ }
950
+ this.buffer.set(samples, this.writeOffset);
951
+ this.writeOffset += samples.length;
952
+ logger.debug("pushAudio", {
953
+ samplesIn: samples.length,
954
+ writeOffset: this.writeOffset,
955
+ chunkSize: this.chunkSize,
956
+ willExtract: this.writeOffset >= this.chunkSize,
957
+ inferenceRunning: this.inferenceRunning,
958
+ pendingChunks: this.pendingChunks.length,
959
+ queuedFrames: this.timestampedQueue.length + this.plainQueue.length
960
+ });
961
+ while (this.writeOffset >= this.chunkSize) {
962
+ const chunk = this.buffer.slice(0, this.chunkSize);
963
+ this.buffer.copyWithin(0, this.chunkSize, this.writeOffset);
964
+ this.writeOffset -= this.chunkSize;
965
+ const chunkTimestamp = timestamp !== void 0 ? this.bufferStartTime : void 0;
966
+ this.pendingChunks.push({ chunk, timestamp: chunkTimestamp });
967
+ logger.info("Chunk queued for inference", {
968
+ chunkSize: chunk.length,
969
+ chunkTimestamp,
970
+ pendingChunks: this.pendingChunks.length,
971
+ remainderOffset: this.writeOffset
972
+ });
973
+ if (timestamp !== void 0) {
974
+ this.bufferStartTime += this.chunkSize / this.sampleRate;
975
+ }
1147
976
  }
1148
- return { ...this.targetBlendshapes };
977
+ this.drainPendingChunks();
1149
978
  }
1150
979
  /**
1151
- * Map using dominant emotion only (original behavior)
980
+ * Flush remaining buffered audio (pads to chunkSize).
981
+ * Call at end of stream to process final partial chunk.
982
+ *
983
+ * Routes through the serialized pendingChunks pipeline to maintain
984
+ * correct frame ordering. Without this, flush() could push frames
985
+ * with the latest timestamp to the queue before drainPendingChunks()
986
+ * finishes pushing frames with earlier timestamps — causing
987
+ * getFrameForTime() to see out-of-order timestamps and stall.
1152
988
  */
1153
- mapFrameDominant(frame) {
1154
- if (frame.confidence < this.config.confidenceThreshold) {
1155
- return;
1156
- }
1157
- const emotion = frame.emotion;
1158
- const mapping = EMOTION_ARKIT_MAP[emotion];
1159
- if (!mapping) {
1160
- return;
1161
- }
1162
- const scale = this.config.intensity * frame.confidence;
1163
- for (const [name, value] of Object.entries(mapping)) {
1164
- const blendshapeName = name;
1165
- if (value !== void 0) {
1166
- this.targetBlendshapes[blendshapeName] = clamp01(value * scale);
1167
- }
1168
- }
989
+ async flush() {
990
+ if (this.disposed || this.writeOffset === 0) return;
991
+ const padded = new Float32Array(this.chunkSize);
992
+ padded.set(this.buffer.subarray(0, this.writeOffset), 0);
993
+ const chunkTimestamp = this.bufferStartTime > 0 ? this.bufferStartTime : void 0;
994
+ logger.info("flush: routing through drain pipeline", {
995
+ actualSamples: this.writeOffset,
996
+ chunkTimestamp: chunkTimestamp?.toFixed(3),
997
+ pendingChunks: this.pendingChunks.length,
998
+ inferenceRunning: this.inferenceRunning
999
+ });
1000
+ this.writeOffset = 0;
1001
+ this.bufferStartTime = 0;
1002
+ this.pendingChunks.push({ chunk: padded, timestamp: chunkTimestamp });
1003
+ this.drainPendingChunks();
1169
1004
  }
1170
1005
  /**
1171
- * Map using weighted blend of all emotions by probability
1172
- * Creates more nuanced expressions (e.g., bittersweet = happy + sad)
1006
+ * Reset buffer, frame queues, and pending inference chunks
1173
1007
  */
1174
- mapFrameWeighted(frame) {
1175
- if (!frame.probabilities) {
1176
- this.mapFrameDominant(frame);
1177
- return;
1008
+ reset() {
1009
+ this.writeOffset = 0;
1010
+ this.bufferStartTime = 0;
1011
+ this.timestampedQueue = [];
1012
+ this.plainQueue = [];
1013
+ this._latestFrame = null;
1014
+ this.lastPulledFrame = null;
1015
+ this.pendingChunks = [];
1016
+ this.inferenceRunning = false;
1017
+ this.getFrameCallCount = 0;
1018
+ }
1019
+ // ═══════════════════════════════════════════════════════════════════════
1020
+ // Frame Output — Pull Mode (TTS playback)
1021
+ // ═══════════════════════════════════════════════════════════════════════
1022
+ /**
1023
+ * Get frame synced to external clock (e.g. AudioContext.currentTime).
1024
+ *
1025
+ * Discards frames that are too old, returns the current frame,
1026
+ * or holds last frame as fallback to prevent avatar freezing.
1027
+ *
1028
+ * @param currentTime - Current playback time (seconds)
1029
+ * @returns Blendshape frame, or null if no frames yet
1030
+ */
1031
+ getFrameForTime(currentTime) {
1032
+ this.getFrameCallCount++;
1033
+ const discardWindow = this.backend.backend === "wasm" ? 1 : 0.5;
1034
+ let discardCount = 0;
1035
+ while (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp < currentTime - discardWindow) {
1036
+ this.timestampedQueue.shift();
1037
+ discardCount++;
1038
+ }
1039
+ if (discardCount > 0) {
1040
+ logger.warn("getFrameForTime DISCARDED stale frames", {
1041
+ discardCount,
1042
+ currentTime: currentTime.toFixed(3),
1043
+ discardWindow,
1044
+ remainingFrames: this.timestampedQueue.length,
1045
+ nextFrameTs: this.timestampedQueue.length > 0 ? this.timestampedQueue[0].timestamp.toFixed(3) : "none"
1046
+ });
1178
1047
  }
1179
- for (const [emotion, probability] of Object.entries(frame.probabilities)) {
1180
- if (probability < this.config.minBlendProbability) {
1181
- continue;
1182
- }
1183
- const mapping = EMOTION_ARKIT_MAP[emotion];
1184
- if (!mapping) {
1185
- continue;
1186
- }
1187
- const scale = this.config.intensity * probability;
1188
- for (const [name, value] of Object.entries(mapping)) {
1189
- const blendshapeName = name;
1190
- if (value !== void 0) {
1191
- this.targetBlendshapes[blendshapeName] += value * scale;
1192
- }
1193
- }
1048
+ if (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp <= currentTime) {
1049
+ const { frame } = this.timestampedQueue.shift();
1050
+ this.lastPulledFrame = frame;
1051
+ return frame;
1194
1052
  }
1195
- for (const name of UPPER_FACE_BLENDSHAPES) {
1196
- this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name]);
1053
+ if (this.timestampedQueue.length > 0 && this.getFrameCallCount % 60 === 0) {
1054
+ logger.warn("getFrameForTime: frames in queue but NOT consumable", {
1055
+ queueLen: this.timestampedQueue.length,
1056
+ frontTimestamp: this.timestampedQueue[0].timestamp.toFixed(4),
1057
+ currentTime: currentTime.toFixed(4),
1058
+ delta: (this.timestampedQueue[0].timestamp - currentTime).toFixed(4),
1059
+ callCount: this.getFrameCallCount
1060
+ });
1197
1061
  }
1062
+ return this.lastPulledFrame;
1198
1063
  }
1199
- /**
1200
- * Apply energy modulation to scale emotion intensity by audio energy
1201
- * Louder speech = stronger expressions
1202
- */
1203
- applyEnergyModulation() {
1204
- const { minEnergyScale, maxEnergyScale } = this.config;
1205
- const energyScale = minEnergyScale + this.currentEnergy * (maxEnergyScale - minEnergyScale);
1206
- for (const name of UPPER_FACE_BLENDSHAPES) {
1207
- this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name] * energyScale);
1208
- }
1064
+ // ═══════════════════════════════════════════════════════════════════════
1065
+ // Frame Output — Push Mode (live mic, game loop)
1066
+ // ═══════════════════════════════════════════════════════════════════════
1067
+ /** Latest frame from drip-feed (live mic, game loop) */
1068
+ get latestFrame() {
1069
+ return this._latestFrame;
1209
1070
  }
1210
- /**
1211
- * Apply smoothing to interpolate current values towards target
1212
- *
1213
- * Uses exponential moving average:
1214
- * current = current + smoothingFactor * (target - current)
1215
- *
1216
- * @param _deltaMs - Delta time in milliseconds (reserved for future time-based smoothing)
1217
- */
1218
- update(_deltaMs) {
1219
- const factor = this.config.smoothingFactor;
1220
- for (const name of UPPER_FACE_BLENDSHAPES) {
1221
- const target = this.targetBlendshapes[name];
1222
- const current = this.currentBlendshapes[name];
1223
- this.currentBlendshapes[name] = clamp01(current + factor * (target - current));
1071
+ /** Start 30fps drip-feed timer (push mode) */
1072
+ startDrip() {
1073
+ if (this.dripInterval) return;
1074
+ this.dripInterval = setInterval(() => {
1075
+ const frame = this.plainQueue.shift();
1076
+ if (frame) {
1077
+ this._latestFrame = frame;
1078
+ this.onFrame?.(frame);
1079
+ }
1080
+ }, DRIP_INTERVAL_MS);
1081
+ }
1082
+ /** Stop drip-feed timer */
1083
+ stopDrip() {
1084
+ if (this.dripInterval) {
1085
+ clearInterval(this.dripInterval);
1086
+ this.dripInterval = null;
1224
1087
  }
1225
1088
  }
1226
- /**
1227
- * Get current smoothed blendshape values
1228
- *
1229
- * @returns Current upper face blendshapes (after smoothing)
1230
- */
1231
- getCurrentBlendshapes() {
1232
- return { ...this.currentBlendshapes };
1089
+ // ═══════════════════════════════════════════════════════════════════════
1090
+ // State
1091
+ // ═══════════════════════════════════════════════════════════════════════
1092
+ /** Number of frames waiting in queue (both modes combined) */
1093
+ get queuedFrameCount() {
1094
+ return this.timestampedQueue.length + this.plainQueue.length;
1095
+ }
1096
+ /** Buffer fill level as fraction of chunkSize (0-1) */
1097
+ get fillLevel() {
1098
+ return Math.min(1, this.writeOffset / this.chunkSize);
1233
1099
  }
1100
+ /** Dispose resources */
1101
+ dispose() {
1102
+ if (this.disposed) return;
1103
+ this.disposed = true;
1104
+ this.stopDrip();
1105
+ this.reset();
1106
+ }
1107
+ // ═══════════════════════════════════════════════════════════════════════
1108
+ // Private
1109
+ // ═══════════════════════════════════════════════════════════════════════
1234
1110
  /**
1235
- * Reset mapper to neutral state
1236
- *
1237
- * Sets both target and current blendshapes to zero.
1111
+ * Process pending chunks sequentially.
1112
+ * Fire-and-forget — called from pushAudio() and flush() without awaiting.
1238
1113
  */
1239
- reset() {
1240
- this.targetBlendshapes = createZeroBlendshapes();
1241
- this.currentBlendshapes = createZeroBlendshapes();
1242
- this.currentEnergy = 1;
1243
- }
1244
- /**
1245
- * Get current configuration
1246
- */
1247
- getConfig() {
1248
- return { ...this.config };
1249
- }
1250
- /**
1251
- * Update configuration
1252
- *
1253
- * @param config - Partial configuration to update
1254
- */
1255
- setConfig(config) {
1256
- this.config = {
1257
- ...this.config,
1258
- ...config
1259
- };
1260
- }
1261
- };
1262
-
1263
- // src/animation/audioEnergy.ts
1264
- function calculateRMS(samples) {
1265
- if (samples.length === 0) return 0;
1266
- let sumSquares = 0;
1267
- for (let i = 0; i < samples.length; i++) {
1268
- sumSquares += samples[i] * samples[i];
1269
- }
1270
- return Math.sqrt(sumSquares / samples.length);
1271
- }
1272
- function calculatePeak(samples) {
1273
- let peak = 0;
1274
- for (let i = 0; i < samples.length; i++) {
1275
- const abs = Math.abs(samples[i]);
1276
- if (abs > peak) peak = abs;
1277
- }
1278
- return peak;
1279
- }
1280
- var AudioEnergyAnalyzer = class {
1281
- /**
1282
- * @param smoothingFactor How much to smooth (0 = no smoothing, 1 = infinite smoothing). Default 0.85
1283
- * @param noiseFloor Minimum energy threshold to consider as signal. Default 0.01
1284
- */
1285
- constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
1286
- this.smoothedRMS = 0;
1287
- this.smoothedPeak = 0;
1288
- this.smoothingFactor = Math.max(0, Math.min(0.99, smoothingFactor));
1289
- this.noiseFloor = noiseFloor;
1290
- }
1291
- /**
1292
- * Process audio samples and return smoothed energy values
1293
- * @param samples Audio samples (Float32Array)
1294
- * @returns Object with rms and peak values
1295
- */
1296
- process(samples) {
1297
- const instantRMS = calculateRMS(samples);
1298
- const instantPeak = calculatePeak(samples);
1299
- const gatedRMS = instantRMS > this.noiseFloor ? instantRMS : 0;
1300
- const gatedPeak = instantPeak > this.noiseFloor ? instantPeak : 0;
1301
- if (gatedRMS > this.smoothedRMS) {
1302
- this.smoothedRMS = this.smoothedRMS * 0.5 + gatedRMS * 0.5;
1303
- } else {
1304
- this.smoothedRMS = this.smoothedRMS * this.smoothingFactor + gatedRMS * (1 - this.smoothingFactor);
1305
- }
1306
- if (gatedPeak > this.smoothedPeak) {
1307
- this.smoothedPeak = this.smoothedPeak * 0.3 + gatedPeak * 0.7;
1308
- } else {
1309
- this.smoothedPeak = this.smoothedPeak * this.smoothingFactor + gatedPeak * (1 - this.smoothingFactor);
1310
- }
1311
- const energy = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
1312
- return {
1313
- rms: this.smoothedRMS,
1314
- peak: this.smoothedPeak,
1315
- energy: Math.min(1, energy * 2)
1316
- // Scale up and clamp
1317
- };
1318
- }
1319
- /**
1320
- * Reset analyzer state
1321
- */
1322
- reset() {
1323
- this.smoothedRMS = 0;
1324
- this.smoothedPeak = 0;
1325
- }
1326
- /**
1327
- * Get current smoothed RMS value
1328
- */
1329
- get rms() {
1330
- return this.smoothedRMS;
1331
- }
1332
- /**
1333
- * Get current smoothed peak value
1334
- */
1335
- get peak() {
1336
- return this.smoothedPeak;
1337
- }
1338
- };
1339
- var EmphasisDetector = class {
1340
- /**
1341
- * @param historySize Number of frames to track. Default 10
1342
- * @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
1343
- */
1344
- constructor(historySize = 10, emphasisThreshold = 0.15) {
1345
- this.energyHistory = [];
1346
- this.historySize = historySize;
1347
- this.emphasisThreshold = emphasisThreshold;
1348
- }
1349
- /**
1350
- * Process energy value and detect emphasis
1351
- * @param energy Current energy value (0-1)
1352
- * @returns Object with isEmphasis flag and emphasisStrength
1353
- */
1354
- process(energy) {
1355
- this.energyHistory.push(energy);
1356
- if (this.energyHistory.length > this.historySize) {
1357
- this.energyHistory.shift();
1358
- }
1359
- if (this.energyHistory.length < 3) {
1360
- return { isEmphasis: false, emphasisStrength: 0 };
1114
+ drainPendingChunks() {
1115
+ if (this.inferenceRunning || this.pendingChunks.length === 0) {
1116
+ if (this.inferenceRunning && this.pendingChunks.length > 0) {
1117
+ logger.debug("drainPendingChunks skipped (inference running)", {
1118
+ pendingChunks: this.pendingChunks.length
1119
+ });
1120
+ }
1121
+ return;
1361
1122
  }
1362
- const prevFrames = this.energyHistory.slice(0, -1);
1363
- const avgPrev = prevFrames.reduce((a, b) => a + b, 0) / prevFrames.length;
1364
- const increase = energy - avgPrev;
1365
- const isEmphasis = increase > this.emphasisThreshold;
1366
- return {
1367
- isEmphasis,
1368
- emphasisStrength: isEmphasis ? Math.min(1, increase / 0.3) : 0
1123
+ this.inferenceRunning = true;
1124
+ logger.info("drainPendingChunks starting", { pendingChunks: this.pendingChunks.length });
1125
+ const processNext = async () => {
1126
+ while (this.pendingChunks.length > 0 && !this.disposed) {
1127
+ const { chunk, timestamp } = this.pendingChunks.shift();
1128
+ try {
1129
+ const t0 = performance.now();
1130
+ const result = await this.backend.infer(chunk);
1131
+ const inferMs = Math.round(performance.now() - t0);
1132
+ const actualDuration = chunk.length / this.sampleRate;
1133
+ const actualFrameCount = Math.ceil(actualDuration * FRAME_RATE);
1134
+ const framesToQueue = Math.min(actualFrameCount, result.blendshapes.length);
1135
+ logger.info("Inference complete", {
1136
+ inferMs,
1137
+ modelFrames: result.blendshapes.length,
1138
+ framesToQueue,
1139
+ timestamp,
1140
+ totalQueued: this.timestampedQueue.length + framesToQueue,
1141
+ remainingPending: this.pendingChunks.length
1142
+ });
1143
+ for (let i = 0; i < framesToQueue; i++) {
1144
+ if (timestamp !== void 0) {
1145
+ this.timestampedQueue.push({
1146
+ frame: result.blendshapes[i],
1147
+ timestamp: timestamp + i / FRAME_RATE
1148
+ });
1149
+ } else {
1150
+ this.plainQueue.push(result.blendshapes[i]);
1151
+ }
1152
+ }
1153
+ } catch (err) {
1154
+ this.handleError(err);
1155
+ }
1156
+ if (this.pendingChunks.length > 0) {
1157
+ await new Promise((r) => setTimeout(r, 0));
1158
+ }
1159
+ }
1160
+ this.inferenceRunning = false;
1161
+ if (this.pendingChunks.length > 0) {
1162
+ this.drainPendingChunks();
1163
+ }
1369
1164
  };
1165
+ processNext().catch((err) => this.handleError(err));
1370
1166
  }
1371
- /**
1372
- * Reset detector state
1373
- */
1374
- reset() {
1375
- this.energyHistory = [];
1167
+ handleError(err) {
1168
+ const error = err instanceof Error ? err : new Error(String(err));
1169
+ logger.warn("A2EProcessor inference error", { error: error.message });
1170
+ this.onError?.(error);
1376
1171
  }
1377
1172
  };
1378
1173
 
@@ -2361,464 +2156,200 @@ var ModelCache = class {
2361
2156
  }
2362
2157
  await this.delete(model.url);
2363
2158
  evictedUrls.push(model.url);
2364
- freedBytes += model.size;
2365
- console.log(`[ModelCache] Evicted: ${model.url} (${formatBytes(model.size)})`);
2366
- }
2367
- span?.setAttributes({
2368
- "eviction.bytes_freed": freedBytes,
2369
- "eviction.models_evicted": evictedUrls.length
2370
- });
2371
- span?.end();
2372
- if (freedBytes > 0) {
2373
- telemetry?.incrementCounter("omote.cache.eviction", evictedUrls.length, {
2374
- bytes_freed: String(freedBytes)
2375
- });
2376
- }
2377
- return evictedUrls;
2378
- } catch (err) {
2379
- span?.endWithError(err instanceof Error ? err : new Error(String(err)));
2380
- console.warn("[ModelCache] Eviction failed:", err);
2381
- return [];
2382
- }
2383
- }
2384
- /**
2385
- * Get storage quota information
2386
- *
2387
- * Uses navigator.storage.estimate() to get quota details.
2388
- * Returns null if the API is unavailable.
2389
- *
2390
- * @returns Quota info or null if unavailable
2391
- *
2392
- * @example
2393
- * ```typescript
2394
- * const cache = getModelCache();
2395
- * const quota = await cache.getQuotaInfo();
2396
- * if (quota) {
2397
- * console.log(`Using ${quota.percentUsed.toFixed(1)}% of quota`);
2398
- * }
2399
- * ```
2400
- */
2401
- async getQuotaInfo() {
2402
- if (!navigator?.storage?.estimate) {
2403
- return null;
2404
- }
2405
- try {
2406
- const estimate = await navigator.storage.estimate();
2407
- const usedBytes = estimate.usage || 0;
2408
- const quotaBytes = estimate.quota || 0;
2409
- const percentUsed = quotaBytes > 0 ? usedBytes / quotaBytes * 100 : 0;
2410
- const stats = await this.getStats();
2411
- return {
2412
- usedBytes,
2413
- quotaBytes,
2414
- percentUsed,
2415
- cacheBytes: stats.totalSize
2416
- };
2417
- } catch {
2418
- return null;
2419
- }
2420
- }
2421
- };
2422
- var cacheInstance = null;
2423
- function getModelCache() {
2424
- if (!cacheInstance) {
2425
- cacheInstance = new ModelCache();
2426
- }
2427
- return cacheInstance;
2428
- }
2429
- var MAX_CACHE_SIZE_BYTES = 500 * 1024 * 1024;
2430
- async function fetchWithCache(url, optionsOrProgress) {
2431
- let options = {};
2432
- if (typeof optionsOrProgress === "function") {
2433
- options = { onProgress: optionsOrProgress };
2434
- } else if (optionsOrProgress) {
2435
- options = optionsOrProgress;
2436
- }
2437
- const { version, validateStale = false, onProgress } = options;
2438
- const cache = getModelCache();
2439
- const cacheKey = version ? getCacheKey(url, version) : url;
2440
- const telemetry = getTelemetry();
2441
- const span = telemetry?.startSpan("fetchWithCache", {
2442
- "fetch.url": url,
2443
- ...version && { "fetch.version": version },
2444
- "fetch.validate_stale": validateStale
2445
- });
2446
- if (validateStale) {
2447
- const validation = await cache.getWithValidation(cacheKey, url);
2448
- if (validation.data && !validation.stale) {
2449
- console.log(`[ModelCache] Cache hit (validated): ${url} (${(validation.data.byteLength / 1024 / 1024).toFixed(1)}MB)`);
2450
- onProgress?.(validation.data.byteLength, validation.data.byteLength);
2451
- span?.setAttributes({
2452
- "fetch.cache_hit": true,
2453
- "fetch.cache_validated": true,
2454
- "fetch.cache_stale": false,
2455
- "fetch.size_bytes": validation.data.byteLength
2456
- });
2457
- span?.end();
2458
- return validation.data;
2459
- }
2460
- if (validation.stale) {
2461
- console.log(`[ModelCache] Cache stale, refetching: ${url}`);
2462
- span?.setAttributes({
2463
- "fetch.cache_hit": true,
2464
- "fetch.cache_validated": true,
2465
- "fetch.cache_stale": true
2466
- });
2467
- }
2468
- } else {
2469
- const cached = await cache.get(cacheKey);
2470
- if (cached) {
2471
- console.log(`[ModelCache] Cache hit: ${url} (${(cached.byteLength / 1024 / 1024).toFixed(1)}MB)`);
2472
- onProgress?.(cached.byteLength, cached.byteLength);
2473
- span?.setAttributes({
2474
- "fetch.cache_hit": true,
2475
- "fetch.size_bytes": cached.byteLength
2476
- });
2477
- span?.end();
2478
- return cached;
2479
- }
2480
- }
2481
- span?.setAttributes({ "fetch.cache_hit": false });
2482
- console.log(`[ModelCache] Cache miss, fetching: ${url}`);
2483
- try {
2484
- const response = await fetch(url);
2485
- if (!response.ok) {
2486
- throw new Error(`Failed to fetch ${url}: ${response.status}`);
2487
- }
2488
- const contentLength = response.headers.get("content-length");
2489
- const total = contentLength ? parseInt(contentLength, 10) : 0;
2490
- const etag = response.headers.get("etag") ?? void 0;
2491
- const tooLargeForCache = total > MAX_CACHE_SIZE_BYTES;
2492
- if (tooLargeForCache) {
2493
- console.log(`[ModelCache] File too large for IndexedDB (${(total / 1024 / 1024).toFixed(0)}MB > 500MB), using HTTP cache only`);
2494
- }
2495
- if (!response.body) {
2496
- const data2 = await response.arrayBuffer();
2497
- if (!tooLargeForCache) {
2498
- await cache.set(cacheKey, data2, etag, version);
2499
- }
2500
- span?.setAttributes({
2501
- "fetch.size_bytes": data2.byteLength,
2502
- "fetch.cached_to_indexeddb": !tooLargeForCache
2503
- });
2504
- span?.end();
2505
- return data2;
2506
- }
2507
- const reader = response.body.getReader();
2508
- const chunks = [];
2509
- let loaded = 0;
2510
- while (true) {
2511
- const { done, value } = await reader.read();
2512
- if (done) break;
2513
- chunks.push(value);
2514
- loaded += value.length;
2515
- onProgress?.(loaded, total || loaded);
2516
- }
2517
- const data = new Uint8Array(loaded);
2518
- let offset = 0;
2519
- for (const chunk of chunks) {
2520
- data.set(chunk, offset);
2521
- offset += chunk.length;
2522
- }
2523
- const buffer = data.buffer;
2524
- if (!tooLargeForCache) {
2525
- await cache.set(cacheKey, buffer, etag, version);
2526
- console.log(`[ModelCache] Cached: ${url} (${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB)`);
2527
- }
2528
- span?.setAttributes({
2529
- "fetch.size_bytes": buffer.byteLength,
2530
- "fetch.cached_to_indexeddb": !tooLargeForCache
2531
- });
2532
- span?.end();
2533
- return buffer;
2534
- } catch (error) {
2535
- span?.endWithError(error instanceof Error ? error : new Error(String(error)));
2536
- throw error;
2537
- }
2538
- }
2539
- async function preloadModels(urls, onProgress) {
2540
- const cache = getModelCache();
2541
- for (let i = 0; i < urls.length; i++) {
2542
- const url = urls[i];
2543
- onProgress?.(i, urls.length, url);
2544
- if (await cache.has(url)) {
2545
- console.log(`[ModelCache] Already cached: ${url}`);
2546
- continue;
2547
- }
2548
- await fetchWithCache(url);
2549
- }
2550
- onProgress?.(urls.length, urls.length, "done");
2551
- }
2552
- function formatBytes(bytes) {
2553
- if (bytes < 1024) return `${bytes} B`;
2554
- if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
2555
- if (bytes < 1024 * 1024 * 1024) return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
2556
- return `${(bytes / 1024 / 1024 / 1024).toFixed(1)} GB`;
2557
- }
2558
-
2559
- // src/logging/types.ts
2560
- var LOG_LEVEL_PRIORITY = {
2561
- error: 0,
2562
- warn: 1,
2563
- info: 2,
2564
- debug: 3,
2565
- trace: 4,
2566
- verbose: 5
2567
- };
2568
- var DEFAULT_LOGGING_CONFIG = {
2569
- level: "info",
2570
- enabled: true,
2571
- format: "pretty",
2572
- timestamps: true,
2573
- includeModule: true
2574
- };
2575
-
2576
- // src/logging/formatters.ts
2577
- var COLORS = {
2578
- reset: "\x1B[0m",
2579
- red: "\x1B[31m",
2580
- yellow: "\x1B[33m",
2581
- blue: "\x1B[34m",
2582
- cyan: "\x1B[36m",
2583
- gray: "\x1B[90m",
2584
- white: "\x1B[37m",
2585
- magenta: "\x1B[35m"
2586
- };
2587
- var LEVEL_COLORS = {
2588
- error: COLORS.red,
2589
- warn: COLORS.yellow,
2590
- info: COLORS.blue,
2591
- debug: COLORS.cyan,
2592
- trace: COLORS.magenta,
2593
- verbose: COLORS.gray
2594
- };
2595
- var LEVEL_NAMES = {
2596
- error: "ERROR ",
2597
- warn: "WARN ",
2598
- info: "INFO ",
2599
- debug: "DEBUG ",
2600
- trace: "TRACE ",
2601
- verbose: "VERBOSE"
2602
- };
2603
- var isBrowser = typeof window !== "undefined";
2604
- function formatTimestamp(timestamp) {
2605
- const date = new Date(timestamp);
2606
- return date.toISOString().substring(11, 23);
2607
- }
2608
- function safeStringify(data) {
2609
- const seen = /* @__PURE__ */ new WeakSet();
2610
- return JSON.stringify(data, (key, value) => {
2611
- if (typeof value === "object" && value !== null) {
2612
- if (seen.has(value)) {
2613
- return "[Circular]";
2614
- }
2615
- seen.add(value);
2616
- }
2617
- if (value instanceof Error) {
2618
- return {
2619
- name: value.name,
2620
- message: value.message,
2621
- stack: value.stack
2622
- };
2623
- }
2624
- if (value instanceof Float32Array || value instanceof Int16Array) {
2625
- return `${value.constructor.name}(${value.length})`;
2626
- }
2627
- if (ArrayBuffer.isView(value)) {
2628
- return `${value.constructor.name}(${value.byteLength})`;
2629
- }
2630
- return value;
2631
- });
2632
- }
2633
- var jsonFormatter = (entry) => {
2634
- const output = {
2635
- timestamp: entry.timestamp,
2636
- level: entry.level,
2637
- module: entry.module,
2638
- message: entry.message
2639
- };
2640
- if (entry.data && Object.keys(entry.data).length > 0) {
2641
- output.data = entry.data;
2642
- }
2643
- if (entry.error) {
2644
- output.error = {
2645
- name: entry.error.name,
2646
- message: entry.error.message,
2647
- stack: entry.error.stack
2648
- };
2649
- }
2650
- return safeStringify(output);
2651
- };
2652
- var prettyFormatter = (entry) => {
2653
- const time = formatTimestamp(entry.timestamp);
2654
- const level = LEVEL_NAMES[entry.level];
2655
- const module2 = entry.module;
2656
- const message = entry.message;
2657
- let output;
2658
- if (isBrowser) {
2659
- output = `${time} ${level} [${module2}] ${message}`;
2660
- } else {
2661
- const color = LEVEL_COLORS[entry.level];
2662
- output = `${COLORS.gray}${time}${COLORS.reset} ${color}${level}${COLORS.reset} ${COLORS.cyan}[${module2}]${COLORS.reset} ${message}`;
2663
- }
2664
- if (entry.data && Object.keys(entry.data).length > 0) {
2665
- const dataStr = safeStringify(entry.data);
2666
- if (dataStr.length > 80) {
2667
- output += "\n " + JSON.stringify(entry.data, null, 2).replace(/\n/g, "\n ");
2668
- } else {
2669
- output += " " + dataStr;
2670
- }
2671
- }
2672
- if (entry.error) {
2673
- output += `
2674
- ${entry.error.name}: ${entry.error.message}`;
2675
- if (entry.error.stack) {
2676
- const stackLines = entry.error.stack.split("\n").slice(1, 4);
2677
- output += "\n " + stackLines.join("\n ");
2678
- }
2679
- }
2680
- return output;
2681
- };
2682
- function getFormatter(format) {
2683
- return format === "json" ? jsonFormatter : prettyFormatter;
2684
- }
2685
- function createBrowserConsoleArgs(entry) {
2686
- const time = formatTimestamp(entry.timestamp);
2687
- const level = entry.level.toUpperCase().padEnd(7);
2688
- const module2 = entry.module;
2689
- const message = entry.message;
2690
- const styles = {
2691
- time: "color: gray;",
2692
- error: "color: red; font-weight: bold;",
2693
- warn: "color: orange; font-weight: bold;",
2694
- info: "color: blue;",
2695
- debug: "color: cyan;",
2696
- trace: "color: magenta;",
2697
- verbose: "color: gray;",
2698
- module: "color: teal; font-weight: bold;",
2699
- message: "color: inherit;"
2700
- };
2701
- let formatStr = "%c%s %c%s %c[%s]%c %s";
2702
- const args = [
2703
- styles.time,
2704
- time,
2705
- styles[entry.level],
2706
- level,
2707
- styles.module,
2708
- module2,
2709
- styles.message,
2710
- message
2711
- ];
2712
- if (entry.data && Object.keys(entry.data).length > 0) {
2713
- formatStr += " %o";
2714
- args.push(entry.data);
2715
- }
2716
- return [formatStr, ...args];
2717
- }
2718
-
2719
- // src/logging/Logger.ts
2720
- var isBrowser2 = typeof window !== "undefined";
2721
- var globalConfig = { ...DEFAULT_LOGGING_CONFIG };
2722
- function configureLogging(config) {
2723
- globalConfig = { ...globalConfig, ...config };
2724
- }
2725
- function getLoggingConfig() {
2726
- return { ...globalConfig };
2727
- }
2728
- function resetLoggingConfig() {
2729
- globalConfig = { ...DEFAULT_LOGGING_CONFIG };
2730
- }
2731
- function setLogLevel(level) {
2732
- globalConfig.level = level;
2733
- }
2734
- function setLoggingEnabled(enabled) {
2735
- globalConfig.enabled = enabled;
2736
- }
2737
- var consoleSink = (entry) => {
2738
- const consoleMethod = entry.level === "error" ? "error" : entry.level === "warn" ? "warn" : "log";
2739
- if (globalConfig.format === "pretty" && isBrowser2) {
2740
- const args = createBrowserConsoleArgs(entry);
2741
- console[consoleMethod](...args);
2742
- } else {
2743
- const formatter = getFormatter(globalConfig.format);
2744
- const formatted = formatter(entry);
2745
- console[consoleMethod](formatted);
2746
- }
2747
- };
2748
- function getActiveSink() {
2749
- return globalConfig.sink || consoleSink;
2750
- }
2751
- function shouldLog(level) {
2752
- if (!globalConfig.enabled) return false;
2753
- return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[globalConfig.level];
2754
- }
2755
- var Logger = class _Logger {
2756
- constructor(module2) {
2757
- this.module = module2;
2758
- }
2759
- log(level, message, data) {
2760
- if (!shouldLog(level)) return;
2761
- const entry = {
2762
- timestamp: Date.now(),
2763
- level,
2764
- module: this.module,
2765
- message,
2766
- data
2767
- };
2768
- if (data?.error instanceof Error) {
2769
- entry.error = data.error;
2770
- const { error, ...rest } = data;
2771
- entry.data = Object.keys(rest).length > 0 ? rest : void 0;
2159
+ freedBytes += model.size;
2160
+ console.log(`[ModelCache] Evicted: ${model.url} (${formatBytes(model.size)})`);
2161
+ }
2162
+ span?.setAttributes({
2163
+ "eviction.bytes_freed": freedBytes,
2164
+ "eviction.models_evicted": evictedUrls.length
2165
+ });
2166
+ span?.end();
2167
+ if (freedBytes > 0) {
2168
+ telemetry?.incrementCounter("omote.cache.eviction", evictedUrls.length, {
2169
+ bytes_freed: String(freedBytes)
2170
+ });
2171
+ }
2172
+ return evictedUrls;
2173
+ } catch (err) {
2174
+ span?.endWithError(err instanceof Error ? err : new Error(String(err)));
2175
+ console.warn("[ModelCache] Eviction failed:", err);
2176
+ return [];
2772
2177
  }
2773
- getActiveSink()(entry);
2774
- }
2775
- error(message, data) {
2776
- this.log("error", message, data);
2777
- }
2778
- warn(message, data) {
2779
- this.log("warn", message, data);
2780
2178
  }
2781
- info(message, data) {
2782
- this.log("info", message, data);
2179
+ /**
2180
+ * Get storage quota information
2181
+ *
2182
+ * Uses navigator.storage.estimate() to get quota details.
2183
+ * Returns null if the API is unavailable.
2184
+ *
2185
+ * @returns Quota info or null if unavailable
2186
+ *
2187
+ * @example
2188
+ * ```typescript
2189
+ * const cache = getModelCache();
2190
+ * const quota = await cache.getQuotaInfo();
2191
+ * if (quota) {
2192
+ * console.log(`Using ${quota.percentUsed.toFixed(1)}% of quota`);
2193
+ * }
2194
+ * ```
2195
+ */
2196
+ async getQuotaInfo() {
2197
+ if (!navigator?.storage?.estimate) {
2198
+ return null;
2199
+ }
2200
+ try {
2201
+ const estimate = await navigator.storage.estimate();
2202
+ const usedBytes = estimate.usage || 0;
2203
+ const quotaBytes = estimate.quota || 0;
2204
+ const percentUsed = quotaBytes > 0 ? usedBytes / quotaBytes * 100 : 0;
2205
+ const stats = await this.getStats();
2206
+ return {
2207
+ usedBytes,
2208
+ quotaBytes,
2209
+ percentUsed,
2210
+ cacheBytes: stats.totalSize
2211
+ };
2212
+ } catch {
2213
+ return null;
2214
+ }
2783
2215
  }
2784
- debug(message, data) {
2785
- this.log("debug", message, data);
2216
+ };
2217
// Shared ModelCache instance; stays null until getModelCache() is first called.
var cacheInstance = null;

/**
 * Return the shared ModelCache, constructing it on first use.
 *
 * @returns The single ModelCache instance used by this module
 */
function getModelCache() {
  return cacheInstance || (cacheInstance = new ModelCache());
}
2224
// Largest payload (500 MiB) that fetchWithCache will write to the model cache;
// bigger downloads are still returned to the caller, just not stored.
var MAX_CACHE_SIZE_BYTES = 500 * 1024 * 1024;

/**
 * Fetch `url` as binary data, serving from and populating the model cache.
 *
 * Lookup:
 *  - `validateStale: true`: cache.getWithValidation(cacheKey, url) is used;
 *    a non-stale hit is returned, a stale or missing entry falls through to
 *    the network.
 *  - otherwise: cache.get(cacheKey) is used and any hit is returned.
 *
 * On a miss the response is downloaded (streamed when `response.body` is
 * available so progress can be reported) and written to the cache unless it
 * exceeds MAX_CACHE_SIZE_BYTES. The size check uses the larger of the
 * Content-Length header and the bytes actually received, so responses without
 * a usable Content-Length are still subject to the cap. A failed cache write
 * is logged and does not fail the fetch, because the data is already in hand.
 *
 * @param url - Resource to fetch
 * @param optionsOrProgress - Either a progress callback `(loaded, total)` or
 *        an options object `{ version, validateStale, onProgress }`
 * @returns The resource bytes (an ArrayBuffer on the network path; on a cache
 *          hit, whatever the cache returned)
 * @throws Error when the response status is not OK; network and stream errors
 *         are rethrown after being recorded on the telemetry span
 */
async function fetchWithCache(url, optionsOrProgress) {
  let options = {};
  if (typeof optionsOrProgress === "function") {
    options = { onProgress: optionsOrProgress };
  } else if (optionsOrProgress) {
    options = optionsOrProgress;
  }
  const { version, validateStale = false, onProgress } = options;
  const cache = getModelCache();
  const cacheKey = version ? getCacheKey(url, version) : url;
  const telemetry = getTelemetry();
  const span = telemetry?.startSpan("fetchWithCache", {
    "fetch.url": url,
    ...version && { "fetch.version": version },
    "fetch.validate_stale": validateStale
  });
  const toMB = (bytes, digits) => (bytes / 1024 / 1024).toFixed(digits);

  if (validateStale) {
    const validation = await cache.getWithValidation(cacheKey, url);
    if (validation.data && !validation.stale) {
      console.log(`[ModelCache] Cache hit (validated): ${url} (${toMB(validation.data.byteLength, 1)}MB)`);
      onProgress?.(validation.data.byteLength, validation.data.byteLength);
      span?.setAttributes({
        "fetch.cache_hit": true,
        "fetch.cache_validated": true,
        "fetch.cache_stale": false,
        "fetch.size_bytes": validation.data.byteLength
      });
      span?.end();
      return validation.data;
    }
    if (validation.stale) {
      console.log(`[ModelCache] Cache stale, refetching: ${url}`);
      span?.setAttributes({
        "fetch.cache_hit": true,
        "fetch.cache_validated": true,
        "fetch.cache_stale": true
      });
    }
  } else {
    const cached = await cache.get(cacheKey);
    if (cached) {
      console.log(`[ModelCache] Cache hit: ${url} (${toMB(cached.byteLength, 1)}MB)`);
      onProgress?.(cached.byteLength, cached.byteLength);
      span?.setAttributes({
        "fetch.cache_hit": true,
        "fetch.size_bytes": cached.byteLength
      });
      span?.end();
      return cached;
    }
  }

  span?.setAttributes({ "fetch.cache_hit": false });
  console.log(`[ModelCache] Cache miss, fetching: ${url}`);
  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    // A missing or unparsable Content-Length is treated as "unknown" (0).
    const declaredLength = Number.parseInt(response.headers.get("content-length") ?? "", 10);
    const total = Number.isFinite(declaredLength) && declaredLength > 0 ? declaredLength : 0;
    const etag = response.headers.get("etag") ?? void 0;
    const limitLabel = `${toMB(MAX_CACHE_SIZE_BYTES, 0)}MB`;
    const declaredTooLarge = total > MAX_CACHE_SIZE_BYTES;
    if (declaredTooLarge) {
      console.log(`[ModelCache] File too large for IndexedDB (${toMB(total, 0)}MB > ${limitLabel}), using HTTP cache only`);
    }

    let buffer;
    if (!response.body) {
      // No readable stream available: take the whole body at once (no progress).
      buffer = await response.arrayBuffer();
    } else {
      const reader = response.body.getReader();
      const chunks = [];
      let loaded = 0;
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks.push(value);
        loaded += value.length;
        // With an unknown total, report the running byte count as the total.
        onProgress?.(loaded, total || loaded);
      }
      const data = new Uint8Array(loaded);
      let offset = 0;
      for (const chunk of chunks) {
        data.set(chunk, offset);
        offset += chunk.length;
      }
      buffer = data.buffer;
    }

    // Decide on the real size so header-less responses cannot bypass the cap.
    const tooLargeForCache = Math.max(total, buffer.byteLength) > MAX_CACHE_SIZE_BYTES;
    if (tooLargeForCache && !declaredTooLarge) {
      console.log(`[ModelCache] File too large for IndexedDB (${toMB(buffer.byteLength, 0)}MB > ${limitLabel}), using HTTP cache only`);
    }
    let cachedToIndexedDb = false;
    if (!tooLargeForCache) {
      try {
        await cache.set(cacheKey, buffer, etag, version);
        cachedToIndexedDb = true;
        console.log(`[ModelCache] Cached: ${url} (${toMB(buffer.byteLength, 1)}MB)`);
      } catch (cacheErr) {
        // Caching is an optimisation; the caller still gets the downloaded bytes.
        console.warn(`[ModelCache] Failed to cache ${url}:`, cacheErr);
      }
    }
    span?.setAttributes({
      "fetch.size_bytes": buffer.byteLength,
      "fetch.cached_to_indexeddb": cachedToIndexedDb
    });
    span?.end();
    return buffer;
  } catch (error) {
    span?.endWithError(error instanceof Error ? error : new Error(String(error)));
    throw error;
  }
}
2334
/**
 * Warm the model cache for a list of URLs, one at a time and in order.
 *
 * Before each URL, `onProgress(index, total, url)` is invoked; after the last
 * one, `onProgress(total, total, "done")`. URLs the cache already holds are
 * skipped; the rest are downloaded through fetchWithCache.
 *
 * @param urls - Model URLs to preload
 * @param onProgress - Optional callback `(completed, total, currentUrl)`
 */
async function preloadModels(urls, onProgress) {
  const modelCache = getModelCache();
  let index = 0;
  while (index < urls.length) {
    const target = urls[index];
    onProgress?.(index, urls.length, target);
    const alreadyCached = await modelCache.has(target);
    if (alreadyCached) {
      console.log(`[ModelCache] Already cached: ${target}`);
    } else {
      await fetchWithCache(target);
    }
    index += 1;
  }
  onProgress?.(urls.length, urls.length, "done");
}
2347
/**
 * Format a byte count as a short human-readable string using 1024-based units.
 *
 * Values below 1024 are printed verbatim with a "B" suffix; larger values are
 * printed with one decimal in KB, MB or GB (GB is the largest unit used).
 * A value that would round up to "1024.0" of a unit is promoted to the next
 * unit, so 1048575 renders as "1.0 MB" rather than "1024.0 KB".
 *
 * @param bytes - Size in bytes
 * @returns Formatted size, e.g. "512 B", "1.5 KB", "383.0 MB"
 */
function formatBytes(bytes) {
  if (bytes < 1024) return `${bytes} B`;
  const units = ["KB", "MB", "GB"];
  let value = bytes / 1024;
  let unitIndex = 0;
  // Compare the *rounded* value so the printed number never reaches 1024.0
  // while a larger unit is still available.
  while (unitIndex < units.length - 1 && Number(value.toFixed(1)) >= 1024) {
    value /= 1024;
    unitIndex += 1;
  }
  return `${value.toFixed(1)} ${units[unitIndex]}`;
}
2806
- var noopLogger = {
2807
- module: "noop",
2808
- error: () => {
2809
- },
2810
- warn: () => {
2811
- },
2812
- info: () => {
2813
- },
2814
- debug: () => {
2815
- },
2816
- trace: () => {
2817
- },
2818
- verbose: () => {
2819
- },
2820
- child: () => noopLogger
2821
- };
2822
2353
 
2823
2354
  // src/utils/runtime.ts
2824
2355
  function isIOSSafari() {
@@ -2889,7 +2420,7 @@ function isSafari() {
2889
2420
  const ua = navigator.userAgent.toLowerCase();
2890
2421
  return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
2891
2422
  }
2892
// This hunk renames shouldUseCpuLipSync to shouldUseCpuA2E; the body is unchanged.
// Returns true when isSafari() or isIOS() reports true.
- function shouldUseCpuLipSync() {
2423
+ function shouldUseCpuA2E() {
2893
2424
  return isSafari() || isIOS();
2894
2425
  }
2895
2426
  function isSpeechRecognitionAvailable() {
@@ -2899,22 +2430,22 @@ function isSpeechRecognitionAvailable() {
2899
2430
  function shouldUseNativeASR() {
2900
2431
  return (isIOS() || isSafari()) && isSpeechRecognitionAvailable();
2901
2432
  }
2902
// This hunk renames shouldUseServerLipSync to shouldUseServerA2E; the body is unchanged.
// Returns the result of isIOS().
- function shouldUseServerLipSync() {
2433
+ function shouldUseServerA2E() {
2903
2434
  return isIOS();
2904
2435
  }
2905
2436
 
2906
2437
  // src/inference/onnxLoader.ts
2907
- var logger = createLogger("OnnxLoader");
2438
+ var logger2 = createLogger("OnnxLoader");
2908
2439
  var ortInstance = null;
2909
2440
  var loadedBackend = null;
2910
2441
  var WASM_CDN_PATH = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
2911
2442
  async function isWebGPUAvailable() {
2912
2443
  if (isIOS()) {
2913
- logger.debug("WebGPU check: disabled on iOS (asyncify bundle crashes WebKit)");
2444
+ logger2.debug("WebGPU check: disabled on iOS (asyncify bundle crashes WebKit)");
2914
2445
  return false;
2915
2446
  }
2916
2447
  if (!hasWebGPUApi()) {
2917
- logger.debug("WebGPU check: navigator.gpu not available", {
2448
+ logger2.debug("WebGPU check: navigator.gpu not available", {
2918
2449
  isSecureContext: typeof window !== "undefined" ? window.isSecureContext : "N/A"
2919
2450
  });
2920
2451
  return false;
@@ -2922,19 +2453,19 @@ async function isWebGPUAvailable() {
2922
2453
  try {
2923
2454
  const adapter = await navigator.gpu.requestAdapter();
2924
2455
  if (!adapter) {
2925
- logger.debug("WebGPU check: No adapter available");
2456
+ logger2.debug("WebGPU check: No adapter available");
2926
2457
  return false;
2927
2458
  }
2928
2459
  const device = await adapter.requestDevice();
2929
2460
  if (!device) {
2930
- logger.debug("WebGPU check: Could not create device");
2461
+ logger2.debug("WebGPU check: Could not create device");
2931
2462
  return false;
2932
2463
  }
2933
2464
  device.destroy();
2934
- logger.debug("WebGPU check: Available and working");
2465
+ logger2.debug("WebGPU check: Available and working");
2935
2466
  return true;
2936
2467
  } catch (err) {
2937
- logger.debug("WebGPU check: Error during availability check", { error: err });
2468
+ logger2.debug("WebGPU check: Error during availability check", { error: err });
2938
2469
  return false;
2939
2470
  }
2940
2471
  }
@@ -2944,11 +2475,11 @@ function applyIOSWasmMemoryPatch() {
2944
2475
  iosWasmPatched = true;
2945
2476
  const OrigMemory = WebAssembly.Memory;
2946
2477
  const MAX_IOS_PAGES = 32768;
2947
- logger.info("Applying iOS WASM memory patch (max\u21922GB, shared preserved)");
2478
+ logger2.info("Applying iOS WASM memory patch (max\u21922GB, shared preserved)");
2948
2479
  WebAssembly.Memory = function IOSPatchedMemory(descriptor) {
2949
2480
  const patched = { ...descriptor };
2950
2481
  if (patched.maximum !== void 0 && patched.maximum > MAX_IOS_PAGES) {
2951
- logger.info("iOS memory patch: capping maximum", {
2482
+ logger2.info("iOS memory patch: capping maximum", {
2952
2483
  original: patched.maximum,
2953
2484
  capped: MAX_IOS_PAGES,
2954
2485
  shared: patched.shared,
@@ -2967,7 +2498,7 @@ function configureWasm(ort) {
2967
2498
  ort.env.wasm.numThreads = numThreads;
2968
2499
  ort.env.wasm.simd = true;
2969
2500
  ort.env.wasm.proxy = enableProxy;
2970
- logger.info("WASM configured", {
2501
+ logger2.info("WASM configured", {
2971
2502
  numThreads,
2972
2503
  simd: true,
2973
2504
  proxy: enableProxy,
@@ -2979,12 +2510,12 @@ async function getOnnxRuntime(backend) {
2979
2510
  return ortInstance;
2980
2511
  }
2981
2512
  if (ortInstance && loadedBackend !== backend) {
2982
- logger.warn(
2513
+ logger2.warn(
2983
2514
  `ONNX Runtime already loaded with ${loadedBackend} backend. Cannot switch to ${backend}. Returning existing instance.`
2984
2515
  );
2985
2516
  return ortInstance;
2986
2517
  }
2987
- logger.info(`Loading ONNX Runtime with ${backend} backend...`);
2518
+ logger2.info(`Loading ONNX Runtime with ${backend} backend...`);
2988
2519
  applyIOSWasmMemoryPatch();
2989
2520
  try {
2990
2521
  if (backend === "wasm" && (isIOS() || isSafari())) {
@@ -2999,10 +2530,10 @@ async function getOnnxRuntime(backend) {
2999
2530
  }
3000
2531
  loadedBackend = backend;
3001
2532
  configureWasm(ortInstance);
3002
- logger.info(`ONNX Runtime loaded successfully`, { backend });
2533
+ logger2.info(`ONNX Runtime loaded successfully`, { backend });
3003
2534
  return ortInstance;
3004
2535
  } catch (err) {
3005
- logger.error(`Failed to load ONNX Runtime with ${backend} backend`, {
2536
+ logger2.error(`Failed to load ONNX Runtime with ${backend} backend`, {
3006
2537
  error: err
3007
2538
  });
3008
2539
  throw new Error(
@@ -3013,7 +2544,7 @@ async function getOnnxRuntime(backend) {
3013
2544
  async function getOnnxRuntimeForPreference(preference = "auto") {
3014
2545
  const webgpuAvailable = await isWebGPUAvailable();
3015
2546
  const backend = resolveBackend(preference, webgpuAvailable);
3016
- logger.info("Resolved backend preference", {
2547
+ logger2.info("Resolved backend preference", {
3017
2548
  preference,
3018
2549
  webgpuAvailable,
3019
2550
  resolvedBackend: backend
@@ -3047,42 +2578,6 @@ function getSessionOptions(backend) {
3047
2578
  graphOptimizationLevel: "all"
3048
2579
  };
3049
2580
  }
3050
- async function createSessionWithFallback(modelBuffer, preferredBackend) {
3051
- const ort = await getOnnxRuntime(preferredBackend);
3052
- const modelData = new Uint8Array(modelBuffer);
3053
- if (preferredBackend === "webgpu") {
3054
- try {
3055
- const options2 = getSessionOptions("webgpu");
3056
- const session2 = await ort.InferenceSession.create(modelData, options2);
3057
- logger.info("Session created with WebGPU backend");
3058
- return { session: session2, backend: "webgpu" };
3059
- } catch (err) {
3060
- logger.warn("WebGPU session creation failed, falling back to WASM", {
3061
- error: err instanceof Error ? err.message : String(err)
3062
- });
3063
- }
3064
- }
3065
- const options = getSessionOptions("wasm");
3066
- const session = await ort.InferenceSession.create(modelData, options);
3067
- logger.info("Session created with WASM backend");
3068
- return { session, backend: "wasm" };
3069
- }
3070
- function getLoadedBackend() {
3071
- return loadedBackend;
3072
- }
3073
- function isOnnxRuntimeLoaded() {
3074
- return ortInstance !== null;
3075
- }
3076
- async function preloadOnnxRuntime(preference = "auto") {
3077
- if (ortInstance) {
3078
- logger.info("ONNX Runtime already preloaded", { backend: loadedBackend });
3079
- return loadedBackend;
3080
- }
3081
- logger.info("Preloading ONNX Runtime...", { preference });
3082
- const { backend } = await getOnnxRuntimeForPreference(preference);
3083
- logger.info("ONNX Runtime preloaded", { backend });
3084
- return backend;
3085
- }
3086
2581
 
3087
2582
  // src/inference/blendshapeUtils.ts
3088
2583
  var LAM_BLENDSHAPES = [
@@ -3232,16 +2727,19 @@ var WAV2ARKIT_BLENDSHAPES = [
3232
2727
  var REMAP_WAV2ARKIT_TO_LAM = WAV2ARKIT_BLENDSHAPES.map(
3233
2728
  (name) => LAM_BLENDSHAPES.indexOf(name)
3234
2729
  );
3235
- function remapWav2ArkitToLam(frame) {
3236
- const result = new Float32Array(52);
3237
- for (let i = 0; i < 52; i++) {
3238
- result[REMAP_WAV2ARKIT_TO_LAM[i]] = frame[i];
2730
/**
 * Linearly interpolate from one blendshape weight list toward another.
 *
 * The result has the length of the longer input; indices missing from either
 * input are treated as 0.
 *
 * @param current - Weights to start from
 * @param target - Weights to move toward
 * @param factor - Interpolation amount applied per element (default 0.3)
 * @returns A new plain array of interpolated weights
 */
function lerpBlendshapes(current, target, factor = 0.3) {
  const size = Math.max(current.length, target.length);
  return Array.from({ length: size }, (_, idx) => {
    const from = current[idx] ?? 0;
    const to = target[idx] ?? 0;
    return from + (to - from) * factor;
  });
}
3242
2740
 
3243
2741
  // src/inference/Wav2Vec2Inference.ts
3244
- var logger2 = createLogger("Wav2Vec2");
2742
+ var logger3 = createLogger("Wav2Vec2");
3245
2743
  var CTC_VOCAB = [
3246
2744
  "<pad>",
3247
2745
  "<s>",
@@ -3291,6 +2789,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3291
2789
  this.poisoned = false;
3292
2790
  this.config = config;
3293
2791
  this.numIdentityClasses = config.numIdentityClasses ?? 12;
2792
+ this.chunkSize = config.chunkSize ?? 16e3;
3294
2793
  }
3295
2794
  get backend() {
3296
2795
  return this.session ? this._backend : null;
@@ -3320,30 +2819,30 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3320
2819
  "model.backend_requested": this.config.backend || "auto"
3321
2820
  });
3322
2821
  try {
3323
- logger2.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
2822
+ logger3.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
3324
2823
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
3325
2824
  this.ort = ort;
3326
2825
  this._backend = backend;
3327
- logger2.info("ONNX Runtime loaded", { backend: this._backend });
2826
+ logger3.info("ONNX Runtime loaded", { backend: this._backend });
3328
2827
  const modelUrl = this.config.modelUrl;
3329
2828
  const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
3330
2829
  const sessionOptions = getSessionOptions(this._backend);
3331
2830
  let isCached = false;
3332
2831
  if (isIOS()) {
3333
- logger2.info("iOS: passing model URLs directly to ORT (low-memory path)", {
2832
+ logger3.info("iOS: passing model URLs directly to ORT (low-memory path)", {
3334
2833
  modelUrl,
3335
2834
  dataUrl
3336
2835
  });
3337
2836
  if (dataUrl) {
3338
2837
  const dataFilename = dataUrl.split("/").pop();
3339
- logger2.info("iOS: setting externalData", { dataFilename, dataUrl });
2838
+ logger3.info("iOS: setting externalData", { dataFilename, dataUrl });
3340
2839
  sessionOptions.externalData = [{
3341
2840
  path: dataFilename,
3342
2841
  data: dataUrl
3343
2842
  // URL string — ORT fetches directly into WASM
3344
2843
  }];
3345
2844
  }
3346
- logger2.info("iOS: calling InferenceSession.create() with URL string", {
2845
+ logger3.info("iOS: calling InferenceSession.create() with URL string", {
3347
2846
  modelUrl,
3348
2847
  sessionOptions: JSON.stringify(
3349
2848
  sessionOptions,
@@ -3353,14 +2852,14 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3353
2852
  try {
3354
2853
  this.session = await this.ort.InferenceSession.create(modelUrl, sessionOptions);
3355
2854
  } catch (sessionErr) {
3356
- logger2.error("iOS: InferenceSession.create() failed", {
2855
+ logger3.error("iOS: InferenceSession.create() failed", {
3357
2856
  error: sessionErr instanceof Error ? sessionErr.message : String(sessionErr),
3358
2857
  errorType: sessionErr?.constructor?.name,
3359
2858
  stack: sessionErr instanceof Error ? sessionErr.stack : void 0
3360
2859
  });
3361
2860
  throw sessionErr;
3362
2861
  }
3363
- logger2.info("iOS: session created successfully", {
2862
+ logger3.info("iOS: session created successfully", {
3364
2863
  inputNames: this.session.inputNames,
3365
2864
  outputNames: this.session.outputNames
3366
2865
  });
@@ -3369,15 +2868,15 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3369
2868
  isCached = await cache.has(modelUrl);
3370
2869
  let modelBuffer;
3371
2870
  if (isCached) {
3372
- logger2.debug("Loading model from cache", { modelUrl });
2871
+ logger3.debug("Loading model from cache", { modelUrl });
3373
2872
  modelBuffer = await cache.get(modelUrl);
3374
2873
  if (!modelBuffer) {
3375
- logger2.warn("Cache corruption detected, clearing and retrying", { modelUrl });
2874
+ logger3.warn("Cache corruption detected, clearing and retrying", { modelUrl });
3376
2875
  await cache.delete(modelUrl);
3377
2876
  modelBuffer = await fetchWithCache(modelUrl);
3378
2877
  }
3379
2878
  } else {
3380
- logger2.debug("Fetching and caching model", { modelUrl });
2879
+ logger3.debug("Fetching and caching model", { modelUrl });
3381
2880
  modelBuffer = await fetchWithCache(modelUrl);
3382
2881
  }
3383
2882
  if (!modelBuffer) {
@@ -3388,31 +2887,31 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3388
2887
  try {
3389
2888
  const isDataCached = await cache.has(dataUrl);
3390
2889
  if (isDataCached) {
3391
- logger2.debug("Loading external data from cache", { dataUrl });
2890
+ logger3.debug("Loading external data from cache", { dataUrl });
3392
2891
  externalDataBuffer = await cache.get(dataUrl);
3393
2892
  if (!externalDataBuffer) {
3394
- logger2.warn("Cache corruption for external data, retrying", { dataUrl });
2893
+ logger3.warn("Cache corruption for external data, retrying", { dataUrl });
3395
2894
  await cache.delete(dataUrl);
3396
2895
  externalDataBuffer = await fetchWithCache(dataUrl);
3397
2896
  }
3398
2897
  } else {
3399
- logger2.info("Fetching external model data", {
2898
+ logger3.info("Fetching external model data", {
3400
2899
  dataUrl,
3401
2900
  note: "This may be a large download (383MB+)"
3402
2901
  });
3403
2902
  externalDataBuffer = await fetchWithCache(dataUrl);
3404
2903
  }
3405
- logger2.info("External data loaded", {
2904
+ logger3.info("External data loaded", {
3406
2905
  size: formatBytes(externalDataBuffer.byteLength)
3407
2906
  });
3408
2907
  } catch (err) {
3409
- logger2.debug("No external data file found (single-file model)", {
2908
+ logger3.debug("No external data file found (single-file model)", {
3410
2909
  dataUrl,
3411
2910
  error: err.message
3412
2911
  });
3413
2912
  }
3414
2913
  }
3415
- logger2.debug("Creating ONNX session", {
2914
+ logger3.debug("Creating ONNX session", {
3416
2915
  graphSize: formatBytes(modelBuffer.byteLength),
3417
2916
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
3418
2917
  backend: this._backend
@@ -3427,12 +2926,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3427
2926
  const modelData = new Uint8Array(modelBuffer);
3428
2927
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
3429
2928
  }
3430
- logger2.info("ONNX session created successfully", {
2929
+ logger3.info("ONNX session created successfully", {
3431
2930
  executionProvider: this._backend,
3432
2931
  backend: this._backend
3433
2932
  });
3434
2933
  const loadTimeMs = performance.now() - startTime;
3435
- logger2.info("Model loaded successfully", {
2934
+ logger3.info("Model loaded successfully", {
3436
2935
  backend: this._backend,
3437
2936
  loadTimeMs: Math.round(loadTimeMs),
3438
2937
  inputs: this.session.inputNames,
@@ -3448,13 +2947,13 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3448
2947
  model: "wav2vec2",
3449
2948
  backend: this._backend
3450
2949
  });
3451
- logger2.debug("Running warmup inference to initialize GPU context");
2950
+ logger3.debug("Running warmup inference to initialize GPU context");
3452
2951
  const warmupStart = performance.now();
3453
- const warmupAudio = new Float32Array(16e3);
2952
+ const warmupAudio = new Float32Array(this.chunkSize);
3454
2953
  const warmupIdentity = new Float32Array(this.numIdentityClasses);
3455
2954
  warmupIdentity[0] = 1;
3456
2955
  const warmupFeeds = {
3457
- "audio": new this.ort.Tensor("float32", warmupAudio, [1, 16e3]),
2956
+ "audio": new this.ort.Tensor("float32", warmupAudio, [1, this.chunkSize]),
3458
2957
  "identity": new this.ort.Tensor("float32", warmupIdentity, [1, this.numIdentityClasses])
3459
2958
  };
3460
2959
  const WARMUP_TIMEOUT_MS = 15e3;
@@ -3464,12 +2963,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3464
2963
  ]);
3465
2964
  const warmupTimeMs = performance.now() - warmupStart;
3466
2965
  if (warmupResult === "timeout") {
3467
- logger2.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
2966
+ logger3.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
3468
2967
  timeoutMs: WARMUP_TIMEOUT_MS,
3469
2968
  backend: this._backend
3470
2969
  });
3471
2970
  } else {
3472
- logger2.info("Warmup inference complete", {
2971
+ logger3.info("Warmup inference complete", {
3473
2972
  warmupTimeMs: Math.round(warmupTimeMs),
3474
2973
  backend: this._backend
3475
2974
  });
@@ -3497,11 +2996,10 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3497
2996
  }
3498
2997
  /**
3499
2998
  * Run inference on raw audio
3500
- * @param audioSamples - Float32Array of raw audio at 16kHz (16000 samples = 1 second)
2999
+ * @param audioSamples - Float32Array of raw audio at 16kHz
3501
3000
  * @param identityIndex - Optional identity index (0-11, default 0 = neutral)
3502
3001
  *
3503
- * Note: Model expects 1-second chunks (16000 samples) for optimal performance.
3504
- * Audio will be zero-padded or truncated to 16000 samples.
3002
+ * Audio will be zero-padded or truncated to chunkSize samples.
3505
3003
  */
3506
3004
  async infer(audioSamples, identityIndex = 0) {
3507
3005
  if (!this.session) {
@@ -3512,20 +3010,20 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3512
3010
  }
3513
3011
  const audioSamplesCopy = new Float32Array(audioSamples);
3514
3012
  let audio;
3515
- if (audioSamplesCopy.length === 16e3) {
3013
+ if (audioSamplesCopy.length === this.chunkSize) {
3516
3014
  audio = audioSamplesCopy;
3517
- } else if (audioSamplesCopy.length < 16e3) {
3518
- audio = new Float32Array(16e3);
3015
+ } else if (audioSamplesCopy.length < this.chunkSize) {
3016
+ audio = new Float32Array(this.chunkSize);
3519
3017
  audio.set(audioSamplesCopy, 0);
3520
3018
  } else {
3521
- audio = audioSamplesCopy.slice(0, 16e3);
3019
+ audio = audioSamplesCopy.slice(0, this.chunkSize);
3522
3020
  }
3523
3021
  const identity = new Float32Array(this.numIdentityClasses);
3524
3022
  identity[Math.max(0, Math.min(identityIndex, this.numIdentityClasses - 1))] = 1;
3525
3023
  const audioCopy = new Float32Array(audio);
3526
3024
  const identityCopy = new Float32Array(identity);
3527
3025
  const feeds = {
3528
- "audio": new this.ort.Tensor("float32", audioCopy, [1, 16e3]),
3026
+ "audio": new this.ort.Tensor("float32", audioCopy, [1, this.chunkSize]),
3529
3027
  "identity": new this.ort.Tensor("float32", identityCopy, [1, this.numIdentityClasses])
3530
3028
  };
3531
3029
  return this.queueInference(feeds);
@@ -3561,7 +3059,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3561
3059
  const telemetry = getTelemetry();
3562
3060
  const span = telemetry?.startSpan("Wav2Vec2.infer", {
3563
3061
  "inference.backend": this._backend,
3564
- "inference.input_samples": 16e3
3062
+ "inference.input_samples": this.chunkSize
3565
3063
  });
3566
3064
  try {
3567
3065
  const startTime = performance.now();
@@ -3600,7 +3098,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3600
3098
  blendshapes.push(symmetrizeBlendshapes(rawFrame));
3601
3099
  }
3602
3100
  const text = this.decodeCTC(asrLogits);
3603
- logger2.trace("Inference completed", {
3101
+ logger3.trace("Inference completed", {
3604
3102
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
3605
3103
  numA2EFrames,
3606
3104
  numASRFrames,
@@ -3634,12 +3132,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
3634
3132
  const errMsg = err instanceof Error ? err.message : String(err);
3635
3133
  if (errMsg.includes("timed out")) {
3636
3134
  this.poisoned = true;
3637
- logger2.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
3135
+ logger3.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
3638
3136
  backend: this._backend,
3639
3137
  timeoutMs: _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
3640
3138
  });
3641
3139
  } else {
3642
- logger2.error("Inference failed", { error: errMsg, backend: this._backend });
3140
+ logger3.error("Inference failed", { error: errMsg, backend: this._backend });
3643
3141
  }
3644
3142
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
3645
3143
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -3680,56 +3178,74 @@ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
3680
3178
  _Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
3681
3179
  var Wav2Vec2Inference = _Wav2Vec2Inference;
3682
3180
 
3181
+ // src/audio/audioUtils.ts
3182
/**
 * Convert 16-bit signed PCM bytes into Float32 samples scaled by 1/32768.
 *
 * Accepts a raw ArrayBuffer or any ArrayBufferView (Uint8Array, Node Buffer,
 * DataView, ...). For a view, only the bytes it covers are decoded, honouring
 * its byteOffset and byteLength. Samples are read in platform byte order,
 * exactly as Int16Array does. A trailing odd byte is ignored.
 *
 * @param buffer - PCM16 bytes as an ArrayBuffer or ArrayBufferView
 * @returns Float32Array with one sample per 16-bit input sample
 */
function pcm16ToFloat32(buffer) {
  // Round the length down to whole 2-byte samples.
  const byteLen = buffer.byteLength & ~1;
  let int16;
  if (ArrayBuffer.isView(buffer)) {
    const backing = buffer.buffer;
    const start = buffer.byteOffset;
    // Int16Array needs a 2-byte aligned offset; copy the bytes out when the
    // view starts on an odd address.
    int16 = start % 2 === 0
      ? new Int16Array(backing, start, byteLen / 2)
      : new Int16Array(backing.slice(start, start + byteLen));
  } else {
    int16 = new Int16Array(buffer, 0, byteLen / 2);
  }
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
3191
/**
 * Scale 16-bit integer samples into floats by dividing each by 32768.
 *
 * @param int16 - Samples as an Int16Array (or other indexable with `length`)
 * @returns A new Float32Array of the same length
 */
function int16ToFloat32(int16) {
  const scaled = new Float32Array(int16.length);
  scaled.forEach((_, idx) => {
    scaled[idx] = int16[idx] / 32768;
  });
  return scaled;
}
3198
+
3683
3199
  // src/audio/FullFacePipeline.ts
3684
- var logger3 = createLogger("FullFacePipeline");
3685
- var BLENDSHAPE_INDEX_MAP = /* @__PURE__ */ new Map();
3686
- LAM_BLENDSHAPES.forEach((name, index) => {
3687
- BLENDSHAPE_INDEX_MAP.set(name, index);
3688
- });
3689
- var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
3690
- var EMOTION_LABEL_MAP = {
3691
- // Direct labels
3692
- happy: "happy",
3693
- sad: "sad",
3694
- angry: "angry",
3695
- neutral: "neutral",
3696
- // Natural language synonyms
3697
- excited: "happy",
3698
- joyful: "happy",
3699
- cheerful: "happy",
3700
- delighted: "happy",
3701
- amused: "happy",
3702
- melancholic: "sad",
3703
- sorrowful: "sad",
3704
- disappointed: "sad",
3705
- frustrated: "angry",
3706
- irritated: "angry",
3707
- furious: "angry",
3708
- annoyed: "angry",
3709
- // SenseVoice labels
3710
- fearful: "sad",
3711
- disgusted: "angry",
3712
- surprised: "happy"
3713
- };
3714
- var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3200
+ var logger4 = createLogger("FullFacePipeline");
3201
// Lookup from each LAM blendshape name to a coarse face group, chosen by the
// name's prefix. Names that match none of the prefixes get no entry.
var BLENDSHAPE_TO_GROUP = /* @__PURE__ */ new Map();
{
  // Checked in order; the first matching prefix wins.
  const prefixGroups = [
    ["eye", "eyes"],
    ["brow", "brows"],
    ["jaw", "jaw"],
    ["mouth", "mouth"],
    ["cheek", "cheeks"],
    ["nose", "nose"],
    ["tongue", "tongue"]
  ];
  LAM_BLENDSHAPES.forEach((shapeName) => {
    const match = prefixGroups.find(([prefix]) => shapeName.startsWith(prefix));
    if (match) {
      BLENDSHAPE_TO_GROUP.set(shapeName, match[1]);
    }
  });
}
3219
+ var FullFacePipeline = class extends EventEmitter {
3715
3220
  constructor(options) {
3716
3221
  super();
3717
3222
  this.options = options;
3718
3223
  this.playbackStarted = false;
3719
3224
  this.monitorInterval = null;
3720
3225
  this.frameAnimationId = null;
3721
- // Emotion state
3722
- this.lastEmotionFrame = null;
3723
- this.currentAudioEnergy = 0;
3724
3226
  // Stale frame detection
3725
3227
  this.lastNewFrameTime = 0;
3726
3228
  this.lastKnownLamFrame = null;
3727
3229
  this.staleWarningEmitted = false;
3230
+ // Diagnostic logging counter
3231
+ this.frameLoopCount = 0;
3728
3232
  const sampleRate = options.sampleRate ?? 16e3;
3729
- this.emotionBlendFactor = options.emotionBlendFactor ?? 0.8;
3730
- this.lamBlendFactor = options.lamBlendFactor ?? 0.2;
3731
- const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
3233
+ this.profile = options.profile ?? {};
3234
+ this.staleThresholdMs = options.staleThresholdMs ?? 2e3;
3235
+ const isCpuModel = options.lam.modelId === "wav2arkit_cpu";
3236
+ const chunkSize = options.chunkSize ?? options.lam.chunkSize ?? 16e3;
3237
+ const chunkAccumulationMs = chunkSize / sampleRate * 1e3;
3238
+ const inferenceEstimateMs = isCpuModel ? 300 : options.lam.backend === "wasm" ? 250 : 80;
3239
+ const marginMs = 100;
3240
+ const autoDelay = Math.ceil(chunkAccumulationMs + inferenceEstimateMs + marginMs);
3732
3241
  const audioDelayMs = options.audioDelayMs ?? autoDelay;
3242
+ logger4.info("FullFacePipeline config", {
3243
+ chunkSize,
3244
+ audioDelayMs,
3245
+ autoDelay,
3246
+ backend: options.lam.backend,
3247
+ modelId: options.lam.modelId
3248
+ });
3733
3249
  this.scheduler = new AudioScheduler({
3734
3250
  sampleRate,
3735
3251
  initialLookaheadSec: audioDelayMs / 1e3
@@ -3738,20 +3254,15 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3738
3254
  sampleRate,
3739
3255
  targetDurationMs: options.chunkTargetMs ?? 200
3740
3256
  });
3741
- this.lamPipeline = new LAMPipeline({
3257
+ this.processor = new A2EProcessor({
3258
+ backend: options.lam,
3742
3259
  sampleRate,
3260
+ chunkSize,
3743
3261
  onError: (error) => {
3744
- logger3.error("LAM inference error", { message: error.message, stack: error.stack });
3262
+ logger4.error("A2E inference error", { message: error.message, stack: error.stack });
3745
3263
  this.emit("error", error);
3746
3264
  }
3747
3265
  });
3748
- this.emotionMapper = new EmotionToBlendshapeMapper({
3749
- smoothingFactor: 0.15,
3750
- confidenceThreshold: 0.3,
3751
- intensity: 1,
3752
- energyModulation: true
3753
- });
3754
- this.energyAnalyzer = new AudioEnergyAnalyzer();
3755
3266
  }
3756
3267
  /**
3757
3268
  * Initialize the pipeline
@@ -3760,40 +3271,33 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3760
3271
  await this.scheduler.initialize();
3761
3272
  }
3762
3273
  /**
3763
- * Set emotion label from backend (e.g., LLM response emotion).
3764
- *
3765
- * Converts a natural language emotion label into an EmotionFrame
3766
- * that drives upper face blendshapes for the duration of the utterance.
3767
- *
3768
- * Supported labels: happy, excited, joyful, sad, melancholic, angry,
3769
- * frustrated, neutral, etc.
3770
- *
3771
- * @param label - Emotion label string (case-insensitive)
3772
- */
3773
- setEmotionLabel(label) {
3774
- const normalized = label.toLowerCase();
3775
- const mapped = EMOTION_LABEL_MAP[normalized] ?? "neutral";
3776
- const probabilities = {
3777
- neutral: 0.1,
3778
- happy: 0.1,
3779
- angry: 0.1,
3780
- sad: 0.1
3781
- };
3782
- probabilities[mapped] = 0.7;
3783
- const frame = {
3784
- emotion: mapped,
3785
- confidence: 0.7,
3786
- probabilities
3787
- };
3788
- this.lastEmotionFrame = frame;
3789
- logger3.info("Emotion label set", { label, mapped });
3274
+ * Update the ExpressionProfile at runtime (e.g., character switch).
3275
+ */
3276
+ setProfile(profile) {
3277
+ this.profile = profile;
3790
3278
  }
3791
3279
  /**
3792
- * Clear any set emotion label.
3793
- * Falls back to prosody-only upper face animation.
3280
+ * Apply ExpressionProfile scaling to raw A2E blendshapes.
3281
+ *
3282
+ * For each blendshape:
3283
+ * 1. If an override exists for the blendshape name, use override as scaler
3284
+ * 2. Otherwise, use the group scaler (default 1.0)
3285
+ * 3. Clamp result to [0, 1]
3794
3286
  */
3795
- clearEmotionLabel() {
3796
- this.lastEmotionFrame = null;
3287
+ applyProfile(raw) {
3288
+ const scaled = new Float32Array(52);
3289
+ for (let i = 0; i < 52; i++) {
3290
+ const name = LAM_BLENDSHAPES[i];
3291
+ let scaler;
3292
+ if (this.profile.overrides && this.profile.overrides[name] !== void 0) {
3293
+ scaler = this.profile.overrides[name];
3294
+ } else {
3295
+ const group = BLENDSHAPE_TO_GROUP.get(name);
3296
+ scaler = group ? this.profile[group] ?? 1 : 1;
3297
+ }
3298
+ scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
3299
+ }
3300
+ return scaled;
3797
3301
  }
3798
3302
  /**
3799
3303
  * Start a new playback session
@@ -3805,15 +3309,12 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3805
3309
  this.stopMonitoring();
3806
3310
  this.scheduler.reset();
3807
3311
  this.coalescer.reset();
3808
- this.lamPipeline.reset();
3312
+ this.processor.reset();
3809
3313
  this.playbackStarted = false;
3810
- this.lastEmotionFrame = null;
3811
- this.currentAudioEnergy = 0;
3812
- this.emotionMapper.reset();
3813
- this.energyAnalyzer.reset();
3814
3314
  this.lastNewFrameTime = 0;
3815
3315
  this.lastKnownLamFrame = null;
3816
3316
  this.staleWarningEmitted = false;
3317
+ this.frameLoopCount = 0;
3817
3318
  this.scheduler.warmup();
3818
3319
  this.startFrameLoop();
3819
3320
  this.startMonitoring();
@@ -3821,8 +3322,8 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3821
3322
  /**
3822
3323
  * Receive audio chunk from network
3823
3324
  *
3824
- * Audio-first design: schedules audio immediately, LAM runs in background.
3825
- * This prevents LAM inference (50-300ms) from blocking audio scheduling.
3325
+ * Audio-first design: schedules audio immediately, A2E runs in background.
3326
+ * This prevents A2E inference (50-300ms) from blocking audio scheduling.
3826
3327
  *
3827
3328
  * @param chunk - Uint8Array containing Int16 PCM audio
3828
3329
  */
@@ -3837,100 +3338,69 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3837
3338
  this.playbackStarted = true;
3838
3339
  this.emit("playback_start", scheduleTime);
3839
3340
  }
3840
- const { energy } = this.energyAnalyzer.process(float32);
3841
- this.currentAudioEnergy = energy;
3842
- this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
3843
- this.emit("error", err);
3341
+ logger4.info("onAudioChunk \u2192 pushAudio", {
3342
+ float32Samples: float32.length,
3343
+ scheduleTime: scheduleTime.toFixed(3),
3344
+ currentTime: this.scheduler.getCurrentTime().toFixed(3),
3345
+ deltaToPlayback: (scheduleTime - this.scheduler.getCurrentTime()).toFixed(3)
3844
3346
  });
3845
- }
3846
- /**
3847
- * Get emotion frame for current animation.
3848
- *
3849
- * Priority:
3850
- * 1. Explicit emotion label from setEmotionLabel()
3851
- * 2. Prosody fallback: subtle brow movement from audio energy
3852
- */
3853
- getEmotionFrame() {
3854
- if (this.lastEmotionFrame) {
3855
- return { frame: this.lastEmotionFrame, energy: this.currentAudioEnergy };
3856
- }
3857
- return { frame: null, energy: this.currentAudioEnergy };
3858
- }
3859
- /**
3860
- * Merge LAM blendshapes with emotion upper face blendshapes
3861
- */
3862
- mergeBlendshapes(lamFrame, emotionFrame, audioEnergy) {
3863
- const merged = new Float32Array(52);
3864
- let emotionBlendshapes;
3865
- if (emotionFrame) {
3866
- this.emotionMapper.mapFrame(emotionFrame, audioEnergy);
3867
- this.emotionMapper.update(33);
3868
- emotionBlendshapes = this.emotionMapper.getCurrentBlendshapes();
3869
- } else {
3870
- emotionBlendshapes = {};
3871
- for (const name of UPPER_FACE_BLENDSHAPES) {
3872
- emotionBlendshapes[name] = 0;
3873
- }
3874
- }
3875
- for (let i = 0; i < 52; i++) {
3876
- const name = LAM_BLENDSHAPES[i];
3877
- if (UPPER_FACE_SET.has(name)) {
3878
- const emotionValue = emotionBlendshapes[name] ?? 0;
3879
- const lamValue = lamFrame[i];
3880
- merged[i] = emotionValue * this.emotionBlendFactor + lamValue * this.lamBlendFactor;
3881
- } else {
3882
- merged[i] = lamFrame[i];
3883
- }
3884
- }
3885
- return { merged, emotionBlendshapes };
3347
+ this.processor.pushAudio(float32, scheduleTime);
3886
3348
  }
3887
3349
  /**
3888
3350
  * Start frame animation loop
3351
+ *
3352
+ * Polls A2EProcessor at render rate (60fps) for the latest inference frame
3353
+ * matching the current AudioContext time. Between inference batches (~30fps
3354
+ * bursts), getFrameForTime() holds the last frame.
3889
3355
  */
3890
3356
  startFrameLoop() {
3891
3357
  const updateFrame = () => {
3358
+ this.frameLoopCount++;
3892
3359
  const currentTime = this.scheduler.getCurrentTime();
3893
- const lamFrame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
3894
- if (lamFrame) {
3895
- if (lamFrame !== this.lastKnownLamFrame) {
3896
- this.lastNewFrameTime = performance.now();
3897
- this.lastKnownLamFrame = lamFrame;
3898
- this.staleWarningEmitted = false;
3360
+ const lamFrame = this.processor.getFrameForTime(currentTime);
3361
+ if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
3362
+ this.lastNewFrameTime = performance.now();
3363
+ this.lastKnownLamFrame = lamFrame;
3364
+ this.staleWarningEmitted = false;
3365
+ logger4.info("New A2E frame", {
3366
+ jawOpen: lamFrame[24]?.toFixed(3),
3367
+ mouthClose: lamFrame[26]?.toFixed(3),
3368
+ browInnerUp: lamFrame[2]?.toFixed(3),
3369
+ browDownL: lamFrame[0]?.toFixed(3),
3370
+ browOuterUpL: lamFrame[3]?.toFixed(3),
3371
+ currentTime: currentTime.toFixed(3),
3372
+ queuedFrames: this.processor.queuedFrameCount
3373
+ });
3374
+ }
3375
+ if (this.frameLoopCount % 60 === 0) {
3376
+ logger4.info("Frame loop heartbeat", {
3377
+ frameLoopCount: this.frameLoopCount,
3378
+ currentTime: currentTime.toFixed(3),
3379
+ playbackEndTime: this.scheduler.getPlaybackEndTime().toFixed(3),
3380
+ queuedFrames: this.processor.queuedFrameCount,
3381
+ playbackStarted: this.playbackStarted,
3382
+ msSinceNewFrame: this.lastNewFrameTime > 0 ? Math.round(performance.now() - this.lastNewFrameTime) : -1,
3383
+ processorFill: this.processor.fillLevel.toFixed(2)
3384
+ });
3385
+ }
3386
+ if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
3387
+ if (!this.staleWarningEmitted) {
3388
+ this.staleWarningEmitted = true;
3389
+ logger4.warn("A2E stalled \u2014 no new inference frames", {
3390
+ staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
3391
+ queuedFrames: this.processor.queuedFrameCount
3392
+ });
3899
3393
  }
3900
- const { frame: emotionFrame, energy } = this.getEmotionFrame();
3901
- const { merged, emotionBlendshapes } = this.mergeBlendshapes(lamFrame, emotionFrame, energy);
3394
+ }
3395
+ if (lamFrame) {
3396
+ const scaled = this.applyProfile(lamFrame);
3902
3397
  const fullFrame = {
3903
- blendshapes: merged,
3904
- lamBlendshapes: lamFrame,
3905
- emotionBlendshapes,
3906
- emotion: emotionFrame,
3398
+ blendshapes: scaled,
3399
+ rawBlendshapes: lamFrame,
3907
3400
  timestamp: currentTime
3908
3401
  };
3909
3402
  this.emit("full_frame_ready", fullFrame);
3910
3403
  this.emit("lam_frame_ready", lamFrame);
3911
- if (emotionFrame) {
3912
- this.emit("emotion_frame_ready", emotionFrame);
3913
- }
3914
- } else if (this.playbackStarted && !this.lastKnownLamFrame) {
3915
- const { frame: emotionFrame, energy } = this.getEmotionFrame();
3916
- if (emotionFrame && energy > 0.05) {
3917
- const startupFrame = new Float32Array(52);
3918
- const { merged, emotionBlendshapes } = this.mergeBlendshapes(startupFrame, emotionFrame, energy);
3919
- this.emit("full_frame_ready", {
3920
- blendshapes: merged,
3921
- lamBlendshapes: startupFrame,
3922
- emotionBlendshapes,
3923
- emotion: emotionFrame,
3924
- timestamp: currentTime
3925
- });
3926
- }
3927
- }
3928
- if (this.playbackStarted && this.lastNewFrameTime > 0 && !this.staleWarningEmitted && performance.now() - this.lastNewFrameTime > _FullFacePipeline.STALE_FRAME_THRESHOLD_MS) {
3929
- this.staleWarningEmitted = true;
3930
- logger3.warn("LAM appears stalled \u2014 no new frames for 3+ seconds during playback", {
3931
- staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
3932
- queuedFrames: this.lamPipeline.queuedFrameCount
3933
- });
3934
3404
  }
3935
3405
  this.frameAnimationId = requestAnimationFrame(updateFrame);
3936
3406
  };
@@ -3945,7 +3415,7 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3945
3415
  const chunk = new Uint8Array(remaining);
3946
3416
  await this.onAudioChunk(chunk);
3947
3417
  }
3948
- await this.lamPipeline.flush(this.options.lam);
3418
+ await this.processor.flush();
3949
3419
  }
3950
3420
  /**
3951
3421
  * Stop playback immediately with smooth fade-out
@@ -3954,12 +3424,8 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3954
3424
  this.stopMonitoring();
3955
3425
  await this.scheduler.cancelAll(fadeOutMs);
3956
3426
  this.coalescer.reset();
3957
- this.lamPipeline.reset();
3427
+ this.processor.reset();
3958
3428
  this.playbackStarted = false;
3959
- this.lastEmotionFrame = null;
3960
- this.currentAudioEnergy = 0;
3961
- this.emotionMapper.reset();
3962
- this.energyAnalyzer.reset();
3963
3429
  this.lastNewFrameTime = 0;
3964
3430
  this.lastKnownLamFrame = null;
3965
3431
  this.staleWarningEmitted = false;
@@ -3973,7 +3439,7 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3973
3439
  clearInterval(this.monitorInterval);
3974
3440
  }
3975
3441
  this.monitorInterval = setInterval(() => {
3976
- if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
3442
+ if (this.scheduler.isComplete() && this.processor.queuedFrameCount === 0) {
3977
3443
  this.emit("playback_complete", void 0);
3978
3444
  this.stopMonitoring();
3979
3445
  }
@@ -3999,20 +3465,12 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
3999
3465
  return {
4000
3466
  playbackStarted: this.playbackStarted,
4001
3467
  coalescerFill: this.coalescer.fillLevel,
4002
- lamFill: this.lamPipeline.fillLevel,
4003
- queuedLAMFrames: this.lamPipeline.queuedFrameCount,
4004
- emotionLabel: this.lastEmotionFrame?.emotion ?? null,
4005
- currentAudioEnergy: this.currentAudioEnergy,
3468
+ processorFill: this.processor.fillLevel,
3469
+ queuedFrames: this.processor.queuedFrameCount,
4006
3470
  currentTime: this.scheduler.getCurrentTime(),
4007
3471
  playbackEndTime: this.scheduler.getPlaybackEndTime()
4008
3472
  };
4009
3473
  }
4010
- /**
4011
- * Check if an explicit emotion label is currently set
4012
- */
4013
- get hasEmotionLabel() {
4014
- return this.lastEmotionFrame !== null;
4015
- }
4016
3474
  /**
4017
3475
  * Cleanup resources
4018
3476
  */
@@ -4020,13 +3478,9 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
4020
3478
  this.stopMonitoring();
4021
3479
  this.scheduler.dispose();
4022
3480
  this.coalescer.reset();
4023
- this.lamPipeline.reset();
4024
- this.lastEmotionFrame = null;
4025
- this.currentAudioEnergy = 0;
3481
+ this.processor.dispose();
4026
3482
  }
4027
3483
  };
4028
- _FullFacePipeline.STALE_FRAME_THRESHOLD_MS = 3e3;
4029
- var FullFacePipeline = _FullFacePipeline;
4030
3484
 
4031
3485
  // src/inference/kaldiFbank.ts
4032
3486
  function fft(re, im) {
@@ -4313,7 +3767,7 @@ function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
4313
3767
  }
4314
3768
 
4315
3769
  // src/inference/SenseVoiceInference.ts
4316
- var logger4 = createLogger("SenseVoice");
3770
+ var logger5 = createLogger("SenseVoice");
4317
3771
  var _SenseVoiceInference = class _SenseVoiceInference {
4318
3772
  constructor(config) {
4319
3773
  this.session = null;
@@ -4366,26 +3820,26 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4366
3820
  "model.backend_requested": this.config.backend
4367
3821
  });
4368
3822
  try {
4369
- logger4.info("Loading ONNX Runtime...", { preference: this.config.backend });
3823
+ logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
4370
3824
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
4371
3825
  this.ort = ort;
4372
3826
  this._backend = backend;
4373
- logger4.info("ONNX Runtime loaded", { backend: this._backend });
4374
- logger4.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
3827
+ logger5.info("ONNX Runtime loaded", { backend: this._backend });
3828
+ logger5.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
4375
3829
  const tokensResponse = await fetch(this.config.tokensUrl);
4376
3830
  if (!tokensResponse.ok) {
4377
3831
  throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
4378
3832
  }
4379
3833
  const tokensText = await tokensResponse.text();
4380
3834
  this.tokenMap = parseTokensFile(tokensText);
4381
- logger4.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
3835
+ logger5.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
4382
3836
  const sessionOptions = getSessionOptions(this._backend);
4383
3837
  if (this._backend === "webgpu") {
4384
3838
  sessionOptions.graphOptimizationLevel = "basic";
4385
3839
  }
4386
3840
  let isCached = false;
4387
3841
  if (isIOS()) {
4388
- logger4.info("iOS: passing model URL directly to ORT (low-memory path)", {
3842
+ logger5.info("iOS: passing model URL directly to ORT (low-memory path)", {
4389
3843
  modelUrl: this.config.modelUrl
4390
3844
  });
4391
3845
  this.session = await this.ort.InferenceSession.create(
@@ -4397,14 +3851,14 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4397
3851
  isCached = await cache.has(this.config.modelUrl);
4398
3852
  let modelBuffer;
4399
3853
  if (isCached) {
4400
- logger4.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
3854
+ logger5.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
4401
3855
  modelBuffer = await cache.get(this.config.modelUrl);
4402
3856
  onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
4403
3857
  } else {
4404
- logger4.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
3858
+ logger5.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
4405
3859
  modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
4406
3860
  }
4407
- logger4.debug("Creating ONNX session", {
3861
+ logger5.debug("Creating ONNX session", {
4408
3862
  size: formatBytes(modelBuffer.byteLength),
4409
3863
  backend: this._backend
4410
3864
  });
@@ -4417,15 +3871,15 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4417
3871
  const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
4418
3872
  this.negMean = cmvn.negMean;
4419
3873
  this.invStddev = cmvn.invStddev;
4420
- logger4.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
3874
+ logger5.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
4421
3875
  } else {
4422
- logger4.warn("CMVN not found in model metadata \u2014 features will not be normalized");
3876
+ logger5.warn("CMVN not found in model metadata \u2014 features will not be normalized");
4423
3877
  }
4424
3878
  } catch (cmvnErr) {
4425
- logger4.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
3879
+ logger5.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
4426
3880
  }
4427
3881
  const loadTimeMs = performance.now() - startTime;
4428
- logger4.info("SenseVoice model loaded", {
3882
+ logger5.info("SenseVoice model loaded", {
4429
3883
  backend: this._backend,
4430
3884
  loadTimeMs: Math.round(loadTimeMs),
4431
3885
  vocabSize: this.tokenMap.size,
@@ -4536,7 +3990,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4536
3990
  const vocabSize = logitsDims[2];
4537
3991
  const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
4538
3992
  const inferenceTimeMs = performance.now() - startTime;
4539
- logger4.trace("Transcription complete", {
3993
+ logger5.trace("Transcription complete", {
4540
3994
  text: decoded.text.substring(0, 50),
4541
3995
  language: decoded.language,
4542
3996
  emotion: decoded.emotion,
@@ -4574,7 +4028,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4574
4028
  const errMsg = err instanceof Error ? err.message : String(err);
4575
4029
  if (errMsg.includes("timed out")) {
4576
4030
  this.poisoned = true;
4577
- logger4.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4031
+ logger5.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
4578
4032
  backend: this._backend,
4579
4033
  timeoutMs: _SenseVoiceInference.INFERENCE_TIMEOUT_MS
4580
4034
  });
@@ -4582,7 +4036,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4582
4036
  const oomError = new Error(
4583
4037
  `SenseVoice inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
4584
4038
  );
4585
- logger4.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4039
+ logger5.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
4586
4040
  pointer: `0x${err.toString(16)}`,
4587
4041
  backend: this._backend
4588
4042
  });
@@ -4595,7 +4049,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
4595
4049
  reject(oomError);
4596
4050
  return;
4597
4051
  } else {
4598
- logger4.error("Inference failed", { error: errMsg, backend: this._backend });
4052
+ logger5.error("Inference failed", { error: errMsg, backend: this._backend });
4599
4053
  }
4600
4054
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
4601
4055
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -4624,7 +4078,7 @@ _SenseVoiceInference.INFERENCE_TIMEOUT_MS = 1e4;
4624
4078
  var SenseVoiceInference = _SenseVoiceInference;
4625
4079
 
4626
4080
  // src/inference/SenseVoiceWorker.ts
4627
- var logger5 = createLogger("SenseVoiceWorker");
4081
+ var logger6 = createLogger("SenseVoiceWorker");
4628
4082
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
4629
4083
  var LOAD_TIMEOUT_MS = 3e4;
4630
4084
  var INFERENCE_TIMEOUT_MS = 1e4;
@@ -5357,7 +4811,7 @@ var SenseVoiceWorker = class {
5357
4811
  this.handleWorkerMessage(event.data);
5358
4812
  };
5359
4813
  worker.onerror = (error) => {
5360
- logger5.error("Worker error", { error: error.message });
4814
+ logger6.error("Worker error", { error: error.message });
5361
4815
  for (const [, resolver] of this.pendingResolvers) {
5362
4816
  resolver.reject(new Error(`Worker error: ${error.message}`));
5363
4817
  }
@@ -5437,9 +4891,9 @@ var SenseVoiceWorker = class {
5437
4891
  "model.language": this.config.language
5438
4892
  });
5439
4893
  try {
5440
- logger5.info("Creating SenseVoice worker...");
4894
+ logger6.info("Creating SenseVoice worker...");
5441
4895
  this.worker = this.createWorker();
5442
- logger5.info("Loading model in worker...", {
4896
+ logger6.info("Loading model in worker...", {
5443
4897
  modelUrl: this.config.modelUrl,
5444
4898
  tokensUrl: this.config.tokensUrl,
5445
4899
  language: this.config.language,
@@ -5461,7 +4915,7 @@ var SenseVoiceWorker = class {
5461
4915
  this._isLoaded = true;
5462
4916
  const loadTimeMs = performance.now() - startTime;
5463
4917
  onProgress?.(1, 1);
5464
- logger5.info("SenseVoice worker loaded successfully", {
4918
+ logger6.info("SenseVoice worker loaded successfully", {
5465
4919
  backend: "wasm",
5466
4920
  loadTimeMs: Math.round(loadTimeMs),
5467
4921
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -5540,7 +4994,7 @@ var SenseVoiceWorker = class {
5540
4994
  INFERENCE_TIMEOUT_MS
5541
4995
  );
5542
4996
  const totalTimeMs = performance.now() - startTime;
5543
- logger5.trace("Worker transcription complete", {
4997
+ logger6.trace("Worker transcription complete", {
5544
4998
  text: result.text.substring(0, 50),
5545
4999
  language: result.language,
5546
5000
  emotion: result.emotion,
@@ -5576,11 +5030,11 @@ var SenseVoiceWorker = class {
5576
5030
  } catch (err) {
5577
5031
  const errMsg = err instanceof Error ? err.message : String(err);
5578
5032
  if (errMsg.includes("timed out")) {
5579
- logger5.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5033
+ logger6.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
5580
5034
  timeoutMs: INFERENCE_TIMEOUT_MS
5581
5035
  });
5582
5036
  } else {
5583
- logger5.error("Worker inference failed", { error: errMsg });
5037
+ logger6.error("Worker inference failed", { error: errMsg });
5584
5038
  }
5585
5039
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
5586
5040
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -5618,7 +5072,7 @@ var SenseVoiceWorker = class {
5618
5072
  };
5619
5073
 
5620
5074
  // src/inference/UnifiedInferenceWorker.ts
5621
- var logger6 = createLogger("UnifiedInferenceWorker");
5075
+ var logger7 = createLogger("UnifiedInferenceWorker");
5622
5076
  var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
5623
5077
  var INIT_TIMEOUT_MS = 15e3;
5624
5078
  var SV_LOAD_TIMEOUT_MS = 3e4;
@@ -6314,7 +5768,7 @@ var UnifiedInferenceWorker = class {
6314
5768
  const telemetry = getTelemetry();
6315
5769
  const span = telemetry?.startSpan("UnifiedInferenceWorker.init");
6316
5770
  try {
6317
- logger6.info("Creating unified inference worker...");
5771
+ logger7.info("Creating unified inference worker...");
6318
5772
  this.worker = this.createWorker();
6319
5773
  await this.sendMessage(
6320
5774
  { type: "init", wasmPaths: WASM_CDN_PATH3, isIOS: isIOS() },
@@ -6323,7 +5777,7 @@ var UnifiedInferenceWorker = class {
6323
5777
  );
6324
5778
  this.initialized = true;
6325
5779
  const loadTimeMs = performance.now() - startTime;
6326
- logger6.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
5780
+ logger7.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
6327
5781
  span?.setAttributes({ "worker.init_time_ms": loadTimeMs });
6328
5782
  span?.end();
6329
5783
  } catch (error) {
@@ -6377,8 +5831,8 @@ var UnifiedInferenceWorker = class {
6377
5831
  if (!this.worker) return;
6378
5832
  await this.sendMessage({ type: "sv:dispose" }, "sv:disposed", DISPOSE_TIMEOUT_MS);
6379
5833
  }
6380
- // ── Wav2ArkitCpu (Lip Sync) ──────────────────────────────────────────
6381
- async loadLipSync(config) {
5834
+ // ── Wav2ArkitCpu (A2E) ──────────────────────────────────────────────
5835
+ async loadA2E(config) {
6382
5836
  this.assertReady();
6383
5837
  const startTime = performance.now();
6384
5838
  const result = await this.sendMessage(
@@ -6399,7 +5853,7 @@ var UnifiedInferenceWorker = class {
6399
5853
  outputNames: result.outputNames
6400
5854
  };
6401
5855
  }
6402
- async inferLipSync(audio) {
5856
+ async inferA2E(audio) {
6403
5857
  this.assertReady();
6404
5858
  return this.sendMessage(
6405
5859
  { type: "cpu:infer", audio },
@@ -6407,7 +5861,7 @@ var UnifiedInferenceWorker = class {
6407
5861
  CPU_INFER_TIMEOUT_MS
6408
5862
  );
6409
5863
  }
6410
- async disposeLipSync() {
5864
+ async disposeA2E() {
6411
5865
  if (!this.worker) return;
6412
5866
  await this.sendMessage({ type: "cpu:dispose" }, "cpu:disposed", DISPOSE_TIMEOUT_MS);
6413
5867
  }
@@ -6497,7 +5951,7 @@ var UnifiedInferenceWorker = class {
6497
5951
  this.handleWorkerMessage(event.data);
6498
5952
  };
6499
5953
  worker.onerror = (error) => {
6500
- logger6.error("Unified worker error", { error: error.message });
5954
+ logger7.error("Unified worker error", { error: error.message });
6501
5955
  this.rejectAllPending(`Worker error: ${error.message}`);
6502
5956
  };
6503
5957
  return worker;
@@ -6511,7 +5965,7 @@ var UnifiedInferenceWorker = class {
6511
5965
  this.pendingRequests.delete(requestId);
6512
5966
  pending.reject(new Error(data.error));
6513
5967
  } else {
6514
- logger6.error("Worker broadcast error", { error: data.error });
5968
+ logger7.error("Worker broadcast error", { error: data.error });
6515
5969
  this.rejectAllPending(data.error);
6516
5970
  }
6517
5971
  return;
@@ -6533,7 +5987,7 @@ var UnifiedInferenceWorker = class {
6533
5987
  const timeout = setTimeout(() => {
6534
5988
  this.pendingRequests.delete(requestId);
6535
5989
  this.poisoned = true;
6536
- logger6.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
5990
+ logger7.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
6537
5991
  type: message.type,
6538
5992
  timeoutMs
6539
5993
  });
@@ -6599,7 +6053,7 @@ var SenseVoiceUnifiedAdapter = class {
6599
6053
  });
6600
6054
  this._isLoaded = true;
6601
6055
  onProgress?.(1, 1);
6602
- logger6.info("SenseVoice loaded via unified worker", {
6056
+ logger7.info("SenseVoice loaded via unified worker", {
6603
6057
  backend: "wasm",
6604
6058
  loadTimeMs: Math.round(result.loadTimeMs),
6605
6059
  vocabSize: result.vocabSize
@@ -6640,6 +6094,7 @@ var SenseVoiceUnifiedAdapter = class {
6640
6094
  var Wav2ArkitCpuUnifiedAdapter = class {
6641
6095
  constructor(worker, config) {
6642
6096
  this.modelId = "wav2arkit_cpu";
6097
+ this.chunkSize = 16e3;
6643
6098
  this._isLoaded = false;
6644
6099
  this.inferenceQueue = Promise.resolve();
6645
6100
  this.worker = worker;
@@ -6658,12 +6113,12 @@ var Wav2ArkitCpuUnifiedAdapter = class {
6658
6113
  });
6659
6114
  try {
6660
6115
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
6661
- const result = await this.worker.loadLipSync({
6116
+ const result = await this.worker.loadA2E({
6662
6117
  modelUrl: this.config.modelUrl,
6663
6118
  externalDataUrl: externalDataUrl || null
6664
6119
  });
6665
6120
  this._isLoaded = true;
6666
- logger6.info("Wav2ArkitCpu loaded via unified worker", {
6121
+ logger7.info("Wav2ArkitCpu loaded via unified worker", {
6667
6122
  backend: "wasm",
6668
6123
  loadTimeMs: Math.round(result.loadTimeMs)
6669
6124
  });
@@ -6690,7 +6145,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
6690
6145
  });
6691
6146
  try {
6692
6147
  const startTime = performance.now();
6693
- const result = await this.worker.inferLipSync(audioCopy);
6148
+ const result = await this.worker.inferA2E(audioCopy);
6694
6149
  const inferenceTimeMs = performance.now() - startTime;
6695
6150
  const flatBuffer = result.blendshapes;
6696
6151
  const { numFrames, numBlendshapes } = result;
@@ -6713,7 +6168,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
6713
6168
  }
6714
6169
  async dispose() {
6715
6170
  if (this._isLoaded) {
6716
- await this.worker.disposeLipSync();
6171
+ await this.worker.disposeA2E();
6717
6172
  this._isLoaded = false;
6718
6173
  }
6719
6174
  }
@@ -6769,7 +6224,7 @@ var SileroVADUnifiedAdapter = class {
6769
6224
  sampleRate: this.config.sampleRate
6770
6225
  });
6771
6226
  this._isLoaded = true;
6772
- logger6.info("SileroVAD loaded via unified worker", {
6227
+ logger7.info("SileroVAD loaded via unified worker", {
6773
6228
  backend: "wasm",
6774
6229
  loadTimeMs: Math.round(result.loadTimeMs),
6775
6230
  sampleRate: this.config.sampleRate,
@@ -6850,10 +6305,10 @@ var SileroVADUnifiedAdapter = class {
6850
6305
  };
6851
6306
 
6852
6307
  // src/inference/createSenseVoice.ts
6853
- var logger7 = createLogger("createSenseVoice");
6308
+ var logger8 = createLogger("createSenseVoice");
6854
6309
  function createSenseVoice(config) {
6855
6310
  if (config.unifiedWorker) {
6856
- logger7.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6311
+ logger8.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6857
6312
  return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
6858
6313
  modelUrl: config.modelUrl,
6859
6314
  tokensUrl: config.tokensUrl,
@@ -6866,7 +6321,7 @@ function createSenseVoice(config) {
6866
6321
  if (!SenseVoiceWorker.isSupported()) {
6867
6322
  throw new Error("Web Workers are not supported in this environment");
6868
6323
  }
6869
- logger7.info("Creating SenseVoiceWorker (off-main-thread)");
6324
+ logger8.info("Creating SenseVoiceWorker (off-main-thread)");
6870
6325
  return new SenseVoiceWorker({
6871
6326
  modelUrl: config.modelUrl,
6872
6327
  tokensUrl: config.tokensUrl,
@@ -6875,7 +6330,7 @@ function createSenseVoice(config) {
6875
6330
  });
6876
6331
  }
6877
6332
  if (useWorker === false) {
6878
- logger7.info("Creating SenseVoiceInference (main thread)");
6333
+ logger8.info("Creating SenseVoiceInference (main thread)");
6879
6334
  return new SenseVoiceInference({
6880
6335
  modelUrl: config.modelUrl,
6881
6336
  tokensUrl: config.tokensUrl,
@@ -6884,7 +6339,7 @@ function createSenseVoice(config) {
6884
6339
  });
6885
6340
  }
6886
6341
  if (SenseVoiceWorker.isSupported() && !isIOS()) {
6887
- logger7.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6342
+ logger8.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6888
6343
  return new SenseVoiceWorker({
6889
6344
  modelUrl: config.modelUrl,
6890
6345
  tokensUrl: config.tokensUrl,
@@ -6892,7 +6347,7 @@ function createSenseVoice(config) {
6892
6347
  textNorm: config.textNorm
6893
6348
  });
6894
6349
  }
6895
- logger7.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6350
+ logger8.info("Auto-detected: creating SenseVoiceInference (main thread)", {
6896
6351
  reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
6897
6352
  });
6898
6353
  return new SenseVoiceInference({
@@ -6904,10 +6359,11 @@ function createSenseVoice(config) {
6904
6359
  }
6905
6360
 
6906
6361
  // src/inference/Wav2ArkitCpuInference.ts
6907
- var logger8 = createLogger("Wav2ArkitCpu");
6362
+ var logger9 = createLogger("Wav2ArkitCpu");
6908
6363
  var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6909
6364
  constructor(config) {
6910
6365
  this.modelId = "wav2arkit_cpu";
6366
+ this.chunkSize = 16e3;
6911
6367
  this.session = null;
6912
6368
  this.ort = null;
6913
6369
  this._backend = "wasm";
@@ -6945,16 +6401,16 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6945
6401
  });
6946
6402
  try {
6947
6403
  const preference = this.config.backend || "wasm";
6948
- logger8.info("Loading ONNX Runtime...", { preference });
6404
+ logger9.info("Loading ONNX Runtime...", { preference });
6949
6405
  const { ort, backend } = await getOnnxRuntimeForPreference(preference);
6950
6406
  this.ort = ort;
6951
6407
  this._backend = backend;
6952
- logger8.info("ONNX Runtime loaded", { backend: this._backend });
6408
+ logger9.info("ONNX Runtime loaded", { backend: this._backend });
6953
6409
  const modelUrl = this.config.modelUrl;
6954
6410
  const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
6955
6411
  const sessionOptions = getSessionOptions(this._backend);
6956
6412
  if (isIOS()) {
6957
- logger8.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6413
+ logger9.info("iOS: passing model URLs directly to ORT (low-memory path)", {
6958
6414
  modelUrl,
6959
6415
  dataUrl
6960
6416
  });
@@ -6972,15 +6428,15 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6972
6428
  const isCached = await cache.has(modelUrl);
6973
6429
  let modelBuffer;
6974
6430
  if (isCached) {
6975
- logger8.debug("Loading model from cache", { modelUrl });
6431
+ logger9.debug("Loading model from cache", { modelUrl });
6976
6432
  modelBuffer = await cache.get(modelUrl);
6977
6433
  if (!modelBuffer) {
6978
- logger8.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6434
+ logger9.warn("Cache corruption detected, clearing and retrying", { modelUrl });
6979
6435
  await cache.delete(modelUrl);
6980
6436
  modelBuffer = await fetchWithCache(modelUrl);
6981
6437
  }
6982
6438
  } else {
6983
- logger8.debug("Fetching and caching model graph", { modelUrl });
6439
+ logger9.debug("Fetching and caching model graph", { modelUrl });
6984
6440
  modelBuffer = await fetchWithCache(modelUrl);
6985
6441
  }
6986
6442
  if (!modelBuffer) {
@@ -6991,31 +6447,31 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
6991
6447
  try {
6992
6448
  const isDataCached = await cache.has(dataUrl);
6993
6449
  if (isDataCached) {
6994
- logger8.debug("Loading external data from cache", { dataUrl });
6450
+ logger9.debug("Loading external data from cache", { dataUrl });
6995
6451
  externalDataBuffer = await cache.get(dataUrl);
6996
6452
  if (!externalDataBuffer) {
6997
- logger8.warn("Cache corruption for external data, retrying", { dataUrl });
6453
+ logger9.warn("Cache corruption for external data, retrying", { dataUrl });
6998
6454
  await cache.delete(dataUrl);
6999
6455
  externalDataBuffer = await fetchWithCache(dataUrl);
7000
6456
  }
7001
6457
  } else {
7002
- logger8.info("Fetching external model data", {
6458
+ logger9.info("Fetching external model data", {
7003
6459
  dataUrl,
7004
6460
  note: "This may be a large download (400MB+)"
7005
6461
  });
7006
6462
  externalDataBuffer = await fetchWithCache(dataUrl);
7007
6463
  }
7008
- logger8.info("External data loaded", {
6464
+ logger9.info("External data loaded", {
7009
6465
  size: formatBytes(externalDataBuffer.byteLength)
7010
6466
  });
7011
6467
  } catch (err) {
7012
- logger8.debug("No external data file found (single-file model)", {
6468
+ logger9.debug("No external data file found (single-file model)", {
7013
6469
  dataUrl,
7014
6470
  error: err.message
7015
6471
  });
7016
6472
  }
7017
6473
  }
7018
- logger8.debug("Creating ONNX session", {
6474
+ logger9.debug("Creating ONNX session", {
7019
6475
  graphSize: formatBytes(modelBuffer.byteLength),
7020
6476
  externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
7021
6477
  backend: this._backend
@@ -7031,7 +6487,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7031
6487
  this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
7032
6488
  }
7033
6489
  const loadTimeMs = performance.now() - startTime;
7034
- logger8.info("Model loaded successfully", {
6490
+ logger9.info("Model loaded successfully", {
7035
6491
  backend: this._backend,
7036
6492
  loadTimeMs: Math.round(loadTimeMs),
7037
6493
  inputs: this.session.inputNames,
@@ -7047,12 +6503,12 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7047
6503
  model: "wav2arkit_cpu",
7048
6504
  backend: this._backend
7049
6505
  });
7050
- logger8.debug("Running warmup inference");
6506
+ logger9.debug("Running warmup inference");
7051
6507
  const warmupStart = performance.now();
7052
6508
  const silentAudio = new Float32Array(16e3);
7053
6509
  await this.infer(silentAudio);
7054
6510
  const warmupTimeMs = performance.now() - warmupStart;
7055
- logger8.info("Warmup inference complete", {
6511
+ logger9.info("Warmup inference complete", {
7056
6512
  warmupTimeMs: Math.round(warmupTimeMs),
7057
6513
  backend: this._backend
7058
6514
  });
@@ -7139,7 +6595,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7139
6595
  const symmetrized = symmetrizeBlendshapes(rawFrame);
7140
6596
  blendshapes.push(symmetrized);
7141
6597
  }
7142
- logger8.trace("Inference completed", {
6598
+ logger9.trace("Inference completed", {
7143
6599
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
7144
6600
  numFrames,
7145
6601
  inputSamples
@@ -7167,7 +6623,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7167
6623
  const errMsg = err instanceof Error ? err.message : String(err);
7168
6624
  if (errMsg.includes("timed out")) {
7169
6625
  this.poisoned = true;
7170
- logger8.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
6626
+ logger9.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
7171
6627
  backend: this._backend,
7172
6628
  timeoutMs: _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS
7173
6629
  });
@@ -7175,7 +6631,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7175
6631
  const oomError = new Error(
7176
6632
  `Wav2ArkitCpu inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
7177
6633
  );
7178
- logger8.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
6634
+ logger9.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7179
6635
  pointer: `0x${err.toString(16)}`,
7180
6636
  backend: this._backend
7181
6637
  });
@@ -7188,7 +6644,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
7188
6644
  reject(oomError);
7189
6645
  return;
7190
6646
  } else {
7191
- logger8.error("Inference failed", { error: errMsg, backend: this._backend });
6647
+ logger9.error("Inference failed", { error: errMsg, backend: this._backend });
7192
6648
  }
7193
6649
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
7194
6650
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -7215,7 +6671,7 @@ _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS = 5e3;
7215
6671
  var Wav2ArkitCpuInference = _Wav2ArkitCpuInference;
7216
6672
 
7217
6673
  // src/inference/Wav2ArkitCpuWorker.ts
7218
- var logger9 = createLogger("Wav2ArkitCpuWorker");
6674
+ var logger10 = createLogger("Wav2ArkitCpuWorker");
7219
6675
  var WASM_CDN_PATH4 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
7220
6676
  var LOAD_TIMEOUT_MS2 = 6e4;
7221
6677
  var INFERENCE_TIMEOUT_MS2 = 5e3;
@@ -7461,6 +6917,7 @@ self.onerror = function(err) {
7461
6917
  var Wav2ArkitCpuWorker = class {
7462
6918
  constructor(config) {
7463
6919
  this.modelId = "wav2arkit_cpu";
6920
+ this.chunkSize = 16e3;
7464
6921
  this.worker = null;
7465
6922
  this.isLoading = false;
7466
6923
  this._isLoaded = false;
@@ -7495,7 +6952,7 @@ var Wav2ArkitCpuWorker = class {
7495
6952
  this.handleWorkerMessage(event.data);
7496
6953
  };
7497
6954
  worker.onerror = (error) => {
7498
- logger9.error("Worker error", { error: error.message });
6955
+ logger10.error("Worker error", { error: error.message });
7499
6956
  for (const [, resolver] of this.pendingResolvers) {
7500
6957
  resolver.reject(new Error(`Worker error: ${error.message}`));
7501
6958
  }
@@ -7571,10 +7028,10 @@ var Wav2ArkitCpuWorker = class {
7571
7028
  "model.backend_requested": "wasm"
7572
7029
  });
7573
7030
  try {
7574
- logger9.info("Creating wav2arkit_cpu worker...");
7031
+ logger10.info("Creating wav2arkit_cpu worker...");
7575
7032
  this.worker = this.createWorker();
7576
7033
  const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
7577
- logger9.info("Loading model in worker...", {
7034
+ logger10.info("Loading model in worker...", {
7578
7035
  modelUrl: this.config.modelUrl,
7579
7036
  externalDataUrl,
7580
7037
  isIOS: isIOS()
@@ -7592,7 +7049,7 @@ var Wav2ArkitCpuWorker = class {
7592
7049
  );
7593
7050
  this._isLoaded = true;
7594
7051
  const loadTimeMs = performance.now() - startTime;
7595
- logger9.info("Wav2ArkitCpu worker loaded successfully", {
7052
+ logger10.info("Wav2ArkitCpu worker loaded successfully", {
7596
7053
  backend: "wasm",
7597
7054
  loadTimeMs: Math.round(loadTimeMs),
7598
7055
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -7677,7 +7134,7 @@ var Wav2ArkitCpuWorker = class {
7677
7134
  for (let f = 0; f < numFrames; f++) {
7678
7135
  blendshapes.push(flatBuffer.slice(f * numBlendshapes, (f + 1) * numBlendshapes));
7679
7136
  }
7680
- logger9.trace("Worker inference completed", {
7137
+ logger10.trace("Worker inference completed", {
7681
7138
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
7682
7139
  workerTimeMs: Math.round(result.inferenceTimeMs * 100) / 100,
7683
7140
  numFrames,
@@ -7707,12 +7164,12 @@ var Wav2ArkitCpuWorker = class {
7707
7164
  const errMsg = err instanceof Error ? err.message : String(err);
7708
7165
  if (errMsg.includes("timed out")) {
7709
7166
  this.poisoned = true;
7710
- logger9.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7167
+ logger10.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
7711
7168
  backend: "wasm",
7712
7169
  timeoutMs: INFERENCE_TIMEOUT_MS2
7713
7170
  });
7714
7171
  } else {
7715
- logger9.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7172
+ logger10.error("Worker inference failed", { error: errMsg, backend: "wasm" });
7716
7173
  }
7717
7174
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
7718
7175
  telemetry?.incrementCounter("omote.inference.total", 1, {
@@ -7749,39 +7206,39 @@ var Wav2ArkitCpuWorker = class {
7749
7206
  }
7750
7207
  };
7751
7208
 
7752
- // src/inference/createLipSync.ts
7753
- var logger10 = createLogger("createLipSync");
7754
- function createLipSync(config) {
7209
+ // src/inference/createA2E.ts
7210
+ var logger11 = createLogger("createA2E");
7211
+ function createA2E(config) {
7755
7212
  const mode = config.mode ?? "auto";
7756
7213
  const fallbackOnError = config.fallbackOnError ?? true;
7757
7214
  let useCpu;
7758
7215
  if (mode === "cpu") {
7759
7216
  useCpu = true;
7760
- logger10.info("Forcing CPU lip sync model (wav2arkit_cpu)");
7217
+ logger11.info("Forcing CPU A2E model (wav2arkit_cpu)");
7761
7218
  } else if (mode === "gpu") {
7762
7219
  useCpu = false;
7763
- logger10.info("Forcing GPU lip sync model (Wav2Vec2)");
7220
+ logger11.info("Forcing GPU A2E model (Wav2Vec2)");
7764
7221
  } else {
7765
- useCpu = shouldUseCpuLipSync();
7766
- logger10.info("Auto-detected lip sync model", {
7222
+ useCpu = shouldUseCpuA2E();
7223
+ logger11.info("Auto-detected A2E model", {
7767
7224
  useCpu,
7768
7225
  isSafari: isSafari()
7769
7226
  });
7770
7227
  }
7771
7228
  if (useCpu) {
7772
7229
  if (config.unifiedWorker) {
7773
- logger10.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7230
+ logger11.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7774
7231
  return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
7775
7232
  modelUrl: config.cpuModelUrl
7776
7233
  });
7777
7234
  }
7778
7235
  if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7779
- logger10.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7236
+ logger11.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7780
7237
  return new Wav2ArkitCpuWorker({
7781
7238
  modelUrl: config.cpuModelUrl
7782
7239
  });
7783
7240
  }
7784
- logger10.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7241
+ logger11.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7785
7242
  return new Wav2ArkitCpuInference({
7786
7243
  modelUrl: config.cpuModelUrl
7787
7244
  });
@@ -7793,13 +7250,13 @@ function createLipSync(config) {
7793
7250
  numIdentityClasses: config.numIdentityClasses
7794
7251
  });
7795
7252
  if (fallbackOnError) {
7796
- logger10.info("Creating Wav2Vec2Inference with CPU fallback");
7797
- return new LipSyncWithFallback(gpuInstance, config);
7253
+ logger11.info("Creating Wav2Vec2Inference with CPU fallback");
7254
+ return new A2EWithFallback(gpuInstance, config);
7798
7255
  }
7799
- logger10.info("Creating Wav2Vec2Inference (no fallback)");
7256
+ logger11.info("Creating Wav2Vec2Inference (no fallback)");
7800
7257
  return gpuInstance;
7801
7258
  }
7802
- var LipSyncWithFallback = class {
7259
+ var A2EWithFallback = class {
7803
7260
  constructor(gpuInstance, config) {
7804
7261
  this.hasFallenBack = false;
7805
7262
  this.implementation = gpuInstance;
@@ -7808,6 +7265,9 @@ var LipSyncWithFallback = class {
7808
7265
  get modelId() {
7809
7266
  return this.implementation.modelId;
7810
7267
  }
7268
+ get chunkSize() {
7269
+ return this.implementation.chunkSize;
7270
+ }
7811
7271
  get backend() {
7812
7272
  return this.implementation.backend;
7813
7273
  }
@@ -7822,7 +7282,7 @@ var LipSyncWithFallback = class {
7822
7282
  }
7823
7283
  }
7824
7284
  async fallbackToCpu(reason) {
7825
- logger10.warn("GPU model load failed, falling back to CPU model", { reason });
7285
+ logger11.warn("GPU model load failed, falling back to CPU model", { reason });
7826
7286
  try {
7827
7287
  await this.implementation.dispose();
7828
7288
  } catch {
@@ -7831,17 +7291,17 @@ var LipSyncWithFallback = class {
7831
7291
  this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
7832
7292
  modelUrl: this.config.cpuModelUrl
7833
7293
  });
7834
- logger10.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7294
+ logger11.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7835
7295
  } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7836
7296
  this.implementation = new Wav2ArkitCpuWorker({
7837
7297
  modelUrl: this.config.cpuModelUrl
7838
7298
  });
7839
- logger10.info("Fallback to Wav2ArkitCpuWorker successful");
7299
+ logger11.info("Fallback to Wav2ArkitCpuWorker successful");
7840
7300
  } else {
7841
7301
  this.implementation = new Wav2ArkitCpuInference({
7842
7302
  modelUrl: this.config.cpuModelUrl
7843
7303
  });
7844
- logger10.info("Fallback to Wav2ArkitCpuInference successful");
7304
+ logger11.info("Fallback to Wav2ArkitCpuInference successful");
7845
7305
  }
7846
7306
  this.hasFallenBack = true;
7847
7307
  return await this.implementation.load();
@@ -7854,8 +7314,198 @@ var LipSyncWithFallback = class {
7854
7314
  }
7855
7315
  };
7856
7316
 
7317
+ // src/inference/BlendshapeSmoother.ts
7318
+ var NUM_BLENDSHAPES = 52;
7319
+ var BlendshapeSmoother = class {
7320
+ constructor(config) {
7321
+ /** Whether any target has been set */
7322
+ this._hasTarget = false;
7323
+ this.halflife = config?.halflife ?? 0.06;
7324
+ this.values = new Float32Array(NUM_BLENDSHAPES);
7325
+ this.velocities = new Float32Array(NUM_BLENDSHAPES);
7326
+ this.targets = new Float32Array(NUM_BLENDSHAPES);
7327
+ }
7328
+ /** Whether a target frame has been set (false until first setTarget call) */
7329
+ get hasTarget() {
7330
+ return this._hasTarget;
7331
+ }
7332
+ /**
7333
+ * Set new target frame from inference output.
7334
+ * Springs will converge toward these values on subsequent update() calls.
7335
+ */
7336
+ setTarget(frame) {
7337
+ this.targets.set(frame);
7338
+ this._hasTarget = true;
7339
+ }
7340
+ /**
7341
+ * Advance all 52 springs by `dt` seconds and return the smoothed frame.
7342
+ *
7343
+ * Call this every render frame (e.g., inside requestAnimationFrame).
7344
+ * Returns the internal values buffer — do NOT mutate the returned array.
7345
+ *
7346
+ * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
7347
+ * @returns Smoothed blendshape values (Float32Array of 52)
7348
+ */
7349
+ update(dt) {
7350
+ if (!this._hasTarget) {
7351
+ return this.values;
7352
+ }
7353
+ if (this.halflife <= 0) {
7354
+ this.values.set(this.targets);
7355
+ this.velocities.fill(0);
7356
+ return this.values;
7357
+ }
7358
+ const damping = Math.LN2 / this.halflife;
7359
+ const eydt = Math.exp(-damping * dt);
7360
+ for (let i = 0; i < NUM_BLENDSHAPES; i++) {
7361
+ const j0 = this.values[i] - this.targets[i];
7362
+ const j1 = this.velocities[i] + j0 * damping;
7363
+ this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
7364
+ this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
7365
+ this.values[i] = Math.max(0, Math.min(1, this.values[i]));
7366
+ }
7367
+ return this.values;
7368
+ }
7369
+ /**
7370
+ * Decay all spring targets to neutral (0).
7371
+ *
7372
+ * Call when inference stalls (no new frames for threshold duration).
7373
+ * The springs will smoothly close the mouth / relax the face over
7374
+ * the halflife period rather than freezing.
7375
+ */
7376
+ decayToNeutral() {
7377
+ this.targets.fill(0);
7378
+ }
7379
+ /**
7380
+ * Reset all state (values, velocities, targets).
7381
+ * Call when starting a new playback session.
7382
+ */
7383
+ reset() {
7384
+ this.values.fill(0);
7385
+ this.velocities.fill(0);
7386
+ this.targets.fill(0);
7387
+ this._hasTarget = false;
7388
+ }
7389
+ };
7390
+
7391
+ // src/animation/audioEnergy.ts
7392
+ function calculateRMS(samples) {
7393
+ if (samples.length === 0) return 0;
7394
+ let sumSquares = 0;
7395
+ for (let i = 0; i < samples.length; i++) {
7396
+ sumSquares += samples[i] * samples[i];
7397
+ }
7398
+ return Math.sqrt(sumSquares / samples.length);
7399
+ }
7400
+ function calculatePeak(samples) {
7401
+ let peak = 0;
7402
+ for (let i = 0; i < samples.length; i++) {
7403
+ const abs = Math.abs(samples[i]);
7404
+ if (abs > peak) peak = abs;
7405
+ }
7406
+ return peak;
7407
+ }
7408
+ var AudioEnergyAnalyzer = class {
7409
+ /**
7410
+ * @param smoothingFactor How much to smooth (0 = no smoothing, 1 = infinite smoothing). Default 0.85
7411
+ * @param noiseFloor Minimum energy threshold to consider as signal. Default 0.01
7412
+ */
7413
+ constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
7414
+ this.smoothedRMS = 0;
7415
+ this.smoothedPeak = 0;
7416
+ this.smoothingFactor = Math.max(0, Math.min(0.99, smoothingFactor));
7417
+ this.noiseFloor = noiseFloor;
7418
+ }
7419
+ /**
7420
+ * Process audio samples and return smoothed energy values
7421
+ * @param samples Audio samples (Float32Array)
7422
+ * @returns Object with rms and peak values
7423
+ */
7424
+ process(samples) {
7425
+ const instantRMS = calculateRMS(samples);
7426
+ const instantPeak = calculatePeak(samples);
7427
+ const gatedRMS = instantRMS > this.noiseFloor ? instantRMS : 0;
7428
+ const gatedPeak = instantPeak > this.noiseFloor ? instantPeak : 0;
7429
+ if (gatedRMS > this.smoothedRMS) {
7430
+ this.smoothedRMS = this.smoothedRMS * 0.5 + gatedRMS * 0.5;
7431
+ } else {
7432
+ this.smoothedRMS = this.smoothedRMS * this.smoothingFactor + gatedRMS * (1 - this.smoothingFactor);
7433
+ }
7434
+ if (gatedPeak > this.smoothedPeak) {
7435
+ this.smoothedPeak = this.smoothedPeak * 0.3 + gatedPeak * 0.7;
7436
+ } else {
7437
+ this.smoothedPeak = this.smoothedPeak * this.smoothingFactor + gatedPeak * (1 - this.smoothingFactor);
7438
+ }
7439
+ const energy = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
7440
+ return {
7441
+ rms: this.smoothedRMS,
7442
+ peak: this.smoothedPeak,
7443
+ energy: Math.min(1, energy * 2)
7444
+ // Scale up and clamp
7445
+ };
7446
+ }
7447
+ /**
7448
+ * Reset analyzer state
7449
+ */
7450
+ reset() {
7451
+ this.smoothedRMS = 0;
7452
+ this.smoothedPeak = 0;
7453
+ }
7454
+ /**
7455
+ * Get current smoothed RMS value
7456
+ */
7457
+ get rms() {
7458
+ return this.smoothedRMS;
7459
+ }
7460
+ /**
7461
+ * Get current smoothed peak value
7462
+ */
7463
+ get peak() {
7464
+ return this.smoothedPeak;
7465
+ }
7466
+ };
7467
+ var EmphasisDetector = class {
7468
+ /**
7469
+ * @param historySize Number of frames to track. Default 10
7470
+ * @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
7471
+ */
7472
+ constructor(historySize = 10, emphasisThreshold = 0.15) {
7473
+ this.energyHistory = [];
7474
+ this.historySize = historySize;
7475
+ this.emphasisThreshold = emphasisThreshold;
7476
+ }
7477
+ /**
7478
+ * Process energy value and detect emphasis
7479
+ * @param energy Current energy value (0-1)
7480
+ * @returns Object with isEmphasis flag and emphasisStrength
7481
+ */
7482
+ process(energy) {
7483
+ this.energyHistory.push(energy);
7484
+ if (this.energyHistory.length > this.historySize) {
7485
+ this.energyHistory.shift();
7486
+ }
7487
+ if (this.energyHistory.length < 3) {
7488
+ return { isEmphasis: false, emphasisStrength: 0 };
7489
+ }
7490
+ const prevFrames = this.energyHistory.slice(0, -1);
7491
+ const avgPrev = prevFrames.reduce((a, b) => a + b, 0) / prevFrames.length;
7492
+ const increase = energy - avgPrev;
7493
+ const isEmphasis = increase > this.emphasisThreshold;
7494
+ return {
7495
+ isEmphasis,
7496
+ emphasisStrength: isEmphasis ? Math.min(1, increase / 0.3) : 0
7497
+ };
7498
+ }
7499
+ /**
7500
+ * Reset detector state
7501
+ */
7502
+ reset() {
7503
+ this.energyHistory = [];
7504
+ }
7505
+ };
7506
+
7857
7507
  // src/inference/SileroVADInference.ts
7858
- var logger11 = createLogger("SileroVAD");
7508
+ var logger12 = createLogger("SileroVAD");
7859
7509
  var SileroVADInference = class {
7860
7510
  constructor(config) {
7861
7511
  this.session = null;
@@ -7929,23 +7579,23 @@ var SileroVADInference = class {
7929
7579
  "model.sample_rate": this.config.sampleRate
7930
7580
  });
7931
7581
  try {
7932
- logger11.info("Loading ONNX Runtime...", { preference: this.config.backend });
7582
+ logger12.info("Loading ONNX Runtime...", { preference: this.config.backend });
7933
7583
  const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
7934
7584
  this.ort = ort;
7935
7585
  this._backend = backend;
7936
- logger11.info("ONNX Runtime loaded", { backend: this._backend });
7586
+ logger12.info("ONNX Runtime loaded", { backend: this._backend });
7937
7587
  const cache = getModelCache();
7938
7588
  const modelUrl = this.config.modelUrl;
7939
7589
  const isCached = await cache.has(modelUrl);
7940
7590
  let modelBuffer;
7941
7591
  if (isCached) {
7942
- logger11.debug("Loading model from cache", { modelUrl });
7592
+ logger12.debug("Loading model from cache", { modelUrl });
7943
7593
  modelBuffer = await cache.get(modelUrl);
7944
7594
  } else {
7945
- logger11.debug("Fetching and caching model", { modelUrl });
7595
+ logger12.debug("Fetching and caching model", { modelUrl });
7946
7596
  modelBuffer = await fetchWithCache(modelUrl);
7947
7597
  }
7948
- logger11.debug("Creating ONNX session", {
7598
+ logger12.debug("Creating ONNX session", {
7949
7599
  size: formatBytes(modelBuffer.byteLength),
7950
7600
  backend: this._backend
7951
7601
  });
@@ -7954,7 +7604,7 @@ var SileroVADInference = class {
7954
7604
  this.session = await ort.InferenceSession.create(modelData, sessionOptions);
7955
7605
  this.reset();
7956
7606
  const loadTimeMs = performance.now() - startTime;
7957
- logger11.info("Model loaded successfully", {
7607
+ logger12.info("Model loaded successfully", {
7958
7608
  backend: this._backend,
7959
7609
  loadTimeMs: Math.round(loadTimeMs),
7960
7610
  sampleRate: this.config.sampleRate,
@@ -8009,7 +7659,7 @@ var SileroVADInference = class {
8009
7659
  []
8010
7660
  );
8011
7661
  } catch (e) {
8012
- logger11.warn("BigInt64Array not available, using bigint array fallback", {
7662
+ logger12.warn("BigInt64Array not available, using bigint array fallback", {
8013
7663
  error: e instanceof Error ? e.message : String(e)
8014
7664
  });
8015
7665
  this.srTensor = new this.ort.Tensor(
@@ -8115,7 +7765,7 @@ var SileroVADInference = class {
8115
7765
  this.preSpeechBuffer.shift();
8116
7766
  }
8117
7767
  }
8118
- logger11.trace("Skipping VAD inference - audio too quiet", {
7768
+ logger12.trace("Skipping VAD inference - audio too quiet", {
8119
7769
  rms: Math.round(rms * 1e4) / 1e4,
8120
7770
  threshold: MIN_ENERGY_THRESHOLD
8121
7771
  });
@@ -8169,7 +7819,7 @@ var SileroVADInference = class {
8169
7819
  if (isSpeech && !this.wasSpeaking) {
8170
7820
  preSpeechChunks = [...this.preSpeechBuffer];
8171
7821
  this.preSpeechBuffer = [];
8172
- logger11.debug("Speech started with pre-speech buffer", {
7822
+ logger12.debug("Speech started with pre-speech buffer", {
8173
7823
  preSpeechChunks: preSpeechChunks.length,
8174
7824
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8175
7825
  });
@@ -8182,7 +7832,7 @@ var SileroVADInference = class {
8182
7832
  this.preSpeechBuffer = [];
8183
7833
  }
8184
7834
  this.wasSpeaking = isSpeech;
8185
- logger11.trace("VAD inference completed", {
7835
+ logger12.trace("VAD inference completed", {
8186
7836
  probability: Math.round(probability * 1e3) / 1e3,
8187
7837
  isSpeech,
8188
7838
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -8213,7 +7863,7 @@ var SileroVADInference = class {
8213
7863
  const oomError = new Error(
8214
7864
  `SileroVAD inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reducing concurrent model sessions or reloading the page.`
8215
7865
  );
8216
- logger11.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
7866
+ logger12.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
8217
7867
  pointer: `0x${err.toString(16)}`,
8218
7868
  backend: this._backend
8219
7869
  });
@@ -8256,7 +7906,7 @@ var SileroVADInference = class {
8256
7906
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
8257
7907
 
8258
7908
  // src/inference/SileroVADWorker.ts
8259
- var logger12 = createLogger("SileroVADWorker");
7909
+ var logger13 = createLogger("SileroVADWorker");
8260
7910
  var WASM_CDN_PATH5 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
8261
7911
  var LOAD_TIMEOUT_MS3 = 1e4;
8262
7912
  var INFERENCE_TIMEOUT_MS3 = 1e3;
@@ -8534,7 +8184,7 @@ var SileroVADWorker = class {
8534
8184
  this.handleWorkerMessage(event.data);
8535
8185
  };
8536
8186
  worker.onerror = (error) => {
8537
- logger12.error("Worker error", { error: error.message });
8187
+ logger13.error("Worker error", { error: error.message });
8538
8188
  for (const [, resolver] of this.pendingResolvers) {
8539
8189
  resolver.reject(new Error(`Worker error: ${error.message}`));
8540
8190
  }
@@ -8610,9 +8260,9 @@ var SileroVADWorker = class {
8610
8260
  "model.sample_rate": this.config.sampleRate
8611
8261
  });
8612
8262
  try {
8613
- logger12.info("Creating VAD worker...");
8263
+ logger13.info("Creating VAD worker...");
8614
8264
  this.worker = this.createWorker();
8615
- logger12.info("Loading model in worker...", {
8265
+ logger13.info("Loading model in worker...", {
8616
8266
  modelUrl: this.config.modelUrl,
8617
8267
  sampleRate: this.config.sampleRate
8618
8268
  });
@@ -8628,7 +8278,7 @@ var SileroVADWorker = class {
8628
8278
  );
8629
8279
  this._isLoaded = true;
8630
8280
  const loadTimeMs = performance.now() - startTime;
8631
- logger12.info("VAD worker loaded successfully", {
8281
+ logger13.info("VAD worker loaded successfully", {
8632
8282
  backend: "wasm",
8633
8283
  loadTimeMs: Math.round(loadTimeMs),
8634
8284
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -8735,7 +8385,7 @@ var SileroVADWorker = class {
8735
8385
  if (isSpeech && !this.wasSpeaking) {
8736
8386
  preSpeechChunks = [...this.preSpeechBuffer];
8737
8387
  this.preSpeechBuffer = [];
8738
- logger12.debug("Speech started with pre-speech buffer", {
8388
+ logger13.debug("Speech started with pre-speech buffer", {
8739
8389
  preSpeechChunks: preSpeechChunks.length,
8740
8390
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
8741
8391
  });
@@ -8748,7 +8398,7 @@ var SileroVADWorker = class {
8748
8398
  this.preSpeechBuffer = [];
8749
8399
  }
8750
8400
  this.wasSpeaking = isSpeech;
8751
- logger12.trace("VAD worker inference completed", {
8401
+ logger13.trace("VAD worker inference completed", {
8752
8402
  probability: Math.round(result.probability * 1e3) / 1e3,
8753
8403
  isSpeech,
8754
8404
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -8816,44 +8466,44 @@ var SileroVADWorker = class {
8816
8466
  };
8817
8467
 
8818
8468
  // src/inference/createSileroVAD.ts
8819
- var logger13 = createLogger("createSileroVAD");
8469
+ var logger14 = createLogger("createSileroVAD");
8820
8470
  function supportsVADWorker() {
8821
8471
  if (typeof Worker === "undefined") {
8822
- logger13.debug("Worker not supported: Worker constructor undefined");
8472
+ logger14.debug("Worker not supported: Worker constructor undefined");
8823
8473
  return false;
8824
8474
  }
8825
8475
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
8826
- logger13.debug("Worker not supported: URL.createObjectURL unavailable");
8476
+ logger14.debug("Worker not supported: URL.createObjectURL unavailable");
8827
8477
  return false;
8828
8478
  }
8829
8479
  if (typeof Blob === "undefined") {
8830
- logger13.debug("Worker not supported: Blob constructor unavailable");
8480
+ logger14.debug("Worker not supported: Blob constructor unavailable");
8831
8481
  return false;
8832
8482
  }
8833
8483
  return true;
8834
8484
  }
8835
8485
  function createSileroVAD(config) {
8836
8486
  if (config.unifiedWorker) {
8837
- logger13.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8487
+ logger14.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8838
8488
  return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
8839
8489
  }
8840
8490
  const fallbackOnError = config.fallbackOnError ?? true;
8841
8491
  let useWorker;
8842
8492
  if (config.useWorker !== void 0) {
8843
8493
  useWorker = config.useWorker;
8844
- logger13.debug("Worker preference explicitly set", { useWorker });
8494
+ logger14.debug("Worker preference explicitly set", { useWorker });
8845
8495
  } else {
8846
8496
  const workerSupported = supportsVADWorker();
8847
8497
  const onMobile = isMobile();
8848
8498
  useWorker = workerSupported && !onMobile;
8849
- logger13.debug("Auto-detected Worker preference", {
8499
+ logger14.debug("Auto-detected Worker preference", {
8850
8500
  useWorker,
8851
8501
  workerSupported,
8852
8502
  onMobile
8853
8503
  });
8854
8504
  }
8855
8505
  if (useWorker) {
8856
- logger13.info("Creating SileroVADWorker (off-main-thread)");
8506
+ logger14.info("Creating SileroVADWorker (off-main-thread)");
8857
8507
  const worker = new SileroVADWorker({
8858
8508
  modelUrl: config.modelUrl,
8859
8509
  sampleRate: config.sampleRate,
@@ -8865,7 +8515,7 @@ function createSileroVAD(config) {
8865
8515
  }
8866
8516
  return worker;
8867
8517
  }
8868
- logger13.info("Creating SileroVADInference (main thread)");
8518
+ logger14.info("Creating SileroVADInference (main thread)");
8869
8519
  return new SileroVADInference(config);
8870
8520
  }
8871
8521
  var VADWorkerWithFallback = class {
@@ -8891,7 +8541,7 @@ var VADWorkerWithFallback = class {
8891
8541
  try {
8892
8542
  return await this.implementation.load();
8893
8543
  } catch (error) {
8894
- logger13.warn("Worker load failed, falling back to main thread", {
8544
+ logger14.warn("Worker load failed, falling back to main thread", {
8895
8545
  error: error instanceof Error ? error.message : String(error)
8896
8546
  });
8897
8547
  try {
@@ -8900,7 +8550,7 @@ var VADWorkerWithFallback = class {
8900
8550
  }
8901
8551
  this.implementation = new SileroVADInference(this.config);
8902
8552
  this.hasFallenBack = true;
8903
- logger13.info("Fallback to SileroVADInference successful");
8553
+ logger14.info("Fallback to SileroVADInference successful");
8904
8554
  return await this.implementation.load();
8905
8555
  }
8906
8556
  }
@@ -8921,8 +8571,175 @@ var VADWorkerWithFallback = class {
8921
8571
  }
8922
8572
  };
8923
8573
 
8574
+ // src/inference/A2EOrchestrator.ts
8575
+ var logger15 = createLogger("A2EOrchestrator");
8576
/**
 * Couples an A2E (audio-to-expression) model with live microphone capture.
 *
 * Lifecycle: `load()` creates the model via `createA2E` and wraps it in an
 * `A2EProcessor`; `start()` opens the microphone and feeds captured samples to
 * the processor; `stop()` tears the capture graph down; `dispose()` releases
 * everything and makes the instance unusable.
 */
var A2EOrchestrator = class {
  /**
   * @param {object} config - Orchestrator options. `sampleRate` defaults to
   *   16000 and may be overridden. Keys read by this class: `gpuModelUrl`,
   *   `gpuExternalDataUrl`, `cpuModelUrl`, `a2eConfig`, `chunkSize`,
   *   `onFrame`, `onError`, `onReady`.
   */
  constructor(config) {
    this.a2e = null;
    this.processor = null;
    // Mic capture state (lightweight: raw Web Audio is used here instead of the
    // MicrophoneCapture class, which requires an external EventEmitter).
    this.stream = null;
    this.audioContext = null;
    this.sourceNode = null;
    this.scriptProcessor = null;
    this.nativeSampleRate = 0;
    this._isReady = false;
    this._isStreaming = false;
    this._backend = null;
    this.disposed = false;
    // Promise of the start() call currently in flight, or null.
    this.pendingStart = null;
    this.config = {
      sampleRate: 16e3,
      ...config
    };
  }
  /** Latest blendshape weights from inference (null if none yet) */
  get latestWeights() {
    return this.processor?.latestFrame ?? null;
  }
  /** Whether the model is loaded and ready for inference */
  get isReady() {
    return this._isReady;
  }
  /** Whether mic is active and inference loop is running */
  get isStreaming() {
    return this._isStreaming;
  }
  /** Current backend type (webgpu, wasm, or null) */
  get backend() {
    return this._backend;
  }
  /**
   * Load the A2E model and create the processor.
   *
   * `cpuModelUrl` falls back to `gpuModelUrl` when not configured, and any
   * keys in `config.a2eConfig` override the URLs passed to `createA2E`.
   * Invokes `config.onReady` once the processor exists.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the orchestrator was disposed before the call or while
   *   the model was loading; also propagates any rejection from the model's
   *   own `load()`.
   */
  async load() {
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    logger15.info("Loading A2E model...");
    const a2e = createA2E({
      gpuModelUrl: this.config.gpuModelUrl,
      gpuExternalDataUrl: this.config.gpuExternalDataUrl,
      cpuModelUrl: this.config.cpuModelUrl ?? this.config.gpuModelUrl,
      ...this.config.a2eConfig
    });
    // Publish before awaiting so a concurrent dispose() can release it.
    this.a2e = a2e;
    const info = await a2e.load();
    // dispose() may have run during the await; it nulls this.a2e, so carrying
    // on would mark a disposed instance as ready around a null backend.
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    this._backend = info.backend;
    this.processor = new A2EProcessor({
      backend: a2e,
      sampleRate: this.config.sampleRate,
      chunkSize: this.config.chunkSize,
      onFrame: this.config.onFrame,
      onError: this.config.onError
    });
    this._isReady = true;
    logger15.info("A2E model loaded", {
      backend: info.backend,
      loadTimeMs: info.loadTimeMs,
      modelId: a2e.modelId
    });
    this.config.onReady?.();
  }
  /**
   * Start mic capture and inference loop.
   *
   * Resolves immediately when already streaming. Overlapping calls share one
   * capture attempt, so only a single microphone stream is ever acquired.
   * NOTE: calling stop() while a start() is still pending does not cancel it.
   *
   * @returns {Promise<void>}
   * @throws {Error} If disposed, if load() has not completed, or if capture
   *   setup fails (the failure is also reported through `config.onError`).
   */
  async start() {
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    if (!this._isReady || !this.processor) throw new Error("Model not loaded. Call load() first.");
    if (this._isStreaming) return;
    if (!this.pendingStart) {
      this.pendingStart = this.beginCapture().finally(() => {
        this.pendingStart = null;
      });
    }
    return this.pendingStart;
  }
  /**
   * Acquire the microphone, build the Web Audio graph and start the processor.
   * Internal helper for start(); on any failure every resource acquired so far
   * is released before the error is reported and rethrown.
   */
  async beginCapture() {
    try {
      if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
        throw new Error("Microphone capture is unavailable: navigator.mediaDevices.getUserMedia is not supported");
      }
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: { ideal: this.config.sampleRate },
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });
      // dispose() may have run while the permission prompt was open.
      if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
      this.audioContext = new AudioContext({ sampleRate: this.config.sampleRate });
      if (this.audioContext.state === "suspended") {
        await this.audioContext.resume();
        if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
      }
      this.nativeSampleRate = this.audioContext.sampleRate;
      this.sourceNode = this.audioContext.createMediaStreamSource(this.stream);
      this.scriptProcessor = this.audioContext.createScriptProcessor(4096, 1, 1);
      this.scriptProcessor.onaudioprocess = (e) => {
        if (!this._isStreaming || !this.processor) return;
        this.processor.pushAudio(this.toTargetRate(e.inputBuffer.getChannelData(0)));
      };
      this.sourceNode.connect(this.scriptProcessor);
      this.scriptProcessor.connect(this.audioContext.destination);
      this._isStreaming = true;
      this.processor.startDrip();
      logger15.info("Mic capture started", { sampleRate: this.nativeSampleRate });
    } catch (err) {
      // Never leave the microphone open after a failed start.
      this._isStreaming = false;
      this.releaseCapture();
      const error = err instanceof Error ? err : new Error(String(err));
      logger15.error("Failed to start mic capture", { error: error.message });
      this.config.onError?.(error);
      throw error;
    }
  }
  /**
   * Return a copy of `input` at `config.sampleRate`, linearly interpolating
   * when the AudioContext runs at a different rate. The input is always
   * copied rather than passed through, so the result never aliases it.
   *
   * @param {Float32Array} input - Mono samples at `nativeSampleRate`.
   * @returns {Float32Array} Samples at `config.sampleRate`.
   */
  toTargetRate(input) {
    if (this.nativeSampleRate === this.config.sampleRate) {
      return new Float32Array(input);
    }
    const ratio = this.config.sampleRate / this.nativeSampleRate;
    const samples = new Float32Array(Math.round(input.length * ratio));
    const lastIdx = input.length - 1;
    for (let i = 0; i < samples.length; i++) {
      const srcIdx = i / ratio;
      const lo = Math.floor(srcIdx);
      const hi = Math.min(lo + 1, lastIdx);
      const frac = srcIdx - lo;
      samples[i] = input[lo] * (1 - frac) + input[hi] * frac;
    }
    return samples;
  }
  /**
   * Disconnect the audio graph, stop the microphone tracks and close the
   * AudioContext. Safe to call when nothing (or only part of the graph) exists.
   */
  releaseCapture() {
    if (this.scriptProcessor) {
      this.scriptProcessor.disconnect();
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor = null;
    }
    if (this.sourceNode) {
      this.sourceNode.disconnect();
      this.sourceNode = null;
    }
    if (this.stream) {
      for (const track of this.stream.getTracks()) {
        track.stop();
      }
      this.stream = null;
    }
    if (this.audioContext) {
      // Best-effort: a failed close must not break teardown.
      this.audioContext.close().catch(() => {
      });
      this.audioContext = null;
    }
  }
  /**
   * Stop mic capture and inference loop. Also resets the processor, and may
   * be called when not streaming.
   */
  stop() {
    this._isStreaming = false;
    if (this.processor) {
      this.processor.stopDrip();
      this.processor.reset();
    }
    this.releaseCapture();
    logger15.info("Mic capture stopped");
  }
  /**
   * Dispose of all resources. Idempotent; afterwards load() and start()
   * reject with a "disposed" error.
   *
   * @returns {Promise<void>}
   */
  async dispose() {
    if (this.disposed) return;
    this.disposed = true;
    this.stop();
    if (this.processor) {
      this.processor.dispose();
      this.processor = null;
    }
    if (this.a2e) {
      await this.a2e.dispose();
      this.a2e = null;
    }
    this._isReady = false;
    this._backend = null;
  }
};
8740
+
8924
8741
  // src/inference/SafariSpeechRecognition.ts
8925
- var logger14 = createLogger("SafariSpeech");
8742
+ var logger16 = createLogger("SafariSpeech");
8926
8743
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
8927
8744
  constructor(config = {}) {
8928
8745
  this.recognition = null;
@@ -8941,7 +8758,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
8941
8758
  interimResults: config.interimResults ?? true,
8942
8759
  maxAlternatives: config.maxAlternatives ?? 1
8943
8760
  };
8944
- logger14.debug("SafariSpeechRecognition created", {
8761
+ logger16.debug("SafariSpeechRecognition created", {
8945
8762
  language: this.config.language,
8946
8763
  continuous: this.config.continuous
8947
8764
  });
@@ -9002,7 +8819,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9002
8819
  */
9003
8820
  async start() {
9004
8821
  if (this.isListening) {
9005
- logger14.warn("Already listening");
8822
+ logger16.warn("Already listening");
9006
8823
  return;
9007
8824
  }
9008
8825
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -9032,7 +8849,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9032
8849
  this.isListening = true;
9033
8850
  this.startTime = performance.now();
9034
8851
  this.accumulatedText = "";
9035
- logger14.info("Speech recognition started", {
8852
+ logger16.info("Speech recognition started", {
9036
8853
  language: this.config.language
9037
8854
  });
9038
8855
  span?.end();
@@ -9047,7 +8864,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9047
8864
  */
9048
8865
  async stop() {
9049
8866
  if (!this.isListening || !this.recognition) {
9050
- logger14.warn("Not currently listening");
8867
+ logger16.warn("Not currently listening");
9051
8868
  return {
9052
8869
  text: this.accumulatedText,
9053
8870
  language: this.config.language,
@@ -9076,7 +8893,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9076
8893
  if (this.recognition && this.isListening) {
9077
8894
  this.recognition.abort();
9078
8895
  this.isListening = false;
9079
- logger14.info("Speech recognition aborted");
8896
+ logger16.info("Speech recognition aborted");
9080
8897
  }
9081
8898
  }
9082
8899
  /**
@@ -9107,7 +8924,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9107
8924
  this.isListening = false;
9108
8925
  this.resultCallbacks = [];
9109
8926
  this.errorCallbacks = [];
9110
- logger14.debug("SafariSpeechRecognition disposed");
8927
+ logger16.debug("SafariSpeechRecognition disposed");
9111
8928
  }
9112
8929
  /**
9113
8930
  * Set up event handlers for the recognition instance
@@ -9135,7 +8952,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9135
8952
  confidence: alternative.confidence
9136
8953
  };
9137
8954
  this.emitResult(speechResult);
9138
- logger14.trace("Speech result", {
8955
+ logger16.trace("Speech result", {
9139
8956
  text: text.substring(0, 50),
9140
8957
  isFinal,
9141
8958
  confidence: alternative.confidence
@@ -9145,12 +8962,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9145
8962
  span?.end();
9146
8963
  } catch (error) {
9147
8964
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
9148
- logger14.error("Error processing speech result", { error });
8965
+ logger16.error("Error processing speech result", { error });
9149
8966
  }
9150
8967
  };
9151
8968
  this.recognition.onerror = (event) => {
9152
8969
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
9153
- logger14.error("Speech recognition error", { error: event.error, message: event.message });
8970
+ logger16.error("Speech recognition error", { error: event.error, message: event.message });
9154
8971
  this.emitError(error);
9155
8972
  if (this.stopRejecter) {
9156
8973
  this.stopRejecter(error);
@@ -9160,7 +8977,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9160
8977
  };
9161
8978
  this.recognition.onend = () => {
9162
8979
  this.isListening = false;
9163
- logger14.info("Speech recognition ended", {
8980
+ logger16.info("Speech recognition ended", {
9164
8981
  totalText: this.accumulatedText.length,
9165
8982
  durationMs: performance.now() - this.startTime
9166
8983
  });
@@ -9177,13 +8994,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9177
8994
  }
9178
8995
  };
9179
8996
  this.recognition.onstart = () => {
9180
- logger14.debug("Speech recognition started by browser");
8997
+ logger16.debug("Speech recognition started by browser");
9181
8998
  };
9182
8999
  this.recognition.onspeechstart = () => {
9183
- logger14.debug("Speech detected");
9000
+ logger16.debug("Speech detected");
9184
9001
  };
9185
9002
  this.recognition.onspeechend = () => {
9186
- logger14.debug("Speech ended");
9003
+ logger16.debug("Speech ended");
9187
9004
  };
9188
9005
  }
9189
9006
  /**
@@ -9194,7 +9011,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9194
9011
  try {
9195
9012
  callback(result);
9196
9013
  } catch (error) {
9197
- logger14.error("Error in result callback", { error });
9014
+ logger16.error("Error in result callback", { error });
9198
9015
  }
9199
9016
  }
9200
9017
  }
@@ -9206,7 +9023,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
9206
9023
  try {
9207
9024
  callback(error);
9208
9025
  } catch (callbackError) {
9209
- logger14.error("Error in error callback", { error: callbackError });
9026
+ logger16.error("Error in error callback", { error: callbackError });
9210
9027
  }
9211
9028
  }
9212
9029
  }
@@ -9619,13 +9436,14 @@ var AgentCoreAdapter = class extends EventEmitter {
9619
9436
  if (!this.lam) {
9620
9437
  throw new Error("LAM must be initialized before pipeline");
9621
9438
  }
9622
- this.pipeline = new SyncedAudioPipeline({
9439
+ this.pipeline = new FullFacePipeline({
9623
9440
  lam: this.lam,
9624
9441
  sampleRate: 16e3,
9625
9442
  chunkTargetMs: 200
9626
9443
  });
9627
9444
  await this.pipeline.initialize();
9628
- this.pipeline.on("frame_ready", (frame) => {
9445
+ this.pipeline.on("full_frame_ready", (fullFrame) => {
9446
+ const frame = fullFrame.blendshapes;
9629
9447
  this.emit("animation", {
9630
9448
  blendshapes: frame,
9631
9449
  get: (name) => {
@@ -9804,9 +9622,9 @@ var AgentCoreAdapter = class extends EventEmitter {
9804
9622
  });
9805
9623
  }
9806
9624
  }
9807
- // REMOVED: processAudioForAnimation() - now handled by SyncedAudioPipeline
9625
+ // REMOVED: processAudioForAnimation() - now handled by FullFacePipeline
9808
9626
  // The pipeline manages audio scheduling, LAM inference, and frame synchronization
9809
- // Frames are emitted via pipeline.on('frame_ready') event (see initPipeline())
9627
+ // Frames are emitted via pipeline.on('full_frame_ready') event (see initPipeline())
9810
9628
  /**
9811
9629
  * Detect voice activity using Silero VAD
9812
9630
  * Falls back to simple RMS if VAD not available