@omote/core 0.4.7 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +470 -861
- package/dist/index.d.ts +470 -861
- package/dist/index.js +1383 -1565
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +949 -1131
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -30,6 +30,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
30
30
|
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
|
+
A2EOrchestrator: () => A2EOrchestrator,
|
|
34
|
+
A2EProcessor: () => A2EProcessor,
|
|
33
35
|
ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
|
|
34
36
|
AgentCoreAdapter: () => AgentCoreAdapter,
|
|
35
37
|
AnimationGraph: () => AnimationGraph,
|
|
@@ -37,23 +39,22 @@ __export(index_exports, {
|
|
|
37
39
|
AudioEnergyAnalyzer: () => AudioEnergyAnalyzer,
|
|
38
40
|
AudioScheduler: () => AudioScheduler,
|
|
39
41
|
AudioSyncManager: () => AudioSyncManager,
|
|
42
|
+
BLENDSHAPE_TO_GROUP: () => BLENDSHAPE_TO_GROUP,
|
|
43
|
+
BlendshapeSmoother: () => BlendshapeSmoother,
|
|
40
44
|
CTC_VOCAB: () => CTC_VOCAB,
|
|
41
45
|
ConsoleExporter: () => ConsoleExporter,
|
|
42
46
|
ConversationOrchestrator: () => ConversationOrchestrator,
|
|
43
47
|
DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
|
|
44
48
|
DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
|
|
45
|
-
EMOTION_ARKIT_MAP: () => EMOTION_ARKIT_MAP,
|
|
46
49
|
EMOTION_NAMES: () => EMOTION_NAMES,
|
|
47
50
|
EMOTION_VECTOR_SIZE: () => EMOTION_VECTOR_SIZE,
|
|
48
51
|
EmotionController: () => EmotionController,
|
|
49
52
|
EmotionPresets: () => EmotionPresets,
|
|
50
|
-
EmotionToBlendshapeMapper: () => EmotionToBlendshapeMapper,
|
|
51
53
|
EmphasisDetector: () => EmphasisDetector,
|
|
52
54
|
EventEmitter: () => EventEmitter,
|
|
53
55
|
FullFacePipeline: () => FullFacePipeline,
|
|
54
56
|
INFERENCE_LATENCY_BUCKETS: () => INFERENCE_LATENCY_BUCKETS,
|
|
55
57
|
InterruptionHandler: () => InterruptionHandler,
|
|
56
|
-
LAMPipeline: () => LAMPipeline,
|
|
57
58
|
LAM_BLENDSHAPES: () => LAM_BLENDSHAPES,
|
|
58
59
|
LOG_LEVEL_PRIORITY: () => LOG_LEVEL_PRIORITY,
|
|
59
60
|
MODEL_LOAD_TIME_BUCKETS: () => MODEL_LOAD_TIME_BUCKETS,
|
|
@@ -72,74 +73,55 @@ __export(index_exports, {
|
|
|
72
73
|
SileroVADInference: () => SileroVADInference,
|
|
73
74
|
SileroVADUnifiedAdapter: () => SileroVADUnifiedAdapter,
|
|
74
75
|
SileroVADWorker: () => SileroVADWorker,
|
|
75
|
-
SyncedAudioPipeline: () => SyncedAudioPipeline,
|
|
76
76
|
TenantManager: () => TenantManager,
|
|
77
|
-
UPPER_FACE_BLENDSHAPES: () => UPPER_FACE_BLENDSHAPES,
|
|
78
77
|
UnifiedInferenceWorker: () => UnifiedInferenceWorker,
|
|
79
|
-
WAV2ARKIT_BLENDSHAPES: () => WAV2ARKIT_BLENDSHAPES,
|
|
80
78
|
Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
|
|
81
79
|
Wav2ArkitCpuUnifiedAdapter: () => Wav2ArkitCpuUnifiedAdapter,
|
|
82
80
|
Wav2ArkitCpuWorker: () => Wav2ArkitCpuWorker,
|
|
83
81
|
Wav2Vec2Inference: () => Wav2Vec2Inference,
|
|
84
|
-
applyCMVN: () => applyCMVN,
|
|
85
|
-
applyLFR: () => applyLFR,
|
|
86
82
|
blendEmotions: () => blendEmotions,
|
|
87
83
|
calculatePeak: () => calculatePeak,
|
|
88
84
|
calculateRMS: () => calculateRMS,
|
|
89
|
-
computeKaldiFbank: () => computeKaldiFbank,
|
|
90
85
|
configureCacheLimit: () => configureCacheLimit,
|
|
91
86
|
configureLogging: () => configureLogging,
|
|
92
87
|
configureTelemetry: () => configureTelemetry,
|
|
88
|
+
createA2E: () => createA2E,
|
|
93
89
|
createEmotionVector: () => createEmotionVector,
|
|
94
|
-
createLipSync: () => createLipSync,
|
|
95
90
|
createLogger: () => createLogger,
|
|
96
91
|
createSenseVoice: () => createSenseVoice,
|
|
97
|
-
createSessionWithFallback: () => createSessionWithFallback,
|
|
98
92
|
createSileroVAD: () => createSileroVAD,
|
|
99
|
-
ctcGreedyDecode: () => ctcGreedyDecode,
|
|
100
93
|
fetchWithCache: () => fetchWithCache,
|
|
101
94
|
formatBytes: () => formatBytes,
|
|
102
95
|
getCacheConfig: () => getCacheConfig,
|
|
103
96
|
getCacheKey: () => getCacheKey,
|
|
104
97
|
getEmotionPreset: () => getEmotionPreset,
|
|
105
|
-
getLoadedBackend: () => getLoadedBackend,
|
|
106
98
|
getLoggingConfig: () => getLoggingConfig,
|
|
107
99
|
getModelCache: () => getModelCache,
|
|
108
|
-
getOnnxRuntime: () => getOnnxRuntime,
|
|
109
|
-
getOnnxRuntimeForPreference: () => getOnnxRuntimeForPreference,
|
|
110
100
|
getOptimalWasmThreads: () => getOptimalWasmThreads,
|
|
111
101
|
getRecommendedBackend: () => getRecommendedBackend,
|
|
112
|
-
getSessionOptions: () => getSessionOptions,
|
|
113
102
|
getTelemetry: () => getTelemetry,
|
|
114
103
|
hasWebGPUApi: () => hasWebGPUApi,
|
|
115
104
|
isAndroid: () => isAndroid,
|
|
116
105
|
isIOS: () => isIOS,
|
|
117
106
|
isIOSSafari: () => isIOSSafari,
|
|
118
107
|
isMobile: () => isMobile,
|
|
119
|
-
isOnnxRuntimeLoaded: () => isOnnxRuntimeLoaded,
|
|
120
108
|
isProtocolEvent: () => isProtocolEvent,
|
|
121
109
|
isSafari: () => isSafari,
|
|
122
110
|
isSpeechRecognitionAvailable: () => isSpeechRecognitionAvailable,
|
|
123
111
|
isWebGPUAvailable: () => isWebGPUAvailable,
|
|
112
|
+
lerpBlendshapes: () => lerpBlendshapes,
|
|
124
113
|
lerpEmotion: () => lerpEmotion,
|
|
125
114
|
noopLogger: () => noopLogger,
|
|
126
|
-
parseCMVNFromMetadata: () => parseCMVNFromMetadata,
|
|
127
|
-
parseTokensFile: () => parseTokensFile,
|
|
128
115
|
preloadModels: () => preloadModels,
|
|
129
|
-
preloadOnnxRuntime: () => preloadOnnxRuntime,
|
|
130
|
-
remapWav2ArkitToLam: () => remapWav2ArkitToLam,
|
|
131
116
|
resetLoggingConfig: () => resetLoggingConfig,
|
|
132
117
|
resolveBackend: () => resolveBackend,
|
|
133
|
-
resolveLanguageId: () => resolveLanguageId,
|
|
134
|
-
resolveTextNormId: () => resolveTextNormId,
|
|
135
118
|
setLogLevel: () => setLogLevel,
|
|
136
119
|
setLoggingEnabled: () => setLoggingEnabled,
|
|
137
120
|
shouldEnableWasmProxy: () => shouldEnableWasmProxy,
|
|
138
|
-
|
|
121
|
+
shouldUseCpuA2E: () => shouldUseCpuA2E,
|
|
139
122
|
shouldUseNativeASR: () => shouldUseNativeASR,
|
|
140
|
-
|
|
141
|
-
supportsVADWorker: () => supportsVADWorker
|
|
142
|
-
symmetrizeBlendshapes: () => symmetrizeBlendshapes
|
|
123
|
+
shouldUseServerA2E: () => shouldUseServerA2E,
|
|
124
|
+
supportsVADWorker: () => supportsVADWorker
|
|
143
125
|
});
|
|
144
126
|
module.exports = __toCommonJS(index_exports);
|
|
145
127
|
|
|
@@ -649,730 +631,543 @@ var AudioChunkCoalescer = class {
|
|
|
649
631
|
}
|
|
650
632
|
};
|
|
651
633
|
|
|
652
|
-
// src/
|
|
653
|
-
var
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
634
|
+
// src/logging/types.ts
|
|
635
|
+
var LOG_LEVEL_PRIORITY = {
|
|
636
|
+
error: 0,
|
|
637
|
+
warn: 1,
|
|
638
|
+
info: 2,
|
|
639
|
+
debug: 3,
|
|
640
|
+
trace: 4,
|
|
641
|
+
verbose: 5
|
|
642
|
+
};
|
|
643
|
+
var DEFAULT_LOGGING_CONFIG = {
|
|
644
|
+
level: "info",
|
|
645
|
+
enabled: true,
|
|
646
|
+
format: "pretty",
|
|
647
|
+
timestamps: true,
|
|
648
|
+
includeModule: true
|
|
649
|
+
};
|
|
650
|
+
|
|
651
|
+
// src/logging/formatters.ts
|
|
652
|
+
var COLORS = {
|
|
653
|
+
reset: "\x1B[0m",
|
|
654
|
+
red: "\x1B[31m",
|
|
655
|
+
yellow: "\x1B[33m",
|
|
656
|
+
blue: "\x1B[34m",
|
|
657
|
+
cyan: "\x1B[36m",
|
|
658
|
+
gray: "\x1B[90m",
|
|
659
|
+
white: "\x1B[37m",
|
|
660
|
+
magenta: "\x1B[35m"
|
|
661
|
+
};
|
|
662
|
+
var LEVEL_COLORS = {
|
|
663
|
+
error: COLORS.red,
|
|
664
|
+
warn: COLORS.yellow,
|
|
665
|
+
info: COLORS.blue,
|
|
666
|
+
debug: COLORS.cyan,
|
|
667
|
+
trace: COLORS.magenta,
|
|
668
|
+
verbose: COLORS.gray
|
|
669
|
+
};
|
|
670
|
+
var LEVEL_NAMES = {
|
|
671
|
+
error: "ERROR ",
|
|
672
|
+
warn: "WARN ",
|
|
673
|
+
info: "INFO ",
|
|
674
|
+
debug: "DEBUG ",
|
|
675
|
+
trace: "TRACE ",
|
|
676
|
+
verbose: "VERBOSE"
|
|
677
|
+
};
|
|
678
|
+
var isBrowser = typeof window !== "undefined";
|
|
679
|
+
function formatTimestamp(timestamp) {
|
|
680
|
+
const date = new Date(timestamp);
|
|
681
|
+
return date.toISOString().substring(11, 23);
|
|
682
|
+
}
|
|
683
|
+
function safeStringify(data) {
|
|
684
|
+
const seen = /* @__PURE__ */ new WeakSet();
|
|
685
|
+
return JSON.stringify(data, (key, value) => {
|
|
686
|
+
if (typeof value === "object" && value !== null) {
|
|
687
|
+
if (seen.has(value)) {
|
|
688
|
+
return "[Circular]";
|
|
691
689
|
}
|
|
690
|
+
seen.add(value);
|
|
692
691
|
}
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
const toProcess = this.buffer.slice(0, this.REQUIRED_SAMPLES);
|
|
700
|
-
const processedStartTime = this.bufferStartTime;
|
|
701
|
-
this.buffer = this.buffer.slice(this.REQUIRED_SAMPLES);
|
|
702
|
-
const processedDuration = this.REQUIRED_SAMPLES / (this.options.sampleRate ?? 16e3);
|
|
703
|
-
this.bufferStartTime = processedStartTime + processedDuration;
|
|
704
|
-
const result = await lam.infer(toProcess);
|
|
705
|
-
const frameDuration = 1 / this.FRAME_RATE;
|
|
706
|
-
for (let i = 0; i < result.blendshapes.length; i++) {
|
|
707
|
-
const frame = result.blendshapes[i];
|
|
708
|
-
const timestamp = processedStartTime + i * frameDuration;
|
|
709
|
-
this.frameQueue.push({ frame, timestamp });
|
|
710
|
-
}
|
|
711
|
-
this.options.onInference?.(result.blendshapes.length);
|
|
712
|
-
} catch (error) {
|
|
713
|
-
this.options.onError?.(error);
|
|
714
|
-
this.buffer = new Float32Array(0);
|
|
715
|
-
this.bufferStartTime = 0;
|
|
692
|
+
if (value instanceof Error) {
|
|
693
|
+
return {
|
|
694
|
+
name: value.name,
|
|
695
|
+
message: value.message,
|
|
696
|
+
stack: value.stack
|
|
697
|
+
};
|
|
716
698
|
}
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
* Get the frame that should be displayed at the current time
|
|
720
|
-
*
|
|
721
|
-
* Automatically removes frames that have already been displayed.
|
|
722
|
-
* This prevents memory leaks from accumulating old frames.
|
|
723
|
-
*
|
|
724
|
-
* Discard Window (prevents premature frame discarding):
|
|
725
|
-
* - WebGPU: 0.5s (LAM inference 20-100ms + RAF jitter + React stalls)
|
|
726
|
-
* - WASM: 1.0s (LAM inference 50-500ms + higher variability)
|
|
727
|
-
*
|
|
728
|
-
* Last-Frame-Hold: Returns last valid frame instead of null to prevent
|
|
729
|
-
* avatar freezing when between frames (RAF at 60fps vs LAM at 30fps).
|
|
730
|
-
*
|
|
731
|
-
* @param currentTime - Current AudioContext time
|
|
732
|
-
* @param lam - LAM inference engine (optional, for backend detection)
|
|
733
|
-
* @returns Current frame, or last frame as fallback, or null if no frames yet
|
|
734
|
-
*/
|
|
735
|
-
getFrameForTime(currentTime, lam) {
|
|
736
|
-
const discardWindow = lam?.backend === "wasm" ? 1 : 0.5;
|
|
737
|
-
let discardedCount = 0;
|
|
738
|
-
while (this.frameQueue.length > 0 && this.frameQueue[0].timestamp < currentTime - discardWindow) {
|
|
739
|
-
const discarded = this.frameQueue.shift();
|
|
740
|
-
discardedCount++;
|
|
741
|
-
if (discardedCount === 1) {
|
|
742
|
-
const ageMs = ((currentTime - discarded.timestamp) * 1e3).toFixed(0);
|
|
743
|
-
console.warn("[LAM] Frame(s) discarded as too old", {
|
|
744
|
-
ageMs,
|
|
745
|
-
discardWindowMs: discardWindow * 1e3,
|
|
746
|
-
queueLength: this.frameQueue.length,
|
|
747
|
-
backend: lam?.backend ?? "unknown"
|
|
748
|
-
});
|
|
749
|
-
}
|
|
699
|
+
if (value instanceof Float32Array || value instanceof Int16Array) {
|
|
700
|
+
return `${value.constructor.name}(${value.length})`;
|
|
750
701
|
}
|
|
751
|
-
if (
|
|
752
|
-
|
|
753
|
-
this.lastFrame = frame;
|
|
754
|
-
return frame;
|
|
702
|
+
if (ArrayBuffer.isView(value)) {
|
|
703
|
+
return `${value.constructor.name}(${value.byteLength})`;
|
|
755
704
|
}
|
|
756
|
-
return
|
|
757
|
-
}
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
return Math.min(1, this.buffer.length / this.REQUIRED_SAMPLES);
|
|
705
|
+
return value;
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
var jsonFormatter = (entry) => {
|
|
709
|
+
const output = {
|
|
710
|
+
timestamp: entry.timestamp,
|
|
711
|
+
level: entry.level,
|
|
712
|
+
module: entry.module,
|
|
713
|
+
message: entry.message
|
|
714
|
+
};
|
|
715
|
+
if (entry.data && Object.keys(entry.data).length > 0) {
|
|
716
|
+
output.data = entry.data;
|
|
769
717
|
}
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
718
|
+
if (entry.error) {
|
|
719
|
+
output.error = {
|
|
720
|
+
name: entry.error.name,
|
|
721
|
+
message: entry.error.message,
|
|
722
|
+
stack: entry.error.stack
|
|
723
|
+
};
|
|
775
724
|
}
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
725
|
+
return safeStringify(output);
|
|
726
|
+
};
|
|
727
|
+
var prettyFormatter = (entry) => {
|
|
728
|
+
const time = formatTimestamp(entry.timestamp);
|
|
729
|
+
const level = LEVEL_NAMES[entry.level];
|
|
730
|
+
const module2 = entry.module;
|
|
731
|
+
const message = entry.message;
|
|
732
|
+
let output;
|
|
733
|
+
if (isBrowser) {
|
|
734
|
+
output = `${time} ${level} [${module2}] ${message}`;
|
|
735
|
+
} else {
|
|
736
|
+
const color = LEVEL_COLORS[entry.level];
|
|
737
|
+
output = `${COLORS.gray}${time}${COLORS.reset} ${color}${level}${COLORS.reset} ${COLORS.cyan}[${module2}]${COLORS.reset} ${message}`;
|
|
781
738
|
}
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
* Should be called when audio stream ends to prevent losing the last 0-1 seconds.
|
|
789
|
-
*
|
|
790
|
-
* @param lam - LAM inference engine
|
|
791
|
-
*/
|
|
792
|
-
async flush(lam) {
|
|
793
|
-
if (this.buffer.length === 0) {
|
|
794
|
-
return;
|
|
795
|
-
}
|
|
796
|
-
const padded = new Float32Array(this.REQUIRED_SAMPLES);
|
|
797
|
-
padded.set(this.buffer, 0);
|
|
798
|
-
const processedStartTime = this.bufferStartTime;
|
|
799
|
-
try {
|
|
800
|
-
const result = await lam.infer(padded);
|
|
801
|
-
const actualDuration = this.buffer.length / (this.options.sampleRate ?? 16e3);
|
|
802
|
-
const frameDuration = 1 / this.FRAME_RATE;
|
|
803
|
-
const actualFrameCount = Math.ceil(actualDuration * this.FRAME_RATE);
|
|
804
|
-
for (let i = 0; i < Math.min(actualFrameCount, result.blendshapes.length); i++) {
|
|
805
|
-
const frame = result.blendshapes[i];
|
|
806
|
-
const timestamp = processedStartTime + i * frameDuration;
|
|
807
|
-
this.frameQueue.push({ frame, timestamp });
|
|
808
|
-
}
|
|
809
|
-
this.buffer = new Float32Array(0);
|
|
810
|
-
this.bufferStartTime = 0;
|
|
811
|
-
this.options.onInference?.(Math.min(actualFrameCount, result.blendshapes.length));
|
|
812
|
-
} catch (error) {
|
|
813
|
-
this.options.onError?.(error);
|
|
814
|
-
this.buffer = new Float32Array(0);
|
|
815
|
-
this.bufferStartTime = 0;
|
|
739
|
+
if (entry.data && Object.keys(entry.data).length > 0) {
|
|
740
|
+
const dataStr = safeStringify(entry.data);
|
|
741
|
+
if (dataStr.length > 80) {
|
|
742
|
+
output += "\n " + JSON.stringify(entry.data, null, 2).replace(/\n/g, "\n ");
|
|
743
|
+
} else {
|
|
744
|
+
output += " " + dataStr;
|
|
816
745
|
}
|
|
817
746
|
}
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
* @param offset - Time offset in seconds to add to all timestamps
|
|
825
|
-
*/
|
|
826
|
-
adjustTimestamps(offset) {
|
|
827
|
-
for (const frame of this.frameQueue) {
|
|
828
|
-
frame.timestamp += offset;
|
|
747
|
+
if (entry.error) {
|
|
748
|
+
output += `
|
|
749
|
+
${entry.error.name}: ${entry.error.message}`;
|
|
750
|
+
if (entry.error.stack) {
|
|
751
|
+
const stackLines = entry.error.stack.split("\n").slice(1, 4);
|
|
752
|
+
output += "\n " + stackLines.join("\n ");
|
|
829
753
|
}
|
|
830
754
|
}
|
|
831
|
-
|
|
832
|
-
* Reset the pipeline
|
|
833
|
-
*/
|
|
834
|
-
reset() {
|
|
835
|
-
this.buffer = new Float32Array(0);
|
|
836
|
-
this.bufferStartTime = 0;
|
|
837
|
-
this.frameQueue = [];
|
|
838
|
-
this.lastFrame = null;
|
|
839
|
-
}
|
|
755
|
+
return output;
|
|
840
756
|
};
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
function pcm16ToFloat32(buffer) {
|
|
844
|
-
const byteLen = buffer.byteLength & ~1;
|
|
845
|
-
const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
|
|
846
|
-
const float32 = new Float32Array(int16.length);
|
|
847
|
-
for (let i = 0; i < int16.length; i++) {
|
|
848
|
-
float32[i] = int16[i] / 32768;
|
|
849
|
-
}
|
|
850
|
-
return float32;
|
|
757
|
+
function getFormatter(format) {
|
|
758
|
+
return format === "json" ? jsonFormatter : prettyFormatter;
|
|
851
759
|
}
|
|
852
|
-
function
|
|
853
|
-
const
|
|
854
|
-
|
|
855
|
-
|
|
760
|
+
function createBrowserConsoleArgs(entry) {
|
|
761
|
+
const time = formatTimestamp(entry.timestamp);
|
|
762
|
+
const level = entry.level.toUpperCase().padEnd(7);
|
|
763
|
+
const module2 = entry.module;
|
|
764
|
+
const message = entry.message;
|
|
765
|
+
const styles = {
|
|
766
|
+
time: "color: gray;",
|
|
767
|
+
error: "color: red; font-weight: bold;",
|
|
768
|
+
warn: "color: orange; font-weight: bold;",
|
|
769
|
+
info: "color: blue;",
|
|
770
|
+
debug: "color: cyan;",
|
|
771
|
+
trace: "color: magenta;",
|
|
772
|
+
verbose: "color: gray;",
|
|
773
|
+
module: "color: teal; font-weight: bold;",
|
|
774
|
+
message: "color: inherit;"
|
|
775
|
+
};
|
|
776
|
+
let formatStr = "%c%s %c%s %c[%s]%c %s";
|
|
777
|
+
const args = [
|
|
778
|
+
styles.time,
|
|
779
|
+
time,
|
|
780
|
+
styles[entry.level],
|
|
781
|
+
level,
|
|
782
|
+
styles.module,
|
|
783
|
+
module2,
|
|
784
|
+
styles.message,
|
|
785
|
+
message
|
|
786
|
+
];
|
|
787
|
+
if (entry.data && Object.keys(entry.data).length > 0) {
|
|
788
|
+
formatStr += " %o";
|
|
789
|
+
args.push(entry.data);
|
|
856
790
|
}
|
|
857
|
-
return
|
|
791
|
+
return [formatStr, ...args];
|
|
858
792
|
}
|
|
859
793
|
|
|
860
|
-
// src/
|
|
861
|
-
var
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
* Initialize the pipeline
|
|
888
|
-
*/
|
|
889
|
-
async initialize() {
|
|
890
|
-
await this.scheduler.initialize();
|
|
891
|
-
}
|
|
892
|
-
/**
|
|
893
|
-
* Start a new playback session
|
|
894
|
-
*
|
|
895
|
-
* Resets all state and prepares for incoming audio chunks.
|
|
896
|
-
* Audio will be scheduled immediately as chunks arrive (no buffering).
|
|
897
|
-
*/
|
|
898
|
-
start() {
|
|
899
|
-
this.stopMonitoring();
|
|
900
|
-
this.scheduler.reset();
|
|
901
|
-
this.coalescer.reset();
|
|
902
|
-
this.lamPipeline.reset();
|
|
903
|
-
this.playbackStarted = false;
|
|
904
|
-
this.scheduler.warmup();
|
|
905
|
-
this.startFrameLoop();
|
|
906
|
-
this.startMonitoring();
|
|
794
|
+
// src/logging/Logger.ts
|
|
795
|
+
var isBrowser2 = typeof window !== "undefined";
|
|
796
|
+
var globalConfig = { ...DEFAULT_LOGGING_CONFIG };
|
|
797
|
+
function configureLogging(config) {
|
|
798
|
+
globalConfig = { ...globalConfig, ...config };
|
|
799
|
+
}
|
|
800
|
+
function getLoggingConfig() {
|
|
801
|
+
return { ...globalConfig };
|
|
802
|
+
}
|
|
803
|
+
function resetLoggingConfig() {
|
|
804
|
+
globalConfig = { ...DEFAULT_LOGGING_CONFIG };
|
|
805
|
+
}
|
|
806
|
+
function setLogLevel(level) {
|
|
807
|
+
globalConfig.level = level;
|
|
808
|
+
}
|
|
809
|
+
function setLoggingEnabled(enabled) {
|
|
810
|
+
globalConfig.enabled = enabled;
|
|
811
|
+
}
|
|
812
|
+
var consoleSink = (entry) => {
|
|
813
|
+
const consoleMethod = entry.level === "error" ? "error" : entry.level === "warn" ? "warn" : "log";
|
|
814
|
+
if (globalConfig.format === "pretty" && isBrowser2) {
|
|
815
|
+
const args = createBrowserConsoleArgs(entry);
|
|
816
|
+
console[consoleMethod](...args);
|
|
817
|
+
} else {
|
|
818
|
+
const formatter = getFormatter(globalConfig.format);
|
|
819
|
+
const formatted = formatter(entry);
|
|
820
|
+
console[consoleMethod](formatted);
|
|
907
821
|
}
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
if (!combined) {
|
|
920
|
-
return;
|
|
921
|
-
}
|
|
922
|
-
const float32 = pcm16ToFloat32(combined);
|
|
923
|
-
const scheduleTime = await this.scheduler.schedule(float32);
|
|
924
|
-
if (!this.playbackStarted) {
|
|
925
|
-
this.playbackStarted = true;
|
|
926
|
-
this.emit("playback_start", scheduleTime);
|
|
927
|
-
}
|
|
928
|
-
this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
|
|
929
|
-
this.emit("error", err);
|
|
930
|
-
});
|
|
822
|
+
};
|
|
823
|
+
function getActiveSink() {
|
|
824
|
+
return globalConfig.sink || consoleSink;
|
|
825
|
+
}
|
|
826
|
+
function shouldLog(level) {
|
|
827
|
+
if (!globalConfig.enabled) return false;
|
|
828
|
+
return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[globalConfig.level];
|
|
829
|
+
}
|
|
830
|
+
var Logger = class _Logger {
|
|
831
|
+
constructor(module2) {
|
|
832
|
+
this.module = module2;
|
|
931
833
|
}
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
834
|
+
log(level, message, data) {
|
|
835
|
+
if (!shouldLog(level)) return;
|
|
836
|
+
const entry = {
|
|
837
|
+
timestamp: Date.now(),
|
|
838
|
+
level,
|
|
839
|
+
module: this.module,
|
|
840
|
+
message,
|
|
841
|
+
data
|
|
842
|
+
};
|
|
843
|
+
if (data?.error instanceof Error) {
|
|
844
|
+
entry.error = data.error;
|
|
845
|
+
const { error, ...rest } = data;
|
|
846
|
+
entry.data = Object.keys(rest).length > 0 ? rest : void 0;
|
|
942
847
|
}
|
|
943
|
-
|
|
848
|
+
getActiveSink()(entry);
|
|
944
849
|
}
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
*
|
|
948
|
-
* Gracefully cancels all audio playback and LAM processing:
|
|
949
|
-
* - Fades out audio over specified duration (default: 50ms)
|
|
950
|
-
* - Cancels pending LAM inferences
|
|
951
|
-
* - Clears all buffers and queues
|
|
952
|
-
* - Emits 'playback_complete' event
|
|
953
|
-
*
|
|
954
|
-
* Use this for interruptions (e.g., user barge-in during AI speech).
|
|
955
|
-
*
|
|
956
|
-
* @param fadeOutMs - Fade-out duration in milliseconds (default: 50ms)
|
|
957
|
-
* @returns Promise that resolves when fade-out completes
|
|
958
|
-
*/
|
|
959
|
-
async stop(fadeOutMs = 50) {
|
|
960
|
-
this.stopMonitoring();
|
|
961
|
-
await this.scheduler.cancelAll(fadeOutMs);
|
|
962
|
-
this.coalescer.reset();
|
|
963
|
-
this.lamPipeline.reset();
|
|
964
|
-
this.playbackStarted = false;
|
|
965
|
-
this.emit("playback_complete", void 0);
|
|
850
|
+
error(message, data) {
|
|
851
|
+
this.log("error", message, data);
|
|
966
852
|
}
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
*
|
|
970
|
-
* Uses requestAnimationFrame to check for new LAM frames.
|
|
971
|
-
* Synchronized to AudioContext clock (not visual refresh rate).
|
|
972
|
-
*
|
|
973
|
-
* Frame Emission Strategy:
|
|
974
|
-
* - LAMPipeline uses last-frame-hold to prevent null returns
|
|
975
|
-
* - Always emit frames (even repeated frames) to maintain smooth animation
|
|
976
|
-
* - Renderer is responsible for detecting duplicate frames if needed
|
|
977
|
-
*/
|
|
978
|
-
startFrameLoop() {
|
|
979
|
-
const updateFrame = () => {
|
|
980
|
-
const currentTime = this.scheduler.getCurrentTime();
|
|
981
|
-
const frame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
|
|
982
|
-
if (frame) {
|
|
983
|
-
this.emit("frame_ready", frame);
|
|
984
|
-
}
|
|
985
|
-
this.frameAnimationId = requestAnimationFrame(updateFrame);
|
|
986
|
-
};
|
|
987
|
-
this.frameAnimationId = requestAnimationFrame(updateFrame);
|
|
853
|
+
warn(message, data) {
|
|
854
|
+
this.log("warn", message, data);
|
|
988
855
|
}
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
*/
|
|
992
|
-
startMonitoring() {
|
|
993
|
-
if (this.monitorInterval) {
|
|
994
|
-
clearInterval(this.monitorInterval);
|
|
995
|
-
}
|
|
996
|
-
this.monitorInterval = window.setInterval(() => {
|
|
997
|
-
if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
|
|
998
|
-
this.emit("playback_complete", void 0);
|
|
999
|
-
this.stopMonitoring();
|
|
1000
|
-
}
|
|
1001
|
-
}, 100);
|
|
856
|
+
info(message, data) {
|
|
857
|
+
this.log("info", message, data);
|
|
1002
858
|
}
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
*/
|
|
1006
|
-
stopMonitoring() {
|
|
1007
|
-
if (this.monitorInterval) {
|
|
1008
|
-
clearInterval(this.monitorInterval);
|
|
1009
|
-
this.monitorInterval = null;
|
|
1010
|
-
}
|
|
1011
|
-
if (this.frameAnimationId) {
|
|
1012
|
-
cancelAnimationFrame(this.frameAnimationId);
|
|
1013
|
-
this.frameAnimationId = null;
|
|
1014
|
-
}
|
|
859
|
+
debug(message, data) {
|
|
860
|
+
this.log("debug", message, data);
|
|
1015
861
|
}
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
*/
|
|
1019
|
-
getState() {
|
|
1020
|
-
return {
|
|
1021
|
-
playbackStarted: this.playbackStarted,
|
|
1022
|
-
coalescerFill: this.coalescer.fillLevel,
|
|
1023
|
-
lamFill: this.lamPipeline.fillLevel,
|
|
1024
|
-
queuedFrames: this.lamPipeline.queuedFrameCount,
|
|
1025
|
-
currentTime: this.scheduler.getCurrentTime(),
|
|
1026
|
-
playbackEndTime: this.scheduler.getPlaybackEndTime()
|
|
1027
|
-
};
|
|
862
|
+
trace(message, data) {
|
|
863
|
+
this.log("trace", message, data);
|
|
1028
864
|
}
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
this.
|
|
1034
|
-
this.scheduler.dispose();
|
|
1035
|
-
this.coalescer.reset();
|
|
1036
|
-
this.lamPipeline.reset();
|
|
865
|
+
verbose(message, data) {
|
|
866
|
+
this.log("verbose", message, data);
|
|
867
|
+
}
|
|
868
|
+
child(subModule) {
|
|
869
|
+
return new _Logger(`${this.module}.${subModule}`);
|
|
1037
870
|
}
|
|
1038
871
|
};
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
"
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
// Cheeks (2)
|
|
1054
|
-
"cheekSquintLeft",
|
|
1055
|
-
"cheekSquintRight"
|
|
1056
|
-
];
|
|
1057
|
-
var EMOTION_ARKIT_MAP = {
|
|
1058
|
-
happy: {
|
|
1059
|
-
// AU6 - Cheek raiser (primary Duchenne smile marker)
|
|
1060
|
-
cheekSquintLeft: 0.5,
|
|
1061
|
-
cheekSquintRight: 0.5,
|
|
1062
|
-
// Slight eye squint from genuine smile (orbicularis oculi activation)
|
|
1063
|
-
eyeSquintLeft: 0.2,
|
|
1064
|
-
eyeSquintRight: 0.2
|
|
872
|
+
var loggerCache = /* @__PURE__ */ new Map();
|
|
873
|
+
function createLogger(module2) {
|
|
874
|
+
let logger17 = loggerCache.get(module2);
|
|
875
|
+
if (!logger17) {
|
|
876
|
+
logger17 = new Logger(module2);
|
|
877
|
+
loggerCache.set(module2, logger17);
|
|
878
|
+
}
|
|
879
|
+
return logger17;
|
|
880
|
+
}
|
|
881
|
+
var noopLogger = {
|
|
882
|
+
module: "noop",
|
|
883
|
+
error: () => {
|
|
884
|
+
},
|
|
885
|
+
warn: () => {
|
|
1065
886
|
},
|
|
1066
|
-
|
|
1067
|
-
// AU4 - Brow lowerer (intense, primary anger marker)
|
|
1068
|
-
browDownLeft: 0.7,
|
|
1069
|
-
browDownRight: 0.7,
|
|
1070
|
-
// AU5 - Upper lid raiser (wide eyes, part of the "glare")
|
|
1071
|
-
eyeWideLeft: 0.4,
|
|
1072
|
-
eyeWideRight: 0.4,
|
|
1073
|
-
// AU7 - Lid tightener (tense stare, combines with AU5 for angry glare)
|
|
1074
|
-
eyeSquintLeft: 0.3,
|
|
1075
|
-
eyeSquintRight: 0.3
|
|
887
|
+
info: () => {
|
|
1076
888
|
},
|
|
1077
|
-
|
|
1078
|
-
// AU1 - Inner brow raiser (primary sadness marker)
|
|
1079
|
-
browInnerUp: 0.6,
|
|
1080
|
-
// AU4 - Brow lowerer (brows drawn together)
|
|
1081
|
-
browDownLeft: 0.3,
|
|
1082
|
-
browDownRight: 0.3
|
|
889
|
+
debug: () => {
|
|
1083
890
|
},
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
confidenceThreshold: 0.3,
|
|
1090
|
-
intensity: 1,
|
|
1091
|
-
blendMode: "dominant",
|
|
1092
|
-
minBlendProbability: 0.1,
|
|
1093
|
-
energyModulation: false,
|
|
1094
|
-
minEnergyScale: 0.3,
|
|
1095
|
-
maxEnergyScale: 1
|
|
891
|
+
trace: () => {
|
|
892
|
+
},
|
|
893
|
+
verbose: () => {
|
|
894
|
+
},
|
|
895
|
+
child: () => noopLogger
|
|
1096
896
|
};
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
}
|
|
1104
|
-
function clamp01(value) {
|
|
1105
|
-
return Math.max(0, Math.min(1, value));
|
|
1106
|
-
}
|
|
1107
|
-
var EmotionToBlendshapeMapper = class {
|
|
1108
|
-
/**
|
|
1109
|
-
* Create a new EmotionToBlendshapeMapper
|
|
1110
|
-
*
|
|
1111
|
-
* @param config - Optional configuration
|
|
1112
|
-
*/
|
|
897
|
+
|
|
898
|
+
// src/inference/A2EProcessor.ts
|
|
899
|
+
var logger = createLogger("A2EProcessor");
|
|
900
|
+
var FRAME_RATE = 30;
|
|
901
|
+
var DRIP_INTERVAL_MS = 33;
|
|
902
|
+
var A2EProcessor = class {
|
|
1113
903
|
constructor(config) {
|
|
1114
|
-
this.
|
|
1115
|
-
this.
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
this.
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
904
|
+
this.writeOffset = 0;
|
|
905
|
+
this.bufferStartTime = 0;
|
|
906
|
+
// Frame queues (timestamped for pull mode, plain for drip mode)
|
|
907
|
+
this.timestampedQueue = [];
|
|
908
|
+
this.plainQueue = [];
|
|
909
|
+
// Push mode state
|
|
910
|
+
this._latestFrame = null;
|
|
911
|
+
this.dripInterval = null;
|
|
912
|
+
// Last-frame-hold for pull mode (prevents avatar freezing between frames)
|
|
913
|
+
this.lastPulledFrame = null;
|
|
914
|
+
// Inference serialization
|
|
915
|
+
this.inferenceRunning = false;
|
|
916
|
+
this.pendingChunks = [];
|
|
917
|
+
// Diagnostic: track getFrameForTime calls
|
|
918
|
+
this.getFrameCallCount = 0;
|
|
919
|
+
this.disposed = false;
|
|
920
|
+
this.backend = config.backend;
|
|
921
|
+
this.sampleRate = config.sampleRate ?? 16e3;
|
|
922
|
+
this.chunkSize = config.chunkSize ?? config.backend.chunkSize ?? 16e3;
|
|
923
|
+
this.onFrame = config.onFrame;
|
|
924
|
+
this.onError = config.onError;
|
|
925
|
+
this.bufferCapacity = this.chunkSize * 2;
|
|
926
|
+
this.buffer = new Float32Array(this.bufferCapacity);
|
|
927
|
+
}
|
|
928
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
929
|
+
// Audio Input
|
|
930
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
931
|
+
/**
|
|
932
|
+
* Push audio samples for inference (any source: mic, TTS, file).
|
|
1124
933
|
*
|
|
1125
|
-
*
|
|
1126
|
-
*
|
|
934
|
+
* - With `timestamp`: frames stored with timestamps (pull mode)
|
|
935
|
+
* - Without `timestamp`: frames stored in plain queue (drip/push mode)
|
|
1127
936
|
*
|
|
1128
|
-
*
|
|
1129
|
-
* @param audioEnergy - Optional audio energy (0-1) for energy modulation
|
|
1130
|
-
* @returns Target upper face blendshapes (before smoothing)
|
|
937
|
+
* Fire-and-forget: returns immediately, inference runs async.
|
|
1131
938
|
*/
|
|
1132
|
-
|
|
1133
|
-
this.
|
|
1134
|
-
if (
|
|
1135
|
-
this.
|
|
1136
|
-
}
|
|
1137
|
-
if (!frame) {
|
|
1138
|
-
return { ...this.targetBlendshapes };
|
|
1139
|
-
}
|
|
1140
|
-
if (this.config.blendMode === "weighted") {
|
|
1141
|
-
this.mapFrameWeighted(frame);
|
|
1142
|
-
} else {
|
|
1143
|
-
this.mapFrameDominant(frame);
|
|
939
|
+
pushAudio(samples, timestamp) {
|
|
940
|
+
if (this.disposed) return;
|
|
941
|
+
if (this.writeOffset === 0 && timestamp !== void 0) {
|
|
942
|
+
this.bufferStartTime = timestamp;
|
|
1144
943
|
}
|
|
1145
|
-
if (this.
|
|
1146
|
-
this.
|
|
944
|
+
if (this.writeOffset + samples.length > this.bufferCapacity) {
|
|
945
|
+
this.bufferCapacity = (this.writeOffset + samples.length) * 2;
|
|
946
|
+
const grown = new Float32Array(this.bufferCapacity);
|
|
947
|
+
grown.set(this.buffer.subarray(0, this.writeOffset));
|
|
948
|
+
this.buffer = grown;
|
|
949
|
+
}
|
|
950
|
+
this.buffer.set(samples, this.writeOffset);
|
|
951
|
+
this.writeOffset += samples.length;
|
|
952
|
+
logger.debug("pushAudio", {
|
|
953
|
+
samplesIn: samples.length,
|
|
954
|
+
writeOffset: this.writeOffset,
|
|
955
|
+
chunkSize: this.chunkSize,
|
|
956
|
+
willExtract: this.writeOffset >= this.chunkSize,
|
|
957
|
+
inferenceRunning: this.inferenceRunning,
|
|
958
|
+
pendingChunks: this.pendingChunks.length,
|
|
959
|
+
queuedFrames: this.timestampedQueue.length + this.plainQueue.length
|
|
960
|
+
});
|
|
961
|
+
while (this.writeOffset >= this.chunkSize) {
|
|
962
|
+
const chunk = this.buffer.slice(0, this.chunkSize);
|
|
963
|
+
this.buffer.copyWithin(0, this.chunkSize, this.writeOffset);
|
|
964
|
+
this.writeOffset -= this.chunkSize;
|
|
965
|
+
const chunkTimestamp = timestamp !== void 0 ? this.bufferStartTime : void 0;
|
|
966
|
+
this.pendingChunks.push({ chunk, timestamp: chunkTimestamp });
|
|
967
|
+
logger.info("Chunk queued for inference", {
|
|
968
|
+
chunkSize: chunk.length,
|
|
969
|
+
chunkTimestamp,
|
|
970
|
+
pendingChunks: this.pendingChunks.length,
|
|
971
|
+
remainderOffset: this.writeOffset
|
|
972
|
+
});
|
|
973
|
+
if (timestamp !== void 0) {
|
|
974
|
+
this.bufferStartTime += this.chunkSize / this.sampleRate;
|
|
975
|
+
}
|
|
1147
976
|
}
|
|
1148
|
-
|
|
977
|
+
this.drainPendingChunks();
|
|
1149
978
|
}
|
|
1150
979
|
/**
|
|
1151
|
-
*
|
|
980
|
+
* Flush remaining buffered audio (pads to chunkSize).
|
|
981
|
+
* Call at end of stream to process final partial chunk.
|
|
982
|
+
*
|
|
983
|
+
* Routes through the serialized pendingChunks pipeline to maintain
|
|
984
|
+
* correct frame ordering. Without this, flush() could push frames
|
|
985
|
+
* with the latest timestamp to the queue before drainPendingChunks()
|
|
986
|
+
* finishes pushing frames with earlier timestamps — causing
|
|
987
|
+
* getFrameForTime() to see out-of-order timestamps and stall.
|
|
1152
988
|
*/
|
|
1153
|
-
|
|
1154
|
-
if (
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
const
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
}
|
|
989
|
+
async flush() {
|
|
990
|
+
if (this.disposed || this.writeOffset === 0) return;
|
|
991
|
+
const padded = new Float32Array(this.chunkSize);
|
|
992
|
+
padded.set(this.buffer.subarray(0, this.writeOffset), 0);
|
|
993
|
+
const chunkTimestamp = this.bufferStartTime > 0 ? this.bufferStartTime : void 0;
|
|
994
|
+
logger.info("flush: routing through drain pipeline", {
|
|
995
|
+
actualSamples: this.writeOffset,
|
|
996
|
+
chunkTimestamp: chunkTimestamp?.toFixed(3),
|
|
997
|
+
pendingChunks: this.pendingChunks.length,
|
|
998
|
+
inferenceRunning: this.inferenceRunning
|
|
999
|
+
});
|
|
1000
|
+
this.writeOffset = 0;
|
|
1001
|
+
this.bufferStartTime = 0;
|
|
1002
|
+
this.pendingChunks.push({ chunk: padded, timestamp: chunkTimestamp });
|
|
1003
|
+
this.drainPendingChunks();
|
|
1169
1004
|
}
|
|
1170
1005
|
/**
|
|
1171
|
-
*
|
|
1172
|
-
* Creates more nuanced expressions (e.g., bittersweet = happy + sad)
|
|
1006
|
+
* Reset buffer and frame queues
|
|
1173
1007
|
*/
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1008
|
+
reset() {
|
|
1009
|
+
this.writeOffset = 0;
|
|
1010
|
+
this.bufferStartTime = 0;
|
|
1011
|
+
this.timestampedQueue = [];
|
|
1012
|
+
this.plainQueue = [];
|
|
1013
|
+
this._latestFrame = null;
|
|
1014
|
+
this.lastPulledFrame = null;
|
|
1015
|
+
this.pendingChunks = [];
|
|
1016
|
+
this.inferenceRunning = false;
|
|
1017
|
+
this.getFrameCallCount = 0;
|
|
1018
|
+
}
|
|
1019
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1020
|
+
// Frame Output — Pull Mode (TTS playback)
|
|
1021
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1022
|
+
/**
|
|
1023
|
+
* Get frame synced to external clock (e.g. AudioContext.currentTime).
|
|
1024
|
+
*
|
|
1025
|
+
* Discards frames that are too old, returns the current frame,
|
|
1026
|
+
* or holds last frame as fallback to prevent avatar freezing.
|
|
1027
|
+
*
|
|
1028
|
+
* @param currentTime - Current playback time (seconds)
|
|
1029
|
+
* @returns Blendshape frame, or null if no frames yet
|
|
1030
|
+
*/
|
|
1031
|
+
getFrameForTime(currentTime) {
|
|
1032
|
+
this.getFrameCallCount++;
|
|
1033
|
+
const discardWindow = this.backend.backend === "wasm" ? 1 : 0.5;
|
|
1034
|
+
let discardCount = 0;
|
|
1035
|
+
while (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp < currentTime - discardWindow) {
|
|
1036
|
+
this.timestampedQueue.shift();
|
|
1037
|
+
discardCount++;
|
|
1038
|
+
}
|
|
1039
|
+
if (discardCount > 0) {
|
|
1040
|
+
logger.warn("getFrameForTime DISCARDED stale frames", {
|
|
1041
|
+
discardCount,
|
|
1042
|
+
currentTime: currentTime.toFixed(3),
|
|
1043
|
+
discardWindow,
|
|
1044
|
+
remainingFrames: this.timestampedQueue.length,
|
|
1045
|
+
nextFrameTs: this.timestampedQueue.length > 0 ? this.timestampedQueue[0].timestamp.toFixed(3) : "none"
|
|
1046
|
+
});
|
|
1178
1047
|
}
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
const mapping = EMOTION_ARKIT_MAP[emotion];
|
|
1184
|
-
if (!mapping) {
|
|
1185
|
-
continue;
|
|
1186
|
-
}
|
|
1187
|
-
const scale = this.config.intensity * probability;
|
|
1188
|
-
for (const [name, value] of Object.entries(mapping)) {
|
|
1189
|
-
const blendshapeName = name;
|
|
1190
|
-
if (value !== void 0) {
|
|
1191
|
-
this.targetBlendshapes[blendshapeName] += value * scale;
|
|
1192
|
-
}
|
|
1193
|
-
}
|
|
1048
|
+
if (this.timestampedQueue.length > 0 && this.timestampedQueue[0].timestamp <= currentTime) {
|
|
1049
|
+
const { frame } = this.timestampedQueue.shift();
|
|
1050
|
+
this.lastPulledFrame = frame;
|
|
1051
|
+
return frame;
|
|
1194
1052
|
}
|
|
1195
|
-
|
|
1196
|
-
|
|
1053
|
+
if (this.timestampedQueue.length > 0 && this.getFrameCallCount % 60 === 0) {
|
|
1054
|
+
logger.warn("getFrameForTime: frames in queue but NOT consumable", {
|
|
1055
|
+
queueLen: this.timestampedQueue.length,
|
|
1056
|
+
frontTimestamp: this.timestampedQueue[0].timestamp.toFixed(4),
|
|
1057
|
+
currentTime: currentTime.toFixed(4),
|
|
1058
|
+
delta: (this.timestampedQueue[0].timestamp - currentTime).toFixed(4),
|
|
1059
|
+
callCount: this.getFrameCallCount
|
|
1060
|
+
});
|
|
1197
1061
|
}
|
|
1062
|
+
return this.lastPulledFrame;
|
|
1198
1063
|
}
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
const energyScale = minEnergyScale + this.currentEnergy * (maxEnergyScale - minEnergyScale);
|
|
1206
|
-
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
1207
|
-
this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name] * energyScale);
|
|
1208
|
-
}
|
|
1064
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1065
|
+
// Frame Output — Push Mode (live mic, game loop)
|
|
1066
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1067
|
+
/** Latest frame from drip-feed (live mic, game loop) */
|
|
1068
|
+
get latestFrame() {
|
|
1069
|
+
return this._latestFrame;
|
|
1209
1070
|
}
|
|
1210
|
-
/**
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1071
|
+
/** Start 30fps drip-feed timer (push mode) */
|
|
1072
|
+
startDrip() {
|
|
1073
|
+
if (this.dripInterval) return;
|
|
1074
|
+
this.dripInterval = setInterval(() => {
|
|
1075
|
+
const frame = this.plainQueue.shift();
|
|
1076
|
+
if (frame) {
|
|
1077
|
+
this._latestFrame = frame;
|
|
1078
|
+
this.onFrame?.(frame);
|
|
1079
|
+
}
|
|
1080
|
+
}, DRIP_INTERVAL_MS);
|
|
1081
|
+
}
|
|
1082
|
+
/** Stop drip-feed timer */
|
|
1083
|
+
stopDrip() {
|
|
1084
|
+
if (this.dripInterval) {
|
|
1085
|
+
clearInterval(this.dripInterval);
|
|
1086
|
+
this.dripInterval = null;
|
|
1224
1087
|
}
|
|
1225
1088
|
}
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1089
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1090
|
+
// State
|
|
1091
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1092
|
+
/** Number of frames waiting in queue (both modes combined) */
|
|
1093
|
+
get queuedFrameCount() {
|
|
1094
|
+
return this.timestampedQueue.length + this.plainQueue.length;
|
|
1095
|
+
}
|
|
1096
|
+
/** Buffer fill level as fraction of chunkSize (0-1) */
|
|
1097
|
+
get fillLevel() {
|
|
1098
|
+
return Math.min(1, this.writeOffset / this.chunkSize);
|
|
1233
1099
|
}
|
|
1100
|
+
/** Dispose resources */
|
|
1101
|
+
dispose() {
|
|
1102
|
+
if (this.disposed) return;
|
|
1103
|
+
this.disposed = true;
|
|
1104
|
+
this.stopDrip();
|
|
1105
|
+
this.reset();
|
|
1106
|
+
}
|
|
1107
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1108
|
+
// Private
|
|
1109
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
1234
1110
|
/**
|
|
1235
|
-
*
|
|
1236
|
-
*
|
|
1237
|
-
* Sets both target and current blendshapes to zero.
|
|
1111
|
+
* Process pending chunks sequentially.
|
|
1112
|
+
* Fire-and-forget — called from pushAudio() without awaiting.
|
|
1238
1113
|
*/
|
|
1239
|
-
|
|
1240
|
-
this.
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
getConfig() {
|
|
1248
|
-
return { ...this.config };
|
|
1249
|
-
}
|
|
1250
|
-
/**
|
|
1251
|
-
* Update configuration
|
|
1252
|
-
*
|
|
1253
|
-
* @param config - Partial configuration to update
|
|
1254
|
-
*/
|
|
1255
|
-
setConfig(config) {
|
|
1256
|
-
this.config = {
|
|
1257
|
-
...this.config,
|
|
1258
|
-
...config
|
|
1259
|
-
};
|
|
1260
|
-
}
|
|
1261
|
-
};
|
|
1262
|
-
|
|
1263
|
-
// src/animation/audioEnergy.ts
|
|
1264
|
-
function calculateRMS(samples) {
|
|
1265
|
-
if (samples.length === 0) return 0;
|
|
1266
|
-
let sumSquares = 0;
|
|
1267
|
-
for (let i = 0; i < samples.length; i++) {
|
|
1268
|
-
sumSquares += samples[i] * samples[i];
|
|
1269
|
-
}
|
|
1270
|
-
return Math.sqrt(sumSquares / samples.length);
|
|
1271
|
-
}
|
|
1272
|
-
function calculatePeak(samples) {
|
|
1273
|
-
let peak = 0;
|
|
1274
|
-
for (let i = 0; i < samples.length; i++) {
|
|
1275
|
-
const abs = Math.abs(samples[i]);
|
|
1276
|
-
if (abs > peak) peak = abs;
|
|
1277
|
-
}
|
|
1278
|
-
return peak;
|
|
1279
|
-
}
|
|
1280
|
-
var AudioEnergyAnalyzer = class {
|
|
1281
|
-
/**
|
|
1282
|
-
* @param smoothingFactor How much to smooth (0 = no smoothing, 1 = infinite smoothing). Default 0.85
|
|
1283
|
-
* @param noiseFloor Minimum energy threshold to consider as signal. Default 0.01
|
|
1284
|
-
*/
|
|
1285
|
-
constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
|
|
1286
|
-
this.smoothedRMS = 0;
|
|
1287
|
-
this.smoothedPeak = 0;
|
|
1288
|
-
this.smoothingFactor = Math.max(0, Math.min(0.99, smoothingFactor));
|
|
1289
|
-
this.noiseFloor = noiseFloor;
|
|
1290
|
-
}
|
|
1291
|
-
/**
|
|
1292
|
-
* Process audio samples and return smoothed energy values
|
|
1293
|
-
* @param samples Audio samples (Float32Array)
|
|
1294
|
-
* @returns Object with rms and peak values
|
|
1295
|
-
*/
|
|
1296
|
-
process(samples) {
|
|
1297
|
-
const instantRMS = calculateRMS(samples);
|
|
1298
|
-
const instantPeak = calculatePeak(samples);
|
|
1299
|
-
const gatedRMS = instantRMS > this.noiseFloor ? instantRMS : 0;
|
|
1300
|
-
const gatedPeak = instantPeak > this.noiseFloor ? instantPeak : 0;
|
|
1301
|
-
if (gatedRMS > this.smoothedRMS) {
|
|
1302
|
-
this.smoothedRMS = this.smoothedRMS * 0.5 + gatedRMS * 0.5;
|
|
1303
|
-
} else {
|
|
1304
|
-
this.smoothedRMS = this.smoothedRMS * this.smoothingFactor + gatedRMS * (1 - this.smoothingFactor);
|
|
1305
|
-
}
|
|
1306
|
-
if (gatedPeak > this.smoothedPeak) {
|
|
1307
|
-
this.smoothedPeak = this.smoothedPeak * 0.3 + gatedPeak * 0.7;
|
|
1308
|
-
} else {
|
|
1309
|
-
this.smoothedPeak = this.smoothedPeak * this.smoothingFactor + gatedPeak * (1 - this.smoothingFactor);
|
|
1310
|
-
}
|
|
1311
|
-
const energy = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
|
|
1312
|
-
return {
|
|
1313
|
-
rms: this.smoothedRMS,
|
|
1314
|
-
peak: this.smoothedPeak,
|
|
1315
|
-
energy: Math.min(1, energy * 2)
|
|
1316
|
-
// Scale up and clamp
|
|
1317
|
-
};
|
|
1318
|
-
}
|
|
1319
|
-
/**
|
|
1320
|
-
* Reset analyzer state
|
|
1321
|
-
*/
|
|
1322
|
-
reset() {
|
|
1323
|
-
this.smoothedRMS = 0;
|
|
1324
|
-
this.smoothedPeak = 0;
|
|
1325
|
-
}
|
|
1326
|
-
/**
|
|
1327
|
-
* Get current smoothed RMS value
|
|
1328
|
-
*/
|
|
1329
|
-
get rms() {
|
|
1330
|
-
return this.smoothedRMS;
|
|
1331
|
-
}
|
|
1332
|
-
/**
|
|
1333
|
-
* Get current smoothed peak value
|
|
1334
|
-
*/
|
|
1335
|
-
get peak() {
|
|
1336
|
-
return this.smoothedPeak;
|
|
1337
|
-
}
|
|
1338
|
-
};
|
|
1339
|
-
var EmphasisDetector = class {
|
|
1340
|
-
/**
|
|
1341
|
-
* @param historySize Number of frames to track. Default 10
|
|
1342
|
-
* @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
|
|
1343
|
-
*/
|
|
1344
|
-
constructor(historySize = 10, emphasisThreshold = 0.15) {
|
|
1345
|
-
this.energyHistory = [];
|
|
1346
|
-
this.historySize = historySize;
|
|
1347
|
-
this.emphasisThreshold = emphasisThreshold;
|
|
1348
|
-
}
|
|
1349
|
-
/**
|
|
1350
|
-
* Process energy value and detect emphasis
|
|
1351
|
-
* @param energy Current energy value (0-1)
|
|
1352
|
-
* @returns Object with isEmphasis flag and emphasisStrength
|
|
1353
|
-
*/
|
|
1354
|
-
process(energy) {
|
|
1355
|
-
this.energyHistory.push(energy);
|
|
1356
|
-
if (this.energyHistory.length > this.historySize) {
|
|
1357
|
-
this.energyHistory.shift();
|
|
1358
|
-
}
|
|
1359
|
-
if (this.energyHistory.length < 3) {
|
|
1360
|
-
return { isEmphasis: false, emphasisStrength: 0 };
|
|
1114
|
+
drainPendingChunks() {
|
|
1115
|
+
if (this.inferenceRunning || this.pendingChunks.length === 0) {
|
|
1116
|
+
if (this.inferenceRunning && this.pendingChunks.length > 0) {
|
|
1117
|
+
logger.debug("drainPendingChunks skipped (inference running)", {
|
|
1118
|
+
pendingChunks: this.pendingChunks.length
|
|
1119
|
+
});
|
|
1120
|
+
}
|
|
1121
|
+
return;
|
|
1361
1122
|
}
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
const
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1123
|
+
this.inferenceRunning = true;
|
|
1124
|
+
logger.info("drainPendingChunks starting", { pendingChunks: this.pendingChunks.length });
|
|
1125
|
+
const processNext = async () => {
|
|
1126
|
+
while (this.pendingChunks.length > 0 && !this.disposed) {
|
|
1127
|
+
const { chunk, timestamp } = this.pendingChunks.shift();
|
|
1128
|
+
try {
|
|
1129
|
+
const t0 = performance.now();
|
|
1130
|
+
const result = await this.backend.infer(chunk);
|
|
1131
|
+
const inferMs = Math.round(performance.now() - t0);
|
|
1132
|
+
const actualDuration = chunk.length / this.sampleRate;
|
|
1133
|
+
const actualFrameCount = Math.ceil(actualDuration * FRAME_RATE);
|
|
1134
|
+
const framesToQueue = Math.min(actualFrameCount, result.blendshapes.length);
|
|
1135
|
+
logger.info("Inference complete", {
|
|
1136
|
+
inferMs,
|
|
1137
|
+
modelFrames: result.blendshapes.length,
|
|
1138
|
+
framesToQueue,
|
|
1139
|
+
timestamp,
|
|
1140
|
+
totalQueued: this.timestampedQueue.length + framesToQueue,
|
|
1141
|
+
remainingPending: this.pendingChunks.length
|
|
1142
|
+
});
|
|
1143
|
+
for (let i = 0; i < framesToQueue; i++) {
|
|
1144
|
+
if (timestamp !== void 0) {
|
|
1145
|
+
this.timestampedQueue.push({
|
|
1146
|
+
frame: result.blendshapes[i],
|
|
1147
|
+
timestamp: timestamp + i / FRAME_RATE
|
|
1148
|
+
});
|
|
1149
|
+
} else {
|
|
1150
|
+
this.plainQueue.push(result.blendshapes[i]);
|
|
1151
|
+
}
|
|
1152
|
+
}
|
|
1153
|
+
} catch (err) {
|
|
1154
|
+
this.handleError(err);
|
|
1155
|
+
}
|
|
1156
|
+
if (this.pendingChunks.length > 0) {
|
|
1157
|
+
await new Promise((r) => setTimeout(r, 0));
|
|
1158
|
+
}
|
|
1159
|
+
}
|
|
1160
|
+
this.inferenceRunning = false;
|
|
1161
|
+
if (this.pendingChunks.length > 0) {
|
|
1162
|
+
this.drainPendingChunks();
|
|
1163
|
+
}
|
|
1369
1164
|
};
|
|
1165
|
+
processNext().catch((err) => this.handleError(err));
|
|
1370
1166
|
}
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
this.energyHistory = [];
|
|
1167
|
+
handleError(err) {
|
|
1168
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
1169
|
+
logger.warn("A2EProcessor inference error", { error: error.message });
|
|
1170
|
+
this.onError?.(error);
|
|
1376
1171
|
}
|
|
1377
1172
|
};
|
|
1378
1173
|
|
|
@@ -2361,464 +2156,200 @@ var ModelCache = class {
|
|
|
2361
2156
|
}
|
|
2362
2157
|
await this.delete(model.url);
|
|
2363
2158
|
evictedUrls.push(model.url);
|
|
2364
|
-
freedBytes += model.size;
|
|
2365
|
-
console.log(`[ModelCache] Evicted: ${model.url} (${formatBytes(model.size)})`);
|
|
2366
|
-
}
|
|
2367
|
-
span?.setAttributes({
|
|
2368
|
-
"eviction.bytes_freed": freedBytes,
|
|
2369
|
-
"eviction.models_evicted": evictedUrls.length
|
|
2370
|
-
});
|
|
2371
|
-
span?.end();
|
|
2372
|
-
if (freedBytes > 0) {
|
|
2373
|
-
telemetry?.incrementCounter("omote.cache.eviction", evictedUrls.length, {
|
|
2374
|
-
bytes_freed: String(freedBytes)
|
|
2375
|
-
});
|
|
2376
|
-
}
|
|
2377
|
-
return evictedUrls;
|
|
2378
|
-
} catch (err) {
|
|
2379
|
-
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
2380
|
-
console.warn("[ModelCache] Eviction failed:", err);
|
|
2381
|
-
return [];
|
|
2382
|
-
}
|
|
2383
|
-
}
|
|
2384
|
-
/**
|
|
2385
|
-
* Get storage quota information
|
|
2386
|
-
*
|
|
2387
|
-
* Uses navigator.storage.estimate() to get quota details.
|
|
2388
|
-
* Returns null if the API is unavailable.
|
|
2389
|
-
*
|
|
2390
|
-
* @returns Quota info or null if unavailable
|
|
2391
|
-
*
|
|
2392
|
-
* @example
|
|
2393
|
-
* ```typescript
|
|
2394
|
-
* const cache = getModelCache();
|
|
2395
|
-
* const quota = await cache.getQuotaInfo();
|
|
2396
|
-
* if (quota) {
|
|
2397
|
-
* console.log(`Using ${quota.percentUsed.toFixed(1)}% of quota`);
|
|
2398
|
-
* }
|
|
2399
|
-
* ```
|
|
2400
|
-
*/
|
|
2401
|
-
async getQuotaInfo() {
|
|
2402
|
-
if (!navigator?.storage?.estimate) {
|
|
2403
|
-
return null;
|
|
2404
|
-
}
|
|
2405
|
-
try {
|
|
2406
|
-
const estimate = await navigator.storage.estimate();
|
|
2407
|
-
const usedBytes = estimate.usage || 0;
|
|
2408
|
-
const quotaBytes = estimate.quota || 0;
|
|
2409
|
-
const percentUsed = quotaBytes > 0 ? usedBytes / quotaBytes * 100 : 0;
|
|
2410
|
-
const stats = await this.getStats();
|
|
2411
|
-
return {
|
|
2412
|
-
usedBytes,
|
|
2413
|
-
quotaBytes,
|
|
2414
|
-
percentUsed,
|
|
2415
|
-
cacheBytes: stats.totalSize
|
|
2416
|
-
};
|
|
2417
|
-
} catch {
|
|
2418
|
-
return null;
|
|
2419
|
-
}
|
|
2420
|
-
}
|
|
2421
|
-
};
|
|
2422
|
-
var cacheInstance = null;
|
|
2423
|
-
function getModelCache() {
|
|
2424
|
-
if (!cacheInstance) {
|
|
2425
|
-
cacheInstance = new ModelCache();
|
|
2426
|
-
}
|
|
2427
|
-
return cacheInstance;
|
|
2428
|
-
}
|
|
2429
|
-
var MAX_CACHE_SIZE_BYTES = 500 * 1024 * 1024;
|
|
2430
|
-
async function fetchWithCache(url, optionsOrProgress) {
|
|
2431
|
-
let options = {};
|
|
2432
|
-
if (typeof optionsOrProgress === "function") {
|
|
2433
|
-
options = { onProgress: optionsOrProgress };
|
|
2434
|
-
} else if (optionsOrProgress) {
|
|
2435
|
-
options = optionsOrProgress;
|
|
2436
|
-
}
|
|
2437
|
-
const { version, validateStale = false, onProgress } = options;
|
|
2438
|
-
const cache = getModelCache();
|
|
2439
|
-
const cacheKey = version ? getCacheKey(url, version) : url;
|
|
2440
|
-
const telemetry = getTelemetry();
|
|
2441
|
-
const span = telemetry?.startSpan("fetchWithCache", {
|
|
2442
|
-
"fetch.url": url,
|
|
2443
|
-
...version && { "fetch.version": version },
|
|
2444
|
-
"fetch.validate_stale": validateStale
|
|
2445
|
-
});
|
|
2446
|
-
if (validateStale) {
|
|
2447
|
-
const validation = await cache.getWithValidation(cacheKey, url);
|
|
2448
|
-
if (validation.data && !validation.stale) {
|
|
2449
|
-
console.log(`[ModelCache] Cache hit (validated): ${url} (${(validation.data.byteLength / 1024 / 1024).toFixed(1)}MB)`);
|
|
2450
|
-
onProgress?.(validation.data.byteLength, validation.data.byteLength);
|
|
2451
|
-
span?.setAttributes({
|
|
2452
|
-
"fetch.cache_hit": true,
|
|
2453
|
-
"fetch.cache_validated": true,
|
|
2454
|
-
"fetch.cache_stale": false,
|
|
2455
|
-
"fetch.size_bytes": validation.data.byteLength
|
|
2456
|
-
});
|
|
2457
|
-
span?.end();
|
|
2458
|
-
return validation.data;
|
|
2459
|
-
}
|
|
2460
|
-
if (validation.stale) {
|
|
2461
|
-
console.log(`[ModelCache] Cache stale, refetching: ${url}`);
|
|
2462
|
-
span?.setAttributes({
|
|
2463
|
-
"fetch.cache_hit": true,
|
|
2464
|
-
"fetch.cache_validated": true,
|
|
2465
|
-
"fetch.cache_stale": true
|
|
2466
|
-
});
|
|
2467
|
-
}
|
|
2468
|
-
} else {
|
|
2469
|
-
const cached = await cache.get(cacheKey);
|
|
2470
|
-
if (cached) {
|
|
2471
|
-
console.log(`[ModelCache] Cache hit: ${url} (${(cached.byteLength / 1024 / 1024).toFixed(1)}MB)`);
|
|
2472
|
-
onProgress?.(cached.byteLength, cached.byteLength);
|
|
2473
|
-
span?.setAttributes({
|
|
2474
|
-
"fetch.cache_hit": true,
|
|
2475
|
-
"fetch.size_bytes": cached.byteLength
|
|
2476
|
-
});
|
|
2477
|
-
span?.end();
|
|
2478
|
-
return cached;
|
|
2479
|
-
}
|
|
2480
|
-
}
|
|
2481
|
-
span?.setAttributes({ "fetch.cache_hit": false });
|
|
2482
|
-
console.log(`[ModelCache] Cache miss, fetching: ${url}`);
|
|
2483
|
-
try {
|
|
2484
|
-
const response = await fetch(url);
|
|
2485
|
-
if (!response.ok) {
|
|
2486
|
-
throw new Error(`Failed to fetch ${url}: ${response.status}`);
|
|
2487
|
-
}
|
|
2488
|
-
const contentLength = response.headers.get("content-length");
|
|
2489
|
-
const total = contentLength ? parseInt(contentLength, 10) : 0;
|
|
2490
|
-
const etag = response.headers.get("etag") ?? void 0;
|
|
2491
|
-
const tooLargeForCache = total > MAX_CACHE_SIZE_BYTES;
|
|
2492
|
-
if (tooLargeForCache) {
|
|
2493
|
-
console.log(`[ModelCache] File too large for IndexedDB (${(total / 1024 / 1024).toFixed(0)}MB > 500MB), using HTTP cache only`);
|
|
2494
|
-
}
|
|
2495
|
-
if (!response.body) {
|
|
2496
|
-
const data2 = await response.arrayBuffer();
|
|
2497
|
-
if (!tooLargeForCache) {
|
|
2498
|
-
await cache.set(cacheKey, data2, etag, version);
|
|
2499
|
-
}
|
|
2500
|
-
span?.setAttributes({
|
|
2501
|
-
"fetch.size_bytes": data2.byteLength,
|
|
2502
|
-
"fetch.cached_to_indexeddb": !tooLargeForCache
|
|
2503
|
-
});
|
|
2504
|
-
span?.end();
|
|
2505
|
-
return data2;
|
|
2506
|
-
}
|
|
2507
|
-
const reader = response.body.getReader();
|
|
2508
|
-
const chunks = [];
|
|
2509
|
-
let loaded = 0;
|
|
2510
|
-
while (true) {
|
|
2511
|
-
const { done, value } = await reader.read();
|
|
2512
|
-
if (done) break;
|
|
2513
|
-
chunks.push(value);
|
|
2514
|
-
loaded += value.length;
|
|
2515
|
-
onProgress?.(loaded, total || loaded);
|
|
2516
|
-
}
|
|
2517
|
-
const data = new Uint8Array(loaded);
|
|
2518
|
-
let offset = 0;
|
|
2519
|
-
for (const chunk of chunks) {
|
|
2520
|
-
data.set(chunk, offset);
|
|
2521
|
-
offset += chunk.length;
|
|
2522
|
-
}
|
|
2523
|
-
const buffer = data.buffer;
|
|
2524
|
-
if (!tooLargeForCache) {
|
|
2525
|
-
await cache.set(cacheKey, buffer, etag, version);
|
|
2526
|
-
console.log(`[ModelCache] Cached: ${url} (${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB)`);
|
|
2527
|
-
}
|
|
2528
|
-
span?.setAttributes({
|
|
2529
|
-
"fetch.size_bytes": buffer.byteLength,
|
|
2530
|
-
"fetch.cached_to_indexeddb": !tooLargeForCache
|
|
2531
|
-
});
|
|
2532
|
-
span?.end();
|
|
2533
|
-
return buffer;
|
|
2534
|
-
} catch (error) {
|
|
2535
|
-
span?.endWithError(error instanceof Error ? error : new Error(String(error)));
|
|
2536
|
-
throw error;
|
|
2537
|
-
}
|
|
2538
|
-
}
|
|
2539
|
-
async function preloadModels(urls, onProgress) {
|
|
2540
|
-
const cache = getModelCache();
|
|
2541
|
-
for (let i = 0; i < urls.length; i++) {
|
|
2542
|
-
const url = urls[i];
|
|
2543
|
-
onProgress?.(i, urls.length, url);
|
|
2544
|
-
if (await cache.has(url)) {
|
|
2545
|
-
console.log(`[ModelCache] Already cached: ${url}`);
|
|
2546
|
-
continue;
|
|
2547
|
-
}
|
|
2548
|
-
await fetchWithCache(url);
|
|
2549
|
-
}
|
|
2550
|
-
onProgress?.(urls.length, urls.length, "done");
|
|
2551
|
-
}
|
|
2552
|
-
function formatBytes(bytes) {
|
|
2553
|
-
if (bytes < 1024) return `${bytes} B`;
|
|
2554
|
-
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
|
|
2555
|
-
if (bytes < 1024 * 1024 * 1024) return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
|
|
2556
|
-
return `${(bytes / 1024 / 1024 / 1024).toFixed(1)} GB`;
|
|
2557
|
-
}
|
|
2558
|
-
|
|
2559
|
-
// src/logging/types.ts
|
|
2560
|
-
var LOG_LEVEL_PRIORITY = {
|
|
2561
|
-
error: 0,
|
|
2562
|
-
warn: 1,
|
|
2563
|
-
info: 2,
|
|
2564
|
-
debug: 3,
|
|
2565
|
-
trace: 4,
|
|
2566
|
-
verbose: 5
|
|
2567
|
-
};
|
|
2568
|
-
var DEFAULT_LOGGING_CONFIG = {
|
|
2569
|
-
level: "info",
|
|
2570
|
-
enabled: true,
|
|
2571
|
-
format: "pretty",
|
|
2572
|
-
timestamps: true,
|
|
2573
|
-
includeModule: true
|
|
2574
|
-
};
|
|
2575
|
-
|
|
2576
|
-
// src/logging/formatters.ts
|
|
2577
|
-
var COLORS = {
|
|
2578
|
-
reset: "\x1B[0m",
|
|
2579
|
-
red: "\x1B[31m",
|
|
2580
|
-
yellow: "\x1B[33m",
|
|
2581
|
-
blue: "\x1B[34m",
|
|
2582
|
-
cyan: "\x1B[36m",
|
|
2583
|
-
gray: "\x1B[90m",
|
|
2584
|
-
white: "\x1B[37m",
|
|
2585
|
-
magenta: "\x1B[35m"
|
|
2586
|
-
};
|
|
2587
|
-
var LEVEL_COLORS = {
|
|
2588
|
-
error: COLORS.red,
|
|
2589
|
-
warn: COLORS.yellow,
|
|
2590
|
-
info: COLORS.blue,
|
|
2591
|
-
debug: COLORS.cyan,
|
|
2592
|
-
trace: COLORS.magenta,
|
|
2593
|
-
verbose: COLORS.gray
|
|
2594
|
-
};
|
|
2595
|
-
var LEVEL_NAMES = {
|
|
2596
|
-
error: "ERROR ",
|
|
2597
|
-
warn: "WARN ",
|
|
2598
|
-
info: "INFO ",
|
|
2599
|
-
debug: "DEBUG ",
|
|
2600
|
-
trace: "TRACE ",
|
|
2601
|
-
verbose: "VERBOSE"
|
|
2602
|
-
};
|
|
2603
|
-
var isBrowser = typeof window !== "undefined";
|
|
2604
|
-
function formatTimestamp(timestamp) {
|
|
2605
|
-
const date = new Date(timestamp);
|
|
2606
|
-
return date.toISOString().substring(11, 23);
|
|
2607
|
-
}
|
|
2608
|
-
function safeStringify(data) {
|
|
2609
|
-
const seen = /* @__PURE__ */ new WeakSet();
|
|
2610
|
-
return JSON.stringify(data, (key, value) => {
|
|
2611
|
-
if (typeof value === "object" && value !== null) {
|
|
2612
|
-
if (seen.has(value)) {
|
|
2613
|
-
return "[Circular]";
|
|
2614
|
-
}
|
|
2615
|
-
seen.add(value);
|
|
2616
|
-
}
|
|
2617
|
-
if (value instanceof Error) {
|
|
2618
|
-
return {
|
|
2619
|
-
name: value.name,
|
|
2620
|
-
message: value.message,
|
|
2621
|
-
stack: value.stack
|
|
2622
|
-
};
|
|
2623
|
-
}
|
|
2624
|
-
if (value instanceof Float32Array || value instanceof Int16Array) {
|
|
2625
|
-
return `${value.constructor.name}(${value.length})`;
|
|
2626
|
-
}
|
|
2627
|
-
if (ArrayBuffer.isView(value)) {
|
|
2628
|
-
return `${value.constructor.name}(${value.byteLength})`;
|
|
2629
|
-
}
|
|
2630
|
-
return value;
|
|
2631
|
-
});
|
|
2632
|
-
}
|
|
2633
|
-
var jsonFormatter = (entry) => {
|
|
2634
|
-
const output = {
|
|
2635
|
-
timestamp: entry.timestamp,
|
|
2636
|
-
level: entry.level,
|
|
2637
|
-
module: entry.module,
|
|
2638
|
-
message: entry.message
|
|
2639
|
-
};
|
|
2640
|
-
if (entry.data && Object.keys(entry.data).length > 0) {
|
|
2641
|
-
output.data = entry.data;
|
|
2642
|
-
}
|
|
2643
|
-
if (entry.error) {
|
|
2644
|
-
output.error = {
|
|
2645
|
-
name: entry.error.name,
|
|
2646
|
-
message: entry.error.message,
|
|
2647
|
-
stack: entry.error.stack
|
|
2648
|
-
};
|
|
2649
|
-
}
|
|
2650
|
-
return safeStringify(output);
|
|
2651
|
-
};
|
|
2652
|
-
var prettyFormatter = (entry) => {
|
|
2653
|
-
const time = formatTimestamp(entry.timestamp);
|
|
2654
|
-
const level = LEVEL_NAMES[entry.level];
|
|
2655
|
-
const module2 = entry.module;
|
|
2656
|
-
const message = entry.message;
|
|
2657
|
-
let output;
|
|
2658
|
-
if (isBrowser) {
|
|
2659
|
-
output = `${time} ${level} [${module2}] ${message}`;
|
|
2660
|
-
} else {
|
|
2661
|
-
const color = LEVEL_COLORS[entry.level];
|
|
2662
|
-
output = `${COLORS.gray}${time}${COLORS.reset} ${color}${level}${COLORS.reset} ${COLORS.cyan}[${module2}]${COLORS.reset} ${message}`;
|
|
2663
|
-
}
|
|
2664
|
-
if (entry.data && Object.keys(entry.data).length > 0) {
|
|
2665
|
-
const dataStr = safeStringify(entry.data);
|
|
2666
|
-
if (dataStr.length > 80) {
|
|
2667
|
-
output += "\n " + JSON.stringify(entry.data, null, 2).replace(/\n/g, "\n ");
|
|
2668
|
-
} else {
|
|
2669
|
-
output += " " + dataStr;
|
|
2670
|
-
}
|
|
2671
|
-
}
|
|
2672
|
-
if (entry.error) {
|
|
2673
|
-
output += `
|
|
2674
|
-
${entry.error.name}: ${entry.error.message}`;
|
|
2675
|
-
if (entry.error.stack) {
|
|
2676
|
-
const stackLines = entry.error.stack.split("\n").slice(1, 4);
|
|
2677
|
-
output += "\n " + stackLines.join("\n ");
|
|
2678
|
-
}
|
|
2679
|
-
}
|
|
2680
|
-
return output;
|
|
2681
|
-
};
|
|
2682
|
-
function getFormatter(format) {
|
|
2683
|
-
return format === "json" ? jsonFormatter : prettyFormatter;
|
|
2684
|
-
}
|
|
2685
|
-
function createBrowserConsoleArgs(entry) {
|
|
2686
|
-
const time = formatTimestamp(entry.timestamp);
|
|
2687
|
-
const level = entry.level.toUpperCase().padEnd(7);
|
|
2688
|
-
const module2 = entry.module;
|
|
2689
|
-
const message = entry.message;
|
|
2690
|
-
const styles = {
|
|
2691
|
-
time: "color: gray;",
|
|
2692
|
-
error: "color: red; font-weight: bold;",
|
|
2693
|
-
warn: "color: orange; font-weight: bold;",
|
|
2694
|
-
info: "color: blue;",
|
|
2695
|
-
debug: "color: cyan;",
|
|
2696
|
-
trace: "color: magenta;",
|
|
2697
|
-
verbose: "color: gray;",
|
|
2698
|
-
module: "color: teal; font-weight: bold;",
|
|
2699
|
-
message: "color: inherit;"
|
|
2700
|
-
};
|
|
2701
|
-
let formatStr = "%c%s %c%s %c[%s]%c %s";
|
|
2702
|
-
const args = [
|
|
2703
|
-
styles.time,
|
|
2704
|
-
time,
|
|
2705
|
-
styles[entry.level],
|
|
2706
|
-
level,
|
|
2707
|
-
styles.module,
|
|
2708
|
-
module2,
|
|
2709
|
-
styles.message,
|
|
2710
|
-
message
|
|
2711
|
-
];
|
|
2712
|
-
if (entry.data && Object.keys(entry.data).length > 0) {
|
|
2713
|
-
formatStr += " %o";
|
|
2714
|
-
args.push(entry.data);
|
|
2715
|
-
}
|
|
2716
|
-
return [formatStr, ...args];
|
|
2717
|
-
}
|
|
2718
|
-
|
|
2719
|
-
// src/logging/Logger.ts
|
|
2720
|
-
var isBrowser2 = typeof window !== "undefined";
|
|
2721
|
-
var globalConfig = { ...DEFAULT_LOGGING_CONFIG };
|
|
2722
|
-
function configureLogging(config) {
|
|
2723
|
-
globalConfig = { ...globalConfig, ...config };
|
|
2724
|
-
}
|
|
2725
|
-
function getLoggingConfig() {
|
|
2726
|
-
return { ...globalConfig };
|
|
2727
|
-
}
|
|
2728
|
-
function resetLoggingConfig() {
|
|
2729
|
-
globalConfig = { ...DEFAULT_LOGGING_CONFIG };
|
|
2730
|
-
}
|
|
2731
|
-
function setLogLevel(level) {
|
|
2732
|
-
globalConfig.level = level;
|
|
2733
|
-
}
|
|
2734
|
-
function setLoggingEnabled(enabled) {
|
|
2735
|
-
globalConfig.enabled = enabled;
|
|
2736
|
-
}
|
|
2737
|
-
var consoleSink = (entry) => {
|
|
2738
|
-
const consoleMethod = entry.level === "error" ? "error" : entry.level === "warn" ? "warn" : "log";
|
|
2739
|
-
if (globalConfig.format === "pretty" && isBrowser2) {
|
|
2740
|
-
const args = createBrowserConsoleArgs(entry);
|
|
2741
|
-
console[consoleMethod](...args);
|
|
2742
|
-
} else {
|
|
2743
|
-
const formatter = getFormatter(globalConfig.format);
|
|
2744
|
-
const formatted = formatter(entry);
|
|
2745
|
-
console[consoleMethod](formatted);
|
|
2746
|
-
}
|
|
2747
|
-
};
|
|
2748
|
-
function getActiveSink() {
|
|
2749
|
-
return globalConfig.sink || consoleSink;
|
|
2750
|
-
}
|
|
2751
|
-
function shouldLog(level) {
|
|
2752
|
-
if (!globalConfig.enabled) return false;
|
|
2753
|
-
return LOG_LEVEL_PRIORITY[level] <= LOG_LEVEL_PRIORITY[globalConfig.level];
|
|
2754
|
-
}
|
|
2755
|
-
var Logger = class _Logger {
|
|
2756
|
-
constructor(module2) {
|
|
2757
|
-
this.module = module2;
|
|
2758
|
-
}
|
|
2759
|
-
log(level, message, data) {
|
|
2760
|
-
if (!shouldLog(level)) return;
|
|
2761
|
-
const entry = {
|
|
2762
|
-
timestamp: Date.now(),
|
|
2763
|
-
level,
|
|
2764
|
-
module: this.module,
|
|
2765
|
-
message,
|
|
2766
|
-
data
|
|
2767
|
-
};
|
|
2768
|
-
if (data?.error instanceof Error) {
|
|
2769
|
-
entry.error = data.error;
|
|
2770
|
-
const { error, ...rest } = data;
|
|
2771
|
-
entry.data = Object.keys(rest).length > 0 ? rest : void 0;
|
|
2159
|
+
freedBytes += model.size;
|
|
2160
|
+
console.log(`[ModelCache] Evicted: ${model.url} (${formatBytes(model.size)})`);
|
|
2161
|
+
}
|
|
2162
|
+
span?.setAttributes({
|
|
2163
|
+
"eviction.bytes_freed": freedBytes,
|
|
2164
|
+
"eviction.models_evicted": evictedUrls.length
|
|
2165
|
+
});
|
|
2166
|
+
span?.end();
|
|
2167
|
+
if (freedBytes > 0) {
|
|
2168
|
+
telemetry?.incrementCounter("omote.cache.eviction", evictedUrls.length, {
|
|
2169
|
+
bytes_freed: String(freedBytes)
|
|
2170
|
+
});
|
|
2171
|
+
}
|
|
2172
|
+
return evictedUrls;
|
|
2173
|
+
} catch (err) {
|
|
2174
|
+
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
2175
|
+
console.warn("[ModelCache] Eviction failed:", err);
|
|
2176
|
+
return [];
|
|
2772
2177
|
}
|
|
2773
|
-
getActiveSink()(entry);
|
|
2774
|
-
}
|
|
2775
|
-
error(message, data) {
|
|
2776
|
-
this.log("error", message, data);
|
|
2777
|
-
}
|
|
2778
|
-
warn(message, data) {
|
|
2779
|
-
this.log("warn", message, data);
|
|
2780
2178
|
}
|
|
2781
|
-
|
|
2782
|
-
|
|
2179
|
+
/**
|
|
2180
|
+
* Get storage quota information
|
|
2181
|
+
*
|
|
2182
|
+
* Uses navigator.storage.estimate() to get quota details.
|
|
2183
|
+
* Returns null if the API is unavailable.
|
|
2184
|
+
*
|
|
2185
|
+
* @returns Quota info or null if unavailable
|
|
2186
|
+
*
|
|
2187
|
+
* @example
|
|
2188
|
+
* ```typescript
|
|
2189
|
+
* const cache = getModelCache();
|
|
2190
|
+
* const quota = await cache.getQuotaInfo();
|
|
2191
|
+
* if (quota) {
|
|
2192
|
+
* console.log(`Using ${quota.percentUsed.toFixed(1)}% of quota`);
|
|
2193
|
+
* }
|
|
2194
|
+
* ```
|
|
2195
|
+
*/
|
|
2196
|
+
async getQuotaInfo() {
|
|
2197
|
+
if (!navigator?.storage?.estimate) {
|
|
2198
|
+
return null;
|
|
2199
|
+
}
|
|
2200
|
+
try {
|
|
2201
|
+
const estimate = await navigator.storage.estimate();
|
|
2202
|
+
const usedBytes = estimate.usage || 0;
|
|
2203
|
+
const quotaBytes = estimate.quota || 0;
|
|
2204
|
+
const percentUsed = quotaBytes > 0 ? usedBytes / quotaBytes * 100 : 0;
|
|
2205
|
+
const stats = await this.getStats();
|
|
2206
|
+
return {
|
|
2207
|
+
usedBytes,
|
|
2208
|
+
quotaBytes,
|
|
2209
|
+
percentUsed,
|
|
2210
|
+
cacheBytes: stats.totalSize
|
|
2211
|
+
};
|
|
2212
|
+
} catch {
|
|
2213
|
+
return null;
|
|
2214
|
+
}
|
|
2783
2215
|
}
|
|
2784
|
-
|
|
2785
|
-
|
|
2216
|
+
};
|
|
2217
|
+
var cacheInstance = null;
/**
 * Return the shared ModelCache instance.
 *
 * The instance is created on the first call and reused by every later call.
 */
function getModelCache() {
  if (cacheInstance) {
    return cacheInstance;
  }
  cacheInstance = new ModelCache();
  return cacheInstance;
}
|
|
2224
|
+
var MAX_CACHE_SIZE_BYTES = 500 * 1024 * 1024;
/**
 * Fetch a URL as a binary buffer, serving it from the model cache when possible.
 *
 * @param url - Resource to download.
 * @param optionsOrProgress - Either a progress callback `(loaded, total)` or an
 *   options object `{ version, validateStale, onProgress }`:
 *   - `version`: when set, the cache key is `getCacheKey(url, version)` instead of `url`.
 *   - `validateStale` (default false): look the entry up with
 *     `cache.getWithValidation()` and refetch when it is reported stale.
 *   - `onProgress`: called with `(loaded, total)` while streaming, and once with
 *     the full size on a cache hit.
 * @returns The cached buffer on a hit, otherwise the downloaded bytes.
 * @throws Error when the HTTP response is not ok; any fetch/cache error is
 *   recorded on the telemetry span and rethrown.
 *
 * Responses larger than MAX_CACHE_SIZE_BYTES are returned but not written to
 * the cache. The size is checked against the Content-Length header and again
 * against the number of bytes actually received, so responses without a usable
 * Content-Length cannot bypass the limit.
 */
async function fetchWithCache(url, optionsOrProgress) {
  let options = {};
  if (typeof optionsOrProgress === "function") {
    options = { onProgress: optionsOrProgress };
  } else if (optionsOrProgress) {
    options = optionsOrProgress;
  }
  const { version, validateStale = false, onProgress } = options;
  const cache = getModelCache();
  const cacheKey = version ? getCacheKey(url, version) : url;
  const telemetry = getTelemetry();
  const span = telemetry?.startSpan("fetchWithCache", {
    "fetch.url": url,
    ...version && { "fetch.version": version },
    "fetch.validate_stale": validateStale
  });
  if (validateStale) {
    const validation = await cache.getWithValidation(cacheKey, url);
    if (validation.data && !validation.stale) {
      console.log(`[ModelCache] Cache hit (validated): ${url} (${(validation.data.byteLength / 1024 / 1024).toFixed(1)}MB)`);
      onProgress?.(validation.data.byteLength, validation.data.byteLength);
      span?.setAttributes({
        "fetch.cache_hit": true,
        "fetch.cache_validated": true,
        "fetch.cache_stale": false,
        "fetch.size_bytes": validation.data.byteLength
      });
      span?.end();
      return validation.data;
    }
    if (validation.stale) {
      console.log(`[ModelCache] Cache stale, refetching: ${url}`);
      span?.setAttributes({
        "fetch.cache_hit": true,
        "fetch.cache_validated": true,
        "fetch.cache_stale": true
      });
    }
  } else {
    const cached = await cache.get(cacheKey);
    if (cached) {
      console.log(`[ModelCache] Cache hit: ${url} (${(cached.byteLength / 1024 / 1024).toFixed(1)}MB)`);
      onProgress?.(cached.byteLength, cached.byteLength);
      span?.setAttributes({
        "fetch.cache_hit": true,
        "fetch.size_bytes": cached.byteLength
      });
      span?.end();
      return cached;
    }
  }
  span?.setAttributes({ "fetch.cache_hit": false });
  console.log(`[ModelCache] Cache miss, fetching: ${url}`);
  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    const contentLength = response.headers.get("content-length");
    const declaredLength = contentLength ? Number.parseInt(contentLength, 10) : 0;
    // A missing or unparsable header means "size unknown", represented as 0.
    const total = Number.isFinite(declaredLength) && declaredLength > 0 ? declaredLength : 0;
    const etag = response.headers.get("etag") ?? void 0;
    const limitMB = (MAX_CACHE_SIZE_BYTES / 1024 / 1024).toFixed(0);
    const logTooLarge = (bytes) => {
      console.log(`[ModelCache] File too large for IndexedDB (${(bytes / 1024 / 1024).toFixed(0)}MB > ${limitMB}MB), using HTTP cache only`);
    };
    const declaredTooLarge = total > MAX_CACHE_SIZE_BYTES;
    if (declaredTooLarge) {
      logTooLarge(total);
    }
    // The header can be absent (e.g. chunked transfer), so the decision is
    // re-evaluated against the bytes that were actually received.
    const exceedsCacheLimit = (actualBytes) => {
      if (declaredTooLarge) {
        return true;
      }
      if (actualBytes > MAX_CACHE_SIZE_BYTES) {
        logTooLarge(actualBytes);
        return true;
      }
      return false;
    };
    if (!response.body) {
      // No readable stream available: download in one piece, without progress.
      const data2 = await response.arrayBuffer();
      const tooLargeForCache2 = exceedsCacheLimit(data2.byteLength);
      if (!tooLargeForCache2) {
        await cache.set(cacheKey, data2, etag, version);
      }
      span?.setAttributes({
        "fetch.size_bytes": data2.byteLength,
        "fetch.cached_to_indexeddb": !tooLargeForCache2
      });
      span?.end();
      return data2;
    }
    const reader = response.body.getReader();
    const chunks = [];
    let loaded = 0;
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
      loaded += value.length;
      // Fall back to the running count when the total size is unknown.
      onProgress?.(loaded, total || loaded);
    }
    // Concatenate the streamed chunks into one contiguous buffer.
    const data = new Uint8Array(loaded);
    let offset = 0;
    for (const chunk of chunks) {
      data.set(chunk, offset);
      offset += chunk.length;
    }
    const buffer = data.buffer;
    const tooLargeForCache = exceedsCacheLimit(buffer.byteLength);
    if (!tooLargeForCache) {
      await cache.set(cacheKey, buffer, etag, version);
      console.log(`[ModelCache] Cached: ${url} (${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB)`);
    }
    span?.setAttributes({
      "fetch.size_bytes": buffer.byteLength,
      "fetch.cached_to_indexeddb": !tooLargeForCache
    });
    span?.end();
    return buffer;
  } catch (error) {
    span?.endWithError(error instanceof Error ? error : new Error(String(error)));
    throw error;
  }
}
|
|
2334
|
+
/**
 * Make sure every URL in `urls` is present in the model cache.
 *
 * URLs are handled one at a time, in order. Before each one,
 * `onProgress(index, urls.length, url)` is called; URLs the cache already
 * has are skipped, the rest are downloaded through fetchWithCache().
 * After the last URL, `onProgress(urls.length, urls.length, "done")` is called.
 *
 * @param urls - Model URLs to preload.
 * @param onProgress - Optional callback `(completed, total, currentUrl)`.
 */
async function preloadModels(urls, onProgress) {
  const modelCache = getModelCache();
  for (const [index, modelUrl] of urls.entries()) {
    onProgress?.(index, urls.length, modelUrl);
    const alreadyCached = await modelCache.has(modelUrl);
    if (!alreadyCached) {
      await fetchWithCache(modelUrl);
    } else {
      console.log(`[ModelCache] Already cached: ${modelUrl}`);
    }
  }
  onProgress?.(urls.length, urls.length, "done");
}
|
|
2347
|
+
/**
 * Render a byte count as a short human-readable string.
 *
 * Values below 1024 are printed as-is with a " B" suffix; larger values are
 * scaled by powers of 1024 and printed with one decimal as KB, MB or GB.
 * GB is the largest unit, so anything from 1024^3 upward stays in GB.
 *
 * @param bytes - Size in bytes.
 * @returns Formatted size, e.g. "512 B", "1.5 KB", "383.0 MB".
 */
function formatBytes(bytes) {
  const STEP = 1024;
  if (bytes < STEP) {
    return `${bytes} B`;
  }
  const units = ["KB", "MB", "GB"];
  let scaled = bytes / STEP;
  let unitIndex = 0;
  // `!(scaled < STEP)` rather than `scaled >= STEP` so that values that fail
  // every comparison (NaN) end up in the last unit, like a fall-through chain.
  while (unitIndex < units.length - 1 && !(scaled < STEP)) {
    scaled /= STEP;
    unitIndex += 1;
  }
  return `${scaled.toFixed(1)} ${units[unitIndex]}`;
}
|
|
2806
|
-
var noopLogger = {
|
|
2807
|
-
module: "noop",
|
|
2808
|
-
error: () => {
|
|
2809
|
-
},
|
|
2810
|
-
warn: () => {
|
|
2811
|
-
},
|
|
2812
|
-
info: () => {
|
|
2813
|
-
},
|
|
2814
|
-
debug: () => {
|
|
2815
|
-
},
|
|
2816
|
-
trace: () => {
|
|
2817
|
-
},
|
|
2818
|
-
verbose: () => {
|
|
2819
|
-
},
|
|
2820
|
-
child: () => noopLogger
|
|
2821
|
-
};
|
|
2822
2353
|
|
|
2823
2354
|
// src/utils/runtime.ts
|
|
2824
2355
|
function isIOSSafari() {
|
|
@@ -2889,7 +2420,7 @@ function isSafari() {
|
|
|
2889
2420
|
const ua = navigator.userAgent.toLowerCase();
|
|
2890
2421
|
return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
|
|
2891
2422
|
}
|
|
2892
|
-
function
|
|
2423
|
+
function shouldUseCpuA2E() {
|
|
2893
2424
|
return isSafari() || isIOS();
|
|
2894
2425
|
}
|
|
2895
2426
|
function isSpeechRecognitionAvailable() {
|
|
@@ -2899,22 +2430,22 @@ function isSpeechRecognitionAvailable() {
|
|
|
2899
2430
|
function shouldUseNativeASR() {
|
|
2900
2431
|
return (isIOS() || isSafari()) && isSpeechRecognitionAvailable();
|
|
2901
2432
|
}
|
|
2902
|
-
function
|
|
2433
|
+
function shouldUseServerA2E() {
|
|
2903
2434
|
return isIOS();
|
|
2904
2435
|
}
|
|
2905
2436
|
|
|
2906
2437
|
// src/inference/onnxLoader.ts
|
|
2907
|
-
var
|
|
2438
|
+
var logger2 = createLogger("OnnxLoader");
|
|
2908
2439
|
var ortInstance = null;
|
|
2909
2440
|
var loadedBackend = null;
|
|
2910
2441
|
var WASM_CDN_PATH = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
|
|
2911
2442
|
async function isWebGPUAvailable() {
|
|
2912
2443
|
if (isIOS()) {
|
|
2913
|
-
|
|
2444
|
+
logger2.debug("WebGPU check: disabled on iOS (asyncify bundle crashes WebKit)");
|
|
2914
2445
|
return false;
|
|
2915
2446
|
}
|
|
2916
2447
|
if (!hasWebGPUApi()) {
|
|
2917
|
-
|
|
2448
|
+
logger2.debug("WebGPU check: navigator.gpu not available", {
|
|
2918
2449
|
isSecureContext: typeof window !== "undefined" ? window.isSecureContext : "N/A"
|
|
2919
2450
|
});
|
|
2920
2451
|
return false;
|
|
@@ -2922,19 +2453,19 @@ async function isWebGPUAvailable() {
|
|
|
2922
2453
|
try {
|
|
2923
2454
|
const adapter = await navigator.gpu.requestAdapter();
|
|
2924
2455
|
if (!adapter) {
|
|
2925
|
-
|
|
2456
|
+
logger2.debug("WebGPU check: No adapter available");
|
|
2926
2457
|
return false;
|
|
2927
2458
|
}
|
|
2928
2459
|
const device = await adapter.requestDevice();
|
|
2929
2460
|
if (!device) {
|
|
2930
|
-
|
|
2461
|
+
logger2.debug("WebGPU check: Could not create device");
|
|
2931
2462
|
return false;
|
|
2932
2463
|
}
|
|
2933
2464
|
device.destroy();
|
|
2934
|
-
|
|
2465
|
+
logger2.debug("WebGPU check: Available and working");
|
|
2935
2466
|
return true;
|
|
2936
2467
|
} catch (err) {
|
|
2937
|
-
|
|
2468
|
+
logger2.debug("WebGPU check: Error during availability check", { error: err });
|
|
2938
2469
|
return false;
|
|
2939
2470
|
}
|
|
2940
2471
|
}
|
|
@@ -2944,11 +2475,11 @@ function applyIOSWasmMemoryPatch() {
|
|
|
2944
2475
|
iosWasmPatched = true;
|
|
2945
2476
|
const OrigMemory = WebAssembly.Memory;
|
|
2946
2477
|
const MAX_IOS_PAGES = 32768;
|
|
2947
|
-
|
|
2478
|
+
logger2.info("Applying iOS WASM memory patch (max\u21922GB, shared preserved)");
|
|
2948
2479
|
WebAssembly.Memory = function IOSPatchedMemory(descriptor) {
|
|
2949
2480
|
const patched = { ...descriptor };
|
|
2950
2481
|
if (patched.maximum !== void 0 && patched.maximum > MAX_IOS_PAGES) {
|
|
2951
|
-
|
|
2482
|
+
logger2.info("iOS memory patch: capping maximum", {
|
|
2952
2483
|
original: patched.maximum,
|
|
2953
2484
|
capped: MAX_IOS_PAGES,
|
|
2954
2485
|
shared: patched.shared,
|
|
@@ -2967,7 +2498,7 @@ function configureWasm(ort) {
|
|
|
2967
2498
|
ort.env.wasm.numThreads = numThreads;
|
|
2968
2499
|
ort.env.wasm.simd = true;
|
|
2969
2500
|
ort.env.wasm.proxy = enableProxy;
|
|
2970
|
-
|
|
2501
|
+
logger2.info("WASM configured", {
|
|
2971
2502
|
numThreads,
|
|
2972
2503
|
simd: true,
|
|
2973
2504
|
proxy: enableProxy,
|
|
@@ -2979,12 +2510,12 @@ async function getOnnxRuntime(backend) {
|
|
|
2979
2510
|
return ortInstance;
|
|
2980
2511
|
}
|
|
2981
2512
|
if (ortInstance && loadedBackend !== backend) {
|
|
2982
|
-
|
|
2513
|
+
logger2.warn(
|
|
2983
2514
|
`ONNX Runtime already loaded with ${loadedBackend} backend. Cannot switch to ${backend}. Returning existing instance.`
|
|
2984
2515
|
);
|
|
2985
2516
|
return ortInstance;
|
|
2986
2517
|
}
|
|
2987
|
-
|
|
2518
|
+
logger2.info(`Loading ONNX Runtime with ${backend} backend...`);
|
|
2988
2519
|
applyIOSWasmMemoryPatch();
|
|
2989
2520
|
try {
|
|
2990
2521
|
if (backend === "wasm" && (isIOS() || isSafari())) {
|
|
@@ -2999,10 +2530,10 @@ async function getOnnxRuntime(backend) {
|
|
|
2999
2530
|
}
|
|
3000
2531
|
loadedBackend = backend;
|
|
3001
2532
|
configureWasm(ortInstance);
|
|
3002
|
-
|
|
2533
|
+
logger2.info(`ONNX Runtime loaded successfully`, { backend });
|
|
3003
2534
|
return ortInstance;
|
|
3004
2535
|
} catch (err) {
|
|
3005
|
-
|
|
2536
|
+
logger2.error(`Failed to load ONNX Runtime with ${backend} backend`, {
|
|
3006
2537
|
error: err
|
|
3007
2538
|
});
|
|
3008
2539
|
throw new Error(
|
|
@@ -3013,7 +2544,7 @@ async function getOnnxRuntime(backend) {
|
|
|
3013
2544
|
async function getOnnxRuntimeForPreference(preference = "auto") {
|
|
3014
2545
|
const webgpuAvailable = await isWebGPUAvailable();
|
|
3015
2546
|
const backend = resolveBackend(preference, webgpuAvailable);
|
|
3016
|
-
|
|
2547
|
+
logger2.info("Resolved backend preference", {
|
|
3017
2548
|
preference,
|
|
3018
2549
|
webgpuAvailable,
|
|
3019
2550
|
resolvedBackend: backend
|
|
@@ -3047,42 +2578,6 @@ function getSessionOptions(backend) {
|
|
|
3047
2578
|
graphOptimizationLevel: "all"
|
|
3048
2579
|
};
|
|
3049
2580
|
}
|
|
3050
|
-
async function createSessionWithFallback(modelBuffer, preferredBackend) {
|
|
3051
|
-
const ort = await getOnnxRuntime(preferredBackend);
|
|
3052
|
-
const modelData = new Uint8Array(modelBuffer);
|
|
3053
|
-
if (preferredBackend === "webgpu") {
|
|
3054
|
-
try {
|
|
3055
|
-
const options2 = getSessionOptions("webgpu");
|
|
3056
|
-
const session2 = await ort.InferenceSession.create(modelData, options2);
|
|
3057
|
-
logger.info("Session created with WebGPU backend");
|
|
3058
|
-
return { session: session2, backend: "webgpu" };
|
|
3059
|
-
} catch (err) {
|
|
3060
|
-
logger.warn("WebGPU session creation failed, falling back to WASM", {
|
|
3061
|
-
error: err instanceof Error ? err.message : String(err)
|
|
3062
|
-
});
|
|
3063
|
-
}
|
|
3064
|
-
}
|
|
3065
|
-
const options = getSessionOptions("wasm");
|
|
3066
|
-
const session = await ort.InferenceSession.create(modelData, options);
|
|
3067
|
-
logger.info("Session created with WASM backend");
|
|
3068
|
-
return { session, backend: "wasm" };
|
|
3069
|
-
}
|
|
3070
|
-
function getLoadedBackend() {
|
|
3071
|
-
return loadedBackend;
|
|
3072
|
-
}
|
|
3073
|
-
function isOnnxRuntimeLoaded() {
|
|
3074
|
-
return ortInstance !== null;
|
|
3075
|
-
}
|
|
3076
|
-
async function preloadOnnxRuntime(preference = "auto") {
|
|
3077
|
-
if (ortInstance) {
|
|
3078
|
-
logger.info("ONNX Runtime already preloaded", { backend: loadedBackend });
|
|
3079
|
-
return loadedBackend;
|
|
3080
|
-
}
|
|
3081
|
-
logger.info("Preloading ONNX Runtime...", { preference });
|
|
3082
|
-
const { backend } = await getOnnxRuntimeForPreference(preference);
|
|
3083
|
-
logger.info("ONNX Runtime preloaded", { backend });
|
|
3084
|
-
return backend;
|
|
3085
|
-
}
|
|
3086
2581
|
|
|
3087
2582
|
// src/inference/blendshapeUtils.ts
|
|
3088
2583
|
var LAM_BLENDSHAPES = [
|
|
@@ -3232,16 +2727,19 @@ var WAV2ARKIT_BLENDSHAPES = [
|
|
|
3232
2727
|
var REMAP_WAV2ARKIT_TO_LAM = WAV2ARKIT_BLENDSHAPES.map(
|
|
3233
2728
|
(name) => LAM_BLENDSHAPES.indexOf(name)
|
|
3234
2729
|
);
|
|
3235
|
-
function
|
|
3236
|
-
const
|
|
3237
|
-
|
|
3238
|
-
|
|
2730
|
+
/**
 * Move each value of `current` a fraction of the way toward `target`.
 *
 * Each output entry is `current[i] + (target[i] - current[i]) * factor`.
 * The output is as long as the longer input; entries missing from the
 * shorter input are treated as 0.
 *
 * @param current - Current blendshape values.
 * @param target - Values to move toward.
 * @param factor - Interpolation fraction (default 0.3).
 * @returns A new array of interpolated values.
 */
function lerpBlendshapes(current, target, factor = 0.3) {
  const size = Math.max(current.length, target.length);
  return new Array(size).fill(0).map((_, idx) => {
    const from = current[idx] ?? 0;
    const to = target[idx] ?? 0;
    return from + (to - from) * factor;
  });
}
|
|
3242
2740
|
|
|
3243
2741
|
// src/inference/Wav2Vec2Inference.ts
|
|
3244
|
-
var
|
|
2742
|
+
var logger3 = createLogger("Wav2Vec2");
|
|
3245
2743
|
var CTC_VOCAB = [
|
|
3246
2744
|
"<pad>",
|
|
3247
2745
|
"<s>",
|
|
@@ -3291,6 +2789,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3291
2789
|
this.poisoned = false;
|
|
3292
2790
|
this.config = config;
|
|
3293
2791
|
this.numIdentityClasses = config.numIdentityClasses ?? 12;
|
|
2792
|
+
this.chunkSize = config.chunkSize ?? 16e3;
|
|
3294
2793
|
}
|
|
3295
2794
|
get backend() {
|
|
3296
2795
|
return this.session ? this._backend : null;
|
|
@@ -3320,30 +2819,30 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3320
2819
|
"model.backend_requested": this.config.backend || "auto"
|
|
3321
2820
|
});
|
|
3322
2821
|
try {
|
|
3323
|
-
|
|
2822
|
+
logger3.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
|
|
3324
2823
|
const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
|
|
3325
2824
|
this.ort = ort;
|
|
3326
2825
|
this._backend = backend;
|
|
3327
|
-
|
|
2826
|
+
logger3.info("ONNX Runtime loaded", { backend: this._backend });
|
|
3328
2827
|
const modelUrl = this.config.modelUrl;
|
|
3329
2828
|
const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
|
|
3330
2829
|
const sessionOptions = getSessionOptions(this._backend);
|
|
3331
2830
|
let isCached = false;
|
|
3332
2831
|
if (isIOS()) {
|
|
3333
|
-
|
|
2832
|
+
logger3.info("iOS: passing model URLs directly to ORT (low-memory path)", {
|
|
3334
2833
|
modelUrl,
|
|
3335
2834
|
dataUrl
|
|
3336
2835
|
});
|
|
3337
2836
|
if (dataUrl) {
|
|
3338
2837
|
const dataFilename = dataUrl.split("/").pop();
|
|
3339
|
-
|
|
2838
|
+
logger3.info("iOS: setting externalData", { dataFilename, dataUrl });
|
|
3340
2839
|
sessionOptions.externalData = [{
|
|
3341
2840
|
path: dataFilename,
|
|
3342
2841
|
data: dataUrl
|
|
3343
2842
|
// URL string — ORT fetches directly into WASM
|
|
3344
2843
|
}];
|
|
3345
2844
|
}
|
|
3346
|
-
|
|
2845
|
+
logger3.info("iOS: calling InferenceSession.create() with URL string", {
|
|
3347
2846
|
modelUrl,
|
|
3348
2847
|
sessionOptions: JSON.stringify(
|
|
3349
2848
|
sessionOptions,
|
|
@@ -3353,14 +2852,14 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3353
2852
|
try {
|
|
3354
2853
|
this.session = await this.ort.InferenceSession.create(modelUrl, sessionOptions);
|
|
3355
2854
|
} catch (sessionErr) {
|
|
3356
|
-
|
|
2855
|
+
logger3.error("iOS: InferenceSession.create() failed", {
|
|
3357
2856
|
error: sessionErr instanceof Error ? sessionErr.message : String(sessionErr),
|
|
3358
2857
|
errorType: sessionErr?.constructor?.name,
|
|
3359
2858
|
stack: sessionErr instanceof Error ? sessionErr.stack : void 0
|
|
3360
2859
|
});
|
|
3361
2860
|
throw sessionErr;
|
|
3362
2861
|
}
|
|
3363
|
-
|
|
2862
|
+
logger3.info("iOS: session created successfully", {
|
|
3364
2863
|
inputNames: this.session.inputNames,
|
|
3365
2864
|
outputNames: this.session.outputNames
|
|
3366
2865
|
});
|
|
@@ -3369,15 +2868,15 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3369
2868
|
isCached = await cache.has(modelUrl);
|
|
3370
2869
|
let modelBuffer;
|
|
3371
2870
|
if (isCached) {
|
|
3372
|
-
|
|
2871
|
+
logger3.debug("Loading model from cache", { modelUrl });
|
|
3373
2872
|
modelBuffer = await cache.get(modelUrl);
|
|
3374
2873
|
if (!modelBuffer) {
|
|
3375
|
-
|
|
2874
|
+
logger3.warn("Cache corruption detected, clearing and retrying", { modelUrl });
|
|
3376
2875
|
await cache.delete(modelUrl);
|
|
3377
2876
|
modelBuffer = await fetchWithCache(modelUrl);
|
|
3378
2877
|
}
|
|
3379
2878
|
} else {
|
|
3380
|
-
|
|
2879
|
+
logger3.debug("Fetching and caching model", { modelUrl });
|
|
3381
2880
|
modelBuffer = await fetchWithCache(modelUrl);
|
|
3382
2881
|
}
|
|
3383
2882
|
if (!modelBuffer) {
|
|
@@ -3388,31 +2887,31 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3388
2887
|
try {
|
|
3389
2888
|
const isDataCached = await cache.has(dataUrl);
|
|
3390
2889
|
if (isDataCached) {
|
|
3391
|
-
|
|
2890
|
+
logger3.debug("Loading external data from cache", { dataUrl });
|
|
3392
2891
|
externalDataBuffer = await cache.get(dataUrl);
|
|
3393
2892
|
if (!externalDataBuffer) {
|
|
3394
|
-
|
|
2893
|
+
logger3.warn("Cache corruption for external data, retrying", { dataUrl });
|
|
3395
2894
|
await cache.delete(dataUrl);
|
|
3396
2895
|
externalDataBuffer = await fetchWithCache(dataUrl);
|
|
3397
2896
|
}
|
|
3398
2897
|
} else {
|
|
3399
|
-
|
|
2898
|
+
logger3.info("Fetching external model data", {
|
|
3400
2899
|
dataUrl,
|
|
3401
2900
|
note: "This may be a large download (383MB+)"
|
|
3402
2901
|
});
|
|
3403
2902
|
externalDataBuffer = await fetchWithCache(dataUrl);
|
|
3404
2903
|
}
|
|
3405
|
-
|
|
2904
|
+
logger3.info("External data loaded", {
|
|
3406
2905
|
size: formatBytes(externalDataBuffer.byteLength)
|
|
3407
2906
|
});
|
|
3408
2907
|
} catch (err) {
|
|
3409
|
-
|
|
2908
|
+
logger3.debug("No external data file found (single-file model)", {
|
|
3410
2909
|
dataUrl,
|
|
3411
2910
|
error: err.message
|
|
3412
2911
|
});
|
|
3413
2912
|
}
|
|
3414
2913
|
}
|
|
3415
|
-
|
|
2914
|
+
logger3.debug("Creating ONNX session", {
|
|
3416
2915
|
graphSize: formatBytes(modelBuffer.byteLength),
|
|
3417
2916
|
externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
|
|
3418
2917
|
backend: this._backend
|
|
@@ -3427,12 +2926,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3427
2926
|
const modelData = new Uint8Array(modelBuffer);
|
|
3428
2927
|
this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
|
|
3429
2928
|
}
|
|
3430
|
-
|
|
2929
|
+
logger3.info("ONNX session created successfully", {
|
|
3431
2930
|
executionProvider: this._backend,
|
|
3432
2931
|
backend: this._backend
|
|
3433
2932
|
});
|
|
3434
2933
|
const loadTimeMs = performance.now() - startTime;
|
|
3435
|
-
|
|
2934
|
+
logger3.info("Model loaded successfully", {
|
|
3436
2935
|
backend: this._backend,
|
|
3437
2936
|
loadTimeMs: Math.round(loadTimeMs),
|
|
3438
2937
|
inputs: this.session.inputNames,
|
|
@@ -3448,13 +2947,13 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3448
2947
|
model: "wav2vec2",
|
|
3449
2948
|
backend: this._backend
|
|
3450
2949
|
});
|
|
3451
|
-
|
|
2950
|
+
logger3.debug("Running warmup inference to initialize GPU context");
|
|
3452
2951
|
const warmupStart = performance.now();
|
|
3453
|
-
const warmupAudio = new Float32Array(
|
|
2952
|
+
const warmupAudio = new Float32Array(this.chunkSize);
|
|
3454
2953
|
const warmupIdentity = new Float32Array(this.numIdentityClasses);
|
|
3455
2954
|
warmupIdentity[0] = 1;
|
|
3456
2955
|
const warmupFeeds = {
|
|
3457
|
-
"audio": new this.ort.Tensor("float32", warmupAudio, [1,
|
|
2956
|
+
"audio": new this.ort.Tensor("float32", warmupAudio, [1, this.chunkSize]),
|
|
3458
2957
|
"identity": new this.ort.Tensor("float32", warmupIdentity, [1, this.numIdentityClasses])
|
|
3459
2958
|
};
|
|
3460
2959
|
const WARMUP_TIMEOUT_MS = 15e3;
|
|
@@ -3464,12 +2963,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3464
2963
|
]);
|
|
3465
2964
|
const warmupTimeMs = performance.now() - warmupStart;
|
|
3466
2965
|
if (warmupResult === "timeout") {
|
|
3467
|
-
|
|
2966
|
+
logger3.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
|
|
3468
2967
|
timeoutMs: WARMUP_TIMEOUT_MS,
|
|
3469
2968
|
backend: this._backend
|
|
3470
2969
|
});
|
|
3471
2970
|
} else {
|
|
3472
|
-
|
|
2971
|
+
logger3.info("Warmup inference complete", {
|
|
3473
2972
|
warmupTimeMs: Math.round(warmupTimeMs),
|
|
3474
2973
|
backend: this._backend
|
|
3475
2974
|
});
|
|
@@ -3497,11 +2996,10 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3497
2996
|
}
|
|
3498
2997
|
/**
|
|
3499
2998
|
* Run inference on raw audio
|
|
3500
|
-
* @param audioSamples - Float32Array of raw audio at 16kHz
|
|
2999
|
+
* @param audioSamples - Float32Array of raw audio at 16kHz
|
|
3501
3000
|
* @param identityIndex - Optional identity index (0-11, default 0 = neutral)
|
|
3502
3001
|
*
|
|
3503
|
-
*
|
|
3504
|
-
* Audio will be zero-padded or truncated to 16000 samples.
|
|
3002
|
+
* Audio will be zero-padded or truncated to chunkSize samples.
|
|
3505
3003
|
*/
|
|
3506
3004
|
async infer(audioSamples, identityIndex = 0) {
|
|
3507
3005
|
if (!this.session) {
|
|
@@ -3512,20 +3010,20 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3512
3010
|
}
|
|
3513
3011
|
const audioSamplesCopy = new Float32Array(audioSamples);
|
|
3514
3012
|
let audio;
|
|
3515
|
-
if (audioSamplesCopy.length ===
|
|
3013
|
+
if (audioSamplesCopy.length === this.chunkSize) {
|
|
3516
3014
|
audio = audioSamplesCopy;
|
|
3517
|
-
} else if (audioSamplesCopy.length <
|
|
3518
|
-
audio = new Float32Array(
|
|
3015
|
+
} else if (audioSamplesCopy.length < this.chunkSize) {
|
|
3016
|
+
audio = new Float32Array(this.chunkSize);
|
|
3519
3017
|
audio.set(audioSamplesCopy, 0);
|
|
3520
3018
|
} else {
|
|
3521
|
-
audio = audioSamplesCopy.slice(0,
|
|
3019
|
+
audio = audioSamplesCopy.slice(0, this.chunkSize);
|
|
3522
3020
|
}
|
|
3523
3021
|
const identity = new Float32Array(this.numIdentityClasses);
|
|
3524
3022
|
identity[Math.max(0, Math.min(identityIndex, this.numIdentityClasses - 1))] = 1;
|
|
3525
3023
|
const audioCopy = new Float32Array(audio);
|
|
3526
3024
|
const identityCopy = new Float32Array(identity);
|
|
3527
3025
|
const feeds = {
|
|
3528
|
-
"audio": new this.ort.Tensor("float32", audioCopy, [1,
|
|
3026
|
+
"audio": new this.ort.Tensor("float32", audioCopy, [1, this.chunkSize]),
|
|
3529
3027
|
"identity": new this.ort.Tensor("float32", identityCopy, [1, this.numIdentityClasses])
|
|
3530
3028
|
};
|
|
3531
3029
|
return this.queueInference(feeds);
|
|
@@ -3561,7 +3059,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3561
3059
|
const telemetry = getTelemetry();
|
|
3562
3060
|
const span = telemetry?.startSpan("Wav2Vec2.infer", {
|
|
3563
3061
|
"inference.backend": this._backend,
|
|
3564
|
-
"inference.input_samples":
|
|
3062
|
+
"inference.input_samples": this.chunkSize
|
|
3565
3063
|
});
|
|
3566
3064
|
try {
|
|
3567
3065
|
const startTime = performance.now();
|
|
@@ -3600,7 +3098,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3600
3098
|
blendshapes.push(symmetrizeBlendshapes(rawFrame));
|
|
3601
3099
|
}
|
|
3602
3100
|
const text = this.decodeCTC(asrLogits);
|
|
3603
|
-
|
|
3101
|
+
logger3.trace("Inference completed", {
|
|
3604
3102
|
inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
|
|
3605
3103
|
numA2EFrames,
|
|
3606
3104
|
numASRFrames,
|
|
@@ -3634,12 +3132,12 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
|
3634
3132
|
const errMsg = err instanceof Error ? err.message : String(err);
|
|
3635
3133
|
if (errMsg.includes("timed out")) {
|
|
3636
3134
|
this.poisoned = true;
|
|
3637
|
-
|
|
3135
|
+
logger3.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
|
|
3638
3136
|
backend: this._backend,
|
|
3639
3137
|
timeoutMs: _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
|
|
3640
3138
|
});
|
|
3641
3139
|
} else {
|
|
3642
|
-
|
|
3140
|
+
logger3.error("Inference failed", { error: errMsg, backend: this._backend });
|
|
3643
3141
|
}
|
|
3644
3142
|
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
3645
3143
|
telemetry?.incrementCounter("omote.inference.total", 1, {
|
|
@@ -3680,56 +3178,74 @@ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
|
|
|
3680
3178
|
_Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
|
|
3681
3179
|
var Wav2Vec2Inference = _Wav2Vec2Inference;
|
|
3682
3180
|
|
|
3181
|
+
// src/audio/audioUtils.ts
|
|
3182
|
+
/**
 * Convert 16-bit signed PCM bytes to float samples.
 *
 * Each sample is divided by 32768. A trailing odd byte is ignored.
 * Samples are read through Int16Array, i.e. in the platform's native byte order.
 *
 * @param buffer - An ArrayBuffer, or a typed-array / DataView over one. For a
 *   view, only the bytes the view covers are read (its byteOffset and
 *   byteLength are honoured), so e.g. a Uint8Array of PCM bytes is decoded
 *   as 16-bit samples rather than one sample per byte.
 * @returns Float32Array with one entry per 16-bit sample.
 */
function pcm16ToFloat32(buffer) {
  const byteLen = buffer.byteLength & ~1;
  let int16;
  if (ArrayBuffer.isView(buffer)) {
    if (buffer.byteOffset % 2 === 0) {
      int16 = new Int16Array(buffer.buffer, buffer.byteOffset, byteLen / 2);
    } else {
      // Int16Array requires a 2-byte-aligned offset; copy the bytes out first.
      int16 = new Int16Array(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + byteLen));
    }
  } else {
    int16 = new Int16Array(buffer, 0, byteLen / 2);
  }
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
|
|
3191
|
+
/**
 * Convert 16-bit integer samples to floats by dividing each one by 32768.
 *
 * @param int16 - Array-like of integer samples (e.g. an Int16Array).
 * @returns Float32Array of the same length.
 */
function int16ToFloat32(int16) {
  return Float32Array.from(int16, (sample) => sample / 32768);
}
|
|
3198
|
+
|
|
3683
3199
|
// src/audio/FullFacePipeline.ts
|
|
3684
|
-
var
|
|
3685
|
-
var
|
|
3686
|
-
|
|
3687
|
-
|
|
3688
|
-
|
|
3689
|
-
|
|
3690
|
-
|
|
3691
|
-
|
|
3692
|
-
|
|
3693
|
-
|
|
3694
|
-
|
|
3695
|
-
|
|
3696
|
-
|
|
3697
|
-
|
|
3698
|
-
|
|
3699
|
-
|
|
3700
|
-
|
|
3701
|
-
|
|
3702
|
-
|
|
3703
|
-
|
|
3704
|
-
disappointed: "sad",
|
|
3705
|
-
frustrated: "angry",
|
|
3706
|
-
irritated: "angry",
|
|
3707
|
-
furious: "angry",
|
|
3708
|
-
annoyed: "angry",
|
|
3709
|
-
// SenseVoice labels
|
|
3710
|
-
fearful: "sad",
|
|
3711
|
-
disgusted: "angry",
|
|
3712
|
-
surprised: "happy"
|
|
3713
|
-
};
|
|
3714
|
-
var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
3200
|
+
var logger4 = createLogger("FullFacePipeline");
|
|
3201
|
+
// Maps each LAM blendshape name to a facial group, chosen by the name's prefix.
var BLENDSHAPE_TO_GROUP = /* @__PURE__ */ new Map();
{
  // [name prefix, group] pairs, tried in this order. Names that match no
  // prefix get no entry in the map.
  const prefixGroups = [
    ["eye", "eyes"],
    ["brow", "brows"],
    ["jaw", "jaw"],
    ["mouth", "mouth"],
    ["cheek", "cheeks"],
    ["nose", "nose"],
    ["tongue", "tongue"]
  ];
  for (const blendshape of LAM_BLENDSHAPES) {
    const hit = prefixGroups.find(([prefix]) => blendshape.startsWith(prefix));
    if (hit) {
      BLENDSHAPE_TO_GROUP.set(blendshape, hit[1]);
    }
  }
}
|
|
3219
|
+
var FullFacePipeline = class extends EventEmitter {
|
|
3715
3220
|
constructor(options) {
|
|
3716
3221
|
super();
|
|
3717
3222
|
this.options = options;
|
|
3718
3223
|
this.playbackStarted = false;
|
|
3719
3224
|
this.monitorInterval = null;
|
|
3720
3225
|
this.frameAnimationId = null;
|
|
3721
|
-
// Emotion state
|
|
3722
|
-
this.lastEmotionFrame = null;
|
|
3723
|
-
this.currentAudioEnergy = 0;
|
|
3724
3226
|
// Stale frame detection
|
|
3725
3227
|
this.lastNewFrameTime = 0;
|
|
3726
3228
|
this.lastKnownLamFrame = null;
|
|
3727
3229
|
this.staleWarningEmitted = false;
|
|
3230
|
+
// Diagnostic logging counter
|
|
3231
|
+
this.frameLoopCount = 0;
|
|
3728
3232
|
const sampleRate = options.sampleRate ?? 16e3;
|
|
3729
|
-
this.
|
|
3730
|
-
this.
|
|
3731
|
-
const
|
|
3233
|
+
this.profile = options.profile ?? {};
|
|
3234
|
+
this.staleThresholdMs = options.staleThresholdMs ?? 2e3;
|
|
3235
|
+
const isCpuModel = options.lam.modelId === "wav2arkit_cpu";
|
|
3236
|
+
const chunkSize = options.chunkSize ?? options.lam.chunkSize ?? 16e3;
|
|
3237
|
+
const chunkAccumulationMs = chunkSize / sampleRate * 1e3;
|
|
3238
|
+
const inferenceEstimateMs = isCpuModel ? 300 : options.lam.backend === "wasm" ? 250 : 80;
|
|
3239
|
+
const marginMs = 100;
|
|
3240
|
+
const autoDelay = Math.ceil(chunkAccumulationMs + inferenceEstimateMs + marginMs);
|
|
3732
3241
|
const audioDelayMs = options.audioDelayMs ?? autoDelay;
|
|
3242
|
+
logger4.info("FullFacePipeline config", {
|
|
3243
|
+
chunkSize,
|
|
3244
|
+
audioDelayMs,
|
|
3245
|
+
autoDelay,
|
|
3246
|
+
backend: options.lam.backend,
|
|
3247
|
+
modelId: options.lam.modelId
|
|
3248
|
+
});
|
|
3733
3249
|
this.scheduler = new AudioScheduler({
|
|
3734
3250
|
sampleRate,
|
|
3735
3251
|
initialLookaheadSec: audioDelayMs / 1e3
|
|
@@ -3738,20 +3254,15 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3738
3254
|
sampleRate,
|
|
3739
3255
|
targetDurationMs: options.chunkTargetMs ?? 200
|
|
3740
3256
|
});
|
|
3741
|
-
this.
|
|
3257
|
+
this.processor = new A2EProcessor({
|
|
3258
|
+
backend: options.lam,
|
|
3742
3259
|
sampleRate,
|
|
3260
|
+
chunkSize,
|
|
3743
3261
|
onError: (error) => {
|
|
3744
|
-
|
|
3262
|
+
logger4.error("A2E inference error", { message: error.message, stack: error.stack });
|
|
3745
3263
|
this.emit("error", error);
|
|
3746
3264
|
}
|
|
3747
3265
|
});
|
|
3748
|
-
this.emotionMapper = new EmotionToBlendshapeMapper({
|
|
3749
|
-
smoothingFactor: 0.15,
|
|
3750
|
-
confidenceThreshold: 0.3,
|
|
3751
|
-
intensity: 1,
|
|
3752
|
-
energyModulation: true
|
|
3753
|
-
});
|
|
3754
|
-
this.energyAnalyzer = new AudioEnergyAnalyzer();
|
|
3755
3266
|
}
|
|
3756
3267
|
/**
|
|
3757
3268
|
* Initialize the pipeline
|
|
@@ -3760,40 +3271,33 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3760
3271
|
await this.scheduler.initialize();
|
|
3761
3272
|
}
|
|
3762
3273
|
/**
|
|
3763
|
-
*
|
|
3764
|
-
|
|
3765
|
-
|
|
3766
|
-
|
|
3767
|
-
*
|
|
3768
|
-
* Supported labels: happy, excited, joyful, sad, melancholic, angry,
|
|
3769
|
-
* frustrated, neutral, etc.
|
|
3770
|
-
*
|
|
3771
|
-
* @param label - Emotion label string (case-insensitive)
|
|
3772
|
-
*/
|
|
3773
|
-
setEmotionLabel(label) {
|
|
3774
|
-
const normalized = label.toLowerCase();
|
|
3775
|
-
const mapped = EMOTION_LABEL_MAP[normalized] ?? "neutral";
|
|
3776
|
-
const probabilities = {
|
|
3777
|
-
neutral: 0.1,
|
|
3778
|
-
happy: 0.1,
|
|
3779
|
-
angry: 0.1,
|
|
3780
|
-
sad: 0.1
|
|
3781
|
-
};
|
|
3782
|
-
probabilities[mapped] = 0.7;
|
|
3783
|
-
const frame = {
|
|
3784
|
-
emotion: mapped,
|
|
3785
|
-
confidence: 0.7,
|
|
3786
|
-
probabilities
|
|
3787
|
-
};
|
|
3788
|
-
this.lastEmotionFrame = frame;
|
|
3789
|
-
logger3.info("Emotion label set", { label, mapped });
|
|
3274
|
+
* Update the ExpressionProfile at runtime (e.g., character switch).
|
|
3275
|
+
*/
|
|
3276
|
+
setProfile(profile) {
|
|
3277
|
+
this.profile = profile;
|
|
3790
3278
|
}
|
|
3791
3279
|
/**
|
|
3792
|
-
*
|
|
3793
|
-
*
|
|
3280
|
+
* Apply ExpressionProfile scaling to raw A2E blendshapes.
|
|
3281
|
+
*
|
|
3282
|
+
* For each blendshape:
|
|
3283
|
+
* 1. If an override exists for the blendshape name, use override as scaler
|
|
3284
|
+
* 2. Otherwise, use the group scaler (default 1.0)
|
|
3285
|
+
* 3. Clamp result to [0, 1]
|
|
3794
3286
|
*/
|
|
3795
|
-
|
|
3796
|
-
|
|
3287
|
+
applyProfile(raw) {
|
|
3288
|
+
const scaled = new Float32Array(52);
|
|
3289
|
+
for (let i = 0; i < 52; i++) {
|
|
3290
|
+
const name = LAM_BLENDSHAPES[i];
|
|
3291
|
+
let scaler;
|
|
3292
|
+
if (this.profile.overrides && this.profile.overrides[name] !== void 0) {
|
|
3293
|
+
scaler = this.profile.overrides[name];
|
|
3294
|
+
} else {
|
|
3295
|
+
const group = BLENDSHAPE_TO_GROUP.get(name);
|
|
3296
|
+
scaler = group ? this.profile[group] ?? 1 : 1;
|
|
3297
|
+
}
|
|
3298
|
+
scaled[i] = Math.min(1, Math.max(0, raw[i] * scaler));
|
|
3299
|
+
}
|
|
3300
|
+
return scaled;
|
|
3797
3301
|
}
|
|
3798
3302
|
/**
|
|
3799
3303
|
* Start a new playback session
|
|
@@ -3805,15 +3309,12 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3805
3309
|
this.stopMonitoring();
|
|
3806
3310
|
this.scheduler.reset();
|
|
3807
3311
|
this.coalescer.reset();
|
|
3808
|
-
this.
|
|
3312
|
+
this.processor.reset();
|
|
3809
3313
|
this.playbackStarted = false;
|
|
3810
|
-
this.lastEmotionFrame = null;
|
|
3811
|
-
this.currentAudioEnergy = 0;
|
|
3812
|
-
this.emotionMapper.reset();
|
|
3813
|
-
this.energyAnalyzer.reset();
|
|
3814
3314
|
this.lastNewFrameTime = 0;
|
|
3815
3315
|
this.lastKnownLamFrame = null;
|
|
3816
3316
|
this.staleWarningEmitted = false;
|
|
3317
|
+
this.frameLoopCount = 0;
|
|
3817
3318
|
this.scheduler.warmup();
|
|
3818
3319
|
this.startFrameLoop();
|
|
3819
3320
|
this.startMonitoring();
|
|
@@ -3821,8 +3322,8 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3821
3322
|
/**
|
|
3822
3323
|
* Receive audio chunk from network
|
|
3823
3324
|
*
|
|
3824
|
-
* Audio-first design: schedules audio immediately,
|
|
3825
|
-
* This prevents
|
|
3325
|
+
* Audio-first design: schedules audio immediately, A2E runs in background.
|
|
3326
|
+
* This prevents A2E inference (50-300ms) from blocking audio scheduling.
|
|
3826
3327
|
*
|
|
3827
3328
|
* @param chunk - Uint8Array containing Int16 PCM audio
|
|
3828
3329
|
*/
|
|
@@ -3837,100 +3338,69 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3837
3338
|
this.playbackStarted = true;
|
|
3838
3339
|
this.emit("playback_start", scheduleTime);
|
|
3839
3340
|
}
|
|
3840
|
-
|
|
3841
|
-
|
|
3842
|
-
|
|
3843
|
-
this.
|
|
3341
|
+
logger4.info("onAudioChunk \u2192 pushAudio", {
|
|
3342
|
+
float32Samples: float32.length,
|
|
3343
|
+
scheduleTime: scheduleTime.toFixed(3),
|
|
3344
|
+
currentTime: this.scheduler.getCurrentTime().toFixed(3),
|
|
3345
|
+
deltaToPlayback: (scheduleTime - this.scheduler.getCurrentTime()).toFixed(3)
|
|
3844
3346
|
});
|
|
3845
|
-
|
|
3846
|
-
/**
|
|
3847
|
-
* Get emotion frame for current animation.
|
|
3848
|
-
*
|
|
3849
|
-
* Priority:
|
|
3850
|
-
* 1. Explicit emotion label from setEmotionLabel()
|
|
3851
|
-
* 2. Prosody fallback: subtle brow movement from audio energy
|
|
3852
|
-
*/
|
|
3853
|
-
getEmotionFrame() {
|
|
3854
|
-
if (this.lastEmotionFrame) {
|
|
3855
|
-
return { frame: this.lastEmotionFrame, energy: this.currentAudioEnergy };
|
|
3856
|
-
}
|
|
3857
|
-
return { frame: null, energy: this.currentAudioEnergy };
|
|
3858
|
-
}
|
|
3859
|
-
/**
|
|
3860
|
-
* Merge LAM blendshapes with emotion upper face blendshapes
|
|
3861
|
-
*/
|
|
3862
|
-
mergeBlendshapes(lamFrame, emotionFrame, audioEnergy) {
|
|
3863
|
-
const merged = new Float32Array(52);
|
|
3864
|
-
let emotionBlendshapes;
|
|
3865
|
-
if (emotionFrame) {
|
|
3866
|
-
this.emotionMapper.mapFrame(emotionFrame, audioEnergy);
|
|
3867
|
-
this.emotionMapper.update(33);
|
|
3868
|
-
emotionBlendshapes = this.emotionMapper.getCurrentBlendshapes();
|
|
3869
|
-
} else {
|
|
3870
|
-
emotionBlendshapes = {};
|
|
3871
|
-
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
3872
|
-
emotionBlendshapes[name] = 0;
|
|
3873
|
-
}
|
|
3874
|
-
}
|
|
3875
|
-
for (let i = 0; i < 52; i++) {
|
|
3876
|
-
const name = LAM_BLENDSHAPES[i];
|
|
3877
|
-
if (UPPER_FACE_SET.has(name)) {
|
|
3878
|
-
const emotionValue = emotionBlendshapes[name] ?? 0;
|
|
3879
|
-
const lamValue = lamFrame[i];
|
|
3880
|
-
merged[i] = emotionValue * this.emotionBlendFactor + lamValue * this.lamBlendFactor;
|
|
3881
|
-
} else {
|
|
3882
|
-
merged[i] = lamFrame[i];
|
|
3883
|
-
}
|
|
3884
|
-
}
|
|
3885
|
-
return { merged, emotionBlendshapes };
|
|
3347
|
+
this.processor.pushAudio(float32, scheduleTime);
|
|
3886
3348
|
}
|
|
3887
3349
|
/**
|
|
3888
3350
|
* Start frame animation loop
|
|
3351
|
+
*
|
|
3352
|
+
* Polls A2EProcessor at render rate (60fps) for the latest inference frame
|
|
3353
|
+
* matching the current AudioContext time. Between inference batches (~30fps
|
|
3354
|
+
* bursts), getFrameForTime() holds the last frame.
|
|
3889
3355
|
*/
|
|
3890
3356
|
startFrameLoop() {
|
|
3891
3357
|
const updateFrame = () => {
|
|
3358
|
+
this.frameLoopCount++;
|
|
3892
3359
|
const currentTime = this.scheduler.getCurrentTime();
|
|
3893
|
-
const lamFrame = this.
|
|
3894
|
-
if (lamFrame) {
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
|
|
3360
|
+
const lamFrame = this.processor.getFrameForTime(currentTime);
|
|
3361
|
+
if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
|
|
3362
|
+
this.lastNewFrameTime = performance.now();
|
|
3363
|
+
this.lastKnownLamFrame = lamFrame;
|
|
3364
|
+
this.staleWarningEmitted = false;
|
|
3365
|
+
logger4.info("New A2E frame", {
|
|
3366
|
+
jawOpen: lamFrame[24]?.toFixed(3),
|
|
3367
|
+
mouthClose: lamFrame[26]?.toFixed(3),
|
|
3368
|
+
browInnerUp: lamFrame[2]?.toFixed(3),
|
|
3369
|
+
browDownL: lamFrame[0]?.toFixed(3),
|
|
3370
|
+
browOuterUpL: lamFrame[3]?.toFixed(3),
|
|
3371
|
+
currentTime: currentTime.toFixed(3),
|
|
3372
|
+
queuedFrames: this.processor.queuedFrameCount
|
|
3373
|
+
});
|
|
3374
|
+
}
|
|
3375
|
+
if (this.frameLoopCount % 60 === 0) {
|
|
3376
|
+
logger4.info("Frame loop heartbeat", {
|
|
3377
|
+
frameLoopCount: this.frameLoopCount,
|
|
3378
|
+
currentTime: currentTime.toFixed(3),
|
|
3379
|
+
playbackEndTime: this.scheduler.getPlaybackEndTime().toFixed(3),
|
|
3380
|
+
queuedFrames: this.processor.queuedFrameCount,
|
|
3381
|
+
playbackStarted: this.playbackStarted,
|
|
3382
|
+
msSinceNewFrame: this.lastNewFrameTime > 0 ? Math.round(performance.now() - this.lastNewFrameTime) : -1,
|
|
3383
|
+
processorFill: this.processor.fillLevel.toFixed(2)
|
|
3384
|
+
});
|
|
3385
|
+
}
|
|
3386
|
+
if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
|
|
3387
|
+
if (!this.staleWarningEmitted) {
|
|
3388
|
+
this.staleWarningEmitted = true;
|
|
3389
|
+
logger4.warn("A2E stalled \u2014 no new inference frames", {
|
|
3390
|
+
staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
|
|
3391
|
+
queuedFrames: this.processor.queuedFrameCount
|
|
3392
|
+
});
|
|
3899
3393
|
}
|
|
3900
|
-
|
|
3901
|
-
|
|
3394
|
+
}
|
|
3395
|
+
if (lamFrame) {
|
|
3396
|
+
const scaled = this.applyProfile(lamFrame);
|
|
3902
3397
|
const fullFrame = {
|
|
3903
|
-
blendshapes:
|
|
3904
|
-
|
|
3905
|
-
emotionBlendshapes,
|
|
3906
|
-
emotion: emotionFrame,
|
|
3398
|
+
blendshapes: scaled,
|
|
3399
|
+
rawBlendshapes: lamFrame,
|
|
3907
3400
|
timestamp: currentTime
|
|
3908
3401
|
};
|
|
3909
3402
|
this.emit("full_frame_ready", fullFrame);
|
|
3910
3403
|
this.emit("lam_frame_ready", lamFrame);
|
|
3911
|
-
if (emotionFrame) {
|
|
3912
|
-
this.emit("emotion_frame_ready", emotionFrame);
|
|
3913
|
-
}
|
|
3914
|
-
} else if (this.playbackStarted && !this.lastKnownLamFrame) {
|
|
3915
|
-
const { frame: emotionFrame, energy } = this.getEmotionFrame();
|
|
3916
|
-
if (emotionFrame && energy > 0.05) {
|
|
3917
|
-
const startupFrame = new Float32Array(52);
|
|
3918
|
-
const { merged, emotionBlendshapes } = this.mergeBlendshapes(startupFrame, emotionFrame, energy);
|
|
3919
|
-
this.emit("full_frame_ready", {
|
|
3920
|
-
blendshapes: merged,
|
|
3921
|
-
lamBlendshapes: startupFrame,
|
|
3922
|
-
emotionBlendshapes,
|
|
3923
|
-
emotion: emotionFrame,
|
|
3924
|
-
timestamp: currentTime
|
|
3925
|
-
});
|
|
3926
|
-
}
|
|
3927
|
-
}
|
|
3928
|
-
if (this.playbackStarted && this.lastNewFrameTime > 0 && !this.staleWarningEmitted && performance.now() - this.lastNewFrameTime > _FullFacePipeline.STALE_FRAME_THRESHOLD_MS) {
|
|
3929
|
-
this.staleWarningEmitted = true;
|
|
3930
|
-
logger3.warn("LAM appears stalled \u2014 no new frames for 3+ seconds during playback", {
|
|
3931
|
-
staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
|
|
3932
|
-
queuedFrames: this.lamPipeline.queuedFrameCount
|
|
3933
|
-
});
|
|
3934
3404
|
}
|
|
3935
3405
|
this.frameAnimationId = requestAnimationFrame(updateFrame);
|
|
3936
3406
|
};
|
|
@@ -3945,7 +3415,7 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3945
3415
|
const chunk = new Uint8Array(remaining);
|
|
3946
3416
|
await this.onAudioChunk(chunk);
|
|
3947
3417
|
}
|
|
3948
|
-
await this.
|
|
3418
|
+
await this.processor.flush();
|
|
3949
3419
|
}
|
|
3950
3420
|
/**
|
|
3951
3421
|
* Stop playback immediately with smooth fade-out
|
|
@@ -3954,12 +3424,8 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3954
3424
|
this.stopMonitoring();
|
|
3955
3425
|
await this.scheduler.cancelAll(fadeOutMs);
|
|
3956
3426
|
this.coalescer.reset();
|
|
3957
|
-
this.
|
|
3427
|
+
this.processor.reset();
|
|
3958
3428
|
this.playbackStarted = false;
|
|
3959
|
-
this.lastEmotionFrame = null;
|
|
3960
|
-
this.currentAudioEnergy = 0;
|
|
3961
|
-
this.emotionMapper.reset();
|
|
3962
|
-
this.energyAnalyzer.reset();
|
|
3963
3429
|
this.lastNewFrameTime = 0;
|
|
3964
3430
|
this.lastKnownLamFrame = null;
|
|
3965
3431
|
this.staleWarningEmitted = false;
|
|
@@ -3973,7 +3439,7 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3973
3439
|
clearInterval(this.monitorInterval);
|
|
3974
3440
|
}
|
|
3975
3441
|
this.monitorInterval = setInterval(() => {
|
|
3976
|
-
if (this.scheduler.isComplete() && this.
|
|
3442
|
+
if (this.scheduler.isComplete() && this.processor.queuedFrameCount === 0) {
|
|
3977
3443
|
this.emit("playback_complete", void 0);
|
|
3978
3444
|
this.stopMonitoring();
|
|
3979
3445
|
}
|
|
@@ -3999,20 +3465,12 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
3999
3465
|
return {
|
|
4000
3466
|
playbackStarted: this.playbackStarted,
|
|
4001
3467
|
coalescerFill: this.coalescer.fillLevel,
|
|
4002
|
-
|
|
4003
|
-
|
|
4004
|
-
emotionLabel: this.lastEmotionFrame?.emotion ?? null,
|
|
4005
|
-
currentAudioEnergy: this.currentAudioEnergy,
|
|
3468
|
+
processorFill: this.processor.fillLevel,
|
|
3469
|
+
queuedFrames: this.processor.queuedFrameCount,
|
|
4006
3470
|
currentTime: this.scheduler.getCurrentTime(),
|
|
4007
3471
|
playbackEndTime: this.scheduler.getPlaybackEndTime()
|
|
4008
3472
|
};
|
|
4009
3473
|
}
|
|
4010
|
-
/**
|
|
4011
|
-
* Check if an explicit emotion label is currently set
|
|
4012
|
-
*/
|
|
4013
|
-
get hasEmotionLabel() {
|
|
4014
|
-
return this.lastEmotionFrame !== null;
|
|
4015
|
-
}
|
|
4016
3474
|
/**
|
|
4017
3475
|
* Cleanup resources
|
|
4018
3476
|
*/
|
|
@@ -4020,13 +3478,9 @@ var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
|
4020
3478
|
this.stopMonitoring();
|
|
4021
3479
|
this.scheduler.dispose();
|
|
4022
3480
|
this.coalescer.reset();
|
|
4023
|
-
this.
|
|
4024
|
-
this.lastEmotionFrame = null;
|
|
4025
|
-
this.currentAudioEnergy = 0;
|
|
3481
|
+
this.processor.dispose();
|
|
4026
3482
|
}
|
|
4027
3483
|
};
|
|
4028
|
-
_FullFacePipeline.STALE_FRAME_THRESHOLD_MS = 3e3;
|
|
4029
|
-
var FullFacePipeline = _FullFacePipeline;
|
|
4030
3484
|
|
|
4031
3485
|
// src/inference/kaldiFbank.ts
|
|
4032
3486
|
function fft(re, im) {
|
|
@@ -4313,7 +3767,7 @@ function ctcGreedyDecode(logits, seqLen, vocabSize, tokenMap) {
|
|
|
4313
3767
|
}
|
|
4314
3768
|
|
|
4315
3769
|
// src/inference/SenseVoiceInference.ts
|
|
4316
|
-
var
|
|
3770
|
+
var logger5 = createLogger("SenseVoice");
|
|
4317
3771
|
var _SenseVoiceInference = class _SenseVoiceInference {
|
|
4318
3772
|
constructor(config) {
|
|
4319
3773
|
this.session = null;
|
|
@@ -4366,26 +3820,26 @@ var _SenseVoiceInference = class _SenseVoiceInference {
|
|
|
4366
3820
|
"model.backend_requested": this.config.backend
|
|
4367
3821
|
});
|
|
4368
3822
|
try {
|
|
4369
|
-
|
|
3823
|
+
logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
|
|
4370
3824
|
const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
|
|
4371
3825
|
this.ort = ort;
|
|
4372
3826
|
this._backend = backend;
|
|
4373
|
-
|
|
4374
|
-
|
|
3827
|
+
logger5.info("ONNX Runtime loaded", { backend: this._backend });
|
|
3828
|
+
logger5.debug("Fetching tokens vocabulary", { tokensUrl: this.config.tokensUrl });
|
|
4375
3829
|
const tokensResponse = await fetch(this.config.tokensUrl);
|
|
4376
3830
|
if (!tokensResponse.ok) {
|
|
4377
3831
|
throw new Error(`Failed to fetch tokens.txt: ${tokensResponse.status} ${tokensResponse.statusText}`);
|
|
4378
3832
|
}
|
|
4379
3833
|
const tokensText = await tokensResponse.text();
|
|
4380
3834
|
this.tokenMap = parseTokensFile(tokensText);
|
|
4381
|
-
|
|
3835
|
+
logger5.debug("Tokens loaded", { vocabSize: this.tokenMap.size });
|
|
4382
3836
|
const sessionOptions = getSessionOptions(this._backend);
|
|
4383
3837
|
if (this._backend === "webgpu") {
|
|
4384
3838
|
sessionOptions.graphOptimizationLevel = "basic";
|
|
4385
3839
|
}
|
|
4386
3840
|
let isCached = false;
|
|
4387
3841
|
if (isIOS()) {
|
|
4388
|
-
|
|
3842
|
+
logger5.info("iOS: passing model URL directly to ORT (low-memory path)", {
|
|
4389
3843
|
modelUrl: this.config.modelUrl
|
|
4390
3844
|
});
|
|
4391
3845
|
this.session = await this.ort.InferenceSession.create(
|
|
@@ -4397,14 +3851,14 @@ var _SenseVoiceInference = class _SenseVoiceInference {
|
|
|
4397
3851
|
isCached = await cache.has(this.config.modelUrl);
|
|
4398
3852
|
let modelBuffer;
|
|
4399
3853
|
if (isCached) {
|
|
4400
|
-
|
|
3854
|
+
logger5.debug("Loading model from cache", { modelUrl: this.config.modelUrl });
|
|
4401
3855
|
modelBuffer = await cache.get(this.config.modelUrl);
|
|
4402
3856
|
onProgress?.(modelBuffer.byteLength, modelBuffer.byteLength);
|
|
4403
3857
|
} else {
|
|
4404
|
-
|
|
3858
|
+
logger5.debug("Fetching and caching model", { modelUrl: this.config.modelUrl });
|
|
4405
3859
|
modelBuffer = await fetchWithCache(this.config.modelUrl, onProgress);
|
|
4406
3860
|
}
|
|
4407
|
-
|
|
3861
|
+
logger5.debug("Creating ONNX session", {
|
|
4408
3862
|
size: formatBytes(modelBuffer.byteLength),
|
|
4409
3863
|
backend: this._backend
|
|
4410
3864
|
});
|
|
@@ -4417,15 +3871,15 @@ var _SenseVoiceInference = class _SenseVoiceInference {
|
|
|
4417
3871
|
const cmvn = parseCMVNFromMetadata(metadata.neg_mean, metadata.inv_stddev);
|
|
4418
3872
|
this.negMean = cmvn.negMean;
|
|
4419
3873
|
this.invStddev = cmvn.invStddev;
|
|
4420
|
-
|
|
3874
|
+
logger5.debug("CMVN loaded from model metadata", { dim: this.negMean.length });
|
|
4421
3875
|
} else {
|
|
4422
|
-
|
|
3876
|
+
logger5.warn("CMVN not found in model metadata \u2014 features will not be normalized");
|
|
4423
3877
|
}
|
|
4424
3878
|
} catch (cmvnErr) {
|
|
4425
|
-
|
|
3879
|
+
logger5.warn("Failed to read CMVN from model metadata", { error: cmvnErr });
|
|
4426
3880
|
}
|
|
4427
3881
|
const loadTimeMs = performance.now() - startTime;
|
|
4428
|
-
|
|
3882
|
+
logger5.info("SenseVoice model loaded", {
|
|
4429
3883
|
backend: this._backend,
|
|
4430
3884
|
loadTimeMs: Math.round(loadTimeMs),
|
|
4431
3885
|
vocabSize: this.tokenMap.size,
|
|
@@ -4536,7 +3990,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
|
|
|
4536
3990
|
const vocabSize = logitsDims[2];
|
|
4537
3991
|
const decoded = ctcGreedyDecode(logitsData, seqLen, vocabSize, this.tokenMap);
|
|
4538
3992
|
const inferenceTimeMs = performance.now() - startTime;
|
|
4539
|
-
|
|
3993
|
+
logger5.trace("Transcription complete", {
|
|
4540
3994
|
text: decoded.text.substring(0, 50),
|
|
4541
3995
|
language: decoded.language,
|
|
4542
3996
|
emotion: decoded.emotion,
|
|
@@ -4574,7 +4028,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
|
|
|
4574
4028
|
const errMsg = err instanceof Error ? err.message : String(err);
|
|
4575
4029
|
if (errMsg.includes("timed out")) {
|
|
4576
4030
|
this.poisoned = true;
|
|
4577
|
-
|
|
4031
|
+
logger5.error("CRITICAL: Inference session timed out \u2014 SenseVoice is dead. Page reload required.", {
|
|
4578
4032
|
backend: this._backend,
|
|
4579
4033
|
timeoutMs: _SenseVoiceInference.INFERENCE_TIMEOUT_MS
|
|
4580
4034
|
});
|
|
@@ -4582,7 +4036,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
|
|
|
4582
4036
|
const oomError = new Error(
|
|
4583
4037
|
`SenseVoice inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
|
|
4584
4038
|
);
|
|
4585
|
-
|
|
4039
|
+
logger5.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
|
|
4586
4040
|
pointer: `0x${err.toString(16)}`,
|
|
4587
4041
|
backend: this._backend
|
|
4588
4042
|
});
|
|
@@ -4595,7 +4049,7 @@ var _SenseVoiceInference = class _SenseVoiceInference {
|
|
|
4595
4049
|
reject(oomError);
|
|
4596
4050
|
return;
|
|
4597
4051
|
} else {
|
|
4598
|
-
|
|
4052
|
+
logger5.error("Inference failed", { error: errMsg, backend: this._backend });
|
|
4599
4053
|
}
|
|
4600
4054
|
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
4601
4055
|
telemetry?.incrementCounter("omote.inference.total", 1, {
|
|
@@ -4624,7 +4078,7 @@ _SenseVoiceInference.INFERENCE_TIMEOUT_MS = 1e4;
|
|
|
4624
4078
|
var SenseVoiceInference = _SenseVoiceInference;
|
|
4625
4079
|
|
|
4626
4080
|
// src/inference/SenseVoiceWorker.ts
|
|
4627
|
-
var
|
|
4081
|
+
var logger6 = createLogger("SenseVoiceWorker");
|
|
4628
4082
|
var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
|
|
4629
4083
|
var LOAD_TIMEOUT_MS = 3e4;
|
|
4630
4084
|
var INFERENCE_TIMEOUT_MS = 1e4;
|
|
@@ -5357,7 +4811,7 @@ var SenseVoiceWorker = class {
|
|
|
5357
4811
|
this.handleWorkerMessage(event.data);
|
|
5358
4812
|
};
|
|
5359
4813
|
worker.onerror = (error) => {
|
|
5360
|
-
|
|
4814
|
+
logger6.error("Worker error", { error: error.message });
|
|
5361
4815
|
for (const [, resolver] of this.pendingResolvers) {
|
|
5362
4816
|
resolver.reject(new Error(`Worker error: ${error.message}`));
|
|
5363
4817
|
}
|
|
@@ -5437,9 +4891,9 @@ var SenseVoiceWorker = class {
|
|
|
5437
4891
|
"model.language": this.config.language
|
|
5438
4892
|
});
|
|
5439
4893
|
try {
|
|
5440
|
-
|
|
4894
|
+
logger6.info("Creating SenseVoice worker...");
|
|
5441
4895
|
this.worker = this.createWorker();
|
|
5442
|
-
|
|
4896
|
+
logger6.info("Loading model in worker...", {
|
|
5443
4897
|
modelUrl: this.config.modelUrl,
|
|
5444
4898
|
tokensUrl: this.config.tokensUrl,
|
|
5445
4899
|
language: this.config.language,
|
|
@@ -5461,7 +4915,7 @@ var SenseVoiceWorker = class {
|
|
|
5461
4915
|
this._isLoaded = true;
|
|
5462
4916
|
const loadTimeMs = performance.now() - startTime;
|
|
5463
4917
|
onProgress?.(1, 1);
|
|
5464
|
-
|
|
4918
|
+
logger6.info("SenseVoice worker loaded successfully", {
|
|
5465
4919
|
backend: "wasm",
|
|
5466
4920
|
loadTimeMs: Math.round(loadTimeMs),
|
|
5467
4921
|
workerLoadTimeMs: Math.round(result.loadTimeMs),
|
|
@@ -5540,7 +4994,7 @@ var SenseVoiceWorker = class {
|
|
|
5540
4994
|
INFERENCE_TIMEOUT_MS
|
|
5541
4995
|
);
|
|
5542
4996
|
const totalTimeMs = performance.now() - startTime;
|
|
5543
|
-
|
|
4997
|
+
logger6.trace("Worker transcription complete", {
|
|
5544
4998
|
text: result.text.substring(0, 50),
|
|
5545
4999
|
language: result.language,
|
|
5546
5000
|
emotion: result.emotion,
|
|
@@ -5576,11 +5030,11 @@ var SenseVoiceWorker = class {
|
|
|
5576
5030
|
} catch (err) {
|
|
5577
5031
|
const errMsg = err instanceof Error ? err.message : String(err);
|
|
5578
5032
|
if (errMsg.includes("timed out")) {
|
|
5579
|
-
|
|
5033
|
+
logger6.error("CRITICAL: Worker inference timed out \u2014 SenseVoice worker is dead. Page reload required.", {
|
|
5580
5034
|
timeoutMs: INFERENCE_TIMEOUT_MS
|
|
5581
5035
|
});
|
|
5582
5036
|
} else {
|
|
5583
|
-
|
|
5037
|
+
logger6.error("Worker inference failed", { error: errMsg });
|
|
5584
5038
|
}
|
|
5585
5039
|
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
5586
5040
|
telemetry?.incrementCounter("omote.inference.total", 1, {
|
|
@@ -5618,7 +5072,7 @@ var SenseVoiceWorker = class {
|
|
|
5618
5072
|
};
|
|
5619
5073
|
|
|
5620
5074
|
// src/inference/UnifiedInferenceWorker.ts
|
|
5621
|
-
var
|
|
5075
|
+
var logger7 = createLogger("UnifiedInferenceWorker");
|
|
5622
5076
|
var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
|
|
5623
5077
|
var INIT_TIMEOUT_MS = 15e3;
|
|
5624
5078
|
var SV_LOAD_TIMEOUT_MS = 3e4;
|
|
@@ -6314,7 +5768,7 @@ var UnifiedInferenceWorker = class {
|
|
|
6314
5768
|
const telemetry = getTelemetry();
|
|
6315
5769
|
const span = telemetry?.startSpan("UnifiedInferenceWorker.init");
|
|
6316
5770
|
try {
|
|
6317
|
-
|
|
5771
|
+
logger7.info("Creating unified inference worker...");
|
|
6318
5772
|
this.worker = this.createWorker();
|
|
6319
5773
|
await this.sendMessage(
|
|
6320
5774
|
{ type: "init", wasmPaths: WASM_CDN_PATH3, isIOS: isIOS() },
|
|
@@ -6323,7 +5777,7 @@ var UnifiedInferenceWorker = class {
|
|
|
6323
5777
|
);
|
|
6324
5778
|
this.initialized = true;
|
|
6325
5779
|
const loadTimeMs = performance.now() - startTime;
|
|
6326
|
-
|
|
5780
|
+
logger7.info("Unified worker initialized", { loadTimeMs: Math.round(loadTimeMs) });
|
|
6327
5781
|
span?.setAttributes({ "worker.init_time_ms": loadTimeMs });
|
|
6328
5782
|
span?.end();
|
|
6329
5783
|
} catch (error) {
|
|
@@ -6377,8 +5831,8 @@ var UnifiedInferenceWorker = class {
|
|
|
6377
5831
|
if (!this.worker) return;
|
|
6378
5832
|
await this.sendMessage({ type: "sv:dispose" }, "sv:disposed", DISPOSE_TIMEOUT_MS);
|
|
6379
5833
|
}
|
|
6380
|
-
// ── Wav2ArkitCpu (
|
|
6381
|
-
async
|
|
5834
|
+
// ── Wav2ArkitCpu (A2E) ──────────────────────────────────────────────
|
|
5835
|
+
async loadA2E(config) {
|
|
6382
5836
|
this.assertReady();
|
|
6383
5837
|
const startTime = performance.now();
|
|
6384
5838
|
const result = await this.sendMessage(
|
|
@@ -6399,7 +5853,7 @@ var UnifiedInferenceWorker = class {
|
|
|
6399
5853
|
outputNames: result.outputNames
|
|
6400
5854
|
};
|
|
6401
5855
|
}
|
|
6402
|
-
async
|
|
5856
|
+
async inferA2E(audio) {
|
|
6403
5857
|
this.assertReady();
|
|
6404
5858
|
return this.sendMessage(
|
|
6405
5859
|
{ type: "cpu:infer", audio },
|
|
@@ -6407,7 +5861,7 @@ var UnifiedInferenceWorker = class {
|
|
|
6407
5861
|
CPU_INFER_TIMEOUT_MS
|
|
6408
5862
|
);
|
|
6409
5863
|
}
|
|
6410
|
-
async
|
|
5864
|
+
async disposeA2E() {
|
|
6411
5865
|
if (!this.worker) return;
|
|
6412
5866
|
await this.sendMessage({ type: "cpu:dispose" }, "cpu:disposed", DISPOSE_TIMEOUT_MS);
|
|
6413
5867
|
}
|
|
@@ -6497,7 +5951,7 @@ var UnifiedInferenceWorker = class {
|
|
|
6497
5951
|
this.handleWorkerMessage(event.data);
|
|
6498
5952
|
};
|
|
6499
5953
|
worker.onerror = (error) => {
|
|
6500
|
-
|
|
5954
|
+
logger7.error("Unified worker error", { error: error.message });
|
|
6501
5955
|
this.rejectAllPending(`Worker error: ${error.message}`);
|
|
6502
5956
|
};
|
|
6503
5957
|
return worker;
|
|
@@ -6511,7 +5965,7 @@ var UnifiedInferenceWorker = class {
|
|
|
6511
5965
|
this.pendingRequests.delete(requestId);
|
|
6512
5966
|
pending.reject(new Error(data.error));
|
|
6513
5967
|
} else {
|
|
6514
|
-
|
|
5968
|
+
logger7.error("Worker broadcast error", { error: data.error });
|
|
6515
5969
|
this.rejectAllPending(data.error);
|
|
6516
5970
|
}
|
|
6517
5971
|
return;
|
|
@@ -6533,7 +5987,7 @@ var UnifiedInferenceWorker = class {
|
|
|
6533
5987
|
const timeout = setTimeout(() => {
|
|
6534
5988
|
this.pendingRequests.delete(requestId);
|
|
6535
5989
|
this.poisoned = true;
|
|
6536
|
-
|
|
5990
|
+
logger7.error("CRITICAL: Worker operation timed out \u2014 worker is dead", {
|
|
6537
5991
|
type: message.type,
|
|
6538
5992
|
timeoutMs
|
|
6539
5993
|
});
|
|
@@ -6599,7 +6053,7 @@ var SenseVoiceUnifiedAdapter = class {
|
|
|
6599
6053
|
});
|
|
6600
6054
|
this._isLoaded = true;
|
|
6601
6055
|
onProgress?.(1, 1);
|
|
6602
|
-
|
|
6056
|
+
logger7.info("SenseVoice loaded via unified worker", {
|
|
6603
6057
|
backend: "wasm",
|
|
6604
6058
|
loadTimeMs: Math.round(result.loadTimeMs),
|
|
6605
6059
|
vocabSize: result.vocabSize
|
|
@@ -6640,6 +6094,7 @@ var SenseVoiceUnifiedAdapter = class {
|
|
|
6640
6094
|
var Wav2ArkitCpuUnifiedAdapter = class {
|
|
6641
6095
|
constructor(worker, config) {
|
|
6642
6096
|
this.modelId = "wav2arkit_cpu";
|
|
6097
|
+
this.chunkSize = 16e3;
|
|
6643
6098
|
this._isLoaded = false;
|
|
6644
6099
|
this.inferenceQueue = Promise.resolve();
|
|
6645
6100
|
this.worker = worker;
|
|
@@ -6658,12 +6113,12 @@ var Wav2ArkitCpuUnifiedAdapter = class {
|
|
|
6658
6113
|
});
|
|
6659
6114
|
try {
|
|
6660
6115
|
const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
|
|
6661
|
-
const result = await this.worker.
|
|
6116
|
+
const result = await this.worker.loadA2E({
|
|
6662
6117
|
modelUrl: this.config.modelUrl,
|
|
6663
6118
|
externalDataUrl: externalDataUrl || null
|
|
6664
6119
|
});
|
|
6665
6120
|
this._isLoaded = true;
|
|
6666
|
-
|
|
6121
|
+
logger7.info("Wav2ArkitCpu loaded via unified worker", {
|
|
6667
6122
|
backend: "wasm",
|
|
6668
6123
|
loadTimeMs: Math.round(result.loadTimeMs)
|
|
6669
6124
|
});
|
|
@@ -6690,7 +6145,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
|
|
|
6690
6145
|
});
|
|
6691
6146
|
try {
|
|
6692
6147
|
const startTime = performance.now();
|
|
6693
|
-
const result = await this.worker.
|
|
6148
|
+
const result = await this.worker.inferA2E(audioCopy);
|
|
6694
6149
|
const inferenceTimeMs = performance.now() - startTime;
|
|
6695
6150
|
const flatBuffer = result.blendshapes;
|
|
6696
6151
|
const { numFrames, numBlendshapes } = result;
|
|
@@ -6713,7 +6168,7 @@ var Wav2ArkitCpuUnifiedAdapter = class {
|
|
|
6713
6168
|
}
|
|
6714
6169
|
async dispose() {
|
|
6715
6170
|
if (this._isLoaded) {
|
|
6716
|
-
await this.worker.
|
|
6171
|
+
await this.worker.disposeA2E();
|
|
6717
6172
|
this._isLoaded = false;
|
|
6718
6173
|
}
|
|
6719
6174
|
}
|
|
@@ -6769,7 +6224,7 @@ var SileroVADUnifiedAdapter = class {
|
|
|
6769
6224
|
sampleRate: this.config.sampleRate
|
|
6770
6225
|
});
|
|
6771
6226
|
this._isLoaded = true;
|
|
6772
|
-
|
|
6227
|
+
logger7.info("SileroVAD loaded via unified worker", {
|
|
6773
6228
|
backend: "wasm",
|
|
6774
6229
|
loadTimeMs: Math.round(result.loadTimeMs),
|
|
6775
6230
|
sampleRate: this.config.sampleRate,
|
|
@@ -6850,10 +6305,10 @@ var SileroVADUnifiedAdapter = class {
|
|
|
6850
6305
|
};
|
|
6851
6306
|
|
|
6852
6307
|
// src/inference/createSenseVoice.ts
|
|
6853
|
-
var
|
|
6308
|
+
var logger8 = createLogger("createSenseVoice");
|
|
6854
6309
|
function createSenseVoice(config) {
|
|
6855
6310
|
if (config.unifiedWorker) {
|
|
6856
|
-
|
|
6311
|
+
logger8.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
|
|
6857
6312
|
return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
|
|
6858
6313
|
modelUrl: config.modelUrl,
|
|
6859
6314
|
tokensUrl: config.tokensUrl,
|
|
@@ -6866,7 +6321,7 @@ function createSenseVoice(config) {
|
|
|
6866
6321
|
if (!SenseVoiceWorker.isSupported()) {
|
|
6867
6322
|
throw new Error("Web Workers are not supported in this environment");
|
|
6868
6323
|
}
|
|
6869
|
-
|
|
6324
|
+
logger8.info("Creating SenseVoiceWorker (off-main-thread)");
|
|
6870
6325
|
return new SenseVoiceWorker({
|
|
6871
6326
|
modelUrl: config.modelUrl,
|
|
6872
6327
|
tokensUrl: config.tokensUrl,
|
|
@@ -6875,7 +6330,7 @@ function createSenseVoice(config) {
|
|
|
6875
6330
|
});
|
|
6876
6331
|
}
|
|
6877
6332
|
if (useWorker === false) {
|
|
6878
|
-
|
|
6333
|
+
logger8.info("Creating SenseVoiceInference (main thread)");
|
|
6879
6334
|
return new SenseVoiceInference({
|
|
6880
6335
|
modelUrl: config.modelUrl,
|
|
6881
6336
|
tokensUrl: config.tokensUrl,
|
|
@@ -6884,7 +6339,7 @@ function createSenseVoice(config) {
|
|
|
6884
6339
|
});
|
|
6885
6340
|
}
|
|
6886
6341
|
if (SenseVoiceWorker.isSupported() && !isIOS()) {
|
|
6887
|
-
|
|
6342
|
+
logger8.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
|
|
6888
6343
|
return new SenseVoiceWorker({
|
|
6889
6344
|
modelUrl: config.modelUrl,
|
|
6890
6345
|
tokensUrl: config.tokensUrl,
|
|
@@ -6892,7 +6347,7 @@ function createSenseVoice(config) {
|
|
|
6892
6347
|
textNorm: config.textNorm
|
|
6893
6348
|
});
|
|
6894
6349
|
}
|
|
6895
|
-
|
|
6350
|
+
logger8.info("Auto-detected: creating SenseVoiceInference (main thread)", {
|
|
6896
6351
|
reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
|
|
6897
6352
|
});
|
|
6898
6353
|
return new SenseVoiceInference({
|
|
@@ -6904,10 +6359,11 @@ function createSenseVoice(config) {
|
|
|
6904
6359
|
}
|
|
6905
6360
|
|
|
6906
6361
|
// src/inference/Wav2ArkitCpuInference.ts
|
|
6907
|
-
var
|
|
6362
|
+
var logger9 = createLogger("Wav2ArkitCpu");
|
|
6908
6363
|
var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
6909
6364
|
constructor(config) {
|
|
6910
6365
|
this.modelId = "wav2arkit_cpu";
|
|
6366
|
+
this.chunkSize = 16e3;
|
|
6911
6367
|
this.session = null;
|
|
6912
6368
|
this.ort = null;
|
|
6913
6369
|
this._backend = "wasm";
|
|
@@ -6945,16 +6401,16 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
6945
6401
|
});
|
|
6946
6402
|
try {
|
|
6947
6403
|
const preference = this.config.backend || "wasm";
|
|
6948
|
-
|
|
6404
|
+
logger9.info("Loading ONNX Runtime...", { preference });
|
|
6949
6405
|
const { ort, backend } = await getOnnxRuntimeForPreference(preference);
|
|
6950
6406
|
this.ort = ort;
|
|
6951
6407
|
this._backend = backend;
|
|
6952
|
-
|
|
6408
|
+
logger9.info("ONNX Runtime loaded", { backend: this._backend });
|
|
6953
6409
|
const modelUrl = this.config.modelUrl;
|
|
6954
6410
|
const dataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${modelUrl}.data` : null;
|
|
6955
6411
|
const sessionOptions = getSessionOptions(this._backend);
|
|
6956
6412
|
if (isIOS()) {
|
|
6957
|
-
|
|
6413
|
+
logger9.info("iOS: passing model URLs directly to ORT (low-memory path)", {
|
|
6958
6414
|
modelUrl,
|
|
6959
6415
|
dataUrl
|
|
6960
6416
|
});
|
|
@@ -6972,15 +6428,15 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
6972
6428
|
const isCached = await cache.has(modelUrl);
|
|
6973
6429
|
let modelBuffer;
|
|
6974
6430
|
if (isCached) {
|
|
6975
|
-
|
|
6431
|
+
logger9.debug("Loading model from cache", { modelUrl });
|
|
6976
6432
|
modelBuffer = await cache.get(modelUrl);
|
|
6977
6433
|
if (!modelBuffer) {
|
|
6978
|
-
|
|
6434
|
+
logger9.warn("Cache corruption detected, clearing and retrying", { modelUrl });
|
|
6979
6435
|
await cache.delete(modelUrl);
|
|
6980
6436
|
modelBuffer = await fetchWithCache(modelUrl);
|
|
6981
6437
|
}
|
|
6982
6438
|
} else {
|
|
6983
|
-
|
|
6439
|
+
logger9.debug("Fetching and caching model graph", { modelUrl });
|
|
6984
6440
|
modelBuffer = await fetchWithCache(modelUrl);
|
|
6985
6441
|
}
|
|
6986
6442
|
if (!modelBuffer) {
|
|
@@ -6991,31 +6447,31 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
6991
6447
|
try {
|
|
6992
6448
|
const isDataCached = await cache.has(dataUrl);
|
|
6993
6449
|
if (isDataCached) {
|
|
6994
|
-
|
|
6450
|
+
logger9.debug("Loading external data from cache", { dataUrl });
|
|
6995
6451
|
externalDataBuffer = await cache.get(dataUrl);
|
|
6996
6452
|
if (!externalDataBuffer) {
|
|
6997
|
-
|
|
6453
|
+
logger9.warn("Cache corruption for external data, retrying", { dataUrl });
|
|
6998
6454
|
await cache.delete(dataUrl);
|
|
6999
6455
|
externalDataBuffer = await fetchWithCache(dataUrl);
|
|
7000
6456
|
}
|
|
7001
6457
|
} else {
|
|
7002
|
-
|
|
6458
|
+
logger9.info("Fetching external model data", {
|
|
7003
6459
|
dataUrl,
|
|
7004
6460
|
note: "This may be a large download (400MB+)"
|
|
7005
6461
|
});
|
|
7006
6462
|
externalDataBuffer = await fetchWithCache(dataUrl);
|
|
7007
6463
|
}
|
|
7008
|
-
|
|
6464
|
+
logger9.info("External data loaded", {
|
|
7009
6465
|
size: formatBytes(externalDataBuffer.byteLength)
|
|
7010
6466
|
});
|
|
7011
6467
|
} catch (err) {
|
|
7012
|
-
|
|
6468
|
+
logger9.debug("No external data file found (single-file model)", {
|
|
7013
6469
|
dataUrl,
|
|
7014
6470
|
error: err.message
|
|
7015
6471
|
});
|
|
7016
6472
|
}
|
|
7017
6473
|
}
|
|
7018
|
-
|
|
6474
|
+
logger9.debug("Creating ONNX session", {
|
|
7019
6475
|
graphSize: formatBytes(modelBuffer.byteLength),
|
|
7020
6476
|
externalDataSize: externalDataBuffer ? formatBytes(externalDataBuffer.byteLength) : "none",
|
|
7021
6477
|
backend: this._backend
|
|
@@ -7031,7 +6487,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
7031
6487
|
this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
|
|
7032
6488
|
}
|
|
7033
6489
|
const loadTimeMs = performance.now() - startTime;
|
|
7034
|
-
|
|
6490
|
+
logger9.info("Model loaded successfully", {
|
|
7035
6491
|
backend: this._backend,
|
|
7036
6492
|
loadTimeMs: Math.round(loadTimeMs),
|
|
7037
6493
|
inputs: this.session.inputNames,
|
|
@@ -7047,12 +6503,12 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
7047
6503
|
model: "wav2arkit_cpu",
|
|
7048
6504
|
backend: this._backend
|
|
7049
6505
|
});
|
|
7050
|
-
|
|
6506
|
+
logger9.debug("Running warmup inference");
|
|
7051
6507
|
const warmupStart = performance.now();
|
|
7052
6508
|
const silentAudio = new Float32Array(16e3);
|
|
7053
6509
|
await this.infer(silentAudio);
|
|
7054
6510
|
const warmupTimeMs = performance.now() - warmupStart;
|
|
7055
|
-
|
|
6511
|
+
logger9.info("Warmup inference complete", {
|
|
7056
6512
|
warmupTimeMs: Math.round(warmupTimeMs),
|
|
7057
6513
|
backend: this._backend
|
|
7058
6514
|
});
|
|
@@ -7139,7 +6595,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
7139
6595
|
const symmetrized = symmetrizeBlendshapes(rawFrame);
|
|
7140
6596
|
blendshapes.push(symmetrized);
|
|
7141
6597
|
}
|
|
7142
|
-
|
|
6598
|
+
logger9.trace("Inference completed", {
|
|
7143
6599
|
inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
|
|
7144
6600
|
numFrames,
|
|
7145
6601
|
inputSamples
|
|
@@ -7167,7 +6623,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
7167
6623
|
const errMsg = err instanceof Error ? err.message : String(err);
|
|
7168
6624
|
if (errMsg.includes("timed out")) {
|
|
7169
6625
|
this.poisoned = true;
|
|
7170
|
-
|
|
6626
|
+
logger9.error("CRITICAL: Inference session timed out \u2014 Wav2ArkitCpu is dead. Page reload required.", {
|
|
7171
6627
|
backend: this._backend,
|
|
7172
6628
|
timeoutMs: _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS
|
|
7173
6629
|
});
|
|
@@ -7175,7 +6631,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
7175
6631
|
const oomError = new Error(
|
|
7176
6632
|
`Wav2ArkitCpu inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reloading the page.`
|
|
7177
6633
|
);
|
|
7178
|
-
|
|
6634
|
+
logger9.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
|
|
7179
6635
|
pointer: `0x${err.toString(16)}`,
|
|
7180
6636
|
backend: this._backend
|
|
7181
6637
|
});
|
|
@@ -7188,7 +6644,7 @@ var _Wav2ArkitCpuInference = class _Wav2ArkitCpuInference {
|
|
|
7188
6644
|
reject(oomError);
|
|
7189
6645
|
return;
|
|
7190
6646
|
} else {
|
|
7191
|
-
|
|
6647
|
+
logger9.error("Inference failed", { error: errMsg, backend: this._backend });
|
|
7192
6648
|
}
|
|
7193
6649
|
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
7194
6650
|
telemetry?.incrementCounter("omote.inference.total", 1, {
|
|
@@ -7215,7 +6671,7 @@ _Wav2ArkitCpuInference.INFERENCE_TIMEOUT_MS = 5e3;
|
|
|
7215
6671
|
var Wav2ArkitCpuInference = _Wav2ArkitCpuInference;
|
|
7216
6672
|
|
|
7217
6673
|
// src/inference/Wav2ArkitCpuWorker.ts
|
|
7218
|
-
var
|
|
6674
|
+
var logger10 = createLogger("Wav2ArkitCpuWorker");
|
|
7219
6675
|
var WASM_CDN_PATH4 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
|
|
7220
6676
|
var LOAD_TIMEOUT_MS2 = 6e4;
|
|
7221
6677
|
var INFERENCE_TIMEOUT_MS2 = 5e3;
|
|
@@ -7461,6 +6917,7 @@ self.onerror = function(err) {
|
|
|
7461
6917
|
var Wav2ArkitCpuWorker = class {
|
|
7462
6918
|
constructor(config) {
|
|
7463
6919
|
this.modelId = "wav2arkit_cpu";
|
|
6920
|
+
this.chunkSize = 16e3;
|
|
7464
6921
|
this.worker = null;
|
|
7465
6922
|
this.isLoading = false;
|
|
7466
6923
|
this._isLoaded = false;
|
|
@@ -7495,7 +6952,7 @@ var Wav2ArkitCpuWorker = class {
|
|
|
7495
6952
|
this.handleWorkerMessage(event.data);
|
|
7496
6953
|
};
|
|
7497
6954
|
worker.onerror = (error) => {
|
|
7498
|
-
|
|
6955
|
+
logger10.error("Worker error", { error: error.message });
|
|
7499
6956
|
for (const [, resolver] of this.pendingResolvers) {
|
|
7500
6957
|
resolver.reject(new Error(`Worker error: ${error.message}`));
|
|
7501
6958
|
}
|
|
@@ -7571,10 +7028,10 @@ var Wav2ArkitCpuWorker = class {
|
|
|
7571
7028
|
"model.backend_requested": "wasm"
|
|
7572
7029
|
});
|
|
7573
7030
|
try {
|
|
7574
|
-
|
|
7031
|
+
logger10.info("Creating wav2arkit_cpu worker...");
|
|
7575
7032
|
this.worker = this.createWorker();
|
|
7576
7033
|
const externalDataUrl = this.config.externalDataUrl !== false ? this.config.externalDataUrl || `${this.config.modelUrl}.data` : null;
|
|
7577
|
-
|
|
7034
|
+
logger10.info("Loading model in worker...", {
|
|
7578
7035
|
modelUrl: this.config.modelUrl,
|
|
7579
7036
|
externalDataUrl,
|
|
7580
7037
|
isIOS: isIOS()
|
|
@@ -7592,7 +7049,7 @@ var Wav2ArkitCpuWorker = class {
|
|
|
7592
7049
|
);
|
|
7593
7050
|
this._isLoaded = true;
|
|
7594
7051
|
const loadTimeMs = performance.now() - startTime;
|
|
7595
|
-
|
|
7052
|
+
logger10.info("Wav2ArkitCpu worker loaded successfully", {
|
|
7596
7053
|
backend: "wasm",
|
|
7597
7054
|
loadTimeMs: Math.round(loadTimeMs),
|
|
7598
7055
|
workerLoadTimeMs: Math.round(result.loadTimeMs),
|
|
@@ -7677,7 +7134,7 @@ var Wav2ArkitCpuWorker = class {
|
|
|
7677
7134
|
for (let f = 0; f < numFrames; f++) {
|
|
7678
7135
|
blendshapes.push(flatBuffer.slice(f * numBlendshapes, (f + 1) * numBlendshapes));
|
|
7679
7136
|
}
|
|
7680
|
-
|
|
7137
|
+
logger10.trace("Worker inference completed", {
|
|
7681
7138
|
inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
|
|
7682
7139
|
workerTimeMs: Math.round(result.inferenceTimeMs * 100) / 100,
|
|
7683
7140
|
numFrames,
|
|
@@ -7707,12 +7164,12 @@ var Wav2ArkitCpuWorker = class {
|
|
|
7707
7164
|
const errMsg = err instanceof Error ? err.message : String(err);
|
|
7708
7165
|
if (errMsg.includes("timed out")) {
|
|
7709
7166
|
this.poisoned = true;
|
|
7710
|
-
|
|
7167
|
+
logger10.error("CRITICAL: Worker inference timed out \u2014 Wav2ArkitCpu worker is dead. Page reload required.", {
|
|
7711
7168
|
backend: "wasm",
|
|
7712
7169
|
timeoutMs: INFERENCE_TIMEOUT_MS2
|
|
7713
7170
|
});
|
|
7714
7171
|
} else {
|
|
7715
|
-
|
|
7172
|
+
logger10.error("Worker inference failed", { error: errMsg, backend: "wasm" });
|
|
7716
7173
|
}
|
|
7717
7174
|
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
7718
7175
|
telemetry?.incrementCounter("omote.inference.total", 1, {
|
|
@@ -7749,39 +7206,39 @@ var Wav2ArkitCpuWorker = class {
|
|
|
7749
7206
|
}
|
|
7750
7207
|
};
|
|
7751
7208
|
|
|
7752
|
-
// src/inference/
|
|
7753
|
-
var
|
|
7754
|
-
function
|
|
7209
|
+
// src/inference/createA2E.ts
|
|
7210
|
+
var logger11 = createLogger("createA2E");
|
|
7211
|
+
function createA2E(config) {
|
|
7755
7212
|
const mode = config.mode ?? "auto";
|
|
7756
7213
|
const fallbackOnError = config.fallbackOnError ?? true;
|
|
7757
7214
|
let useCpu;
|
|
7758
7215
|
if (mode === "cpu") {
|
|
7759
7216
|
useCpu = true;
|
|
7760
|
-
|
|
7217
|
+
logger11.info("Forcing CPU A2E model (wav2arkit_cpu)");
|
|
7761
7218
|
} else if (mode === "gpu") {
|
|
7762
7219
|
useCpu = false;
|
|
7763
|
-
|
|
7220
|
+
logger11.info("Forcing GPU A2E model (Wav2Vec2)");
|
|
7764
7221
|
} else {
|
|
7765
|
-
useCpu =
|
|
7766
|
-
|
|
7222
|
+
useCpu = shouldUseCpuA2E();
|
|
7223
|
+
logger11.info("Auto-detected A2E model", {
|
|
7767
7224
|
useCpu,
|
|
7768
7225
|
isSafari: isSafari()
|
|
7769
7226
|
});
|
|
7770
7227
|
}
|
|
7771
7228
|
if (useCpu) {
|
|
7772
7229
|
if (config.unifiedWorker) {
|
|
7773
|
-
|
|
7230
|
+
logger11.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
|
|
7774
7231
|
return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
|
|
7775
7232
|
modelUrl: config.cpuModelUrl
|
|
7776
7233
|
});
|
|
7777
7234
|
}
|
|
7778
7235
|
if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
|
|
7779
|
-
|
|
7236
|
+
logger11.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
|
|
7780
7237
|
return new Wav2ArkitCpuWorker({
|
|
7781
7238
|
modelUrl: config.cpuModelUrl
|
|
7782
7239
|
});
|
|
7783
7240
|
}
|
|
7784
|
-
|
|
7241
|
+
logger11.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
|
|
7785
7242
|
return new Wav2ArkitCpuInference({
|
|
7786
7243
|
modelUrl: config.cpuModelUrl
|
|
7787
7244
|
});
|
|
@@ -7793,13 +7250,13 @@ function createLipSync(config) {
|
|
|
7793
7250
|
numIdentityClasses: config.numIdentityClasses
|
|
7794
7251
|
});
|
|
7795
7252
|
if (fallbackOnError) {
|
|
7796
|
-
|
|
7797
|
-
return new
|
|
7253
|
+
logger11.info("Creating Wav2Vec2Inference with CPU fallback");
|
|
7254
|
+
return new A2EWithFallback(gpuInstance, config);
|
|
7798
7255
|
}
|
|
7799
|
-
|
|
7256
|
+
logger11.info("Creating Wav2Vec2Inference (no fallback)");
|
|
7800
7257
|
return gpuInstance;
|
|
7801
7258
|
}
|
|
7802
|
-
var
|
|
7259
|
+
var A2EWithFallback = class {
|
|
7803
7260
|
constructor(gpuInstance, config) {
|
|
7804
7261
|
this.hasFallenBack = false;
|
|
7805
7262
|
this.implementation = gpuInstance;
|
|
@@ -7808,6 +7265,9 @@ var LipSyncWithFallback = class {
|
|
|
7808
7265
|
get modelId() {
|
|
7809
7266
|
return this.implementation.modelId;
|
|
7810
7267
|
}
|
|
7268
|
+
get chunkSize() {
|
|
7269
|
+
return this.implementation.chunkSize;
|
|
7270
|
+
}
|
|
7811
7271
|
get backend() {
|
|
7812
7272
|
return this.implementation.backend;
|
|
7813
7273
|
}
|
|
@@ -7822,7 +7282,7 @@ var LipSyncWithFallback = class {
|
|
|
7822
7282
|
}
|
|
7823
7283
|
}
|
|
7824
7284
|
async fallbackToCpu(reason) {
|
|
7825
|
-
|
|
7285
|
+
logger11.warn("GPU model load failed, falling back to CPU model", { reason });
|
|
7826
7286
|
try {
|
|
7827
7287
|
await this.implementation.dispose();
|
|
7828
7288
|
} catch {
|
|
@@ -7831,17 +7291,17 @@ var LipSyncWithFallback = class {
|
|
|
7831
7291
|
this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
|
|
7832
7292
|
modelUrl: this.config.cpuModelUrl
|
|
7833
7293
|
});
|
|
7834
|
-
|
|
7294
|
+
logger11.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
|
|
7835
7295
|
} else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
|
|
7836
7296
|
this.implementation = new Wav2ArkitCpuWorker({
|
|
7837
7297
|
modelUrl: this.config.cpuModelUrl
|
|
7838
7298
|
});
|
|
7839
|
-
|
|
7299
|
+
logger11.info("Fallback to Wav2ArkitCpuWorker successful");
|
|
7840
7300
|
} else {
|
|
7841
7301
|
this.implementation = new Wav2ArkitCpuInference({
|
|
7842
7302
|
modelUrl: this.config.cpuModelUrl
|
|
7843
7303
|
});
|
|
7844
|
-
|
|
7304
|
+
logger11.info("Fallback to Wav2ArkitCpuInference successful");
|
|
7845
7305
|
}
|
|
7846
7306
|
this.hasFallenBack = true;
|
|
7847
7307
|
return await this.implementation.load();
|
|
@@ -7854,8 +7314,198 @@ var LipSyncWithFallback = class {
|
|
|
7854
7314
|
}
|
|
7855
7315
|
};
|
|
7856
7316
|
|
|
7317
|
+
// src/inference/BlendshapeSmoother.ts
|
|
7318
|
+
var NUM_BLENDSHAPES = 52;
|
|
7319
|
+
var BlendshapeSmoother = class {
  /**
   * Per-channel spring smoother for blendshape frames.
   *
   * Holds NUM_BLENDSHAPES independent springs (value + velocity) that chase
   * the most recent target frame. Targets are supplied with setTarget() and
   * the springs are advanced with update().
   *
   * @param config - Optional settings.
   * @param config.halflife - Spring half-life in seconds (default 0.06).
   *   A value that is not greater than zero disables smoothing: update()
   *   then snaps straight to the target.
   */
  constructor(config) {
    /** Whether any target has been set */
    this._hasTarget = false;
    this.halflife = config?.halflife ?? 0.06;
    this.values = new Float32Array(NUM_BLENDSHAPES);
    this.velocities = new Float32Array(NUM_BLENDSHAPES);
    this.targets = new Float32Array(NUM_BLENDSHAPES);
  }
  /** Whether a target frame has been set (false until first setTarget call) */
  get hasTarget() {
    return this._hasTarget;
  }
  /**
   * Set new target frame from inference output.
   * Springs will converge toward these values on subsequent update() calls.
   *
   * @param frame - Array-like of blendshape weights; copied into the
   *   internal target buffer, so the caller may reuse its own array.
   */
  setTarget(frame) {
    this.targets.set(frame);
    this._hasTarget = true;
  }
  /**
   * Advance all springs by `dt` seconds and return the smoothed frame.
   *
   * Call this every render frame (e.g., inside requestAnimationFrame).
   * Returns the internal values buffer — do NOT mutate the returned array.
   *
   * A `dt` that is NaN, infinite, zero or negative leaves the state
   * untouched: integrating with such a step would write NaN into the
   * values/velocities buffers, and the [0, 1] clamp does not remove NaN.
   *
   * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
   * @returns Smoothed blendshape values (Float32Array of NUM_BLENDSHAPES)
   */
  update(dt) {
    if (!this._hasTarget) {
      return this.values;
    }
    // `!(x > 0)` is also true for NaN, so a NaN half-life takes the snap
    // path instead of producing a NaN damping coefficient.
    if (!(this.halflife > 0)) {
      this.values.set(this.targets);
      this.velocities.fill(0);
      return this.values;
    }
    if (!Number.isFinite(dt) || dt <= 0) {
      return this.values;
    }
    const damping = Math.LN2 / this.halflife;
    const eydt = Math.exp(-damping * dt);
    for (let i = 0; i < NUM_BLENDSHAPES; i++) {
      // Closed-form step of the spring, expressed relative to the target.
      const j0 = this.values[i] - this.targets[i];
      const j1 = this.velocities[i] + j0 * damping;
      const next = eydt * (j0 + j1 * dt) + this.targets[i];
      this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
      // Keep the emitted weight inside the [0, 1] range.
      this.values[i] = Math.max(0, Math.min(1, next));
    }
    return this.values;
  }
  /**
   * Decay all spring targets to neutral (0).
   *
   * Call when inference stalls (no new frames for threshold duration).
   * The springs will smoothly close the mouth / relax the face over
   * the halflife period rather than freezing.
   */
  decayToNeutral() {
    this.targets.fill(0);
  }
  /**
   * Reset all state (values, velocities, targets).
   * Call when starting a new playback session.
   */
  reset() {
    this.values.fill(0);
    this.velocities.fill(0);
    this.targets.fill(0);
    this._hasTarget = false;
  }
};
|
|
7390
|
+
|
|
7391
|
+
// src/animation/audioEnergy.ts
|
|
7392
|
+
/**
 * Root-mean-square level of a block of samples.
 *
 * @param samples - Indexable sample buffer (e.g. Float32Array).
 * @returns Square root of the mean of the squared samples, or 0 for an
 *   empty buffer.
 */
function calculateRMS(samples) {
  const count = samples.length;
  if (count === 0) {
    return 0;
  }
  let energy = 0;
  let idx = 0;
  while (idx < count) {
    const value = samples[idx];
    energy += value * value;
    idx += 1;
  }
  return Math.sqrt(energy / count);
}
|
|
7400
|
+
/**
 * Largest absolute sample value in a block.
 *
 * @param samples - Indexable sample buffer (e.g. Float32Array).
 * @returns The maximum of |sample| over the buffer, or 0 when it is empty.
 */
function calculatePeak(samples) {
  let loudest = 0;
  let idx = 0;
  while (idx < samples.length) {
    const magnitude = Math.abs(samples[idx]);
    loudest = magnitude > loudest ? magnitude : loudest;
    idx += 1;
  }
  return loudest;
}
|
|
7408
|
+
var AudioEnergyAnalyzer = class {
  /**
   * Tracks smoothed RMS and peak levels across successive audio blocks.
   *
   * @param smoothingFactor How much to smooth the release (0 = no smoothing).
   *   Clamped to [0, 0.99]; a NaN value falls back to the default. Default 0.85
   * @param noiseFloor Minimum energy threshold to consider as signal.
   *   A NaN value falls back to the default. Default 0.01
   */
  constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
    this.smoothedRMS = 0;
    this.smoothedPeak = 0;
    // Math.min/Math.max propagate NaN, and a NaN factor would turn the
    // smoothed state into NaN on the first release frame.
    const clamped = Math.max(0, Math.min(0.99, smoothingFactor));
    this.smoothingFactor = Number.isNaN(clamped) ? 0.85 : clamped;
    // Every `level > NaN` comparison is false, which would gate all audio.
    this.noiseFloor = Number.isNaN(noiseFloor) ? 0.01 : noiseFloor;
  }
  /**
   * Process audio samples and return smoothed energy values
   * @param samples Audio samples (Float32Array)
   * @returns Object with the smoothed `rms` and `peak`, plus a combined
   *   `energy` value capped at 1
   */
  process(samples) {
    const instantRMS = calculateRMS(samples);
    const instantPeak = calculatePeak(samples);
    // Levels at or below the noise floor are treated as silence.
    const gatedRMS = instantRMS > this.noiseFloor ? instantRMS : 0;
    const gatedPeak = instantPeak > this.noiseFloor ? instantPeak : 0;
    // Rising levels use fixed blend weights; falling levels use the
    // configured smoothing factor.
    if (gatedRMS > this.smoothedRMS) {
      this.smoothedRMS = this.smoothedRMS * 0.5 + gatedRMS * 0.5;
    } else {
      this.smoothedRMS = this.smoothedRMS * this.smoothingFactor + gatedRMS * (1 - this.smoothingFactor);
    }
    if (gatedPeak > this.smoothedPeak) {
      this.smoothedPeak = this.smoothedPeak * 0.3 + gatedPeak * 0.7;
    } else {
      this.smoothedPeak = this.smoothedPeak * this.smoothingFactor + gatedPeak * (1 - this.smoothingFactor);
    }
    const energy = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
    return {
      rms: this.smoothedRMS,
      peak: this.smoothedPeak,
      // Scale up and clamp
      energy: Math.min(1, energy * 2)
    };
  }
  /**
   * Reset analyzer state
   */
  reset() {
    this.smoothedRMS = 0;
    this.smoothedPeak = 0;
  }
  /**
   * Get current smoothed RMS value
   */
  get rms() {
    return this.smoothedRMS;
  }
  /**
   * Get current smoothed peak value
   */
  get peak() {
    return this.smoothedPeak;
  }
};
|
|
7467
|
+
var EmphasisDetector = class {
  /**
   * Flags sudden rises in energy relative to the recent average.
   *
   * @param historySize Number of frames to track. Default 10. Floored to an
   *   integer and raised to at least 3, because process() needs three
   *   entries before it can report anything; NaN/Infinity fall back to 10
   *   so the history cannot grow without bound.
   * @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
   */
  constructor(historySize = 10, emphasisThreshold = 0.15) {
    this.energyHistory = [];
    const wholeFrames = Math.floor(historySize);
    this.historySize = Number.isFinite(wholeFrames) ? Math.max(3, wholeFrames) : 10;
    this.emphasisThreshold = emphasisThreshold;
  }
  /**
   * Process energy value and detect emphasis
   * @param energy Current energy value (0-1)
   * @returns Object with isEmphasis flag and emphasisStrength
   */
  process(energy) {
    const history = this.energyHistory;
    history.push(energy);
    if (history.length > this.historySize) {
      history.shift();
    }
    if (history.length < 3) {
      return { isEmphasis: false, emphasisStrength: 0 };
    }
    // Average every retained frame except the one just pushed. Summed in
    // index order, without allocating a temporary array on each call.
    const prevCount = history.length - 1;
    let prevSum = 0;
    for (let i = 0; i < prevCount; i++) {
      prevSum += history[i];
    }
    const increase = energy - prevSum / prevCount;
    const isEmphasis = increase > this.emphasisThreshold;
    return {
      isEmphasis,
      // Strength reaches 1 once the rise is 0.3 or more.
      emphasisStrength: isEmphasis ? Math.min(1, increase / 0.3) : 0
    };
  }
  /**
   * Reset detector state
   */
  reset() {
    this.energyHistory = [];
  }
};
|
|
7506
|
+
|
|
7857
7507
|
// src/inference/SileroVADInference.ts
|
|
7858
|
-
var
|
|
7508
|
+
var logger12 = createLogger("SileroVAD");
|
|
7859
7509
|
var SileroVADInference = class {
|
|
7860
7510
|
constructor(config) {
|
|
7861
7511
|
this.session = null;
|
|
@@ -7929,23 +7579,23 @@ var SileroVADInference = class {
|
|
|
7929
7579
|
"model.sample_rate": this.config.sampleRate
|
|
7930
7580
|
});
|
|
7931
7581
|
try {
|
|
7932
|
-
|
|
7582
|
+
logger12.info("Loading ONNX Runtime...", { preference: this.config.backend });
|
|
7933
7583
|
const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
|
|
7934
7584
|
this.ort = ort;
|
|
7935
7585
|
this._backend = backend;
|
|
7936
|
-
|
|
7586
|
+
logger12.info("ONNX Runtime loaded", { backend: this._backend });
|
|
7937
7587
|
const cache = getModelCache();
|
|
7938
7588
|
const modelUrl = this.config.modelUrl;
|
|
7939
7589
|
const isCached = await cache.has(modelUrl);
|
|
7940
7590
|
let modelBuffer;
|
|
7941
7591
|
if (isCached) {
|
|
7942
|
-
|
|
7592
|
+
logger12.debug("Loading model from cache", { modelUrl });
|
|
7943
7593
|
modelBuffer = await cache.get(modelUrl);
|
|
7944
7594
|
} else {
|
|
7945
|
-
|
|
7595
|
+
logger12.debug("Fetching and caching model", { modelUrl });
|
|
7946
7596
|
modelBuffer = await fetchWithCache(modelUrl);
|
|
7947
7597
|
}
|
|
7948
|
-
|
|
7598
|
+
logger12.debug("Creating ONNX session", {
|
|
7949
7599
|
size: formatBytes(modelBuffer.byteLength),
|
|
7950
7600
|
backend: this._backend
|
|
7951
7601
|
});
|
|
@@ -7954,7 +7604,7 @@ var SileroVADInference = class {
|
|
|
7954
7604
|
this.session = await ort.InferenceSession.create(modelData, sessionOptions);
|
|
7955
7605
|
this.reset();
|
|
7956
7606
|
const loadTimeMs = performance.now() - startTime;
|
|
7957
|
-
|
|
7607
|
+
logger12.info("Model loaded successfully", {
|
|
7958
7608
|
backend: this._backend,
|
|
7959
7609
|
loadTimeMs: Math.round(loadTimeMs),
|
|
7960
7610
|
sampleRate: this.config.sampleRate,
|
|
@@ -8009,7 +7659,7 @@ var SileroVADInference = class {
|
|
|
8009
7659
|
[]
|
|
8010
7660
|
);
|
|
8011
7661
|
} catch (e) {
|
|
8012
|
-
|
|
7662
|
+
logger12.warn("BigInt64Array not available, using bigint array fallback", {
|
|
8013
7663
|
error: e instanceof Error ? e.message : String(e)
|
|
8014
7664
|
});
|
|
8015
7665
|
this.srTensor = new this.ort.Tensor(
|
|
@@ -8115,7 +7765,7 @@ var SileroVADInference = class {
|
|
|
8115
7765
|
this.preSpeechBuffer.shift();
|
|
8116
7766
|
}
|
|
8117
7767
|
}
|
|
8118
|
-
|
|
7768
|
+
logger12.trace("Skipping VAD inference - audio too quiet", {
|
|
8119
7769
|
rms: Math.round(rms * 1e4) / 1e4,
|
|
8120
7770
|
threshold: MIN_ENERGY_THRESHOLD
|
|
8121
7771
|
});
|
|
@@ -8169,7 +7819,7 @@ var SileroVADInference = class {
|
|
|
8169
7819
|
if (isSpeech && !this.wasSpeaking) {
|
|
8170
7820
|
preSpeechChunks = [...this.preSpeechBuffer];
|
|
8171
7821
|
this.preSpeechBuffer = [];
|
|
8172
|
-
|
|
7822
|
+
logger12.debug("Speech started with pre-speech buffer", {
|
|
8173
7823
|
preSpeechChunks: preSpeechChunks.length,
|
|
8174
7824
|
durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
|
|
8175
7825
|
});
|
|
@@ -8182,7 +7832,7 @@ var SileroVADInference = class {
|
|
|
8182
7832
|
this.preSpeechBuffer = [];
|
|
8183
7833
|
}
|
|
8184
7834
|
this.wasSpeaking = isSpeech;
|
|
8185
|
-
|
|
7835
|
+
logger12.trace("VAD inference completed", {
|
|
8186
7836
|
probability: Math.round(probability * 1e3) / 1e3,
|
|
8187
7837
|
isSpeech,
|
|
8188
7838
|
inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
|
|
@@ -8213,7 +7863,7 @@ var SileroVADInference = class {
|
|
|
8213
7863
|
const oomError = new Error(
|
|
8214
7864
|
`SileroVAD inference failed with raw C++ exception pointer (0x${err.toString(16)}). This is likely an OOM crash in WASM. Try reducing concurrent model sessions or reloading the page.`
|
|
8215
7865
|
);
|
|
8216
|
-
|
|
7866
|
+
logger12.error("ORT WASM OOM \u2014 raw C++ exception pointer", {
|
|
8217
7867
|
pointer: `0x${err.toString(16)}`,
|
|
8218
7868
|
backend: this._backend
|
|
8219
7869
|
});
|
|
@@ -8256,7 +7906,7 @@ var SileroVADInference = class {
|
|
|
8256
7906
|
SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
|
|
8257
7907
|
|
|
8258
7908
|
// src/inference/SileroVADWorker.ts
|
|
8259
|
-
var
|
|
7909
|
+
var logger13 = createLogger("SileroVADWorker");
|
|
8260
7910
|
var WASM_CDN_PATH5 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
|
|
8261
7911
|
var LOAD_TIMEOUT_MS3 = 1e4;
|
|
8262
7912
|
var INFERENCE_TIMEOUT_MS3 = 1e3;
|
|
@@ -8534,7 +8184,7 @@ var SileroVADWorker = class {
|
|
|
8534
8184
|
this.handleWorkerMessage(event.data);
|
|
8535
8185
|
};
|
|
8536
8186
|
worker.onerror = (error) => {
|
|
8537
|
-
|
|
8187
|
+
logger13.error("Worker error", { error: error.message });
|
|
8538
8188
|
for (const [, resolver] of this.pendingResolvers) {
|
|
8539
8189
|
resolver.reject(new Error(`Worker error: ${error.message}`));
|
|
8540
8190
|
}
|
|
@@ -8610,9 +8260,9 @@ var SileroVADWorker = class {
|
|
|
8610
8260
|
"model.sample_rate": this.config.sampleRate
|
|
8611
8261
|
});
|
|
8612
8262
|
try {
|
|
8613
|
-
|
|
8263
|
+
logger13.info("Creating VAD worker...");
|
|
8614
8264
|
this.worker = this.createWorker();
|
|
8615
|
-
|
|
8265
|
+
logger13.info("Loading model in worker...", {
|
|
8616
8266
|
modelUrl: this.config.modelUrl,
|
|
8617
8267
|
sampleRate: this.config.sampleRate
|
|
8618
8268
|
});
|
|
@@ -8628,7 +8278,7 @@ var SileroVADWorker = class {
|
|
|
8628
8278
|
);
|
|
8629
8279
|
this._isLoaded = true;
|
|
8630
8280
|
const loadTimeMs = performance.now() - startTime;
|
|
8631
|
-
|
|
8281
|
+
logger13.info("VAD worker loaded successfully", {
|
|
8632
8282
|
backend: "wasm",
|
|
8633
8283
|
loadTimeMs: Math.round(loadTimeMs),
|
|
8634
8284
|
workerLoadTimeMs: Math.round(result.loadTimeMs),
|
|
@@ -8735,7 +8385,7 @@ var SileroVADWorker = class {
|
|
|
8735
8385
|
if (isSpeech && !this.wasSpeaking) {
|
|
8736
8386
|
preSpeechChunks = [...this.preSpeechBuffer];
|
|
8737
8387
|
this.preSpeechBuffer = [];
|
|
8738
|
-
|
|
8388
|
+
logger13.debug("Speech started with pre-speech buffer", {
|
|
8739
8389
|
preSpeechChunks: preSpeechChunks.length,
|
|
8740
8390
|
durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
|
|
8741
8391
|
});
|
|
@@ -8748,7 +8398,7 @@ var SileroVADWorker = class {
|
|
|
8748
8398
|
this.preSpeechBuffer = [];
|
|
8749
8399
|
}
|
|
8750
8400
|
this.wasSpeaking = isSpeech;
|
|
8751
|
-
|
|
8401
|
+
logger13.trace("VAD worker inference completed", {
|
|
8752
8402
|
probability: Math.round(result.probability * 1e3) / 1e3,
|
|
8753
8403
|
isSpeech,
|
|
8754
8404
|
inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
|
|
@@ -8816,44 +8466,44 @@ var SileroVADWorker = class {
|
|
|
8816
8466
|
};
|
|
8817
8467
|
|
|
8818
8468
|
// src/inference/createSileroVAD.ts
|
|
8819
|
-
var
|
|
8469
|
+
var logger14 = createLogger("createSileroVAD");
|
|
8820
8470
|
/**
 * Reports whether the current runtime exposes every global needed to run the
 * VAD in a Web Worker: the `Worker` constructor, `URL.createObjectURL`, and
 * the `Blob` constructor.
 *
 * The requirements are checked in that order; the first missing one is logged
 * at debug level and the function returns immediately.
 *
 * @returns {boolean} `true` when all three capabilities are present,
 *   otherwise `false`.
 */
function supportsVADWorker() {
  // Each entry pairs a lazily-evaluated availability probe with the message
  // logged when that probe fails. Probes are thunks so later ones are never
  // evaluated once an earlier requirement is found missing.
  const requirements = [
    [
      () => typeof Worker !== "undefined",
      "Worker not supported: Worker constructor undefined"
    ],
    [
      () => typeof URL !== "undefined" && typeof URL.createObjectURL !== "undefined",
      "Worker not supported: URL.createObjectURL unavailable"
    ],
    [
      () => typeof Blob !== "undefined",
      "Worker not supported: Blob constructor unavailable"
    ]
  ];
  for (const [isAvailable, unsupportedMessage] of requirements) {
    if (!isAvailable()) {
      logger14.debug(unsupportedMessage);
      return false;
    }
  }
  return true;
}
|
|
8835
8485
|
function createSileroVAD(config) {
|
|
8836
8486
|
if (config.unifiedWorker) {
|
|
8837
|
-
|
|
8487
|
+
logger14.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
|
|
8838
8488
|
return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
|
|
8839
8489
|
}
|
|
8840
8490
|
const fallbackOnError = config.fallbackOnError ?? true;
|
|
8841
8491
|
let useWorker;
|
|
8842
8492
|
if (config.useWorker !== void 0) {
|
|
8843
8493
|
useWorker = config.useWorker;
|
|
8844
|
-
|
|
8494
|
+
logger14.debug("Worker preference explicitly set", { useWorker });
|
|
8845
8495
|
} else {
|
|
8846
8496
|
const workerSupported = supportsVADWorker();
|
|
8847
8497
|
const onMobile = isMobile();
|
|
8848
8498
|
useWorker = workerSupported && !onMobile;
|
|
8849
|
-
|
|
8499
|
+
logger14.debug("Auto-detected Worker preference", {
|
|
8850
8500
|
useWorker,
|
|
8851
8501
|
workerSupported,
|
|
8852
8502
|
onMobile
|
|
8853
8503
|
});
|
|
8854
8504
|
}
|
|
8855
8505
|
if (useWorker) {
|
|
8856
|
-
|
|
8506
|
+
logger14.info("Creating SileroVADWorker (off-main-thread)");
|
|
8857
8507
|
const worker = new SileroVADWorker({
|
|
8858
8508
|
modelUrl: config.modelUrl,
|
|
8859
8509
|
sampleRate: config.sampleRate,
|
|
@@ -8865,7 +8515,7 @@ function createSileroVAD(config) {
|
|
|
8865
8515
|
}
|
|
8866
8516
|
return worker;
|
|
8867
8517
|
}
|
|
8868
|
-
|
|
8518
|
+
logger14.info("Creating SileroVADInference (main thread)");
|
|
8869
8519
|
return new SileroVADInference(config);
|
|
8870
8520
|
}
|
|
8871
8521
|
var VADWorkerWithFallback = class {
|
|
@@ -8891,7 +8541,7 @@ var VADWorkerWithFallback = class {
|
|
|
8891
8541
|
try {
|
|
8892
8542
|
return await this.implementation.load();
|
|
8893
8543
|
} catch (error) {
|
|
8894
|
-
|
|
8544
|
+
logger14.warn("Worker load failed, falling back to main thread", {
|
|
8895
8545
|
error: error instanceof Error ? error.message : String(error)
|
|
8896
8546
|
});
|
|
8897
8547
|
try {
|
|
@@ -8900,7 +8550,7 @@ var VADWorkerWithFallback = class {
|
|
|
8900
8550
|
}
|
|
8901
8551
|
this.implementation = new SileroVADInference(this.config);
|
|
8902
8552
|
this.hasFallenBack = true;
|
|
8903
|
-
|
|
8553
|
+
logger14.info("Fallback to SileroVADInference successful");
|
|
8904
8554
|
return await this.implementation.load();
|
|
8905
8555
|
}
|
|
8906
8556
|
}
|
|
@@ -8921,8 +8571,175 @@ var VADWorkerWithFallback = class {
|
|
|
8921
8571
|
}
|
|
8922
8572
|
};
|
|
8923
8573
|
|
|
8574
|
+
// src/inference/A2EOrchestrator.ts
|
|
8575
|
+
// Module-level logger used by the A2EOrchestrator class below; obtained from
// createLogger with the name "A2EOrchestrator".
var logger15 = createLogger("A2EOrchestrator");
|
|
8576
|
+
/**
 * Couples microphone capture with an A2E model and an A2EProcessor.
 *
 * Lifecycle: `load()` creates the model (via `createA2E`) and the processor,
 * `start()` opens the microphone and feeds captured audio to the processor,
 * `stop()` tears the capture graph down, and `dispose()` releases everything.
 *
 * Config fields read by this class: `sampleRate` (defaults to 16000),
 * `gpuModelUrl`, `gpuExternalDataUrl`, `cpuModelUrl`, `a2eConfig`,
 * `chunkSize`, `onFrame`, `onError`, `onReady`.
 */
var A2EOrchestrator = class {
  /**
   * @param {object} config - Orchestrator options; spread over the default
   *   `{ sampleRate: 16000 }`, so any provided key overrides the default.
   */
  constructor(config) {
    this.a2e = null;
    this.processor = null;
    // Mic capture state (lightweight — no dependency on MicrophoneCapture class
    // which requires an external EventEmitter. We do raw Web Audio here.)
    this.stream = null;
    this.audioContext = null;
    this.scriptProcessor = null;
    this.nativeSampleRate = 0;
    this._isReady = false;
    this._isStreaming = false;
    this._backend = null;
    this.disposed = false;
    this.config = {
      sampleRate: 16e3,
      ...config
    };
  }
  /** Latest blendshape weights from inference (null if none yet) */
  get latestWeights() {
    return this.processor?.latestFrame ?? null;
  }
  /** Whether the model is loaded and ready for inference */
  get isReady() {
    return this._isReady;
  }
  /** Whether mic is active and inference loop is running */
  get isStreaming() {
    return this._isStreaming;
  }
  /** Current backend type (webgpu, wasm, or null) */
  get backend() {
    return this._backend;
  }
  /**
   * Load the A2E model and create the processor.
   *
   * Invokes `config.onReady` (if provided) once the processor exists.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the orchestrator was disposed before the call or while
   *   the model was still loading; also propagates any model load failure.
   */
  async load() {
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    logger15.info("Loading A2E model...");
    this.a2e = createA2E({
      gpuModelUrl: this.config.gpuModelUrl,
      gpuExternalDataUrl: this.config.gpuExternalDataUrl,
      // Fall back to the GPU model URL when no dedicated CPU model is given.
      cpuModelUrl: this.config.cpuModelUrl ?? this.config.gpuModelUrl,
      ...this.config.a2eConfig
    });
    const info = await this.a2e.load();
    // dispose() may have run while the model was loading. It nulls `this.a2e`,
    // so continuing would mark a disposed instance as ready and then crash on
    // `this.a2e.modelId`. Fail with the same error the entry guard uses.
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    this._backend = info.backend;
    this.processor = new A2EProcessor({
      backend: this.a2e,
      sampleRate: this.config.sampleRate,
      chunkSize: this.config.chunkSize,
      onFrame: this.config.onFrame,
      onError: this.config.onError
    });
    this._isReady = true;
    logger15.info("A2E model loaded", {
      backend: info.backend,
      loadTimeMs: info.loadTimeMs,
      modelId: this.a2e.modelId
    });
    this.config.onReady?.();
  }
  /**
   * Start mic capture and inference loop.
   *
   * No-op when already streaming. On any failure the partially built capture
   * graph (media stream, audio context, script processor) is released before
   * the error is reported to `config.onError` and rethrown, so the microphone
   * is never left open by a failed start.
   *
   * @returns {Promise<void>}
   * @throws {Error} If disposed, if `load()` has not completed, or if the
   *   microphone / Web Audio setup fails (non-Error rejections are wrapped).
   */
  async start() {
    if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
    if (!this._isReady || !this.processor) throw new Error("Model not loaded. Call load() first.");
    if (this._isStreaming) return;
    try {
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: { ideal: this.config.sampleRate },
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });
      // The permission prompt can take arbitrarily long; bail out (and let the
      // catch block release the stream) if dispose() happened in the meantime.
      if (this.disposed) throw new Error("A2EOrchestrator has been disposed");
      this.audioContext = new AudioContext({ sampleRate: this.config.sampleRate });
      if (this.audioContext.state === "suspended") {
        await this.audioContext.resume();
      }
      // The context reports the rate it actually runs at, which may differ
      // from the requested one; the audio callback resamples when it does.
      this.nativeSampleRate = this.audioContext.sampleRate;
      const source = this.audioContext.createMediaStreamSource(this.stream);
      // NOTE(review): ScriptProcessorNode is deprecated in the Web Audio spec
      // in favour of AudioWorklet; kept here to preserve existing behaviour.
      this.scriptProcessor = this.audioContext.createScriptProcessor(4096, 1, 1);
      this.scriptProcessor.onaudioprocess = (e) => {
        if (!this._isStreaming || !this.processor) return;
        const input = e.inputBuffer.getChannelData(0);
        let samples;
        if (this.nativeSampleRate !== this.config.sampleRate) {
          // Linear-interpolation resample from the context rate to the
          // configured rate.
          const ratio = this.config.sampleRate / this.nativeSampleRate;
          const newLen = Math.round(input.length * ratio);
          samples = new Float32Array(newLen);
          for (let i = 0; i < newLen; i++) {
            const srcIdx = i / ratio;
            const lo = Math.floor(srcIdx);
            // Clamp the upper neighbour so the last sample never reads past
            // the end of the input buffer.
            const hi = Math.min(lo + 1, input.length - 1);
            const frac = srcIdx - lo;
            samples[i] = input[lo] * (1 - frac) + input[hi] * frac;
          }
        } else {
          // Copy: the channel data is read from the event's buffer, so hand
          // the processor its own array.
          samples = new Float32Array(input);
        }
        this.processor.pushAudio(samples);
      };
      source.connect(this.scriptProcessor);
      this.scriptProcessor.connect(this.audioContext.destination);
      this._isStreaming = true;
      this.processor.startDrip();
      logger15.info("Mic capture started", { sampleRate: this.nativeSampleRate });
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      // Roll back whatever part of the capture graph was created before the
      // failure; otherwise the mic track and audio context would stay alive
      // with no way for the caller to release them short of dispose().
      this._isStreaming = false;
      try {
        this.releaseCaptureResources();
      } catch (cleanupErr) {
        // Never let a cleanup problem mask the original failure.
        logger15.warn("Failed to release mic capture resources", {
          error: cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)
        });
      }
      logger15.error("Failed to start mic capture", { error: error.message });
      this.config.onError?.(error);
      throw error;
    }
  }
  /**
   * Stop mic capture and inference loop.
   *
   * Safe to call when not streaming: every teardown step is guarded by a
   * null check.
   */
  stop() {
    this._isStreaming = false;
    if (this.processor) {
      this.processor.stopDrip();
      this.processor.reset();
    }
    this.releaseCaptureResources();
    logger15.info("Mic capture stopped");
  }
  /**
   * Release the Web Audio capture graph: disconnect the script processor,
   * stop every track of the media stream, and close the audio context.
   * Each field is reset to null; already-null fields are skipped.
   */
  releaseCaptureResources() {
    if (this.scriptProcessor) {
      this.scriptProcessor.disconnect();
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor = null;
    }
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = null;
    }
    if (this.audioContext) {
      // Best-effort close: a rejection here is deliberately ignored because
      // the context reference is dropped either way.
      this.audioContext.close().catch(() => {
      });
      this.audioContext = null;
    }
  }
  /**
   * Dispose of all resources.
   *
   * Idempotent: subsequent calls return immediately. After disposal `load()`
   * and `start()` throw.
   *
   * @returns {Promise<void>}
   */
  async dispose() {
    if (this.disposed) return;
    this.disposed = true;
    this.stop();
    if (this.processor) {
      this.processor.dispose();
      this.processor = null;
    }
    if (this.a2e) {
      await this.a2e.dispose();
      this.a2e = null;
    }
    this._isReady = false;
    this._backend = null;
  }
};
|
|
8740
|
+
|
|
8924
8741
|
// src/inference/SafariSpeechRecognition.ts
|
|
8925
|
-
var
|
|
8742
|
+
// Module-level logger used by the SafariSpeechRecognition class below;
// obtained from createLogger with the name "SafariSpeech".
var logger16 = createLogger("SafariSpeech");
|
|
8926
8743
|
var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
8927
8744
|
constructor(config = {}) {
|
|
8928
8745
|
this.recognition = null;
|
|
@@ -8941,7 +8758,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
8941
8758
|
interimResults: config.interimResults ?? true,
|
|
8942
8759
|
maxAlternatives: config.maxAlternatives ?? 1
|
|
8943
8760
|
};
|
|
8944
|
-
|
|
8761
|
+
logger16.debug("SafariSpeechRecognition created", {
|
|
8945
8762
|
language: this.config.language,
|
|
8946
8763
|
continuous: this.config.continuous
|
|
8947
8764
|
});
|
|
@@ -9002,7 +8819,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9002
8819
|
*/
|
|
9003
8820
|
async start() {
|
|
9004
8821
|
if (this.isListening) {
|
|
9005
|
-
|
|
8822
|
+
logger16.warn("Already listening");
|
|
9006
8823
|
return;
|
|
9007
8824
|
}
|
|
9008
8825
|
if (!_SafariSpeechRecognition.isAvailable()) {
|
|
@@ -9032,7 +8849,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9032
8849
|
this.isListening = true;
|
|
9033
8850
|
this.startTime = performance.now();
|
|
9034
8851
|
this.accumulatedText = "";
|
|
9035
|
-
|
|
8852
|
+
logger16.info("Speech recognition started", {
|
|
9036
8853
|
language: this.config.language
|
|
9037
8854
|
});
|
|
9038
8855
|
span?.end();
|
|
@@ -9047,7 +8864,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9047
8864
|
*/
|
|
9048
8865
|
async stop() {
|
|
9049
8866
|
if (!this.isListening || !this.recognition) {
|
|
9050
|
-
|
|
8867
|
+
logger16.warn("Not currently listening");
|
|
9051
8868
|
return {
|
|
9052
8869
|
text: this.accumulatedText,
|
|
9053
8870
|
language: this.config.language,
|
|
@@ -9076,7 +8893,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9076
8893
|
if (this.recognition && this.isListening) {
|
|
9077
8894
|
this.recognition.abort();
|
|
9078
8895
|
this.isListening = false;
|
|
9079
|
-
|
|
8896
|
+
logger16.info("Speech recognition aborted");
|
|
9080
8897
|
}
|
|
9081
8898
|
}
|
|
9082
8899
|
/**
|
|
@@ -9107,7 +8924,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9107
8924
|
this.isListening = false;
|
|
9108
8925
|
this.resultCallbacks = [];
|
|
9109
8926
|
this.errorCallbacks = [];
|
|
9110
|
-
|
|
8927
|
+
logger16.debug("SafariSpeechRecognition disposed");
|
|
9111
8928
|
}
|
|
9112
8929
|
/**
|
|
9113
8930
|
* Set up event handlers for the recognition instance
|
|
@@ -9135,7 +8952,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9135
8952
|
confidence: alternative.confidence
|
|
9136
8953
|
};
|
|
9137
8954
|
this.emitResult(speechResult);
|
|
9138
|
-
|
|
8955
|
+
logger16.trace("Speech result", {
|
|
9139
8956
|
text: text.substring(0, 50),
|
|
9140
8957
|
isFinal,
|
|
9141
8958
|
confidence: alternative.confidence
|
|
@@ -9145,12 +8962,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9145
8962
|
span?.end();
|
|
9146
8963
|
} catch (error) {
|
|
9147
8964
|
span?.endWithError(error instanceof Error ? error : new Error(String(error)));
|
|
9148
|
-
|
|
8965
|
+
logger16.error("Error processing speech result", { error });
|
|
9149
8966
|
}
|
|
9150
8967
|
};
|
|
9151
8968
|
this.recognition.onerror = (event) => {
|
|
9152
8969
|
const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
|
|
9153
|
-
|
|
8970
|
+
logger16.error("Speech recognition error", { error: event.error, message: event.message });
|
|
9154
8971
|
this.emitError(error);
|
|
9155
8972
|
if (this.stopRejecter) {
|
|
9156
8973
|
this.stopRejecter(error);
|
|
@@ -9160,7 +8977,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9160
8977
|
};
|
|
9161
8978
|
this.recognition.onend = () => {
|
|
9162
8979
|
this.isListening = false;
|
|
9163
|
-
|
|
8980
|
+
logger16.info("Speech recognition ended", {
|
|
9164
8981
|
totalText: this.accumulatedText.length,
|
|
9165
8982
|
durationMs: performance.now() - this.startTime
|
|
9166
8983
|
});
|
|
@@ -9177,13 +8994,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9177
8994
|
}
|
|
9178
8995
|
};
|
|
9179
8996
|
this.recognition.onstart = () => {
|
|
9180
|
-
|
|
8997
|
+
logger16.debug("Speech recognition started by browser");
|
|
9181
8998
|
};
|
|
9182
8999
|
this.recognition.onspeechstart = () => {
|
|
9183
|
-
|
|
9000
|
+
logger16.debug("Speech detected");
|
|
9184
9001
|
};
|
|
9185
9002
|
this.recognition.onspeechend = () => {
|
|
9186
|
-
|
|
9003
|
+
logger16.debug("Speech ended");
|
|
9187
9004
|
};
|
|
9188
9005
|
}
|
|
9189
9006
|
/**
|
|
@@ -9194,7 +9011,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9194
9011
|
try {
|
|
9195
9012
|
callback(result);
|
|
9196
9013
|
} catch (error) {
|
|
9197
|
-
|
|
9014
|
+
logger16.error("Error in result callback", { error });
|
|
9198
9015
|
}
|
|
9199
9016
|
}
|
|
9200
9017
|
}
|
|
@@ -9206,7 +9023,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
|
|
|
9206
9023
|
try {
|
|
9207
9024
|
callback(error);
|
|
9208
9025
|
} catch (callbackError) {
|
|
9209
|
-
|
|
9026
|
+
logger16.error("Error in error callback", { error: callbackError });
|
|
9210
9027
|
}
|
|
9211
9028
|
}
|
|
9212
9029
|
}
|
|
@@ -9619,13 +9436,14 @@ var AgentCoreAdapter = class extends EventEmitter {
|
|
|
9619
9436
|
if (!this.lam) {
|
|
9620
9437
|
throw new Error("LAM must be initialized before pipeline");
|
|
9621
9438
|
}
|
|
9622
|
-
this.pipeline = new
|
|
9439
|
+
this.pipeline = new FullFacePipeline({
|
|
9623
9440
|
lam: this.lam,
|
|
9624
9441
|
sampleRate: 16e3,
|
|
9625
9442
|
chunkTargetMs: 200
|
|
9626
9443
|
});
|
|
9627
9444
|
await this.pipeline.initialize();
|
|
9628
|
-
this.pipeline.on("
|
|
9445
|
+
this.pipeline.on("full_frame_ready", (fullFrame) => {
|
|
9446
|
+
const frame = fullFrame.blendshapes;
|
|
9629
9447
|
this.emit("animation", {
|
|
9630
9448
|
blendshapes: frame,
|
|
9631
9449
|
get: (name) => {
|
|
@@ -9804,9 +9622,9 @@ var AgentCoreAdapter = class extends EventEmitter {
|
|
|
9804
9622
|
});
|
|
9805
9623
|
}
|
|
9806
9624
|
}
|
|
9807
|
-
// REMOVED: processAudioForAnimation() - now handled by
|
|
9625
|
+
// REMOVED: processAudioForAnimation() - now handled by FullFacePipeline
|
|
9808
9626
|
// The pipeline manages audio scheduling, LAM inference, and frame synchronization
|
|
9809
|
-
// Frames are emitted via pipeline.on('
|
|
9627
|
+
// Frames are emitted via pipeline.on('full_frame_ready') event (see initPipeline())
|
|
9810
9628
|
/**
|
|
9811
9629
|
* Detect voice activity using Silero VAD
|
|
9812
9630
|
* Falls back to simple RMS if VAD not available
|