@omote/core 0.6.2 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -13
- package/dist/index.d.mts +364 -63
- package/dist/index.d.ts +364 -63
- package/dist/index.js +486 -82
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +486 -82
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
@@ -32,7 +32,9 @@ var index_exports = {};
 __export(index_exports, {
   A2EOrchestrator: () => A2EOrchestrator,
   A2EProcessor: () => A2EProcessor,
+  ALL_AUS: () => ALL_AUS,
   ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
+  AU_TO_ARKIT: () => AU_TO_ARKIT,
   AnimationGraph: () => AnimationGraph,
   AudioChunkCoalescer: () => AudioChunkCoalescer,
   AudioEnergyAnalyzer: () => AudioEnergyAnalyzer,

@@ -43,13 +45,18 @@ __export(index_exports, {
   ConsoleExporter: () => ConsoleExporter,
   DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
   DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
+  DEFAULT_MODEL_URLS: () => DEFAULT_MODEL_URLS,
   EMOTION_NAMES: () => EMOTION_NAMES,
+  EMOTION_TO_AU: () => EMOTION_TO_AU,
   EMOTION_VECTOR_SIZE: () => EMOTION_VECTOR_SIZE,
   EmotionController: () => EmotionController,
   EmotionPresets: () => EmotionPresets,
+  EmotionResolver: () => EmotionResolver,
   EmphasisDetector: () => EmphasisDetector,
   EventEmitter: () => EventEmitter,
+  FaceCompositor: () => FaceCompositor,
   FullFacePipeline: () => FullFacePipeline,
+  HF_CDN_URLS: () => HF_CDN_URLS,
   INFERENCE_LATENCY_BUCKETS: () => INFERENCE_LATENCY_BUCKETS,
   InterruptionHandler: () => InterruptionHandler,
   LAM_BLENDSHAPES: () => LAM_BLENDSHAPES,

@@ -84,6 +91,7 @@ __export(index_exports, {
   calculateRMS: () => calculateRMS,
   configureCacheLimit: () => configureCacheLimit,
   configureLogging: () => configureLogging,
+  configureModelUrls: () => configureModelUrls,
   configureTelemetry: () => configureTelemetry,
   createA2E: () => createA2E,
   createEmotionVector: () => createEmotionVector,

@@ -114,6 +122,7 @@ __export(index_exports, {
   noopLogger: () => noopLogger,
   preloadModels: () => preloadModels,
   resetLoggingConfig: () => resetLoggingConfig,
+  resetModelUrls: () => resetModelUrls,
   resolveBackend: () => resolveBackend,
   setLogLevel: () => setLogLevel,
   setLoggingEnabled: () => setLoggingEnabled,

@@ -2604,7 +2613,7 @@ async function getOnnxRuntimeForPreference(preference = "auto") {
   const ort = await getOnnxRuntime(backend);
   return { ort, backend };
 }
-function getSessionOptions(backend) {
+function getSessionOptions(backend, config) {
   if (backend === "webgpu") {
     return {
       executionProviders: [

@@ -2620,7 +2629,7 @@ function getSessionOptions(backend) {
   if (isIOS()) {
     return {
       executionProviders: ["wasm"],
-      graphOptimizationLevel: "basic",
+      graphOptimizationLevel: config?.iosDisableOptimization ? "disabled" : "basic",
      enableCpuMemArena: false,
      enableMemPattern: false
    };
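`getSessionOptions` gains an optional second parameter so individual loaders can flip iOS-specific behavior. With `iosDisableOptimization` set, the iOS/WASM branch skips ONNX graph optimization outright, presumably trading optimizer time and peak memory for a safer load of the large fp16 model. A sketch of the resolved options (the function is module-internal, so this is illustration only):

    // Illustrative only; getSessionOptions is not exported.
    // On iOS, getSessionOptions("wasm", { iosDisableOptimization: true }) resolves to:
    const iosSessionOptions = {
      executionProviders: ["wasm"],
      graphOptimizationLevel: "disabled", // was always "basic" in 0.6.2
      enableCpuMemArena: false,
      enableMemPattern: false
    };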
@@ -2887,7 +2896,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
   logger3.info("ONNX Runtime loaded", { backend: this._backend });
   const modelUrl = this.config.modelUrl;
   const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
-  const sessionOptions = getSessionOptions(this._backend);
+  const sessionOptions = getSessionOptions(this._backend, { iosDisableOptimization: true });
   let isCached = false;
   if (isIOS()) {
     logger3.info("iOS: passing model URLs directly to ORT (low-memory path)", {

@@ -2962,7 +2971,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
   } else {
     logger3.info("Fetching external model data", {
       dataUrl,
-      note: "This may be a large download
+      note: "This may be a large download"
     });
     externalDataBuffer = await fetchWithCache(dataUrl);
   }

@@ -2970,6 +2979,9 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
     size: formatBytes(externalDataBuffer.byteLength)
   });
 } catch (err) {
+  if (typeof this.config.externalDataUrl === "string") {
+    throw new Error(`Failed to fetch external data: ${dataUrl} \u2014 ${err.message}`);
+  }
   logger3.debug("No external data file found (single-file model)", {
     dataUrl,
     error: err.message

@@ -3093,28 +3105,6 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
   };
   return this.queueInference(feeds);
 }
-/**
- * Decode CTC logits to text using greedy decoding
- */
-decodeCTC(logits) {
-  const tokens = [];
-  let prevToken = -1;
-  for (const frame of logits) {
-    let maxIdx = 0;
-    let maxVal = frame[0];
-    for (let i = 1; i < frame.length; i++) {
-      if (frame[i] > maxVal) {
-        maxVal = frame[i];
-        maxIdx = i;
-      }
-    }
-    if (maxIdx !== prevToken && maxIdx !== 0) {
-      tokens.push(maxIdx);
-    }
-    prevToken = maxIdx;
-  }
-  return tokens.map((t) => CTC_VOCAB[t] === "|" ? " " : CTC_VOCAB[t]).join("");
-}
 /**
  * Queue inference to serialize ONNX session calls
  */

@@ -3142,37 +3132,25 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
   })
 ]);
 const inferenceTimeMs = performance.now() - startTime;
-const asrOutput = results["asr_logits"];
 const blendshapeOutput = results["blendshapes"];
-if (!
-throw new Error("Missing
+if (!blendshapeOutput) {
+  throw new Error("Missing blendshapes output from model");
 }
-const asrData = asrOutput.data;
 const blendshapeData = blendshapeOutput.data;
-const numASRFrames = asrOutput.dims[1];
 const numA2EFrames = blendshapeOutput.dims[1];
-const asrVocabSize = asrOutput.dims[2];
 const numBlendshapes = blendshapeOutput.dims[2];
-const asrLogits = [];
 const blendshapes = [];
-for (let f = 0; f < numASRFrames; f++) {
-  asrLogits.push(asrData.slice(f * asrVocabSize, (f + 1) * asrVocabSize));
-}
 for (let f = 0; f < numA2EFrames; f++) {
   const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
   blendshapes.push(symmetrizeBlendshapes(rawFrame));
 }
-const text = this.decodeCTC(asrLogits);
 logger3.trace("Inference completed", {
   inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
-  numA2EFrames,
-  numASRFrames,
-  textLength: text.length
+  numA2EFrames
 });
 span?.setAttributes({
   "inference.duration_ms": inferenceTimeMs,
-  "inference.a2e_frames": numA2EFrames,
-  "inference.asr_frames": numASRFrames
+  "inference.a2e_frames": numA2EFrames
 });
 span?.end();
 telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {

@@ -3186,11 +3164,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
 });
 resolve({
   blendshapes,
-  asrLogits,
-  text,
   numFrames: numA2EFrames,
-  numA2EFrames,
-  numASRFrames,
   inferenceTimeMs
 });
 } catch (err) {
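Taken together, these hunks retire the GPU model's bundled CTC/ASR path: `decodeCTC` and the `asr_logits` output are gone, and the resolved result drops `asrLogits`, `text`, `numA2EFrames`, and `numASRFrames` in favor of the single `numFrames`. A sketch of the 0.6.6 result shape, inferred from the `resolve({ ... })` call above (not an official published type; speech-to-text now lives in the SenseVoice path):

    // Inferred shape, illustrative only.
    function handleA2EResult(result) {
      const { blendshapes, numFrames, inferenceTimeMs } = result;
      // result.text / result.asrLogits / result.numASRFrames no longer exist in 0.6.6
      console.log(`${numFrames} frames in ${inferenceTimeMs.toFixed(1)}ms`, blendshapes.length);
    }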
@@ -5553,6 +5527,51 @@ var SenseVoiceWorker = class {
   }
 };
 
+// src/inference/defaultModelUrls.ts
+var HF = "https://huggingface.co";
+var HF_MODEL_URLS = {
+  /** LAM A2E model — fp16 external data (385KB graph + 192MB weights, WebGPU) — 52 ARKit blendshapes */
+  lam: `${HF}/omote-ai/lam-a2e/resolve/main/model_fp16.onnx`,
+  /** wav2arkit_cpu A2E model graph (1.86MB, WASM) — Safari/iOS fallback */
+  wav2arkitCpu: `${HF}/myned-ai/wav2arkit_cpu/resolve/main/wav2arkit_cpu.onnx`,
+  /** SenseVoice ASR model (228MB int8, WASM) — speech recognition + emotion + language */
+  senseVoice: `${HF}/omote-ai/sensevoice-asr/resolve/main/model.int8.onnx`,
+  /** Silero VAD model (~2MB, WASM) — voice activity detection */
+  sileroVad: `${HF}/deepghs/silero-vad-onnx/resolve/main/silero_vad.onnx`
+};
+var _overrides = {};
+var DEFAULT_MODEL_URLS = new Proxy(
+  {},
+  {
+    get(_target, prop) {
+      const key = prop;
+      return _overrides[key] ?? HF_MODEL_URLS[key];
+    },
+    ownKeys() {
+      return Object.keys(HF_MODEL_URLS);
+    },
+    getOwnPropertyDescriptor(_target, prop) {
+      if (prop in HF_MODEL_URLS) {
+        return { configurable: true, enumerable: true, value: this.get(_target, prop, _target) };
+      }
+      return void 0;
+    }
+  }
+);
+function configureModelUrls(urls) {
+  for (const [key, url] of Object.entries(urls)) {
+    if (key in HF_MODEL_URLS && typeof url === "string") {
+      _overrides[key] = url;
+    }
+  }
+}
+function resetModelUrls() {
+  for (const key of Object.keys(_overrides)) {
+    delete _overrides[key];
+  }
+}
+var HF_CDN_URLS = HF_MODEL_URLS;
+
 // src/inference/UnifiedInferenceWorker.ts
 var logger8 = createLogger("UnifiedInferenceWorker");
 var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
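The new `DEFAULT_MODEL_URLS` proxy resolves each model key against per-app overrides first and falls back to the Hugging Face defaults; `configureModelUrls` installs overrides (unknown keys and non-string values are ignored) and `resetModelUrls` clears them, while `HF_CDN_URLS` remains an alias for the raw defaults. A minimal usage sketch (mirror URLs are placeholders):

    const { configureModelUrls, resetModelUrls, DEFAULT_MODEL_URLS } = require("@omote/core");

    // Point model downloads at a self-hosted mirror (placeholder URLs).
    configureModelUrls({
      lam: "https://models.example.com/lam-a2e/model_fp16.onnx",
      sileroVad: "https://models.example.com/silero_vad.onnx"
    });
    console.log(DEFAULT_MODEL_URLS.lam);        // mirror URL (override wins)
    console.log(DEFAULT_MODEL_URLS.senseVoice); // Hugging Face default (no override)
    resetModelUrls();                           // back to the defaults

Since the Wav2Vec2 loader derives its external-data URL as `${modelUrl}.data`, a mirror for `lam` also needs to serve the sibling `model_fp16.onnx.data` file.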
@@ -6794,11 +6813,12 @@ var SileroVADUnifiedAdapter = class {
 
 // src/inference/createSenseVoice.ts
 var logger9 = createLogger("createSenseVoice");
-function createSenseVoice(config) {
+function createSenseVoice(config = {}) {
+  const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.senseVoice;
   if (config.unifiedWorker) {
     logger9.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
     return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
-      modelUrl
+      modelUrl,
       tokensUrl: config.tokensUrl,
       language: config.language,
       textNorm: config.textNorm

@@ -6811,7 +6831,7 @@ function createSenseVoice(config) {
   }
   logger9.info("Creating SenseVoiceWorker (off-main-thread)");
   return new SenseVoiceWorker({
-    modelUrl
+    modelUrl,
     tokensUrl: config.tokensUrl,
     language: config.language,
     textNorm: config.textNorm

@@ -6820,7 +6840,7 @@ function createSenseVoice(config) {
   if (useWorker === false) {
     logger9.info("Creating SenseVoiceInference (main thread)");
     return new SenseVoiceInference({
-      modelUrl
+      modelUrl,
       tokensUrl: config.tokensUrl,
       language: config.language,
       textNorm: config.textNorm

@@ -6829,7 +6849,7 @@ function createSenseVoice(config) {
   if (SenseVoiceWorker.isSupported() && !isIOS()) {
     logger9.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
     return new SenseVoiceWorker({
-      modelUrl
+      modelUrl,
       tokensUrl: config.tokensUrl,
       language: config.language,
       textNorm: config.textNorm

@@ -6839,7 +6859,7 @@ function createSenseVoice(config) {
     reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
   });
   return new SenseVoiceInference({
-    modelUrl
+    modelUrl,
     tokensUrl: config.tokensUrl,
     language: config.language,
     textNorm: config.textNorm
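With the argument defaulting to `{}` and `modelUrl` falling back to `DEFAULT_MODEL_URLS.senseVoice`, the ASR factory now works zero-config. A minimal sketch:

    const { createSenseVoice } = require("@omote/core");

    // Zero-config: model URL resolves via DEFAULT_MODEL_URLS.senseVoice and the
    // factory auto-selects worker vs. main thread (main thread on iOS).
    const asr = createSenseVoice();

    // Explicit config still wins, e.g. forcing the main thread:
    const asrMain = createSenseVoice({ useWorker: false });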
@@ -7706,9 +7726,11 @@ var Wav2ArkitCpuWorker = class {
 
 // src/inference/createA2E.ts
 var logger12 = createLogger("createA2E");
-function createA2E(config) {
+function createA2E(config = {}) {
   const mode = config.mode ?? "auto";
   const fallbackOnError = config.fallbackOnError ?? true;
+  const gpuModelUrl = config.gpuModelUrl ?? DEFAULT_MODEL_URLS.lam;
+  const cpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
   let useCpu;
   if (mode === "cpu") {
     useCpu = true;

@@ -7717,33 +7739,35 @@ function createA2E(config) {
     useCpu = false;
     logger12.info("Forcing GPU A2E model (Wav2Vec2)");
   } else {
-    useCpu =
-    logger12.info("Auto-detected A2E model", {
-
-
+    useCpu = false;
+    logger12.info("Auto-detected A2E model: trying GPU first (fp16 external data)", {
+      isSafari: isSafari(),
+      isIOS: isIOS(),
+      fallbackOnError
     });
   }
   if (useCpu) {
     if (config.unifiedWorker) {
       logger12.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
       return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
-        modelUrl:
+        modelUrl: cpuModelUrl
       });
     }
     if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
       logger12.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
       return new Wav2ArkitCpuWorker({
-        modelUrl:
+        modelUrl: cpuModelUrl
       });
     }
     logger12.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
     return new Wav2ArkitCpuInference({
-      modelUrl:
+      modelUrl: cpuModelUrl
     });
   }
+  const gpuExternalDataUrl = config.gpuExternalDataUrl !== void 0 ? config.gpuExternalDataUrl : void 0;
   const gpuInstance = new Wav2Vec2Inference({
-    modelUrl:
-    externalDataUrl:
+    modelUrl: gpuModelUrl,
+    externalDataUrl: gpuExternalDataUrl,
     backend: config.gpuBackend ?? "auto",
     numIdentityClasses: config.numIdentityClasses
   });

@@ -7759,6 +7783,7 @@ var A2EWithFallback = class {
   this.hasFallenBack = false;
   this.implementation = gpuInstance;
   this.config = config;
+  this.resolvedCpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
 }
 get modelId() {
   return this.implementation.modelId;

@@ -7780,6 +7805,7 @@ var A2EWithFallback = class {
   }
 }
 async fallbackToCpu(reason) {
+  console.error("[A2EWithFallback] GPU\u2192CPU FALLBACK TRIGGERED. Reason:", reason);
   logger12.warn("GPU model load failed, falling back to CPU model", { reason });
   try {
     await this.implementation.dispose();

@@ -7787,17 +7813,17 @@ var A2EWithFallback = class {
   }
   if (this.config.unifiedWorker) {
     this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
-      modelUrl: this.
+      modelUrl: this.resolvedCpuModelUrl
     });
     logger12.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
   } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
     this.implementation = new Wav2ArkitCpuWorker({
-      modelUrl: this.
+      modelUrl: this.resolvedCpuModelUrl
    });
    logger12.info("Fallback to Wav2ArkitCpuWorker successful");
  } else {
    this.implementation = new Wav2ArkitCpuInference({
-      modelUrl: this.
+      modelUrl: this.resolvedCpuModelUrl
    });
    logger12.info("Fallback to Wav2ArkitCpuInference successful");
  }
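`createA2E` follows the same pattern: zero-config auto mode now always tries the GPU model first, and with `fallbackOnError` left at its default of true, `A2EWithFallback` swaps in the CPU model (at `resolvedCpuModelUrl`) after a load failure. A minimal sketch (the mirror URL is a placeholder):

    const { createA2E } = require("@omote/core");

    // Zero-config: GPU model from DEFAULT_MODEL_URLS.lam, CPU fallback model
    // from DEFAULT_MODEL_URLS.wav2arkitCpu.
    const a2e = createA2E();

    // Forcing the CPU model with a self-hosted URL:
    const a2eCpu = createA2E({
      mode: "cpu",
      cpuModelUrl: "https://models.example.com/wav2arkit_cpu.onnx"
    });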
@@ -8987,10 +9013,12 @@ function supportsVADWorker() {
   }
   return true;
 }
-function createSileroVAD(config) {
+function createSileroVAD(config = {}) {
+  const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.sileroVad;
+  const resolvedConfig = { ...config, modelUrl };
   if (config.unifiedWorker) {
     logger15.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
-    return new SileroVADUnifiedAdapter(config.unifiedWorker,
+    return new SileroVADUnifiedAdapter(config.unifiedWorker, resolvedConfig);
   }
   const fallbackOnError = config.fallbackOnError ?? true;
   let useWorker;

@@ -9010,24 +9038,24 @@ function createSileroVAD(config) {
   if (useWorker) {
     logger15.info("Creating SileroVADWorker (off-main-thread)");
     const worker = new SileroVADWorker({
-      modelUrl
+      modelUrl,
       sampleRate: config.sampleRate,
       threshold: config.threshold,
       preSpeechBufferChunks: config.preSpeechBufferChunks
     });
     if (fallbackOnError) {
-      return new VADWorkerWithFallback(worker,
+      return new VADWorkerWithFallback(worker, resolvedConfig);
     }
     return worker;
   }
   logger15.info("Creating SileroVADInference (main thread)");
-  return new SileroVADInference(
+  return new SileroVADInference(resolvedConfig);
 }
 var VADWorkerWithFallback = class {
-  constructor(worker,
+  constructor(worker, resolvedConfig) {
     this.hasFallenBack = false;
     this.implementation = worker;
-    this.
+    this.resolvedConfig = resolvedConfig;
   }
   get backend() {
     if (!this.isLoaded) return null;

@@ -9053,7 +9081,7 @@ var VADWorkerWithFallback = class {
     await this.implementation.dispose();
   } catch {
   }
-  this.implementation = new SileroVADInference(this.
+  this.implementation = new SileroVADInference(this.resolvedConfig);
   this.hasFallenBack = true;
   logger15.info("Fallback to SileroVADInference successful");
   return await this.implementation.load();
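`createSileroVAD` gets the same treatment, with the resolved URL folded into a `resolvedConfig` that is handed to whichever implementation ends up constructed (unified adapter, worker, or main thread), including the post-fallback one. A minimal sketch:

    const { createSileroVAD } = require("@omote/core");

    // Zero-config: the ~2MB model resolves via DEFAULT_MODEL_URLS.sileroVad.
    const vad = createSileroVAD();
    vad.load().then(() => { /* VAD ready */ });

    // Tuning knobs pass through unchanged:
    const strictVad = createSileroVAD({ threshold: 0.7, preSpeechBufferChunks: 4 });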
@@ -10101,17 +10129,29 @@ var AnimationGraph = class extends EventEmitter {
 // src/animation/ProceduralLifeLayer.ts
 var import_simplex_noise = require("simplex-noise");
 var simplex2d = (0, import_simplex_noise.createNoise2D)();
+var LIFE_BS_INDEX = /* @__PURE__ */ new Map();
+for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
+  LIFE_BS_INDEX.set(LAM_BLENDSHAPES[i], i);
+}
 var PHASE_OPEN = 0;
 var PHASE_CLOSING = 1;
 var PHASE_CLOSED = 2;
 var PHASE_OPENING = 3;
-var BLINK_CLOSE_DURATION = 0.
+var BLINK_CLOSE_DURATION = 0.092;
 var BLINK_HOLD_DURATION = 0.04;
-var BLINK_OPEN_DURATION = 0.
+var BLINK_OPEN_DURATION = 0.242;
 var BLINK_ASYMMETRY_DELAY = 8e-3;
+var BLINK_IBI_MU = Math.log(5.97);
+var BLINK_IBI_SIGMA = 0.89;
 var GAZE_BREAK_DURATION = 0.12;
 var GAZE_BREAK_HOLD_DURATION = 0.3;
 var GAZE_BREAK_RETURN_DURATION = 0.15;
+var GAZE_STATE_PARAMS = {
+  idle: { interval: [2, 5], amplitude: [0.15, 0.4] },
+  listening: { interval: [4, 10], amplitude: [0.1, 0.25] },
+  thinking: { interval: [1, 3], amplitude: [0.2, 0.5] },
+  speaking: { interval: [2, 6], amplitude: [0.15, 0.35] }
+};
 var EYE_NOISE_X_FREQ = 0.8;
 var EYE_NOISE_Y_FREQ = 0.6;
 var EYE_NOISE_X_PHASE = 73.1;

@@ -10139,6 +10179,12 @@ function smoothStep(t) {
 function softClamp(v, max) {
   return Math.tanh(v / max) * max;
 }
+function sampleLogNormal(mu, sigma) {
+  const u1 = Math.random();
+  const u2 = Math.random();
+  const z = Math.sqrt(-2 * Math.log(u1 || 1e-10)) * Math.cos(2 * Math.PI * u2);
+  return Math.exp(mu + sigma * z);
+}
 var ProceduralLifeLayer = class {
   constructor(config) {
     // Blink state

@@ -10151,7 +10197,7 @@ var ProceduralLifeLayer = class {
     // Eye contact (smoothed)
     this.smoothedEyeX = 0;
     this.smoothedEyeY = 0;
-    // Eye micro-motion
+    // Eye micro-motion
     this.eyeNoiseTime = 0;
     // Gaze break state
     this.gazeBreakTimer = 0;

@@ -10161,6 +10207,8 @@ var ProceduralLifeLayer = class {
     this.gazeBreakTargetY = 0;
     this.gazeBreakCurrentX = 0;
     this.gazeBreakCurrentY = 0;
+    // Conversational state for gaze
+    this.currentState = null;
     // Breathing / postural sway
     this.microMotionTime = 0;
     this.breathingPhase = 0;

@@ -10169,6 +10217,7 @@ var ProceduralLifeLayer = class {
     this.previousEnergy = 0;
     this.emphasisLevel = 0;
     this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
+    this.useLogNormalBlinks = !config?.blinkIntervalRange;
     this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
     this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
     this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;

@@ -10178,7 +10227,7 @@ var ProceduralLifeLayer = class {
     this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
     this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
     this.eyeSmoothing = config?.eyeSmoothing ?? 15;
-    this.blinkInterval =
+    this.blinkInterval = this.nextBlinkInterval();
     this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
   }
   /**

@@ -10193,6 +10242,7 @@ var ProceduralLifeLayer = class {
   const eyeTargetY = input?.eyeTargetY ?? 0;
   const audioEnergy = input?.audioEnergy ?? 0;
   const isSpeaking = input?.isSpeaking ?? false;
+  this.currentState = input?.state ?? null;
   const safeDelta = Math.min(delta, 0.1);
   const blendshapes = {};
   this.updateBlinks(delta);

@@ -10231,6 +10281,12 @@ var ProceduralLifeLayer = class {
   const swayAmp = this.posturalSwayAmplitude;
   const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
   const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
+  const breathVal = Math.sin(this.breathingPhase);
+  if (breathVal > 0) {
+    blendshapes["jawOpen"] = breathVal * 0.015;
+    blendshapes["noseSneerLeft"] = breathVal * 8e-3;
+    blendshapes["noseSneerRight"] = breathVal * 8e-3;
+  }
   return {
     blendshapes,
     headDelta: {

@@ -10239,12 +10295,35 @@ var ProceduralLifeLayer = class {
     }
   };
 }
+/**
+ * Write life layer output directly to a Float32Array[52] in LAM_BLENDSHAPES order.
+ *
+ * Includes micro-jitter (0.4% amplitude simplex noise on all channels) to
+ * break uncanny stillness on undriven channels.
+ *
+ * @param delta - Time since last frame in seconds
+ * @param input - Per-frame input
+ * @param out - Pre-allocated Float32Array(52) to write into
+ */
+updateToArray(delta, input, out) {
+  out.fill(0);
+  const result = this.update(delta, input);
+  for (const [name, value] of Object.entries(result.blendshapes)) {
+    const idx = LIFE_BS_INDEX.get(name);
+    if (idx !== void 0) {
+      out[idx] = value;
+    }
+  }
+  for (let i = 0; i < 52; i++) {
+    out[i] += simplex2d(this.noiseTime * 0.3, i * 7.13) * 4e-3;
+  }
+}
 /**
  * Reset all internal state to initial values.
  */
 reset() {
   this.blinkTimer = 0;
-  this.blinkInterval =
+  this.blinkInterval = this.nextBlinkInterval();
   this.blinkPhase = PHASE_OPEN;
   this.blinkProgress = 0;
   this.asymmetryRight = 0.97;
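The new `updateToArray` gives render loops an allocation-free path: it writes into a caller-owned `Float32Array(52)` instead of a fresh name-keyed object each frame. A minimal sketch (construction shown inline; whether `ProceduralLifeLayer` is itself exported is not visible in this diff, though `FaceCompositor` constructs one internally):

    // Illustrative only; reuse one buffer across frames to avoid GC churn.
    const life = new ProceduralLifeLayer();
    const lifeOut = new Float32Array(52);

    function onFrame(deltaSeconds) {
      life.updateToArray(deltaSeconds, { isSpeaking: false }, lifeOut);
      // lifeOut now holds blinks, gaze and breathing, plus the 0.4% micro-jitter,
      // in LAM_BLENDSHAPES order.
    }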
@@ -10261,6 +10340,7 @@ var ProceduralLifeLayer = class {
   this.gazeBreakTargetY = 0;
   this.gazeBreakCurrentX = 0;
   this.gazeBreakCurrentY = 0;
+  this.currentState = null;
   this.microMotionTime = 0;
   this.breathingPhase = 0;
   this.noiseTime = 0;

@@ -10268,6 +10348,21 @@ var ProceduralLifeLayer = class {
   this.emphasisLevel = 0;
 }
 // =====================================================================
+// PRIVATE: Blink interval sampling
+// =====================================================================
+/**
+ * Sample next blink interval.
+ * Uses log-normal distribution (PMC3565584) when using default config,
+ * or uniform random when custom blinkIntervalRange is provided.
+ */
+nextBlinkInterval() {
+  if (this.useLogNormalBlinks) {
+    const sample = sampleLogNormal(BLINK_IBI_MU, BLINK_IBI_SIGMA);
+    return clamp(sample, 1.5, 12);
+  }
+  return randomRange(...this.blinkIntervalRange);
+}
+// =====================================================================
 // PRIVATE: Blink system
 // =====================================================================
 updateBlinks(delta) {
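For intuition, the constants put the median inter-blink interval at exp(BLINK_IBI_MU) = 5.97 s, and sigma = 0.89 gives the long right tail typical of spontaneous blinking; `sampleLogNormal` is a standard Box-Muller normal draw, exponentiated. A quick check of what those numbers imply (illustrative, not part of the package):

    const mu = Math.log(5.97);
    const sigma = 0.89;
    console.log(Math.exp(mu));                     // 5.97, the median blink gap in seconds
    console.log(Math.exp(mu + sigma * sigma / 2)); // ~8.86, the mean before clamping
    // nextBlinkInterval() clamps each sample to [1.5, 12] seconds.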
@@ -10276,7 +10371,7 @@ var ProceduralLifeLayer = class {
   this.blinkPhase = PHASE_CLOSING;
   this.blinkProgress = 0;
   this.blinkTimer = 0;
-  this.blinkInterval =
+  this.blinkInterval = this.nextBlinkInterval();
   this.asymmetryRight = 0.95 + Math.random() * 0.08;
 }
 if (this.blinkPhase > PHASE_OPEN) {

@@ -10332,18 +10427,32 @@ var ProceduralLifeLayer = class {
   return { x, y };
 }
 // =====================================================================
-// PRIVATE: Gaze breaks
+// PRIVATE: Gaze breaks (state-dependent)
 // =====================================================================
+/**
+ * Get active gaze parameters — uses state-dependent params when
+ * conversational state is provided, otherwise falls back to config ranges.
+ */
+getActiveGazeParams() {
+  if (this.currentState && GAZE_STATE_PARAMS[this.currentState]) {
+    return GAZE_STATE_PARAMS[this.currentState];
+  }
+  return {
+    interval: this.gazeBreakIntervalRange,
+    amplitude: this.gazeBreakAmplitudeRange
+  };
+}
 updateGazeBreaks(delta) {
   this.gazeBreakTimer += delta;
   if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
     this.gazeBreakPhase = PHASE_CLOSING;
     this.gazeBreakProgress = 0;
     this.gazeBreakTimer = 0;
-    const
+    const params = this.getActiveGazeParams();
+    const amp = randomRange(...params.amplitude);
     this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
     this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
-    this.gazeBreakInterval = randomRange(...
+    this.gazeBreakInterval = randomRange(...params.interval);
   }
   if (this.gazeBreakPhase > PHASE_OPEN) {
     this.gazeBreakProgress += delta;
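Gaze breaks are now state-aware: per `GAZE_STATE_PARAMS`, a `thinking` character looks away often and far (every 1-3 s, amplitude 0.2-0.5) while a `listening` one holds eye contact (every 4-10 s, amplitude 0.1-0.25). A minimal sketch of feeding conversational state into the per-frame input:

    // Illustrative only; without a state, the configured ranges still apply.
    const layer = new ProceduralLifeLayer();
    layer.update(1 / 60, { state: "thinking", isSpeaking: false });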
@@ -10408,6 +10517,300 @@ var ProceduralLifeLayer = class {
   }
 };
 
+// src/face/FACSMapping.ts
+var EMOTION_TO_AU = {
+  joy: [
+    { au: "AU6", intensity: 0.7, region: "upper" },
+    // cheek raise (Duchenne)
+    { au: "AU12", intensity: 0.8, region: "lower" }
+    // lip corner pull (smile)
+  ],
+  anger: [
+    { au: "AU4", intensity: 0.8, region: "upper" },
+    // brow lower
+    { au: "AU5", intensity: 0.4, region: "upper" },
+    // upper lid raise
+    { au: "AU7", intensity: 0.3, region: "upper" },
+    // lid tighten
+    { au: "AU23", intensity: 0.6, region: "lower" }
+    // lip tighten
+  ],
+  sadness: [
+    { au: "AU1", intensity: 0.7, region: "upper" },
+    // inner brow raise
+    { au: "AU4", intensity: 0.3, region: "upper" },
+    // brow lower (furrow)
+    { au: "AU15", intensity: 0.5, region: "lower" }
+    // lip corner depress
+  ],
+  fear: [
+    { au: "AU1", intensity: 0.6, region: "upper" },
+    // inner brow raise
+    { au: "AU2", intensity: 0.5, region: "upper" },
+    // outer brow raise
+    { au: "AU4", intensity: 0.3, region: "upper" },
+    // brow lower
+    { au: "AU5", intensity: 0.5, region: "upper" },
+    // upper lid raise
+    { au: "AU20", intensity: 0.4, region: "lower" }
+    // lip stretch
+  ],
+  disgust: [
+    { au: "AU9", intensity: 0.7, region: "upper" },
+    // nose wrinkle
+    { au: "AU10", intensity: 0.5, region: "lower" },
+    // upper lip raise
+    { au: "AU15", intensity: 0.4, region: "lower" }
+    // lip corner depress
+  ],
+  amazement: [
+    { au: "AU1", intensity: 0.6, region: "upper" },
+    // inner brow raise
+    { au: "AU2", intensity: 0.7, region: "upper" },
+    // outer brow raise
+    { au: "AU5", intensity: 0.6, region: "upper" },
+    // upper lid raise
+    { au: "AU26", intensity: 0.4, region: "lower" }
+    // jaw drop
+  ],
+  grief: [
+    { au: "AU1", intensity: 0.8, region: "upper" },
+    // inner brow raise
+    { au: "AU4", intensity: 0.5, region: "upper" },
+    // brow lower
+    { au: "AU6", intensity: 0.3, region: "upper" },
+    // cheek raise (grief cry)
+    { au: "AU15", intensity: 0.6, region: "lower" }
+    // lip corner depress
+  ],
+  cheekiness: [
+    { au: "AU2", intensity: 0.4, region: "upper" },
+    // outer brow raise
+    { au: "AU6", intensity: 0.4, region: "upper" },
+    // cheek raise
+    { au: "AU12", intensity: 0.6, region: "lower" }
+    // lip corner pull (smirk)
+  ],
+  pain: [
+    { au: "AU4", intensity: 0.7, region: "upper" },
+    // brow lower
+    { au: "AU6", intensity: 0.4, region: "upper" },
+    // cheek raise (orbicularis)
+    { au: "AU7", intensity: 0.7, region: "upper" },
+    // lid tighten (squint)
+    { au: "AU9", intensity: 0.5, region: "upper" }
+    // nose wrinkle
+  ],
+  outofbreath: [
+    { au: "AU1", intensity: 0.3, region: "upper" },
+    // inner brow raise
+    { au: "AU25", intensity: 0.3, region: "lower" },
+    // lips part
+    { au: "AU26", intensity: 0.5, region: "lower" }
+    // jaw drop
+  ]
+};
+var AU_TO_ARKIT = {
+  "AU1": [{ blendshape: "browInnerUp", weight: 1 }],
+  "AU2": [{ blendshape: "browOuterUpLeft", weight: 1 }, { blendshape: "browOuterUpRight", weight: 1 }],
+  "AU4": [{ blendshape: "browDownLeft", weight: 1 }, { blendshape: "browDownRight", weight: 1 }],
+  "AU5": [{ blendshape: "eyeWideLeft", weight: 1 }, { blendshape: "eyeWideRight", weight: 1 }],
+  "AU6": [{ blendshape: "cheekSquintLeft", weight: 1 }, { blendshape: "cheekSquintRight", weight: 1 }],
+  "AU7": [{ blendshape: "eyeSquintLeft", weight: 1 }, { blendshape: "eyeSquintRight", weight: 1 }],
+  "AU9": [{ blendshape: "noseSneerLeft", weight: 1 }, { blendshape: "noseSneerRight", weight: 1 }],
+  "AU10": [{ blendshape: "mouthUpperUpLeft", weight: 1 }, { blendshape: "mouthUpperUpRight", weight: 1 }],
+  "AU12": [{ blendshape: "mouthSmileLeft", weight: 1 }, { blendshape: "mouthSmileRight", weight: 1 }],
+  "AU15": [{ blendshape: "mouthFrownLeft", weight: 1 }, { blendshape: "mouthFrownRight", weight: 1 }],
+  "AU20": [{ blendshape: "mouthStretchLeft", weight: 1 }, { blendshape: "mouthStretchRight", weight: 1 }],
+  "AU23": [{ blendshape: "mouthPressLeft", weight: 1 }, { blendshape: "mouthPressRight", weight: 1 }],
+  "AU25": [{ blendshape: "jawOpen", weight: 0.3 }],
+  "AU26": [{ blendshape: "jawOpen", weight: 1 }]
+};
+var ALL_AUS = [...new Set(
+  Object.values(EMOTION_TO_AU).flatMap((activations) => activations.map((a) => a.au))
+)];
+
+// src/face/EmotionResolver.ts
+var BS_INDEX = /* @__PURE__ */ new Map();
+for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
+  BS_INDEX.set(LAM_BLENDSHAPES[i], i);
+}
+var EmotionResolver = class {
+  constructor() {
+    this.upperBuffer = new Float32Array(52);
+    this.lowerBuffer = new Float32Array(52);
+  }
+  /**
+   * Resolve emotion weights to upper/lower face blendshape contributions.
+   *
+   * @param weights - Emotion channel weights from EmotionController
+   * @param intensity - Global intensity multiplier (0-2). Default: 1.0
+   * @returns Upper and lower face blendshape arrays (52 channels each)
+   */
+  resolve(weights, intensity = 1) {
+    const upper = this.upperBuffer;
+    const lower = this.lowerBuffer;
+    upper.fill(0);
+    lower.fill(0);
+    for (const emotionName of EMOTION_NAMES) {
+      const emotionWeight = weights[emotionName];
+      if (!emotionWeight || emotionWeight < 0.01) continue;
+      const auActivations = EMOTION_TO_AU[emotionName];
+      if (!auActivations) continue;
+      for (const activation of auActivations) {
+        const arkitMappings = AU_TO_ARKIT[activation.au];
+        if (!arkitMappings) continue;
+        const target = activation.region === "upper" ? upper : lower;
+        const scale = emotionWeight * activation.intensity * intensity;
+        for (const mapping of arkitMappings) {
+          const idx = BS_INDEX.get(mapping.blendshape);
+          if (idx !== void 0) {
+            target[idx] += mapping.weight * scale;
+          }
+        }
+      }
+    }
+    for (let i = 0; i < 52; i++) {
+      if (upper[i] > 1) upper[i] = 1;
+      if (lower[i] > 1) lower[i] = 1;
+    }
+    return { upper, lower };
+  }
+};
+
+// src/face/FaceCompositor.ts
+function smoothstep(t) {
+  return t * t * (3 - 2 * t);
+}
+var BS_INDEX2 = /* @__PURE__ */ new Map();
+for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
+  BS_INDEX2.set(LAM_BLENDSHAPES[i], i);
+}
+var IDX_MOUTH_CLOSE = BS_INDEX2.get("mouthClose");
+var IS_EYE_CHANNEL = new Array(52).fill(false);
+for (const name of LAM_BLENDSHAPES) {
+  if (name.startsWith("eyeBlink") || name.startsWith("eyeLook")) {
+    IS_EYE_CHANNEL[BS_INDEX2.get(name)] = true;
+  }
+}
+var FaceCompositor = class {
+  constructor(config) {
+    this.emotionResolver = new EmotionResolver();
+    // Pre-allocated buffers
+    this.outputBuffer = new Float32Array(52);
+    this.smoothedUpper = new Float32Array(52);
+    this.smoothedLower = new Float32Array(52);
+    this.lifeBuffer = new Float32Array(52);
+    // Profile arrays (pre-expanded to 52 channels)
+    this.multiplier = new Float32Array(52).fill(1);
+    this.offset = new Float32Array(52);
+    this.lifeLayer = config?.lifeLayer ?? new ProceduralLifeLayer();
+    this.emotionSmoothing = config?.emotionSmoothing ?? 0.12;
+    if (config?.profile) {
+      this.applyProfileArrays(config.profile);
+    }
+  }
+  /**
+   * Compose a single output frame from the 5-stage signal chain.
+   *
+   * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
+   * @param input - Per-frame input (deltaTime, emotion, life layer params)
+   * @param target - Optional pre-allocated output buffer (avoids per-frame allocation).
+   * When omitted, an internal buffer is used (valid until next compose() call).
+   * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
+   */
+  compose(base, input, target) {
+    const out = target ?? this.outputBuffer;
+    out.set(base);
+    const emotion = input.emotion ?? this.stickyEmotion;
+    if (emotion) {
+      const resolved = this.emotionResolver.resolve(
+        emotion,
+        input.emotionIntensity ?? 1
+      );
+      const k = this.emotionSmoothing;
+      for (let i = 0; i < 52; i++) {
+        this.smoothedUpper[i] += (resolved.upper[i] - this.smoothedUpper[i]) * k;
+        this.smoothedLower[i] += (resolved.lower[i] - this.smoothedLower[i]) * k;
+      }
+      const mc = base[IDX_MOUTH_CLOSE];
+      const bilabialSuppress = mc <= 0.3 ? 1 : mc >= 0.7 ? 0.1 : 1 - 0.9 * smoothstep((mc - 0.3) * 2.5);
+      for (let i = 0; i < 52; i++) {
+        out[i] += this.smoothedUpper[i];
+      }
+      for (let i = 0; i < 52; i++) {
+        out[i] *= 1 + this.smoothedLower[i] * bilabialSuppress;
+      }
+    }
+    const lifeResult = this.lifeLayer.update(input.deltaTime, input);
+    this.lifeBuffer.fill(0);
+    for (const [name, value] of Object.entries(lifeResult.blendshapes)) {
+      const idx = BS_INDEX2.get(name);
+      if (idx !== void 0) {
+        this.lifeBuffer[idx] = value;
+      }
+    }
+    for (let i = 0; i < 52; i++) {
+      if (IS_EYE_CHANNEL[i]) {
+        out[i] = this.lifeBuffer[i];
+      } else {
+        out[i] += this.lifeBuffer[i];
+      }
+    }
+    for (let i = 0; i < 52; i++) {
+      out[i] = out[i] * this.multiplier[i] + this.offset[i];
+    }
+    for (let i = 0; i < 52; i++) {
+      if (out[i] < 0) out[i] = 0;
+      else if (out[i] > 1) out[i] = 1;
+    }
+    return { blendshapes: out, headDelta: lifeResult.headDelta };
+  }
+  /**
+   * Set sticky emotion (used when input.emotion is not provided).
+   */
+  setEmotion(weights) {
+    this.stickyEmotion = weights;
+  }
+  /**
+   * Update character profile at runtime.
+   */
+  setProfile(profile) {
+    this.multiplier.fill(1);
+    this.offset.fill(0);
+    this.applyProfileArrays(profile);
+  }
+  /**
+   * Reset all smoothing state and life layer.
+   */
+  reset() {
+    this.smoothedUpper.fill(0);
+    this.smoothedLower.fill(0);
+    this.lifeBuffer.fill(0);
+    this.stickyEmotion = void 0;
+    this.lifeLayer.reset();
+  }
+  /** Expand partial profile maps into dense Float32Arrays */
+  applyProfileArrays(profile) {
+    if (profile.multiplier) {
+      for (const [name, value] of Object.entries(profile.multiplier)) {
+        const idx = BS_INDEX2.get(name);
+        if (idx !== void 0 && value !== void 0) {
+          this.multiplier[idx] = value;
+        }
+      }
+    }
+    if (profile.offset) {
+      for (const [name, value] of Object.entries(profile.offset)) {
+        const idx = BS_INDEX2.get(name);
+        if (idx !== void 0 && value !== void 0) {
+          this.offset[idx] = value;
+        }
+      }
+    }
+  }
+};
+
 // src/orchestration/MicLipSync.ts
 var logger18 = createLogger("MicLipSync");
 var MicLipSync = class extends EventEmitter {
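`FaceCompositor` is the new top of the face stack: A2E output, then the smoothed emotion layer (FACS AUs resolved to ARKit channels; upper face added, lower face applied multiplicatively with bilabial suppression so smiles don't break lip closures), then the procedural life layer (eye channels replace, everything else adds), then the per-character profile, then a [0, 1] clamp. A minimal per-frame sketch (`joy` as an emotion key is an assumption based on EMOTION_TO_AU above):

    const { FaceCompositor } = require("@omote/core");

    const compositor = new FaceCompositor({
      profile: { multiplier: { jawOpen: 1.2 }, offset: { mouthSmileLeft: 0.05 } }
    });
    compositor.setEmotion({ joy: 0.6 }); // sticky until replaced

    const target = new Float32Array(52); // optional caller-owned output buffer
    function onA2EFrame(base, deltaTime) { // base: Float32Array(52) from the A2E model
      const { blendshapes, headDelta } = compositor.compose(base, { deltaTime }, target);
      // blendshapes is clamped to [0, 1]; headDelta drives the head bone.
    }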
@@ -10648,6 +11051,7 @@ var VoicePipeline = class extends EventEmitter {
   new Promise((r) => setTimeout(() => r("timeout"), timeoutMs))
 ]);
 if (lamLoadResult === "timeout") {
+  console.error(`[VoicePipeline] LAM TIMEOUT after ${timeoutMs}ms \u2014 forcing CPU fallback`);
   logger19.warn(`LAM GPU load timed out after ${timeoutMs}ms, falling back to CPU`);
   await lam.dispose();
   lam = createA2E({