@omote/core 0.6.2 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -13
- package/dist/index.d.mts +364 -63
- package/dist/index.d.ts +364 -63
- package/dist/index.js +486 -82
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +486 -82
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -2194,7 +2194,7 @@ async function getOnnxRuntimeForPreference(preference = "auto") {
   const ort = await getOnnxRuntime(backend);
   return { ort, backend };
 }
-function getSessionOptions(backend) {
+function getSessionOptions(backend, config) {
   if (backend === "webgpu") {
     return {
       executionProviders: [
@@ -2210,7 +2210,7 @@ function getSessionOptions(backend) {
   if (isIOS()) {
     return {
       executionProviders: ["wasm"],
-      graphOptimizationLevel: "basic",
+      graphOptimizationLevel: config?.iosDisableOptimization ? "disabled" : "basic",
       enableCpuMemArena: false,
       enableMemPattern: false
     };
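The new optional `config` parameter lets a caller request `graphOptimizationLevel: "disabled"` on iOS, where ORT's graph optimizer can spike memory on the large A2E graph. A minimal sketch of the resulting iOS session options, using only names shown in this hunk (not the full typed API):

    // Sketch: what getSessionOptions("wasm", { iosDisableOptimization: true })
    // yields on iOS per the hunk above.
    const iosSessionOptions = {
      executionProviders: ["wasm"],
      graphOptimizationLevel: "disabled", // 0.6.2 always used "basic" here
      enableCpuMemArena: false,
      enableMemPattern: false
    };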
@@ -2477,7 +2477,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
   logger3.info("ONNX Runtime loaded", { backend: this._backend });
   const modelUrl = this.config.modelUrl;
   const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
-  const sessionOptions = getSessionOptions(this._backend);
+  const sessionOptions = getSessionOptions(this._backend, { iosDisableOptimization: true });
   let isCached = false;
   if (isIOS()) {
     logger3.info("iOS: passing model URLs directly to ORT (low-memory path)", {
@@ -2552,7 +2552,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
   } else {
     logger3.info("Fetching external model data", {
       dataUrl,
-      note: "This may be a large download
+      note: "This may be a large download"
     });
     externalDataBuffer = await fetchWithCache(dataUrl);
   }
@@ -2560,6 +2560,9 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
     size: formatBytes(externalDataBuffer.byteLength)
   });
 } catch (err) {
+  if (typeof this.config.externalDataUrl === "string") {
+    throw new Error(`Failed to fetch external data: ${dataUrl} \u2014 ${err.message}`);
+  }
   logger3.debug("No external data file found (single-file model)", {
     dataUrl,
     error: err.message
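Failure handling for the external-data fetch is now split: an explicitly configured `externalDataUrl` string turns a fetch failure into a hard error, while the implicit `${modelUrl}.data` probe still falls through to the single-file path with a debug log. Behavior sketch (the URL is a placeholder; most callers reach this class through createA2E's gpuExternalDataUrl option):

    // externalDataUrl: "https://example.com/model_fp16.onnx.data" -> throws on fetch failure (new in 0.6.6)
    // externalDataUrl: undefined -> probes `${modelUrl}.data`, logs debug on a miss (unchanged)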
@@ -2683,28 +2686,6 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
     };
     return this.queueInference(feeds);
   }
-  /**
-   * Decode CTC logits to text using greedy decoding
-   */
-  decodeCTC(logits) {
-    const tokens = [];
-    let prevToken = -1;
-    for (const frame of logits) {
-      let maxIdx = 0;
-      let maxVal = frame[0];
-      for (let i = 1; i < frame.length; i++) {
-        if (frame[i] > maxVal) {
-          maxVal = frame[i];
-          maxIdx = i;
-        }
-      }
-      if (maxIdx !== prevToken && maxIdx !== 0) {
-        tokens.push(maxIdx);
-      }
-      prevToken = maxIdx;
-    }
-    return tokens.map((t) => CTC_VOCAB[t] === "|" ? " " : CTC_VOCAB[t]).join("");
-  }
   /**
    * Queue inference to serialize ONNX session calls
    */
@@ -2732,37 +2713,25 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
       })
     ]);
     const inferenceTimeMs = performance.now() - startTime;
-    const asrOutput = results["asr_logits"];
     const blendshapeOutput = results["blendshapes"];
-    if (!
-    throw new Error("Missing
+    if (!blendshapeOutput) {
+      throw new Error("Missing blendshapes output from model");
     }
-    const asrData = asrOutput.data;
     const blendshapeData = blendshapeOutput.data;
-    const numASRFrames = asrOutput.dims[1];
     const numA2EFrames = blendshapeOutput.dims[1];
-    const asrVocabSize = asrOutput.dims[2];
     const numBlendshapes = blendshapeOutput.dims[2];
-    const asrLogits = [];
     const blendshapes = [];
-    for (let f = 0; f < numASRFrames; f++) {
-      asrLogits.push(asrData.slice(f * asrVocabSize, (f + 1) * asrVocabSize));
-    }
     for (let f = 0; f < numA2EFrames; f++) {
       const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
       blendshapes.push(symmetrizeBlendshapes(rawFrame));
     }
-    const text = this.decodeCTC(asrLogits);
     logger3.trace("Inference completed", {
       inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
-      numA2EFrames
-      numASRFrames,
-      textLength: text.length
+      numA2EFrames
     });
     span?.setAttributes({
       "inference.duration_ms": inferenceTimeMs,
-      "inference.a2e_frames": numA2EFrames
-      "inference.asr_frames": numASRFrames
+      "inference.a2e_frames": numA2EFrames
     });
     span?.end();
     telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
@@ -2776,11 +2745,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
     });
     resolve({
       blendshapes,
-      asrLogits,
-      text,
       numFrames: numA2EFrames,
-      numA2EFrames,
-      numASRFrames,
       inferenceTimeMs
     });
   } catch (err) {
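Taken together with the removal of decodeCTC above, 0.6.6 drops the ASR head from the Wav2Vec2 path entirely: the session no longer reads the `asr_logits` output, and the resolved result shrinks accordingly. Shape change as implied by these hunks (sketch, not the typed API):

    // 0.6.2 result: { blendshapes, asrLogits, text, numFrames, numA2EFrames, numASRFrames, inferenceTimeMs }
    // 0.6.6 result: { blendshapes, numFrames, inferenceTimeMs }
    // Text output, if needed, comes from the separate SenseVoice ASR path.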
@@ -5143,6 +5108,51 @@ var SenseVoiceWorker = class {
   }
 };
 
+// src/inference/defaultModelUrls.ts
+var HF = "https://huggingface.co";
+var HF_MODEL_URLS = {
+  /** LAM A2E model — fp16 external data (385KB graph + 192MB weights, WebGPU) — 52 ARKit blendshapes */
+  lam: `${HF}/omote-ai/lam-a2e/resolve/main/model_fp16.onnx`,
+  /** wav2arkit_cpu A2E model graph (1.86MB, WASM) — Safari/iOS fallback */
+  wav2arkitCpu: `${HF}/myned-ai/wav2arkit_cpu/resolve/main/wav2arkit_cpu.onnx`,
+  /** SenseVoice ASR model (228MB int8, WASM) — speech recognition + emotion + language */
+  senseVoice: `${HF}/omote-ai/sensevoice-asr/resolve/main/model.int8.onnx`,
+  /** Silero VAD model (~2MB, WASM) — voice activity detection */
+  sileroVad: `${HF}/deepghs/silero-vad-onnx/resolve/main/silero_vad.onnx`
+};
+var _overrides = {};
+var DEFAULT_MODEL_URLS = new Proxy(
+  {},
+  {
+    get(_target, prop) {
+      const key = prop;
+      return _overrides[key] ?? HF_MODEL_URLS[key];
+    },
+    ownKeys() {
+      return Object.keys(HF_MODEL_URLS);
+    },
+    getOwnPropertyDescriptor(_target, prop) {
+      if (prop in HF_MODEL_URLS) {
+        return { configurable: true, enumerable: true, value: this.get(_target, prop, _target) };
+      }
+      return void 0;
+    }
+  }
+);
+function configureModelUrls(urls) {
+  for (const [key, url] of Object.entries(urls)) {
+    if (key in HF_MODEL_URLS && typeof url === "string") {
+      _overrides[key] = url;
+    }
+  }
+}
+function resetModelUrls() {
+  for (const key of Object.keys(_overrides)) {
+    delete _overrides[key];
+  }
+}
+var HF_CDN_URLS = HF_MODEL_URLS;
+
 // src/inference/UnifiedInferenceWorker.ts
 var logger8 = createLogger("UnifiedInferenceWorker");
 var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
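The new defaultModelUrls module centralizes the Hugging Face defaults behind a Proxy, so configureModelUrls overrides take effect everywhere DEFAULT_MODEL_URLS is read. A minimal usage sketch (the mirror URL is a placeholder):

    import { DEFAULT_MODEL_URLS, configureModelUrls, resetModelUrls } from "@omote/core";

    // Point the LAM A2E model at a self-hosted mirror; keys not present in
    // HF_MODEL_URLS (and non-string values) are silently ignored.
    configureModelUrls({ lam: "https://models.example.com/lam-a2e/model_fp16.onnx" });
    console.log(DEFAULT_MODEL_URLS.lam); // -> the mirror URL
    resetModelUrls();                    // -> back to the Hugging Face defaults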
@@ -6384,11 +6394,12 @@ var SileroVADUnifiedAdapter = class {
 
 // src/inference/createSenseVoice.ts
 var logger9 = createLogger("createSenseVoice");
-function createSenseVoice(config) {
+function createSenseVoice(config = {}) {
+  const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.senseVoice;
   if (config.unifiedWorker) {
     logger9.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
     return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
-      modelUrl
+      modelUrl,
       tokensUrl: config.tokensUrl,
       language: config.language,
       textNorm: config.textNorm
@@ -6401,7 +6412,7 @@ function createSenseVoice(config) {
   }
   logger9.info("Creating SenseVoiceWorker (off-main-thread)");
   return new SenseVoiceWorker({
-    modelUrl
+    modelUrl,
     tokensUrl: config.tokensUrl,
     language: config.language,
     textNorm: config.textNorm
@@ -6410,7 +6421,7 @@ function createSenseVoice(config) {
   if (useWorker === false) {
     logger9.info("Creating SenseVoiceInference (main thread)");
     return new SenseVoiceInference({
-      modelUrl
+      modelUrl,
      tokensUrl: config.tokensUrl,
      language: config.language,
      textNorm: config.textNorm
@@ -6419,7 +6430,7 @@ function createSenseVoice(config) {
   if (SenseVoiceWorker.isSupported() && !isIOS()) {
     logger9.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
     return new SenseVoiceWorker({
-      modelUrl
+      modelUrl,
       tokensUrl: config.tokensUrl,
       language: config.language,
       textNorm: config.textNorm
@@ -6429,7 +6440,7 @@ function createSenseVoice(config) {
     reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
   });
   return new SenseVoiceInference({
-    modelUrl
+    modelUrl,
     tokensUrl: config.tokensUrl,
     language: config.language,
     textNorm: config.textNorm
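With the default resolved inside the factory, createSenseVoice is now callable with no arguments, and all four construction paths receive the same resolved modelUrl. Sketch (override URL is a placeholder):

    import { createSenseVoice } from "@omote/core";

    // 0.6.6: zero-config; modelUrl falls back to DEFAULT_MODEL_URLS.senseVoice
    const asr = createSenseVoice();
    // An explicit override still wins:
    const pinned = createSenseVoice({ modelUrl: "https://models.example.com/sensevoice/model.int8.onnx" });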
@@ -7296,9 +7307,11 @@ var Wav2ArkitCpuWorker = class {
 
 // src/inference/createA2E.ts
 var logger12 = createLogger("createA2E");
-function createA2E(config) {
+function createA2E(config = {}) {
   const mode = config.mode ?? "auto";
   const fallbackOnError = config.fallbackOnError ?? true;
+  const gpuModelUrl = config.gpuModelUrl ?? DEFAULT_MODEL_URLS.lam;
+  const cpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
   let useCpu;
   if (mode === "cpu") {
     useCpu = true;
@@ -7307,33 +7320,35 @@ function createA2E(config) {
     useCpu = false;
     logger12.info("Forcing GPU A2E model (Wav2Vec2)");
   } else {
-    useCpu =
-    logger12.info("Auto-detected A2E model", {
-
-
+    useCpu = false;
+    logger12.info("Auto-detected A2E model: trying GPU first (fp16 external data)", {
+      isSafari: isSafari(),
+      isIOS: isIOS(),
+      fallbackOnError
     });
   }
   if (useCpu) {
     if (config.unifiedWorker) {
       logger12.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
       return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
-        modelUrl:
+        modelUrl: cpuModelUrl
       });
     }
     if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
       logger12.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
       return new Wav2ArkitCpuWorker({
-        modelUrl:
+        modelUrl: cpuModelUrl
       });
     }
     logger12.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
     return new Wav2ArkitCpuInference({
-      modelUrl:
+      modelUrl: cpuModelUrl
     });
   }
+  const gpuExternalDataUrl = config.gpuExternalDataUrl !== void 0 ? config.gpuExternalDataUrl : void 0;
   const gpuInstance = new Wav2Vec2Inference({
-    modelUrl:
-    externalDataUrl:
+    modelUrl: gpuModelUrl,
+    externalDataUrl: gpuExternalDataUrl,
     backend: config.gpuBackend ?? "auto",
     numIdentityClasses: config.numIdentityClasses
   });
@@ -7349,6 +7364,7 @@ var A2EWithFallback = class {
     this.hasFallenBack = false;
     this.implementation = gpuInstance;
     this.config = config;
+    this.resolvedCpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
   }
   get modelId() {
     return this.implementation.modelId;
@@ -7370,6 +7386,7 @@ var A2EWithFallback = class {
     }
   }
   async fallbackToCpu(reason) {
+    console.error("[A2EWithFallback] GPU\u2192CPU FALLBACK TRIGGERED. Reason:", reason);
     logger12.warn("GPU model load failed, falling back to CPU model", { reason });
     try {
       await this.implementation.dispose();
@@ -7377,17 +7394,17 @@ var A2EWithFallback = class {
     }
     if (this.config.unifiedWorker) {
       this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
-        modelUrl: this.
+        modelUrl: this.resolvedCpuModelUrl
       });
       logger12.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
     } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
       this.implementation = new Wav2ArkitCpuWorker({
-        modelUrl: this.
+        modelUrl: this.resolvedCpuModelUrl
       });
       logger12.info("Fallback to Wav2ArkitCpuWorker successful");
     } else {
       this.implementation = new Wav2ArkitCpuInference({
-        modelUrl: this.
+        modelUrl: this.resolvedCpuModelUrl
      });
      logger12.info("Fallback to Wav2ArkitCpuInference successful");
    }
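createA2E follows the same pattern, and its auto mode changes behavior: instead of routing Safari/iOS to the CPU model up front, 0.6.6 always tries the GPU (fp16 LAM) model first and leans on A2EWithFallback, which now caches resolvedCpuModelUrl at construction, to drop to the WASM model on failure. Sketch (model URLs are placeholders):

    import { createA2E } from "@omote/core";

    // Zero-config: GPU-first with automatic CPU fallback (fallbackOnError defaults to true)
    const a2e = createA2E();
    // Pinned URLs; cpuModelUrl also feeds the fallback path:
    const pinned = createA2E({
      gpuModelUrl: "https://models.example.com/lam-a2e/model_fp16.onnx",
      cpuModelUrl: "https://models.example.com/wav2arkit_cpu.onnx"
    });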
@@ -8577,10 +8594,12 @@ function supportsVADWorker() {
   }
   return true;
 }
-function createSileroVAD(config) {
+function createSileroVAD(config = {}) {
+  const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.sileroVad;
+  const resolvedConfig = { ...config, modelUrl };
   if (config.unifiedWorker) {
     logger15.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
-    return new SileroVADUnifiedAdapter(config.unifiedWorker,
+    return new SileroVADUnifiedAdapter(config.unifiedWorker, resolvedConfig);
   }
   const fallbackOnError = config.fallbackOnError ?? true;
   let useWorker;
@@ -8600,24 +8619,24 @@ function createSileroVAD(config) {
   if (useWorker) {
     logger15.info("Creating SileroVADWorker (off-main-thread)");
     const worker = new SileroVADWorker({
-      modelUrl
+      modelUrl,
       sampleRate: config.sampleRate,
       threshold: config.threshold,
       preSpeechBufferChunks: config.preSpeechBufferChunks
     });
     if (fallbackOnError) {
-      return new VADWorkerWithFallback(worker,
+      return new VADWorkerWithFallback(worker, resolvedConfig);
     }
     return worker;
   }
   logger15.info("Creating SileroVADInference (main thread)");
-  return new SileroVADInference(
+  return new SileroVADInference(resolvedConfig);
 }
 var VADWorkerWithFallback = class {
-  constructor(worker,
+  constructor(worker, resolvedConfig) {
     this.hasFallenBack = false;
     this.implementation = worker;
-    this.
+    this.resolvedConfig = resolvedConfig;
   }
   get backend() {
     if (!this.isLoaded) return null;
@@ -8643,7 +8662,7 @@ var VADWorkerWithFallback = class {
     await this.implementation.dispose();
   } catch {
   }
-  this.implementation = new SileroVADInference(this.
+  this.implementation = new SileroVADInference(this.resolvedConfig);
   this.hasFallenBack = true;
   logger15.info("Fallback to SileroVADInference successful");
   return await this.implementation.load();
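createSileroVAD gets the same treatment: the default model URL is folded into resolvedConfig, and that resolved object, not the raw user config, is what VADWorkerWithFallback reuses when the worker path fails, so the default URL survives the fallback. Sketch:

    import { createSileroVAD } from "@omote/core";

    // Zero-config apart from tuning; modelUrl falls back to DEFAULT_MODEL_URLS.sileroVad.
    const vad = createSileroVAD({ threshold: 0.6 });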
@@ -9691,17 +9710,29 @@ var AnimationGraph = class extends EventEmitter {
 // src/animation/ProceduralLifeLayer.ts
 import { createNoise2D } from "simplex-noise";
 var simplex2d = createNoise2D();
+var LIFE_BS_INDEX = /* @__PURE__ */ new Map();
+for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
+  LIFE_BS_INDEX.set(LAM_BLENDSHAPES[i], i);
+}
 var PHASE_OPEN = 0;
 var PHASE_CLOSING = 1;
 var PHASE_CLOSED = 2;
 var PHASE_OPENING = 3;
-var BLINK_CLOSE_DURATION = 0.
+var BLINK_CLOSE_DURATION = 0.092;
 var BLINK_HOLD_DURATION = 0.04;
-var BLINK_OPEN_DURATION = 0.
+var BLINK_OPEN_DURATION = 0.242;
 var BLINK_ASYMMETRY_DELAY = 8e-3;
+var BLINK_IBI_MU = Math.log(5.97);
+var BLINK_IBI_SIGMA = 0.89;
 var GAZE_BREAK_DURATION = 0.12;
 var GAZE_BREAK_HOLD_DURATION = 0.3;
 var GAZE_BREAK_RETURN_DURATION = 0.15;
+var GAZE_STATE_PARAMS = {
+  idle: { interval: [2, 5], amplitude: [0.15, 0.4] },
+  listening: { interval: [4, 10], amplitude: [0.1, 0.25] },
+  thinking: { interval: [1, 3], amplitude: [0.2, 0.5] },
+  speaking: { interval: [2, 6], amplitude: [0.15, 0.35] }
+};
 var EYE_NOISE_X_FREQ = 0.8;
 var EYE_NOISE_Y_FREQ = 0.6;
 var EYE_NOISE_X_PHASE = 73.1;
@@ -9729,6 +9760,12 @@ function smoothStep(t) {
 function softClamp(v, max) {
   return Math.tanh(v / max) * max;
 }
+function sampleLogNormal(mu, sigma) {
+  const u1 = Math.random();
+  const u2 = Math.random();
+  const z = Math.sqrt(-2 * Math.log(u1 || 1e-10)) * Math.cos(2 * Math.PI * u2);
+  return Math.exp(mu + sigma * z);
+}
 var ProceduralLifeLayer = class {
   constructor(config) {
     // Blink state
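sampleLogNormal is a standard Box-Muller transform (the `u1 || 1e-10` guard avoids log(0)) followed by exponentiation, so blink gaps now follow a log-normal distribution instead of the old uniform range. A quick numeric check of the constants introduced above:

    // z ~ N(0, 1), so exp(mu + sigma * z) is log-normal with:
    //   median = exp(BLINK_IBI_MU)                         = 5.97 s
    //   mean   = exp(BLINK_IBI_MU + BLINK_IBI_SIGMA^2 / 2) ≈ 5.97 * e^0.396 ≈ 8.9 s
    // nextBlinkInterval() (added later in this diff) clamps samples to [1.5, 12] s,
    // so extreme tail draws stay bounded.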
@@ -9741,7 +9778,7 @@ var ProceduralLifeLayer = class {
     // Eye contact (smoothed)
     this.smoothedEyeX = 0;
     this.smoothedEyeY = 0;
-    // Eye micro-motion
+    // Eye micro-motion
     this.eyeNoiseTime = 0;
     // Gaze break state
     this.gazeBreakTimer = 0;
@@ -9751,6 +9788,8 @@ var ProceduralLifeLayer = class {
     this.gazeBreakTargetY = 0;
     this.gazeBreakCurrentX = 0;
     this.gazeBreakCurrentY = 0;
+    // Conversational state for gaze
+    this.currentState = null;
     // Breathing / postural sway
     this.microMotionTime = 0;
     this.breathingPhase = 0;
@@ -9759,6 +9798,7 @@ var ProceduralLifeLayer = class {
     this.previousEnergy = 0;
     this.emphasisLevel = 0;
     this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
+    this.useLogNormalBlinks = !config?.blinkIntervalRange;
     this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
     this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
     this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;
@@ -9768,7 +9808,7 @@ var ProceduralLifeLayer = class {
     this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
     this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
     this.eyeSmoothing = config?.eyeSmoothing ?? 15;
-    this.blinkInterval =
+    this.blinkInterval = this.nextBlinkInterval();
     this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
   }
   /**
@@ -9783,6 +9823,7 @@ var ProceduralLifeLayer = class {
     const eyeTargetY = input?.eyeTargetY ?? 0;
     const audioEnergy = input?.audioEnergy ?? 0;
     const isSpeaking = input?.isSpeaking ?? false;
+    this.currentState = input?.state ?? null;
     const safeDelta = Math.min(delta, 0.1);
     const blendshapes = {};
     this.updateBlinks(delta);
@@ -9821,6 +9862,12 @@ var ProceduralLifeLayer = class {
     const swayAmp = this.posturalSwayAmplitude;
     const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
     const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
+    const breathVal = Math.sin(this.breathingPhase);
+    if (breathVal > 0) {
+      blendshapes["jawOpen"] = breathVal * 0.015;
+      blendshapes["noseSneerLeft"] = breathVal * 8e-3;
+      blendshapes["noseSneerRight"] = breathVal * 8e-3;
+    }
     return {
       blendshapes,
       headDelta: {
@@ -9829,12 +9876,35 @@ var ProceduralLifeLayer = class {
       }
     };
   }
+  /**
+   * Write life layer output directly to a Float32Array[52] in LAM_BLENDSHAPES order.
+   *
+   * Includes micro-jitter (0.4% amplitude simplex noise on all channels) to
+   * break uncanny stillness on undriven channels.
+   *
+   * @param delta - Time since last frame in seconds
+   * @param input - Per-frame input
+   * @param out - Pre-allocated Float32Array(52) to write into
+   */
+  updateToArray(delta, input, out) {
+    out.fill(0);
+    const result = this.update(delta, input);
+    for (const [name, value] of Object.entries(result.blendshapes)) {
+      const idx = LIFE_BS_INDEX.get(name);
+      if (idx !== void 0) {
+        out[idx] = value;
+      }
+    }
+    for (let i = 0; i < 52; i++) {
+      out[i] += simplex2d(this.noiseTime * 0.3, i * 7.13) * 4e-3;
+    }
+  }
   /**
    * Reset all internal state to initial values.
    */
   reset() {
     this.blinkTimer = 0;
-    this.blinkInterval =
+    this.blinkInterval = this.nextBlinkInterval();
     this.blinkPhase = PHASE_OPEN;
     this.blinkProgress = 0;
     this.asymmetryRight = 0.97;
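updateToArray gives hot paths a zero-allocation alternative to the object-returning update (note it still delegates to update internally, so headDelta is computed but discarded). Usage sketch, assuming ProceduralLifeLayer is importable (the export hunks in this diff only show changed lines):

    const life = new ProceduralLifeLayer();
    const frame = new Float32Array(52);   // LAM_BLENDSHAPES order
    life.updateToArray(1 / 60, { isSpeaking: false }, frame);
    // frame now holds blink/gaze/breathing values plus ~±0.004 simplex jitter per channel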
@@ -9851,6 +9921,7 @@ var ProceduralLifeLayer = class {
     this.gazeBreakTargetY = 0;
     this.gazeBreakCurrentX = 0;
     this.gazeBreakCurrentY = 0;
+    this.currentState = null;
     this.microMotionTime = 0;
     this.breathingPhase = 0;
     this.noiseTime = 0;
@@ -9858,6 +9929,21 @@ var ProceduralLifeLayer = class {
     this.emphasisLevel = 0;
   }
   // =====================================================================
+  // PRIVATE: Blink interval sampling
+  // =====================================================================
+  /**
+   * Sample next blink interval.
+   * Uses log-normal distribution (PMC3565584) when using default config,
+   * or uniform random when custom blinkIntervalRange is provided.
+   */
+  nextBlinkInterval() {
+    if (this.useLogNormalBlinks) {
+      const sample = sampleLogNormal(BLINK_IBI_MU, BLINK_IBI_SIGMA);
+      return clamp(sample, 1.5, 12);
+    }
+    return randomRange(...this.blinkIntervalRange);
+  }
+  // =====================================================================
   // PRIVATE: Blink system
   // =====================================================================
   updateBlinks(delta) {
@@ -9866,7 +9952,7 @@ var ProceduralLifeLayer = class {
       this.blinkPhase = PHASE_CLOSING;
       this.blinkProgress = 0;
       this.blinkTimer = 0;
-      this.blinkInterval =
+      this.blinkInterval = this.nextBlinkInterval();
       this.asymmetryRight = 0.95 + Math.random() * 0.08;
     }
     if (this.blinkPhase > PHASE_OPEN) {
@@ -9922,18 +10008,32 @@ var ProceduralLifeLayer = class {
     return { x, y };
   }
   // =====================================================================
-  // PRIVATE: Gaze breaks
+  // PRIVATE: Gaze breaks (state-dependent)
   // =====================================================================
+  /**
+   * Get active gaze parameters — uses state-dependent params when
+   * conversational state is provided, otherwise falls back to config ranges.
+   */
+  getActiveGazeParams() {
+    if (this.currentState && GAZE_STATE_PARAMS[this.currentState]) {
+      return GAZE_STATE_PARAMS[this.currentState];
+    }
+    return {
+      interval: this.gazeBreakIntervalRange,
+      amplitude: this.gazeBreakAmplitudeRange
+    };
+  }
   updateGazeBreaks(delta) {
     this.gazeBreakTimer += delta;
     if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
       this.gazeBreakPhase = PHASE_CLOSING;
       this.gazeBreakProgress = 0;
       this.gazeBreakTimer = 0;
-      const
+      const params = this.getActiveGazeParams();
+      const amp = randomRange(...params.amplitude);
       this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
       this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
-      this.gazeBreakInterval = randomRange(...
+      this.gazeBreakInterval = randomRange(...params.interval);
     }
     if (this.gazeBreakPhase > PHASE_OPEN) {
       this.gazeBreakProgress += delta;
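Gaze breaks now adapt to the conversational state captured in update: per GAZE_STATE_PARAMS, "thinking" glances away most often and furthest, while "listening" holds eye contact longest. Sketch, continuing the `life` instance from the earlier example:

    life.update(1 / 60, { state: "thinking" });  // interval [1, 3] s, amplitude [0.2, 0.5]
    life.update(1 / 60, {});                     // no state: config ranges, as in 0.6.2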
@@ -9998,6 +10098,300 @@ var ProceduralLifeLayer = class {
   }
 };
 
+// src/face/FACSMapping.ts
+var EMOTION_TO_AU = {
+  joy: [
+    { au: "AU6", intensity: 0.7, region: "upper" },
+    // cheek raise (Duchenne)
+    { au: "AU12", intensity: 0.8, region: "lower" }
+    // lip corner pull (smile)
+  ],
+  anger: [
+    { au: "AU4", intensity: 0.8, region: "upper" },
+    // brow lower
+    { au: "AU5", intensity: 0.4, region: "upper" },
+    // upper lid raise
+    { au: "AU7", intensity: 0.3, region: "upper" },
+    // lid tighten
+    { au: "AU23", intensity: 0.6, region: "lower" }
+    // lip tighten
+  ],
+  sadness: [
+    { au: "AU1", intensity: 0.7, region: "upper" },
+    // inner brow raise
+    { au: "AU4", intensity: 0.3, region: "upper" },
+    // brow lower (furrow)
+    { au: "AU15", intensity: 0.5, region: "lower" }
+    // lip corner depress
+  ],
+  fear: [
+    { au: "AU1", intensity: 0.6, region: "upper" },
+    // inner brow raise
+    { au: "AU2", intensity: 0.5, region: "upper" },
+    // outer brow raise
+    { au: "AU4", intensity: 0.3, region: "upper" },
+    // brow lower
+    { au: "AU5", intensity: 0.5, region: "upper" },
+    // upper lid raise
+    { au: "AU20", intensity: 0.4, region: "lower" }
+    // lip stretch
+  ],
+  disgust: [
+    { au: "AU9", intensity: 0.7, region: "upper" },
+    // nose wrinkle
+    { au: "AU10", intensity: 0.5, region: "lower" },
+    // upper lip raise
+    { au: "AU15", intensity: 0.4, region: "lower" }
+    // lip corner depress
+  ],
+  amazement: [
+    { au: "AU1", intensity: 0.6, region: "upper" },
+    // inner brow raise
+    { au: "AU2", intensity: 0.7, region: "upper" },
+    // outer brow raise
+    { au: "AU5", intensity: 0.6, region: "upper" },
+    // upper lid raise
+    { au: "AU26", intensity: 0.4, region: "lower" }
+    // jaw drop
+  ],
+  grief: [
+    { au: "AU1", intensity: 0.8, region: "upper" },
+    // inner brow raise
+    { au: "AU4", intensity: 0.5, region: "upper" },
+    // brow lower
+    { au: "AU6", intensity: 0.3, region: "upper" },
+    // cheek raise (grief cry)
+    { au: "AU15", intensity: 0.6, region: "lower" }
+    // lip corner depress
+  ],
+  cheekiness: [
+    { au: "AU2", intensity: 0.4, region: "upper" },
+    // outer brow raise
+    { au: "AU6", intensity: 0.4, region: "upper" },
+    // cheek raise
+    { au: "AU12", intensity: 0.6, region: "lower" }
+    // lip corner pull (smirk)
+  ],
+  pain: [
+    { au: "AU4", intensity: 0.7, region: "upper" },
+    // brow lower
+    { au: "AU6", intensity: 0.4, region: "upper" },
+    // cheek raise (orbicularis)
+    { au: "AU7", intensity: 0.7, region: "upper" },
+    // lid tighten (squint)
+    { au: "AU9", intensity: 0.5, region: "upper" }
+    // nose wrinkle
+  ],
+  outofbreath: [
+    { au: "AU1", intensity: 0.3, region: "upper" },
+    // inner brow raise
+    { au: "AU25", intensity: 0.3, region: "lower" },
+    // lips part
+    { au: "AU26", intensity: 0.5, region: "lower" }
+    // jaw drop
+  ]
+};
+var AU_TO_ARKIT = {
+  "AU1": [{ blendshape: "browInnerUp", weight: 1 }],
+  "AU2": [{ blendshape: "browOuterUpLeft", weight: 1 }, { blendshape: "browOuterUpRight", weight: 1 }],
+  "AU4": [{ blendshape: "browDownLeft", weight: 1 }, { blendshape: "browDownRight", weight: 1 }],
+  "AU5": [{ blendshape: "eyeWideLeft", weight: 1 }, { blendshape: "eyeWideRight", weight: 1 }],
+  "AU6": [{ blendshape: "cheekSquintLeft", weight: 1 }, { blendshape: "cheekSquintRight", weight: 1 }],
+  "AU7": [{ blendshape: "eyeSquintLeft", weight: 1 }, { blendshape: "eyeSquintRight", weight: 1 }],
+  "AU9": [{ blendshape: "noseSneerLeft", weight: 1 }, { blendshape: "noseSneerRight", weight: 1 }],
+  "AU10": [{ blendshape: "mouthUpperUpLeft", weight: 1 }, { blendshape: "mouthUpperUpRight", weight: 1 }],
+  "AU12": [{ blendshape: "mouthSmileLeft", weight: 1 }, { blendshape: "mouthSmileRight", weight: 1 }],
+  "AU15": [{ blendshape: "mouthFrownLeft", weight: 1 }, { blendshape: "mouthFrownRight", weight: 1 }],
+  "AU20": [{ blendshape: "mouthStretchLeft", weight: 1 }, { blendshape: "mouthStretchRight", weight: 1 }],
+  "AU23": [{ blendshape: "mouthPressLeft", weight: 1 }, { blendshape: "mouthPressRight", weight: 1 }],
+  "AU25": [{ blendshape: "jawOpen", weight: 0.3 }],
+  "AU26": [{ blendshape: "jawOpen", weight: 1 }]
+};
+var ALL_AUS = [...new Set(
+  Object.values(EMOTION_TO_AU).flatMap((activations) => activations.map((a) => a.au))
+)];
+
+// src/face/EmotionResolver.ts
+var BS_INDEX = /* @__PURE__ */ new Map();
+for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
+  BS_INDEX.set(LAM_BLENDSHAPES[i], i);
+}
+var EmotionResolver = class {
+  constructor() {
+    this.upperBuffer = new Float32Array(52);
+    this.lowerBuffer = new Float32Array(52);
+  }
+  /**
+   * Resolve emotion weights to upper/lower face blendshape contributions.
+   *
+   * @param weights - Emotion channel weights from EmotionController
+   * @param intensity - Global intensity multiplier (0-2). Default: 1.0
+   * @returns Upper and lower face blendshape arrays (52 channels each)
+   */
+  resolve(weights, intensity = 1) {
+    const upper = this.upperBuffer;
+    const lower = this.lowerBuffer;
+    upper.fill(0);
+    lower.fill(0);
+    for (const emotionName of EMOTION_NAMES) {
+      const emotionWeight = weights[emotionName];
+      if (!emotionWeight || emotionWeight < 0.01) continue;
+      const auActivations = EMOTION_TO_AU[emotionName];
+      if (!auActivations) continue;
+      for (const activation of auActivations) {
+        const arkitMappings = AU_TO_ARKIT[activation.au];
+        if (!arkitMappings) continue;
+        const target = activation.region === "upper" ? upper : lower;
+        const scale = emotionWeight * activation.intensity * intensity;
+        for (const mapping of arkitMappings) {
+          const idx = BS_INDEX.get(mapping.blendshape);
+          if (idx !== void 0) {
+            target[idx] += mapping.weight * scale;
+          }
+        }
+      }
+    }
+    for (let i = 0; i < 52; i++) {
+      if (upper[i] > 1) upper[i] = 1;
+      if (lower[i] > 1) lower[i] = 1;
+    }
+    return { upper, lower };
+  }
+};
+
+// src/face/FaceCompositor.ts
+function smoothstep(t) {
+  return t * t * (3 - 2 * t);
+}
+var BS_INDEX2 = /* @__PURE__ */ new Map();
+for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
+  BS_INDEX2.set(LAM_BLENDSHAPES[i], i);
+}
+var IDX_MOUTH_CLOSE = BS_INDEX2.get("mouthClose");
+var IS_EYE_CHANNEL = new Array(52).fill(false);
+for (const name of LAM_BLENDSHAPES) {
+  if (name.startsWith("eyeBlink") || name.startsWith("eyeLook")) {
+    IS_EYE_CHANNEL[BS_INDEX2.get(name)] = true;
+  }
+}
+var FaceCompositor = class {
+  constructor(config) {
+    this.emotionResolver = new EmotionResolver();
+    // Pre-allocated buffers
+    this.outputBuffer = new Float32Array(52);
+    this.smoothedUpper = new Float32Array(52);
+    this.smoothedLower = new Float32Array(52);
+    this.lifeBuffer = new Float32Array(52);
+    // Profile arrays (pre-expanded to 52 channels)
+    this.multiplier = new Float32Array(52).fill(1);
+    this.offset = new Float32Array(52);
+    this.lifeLayer = config?.lifeLayer ?? new ProceduralLifeLayer();
+    this.emotionSmoothing = config?.emotionSmoothing ?? 0.12;
+    if (config?.profile) {
+      this.applyProfileArrays(config.profile);
+    }
+  }
+  /**
+   * Compose a single output frame from the 5-stage signal chain.
+   *
+   * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
+   * @param input - Per-frame input (deltaTime, emotion, life layer params)
+   * @param target - Optional pre-allocated output buffer (avoids per-frame allocation).
+   * When omitted, an internal buffer is used (valid until next compose() call).
+   * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
+   */
+  compose(base, input, target) {
+    const out = target ?? this.outputBuffer;
+    out.set(base);
+    const emotion = input.emotion ?? this.stickyEmotion;
+    if (emotion) {
+      const resolved = this.emotionResolver.resolve(
+        emotion,
+        input.emotionIntensity ?? 1
+      );
+      const k = this.emotionSmoothing;
+      for (let i = 0; i < 52; i++) {
+        this.smoothedUpper[i] += (resolved.upper[i] - this.smoothedUpper[i]) * k;
+        this.smoothedLower[i] += (resolved.lower[i] - this.smoothedLower[i]) * k;
+      }
+      const mc = base[IDX_MOUTH_CLOSE];
+      const bilabialSuppress = mc <= 0.3 ? 1 : mc >= 0.7 ? 0.1 : 1 - 0.9 * smoothstep((mc - 0.3) * 2.5);
+      for (let i = 0; i < 52; i++) {
+        out[i] += this.smoothedUpper[i];
+      }
+      for (let i = 0; i < 52; i++) {
+        out[i] *= 1 + this.smoothedLower[i] * bilabialSuppress;
+      }
+    }
+    const lifeResult = this.lifeLayer.update(input.deltaTime, input);
+    this.lifeBuffer.fill(0);
+    for (const [name, value] of Object.entries(lifeResult.blendshapes)) {
+      const idx = BS_INDEX2.get(name);
+      if (idx !== void 0) {
+        this.lifeBuffer[idx] = value;
+      }
+    }
+    for (let i = 0; i < 52; i++) {
+      if (IS_EYE_CHANNEL[i]) {
+        out[i] = this.lifeBuffer[i];
+      } else {
+        out[i] += this.lifeBuffer[i];
+      }
+    }
+    for (let i = 0; i < 52; i++) {
+      out[i] = out[i] * this.multiplier[i] + this.offset[i];
+    }
+    for (let i = 0; i < 52; i++) {
+      if (out[i] < 0) out[i] = 0;
+      else if (out[i] > 1) out[i] = 1;
+    }
+    return { blendshapes: out, headDelta: lifeResult.headDelta };
+  }
+  /**
+   * Set sticky emotion (used when input.emotion is not provided).
+   */
+  setEmotion(weights) {
+    this.stickyEmotion = weights;
+  }
+  /**
+   * Update character profile at runtime.
+   */
+  setProfile(profile) {
+    this.multiplier.fill(1);
+    this.offset.fill(0);
+    this.applyProfileArrays(profile);
+  }
+  /**
+   * Reset all smoothing state and life layer.
+   */
+  reset() {
+    this.smoothedUpper.fill(0);
+    this.smoothedLower.fill(0);
+    this.lifeBuffer.fill(0);
+    this.stickyEmotion = void 0;
+    this.lifeLayer.reset();
+  }
+  /** Expand partial profile maps into dense Float32Arrays */
+  applyProfileArrays(profile) {
+    if (profile.multiplier) {
+      for (const [name, value] of Object.entries(profile.multiplier)) {
+        const idx = BS_INDEX2.get(name);
+        if (idx !== void 0 && value !== void 0) {
+          this.multiplier[idx] = value;
+        }
+      }
+    }
+    if (profile.offset) {
+      for (const [name, value] of Object.entries(profile.offset)) {
+        const idx = BS_INDEX2.get(name);
+        if (idx !== void 0 && value !== void 0) {
+          this.offset[idx] = value;
+        }
+      }
+    }
+  }
+};
+
 // src/orchestration/MicLipSync.ts
 var logger18 = createLogger("MicLipSync");
 var MicLipSync = class extends EventEmitter {
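The new face stack composes A2E output with FACS-based emotion and the life layer. A worked example of the mapping: joy activates AU6 (0.7, upper) and AU12 (0.8, lower), so a joy weight of 0.5 at intensity 1 yields cheekSquintLeft/Right = 0.35 and mouthSmileLeft/Right = 0.4 before clamping. Usage sketch (partial weight objects work because resolve skips falsy channels; `a2eFrame` is a placeholder for real model output):

    import { EmotionResolver, FaceCompositor } from "@omote/core";

    const { upper, lower } = new EmotionResolver().resolve({ joy: 0.5 });
    // upper has cheekSquint* = 0.35, lower has mouthSmile* = 0.4

    const compositor = new FaceCompositor();
    compositor.setEmotion({ joy: 0.5 });             // sticky until overridden
    const a2eFrame = new Float32Array(52);           // normally the A2E model's frame
    const { blendshapes, headDelta } = compositor.compose(a2eFrame, { deltaTime: 1 / 60 });
    // blendshapes: Float32Array(52) clamped to [0, 1]; eye channels come from the life layer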
@@ -10238,6 +10632,7 @@ var VoicePipeline = class extends EventEmitter {
     new Promise((r) => setTimeout(() => r("timeout"), timeoutMs))
   ]);
   if (lamLoadResult === "timeout") {
+    console.error(`[VoicePipeline] LAM TIMEOUT after ${timeoutMs}ms \u2014 forcing CPU fallback`);
     logger19.warn(`LAM GPU load timed out after ${timeoutMs}ms, falling back to CPU`);
     await lam.dispose();
     lam = createA2E({
@@ -10684,7 +11079,9 @@ function isProtocolEvent(obj) {
 export {
   A2EOrchestrator,
   A2EProcessor,
+  ALL_AUS,
   ARKIT_BLENDSHAPES,
+  AU_TO_ARKIT,
   AnimationGraph,
   AudioChunkCoalescer,
   AudioEnergyAnalyzer,
@@ -10695,13 +11092,18 @@ export {
   ConsoleExporter,
   DEFAULT_ANIMATION_CONFIG,
   DEFAULT_LOGGING_CONFIG,
+  DEFAULT_MODEL_URLS,
   EMOTION_NAMES,
+  EMOTION_TO_AU,
   EMOTION_VECTOR_SIZE,
   EmotionController,
   EmotionPresets,
+  EmotionResolver,
   EmphasisDetector,
   EventEmitter,
+  FaceCompositor,
   FullFacePipeline,
+  HF_CDN_URLS,
   INFERENCE_LATENCY_BUCKETS,
   InterruptionHandler,
   LAM_BLENDSHAPES,
@@ -10736,6 +11138,7 @@ export {
   calculateRMS,
   configureCacheLimit,
   configureLogging,
+  configureModelUrls,
   configureTelemetry,
   createA2E,
   createEmotionVector,
@@ -10766,6 +11169,7 @@ export {
   noopLogger,
   preloadModels,
   resetLoggingConfig,
+  resetModelUrls,
   resolveBackend,
   setLogLevel,
   setLoggingEnabled,