@omote/core 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +636 -1
- package/dist/index.d.ts +636 -1
- package/dist/index.js +1384 -146
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1384 -146
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -1
package/dist/index.mjs
CHANGED
|
@@ -874,6 +874,327 @@ var UPPER_FACE_BLENDSHAPES = [
|
|
|
874
874
|
"cheekSquintLeft",
|
|
875
875
|
"cheekSquintRight"
|
|
876
876
|
];
|
|
877
|
+
/**
 * Upper-face blendshape weights applied for each supported emotion.
 *
 * Keys are emotion labels; each value maps a blendshape name to the weight
 * used at full intensity. The "AU" comments refer to the facial action
 * units each group of weights is meant to reproduce.
 */
var EMOTION_ARKIT_MAP = {
  happy: {
    // AU6 - Cheek raiser (primary Duchenne smile marker)
    cheekSquintLeft: 0.5,
    cheekSquintRight: 0.5,
    // Slight eye squint from genuine smile (orbicularis oculi activation)
    eyeSquintLeft: 0.2,
    eyeSquintRight: 0.2
  },
  angry: {
    // AU4 - Brow lowerer (intense, primary anger marker)
    browDownLeft: 0.7,
    browDownRight: 0.7,
    // AU5 - Upper lid raiser (wide eyes, part of the "glare")
    eyeWideLeft: 0.4,
    eyeWideRight: 0.4,
    // AU7 - Lid tightener (tense stare, combines with AU5 for angry glare)
    eyeSquintLeft: 0.3,
    eyeSquintRight: 0.3
  },
  sad: {
    // AU1 - Inner brow raiser (primary sadness marker)
    browInnerUp: 0.6,
    // AU4 - Brow lowerer (brows drawn together)
    browDownLeft: 0.3,
    browDownRight: 0.3
  },
  // All zeros - no expression overlay
  neutral: {}
};
|
|
907
|
+
/**
 * Default settings for EmotionToBlendshapeMapper; caller-supplied config is
 * layered on top of these values.
 */
var DEFAULT_CONFIG = {
  // Fraction of the remaining distance to the target covered per update() call
  smoothingFactor: 0.15,
  // Dominant mode: frames with confidence below this produce no expression
  confidenceThreshold: 0.3,
  // Global multiplier applied to every mapped blendshape weight
  intensity: 1,
  // "dominant" uses frame.emotion only; "weighted" blends frame.probabilities
  blendMode: "dominant",
  // Weighted mode: emotions with probability below this are ignored
  minBlendProbability: 0.1,
  // When true, target weights are scaled by the current audio energy
  energyModulation: false,
  // Scale applied at audio energy 0 ...
  minEnergyScale: 0.3,
  // ... and at audio energy 1 (linear in between)
  maxEnergyScale: 1
};
|
|
917
|
+
/**
 * Build a fresh record holding a 0 weight for every name listed in
 * UPPER_FACE_BLENDSHAPES.
 *
 * @returns A new object on every call, so callers may mutate it freely
 */
function createZeroBlendshapes() {
  return Object.fromEntries(UPPER_FACE_BLENDSHAPES.map((shapeName) => [shapeName, 0]));
}
|
|
924
|
+
/**
 * Clamp a number into the closed range [0, 1].
 *
 * Math.min/Math.max propagate NaN, so a NaN (or undefined) input would
 * otherwise escape the clamp. Because the mapper smooths with
 * `current + factor * (target - current)`, a single NaN would then persist in
 * the smoothed state until reset. Such inputs are therefore mapped to 0.
 *
 * @param value - Value to clamp
 * @returns `value` limited to [0, 1]; 0 when the input is not a number
 */
function clamp01(value) {
  const clamped = Math.max(0, Math.min(1, value));
  return Number.isNaN(clamped) ? 0 : clamped;
}
|
|
927
|
+
/**
 * Copy `base` and apply every entry of `overrides` whose value is defined.
 *
 * A plain object spread would let `{ smoothingFactor: undefined }` erase the
 * default and turn every later computation into NaN.
 */
function mergeDefinedConfig(base, overrides) {
  const merged = { ...base };
  if (overrides) {
    for (const [key, value] of Object.entries(overrides)) {
      if (value !== void 0) {
        merged[key] = value;
      }
    }
  }
  return merged;
}

var EmotionToBlendshapeMapper = class {
  /**
   * Create a new EmotionToBlendshapeMapper
   *
   * @param config - Optional partial configuration; keys that are missing or
   *                 explicitly `undefined` keep their DEFAULT_CONFIG value
   */
  constructor(config) {
    this.currentEnergy = 1;
    this.config = mergeDefinedConfig(DEFAULT_CONFIG, config);
    this.targetBlendshapes = createZeroBlendshapes();
    this.currentBlendshapes = createZeroBlendshapes();
  }
  /**
   * Map an emotion frame to target blendshapes
   *
   * This sets the target values that the mapper will smoothly interpolate
   * towards. Call update() each frame to apply smoothing.
   *
   * @param frame - Emotion frame (reads `emotion`, `confidence` and, in
   *                weighted mode, `probabilities`); a falsy frame yields all zeros
   * @param audioEnergy - Optional audio energy (0-1) for energy modulation;
   *                      omitted or NaN keeps the previously stored energy
   * @returns Copy of the target upper face blendshapes (before smoothing)
   */
  mapFrame(frame, audioEnergy) {
    this.targetBlendshapes = createZeroBlendshapes();
    // Ignore NaN so one bad energy sample cannot turn every target into NaN.
    if (audioEnergy !== void 0 && !Number.isNaN(audioEnergy)) {
      this.currentEnergy = clamp01(audioEnergy);
    }
    if (!frame) {
      return { ...this.targetBlendshapes };
    }
    if (this.config.blendMode === "weighted") {
      this.mapFrameWeighted(frame);
    } else {
      this.mapFrameDominant(frame);
    }
    if (this.config.energyModulation) {
      this.applyEnergyModulation();
    }
    return { ...this.targetBlendshapes };
  }
  /**
   * Map using dominant emotion only (original behavior)
   *
   * Leaves the targets at zero when the confidence is below the threshold,
   * is not a number, or the emotion has no entry in EMOTION_ARKIT_MAP.
   */
  mapFrameDominant(frame) {
    // Written as !(a >= b) so NaN/undefined confidence is rejected too;
    // `a < b` is false for NaN and would let it through as a NaN scale.
    if (!(frame.confidence >= this.config.confidenceThreshold)) {
      return;
    }
    const mapping = EMOTION_ARKIT_MAP[frame.emotion];
    if (!mapping) {
      return;
    }
    const scale = this.config.intensity * frame.confidence;
    for (const [name, value] of Object.entries(mapping)) {
      if (value !== void 0) {
        this.targetBlendshapes[name] = clamp01(value * scale);
      }
    }
  }
  /**
   * Map using weighted blend of all emotions by probability
   * Creates more nuanced expressions (e.g., bittersweet = happy + sad)
   *
   * Falls back to dominant mapping when the frame has no probabilities.
   */
  mapFrameWeighted(frame) {
    if (!frame.probabilities) {
      this.mapFrameDominant(frame);
      return;
    }
    for (const [emotion, probability] of Object.entries(frame.probabilities)) {
      // !(a >= b) also skips NaN/undefined probabilities (see mapFrameDominant).
      if (!(probability >= this.config.minBlendProbability)) {
        continue;
      }
      const mapping = EMOTION_ARKIT_MAP[emotion];
      if (!mapping) {
        continue;
      }
      const scale = this.config.intensity * probability;
      for (const [name, value] of Object.entries(mapping)) {
        if (value !== void 0) {
          this.targetBlendshapes[name] += value * scale;
        }
      }
    }
    // Contributions are summed unclamped above; clamp once at the end.
    for (const name of UPPER_FACE_BLENDSHAPES) {
      this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name]);
    }
  }
  /**
   * Apply energy modulation to scale emotion intensity by audio energy
   * Louder speech = stronger expressions
   *
   * The scale is interpolated linearly from minEnergyScale (energy 0) to
   * maxEnergyScale (energy 1).
   */
  applyEnergyModulation() {
    const { minEnergyScale, maxEnergyScale } = this.config;
    const energyScale = minEnergyScale + this.currentEnergy * (maxEnergyScale - minEnergyScale);
    for (const name of UPPER_FACE_BLENDSHAPES) {
      this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name] * energyScale);
    }
  }
  /**
   * Apply smoothing to interpolate current values towards target
   *
   * Uses exponential moving average:
   * current = current + smoothingFactor * (target - current)
   *
   * @param _deltaMs - Delta time in milliseconds (reserved for future time-based smoothing)
   */
  update(_deltaMs) {
    const factor = this.config.smoothingFactor;
    for (const name of UPPER_FACE_BLENDSHAPES) {
      const target = this.targetBlendshapes[name];
      const current = this.currentBlendshapes[name];
      this.currentBlendshapes[name] = clamp01(current + factor * (target - current));
    }
  }
  /**
   * Get current smoothed blendshape values
   *
   * @returns Copy of the current upper face blendshapes (after smoothing)
   */
  getCurrentBlendshapes() {
    return { ...this.currentBlendshapes };
  }
  /**
   * Reset mapper to neutral state
   *
   * Sets both target and current blendshapes to zero and restores the
   * stored audio energy to 1.
   */
  reset() {
    this.targetBlendshapes = createZeroBlendshapes();
    this.currentBlendshapes = createZeroBlendshapes();
    this.currentEnergy = 1;
  }
  /**
   * Get current configuration
   *
   * @returns Shallow copy of the active configuration
   */
  getConfig() {
    return { ...this.config };
  }
  /**
   * Update configuration
   *
   * @param config - Partial configuration to update; `undefined` values are
   *                 ignored so they cannot erase an existing setting
   */
  setConfig(config) {
    this.config = mergeDefinedConfig(this.config, config);
  }
};
|
|
1082
|
+
|
|
1083
|
+
// src/animation/audioEnergy.ts
|
|
1084
|
+
/**
 * Root-mean-square level of a block of audio samples.
 *
 * @param samples Audio samples (e.g. Float32Array)
 * @returns sqrt(mean(sample^2)), or 0 for an empty block
 */
function calculateRMS(samples) {
  const count = samples.length;
  if (count === 0) {
    return 0;
  }
  let energy = 0;
  for (const sample of samples) {
    energy += sample * sample;
  }
  return Math.sqrt(energy / count);
}
|
|
1092
|
+
/**
 * Largest absolute sample value in a block of audio samples.
 *
 * @param samples Audio samples (e.g. Float32Array)
 * @returns max(|sample|), or 0 for an empty block
 */
function calculatePeak(samples) {
  let highest = 0;
  for (const sample of samples) {
    const magnitude = Math.abs(sample);
    if (magnitude > highest) {
      highest = magnitude;
    }
  }
  return highest;
}
|
|
1100
|
+
var AudioEnergyAnalyzer = class {
  /**
   * @param smoothingFactor How much to smooth falling levels (0 = no smoothing, 1 = infinite smoothing).
   *                        Clamped to [0, 0.99]; a non-numeric value falls back to 0.85. Default 0.85
   * @param noiseFloor Minimum energy threshold to consider as signal.
   *                   A non-numeric value falls back to 0.01. Default 0.01
   */
  constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
    this.smoothedRMS = 0;
    this.smoothedPeak = 0;
    // Math.min/Math.max propagate NaN; a NaN factor would make every decay
    // step NaN and the analyzer would never recover.
    const clampedFactor = Math.max(0, Math.min(0.99, smoothingFactor));
    this.smoothingFactor = Number.isNaN(clampedFactor) ? 0.85 : clampedFactor;
    // `level > NaN` is always false, which would gate all audio to silence.
    this.noiseFloor = Number.isNaN(Number(noiseFloor)) ? 0.01 : noiseFloor;
  }
  /**
   * Process audio samples and return smoothed energy values
   *
   * Levels at or below the noise floor are treated as 0. Rising levels are
   * followed quickly with fixed weights (50% new for RMS, 70% new for peak);
   * falling levels decay according to smoothingFactor.
   *
   * @param samples Audio samples (Float32Array)
   * @returns Object with smoothed `rms` and `peak`, plus `energy`: a
   *          70/30 mix of rms and peak, doubled and capped at 1
   */
  process(samples) {
    const instantRMS = calculateRMS(samples);
    const instantPeak = calculatePeak(samples);
    const gatedRMS = instantRMS > this.noiseFloor ? instantRMS : 0;
    const gatedPeak = instantPeak > this.noiseFloor ? instantPeak : 0;
    if (gatedRMS > this.smoothedRMS) {
      // Attack: move halfway to the louder level
      this.smoothedRMS = this.smoothedRMS * 0.5 + gatedRMS * 0.5;
    } else {
      // Release: decay at the configured smoothing rate
      this.smoothedRMS = this.smoothedRMS * this.smoothingFactor + gatedRMS * (1 - this.smoothingFactor);
    }
    if (gatedPeak > this.smoothedPeak) {
      // Attack: peaks follow rising levels faster than RMS does
      this.smoothedPeak = this.smoothedPeak * 0.3 + gatedPeak * 0.7;
    } else {
      this.smoothedPeak = this.smoothedPeak * this.smoothingFactor + gatedPeak * (1 - this.smoothingFactor);
    }
    const energy = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
    return {
      rms: this.smoothedRMS,
      peak: this.smoothedPeak,
      // Scale up and clamp
      energy: Math.min(1, energy * 2)
    };
  }
  /**
   * Reset analyzer state
   */
  reset() {
    this.smoothedRMS = 0;
    this.smoothedPeak = 0;
  }
  /**
   * Get current smoothed RMS value
   */
  get rms() {
    return this.smoothedRMS;
  }
  /**
   * Get current smoothed peak value
   */
  get peak() {
    return this.smoothedPeak;
  }
};
|
|
1159
|
+
var EmphasisDetector = class {
  /**
   * @param historySize Number of frames to track. Values below 3 are raised to 3,
   *                    because detection needs the current frame plus at least two
   *                    earlier ones; non-finite values fall back to 10. Default 10
   * @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
   * @param fullStrengthIncrease Energy increase that maps to emphasisStrength 1;
   *                             non-positive or non-numeric values fall back to 0.3. Default 0.3
   */
  constructor(historySize = 10, emphasisThreshold = 0.15, fullStrengthIncrease = 0.3) {
    this.energyHistory = [];
    // A history shorter than 3 can never pass the warm-up check in process(),
    // and a NaN size would disable trimming and let the history grow forever.
    const requestedSize = Math.floor(historySize);
    this.historySize = Number.isFinite(requestedSize) ? Math.max(3, requestedSize) : 10;
    this.emphasisThreshold = emphasisThreshold;
    this.fullStrengthIncrease = fullStrengthIncrease > 0 ? fullStrengthIncrease : 0.3;
  }
  /**
   * Process energy value and detect emphasis
   *
   * The current value is compared against the mean of the earlier frames in
   * the history window; the first two calls after construction/reset always
   * report no emphasis.
   *
   * @param energy Current energy value (0-1)
   * @returns Object with isEmphasis flag and emphasisStrength (0-1)
   */
  process(energy) {
    const history = this.energyHistory;
    history.push(energy);
    if (history.length > this.historySize) {
      history.shift();
    }
    if (history.length < 3) {
      return { isEmphasis: false, emphasisStrength: 0 };
    }
    // Average every frame except the one just pushed.
    const previousCount = history.length - 1;
    let previousSum = 0;
    for (let i = 0; i < previousCount; i++) {
      previousSum += history[i];
    }
    const increase = energy - previousSum / previousCount;
    const isEmphasis = increase > this.emphasisThreshold;
    return {
      isEmphasis,
      emphasisStrength: isEmphasis ? Math.min(1, increase / this.fullStrengthIncrease) : 0
    };
  }
  /**
   * Reset detector state
   */
  reset() {
    this.energyHistory = [];
  }
};
|
|
877
1198
|
|
|
878
1199
|
// src/telemetry/exporters/console.ts
|
|
879
1200
|
var ConsoleExporter = class {
|
|
@@ -2511,7 +2832,7 @@ var CTC_VOCAB = [
|
|
|
2511
2832
|
"Q",
|
|
2512
2833
|
"Z"
|
|
2513
2834
|
];
|
|
2514
|
-
var
|
|
2835
|
+
var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
2515
2836
|
constructor(config) {
|
|
2516
2837
|
this.modelId = "wav2vec2";
|
|
2517
2838
|
this.session = null;
|
|
@@ -2520,6 +2841,10 @@ var Wav2Vec2Inference = class {
|
|
|
2520
2841
|
this.isLoading = false;
|
|
2521
2842
|
// Inference queue for handling concurrent calls
|
|
2522
2843
|
this.inferenceQueue = Promise.resolve();
|
|
2844
|
+
// Session health: set to true if session.run() times out.
|
|
2845
|
+
// A timed-out session may have a zombie GPU/WASM dispatch still running,
|
|
2846
|
+
// so all future infer() calls reject immediately to prevent concurrent access.
|
|
2847
|
+
this.poisoned = false;
|
|
2523
2848
|
this.config = config;
|
|
2524
2849
|
this.numIdentityClasses = config.numIdentityClasses ?? 12;
|
|
2525
2850
|
}
|
|
@@ -2529,6 +2854,10 @@ var Wav2Vec2Inference = class {
|
|
|
2529
2854
|
get isLoaded() {
|
|
2530
2855
|
return this.session !== null;
|
|
2531
2856
|
}
|
|
2857
|
+
/** True if inference timed out and the session is permanently unusable */
|
|
2858
|
+
get isSessionPoisoned() {
|
|
2859
|
+
return this.poisoned;
|
|
2860
|
+
}
|
|
2532
2861
|
/**
|
|
2533
2862
|
* Load the ONNX model
|
|
2534
2863
|
*/
|
|
@@ -2678,12 +3007,23 @@ var Wav2Vec2Inference = class {
|
|
|
2678
3007
|
logger2.debug("Running warmup inference to initialize GPU context");
|
|
2679
3008
|
const warmupStart = performance.now();
|
|
2680
3009
|
const silentAudio = new Float32Array(16e3);
|
|
2681
|
-
|
|
3010
|
+
const WARMUP_TIMEOUT_MS = 15e3;
|
|
3011
|
+
const warmupResult = await Promise.race([
|
|
3012
|
+
this.infer(silentAudio, 0).then(() => "ok"),
|
|
3013
|
+
new Promise((r) => setTimeout(() => r("timeout"), WARMUP_TIMEOUT_MS))
|
|
3014
|
+
]);
|
|
2682
3015
|
const warmupTimeMs = performance.now() - warmupStart;
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
3016
|
+
if (warmupResult === "timeout") {
|
|
3017
|
+
logger2.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
|
|
3018
|
+
timeoutMs: WARMUP_TIMEOUT_MS,
|
|
3019
|
+
backend: this._backend
|
|
3020
|
+
});
|
|
3021
|
+
} else {
|
|
3022
|
+
logger2.info("Warmup inference complete", {
|
|
3023
|
+
warmupTimeMs: Math.round(warmupTimeMs),
|
|
3024
|
+
backend: this._backend
|
|
3025
|
+
});
|
|
3026
|
+
}
|
|
2687
3027
|
telemetry?.recordHistogram("omote.model.warmup_time", warmupTimeMs, {
|
|
2688
3028
|
model: "wav2vec2",
|
|
2689
3029
|
backend: this._backend
|
|
@@ -2717,6 +3057,9 @@ var Wav2Vec2Inference = class {
|
|
|
2717
3057
|
if (!this.session) {
|
|
2718
3058
|
throw new Error("Model not loaded. Call load() first.");
|
|
2719
3059
|
}
|
|
3060
|
+
if (this.poisoned) {
|
|
3061
|
+
throw new Error("Wav2Vec2 session timed out \u2014 inference unavailable until page reload");
|
|
3062
|
+
}
|
|
2720
3063
|
const audioSamplesCopy = new Float32Array(audioSamples);
|
|
2721
3064
|
let audio;
|
|
2722
3065
|
if (audioSamplesCopy.length === 16e3) {
|
|
@@ -2772,7 +3115,15 @@ var Wav2Vec2Inference = class {
|
|
|
2772
3115
|
});
|
|
2773
3116
|
try {
|
|
2774
3117
|
const startTime = performance.now();
|
|
2775
|
-
const results = await
|
|
3118
|
+
const results = await Promise.race([
|
|
3119
|
+
this.session.run(feeds),
|
|
3120
|
+
new Promise(
|
|
3121
|
+
(_, rej) => setTimeout(
|
|
3122
|
+
() => rej(new Error(`Wav2Vec2 inference timed out after ${_Wav2Vec2Inference.INFERENCE_TIMEOUT_MS}ms`)),
|
|
3123
|
+
_Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
|
|
3124
|
+
)
|
|
3125
|
+
)
|
|
3126
|
+
]);
|
|
2776
3127
|
const inferenceTimeMs = performance.now() - startTime;
|
|
2777
3128
|
const asrOutput = results["asr_logits"];
|
|
2778
3129
|
const blendshapeOutput = results["blendshapes"];
|
|
@@ -2826,50 +3177,411 @@ var Wav2Vec2Inference = class {
|
|
|
2826
3177
|
inferenceTimeMs
|
|
2827
3178
|
});
|
|
2828
3179
|
} catch (err) {
|
|
3180
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
3181
|
+
if (errMsg.includes("timed out")) {
|
|
3182
|
+
this.poisoned = true;
|
|
3183
|
+
logger2.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
|
|
3184
|
+
backend: this._backend,
|
|
3185
|
+
timeoutMs: _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
|
|
3186
|
+
});
|
|
3187
|
+
} else {
|
|
3188
|
+
logger2.error("Inference failed", { error: errMsg, backend: this._backend });
|
|
3189
|
+
}
|
|
2829
3190
|
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
2830
3191
|
telemetry?.incrementCounter("omote.inference.total", 1, {
|
|
2831
3192
|
model: "wav2vec2",
|
|
2832
3193
|
backend: this._backend,
|
|
2833
3194
|
status: "error"
|
|
2834
3195
|
});
|
|
2835
|
-
reject(err);
|
|
3196
|
+
reject(err);
|
|
3197
|
+
}
|
|
3198
|
+
});
|
|
3199
|
+
});
|
|
3200
|
+
}
|
|
3201
|
+
/**
|
|
3202
|
+
* Get blendshape value by name for a specific frame
|
|
3203
|
+
*/
|
|
3204
|
+
getBlendshape(blendshapes, name) {
|
|
3205
|
+
const index = LAM_BLENDSHAPES.indexOf(name);
|
|
3206
|
+
if (index === -1) {
|
|
3207
|
+
throw new Error(`Unknown blendshape: ${name}`);
|
|
3208
|
+
}
|
|
3209
|
+
return blendshapes[index];
|
|
3210
|
+
}
|
|
3211
|
+
/**
|
|
3212
|
+
* Dispose of the model and free resources
|
|
3213
|
+
*/
|
|
3214
|
+
async dispose() {
|
|
3215
|
+
if (this.session) {
|
|
3216
|
+
await this.session.release();
|
|
3217
|
+
this.session = null;
|
|
3218
|
+
}
|
|
3219
|
+
}
|
|
3220
|
+
};
|
|
3221
|
+
_Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
|
|
3222
|
+
/**
|
|
3223
|
+
* Check if WebGPU is available and working
|
|
3224
|
+
* (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
|
|
3225
|
+
*/
|
|
3226
|
+
_Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
|
|
3227
|
+
var Wav2Vec2Inference = _Wav2Vec2Inference;
|
|
3228
|
+
|
|
3229
|
+
// src/audio/FullFacePipeline.ts
|
|
3230
|
+
var logger3 = createLogger("FullFacePipeline");
|
|
3231
|
+
/**
 * Convert 16-bit signed PCM bytes into Float32 samples in [-1, 1).
 *
 * Accepts either an ArrayBuffer or any typed-array/DataView over one. A view
 * must be unwrapped explicitly: `new Int16Array(view)` converts element by
 * element (each byte would become its own sample) instead of reinterpreting
 * the bytes, and it would ignore the view's byteOffset.
 *
 * Samples are read in platform byte order, as before. A trailing odd byte is
 * dropped.
 *
 * @param buffer - ArrayBuffer or ArrayBuffer view containing Int16 PCM audio
 * @returns New Float32Array with one entry per 16-bit sample (value / 32768)
 */
function pcm16ToFloat322(buffer) {
  const isView = ArrayBuffer.isView(buffer);
  const backing = isView ? buffer.buffer : buffer;
  const byteOffset = isView ? buffer.byteOffset : 0;
  const sampleCount = Math.floor(buffer.byteLength / 2);
  // Int16Array requires a 2-byte aligned offset; copy the bytes when the
  // view starts on an odd offset.
  const int16 = byteOffset % 2 === 0
    ? new Int16Array(backing, byteOffset, sampleCount)
    : new Int16Array(backing.slice(byteOffset, byteOffset + sampleCount * 2));
  const float32 = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
|
|
3240
|
+
// Blendshape name -> position in LAM_BLENDSHAPES.
var BLENDSHAPE_INDEX_MAP = /* @__PURE__ */ new Map(
  LAM_BLENDSHAPES.map((shapeName, position) => [shapeName, position])
);
// Membership set for the names listed in UPPER_FACE_BLENDSHAPES.
var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
|
|
3245
|
+
/**
 * Lookup from a lower-case emotion label to one of the four emotions the
 * pipeline animates: "happy", "sad", "angry" or "neutral".
 */
var EMOTION_LABEL_MAP = {
  // Direct labels
  happy: "happy",
  sad: "sad",
  angry: "angry",
  neutral: "neutral",

  // Natural language synonyms
  excited: "happy",
  joyful: "happy",
  cheerful: "happy",
  delighted: "happy",
  amused: "happy",
  melancholic: "sad",
  sorrowful: "sad",
  disappointed: "sad",
  frustrated: "angry",
  irritated: "angry",
  furious: "angry",
  annoyed: "angry",

  // SenseVoice labels
  fearful: "sad",
  disgusted: "angry",
  surprised: "happy"
};
|
|
3269
|
+
var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
3270
|
+
constructor(options) {
|
|
3271
|
+
super();
|
|
3272
|
+
this.options = options;
|
|
3273
|
+
this.playbackStarted = false;
|
|
3274
|
+
this.monitorInterval = null;
|
|
3275
|
+
this.frameAnimationId = null;
|
|
3276
|
+
// Emotion state
|
|
3277
|
+
this.lastEmotionFrame = null;
|
|
3278
|
+
this.currentAudioEnergy = 0;
|
|
3279
|
+
// Stale frame detection
|
|
3280
|
+
this.lastNewFrameTime = 0;
|
|
3281
|
+
this.lastKnownLamFrame = null;
|
|
3282
|
+
this.staleWarningEmitted = false;
|
|
3283
|
+
const sampleRate = options.sampleRate ?? 16e3;
|
|
3284
|
+
this.emotionBlendFactor = options.emotionBlendFactor ?? 0.8;
|
|
3285
|
+
this.lamBlendFactor = options.lamBlendFactor ?? 0.2;
|
|
3286
|
+
const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
|
|
3287
|
+
const audioDelayMs = options.audioDelayMs ?? autoDelay;
|
|
3288
|
+
this.scheduler = new AudioScheduler({
|
|
3289
|
+
sampleRate,
|
|
3290
|
+
initialLookaheadSec: audioDelayMs / 1e3
|
|
3291
|
+
});
|
|
3292
|
+
this.coalescer = new AudioChunkCoalescer({
|
|
3293
|
+
sampleRate,
|
|
3294
|
+
targetDurationMs: options.chunkTargetMs ?? 200
|
|
3295
|
+
});
|
|
3296
|
+
this.lamPipeline = new LAMPipeline({
|
|
3297
|
+
sampleRate,
|
|
3298
|
+
onError: (error) => {
|
|
3299
|
+
logger3.error("LAM inference error", { message: error.message, stack: error.stack });
|
|
3300
|
+
this.emit("error", error);
|
|
3301
|
+
}
|
|
3302
|
+
});
|
|
3303
|
+
this.emotionMapper = new EmotionToBlendshapeMapper({
|
|
3304
|
+
smoothingFactor: 0.15,
|
|
3305
|
+
confidenceThreshold: 0.3,
|
|
3306
|
+
intensity: 1,
|
|
3307
|
+
energyModulation: true
|
|
3308
|
+
});
|
|
3309
|
+
this.energyAnalyzer = new AudioEnergyAnalyzer();
|
|
3310
|
+
}
|
|
3311
|
+
/**
|
|
3312
|
+
* Initialize the pipeline
|
|
3313
|
+
*/
|
|
3314
|
+
async initialize() {
|
|
3315
|
+
await this.scheduler.initialize();
|
|
3316
|
+
}
|
|
3317
|
+
/**
|
|
3318
|
+
* Set emotion label from backend (e.g., LLM response emotion).
|
|
3319
|
+
*
|
|
3320
|
+
* Converts a natural language emotion label into an EmotionFrame
|
|
3321
|
+
* that drives upper face blendshapes for the duration of the utterance.
|
|
3322
|
+
*
|
|
3323
|
+
* Supported labels: happy, excited, joyful, sad, melancholic, angry,
|
|
3324
|
+
* frustrated, neutral, etc.
|
|
3325
|
+
*
|
|
3326
|
+
* @param label - Emotion label string (case-insensitive)
|
|
3327
|
+
*/
|
|
3328
|
+
setEmotionLabel(label) {
|
|
3329
|
+
const normalized = label.toLowerCase();
|
|
3330
|
+
const mapped = EMOTION_LABEL_MAP[normalized] ?? "neutral";
|
|
3331
|
+
const probabilities = {
|
|
3332
|
+
neutral: 0.1,
|
|
3333
|
+
happy: 0.1,
|
|
3334
|
+
angry: 0.1,
|
|
3335
|
+
sad: 0.1
|
|
3336
|
+
};
|
|
3337
|
+
probabilities[mapped] = 0.7;
|
|
3338
|
+
const frame = {
|
|
3339
|
+
emotion: mapped,
|
|
3340
|
+
confidence: 0.7,
|
|
3341
|
+
probabilities
|
|
3342
|
+
};
|
|
3343
|
+
this.lastEmotionFrame = frame;
|
|
3344
|
+
logger3.info("Emotion label set", { label, mapped });
|
|
3345
|
+
}
|
|
3346
|
+
/**
|
|
3347
|
+
* Clear any set emotion label.
|
|
3348
|
+
* Falls back to prosody-only upper face animation.
|
|
3349
|
+
*/
|
|
3350
|
+
clearEmotionLabel() {
|
|
3351
|
+
this.lastEmotionFrame = null;
|
|
3352
|
+
}
|
|
3353
|
+
/**
|
|
3354
|
+
* Start a new playback session
|
|
3355
|
+
*
|
|
3356
|
+
* Resets all state and prepares for incoming audio chunks.
|
|
3357
|
+
* Audio will be scheduled immediately as chunks arrive (no buffering).
|
|
3358
|
+
*/
|
|
3359
|
+
start() {
|
|
3360
|
+
this.stopMonitoring();
|
|
3361
|
+
this.scheduler.reset();
|
|
3362
|
+
this.coalescer.reset();
|
|
3363
|
+
this.lamPipeline.reset();
|
|
3364
|
+
this.playbackStarted = false;
|
|
3365
|
+
this.lastEmotionFrame = null;
|
|
3366
|
+
this.currentAudioEnergy = 0;
|
|
3367
|
+
this.emotionMapper.reset();
|
|
3368
|
+
this.energyAnalyzer.reset();
|
|
3369
|
+
this.lastNewFrameTime = 0;
|
|
3370
|
+
this.lastKnownLamFrame = null;
|
|
3371
|
+
this.staleWarningEmitted = false;
|
|
3372
|
+
this.scheduler.warmup();
|
|
3373
|
+
this.startFrameLoop();
|
|
3374
|
+
this.startMonitoring();
|
|
3375
|
+
}
|
|
3376
|
+
/**
|
|
3377
|
+
* Receive audio chunk from network
|
|
3378
|
+
*
|
|
3379
|
+
* Audio-first design: schedules audio immediately, LAM runs in background.
|
|
3380
|
+
* This prevents LAM inference (50-300ms) from blocking audio scheduling.
|
|
3381
|
+
*
|
|
3382
|
+
* @param chunk - Uint8Array containing Int16 PCM audio
|
|
3383
|
+
*/
|
|
3384
|
+
async onAudioChunk(chunk) {
|
|
3385
|
+
const combined = this.coalescer.add(chunk);
|
|
3386
|
+
if (!combined) {
|
|
3387
|
+
return;
|
|
3388
|
+
}
|
|
3389
|
+
const float32 = pcm16ToFloat322(combined);
|
|
3390
|
+
const scheduleTime = await this.scheduler.schedule(float32);
|
|
3391
|
+
if (!this.playbackStarted) {
|
|
3392
|
+
this.playbackStarted = true;
|
|
3393
|
+
this.emit("playback_start", scheduleTime);
|
|
3394
|
+
}
|
|
3395
|
+
const { energy } = this.energyAnalyzer.process(float32);
|
|
3396
|
+
this.currentAudioEnergy = energy;
|
|
3397
|
+
this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
|
|
3398
|
+
this.emit("error", err);
|
|
3399
|
+
});
|
|
3400
|
+
}
|
|
3401
|
+
/**
|
|
3402
|
+
* Get emotion frame for current animation.
|
|
3403
|
+
*
|
|
3404
|
+
* Priority:
|
|
3405
|
+
* 1. Explicit emotion label from setEmotionLabel()
|
|
3406
|
+
* 2. Prosody fallback: subtle brow movement from audio energy
|
|
3407
|
+
*/
|
|
3408
|
+
getEmotionFrame() {
|
|
3409
|
+
if (this.lastEmotionFrame) {
|
|
3410
|
+
return { frame: this.lastEmotionFrame, energy: this.currentAudioEnergy };
|
|
3411
|
+
}
|
|
3412
|
+
return { frame: null, energy: this.currentAudioEnergy };
|
|
3413
|
+
}
|
|
3414
|
+
/**
|
|
3415
|
+
* Merge LAM blendshapes with emotion upper face blendshapes
|
|
3416
|
+
*/
|
|
3417
|
+
mergeBlendshapes(lamFrame, emotionFrame, audioEnergy) {
|
|
3418
|
+
const merged = new Float32Array(52);
|
|
3419
|
+
let emotionBlendshapes;
|
|
3420
|
+
if (emotionFrame) {
|
|
3421
|
+
this.emotionMapper.mapFrame(emotionFrame, audioEnergy);
|
|
3422
|
+
this.emotionMapper.update(33);
|
|
3423
|
+
emotionBlendshapes = this.emotionMapper.getCurrentBlendshapes();
|
|
3424
|
+
} else {
|
|
3425
|
+
emotionBlendshapes = {};
|
|
3426
|
+
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
3427
|
+
emotionBlendshapes[name] = 0;
|
|
3428
|
+
}
|
|
3429
|
+
}
|
|
3430
|
+
for (let i = 0; i < 52; i++) {
|
|
3431
|
+
const name = LAM_BLENDSHAPES[i];
|
|
3432
|
+
if (UPPER_FACE_SET.has(name)) {
|
|
3433
|
+
const emotionValue = emotionBlendshapes[name] ?? 0;
|
|
3434
|
+
const lamValue = lamFrame[i];
|
|
3435
|
+
merged[i] = emotionValue * this.emotionBlendFactor + lamValue * this.lamBlendFactor;
|
|
3436
|
+
} else {
|
|
3437
|
+
merged[i] = lamFrame[i];
|
|
3438
|
+
}
|
|
3439
|
+
}
|
|
3440
|
+
return { merged, emotionBlendshapes };
|
|
3441
|
+
}
|
|
3442
|
+
/**
|
|
3443
|
+
* Start frame animation loop
|
|
3444
|
+
*/
|
|
3445
|
+
startFrameLoop() {
|
|
3446
|
+
const updateFrame = () => {
|
|
3447
|
+
const currentTime = this.scheduler.getCurrentTime();
|
|
3448
|
+
const lamFrame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
|
|
3449
|
+
if (lamFrame) {
|
|
3450
|
+
if (lamFrame !== this.lastKnownLamFrame) {
|
|
3451
|
+
this.lastNewFrameTime = performance.now();
|
|
3452
|
+
this.lastKnownLamFrame = lamFrame;
|
|
3453
|
+
this.staleWarningEmitted = false;
|
|
3454
|
+
}
|
|
3455
|
+
const { frame: emotionFrame, energy } = this.getEmotionFrame();
|
|
3456
|
+
const { merged, emotionBlendshapes } = this.mergeBlendshapes(lamFrame, emotionFrame, energy);
|
|
3457
|
+
const fullFrame = {
|
|
3458
|
+
blendshapes: merged,
|
|
3459
|
+
lamBlendshapes: lamFrame,
|
|
3460
|
+
emotionBlendshapes,
|
|
3461
|
+
emotion: emotionFrame,
|
|
3462
|
+
timestamp: currentTime
|
|
3463
|
+
};
|
|
3464
|
+
this.emit("full_frame_ready", fullFrame);
|
|
3465
|
+
this.emit("lam_frame_ready", lamFrame);
|
|
3466
|
+
if (emotionFrame) {
|
|
3467
|
+
this.emit("emotion_frame_ready", emotionFrame);
|
|
3468
|
+
}
|
|
3469
|
+
} else if (this.playbackStarted && !this.lastKnownLamFrame) {
|
|
3470
|
+
const { frame: emotionFrame, energy } = this.getEmotionFrame();
|
|
3471
|
+
if (emotionFrame && energy > 0.05) {
|
|
3472
|
+
const startupFrame = new Float32Array(52);
|
|
3473
|
+
const { merged, emotionBlendshapes } = this.mergeBlendshapes(startupFrame, emotionFrame, energy);
|
|
3474
|
+
this.emit("full_frame_ready", {
|
|
3475
|
+
blendshapes: merged,
|
|
3476
|
+
lamBlendshapes: startupFrame,
|
|
3477
|
+
emotionBlendshapes,
|
|
3478
|
+
emotion: emotionFrame,
|
|
3479
|
+
timestamp: currentTime
|
|
3480
|
+
});
|
|
2836
3481
|
}
|
|
2837
|
-
}
|
|
2838
|
-
|
|
3482
|
+
}
|
|
3483
|
+
if (this.playbackStarted && this.lastNewFrameTime > 0 && !this.staleWarningEmitted && performance.now() - this.lastNewFrameTime > _FullFacePipeline.STALE_FRAME_THRESHOLD_MS) {
|
|
3484
|
+
this.staleWarningEmitted = true;
|
|
3485
|
+
logger3.warn("LAM appears stalled \u2014 no new frames for 3+ seconds during playback", {
|
|
3486
|
+
staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
|
|
3487
|
+
queuedFrames: this.lamPipeline.queuedFrameCount
|
|
3488
|
+
});
|
|
3489
|
+
}
|
|
3490
|
+
this.frameAnimationId = requestAnimationFrame(updateFrame);
|
|
3491
|
+
};
|
|
3492
|
+
this.frameAnimationId = requestAnimationFrame(updateFrame);
|
|
2839
3493
|
}
|
|
2840
3494
|
/**
|
|
2841
|
-
*
|
|
3495
|
+
* End of audio stream
|
|
2842
3496
|
*/
|
|
2843
|
-
|
|
2844
|
-
const
|
|
2845
|
-
if (
|
|
2846
|
-
|
|
3497
|
+
async end() {
|
|
3498
|
+
const remaining = this.coalescer.flush();
|
|
3499
|
+
if (remaining) {
|
|
3500
|
+
const chunk = new Uint8Array(remaining);
|
|
3501
|
+
await this.onAudioChunk(chunk);
|
|
2847
3502
|
}
|
|
2848
|
-
|
|
3503
|
+
await this.lamPipeline.flush(this.options.lam);
|
|
2849
3504
|
}
|
|
2850
3505
|
/**
|
|
2851
|
-
*
|
|
3506
|
+
* Stop playback immediately with smooth fade-out
|
|
2852
3507
|
*/
|
|
2853
|
-
async
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
3508
|
+
async stop(fadeOutMs = 50) {
|
|
3509
|
+
this.stopMonitoring();
|
|
3510
|
+
await this.scheduler.cancelAll(fadeOutMs);
|
|
3511
|
+
this.coalescer.reset();
|
|
3512
|
+
this.lamPipeline.reset();
|
|
3513
|
+
this.playbackStarted = false;
|
|
3514
|
+
this.lastEmotionFrame = null;
|
|
3515
|
+
this.currentAudioEnergy = 0;
|
|
3516
|
+
this.emotionMapper.reset();
|
|
3517
|
+
this.energyAnalyzer.reset();
|
|
3518
|
+
this.lastNewFrameTime = 0;
|
|
3519
|
+
this.lastKnownLamFrame = null;
|
|
3520
|
+
this.staleWarningEmitted = false;
|
|
3521
|
+
this.emit("playback_complete", void 0);
|
|
3522
|
+
}
|
|
3523
|
+
/**
|
|
3524
|
+
* Start monitoring for playback completion
|
|
3525
|
+
*/
|
|
3526
|
+
startMonitoring() {
|
|
3527
|
+
if (this.monitorInterval) {
|
|
3528
|
+
clearInterval(this.monitorInterval);
|
|
3529
|
+
}
|
|
3530
|
+
this.monitorInterval = setInterval(() => {
|
|
3531
|
+
if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
|
|
3532
|
+
this.emit("playback_complete", void 0);
|
|
3533
|
+
this.stopMonitoring();
|
|
3534
|
+
}
|
|
3535
|
+
}, 100);
|
|
3536
|
+
}
|
|
3537
|
+
/**
|
|
3538
|
+
* Stop monitoring
|
|
3539
|
+
*/
|
|
3540
|
+
stopMonitoring() {
|
|
3541
|
+
if (this.monitorInterval) {
|
|
3542
|
+
clearInterval(this.monitorInterval);
|
|
3543
|
+
this.monitorInterval = null;
|
|
3544
|
+
}
|
|
3545
|
+
if (this.frameAnimationId) {
|
|
3546
|
+
cancelAnimationFrame(this.frameAnimationId);
|
|
3547
|
+
this.frameAnimationId = null;
|
|
2857
3548
|
}
|
|
2858
3549
|
}
|
|
3550
|
+
/**
|
|
3551
|
+
* Get current pipeline state (for debugging/monitoring)
|
|
3552
|
+
*/
|
|
3553
|
+
getState() {
|
|
3554
|
+
return {
|
|
3555
|
+
playbackStarted: this.playbackStarted,
|
|
3556
|
+
coalescerFill: this.coalescer.fillLevel,
|
|
3557
|
+
lamFill: this.lamPipeline.fillLevel,
|
|
3558
|
+
queuedLAMFrames: this.lamPipeline.queuedFrameCount,
|
|
3559
|
+
emotionLabel: this.lastEmotionFrame?.emotion ?? null,
|
|
3560
|
+
currentAudioEnergy: this.currentAudioEnergy,
|
|
3561
|
+
currentTime: this.scheduler.getCurrentTime(),
|
|
3562
|
+
playbackEndTime: this.scheduler.getPlaybackEndTime()
|
|
3563
|
+
};
|
|
3564
|
+
}
|
|
3565
|
+
/**
|
|
3566
|
+
* Check if an explicit emotion label is currently set
|
|
3567
|
+
*/
|
|
3568
|
+
get hasEmotionLabel() {
|
|
3569
|
+
return this.lastEmotionFrame !== null;
|
|
3570
|
+
}
|
|
3571
|
+
/**
|
|
3572
|
+
* Cleanup resources
|
|
3573
|
+
*/
|
|
3574
|
+
dispose() {
|
|
3575
|
+
this.stopMonitoring();
|
|
3576
|
+
this.scheduler.dispose();
|
|
3577
|
+
this.coalescer.reset();
|
|
3578
|
+
this.lamPipeline.reset();
|
|
3579
|
+
this.lastEmotionFrame = null;
|
|
3580
|
+
this.currentAudioEnergy = 0;
|
|
3581
|
+
}
|
|
2859
3582
|
};
|
|
2860
|
-
|
|
2861
|
-
|
|
2862
|
-
* (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
|
|
2863
|
-
*/
|
|
2864
|
-
Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
|
|
2865
|
-
|
|
2866
|
-
// src/audio/FullFacePipeline.ts
|
|
2867
|
-
var logger3 = createLogger("FullFacePipeline");
|
|
2868
|
-
var BLENDSHAPE_INDEX_MAP = /* @__PURE__ */ new Map();
|
|
2869
|
-
LAM_BLENDSHAPES.forEach((name, index) => {
|
|
2870
|
-
BLENDSHAPE_INDEX_MAP.set(name, index);
|
|
2871
|
-
});
|
|
2872
|
-
var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
|
|
3583
|
+
// Milliseconds without a new frame during playback before the one-time
// "LAM appears stalled" warning is logged.
_FullFacePipeline.STALE_FRAME_THRESHOLD_MS = 3000;
// Public name for the class.
var FullFacePipeline = _FullFacePipeline;
|
|
2873
3585
|
|
|
2874
3586
|
// src/inference/kaldiFbank.ts
|
|
2875
3587
|
function fft(re, im) {
|
|
@@ -3739,20 +4451,21 @@ var LipSyncWithFallback = class {
|
|
|
3739
4451
|
try {
|
|
3740
4452
|
return await this.implementation.load();
|
|
3741
4453
|
} catch (error) {
|
|
3742
|
-
|
|
3743
|
-
|
|
3744
|
-
|
|
3745
|
-
|
|
3746
|
-
|
|
3747
|
-
|
|
3748
|
-
|
|
3749
|
-
|
|
3750
|
-
modelUrl: this.config.cpuModelUrl
|
|
3751
|
-
});
|
|
3752
|
-
this.hasFallenBack = true;
|
|
3753
|
-
logger6.info("Fallback to Wav2ArkitCpuInference successful");
|
|
3754
|
-
return await this.implementation.load();
|
|
4454
|
+
return this.fallbackToCpu(error instanceof Error ? error.message : String(error));
|
|
4455
|
+
}
|
|
4456
|
+
}
|
|
4457
|
+
async fallbackToCpu(reason) {
|
|
4458
|
+
logger6.warn("GPU model load failed, falling back to CPU model", { reason });
|
|
4459
|
+
try {
|
|
4460
|
+
await this.implementation.dispose();
|
|
4461
|
+
} catch {
|
|
3755
4462
|
}
|
|
4463
|
+
this.implementation = new Wav2ArkitCpuInference({
|
|
4464
|
+
modelUrl: this.config.cpuModelUrl
|
|
4465
|
+
});
|
|
4466
|
+
this.hasFallenBack = true;
|
|
4467
|
+
logger6.info("Fallback to Wav2ArkitCpuInference successful");
|
|
4468
|
+
return await this.implementation.load();
|
|
3756
4469
|
}
|
|
3757
4470
|
async infer(audioSamples, identityIndex) {
|
|
3758
4471
|
return this.implementation.infer(audioSamples, identityIndex);
|
|
@@ -6893,121 +7606,639 @@ var AnimationGraph = class extends EventEmitter {
|
|
|
6893
7606
|
}
|
|
6894
7607
|
};
|
|
6895
7608
|
|
|
6896
|
-
// src/animation/
|
|
6897
|
-
|
|
6898
|
-
|
|
6899
|
-
|
|
6900
|
-
|
|
6901
|
-
|
|
6902
|
-
|
|
6903
|
-
|
|
7609
|
+
// src/animation/simplex2d.ts
|
|
7610
|
+
// Doubled lookup table: perm[i] === perm[i + 256] === p[i], so indices up to
// 511 can be used without wrapping.
var perm = new Uint8Array(512);
// The eight gradient directions used by the 2D noise: diagonals and axes.
var grad2 = [[1, 1], [-1, 1], [1, -1], [-1, -1], [1, 0], [-1, 0], [0, 1], [0, -1]];
// Fixed permutation of the byte values 0..255 (each value appears exactly once).
var p = [
  151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225,
  140, 36, 103, 30, 69, 142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148,
  247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32,
  57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175,
  74, 165, 71, 134, 139, 48, 27, 166, 77, 146, 158, 231, 83, 111, 229, 122,
  60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245, 40, 244, 102, 143, 54,
  65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169,
  200, 196, 135, 130, 116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64,
  52, 217, 226, 250, 124, 123, 5, 202, 38, 147, 118, 126, 255, 82, 85, 212,
  207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42, 223, 183, 170, 213,
  119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9,
  129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104,
  218, 246, 97, 228, 251, 34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241,
  81, 51, 145, 235, 249, 14, 239, 107, 49, 192, 214, 31, 181, 199, 106, 157,
  184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150, 254, 138, 236, 205, 93,
  222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180
];
// Copy the permutation into both halves of the doubled table.
perm.set(p, 0);
perm.set(p, 256);
|
|
6905
|
-
|
|
6906
|
-
|
|
6907
|
-
|
|
6908
|
-
|
|
6909
|
-
if (abs > peak) peak = abs;
|
|
6910
|
-
}
|
|
6911
|
-
return peak;
|
|
7883
|
+
// Skew factor (input space -> simplex grid) and unskew factor (grid -> input
// space) used by simplex2d.
var F2 = (Math.sqrt(3) - 1) / 2;
var G2 = (3 - Math.sqrt(3)) / 6;
|
|
7885
|
+
/**
 * Dot product of a 2-component gradient with the offset (x, y).
 *
 * @param g - Gradient as a two-element array [gx, gy]
 * @param x - Offset along x
 * @param y - Offset along y
 * @returns gx * x + gy * y
 */
function dot2(g, x, y) {
  const gx = g[0];
  const gy = g[1];
  return gx * x + gy * y;
}
|
|
6913
|
-
|
|
6914
|
-
|
|
6915
|
-
|
|
6916
|
-
|
|
6917
|
-
|
|
6918
|
-
|
|
6919
|
-
|
|
6920
|
-
|
|
6921
|
-
|
|
6922
|
-
|
|
6923
|
-
|
|
6924
|
-
|
|
6925
|
-
|
|
6926
|
-
|
|
6927
|
-
|
|
6928
|
-
|
|
6929
|
-
|
|
6930
|
-
|
|
6931
|
-
|
|
6932
|
-
|
|
6933
|
-
|
|
6934
|
-
|
|
6935
|
-
|
|
6936
|
-
|
|
6937
|
-
|
|
6938
|
-
|
|
6939
|
-
|
|
6940
|
-
|
|
6941
|
-
|
|
6942
|
-
|
|
6943
|
-
|
|
6944
|
-
|
|
7888
|
+
/**
 * 2D simplex noise.
 *
 * Skews (x, y) onto the simplex grid, finds the containing cell and which of
 * its two triangles the point lies in, then sums the contribution of the
 * triangle's three corners.
 *
 * @param x - First coordinate
 * @param y - Second coordinate
 * @returns The summed corner contributions scaled by 70
 */
function simplex2d(x, y) {
  // Skew into grid space and locate the cell origin.
  const skew = (x + y) * F2;
  const cellI = Math.floor(x + skew);
  const cellJ = Math.floor(y + skew);
  const unskew = (cellI + cellJ) * G2;
  // Offset of the point from the (unskewed) cell origin.
  const x0 = x - (cellI - unskew);
  const y0 = y - (cellJ - unskew);
  // Second corner: step along x for the lower triangle, along y otherwise.
  const lowerTriangle = x0 > y0;
  const i1 = lowerTriangle ? 1 : 0;
  const j1 = lowerTriangle ? 0 : 1;
  const x1 = x0 - i1 + G2;
  const y1 = y0 - j1 + G2;
  const x2 = x0 - 1 + 2 * G2;
  const y2 = y0 - 1 + 2 * G2;
  // Hash each corner to one of the eight gradients.
  const ii = cellI & 255;
  const jj = cellJ & 255;
  const gi0 = perm[ii + perm[jj]] % 8;
  const gi1 = perm[ii + i1 + perm[jj + j1]] % 8;
  const gi2 = perm[ii + 1 + perm[jj + 1]] % 8;
  // Radially attenuated gradient contribution of one corner; zero once the
  // falloff term is no longer >= 0.
  const corner = (gi, dx, dy) => {
    let falloff = 0.5 - dx * dx - dy * dy;
    if (!(falloff >= 0)) {
      return 0;
    }
    falloff *= falloff;
    return falloff * falloff * dot2(grad2[gi], dx, dy);
  };
  const n0 = corner(gi0, x0, y0);
  const n1 = corner(gi1, x1, y1);
  const n2 = corner(gi2, x2, y2);
  return 70 * (n0 + n1 + n2);
}
|
|
7928
|
+
|
|
7929
|
+
// src/animation/ProceduralLifeLayer.ts
|
|
7930
|
+
// Phase ids shared by the blink and gaze-break state machines.
var PHASE_OPEN = 0;
var PHASE_CLOSING = 1;
var PHASE_CLOSED = 2;
var PHASE_OPENING = 3;

// Blink timing, in seconds (compared against accumulated frame deltas).
var BLINK_CLOSE_DURATION = 0.06;
var BLINK_HOLD_DURATION = 0.04;
var BLINK_OPEN_DURATION = 0.15;
// Delay applied to the right eyelid while closing.
var BLINK_ASYMMETRY_DELAY = 0.008;

// Gaze-break timing, in seconds: move away, hold, return.
var GAZE_BREAK_DURATION = 0.12;
var GAZE_BREAK_HOLD_DURATION = 0.3;
var GAZE_BREAK_RETURN_DURATION = 0.15;

// Eye micro-motion noise: time multiplier and fixed second noise coordinate per axis.
var EYE_NOISE_X_FREQ = 0.8;
var EYE_NOISE_Y_FREQ = 0.6;
var EYE_NOISE_X_PHASE = 73.1;
var EYE_NOISE_Y_PHASE = 91.7;

// Brow noise: time multiplier and fixed second noise coordinate per channel.
var BROW_INNER_UP_FREQ = 0.4;
var BROW_OUTER_LEFT_FREQ = 0.35;
var BROW_OUTER_RIGHT_FREQ = 0.38;
var BROW_DOWN_FREQ = 0.3;
var BROW_INNER_UP_PHASE = 0;
var BROW_OUTER_LEFT_PHASE = 17.3;
var BROW_OUTER_RIGHT_PHASE = 31.7;
var BROW_DOWN_LEFT_PHASE = 47.1;
var BROW_DOWN_RIGHT_PHASE = 59.3;

// Frame-to-frame energy rise that triggers a brow emphasis, and its decay per second.
var EMPHASIS_ENERGY_THRESHOLD = 0.3;
var EMPHASIS_DECAY_RATE = 4;
|
|
7956
|
+
/**
 * Limit v to the range [min, max].
 *
 * @param v - Value to limit
 * @param min - Lower bound (checked first)
 * @param max - Upper bound
 * @returns min if v < min, otherwise max if v > max, otherwise v
 */
function clamp(v, min, max) {
  if (v < min) {
    return min;
  }
  if (v > max) {
    return max;
  }
  return v;
}
|
|
7959
|
+
/**
 * Uniform random number in [min, max), drawn from Math.random().
 *
 * @param min - Lower bound (inclusive)
 * @param max - Upper bound
 * @returns min plus a random fraction of (max - min)
 */
function randomRange(min, max) {
  const span = max - min;
  return min + Math.random() * span;
}
|
|
7962
|
+
/**
 * Cubic ease 3t^2 - 2t^3. The input is not clamped; callers pass t in [0, 1].
 *
 * @param t - Progress value
 * @returns Eased value (0 at t = 0, 1 at t = 1)
 */
function smoothStep(t) {
  const squared = t * t;
  return squared * (3 - 2 * t);
}
|
|
7965
|
+
/**
 * Smoothly limit v to the open interval (-max, max) with a tanh curve:
 * roughly linear while |v| is small relative to max, saturating towards
 * +/-max as |v| grows.
 *
 * @param v - Value to limit
 * @param max - Saturation magnitude
 * @returns tanh(v / max) * max, or 0 when max is 0
 */
function softClamp(v, max) {
  // A zero limit leaves no room to move. Without this guard v = 0 evaluates
  // tanh(0 / 0) * 0, which is NaN.
  if (max === 0) {
    return 0;
  }
  return Math.tanh(v / max) * max;
}
|
|
7968
|
+
/**
 * Procedural "life" animation layer: blinks, eye contact with micro-motion
 * and occasional gaze breaks, brow drift, and breathing / postural sway.
 *
 * Call update() once per rendered frame; it returns blendshape values plus a
 * small head-rotation delta. All timers advance by the delta passed in.
 */
var ProceduralLifeLayer = class {
  /**
   * @param config - Optional overrides. Defaults:
   *   blinkIntervalRange [2.5, 6] (seconds between blinks),
   *   gazeBreakIntervalRange [3, 8] (seconds between gaze breaks),
   *   gazeBreakAmplitudeRange [0.15, 0.4],
   *   eyeNoiseAmplitude 0.06,
   *   browNoiseAmplitude 0.3,
   *   browNoiseSpeechMultiplier 2,
   *   breathingRate 0.25 (cycles per second),
   *   posturalSwayAmplitude 0.002,
   *   eyeMaxDeviation 0.8,
   *   eyeSmoothing 15.
   */
  constructor(config) {
    this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
    this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
    this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
    this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;
    this.browNoiseAmplitude = config?.browNoiseAmplitude ?? 0.3;
    this.browNoiseSpeechMultiplier = config?.browNoiseSpeechMultiplier ?? 2;
    this.breathingRate = config?.breathingRate ?? 0.25;
    this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
    this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
    this.eyeSmoothing = config?.eyeSmoothing ?? 15;
    // Initial state is the reset state; reset() also draws the first random
    // blink and gaze-break intervals (in that order).
    this.reset();
  }
  /**
   * Update the life layer and produce output for this frame.
   *
   * @param delta - Time since last frame in seconds
   * @param input - Per-frame input (eye target, audio energy, speaking state);
   *   every field is optional and defaults to 0 / false
   * @returns Blendshape values and head rotation deltas
   */
  update(delta, input) {
    const eyeTargetX = input?.eyeTargetX ?? 0;
    const eyeTargetY = input?.eyeTargetY ?? 0;
    const audioEnergy = input?.audioEnergy ?? 0;
    const isSpeaking = input?.isSpeaking ?? false;
    // Cap the delta used for exponential smoothing so a long frame cannot
    // overshoot; the state-machine timers below still use the raw delta.
    const safeDelta = Math.min(delta, 0.1);
    const blendshapes = {};

    // --- Blinks ---
    this.updateBlinks(delta);
    const blinkSmoothing = 45;
    const blinkBlend = Math.min(1, safeDelta * blinkSmoothing);
    const blinkValues = this.getBlinkValues();
    this.smoothedBlinkLeft += (blinkValues.left - this.smoothedBlinkLeft) * blinkBlend;
    this.smoothedBlinkRight += (blinkValues.right - this.smoothedBlinkRight) * blinkBlend;
    // asymmetryRight can be up to ~1.03, so the right-eye target may exceed 1.
    // Clamp what is emitted so blink outputs stay in [0, 1] like the brow
    // outputs; the internal smoothed state is left untouched.
    blendshapes["eyeBlinkLeft"] = clamp(this.smoothedBlinkLeft, 0, 1);
    blendshapes["eyeBlinkRight"] = clamp(this.smoothedBlinkRight, 0, 1);

    // --- Eyes: smoothed target + gaze-break offset + micro-motion ---
    const eyeBlend = Math.min(1, safeDelta * this.eyeSmoothing);
    this.smoothedEyeX += (eyeTargetX - this.smoothedEyeX) * eyeBlend;
    this.smoothedEyeY += (eyeTargetY - this.smoothedEyeY) * eyeBlend;
    this.eyeNoiseTime += delta;
    const microMotion = this.getEyeMicroMotion();
    this.updateGazeBreaks(delta);
    const finalEyeX = this.smoothedEyeX + this.gazeBreakCurrentX + microMotion.x;
    const finalEyeY = this.smoothedEyeY + this.gazeBreakCurrentY + microMotion.y;
    const clampedX = softClamp(finalEyeX, this.eyeMaxDeviation);
    const clampedY = softClamp(finalEyeY, this.eyeMaxDeviation);
    // Positive part of a signed axis value. Inside the dead zone the response
    // ramps quadratically from 0 so tiny values do not produce visible jitter.
    const deadZone = 0.02;
    const positivePart = (v) => v > deadZone ? v : v > 0 ? v * (v / deadZone) : 0;
    const lookRight = positivePart(clampedX);
    const lookLeft = positivePart(-clampedX);
    const lookUp = positivePart(clampedY);
    const lookDown = positivePart(-clampedY);
    blendshapes["eyeLookInLeft"] = lookRight;
    blendshapes["eyeLookOutLeft"] = lookLeft;
    blendshapes["eyeLookInRight"] = lookLeft;
    blendshapes["eyeLookOutRight"] = lookRight;
    blendshapes["eyeLookUpLeft"] = lookUp;
    blendshapes["eyeLookUpRight"] = lookUp;
    blendshapes["eyeLookDownLeft"] = lookDown;
    blendshapes["eyeLookDownRight"] = lookDown;

    // --- Brows ---
    this.updateBrowNoise(delta, audioEnergy, isSpeaking, blendshapes);

    // --- Breathing and postural sway (head deltas) ---
    this.microMotionTime += delta;
    this.breathingPhase += delta * this.breathingRate * Math.PI * 2;
    const breathingY = Math.sin(this.breathingPhase) * 3e-3;
    const swayAmp = this.posturalSwayAmplitude;
    const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
    const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
    return {
      blendshapes,
      headDelta: {
        yaw: swayX,
        pitch: breathingY + swayY
      }
    };
  }
  /**
   * Reset all internal state to initial values. Configuration is kept; the
   * next blink and gaze-break intervals are re-drawn at random.
   */
  reset() {
    // Blink state
    this.blinkTimer = 0;
    this.blinkInterval = randomRange(...this.blinkIntervalRange);
    this.blinkPhase = PHASE_OPEN;
    this.blinkProgress = 0;
    this.asymmetryRight = 0.97;
    this.smoothedBlinkLeft = 0;
    this.smoothedBlinkRight = 0;
    // Eye contact (smoothed)
    this.smoothedEyeX = 0;
    this.smoothedEyeY = 0;
    // Eye micro-motion (continuous simplex noise, no discrete events)
    this.eyeNoiseTime = 0;
    // Gaze break state
    this.gazeBreakTimer = 0;
    this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
    this.gazeBreakPhase = PHASE_OPEN;
    this.gazeBreakProgress = 0;
    this.gazeBreakTargetX = 0;
    this.gazeBreakTargetY = 0;
    this.gazeBreakCurrentX = 0;
    this.gazeBreakCurrentY = 0;
    // Breathing / postural sway
    this.microMotionTime = 0;
    this.breathingPhase = 0;
    // Brow noise
    this.noiseTime = 0;
    this.previousEnergy = 0;
    this.emphasisLevel = 0;
  }
  // =====================================================================
  // PRIVATE: Blink system
  // =====================================================================
  /**
   * Advance the blink state machine: OPEN -> CLOSING -> CLOSED -> OPENING -> OPEN.
   * At most one phase transition happens per call.
   */
  updateBlinks(delta) {
    this.blinkTimer += delta;
    if (this.blinkTimer >= this.blinkInterval && this.blinkPhase === PHASE_OPEN) {
      // Start a new blink and draw the wait until the next one.
      this.blinkPhase = PHASE_CLOSING;
      this.blinkProgress = 0;
      this.blinkTimer = 0;
      this.blinkInterval = randomRange(...this.blinkIntervalRange);
      // Per-blink closure scale for the right eye, in [0.95, 1.03).
      this.asymmetryRight = 0.95 + Math.random() * 0.08;
    }
    if (this.blinkPhase > PHASE_OPEN) {
      this.blinkProgress += delta;
      if (this.blinkPhase === PHASE_CLOSING) {
        if (this.blinkProgress >= BLINK_CLOSE_DURATION) {
          this.blinkPhase = PHASE_CLOSED;
          this.blinkProgress = 0;
        }
      } else if (this.blinkPhase === PHASE_CLOSED) {
        if (this.blinkProgress >= BLINK_HOLD_DURATION) {
          this.blinkPhase = PHASE_OPENING;
          this.blinkProgress = 0;
        }
      } else if (this.blinkPhase === PHASE_OPENING) {
        if (this.blinkProgress >= BLINK_OPEN_DURATION) {
          this.blinkPhase = PHASE_OPEN;
          this.blinkProgress = 0;
        }
      }
    }
  }
  /**
   * Raw (unsmoothed) eyelid closure targets for the current blink phase.
   *
   * @returns { left, right }; right is scaled by asymmetryRight
   */
  getBlinkValues() {
    if (this.blinkPhase === PHASE_OPEN) {
      return { left: 0, right: 0 };
    }
    if (this.blinkPhase === PHASE_CLOSING) {
      // Cubic ease-in; the right eye trails by BLINK_ASYMMETRY_DELAY.
      const tLeft = Math.min(1, this.blinkProgress / BLINK_CLOSE_DURATION);
      const tRight = Math.max(0, Math.min(1, (this.blinkProgress - BLINK_ASYMMETRY_DELAY) / BLINK_CLOSE_DURATION));
      return {
        left: tLeft * tLeft * tLeft,
        right: tRight * tRight * tRight * this.asymmetryRight
      };
    }
    if (this.blinkPhase === PHASE_CLOSED) {
      return { left: 1, right: this.asymmetryRight };
    }
    // PHASE_OPENING: smooth-step back to open.
    const t = Math.min(1, this.blinkProgress / BLINK_OPEN_DURATION);
    const eased = smoothStep(t);
    return {
      left: 1 - eased,
      right: (1 - eased) * this.asymmetryRight
    };
  }
  // =====================================================================
  // PRIVATE: Eye micro-motion (continuous simplex noise)
  // =====================================================================
  /**
   * Sample the eye noise at the current eyeNoiseTime.
   *
   * @returns { x, y } offsets; y uses 0.7 of the configured amplitude
   */
  getEyeMicroMotion() {
    const amp = this.eyeNoiseAmplitude;
    const x = simplex2d(this.eyeNoiseTime * EYE_NOISE_X_FREQ, EYE_NOISE_X_PHASE) * amp;
    const y = simplex2d(this.eyeNoiseTime * EYE_NOISE_Y_FREQ, EYE_NOISE_Y_PHASE) * amp * 0.7;
    return { x, y };
  }
  // =====================================================================
  // PRIVATE: Gaze breaks
  // =====================================================================
  /**
   * Advance the gaze-break state machine and update gazeBreakCurrentX/Y.
   * The phase ids are reused from the blink machine: CLOSING = moving to the
   * target, CLOSED = holding there, OPENING = returning, OPEN = idle.
   */
  updateGazeBreaks(delta) {
    this.gazeBreakTimer += delta;
    if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
      this.gazeBreakPhase = PHASE_CLOSING;
      this.gazeBreakProgress = 0;
      this.gazeBreakTimer = 0;
      // Pick a random target: full amplitude horizontally, much less vertically.
      const amp = randomRange(...this.gazeBreakAmplitudeRange);
      this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
      this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
      this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
    }
    if (this.gazeBreakPhase > PHASE_OPEN) {
      this.gazeBreakProgress += delta;
      if (this.gazeBreakPhase === PHASE_CLOSING) {
        const t = Math.min(1, this.gazeBreakProgress / GAZE_BREAK_DURATION);
        const eased = smoothStep(t);
        this.gazeBreakCurrentX = this.gazeBreakTargetX * eased;
        this.gazeBreakCurrentY = this.gazeBreakTargetY * eased;
        if (this.gazeBreakProgress >= GAZE_BREAK_DURATION) {
          this.gazeBreakPhase = PHASE_CLOSED;
          this.gazeBreakProgress = 0;
        }
      } else if (this.gazeBreakPhase === PHASE_CLOSED) {
        this.gazeBreakCurrentX = this.gazeBreakTargetX;
        this.gazeBreakCurrentY = this.gazeBreakTargetY;
        if (this.gazeBreakProgress >= GAZE_BREAK_HOLD_DURATION) {
          this.gazeBreakPhase = PHASE_OPENING;
          this.gazeBreakProgress = 0;
        }
      } else if (this.gazeBreakPhase === PHASE_OPENING) {
        const t = Math.min(1, this.gazeBreakProgress / GAZE_BREAK_RETURN_DURATION);
        const eased = smoothStep(t);
        this.gazeBreakCurrentX = this.gazeBreakTargetX * (1 - eased);
        this.gazeBreakCurrentY = this.gazeBreakTargetY * (1 - eased);
        if (this.gazeBreakProgress >= GAZE_BREAK_RETURN_DURATION) {
          this.gazeBreakPhase = PHASE_OPEN;
          this.gazeBreakProgress = 0;
          this.gazeBreakCurrentX = 0;
          this.gazeBreakCurrentY = 0;
        }
      }
    } else {
      this.gazeBreakCurrentX = 0;
      this.gazeBreakCurrentY = 0;
    }
  }
  // =====================================================================
  // PRIVATE: Brow noise (simplex-driven organic drift)
  // =====================================================================
  /**
   * Write the five brow blendshapes into `blendshapes`.
   *
   * Each channel is noise remapped from [-1, 1] to [0, 1], scaled, and
   * clamped to [0, 1]. A frame-to-frame energy rise above
   * EMPHASIS_ENERGY_THRESHOLD adds a decaying boost to browInnerUp.
   */
  updateBrowNoise(delta, audioEnergy, isSpeaking, blendshapes) {
    this.noiseTime += delta;
    const energyDelta = audioEnergy - this.previousEnergy;
    if (energyDelta > EMPHASIS_ENERGY_THRESHOLD) {
      this.emphasisLevel = 1;
    }
    this.emphasisLevel = Math.max(0, this.emphasisLevel - delta * EMPHASIS_DECAY_RATE);
    this.previousEnergy = audioEnergy;
    // Brows move more while speech energy is present.
    const speechMul = isSpeaking && audioEnergy > 0 ? this.browNoiseSpeechMultiplier : 1;
    const amp = this.browNoiseAmplitude * speechMul;
    const innerUpNoise = simplex2d(this.noiseTime * BROW_INNER_UP_FREQ, BROW_INNER_UP_PHASE);
    const innerUpBase = (innerUpNoise * 0.5 + 0.5) * amp * 0.83;
    const innerUpEmphasis = this.emphasisLevel * 0.25;
    blendshapes["browInnerUp"] = clamp(innerUpBase + innerUpEmphasis, 0, 1);
    const outerLeftNoise = simplex2d(this.noiseTime * BROW_OUTER_LEFT_FREQ, BROW_OUTER_LEFT_PHASE);
    blendshapes["browOuterUpLeft"] = clamp((outerLeftNoise * 0.5 + 0.5) * amp * 0.5, 0, 1);
    const outerRightNoise = simplex2d(this.noiseTime * BROW_OUTER_RIGHT_FREQ, BROW_OUTER_RIGHT_PHASE);
    blendshapes["browOuterUpRight"] = clamp((outerRightNoise * 0.5 + 0.5) * amp * 0.5, 0, 1);
    const downLeftNoise = simplex2d(this.noiseTime * BROW_DOWN_FREQ, BROW_DOWN_LEFT_PHASE);
    blendshapes["browDownLeft"] = clamp((downLeftNoise * 0.5 + 0.5) * amp * 0.33, 0, 1);
    const downRightNoise = simplex2d(this.noiseTime * BROW_DOWN_FREQ, BROW_DOWN_RIGHT_PHASE);
    blendshapes["browDownRight"] = clamp((downRightNoise * 0.5 + 0.5) * amp * 0.33, 0, 1);
  }
};
|
|
8236
|
+
|
|
8237
|
+
// ../types/dist/index.mjs
|
|
8238
|
+
// Protocol version number exported alongside the event type guard.
var PROTOCOL_VERSION = 1;
/**
 * Structural check for a protocol event.
 *
 * @param obj - Any value
 * @returns true when obj is a non-null object that has "v", "type" and "ts"
 *   properties (own or inherited); the property values are not inspected
 */
function isProtocolEvent(obj) {
  if (typeof obj !== "object" || obj === null) {
    return false;
  }
  return ["v", "type", "ts"].every((key) => key in obj);
}
|
|
7011
8242
|
export {
|
|
7012
8243
|
ARKIT_BLENDSHAPES,
|
|
7013
8244
|
AgentCoreAdapter,
|
|
@@ -7021,12 +8252,15 @@ export {
|
|
|
7021
8252
|
ConversationOrchestrator,
|
|
7022
8253
|
DEFAULT_ANIMATION_CONFIG,
|
|
7023
8254
|
DEFAULT_LOGGING_CONFIG,
|
|
8255
|
+
EMOTION_ARKIT_MAP,
|
|
7024
8256
|
EMOTION_NAMES,
|
|
7025
8257
|
EMOTION_VECTOR_SIZE,
|
|
7026
8258
|
EmotionController,
|
|
7027
8259
|
EmotionPresets,
|
|
8260
|
+
EmotionToBlendshapeMapper,
|
|
7028
8261
|
EmphasisDetector,
|
|
7029
8262
|
EventEmitter,
|
|
8263
|
+
FullFacePipeline,
|
|
7030
8264
|
INFERENCE_LATENCY_BUCKETS,
|
|
7031
8265
|
InterruptionHandler,
|
|
7032
8266
|
LAMPipeline,
|
|
@@ -7038,6 +8272,8 @@ export {
|
|
|
7038
8272
|
ModelCache,
|
|
7039
8273
|
OTLPExporter,
|
|
7040
8274
|
OmoteTelemetry,
|
|
8275
|
+
PROTOCOL_VERSION,
|
|
8276
|
+
ProceduralLifeLayer,
|
|
7041
8277
|
RingBuffer,
|
|
7042
8278
|
SafariSpeechRecognition,
|
|
7043
8279
|
SenseVoiceInference,
|
|
@@ -7045,6 +8281,7 @@ export {
|
|
|
7045
8281
|
SileroVADWorker,
|
|
7046
8282
|
SyncedAudioPipeline,
|
|
7047
8283
|
TenantManager,
|
|
8284
|
+
UPPER_FACE_BLENDSHAPES,
|
|
7048
8285
|
WAV2ARKIT_BLENDSHAPES,
|
|
7049
8286
|
Wav2ArkitCpuInference,
|
|
7050
8287
|
Wav2Vec2Inference,
|
|
@@ -7083,6 +8320,7 @@ export {
|
|
|
7083
8320
|
isIOSSafari,
|
|
7084
8321
|
isMobile,
|
|
7085
8322
|
isOnnxRuntimeLoaded,
|
|
8323
|
+
isProtocolEvent,
|
|
7086
8324
|
isSafari,
|
|
7087
8325
|
isSpeechRecognitionAvailable,
|
|
7088
8326
|
isWebGPUAvailable,
|