@omote/core 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +636 -1
- package/dist/index.d.ts +636 -1
- package/dist/index.js +1384 -146
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1384 -146
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -42,12 +42,15 @@ __export(index_exports, {
|
|
|
42
42
|
ConversationOrchestrator: () => ConversationOrchestrator,
|
|
43
43
|
DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
|
|
44
44
|
DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
|
|
45
|
+
EMOTION_ARKIT_MAP: () => EMOTION_ARKIT_MAP,
|
|
45
46
|
EMOTION_NAMES: () => EMOTION_NAMES,
|
|
46
47
|
EMOTION_VECTOR_SIZE: () => EMOTION_VECTOR_SIZE,
|
|
47
48
|
EmotionController: () => EmotionController,
|
|
48
49
|
EmotionPresets: () => EmotionPresets,
|
|
50
|
+
EmotionToBlendshapeMapper: () => EmotionToBlendshapeMapper,
|
|
49
51
|
EmphasisDetector: () => EmphasisDetector,
|
|
50
52
|
EventEmitter: () => EventEmitter,
|
|
53
|
+
FullFacePipeline: () => FullFacePipeline,
|
|
51
54
|
INFERENCE_LATENCY_BUCKETS: () => INFERENCE_LATENCY_BUCKETS,
|
|
52
55
|
InterruptionHandler: () => InterruptionHandler,
|
|
53
56
|
LAMPipeline: () => LAMPipeline,
|
|
@@ -59,6 +62,8 @@ __export(index_exports, {
|
|
|
59
62
|
ModelCache: () => ModelCache,
|
|
60
63
|
OTLPExporter: () => OTLPExporter,
|
|
61
64
|
OmoteTelemetry: () => OmoteTelemetry,
|
|
65
|
+
PROTOCOL_VERSION: () => PROTOCOL_VERSION,
|
|
66
|
+
ProceduralLifeLayer: () => ProceduralLifeLayer,
|
|
62
67
|
RingBuffer: () => RingBuffer,
|
|
63
68
|
SafariSpeechRecognition: () => SafariSpeechRecognition,
|
|
64
69
|
SenseVoiceInference: () => SenseVoiceInference,
|
|
@@ -66,6 +71,7 @@ __export(index_exports, {
|
|
|
66
71
|
SileroVADWorker: () => SileroVADWorker,
|
|
67
72
|
SyncedAudioPipeline: () => SyncedAudioPipeline,
|
|
68
73
|
TenantManager: () => TenantManager,
|
|
74
|
+
UPPER_FACE_BLENDSHAPES: () => UPPER_FACE_BLENDSHAPES,
|
|
69
75
|
WAV2ARKIT_BLENDSHAPES: () => WAV2ARKIT_BLENDSHAPES,
|
|
70
76
|
Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
|
|
71
77
|
Wav2Vec2Inference: () => Wav2Vec2Inference,
|
|
@@ -104,6 +110,7 @@ __export(index_exports, {
|
|
|
104
110
|
isIOSSafari: () => isIOSSafari,
|
|
105
111
|
isMobile: () => isMobile,
|
|
106
112
|
isOnnxRuntimeLoaded: () => isOnnxRuntimeLoaded,
|
|
113
|
+
isProtocolEvent: () => isProtocolEvent,
|
|
107
114
|
isSafari: () => isSafari,
|
|
108
115
|
isSpeechRecognitionAvailable: () => isSpeechRecognitionAvailable,
|
|
109
116
|
isWebGPUAvailable: () => isWebGPUAvailable,
|
|
@@ -1024,6 +1031,327 @@ var UPPER_FACE_BLENDSHAPES = [
|
|
|
1024
1031
|
"cheekSquintLeft",
|
|
1025
1032
|
"cheekSquintRight"
|
|
1026
1033
|
];
|
|
1034
|
+
var EMOTION_ARKIT_MAP = {
|
|
1035
|
+
happy: {
|
|
1036
|
+
// AU6 - Cheek raiser (primary Duchenne smile marker)
|
|
1037
|
+
cheekSquintLeft: 0.5,
|
|
1038
|
+
cheekSquintRight: 0.5,
|
|
1039
|
+
// Slight eye squint from genuine smile (orbicularis oculi activation)
|
|
1040
|
+
eyeSquintLeft: 0.2,
|
|
1041
|
+
eyeSquintRight: 0.2
|
|
1042
|
+
},
|
|
1043
|
+
angry: {
|
|
1044
|
+
// AU4 - Brow lowerer (intense, primary anger marker)
|
|
1045
|
+
browDownLeft: 0.7,
|
|
1046
|
+
browDownRight: 0.7,
|
|
1047
|
+
// AU5 - Upper lid raiser (wide eyes, part of the "glare")
|
|
1048
|
+
eyeWideLeft: 0.4,
|
|
1049
|
+
eyeWideRight: 0.4,
|
|
1050
|
+
// AU7 - Lid tightener (tense stare, combines with AU5 for angry glare)
|
|
1051
|
+
eyeSquintLeft: 0.3,
|
|
1052
|
+
eyeSquintRight: 0.3
|
|
1053
|
+
},
|
|
1054
|
+
sad: {
|
|
1055
|
+
// AU1 - Inner brow raiser (primary sadness marker)
|
|
1056
|
+
browInnerUp: 0.6,
|
|
1057
|
+
// AU4 - Brow lowerer (brows drawn together)
|
|
1058
|
+
browDownLeft: 0.3,
|
|
1059
|
+
browDownRight: 0.3
|
|
1060
|
+
},
|
|
1061
|
+
neutral: {}
|
|
1062
|
+
// All zeros - no expression overlay
|
|
1063
|
+
};
|
|
1064
|
+
var DEFAULT_CONFIG = {
|
|
1065
|
+
smoothingFactor: 0.15,
|
|
1066
|
+
confidenceThreshold: 0.3,
|
|
1067
|
+
intensity: 1,
|
|
1068
|
+
blendMode: "dominant",
|
|
1069
|
+
minBlendProbability: 0.1,
|
|
1070
|
+
energyModulation: false,
|
|
1071
|
+
minEnergyScale: 0.3,
|
|
1072
|
+
maxEnergyScale: 1
|
|
1073
|
+
};
|
|
1074
|
+
function createZeroBlendshapes() {
|
|
1075
|
+
const result = {};
|
|
1076
|
+
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
1077
|
+
result[name] = 0;
|
|
1078
|
+
}
|
|
1079
|
+
return result;
|
|
1080
|
+
}
|
|
1081
|
+
function clamp01(value) {
|
|
1082
|
+
return Math.max(0, Math.min(1, value));
|
|
1083
|
+
}
|
|
1084
|
+
var EmotionToBlendshapeMapper = class {
|
|
1085
|
+
/**
|
|
1086
|
+
* Create a new EmotionToBlendshapeMapper
|
|
1087
|
+
*
|
|
1088
|
+
* @param config - Optional configuration
|
|
1089
|
+
*/
|
|
1090
|
+
constructor(config) {
|
|
1091
|
+
this.currentEnergy = 1;
|
|
1092
|
+
this.config = {
|
|
1093
|
+
...DEFAULT_CONFIG,
|
|
1094
|
+
...config
|
|
1095
|
+
};
|
|
1096
|
+
this.targetBlendshapes = createZeroBlendshapes();
|
|
1097
|
+
this.currentBlendshapes = createZeroBlendshapes();
|
|
1098
|
+
}
|
|
1099
|
+
/**
|
|
1100
|
+
* Map an emotion frame to target blendshapes
|
|
1101
|
+
*
|
|
1102
|
+
* This sets the target values that the mapper will smoothly interpolate
|
|
1103
|
+
* towards. Call update() each frame to apply smoothing.
|
|
1104
|
+
*
|
|
1105
|
+
* @param frame - Emotion frame from Emotion2VecInference
|
|
1106
|
+
* @param audioEnergy - Optional audio energy (0-1) for energy modulation
|
|
1107
|
+
* @returns Target upper face blendshapes (before smoothing)
|
|
1108
|
+
*/
|
|
1109
|
+
mapFrame(frame, audioEnergy) {
|
|
1110
|
+
this.targetBlendshapes = createZeroBlendshapes();
|
|
1111
|
+
if (audioEnergy !== void 0) {
|
|
1112
|
+
this.currentEnergy = clamp01(audioEnergy);
|
|
1113
|
+
}
|
|
1114
|
+
if (!frame) {
|
|
1115
|
+
return { ...this.targetBlendshapes };
|
|
1116
|
+
}
|
|
1117
|
+
if (this.config.blendMode === "weighted") {
|
|
1118
|
+
this.mapFrameWeighted(frame);
|
|
1119
|
+
} else {
|
|
1120
|
+
this.mapFrameDominant(frame);
|
|
1121
|
+
}
|
|
1122
|
+
if (this.config.energyModulation) {
|
|
1123
|
+
this.applyEnergyModulation();
|
|
1124
|
+
}
|
|
1125
|
+
return { ...this.targetBlendshapes };
|
|
1126
|
+
}
|
|
1127
|
+
/**
|
|
1128
|
+
* Map using dominant emotion only (original behavior)
|
|
1129
|
+
*/
|
|
1130
|
+
mapFrameDominant(frame) {
|
|
1131
|
+
if (frame.confidence < this.config.confidenceThreshold) {
|
|
1132
|
+
return;
|
|
1133
|
+
}
|
|
1134
|
+
const emotion = frame.emotion;
|
|
1135
|
+
const mapping = EMOTION_ARKIT_MAP[emotion];
|
|
1136
|
+
if (!mapping) {
|
|
1137
|
+
return;
|
|
1138
|
+
}
|
|
1139
|
+
const scale = this.config.intensity * frame.confidence;
|
|
1140
|
+
for (const [name, value] of Object.entries(mapping)) {
|
|
1141
|
+
const blendshapeName = name;
|
|
1142
|
+
if (value !== void 0) {
|
|
1143
|
+
this.targetBlendshapes[blendshapeName] = clamp01(value * scale);
|
|
1144
|
+
}
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
/**
|
|
1148
|
+
* Map using weighted blend of all emotions by probability
|
|
1149
|
+
* Creates more nuanced expressions (e.g., bittersweet = happy + sad)
|
|
1150
|
+
*/
|
|
1151
|
+
mapFrameWeighted(frame) {
|
|
1152
|
+
if (!frame.probabilities) {
|
|
1153
|
+
this.mapFrameDominant(frame);
|
|
1154
|
+
return;
|
|
1155
|
+
}
|
|
1156
|
+
for (const [emotion, probability] of Object.entries(frame.probabilities)) {
|
|
1157
|
+
if (probability < this.config.minBlendProbability) {
|
|
1158
|
+
continue;
|
|
1159
|
+
}
|
|
1160
|
+
const mapping = EMOTION_ARKIT_MAP[emotion];
|
|
1161
|
+
if (!mapping) {
|
|
1162
|
+
continue;
|
|
1163
|
+
}
|
|
1164
|
+
const scale = this.config.intensity * probability;
|
|
1165
|
+
for (const [name, value] of Object.entries(mapping)) {
|
|
1166
|
+
const blendshapeName = name;
|
|
1167
|
+
if (value !== void 0) {
|
|
1168
|
+
this.targetBlendshapes[blendshapeName] += value * scale;
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
1173
|
+
this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name]);
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
/**
|
|
1177
|
+
* Apply energy modulation to scale emotion intensity by audio energy
|
|
1178
|
+
* Louder speech = stronger expressions
|
|
1179
|
+
*/
|
|
1180
|
+
applyEnergyModulation() {
|
|
1181
|
+
const { minEnergyScale, maxEnergyScale } = this.config;
|
|
1182
|
+
const energyScale = minEnergyScale + this.currentEnergy * (maxEnergyScale - minEnergyScale);
|
|
1183
|
+
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
1184
|
+
this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name] * energyScale);
|
|
1185
|
+
}
|
|
1186
|
+
}
|
|
1187
|
+
/**
|
|
1188
|
+
* Apply smoothing to interpolate current values towards target
|
|
1189
|
+
*
|
|
1190
|
+
* Uses exponential moving average:
|
|
1191
|
+
* current = current + smoothingFactor * (target - current)
|
|
1192
|
+
*
|
|
1193
|
+
* @param _deltaMs - Delta time in milliseconds (reserved for future time-based smoothing)
|
|
1194
|
+
*/
|
|
1195
|
+
update(_deltaMs) {
|
|
1196
|
+
const factor = this.config.smoothingFactor;
|
|
1197
|
+
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
1198
|
+
const target = this.targetBlendshapes[name];
|
|
1199
|
+
const current = this.currentBlendshapes[name];
|
|
1200
|
+
this.currentBlendshapes[name] = clamp01(current + factor * (target - current));
|
|
1201
|
+
}
|
|
1202
|
+
}
|
|
1203
|
+
/**
|
|
1204
|
+
* Get current smoothed blendshape values
|
|
1205
|
+
*
|
|
1206
|
+
* @returns Current upper face blendshapes (after smoothing)
|
|
1207
|
+
*/
|
|
1208
|
+
getCurrentBlendshapes() {
|
|
1209
|
+
return { ...this.currentBlendshapes };
|
|
1210
|
+
}
|
|
1211
|
+
/**
|
|
1212
|
+
* Reset mapper to neutral state
|
|
1213
|
+
*
|
|
1214
|
+
* Sets both target and current blendshapes to zero.
|
|
1215
|
+
*/
|
|
1216
|
+
reset() {
|
|
1217
|
+
this.targetBlendshapes = createZeroBlendshapes();
|
|
1218
|
+
this.currentBlendshapes = createZeroBlendshapes();
|
|
1219
|
+
this.currentEnergy = 1;
|
|
1220
|
+
}
|
|
1221
|
+
/**
|
|
1222
|
+
* Get current configuration
|
|
1223
|
+
*/
|
|
1224
|
+
getConfig() {
|
|
1225
|
+
return { ...this.config };
|
|
1226
|
+
}
|
|
1227
|
+
/**
|
|
1228
|
+
* Update configuration
|
|
1229
|
+
*
|
|
1230
|
+
* @param config - Partial configuration to update
|
|
1231
|
+
*/
|
|
1232
|
+
setConfig(config) {
|
|
1233
|
+
this.config = {
|
|
1234
|
+
...this.config,
|
|
1235
|
+
...config
|
|
1236
|
+
};
|
|
1237
|
+
}
|
|
1238
|
+
};
|
|
1239
|
+
|
|
1240
|
+
// src/animation/audioEnergy.ts
|
|
1241
|
+
function calculateRMS(samples) {
|
|
1242
|
+
if (samples.length === 0) return 0;
|
|
1243
|
+
let sumSquares = 0;
|
|
1244
|
+
for (let i = 0; i < samples.length; i++) {
|
|
1245
|
+
sumSquares += samples[i] * samples[i];
|
|
1246
|
+
}
|
|
1247
|
+
return Math.sqrt(sumSquares / samples.length);
|
|
1248
|
+
}
|
|
1249
|
+
function calculatePeak(samples) {
|
|
1250
|
+
let peak = 0;
|
|
1251
|
+
for (let i = 0; i < samples.length; i++) {
|
|
1252
|
+
const abs = Math.abs(samples[i]);
|
|
1253
|
+
if (abs > peak) peak = abs;
|
|
1254
|
+
}
|
|
1255
|
+
return peak;
|
|
1256
|
+
}
|
|
1257
|
+
var AudioEnergyAnalyzer = class {
|
|
1258
|
+
/**
|
|
1259
|
+
* @param smoothingFactor How much to smooth (0 = no smoothing, 1 = infinite smoothing). Default 0.85
|
|
1260
|
+
* @param noiseFloor Minimum energy threshold to consider as signal. Default 0.01
|
|
1261
|
+
*/
|
|
1262
|
+
constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
|
|
1263
|
+
this.smoothedRMS = 0;
|
|
1264
|
+
this.smoothedPeak = 0;
|
|
1265
|
+
this.smoothingFactor = Math.max(0, Math.min(0.99, smoothingFactor));
|
|
1266
|
+
this.noiseFloor = noiseFloor;
|
|
1267
|
+
}
|
|
1268
|
+
/**
|
|
1269
|
+
* Process audio samples and return smoothed energy values
|
|
1270
|
+
* @param samples Audio samples (Float32Array)
|
|
1271
|
+
* @returns Object with rms and peak values
|
|
1272
|
+
*/
|
|
1273
|
+
process(samples) {
|
|
1274
|
+
const instantRMS = calculateRMS(samples);
|
|
1275
|
+
const instantPeak = calculatePeak(samples);
|
|
1276
|
+
const gatedRMS = instantRMS > this.noiseFloor ? instantRMS : 0;
|
|
1277
|
+
const gatedPeak = instantPeak > this.noiseFloor ? instantPeak : 0;
|
|
1278
|
+
if (gatedRMS > this.smoothedRMS) {
|
|
1279
|
+
this.smoothedRMS = this.smoothedRMS * 0.5 + gatedRMS * 0.5;
|
|
1280
|
+
} else {
|
|
1281
|
+
this.smoothedRMS = this.smoothedRMS * this.smoothingFactor + gatedRMS * (1 - this.smoothingFactor);
|
|
1282
|
+
}
|
|
1283
|
+
if (gatedPeak > this.smoothedPeak) {
|
|
1284
|
+
this.smoothedPeak = this.smoothedPeak * 0.3 + gatedPeak * 0.7;
|
|
1285
|
+
} else {
|
|
1286
|
+
this.smoothedPeak = this.smoothedPeak * this.smoothingFactor + gatedPeak * (1 - this.smoothingFactor);
|
|
1287
|
+
}
|
|
1288
|
+
const energy = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
|
|
1289
|
+
return {
|
|
1290
|
+
rms: this.smoothedRMS,
|
|
1291
|
+
peak: this.smoothedPeak,
|
|
1292
|
+
energy: Math.min(1, energy * 2)
|
|
1293
|
+
// Scale up and clamp
|
|
1294
|
+
};
|
|
1295
|
+
}
|
|
1296
|
+
/**
|
|
1297
|
+
* Reset analyzer state
|
|
1298
|
+
*/
|
|
1299
|
+
reset() {
|
|
1300
|
+
this.smoothedRMS = 0;
|
|
1301
|
+
this.smoothedPeak = 0;
|
|
1302
|
+
}
|
|
1303
|
+
/**
|
|
1304
|
+
* Get current smoothed RMS value
|
|
1305
|
+
*/
|
|
1306
|
+
get rms() {
|
|
1307
|
+
return this.smoothedRMS;
|
|
1308
|
+
}
|
|
1309
|
+
/**
|
|
1310
|
+
* Get current smoothed peak value
|
|
1311
|
+
*/
|
|
1312
|
+
get peak() {
|
|
1313
|
+
return this.smoothedPeak;
|
|
1314
|
+
}
|
|
1315
|
+
};
|
|
1316
|
+
var EmphasisDetector = class {
|
|
1317
|
+
/**
|
|
1318
|
+
* @param historySize Number of frames to track. Default 10
|
|
1319
|
+
* @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
|
|
1320
|
+
*/
|
|
1321
|
+
constructor(historySize = 10, emphasisThreshold = 0.15) {
|
|
1322
|
+
this.energyHistory = [];
|
|
1323
|
+
this.historySize = historySize;
|
|
1324
|
+
this.emphasisThreshold = emphasisThreshold;
|
|
1325
|
+
}
|
|
1326
|
+
/**
|
|
1327
|
+
* Process energy value and detect emphasis
|
|
1328
|
+
* @param energy Current energy value (0-1)
|
|
1329
|
+
* @returns Object with isEmphasis flag and emphasisStrength
|
|
1330
|
+
*/
|
|
1331
|
+
process(energy) {
|
|
1332
|
+
this.energyHistory.push(energy);
|
|
1333
|
+
if (this.energyHistory.length > this.historySize) {
|
|
1334
|
+
this.energyHistory.shift();
|
|
1335
|
+
}
|
|
1336
|
+
if (this.energyHistory.length < 3) {
|
|
1337
|
+
return { isEmphasis: false, emphasisStrength: 0 };
|
|
1338
|
+
}
|
|
1339
|
+
const prevFrames = this.energyHistory.slice(0, -1);
|
|
1340
|
+
const avgPrev = prevFrames.reduce((a, b) => a + b, 0) / prevFrames.length;
|
|
1341
|
+
const increase = energy - avgPrev;
|
|
1342
|
+
const isEmphasis = increase > this.emphasisThreshold;
|
|
1343
|
+
return {
|
|
1344
|
+
isEmphasis,
|
|
1345
|
+
emphasisStrength: isEmphasis ? Math.min(1, increase / 0.3) : 0
|
|
1346
|
+
};
|
|
1347
|
+
}
|
|
1348
|
+
/**
|
|
1349
|
+
* Reset detector state
|
|
1350
|
+
*/
|
|
1351
|
+
reset() {
|
|
1352
|
+
this.energyHistory = [];
|
|
1353
|
+
}
|
|
1354
|
+
};
|
|
1027
1355
|
|
|
1028
1356
|
// src/telemetry/exporters/console.ts
|
|
1029
1357
|
var ConsoleExporter = class {
|
|
@@ -2925,7 +3253,7 @@ var CTC_VOCAB = [
|
|
|
2925
3253
|
"Q",
|
|
2926
3254
|
"Z"
|
|
2927
3255
|
];
|
|
2928
|
-
var Wav2Vec2Inference = class {
|
|
3256
|
+
var _Wav2Vec2Inference = class _Wav2Vec2Inference {
|
|
2929
3257
|
constructor(config) {
|
|
2930
3258
|
this.modelId = "wav2vec2";
|
|
2931
3259
|
this.session = null;
|
|
@@ -2934,6 +3262,10 @@ var Wav2Vec2Inference = class {
|
|
|
2934
3262
|
this.isLoading = false;
|
|
2935
3263
|
// Inference queue for handling concurrent calls
|
|
2936
3264
|
this.inferenceQueue = Promise.resolve();
|
|
3265
|
+
// Session health: set to true if session.run() times out.
|
|
3266
|
+
// A timed-out session may have a zombie GPU/WASM dispatch still running,
|
|
3267
|
+
// so all future infer() calls reject immediately to prevent concurrent access.
|
|
3268
|
+
this.poisoned = false;
|
|
2937
3269
|
this.config = config;
|
|
2938
3270
|
this.numIdentityClasses = config.numIdentityClasses ?? 12;
|
|
2939
3271
|
}
|
|
@@ -2943,6 +3275,10 @@ var Wav2Vec2Inference = class {
|
|
|
2943
3275
|
get isLoaded() {
|
|
2944
3276
|
return this.session !== null;
|
|
2945
3277
|
}
|
|
3278
|
+
/** True if inference timed out and the session is permanently unusable */
|
|
3279
|
+
get isSessionPoisoned() {
|
|
3280
|
+
return this.poisoned;
|
|
3281
|
+
}
|
|
2946
3282
|
/**
|
|
2947
3283
|
* Load the ONNX model
|
|
2948
3284
|
*/
|
|
@@ -3092,12 +3428,23 @@ var Wav2Vec2Inference = class {
|
|
|
3092
3428
|
logger2.debug("Running warmup inference to initialize GPU context");
|
|
3093
3429
|
const warmupStart = performance.now();
|
|
3094
3430
|
const silentAudio = new Float32Array(16e3);
|
|
3095
|
-
|
|
3431
|
+
const WARMUP_TIMEOUT_MS = 15e3;
|
|
3432
|
+
const warmupResult = await Promise.race([
|
|
3433
|
+
this.infer(silentAudio, 0).then(() => "ok"),
|
|
3434
|
+
new Promise((r) => setTimeout(() => r("timeout"), WARMUP_TIMEOUT_MS))
|
|
3435
|
+
]);
|
|
3096
3436
|
const warmupTimeMs = performance.now() - warmupStart;
|
|
3097
|
-
|
|
3098
|
-
|
|
3099
|
-
|
|
3100
|
-
|
|
3437
|
+
if (warmupResult === "timeout") {
|
|
3438
|
+
logger2.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
|
|
3439
|
+
timeoutMs: WARMUP_TIMEOUT_MS,
|
|
3440
|
+
backend: this._backend
|
|
3441
|
+
});
|
|
3442
|
+
} else {
|
|
3443
|
+
logger2.info("Warmup inference complete", {
|
|
3444
|
+
warmupTimeMs: Math.round(warmupTimeMs),
|
|
3445
|
+
backend: this._backend
|
|
3446
|
+
});
|
|
3447
|
+
}
|
|
3101
3448
|
telemetry?.recordHistogram("omote.model.warmup_time", warmupTimeMs, {
|
|
3102
3449
|
model: "wav2vec2",
|
|
3103
3450
|
backend: this._backend
|
|
@@ -3131,6 +3478,9 @@ var Wav2Vec2Inference = class {
|
|
|
3131
3478
|
if (!this.session) {
|
|
3132
3479
|
throw new Error("Model not loaded. Call load() first.");
|
|
3133
3480
|
}
|
|
3481
|
+
if (this.poisoned) {
|
|
3482
|
+
throw new Error("Wav2Vec2 session timed out \u2014 inference unavailable until page reload");
|
|
3483
|
+
}
|
|
3134
3484
|
const audioSamplesCopy = new Float32Array(audioSamples);
|
|
3135
3485
|
let audio;
|
|
3136
3486
|
if (audioSamplesCopy.length === 16e3) {
|
|
@@ -3186,7 +3536,15 @@ var Wav2Vec2Inference = class {
|
|
|
3186
3536
|
});
|
|
3187
3537
|
try {
|
|
3188
3538
|
const startTime = performance.now();
|
|
3189
|
-
const results = await this.session.run(feeds);
|
|
3539
|
+
const results = await Promise.race([
|
|
3540
|
+
this.session.run(feeds),
|
|
3541
|
+
new Promise(
|
|
3542
|
+
(_, rej) => setTimeout(
|
|
3543
|
+
() => rej(new Error(`Wav2Vec2 inference timed out after ${_Wav2Vec2Inference.INFERENCE_TIMEOUT_MS}ms`)),
|
|
3544
|
+
_Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
|
|
3545
|
+
)
|
|
3546
|
+
)
|
|
3547
|
+
]);
|
|
3190
3548
|
const inferenceTimeMs = performance.now() - startTime;
|
|
3191
3549
|
const asrOutput = results["asr_logits"];
|
|
3192
3550
|
const blendshapeOutput = results["blendshapes"];
|
|
@@ -3240,50 +3598,411 @@ var Wav2Vec2Inference = class {
|
|
|
3240
3598
|
inferenceTimeMs
|
|
3241
3599
|
});
|
|
3242
3600
|
} catch (err) {
|
|
3601
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
3602
|
+
if (errMsg.includes("timed out")) {
|
|
3603
|
+
this.poisoned = true;
|
|
3604
|
+
logger2.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
|
|
3605
|
+
backend: this._backend,
|
|
3606
|
+
timeoutMs: _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
|
|
3607
|
+
});
|
|
3608
|
+
} else {
|
|
3609
|
+
logger2.error("Inference failed", { error: errMsg, backend: this._backend });
|
|
3610
|
+
}
|
|
3243
3611
|
span?.endWithError(err instanceof Error ? err : new Error(String(err)));
|
|
3244
3612
|
telemetry?.incrementCounter("omote.inference.total", 1, {
|
|
3245
3613
|
model: "wav2vec2",
|
|
3246
3614
|
backend: this._backend,
|
|
3247
3615
|
status: "error"
|
|
3248
3616
|
});
|
|
3249
|
-
reject(err);
|
|
3617
|
+
reject(err);
|
|
3618
|
+
}
|
|
3619
|
+
});
|
|
3620
|
+
});
|
|
3621
|
+
}
|
|
3622
|
+
/**
|
|
3623
|
+
* Get blendshape value by name for a specific frame
|
|
3624
|
+
*/
|
|
3625
|
+
getBlendshape(blendshapes, name) {
|
|
3626
|
+
const index = LAM_BLENDSHAPES.indexOf(name);
|
|
3627
|
+
if (index === -1) {
|
|
3628
|
+
throw new Error(`Unknown blendshape: ${name}`);
|
|
3629
|
+
}
|
|
3630
|
+
return blendshapes[index];
|
|
3631
|
+
}
|
|
3632
|
+
/**
|
|
3633
|
+
* Dispose of the model and free resources
|
|
3634
|
+
*/
|
|
3635
|
+
async dispose() {
|
|
3636
|
+
if (this.session) {
|
|
3637
|
+
await this.session.release();
|
|
3638
|
+
this.session = null;
|
|
3639
|
+
}
|
|
3640
|
+
}
|
|
3641
|
+
};
|
|
3642
|
+
_Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
|
|
3643
|
+
/**
|
|
3644
|
+
* Check if WebGPU is available and working
|
|
3645
|
+
* (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
|
|
3646
|
+
*/
|
|
3647
|
+
_Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
|
|
3648
|
+
var Wav2Vec2Inference = _Wav2Vec2Inference;
|
|
3649
|
+
|
|
3650
|
+
// src/audio/FullFacePipeline.ts
|
|
3651
|
+
var logger3 = createLogger("FullFacePipeline");
|
|
3652
|
+
function pcm16ToFloat322(buffer) {
|
|
3653
|
+
const byteLen = buffer.byteLength & ~1;
|
|
3654
|
+
const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
|
|
3655
|
+
const float32 = new Float32Array(int16.length);
|
|
3656
|
+
for (let i = 0; i < int16.length; i++) {
|
|
3657
|
+
float32[i] = int16[i] / 32768;
|
|
3658
|
+
}
|
|
3659
|
+
return float32;
|
|
3660
|
+
}
|
|
3661
|
+
var BLENDSHAPE_INDEX_MAP = /* @__PURE__ */ new Map();
|
|
3662
|
+
LAM_BLENDSHAPES.forEach((name, index) => {
|
|
3663
|
+
BLENDSHAPE_INDEX_MAP.set(name, index);
|
|
3664
|
+
});
|
|
3665
|
+
var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
|
|
3666
|
+
var EMOTION_LABEL_MAP = {
|
|
3667
|
+
// Direct labels
|
|
3668
|
+
happy: "happy",
|
|
3669
|
+
sad: "sad",
|
|
3670
|
+
angry: "angry",
|
|
3671
|
+
neutral: "neutral",
|
|
3672
|
+
// Natural language synonyms
|
|
3673
|
+
excited: "happy",
|
|
3674
|
+
joyful: "happy",
|
|
3675
|
+
cheerful: "happy",
|
|
3676
|
+
delighted: "happy",
|
|
3677
|
+
amused: "happy",
|
|
3678
|
+
melancholic: "sad",
|
|
3679
|
+
sorrowful: "sad",
|
|
3680
|
+
disappointed: "sad",
|
|
3681
|
+
frustrated: "angry",
|
|
3682
|
+
irritated: "angry",
|
|
3683
|
+
furious: "angry",
|
|
3684
|
+
annoyed: "angry",
|
|
3685
|
+
// SenseVoice labels
|
|
3686
|
+
fearful: "sad",
|
|
3687
|
+
disgusted: "angry",
|
|
3688
|
+
surprised: "happy"
|
|
3689
|
+
};
|
|
3690
|
+
var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
|
|
3691
|
+
constructor(options) {
|
|
3692
|
+
super();
|
|
3693
|
+
this.options = options;
|
|
3694
|
+
this.playbackStarted = false;
|
|
3695
|
+
this.monitorInterval = null;
|
|
3696
|
+
this.frameAnimationId = null;
|
|
3697
|
+
// Emotion state
|
|
3698
|
+
this.lastEmotionFrame = null;
|
|
3699
|
+
this.currentAudioEnergy = 0;
|
|
3700
|
+
// Stale frame detection
|
|
3701
|
+
this.lastNewFrameTime = 0;
|
|
3702
|
+
this.lastKnownLamFrame = null;
|
|
3703
|
+
this.staleWarningEmitted = false;
|
|
3704
|
+
const sampleRate = options.sampleRate ?? 16e3;
|
|
3705
|
+
this.emotionBlendFactor = options.emotionBlendFactor ?? 0.8;
|
|
3706
|
+
this.lamBlendFactor = options.lamBlendFactor ?? 0.2;
|
|
3707
|
+
const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
|
|
3708
|
+
const audioDelayMs = options.audioDelayMs ?? autoDelay;
|
|
3709
|
+
this.scheduler = new AudioScheduler({
|
|
3710
|
+
sampleRate,
|
|
3711
|
+
initialLookaheadSec: audioDelayMs / 1e3
|
|
3712
|
+
});
|
|
3713
|
+
this.coalescer = new AudioChunkCoalescer({
|
|
3714
|
+
sampleRate,
|
|
3715
|
+
targetDurationMs: options.chunkTargetMs ?? 200
|
|
3716
|
+
});
|
|
3717
|
+
this.lamPipeline = new LAMPipeline({
|
|
3718
|
+
sampleRate,
|
|
3719
|
+
onError: (error) => {
|
|
3720
|
+
logger3.error("LAM inference error", { message: error.message, stack: error.stack });
|
|
3721
|
+
this.emit("error", error);
|
|
3722
|
+
}
|
|
3723
|
+
});
|
|
3724
|
+
this.emotionMapper = new EmotionToBlendshapeMapper({
|
|
3725
|
+
smoothingFactor: 0.15,
|
|
3726
|
+
confidenceThreshold: 0.3,
|
|
3727
|
+
intensity: 1,
|
|
3728
|
+
energyModulation: true
|
|
3729
|
+
});
|
|
3730
|
+
this.energyAnalyzer = new AudioEnergyAnalyzer();
|
|
3731
|
+
}
|
|
3732
|
+
/**
|
|
3733
|
+
* Initialize the pipeline
|
|
3734
|
+
*/
|
|
3735
|
+
async initialize() {
|
|
3736
|
+
await this.scheduler.initialize();
|
|
3737
|
+
}
|
|
3738
|
+
/**
|
|
3739
|
+
* Set emotion label from backend (e.g., LLM response emotion).
|
|
3740
|
+
*
|
|
3741
|
+
* Converts a natural language emotion label into an EmotionFrame
|
|
3742
|
+
* that drives upper face blendshapes for the duration of the utterance.
|
|
3743
|
+
*
|
|
3744
|
+
* Supported labels: happy, excited, joyful, sad, melancholic, angry,
|
|
3745
|
+
* frustrated, neutral, etc.
|
|
3746
|
+
*
|
|
3747
|
+
* @param label - Emotion label string (case-insensitive)
|
|
3748
|
+
*/
|
|
3749
|
+
setEmotionLabel(label) {
|
|
3750
|
+
const normalized = label.toLowerCase();
|
|
3751
|
+
const mapped = EMOTION_LABEL_MAP[normalized] ?? "neutral";
|
|
3752
|
+
const probabilities = {
|
|
3753
|
+
neutral: 0.1,
|
|
3754
|
+
happy: 0.1,
|
|
3755
|
+
angry: 0.1,
|
|
3756
|
+
sad: 0.1
|
|
3757
|
+
};
|
|
3758
|
+
probabilities[mapped] = 0.7;
|
|
3759
|
+
const frame = {
|
|
3760
|
+
emotion: mapped,
|
|
3761
|
+
confidence: 0.7,
|
|
3762
|
+
probabilities
|
|
3763
|
+
};
|
|
3764
|
+
this.lastEmotionFrame = frame;
|
|
3765
|
+
logger3.info("Emotion label set", { label, mapped });
|
|
3766
|
+
}
|
|
3767
|
+
/**
|
|
3768
|
+
* Clear any set emotion label.
|
|
3769
|
+
* Falls back to prosody-only upper face animation.
|
|
3770
|
+
*/
|
|
3771
|
+
clearEmotionLabel() {
|
|
3772
|
+
this.lastEmotionFrame = null;
|
|
3773
|
+
}
|
|
3774
|
+
/**
|
|
3775
|
+
* Start a new playback session
|
|
3776
|
+
*
|
|
3777
|
+
* Resets all state and prepares for incoming audio chunks.
|
|
3778
|
+
* Audio will be scheduled immediately as chunks arrive (no buffering).
|
|
3779
|
+
*/
|
|
3780
|
+
start() {
|
|
3781
|
+
this.stopMonitoring();
|
|
3782
|
+
this.scheduler.reset();
|
|
3783
|
+
this.coalescer.reset();
|
|
3784
|
+
this.lamPipeline.reset();
|
|
3785
|
+
this.playbackStarted = false;
|
|
3786
|
+
this.lastEmotionFrame = null;
|
|
3787
|
+
this.currentAudioEnergy = 0;
|
|
3788
|
+
this.emotionMapper.reset();
|
|
3789
|
+
this.energyAnalyzer.reset();
|
|
3790
|
+
this.lastNewFrameTime = 0;
|
|
3791
|
+
this.lastKnownLamFrame = null;
|
|
3792
|
+
this.staleWarningEmitted = false;
|
|
3793
|
+
this.scheduler.warmup();
|
|
3794
|
+
this.startFrameLoop();
|
|
3795
|
+
this.startMonitoring();
|
|
3796
|
+
}
|
|
3797
|
+
/**
|
|
3798
|
+
* Receive audio chunk from network
|
|
3799
|
+
*
|
|
3800
|
+
* Audio-first design: schedules audio immediately, LAM runs in background.
|
|
3801
|
+
* This prevents LAM inference (50-300ms) from blocking audio scheduling.
|
|
3802
|
+
*
|
|
3803
|
+
* @param chunk - Uint8Array containing Int16 PCM audio
|
|
3804
|
+
*/
|
|
3805
|
+
async onAudioChunk(chunk) {
|
|
3806
|
+
const combined = this.coalescer.add(chunk);
|
|
3807
|
+
if (!combined) {
|
|
3808
|
+
return;
|
|
3809
|
+
}
|
|
3810
|
+
const float32 = pcm16ToFloat322(combined);
|
|
3811
|
+
const scheduleTime = await this.scheduler.schedule(float32);
|
|
3812
|
+
if (!this.playbackStarted) {
|
|
3813
|
+
this.playbackStarted = true;
|
|
3814
|
+
this.emit("playback_start", scheduleTime);
|
|
3815
|
+
}
|
|
3816
|
+
const { energy } = this.energyAnalyzer.process(float32);
|
|
3817
|
+
this.currentAudioEnergy = energy;
|
|
3818
|
+
this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
|
|
3819
|
+
this.emit("error", err);
|
|
3820
|
+
});
|
|
3821
|
+
}
|
|
3822
|
+
/**
|
|
3823
|
+
* Get emotion frame for current animation.
|
|
3824
|
+
*
|
|
3825
|
+
* Priority:
|
|
3826
|
+
* 1. Explicit emotion label from setEmotionLabel()
|
|
3827
|
+
* 2. Prosody fallback: subtle brow movement from audio energy
|
|
3828
|
+
*/
|
|
3829
|
+
getEmotionFrame() {
|
|
3830
|
+
if (this.lastEmotionFrame) {
|
|
3831
|
+
return { frame: this.lastEmotionFrame, energy: this.currentAudioEnergy };
|
|
3832
|
+
}
|
|
3833
|
+
return { frame: null, energy: this.currentAudioEnergy };
|
|
3834
|
+
}
|
|
3835
|
+
/**
|
|
3836
|
+
* Merge LAM blendshapes with emotion upper face blendshapes
|
|
3837
|
+
*/
|
|
3838
|
+
mergeBlendshapes(lamFrame, emotionFrame, audioEnergy) {
|
|
3839
|
+
const merged = new Float32Array(52);
|
|
3840
|
+
let emotionBlendshapes;
|
|
3841
|
+
if (emotionFrame) {
|
|
3842
|
+
this.emotionMapper.mapFrame(emotionFrame, audioEnergy);
|
|
3843
|
+
this.emotionMapper.update(33);
|
|
3844
|
+
emotionBlendshapes = this.emotionMapper.getCurrentBlendshapes();
|
|
3845
|
+
} else {
|
|
3846
|
+
emotionBlendshapes = {};
|
|
3847
|
+
for (const name of UPPER_FACE_BLENDSHAPES) {
|
|
3848
|
+
emotionBlendshapes[name] = 0;
|
|
3849
|
+
}
|
|
3850
|
+
}
|
|
3851
|
+
for (let i = 0; i < 52; i++) {
|
|
3852
|
+
const name = LAM_BLENDSHAPES[i];
|
|
3853
|
+
if (UPPER_FACE_SET.has(name)) {
|
|
3854
|
+
const emotionValue = emotionBlendshapes[name] ?? 0;
|
|
3855
|
+
const lamValue = lamFrame[i];
|
|
3856
|
+
merged[i] = emotionValue * this.emotionBlendFactor + lamValue * this.lamBlendFactor;
|
|
3857
|
+
} else {
|
|
3858
|
+
merged[i] = lamFrame[i];
|
|
3859
|
+
}
|
|
3860
|
+
}
|
|
3861
|
+
return { merged, emotionBlendshapes };
|
|
3862
|
+
}
|
|
3863
|
+
/**
|
|
3864
|
+
* Start frame animation loop
|
|
3865
|
+
*/
|
|
3866
|
+
startFrameLoop() {
|
|
3867
|
+
const updateFrame = () => {
|
|
3868
|
+
const currentTime = this.scheduler.getCurrentTime();
|
|
3869
|
+
const lamFrame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
|
|
3870
|
+
if (lamFrame) {
|
|
3871
|
+
if (lamFrame !== this.lastKnownLamFrame) {
|
|
3872
|
+
this.lastNewFrameTime = performance.now();
|
|
3873
|
+
this.lastKnownLamFrame = lamFrame;
|
|
3874
|
+
this.staleWarningEmitted = false;
|
|
3875
|
+
}
|
|
3876
|
+
const { frame: emotionFrame, energy } = this.getEmotionFrame();
|
|
3877
|
+
const { merged, emotionBlendshapes } = this.mergeBlendshapes(lamFrame, emotionFrame, energy);
|
|
3878
|
+
const fullFrame = {
|
|
3879
|
+
blendshapes: merged,
|
|
3880
|
+
lamBlendshapes: lamFrame,
|
|
3881
|
+
emotionBlendshapes,
|
|
3882
|
+
emotion: emotionFrame,
|
|
3883
|
+
timestamp: currentTime
|
|
3884
|
+
};
|
|
3885
|
+
this.emit("full_frame_ready", fullFrame);
|
|
3886
|
+
this.emit("lam_frame_ready", lamFrame);
|
|
3887
|
+
if (emotionFrame) {
|
|
3888
|
+
this.emit("emotion_frame_ready", emotionFrame);
|
|
3889
|
+
}
|
|
3890
|
+
} else if (this.playbackStarted && !this.lastKnownLamFrame) {
|
|
3891
|
+
const { frame: emotionFrame, energy } = this.getEmotionFrame();
|
|
3892
|
+
if (emotionFrame && energy > 0.05) {
|
|
3893
|
+
const startupFrame = new Float32Array(52);
|
|
3894
|
+
const { merged, emotionBlendshapes } = this.mergeBlendshapes(startupFrame, emotionFrame, energy);
|
|
3895
|
+
this.emit("full_frame_ready", {
|
|
3896
|
+
blendshapes: merged,
|
|
3897
|
+
lamBlendshapes: startupFrame,
|
|
3898
|
+
emotionBlendshapes,
|
|
3899
|
+
emotion: emotionFrame,
|
|
3900
|
+
timestamp: currentTime
|
|
3901
|
+
});
|
|
3250
3902
|
}
|
|
3251
|
-
}
|
|
3252
|
-
|
|
3903
|
+
}
|
|
3904
|
+
if (this.playbackStarted && this.lastNewFrameTime > 0 && !this.staleWarningEmitted && performance.now() - this.lastNewFrameTime > _FullFacePipeline.STALE_FRAME_THRESHOLD_MS) {
|
|
3905
|
+
this.staleWarningEmitted = true;
|
|
3906
|
+
logger3.warn("LAM appears stalled \u2014 no new frames for 3+ seconds during playback", {
|
|
3907
|
+
staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
|
|
3908
|
+
queuedFrames: this.lamPipeline.queuedFrameCount
|
|
3909
|
+
});
|
|
3910
|
+
}
|
|
3911
|
+
this.frameAnimationId = requestAnimationFrame(updateFrame);
|
|
3912
|
+
};
|
|
3913
|
+
this.frameAnimationId = requestAnimationFrame(updateFrame);
|
|
3253
3914
|
}
|
|
3254
3915
|
/**
|
|
3255
|
-
*
|
|
3916
|
+
* End of audio stream
|
|
3256
3917
|
*/
|
|
3257
|
-
|
|
3258
|
-
const
|
|
3259
|
-
if (
|
|
3260
|
-
|
|
3918
|
+
async end() {
|
|
3919
|
+
const remaining = this.coalescer.flush();
|
|
3920
|
+
if (remaining) {
|
|
3921
|
+
const chunk = new Uint8Array(remaining);
|
|
3922
|
+
await this.onAudioChunk(chunk);
|
|
3261
3923
|
}
|
|
3262
|
-
|
|
3924
|
+
await this.lamPipeline.flush(this.options.lam);
|
|
3263
3925
|
}
|
|
3264
3926
|
/**
|
|
3265
|
-
*
|
|
3927
|
+
* Stop playback immediately with smooth fade-out
|
|
3266
3928
|
*/
|
|
3267
|
-
async
|
|
3268
|
-
|
|
3269
|
-
|
|
3270
|
-
|
|
3929
|
+
async stop(fadeOutMs = 50) {
|
|
3930
|
+
this.stopMonitoring();
|
|
3931
|
+
await this.scheduler.cancelAll(fadeOutMs);
|
|
3932
|
+
this.coalescer.reset();
|
|
3933
|
+
this.lamPipeline.reset();
|
|
3934
|
+
this.playbackStarted = false;
|
|
3935
|
+
this.lastEmotionFrame = null;
|
|
3936
|
+
this.currentAudioEnergy = 0;
|
|
3937
|
+
this.emotionMapper.reset();
|
|
3938
|
+
this.energyAnalyzer.reset();
|
|
3939
|
+
this.lastNewFrameTime = 0;
|
|
3940
|
+
this.lastKnownLamFrame = null;
|
|
3941
|
+
this.staleWarningEmitted = false;
|
|
3942
|
+
this.emit("playback_complete", void 0);
|
|
3943
|
+
}
|
|
3944
|
+
/**
|
|
3945
|
+
* Start monitoring for playback completion
|
|
3946
|
+
*/
|
|
3947
|
+
startMonitoring() {
|
|
3948
|
+
if (this.monitorInterval) {
|
|
3949
|
+
clearInterval(this.monitorInterval);
|
|
3950
|
+
}
|
|
3951
|
+
this.monitorInterval = setInterval(() => {
|
|
3952
|
+
if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
|
|
3953
|
+
this.emit("playback_complete", void 0);
|
|
3954
|
+
this.stopMonitoring();
|
|
3955
|
+
}
|
|
3956
|
+
}, 100);
|
|
3957
|
+
}
|
|
3958
|
+
/**
|
|
3959
|
+
* Stop monitoring
|
|
3960
|
+
*/
|
|
3961
|
+
stopMonitoring() {
|
|
3962
|
+
if (this.monitorInterval) {
|
|
3963
|
+
clearInterval(this.monitorInterval);
|
|
3964
|
+
this.monitorInterval = null;
|
|
3965
|
+
}
|
|
3966
|
+
if (this.frameAnimationId) {
|
|
3967
|
+
cancelAnimationFrame(this.frameAnimationId);
|
|
3968
|
+
this.frameAnimationId = null;
|
|
3271
3969
|
}
|
|
3272
3970
|
}
|
|
3971
|
+
/**
|
|
3972
|
+
* Get current pipeline state (for debugging/monitoring)
|
|
3973
|
+
*/
|
|
3974
|
+
getState() {
|
|
3975
|
+
return {
|
|
3976
|
+
playbackStarted: this.playbackStarted,
|
|
3977
|
+
coalescerFill: this.coalescer.fillLevel,
|
|
3978
|
+
lamFill: this.lamPipeline.fillLevel,
|
|
3979
|
+
queuedLAMFrames: this.lamPipeline.queuedFrameCount,
|
|
3980
|
+
emotionLabel: this.lastEmotionFrame?.emotion ?? null,
|
|
3981
|
+
currentAudioEnergy: this.currentAudioEnergy,
|
|
3982
|
+
currentTime: this.scheduler.getCurrentTime(),
|
|
3983
|
+
playbackEndTime: this.scheduler.getPlaybackEndTime()
|
|
3984
|
+
};
|
|
3985
|
+
}
|
|
3986
|
+
/**
|
|
3987
|
+
* Check if an explicit emotion label is currently set
|
|
3988
|
+
*/
|
|
3989
|
+
get hasEmotionLabel() {
|
|
3990
|
+
return this.lastEmotionFrame !== null;
|
|
3991
|
+
}
|
|
3992
|
+
/**
|
|
3993
|
+
* Cleanup resources
|
|
3994
|
+
*/
|
|
3995
|
+
dispose() {
|
|
3996
|
+
this.stopMonitoring();
|
|
3997
|
+
this.scheduler.dispose();
|
|
3998
|
+
this.coalescer.reset();
|
|
3999
|
+
this.lamPipeline.reset();
|
|
4000
|
+
this.lastEmotionFrame = null;
|
|
4001
|
+
this.currentAudioEnergy = 0;
|
|
4002
|
+
}
|
|
3273
4003
|
};
|
|
3274
|
-
|
|
3275
|
-
|
|
3276
|
-
* (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
|
|
3277
|
-
*/
|
|
3278
|
-
Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
|
|
3279
|
-
|
|
3280
|
-
// src/audio/FullFacePipeline.ts
|
|
3281
|
-
var logger3 = createLogger("FullFacePipeline");
|
|
3282
|
-
var BLENDSHAPE_INDEX_MAP = /* @__PURE__ */ new Map();
|
|
3283
|
-
LAM_BLENDSHAPES.forEach((name, index) => {
|
|
3284
|
-
BLENDSHAPE_INDEX_MAP.set(name, index);
|
|
3285
|
-
});
|
|
3286
|
-
var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
|
|
4004
|
+
// Milliseconds without a new LAM frame (during playback) after which the
// frame loop logs its "LAM appears stalled" warning.
_FullFacePipeline.STALE_FRAME_THRESHOLD_MS = 3000;
// Public name of the class (this is the binding listed in the module exports).
var FullFacePipeline = _FullFacePipeline;
|
|
3287
4006
|
|
|
3288
4007
|
// src/inference/kaldiFbank.ts
|
|
3289
4008
|
function fft(re, im) {
|
|
@@ -4153,20 +4872,21 @@ var LipSyncWithFallback = class {
|
|
|
4153
4872
|
try {
|
|
4154
4873
|
return await this.implementation.load();
|
|
4155
4874
|
} catch (error) {
|
|
4156
|
-
|
|
4157
|
-
|
|
4158
|
-
|
|
4159
|
-
|
|
4160
|
-
|
|
4161
|
-
|
|
4162
|
-
|
|
4163
|
-
|
|
4164
|
-
modelUrl: this.config.cpuModelUrl
|
|
4165
|
-
});
|
|
4166
|
-
this.hasFallenBack = true;
|
|
4167
|
-
logger6.info("Fallback to Wav2ArkitCpuInference successful");
|
|
4168
|
-
return await this.implementation.load();
|
|
4875
|
+
return this.fallbackToCpu(error instanceof Error ? error.message : String(error));
|
|
4876
|
+
}
|
|
4877
|
+
}
|
|
4878
|
+
async fallbackToCpu(reason) {
|
|
4879
|
+
logger6.warn("GPU model load failed, falling back to CPU model", { reason });
|
|
4880
|
+
try {
|
|
4881
|
+
await this.implementation.dispose();
|
|
4882
|
+
} catch {
|
|
4169
4883
|
}
|
|
4884
|
+
this.implementation = new Wav2ArkitCpuInference({
|
|
4885
|
+
modelUrl: this.config.cpuModelUrl
|
|
4886
|
+
});
|
|
4887
|
+
this.hasFallenBack = true;
|
|
4888
|
+
logger6.info("Fallback to Wav2ArkitCpuInference successful");
|
|
4889
|
+
return await this.implementation.load();
|
|
4170
4890
|
}
|
|
4171
4891
|
async infer(audioSamples, identityIndex) {
|
|
4172
4892
|
return this.implementation.infer(audioSamples, identityIndex);
|
|
@@ -7307,119 +8027,637 @@ var AnimationGraph = class extends EventEmitter {
|
|
|
7307
8027
|
}
|
|
7308
8028
|
};
|
|
7309
8029
|
|
|
7310
|
-
// src/animation/
|
|
7311
|
-
|
|
7312
|
-
|
|
7313
|
-
|
|
7314
|
-
|
|
7315
|
-
|
|
7316
|
-
|
|
7317
|
-
|
|
8030
|
+
// src/animation/simplex2d.ts
|
|
8031
|
+
// Permutation lookup used by simplex2d: the 256-entry table `p` stored twice
// back to back, so an index of up to 511 can be read without wrapping.
var perm = new Uint8Array(512);

// The eight 2D gradient directions selected via `perm[...] % 8`.
var grad2 = [
  [1, 1], [-1, 1], [1, -1], [-1, -1],
  [1, 0], [-1, 0], [0, 1], [0, -1]
];

// A fixed permutation of the integers 0..255 (16 values per row).
var p = [
  151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225,
  140, 36, 103, 30, 69, 142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148,
  247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32,
  57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175,
  74, 165, 71, 134, 139, 48, 27, 166, 77, 146, 158, 231, 83, 111, 229, 122,
  60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245, 40, 244, 102, 143, 54,
  65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169,
  200, 196, 135, 130, 116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64,
  52, 217, 226, 250, 124, 123, 5, 202, 38, 147, 118, 126, 255, 82, 85, 212,
  207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42, 223, 183, 170, 213,
  119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9,
  129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104,
  218, 246, 97, 228, 251, 34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241,
  81, 51, 145, 235, 249, 14, 239, 107, 49, 192, 214, 31, 181, 199, 106, 157,
  184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150, 254, 138, 236, 205, 93,
  222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180
];

// Copy the table into both halves of `perm`.
perm.set(p, 0);
perm.set(p, 256);

// F2 scales (x + y) to find the lattice cell of an input point; G2 is used to
// map cell corners back into input space (see simplex2d).
var F2 = (Math.sqrt(3) - 1) / 2;
var G2 = (3 - Math.sqrt(3)) / 6;
|
|
8306
|
+
/**
 * Dot product of a 2-component gradient with the offset (x, y).
 *
 * @param g - Indexable pair; g[0] and g[1] are the gradient components.
 * @param x - Offset along the first axis.
 * @param y - Offset along the second axis.
 * @returns g[0] * x + g[1] * y
 */
function dot2(g, x, y) {
  const gx = g[0];
  const gy = g[1];
  return gx * x + gy * y;
}
|
|
7327
|
-
|
|
7328
|
-
|
|
7329
|
-
|
|
7330
|
-
|
|
7331
|
-
|
|
7332
|
-
|
|
7333
|
-
|
|
7334
|
-
|
|
7335
|
-
|
|
7336
|
-
|
|
7337
|
-
|
|
7338
|
-
|
|
7339
|
-
|
|
7340
|
-
|
|
7341
|
-
|
|
7342
|
-
|
|
7343
|
-
|
|
7344
|
-
|
|
7345
|
-
|
|
7346
|
-
|
|
7347
|
-
|
|
7348
|
-
|
|
7349
|
-
|
|
7350
|
-
|
|
7351
|
-
|
|
7352
|
-
|
|
7353
|
-
|
|
7354
|
-
|
|
7355
|
-
|
|
7356
|
-
|
|
7357
|
-
|
|
7358
|
-
|
|
8309
|
+
/**
 * 2D simplex-style gradient noise.
 *
 * Finds the lattice cell containing (x, y), picks one gradient per triangle
 * corner from the `perm` / `grad2` tables, and sums the three corner
 * contributions, each weighted by a falloff that is zero once the squared
 * distance to that corner reaches 0.5.
 *
 * @param x - First input coordinate.
 * @param y - Second input coordinate.
 * @returns 70 times the sum of the three corner contributions.
 */
function simplex2d(x, y) {
  // Locate the cell (i, j) in skewed space.
  const skew = (x + y) * F2;
  const i = Math.floor(x + skew);
  const j = Math.floor(y + skew);

  // Offset of the input point from the cell's first corner, in input space.
  const unskew = (i + j) * G2;
  const x0 = x - (i - unskew);
  const y0 = y - (j - unskew);

  // Which of the cell's two triangles contains the point.
  const lowerTriangle = x0 > y0;
  const i1 = lowerTriangle ? 1 : 0;
  const j1 = lowerTriangle ? 0 : 1;

  // Offsets from the middle and last corners.
  const x1 = x0 - i1 + G2;
  const y1 = y0 - j1 + G2;
  const x2 = x0 - 1 + 2 * G2;
  const y2 = y0 - 1 + 2 * G2;

  // Gradient index for each corner.
  const ii = i & 255;
  const jj = j & 255;
  const gi0 = perm[ii + perm[jj]] % 8;
  const gi1 = perm[ii + i1 + perm[jj + j1]] % 8;
  const gi2 = perm[ii + 1 + perm[jj + 1]] % 8;

  // Contribution of one corner; zero unless the falloff term is >= 0.
  const corner = (gradIndex, dx, dy) => {
    const falloff = 0.5 - dx * dx - dy * dy;
    if (!(falloff >= 0)) {
      return 0;
    }
    const squared = falloff * falloff;
    return squared * squared * dot2(grad2[gradIndex], dx, dy);
  };

  const n0 = corner(gi0, x0, y0);
  const n1 = corner(gi1, x1, y1);
  const n2 = corner(gi2, x2, y2);
  return 70 * (n0 + n1 + n2);
}
|
|
8349
|
+
|
|
8350
|
+
// src/animation/ProceduralLifeLayer.ts
|
|
8351
|
+
// Phase ids of the four-step cycle shared by the blink and gaze-break state
// machines (open -> closing -> closed -> opening -> open).
var PHASE_OPEN = 0;
var PHASE_CLOSING = 1;
var PHASE_CLOSED = 2;
var PHASE_OPENING = 3;

// Blink timing. Durations are compared against accumulated `delta`, which
// update() documents as seconds.
var BLINK_CLOSE_DURATION = 0.06;
var BLINK_HOLD_DURATION = 0.04;
var BLINK_OPEN_DURATION = 0.15;
// How far the right eye lags behind the left while closing.
var BLINK_ASYMMETRY_DELAY = 0.008;

// Gaze-break timing: move away, hold, return.
var GAZE_BREAK_DURATION = 0.12;
var GAZE_BREAK_HOLD_DURATION = 0.3;
var GAZE_BREAK_RETURN_DURATION = 0.15;

// Eye micro-motion noise: time multiplier and fixed second noise coordinate
// for each axis.
var EYE_NOISE_X_FREQ = 0.8;
var EYE_NOISE_Y_FREQ = 0.6;
var EYE_NOISE_X_PHASE = 73.1;
var EYE_NOISE_Y_PHASE = 91.7;

// Brow noise: time multipliers ...
var BROW_INNER_UP_FREQ = 0.4;
var BROW_OUTER_LEFT_FREQ = 0.35;
var BROW_OUTER_RIGHT_FREQ = 0.38;
var BROW_DOWN_FREQ = 0.3;
// ... and the fixed second noise coordinate of each brow channel.
var BROW_INNER_UP_PHASE = 0;
var BROW_OUTER_LEFT_PHASE = 17.3;
var BROW_OUTER_RIGHT_PHASE = 31.7;
var BROW_DOWN_LEFT_PHASE = 47.1;
var BROW_DOWN_RIGHT_PHASE = 59.3;

// Frame-to-frame audio energy rise that triggers a brow emphasis, and how
// fast the emphasis level decays per unit of delta.
var EMPHASIS_ENERGY_THRESHOLD = 0.3;
var EMPHASIS_DECAY_RATE = 4;
|
|
8377
|
+
/**
 * Limit v to the range [min, max].
 *
 * @returns min when v < min, max when v > max, otherwise v unchanged.
 */
function clamp(v, min, max) {
  if (v < min) {
    return min;
  }
  if (v > max) {
    return max;
  }
  return v;
}
|
|
8380
|
+
/**
 * Pick a pseudo-random number between min and max using Math.random().
 *
 * @returns min plus a random fraction (0 <= fraction < 1) of (max - min).
 */
function randomRange(min, max) {
  const span = max - min;
  return min + Math.random() * span;
}
|
|
8383
|
+
/**
 * Cubic ease curve 3t^2 - 2t^3, evaluated as t * t * (3 - 2t).
 *
 * Maps 0 -> 0, 0.5 -> 0.5 and 1 -> 1. The input is not clamped.
 */
function smoothStep(t) {
  const squared = t * t;
  return squared * (3 - 2 * t);
}
|
|
8386
|
+
/**
 * Smoothly limit v to the open range (-max, max) using tanh.
 *
 * Small values pass through almost unchanged; large values approach
 * +/-max without ever exceeding it.
 *
 * @param v   - Value to limit.
 * @param max - Magnitude of the limit.
 * @returns tanh(v / max) * max, or 0 when max is 0.
 */
function softClamp(v, max) {
  // A zero limit would evaluate 0 / 0 for v === 0 and leak NaN to the caller;
  // the only value that fits inside a zero-width range is 0.
  if (max === 0) {
    return 0;
  }
  return Math.tanh(v / max) * max;
}
|
|
8389
|
+
/**
 * Procedural "life" animation: blinks, eye contact with micro-motion and
 * occasional gaze breaks, noise-driven brow movement, and small head deltas
 * from breathing and postural sway.
 *
 * Call update() once per frame; it returns blendshape values keyed by name
 * plus a head rotation delta.
 */
var ProceduralLifeLayer = class {
  /**
   * @param config - Optional overrides. Every field is optional and falls
   *                 back to the default shown below.
   */
  constructor(config) {
    // Blink state
    this.blinkTimer = 0;
    this.blinkPhase = PHASE_OPEN;
    this.blinkProgress = 0;
    this.asymmetryRight = 0.97;
    this.smoothedBlinkLeft = 0;
    this.smoothedBlinkRight = 0;
    // Eye contact (smoothed)
    this.smoothedEyeX = 0;
    this.smoothedEyeY = 0;
    // Eye micro-motion (continuous simplex noise, no discrete events)
    this.eyeNoiseTime = 0;
    // Gaze break state
    this.gazeBreakTimer = 0;
    this.gazeBreakPhase = PHASE_OPEN;
    this.gazeBreakProgress = 0;
    this.gazeBreakTargetX = 0;
    this.gazeBreakTargetY = 0;
    this.gazeBreakCurrentX = 0;
    this.gazeBreakCurrentY = 0;
    // Breathing / postural sway
    this.microMotionTime = 0;
    this.breathingPhase = 0;
    // Brow noise
    this.noiseTime = 0;
    this.previousEnergy = 0;
    this.emphasisLevel = 0;
    // Configuration (with defaults)
    this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
    this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
    this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
    this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;
    this.browNoiseAmplitude = config?.browNoiseAmplitude ?? 0.3;
    this.browNoiseSpeechMultiplier = config?.browNoiseSpeechMultiplier ?? 2;
    this.breathingRate = config?.breathingRate ?? 0.25;
    this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
    this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
    this.eyeSmoothing = config?.eyeSmoothing ?? 15;
    // First blink / gaze break happen after a random wait inside the ranges.
    this.blinkInterval = randomRange(...this.blinkIntervalRange);
    this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
  }
  /**
   * Update the life layer and produce output for this frame.
   *
   * @param delta - Time since last frame in seconds. A value that is not a
   *                finite positive number is treated as 0, so one bad frame
   *                time cannot poison the internal timers with NaN/Infinity.
   * @param input - Per-frame input (eye target, audio energy, speaking state)
   * @returns Blendshape values and head rotation deltas
   */
  update(delta, input) {
    const dt = Number.isFinite(delta) && delta > 0 ? delta : 0;
    const eyeTargetX = input?.eyeTargetX ?? 0;
    const eyeTargetY = input?.eyeTargetY ?? 0;
    const audioEnergy = input?.audioEnergy ?? 0;
    const isSpeaking = input?.isSpeaking ?? false;
    // Smoothing uses a capped step so a long frame cannot overshoot the target.
    const safeDelta = Math.min(dt, 0.1);
    const blendshapes = {};

    // --- Blinks ---------------------------------------------------------
    this.updateBlinks(dt);
    const blinkSmoothing = 45;
    const blinkValues = this.getBlinkValues();
    this.smoothedBlinkLeft += (blinkValues.left - this.smoothedBlinkLeft) * Math.min(1, safeDelta * blinkSmoothing);
    this.smoothedBlinkRight += (blinkValues.right - this.smoothedBlinkRight) * Math.min(1, safeDelta * blinkSmoothing);
    blendshapes["eyeBlinkLeft"] = this.smoothedBlinkLeft;
    blendshapes["eyeBlinkRight"] = this.smoothedBlinkRight;

    // --- Eye direction: smoothed target + gaze break + micro-motion ------
    this.smoothedEyeX += (eyeTargetX - this.smoothedEyeX) * Math.min(1, safeDelta * this.eyeSmoothing);
    this.smoothedEyeY += (eyeTargetY - this.smoothedEyeY) * Math.min(1, safeDelta * this.eyeSmoothing);
    this.eyeNoiseTime += dt;
    const microMotion = this.getEyeMicroMotion();
    this.updateGazeBreaks(dt);
    const finalEyeX = this.smoothedEyeX + this.gazeBreakCurrentX + microMotion.x;
    const finalEyeY = this.smoothedEyeY + this.gazeBreakCurrentY + microMotion.y;
    const clampedX = softClamp(finalEyeX, this.eyeMaxDeviation);
    const clampedY = softClamp(finalEyeY, this.eyeMaxDeviation);
    // Inside the dead zone the response is quadratic (m * m / deadZone), so
    // tiny deviations fade out smoothly instead of switching on abruptly.
    const deadZone = 0.02;
    const ramp = (m) => m > deadZone ? m : m > 0 ? m * (m / deadZone) : 0;
    const lookRight = ramp(clampedX);
    const lookLeft = ramp(-clampedX);
    const lookUp = ramp(clampedY);
    const lookDown = ramp(-clampedY);
    // Positive X drives the left eye's "in" and the right eye's "out" shape.
    blendshapes["eyeLookInLeft"] = lookRight;
    blendshapes["eyeLookOutLeft"] = lookLeft;
    blendshapes["eyeLookInRight"] = lookLeft;
    blendshapes["eyeLookOutRight"] = lookRight;
    blendshapes["eyeLookUpLeft"] = lookUp;
    blendshapes["eyeLookUpRight"] = lookUp;
    blendshapes["eyeLookDownLeft"] = lookDown;
    blendshapes["eyeLookDownRight"] = lookDown;

    // --- Brows ------------------------------------------------------------
    this.updateBrowNoise(dt, audioEnergy, isSpeaking, blendshapes);

    // --- Breathing and postural sway (head deltas) ------------------------
    this.microMotionTime += dt;
    this.breathingPhase += dt * this.breathingRate * Math.PI * 2;
    const breathingY = Math.sin(this.breathingPhase) * 3e-3;
    const swayAmp = this.posturalSwayAmplitude;
    const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
    const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
    return {
      blendshapes,
      headDelta: {
        yaw: swayX,
        pitch: breathingY + swayY
      }
    };
  }
  /**
   * Reset all internal state to initial values.
   * Configuration is kept; the blink and gaze-break intervals are re-rolled.
   */
  reset() {
    this.blinkTimer = 0;
    this.blinkInterval = randomRange(...this.blinkIntervalRange);
    this.blinkPhase = PHASE_OPEN;
    this.blinkProgress = 0;
    this.asymmetryRight = 0.97;
    this.smoothedBlinkLeft = 0;
    this.smoothedBlinkRight = 0;
    this.smoothedEyeX = 0;
    this.smoothedEyeY = 0;
    this.eyeNoiseTime = 0;
    this.gazeBreakTimer = 0;
    this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
    this.gazeBreakPhase = PHASE_OPEN;
    this.gazeBreakProgress = 0;
    this.gazeBreakTargetX = 0;
    this.gazeBreakTargetY = 0;
    this.gazeBreakCurrentX = 0;
    this.gazeBreakCurrentY = 0;
    this.microMotionTime = 0;
    this.breathingPhase = 0;
    this.noiseTime = 0;
    this.previousEnergy = 0;
    this.emphasisLevel = 0;
  }
  // =====================================================================
  // PRIVATE: Blink system
  // =====================================================================
  /**
   * Advance the blink state machine by `delta`.
   * At most one phase transition happens per call.
   */
  updateBlinks(delta) {
    this.blinkTimer += delta;
    // Start a new blink once the wait has elapsed and the eyes are open.
    if (this.blinkTimer >= this.blinkInterval && this.blinkPhase === PHASE_OPEN) {
      this.blinkPhase = PHASE_CLOSING;
      this.blinkProgress = 0;
      this.blinkTimer = 0;
      this.blinkInterval = randomRange(...this.blinkIntervalRange);
      // Per-blink scale for the right eye, in [0.95, 1.03).
      this.asymmetryRight = 0.95 + Math.random() * 0.08;
    }
    if (this.blinkPhase > PHASE_OPEN) {
      this.blinkProgress += delta;
      if (this.blinkPhase === PHASE_CLOSING) {
        if (this.blinkProgress >= BLINK_CLOSE_DURATION) {
          this.blinkPhase = PHASE_CLOSED;
          this.blinkProgress = 0;
        }
      } else if (this.blinkPhase === PHASE_CLOSED) {
        if (this.blinkProgress >= BLINK_HOLD_DURATION) {
          this.blinkPhase = PHASE_OPENING;
          this.blinkProgress = 0;
        }
      } else if (this.blinkPhase === PHASE_OPENING) {
        if (this.blinkProgress >= BLINK_OPEN_DURATION) {
          this.blinkPhase = PHASE_OPEN;
          this.blinkProgress = 0;
        }
      }
    }
  }
  /**
   * Target (unsmoothed) blink amounts for the current phase.
   * @returns {{left: number, right: number}}
   */
  getBlinkValues() {
    if (this.blinkPhase === PHASE_OPEN) {
      return { left: 0, right: 0 };
    }
    if (this.blinkPhase === PHASE_CLOSING) {
      // Cubic ease-in; the right eye starts BLINK_ASYMMETRY_DELAY later.
      const t2 = Math.min(1, this.blinkProgress / BLINK_CLOSE_DURATION);
      const eased2 = t2 * t2 * t2;
      const tRight = Math.max(0, Math.min(1, (this.blinkProgress - BLINK_ASYMMETRY_DELAY) / BLINK_CLOSE_DURATION));
      return {
        left: eased2,
        right: tRight * tRight * tRight * this.asymmetryRight
      };
    }
    if (this.blinkPhase === PHASE_CLOSED) {
      return { left: 1, right: this.asymmetryRight };
    }
    // PHASE_OPENING: ease back from closed to open.
    const t = Math.min(1, this.blinkProgress / BLINK_OPEN_DURATION);
    const eased = smoothStep(t);
    return {
      left: 1 - eased,
      right: (1 - eased) * this.asymmetryRight
    };
  }
  // =====================================================================
  // PRIVATE: Eye micro-motion (continuous simplex noise)
  // =====================================================================
  /**
   * Small continuous eye offset sampled from simplex2d at the current
   * eyeNoiseTime; the vertical component is scaled to 70% of the horizontal.
   * @returns {{x: number, y: number}}
   */
  getEyeMicroMotion() {
    const amp = this.eyeNoiseAmplitude;
    const x = simplex2d(this.eyeNoiseTime * EYE_NOISE_X_FREQ, EYE_NOISE_X_PHASE) * amp;
    const y = simplex2d(this.eyeNoiseTime * EYE_NOISE_Y_FREQ, EYE_NOISE_Y_PHASE) * amp * 0.7;
    return { x, y };
  }
  // =====================================================================
  // PRIVATE: Gaze breaks
  // =====================================================================
  /**
   * Advance the gaze-break state machine by `delta`. It reuses the PHASE_*
   * ids: CLOSING = moving to the target, CLOSED = holding there,
   * OPENING = returning, OPEN = idle (offset 0).
   */
  updateGazeBreaks(delta) {
    this.gazeBreakTimer += delta;
    if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
      this.gazeBreakPhase = PHASE_CLOSING;
      this.gazeBreakProgress = 0;
      this.gazeBreakTimer = 0;
      // Horizontal target spans +/-amp; vertical target is limited to +/-0.2 * amp.
      const amp = randomRange(...this.gazeBreakAmplitudeRange);
      this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
      this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
      this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
    }
    if (this.gazeBreakPhase > PHASE_OPEN) {
      this.gazeBreakProgress += delta;
      if (this.gazeBreakPhase === PHASE_CLOSING) {
        const t = Math.min(1, this.gazeBreakProgress / GAZE_BREAK_DURATION);
        const eased = smoothStep(t);
        this.gazeBreakCurrentX = this.gazeBreakTargetX * eased;
        this.gazeBreakCurrentY = this.gazeBreakTargetY * eased;
        if (this.gazeBreakProgress >= GAZE_BREAK_DURATION) {
          this.gazeBreakPhase = PHASE_CLOSED;
          this.gazeBreakProgress = 0;
        }
      } else if (this.gazeBreakPhase === PHASE_CLOSED) {
        this.gazeBreakCurrentX = this.gazeBreakTargetX;
        this.gazeBreakCurrentY = this.gazeBreakTargetY;
        if (this.gazeBreakProgress >= GAZE_BREAK_HOLD_DURATION) {
          this.gazeBreakPhase = PHASE_OPENING;
          this.gazeBreakProgress = 0;
        }
      } else if (this.gazeBreakPhase === PHASE_OPENING) {
        const t = Math.min(1, this.gazeBreakProgress / GAZE_BREAK_RETURN_DURATION);
        const eased = smoothStep(t);
        this.gazeBreakCurrentX = this.gazeBreakTargetX * (1 - eased);
        this.gazeBreakCurrentY = this.gazeBreakTargetY * (1 - eased);
        if (this.gazeBreakProgress >= GAZE_BREAK_RETURN_DURATION) {
          this.gazeBreakPhase = PHASE_OPEN;
          this.gazeBreakProgress = 0;
          this.gazeBreakCurrentX = 0;
          this.gazeBreakCurrentY = 0;
        }
      }
    } else {
      this.gazeBreakCurrentX = 0;
      this.gazeBreakCurrentY = 0;
    }
  }
  // =====================================================================
  // PRIVATE: Brow noise (simplex-driven organic drift)
  // =====================================================================
  /**
   * Write the five brow blendshapes into `blendshapes`.
   *
   * Each channel is simplex noise remapped via (noise * 0.5 + 0.5), scaled by
   * the brow amplitude (multiplied by browNoiseSpeechMultiplier while
   * speaking with non-zero energy) and clamped to [0, 1]. A frame-to-frame
   * energy rise above EMPHASIS_ENERGY_THRESHOLD triggers a decaying
   * emphasis that is added to browInnerUp.
   */
  updateBrowNoise(delta, audioEnergy, isSpeaking, blendshapes) {
    this.noiseTime += delta;
    const energyDelta = audioEnergy - this.previousEnergy;
    if (energyDelta > EMPHASIS_ENERGY_THRESHOLD) {
      this.emphasisLevel = 1;
    }
    this.emphasisLevel = Math.max(0, this.emphasisLevel - delta * EMPHASIS_DECAY_RATE);
    this.previousEnergy = audioEnergy;
    const speechMul = isSpeaking && audioEnergy > 0 ? this.browNoiseSpeechMultiplier : 1;
    const amp = this.browNoiseAmplitude * speechMul;
    const innerUpNoise = simplex2d(this.noiseTime * BROW_INNER_UP_FREQ, BROW_INNER_UP_PHASE);
    const innerUpBase = (innerUpNoise * 0.5 + 0.5) * amp * 0.83;
    const innerUpEmphasis = this.emphasisLevel * 0.25;
    blendshapes["browInnerUp"] = clamp(innerUpBase + innerUpEmphasis, 0, 1);
    const outerLeftNoise = simplex2d(this.noiseTime * BROW_OUTER_LEFT_FREQ, BROW_OUTER_LEFT_PHASE);
    blendshapes["browOuterUpLeft"] = clamp((outerLeftNoise * 0.5 + 0.5) * amp * 0.5, 0, 1);
    const outerRightNoise = simplex2d(this.noiseTime * BROW_OUTER_RIGHT_FREQ, BROW_OUTER_RIGHT_PHASE);
    blendshapes["browOuterUpRight"] = clamp((outerRightNoise * 0.5 + 0.5) * amp * 0.5, 0, 1);
    const downLeftNoise = simplex2d(this.noiseTime * BROW_DOWN_FREQ, BROW_DOWN_LEFT_PHASE);
    blendshapes["browDownLeft"] = clamp((downLeftNoise * 0.5 + 0.5) * amp * 0.33, 0, 1);
    const downRightNoise = simplex2d(this.noiseTime * BROW_DOWN_FREQ, BROW_DOWN_RIGHT_PHASE);
    blendshapes["browDownRight"] = clamp((downRightNoise * 0.5 + 0.5) * amp * 0.33, 0, 1);
  }
};
|
|
8657
|
+
|
|
8658
|
+
// ../types/dist/index.mjs
|
|
8659
|
+
// Protocol version number; this binding is listed in the module's exports as PROTOCOL_VERSION.
var PROTOCOL_VERSION = 1;
|
|
8660
|
+
/**
 * Structural check for a protocol event.
 *
 * @param obj - Any value.
 * @returns true when obj is a non-null object on which the keys "v", "type"
 *          and "ts" are all present (own or inherited); the values themselves
 *          are not inspected.
 */
function isProtocolEvent(obj) {
  if (typeof obj !== "object" || obj === null) {
    return false;
  }
  return ["v", "type", "ts"].every((key) => key in obj);
}
|
|
7425
8663
|
//# sourceMappingURL=index.js.map
|