@omote/core 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -874,6 +874,327 @@ var UPPER_FACE_BLENDSHAPES = [
874
874
  "cheekSquintLeft",
875
875
  "cheekSquintRight"
876
876
  ];
877
// Per-emotion ARKit blendshape weights for the upper face. Every expression is
// left/right symmetric apart from the single-channel browInnerUp.
var EMOTION_ARKIT_MAP = (() => {
  // Expands one base name into its matching Left/Right channels at equal weight.
  const bilateral = (base, weight) => ({
    [`${base}Left`]: weight,
    [`${base}Right`]: weight
  });
  return {
    happy: {
      // AU6 - cheek raiser, the primary Duchenne smile marker
      ...bilateral("cheekSquint", 0.5),
      // Light eye squint that accompanies a genuine smile (orbicularis oculi)
      ...bilateral("eyeSquint", 0.2)
    },
    angry: {
      // AU4 - brow lowerer, strong; the primary anger marker
      ...bilateral("browDown", 0.7),
      // AU5 - upper lid raiser, the wide-eyed part of the glare
      ...bilateral("eyeWide", 0.4),
      // AU7 - lid tightener, the tense stare that combines with AU5
      ...bilateral("eyeSquint", 0.3)
    },
    sad: {
      // AU1 - inner brow raiser, the primary sadness marker
      browInnerUp: 0.6,
      // AU4 - brow lowerer, brows drawn together
      ...bilateral("browDown", 0.3)
    },
    // No overlay at all: every channel stays at zero
    neutral: {}
  };
})();
907
// Baseline EmotionToBlendshapeMapper settings; constructor arguments are
// spread over these, so callers only need to pass the fields they change.
var DEFAULT_CONFIG = {
  // Fraction of the remaining distance to the target covered by each update()
  smoothingFactor: 0.15,
  // Dominant mode ignores frames whose confidence is below this value
  confidenceThreshold: 0.3,
  // Global multiplier applied to every mapped weight
  intensity: 1,
  // "weighted" blends all emotions by probability; any other value uses the dominant emotion only
  blendMode: "dominant",
  // Weighted mode skips emotions whose probability is below this value
  minBlendProbability: 0.1,
  // When true, mapFrame() scales the targets by the current audio energy
  energyModulation: false,
  // Scale applied at energy 0 ...
  minEnergyScale: 0.3,
  // ... and at energy 1; values in between are interpolated linearly
  maxEnergyScale: 1
};
917
/**
 * Build a fresh record holding every name in UPPER_FACE_BLENDSHAPES at weight 0.
 *
 * @returns A new object on every call, so callers may mutate it freely
 */
function createZeroBlendshapes() {
  return UPPER_FACE_BLENDSHAPES.reduce((zeros, channel) => {
    zeros[channel] = 0;
    return zeros;
  }, {});
}
924
/**
 * Clamp a value into the closed interval [0, 1].
 *
 * Math.max/Math.min propagate NaN, and every caller in this file stores the
 * result as a blendshape weight or an energy level, so an input that does not
 * coerce to a number (NaN, undefined, ...) is reported as 0 instead of NaN.
 *
 * @param value - Number to clamp
 * @returns The clamped value, or 0 when the input is not a number
 */
function clamp01(value) {
  const clamped = Math.max(0, Math.min(1, value));
  return Number.isNaN(clamped) ? 0 : clamped;
}
927
/**
 * Turns emotion frames into smoothed upper-face blendshape weights.
 *
 * mapFrame() sets the target weights for the latest frame; update() moves the
 * current weights a fixed fraction of the way towards that target and
 * getCurrentBlendshapes() reads the result.
 *
 * Inputs that are not numbers (NaN/undefined confidence, probability or audio
 * energy) are ignored rather than mapped: a NaN weight would otherwise be fed
 * back into the smoothing on every update and never decay.
 */
var EmotionToBlendshapeMapper = class {
  /**
   * Create a new EmotionToBlendshapeMapper
   *
   * @param config - Optional configuration; omitted fields fall back to DEFAULT_CONFIG
   */
  constructor(config) {
    // Full energy until a caller supplies a measurement through mapFrame().
    this.currentEnergy = 1;
    this.config = {
      ...DEFAULT_CONFIG,
      ...config
    };
    this.targetBlendshapes = createZeroBlendshapes();
    this.currentBlendshapes = createZeroBlendshapes();
  }
  /**
   * Map an emotion frame to target blendshapes
   *
   * This sets the target values that the mapper will smoothly interpolate
   * towards. Call update() each frame to apply smoothing.
   *
   * @param frame - Emotion frame ({ emotion, confidence, probabilities? }); a falsy frame yields all-zero targets
   * @param audioEnergy - Optional audio energy (0-1) for energy modulation; values that are not numbers are ignored
   * @returns Copy of the target upper face blendshapes (before smoothing)
   */
  mapFrame(frame, audioEnergy) {
    this.targetBlendshapes = createZeroBlendshapes();
    if (audioEnergy !== void 0) {
      const energy = clamp01(audioEnergy);
      // Keep the previous energy when the measurement is unusable.
      if (!Number.isNaN(energy)) {
        this.currentEnergy = energy;
      }
    }
    if (!frame) {
      return { ...this.targetBlendshapes };
    }
    if (this.config.blendMode === "weighted") {
      this.mapFrameWeighted(frame);
    } else {
      this.mapFrameDominant(frame);
    }
    if (this.config.energyModulation) {
      this.applyEnergyModulation();
    }
    return { ...this.targetBlendshapes };
  }
  /**
   * Map using dominant emotion only (original behavior)
   *
   * Leaves the targets at zero when the confidence is missing, not a number,
   * or below confidenceThreshold, or when the emotion has no ARKit mapping.
   */
  mapFrameDominant(frame) {
    const confidence = Number(frame.confidence);
    if (Number.isNaN(confidence) || confidence < this.config.confidenceThreshold) {
      return;
    }
    const mapping = EMOTION_ARKIT_MAP[frame.emotion];
    if (!mapping) {
      return;
    }
    const scale = this.config.intensity * confidence;
    for (const [name, value] of Object.entries(mapping)) {
      if (value !== void 0) {
        this.targetBlendshapes[name] = clamp01(value * scale);
      }
    }
  }
  /**
   * Map using weighted blend of all emotions by probability
   * Creates more nuanced expressions (e.g., bittersweet = happy + sad)
   *
   * Falls back to the dominant-emotion mapping when the frame carries no
   * probabilities.
   */
  mapFrameWeighted(frame) {
    if (!frame.probabilities) {
      this.mapFrameDominant(frame);
      return;
    }
    for (const [emotion, probability] of Object.entries(frame.probabilities)) {
      const weight = Number(probability);
      if (Number.isNaN(weight) || weight < this.config.minBlendProbability) {
        continue;
      }
      const mapping = EMOTION_ARKIT_MAP[emotion];
      if (!mapping) {
        continue;
      }
      const scale = this.config.intensity * weight;
      for (const [name, value] of Object.entries(mapping)) {
        if (value !== void 0) {
          // Contributions accumulate across emotions and are clamped once below.
          this.targetBlendshapes[name] += value * scale;
        }
      }
    }
    for (const name of UPPER_FACE_BLENDSHAPES) {
      this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name]);
    }
  }
  /**
   * Apply energy modulation to scale emotion intensity by audio energy
   * Louder speech = stronger expressions
   *
   * The scale is interpolated linearly from minEnergyScale (energy 0) to
   * maxEnergyScale (energy 1).
   */
  applyEnergyModulation() {
    const { minEnergyScale, maxEnergyScale } = this.config;
    const energyScale = minEnergyScale + this.currentEnergy * (maxEnergyScale - minEnergyScale);
    for (const name of UPPER_FACE_BLENDSHAPES) {
      this.targetBlendshapes[name] = clamp01(this.targetBlendshapes[name] * energyScale);
    }
  }
  /**
   * Apply smoothing to interpolate current values towards target
   *
   * Uses exponential moving average:
   *   current = current + smoothingFactor * (target - current)
   *
   * @param _deltaMs - Delta time in milliseconds (reserved for future time-based smoothing)
   */
  update(_deltaMs) {
    const factor = this.config.smoothingFactor;
    for (const name of UPPER_FACE_BLENDSHAPES) {
      const target = this.targetBlendshapes[name];
      const current = this.currentBlendshapes[name];
      const next = clamp01(current + factor * (target - current));
      // Hold the last good value: once NaN is stored, every later update
      // would compute NaN again.
      if (!Number.isNaN(next)) {
        this.currentBlendshapes[name] = next;
      }
    }
  }
  /**
   * Get current smoothed blendshape values
   *
   * @returns Copy of the current upper face blendshapes (after smoothing)
   */
  getCurrentBlendshapes() {
    return { ...this.currentBlendshapes };
  }
  /**
   * Reset mapper to neutral state
   *
   * Sets both target and current blendshapes to zero and the energy back to 1.
   */
  reset() {
    this.targetBlendshapes = createZeroBlendshapes();
    this.currentBlendshapes = createZeroBlendshapes();
    this.currentEnergy = 1;
  }
  /**
   * Get a copy of the current configuration
   */
  getConfig() {
    return { ...this.config };
  }
  /**
   * Update configuration
   *
   * @param config - Partial configuration to merge over the current one
   */
  setConfig(config) {
    this.config = {
      ...this.config,
      ...config
    };
  }
};
1082
+
1083
+ // src/animation/audioEnergy.ts
1084
/**
 * Root-mean-square level of a block of samples.
 *
 * @param samples - Indexable sample sequence with a length
 * @returns sqrt(mean(sample^2)), or 0 for an empty block
 */
function calculateRMS(samples) {
  const count = samples.length;
  if (count === 0) {
    return 0;
  }
  let squaredTotal = 0;
  let index = 0;
  while (index < count) {
    const sample = samples[index];
    squaredTotal += sample * sample;
    index += 1;
  }
  return Math.sqrt(squaredTotal / count);
}
1092
/**
 * Largest absolute sample value in a block.
 *
 * @param samples - Indexable sample sequence with a length
 * @returns The peak magnitude, or 0 for an empty block
 */
function calculatePeak(samples) {
  let loudest = 0;
  for (let index = 0; index < samples.length; index += 1) {
    const magnitude = Math.abs(samples[index]);
    // A NaN magnitude never compares greater, so it leaves the peak untouched.
    loudest = magnitude > loudest ? magnitude : loudest;
  }
  return loudest;
}
1100
/**
 * Tracks a smoothed RMS and peak level across successive sample blocks.
 * Rising levels are followed with fixed fast weights; falling levels decay
 * at the configured smoothing factor.
 */
var AudioEnergyAnalyzer = class {
  /**
   * @param smoothingFactor Weight kept from the previous value while the level falls; limited to 0..0.99. Default 0.85
   * @param noiseFloor Levels at or below this value are treated as silence. Default 0.01
   */
  constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
    this.smoothedRMS = 0;
    this.smoothedPeak = 0;
    this.smoothingFactor = Math.max(0, Math.min(0.99, smoothingFactor));
    this.noiseFloor = noiseFloor;
  }
  /**
   * Feed one block of samples and read back the smoothed levels.
   *
   * @param samples Audio samples (Float32Array)
   * @returns { rms, peak, energy } where energy is the 70/30 mix of rms and peak, doubled and capped at 1
   */
  process(samples) {
    const release = this.smoothingFactor;
    const floor = this.noiseFloor;
    const rawRMS = calculateRMS(samples);
    const rawPeak = calculatePeak(samples);
    // Noise gate: anything that does not exceed the floor counts as silence.
    const rmsInput = rawRMS > floor ? rawRMS : 0;
    const peakInput = rawPeak > floor ? rawPeak : 0;
    const previousRMS = this.smoothedRMS;
    const previousPeak = this.smoothedPeak;
    // Attack uses fixed weights (50/50 for RMS, 30/70 for peak); release
    // uses the configured smoothing factor.
    this.smoothedRMS = rmsInput > previousRMS
      ? previousRMS * 0.5 + rmsInput * 0.5
      : previousRMS * release + rmsInput * (1 - release);
    this.smoothedPeak = peakInput > previousPeak
      ? previousPeak * 0.3 + peakInput * 0.7
      : previousPeak * release + peakInput * (1 - release);
    const mixed = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
    return {
      rms: this.smoothedRMS,
      peak: this.smoothedPeak,
      // Scale up and clamp
      energy: Math.min(1, mixed * 2)
    };
  }
  /**
   * Forget all smoothed state
   */
  reset() {
    this.smoothedRMS = 0;
    this.smoothedPeak = 0;
  }
  /**
   * Smoothed RMS after the most recent process() call
   */
  get rms() {
    return this.smoothedRMS;
  }
  /**
   * Smoothed peak after the most recent process() call
   */
  get peak() {
    return this.smoothedPeak;
  }
};
1159
/**
 * Flags sudden rises in energy relative to the recent average.
 *
 * Keeps a sliding window of the latest energy values; a frame counts as
 * emphasis when it exceeds the mean of the earlier frames in the window by
 * more than the threshold.
 */
var EmphasisDetector = class {
  /**
   * @param historySize Number of frames to track; raised to 3 when smaller, because process() never reports emphasis on fewer frames. Default 10
   * @param emphasisThreshold Minimum energy increase to count as emphasis. Default 0.15
   */
  constructor(historySize = 10, emphasisThreshold = 0.15) {
    this.energyHistory = [];
    // A window below 3 would make the detector permanently silent, and a NaN
    // size would disable trimming and let the history grow without bound.
    this.historySize = Number.isNaN(historySize) ? 10 : Math.max(3, historySize);
    this.emphasisThreshold = emphasisThreshold;
  }
  /**
   * Process energy value and detect emphasis
   *
   * @param energy Current energy value (0-1); non-finite values are ignored and not recorded
   * @returns Object with isEmphasis flag and emphasisStrength (0-1; reaches 1 at an increase of 0.3)
   */
  process(energy) {
    if (!Number.isFinite(energy)) {
      // Recording NaN/Infinity would corrupt the average until the value
      // left the window, muting detection for historySize frames.
      return { isEmphasis: false, emphasisStrength: 0 };
    }
    this.energyHistory.push(energy);
    if (this.energyHistory.length > this.historySize) {
      this.energyHistory.shift();
    }
    if (this.energyHistory.length < 3) {
      return { isEmphasis: false, emphasisStrength: 0 };
    }
    // Baseline is the mean of every frame in the window except the new one.
    const prevFrames = this.energyHistory.slice(0, -1);
    const avgPrev = prevFrames.reduce((a, b) => a + b, 0) / prevFrames.length;
    const increase = energy - avgPrev;
    const isEmphasis = increase > this.emphasisThreshold;
    return {
      isEmphasis,
      emphasisStrength: isEmphasis ? Math.min(1, increase / 0.3) : 0
    };
  }
  /**
   * Reset detector state
   */
  reset() {
    this.energyHistory = [];
  }
};
877
1198
 
878
1199
  // src/telemetry/exporters/console.ts
879
1200
  var ConsoleExporter = class {
@@ -2511,7 +2832,7 @@ var CTC_VOCAB = [
2511
2832
  "Q",
2512
2833
  "Z"
2513
2834
  ];
2514
- var Wav2Vec2Inference = class {
2835
+ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2515
2836
  constructor(config) {
2516
2837
  this.modelId = "wav2vec2";
2517
2838
  this.session = null;
@@ -2520,6 +2841,10 @@ var Wav2Vec2Inference = class {
2520
2841
  this.isLoading = false;
2521
2842
  // Inference queue for handling concurrent calls
2522
2843
  this.inferenceQueue = Promise.resolve();
2844
+ // Session health: set to true if session.run() times out.
2845
+ // A timed-out session may have a zombie GPU/WASM dispatch still running,
2846
+ // so all future infer() calls reject immediately to prevent concurrent access.
2847
+ this.poisoned = false;
2523
2848
  this.config = config;
2524
2849
  this.numIdentityClasses = config.numIdentityClasses ?? 12;
2525
2850
  }
@@ -2529,6 +2854,10 @@ var Wav2Vec2Inference = class {
2529
2854
  get isLoaded() {
2530
2855
  return this.session !== null;
2531
2856
  }
2857
+ /** True if inference timed out and the session is permanently unusable */
2858
+ get isSessionPoisoned() {
2859
+ return this.poisoned;
2860
+ }
2532
2861
  /**
2533
2862
  * Load the ONNX model
2534
2863
  */
@@ -2678,12 +3007,23 @@ var Wav2Vec2Inference = class {
2678
3007
  logger2.debug("Running warmup inference to initialize GPU context");
2679
3008
  const warmupStart = performance.now();
2680
3009
  const silentAudio = new Float32Array(16e3);
2681
- await this.infer(silentAudio, 0);
3010
+ const WARMUP_TIMEOUT_MS = 15e3;
3011
+ const warmupResult = await Promise.race([
3012
+ this.infer(silentAudio, 0).then(() => "ok"),
3013
+ new Promise((r) => setTimeout(() => r("timeout"), WARMUP_TIMEOUT_MS))
3014
+ ]);
2682
3015
  const warmupTimeMs = performance.now() - warmupStart;
2683
- logger2.info("Warmup inference complete", {
2684
- warmupTimeMs: Math.round(warmupTimeMs),
2685
- backend: this._backend
2686
- });
3016
+ if (warmupResult === "timeout") {
3017
+ logger2.warn("Warmup inference timed out \u2014 GPU may be unresponsive. Continuing without warmup.", {
3018
+ timeoutMs: WARMUP_TIMEOUT_MS,
3019
+ backend: this._backend
3020
+ });
3021
+ } else {
3022
+ logger2.info("Warmup inference complete", {
3023
+ warmupTimeMs: Math.round(warmupTimeMs),
3024
+ backend: this._backend
3025
+ });
3026
+ }
2687
3027
  telemetry?.recordHistogram("omote.model.warmup_time", warmupTimeMs, {
2688
3028
  model: "wav2vec2",
2689
3029
  backend: this._backend
@@ -2717,6 +3057,9 @@ var Wav2Vec2Inference = class {
2717
3057
  if (!this.session) {
2718
3058
  throw new Error("Model not loaded. Call load() first.");
2719
3059
  }
3060
+ if (this.poisoned) {
3061
+ throw new Error("Wav2Vec2 session timed out \u2014 inference unavailable until page reload");
3062
+ }
2720
3063
  const audioSamplesCopy = new Float32Array(audioSamples);
2721
3064
  let audio;
2722
3065
  if (audioSamplesCopy.length === 16e3) {
@@ -2772,7 +3115,15 @@ var Wav2Vec2Inference = class {
2772
3115
  });
2773
3116
  try {
2774
3117
  const startTime = performance.now();
2775
- const results = await this.session.run(feeds);
3118
+ const results = await Promise.race([
3119
+ this.session.run(feeds),
3120
+ new Promise(
3121
+ (_, rej) => setTimeout(
3122
+ () => rej(new Error(`Wav2Vec2 inference timed out after ${_Wav2Vec2Inference.INFERENCE_TIMEOUT_MS}ms`)),
3123
+ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
3124
+ )
3125
+ )
3126
+ ]);
2776
3127
  const inferenceTimeMs = performance.now() - startTime;
2777
3128
  const asrOutput = results["asr_logits"];
2778
3129
  const blendshapeOutput = results["blendshapes"];
@@ -2826,50 +3177,411 @@ var Wav2Vec2Inference = class {
2826
3177
  inferenceTimeMs
2827
3178
  });
2828
3179
  } catch (err) {
3180
+ const errMsg = err instanceof Error ? err.message : String(err);
3181
+ if (errMsg.includes("timed out")) {
3182
+ this.poisoned = true;
3183
+ logger2.error("CRITICAL: Inference session timed out \u2014 LAM is dead. Page reload required.", {
3184
+ backend: this._backend,
3185
+ timeoutMs: _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS
3186
+ });
3187
+ } else {
3188
+ logger2.error("Inference failed", { error: errMsg, backend: this._backend });
3189
+ }
2829
3190
  span?.endWithError(err instanceof Error ? err : new Error(String(err)));
2830
3191
  telemetry?.incrementCounter("omote.inference.total", 1, {
2831
3192
  model: "wav2vec2",
2832
3193
  backend: this._backend,
2833
3194
  status: "error"
2834
3195
  });
2835
- reject(err);
3196
+ reject(err);
3197
+ }
3198
+ });
3199
+ });
3200
+ }
3201
+ /**
3202
+ * Get blendshape value by name for a specific frame
3203
+ */
3204
+ getBlendshape(blendshapes, name) {
3205
+ const index = LAM_BLENDSHAPES.indexOf(name);
3206
+ if (index === -1) {
3207
+ throw new Error(`Unknown blendshape: ${name}`);
3208
+ }
3209
+ return blendshapes[index];
3210
+ }
3211
+ /**
3212
+ * Dispose of the model and free resources
3213
+ */
3214
+ async dispose() {
3215
+ if (this.session) {
3216
+ await this.session.release();
3217
+ this.session = null;
3218
+ }
3219
+ }
3220
+ };
3221
+ _Wav2Vec2Inference.INFERENCE_TIMEOUT_MS = 5e3;
3222
+ /**
3223
+ * Check if WebGPU is available and working
3224
+ * (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
3225
+ */
3226
+ _Wav2Vec2Inference.isWebGPUAvailable = isWebGPUAvailable;
3227
+ var Wav2Vec2Inference = _Wav2Vec2Inference;
3228
+
3229
+ // src/audio/FullFacePipeline.ts
3230
+ var logger3 = createLogger("FullFacePipeline");
3231
/**
 * Convert 16-bit signed PCM bytes into float samples in [-1, 1).
 *
 * Accepts either a raw buffer or a typed-array/DataView over one. A view is
 * read through its own byteOffset/byteLength; handing a view straight to
 * `new Int16Array(view)` would instead convert it element by element and
 * ignore the window. A trailing odd byte is dropped. Samples are read in
 * platform byte order.
 *
 * @param buffer - ArrayBuffer, or a view over one, holding Int16 PCM
 * @returns Float32Array with one entry per complete 16-bit sample (sample / 32768)
 */
function pcm16ToFloat322(buffer) {
  let int16;
  if (ArrayBuffer.isView(buffer)) {
    const usableBytes = buffer.byteLength & ~1;
    if (buffer.byteOffset % 2 === 0) {
      int16 = new Int16Array(buffer.buffer, buffer.byteOffset, usableBytes / 2);
    } else {
      // Int16Array requires a 2-byte aligned offset, so copy the window to
      // a fresh buffer that starts at offset 0.
      const aligned = new Uint8Array(buffer.buffer, buffer.byteOffset, usableBytes).slice();
      int16 = new Int16Array(aligned.buffer);
    }
  } else {
    int16 = new Int16Array(buffer, 0, (buffer.byteLength & ~1) / 2);
  }
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  return float32;
}
3240
// Reverse lookup from blendshape name to its position in LAM_BLENDSHAPES.
var BLENDSHAPE_INDEX_MAP = /* @__PURE__ */ new Map(
  LAM_BLENDSHAPES.map((name, index) => [name, index])
);
// Constant-time membership test for the names in UPPER_FACE_BLENDSHAPES.
var UPPER_FACE_SET = new Set(UPPER_FACE_BLENDSHAPES);
3245
// Maps a lower-case emotion label onto one of the four emotions that have a
// blendshape mapping. Labels absent from this table are handled by the caller.
var EMOTION_LABEL_MAP = (() => {
  const table = {};
  const alias = (target, labels) => {
    for (const label of labels) {
      table[label] = target;
    }
  };
  // Direct labels
  alias("happy", ["happy"]);
  alias("sad", ["sad"]);
  alias("angry", ["angry"]);
  alias("neutral", ["neutral"]);
  // Natural language synonyms
  alias("happy", ["excited", "joyful", "cheerful", "delighted", "amused"]);
  alias("sad", ["melancholic", "sorrowful", "disappointed"]);
  alias("angry", ["frustrated", "irritated", "furious", "annoyed"]);
  // SenseVoice labels
  alias("sad", ["fearful"]);
  alias("angry", ["disgusted"]);
  alias("happy", ["surprised"]);
  return table;
})();
3269
/**
 * Drives audio playback and face animation together.
 *
 * Incoming PCM chunks are coalesced, scheduled for playback and pushed to the
 * LAM pipeline; a requestAnimationFrame loop then emits one merged blendshape
 * frame per tick. LAM output is passed through for every channel except the
 * upper-face ones, which are a weighted mix of LAM and emotion-driven values.
 *
 * Events emitted: "playback_start", "playback_complete", "full_frame_ready",
 * "lam_frame_ready", "emotion_frame_ready", "error".
 */
var _FullFacePipeline = class _FullFacePipeline extends EventEmitter {
  /**
   * @param options - Pipeline options. Reads `lam` (required; its `modelId`
   *   and `backend` pick the default audio delay), and optional `sampleRate`
   *   (16000), `emotionBlendFactor` (0.8), `lamBlendFactor` (0.2),
   *   `audioDelayMs` and `chunkTargetMs` (200).
   */
  constructor(options) {
    super();
    this.options = options;
    this.playbackStarted = false;
    this.monitorInterval = null;
    this.frameAnimationId = null;
    // Emotion state
    this.lastEmotionFrame = null;
    this.currentAudioEnergy = 0;
    // Stale frame detection
    this.lastNewFrameTime = 0;
    this.lastKnownLamFrame = null;
    this.staleWarningEmitted = false;
    const sampleRate = options.sampleRate ?? 16e3;
    this.emotionBlendFactor = options.emotionBlendFactor ?? 0.8;
    this.lamBlendFactor = options.lamBlendFactor ?? 0.2;
    // Default playback delay by model/backend: 750 ms for the CPU model,
    // 350 ms for a WASM backend, 50 ms otherwise.
    const autoDelay = options.lam.modelId === "wav2arkit_cpu" ? 750 : options.lam.backend === "wasm" ? 350 : 50;
    const audioDelayMs = options.audioDelayMs ?? autoDelay;
    this.scheduler = new AudioScheduler({
      sampleRate,
      initialLookaheadSec: audioDelayMs / 1e3
    });
    this.coalescer = new AudioChunkCoalescer({
      sampleRate,
      targetDurationMs: options.chunkTargetMs ?? 200
    });
    this.lamPipeline = new LAMPipeline({
      sampleRate,
      onError: (error) => {
        logger3.error("LAM inference error", { message: error.message, stack: error.stack });
        this.emit("error", error);
      }
    });
    this.emotionMapper = new EmotionToBlendshapeMapper({
      smoothingFactor: 0.15,
      confidenceThreshold: 0.3,
      intensity: 1,
      energyModulation: true
    });
    this.energyAnalyzer = new AudioEnergyAnalyzer();
  }
  /**
   * Initialize the pipeline
   */
  async initialize() {
    await this.scheduler.initialize();
  }
  /**
   * Set emotion label from backend (e.g., LLM response emotion).
   *
   * Converts a natural language emotion label into an EmotionFrame
   * (confidence 0.7 for the mapped emotion, 0.1 for each of the others) that
   * drives upper face blendshapes until it is cleared. start() and stop()
   * clear it, so call this after start().
   *
   * Supported labels: happy, excited, joyful, sad, melancholic, angry,
   * frustrated, neutral, etc. Anything that is not an own key of
   * EMOTION_LABEL_MAP - including non-string values and names inherited from
   * Object.prototype such as "constructor" - maps to "neutral".
   *
   * @param label - Emotion label string (case-insensitive, surrounding whitespace ignored)
   */
  setEmotionLabel(label) {
    const normalized = String(label ?? "").trim().toLowerCase();
    // Own-property check: a plain lookup would also resolve inherited names
    // and hand back a function or object instead of an emotion name.
    const mapped = Object.prototype.hasOwnProperty.call(EMOTION_LABEL_MAP, normalized) ? EMOTION_LABEL_MAP[normalized] : "neutral";
    const probabilities = {
      neutral: 0.1,
      happy: 0.1,
      angry: 0.1,
      sad: 0.1
    };
    probabilities[mapped] = 0.7;
    const frame = {
      emotion: mapped,
      confidence: 0.7,
      probabilities
    };
    this.lastEmotionFrame = frame;
    logger3.info("Emotion label set", { label, mapped });
  }
  /**
   * Clear any set emotion label.
   * Upper face channels then carry only the LAM values scaled by lamBlendFactor.
   */
  clearEmotionLabel() {
    this.lastEmotionFrame = null;
  }
  /**
   * Start a new playback session
   *
   * Resets all state (including any emotion label), warms up the scheduler
   * and starts the frame loop and the completion monitor.
   */
  start() {
    this.stopMonitoring();
    this.scheduler.reset();
    this.coalescer.reset();
    this.lamPipeline.reset();
    this.playbackStarted = false;
    this.lastEmotionFrame = null;
    this.currentAudioEnergy = 0;
    this.emotionMapper.reset();
    this.energyAnalyzer.reset();
    this.lastNewFrameTime = 0;
    this.lastKnownLamFrame = null;
    this.staleWarningEmitted = false;
    this.scheduler.warmup();
    this.startFrameLoop();
    this.startMonitoring();
  }
  /**
   * Receive audio chunk from network
   *
   * Audio-first design: schedules audio immediately, LAM runs in background.
   * This prevents LAM inference (50-300ms) from blocking audio scheduling.
   * Nothing happens until the coalescer hands back a combined chunk.
   *
   * @param chunk - Uint8Array containing Int16 PCM audio
   */
  async onAudioChunk(chunk) {
    const combined = this.coalescer.add(chunk);
    if (!combined) {
      return;
    }
    const float32 = pcm16ToFloat322(combined);
    const scheduleTime = await this.scheduler.schedule(float32);
    if (!this.playbackStarted) {
      this.playbackStarted = true;
      this.emit("playback_start", scheduleTime);
    }
    const { energy } = this.energyAnalyzer.process(float32);
    this.currentAudioEnergy = energy;
    // Not awaited on purpose: inference failures surface as "error" events.
    this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
      this.emit("error", err);
    });
  }
  /**
   * Get emotion frame for current animation.
   *
   * @returns `{ frame, energy }` where frame is the one produced by
   *   setEmotionLabel(), or null when no label is set, and energy is the
   *   latest audio energy. With a null frame the emotion contribution to the
   *   upper face is zero.
   */
  getEmotionFrame() {
    if (this.lastEmotionFrame) {
      return { frame: this.lastEmotionFrame, energy: this.currentAudioEnergy };
    }
    return { frame: null, energy: this.currentAudioEnergy };
  }
  /**
   * Merge LAM blendshapes with emotion upper face blendshapes
   *
   * @param lamFrame - 52 LAM blendshape weights, ordered like LAM_BLENDSHAPES
   * @param emotionFrame - Emotion frame, or null for no emotion overlay
   * @param audioEnergy - Audio energy forwarded to the emotion mapper
   * @returns `{ merged, emotionBlendshapes }`; merged is a new Float32Array(52)
   */
  mergeBlendshapes(lamFrame, emotionFrame, audioEnergy) {
    const merged = new Float32Array(52);
    let emotionBlendshapes;
    if (emotionFrame) {
      this.emotionMapper.mapFrame(emotionFrame, audioEnergy);
      // Advances the smoothing one step; the mapper currently ignores the delta.
      this.emotionMapper.update(33);
      emotionBlendshapes = this.emotionMapper.getCurrentBlendshapes();
    } else {
      emotionBlendshapes = {};
      for (const name of UPPER_FACE_BLENDSHAPES) {
        emotionBlendshapes[name] = 0;
      }
    }
    for (let i = 0; i < 52; i++) {
      const name = LAM_BLENDSHAPES[i];
      if (UPPER_FACE_SET.has(name)) {
        const emotionValue = emotionBlendshapes[name] ?? 0;
        const lamValue = lamFrame[i];
        merged[i] = emotionValue * this.emotionBlendFactor + lamValue * this.lamBlendFactor;
      } else {
        merged[i] = lamFrame[i];
      }
    }
    return { merged, emotionBlendshapes };
  }
  /**
   * Start frame animation loop
   *
   * Each tick emits the merged frame for the scheduler's current time. Before
   * the first LAM frame arrives, an emotion-only frame is emitted while a
   * label is set and the audio energy is above 0.05. Also logs one warning if
   * no new LAM frame has been seen for STALE_FRAME_THRESHOLD_MS during playback.
   */
  startFrameLoop() {
    const updateFrame = () => {
      const currentTime = this.scheduler.getCurrentTime();
      const lamFrame = this.lamPipeline.getFrameForTime(currentTime, this.options.lam);
      if (lamFrame) {
        if (lamFrame !== this.lastKnownLamFrame) {
          this.lastNewFrameTime = performance.now();
          this.lastKnownLamFrame = lamFrame;
          this.staleWarningEmitted = false;
        }
        const { frame: emotionFrame, energy } = this.getEmotionFrame();
        const { merged, emotionBlendshapes } = this.mergeBlendshapes(lamFrame, emotionFrame, energy);
        const fullFrame = {
          blendshapes: merged,
          lamBlendshapes: lamFrame,
          emotionBlendshapes,
          emotion: emotionFrame,
          timestamp: currentTime
        };
        this.emit("full_frame_ready", fullFrame);
        this.emit("lam_frame_ready", lamFrame);
        if (emotionFrame) {
          this.emit("emotion_frame_ready", emotionFrame);
        }
      } else if (this.playbackStarted && !this.lastKnownLamFrame) {
        const { frame: emotionFrame, energy } = this.getEmotionFrame();
        if (emotionFrame && energy > 0.05) {
          const startupFrame = new Float32Array(52);
          const { merged, emotionBlendshapes } = this.mergeBlendshapes(startupFrame, emotionFrame, energy);
          this.emit("full_frame_ready", {
            blendshapes: merged,
            lamBlendshapes: startupFrame,
            emotionBlendshapes,
            emotion: emotionFrame,
            timestamp: currentTime
          });
        }
      }
      if (this.playbackStarted && this.lastNewFrameTime > 0 && !this.staleWarningEmitted && performance.now() - this.lastNewFrameTime > _FullFacePipeline.STALE_FRAME_THRESHOLD_MS) {
        this.staleWarningEmitted = true;
        logger3.warn("LAM appears stalled \u2014 no new frames for 3+ seconds during playback", {
          staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
          queuedFrames: this.lamPipeline.queuedFrameCount
        });
      }
      this.frameAnimationId = requestAnimationFrame(updateFrame);
    };
    this.frameAnimationId = requestAnimationFrame(updateFrame);
  }
  /**
   * End of audio stream
   *
   * Flushes whatever the coalescer still holds through onAudioChunk(), then
   * flushes the LAM pipeline.
   */
  async end() {
    const remaining = this.coalescer.flush();
    if (remaining) {
      // NOTE(review): the flushed audio goes back through coalescer.add();
      // confirm add() returns it even when it is shorter than the target.
      const chunk = new Uint8Array(remaining);
      await this.onAudioChunk(chunk);
    }
    await this.lamPipeline.flush(this.options.lam);
  }
  /**
   * Stop playback immediately with smooth fade-out
   *
   * Stops the frame loop and monitor, cancels scheduled audio, resets all
   * state (including any emotion label) and emits "playback_complete".
   *
   * @param fadeOutMs - Fade-out duration passed to the scheduler. Default 50
   */
  async stop(fadeOutMs = 50) {
    this.stopMonitoring();
    await this.scheduler.cancelAll(fadeOutMs);
    this.coalescer.reset();
    this.lamPipeline.reset();
    this.playbackStarted = false;
    this.lastEmotionFrame = null;
    this.currentAudioEnergy = 0;
    this.emotionMapper.reset();
    this.energyAnalyzer.reset();
    this.lastNewFrameTime = 0;
    this.lastKnownLamFrame = null;
    this.staleWarningEmitted = false;
    this.emit("playback_complete", void 0);
  }
  /**
   * Start monitoring for playback completion
   *
   * Polls every 100 ms; once the scheduler reports completion and no LAM
   * frames are queued, emits "playback_complete" and stops monitoring.
   */
  startMonitoring() {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
    }
    this.monitorInterval = setInterval(() => {
      if (this.scheduler.isComplete() && this.lamPipeline.queuedFrameCount === 0) {
        this.emit("playback_complete", void 0);
        this.stopMonitoring();
      }
    }, 100);
  }
  /**
   * Stop monitoring
   *
   * Cancels both the completion poll and the animation frame loop.
   */
  stopMonitoring() {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
      this.monitorInterval = null;
    }
    if (this.frameAnimationId) {
      cancelAnimationFrame(this.frameAnimationId);
      this.frameAnimationId = null;
    }
  }
  /**
   * Get current pipeline state (for debugging/monitoring)
   */
  getState() {
    return {
      playbackStarted: this.playbackStarted,
      coalescerFill: this.coalescer.fillLevel,
      lamFill: this.lamPipeline.fillLevel,
      queuedLAMFrames: this.lamPipeline.queuedFrameCount,
      emotionLabel: this.lastEmotionFrame?.emotion ?? null,
      currentAudioEnergy: this.currentAudioEnergy,
      currentTime: this.scheduler.getCurrentTime(),
      playbackEndTime: this.scheduler.getPlaybackEndTime()
    };
  }
  /**
   * Check if an explicit emotion label is currently set
   */
  get hasEmotionLabel() {
    return this.lastEmotionFrame !== null;
  }
  /**
   * Cleanup resources
   */
  dispose() {
    this.stopMonitoring();
    this.scheduler.dispose();
    this.coalescer.reset();
    this.lamPipeline.reset();
    this.lastEmotionFrame = null;
    this.currentAudioEnergy = 0;
  }
};
// How long playback may go without a new LAM frame before the stall warning is logged.
_FullFacePipeline.STALE_FRAME_THRESHOLD_MS = 3e3;
var FullFacePipeline = _FullFacePipeline;
2873
3585
 
2874
3586
  // src/inference/kaldiFbank.ts
2875
3587
  function fft(re, im) {
@@ -3739,20 +4451,21 @@ var LipSyncWithFallback = class {
3739
4451
  try {
3740
4452
  return await this.implementation.load();
3741
4453
  } catch (error) {
3742
- logger6.warn("GPU model load failed, falling back to CPU model", {
3743
- error: error instanceof Error ? error.message : String(error)
3744
- });
3745
- try {
3746
- await this.implementation.dispose();
3747
- } catch {
3748
- }
3749
- this.implementation = new Wav2ArkitCpuInference({
3750
- modelUrl: this.config.cpuModelUrl
3751
- });
3752
- this.hasFallenBack = true;
3753
- logger6.info("Fallback to Wav2ArkitCpuInference successful");
3754
- return await this.implementation.load();
4454
+ return this.fallbackToCpu(error instanceof Error ? error.message : String(error));
4455
+ }
4456
+ }
4457
+ async fallbackToCpu(reason) {
4458
+ logger6.warn("GPU model load failed, falling back to CPU model", { reason });
4459
+ try {
4460
+ await this.implementation.dispose();
4461
+ } catch {
3755
4462
  }
4463
+ this.implementation = new Wav2ArkitCpuInference({
4464
+ modelUrl: this.config.cpuModelUrl
4465
+ });
4466
+ this.hasFallenBack = true;
4467
+ logger6.info("Fallback to Wav2ArkitCpuInference successful");
4468
+ return await this.implementation.load();
3756
4469
  }
3757
4470
  async infer(audioSamples, identityIndex) {
3758
4471
  return this.implementation.infer(audioSamples, identityIndex);
@@ -6893,121 +7606,639 @@ var AnimationGraph = class extends EventEmitter {
6893
7606
  }
6894
7607
  };
6895
7608
 
6896
- // src/animation/audioEnergy.ts
6897
- function calculateRMS(samples) {
6898
- if (samples.length === 0) return 0;
6899
- let sumSquares = 0;
6900
- for (let i = 0; i < samples.length; i++) {
6901
- sumSquares += samples[i] * samples[i];
6902
- }
6903
- return Math.sqrt(sumSquares / samples.length);
// src/animation/simplex2d.ts

// Permutation lookup, stored twice back-to-back (512 entries) so that
// nested lookups such as perm[a + perm[b]] stay in range without wrapping.
var perm = new Uint8Array(512);

// Eight gradient directions: the four diagonals and the four axis directions.
var grad2 = [
  [1, 1], [-1, 1], [1, -1], [-1, -1],
  [1, 0], [-1, 0], [0, 1], [0, -1]
];

// Fixed 256-entry permutation of the values 0..255.
var p = [
  151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225,
  140, 36, 103, 30, 69, 142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148,
  247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32,
  57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175,
  74, 165, 71, 134, 139, 48, 27, 166, 77, 146, 158, 231, 83, 111, 229, 122,
  60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245, 40, 244, 102, 143, 54,
  65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169,
  200, 196, 135, 130, 116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64,
  52, 217, 226, 250, 124, 123, 5, 202, 38, 147, 118, 126, 255, 82, 85, 212,
  207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42, 223, 183, 170, 213,
  119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9,
  129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104,
  218, 246, 97, 228, 251, 34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241,
  81, 51, 145, 235, 249, 14, 239, 107, 49, 192, 214, 31, 181, 199, 106, 157,
  184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150, 254, 138, 236, 205, 93,
  222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180
];

// Fill both halves of the lookup with the same 256 values.
perm.set(p, 0);
perm.set(p, 256);
6905
- function calculatePeak(samples) {
6906
- let peak = 0;
6907
- for (let i = 0; i < samples.length; i++) {
6908
- const abs = Math.abs(samples[i]);
6909
- if (abs > peak) peak = abs;
6910
- }
6911
- return peak;
// Skew factor applied to (x + y) when mapping input coordinates to the simplex grid.
var F2 = (Math.sqrt(3) - 1) * 0.5;
// Unskew factor applied to (i + j) when mapping grid cells back to input space.
var G2 = (3 - Math.sqrt(3)) / 6;

/**
 * Dot product of a 2-component gradient with the offset (x, y).
 *
 * @param g - Gradient; components are read from indices 0 and 1
 * @param x - Offset along the first axis
 * @param y - Offset along the second axis
 * @returns g[0] * x + g[1] * y
 */
function dot2(g, x, y) {
  const gx = g[0];
  const gy = g[1];
  return gx * x + gy * y;
}
6913
- var AudioEnergyAnalyzer = class {
6914
- /**
6915
- * @param smoothingFactor How much to smooth (0 = no smoothing, 1 = infinite smoothing). Default 0.85
6916
- * @param noiseFloor Minimum energy threshold to consider as signal. Default 0.01
6917
- */
6918
- constructor(smoothingFactor = 0.85, noiseFloor = 0.01) {
6919
- this.smoothedRMS = 0;
6920
- this.smoothedPeak = 0;
6921
- this.smoothingFactor = Math.max(0, Math.min(0.99, smoothingFactor));
6922
- this.noiseFloor = noiseFloor;
6923
- }
6924
- /**
6925
- * Process audio samples and return smoothed energy values
6926
- * @param samples Audio samples (Float32Array)
6927
- * @returns Object with rms and peak values
6928
- */
6929
- process(samples) {
6930
- const instantRMS = calculateRMS(samples);
6931
- const instantPeak = calculatePeak(samples);
6932
- const gatedRMS = instantRMS > this.noiseFloor ? instantRMS : 0;
6933
- const gatedPeak = instantPeak > this.noiseFloor ? instantPeak : 0;
6934
- if (gatedRMS > this.smoothedRMS) {
6935
- this.smoothedRMS = this.smoothedRMS * 0.5 + gatedRMS * 0.5;
6936
- } else {
6937
- this.smoothedRMS = this.smoothedRMS * this.smoothingFactor + gatedRMS * (1 - this.smoothingFactor);
6938
- }
6939
- if (gatedPeak > this.smoothedPeak) {
6940
- this.smoothedPeak = this.smoothedPeak * 0.3 + gatedPeak * 0.7;
6941
- } else {
6942
- this.smoothedPeak = this.smoothedPeak * this.smoothingFactor + gatedPeak * (1 - this.smoothingFactor);
6943
- }
6944
- const energy = this.smoothedRMS * 0.7 + this.smoothedPeak * 0.3;
/**
 * Evaluate 2D simplex noise at (x, y).
 *
 * The result is the sum of three corner contributions multiplied by 70.
 * The same input always produces the same output.
 *
 * @param x - First input coordinate
 * @param y - Second input coordinate
 * @returns Noise value for the given point
 */
function simplex2d(x, y) {
  // Skew the input point to find which grid cell contains it.
  const skew = (x + y) * F2;
  const cellI = Math.floor(x + skew);
  const cellJ = Math.floor(y + skew);

  // Unskew the cell origin and take the offset of the point from it.
  const unskew = (cellI + cellJ) * G2;
  const originX = cellI - unskew;
  const originY = cellJ - unskew;
  const dx0 = x - originX;
  const dy0 = y - originY;

  // Choose the middle corner: step along i when dx0 > dy0, otherwise along j.
  const stepAlongI = dx0 > dy0;
  const stepI = stepAlongI ? 1 : 0;
  const stepJ = stepAlongI ? 0 : 1;

  // Offsets from the middle and last corners.
  const dx1 = dx0 - stepI + G2;
  const dy1 = dy0 - stepJ + G2;
  const dx2 = dx0 - 1 + 2 * G2;
  const dy2 = dy0 - 1 + 2 * G2;

  // Hash the corner coordinates into gradient indices (0..7).
  const hashI = cellI & 255;
  const hashJ = cellJ & 255;
  const gradFirst = perm[hashI + perm[hashJ]] % 8;
  const gradMiddle = perm[hashI + stepI + perm[hashJ + stepJ]] % 8;
  const gradLast = perm[hashI + 1 + perm[hashJ + 1]] % 8;

  // Contribution of one corner: zero unless (0.5 - distance^2) is >= 0,
  // otherwise that falloff raised to the fourth power times the gradient dot.
  const cornerContribution = (gradIndex, dx, dy) => {
    let falloff = 0.5 - dx * dx - dy * dy;
    if (!(falloff >= 0)) {
      return 0;
    }
    falloff *= falloff;
    return falloff * falloff * dot2(grad2[gradIndex], dx, dy);
  };

  const first = cornerContribution(gradFirst, dx0, dy0);
  const middle = cornerContribution(gradMiddle, dx1, dy1);
  const last = cornerContribution(gradLast, dx2, dy2);
  return 70 * (first + middle + last);
}
7928
+
// src/animation/ProceduralLifeLayer.ts

// Phases shared by the blink and gaze-break state machines.
var PHASE_OPEN = 0;
var PHASE_CLOSING = 1;
var PHASE_CLOSED = 2;
var PHASE_OPENING = 3;

// Blink timing, compared against accumulated frame deltas (seconds).
var BLINK_CLOSE_DURATION = 0.06;
var BLINK_HOLD_DURATION = 0.04;
var BLINK_OPEN_DURATION = 0.15;
// Delay subtracted from the right eye's closing progress.
var BLINK_ASYMMETRY_DELAY = 0.008;

// Gaze-break timing: move away, hold, return.
var GAZE_BREAK_DURATION = 0.12;
var GAZE_BREAK_HOLD_DURATION = 0.3;
var GAZE_BREAK_RETURN_DURATION = 0.15;

// Eye micro-motion noise: time multipliers and fixed second noise coordinate.
var EYE_NOISE_X_FREQ = 0.8;
var EYE_NOISE_Y_FREQ = 0.6;
var EYE_NOISE_X_PHASE = 73.1;
var EYE_NOISE_Y_PHASE = 91.7;

// Brow noise: time multipliers per channel.
var BROW_INNER_UP_FREQ = 0.4;
var BROW_OUTER_LEFT_FREQ = 0.35;
var BROW_OUTER_RIGHT_FREQ = 0.38;
var BROW_DOWN_FREQ = 0.3;

// Brow noise: fixed second noise coordinate per channel.
var BROW_INNER_UP_PHASE = 0;
var BROW_OUTER_LEFT_PHASE = 17.3;
var BROW_OUTER_RIGHT_PHASE = 31.7;
var BROW_DOWN_LEFT_PHASE = 47.1;
var BROW_DOWN_RIGHT_PHASE = 59.3;

// Emphasis: frame-to-frame energy rise that triggers it, and its decay per second.
var EMPHASIS_ENERGY_THRESHOLD = 0.3;
var EMPHASIS_DECAY_RATE = 4;
/**
 * Restrict `v` to the range [min, max].
 *
 * @param v - Value to restrict
 * @param min - Lower bound, returned when v is below it
 * @param max - Upper bound, returned when v is above it
 * @returns min, max, or v unchanged (including when v is NaN)
 */
function clamp(v, min, max) {
  if (v < min) {
    return min;
  }
  if (v > max) {
    return max;
  }
  return v;
}
/**
 * Pick a pseudo-random number between `min` and `max` using Math.random().
 *
 * @param min - Start of the range
 * @param max - End of the range
 * @returns min plus a random fraction of (max - min)
 */
function randomRange(min, max) {
  const span = max - min;
  return min + Math.random() * span;
}
/**
 * Cubic ease 3t^2 - 2t^3. The input is not clamped here.
 *
 * @param t - Progress value
 * @returns t * t * (3 - 2 * t)
 */
function smoothStep(t) {
  const squared = t * t;
  return squared * (3 - 2 * t);
}
/**
 * Smoothly limit `v` so its magnitude approaches but does not exceed `max`,
 * using tanh(v / max) * max.
 *
 * @param v - Value to limit
 * @param max - Limit magnitude; 0 means "no deviation allowed"
 * @returns The limited value, or 0 when max is 0
 */
function softClamp(v, max) {
  // With max === 0 the division is 0/0 for v === 0, which would return NaN.
  if (max === 0) {
    return 0;
  }
  return Math.tanh(v / max) * max;
}
/**
 * Procedural idle animation layer.
 *
 * Each call to `update()` advances several independent generators and returns
 * their combined output: blinks, smoothed eye targeting with simplex
 * micro-motion and occasional gaze breaks, simplex-driven brow drift with an
 * energy-triggered emphasis pulse, and a small head sway/breathing delta.
 */
var ProceduralLifeLayer = class {
  /**
   * @param config - Optional overrides. Recognised keys and their defaults:
   *   blinkIntervalRange [2.5, 6], gazeBreakIntervalRange [3, 8],
   *   gazeBreakAmplitudeRange [0.15, 0.4], eyeNoiseAmplitude 0.06,
   *   browNoiseAmplitude 0.3, browNoiseSpeechMultiplier 2, breathingRate 0.25,
   *   posturalSwayAmplitude 0.002, eyeMaxDeviation 0.8, eyeSmoothing 15.
   */
  constructor(config) {
    // Blink state
    this.blinkTimer = 0;
    this.blinkPhase = PHASE_OPEN;
    this.blinkProgress = 0;
    this.asymmetryRight = 0.97;
    this.smoothedBlinkLeft = 0;
    this.smoothedBlinkRight = 0;
    // Eye contact (smoothed)
    this.smoothedEyeX = 0;
    this.smoothedEyeY = 0;
    // Eye micro-motion (continuous simplex noise, no discrete events)
    this.eyeNoiseTime = 0;
    // Gaze break state
    this.gazeBreakTimer = 0;
    this.gazeBreakPhase = PHASE_OPEN;
    this.gazeBreakProgress = 0;
    this.gazeBreakTargetX = 0;
    this.gazeBreakTargetY = 0;
    this.gazeBreakCurrentX = 0;
    this.gazeBreakCurrentY = 0;
    // Breathing / postural sway
    this.microMotionTime = 0;
    this.breathingPhase = 0;
    // Brow noise
    this.noiseTime = 0;
    this.previousEnergy = 0;
    this.emphasisLevel = 0;
    // Configuration (nullish values fall back to defaults)
    this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
    this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
    this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
    this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;
    this.browNoiseAmplitude = config?.browNoiseAmplitude ?? 0.3;
    this.browNoiseSpeechMultiplier = config?.browNoiseSpeechMultiplier ?? 2;
    this.breathingRate = config?.breathingRate ?? 0.25;
    this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
    this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
    this.eyeSmoothing = config?.eyeSmoothing ?? 15;
    // First blink / gaze break are scheduled at a random point in their ranges.
    this.blinkInterval = randomRange(...this.blinkIntervalRange);
    this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
  }
  /**
   * Update the life layer and produce output for this frame.
   *
   * @param delta - Time since last frame in seconds. Non-finite or negative
   *   values are treated as 0 so a bad frame time cannot corrupt internal state.
   * @param input - Per-frame input (eye target, audio energy, speaking state)
   * @returns Blendshape values and head rotation deltas
   */
  update(delta, input) {
    const eyeTargetX = input?.eyeTargetX ?? 0;
    const eyeTargetY = input?.eyeTargetY ?? 0;
    const audioEnergy = input?.audioEnergy ?? 0;
    const isSpeaking = input?.isSpeaking ?? false;
    // Every accumulator below is advanced with `+= dt`; a NaN, infinite or
    // negative delta would otherwise stick in the timers and smoothed values.
    const dt = Number.isFinite(delta) && delta > 0 ? delta : 0;
    // Capped step used only for the exponential smoothing factors.
    const safeDelta = Math.min(dt, 0.1);
    const blendshapes = {};

    // --- Blinks ---
    this.updateBlinks(dt);
    const blinkSmoothing = 45;
    const blinkValues = this.getBlinkValues();
    this.smoothedBlinkLeft += (blinkValues.left - this.smoothedBlinkLeft) * Math.min(1, safeDelta * blinkSmoothing);
    this.smoothedBlinkRight += (blinkValues.right - this.smoothedBlinkRight) * Math.min(1, safeDelta * blinkSmoothing);
    blendshapes["eyeBlinkLeft"] = this.smoothedBlinkLeft;
    blendshapes["eyeBlinkRight"] = this.smoothedBlinkRight;

    // --- Eye direction: smoothed target + gaze break offset + micro-motion ---
    this.smoothedEyeX += (eyeTargetX - this.smoothedEyeX) * Math.min(1, safeDelta * this.eyeSmoothing);
    this.smoothedEyeY += (eyeTargetY - this.smoothedEyeY) * Math.min(1, safeDelta * this.eyeSmoothing);
    this.eyeNoiseTime += dt;
    const microMotion = this.getEyeMicroMotion();
    this.updateGazeBreaks(dt);
    const finalEyeX = this.smoothedEyeX + this.gazeBreakCurrentX + microMotion.x;
    const finalEyeY = this.smoothedEyeY + this.gazeBreakCurrentY + microMotion.y;
    const clampedX = softClamp(finalEyeX, this.eyeMaxDeviation);
    const clampedY = softClamp(finalEyeY, this.eyeMaxDeviation);
    // Inside the dead zone the output ramps quadratically (v * v / deadZone)
    // instead of linearly, so tiny offsets produce even smaller look values.
    const deadZone = 0.02;
    const lookRight = clampedX > deadZone ? clampedX : clampedX > 0 ? clampedX * (clampedX / deadZone) : 0;
    const lookLeft = clampedX < -deadZone ? -clampedX : clampedX < 0 ? -clampedX * (-clampedX / deadZone) : 0;
    const lookUp = clampedY > deadZone ? clampedY : clampedY > 0 ? clampedY * (clampedY / deadZone) : 0;
    const lookDown = clampedY < -deadZone ? -clampedY : clampedY < 0 ? -clampedY * (-clampedY / deadZone) : 0;
    // Positive X drives the left eye inward and the right eye outward.
    blendshapes["eyeLookInLeft"] = lookRight;
    blendshapes["eyeLookOutLeft"] = lookLeft;
    blendshapes["eyeLookInRight"] = lookLeft;
    blendshapes["eyeLookOutRight"] = lookRight;
    blendshapes["eyeLookUpLeft"] = lookUp;
    blendshapes["eyeLookUpRight"] = lookUp;
    blendshapes["eyeLookDownLeft"] = lookDown;
    blendshapes["eyeLookDownRight"] = lookDown;

    // --- Brows ---
    this.updateBrowNoise(dt, audioEnergy, isSpeaking, blendshapes);

    // --- Head: breathing plus a sum of slow sines for sway ---
    this.microMotionTime += dt;
    this.breathingPhase += dt * this.breathingRate * Math.PI * 2;
    const breathingY = Math.sin(this.breathingPhase) * 3e-3;
    const swayAmp = this.posturalSwayAmplitude;
    const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
    const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
    return {
      blendshapes,
      headDelta: {
        yaw: swayX,
        pitch: breathingY + swayY
      }
    };
  }
  /**
   * Reset all internal state to initial values.
   *
   * Configuration is kept; the next blink and gaze-break intervals are re-rolled.
   */
  reset() {
    this.blinkTimer = 0;
    this.blinkInterval = randomRange(...this.blinkIntervalRange);
    this.blinkPhase = PHASE_OPEN;
    this.blinkProgress = 0;
    this.asymmetryRight = 0.97;
    this.smoothedBlinkLeft = 0;
    this.smoothedBlinkRight = 0;
    this.smoothedEyeX = 0;
    this.smoothedEyeY = 0;
    this.eyeNoiseTime = 0;
    this.gazeBreakTimer = 0;
    this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
    this.gazeBreakPhase = PHASE_OPEN;
    this.gazeBreakProgress = 0;
    this.gazeBreakTargetX = 0;
    this.gazeBreakTargetY = 0;
    this.gazeBreakCurrentX = 0;
    this.gazeBreakCurrentY = 0;
    this.microMotionTime = 0;
    this.breathingPhase = 0;
    this.noiseTime = 0;
    this.previousEnergy = 0;
    this.emphasisLevel = 0;
  }
  // =====================================================================
  // PRIVATE: Blink system
  // =====================================================================
  /**
   * Advance the blink timer and the OPEN -> CLOSING -> CLOSED -> OPENING -> OPEN
   * state machine by `delta` seconds. At most one phase transition per call.
   */
  updateBlinks(delta) {
    this.blinkTimer += delta;
    if (this.blinkTimer >= this.blinkInterval && this.blinkPhase === PHASE_OPEN) {
      this.blinkPhase = PHASE_CLOSING;
      this.blinkProgress = 0;
      this.blinkTimer = 0;
      this.blinkInterval = randomRange(...this.blinkIntervalRange);
      // Per-blink scale for the right eye, in [0.95, 1.03).
      // NOTE(review): values above 1 let eyeBlinkRight slightly exceed 1 — confirm intended.
      this.asymmetryRight = 0.95 + Math.random() * 0.08;
    }
    if (this.blinkPhase > PHASE_OPEN) {
      this.blinkProgress += delta;
      if (this.blinkPhase === PHASE_CLOSING) {
        if (this.blinkProgress >= BLINK_CLOSE_DURATION) {
          this.blinkPhase = PHASE_CLOSED;
          this.blinkProgress = 0;
        }
      } else if (this.blinkPhase === PHASE_CLOSED) {
        if (this.blinkProgress >= BLINK_HOLD_DURATION) {
          this.blinkPhase = PHASE_OPENING;
          this.blinkProgress = 0;
        }
      } else if (this.blinkPhase === PHASE_OPENING) {
        if (this.blinkProgress >= BLINK_OPEN_DURATION) {
          this.blinkPhase = PHASE_OPEN;
          this.blinkProgress = 0;
        }
      }
    }
  }
  /**
   * Raw (unsmoothed) blink targets for the current phase.
   *
   * @returns `{ left, right }`; the right value is scaled by `asymmetryRight`
   */
  getBlinkValues() {
    if (this.blinkPhase === PHASE_OPEN) {
      return { left: 0, right: 0 };
    }
    if (this.blinkPhase === PHASE_CLOSING) {
      // Cubic ease-in; the right eye's progress lags by BLINK_ASYMMETRY_DELAY.
      const t2 = Math.min(1, this.blinkProgress / BLINK_CLOSE_DURATION);
      const eased2 = t2 * t2 * t2;
      const tRight = Math.max(0, Math.min(1, (this.blinkProgress - BLINK_ASYMMETRY_DELAY) / BLINK_CLOSE_DURATION));
      return {
        left: eased2,
        right: tRight * tRight * tRight * this.asymmetryRight
      };
    }
    if (this.blinkPhase === PHASE_CLOSED) {
      return { left: 1, right: this.asymmetryRight };
    }
    // PHASE_OPENING: smoothstep back to open.
    const t = Math.min(1, this.blinkProgress / BLINK_OPEN_DURATION);
    const eased = smoothStep(t);
    return {
      left: 1 - eased,
      right: (1 - eased) * this.asymmetryRight
    };
  }
  // =====================================================================
  // PRIVATE: Eye micro-motion (continuous simplex noise)
  // =====================================================================
  /**
   * Sample the eye micro-motion offsets at the current `eyeNoiseTime`.
   *
   * @returns `{ x, y }`; y uses 0.7 of the configured amplitude
   */
  getEyeMicroMotion() {
    const amp = this.eyeNoiseAmplitude;
    const x = simplex2d(this.eyeNoiseTime * EYE_NOISE_X_FREQ, EYE_NOISE_X_PHASE) * amp;
    const y = simplex2d(this.eyeNoiseTime * EYE_NOISE_Y_FREQ, EYE_NOISE_Y_PHASE) * amp * 0.7;
    return { x, y };
  }
  // =====================================================================
  // PRIVATE: Gaze breaks
  // =====================================================================
  /**
   * Advance the gaze-break state machine by `delta` seconds and update
   * `gazeBreakCurrentX/Y`. The PHASE_* constants are reused:
   * CLOSING = moving to the target, CLOSED = holding, OPENING = returning.
   */
  updateGazeBreaks(delta) {
    this.gazeBreakTimer += delta;
    if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
      this.gazeBreakPhase = PHASE_CLOSING;
      this.gazeBreakProgress = 0;
      this.gazeBreakTimer = 0;
      const amp = randomRange(...this.gazeBreakAmplitudeRange);
      // Horizontal target spans [-amp, amp); vertical is limited to 0.2 * amp either way.
      this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
      this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
      this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
    }
    if (this.gazeBreakPhase > PHASE_OPEN) {
      this.gazeBreakProgress += delta;
      if (this.gazeBreakPhase === PHASE_CLOSING) {
        const t = Math.min(1, this.gazeBreakProgress / GAZE_BREAK_DURATION);
        const eased = smoothStep(t);
        this.gazeBreakCurrentX = this.gazeBreakTargetX * eased;
        this.gazeBreakCurrentY = this.gazeBreakTargetY * eased;
        if (this.gazeBreakProgress >= GAZE_BREAK_DURATION) {
          this.gazeBreakPhase = PHASE_CLOSED;
          this.gazeBreakProgress = 0;
        }
      } else if (this.gazeBreakPhase === PHASE_CLOSED) {
        this.gazeBreakCurrentX = this.gazeBreakTargetX;
        this.gazeBreakCurrentY = this.gazeBreakTargetY;
        if (this.gazeBreakProgress >= GAZE_BREAK_HOLD_DURATION) {
          this.gazeBreakPhase = PHASE_OPENING;
          this.gazeBreakProgress = 0;
        }
      } else if (this.gazeBreakPhase === PHASE_OPENING) {
        const t = Math.min(1, this.gazeBreakProgress / GAZE_BREAK_RETURN_DURATION);
        const eased = smoothStep(t);
        this.gazeBreakCurrentX = this.gazeBreakTargetX * (1 - eased);
        this.gazeBreakCurrentY = this.gazeBreakTargetY * (1 - eased);
        if (this.gazeBreakProgress >= GAZE_BREAK_RETURN_DURATION) {
          this.gazeBreakPhase = PHASE_OPEN;
          this.gazeBreakProgress = 0;
          this.gazeBreakCurrentX = 0;
          this.gazeBreakCurrentY = 0;
        }
      }
    } else {
      this.gazeBreakCurrentX = 0;
      this.gazeBreakCurrentY = 0;
    }
  }
  // =====================================================================
  // PRIVATE: Brow noise (simplex-driven organic drift)
  // =====================================================================
  /**
   * Write the five brow blendshapes into `blendshapes`.
   *
   * A frame-to-frame rise in `audioEnergy` above EMPHASIS_ENERGY_THRESHOLD
   * sets the emphasis level to 1; it then decays linearly at
   * EMPHASIS_DECAY_RATE per second and is added to browInnerUp only.
   * While speaking with non-zero energy, the noise amplitude is multiplied
   * by `browNoiseSpeechMultiplier`.
   */
  updateBrowNoise(delta, audioEnergy, isSpeaking, blendshapes) {
    this.noiseTime += delta;
    const energyDelta = audioEnergy - this.previousEnergy;
    if (energyDelta > EMPHASIS_ENERGY_THRESHOLD) {
      this.emphasisLevel = 1;
    }
    this.emphasisLevel = Math.max(0, this.emphasisLevel - delta * EMPHASIS_DECAY_RATE);
    this.previousEnergy = audioEnergy;
    const speechMul = isSpeaking && audioEnergy > 0 ? this.browNoiseSpeechMultiplier : 1;
    const amp = this.browNoiseAmplitude * speechMul;
    // Each channel remaps its noise sample with (n * 0.5 + 0.5) before scaling.
    const innerUpNoise = simplex2d(this.noiseTime * BROW_INNER_UP_FREQ, BROW_INNER_UP_PHASE);
    const innerUpBase = (innerUpNoise * 0.5 + 0.5) * amp * 0.83;
    const innerUpEmphasis = this.emphasisLevel * 0.25;
    blendshapes["browInnerUp"] = clamp(innerUpBase + innerUpEmphasis, 0, 1);
    const outerLeftNoise = simplex2d(this.noiseTime * BROW_OUTER_LEFT_FREQ, BROW_OUTER_LEFT_PHASE);
    blendshapes["browOuterUpLeft"] = clamp((outerLeftNoise * 0.5 + 0.5) * amp * 0.5, 0, 1);
    const outerRightNoise = simplex2d(this.noiseTime * BROW_OUTER_RIGHT_FREQ, BROW_OUTER_RIGHT_PHASE);
    blendshapes["browOuterUpRight"] = clamp((outerRightNoise * 0.5 + 0.5) * amp * 0.5, 0, 1);
    const downLeftNoise = simplex2d(this.noiseTime * BROW_DOWN_FREQ, BROW_DOWN_LEFT_PHASE);
    blendshapes["browDownLeft"] = clamp((downLeftNoise * 0.5 + 0.5) * amp * 0.33, 0, 1);
    const downRightNoise = simplex2d(this.noiseTime * BROW_DOWN_FREQ, BROW_DOWN_RIGHT_PHASE);
    blendshapes["browDownRight"] = clamp((downRightNoise * 0.5 + 0.5) * amp * 0.33, 0, 1);
  }
};
8236
+
// ../types/dist/index.mjs
var PROTOCOL_VERSION = 1;

/**
 * Structural check for a protocol event.
 *
 * @param obj - Any value
 * @returns true when obj is a non-null object that has the "v", "type" and
 *   "ts" properties (own or inherited); false otherwise
 */
function isProtocolEvent(obj) {
  if (typeof obj !== "object" || obj === null) {
    return false;
  }
  const requiredKeys = ["v", "type", "ts"];
  return requiredKeys.every((key) => key in obj);
}
7011
8242
  export {
7012
8243
  ARKIT_BLENDSHAPES,
7013
8244
  AgentCoreAdapter,
@@ -7021,12 +8252,15 @@ export {
7021
8252
  ConversationOrchestrator,
7022
8253
  DEFAULT_ANIMATION_CONFIG,
7023
8254
  DEFAULT_LOGGING_CONFIG,
8255
+ EMOTION_ARKIT_MAP,
7024
8256
  EMOTION_NAMES,
7025
8257
  EMOTION_VECTOR_SIZE,
7026
8258
  EmotionController,
7027
8259
  EmotionPresets,
8260
+ EmotionToBlendshapeMapper,
7028
8261
  EmphasisDetector,
7029
8262
  EventEmitter,
8263
+ FullFacePipeline,
7030
8264
  INFERENCE_LATENCY_BUCKETS,
7031
8265
  InterruptionHandler,
7032
8266
  LAMPipeline,
@@ -7038,6 +8272,8 @@ export {
7038
8272
  ModelCache,
7039
8273
  OTLPExporter,
7040
8274
  OmoteTelemetry,
8275
+ PROTOCOL_VERSION,
8276
+ ProceduralLifeLayer,
7041
8277
  RingBuffer,
7042
8278
  SafariSpeechRecognition,
7043
8279
  SenseVoiceInference,
@@ -7045,6 +8281,7 @@ export {
7045
8281
  SileroVADWorker,
7046
8282
  SyncedAudioPipeline,
7047
8283
  TenantManager,
8284
+ UPPER_FACE_BLENDSHAPES,
7048
8285
  WAV2ARKIT_BLENDSHAPES,
7049
8286
  Wav2ArkitCpuInference,
7050
8287
  Wav2Vec2Inference,
@@ -7083,6 +8320,7 @@ export {
7083
8320
  isIOSSafari,
7084
8321
  isMobile,
7085
8322
  isOnnxRuntimeLoaded,
8323
+ isProtocolEvent,
7086
8324
  isSafari,
7087
8325
  isSpeechRecognitionAvailable,
7088
8326
  isWebGPUAvailable,