@omote/core 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2552,7 +2552,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2552
2552
  } else {
2553
2553
  logger3.info("Fetching external model data", {
2554
2554
  dataUrl,
2555
- note: "This may be a large download (383MB+)"
2555
+ note: "This may be a large download"
2556
2556
  });
2557
2557
  externalDataBuffer = await fetchWithCache(dataUrl);
2558
2558
  }
@@ -2560,6 +2560,9 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2560
2560
  size: formatBytes(externalDataBuffer.byteLength)
2561
2561
  });
2562
2562
  } catch (err) {
2563
+ if (typeof this.config.externalDataUrl === "string") {
2564
+ throw new Error(`Failed to fetch external data: ${dataUrl} \u2014 ${err.message}`);
2565
+ }
2563
2566
  logger3.debug("No external data file found (single-file model)", {
2564
2567
  dataUrl,
2565
2568
  error: err.message
@@ -2683,28 +2686,6 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2683
2686
  };
2684
2687
  return this.queueInference(feeds);
2685
2688
  }
2686
- /**
2687
- * Decode CTC logits to text using greedy decoding
2688
- */
2689
- decodeCTC(logits) {
2690
- const tokens = [];
2691
- let prevToken = -1;
2692
- for (const frame of logits) {
2693
- let maxIdx = 0;
2694
- let maxVal = frame[0];
2695
- for (let i = 1; i < frame.length; i++) {
2696
- if (frame[i] > maxVal) {
2697
- maxVal = frame[i];
2698
- maxIdx = i;
2699
- }
2700
- }
2701
- if (maxIdx !== prevToken && maxIdx !== 0) {
2702
- tokens.push(maxIdx);
2703
- }
2704
- prevToken = maxIdx;
2705
- }
2706
- return tokens.map((t) => CTC_VOCAB[t] === "|" ? " " : CTC_VOCAB[t]).join("");
2707
- }
2708
2689
  /**
2709
2690
  * Queue inference to serialize ONNX session calls
2710
2691
  */
@@ -2732,37 +2713,25 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2732
2713
  })
2733
2714
  ]);
2734
2715
  const inferenceTimeMs = performance.now() - startTime;
2735
- const asrOutput = results["asr_logits"];
2736
2716
  const blendshapeOutput = results["blendshapes"];
2737
- if (!asrOutput || !blendshapeOutput) {
2738
- throw new Error("Missing outputs from model");
2717
+ if (!blendshapeOutput) {
2718
+ throw new Error("Missing blendshapes output from model");
2739
2719
  }
2740
- const asrData = asrOutput.data;
2741
2720
  const blendshapeData = blendshapeOutput.data;
2742
- const numASRFrames = asrOutput.dims[1];
2743
2721
  const numA2EFrames = blendshapeOutput.dims[1];
2744
- const asrVocabSize = asrOutput.dims[2];
2745
2722
  const numBlendshapes = blendshapeOutput.dims[2];
2746
- const asrLogits = [];
2747
2723
  const blendshapes = [];
2748
- for (let f = 0; f < numASRFrames; f++) {
2749
- asrLogits.push(asrData.slice(f * asrVocabSize, (f + 1) * asrVocabSize));
2750
- }
2751
2724
  for (let f = 0; f < numA2EFrames; f++) {
2752
2725
  const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
2753
2726
  blendshapes.push(symmetrizeBlendshapes(rawFrame));
2754
2727
  }
2755
- const text = this.decodeCTC(asrLogits);
2756
2728
  logger3.trace("Inference completed", {
2757
2729
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
2758
- numA2EFrames,
2759
- numASRFrames,
2760
- textLength: text.length
2730
+ numA2EFrames
2761
2731
  });
2762
2732
  span?.setAttributes({
2763
2733
  "inference.duration_ms": inferenceTimeMs,
2764
- "inference.a2e_frames": numA2EFrames,
2765
- "inference.asr_frames": numASRFrames
2734
+ "inference.a2e_frames": numA2EFrames
2766
2735
  });
2767
2736
  span?.end();
2768
2737
  telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
@@ -2776,11 +2745,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
2776
2745
  });
2777
2746
  resolve({
2778
2747
  blendshapes,
2779
- asrLogits,
2780
- text,
2781
2748
  numFrames: numA2EFrames,
2782
- numA2EFrames,
2783
- numASRFrames,
2784
2749
  inferenceTimeMs
2785
2750
  });
2786
2751
  } catch (err) {
@@ -5143,6 +5108,51 @@ var SenseVoiceWorker = class {
5143
5108
  }
5144
5109
  };
5145
5110
 
5111
+ // src/inference/defaultModelUrls.ts
5112
+ var HF = "https://huggingface.co";
5113
+ var HF_MODEL_URLS = {
5114
+ /** LAM A2E model — fp16 external data (385KB graph + 192MB weights, WebGPU) — 52 ARKit blendshapes */
5115
+ lam: `${HF}/omote-ai/lam-a2e/resolve/main/model_fp16.onnx`,
5116
+ /** wav2arkit_cpu A2E model graph (1.86MB, WASM) — Safari/iOS fallback */
5117
+ wav2arkitCpu: `${HF}/myned-ai/wav2arkit_cpu/resolve/main/wav2arkit_cpu.onnx`,
5118
+ /** SenseVoice ASR model (228MB int8, WASM) — speech recognition + emotion + language */
5119
+ senseVoice: `${HF}/omote-ai/sensevoice-asr/resolve/main/model.int8.onnx`,
5120
+ /** Silero VAD model (~2MB, WASM) — voice activity detection */
5121
+ sileroVad: `${HF}/deepghs/silero-vad-onnx/resolve/main/silero_vad.onnx`
5122
+ };
5123
+ var _overrides = {};
5124
+ var DEFAULT_MODEL_URLS = new Proxy(
5125
+ {},
5126
+ {
5127
+ get(_target, prop) {
5128
+ const key = prop;
5129
+ return _overrides[key] ?? HF_MODEL_URLS[key];
5130
+ },
5131
+ ownKeys() {
5132
+ return Object.keys(HF_MODEL_URLS);
5133
+ },
5134
+ getOwnPropertyDescriptor(_target, prop) {
5135
+ if (prop in HF_MODEL_URLS) {
5136
+ return { configurable: true, enumerable: true, value: this.get(_target, prop, _target) };
5137
+ }
5138
+ return void 0;
5139
+ }
5140
+ }
5141
+ );
5142
+ function configureModelUrls(urls) {
5143
+ for (const [key, url] of Object.entries(urls)) {
5144
+ if (key in HF_MODEL_URLS && typeof url === "string") {
5145
+ _overrides[key] = url;
5146
+ }
5147
+ }
5148
+ }
5149
+ function resetModelUrls() {
5150
+ for (const key of Object.keys(_overrides)) {
5151
+ delete _overrides[key];
5152
+ }
5153
+ }
5154
+ var HF_CDN_URLS = HF_MODEL_URLS;
5155
+
5146
5156
  // src/inference/UnifiedInferenceWorker.ts
5147
5157
  var logger8 = createLogger("UnifiedInferenceWorker");
5148
5158
  var WASM_CDN_PATH3 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
@@ -6384,11 +6394,12 @@ var SileroVADUnifiedAdapter = class {
6384
6394
 
6385
6395
  // src/inference/createSenseVoice.ts
6386
6396
  var logger9 = createLogger("createSenseVoice");
6387
- function createSenseVoice(config) {
6397
+ function createSenseVoice(config = {}) {
6398
+ const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.senseVoice;
6388
6399
  if (config.unifiedWorker) {
6389
6400
  logger9.info("Creating SenseVoiceUnifiedAdapter (shared unified worker)");
6390
6401
  return new SenseVoiceUnifiedAdapter(config.unifiedWorker, {
6391
- modelUrl: config.modelUrl,
6402
+ modelUrl,
6392
6403
  tokensUrl: config.tokensUrl,
6393
6404
  language: config.language,
6394
6405
  textNorm: config.textNorm
@@ -6401,7 +6412,7 @@ function createSenseVoice(config) {
6401
6412
  }
6402
6413
  logger9.info("Creating SenseVoiceWorker (off-main-thread)");
6403
6414
  return new SenseVoiceWorker({
6404
- modelUrl: config.modelUrl,
6415
+ modelUrl,
6405
6416
  tokensUrl: config.tokensUrl,
6406
6417
  language: config.language,
6407
6418
  textNorm: config.textNorm
@@ -6410,7 +6421,7 @@ function createSenseVoice(config) {
6410
6421
  if (useWorker === false) {
6411
6422
  logger9.info("Creating SenseVoiceInference (main thread)");
6412
6423
  return new SenseVoiceInference({
6413
- modelUrl: config.modelUrl,
6424
+ modelUrl,
6414
6425
  tokensUrl: config.tokensUrl,
6415
6426
  language: config.language,
6416
6427
  textNorm: config.textNorm
@@ -6419,7 +6430,7 @@ function createSenseVoice(config) {
6419
6430
  if (SenseVoiceWorker.isSupported() && !isIOS()) {
6420
6431
  logger9.info("Auto-detected: creating SenseVoiceWorker (off-main-thread)");
6421
6432
  return new SenseVoiceWorker({
6422
- modelUrl: config.modelUrl,
6433
+ modelUrl,
6423
6434
  tokensUrl: config.tokensUrl,
6424
6435
  language: config.language,
6425
6436
  textNorm: config.textNorm
@@ -6429,7 +6440,7 @@ function createSenseVoice(config) {
6429
6440
  reason: isIOS() ? "iOS (shared ORT instance)" : "Worker unsupported"
6430
6441
  });
6431
6442
  return new SenseVoiceInference({
6432
- modelUrl: config.modelUrl,
6443
+ modelUrl,
6433
6444
  tokensUrl: config.tokensUrl,
6434
6445
  language: config.language,
6435
6446
  textNorm: config.textNorm
@@ -7296,9 +7307,11 @@ var Wav2ArkitCpuWorker = class {
7296
7307
 
7297
7308
  // src/inference/createA2E.ts
7298
7309
  var logger12 = createLogger("createA2E");
7299
- function createA2E(config) {
7310
+ function createA2E(config = {}) {
7300
7311
  const mode = config.mode ?? "auto";
7301
7312
  const fallbackOnError = config.fallbackOnError ?? true;
7313
+ const gpuModelUrl = config.gpuModelUrl ?? DEFAULT_MODEL_URLS.lam;
7314
+ const cpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
7302
7315
  let useCpu;
7303
7316
  if (mode === "cpu") {
7304
7317
  useCpu = true;
@@ -7317,23 +7330,24 @@ function createA2E(config) {
7317
7330
  if (config.unifiedWorker) {
7318
7331
  logger12.info("Creating Wav2ArkitCpuUnifiedAdapter (404MB, WASM, shared unified worker)");
7319
7332
  return new Wav2ArkitCpuUnifiedAdapter(config.unifiedWorker, {
7320
- modelUrl: config.cpuModelUrl
7333
+ modelUrl: cpuModelUrl
7321
7334
  });
7322
7335
  }
7323
7336
  if (config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7324
7337
  logger12.info("Creating Wav2ArkitCpuWorker (404MB, WASM, off-main-thread)");
7325
7338
  return new Wav2ArkitCpuWorker({
7326
- modelUrl: config.cpuModelUrl
7339
+ modelUrl: cpuModelUrl
7327
7340
  });
7328
7341
  }
7329
7342
  logger12.info("Creating Wav2ArkitCpuInference (404MB, WASM)");
7330
7343
  return new Wav2ArkitCpuInference({
7331
- modelUrl: config.cpuModelUrl
7344
+ modelUrl: cpuModelUrl
7332
7345
  });
7333
7346
  }
7347
+ const gpuExternalDataUrl = config.gpuExternalDataUrl !== void 0 ? config.gpuExternalDataUrl : void 0;
7334
7348
  const gpuInstance = new Wav2Vec2Inference({
7335
- modelUrl: config.gpuModelUrl,
7336
- externalDataUrl: config.gpuExternalDataUrl,
7349
+ modelUrl: gpuModelUrl,
7350
+ externalDataUrl: gpuExternalDataUrl,
7337
7351
  backend: config.gpuBackend ?? "auto",
7338
7352
  numIdentityClasses: config.numIdentityClasses
7339
7353
  });
@@ -7349,6 +7363,7 @@ var A2EWithFallback = class {
7349
7363
  this.hasFallenBack = false;
7350
7364
  this.implementation = gpuInstance;
7351
7365
  this.config = config;
7366
+ this.resolvedCpuModelUrl = config.cpuModelUrl ?? DEFAULT_MODEL_URLS.wav2arkitCpu;
7352
7367
  }
7353
7368
  get modelId() {
7354
7369
  return this.implementation.modelId;
@@ -7377,17 +7392,17 @@ var A2EWithFallback = class {
7377
7392
  }
7378
7393
  if (this.config.unifiedWorker) {
7379
7394
  this.implementation = new Wav2ArkitCpuUnifiedAdapter(this.config.unifiedWorker, {
7380
- modelUrl: this.config.cpuModelUrl
7395
+ modelUrl: this.resolvedCpuModelUrl
7381
7396
  });
7382
7397
  logger12.info("Fallback to Wav2ArkitCpuUnifiedAdapter successful");
7383
7398
  } else if (this.config.useWorker && Wav2ArkitCpuWorker.isSupported() && !isIOS()) {
7384
7399
  this.implementation = new Wav2ArkitCpuWorker({
7385
- modelUrl: this.config.cpuModelUrl
7400
+ modelUrl: this.resolvedCpuModelUrl
7386
7401
  });
7387
7402
  logger12.info("Fallback to Wav2ArkitCpuWorker successful");
7388
7403
  } else {
7389
7404
  this.implementation = new Wav2ArkitCpuInference({
7390
- modelUrl: this.config.cpuModelUrl
7405
+ modelUrl: this.resolvedCpuModelUrl
7391
7406
  });
7392
7407
  logger12.info("Fallback to Wav2ArkitCpuInference successful");
7393
7408
  }
@@ -8577,10 +8592,12 @@ function supportsVADWorker() {
8577
8592
  }
8578
8593
  return true;
8579
8594
  }
8580
- function createSileroVAD(config) {
8595
+ function createSileroVAD(config = {}) {
8596
+ const modelUrl = config.modelUrl ?? DEFAULT_MODEL_URLS.sileroVad;
8597
+ const resolvedConfig = { ...config, modelUrl };
8581
8598
  if (config.unifiedWorker) {
8582
8599
  logger15.info("Creating SileroVADUnifiedAdapter (shared unified worker)");
8583
- return new SileroVADUnifiedAdapter(config.unifiedWorker, config);
8600
+ return new SileroVADUnifiedAdapter(config.unifiedWorker, resolvedConfig);
8584
8601
  }
8585
8602
  const fallbackOnError = config.fallbackOnError ?? true;
8586
8603
  let useWorker;
@@ -8600,24 +8617,24 @@ function createSileroVAD(config) {
8600
8617
  if (useWorker) {
8601
8618
  logger15.info("Creating SileroVADWorker (off-main-thread)");
8602
8619
  const worker = new SileroVADWorker({
8603
- modelUrl: config.modelUrl,
8620
+ modelUrl,
8604
8621
  sampleRate: config.sampleRate,
8605
8622
  threshold: config.threshold,
8606
8623
  preSpeechBufferChunks: config.preSpeechBufferChunks
8607
8624
  });
8608
8625
  if (fallbackOnError) {
8609
- return new VADWorkerWithFallback(worker, config);
8626
+ return new VADWorkerWithFallback(worker, resolvedConfig);
8610
8627
  }
8611
8628
  return worker;
8612
8629
  }
8613
8630
  logger15.info("Creating SileroVADInference (main thread)");
8614
- return new SileroVADInference(config);
8631
+ return new SileroVADInference(resolvedConfig);
8615
8632
  }
8616
8633
  var VADWorkerWithFallback = class {
8617
- constructor(worker, config) {
8634
+ constructor(worker, resolvedConfig) {
8618
8635
  this.hasFallenBack = false;
8619
8636
  this.implementation = worker;
8620
- this.config = config;
8637
+ this.resolvedConfig = resolvedConfig;
8621
8638
  }
8622
8639
  get backend() {
8623
8640
  if (!this.isLoaded) return null;
@@ -8643,7 +8660,7 @@ var VADWorkerWithFallback = class {
8643
8660
  await this.implementation.dispose();
8644
8661
  } catch {
8645
8662
  }
8646
- this.implementation = new SileroVADInference(this.config);
8663
+ this.implementation = new SileroVADInference(this.resolvedConfig);
8647
8664
  this.hasFallenBack = true;
8648
8665
  logger15.info("Fallback to SileroVADInference successful");
8649
8666
  return await this.implementation.load();
@@ -9691,17 +9708,29 @@ var AnimationGraph = class extends EventEmitter {
9691
9708
  // src/animation/ProceduralLifeLayer.ts
9692
9709
  import { createNoise2D } from "simplex-noise";
9693
9710
  var simplex2d = createNoise2D();
9711
+ var LIFE_BS_INDEX = /* @__PURE__ */ new Map();
9712
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
9713
+ LIFE_BS_INDEX.set(LAM_BLENDSHAPES[i], i);
9714
+ }
9694
9715
  var PHASE_OPEN = 0;
9695
9716
  var PHASE_CLOSING = 1;
9696
9717
  var PHASE_CLOSED = 2;
9697
9718
  var PHASE_OPENING = 3;
9698
- var BLINK_CLOSE_DURATION = 0.06;
9719
+ var BLINK_CLOSE_DURATION = 0.092;
9699
9720
  var BLINK_HOLD_DURATION = 0.04;
9700
- var BLINK_OPEN_DURATION = 0.15;
9721
+ var BLINK_OPEN_DURATION = 0.242;
9701
9722
  var BLINK_ASYMMETRY_DELAY = 8e-3;
9723
+ var BLINK_IBI_MU = Math.log(5.97);
9724
+ var BLINK_IBI_SIGMA = 0.89;
9702
9725
  var GAZE_BREAK_DURATION = 0.12;
9703
9726
  var GAZE_BREAK_HOLD_DURATION = 0.3;
9704
9727
  var GAZE_BREAK_RETURN_DURATION = 0.15;
9728
+ var GAZE_STATE_PARAMS = {
9729
+ idle: { interval: [2, 5], amplitude: [0.15, 0.4] },
9730
+ listening: { interval: [4, 10], amplitude: [0.1, 0.25] },
9731
+ thinking: { interval: [1, 3], amplitude: [0.2, 0.5] },
9732
+ speaking: { interval: [2, 6], amplitude: [0.15, 0.35] }
9733
+ };
9705
9734
  var EYE_NOISE_X_FREQ = 0.8;
9706
9735
  var EYE_NOISE_Y_FREQ = 0.6;
9707
9736
  var EYE_NOISE_X_PHASE = 73.1;
@@ -9729,6 +9758,12 @@ function smoothStep(t) {
9729
9758
  function softClamp(v, max) {
9730
9759
  return Math.tanh(v / max) * max;
9731
9760
  }
9761
+ function sampleLogNormal(mu, sigma) {
9762
+ const u1 = Math.random();
9763
+ const u2 = Math.random();
9764
+ const z = Math.sqrt(-2 * Math.log(u1 || 1e-10)) * Math.cos(2 * Math.PI * u2);
9765
+ return Math.exp(mu + sigma * z);
9766
+ }
9732
9767
  var ProceduralLifeLayer = class {
9733
9768
  constructor(config) {
9734
9769
  // Blink state
@@ -9741,7 +9776,7 @@ var ProceduralLifeLayer = class {
9741
9776
  // Eye contact (smoothed)
9742
9777
  this.smoothedEyeX = 0;
9743
9778
  this.smoothedEyeY = 0;
9744
- // Eye micro-motion (continuous simplex noise, no discrete events)
9779
+ // Eye micro-motion
9745
9780
  this.eyeNoiseTime = 0;
9746
9781
  // Gaze break state
9747
9782
  this.gazeBreakTimer = 0;
@@ -9751,6 +9786,8 @@ var ProceduralLifeLayer = class {
9751
9786
  this.gazeBreakTargetY = 0;
9752
9787
  this.gazeBreakCurrentX = 0;
9753
9788
  this.gazeBreakCurrentY = 0;
9789
+ // Conversational state for gaze
9790
+ this.currentState = null;
9754
9791
  // Breathing / postural sway
9755
9792
  this.microMotionTime = 0;
9756
9793
  this.breathingPhase = 0;
@@ -9759,6 +9796,7 @@ var ProceduralLifeLayer = class {
9759
9796
  this.previousEnergy = 0;
9760
9797
  this.emphasisLevel = 0;
9761
9798
  this.blinkIntervalRange = config?.blinkIntervalRange ?? [2.5, 6];
9799
+ this.useLogNormalBlinks = !config?.blinkIntervalRange;
9762
9800
  this.gazeBreakIntervalRange = config?.gazeBreakIntervalRange ?? [3, 8];
9763
9801
  this.gazeBreakAmplitudeRange = config?.gazeBreakAmplitudeRange ?? [0.15, 0.4];
9764
9802
  this.eyeNoiseAmplitude = config?.eyeNoiseAmplitude ?? 0.06;
@@ -9768,7 +9806,7 @@ var ProceduralLifeLayer = class {
9768
9806
  this.posturalSwayAmplitude = config?.posturalSwayAmplitude ?? 2e-3;
9769
9807
  this.eyeMaxDeviation = config?.eyeMaxDeviation ?? 0.8;
9770
9808
  this.eyeSmoothing = config?.eyeSmoothing ?? 15;
9771
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
9809
+ this.blinkInterval = this.nextBlinkInterval();
9772
9810
  this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
9773
9811
  }
9774
9812
  /**
@@ -9783,6 +9821,7 @@ var ProceduralLifeLayer = class {
9783
9821
  const eyeTargetY = input?.eyeTargetY ?? 0;
9784
9822
  const audioEnergy = input?.audioEnergy ?? 0;
9785
9823
  const isSpeaking = input?.isSpeaking ?? false;
9824
+ this.currentState = input?.state ?? null;
9786
9825
  const safeDelta = Math.min(delta, 0.1);
9787
9826
  const blendshapes = {};
9788
9827
  this.updateBlinks(delta);
@@ -9821,6 +9860,12 @@ var ProceduralLifeLayer = class {
9821
9860
  const swayAmp = this.posturalSwayAmplitude;
9822
9861
  const swayX = Math.sin(this.microMotionTime * 0.7) * swayAmp + Math.sin(this.microMotionTime * 1.3) * swayAmp * 0.5;
9823
9862
  const swayY = Math.sin(this.microMotionTime * 0.5) * swayAmp * 0.75 + Math.sin(this.microMotionTime * 0.9) * swayAmp * 0.5;
9863
+ const breathVal = Math.sin(this.breathingPhase);
9864
+ if (breathVal > 0) {
9865
+ blendshapes["jawOpen"] = breathVal * 0.015;
9866
+ blendshapes["noseSneerLeft"] = breathVal * 8e-3;
9867
+ blendshapes["noseSneerRight"] = breathVal * 8e-3;
9868
+ }
9824
9869
  return {
9825
9870
  blendshapes,
9826
9871
  headDelta: {
@@ -9829,12 +9874,35 @@ var ProceduralLifeLayer = class {
9829
9874
  }
9830
9875
  };
9831
9876
  }
9877
+ /**
9878
+ * Write life layer output directly to a Float32Array[52] in LAM_BLENDSHAPES order.
9879
+ *
9880
+ * Includes micro-jitter (0.4% amplitude simplex noise on all channels) to
9881
+ * break uncanny stillness on undriven channels.
9882
+ *
9883
+ * @param delta - Time since last frame in seconds
9884
+ * @param input - Per-frame input
9885
+ * @param out - Pre-allocated Float32Array(52) to write into
9886
+ */
9887
+ updateToArray(delta, input, out) {
9888
+ out.fill(0);
9889
+ const result = this.update(delta, input);
9890
+ for (const [name, value] of Object.entries(result.blendshapes)) {
9891
+ const idx = LIFE_BS_INDEX.get(name);
9892
+ if (idx !== void 0) {
9893
+ out[idx] = value;
9894
+ }
9895
+ }
9896
+ for (let i = 0; i < 52; i++) {
9897
+ out[i] += simplex2d(this.noiseTime * 0.3, i * 7.13) * 4e-3;
9898
+ }
9899
+ }
9832
9900
  /**
9833
9901
  * Reset all internal state to initial values.
9834
9902
  */
9835
9903
  reset() {
9836
9904
  this.blinkTimer = 0;
9837
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
9905
+ this.blinkInterval = this.nextBlinkInterval();
9838
9906
  this.blinkPhase = PHASE_OPEN;
9839
9907
  this.blinkProgress = 0;
9840
9908
  this.asymmetryRight = 0.97;
@@ -9851,6 +9919,7 @@ var ProceduralLifeLayer = class {
9851
9919
  this.gazeBreakTargetY = 0;
9852
9920
  this.gazeBreakCurrentX = 0;
9853
9921
  this.gazeBreakCurrentY = 0;
9922
+ this.currentState = null;
9854
9923
  this.microMotionTime = 0;
9855
9924
  this.breathingPhase = 0;
9856
9925
  this.noiseTime = 0;
@@ -9858,6 +9927,21 @@ var ProceduralLifeLayer = class {
9858
9927
  this.emphasisLevel = 0;
9859
9928
  }
9860
9929
  // =====================================================================
9930
+ // PRIVATE: Blink interval sampling
9931
+ // =====================================================================
9932
+ /**
9933
+ * Sample next blink interval.
9934
+ * Uses log-normal distribution (PMC3565584) when using default config,
9935
+ * or uniform random when custom blinkIntervalRange is provided.
9936
+ */
9937
+ nextBlinkInterval() {
9938
+ if (this.useLogNormalBlinks) {
9939
+ const sample = sampleLogNormal(BLINK_IBI_MU, BLINK_IBI_SIGMA);
9940
+ return clamp(sample, 1.5, 12);
9941
+ }
9942
+ return randomRange(...this.blinkIntervalRange);
9943
+ }
9944
+ // =====================================================================
9861
9945
  // PRIVATE: Blink system
9862
9946
  // =====================================================================
9863
9947
  updateBlinks(delta) {
@@ -9866,7 +9950,7 @@ var ProceduralLifeLayer = class {
9866
9950
  this.blinkPhase = PHASE_CLOSING;
9867
9951
  this.blinkProgress = 0;
9868
9952
  this.blinkTimer = 0;
9869
- this.blinkInterval = randomRange(...this.blinkIntervalRange);
9953
+ this.blinkInterval = this.nextBlinkInterval();
9870
9954
  this.asymmetryRight = 0.95 + Math.random() * 0.08;
9871
9955
  }
9872
9956
  if (this.blinkPhase > PHASE_OPEN) {
@@ -9922,18 +10006,32 @@ var ProceduralLifeLayer = class {
9922
10006
  return { x, y };
9923
10007
  }
9924
10008
  // =====================================================================
9925
- // PRIVATE: Gaze breaks
10009
+ // PRIVATE: Gaze breaks (state-dependent)
9926
10010
  // =====================================================================
10011
+ /**
10012
+ * Get active gaze parameters — uses state-dependent params when
10013
+ * conversational state is provided, otherwise falls back to config ranges.
10014
+ */
10015
+ getActiveGazeParams() {
10016
+ if (this.currentState && GAZE_STATE_PARAMS[this.currentState]) {
10017
+ return GAZE_STATE_PARAMS[this.currentState];
10018
+ }
10019
+ return {
10020
+ interval: this.gazeBreakIntervalRange,
10021
+ amplitude: this.gazeBreakAmplitudeRange
10022
+ };
10023
+ }
9927
10024
  updateGazeBreaks(delta) {
9928
10025
  this.gazeBreakTimer += delta;
9929
10026
  if (this.gazeBreakTimer >= this.gazeBreakInterval && this.gazeBreakPhase === PHASE_OPEN) {
9930
10027
  this.gazeBreakPhase = PHASE_CLOSING;
9931
10028
  this.gazeBreakProgress = 0;
9932
10029
  this.gazeBreakTimer = 0;
9933
- const amp = randomRange(...this.gazeBreakAmplitudeRange);
10030
+ const params = this.getActiveGazeParams();
10031
+ const amp = randomRange(...params.amplitude);
9934
10032
  this.gazeBreakTargetX = (Math.random() - 0.5) * 2 * amp;
9935
10033
  this.gazeBreakTargetY = (Math.random() - 0.5) * amp * 0.4;
9936
- this.gazeBreakInterval = randomRange(...this.gazeBreakIntervalRange);
10034
+ this.gazeBreakInterval = randomRange(...params.interval);
9937
10035
  }
9938
10036
  if (this.gazeBreakPhase > PHASE_OPEN) {
9939
10037
  this.gazeBreakProgress += delta;
@@ -9998,6 +10096,293 @@ var ProceduralLifeLayer = class {
9998
10096
  }
9999
10097
  };
10000
10098
 
10099
+ // src/face/FACSMapping.ts
10100
+ var EMOTION_TO_AU = {
10101
+ joy: [
10102
+ { au: "AU6", intensity: 0.7, region: "upper" },
10103
+ // cheek raise (Duchenne)
10104
+ { au: "AU12", intensity: 0.8, region: "lower" }
10105
+ // lip corner pull (smile)
10106
+ ],
10107
+ anger: [
10108
+ { au: "AU4", intensity: 0.8, region: "upper" },
10109
+ // brow lower
10110
+ { au: "AU5", intensity: 0.4, region: "upper" },
10111
+ // upper lid raise
10112
+ { au: "AU7", intensity: 0.3, region: "upper" },
10113
+ // lid tighten
10114
+ { au: "AU23", intensity: 0.6, region: "lower" }
10115
+ // lip tighten
10116
+ ],
10117
+ sadness: [
10118
+ { au: "AU1", intensity: 0.7, region: "upper" },
10119
+ // inner brow raise
10120
+ { au: "AU4", intensity: 0.3, region: "upper" },
10121
+ // brow lower (furrow)
10122
+ { au: "AU15", intensity: 0.5, region: "lower" }
10123
+ // lip corner depress
10124
+ ],
10125
+ fear: [
10126
+ { au: "AU1", intensity: 0.6, region: "upper" },
10127
+ // inner brow raise
10128
+ { au: "AU2", intensity: 0.5, region: "upper" },
10129
+ // outer brow raise
10130
+ { au: "AU4", intensity: 0.3, region: "upper" },
10131
+ // brow lower
10132
+ { au: "AU5", intensity: 0.5, region: "upper" },
10133
+ // upper lid raise
10134
+ { au: "AU20", intensity: 0.4, region: "lower" }
10135
+ // lip stretch
10136
+ ],
10137
+ disgust: [
10138
+ { au: "AU9", intensity: 0.7, region: "upper" },
10139
+ // nose wrinkle
10140
+ { au: "AU10", intensity: 0.5, region: "lower" },
10141
+ // upper lip raise
10142
+ { au: "AU15", intensity: 0.4, region: "lower" }
10143
+ // lip corner depress
10144
+ ],
10145
+ amazement: [
10146
+ { au: "AU1", intensity: 0.6, region: "upper" },
10147
+ // inner brow raise
10148
+ { au: "AU2", intensity: 0.7, region: "upper" },
10149
+ // outer brow raise
10150
+ { au: "AU5", intensity: 0.6, region: "upper" },
10151
+ // upper lid raise
10152
+ { au: "AU26", intensity: 0.4, region: "lower" }
10153
+ // jaw drop
10154
+ ],
10155
+ grief: [
10156
+ { au: "AU1", intensity: 0.8, region: "upper" },
10157
+ // inner brow raise
10158
+ { au: "AU4", intensity: 0.5, region: "upper" },
10159
+ // brow lower
10160
+ { au: "AU6", intensity: 0.3, region: "upper" },
10161
+ // cheek raise (grief cry)
10162
+ { au: "AU15", intensity: 0.6, region: "lower" }
10163
+ // lip corner depress
10164
+ ],
10165
+ cheekiness: [
10166
+ { au: "AU2", intensity: 0.4, region: "upper" },
10167
+ // outer brow raise
10168
+ { au: "AU6", intensity: 0.4, region: "upper" },
10169
+ // cheek raise
10170
+ { au: "AU12", intensity: 0.6, region: "lower" }
10171
+ // lip corner pull (smirk)
10172
+ ],
10173
+ pain: [
10174
+ { au: "AU4", intensity: 0.7, region: "upper" },
10175
+ // brow lower
10176
+ { au: "AU6", intensity: 0.4, region: "upper" },
10177
+ // cheek raise (orbicularis)
10178
+ { au: "AU7", intensity: 0.7, region: "upper" },
10179
+ // lid tighten (squint)
10180
+ { au: "AU9", intensity: 0.5, region: "upper" }
10181
+ // nose wrinkle
10182
+ ],
10183
+ outofbreath: [
10184
+ { au: "AU1", intensity: 0.3, region: "upper" },
10185
+ // inner brow raise
10186
+ { au: "AU25", intensity: 0.3, region: "lower" },
10187
+ // lips part
10188
+ { au: "AU26", intensity: 0.5, region: "lower" }
10189
+ // jaw drop
10190
+ ]
10191
+ };
10192
+ var AU_TO_ARKIT = {
10193
+ "AU1": [{ blendshape: "browInnerUp", weight: 1 }],
10194
+ "AU2": [{ blendshape: "browOuterUpLeft", weight: 1 }, { blendshape: "browOuterUpRight", weight: 1 }],
10195
+ "AU4": [{ blendshape: "browDownLeft", weight: 1 }, { blendshape: "browDownRight", weight: 1 }],
10196
+ "AU5": [{ blendshape: "eyeWideLeft", weight: 1 }, { blendshape: "eyeWideRight", weight: 1 }],
10197
+ "AU6": [{ blendshape: "cheekSquintLeft", weight: 1 }, { blendshape: "cheekSquintRight", weight: 1 }],
10198
+ "AU7": [{ blendshape: "eyeSquintLeft", weight: 1 }, { blendshape: "eyeSquintRight", weight: 1 }],
10199
+ "AU9": [{ blendshape: "noseSneerLeft", weight: 1 }, { blendshape: "noseSneerRight", weight: 1 }],
10200
+ "AU10": [{ blendshape: "mouthUpperUpLeft", weight: 1 }, { blendshape: "mouthUpperUpRight", weight: 1 }],
10201
+ "AU12": [{ blendshape: "mouthSmileLeft", weight: 1 }, { blendshape: "mouthSmileRight", weight: 1 }],
10202
+ "AU15": [{ blendshape: "mouthFrownLeft", weight: 1 }, { blendshape: "mouthFrownRight", weight: 1 }],
10203
+ "AU20": [{ blendshape: "mouthStretchLeft", weight: 1 }, { blendshape: "mouthStretchRight", weight: 1 }],
10204
+ "AU23": [{ blendshape: "mouthPressLeft", weight: 1 }, { blendshape: "mouthPressRight", weight: 1 }],
10205
+ "AU25": [{ blendshape: "jawOpen", weight: 0.3 }],
10206
+ "AU26": [{ blendshape: "jawOpen", weight: 1 }]
10207
+ };
10208
+ var ALL_AUS = [...new Set(
10209
+ Object.values(EMOTION_TO_AU).flatMap((activations) => activations.map((a) => a.au))
10210
+ )];
10211
+
10212
+ // src/face/EmotionResolver.ts
10213
+ var BS_INDEX = /* @__PURE__ */ new Map();
10214
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
10215
+ BS_INDEX.set(LAM_BLENDSHAPES[i], i);
10216
+ }
10217
+ var EmotionResolver = class {
10218
+ constructor() {
10219
+ this.upperBuffer = new Float32Array(52);
10220
+ this.lowerBuffer = new Float32Array(52);
10221
+ }
10222
+ /**
10223
+ * Resolve emotion weights to upper/lower face blendshape contributions.
10224
+ *
10225
+ * @param weights - Emotion channel weights from EmotionController
10226
+ * @param intensity - Global intensity multiplier (0-2). Default: 1.0
10227
+ * @returns Upper and lower face blendshape arrays (52 channels each)
10228
+ */
10229
+ resolve(weights, intensity = 1) {
10230
+ const upper = this.upperBuffer;
10231
+ const lower = this.lowerBuffer;
10232
+ upper.fill(0);
10233
+ lower.fill(0);
10234
+ for (const emotionName of EMOTION_NAMES) {
10235
+ const emotionWeight = weights[emotionName];
10236
+ if (!emotionWeight || emotionWeight < 0.01) continue;
10237
+ const auActivations = EMOTION_TO_AU[emotionName];
10238
+ if (!auActivations) continue;
10239
+ for (const activation of auActivations) {
10240
+ const arkitMappings = AU_TO_ARKIT[activation.au];
10241
+ if (!arkitMappings) continue;
10242
+ const target = activation.region === "upper" ? upper : lower;
10243
+ const scale = emotionWeight * activation.intensity * intensity;
10244
+ for (const mapping of arkitMappings) {
10245
+ const idx = BS_INDEX.get(mapping.blendshape);
10246
+ if (idx !== void 0) {
10247
+ target[idx] += mapping.weight * scale;
10248
+ }
10249
+ }
10250
+ }
10251
+ }
10252
+ for (let i = 0; i < 52; i++) {
10253
+ if (upper[i] > 1) upper[i] = 1;
10254
+ if (lower[i] > 1) lower[i] = 1;
10255
+ }
10256
+ return {
10257
+ upper: new Float32Array(upper),
10258
+ lower: new Float32Array(lower)
10259
+ };
10260
+ }
10261
+ };
10262
+
10263
+ // src/face/FaceCompositor.ts
10264
+ function smoothstep(t) {
10265
+ return t * t * (3 - 2 * t);
10266
+ }
10267
+ var BS_INDEX2 = /* @__PURE__ */ new Map();
10268
+ for (let i = 0; i < LAM_BLENDSHAPES.length; i++) {
10269
+ BS_INDEX2.set(LAM_BLENDSHAPES[i], i);
10270
+ }
10271
+ var IDX_MOUTH_CLOSE = BS_INDEX2.get("mouthClose");
10272
+ var IS_EYE_CHANNEL = new Array(52).fill(false);
10273
+ for (const name of LAM_BLENDSHAPES) {
10274
+ if (name.startsWith("eyeBlink") || name.startsWith("eyeLook")) {
10275
+ IS_EYE_CHANNEL[BS_INDEX2.get(name)] = true;
10276
+ }
10277
+ }
10278
+ var FaceCompositor = class {
10279
+ constructor(config) {
10280
+ this.emotionResolver = new EmotionResolver();
10281
+ // Pre-allocated buffers
10282
+ this.smoothedUpper = new Float32Array(52);
10283
+ this.smoothedLower = new Float32Array(52);
10284
+ this.lifeBuffer = new Float32Array(52);
10285
+ // Profile arrays (pre-expanded to 52 channels)
10286
+ this.multiplier = new Float32Array(52).fill(1);
10287
+ this.offset = new Float32Array(52);
10288
+ this.lifeLayer = config?.lifeLayer ?? new ProceduralLifeLayer();
10289
+ this.emotionSmoothing = config?.emotionSmoothing ?? 0.12;
10290
+ if (config?.profile) {
10291
+ this.applyProfileArrays(config.profile);
10292
+ }
10293
+ }
10294
+ /**
10295
+ * Compose a single output frame from the 5-stage signal chain.
10296
+ *
10297
+ * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
10298
+ * @param input - Per-frame input (deltaTime, emotion, life layer params)
10299
+ * @returns Float32Array[52] with all values clamped to [0, 1]
10300
+ */
10301
+ compose(base, input) {
10302
+ const out = new Float32Array(52);
10303
+ out.set(base);
10304
+ const emotion = input.emotion ?? this.stickyEmotion;
10305
+ if (emotion) {
10306
+ const resolved = this.emotionResolver.resolve(
10307
+ emotion,
10308
+ input.emotionIntensity ?? 1
10309
+ );
10310
+ const k = this.emotionSmoothing;
10311
+ for (let i = 0; i < 52; i++) {
10312
+ this.smoothedUpper[i] += (resolved.upper[i] - this.smoothedUpper[i]) * k;
10313
+ this.smoothedLower[i] += (resolved.lower[i] - this.smoothedLower[i]) * k;
10314
+ }
10315
+ const mc = base[IDX_MOUTH_CLOSE];
10316
+ const bilabialSuppress = mc <= 0.3 ? 1 : mc >= 0.7 ? 0.1 : 1 - 0.9 * smoothstep((mc - 0.3) * 2.5);
10317
+ for (let i = 0; i < 52; i++) {
10318
+ out[i] += this.smoothedUpper[i];
10319
+ }
10320
+ for (let i = 0; i < 52; i++) {
10321
+ out[i] *= 1 + this.smoothedLower[i] * bilabialSuppress;
10322
+ }
10323
+ }
10324
+ this.lifeLayer.updateToArray(input.deltaTime, input, this.lifeBuffer);
10325
+ for (let i = 0; i < 52; i++) {
10326
+ if (IS_EYE_CHANNEL[i]) {
10327
+ out[i] = this.lifeBuffer[i];
10328
+ } else {
10329
+ out[i] += this.lifeBuffer[i];
10330
+ }
10331
+ }
10332
+ for (let i = 0; i < 52; i++) {
10333
+ out[i] = out[i] * this.multiplier[i] + this.offset[i];
10334
+ }
10335
+ for (let i = 0; i < 52; i++) {
10336
+ if (out[i] < 0) out[i] = 0;
10337
+ else if (out[i] > 1) out[i] = 1;
10338
+ }
10339
+ return out;
10340
+ }
10341
+ /**
10342
+ * Set sticky emotion (used when input.emotion is not provided).
10343
+ */
10344
+ setEmotion(weights) {
10345
+ this.stickyEmotion = weights;
10346
+ }
10347
+ /**
10348
+ * Update character profile at runtime.
10349
+ */
10350
+ setProfile(profile) {
10351
+ this.multiplier.fill(1);
10352
+ this.offset.fill(0);
10353
+ this.applyProfileArrays(profile);
10354
+ }
10355
+ /**
10356
+ * Reset all smoothing state and life layer.
10357
+ */
10358
+ reset() {
10359
+ this.smoothedUpper.fill(0);
10360
+ this.smoothedLower.fill(0);
10361
+ this.lifeBuffer.fill(0);
10362
+ this.stickyEmotion = void 0;
10363
+ this.lifeLayer.reset();
10364
+ }
10365
+ /** Expand partial profile maps into dense Float32Arrays */
10366
+ applyProfileArrays(profile) {
10367
+ if (profile.multiplier) {
10368
+ for (const [name, value] of Object.entries(profile.multiplier)) {
10369
+ const idx = BS_INDEX2.get(name);
10370
+ if (idx !== void 0 && value !== void 0) {
10371
+ this.multiplier[idx] = value;
10372
+ }
10373
+ }
10374
+ }
10375
+ if (profile.offset) {
10376
+ for (const [name, value] of Object.entries(profile.offset)) {
10377
+ const idx = BS_INDEX2.get(name);
10378
+ if (idx !== void 0 && value !== void 0) {
10379
+ this.offset[idx] = value;
10380
+ }
10381
+ }
10382
+ }
10383
+ }
10384
+ };
10385
+
10001
10386
  // src/orchestration/MicLipSync.ts
10002
10387
  var logger18 = createLogger("MicLipSync");
10003
10388
  var MicLipSync = class extends EventEmitter {
@@ -10684,7 +11069,9 @@ function isProtocolEvent(obj) {
10684
11069
  export {
10685
11070
  A2EOrchestrator,
10686
11071
  A2EProcessor,
11072
+ ALL_AUS,
10687
11073
  ARKIT_BLENDSHAPES,
11074
+ AU_TO_ARKIT,
10688
11075
  AnimationGraph,
10689
11076
  AudioChunkCoalescer,
10690
11077
  AudioEnergyAnalyzer,
@@ -10695,13 +11082,18 @@ export {
10695
11082
  ConsoleExporter,
10696
11083
  DEFAULT_ANIMATION_CONFIG,
10697
11084
  DEFAULT_LOGGING_CONFIG,
11085
+ DEFAULT_MODEL_URLS,
10698
11086
  EMOTION_NAMES,
11087
+ EMOTION_TO_AU,
10699
11088
  EMOTION_VECTOR_SIZE,
10700
11089
  EmotionController,
10701
11090
  EmotionPresets,
11091
+ EmotionResolver,
10702
11092
  EmphasisDetector,
10703
11093
  EventEmitter,
11094
+ FaceCompositor,
10704
11095
  FullFacePipeline,
11096
+ HF_CDN_URLS,
10705
11097
  INFERENCE_LATENCY_BUCKETS,
10706
11098
  InterruptionHandler,
10707
11099
  LAM_BLENDSHAPES,
@@ -10736,6 +11128,7 @@ export {
10736
11128
  calculateRMS,
10737
11129
  configureCacheLimit,
10738
11130
  configureLogging,
11131
+ configureModelUrls,
10739
11132
  configureTelemetry,
10740
11133
  createA2E,
10741
11134
  createEmotionVector,
@@ -10766,6 +11159,7 @@ export {
10766
11159
  noopLogger,
10767
11160
  preloadModels,
10768
11161
  resetLoggingConfig,
11162
+ resetModelUrls,
10769
11163
  resolveBackend,
10770
11164
  setLogLevel,
10771
11165
  setLoggingEnabled,