@omote/core 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -761,80 +761,6 @@ var A2EProcessor = class {
761
761
  }
762
762
  };
763
763
 
764
- // src/inference/BlendshapeSmoother.ts
765
- var NUM_BLENDSHAPES = 52;
766
- var BlendshapeSmoother = class {
767
- constructor(config) {
768
- /** Whether any target has been set */
769
- this._hasTarget = false;
770
- this.halflife = config?.halflife ?? 0.06;
771
- this.values = new Float32Array(NUM_BLENDSHAPES);
772
- this.velocities = new Float32Array(NUM_BLENDSHAPES);
773
- this.targets = new Float32Array(NUM_BLENDSHAPES);
774
- }
775
- /** Whether a target frame has been set (false until first setTarget call) */
776
- get hasTarget() {
777
- return this._hasTarget;
778
- }
779
- /**
780
- * Set new target frame from inference output.
781
- * Springs will converge toward these values on subsequent update() calls.
782
- */
783
- setTarget(frame) {
784
- this.targets.set(frame);
785
- this._hasTarget = true;
786
- }
787
- /**
788
- * Advance all 52 springs by `dt` seconds and return the smoothed frame.
789
- *
790
- * Call this every render frame (e.g., inside requestAnimationFrame).
791
- * Returns the internal values buffer — do NOT mutate the returned array.
792
- *
793
- * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
794
- * @returns Smoothed blendshape values (Float32Array of 52)
795
- */
796
- update(dt) {
797
- if (!this._hasTarget) {
798
- return this.values;
799
- }
800
- if (this.halflife <= 0) {
801
- this.values.set(this.targets);
802
- this.velocities.fill(0);
803
- return this.values;
804
- }
805
- const damping = Math.LN2 / this.halflife;
806
- const eydt = Math.exp(-damping * dt);
807
- for (let i = 0; i < NUM_BLENDSHAPES; i++) {
808
- const j0 = this.values[i] - this.targets[i];
809
- const j1 = this.velocities[i] + j0 * damping;
810
- this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
811
- this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
812
- this.values[i] = Math.max(0, Math.min(1, this.values[i]));
813
- }
814
- return this.values;
815
- }
816
- /**
817
- * Decay all spring targets to neutral (0).
818
- *
819
- * Call when inference stalls (no new frames for threshold duration).
820
- * The springs will smoothly close the mouth / relax the face over
821
- * the halflife period rather than freezing.
822
- */
823
- decayToNeutral() {
824
- this.targets.fill(0);
825
- }
826
- /**
827
- * Reset all state (values, velocities, targets).
828
- * Call when starting a new playback session.
829
- */
830
- reset() {
831
- this.values.fill(0);
832
- this.velocities.fill(0);
833
- this.targets.fill(0);
834
- this._hasTarget = false;
835
- }
836
- };
837
-
838
764
  // src/telemetry/exporters/console.ts
839
765
  var ConsoleExporter = class {
840
766
  constructor(options = {}) {
@@ -2852,13 +2778,6 @@ function pcm16ToFloat32(buffer) {
2852
2778
  }
2853
2779
  return float32;
2854
2780
  }
2855
- function int16ToFloat32(int16) {
2856
- const float32 = new Float32Array(int16.length);
2857
- for (let i = 0; i < int16.length; i++) {
2858
- float32[i] = int16[i] / 32768;
2859
- }
2860
- return float32;
2861
- }
2862
2781
 
2863
2782
  // src/audio/FullFacePipeline.ts
2864
2783
  var logger4 = createLogger("FullFacePipeline");
@@ -2891,16 +2810,11 @@ var FullFacePipeline = class extends EventEmitter {
2891
2810
  this.lastNewFrameTime = 0;
2892
2811
  this.lastKnownLamFrame = null;
2893
2812
  this.staleWarningEmitted = false;
2894
- // Frame loop timing (for dt calculation)
2895
- this.lastFrameLoopTime = 0;
2896
2813
  // Diagnostic logging counter
2897
2814
  this.frameLoopCount = 0;
2898
2815
  const sampleRate = options.sampleRate ?? 16e3;
2899
2816
  this.profile = options.profile ?? {};
2900
2817
  this.staleThresholdMs = options.staleThresholdMs ?? 2e3;
2901
- this.smoother = new BlendshapeSmoother({
2902
- halflife: options.smoothingHalflife ?? 0.06
2903
- });
2904
2818
  const isCpuModel = options.lam.modelId === "wav2arkit_cpu";
2905
2819
  const chunkSize = options.chunkSize ?? options.lam.chunkSize ?? 16e3;
2906
2820
  const chunkAccumulationMs = chunkSize / sampleRate * 1e3;
@@ -2983,9 +2897,7 @@ var FullFacePipeline = class extends EventEmitter {
2983
2897
  this.lastNewFrameTime = 0;
2984
2898
  this.lastKnownLamFrame = null;
2985
2899
  this.staleWarningEmitted = false;
2986
- this.lastFrameLoopTime = 0;
2987
2900
  this.frameLoopCount = 0;
2988
- this.smoother.reset();
2989
2901
  this.scheduler.warmup();
2990
2902
  this.startFrameLoop();
2991
2903
  this.startMonitoring();
@@ -3020,22 +2932,16 @@ var FullFacePipeline = class extends EventEmitter {
3020
2932
  /**
3021
2933
  * Start frame animation loop
3022
2934
  *
3023
- * Uses critically damped spring smoother to produce continuous output
3024
- * at render rate (60fps), even between inference batches (~30fps bursts).
3025
- * Springs interpolate toward the latest inference target, and decay
3026
- * to neutral when inference stalls.
2935
+ * Polls A2EProcessor at render rate (60fps) for the latest inference frame
2936
+ * matching the current AudioContext time. Between inference batches (~30fps
2937
+ * bursts), getFrameForTime() holds the last frame.
3027
2938
  */
3028
2939
  startFrameLoop() {
3029
- this.lastFrameLoopTime = 0;
3030
2940
  const updateFrame = () => {
3031
- const now = performance.now() / 1e3;
3032
- const dt = this.lastFrameLoopTime > 0 ? now - this.lastFrameLoopTime : 1 / 60;
3033
- this.lastFrameLoopTime = now;
3034
2941
  this.frameLoopCount++;
3035
2942
  const currentTime = this.scheduler.getCurrentTime();
3036
2943
  const lamFrame = this.processor.getFrameForTime(currentTime);
3037
2944
  if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
3038
- this.smoother.setTarget(lamFrame);
3039
2945
  this.lastNewFrameTime = performance.now();
3040
2946
  this.lastKnownLamFrame = lamFrame;
3041
2947
  this.staleWarningEmitted = false;
@@ -3055,17 +2961,15 @@ var FullFacePipeline = class extends EventEmitter {
3055
2961
  currentTime: currentTime.toFixed(3),
3056
2962
  playbackEndTime: this.scheduler.getPlaybackEndTime().toFixed(3),
3057
2963
  queuedFrames: this.processor.queuedFrameCount,
3058
- hasTarget: this.smoother.hasTarget,
3059
2964
  playbackStarted: this.playbackStarted,
3060
2965
  msSinceNewFrame: this.lastNewFrameTime > 0 ? Math.round(performance.now() - this.lastNewFrameTime) : -1,
3061
2966
  processorFill: this.processor.fillLevel.toFixed(2)
3062
2967
  });
3063
2968
  }
3064
2969
  if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
3065
- this.smoother.decayToNeutral();
3066
2970
  if (!this.staleWarningEmitted) {
3067
2971
  this.staleWarningEmitted = true;
3068
- logger4.warn("A2E stalled \u2014 decaying to neutral", {
2972
+ logger4.warn("A2E stalled \u2014 no new inference frames", {
3069
2973
  staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
3070
2974
  queuedFrames: this.processor.queuedFrameCount
3071
2975
  });
@@ -3104,12 +3008,10 @@ var FullFacePipeline = class extends EventEmitter {
3104
3008
  await this.scheduler.cancelAll(fadeOutMs);
3105
3009
  this.coalescer.reset();
3106
3010
  this.processor.reset();
3107
- this.smoother.reset();
3108
3011
  this.playbackStarted = false;
3109
3012
  this.lastNewFrameTime = 0;
3110
3013
  this.lastKnownLamFrame = null;
3111
3014
  this.staleWarningEmitted = false;
3112
- this.lastFrameLoopTime = 0;
3113
3015
  this.emit("playback_complete", void 0);
3114
3016
  }
3115
3017
  /**
@@ -3163,6 +3065,108 @@ var FullFacePipeline = class extends EventEmitter {
3163
3065
  }
3164
3066
  };
3165
3067
 
3068
+ // src/audio/InterruptionHandler.ts
3069
+ var InterruptionHandler = class extends EventEmitter {
3070
+ constructor(config = {}) {
3071
+ super();
3072
+ this.isSpeaking = false;
3073
+ this.speechStartTime = 0;
3074
+ this.lastSpeechTime = 0;
3075
+ this.silenceTimer = null;
3076
+ this.aiIsSpeaking = false;
3077
+ // Debouncing: only emit one interruption per speech session
3078
+ this.interruptionTriggeredThisSession = false;
3079
+ this.config = {
3080
+ vadThreshold: 0.5,
3081
+ // Silero VAD default
3082
+ minSpeechDurationMs: 200,
3083
+ // Google/Amazon barge-in standard
3084
+ silenceTimeoutMs: 500,
3085
+ // OpenAI Realtime API standard
3086
+ enabled: true,
3087
+ ...config
3088
+ };
3089
+ }
3090
+ /**
3091
+ * Process VAD result for interruption detection
3092
+ * @param vadProbability - Speech probability from VAD (0-1)
3093
+ * @param audioEnergy - Optional RMS energy for logging (default: 0)
3094
+ */
3095
+ processVADResult(vadProbability, audioEnergy = 0) {
3096
+ if (!this.config.enabled) return;
3097
+ if (vadProbability > this.config.vadThreshold) {
3098
+ this.onSpeechDetected(audioEnergy || vadProbability);
3099
+ } else {
3100
+ this.onSilenceDetected();
3101
+ }
3102
+ }
3103
+ /** Notify that AI started/stopped speaking */
3104
+ setAISpeaking(speaking) {
3105
+ this.aiIsSpeaking = speaking;
3106
+ }
3107
+ /** Enable/disable interruption detection */
3108
+ setEnabled(enabled) {
3109
+ this.config.enabled = enabled;
3110
+ if (!enabled) {
3111
+ this.reset();
3112
+ }
3113
+ }
3114
+ /** Update configuration */
3115
+ updateConfig(config) {
3116
+ this.config = { ...this.config, ...config };
3117
+ }
3118
+ /** Reset state */
3119
+ reset() {
3120
+ this.isSpeaking = false;
3121
+ this.speechStartTime = 0;
3122
+ this.lastSpeechTime = 0;
3123
+ this.interruptionTriggeredThisSession = false;
3124
+ if (this.silenceTimer) {
3125
+ clearTimeout(this.silenceTimer);
3126
+ this.silenceTimer = null;
3127
+ }
3128
+ }
3129
+ /** Get current state */
3130
+ getState() {
3131
+ return {
3132
+ isSpeaking: this.isSpeaking,
3133
+ speechDurationMs: this.isSpeaking ? Date.now() - this.speechStartTime : 0
3134
+ };
3135
+ }
3136
+ onSpeechDetected(rms) {
3137
+ const now = Date.now();
3138
+ this.lastSpeechTime = now;
3139
+ if (this.silenceTimer) {
3140
+ clearTimeout(this.silenceTimer);
3141
+ this.silenceTimer = null;
3142
+ }
3143
+ if (!this.isSpeaking) {
3144
+ this.isSpeaking = true;
3145
+ this.speechStartTime = now;
3146
+ this.emit("speech.detected", { rms });
3147
+ }
3148
+ if (this.aiIsSpeaking && !this.interruptionTriggeredThisSession) {
3149
+ const speechDuration = now - this.speechStartTime;
3150
+ if (speechDuration >= this.config.minSpeechDurationMs) {
3151
+ this.interruptionTriggeredThisSession = true;
3152
+ this.emit("interruption.triggered", { rms, durationMs: speechDuration });
3153
+ }
3154
+ }
3155
+ }
3156
+ onSilenceDetected() {
3157
+ if (!this.isSpeaking) return;
3158
+ if (!this.silenceTimer) {
3159
+ this.silenceTimer = setTimeout(() => {
3160
+ const durationMs = this.lastSpeechTime - this.speechStartTime;
3161
+ this.isSpeaking = false;
3162
+ this.silenceTimer = null;
3163
+ this.interruptionTriggeredThisSession = false;
3164
+ this.emit("speech.ended", { durationMs });
3165
+ }, this.config.silenceTimeoutMs);
3166
+ }
3167
+ }
3168
+ };
3169
+
3166
3170
  // src/inference/kaldiFbank.ts
3167
3171
  function fft(re, im) {
3168
3172
  const n = re.length;
@@ -6995,6 +6999,80 @@ var A2EWithFallback = class {
6995
6999
  }
6996
7000
  };
6997
7001
 
7002
+ // src/inference/BlendshapeSmoother.ts
7003
+ var NUM_BLENDSHAPES = 52;
7004
+ var BlendshapeSmoother = class {
7005
+ constructor(config) {
7006
+ /** Whether any target has been set */
7007
+ this._hasTarget = false;
7008
+ this.halflife = config?.halflife ?? 0.06;
7009
+ this.values = new Float32Array(NUM_BLENDSHAPES);
7010
+ this.velocities = new Float32Array(NUM_BLENDSHAPES);
7011
+ this.targets = new Float32Array(NUM_BLENDSHAPES);
7012
+ }
7013
+ /** Whether a target frame has been set (false until first setTarget call) */
7014
+ get hasTarget() {
7015
+ return this._hasTarget;
7016
+ }
7017
+ /**
7018
+ * Set new target frame from inference output.
7019
+ * Springs will converge toward these values on subsequent update() calls.
7020
+ */
7021
+ setTarget(frame) {
7022
+ this.targets.set(frame);
7023
+ this._hasTarget = true;
7024
+ }
7025
+ /**
7026
+ * Advance all 52 springs by `dt` seconds and return the smoothed frame.
7027
+ *
7028
+ * Call this every render frame (e.g., inside requestAnimationFrame).
7029
+ * Returns the internal values buffer — do NOT mutate the returned array.
7030
+ *
7031
+ * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
7032
+ * @returns Smoothed blendshape values (Float32Array of 52)
7033
+ */
7034
+ update(dt) {
7035
+ if (!this._hasTarget) {
7036
+ return this.values;
7037
+ }
7038
+ if (this.halflife <= 0) {
7039
+ this.values.set(this.targets);
7040
+ this.velocities.fill(0);
7041
+ return this.values;
7042
+ }
7043
+ const damping = Math.LN2 / this.halflife;
7044
+ const eydt = Math.exp(-damping * dt);
7045
+ for (let i = 0; i < NUM_BLENDSHAPES; i++) {
7046
+ const j0 = this.values[i] - this.targets[i];
7047
+ const j1 = this.velocities[i] + j0 * damping;
7048
+ this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
7049
+ this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
7050
+ this.values[i] = Math.max(0, Math.min(1, this.values[i]));
7051
+ }
7052
+ return this.values;
7053
+ }
7054
+ /**
7055
+ * Decay all spring targets to neutral (0).
7056
+ *
7057
+ * Call when inference stalls (no new frames for threshold duration).
7058
+ * The springs will smoothly close the mouth / relax the face over
7059
+ * the halflife period rather than freezing.
7060
+ */
7061
+ decayToNeutral() {
7062
+ this.targets.fill(0);
7063
+ }
7064
+ /**
7065
+ * Reset all state (values, velocities, targets).
7066
+ * Call when starting a new playback session.
7067
+ */
7068
+ reset() {
7069
+ this.values.fill(0);
7070
+ this.velocities.fill(0);
7071
+ this.targets.fill(0);
7072
+ this._hasTarget = false;
7073
+ }
7074
+ };
7075
+
6998
7076
  // src/animation/audioEnergy.ts
6999
7077
  function calculateRMS(samples) {
7000
7078
  if (samples.length === 0) return 0;
@@ -8795,1214 +8873,6 @@ var EmotionController = class {
8795
8873
  }
8796
8874
  };
8797
8875
 
8798
- // src/ai/adapters/AgentCoreAdapter.ts
8799
- var AgentCoreAdapter = class extends EventEmitter {
8800
- constructor(config) {
8801
- super();
8802
- this.name = "AgentCore";
8803
- this._state = "disconnected";
8804
- this._sessionId = null;
8805
- this._isConnected = false;
8806
- // Sub-components
8807
- this.asr = null;
8808
- this.vad = null;
8809
- this.lam = null;
8810
- this.pipeline = null;
8811
- // WebSocket connection to AgentCore
8812
- this.ws = null;
8813
- this.wsReconnectAttempts = 0;
8814
- this.maxReconnectAttempts = 5;
8815
- // Audio buffers
8816
- this.audioBuffer = [];
8817
- // Conversation state
8818
- this.history = [];
8819
- this.currentConfig = null;
8820
- // Interruption handling
8821
- this.isSpeaking = false;
8822
- this.currentTtsAbortController = null;
8823
- // Auth token cache per tenant
8824
- this.tokenCache = /* @__PURE__ */ new Map();
8825
- this.agentCoreConfig = config;
8826
- this.emotionController = new EmotionController();
8827
- }
8828
- get state() {
8829
- return this._state;
8830
- }
8831
- get sessionId() {
8832
- return this._sessionId;
8833
- }
8834
- get isConnected() {
8835
- return this._isConnected;
8836
- }
8837
- /**
8838
- * Connect to AgentCore with session configuration
8839
- */
8840
- async connect(config) {
8841
- this.currentConfig = config;
8842
- this._sessionId = config.sessionId;
8843
- try {
8844
- const authToken = await this.getAuthToken(config.tenant);
8845
- await Promise.all([
8846
- this.initASR(),
8847
- this.initLAM()
8848
- ]);
8849
- await this.connectWebSocket(authToken, config);
8850
- this._isConnected = true;
8851
- this.setState("idle");
8852
- this.emit("connection.opened", { sessionId: this._sessionId, adapter: this.name });
8853
- } catch (error) {
8854
- this.setState("error");
8855
- this.emit("connection.error", {
8856
- error,
8857
- recoverable: true
8858
- });
8859
- throw error;
8860
- }
8861
- }
8862
- /**
8863
- * Disconnect and cleanup
8864
- */
8865
- async disconnect() {
8866
- this.currentTtsAbortController?.abort();
8867
- if (this.pipeline) {
8868
- this.pipeline.dispose();
8869
- this.pipeline = null;
8870
- }
8871
- if (this.ws) {
8872
- this.ws.close(1e3, "Client disconnect");
8873
- this.ws = null;
8874
- }
8875
- await Promise.all([
8876
- this.asr?.dispose(),
8877
- this.vad?.dispose(),
8878
- this.lam?.dispose()
8879
- ]);
8880
- this._isConnected = false;
8881
- this.setState("disconnected");
8882
- this.emit("connection.closed", { reason: "Client disconnect" });
8883
- }
8884
- /**
8885
- * Push user audio for processing
8886
- */
8887
- pushAudio(audio) {
8888
- if (!this._isConnected) return;
8889
- if (this.isSpeaking) {
8890
- this.detectVoiceActivity(audio).then((hasVoiceActivity) => {
8891
- if (hasVoiceActivity) {
8892
- this.interrupt();
8893
- }
8894
- }).catch((error) => {
8895
- console.error("[AgentCore] VAD error during interruption detection:", error);
8896
- });
8897
- }
8898
- const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
8899
- this.audioBuffer.push(float32);
8900
- this.scheduleTranscription();
8901
- }
8902
- /**
8903
- * Send text directly to AgentCore
8904
- */
8905
- async sendText(text) {
8906
- if (!this._isConnected || !this.ws) {
8907
- throw new Error("Not connected to AgentCore");
8908
- }
8909
- this.addToHistory({
8910
- role: "user",
8911
- content: text,
8912
- timestamp: Date.now()
8913
- });
8914
- this.setState("thinking");
8915
- this.emit("ai.thinking.start", { timestamp: Date.now() });
8916
- this.ws.send(JSON.stringify({
8917
- type: "user_message",
8918
- sessionId: this._sessionId,
8919
- content: text,
8920
- context: {
8921
- history: this.history.slice(-10),
8922
- // Last 10 messages
8923
- emotion: Array.from(this.emotionController.emotion)
8924
- }
8925
- }));
8926
- }
8927
- /**
8928
- * Interrupt current AI response
8929
- */
8930
- interrupt() {
8931
- if (!this.isSpeaking) return;
8932
- this.emit("interruption.detected", { timestamp: Date.now() });
8933
- this.currentTtsAbortController?.abort();
8934
- this.currentTtsAbortController = null;
8935
- if (this.ws?.readyState === WebSocket.OPEN) {
8936
- this.ws.send(JSON.stringify({
8937
- type: "interrupt",
8938
- sessionId: this._sessionId,
8939
- timestamp: Date.now()
8940
- }));
8941
- }
8942
- this.isSpeaking = false;
8943
- this.setState("listening");
8944
- this.emit("interruption.handled", { timestamp: Date.now(), action: "stop" });
8945
- }
8946
- getHistory() {
8947
- return [...this.history];
8948
- }
8949
- clearHistory() {
8950
- this.history = [];
8951
- this.emit("memory.updated", { messageCount: 0 });
8952
- }
8953
- async healthCheck() {
8954
- if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
8955
- return false;
8956
- }
8957
- return new Promise((resolve) => {
8958
- const timeout = setTimeout(() => resolve(false), 5e3);
8959
- const handler = (event) => {
8960
- try {
8961
- const data = JSON.parse(event.data);
8962
- if (data.type === "pong") {
8963
- clearTimeout(timeout);
8964
- this.ws?.removeEventListener("message", handler);
8965
- resolve(true);
8966
- }
8967
- } catch {
8968
- }
8969
- };
8970
- this.ws?.addEventListener("message", handler);
8971
- this.ws?.send(JSON.stringify({ type: "ping" }));
8972
- });
8973
- }
8974
- // ==================== Private Methods ====================
8975
- setState(state) {
8976
- const previousState = this._state;
8977
- this._state = state;
8978
- this.emit("state.change", { state, previousState });
8979
- }
8980
- async getAuthToken(tenant) {
8981
- const cached = this.tokenCache.get(tenant.tenantId);
8982
- if (cached && cached.expiresAt > Date.now() + 6e4) {
8983
- return cached.token;
8984
- }
8985
- if (tenant.credentials.authToken) {
8986
- return tenant.credentials.authToken;
8987
- }
8988
- const endpoint = this.agentCoreConfig.endpoint;
8989
- if (endpoint.startsWith("ws://") || endpoint.includes("localhost")) {
8990
- return "local-dev-token";
8991
- }
8992
- const httpEndpoint = endpoint.replace("wss://", "https://").replace("ws://", "http://");
8993
- const response = await fetch(`${httpEndpoint}/auth/token`, {
8994
- method: "POST",
8995
- headers: { "Content-Type": "application/json" },
8996
- body: JSON.stringify({
8997
- tenantId: tenant.tenantId,
8998
- apiKey: tenant.credentials.apiKey
8999
- })
9000
- });
9001
- if (!response.ok) {
9002
- throw new Error(`Auth failed: ${response.statusText}`);
9003
- }
9004
- const { token, expiresIn } = await response.json();
9005
- this.tokenCache.set(tenant.tenantId, {
9006
- token,
9007
- expiresAt: Date.now() + expiresIn * 1e3
9008
- });
9009
- return token;
9010
- }
9011
- async initASR() {
9012
- await Promise.all([
9013
- // SenseVoice ASR
9014
- (async () => {
9015
- this.asr = new SenseVoiceInference({
9016
- modelUrl: "/models/sensevoice/model.int8.onnx",
9017
- language: "auto"
9018
- });
9019
- await this.asr.load();
9020
- })(),
9021
- // Silero VAD for accurate voice activity detection
9022
- (async () => {
9023
- this.vad = new SileroVADInference({
9024
- modelUrl: "/models/silero-vad.onnx",
9025
- backend: "webgpu",
9026
- sampleRate: 16e3,
9027
- threshold: 0.5
9028
- });
9029
- await this.vad.load();
9030
- })()
9031
- ]);
9032
- }
9033
- async initLAM() {
9034
- const lamUrl = this.agentCoreConfig.models?.lamUrl || "/models/unified_wav2vec2_asr_a2e.onnx";
9035
- this.lam = new Wav2Vec2Inference({
9036
- modelUrl: lamUrl,
9037
- backend: "auto"
9038
- });
9039
- await this.lam.load();
9040
- await this.initPipeline();
9041
- }
9042
- async initPipeline() {
9043
- if (!this.lam) {
9044
- throw new Error("LAM must be initialized before pipeline");
9045
- }
9046
- this.pipeline = new FullFacePipeline({
9047
- lam: this.lam,
9048
- sampleRate: 16e3,
9049
- chunkTargetMs: 200
9050
- });
9051
- await this.pipeline.initialize();
9052
- this.pipeline.on("full_frame_ready", (fullFrame) => {
9053
- const frame = fullFrame.blendshapes;
9054
- this.emit("animation", {
9055
- blendshapes: frame,
9056
- get: (name) => {
9057
- const idx = LAM_BLENDSHAPES.indexOf(name);
9058
- return idx >= 0 ? frame[idx] : 0;
9059
- },
9060
- timestamp: Date.now(),
9061
- // Wall clock for client-side logging only
9062
- inferenceMs: 0
9063
- // Pipeline handles LAM inference asynchronously
9064
- });
9065
- });
9066
- this.pipeline.on("playback_complete", () => {
9067
- this.isSpeaking = false;
9068
- this.setState("idle");
9069
- this.emit("audio.output.end", { durationMs: 0 });
9070
- });
9071
- this.pipeline.on("error", (error) => {
9072
- console.error("[AgentCore] Pipeline error:", error);
9073
- this.emit("connection.error", {
9074
- error,
9075
- recoverable: true
9076
- });
9077
- });
9078
- }
9079
- async connectWebSocket(authToken, config) {
9080
- return new Promise((resolve, reject) => {
9081
- const wsUrl = new URL(`${this.agentCoreConfig.endpoint.replace("http", "ws")}/ws`);
9082
- wsUrl.searchParams.set("sessionId", config.sessionId);
9083
- wsUrl.searchParams.set("characterId", config.tenant.characterId);
9084
- this.ws = new WebSocket(wsUrl.toString());
9085
- this.ws.onopen = () => {
9086
- this.ws?.send(JSON.stringify({
9087
- type: "auth",
9088
- token: authToken,
9089
- tenantId: config.tenant.tenantId,
9090
- systemPrompt: config.systemPrompt
9091
- }));
9092
- };
9093
- this.ws.onmessage = (event) => {
9094
- try {
9095
- this.handleAgentCoreMessage(JSON.parse(event.data));
9096
- } catch {
9097
- }
9098
- };
9099
- this.ws.onerror = () => {
9100
- reject(new Error("WebSocket connection failed"));
9101
- };
9102
- this.ws.onclose = (event) => {
9103
- this.handleDisconnect(event);
9104
- };
9105
- const authTimeout = setTimeout(() => {
9106
- reject(new Error("Auth timeout"));
9107
- }, 1e4);
9108
- const authHandler = (event) => {
9109
- try {
9110
- const data = JSON.parse(event.data);
9111
- if (data.type === "auth_success") {
9112
- clearTimeout(authTimeout);
9113
- this.ws?.removeEventListener("message", authHandler);
9114
- resolve();
9115
- } else if (data.type === "auth_failed") {
9116
- clearTimeout(authTimeout);
9117
- reject(new Error(data.message));
9118
- }
9119
- } catch {
9120
- }
9121
- };
9122
- this.ws.addEventListener("message", authHandler);
9123
- });
9124
- }
9125
- handleAgentCoreMessage(data) {
9126
- switch (data.type) {
9127
- case "response_start":
9128
- this.setState("speaking");
9129
- this.isSpeaking = true;
9130
- this.emit("ai.response.start", {
9131
- text: data.text,
9132
- emotion: data.emotion
9133
- });
9134
- if (data.emotion) {
9135
- this.emotionController.transitionTo(
9136
- { [data.emotion]: 0.7 },
9137
- 300
9138
- );
9139
- }
9140
- if (this.pipeline) {
9141
- this.pipeline.start();
9142
- }
9143
- break;
9144
- case "response_chunk":
9145
- this.emit("ai.response.chunk", {
9146
- text: data.text,
9147
- isLast: data.isLast
9148
- });
9149
- break;
9150
- case "audio_chunk":
9151
- if (data.audio && this.pipeline) {
9152
- const audioData = this.base64ToArrayBuffer(data.audio);
9153
- const uint8 = new Uint8Array(audioData);
9154
- this.pipeline.onAudioChunk(uint8).catch((error) => {
9155
- console.error("[AgentCore] Pipeline chunk error:", error);
9156
- });
9157
- }
9158
- break;
9159
- case "audio_end":
9160
- if (this.pipeline) {
9161
- this.pipeline.end().catch((error) => {
9162
- console.error("[AgentCore] Pipeline end error:", error);
9163
- });
9164
- }
9165
- break;
9166
- case "response_end":
9167
- this.addToHistory({
9168
- role: "assistant",
9169
- content: data.fullText,
9170
- timestamp: Date.now(),
9171
- emotion: data.emotion
9172
- });
9173
- this.emit("ai.response.end", {
9174
- fullText: data.fullText,
9175
- durationMs: data.durationMs || 0
9176
- });
9177
- break;
9178
- case "memory_updated":
9179
- this.emit("memory.updated", {
9180
- messageCount: data.messageCount,
9181
- tokenCount: data.tokenCount
9182
- });
9183
- break;
9184
- case "error":
9185
- this.emit("connection.error", {
9186
- error: new Error(data.message),
9187
- recoverable: data.recoverable ?? false
9188
- });
9189
- break;
9190
- }
9191
- }
9192
- scheduleTranscription() {
9193
- if (this.audioBuffer.length === 0) return;
9194
- const totalLength = this.audioBuffer.reduce((sum2, buf) => sum2 + buf.length, 0);
9195
- if (totalLength < 4e3) return;
9196
- const audio = new Float32Array(totalLength);
9197
- let offset = 0;
9198
- for (const buf of this.audioBuffer) {
9199
- audio.set(buf, offset);
9200
- offset += buf.length;
9201
- }
9202
- this.audioBuffer = [];
9203
- let sum = 0;
9204
- for (let i = 0; i < audio.length; i++) {
9205
- sum += audio[i] * audio[i];
9206
- }
9207
- const rms = Math.sqrt(sum / audio.length);
9208
- if (rms < 0.01) {
9209
- console.debug("[AgentCore] Skipping silent audio", { rms, samples: audio.length });
9210
- return;
9211
- }
9212
- if (this.asr) {
9213
- this.setState("listening");
9214
- this.emit("user.speech.start", { timestamp: Date.now() });
9215
- this.asr.transcribe(audio).then((result) => {
9216
- this.emit("user.transcript.final", {
9217
- text: result.text,
9218
- confidence: 1
9219
- });
9220
- this.emit("user.speech.end", { timestamp: Date.now(), durationMs: result.inferenceTimeMs });
9221
- const cleanText = result.text.trim();
9222
- if (cleanText) {
9223
- this.sendText(cleanText).catch((error) => {
9224
- console.error("[AgentCore] Send text error:", error);
9225
- });
9226
- }
9227
- }).catch((error) => {
9228
- console.error("[AgentCore] Transcription error:", error);
9229
- });
9230
- }
9231
- }
9232
- // REMOVED: processAudioForAnimation() - now handled by FullFacePipeline
9233
- // The pipeline manages audio scheduling, LAM inference, and frame synchronization
9234
- // Frames are emitted via pipeline.on('full_frame_ready') event (see initPipeline())
9235
- /**
9236
- * Detect voice activity using Silero VAD
9237
- * Falls back to simple RMS if VAD not available
9238
- */
9239
- async detectVoiceActivity(audio) {
9240
- const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
9241
- if (this.vad) {
9242
- const chunkSize = this.vad.getChunkSize();
9243
- for (let i = 0; i + chunkSize <= float32.length; i += chunkSize) {
9244
- const chunk = float32.slice(i, i + chunkSize);
9245
- const result = await this.vad.process(chunk);
9246
- if (result.isSpeech) {
9247
- return true;
9248
- }
9249
- }
9250
- return false;
9251
- }
9252
- let sum = 0;
9253
- for (let i = 0; i < float32.length; i++) {
9254
- sum += float32[i] * float32[i];
9255
- }
9256
- const rms = Math.sqrt(sum / float32.length);
9257
- return rms > 0.02;
9258
- }
9259
- base64ToArrayBuffer(base64) {
9260
- const binaryString = atob(base64);
9261
- const bytes = new Uint8Array(binaryString.length);
9262
- for (let i = 0; i < binaryString.length; i++) {
9263
- bytes[i] = binaryString.charCodeAt(i);
9264
- }
9265
- return bytes.buffer;
9266
- }
9267
- addToHistory(message) {
9268
- this.history.push(message);
9269
- this.emit("memory.updated", { messageCount: this.history.length });
9270
- }
9271
- handleDisconnect(event) {
9272
- this._isConnected = false;
9273
- if (event.code !== 1e3) {
9274
- if (this.wsReconnectAttempts < this.maxReconnectAttempts) {
9275
- this.wsReconnectAttempts++;
9276
- setTimeout(() => {
9277
- if (this.currentConfig) {
9278
- this.connect(this.currentConfig).catch(() => {
9279
- });
9280
- }
9281
- }, Math.pow(2, this.wsReconnectAttempts) * 1e3);
9282
- } else {
9283
- this.setState("error");
9284
- this.emit("connection.error", {
9285
- error: new Error("Max reconnection attempts reached"),
9286
- recoverable: false
9287
- });
9288
- }
9289
- }
9290
- this.emit("connection.closed", { reason: event.reason || "Connection closed" });
9291
- }
9292
- };
9293
-
9294
- // src/ai/orchestration/ConversationOrchestrator.ts
9295
- var ConversationSessionImpl = class {
9296
- constructor(config, adapter) {
9297
- this._history = [];
9298
- this._context = /* @__PURE__ */ new Map();
9299
- this.sessionId = config.sessionId;
9300
- this._config = config;
9301
- this._adapter = adapter;
9302
- this.createdAt = Date.now();
9303
- this._lastActivityAt = Date.now();
9304
- this._emotionController = new EmotionController();
9305
- if (config.emotion) {
9306
- this._emotionController.setPreset(config.emotion);
9307
- }
9308
- }
9309
- get adapter() {
9310
- return this._adapter;
9311
- }
9312
- get config() {
9313
- return this._config;
9314
- }
9315
- get state() {
9316
- return this._adapter.state;
9317
- }
9318
- get history() {
9319
- return [...this._history];
9320
- }
9321
- get emotion() {
9322
- return {};
9323
- }
9324
- get lastActivityAt() {
9325
- return this._lastActivityAt;
9326
- }
9327
- async start() {
9328
- await this._adapter.connect(this._config);
9329
- this._lastActivityAt = Date.now();
9330
- }
9331
- async end() {
9332
- await this._adapter.disconnect();
9333
- }
9334
- pushAudio(audio) {
9335
- this._adapter.pushAudio(audio);
9336
- this._lastActivityAt = Date.now();
9337
- }
9338
- async sendText(text) {
9339
- await this._adapter.sendText(text);
9340
- this._lastActivityAt = Date.now();
9341
- }
9342
- interrupt() {
9343
- this._adapter.interrupt();
9344
- this._lastActivityAt = Date.now();
9345
- }
9346
- setEmotion(emotion) {
9347
- this._emotionController.set(emotion);
9348
- }
9349
- addContext(key, value) {
9350
- this._context.set(key, value);
9351
- }
9352
- removeContext(key) {
9353
- this._context.delete(key);
9354
- }
9355
- getContext() {
9356
- return Object.fromEntries(this._context);
9357
- }
9358
- export() {
9359
- return {
9360
- sessionId: this.sessionId,
9361
- tenantId: this._config.tenant.tenantId,
9362
- characterId: this._config.tenant.characterId,
9363
- history: this._history,
9364
- context: Object.fromEntries(this._context),
9365
- emotion: this.emotion,
9366
- createdAt: this.createdAt,
9367
- lastActivityAt: this._lastActivityAt
9368
- };
9369
- }
9370
- import(snapshot) {
9371
- this._history = [...snapshot.history];
9372
- this._context = new Map(Object.entries(snapshot.context));
9373
- this._lastActivityAt = snapshot.lastActivityAt;
9374
- }
9375
- syncHistory() {
9376
- this._history = this._adapter.getHistory();
9377
- }
9378
- };
9379
- var ConversationOrchestrator = class extends EventEmitter {
9380
- constructor(config) {
9381
- super();
9382
- // Sessions per tenant
9383
- this.sessions = /* @__PURE__ */ new Map();
9384
- // Tenant configurations
9385
- this.tenants = /* @__PURE__ */ new Map();
9386
- // Health monitoring
9387
- this.healthCheckInterval = null;
9388
- this.HEALTH_CHECK_INTERVAL_MS = 3e4;
9389
- this.config = {
9390
- connectionTimeoutMs: 5e3,
9391
- maxRetries: 3,
9392
- ...config
9393
- };
9394
- this.adapter = new AgentCoreAdapter(config.adapter);
9395
- }
9396
- /**
9397
- * Register a tenant
9398
- */
9399
- registerTenant(tenant) {
9400
- this.tenants.set(tenant.tenantId, tenant);
9401
- }
9402
- /**
9403
- * Unregister a tenant
9404
- */
9405
- unregisterTenant(tenantId) {
9406
- this.tenants.delete(tenantId);
9407
- }
9408
- /**
9409
- * Get tenant config
9410
- */
9411
- getTenant(tenantId) {
9412
- return this.tenants.get(tenantId);
9413
- }
9414
- /**
9415
- * Create a new conversation session for a tenant
9416
- */
9417
- async createSession(tenantId, options = {}) {
9418
- const tenant = this.tenants.get(tenantId);
9419
- if (!tenant) {
9420
- throw new Error(`Tenant not found: ${tenantId}`);
9421
- }
9422
- const sessionId = options.sessionId || this.generateSessionId();
9423
- const sessionConfig = {
9424
- sessionId,
9425
- tenant,
9426
- systemPrompt: options.systemPrompt,
9427
- voice: options.voice,
9428
- emotion: options.emotion,
9429
- language: options.language
9430
- };
9431
- const session = new ConversationSessionImpl(sessionConfig, this.adapter);
9432
- this.sessions.set(sessionId, session);
9433
- this.forwardAdapterEvents(this.adapter, sessionId);
9434
- await session.start();
9435
- this.emit("session.created", { sessionId, tenantId });
9436
- return session;
9437
- }
9438
- /**
9439
- * End a session
9440
- */
9441
- async endSession(sessionId) {
9442
- const session = this.sessions.get(sessionId);
9443
- if (session) {
9444
- await session.end();
9445
- this.sessions.delete(sessionId);
9446
- this.emit("session.ended", { sessionId, reason: "Client requested" });
9447
- }
9448
- }
9449
- /**
9450
- * Get session by ID
9451
- */
9452
- getSession(sessionId) {
9453
- return this.sessions.get(sessionId);
9454
- }
9455
- /**
9456
- * Get all sessions for a tenant
9457
- */
9458
- getTenantSessions(tenantId) {
9459
- return Array.from(this.sessions.values()).filter((s) => s.config.tenant.tenantId === tenantId);
9460
- }
9461
- /**
9462
- * Start health monitoring
9463
- */
9464
- startHealthMonitoring() {
9465
- if (this.healthCheckInterval) return;
9466
- this.healthCheckInterval = setInterval(async () => {
9467
- await this.performHealthCheck();
9468
- }, this.HEALTH_CHECK_INTERVAL_MS);
9469
- }
9470
- /**
9471
- * Stop health monitoring
9472
- */
9473
- stopHealthMonitoring() {
9474
- if (this.healthCheckInterval) {
9475
- clearInterval(this.healthCheckInterval);
9476
- this.healthCheckInterval = null;
9477
- }
9478
- }
9479
- /**
9480
- * Dispose all resources
9481
- */
9482
- async dispose() {
9483
- this.stopHealthMonitoring();
9484
- const endPromises = Array.from(this.sessions.values()).map((s) => s.end());
9485
- await Promise.all(endPromises);
9486
- this.sessions.clear();
9487
- await this.adapter.disconnect();
9488
- }
9489
- // ==================== Private Methods ====================
9490
- generateSessionId() {
9491
- return `sess_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
9492
- }
9493
- forwardAdapterEvents(adapter, sessionId) {
9494
- const events = [
9495
- "state.change",
9496
- "user.speech.start",
9497
- "user.speech.end",
9498
- "user.transcript.partial",
9499
- "user.transcript.final",
9500
- "ai.thinking.start",
9501
- "ai.response.start",
9502
- "ai.response.chunk",
9503
- "ai.response.end",
9504
- "audio.output.chunk",
9505
- "audio.output.end",
9506
- "animation",
9507
- "memory.updated",
9508
- "connection.error",
9509
- "interruption.detected",
9510
- "interruption.handled"
9511
- ];
9512
- for (const event of events) {
9513
- adapter.on(event, (data) => {
9514
- const eventData = data;
9515
- this.emit(event, { ...eventData, sessionId });
9516
- });
9517
- }
9518
- }
9519
- async performHealthCheck() {
9520
- try {
9521
- await this.adapter.healthCheck();
9522
- } catch {
9523
- }
9524
- }
9525
- };
9526
-
9527
- // src/ai/tenancy/TenantManager.ts
9528
- var _TenantManager = class _TenantManager {
9529
- constructor() {
9530
- this.tenants = /* @__PURE__ */ new Map();
9531
- this.quotas = /* @__PURE__ */ new Map();
9532
- this.usage = /* @__PURE__ */ new Map();
9533
- this.tokenRefreshCallbacks = /* @__PURE__ */ new Map();
9534
- }
9535
- /**
9536
- * Register a tenant with quota
9537
- */
9538
- register(tenant, quota = _TenantManager.DEFAULT_QUOTA, tokenRefreshCallback) {
9539
- this.tenants.set(tenant.tenantId, tenant);
9540
- this.quotas.set(tenant.tenantId, quota);
9541
- this.usage.set(tenant.tenantId, {
9542
- currentSessions: 0,
9543
- requestsThisMinute: 0,
9544
- tokensUsed: 0,
9545
- audioMinutesToday: 0,
9546
- lastMinuteReset: Date.now(),
9547
- lastDailyReset: Date.now()
9548
- });
9549
- if (tokenRefreshCallback) {
9550
- this.tokenRefreshCallbacks.set(tenant.tenantId, tokenRefreshCallback);
9551
- }
9552
- }
9553
- /**
9554
- * Unregister a tenant
9555
- */
9556
- unregister(tenantId) {
9557
- this.tenants.delete(tenantId);
9558
- this.quotas.delete(tenantId);
9559
- this.usage.delete(tenantId);
9560
- this.tokenRefreshCallbacks.delete(tenantId);
9561
- }
9562
- /**
9563
- * Get tenant config
9564
- */
9565
- get(tenantId) {
9566
- return this.tenants.get(tenantId);
9567
- }
9568
- /**
9569
- * Check if tenant exists
9570
- */
9571
- has(tenantId) {
9572
- return this.tenants.has(tenantId);
9573
- }
9574
- /**
9575
- * Get all tenant IDs
9576
- */
9577
- getTenantIds() {
9578
- return Array.from(this.tenants.keys());
9579
- }
9580
- /**
9581
- * Check if tenant can create new session
9582
- */
9583
- canCreateSession(tenantId) {
9584
- const quota = this.quotas.get(tenantId);
9585
- const usage = this.usage.get(tenantId);
9586
- if (!quota || !usage) return false;
9587
- return usage.currentSessions < quota.maxSessions;
9588
- }
9589
- /**
9590
- * Check if tenant can make request
9591
- */
9592
- canMakeRequest(tenantId) {
9593
- const quota = this.quotas.get(tenantId);
9594
- const usage = this.usage.get(tenantId);
9595
- if (!quota || !usage) return false;
9596
- this.checkMinuteReset(tenantId);
9597
- return usage.requestsThisMinute < quota.requestsPerMinute;
9598
- }
9599
- /**
9600
- * Check if tenant can use audio
9601
- */
9602
- canUseAudio(tenantId, minutes) {
9603
- const quota = this.quotas.get(tenantId);
9604
- const usage = this.usage.get(tenantId);
9605
- if (!quota || !usage) return false;
9606
- this.checkDailyReset(tenantId);
9607
- return usage.audioMinutesToday + minutes <= quota.maxAudioMinutesPerDay;
9608
- }
9609
- /**
9610
- * Increment session count
9611
- */
9612
- incrementSessions(tenantId) {
9613
- const usage = this.usage.get(tenantId);
9614
- if (usage) {
9615
- usage.currentSessions++;
9616
- }
9617
- }
9618
- /**
9619
- * Decrement session count
9620
- */
9621
- decrementSessions(tenantId) {
9622
- const usage = this.usage.get(tenantId);
9623
- if (usage && usage.currentSessions > 0) {
9624
- usage.currentSessions--;
9625
- }
9626
- }
9627
- /**
9628
- * Record a request
9629
- */
9630
- recordRequest(tenantId) {
9631
- const usage = this.usage.get(tenantId);
9632
- if (usage) {
9633
- this.checkMinuteReset(tenantId);
9634
- usage.requestsThisMinute++;
9635
- }
9636
- }
9637
- /**
9638
- * Record token usage
9639
- */
9640
- recordTokens(tenantId, tokens) {
9641
- const usage = this.usage.get(tenantId);
9642
- if (usage) {
9643
- usage.tokensUsed += tokens;
9644
- }
9645
- }
9646
- /**
9647
- * Record audio usage
9648
- */
9649
- recordAudioMinutes(tenantId, minutes) {
9650
- const usage = this.usage.get(tenantId);
9651
- if (usage) {
9652
- this.checkDailyReset(tenantId);
9653
- usage.audioMinutesToday += minutes;
9654
- }
9655
- }
9656
- /**
9657
- * Get fresh auth token for tenant
9658
- */
9659
- async getAuthToken(tenantId) {
9660
- const tenant = this.tenants.get(tenantId);
9661
- if (!tenant) {
9662
- throw new Error(`Tenant not found: ${tenantId}`);
9663
- }
9664
- const callback = this.tokenRefreshCallbacks.get(tenantId);
9665
- if (callback) {
9666
- const token = await callback();
9667
- tenant.credentials.authToken = token;
9668
- return token;
9669
- }
9670
- if (tenant.credentials.authToken) {
9671
- return tenant.credentials.authToken;
9672
- }
9673
- throw new Error(`No auth token available for tenant: ${tenantId}`);
9674
- }
9675
- /**
9676
- * Update tenant credentials
9677
- */
9678
- updateCredentials(tenantId, credentials) {
9679
- const tenant = this.tenants.get(tenantId);
9680
- if (tenant) {
9681
- tenant.credentials = { ...tenant.credentials, ...credentials };
9682
- }
9683
- }
9684
- /**
9685
- * Get usage stats for tenant
9686
- */
9687
- getUsage(tenantId) {
9688
- return this.usage.get(tenantId);
9689
- }
9690
- /**
9691
- * Get quota for tenant
9692
- */
9693
- getQuota(tenantId) {
9694
- return this.quotas.get(tenantId);
9695
- }
9696
- /**
9697
- * Update quota for tenant
9698
- */
9699
- updateQuota(tenantId, quota) {
9700
- const existing = this.quotas.get(tenantId);
9701
- if (existing) {
9702
- this.quotas.set(tenantId, { ...existing, ...quota });
9703
- }
9704
- }
9705
- /**
9706
- * Reset all usage stats for a tenant
9707
- */
9708
- resetUsage(tenantId) {
9709
- const usage = this.usage.get(tenantId);
9710
- if (usage) {
9711
- usage.requestsThisMinute = 0;
9712
- usage.tokensUsed = 0;
9713
- usage.audioMinutesToday = 0;
9714
- usage.lastMinuteReset = Date.now();
9715
- usage.lastDailyReset = Date.now();
9716
- }
9717
- }
9718
- // ==================== Private Methods ====================
9719
- checkMinuteReset(tenantId) {
9720
- const usage = this.usage.get(tenantId);
9721
- if (!usage) return;
9722
- const now = Date.now();
9723
- if (now - usage.lastMinuteReset >= 6e4) {
9724
- usage.requestsThisMinute = 0;
9725
- usage.lastMinuteReset = now;
9726
- }
9727
- }
9728
- checkDailyReset(tenantId) {
9729
- const usage = this.usage.get(tenantId);
9730
- if (!usage) return;
9731
- const now = Date.now();
9732
- const MS_PER_DAY = 24 * 60 * 60 * 1e3;
9733
- if (now - usage.lastDailyReset >= MS_PER_DAY) {
9734
- usage.audioMinutesToday = 0;
9735
- usage.lastDailyReset = now;
9736
- }
9737
- }
9738
- };
9739
- /**
9740
- * Default quota for new tenants
9741
- */
9742
- _TenantManager.DEFAULT_QUOTA = {
9743
- maxSessions: 10,
9744
- requestsPerMinute: 60,
9745
- maxTokensPerConversation: 1e5,
9746
- maxAudioMinutesPerDay: 60
9747
- };
9748
- var TenantManager = _TenantManager;
9749
-
9750
- // src/ai/utils/AudioSyncManager.ts
9751
- var AudioSyncManager = class extends EventEmitter {
9752
- constructor(config = {}) {
9753
- super();
9754
- this.bufferPosition = 0;
9755
- this.playbackQueue = [];
9756
- this.isPlaying = false;
9757
- this.audioContext = null;
9758
- this.playbackStartTime = 0;
9759
- this.samplesPlayed = 0;
9760
- this.config = {
9761
- sampleRate: 16e3,
9762
- bufferSize: 16640,
9763
- overlapSize: 4160,
9764
- maxDriftMs: 100,
9765
- ...config
9766
- };
9767
- this.audioBuffer = new Float32Array(this.config.bufferSize);
9768
- }
9769
- /**
9770
- * Initialize audio context
9771
- */
9772
- async initialize() {
9773
- if (!this.audioContext) {
9774
- this.audioContext = new AudioContext({ sampleRate: this.config.sampleRate });
9775
- }
9776
- if (this.audioContext.state === "suspended") {
9777
- await this.audioContext.resume();
9778
- }
9779
- }
9780
- /**
9781
- * Push audio chunk for processing and playback
9782
- */
9783
- pushAudio(audio) {
9784
- this.playbackQueue.push(audio);
9785
- this.bufferForInference(audio);
9786
- if (!this.isPlaying && this.playbackQueue.length > 0) {
9787
- this.startPlayback();
9788
- }
9789
- }
9790
- /**
9791
- * Buffer audio for inference
9792
- */
9793
- bufferForInference(audio) {
9794
- let offset = 0;
9795
- while (offset < audio.length) {
9796
- const remaining = this.config.bufferSize - this.bufferPosition;
9797
- const toCopy = Math.min(remaining, audio.length - offset);
9798
- this.audioBuffer.set(audio.subarray(offset, offset + toCopy), this.bufferPosition);
9799
- this.bufferPosition += toCopy;
9800
- offset += toCopy;
9801
- if (this.bufferPosition >= this.config.bufferSize) {
9802
- this.emit("buffer.ready", { audio: new Float32Array(this.audioBuffer) });
9803
- const overlapStart = this.config.bufferSize - this.config.overlapSize;
9804
- this.audioBuffer.copyWithin(0, overlapStart);
9805
- this.bufferPosition = this.config.overlapSize;
9806
- }
9807
- }
9808
- }
9809
- /**
9810
- * Start audio playback
9811
- */
9812
- async startPlayback() {
9813
- if (!this.audioContext || this.isPlaying) return;
9814
- this.isPlaying = true;
9815
- this.playbackStartTime = this.audioContext.currentTime;
9816
- this.samplesPlayed = 0;
9817
- this.emit("playback.start", {});
9818
- await this.processPlaybackQueue();
9819
- }
9820
- /**
9821
- * Process playback queue
9822
- */
9823
- async processPlaybackQueue() {
9824
- if (!this.audioContext) return;
9825
- while (this.playbackQueue.length > 0) {
9826
- const audio = this.playbackQueue.shift();
9827
- const buffer = this.audioContext.createBuffer(1, audio.length, this.config.sampleRate);
9828
- buffer.copyToChannel(audio, 0);
9829
- const source = this.audioContext.createBufferSource();
9830
- source.buffer = buffer;
9831
- source.connect(this.audioContext.destination);
9832
- const playTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
9833
- source.start(playTime);
9834
- this.samplesPlayed += audio.length;
9835
- this.checkDrift();
9836
- await new Promise((resolve) => {
9837
- source.onended = resolve;
9838
- });
9839
- }
9840
- this.isPlaying = false;
9841
- this.emit("playback.end", {});
9842
- }
9843
- /**
9844
- * Check for audio/animation drift
9845
- */
9846
- checkDrift() {
9847
- if (!this.audioContext) return;
9848
- const expectedTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
9849
- const actualTime = this.audioContext.currentTime;
9850
- const driftMs = (actualTime - expectedTime) * 1e3;
9851
- if (Math.abs(driftMs) > this.config.maxDriftMs) {
9852
- this.emit("sync.drift", { driftMs });
9853
- }
9854
- }
9855
- /**
9856
- * Clear playback queue
9857
- */
9858
- clearQueue() {
9859
- this.playbackQueue = [];
9860
- this.bufferPosition = 0;
9861
- this.audioBuffer.fill(0);
9862
- }
9863
- /**
9864
- * Stop playback
9865
- */
9866
- stop() {
9867
- this.clearQueue();
9868
- this.isPlaying = false;
9869
- }
9870
- /**
9871
- * Get current playback position in seconds
9872
- */
9873
- getPlaybackPosition() {
9874
- if (!this.audioContext) return 0;
9875
- return this.audioContext.currentTime - this.playbackStartTime;
9876
- }
9877
- /**
9878
- * Check if currently playing
9879
- */
9880
- getIsPlaying() {
9881
- return this.isPlaying;
9882
- }
9883
- /**
9884
- * Dispose resources
9885
- */
9886
- dispose() {
9887
- this.stop();
9888
- this.audioContext?.close();
9889
- this.audioContext = null;
9890
- }
9891
- };
9892
-
9893
- // src/ai/utils/InterruptionHandler.ts
9894
- var InterruptionHandler = class extends EventEmitter {
9895
- constructor(config = {}) {
9896
- super();
9897
- this.isSpeaking = false;
9898
- this.speechStartTime = 0;
9899
- this.lastSpeechTime = 0;
9900
- this.silenceTimer = null;
9901
- this.aiIsSpeaking = false;
9902
- // Debouncing: only emit one interruption per speech session
9903
- this.interruptionTriggeredThisSession = false;
9904
- this.config = {
9905
- vadThreshold: 0.5,
9906
- // Silero VAD default
9907
- minSpeechDurationMs: 200,
9908
- // Google/Amazon barge-in standard
9909
- silenceTimeoutMs: 500,
9910
- // OpenAI Realtime API standard
9911
- enabled: true,
9912
- ...config
9913
- };
9914
- }
9915
- /**
9916
- * Process VAD result for interruption detection
9917
- * @param vadProbability - Speech probability from VAD (0-1)
9918
- * @param audioEnergy - Optional RMS energy for logging (default: 0)
9919
- */
9920
- processVADResult(vadProbability, audioEnergy = 0) {
9921
- if (!this.config.enabled) return;
9922
- if (vadProbability > this.config.vadThreshold) {
9923
- this.onSpeechDetected(audioEnergy || vadProbability);
9924
- } else {
9925
- this.onSilenceDetected();
9926
- }
9927
- }
9928
- /**
9929
- * Notify that AI started speaking
9930
- */
9931
- setAISpeaking(speaking) {
9932
- this.aiIsSpeaking = speaking;
9933
- }
9934
- /**
9935
- * Enable/disable interruption detection
9936
- */
9937
- setEnabled(enabled) {
9938
- this.config.enabled = enabled;
9939
- if (!enabled) {
9940
- this.reset();
9941
- }
9942
- }
9943
- /**
9944
- * Update configuration
9945
- */
9946
- updateConfig(config) {
9947
- this.config = { ...this.config, ...config };
9948
- }
9949
- /**
9950
- * Reset state
9951
- */
9952
- reset() {
9953
- this.isSpeaking = false;
9954
- this.speechStartTime = 0;
9955
- this.lastSpeechTime = 0;
9956
- this.interruptionTriggeredThisSession = false;
9957
- if (this.silenceTimer) {
9958
- clearTimeout(this.silenceTimer);
9959
- this.silenceTimer = null;
9960
- }
9961
- }
9962
- /**
9963
- * Get current state
9964
- */
9965
- getState() {
9966
- return {
9967
- isSpeaking: this.isSpeaking,
9968
- speechDurationMs: this.isSpeaking ? Date.now() - this.speechStartTime : 0
9969
- };
9970
- }
9971
- // ==================== Private Methods ====================
9972
- onSpeechDetected(rms) {
9973
- const now = Date.now();
9974
- this.lastSpeechTime = now;
9975
- if (this.silenceTimer) {
9976
- clearTimeout(this.silenceTimer);
9977
- this.silenceTimer = null;
9978
- }
9979
- if (!this.isSpeaking) {
9980
- this.isSpeaking = true;
9981
- this.speechStartTime = now;
9982
- this.emit("speech.detected", { rms });
9983
- }
9984
- if (this.aiIsSpeaking && !this.interruptionTriggeredThisSession) {
9985
- const speechDuration = now - this.speechStartTime;
9986
- if (speechDuration >= this.config.minSpeechDurationMs) {
9987
- this.interruptionTriggeredThisSession = true;
9988
- this.emit("interruption.triggered", { rms, durationMs: speechDuration });
9989
- }
9990
- }
9991
- }
9992
- onSilenceDetected() {
9993
- if (!this.isSpeaking) return;
9994
- if (!this.silenceTimer) {
9995
- this.silenceTimer = setTimeout(() => {
9996
- const durationMs = this.lastSpeechTime - this.speechStartTime;
9997
- this.isSpeaking = false;
9998
- this.silenceTimer = null;
9999
- this.interruptionTriggeredThisSession = false;
10000
- this.emit("speech.ended", { durationMs });
10001
- }, this.config.silenceTimeoutMs);
10002
- }
10003
- }
10004
- };
10005
-
10006
8876
  // src/animation/types.ts
10007
8877
  var DEFAULT_ANIMATION_CONFIG = {
10008
8878
  initialState: "idle",
@@ -11045,17 +9915,14 @@ export {
11045
9915
  A2EOrchestrator,
11046
9916
  A2EProcessor,
11047
9917
  ARKIT_BLENDSHAPES,
11048
- AgentCoreAdapter,
11049
9918
  AnimationGraph,
11050
9919
  AudioChunkCoalescer,
11051
9920
  AudioEnergyAnalyzer,
11052
9921
  AudioScheduler,
11053
- AudioSyncManager,
11054
9922
  BLENDSHAPE_TO_GROUP,
11055
9923
  BlendshapeSmoother,
11056
9924
  CTC_VOCAB,
11057
9925
  ConsoleExporter,
11058
- ConversationOrchestrator,
11059
9926
  DEFAULT_ANIMATION_CONFIG,
11060
9927
  DEFAULT_LOGGING_CONFIG,
11061
9928
  EMOTION_NAMES,
@@ -11085,7 +9952,6 @@ export {
11085
9952
  SileroVADInference,
11086
9953
  SileroVADUnifiedAdapter,
11087
9954
  SileroVADWorker,
11088
- TenantManager,
11089
9955
  UnifiedInferenceWorker,
11090
9956
  Wav2ArkitCpuInference,
11091
9957
  Wav2ArkitCpuUnifiedAdapter,