@omote/core 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +222 -443
- package/dist/index.d.mts +79 -828
- package/dist/index.d.ts +79 -828
- package/dist/index.js +180 -1314
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +180 -1314
- package/dist/index.mjs.map +1 -1
- package/package.json +7 -3
package/dist/index.mjs
CHANGED
|
@@ -761,80 +761,6 @@ var A2EProcessor = class {
|
|
|
761
761
|
}
|
|
762
762
|
};
|
|
763
763
|
|
|
764
|
-
// src/inference/BlendshapeSmoother.ts
|
|
765
|
-
var NUM_BLENDSHAPES = 52;
|
|
766
|
-
var BlendshapeSmoother = class {
|
|
767
|
-
constructor(config) {
|
|
768
|
-
/** Whether any target has been set */
|
|
769
|
-
this._hasTarget = false;
|
|
770
|
-
this.halflife = config?.halflife ?? 0.06;
|
|
771
|
-
this.values = new Float32Array(NUM_BLENDSHAPES);
|
|
772
|
-
this.velocities = new Float32Array(NUM_BLENDSHAPES);
|
|
773
|
-
this.targets = new Float32Array(NUM_BLENDSHAPES);
|
|
774
|
-
}
|
|
775
|
-
/** Whether a target frame has been set (false until first setTarget call) */
|
|
776
|
-
get hasTarget() {
|
|
777
|
-
return this._hasTarget;
|
|
778
|
-
}
|
|
779
|
-
/**
|
|
780
|
-
* Set new target frame from inference output.
|
|
781
|
-
* Springs will converge toward these values on subsequent update() calls.
|
|
782
|
-
*/
|
|
783
|
-
setTarget(frame) {
|
|
784
|
-
this.targets.set(frame);
|
|
785
|
-
this._hasTarget = true;
|
|
786
|
-
}
|
|
787
|
-
/**
|
|
788
|
-
* Advance all 52 springs by `dt` seconds and return the smoothed frame.
|
|
789
|
-
*
|
|
790
|
-
* Call this every render frame (e.g., inside requestAnimationFrame).
|
|
791
|
-
* Returns the internal values buffer — do NOT mutate the returned array.
|
|
792
|
-
*
|
|
793
|
-
* @param dt - Time step in seconds (e.g., 1/60 for 60fps)
|
|
794
|
-
* @returns Smoothed blendshape values (Float32Array of 52)
|
|
795
|
-
*/
|
|
796
|
-
update(dt) {
|
|
797
|
-
if (!this._hasTarget) {
|
|
798
|
-
return this.values;
|
|
799
|
-
}
|
|
800
|
-
if (this.halflife <= 0) {
|
|
801
|
-
this.values.set(this.targets);
|
|
802
|
-
this.velocities.fill(0);
|
|
803
|
-
return this.values;
|
|
804
|
-
}
|
|
805
|
-
const damping = Math.LN2 / this.halflife;
|
|
806
|
-
const eydt = Math.exp(-damping * dt);
|
|
807
|
-
for (let i = 0; i < NUM_BLENDSHAPES; i++) {
|
|
808
|
-
const j0 = this.values[i] - this.targets[i];
|
|
809
|
-
const j1 = this.velocities[i] + j0 * damping;
|
|
810
|
-
this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
|
|
811
|
-
this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
|
|
812
|
-
this.values[i] = Math.max(0, Math.min(1, this.values[i]));
|
|
813
|
-
}
|
|
814
|
-
return this.values;
|
|
815
|
-
}
|
|
816
|
-
/**
|
|
817
|
-
* Decay all spring targets to neutral (0).
|
|
818
|
-
*
|
|
819
|
-
* Call when inference stalls (no new frames for threshold duration).
|
|
820
|
-
* The springs will smoothly close the mouth / relax the face over
|
|
821
|
-
* the halflife period rather than freezing.
|
|
822
|
-
*/
|
|
823
|
-
decayToNeutral() {
|
|
824
|
-
this.targets.fill(0);
|
|
825
|
-
}
|
|
826
|
-
/**
|
|
827
|
-
* Reset all state (values, velocities, targets).
|
|
828
|
-
* Call when starting a new playback session.
|
|
829
|
-
*/
|
|
830
|
-
reset() {
|
|
831
|
-
this.values.fill(0);
|
|
832
|
-
this.velocities.fill(0);
|
|
833
|
-
this.targets.fill(0);
|
|
834
|
-
this._hasTarget = false;
|
|
835
|
-
}
|
|
836
|
-
};
|
|
837
|
-
|
|
838
764
|
// src/telemetry/exporters/console.ts
|
|
839
765
|
var ConsoleExporter = class {
|
|
840
766
|
constructor(options = {}) {
|
|
@@ -2852,13 +2778,6 @@ function pcm16ToFloat32(buffer) {
|
|
|
2852
2778
|
}
|
|
2853
2779
|
return float32;
|
|
2854
2780
|
}
|
|
2855
|
-
function int16ToFloat32(int16) {
|
|
2856
|
-
const float32 = new Float32Array(int16.length);
|
|
2857
|
-
for (let i = 0; i < int16.length; i++) {
|
|
2858
|
-
float32[i] = int16[i] / 32768;
|
|
2859
|
-
}
|
|
2860
|
-
return float32;
|
|
2861
|
-
}
|
|
2862
2781
|
|
|
2863
2782
|
// src/audio/FullFacePipeline.ts
|
|
2864
2783
|
var logger4 = createLogger("FullFacePipeline");
|
|
@@ -2891,16 +2810,11 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
2891
2810
|
this.lastNewFrameTime = 0;
|
|
2892
2811
|
this.lastKnownLamFrame = null;
|
|
2893
2812
|
this.staleWarningEmitted = false;
|
|
2894
|
-
// Frame loop timing (for dt calculation)
|
|
2895
|
-
this.lastFrameLoopTime = 0;
|
|
2896
2813
|
// Diagnostic logging counter
|
|
2897
2814
|
this.frameLoopCount = 0;
|
|
2898
2815
|
const sampleRate = options.sampleRate ?? 16e3;
|
|
2899
2816
|
this.profile = options.profile ?? {};
|
|
2900
2817
|
this.staleThresholdMs = options.staleThresholdMs ?? 2e3;
|
|
2901
|
-
this.smoother = new BlendshapeSmoother({
|
|
2902
|
-
halflife: options.smoothingHalflife ?? 0.06
|
|
2903
|
-
});
|
|
2904
2818
|
const isCpuModel = options.lam.modelId === "wav2arkit_cpu";
|
|
2905
2819
|
const chunkSize = options.chunkSize ?? options.lam.chunkSize ?? 16e3;
|
|
2906
2820
|
const chunkAccumulationMs = chunkSize / sampleRate * 1e3;
|
|
@@ -2983,9 +2897,7 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
2983
2897
|
this.lastNewFrameTime = 0;
|
|
2984
2898
|
this.lastKnownLamFrame = null;
|
|
2985
2899
|
this.staleWarningEmitted = false;
|
|
2986
|
-
this.lastFrameLoopTime = 0;
|
|
2987
2900
|
this.frameLoopCount = 0;
|
|
2988
|
-
this.smoother.reset();
|
|
2989
2901
|
this.scheduler.warmup();
|
|
2990
2902
|
this.startFrameLoop();
|
|
2991
2903
|
this.startMonitoring();
|
|
@@ -3020,22 +2932,16 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3020
2932
|
/**
|
|
3021
2933
|
* Start frame animation loop
|
|
3022
2934
|
*
|
|
3023
|
-
*
|
|
3024
|
-
*
|
|
3025
|
-
*
|
|
3026
|
-
* to neutral when inference stalls.
|
|
2935
|
+
* Polls A2EProcessor at render rate (60fps) for the latest inference frame
|
|
2936
|
+
* matching the current AudioContext time. Between inference batches (~30fps
|
|
2937
|
+
* bursts), getFrameForTime() holds the last frame.
|
|
3027
2938
|
*/
|
|
3028
2939
|
startFrameLoop() {
|
|
3029
|
-
this.lastFrameLoopTime = 0;
|
|
3030
2940
|
const updateFrame = () => {
|
|
3031
|
-
const now = performance.now() / 1e3;
|
|
3032
|
-
const dt = this.lastFrameLoopTime > 0 ? now - this.lastFrameLoopTime : 1 / 60;
|
|
3033
|
-
this.lastFrameLoopTime = now;
|
|
3034
2941
|
this.frameLoopCount++;
|
|
3035
2942
|
const currentTime = this.scheduler.getCurrentTime();
|
|
3036
2943
|
const lamFrame = this.processor.getFrameForTime(currentTime);
|
|
3037
2944
|
if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
|
|
3038
|
-
this.smoother.setTarget(lamFrame);
|
|
3039
2945
|
this.lastNewFrameTime = performance.now();
|
|
3040
2946
|
this.lastKnownLamFrame = lamFrame;
|
|
3041
2947
|
this.staleWarningEmitted = false;
|
|
@@ -3055,17 +2961,15 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3055
2961
|
currentTime: currentTime.toFixed(3),
|
|
3056
2962
|
playbackEndTime: this.scheduler.getPlaybackEndTime().toFixed(3),
|
|
3057
2963
|
queuedFrames: this.processor.queuedFrameCount,
|
|
3058
|
-
hasTarget: this.smoother.hasTarget,
|
|
3059
2964
|
playbackStarted: this.playbackStarted,
|
|
3060
2965
|
msSinceNewFrame: this.lastNewFrameTime > 0 ? Math.round(performance.now() - this.lastNewFrameTime) : -1,
|
|
3061
2966
|
processorFill: this.processor.fillLevel.toFixed(2)
|
|
3062
2967
|
});
|
|
3063
2968
|
}
|
|
3064
2969
|
if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
|
|
3065
|
-
this.smoother.decayToNeutral();
|
|
3066
2970
|
if (!this.staleWarningEmitted) {
|
|
3067
2971
|
this.staleWarningEmitted = true;
|
|
3068
|
-
logger4.warn("A2E stalled \u2014
|
|
2972
|
+
logger4.warn("A2E stalled \u2014 no new inference frames", {
|
|
3069
2973
|
staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
|
|
3070
2974
|
queuedFrames: this.processor.queuedFrameCount
|
|
3071
2975
|
});
|
|
@@ -3104,12 +3008,10 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3104
3008
|
await this.scheduler.cancelAll(fadeOutMs);
|
|
3105
3009
|
this.coalescer.reset();
|
|
3106
3010
|
this.processor.reset();
|
|
3107
|
-
this.smoother.reset();
|
|
3108
3011
|
this.playbackStarted = false;
|
|
3109
3012
|
this.lastNewFrameTime = 0;
|
|
3110
3013
|
this.lastKnownLamFrame = null;
|
|
3111
3014
|
this.staleWarningEmitted = false;
|
|
3112
|
-
this.lastFrameLoopTime = 0;
|
|
3113
3015
|
this.emit("playback_complete", void 0);
|
|
3114
3016
|
}
|
|
3115
3017
|
/**
|
|
@@ -3163,6 +3065,108 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3163
3065
|
}
|
|
3164
3066
|
};
|
|
3165
3067
|
|
|
3068
|
+
// src/audio/InterruptionHandler.ts
|
|
3069
|
+
var InterruptionHandler = class extends EventEmitter {
|
|
3070
|
+
constructor(config = {}) {
|
|
3071
|
+
super();
|
|
3072
|
+
this.isSpeaking = false;
|
|
3073
|
+
this.speechStartTime = 0;
|
|
3074
|
+
this.lastSpeechTime = 0;
|
|
3075
|
+
this.silenceTimer = null;
|
|
3076
|
+
this.aiIsSpeaking = false;
|
|
3077
|
+
// Debouncing: only emit one interruption per speech session
|
|
3078
|
+
this.interruptionTriggeredThisSession = false;
|
|
3079
|
+
this.config = {
|
|
3080
|
+
vadThreshold: 0.5,
|
|
3081
|
+
// Silero VAD default
|
|
3082
|
+
minSpeechDurationMs: 200,
|
|
3083
|
+
// Google/Amazon barge-in standard
|
|
3084
|
+
silenceTimeoutMs: 500,
|
|
3085
|
+
// OpenAI Realtime API standard
|
|
3086
|
+
enabled: true,
|
|
3087
|
+
...config
|
|
3088
|
+
};
|
|
3089
|
+
}
|
|
3090
|
+
/**
|
|
3091
|
+
* Process VAD result for interruption detection
|
|
3092
|
+
* @param vadProbability - Speech probability from VAD (0-1)
|
|
3093
|
+
* @param audioEnergy - Optional RMS energy for logging (default: 0)
|
|
3094
|
+
*/
|
|
3095
|
+
processVADResult(vadProbability, audioEnergy = 0) {
|
|
3096
|
+
if (!this.config.enabled) return;
|
|
3097
|
+
if (vadProbability > this.config.vadThreshold) {
|
|
3098
|
+
this.onSpeechDetected(audioEnergy || vadProbability);
|
|
3099
|
+
} else {
|
|
3100
|
+
this.onSilenceDetected();
|
|
3101
|
+
}
|
|
3102
|
+
}
|
|
3103
|
+
/** Notify that AI started/stopped speaking */
|
|
3104
|
+
setAISpeaking(speaking) {
|
|
3105
|
+
this.aiIsSpeaking = speaking;
|
|
3106
|
+
}
|
|
3107
|
+
/** Enable/disable interruption detection */
|
|
3108
|
+
setEnabled(enabled) {
|
|
3109
|
+
this.config.enabled = enabled;
|
|
3110
|
+
if (!enabled) {
|
|
3111
|
+
this.reset();
|
|
3112
|
+
}
|
|
3113
|
+
}
|
|
3114
|
+
/** Update configuration */
|
|
3115
|
+
updateConfig(config) {
|
|
3116
|
+
this.config = { ...this.config, ...config };
|
|
3117
|
+
}
|
|
3118
|
+
/** Reset state */
|
|
3119
|
+
reset() {
|
|
3120
|
+
this.isSpeaking = false;
|
|
3121
|
+
this.speechStartTime = 0;
|
|
3122
|
+
this.lastSpeechTime = 0;
|
|
3123
|
+
this.interruptionTriggeredThisSession = false;
|
|
3124
|
+
if (this.silenceTimer) {
|
|
3125
|
+
clearTimeout(this.silenceTimer);
|
|
3126
|
+
this.silenceTimer = null;
|
|
3127
|
+
}
|
|
3128
|
+
}
|
|
3129
|
+
/** Get current state */
|
|
3130
|
+
getState() {
|
|
3131
|
+
return {
|
|
3132
|
+
isSpeaking: this.isSpeaking,
|
|
3133
|
+
speechDurationMs: this.isSpeaking ? Date.now() - this.speechStartTime : 0
|
|
3134
|
+
};
|
|
3135
|
+
}
|
|
3136
|
+
onSpeechDetected(rms) {
|
|
3137
|
+
const now = Date.now();
|
|
3138
|
+
this.lastSpeechTime = now;
|
|
3139
|
+
if (this.silenceTimer) {
|
|
3140
|
+
clearTimeout(this.silenceTimer);
|
|
3141
|
+
this.silenceTimer = null;
|
|
3142
|
+
}
|
|
3143
|
+
if (!this.isSpeaking) {
|
|
3144
|
+
this.isSpeaking = true;
|
|
3145
|
+
this.speechStartTime = now;
|
|
3146
|
+
this.emit("speech.detected", { rms });
|
|
3147
|
+
}
|
|
3148
|
+
if (this.aiIsSpeaking && !this.interruptionTriggeredThisSession) {
|
|
3149
|
+
const speechDuration = now - this.speechStartTime;
|
|
3150
|
+
if (speechDuration >= this.config.minSpeechDurationMs) {
|
|
3151
|
+
this.interruptionTriggeredThisSession = true;
|
|
3152
|
+
this.emit("interruption.triggered", { rms, durationMs: speechDuration });
|
|
3153
|
+
}
|
|
3154
|
+
}
|
|
3155
|
+
}
|
|
3156
|
+
onSilenceDetected() {
|
|
3157
|
+
if (!this.isSpeaking) return;
|
|
3158
|
+
if (!this.silenceTimer) {
|
|
3159
|
+
this.silenceTimer = setTimeout(() => {
|
|
3160
|
+
const durationMs = this.lastSpeechTime - this.speechStartTime;
|
|
3161
|
+
this.isSpeaking = false;
|
|
3162
|
+
this.silenceTimer = null;
|
|
3163
|
+
this.interruptionTriggeredThisSession = false;
|
|
3164
|
+
this.emit("speech.ended", { durationMs });
|
|
3165
|
+
}, this.config.silenceTimeoutMs);
|
|
3166
|
+
}
|
|
3167
|
+
}
|
|
3168
|
+
};
|
|
3169
|
+
|
|
3166
3170
|
// src/inference/kaldiFbank.ts
|
|
3167
3171
|
function fft(re, im) {
|
|
3168
3172
|
const n = re.length;
|
|
@@ -6995,6 +6999,80 @@ var A2EWithFallback = class {
|
|
|
6995
6999
|
}
|
|
6996
7000
|
};
|
|
6997
7001
|
|
|
7002
|
+
// src/inference/BlendshapeSmoother.ts
|
|
7003
|
+
var NUM_BLENDSHAPES = 52;
|
|
7004
|
+
var BlendshapeSmoother = class {
|
|
7005
|
+
constructor(config) {
|
|
7006
|
+
/** Whether any target has been set */
|
|
7007
|
+
this._hasTarget = false;
|
|
7008
|
+
this.halflife = config?.halflife ?? 0.06;
|
|
7009
|
+
this.values = new Float32Array(NUM_BLENDSHAPES);
|
|
7010
|
+
this.velocities = new Float32Array(NUM_BLENDSHAPES);
|
|
7011
|
+
this.targets = new Float32Array(NUM_BLENDSHAPES);
|
|
7012
|
+
}
|
|
7013
|
+
/** Whether a target frame has been set (false until first setTarget call) */
|
|
7014
|
+
get hasTarget() {
|
|
7015
|
+
return this._hasTarget;
|
|
7016
|
+
}
|
|
7017
|
+
/**
|
|
7018
|
+
* Set new target frame from inference output.
|
|
7019
|
+
* Springs will converge toward these values on subsequent update() calls.
|
|
7020
|
+
*/
|
|
7021
|
+
setTarget(frame) {
|
|
7022
|
+
this.targets.set(frame);
|
|
7023
|
+
this._hasTarget = true;
|
|
7024
|
+
}
|
|
7025
|
+
/**
|
|
7026
|
+
* Advance all 52 springs by `dt` seconds and return the smoothed frame.
|
|
7027
|
+
*
|
|
7028
|
+
* Call this every render frame (e.g., inside requestAnimationFrame).
|
|
7029
|
+
* Returns the internal values buffer — do NOT mutate the returned array.
|
|
7030
|
+
*
|
|
7031
|
+
* @param dt - Time step in seconds (e.g., 1/60 for 60fps)
|
|
7032
|
+
* @returns Smoothed blendshape values (Float32Array of 52)
|
|
7033
|
+
*/
|
|
7034
|
+
update(dt) {
|
|
7035
|
+
if (!this._hasTarget) {
|
|
7036
|
+
return this.values;
|
|
7037
|
+
}
|
|
7038
|
+
if (this.halflife <= 0) {
|
|
7039
|
+
this.values.set(this.targets);
|
|
7040
|
+
this.velocities.fill(0);
|
|
7041
|
+
return this.values;
|
|
7042
|
+
}
|
|
7043
|
+
const damping = Math.LN2 / this.halflife;
|
|
7044
|
+
const eydt = Math.exp(-damping * dt);
|
|
7045
|
+
for (let i = 0; i < NUM_BLENDSHAPES; i++) {
|
|
7046
|
+
const j0 = this.values[i] - this.targets[i];
|
|
7047
|
+
const j1 = this.velocities[i] + j0 * damping;
|
|
7048
|
+
this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
|
|
7049
|
+
this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
|
|
7050
|
+
this.values[i] = Math.max(0, Math.min(1, this.values[i]));
|
|
7051
|
+
}
|
|
7052
|
+
return this.values;
|
|
7053
|
+
}
|
|
7054
|
+
/**
|
|
7055
|
+
* Decay all spring targets to neutral (0).
|
|
7056
|
+
*
|
|
7057
|
+
* Call when inference stalls (no new frames for threshold duration).
|
|
7058
|
+
* The springs will smoothly close the mouth / relax the face over
|
|
7059
|
+
* the halflife period rather than freezing.
|
|
7060
|
+
*/
|
|
7061
|
+
decayToNeutral() {
|
|
7062
|
+
this.targets.fill(0);
|
|
7063
|
+
}
|
|
7064
|
+
/**
|
|
7065
|
+
* Reset all state (values, velocities, targets).
|
|
7066
|
+
* Call when starting a new playback session.
|
|
7067
|
+
*/
|
|
7068
|
+
reset() {
|
|
7069
|
+
this.values.fill(0);
|
|
7070
|
+
this.velocities.fill(0);
|
|
7071
|
+
this.targets.fill(0);
|
|
7072
|
+
this._hasTarget = false;
|
|
7073
|
+
}
|
|
7074
|
+
};
|
|
7075
|
+
|
|
6998
7076
|
// src/animation/audioEnergy.ts
|
|
6999
7077
|
function calculateRMS(samples) {
|
|
7000
7078
|
if (samples.length === 0) return 0;
|
|
@@ -8795,1214 +8873,6 @@ var EmotionController = class {
|
|
|
8795
8873
|
}
|
|
8796
8874
|
};
|
|
8797
8875
|
|
|
8798
|
-
// src/ai/adapters/AgentCoreAdapter.ts
|
|
8799
|
-
var AgentCoreAdapter = class extends EventEmitter {
|
|
8800
|
-
constructor(config) {
|
|
8801
|
-
super();
|
|
8802
|
-
this.name = "AgentCore";
|
|
8803
|
-
this._state = "disconnected";
|
|
8804
|
-
this._sessionId = null;
|
|
8805
|
-
this._isConnected = false;
|
|
8806
|
-
// Sub-components
|
|
8807
|
-
this.asr = null;
|
|
8808
|
-
this.vad = null;
|
|
8809
|
-
this.lam = null;
|
|
8810
|
-
this.pipeline = null;
|
|
8811
|
-
// WebSocket connection to AgentCore
|
|
8812
|
-
this.ws = null;
|
|
8813
|
-
this.wsReconnectAttempts = 0;
|
|
8814
|
-
this.maxReconnectAttempts = 5;
|
|
8815
|
-
// Audio buffers
|
|
8816
|
-
this.audioBuffer = [];
|
|
8817
|
-
// Conversation state
|
|
8818
|
-
this.history = [];
|
|
8819
|
-
this.currentConfig = null;
|
|
8820
|
-
// Interruption handling
|
|
8821
|
-
this.isSpeaking = false;
|
|
8822
|
-
this.currentTtsAbortController = null;
|
|
8823
|
-
// Auth token cache per tenant
|
|
8824
|
-
this.tokenCache = /* @__PURE__ */ new Map();
|
|
8825
|
-
this.agentCoreConfig = config;
|
|
8826
|
-
this.emotionController = new EmotionController();
|
|
8827
|
-
}
|
|
8828
|
-
get state() {
|
|
8829
|
-
return this._state;
|
|
8830
|
-
}
|
|
8831
|
-
get sessionId() {
|
|
8832
|
-
return this._sessionId;
|
|
8833
|
-
}
|
|
8834
|
-
get isConnected() {
|
|
8835
|
-
return this._isConnected;
|
|
8836
|
-
}
|
|
8837
|
-
/**
|
|
8838
|
-
* Connect to AgentCore with session configuration
|
|
8839
|
-
*/
|
|
8840
|
-
async connect(config) {
|
|
8841
|
-
this.currentConfig = config;
|
|
8842
|
-
this._sessionId = config.sessionId;
|
|
8843
|
-
try {
|
|
8844
|
-
const authToken = await this.getAuthToken(config.tenant);
|
|
8845
|
-
await Promise.all([
|
|
8846
|
-
this.initASR(),
|
|
8847
|
-
this.initLAM()
|
|
8848
|
-
]);
|
|
8849
|
-
await this.connectWebSocket(authToken, config);
|
|
8850
|
-
this._isConnected = true;
|
|
8851
|
-
this.setState("idle");
|
|
8852
|
-
this.emit("connection.opened", { sessionId: this._sessionId, adapter: this.name });
|
|
8853
|
-
} catch (error) {
|
|
8854
|
-
this.setState("error");
|
|
8855
|
-
this.emit("connection.error", {
|
|
8856
|
-
error,
|
|
8857
|
-
recoverable: true
|
|
8858
|
-
});
|
|
8859
|
-
throw error;
|
|
8860
|
-
}
|
|
8861
|
-
}
|
|
8862
|
-
/**
|
|
8863
|
-
* Disconnect and cleanup
|
|
8864
|
-
*/
|
|
8865
|
-
async disconnect() {
|
|
8866
|
-
this.currentTtsAbortController?.abort();
|
|
8867
|
-
if (this.pipeline) {
|
|
8868
|
-
this.pipeline.dispose();
|
|
8869
|
-
this.pipeline = null;
|
|
8870
|
-
}
|
|
8871
|
-
if (this.ws) {
|
|
8872
|
-
this.ws.close(1e3, "Client disconnect");
|
|
8873
|
-
this.ws = null;
|
|
8874
|
-
}
|
|
8875
|
-
await Promise.all([
|
|
8876
|
-
this.asr?.dispose(),
|
|
8877
|
-
this.vad?.dispose(),
|
|
8878
|
-
this.lam?.dispose()
|
|
8879
|
-
]);
|
|
8880
|
-
this._isConnected = false;
|
|
8881
|
-
this.setState("disconnected");
|
|
8882
|
-
this.emit("connection.closed", { reason: "Client disconnect" });
|
|
8883
|
-
}
|
|
8884
|
-
/**
|
|
8885
|
-
* Push user audio for processing
|
|
8886
|
-
*/
|
|
8887
|
-
pushAudio(audio) {
|
|
8888
|
-
if (!this._isConnected) return;
|
|
8889
|
-
if (this.isSpeaking) {
|
|
8890
|
-
this.detectVoiceActivity(audio).then((hasVoiceActivity) => {
|
|
8891
|
-
if (hasVoiceActivity) {
|
|
8892
|
-
this.interrupt();
|
|
8893
|
-
}
|
|
8894
|
-
}).catch((error) => {
|
|
8895
|
-
console.error("[AgentCore] VAD error during interruption detection:", error);
|
|
8896
|
-
});
|
|
8897
|
-
}
|
|
8898
|
-
const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
|
|
8899
|
-
this.audioBuffer.push(float32);
|
|
8900
|
-
this.scheduleTranscription();
|
|
8901
|
-
}
|
|
8902
|
-
/**
|
|
8903
|
-
* Send text directly to AgentCore
|
|
8904
|
-
*/
|
|
8905
|
-
async sendText(text) {
|
|
8906
|
-
if (!this._isConnected || !this.ws) {
|
|
8907
|
-
throw new Error("Not connected to AgentCore");
|
|
8908
|
-
}
|
|
8909
|
-
this.addToHistory({
|
|
8910
|
-
role: "user",
|
|
8911
|
-
content: text,
|
|
8912
|
-
timestamp: Date.now()
|
|
8913
|
-
});
|
|
8914
|
-
this.setState("thinking");
|
|
8915
|
-
this.emit("ai.thinking.start", { timestamp: Date.now() });
|
|
8916
|
-
this.ws.send(JSON.stringify({
|
|
8917
|
-
type: "user_message",
|
|
8918
|
-
sessionId: this._sessionId,
|
|
8919
|
-
content: text,
|
|
8920
|
-
context: {
|
|
8921
|
-
history: this.history.slice(-10),
|
|
8922
|
-
// Last 10 messages
|
|
8923
|
-
emotion: Array.from(this.emotionController.emotion)
|
|
8924
|
-
}
|
|
8925
|
-
}));
|
|
8926
|
-
}
|
|
8927
|
-
/**
|
|
8928
|
-
* Interrupt current AI response
|
|
8929
|
-
*/
|
|
8930
|
-
interrupt() {
|
|
8931
|
-
if (!this.isSpeaking) return;
|
|
8932
|
-
this.emit("interruption.detected", { timestamp: Date.now() });
|
|
8933
|
-
this.currentTtsAbortController?.abort();
|
|
8934
|
-
this.currentTtsAbortController = null;
|
|
8935
|
-
if (this.ws?.readyState === WebSocket.OPEN) {
|
|
8936
|
-
this.ws.send(JSON.stringify({
|
|
8937
|
-
type: "interrupt",
|
|
8938
|
-
sessionId: this._sessionId,
|
|
8939
|
-
timestamp: Date.now()
|
|
8940
|
-
}));
|
|
8941
|
-
}
|
|
8942
|
-
this.isSpeaking = false;
|
|
8943
|
-
this.setState("listening");
|
|
8944
|
-
this.emit("interruption.handled", { timestamp: Date.now(), action: "stop" });
|
|
8945
|
-
}
|
|
8946
|
-
getHistory() {
|
|
8947
|
-
return [...this.history];
|
|
8948
|
-
}
|
|
8949
|
-
clearHistory() {
|
|
8950
|
-
this.history = [];
|
|
8951
|
-
this.emit("memory.updated", { messageCount: 0 });
|
|
8952
|
-
}
|
|
8953
|
-
async healthCheck() {
|
|
8954
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
8955
|
-
return false;
|
|
8956
|
-
}
|
|
8957
|
-
return new Promise((resolve) => {
|
|
8958
|
-
const timeout = setTimeout(() => resolve(false), 5e3);
|
|
8959
|
-
const handler = (event) => {
|
|
8960
|
-
try {
|
|
8961
|
-
const data = JSON.parse(event.data);
|
|
8962
|
-
if (data.type === "pong") {
|
|
8963
|
-
clearTimeout(timeout);
|
|
8964
|
-
this.ws?.removeEventListener("message", handler);
|
|
8965
|
-
resolve(true);
|
|
8966
|
-
}
|
|
8967
|
-
} catch {
|
|
8968
|
-
}
|
|
8969
|
-
};
|
|
8970
|
-
this.ws?.addEventListener("message", handler);
|
|
8971
|
-
this.ws?.send(JSON.stringify({ type: "ping" }));
|
|
8972
|
-
});
|
|
8973
|
-
}
|
|
8974
|
-
// ==================== Private Methods ====================
|
|
8975
|
-
setState(state) {
|
|
8976
|
-
const previousState = this._state;
|
|
8977
|
-
this._state = state;
|
|
8978
|
-
this.emit("state.change", { state, previousState });
|
|
8979
|
-
}
|
|
8980
|
-
async getAuthToken(tenant) {
|
|
8981
|
-
const cached = this.tokenCache.get(tenant.tenantId);
|
|
8982
|
-
if (cached && cached.expiresAt > Date.now() + 6e4) {
|
|
8983
|
-
return cached.token;
|
|
8984
|
-
}
|
|
8985
|
-
if (tenant.credentials.authToken) {
|
|
8986
|
-
return tenant.credentials.authToken;
|
|
8987
|
-
}
|
|
8988
|
-
const endpoint = this.agentCoreConfig.endpoint;
|
|
8989
|
-
if (endpoint.startsWith("ws://") || endpoint.includes("localhost")) {
|
|
8990
|
-
return "local-dev-token";
|
|
8991
|
-
}
|
|
8992
|
-
const httpEndpoint = endpoint.replace("wss://", "https://").replace("ws://", "http://");
|
|
8993
|
-
const response = await fetch(`${httpEndpoint}/auth/token`, {
|
|
8994
|
-
method: "POST",
|
|
8995
|
-
headers: { "Content-Type": "application/json" },
|
|
8996
|
-
body: JSON.stringify({
|
|
8997
|
-
tenantId: tenant.tenantId,
|
|
8998
|
-
apiKey: tenant.credentials.apiKey
|
|
8999
|
-
})
|
|
9000
|
-
});
|
|
9001
|
-
if (!response.ok) {
|
|
9002
|
-
throw new Error(`Auth failed: ${response.statusText}`);
|
|
9003
|
-
}
|
|
9004
|
-
const { token, expiresIn } = await response.json();
|
|
9005
|
-
this.tokenCache.set(tenant.tenantId, {
|
|
9006
|
-
token,
|
|
9007
|
-
expiresAt: Date.now() + expiresIn * 1e3
|
|
9008
|
-
});
|
|
9009
|
-
return token;
|
|
9010
|
-
}
|
|
9011
|
-
async initASR() {
|
|
9012
|
-
await Promise.all([
|
|
9013
|
-
// SenseVoice ASR
|
|
9014
|
-
(async () => {
|
|
9015
|
-
this.asr = new SenseVoiceInference({
|
|
9016
|
-
modelUrl: "/models/sensevoice/model.int8.onnx",
|
|
9017
|
-
language: "auto"
|
|
9018
|
-
});
|
|
9019
|
-
await this.asr.load();
|
|
9020
|
-
})(),
|
|
9021
|
-
// Silero VAD for accurate voice activity detection
|
|
9022
|
-
(async () => {
|
|
9023
|
-
this.vad = new SileroVADInference({
|
|
9024
|
-
modelUrl: "/models/silero-vad.onnx",
|
|
9025
|
-
backend: "webgpu",
|
|
9026
|
-
sampleRate: 16e3,
|
|
9027
|
-
threshold: 0.5
|
|
9028
|
-
});
|
|
9029
|
-
await this.vad.load();
|
|
9030
|
-
})()
|
|
9031
|
-
]);
|
|
9032
|
-
}
|
|
9033
|
-
async initLAM() {
|
|
9034
|
-
const lamUrl = this.agentCoreConfig.models?.lamUrl || "/models/unified_wav2vec2_asr_a2e.onnx";
|
|
9035
|
-
this.lam = new Wav2Vec2Inference({
|
|
9036
|
-
modelUrl: lamUrl,
|
|
9037
|
-
backend: "auto"
|
|
9038
|
-
});
|
|
9039
|
-
await this.lam.load();
|
|
9040
|
-
await this.initPipeline();
|
|
9041
|
-
}
|
|
9042
|
-
async initPipeline() {
|
|
9043
|
-
if (!this.lam) {
|
|
9044
|
-
throw new Error("LAM must be initialized before pipeline");
|
|
9045
|
-
}
|
|
9046
|
-
this.pipeline = new FullFacePipeline({
|
|
9047
|
-
lam: this.lam,
|
|
9048
|
-
sampleRate: 16e3,
|
|
9049
|
-
chunkTargetMs: 200
|
|
9050
|
-
});
|
|
9051
|
-
await this.pipeline.initialize();
|
|
9052
|
-
this.pipeline.on("full_frame_ready", (fullFrame) => {
|
|
9053
|
-
const frame = fullFrame.blendshapes;
|
|
9054
|
-
this.emit("animation", {
|
|
9055
|
-
blendshapes: frame,
|
|
9056
|
-
get: (name) => {
|
|
9057
|
-
const idx = LAM_BLENDSHAPES.indexOf(name);
|
|
9058
|
-
return idx >= 0 ? frame[idx] : 0;
|
|
9059
|
-
},
|
|
9060
|
-
timestamp: Date.now(),
|
|
9061
|
-
// Wall clock for client-side logging only
|
|
9062
|
-
inferenceMs: 0
|
|
9063
|
-
// Pipeline handles LAM inference asynchronously
|
|
9064
|
-
});
|
|
9065
|
-
});
|
|
9066
|
-
this.pipeline.on("playback_complete", () => {
|
|
9067
|
-
this.isSpeaking = false;
|
|
9068
|
-
this.setState("idle");
|
|
9069
|
-
this.emit("audio.output.end", { durationMs: 0 });
|
|
9070
|
-
});
|
|
9071
|
-
this.pipeline.on("error", (error) => {
|
|
9072
|
-
console.error("[AgentCore] Pipeline error:", error);
|
|
9073
|
-
this.emit("connection.error", {
|
|
9074
|
-
error,
|
|
9075
|
-
recoverable: true
|
|
9076
|
-
});
|
|
9077
|
-
});
|
|
9078
|
-
}
|
|
9079
|
-
async connectWebSocket(authToken, config) {
|
|
9080
|
-
return new Promise((resolve, reject) => {
|
|
9081
|
-
const wsUrl = new URL(`${this.agentCoreConfig.endpoint.replace("http", "ws")}/ws`);
|
|
9082
|
-
wsUrl.searchParams.set("sessionId", config.sessionId);
|
|
9083
|
-
wsUrl.searchParams.set("characterId", config.tenant.characterId);
|
|
9084
|
-
this.ws = new WebSocket(wsUrl.toString());
|
|
9085
|
-
this.ws.onopen = () => {
|
|
9086
|
-
this.ws?.send(JSON.stringify({
|
|
9087
|
-
type: "auth",
|
|
9088
|
-
token: authToken,
|
|
9089
|
-
tenantId: config.tenant.tenantId,
|
|
9090
|
-
systemPrompt: config.systemPrompt
|
|
9091
|
-
}));
|
|
9092
|
-
};
|
|
9093
|
-
this.ws.onmessage = (event) => {
|
|
9094
|
-
try {
|
|
9095
|
-
this.handleAgentCoreMessage(JSON.parse(event.data));
|
|
9096
|
-
} catch {
|
|
9097
|
-
}
|
|
9098
|
-
};
|
|
9099
|
-
this.ws.onerror = () => {
|
|
9100
|
-
reject(new Error("WebSocket connection failed"));
|
|
9101
|
-
};
|
|
9102
|
-
this.ws.onclose = (event) => {
|
|
9103
|
-
this.handleDisconnect(event);
|
|
9104
|
-
};
|
|
9105
|
-
const authTimeout = setTimeout(() => {
|
|
9106
|
-
reject(new Error("Auth timeout"));
|
|
9107
|
-
}, 1e4);
|
|
9108
|
-
const authHandler = (event) => {
|
|
9109
|
-
try {
|
|
9110
|
-
const data = JSON.parse(event.data);
|
|
9111
|
-
if (data.type === "auth_success") {
|
|
9112
|
-
clearTimeout(authTimeout);
|
|
9113
|
-
this.ws?.removeEventListener("message", authHandler);
|
|
9114
|
-
resolve();
|
|
9115
|
-
} else if (data.type === "auth_failed") {
|
|
9116
|
-
clearTimeout(authTimeout);
|
|
9117
|
-
reject(new Error(data.message));
|
|
9118
|
-
}
|
|
9119
|
-
} catch {
|
|
9120
|
-
}
|
|
9121
|
-
};
|
|
9122
|
-
this.ws.addEventListener("message", authHandler);
|
|
9123
|
-
});
|
|
9124
|
-
}
|
|
9125
|
-
handleAgentCoreMessage(data) {
|
|
9126
|
-
switch (data.type) {
|
|
9127
|
-
case "response_start":
|
|
9128
|
-
this.setState("speaking");
|
|
9129
|
-
this.isSpeaking = true;
|
|
9130
|
-
this.emit("ai.response.start", {
|
|
9131
|
-
text: data.text,
|
|
9132
|
-
emotion: data.emotion
|
|
9133
|
-
});
|
|
9134
|
-
if (data.emotion) {
|
|
9135
|
-
this.emotionController.transitionTo(
|
|
9136
|
-
{ [data.emotion]: 0.7 },
|
|
9137
|
-
300
|
|
9138
|
-
);
|
|
9139
|
-
}
|
|
9140
|
-
if (this.pipeline) {
|
|
9141
|
-
this.pipeline.start();
|
|
9142
|
-
}
|
|
9143
|
-
break;
|
|
9144
|
-
case "response_chunk":
|
|
9145
|
-
this.emit("ai.response.chunk", {
|
|
9146
|
-
text: data.text,
|
|
9147
|
-
isLast: data.isLast
|
|
9148
|
-
});
|
|
9149
|
-
break;
|
|
9150
|
-
case "audio_chunk":
|
|
9151
|
-
if (data.audio && this.pipeline) {
|
|
9152
|
-
const audioData = this.base64ToArrayBuffer(data.audio);
|
|
9153
|
-
const uint8 = new Uint8Array(audioData);
|
|
9154
|
-
this.pipeline.onAudioChunk(uint8).catch((error) => {
|
|
9155
|
-
console.error("[AgentCore] Pipeline chunk error:", error);
|
|
9156
|
-
});
|
|
9157
|
-
}
|
|
9158
|
-
break;
|
|
9159
|
-
case "audio_end":
|
|
9160
|
-
if (this.pipeline) {
|
|
9161
|
-
this.pipeline.end().catch((error) => {
|
|
9162
|
-
console.error("[AgentCore] Pipeline end error:", error);
|
|
9163
|
-
});
|
|
9164
|
-
}
|
|
9165
|
-
break;
|
|
9166
|
-
case "response_end":
|
|
9167
|
-
this.addToHistory({
|
|
9168
|
-
role: "assistant",
|
|
9169
|
-
content: data.fullText,
|
|
9170
|
-
timestamp: Date.now(),
|
|
9171
|
-
emotion: data.emotion
|
|
9172
|
-
});
|
|
9173
|
-
this.emit("ai.response.end", {
|
|
9174
|
-
fullText: data.fullText,
|
|
9175
|
-
durationMs: data.durationMs || 0
|
|
9176
|
-
});
|
|
9177
|
-
break;
|
|
9178
|
-
case "memory_updated":
|
|
9179
|
-
this.emit("memory.updated", {
|
|
9180
|
-
messageCount: data.messageCount,
|
|
9181
|
-
tokenCount: data.tokenCount
|
|
9182
|
-
});
|
|
9183
|
-
break;
|
|
9184
|
-
case "error":
|
|
9185
|
-
this.emit("connection.error", {
|
|
9186
|
-
error: new Error(data.message),
|
|
9187
|
-
recoverable: data.recoverable ?? false
|
|
9188
|
-
});
|
|
9189
|
-
break;
|
|
9190
|
-
}
|
|
9191
|
-
}
|
|
9192
|
-
scheduleTranscription() {
|
|
9193
|
-
if (this.audioBuffer.length === 0) return;
|
|
9194
|
-
const totalLength = this.audioBuffer.reduce((sum2, buf) => sum2 + buf.length, 0);
|
|
9195
|
-
if (totalLength < 4e3) return;
|
|
9196
|
-
const audio = new Float32Array(totalLength);
|
|
9197
|
-
let offset = 0;
|
|
9198
|
-
for (const buf of this.audioBuffer) {
|
|
9199
|
-
audio.set(buf, offset);
|
|
9200
|
-
offset += buf.length;
|
|
9201
|
-
}
|
|
9202
|
-
this.audioBuffer = [];
|
|
9203
|
-
let sum = 0;
|
|
9204
|
-
for (let i = 0; i < audio.length; i++) {
|
|
9205
|
-
sum += audio[i] * audio[i];
|
|
9206
|
-
}
|
|
9207
|
-
const rms = Math.sqrt(sum / audio.length);
|
|
9208
|
-
if (rms < 0.01) {
|
|
9209
|
-
console.debug("[AgentCore] Skipping silent audio", { rms, samples: audio.length });
|
|
9210
|
-
return;
|
|
9211
|
-
}
|
|
9212
|
-
if (this.asr) {
|
|
9213
|
-
this.setState("listening");
|
|
9214
|
-
this.emit("user.speech.start", { timestamp: Date.now() });
|
|
9215
|
-
this.asr.transcribe(audio).then((result) => {
|
|
9216
|
-
this.emit("user.transcript.final", {
|
|
9217
|
-
text: result.text,
|
|
9218
|
-
confidence: 1
|
|
9219
|
-
});
|
|
9220
|
-
this.emit("user.speech.end", { timestamp: Date.now(), durationMs: result.inferenceTimeMs });
|
|
9221
|
-
const cleanText = result.text.trim();
|
|
9222
|
-
if (cleanText) {
|
|
9223
|
-
this.sendText(cleanText).catch((error) => {
|
|
9224
|
-
console.error("[AgentCore] Send text error:", error);
|
|
9225
|
-
});
|
|
9226
|
-
}
|
|
9227
|
-
}).catch((error) => {
|
|
9228
|
-
console.error("[AgentCore] Transcription error:", error);
|
|
9229
|
-
});
|
|
9230
|
-
}
|
|
9231
|
-
}
|
|
9232
|
-
// REMOVED: processAudioForAnimation() - now handled by FullFacePipeline
|
|
9233
|
-
// The pipeline manages audio scheduling, LAM inference, and frame synchronization
|
|
9234
|
-
// Frames are emitted via pipeline.on('full_frame_ready') event (see initPipeline())
|
|
9235
|
-
/**
|
|
9236
|
-
* Detect voice activity using Silero VAD
|
|
9237
|
-
* Falls back to simple RMS if VAD not available
|
|
9238
|
-
*/
|
|
9239
|
-
async detectVoiceActivity(audio) {
|
|
9240
|
-
const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
|
|
9241
|
-
if (this.vad) {
|
|
9242
|
-
const chunkSize = this.vad.getChunkSize();
|
|
9243
|
-
for (let i = 0; i + chunkSize <= float32.length; i += chunkSize) {
|
|
9244
|
-
const chunk = float32.slice(i, i + chunkSize);
|
|
9245
|
-
const result = await this.vad.process(chunk);
|
|
9246
|
-
if (result.isSpeech) {
|
|
9247
|
-
return true;
|
|
9248
|
-
}
|
|
9249
|
-
}
|
|
9250
|
-
return false;
|
|
9251
|
-
}
|
|
9252
|
-
let sum = 0;
|
|
9253
|
-
for (let i = 0; i < float32.length; i++) {
|
|
9254
|
-
sum += float32[i] * float32[i];
|
|
9255
|
-
}
|
|
9256
|
-
const rms = Math.sqrt(sum / float32.length);
|
|
9257
|
-
return rms > 0.02;
|
|
9258
|
-
}
|
|
9259
|
-
base64ToArrayBuffer(base64) {
|
|
9260
|
-
const binaryString = atob(base64);
|
|
9261
|
-
const bytes = new Uint8Array(binaryString.length);
|
|
9262
|
-
for (let i = 0; i < binaryString.length; i++) {
|
|
9263
|
-
bytes[i] = binaryString.charCodeAt(i);
|
|
9264
|
-
}
|
|
9265
|
-
return bytes.buffer;
|
|
9266
|
-
}
|
|
9267
|
-
addToHistory(message) {
|
|
9268
|
-
this.history.push(message);
|
|
9269
|
-
this.emit("memory.updated", { messageCount: this.history.length });
|
|
9270
|
-
}
|
|
9271
|
-
handleDisconnect(event) {
|
|
9272
|
-
this._isConnected = false;
|
|
9273
|
-
if (event.code !== 1e3) {
|
|
9274
|
-
if (this.wsReconnectAttempts < this.maxReconnectAttempts) {
|
|
9275
|
-
this.wsReconnectAttempts++;
|
|
9276
|
-
setTimeout(() => {
|
|
9277
|
-
if (this.currentConfig) {
|
|
9278
|
-
this.connect(this.currentConfig).catch(() => {
|
|
9279
|
-
});
|
|
9280
|
-
}
|
|
9281
|
-
}, Math.pow(2, this.wsReconnectAttempts) * 1e3);
|
|
9282
|
-
} else {
|
|
9283
|
-
this.setState("error");
|
|
9284
|
-
this.emit("connection.error", {
|
|
9285
|
-
error: new Error("Max reconnection attempts reached"),
|
|
9286
|
-
recoverable: false
|
|
9287
|
-
});
|
|
9288
|
-
}
|
|
9289
|
-
}
|
|
9290
|
-
this.emit("connection.closed", { reason: event.reason || "Connection closed" });
|
|
9291
|
-
}
|
|
9292
|
-
};
|
|
9293
|
-
|
|
9294
|
-
// src/ai/orchestration/ConversationOrchestrator.ts
|
|
9295
|
-
/**
 * One conversation session: pairs a session config with the adapter that
 * carries it, and keeps per-session history, context values and the time of
 * the last activity.
 */
var ConversationSessionImpl = class {
  /**
   * @param config - session config; `sessionId`, `tenant` and optional `emotion` are read
   * @param adapter - adapter that every transport call is delegated to
   */
  constructor(config, adapter) {
    this._history = [];
    this._context = /* @__PURE__ */ new Map();
    this.sessionId = config.sessionId;
    this._config = config;
    this._adapter = adapter;
    this.createdAt = Date.now();
    this._lastActivityAt = Date.now();
    this._emotionController = new EmotionController();
    if (config.emotion) {
      this._emotionController.setPreset(config.emotion);
    }
  }
  get adapter() {
    return this._adapter;
  }
  get config() {
    return this._config;
  }
  /** Current state, as reported by the adapter. */
  get state() {
    return this._adapter.state;
  }
  /** Copy of the message history; mutating it does not affect the session. */
  get history() {
    return [...this._history];
  }
  /**
   * NOTE(review): always returns an empty object; values given to
   * setEmotion() are not reflected here — confirm whether that is intended.
   */
  get emotion() {
    return {};
  }
  get lastActivityAt() {
    return this._lastActivityAt;
  }
  /** Connect the adapter using this session's config. */
  async start() {
    await this._adapter.connect(this._config);
    this._lastActivityAt = Date.now();
  }
  /** Disconnect the adapter. */
  async end() {
    await this._adapter.disconnect();
  }
  pushAudio(audio) {
    this._adapter.pushAudio(audio);
    this._lastActivityAt = Date.now();
  }
  async sendText(text) {
    await this._adapter.sendText(text);
    this._lastActivityAt = Date.now();
  }
  interrupt() {
    this._adapter.interrupt();
    this._lastActivityAt = Date.now();
  }
  setEmotion(emotion) {
    this._emotionController.set(emotion);
  }
  addContext(key, value) {
    this._context.set(key, value);
  }
  removeContext(key) {
    this._context.delete(key);
  }
  /** Context values as a plain object. */
  getContext() {
    return Object.fromEntries(this._context);
  }
  /**
   * Build a snapshot of the session.
   *
   * The history array in the snapshot is a copy, matching the `history`
   * getter and import(), so editing a snapshot cannot alter the live session.
   */
  export() {
    return {
      sessionId: this.sessionId,
      tenantId: this._config.tenant.tenantId,
      characterId: this._config.tenant.characterId,
      history: [...this._history],
      context: Object.fromEntries(this._context),
      emotion: this.emotion,
      createdAt: this.createdAt,
      lastActivityAt: this._lastActivityAt
    };
  }
  /**
   * Restore history, context and last-activity time from a snapshot.
   * A snapshot without `history` or `context` restores them as empty.
   */
  import(snapshot) {
    this._history = [...snapshot.history ?? []];
    this._context = new Map(Object.entries(snapshot.context ?? {}));
    this._lastActivityAt = snapshot.lastActivityAt;
  }
  /** Replace the local history with whatever the adapter reports. */
  syncHistory() {
    this._history = this._adapter.getHistory();
  }
};
|
|
9379
|
-
/**
 * Creates and tracks conversation sessions for registered tenants on top of a
 * single shared AgentCoreAdapter, re-emitting adapter events tagged with a
 * session ID.
 */
var ConversationOrchestrator = class extends EventEmitter {
  /**
   * @param config - orchestrator options; `config.adapter` is handed to the
   *   AgentCoreAdapter constructor, the rest overrides the defaults below
   */
  constructor(config) {
    super();
    // Sessions, keyed by session ID
    this.sessions = /* @__PURE__ */ new Map();
    // Tenant configurations, keyed by tenant ID
    this.tenants = /* @__PURE__ */ new Map();
    // Health monitoring
    this.healthCheckInterval = null;
    this.HEALTH_CHECK_INTERVAL_MS = 3e4;
    this.config = {
      connectionTimeoutMs: 5e3,
      maxRetries: 3,
      ...config
    };
    this.adapter = new AgentCoreAdapter(config.adapter);
  }
  /**
   * Register a tenant
   */
  registerTenant(tenant) {
    this.tenants.set(tenant.tenantId, tenant);
  }
  /**
   * Unregister a tenant
   */
  unregisterTenant(tenantId) {
    this.tenants.delete(tenantId);
  }
  /**
   * Get tenant config
   */
  getTenant(tenantId) {
    return this.tenants.get(tenantId);
  }
  /**
   * Create a new conversation session for a tenant and start it.
   *
   * If starting the session fails, the session is removed from the registry
   * again before the error is rethrown, so a failed attempt leaves no dead
   * session behind.
   *
   * @param tenantId - ID of a previously registered tenant
   * @param options - optional sessionId, systemPrompt, voice, emotion, language
   * @returns the started session
   * @throws Error when the tenant is unknown, or whatever session.start() rejects with
   */
  async createSession(tenantId, options = {}) {
    const tenant = this.tenants.get(tenantId);
    if (!tenant) {
      throw new Error(`Tenant not found: ${tenantId}`);
    }
    const sessionId = options.sessionId || this.generateSessionId();
    const sessionConfig = {
      sessionId,
      tenant,
      systemPrompt: options.systemPrompt,
      voice: options.voice,
      emotion: options.emotion,
      language: options.language
    };
    const session = new ConversationSessionImpl(sessionConfig, this.adapter);
    this.sessions.set(sessionId, session);
    // NOTE(review): every call adds another set of listeners to the shared
    // adapter and none are removed in endSession() — confirm whether the
    // adapter offers a way to unsubscribe.
    this.forwardAdapterEvents(this.adapter, sessionId);
    try {
      await session.start();
    } catch (error) {
      // Roll back the registration, but only if the entry is still ours.
      if (this.sessions.get(sessionId) === session) {
        this.sessions.delete(sessionId);
      }
      throw error;
    }
    this.emit("session.created", { sessionId, tenantId });
    return session;
  }
  /**
   * End a session. Unknown session IDs are ignored.
   */
  async endSession(sessionId) {
    const session = this.sessions.get(sessionId);
    if (session) {
      await session.end();
      this.sessions.delete(sessionId);
      this.emit("session.ended", { sessionId, reason: "Client requested" });
    }
  }
  /**
   * Get session by ID
   */
  getSession(sessionId) {
    return this.sessions.get(sessionId);
  }
  /**
   * Get all sessions for a tenant
   */
  getTenantSessions(tenantId) {
    return Array.from(this.sessions.values()).filter((s) => s.config.tenant.tenantId === tenantId);
  }
  /**
   * Start health monitoring (no-op when already running)
   */
  startHealthMonitoring() {
    if (this.healthCheckInterval) return;
    this.healthCheckInterval = setInterval(async () => {
      await this.performHealthCheck();
    }, this.HEALTH_CHECK_INTERVAL_MS);
  }
  /**
   * Stop health monitoring
   */
  stopHealthMonitoring() {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
    }
  }
  /**
   * Dispose all resources: stop monitoring, end every session, disconnect the adapter
   */
  async dispose() {
    this.stopHealthMonitoring();
    const endPromises = Array.from(this.sessions.values()).map((s) => s.end());
    await Promise.all(endPromises);
    this.sessions.clear();
    await this.adapter.disconnect();
  }
  // ==================== Private Methods ====================
  /** Build an ID of the form `sess_<timestamp>_<up to 9 base-36 chars>`. */
  generateSessionId() {
    // slice(2, 11) selects the same nine characters the deprecated substr(2, 9) did.
    return `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
  /** Re-emit the listed adapter events on this orchestrator with `sessionId` added to the payload. */
  forwardAdapterEvents(adapter, sessionId) {
    const events = [
      "state.change",
      "user.speech.start",
      "user.speech.end",
      "user.transcript.partial",
      "user.transcript.final",
      "ai.thinking.start",
      "ai.response.start",
      "ai.response.chunk",
      "ai.response.end",
      "audio.output.chunk",
      "audio.output.end",
      "animation",
      "memory.updated",
      "connection.error",
      "interruption.detected",
      "interruption.handled"
    ];
    for (const event of events) {
      adapter.on(event, (data) => {
        const eventData = data;
        this.emit(event, { ...eventData, sessionId });
      });
    }
  }
  /** Run one adapter health check. */
  async performHealthCheck() {
    try {
      await this.adapter.healthCheck();
    } catch {
      // NOTE(review): failures are deliberately ignored here; presumably the
      // adapter reports problems through its own events — confirm.
    }
  }
};
|
|
9526
|
-
|
|
9527
|
-
// src/ai/tenancy/TenantManager.ts
|
|
9528
|
-
/**
 * In-memory registry of tenants with per-tenant quotas, usage counters and
 * optional auth-token refresh callbacks.
 */
var _TenantManager = class _TenantManager {
  constructor() {
    this.tenants = /* @__PURE__ */ new Map();
    this.quotas = /* @__PURE__ */ new Map();
    this.usage = /* @__PURE__ */ new Map();
    this.tokenRefreshCallbacks = /* @__PURE__ */ new Map();
  }
  /**
   * Register a tenant with quota.
   *
   * The quota is stored as a fresh object layered over DEFAULT_QUOTA, so each
   * tenant owns its quota (editing one never changes the shared default) and
   * any limit missing from `quota` falls back to its default value.
   * Usage counters for the tenant are (re)initialised to zero.
   *
   * @param tenant - tenant config; `tenantId` is the registry key
   * @param quota - limits for this tenant (defaults to DEFAULT_QUOTA)
   * @param tokenRefreshCallback - optional async function returning a fresh auth token
   */
  register(tenant, quota = _TenantManager.DEFAULT_QUOTA, tokenRefreshCallback) {
    this.tenants.set(tenant.tenantId, tenant);
    this.quotas.set(tenant.tenantId, { ..._TenantManager.DEFAULT_QUOTA, ...quota });
    this.usage.set(tenant.tenantId, {
      currentSessions: 0,
      requestsThisMinute: 0,
      tokensUsed: 0,
      audioMinutesToday: 0,
      lastMinuteReset: Date.now(),
      lastDailyReset: Date.now()
    });
    if (tokenRefreshCallback) {
      this.tokenRefreshCallbacks.set(tenant.tenantId, tokenRefreshCallback);
    }
  }
  /**
   * Unregister a tenant and drop its quota, usage and refresh callback
   */
  unregister(tenantId) {
    this.tenants.delete(tenantId);
    this.quotas.delete(tenantId);
    this.usage.delete(tenantId);
    this.tokenRefreshCallbacks.delete(tenantId);
  }
  /**
   * Get tenant config
   */
  get(tenantId) {
    return this.tenants.get(tenantId);
  }
  /**
   * Check if tenant exists
   */
  has(tenantId) {
    return this.tenants.has(tenantId);
  }
  /**
   * Get all tenant IDs
   */
  getTenantIds() {
    return Array.from(this.tenants.keys());
  }
  /**
   * Check if tenant can create new session (false for unknown tenants)
   */
  canCreateSession(tenantId) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    return usage.currentSessions < quota.maxSessions;
  }
  /**
   * Check if tenant can make request (false for unknown tenants)
   */
  canMakeRequest(tenantId) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    this.checkMinuteReset(tenantId);
    return usage.requestsThisMinute < quota.requestsPerMinute;
  }
  /**
   * Check if tenant can use `minutes` more audio within the daily limit
   */
  canUseAudio(tenantId, minutes) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    this.checkDailyReset(tenantId);
    return usage.audioMinutesToday + minutes <= quota.maxAudioMinutesPerDay;
  }
  /**
   * Increment session count
   */
  incrementSessions(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      usage.currentSessions++;
    }
  }
  /**
   * Decrement session count (never below zero)
   */
  decrementSessions(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage && usage.currentSessions > 0) {
      usage.currentSessions--;
    }
  }
  /**
   * Record a request
   */
  recordRequest(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      this.checkMinuteReset(tenantId);
      usage.requestsThisMinute++;
    }
  }
  /**
   * Record token usage
   */
  recordTokens(tenantId, tokens) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      usage.tokensUsed += tokens;
    }
  }
  /**
   * Record audio usage
   */
  recordAudioMinutes(tenantId, minutes) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      this.checkDailyReset(tenantId);
      usage.audioMinutesToday += minutes;
    }
  }
  /**
   * Get fresh auth token for tenant.
   *
   * A registered refresh callback wins: its result is stored on
   * `tenant.credentials.authToken` and returned. Otherwise the stored token
   * is returned if there is one.
   *
   * @throws Error when the tenant is unknown or no token is available
   */
  async getAuthToken(tenantId) {
    const tenant = this.tenants.get(tenantId);
    if (!tenant) {
      throw new Error(`Tenant not found: ${tenantId}`);
    }
    const callback = this.tokenRefreshCallbacks.get(tenantId);
    if (callback) {
      const token = await callback();
      // A tenant registered without credentials still gets its token cached.
      if (!tenant.credentials) {
        tenant.credentials = {};
      }
      tenant.credentials.authToken = token;
      return token;
    }
    if (tenant.credentials?.authToken) {
      return tenant.credentials.authToken;
    }
    throw new Error(`No auth token available for tenant: ${tenantId}`);
  }
  /**
   * Update tenant credentials (shallow merge over the existing ones)
   */
  updateCredentials(tenantId, credentials) {
    const tenant = this.tenants.get(tenantId);
    if (tenant) {
      tenant.credentials = { ...tenant.credentials, ...credentials };
    }
  }
  /**
   * Get usage stats for tenant
   */
  getUsage(tenantId) {
    return this.usage.get(tenantId);
  }
  /**
   * Get quota for tenant
   */
  getQuota(tenantId) {
    return this.quotas.get(tenantId);
  }
  /**
   * Update quota for tenant (shallow merge; unknown tenants are ignored)
   */
  updateQuota(tenantId, quota) {
    const existing = this.quotas.get(tenantId);
    if (existing) {
      this.quotas.set(tenantId, { ...existing, ...quota });
    }
  }
  /**
   * Reset request, token and audio counters for a tenant.
   * The current session count is left untouched.
   */
  resetUsage(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      usage.requestsThisMinute = 0;
      usage.tokensUsed = 0;
      usage.audioMinutesToday = 0;
      usage.lastMinuteReset = Date.now();
      usage.lastDailyReset = Date.now();
    }
  }
  // ==================== Private Methods ====================
  /** Zero the per-minute request counter once 60 s have passed since the last reset. */
  checkMinuteReset(tenantId) {
    const usage = this.usage.get(tenantId);
    if (!usage) return;
    const now = Date.now();
    if (now - usage.lastMinuteReset >= 6e4) {
      usage.requestsThisMinute = 0;
      usage.lastMinuteReset = now;
    }
  }
  /** Zero the audio-minute counter once 24 h have passed since the last reset. */
  checkDailyReset(tenantId) {
    const usage = this.usage.get(tenantId);
    if (!usage) return;
    const now = Date.now();
    const MS_PER_DAY = 24 * 60 * 60 * 1e3;
    if (now - usage.lastDailyReset >= MS_PER_DAY) {
      usage.audioMinutesToday = 0;
      usage.lastDailyReset = now;
    }
  }
};
/**
 * Default quota for new tenants
 */
_TenantManager.DEFAULT_QUOTA = {
  maxSessions: 10,
  requestsPerMinute: 60,
  maxTokensPerConversation: 1e5,
  maxAudioMinutesPerDay: 60
};
var TenantManager = _TenantManager;
|
|
9749
|
-
|
|
9750
|
-
// src/ai/utils/AudioSyncManager.ts
|
|
9751
|
-
/**
 * Queues audio chunks for playback through an AudioContext and, in parallel,
 * fills a fixed-size buffer with overlap that is published via "buffer.ready"
 * events each time it becomes full.
 */
var AudioSyncManager = class extends EventEmitter {
  /**
   * @param config - optional overrides for sampleRate, bufferSize, overlapSize, maxDriftMs
   * @throws RangeError when bufferSize is not positive, or overlapSize is not
   *   in the range 0 <= overlapSize < bufferSize
   */
  constructor(config = {}) {
    super();
    this.bufferPosition = 0;
    this.playbackQueue = [];
    this.isPlaying = false;
    this.audioContext = null;
    this.playbackStartTime = 0;
    this.samplesPlayed = 0;
    this.config = {
      sampleRate: 16e3,
      bufferSize: 16640,
      overlapSize: 4160,
      maxDriftMs: 100,
      ...config
    };
    const { bufferSize, overlapSize } = this.config;
    if (!(bufferSize > 0)) {
      throw new RangeError(`bufferSize must be a positive number, got ${bufferSize}`);
    }
    // An overlap as large as the buffer leaves no room for new samples, which
    // would make bufferForInference() spin forever once the buffer fills.
    if (!(overlapSize >= 0 && overlapSize < bufferSize)) {
      throw new RangeError(`overlapSize must satisfy 0 <= overlapSize < bufferSize, got ${overlapSize}`);
    }
    this.audioBuffer = new Float32Array(bufferSize);
  }
  /**
   * Initialize audio context (created on first call, resumed if suspended)
   */
  async initialize() {
    if (!this.audioContext) {
      this.audioContext = new AudioContext({ sampleRate: this.config.sampleRate });
    }
    if (this.audioContext.state === "suspended") {
      await this.audioContext.resume();
    }
  }
  /**
   * Push audio chunk for processing and playback
   */
  pushAudio(audio) {
    this.playbackQueue.push(audio);
    this.bufferForInference(audio);
    if (!this.isPlaying && this.playbackQueue.length > 0) {
      // Playback runs in the background; surface failures instead of leaving
      // an unhandled rejection.
      this.startPlayback().catch((error) => {
        console.error("[AudioSyncManager] Playback error:", error);
      });
    }
  }
  /**
   * Buffer audio for inference.
   *
   * Each time the buffer fills, a copy is emitted as "buffer.ready" and the
   * last `overlapSize` samples are moved to the front as the start of the
   * next window.
   */
  bufferForInference(audio) {
    let offset = 0;
    while (offset < audio.length) {
      const remaining = this.config.bufferSize - this.bufferPosition;
      const toCopy = Math.min(remaining, audio.length - offset);
      this.audioBuffer.set(audio.subarray(offset, offset + toCopy), this.bufferPosition);
      this.bufferPosition += toCopy;
      offset += toCopy;
      if (this.bufferPosition >= this.config.bufferSize) {
        this.emit("buffer.ready", { audio: new Float32Array(this.audioBuffer) });
        const overlapStart = this.config.bufferSize - this.config.overlapSize;
        this.audioBuffer.copyWithin(0, overlapStart);
        this.bufferPosition = this.config.overlapSize;
      }
    }
  }
  /**
   * Start audio playback (no-op without an audio context or while already playing)
   */
  async startPlayback() {
    if (!this.audioContext || this.isPlaying) return;
    this.isPlaying = true;
    this.playbackStartTime = this.audioContext.currentTime;
    this.samplesPlayed = 0;
    this.emit("playback.start", {});
    await this.processPlaybackQueue();
  }
  /**
   * Process playback queue: play queued chunks one after another, waiting for
   * each to end before taking the next.
   *
   * The playing flag is cleared and "playback.end" is emitted even when a
   * chunk fails, so a single bad chunk cannot block later playback.
   */
  async processPlaybackQueue() {
    if (!this.audioContext) return;
    try {
      while (this.playbackQueue.length > 0) {
        const audio = this.playbackQueue.shift();
        const buffer = this.audioContext.createBuffer(1, audio.length, this.config.sampleRate);
        buffer.copyToChannel(audio, 0);
        const source = this.audioContext.createBufferSource();
        source.buffer = buffer;
        source.connect(this.audioContext.destination);
        const playTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
        source.start(playTime);
        this.samplesPlayed += audio.length;
        this.checkDrift();
        await new Promise((resolve) => {
          source.onended = resolve;
        });
      }
    } finally {
      this.isPlaying = false;
      this.emit("playback.end", {});
    }
  }
  /**
   * Check for audio/animation drift and emit "sync.drift" when it exceeds maxDriftMs
   */
  checkDrift() {
    if (!this.audioContext) return;
    const expectedTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
    const actualTime = this.audioContext.currentTime;
    const driftMs = (actualTime - expectedTime) * 1e3;
    if (Math.abs(driftMs) > this.config.maxDriftMs) {
      this.emit("sync.drift", { driftMs });
    }
  }
  /**
   * Clear playback queue and reset the inference buffer
   */
  clearQueue() {
    this.playbackQueue = [];
    this.bufferPosition = 0;
    this.audioBuffer.fill(0);
  }
  /**
   * Stop playback.
   * NOTE(review): this clears the queue and the playing flag, but a chunk that
   * has already been started is not stopped here — confirm that is intended.
   */
  stop() {
    this.clearQueue();
    this.isPlaying = false;
  }
  /**
   * Get current playback position in seconds
   */
  getPlaybackPosition() {
    if (!this.audioContext) return 0;
    return this.audioContext.currentTime - this.playbackStartTime;
  }
  /**
   * Check if currently playing
   */
  getIsPlaying() {
    return this.isPlaying;
  }
  /**
   * Dispose resources
   */
  dispose() {
    this.stop();
    this.audioContext?.close();
    this.audioContext = null;
  }
};
|
|
9892
|
-
|
|
9893
|
-
// src/ai/utils/InterruptionHandler.ts
|
|
9894
|
-
/**
 * Tracks user speech from a stream of VAD results and raises an interruption
 * event when the user keeps talking while the AI is speaking.
 *
 * Events: "speech.detected", "speech.ended", "interruption.triggered".
 */
var InterruptionHandler = class extends EventEmitter {
  /**
   * @param config - optional overrides for vadThreshold, minSpeechDurationMs,
   *   silenceTimeoutMs and enabled
   */
  constructor(config = {}) {
    super();
    this.isSpeaking = false;
    this.speechStartTime = 0;
    this.lastSpeechTime = 0;
    this.silenceTimer = null;
    this.aiIsSpeaking = false;
    // At most one interruption is raised per stretch of user speech.
    this.interruptionTriggeredThisSession = false;
    const defaults = {
      // Silero VAD default
      vadThreshold: 0.5,
      // Google/Amazon barge-in standard
      minSpeechDurationMs: 200,
      // OpenAI Realtime API standard
      silenceTimeoutMs: 500,
      enabled: true
    };
    this.config = { ...defaults, ...config };
  }
  /**
   * Feed one VAD result into the detector.
   *
   * @param vadProbability - Speech probability from VAD (0-1)
   * @param audioEnergy - Optional RMS energy reported in events; when it is
   *   0 (the default) the probability is reported in its place
   */
  processVADResult(vadProbability, audioEnergy = 0) {
    if (!this.config.enabled) return;
    const isSpeech = vadProbability > this.config.vadThreshold;
    if (!isSpeech) {
      this.onSilenceDetected();
      return;
    }
    this.onSpeechDetected(audioEnergy || vadProbability);
  }
  /**
   * Record whether the AI is currently speaking
   */
  setAISpeaking(speaking) {
    this.aiIsSpeaking = speaking;
  }
  /**
   * Turn interruption detection on or off; turning it off also resets state
   */
  setEnabled(enabled) {
    this.config.enabled = enabled;
    if (enabled) return;
    this.reset();
  }
  /**
   * Merge new values into the configuration
   */
  updateConfig(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Forget any speech in progress and cancel a pending silence timer
   */
  reset() {
    this.isSpeaking = false;
    this.speechStartTime = 0;
    this.lastSpeechTime = 0;
    this.interruptionTriggeredThisSession = false;
    if (!this.silenceTimer) return;
    clearTimeout(this.silenceTimer);
    this.silenceTimer = null;
  }
  /**
   * Report whether the user is speaking and for how long (0 ms when idle)
   */
  getState() {
    const { isSpeaking } = this;
    const speechDurationMs = isSpeaking ? Date.now() - this.speechStartTime : 0;
    return { isSpeaking, speechDurationMs };
  }
  // ==================== Private Methods ====================
  /** Handle a result classified as speech. */
  onSpeechDetected(rms) {
    const now = Date.now();
    this.lastSpeechTime = now;
    // Speech resumed: a pending end-of-speech timer no longer applies.
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
    if (!this.isSpeaking) {
      this.isSpeaking = true;
      this.speechStartTime = now;
      this.emit("speech.detected", { rms });
    }
    const mayInterrupt = this.aiIsSpeaking && !this.interruptionTriggeredThisSession;
    if (!mayInterrupt) return;
    const durationMs = now - this.speechStartTime;
    if (durationMs < this.config.minSpeechDurationMs) return;
    this.interruptionTriggeredThisSession = true;
    this.emit("interruption.triggered", { rms, durationMs });
  }
  /** Handle a result classified as silence. */
  onSilenceDetected() {
    // Only the first silent result after speech arms the timer.
    if (!this.isSpeaking || this.silenceTimer) return;
    const finishSpeech = () => {
      const durationMs = this.lastSpeechTime - this.speechStartTime;
      this.isSpeaking = false;
      this.silenceTimer = null;
      this.interruptionTriggeredThisSession = false;
      this.emit("speech.ended", { durationMs });
    };
    this.silenceTimer = setTimeout(finishSpeech, this.config.silenceTimeoutMs);
  }
};
|
|
10005
|
-
|
|
10006
8876
|
// src/animation/types.ts
|
|
10007
8877
|
var DEFAULT_ANIMATION_CONFIG = {
|
|
10008
8878
|
initialState: "idle",
|
|
@@ -11045,17 +9915,14 @@ export {
|
|
|
11045
9915
|
A2EOrchestrator,
|
|
11046
9916
|
A2EProcessor,
|
|
11047
9917
|
ARKIT_BLENDSHAPES,
|
|
11048
|
-
AgentCoreAdapter,
|
|
11049
9918
|
AnimationGraph,
|
|
11050
9919
|
AudioChunkCoalescer,
|
|
11051
9920
|
AudioEnergyAnalyzer,
|
|
11052
9921
|
AudioScheduler,
|
|
11053
|
-
AudioSyncManager,
|
|
11054
9922
|
BLENDSHAPE_TO_GROUP,
|
|
11055
9923
|
BlendshapeSmoother,
|
|
11056
9924
|
CTC_VOCAB,
|
|
11057
9925
|
ConsoleExporter,
|
|
11058
|
-
ConversationOrchestrator,
|
|
11059
9926
|
DEFAULT_ANIMATION_CONFIG,
|
|
11060
9927
|
DEFAULT_LOGGING_CONFIG,
|
|
11061
9928
|
EMOTION_NAMES,
|
|
@@ -11085,7 +9952,6 @@ export {
|
|
|
11085
9952
|
SileroVADInference,
|
|
11086
9953
|
SileroVADUnifiedAdapter,
|
|
11087
9954
|
SileroVADWorker,
|
|
11088
|
-
TenantManager,
|
|
11089
9955
|
UnifiedInferenceWorker,
|
|
11090
9956
|
Wav2ArkitCpuInference,
|
|
11091
9957
|
Wav2ArkitCpuUnifiedAdapter,
|