@omote/core 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +222 -443
- package/dist/index.d.mts +79 -828
- package/dist/index.d.ts +79 -828
- package/dist/index.js +180 -1314
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +180 -1314
- package/dist/index.mjs.map +1 -1
- package/package.json +7 -3
package/dist/index.js
CHANGED
|
@@ -33,17 +33,14 @@ __export(index_exports, {
|
|
|
33
33
|
A2EOrchestrator: () => A2EOrchestrator,
|
|
34
34
|
A2EProcessor: () => A2EProcessor,
|
|
35
35
|
ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
|
|
36
|
-
AgentCoreAdapter: () => AgentCoreAdapter,
|
|
37
36
|
AnimationGraph: () => AnimationGraph,
|
|
38
37
|
AudioChunkCoalescer: () => AudioChunkCoalescer,
|
|
39
38
|
AudioEnergyAnalyzer: () => AudioEnergyAnalyzer,
|
|
40
39
|
AudioScheduler: () => AudioScheduler,
|
|
41
|
-
AudioSyncManager: () => AudioSyncManager,
|
|
42
40
|
BLENDSHAPE_TO_GROUP: () => BLENDSHAPE_TO_GROUP,
|
|
43
41
|
BlendshapeSmoother: () => BlendshapeSmoother,
|
|
44
42
|
CTC_VOCAB: () => CTC_VOCAB,
|
|
45
43
|
ConsoleExporter: () => ConsoleExporter,
|
|
46
|
-
ConversationOrchestrator: () => ConversationOrchestrator,
|
|
47
44
|
DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
|
|
48
45
|
DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
|
|
49
46
|
EMOTION_NAMES: () => EMOTION_NAMES,
|
|
@@ -73,7 +70,6 @@ __export(index_exports, {
|
|
|
73
70
|
SileroVADInference: () => SileroVADInference,
|
|
74
71
|
SileroVADUnifiedAdapter: () => SileroVADUnifiedAdapter,
|
|
75
72
|
SileroVADWorker: () => SileroVADWorker,
|
|
76
|
-
TenantManager: () => TenantManager,
|
|
77
73
|
UnifiedInferenceWorker: () => UnifiedInferenceWorker,
|
|
78
74
|
Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
|
|
79
75
|
Wav2ArkitCpuUnifiedAdapter: () => Wav2ArkitCpuUnifiedAdapter,
|
|
@@ -1171,80 +1167,6 @@ var A2EProcessor = class {
|
|
|
1171
1167
|
}
|
|
1172
1168
|
};
|
|
1173
1169
|
|
|
1174
|
-
// src/inference/BlendshapeSmoother.ts
|
|
1175
|
-
var NUM_BLENDSHAPES = 52;
|
|
1176
|
-
var BlendshapeSmoother = class {
|
|
1177
|
-
constructor(config) {
|
|
1178
|
-
/** Whether any target has been set */
|
|
1179
|
-
this._hasTarget = false;
|
|
1180
|
-
this.halflife = config?.halflife ?? 0.06;
|
|
1181
|
-
this.values = new Float32Array(NUM_BLENDSHAPES);
|
|
1182
|
-
this.velocities = new Float32Array(NUM_BLENDSHAPES);
|
|
1183
|
-
this.targets = new Float32Array(NUM_BLENDSHAPES);
|
|
1184
|
-
}
|
|
1185
|
-
/** Whether a target frame has been set (false until first setTarget call) */
|
|
1186
|
-
get hasTarget() {
|
|
1187
|
-
return this._hasTarget;
|
|
1188
|
-
}
|
|
1189
|
-
/**
|
|
1190
|
-
* Set new target frame from inference output.
|
|
1191
|
-
* Springs will converge toward these values on subsequent update() calls.
|
|
1192
|
-
*/
|
|
1193
|
-
setTarget(frame) {
|
|
1194
|
-
this.targets.set(frame);
|
|
1195
|
-
this._hasTarget = true;
|
|
1196
|
-
}
|
|
1197
|
-
/**
|
|
1198
|
-
* Advance all 52 springs by `dt` seconds and return the smoothed frame.
|
|
1199
|
-
*
|
|
1200
|
-
* Call this every render frame (e.g., inside requestAnimationFrame).
|
|
1201
|
-
* Returns the internal values buffer — do NOT mutate the returned array.
|
|
1202
|
-
*
|
|
1203
|
-
* @param dt - Time step in seconds (e.g., 1/60 for 60fps)
|
|
1204
|
-
* @returns Smoothed blendshape values (Float32Array of 52)
|
|
1205
|
-
*/
|
|
1206
|
-
update(dt) {
|
|
1207
|
-
if (!this._hasTarget) {
|
|
1208
|
-
return this.values;
|
|
1209
|
-
}
|
|
1210
|
-
if (this.halflife <= 0) {
|
|
1211
|
-
this.values.set(this.targets);
|
|
1212
|
-
this.velocities.fill(0);
|
|
1213
|
-
return this.values;
|
|
1214
|
-
}
|
|
1215
|
-
const damping = Math.LN2 / this.halflife;
|
|
1216
|
-
const eydt = Math.exp(-damping * dt);
|
|
1217
|
-
for (let i = 0; i < NUM_BLENDSHAPES; i++) {
|
|
1218
|
-
const j0 = this.values[i] - this.targets[i];
|
|
1219
|
-
const j1 = this.velocities[i] + j0 * damping;
|
|
1220
|
-
this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
|
|
1221
|
-
this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
|
|
1222
|
-
this.values[i] = Math.max(0, Math.min(1, this.values[i]));
|
|
1223
|
-
}
|
|
1224
|
-
return this.values;
|
|
1225
|
-
}
|
|
1226
|
-
/**
|
|
1227
|
-
* Decay all spring targets to neutral (0).
|
|
1228
|
-
*
|
|
1229
|
-
* Call when inference stalls (no new frames for threshold duration).
|
|
1230
|
-
* The springs will smoothly close the mouth / relax the face over
|
|
1231
|
-
* the halflife period rather than freezing.
|
|
1232
|
-
*/
|
|
1233
|
-
decayToNeutral() {
|
|
1234
|
-
this.targets.fill(0);
|
|
1235
|
-
}
|
|
1236
|
-
/**
|
|
1237
|
-
* Reset all state (values, velocities, targets).
|
|
1238
|
-
* Call when starting a new playback session.
|
|
1239
|
-
*/
|
|
1240
|
-
reset() {
|
|
1241
|
-
this.values.fill(0);
|
|
1242
|
-
this.velocities.fill(0);
|
|
1243
|
-
this.targets.fill(0);
|
|
1244
|
-
this._hasTarget = false;
|
|
1245
|
-
}
|
|
1246
|
-
};
|
|
1247
|
-
|
|
1248
1170
|
// src/telemetry/exporters/console.ts
|
|
1249
1171
|
var ConsoleExporter = class {
|
|
1250
1172
|
constructor(options = {}) {
|
|
@@ -3262,13 +3184,6 @@ function pcm16ToFloat32(buffer) {
|
|
|
3262
3184
|
}
|
|
3263
3185
|
return float32;
|
|
3264
3186
|
}
|
|
3265
|
-
function int16ToFloat32(int16) {
|
|
3266
|
-
const float32 = new Float32Array(int16.length);
|
|
3267
|
-
for (let i = 0; i < int16.length; i++) {
|
|
3268
|
-
float32[i] = int16[i] / 32768;
|
|
3269
|
-
}
|
|
3270
|
-
return float32;
|
|
3271
|
-
}
|
|
3272
3187
|
|
|
3273
3188
|
// src/audio/FullFacePipeline.ts
|
|
3274
3189
|
var logger4 = createLogger("FullFacePipeline");
|
|
@@ -3301,16 +3216,11 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3301
3216
|
this.lastNewFrameTime = 0;
|
|
3302
3217
|
this.lastKnownLamFrame = null;
|
|
3303
3218
|
this.staleWarningEmitted = false;
|
|
3304
|
-
// Frame loop timing (for dt calculation)
|
|
3305
|
-
this.lastFrameLoopTime = 0;
|
|
3306
3219
|
// Diagnostic logging counter
|
|
3307
3220
|
this.frameLoopCount = 0;
|
|
3308
3221
|
const sampleRate = options.sampleRate ?? 16e3;
|
|
3309
3222
|
this.profile = options.profile ?? {};
|
|
3310
3223
|
this.staleThresholdMs = options.staleThresholdMs ?? 2e3;
|
|
3311
|
-
this.smoother = new BlendshapeSmoother({
|
|
3312
|
-
halflife: options.smoothingHalflife ?? 0.06
|
|
3313
|
-
});
|
|
3314
3224
|
const isCpuModel = options.lam.modelId === "wav2arkit_cpu";
|
|
3315
3225
|
const chunkSize = options.chunkSize ?? options.lam.chunkSize ?? 16e3;
|
|
3316
3226
|
const chunkAccumulationMs = chunkSize / sampleRate * 1e3;
|
|
@@ -3393,9 +3303,7 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3393
3303
|
this.lastNewFrameTime = 0;
|
|
3394
3304
|
this.lastKnownLamFrame = null;
|
|
3395
3305
|
this.staleWarningEmitted = false;
|
|
3396
|
-
this.lastFrameLoopTime = 0;
|
|
3397
3306
|
this.frameLoopCount = 0;
|
|
3398
|
-
this.smoother.reset();
|
|
3399
3307
|
this.scheduler.warmup();
|
|
3400
3308
|
this.startFrameLoop();
|
|
3401
3309
|
this.startMonitoring();
|
|
@@ -3430,22 +3338,16 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3430
3338
|
/**
|
|
3431
3339
|
* Start frame animation loop
|
|
3432
3340
|
*
|
|
3433
|
-
*
|
|
3434
|
-
*
|
|
3435
|
-
*
|
|
3436
|
-
* to neutral when inference stalls.
|
|
3341
|
+
* Polls A2EProcessor at render rate (60fps) for the latest inference frame
|
|
3342
|
+
* matching the current AudioContext time. Between inference batches (~30fps
|
|
3343
|
+
* bursts), getFrameForTime() holds the last frame.
|
|
3437
3344
|
*/
|
|
3438
3345
|
startFrameLoop() {
|
|
3439
|
-
this.lastFrameLoopTime = 0;
|
|
3440
3346
|
const updateFrame = () => {
|
|
3441
|
-
const now = performance.now() / 1e3;
|
|
3442
|
-
const dt = this.lastFrameLoopTime > 0 ? now - this.lastFrameLoopTime : 1 / 60;
|
|
3443
|
-
this.lastFrameLoopTime = now;
|
|
3444
3347
|
this.frameLoopCount++;
|
|
3445
3348
|
const currentTime = this.scheduler.getCurrentTime();
|
|
3446
3349
|
const lamFrame = this.processor.getFrameForTime(currentTime);
|
|
3447
3350
|
if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
|
|
3448
|
-
this.smoother.setTarget(lamFrame);
|
|
3449
3351
|
this.lastNewFrameTime = performance.now();
|
|
3450
3352
|
this.lastKnownLamFrame = lamFrame;
|
|
3451
3353
|
this.staleWarningEmitted = false;
|
|
@@ -3465,17 +3367,15 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3465
3367
|
currentTime: currentTime.toFixed(3),
|
|
3466
3368
|
playbackEndTime: this.scheduler.getPlaybackEndTime().toFixed(3),
|
|
3467
3369
|
queuedFrames: this.processor.queuedFrameCount,
|
|
3468
|
-
hasTarget: this.smoother.hasTarget,
|
|
3469
3370
|
playbackStarted: this.playbackStarted,
|
|
3470
3371
|
msSinceNewFrame: this.lastNewFrameTime > 0 ? Math.round(performance.now() - this.lastNewFrameTime) : -1,
|
|
3471
3372
|
processorFill: this.processor.fillLevel.toFixed(2)
|
|
3472
3373
|
});
|
|
3473
3374
|
}
|
|
3474
3375
|
if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
|
|
3475
|
-
this.smoother.decayToNeutral();
|
|
3476
3376
|
if (!this.staleWarningEmitted) {
|
|
3477
3377
|
this.staleWarningEmitted = true;
|
|
3478
|
-
logger4.warn("A2E stalled \u2014
|
|
3378
|
+
logger4.warn("A2E stalled \u2014 no new inference frames", {
|
|
3479
3379
|
staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
|
|
3480
3380
|
queuedFrames: this.processor.queuedFrameCount
|
|
3481
3381
|
});
|
|
@@ -3514,12 +3414,10 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3514
3414
|
await this.scheduler.cancelAll(fadeOutMs);
|
|
3515
3415
|
this.coalescer.reset();
|
|
3516
3416
|
this.processor.reset();
|
|
3517
|
-
this.smoother.reset();
|
|
3518
3417
|
this.playbackStarted = false;
|
|
3519
3418
|
this.lastNewFrameTime = 0;
|
|
3520
3419
|
this.lastKnownLamFrame = null;
|
|
3521
3420
|
this.staleWarningEmitted = false;
|
|
3522
|
-
this.lastFrameLoopTime = 0;
|
|
3523
3421
|
this.emit("playback_complete", void 0);
|
|
3524
3422
|
}
|
|
3525
3423
|
/**
|
|
@@ -3573,6 +3471,108 @@ var FullFacePipeline = class extends EventEmitter {
|
|
|
3573
3471
|
}
|
|
3574
3472
|
};
|
|
3575
3473
|
|
|
3474
|
+
// src/audio/InterruptionHandler.ts
|
|
3475
|
+
var InterruptionHandler = class extends EventEmitter {
|
|
3476
|
+
constructor(config = {}) {
|
|
3477
|
+
super();
|
|
3478
|
+
this.isSpeaking = false;
|
|
3479
|
+
this.speechStartTime = 0;
|
|
3480
|
+
this.lastSpeechTime = 0;
|
|
3481
|
+
this.silenceTimer = null;
|
|
3482
|
+
this.aiIsSpeaking = false;
|
|
3483
|
+
// Debouncing: only emit one interruption per speech session
|
|
3484
|
+
this.interruptionTriggeredThisSession = false;
|
|
3485
|
+
this.config = {
|
|
3486
|
+
vadThreshold: 0.5,
|
|
3487
|
+
// Silero VAD default
|
|
3488
|
+
minSpeechDurationMs: 200,
|
|
3489
|
+
// Google/Amazon barge-in standard
|
|
3490
|
+
silenceTimeoutMs: 500,
|
|
3491
|
+
// OpenAI Realtime API standard
|
|
3492
|
+
enabled: true,
|
|
3493
|
+
...config
|
|
3494
|
+
};
|
|
3495
|
+
}
|
|
3496
|
+
/**
|
|
3497
|
+
* Process VAD result for interruption detection
|
|
3498
|
+
* @param vadProbability - Speech probability from VAD (0-1)
|
|
3499
|
+
* @param audioEnergy - Optional RMS energy for logging (default: 0)
|
|
3500
|
+
*/
|
|
3501
|
+
processVADResult(vadProbability, audioEnergy = 0) {
|
|
3502
|
+
if (!this.config.enabled) return;
|
|
3503
|
+
if (vadProbability > this.config.vadThreshold) {
|
|
3504
|
+
this.onSpeechDetected(audioEnergy || vadProbability);
|
|
3505
|
+
} else {
|
|
3506
|
+
this.onSilenceDetected();
|
|
3507
|
+
}
|
|
3508
|
+
}
|
|
3509
|
+
/** Notify that AI started/stopped speaking */
|
|
3510
|
+
setAISpeaking(speaking) {
|
|
3511
|
+
this.aiIsSpeaking = speaking;
|
|
3512
|
+
}
|
|
3513
|
+
/** Enable/disable interruption detection */
|
|
3514
|
+
setEnabled(enabled) {
|
|
3515
|
+
this.config.enabled = enabled;
|
|
3516
|
+
if (!enabled) {
|
|
3517
|
+
this.reset();
|
|
3518
|
+
}
|
|
3519
|
+
}
|
|
3520
|
+
/** Update configuration */
|
|
3521
|
+
updateConfig(config) {
|
|
3522
|
+
this.config = { ...this.config, ...config };
|
|
3523
|
+
}
|
|
3524
|
+
/** Reset state */
|
|
3525
|
+
reset() {
|
|
3526
|
+
this.isSpeaking = false;
|
|
3527
|
+
this.speechStartTime = 0;
|
|
3528
|
+
this.lastSpeechTime = 0;
|
|
3529
|
+
this.interruptionTriggeredThisSession = false;
|
|
3530
|
+
if (this.silenceTimer) {
|
|
3531
|
+
clearTimeout(this.silenceTimer);
|
|
3532
|
+
this.silenceTimer = null;
|
|
3533
|
+
}
|
|
3534
|
+
}
|
|
3535
|
+
/** Get current state */
|
|
3536
|
+
getState() {
|
|
3537
|
+
return {
|
|
3538
|
+
isSpeaking: this.isSpeaking,
|
|
3539
|
+
speechDurationMs: this.isSpeaking ? Date.now() - this.speechStartTime : 0
|
|
3540
|
+
};
|
|
3541
|
+
}
|
|
3542
|
+
onSpeechDetected(rms) {
|
|
3543
|
+
const now = Date.now();
|
|
3544
|
+
this.lastSpeechTime = now;
|
|
3545
|
+
if (this.silenceTimer) {
|
|
3546
|
+
clearTimeout(this.silenceTimer);
|
|
3547
|
+
this.silenceTimer = null;
|
|
3548
|
+
}
|
|
3549
|
+
if (!this.isSpeaking) {
|
|
3550
|
+
this.isSpeaking = true;
|
|
3551
|
+
this.speechStartTime = now;
|
|
3552
|
+
this.emit("speech.detected", { rms });
|
|
3553
|
+
}
|
|
3554
|
+
if (this.aiIsSpeaking && !this.interruptionTriggeredThisSession) {
|
|
3555
|
+
const speechDuration = now - this.speechStartTime;
|
|
3556
|
+
if (speechDuration >= this.config.minSpeechDurationMs) {
|
|
3557
|
+
this.interruptionTriggeredThisSession = true;
|
|
3558
|
+
this.emit("interruption.triggered", { rms, durationMs: speechDuration });
|
|
3559
|
+
}
|
|
3560
|
+
}
|
|
3561
|
+
}
|
|
3562
|
+
onSilenceDetected() {
|
|
3563
|
+
if (!this.isSpeaking) return;
|
|
3564
|
+
if (!this.silenceTimer) {
|
|
3565
|
+
this.silenceTimer = setTimeout(() => {
|
|
3566
|
+
const durationMs = this.lastSpeechTime - this.speechStartTime;
|
|
3567
|
+
this.isSpeaking = false;
|
|
3568
|
+
this.silenceTimer = null;
|
|
3569
|
+
this.interruptionTriggeredThisSession = false;
|
|
3570
|
+
this.emit("speech.ended", { durationMs });
|
|
3571
|
+
}, this.config.silenceTimeoutMs);
|
|
3572
|
+
}
|
|
3573
|
+
}
|
|
3574
|
+
};
|
|
3575
|
+
|
|
3576
3576
|
// src/inference/kaldiFbank.ts
|
|
3577
3577
|
function fft(re, im) {
|
|
3578
3578
|
const n = re.length;
|
|
@@ -7405,6 +7405,80 @@ var A2EWithFallback = class {
|
|
|
7405
7405
|
}
|
|
7406
7406
|
};
|
|
7407
7407
|
|
|
7408
|
+
// src/inference/BlendshapeSmoother.ts
|
|
7409
|
+
var NUM_BLENDSHAPES = 52;
|
|
7410
|
+
var BlendshapeSmoother = class {
|
|
7411
|
+
constructor(config) {
|
|
7412
|
+
/** Whether any target has been set */
|
|
7413
|
+
this._hasTarget = false;
|
|
7414
|
+
this.halflife = config?.halflife ?? 0.06;
|
|
7415
|
+
this.values = new Float32Array(NUM_BLENDSHAPES);
|
|
7416
|
+
this.velocities = new Float32Array(NUM_BLENDSHAPES);
|
|
7417
|
+
this.targets = new Float32Array(NUM_BLENDSHAPES);
|
|
7418
|
+
}
|
|
7419
|
+
/** Whether a target frame has been set (false until first setTarget call) */
|
|
7420
|
+
get hasTarget() {
|
|
7421
|
+
return this._hasTarget;
|
|
7422
|
+
}
|
|
7423
|
+
/**
|
|
7424
|
+
* Set new target frame from inference output.
|
|
7425
|
+
* Springs will converge toward these values on subsequent update() calls.
|
|
7426
|
+
*/
|
|
7427
|
+
setTarget(frame) {
|
|
7428
|
+
this.targets.set(frame);
|
|
7429
|
+
this._hasTarget = true;
|
|
7430
|
+
}
|
|
7431
|
+
/**
|
|
7432
|
+
* Advance all 52 springs by `dt` seconds and return the smoothed frame.
|
|
7433
|
+
*
|
|
7434
|
+
* Call this every render frame (e.g., inside requestAnimationFrame).
|
|
7435
|
+
* Returns the internal values buffer — do NOT mutate the returned array.
|
|
7436
|
+
*
|
|
7437
|
+
* @param dt - Time step in seconds (e.g., 1/60 for 60fps)
|
|
7438
|
+
* @returns Smoothed blendshape values (Float32Array of 52)
|
|
7439
|
+
*/
|
|
7440
|
+
update(dt) {
|
|
7441
|
+
if (!this._hasTarget) {
|
|
7442
|
+
return this.values;
|
|
7443
|
+
}
|
|
7444
|
+
if (this.halflife <= 0) {
|
|
7445
|
+
this.values.set(this.targets);
|
|
7446
|
+
this.velocities.fill(0);
|
|
7447
|
+
return this.values;
|
|
7448
|
+
}
|
|
7449
|
+
const damping = Math.LN2 / this.halflife;
|
|
7450
|
+
const eydt = Math.exp(-damping * dt);
|
|
7451
|
+
for (let i = 0; i < NUM_BLENDSHAPES; i++) {
|
|
7452
|
+
const j0 = this.values[i] - this.targets[i];
|
|
7453
|
+
const j1 = this.velocities[i] + j0 * damping;
|
|
7454
|
+
this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
|
|
7455
|
+
this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
|
|
7456
|
+
this.values[i] = Math.max(0, Math.min(1, this.values[i]));
|
|
7457
|
+
}
|
|
7458
|
+
return this.values;
|
|
7459
|
+
}
|
|
7460
|
+
/**
|
|
7461
|
+
* Decay all spring targets to neutral (0).
|
|
7462
|
+
*
|
|
7463
|
+
* Call when inference stalls (no new frames for threshold duration).
|
|
7464
|
+
* The springs will smoothly close the mouth / relax the face over
|
|
7465
|
+
* the halflife period rather than freezing.
|
|
7466
|
+
*/
|
|
7467
|
+
decayToNeutral() {
|
|
7468
|
+
this.targets.fill(0);
|
|
7469
|
+
}
|
|
7470
|
+
/**
|
|
7471
|
+
* Reset all state (values, velocities, targets).
|
|
7472
|
+
* Call when starting a new playback session.
|
|
7473
|
+
*/
|
|
7474
|
+
reset() {
|
|
7475
|
+
this.values.fill(0);
|
|
7476
|
+
this.velocities.fill(0);
|
|
7477
|
+
this.targets.fill(0);
|
|
7478
|
+
this._hasTarget = false;
|
|
7479
|
+
}
|
|
7480
|
+
};
|
|
7481
|
+
|
|
7408
7482
|
// src/animation/audioEnergy.ts
|
|
7409
7483
|
function calculateRMS(samples) {
|
|
7410
7484
|
if (samples.length === 0) return 0;
|
|
@@ -9205,1214 +9279,6 @@ var EmotionController = class {
|
|
|
9205
9279
|
}
|
|
9206
9280
|
};
|
|
9207
9281
|
|
|
9208
|
-
// src/ai/adapters/AgentCoreAdapter.ts
|
|
9209
|
-
var AgentCoreAdapter = class extends EventEmitter {
|
|
9210
|
-
constructor(config) {
|
|
9211
|
-
super();
|
|
9212
|
-
this.name = "AgentCore";
|
|
9213
|
-
this._state = "disconnected";
|
|
9214
|
-
this._sessionId = null;
|
|
9215
|
-
this._isConnected = false;
|
|
9216
|
-
// Sub-components
|
|
9217
|
-
this.asr = null;
|
|
9218
|
-
this.vad = null;
|
|
9219
|
-
this.lam = null;
|
|
9220
|
-
this.pipeline = null;
|
|
9221
|
-
// WebSocket connection to AgentCore
|
|
9222
|
-
this.ws = null;
|
|
9223
|
-
this.wsReconnectAttempts = 0;
|
|
9224
|
-
this.maxReconnectAttempts = 5;
|
|
9225
|
-
// Audio buffers
|
|
9226
|
-
this.audioBuffer = [];
|
|
9227
|
-
// Conversation state
|
|
9228
|
-
this.history = [];
|
|
9229
|
-
this.currentConfig = null;
|
|
9230
|
-
// Interruption handling
|
|
9231
|
-
this.isSpeaking = false;
|
|
9232
|
-
this.currentTtsAbortController = null;
|
|
9233
|
-
// Auth token cache per tenant
|
|
9234
|
-
this.tokenCache = /* @__PURE__ */ new Map();
|
|
9235
|
-
this.agentCoreConfig = config;
|
|
9236
|
-
this.emotionController = new EmotionController();
|
|
9237
|
-
}
|
|
9238
|
-
get state() {
|
|
9239
|
-
return this._state;
|
|
9240
|
-
}
|
|
9241
|
-
get sessionId() {
|
|
9242
|
-
return this._sessionId;
|
|
9243
|
-
}
|
|
9244
|
-
get isConnected() {
|
|
9245
|
-
return this._isConnected;
|
|
9246
|
-
}
|
|
9247
|
-
/**
|
|
9248
|
-
* Connect to AgentCore with session configuration
|
|
9249
|
-
*/
|
|
9250
|
-
async connect(config) {
|
|
9251
|
-
this.currentConfig = config;
|
|
9252
|
-
this._sessionId = config.sessionId;
|
|
9253
|
-
try {
|
|
9254
|
-
const authToken = await this.getAuthToken(config.tenant);
|
|
9255
|
-
await Promise.all([
|
|
9256
|
-
this.initASR(),
|
|
9257
|
-
this.initLAM()
|
|
9258
|
-
]);
|
|
9259
|
-
await this.connectWebSocket(authToken, config);
|
|
9260
|
-
this._isConnected = true;
|
|
9261
|
-
this.setState("idle");
|
|
9262
|
-
this.emit("connection.opened", { sessionId: this._sessionId, adapter: this.name });
|
|
9263
|
-
} catch (error) {
|
|
9264
|
-
this.setState("error");
|
|
9265
|
-
this.emit("connection.error", {
|
|
9266
|
-
error,
|
|
9267
|
-
recoverable: true
|
|
9268
|
-
});
|
|
9269
|
-
throw error;
|
|
9270
|
-
}
|
|
9271
|
-
}
|
|
9272
|
-
/**
|
|
9273
|
-
* Disconnect and cleanup
|
|
9274
|
-
*/
|
|
9275
|
-
async disconnect() {
|
|
9276
|
-
this.currentTtsAbortController?.abort();
|
|
9277
|
-
if (this.pipeline) {
|
|
9278
|
-
this.pipeline.dispose();
|
|
9279
|
-
this.pipeline = null;
|
|
9280
|
-
}
|
|
9281
|
-
if (this.ws) {
|
|
9282
|
-
this.ws.close(1e3, "Client disconnect");
|
|
9283
|
-
this.ws = null;
|
|
9284
|
-
}
|
|
9285
|
-
await Promise.all([
|
|
9286
|
-
this.asr?.dispose(),
|
|
9287
|
-
this.vad?.dispose(),
|
|
9288
|
-
this.lam?.dispose()
|
|
9289
|
-
]);
|
|
9290
|
-
this._isConnected = false;
|
|
9291
|
-
this.setState("disconnected");
|
|
9292
|
-
this.emit("connection.closed", { reason: "Client disconnect" });
|
|
9293
|
-
}
|
|
9294
|
-
/**
|
|
9295
|
-
* Push user audio for processing
|
|
9296
|
-
*/
|
|
9297
|
-
pushAudio(audio) {
|
|
9298
|
-
if (!this._isConnected) return;
|
|
9299
|
-
if (this.isSpeaking) {
|
|
9300
|
-
this.detectVoiceActivity(audio).then((hasVoiceActivity) => {
|
|
9301
|
-
if (hasVoiceActivity) {
|
|
9302
|
-
this.interrupt();
|
|
9303
|
-
}
|
|
9304
|
-
}).catch((error) => {
|
|
9305
|
-
console.error("[AgentCore] VAD error during interruption detection:", error);
|
|
9306
|
-
});
|
|
9307
|
-
}
|
|
9308
|
-
const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
|
|
9309
|
-
this.audioBuffer.push(float32);
|
|
9310
|
-
this.scheduleTranscription();
|
|
9311
|
-
}
|
|
9312
|
-
/**
|
|
9313
|
-
* Send text directly to AgentCore
|
|
9314
|
-
*/
|
|
9315
|
-
async sendText(text) {
|
|
9316
|
-
if (!this._isConnected || !this.ws) {
|
|
9317
|
-
throw new Error("Not connected to AgentCore");
|
|
9318
|
-
}
|
|
9319
|
-
this.addToHistory({
|
|
9320
|
-
role: "user",
|
|
9321
|
-
content: text,
|
|
9322
|
-
timestamp: Date.now()
|
|
9323
|
-
});
|
|
9324
|
-
this.setState("thinking");
|
|
9325
|
-
this.emit("ai.thinking.start", { timestamp: Date.now() });
|
|
9326
|
-
this.ws.send(JSON.stringify({
|
|
9327
|
-
type: "user_message",
|
|
9328
|
-
sessionId: this._sessionId,
|
|
9329
|
-
content: text,
|
|
9330
|
-
context: {
|
|
9331
|
-
history: this.history.slice(-10),
|
|
9332
|
-
// Last 10 messages
|
|
9333
|
-
emotion: Array.from(this.emotionController.emotion)
|
|
9334
|
-
}
|
|
9335
|
-
}));
|
|
9336
|
-
}
|
|
9337
|
-
/**
|
|
9338
|
-
* Interrupt current AI response
|
|
9339
|
-
*/
|
|
9340
|
-
interrupt() {
|
|
9341
|
-
if (!this.isSpeaking) return;
|
|
9342
|
-
this.emit("interruption.detected", { timestamp: Date.now() });
|
|
9343
|
-
this.currentTtsAbortController?.abort();
|
|
9344
|
-
this.currentTtsAbortController = null;
|
|
9345
|
-
if (this.ws?.readyState === WebSocket.OPEN) {
|
|
9346
|
-
this.ws.send(JSON.stringify({
|
|
9347
|
-
type: "interrupt",
|
|
9348
|
-
sessionId: this._sessionId,
|
|
9349
|
-
timestamp: Date.now()
|
|
9350
|
-
}));
|
|
9351
|
-
}
|
|
9352
|
-
this.isSpeaking = false;
|
|
9353
|
-
this.setState("listening");
|
|
9354
|
-
this.emit("interruption.handled", { timestamp: Date.now(), action: "stop" });
|
|
9355
|
-
}
|
|
9356
|
-
getHistory() {
|
|
9357
|
-
return [...this.history];
|
|
9358
|
-
}
|
|
9359
|
-
clearHistory() {
|
|
9360
|
-
this.history = [];
|
|
9361
|
-
this.emit("memory.updated", { messageCount: 0 });
|
|
9362
|
-
}
|
|
9363
|
-
async healthCheck() {
|
|
9364
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
9365
|
-
return false;
|
|
9366
|
-
}
|
|
9367
|
-
return new Promise((resolve) => {
|
|
9368
|
-
const timeout = setTimeout(() => resolve(false), 5e3);
|
|
9369
|
-
const handler = (event) => {
|
|
9370
|
-
try {
|
|
9371
|
-
const data = JSON.parse(event.data);
|
|
9372
|
-
if (data.type === "pong") {
|
|
9373
|
-
clearTimeout(timeout);
|
|
9374
|
-
this.ws?.removeEventListener("message", handler);
|
|
9375
|
-
resolve(true);
|
|
9376
|
-
}
|
|
9377
|
-
} catch {
|
|
9378
|
-
}
|
|
9379
|
-
};
|
|
9380
|
-
this.ws?.addEventListener("message", handler);
|
|
9381
|
-
this.ws?.send(JSON.stringify({ type: "ping" }));
|
|
9382
|
-
});
|
|
9383
|
-
}
|
|
9384
|
-
// ==================== Private Methods ====================
|
|
9385
|
-
setState(state) {
|
|
9386
|
-
const previousState = this._state;
|
|
9387
|
-
this._state = state;
|
|
9388
|
-
this.emit("state.change", { state, previousState });
|
|
9389
|
-
}
|
|
9390
|
-
async getAuthToken(tenant) {
|
|
9391
|
-
const cached = this.tokenCache.get(tenant.tenantId);
|
|
9392
|
-
if (cached && cached.expiresAt > Date.now() + 6e4) {
|
|
9393
|
-
return cached.token;
|
|
9394
|
-
}
|
|
9395
|
-
if (tenant.credentials.authToken) {
|
|
9396
|
-
return tenant.credentials.authToken;
|
|
9397
|
-
}
|
|
9398
|
-
const endpoint = this.agentCoreConfig.endpoint;
|
|
9399
|
-
if (endpoint.startsWith("ws://") || endpoint.includes("localhost")) {
|
|
9400
|
-
return "local-dev-token";
|
|
9401
|
-
}
|
|
9402
|
-
const httpEndpoint = endpoint.replace("wss://", "https://").replace("ws://", "http://");
|
|
9403
|
-
const response = await fetch(`${httpEndpoint}/auth/token`, {
|
|
9404
|
-
method: "POST",
|
|
9405
|
-
headers: { "Content-Type": "application/json" },
|
|
9406
|
-
body: JSON.stringify({
|
|
9407
|
-
tenantId: tenant.tenantId,
|
|
9408
|
-
apiKey: tenant.credentials.apiKey
|
|
9409
|
-
})
|
|
9410
|
-
});
|
|
9411
|
-
if (!response.ok) {
|
|
9412
|
-
throw new Error(`Auth failed: ${response.statusText}`);
|
|
9413
|
-
}
|
|
9414
|
-
const { token, expiresIn } = await response.json();
|
|
9415
|
-
this.tokenCache.set(tenant.tenantId, {
|
|
9416
|
-
token,
|
|
9417
|
-
expiresAt: Date.now() + expiresIn * 1e3
|
|
9418
|
-
});
|
|
9419
|
-
return token;
|
|
9420
|
-
}
|
|
9421
|
-
async initASR() {
|
|
9422
|
-
await Promise.all([
|
|
9423
|
-
// SenseVoice ASR
|
|
9424
|
-
(async () => {
|
|
9425
|
-
this.asr = new SenseVoiceInference({
|
|
9426
|
-
modelUrl: "/models/sensevoice/model.int8.onnx",
|
|
9427
|
-
language: "auto"
|
|
9428
|
-
});
|
|
9429
|
-
await this.asr.load();
|
|
9430
|
-
})(),
|
|
9431
|
-
// Silero VAD for accurate voice activity detection
|
|
9432
|
-
(async () => {
|
|
9433
|
-
this.vad = new SileroVADInference({
|
|
9434
|
-
modelUrl: "/models/silero-vad.onnx",
|
|
9435
|
-
backend: "webgpu",
|
|
9436
|
-
sampleRate: 16e3,
|
|
9437
|
-
threshold: 0.5
|
|
9438
|
-
});
|
|
9439
|
-
await this.vad.load();
|
|
9440
|
-
})()
|
|
9441
|
-
]);
|
|
9442
|
-
}
|
|
9443
|
-
async initLAM() {
|
|
9444
|
-
const lamUrl = this.agentCoreConfig.models?.lamUrl || "/models/unified_wav2vec2_asr_a2e.onnx";
|
|
9445
|
-
this.lam = new Wav2Vec2Inference({
|
|
9446
|
-
modelUrl: lamUrl,
|
|
9447
|
-
backend: "auto"
|
|
9448
|
-
});
|
|
9449
|
-
await this.lam.load();
|
|
9450
|
-
await this.initPipeline();
|
|
9451
|
-
}
|
|
9452
|
-
async initPipeline() {
|
|
9453
|
-
if (!this.lam) {
|
|
9454
|
-
throw new Error("LAM must be initialized before pipeline");
|
|
9455
|
-
}
|
|
9456
|
-
this.pipeline = new FullFacePipeline({
|
|
9457
|
-
lam: this.lam,
|
|
9458
|
-
sampleRate: 16e3,
|
|
9459
|
-
chunkTargetMs: 200
|
|
9460
|
-
});
|
|
9461
|
-
await this.pipeline.initialize();
|
|
9462
|
-
this.pipeline.on("full_frame_ready", (fullFrame) => {
|
|
9463
|
-
const frame = fullFrame.blendshapes;
|
|
9464
|
-
this.emit("animation", {
|
|
9465
|
-
blendshapes: frame,
|
|
9466
|
-
get: (name) => {
|
|
9467
|
-
const idx = LAM_BLENDSHAPES.indexOf(name);
|
|
9468
|
-
return idx >= 0 ? frame[idx] : 0;
|
|
9469
|
-
},
|
|
9470
|
-
timestamp: Date.now(),
|
|
9471
|
-
// Wall clock for client-side logging only
|
|
9472
|
-
inferenceMs: 0
|
|
9473
|
-
// Pipeline handles LAM inference asynchronously
|
|
9474
|
-
});
|
|
9475
|
-
});
|
|
9476
|
-
this.pipeline.on("playback_complete", () => {
|
|
9477
|
-
this.isSpeaking = false;
|
|
9478
|
-
this.setState("idle");
|
|
9479
|
-
this.emit("audio.output.end", { durationMs: 0 });
|
|
9480
|
-
});
|
|
9481
|
-
this.pipeline.on("error", (error) => {
|
|
9482
|
-
console.error("[AgentCore] Pipeline error:", error);
|
|
9483
|
-
this.emit("connection.error", {
|
|
9484
|
-
error,
|
|
9485
|
-
recoverable: true
|
|
9486
|
-
});
|
|
9487
|
-
});
|
|
9488
|
-
}
|
|
9489
|
-
async connectWebSocket(authToken, config) {
|
|
9490
|
-
return new Promise((resolve, reject) => {
|
|
9491
|
-
const wsUrl = new URL(`${this.agentCoreConfig.endpoint.replace("http", "ws")}/ws`);
|
|
9492
|
-
wsUrl.searchParams.set("sessionId", config.sessionId);
|
|
9493
|
-
wsUrl.searchParams.set("characterId", config.tenant.characterId);
|
|
9494
|
-
this.ws = new WebSocket(wsUrl.toString());
|
|
9495
|
-
this.ws.onopen = () => {
|
|
9496
|
-
this.ws?.send(JSON.stringify({
|
|
9497
|
-
type: "auth",
|
|
9498
|
-
token: authToken,
|
|
9499
|
-
tenantId: config.tenant.tenantId,
|
|
9500
|
-
systemPrompt: config.systemPrompt
|
|
9501
|
-
}));
|
|
9502
|
-
};
|
|
9503
|
-
this.ws.onmessage = (event) => {
|
|
9504
|
-
try {
|
|
9505
|
-
this.handleAgentCoreMessage(JSON.parse(event.data));
|
|
9506
|
-
} catch {
|
|
9507
|
-
}
|
|
9508
|
-
};
|
|
9509
|
-
this.ws.onerror = () => {
|
|
9510
|
-
reject(new Error("WebSocket connection failed"));
|
|
9511
|
-
};
|
|
9512
|
-
this.ws.onclose = (event) => {
|
|
9513
|
-
this.handleDisconnect(event);
|
|
9514
|
-
};
|
|
9515
|
-
const authTimeout = setTimeout(() => {
|
|
9516
|
-
reject(new Error("Auth timeout"));
|
|
9517
|
-
}, 1e4);
|
|
9518
|
-
const authHandler = (event) => {
|
|
9519
|
-
try {
|
|
9520
|
-
const data = JSON.parse(event.data);
|
|
9521
|
-
if (data.type === "auth_success") {
|
|
9522
|
-
clearTimeout(authTimeout);
|
|
9523
|
-
this.ws?.removeEventListener("message", authHandler);
|
|
9524
|
-
resolve();
|
|
9525
|
-
} else if (data.type === "auth_failed") {
|
|
9526
|
-
clearTimeout(authTimeout);
|
|
9527
|
-
reject(new Error(data.message));
|
|
9528
|
-
}
|
|
9529
|
-
} catch {
|
|
9530
|
-
}
|
|
9531
|
-
};
|
|
9532
|
-
this.ws.addEventListener("message", authHandler);
|
|
9533
|
-
});
|
|
9534
|
-
}
|
|
9535
|
-
handleAgentCoreMessage(data) {
|
|
9536
|
-
switch (data.type) {
|
|
9537
|
-
case "response_start":
|
|
9538
|
-
this.setState("speaking");
|
|
9539
|
-
this.isSpeaking = true;
|
|
9540
|
-
this.emit("ai.response.start", {
|
|
9541
|
-
text: data.text,
|
|
9542
|
-
emotion: data.emotion
|
|
9543
|
-
});
|
|
9544
|
-
if (data.emotion) {
|
|
9545
|
-
this.emotionController.transitionTo(
|
|
9546
|
-
{ [data.emotion]: 0.7 },
|
|
9547
|
-
300
|
|
9548
|
-
);
|
|
9549
|
-
}
|
|
9550
|
-
if (this.pipeline) {
|
|
9551
|
-
this.pipeline.start();
|
|
9552
|
-
}
|
|
9553
|
-
break;
|
|
9554
|
-
case "response_chunk":
|
|
9555
|
-
this.emit("ai.response.chunk", {
|
|
9556
|
-
text: data.text,
|
|
9557
|
-
isLast: data.isLast
|
|
9558
|
-
});
|
|
9559
|
-
break;
|
|
9560
|
-
case "audio_chunk":
|
|
9561
|
-
if (data.audio && this.pipeline) {
|
|
9562
|
-
const audioData = this.base64ToArrayBuffer(data.audio);
|
|
9563
|
-
const uint8 = new Uint8Array(audioData);
|
|
9564
|
-
this.pipeline.onAudioChunk(uint8).catch((error) => {
|
|
9565
|
-
console.error("[AgentCore] Pipeline chunk error:", error);
|
|
9566
|
-
});
|
|
9567
|
-
}
|
|
9568
|
-
break;
|
|
9569
|
-
case "audio_end":
|
|
9570
|
-
if (this.pipeline) {
|
|
9571
|
-
this.pipeline.end().catch((error) => {
|
|
9572
|
-
console.error("[AgentCore] Pipeline end error:", error);
|
|
9573
|
-
});
|
|
9574
|
-
}
|
|
9575
|
-
break;
|
|
9576
|
-
case "response_end":
|
|
9577
|
-
this.addToHistory({
|
|
9578
|
-
role: "assistant",
|
|
9579
|
-
content: data.fullText,
|
|
9580
|
-
timestamp: Date.now(),
|
|
9581
|
-
emotion: data.emotion
|
|
9582
|
-
});
|
|
9583
|
-
this.emit("ai.response.end", {
|
|
9584
|
-
fullText: data.fullText,
|
|
9585
|
-
durationMs: data.durationMs || 0
|
|
9586
|
-
});
|
|
9587
|
-
break;
|
|
9588
|
-
case "memory_updated":
|
|
9589
|
-
this.emit("memory.updated", {
|
|
9590
|
-
messageCount: data.messageCount,
|
|
9591
|
-
tokenCount: data.tokenCount
|
|
9592
|
-
});
|
|
9593
|
-
break;
|
|
9594
|
-
case "error":
|
|
9595
|
-
this.emit("connection.error", {
|
|
9596
|
-
error: new Error(data.message),
|
|
9597
|
-
recoverable: data.recoverable ?? false
|
|
9598
|
-
});
|
|
9599
|
-
break;
|
|
9600
|
-
}
|
|
9601
|
-
}
|
|
9602
|
-
scheduleTranscription() {
|
|
9603
|
-
if (this.audioBuffer.length === 0) return;
|
|
9604
|
-
const totalLength = this.audioBuffer.reduce((sum2, buf) => sum2 + buf.length, 0);
|
|
9605
|
-
if (totalLength < 4e3) return;
|
|
9606
|
-
const audio = new Float32Array(totalLength);
|
|
9607
|
-
let offset = 0;
|
|
9608
|
-
for (const buf of this.audioBuffer) {
|
|
9609
|
-
audio.set(buf, offset);
|
|
9610
|
-
offset += buf.length;
|
|
9611
|
-
}
|
|
9612
|
-
this.audioBuffer = [];
|
|
9613
|
-
let sum = 0;
|
|
9614
|
-
for (let i = 0; i < audio.length; i++) {
|
|
9615
|
-
sum += audio[i] * audio[i];
|
|
9616
|
-
}
|
|
9617
|
-
const rms = Math.sqrt(sum / audio.length);
|
|
9618
|
-
if (rms < 0.01) {
|
|
9619
|
-
console.debug("[AgentCore] Skipping silent audio", { rms, samples: audio.length });
|
|
9620
|
-
return;
|
|
9621
|
-
}
|
|
9622
|
-
if (this.asr) {
|
|
9623
|
-
this.setState("listening");
|
|
9624
|
-
this.emit("user.speech.start", { timestamp: Date.now() });
|
|
9625
|
-
this.asr.transcribe(audio).then((result) => {
|
|
9626
|
-
this.emit("user.transcript.final", {
|
|
9627
|
-
text: result.text,
|
|
9628
|
-
confidence: 1
|
|
9629
|
-
});
|
|
9630
|
-
this.emit("user.speech.end", { timestamp: Date.now(), durationMs: result.inferenceTimeMs });
|
|
9631
|
-
const cleanText = result.text.trim();
|
|
9632
|
-
if (cleanText) {
|
|
9633
|
-
this.sendText(cleanText).catch((error) => {
|
|
9634
|
-
console.error("[AgentCore] Send text error:", error);
|
|
9635
|
-
});
|
|
9636
|
-
}
|
|
9637
|
-
}).catch((error) => {
|
|
9638
|
-
console.error("[AgentCore] Transcription error:", error);
|
|
9639
|
-
});
|
|
9640
|
-
}
|
|
9641
|
-
}
|
|
9642
|
-
// REMOVED: processAudioForAnimation() - now handled by FullFacePipeline
|
|
9643
|
-
// The pipeline manages audio scheduling, LAM inference, and frame synchronization
|
|
9644
|
-
// Frames are emitted via pipeline.on('full_frame_ready') event (see initPipeline())
|
|
9645
|
-
/**
|
|
9646
|
-
* Detect voice activity using Silero VAD
|
|
9647
|
-
* Falls back to simple RMS if VAD not available
|
|
9648
|
-
*/
|
|
9649
|
-
async detectVoiceActivity(audio) {
|
|
9650
|
-
const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
|
|
9651
|
-
if (this.vad) {
|
|
9652
|
-
const chunkSize = this.vad.getChunkSize();
|
|
9653
|
-
for (let i = 0; i + chunkSize <= float32.length; i += chunkSize) {
|
|
9654
|
-
const chunk = float32.slice(i, i + chunkSize);
|
|
9655
|
-
const result = await this.vad.process(chunk);
|
|
9656
|
-
if (result.isSpeech) {
|
|
9657
|
-
return true;
|
|
9658
|
-
}
|
|
9659
|
-
}
|
|
9660
|
-
return false;
|
|
9661
|
-
}
|
|
9662
|
-
let sum = 0;
|
|
9663
|
-
for (let i = 0; i < float32.length; i++) {
|
|
9664
|
-
sum += float32[i] * float32[i];
|
|
9665
|
-
}
|
|
9666
|
-
const rms = Math.sqrt(sum / float32.length);
|
|
9667
|
-
return rms > 0.02;
|
|
9668
|
-
}
|
|
9669
|
-
base64ToArrayBuffer(base64) {
|
|
9670
|
-
const binaryString = atob(base64);
|
|
9671
|
-
const bytes = new Uint8Array(binaryString.length);
|
|
9672
|
-
for (let i = 0; i < binaryString.length; i++) {
|
|
9673
|
-
bytes[i] = binaryString.charCodeAt(i);
|
|
9674
|
-
}
|
|
9675
|
-
return bytes.buffer;
|
|
9676
|
-
}
|
|
9677
|
-
addToHistory(message) {
|
|
9678
|
-
this.history.push(message);
|
|
9679
|
-
this.emit("memory.updated", { messageCount: this.history.length });
|
|
9680
|
-
}
|
|
9681
|
-
handleDisconnect(event) {
|
|
9682
|
-
this._isConnected = false;
|
|
9683
|
-
if (event.code !== 1e3) {
|
|
9684
|
-
if (this.wsReconnectAttempts < this.maxReconnectAttempts) {
|
|
9685
|
-
this.wsReconnectAttempts++;
|
|
9686
|
-
setTimeout(() => {
|
|
9687
|
-
if (this.currentConfig) {
|
|
9688
|
-
this.connect(this.currentConfig).catch(() => {
|
|
9689
|
-
});
|
|
9690
|
-
}
|
|
9691
|
-
}, Math.pow(2, this.wsReconnectAttempts) * 1e3);
|
|
9692
|
-
} else {
|
|
9693
|
-
this.setState("error");
|
|
9694
|
-
this.emit("connection.error", {
|
|
9695
|
-
error: new Error("Max reconnection attempts reached"),
|
|
9696
|
-
recoverable: false
|
|
9697
|
-
});
|
|
9698
|
-
}
|
|
9699
|
-
}
|
|
9700
|
-
this.emit("connection.closed", { reason: event.reason || "Connection closed" });
|
|
9701
|
-
}
|
|
9702
|
-
};
|
|
9703
|
-
|
|
9704
|
-
// src/ai/orchestration/ConversationOrchestrator.ts
|
|
9705
|
-
/**
 * One conversation session bound to an adapter.
 *
 * The session forwards audio, text and interruption requests to the adapter,
 * tracks the time of the last activity, stores free-form context entries and
 * can be exported to / restored from a plain snapshot object.
 */
var ConversationSessionImpl = class {
  /**
   * @param {object} config - Session configuration; `sessionId` and `tenant`
   *   are read by this class, `emotion` optionally selects an initial preset.
   * @param {object} adapter - Backend adapter used for all I/O.
   */
  constructor(config, adapter) {
    const now = Date.now();
    this._history = [];
    this._context = new Map();
    this.sessionId = config.sessionId;
    this._config = config;
    this._adapter = adapter;
    this.createdAt = now;
    this._lastActivityAt = now;
    this._emotionController = new EmotionController();
    if (config.emotion) {
      this._emotionController.setPreset(config.emotion);
    }
  }

  /** Adapter this session talks to. */
  get adapter() {
    return this._adapter;
  }

  /** Configuration the session was created with. */
  get config() {
    return this._config;
  }

  /** Current adapter state. */
  get state() {
    return this._adapter.state;
  }

  /** Copy of the locally stored history; mutating it does not affect the session. */
  get history() {
    return [...this._history];
  }

  /**
   * Current emotion weights.
   * NOTE(review): always returns an empty object and does not consult the
   * emotion controller - confirm whether this is an intentional placeholder.
   */
  get emotion() {
    return {};
  }

  /** Epoch milliseconds of the most recent activity. */
  get lastActivityAt() {
    return this._lastActivityAt;
  }

  /** Connect the adapter using this session's configuration. */
  async start() {
    await this._adapter.connect(this._config);
    this._lastActivityAt = Date.now();
  }

  /** Disconnect the adapter. */
  async end() {
    await this._adapter.disconnect();
  }

  /** Forward an audio chunk to the adapter. */
  pushAudio(audio) {
    this._adapter.pushAudio(audio);
    this._lastActivityAt = Date.now();
  }

  /** Send a text message through the adapter. */
  async sendText(text) {
    await this._adapter.sendText(text);
    this._lastActivityAt = Date.now();
  }

  /** Ask the adapter to interrupt the current response. */
  interrupt() {
    this._adapter.interrupt();
    this._lastActivityAt = Date.now();
  }

  /** Apply an emotion to the session's emotion controller. */
  setEmotion(emotion) {
    this._emotionController.set(emotion);
  }

  /** Store a context entry under `key`. */
  addContext(key, value) {
    this._context.set(key, value);
  }

  /** Remove the context entry stored under `key`. */
  removeContext(key) {
    this._context.delete(key);
  }

  /** @returns {object} Plain-object copy of all context entries. */
  getContext() {
    return Object.fromEntries(this._context);
  }

  /**
   * Serialise the session into a plain snapshot.
   *
   * The history array in the snapshot is a copy, matching the `history`
   * getter, so callers cannot mutate session state through it.
   *
   * @returns {object} Snapshot accepted by `import`.
   */
  export() {
    const { tenantId, characterId } = this._config.tenant;
    return {
      sessionId: this.sessionId,
      tenantId,
      characterId,
      history: [...this._history],
      context: Object.fromEntries(this._context),
      emotion: this.emotion,
      createdAt: this.createdAt,
      lastActivityAt: this._lastActivityAt
    };
  }

  /**
   * Restore history, context and last-activity time from a snapshot.
   *
   * Missing `history` / `context` fields are treated as empty and a missing
   * `lastActivityAt` keeps the current value, so partial snapshots do not throw.
   *
   * @param {object} snapshot - Object previously produced by `export`.
   */
  import(snapshot) {
    this._history = [...(snapshot.history ?? [])];
    this._context = new Map(Object.entries(snapshot.context ?? {}));
    this._lastActivityAt = snapshot.lastActivityAt ?? this._lastActivityAt;
  }

  /** Replace the local history with a copy of the adapter's history. */
  syncHistory() {
    this._history = [...(this._adapter.getHistory() ?? [])];
  }
};
|
|
9789
|
-
/**
 * Creates and tracks conversation sessions for registered tenants.
 *
 * All sessions share one adapter instance; adapter events are re-emitted from
 * the orchestrator with the owning `sessionId` attached.
 */
var ConversationOrchestrator = class extends EventEmitter {
  /**
   * @param {object} config - Orchestrator options. `config.adapter` is passed
   *   to the adapter constructor; `connectionTimeoutMs` (default 5000) and
   *   `maxRetries` (default 3) are stored on `this.config`.
   */
  constructor(config) {
    super();
    // Sessions keyed by session id
    this.sessions = new Map();
    // Tenant configurations keyed by tenant id
    this.tenants = new Map();
    // Per-session functions that remove the adapter listeners added for it
    this.eventForwarders = new Map();
    // Health monitoring
    this.healthCheckInterval = null;
    this.HEALTH_CHECK_INTERVAL_MS = 3e4;
    this.config = {
      connectionTimeoutMs: 5e3,
      maxRetries: 3,
      ...config
    };
    this.adapter = new AgentCoreAdapter(config.adapter);
  }

  /**
   * Register a tenant
   */
  registerTenant(tenant) {
    this.tenants.set(tenant.tenantId, tenant);
  }

  /**
   * Unregister a tenant
   */
  unregisterTenant(tenantId) {
    this.tenants.delete(tenantId);
  }

  /**
   * Get tenant config
   */
  getTenant(tenantId) {
    return this.tenants.get(tenantId);
  }

  /**
   * Create a new conversation session for a tenant.
   *
   * If starting the session fails, the session and its event listeners are
   * removed again before the error is rethrown.
   *
   * @param {string} tenantId - Id of a registered tenant.
   * @param {object} [options] - Optional `sessionId`, `systemPrompt`, `voice`,
   *   `emotion` and `language`.
   * @returns {Promise<object>} The started session.
   * @throws {Error} When the tenant is unknown or the session fails to start.
   */
  async createSession(tenantId, options = {}) {
    const tenant = this.tenants.get(tenantId);
    if (!tenant) {
      throw new Error(`Tenant not found: ${tenantId}`);
    }
    const sessionId = options.sessionId || this.generateSessionId();
    const sessionConfig = {
      sessionId,
      tenant,
      systemPrompt: options.systemPrompt,
      voice: options.voice,
      emotion: options.emotion,
      language: options.language
    };
    const session = new ConversationSessionImpl(sessionConfig, this.adapter);
    this.sessions.set(sessionId, session);
    this.forwardAdapterEvents(this.adapter, sessionId);
    try {
      await session.start();
    } catch (error) {
      // Do not leave a half-created session or its listeners behind.
      this.sessions.delete(sessionId);
      this.detachAdapterEvents(sessionId);
      throw error;
    }
    this.emit("session.created", { sessionId, tenantId });
    return session;
  }

  /**
   * End a session. The session is always removed and its adapter listeners
   * detached, even when ending it throws.
   */
  async endSession(sessionId) {
    const session = this.sessions.get(sessionId);
    if (!session) return;
    try {
      await session.end();
    } finally {
      this.sessions.delete(sessionId);
      this.detachAdapterEvents(sessionId);
    }
    this.emit("session.ended", { sessionId, reason: "Client requested" });
  }

  /**
   * Get session by ID
   */
  getSession(sessionId) {
    return this.sessions.get(sessionId);
  }

  /**
   * Get all sessions for a tenant
   */
  getTenantSessions(tenantId) {
    return Array.from(this.sessions.values()).filter((s) => s.config.tenant.tenantId === tenantId);
  }

  /**
   * Start health monitoring (no-op when already running)
   */
  startHealthMonitoring() {
    if (this.healthCheckInterval) return;
    this.healthCheckInterval = setInterval(async () => {
      await this.performHealthCheck();
    }, this.HEALTH_CHECK_INTERVAL_MS);
  }

  /**
   * Stop health monitoring
   */
  stopHealthMonitoring() {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
    }
  }

  /**
   * Dispose all resources.
   *
   * Every session is asked to end; bookkeeping is cleared and the adapter is
   * disconnected even when some sessions fail. The first session failure is
   * rethrown afterwards.
   */
  async dispose() {
    this.stopHealthMonitoring();
    const results = await Promise.allSettled(
      Array.from(this.sessions.values()).map((s) => s.end())
    );
    this.sessions.clear();
    for (const sessionId of Array.from(this.eventForwarders.keys())) {
      this.detachAdapterEvents(sessionId);
    }
    await this.adapter.disconnect();
    const failure = results.find((result) => result.status === "rejected");
    if (failure) {
      throw failure.reason;
    }
  }

  // ==================== Private Methods ====================

  /** Build an id of the form `sess_<epoch ms>_<random base-36 suffix>`. */
  generateSessionId() {
    return `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Re-emit adapter events from the orchestrator, tagged with `sessionId`.
   * The listeners are remembered so they can be removed when the session goes
   * away; any listeners previously registered for the same id are removed first.
   */
  forwardAdapterEvents(adapter, sessionId) {
    this.detachAdapterEvents(sessionId);
    const events = [
      "state.change",
      "user.speech.start",
      "user.speech.end",
      "user.transcript.partial",
      "user.transcript.final",
      "ai.thinking.start",
      "ai.response.start",
      "ai.response.chunk",
      "ai.response.end",
      "audio.output.chunk",
      "audio.output.end",
      "animation",
      "memory.updated",
      "connection.error",
      "interruption.detected",
      "interruption.handled"
    ];
    const detachers = [];
    for (const event of events) {
      const handler = (data) => {
        this.emit(event, { ...data, sessionId });
      };
      const unsubscribe = adapter.on(event, handler);
      detachers.push(() => {
        // Supports emitters whose on() returns an unsubscribe function as well
        // as emitters that expose off(event, handler).
        if (typeof unsubscribe === "function") {
          unsubscribe();
        } else if (typeof adapter.off === "function") {
          adapter.off(event, handler);
        }
      });
    }
    this.eventForwarders.set(sessionId, detachers);
  }

  /** Remove the adapter listeners registered for `sessionId`, if any. */
  detachAdapterEvents(sessionId) {
    const detachers = this.eventForwarders.get(sessionId);
    if (!detachers) return;
    this.eventForwarders.delete(sessionId);
    for (const detach of detachers) {
      detach();
    }
  }

  /** Run one adapter health check; failures are deliberately ignored (best effort). */
  async performHealthCheck() {
    try {
      await this.adapter.healthCheck();
    } catch {
    }
  }
};
|
|
9936
|
-
|
|
9937
|
-
// src/ai/tenancy/TenantManager.ts
|
|
9938
|
-
/**
 * In-memory registry of tenants with per-tenant quotas, usage counters and
 * optional auth-token refresh callbacks.
 */
var _TenantManager = class _TenantManager {
  constructor() {
    this.tenants = new Map();
    this.quotas = new Map();
    this.usage = new Map();
    this.tokenRefreshCallbacks = new Map();
  }

  /**
   * Register a tenant with quota.
   *
   * The quota object is copied so that later changes to a tenant's quota can
   * never alter the caller's object or the shared default. Registering an
   * existing tenant id again resets its usage counters.
   *
   * @param {object} tenant - Tenant config; `tenantId` is the registry key.
   * @param {object} [quota] - Limits; defaults to `DEFAULT_QUOTA`.
   * @param {Function} [tokenRefreshCallback] - Async function returning a fresh auth token.
   */
  register(tenant, quota = _TenantManager.DEFAULT_QUOTA, tokenRefreshCallback) {
    const { tenantId } = tenant;
    const now = Date.now();
    this.tenants.set(tenantId, tenant);
    this.quotas.set(tenantId, { ...quota });
    this.usage.set(tenantId, {
      currentSessions: 0,
      requestsThisMinute: 0,
      tokensUsed: 0,
      audioMinutesToday: 0,
      lastMinuteReset: now,
      lastDailyReset: now
    });
    if (tokenRefreshCallback) {
      this.tokenRefreshCallbacks.set(tenantId, tokenRefreshCallback);
    }
  }

  /**
   * Unregister a tenant and drop its quota, usage and refresh callback
   */
  unregister(tenantId) {
    this.tenants.delete(tenantId);
    this.quotas.delete(tenantId);
    this.usage.delete(tenantId);
    this.tokenRefreshCallbacks.delete(tenantId);
  }

  /**
   * Get tenant config
   */
  get(tenantId) {
    return this.tenants.get(tenantId);
  }

  /**
   * Check if tenant exists
   */
  has(tenantId) {
    return this.tenants.has(tenantId);
  }

  /**
   * Get all tenant IDs
   */
  getTenantIds() {
    return Array.from(this.tenants.keys());
  }

  /**
   * Check if tenant can create new session (false for unknown tenants)
   */
  canCreateSession(tenantId) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    return usage.currentSessions < quota.maxSessions;
  }

  /**
   * Check if tenant can make request (false for unknown tenants)
   */
  canMakeRequest(tenantId) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    this.checkMinuteReset(tenantId);
    return usage.requestsThisMinute < quota.requestsPerMinute;
  }

  /**
   * Check if tenant can use `minutes` more audio within the current window
   */
  canUseAudio(tenantId, minutes) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    this.checkDailyReset(tenantId);
    return usage.audioMinutesToday + minutes <= quota.maxAudioMinutesPerDay;
  }

  /**
   * Increment session count
   */
  incrementSessions(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      usage.currentSessions++;
    }
  }

  /**
   * Decrement session count (never below zero)
   */
  decrementSessions(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage && usage.currentSessions > 0) {
      usage.currentSessions--;
    }
  }

  /**
   * Record a request
   */
  recordRequest(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      this.checkMinuteReset(tenantId);
      usage.requestsThisMinute++;
    }
  }

  /**
   * Record token usage
   */
  recordTokens(tenantId, tokens) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      usage.tokensUsed += tokens;
    }
  }

  /**
   * Record audio usage
   */
  recordAudioMinutes(tenantId, minutes) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      this.checkDailyReset(tenantId);
      usage.audioMinutesToday += minutes;
    }
  }

  /**
   * Get fresh auth token for tenant.
   *
   * A registered refresh callback takes precedence; its result is also cached
   * on `tenant.credentials.authToken` (the credentials object is created when
   * the tenant has none). Without a callback the cached token is returned.
   *
   * @returns {Promise<string>} The auth token.
   * @throws {Error} When the tenant is unknown or no token is available.
   */
  async getAuthToken(tenantId) {
    const tenant = this.tenants.get(tenantId);
    if (!tenant) {
      throw new Error(`Tenant not found: ${tenantId}`);
    }
    const callback = this.tokenRefreshCallbacks.get(tenantId);
    if (callback) {
      const token = await callback();
      if (!tenant.credentials) {
        tenant.credentials = {};
      }
      tenant.credentials.authToken = token;
      return token;
    }
    const cachedToken = tenant.credentials?.authToken;
    if (cachedToken) {
      return cachedToken;
    }
    throw new Error(`No auth token available for tenant: ${tenantId}`);
  }

  /**
   * Update tenant credentials (shallow merge over the existing ones)
   */
  updateCredentials(tenantId, credentials) {
    const tenant = this.tenants.get(tenantId);
    if (tenant) {
      tenant.credentials = { ...tenant.credentials, ...credentials };
    }
  }

  /**
   * Get usage stats for tenant
   */
  getUsage(tenantId) {
    return this.usage.get(tenantId);
  }

  /**
   * Get quota for tenant
   */
  getQuota(tenantId) {
    return this.quotas.get(tenantId);
  }

  /**
   * Update quota for tenant (shallow merge; ignored for unknown tenants)
   */
  updateQuota(tenantId, quota) {
    const existing = this.quotas.get(tenantId);
    if (existing) {
      this.quotas.set(tenantId, { ...existing, ...quota });
    }
  }

  /**
   * Reset request, token and audio counters for a tenant.
   * The live session count is left untouched.
   */
  resetUsage(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      const now = Date.now();
      usage.requestsThisMinute = 0;
      usage.tokensUsed = 0;
      usage.audioMinutesToday = 0;
      usage.lastMinuteReset = now;
      usage.lastDailyReset = now;
    }
  }

  // ==================== Private Methods ====================

  /** Zero the per-minute request counter once 60 s have passed since the last reset. */
  checkMinuteReset(tenantId) {
    const usage = this.usage.get(tenantId);
    if (!usage) return;
    const now = Date.now();
    if (now - usage.lastMinuteReset >= 6e4) {
      usage.requestsThisMinute = 0;
      usage.lastMinuteReset = now;
    }
  }

  /** Zero the audio-minute counter once 24 h have passed since the last reset. */
  checkDailyReset(tenantId) {
    const usage = this.usage.get(tenantId);
    if (!usage) return;
    const now = Date.now();
    const MS_PER_DAY = 24 * 60 * 60 * 1e3;
    if (now - usage.lastDailyReset >= MS_PER_DAY) {
      usage.audioMinutesToday = 0;
      usage.lastDailyReset = now;
    }
  }
};
/**
 * Default quota for new tenants
 */
_TenantManager.DEFAULT_QUOTA = {
  maxSessions: 10,
  requestsPerMinute: 60,
  maxTokensPerConversation: 1e5,
  maxAudioMinutesPerDay: 60
};
var TenantManager = _TenantManager;
|
|
10159
|
-
|
|
10160
|
-
// src/ai/utils/AudioSyncManager.ts
|
|
10161
|
-
/**
 * Plays incoming audio chunks through a Web Audio context while collecting
 * the same samples into fixed-size, overlapping windows for inference.
 *
 * Events: "buffer.ready" ({ audio }), "playback.start", "playback.end" and
 * "sync.drift" ({ driftMs }).
 */
var AudioSyncManager = class extends EventEmitter {
  /**
   * @param {object} [config] - Overrides for `sampleRate` (16000),
   *   `bufferSize` (16640), `overlapSize` (4160) and `maxDriftMs` (100).
   * @throws {RangeError} When `overlapSize` is not in `[0, bufferSize)`; such a
   *   configuration could never drain the inference window.
   */
  constructor(config = {}) {
    super();
    this.bufferPosition = 0;
    this.playbackQueue = [];
    this.isPlaying = false;
    this.audioContext = null;
    this.playbackStartTime = 0;
    this.samplesPlayed = 0;
    // Source node that is currently scheduled/playing, if any.
    this.currentSource = null;
    // Bumped by stop() so that a playback loop started earlier can tell it is stale.
    this.playbackGeneration = 0;
    this.config = {
      sampleRate: 16e3,
      bufferSize: 16640,
      overlapSize: 4160,
      maxDriftMs: 100,
      ...config
    };
    const { bufferSize, overlapSize } = this.config;
    if (!(overlapSize >= 0 && overlapSize < bufferSize)) {
      throw new RangeError(
        `overlapSize (${overlapSize}) must be >= 0 and smaller than bufferSize (${bufferSize})`
      );
    }
    this.audioBuffer = new Float32Array(bufferSize);
  }

  /**
   * Initialize audio context (created lazily, resumed when suspended)
   */
  async initialize() {
    if (!this.audioContext) {
      this.audioContext = new AudioContext({ sampleRate: this.config.sampleRate });
    }
    if (this.audioContext.state === "suspended") {
      await this.audioContext.resume();
    }
  }

  /**
   * Push audio chunk for processing and playback.
   * Playback starts automatically when nothing is playing; playback failures
   * are logged instead of surfacing as unhandled rejections.
   */
  pushAudio(audio) {
    this.playbackQueue.push(audio);
    this.bufferForInference(audio);
    if (!this.isPlaying) {
      this.startPlayback().catch((error) => {
        console.error("[AudioSyncManager] Playback error:", error);
      });
    }
  }

  /**
   * Buffer audio for inference.
   * Each time the window fills, a copy is emitted as "buffer.ready" and the
   * last `overlapSize` samples are kept as the start of the next window.
   */
  bufferForInference(audio) {
    const { bufferSize, overlapSize } = this.config;
    let offset = 0;
    while (offset < audio.length) {
      const remaining = bufferSize - this.bufferPosition;
      const toCopy = Math.min(remaining, audio.length - offset);
      this.audioBuffer.set(audio.subarray(offset, offset + toCopy), this.bufferPosition);
      this.bufferPosition += toCopy;
      offset += toCopy;
      if (this.bufferPosition >= bufferSize) {
        this.emit("buffer.ready", { audio: new Float32Array(this.audioBuffer) });
        this.audioBuffer.copyWithin(0, bufferSize - overlapSize);
        this.bufferPosition = overlapSize;
      }
    }
  }

  /**
   * Start audio playback (no-op without an audio context or while playing)
   */
  async startPlayback() {
    if (!this.audioContext || this.isPlaying) return;
    this.isPlaying = true;
    this.playbackStartTime = this.audioContext.currentTime;
    this.samplesPlayed = 0;
    this.emit("playback.start", {});
    await this.processPlaybackQueue();
  }

  /**
   * Process playback queue: play queued chunks one after another until the
   * queue is empty, then emit "playback.end". The loop exits silently when
   * stop() was called while it was waiting for a chunk to finish.
   */
  async processPlaybackQueue() {
    const context = this.audioContext;
    if (!context) return;
    const generation = this.playbackGeneration;
    try {
      while (this.playbackQueue.length > 0) {
        const audio = this.playbackQueue.shift();
        const buffer = context.createBuffer(1, audio.length, this.config.sampleRate);
        buffer.copyToChannel(audio, 0);
        const source = context.createBufferSource();
        source.buffer = buffer;
        source.connect(context.destination);
        const playTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
        source.start(playTime);
        this.samplesPlayed += audio.length;
        this.checkDrift();
        this.currentSource = source;
        await new Promise((resolve) => {
          source.onended = resolve;
        });
        if (generation !== this.playbackGeneration) {
          // stop() ran in the meantime and already reported the end of playback.
          return;
        }
        this.currentSource = null;
      }
    } catch (error) {
      if (generation === this.playbackGeneration) {
        // Allow a later pushAudio() to restart playback after a failure.
        this.isPlaying = false;
        this.currentSource = null;
      }
      throw error;
    }
    this.isPlaying = false;
    this.emit("playback.end", {});
  }

  /**
   * Check for audio/animation drift and emit "sync.drift" when the difference
   * between the context clock and the scheduled position exceeds `maxDriftMs`
   */
  checkDrift() {
    if (!this.audioContext) return;
    const expectedTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
    const actualTime = this.audioContext.currentTime;
    const driftMs = (actualTime - expectedTime) * 1e3;
    if (Math.abs(driftMs) > this.config.maxDriftMs) {
      this.emit("sync.drift", { driftMs });
    }
  }

  /**
   * Clear playback queue and the inference window
   */
  clearQueue() {
    this.playbackQueue = [];
    this.bufferPosition = 0;
    this.audioBuffer.fill(0);
  }

  /**
   * Stop playback: drop queued audio, halt the chunk that is currently
   * playing and emit "playback.end" if playback was in progress
   */
  stop() {
    const wasPlaying = this.isPlaying;
    this.clearQueue();
    this.isPlaying = false;
    this.playbackGeneration += 1;
    const source = this.currentSource;
    this.currentSource = null;
    if (source) {
      try {
        source.stop();
      } catch {
        // The node may already have finished; nothing left to halt.
      }
    }
    if (wasPlaying) {
      this.emit("playback.end", {});
    }
  }

  /**
   * Get current playback position in seconds
   */
  getPlaybackPosition() {
    if (!this.audioContext) return 0;
    return this.audioContext.currentTime - this.playbackStartTime;
  }

  /**
   * Check if currently playing
   */
  getIsPlaying() {
    return this.isPlaying;
  }

  /**
   * Dispose resources: stop playback and close the audio context
   */
  dispose() {
    this.stop();
    const context = this.audioContext;
    this.audioContext = null;
    if (context) {
      // close() is asynchronous; a failure while closing is not actionable here.
      Promise.resolve(context.close()).catch(() => {
      });
    }
  }
};
|
|
10302
|
-
|
|
10303
|
-
// src/ai/utils/InterruptionHandler.ts
|
|
10304
|
-
/**
 * Turns a stream of VAD probabilities into speech / interruption events.
 *
 * Events: "speech.detected" ({ rms }), "interruption.triggered"
 * ({ rms, durationMs }) and "speech.ended" ({ durationMs }).
 */
var InterruptionHandler = class extends EventEmitter {
  /**
   * @param {object} [config] - Overrides for `vadThreshold`,
   *   `minSpeechDurationMs`, `silenceTimeoutMs` and `enabled`.
   */
  constructor(config = {}) {
    super();
    this.isSpeaking = false;
    this.speechStartTime = 0;
    this.lastSpeechTime = 0;
    this.silenceTimer = null;
    this.aiIsSpeaking = false;
    // At most one interruption is reported per user speech segment.
    this.interruptionTriggeredThisSession = false;
    const defaults = {
      // Silero VAD default
      vadThreshold: 0.5,
      // Google/Amazon barge-in standard
      minSpeechDurationMs: 200,
      // OpenAI Realtime API standard
      silenceTimeoutMs: 500,
      enabled: true
    };
    this.config = { ...defaults, ...config };
  }

  /**
   * Feed one VAD result into the detector.
   * @param vadProbability - Speech probability from VAD (0-1)
   * @param audioEnergy - Optional RMS energy reported with the events; the
   *   probability is used in its place when it is 0 or omitted
   */
  processVADResult(vadProbability, audioEnergy = 0) {
    if (!this.config.enabled) return;
    const isSpeech = vadProbability > this.config.vadThreshold;
    if (!isSpeech) {
      this.onSilenceDetected();
      return;
    }
    this.onSpeechDetected(audioEnergy || vadProbability);
  }

  /**
   * Record whether the AI is currently speaking
   */
  setAISpeaking(speaking) {
    this.aiIsSpeaking = speaking;
  }

  /**
   * Turn interruption detection on or off; turning it off also resets state
   */
  setEnabled(enabled) {
    this.config.enabled = enabled;
    if (enabled) return;
    this.reset();
  }

  /**
   * Merge new values into the configuration
   */
  updateConfig(config) {
    this.config = { ...this.config, ...config };
  }

  /**
   * Forget the current speech segment and cancel any pending silence timer
   */
  reset() {
    this.isSpeaking = false;
    this.speechStartTime = 0;
    this.lastSpeechTime = 0;
    this.interruptionTriggeredThisSession = false;
    this.cancelSilenceTimer();
  }

  /**
   * Snapshot of the detector: whether the user is speaking and for how long
   */
  getState() {
    const speechDurationMs = this.isSpeaking ? Date.now() - this.speechStartTime : 0;
    return { isSpeaking: this.isSpeaking, speechDurationMs };
  }

  // ==================== Private Methods ====================

  /** Cancel the pending end-of-speech timer, if one is armed. */
  cancelSilenceTimer() {
    if (!this.silenceTimer) return;
    clearTimeout(this.silenceTimer);
    this.silenceTimer = null;
  }

  /** Handle a frame classified as speech. */
  onSpeechDetected(rms) {
    const timestamp = Date.now();
    this.lastSpeechTime = timestamp;
    this.cancelSilenceTimer();

    if (!this.isSpeaking) {
      this.isSpeaking = true;
      this.speechStartTime = timestamp;
      this.emit("speech.detected", { rms });
    }

    const mayInterrupt = this.aiIsSpeaking && !this.interruptionTriggeredThisSession;
    if (!mayInterrupt) return;

    const durationMs = timestamp - this.speechStartTime;
    if (durationMs >= this.config.minSpeechDurationMs) {
      this.interruptionTriggeredThisSession = true;
      this.emit("interruption.triggered", { rms, durationMs });
    }
  }

  /** Handle a frame classified as silence; arms the end-of-speech timer once. */
  onSilenceDetected() {
    if (!this.isSpeaking || this.silenceTimer) return;
    const finishSpeech = () => {
      const durationMs = this.lastSpeechTime - this.speechStartTime;
      this.isSpeaking = false;
      this.silenceTimer = null;
      this.interruptionTriggeredThisSession = false;
      this.emit("speech.ended", { durationMs });
    };
    this.silenceTimer = setTimeout(finishSpeech, this.config.silenceTimeoutMs);
  }
};
|
|
10415
|
-
|
|
10416
9282
|
// src/animation/types.ts
|
|
10417
9283
|
var DEFAULT_ANIMATION_CONFIG = {
|
|
10418
9284
|
initialState: "idle",
|