@omote/core 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -33,17 +33,14 @@ __export(index_exports, {
33
33
  A2EOrchestrator: () => A2EOrchestrator,
34
34
  A2EProcessor: () => A2EProcessor,
35
35
  ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
36
- AgentCoreAdapter: () => AgentCoreAdapter,
37
36
  AnimationGraph: () => AnimationGraph,
38
37
  AudioChunkCoalescer: () => AudioChunkCoalescer,
39
38
  AudioEnergyAnalyzer: () => AudioEnergyAnalyzer,
40
39
  AudioScheduler: () => AudioScheduler,
41
- AudioSyncManager: () => AudioSyncManager,
42
40
  BLENDSHAPE_TO_GROUP: () => BLENDSHAPE_TO_GROUP,
43
41
  BlendshapeSmoother: () => BlendshapeSmoother,
44
42
  CTC_VOCAB: () => CTC_VOCAB,
45
43
  ConsoleExporter: () => ConsoleExporter,
46
- ConversationOrchestrator: () => ConversationOrchestrator,
47
44
  DEFAULT_ANIMATION_CONFIG: () => DEFAULT_ANIMATION_CONFIG,
48
45
  DEFAULT_LOGGING_CONFIG: () => DEFAULT_LOGGING_CONFIG,
49
46
  EMOTION_NAMES: () => EMOTION_NAMES,
@@ -73,7 +70,6 @@ __export(index_exports, {
73
70
  SileroVADInference: () => SileroVADInference,
74
71
  SileroVADUnifiedAdapter: () => SileroVADUnifiedAdapter,
75
72
  SileroVADWorker: () => SileroVADWorker,
76
- TenantManager: () => TenantManager,
77
73
  UnifiedInferenceWorker: () => UnifiedInferenceWorker,
78
74
  Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
79
75
  Wav2ArkitCpuUnifiedAdapter: () => Wav2ArkitCpuUnifiedAdapter,
@@ -1171,80 +1167,6 @@ var A2EProcessor = class {
1171
1167
  }
1172
1168
  };
1173
1169
 
1174
- // src/inference/BlendshapeSmoother.ts
1175
- var NUM_BLENDSHAPES = 52;
1176
- var BlendshapeSmoother = class {
1177
- constructor(config) {
1178
- /** Whether any target has been set */
1179
- this._hasTarget = false;
1180
- this.halflife = config?.halflife ?? 0.06;
1181
- this.values = new Float32Array(NUM_BLENDSHAPES);
1182
- this.velocities = new Float32Array(NUM_BLENDSHAPES);
1183
- this.targets = new Float32Array(NUM_BLENDSHAPES);
1184
- }
1185
- /** Whether a target frame has been set (false until first setTarget call) */
1186
- get hasTarget() {
1187
- return this._hasTarget;
1188
- }
1189
- /**
1190
- * Set new target frame from inference output.
1191
- * Springs will converge toward these values on subsequent update() calls.
1192
- */
1193
- setTarget(frame) {
1194
- this.targets.set(frame);
1195
- this._hasTarget = true;
1196
- }
1197
- /**
1198
- * Advance all 52 springs by `dt` seconds and return the smoothed frame.
1199
- *
1200
- * Call this every render frame (e.g., inside requestAnimationFrame).
1201
- * Returns the internal values buffer — do NOT mutate the returned array.
1202
- *
1203
- * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
1204
- * @returns Smoothed blendshape values (Float32Array of 52)
1205
- */
1206
- update(dt) {
1207
- if (!this._hasTarget) {
1208
- return this.values;
1209
- }
1210
- if (this.halflife <= 0) {
1211
- this.values.set(this.targets);
1212
- this.velocities.fill(0);
1213
- return this.values;
1214
- }
1215
- const damping = Math.LN2 / this.halflife;
1216
- const eydt = Math.exp(-damping * dt);
1217
- for (let i = 0; i < NUM_BLENDSHAPES; i++) {
1218
- const j0 = this.values[i] - this.targets[i];
1219
- const j1 = this.velocities[i] + j0 * damping;
1220
- this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
1221
- this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
1222
- this.values[i] = Math.max(0, Math.min(1, this.values[i]));
1223
- }
1224
- return this.values;
1225
- }
1226
- /**
1227
- * Decay all spring targets to neutral (0).
1228
- *
1229
- * Call when inference stalls (no new frames for threshold duration).
1230
- * The springs will smoothly close the mouth / relax the face over
1231
- * the halflife period rather than freezing.
1232
- */
1233
- decayToNeutral() {
1234
- this.targets.fill(0);
1235
- }
1236
- /**
1237
- * Reset all state (values, velocities, targets).
1238
- * Call when starting a new playback session.
1239
- */
1240
- reset() {
1241
- this.values.fill(0);
1242
- this.velocities.fill(0);
1243
- this.targets.fill(0);
1244
- this._hasTarget = false;
1245
- }
1246
- };
1247
-
1248
1170
  // src/telemetry/exporters/console.ts
1249
1171
  var ConsoleExporter = class {
1250
1172
  constructor(options = {}) {
@@ -3262,13 +3184,6 @@ function pcm16ToFloat32(buffer) {
3262
3184
  }
3263
3185
  return float32;
3264
3186
  }
3265
- function int16ToFloat32(int16) {
3266
- const float32 = new Float32Array(int16.length);
3267
- for (let i = 0; i < int16.length; i++) {
3268
- float32[i] = int16[i] / 32768;
3269
- }
3270
- return float32;
3271
- }
3272
3187
 
3273
3188
  // src/audio/FullFacePipeline.ts
3274
3189
  var logger4 = createLogger("FullFacePipeline");
@@ -3301,16 +3216,11 @@ var FullFacePipeline = class extends EventEmitter {
3301
3216
  this.lastNewFrameTime = 0;
3302
3217
  this.lastKnownLamFrame = null;
3303
3218
  this.staleWarningEmitted = false;
3304
- // Frame loop timing (for dt calculation)
3305
- this.lastFrameLoopTime = 0;
3306
3219
  // Diagnostic logging counter
3307
3220
  this.frameLoopCount = 0;
3308
3221
  const sampleRate = options.sampleRate ?? 16e3;
3309
3222
  this.profile = options.profile ?? {};
3310
3223
  this.staleThresholdMs = options.staleThresholdMs ?? 2e3;
3311
- this.smoother = new BlendshapeSmoother({
3312
- halflife: options.smoothingHalflife ?? 0.06
3313
- });
3314
3224
  const isCpuModel = options.lam.modelId === "wav2arkit_cpu";
3315
3225
  const chunkSize = options.chunkSize ?? options.lam.chunkSize ?? 16e3;
3316
3226
  const chunkAccumulationMs = chunkSize / sampleRate * 1e3;
@@ -3393,9 +3303,7 @@ var FullFacePipeline = class extends EventEmitter {
3393
3303
  this.lastNewFrameTime = 0;
3394
3304
  this.lastKnownLamFrame = null;
3395
3305
  this.staleWarningEmitted = false;
3396
- this.lastFrameLoopTime = 0;
3397
3306
  this.frameLoopCount = 0;
3398
- this.smoother.reset();
3399
3307
  this.scheduler.warmup();
3400
3308
  this.startFrameLoop();
3401
3309
  this.startMonitoring();
@@ -3430,22 +3338,16 @@ var FullFacePipeline = class extends EventEmitter {
3430
3338
  /**
3431
3339
  * Start frame animation loop
3432
3340
  *
3433
- * Uses critically damped spring smoother to produce continuous output
3434
- * at render rate (60fps), even between inference batches (~30fps bursts).
3435
- * Springs interpolate toward the latest inference target, and decay
3436
- * to neutral when inference stalls.
3341
+ * Polls A2EProcessor at render rate (60fps) for the latest inference frame
3342
+ * matching the current AudioContext time. Between inference batches (~30fps
3343
+ * bursts), getFrameForTime() holds the last frame.
3437
3344
  */
3438
3345
  startFrameLoop() {
3439
- this.lastFrameLoopTime = 0;
3440
3346
  const updateFrame = () => {
3441
- const now = performance.now() / 1e3;
3442
- const dt = this.lastFrameLoopTime > 0 ? now - this.lastFrameLoopTime : 1 / 60;
3443
- this.lastFrameLoopTime = now;
3444
3347
  this.frameLoopCount++;
3445
3348
  const currentTime = this.scheduler.getCurrentTime();
3446
3349
  const lamFrame = this.processor.getFrameForTime(currentTime);
3447
3350
  if (lamFrame && lamFrame !== this.lastKnownLamFrame) {
3448
- this.smoother.setTarget(lamFrame);
3449
3351
  this.lastNewFrameTime = performance.now();
3450
3352
  this.lastKnownLamFrame = lamFrame;
3451
3353
  this.staleWarningEmitted = false;
@@ -3465,17 +3367,15 @@ var FullFacePipeline = class extends EventEmitter {
3465
3367
  currentTime: currentTime.toFixed(3),
3466
3368
  playbackEndTime: this.scheduler.getPlaybackEndTime().toFixed(3),
3467
3369
  queuedFrames: this.processor.queuedFrameCount,
3468
- hasTarget: this.smoother.hasTarget,
3469
3370
  playbackStarted: this.playbackStarted,
3470
3371
  msSinceNewFrame: this.lastNewFrameTime > 0 ? Math.round(performance.now() - this.lastNewFrameTime) : -1,
3471
3372
  processorFill: this.processor.fillLevel.toFixed(2)
3472
3373
  });
3473
3374
  }
3474
3375
  if (this.playbackStarted && this.lastNewFrameTime > 0 && performance.now() - this.lastNewFrameTime > this.staleThresholdMs) {
3475
- this.smoother.decayToNeutral();
3476
3376
  if (!this.staleWarningEmitted) {
3477
3377
  this.staleWarningEmitted = true;
3478
- logger4.warn("A2E stalled \u2014 decaying to neutral", {
3378
+ logger4.warn("A2E stalled \u2014 no new inference frames", {
3479
3379
  staleDurationMs: Math.round(performance.now() - this.lastNewFrameTime),
3480
3380
  queuedFrames: this.processor.queuedFrameCount
3481
3381
  });
@@ -3514,12 +3414,10 @@ var FullFacePipeline = class extends EventEmitter {
3514
3414
  await this.scheduler.cancelAll(fadeOutMs);
3515
3415
  this.coalescer.reset();
3516
3416
  this.processor.reset();
3517
- this.smoother.reset();
3518
3417
  this.playbackStarted = false;
3519
3418
  this.lastNewFrameTime = 0;
3520
3419
  this.lastKnownLamFrame = null;
3521
3420
  this.staleWarningEmitted = false;
3522
- this.lastFrameLoopTime = 0;
3523
3421
  this.emit("playback_complete", void 0);
3524
3422
  }
3525
3423
  /**
@@ -3573,6 +3471,108 @@ var FullFacePipeline = class extends EventEmitter {
3573
3471
  }
3574
3472
  };
3575
3473
 
3474
+ // src/audio/InterruptionHandler.ts
3475
/**
 * Turns a stream of VAD probabilities into speech / barge-in events.
 *
 * Events emitted:
 *  - "speech.detected"        { rms }              first above-threshold frame of a speech session
 *  - "interruption.triggered" { rms, durationMs }  at most once per session, only while the AI
 *                                                  is flagged as speaking and the user has spoken
 *                                                  for at least `minSpeechDurationMs`
 *  - "speech.ended"           { durationMs }       after `silenceTimeoutMs` without speech
 */
var InterruptionHandler = class extends EventEmitter {
  /**
   * @param config - Optional overrides for `vadThreshold`, `minSpeechDurationMs`,
   *   `silenceTimeoutMs` and `enabled`. Keys whose value is `undefined` are ignored
   *   so they cannot erase a default.
   */
  constructor(config = {}) {
    super();
    this.isSpeaking = false;
    this.speechStartTime = 0;
    this.lastSpeechTime = 0;
    this.silenceTimer = null;
    this.aiIsSpeaking = false;
    // Debouncing: only emit one interruption per speech session
    this.interruptionTriggeredThisSession = false;
    this.config = {
      vadThreshold: 0.5,
      // Silero VAD default
      minSpeechDurationMs: 200,
      // Google/Amazon barge-in standard
      silenceTimeoutMs: 500,
      // OpenAI Realtime API standard
      enabled: true,
      ...this._definedEntries(config)
    };
  }
  /**
   * Process VAD result for interruption detection
   * @param vadProbability - Speech probability from VAD (0-1)
   * @param audioEnergy - Optional RMS energy for logging (default: 0)
   */
  processVADResult(vadProbability, audioEnergy = 0) {
    if (!this.config.enabled) return;
    if (vadProbability > this.config.vadThreshold) {
      // When no energy is supplied (0), the probability is reported in its place.
      this.onSpeechDetected(audioEnergy || vadProbability);
    } else {
      this.onSilenceDetected();
    }
  }
  /** Notify that AI started/stopped speaking */
  setAISpeaking(speaking) {
    this.aiIsSpeaking = speaking;
  }
  /** Enable/disable interruption detection; disabling also clears the speech state. */
  setEnabled(enabled) {
    this.config.enabled = enabled;
    if (!enabled) {
      this.reset();
    }
  }
  /**
   * Merge new settings into the current configuration.
   *
   * Keys set to `undefined` are skipped. If the merged configuration is disabled,
   * the speech state is reset exactly as `setEnabled(false)` does; otherwise
   * `isSpeaking` would stay stuck (processVADResult returns early while disabled)
   * and a pending silence timer could still emit "speech.ended".
   */
  updateConfig(config) {
    this.config = { ...this.config, ...this._definedEntries(config) };
    if (!this.config.enabled) {
      this.reset();
    }
  }
  /** Reset speech-session state and cancel any pending silence timer (AI-speaking flag is kept). */
  reset() {
    this.isSpeaking = false;
    this.speechStartTime = 0;
    this.lastSpeechTime = 0;
    this.interruptionTriggeredThisSession = false;
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
  }
  /** Get current state: whether the user is speaking and for how long (0 when not speaking). */
  getState() {
    return {
      isSpeaking: this.isSpeaking,
      speechDurationMs: this.isSpeaking ? Date.now() - this.speechStartTime : 0
    };
  }
  /** Handle an above-threshold frame: open a session if needed and check for barge-in. */
  onSpeechDetected(rms) {
    const now = Date.now();
    this.lastSpeechTime = now;
    // Speech resumed before the timeout elapsed: keep the current session alive.
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
    if (!this.isSpeaking) {
      this.isSpeaking = true;
      this.speechStartTime = now;
      this.emit("speech.detected", { rms });
    }
    if (this.aiIsSpeaking && !this.interruptionTriggeredThisSession) {
      const speechDuration = now - this.speechStartTime;
      if (speechDuration >= this.config.minSpeechDurationMs) {
        this.interruptionTriggeredThisSession = true;
        this.emit("interruption.triggered", { rms, durationMs: speechDuration });
      }
    }
  }
  /** Handle a below-threshold frame: arm the silence timer once per silence stretch. */
  onSilenceDetected() {
    if (!this.isSpeaking) return;
    if (!this.silenceTimer) {
      this.silenceTimer = setTimeout(() => {
        // Duration spans first to last speech frame, excluding the trailing silence.
        const durationMs = this.lastSpeechTime - this.speechStartTime;
        this.isSpeaking = false;
        this.silenceTimer = null;
        this.interruptionTriggeredThisSession = false;
        this.emit("speech.ended", { durationMs });
      }, this.config.silenceTimeoutMs);
    }
  }
  /** Copy of `config` without `undefined`-valued keys (null/undefined input yields {}). */
  _definedEntries(config) {
    return Object.fromEntries(
      Object.entries(config ?? {}).filter(([, value]) => value !== undefined)
    );
  }
};
3575
+
3576
3576
  // src/inference/kaldiFbank.ts
3577
3577
  function fft(re, im) {
3578
3578
  const n = re.length;
@@ -7405,6 +7405,80 @@ var A2EWithFallback = class {
7405
7405
  }
7406
7406
  };
7407
7407
 
7408
+ // src/inference/BlendshapeSmoother.ts
7409
var NUM_BLENDSHAPES = 52;
/**
 * Per-channel spring smoother for 52-value blendshape frames.
 *
 * `setTarget()` stores the latest inference frame; `update(dt)` moves every
 * channel toward its target with an exponentially damped step and clamps the
 * result to [0, 1].
 */
var BlendshapeSmoother = class {
  /**
   * @param config - Optional `{ halflife }` in seconds (default 0.06).
   *   A halflife that is not a positive number disables smoothing:
   *   `update()` then copies the targets straight into the values.
   */
  constructor(config) {
    /** Whether any target has been set */
    this._hasTarget = false;
    this.halflife = config?.halflife ?? 0.06;
    this.values = new Float32Array(NUM_BLENDSHAPES);
    this.velocities = new Float32Array(NUM_BLENDSHAPES);
    this.targets = new Float32Array(NUM_BLENDSHAPES);
  }
  /** Whether a target frame has been set (false until first setTarget call) */
  get hasTarget() {
    return this._hasTarget;
  }
  /**
   * Set new target frame from inference output.
   * Springs will converge toward these values on subsequent update() calls.
   *
   * Frames longer than 52 entries are truncated (TypedArray#set would throw a
   * RangeError on them). Shorter frames overwrite only the leading entries and
   * leave the remaining targets as they were.
   */
  setTarget(frame) {
    if (frame.length > NUM_BLENDSHAPES) {
      for (let i = 0; i < NUM_BLENDSHAPES; i++) {
        this.targets[i] = frame[i];
      }
    } else {
      this.targets.set(frame);
    }
    this._hasTarget = true;
  }
  /**
   * Advance all 52 springs by `dt` seconds and return the smoothed frame.
   *
   * Call this every render frame (e.g., inside requestAnimationFrame).
   * Returns the internal values buffer — do NOT mutate the returned array.
   *
   * A `dt` that is not a finite positive number is ignored (state is returned
   * unchanged): a single NaN or negative step would otherwise write NaN or
   * diverging values into the spring state, and NaN persists until reset().
   *
   * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
   * @returns Smoothed blendshape values (Float32Array of 52)
   */
  update(dt) {
    if (!this._hasTarget) {
      return this.values;
    }
    // Written as !(x > 0) so that a NaN halflife also takes the snap path.
    if (!(this.halflife > 0)) {
      this.values.set(this.targets);
      this.velocities.fill(0);
      return this.values;
    }
    if (!Number.isFinite(dt) || dt <= 0) {
      return this.values;
    }
    const damping = Math.LN2 / this.halflife;
    const eydt = Math.exp(-damping * dt);
    for (let i = 0; i < NUM_BLENDSHAPES; i++) {
      const j0 = this.values[i] - this.targets[i];
      const j1 = this.velocities[i] + j0 * damping;
      this.values[i] = eydt * (j0 + j1 * dt) + this.targets[i];
      this.velocities[i] = eydt * (this.velocities[i] - j1 * damping * dt);
      // Keep the output inside the valid blendshape range.
      this.values[i] = Math.max(0, Math.min(1, this.values[i]));
    }
    return this.values;
  }
  /**
   * Decay all spring targets to neutral (0).
   *
   * Call when inference stalls (no new frames for threshold duration).
   * The springs will smoothly close the mouth / relax the face over
   * the halflife period rather than freezing.
   */
  decayToNeutral() {
    this.targets.fill(0);
  }
  /**
   * Reset all state (values, velocities, targets).
   * Call when starting a new playback session.
   */
  reset() {
    this.values.fill(0);
    this.velocities.fill(0);
    this.targets.fill(0);
    this._hasTarget = false;
  }
};
7481
+
7408
7482
  // src/animation/audioEnergy.ts
7409
7483
  function calculateRMS(samples) {
7410
7484
  if (samples.length === 0) return 0;
@@ -9205,1214 +9279,6 @@ var EmotionController = class {
9205
9279
  }
9206
9280
  };
9207
9281
 
9208
- // src/ai/adapters/AgentCoreAdapter.ts
9209
- var AgentCoreAdapter = class extends EventEmitter {
9210
- constructor(config) {
9211
- super();
9212
- this.name = "AgentCore";
9213
- this._state = "disconnected";
9214
- this._sessionId = null;
9215
- this._isConnected = false;
9216
- // Sub-components
9217
- this.asr = null;
9218
- this.vad = null;
9219
- this.lam = null;
9220
- this.pipeline = null;
9221
- // WebSocket connection to AgentCore
9222
- this.ws = null;
9223
- this.wsReconnectAttempts = 0;
9224
- this.maxReconnectAttempts = 5;
9225
- // Audio buffers
9226
- this.audioBuffer = [];
9227
- // Conversation state
9228
- this.history = [];
9229
- this.currentConfig = null;
9230
- // Interruption handling
9231
- this.isSpeaking = false;
9232
- this.currentTtsAbortController = null;
9233
- // Auth token cache per tenant
9234
- this.tokenCache = /* @__PURE__ */ new Map();
9235
- this.agentCoreConfig = config;
9236
- this.emotionController = new EmotionController();
9237
- }
9238
- get state() {
9239
- return this._state;
9240
- }
9241
- get sessionId() {
9242
- return this._sessionId;
9243
- }
9244
- get isConnected() {
9245
- return this._isConnected;
9246
- }
9247
- /**
9248
- * Connect to AgentCore with session configuration
9249
- */
9250
- async connect(config) {
9251
- this.currentConfig = config;
9252
- this._sessionId = config.sessionId;
9253
- try {
9254
- const authToken = await this.getAuthToken(config.tenant);
9255
- await Promise.all([
9256
- this.initASR(),
9257
- this.initLAM()
9258
- ]);
9259
- await this.connectWebSocket(authToken, config);
9260
- this._isConnected = true;
9261
- this.setState("idle");
9262
- this.emit("connection.opened", { sessionId: this._sessionId, adapter: this.name });
9263
- } catch (error) {
9264
- this.setState("error");
9265
- this.emit("connection.error", {
9266
- error,
9267
- recoverable: true
9268
- });
9269
- throw error;
9270
- }
9271
- }
9272
- /**
9273
- * Disconnect and cleanup
9274
- */
9275
- async disconnect() {
9276
- this.currentTtsAbortController?.abort();
9277
- if (this.pipeline) {
9278
- this.pipeline.dispose();
9279
- this.pipeline = null;
9280
- }
9281
- if (this.ws) {
9282
- this.ws.close(1e3, "Client disconnect");
9283
- this.ws = null;
9284
- }
9285
- await Promise.all([
9286
- this.asr?.dispose(),
9287
- this.vad?.dispose(),
9288
- this.lam?.dispose()
9289
- ]);
9290
- this._isConnected = false;
9291
- this.setState("disconnected");
9292
- this.emit("connection.closed", { reason: "Client disconnect" });
9293
- }
9294
- /**
9295
- * Push user audio for processing
9296
- */
9297
- pushAudio(audio) {
9298
- if (!this._isConnected) return;
9299
- if (this.isSpeaking) {
9300
- this.detectVoiceActivity(audio).then((hasVoiceActivity) => {
9301
- if (hasVoiceActivity) {
9302
- this.interrupt();
9303
- }
9304
- }).catch((error) => {
9305
- console.error("[AgentCore] VAD error during interruption detection:", error);
9306
- });
9307
- }
9308
- const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
9309
- this.audioBuffer.push(float32);
9310
- this.scheduleTranscription();
9311
- }
9312
- /**
9313
- * Send text directly to AgentCore
9314
- */
9315
- async sendText(text) {
9316
- if (!this._isConnected || !this.ws) {
9317
- throw new Error("Not connected to AgentCore");
9318
- }
9319
- this.addToHistory({
9320
- role: "user",
9321
- content: text,
9322
- timestamp: Date.now()
9323
- });
9324
- this.setState("thinking");
9325
- this.emit("ai.thinking.start", { timestamp: Date.now() });
9326
- this.ws.send(JSON.stringify({
9327
- type: "user_message",
9328
- sessionId: this._sessionId,
9329
- content: text,
9330
- context: {
9331
- history: this.history.slice(-10),
9332
- // Last 10 messages
9333
- emotion: Array.from(this.emotionController.emotion)
9334
- }
9335
- }));
9336
- }
9337
- /**
9338
- * Interrupt current AI response
9339
- */
9340
- interrupt() {
9341
- if (!this.isSpeaking) return;
9342
- this.emit("interruption.detected", { timestamp: Date.now() });
9343
- this.currentTtsAbortController?.abort();
9344
- this.currentTtsAbortController = null;
9345
- if (this.ws?.readyState === WebSocket.OPEN) {
9346
- this.ws.send(JSON.stringify({
9347
- type: "interrupt",
9348
- sessionId: this._sessionId,
9349
- timestamp: Date.now()
9350
- }));
9351
- }
9352
- this.isSpeaking = false;
9353
- this.setState("listening");
9354
- this.emit("interruption.handled", { timestamp: Date.now(), action: "stop" });
9355
- }
9356
- getHistory() {
9357
- return [...this.history];
9358
- }
9359
- clearHistory() {
9360
- this.history = [];
9361
- this.emit("memory.updated", { messageCount: 0 });
9362
- }
9363
- async healthCheck() {
9364
- if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
9365
- return false;
9366
- }
9367
- return new Promise((resolve) => {
9368
- const timeout = setTimeout(() => resolve(false), 5e3);
9369
- const handler = (event) => {
9370
- try {
9371
- const data = JSON.parse(event.data);
9372
- if (data.type === "pong") {
9373
- clearTimeout(timeout);
9374
- this.ws?.removeEventListener("message", handler);
9375
- resolve(true);
9376
- }
9377
- } catch {
9378
- }
9379
- };
9380
- this.ws?.addEventListener("message", handler);
9381
- this.ws?.send(JSON.stringify({ type: "ping" }));
9382
- });
9383
- }
9384
- // ==================== Private Methods ====================
9385
- setState(state) {
9386
- const previousState = this._state;
9387
- this._state = state;
9388
- this.emit("state.change", { state, previousState });
9389
- }
9390
- async getAuthToken(tenant) {
9391
- const cached = this.tokenCache.get(tenant.tenantId);
9392
- if (cached && cached.expiresAt > Date.now() + 6e4) {
9393
- return cached.token;
9394
- }
9395
- if (tenant.credentials.authToken) {
9396
- return tenant.credentials.authToken;
9397
- }
9398
- const endpoint = this.agentCoreConfig.endpoint;
9399
- if (endpoint.startsWith("ws://") || endpoint.includes("localhost")) {
9400
- return "local-dev-token";
9401
- }
9402
- const httpEndpoint = endpoint.replace("wss://", "https://").replace("ws://", "http://");
9403
- const response = await fetch(`${httpEndpoint}/auth/token`, {
9404
- method: "POST",
9405
- headers: { "Content-Type": "application/json" },
9406
- body: JSON.stringify({
9407
- tenantId: tenant.tenantId,
9408
- apiKey: tenant.credentials.apiKey
9409
- })
9410
- });
9411
- if (!response.ok) {
9412
- throw new Error(`Auth failed: ${response.statusText}`);
9413
- }
9414
- const { token, expiresIn } = await response.json();
9415
- this.tokenCache.set(tenant.tenantId, {
9416
- token,
9417
- expiresAt: Date.now() + expiresIn * 1e3
9418
- });
9419
- return token;
9420
- }
9421
- async initASR() {
9422
- await Promise.all([
9423
- // SenseVoice ASR
9424
- (async () => {
9425
- this.asr = new SenseVoiceInference({
9426
- modelUrl: "/models/sensevoice/model.int8.onnx",
9427
- language: "auto"
9428
- });
9429
- await this.asr.load();
9430
- })(),
9431
- // Silero VAD for accurate voice activity detection
9432
- (async () => {
9433
- this.vad = new SileroVADInference({
9434
- modelUrl: "/models/silero-vad.onnx",
9435
- backend: "webgpu",
9436
- sampleRate: 16e3,
9437
- threshold: 0.5
9438
- });
9439
- await this.vad.load();
9440
- })()
9441
- ]);
9442
- }
9443
- async initLAM() {
9444
- const lamUrl = this.agentCoreConfig.models?.lamUrl || "/models/unified_wav2vec2_asr_a2e.onnx";
9445
- this.lam = new Wav2Vec2Inference({
9446
- modelUrl: lamUrl,
9447
- backend: "auto"
9448
- });
9449
- await this.lam.load();
9450
- await this.initPipeline();
9451
- }
9452
- async initPipeline() {
9453
- if (!this.lam) {
9454
- throw new Error("LAM must be initialized before pipeline");
9455
- }
9456
- this.pipeline = new FullFacePipeline({
9457
- lam: this.lam,
9458
- sampleRate: 16e3,
9459
- chunkTargetMs: 200
9460
- });
9461
- await this.pipeline.initialize();
9462
- this.pipeline.on("full_frame_ready", (fullFrame) => {
9463
- const frame = fullFrame.blendshapes;
9464
- this.emit("animation", {
9465
- blendshapes: frame,
9466
- get: (name) => {
9467
- const idx = LAM_BLENDSHAPES.indexOf(name);
9468
- return idx >= 0 ? frame[idx] : 0;
9469
- },
9470
- timestamp: Date.now(),
9471
- // Wall clock for client-side logging only
9472
- inferenceMs: 0
9473
- // Pipeline handles LAM inference asynchronously
9474
- });
9475
- });
9476
- this.pipeline.on("playback_complete", () => {
9477
- this.isSpeaking = false;
9478
- this.setState("idle");
9479
- this.emit("audio.output.end", { durationMs: 0 });
9480
- });
9481
- this.pipeline.on("error", (error) => {
9482
- console.error("[AgentCore] Pipeline error:", error);
9483
- this.emit("connection.error", {
9484
- error,
9485
- recoverable: true
9486
- });
9487
- });
9488
- }
9489
- async connectWebSocket(authToken, config) {
9490
- return new Promise((resolve, reject) => {
9491
- const wsUrl = new URL(`${this.agentCoreConfig.endpoint.replace("http", "ws")}/ws`);
9492
- wsUrl.searchParams.set("sessionId", config.sessionId);
9493
- wsUrl.searchParams.set("characterId", config.tenant.characterId);
9494
- this.ws = new WebSocket(wsUrl.toString());
9495
- this.ws.onopen = () => {
9496
- this.ws?.send(JSON.stringify({
9497
- type: "auth",
9498
- token: authToken,
9499
- tenantId: config.tenant.tenantId,
9500
- systemPrompt: config.systemPrompt
9501
- }));
9502
- };
9503
- this.ws.onmessage = (event) => {
9504
- try {
9505
- this.handleAgentCoreMessage(JSON.parse(event.data));
9506
- } catch {
9507
- }
9508
- };
9509
- this.ws.onerror = () => {
9510
- reject(new Error("WebSocket connection failed"));
9511
- };
9512
- this.ws.onclose = (event) => {
9513
- this.handleDisconnect(event);
9514
- };
9515
- const authTimeout = setTimeout(() => {
9516
- reject(new Error("Auth timeout"));
9517
- }, 1e4);
9518
- const authHandler = (event) => {
9519
- try {
9520
- const data = JSON.parse(event.data);
9521
- if (data.type === "auth_success") {
9522
- clearTimeout(authTimeout);
9523
- this.ws?.removeEventListener("message", authHandler);
9524
- resolve();
9525
- } else if (data.type === "auth_failed") {
9526
- clearTimeout(authTimeout);
9527
- reject(new Error(data.message));
9528
- }
9529
- } catch {
9530
- }
9531
- };
9532
- this.ws.addEventListener("message", authHandler);
9533
- });
9534
- }
9535
- handleAgentCoreMessage(data) {
9536
- switch (data.type) {
9537
- case "response_start":
9538
- this.setState("speaking");
9539
- this.isSpeaking = true;
9540
- this.emit("ai.response.start", {
9541
- text: data.text,
9542
- emotion: data.emotion
9543
- });
9544
- if (data.emotion) {
9545
- this.emotionController.transitionTo(
9546
- { [data.emotion]: 0.7 },
9547
- 300
9548
- );
9549
- }
9550
- if (this.pipeline) {
9551
- this.pipeline.start();
9552
- }
9553
- break;
9554
- case "response_chunk":
9555
- this.emit("ai.response.chunk", {
9556
- text: data.text,
9557
- isLast: data.isLast
9558
- });
9559
- break;
9560
- case "audio_chunk":
9561
- if (data.audio && this.pipeline) {
9562
- const audioData = this.base64ToArrayBuffer(data.audio);
9563
- const uint8 = new Uint8Array(audioData);
9564
- this.pipeline.onAudioChunk(uint8).catch((error) => {
9565
- console.error("[AgentCore] Pipeline chunk error:", error);
9566
- });
9567
- }
9568
- break;
9569
- case "audio_end":
9570
- if (this.pipeline) {
9571
- this.pipeline.end().catch((error) => {
9572
- console.error("[AgentCore] Pipeline end error:", error);
9573
- });
9574
- }
9575
- break;
9576
- case "response_end":
9577
- this.addToHistory({
9578
- role: "assistant",
9579
- content: data.fullText,
9580
- timestamp: Date.now(),
9581
- emotion: data.emotion
9582
- });
9583
- this.emit("ai.response.end", {
9584
- fullText: data.fullText,
9585
- durationMs: data.durationMs || 0
9586
- });
9587
- break;
9588
- case "memory_updated":
9589
- this.emit("memory.updated", {
9590
- messageCount: data.messageCount,
9591
- tokenCount: data.tokenCount
9592
- });
9593
- break;
9594
- case "error":
9595
- this.emit("connection.error", {
9596
- error: new Error(data.message),
9597
- recoverable: data.recoverable ?? false
9598
- });
9599
- break;
9600
- }
9601
- }
9602
- scheduleTranscription() {
9603
- if (this.audioBuffer.length === 0) return;
9604
- const totalLength = this.audioBuffer.reduce((sum2, buf) => sum2 + buf.length, 0);
9605
- if (totalLength < 4e3) return;
9606
- const audio = new Float32Array(totalLength);
9607
- let offset = 0;
9608
- for (const buf of this.audioBuffer) {
9609
- audio.set(buf, offset);
9610
- offset += buf.length;
9611
- }
9612
- this.audioBuffer = [];
9613
- let sum = 0;
9614
- for (let i = 0; i < audio.length; i++) {
9615
- sum += audio[i] * audio[i];
9616
- }
9617
- const rms = Math.sqrt(sum / audio.length);
9618
- if (rms < 0.01) {
9619
- console.debug("[AgentCore] Skipping silent audio", { rms, samples: audio.length });
9620
- return;
9621
- }
9622
- if (this.asr) {
9623
- this.setState("listening");
9624
- this.emit("user.speech.start", { timestamp: Date.now() });
9625
- this.asr.transcribe(audio).then((result) => {
9626
- this.emit("user.transcript.final", {
9627
- text: result.text,
9628
- confidence: 1
9629
- });
9630
- this.emit("user.speech.end", { timestamp: Date.now(), durationMs: result.inferenceTimeMs });
9631
- const cleanText = result.text.trim();
9632
- if (cleanText) {
9633
- this.sendText(cleanText).catch((error) => {
9634
- console.error("[AgentCore] Send text error:", error);
9635
- });
9636
- }
9637
- }).catch((error) => {
9638
- console.error("[AgentCore] Transcription error:", error);
9639
- });
9640
- }
9641
- }
9642
- // REMOVED: processAudioForAnimation() - now handled by FullFacePipeline
9643
- // The pipeline manages audio scheduling, LAM inference, and frame synchronization
9644
- // Frames are emitted via pipeline.on('full_frame_ready') event (see initPipeline())
9645
- /**
9646
- * Detect voice activity using Silero VAD
9647
- * Falls back to simple RMS if VAD not available
9648
- */
9649
- async detectVoiceActivity(audio) {
9650
- const float32 = audio instanceof Float32Array ? audio : int16ToFloat32(audio);
9651
- if (this.vad) {
9652
- const chunkSize = this.vad.getChunkSize();
9653
- for (let i = 0; i + chunkSize <= float32.length; i += chunkSize) {
9654
- const chunk = float32.slice(i, i + chunkSize);
9655
- const result = await this.vad.process(chunk);
9656
- if (result.isSpeech) {
9657
- return true;
9658
- }
9659
- }
9660
- return false;
9661
- }
9662
- let sum = 0;
9663
- for (let i = 0; i < float32.length; i++) {
9664
- sum += float32[i] * float32[i];
9665
- }
9666
- const rms = Math.sqrt(sum / float32.length);
9667
- return rms > 0.02;
9668
- }
9669
- base64ToArrayBuffer(base64) {
9670
- const binaryString = atob(base64);
9671
- const bytes = new Uint8Array(binaryString.length);
9672
- for (let i = 0; i < binaryString.length; i++) {
9673
- bytes[i] = binaryString.charCodeAt(i);
9674
- }
9675
- return bytes.buffer;
9676
- }
9677
- addToHistory(message) {
9678
- this.history.push(message);
9679
- this.emit("memory.updated", { messageCount: this.history.length });
9680
- }
9681
- handleDisconnect(event) {
9682
- this._isConnected = false;
9683
- if (event.code !== 1e3) {
9684
- if (this.wsReconnectAttempts < this.maxReconnectAttempts) {
9685
- this.wsReconnectAttempts++;
9686
- setTimeout(() => {
9687
- if (this.currentConfig) {
9688
- this.connect(this.currentConfig).catch(() => {
9689
- });
9690
- }
9691
- }, Math.pow(2, this.wsReconnectAttempts) * 1e3);
9692
- } else {
9693
- this.setState("error");
9694
- this.emit("connection.error", {
9695
- error: new Error("Max reconnection attempts reached"),
9696
- recoverable: false
9697
- });
9698
- }
9699
- }
9700
- this.emit("connection.closed", { reason: event.reason || "Connection closed" });
9701
- }
9702
- };
9703
-
9704
- // src/ai/orchestration/ConversationOrchestrator.ts
9705
/**
 * A single conversation session bound to an adapter.
 *
 * The session delegates connection, audio, text and interruption calls to the
 * adapter it was constructed with, tracks the time of the last activity, and
 * keeps a local copy of the message history plus a free-form key/value
 * context that can be exported to / imported from a snapshot.
 */
var ConversationSessionImpl = class {
  /**
   * @param {object} config - Session configuration. `sessionId`, `emotion`
   *   and `tenant.tenantId` / `tenant.characterId` are read by this class.
   * @param {object} adapter - Backend adapter; must provide `connect`,
   *   `disconnect`, `pushAudio`, `sendText`, `interrupt`, `getHistory` and a
   *   `state` property.
   */
  constructor(config, adapter) {
    this._history = [];
    this._context = new Map();
    this.sessionId = config.sessionId;
    this._config = config;
    this._adapter = adapter;
    this.createdAt = Date.now();
    this._lastActivityAt = Date.now();
    this._emotionController = new EmotionController();
    if (config.emotion) {
      this._emotionController.setPreset(config.emotion);
    }
  }

  /** The adapter this session talks to. */
  get adapter() {
    return this._adapter;
  }

  /** The configuration object passed to the constructor. */
  get config() {
    return this._config;
  }

  /** Current state as reported by the adapter. */
  get state() {
    return this._adapter.state;
  }

  /** A shallow copy of the locally stored history. */
  get history() {
    return [...this._history];
  }

  /**
   * Emotion weights for this session.
   * NOTE(review): this always returns an empty object; the emotion
   * controller's state is not surfaced here. Confirm whether that is intended.
   */
  get emotion() {
    return {};
  }

  /** Timestamp (ms since epoch) of the last recorded activity. */
  get lastActivityAt() {
    return this._lastActivityAt;
  }

  /** Connect the adapter using this session's config. */
  async start() {
    await this._adapter.connect(this._config);
    this._touch();
  }

  /** Disconnect the adapter. */
  async end() {
    await this._adapter.disconnect();
  }

  /** Forward an audio chunk to the adapter. */
  pushAudio(audio) {
    this._adapter.pushAudio(audio);
    this._touch();
  }

  /** Send a text message through the adapter. */
  async sendText(text) {
    await this._adapter.sendText(text);
    this._touch();
  }

  /** Ask the adapter to interrupt the current response. */
  interrupt() {
    this._adapter.interrupt();
    this._touch();
  }

  /** Pass emotion weights to the emotion controller. */
  setEmotion(emotion) {
    this._emotionController.set(emotion);
  }

  /** Store a context value under `key`. */
  addContext(key, value) {
    this._context.set(key, value);
  }

  /** Remove the context value stored under `key`. */
  removeContext(key) {
    this._context.delete(key);
  }

  /** @returns {object} Plain-object copy of the context map. */
  getContext() {
    return Object.fromEntries(this._context);
  }

  /**
   * Produce a snapshot of the session.
   *
   * The history array is copied so that callers mutating the snapshot cannot
   * alter the session's internal history (consistent with the `history`
   * getter and with `import`).
   *
   * @returns {object} Snapshot with ids, history, context, emotion and timestamps.
   */
  export() {
    return {
      sessionId: this.sessionId,
      tenantId: this._config.tenant.tenantId,
      characterId: this._config.tenant.characterId,
      history: [...this._history],
      context: Object.fromEntries(this._context),
      emotion: this.emotion,
      createdAt: this.createdAt,
      lastActivityAt: this._lastActivityAt
    };
  }

  /**
   * Restore history, context and last-activity time from a snapshot.
   *
   * Missing `history` / `context` fields are treated as empty, and a missing
   * `lastActivityAt` leaves the current value untouched, instead of throwing.
   *
   * @param {object} snapshot - Object shaped like the result of `export()`.
   */
  import(snapshot) {
    this._history = [...(snapshot.history ?? [])];
    this._context = new Map(Object.entries(snapshot.context ?? {}));
    this._lastActivityAt = snapshot.lastActivityAt ?? this._lastActivityAt;
  }

  /** Replace the local history with a copy of the adapter's history. */
  syncHistory() {
    // Copy so later changes to the adapter's array do not leak into the session.
    this._history = [...this._adapter.getHistory()];
  }

  /** Record "now" as the last activity time. */
  _touch() {
    this._lastActivityAt = Date.now();
  }
};
9789
/**
 * Creates and tracks conversation sessions for registered tenants.
 *
 * All sessions share the single adapter created in the constructor. Adapter
 * events are re-emitted on the orchestrator with the owning `sessionId`
 * merged into the payload.
 */
var ConversationOrchestrator = class extends EventEmitter {
  /**
   * @param {object} config - Orchestrator options. `config.adapter` is passed
   *   to the adapter constructor; `connectionTimeoutMs` (default 5000) and
   *   `maxRetries` (default 3) are stored on `this.config`.
   */
  constructor(config) {
    super();
    // Sessions keyed by session id
    this.sessions = new Map();
    // Tenant configurations keyed by tenant id
    this.tenants = new Map();
    // Health monitoring
    this.healthCheckInterval = null;
    this.HEALTH_CHECK_INTERVAL_MS = 3e4;
    // sessionId -> unsubscribe functions for the adapter listeners of that session
    this.eventForwarders = new Map();
    this.config = {
      connectionTimeoutMs: 5e3,
      maxRetries: 3,
      ...config
    };
    this.adapter = new AgentCoreAdapter(config.adapter);
  }

  /**
   * Register a tenant
   */
  registerTenant(tenant) {
    this.tenants.set(tenant.tenantId, tenant);
  }

  /**
   * Unregister a tenant
   */
  unregisterTenant(tenantId) {
    this.tenants.delete(tenantId);
  }

  /**
   * Get tenant config
   */
  getTenant(tenantId) {
    return this.tenants.get(tenantId);
  }

  /**
   * Create a new conversation session for a tenant.
   *
   * If starting the session fails, the session is unregistered again and its
   * event forwarding is removed before the error is rethrown.
   *
   * @param {string} tenantId - Id of a previously registered tenant.
   * @param {object} [options] - Optional `sessionId`, `systemPrompt`, `voice`,
   *   `emotion` and `language`.
   * @returns {Promise<object>} The started session.
   * @throws {Error} When the tenant is unknown or the session fails to start.
   */
  async createSession(tenantId, options = {}) {
    const tenant = this.tenants.get(tenantId);
    if (!tenant) {
      throw new Error(`Tenant not found: ${tenantId}`);
    }
    const sessionId = options.sessionId || this.generateSessionId();
    const sessionConfig = {
      sessionId,
      tenant,
      systemPrompt: options.systemPrompt,
      voice: options.voice,
      emotion: options.emotion,
      language: options.language
    };
    const session = new ConversationSessionImpl(sessionConfig, this.adapter);
    this.sessions.set(sessionId, session);
    this.forwardAdapterEvents(this.adapter, sessionId);
    try {
      await session.start();
    } catch (error) {
      // Roll back so a failed start does not leave a dead session behind.
      if (this.sessions.get(sessionId) === session) {
        this.sessions.delete(sessionId);
      }
      this.stopForwardingAdapterEvents(sessionId);
      throw error;
    }
    this.emit("session.created", { sessionId, tenantId });
    return session;
  }

  /**
   * End a session and stop forwarding adapter events for it.
   * Does nothing when the session id is unknown.
   */
  async endSession(sessionId) {
    const session = this.sessions.get(sessionId);
    if (!session) {
      return;
    }
    await session.end();
    this.sessions.delete(sessionId);
    this.stopForwardingAdapterEvents(sessionId);
    this.emit("session.ended", { sessionId, reason: "Client requested" });
  }

  /**
   * Get session by ID
   */
  getSession(sessionId) {
    return this.sessions.get(sessionId);
  }

  /**
   * Get all sessions for a tenant
   */
  getTenantSessions(tenantId) {
    return Array.from(this.sessions.values()).filter((s) => s.config.tenant.tenantId === tenantId);
  }

  /**
   * Start health monitoring (no-op when already running)
   */
  startHealthMonitoring() {
    if (this.healthCheckInterval) return;
    this.healthCheckInterval = setInterval(() => {
      // performHealthCheck handles its own errors, so this never rejects.
      void this.performHealthCheck();
    }, this.HEALTH_CHECK_INTERVAL_MS);
  }

  /**
   * Stop health monitoring
   */
  stopHealthMonitoring() {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
    }
  }

  /**
   * Dispose all resources.
   *
   * Every session is asked to end; cleanup (listener removal, clearing the
   * session map, disconnecting the adapter) runs even if some of them fail.
   * The first session failure, if any, is rethrown after cleanup.
   */
  async dispose() {
    this.stopHealthMonitoring();
    const results = await Promise.allSettled(
      Array.from(this.sessions.values(), (s) => s.end())
    );
    for (const sessionId of Array.from(this.eventForwarders.keys())) {
      this.stopForwardingAdapterEvents(sessionId);
    }
    this.sessions.clear();
    await this.adapter.disconnect();
    const failure = results.find((r) => r.status === "rejected");
    if (failure) {
      throw failure.reason;
    }
  }

  // ==================== Private Methods ====================

  /** Build an id of the form `sess_<timestamp>_<random base36 suffix>`. */
  generateSessionId() {
    // slice(2, 11) takes the same (up to) 9 characters the deprecated substr(2, 9) did.
    return `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Re-emit adapter events on the orchestrator, tagged with `sessionId`.
   *
   * Listeners are tracked per session so they can be removed when the session
   * ends; events are also ignored once the session is no longer registered.
   * NOTE(review): the adapter's emitter API is defined outside this block, so
   * unsubscription is probed at runtime — either `on()` returns an unsubscribe
   * function or the adapter exposes `off(event, handler)`.
   */
  forwardAdapterEvents(adapter, sessionId) {
    // Avoid stacking duplicate listeners when a session id is reused.
    this.stopForwardingAdapterEvents(sessionId);
    const events = [
      "state.change",
      "user.speech.start",
      "user.speech.end",
      "user.transcript.partial",
      "user.transcript.final",
      "ai.thinking.start",
      "ai.response.start",
      "ai.response.chunk",
      "ai.response.end",
      "audio.output.chunk",
      "audio.output.end",
      "animation",
      "memory.updated",
      "connection.error",
      "interruption.detected",
      "interruption.handled"
    ];
    const unsubscribers = events.map((event) => {
      const handler = (data) => {
        if (!this.sessions.has(sessionId)) {
          return;
        }
        this.emit(event, { ...data, sessionId });
      };
      const subscription = adapter.on(event, handler);
      return () => {
        if (typeof subscription === "function") {
          subscription();
        } else if (typeof adapter.off === "function") {
          adapter.off(event, handler);
        }
      };
    });
    this.eventForwarders.set(sessionId, unsubscribers);
  }

  /** Remove the adapter listeners previously registered for `sessionId`. */
  stopForwardingAdapterEvents(sessionId) {
    const unsubscribers = this.eventForwarders.get(sessionId);
    if (!unsubscribers) {
      return;
    }
    this.eventForwarders.delete(sessionId);
    for (const unsubscribe of unsubscribers) {
      unsubscribe();
    }
  }

  /** Run the adapter's health check; failures are deliberately ignored (best effort). */
  async performHealthCheck() {
    try {
      await this.adapter.healthCheck();
    } catch {
    }
  }
};
9936
-
9937
- // src/ai/tenancy/TenantManager.ts
9938
/**
 * In-memory registry of tenants with per-tenant quotas and usage counters.
 *
 * Usage counters for requests reset after 60 seconds and audio minutes after
 * 24 hours, both measured from the previous reset (rolling windows, checked
 * lazily when the counters are read or updated).
 */
var _TenantManager = class _TenantManager {
  constructor() {
    this.tenants = new Map();
    this.quotas = new Map();
    this.usage = new Map();
    this.tokenRefreshCallbacks = new Map();
  }

  /**
   * Register a tenant with quota.
   *
   * The quota object is copied so that later changes to the object returned
   * by `getQuota` cannot modify the shared `DEFAULT_QUOTA` (or the caller's
   * object). Registering an existing tenant id resets its usage counters.
   *
   * @param {object} tenant - Tenant config; `tenantId` is used as the key.
   * @param {object} [quota] - Quota limits; defaults to `DEFAULT_QUOTA`.
   * @param {Function} [tokenRefreshCallback] - Async function returning a fresh auth token.
   */
  register(tenant, quota = _TenantManager.DEFAULT_QUOTA, tokenRefreshCallback) {
    const now = Date.now();
    this.tenants.set(tenant.tenantId, tenant);
    this.quotas.set(tenant.tenantId, { ...quota });
    this.usage.set(tenant.tenantId, {
      currentSessions: 0,
      requestsThisMinute: 0,
      tokensUsed: 0,
      audioMinutesToday: 0,
      lastMinuteReset: now,
      lastDailyReset: now
    });
    if (tokenRefreshCallback) {
      this.tokenRefreshCallbacks.set(tenant.tenantId, tokenRefreshCallback);
    }
  }

  /**
   * Unregister a tenant
   */
  unregister(tenantId) {
    this.tenants.delete(tenantId);
    this.quotas.delete(tenantId);
    this.usage.delete(tenantId);
    this.tokenRefreshCallbacks.delete(tenantId);
  }

  /**
   * Get tenant config
   */
  get(tenantId) {
    return this.tenants.get(tenantId);
  }

  /**
   * Check if tenant exists
   */
  has(tenantId) {
    return this.tenants.has(tenantId);
  }

  /**
   * Get all tenant IDs
   */
  getTenantIds() {
    return Array.from(this.tenants.keys());
  }

  /**
   * Check if tenant can create new session (false for unknown tenants)
   */
  canCreateSession(tenantId) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    return usage.currentSessions < quota.maxSessions;
  }

  /**
   * Check if tenant can make request (false for unknown tenants)
   */
  canMakeRequest(tenantId) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    this.checkMinuteReset(tenantId);
    return usage.requestsThisMinute < quota.requestsPerMinute;
  }

  /**
   * Check if tenant can use `minutes` more of audio today (false for unknown tenants)
   */
  canUseAudio(tenantId, minutes) {
    const quota = this.quotas.get(tenantId);
    const usage = this.usage.get(tenantId);
    if (!quota || !usage) return false;
    this.checkDailyReset(tenantId);
    return usage.audioMinutesToday + minutes <= quota.maxAudioMinutesPerDay;
  }

  /**
   * Increment session count
   */
  incrementSessions(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      usage.currentSessions++;
    }
  }

  /**
   * Decrement session count (never below zero)
   */
  decrementSessions(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage && usage.currentSessions > 0) {
      usage.currentSessions--;
    }
  }

  /**
   * Record a request
   */
  recordRequest(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      this.checkMinuteReset(tenantId);
      usage.requestsThisMinute++;
    }
  }

  /**
   * Record token usage
   */
  recordTokens(tenantId, tokens) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      usage.tokensUsed += tokens;
    }
  }

  /**
   * Record audio usage
   */
  recordAudioMinutes(tenantId, minutes) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      this.checkDailyReset(tenantId);
      usage.audioMinutesToday += minutes;
    }
  }

  /**
   * Get fresh auth token for tenant.
   *
   * When a refresh callback is registered its result is stored on
   * `tenant.credentials.authToken` and returned; otherwise the stored token
   * is returned. A tenant without a `credentials` object is handled (the
   * object is created on refresh) rather than causing a TypeError.
   *
   * @param {string} tenantId
   * @returns {Promise<string>} The auth token.
   * @throws {Error} When the tenant is unknown or no token is available.
   */
  async getAuthToken(tenantId) {
    const tenant = this.tenants.get(tenantId);
    if (!tenant) {
      throw new Error(`Tenant not found: ${tenantId}`);
    }
    const callback = this.tokenRefreshCallbacks.get(tenantId);
    if (callback) {
      const token = await callback();
      if (!tenant.credentials) {
        tenant.credentials = {};
      }
      tenant.credentials.authToken = token;
      return token;
    }
    const storedToken = tenant.credentials?.authToken;
    if (storedToken) {
      return storedToken;
    }
    throw new Error(`No auth token available for tenant: ${tenantId}`);
  }

  /**
   * Update tenant credentials (merged over the existing ones)
   */
  updateCredentials(tenantId, credentials) {
    const tenant = this.tenants.get(tenantId);
    if (tenant) {
      tenant.credentials = { ...tenant.credentials, ...credentials };
    }
  }

  /**
   * Get usage stats for tenant
   */
  getUsage(tenantId) {
    return this.usage.get(tenantId);
  }

  /**
   * Get quota for tenant
   */
  getQuota(tenantId) {
    return this.quotas.get(tenantId);
  }

  /**
   * Update quota for tenant (merged over the existing quota; ignored for unknown tenants)
   */
  updateQuota(tenantId, quota) {
    const existing = this.quotas.get(tenantId);
    if (existing) {
      this.quotas.set(tenantId, { ...existing, ...quota });
    }
  }

  /**
   * Reset request, token and audio usage for a tenant.
   * The current session count is left untouched.
   */
  resetUsage(tenantId) {
    const usage = this.usage.get(tenantId);
    if (usage) {
      const now = Date.now();
      usage.requestsThisMinute = 0;
      usage.tokensUsed = 0;
      usage.audioMinutesToday = 0;
      usage.lastMinuteReset = now;
      usage.lastDailyReset = now;
    }
  }

  // ==================== Private Methods ====================

  /** Zero the per-minute request counter once 60 s have passed since the last reset. */
  checkMinuteReset(tenantId) {
    const usage = this.usage.get(tenantId);
    if (!usage) return;
    const now = Date.now();
    if (now - usage.lastMinuteReset >= 6e4) {
      usage.requestsThisMinute = 0;
      usage.lastMinuteReset = now;
    }
  }

  /** Zero the daily audio counter once 24 h have passed since the last reset. */
  checkDailyReset(tenantId) {
    const usage = this.usage.get(tenantId);
    if (!usage) return;
    const now = Date.now();
    const MS_PER_DAY = 24 * 60 * 60 * 1e3;
    if (now - usage.lastDailyReset >= MS_PER_DAY) {
      usage.audioMinutesToday = 0;
      usage.lastDailyReset = now;
    }
  }
};
/**
 * Default quota for new tenants
 */
_TenantManager.DEFAULT_QUOTA = {
  maxSessions: 10,
  requestsPerMinute: 60,
  maxTokensPerConversation: 1e5,
  maxAudioMinutesPerDay: 60
};
var TenantManager = _TenantManager;
10159
-
10160
- // src/ai/utils/AudioSyncManager.ts
10161
/**
 * Queues incoming audio chunks for playback through an AudioContext while
 * also accumulating them into fixed-size, overlapping windows that are
 * announced via "buffer.ready".
 *
 * Events emitted: "buffer.ready" ({ audio }), "playback.start", "playback.end",
 * "sync.drift" ({ driftMs }) and "playback.error" ({ error }).
 */
var AudioSyncManager = class extends EventEmitter {
  /**
   * @param {object} [config] - Overrides for `sampleRate` (16000),
   *   `bufferSize` (16640), `overlapSize` (4160) and `maxDriftMs` (100).
   * @throws {RangeError} When `bufferSize` is not a positive integer or
   *   `overlapSize` is not an integer in `[0, bufferSize)`. Such values would
   *   make the windowing loop in `bufferForInference` never terminate.
   */
  constructor(config = {}) {
    super();
    this.bufferPosition = 0;
    this.playbackQueue = [];
    this.isPlaying = false;
    this.audioContext = null;
    this.playbackStartTime = 0;
    this.samplesPlayed = 0;
    // Source node currently scheduled/playing, so stop() can halt it.
    this.currentSource = null;
    // Bumped by stop(); lets a superseded playback loop notice and exit.
    this.playbackGeneration = 0;
    this.config = {
      sampleRate: 16e3,
      bufferSize: 16640,
      overlapSize: 4160,
      maxDriftMs: 100,
      ...config
    };
    const { bufferSize, overlapSize } = this.config;
    if (!Number.isInteger(bufferSize) || bufferSize <= 0) {
      throw new RangeError(`bufferSize must be a positive integer, got ${bufferSize}`);
    }
    if (!Number.isInteger(overlapSize) || overlapSize < 0 || overlapSize >= bufferSize) {
      throw new RangeError(
        `overlapSize must be an integer in [0, bufferSize), got ${overlapSize}`
      );
    }
    this.audioBuffer = new Float32Array(bufferSize);
  }

  /**
   * Initialize audio context (created on first call, resumed if suspended)
   */
  async initialize() {
    if (!this.audioContext) {
      this.audioContext = new AudioContext({ sampleRate: this.config.sampleRate });
    }
    if (this.audioContext.state === "suspended") {
      await this.audioContext.resume();
    }
  }

  /**
   * Push audio chunk for processing and playback.
   *
   * Playback only starts once an audio context exists; until then chunks
   * simply accumulate in the queue. Playback failures are reported through
   * the "playback.error" event instead of an unhandled promise rejection.
   *
   * @param {Float32Array} audio - Samples to queue.
   */
  pushAudio(audio) {
    this.playbackQueue.push(audio);
    this.bufferForInference(audio);
    if (!this.isPlaying && this.playbackQueue.length > 0) {
      this.startPlayback().catch((error) => {
        this.emit("playback.error", { error });
      });
    }
  }

  /**
   * Buffer audio for inference.
   *
   * Fills the window buffer; each time it is full a copy is emitted as
   * "buffer.ready" and the last `overlapSize` samples are moved to the front
   * so consecutive windows overlap.
   */
  bufferForInference(audio) {
    let offset = 0;
    while (offset < audio.length) {
      const remaining = this.config.bufferSize - this.bufferPosition;
      const toCopy = Math.min(remaining, audio.length - offset);
      this.audioBuffer.set(audio.subarray(offset, offset + toCopy), this.bufferPosition);
      this.bufferPosition += toCopy;
      offset += toCopy;
      if (this.bufferPosition >= this.config.bufferSize) {
        this.emit("buffer.ready", { audio: new Float32Array(this.audioBuffer) });
        const overlapStart = this.config.bufferSize - this.config.overlapSize;
        this.audioBuffer.copyWithin(0, overlapStart);
        this.bufferPosition = this.config.overlapSize;
      }
    }
  }

  /**
   * Start audio playback (no-op without an audio context or when already playing)
   */
  async startPlayback() {
    if (!this.audioContext || this.isPlaying) return;
    this.isPlaying = true;
    this.playbackStartTime = this.audioContext.currentTime;
    this.samplesPlayed = 0;
    this.emit("playback.start", {});
    await this.processPlaybackQueue();
  }

  /**
   * Process playback queue.
   *
   * Plays queued chunks one after another, waiting for each source to end.
   * Zero-length chunks are skipped. The playing flag is always cleared when
   * the loop finishes or throws, unless stop() has already taken over.
   */
  async processPlaybackQueue() {
    const context = this.audioContext;
    if (!context) return;
    const generation = this.playbackGeneration;
    try {
      while (generation === this.playbackGeneration && this.playbackQueue.length > 0) {
        const audio = this.playbackQueue.shift();
        if (audio.length === 0) {
          // An AudioBuffer cannot be created with zero frames.
          continue;
        }
        const buffer = context.createBuffer(1, audio.length, this.config.sampleRate);
        buffer.copyToChannel(audio, 0);
        const source = context.createBufferSource();
        source.buffer = buffer;
        source.connect(context.destination);
        const ended = new Promise((resolve) => {
          source.onended = resolve;
        });
        const playTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
        source.start(playTime);
        this.currentSource = source;
        this.samplesPlayed += audio.length;
        this.checkDrift();
        await ended;
        if (this.currentSource === source) {
          this.currentSource = null;
        }
      }
    } finally {
      // A loop superseded by stop() must not touch state owned by a newer loop.
      if (generation === this.playbackGeneration) {
        this.isPlaying = false;
        this.emit("playback.end", {});
      }
    }
  }

  /**
   * Check for audio/animation drift.
   *
   * Compares the context clock with the scheduled end of all audio handed to
   * the context so far and emits "sync.drift" when the difference exceeds
   * `maxDriftMs`.
   */
  checkDrift() {
    if (!this.audioContext) return;
    const expectedTime = this.playbackStartTime + this.samplesPlayed / this.config.sampleRate;
    const actualTime = this.audioContext.currentTime;
    const driftMs = (actualTime - expectedTime) * 1e3;
    if (Math.abs(driftMs) > this.config.maxDriftMs) {
      this.emit("sync.drift", { driftMs });
    }
  }

  /**
   * Clear playback queue and reset the inference window buffer
   */
  clearQueue() {
    this.playbackQueue = [];
    this.bufferPosition = 0;
    this.audioBuffer.fill(0);
  }

  /**
   * Stop playback.
   *
   * Clears pending audio, halts the source that is currently playing and
   * invalidates the running playback loop so that a later pushAudio() starts
   * a single fresh loop. Emits "playback.end" if playback was active.
   */
  stop() {
    const wasPlaying = this.isPlaying;
    this.playbackGeneration += 1;
    this.clearQueue();
    const source = this.currentSource;
    this.currentSource = null;
    if (source) {
      try {
        source.stop();
      } catch {
        // The source may already have finished; nothing left to stop.
      }
    }
    this.isPlaying = false;
    if (wasPlaying) {
      this.emit("playback.end", {});
    }
  }

  /**
   * Get current playback position in seconds
   */
  getPlaybackPosition() {
    if (!this.audioContext) return 0;
    return this.audioContext.currentTime - this.playbackStartTime;
  }

  /**
   * Check if currently playing
   */
  getIsPlaying() {
    return this.isPlaying;
  }

  /**
   * Dispose resources (stops playback and closes the audio context)
   */
  dispose() {
    this.stop();
    const context = this.audioContext;
    this.audioContext = null;
    if (context) {
      // Closing is best effort: a context that is already closed rejects,
      // which is not actionable during teardown.
      Promise.resolve(context.close()).catch(() => {
      });
    }
  }
};
10302
-
10303
- // src/ai/utils/InterruptionHandler.ts
10304
/**
 * Turns a stream of VAD probabilities into speech start/end events and
 * detects when the user talks over the AI.
 *
 * Events emitted: "speech.detected" ({ rms }), "speech.ended" ({ durationMs })
 * and "interruption.triggered" ({ rms, durationMs }).
 */
var InterruptionHandler = class extends EventEmitter {
  /**
   * @param {object} [config] - Overrides for `vadThreshold` (0.5),
   *   `minSpeechDurationMs` (200), `silenceTimeoutMs` (500) and `enabled` (true).
   */
  constructor(config = {}) {
    super();
    this.isSpeaking = false;
    this.speechStartTime = 0;
    this.lastSpeechTime = 0;
    this.silenceTimer = null;
    this.aiIsSpeaking = false;
    // At most one interruption is reported per continuous stretch of speech.
    this.interruptionTriggeredThisSession = false;
    const defaults = {
      // Silero VAD default
      vadThreshold: 0.5,
      // Google/Amazon barge-in standard
      minSpeechDurationMs: 200,
      // OpenAI Realtime API standard
      silenceTimeoutMs: 500,
      enabled: true
    };
    this.config = Object.assign(defaults, config);
  }

  /**
   * Feed one VAD result into the detector.
   *
   * @param {number} vadProbability - Speech probability from VAD (0-1).
   * @param {number} [audioEnergy=0] - Optional RMS energy reported in events;
   *   when it is 0 (or otherwise falsy) the VAD probability is reported instead.
   */
  processVADResult(vadProbability, audioEnergy = 0) {
    if (!this.config.enabled) return;
    const isSpeech = vadProbability > this.config.vadThreshold;
    if (isSpeech) {
      this.onSpeechDetected(audioEnergy || vadProbability);
      return;
    }
    this.onSilenceDetected();
  }

  /**
   * Record whether the AI is currently speaking.
   */
  setAISpeaking(speaking) {
    this.aiIsSpeaking = speaking;
  }

  /**
   * Turn detection on or off; turning it off also resets the speech state.
   */
  setEnabled(enabled) {
    this.config.enabled = enabled;
    if (enabled) return;
    this.reset();
  }

  /**
   * Merge new values into the current configuration.
   */
  updateConfig(config) {
    this.config = Object.assign({}, this.config, config);
  }

  /**
   * Clear the speech state and cancel any pending silence timer.
   * The AI-speaking flag is not changed.
   */
  reset() {
    Object.assign(this, {
      isSpeaking: false,
      speechStartTime: 0,
      lastSpeechTime: 0,
      interruptionTriggeredThisSession: false
    });
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
  }

  /**
   * @returns {{isSpeaking: boolean, speechDurationMs: number}} Whether speech
   *   is in progress and, if so, how long it has lasted so far.
   */
  getState() {
    const { isSpeaking } = this;
    const speechDurationMs = isSpeaking ? Date.now() - this.speechStartTime : 0;
    return { isSpeaking, speechDurationMs };
  }

  // ==================== Private Methods ====================

  /** Handle a frame classified as speech. */
  onSpeechDetected(rms) {
    const timestamp = Date.now();
    this.lastSpeechTime = timestamp;

    // Speech resumed before the silence timeout elapsed: keep the segment open.
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }

    if (!this.isSpeaking) {
      this.isSpeaking = true;
      this.speechStartTime = timestamp;
      this.emit("speech.detected", { rms });
    }

    const mayInterrupt = this.aiIsSpeaking && !this.interruptionTriggeredThisSession;
    if (!mayInterrupt) return;

    const durationMs = timestamp - this.speechStartTime;
    if (durationMs >= this.config.minSpeechDurationMs) {
      this.interruptionTriggeredThisSession = true;
      this.emit("interruption.triggered", { rms, durationMs });
    }
  }

  /** Handle a frame classified as silence. */
  onSilenceDetected() {
    // Nothing to end, or the end-of-speech timer is already running.
    if (!this.isSpeaking || this.silenceTimer) return;

    const finishSpeech = () => {
      const durationMs = this.lastSpeechTime - this.speechStartTime;
      this.isSpeaking = false;
      this.silenceTimer = null;
      this.interruptionTriggeredThisSession = false;
      this.emit("speech.ended", { durationMs });
    };
    this.silenceTimer = setTimeout(finishSpeech, this.config.silenceTimeoutMs);
  }
};
10415
-
10416
9282
  // src/animation/types.ts
10417
9283
  var DEFAULT_ANIMATION_CONFIG = {
10418
9284
  initialState: "idle",