@estuary-ai/sdk 0.1.23 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -120,6 +120,15 @@ import { parseActions } from '@estuary-ai/sdk';
120
120
  const { actions, cleanText } = parseActions(rawBotText);
121
121
  ```
122
122
 
123
+ ### Character Info
124
+
125
+ Fetch character details (name, avatar, 3D model URLs):
126
+
127
+ ```typescript
128
+ const character = await client.getCharacter();
129
+ console.log(character.name, character.avatar);
130
+ ```
131
+
123
132
  ### Memory & Knowledge Graph
124
133
 
125
134
  ```typescript
@@ -237,9 +246,21 @@ interface EstuaryConfig {
237
246
  voiceTransport?: 'websocket' | 'livekit' | 'auto'; // Default: 'auto'
238
247
  realtimeMemory?: boolean; // Enable real-time memory extraction events. Default: false
239
248
  suppressMicDuringPlayback?: boolean; // Mute mic while bot audio plays (software AEC). Default: false
249
+ autoInterruptOnSpeech?: boolean; // Interrupt bot audio when user speaks. Default: true
240
250
  }
241
251
  ```
242
252
 
253
+ ## Runtime Properties
254
+
255
+ ```typescript
256
+ client.connectionState // ConnectionState enum (Disconnected, Connecting, Connected, ...)
257
+ client.isConnected // boolean shorthand
258
+ client.isVoiceActive // true while voice session is running
259
+ client.isMuted // current mute state
260
+ client.suppressMicDuringPlayback // get/set at runtime without reconnecting
261
+ client.session // SessionInfo | null after connect
262
+ ```
263
+
243
264
  ## Exports
244
265
 
245
266
  Key exports:
@@ -261,6 +282,7 @@ import { parseActions } from '@estuary-ai/sdk';
261
282
  import type {
262
283
  EstuaryConfig,
263
284
  SessionInfo,
285
+ CharacterInfo,
264
286
  BotResponse,
265
287
  BotVoice,
266
288
  SttResponse,
package/dist/index.d.mts CHANGED
@@ -147,6 +147,8 @@ type EstuaryEventMap = {
147
147
  livekitDisconnected: () => void;
148
148
  audioPlaybackStarted: (messageId: string) => void;
149
149
  audioPlaybackComplete: (messageId: string) => void;
150
+ /** Bot audio level 0.0–1.0, emitted during playback for both transports. */
151
+ botAudioLevel: (level: number) => void;
150
152
  memoryUpdated: (event: MemoryUpdatedEvent) => void;
151
153
  };
152
154
  interface VoiceManager {
@@ -155,6 +157,10 @@ interface VoiceManager {
155
157
  toggleMute(): void;
156
158
  /** Suppress audio sending (software AEC). No-op if not supported. */
157
159
  setSuppressed?(suppressed: boolean): void;
160
+ /** Set callback for speaking state from participant attributes (LiveKit only). */
161
+ setSpeakingStateCallback?(cb: (speaking: boolean) => void): void;
162
+ /** Set callback for audio level updates (0-1) during bot speech. */
163
+ setAudioLevelCallback?(cb: (level: number) => void): void;
158
164
  readonly isMuted: boolean;
159
165
  readonly isActive: boolean;
160
166
  dispose(): void;
@@ -368,6 +374,8 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
368
374
  private forwardSocketEvents;
369
375
  private handleBotResponse;
370
376
  private handleBotVoice;
377
+ /** Compute RMS audio level (0-1) from base64-encoded Int16 PCM. */
378
+ private computeAudioLevel;
371
379
  private maybeAutoInterrupt;
372
380
  }
373
381
 
package/dist/index.d.ts CHANGED
@@ -147,6 +147,8 @@ type EstuaryEventMap = {
147
147
  livekitDisconnected: () => void;
148
148
  audioPlaybackStarted: (messageId: string) => void;
149
149
  audioPlaybackComplete: (messageId: string) => void;
150
+ /** Bot audio level 0.0–1.0, emitted during playback for both transports. */
151
+ botAudioLevel: (level: number) => void;
150
152
  memoryUpdated: (event: MemoryUpdatedEvent) => void;
151
153
  };
152
154
  interface VoiceManager {
@@ -155,6 +157,10 @@ interface VoiceManager {
155
157
  toggleMute(): void;
156
158
  /** Suppress audio sending (software AEC). No-op if not supported. */
157
159
  setSuppressed?(suppressed: boolean): void;
160
+ /** Set callback for speaking state from participant attributes (LiveKit only). */
161
+ setSpeakingStateCallback?(cb: (speaking: boolean) => void): void;
162
+ /** Set callback for audio level updates (0-1) during bot speech. */
163
+ setAudioLevelCallback?(cb: (level: number) => void): void;
158
164
  readonly isMuted: boolean;
159
165
  readonly isActive: boolean;
160
166
  dispose(): void;
@@ -368,6 +374,8 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
368
374
  private forwardSocketEvents;
369
375
  private handleBotResponse;
370
376
  private handleBotVoice;
377
+ /** Compute RMS audio level (0-1) from base64-encoded Int16 PCM. */
378
+ private computeAudioLevel;
371
379
  private maybeAutoInterrupt;
372
380
  }
373
381
 
package/dist/index.js CHANGED
@@ -247,6 +247,13 @@ var init_livekit_voice = __esm({
247
247
  // livekit-client Room (dynamically imported)
248
248
  _isMuted = false;
249
249
  _isActive = false;
250
+ speakingStateCallback = null;
251
+ audioLevelCallback = null;
252
+ // Audio analyser (via livekit-client's createAudioAnalyser)
253
+ calculateVolume = null;
254
+ analyserCleanup = null;
255
+ audioLevelPollTimer = null;
256
+ _isBotSpeaking = false;
250
257
  constructor(socketManager, logger) {
251
258
  this.socketManager = socketManager;
252
259
  this.logger = logger;
@@ -257,6 +264,12 @@ var init_livekit_voice = __esm({
257
264
  get isActive() {
258
265
  return this._isActive;
259
266
  }
267
+ setSpeakingStateCallback(cb) {
268
+ this.speakingStateCallback = cb;
269
+ }
270
+ setAudioLevelCallback(cb) {
271
+ this.audioLevelCallback = cb;
272
+ }
260
273
  async start() {
261
274
  if (this._isActive) {
262
275
  throw new exports.EstuaryError("VOICE_ALREADY_ACTIVE" /* VOICE_ALREADY_ACTIVE */, "Voice is already active");
@@ -296,16 +309,24 @@ var init_livekit_voice = __esm({
296
309
  }
297
310
  audioElement.play().catch(() => {
298
311
  });
312
+ this.setupAnalyser(track);
313
+ if (this._isBotSpeaking) {
314
+ setTimeout(() => this.startAudioLevelPolling(), 50);
315
+ }
299
316
  }
300
317
  });
301
318
  this.room.on(RoomEvent.TrackUnsubscribed, (track) => {
302
319
  if (track.kind === Track.Kind.Audio) {
320
+ this.teardownAnalyser();
303
321
  track.detach().forEach((el) => el.remove());
304
322
  }
305
323
  });
306
324
  this.room.on(RoomEvent.Disconnected, () => {
307
325
  this.logger.debug("LiveKit room disconnected");
308
326
  this._isActive = false;
327
+ this._isBotSpeaking = false;
328
+ this.teardownAnalyser();
329
+ this.speakingStateCallback?.(false);
309
330
  });
310
331
  try {
311
332
  await this.room.connect(tokenData.url, tokenData.token);
@@ -319,6 +340,23 @@ var init_livekit_voice = __esm({
319
340
  err
320
341
  );
321
342
  }
343
+ this.room.on(
344
+ RoomEvent.ParticipantAttributesChanged,
345
+ (changedAttributes, participant) => {
346
+ if (participant === this.room?.localParticipant) return;
347
+ const state = changedAttributes["estuary.state"];
348
+ if (state === "speaking") {
349
+ this._isBotSpeaking = true;
350
+ this.speakingStateCallback?.(true);
351
+ this.startAudioLevelPolling();
352
+ } else if (state === "idle") {
353
+ this._isBotSpeaking = false;
354
+ this.stopAudioLevelPolling();
355
+ this.speakingStateCallback?.(false);
356
+ this.audioLevelCallback?.(0);
357
+ }
358
+ }
359
+ );
322
360
  try {
323
361
  await this.room.localParticipant.setMicrophoneEnabled(true);
324
362
  this.logger.debug("Microphone enabled");
@@ -342,6 +380,9 @@ var init_livekit_voice = __esm({
342
380
  this.socketManager.emitEvent("livekit_leave");
343
381
  } catch {
344
382
  }
383
+ this._isBotSpeaking = false;
384
+ this.teardownAnalyser();
385
+ this.speakingStateCallback?.(false);
345
386
  if (this.room) {
346
387
  for (const [, publication] of this.room.localParticipant.trackPublications) {
347
388
  if (publication.track) {
@@ -362,6 +403,10 @@ var init_livekit_voice = __esm({
362
403
  this.logger.debug("Mute toggled:", this._isMuted);
363
404
  }
364
405
  dispose() {
406
+ this.speakingStateCallback = null;
407
+ this.audioLevelCallback = null;
408
+ this._isBotSpeaking = false;
409
+ this.teardownAnalyser();
365
410
  if (this.room) {
366
411
  this.room.disconnect();
367
412
  this.room = null;
@@ -369,6 +414,53 @@ var init_livekit_voice = __esm({
369
414
  this._isActive = false;
370
415
  this._isMuted = false;
371
416
  }
417
+ // ─── Audio Analyser (livekit-client built-in) ───────────────────
418
+ async setupAnalyser(track) {
419
+ this.teardownAnalyser();
420
+ try {
421
+ const { createAudioAnalyser } = await import('livekit-client');
422
+ const { analyser, calculateVolume, cleanup } = createAudioAnalyser(track, {
423
+ fftSize: 256,
424
+ smoothingTimeConstant: 0.3
425
+ });
426
+ if (analyser.context.state === "suspended") {
427
+ await analyser.context.resume();
428
+ }
429
+ this.calculateVolume = calculateVolume;
430
+ this.analyserCleanup = cleanup;
431
+ this.logger.debug("Audio analyser created for bot track");
432
+ } catch (err) {
433
+ this.logger.debug("Failed to create audio analyser:", err);
434
+ }
435
+ }
436
+ teardownAnalyser() {
437
+ this.stopAudioLevelPolling();
438
+ if (this.analyserCleanup) {
439
+ this.analyserCleanup().catch(() => {
440
+ });
441
+ this.analyserCleanup = null;
442
+ }
443
+ this.calculateVolume = null;
444
+ }
445
+ startAudioLevelPolling() {
446
+ if (this.audioLevelPollTimer !== null) return;
447
+ if (!this.calculateVolume) return;
448
+ this.audioLevelPollTimer = setInterval(() => {
449
+ if (!this.calculateVolume) {
450
+ this.stopAudioLevelPolling();
451
+ return;
452
+ }
453
+ const vol = this.calculateVolume();
454
+ this.audioLevelCallback?.(vol);
455
+ }, 33);
456
+ }
457
+ stopAudioLevelPolling() {
458
+ if (this.audioLevelPollTimer !== null) {
459
+ clearInterval(this.audioLevelPollTimer);
460
+ this.audioLevelPollTimer = null;
461
+ }
462
+ }
463
+ // ─── Private ────────────────────────────────────────────────────
372
464
  requestToken() {
373
465
  return new Promise((resolve, reject) => {
374
466
  const timeout = setTimeout(() => {
@@ -1277,6 +1369,7 @@ var EstuaryClient = class extends TypedEventEmitter {
1277
1369
  }
1278
1370
  } else if (event.type === "complete") {
1279
1371
  this.emit("audioPlaybackComplete", event.messageId);
1372
+ this.emit("botAudioLevel", 0);
1280
1373
  this.notifyAudioPlaybackComplete(event.messageId);
1281
1374
  if (this.config.suppressMicDuringPlayback) {
1282
1375
  this.voiceManager?.setSuppressed?.(false);
@@ -1285,6 +1378,24 @@ var EstuaryClient = class extends TypedEventEmitter {
1285
1378
  });
1286
1379
  }
1287
1380
  await this.voiceManager.start();
1381
+ this.voiceManager.setSpeakingStateCallback?.((speaking) => {
1382
+ if (speaking) {
1383
+ this.emit("audioPlaybackStarted", "livekit-audio");
1384
+ if (this.config.suppressMicDuringPlayback) {
1385
+ this.voiceManager?.setSuppressed?.(true);
1386
+ }
1387
+ } else {
1388
+ this.emit("audioPlaybackComplete", "livekit-audio");
1389
+ this.emit("botAudioLevel", 0);
1390
+ this.notifyAudioPlaybackComplete("livekit-audio");
1391
+ if (this.config.suppressMicDuringPlayback) {
1392
+ this.voiceManager?.setSuppressed?.(false);
1393
+ }
1394
+ }
1395
+ });
1396
+ this.voiceManager.setAudioLevelCallback?.((level) => {
1397
+ this.emit("botAudioLevel", level);
1398
+ });
1288
1399
  this.emit("voiceStarted");
1289
1400
  }
1290
1401
  /** Stop voice input */
@@ -1383,7 +1494,31 @@ var EstuaryClient = class extends TypedEventEmitter {
1383
1494
  }
1384
1495
  handleBotVoice(voice) {
1385
1496
  this.emit("botVoice", voice);
1386
- this.audioPlayer?.enqueue(voice);
1497
+ if (voice.audio) {
1498
+ this.audioPlayer?.enqueue(voice);
1499
+ this.emit("botAudioLevel", this.computeAudioLevel(voice.audio));
1500
+ }
1501
+ }
1502
+ /** Compute RMS audio level (0-1) from base64-encoded Int16 PCM. */
1503
+ computeAudioLevel(base64Audio) {
1504
+ try {
1505
+ const binaryStr = atob(base64Audio);
1506
+ const len = binaryStr.length;
1507
+ const step = 16;
1508
+ let sum = 0;
1509
+ let count = 0;
1510
+ for (let i = 0; i + 1 < len; i += step) {
1511
+ const sample = binaryStr.charCodeAt(i) | binaryStr.charCodeAt(i + 1) << 8;
1512
+ const signed = sample > 32767 ? sample - 65536 : sample;
1513
+ const normalized = signed / 32768;
1514
+ sum += normalized * normalized;
1515
+ count++;
1516
+ }
1517
+ if (count === 0) return 0;
1518
+ return Math.min(1, Math.sqrt(sum / count) * 5);
1519
+ } catch {
1520
+ return 0;
1521
+ }
1387
1522
  }
1388
1523
  maybeAutoInterrupt(stt) {
1389
1524
  if ((this.config.autoInterruptOnSpeech ?? true) === false) return;