@estuary-ai/sdk 0.1.31 → 0.2.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -26976,11 +26976,8 @@ var init_websocket_voice = __esm({
26976
26976
  this.logger.debug("WebSocket voice stopped");
26977
26977
  }
26978
26978
  toggleMute() {
26979
- if (!this._isActive || !this.mediaStream) return;
26979
+ if (!this._isActive) return;
26980
26980
  this._isMuted = !this._isMuted;
26981
- for (const track of this.mediaStream.getAudioTracks()) {
26982
- track.enabled = !this._isMuted;
26983
- }
26984
26981
  this.logger.debug("Mute toggled:", this._isMuted);
26985
26982
  }
26986
26983
  setSuppressed(suppressed) {
@@ -27038,6 +27035,7 @@ var init_livekit_voice = __esm({
27038
27035
  room = null;
27039
27036
  // livekit-client Room (dynamically imported)
27040
27037
  _isMuted = false;
27038
+ _isSuppressed = false;
27041
27039
  _isActive = false;
27042
27040
  speakingStateCallback = null;
27043
27041
  audioLevelCallback = null;
@@ -27204,14 +27202,20 @@ var init_livekit_voice = __esm({
27204
27202
  }
27205
27203
  this._isActive = false;
27206
27204
  this._isMuted = false;
27205
+ this._isSuppressed = false;
27207
27206
  this.logger.debug("LiveKit voice stopped");
27208
27207
  }
27209
27208
  toggleMute() {
27210
27209
  if (!this._isActive || !this.room) return;
27211
27210
  this._isMuted = !this._isMuted;
27212
- this.room.localParticipant.setMicrophoneEnabled(!this._isMuted);
27211
+ this.updateTrackEnabled();
27213
27212
  this.logger.debug("Mute toggled:", this._isMuted);
27214
27213
  }
27214
+ setSuppressed(suppressed) {
27215
+ this._isSuppressed = suppressed;
27216
+ this.updateTrackEnabled();
27217
+ this.logger.debug("Audio suppression:", suppressed ? "on" : "off");
27218
+ }
27215
27219
  dispose() {
27216
27220
  this.speakingStateCallback = null;
27217
27221
  this.audioLevelCallback = null;
@@ -27225,6 +27229,19 @@ var init_livekit_voice = __esm({
27225
27229
  }
27226
27230
  this._isActive = false;
27227
27231
  this._isMuted = false;
27232
+ this._isSuppressed = false;
27233
+ }
27234
+ /** Mute/unmute the local audio track directly via MediaStreamTrack.enabled.
27235
+ * This avoids setMicrophoneEnabled() which publishes/unpublishes the track
27236
+ * through the LiveKit server and can fail with engine timeout errors. */
27237
+ updateTrackEnabled() {
27238
+ if (!this.room) return;
27239
+ const enabled = !this._isMuted && !this._isSuppressed;
27240
+ for (const [, publication] of this.room.localParticipant.audioTrackPublications) {
27241
+ if (publication.track?.mediaStreamTrack) {
27242
+ publication.track.mediaStreamTrack.enabled = enabled;
27243
+ }
27244
+ }
27228
27245
  }
27229
27246
  // ─── Audio Level Polling (participant.audioLevel) ───────────────
27230
27247
  startAudioLevelPolling() {
@@ -30751,7 +30768,8 @@ function toBotVoice(wire) {
30751
30768
  audio: wire.audio,
30752
30769
  messageId: wire.message_id,
30753
30770
  chunkIndex: wire.chunk_index,
30754
- isFinal: wire.is_final
30771
+ isFinal: wire.is_final ?? false,
30772
+ isLivekit: wire.is_livekit
30755
30773
  };
30756
30774
  }
30757
30775
  function toSttResponse(wire) {
@@ -30849,13 +30867,18 @@ var SocketManager = class extends TypedEventEmitter {
30849
30867
  let settled = false;
30850
30868
  const onConnect = () => {
30851
30869
  this.logger.debug("Socket connected, authenticating...");
30852
- this.socket.emit("authenticate", {
30853
- api_key: this.config.apiKey,
30870
+ const authPayload = {
30854
30871
  character_id: this.config.characterId,
30855
30872
  player_id: this.config.playerId,
30856
30873
  audio_sample_rate: this.config.audioSampleRate ?? 16e3,
30857
30874
  realtime_memory: this.config.realtimeMemory ?? false
30858
- });
30875
+ };
30876
+ if (this.config.sessionToken) {
30877
+ authPayload.api_key = this.config.sessionToken;
30878
+ } else if (this.config.apiKey) {
30879
+ authPayload.api_key = this.config.apiKey;
30880
+ }
30881
+ this.socket.emit("authenticate", authPayload);
30859
30882
  };
30860
30883
  const onSessionInfo = (data) => {
30861
30884
  this.sessionInfo = toSessionInfo(data);
@@ -31184,6 +31207,7 @@ var AudioPlayer = class _AudioPlayer {
31184
31207
  _isCleared = false;
31185
31208
  _interruptedMessageId = null;
31186
31209
  _drainTimer = null;
31210
+ /** How long to wait for more chunks before declaring playback complete (ms) */
31187
31211
  static DRAIN_DELAY_MS = 300;
31188
31212
  constructor(sampleRate, onEvent) {
31189
31213
  this.sampleRate = sampleRate;
@@ -31202,6 +31226,7 @@ var AudioPlayer = class _AudioPlayer {
31202
31226
  this._interruptedMessageId = id;
31203
31227
  }
31204
31228
  enqueue(voice) {
31229
+ if (!voice.audio) return;
31205
31230
  if (voice.messageId === this._interruptedMessageId) return;
31206
31231
  if (this._interruptedMessageId && voice.messageId !== this._interruptedMessageId) {
31207
31232
  this._interruptedMessageId = null;
@@ -31215,7 +31240,9 @@ var AudioPlayer = class _AudioPlayer {
31215
31240
  const buffer = ctx.createBuffer(1, float32.length, this.sampleRate);
31216
31241
  buffer.getChannelData(0).set(float32);
31217
31242
  this.queue.push({ buffer, messageId: voice.messageId });
31218
- if (!this.isPlaying) {
31243
+ const wasDraining = this._drainTimer !== null;
31244
+ this.cancelDrain();
31245
+ if (!this.isPlaying || wasDraining) {
31219
31246
  this.playNext();
31220
31247
  }
31221
31248
  }
@@ -31314,6 +31341,7 @@ var AudioPlayer = class _AudioPlayer {
31314
31341
  }
31315
31342
  return;
31316
31343
  }
31344
+ this.cancelDrain();
31317
31345
  const { buffer, messageId } = this.queue.shift();
31318
31346
  if (messageId !== this.currentMessageId) {
31319
31347
  if (this.currentMessageId) {
@@ -31434,35 +31462,51 @@ function parseActions(text) {
31434
31462
  }
31435
31463
 
31436
31464
  // src/client.ts
31437
- var DEFAULT_SAMPLE_RATE = 16e3;
31465
+ var DEFAULT_SAMPLE_RATE = 24e3;
31466
+ var REST_UNAVAILABLE_MESSAGE = "REST API not available with session token auth. Use a server-side proxy for REST calls.";
31438
31467
  var EstuaryClient = class extends TypedEventEmitter {
31439
31468
  config;
31440
31469
  logger;
31441
31470
  socketManager;
31442
31471
  voiceManager = null;
31443
31472
  audioPlayer = null;
31444
- _memory;
31445
- _character;
31473
+ _memory = null;
31474
+ _character = null;
31446
31475
  _sessionInfo = null;
31447
31476
  actionParsers = /* @__PURE__ */ new Map();
31448
31477
  _hasAutoInterrupted = false;
31449
31478
  _autoInterruptGraceTimer = null;
31479
+ _isLiveKitSpeaking = false;
31450
31480
  constructor(config) {
31451
31481
  super();
31482
+ if (!config.apiKey && !config.sessionToken) {
31483
+ throw new EstuaryError(
31484
+ "AUTH_FAILED" /* AUTH_FAILED */,
31485
+ "Either apiKey or sessionToken must be provided"
31486
+ );
31487
+ }
31452
31488
  this.config = config;
31453
31489
  this.logger = new Logger(config.debug ?? false);
31454
31490
  this.socketManager = new SocketManager(config, this.logger);
31455
31491
  this.forwardSocketEvents();
31456
- const restClient = new RestClient(config.serverUrl, config.apiKey);
31457
- this._memory = new MemoryClient(restClient, config.characterId, config.playerId);
31458
- this._character = new CharacterClient(restClient);
31492
+ if (config.apiKey) {
31493
+ const restClient = new RestClient(config.serverUrl, config.apiKey);
31494
+ this._memory = new MemoryClient(restClient, config.characterId, config.playerId);
31495
+ this._character = new CharacterClient(restClient);
31496
+ }
31459
31497
  }
31460
31498
  /** Memory API client for querying memories, graphs, and facts */
31461
31499
  get memory() {
31500
+ if (!this._memory) {
31501
+ throw new EstuaryError("NOT_CONNECTED" /* NOT_CONNECTED */, REST_UNAVAILABLE_MESSAGE);
31502
+ }
31462
31503
  return this._memory;
31463
31504
  }
31464
31505
  /** Fetch character details including 3D model and avatar URLs. */
31465
31506
  async getCharacter(characterId) {
31507
+ if (!this._character) {
31508
+ throw new EstuaryError("NOT_CONNECTED" /* NOT_CONNECTED */, REST_UNAVAILABLE_MESSAGE);
31509
+ }
31466
31510
  return this._character.getCharacter(characterId ?? this.config.characterId);
31467
31511
  }
31468
31512
  /** Current session info (null if not connected) */
@@ -31502,6 +31546,12 @@ var EstuaryClient = class extends TypedEventEmitter {
31502
31546
  this.ensureConnected();
31503
31547
  this.socketManager.emitEvent("text", { text, textOnly });
31504
31548
  }
31549
+ /** Script the character to say a specific prewritten line. Defaults to TTS enabled (textOnly=false). */
31550
+ sayLine(text, textOnly = false) {
31551
+ if (!text?.trim()) return;
31552
+ this.ensureConnected();
31553
+ this.socketManager.emitEvent("say_line", { text, text_only: textOnly });
31554
+ }
31505
31555
  /** Interrupt the current bot response */
31506
31556
  interrupt(messageId) {
31507
31557
  this.ensureConnected();
@@ -31554,12 +31604,7 @@ var EstuaryClient = class extends TypedEventEmitter {
31554
31604
  if (!this.audioPlayer && result.resolvedTransport === "websocket" && typeof AudioContext !== "undefined") {
31555
31605
  this.audioPlayer = new AudioPlayer(sampleRate, (event) => {
31556
31606
  if (event.type === "started") {
31557
- this._hasAutoInterrupted = true;
31558
- if (this._autoInterruptGraceTimer) clearTimeout(this._autoInterruptGraceTimer);
31559
- this._autoInterruptGraceTimer = setTimeout(() => {
31560
- this._hasAutoInterrupted = false;
31561
- this._autoInterruptGraceTimer = null;
31562
- }, 1500);
31607
+ this.startPlaybackGrace();
31563
31608
  this.emit("audioPlaybackStarted", event.messageId);
31564
31609
  if (this.config.suppressMicDuringPlayback) {
31565
31610
  this.voiceManager?.setSuppressed?.(true);
@@ -31576,7 +31621,9 @@ var EstuaryClient = class extends TypedEventEmitter {
31576
31621
  }
31577
31622
  await this.voiceManager.start();
31578
31623
  this.voiceManager.setSpeakingStateCallback?.((speaking) => {
31624
+ this._isLiveKitSpeaking = speaking;
31579
31625
  if (speaking) {
31626
+ this.startPlaybackGrace();
31580
31627
  this.emit("audioPlaybackStarted", "livekit-audio");
31581
31628
  if (this.config.suppressMicDuringPlayback) {
31582
31629
  this.voiceManager?.setSuppressed?.(true);
@@ -31601,6 +31648,7 @@ var EstuaryClient = class extends TypedEventEmitter {
31601
31648
  await this.voiceManager.stop();
31602
31649
  this.voiceManager.dispose();
31603
31650
  this.voiceManager = null;
31651
+ this._isLiveKitSpeaking = false;
31604
31652
  this.emit("voiceStopped");
31605
31653
  }
31606
31654
  }
@@ -31615,12 +31663,19 @@ var EstuaryClient = class extends TypedEventEmitter {
31615
31663
  get isMuted() {
31616
31664
  return this.voiceManager?.isMuted ?? false;
31617
31665
  }
31618
- /** Get/set suppressMicDuringPlayback at runtime (no reconnect needed) */
31666
+ /** Whether mic suppression during playback is enabled */
31619
31667
  get suppressMicDuringPlayback() {
31620
31668
  return this.config.suppressMicDuringPlayback ?? false;
31621
31669
  }
31670
+ /** Update mic suppression during playback at runtime (no reconnect needed) */
31622
31671
  set suppressMicDuringPlayback(enabled) {
31623
31672
  this.config.suppressMicDuringPlayback = enabled;
31673
+ if (!this.voiceManager?.isActive) return;
31674
+ if (enabled && this._isBotPlaying) {
31675
+ this.voiceManager.setSuppressed?.(true);
31676
+ } else if (!enabled) {
31677
+ this.voiceManager.setSuppressed?.(false);
31678
+ }
31624
31679
  }
31625
31680
  /** Whether voice is currently active */
31626
31681
  get isVoiceActive() {
@@ -31632,6 +31687,20 @@ var EstuaryClient = class extends TypedEventEmitter {
31632
31687
  throw new EstuaryError("NOT_CONNECTED" /* NOT_CONNECTED */, "Not connected to server. Call connect() first.");
31633
31688
  }
31634
31689
  }
31690
+ /** Whether bot audio is currently playing (via AudioPlayer or LiveKit) */
31691
+ get _isBotPlaying() {
31692
+ return (this.audioPlayer?.playing ?? false) || this._isLiveKitSpeaking;
31693
+ }
31694
+ /** Suppress auto-interrupt for 1500ms so trailing STT partials from the
31695
+ * user's previous speech don't kill the new bot audio. */
31696
+ startPlaybackGrace() {
31697
+ this._hasAutoInterrupted = true;
31698
+ if (this._autoInterruptGraceTimer) clearTimeout(this._autoInterruptGraceTimer);
31699
+ this._autoInterruptGraceTimer = setTimeout(() => {
31700
+ this._hasAutoInterrupted = false;
31701
+ this._autoInterruptGraceTimer = null;
31702
+ }, 1500);
31703
+ }
31635
31704
  forwardSocketEvents() {
31636
31705
  this.socketManager.on("connected", (session) => {
31637
31706
  this._sessionInfo = session;
@@ -31721,7 +31790,7 @@ var EstuaryClient = class extends TypedEventEmitter {
31721
31790
  if ((this.config.autoInterruptOnSpeech ?? true) === false) return;
31722
31791
  if (this.config.suppressMicDuringPlayback) return;
31723
31792
  if (stt.isFinal) return;
31724
- if (!this.audioPlayer?.playing) return;
31793
+ if (!this._isBotPlaying) return;
31725
31794
  if (this._hasAutoInterrupted) return;
31726
31795
  this._hasAutoInterrupted = true;
31727
31796
  this.interrupt();