@estuary-ai/sdk 0.1.32 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,8 +1,10 @@
1
1
  interface EstuaryConfig {
2
2
  /** Base URL of the Estuary server (e.g., "https://api.estuary-ai.com") */
3
3
  serverUrl: string;
4
- /** API key (starts with "est_") */
5
- apiKey: string;
4
+ /** API key (starts with "est_"). Required unless sessionToken is provided. */
5
+ apiKey?: string;
6
+ /** Session token from share exchange (starts with "sst_"). Alternative to apiKey for share flows. */
7
+ sessionToken?: string;
6
8
  /** Character (agent) ID */
7
9
  characterId: string;
8
10
  /** Unique identifier for the end user */
@@ -50,10 +52,11 @@ interface BotResponse {
50
52
  tokenStream?: boolean;
51
53
  }
52
54
  interface BotVoice {
53
- audio: string;
55
+ audio?: string;
54
56
  messageId: string;
55
57
  chunkIndex: number;
56
- isFinal: boolean;
58
+ isFinal?: boolean;
59
+ isLivekit?: boolean;
57
60
  }
58
61
  interface SttResponse {
59
62
  text: string;
@@ -330,6 +333,7 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
330
333
  private actionParsers;
331
334
  private _hasAutoInterrupted;
332
335
  private _autoInterruptGraceTimer;
336
+ private _isLiveKitSpeaking;
333
337
  constructor(config: EstuaryConfig);
334
338
  /** Memory API client for querying memories, graphs, and facts */
335
339
  get memory(): MemoryClient;
@@ -347,6 +351,8 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
347
351
  disconnect(): Promise<void>;
348
352
  /** Send a text message to the character. Defaults to textOnly=true (no TTS audio response). Pass textOnly=false to receive voice audio. */
349
353
  sendText(text: string, textOnly?: boolean): void;
354
+ /** Script the character to say a specific prewritten line. Defaults to TTS enabled (textOnly=false). */
355
+ sayLine(text: string, textOnly?: boolean): void;
350
356
  /** Interrupt the current bot response */
351
357
  interrupt(messageId?: string): void;
352
358
  /** Send a camera image for vision processing */
@@ -365,12 +371,18 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
365
371
  toggleMute(): void;
366
372
  /** Whether the microphone is muted */
367
373
  get isMuted(): boolean;
368
- /** Get/set suppressMicDuringPlayback at runtime (no reconnect needed) */
374
+ /** Whether mic suppression during playback is enabled */
369
375
  get suppressMicDuringPlayback(): boolean;
376
+ /** Update mic suppression during playback at runtime (no reconnect needed) */
370
377
  set suppressMicDuringPlayback(enabled: boolean);
371
378
  /** Whether voice is currently active */
372
379
  get isVoiceActive(): boolean;
373
380
  private ensureConnected;
381
+ /** Whether bot audio is currently playing (via AudioPlayer or LiveKit) */
382
+ private get _isBotPlaying();
383
+ /** Suppress auto-interrupt for 1500ms so trailing STT partials from the
384
+ * user's previous speech don't kill the new bot audio. */
385
+ private startPlaybackGrace;
374
386
  private forwardSocketEvents;
375
387
  private handleBotResponse;
376
388
  private handleBotVoice;
package/dist/index.d.ts CHANGED
@@ -1,8 +1,10 @@
1
1
  interface EstuaryConfig {
2
2
  /** Base URL of the Estuary server (e.g., "https://api.estuary-ai.com") */
3
3
  serverUrl: string;
4
- /** API key (starts with "est_") */
5
- apiKey: string;
4
+ /** API key (starts with "est_"). Required unless sessionToken is provided. */
5
+ apiKey?: string;
6
+ /** Session token from share exchange (starts with "sst_"). Alternative to apiKey for share flows. */
7
+ sessionToken?: string;
6
8
  /** Character (agent) ID */
7
9
  characterId: string;
8
10
  /** Unique identifier for the end user */
@@ -50,10 +52,11 @@ interface BotResponse {
50
52
  tokenStream?: boolean;
51
53
  }
52
54
  interface BotVoice {
53
- audio: string;
55
+ audio?: string;
54
56
  messageId: string;
55
57
  chunkIndex: number;
56
- isFinal: boolean;
58
+ isFinal?: boolean;
59
+ isLivekit?: boolean;
57
60
  }
58
61
  interface SttResponse {
59
62
  text: string;
@@ -330,6 +333,7 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
330
333
  private actionParsers;
331
334
  private _hasAutoInterrupted;
332
335
  private _autoInterruptGraceTimer;
336
+ private _isLiveKitSpeaking;
333
337
  constructor(config: EstuaryConfig);
334
338
  /** Memory API client for querying memories, graphs, and facts */
335
339
  get memory(): MemoryClient;
@@ -347,6 +351,8 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
347
351
  disconnect(): Promise<void>;
348
352
  /** Send a text message to the character. Defaults to textOnly=true (no TTS audio response). Pass textOnly=false to receive voice audio. */
349
353
  sendText(text: string, textOnly?: boolean): void;
354
+ /** Script the character to say a specific prewritten line. Defaults to TTS enabled (textOnly=false). */
355
+ sayLine(text: string, textOnly?: boolean): void;
350
356
  /** Interrupt the current bot response */
351
357
  interrupt(messageId?: string): void;
352
358
  /** Send a camera image for vision processing */
@@ -365,12 +371,18 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
365
371
  toggleMute(): void;
366
372
  /** Whether the microphone is muted */
367
373
  get isMuted(): boolean;
368
- /** Get/set suppressMicDuringPlayback at runtime (no reconnect needed) */
374
+ /** Whether mic suppression during playback is enabled */
369
375
  get suppressMicDuringPlayback(): boolean;
376
+ /** Update mic suppression during playback at runtime (no reconnect needed) */
370
377
  set suppressMicDuringPlayback(enabled: boolean);
371
378
  /** Whether voice is currently active */
372
379
  get isVoiceActive(): boolean;
373
380
  private ensureConnected;
381
+ /** Whether bot audio is currently playing (via AudioPlayer or LiveKit) */
382
+ private get _isBotPlaying();
383
+ /** Suppress auto-interrupt for 1500ms so trailing STT partials from the
384
+ * user's previous speech don't kill the new bot audio. */
385
+ private startPlaybackGrace;
374
386
  private forwardSocketEvents;
375
387
  private handleBotResponse;
376
388
  private handleBotVoice;
package/dist/index.js CHANGED
@@ -4980,11 +4980,8 @@ var init_websocket_voice = __esm({
4980
4980
  this.logger.debug("WebSocket voice stopped");
4981
4981
  }
4982
4982
  toggleMute() {
4983
- if (!this._isActive || !this.mediaStream) return;
4983
+ if (!this._isActive) return;
4984
4984
  this._isMuted = !this._isMuted;
4985
- for (const track of this.mediaStream.getAudioTracks()) {
4986
- track.enabled = !this._isMuted;
4987
- }
4988
4985
  this.logger.debug("Mute toggled:", this._isMuted);
4989
4986
  }
4990
4987
  setSuppressed(suppressed) {
@@ -5042,6 +5039,7 @@ var init_livekit_voice = __esm({
5042
5039
  room = null;
5043
5040
  // livekit-client Room (dynamically imported)
5044
5041
  _isMuted = false;
5042
+ _isSuppressed = false;
5045
5043
  _isActive = false;
5046
5044
  speakingStateCallback = null;
5047
5045
  audioLevelCallback = null;
@@ -5208,14 +5206,20 @@ var init_livekit_voice = __esm({
5208
5206
  }
5209
5207
  this._isActive = false;
5210
5208
  this._isMuted = false;
5209
+ this._isSuppressed = false;
5211
5210
  this.logger.debug("LiveKit voice stopped");
5212
5211
  }
5213
5212
  toggleMute() {
5214
5213
  if (!this._isActive || !this.room) return;
5215
5214
  this._isMuted = !this._isMuted;
5216
- this.room.localParticipant.setMicrophoneEnabled(!this._isMuted);
5215
+ this.updateTrackEnabled();
5217
5216
  this.logger.debug("Mute toggled:", this._isMuted);
5218
5217
  }
5218
+ setSuppressed(suppressed) {
5219
+ this._isSuppressed = suppressed;
5220
+ this.updateTrackEnabled();
5221
+ this.logger.debug("Audio suppression:", suppressed ? "on" : "off");
5222
+ }
5219
5223
  dispose() {
5220
5224
  this.speakingStateCallback = null;
5221
5225
  this.audioLevelCallback = null;
@@ -5229,6 +5233,19 @@ var init_livekit_voice = __esm({
5229
5233
  }
5230
5234
  this._isActive = false;
5231
5235
  this._isMuted = false;
5236
+ this._isSuppressed = false;
5237
+ }
5238
+ /** Mute/unmute the local audio track directly via MediaStreamTrack.enabled.
5239
+ * This avoids setMicrophoneEnabled() which publishes/unpublishes the track
5240
+ * through the LiveKit server and can fail with engine timeout errors. */
5241
+ updateTrackEnabled() {
5242
+ if (!this.room) return;
5243
+ const enabled = !this._isMuted && !this._isSuppressed;
5244
+ for (const [, publication] of this.room.localParticipant.audioTrackPublications) {
5245
+ if (publication.track?.mediaStreamTrack) {
5246
+ publication.track.mediaStreamTrack.enabled = enabled;
5247
+ }
5248
+ }
5232
5249
  }
5233
5250
  // ─── Audio Level Polling (participant.audioLevel) ───────────────
5234
5251
  startAudioLevelPolling() {
@@ -8911,7 +8928,8 @@ function toBotVoice(wire) {
8911
8928
  audio: wire.audio,
8912
8929
  messageId: wire.message_id,
8913
8930
  chunkIndex: wire.chunk_index,
8914
- isFinal: wire.is_final
8931
+ isFinal: wire.is_final ?? false,
8932
+ isLivekit: wire.is_livekit
8915
8933
  };
8916
8934
  }
8917
8935
  function toSttResponse(wire) {
@@ -9009,13 +9027,18 @@ var SocketManager = class extends TypedEventEmitter {
9009
9027
  let settled = false;
9010
9028
  const onConnect = () => {
9011
9029
  this.logger.debug("Socket connected, authenticating...");
9012
- this.socket.emit("authenticate", {
9013
- api_key: this.config.apiKey,
9030
+ const authPayload = {
9014
9031
  character_id: this.config.characterId,
9015
9032
  player_id: this.config.playerId,
9016
9033
  audio_sample_rate: this.config.audioSampleRate ?? 16e3,
9017
9034
  realtime_memory: this.config.realtimeMemory ?? false
9018
- });
9035
+ };
9036
+ if (this.config.sessionToken) {
9037
+ authPayload.api_key = this.config.sessionToken;
9038
+ } else if (this.config.apiKey) {
9039
+ authPayload.api_key = this.config.apiKey;
9040
+ }
9041
+ this.socket.emit("authenticate", authPayload);
9019
9042
  };
9020
9043
  const onSessionInfo = (data) => {
9021
9044
  this.sessionInfo = toSessionInfo(data);
@@ -9344,6 +9367,7 @@ var AudioPlayer = class _AudioPlayer {
9344
9367
  _isCleared = false;
9345
9368
  _interruptedMessageId = null;
9346
9369
  _drainTimer = null;
9370
+ /** How long to wait for more chunks before declaring playback complete (ms) */
9347
9371
  static DRAIN_DELAY_MS = 300;
9348
9372
  constructor(sampleRate, onEvent) {
9349
9373
  this.sampleRate = sampleRate;
@@ -9362,6 +9386,7 @@ var AudioPlayer = class _AudioPlayer {
9362
9386
  this._interruptedMessageId = id;
9363
9387
  }
9364
9388
  enqueue(voice) {
9389
+ if (!voice.audio) return;
9365
9390
  if (voice.messageId === this._interruptedMessageId) return;
9366
9391
  if (this._interruptedMessageId && voice.messageId !== this._interruptedMessageId) {
9367
9392
  this._interruptedMessageId = null;
@@ -9375,7 +9400,9 @@ var AudioPlayer = class _AudioPlayer {
9375
9400
  const buffer = ctx.createBuffer(1, float32.length, this.sampleRate);
9376
9401
  buffer.getChannelData(0).set(float32);
9377
9402
  this.queue.push({ buffer, messageId: voice.messageId });
9378
- if (!this.isPlaying) {
9403
+ const wasDraining = this._drainTimer !== null;
9404
+ this.cancelDrain();
9405
+ if (!this.isPlaying || wasDraining) {
9379
9406
  this.playNext();
9380
9407
  }
9381
9408
  }
@@ -9474,6 +9501,7 @@ var AudioPlayer = class _AudioPlayer {
9474
9501
  }
9475
9502
  return;
9476
9503
  }
9504
+ this.cancelDrain();
9477
9505
  const { buffer, messageId } = this.queue.shift();
9478
9506
  if (messageId !== this.currentMessageId) {
9479
9507
  if (this.currentMessageId) {
@@ -9595,34 +9623,50 @@ function parseActions(text) {
9595
9623
 
9596
9624
  // src/client.ts
9597
9625
  var DEFAULT_SAMPLE_RATE = 24e3;
9626
+ var REST_UNAVAILABLE_MESSAGE = "REST API not available with session token auth. Use a server-side proxy for REST calls.";
9598
9627
  var EstuaryClient = class extends TypedEventEmitter {
9599
9628
  config;
9600
9629
  logger;
9601
9630
  socketManager;
9602
9631
  voiceManager = null;
9603
9632
  audioPlayer = null;
9604
- _memory;
9605
- _character;
9633
+ _memory = null;
9634
+ _character = null;
9606
9635
  _sessionInfo = null;
9607
9636
  actionParsers = /* @__PURE__ */ new Map();
9608
9637
  _hasAutoInterrupted = false;
9609
9638
  _autoInterruptGraceTimer = null;
9639
+ _isLiveKitSpeaking = false;
9610
9640
  constructor(config) {
9611
9641
  super();
9642
+ if (!config.apiKey && !config.sessionToken) {
9643
+ throw new exports.EstuaryError(
9644
+ "AUTH_FAILED" /* AUTH_FAILED */,
9645
+ "Either apiKey or sessionToken must be provided"
9646
+ );
9647
+ }
9612
9648
  this.config = config;
9613
9649
  this.logger = new Logger(config.debug ?? false);
9614
9650
  this.socketManager = new SocketManager(config, this.logger);
9615
9651
  this.forwardSocketEvents();
9616
- const restClient = new RestClient(config.serverUrl, config.apiKey);
9617
- this._memory = new MemoryClient(restClient, config.characterId, config.playerId);
9618
- this._character = new CharacterClient(restClient);
9652
+ if (config.apiKey) {
9653
+ const restClient = new RestClient(config.serverUrl, config.apiKey);
9654
+ this._memory = new MemoryClient(restClient, config.characterId, config.playerId);
9655
+ this._character = new CharacterClient(restClient);
9656
+ }
9619
9657
  }
9620
9658
  /** Memory API client for querying memories, graphs, and facts */
9621
9659
  get memory() {
9660
+ if (!this._memory) {
9661
+ throw new exports.EstuaryError("NOT_CONNECTED" /* NOT_CONNECTED */, REST_UNAVAILABLE_MESSAGE);
9662
+ }
9622
9663
  return this._memory;
9623
9664
  }
9624
9665
  /** Fetch character details including 3D model and avatar URLs. */
9625
9666
  async getCharacter(characterId) {
9667
+ if (!this._character) {
9668
+ throw new exports.EstuaryError("NOT_CONNECTED" /* NOT_CONNECTED */, REST_UNAVAILABLE_MESSAGE);
9669
+ }
9626
9670
  return this._character.getCharacter(characterId ?? this.config.characterId);
9627
9671
  }
9628
9672
  /** Current session info (null if not connected) */
@@ -9662,6 +9706,12 @@ var EstuaryClient = class extends TypedEventEmitter {
9662
9706
  this.ensureConnected();
9663
9707
  this.socketManager.emitEvent("text", { text, textOnly });
9664
9708
  }
9709
+ /** Script the character to say a specific prewritten line. Defaults to TTS enabled (textOnly=false). */
9710
+ sayLine(text, textOnly = false) {
9711
+ if (!text?.trim()) return;
9712
+ this.ensureConnected();
9713
+ this.socketManager.emitEvent("say_line", { text, text_only: textOnly });
9714
+ }
9665
9715
  /** Interrupt the current bot response */
9666
9716
  interrupt(messageId) {
9667
9717
  this.ensureConnected();
@@ -9714,12 +9764,7 @@ var EstuaryClient = class extends TypedEventEmitter {
9714
9764
  if (!this.audioPlayer && result.resolvedTransport === "websocket" && typeof AudioContext !== "undefined") {
9715
9765
  this.audioPlayer = new AudioPlayer(sampleRate, (event) => {
9716
9766
  if (event.type === "started") {
9717
- this._hasAutoInterrupted = true;
9718
- if (this._autoInterruptGraceTimer) clearTimeout(this._autoInterruptGraceTimer);
9719
- this._autoInterruptGraceTimer = setTimeout(() => {
9720
- this._hasAutoInterrupted = false;
9721
- this._autoInterruptGraceTimer = null;
9722
- }, 1500);
9767
+ this.startPlaybackGrace();
9723
9768
  this.emit("audioPlaybackStarted", event.messageId);
9724
9769
  if (this.config.suppressMicDuringPlayback) {
9725
9770
  this.voiceManager?.setSuppressed?.(true);
@@ -9736,7 +9781,9 @@ var EstuaryClient = class extends TypedEventEmitter {
9736
9781
  }
9737
9782
  await this.voiceManager.start();
9738
9783
  this.voiceManager.setSpeakingStateCallback?.((speaking) => {
9784
+ this._isLiveKitSpeaking = speaking;
9739
9785
  if (speaking) {
9786
+ this.startPlaybackGrace();
9740
9787
  this.emit("audioPlaybackStarted", "livekit-audio");
9741
9788
  if (this.config.suppressMicDuringPlayback) {
9742
9789
  this.voiceManager?.setSuppressed?.(true);
@@ -9761,6 +9808,7 @@ var EstuaryClient = class extends TypedEventEmitter {
9761
9808
  await this.voiceManager.stop();
9762
9809
  this.voiceManager.dispose();
9763
9810
  this.voiceManager = null;
9811
+ this._isLiveKitSpeaking = false;
9764
9812
  this.emit("voiceStopped");
9765
9813
  }
9766
9814
  }
@@ -9775,12 +9823,19 @@ var EstuaryClient = class extends TypedEventEmitter {
9775
9823
  get isMuted() {
9776
9824
  return this.voiceManager?.isMuted ?? false;
9777
9825
  }
9778
- /** Get/set suppressMicDuringPlayback at runtime (no reconnect needed) */
9826
+ /** Whether mic suppression during playback is enabled */
9779
9827
  get suppressMicDuringPlayback() {
9780
9828
  return this.config.suppressMicDuringPlayback ?? false;
9781
9829
  }
9830
+ /** Update mic suppression during playback at runtime (no reconnect needed) */
9782
9831
  set suppressMicDuringPlayback(enabled) {
9783
9832
  this.config.suppressMicDuringPlayback = enabled;
9833
+ if (!this.voiceManager?.isActive) return;
9834
+ if (enabled && this._isBotPlaying) {
9835
+ this.voiceManager.setSuppressed?.(true);
9836
+ } else if (!enabled) {
9837
+ this.voiceManager.setSuppressed?.(false);
9838
+ }
9784
9839
  }
9785
9840
  /** Whether voice is currently active */
9786
9841
  get isVoiceActive() {
@@ -9792,6 +9847,20 @@ var EstuaryClient = class extends TypedEventEmitter {
9792
9847
  throw new exports.EstuaryError("NOT_CONNECTED" /* NOT_CONNECTED */, "Not connected to server. Call connect() first.");
9793
9848
  }
9794
9849
  }
9850
+ /** Whether bot audio is currently playing (via AudioPlayer or LiveKit) */
9851
+ get _isBotPlaying() {
9852
+ return (this.audioPlayer?.playing ?? false) || this._isLiveKitSpeaking;
9853
+ }
9854
+ /** Suppress auto-interrupt for 1500ms so trailing STT partials from the
9855
+ * user's previous speech don't kill the new bot audio. */
9856
+ startPlaybackGrace() {
9857
+ this._hasAutoInterrupted = true;
9858
+ if (this._autoInterruptGraceTimer) clearTimeout(this._autoInterruptGraceTimer);
9859
+ this._autoInterruptGraceTimer = setTimeout(() => {
9860
+ this._hasAutoInterrupted = false;
9861
+ this._autoInterruptGraceTimer = null;
9862
+ }, 1500);
9863
+ }
9795
9864
  forwardSocketEvents() {
9796
9865
  this.socketManager.on("connected", (session) => {
9797
9866
  this._sessionInfo = session;
@@ -9881,7 +9950,7 @@ var EstuaryClient = class extends TypedEventEmitter {
9881
9950
  if ((this.config.autoInterruptOnSpeech ?? true) === false) return;
9882
9951
  if (this.config.suppressMicDuringPlayback) return;
9883
9952
  if (stt.isFinal) return;
9884
- if (!this.audioPlayer?.playing) return;
9953
+ if (!this._isBotPlaying) return;
9885
9954
  if (this._hasAutoInterrupted) return;
9886
9955
  this._hasAutoInterrupted = true;
9887
9956
  this.interrupt();