@estuary-ai/sdk 0.1.4 → 0.1.6

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -74,6 +74,50 @@ await client.startVoice();
74
74
  client.toggleMute();
75
75
  ```
76
76
 
77
+ ### Interrupts
78
+
79
+ Interrupt the bot's current response (stops audio playback and generation):
80
+
81
+ ```typescript
82
+ client.interrupt(); // interrupt current response
83
+ client.interrupt('msg_abc123'); // interrupt a specific message
84
+ ```
85
+
86
+ ### Vision / Camera
87
+
88
+ Send images for vision processing. The server may also request captures via the `cameraCaptureRequest` event.
89
+
90
+ ```typescript
91
+ // Send a camera image proactively
92
+ client.sendCameraImage(base64Image, 'image/jpeg');
93
+
94
+ // Respond to a server-initiated capture request
95
+ client.on('cameraCaptureRequest', (request) => {
96
+ const image = captureFrame(); // your capture logic
97
+ client.sendCameraImage(image, 'image/jpeg', request.requestId, request.text);
98
+ });
99
+ ```
100
+
101
+ ### Character Actions
102
+
103
+ Bot responses can include inline action tags (e.g., `<action name="wave" target="user"/>`). The SDK automatically parses these, strips them from `botResponse.text`, and emits `characterAction` events:
104
+
105
+ ```typescript
106
+ client.on('characterAction', (action) => {
107
+ console.log(action.name); // e.g., "wave"
108
+ console.log(action.params); // e.g., { target: "user" }
109
+ console.log(action.messageId); // originating message
110
+ });
111
+ ```
112
+
113
+ For non-streaming contexts, use the `parseActions` utility:
114
+
115
+ ```typescript
116
+ import { parseActions } from '@estuary-ai/sdk';
117
+
118
+ const { actions, cleanText } = parseActions(rawBotText);
119
+ ```
120
+
77
121
  ### Memory & Knowledge Graph
78
122
 
79
123
  ```typescript
@@ -81,6 +125,9 @@ const memories = await client.memory.getMemories({ status: 'active', limit: 50 }
81
125
  const facts = await client.memory.getCoreFacts();
82
126
  const graph = await client.memory.getGraph({ includeEntities: true });
83
127
  const results = await client.memory.search('favorite food');
128
+ const timeline = await client.memory.getTimeline({ groupBy: 'week' });
129
+ const stats = await client.memory.getStats();
130
+ await client.memory.deleteAll(true); // pass true to confirm
84
131
  ```
85
132
 
86
133
  ### Real-Time Memory Extraction
@@ -109,17 +156,69 @@ await client.connect();
109
156
  ## Events
110
157
 
111
158
  ```typescript
159
+ // Connection
112
160
  client.on('connected', (session) => { /* authenticated */ });
113
161
  client.on('disconnected', (reason) => { /* lost connection */ });
114
- client.on('botResponse', (response) => { /* streaming text */ });
162
+ client.on('reconnecting', (attempt) => { /* reconnect attempt number */ });
163
+ client.on('connectionStateChanged', (state) => { /* ConnectionState enum */ });
164
+ client.on('authError', (error) => { /* authentication failed */ });
165
+
166
+ // Conversation
167
+ client.on('botResponse', (response) => { /* streaming text (actions auto-stripped) */ });
115
168
  client.on('botVoice', (voice) => { /* audio chunk */ });
116
- client.on('sttResponse', (stt) => { /* speech-to-text */ });
169
+ client.on('sttResponse', (stt) => { /* speech-to-text transcript */ });
117
170
  client.on('interrupt', (data) => { /* response interrupted */ });
171
+ client.on('characterAction', (action) => { /* parsed action from bot response */ });
172
+ client.on('cameraCaptureRequest', (request) => { /* server requests a camera image */ });
173
+
174
+ // Voice
175
+ client.on('voiceStarted', () => { /* voice session began */ });
176
+ client.on('voiceStopped', () => { /* voice session ended */ });
177
+ client.on('livekitConnected', (room) => { /* joined LiveKit room */ });
178
+ client.on('livekitDisconnected', () => { /* left LiveKit room */ });
179
+
180
+ // Audio playback
181
+ client.on('audioPlaybackStarted', (messageId) => { /* bot audio started playing */ });
182
+ client.on('audioPlaybackComplete', (messageId) => { /* bot audio finished playing */ });
183
+
184
+ // Memory
118
185
  client.on('memoryUpdated', (event) => { /* real-time memory extraction */ });
186
+
187
+ // Errors & limits
119
188
  client.on('error', (error) => { /* EstuaryError */ });
120
189
  client.on('quotaExceeded', (data) => { /* rate limited */ });
121
190
  ```
122
191
 
192
+ ## Error Handling
193
+
194
+ Errors are instances of `EstuaryError` with a typed `code` field:
195
+
196
+ ```typescript
197
+ import { EstuaryError, ErrorCode } from '@estuary-ai/sdk';
198
+
199
+ client.on('error', (error) => {
200
+ if (error instanceof EstuaryError) {
201
+ switch (error.code) {
202
+ case ErrorCode.NOT_CONNECTED:
203
+ case ErrorCode.CONNECTION_FAILED:
204
+ case ErrorCode.CONNECTION_TIMEOUT:
205
+ // connection issues
206
+ break;
207
+ case ErrorCode.AUTH_FAILED:
208
+ // bad API key or character ID
209
+ break;
210
+ case ErrorCode.MICROPHONE_DENIED:
211
+ // user denied mic permission
212
+ break;
213
+ }
214
+ }
215
+ });
216
+
217
+ client.on('authError', (message) => {
218
+ console.error('Authentication failed:', message);
219
+ });
220
+ ```
221
+
123
222
  ## Configuration
124
223
 
125
224
  ```typescript
@@ -135,9 +234,46 @@ interface EstuaryConfig {
135
234
  debug?: boolean; // Default: false
136
235
  voiceTransport?: 'websocket' | 'livekit' | 'auto'; // Default: 'auto'
137
236
  realtimeMemory?: boolean; // Enable real-time memory extraction events. Default: false
237
+ suppressMicDuringPlayback?: boolean; // Suppress mic input while bot audio plays (software AEC fallback; disables barge-in). Default: false
138
238
  }
139
239
  ```
140
240
 
241
+ ## Exports
242
+
243
+ Key exports for TypeScript users:
244
+
245
+ ```typescript
246
+ // Client
247
+ import { EstuaryClient } from '@estuary-ai/sdk';
248
+
249
+ // Errors
250
+ import { EstuaryError, ErrorCode } from '@estuary-ai/sdk';
251
+
252
+ // Enums
253
+ import { ConnectionState } from '@estuary-ai/sdk';
254
+
255
+ // Utilities
256
+ import { parseActions } from '@estuary-ai/sdk';
257
+
258
+ // Types (import type)
259
+ import type {
260
+ EstuaryConfig,
261
+ SessionInfo,
262
+ BotResponse,
263
+ BotVoice,
264
+ SttResponse,
265
+ InterruptData,
266
+ CameraCaptureRequest,
267
+ CharacterAction,
268
+ QuotaExceededData,
269
+ MemoryData,
270
+ MemoryUpdatedEvent,
271
+ EstuaryEventMap,
272
+ ParsedAction,
273
+ MemoryClient,
274
+ } from '@estuary-ai/sdk';
275
+ ```
276
+
141
277
  ## Requirements
142
278
 
143
279
  - Node.js 18+ or modern browser
package/dist/index.d.mts CHANGED
@@ -21,6 +21,8 @@ interface EstuaryConfig {
21
21
  voiceTransport?: VoiceTransport;
22
22
  /** Enable real-time memory extraction after each response (default: false) */
23
23
  realtimeMemory?: boolean;
24
+ /** Suppress mic during TTS playback (software AEC fallback, disables barge-in). Default: false */
25
+ suppressMicDuringPlayback?: boolean;
24
26
  }
25
27
  type VoiceTransport = 'websocket' | 'livekit' | 'auto';
26
28
  declare enum ConnectionState {
@@ -130,6 +132,8 @@ interface VoiceManager {
130
132
  start(): Promise<void>;
131
133
  stop(): Promise<void>;
132
134
  toggleMute(): void;
135
+ /** Suppress audio sending (software AEC). Optional: implementations without suppression support may omit it. */
136
+ setSuppressed?(suppressed: boolean): void;
133
137
  readonly isMuted: boolean;
134
138
  readonly isActive: boolean;
135
139
  dispose(): void;
package/dist/index.js CHANGED
@@ -4889,6 +4889,7 @@ var init_websocket_voice = __esm({
4889
4889
  scriptProcessor = null;
4890
4890
  sourceNode = null;
4891
4891
  _isMuted = false;
4892
+ _isSuppressed = false;
4892
4893
  _isActive = false;
4893
4894
  constructor(socketManager, sampleRate, logger) {
4894
4895
  this.socketManager = socketManager;
@@ -4911,7 +4912,13 @@ var init_websocket_voice = __esm({
4911
4912
  let stream;
4912
4913
  try {
4913
4914
  stream = await navigator.mediaDevices.getUserMedia({
4914
- audio: { sampleRate: this.sampleRate, channelCount: 1 }
4915
+ audio: {
4916
+ sampleRate: this.sampleRate,
4917
+ channelCount: 1,
4918
+ echoCancellation: true,
4919
+ noiseSuppression: true,
4920
+ autoGainControl: true
4921
+ }
4915
4922
  });
4916
4923
  } catch (err) {
4917
4924
  throw new exports.EstuaryError(
@@ -4928,7 +4935,7 @@ var init_websocket_voice = __esm({
4928
4935
  const nativeRate = this.audioContext.sampleRate;
4929
4936
  const targetRate = this.sampleRate;
4930
4937
  this.scriptProcessor.onaudioprocess = (event) => {
4931
- if (this._isMuted) return;
4938
+ if (this._isMuted || this._isSuppressed) return;
4932
4939
  const inputData = event.inputBuffer.getChannelData(0);
4933
4940
  let pcmFloat;
4934
4941
  if (nativeRate !== targetRate) {
@@ -4958,6 +4965,7 @@ var init_websocket_voice = __esm({
4958
4965
  this.cleanup();
4959
4966
  this._isActive = false;
4960
4967
  this._isMuted = false;
4968
+ this._isSuppressed = false;
4961
4969
  this.logger.debug("WebSocket voice stopped");
4962
4970
  }
4963
4971
  toggleMute() {
@@ -4968,10 +4976,15 @@ var init_websocket_voice = __esm({
4968
4976
  }
4969
4977
  this.logger.debug("Mute toggled:", this._isMuted);
4970
4978
  }
4979
+ setSuppressed(suppressed) {
4980
+ this._isSuppressed = suppressed;
4981
+ this.logger.debug("Audio suppression:", suppressed ? "on" : "off");
4982
+ }
4971
4983
  dispose() {
4972
4984
  this.cleanup();
4973
4985
  this._isActive = false;
4974
4986
  this._isMuted = false;
4987
+ this._isSuppressed = false;
4975
4988
  }
4976
4989
  cleanup() {
4977
4990
  if (this.scriptProcessor) {
@@ -9184,6 +9197,8 @@ var AudioPlayer = class {
9184
9197
  sampleRate;
9185
9198
  onEvent;
9186
9199
  audioContext = null;
9200
+ mediaStreamDest = null;
9201
+ audioElement = null;
9187
9202
  queue = [];
9188
9203
  currentSource = null;
9189
9204
  currentMessageId = null;
@@ -9219,6 +9234,16 @@ var AudioPlayer = class {
9219
9234
  }
9220
9235
  dispose() {
9221
9236
  this.clear();
9237
+ if (this.audioElement) {
9238
+ this.audioElement.pause();
9239
+ this.audioElement.srcObject = null;
9240
+ this.audioElement.remove();
9241
+ this.audioElement = null;
9242
+ }
9243
+ if (this.mediaStreamDest) {
9244
+ this.mediaStreamDest.disconnect();
9245
+ this.mediaStreamDest = null;
9246
+ }
9222
9247
  if (this.audioContext) {
9223
9248
  this.audioContext.close().catch(() => {
9224
9249
  });
@@ -9231,8 +9256,20 @@ var AudioPlayer = class {
9231
9256
  return null;
9232
9257
  }
9233
9258
  const AudioCtx = globalThis.AudioContext || globalThis.webkitAudioContext;
9234
- this.audioContext = new AudioCtx({ sampleRate: this.sampleRate });
9235
- return this.audioContext;
9259
+ const ctx = new AudioCtx({ sampleRate: this.sampleRate });
9260
+ this.audioContext = ctx;
9261
+ if (typeof document !== "undefined") {
9262
+ this.mediaStreamDest = ctx.createMediaStreamDestination();
9263
+ const el = document.createElement("audio");
9264
+ el.srcObject = this.mediaStreamDest.stream;
9265
+ el.autoplay = true;
9266
+ el.style.display = "none";
9267
+ document.body.appendChild(el);
9268
+ el.play().catch(() => {
9269
+ });
9270
+ this.audioElement = el;
9271
+ }
9272
+ return ctx;
9236
9273
  }
9237
9274
  playNext() {
9238
9275
  const ctx = this.getAudioContext();
@@ -9255,7 +9292,7 @@ var AudioPlayer = class {
9255
9292
  this.isPlaying = true;
9256
9293
  const source = ctx.createBufferSource();
9257
9294
  source.buffer = buffer;
9258
- source.connect(ctx.destination);
9295
+ source.connect(this.mediaStreamDest ?? ctx.destination);
9259
9296
  this.currentSource = source;
9260
9297
  source.onended = () => {
9261
9298
  this.currentSource = null;
@@ -9418,6 +9455,9 @@ var EstuaryClient = class extends TypedEventEmitter {
9418
9455
  this.ensureConnected();
9419
9456
  this.socketManager.emitEvent("client_interrupt", { message_id: messageId });
9420
9457
  this.audioPlayer?.clear();
9458
+ if (this.config.suppressMicDuringPlayback) {
9459
+ this.voiceManager?.setSuppressed?.(false);
9460
+ }
9421
9461
  }
9422
9462
  /** Send a camera image for vision processing */
9423
9463
  sendCameraImage(imageBase64, mimeType, requestId, text) {
@@ -9456,9 +9496,15 @@ var EstuaryClient = class extends TypedEventEmitter {
9456
9496
  this.audioPlayer = new AudioPlayer(sampleRate, (event) => {
9457
9497
  if (event.type === "started") {
9458
9498
  this.emit("audioPlaybackStarted", event.messageId);
9499
+ if (this.config.suppressMicDuringPlayback) {
9500
+ this.voiceManager?.setSuppressed?.(true);
9501
+ }
9459
9502
  } else if (event.type === "complete") {
9460
9503
  this.emit("audioPlaybackComplete", event.messageId);
9461
9504
  this.notifyAudioPlaybackComplete(event.messageId);
9505
+ if (this.config.suppressMicDuringPlayback) {
9506
+ this.voiceManager?.setSuppressed?.(false);
9507
+ }
9462
9508
  }
9463
9509
  });
9464
9510
  }
@@ -9513,6 +9559,9 @@ var EstuaryClient = class extends TypedEventEmitter {
9513
9559
  this.socketManager.on("interrupt", (data) => {
9514
9560
  this.audioPlayer?.clear();
9515
9561
  this.actionParsers.clear();
9562
+ if (this.config.suppressMicDuringPlayback) {
9563
+ this.voiceManager?.setSuppressed?.(false);
9564
+ }
9516
9565
  this.emit("interrupt", data);
9517
9566
  });
9518
9567
  this.socketManager.on("error", (error) => this.emit("error", error));