npm - @estuary-ai/sdk - Versions diffs - 0.1.3 → 0.1.5 - Mend

@estuary-ai/sdk 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +138 -2
package/dist/index.d.mts +35 -1
package/dist/index.js +101 -3
package/dist/index.js.map +1 -1
package/dist/index.mjs +87 -5
package/dist/index.mjs.map +1 -1
package/dist/{websocket-voice-A4CK3UTM.mjs → websocket-voice-HYHCIYEW.mjs} +17 -4
package/dist/websocket-voice-HYHCIYEW.mjs.map +1 -0
package/package.json +1 -1
package/dist/websocket-voice-A4CK3UTM.mjs.map +0 -1

package/README.md CHANGED Viewed

@@ -74,6 +74,50 @@ await client.startVoice();
 client.toggleMute();
 ```
+### Interrupts
+Interrupt the bot's current response (stops audio playback and generation):
+```typescript
+client.interrupt();                // interrupt current response
+client.interrupt('msg_abc123');    // interrupt a specific message
+```
+### Vision / Camera
+Send images for vision processing. The server may also request captures via the `cameraCaptureRequest` event.
+```typescript
+// Send a camera image proactively
+client.sendCameraImage(base64Image, 'image/jpeg');
+// Respond to a server-initiated capture request
+client.on('cameraCaptureRequest', (request) => {
+  const image = captureFrame(); // your capture logic
+  client.sendCameraImage(image, 'image/jpeg', request.requestId, request.text);
+});
+```
+### Character Actions
+Bot responses can include inline action tags (e.g., `<action name="wave" target="user"/>`). The SDK automatically parses these, strips them from `botResponse.text`, and emits `characterAction` events:
+```typescript
+client.on('characterAction', (action) => {
+  console.log(action.name);      // e.g., "wave"
+  console.log(action.params);    // e.g., { target: "user" }
+  console.log(action.messageId); // originating message
+});
+```
+For non-streaming contexts, use the `parseActions` utility:
+```typescript
+import { parseActions } from '@estuary-ai/sdk';
+const { actions, cleanText } = parseActions(rawBotText);
+```
 ### Memory & Knowledge Graph
 ```typescript
@@ -81,6 +125,9 @@ const memories = await client.memory.getMemories({ status: 'active', limit: 50 }
 const facts = await client.memory.getCoreFacts();
 const graph = await client.memory.getGraph({ includeEntities: true });
 const results = await client.memory.search('favorite food');
+const timeline = await client.memory.getTimeline({ groupBy: 'week' });
+const stats = await client.memory.getStats();
+await client.memory.deleteAll(true); // pass true to confirm
 ```
 ### Real-Time Memory Extraction
@@ -109,17 +156,69 @@ await client.connect();
 ## Events
 ```typescript
+// Connection
 client.on('connected', (session) => { /* authenticated */ });
 client.on('disconnected', (reason) => { /* lost connection */ });
-client.on('botResponse', (response) => { /* streaming text */ });
+client.on('reconnecting', (attempt) => { /* reconnect attempt number */ });
+client.on('connectionStateChanged', (state) => { /* ConnectionState enum */ });
+client.on('authError', (error) => { /* authentication failed */ });
+// Conversation
+client.on('botResponse', (response) => { /* streaming text (actions auto-stripped) */ });
 client.on('botVoice', (voice) => { /* audio chunk */ });
-client.on('sttResponse', (stt) => { /* speech-to-text */ });
+client.on('sttResponse', (stt) => { /* speech-to-text transcript */ });
 client.on('interrupt', (data) => { /* response interrupted */ });
+client.on('characterAction', (action) => { /* parsed action from bot response */ });
+client.on('cameraCaptureRequest', (request) => { /* server requests a camera image */ });
+// Voice
+client.on('voiceStarted', () => { /* voice session began */ });
+client.on('voiceStopped', () => { /* voice session ended */ });
+client.on('livekitConnected', (room) => { /* joined LiveKit room */ });
+client.on('livekitDisconnected', () => { /* left LiveKit room */ });
+// Audio playback
+client.on('audioPlaybackStarted', (messageId) => { /* bot audio started playing */ });
+client.on('audioPlaybackComplete', (messageId) => { /* bot audio finished playing */ });
+// Memory
 client.on('memoryUpdated', (event) => { /* real-time memory extraction */ });
+// Errors & limits
 client.on('error', (error) => { /* EstuaryError */ });
 client.on('quotaExceeded', (data) => { /* rate limited */ });
 ```
+## Error Handling
+Errors are instances of `EstuaryError` with a typed `code` field:
+```typescript
+import { EstuaryError, ErrorCode } from '@estuary-ai/sdk';
+client.on('error', (error) => {
+  if (error instanceof EstuaryError) {
+    switch (error.code) {
+      case ErrorCode.NOT_CONNECTED:
+      case ErrorCode.CONNECTION_FAILED:
+      case ErrorCode.CONNECTION_TIMEOUT:
+        // connection issues
+        break;
+      case ErrorCode.AUTH_FAILED:
+        // bad API key or character ID
+        break;
+      case ErrorCode.MICROPHONE_DENIED:
+        // user denied mic permission
+        break;
+    }
+  }
+});
+client.on('authError', (message) => {
+  console.error('Authentication failed:', message);
+});
+```
 ## Configuration
 ```typescript
@@ -135,9 +234,46 @@ interface EstuaryConfig {
   debug?: boolean;             // Default: false
   voiceTransport?: 'websocket' | 'livekit' | 'auto'; // Default: 'auto'
   realtimeMemory?: boolean;    // Enable real-time memory extraction events. Default: false
+  suppressMicDuringPlayback?: boolean; // Mute mic while bot audio plays (software AEC). Default: false
 }
 ```
+## Exports
+Key exports for TypeScript users:
+```typescript
+// Client
+import { EstuaryClient } from '@estuary-ai/sdk';
+// Errors
+import { EstuaryError, ErrorCode } from '@estuary-ai/sdk';
+// Enums
+import { ConnectionState } from '@estuary-ai/sdk';
+// Utilities
+import { parseActions } from '@estuary-ai/sdk';
+// Types (import type)
+import type {
+  EstuaryConfig,
+  SessionInfo,
+  BotResponse,
+  BotVoice,
+  SttResponse,
+  InterruptData,
+  CameraCaptureRequest,
+  CharacterAction,
+  QuotaExceededData,
+  MemoryData,
+  MemoryUpdatedEvent,
+  EstuaryEventMap,
+  ParsedAction,
+  MemoryClient,
+} from '@estuary-ai/sdk';
+```
 ## Requirements
 - Node.js 18+ or modern browser

package/dist/index.d.mts CHANGED Viewed

@@ -21,6 +21,8 @@ interface EstuaryConfig {
     voiceTransport?: VoiceTransport;
     /** Enable real-time memory extraction after each response (default: false) */
     realtimeMemory?: boolean;
+    /** Suppress mic during TTS playback (software AEC fallback, disables barge-in). Default: false */
+    suppressMicDuringPlayback?: boolean;
 }
 type VoiceTransport = 'websocket' | 'livekit' | 'auto';
 declare enum ConnectionState {
@@ -96,6 +98,14 @@ interface MemoryUpdatedEvent {
     newMemories: MemoryData[];
     timestamp: string;
 }
+interface CharacterAction {
+    /** Action name (e.g., "follow_user", "sit", "look_at") */
+    name: string;
+    /** Action parameters as key-value pairs */
+    params: Record<string, string>;
+    /** Message ID of the bot response that contained this action */
+    messageId: string;
+}
 type EstuaryEventMap = {
     connected: (session: SessionInfo) => void;
     disconnected: (reason: string) => void;
@@ -109,6 +119,7 @@ type EstuaryEventMap = {
     authError: (error: string) => void;
     quotaExceeded: (data: QuotaExceededData) => void;
     cameraCaptureRequest: (request: CameraCaptureRequest) => void;
+    characterAction: (action: CharacterAction) => void;
     voiceStarted: () => void;
     voiceStopped: () => void;
     livekitConnected: (room: string) => void;
@@ -121,6 +132,8 @@ interface VoiceManager {
     start(): Promise<void>;
     stop(): Promise<void>;
     toggleMute(): void;
+    /** Suppress audio sending (software AEC). No-op if not supported. */
+    setSuppressed?(suppressed: boolean): void;
     readonly isMuted: boolean;
     readonly isActive: boolean;
     dispose(): void;
@@ -229,6 +242,7 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
     private audioPlayer;
     private _memory;
     private _sessionInfo;
+    private actionParsers;
     constructor(config: EstuaryConfig);
     /** Memory API client for querying memories, graphs, and facts */
     get memory(): MemoryClient;
@@ -266,6 +280,7 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
     get isVoiceActive(): boolean;
     private ensureConnected;
     private forwardSocketEvents;
+    private handleBotResponse;
     private handleBotVoice;
 }
@@ -289,4 +304,23 @@ declare class EstuaryError extends Error {
     constructor(code: ErrorCode, message: string, details?: unknown);
 }
-export { type BotResponse, type BotVoice, type CameraCaptureRequest, ConnectionState, type CoreFactsResponse, ErrorCode, EstuaryClient, type EstuaryConfig, EstuaryError, type EstuaryEventMap, type InterruptData, type LiveKitTokenResponse, MemoryClient, type MemoryData, type MemoryGraphOptions, type MemoryGraphResponse, type MemoryListOptions, type MemoryListResponse, type MemorySearchOptions, type MemorySearchResponse, type MemoryStatsResponse, type MemoryTimelineOptions, type MemoryTimelineResponse, type MemoryUpdatedEvent, type QuotaExceededData, type SessionInfo, type SttResponse, type VoiceManager, type VoiceTransport };
+/**
+ * Parses `<action name="..." .../>` XML tags from bot response text.
+ *
+ * Designed for streaming: call `parse()` with the accumulated text on each
+ * chunk and it returns only newly-discovered actions since the last call.
+ */
+interface ParsedAction {
+    name: string;
+    params: Record<string, string>;
+}
+/**
+ * One-shot parse: extract all actions and return clean text.
+ * Useful for non-streaming contexts.
+ */
+declare function parseActions(text: string): {
+    actions: ParsedAction[];
+    cleanText: string;
+};
+export { type BotResponse, type BotVoice, type CameraCaptureRequest, type CharacterAction, ConnectionState, type CoreFactsResponse, ErrorCode, EstuaryClient, type EstuaryConfig, EstuaryError, type EstuaryEventMap, type InterruptData, type LiveKitTokenResponse, MemoryClient, type MemoryData, type MemoryGraphOptions, type MemoryGraphResponse, type MemoryListOptions, type MemoryListResponse, type MemorySearchOptions, type MemorySearchResponse, type MemoryStatsResponse, type MemoryTimelineOptions, type MemoryTimelineResponse, type MemoryUpdatedEvent, type ParsedAction, type QuotaExceededData, type SessionInfo, type SttResponse, type VoiceManager, type VoiceTransport, parseActions };

package/dist/index.js CHANGED Viewed

@@ -4889,6 +4889,7 @@ var init_websocket_voice = __esm({
       scriptProcessor = null;
       sourceNode = null;
       _isMuted = false;
+      _isSuppressed = false;
       _isActive = false;
       constructor(socketManager, sampleRate, logger) {
         this.socketManager = socketManager;
@@ -4911,7 +4912,13 @@ var init_websocket_voice = __esm({
         let stream;
         try {
           stream = await navigator.mediaDevices.getUserMedia({
-            audio: { sampleRate: this.sampleRate, channelCount: 1 }
+            audio: {
+              sampleRate: this.sampleRate,
+              channelCount: 1,
+              echoCancellation: true,
+              noiseSuppression: true,
+              autoGainControl: true
+            }
           });
         } catch (err) {
           throw new exports.EstuaryError(
@@ -4928,7 +4935,7 @@ var init_websocket_voice = __esm({
         const nativeRate = this.audioContext.sampleRate;
         const targetRate = this.sampleRate;
         this.scriptProcessor.onaudioprocess = (event) => {
-          if (this._isMuted) return;
+          if (this._isMuted || this._isSuppressed) return;
           const inputData = event.inputBuffer.getChannelData(0);
           let pcmFloat;
           if (nativeRate !== targetRate) {
@@ -4958,6 +4965,7 @@ var init_websocket_voice = __esm({
         this.cleanup();
         this._isActive = false;
         this._isMuted = false;
+        this._isSuppressed = false;
         this.logger.debug("WebSocket voice stopped");
       }
       toggleMute() {
@@ -4968,10 +4976,15 @@ var init_websocket_voice = __esm({
         }
         this.logger.debug("Mute toggled:", this._isMuted);
       }
+      setSuppressed(suppressed) {
+        this._isSuppressed = suppressed;
+        this.logger.debug("Audio suppression:", suppressed ? "on" : "off");
+      }
       dispose() {
         this.cleanup();
         this._isActive = false;
         this._isMuted = false;
+        this._isSuppressed = false;
       }
       cleanup() {
         if (this.scriptProcessor) {
@@ -9310,6 +9323,53 @@ var Logger = class {
 // src/client.ts
 init_errors();
+// src/utils/action-parser.ts
+var ACTION_TAG_RE = /<action\s+([^>]*?)\/>/gi;
+var ATTR_RE = /(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*'([^']*)'/g;
+function parseAttributes(attrString) {
+  const attrs = {};
+  let match;
+  while ((match = ATTR_RE.exec(attrString)) !== null) {
+    const key = match[1] ?? match[3];
+    const value2 = match[2] ?? match[4];
+    attrs[key] = value2;
+  }
+  return attrs;
+}
+var StreamingActionParser = class {
+  emittedCount = 0;
+  /**
+   * Parse the accumulated response text and return any new actions found
+   * since the last call. Also returns the text with all action tags stripped.
+   */
+  parse(accumulatedText) {
+    const allActions = [];
+    let match;
+    ACTION_TAG_RE.lastIndex = 0;
+    while ((match = ACTION_TAG_RE.exec(accumulatedText)) !== null) {
+      const attrs = parseAttributes(match[1]);
+      const name = attrs.name;
+      if (name) {
+        delete attrs.name;
+        allActions.push({ name, params: attrs });
+      }
+    }
+    const newActions = allActions.slice(this.emittedCount);
+    this.emittedCount = allActions.length;
+    const cleanText = accumulatedText.replace(ACTION_TAG_RE, "").replace(/\s{2,}/g, " ").trim();
+    return { actions: newActions, cleanText };
+  }
+  reset() {
+    this.emittedCount = 0;
+  }
+};
+function parseActions(text) {
+  const parser = new StreamingActionParser();
+  return parser.parse(text);
+}
+// src/client.ts
 var DEFAULT_SAMPLE_RATE = 16e3;
 var EstuaryClient = class extends TypedEventEmitter {
   config;
@@ -9319,6 +9379,7 @@ var EstuaryClient = class extends TypedEventEmitter {
   audioPlayer = null;
   _memory;
   _sessionInfo = null;
+  actionParsers = /* @__PURE__ */ new Map();
   constructor(config) {
     super();
     this.config = config;
@@ -9370,6 +9431,9 @@ var EstuaryClient = class extends TypedEventEmitter {
     this.ensureConnected();
     this.socketManager.emitEvent("client_interrupt", { message_id: messageId });
     this.audioPlayer?.clear();
+    if (this.config.suppressMicDuringPlayback) {
+      this.voiceManager?.setSuppressed?.(false);
+    }
   }
   /** Send a camera image for vision processing */
   sendCameraImage(imageBase64, mimeType, requestId, text) {
@@ -9408,9 +9472,15 @@ var EstuaryClient = class extends TypedEventEmitter {
       this.audioPlayer = new AudioPlayer(sampleRate, (event) => {
         if (event.type === "started") {
           this.emit("audioPlaybackStarted", event.messageId);
+          if (this.config.suppressMicDuringPlayback) {
+            this.voiceManager?.setSuppressed?.(true);
+          }
         } else if (event.type === "complete") {
           this.emit("audioPlaybackComplete", event.messageId);
           this.notifyAudioPlaybackComplete(event.messageId);
+          if (this.config.suppressMicDuringPlayback) {
+            this.voiceManager?.setSuppressed?.(false);
+          }
         }
       });
     }
@@ -9454,15 +9524,20 @@ var EstuaryClient = class extends TypedEventEmitter {
     });
     this.socketManager.on("disconnected", (reason) => {
       this._sessionInfo = null;
+      this.actionParsers.clear();
       this.emit("disconnected", reason);
     });
     this.socketManager.on("reconnecting", (attempt) => this.emit("reconnecting", attempt));
     this.socketManager.on("connectionStateChanged", (state) => this.emit("connectionStateChanged", state));
-    this.socketManager.on("botResponse", (response) => this.emit("botResponse", response));
+    this.socketManager.on("botResponse", (response) => this.handleBotResponse(response));
     this.socketManager.on("botVoice", (voice) => this.handleBotVoice(voice));
     this.socketManager.on("sttResponse", (response) => this.emit("sttResponse", response));
     this.socketManager.on("interrupt", (data) => {
       this.audioPlayer?.clear();
+      this.actionParsers.clear();
+      if (this.config.suppressMicDuringPlayback) {
+        this.voiceManager?.setSuppressed?.(false);
+      }
       this.emit("interrupt", data);
     });
     this.socketManager.on("error", (error) => this.emit("error", error));
@@ -9473,6 +9548,28 @@ var EstuaryClient = class extends TypedEventEmitter {
     this.socketManager.on("livekitDisconnected", () => this.emit("livekitDisconnected"));
     this.socketManager.on("memoryUpdated", (event) => this.emit("memoryUpdated", event));
   }
+  handleBotResponse(response) {
+    const { messageId } = response;
+    if (!this.actionParsers.has(messageId)) {
+      this.actionParsers.set(messageId, new StreamingActionParser());
+    }
+    const parser = this.actionParsers.get(messageId);
+    const { actions, cleanText } = parser.parse(response.text);
+    for (const action of actions) {
+      this.emit("characterAction", {
+        name: action.name,
+        params: action.params,
+        messageId
+      });
+    }
+    this.emit("botResponse", {
+      ...response,
+      text: cleanText
+    });
+    if (response.isFinal) {
+      this.actionParsers.delete(messageId);
+    }
+  }
   handleBotVoice(voice) {
     this.emit("botVoice", voice);
     this.audioPlayer?.enqueue(voice);
@@ -9500,5 +9597,6 @@ xmlhttprequest-ssl/lib/XMLHttpRequest.js:
 exports.ConnectionState = ConnectionState;
 exports.EstuaryClient = EstuaryClient;
+exports.parseActions = parseActions;
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map