@keyframelabs/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -0
- package/package.json +39 -0
- package/src/PersonaSession.ts +324 -0
- package/src/audio-utils.ts +112 -0
- package/src/index.ts +54 -0
- package/src/types.ts +43 -0
package/README.md
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# @keyframelabs/sdk
|
|
2
|
+
|
|
3
|
+
Browser SDK for KeyframeLab's Persona avatar sessions. Connect any voice AI agent to a real-time avatar.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @keyframelabs/sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
On your node / python backend, create a session using your KeyframeLabs API key.
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
const response = await fetch('https://api.keyframelabs.com/v1/session', {
|
|
17
|
+
method: 'POST',
|
|
18
|
+
headers: {
|
|
19
|
+
'Content-Type': 'application/json',
|
|
20
|
+
Authorization: `Bearer ${KFL_API_KEY}`,
|
|
21
|
+
},
|
|
22
|
+
body: JSON.stringify({
|
|
23
|
+
persona_id: "luna", // or cosmo or astro, etc.
|
|
24
|
+
model_id: "persona-1-live"
|
|
25
|
+
}),
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
if (!response.ok) {
|
|
29
|
+
throw new Error(`HTTP error! status: ${response.status}`);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// Contains your serverUrl and participantToken
|
|
33
|
+
const result = await response.json();
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Then, using the client on the browser:
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
import { createClient } from '@keyframelabs/sdk';
|
|
40
|
+
|
|
41
|
+
// Create a Persona client
|
|
42
|
+
const persona = createClient({
|
|
43
|
+
serverUrl: "wss://...",
|
|
44
|
+
participantToken: "A6gB...",
|
|
45
|
+
onVideoTrack: (track) => {
|
|
46
|
+
// Some HTML video element
|
|
47
|
+
videoElement.srcObject = new MediaStream([track]);
|
|
48
|
+
},
|
|
49
|
+
onAudioTrack: (track) => {
|
|
50
|
+
// Some HTML audio element
|
|
51
|
+
audioElement.srcObject = new MediaStream([track]);
|
|
52
|
+
},
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
// Connect to the avatar
|
|
56
|
+
await persona.connect();
|
|
57
|
+
|
|
58
|
+
// Send audio from your voice AI agent (24kHz 16-bit PCM)
|
|
59
|
+
persona.sendAudio(pcmAudioBytes);
|
|
60
|
+
|
|
61
|
+
// Signal an interruption (clears pending frames)
|
|
62
|
+
persona.interrupt();
|
|
63
|
+
|
|
64
|
+
// Close when done
|
|
65
|
+
await persona.close();
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## API
|
|
69
|
+
|
|
70
|
+
### `createClient(options)`
|
|
71
|
+
|
|
72
|
+
Create a new Persona client.
|
|
73
|
+
|
|
74
|
+
**Options:**
|
|
75
|
+
- `serverUrl` - WebSocket server URL returned by the session API
- `participantToken` - Participant access token returned by the session API
- `agentIdentity` - Identity of the agent participant in the room
|
|
78
|
+
- `onVideoTrack` - Callback when video track is available
|
|
79
|
+
- `onAudioTrack` - Callback when audio track is available
|
|
80
|
+
- `onStateChange` - Callback when session state changes
|
|
81
|
+
- `onError` - Callback on error
|
|
82
|
+
|
|
83
|
+
### `PersonaSession`
|
|
84
|
+
|
|
85
|
+
The client instance returned by `createClient()`.
|
|
86
|
+
|
|
87
|
+
**Methods:**
|
|
88
|
+
- `connect()` - Connect to the avatar session
|
|
89
|
+
- `sendAudio(pcmData)` - Send 24kHz 16-bit PCM audio
|
|
90
|
+
- `interrupt()` - Signal an interruption (clears pending frames)
|
|
91
|
+
- `close()` - Close the session
|
|
92
|
+
|
|
93
|
+
**Properties:**
|
|
94
|
+
- `state` - Current session state ('disconnected', 'connecting', 'connected', 'error')
|
|
95
|
+
|
|
96
|
+
## Integrating Voice AI Agents
|
|
97
|
+
|
|
98
|
+
The SDK is intentionally minimal - it only handles the avatar connection. You bring your own voice AI agent (Gemini, ElevenLabs, OpenAI, etc.).
|
|
99
|
+
|
|
100
|
+
Example integration pattern:
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
import { createClient } from '@keyframelabs/sdk';
|
|
104
|
+
// Your agent implementation (copy from experiments/src/agents/)
|
|
105
|
+
import { GeminiLiveAgent } from './agents/gemini-live';
|
|
106
|
+
|
|
107
|
+
const persona = createClient({
|
|
108
|
+
  serverUrl: "wss://...",
  participantToken: "A6gB...",
  agentIdentity: "agent",
|
|
109
|
+
onVideoTrack: (track) => {
|
|
110
|
+
videoElement.srcObject = new MediaStream([track]);
|
|
111
|
+
},
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
const agent = new GeminiLiveAgent();
|
|
115
|
+
|
|
116
|
+
// Wire agent audio to persona
|
|
117
|
+
agent.on('audio', (pcmData) => persona.sendAudio(pcmData));
|
|
118
|
+
|
|
119
|
+
// Handle interruptions
|
|
120
|
+
agent.on('interrupted', () => persona.interrupt());
|
|
121
|
+
|
|
122
|
+
// Connect both
|
|
123
|
+
await persona.connect();
|
|
124
|
+
await agent.connect({ apiKey: 'your-gemini-key' });
|
|
125
|
+
|
|
126
|
+
// Send microphone audio to agent
|
|
127
|
+
// (capture and send PCM to agent.sendAudio())
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Architecture
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
Browser GPU Node
|
|
134
|
+
┌─────────────────┐ ┌─────────────────┐
|
|
135
|
+
│ Microphone │──PCM 16kHz──▶│ │
|
|
136
|
+
│ ↓ │ │ │
|
|
137
|
+
│ Voice AI Agent │ │ AvatarSession │
|
|
138
|
+
│ ↓ │ │ │
|
|
139
|
+
│ PersonaSession │──DataStream─▶│ ↓ │
|
|
140
|
+
│ ↑ │ │ Inference │
|
|
141
|
+
│ Video Element │◀──WebRTC────│ ↓ │
|
|
142
|
+
└─────────────────┘ │ Video │
|
|
143
|
+
└─────────────────┘
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@keyframelabs/sdk",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Browser SDK for KeyframeLab's Persona avatar sessions",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"types": "./dist/index.d.ts"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"dist",
|
|
16
|
+
"src"
|
|
17
|
+
],
|
|
18
|
+
"publishConfig": {
|
|
19
|
+
"access": "public"
|
|
20
|
+
},
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"livekit-client": "^2.15.14"
|
|
23
|
+
},
|
|
24
|
+
"devDependencies": {
|
|
25
|
+
"typescript": "^5.9.3"
|
|
26
|
+
},
|
|
27
|
+
"peerDependencies": {
|
|
28
|
+
"@google/genai": "^1.0.0"
|
|
29
|
+
},
|
|
30
|
+
"peerDependenciesMeta": {
|
|
31
|
+
"@google/genai": {
|
|
32
|
+
"optional": true
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
"scripts": {
|
|
36
|
+
"build": "tsc",
|
|
37
|
+
"dev": "tsc --watch"
|
|
38
|
+
}
|
|
39
|
+
}
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PersonaSession - Core SDK class for connecting to Persona avatars.
|
|
3
|
+
*
|
|
4
|
+
* Handles:
|
|
5
|
+
* - Creating sessions via Central API
|
|
6
|
+
* - Connecting to LiveKit room as "user" participant
|
|
7
|
+
* - Sending audio to "persona" via DataStream
|
|
8
|
+
* - Receiving video from "persona"
|
|
9
|
+
* - Interrupt signaling
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import {
|
|
13
|
+
Room,
|
|
14
|
+
RoomEvent,
|
|
15
|
+
Track,
|
|
16
|
+
RemoteTrack,
|
|
17
|
+
RemoteParticipant,
|
|
18
|
+
DisconnectReason,
|
|
19
|
+
} from 'livekit-client';
|
|
20
|
+
|
|
21
|
+
// ByteStreamWriter type from livekit-client (not exported directly)
|
|
22
|
+
type ByteStreamWriter = Awaited<ReturnType<Room['localParticipant']['streamBytes']>>;
|
|
23
|
+
import type {
|
|
24
|
+
CloseReason,
|
|
25
|
+
PersonaSessionOptions,
|
|
26
|
+
SessionState
|
|
27
|
+
} from './types.js';
|
|
28
|
+
|
|
29
|
+
/** Map LiveKit DisconnectReason to our CloseReason */
|
|
30
|
+
function mapDisconnectReason(reason?: DisconnectReason): CloseReason {
|
|
31
|
+
switch (reason) {
|
|
32
|
+
case DisconnectReason.CLIENT_INITIATED:
|
|
33
|
+
return 'client_initiated';
|
|
34
|
+
case DisconnectReason.ROOM_DELETED:
|
|
35
|
+
return 'room_deleted';
|
|
36
|
+
case DisconnectReason.SERVER_SHUTDOWN:
|
|
37
|
+
return 'server_shutdown';
|
|
38
|
+
case DisconnectReason.PARTICIPANT_REMOVED:
|
|
39
|
+
return 'participant_removed';
|
|
40
|
+
default:
|
|
41
|
+
return 'unknown';
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/** DataStream topic for audio (must match GPU node) */
const AUDIO_STREAM_TOPIC = 'lk.audio_stream';

/** DataStream topic for control messages (interrupt, etc.) */
const CONTROL_STREAM_TOPIC = 'lk.control';

/**
 * Auto-flush timeout in ms after the last audio chunk.
 * endAudioTurn() fires this long after the most recent sendAudio() call,
 * closing the audio stream to signal end-of-turn to the GPU node.
 */
const AUTO_FLUSH_TIMEOUT_MS = 300;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* PersonaSession manages the connection between your app and a Persona avatar.
|
|
56
|
+
*
|
|
57
|
+
* @example
|
|
58
|
+
* ```typescript
|
|
59
|
+
* const session = new PersonaSession({
|
|
60
|
+
* // WebRTC URL, access token, and agent identity returned from KeyframeLab API (called from your server code)
|
|
61
|
+
* serverUrl: websocket-url,
|
|
62
|
+
* participantToken: webrtc-participant-token,
|
|
63
|
+
* agentIdentity: agent_identity,
|
|
64
|
+
* onVideoTrack: (track) => {
|
|
65
|
+
* videoElement.srcObject = new MediaStream([track]);
|
|
66
|
+
* },
|
|
67
|
+
* });
|
|
68
|
+
*
|
|
69
|
+
* await session.connect();
|
|
70
|
+
* session.sendAudio(audioBytes);
|
|
71
|
+
* await session.close();
|
|
72
|
+
* ```
|
|
73
|
+
*/
|
|
74
|
+
export class PersonaSession {
|
|
75
|
+
private options: Required<Pick<PersonaSessionOptions, 'serverUrl' | 'participantToken' | 'agentIdentity'>> &
|
|
76
|
+
PersonaSessionOptions;
|
|
77
|
+
private room: Room | null = null;
|
|
78
|
+
private _state: SessionState = 'disconnected';
|
|
79
|
+
private byteStreamWriter: ByteStreamWriter | null = null;
|
|
80
|
+
|
|
81
|
+
// Write queue for proper ordering and completion tracking
|
|
82
|
+
private writeQueue: Promise<void> = Promise.resolve();
|
|
83
|
+
|
|
84
|
+
// Auto-flush timer for turn end detection
|
|
85
|
+
private autoFlushTimer: ReturnType<typeof setTimeout> | null = null;
|
|
86
|
+
|
|
87
|
+
constructor(options: PersonaSessionOptions) {
|
|
88
|
+
this.options = { ...options };
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/** Current session state */
|
|
92
|
+
get state(): SessionState {
|
|
93
|
+
return this._state;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
private setState(state: SessionState): void {
|
|
97
|
+
if (this._state !== state) {
|
|
98
|
+
this._state = state;
|
|
99
|
+
this.options.onStateChange?.(state);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Connect to a Persona avatar session.
|
|
105
|
+
*
|
|
106
|
+
* Creates a session via Central API and connects to the LiveKit room.
|
|
107
|
+
*/
|
|
108
|
+
async connect(): Promise<void> {
|
|
109
|
+
if (this._state !== 'disconnected') {
|
|
110
|
+
throw new Error('Session already connected or connecting');
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
this.setState('connecting');
|
|
114
|
+
|
|
115
|
+
try {
|
|
116
|
+
// 1. Connect to LiveKit room
|
|
117
|
+
this.room = new Room();
|
|
118
|
+
this.setupRoomListeners();
|
|
119
|
+
|
|
120
|
+
await this.room.connect(this.options.serverUrl, this.options.participantToken);
|
|
121
|
+
|
|
122
|
+
// Open ByteStream for audio
|
|
123
|
+
this.byteStreamWriter = await this.room.localParticipant.streamBytes({
|
|
124
|
+
topic: AUDIO_STREAM_TOPIC,
|
|
125
|
+
destinationIdentities: [this.options.agentIdentity],
|
|
126
|
+
});
|
|
127
|
+
console.log('[PersonaSession] Opened audio stream');
|
|
128
|
+
|
|
129
|
+
this.setState('connected');
|
|
130
|
+
console.log('[PersonaSession] Connected to room');
|
|
131
|
+
} catch (error) {
|
|
132
|
+
this.setState('error');
|
|
133
|
+
this.options.onError?.(error instanceof Error ? error : new Error(String(error)));
|
|
134
|
+
throw error;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
private setupRoomListeners(): void {
|
|
139
|
+
if (!this.room) return;
|
|
140
|
+
|
|
141
|
+
// Handle tracks from persona
|
|
142
|
+
this.room.on(RoomEvent.TrackSubscribed, (track: RemoteTrack, _pub, participant: RemoteParticipant) => {
|
|
143
|
+
if (participant.identity !== this.options.agentIdentity) return;
|
|
144
|
+
|
|
145
|
+
const mediaTrack = track.mediaStreamTrack;
|
|
146
|
+
|
|
147
|
+
if (track.kind === Track.Kind.Video) {
|
|
148
|
+
console.log('[PersonaSession] Got video track from persona');
|
|
149
|
+
this.options.onVideoTrack?.(mediaTrack);
|
|
150
|
+
} else if (track.kind === Track.Kind.Audio) {
|
|
151
|
+
console.log('[PersonaSession] Got audio track from persona');
|
|
152
|
+
this.options.onAudioTrack?.(mediaTrack);
|
|
153
|
+
}
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
// Handle disconnection
|
|
157
|
+
this.room.on(RoomEvent.Disconnected, (reason?: DisconnectReason) => {
|
|
158
|
+
const closeReason = mapDisconnectReason(reason);
|
|
159
|
+
console.log('[PersonaSession] Disconnected from room:', closeReason);
|
|
160
|
+
this.setState('disconnected');
|
|
161
|
+
this.options.onClose?.(closeReason);
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
// Handle errors
|
|
165
|
+
this.room.on(RoomEvent.MediaDevicesError, (error: Error) => {
|
|
166
|
+
console.error('[PersonaSession] Media devices error:', error);
|
|
167
|
+
this.options.onError?.(error);
|
|
168
|
+
});
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Send audio data to the persona for video synthesis.
|
|
173
|
+
*
|
|
174
|
+
* @param pcmData - 16-bit PCM audio at 24kHz
|
|
175
|
+
*/
|
|
176
|
+
sendAudio(pcmData: Uint8Array): void {
|
|
177
|
+
if (!this.room || this._state !== 'connected') {
|
|
178
|
+
console.warn('[PersonaSession] sendAudio dropped - not connected');
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Chain writes to ensure ordering and track completion
|
|
183
|
+
// Lazily create ByteStreamWriter if needed (e.g., after endAudioTurn closed it)
|
|
184
|
+
console.log(`[PersonaSession] Writing ${pcmData.length} bytes to ByteStream`);
|
|
185
|
+
this.writeQueue = this.writeQueue
|
|
186
|
+
.then(async () => {
|
|
187
|
+
if (!this.byteStreamWriter && this.room) {
|
|
188
|
+
this.byteStreamWriter = await this.room.localParticipant.streamBytes({
|
|
189
|
+
topic: AUDIO_STREAM_TOPIC,
|
|
190
|
+
destinationIdentities: [this.options.agentIdentity],
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
await this.byteStreamWriter?.write(pcmData);
|
|
194
|
+
})
|
|
195
|
+
.catch((err) => {
|
|
196
|
+
console.warn('[PersonaSession] Failed to write audio:', err);
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
// Schedule auto-flush after timeout (reset on each audio chunk)
|
|
200
|
+
this.scheduleAutoFlush();
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Schedule auto-flush after a timeout.
|
|
205
|
+
* Called on each sendAudio() to reset the timer.
|
|
206
|
+
*/
|
|
207
|
+
private scheduleAutoFlush(): void {
|
|
208
|
+
if (this.autoFlushTimer) {
|
|
209
|
+
clearTimeout(this.autoFlushTimer);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
this.autoFlushTimer = setTimeout(() => {
|
|
213
|
+
if (this.byteStreamWriter) {
|
|
214
|
+
console.log('[PersonaSession] Auto-flush: no audio for', AUTO_FLUSH_TIMEOUT_MS, 'ms');
|
|
215
|
+
this.endAudioTurn();
|
|
216
|
+
}
|
|
217
|
+
}, AUTO_FLUSH_TIMEOUT_MS);
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
/**
|
|
221
|
+
* Flush all pending audio writes.
|
|
222
|
+
* Call before close to ensure all audio is sent.
|
|
223
|
+
*/
|
|
224
|
+
async flush(): Promise<void> {
|
|
225
|
+
await this.writeQueue;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* End the current audio turn and flush any buffered data.
|
|
230
|
+
*
|
|
231
|
+
* This is called automatically 300ms after the last sendAudio() call.
|
|
232
|
+
* You typically don't need to call this manually, but it's available
|
|
233
|
+
* if you need immediate flushing.
|
|
234
|
+
*/
|
|
235
|
+
async endAudioTurn(): Promise<void> {
|
|
236
|
+
// Clear auto-flush timer to prevent double-flush
|
|
237
|
+
if (this.autoFlushTimer) {
|
|
238
|
+
clearTimeout(this.autoFlushTimer);
|
|
239
|
+
this.autoFlushTimer = null;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
console.debug('[PersonaSession] endAudioTurn() called, awaiting writeQueue...');
|
|
243
|
+
await this.writeQueue;
|
|
244
|
+
console.debug('[PersonaSession] writeQueue complete, closing stream...');
|
|
245
|
+
if (this.byteStreamWriter) {
|
|
246
|
+
try {
|
|
247
|
+
await this.byteStreamWriter.close();
|
|
248
|
+
console.debug('[PersonaSession] stream closed successfully');
|
|
249
|
+
} catch (err) {
|
|
250
|
+
console.warn('[PersonaSession] stream close error:', err);
|
|
251
|
+
}
|
|
252
|
+
this.byteStreamWriter = null;
|
|
253
|
+
} else {
|
|
254
|
+
console.debug('[PersonaSession] no stream to close');
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/**
|
|
259
|
+
* Signal an interruption to the avatar.
|
|
260
|
+
*
|
|
261
|
+
* Clears any pending frames in the GPU node's publish queue,
|
|
262
|
+
* allowing the avatar to quickly respond to new audio.
|
|
263
|
+
*
|
|
264
|
+
* Call this when the user interrupts the agent (e.g., starts speaking
|
|
265
|
+
* while the agent is still talking).
|
|
266
|
+
*/
|
|
267
|
+
async interrupt(): Promise<void> {
|
|
268
|
+
// Clear auto-flush timer - we're interrupting, don't need to flush
|
|
269
|
+
if (this.autoFlushTimer) {
|
|
270
|
+
clearTimeout(this.autoFlushTimer);
|
|
271
|
+
this.autoFlushTimer = null;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
if (!this.room || this._state !== 'connected') {
|
|
275
|
+
console.warn('[PersonaSession] interrupt() called but not connected');
|
|
276
|
+
return;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
try {
|
|
280
|
+
console.debug('[PersonaSession] Sending interrupt');
|
|
281
|
+
const writer = await this.room.localParticipant.streamBytes({
|
|
282
|
+
topic: CONTROL_STREAM_TOPIC,
|
|
283
|
+
destinationIdentities: [this.options.agentIdentity],
|
|
284
|
+
});
|
|
285
|
+
await writer.write(new TextEncoder().encode('interrupt'));
|
|
286
|
+
await writer.close();
|
|
287
|
+
} catch (error) {
|
|
288
|
+
console.warn('[PersonaSession] Failed to send interrupt:', error);
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* Close the session and clean up resources.
|
|
294
|
+
*/
|
|
295
|
+
async close(): Promise<void> {
|
|
296
|
+
// Clear auto-flush timer
|
|
297
|
+
if (this.autoFlushTimer) {
|
|
298
|
+
clearTimeout(this.autoFlushTimer);
|
|
299
|
+
this.autoFlushTimer = null;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
// Wait for pending writes to complete before closing
|
|
303
|
+
await this.flush();
|
|
304
|
+
|
|
305
|
+
// Close byte stream
|
|
306
|
+
if (this.byteStreamWriter) {
|
|
307
|
+
try {
|
|
308
|
+
await this.byteStreamWriter.close();
|
|
309
|
+
} catch {
|
|
310
|
+
// Ignore close errors
|
|
311
|
+
}
|
|
312
|
+
this.byteStreamWriter = null;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
// Disconnect from room
|
|
316
|
+
if (this.room) {
|
|
317
|
+
this.room.disconnect();
|
|
318
|
+
this.room = null;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
this.setState('disconnected');
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio utilities for PCM processing and silence generation.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Sample rate for audio sent to Persona (Hz) */
export const SAMPLE_RATE = 24000;

/** Bytes per sample (16-bit PCM) */
export const BYTES_PER_SAMPLE = 2;

/** Number of channels (mono) */
export const CHANNELS = 1;
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Convert base64-encoded audio to Uint8Array.
|
|
16
|
+
*
|
|
17
|
+
* @param base64 - Base64 encoded audio data
|
|
18
|
+
* @returns Uint8Array of audio bytes
|
|
19
|
+
*/
|
|
20
|
+
export function base64ToBytes(base64: string): Uint8Array {
|
|
21
|
+
const binaryString = atob(base64);
|
|
22
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
23
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
24
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
25
|
+
}
|
|
26
|
+
return bytes;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Convert Uint8Array to base64 string.
|
|
31
|
+
*
|
|
32
|
+
* @param bytes - Audio bytes
|
|
33
|
+
* @returns Base64 encoded string
|
|
34
|
+
*/
|
|
35
|
+
export function bytesToBase64(bytes: Uint8Array): string {
|
|
36
|
+
let binary = '';
|
|
37
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
38
|
+
binary += String.fromCharCode(bytes[i]);
|
|
39
|
+
}
|
|
40
|
+
return btoa(binary);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Resample PCM audio from one sample rate to another.
|
|
45
|
+
* Simple linear interpolation - not high quality but sufficient for real-time.
|
|
46
|
+
*
|
|
47
|
+
* @param input - Input PCM bytes (16-bit signed)
|
|
48
|
+
* @param fromRate - Source sample rate
|
|
49
|
+
* @param toRate - Target sample rate
|
|
50
|
+
* @returns Resampled PCM bytes
|
|
51
|
+
*/
|
|
52
|
+
export function resamplePcm(
|
|
53
|
+
input: Uint8Array,
|
|
54
|
+
fromRate: number,
|
|
55
|
+
toRate: number
|
|
56
|
+
): Uint8Array {
|
|
57
|
+
if (fromRate === toRate) {
|
|
58
|
+
return input;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const inputView = new Int16Array(input.buffer, input.byteOffset, input.length / 2);
|
|
62
|
+
const ratio = fromRate / toRate;
|
|
63
|
+
const outputLength = Math.floor(inputView.length / ratio);
|
|
64
|
+
const output = new Int16Array(outputLength);
|
|
65
|
+
|
|
66
|
+
for (let i = 0; i < outputLength; i++) {
|
|
67
|
+
const srcIndex = i * ratio;
|
|
68
|
+
const srcIndexFloor = Math.floor(srcIndex);
|
|
69
|
+
const srcIndexCeil = Math.min(srcIndexFloor + 1, inputView.length - 1);
|
|
70
|
+
const fraction = srcIndex - srcIndexFloor;
|
|
71
|
+
|
|
72
|
+
// Linear interpolation
|
|
73
|
+
output[i] = Math.round(
|
|
74
|
+
inputView[srcIndexFloor] * (1 - fraction) + inputView[srcIndexCeil] * fraction
|
|
75
|
+
);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
return new Uint8Array(output.buffer);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Create a simple typed event emitter.
|
|
83
|
+
*/
|
|
84
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
85
|
+
export function createEventEmitter<T extends Record<string, any>>() {
|
|
86
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
87
|
+
const listeners = new Map<keyof T, Set<(data: any) => void>>();
|
|
88
|
+
|
|
89
|
+
return {
|
|
90
|
+
on<K extends keyof T>(event: K, handler: (data: T[K]) => void): void {
|
|
91
|
+
if (!listeners.has(event)) {
|
|
92
|
+
listeners.set(event, new Set());
|
|
93
|
+
}
|
|
94
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
95
|
+
listeners.get(event)!.add(handler as (data: any) => void);
|
|
96
|
+
},
|
|
97
|
+
|
|
98
|
+
off<K extends keyof T>(event: K, handler: (data: T[K]) => void): void {
|
|
99
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
100
|
+
listeners.get(event)?.delete(handler as (data: any) => void);
|
|
101
|
+
},
|
|
102
|
+
|
|
103
|
+
emit<K extends keyof T>(event: K, data: T[K]): void {
|
|
104
|
+
listeners.get(event)?.forEach((handler) => handler(data));
|
|
105
|
+
},
|
|
106
|
+
|
|
107
|
+
removeAllListeners(): void {
|
|
108
|
+
listeners.clear();
|
|
109
|
+
},
|
|
110
|
+
};
|
|
111
|
+
}
|
|
112
|
+
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @keyframelabs/sdk - Browser SDK for KeyframeLab's Persona avatar sessions.
|
|
3
|
+
*
|
|
4
|
+
* @example
|
|
5
|
+
* ```typescript
|
|
6
|
+
* import { createClient } from '@keyframelabs/sdk';
|
|
7
|
+
*
|
|
8
|
+
* const client = createClient({
|
|
9
|
+
* personaId: 'luna',
|
|
10
|
+
* // WebRTC URL and access token returned from KeyframeLab API (called from your server code)
|
|
11
|
+
* serverUrl: 'wss://webrtc-server-url.com',
|
|
12
|
+
* participantToken: 'webrtc-participant-token',
|
|
13
|
+
* onVideoTrack: (track) => {
|
|
14
|
+
* videoElement.srcObject = new MediaStream([track]);
|
|
15
|
+
* },
|
|
16
|
+
* });
|
|
17
|
+
*
|
|
18
|
+
* await client.connect();
|
|
19
|
+
* client.sendAudio(audioBytes);
|
|
20
|
+
* await client.close();
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { PersonaSession } from './PersonaSession.js';
|
|
25
|
+
import type { PersonaSessionOptions } from './types.js';
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Create a new Persona client.
|
|
29
|
+
*
|
|
30
|
+
* @param options - Session configuration options
|
|
31
|
+
* @returns A new PersonaSession instance
|
|
32
|
+
*/
|
|
33
|
+
const createClient = (options: PersonaSessionOptions): PersonaSession => {
|
|
34
|
+
return new PersonaSession(options);
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
// Core
|
|
38
|
+
export { createClient, PersonaSession };
|
|
39
|
+
|
|
40
|
+
// Types
|
|
41
|
+
export type {
|
|
42
|
+
CloseReason,
|
|
43
|
+
PersonaSessionOptions,
|
|
44
|
+
SessionState,
|
|
45
|
+
} from './types.js';
|
|
46
|
+
|
|
47
|
+
// Utilities
|
|
48
|
+
export {
|
|
49
|
+
SAMPLE_RATE,
|
|
50
|
+
base64ToBytes,
|
|
51
|
+
bytesToBase64,
|
|
52
|
+
createEventEmitter,
|
|
53
|
+
resamplePcm,
|
|
54
|
+
} from './audio-utils.js';
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
 * Shared types for Persona SDK.
 */

/** Session state machine: disconnected -> connecting -> connected, with 'error' on failure. */
export type SessionState =
  | 'disconnected'
  | 'connecting'
  | 'connected'
  | 'error';

/** Reason for session close, derived from the LiveKit disconnect reason. */
export type CloseReason =
  | 'client_initiated' // User called close()
  | 'room_deleted' // Backend deleted the room (max duration, etc.)
  | 'server_shutdown' // Server is shutting down
  | 'participant_removed' // Participant was kicked
  | 'unknown'; // Unknown reason

/** Options for creating a PersonaSession */
export interface PersonaSessionOptions {
  /** WebRTC URL (returned from KeyframeLab API) */
  serverUrl: string;
  /** WebRTC access token (returned from KeyframeLab API) */
  participantToken: string;
  /** Identity of the agent participant; only this participant's tracks are surfaced */
  agentIdentity: string;

  /** Called when video track is available */
  onVideoTrack?: (track: MediaStreamTrack) => void;

  /** Called when audio track is available */
  onAudioTrack?: (track: MediaStreamTrack) => void;

  /** Called when session state changes */
  onStateChange?: (state: SessionState) => void;

  /** Called on error */
  onError?: (error: Error) => void;

  /** Called when session is closed (by server or client) */
  onClose?: (reason: CloseReason) => void;
}