npm - @elevenlabs/client - Versions diffs - 0.8.0 → 0.9.0 - Mend

@elevenlabs/client 0.8.0 → 0.9.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (22)

package/.turbo/turbo-build.log +14 -13
package/.turbo/turbo-generate-version.log +1 -1
package/README.md +359 -0
package/dist/BaseConversation.d.ts +2 -1
package/dist/index.d.ts +2 -0
package/dist/lib.cjs +1 -1
package/dist/lib.cjs.map +1 -1
package/dist/lib.modern.js +1 -1
package/dist/lib.modern.js.map +1 -1
package/dist/lib.module.js +1 -1
package/dist/lib.module.js.map +1 -1
package/dist/lib.umd.js +1 -1
package/dist/lib.umd.js.map +1 -1
package/dist/scribe/connection.d.ts +174 -0
package/dist/scribe/index.d.ts +6 -0
package/dist/scribe/scribe.d.ts +118 -0
package/dist/utils/events.d.ts +3 -2
package/dist/utils/scribeAudioProcessor.generated.d.ts +1 -0
package/dist/version.d.ts +1 -1
package/package.json +3 -3
package/scripts/generateWorklets.js +9 -3
package/worklets/scribeAudioProcessor.js +52 -0

package/dist/scribe/connection.d.ts ADDED Viewed

@@ -0,0 +1,174 @@
+import type { SessionStartedMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage } from "@elevenlabs/types";
+export type { SessionStartedMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage, };
+export type WebSocketMessage = SessionStartedMessage | PartialTranscriptMessage | FinalTranscriptMessage | FinalTranscriptWithTimestampsMessage | ScribeErrorMessage | ScribeAuthErrorMessage;
+/**
+ * Events emitted by the RealtimeConnection.
+ */
+export declare enum RealtimeEvents {
+    /** Emitted when the session is successfully started */
+    SESSION_STARTED = "session_started",
+    /** Emitted when a partial (interim) transcript is available */
+    PARTIAL_TRANSCRIPT = "partial_transcript",
+    /** Emitted when a final transcript is available */
+    FINAL_TRANSCRIPT = "final_transcript",
+    /** Emitted when a final transcript with timestamps is available */
+    FINAL_TRANSCRIPT_WITH_TIMESTAMPS = "final_transcript_with_timestamps",
+    /** Emitted when an authentication error occurs */
+    AUTH_ERROR = "auth_error",
+    /** Emitted when an error occurs */
+    ERROR = "error",
+    /** Emitted when the WebSocket connection is opened */
+    OPEN = "open",
+    /** Emitted when the WebSocket connection is closed */
+    CLOSE = "close"
+}
+/**
+ * Manages a real-time transcription WebSocket connection.
+ *
+ * @example
+ * ```typescript
+ * const connection = Scribe.connect({
+ *     token: "...",
+ *     modelId: "scribe_realtime_v2",
+ *     audioFormat: AudioFormat.PCM_16000,
+ *     sampleRate: 16000,
+ * });
+ *
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
+ *     console.log("Session started");
+ * });
+ *
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
+ *     console.log("Partial:", data.transcript);
+ * });
+ *
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
+ *     console.log("Final:", data.transcript);
+ *     connection.close();
+ * });
+ *
+ * // Send audio data
+ * connection.send({ audioBase64: base64String });
+ *
+ * // Commit; the FINAL_TRANSCRIPT handler above then closes the connection
+ * connection.commit();
+ * ```
+ */
+export declare class RealtimeConnection {
+    private websocket;
+    private eventEmitter;
+    private currentSampleRate;
+    _audioCleanup?: () => void;
+    constructor(sampleRate: number);
+    /**
+     * @internal
+     * Used internally by ScribeRealtime to attach the WebSocket after connection is created.
+     */
+    setWebSocket(websocket: WebSocket): void;
+    /**
+     * Attaches an event listener for the specified event.
+     *
+     * @param event - The event to listen for (use RealtimeEvents enum)
+     * @param listener - The callback function to execute when the event fires
+     *
+     * @example
+     * ```typescript
+     * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
+     *     console.log("Session started", data);
+     * });
+     *
+     * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
+     *     console.log("Partial:", data.transcript);
+     * });
+     *
+     * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
+     *     console.log("Final:", data.transcript);
+     * });
+     * ```
+     */
+    on(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;
+    /**
+     * Removes an event listener for the specified event.
+     *
+     * @param event - The event to stop listening for
+     * @param listener - The callback function to remove
+     *
+     * @example
+     * ```typescript
+     * const handler = (data) => console.log(data);
+     * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
+     *
+     * // Later, remove the listener
+     * connection.off(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
+     * ```
+     */
+    off(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;
+    /**
+     * Sends audio data to the transcription service.
+     *
+     * @param data - Audio data configuration
+     * @param data.audioBase64 - Base64-encoded audio data
+     * @param data.commit - Whether to commit the transcription after this chunk. You likely want to use connection.commit() instead (default: false)
+     * @param data.sampleRate - Sample rate of the audio (default: configured sample rate)
+     *
+     * @throws {Error} If the WebSocket connection is not open
+     *
+     * @example
+     * ```typescript
+     * // Send audio chunk without committing
+     * connection.send({
+     *     audioBase64: base64EncodedAudio,
+     * });
+     *
+     * // Send audio chunk with custom sample rate
+     * connection.send({
+     *     audioBase64: base64EncodedAudio,
+     *     sampleRate: 16000,
+     * });
+     * ```
+     */
+    send(data: {
+        audioBase64: string;
+        commit?: boolean;
+        sampleRate?: number;
+    }): void;
+    /**
+     * Commits the transcription, signaling that all audio has been sent.
+     * This finalizes the transcription and triggers a FINAL_TRANSCRIPT event.
+     *
+     * @throws {Error} If the WebSocket connection is not open
+     *
+     * @remarks
+     * Only needed when using CommitStrategy.MANUAL.
+     * When using CommitStrategy.VAD, commits are handled automatically by the server.
+     *
+     * @example
+     * ```typescript
+     * // Send all audio chunks
+     * for (const chunk of audioChunks) {
+     *     connection.send({ audioBase64: chunk });
+     * }
+     *
+     * // Finalize the transcription
+     * connection.commit();
+     * ```
+     */
+    commit(): void;
+    /**
+     * Closes the WebSocket connection and cleans up resources.
+     * This will terminate any ongoing transcription and stop microphone streaming if active.
+     *
+     * @remarks
+     * After calling close(), this connection cannot be reused.
+     * Create a new connection if you need to start transcribing again.
+     *
+     * @example
+     * ```typescript
+     * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
+     *     console.log("Final:", data.transcript);
+     *     connection.close();
+     * });
+     * ```
+     */
+    close(): void;
+}

package/dist/scribe/index.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+export { ScribeRealtime as Scribe } from "./scribe";
+export { RealtimeConnection } from "./connection";
+export { AudioFormat, CommitStrategy } from "./scribe";
+export { RealtimeEvents } from "./connection";
+export type { AudioOptions, MicrophoneOptions } from "./scribe";
+export type { WebSocketMessage, SessionStartedMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage, } from "./connection";

package/dist/scribe/scribe.d.ts ADDED Viewed

@@ -0,0 +1,118 @@
+import { RealtimeConnection } from "./connection";
+export declare enum AudioFormat {
+    PCM_8000 = "pcm_8000",
+    PCM_16000 = "pcm_16000",
+    PCM_22050 = "pcm_22050",
+    PCM_24000 = "pcm_24000",
+    PCM_44100 = "pcm_44100",
+    PCM_48000 = "pcm_48000",
+    ULAW_8000 = "ulaw_8000"
+}
+export declare enum CommitStrategy {
+    MANUAL = "manual",
+    VAD = "vad"
+}
+interface BaseOptions {
+    /**
+     * Token to use for the WebSocket connection. Obtained from the ElevenLabs API.
+     */
+    token: string;
+    /**
+     * Strategy for committing transcriptions.
+     * @default CommitStrategy.MANUAL
+     */
+    commitStrategy?: CommitStrategy;
+    /**
+     * Silence threshold in seconds for VAD (Voice Activity Detection).
+     * Must be a positive number between 0.3 and 3.0.
+     */
+    vadSilenceThresholdSecs?: number;
+    /**
+     * Threshold for voice activity detection.
+     * Must be between 0.1 and 0.9.
+     */
+    vadThreshold?: number;
+    /**
+     * Minimum speech duration in milliseconds.
+     * Must be a positive integer between 50 and 2000.
+     */
+    minSpeechDurationMs?: number;
+    /**
+     * Minimum silence duration in milliseconds.
+     * Must be a positive integer between 50 and 2000.
+     */
+    minSilenceDurationMs?: number;
+    /**
+     * Model ID to use for transcription.
+     * Must be a valid model ID, e.g. "scribe_realtime_v2".
+     */
+    modelId: string;
+    /**
+     * An ISO-639-1 or ISO-639-3 language code corresponding to the language spoken in the audio.
+     * Can sometimes improve transcription performance if known beforehand.
+     */
+    languageCode?: string;
+    /**
+     * Base URI to use for the WebSocket connection.
+     * If not provided, the default URI will be used.
+     */
+    baseUri?: string;
+}
+export interface AudioOptions extends BaseOptions {
+    audioFormat: AudioFormat;
+    sampleRate: number;
+    microphone?: never;
+}
+/**
+ * Options for automatic microphone streaming in the browser.
+ */
+export interface MicrophoneOptions extends BaseOptions {
+    microphone?: {
+        deviceId?: string;
+        echoCancellation?: boolean;
+        noiseSuppression?: boolean;
+        autoGainControl?: boolean;
+        channelCount?: number;
+    };
+    audioFormat?: never;
+    sampleRate?: never;
+}
+/**
+ * Real-time speech-to-text transcription client for browser environments.
+ * Supports microphone streaming and manual audio chunk transmission.
+ */
+export declare class ScribeRealtime {
+    private static readonly DEFAULT_BASE_URI;
+    private static getWebSocketUri;
+    private static buildWebSocketUri;
+    /**
+     * Establishes a WebSocket connection for real-time speech-to-text transcription.
+     *
+     * @param options - Either AudioOptions (manual chunks) or MicrophoneOptions (automatic capture)
+     * @returns A RealtimeConnection instance
+     *
+     * @example
+     * ```typescript
+     * // Manual audio streaming: the caller sends chunks with connection.send()
+     * const connection = Scribe.connect({
+     *     token: "...",
+     *     modelId: "scribe_realtime_v2",
+     *     audioFormat: AudioFormat.PCM_16000,
+     *     sampleRate: 16000,
+     * });
+     *
+     * // Automatic microphone streaming: omit audioFormat and sampleRate
+     * const micConnection = Scribe.connect({
+     *     token: "...",
+     *     modelId: "scribe_realtime_v2",
+     *     microphone: {
+     *         echoCancellation: true,
+     *         noiseSuppression: true
+     *     }
+     * });
+     * ```
+     */
+    static connect(options: AudioOptions | MicrophoneOptions): RealtimeConnection;
+    private static streamFromMicrophone;
+}
+export {};

package/dist/utils/events.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Outgoing } from "@elevenlabs/types";
-import { AgentChatResponsePartClientEvent, AgentResponse, AgentResponseCorrection, AgentToolResponseClientEvent, AsrInitiationMetadataEvent as AsrMetadataEvent, Audio, ClientToolCallMessage, ConversationMetadata, Interruption, McpConnectionStatusClientEvent, McpToolCall, Ping, InternalTentativeAgentResponse as TentativeAgentResponseInternal, UserTranscript, VadScore } from "@elevenlabs/types/generated/types/asyncapi-types";
+import { AgentChatResponsePartClientEvent, AgentResponse, AgentResponseCorrection, AgentToolResponseClientEvent, AsrInitiationMetadataEvent as AsrMetadataEvent, Audio, ClientToolCallMessage, ConversationMetadata, ErrorMessage, Interruption, McpConnectionStatusClientEvent, McpToolCall, Ping, InternalTentativeAgentResponse as TentativeAgentResponseInternal, UserTranscript, VadScore } from "@elevenlabs/types/generated/types/asyncapi-types";
 export type UserTranscriptionEvent = UserTranscript;
 export type AgentResponseEvent = AgentResponse;
 export type AgentAudioEvent = Audio;
@@ -16,7 +16,8 @@ export type ConversationMetadataEvent = ConversationMetadata;
 export type AsrInitiationMetadataEvent = AsrMetadataEvent;
 export type MCPConnectionStatusEvent = McpConnectionStatusClientEvent;
 export type AgentChatResponsePartEvent = AgentChatResponsePartClientEvent;
-export type IncomingSocketEvent = UserTranscriptionEvent | AgentResponseEvent | AgentResponseCorrectionEvent | AgentAudioEvent | InterruptionEvent | InternalTentativeAgentResponseEvent | ConfigEvent | PingEvent | ClientToolCallEvent | VadScoreEvent | MCPToolCallClientEvent | AgentToolResponseEvent | ConversationMetadataEvent | AsrInitiationMetadataEvent | MCPConnectionStatusEvent | AgentChatResponsePartEvent;
+export type ErrorMessageEvent = ErrorMessage;
+export type IncomingSocketEvent = UserTranscriptionEvent | AgentResponseEvent | AgentResponseCorrectionEvent | AgentAudioEvent | InterruptionEvent | InternalTentativeAgentResponseEvent | ConfigEvent | PingEvent | ClientToolCallEvent | VadScoreEvent | MCPToolCallClientEvent | AgentToolResponseEvent | ConversationMetadataEvent | AsrInitiationMetadataEvent | MCPConnectionStatusEvent | AgentChatResponsePartEvent | ErrorMessageEvent;
 export type PongEvent = Outgoing.PongClientToOrchestratorEvent;
 export type UserAudioEvent = Outgoing.UserAudio;
 export type UserFeedbackEvent = Outgoing.UserFeedbackClientToOrchestratorEvent;

package/dist/utils/scribeAudioProcessor.generated.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare const loadScribeAudioProcessor: (worklet: AudioWorklet, path?: string) => Promise<void>;

package/dist/version.d.ts CHANGED Viewed

@@ -1 +1 @@
-export declare const PACKAGE_VERSION = "0.8.0";
+export declare const PACKAGE_VERSION = "0.9.0";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@elevenlabs/client",
-  "version": "0.8.0",
+  "version": "0.9.0",
   "description": "ElevenLabs JavaScript Client Library",
   "main": "./dist/lib.umd.js",
   "module": "./dist/lib.module.js",
@@ -39,12 +39,12 @@
   },
   "dependencies": {
     "livekit-client": "^2.11.4",
-    "@elevenlabs/types": "0.0.2"
+    "@elevenlabs/types": "0.1.0"
   },
   "scripts": {
     "generate-version": "printf \"// This file is auto-generated during build\\nexport const PACKAGE_VERSION = \\\"%s\\\";\\n\" \"$npm_package_version\" > src/version.ts",
     "generate-worklets": "node scripts/generateWorklets.js",
-    "prebuild": "pnpm run generate-version && pnpm run generate-worklets",
+    "prebuild": "npm run generate-version && npm run generate-worklets",
     "build": "BROWSERSLIST_ENV=modern microbundle --jsx React.createElement --jsxFragment React.Fragment --jsxImportSource react src/index.ts",
     "clean": "rm -rf ./dist",
     "dev": "pnpm run clean && pnpm run generate-version && pnpm run generate-worklets && BROWSERSLIST_ENV=development microbundle --jsx React.createElement --jsxFragment React.Fragment --jsxImportSource react src/index.ts -w -f modern",

package/scripts/generateWorklets.js CHANGED Viewed

@@ -20,10 +20,16 @@ const worklets = [
     processorName: 'rawAudioProcessor'
   },
   {
-    jsFile: 'audioConcatProcessor.js',
+    jsFile: 'audioConcatProcessor.js',
     tsFile: 'audioConcatProcessor.generated.ts',
     exportName: 'loadAudioConcatProcessor',
     processorName: 'audioConcatProcessor'
+  },
+  {
+    jsFile: 'scribeAudioProcessor.js',
+    tsFile: 'scribeAudioProcessor.generated.ts',
+    exportName: 'loadScribeAudioProcessor',
+    processorName: 'scribeAudioProcessor'
   }
 ];
@@ -32,9 +38,9 @@ console.log('Generating TypeScript worklet files...');
 worklets.forEach(({ jsFile, tsFile, exportName, processorName }) => {
   const jsPath = path.join(workletDir, jsFile);
   const tsPath = path.join(outputDir, tsFile);
   const jsContent = fs.readFileSync(jsPath, 'utf8');
   const tsContent = `// AUTO-GENERATED BY packages/client/scripts/generateWorklets.js
 import { createWorkletModuleLoader } from "./createWorkletModuleLoader";

package/worklets/scribeAudioProcessor.js ADDED Viewed

@@ -0,0 +1,52 @@
+/*
+ * AudioWorklet processor that buffers the first input channel and posts it as PCM16 chunks
+ * USED BY @elevenlabs/client
+ */
+class ScribeAudioProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.buffer = [];
+    this.bufferSize = 4096; // Flush threshold, counted in samples (not bytes)
+  }
+  process(inputs) {
+    const input = inputs[0];
+    if (input.length > 0) {
+      const channelData = input[0]; // Only the first channel is read (treated as mono)
+      // Append this call's samples to the pending buffer
+      this.buffer.push(...channelData);
+      // Once at least bufferSize samples are pending, convert and send all of them
+      if (this.buffer.length >= this.bufferSize) {
+        const float32Array = new Float32Array(this.buffer);
+        const int16Array = new Int16Array(float32Array.length);
+        // Convert Float32 [-1, 1] to Int16 [-32768, 32767]
+        for (let i = 0; i < float32Array.length; i++) {
+          // Clamp to [-1, 1] so out-of-range samples cannot overflow Int16
+          const sample = Math.max(-1, Math.min(1, float32Array[i]));
+          // Asymmetric scale: negatives by 32768, non-negatives by 32767
+          int16Array[i] = sample < 0 ? sample * 32768 : sample * 32767;
+        }
+        // Post { audioData } to the main thread, passing the ArrayBuffer in the transfer list
+        this.port.postMessage(
+          {
+            audioData: int16Array.buffer
+          },
+          [int16Array.buffer]
+        );
+        // Start a fresh pending buffer for the next chunk
+        this.buffer = [];
+      }
+    }
+    return true; // Continue processing
+  }
+}
+registerProcessor("scribeAudioProcessor", ScribeAudioProcessor);