@elevenlabs/client 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ import type { SessionStartedMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage } from "@elevenlabs/types";
2
+ export type { SessionStartedMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage, };
3
+ export type WebSocketMessage = SessionStartedMessage | PartialTranscriptMessage | FinalTranscriptMessage | FinalTranscriptWithTimestampsMessage | ScribeErrorMessage | ScribeAuthErrorMessage;
4
+ /**
5
+ * Event names emitted by a RealtimeConnection. Pass a member to
+ * `connection.on()` / `connection.off()` to subscribe or unsubscribe.
+ *
+ * The first six members match, by name, the message kinds in the
+ * `WebSocketMessage` union; OPEN and CLOSE describe the WebSocket itself.
+ * NOTE(review): presumably each message event delivers the matching message
+ * object to the listener - confirm against the implementation.
6
+ */
7
+ export declare enum RealtimeEvents {
8
+ /** Emitted when the session is successfully started */
9
+ SESSION_STARTED = "session_started",
10
+ /** Emitted when a partial (interim) transcript is available */
11
+ PARTIAL_TRANSCRIPT = "partial_transcript",
12
+ /** Emitted when a final transcript is available */
13
+ FINAL_TRANSCRIPT = "final_transcript",
14
+ /** Emitted when a final transcript with timestamps is available */
15
+ FINAL_TRANSCRIPT_WITH_TIMESTAMPS = "final_transcript_with_timestamps",
16
+ /** Emitted when an authentication error occurs */
17
+ AUTH_ERROR = "auth_error",
18
+ /** Emitted when an error occurs (authentication failures have their own AUTH_ERROR event) */
19
+ ERROR = "error",
20
+ /** Emitted when the WebSocket connection is opened */
21
+ OPEN = "open",
22
+ /** Emitted when the WebSocket connection is closed */
23
+ CLOSE = "close"
24
+ }
25
+ /**
26
+ * Manages a real-time transcription WebSocket connection.
+ * Instances are returned by `Scribe.connect()`; subscribe to RealtimeEvents
+ * with `on()`, push audio with `send()`, finalize with `commit()` and release
+ * everything with `close()`.
27
+ *
28
+ * @example
29
+ * ```typescript
30
+ * // connect() is declared to return the connection directly (no Promise), so no await is needed
+ * const connection = Scribe.connect({
31
+ * token: "...",
32
+ * modelId: "scribe_realtime_v2",
33
+ * audioFormat: AudioFormat.PCM_16000,
34
+ * sampleRate: 16000,
35
+ * });
36
+ *
37
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
38
+ * console.log("Session started");
39
+ * });
40
+ *
41
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
42
+ * console.log("Partial:", data.transcript);
43
+ * });
44
+ *
45
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
46
+ * console.log("Final:", data.transcript);
47
+ * connection.close();
48
+ * });
49
+ *
50
+ * // Send audio data
51
+ * connection.send({ audioBase64: base64String });
52
+ *
53
+ * // Commit the audio sent so far; the FINAL_TRANSCRIPT handler above then closes the connection
54
+ * connection.commit();
55
+ * ```
56
+ */
57
+ export declare class RealtimeConnection {
58
+ private websocket; // NOTE(review): presumably the socket attached through setWebSocket() - confirm
59
+ private eventEmitter; // NOTE(review): presumably backs on()/off() - confirm
60
+ private currentSampleRate; // NOTE(review): presumably the constructor's sampleRate, used as send()'s default - confirm
61
+ _audioCleanup?: () => void; // NOTE(review): public but underscore-prefixed; presumably the microphone teardown run by close() - confirm
62
+ constructor(sampleRate: number);
63
+ /**
64
+ * @internal
65
+ * Used internally by ScribeRealtime to attach the WebSocket after connection is created.
66
+ */
67
+ setWebSocket(websocket: WebSocket): void;
68
+ /**
69
+ * Attaches an event listener for the specified event.
70
+ *
71
+ * @param event - The event to listen for (use RealtimeEvents enum)
72
+ * @param listener - The callback function to execute when the event fires.
+ * Its arguments are typed `unknown`, so narrow or cast the payload before
+ * reading fields such as `data.transcript` in strictly type-checked code.
73
+ *
74
+ * @example
75
+ * ```typescript
76
+ * connection.on(RealtimeEvents.SESSION_STARTED, (data) => {
77
+ * console.log("Session started", data);
78
+ * });
79
+ *
80
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
81
+ * console.log("Partial:", data.transcript);
82
+ * });
83
+ *
84
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
85
+ * console.log("Final:", data.transcript);
86
+ * });
87
+ * ```
88
+ */
89
+ on(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;
90
+ /**
91
+ * Removes an event listener for the specified event.
92
+ *
93
+ * @param event - The event to stop listening for
94
+ * @param listener - The callback function to remove (keep a reference to the
+ * function you registered, as the example does)
95
+ *
96
+ * @example
97
+ * ```typescript
98
+ * const handler = (data) => console.log(data);
99
+ * connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
100
+ *
101
+ * // Later, remove the listener
102
+ * connection.off(RealtimeEvents.PARTIAL_TRANSCRIPT, handler);
103
+ * ```
104
+ */
105
+ off(event: RealtimeEvents, listener: (...args: unknown[]) => void): void;
106
+ /**
107
+ * Sends audio data to the transcription service.
108
+ *
109
+ * @param data - Audio data configuration
110
+ * @param data.audioBase64 - Base64-encoded audio data
111
+ * @param data.commit - Whether to commit the transcription after this chunk. You likely want to use connection.commit() instead (default: false)
112
+ * @param data.sampleRate - Sample rate of the audio (default: configured sample rate)
113
+ *
114
+ * @throws {Error} If the WebSocket connection is not open
115
+ *
116
+ * @example
117
+ * ```typescript
118
+ * // Send audio chunk without committing
119
+ * connection.send({
120
+ * audioBase64: base64EncodedAudio,
121
+ * });
122
+ *
123
+ * // Send audio chunk with custom sample rate
124
+ * connection.send({
125
+ * audioBase64: base64EncodedAudio,
126
+ * sampleRate: 16000,
127
+ * });
128
+ * ```
129
+ */
130
+ send(data: {
131
+ audioBase64: string;
132
+ commit?: boolean;
133
+ sampleRate?: number;
134
+ }): void;
135
+ /**
136
+ * Commits the transcription, signaling that all audio has been sent.
137
+ * This finalizes the transcription and triggers a FINAL_TRANSCRIPT event.
138
+ *
139
+ * @throws {Error} If the WebSocket connection is not open
140
+ *
141
+ * @remarks
142
+ * Only needed when using CommitStrategy.MANUAL.
143
+ * When using CommitStrategy.VAD, commits are handled automatically by the server.
144
+ *
145
+ * @example
146
+ * ```typescript
147
+ * // Send all audio chunks
148
+ * for (const chunk of audioChunks) {
149
+ * connection.send({ audioBase64: chunk });
150
+ * }
151
+ *
152
+ * // Finalize the transcription
153
+ * connection.commit();
154
+ * ```
155
+ */
156
+ commit(): void;
157
+ /**
158
+ * Closes the WebSocket connection and cleans up resources.
159
+ * This will terminate any ongoing transcription and stop microphone streaming if active.
160
+ *
161
+ * @remarks
162
+ * After calling close(), this connection cannot be reused.
163
+ * Create a new connection if you need to start transcribing again.
164
+ *
165
+ * @example
166
+ * ```typescript
167
+ * connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
168
+ * console.log("Final:", data.transcript);
169
+ * connection.close();
170
+ * });
171
+ * ```
172
+ */
173
+ close(): void;
174
+ }
@@ -0,0 +1,6 @@
1
+ export { ScribeRealtime as Scribe } from "./scribe";
2
+ export { RealtimeConnection } from "./connection";
3
+ export { AudioFormat, CommitStrategy } from "./scribe";
4
+ export { RealtimeEvents } from "./connection";
5
+ export type { AudioOptions, MicrophoneOptions } from "./scribe";
6
+ export type { WebSocketMessage, SessionStartedMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage, } from "./connection";
@@ -0,0 +1,118 @@
1
+ import { RealtimeConnection } from "./connection";
2
/**
 * Audio encoding identifiers accepted by `AudioOptions.audioFormat`.
 * NOTE(review): member names read as <encoding>_<sample rate in Hz>; presumably
 * the chosen rate should agree with `AudioOptions.sampleRate` - confirm.
 */
+ export declare enum AudioFormat {
3
+ PCM_8000 = "pcm_8000",
4
+ PCM_16000 = "pcm_16000",
5
+ PCM_22050 = "pcm_22050",
6
+ PCM_24000 = "pcm_24000",
7
+ PCM_44100 = "pcm_44100",
8
+ PCM_48000 = "pcm_48000",
9
+ ULAW_8000 = "ulaw_8000"
10
+ }
11
/**
 * Selects how transcriptions are committed; set through `commitStrategy` in
 * the connect options.
 */
+ export declare enum CommitStrategy {
12
+ MANUAL = "manual", // the caller finalizes by calling connection.commit()
13
+ VAD = "vad" // voice activity detection: commits are handled automatically by the server
14
+ }
15
/**
 * Connection options shared by AudioOptions (caller-supplied audio) and
 * MicrophoneOptions (automatic microphone streaming). Not exported itself.
 */
+ interface BaseOptions {
16
+ /**
17
+ * Token to use for the WebSocket connection. Obtained from the ElevenLabs API.
18
+ */
19
+ token: string;
20
+ /**
21
+ * Strategy for committing transcriptions.
+ * With MANUAL the caller finalizes via connection.commit(); with VAD the
+ * server commits automatically.
22
+ * @default CommitStrategy.MANUAL
23
+ */
24
+ commitStrategy?: CommitStrategy;
25
+ /**
26
+ * Silence threshold in seconds for VAD (Voice Activity Detection).
27
+ * Must be a positive number between 0.3 and 3.0.
+ * NOTE(review): presumably only meaningful with CommitStrategy.VAD - confirm.
28
+ */
29
+ vadSilenceThresholdSecs?: number;
30
+ /**
31
+ * Threshold for voice activity detection.
32
+ * Must be between 0.1 and 0.9.
33
+ */
34
+ vadThreshold?: number;
35
+ /**
36
+ * Minimum speech duration in milliseconds.
37
+ * Must be a positive integer between 50 and 2000.
38
+ */
39
+ minSpeechDurationMs?: number;
40
+ /**
41
+ * Minimum silence duration in milliseconds.
42
+ * Must be a positive integer between 50 and 2000.
43
+ */
44
+ minSilenceDurationMs?: number;
45
+ /**
46
+ * Model ID to use for transcription.
47
+ * Must be a valid model ID (the examples in this package use "scribe_realtime_v2").
48
+ */
49
+ modelId: string;
50
+ /**
51
+ * An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio being streamed.
52
+ * Can sometimes improve transcription performance if known beforehand.
53
+ */
54
+ languageCode?: string;
55
+ /**
56
+ * Base URI to use for the WebSocket connection.
57
+ * If not provided, the default URI will be used.
58
+ */
59
+ baseUri?: string;
60
+ }
61
/**
 * Options for manual audio streaming, where the caller encodes the audio and
 * pushes chunks with `connection.send()`.
 */
+ export interface AudioOptions extends BaseOptions {
62
+ audioFormat: AudioFormat; // encoding of the chunks the caller will send
63
+ sampleRate: number; // sample rate of the supplied audio (the examples pair PCM_16000 with 16000)
64
+ microphone?: never; // typed `never` so a microphone config cannot be combined with manual audio settings
65
+ }
66
+ /**
67
+ * Options for automatic microphone streaming in the browser.
+ *
+ * `audioFormat` and `sampleRate` are typed `never`, so they cannot be combined
+ * with a `microphone` config. Because `microphone` itself is optional, an
+ * options object that omits `microphone`, `audioFormat` and `sampleRate`
+ * also type-checks as MicrophoneOptions.
+ *
+ * NOTE(review): the `microphone` field names mirror the browser's
+ * MediaTrackConstraints; presumably they are forwarded to getUserMedia - confirm.
68
+ */
69
+ export interface MicrophoneOptions extends BaseOptions {
70
+ microphone?: {
71
+ deviceId?: string;
72
+ echoCancellation?: boolean;
73
+ noiseSuppression?: boolean;
74
+ autoGainControl?: boolean;
75
+ channelCount?: number;
76
+ };
77
+ audioFormat?: never;
78
+ sampleRate?: never;
79
+ }
80
+ /**
81
+ * Real-time speech-to-text transcription client for browser environments.
82
+ * Supports microphone streaming and manual audio chunk transmission.
+ * Re-exported from the package index under the name `Scribe`, which is the
+ * name the examples use.
83
+ */
84
+ export declare class ScribeRealtime {
85
+ private static readonly DEFAULT_BASE_URI;
86
+ private static getWebSocketUri;
87
+ private static buildWebSocketUri;
88
+ /**
89
+ * Establishes a WebSocket connection for real-time speech-to-text transcription.
90
+ *
91
+ * @param options - Configuration options for the connection: AudioOptions
+ * for manual audio streaming, or MicrophoneOptions for automatic microphone
+ * streaming
92
+ * @returns A RealtimeConnection instance. The declared return type is not a
+ * Promise, so no `await` is needed. NOTE(review): presumably the socket may
+ * still be connecting when this returns; listen for RealtimeEvents.OPEN or
+ * SESSION_STARTED before relying on it - confirm.
93
+ *
94
+ * @example
95
+ * ```typescript
96
+ * // Manual audio streaming
97
+ * const connection = Scribe.connect({
98
+ * token: "...",
99
+ * modelId: "scribe_realtime_v2",
100
+ * audioFormat: AudioFormat.PCM_16000,
101
+ * sampleRate: 16000,
102
+ * });
103
+ *
104
+ * // Automatic microphone streaming (named differently so both snippets can share a scope)
105
+ * const micConnection = Scribe.connect({
106
+ * token: "...",
107
+ * modelId: "scribe_realtime_v2",
108
+ * microphone: {
109
+ * echoCancellation: true,
110
+ * noiseSuppression: true
111
+ * }
112
+ * });
113
+ * ```
114
+ */
115
+ static connect(options: AudioOptions | MicrophoneOptions): RealtimeConnection;
116
+ private static streamFromMicrophone;
117
+ }
118
+ export {};
@@ -1,5 +1,5 @@
1
1
  import { Outgoing } from "@elevenlabs/types";
2
- import { AgentChatResponsePartClientEvent, AgentResponse, AgentResponseCorrection, AgentToolResponseClientEvent, AsrInitiationMetadataEvent as AsrMetadataEvent, Audio, ClientToolCallMessage, ConversationMetadata, Interruption, McpConnectionStatusClientEvent, McpToolCall, Ping, InternalTentativeAgentResponse as TentativeAgentResponseInternal, UserTranscript, VadScore } from "@elevenlabs/types/generated/types/asyncapi-types";
2
+ import { AgentChatResponsePartClientEvent, AgentResponse, AgentResponseCorrection, AgentToolResponseClientEvent, AsrInitiationMetadataEvent as AsrMetadataEvent, Audio, ClientToolCallMessage, ConversationMetadata, ErrorMessage, Interruption, McpConnectionStatusClientEvent, McpToolCall, Ping, InternalTentativeAgentResponse as TentativeAgentResponseInternal, UserTranscript, VadScore } from "@elevenlabs/types/generated/types/asyncapi-types";
3
3
  export type UserTranscriptionEvent = UserTranscript;
4
4
  export type AgentResponseEvent = AgentResponse;
5
5
  export type AgentAudioEvent = Audio;
@@ -16,7 +16,8 @@ export type ConversationMetadataEvent = ConversationMetadata;
16
16
  export type AsrInitiationMetadataEvent = AsrMetadataEvent;
17
17
  export type MCPConnectionStatusEvent = McpConnectionStatusClientEvent;
18
18
  export type AgentChatResponsePartEvent = AgentChatResponsePartClientEvent;
19
- export type IncomingSocketEvent = UserTranscriptionEvent | AgentResponseEvent | AgentResponseCorrectionEvent | AgentAudioEvent | InterruptionEvent | InternalTentativeAgentResponseEvent | ConfigEvent | PingEvent | ClientToolCallEvent | VadScoreEvent | MCPToolCallClientEvent | AgentToolResponseEvent | ConversationMetadataEvent | AsrInitiationMetadataEvent | MCPConnectionStatusEvent | AgentChatResponsePartEvent;
19
+ export type ErrorMessageEvent = ErrorMessage;
20
+ export type IncomingSocketEvent = UserTranscriptionEvent | AgentResponseEvent | AgentResponseCorrectionEvent | AgentAudioEvent | InterruptionEvent | InternalTentativeAgentResponseEvent | ConfigEvent | PingEvent | ClientToolCallEvent | VadScoreEvent | MCPToolCallClientEvent | AgentToolResponseEvent | ConversationMetadataEvent | AsrInitiationMetadataEvent | MCPConnectionStatusEvent | AgentChatResponsePartEvent | ErrorMessageEvent;
20
21
  export type PongEvent = Outgoing.PongClientToOrchestratorEvent;
21
22
  export type UserAudioEvent = Outgoing.UserAudio;
22
23
  export type UserFeedbackEvent = Outgoing.UserFeedbackClientToOrchestratorEvent;
@@ -0,0 +1 @@
1
/**
 * Loader for the "scribeAudioProcessor" AudioWorklet module.
 *
 * @param worklet - The AudioWorklet to load the processor into (typically `audioContext.audioWorklet`)
 * @param path - Optional. NOTE(review): presumably a URL to load the worklet script from instead of the bundled source - confirm
 * @returns A promise that settles when loading has finished
 */
+ export declare const loadScribeAudioProcessor: (worklet: AudioWorklet, path?: string) => Promise<void>;
package/dist/version.d.ts CHANGED
@@ -1 +1 @@
1
- export declare const PACKAGE_VERSION = "0.8.0";
1
+ export declare const PACKAGE_VERSION = "0.9.0";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elevenlabs/client",
3
- "version": "0.8.0",
3
+ "version": "0.9.0",
4
4
  "description": "ElevenLabs JavaScript Client Library",
5
5
  "main": "./dist/lib.umd.js",
6
6
  "module": "./dist/lib.module.js",
@@ -39,12 +39,12 @@
39
39
  },
40
40
  "dependencies": {
41
41
  "livekit-client": "^2.11.4",
42
- "@elevenlabs/types": "0.0.2"
42
+ "@elevenlabs/types": "0.1.0"
43
43
  },
44
44
  "scripts": {
45
45
  "generate-version": "printf \"// This file is auto-generated during build\\nexport const PACKAGE_VERSION = \\\"%s\\\";\\n\" \"$npm_package_version\" > src/version.ts",
46
46
  "generate-worklets": "node scripts/generateWorklets.js",
47
- "prebuild": "pnpm run generate-version && pnpm run generate-worklets",
47
+ "prebuild": "npm run generate-version && npm run generate-worklets",
48
48
  "build": "BROWSERSLIST_ENV=modern microbundle --jsx React.createElement --jsxFragment React.Fragment --jsxImportSource react src/index.ts",
49
49
  "clean": "rm -rf ./dist",
50
50
  "dev": "pnpm run clean && pnpm run generate-version && pnpm run generate-worklets && BROWSERSLIST_ENV=development microbundle --jsx React.createElement --jsxFragment React.Fragment --jsxImportSource react src/index.ts -w -f modern",
@@ -20,10 +20,16 @@ const worklets = [
20
20
  processorName: 'rawAudioProcessor'
21
21
  },
22
22
  {
23
- jsFile: 'audioConcatProcessor.js',
23
+ jsFile: 'audioConcatProcessor.js',
24
24
  tsFile: 'audioConcatProcessor.generated.ts',
25
25
  exportName: 'loadAudioConcatProcessor',
26
26
  processorName: 'audioConcatProcessor'
27
+ },
28
+ {
29
+ jsFile: 'scribeAudioProcessor.js',
30
+ tsFile: 'scribeAudioProcessor.generated.ts',
31
+ exportName: 'loadScribeAudioProcessor',
32
+ processorName: 'scribeAudioProcessor'
27
33
  }
28
34
  ];
29
35
 
@@ -32,9 +38,9 @@ console.log('Generating TypeScript worklet files...');
32
38
  worklets.forEach(({ jsFile, tsFile, exportName, processorName }) => {
33
39
  const jsPath = path.join(workletDir, jsFile);
34
40
  const tsPath = path.join(outputDir, tsFile);
35
-
41
+
36
42
  const jsContent = fs.readFileSync(jsPath, 'utf8');
37
-
43
+
38
44
  const tsContent = `// AUTO-GENERATED BY packages/client/scripts/generateWorklets.js
39
45
  import { createWorkletModuleLoader } from "./createWorkletModuleLoader";
40
46
 
@@ -0,0 +1,52 @@
/*
 * Scribe Audio Processor for converting microphone audio to PCM16 format
 * USED BY @elevenlabs/client
 */

/**
 * AudioWorklet processor that takes the first channel of its first input,
 * converts each Float32 sample to 16-bit PCM and posts the result to the main
 * thread as `{ audioData: ArrayBuffer }` once at least `bufferSize` samples
 * have accumulated.
 *
 * Samples are converted straight into a preallocated Int16Array. `process()`
 * runs on the audio rendering thread, so this avoids boxing every sample into
 * a plain JS array and copying the data twice on each flush.
 */
class ScribeAudioProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    // Minimum number of samples collected before a chunk is posted
    this.bufferSize = 4096;
    // Staging area for converted samples; reused across chunks
    this.pcm = new Int16Array(this.bufferSize);
    // Number of valid samples currently held in `pcm`
    this.filled = 0;
  }

  /**
   * Called by the audio engine for every render quantum.
   *
   * @param inputs - Per-input arrays of per-channel Float32Array sample blocks
   * @returns Always `true`, so the processor keeps running
   */
  process(inputs) {
    const input = inputs[0];
    // No connected input (or no channels) for this quantum: nothing to buffer
    if (!input || input.length === 0) {
      return true;
    }

    const channelData = input[0]; // Get first channel (mono)
    const total = this.filled + channelData.length;

    // Grow the staging buffer if a quantum would run past its end, so a
    // flushed chunk still contains every accumulated sample
    if (total > this.pcm.length) {
      const grown = new Int16Array(total);
      grown.set(this.pcm.subarray(0, this.filled));
      this.pcm = grown;
    }

    // Convert Float32 [-1, 1] to Int16 [-32768, 32767]
    for (let i = 0; i < channelData.length; i++) {
      // Clamp the value to prevent overflow
      const sample = Math.max(-1, Math.min(1, channelData[i]));
      // Scale to PCM16 range (asymmetric because |min| is one larger than max)
      this.pcm[this.filled + i] = sample < 0 ? sample * 32768 : sample * 32767;
    }
    this.filled = total;

    // When the threshold is reached, hand the samples to the main thread
    if (this.filled >= this.bufferSize) {
      // slice() copies into a fresh buffer: transferring detaches that copy,
      // while the staging buffer stays usable for the next chunk
      const chunk = this.pcm.slice(0, this.filled);
      this.port.postMessage(
        {
          audioData: chunk.buffer
        },
        [chunk.buffer]
      );
      this.filled = 0;
    }

    return true; // Continue processing
  }
}

registerProcessor("scribeAudioProcessor", ScribeAudioProcessor);