@elevenlabs/react 0.14.3 → 1.0.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -1085
- package/dist/conversation/ConversationClientTools.d.ts +39 -0
- package/dist/conversation/ConversationClientTools.d.ts.map +1 -0
- package/dist/conversation/ConversationClientTools.js +87 -0
- package/dist/conversation/ConversationClientTools.js.map +1 -0
- package/dist/conversation/ConversationContext.d.ts +47 -0
- package/dist/conversation/ConversationContext.d.ts.map +1 -0
- package/dist/conversation/ConversationContext.js +61 -0
- package/dist/conversation/ConversationContext.js.map +1 -0
- package/dist/conversation/ConversationControls.d.ts +34 -0
- package/dist/conversation/ConversationControls.d.ts.map +1 -0
- package/dist/conversation/ConversationControls.js +113 -0
- package/dist/conversation/ConversationControls.js.map +1 -0
- package/dist/conversation/ConversationFeedback.d.ts +19 -0
- package/dist/conversation/ConversationFeedback.d.ts.map +1 -0
- package/dist/conversation/ConversationFeedback.js +44 -0
- package/dist/conversation/ConversationFeedback.js.map +1 -0
- package/dist/conversation/ConversationInput.d.ts +18 -0
- package/dist/conversation/ConversationInput.d.ts.map +1 -0
- package/dist/conversation/ConversationInput.js +40 -0
- package/dist/conversation/ConversationInput.js.map +1 -0
- package/dist/conversation/ConversationMode.d.ts +19 -0
- package/dist/conversation/ConversationMode.d.ts.map +1 -0
- package/dist/conversation/ConversationMode.js +40 -0
- package/dist/conversation/ConversationMode.js.map +1 -0
- package/dist/conversation/ConversationProvider.d.ts +4 -0
- package/dist/conversation/ConversationProvider.d.ts.map +1 -0
- package/dist/conversation/ConversationProvider.js +127 -0
- package/dist/conversation/ConversationProvider.js.map +1 -0
- package/dist/conversation/ConversationStatus.d.ts +19 -0
- package/dist/conversation/ConversationStatus.d.ts.map +1 -0
- package/dist/conversation/ConversationStatus.js +44 -0
- package/dist/conversation/ConversationStatus.js.map +1 -0
- package/dist/conversation/ListenerMap.d.ts +29 -0
- package/dist/conversation/ListenerMap.d.ts.map +1 -0
- package/dist/conversation/ListenerMap.js +63 -0
- package/dist/conversation/ListenerMap.js.map +1 -0
- package/dist/conversation/ListenerSet.d.ts +7 -0
- package/dist/conversation/ListenerSet.d.ts.map +1 -0
- package/dist/conversation/ListenerSet.js +17 -0
- package/dist/conversation/ListenerSet.js.map +1 -0
- package/dist/conversation/types.d.ts +9 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +2 -0
- package/dist/conversation/types.js.map +1 -0
- package/dist/conversation/useConversation.d.ts +45 -0
- package/dist/conversation/useConversation.d.ts.map +1 -0
- package/dist/conversation/useConversation.js +76 -0
- package/dist/conversation/useConversation.js.map +1 -0
- package/dist/conversation/useStableCallbacks.d.ts +13 -0
- package/dist/conversation/useStableCallbacks.d.ts.map +1 -0
- package/dist/conversation/useStableCallbacks.js +33 -0
- package/dist/conversation/useStableCallbacks.js.map +1 -0
- package/dist/index.d.ts +19 -51
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +17 -0
- package/dist/index.js.map +1 -0
- package/dist/lib.iife.js +984 -0
- package/dist/lib.iife.js.map +1 -0
- package/dist/scribe.d.ts +1 -0
- package/dist/scribe.d.ts.map +1 -0
- package/dist/scribe.js +307 -0
- package/dist/scribe.js.map +1 -0
- package/dist/version.d.ts +2 -1
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +3 -0
- package/dist/version.js.map +1 -0
- package/package.json +25 -20
- package/.turbo/turbo-build.log +0 -29
- package/.turbo/turbo-check-types.log +0 -4
- package/.turbo/turbo-generate-version.log +0 -4
- package/.turbo/turbo-lint$colon$es.log +0 -6
- package/.turbo/turbo-lint$colon$prettier.log +0 -6
- package/CHANGELOG.md +0 -53
- package/dist/lib.cjs +0 -2
- package/dist/lib.cjs.map +0 -1
- package/dist/lib.modern.js +0 -2
- package/dist/lib.modern.js.map +0 -1
- package/dist/lib.module.js +0 -2
- package/dist/lib.module.js.map +0 -1
- package/dist/lib.umd.js +0 -2
- package/dist/lib.umd.js.map +0 -1
- package/jest.config.cjs +0 -23
package/README.md
CHANGED
|
@@ -4,1124 +4,72 @@
|
|
|
4
4
|
|
|
5
5
|
Build multimodal agents with the [ElevenAgents platform](https://elevenlabs.io/docs/eleven-agents/overview).
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
A React library for building voice and text conversations with ElevenAgents. For React Native, use [`@elevenlabs/react-native`](https://www.npmjs.com/package/@elevenlabs/react-native).
|
|
8
8
|
|
|
9
9
|

|
|
10
10
|
[](https://discord.gg/elevenlabs)
|
|
11
|
-
[](https://twitter.com/ElevenLabs)
|
|
12
12
|
|
|
13
13
|
## Installation
|
|
14
14
|
|
|
15
|
-
Install the package in your project through package manager.
|
|
16
|
-
|
|
17
15
|
```shell
|
|
18
16
|
npm install @elevenlabs/react
|
|
19
|
-
# or
|
|
20
|
-
yarn add @elevenlabs/react
|
|
21
|
-
# or
|
|
22
|
-
pnpm install @elevenlabs/react
|
|
23
17
|
```
|
|
24
18
|
|
|
25
|
-
##
|
|
26
|
-
|
|
27
|
-
### useConversation
|
|
28
|
-
|
|
29
|
-
React hook for managing WebSocket and WebRTC connections and audio usage for ElevenAgents.
|
|
30
|
-
|
|
31
|
-
#### Initialize conversation
|
|
32
|
-
|
|
33
|
-
First, initialize the Conversation instance.
|
|
19
|
+
## Quick Start
|
|
34
20
|
|
|
35
21
|
```tsx
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
```js
|
|
43
|
-
// call after explaining to the user why the microphone access is needed
|
|
44
|
-
// handle errors and show appropriate message to the user
|
|
45
|
-
try {
|
|
46
|
-
await navigator.mediaDevices.getUserMedia();
|
|
47
|
-
} catch {
|
|
48
|
-
// handle error
|
|
49
|
-
}
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
#### Options
|
|
53
|
-
|
|
54
|
-
The conversation can be initialized with certain options. Those are all optional.
|
|
55
|
-
|
|
56
|
-
```tsx
|
|
57
|
-
const conversation = useConversation({
|
|
58
|
-
/* options object */
|
|
59
|
-
});
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
| Property | Description |
|
|
63
|
-
|----------|-------------|
|
|
64
|
-
| **clientTools** | Object definition for client tools that can be invoked by the agent. [See below](#client-tools) for details. |
|
|
65
|
-
| **overrides** | Object definition for conversation settings overrides. [See below](#conversation-overrides) for details. |
|
|
66
|
-
| **textOnly** | Whether the conversation should run in text-only mode. [See below](#text-only) for details. |
|
|
67
|
-
| **onConnect** | Handler called when the conversation connection is established. |
|
|
68
|
-
| **onDisconnect** | Handler called when the conversation connection has ended. |
|
|
69
|
-
| **onMessage** | Handler called when a new message is received. These can be tentative or final transcriptions of user voice, replies produced by LLM, or debug messages when a debug option is enabled. |
|
|
70
|
-
| **onError** | Handler called when an error is encountered. |
|
|
71
|
-
| **onStatusChange** | Handler called whenever connection status changes. Can be `connected`, `connecting`, or `disconnected` (initial). |
|
|
72
|
-
| **onModeChange** | Handler called when the conversation mode changes, e.g., the agent switches from `speaking` to `listening`, or vice versa. |
|
|
73
|
-
| **onCanSendFeedbackChange** | Handler called when sending feedback becomes available or unavailable. |
|
|
74
|
-
| **onUnhandledClientToolCall** | Handler called when a client tool is invoked but no corresponding client tool was defined. |
|
|
75
|
-
| **onDebug** | Handler called for debugging events, including tentative agent responses and internal events. Useful for development and troubleshooting. |
|
|
76
|
-
| **onAudio** | Handler called when audio data is received from the agent. Provides access to raw audio events for custom processing. |
|
|
77
|
-
| **onInterruption** | Handler called when the conversation is interrupted, typically when the user starts speaking while the agent is talking. |
|
|
78
|
-
| **onVadScore** | Handler called with voice activity detection scores, indicating the likelihood of speech in the audio input. |
|
|
79
|
-
| **onMCPToolCall** | Handler called when an MCP (Model Context Protocol) tool is invoked by the agent. |
|
|
80
|
-
| **onMCPConnectionStatus** | Handler called when the MCP connection status changes, useful for monitoring MCP server connectivity. |
|
|
81
|
-
| **onAgentToolRequest** | Handler called when the agent begins tool execution. |
|
|
82
|
-
| **onAgentToolResponse** | Handler called when the agent receives a response from a tool execution. |
|
|
83
|
-
| **onConversationMetadata** | Handler called with conversation initiation metadata, providing information about the conversation setup. |
|
|
84
|
-
| **onAsrInitiationMetadata** | Handler called with ASR (Automatic Speech Recognition) initiation metadata, containing configuration details for speech recognition. |
|
|
85
|
-
| **onAudioAlignment** | Handler called with character-level timing data for synthesized audio. Provides arrays of characters, start times, and durations for text-to-speech synchronization. |
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
##### Client Tools
|
|
89
|
-
|
|
90
|
-
Client tools are a way to enable the agent to invoke client-side functionality. This can be used to trigger actions in the client, such as opening a modal or making an API call on behalf of the user.
|
|
91
|
-
|
|
92
|
-
Client tools definition is an object of functions, and needs to be identical with your configuration within the [ElevenLabs UI](https://elevenlabs.io/app/conversational-ai), where you can name and describe different tools, as well as set up the parameters passed by the agent.
|
|
93
|
-
|
|
94
|
-
```ts
|
|
95
|
-
const conversation = useConversation({
|
|
96
|
-
clientTools: {
|
|
97
|
-
displayMessage: (parameters: { text: string }) => {
|
|
98
|
-
alert(text);
|
|
99
|
-
|
|
100
|
-
return "Message displayed";
|
|
101
|
-
},
|
|
102
|
-
},
|
|
103
|
-
});
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
If the function returns a value, it will be passed back to the agent as a response.
|
|
107
|
-
Note that the tool needs to be explicitly set to be blocking conversation in ElevenLabs UI for the agent to await and react to the response, otherwise agent assumes success and continues the conversation.
|
|
108
|
-
|
|
109
|
-
#### Conversation overrides
|
|
110
|
-
|
|
111
|
-
You may choose to override various settings of the conversation and set them dynamically based on other user interactions.
|
|
112
|
-
We support overriding various settings.
|
|
113
|
-
These settings are optional and can be used to customize the conversation experience.
|
|
114
|
-
The following settings are available:
|
|
115
|
-
|
|
116
|
-
```ts
|
|
117
|
-
const conversation = useConversation({
|
|
118
|
-
overrides: {
|
|
119
|
-
agent: {
|
|
120
|
-
prompt: {
|
|
121
|
-
prompt: "My custom prompt",
|
|
122
|
-
llm: "gemini-2.5-flash",
|
|
123
|
-
},
|
|
124
|
-
firstMessage: "My custom first message",
|
|
125
|
-
language: "en",
|
|
126
|
-
},
|
|
127
|
-
tts: {
|
|
128
|
-
voiceId: "custom voice id",
|
|
129
|
-
speed: 1.0,
|
|
130
|
-
stability: 0.5,
|
|
131
|
-
similarityBoost: 0.8,
|
|
132
|
-
},
|
|
133
|
-
conversation: {
|
|
134
|
-
textOnly: true,
|
|
135
|
-
},
|
|
136
|
-
},
|
|
137
|
-
});
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
#### User identification
|
|
141
|
-
|
|
142
|
-
You can optionally pass a user ID to identify the user in the conversation. This can be your own customer identifier. This will be included in the conversation initiation data sent to the server:
|
|
143
|
-
|
|
144
|
-
Tracking this ID can be helpful for filtering conversations, tracking analytics on a user level, etc.
|
|
145
|
-
|
|
146
|
-
```ts
|
|
147
|
-
// Or pass it when starting the session
|
|
148
|
-
const conversationId = await conversation.startSession({
|
|
149
|
-
agentId: "<your-agent-id>",
|
|
150
|
-
userId: "user-123",
|
|
151
|
-
connectionType: "webrtc",
|
|
152
|
-
});
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
#### Text only
|
|
156
|
-
|
|
157
|
-
If your agent is configured to run in text-only mode, i.e. it does not send or receive audio messages,
|
|
158
|
-
you can use this flag to use a lighter version of the conversation. In that case, the
|
|
159
|
-
user will not be asked for microphone permissions and no audio context will be created.
|
|
160
|
-
|
|
161
|
-
```ts
|
|
162
|
-
const conversation = useConversation({
|
|
163
|
-
textOnly: true,
|
|
164
|
-
});
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
#### Prefer Headphones for iOS Devices
|
|
168
|
-
|
|
169
|
-
While this SDK leaves the choice of audio input/output device to the browser/system, iOS Safari seems to prefer the built-in speaker over headphones even when a Bluetooth device is in use. If you want to "force" the use of headphones on iOS devices when available, you can use the following option. Please keep in mind that this is not guaranteed, since this functionality is not provided by the browser. System audio should be the default choice.
|
|
170
|
-
|
|
171
|
-
```ts
|
|
172
|
-
const conversation = useConversation({
|
|
173
|
-
preferHeadphonesForIosDevices: true,
|
|
174
|
-
});
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
#### Connection delay
|
|
178
|
-
|
|
179
|
-
You can configure additional delay between when the microphone is activated and when the connection is established.
|
|
180
|
-
On Android, the delay is set to 3 seconds by default to make sure the device has time to switch to the correct audio mode.
|
|
181
|
-
Without it, you may experience issues with the beginning of the first message being cut off.
|
|
182
|
-
|
|
183
|
-
```ts
|
|
184
|
-
const conversation = useConversation({
|
|
185
|
-
connectionDelay: {
|
|
186
|
-
android: 3_000,
|
|
187
|
-
ios: 0,
|
|
188
|
-
default: 0,
|
|
189
|
-
},
|
|
190
|
-
});
|
|
191
|
-
```
|
|
192
|
-
|
|
193
|
-
#### Acquiring a Wake Lock
|
|
194
|
-
|
|
195
|
-
By default, the conversation will attempt to acquire a [wake lock](https://developer.mozilla.org/en-US/docs/Web/API/Screen_Wake_Lock_API) to prevent the device from going to sleep during the conversation.
|
|
196
|
-
This can be disabled by setting the `useWakeLock` option to `false`:
|
|
197
|
-
|
|
198
|
-
```ts
|
|
199
|
-
const conversation = useConversation({
|
|
200
|
-
useWakeLock: false,
|
|
201
|
-
});
|
|
202
|
-
```
|
|
203
|
-
|
|
204
|
-
#### Data Residency
|
|
205
|
-
|
|
206
|
-
The React SDK supports data residency for compliance with regional regulations. You can specify the server location when initializing the conversation:
|
|
207
|
-
|
|
208
|
-
```ts
|
|
209
|
-
const conversation = useConversation({
|
|
210
|
-
serverLocation: "eu-residency", // "us", "global", "eu-residency", or "in-residency"
|
|
211
|
-
});
|
|
212
|
-
```
|
|
213
|
-
|
|
214
|
-
Available locations:
|
|
215
|
-
|
|
216
|
-
- `"us"` (default) - United States servers
|
|
217
|
-
- `"global"` - Global servers (same as US)
|
|
218
|
-
- `"eu-residency"` - European Union residency servers
|
|
219
|
-
- `"in-residency"` - India residency servers
|
|
220
|
-
|
|
221
|
-
The SDK automatically routes both WebSocket and WebRTC connections to the appropriate regional servers based on your selection. This ensures that all conversation data, including audio streams, remain within the specified geographic region.
|
|
222
|
-
|
|
223
|
-
#### Methods
|
|
224
|
-
|
|
225
|
-
##### startConversation
|
|
226
|
-
|
|
227
|
-
The `startSession` method kicks off the WebSocket or WebRTC connection and starts using the microphone to communicate with the ElevenLabs agent. The method accepts an options object, with the `signedUrl`, `conversationToken` or `agentId` option being required.
|
|
228
|
-
|
|
229
|
-
Agent ID can be acquired through [ElevenLabs UI](https://elevenlabs.io/app/conversational-ai) and is always necessary.
|
|
230
|
-
|
|
231
|
-
```js
|
|
232
|
-
const conversation = useConversation();
|
|
233
|
-
|
|
234
|
-
// For public agents, pass in the agent ID and the connection type
|
|
235
|
-
const conversationId = await conversation.startSession({
|
|
236
|
-
agentId: "<your-agent-id>",
|
|
237
|
-
connectionType: "webrtc", // either 'webrtc' or 'websocket'
|
|
238
|
-
});
|
|
239
|
-
```
|
|
240
|
-
|
|
241
|
-
For public agents (i.e. agents that don't have authentication enabled), define `agentId` - no signed link generation necessary.
|
|
242
|
-
|
|
243
|
-
In case the conversation requires authorization, use the REST API to generate signed links for a WebSocket connection or a conversation token for a WebRTC connection.
|
|
244
|
-
|
|
245
|
-
`startSession` returns promise resolving to `conversationId`. The value is a globally unique conversation ID you can use to identify separate conversations.
|
|
246
|
-
|
|
247
|
-
For WebSocket connections:
|
|
248
|
-
|
|
249
|
-
```js
|
|
250
|
-
// Node.js server
|
|
251
|
-
|
|
252
|
-
app.get("/signed-url", yourAuthMiddleware, async (req, res) => {
|
|
253
|
-
const response = await fetch(
|
|
254
|
-
`https://api.elevenlabs.io/v1/convai/conversation/get-signed-url?agent_id=${process.env.AGENT_ID}`,
|
|
255
|
-
{
|
|
256
|
-
headers: {
|
|
257
|
-
// Requesting a signed url requires your ElevenLabs API key
|
|
258
|
-
// Do NOT expose your API key to the client!
|
|
259
|
-
"xi-api-key": process.env.ELEVENLABS_API_KEY,
|
|
260
|
-
},
|
|
261
|
-
}
|
|
262
|
-
);
|
|
263
|
-
|
|
264
|
-
if (!response.ok) {
|
|
265
|
-
return res.status(500).send("Failed to get signed URL");
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
const body = await response.json();
|
|
269
|
-
res.send(body.signed_url);
|
|
270
|
-
});
|
|
271
|
-
```
|
|
272
|
-
|
|
273
|
-
```js
|
|
274
|
-
// Client
|
|
275
|
-
|
|
276
|
-
const response = await fetch("/signed-url", yourAuthHeaders);
|
|
277
|
-
const signedUrl = await response.text();
|
|
278
|
-
|
|
279
|
-
const { conversation } = useConversation();
|
|
280
|
-
|
|
281
|
-
const conversationId = await conversation.startSession({
|
|
282
|
-
signedUrl,
|
|
283
|
-
connectionType: "websocket",
|
|
284
|
-
});
|
|
285
|
-
```
|
|
286
|
-
|
|
287
|
-
For WebRTC connections:
|
|
288
|
-
|
|
289
|
-
```js
|
|
290
|
-
// Node.js server
|
|
291
|
-
|
|
292
|
-
app.get("/conversation-token", yourAuthMiddleware, async (req, res) => {
|
|
293
|
-
const response = await fetch(
|
|
294
|
-
`https://api.elevenlabs.io/v1/convai/conversation/token?agent_id=${process.env.AGENT_ID}`,
|
|
295
|
-
{
|
|
296
|
-
headers: {
|
|
297
|
-
// Requesting a conversation token requires your ElevenLabs API key
|
|
298
|
-
// Do NOT expose your API key to the client!
|
|
299
|
-
'xi-api-key': process.env.ELEVENLABS_API_KEY,
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
);
|
|
303
|
-
|
|
304
|
-
if (!response.ok) {
|
|
305
|
-
return res.status(500).send("Failed to get conversation token");
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
const body = await response.json();
|
|
309
|
-
res.send(body.token);
|
|
310
|
-
);
|
|
311
|
-
```
|
|
312
|
-
|
|
313
|
-
```js
|
|
314
|
-
// Client
|
|
315
|
-
|
|
316
|
-
const response = await fetch("/conversation-token", yourAuthHeaders);
|
|
317
|
-
const conversationToken = await response.text();
|
|
318
|
-
|
|
319
|
-
const { conversation } = useConversation();
|
|
320
|
-
|
|
321
|
-
const conversationId = await conversation.startSession({
|
|
322
|
-
conversationToken,
|
|
323
|
-
connectionType: "webrtc",
|
|
324
|
-
});
|
|
325
|
-
```
|
|
326
|
-
|
|
327
|
-
You can provide a device ID to start the conversation using the input/output device of your choice. If the device ID is invalid, the default input and output devices will be used.
|
|
328
|
-
|
|
329
|
-
```js
|
|
330
|
-
const { conversation } = useConversation();
|
|
331
|
-
|
|
332
|
-
const conversationId = await conversation.startSession({
|
|
333
|
-
conversationToken,
|
|
334
|
-
connectionType: "webrtc",
|
|
335
|
-
inputDeviceId: "<new-input-device-id>",
|
|
336
|
-
outputDeviceId: "<new-input-device-id>",
|
|
337
|
-
});
|
|
338
|
-
```
|
|
339
|
-
|
|
340
|
-
**Note:** Device switching only works for voice conversations. You can enumerate available devices using the [MediaDevices.enumerateDevices()](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/enumerateDevices) API.
|
|
341
|
-
|
|
342
|
-
##### endSession
|
|
343
|
-
|
|
344
|
-
A method to manually end the conversation. The method will end the conversation and disconnect from websocket.
|
|
345
|
-
|
|
346
|
-
```js
|
|
347
|
-
await conversation.endSession();
|
|
348
|
-
```
|
|
349
|
-
|
|
350
|
-
##### sendFeedback
|
|
351
|
-
|
|
352
|
-
A method for sending binary feedback to the agent.
|
|
353
|
-
The method accepts a boolean value, where `true` represents positive feedback and `false` negative feedback.
|
|
354
|
-
Feedback is always correlated to the most recent agent response and can be sent only once per response.
|
|
355
|
-
Check `canSendFeedback` state to see if feedback can be sent in the given moment.
|
|
356
|
-
|
|
357
|
-
```js
|
|
358
|
-
const { sendFeedback } = useConversation();
|
|
359
|
-
|
|
360
|
-
sendFeedback(true); // positive feedback
|
|
361
|
-
sendFeedback(false); // negative feedback
|
|
362
|
-
```
|
|
363
|
-
|
|
364
|
-
##### sendContextualUpdate
|
|
365
|
-
|
|
366
|
-
A method to send contextual updates to the agent.
|
|
367
|
-
This can be used to inform the agent about user actions that are not directly related to the conversation, but may influence the agent's responses.
|
|
368
|
-
|
|
369
|
-
```js
|
|
370
|
-
const { sendContextualUpdate } = useConversation();
|
|
371
|
-
|
|
372
|
-
sendContextualUpdate(
|
|
373
|
-
"User navigated to another page. Consider it for next response, but don't react to this contextual update."
|
|
374
|
-
);
|
|
375
|
-
```
|
|
376
|
-
|
|
377
|
-
##### sendUserMessage
|
|
378
|
-
|
|
379
|
-
Sends a text message to the agent.
|
|
380
|
-
|
|
381
|
-
Can be used to let the user type in the message instead of using the microphone.
|
|
382
|
-
Unlike `sendContextualUpdate`, this will be treated as a user message and will prompt the agent to take its turn in the conversation.
|
|
383
|
-
|
|
384
|
-
```js
|
|
385
|
-
const { sendUserMessage, sendUserActivity } = useConversation();
|
|
386
|
-
const [value, setValue] = useState("");
|
|
387
|
-
|
|
388
|
-
return (
|
|
389
|
-
<>
|
|
390
|
-
<input
|
|
391
|
-
value={value}
|
|
392
|
-
onChange={e => {
|
|
393
|
-
setValue(e.target.value);
|
|
394
|
-
sendUserActivity();
|
|
395
|
-
}}
|
|
396
|
-
/>
|
|
397
|
-
<button
|
|
398
|
-
onClick={() => {
|
|
399
|
-
sendUserMessage(value);
|
|
400
|
-
setValue(value);
|
|
401
|
-
}}
|
|
402
|
-
>
|
|
403
|
-
SEND
|
|
404
|
-
</button>
|
|
405
|
-
</>
|
|
406
|
-
);
|
|
407
|
-
```
|
|
408
|
-
|
|
409
|
-
##### sendUserActivity
|
|
410
|
-
|
|
411
|
-
Notifies the agent about user activity.
|
|
412
|
-
|
|
413
|
-
The agent will not attempt to speak for at least 2 seconds after the user activity is detected.
|
|
414
|
-
This can be used to prevent the agent from interrupting the user when they are typing.
|
|
415
|
-
|
|
416
|
-
```js
|
|
417
|
-
const { sendUserMessage, sendUserActivity } = useConversation();
|
|
418
|
-
const [value, setValue] = useState("");
|
|
419
|
-
|
|
420
|
-
return (
|
|
421
|
-
<>
|
|
422
|
-
<input
|
|
423
|
-
value={value}
|
|
424
|
-
onChange={e => {
|
|
425
|
-
setValue(e.target.value);
|
|
426
|
-
sendUserActivity();
|
|
427
|
-
}}
|
|
428
|
-
/>
|
|
429
|
-
<button
|
|
430
|
-
onClick={() => {
|
|
431
|
-
sendUserMessage(value);
|
|
432
|
-
setValue(value);
|
|
433
|
-
}}
|
|
434
|
-
>
|
|
435
|
-
SEND
|
|
436
|
-
</button>
|
|
437
|
-
</>
|
|
438
|
-
);
|
|
439
|
-
```
|
|
440
|
-
|
|
441
|
-
##### setVolume
|
|
442
|
-
|
|
443
|
-
A method to set the output volume of the conversation. Accepts an object with a `volume` field between 0 and 1.
|
|
444
|
-
|
|
445
|
-
```js
|
|
446
|
-
const [volume, setVolume] = useState(0.5);
|
|
447
|
-
const conversation = useConversation({ volume });
|
|
448
|
-
|
|
449
|
-
// Set the volume
|
|
450
|
-
setVolume(0.5);
|
|
451
|
-
```
|
|
452
|
-
|
|
453
|
-
##### muteMic
|
|
454
|
-
|
|
455
|
-
A method to mute/unmute the microphone.
|
|
456
|
-
|
|
457
|
-
```js
|
|
458
|
-
const [micMuted, setMicMuted] = useState(false);
|
|
459
|
-
const conversation = useConversation({ micMuted });
|
|
460
|
-
|
|
461
|
-
// Mute the microphone
|
|
462
|
-
setMicMuted(true);
|
|
463
|
-
|
|
464
|
-
// Unmute the microphone
|
|
465
|
-
setMicMuted(false);
|
|
466
|
-
```
|
|
467
|
-
|
|
468
|
-
##### changeInputDevice
|
|
469
|
-
|
|
470
|
-
Switch the audio input device during an active voice conversation. This method is only available for voice conversations.
|
|
471
|
-
|
|
472
|
-
**Note:** In WebRTC mode the input format and sample rate are hardcoded to `pcm` and `48000` respectively. Changing those values when changing the input device is a no-op.
|
|
473
|
-
|
|
474
|
-
```js
|
|
475
|
-
// Change to a specific input device
|
|
476
|
-
await conversation.changeInputDevice({
|
|
477
|
-
sampleRate: 16000,
|
|
478
|
-
format: "pcm",
|
|
479
|
-
preferHeadphonesForIosDevices: true,
|
|
480
|
-
inputDeviceId: "your-device-id", // Optional: specific device ID
|
|
481
|
-
});
|
|
482
|
-
```
|
|
483
|
-
|
|
484
|
-
##### changeOutputDevice
|
|
485
|
-
|
|
486
|
-
Switch the audio output device during an active voice conversation. This method is only available for voice conversations.
|
|
487
|
-
|
|
488
|
-
**Note:** In WebRTC mode the output format and sample rate are hardcoded to `pcm` and `48000` respectively. Changing those values when changing the output device is a no-op.
|
|
489
|
-
|
|
490
|
-
```js
|
|
491
|
-
// Change to a specific output device
|
|
492
|
-
await conversation.changeOutputDevice({
|
|
493
|
-
sampleRate: 16000,
|
|
494
|
-
format: "pcm",
|
|
495
|
-
outputDeviceId: "your-device-id", // Optional: specific device ID
|
|
496
|
-
});
|
|
497
|
-
```
|
|
498
|
-
|
|
499
|
-
**Note:** Device switching only works for voice conversations. If no specific `deviceId` is provided, the browser will use its default device selection. You can enumerate available devices using the [MediaDevices.enumerateDevices()](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/enumerateDevices) API.
|
|
500
|
-
|
|
501
|
-
##### getInputByteFrequencyData / getOutputByteFrequencyData
|
|
502
|
-
|
|
503
|
-
Methods that return `Uint8Array`s containing the current input/output frequency data. See [AnalyserNode.getByteFrequencyData](https://developer.mozilla.org/en-US/docs/Web/API/AnalyserNode/getByteFrequencyData) for more information.
|
|
504
|
-
|
|
505
|
-
**Note:** These methods are only available for voice conversations. In WebRTC mode the audio is hardcoded to use `pcm_48000`, meaning any visualization using the returned data might show different patterns to WebSocket connections.
|
|
506
|
-
|
|
507
|
-
##### status
|
|
508
|
-
|
|
509
|
-
A React state containing the current status of the conversation.
|
|
510
|
-
|
|
511
|
-
```js
|
|
512
|
-
const { status } = useConversation();
|
|
513
|
-
console.log(status); // "connected" or "disconnected"
|
|
514
|
-
```
|
|
515
|
-
|
|
516
|
-
##### isSpeaking
|
|
517
|
-
|
|
518
|
-
A React state containing the information of whether the agent is currently speaking.
|
|
519
|
-
This is helpful for indicating the mode in your UI.
|
|
520
|
-
|
|
521
|
-
```js
|
|
522
|
-
const { isSpeaking } = useConversation();
|
|
523
|
-
console.log(isSpeaking); // boolean
|
|
524
|
-
```
|
|
525
|
-
|
|
526
|
-
##### canSendFeedback
|
|
527
|
-
|
|
528
|
-
A React state representing whether the user can send feedback to the agent.
|
|
529
|
-
When false, calls to `sendFeedback` will be ignored.
|
|
530
|
-
This is helpful to conditionally show the feedback button in your UI.
|
|
531
|
-
|
|
532
|
-
```js
|
|
533
|
-
const { canSendFeedback } = useConversation();
|
|
534
|
-
console.log(canSendFeedback); // boolean
|
|
535
|
-
```
|
|
536
|
-
|
|
537
|
-
### useScribe
|
|
538
|
-
|
|
539
|
-
React hook for managing real-time speech-to-text transcription with ElevenLabs Scribe Realtime v2.
|
|
540
|
-
|
|
541
|
-
#### Quick Start
|
|
542
|
-
|
|
543
|
-
```tsx
|
|
544
|
-
import { useEffect } from "react";
|
|
545
|
-
import { useScribe } from "@elevenlabs/react";
|
|
546
|
-
|
|
547
|
-
function MyComponent() {
|
|
548
|
-
const scribe = useScribe({
|
|
549
|
-
modelId: "scribe_v2_realtime",
|
|
550
|
-
onPartialTranscript: (data) => {
|
|
551
|
-
console.log("Partial:", data.text);
|
|
552
|
-
},
|
|
553
|
-
onCommittedTranscript: (data) => {
|
|
554
|
-
console.log("Committed:", data.text);
|
|
555
|
-
},
|
|
556
|
-
});
|
|
557
|
-
|
|
558
|
-
// Start recording
|
|
559
|
-
const handleStart = async () => {
|
|
560
|
-
try {
|
|
561
|
-
const token = await fetchTokenFromServer();
|
|
562
|
-
await scribe.connect({
|
|
563
|
-
token,
|
|
564
|
-
microphone: {
|
|
565
|
-
echoCancellation: true,
|
|
566
|
-
noiseSuppression: true,
|
|
567
|
-
},
|
|
568
|
-
});
|
|
569
|
-
} catch (err) {
|
|
570
|
-
console.error("Failed to start recording:", err);
|
|
571
|
-
}
|
|
572
|
-
};
|
|
573
|
-
|
|
574
|
-
// Stop recording
|
|
575
|
-
const handleDisconnect = () => {
|
|
576
|
-
scribe.disconnect();
|
|
577
|
-
};
|
|
578
|
-
|
|
579
|
-
// Disconnect on unmount
|
|
580
|
-
useEffect(() => {
|
|
581
|
-
return () => {
|
|
582
|
-
if (scribe.isConnected) {
|
|
583
|
-
scribe.disconnect();
|
|
584
|
-
}
|
|
585
|
-
};
|
|
586
|
-
}, [scribe]);
|
|
587
|
-
|
|
588
|
-
return (
|
|
589
|
-
<div>
|
|
590
|
-
<button onClick={handleStart} disabled={scribe.isConnected}>
|
|
591
|
-
Start Recording
|
|
592
|
-
</button>
|
|
593
|
-
<button onClick={handleDisconnect} disabled={!scribe.isConnected}>
|
|
594
|
-
Stop
|
|
595
|
-
</button>
|
|
596
|
-
|
|
597
|
-
{scribe.partialTranscript && <p>Live: {scribe.partialTranscript}</p>}
|
|
598
|
-
|
|
599
|
-
<div>
|
|
600
|
-
{scribe.committedTranscripts.map((t) => (
|
|
601
|
-
<p key={t.id}>{t.text}</p>
|
|
602
|
-
))}
|
|
603
|
-
</div>
|
|
604
|
-
</div>
|
|
605
|
-
);
|
|
606
|
-
}
|
|
607
|
-
```
|
|
608
|
-
|
|
609
|
-
#### Getting a Token
|
|
610
|
-
|
|
611
|
-
Scribe requires a single-use token for authentication. Create an API endpoint on your server:
|
|
612
|
-
|
|
613
|
-
```js
|
|
614
|
-
// Node.js server
|
|
615
|
-
app.get("/scribe-token", yourAuthMiddleware, async (req, res) => {
|
|
616
|
-
const response = await fetch(
|
|
617
|
-
"https://api.elevenlabs.io/v1/single-use-token/realtime_scribe",
|
|
618
|
-
{
|
|
619
|
-
method: "POST",
|
|
620
|
-
headers: {
|
|
621
|
-
"xi-api-key": process.env.ELEVENLABS_API_KEY,
|
|
622
|
-
},
|
|
623
|
-
}
|
|
624
|
-
);
|
|
625
|
-
|
|
626
|
-
const data = await response.json();
|
|
627
|
-
res.json({ token: data.token });
|
|
628
|
-
});
|
|
629
|
-
```
|
|
630
|
-
|
|
631
|
-
**Warning:** Your ElevenLabs API key is sensitive, do not leak it to the client. Always generate the token on the server.
|
|
632
|
-
|
|
633
|
-
```tsx
|
|
634
|
-
// Client
|
|
635
|
-
const fetchToken = async () => {
|
|
636
|
-
const response = await fetch("/scribe-token");
|
|
637
|
-
const { token } = await response.json();
|
|
638
|
-
return token;
|
|
639
|
-
};
|
|
640
|
-
```
|
|
641
|
-
|
|
642
|
-
#### Hook Options
|
|
643
|
-
|
|
644
|
-
Configure the hook with default options and callbacks:
|
|
645
|
-
|
|
646
|
-
```tsx
|
|
647
|
-
const scribe = useScribe({
|
|
648
|
-
// Connection options (can be overridden in connect())
|
|
649
|
-
token: "optional-default-token",
|
|
650
|
-
modelId: "scribe_v2_realtime",
|
|
651
|
-
baseUri: "wss://api.elevenlabs.io",
|
|
652
|
-
|
|
653
|
-
// VAD options
|
|
654
|
-
commitStrategy: CommitStrategy.AUTOMATIC,
|
|
655
|
-
vadSilenceThresholdSecs: 0.5,
|
|
656
|
-
vadThreshold: 0.5,
|
|
657
|
-
minSpeechDurationMs: 100,
|
|
658
|
-
minSilenceDurationMs: 500,
|
|
659
|
-
languageCode: "en",
|
|
660
|
-
|
|
661
|
-
// Microphone options (for automatic mode)
|
|
662
|
-
microphone: {
|
|
663
|
-
deviceId: "optional-device-id",
|
|
664
|
-
echoCancellation: true,
|
|
665
|
-
noiseSuppression: true,
|
|
666
|
-
autoGainControl: true,
|
|
667
|
-
},
|
|
668
|
-
|
|
669
|
-
// Manual audio options (for file transcription)
|
|
670
|
-
audioFormat: AudioFormat.PCM_16000,
|
|
671
|
-
sampleRate: 16000,
|
|
672
|
-
|
|
673
|
-
// Auto-connect on mount
|
|
674
|
-
autoConnect: false,
|
|
675
|
-
|
|
676
|
-
// Event callbacks
|
|
677
|
-
onSessionStarted: () => console.log("Session started"),
|
|
678
|
-
onPartialTranscript: (data) => console.log("Partial:", data.text),
|
|
679
|
-
onCommittedTranscript: (data) => console.log("Committed:", data.text),
|
|
680
|
-
onCommittedTranscriptWithTimestamps: (data) => console.log("With timestamps:", data),
|
|
681
|
-
onError: (error) => console.error("Error:", error),
|
|
682
|
-
onAuthError: (data) => console.error("Auth error:", data.error),
|
|
683
|
-
onQuotaExceededError: (data) => console.error("Quota exceeded:", data.error),
|
|
684
|
-
onConnect: () => console.log("Connected"),
|
|
685
|
-
onDisconnect: () => console.log("Disconnected"),
|
|
686
|
-
});
|
|
687
|
-
```
|
|
688
|
-
|
|
689
|
-
#### Microphone Mode
|
|
690
|
-
|
|
691
|
-
Stream audio directly from the user's microphone:
|
|
692
|
-
|
|
693
|
-
```tsx
|
|
694
|
-
function MicrophoneTranscription() {
|
|
695
|
-
const scribe = useScribe({
|
|
696
|
-
modelId: "scribe_v2_realtime",
|
|
697
|
-
});
|
|
698
|
-
|
|
699
|
-
const startRecording = async () => {
|
|
700
|
-
const token = await fetchToken();
|
|
701
|
-
await scribe.connect({
|
|
702
|
-
token,
|
|
703
|
-
microphone: {
|
|
704
|
-
echoCancellation: true,
|
|
705
|
-
noiseSuppression: true,
|
|
706
|
-
autoGainControl: true,
|
|
707
|
-
},
|
|
708
|
-
});
|
|
709
|
-
};
|
|
22
|
+
import {
|
|
23
|
+
ConversationProvider,
|
|
24
|
+
useConversationControls,
|
|
25
|
+
useConversationStatus,
|
|
26
|
+
} from "@elevenlabs/react";
|
|
710
27
|
|
|
28
|
+
function App() {
|
|
711
29
|
return (
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
<button onClick={scribe.disconnect} disabled={!scribe.isConnected}>
|
|
717
|
-
Stop
|
|
718
|
-
</button>
|
|
719
|
-
|
|
720
|
-
{scribe.partialTranscript && (
|
|
721
|
-
<div>
|
|
722
|
-
<strong>Speaking:</strong> {scribe.partialTranscript}
|
|
723
|
-
</div>
|
|
724
|
-
)}
|
|
725
|
-
|
|
726
|
-
{scribe.committedTranscripts.map((transcript) => (
|
|
727
|
-
<div key={transcript.id}>{transcript.text}</div>
|
|
728
|
-
))}
|
|
729
|
-
</div>
|
|
30
|
+
{/* replace with your agent's ID */}
|
|
31
|
+
<ConversationProvider agentId="agent_7101k5zvyjhmfg983brhmhkd98n6">
|
|
32
|
+
<Conversation />
|
|
33
|
+
</ConversationProvider>
|
|
730
34
|
);
|
|
731
35
|
}
|
|
732
|
-
```
|
|
733
|
-
|
|
734
|
-
#### Manual Audio Mode (File Transcription)
|
|
735
|
-
|
|
736
|
-
Transcribe pre-recorded audio files:
|
|
737
|
-
|
|
738
|
-
```tsx
|
|
739
|
-
import { useScribe, AudioFormat } from "@elevenlabs/react";
|
|
740
|
-
|
|
741
|
-
function FileTranscription() {
|
|
742
|
-
const [file, setFile] = useState<File | null>(null);
|
|
743
|
-
const scribe = useScribe({
|
|
744
|
-
modelId: "scribe_v2_realtime",
|
|
745
|
-
audioFormat: AudioFormat.PCM_16000,
|
|
746
|
-
sampleRate: 16000,
|
|
747
|
-
});
|
|
748
|
-
|
|
749
|
-
const transcribeFile = async () => {
|
|
750
|
-
if (!file) return;
|
|
751
|
-
|
|
752
|
-
const token = await fetchToken();
|
|
753
|
-
await scribe.connect({ token });
|
|
754
|
-
|
|
755
|
-
// Decode audio file
|
|
756
|
-
const arrayBuffer = await file.arrayBuffer();
|
|
757
|
-
const audioContext = new AudioContext({ sampleRate: 16000 });
|
|
758
|
-
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
|
|
759
|
-
|
|
760
|
-
// Convert to PCM16
|
|
761
|
-
const channelData = audioBuffer.getChannelData(0);
|
|
762
|
-
const pcmData = new Int16Array(channelData.length);
|
|
763
|
-
|
|
764
|
-
for (let i = 0; i < channelData.length; i++) {
|
|
765
|
-
const sample = Math.max(-1, Math.min(1, channelData[i]));
|
|
766
|
-
pcmData[i] = sample < 0 ? sample * 32768 : sample * 32767;
|
|
767
|
-
}
|
|
768
|
-
|
|
769
|
-
// Send in chunks
|
|
770
|
-
const chunkSize = 4096;
|
|
771
|
-
for (let offset = 0; offset < pcmData.length; offset += chunkSize) {
|
|
772
|
-
const chunk = pcmData.slice(offset, offset + chunkSize);
|
|
773
|
-
const bytes = new Uint8Array(chunk.buffer);
|
|
774
|
-
const base64 = btoa(String.fromCharCode(...bytes));
|
|
775
36
|
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
// Commit transcription
|
|
781
|
-
scribe.commit();
|
|
782
|
-
};
|
|
37
|
+
function Conversation() {
|
|
38
|
+
const { startSession, endSession } = useConversationControls();
|
|
39
|
+
const { status } = useConversationStatus();
|
|
783
40
|
|
|
784
41
|
return (
|
|
785
42
|
<div>
|
|
786
|
-
<
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
43
|
+
<p>Status: {status}</p>
|
|
44
|
+
<button
|
|
45
|
+
onClick={() =>
|
|
46
|
+
startSession({
|
|
47
|
+
onConnect: ({ conversationId }) =>
|
|
48
|
+
console.log("Connected:", conversationId),
|
|
49
|
+
onError: (message) => console.error("Error:", message),
|
|
50
|
+
})
|
|
51
|
+
}
|
|
52
|
+
>
|
|
53
|
+
Start
|
|
793
54
|
</button>
|
|
794
|
-
|
|
795
|
-
{scribe.committedTranscripts.map((transcript) => (
|
|
796
|
-
<div key={transcript.id}>{transcript.text}</div>
|
|
797
|
-
))}
|
|
55
|
+
<button onClick={() => endSession()}>End</button>
|
|
798
56
|
</div>
|
|
799
57
|
);
|
|
800
58
|
}
|
|
801
59
|
```
|
|
802
60
|
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
##### State
|
|
806
|
-
|
|
807
|
-
- **status** - Current connection status: `"disconnected"`, `"connecting"`, `"connected"`, `"transcribing"`, or `"error"`
|
|
808
|
-
- **isConnected** - Boolean indicating if connected
|
|
809
|
-
- **isTranscribing** - Boolean indicating if actively transcribing
|
|
810
|
-
- **partialTranscript** - Current partial (interim) transcript
|
|
811
|
-
- **committedTranscripts** - Array of completed transcript segments
|
|
812
|
-
- **error** - Current error message, or null
|
|
813
|
-
|
|
814
|
-
```tsx
|
|
815
|
-
const scribe = useScribe(/* options */);
|
|
816
|
-
|
|
817
|
-
console.log(scribe.status); // "connected"
|
|
818
|
-
console.log(scribe.isConnected); // true
|
|
819
|
-
console.log(scribe.partialTranscript); // "hello world"
|
|
820
|
-
console.log(scribe.committedTranscripts); // [{ id: "...", text: "...", words: ..., isFinal: true }]
|
|
821
|
-
console.log(scribe.error); // null or error string
|
|
822
|
-
```
|
|
823
|
-
|
|
824
|
-
##### Methods
|
|
825
|
-
|
|
826
|
-
###### connect(options?)
|
|
827
|
-
|
|
828
|
-
Connect to Scribe. Options provided here override hook defaults:
|
|
829
|
-
|
|
830
|
-
```tsx
|
|
831
|
-
await scribe.connect({
|
|
832
|
-
token: "your-token", // Required
|
|
833
|
-
microphone: { /* ... */ }, // For microphone mode
|
|
834
|
-
// OR
|
|
835
|
-
audioFormat: AudioFormat.PCM_16000, // For manual mode
|
|
836
|
-
sampleRate: 16000,
|
|
837
|
-
});
|
|
838
|
-
```
|
|
839
|
-
|
|
840
|
-
###### disconnect()
|
|
841
|
-
|
|
842
|
-
Disconnect and clean up resources:
|
|
843
|
-
|
|
844
|
-
```tsx
|
|
845
|
-
scribe.disconnect();
|
|
846
|
-
```
|
|
847
|
-
|
|
848
|
-
###### sendAudio(audioBase64, options?)
|
|
61
|
+
## Documentation
|
|
849
62
|
|
|
850
|
-
|
|
63
|
+
For the full API reference including connection types, client tools, conversation overrides, and more, see the [React SDK documentation](https://elevenlabs.io/docs/eleven-agents/libraries/react).
|
|
851
64
|
|
|
852
|
-
|
|
853
|
-
scribe.sendAudio(base64AudioChunk, {
|
|
854
|
-
commit: false, // Optional: commit immediately
|
|
855
|
-
sampleRate: 16000, // Optional: override sample rate
|
|
856
|
-
previousText: "Previous transcription text", // Optional: include text from a previous transcription or base64 encoded audio data. Will be used to provide context to the model. Can only be sent in the first audio chunk.
|
|
857
|
-
});
|
|
858
|
-
```
|
|
859
|
-
|
|
860
|
-
**Warning:** The `previousText` field can only be sent in the first audio chunk of a session. If sent in any other chunk an error will be returned.
|
|
861
|
-
|
|
862
|
-
###### commit()
|
|
863
|
-
|
|
864
|
-
Manually commit the current transcription:
|
|
865
|
-
|
|
866
|
-
```tsx
|
|
867
|
-
scribe.commit();
|
|
868
|
-
```
|
|
869
|
-
|
|
870
|
-
###### clearTranscripts()
|
|
871
|
-
|
|
872
|
-
Clear all transcripts from state:
|
|
873
|
-
|
|
874
|
-
```tsx
|
|
875
|
-
scribe.clearTranscripts();
|
|
876
|
-
```
|
|
877
|
-
|
|
878
|
-
###### getConnection()
|
|
879
|
-
|
|
880
|
-
Get the underlying connection instance:
|
|
881
|
-
|
|
882
|
-
```tsx
|
|
883
|
-
const connection = scribe.getConnection();
|
|
884
|
-
// Returns RealtimeConnection | null
|
|
885
|
-
```
|
|
886
|
-
|
|
887
|
-
#### Transcript Segment Type
|
|
888
|
-
|
|
889
|
-
Each committed transcript segment has the following structure:
|
|
890
|
-
|
|
891
|
-
```typescript
|
|
892
|
-
interface TranscriptSegment {
|
|
893
|
-
id: string; // Unique identifier
|
|
894
|
-
text: string; // Transcript text
|
|
895
|
-
timestamp: number; // Unix timestamp
|
|
896
|
-
isFinal: boolean; // Always true for committed transcripts
|
|
897
|
-
}
|
|
898
|
-
```
|
|
899
|
-
|
|
900
|
-
#### Event Callbacks
|
|
901
|
-
|
|
902
|
-
All event callbacks are optional and can be provided as hook options:
|
|
903
|
-
|
|
904
|
-
```tsx
|
|
905
|
-
const scribe = useScribe({
|
|
906
|
-
onSessionStarted: () => {
|
|
907
|
-
console.log("Session started");
|
|
908
|
-
},
|
|
909
|
-
onPartialTranscript: (data: { text: string }) => {
|
|
910
|
-
console.log("Partial:", data.text);
|
|
911
|
-
},
|
|
912
|
-
onCommittedTranscript: (data: { text: string }) => {
|
|
913
|
-
console.log("Committed:", data.text);
|
|
914
|
-
},
|
|
915
|
-
onCommittedTranscriptWithTimestamps: (data: {
|
|
916
|
-
text: string;
|
|
917
|
-
words?: { start: number; end: number }[];
|
|
918
|
-
}) => {
|
|
919
|
-
console.log("Text:", data.text);
|
|
920
|
-
console.log("Word timestamps:", data.words);
|
|
921
|
-
},
|
|
922
|
-
// Generic error handler for all errors
|
|
923
|
-
onError: (error: Error | Event) => {
|
|
924
|
-
console.error("Scribe error:", error);
|
|
925
|
-
},
|
|
926
|
-
// Specific errors can also be tracked
|
|
927
|
-
onAuthError: (data: { error: string }) => {
|
|
928
|
-
console.error("Auth error:", data.error);
|
|
929
|
-
},
|
|
930
|
-
onConnect: () => {
|
|
931
|
-
console.log("WebSocket opened");
|
|
932
|
-
},
|
|
933
|
-
onDisconnect: () => {
|
|
934
|
-
console.log("WebSocket closed");
|
|
935
|
-
},
|
|
936
|
-
});
|
|
937
|
-
```
|
|
938
|
-
|
|
939
|
-
#### Commit Strategies
|
|
940
|
-
|
|
941
|
-
Control when transcriptions are committed:
|
|
942
|
-
|
|
943
|
-
```tsx
|
|
944
|
-
import { CommitStrategy } from "@elevenlabs/react";
|
|
945
|
-
|
|
946
|
-
// Manual (default) - you control when to commit
|
|
947
|
-
const scribe = useScribe({
|
|
948
|
-
commitStrategy: CommitStrategy.MANUAL,
|
|
949
|
-
});
|
|
950
|
-
|
|
951
|
-
// Later...
|
|
952
|
-
scribe.commit(); // Commit transcription
|
|
953
|
-
|
|
954
|
-
// Voice Activity Detection - model detects silences and automatically commits
|
|
955
|
-
const scribe = useScribe({
|
|
956
|
-
commitStrategy: CommitStrategy.VAD,
|
|
957
|
-
});
|
|
958
|
-
```
|
|
959
|
-
|
|
960
|
-
#### Complete Example
|
|
961
|
-
|
|
962
|
-
```tsx
|
|
963
|
-
import { useScribe, CommitStrategy } from "@elevenlabs/react";
|
|
964
|
-
import { useState, useEffect } from "react";
|
|
965
|
-
|
|
966
|
-
type Mode = "microphone" | "file"
|
|
967
|
-
|
|
968
|
-
function ScribeDemo() {
|
|
969
|
-
const [mode, setMode] = useState<Mode>("microphone");
|
|
970
|
-
|
|
971
|
-
const scribe = useScribe({
|
|
972
|
-
modelId: "scribe_v2_realtime",
|
|
973
|
-
commitStrategy: CommitStrategy.AUTOMATIC,
|
|
974
|
-
onSessionStarted: () => console.log("Started"),
|
|
975
|
-
onCommittedTranscript: (data) => console.log("Committed:", data.text),
|
|
976
|
-
onError: (error) => console.error("Error:", error),
|
|
977
|
-
});
|
|
978
|
-
|
|
979
|
-
const startMicrophone = async () => {
|
|
980
|
-
const token = await fetchToken();
|
|
981
|
-
await scribe.connect({
|
|
982
|
-
token,
|
|
983
|
-
microphone: {
|
|
984
|
-
echoCancellation: true,
|
|
985
|
-
noiseSuppression: true,
|
|
986
|
-
},
|
|
987
|
-
});
|
|
988
|
-
};
|
|
989
|
-
|
|
990
|
-
const handleDisconnect = () => scribe.disconnect();
|
|
991
|
-
|
|
992
|
-
const handleClearTranscripts = () => scribe.clearTranscripts();
|
|
993
|
-
|
|
994
|
-
useEffect(() => {
|
|
995
|
-
return () => {
|
|
996
|
-
handleDisconnect();
|
|
997
|
-
};
|
|
998
|
-
}, []);
|
|
999
|
-
|
|
1000
|
-
return (
|
|
1001
|
-
<div>
|
|
1002
|
-
<h1>Scribe Demo</h1>
|
|
1003
|
-
|
|
1004
|
-
{/* Status */}
|
|
1005
|
-
<div>
|
|
1006
|
-
Status: {scribe.status}
|
|
1007
|
-
{scribe.error && <span>Error: {scribe.error}</span>}
|
|
1008
|
-
</div>
|
|
1009
|
-
|
|
1010
|
-
{/* Controls */}
|
|
1011
|
-
<div>
|
|
1012
|
-
{!scribe.isConnected ? (
|
|
1013
|
-
<button onClick={startMicrophone}>Start Recording</button>
|
|
1014
|
-
) : (
|
|
1015
|
-
<button onClick={handleDisconnect}>Stop</button>
|
|
1016
|
-
)}
|
|
1017
|
-
<button onClick={handleClearTranscripts}>Clear</button>
|
|
1018
|
-
</div>
|
|
1019
|
-
|
|
1020
|
-
{/* Live Transcript */}
|
|
1021
|
-
{scribe.partialTranscript && (
|
|
1022
|
-
<div>
|
|
1023
|
-
<strong>Live:</strong> {scribe.partialTranscript}
|
|
1024
|
-
</div>
|
|
1025
|
-
)}
|
|
1026
|
-
|
|
1027
|
-
{/* Committed Transcripts */}
|
|
1028
|
-
<div>
|
|
1029
|
-
<h2>Transcripts ({scribe.committedTranscripts.length})</h2>
|
|
1030
|
-
{scribe.committedTranscripts.map((t) => (
|
|
1031
|
-
<div key={t.id}>
|
|
1032
|
-
<span>{new Date(t.timestamp).toLocaleTimeString()}</span>
|
|
1033
|
-
<p>{t.text}</p>
|
|
1034
|
-
</div>
|
|
1035
|
-
))}
|
|
1036
|
-
</div>
|
|
1037
|
-
</div>
|
|
1038
|
-
);
|
|
1039
|
-
}
|
|
1040
|
-
```
|
|
1041
|
-
|
|
1042
|
-
#### TypeScript Support
|
|
1043
|
-
|
|
1044
|
-
Full TypeScript types are included:
|
|
1045
|
-
|
|
1046
|
-
```typescript
|
|
1047
|
-
import {
|
|
1048
|
-
useScribe,
|
|
1049
|
-
AudioFormat,
|
|
1050
|
-
CommitStrategy,
|
|
1051
|
-
RealtimeEvents,
|
|
1052
|
-
type UseScribeReturn,
|
|
1053
|
-
type ScribeHookOptions,
|
|
1054
|
-
type ScribeStatus,
|
|
1055
|
-
type TranscriptSegment,
|
|
1056
|
-
type RealtimeConnection,
|
|
1057
|
-
} from "@elevenlabs/react";
|
|
1058
|
-
|
|
1059
|
-
const scribe: UseScribeReturn = useScribe({
|
|
1060
|
-
modelId: "scribe_v2_realtime",
|
|
1061
|
-
microphone: {
|
|
1062
|
-
echoCancellation: true,
|
|
1063
|
-
},
|
|
1064
|
-
});
|
|
1065
|
-
```
|
|
1066
|
-
|
|
1067
|
-
## CSP compliance
|
|
1068
|
-
|
|
1069
|
-
If your application has a tight Content Security Policy and does not allow data: or blob: in the `script-src` (w3.org/TR/CSP2#source-list-guid-matching), you can self-host the needed files in the public folder.
|
|
1070
|
-
|
|
1071
|
-
Whitelisting these values is not recommended (see w3.org/TR/CSP2#source-list-guid-matching).
|
|
1072
|
-
|
|
1073
|
-
Add the worklet files to your public folder, e.g. `public/elevenlabs`.
|
|
1074
|
-
|
|
1075
|
-
```
|
|
1076
|
-
@elevenlabs/client/scripts/
|
|
1077
|
-
```
|
|
1078
|
-
|
|
1079
|
-
Then call `startSession` with:
|
|
1080
|
-
|
|
1081
|
-
```ts
|
|
1082
|
-
await conversation.startSession({
|
|
1083
|
-
...
|
|
1084
|
-
workletPaths: {
|
|
1085
|
-
'rawAudioProcessor': '/elevenlabs/rawAudioProcessor.worklet.js',
|
|
1086
|
-
'audioConcatProcessor':
|
|
1087
|
-
'/elevenlabs/audioConcatProcessor.worklet.js',
|
|
1088
|
-
},
|
|
1089
|
-
});
|
|
1090
|
-
```
|
|
1091
|
-
|
|
1092
|
-
It is recommended to update the scripts with a build script like
|
|
1093
|
-
|
|
1094
|
-
```js
|
|
1095
|
-
import { viteStaticCopy } from 'vite-plugin-static-copy'
|
|
1096
|
-
import { createRequire } from 'node:module';
|
|
1097
|
-
import path from 'path';
|
|
1098
|
-
|
|
1099
|
-
const require = createRequire(import.meta.url);
|
|
1100
|
-
|
|
1101
|
-
export default {
|
|
1102
|
-
plugins: [
|
|
1103
|
-
viteStaticCopy({
|
|
1104
|
-
targets: [
|
|
1105
|
-
{
|
|
1106
|
-
src: require.resolve('@elevenlabs/client')/dist/worklets/audioConcatProcessor.js',
|
|
1107
|
-
dest: 'dist',
|
|
1108
|
-
},
|
|
1109
|
-
{
|
|
1110
|
-
src: require.resolve('@elevenlabs/client')/dist/worklets/rawAudioProcessor.js',
|
|
1111
|
-
dest: 'dist',
|
|
1112
|
-
},
|
|
1113
|
-
],
|
|
1114
|
-
}),
|
|
1115
|
-
],
|
|
1116
|
-
}
|
|
1117
|
-
```
|
|
65
|
+
For real-time speech-to-text with the `useScribe` hook, see the [Scribe documentation](https://elevenlabs.io/docs/eleven-api/guides/how-to/speech-to-text/realtime/client-side-streaming).
|
|
1118
66
|
|
|
1119
67
|
## Development
|
|
1120
68
|
|
|
1121
|
-
Please
|
|
69
|
+
Please refer to the README.md file in the root of this repository.
|
|
1122
70
|
|
|
1123
71
|
## Contributing
|
|
1124
72
|
|
|
1125
|
-
Please
|
|
73
|
+
Please create an issue first to discuss the proposed changes. Any contributions are welcome!
|
|
1126
74
|
|
|
1127
75
|
Remember, if merged, your code will be used as part of an MIT-licensed project. By submitting a Pull Request, you are giving your consent for your code to be integrated into this library.
|