@elevenlabs/client 0.8.1 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +13 -12
- package/.turbo/turbo-generate-version.log +1 -1
- package/README.md +359 -0
- package/dist/index.d.ts +2 -0
- package/dist/lib.cjs +1 -1
- package/dist/lib.cjs.map +1 -1
- package/dist/lib.modern.js +1 -1
- package/dist/lib.modern.js.map +1 -1
- package/dist/lib.module.js +1 -1
- package/dist/lib.module.js.map +1 -1
- package/dist/lib.umd.js +1 -1
- package/dist/lib.umd.js.map +1 -1
- package/dist/scribe/connection.d.ts +174 -0
- package/dist/scribe/index.d.ts +6 -0
- package/dist/scribe/scribe.d.ts +118 -0
- package/dist/utils/scribeAudioProcessor.generated.d.ts +1 -0
- package/dist/version.d.ts +1 -1
- package/package.json +2 -2
- package/scripts/generateWorklets.js +9 -3
- package/worklets/scribeAudioProcessor.js +52 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,29 +1,30 @@
|
|
|
1
1
|
|
|
2
|
-
> @elevenlabs/client@0.
|
|
2
|
+
> @elevenlabs/client@0.9.1 prebuild /home/runner/work/packages/packages/packages/client
|
|
3
3
|
> npm run generate-version && npm run generate-worklets
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
> @elevenlabs/client@0.
|
|
6
|
+
> @elevenlabs/client@0.9.1 generate-version
|
|
7
7
|
> printf "// This file is auto-generated during build\nexport const PACKAGE_VERSION = \"%s\";\n" "$npm_package_version" > src/version.ts
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
> @elevenlabs/client@0.
|
|
10
|
+
> @elevenlabs/client@0.9.1 generate-worklets
|
|
11
11
|
> node scripts/generateWorklets.js
|
|
12
12
|
|
|
13
13
|
Generating TypeScript worklet files...
|
|
14
14
|
Generated rawAudioProcessor.generated.ts from rawAudioProcessor.js
|
|
15
15
|
Generated audioConcatProcessor.generated.ts from audioConcatProcessor.js
|
|
16
|
+
Generated scribeAudioProcessor.generated.ts from scribeAudioProcessor.js
|
|
16
17
|
Worklet generation complete!
|
|
17
18
|
|
|
18
|
-
> @elevenlabs/client@0.
|
|
19
|
+
> @elevenlabs/client@0.9.1 build /home/runner/work/packages/packages/packages/client
|
|
19
20
|
> BROWSERSLIST_ENV=modern microbundle --jsx React.createElement --jsxFragment React.Fragment --jsxImportSource react src/index.ts
|
|
20
21
|
|
|
21
22
|
Build "@elevenlabs/client" to dist:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
9
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
23
|
+
13.5 kB: lib.cjs.gz
|
|
24
|
+
12 kB: lib.cjs.br
|
|
25
|
+
12.2 kB: lib.modern.js.gz
|
|
26
|
+
10.9 kB: lib.modern.js.br
|
|
27
|
+
13.5 kB: lib.module.js.gz
|
|
28
|
+
12 kB: lib.module.js.br
|
|
29
|
+
13.5 kB: lib.umd.js.gz
|
|
30
|
+
12.1 kB: lib.umd.js.br
|
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
|
|
2
|
-
> @elevenlabs/client@0.
|
|
2
|
+
> @elevenlabs/client@0.9.1 generate-version /home/runner/work/packages/packages/packages/client
|
|
3
3
|
> printf "// This file is auto-generated during build\nexport const PACKAGE_VERSION = \"%s\";\n" "$npm_package_version" > src/version.ts
|
|
4
4
|
|
package/README.md
CHANGED
|
@@ -441,6 +441,365 @@ await conversation.changeOutputDevice({
|
|
|
441
441
|
|
|
442
442
|
**Note:** Device switching only works for voice conversations. If no specific `deviceId` is provided, the browser will use its default device selection. You can enumerate available devices using the [MediaDevices.enumerateDevices()](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/enumerateDevices) API.
|
|
443
443
|
|
|
444
|
+
## Scribe - Real-time Speech-to-Text
|
|
445
|
+
|
|
446
|
+
Scribe is ElevenLabs' real-time speech-to-text API that provides low-latency transcription with support for both streaming microphone input and pre-recorded audio files.
|
|
447
|
+
|
|
448
|
+
**Note:** Scribe Realtime v2 is currently in closed beta. For access please [contact sales](https://elevenlabs.io/contact-sales).
|
|
449
|
+
|
|
450
|
+
### Quick Start
|
|
451
|
+
|
|
452
|
+
```js
|
|
453
|
+
import { Scribe, RealtimeEvents } from "@elevenlabs/client";
|
|
454
|
+
|
|
455
|
+
// Connect with microphone streaming
|
|
456
|
+
const connection = Scribe.connect({
|
|
457
|
+
token: "your-token",
|
|
458
|
+
modelId: "scribe_realtime_v2",
|
|
459
|
+
microphone: {
|
|
460
|
+
echoCancellation: true,
|
|
461
|
+
noiseSuppression: true,
|
|
462
|
+
},
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
// Listen for transcripts
|
|
466
|
+
connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
|
|
467
|
+
console.log("Partial:", data.text);
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
|
|
471
|
+
console.log("Final:", data.text);
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
// Close connection when done
|
|
475
|
+
connection.close();
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
### Getting a Token
|
|
479
|
+
|
|
480
|
+
Scribe requires a single-use token for authentication. These tokens are generated via the ElevenLabs API on the server.
|
|
481
|
+
|
|
482
|
+
You should create an API endpoint on your server to generate these tokens:
|
|
483
|
+
|
|
484
|
+
```js
|
|
485
|
+
// Node.js server
|
|
486
|
+
app.get("/scribe-token", yourAuthMiddleware, async (req, res) => {
|
|
487
|
+
const response = await fetch(
|
|
488
|
+
"https://api.elevenlabs.io/v1/single-use-token/realtime_scribe",
|
|
489
|
+
{
|
|
490
|
+
method: "POST",
headers: {
|
|
491
|
+
"xi-api-key": process.env.ELEVENLABS_API_KEY,
|
|
492
|
+
},
|
|
493
|
+
}
|
|
494
|
+
);
|
|
495
|
+
|
|
496
|
+
const data = await response.json();
|
|
497
|
+
res.json({ token: data.token });
|
|
498
|
+
});
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
```js
|
|
502
|
+
// Client
|
|
503
|
+
const response = await fetch("/scribe-token");
|
|
504
|
+
const { token } = await response.json();
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
**Warning:** Your ElevenLabs API key is sensitive; never expose it to the client. Always generate the token on the server.
|
|
508
|
+
|
|
509
|
+
### Microphone Mode
|
|
510
|
+
|
|
511
|
+
Automatically stream audio from the user's microphone:
|
|
512
|
+
|
|
513
|
+
```js
|
|
514
|
+
import { Scribe, RealtimeEvents } from "@elevenlabs/client";
|
|
515
|
+
|
|
516
|
+
const connection = Scribe.connect({
|
|
517
|
+
token: "your-token",
|
|
518
|
+
modelId: "scribe_realtime_v2",
|
|
519
|
+
microphone: {
|
|
520
|
+
deviceId: "optional-device-id", // Optional: specific microphone
|
|
521
|
+
echoCancellation: true,
|
|
522
|
+
noiseSuppression: true,
|
|
523
|
+
autoGainControl: true,
|
|
524
|
+
channelCount: 1,
|
|
525
|
+
},
|
|
526
|
+
});
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
The microphone stream is automatically converted to the PCM16 format required by the API. In this mode, audio is committed automatically.
|
|
530
|
+
|
|
531
|
+
### Manual Audio Mode
|
|
532
|
+
|
|
533
|
+
For transcribing pre-recorded audio files or custom audio sources:
|
|
534
|
+
|
|
535
|
+
```js
|
|
536
|
+
import { Scribe, AudioFormat, RealtimeEvents } from "@elevenlabs/client";
|
|
537
|
+
|
|
538
|
+
const connection = Scribe.connect({
|
|
539
|
+
token: "your-token",
|
|
540
|
+
modelId: "scribe_realtime_v2",
|
|
541
|
+
audioFormat: AudioFormat.PCM_16000,
|
|
542
|
+
sampleRate: 16000,
|
|
543
|
+
});
|
|
544
|
+
|
|
545
|
+
// Send audio chunks as base64
|
|
546
|
+
connection.send({ audioBase64: base64AudioChunk });
|
|
547
|
+
|
|
548
|
+
// Signal end of audio segment
|
|
549
|
+
connection.commit();
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
#### Example: Transcribing an Audio File
|
|
553
|
+
|
|
554
|
+
```js
|
|
555
|
+
// Get file from input element
|
|
556
|
+
const fileInput = document.querySelector('input[type="file"]');
|
|
557
|
+
const audioFile = fileInput.files[0];
|
|
558
|
+
|
|
559
|
+
// Read file as ArrayBuffer
|
|
560
|
+
const arrayBuffer = await audioFile.arrayBuffer();
|
|
561
|
+
const audioData = new Uint8Array(arrayBuffer);
|
|
562
|
+
|
|
563
|
+
// Convert to base64 and send in chunks
|
|
564
|
+
const chunkSize = 8192; // 8KB chunks
|
|
565
|
+
for (let i = 0; i < audioData.length; i += chunkSize) {
|
|
566
|
+
const chunk = audioData.slice(i, i + chunkSize);
|
|
567
|
+
const base64 = btoa(String.fromCharCode(...chunk));
|
|
568
|
+
connection.send({ audioBase64: base64 });
|
|
569
|
+
|
|
570
|
+
// Optional: Add delay to simulate real-time streaming
|
|
571
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// Signal end of audio
|
|
575
|
+
connection.commit();
|
|
576
|
+
```
|
|
577
|
+
|
|
578
|
+
### Event Handlers
|
|
579
|
+
|
|
580
|
+
Subscribe to events using the connection instance:
|
|
581
|
+
|
|
582
|
+
```js
|
|
583
|
+
import { RealtimeEvents } from "@elevenlabs/client";
|
|
584
|
+
|
|
585
|
+
// Session started
|
|
586
|
+
connection.on(RealtimeEvents.SESSION_STARTED, () => {
|
|
587
|
+
console.log("Session started");
|
|
588
|
+
});
|
|
589
|
+
|
|
590
|
+
// Partial transcripts (interim results)
|
|
591
|
+
connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
|
|
592
|
+
console.log("Partial:", data.text);
|
|
593
|
+
// { text: string, language_code?: string }
|
|
594
|
+
});
|
|
595
|
+
|
|
596
|
+
// Final transcripts
|
|
597
|
+
connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
|
|
598
|
+
console.log("Final:", data.text);
|
|
599
|
+
// { text: string, language_code?: string }
|
|
600
|
+
});
|
|
601
|
+
|
|
602
|
+
// Final transcripts with word-level timestamps
|
|
603
|
+
connection.on(RealtimeEvents.FINAL_TRANSCRIPT_WITH_TIMESTAMPS, (data) => {
|
|
604
|
+
console.log("Final:", data.text);
|
|
605
|
+
console.log("Timestamps:", data.timestamps);
|
|
606
|
+
// { text: string, timestamps?: { start: number, end: number }[] }
|
|
607
|
+
});
|
|
608
|
+
|
|
609
|
+
// Errors
|
|
610
|
+
connection.on(RealtimeEvents.ERROR, (error) => {
|
|
611
|
+
console.error("Error:", error);
|
|
612
|
+
});
|
|
613
|
+
|
|
614
|
+
// Authentication errors
|
|
615
|
+
connection.on(RealtimeEvents.AUTH_ERROR, (data) => {
|
|
616
|
+
console.error("Auth error:", data.error);
|
|
617
|
+
});
|
|
618
|
+
|
|
619
|
+
// Connection opened
|
|
620
|
+
connection.on(RealtimeEvents.OPEN, () => {
|
|
621
|
+
console.log("Connection opened");
|
|
622
|
+
});
|
|
623
|
+
|
|
624
|
+
// Connection closed
|
|
625
|
+
connection.on(RealtimeEvents.CLOSE, () => {
|
|
626
|
+
console.log("Connection closed");
|
|
627
|
+
});
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+
### Configuration Options
|
|
631
|
+
|
|
632
|
+
#### Common Options
|
|
633
|
+
|
|
634
|
+
All connection modes support these options:
|
|
635
|
+
|
|
636
|
+
```js
|
|
637
|
+
const connection = Scribe.connect({
|
|
638
|
+
token: "your-token", // Required: Single-use token
|
|
639
|
+
modelId: "scribe_realtime_v2", // Required: Model ID
|
|
640
|
+
baseUri: "wss://api.elevenlabs.io", // Optional: Custom endpoint
|
|
641
|
+
|
|
642
|
+
// Voice Activity Detection (VAD) settings
|
|
643
|
+
commitStrategy: CommitStrategy.MANUAL, // or CommitStrategy.VAD
|
|
644
|
+
vadSilenceThresholdSecs: 0.5, // Seconds of silence before committing
|
|
645
|
+
vadThreshold: 0.5, // VAD sensitivity (0-1)
|
|
646
|
+
minSpeechDurationMs: 100, // Minimum speech duration to process
|
|
647
|
+
minSilenceDurationMs: 500, // Minimum silence to detect pause
|
|
648
|
+
|
|
649
|
+
languageCode: "en", // ISO 639-1 language code
|
|
650
|
+
});
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
#### Microphone-Specific Options
|
|
654
|
+
|
|
655
|
+
```js
|
|
656
|
+
const connection = Scribe.connect({
|
|
657
|
+
// ... common options
|
|
658
|
+
microphone: {
|
|
659
|
+
deviceId: "optional-device-id",
|
|
660
|
+
echoCancellation: true,
|
|
661
|
+
noiseSuppression: true,
|
|
662
|
+
autoGainControl: true,
|
|
663
|
+
channelCount: 1,
|
|
664
|
+
},
|
|
665
|
+
});
|
|
666
|
+
```
|
|
667
|
+
|
|
668
|
+
#### Manual Audio Options
|
|
669
|
+
|
|
670
|
+
```js
|
|
671
|
+
import { AudioFormat } from "@elevenlabs/client";
|
|
672
|
+
|
|
673
|
+
const connection = Scribe.connect({
|
|
674
|
+
// ... common options
|
|
675
|
+
audioFormat: AudioFormat.PCM_16000, // or AudioFormat.PCM_24000
|
|
676
|
+
sampleRate: 16000, // Must match audioFormat
|
|
677
|
+
});
|
|
678
|
+
```
|
|
679
|
+
|
|
680
|
+
### Commit Strategies
|
|
681
|
+
|
|
682
|
+
Scribe supports two commit strategies when in manual audio mode:
|
|
683
|
+
|
|
684
|
+
#### Manual
|
|
685
|
+
|
|
686
|
+
You explicitly control when to commit transcriptions:
|
|
687
|
+
|
|
688
|
+
```js
|
|
689
|
+
import { Scribe, AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/client";
|
|
690
|
+
|
|
691
|
+
const connection = Scribe.connect({
|
|
692
|
+
token: "your-token",
|
|
693
|
+
modelId: "scribe_realtime_v2",
|
|
694
|
+
commitStrategy: CommitStrategy.MANUAL,
|
|
695
|
+
audioFormat: AudioFormat.PCM_16000,
|
|
696
|
+
sampleRate: 16000,
|
|
697
|
+
});
|
|
698
|
+
|
|
699
|
+
connection.send({ audioBase64: base64Audio });
|
|
700
|
+
|
|
701
|
+
// Later, when you want to commit the segment
|
|
702
|
+
connection.commit();
|
|
703
|
+
```
|
|
704
|
+
|
|
705
|
+
#### Voice Activity Detection (VAD)
|
|
706
|
+
|
|
707
|
+
The API automatically detects when speech ends and commits the transcription:
|
|
708
|
+
|
|
709
|
+
```js
|
|
710
|
+
import { Scribe, AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/client";
|
|
711
|
+
|
|
712
|
+
const connection = Scribe.connect({
|
|
713
|
+
token: "your-token",
|
|
714
|
+
modelId: "scribe_realtime_v2",
|
|
715
|
+
commitStrategy: CommitStrategy.VAD,
|
|
716
|
+
audioFormat: AudioFormat.PCM_16000,
|
|
717
|
+
sampleRate: 16000,
|
|
718
|
+
});
|
|
719
|
+
```
|
|
720
|
+
|
|
721
|
+
### Connection Methods
|
|
722
|
+
|
|
723
|
+
#### close()
|
|
724
|
+
|
|
725
|
+
Close the connection and clean up resources:
|
|
726
|
+
|
|
727
|
+
```js
|
|
728
|
+
connection.close();
|
|
729
|
+
```
|
|
730
|
+
|
|
731
|
+
#### send(options)
|
|
732
|
+
|
|
733
|
+
Send audio data (manual mode only):
|
|
734
|
+
|
|
735
|
+
```js
|
|
736
|
+
connection.send({
|
|
737
|
+
audioBase64: base64AudioData,
|
|
738
|
+
commit: false, // Optional: commit immediately
|
|
739
|
+
sampleRate: 16000, // Optional: override sample rate
|
|
740
|
+
});
|
|
741
|
+
```
|
|
742
|
+
|
|
743
|
+
#### commit()
|
|
744
|
+
|
|
745
|
+
Manually commit the current segment:
|
|
746
|
+
|
|
747
|
+
```js
|
|
748
|
+
connection.commit();
|
|
749
|
+
```
|
|
750
|
+
|
|
751
|
+
### TypeScript Support
|
|
752
|
+
|
|
753
|
+
Full TypeScript types are included:
|
|
754
|
+
|
|
755
|
+
```typescript
|
|
756
|
+
import {
|
|
757
|
+
Scribe,
|
|
758
|
+
RealtimeConnection,
|
|
759
|
+
AudioFormat,
|
|
760
|
+
CommitStrategy,
|
|
761
|
+
RealtimeEvents,
|
|
762
|
+
type AudioOptions,
|
|
763
|
+
type MicrophoneOptions,
|
|
764
|
+
type PartialTranscriptMessage,
|
|
765
|
+
type FinalTranscriptMessage,
|
|
766
|
+
} from "@elevenlabs/client";
|
|
767
|
+
|
|
768
|
+
const connection: RealtimeConnection = Scribe.connect({
|
|
769
|
+
token: "your-token",
|
|
770
|
+
modelId: "scribe_realtime_v2",
|
|
771
|
+
microphone: {
|
|
772
|
+
echoCancellation: true,
|
|
773
|
+
},
|
|
774
|
+
});
|
|
775
|
+
```
|
|
776
|
+
|
|
777
|
+
### Error Handling
|
|
778
|
+
|
|
779
|
+
Always handle errors appropriately:
|
|
780
|
+
|
|
781
|
+
```js
|
|
782
|
+
import { Scribe, RealtimeEvents } from "@elevenlabs/client";
|
|
783
|
+
|
|
784
|
+
try {
|
|
785
|
+
const connection = Scribe.connect({
|
|
786
|
+
token: "your-token",
|
|
787
|
+
modelId: "scribe_realtime_v2",
|
|
788
|
+
microphone: {},
|
|
789
|
+
});
|
|
790
|
+
|
|
791
|
+
connection.on(RealtimeEvents.ERROR, (error) => {
|
|
792
|
+
console.error("Connection error:", error);
|
|
793
|
+
});
|
|
794
|
+
|
|
795
|
+
connection.on(RealtimeEvents.AUTH_ERROR, (data) => {
|
|
796
|
+
console.error("Authentication failed:", data.error);
|
|
797
|
+
});
|
|
798
|
+
} catch (error) {
|
|
799
|
+
console.error("Failed to connect:", error);
|
|
800
|
+
}
|
|
801
|
+
```
|
|
802
|
+
|
|
444
803
|
## CSP compliance
|
|
445
804
|
|
|
446
805
|
If your application has a tight Content Security Policy and does not allow data: or blob: in the `script-src` (w3.org/TR/CSP2#source-list-guid-matching), you can self-host the needed files in the public folder.
|
package/dist/index.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export { WebRTCConnection } from "./utils/WebRTCConnection";
|
|
|
12
12
|
export { postOverallFeedback } from "./utils/postOverallFeedback";
|
|
13
13
|
export { VoiceConversation } from "./VoiceConversation";
|
|
14
14
|
export { TextConversation } from "./TextConversation";
|
|
15
|
+
export { Scribe, AudioFormat, CommitStrategy, RealtimeEvents, RealtimeConnection, } from "./scribe";
|
|
16
|
+
export type { AudioOptions, MicrophoneOptions, WebSocketMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage, } from "./scribe";
|
|
15
17
|
export declare class Conversation extends BaseConversation {
|
|
16
18
|
static startSession(options: PartialOptions): Promise<Conversation>;
|
|
17
19
|
}
|