@elevenlabs/client 0.8.1 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,29 +1,30 @@
1
1
 
2
- > @elevenlabs/client@0.8.1 prebuild /home/runner/work/packages/packages/packages/client
2
+ > @elevenlabs/client@0.9.1 prebuild /home/runner/work/packages/packages/packages/client
3
3
  > npm run generate-version && npm run generate-worklets
4
4
 
5
5
 
6
- > @elevenlabs/client@0.8.1 generate-version
6
+ > @elevenlabs/client@0.9.1 generate-version
7
7
  > printf "// This file is auto-generated during build\nexport const PACKAGE_VERSION = \"%s\";\n" "$npm_package_version" > src/version.ts
8
8
 
9
9
 
10
- > @elevenlabs/client@0.8.1 generate-worklets
10
+ > @elevenlabs/client@0.9.1 generate-worklets
11
11
  > node scripts/generateWorklets.js
12
12
 
13
13
  Generating TypeScript worklet files...
14
14
  Generated rawAudioProcessor.generated.ts from rawAudioProcessor.js
15
15
  Generated audioConcatProcessor.generated.ts from audioConcatProcessor.js
16
+ Generated scribeAudioProcessor.generated.ts from scribeAudioProcessor.js
16
17
  Worklet generation complete!
17
18
 
18
- > @elevenlabs/client@0.8.1 build /home/runner/work/packages/packages/packages/client
19
+ > @elevenlabs/client@0.9.1 build /home/runner/work/packages/packages/packages/client
19
20
  > BROWSERSLIST_ENV=modern microbundle --jsx React.createElement --jsxFragment React.Fragment --jsxImportSource react src/index.ts
20
21
 
21
22
  Build "@elevenlabs/client" to dist:
22
- 11.3 kB: lib.cjs.gz
23
- 10.1 kB: lib.cjs.br
24
- 10.2 kB: lib.modern.js.gz
25
- 9.18 kB: lib.modern.js.br
26
- 11.4 kB: lib.module.js.gz
27
- 10.2 kB: lib.module.js.br
28
- 11.4 kB: lib.umd.js.gz
29
- 10.2 kB: lib.umd.js.br
23
+ 13.5 kB: lib.cjs.gz
24
+ 12 kB: lib.cjs.br
25
+ 12.2 kB: lib.modern.js.gz
26
+ 10.9 kB: lib.modern.js.br
27
+ 13.5 kB: lib.module.js.gz
28
+ 12 kB: lib.module.js.br
29
+ 13.5 kB: lib.umd.js.gz
30
+ 12.1 kB: lib.umd.js.br
@@ -1,4 +1,4 @@
1
1
 
2
- > @elevenlabs/client@0.8.1 generate-version /home/runner/work/packages/packages/packages/client
2
+ > @elevenlabs/client@0.9.1 generate-version /home/runner/work/packages/packages/packages/client
3
3
  > printf "// This file is auto-generated during build\nexport const PACKAGE_VERSION = \"%s\";\n" "$npm_package_version" > src/version.ts
4
4
 
package/README.md CHANGED
@@ -441,6 +441,365 @@ await conversation.changeOutputDevice({
441
441
 
442
442
  **Note:** Device switching only works for voice conversations. If no specific `deviceId` is provided, the browser will use its default device selection. You can enumerate available devices using the [MediaDevices.enumerateDevices()](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/enumerateDevices) API.
443
443
 
444
+ ## Scribe - Real-time Speech-to-Text
445
+
446
+ Scribe is ElevenLabs' real-time speech-to-text API that provides low-latency transcription with support for both streaming microphone input and pre-recorded audio files.
447
+
448
+ **Note:** Scribe Realtime v2 is currently in closed beta. For access please [contact sales](https://elevenlabs.io/contact-sales).
449
+
450
+ ### Quick Start
451
+
452
+ ```js
453
+ import { Scribe, RealtimeEvents } from "@elevenlabs/client";
454
+
455
+ // Connect with microphone streaming
456
+ const connection = Scribe.connect({
457
+ token: "your-token",
458
+ modelId: "scribe_realtime_v2",
459
+ microphone: {
460
+ echoCancellation: true,
461
+ noiseSuppression: true,
462
+ },
463
+ });
464
+
465
+ // Listen for transcripts
466
+ connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
467
+ console.log("Partial:", data.text);
468
+ });
469
+
470
+ connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
471
+ console.log("Final:", data.text);
472
+ });
473
+
474
+ // Close connection when done
475
+ connection.close();
476
+ ```
477
+
478
+ ### Getting a Token
479
+
480
+ Scribe requires a single-use token for authentication. These tokens are generated via the ElevenLabs API on the server.
481
+
482
+ You should create an API endpoint on your server to generate these tokens:
483
+
484
+ ```js
485
+ // Node.js server
486
+ app.get("/scribe-token", yourAuthMiddleware, async (req, res) => {
487
+ const response = await fetch(
488
+ "https://api.elevenlabs.io/v1/single-use-token/realtime_scribe",
489
+ {
490
+ headers: {
491
+ "xi-api-key": process.env.ELEVENLABS_API_KEY,
492
+ },
493
+ }
494
+ );
495
+
496
+ const data = await response.json();
497
+ res.json({ token: data.token });
498
+ });
499
+ ```
500
+
501
+ ```js
502
+ // Client
503
+ const response = await fetch("/scribe-token");
504
+ const { token } = await response.json();
505
+ ```
506
+
507
+ **Warning:** Your ElevenLabs API key is sensitive; do not expose it to the client. Always generate the token on the server.
508
+
509
+ ### Microphone Mode
510
+
511
+ Automatically stream audio from the user's microphone:
512
+
513
+ ```js
514
+ import { Scribe, RealtimeEvents } from "@elevenlabs/client";
515
+
516
+ const connection = Scribe.connect({
517
+ token: "your-token",
518
+ modelId: "scribe_realtime_v2",
519
+ microphone: {
520
+ deviceId: "optional-device-id", // Optional: specific microphone
521
+ echoCancellation: true,
522
+ noiseSuppression: true,
523
+ autoGainControl: true,
524
+ channelCount: 1,
525
+ },
526
+ });
527
+ ```
528
+
529
+ The microphone stream is automatically converted to the PCM16 format required by the API. In this mode, audio is committed automatically.
530
+
531
+ ### Manual Audio Mode
532
+
533
+ For transcribing pre-recorded audio files or custom audio sources:
534
+
535
+ ```js
536
+ import { Scribe, AudioFormat, RealtimeEvents } from "@elevenlabs/client";
537
+
538
+ const connection = Scribe.connect({
539
+ token: "your-token",
540
+ modelId: "scribe_realtime_v2",
541
+ audioFormat: AudioFormat.PCM_16000,
542
+ sampleRate: 16000,
543
+ });
544
+
545
+ // Send audio chunks as base64
546
+ connection.send({ audioBase64: base64AudioChunk });
547
+
548
+ // Signal end of audio segment
549
+ connection.commit();
550
+ ```
551
+
552
+ #### Example: Transcribing an Audio File
553
+
554
+ ```js
555
+ // Get file from input element
556
+ const fileInput = document.querySelector('input[type="file"]');
557
+ const audioFile = fileInput.files[0];
558
+
559
+ // Read file as ArrayBuffer
560
+ const arrayBuffer = await audioFile.arrayBuffer();
561
+ const audioData = new Uint8Array(arrayBuffer);
562
+
563
+ // Convert to base64 and send in chunks
564
+ const chunkSize = 8192; // 8KB chunks
565
+ for (let i = 0; i < audioData.length; i += chunkSize) {
566
+ const chunk = audioData.slice(i, i + chunkSize);
567
+ const base64 = btoa(String.fromCharCode(...chunk));
568
+ connection.send({ audioBase64: base64 });
569
+
570
+ // Optional: Add delay to simulate real-time streaming
571
+ await new Promise((resolve) => setTimeout(resolve, 100));
572
+ }
573
+
574
+ // Signal end of audio
575
+ connection.commit();
576
+ ```
577
+
578
+ ### Event Handlers
579
+
580
+ Subscribe to events using the connection instance:
581
+
582
+ ```js
583
+ import { RealtimeEvents } from "@elevenlabs/client";
584
+
585
+ // Session started
586
+ connection.on(RealtimeEvents.SESSION_STARTED, () => {
587
+ console.log("Session started");
588
+ });
589
+
590
+ // Partial transcripts (interim results)
591
+ connection.on(RealtimeEvents.PARTIAL_TRANSCRIPT, (data) => {
592
+ console.log("Partial:", data.text);
593
+ // { text: string, language_code?: string }
594
+ });
595
+
596
+ // Final transcripts
597
+ connection.on(RealtimeEvents.FINAL_TRANSCRIPT, (data) => {
598
+ console.log("Final:", data.text);
599
+ // { text: string, language_code?: string }
600
+ });
601
+
602
+ // Final transcripts with word-level timestamps
603
+ connection.on(RealtimeEvents.FINAL_TRANSCRIPT_WITH_TIMESTAMPS, (data) => {
604
+ console.log("Final:", data.text);
605
+ console.log("Timestamps:", data.timestamps);
606
+ // { text: string, timestamps?: { start: number, end: number }[] }
607
+ });
608
+
609
+ // Errors
610
+ connection.on(RealtimeEvents.ERROR, (error) => {
611
+ console.error("Error:", error);
612
+ });
613
+
614
+ // Authentication errors
615
+ connection.on(RealtimeEvents.AUTH_ERROR, (data) => {
616
+ console.error("Auth error:", data.error);
617
+ });
618
+
619
+ // Connection opened
620
+ connection.on(RealtimeEvents.OPEN, () => {
621
+ console.log("Connection opened");
622
+ });
623
+
624
+ // Connection closed
625
+ connection.on(RealtimeEvents.CLOSE, () => {
626
+ console.log("Connection closed");
627
+ });
628
+ ```
629
+
630
+ ### Configuration Options
631
+
632
+ #### Common Options
633
+
634
+ All connection modes support these options:
635
+
636
+ ```js
637
+ const connection = await Scribe.connect({
638
+ token: "your-token", // Required: Single-use token
639
+ modelId: "scribe_realtime_v2", // Required: Model ID
640
+ baseUri: "wss://api.elevenlabs.io", // Optional: Custom endpoint
641
+
642
+ // Voice Activity Detection (VAD) settings
643
+ commitStrategy: CommitStrategy.MANUAL, // or CommitStrategy.VAD
644
+ vadSilenceThresholdSecs: 0.5, // Seconds of silence before committing
645
+ vadThreshold: 0.5, // VAD sensitivity (0-1)
646
+ minSpeechDurationMs: 100, // Minimum speech duration to process
647
+ minSilenceDurationMs: 500, // Minimum silence to detect pause
648
+
649
+ languageCode: "en", // ISO 639-1 language code
650
+ });
651
+ ```
652
+
653
+ #### Microphone-Specific Options
654
+
655
+ ```js
656
+ const connection = await Scribe.connect({
657
+ // ... common options
658
+ microphone: {
659
+ deviceId: "optional-device-id",
660
+ echoCancellation: true,
661
+ noiseSuppression: true,
662
+ autoGainControl: true,
663
+ channelCount: 1,
664
+ },
665
+ });
666
+ ```
667
+
668
+ #### Manual Audio Options
669
+
670
+ ```js
671
+ import { AudioFormat } from "@elevenlabs/client";
672
+
673
+ const connection = Scribe.connect({
674
+ // ... common options
675
+ audioFormat: AudioFormat.PCM_16000, // or AudioFormat.PCM_24000
676
+ sampleRate: 16000, // Must match audioFormat
677
+ });
678
+ ```
679
+
680
+ ### Commit Strategies
681
+
682
+ Scribe supports two commit strategies when in manual audio mode:
683
+
684
+ #### Manual
685
+
686
+ You explicitly control when to commit transcriptions:
687
+
688
+ ```js
689
+ import { Scribe, AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/client";
690
+
691
+ const connection = Scribe.connect({
692
+ token: "your-token",
693
+ modelId: "scribe_realtime_v2",
694
+ commitStrategy: CommitStrategy.MANUAL,
695
+ audioFormat: AudioFormat.PCM_16000,
696
+ sampleRate: 16000,
697
+ });
698
+
699
+ connection.send({ audioBase64: base64Audio });
700
+
701
+ // Later, when you want to commit the segment
702
+ connection.commit();
703
+ ```
704
+
705
+ #### Voice Activity Detection (VAD)
706
+
707
+ The API automatically detects when speech ends and commits the transcription:
708
+
709
+ ```js
710
+ import { Scribe, AudioFormat, CommitStrategy, RealtimeEvents } from "@elevenlabs/client";
711
+
712
+ const connection = Scribe.connect({
713
+ token: "your-token",
714
+ modelId: "scribe_realtime_v2",
715
+ commitStrategy: CommitStrategy.VAD,
716
+ audioFormat: AudioFormat.PCM_16000,
717
+ sampleRate: 16000,
718
+ });
719
+ ```
720
+
721
+ ### Connection Methods
722
+
723
+ #### close()
724
+
725
+ Close the connection and clean up resources:
726
+
727
+ ```js
728
+ connection.close();
729
+ ```
730
+
731
+ #### send(options)
732
+
733
+ Send audio data (manual mode only):
734
+
735
+ ```js
736
+ connection.send({
737
+ audioBase64: base64AudioData,
738
+ commit: false, // Optional: set to true to commit immediately
739
+ sampleRate: 16000, // Optional: override sample rate
740
+ });
741
+ ```
742
+
743
+ #### commit()
744
+
745
+ Manually commit the current segment:
746
+
747
+ ```js
748
+ connection.commit();
749
+ ```
750
+
751
+ ### TypeScript Support
752
+
753
+ Full TypeScript types are included:
754
+
755
+ ```typescript
756
+ import {
757
+ Scribe,
758
+ RealtimeConnection,
759
+ AudioFormat,
760
+ CommitStrategy,
761
+ RealtimeEvents,
762
+ type AudioOptions,
763
+ type MicrophoneOptions,
764
+ type PartialTranscriptMessage,
765
+ type FinalTranscriptMessage,
766
+ } from "@elevenlabs/client";
767
+
768
+ const connection: RealtimeConnection = await Scribe.connect({
769
+ token: "your-token",
770
+ modelId: "scribe_realtime_v2",
771
+ microphone: {
772
+ echoCancellation: true,
773
+ },
774
+ });
775
+ ```
776
+
777
+ ### Error Handling
778
+
779
+ Always handle errors appropriately:
780
+
781
+ ```js
782
+ import { Scribe, RealtimeEvents } from "@elevenlabs/client";
783
+
784
+ try {
785
+ const connection = Scribe.connect({
786
+ token: "your-token",
787
+ modelId: "scribe_realtime_v2",
788
+ microphone: {},
789
+ });
790
+
791
+ connection.on(RealtimeEvents.ERROR, (error) => {
792
+ console.error("Connection error:", error);
793
+ });
794
+
795
+ connection.on(RealtimeEvents.AUTH_ERROR, (data) => {
796
+ console.error("Authentication failed:", data.error);
797
+ });
798
+ } catch (error) {
799
+ console.error("Failed to connect:", error);
800
+ }
801
+ ```
802
+
444
803
  ## CSP compliance
445
804
 
446
805
  If your application has a tight Content Security Policy and does not allow data: or blob: in the `script-src` (w3.org/TR/CSP2#source-list-guid-matching), you can self-host the needed files in the public folder.
package/dist/index.d.ts CHANGED
@@ -12,6 +12,8 @@ export { WebRTCConnection } from "./utils/WebRTCConnection";
12
12
  export { postOverallFeedback } from "./utils/postOverallFeedback";
13
13
  export { VoiceConversation } from "./VoiceConversation";
14
14
  export { TextConversation } from "./TextConversation";
15
+ export { Scribe, AudioFormat, CommitStrategy, RealtimeEvents, RealtimeConnection, } from "./scribe";
16
+ export type { AudioOptions, MicrophoneOptions, WebSocketMessage, PartialTranscriptMessage, FinalTranscriptMessage, FinalTranscriptWithTimestampsMessage, ScribeErrorMessage, ScribeAuthErrorMessage, } from "./scribe";
15
17
  export declare class Conversation extends BaseConversation {
16
18
  static startSession(options: PartialOptions): Promise<Conversation>;
17
19
  }