voice-router-dev 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- import { DeepgramModelCode, DeepgramTopicModeType, DeepgramRedactType, AssemblyAISampleRateType, AssemblyAIEncodingType, AssemblyAISpeechModelType, SonioxRealtimeModelCode, SonioxLanguageCode, SonioxModelCode, DeepgramLanguageCode, SpeechmaticsLanguageCode, AzureLocaleCode } from './constants.js';
2
- import { e as StreamingProviderType, B as BatchOnlyProviderType, T as TranscriptionProvider } from './provider-metadata-Dsk2PVud.js';
1
+ import { DeepgramModelCode, DeepgramTopicModeType, DeepgramRedactType, AssemblyAISampleRateType, AssemblyAIEncodingType, AssemblyAISpeechModelType, SonioxRealtimeModelCode, SonioxLanguageCode, ElevenLabsRealtimeModelCode, ElevenLabsAudioFormatType, SonioxModelCode, ElevenLabsModelCode, DeepgramLanguageCode, ElevenLabsLanguageCode, SpeechmaticsLanguageCode, AzureLocaleCode } from './constants.js';
2
+ import { e as StreamingProviderType, B as BatchOnlyProviderType, T as TranscriptionProvider } from './provider-metadata-DbsSGAO7.js';
3
3
 
4
4
  /**
5
5
  * Unified audio encoding types for Voice Router SDK
@@ -5674,6 +5674,617 @@ interface CreateTranscriptionRequest {
5674
5674
  known_speaker_references?: string[];
5675
5675
  }
5676
5676
 
5677
+ /**
5678
+ * Generated by orval v7.9.0 🍺
5679
+ * Do not edit manually.
5680
+ * ElevenLabs Speech-to-Text API
5681
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5682
+ * OpenAPI spec version: 1.0
5683
+ */
5684
+ type DocxExportOptionsFormat = (typeof DocxExportOptionsFormat)[keyof typeof DocxExportOptionsFormat];
5685
+ declare const DocxExportOptionsFormat: {
5686
+ readonly docx: "docx";
5687
+ };
5688
+
5689
+ /**
5690
+ * Generated by orval v7.9.0 🍺
5691
+ * Do not edit manually.
5692
+ * ElevenLabs Speech-to-Text API
5693
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5694
+ * OpenAPI spec version: 1.0
5695
+ */
5696
+ type DocxExportOptionsMaxSegmentChars = number | null;
5697
+
5698
+ /**
5699
+ * Generated by orval v7.9.0 🍺
5700
+ * Do not edit manually.
5701
+ * ElevenLabs Speech-to-Text API
5702
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5703
+ * OpenAPI spec version: 1.0
5704
+ */
5705
+ type DocxExportOptionsMaxSegmentDurationS = number | null;
5706
+
5707
+ /**
5708
+ * Generated by orval v7.9.0 🍺
5709
+ * Do not edit manually.
5710
+ * ElevenLabs Speech-to-Text API
5711
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5712
+ * OpenAPI spec version: 1.0
5713
+ */
5714
+ type DocxExportOptionsSegmentOnSilenceLongerThanS = number | null;
5715
+
5716
+ /**
5717
+ * Generated by orval v7.9.0 🍺
5718
+ * Do not edit manually.
5719
+ * ElevenLabs Speech-to-Text API
5720
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5721
+ * OpenAPI spec version: 1.0
5722
+ */
5723
+
5724
+ interface DocxExportOptions {
5725
+ include_speakers?: boolean;
5726
+ include_timestamps?: boolean;
5727
+ format: DocxExportOptionsFormat;
5728
+ segment_on_silence_longer_than_s?: DocxExportOptionsSegmentOnSilenceLongerThanS;
5729
+ max_segment_duration_s?: DocxExportOptionsMaxSegmentDurationS;
5730
+ max_segment_chars?: DocxExportOptionsMaxSegmentChars;
5731
+ }
5732
+
5733
+ /**
5734
+ * Generated by orval v7.9.0 🍺
5735
+ * Do not edit manually.
5736
+ * ElevenLabs Speech-to-Text API
5737
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5738
+ * OpenAPI spec version: 1.0
5739
+ */
5740
+ type HtmlExportOptionsFormat = (typeof HtmlExportOptionsFormat)[keyof typeof HtmlExportOptionsFormat];
5741
+ declare const HtmlExportOptionsFormat: {
5742
+ readonly html: "html";
5743
+ };
5744
+
5745
+ /**
5746
+ * Generated by orval v7.9.0 🍺
5747
+ * Do not edit manually.
5748
+ * ElevenLabs Speech-to-Text API
5749
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5750
+ * OpenAPI spec version: 1.0
5751
+ */
5752
+ type HtmlExportOptionsMaxSegmentChars = number | null;
5753
+
5754
+ /**
5755
+ * Generated by orval v7.9.0 🍺
5756
+ * Do not edit manually.
5757
+ * ElevenLabs Speech-to-Text API
5758
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5759
+ * OpenAPI spec version: 1.0
5760
+ */
5761
+ type HtmlExportOptionsMaxSegmentDurationS = number | null;
5762
+
5763
+ /**
5764
+ * Generated by orval v7.9.0 🍺
5765
+ * Do not edit manually.
5766
+ * ElevenLabs Speech-to-Text API
5767
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5768
+ * OpenAPI spec version: 1.0
5769
+ */
5770
+ type HtmlExportOptionsSegmentOnSilenceLongerThanS = number | null;
5771
+
5772
+ /**
5773
+ * Generated by orval v7.9.0 🍺
5774
+ * Do not edit manually.
5775
+ * ElevenLabs Speech-to-Text API
5776
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5777
+ * OpenAPI spec version: 1.0
5778
+ */
5779
+
5780
+ interface HtmlExportOptions {
5781
+ include_speakers?: boolean;
5782
+ include_timestamps?: boolean;
5783
+ format: HtmlExportOptionsFormat;
5784
+ segment_on_silence_longer_than_s?: HtmlExportOptionsSegmentOnSilenceLongerThanS;
5785
+ max_segment_duration_s?: HtmlExportOptionsMaxSegmentDurationS;
5786
+ max_segment_chars?: HtmlExportOptionsMaxSegmentChars;
5787
+ }
5788
+
5789
+ /**
5790
+ * Generated by orval v7.9.0 🍺
5791
+ * Do not edit manually.
5792
+ * ElevenLabs Speech-to-Text API
5793
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5794
+ * OpenAPI spec version: 1.0
5795
+ */
5796
+ type PdfExportOptionsFormat = (typeof PdfExportOptionsFormat)[keyof typeof PdfExportOptionsFormat];
5797
+ declare const PdfExportOptionsFormat: {
5798
+ readonly pdf: "pdf";
5799
+ };
5800
+
5801
+ /**
5802
+ * Generated by orval v7.9.0 🍺
5803
+ * Do not edit manually.
5804
+ * ElevenLabs Speech-to-Text API
5805
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5806
+ * OpenAPI spec version: 1.0
5807
+ */
5808
+ type PdfExportOptionsMaxSegmentChars = number | null;
5809
+
5810
+ /**
5811
+ * Generated by orval v7.9.0 🍺
5812
+ * Do not edit manually.
5813
+ * ElevenLabs Speech-to-Text API
5814
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5815
+ * OpenAPI spec version: 1.0
5816
+ */
5817
+ type PdfExportOptionsMaxSegmentDurationS = number | null;
5818
+
5819
+ /**
5820
+ * Generated by orval v7.9.0 🍺
5821
+ * Do not edit manually.
5822
+ * ElevenLabs Speech-to-Text API
5823
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5824
+ * OpenAPI spec version: 1.0
5825
+ */
5826
+ type PdfExportOptionsSegmentOnSilenceLongerThanS = number | null;
5827
+
5828
+ /**
5829
+ * Generated by orval v7.9.0 🍺
5830
+ * Do not edit manually.
5831
+ * ElevenLabs Speech-to-Text API
5832
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5833
+ * OpenAPI spec version: 1.0
5834
+ */
5835
+
5836
+ interface PdfExportOptions {
5837
+ include_speakers?: boolean;
5838
+ include_timestamps?: boolean;
5839
+ format: PdfExportOptionsFormat;
5840
+ segment_on_silence_longer_than_s?: PdfExportOptionsSegmentOnSilenceLongerThanS;
5841
+ max_segment_duration_s?: PdfExportOptionsMaxSegmentDurationS;
5842
+ max_segment_chars?: PdfExportOptionsMaxSegmentChars;
5843
+ }
5844
+
5845
+ /**
5846
+ * Generated by orval v7.9.0 🍺
5847
+ * Do not edit manually.
5848
+ * ElevenLabs Speech-to-Text API
5849
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5850
+ * OpenAPI spec version: 1.0
5851
+ */
5852
+ type SegmentedJsonExportOptionsFormat = (typeof SegmentedJsonExportOptionsFormat)[keyof typeof SegmentedJsonExportOptionsFormat];
5853
+ declare const SegmentedJsonExportOptionsFormat: {
5854
+ readonly segmented_json: "segmented_json";
5855
+ };
5856
+
5857
+ /**
5858
+ * Generated by orval v7.9.0 🍺
5859
+ * Do not edit manually.
5860
+ * ElevenLabs Speech-to-Text API
5861
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5862
+ * OpenAPI spec version: 1.0
5863
+ */
5864
+ type SegmentedJsonExportOptionsMaxSegmentChars = number | null;
5865
+
5866
+ /**
5867
+ * Generated by orval v7.9.0 🍺
5868
+ * Do not edit manually.
5869
+ * ElevenLabs Speech-to-Text API
5870
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5871
+ * OpenAPI spec version: 1.0
5872
+ */
5873
+ type SegmentedJsonExportOptionsMaxSegmentDurationS = number | null;
5874
+
5875
+ /**
5876
+ * Generated by orval v7.9.0 🍺
5877
+ * Do not edit manually.
5878
+ * ElevenLabs Speech-to-Text API
5879
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5880
+ * OpenAPI spec version: 1.0
5881
+ */
5882
+ type SegmentedJsonExportOptionsSegmentOnSilenceLongerThanS = number | null;
5883
+
5884
+ /**
5885
+ * Generated by orval v7.9.0 🍺
5886
+ * Do not edit manually.
5887
+ * ElevenLabs Speech-to-Text API
5888
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5889
+ * OpenAPI spec version: 1.0
5890
+ */
5891
+
5892
+ interface SegmentedJsonExportOptions {
5893
+ include_speakers?: boolean;
5894
+ include_timestamps?: boolean;
5895
+ format: SegmentedJsonExportOptionsFormat;
5896
+ segment_on_silence_longer_than_s?: SegmentedJsonExportOptionsSegmentOnSilenceLongerThanS;
5897
+ max_segment_duration_s?: SegmentedJsonExportOptionsMaxSegmentDurationS;
5898
+ max_segment_chars?: SegmentedJsonExportOptionsMaxSegmentChars;
5899
+ }
5900
+
5901
+ /**
5902
+ * Generated by orval v7.9.0 🍺
5903
+ * Do not edit manually.
5904
+ * ElevenLabs Speech-to-Text API
5905
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5906
+ * OpenAPI spec version: 1.0
5907
+ */
5908
+ type SrtExportOptionsFormat = (typeof SrtExportOptionsFormat)[keyof typeof SrtExportOptionsFormat];
5909
+ declare const SrtExportOptionsFormat: {
5910
+ readonly srt: "srt";
5911
+ };
5912
+
5913
+ /**
5914
+ * Generated by orval v7.9.0 🍺
5915
+ * Do not edit manually.
5916
+ * ElevenLabs Speech-to-Text API
5917
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5918
+ * OpenAPI spec version: 1.0
5919
+ */
5920
+ type SrtExportOptionsMaxCharactersPerLine = number | null;
5921
+
5922
+ /**
5923
+ * Generated by orval v7.9.0 🍺
5924
+ * Do not edit manually.
5925
+ * ElevenLabs Speech-to-Text API
5926
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5927
+ * OpenAPI spec version: 1.0
5928
+ */
5929
+ type SrtExportOptionsMaxSegmentChars = number | null;
5930
+
5931
+ /**
5932
+ * Generated by orval v7.9.0 🍺
5933
+ * Do not edit manually.
5934
+ * ElevenLabs Speech-to-Text API
5935
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5936
+ * OpenAPI spec version: 1.0
5937
+ */
5938
+ type SrtExportOptionsMaxSegmentDurationS = number | null;
5939
+
5940
+ /**
5941
+ * Generated by orval v7.9.0 🍺
5942
+ * Do not edit manually.
5943
+ * ElevenLabs Speech-to-Text API
5944
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5945
+ * OpenAPI spec version: 1.0
5946
+ */
5947
+ type SrtExportOptionsSegmentOnSilenceLongerThanS = number | null;
5948
+
5949
+ /**
5950
+ * Generated by orval v7.9.0 🍺
5951
+ * Do not edit manually.
5952
+ * ElevenLabs Speech-to-Text API
5953
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5954
+ * OpenAPI spec version: 1.0
5955
+ */
5956
+
5957
+ interface SrtExportOptions {
5958
+ max_characters_per_line?: SrtExportOptionsMaxCharactersPerLine;
5959
+ include_speakers?: boolean;
5960
+ include_timestamps?: boolean;
5961
+ format: SrtExportOptionsFormat;
5962
+ segment_on_silence_longer_than_s?: SrtExportOptionsSegmentOnSilenceLongerThanS;
5963
+ max_segment_duration_s?: SrtExportOptionsMaxSegmentDurationS;
5964
+ max_segment_chars?: SrtExportOptionsMaxSegmentChars;
5965
+ }
5966
+
5967
+ /**
5968
+ * Generated by orval v7.9.0 🍺
5969
+ * Do not edit manually.
5970
+ * ElevenLabs Speech-to-Text API
5971
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5972
+ * OpenAPI spec version: 1.0
5973
+ */
5974
+ type TxtExportOptionsFormat = (typeof TxtExportOptionsFormat)[keyof typeof TxtExportOptionsFormat];
5975
+ declare const TxtExportOptionsFormat: {
5976
+ readonly txt: "txt";
5977
+ };
5978
+
5979
+ /**
5980
+ * Generated by orval v7.9.0 🍺
5981
+ * Do not edit manually.
5982
+ * ElevenLabs Speech-to-Text API
5983
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5984
+ * OpenAPI spec version: 1.0
5985
+ */
5986
+ type TxtExportOptionsMaxCharactersPerLine = number | null;
5987
+
5988
+ /**
5989
+ * Generated by orval v7.9.0 🍺
5990
+ * Do not edit manually.
5991
+ * ElevenLabs Speech-to-Text API
5992
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
5993
+ * OpenAPI spec version: 1.0
5994
+ */
5995
+ type TxtExportOptionsMaxSegmentChars = number | null;
5996
+
5997
+ /**
5998
+ * Generated by orval v7.9.0 🍺
5999
+ * Do not edit manually.
6000
+ * ElevenLabs Speech-to-Text API
6001
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6002
+ * OpenAPI spec version: 1.0
6003
+ */
6004
+ type TxtExportOptionsMaxSegmentDurationS = number | null;
6005
+
6006
+ /**
6007
+ * Generated by orval v7.9.0 🍺
6008
+ * Do not edit manually.
6009
+ * ElevenLabs Speech-to-Text API
6010
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6011
+ * OpenAPI spec version: 1.0
6012
+ */
6013
+ type TxtExportOptionsSegmentOnSilenceLongerThanS = number | null;
6014
+
6015
+ /**
6016
+ * Generated by orval v7.9.0 🍺
6017
+ * Do not edit manually.
6018
+ * ElevenLabs Speech-to-Text API
6019
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6020
+ * OpenAPI spec version: 1.0
6021
+ */
6022
+
6023
+ interface TxtExportOptions {
6024
+ max_characters_per_line?: TxtExportOptionsMaxCharactersPerLine;
6025
+ include_speakers?: boolean;
6026
+ include_timestamps?: boolean;
6027
+ format: TxtExportOptionsFormat;
6028
+ segment_on_silence_longer_than_s?: TxtExportOptionsSegmentOnSilenceLongerThanS;
6029
+ max_segment_duration_s?: TxtExportOptionsMaxSegmentDurationS;
6030
+ max_segment_chars?: TxtExportOptionsMaxSegmentChars;
6031
+ }
6032
+
6033
+ /**
6034
+ * Generated by orval v7.9.0 🍺
6035
+ * Do not edit manually.
6036
+ * ElevenLabs Speech-to-Text API
6037
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6038
+ * OpenAPI spec version: 1.0
6039
+ */
6040
+
6041
+ type ExportOptions = SegmentedJsonExportOptions | DocxExportOptions | PdfExportOptions | TxtExportOptions | HtmlExportOptions | SrtExportOptions;
6042
+
6043
+ /**
6044
+ * Generated by orval v7.9.0 🍺
6045
+ * Do not edit manually.
6046
+ * ElevenLabs Speech-to-Text API
6047
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6048
+ * OpenAPI spec version: 1.0
6049
+ */
6050
+
6051
+ /**
6052
+ * @maxItems 10
6053
+ */
6054
+ type AdditionalFormats = ExportOptions[];
6055
+
6056
+ /**
6057
+ * Generated by orval v7.9.0 🍺
6058
+ * Do not edit manually.
6059
+ * ElevenLabs Speech-to-Text API
6060
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6061
+ * OpenAPI spec version: 1.0
6062
+ */
6063
+ /**
6064
+ * The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters.
6065
+ */
6066
+ type BodySpeechToTextV1SpeechToTextPostCloudStorageUrl = string | null;
6067
+
6068
+ /**
6069
+ * Generated by orval v7.9.0 🍺
6070
+ * Do not edit manually.
6071
+ * ElevenLabs Speech-to-Text API
6072
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6073
+ * OpenAPI spec version: 1.0
6074
+ */
6075
+ /**
6076
+ * Diarization threshold to apply during speaker diarization. A higher value means there will be a lower chance of one speaker being diarized as two different speakers but also a higher chance of two different speakers being diarized as one speaker (less total speakers predicted). A low value means there will be a higher chance of one speaker being diarized as two different speakers but also a lower chance of two different speakers being diarized as one speaker (more total speakers predicted). Can only be set when diarize=True and num_speakers=None. Defaults to None, in which case we will choose a threshold based on the model_id (0.22 usually).
6077
+ */
6078
+ type BodySpeechToTextV1SpeechToTextPostDiarizationThreshold = number | null;
6079
+
6080
+ /**
6081
+ * Generated by orval v7.9.0 🍺
6082
+ * Do not edit manually.
6083
+ * ElevenLabs Speech-to-Text API
6084
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6085
+ * OpenAPI spec version: 1.0
6086
+ */
6087
+ /**
6088
+ * Detect entities in the transcript. Can be 'all' to detect all entities, a single entity type or category string, or a list of entity types/categories. Categories include 'pii', 'phi', 'pci', 'other', 'offensive_language'. When enabled, detected entities will be returned in the 'entities' field with their text, type, and character positions. Usage of this parameter will incur additional costs.
6089
+ */
6090
+ type BodySpeechToTextV1SpeechToTextPostEntityDetection = string | string[] | null;
6091
+
6092
+ /**
6093
+ * Generated by orval v7.9.0 🍺
6094
+ * Do not edit manually.
6095
+ * ElevenLabs Speech-to-Text API
6096
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6097
+ * OpenAPI spec version: 1.0
6098
+ */
6099
+ /**
6100
+ * The file to transcribe. All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 3.0GB.
6101
+ */
6102
+ type BodySpeechToTextV1SpeechToTextPostFile = Blob | null;
6103
+
6104
+ /**
6105
+ * Generated by orval v7.9.0 🍺
6106
+ * Do not edit manually.
6107
+ * ElevenLabs Speech-to-Text API
6108
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6109
+ * OpenAPI spec version: 1.0
6110
+ */
6111
+ /**
6112
+ * The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform.
6113
+ */
6114
+ type BodySpeechToTextV1SpeechToTextPostFileFormat = (typeof BodySpeechToTextV1SpeechToTextPostFileFormat)[keyof typeof BodySpeechToTextV1SpeechToTextPostFileFormat];
6115
+ declare const BodySpeechToTextV1SpeechToTextPostFileFormat: {
6116
+ readonly pcm_s16le_16: "pcm_s16le_16";
6117
+ readonly other: "other";
6118
+ };
6119
+
6120
+ /**
6121
+ * Generated by orval v7.9.0 🍺
6122
+ * Do not edit manually.
6123
+ * ElevenLabs Speech-to-Text API
6124
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6125
+ * OpenAPI spec version: 1.0
6126
+ */
6127
+ /**
6128
+ * An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically.
6129
+ */
6130
+ type BodySpeechToTextV1SpeechToTextPostLanguageCode = string | null;
6131
+
6132
+ /**
6133
+ * Generated by orval v7.9.0 🍺
6134
+ * Do not edit manually.
6135
+ * ElevenLabs Speech-to-Text API
6136
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6137
+ * OpenAPI spec version: 1.0
6138
+ */
6139
+ /**
6140
+ * The ID of the model to use for transcription.
6141
+ */
6142
+ type BodySpeechToTextV1SpeechToTextPostModelId = (typeof BodySpeechToTextV1SpeechToTextPostModelId)[keyof typeof BodySpeechToTextV1SpeechToTextPostModelId];
6143
+ declare const BodySpeechToTextV1SpeechToTextPostModelId: {
6144
+ readonly scribe_v1: "scribe_v1";
6145
+ readonly scribe_v2: "scribe_v2";
6146
+ };
6147
+
6148
+ /**
6149
+ * Generated by orval v7.9.0 🍺
6150
+ * Do not edit manually.
6151
+ * ElevenLabs Speech-to-Text API
6152
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6153
+ * OpenAPI spec version: 1.0
6154
+ */
6155
+ /**
6156
+ * The maximum amount of speakers talking in the uploaded file. Can help with predicting who speaks when. The maximum amount of speakers that can be predicted is 32. Defaults to null, in this case the amount of speakers is set to the maximum value the model supports.
6157
+ */
6158
+ type BodySpeechToTextV1SpeechToTextPostNumSpeakers = number | null;
6159
+
6160
+ /**
6161
+ * Generated by orval v7.9.0 🍺
6162
+ * Do not edit manually.
6163
+ * ElevenLabs Speech-to-Text API
6164
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6165
+ * OpenAPI spec version: 1.0
6166
+ */
6167
+ /**
6168
+ * If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be an integer between 0 and 2147483647.
6169
+ */
6170
+ type BodySpeechToTextV1SpeechToTextPostSeed = number | null;
6171
+
6172
+ /**
6173
+ * Generated by orval v7.9.0 🍺
6174
+ * Do not edit manually.
6175
+ * ElevenLabs Speech-to-Text API
6176
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6177
+ * OpenAPI spec version: 1.0
6178
+ */
6179
+ /**
6180
+ * Controls the randomness of the transcription output. Accepts values between 0.0 and 2.0, where higher values result in more diverse and less deterministic results. If omitted, we will use a temperature based on the model you selected which is usually 0.
6181
+ */
6182
+ type BodySpeechToTextV1SpeechToTextPostTemperature = number | null;
6183
+
6184
+ /**
6185
+ * Generated by orval v7.9.0 🍺
6186
+ * Do not edit manually.
6187
+ * ElevenLabs Speech-to-Text API
6188
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6189
+ * OpenAPI spec version: 1.0
6190
+ */
6191
+ /**
6192
+ * The granularity of the timestamps in the transcription. 'word' provides word-level timestamps and 'character' provides character-level timestamps per word.
6193
+ */
6194
+ type BodySpeechToTextV1SpeechToTextPostTimestampsGranularity = (typeof BodySpeechToTextV1SpeechToTextPostTimestampsGranularity)[keyof typeof BodySpeechToTextV1SpeechToTextPostTimestampsGranularity];
6195
+ declare const BodySpeechToTextV1SpeechToTextPostTimestampsGranularity: {
6196
+ readonly none: "none";
6197
+ readonly word: "word";
6198
+ readonly character: "character";
6199
+ };
6200
+
6201
+ /**
6202
+ * Generated by orval v7.9.0 🍺
6203
+ * Do not edit manually.
6204
+ * ElevenLabs Speech-to-Text API
6205
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6206
+ * OpenAPI spec version: 1.0
6207
+ */
6208
+ /**
6209
+ * Optional specific webhook ID to send the transcription result to. Only valid when webhook is set to true. If not provided, transcription will be sent to all configured speech-to-text webhooks.
6210
+ */
6211
+ type BodySpeechToTextV1SpeechToTextPostWebhookId = string | null;
6212
+
6213
+ /**
6214
+ * Generated by orval v7.9.0 🍺
6215
+ * Do not edit manually.
6216
+ * ElevenLabs Speech-to-Text API
6217
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6218
+ * OpenAPI spec version: 1.0
6219
+ */
6220
+ type BodySpeechToTextV1SpeechToTextPostWebhookMetadataAnyOf = {
6221
+ [key: string]: unknown;
6222
+ };
6223
+
6224
+ /**
6225
+ * Generated by orval v7.9.0 🍺
6226
+ * Do not edit manually.
6227
+ * ElevenLabs Speech-to-Text API
6228
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6229
+ * OpenAPI spec version: 1.0
6230
+ */
6231
+
6232
+ /**
6233
+ * Optional metadata to be included in the webhook response. This should be a JSON string representing an object with a maximum depth of 2 levels and maximum size of 16KB. Useful for tracking internal IDs, job references, or other contextual information.
6234
+ */
6235
+ type BodySpeechToTextV1SpeechToTextPostWebhookMetadata = string | BodySpeechToTextV1SpeechToTextPostWebhookMetadataAnyOf | null;
6236
+
6237
+ /**
6238
+ * Generated by orval v7.9.0 🍺
6239
+ * Do not edit manually.
6240
+ * ElevenLabs Speech-to-Text API
6241
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
6242
+ * OpenAPI spec version: 1.0
6243
+ */
6244
+
6245
+ interface BodySpeechToTextV1SpeechToTextPost {
6246
+ /** The ID of the model to use for transcription. */
6247
+ model_id: BodySpeechToTextV1SpeechToTextPostModelId;
6248
+ /** The file to transcribe. All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 3.0GB. */
6249
+ file?: BodySpeechToTextV1SpeechToTextPostFile;
6250
+ /** An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically. */
6251
+ language_code?: BodySpeechToTextV1SpeechToTextPostLanguageCode;
6252
+ /** Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. */
6253
+ tag_audio_events?: boolean;
6254
+ /** The maximum amount of speakers talking in the uploaded file. Can help with predicting who speaks when. The maximum amount of speakers that can be predicted is 32. Defaults to null, in this case the amount of speakers is set to the maximum value the model supports. */
6255
+ num_speakers?: BodySpeechToTextV1SpeechToTextPostNumSpeakers;
6256
+ /** The granularity of the timestamps in the transcription. 'word' provides word-level timestamps and 'character' provides character-level timestamps per word. */
6257
+ timestamps_granularity?: BodySpeechToTextV1SpeechToTextPostTimestampsGranularity;
6258
+ /** Whether to annotate which speaker is currently talking in the uploaded file. */
6259
+ diarize?: boolean;
6260
+ /** Diarization threshold to apply during speaker diarization. A higher value means there will be a lower chance of one speaker being diarized as two different speakers but also a higher chance of two different speakers being diarized as one speaker (less total speakers predicted). A low value means there will be a higher chance of one speaker being diarized as two different speakers but also a lower chance of two different speakers being diarized as one speaker (more total speakers predicted). Can only be set when diarize=True and num_speakers=None. Defaults to None, in which case we will choose a threshold based on the model_id (0.22 usually). */
6261
+ diarization_threshold?: BodySpeechToTextV1SpeechToTextPostDiarizationThreshold;
6262
+ /** A list of additional formats to export the transcript to. */
6263
+ additional_formats?: AdditionalFormats;
6264
+ /** The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform. */
6265
+ file_format?: BodySpeechToTextV1SpeechToTextPostFileFormat;
6266
+ /** The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters. */
6267
+ cloud_storage_url?: BodySpeechToTextV1SpeechToTextPostCloudStorageUrl;
6268
+ /** Whether to send the transcription result to configured speech-to-text webhooks. If set the request will return early without the transcription, which will be delivered later via webhook. */
6269
+ webhook?: boolean;
6270
+ /** Optional specific webhook ID to send the transcription result to. Only valid when webhook is set to true. If not provided, transcription will be sent to all configured speech-to-text webhooks. */
6271
+ webhook_id?: BodySpeechToTextV1SpeechToTextPostWebhookId;
6272
+ /** Controls the randomness of the transcription output. Accepts values between 0.0 and 2.0, where higher values result in more diverse and less deterministic results. If omitted, we will use a temperature based on the model you selected which is usually 0. */
6273
+ temperature?: BodySpeechToTextV1SpeechToTextPostTemperature;
6274
+ /** If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be an integer between 0 and 2147483647. */
6275
+ seed?: BodySpeechToTextV1SpeechToTextPostSeed;
6276
+ /** Whether the audio file contains multiple channels where each channel contains a single speaker. When enabled, each channel will be transcribed independently and the results will be combined. Each word in the response will include a 'channel_index' field indicating which channel it was spoken on. A maximum of 5 channels is supported. */
6277
+ use_multi_channel?: boolean;
6278
+ /** Optional metadata to be included in the webhook response. This should be a JSON string representing an object with a maximum depth of 2 levels and maximum size of 16KB. Useful for tracking internal IDs, job references, or other contextual information. */
6279
+ webhook_metadata?: BodySpeechToTextV1SpeechToTextPostWebhookMetadata;
6280
+ /** Detect entities in the transcript. Can be 'all' to detect all entities, a single entity type or category string, or a list of entity types/categories. Categories include 'pii', 'phi', 'pci', 'other', 'offensive_language'. When enabled, detected entities will be returned in the 'entities' field with their text, type, and character positions. Usage of this parameter will incur additional costs. */
6281
+ entity_detection?: BodySpeechToTextV1SpeechToTextPostEntityDetection;
6282
+ /** If true, the transcription will not have any filler words, false starts and non-speech sounds. Only supported with scribe_v2 model. */
6283
+ no_verbatim?: boolean;
6284
+ /** A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 100. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur additional costs. */
6285
+ keyterms?: string[];
6286
+ }
6287
+
5677
6288
  /**
5678
6289
  * Generated by orval v7.9.0 🍺
5679
6290
  * Do not edit manually.
@@ -6501,6 +7112,50 @@ interface SonioxStreamingOptions {
6501
7112
  */
6502
7113
  clientReferenceId?: string;
6503
7114
  }
7115
+ /**
7116
+ * Provider-specific options for ElevenLabs realtime (streaming) transcription; every field is optional.
7117
+ *
7118
+ * Based on the WebSocket API at wss://api.elevenlabs.io/v1/speech-to-text/realtime.
7119
+ * Supports VAD-based or manual commit strategies (selected via `commitStrategy`).
7120
+ *
7121
+ * @see https://elevenlabs.io/docs/capabilities/speech-to-text#realtime-streaming
7122
+ */
7123
+ interface ElevenLabsStreamingOptions {
7124
+ /**
7125
+ * Realtime model to use
7126
+ * @default "scribe_v2_realtime"
7127
+ */
7128
+ model?: ElevenLabsRealtimeModelCode;
7129
+ /**
7130
+ * Audio format specification
7131
+ * PCM formats include sample rate (e.g., "pcm_16000")
7132
+ * @default "pcm_16000"
7133
+ */
7134
+ audioFormat?: ElevenLabsAudioFormatType;
7135
+ /** ISO 639-1/3 language code to improve recognition accuracy (typed as a free-form string, so the value is not checked at compile time) */
7136
+ languageCode?: string;
7137
+ /** Include word-level timestamps in responses */
7138
+ includeTimestamps?: boolean;
7139
+ /** Include language detection info in responses */
7140
+ includeLanguageDetection?: boolean;
7141
+ /**
7142
+ * Commit strategy for finalizing transcript segments
7143
+ * - "manual": Client sends explicit commit messages
7144
+ * - "vad": Automatic voice activity detection
7145
+ * @default "vad"
7146
+ */
7147
+ commitStrategy?: "manual" | "vad";
7148
+ /** Silence threshold in seconds for the "vad" commit strategy */
7149
+ vadSilenceThresholdSecs?: number;
7150
+ /** VAD activation threshold (0-1) */
7151
+ vadThreshold?: number;
7152
+ /** Minimum speech duration in ms before triggering transcription */
7153
+ minSpeechDurationMs?: number;
7154
+ /** Minimum silence duration in ms before committing */
7155
+ minSilenceDurationMs?: number;
7156
+ /** Context from previous text to improve continuity */
7157
+ previousText?: string;
7158
+ }
6504
7159
  /**
6505
7160
  * Union of all provider-specific streaming options
6506
7161
  */
@@ -6514,11 +7169,13 @@ type ProviderStreamingOptions = ({
6514
7169
  provider: "openai-whisper";
6515
7170
  } & OpenAIStreamingOptions) | ({
6516
7171
  provider: "soniox";
6517
- } & SonioxStreamingOptions);
7172
+ } & SonioxStreamingOptions) | ({
7173
+ provider: "elevenlabs";
7174
+ } & ElevenLabsStreamingOptions);
6518
7175
  /**
6519
7176
  * Type-safe streaming options for a specific provider; resolves to `never` when `P` matches none of the listed provider names
6520
7177
  */
6521
- type StreamingOptionsForProvider<P extends StreamingProvider> = P extends "gladia" ? GladiaStreamingOptions : P extends "deepgram" ? DeepgramStreamingOptions : P extends "assemblyai" ? AssemblyAIStreamingOptions : P extends "openai-whisper" ? OpenAIStreamingOptions : P extends "soniox" ? SonioxStreamingOptions : never;
7178
+ type StreamingOptionsForProvider<P extends StreamingProvider> = P extends "gladia" ? GladiaStreamingOptions : P extends "deepgram" ? DeepgramStreamingOptions : P extends "assemblyai" ? AssemblyAIStreamingOptions : P extends "openai-whisper" ? OpenAIStreamingOptions : P extends "soniox" ? SonioxStreamingOptions : P extends "elevenlabs" ? ElevenLabsStreamingOptions : never;
6522
7179
  /**
6523
7180
  * Type-safe transcribeStream parameters for a specific provider
6524
7181
  */
@@ -7000,7 +7657,7 @@ type SpeechmaticsOperatingPoint = "standard" | "enhanced";
7000
7657
  * { model: SonioxModel.stt_rt_v3 }
7001
7658
  * ```
7002
7659
  */
7003
- type TranscriptionModel = DeepgramModelCode | StreamingSupportedModels | SpeechModel | SonioxModelCode | SpeechmaticsOperatingPoint;
7660
+ type TranscriptionModel = DeepgramModelCode | StreamingSupportedModels | SpeechModel | SonioxModelCode | ElevenLabsModelCode | SpeechmaticsOperatingPoint;
7004
7661
  /**
7005
7662
  * Unified transcription language type with autocomplete for all providers
7006
7663
  *
@@ -7023,7 +7680,7 @@ type TranscriptionModel = DeepgramModelCode | StreamingSupportedModels | SpeechM
7023
7680
  * { language: SonioxLanguage.en }
7024
7681
  * ```
7025
7682
  */
7026
- type TranscriptionLanguage = TranscriptLanguageCode | TranscriptionLanguageCodeEnum | DeepgramLanguageCode | SonioxLanguageCode | SpeechmaticsLanguageCode | AzureLocaleCode;
7683
+ type TranscriptionLanguage = TranscriptLanguageCode | TranscriptionLanguageCodeEnum | DeepgramLanguageCode | SonioxLanguageCode | ElevenLabsLanguageCode | SpeechmaticsLanguageCode | AzureLocaleCode;
7027
7684
 
7028
7685
  /**
7029
7686
  * Extended data from AssemblyAI transcription
@@ -7090,6 +7747,24 @@ interface DeepgramExtendedData {
7090
7747
  /**
7091
7748
  * Map of provider names to their extended data types
7092
7749
  */
7750
+ /** ElevenLabs extended data (entities, audio events, language probability); every field is optional. Used as the `elevenlabs` entry of `ProviderExtendedDataMap`. */
7751
+ interface ElevenLabsExtendedData {
7752
+ /** Detected entities (PII, PHI, PCI, etc.); items have the same field names as the generated `DetectedEntity` type (text, entity type, start/end character positions) */
7753
+ entities?: Array<{
7754
+ text: string;
7755
+ entity_type: string;
7756
+ start_char: number;
7757
+ end_char: number;
7758
+ }>;
7759
+ /** Audio events detected (laughter, music, etc.); `start`/`end` are presumably offsets in seconds — TODO confirm against the adapter */
7760
+ audioEvents?: Array<{
7761
+ text: string;
7762
+ start: number;
7763
+ end: number;
7764
+ }>;
7765
+ /** Language detection probability (presumably 0 to 1, mirroring `language_probability` on `SpeechToTextChunkResponseModel` — TODO confirm) */
7766
+ languageProbability?: number;
7767
+ }
7093
7768
  type ProviderExtendedDataMap = {
7094
7769
  assemblyai: AssemblyAIExtendedData;
7095
7770
  gladia: GladiaExtendedData;
@@ -7098,6 +7773,7 @@ type ProviderExtendedDataMap = {
7098
7773
  "azure-stt": Record<string, never>;
7099
7774
  speechmatics: Record<string, never>;
7100
7775
  soniox: Record<string, never>;
7776
+ elevenlabs: ElevenLabsExtendedData;
7101
7777
  };
7102
7778
 
7103
7779
  /**
@@ -7250,6 +7926,11 @@ interface TranscribeOptions {
7250
7926
  * @see https://platform.openai.com/docs/api-reference/audio/createTranscription
7251
7927
  */
7252
7928
  openai?: Partial<Omit<CreateTranscriptionRequest, "file" | "model">>;
7929
+ /**
7930
+ * ElevenLabs-specific options (passed directly to API). Every field is optional; `file`, `model_id`, `language_code`, `diarize` and `keyterms` are excluded from this type.
7931
+ * @see https://elevenlabs.io/docs/api-reference/speech-to-text
7932
+ */
7933
+ elevenlabs?: Partial<Omit<BodySpeechToTextV1SpeechToTextPost, "file" | "model_id" | "language_code" | "diarize" | "keyterms">>;
7253
7934
  }
7254
7935
  /**
7255
7936
  * Speaker information from diarization
@@ -7310,8 +7991,8 @@ interface Utterance {
7310
7991
  speaker?: string;
7311
7992
  /** Confidence score (0-1) */
7312
7993
  confidence?: number;
7313
- /** Words in this utterance */
7314
- words?: Word[];
7994
+ /** Words in this utterance; always present (empty array when word-level data is unavailable) */
7995
+ words: Word[];
7315
7996
  /**
7316
7997
  * Unique utterance identifier (provider-assigned)
7317
7998
  *
@@ -7484,6 +8165,7 @@ type ProviderRawResponseMap = {
7484
8165
  "azure-stt": Transcription;
7485
8166
  speechmatics: unknown;
7486
8167
  soniox: unknown;
8168
+ elevenlabs: unknown;
7487
8169
  };
7488
8170
  /**
7489
8171
  * Unified transcription response with provider-specific type safety
@@ -7899,6 +8581,27 @@ interface StreamingOptions extends Omit<TranscribeOptions, "webhookUrl"> {
7899
8581
  * ```
7900
8582
  */
7901
8583
  sonioxStreaming?: SonioxStreamingOptions;
8584
+ /**
8585
+ * ElevenLabs-specific streaming options
8586
+ *
8587
+ * Pass provider-specific options for ElevenLabs realtime transcription.
8588
+ * These override the generic options above; see `ElevenLabsStreamingOptions` for the available fields.
8589
+ *
8590
+ * @example
8591
+ * ```typescript
8592
+ * import { ElevenLabsRealtimeModel, ElevenLabsAudioFormat } from 'voice-router-dev/constants'
8593
+ *
8594
+ * await adapter.transcribeStream({
8595
+ * elevenlabsStreaming: {
8596
+ * model: ElevenLabsRealtimeModel.scribe_v2_realtime,
8597
+ * audioFormat: ElevenLabsAudioFormat.pcm_16000,
8598
+ * commitStrategy: 'vad',
8599
+ * includeTimestamps: true
8600
+ * }
8601
+ * });
8602
+ * ```
8603
+ */
8604
+ elevenlabsStreaming?: ElevenLabsStreamingOptions;
7902
8605
  /**
7903
8606
  * Regional endpoint for streaming (Gladia only)
7904
8607
  *
@@ -8247,4 +8950,267 @@ interface TranscriptReadyNotification {
8247
8950
  */
8248
8951
  type TranscriptWebhookNotification = TranscriptReadyNotification | RedactedAudioNotification;
8249
8952
 
8250
- export { type CustomSpellingConfigDTO as $, type AssemblyAIStreamingOptions as A, type AudioToLlmListDTOError as B, type CallbackConfig as C, type DeepgramStreamingOptions as D, type AudioToLlmResultDTO as E, type FileResponse as F, type GladiaStreamingOptions as G, type CallbackConfigDto as H, CallbackMethodEnum as I, type CallbackTranscriptionErrorPayload as J, type CallbackTranscriptionErrorPayloadCustomMetadata as K, type ListTranscriptsOptions as L, type MessagesConfig as M, type NamedEntityRecognitionDTO as N, CallbackTranscriptionErrorPayloadEvent as O, type PreProcessingConfig as P, type CallbackTranscriptionSuccessPayload as Q, type RealtimeProcessingConfig as R, type StreamingOptions as S, type TranscribeOptions as T, type UnifiedTranscriptResponse as U, type CallbackTranscriptionSuccessPayloadCustomMetadata as V, type WordDTO as W, CallbackTranscriptionSuccessPayloadEvent as X, type ChapterizationDTOError as Y, type ChapterizationDTOResults as Z, type CodeSwitchingConfigDTO as _, type StreamingCallbacks as a, type Entity as a$, type CustomSpellingConfigDTOSpellingDictionary as a0, type CustomVocabularyConfigDTO as a1, type CustomVocabularyConfigDTOVocabularyItem as a2, type CustomVocabularyEntryDTO as a3, type DiarizationConfigDTO as a4, type DiarizationDTO as a5, type DiarizationDTOError as a6, type DisplayModeDTO as a7, type DisplayModeDTOError as a8, type ErrorDTO as a9, type SubtitlesConfigDTO as aA, SubtitlesFormatEnum as aB, SubtitlesStyleEnum as aC, type SummarizationConfigDTO as aD, type SummarizationDTOError as aE, SummaryTypesEnum as aF, TranscriptionControllerListV2KindItem as aG, type TranscriptionControllerListV2Params as aH, TranscriptionControllerListV2StatusItem as aI, type TranscriptionResultDTO as aJ, type TranslationConfigDTO as aK, type TranslationDTOError as aL, TranslationModelEnum as aM, type TranslationResultDTO as aN, type TranslationResultDTOError as aO, type TranscriptOptionalParams as aP, TranscriptStatus as aQ, type 
TranscriptWord as aR, AudioIntelligenceModelStatus as aS, type AutoHighlightResult as aT, type AutoHighlightsResult as aU, type Chapter as aV, type ContentSafetyLabel as aW, type ContentSafetyLabelResult as aX, type ContentSafetyLabelsResult as aY, type ContentSafetyLabelsResultSeverityScoreSummary as aZ, type ContentSafetyLabelsResultSummary as a_, type InitTranscriptionRequest as aa, type InitTranscriptionRequestCustomMetadata as ab, type ModerationDTO as ac, type ModerationDTOError as ad, type NamedEntityRecognitionDTOError as ae, type NamesConsistencyDTO as af, type NamesConsistencyDTOError as ag, type PreRecordedRequestParamsResponse as ah, type PreRecordedResponseCustomMetadata as ai, type PreRecordedResponseFile as aj, PreRecordedResponseKind as ak, type PreRecordedResponsePostSessionMetadata as al, type PreRecordedResponseRequestParams as am, type PreRecordedResponseResult as an, PreRecordedResponseStatus as ao, type SentencesDTO as ap, type SentencesDTOError as aq, type SentimentAnalysisDTOError as ar, type SpeakerReidentificationDTO as as, type SpeakerReidentificationDTOError as at, type StreamingRequest as au, type StreamingRequestCustomMetadata as av, type StructuredDataExtractionConfigDTO as aw, type StructuredDataExtractionDTO as ax, type StructuredDataExtractionDTOError as ay, type SubtitleDTO as az, type StreamingSession as b, type TranscriptSentimentAnalysis as b$, EntityType as b0, type ListTranscriptsParams as b1, PiiPolicy as b2, RedactPiiAudioQuality as b3, type RedactedAudioNotification as b4, type RedactedAudioResponse as b5, RedactedAudioStatus as b6, Sentiment as b7, type SentimentAnalysisResult as b8, type SentimentAnalysisResultChannel as b9, type TranscriptDisfluencies as bA, type TranscriptEntities as bB, type TranscriptEntityDetection as bC, type TranscriptFilterProfanity as bD, type TranscriptFormatText as bE, type TranscriptIabCategories as bF, type TranscriptIabCategoriesResult as bG, TranscriptLanguageCode as bH, type 
TranscriptLanguageCodeProperty as bI, type TranscriptLanguageConfidence as bJ, type TranscriptLanguageConfidenceThreshold as bK, type TranscriptLanguageDetection as bL, type TranscriptMultichannel as bM, type TranscriptOptionalParamsLanguageCode as bN, type TranscriptOptionalParamsLanguageCodeOneOf as bO, type TranscriptOptionalParamsRedactPiiSub as bP, type TranscriptOptionalParamsSpeakersExpected as bQ, type TranscriptOptionalParamsSpeechModel as bR, type TranscriptOptionalParamsSpeechThreshold as bS, type TranscriptOptionalParamsWebhookAuthHeaderName as bT, type TranscriptOptionalParamsWebhookAuthHeaderValue as bU, type TranscriptPunctuate as bV, type TranscriptReadyNotification as bW, TranscriptReadyStatus as bX, type TranscriptRedactPiiAudio as bY, type TranscriptRedactPiiAudioQuality as bZ, type TranscriptRedactPiiPolicies as b_, type SentimentAnalysisResultSpeaker as ba, type SeverityScoreSummary as bb, SpeechModel as bc, SubstitutionPolicy as bd, SummaryModel as be, SummaryType as bf, type Timestamp as bg, type TopicDetectionModelResult as bh, type TopicDetectionModelResultSummary as bi, type TopicDetectionResult as bj, type TopicDetectionResultLabelsItem as bk, type Transcript as bl, type TranscriptAudioDuration as bm, type TranscriptAudioEndAt as bn, type TranscriptAudioStartFrom as bo, type TranscriptAutoChapters as bp, type TranscriptAutoHighlightsResult as bq, TranscriptBoostParam as br, type TranscriptBoostParamProperty as bs, type TranscriptChapters as bt, type TranscriptConfidence as bu, type TranscriptContentSafety as bv, type TranscriptContentSafetyLabels as bw, type TranscriptCustomSpelling as bx, type TranscriptCustomSpellingProperty as by, type TranscriptCustomTopics as bz, type StreamEvent as c, type TranscriptionWord as c$, type TranscriptSentimentAnalysisResults as c0, type TranscriptSpeakerLabels as c1, type TranscriptSpeakersExpected as c2, type TranscriptSpeechModel as c3, type TranscriptSpeechThreshold as c4, type TranscriptSpeedBoost as 
c5, type TranscriptSummary as c6, type TranscriptSummaryModel as c7, type TranscriptSummaryType as c8, type TranscriptText as c9, type TranscriptTextUsageTokens as cA, type TranscriptionSegment as cB, type RealtimeSessionCreateRequestGAModel as cC, RealtimeTranscriptionSessionCreateRequestTurnDetectionType as cD, RealtimeTranscriptionSessionCreateRequestInputAudioFormat as cE, AudioResponseFormat as cF, type CreateTranscription200One as cG, type CreateTranscriptionRequest as cH, type CreateTranscriptionRequestModel as cI, type CreateTranscriptionRequestStream as cJ, CreateTranscriptionRequestTimestampGranularitiesItem as cK, type CreateTranscriptionResponseDiarizedJson as cL, CreateTranscriptionResponseDiarizedJsonTask as cM, type CreateTranscriptionResponseDiarizedJsonUsage as cN, type CreateTranscriptionResponseJson as cO, type CreateTranscriptionResponseJsonLogprobsItem as cP, type CreateTranscriptionResponseJsonUsage as cQ, type CreateTranscriptionResponseVerboseJson as cR, type TranscriptTextUsageDuration as cS, TranscriptTextUsageDurationType as cT, type TranscriptTextUsageTokensInputTokenDetails as cU, TranscriptTextUsageTokensType as cV, type TranscriptionChunkingStrategy as cW, type TranscriptionChunkingStrategyAnyOf as cX, type TranscriptionDiarizedSegment as cY, TranscriptionDiarizedSegmentType as cZ, TranscriptionInclude as c_, type TranscriptThrottled as ca, type TranscriptUtterance as cb, type TranscriptUtteranceChannel as cc, type TranscriptUtterances as cd, type TranscriptWebhookAuthHeaderName as ce, type TranscriptWebhookNotification as cf, type TranscriptWebhookStatusCode as cg, type TranscriptWebhookUrl as ch, type TranscriptWordChannel as ci, type TranscriptWordSpeaker as cj, type TranscriptWords as ck, type StreamingUpdateConfiguration as cl, type Transcription as cm, Status as cn, type EntityError as co, type EntityReference as cp, type DiarizationProperties as cq, type DiarizationSpeakersProperties as cr, LanguageIdentificationMode as cs, 
type LanguageIdentificationProperties as ct, type LanguageIdentificationPropertiesSpeechModelMapping as cu, ProfanityFilterMode as cv, PunctuationMode as cw, type TranscriptionCustomProperties as cx, type TranscriptionLinks as cy, type TranscriptionProperties as cz, StreamingSupportedEncodingEnum as d, type ListenV1ResponseResultsChannelsItemSearchItemHitsItem as d$, type VadConfig as d0, VadConfigType as d1, type ListenV1Response as d2, type ManageV1FilterAccessorParameter as d3, ManageV1FilterDeploymentParameter as d4, type ManageV1LimitParameter as d5, type ManageV1PageParameter as d6, ManageV1FilterEndpointParameter as d7, ManageV1FilterMethodParameter as d8, type SharedTopics as d9, type ListenV1ModelParameter as dA, type ListenV1MultichannelParameter as dB, type ListenV1NumeralsParameter as dC, type ListenV1ParagraphsParameter as dD, type ListenV1ProfanityFilterParameter as dE, type ListenV1PunctuateParameter as dF, type ListenV1RedactParameter as dG, ListenV1RedactParameterOneOfItem as dH, type ListenV1ReplaceParameter as dI, type ListenV1ResponseMetadata as dJ, type ListenV1ResponseMetadataIntentsInfo as dK, type ListenV1ResponseMetadataModelInfo as dL, type ListenV1ResponseMetadataSentimentInfo as dM, type ListenV1ResponseMetadataSummaryInfo as dN, type ListenV1ResponseMetadataTopicsInfo as dO, type ListenV1ResponseResults as dP, type ListenV1ResponseResultsChannels as dQ, type ListenV1ResponseResultsChannelsItem as dR, type ListenV1ResponseResultsChannelsItemAlternativesItem as dS, type ListenV1ResponseResultsChannelsItemAlternativesItemEntitiesItem as dT, type ListenV1ResponseResultsChannelsItemAlternativesItemParagraphs as dU, type ListenV1ResponseResultsChannelsItemAlternativesItemParagraphsParagraphsItem as dV, type ListenV1ResponseResultsChannelsItemAlternativesItemParagraphsParagraphsItemSentencesItem as dW, type ListenV1ResponseResultsChannelsItemAlternativesItemSummariesItem as dX, type ListenV1ResponseResultsChannelsItemAlternativesItemTopicsItem 
as dY, type ListenV1ResponseResultsChannelsItemAlternativesItemWordsItem as dZ, type ListenV1ResponseResultsChannelsItemSearchItem as d_, type SharedIntents as da, type SharedSentiments as db, type SharedCallbackParameter as dc, SharedCallbackMethodParameter as dd, type SharedSentimentParameter as de, type SharedSummarizeParameter as df, type SharedTagParameter as dg, type SharedTopicsParameter as dh, type SharedCustomTopicParameter as di, SharedCustomTopicModeParameter as dj, type SharedIntentsParameter as dk, type SharedCustomIntentParameter as dl, SharedCustomIntentModeParameter as dm, type SharedMipOptOutParameter as dn, type ListenV1DetectEntitiesParameter as dp, type ListenV1DetectLanguageParameter as dq, type ListenV1DiarizeParameter as dr, type ListenV1DictationParameter as ds, ListenV1EncodingParameter as dt, type ListenV1FillerWordsParameter as du, type ListenV1KeytermParameter as dv, type ListenV1KeywordsParameter as dw, type ListenV1LanguageParameter as dx, type ListenV1MeasurementsParameter as dy, type ListenV1MediaTranscribeParams as dz, StreamingSupportedBitDepthEnum as e, type ErrorEvent as e$, type ListenV1ResponseResultsSummary as e0, type ListenV1ResponseResultsUtterances as e1, type ListenV1ResponseResultsUtterancesItem as e2, type ListenV1ResponseResultsUtterancesItemWordsItem as e3, type ListenV1SearchParameter as e4, type ListenV1SmartFormatParameter as e5, type ListenV1UttSplitParameter as e6, type ListenV1UtterancesParameter as e7, type ListenV1VersionParameter as e8, type ManageV1EndDateTimeParameter as e9, type Word as eA, type Utterance as eB, type TranscriptionStatus as eC, type TranscriptMetadata as eD, type TranscriptData as eE, type ListTranscriptsResponse as eF, type ProviderRawResponseMap as eG, type StreamEventType as eH, type SpeechEvent as eI, type TranslationEvent as eJ, type SentimentEvent as eK, type EntityEvent as eL, type SummarizationEvent as eM, type ChapterizationEvent as eN, type AudioAckEvent as eO, type LifecycleEvent 
as eP, type AudioChunk as eQ, type RawWebSocketMessage as eR, type AssemblyAIUpdateConfiguration as eS, type OpenAIStreamingOptions as eT, type SonioxStreamingOptions as eU, type ProviderStreamingOptions as eV, type StreamingOptionsForProvider as eW, type TranscribeStreamParams as eX, type BeginEvent as eY, type TurnEvent as eZ, type TerminationEvent as e_, type ManageV1FilterRequestIdParameter as ea, ManageV1FilterStatusParameter as eb, type ManageV1ProjectsRequestsListParams as ec, type ManageV1StartDateTimeParameter as ed, type SharedExtraParameter as ee, type SharedIntentsResults as ef, type SharedIntentsResultsIntents as eg, type SharedIntentsResultsIntentsSegmentsItem as eh, type SharedIntentsResultsIntentsSegmentsItemIntentsItem as ei, type SharedSentimentsAverage as ej, type SharedSentimentsSegmentsItem as ek, type SharedTopicsResults as el, type SharedTopicsResultsTopics as em, type SharedTopicsResultsTopicsSegmentsItem as en, type SharedTopicsResultsTopicsSegmentsItemTopicsItem as eo, type SpeechmaticsOperatingPoint as ep, type TranscriptionModel as eq, type TranscriptionLanguage as er, type AssemblyAIExtendedData as es, type GladiaExtendedData as et, type DeepgramExtendedData as eu, type ProviderExtendedDataMap as ev, type StreamingProvider as ew, type BatchOnlyProvider as ex, type SessionStatus as ey, type Speaker as ez, StreamingSupportedSampleRateEnum as f, type StreamingEventMessage as f0, type StreamingWord as f1, type StreamingForceEndpoint as f2, StreamingSupportedModels as g, type LanguageConfig as h, type PostProcessingConfig as i, type TranscriptionMetadataDTO as j, type TranscriptionDTO as k, type TranslationDTO as l, type SummarizationDTO as m, type SentimentAnalysisDTO as n, type ChapterizationDTO as o, type PreRecordedResponse as p, type UtteranceDTO as q, TranscriptionLanguageCodeEnum as r, TranslationLanguageCodeEnum as s, StreamingSupportedRegions as t, type AddonErrorDTO as u, type AudioToLlmDTO as v, type AudioToLlmDTOError as w, type 
AudioToLlmDTOResults as x, type AudioToLlmListConfigDTO as y, type AudioToLlmListDTO as z };
8953
+ /**
8954
+ * Generated by orval v7.9.0 🍺
8955
+ * Do not edit manually.
8956
+ * ElevenLabs Speech-to-Text API
8957
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
8958
+ * OpenAPI spec version: 1.0
8959
+ */
8960
+ interface AdditionalFormatResponseModel {
8961
+ /** The requested format. */
8962
+ requested_format: string;
8963
+ /** The file extension of the additional format. */
8964
+ file_extension: string;
8965
+ /** The content type of the additional format. */
8966
+ content_type: string;
8967
+ /** Whether the content is base64 encoded. */
8968
+ is_base64_encoded: boolean;
8969
+ /** The content of the additional format. */
8970
+ content: string;
8971
+ }
8972
+
8973
+ /**
8974
+ * Generated by orval v7.9.0 🍺
8975
+ * Do not edit manually.
8976
+ * ElevenLabs Speech-to-Text API
8977
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
8978
+ * OpenAPI spec version: 1.0
8979
+ */
8980
+ interface DetectedEntity {
8981
+ /** The text that was identified as an entity. */
8982
+ text: string;
8983
+ /** The type of entity detected (e.g., 'credit_card', 'email_address', 'person_name'). */
8984
+ entity_type: string;
8985
+ /** Start character position in the transcript text. */
8986
+ start_char: number;
8987
+ /** End character position in the transcript text. */
8988
+ end_char: number;
8989
+ }
8990
+
8991
+ /**
8992
+ * Generated by orval v7.9.0 🍺
8993
+ * Do not edit manually.
8994
+ * ElevenLabs Speech-to-Text API
8995
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
8996
+ * OpenAPI spec version: 1.0
8997
+ */
8998
+
8999
+ type SpeechToTextChunkResponseModelAdditionalFormatsAnyOfItem = AdditionalFormatResponseModel | null;
9000
+
9001
+ /**
9002
+ * Generated by orval v7.9.0 🍺
9003
+ * Do not edit manually.
9004
+ * ElevenLabs Speech-to-Text API
9005
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9006
+ * OpenAPI spec version: 1.0
9007
+ */
9008
+
9009
+ /**
9010
+ * Requested additional formats of the transcript.
9011
+ */
9012
+ type SpeechToTextChunkResponseModelAdditionalFormats = SpeechToTextChunkResponseModelAdditionalFormatsAnyOfItem[] | null;
9013
+
9014
+ /**
9015
+ * Generated by orval v7.9.0 🍺
9016
+ * Do not edit manually.
9017
+ * ElevenLabs Speech-to-Text API
9018
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9019
+ * OpenAPI spec version: 1.0
9020
+ */
9021
+ /**
9022
+ * The channel index this transcript belongs to (for multichannel audio).
9023
+ */
9024
+ type SpeechToTextChunkResponseModelChannelIndex = number | null;
9025
+
9026
+ /**
9027
+ * Generated by orval v7.9.0 🍺
9028
+ * Do not edit manually.
9029
+ * ElevenLabs Speech-to-Text API
9030
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9031
+ * OpenAPI spec version: 1.0
9032
+ */
9033
+
9034
+ /**
9035
+ * List of detected entities with their text, type, and character positions in the transcript.
9036
+ */
9037
+ type SpeechToTextChunkResponseModelEntities = DetectedEntity[] | null;
9038
+
9039
+ /**
9040
+ * Generated by orval v7.9.0 🍺
9041
+ * Do not edit manually.
9042
+ * ElevenLabs Speech-to-Text API
9043
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9044
+ * OpenAPI spec version: 1.0
9045
+ */
9046
+ /**
9047
+ * The transcription ID of the response.
9048
+ */
9049
+ type SpeechToTextChunkResponseModelTranscriptionId = string | null;
9050
+
9051
+ /**
9052
+ * Generated by orval v7.9.0 🍺
9053
+ * Do not edit manually.
9054
+ * ElevenLabs Speech-to-Text API
9055
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9056
+ * OpenAPI spec version: 1.0
9057
+ */
9058
+ /**
9059
+ * The end time of the character in seconds.
9060
+ */
9061
+ type SpeechToTextCharacterResponseModelEnd = number | null;
9062
+
9063
+ /**
9064
+ * Generated by orval v7.9.0 🍺
9065
+ * Do not edit manually.
9066
+ * ElevenLabs Speech-to-Text API
9067
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9068
+ * OpenAPI spec version: 1.0
9069
+ */
9070
+ /**
9071
+ * The start time of the character in seconds.
9072
+ */
9073
+ type SpeechToTextCharacterResponseModelStart = number | null;
9074
+
9075
+ /**
9076
+ * Generated by orval v7.9.0 🍺
9077
+ * Do not edit manually.
9078
+ * ElevenLabs Speech-to-Text API
9079
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9080
+ * OpenAPI spec version: 1.0
9081
+ */
9082
+
9083
+ interface SpeechToTextCharacterResponseModel {
9084
+ /** The character that was transcribed. */
9085
+ text: string;
9086
+ /** The start time of the character in seconds. */
9087
+ start?: SpeechToTextCharacterResponseModelStart;
9088
+ /** The end time of the character in seconds. */
9089
+ end?: SpeechToTextCharacterResponseModelEnd;
9090
+ }
9091
+
9092
+ /**
9093
+ * Generated by orval v7.9.0 🍺
9094
+ * Do not edit manually.
9095
+ * ElevenLabs Speech-to-Text API
9096
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9097
+ * OpenAPI spec version: 1.0
9098
+ */
9099
+
9100
+ /**
9101
+ * The characters that make up the word and their timing information.
9102
+ */
9103
+ type SpeechToTextWordResponseModelCharacters = SpeechToTextCharacterResponseModel[] | null;
9104
+
9105
+ /**
9106
+ * Generated by orval v7.9.0 🍺
9107
+ * Do not edit manually.
9108
+ * ElevenLabs Speech-to-Text API
9109
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9110
+ * OpenAPI spec version: 1.0
9111
+ */
9112
+ /**
9113
+ * The end time of the word or sound in seconds.
9114
+ */
9115
+ type SpeechToTextWordResponseModelEnd = number | null;
9116
+
9117
+ /**
9118
+ * Generated by orval v7.9.0 🍺
9119
+ * Do not edit manually.
9120
+ * ElevenLabs Speech-to-Text API
9121
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9122
+ * OpenAPI spec version: 1.0
9123
+ */
9124
+ /**
9125
+ * Unique identifier for the speaker of this word.
9126
+ */
9127
+ type SpeechToTextWordResponseModelSpeakerId = string | null;
9128
+
9129
+ /**
9130
+ * Generated by orval v7.9.0 🍺
9131
+ * Do not edit manually.
9132
+ * ElevenLabs Speech-to-Text API
9133
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9134
+ * OpenAPI spec version: 1.0
9135
+ */
9136
+ /**
9137
+ * The start time of the word or sound in seconds.
9138
+ */
9139
+ type SpeechToTextWordResponseModelStart = number | null;
9140
+
9141
+ /**
9142
+ * Generated by orval v7.9.0 🍺
9143
+ * Do not edit manually.
9144
+ * ElevenLabs Speech-to-Text API
9145
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9146
+ * OpenAPI spec version: 1.0
9147
+ */
9148
+ /**
9149
+ * The type of the word or sound. 'audio_event' is used for non-word sounds like laughter or footsteps.
9150
+ */
9151
+ type SpeechToTextWordResponseModelType = (typeof SpeechToTextWordResponseModelType)[keyof typeof SpeechToTextWordResponseModelType];
9152
+ declare const SpeechToTextWordResponseModelType: {
9153
+ readonly word: "word";
9154
+ readonly spacing: "spacing";
9155
+ readonly audio_event: "audio_event";
9156
+ };
9157
+
9158
+ /**
9159
+ * Generated by orval v7.9.0 🍺
9160
+ * Do not edit manually.
9161
+ * ElevenLabs Speech-to-Text API
9162
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9163
+ * OpenAPI spec version: 1.0
9164
+ */
9165
+
9166
+ /**
9167
+ * Word-level detail of the transcription with timing information.
9168
+ */
9169
+ interface SpeechToTextWordResponseModel {
9170
+ /** The word or sound that was transcribed. */
9171
+ text: string;
9172
+ /** The start time of the word or sound in seconds. */
9173
+ start?: SpeechToTextWordResponseModelStart;
9174
+ /** The end time of the word or sound in seconds. */
9175
+ end?: SpeechToTextWordResponseModelEnd;
9176
+ /** The type of the word or sound. 'audio_event' is used for non-word sounds like laughter or footsteps. */
9177
+ type: SpeechToTextWordResponseModelType;
9178
+ /** Unique identifier for the speaker of this word. */
9179
+ speaker_id?: SpeechToTextWordResponseModelSpeakerId;
9180
+ /** The log of the probability with which this word was predicted. Logprobs are in range [-infinity, 0], higher logprobs indicate a higher confidence the model has in its predictions. */
9181
+ logprob: number;
9182
+ /** The characters that make up the word and their timing information. */
9183
+ characters?: SpeechToTextWordResponseModelCharacters;
9184
+ }
9185
+
9186
+ /**
9187
+ * Generated by orval v7.9.0 🍺
9188
+ * Do not edit manually.
9189
+ * ElevenLabs Speech-to-Text API
9190
+ * ElevenLabs Speech-to-Text API - Batch and realtime transcription endpoints. Filtered from the official ElevenLabs API spec.
9191
+ * OpenAPI spec version: 1.0
9192
+ */
9193
+
9194
+ /**
9195
+ * Chunk-level detail of the transcription with timing information. `language_code`, `language_probability`, `text` and `words` are always present; the remaining fields are optional.
9196
+ */
9197
+ interface SpeechToTextChunkResponseModel {
9198
+ /** The detected language code (e.g. 'eng' for English). */
9199
+ language_code: string;
9200
+ /** The confidence score of the language detection (0 to 1). */
9201
+ language_probability: number;
9202
+ /** The raw text of the transcription. */
9203
+ text: string;
9204
+ /** List of words with their timing information; each entry is a SpeechToTextWordResponseModel. */
9205
+ words: SpeechToTextWordResponseModel[];
9206
+ /** The channel index this transcript belongs to (for multichannel audio). Optional. */
9207
+ channel_index?: SpeechToTextChunkResponseModelChannelIndex;
9208
+ /** Requested additional formats of the transcript. Optional. */
9209
+ additional_formats?: SpeechToTextChunkResponseModelAdditionalFormats;
9210
+ /** The transcription ID of the response. Optional. */
9211
+ transcription_id?: SpeechToTextChunkResponseModelTranscriptionId;
9212
+ /** List of detected entities with their text, type, and character positions in the transcript. Optional. */
9213
+ entities?: SpeechToTextChunkResponseModelEntities;
9214
+ }
9215
+
9216
+ export { type CustomSpellingConfigDTO as $, type AssemblyAIStreamingOptions as A, type AudioToLlmListDTOError as B, type CallbackConfig as C, type DeepgramStreamingOptions as D, type AudioToLlmResultDTO as E, type FileResponse as F, type GladiaStreamingOptions as G, type CallbackConfigDto as H, CallbackMethodEnum as I, type CallbackTranscriptionErrorPayload as J, type CallbackTranscriptionErrorPayloadCustomMetadata as K, type ListTranscriptsOptions as L, type MessagesConfig as M, type NamedEntityRecognitionDTO as N, CallbackTranscriptionErrorPayloadEvent as O, type PreProcessingConfig as P, type CallbackTranscriptionSuccessPayload as Q, type RealtimeProcessingConfig as R, type StreamingOptions as S, type TranscribeOptions as T, type UnifiedTranscriptResponse as U, type CallbackTranscriptionSuccessPayloadCustomMetadata as V, type WordDTO as W, CallbackTranscriptionSuccessPayloadEvent as X, type ChapterizationDTOError as Y, type ChapterizationDTOResults as Z, type CodeSwitchingConfigDTO as _, type StreamingCallbacks as a, type Entity as a$, type CustomSpellingConfigDTOSpellingDictionary as a0, type CustomVocabularyConfigDTO as a1, type CustomVocabularyConfigDTOVocabularyItem as a2, type CustomVocabularyEntryDTO as a3, type DiarizationConfigDTO as a4, type DiarizationDTO as a5, type DiarizationDTOError as a6, type DisplayModeDTO as a7, type DisplayModeDTOError as a8, type ErrorDTO as a9, type SubtitlesConfigDTO as aA, SubtitlesFormatEnum as aB, SubtitlesStyleEnum as aC, type SummarizationConfigDTO as aD, type SummarizationDTOError as aE, SummaryTypesEnum as aF, TranscriptionControllerListV2KindItem as aG, type TranscriptionControllerListV2Params as aH, TranscriptionControllerListV2StatusItem as aI, type TranscriptionResultDTO as aJ, type TranslationConfigDTO as aK, type TranslationDTOError as aL, TranslationModelEnum as aM, type TranslationResultDTO as aN, type TranslationResultDTOError as aO, type TranscriptOptionalParams as aP, TranscriptStatus as aQ, type 
TranscriptWord as aR, AudioIntelligenceModelStatus as aS, type AutoHighlightResult as aT, type AutoHighlightsResult as aU, type Chapter as aV, type ContentSafetyLabel as aW, type ContentSafetyLabelResult as aX, type ContentSafetyLabelsResult as aY, type ContentSafetyLabelsResultSeverityScoreSummary as aZ, type ContentSafetyLabelsResultSummary as a_, type InitTranscriptionRequest as aa, type InitTranscriptionRequestCustomMetadata as ab, type ModerationDTO as ac, type ModerationDTOError as ad, type NamedEntityRecognitionDTOError as ae, type NamesConsistencyDTO as af, type NamesConsistencyDTOError as ag, type PreRecordedRequestParamsResponse as ah, type PreRecordedResponseCustomMetadata as ai, type PreRecordedResponseFile as aj, PreRecordedResponseKind as ak, type PreRecordedResponsePostSessionMetadata as al, type PreRecordedResponseRequestParams as am, type PreRecordedResponseResult as an, PreRecordedResponseStatus as ao, type SentencesDTO as ap, type SentencesDTOError as aq, type SentimentAnalysisDTOError as ar, type SpeakerReidentificationDTO as as, type SpeakerReidentificationDTOError as at, type StreamingRequest as au, type StreamingRequestCustomMetadata as av, type StructuredDataExtractionConfigDTO as aw, type StructuredDataExtractionDTO as ax, type StructuredDataExtractionDTOError as ay, type SubtitleDTO as az, type StreamingSession as b, type TranscriptSentimentAnalysis as b$, EntityType as b0, type ListTranscriptsParams as b1, PiiPolicy as b2, RedactPiiAudioQuality as b3, type RedactedAudioNotification as b4, type RedactedAudioResponse as b5, RedactedAudioStatus as b6, Sentiment as b7, type SentimentAnalysisResult as b8, type SentimentAnalysisResultChannel as b9, type TranscriptDisfluencies as bA, type TranscriptEntities as bB, type TranscriptEntityDetection as bC, type TranscriptFilterProfanity as bD, type TranscriptFormatText as bE, type TranscriptIabCategories as bF, type TranscriptIabCategoriesResult as bG, TranscriptLanguageCode as bH, type 
TranscriptLanguageCodeProperty as bI, type TranscriptLanguageConfidence as bJ, type TranscriptLanguageConfidenceThreshold as bK, type TranscriptLanguageDetection as bL, type TranscriptMultichannel as bM, type TranscriptOptionalParamsLanguageCode as bN, type TranscriptOptionalParamsLanguageCodeOneOf as bO, type TranscriptOptionalParamsRedactPiiSub as bP, type TranscriptOptionalParamsSpeakersExpected as bQ, type TranscriptOptionalParamsSpeechModel as bR, type TranscriptOptionalParamsSpeechThreshold as bS, type TranscriptOptionalParamsWebhookAuthHeaderName as bT, type TranscriptOptionalParamsWebhookAuthHeaderValue as bU, type TranscriptPunctuate as bV, type TranscriptReadyNotification as bW, TranscriptReadyStatus as bX, type TranscriptRedactPiiAudio as bY, type TranscriptRedactPiiAudioQuality as bZ, type TranscriptRedactPiiPolicies as b_, type SentimentAnalysisResultSpeaker as ba, type SeverityScoreSummary as bb, SpeechModel as bc, SubstitutionPolicy as bd, SummaryModel as be, SummaryType as bf, type Timestamp as bg, type TopicDetectionModelResult as bh, type TopicDetectionModelResultSummary as bi, type TopicDetectionResult as bj, type TopicDetectionResultLabelsItem as bk, type Transcript as bl, type TranscriptAudioDuration as bm, type TranscriptAudioEndAt as bn, type TranscriptAudioStartFrom as bo, type TranscriptAutoChapters as bp, type TranscriptAutoHighlightsResult as bq, TranscriptBoostParam as br, type TranscriptBoostParamProperty as bs, type TranscriptChapters as bt, type TranscriptConfidence as bu, type TranscriptContentSafety as bv, type TranscriptContentSafetyLabels as bw, type TranscriptCustomSpelling as bx, type TranscriptCustomSpellingProperty as by, type TranscriptCustomTopics as bz, type StreamEvent as c, type TranscriptionWord as c$, type TranscriptSentimentAnalysisResults as c0, type TranscriptSpeakerLabels as c1, type TranscriptSpeakersExpected as c2, type TranscriptSpeechModel as c3, type TranscriptSpeechThreshold as c4, type TranscriptSpeedBoost as 
c5, type TranscriptSummary as c6, type TranscriptSummaryModel as c7, type TranscriptSummaryType as c8, type TranscriptText as c9, type TranscriptTextUsageTokens as cA, type TranscriptionSegment as cB, type RealtimeSessionCreateRequestGAModel as cC, RealtimeTranscriptionSessionCreateRequestTurnDetectionType as cD, RealtimeTranscriptionSessionCreateRequestInputAudioFormat as cE, AudioResponseFormat as cF, type CreateTranscription200One as cG, type CreateTranscriptionRequest as cH, type CreateTranscriptionRequestModel as cI, type CreateTranscriptionRequestStream as cJ, CreateTranscriptionRequestTimestampGranularitiesItem as cK, type CreateTranscriptionResponseDiarizedJson as cL, CreateTranscriptionResponseDiarizedJsonTask as cM, type CreateTranscriptionResponseDiarizedJsonUsage as cN, type CreateTranscriptionResponseJson as cO, type CreateTranscriptionResponseJsonLogprobsItem as cP, type CreateTranscriptionResponseJsonUsage as cQ, type CreateTranscriptionResponseVerboseJson as cR, type TranscriptTextUsageDuration as cS, TranscriptTextUsageDurationType as cT, type TranscriptTextUsageTokensInputTokenDetails as cU, TranscriptTextUsageTokensType as cV, type TranscriptionChunkingStrategy as cW, type TranscriptionChunkingStrategyAnyOf as cX, type TranscriptionDiarizedSegment as cY, TranscriptionDiarizedSegmentType as cZ, TranscriptionInclude as c_, type TranscriptThrottled as ca, type TranscriptUtterance as cb, type TranscriptUtteranceChannel as cc, type TranscriptUtterances as cd, type TranscriptWebhookAuthHeaderName as ce, type TranscriptWebhookNotification as cf, type TranscriptWebhookStatusCode as cg, type TranscriptWebhookUrl as ch, type TranscriptWordChannel as ci, type TranscriptWordSpeaker as cj, type TranscriptWords as ck, type StreamingUpdateConfiguration as cl, type Transcription as cm, Status as cn, type EntityError as co, type EntityReference as cp, type DiarizationProperties as cq, type DiarizationSpeakersProperties as cr, LanguageIdentificationMode as cs, 
type LanguageIdentificationProperties as ct, type LanguageIdentificationPropertiesSpeechModelMapping as cu, ProfanityFilterMode as cv, PunctuationMode as cw, type TranscriptionCustomProperties as cx, type TranscriptionLinks as cy, type TranscriptionProperties as cz, StreamingSupportedEncodingEnum as d, type ListenV1ResponseResultsChannelsItemSearchItemHitsItem as d$, type VadConfig as d0, VadConfigType as d1, type ListenV1Response as d2, type ManageV1FilterAccessorParameter as d3, ManageV1FilterDeploymentParameter as d4, type ManageV1LimitParameter as d5, type ManageV1PageParameter as d6, ManageV1FilterEndpointParameter as d7, ManageV1FilterMethodParameter as d8, type SharedTopics as d9, type ListenV1ModelParameter as dA, type ListenV1MultichannelParameter as dB, type ListenV1NumeralsParameter as dC, type ListenV1ParagraphsParameter as dD, type ListenV1ProfanityFilterParameter as dE, type ListenV1PunctuateParameter as dF, type ListenV1RedactParameter as dG, ListenV1RedactParameterOneOfItem as dH, type ListenV1ReplaceParameter as dI, type ListenV1ResponseMetadata as dJ, type ListenV1ResponseMetadataIntentsInfo as dK, type ListenV1ResponseMetadataModelInfo as dL, type ListenV1ResponseMetadataSentimentInfo as dM, type ListenV1ResponseMetadataSummaryInfo as dN, type ListenV1ResponseMetadataTopicsInfo as dO, type ListenV1ResponseResults as dP, type ListenV1ResponseResultsChannels as dQ, type ListenV1ResponseResultsChannelsItem as dR, type ListenV1ResponseResultsChannelsItemAlternativesItem as dS, type ListenV1ResponseResultsChannelsItemAlternativesItemEntitiesItem as dT, type ListenV1ResponseResultsChannelsItemAlternativesItemParagraphs as dU, type ListenV1ResponseResultsChannelsItemAlternativesItemParagraphsParagraphsItem as dV, type ListenV1ResponseResultsChannelsItemAlternativesItemParagraphsParagraphsItemSentencesItem as dW, type ListenV1ResponseResultsChannelsItemAlternativesItemSummariesItem as dX, type ListenV1ResponseResultsChannelsItemAlternativesItemTopicsItem 
as dY, type ListenV1ResponseResultsChannelsItemAlternativesItemWordsItem as dZ, type ListenV1ResponseResultsChannelsItemSearchItem as d_, type SharedIntents as da, type SharedSentiments as db, type SharedCallbackParameter as dc, SharedCallbackMethodParameter as dd, type SharedSentimentParameter as de, type SharedSummarizeParameter as df, type SharedTagParameter as dg, type SharedTopicsParameter as dh, type SharedCustomTopicParameter as di, SharedCustomTopicModeParameter as dj, type SharedIntentsParameter as dk, type SharedCustomIntentParameter as dl, SharedCustomIntentModeParameter as dm, type SharedMipOptOutParameter as dn, type ListenV1DetectEntitiesParameter as dp, type ListenV1DetectLanguageParameter as dq, type ListenV1DiarizeParameter as dr, type ListenV1DictationParameter as ds, ListenV1EncodingParameter as dt, type ListenV1FillerWordsParameter as du, type ListenV1KeytermParameter as dv, type ListenV1KeywordsParameter as dw, type ListenV1LanguageParameter as dx, type ListenV1MeasurementsParameter as dy, type ListenV1MediaTranscribeParams as dz, StreamingSupportedBitDepthEnum as e, type SegmentedJsonExportOptionsMaxSegmentDurationS as e$, type ListenV1ResponseResultsSummary as e0, type ListenV1ResponseResultsUtterances as e1, type ListenV1ResponseResultsUtterancesItem as e2, type ListenV1ResponseResultsUtterancesItemWordsItem as e3, type ListenV1SearchParameter as e4, type ListenV1SmartFormatParameter as e5, type ListenV1UttSplitParameter as e6, type ListenV1UtterancesParameter as e7, type ListenV1VersionParameter as e8, type ManageV1EndDateTimeParameter as e9, type BodySpeechToTextV1SpeechToTextPostNumSpeakers as eA, type BodySpeechToTextV1SpeechToTextPostSeed as eB, type BodySpeechToTextV1SpeechToTextPostTemperature as eC, BodySpeechToTextV1SpeechToTextPostTimestampsGranularity as eD, type BodySpeechToTextV1SpeechToTextPostWebhookId as eE, type BodySpeechToTextV1SpeechToTextPostWebhookMetadata as eF, type 
BodySpeechToTextV1SpeechToTextPostWebhookMetadataAnyOf as eG, type DetectedEntity as eH, type DocxExportOptions as eI, DocxExportOptionsFormat as eJ, type DocxExportOptionsMaxSegmentChars as eK, type DocxExportOptionsMaxSegmentDurationS as eL, type DocxExportOptionsSegmentOnSilenceLongerThanS as eM, type ExportOptions as eN, type HtmlExportOptions as eO, HtmlExportOptionsFormat as eP, type HtmlExportOptionsMaxSegmentChars as eQ, type HtmlExportOptionsMaxSegmentDurationS as eR, type HtmlExportOptionsSegmentOnSilenceLongerThanS as eS, type PdfExportOptions as eT, PdfExportOptionsFormat as eU, type PdfExportOptionsMaxSegmentChars as eV, type PdfExportOptionsMaxSegmentDurationS as eW, type PdfExportOptionsSegmentOnSilenceLongerThanS as eX, type SegmentedJsonExportOptions as eY, SegmentedJsonExportOptionsFormat as eZ, type SegmentedJsonExportOptionsMaxSegmentChars as e_, type ManageV1FilterRequestIdParameter as ea, ManageV1FilterStatusParameter as eb, type ManageV1ProjectsRequestsListParams as ec, type ManageV1StartDateTimeParameter as ed, type SharedExtraParameter as ee, type SharedIntentsResults as ef, type SharedIntentsResultsIntents as eg, type SharedIntentsResultsIntentsSegmentsItem as eh, type SharedIntentsResultsIntentsSegmentsItemIntentsItem as ei, type SharedSentimentsAverage as ej, type SharedSentimentsSegmentsItem as ek, type SharedTopicsResults as el, type SharedTopicsResultsTopics as em, type SharedTopicsResultsTopicsSegmentsItem as en, type SharedTopicsResultsTopicsSegmentsItemTopicsItem as eo, type SpeechToTextChunkResponseModel as ep, type AdditionalFormatResponseModel as eq, type AdditionalFormats as er, type BodySpeechToTextV1SpeechToTextPost as es, type BodySpeechToTextV1SpeechToTextPostCloudStorageUrl as et, type BodySpeechToTextV1SpeechToTextPostDiarizationThreshold as eu, type BodySpeechToTextV1SpeechToTextPostEntityDetection as ev, type BodySpeechToTextV1SpeechToTextPostFile as ew, BodySpeechToTextV1SpeechToTextPostFileFormat as ex, type 
BodySpeechToTextV1SpeechToTextPostLanguageCode as ey, BodySpeechToTextV1SpeechToTextPostModelId as ez, StreamingSupportedSampleRateEnum as f, type TranscribeStreamParams as f$, type SegmentedJsonExportOptionsSegmentOnSilenceLongerThanS as f0, type SpeechToTextCharacterResponseModel as f1, type SpeechToTextCharacterResponseModelEnd as f2, type SpeechToTextCharacterResponseModelStart as f3, type SpeechToTextChunkResponseModelAdditionalFormats as f4, type SpeechToTextChunkResponseModelAdditionalFormatsAnyOfItem as f5, type SpeechToTextChunkResponseModelChannelIndex as f6, type SpeechToTextChunkResponseModelEntities as f7, type SpeechToTextChunkResponseModelTranscriptionId as f8, type SpeechToTextWordResponseModel as f9, type BatchOnlyProvider as fA, type SessionStatus as fB, type Speaker as fC, type Word as fD, type Utterance as fE, type TranscriptionStatus as fF, type TranscriptMetadata as fG, type TranscriptData as fH, type ListTranscriptsResponse as fI, type ProviderRawResponseMap as fJ, type StreamEventType as fK, type SpeechEvent as fL, type TranslationEvent as fM, type SentimentEvent as fN, type EntityEvent as fO, type SummarizationEvent as fP, type ChapterizationEvent as fQ, type AudioAckEvent as fR, type LifecycleEvent as fS, type AudioChunk as fT, type RawWebSocketMessage as fU, type AssemblyAIUpdateConfiguration as fV, type OpenAIStreamingOptions as fW, type SonioxStreamingOptions as fX, type ElevenLabsStreamingOptions as fY, type ProviderStreamingOptions as fZ, type StreamingOptionsForProvider as f_, type SpeechToTextWordResponseModelCharacters as fa, type SpeechToTextWordResponseModelEnd as fb, type SpeechToTextWordResponseModelSpeakerId as fc, type SpeechToTextWordResponseModelStart as fd, SpeechToTextWordResponseModelType as fe, type SrtExportOptions as ff, SrtExportOptionsFormat as fg, type SrtExportOptionsMaxCharactersPerLine as fh, type SrtExportOptionsMaxSegmentChars as fi, type SrtExportOptionsMaxSegmentDurationS as fj, type 
SrtExportOptionsSegmentOnSilenceLongerThanS as fk, type TxtExportOptions as fl, TxtExportOptionsFormat as fm, type TxtExportOptionsMaxCharactersPerLine as fn, type TxtExportOptionsMaxSegmentChars as fo, type TxtExportOptionsMaxSegmentDurationS as fp, type TxtExportOptionsSegmentOnSilenceLongerThanS as fq, type SpeechmaticsOperatingPoint as fr, type TranscriptionModel as fs, type TranscriptionLanguage as ft, type AssemblyAIExtendedData as fu, type GladiaExtendedData as fv, type DeepgramExtendedData as fw, type ElevenLabsExtendedData as fx, type ProviderExtendedDataMap as fy, type StreamingProvider as fz, StreamingSupportedModels as g, type BeginEvent as g0, type TurnEvent as g1, type TerminationEvent as g2, type ErrorEvent as g3, type StreamingEventMessage as g4, type StreamingWord as g5, type StreamingForceEndpoint as g6, type LanguageConfig as h, type PostProcessingConfig as i, type TranscriptionMetadataDTO as j, type TranscriptionDTO as k, type TranslationDTO as l, type SummarizationDTO as m, type SentimentAnalysisDTO as n, type ChapterizationDTO as o, type PreRecordedResponse as p, type UtteranceDTO as q, TranscriptionLanguageCodeEnum as r, TranslationLanguageCodeEnum as s, StreamingSupportedRegions as t, type AddonErrorDTO as u, type AudioToLlmDTO as v, type AudioToLlmDTOError as w, type AudioToLlmDTOResults as x, type AudioToLlmListConfigDTO as y, type AudioToLlmListDTO as z };