voice-router-dev 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -5889,6 +5889,431 @@ interface StreamingRequest {
5889
5889
  callback_config?: CallbackConfig;
5890
5890
  }
5891
5891
 
5892
+ /**
5893
+ * Provider-specific streaming enums for type-safe autocomplete
5894
+ *
5895
+ * These const objects provide IDE autocomplete and compile-time validation
5896
+ * for streaming options. Use these instead of raw strings for better DX.
5897
+ *
5898
+ * @example
5899
+ * ```typescript
5900
+ * import { DeepgramEncoding, GladiaEncoding, DeepgramModel } from '@meeting-baas/sdk'
5901
+ *
5902
+ * await adapter.transcribeStream({
5903
+ * deepgramStreaming: {
5904
+ * encoding: DeepgramEncoding.linear16,
5905
+ * model: DeepgramModel["nova-3"],
5906
+ * language: 'en'
5907
+ * }
5908
+ * })
5909
+ * ```
5910
+ */
5911
+
5912
+ /**
5913
+ * Deepgram transcription models
5914
+ *
5915
+ * Derived from `ListenV1ModelParameter` in the OpenAPI spec.
5916
+ * Values are kept in sync with the generated type union.
5917
+ *
5918
+ * @see {@link ListenV1ModelParameter} for the underlying type
5919
+ * @example
5920
+ * ```typescript
5921
+ * import { DeepgramModel } from 'voice-router-dev'
5922
+ *
5923
+ * { model: DeepgramModel["nova-3"] }
5924
+ * { model: DeepgramModel["nova-2-medical"] }
5925
+ * ```
5926
+ */
5927
+ declare const DeepgramModel: {
5928
+ readonly "nova-3": "nova-3";
5929
+ readonly "nova-3-general": "nova-3-general";
5930
+ readonly "nova-3-medical": "nova-3-medical";
5931
+ readonly "nova-2": "nova-2";
5932
+ readonly "nova-2-general": "nova-2-general";
5933
+ readonly "nova-2-meeting": "nova-2-meeting";
5934
+ readonly "nova-2-finance": "nova-2-finance";
5935
+ readonly "nova-2-conversationalai": "nova-2-conversationalai";
5936
+ readonly "nova-2-voicemail": "nova-2-voicemail";
5937
+ readonly "nova-2-video": "nova-2-video";
5938
+ readonly "nova-2-medical": "nova-2-medical";
5939
+ readonly "nova-2-drivethru": "nova-2-drivethru";
5940
+ readonly "nova-2-automotive": "nova-2-automotive";
5941
+ readonly nova: "nova";
5942
+ readonly "nova-general": "nova-general";
5943
+ readonly "nova-phonecall": "nova-phonecall";
5944
+ readonly "nova-medical": "nova-medical";
5945
+ readonly enhanced: "enhanced";
5946
+ readonly "enhanced-general": "enhanced-general";
5947
+ readonly "enhanced-meeting": "enhanced-meeting";
5948
+ readonly "enhanced-phonecall": "enhanced-phonecall";
5949
+ readonly "enhanced-finance": "enhanced-finance";
5950
+ readonly base: "base";
5951
+ readonly meeting: "meeting";
5952
+ readonly phonecall: "phonecall";
5953
+ readonly finance: "finance";
5954
+ readonly conversationalai: "conversationalai";
5955
+ readonly voicemail: "voicemail";
5956
+ readonly video: "video";
5957
+ };
5958
+
5959
+ /**
5960
+ * AssemblyAI audio encoding formats
5961
+ *
5962
+ * Derived from `AudioEncoding` type in the auto-synced streaming types.
5963
+ *
5964
+ * @example
5965
+ * ```typescript
5966
+ * import { AssemblyAIEncoding } from 'voice-router-dev'
5967
+ *
5968
+ * { encoding: AssemblyAIEncoding.pcmS16le }
5969
+ * ```
5970
+ */
5971
+ declare const AssemblyAIEncoding: {
5972
+ /** PCM signed 16-bit little-endian (recommended) */
5973
+ readonly pcmS16le: "pcm_s16le";
5974
+ /** μ-law (telephony) */
5975
+ readonly pcmMulaw: "pcm_mulaw";
5976
+ };
5977
+ /**
5978
+ * AssemblyAI streaming speech models
5979
+ *
5980
+ * Derived from `StreamingSpeechModel` type in the auto-synced streaming types.
5981
+ *
5982
+ * @example
5983
+ * ```typescript
5984
+ * import { AssemblyAISpeechModel } from 'voice-router-dev'
5985
+ *
5986
+ * { speechModel: AssemblyAISpeechModel.english }
5987
+ * { speechModel: AssemblyAISpeechModel.multilingual }
5988
+ * ```
5989
+ */
5990
+ declare const AssemblyAISpeechModel: {
5991
+ /** Optimized for English */
5992
+ readonly english: "universal-streaming-english";
5993
+ /** Supports 20+ languages */
5994
+ readonly multilingual: "universal-streaming-multilingual";
5995
+ };
5996
+ /**
5997
+ * AssemblyAI supported sample rates
5998
+ *
5999
+ * @example
6000
+ * ```typescript
6001
+ * import { AssemblyAISampleRate } from '@meeting-baas/sdk'
6002
+ *
6003
+ * { sampleRate: AssemblyAISampleRate.rate16000 }
6004
+ * ```
6005
+ */
6006
+ declare const AssemblyAISampleRate: {
6007
+ readonly rate8000: 8000;
6008
+ readonly rate16000: 16000;
6009
+ readonly rate22050: 22050;
6010
+ readonly rate44100: 44100;
6011
+ readonly rate48000: 48000;
6012
+ };
6013
+ /** Deepgram model type derived from const object */
6014
+ type DeepgramModelType = (typeof DeepgramModel)[keyof typeof DeepgramModel];
6015
+
6016
+ /** AssemblyAI encoding type derived from const object */
6017
+ type AssemblyAIEncodingType = (typeof AssemblyAIEncoding)[keyof typeof AssemblyAIEncoding];
6018
+ /** AssemblyAI speech model type derived from const object */
6019
+ type AssemblyAISpeechModelType = (typeof AssemblyAISpeechModel)[keyof typeof AssemblyAISpeechModel];
6020
+ /** AssemblyAI sample rate type derived from const object */
6021
+ type AssemblyAISampleRateType = (typeof AssemblyAISampleRate)[keyof typeof AssemblyAISampleRate];
6022
+
6023
+ type StreamingSpeechModel = "universal-streaming-english" | "universal-streaming-multilingual";
6024
+ type StreamingUpdateConfiguration = {
6025
+ type: "UpdateConfiguration";
6026
+ end_of_turn_confidence_threshold?: number;
6027
+ min_end_of_turn_silence_when_confident?: number;
6028
+ max_turn_silence?: number;
6029
+ vad_threshold?: number;
6030
+ format_turns?: boolean;
6031
+ };
6032
+
6033
+ /**
6034
+ * Provider-specific streaming option types using OpenAPI-generated schemas
6035
+ *
6036
+ * These types provide compile-time safety by restricting options to what
6037
+ * each provider actually supports according to their OpenAPI specifications.
6038
+ *
6039
+ * For autocomplete-friendly const objects, import the exported enums from the package:
6040
+ * @example
6041
+ * ```typescript
6042
+ * import { DeepgramEncoding, DeepgramModel, GladiaEncoding } from 'voice-router-dev'
6043
+ * ```
6044
+ */
6045
+
6046
+ /**
6047
+ * Gladia streaming options (from OpenAPI spec)
6048
+ *
6049
+ * Based on the generated `StreamingRequest` type from Gladia's OpenAPI spec.
6050
+ * All supported encodings, sample rates, and bit depths are from the spec.
6051
+ */
6052
+ interface GladiaStreamingOptions {
6053
+ /** Audio encoding format - only Gladia-supported formats (type-safe enum) */
6054
+ encoding?: StreamingSupportedEncodingEnum;
6055
+ /** Sample rate - only Gladia-supported rates (type-safe enum) */
6056
+ sampleRate?: StreamingSupportedSampleRateEnum;
6057
+ /** Bit depth - only Gladia-supported depths (type-safe enum) */
6058
+ bitDepth?: StreamingSupportedBitDepthEnum;
6059
+ /** Number of audio channels (1-8) */
6060
+ channels?: number;
6061
+ /** Endpointing duration in seconds (0.01-10) */
6062
+ endpointing?: number;
6063
+ /** Language configuration */
6064
+ languageConfig?: LanguageConfig;
6065
+ /** Interim/partial results */
6066
+ interimResults?: boolean;
6067
+ }
6068
+ /**
6069
+ * Deepgram streaming options (from OpenAPI spec)
6070
+ *
6071
+ * Based on the generated `ListenV1MediaTranscribeParams` type from Deepgram's OpenAPI spec.
6072
+ * All supported options come directly from the spec and use the generated, strongly typed parameter enums.
6073
+ *
6074
+ * @see https://developers.deepgram.com/docs/streaming
6075
+ */
6076
+ interface DeepgramStreamingOptions {
6077
+ /**
6078
+ * Audio encoding format
6079
+ * Use `DeepgramEncoding` const for autocomplete:
6080
+ * @example
6081
+ * ```typescript
6082
+ * import { DeepgramEncoding } from '@meeting-baas/sdk'
6083
+ * { encoding: DeepgramEncoding.linear16 }
6084
+ * ```
6085
+ */
6086
+ encoding?: (typeof ListenV1EncodingParameter)[keyof typeof ListenV1EncodingParameter];
6087
+ /** Sample rate in Hz */
6088
+ sampleRate?: number;
6089
+ /** Number of audio channels */
6090
+ channels?: number;
6091
+ /** Language code (BCP-47 format, e.g., 'en', 'en-US', 'es') */
6092
+ language?: ListenV1LanguageParameter;
6093
+ /**
6094
+ * Model to use for transcription
6095
+ * Use `DeepgramModel` const for autocomplete:
6096
+ * @example
6097
+ * ```typescript
6098
+ * import { DeepgramModel } from '@meeting-baas/sdk'
6099
+ * { model: DeepgramModel["nova-3"] }
6100
+ * { model: DeepgramModel["nova-2-medical"] }
6101
+ * ```
6102
+ */
6103
+ model?: ListenV1ModelParameter | DeepgramModelType;
6104
+ /** Model version (e.g., 'latest') */
6105
+ version?: ListenV1VersionParameter;
6106
+ /** Enable language detection */
6107
+ languageDetection?: boolean;
6108
+ /** Enable speaker diarization */
6109
+ diarization?: boolean;
6110
+ /** Enable punctuation */
6111
+ punctuate?: boolean;
6112
+ /** Enable smart formatting (dates, numbers, etc.) */
6113
+ smartFormat?: boolean;
6114
+ /** Enable interim results (partial transcripts) */
6115
+ interimResults?: boolean;
6116
+ /** Enable filler words detection ("uh", "um") */
6117
+ fillerWords?: boolean;
6118
+ /** Convert written numbers to digits ("twenty" -> "20") */
6119
+ numerals?: boolean;
6120
+ /** Convert measurements to abbreviations ("five meters" -> "5m") */
6121
+ measurements?: boolean;
6122
+ /** Enable paragraph formatting */
6123
+ paragraphs?: boolean;
6124
+ /** Enable profanity filtering */
6125
+ profanityFilter?: boolean;
6126
+ /** Enable dictation mode (optimized for dictation) */
6127
+ dictation?: boolean;
6128
+ /** Utterance split duration threshold in milliseconds */
6129
+ utteranceSplit?: number;
6130
+ /** Enable real-time sentiment analysis */
6131
+ sentiment?: boolean;
6132
+ /** Enable entity detection */
6133
+ detectEntities?: boolean;
6134
+ /** Enable topic detection */
6135
+ topics?: boolean;
6136
+ /** Custom topic definitions */
6137
+ customTopic?: string[];
6138
+ /**
6139
+ * Custom topic detection mode
6140
+ * Use `DeepgramTopicMode` const for autocomplete
6141
+ */
6142
+ customTopicMode?: SharedCustomTopicModeParameter;
6143
+ /** Enable intent recognition */
6144
+ intents?: boolean;
6145
+ /** Custom intent definitions */
6146
+ customIntent?: string[];
6147
+ /**
6148
+ * Custom intent detection mode
6149
+ * Use `DeepgramTopicMode` const for autocomplete
6150
+ */
6151
+ customIntentMode?: SharedCustomTopicModeParameter;
6152
+ /** Enable summarization */
6153
+ summarize?: boolean;
6154
+ /** Custom vocabulary/keywords for boosting */
6155
+ keywords?: string | string[];
6156
+ /**
6157
+ * Key term prompting (Nova-3 only)
6158
+ * More powerful than keywords - provides context about terms
6159
+ */
6160
+ keyterm?: string[];
6161
+ /**
6162
+ * Enable PII redaction
6163
+ * Use `DeepgramRedact` const for autocomplete:
6164
+ * @example
6165
+ * ```typescript
6166
+ * import { DeepgramRedact } from '@meeting-baas/sdk'
6167
+ * { redact: [DeepgramRedact.pii, DeepgramRedact.pci] }
6168
+ * ```
6169
+ */
6170
+ redact?: boolean | ListenV1RedactParameterOneOfItem[];
6171
+ /** Callback URL for webhooks */
6172
+ callback?: string;
6173
+ /** Extra metadata to include in response */
6174
+ extra?: Record<string, unknown>;
6175
+ /** Tags to include in response */
6176
+ tag?: string[];
6177
+ /**
6178
+ * Endpointing mode for VAD
6179
+ * - number: silence duration in ms to trigger endpoint
6180
+ * - false: disable VAD endpointing
6181
+ */
6182
+ endpointing?: number | false;
6183
+ /** Voice activity detection threshold (0-1) */
6184
+ vadThreshold?: number;
6185
+ }
6186
+
6187
+ /**
6188
+ * AssemblyAI streaming options
6189
+ *
6190
+ * Based on the v3 Universal Streaming API parameters from the AssemblyAI SDK.
6191
+ * Supports advanced features like VAD tuning, end-of-turn detection, and profanity filtering.
6192
+ *
6193
+ * @see https://www.assemblyai.com/docs/speech-to-text/streaming
6194
+ */
6195
+ interface AssemblyAIStreamingOptions {
6196
+ /**
6197
+ * Sample rate in Hz
6198
+ * Use `AssemblyAISampleRate` const for autocomplete:
6199
+ * @example
6200
+ * ```typescript
6201
+ * import { AssemblyAISampleRate } from '@meeting-baas/sdk'
6202
+ * { sampleRate: AssemblyAISampleRate.rate16000 }
6203
+ * ```
6204
+ */
6205
+ sampleRate?: AssemblyAISampleRateType;
6206
+ /**
6207
+ * Audio encoding format
6208
+ * Use `AssemblyAIEncoding` const for autocomplete:
6209
+ * @example
6210
+ * ```typescript
6211
+ * import { AssemblyAIEncoding } from '@meeting-baas/sdk'
6212
+ * { encoding: AssemblyAIEncoding.pcmS16le }
6213
+ * ```
6214
+ */
6215
+ encoding?: AssemblyAIEncodingType;
6216
+ /**
6217
+ * Speech model to use
6218
+ * Use `AssemblyAISpeechModel` const for autocomplete:
6219
+ * @example
6220
+ * ```typescript
6221
+ * import { AssemblyAISpeechModel } from '@meeting-baas/sdk'
6222
+ * { speechModel: AssemblyAISpeechModel.english }
6223
+ * { speechModel: AssemblyAISpeechModel.multilingual }
6224
+ * ```
6225
+ */
6226
+ speechModel?: AssemblyAISpeechModelType | StreamingSpeechModel;
6227
+ /** Enable automatic language detection */
6228
+ languageDetection?: boolean;
6229
+ /**
6230
+ * Confidence threshold for end-of-turn detection (0-1)
6231
+ * Higher values require more confidence before ending a turn
6232
+ * @default 0.5
6233
+ */
6234
+ endOfTurnConfidenceThreshold?: number;
6235
+ /**
6236
+ * Minimum silence duration (ms) to trigger end-of-turn when confident
6237
+ * Only applies when confidence is above threshold
6238
+ * @default 1000
6239
+ */
6240
+ minEndOfTurnSilenceWhenConfident?: number;
6241
+ /**
6242
+ * Maximum silence duration (ms) before forcing end-of-turn
6243
+ * Regardless of confidence level
6244
+ * @default 20000
6245
+ */
6246
+ maxTurnSilence?: number;
6247
+ /**
6248
+ * VAD sensitivity threshold (0-1)
6249
+ * Lower values are more sensitive to quiet speech
6250
+ */
6251
+ vadThreshold?: number;
6252
+ /**
6253
+ * Enable real-time text formatting of turns
6254
+ * Applies punctuation, capitalization, and formatting
6255
+ */
6256
+ formatTurns?: boolean;
6257
+ /** Filter profanity in real-time transcription */
6258
+ filterProfanity?: boolean;
6259
+ /**
6260
+ * Key terms to boost in recognition
6261
+ * Increases recognition accuracy for specific words/phrases
6262
+ */
6263
+ keyterms?: string[];
6264
+ /**
6265
+ * Key term prompting for context
6266
+ * Provides additional context about the terms to improve recognition
6267
+ */
6268
+ keytermsPrompt?: string[];
6269
+ /**
6270
+ * Inactivity timeout in milliseconds
6271
+ * Session will close if no audio is received for this duration
6272
+ */
6273
+ inactivityTimeout?: number;
6274
+ /**
6275
+ * Use token-based authentication
6276
+ * If true, will create a temporary token before connecting
6277
+ */
6278
+ useToken?: boolean;
6279
+ /**
6280
+ * Token expiration time in seconds (minimum 60)
6281
+ * Only used if useToken is true
6282
+ * @default 3600
6283
+ */
6284
+ tokenExpiresIn?: number;
6285
+ }
6286
+ /**
6287
+ * AssemblyAI dynamic configuration update
6288
+ * Can be sent mid-stream to adjust parameters
6289
+ */
6290
+ type AssemblyAIUpdateConfiguration = Omit<StreamingUpdateConfiguration, "type">;
6291
+ /**
6292
+ * Union of all provider-specific streaming options
6293
+ */
6294
+ type ProviderStreamingOptions = ({
6295
+ provider: "gladia";
6296
+ } & GladiaStreamingOptions) | ({
6297
+ provider: "deepgram";
6298
+ } & DeepgramStreamingOptions) | ({
6299
+ provider: "assemblyai";
6300
+ } & AssemblyAIStreamingOptions);
6301
+ /**
6302
+ * Type-safe streaming options for a specific provider
6303
+ */
6304
+ type StreamingOptionsForProvider<P extends StreamingProvider> = P extends "gladia" ? GladiaStreamingOptions : P extends "deepgram" ? DeepgramStreamingOptions : P extends "assemblyai" ? AssemblyAIStreamingOptions : never;
6305
+ /**
6306
+ * Type-safe transcribeStream parameters for a specific provider
6307
+ */
6308
+ interface TranscribeStreamParams<P extends StreamingProvider> {
6309
+ /** Streaming options specific to this provider */
6310
+ options?: StreamingOptionsForProvider<P> & {
6311
+ provider: P;
6312
+ };
6313
+ /** Event callbacks */
6314
+ callbacks?: StreamingCallbacks;
6315
+ }
6316
+
5892
6317
  /**
5893
6318
  * Unified types for the Voice Router SDK
5894
6319
  * These types provide a provider-agnostic interface for transcription services
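The const objects and option interfaces added in this hunk compose as in the sketch below. It is illustrative only: the `voice-router-dev` import specifier and the chosen option values are assumptions, while the member names (`DeepgramModel["nova-3"]`, `AssemblyAIEncoding.pcmS16le`, `AssemblyAISampleRate.rate16000`, `AssemblyAISpeechModel.multilingual`) come from the declarations shown above.

```typescript
import {
  DeepgramModel,
  AssemblyAIEncoding,
  AssemblyAISampleRate,
  AssemblyAISpeechModel,
  type DeepgramStreamingOptions,
  type AssemblyAIStreamingOptions,
} from "voice-router-dev";

// Deepgram: hyphenated model names require bracket access on the const object.
const deepgramOpts: DeepgramStreamingOptions = {
  model: DeepgramModel["nova-3"],
  interimResults: true,
  smartFormat: true,
};

// AssemblyAI: sample rate and encoding come from the numeric/string const maps.
const assemblyaiOpts: AssemblyAIStreamingOptions = {
  sampleRate: AssemblyAISampleRate.rate16000,
  encoding: AssemblyAIEncoding.pcmS16le,
  speechModel: AssemblyAISpeechModel.multilingual,
  formatTurns: true,
};
```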
@@ -6146,7 +6571,7 @@ interface Speaker {
6146
6571
  */
6147
6572
  interface Word {
6148
6573
  /** The transcribed word */
6149
- text: string;
6574
+ word: string;
6150
6575
  /** Start time in seconds */
6151
6576
  start: number;
6152
6577
  /** End time in seconds */
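The `Word` rename above (`text` becomes `word`) is a breaking change for consumers that read word-level timings. A minimal sketch (the helper name and import specifier are assumptions; the fields are those declared here):

```typescript
import type { Word } from "voice-router-dev";

// In 0.2.x this read `w.text`; in 0.3.0 the field is `w.word`.
function joinWords(words: Word[]): string {
  return words
    .map((w) => `${w.word} [${w.start.toFixed(2)}s-${w.end.toFixed(2)}s]`)
    .join(" ");
}
```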
@@ -6308,7 +6733,7 @@ interface UnifiedTranscriptResponse<P extends TranscriptionProvider = Transcript
6308
6733
  /**
6309
6734
  * Streaming transcription event types
6310
6735
  */
6311
- type StreamEventType = "open" | "transcript" | "utterance" | "metadata" | "error" | "close";
6736
+ type StreamEventType = "open" | "transcript" | "utterance" | "metadata" | "error" | "close" | "speech_start" | "speech_end" | "translation" | "sentiment" | "entity" | "summarization" | "chapterization" | "audio_ack" | "lifecycle";
6312
6737
  /**
6313
6738
  * Streaming transcription event
6314
6739
  */
@@ -6326,6 +6751,10 @@ interface StreamEvent {
6326
6751
  speaker?: string;
6327
6752
  /** Confidence score for this event */
6328
6753
  confidence?: number;
6754
+ /** Language of the transcript/utterance */
6755
+ language?: string;
6756
+ /** Channel number for multi-channel audio */
6757
+ channel?: number;
6329
6758
  /** Error information (for type: "error") */
6330
6759
  error?: {
6331
6760
  code: string;
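With the widened `StreamEventType` union and the new `language`/`channel` fields, a single handler can branch on `event.type`. An illustrative sketch that only touches fields visible in this diff (the import specifier is assumed):

```typescript
import type { StreamEvent } from "voice-router-dev";

function handleStreamEvent(event: StreamEvent): void {
  switch (event.type) {
    case "transcript":
    case "utterance":
      // New in 0.3.0: language and channel accompany multi-language / multi-channel streams.
      console.log(`[${event.language ?? "und"}] channel ${event.channel ?? 0}`, event.data);
      break;
    case "error":
      console.error(event.error?.code, event.error?.message);
      break;
    default:
      // speech_start, speech_end, translation, sentiment, entity, summarization,
      // chapterization, audio_ack, lifecycle, open, metadata, close
      console.log(event.type, event.data);
  }
}
```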
@@ -6335,6 +6764,109 @@ interface StreamEvent {
6335
6764
  /** Additional event data */
6336
6765
  data?: unknown;
6337
6766
  }
6767
+ /**
6768
+ * Speech event data (for speech_start/speech_end events)
6769
+ */
6770
+ interface SpeechEvent {
6771
+ /** Event type: speech_start or speech_end */
6772
+ type: "speech_start" | "speech_end";
6773
+ /** Timestamp in seconds */
6774
+ timestamp: number;
6775
+ /** Channel number */
6776
+ channel?: number;
6777
+ /** Session ID */
6778
+ sessionId?: string;
6779
+ }
6780
+ /**
6781
+ * Translation event data (for real-time translation)
6782
+ */
6783
+ interface TranslationEvent {
6784
+ /** Utterance ID this translation belongs to */
6785
+ utteranceId?: string;
6786
+ /** Original text */
6787
+ original?: string;
6788
+ /** Target language */
6789
+ targetLanguage: string;
6790
+ /** Translated text */
6791
+ translatedText: string;
6792
+ /** Whether this is a final translation */
6793
+ isFinal?: boolean;
6794
+ }
6795
+ /**
6796
+ * Sentiment analysis result (for real-time sentiment)
6797
+ */
6798
+ interface SentimentEvent {
6799
+ /** Utterance ID this sentiment belongs to */
6800
+ utteranceId?: string;
6801
+ /** Sentiment label (positive, negative, neutral) */
6802
+ sentiment: string;
6803
+ /** Confidence score 0-1 */
6804
+ confidence?: number;
6805
+ }
6806
+ /**
6807
+ * Named entity recognition result
6808
+ */
6809
+ interface EntityEvent {
6810
+ /** Utterance ID this entity belongs to */
6811
+ utteranceId?: string;
6812
+ /** Entity text */
6813
+ text: string;
6814
+ /** Entity type (PERSON, ORGANIZATION, LOCATION, etc.) */
6815
+ type: string;
6816
+ /** Start position */
6817
+ start?: number;
6818
+ /** End position */
6819
+ end?: number;
6820
+ }
6821
+ /**
6822
+ * Post-processing summarization event
6823
+ */
6824
+ interface SummarizationEvent {
6825
+ /** Full summarization text */
6826
+ summary: string;
6827
+ /** Error if summarization failed */
6828
+ error?: string;
6829
+ }
6830
+ /**
6831
+ * Post-processing chapterization event
6832
+ */
6833
+ interface ChapterizationEvent {
6834
+ /** Generated chapters */
6835
+ chapters: Array<{
6836
+ /** Chapter title/headline */
6837
+ headline: string;
6838
+ /** Chapter summary */
6839
+ summary: string;
6840
+ /** Start time in seconds */
6841
+ start: number;
6842
+ /** End time in seconds */
6843
+ end: number;
6844
+ }>;
6845
+ /** Error if chapterization failed */
6846
+ error?: string;
6847
+ }
6848
+ /**
6849
+ * Audio chunk acknowledgment event
6850
+ */
6851
+ interface AudioAckEvent {
6852
+ /** Byte range of the acknowledged audio chunk [start, end] */
6853
+ byteRange?: [number, number];
6854
+ /** Time range in seconds of the acknowledged audio chunk [start, end] */
6855
+ timeRange?: [number, number];
6856
+ /** Acknowledgment timestamp */
6857
+ timestamp?: string;
6858
+ }
6859
+ /**
6860
+ * Lifecycle event (session start, recording end, etc.)
6861
+ */
6862
+ interface LifecycleEvent {
6863
+ /** Lifecycle event type */
6864
+ eventType: "start_session" | "start_recording" | "stop_recording" | "end_recording" | "end_session";
6865
+ /** Event timestamp */
6866
+ timestamp?: string;
6867
+ /** Session ID */
6868
+ sessionId?: string;
6869
+ }
6338
6870
  /**
6339
6871
  * Audio chunk for streaming transcription
6340
6872
  */
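The event payload interfaces above are all exported, so downstream handlers can be typed against them directly. Illustrative sinks (names and logging are arbitrary; the fields are as declared above):

```typescript
import type {
  TranslationEvent,
  SentimentEvent,
  EntityEvent,
  ChapterizationEvent,
} from "voice-router-dev";

const logTranslation = (e: TranslationEvent) =>
  console.log(`${e.targetLanguage}: ${e.translatedText}${e.isFinal ? " (final)" : ""}`);

const logSentiment = (e: SentimentEvent) =>
  console.log(`sentiment=${e.sentiment} confidence=${e.confidence ?? "n/a"}`);

const logEntity = (e: EntityEvent) => console.log(`${e.type}: ${e.text}`);

const logChapters = (e: ChapterizationEvent) =>
  e.chapters.forEach((c) => console.log(`${c.headline} (${c.start}s-${c.end}s): ${c.summary}`));
```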
@@ -6424,6 +6956,56 @@ interface StreamingOptions extends Omit<TranscribeOptions, "webhookUrl"> {
6424
6956
  * ```
6425
6957
  */
6426
6958
  gladiaStreaming?: Partial<Omit<StreamingRequest, "encoding" | "sample_rate" | "bit_depth" | "channels">>;
6959
+ /**
6960
+ * Deepgram-specific streaming options (passed to WebSocket URL)
6961
+ *
6962
+ * Includes filler_words, numerals, measurements, paragraphs,
6963
+ * profanity_filter, topics, intents, custom_topic, custom_intent,
6964
+ * keyterm, dictation, utt_split, and more.
6965
+ *
6966
+ * @see https://developers.deepgram.com/docs/streaming
6967
+ *
6968
+ * @example
6969
+ * ```typescript
6970
+ * await adapter.transcribeStream({
6971
+ * deepgramStreaming: {
6972
+ * fillerWords: true,
6973
+ * profanityFilter: true,
6974
+ * topics: true,
6975
+ * intents: true,
6976
+ * customTopic: ['sales', 'support'],
6977
+ * customIntent: ['purchase', 'complaint'],
6978
+ * numerals: true
6979
+ * }
6980
+ * });
6981
+ * ```
6982
+ */
6983
+ deepgramStreaming?: DeepgramStreamingOptions;
6984
+ /**
6985
+ * AssemblyAI-specific streaming options (passed to WebSocket URL & configuration)
6986
+ *
6987
+ * Includes end-of-turn detection tuning, VAD threshold, profanity filter,
6988
+ * keyterms, speech model selection, and language detection.
6989
+ *
6990
+ * @see https://www.assemblyai.com/docs/speech-to-text/streaming
6991
+ *
6992
+ * @example
6993
+ * ```typescript
6994
+ * await adapter.transcribeStream({
6995
+ * assemblyaiStreaming: {
6996
+ * speechModel: 'universal-streaming-multilingual',
6997
+ * languageDetection: true,
6998
+ * endOfTurnConfidenceThreshold: 0.7,
6999
+ * minEndOfTurnSilenceWhenConfident: 500,
7000
+ * vadThreshold: 0.3,
7001
+ * formatTurns: true,
7002
+ * filterProfanity: true,
7003
+ * keyterms: ['TypeScript', 'JavaScript', 'API']
7004
+ * }
7005
+ * });
7006
+ * ```
7007
+ */
7008
+ assemblyaiStreaming?: AssemblyAIStreamingOptions;
6427
7009
  }
6428
7010
  /**
6429
7011
  * Callback functions for streaming events
@@ -6445,6 +7027,24 @@ interface StreamingCallbacks {
6445
7027
  }) => void;
6446
7028
  /** Called when the stream is closed */
6447
7029
  onClose?: (code?: number, reason?: string) => void;
7030
+ /** Called when speech starts (Gladia: requires receive_speech_events) */
7031
+ onSpeechStart?: (event: SpeechEvent) => void;
7032
+ /** Called when speech ends (Gladia: requires receive_speech_events) */
7033
+ onSpeechEnd?: (event: SpeechEvent) => void;
7034
+ /** Called for real-time translation (Gladia: requires translation enabled) */
7035
+ onTranslation?: (event: TranslationEvent) => void;
7036
+ /** Called for real-time sentiment analysis (Gladia: requires sentiment_analysis enabled) */
7037
+ onSentiment?: (event: SentimentEvent) => void;
7038
+ /** Called for named entity recognition (Gladia: requires named_entity_recognition enabled) */
7039
+ onEntity?: (event: EntityEvent) => void;
7040
+ /** Called when post-processing summarization completes (Gladia: requires summarization enabled) */
7041
+ onSummarization?: (event: SummarizationEvent) => void;
7042
+ /** Called when post-processing chapterization completes (Gladia: requires chapterization enabled) */
7043
+ onChapterization?: (event: ChapterizationEvent) => void;
7044
+ /** Called for audio chunk acknowledgments (Gladia: requires receive_acknowledgments) */
7045
+ onAudioAck?: (event: AudioAckEvent) => void;
7046
+ /** Called for session lifecycle events (Gladia: requires receive_lifecycle_events) */
7047
+ onLifecycle?: (event: LifecycleEvent) => void;
6448
7048
  }
6449
7049
  /**
6450
7050
  * Represents an active streaming transcription session
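Taken together, the new hooks slot into a `StreamingCallbacks` object alongside the existing ones, and every hook stays optional. A sketch with illustrative handler bodies (import specifier assumed):

```typescript
import type { StreamingCallbacks } from "voice-router-dev";

const callbacks: StreamingCallbacks = {
  onTranscript: (event) => console.log("transcript", event),
  onSpeechStart: (e) => console.log("speech started at", e.timestamp),
  onSpeechEnd: (e) => console.log("speech ended at", e.timestamp),
  onTranslation: (e) => console.log(`${e.targetLanguage}:`, e.translatedText),
  onSummarization: (e) => (e.error ? console.error(e.error) : console.log(e.summary)),
  onLifecycle: (e) => console.log("lifecycle:", e.eventType),
};
```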
@@ -6661,107 +7261,6 @@ declare abstract class BaseAdapter implements TranscriptionAdapter {
6661
7261
  }): Promise<UnifiedTranscriptResponse>;
6662
7262
  }
6663
7263
 
6664
- /**
6665
- * Provider-specific streaming option types using OpenAPI-generated schemas
6666
- *
6667
- * These types provide compile-time safety by restricting options to what
6668
- * each provider actually supports according to their OpenAPI specifications.
6669
- */
6670
-
6671
- /**
6672
- * Gladia streaming options (from OpenAPI spec)
6673
- *
6674
- * Based on the generated `StreamingRequest` type from Gladia's OpenAPI spec.
6675
- * All supported encodings, sample rates, and bit depths are from the spec.
6676
- */
6677
- interface GladiaStreamingOptions {
6678
- /** Audio encoding format - only Gladia-supported formats (type-safe enum) */
6679
- encoding?: StreamingSupportedEncodingEnum;
6680
- /** Sample rate - only Gladia-supported rates (type-safe enum) */
6681
- sampleRate?: StreamingSupportedSampleRateEnum;
6682
- /** Bit depth - only Gladia-supported depths (type-safe enum) */
6683
- bitDepth?: StreamingSupportedBitDepthEnum;
6684
- /** Number of audio channels (1-8) */
6685
- channels?: number;
6686
- /** Endpointing duration in seconds (0.01-10) */
6687
- endpointing?: number;
6688
- /** Language configuration */
6689
- languageConfig?: LanguageConfig;
6690
- /** Interim/partial results */
6691
- interimResults?: boolean;
6692
- }
6693
- /**
6694
- * Deepgram streaming options (from OpenAPI spec)
6695
- *
6696
- * Based on the generated `ListenV1MediaTranscribeParams` type from Deepgram's OpenAPI spec.
6697
- * All supported options come directly from the spec. Now using properly typed parameter enums!
6698
- */
6699
- interface DeepgramStreamingOptions {
6700
- /** Audio encoding format - type-safe enum from OpenAPI spec */
6701
- encoding?: (typeof ListenV1EncodingParameter)[keyof typeof ListenV1EncodingParameter];
6702
- /** Sample rate in Hz */
6703
- sampleRate?: number;
6704
- /** Language code - type-safe from OpenAPI spec (BCP-47 format, e.g., 'en', 'en-US', 'es') */
6705
- language?: ListenV1LanguageParameter;
6706
- /** Model to use - type-safe union from OpenAPI spec */
6707
- model?: ListenV1ModelParameter;
6708
- /** Model version - type-safe from OpenAPI spec (e.g., 'latest') */
6709
- version?: ListenV1VersionParameter;
6710
- /** Enable speaker diarization */
6711
- diarization?: boolean;
6712
- /** Enable language detection */
6713
- languageDetection?: boolean;
6714
- /** Enable punctuation */
6715
- punctuate?: boolean;
6716
- /** Enable smart formatting */
6717
- smartFormat?: boolean;
6718
- /** Enable interim results */
6719
- interimResults?: boolean;
6720
- /** Callback URL for webhooks */
6721
- webhookUrl?: string;
6722
- /** Custom vocabulary/keywords */
6723
- keywords?: string | string[];
6724
- /** Number of audio channels */
6725
- channels?: number;
6726
- }
6727
- /**
6728
- * AssemblyAI streaming options
6729
- *
6730
- * AssemblyAI's streaming API is simpler - it only requires sample_rate.
6731
- * Note: AssemblyAI only supports PCM16 encoding for streaming.
6732
- */
6733
- interface AssemblyAIStreamingOptions {
6734
- /** Sample rate in Hz (8000 or 16000 recommended) */
6735
- sampleRate?: 8000 | 16000 | 22050 | 44100 | 48000;
6736
- /** Enable word-level timestamps */
6737
- wordTimestamps?: boolean;
6738
- }
6739
- /**
6740
- * Union of all provider-specific streaming options
6741
- */
6742
- type ProviderStreamingOptions = ({
6743
- provider: "gladia";
6744
- } & GladiaStreamingOptions) | ({
6745
- provider: "deepgram";
6746
- } & DeepgramStreamingOptions) | ({
6747
- provider: "assemblyai";
6748
- } & AssemblyAIStreamingOptions);
6749
- /**
6750
- * Type-safe streaming options for a specific provider
6751
- */
6752
- type StreamingOptionsForProvider<P extends StreamingProvider> = P extends "gladia" ? GladiaStreamingOptions : P extends "deepgram" ? DeepgramStreamingOptions : P extends "assemblyai" ? AssemblyAIStreamingOptions : never;
6753
- /**
6754
- * Type-safe transcribeStream parameters for a specific provider
6755
- */
6756
- interface TranscribeStreamParams<P extends StreamingProvider> {
6757
- /** Streaming options specific to this provider */
6758
- options?: StreamingOptionsForProvider<P> & {
6759
- provider: P;
6760
- };
6761
- /** Event callbacks */
6762
- callbacks?: StreamingCallbacks;
6763
- }
6764
-
6765
7264
  /**
6766
7265
  * Configuration for VoiceRouter
6767
7266
  */
@@ -7346,11 +7845,46 @@ declare class GladiaAdapter extends BaseAdapter {
7346
7845
  * Creates a WebSocket connection to Gladia for streaming transcription.
7347
7846
  * First initializes a session via REST API, then connects to WebSocket.
7348
7847
  *
7848
+ * Supports all Gladia streaming features:
7849
+ * - Real-time transcription with interim/final results
7850
+ * - Speech detection events (speech_start, speech_end)
7851
+ * - Real-time translation to other languages
7852
+ * - Real-time sentiment analysis
7853
+ * - Real-time named entity recognition
7854
+ * - Post-processing summarization and chapterization
7855
+ * - Audio preprocessing (audio enhancement, speech threshold)
7856
+ * - Custom vocabulary and spelling
7857
+ * - Multi-language code switching
7858
+ *
7349
7859
  * @param options - Streaming configuration options
7860
+ * @param options.encoding - Audio encoding (wav/pcm, wav/alaw, wav/ulaw)
7861
+ * @param options.sampleRate - Sample rate (8000, 16000, 32000, 44100, 48000)
7862
+ * @param options.bitDepth - Bit depth (8, 16, 24, 32)
7863
+ * @param options.channels - Number of channels (1-8)
7864
+ * @param options.language - Language code for transcription
7865
+ * @param options.interimResults - Enable partial/interim transcripts
7866
+ * @param options.endpointing - Silence duration to end utterance (0.01-10 seconds)
7867
+ * @param options.maxSilence - Max duration without endpointing (5-60 seconds)
7868
+ * @param options.customVocabulary - Words to boost in recognition
7869
+ * @param options.sentimentAnalysis - Enable real-time sentiment analysis
7870
+ * @param options.entityDetection - Enable named entity recognition
7871
+ * @param options.summarization - Enable post-processing summarization
7872
+ * @param options.gladiaStreaming - Full Gladia streaming options (pre_processing, realtime_processing, post_processing, messages_config)
7350
7873
  * @param callbacks - Event callbacks for transcription results
7874
+ * @param callbacks.onTranscript - Interim/final transcript received
7875
+ * @param callbacks.onUtterance - Complete utterance detected
7876
+ * @param callbacks.onSpeechStart - Speech detected (requires messages_config.receive_speech_events)
7877
+ * @param callbacks.onSpeechEnd - Speech ended (requires messages_config.receive_speech_events)
7878
+ * @param callbacks.onTranslation - Translation result (requires translation enabled)
7879
+ * @param callbacks.onSentiment - Sentiment analysis result
7880
+ * @param callbacks.onEntity - Named entity detected
7881
+ * @param callbacks.onSummarization - Summarization completed
7882
+ * @param callbacks.onChapterization - Chapterization completed
7883
+ * @param callbacks.onAudioAck - Audio chunk acknowledged
7884
+ * @param callbacks.onLifecycle - Session lifecycle events
7351
7885
  * @returns Promise that resolves with a StreamingSession
7352
7886
  *
7353
- * @example Real-time streaming
7887
+ * @example Basic real-time streaming
7354
7888
  * ```typescript
7355
7889
  * const session = await adapter.transcribeStream({
7356
7890
  * encoding: 'wav/pcm',
@@ -7372,14 +7906,71 @@ declare class GladiaAdapter extends BaseAdapter {
7372
7906
  * });
7373
7907
  *
7374
7908
  * // Send audio chunks
7375
- * const audioChunk = getAudioChunk(); // Your audio source
7909
+ * const audioChunk = getAudioChunk();
7376
7910
  * await session.sendAudio({ data: audioChunk });
7377
7911
  *
7378
7912
  * // Close when done
7379
7913
  * await session.close();
7380
7914
  * ```
7915
+ *
7916
+ * @example Advanced streaming with all features
7917
+ * ```typescript
7918
+ * const session = await adapter.transcribeStream({
7919
+ * encoding: 'wav/pcm',
7920
+ * sampleRate: 16000,
7921
+ * language: 'en',
7922
+ * sentimentAnalysis: true,
7923
+ * entityDetection: true,
7924
+ * summarization: true,
7925
+ * gladiaStreaming: {
7926
+ * pre_processing: {
7927
+ * audio_enhancer: true,
7928
+ * speech_threshold: 0.5
7929
+ * },
7930
+ * realtime_processing: {
7931
+ * translation: true,
7932
+ * translation_config: { target_languages: ['fr', 'es'] }
7933
+ * },
7934
+ * post_processing: {
7935
+ * chapterization: true
7936
+ * },
7937
+ * messages_config: {
7938
+ * receive_speech_events: true,
7939
+ * receive_acknowledgments: true,
7940
+ * receive_lifecycle_events: true
7941
+ * }
7942
+ * }
7943
+ * }, {
7944
+ * onTranscript: (e) => console.log('Transcript:', e.text),
7945
+ * onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
7946
+ * onSpeechEnd: (e) => console.log('Speech ended at:', e.timestamp),
7947
+ * onTranslation: (e) => console.log(`${e.targetLanguage}: ${e.translatedText}`),
7948
+ * onSentiment: (e) => console.log('Sentiment:', e.sentiment),
7949
+ * onEntity: (e) => console.log(`Entity: ${e.type} - ${e.text}`),
7950
+ * onSummarization: (e) => console.log('Summary:', e.summary),
7951
+ * onChapterization: (e) => console.log('Chapters:', e.chapters),
7952
+ * onAudioAck: (e) => console.log('Audio ack:', e.byteRange),
7953
+ * onLifecycle: (e) => console.log('Lifecycle:', e.eventType)
7954
+ * });
7955
+ * ```
7381
7956
  */
7382
7957
  transcribeStream(options?: StreamingOptions, callbacks?: StreamingCallbacks): Promise<StreamingSession>;
7958
+ /**
7959
+ * Build streaming request with full type safety from OpenAPI specs
7960
+ *
7961
+ * Maps normalized options to Gladia streaming request format,
7962
+ * including all advanced features like pre-processing, real-time
7963
+ * processing, post-processing, and message configuration.
7964
+ */
7965
+ private buildStreamingRequest;
7966
+ /**
7967
+ * Handle all WebSocket message types from Gladia streaming
7968
+ *
7969
+ * Processes transcript, utterance, speech events, real-time processing
7970
+ * results (translation, sentiment, NER), post-processing results
7971
+ * (summarization, chapterization), acknowledgments, and lifecycle events.
7972
+ */
7973
+ private handleWebSocketMessage;
7383
7974
  }
7384
7975
  /**
7385
7976
  * Factory function to create a Gladia adapter
@@ -7575,19 +8166,37 @@ declare class AssemblyAIAdapter extends BaseAdapter {
7575
8166
  * Stream audio for real-time transcription
7576
8167
  *
7577
8168
  * Creates a WebSocket connection to AssemblyAI for streaming transcription.
7578
- * First obtains a temporary token, then connects and streams audio chunks.
8169
+ * Uses the v3 Universal Streaming API with full support for all parameters.
8170
+ *
8171
+ * Supports all AssemblyAI streaming features:
8172
+ * - Real-time transcription with interim/final results (Turn events)
8173
+ * - End-of-turn detection tuning (confidence threshold, silence duration)
8174
+ * - Voice Activity Detection (VAD) threshold tuning
8175
+ * - Real-time text formatting
8176
+ * - Profanity filtering
8177
+ * - Custom vocabulary (keyterms)
8178
+ * - Language detection
8179
+ * - Model selection (English or Multilingual)
8180
+ * - Dynamic configuration updates mid-stream
8181
+ * - Force endpoint command
7579
8182
  *
7580
8183
  * @param options - Streaming configuration options
8184
+ * @param options.sampleRate - Sample rate (8000, 16000, 22050, 44100, 48000)
8185
+ * @param options.encoding - Audio encoding (pcm_s16le, pcm_mulaw)
8186
+ * @param options.assemblyaiStreaming - All AssemblyAI-specific streaming options
7581
8187
  * @param callbacks - Event callbacks for transcription results
7582
- * @returns Promise that resolves with a StreamingSession
7583
- *
7584
- * @example Real-time streaming
8188
+ * @param callbacks.onTranscript - Interim/final transcript received (Turn event)
8189
+ * @param callbacks.onUtterance - Complete utterance (Turn with end_of_turn=true)
8190
+ * @param callbacks.onMetadata - Session metadata (Begin, Termination events)
8191
+ * @param callbacks.onError - Error occurred
8192
+ * @param callbacks.onClose - Connection closed
8193
+ * @returns Promise that resolves with an extended StreamingSession
8194
+ *
8195
+ * @example Basic real-time streaming
7585
8196
  * ```typescript
7586
8197
  * const session = await adapter.transcribeStream({
7587
- * encoding: 'pcm_s16le',
7588
8198
  * sampleRate: 16000,
7589
- * language: 'en',
7590
- * interimResults: true
8199
+ * encoding: 'pcm_s16le'
7591
8200
  * }, {
7592
8201
  * onOpen: () => console.log('Connected'),
7593
8202
  * onTranscript: (event) => {
@@ -7602,14 +8211,56 @@ declare class AssemblyAIAdapter extends BaseAdapter {
7602
8211
  * });
7603
8212
  *
7604
8213
  * // Send audio chunks
7605
- * const audioChunk = getAudioChunk(); // Your audio source
8214
+ * const audioChunk = getAudioChunk();
7606
8215
  * await session.sendAudio({ data: audioChunk });
7607
8216
  *
7608
8217
  * // Close when done
7609
8218
  * await session.close();
7610
8219
  * ```
8220
+ *
8221
+ * @example Advanced streaming with all features
8222
+ * ```typescript
8223
+ * const session = await adapter.transcribeStream({
8224
+ * sampleRate: 16000,
8225
+ * assemblyaiStreaming: {
8226
+ * speechModel: 'universal-streaming-multilingual',
8227
+ * languageDetection: true,
8228
+ * endOfTurnConfidenceThreshold: 0.7,
8229
+ * minEndOfTurnSilenceWhenConfident: 500,
8230
+ * maxTurnSilence: 15000,
8231
+ * vadThreshold: 0.3,
8232
+ * formatTurns: true,
8233
+ * filterProfanity: true,
8234
+ * keyterms: ['TypeScript', 'JavaScript', 'API'],
8235
+ * inactivityTimeout: 60000
8236
+ * }
8237
+ * }, {
8238
+ * onTranscript: (e) => console.log('Transcript:', e.text),
8239
+ * onMetadata: (m) => console.log('Metadata:', m)
8240
+ * });
8241
+ *
8242
+ * // Update configuration mid-stream
8243
+ * session.updateConfiguration?.({
8244
+ * end_of_turn_confidence_threshold: 0.5,
8245
+ * vad_threshold: 0.2
8246
+ * });
8247
+ *
8248
+ * // Force endpoint detection
8249
+ * session.forceEndpoint?.();
8250
+ * ```
7611
8251
  */
7612
- transcribeStream(options?: StreamingOptions, callbacks?: StreamingCallbacks): Promise<StreamingSession>;
8252
+ transcribeStream(options?: StreamingOptions, callbacks?: StreamingCallbacks): Promise<StreamingSession & {
8253
+ updateConfiguration?: (config: Partial<Omit<StreamingUpdateConfiguration, "type">>) => void;
8254
+ forceEndpoint?: () => void;
8255
+ }>;
8256
+ /**
8257
+ * Build WebSocket URL with all streaming parameters
8258
+ */
8259
+ private buildStreamingUrl;
8260
+ /**
8261
+ * Handle all WebSocket message types from AssemblyAI streaming
8262
+ */
8263
+ private handleWebSocketMessage;
7613
8264
  }
7614
8265
  /**
7615
8266
  * Factory function to create an AssemblyAI adapter
@@ -7765,11 +8416,44 @@ declare class DeepgramAdapter extends BaseAdapter {
7765
8416
  * Creates a WebSocket connection to Deepgram for streaming transcription.
7766
8417
  * Send audio chunks via session.sendAudio() and receive results via callbacks.
7767
8418
  *
8419
+ * Supports all Deepgram streaming features:
8420
+ * - Real-time transcription with interim/final results
8421
+ * - Speech detection events (SpeechStarted, UtteranceEnd)
8422
+ * - Speaker diarization
8423
+ * - Language detection
8424
+ * - Real-time sentiment, entity detection, topics, intents
8425
+ * - Custom vocabulary (keywords, keyterms)
8426
+ * - PII redaction
8427
+ * - Filler words, numerals, measurements, paragraphs
8428
+ * - Profanity filtering
8429
+ * - Dictation mode
8430
+ *
7768
8431
  * @param options - Streaming configuration options
8432
+ * @param options.encoding - Audio encoding (linear16, flac, mulaw, opus, speex, g729)
8433
+ * @param options.sampleRate - Sample rate in Hz
8434
+ * @param options.channels - Number of audio channels
8435
+ * @param options.language - Language code for transcription
8436
+ * @param options.model - Model to use (nova-2, nova-3, base, enhanced, etc.)
8437
+ * @param options.diarization - Enable speaker identification
8438
+ * @param options.languageDetection - Auto-detect language
8439
+ * @param options.interimResults - Enable partial transcripts
8440
+ * @param options.summarization - Enable summarization
8441
+ * @param options.sentimentAnalysis - Enable sentiment analysis
8442
+ * @param options.entityDetection - Enable entity detection
8443
+ * @param options.piiRedaction - Enable PII redaction
8444
+ * @param options.customVocabulary - Keywords to boost recognition
8445
+ * @param options.deepgramStreaming - All Deepgram-specific streaming options
7769
8446
  * @param callbacks - Event callbacks for transcription results
8447
+ * @param callbacks.onTranscript - Interim/final transcript received
8448
+ * @param callbacks.onUtterance - Complete utterance detected
8449
+ * @param callbacks.onSpeechStart - Speech detected (Deepgram SpeechStarted)
8450
+ * @param callbacks.onSpeechEnd - Speech ended (Deepgram UtteranceEnd)
8451
+ * @param callbacks.onMetadata - Metadata received
8452
+ * @param callbacks.onError - Error occurred
8453
+ * @param callbacks.onClose - Connection closed
7770
8454
  * @returns Promise that resolves with a StreamingSession
7771
8455
  *
7772
- * @example Real-time streaming
8456
+ * @example Basic real-time streaming
7773
8457
  * ```typescript
7774
8458
  * const session = await adapter.transcribeStream({
7775
8459
  * encoding: 'linear16',
@@ -7792,14 +8476,53 @@ declare class DeepgramAdapter extends BaseAdapter {
7792
8476
  * });
7793
8477
  *
7794
8478
  * // Send audio chunks
7795
- * const audioChunk = getAudioChunk(); // Your audio source
8479
+ * const audioChunk = getAudioChunk();
7796
8480
  * await session.sendAudio({ data: audioChunk });
7797
8481
  *
7798
8482
  * // Close when done
7799
8483
  * await session.close();
7800
8484
  * ```
8485
+ *
8486
+ * @example Advanced streaming with all features
8487
+ * ```typescript
8488
+ * const session = await adapter.transcribeStream({
8489
+ * encoding: 'linear16',
8490
+ * sampleRate: 16000,
8491
+ * language: 'en',
8492
+ * model: 'nova-3',
8493
+ * diarization: true,
8494
+ * sentimentAnalysis: true,
8495
+ * entityDetection: true,
8496
+ * deepgramStreaming: {
8497
+ * fillerWords: true,
8498
+ * numerals: true,
8499
+ * profanityFilter: true,
8500
+ * topics: true,
8501
+ * intents: true,
8502
+ * customTopic: ['sales', 'support'],
8503
+ * customIntent: ['purchase', 'complaint'],
8504
+ * keyterm: ['TypeScript', 'JavaScript'],
8505
+ * utteranceSplit: 800,
8506
+ * punctuate: true,
8507
+ * smartFormat: true
8508
+ * }
8509
+ * }, {
8510
+ * onTranscript: (e) => console.log('Transcript:', e.text),
8511
+ * onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
8512
+ * onSpeechEnd: (e) => console.log('Utterance ended'),
8513
+ * onMetadata: (m) => console.log('Metadata:', m)
8514
+ * });
8515
+ * ```
7801
8516
  */
7802
8517
  transcribeStream(options?: StreamingOptions, callbacks?: StreamingCallbacks): Promise<StreamingSession>;
8518
+ /**
8519
+ * Build WebSocket URL with all streaming parameters
8520
+ */
8521
+ private buildStreamingUrl;
8522
+ /**
8523
+ * Handle all WebSocket message types from Deepgram streaming
8524
+ */
8525
+ private handleWebSocketMessage;
7803
8526
  }
7804
8527
  /**
7805
8528
  * Factory function to create a Deepgram adapter
@@ -13727,4 +14450,4 @@ declare namespace index {
13727
14450
  export { index_AudioIntelligenceModelStatus as AudioIntelligenceModelStatus, type index_AutoHighlightResult as AutoHighlightResult, type index_AutoHighlightsResult as AutoHighlightsResult, type index_BadRequestResponse as BadRequestResponse, type index_CannotAccessUploadedFileResponse as CannotAccessUploadedFileResponse, type index_Chapter as Chapter, type index_ContentSafetyLabel as ContentSafetyLabel, type index_ContentSafetyLabelResult as ContentSafetyLabelResult, type index_ContentSafetyLabelsResult as ContentSafetyLabelsResult, type index_ContentSafetyLabelsResultSeverityScoreSummary as ContentSafetyLabelsResultSeverityScoreSummary, type index_ContentSafetyLabelsResultSummary as ContentSafetyLabelsResultSummary, type index_CreateRealtimeTemporaryTokenParams as CreateRealtimeTemporaryTokenParams, type index_Entity as Entity, index_EntityType as EntityType, type Error$1 as Error, type index_GatewayTimeoutResponse as GatewayTimeoutResponse, type index_GetSubtitlesParams as GetSubtitlesParams, type index_InternalServerErrorResponse as InternalServerErrorResponse, type index_LemurActionItemsParams as LemurActionItemsParams, type index_LemurActionItemsParamsAllOf as LemurActionItemsParamsAllOf, type index_LemurActionItemsResponse as LemurActionItemsResponse, type index_LemurBaseParams as LemurBaseParams, type index_LemurBaseParamsContext as LemurBaseParamsContext, type index_LemurBaseParamsContextOneOf as LemurBaseParamsContextOneOf, type index_LemurBaseParamsFinalModel as LemurBaseParamsFinalModel, type index_LemurBaseResponse as LemurBaseResponse, index_LemurModel as LemurModel, type index_LemurQuestion as LemurQuestion, type index_LemurQuestionAnswer as LemurQuestionAnswer, type index_LemurQuestionAnswerParams as LemurQuestionAnswerParams, type index_LemurQuestionAnswerParamsAllOf as LemurQuestionAnswerParamsAllOf, type index_LemurQuestionAnswerResponse as LemurQuestionAnswerResponse, type index_LemurQuestionAnswerResponseAllOf as LemurQuestionAnswerResponseAllOf, type index_LemurQuestionContext as LemurQuestionContext, type index_LemurQuestionContextOneOf as LemurQuestionContextOneOf, type index_LemurResponse as LemurResponse, type index_LemurStringResponse as LemurStringResponse, type index_LemurStringResponseAllOf as LemurStringResponseAllOf, type index_LemurSummaryParams as LemurSummaryParams, type index_LemurSummaryParamsAllOf as LemurSummaryParamsAllOf, type index_LemurSummaryResponse as LemurSummaryResponse, type index_LemurTaskParams as LemurTaskParams, type index_LemurTaskParamsAllOf as LemurTaskParamsAllOf, type index_LemurTaskResponse as LemurTaskResponse, type index_LemurUsage as LemurUsage, type index_ListTranscriptParams as ListTranscriptParams, type index_ListTranscriptsParams as ListTranscriptsParams, type index_NotFoundResponse as NotFoundResponse, type index_PageDetails as PageDetails, type index_PageDetailsNextUrl as PageDetailsNextUrl, type index_PageDetailsPrevUrl as PageDetailsPrevUrl, type index_ParagraphsResponse as ParagraphsResponse, index_PiiPolicy as PiiPolicy, type index_PurgeLemurRequestDataResponse as PurgeLemurRequestDataResponse, type index_RealtimeTemporaryTokenResponse as RealtimeTemporaryTokenResponse, index_RedactPiiAudioQuality as RedactPiiAudioQuality, type index_RedactedAudioNotification as RedactedAudioNotification, type index_RedactedAudioResponse as RedactedAudioResponse, index_RedactedAudioStatus as RedactedAudioStatus, type index_SentencesResponse as SentencesResponse, index_Sentiment as Sentiment, type SentimentAnalysisResult$1 as 
SentimentAnalysisResult, type index_SentimentAnalysisResultChannel as SentimentAnalysisResultChannel, type index_SentimentAnalysisResultSpeaker as SentimentAnalysisResultSpeaker, type index_ServiceUnavailableResponse as ServiceUnavailableResponse, type index_SeverityScoreSummary as SeverityScoreSummary, index_SpeechModel as SpeechModel, index_SubstitutionPolicy as SubstitutionPolicy, index_SubtitleFormat as SubtitleFormat, index_SummaryModel as SummaryModel, index_SummaryType as SummaryType, type index_Timestamp as Timestamp, type index_TooManyRequestsResponse as TooManyRequestsResponse, type index_TopicDetectionModelResult as TopicDetectionModelResult, type index_TopicDetectionModelResultSummary as TopicDetectionModelResultSummary, type index_TopicDetectionResult as TopicDetectionResult, type index_TopicDetectionResultLabelsItem as TopicDetectionResultLabelsItem, type index_Transcript as Transcript, type index_TranscriptAudioDuration as TranscriptAudioDuration, type index_TranscriptAudioEndAt as TranscriptAudioEndAt, type index_TranscriptAudioStartFrom as TranscriptAudioStartFrom, type index_TranscriptAutoChapters as TranscriptAutoChapters, type index_TranscriptAutoHighlightsResult as TranscriptAutoHighlightsResult, index_TranscriptBoostParam as TranscriptBoostParam, type index_TranscriptBoostParamProperty as TranscriptBoostParamProperty, type index_TranscriptChapters as TranscriptChapters, type index_TranscriptConfidence as TranscriptConfidence, type index_TranscriptContentSafety as TranscriptContentSafety, type index_TranscriptContentSafetyLabels as TranscriptContentSafetyLabels, type index_TranscriptCustomSpelling as TranscriptCustomSpelling, type index_TranscriptCustomSpellingProperty as TranscriptCustomSpellingProperty, type index_TranscriptCustomTopics as TranscriptCustomTopics, type index_TranscriptDisfluencies as TranscriptDisfluencies, type index_TranscriptEntities as TranscriptEntities, type index_TranscriptEntityDetection as TranscriptEntityDetection, type index_TranscriptFilterProfanity as TranscriptFilterProfanity, type index_TranscriptFormatText as TranscriptFormatText, type index_TranscriptIabCategories as TranscriptIabCategories, type index_TranscriptIabCategoriesResult as TranscriptIabCategoriesResult, index_TranscriptLanguageCode as TranscriptLanguageCode, type index_TranscriptLanguageCodeProperty as TranscriptLanguageCodeProperty, type index_TranscriptLanguageConfidence as TranscriptLanguageConfidence, type index_TranscriptLanguageConfidenceThreshold as TranscriptLanguageConfidenceThreshold, type index_TranscriptLanguageDetection as TranscriptLanguageDetection, type index_TranscriptList as TranscriptList, type index_TranscriptListItem as TranscriptListItem, type index_TranscriptListItemCompleted as TranscriptListItemCompleted, type index_TranscriptListItemError as TranscriptListItemError, type index_TranscriptMultichannel as TranscriptMultichannel, type index_TranscriptOptionalParams as TranscriptOptionalParams, type index_TranscriptOptionalParamsLanguageCode as TranscriptOptionalParamsLanguageCode, type index_TranscriptOptionalParamsLanguageCodeOneOf as TranscriptOptionalParamsLanguageCodeOneOf, type index_TranscriptOptionalParamsRedactPiiSub as TranscriptOptionalParamsRedactPiiSub, type index_TranscriptOptionalParamsSpeakersExpected as TranscriptOptionalParamsSpeakersExpected, type index_TranscriptOptionalParamsSpeechModel as TranscriptOptionalParamsSpeechModel, type index_TranscriptOptionalParamsSpeechThreshold as TranscriptOptionalParamsSpeechThreshold, type 
index_TranscriptOptionalParamsWebhookAuthHeaderName as TranscriptOptionalParamsWebhookAuthHeaderName, type index_TranscriptOptionalParamsWebhookAuthHeaderValue as TranscriptOptionalParamsWebhookAuthHeaderValue, type index_TranscriptParagraph as TranscriptParagraph, type index_TranscriptParams as TranscriptParams, type index_TranscriptParamsAllOf as TranscriptParamsAllOf, type index_TranscriptPunctuate as TranscriptPunctuate, type index_TranscriptReadyNotification as TranscriptReadyNotification, index_TranscriptReadyStatus as TranscriptReadyStatus, type index_TranscriptRedactPiiAudio as TranscriptRedactPiiAudio, type index_TranscriptRedactPiiAudioQuality as TranscriptRedactPiiAudioQuality, type index_TranscriptRedactPiiPolicies as TranscriptRedactPiiPolicies, type index_TranscriptSentence as TranscriptSentence, type index_TranscriptSentenceChannel as TranscriptSentenceChannel, type index_TranscriptSentenceSpeaker as TranscriptSentenceSpeaker, type index_TranscriptSentimentAnalysis as TranscriptSentimentAnalysis, type index_TranscriptSentimentAnalysisResults as TranscriptSentimentAnalysisResults, type index_TranscriptSpeakerLabels as TranscriptSpeakerLabels, type index_TranscriptSpeakersExpected as TranscriptSpeakersExpected, type index_TranscriptSpeechModel as TranscriptSpeechModel, type index_TranscriptSpeechThreshold as TranscriptSpeechThreshold, type index_TranscriptSpeedBoost as TranscriptSpeedBoost, index_TranscriptStatus as TranscriptStatus, type index_TranscriptSummary as TranscriptSummary, type index_TranscriptSummaryModel as TranscriptSummaryModel, type index_TranscriptSummaryType as TranscriptSummaryType, type index_TranscriptText as TranscriptText, type index_TranscriptThrottled as TranscriptThrottled, type index_TranscriptUtterance as TranscriptUtterance, type index_TranscriptUtteranceChannel as TranscriptUtteranceChannel, type index_TranscriptUtterances as TranscriptUtterances, type index_TranscriptWebhookAuthHeaderName as TranscriptWebhookAuthHeaderName, type index_TranscriptWebhookNotification as TranscriptWebhookNotification, type index_TranscriptWebhookStatusCode as TranscriptWebhookStatusCode, type index_TranscriptWebhookUrl as TranscriptWebhookUrl, type index_TranscriptWord as TranscriptWord, type index_TranscriptWordChannel as TranscriptWordChannel, type index_TranscriptWordSpeaker as TranscriptWordSpeaker, type index_TranscriptWords as TranscriptWords, type index_UnauthorizedResponse as UnauthorizedResponse, type index_UploadedFile as UploadedFile, type index_WordSearchMatch as WordSearchMatch, type index_WordSearchParams as WordSearchParams, type index_WordSearchResponse as WordSearchResponse, type index_WordSearchTimestamp as WordSearchTimestamp };
13728
14451
  }
13729
14452
 
13730
- export { AssemblyAIAdapter, type Chapter as AssemblyAIChapter, type ContentSafetyLabelsResult as AssemblyAIContentSafetyResult, type Entity as AssemblyAIEntity, type AssemblyAIExtendedData, type AutoHighlightsResult as AssemblyAIHighlightsResult, type TranscriptOptionalParams as AssemblyAIOptions, type SentimentAnalysisResult$1 as AssemblyAISentimentResult, type AssemblyAIStreamingOptions, type TopicDetectionModelResult as AssemblyAITopicsResult, index as AssemblyAITypes, AssemblyAIWebhookHandler, type TranscriptWebhookNotification as AssemblyAIWebhookPayload, type AudioChunk, type AudioInput, AudioResponseFormat, AudioTranscriptionModel, AzureSTTAdapter, AzureWebhookHandler, BaseAdapter, BaseWebhookHandler, type BatchOnlyProvider, DeepgramAdapter, type DeepgramExtendedData, type ListenV1ResponseMetadata as DeepgramMetadata, type ListenV1MediaTranscribeParams as DeepgramOptions, type DeepgramStreamingOptions, DeepgramWebhookHandler, type ListenV1Response as DeepgramWebhookPayload, GladiaAdapter, type AudioToLlmListConfigDTO as GladiaAudioToLlmConfig, type AudioToLlmListDTO as GladiaAudioToLlmResult, type ChapterizationDTO as GladiaChapters, type CodeSwitchingConfigDTO as GladiaCodeSwitchingConfig, type NamedEntityRecognitionDTO as GladiaEntities, type GladiaExtendedData, type ModerationDTO as GladiaModeration, type InitTranscriptionRequest as GladiaOptions, type SentimentAnalysisDTO as GladiaSentiment, type SpeakerReidentificationDTO as GladiaSpeakerReidentification, type GladiaStreamingOptions, type StreamingRequest as GladiaStreamingRequest, type StructuredDataExtractionDTO as GladiaStructuredData, type TranslationDTO as GladiaTranslation, index$1 as GladiaTypes, type CallbackTranscriptionErrorPayload as GladiaWebhookErrorPayload, GladiaWebhookHandler, type GladiaWebhookPayload, type CallbackTranscriptionSuccessPayload as GladiaWebhookSuccessPayload, ListenV1EncodingParameter, type ListenV1LanguageParameter, type ListenV1ModelParameter, type ListenV1VersionParameter, OpenAIWhisperAdapter, type CreateTranscriptionRequest as OpenAIWhisperOptions, type ProviderCapabilities, type ProviderConfig, type ProviderExtendedDataMap, type ProviderRawResponseMap, type ProviderStreamingOptions, type ProviderWebhookPayloadMap, type SessionStatus, SpeakV1ContainerParameter, SpeakV1EncodingParameter, SpeakV1SampleRateParameter, type Speaker, SpeechmaticsAdapter, type SpeechmaticsOperatingPoint, SpeechmaticsWebhookHandler, type StreamEvent, type StreamEventType, type StreamingCallbacks, type StreamingOptions, type StreamingOptionsForProvider, type StreamingProvider, type StreamingSession, StreamingSupportedBitDepthEnum, StreamingSupportedEncodingEnum, StreamingSupportedSampleRateEnum, type TranscribeOptions, type TranscribeStreamParams, type TranscriptionAdapter, type TranscriptionLanguage, type TranscriptionModel, type TranscriptionProvider, type TranscriptionStatus, type UnifiedTranscriptResponse, type UnifiedWebhookEvent, type Utterance, VoiceRouter, type VoiceRouterConfig, type WebhookEventType, WebhookRouter, type WebhookRouterOptions, type WebhookRouterResult, type WebhookValidation, type WebhookVerificationOptions, type Word, createAssemblyAIAdapter, createAssemblyAIWebhookHandler, createAzureSTTAdapter, createAzureWebhookHandler, createDeepgramAdapter, createDeepgramWebhookHandler, createGladiaAdapter, createGladiaWebhookHandler, createOpenAIWhisperAdapter, createSpeechmaticsAdapter, createVoiceRouter, createWebhookRouter };
14453
+ export { AssemblyAIAdapter, type Chapter as AssemblyAIChapter, type ContentSafetyLabelsResult as AssemblyAIContentSafetyResult, AssemblyAIEncoding, type AssemblyAIEncodingType, type Entity as AssemblyAIEntity, type AssemblyAIExtendedData, type AutoHighlightsResult as AssemblyAIHighlightsResult, type TranscriptOptionalParams as AssemblyAIOptions, AssemblyAISampleRate, type AssemblyAISampleRateType, type SentimentAnalysisResult$1 as AssemblyAISentimentResult, AssemblyAISpeechModel, type AssemblyAISpeechModelType, type AssemblyAIStreamingOptions, type TopicDetectionModelResult as AssemblyAITopicsResult, index as AssemblyAITypes, type AssemblyAIUpdateConfiguration, AssemblyAIWebhookHandler, type TranscriptWebhookNotification as AssemblyAIWebhookPayload, type AudioAckEvent, type AudioChunk, type AudioInput, AudioResponseFormat, AudioTranscriptionModel, AzureSTTAdapter, AzureWebhookHandler, BaseAdapter, BaseWebhookHandler, type BatchOnlyProvider, type ChapterizationEvent, DeepgramAdapter, ListenV1EncodingParameter as DeepgramEncoding, type DeepgramExtendedData, type ListenV1ResponseMetadata as DeepgramMetadata, DeepgramModel, type DeepgramModelType, type ListenV1MediaTranscribeParams as DeepgramOptions, ListenV1RedactParameterOneOfItem as DeepgramRedact, ListenV1RedactParameterOneOfItem as DeepgramRedactType, type DeepgramStreamingOptions, SharedCustomTopicModeParameter as DeepgramTopicMode, SharedCustomTopicModeParameter as DeepgramTopicModeType, DeepgramWebhookHandler, type ListenV1Response as DeepgramWebhookPayload, type EntityEvent, GladiaAdapter, type AudioToLlmListConfigDTO as GladiaAudioToLlmConfig, type AudioToLlmListDTO as GladiaAudioToLlmResult, StreamingSupportedBitDepthEnum as GladiaBitDepth, type ChapterizationDTO as GladiaChapters, type CodeSwitchingConfigDTO as GladiaCodeSwitchingConfig, StreamingSupportedEncodingEnum as GladiaEncoding, type NamedEntityRecognitionDTO as GladiaEntities, type GladiaExtendedData, TranscriptionLanguageCodeEnum as GladiaLanguage, StreamingSupportedModels as GladiaModel, type ModerationDTO as GladiaModeration, type InitTranscriptionRequest as GladiaOptions, StreamingSupportedSampleRateEnum as GladiaSampleRate, type SentimentAnalysisDTO as GladiaSentiment, type SpeakerReidentificationDTO as GladiaSpeakerReidentification, type GladiaStreamingOptions, type StreamingRequest as GladiaStreamingRequest, type StructuredDataExtractionDTO as GladiaStructuredData, type TranslationDTO as GladiaTranslation, TranslationLanguageCodeEnum as GladiaTranslationLanguage, index$1 as GladiaTypes, type CallbackTranscriptionErrorPayload as GladiaWebhookErrorPayload, GladiaWebhookHandler, type GladiaWebhookPayload, type CallbackTranscriptionSuccessPayload as GladiaWebhookSuccessPayload, type LifecycleEvent, ListenV1EncodingParameter, type ListenV1LanguageParameter, type ListenV1ModelParameter, type ListenV1VersionParameter, OpenAIWhisperAdapter, type CreateTranscriptionRequest as OpenAIWhisperOptions, type ProviderCapabilities, type ProviderConfig, type ProviderExtendedDataMap, type ProviderRawResponseMap, type ProviderStreamingOptions, type ProviderWebhookPayloadMap, type SentimentEvent, type SessionStatus, SpeakV1ContainerParameter, SpeakV1EncodingParameter, SpeakV1SampleRateParameter, type Speaker, type SpeechEvent, SpeechmaticsAdapter, type SpeechmaticsOperatingPoint, SpeechmaticsWebhookHandler, type StreamEvent, type StreamEventType, type StreamingCallbacks, type StreamingOptions, type StreamingOptionsForProvider, type StreamingProvider, type StreamingSession, 
StreamingSupportedBitDepthEnum, StreamingSupportedEncodingEnum, StreamingSupportedSampleRateEnum, type SummarizationEvent, type TranscribeOptions, type TranscribeStreamParams, type TranscriptionAdapter, type TranscriptionLanguage, type TranscriptionModel, type TranscriptionProvider, type TranscriptionStatus, type TranslationEvent, type UnifiedTranscriptResponse, type UnifiedWebhookEvent, type Utterance, VoiceRouter, type VoiceRouterConfig, type WebhookEventType, WebhookRouter, type WebhookRouterOptions, type WebhookRouterResult, type WebhookValidation, type WebhookVerificationOptions, type Word, createAssemblyAIAdapter, createAssemblyAIWebhookHandler, createAzureSTTAdapter, createAzureWebhookHandler, createDeepgramAdapter, createDeepgramWebhookHandler, createGladiaAdapter, createGladiaWebhookHandler, createOpenAIWhisperAdapter, createSpeechmaticsAdapter, createVoiceRouter, createWebhookRouter };
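The updated export list also adds provider-prefixed aliases for the generated enums (for example `ListenV1EncodingParameter` is re-exported as `DeepgramEncoding`, `ListenV1RedactParameterOneOfItem` as `DeepgramRedact`, and the Gladia streaming enums as `GladiaEncoding`, `GladiaSampleRate`, `GladiaBitDepth`, `GladiaModel`, and `GladiaLanguage`). A minimal sketch using only members that the JSDoc examples above confirm (import specifier assumed):

```typescript
import { DeepgramEncoding, DeepgramRedact } from "voice-router-dev";

// DeepgramEncoding aliases ListenV1EncodingParameter; DeepgramRedact aliases
// ListenV1RedactParameterOneOfItem (member names per the JSDoc examples above).
const deepgramStreaming = {
  encoding: DeepgramEncoding.linear16,
  redact: [DeepgramRedact.pii, DeepgramRedact.pci],
};
```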