@360labs/live-transcribe 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2689 @@
1
+ /**
2
+ * Lightweight EventEmitter implementation for browser and Node.js compatibility
3
+ */
4
+ declare class EventEmitter<TEvents extends Record<string, (...args: any[]) => void> = Record<string, (...args: any[]) => void>> {
5
+ private events;
6
+ /**
7
+ * Subscribe to an event
8
+ * @param event - Event name to subscribe to
9
+ * @param listener - Callback function
10
+ */
11
+ on<K extends keyof TEvents>(event: K, listener: TEvents[K]): this;
12
+ /**
13
+ * Unsubscribe from an event
14
+ * @param event - Event name to unsubscribe from
15
+ * @param listener - Callback function to remove
16
+ */
17
+ off<K extends keyof TEvents>(event: K, listener: TEvents[K]): this;
18
+ /**
19
+ * Subscribe to an event for one-time notification
20
+ * @param event - Event name to subscribe to
21
+ * @param listener - Callback function
22
+ */
23
+ once<K extends keyof TEvents>(event: K, listener: TEvents[K]): this;
24
+ /**
25
+ * Emit an event to all subscribers
26
+ * @param event - Event name to emit
27
+ * @param args - Arguments to pass to listeners
28
+ */
29
+ protected emit<K extends keyof TEvents>(event: K, ...args: Parameters<TEvents[K]>): boolean;
30
+ /**
31
+ * Remove all listeners for an event, or all listeners if no event specified
32
+ * @param event - Optional event name
33
+ */
34
+ removeAllListeners<K extends keyof TEvents>(event?: K): this;
35
+ /**
36
+ * Get the number of listeners for an event
37
+ * @param event - Event name
38
+ */
39
+ listenerCount<K extends keyof TEvents>(event: K): number;
40
+ /**
41
+ * Get all event names that have listeners
42
+ */
43
+ eventNames(): (keyof TEvents)[];
44
+ }
45
+
46
+ /**
47
+ * Represents an individual word in a transcription with timing information
48
+ */
49
+ interface Word {
50
+ /** The transcribed word text */
51
+ text: string;
52
+ /** Start time in milliseconds */
53
+ start: number;
54
+ /** End time in milliseconds */
55
+ end: number;
56
+ /** Confidence score (0-1) */
57
+ confidence?: number;
58
+ }
59
+ /**
60
+ * Represents a transcription result from the provider
61
+ */
62
+ interface TranscriptionResult {
63
+ /** The transcribed text */
64
+ text: string;
65
+ /** Whether this is a final or interim result */
66
+ isFinal: boolean;
67
+ /** Confidence score (0-1) */
68
+ confidence?: number;
69
+ /** When the transcription occurred (Unix timestamp) */
70
+ timestamp: number;
71
+ /** Array of individual words with timing information */
72
+ words?: Word[];
73
+ /** Speaker identification if supported by provider */
74
+ speaker?: string;
75
+ /** Detected or specified language code */
76
+ language?: string;
77
+ }
78
+ /**
79
+ * Represents a segment of transcription with timing information
80
+ */
81
+ interface TranscriptionSegment {
82
+ /** Unique identifier for the segment */
83
+ id: string;
84
+ /** The transcribed text for this segment */
85
+ text: string;
86
+ /** Start time in milliseconds */
87
+ start: number;
88
+ /** End time in milliseconds */
89
+ end: number;
90
+ /** Whether this segment is finalized */
91
+ isFinal: boolean;
92
+ /** Confidence score (0-1) */
93
+ confidence?: number;
94
+ }
95
+
96
+ /**
97
+ * Supported transcription providers
98
+ */
99
+ declare enum TranscriptionProvider {
100
+ /** Browser-native Web Speech API */
101
+ WebSpeechAPI = "web-speech",
102
+ /** Deepgram real-time transcription */
103
+ Deepgram = "deepgram",
104
+ /** AssemblyAI real-time transcription */
105
+ AssemblyAI = "assemblyai",
106
+ /** Custom provider implementation */
107
+ Custom = "custom"
108
+ }
109
+ /**
110
+ * Audio encoding formats
111
+ */
112
+ declare enum AudioEncoding {
113
+ /** 16-bit signed little-endian linear PCM */
114
+ LINEAR16 = "linear16",
115
+ /** G.711 mu-law */
116
+ MULAW = "mulaw",
117
+ /** G.711 A-law */
118
+ ALAW = "alaw",
119
+ /** Opus encoding */
120
+ OPUS = "opus"
121
+ }
122
+ /**
123
+ * Audio configuration options
124
+ */
125
+ interface AudioConfig {
126
+ /** Sample rate in Hz (default: 16000) */
127
+ sampleRate?: number;
128
+ /** Number of audio channels (default: 1) */
129
+ channels?: number;
130
+ /** Bit depth (default: 16) */
131
+ bitDepth?: number;
132
+ /** Audio encoding format */
133
+ encoding?: AudioEncoding;
134
+ }
135
+ /**
136
+ * Main configuration for transcription
137
+ */
138
+ interface TranscriptionConfig {
139
+ /** The transcription provider to use */
140
+ provider: TranscriptionProvider;
141
+ /** API key for cloud providers (Deepgram, AssemblyAI) */
142
+ apiKey?: string;
143
+ /** Language code (default: 'en-US') */
144
+ language?: string;
145
+ /** Whether to return interim results (default: true) */
146
+ interimResults?: boolean;
147
+ /** Enable profanity filter (default: false) */
148
+ profanityFilter?: boolean;
149
+ /** Enable automatic punctuation (default: true) */
150
+ punctuation?: boolean;
151
+ /** Audio configuration options */
152
+ audioConfig?: AudioConfig;
153
+ /** Provider-specific options */
154
+ providerOptions?: Record<string, unknown>;
155
+ }
156
+ /**
157
+ * Default audio configuration values
158
+ */
159
+ declare const DEFAULT_AUDIO_CONFIG: Required<AudioConfig>;
160
+ /**
161
+ * Default transcription configuration values
162
+ */
163
+ declare const DEFAULT_TRANSCRIPTION_CONFIG: Partial<TranscriptionConfig>;
164
+
165
+ /**
166
+ * Possible states of a transcription session
167
+ */
168
+ declare enum SessionState {
169
+ /** Session has not started */
170
+ IDLE = "idle",
171
+ /** Session is initializing (connecting to provider, requesting mic access) */
172
+ INITIALIZING = "initializing",
173
+ /** Session is actively transcribing */
174
+ ACTIVE = "active",
175
+ /** Session is paused */
176
+ PAUSED = "paused",
177
+ /** Session is stopping */
178
+ STOPPING = "stopping",
179
+ /** Session has stopped */
180
+ STOPPED = "stopped",
181
+ /** Session encountered an error */
182
+ ERROR = "error"
183
+ }
184
+ /**
185
+ * Configuration options for a transcription session
186
+ */
187
+ interface SessionConfig {
188
+ /** Whether to record audio during the session (default: false) */
189
+ recordAudio?: boolean;
190
+ /** Maximum session duration in milliseconds */
191
+ maxDuration?: number;
192
+ /** Auto-stop after silence duration in milliseconds */
193
+ silenceTimeout?: number;
194
+ /** Enable Voice Activity Detection (default: false) */
195
+ enableVAD?: boolean;
196
+ /** VAD sensitivity threshold (0-1, default: 0.5) */
197
+ vadThreshold?: number;
198
+ }
199
+ /**
200
+ * Metadata about a transcription session
201
+ */
202
+ interface SessionMetadata {
203
+ /** Unique session identifier */
204
+ id: string;
205
+ /** Session start time (Unix timestamp) */
206
+ startTime: number;
207
+ /** Session end time (Unix timestamp) */
208
+ endTime?: number;
209
+ /** Session duration in milliseconds */
210
+ duration?: number;
211
+ /** Total word count in the session */
212
+ wordCount: number;
213
+ /** Provider used for the session */
214
+ provider: TranscriptionProvider;
215
+ }
216
+ /**
217
+ * Default session configuration values
218
+ */
219
+ declare const DEFAULT_SESSION_CONFIG: Required<SessionConfig>;
220
+ /**
221
+ * Statistics for all managed sessions
222
+ */
223
+ interface SessionStats {
224
+ /** Total number of sessions */
225
+ totalSessions: number;
226
+ /** Number of currently active sessions */
227
+ activeSessions: number;
228
+ /** Total number of transcripts across all sessions */
229
+ totalTranscripts: number;
230
+ /** Total duration across all sessions in ms */
231
+ totalDuration: number;
232
+ /** Average confidence score */
233
+ averageConfidence: number;
234
+ }
235
+ /**
236
+ * Statistics for a single session
237
+ */
238
+ interface SessionStatistics {
239
+ /** Total word count */
240
+ wordCount: number;
241
+ /** Average confidence score */
242
+ averageConfidence: number;
243
+ /** Speaking rate in words per minute */
244
+ speakingRate: number;
245
+ /** Number of silence periods */
246
+ silencePeriods: number;
247
+ /** Total duration in milliseconds */
248
+ durationMs: number;
249
+ /** Number of transcripts */
250
+ transcriptCount: number;
251
+ }
252
+ /**
253
+ * Options for merging transcripts
254
+ */
255
+ interface MergeOptions {
256
+ /** Separator between transcripts */
257
+ separator?: string;
258
+ /** Include timestamps */
259
+ includeTimestamps?: boolean;
260
+ /** Include speaker labels */
261
+ includeSpeakers?: boolean;
262
+ /** Only include final transcripts */
263
+ finalOnly?: boolean;
264
+ }
265
+ /**
266
+ * Supported export formats
267
+ */
268
+ type ExportFormat = 'json' | 'text' | 'srt' | 'vtt' | 'csv';
269
+ /**
270
+ * Result of exporting a session
271
+ */
272
+ interface ExportResult {
273
+ /** Export format used */
274
+ format: ExportFormat;
275
+ /** Exported data */
276
+ data: string | ArrayBuffer;
277
+ /** Suggested filename */
278
+ filename: string;
279
+ /** MIME type */
280
+ mimeType: string;
281
+ }
282
+ /**
283
+ * Data structure for importing a session
284
+ */
285
+ interface SessionImport {
286
+ /** Session metadata */
287
+ metadata: SessionMetadata;
288
+ /** Transcript results */
289
+ transcripts: TranscriptionResult[];
290
+ /** Session configuration */
291
+ config: SessionConfig;
292
+ }
293
+ /**
294
+ * Full session export data
295
+ */
296
+ interface SessionExportData {
297
+ /** Export version */
298
+ version: string;
299
+ /** Session data */
300
+ session: SessionImport;
301
+ /** Export timestamp */
302
+ exportedAt: number;
303
+ }
304
+ /**
305
+ * Options for text export
306
+ */
307
+ interface TextExportOptions {
308
+ /** Include timestamps */
309
+ includeTimestamps?: boolean;
310
+ /** Include speaker labels */
311
+ includeSpeakers?: boolean;
312
+ /** Include confidence scores */
313
+ includeConfidence?: boolean;
314
+ /** Add paragraph breaks */
315
+ paragraphBreaks?: boolean;
316
+ }
317
+ /**
318
+ * Options for CSV export
319
+ */
320
+ interface CSVExportOptions {
321
+ /** Column delimiter */
322
+ delimiter?: string;
323
+ /** Include header row */
324
+ includeHeaders?: boolean;
325
+ /** Columns to include */
326
+ columns?: string[];
327
+ }
328
+
329
+ /**
330
+ * Error codes for transcription errors
331
+ */
332
+ declare enum ErrorCode {
333
+ /** Failed to initialize the provider */
334
+ INITIALIZATION_FAILED = "initialization_failed",
335
+ /** Failed to connect to the transcription service */
336
+ CONNECTION_FAILED = "connection_failed",
337
+ /** API key invalid or authentication failed */
338
+ AUTHENTICATION_FAILED = "authentication_failed",
339
+ /** Microphone access was denied by the user */
340
+ MICROPHONE_ACCESS_DENIED = "microphone_access_denied",
341
+ /** Browser does not support required features */
342
+ UNSUPPORTED_BROWSER = "unsupported_browser",
343
+ /** Network error during transcription */
344
+ NETWORK_ERROR = "network_error",
345
+ /** Provider-specific error */
346
+ PROVIDER_ERROR = "provider_error",
347
+ /** Invalid configuration provided */
348
+ INVALID_CONFIG = "invalid_config",
349
+ /** Session has expired */
350
+ SESSION_EXPIRED = "session_expired",
351
+ /** Unknown error occurred */
352
+ UNKNOWN_ERROR = "unknown_error"
353
+ }
354
+ /**
355
+ * Custom error class for transcription errors
356
+ */
357
+ declare class TranscriptionError extends Error {
358
+ /** Error code identifying the type of error */
359
+ readonly code: ErrorCode;
360
+ /** Provider that generated the error */
361
+ readonly provider?: TranscriptionProvider;
362
+ /** Additional error details */
363
+ readonly details?: unknown;
364
+ constructor(message: string, code: ErrorCode, provider?: TranscriptionProvider, details?: unknown);
365
+ }
366
+ /**
367
+ * Event map for transcription events
368
+ * Used for type-safe event handling
369
+ */
370
+ interface TranscriptionEvents {
371
+ /** Emitted for any transcription result (interim or final) */
372
+ transcript: (result: TranscriptionResult) => void;
373
+ /** Emitted for interim (non-final) transcription results */
374
+ interim: (result: TranscriptionResult) => void;
375
+ /** Emitted for final transcription results */
376
+ final: (result: TranscriptionResult) => void;
377
+ /** Emitted when transcription starts */
378
+ start: () => void;
379
+ /** Emitted when transcription stops */
380
+ stop: () => void;
381
+ /** Emitted when transcription is paused */
382
+ pause: () => void;
383
+ /** Emitted when transcription is resumed */
384
+ resume: () => void;
385
+ /** Emitted when an error occurs */
386
+ error: (error: TranscriptionError) => void;
387
+ /** Emitted when session state changes */
388
+ stateChange: (state: SessionState) => void;
389
+ /** Emitted with current audio level (0-1) */
390
+ audioLevel: (level: number) => void;
391
+ /** Emitted when silence is detected */
392
+ silence: () => void;
393
+ /** Emitted when speech is detected */
394
+ speech: () => void;
395
+ /** Index signature for extensibility */
396
+ [key: string]: (...args: any[]) => void;
397
+ }
398
+ /**
399
+ * Type for event names
400
+ */
401
+ type TranscriptionEventName = keyof TranscriptionEvents;
402
+
403
+ /**
404
+ * Interface that all transcription providers must implement
405
+ */
406
+ interface ITranscriptionProvider {
407
+ /**
408
+ * Initialize the provider with configuration
409
+ * @param config - Transcription configuration
410
+ */
411
+ initialize(config: TranscriptionConfig): Promise<void>;
412
+ /**
413
+ * Start transcription
414
+ */
415
+ start(): Promise<void>;
416
+ /**
417
+ * Stop transcription
418
+ */
419
+ stop(): Promise<void>;
420
+ /**
421
+ * Pause transcription
422
+ */
423
+ pause(): void;
424
+ /**
425
+ * Resume transcription after pause
426
+ */
427
+ resume(): void;
428
+ /**
429
+ * Send audio data to the provider
430
+ * @param audioData - Raw audio data as ArrayBuffer
431
+ */
432
+ sendAudio(audioData: ArrayBuffer): void;
433
+ /**
434
+ * Get the current session state
435
+ */
436
+ getState(): SessionState;
437
+ /**
438
+ * Check if this provider is supported in the current environment
439
+ */
440
+ isSupported(): boolean;
441
+ /**
442
+ * Clean up resources and connections
443
+ */
444
+ cleanup(): Promise<void>;
445
+ }
446
+ /**
447
+ * Describes the capabilities of a transcription provider
448
+ */
449
+ interface ProviderCapabilities {
450
+ /** Whether the provider supports interim (partial) results */
451
+ supportsInterim: boolean;
452
+ /** Whether the provider provides word-level timestamps */
453
+ supportsWordTimestamps: boolean;
454
+ /** Whether the provider supports speaker diarization */
455
+ supportsSpeakerDiarization: boolean;
456
+ /** Whether the provider supports automatic punctuation */
457
+ supportsPunctuation: boolean;
458
+ /** Whether the provider supports automatic language detection */
459
+ supportsLanguageDetection: boolean;
460
+ /** List of supported language codes */
461
+ supportedLanguages: string[];
462
+ }
463
+ /**
464
+ * Base provider information
465
+ */
466
+ interface ProviderInfo {
467
+ /** Provider display name */
468
+ name: string;
469
+ /** Provider identifier */
470
+ id: string;
471
+ /** Provider capabilities */
472
+ capabilities: ProviderCapabilities;
473
+ /** Whether the provider requires an API key */
474
+ requiresApiKey: boolean;
475
+ /** Whether the provider works in browser environment */
476
+ supportsBrowser: boolean;
477
+ /** Whether the provider works in Node.js environment */
478
+ supportsNode: boolean;
479
+ }
480
+
481
+ /**
482
+ * Abstract base class for all transcription providers
483
+ * Provides common functionality including event handling, session management,
484
+ * and audio recording capabilities
485
+ */
486
+ declare abstract class BaseTranscriber extends EventEmitter<TranscriptionEvents> implements ITranscriptionProvider {
487
+ /** Transcription configuration */
488
+ protected config: TranscriptionConfig;
489
+ /** Current session state */
490
+ protected state: SessionState;
491
+ /** Session metadata */
492
+ protected sessionMetadata: SessionMetadata;
493
+ /** Recorded audio chunks */
494
+ protected audioRecording: ArrayBuffer[];
495
+ /** Session start timestamp */
496
+ protected startTime?: number;
497
+ /** Word count in current session */
498
+ protected wordCount: number;
499
+ /**
500
+ * Create a new BaseTranscriber instance
501
+ * @param config - Transcription configuration
502
+ */
503
+ constructor(config: TranscriptionConfig);
504
+ /**
505
+ * Initialize the provider with configuration
506
+ * Must be implemented by concrete providers
507
+ */
508
+ abstract initialize(): Promise<void>;
509
+ /**
510
+ * Start transcription
511
+ * Must be implemented by concrete providers
512
+ */
513
+ abstract start(): Promise<void>;
514
+ /**
515
+ * Stop transcription
516
+ * Must be implemented by concrete providers
517
+ */
518
+ abstract stop(): Promise<void>;
519
+ /**
520
+ * Pause transcription
521
+ * Must be implemented by concrete providers
522
+ */
523
+ abstract pause(): void;
524
+ /**
525
+ * Resume transcription after pause
526
+ * Must be implemented by concrete providers
527
+ */
528
+ abstract resume(): void;
529
+ /**
530
+ * Send audio data to the provider
531
+ * Must be implemented by concrete providers
532
+ * @param audioData - Raw audio data
533
+ */
534
+ abstract sendAudio(audioData: ArrayBuffer): void;
535
+ /**
536
+ * Check if provider is supported in current environment
537
+ * Must be implemented by concrete providers
538
+ */
539
+ abstract isSupported(): boolean;
540
+ /**
541
+ * Clean up resources and connections
542
+ * Must be implemented by concrete providers
543
+ */
544
+ abstract cleanup(): Promise<void>;
545
+ /**
546
+ * Get the current session state
547
+ */
548
+ getState(): SessionState;
549
+ /**
550
+ * Get session metadata
551
+ */
552
+ getMetadata(): SessionMetadata;
553
+ /**
554
+ * Get recorded audio data
555
+ * @returns Combined audio data or null if not recording
556
+ */
557
+ getRecording(): ArrayBuffer | null;
558
+ /**
559
+ * Update session state and emit state change event
560
+ * @param newState - New session state
561
+ */
562
+ protected setState(newState: SessionState): void;
563
+ /**
564
+ * Handle incoming transcription result
565
+ * @param result - Transcription result from provider
566
+ */
567
+ protected handleTranscript(result: TranscriptionResult): void;
568
+ /**
569
+ * Handle errors and emit error event
570
+ * @param error - Error to handle
571
+ */
572
+ protected handleError(error: Error | TranscriptionError): void;
573
+ /**
574
+ * Validate configuration
575
+ * @throws TranscriptionError if configuration is invalid
576
+ */
577
+ protected validateConfig(): void;
578
+ /**
579
+ * Record audio data if recording is enabled
580
+ * @param data - Audio data to record
581
+ */
582
+ protected recordAudioData(data: ArrayBuffer): void;
583
+ /**
584
+ * Calculate session duration
585
+ * @returns Duration in milliseconds
586
+ */
587
+ protected calculateDuration(): number;
588
+ /**
589
+ * Clear recording data
590
+ */
591
+ protected clearRecording(): void;
592
+ /**
593
+ * Reset session state for new session
594
+ */
595
+ protected resetSession(): void;
596
+ /**
597
+ * Generate a unique session ID
598
+ */
599
+ private generateSessionId;
600
+ /**
601
+ * Initialize session metadata
602
+ */
603
+ private initializeMetadata;
604
+ }
605
+
606
+ /**
607
+ * Session export data structure
608
+ */
609
+ interface SessionExport {
610
+ /** Session metadata */
611
+ metadata: SessionMetadata;
612
+ /** All transcription results */
613
+ transcripts: TranscriptionResult[];
614
+ /** Concatenated final text */
615
+ fullText: string;
616
+ /** Recorded audio data if available */
617
+ audioData?: ArrayBuffer;
618
+ }
619
+ /**
620
+ * Manages a transcription session with support for recording and transcript management
621
+ */
622
+ declare class TranscriptionSession {
623
+ /** Unique session identifier */
624
+ readonly id: string;
625
+ /** Transcription provider instance */
626
+ readonly provider: ITranscriptionProvider;
627
+ /** Session configuration */
628
+ private config;
629
+ /** Collected transcription results */
630
+ private transcripts;
631
+ /** Current session state */
632
+ private state;
633
+ /** Max duration timer */
634
+ private maxDurationTimer?;
635
+ /** Silence timeout timer */
636
+ private silenceTimer?;
637
+ /** Session start timestamp */
638
+ private startTime?;
639
+ /**
640
+ * Create a new TranscriptionSession
641
+ * @param provider - Transcription provider to use
642
+ * @param sessionConfig - Session configuration options
643
+ */
644
+ constructor(provider: ITranscriptionProvider, sessionConfig?: SessionConfig);
645
+ /**
646
+ * Start the transcription session
647
+ */
648
+ start(): Promise<void>;
649
+ /**
650
+ * Stop the transcription session
651
+ */
652
+ stop(): Promise<void>;
653
+ /**
654
+ * Pause the transcription session
655
+ */
656
+ pause(): void;
657
+ /**
658
+ * Resume the transcription session
659
+ */
660
+ resume(): void;
661
+ /**
662
+ * Add a transcription result to the session
663
+ * @param result - Transcription result to add
664
+ */
665
+ addTranscript(result: TranscriptionResult): void;
666
+ /**
667
+ * Get transcription results
668
+ * @param finalOnly - If true, return only final results
669
+ */
670
+ getTranscripts(finalOnly?: boolean): TranscriptionResult[];
671
+ /**
672
+ * Get concatenated text from all final transcripts
673
+ */
674
+ getFullText(): string;
675
+ /**
676
+ * Get the current session state
677
+ */
678
+ getState(): SessionState;
679
+ /**
680
+ * Export session data in raw format
681
+ */
682
+ exportRaw(): SessionExport;
683
+ /**
684
+ * Export session data in specified format
685
+ * @param format - Export format (json, text, srt, vtt, csv)
686
+ */
687
+ export(format?: ExportFormat): ExportResult;
688
+ /**
689
+ * Get session statistics
690
+ */
691
+ getStatistics(): SessionStatistics;
692
+ /**
693
+ * Clear all transcripts
694
+ */
695
+ clear(): void;
696
+ /**
697
+ * Get the total word count from final transcripts
698
+ */
699
+ private getWordCount;
700
+ /**
701
+ * Set up session timers (max duration, silence timeout)
702
+ */
703
+ private setupTimers;
704
+ /**
705
+ * Reset the silence timeout timer
706
+ */
707
+ private resetSilenceTimer;
708
+ /**
709
+ * Clear all timers
710
+ */
711
+ private clearTimers;
712
+ /**
713
+ * Generate a unique session ID
714
+ */
715
+ private generateSessionId;
716
+ }
717
+
718
+ /**
719
+ * Session manager options
720
+ */
721
+ interface SessionManagerOptions {
722
+ /** Default session configuration */
723
+ defaultConfig?: SessionConfig;
724
+ /** Maximum number of concurrent sessions */
725
+ maxSessions?: number;
726
+ }
727
+ /**
728
+ * Manages multiple transcription sessions
729
+ */
730
+ declare class SessionManager {
731
+ /** Active sessions map */
732
+ private sessions;
733
+ /** Currently active session ID */
734
+ private activeSessionId;
735
+ /** Default session configuration */
736
+ private defaultConfig;
737
+ /** Maximum number of sessions */
738
+ private maxSessions;
739
+ /**
740
+ * Create a new SessionManager
741
+ * @param options - Manager configuration
742
+ */
743
+ constructor(options?: SessionManagerOptions);
744
+ /**
745
+ * Create a new transcription session
746
+ * @param provider - Transcription provider instance
747
+ * @param config - Session configuration (merged with defaults)
748
+ * @returns New TranscriptionSession instance
749
+ */
750
+ createSession(provider: ITranscriptionProvider, config?: SessionConfig): TranscriptionSession;
751
+ /**
752
+ * Get session by ID
753
+ * @param sessionId - Session ID
754
+ * @returns TranscriptionSession or null
755
+ */
756
+ getSession(sessionId: string): TranscriptionSession | null;
757
+ /**
758
+ * Get the currently active session
759
+ * @returns Active TranscriptionSession or null
760
+ */
761
+ getActiveSession(): TranscriptionSession | null;
762
+ /**
763
+ * Set the active session
764
+ * @param sessionId - Session ID to make active
765
+ */
766
+ setActiveSession(sessionId: string): void;
767
+ /**
768
+ * Get all sessions
769
+ * @returns Array of all sessions
770
+ */
771
+ getAllSessions(): TranscriptionSession[];
772
+ /**
773
+ * Delete a session
774
+ * @param sessionId - Session ID to delete
775
+ */
776
+ deleteSession(sessionId: string): Promise<void>;
777
+ /**
778
+ * Clear all sessions
779
+ */
780
+ clearAllSessions(): Promise<void>;
781
+ /**
782
+ * Get statistics for all sessions
783
+ * @returns Session statistics
784
+ */
785
+ getSessionStats(): SessionStats;
786
+ /**
787
+ * Export a session to specified format
788
+ * @param sessionId - Session ID
789
+ * @param format - Export format
790
+ * @returns Export result
791
+ */
792
+ exportSession(sessionId: string, format: ExportFormat): ExportResult;
793
+ /**
794
+ * Import a session from data
795
+ * @param data - Session import data
796
+ * @param provider - Provider instance for the session
797
+ * @returns Imported session
798
+ */
799
+ importSession(data: SessionImport, provider: ITranscriptionProvider): TranscriptionSession;
800
+ /**
801
+ * Check if a session exists
802
+ * @param sessionId - Session ID
803
+ * @returns True if session exists
804
+ */
805
+ hasSession(sessionId: string): boolean;
806
+ /**
807
+ * Get session count
808
+ * @returns Number of sessions
809
+ */
810
+ getSessionCount(): number;
811
+ /**
812
+ * Get sessions by state
813
+ * @param state - Session state to filter by
814
+ * @returns Array of sessions with matching state
815
+ */
816
+ getSessionsByState(state: SessionState): TranscriptionSession[];
817
+ }
818
+
819
+ /**
820
+ * Web Speech API type declarations
821
+ */
822
+ declare global {
823
+ interface Window {
824
+ SpeechRecognition: typeof SpeechRecognition;
825
+ webkitSpeechRecognition: typeof SpeechRecognition;
826
+ }
827
+ }
828
+ /**
829
+ * SpeechRecognition interfaces for TypeScript
830
+ */
831
+ interface SpeechRecognitionEvent extends Event {
832
+ resultIndex: number;
833
+ results: SpeechRecognitionResultList;
834
+ }
835
+ interface SpeechRecognitionResultList {
836
+ length: number;
837
+ item(index: number): SpeechRecognitionResult;
838
+ [index: number]: SpeechRecognitionResult;
839
+ }
840
+ interface SpeechRecognitionResult {
841
+ length: number;
842
+ item(index: number): SpeechRecognitionAlternative;
843
+ [index: number]: SpeechRecognitionAlternative;
844
+ isFinal: boolean;
845
+ }
846
+ interface SpeechRecognitionAlternative {
847
+ transcript: string;
848
+ confidence: number;
849
+ }
850
+ interface SpeechRecognitionErrorEvent extends Event {
851
+ error: string;
852
+ message: string;
853
+ }
854
+ interface SpeechRecognition extends EventTarget {
855
+ continuous: boolean;
856
+ interimResults: boolean;
857
+ lang: string;
858
+ maxAlternatives: number;
859
+ grammars: unknown;
860
+ onstart: ((this: SpeechRecognition, ev: Event) => void) | null;
861
+ onend: ((this: SpeechRecognition, ev: Event) => void) | null;
862
+ onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => void) | null;
863
+ onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => void) | null;
864
+ onspeechstart: ((this: SpeechRecognition, ev: Event) => void) | null;
865
+ onspeechend: ((this: SpeechRecognition, ev: Event) => void) | null;
866
+ onaudiostart: ((this: SpeechRecognition, ev: Event) => void) | null;
867
+ onaudioend: ((this: SpeechRecognition, ev: Event) => void) | null;
868
+ start(): void;
869
+ stop(): void;
870
+ abort(): void;
871
+ }
872
+ declare let SpeechRecognition: {
873
+ prototype: SpeechRecognition;
874
+ new (): SpeechRecognition;
875
+ };
876
+ /**
877
+ * Transcription provider using browser's native Web Speech API
878
+ * Works in Chrome, Edge, and Safari without requiring API keys
879
+ */
880
+ declare class WebSpeechProvider extends BaseTranscriber {
881
+ /** Speech recognition instance */
882
+ private recognition;
883
+ /** Media stream from microphone */
884
+ private mediaStream;
885
+ /** Audio context for analysis */
886
+ private audioContext;
887
+ /** Audio analyser for VAD */
888
+ private analyser;
889
+ /** Script processor for audio level monitoring */
890
+ private audioLevelInterval;
891
+ /** Whether recognition is being restarted automatically */
892
+ private isRestarting;
893
+ /** Retry count for auto-restart */
894
+ private retryCount;
895
+ /** Maximum retry attempts */
896
+ private readonly maxRetries;
897
+ /** Provider capabilities */
898
+ static readonly capabilities: ProviderCapabilities;
899
+ /**
900
+ * Create a new WebSpeechProvider
901
+ * @param config - Transcription configuration
902
+ */
903
+ constructor(config: Omit<TranscriptionConfig, 'provider'> & {
904
+ provider?: TranscriptionProvider;
905
+ });
906
+ /**
907
+ * Check if Web Speech API is supported in the current environment
908
+ */
909
+ isSupported(): boolean;
910
+ /**
911
+ * Initialize the Web Speech API provider
912
+ */
913
+ initialize(): Promise<void>;
914
+ /**
915
+ * Start transcription
916
+ */
917
+ start(): Promise<void>;
918
+ /**
919
+ * Stop transcription
920
+ */
921
+ stop(): Promise<void>;
922
+ /**
923
+ * Pause transcription
924
+ */
925
+ pause(): void;
926
+ /**
927
+ * Resume transcription
928
+ */
929
+ resume(): void;
930
+ /**
931
+ * Send audio data - not supported by Web Speech API
932
+ * @param _audioData - Audio data (unused)
933
+ */
934
+ sendAudio(_audioData: ArrayBuffer): void;
935
+ /**
936
+ * Clean up all resources
937
+ */
938
+ cleanup(): Promise<void>;
939
+ /**
940
+ * Get provider capabilities
941
+ */
942
+ getCapabilities(): ProviderCapabilities;
943
+ /**
944
+ * Set up event handlers for speech recognition
945
+ */
946
+ private setupEventHandlers;
947
+ /**
948
+ * Process speech recognition results
949
+ */
950
+ private processRecognitionResult;
951
+ /**
952
+ * Handle recognition end event
953
+ */
954
+ private handleRecognitionEnd;
955
+ /**
956
+ * Handle recognition errors
957
+ */
958
+ private handleRecognitionError;
959
+ /**
960
+ * Request microphone access
961
+ */
962
+ private getMicrophoneAccess;
963
+ /**
964
+ * Stop media stream tracks
965
+ */
966
+ private stopMediaStream;
967
+ /**
968
+ * Set up audio level monitoring for VAD
969
+ */
970
+ private setupAudioLevelMonitoring;
971
+ /**
972
+ * Stop audio level monitoring
973
+ */
974
+ private stopAudioLevelMonitoring;
975
+ }
976
+
977
+ /**
978
+ * Deepgram-specific configuration options
979
+ */
980
+ interface DeepgramOptions {
981
+ /** Deepgram model to use */
982
+ model?: 'nova-2' | 'nova' | 'enhanced' | 'base';
983
+ /** Model tier */
984
+ tier?: 'nova' | 'enhanced' | 'base';
985
+ /** Model version */
986
+ version?: 'latest' | string;
987
+ /** Enable punctuation */
988
+ punctuate?: boolean;
989
+ /** Enable speaker diarization */
990
+ diarize?: boolean;
991
+ /** Enable multichannel processing */
992
+ multichannel?: boolean;
993
+ /** Number of alternative transcripts */
994
+ alternatives?: number;
995
+ /** Convert numbers to numerals */
996
+ numerals?: boolean;
997
+ /** Search terms to boost */
998
+ search?: string[];
999
+ /** Words to replace */
1000
+ replace?: string[];
1001
+ /** Keywords to boost */
1002
+ keywords?: string[];
1003
+ /** Endpointing timeout in milliseconds */
1004
+ endpointing?: number;
1005
+ /** Enable smart formatting */
1006
+ smartFormat?: boolean;
1007
+ }
1008
+ /**
1009
+ * Transcription provider using Deepgram's WebSocket streaming API
1010
+ * Provides high-accuracy transcription with word-level timestamps
1011
+ */
1012
+ declare class DeepgramProvider extends BaseTranscriber {
1013
+ /** WebSocket connection */
1014
+ private socket;
1015
+ /** Media stream from microphone */
1016
+ private mediaStream;
1017
+ /** Audio context for processing */
1018
+ private audioContext;
1019
+ /** Audio processor node */
1020
+ private processor;
1021
+ /** Connection attempt counter */
1022
+ private connectionAttempts;
1023
+ /** Maximum reconnection attempts */
1024
+ private readonly maxRetries;
1025
+ /** Reconnection timeout */
1026
+ private reconnectTimeout;
1027
+ /** Keep-alive interval */
1028
+ private keepAliveInterval;
1029
+ /** Flag indicating if connection is ready */
1030
+ private isConnectionReady;
1031
+ /** Flag for intentional close */
1032
+ private isIntentionalClose;
1033
+ /** Provider capabilities */
1034
+ static readonly capabilities: ProviderCapabilities;
1035
+ /**
1036
+ * Create a new DeepgramProvider
1037
+ * @param config - Transcription configuration with API key
1038
+ */
1039
+ constructor(config: Omit<TranscriptionConfig, 'provider'> & {
1040
+ provider?: TranscriptionProvider;
1041
+ });
1042
+ /**
1043
+ * Check if Deepgram provider is supported
1044
+ */
1045
+ isSupported(): boolean;
1046
+ /**
1047
+ * Initialize the Deepgram provider
1048
+ */
1049
+ initialize(): Promise<void>;
1050
+ /**
1051
+ * Start transcription
1052
+ */
1053
+ start(): Promise<void>;
1054
+ /**
1055
+ * Stop transcription
1056
+ */
1057
+ stop(): Promise<void>;
1058
+ /**
1059
+ * Pause transcription
1060
+ */
1061
+ pause(): void;
1062
+ /**
1063
+ * Resume transcription
1064
+ */
1065
+ resume(): void;
1066
+ /**
1067
+ * Send audio data through WebSocket
1068
+ * @param audioData - Raw audio data as ArrayBuffer
1069
+ */
1070
+ sendAudio(audioData: ArrayBuffer): void;
1071
+ /**
1072
+ * Clean up all resources
1073
+ */
1074
+ cleanup(): Promise<void>;
1075
+ /**
1076
+ * Get provider capabilities
1077
+ */
1078
+ getCapabilities(): ProviderCapabilities;
1079
+ /**
1080
+ * Build WebSocket URL with query parameters
1081
+ */
1082
+ private buildWebSocketUrl;
1083
+ /**
1084
+ * Set up WebSocket connection
1085
+ */
1086
+ private setupWebSocket;
1087
+ /**
1088
+ * Handle WebSocket open event
1089
+ */
1090
+ private handleWebSocketOpen;
1091
+ /**
1092
+ * Handle incoming WebSocket messages
1093
+ */
1094
+ private handleWebSocketMessage;
1095
+ /**
1096
+ * Process transcription result from Deepgram
1097
+ */
1098
+ private processTranscriptionResult;
1099
+ /**
1100
+ * Handle Deepgram-specific errors
1101
+ */
1102
+ private handleDeepgramError;
1103
+ /**
1104
+ * Handle WebSocket error
1105
+ */
1106
+ private handleWebSocketError;
1107
+ /**
1108
+ * Handle WebSocket close
1109
+ */
1110
+ private handleWebSocketClose;
1111
+ /**
1112
+ * Attempt to reconnect
1113
+ */
1114
+ private reconnect;
1115
+ /**
1116
+ * Start keep-alive interval
1117
+ */
1118
+ private startKeepAlive;
1119
+ /**
1120
+ * Stop keep-alive interval
1121
+ */
1122
+ private stopKeepAlive;
1123
+ /**
1124
+ * Request microphone access
1125
+ */
1126
+ private getMicrophoneAccess;
1127
+ /**
1128
+ * Set up audio processing pipeline
1129
+ */
1130
+ private setupAudioProcessing;
1131
+ /**
1132
+ * Stop audio processing
1133
+ */
1134
+ private stopAudioProcessing;
1135
+ /**
1136
+ * Convert Float32 audio samples to Int16
1137
+ */
1138
+ private convertFloat32ToInt16;
1139
+ /**
1140
+ * Close WebSocket connection
1141
+ */
1142
+ private closeWebSocket;
1143
+ /**
1144
+ * Stop media stream tracks
1145
+ */
1146
+ private stopMediaStream;
1147
+ }
1148
+
1149
+ /**
1150
+ * AssemblyAI-specific configuration options
1151
+ */
1152
+ interface AssemblyAIOptions {
1153
+ /** Keywords to boost recognition */
1154
+ wordBoost?: string[];
1155
+ /** Boost parameter strength */
1156
+ boostParam?: 'low' | 'default' | 'high';
1157
+ /** Disable partial (interim) transcripts */
1158
+ disablePartialTranscripts?: boolean;
1159
+ /** Auto-format text */
1160
+ formatText?: boolean;
1161
+ /** Enable punctuation */
1162
+ punctuate?: boolean;
1163
+ /** Include disfluencies (um, uh) */
1164
+ disfluencies?: boolean;
1165
+ /** Enable multichannel processing */
1166
+ multichannel?: boolean;
1167
+ /** Enable dual channel processing */
1168
+ dualChannel?: boolean;
1169
+ /** Enable speaker labels */
1170
+ speakerLabels?: boolean;
1171
+ /** Expected number of speakers */
1172
+ speakersExpected?: number;
1173
+ /** Enable entity detection */
1174
+ entityDetection?: boolean;
1175
+ /** Enable sentiment analysis */
1176
+ sentimentAnalysis?: boolean;
1177
+ /** Enable auto highlights */
1178
+ autoHighlights?: boolean;
1179
+ /** Enable content safety detection */
1180
+ contentSafety?: boolean;
1181
+ }
1182
+ /**
1183
+ * Transcription provider using AssemblyAI's real-time WebSocket API
1184
+ * Provides high-accuracy transcription with advanced features
1185
+ */
1186
+ declare class AssemblyAIProvider extends BaseTranscriber {
1187
+ /** WebSocket connection */
1188
+ private socket;
1189
+ /** Media stream from microphone */
1190
+ private mediaStream;
1191
+ /** Audio context for processing */
1192
+ private audioContext;
1193
+ /** Audio processor node */
1194
+ private processor;
1195
+ /** Session token for WebSocket authentication */
1196
+ private sessionToken;
1197
+ /** Connection attempt counter */
1198
+ private connectionAttempts;
1199
+ /** Maximum reconnection attempts */
1200
+ private readonly maxRetries;
1201
+ /** Reconnection timeout */
1202
+ private reconnectTimeout;
1203
+ /** Flag indicating if connection is ready */
1204
+ private isConnectionReady;
1205
+ /** Flag for intentional close */
1206
+ private isIntentionalClose;
1207
+ /** Session ID from AssemblyAI */
1208
+ private sessionId;
1209
+ /** Provider capabilities */
1210
+ static readonly capabilities: ProviderCapabilities;
1211
+ /**
1212
+ * Create a new AssemblyAIProvider
1213
+ * @param config - Transcription configuration with API key
1214
+ */
1215
+ constructor(config: Omit<TranscriptionConfig, 'provider'> & {
1216
+ provider?: TranscriptionProvider;
1217
+ });
1218
+ /**
1219
+ * Check if AssemblyAI provider is supported
1220
+ */
1221
+ isSupported(): boolean;
1222
+ /**
1223
+ * Initialize the AssemblyAI provider
1224
+ */
1225
+ initialize(): Promise<void>;
1226
+ /**
1227
+ * Start transcription
1228
+ */
1229
+ start(): Promise<void>;
1230
+ /**
1231
+ * Stop transcription
1232
+ */
1233
+ stop(): Promise<void>;
1234
+ /**
1235
+ * Pause transcription
1236
+ */
1237
+ pause(): void;
1238
+ /**
1239
+ * Resume transcription
1240
+ */
1241
+ resume(): void;
1242
+ /**
1243
+ * Send audio data through WebSocket
1244
+ * @param audioData - Raw audio data as ArrayBuffer
1245
+ */
1246
+ sendAudio(audioData: ArrayBuffer): void;
1247
+ /**
1248
+ * Clean up all resources
1249
+ */
1250
+ cleanup(): Promise<void>;
1251
+ /**
1252
+ * Get provider capabilities
1253
+ */
1254
+ getCapabilities(): ProviderCapabilities;
1255
+ /**
1256
+ * Get temporary session token from AssemblyAI
1257
+ */
1258
+ private getSessionToken;
1259
+ /**
1260
+ * Set up WebSocket connection
1261
+ */
1262
+ private setupWebSocket;
1263
+ /**
1264
+ * Handle WebSocket open event
1265
+ */
1266
+ private handleWebSocketOpen;
1267
+ /**
1268
+ * Handle incoming WebSocket messages
1269
+ */
1270
+ private handleWebSocketMessage;
1271
+ /**
1272
+ * Handle SessionBegins message
1273
+ */
1274
+ private handleSessionBegins;
1275
+ /**
1276
+ * Handle partial (interim) transcript
1277
+ */
1278
+ private handlePartialTranscript;
1279
+ /**
1280
+ * Handle final transcript
1281
+ */
1282
+ private handleFinalTranscript;
1283
+ /**
1284
+ * Handle session terminated message
1285
+ */
1286
+ private handleSessionTerminated;
1287
+ /**
1288
+ * Handle AssemblyAI-specific errors
1289
+ */
1290
+ private handleAssemblyAIError;
1291
+ /**
1292
+ * Handle WebSocket error
1293
+ */
1294
+ private handleWebSocketError;
1295
+ /**
1296
+ * Handle WebSocket close
1297
+ */
1298
+ private handleWebSocketClose;
1299
+ /**
1300
+ * Attempt to reconnect
1301
+ */
1302
+ private reconnect;
1303
+ /**
1304
+ * Request microphone access
1305
+ */
1306
+ private getMicrophoneAccess;
1307
+ /**
1308
+ * Set up audio processing pipeline
1309
+ */
1310
+ private setupAudioProcessing;
1311
+ /**
1312
+ * Stop audio processing
1313
+ */
1314
+ private stopAudioProcessing;
1315
+ /**
1316
+ * Convert Float32 audio samples to PCM16 (Int16)
1317
+ */
1318
+ private convertFloat32ToPCM16;
1319
+ /**
1320
+ * Encode ArrayBuffer to base64
1321
+ */
1322
+ private encodeAudioToBase64;
1323
+ /**
1324
+ * Send audio data message
1325
+ */
1326
+ private sendAudioMessage;
1327
+ /**
1328
+ * Send terminate session message
1329
+ */
1330
+ private sendTerminateMessage;
1331
+ /**
1332
+ * Close WebSocket connection
1333
+ */
1334
+ private closeWebSocket;
1335
+ /**
1336
+ * Stop media stream tracks
1337
+ */
1338
+ private stopMediaStream;
1339
+ }
1340
+
1341
+ /**
1342
+ * Static utility class for audio processing operations
1343
+ * Provides conversion, resampling, and format utilities
1344
+ */
1345
+ declare class AudioProcessor {
1346
+ /**
1347
+ * Convert Float32 audio samples to Int16
1348
+ * @param buffer - Input Float32Array
1349
+ * @returns Int16Array of converted samples
1350
+ */
1351
+ static convertFloat32ToInt16(buffer: Float32Array): Int16Array;
1352
+ /**
1353
+ * Convert Int16 audio samples to Float32
1354
+ * @param buffer - Input Int16Array
1355
+ * @returns Float32Array of converted samples
1356
+ */
1357
+ static convertInt16ToFloat32(buffer: Int16Array): Float32Array;
1358
+ /**
1359
+ * Resample audio buffer to different sample rate
1360
+ * Uses linear interpolation
1361
+ * @param buffer - Input audio buffer
1362
+ * @param fromRate - Source sample rate
1363
+ * @param toRate - Target sample rate
1364
+ * @returns Resampled Float32Array
1365
+ */
1366
+ static resampleBuffer(buffer: Float32Array, fromRate: number, toRate: number): Float32Array;
1367
+ /**
1368
+ * Downsample audio buffer (optimized for reducing sample rate)
1369
+ * @param buffer - Input audio buffer
1370
+ * @param fromRate - Source sample rate
1371
+ * @param toRate - Target sample rate
1372
+ * @returns Downsampled Float32Array
1373
+ */
1374
+ static downsampleBuffer(buffer: Float32Array, fromRate: number, toRate: number): Float32Array;
1375
+ /**
1376
+ * Upsample audio buffer (optimized for increasing sample rate)
1377
+ * @param buffer - Input audio buffer
1378
+ * @param fromRate - Source sample rate
1379
+ * @param toRate - Target sample rate
1380
+ * @returns Upsampled Float32Array
1381
+ */
1382
+ static upsampleBuffer(buffer: Float32Array, fromRate: number, toRate: number): Float32Array;
1383
+ /**
1384
+ * Normalize audio buffer to peak amplitude of 1.0
1385
+ * @param buffer - Input audio buffer
1386
+ * @returns Normalized Float32Array
1387
+ */
1388
+ static normalizeBuffer(buffer: Float32Array): Float32Array;
1389
+ /**
1390
+ * Apply gain to audio buffer
1391
+ * @param buffer - Input audio buffer
1392
+ * @param gain - Gain multiplier
1393
+ * @returns Processed Float32Array
1394
+ */
1395
+ static applyGain(buffer: Float32Array, gain: number): Float32Array;
1396
+ /**
1397
+ * Mix two audio buffers together
1398
+ * @param buffer1 - First audio buffer
1399
+ * @param buffer2 - Second audio buffer
1400
+ * @param ratio - Mix ratio (0-1, where 0.5 is equal mix)
1401
+ * @returns Mixed Float32Array
1402
+ */
1403
+ static mixBuffers(buffer1: Float32Array, buffer2: Float32Array, ratio?: number): Float32Array;
1404
+ /**
1405
+ * Convert AudioBuffer to WAV format
1406
+ * @param audioBuffer - Web Audio API AudioBuffer
1407
+ * @param sampleRate - Output sample rate (defaults to buffer's sample rate)
1408
+ * @returns WAV file as ArrayBuffer
1409
+ */
1410
+ static bufferToWav(audioBuffer: AudioBuffer, sampleRate?: number): ArrayBuffer;
1411
+ /**
1412
+ * Create WAV file header
1413
+ * @param dataLength - Length of audio data in bytes
1414
+ * @param sampleRate - Sample rate
1415
+ * @param channels - Number of channels
1416
+ * @param bitDepth - Bits per sample
1417
+ * @returns WAV header as ArrayBuffer
1418
+ */
1419
+ static createWavHeader(dataLength: number, sampleRate: number, channels: number, bitDepth: number): ArrayBuffer;
1420
+ /**
1421
+ * Write string to DataView
1422
+ * @param view - DataView to write to
1423
+ * @param offset - Byte offset
1424
+ * @param string - String to write
1425
+ */
1426
+ private static writeString;
1427
+ /**
1428
+ * Convert raw PCM Float32 array to WAV ArrayBuffer
1429
+ * @param samples - Float32Array of audio samples
1430
+ * @param sampleRate - Sample rate
1431
+ * @returns WAV file as ArrayBuffer
1432
+ */
1433
+ static float32ToWav(samples: Float32Array, sampleRate: number): ArrayBuffer;
1434
+ }
1435
+
1436
+ /**
1437
+ * Voice Activity Detection options
1438
+ */
1439
+ interface VADOptions {
1440
+ /** Energy threshold for speech detection (0-1, default: 0.01) */
1441
+ threshold?: number;
1442
+ /** Minimum duration of speech in ms to trigger start (default: 300) */
1443
+ minSpeechDuration?: number;
1444
+ /** Minimum duration of silence in ms to trigger end (default: 500) */
1445
+ minSilenceDuration?: number;
1446
+ /** Size of energy history buffer (default: 10) */
1447
+ historySize?: number;
1448
+ /** Callback when speech starts */
1449
+ onSpeechStart?: () => void;
1450
+ /** Callback when speech ends */
1451
+ onSpeechEnd?: () => void;
1452
+ /** Callback for volume level changes */
1453
+ onVolumeChange?: (level: number) => void;
1454
+ }
1455
+ /**
1456
+ * Voice Activity Detector using energy-based detection
1457
+ * Detects speech and silence in audio streams
1458
+ */
1459
+ declare class VoiceActivityDetector {
1460
+ /** Energy threshold for speech detection */
1461
+ private threshold;
1462
+ /** Minimum speech duration in ms */
1463
+ private minSpeechDuration;
1464
+ /** Minimum silence duration in ms */
1465
+ private minSilenceDuration;
1466
+ /** Energy history buffer */
1467
+ private energyHistory;
1468
+ /** History buffer size */
1469
+ private historySize;
1470
+ /** Current speaking state */
1471
+ private isSpeaking;
1472
+ /** Speech start time */
1473
+ private speechStartTime;
1474
+ /** Silence start time */
1475
+ private silenceStartTime;
1476
+ /** Speech start callback */
1477
+ private onSpeechStart?;
1478
+ /** Speech end callback */
1479
+ private onSpeechEnd?;
1480
+ /** Volume change callback */
1481
+ private onVolumeChange?;
1482
+ /** Last processed timestamp */
1483
+ private lastProcessTime;
1484
+ /**
1485
+ * Create a new VoiceActivityDetector
1486
+ * @param options - VAD configuration options
1487
+ */
1488
+ constructor(options?: VADOptions);
1489
+ /**
1490
+ * Process audio data and detect voice activity
1491
+ * @param audioData - Audio samples as Float32Array
1492
+ * @returns Current speaking state
1493
+ */
1494
+ processAudio(audioData: Float32Array): boolean;
1495
+ /**
1496
+ * Calculate RMS (Root Mean Square) energy of audio buffer
1497
+ * @param buffer - Audio samples
1498
+ * @returns RMS energy value (0-1)
1499
+ */
1500
+ calculateRMSEnergy(buffer: Float32Array): number;
1501
+ /**
1502
+ * Calculate adaptive threshold based on energy history
1503
+ * @returns Adaptive threshold value
1504
+ */
1505
+ calculateAdaptiveThreshold(): number;
1506
+ /**
1507
+ * Reset detector state
1508
+ */
1509
+ reset(): void;
1510
+ /**
1511
+ * Update threshold value
1512
+ * @param threshold - New threshold (0-1)
1513
+ */
1514
+ setThreshold(threshold: number): void;
1515
+ /**
1516
+ * Get average energy from history
1517
+ * @returns Average energy value
1518
+ */
1519
+ getAverageEnergy(): number;
1520
+ /**
1521
+ * Check if speech is currently detected
1522
+ * @returns Speaking state
1523
+ */
1524
+ isSpeechDetected(): boolean;
1525
+ /**
1526
+ * Get current threshold
1527
+ * @returns Threshold value
1528
+ */
1529
+ getThreshold(): number;
1530
+ /**
1531
+ * Update callbacks
1532
+ * @param callbacks - New callback functions
1533
+ */
1534
+ setCallbacks(callbacks: {
1535
+ onSpeechStart?: () => void;
1536
+ onSpeechEnd?: () => void;
1537
+ onVolumeChange?: (level: number) => void;
1538
+ }): void;
1539
+ }
1540
+
1541
+ /**
1542
+ * Audio level monitor options
1543
+ */
1544
+ interface AudioLevelMonitorOptions {
1545
+ /** Smoothing factor for level calculation (0-1, default: 0.8) */
1546
+ smoothingFactor?: number;
1547
+ /** Callback for level changes */
1548
+ onLevelChange?: (level: number) => void;
1549
+ }
1550
+ /**
1551
+ * Monitors and reports audio levels with smoothing
1552
+ */
1553
+ declare class AudioLevelMonitor {
1554
+ /** Smoothing factor (0-1, higher = smoother) */
1555
+ private smoothingFactor;
1556
+ /** Current smoothed level */
1557
+ private currentLevel;
1558
+ /** Peak level since last reset */
1559
+ private peakLevel;
1560
+ /** Level change callback */
1561
+ private onLevelChange?;
1562
+ /**
1563
+ * Create a new AudioLevelMonitor
1564
+ * @param options - Monitor configuration
1565
+ */
1566
+ constructor(options?: AudioLevelMonitorOptions);
1567
+ /**
1568
+ * Process audio data and update levels
1569
+ * @param audioData - Audio samples as Float32Array
1570
+ * @returns Current smoothed level
1571
+ */
1572
+ processAudio(audioData: Float32Array): number;
1573
+ /**
1574
+ * Calculate RMS level of audio buffer
1575
+ * @param buffer - Audio samples
1576
+ * @returns Level value (0-1)
1577
+ */
1578
+ calculateLevel(buffer: Float32Array): number;
1579
+ /**
1580
+ * Get current smoothed level
1581
+ * @returns Current level (0-1)
1582
+ */
1583
+ getCurrentLevel(): number;
1584
+ /**
1585
+ * Get peak level since last reset
1586
+ * @returns Peak level (0-1)
1587
+ */
1588
+ getPeakLevel(): number;
1589
+ /**
1590
+ * Reset current and peak levels
1591
+ */
1592
+ reset(): void;
1593
+ /**
1594
+ * Reset only the peak level
1595
+ */
1596
+ resetPeak(): void;
1597
+ /**
1598
+ * Convert current level to decibels
1599
+ * @returns Level in dB (typically -60 to 0)
1600
+ */
1601
+ getDecibels(): number;
1602
+ /**
1603
+ * Convert specific level to decibels
1604
+ * @param level - Level value (0-1)
1605
+ * @returns Level in dB
1606
+ */
1607
+ static toDecibels(level: number): number;
1608
+ /**
1609
+ * Convert decibels to linear level
1610
+ * @param db - Level in decibels
1611
+ * @returns Linear level (0-1)
1612
+ */
1613
+ static fromDecibels(db: number): number;
1614
+ /**
1615
+ * Set smoothing factor
1616
+ * @param factor - Smoothing factor (0-1)
1617
+ */
1618
+ setSmoothingFactor(factor: number): void;
1619
+ /**
1620
+ * Get current smoothing factor
1621
+ * @returns Smoothing factor
1622
+ */
1623
+ getSmoothingFactor(): number;
1624
+ /**
1625
+ * Set level change callback
1626
+ * @param callback - Callback function
1627
+ */
1628
+ setOnLevelChange(callback: ((level: number) => void) | undefined): void;
1629
+ /**
1630
+ * Get level as percentage (0-100)
1631
+ * @returns Level percentage
1632
+ */
1633
+ getLevelPercentage(): number;
1634
+ }
1635
+
1636
+ /**
1637
+ * Circular buffer manager for audio chunks
1638
+ * Efficiently manages audio data for streaming applications
1639
+ */
1640
+ declare class AudioBufferManager {
1641
+ /** Maximum number of chunks to store */
1642
+ private bufferSize;
1643
+ /** Buffer storage */
1644
+ private buffer;
1645
+ /** Write position */
1646
+ private writeIndex;
1647
+ /** Read position */
1648
+ private readIndex;
1649
+ /** Number of available chunks */
1650
+ private count;
1651
+ /**
1652
+ * Create a new AudioBufferManager
1653
+ * @param bufferSize - Maximum number of chunks to store
1654
+ */
1655
+ constructor(bufferSize?: number);
1656
+ /**
1657
+ * Write a chunk to the buffer
1658
+ * @param chunk - Audio data chunk
1659
+ */
1660
+ write(chunk: Float32Array): void;
1661
+ /**
1662
+ * Read and remove chunks from the buffer
1663
+ * @param numChunks - Number of chunks to read (default: all available)
1664
+ * @returns Array of audio chunks
1665
+ */
1666
+ read(numChunks?: number): Float32Array[];
1667
+ /**
1668
+ * Read chunks without removing them
1669
+ * @param numChunks - Number of chunks to peek (default: all available)
1670
+ * @returns Array of audio chunks
1671
+ */
1672
+ peek(numChunks?: number): Float32Array[];
1673
+ /**
1674
+ * Clear all data from buffer
1675
+ */
1676
+ clear(): void;
1677
+ /**
1678
+ * Get number of available chunks
1679
+ * @returns Number of chunks in buffer
1680
+ */
1681
+ getAvailableChunks(): number;
1682
+ /**
1683
+ * Check if buffer is full
1684
+ * @returns True if buffer is full
1685
+ */
1686
+ isFull(): boolean;
1687
+ /**
1688
+ * Check if buffer is empty
1689
+ * @returns True if buffer is empty
1690
+ */
1691
+ isEmpty(): boolean;
1692
+ /**
1693
+ * Concatenate multiple chunks into a single buffer
1694
+ * @param chunks - Array of audio chunks
1695
+ * @returns Single concatenated Float32Array
1696
+ */
1697
+ concatenateChunks(chunks: Float32Array[]): Float32Array;
1698
+ /**
1699
+ * Get all data as a single concatenated buffer
1700
+ * @returns Concatenated Float32Array
1701
+ */
1702
+ getAll(): Float32Array;
1703
+ /**
1704
+ * Get total number of samples across all chunks
1705
+ * @returns Total sample count
1706
+ */
1707
+ getTotalSamples(): number;
1708
+ /**
1709
+ * Get buffer capacity
1710
+ * @returns Maximum number of chunks
1711
+ */
1712
+ getCapacity(): number;
1713
+ /**
1714
+ * Resize the buffer
1715
+ * @param newSize - New buffer size
1716
+ */
1717
+ resize(newSize: number): void;
1718
+ }
1719
+
1720
+ /**
1721
+ * Audio recording format
1722
+ */
1723
+ type AudioFormat = 'raw' | 'wav';
1724
+ /**
1725
+ * Records audio chunks and exports to various formats
1726
+ */
1727
+ declare class AudioRecorder {
1728
+ /** Recorded audio chunks */
1729
+ private audioChunks;
1730
+ /** Sample rate */
1731
+ private sampleRate;
1732
+ /** Recording state */
1733
+ private isRecording;
1734
+ /** Recording start time */
1735
+ private startTime;
1736
+ /**
1737
+ * Create a new AudioRecorder
1738
+ * @param sampleRate - Sample rate for recording
1739
+ */
1740
+ constructor(sampleRate?: number);
1741
+ /**
1742
+ * Start recording
1743
+ */
1744
+ start(): void;
1745
+ /**
1746
+ * Record an audio chunk
1747
+ * @param audioData - Audio data to record
1748
+ */
1749
+ recordChunk(audioData: Float32Array): void;
1750
+ /**
1751
+ * Stop recording and return all recorded audio
1752
+ * @returns Complete audio as Float32Array
1753
+ */
1754
+ stop(): Float32Array;
1755
+ /**
1756
+ * Clear all recorded audio
1757
+ */
1758
+ clear(): void;
1759
+ /**
1760
+ * Export recording to specified format
1761
+ * @param format - Output format ('raw' or 'wav')
1762
+ * @returns Audio data as ArrayBuffer
1763
+ */
1764
+ export(format?: AudioFormat): ArrayBuffer;
1765
+ /**
1766
+ * Get recording duration in seconds
1767
+ * @returns Duration in seconds
1768
+ */
1769
+ getDuration(): number;
1770
+ /**
1771
+ * Get number of recorded chunks
1772
+ * @returns Chunk count
1773
+ */
1774
+ getChunkCount(): number;
1775
+ /**
1776
+ * Check if currently recording
1777
+ * @returns Recording state
1778
+ */
1779
+ getIsRecording(): boolean;
1780
+ /**
1781
+ * Get sample rate
1782
+ * @returns Sample rate
1783
+ */
1784
+ getSampleRate(): number;
1785
+ /**
1786
+ * Set sample rate (only effective before recording starts)
1787
+ * @param sampleRate - New sample rate
1788
+ */
1789
+ setSampleRate(sampleRate: number): void;
1790
+ /**
1791
+ * Get total number of recorded samples
1792
+ * @returns Sample count
1793
+ */
1794
+ getTotalSamples(): number;
1795
+ /**
1796
+ * Get recording start time
1797
+ * @returns Start timestamp or null
1798
+ */
1799
+ getStartTime(): number | null;
1800
+ /**
1801
+ * Get elapsed recording time in milliseconds
1802
+ * @returns Elapsed time in ms
1803
+ */
1804
+ getElapsedTime(): number;
1805
+ /**
1806
+ * Get combined audio data
1807
+ * @returns Concatenated Float32Array
1808
+ */
1809
+ private getCombinedAudio;
1810
+ /**
1811
+ * Create a Blob from the recording
1812
+ * @param format - Output format
1813
+ * @returns Blob with audio data
1814
+ */
1815
+ toBlob(format?: AudioFormat): Blob;
1816
+ /**
1817
+ * Create a data URL from the recording
1818
+ * @param format - Output format
1819
+ * @returns Data URL string
1820
+ */
1821
+ toDataURL(format?: AudioFormat): string;
1822
+ /**
1823
+ * Download the recording
1824
+ * @param filename - Output filename
1825
+ * @param format - Output format
1826
+ */
1827
+ download(filename?: string, format?: AudioFormat): void;
1828
+ }
1829
+
1830
+ /**
1831
+ * Interface for storage adapters
1832
+ * Provides abstraction for different storage backends
1833
+ */
1834
+ interface StorageAdapter {
1835
+ /**
1836
+ * Save data to storage
1837
+ * @param key - Storage key
1838
+ * @param data - Data to store
1839
+ */
1840
+ save(key: string, data: unknown): Promise<void>;
1841
+ /**
1842
+ * Load data from storage
1843
+ * @param key - Storage key
1844
+ * @returns Stored data or null if not found
1845
+ */
1846
+ load(key: string): Promise<unknown | null>;
1847
+ /**
1848
+ * Delete data from storage
1849
+ * @param key - Storage key
1850
+ */
1851
+ delete(key: string): Promise<void>;
1852
+ /**
1853
+ * List all keys in storage
1854
+ * @returns Array of keys
1855
+ */
1856
+ list(): Promise<string[]>;
1857
+ /**
1858
+ * Check if key exists in storage
1859
+ * @param key - Storage key
1860
+ * @returns True if key exists
1861
+ */
1862
+ exists(key: string): Promise<boolean>;
1863
+ }
1864
+ /**
1865
+ * Storage adapter using browser localStorage
1866
+ * Suitable for small data (<5MB)
1867
+ */
1868
+ declare class LocalStorageAdapter implements StorageAdapter {
1869
+ /** Prefix for all keys */
1870
+ private prefix;
1871
+ /**
1872
+ * Create a new LocalStorageAdapter
1873
+ * @param prefix - Key prefix (default: 'live-transcribe')
1874
+ */
1875
+ constructor(prefix?: string);
1876
+ /**
1877
+ * Get prefixed key
1878
+ */
1879
+ private getKey;
1880
+ save(key: string, data: unknown): Promise<void>;
1881
+ load(key: string): Promise<unknown | null>;
1882
+ delete(key: string): Promise<void>;
1883
+ list(): Promise<string[]>;
1884
+ exists(key: string): Promise<boolean>;
1885
+ }
1886
+ /**
1887
+ * In-memory storage adapter
1888
+ * Useful for testing or temporary storage
1889
+ */
1890
+ declare class MemoryStorageAdapter implements StorageAdapter {
1891
+ private storage;
1892
+ save(key: string, data: unknown): Promise<void>;
1893
+ load(key: string): Promise<unknown | null>;
1894
+ delete(key: string): Promise<void>;
1895
+ list(): Promise<string[]>;
1896
+ exists(key: string): Promise<boolean>;
1897
+ /**
1898
+ * Clear all data
1899
+ */
1900
+ clear(): void;
1901
+ /**
1902
+ * Get storage size
1903
+ */
1904
+ size(): number;
1905
+ }
1906
+
1907
+ /**
1908
+ * Session data for JSON export
1909
+ */
1910
+ interface SessionData$4 {
1911
+ metadata: SessionMetadata;
1912
+ transcripts: TranscriptionResult[];
1913
+ }
1914
+ /**
1915
+ * Exports transcription sessions to JSON format
1916
+ */
1917
+ declare class JSONExporter {
1918
+ /**
1919
+ * Export session to minified JSON string
1920
+ * @param session - Session data to export
1921
+ * @returns JSON string
1922
+ */
1923
+ static export(session: SessionData$4): string;
1924
+ /**
1925
+ * Export session to formatted/pretty JSON string
1926
+ * @param session - Session data to export
1927
+ * @param indent - Indentation spaces (default: 2)
1928
+ * @returns Formatted JSON string
1929
+ */
1930
+ static exportPretty(session: SessionData$4, indent?: number): string;
1931
+ /**
1932
+ * Parse JSON and validate structure
1933
+ * @param json - JSON string to parse
1934
+ * @returns Parsed session export data
1935
+ */
1936
+ static parse(json: string): SessionExportData;
1937
+ }
1938
+
1939
+ /**
1940
+ * Session data for text export
1941
+ */
1942
+ interface SessionData$3 {
1943
+ transcripts: TranscriptionResult[];
1944
+ }
1945
+ /**
1946
+ * Exports transcription sessions to plain text format
1947
+ */
1948
+ declare class TextExporter {
1949
+ /**
1950
+ * Export session transcripts to plain text
1951
+ * @param session - Session data to export
1952
+ * @param options - Export options
1953
+ * @returns Plain text string
1954
+ */
1955
+ static export(session: SessionData$3, options?: TextExportOptions): string;
1956
+ /**
1957
+ * Export as continuous text without any formatting
1958
+ * @param session - Session data to export
1959
+ * @returns Plain text string
1960
+ */
1961
+ static exportPlain(session: SessionData$3): string;
1962
+ }
1963
+
1964
+ /**
1965
+ * Session data for SRT export
1966
+ */
1967
+ interface SessionData$2 {
1968
+ transcripts: TranscriptionResult[];
1969
+ }
1970
+ /**
1971
+ * Exports transcription sessions to SRT (SubRip) subtitle format
1972
+ */
1973
+ declare class SRTExporter {
1974
+ /**
1975
+ * Export session transcripts to SRT format
1976
+ * @param session - Session data to export
1977
+ * @returns SRT formatted string
1978
+ */
1979
+ static export(session: SessionData$2): string;
1980
+ /**
1981
+ * Format milliseconds to SRT timestamp format (HH:MM:SS,mmm)
1982
+ * @param ms - Time in milliseconds
1983
+ * @returns Formatted timestamp
1984
+ */
1985
+ private static formatTime;
1986
+ /**
1987
+ * Pad number with leading zeros
1988
+ * @param num - Number to pad
1989
+ * @param length - Target length
1990
+ * @returns Padded string
1991
+ */
1992
+ private static pad;
1993
+ }
1994
+
1995
+ /**
1996
+ * Session data for VTT export
1997
+ */
1998
+ interface SessionData$1 {
1999
+ transcripts: TranscriptionResult[];
2000
+ }
2001
+ /**
2002
+ * Exports transcription sessions to WebVTT subtitle format
2003
+ */
2004
+ declare class VTTExporter {
2005
+ /**
2006
+ * Export session transcripts to WebVTT format
2007
+ * @param session - Session data to export
2008
+ * @returns WebVTT formatted string
2009
+ */
2010
+ static export(session: SessionData$1): string;
2011
+ /**
2012
+ * Export with cue identifiers
2013
+ * @param session - Session data to export
2014
+ * @param cuePrefix - Prefix for cue identifiers
2015
+ * @returns WebVTT formatted string with cue IDs
2016
+ */
2017
+ static exportWithCues(session: SessionData$1, cuePrefix?: string): string;
2018
+ /**
2019
+ * Format milliseconds to WebVTT timestamp format (HH:MM:SS.mmm)
2020
+ * @param ms - Time in milliseconds
2021
+ * @returns Formatted timestamp
2022
+ */
2023
+ private static formatTime;
2024
+ /**
2025
+ * Pad number with leading zeros
2026
+ */
2027
+ private static pad;
2028
+ }
2029
+
2030
/**
 * Session data required for CSV export.
 */
interface SessionData {
    /** Transcription results to be serialized */
    transcripts: TranscriptionResult[];
}
/**
 * Exports transcription sessions to CSV format.
 * All members are static.
 */
declare class CSVExporter {
    /**
     * Export session transcripts to CSV format.
     * @param session - Session data to export
     * @param options - Export options
     * @returns CSV formatted string
     */
    static export(session: SessionData, options?: CSVExportOptions): string;
    /**
     * Export only final transcripts (interim results excluded).
     * @param session - Session data to export
     * @param options - Export options
     * @returns CSV formatted string
     */
    static exportFinalOnly(session: SessionData, options?: CSVExportOptions): string;
    /**
     * Get a single field value from a transcript.
     * @param transcript - Transcription result
     * @param field - Field name
     * @returns Field value
     */
    private static getFieldValue;
    /**
     * Escape a field for CSV output (quoting/delimiter handling).
     * @param field - Field value
     * @param delimiter - CSV delimiter
     * @returns Escaped field
     */
    private static escapeField;
}
/**
 * Details of a single validation failure.
 */
interface ValidationError {
    /** Field that failed validation */
    field: string;
    /** Human-readable error message */
    message: string;
    /** Machine-readable error code */
    code: string;
}
/**
 * Details of a non-fatal validation warning.
 */
interface ValidationWarning {
    /** Field the warning refers to */
    field: string;
    /** Human-readable warning message */
    message: string;
}
/**
 * Result of a validation run.
 */
interface ValidationResult {
    /** Whether validation passed */
    valid: boolean;
    /** List of errors */
    errors: ValidationError[];
    /** Optional list of non-fatal warnings */
    warnings?: ValidationWarning[];
}
/**
 * Validate a transcription configuration.
 * @param config - Configuration to validate
 * @returns Validation result
 */
declare function validateTranscriptionConfig(config: TranscriptionConfig): ValidationResult;
/**
 * Validate an audio configuration.
 * @param config - Audio config to validate
 * @returns Validation result
 */
declare function validateAudioConfig(config: AudioConfig): ValidationResult;
/**
 * Validate a session configuration.
 * @param config - Session config to validate
 * @returns Validation result
 */
declare function validateSessionConfig(config: SessionConfig): ValidationResult;
/**
 * Validate a BCP-47 language code (e.g. 'en-US').
 * @param code - Language code to validate
 * @returns True if valid
 */
declare function validateLanguageCode(code: string): boolean;
/**
 * Validate API key format for a provider. The key is optional because some
 * providers (e.g. the Web Speech API) do not require one.
 * @param provider - Transcription provider
 * @param key - API key to validate
 * @returns Validation result
 */
declare function validateApiKey(provider: TranscriptionProvider, key?: string): ValidationResult;
/**
 * Information about the detected browser environment.
 */
interface BrowserInfo {
    /** Browser name */
    name: string;
    /** Browser version */
    version: string;
    /** Operating system */
    os: string;
    /** Whether the device is mobile */
    isMobile: boolean;
}
/**
 * Result of a single feature-support check.
 */
interface SupportCheck {
    /** Whether the feature is supported */
    supported: boolean;
    /** Details about support status */
    details: string;
    /** Fallback recommendation if not supported */
    fallback?: string;
}
/**
 * Full browser-compatibility report aggregating all feature checks.
 */
interface CompatibilityReport {
    /** Browser information */
    browser: BrowserInfo;
    /** Web Speech API support */
    webSpeechAPI: SupportCheck;
    /** WebSocket support */
    webSocket: SupportCheck;
    /** Media devices (getUserMedia) support */
    mediaDevices: SupportCheck;
    /** AudioContext support */
    audioContext: SupportCheck;
    /** Overall compatibility verdict */
    overallCompatible: boolean;
    /** Recommendations for improvement */
    recommendations: string[];
}
/**
 * Get browser information.
 * @returns Browser info object
 */
declare function getBrowserInfo(): BrowserInfo;
/**
 * Check Web Speech API support.
 * @returns Support check result
 */
declare function checkWebSpeechAPISupport(): SupportCheck;
/**
 * Check WebSocket support.
 * @returns Support check result
 */
declare function checkWebSocketSupport(): SupportCheck;
/**
 * Check media devices support.
 * @returns Support check result
 */
declare function checkMediaDevicesSupport(): SupportCheck;
/**
 * Check AudioContext support.
 * @returns Support check result
 */
declare function checkAudioContextSupport(): SupportCheck;
/**
 * Get a comprehensive compatibility report combining all checks above.
 * @returns Full compatibility report
 */
declare function getFullCompatibilityReport(): CompatibilityReport;
+ /**
2208
+ * Retry options
2209
+ */
2210
+ interface RetryOptions {
2211
+ /** Maximum number of attempts (default: 3) */
2212
+ maxAttempts?: number;
2213
+ /** Initial delay in ms (default: 1000) */
2214
+ delay?: number;
2215
+ /** Backoff strategy (default: 'exponential') */
2216
+ backoff?: 'linear' | 'exponential';
2217
+ /** Maximum delay in ms (default: 30000) */
2218
+ maxDelay?: number;
2219
+ /** Function to determine if should retry */
2220
+ shouldRetry?: (error: unknown) => boolean;
2221
+ }
2222
+ /**
2223
+ * Create a debounced function
2224
+ * @param func - Function to debounce
2225
+ * @param wait - Wait time in ms
2226
+ * @returns Debounced function
2227
+ */
2228
+ declare function debounce<T extends (...args: unknown[]) => unknown>(func: T, wait: number): (...args: Parameters<T>) => void;
2229
+ /**
2230
+ * Create a throttled function
2231
+ * @param func - Function to throttle
2232
+ * @param limit - Minimum interval in ms
2233
+ * @returns Throttled function
2234
+ */
2235
+ declare function throttle<T extends (...args: unknown[]) => unknown>(func: T, limit: number): (...args: Parameters<T>) => void;
2236
+ /**
2237
+ * Async sleep utility
2238
+ * @param ms - Milliseconds to sleep
2239
+ * @returns Promise that resolves after delay
2240
+ */
2241
+ declare function sleep(ms: number): Promise<void>;
2242
+ /**
2243
+ * Add timeout to a promise
2244
+ * @param promise - Promise to wrap
2245
+ * @param ms - Timeout in milliseconds
2246
+ * @param message - Error message on timeout
2247
+ * @returns Promise that rejects if timeout exceeded
2248
+ */
2249
+ declare function timeout<T>(promise: Promise<T>, ms: number, message?: string): Promise<T>;
2250
+ /**
2251
+ * Retry a function with backoff
2252
+ * @param fn - Async function to retry
2253
+ * @param options - Retry options
2254
+ * @returns Promise with result
2255
+ */
2256
+ declare function retry<T>(fn: () => Promise<T>, options?: RetryOptions): Promise<T>;
2257
+ /**
2258
+ * Create a cancellable timeout
2259
+ * @param ms - Timeout in milliseconds
2260
+ * @returns Object with promise and cancel function
2261
+ */
2262
+ declare function cancellableTimeout(ms: number): {
2263
+ promise: Promise<void>;
2264
+ cancel: () => void;
2265
+ };
2266
+ /**
2267
+ * Execute function at regular intervals
2268
+ * @param fn - Function to execute
2269
+ * @param interval - Interval in ms
2270
+ * @param immediate - Execute immediately on start
2271
+ * @returns Function to stop the interval
2272
+ */
2273
+ declare function setIntervalAsync(fn: () => Promise<void>, interval: number, immediate?: boolean): () => void;
2274
+
2275
/**
 * Supported timestamp output formats.
 */
type TimestampFormat = 'srt' | 'vtt' | 'readable' | 'iso' | 'ms';
/**
 * Display options for transcript formatting.
 */
interface DisplayOptions {
    /** Show timestamps */
    showTimestamps?: boolean;
    /** Show confidence scores */
    showConfidence?: boolean;
    /** Highlight interim results */
    highlightInterim?: boolean;
    /** Maximum text length */
    maxLength?: number;
}
/**
 * Format a duration in milliseconds to a readable string.
 * @param ms - Duration in milliseconds
 * @returns Formatted string (e.g., "2m 30s")
 */
declare function formatDuration(ms: number): string;
/**
 * Format a timestamp to the specified format.
 * @param ms - Time in milliseconds
 * @param format - Output format
 * @returns Formatted timestamp
 */
declare function formatTimestamp(ms: number, format?: TimestampFormat): string;
/**
 * Format a confidence score as a percentage.
 * @param confidence - Confidence value (0-1)
 * @returns Formatted percentage string
 */
declare function formatConfidence(confidence: number): string;
/**
 * Format a file size in bytes to a human-readable string.
 * @param bytes - Size in bytes
 * @returns Formatted size string
 */
declare function formatFileSize(bytes: number): string;
/**
 * Format transcripts for display.
 * @param results - Transcription results
 * @param options - Display options
 * @returns Formatted string
 */
declare function formatTranscriptForDisplay(results: TranscriptionResult[], options?: DisplayOptions): string;
/**
 * Format transcripts as plain text.
 * @param results - Transcription results
 * @param finalOnly - Only include final results
 * @returns Plain text string
 */
declare function formatAsPlainText(results: TranscriptionResult[], finalOnly?: boolean): string;
/**
 * Format a number with thousand separators.
 * @param num - Number to format
 * @returns Formatted string
 */
declare function formatNumber(num: number): string;
/**
 * Truncate text with an ellipsis.
 * @param text - Text to truncate
 * @param maxLength - Maximum length
 * @returns Truncated text
 */
declare function truncateText(text: string, maxLength: number): string;
/**
 * Information about a supported language.
 */
interface LanguageInfo {
    /** Language code (e.g., 'en-US') */
    code: string;
    /** English name */
    name: string;
    /** Native name */
    nativeName: string;
    /** Provider that supports this language */
    provider: TranscriptionProvider;
}
/**
 * Get the supported languages for a provider.
 * @param provider - Transcription provider
 * @returns Array of language info
 */
declare function getSupportedLanguages(provider: TranscriptionProvider): LanguageInfo[];
/**
 * Normalize a language code to standard format.
 * @param code - Language code to normalize
 * @returns Normalized code
 */
declare function normalizeLanguageCode(code: string): string;
/**
 * Get a human-readable language name.
 * @param code - Language code
 * @returns Language name, or the code itself if unknown
 */
declare function getLanguageName(code: string): string;
/**
 * Get the native language name.
 * @param code - Language code
 * @returns Native name, or the code itself if unknown
 */
declare function getNativeLanguageName(code: string): string;
/**
 * Detect the browser's language.
 * @returns Detected language code
 */
declare function detectBrowserLanguage(): string;
/**
 * Check whether a language is supported by a provider.
 * @param code - Language code
 * @param provider - Transcription provider
 * @returns True if supported
 */
declare function isLanguageSupported(code: string, provider: TranscriptionProvider): boolean;
/**
 * Get the best matching language for a provider.
 * @param code - Preferred language code
 * @param provider - Transcription provider
 * @returns Best matching language code, or a default when no match exists
 */
declare function getBestMatchingLanguage(code: string, provider: TranscriptionProvider): string;
/**
 * Generate a unique ID.
 * @param prefix - Optional prefix
 * @returns Unique ID string
 */
declare function generateId(prefix?: string): string;
/**
 * Deep clone an object.
 * @param obj - Object to clone
 * @returns Cloned object
 */
declare function deepClone<T>(obj: T): T;
/**
 * Deep merge multiple objects; later objects take precedence.
 * @param objects - Objects to merge
 * @returns Merged object
 */
declare function mergeDeep<T extends Record<string, unknown>>(...objects: Partial<T>[]): T;
/**
 * Check whether a value is a function (type guard).
 * @param value - Value to check
 * @returns True if function
 */
declare function isFunction(value: unknown): value is (...args: unknown[]) => unknown;
/**
 * Check whether a value is a plain object (type guard).
 * @param value - Value to check
 * @returns True if object
 */
declare function isObject(value: unknown): value is Record<string, unknown>;
/**
 * Check whether a value is empty.
 * @param value - Value to check
 * @returns True if empty
 */
declare function isEmpty(value: unknown): boolean;
/**
 * Pick specific properties from an object.
 * @param obj - Source object
 * @param keys - Keys to pick
 * @returns New object with only the selected keys
 */
declare function pick<T extends Record<string, unknown>, K extends keyof T>(obj: T, keys: K[]): Pick<T, K>;
/**
 * Omit specific properties from an object.
 * @param obj - Source object
 * @param keys - Keys to omit
 * @returns New object without the selected keys
 */
declare function omit<T extends Record<string, unknown>, K extends keyof T>(obj: T, keys: K[]): Omit<T, K>;
/**
 * Create a promise that resolves once a condition is met, polling at the
 * given interval.
 * @param condition - Function that returns true when the condition is met
 * @param interval - Check interval in ms
 * @param timeout - Maximum wait time in ms
 * @returns Promise that resolves when the condition is met
 */
declare function waitFor(condition: () => boolean, interval?: number, timeout?: number): Promise<void>;
/**
 * Group array items by a derived key.
 * @param array - Array to group
 * @param keyFn - Function to get the key for an item
 * @returns Object mapping each key to its items
 */
declare function groupBy<T, K extends string | number>(array: T[], keyFn: (item: T) => K): Record<K, T[]>;
/**
 * Clamp a value to a range.
 * @param value - Value to clamp
 * @param min - Minimum value
 * @param max - Maximum value
 * @returns Clamped value
 */
declare function clamp(value: number, min: number, max: number): number;
/**
 * Round a number to the given number of decimal places.
 * @param value - Number to round
 * @param decimals - Number of decimal places
 * @returns Rounded number
 */
declare function round(value: number, decimals?: number): number;
/**
 * Information about a detected audio format.
 */
interface AudioFormatInfo {
    /** Detected format */
    format: string;
    /** Sample rate in Hz */
    sampleRate?: number;
    /** Number of channels */
    channels?: number;
    /** Bit depth */
    bitDepth?: number;
    /** Duration in seconds */
    duration?: number;
}
/**
 * Calculate audio bitrate.
 * @param sampleRate - Sample rate in Hz
 * @param bitDepth - Bits per sample
 * @param channels - Number of channels
 * @returns Bitrate in bits per second
 */
declare function calculateBitrate(sampleRate: number, bitDepth: number, channels: number): number;
/**
 * Estimate audio file size.
 * @param durationMs - Duration in milliseconds
 * @param config - Audio configuration
 * @returns Estimated size in bytes
 */
declare function estimateAudioSize(durationMs: number, config: AudioConfig): number;
/**
 * Get the optimal buffer size for a sample rate.
 * @param sampleRate - Sample rate in Hz
 * @returns Optimal buffer size (power of 2)
 */
declare function getOptimalBufferSize(sampleRate: number): number;
/**
 * Validate and detect audio format from an ArrayBuffer.
 * @param data - Audio data
 * @returns Audio format information
 */
declare function validateAudioFormat(data: ArrayBuffer): AudioFormatInfo;
/**
 * Calculate RMS (Root Mean Square) of an audio buffer.
 * @param buffer - Audio samples
 * @returns RMS value (0-1)
 */
declare function calculateRMS(buffer: Float32Array): number;
/**
 * Calculate peak amplitude of an audio buffer.
 * @param buffer - Audio samples
 * @returns Peak value (0-1)
 */
declare function calculatePeak(buffer: Float32Array): number;
/**
 * Convert decibels to linear amplitude.
 * @param db - Value in decibels
 * @returns Linear amplitude
 */
declare function dbToLinear(db: number): number;
/**
 * Convert linear amplitude to decibels.
 * @param linear - Linear amplitude
 * @returns Value in decibels
 */
declare function linearToDb(linear: number): number;
/**
 * Check whether audio data contains silence.
 * @param buffer - Audio samples
 * @param threshold - Silence threshold (default: 0.001)
 * @returns True if mostly silence
 */
declare function isSilence(buffer: Float32Array, threshold?: number): boolean;
/**
 * Get audio constraints for getUserMedia.
 * @param config - Audio configuration
 * @returns MediaTrackConstraints for the audio track
 */
declare function getAudioConstraints(config?: AudioConfig): MediaTrackConstraints;
/**
 * Calculate audio duration from a sample count.
 * @param samples - Number of samples
 * @param sampleRate - Sample rate in Hz
 * @returns Duration in milliseconds
 */
declare function samplesToDuration(samples: number, sampleRate: number): number;
/**
 * Calculate the number of samples for a duration.
 * @param durationMs - Duration in milliseconds
 * @param sampleRate - Sample rate in Hz
 * @returns Number of samples
 */
declare function durationToSamples(durationMs: number, sampleRate: number): number;
/**
 * Live Transcribe - Professional live speech transcription library
 * @module live-transcribe
 */

/**
 * Library version
 */
declare const VERSION = "0.1.0";
/**
 * Library name
 */
declare const LIBRARY_NAME = "live-transcribe";
/**
 * Quick start options for simplified initialization.
 */
interface QuickStartOptions {
    /** Transcription provider (default: auto-detect) */
    provider?: TranscriptionProvider;
    /** API key for cloud providers */
    apiKey?: string;
    /** Language code (default: 'en-US') */
    language?: string;
    /** Callback for transcript events */
    onTranscript?: (result: TranscriptionResult) => void;
    /** Callback for error events */
    onError?: (error: TranscriptionError) => void;
    /** Callback when transcription starts */
    onStart?: () => void;
    /** Callback when transcription stops */
    onStop?: () => void;
    /** Enable interim results (default: true) */
    interimResults?: boolean;
    /** Enable audio recording */
    recordAudio?: boolean;
}
/**
 * Create a transcription provider instance.
 * @param config - Transcription configuration
 * @returns Configured provider instance
 * @example
 * ```typescript
 * const provider = createTranscriber({
 *   provider: TranscriptionProvider.WebSpeechAPI,
 *   language: 'en-US'
 * });
 * ```
 */
declare function createTranscriber(config: TranscriptionConfig): ITranscriptionProvider;
/**
 * Create a transcription session with a configured provider.
 * @param config - Transcription configuration
 * @param sessionConfig - Optional session configuration
 * @returns Configured TranscriptionSession
 * @example
 * ```typescript
 * const session = createSession({
 *   provider: TranscriptionProvider.Deepgram,
 *   apiKey: 'your-api-key',
 *   language: 'en-US'
 * }, {
 *   recordAudio: true
 * });
 * ```
 */
declare function createSession(config: TranscriptionConfig, sessionConfig?: SessionConfig): TranscriptionSession;
/**
 * Quick start transcription with minimal configuration.
 * Auto-detects the best available provider and handles initialization.
 * @param options - Quick start options
 * @returns Ready-to-use TranscriptionSession
 * @example
 * ```typescript
 * const session = await quickStart({
 *   language: 'en-US',
 *   onTranscript: (result) => console.log(result.text),
 *   onError: (error) => console.error(error)
 * });
 *
 * // Session is already started and transcribing
 * // Stop when done:
 * await session.stop();
 * ```
 */
declare function quickStart(options?: QuickStartOptions): Promise<TranscriptionSession>;
/**
 * Check whether a specific provider is supported in the current environment.
 * @param provider - Provider to check
 * @returns True if the provider is supported
 */
declare function isProviderSupported(provider: TranscriptionProvider): boolean;
/**
 * Get the list of providers supported in the current environment.
 * @returns Array of supported providers
 */
declare function getSupportedProviders(): TranscriptionProvider[];
/** Default export bundling the library's primary entry points. */
declare const _default: {
    VERSION: string;
    LIBRARY_NAME: string;
    createTranscriber: typeof createTranscriber;
    createSession: typeof createSession;
    quickStart: typeof quickStart;
    isProviderSupported: typeof isProviderSupported;
    getSupportedProviders: typeof getSupportedProviders;
    TranscriptionProvider: typeof TranscriptionProvider;
    WebSpeechProvider: typeof WebSpeechProvider;
    DeepgramProvider: typeof DeepgramProvider;
    AssemblyAIProvider: typeof AssemblyAIProvider;
    TranscriptionSession: typeof TranscriptionSession;
    SessionManager: typeof SessionManager;
};
2689
+ export { type AssemblyAIOptions, AssemblyAIProvider, AudioBufferManager, type AudioConfig, AudioEncoding, type AudioFormat, type AudioFormatInfo, AudioLevelMonitor, type AudioLevelMonitorOptions, AudioProcessor, AudioRecorder, BaseTranscriber, type BrowserInfo, type CSVExportOptions, CSVExporter, type CompatibilityReport, DEFAULT_AUDIO_CONFIG, DEFAULT_SESSION_CONFIG, DEFAULT_TRANSCRIPTION_CONFIG, type DeepgramOptions, DeepgramProvider, type DisplayOptions, ErrorCode, EventEmitter, type ExportFormat, type ExportResult, type ITranscriptionProvider, JSONExporter, LIBRARY_NAME, type LanguageInfo, LocalStorageAdapter, MemoryStorageAdapter, type MergeOptions, type ProviderCapabilities, type ProviderInfo, type QuickStartOptions, type RetryOptions, SRTExporter, type SessionConfig, type SessionExport, type SessionExportData, type SessionImport, SessionManager, type SessionManagerOptions, type SessionMetadata, SessionState, type SessionStatistics, type SessionStats, type StorageAdapter, type SupportCheck, type TextExportOptions, TextExporter, type TimestampFormat, type TranscriptionConfig, TranscriptionError, type TranscriptionEventName, type TranscriptionEvents, TranscriptionProvider, type TranscriptionResult, type TranscriptionSegment, TranscriptionSession, type VADOptions, VERSION, VTTExporter, type ValidationError, type ValidationResult, type ValidationWarning, VoiceActivityDetector, WebSpeechProvider, type Word, calculateBitrate, calculatePeak, calculateRMS, cancellableTimeout, checkAudioContextSupport, checkMediaDevicesSupport, checkWebSocketSupport, checkWebSpeechAPISupport, clamp, createSession, createTranscriber, dbToLinear, debounce, deepClone, _default as default, detectBrowserLanguage, durationToSamples, estimateAudioSize, formatAsPlainText, formatConfidence, formatDuration, formatFileSize, formatNumber, formatTimestamp, formatTranscriptForDisplay, generateId, getAudioConstraints, getBestMatchingLanguage, getBrowserInfo, getFullCompatibilityReport, 
getLanguageName, getNativeLanguageName, getOptimalBufferSize, getSupportedLanguages, getSupportedProviders, groupBy, isEmpty, isFunction, isLanguageSupported, isObject, isProviderSupported, isSilence, linearToDb, mergeDeep, normalizeLanguageCode, omit, pick, quickStart, retry, round, samplesToDuration, setIntervalAsync, sleep, throttle, timeout, truncateText, validateApiKey, validateAudioConfig, validateAudioFormat, validateLanguageCode, validateSessionConfig, validateTranscriptionConfig, waitFor };