@360labs/live-transcribe 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/LICENSE +21 -0
- package/README.md +170 -0
- package/dist/index.d.mts +2689 -0
- package/dist/index.d.ts +2689 -0
- package/dist/index.js +4777 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +4682 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +86 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,2689 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight EventEmitter implementation for browser and Node.js compatibility
|
|
3
|
+
*/
|
|
4
|
+
declare class EventEmitter<TEvents extends Record<string, (...args: any[]) => void> = Record<string, (...args: any[]) => void>> {
|
|
5
|
+
private events;
|
|
6
|
+
/**
|
|
7
|
+
* Subscribe to an event
|
|
8
|
+
* @param event - Event name to subscribe to
|
|
9
|
+
* @param listener - Callback function
|
|
10
|
+
*/
|
|
11
|
+
on<K extends keyof TEvents>(event: K, listener: TEvents[K]): this;
|
|
12
|
+
/**
|
|
13
|
+
* Unsubscribe from an event
|
|
14
|
+
* @param event - Event name to unsubscribe from
|
|
15
|
+
* @param listener - Callback function to remove
|
|
16
|
+
*/
|
|
17
|
+
off<K extends keyof TEvents>(event: K, listener: TEvents[K]): this;
|
|
18
|
+
/**
|
|
19
|
+
* Subscribe to an event for one-time notification
|
|
20
|
+
* @param event - Event name to subscribe to
|
|
21
|
+
* @param listener - Callback function
|
|
22
|
+
*/
|
|
23
|
+
once<K extends keyof TEvents>(event: K, listener: TEvents[K]): this;
|
|
24
|
+
/**
|
|
25
|
+
* Emit an event to all subscribers
|
|
26
|
+
* @param event - Event name to emit
|
|
27
|
+
* @param args - Arguments to pass to listeners
|
|
28
|
+
*/
|
|
29
|
+
protected emit<K extends keyof TEvents>(event: K, ...args: Parameters<TEvents[K]>): boolean;
|
|
30
|
+
/**
|
|
31
|
+
* Remove all listeners for an event, or all listeners if no event is specified
|
|
32
|
+
* @param event - Optional event name
|
|
33
|
+
*/
|
|
34
|
+
removeAllListeners<K extends keyof TEvents>(event?: K): this;
|
|
35
|
+
/**
|
|
36
|
+
* Get the number of listeners for an event
|
|
37
|
+
* @param event - Event name
|
|
38
|
+
*/
|
|
39
|
+
listenerCount<K extends keyof TEvents>(event: K): number;
|
|
40
|
+
/**
|
|
41
|
+
* Get all event names that have listeners
|
|
42
|
+
*/
|
|
43
|
+
eventNames(): (keyof TEvents)[];
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Represents an individual word in a transcription with timing information
|
|
48
|
+
*/
|
|
49
|
+
interface Word {
|
|
50
|
+
/** The transcribed word text */
|
|
51
|
+
text: string;
|
|
52
|
+
/** Start time in milliseconds */
|
|
53
|
+
start: number;
|
|
54
|
+
/** End time in milliseconds */
|
|
55
|
+
end: number;
|
|
56
|
+
/** Confidence score (0-1) */
|
|
57
|
+
confidence?: number;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Represents a transcription result from the provider
|
|
61
|
+
*/
|
|
62
|
+
interface TranscriptionResult {
|
|
63
|
+
/** The transcribed text */
|
|
64
|
+
text: string;
|
|
65
|
+
/** Whether this is a final or interim result */
|
|
66
|
+
isFinal: boolean;
|
|
67
|
+
/** Confidence score (0-1) */
|
|
68
|
+
confidence?: number;
|
|
69
|
+
/** When the transcription occurred (Unix timestamp) */
|
|
70
|
+
timestamp: number;
|
|
71
|
+
/** Array of individual words with timing information */
|
|
72
|
+
words?: Word[];
|
|
73
|
+
/** Speaker identification if supported by provider */
|
|
74
|
+
speaker?: string;
|
|
75
|
+
/** Detected or specified language code */
|
|
76
|
+
language?: string;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Represents a segment of transcription with timing information
|
|
80
|
+
*/
|
|
81
|
+
interface TranscriptionSegment {
|
|
82
|
+
/** Unique identifier for the segment */
|
|
83
|
+
id: string;
|
|
84
|
+
/** The transcribed text for this segment */
|
|
85
|
+
text: string;
|
|
86
|
+
/** Start time in milliseconds */
|
|
87
|
+
start: number;
|
|
88
|
+
/** End time in milliseconds */
|
|
89
|
+
end: number;
|
|
90
|
+
/** Whether this segment is finalized */
|
|
91
|
+
isFinal: boolean;
|
|
92
|
+
/** Confidence score (0-1) */
|
|
93
|
+
confidence?: number;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Supported transcription providers
|
|
98
|
+
*/
|
|
99
|
+
declare enum TranscriptionProvider {
|
|
100
|
+
/** Browser-native Web Speech API */
|
|
101
|
+
WebSpeechAPI = "web-speech",
|
|
102
|
+
/** Deepgram real-time transcription */
|
|
103
|
+
Deepgram = "deepgram",
|
|
104
|
+
/** AssemblyAI real-time transcription */
|
|
105
|
+
AssemblyAI = "assemblyai",
|
|
106
|
+
/** Custom provider implementation */
|
|
107
|
+
Custom = "custom"
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Audio encoding formats
|
|
111
|
+
*/
|
|
112
|
+
declare enum AudioEncoding {
|
|
113
|
+
/** 16-bit signed little-endian linear PCM */
|
|
114
|
+
LINEAR16 = "linear16",
|
|
115
|
+
/** G.711 mu-law */
|
|
116
|
+
MULAW = "mulaw",
|
|
117
|
+
/** G.711 A-law */
|
|
118
|
+
ALAW = "alaw",
|
|
119
|
+
/** Opus encoding */
|
|
120
|
+
OPUS = "opus"
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Audio configuration options
|
|
124
|
+
*/
|
|
125
|
+
interface AudioConfig {
|
|
126
|
+
/** Sample rate in Hz (default: 16000) */
|
|
127
|
+
sampleRate?: number;
|
|
128
|
+
/** Number of audio channels (default: 1) */
|
|
129
|
+
channels?: number;
|
|
130
|
+
/** Bit depth (default: 16) */
|
|
131
|
+
bitDepth?: number;
|
|
132
|
+
/** Audio encoding format */
|
|
133
|
+
encoding?: AudioEncoding;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Main configuration for transcription
|
|
137
|
+
*/
|
|
138
|
+
interface TranscriptionConfig {
|
|
139
|
+
/** The transcription provider to use */
|
|
140
|
+
provider: TranscriptionProvider;
|
|
141
|
+
/** API key for cloud providers (Deepgram, AssemblyAI) */
|
|
142
|
+
apiKey?: string;
|
|
143
|
+
/** Language code (default: 'en-US') */
|
|
144
|
+
language?: string;
|
|
145
|
+
/** Whether to return interim results (default: true) */
|
|
146
|
+
interimResults?: boolean;
|
|
147
|
+
/** Enable profanity filter (default: false) */
|
|
148
|
+
profanityFilter?: boolean;
|
|
149
|
+
/** Enable automatic punctuation (default: true) */
|
|
150
|
+
punctuation?: boolean;
|
|
151
|
+
/** Audio configuration options */
|
|
152
|
+
audioConfig?: AudioConfig;
|
|
153
|
+
/** Provider-specific options */
|
|
154
|
+
providerOptions?: Record<string, unknown>;
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Default audio configuration values
|
|
158
|
+
*/
|
|
159
|
+
declare const DEFAULT_AUDIO_CONFIG: Required<AudioConfig>;
|
|
160
|
+
/**
|
|
161
|
+
* Default transcription configuration values
|
|
162
|
+
*/
|
|
163
|
+
declare const DEFAULT_TRANSCRIPTION_CONFIG: Partial<TranscriptionConfig>;
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Possible states of a transcription session
|
|
167
|
+
*/
|
|
168
|
+
declare enum SessionState {
|
|
169
|
+
/** Session has not started */
|
|
170
|
+
IDLE = "idle",
|
|
171
|
+
/** Session is initializing (connecting to provider, requesting mic access) */
|
|
172
|
+
INITIALIZING = "initializing",
|
|
173
|
+
/** Session is actively transcribing */
|
|
174
|
+
ACTIVE = "active",
|
|
175
|
+
/** Session is paused */
|
|
176
|
+
PAUSED = "paused",
|
|
177
|
+
/** Session is stopping */
|
|
178
|
+
STOPPING = "stopping",
|
|
179
|
+
/** Session has stopped */
|
|
180
|
+
STOPPED = "stopped",
|
|
181
|
+
/** Session encountered an error */
|
|
182
|
+
ERROR = "error"
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Configuration options for a transcription session
|
|
186
|
+
*/
|
|
187
|
+
interface SessionConfig {
|
|
188
|
+
/** Whether to record audio during the session (default: false) */
|
|
189
|
+
recordAudio?: boolean;
|
|
190
|
+
/** Maximum session duration in milliseconds */
|
|
191
|
+
maxDuration?: number;
|
|
192
|
+
/** Auto-stop after silence duration in milliseconds */
|
|
193
|
+
silenceTimeout?: number;
|
|
194
|
+
/** Enable Voice Activity Detection (default: false) */
|
|
195
|
+
enableVAD?: boolean;
|
|
196
|
+
/** VAD sensitivity threshold (0-1, default: 0.5) */
|
|
197
|
+
vadThreshold?: number;
|
|
198
|
+
}
|
|
199
|
+
/**
|
|
200
|
+
* Metadata about a transcription session
|
|
201
|
+
*/
|
|
202
|
+
interface SessionMetadata {
|
|
203
|
+
/** Unique session identifier */
|
|
204
|
+
id: string;
|
|
205
|
+
/** Session start time (Unix timestamp) */
|
|
206
|
+
startTime: number;
|
|
207
|
+
/** Session end time (Unix timestamp) */
|
|
208
|
+
endTime?: number;
|
|
209
|
+
/** Session duration in milliseconds */
|
|
210
|
+
duration?: number;
|
|
211
|
+
/** Total word count in the session */
|
|
212
|
+
wordCount: number;
|
|
213
|
+
/** Provider used for the session */
|
|
214
|
+
provider: TranscriptionProvider;
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Default session configuration values
|
|
218
|
+
*/
|
|
219
|
+
declare const DEFAULT_SESSION_CONFIG: Required<SessionConfig>;
|
|
220
|
+
/**
|
|
221
|
+
* Statistics for all managed sessions
|
|
222
|
+
*/
|
|
223
|
+
interface SessionStats {
|
|
224
|
+
/** Total number of sessions */
|
|
225
|
+
totalSessions: number;
|
|
226
|
+
/** Number of currently active sessions */
|
|
227
|
+
activeSessions: number;
|
|
228
|
+
/** Total number of transcripts across all sessions */
|
|
229
|
+
totalTranscripts: number;
|
|
230
|
+
/** Total duration across all sessions in milliseconds */
|
|
231
|
+
totalDuration: number;
|
|
232
|
+
/** Average confidence score */
|
|
233
|
+
averageConfidence: number;
|
|
234
|
+
}
|
|
235
|
+
/**
|
|
236
|
+
* Statistics for a single session
|
|
237
|
+
*/
|
|
238
|
+
interface SessionStatistics {
|
|
239
|
+
/** Total word count */
|
|
240
|
+
wordCount: number;
|
|
241
|
+
/** Average confidence score */
|
|
242
|
+
averageConfidence: number;
|
|
243
|
+
/** Speaking rate in words per minute */
|
|
244
|
+
speakingRate: number;
|
|
245
|
+
/** Number of silence periods */
|
|
246
|
+
silencePeriods: number;
|
|
247
|
+
/** Total duration in milliseconds */
|
|
248
|
+
durationMs: number;
|
|
249
|
+
/** Number of transcripts */
|
|
250
|
+
transcriptCount: number;
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Options for merging transcripts
|
|
254
|
+
*/
|
|
255
|
+
interface MergeOptions {
|
|
256
|
+
/** Separator between transcripts */
|
|
257
|
+
separator?: string;
|
|
258
|
+
/** Include timestamps */
|
|
259
|
+
includeTimestamps?: boolean;
|
|
260
|
+
/** Include speaker labels */
|
|
261
|
+
includeSpeakers?: boolean;
|
|
262
|
+
/** Only include final transcripts */
|
|
263
|
+
finalOnly?: boolean;
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Supported export formats
|
|
267
|
+
*/
|
|
268
|
+
type ExportFormat = 'json' | 'text' | 'srt' | 'vtt' | 'csv';
|
|
269
|
+
/**
|
|
270
|
+
* Result of exporting a session
|
|
271
|
+
*/
|
|
272
|
+
interface ExportResult {
|
|
273
|
+
/** Export format used */
|
|
274
|
+
format: ExportFormat;
|
|
275
|
+
/** Exported data */
|
|
276
|
+
data: string | ArrayBuffer;
|
|
277
|
+
/** Suggested filename */
|
|
278
|
+
filename: string;
|
|
279
|
+
/** MIME type */
|
|
280
|
+
mimeType: string;
|
|
281
|
+
}
|
|
282
|
+
/**
|
|
283
|
+
* Data structure for importing a session
|
|
284
|
+
*/
|
|
285
|
+
interface SessionImport {
|
|
286
|
+
/** Session metadata */
|
|
287
|
+
metadata: SessionMetadata;
|
|
288
|
+
/** Transcript results */
|
|
289
|
+
transcripts: TranscriptionResult[];
|
|
290
|
+
/** Session configuration */
|
|
291
|
+
config: SessionConfig;
|
|
292
|
+
}
|
|
293
|
+
/**
|
|
294
|
+
* Versioned export envelope: the importable session data plus an export version and export timestamp
|
|
295
|
+
*/
|
|
296
|
+
interface SessionExportData {
|
|
297
|
+
/** Export version */
|
|
298
|
+
version: string;
|
|
299
|
+
/** Session data */
|
|
300
|
+
session: SessionImport;
|
|
301
|
+
/** Export timestamp */
|
|
302
|
+
exportedAt: number;
|
|
303
|
+
}
|
|
304
|
+
/**
|
|
305
|
+
* Options for text export
|
|
306
|
+
*/
|
|
307
|
+
interface TextExportOptions {
|
|
308
|
+
/** Include timestamps */
|
|
309
|
+
includeTimestamps?: boolean;
|
|
310
|
+
/** Include speaker labels */
|
|
311
|
+
includeSpeakers?: boolean;
|
|
312
|
+
/** Include confidence scores */
|
|
313
|
+
includeConfidence?: boolean;
|
|
314
|
+
/** Add paragraph breaks */
|
|
315
|
+
paragraphBreaks?: boolean;
|
|
316
|
+
}
|
|
317
|
+
/**
|
|
318
|
+
* Options for CSV export
|
|
319
|
+
*/
|
|
320
|
+
interface CSVExportOptions {
|
|
321
|
+
/** Column delimiter */
|
|
322
|
+
delimiter?: string;
|
|
323
|
+
/** Include header row */
|
|
324
|
+
includeHeaders?: boolean;
|
|
325
|
+
/** Columns to include */
|
|
326
|
+
columns?: string[];
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
/**
|
|
330
|
+
* Error codes for transcription errors
|
|
331
|
+
*/
|
|
332
|
+
declare enum ErrorCode {
|
|
333
|
+
/** Failed to initialize the provider */
|
|
334
|
+
INITIALIZATION_FAILED = "initialization_failed",
|
|
335
|
+
/** Failed to connect to the transcription service */
|
|
336
|
+
CONNECTION_FAILED = "connection_failed",
|
|
337
|
+
/** API key invalid or authentication failed */
|
|
338
|
+
AUTHENTICATION_FAILED = "authentication_failed",
|
|
339
|
+
/** Microphone access was denied by the user */
|
|
340
|
+
MICROPHONE_ACCESS_DENIED = "microphone_access_denied",
|
|
341
|
+
/** Browser does not support required features */
|
|
342
|
+
UNSUPPORTED_BROWSER = "unsupported_browser",
|
|
343
|
+
/** Network error during transcription */
|
|
344
|
+
NETWORK_ERROR = "network_error",
|
|
345
|
+
/** Provider-specific error */
|
|
346
|
+
PROVIDER_ERROR = "provider_error",
|
|
347
|
+
/** Invalid configuration provided */
|
|
348
|
+
INVALID_CONFIG = "invalid_config",
|
|
349
|
+
/** Session has expired */
|
|
350
|
+
SESSION_EXPIRED = "session_expired",
|
|
351
|
+
/** Unknown error occurred */
|
|
352
|
+
UNKNOWN_ERROR = "unknown_error"
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Custom error class for transcription errors
|
|
356
|
+
*/
|
|
357
|
+
declare class TranscriptionError extends Error {
|
|
358
|
+
/** Error code identifying the type of error */
|
|
359
|
+
readonly code: ErrorCode;
|
|
360
|
+
/** Provider that generated the error */
|
|
361
|
+
readonly provider?: TranscriptionProvider;
|
|
362
|
+
/** Additional error details */
|
|
363
|
+
readonly details?: unknown;
|
|
364
|
+
constructor(message: string, code: ErrorCode, provider?: TranscriptionProvider, details?: unknown);
|
|
365
|
+
}
|
|
366
|
+
/**
|
|
367
|
+
* Event map for transcription events
|
|
368
|
+
* Used for type-safe event handling
|
|
369
|
+
*/
|
|
370
|
+
interface TranscriptionEvents {
|
|
371
|
+
/** Emitted for any transcription result (interim or final) */
|
|
372
|
+
transcript: (result: TranscriptionResult) => void;
|
|
373
|
+
/** Emitted for interim (non-final) transcription results */
|
|
374
|
+
interim: (result: TranscriptionResult) => void;
|
|
375
|
+
/** Emitted for final transcription results */
|
|
376
|
+
final: (result: TranscriptionResult) => void;
|
|
377
|
+
/** Emitted when transcription starts */
|
|
378
|
+
start: () => void;
|
|
379
|
+
/** Emitted when transcription stops */
|
|
380
|
+
stop: () => void;
|
|
381
|
+
/** Emitted when transcription is paused */
|
|
382
|
+
pause: () => void;
|
|
383
|
+
/** Emitted when transcription is resumed */
|
|
384
|
+
resume: () => void;
|
|
385
|
+
/** Emitted when an error occurs */
|
|
386
|
+
error: (error: TranscriptionError) => void;
|
|
387
|
+
/** Emitted when session state changes */
|
|
388
|
+
stateChange: (state: SessionState) => void;
|
|
389
|
+
/** Emitted with current audio level (0-1) */
|
|
390
|
+
audioLevel: (level: number) => void;
|
|
391
|
+
/** Emitted when silence is detected */
|
|
392
|
+
silence: () => void;
|
|
393
|
+
/** Emitted when speech is detected */
|
|
394
|
+
speech: () => void;
|
|
395
|
+
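/** Index signature for extensibility; also lets this interface satisfy the Record<string, listener> constraint on EventEmitter's TEvents */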
|
|
396
|
+
[key: string]: (...args: any[]) => void;
|
|
397
|
+
}
|
|
398
|
+
/**
|
|
399
|
+
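* Type for event names. Because TranscriptionEvents declares a string index signature, this resolves to string | number rather than a union of the named events.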
|
|
400
|
+
*/
|
|
401
|
+
type TranscriptionEventName = keyof TranscriptionEvents;
|
|
402
|
+
|
|
403
|
+
/**
|
|
404
|
+
* Interface that all transcription providers must implement
|
|
405
|
+
*/
|
|
406
|
+
interface ITranscriptionProvider {
|
|
407
|
+
/**
|
|
408
|
+
* Initialize the provider with configuration
|
|
409
|
+
* @param config - Transcription configuration
|
|
410
|
+
*/
|
|
411
|
+
initialize(config: TranscriptionConfig): Promise<void>;
|
|
412
|
+
/**
|
|
413
|
+
* Start transcription
|
|
414
|
+
*/
|
|
415
|
+
start(): Promise<void>;
|
|
416
|
+
/**
|
|
417
|
+
* Stop transcription
|
|
418
|
+
*/
|
|
419
|
+
stop(): Promise<void>;
|
|
420
|
+
/**
|
|
421
|
+
* Pause transcription
|
|
422
|
+
*/
|
|
423
|
+
pause(): void;
|
|
424
|
+
/**
|
|
425
|
+
* Resume transcription after pause
|
|
426
|
+
*/
|
|
427
|
+
resume(): void;
|
|
428
|
+
/**
|
|
429
|
+
* Send audio data to the provider
|
|
430
|
+
* @param audioData - Raw audio data as ArrayBuffer
|
|
431
|
+
*/
|
|
432
|
+
sendAudio(audioData: ArrayBuffer): void;
|
|
433
|
+
/**
|
|
434
|
+
* Get the current session state
|
|
435
|
+
*/
|
|
436
|
+
getState(): SessionState;
|
|
437
|
+
/**
|
|
438
|
+
* Check if this provider is supported in the current environment
|
|
439
|
+
*/
|
|
440
|
+
isSupported(): boolean;
|
|
441
|
+
/**
|
|
442
|
+
* Clean up resources and connections
|
|
443
|
+
*/
|
|
444
|
+
cleanup(): Promise<void>;
|
|
445
|
+
}
|
|
446
|
+
/**
|
|
447
|
+
* Describes the capabilities of a transcription provider
|
|
448
|
+
*/
|
|
449
|
+
interface ProviderCapabilities {
|
|
450
|
+
/** Whether the provider supports interim (partial) results */
|
|
451
|
+
supportsInterim: boolean;
|
|
452
|
+
/** Whether the provider supplies word-level timestamps */
|
|
453
|
+
supportsWordTimestamps: boolean;
|
|
454
|
+
/** Whether the provider supports speaker diarization */
|
|
455
|
+
supportsSpeakerDiarization: boolean;
|
|
456
|
+
/** Whether the provider supports automatic punctuation */
|
|
457
|
+
supportsPunctuation: boolean;
|
|
458
|
+
/** Whether the provider supports automatic language detection */
|
|
459
|
+
supportsLanguageDetection: boolean;
|
|
460
|
+
/** List of supported language codes */
|
|
461
|
+
supportedLanguages: string[];
|
|
462
|
+
}
|
|
463
|
+
/**
|
|
464
|
+
* Base provider information
|
|
465
|
+
*/
|
|
466
|
+
interface ProviderInfo {
|
|
467
|
+
/** Provider display name */
|
|
468
|
+
name: string;
|
|
469
|
+
/** Provider identifier */
|
|
470
|
+
id: string;
|
|
471
|
+
/** Provider capabilities */
|
|
472
|
+
capabilities: ProviderCapabilities;
|
|
473
|
+
/** Whether the provider requires an API key */
|
|
474
|
+
requiresApiKey: boolean;
|
|
475
|
+
/** Whether the provider works in browser environment */
|
|
476
|
+
supportsBrowser: boolean;
|
|
477
|
+
/** Whether the provider works in Node.js environment */
|
|
478
|
+
supportsNode: boolean;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
/**
|
|
482
|
+
* Abstract base class for all transcription providers
|
|
483
|
+
* Provides common functionality including event handling, session management,
|
|
484
|
+
* and audio recording capabilities
|
|
485
|
+
*/
|
|
486
|
+
declare abstract class BaseTranscriber extends EventEmitter<TranscriptionEvents> implements ITranscriptionProvider {
|
|
487
|
+
/** Transcription configuration */
|
|
488
|
+
protected config: TranscriptionConfig;
|
|
489
|
+
/** Current session state */
|
|
490
|
+
protected state: SessionState;
|
|
491
|
+
/** Session metadata */
|
|
492
|
+
protected sessionMetadata: SessionMetadata;
|
|
493
|
+
/** Recorded audio chunks */
|
|
494
|
+
protected audioRecording: ArrayBuffer[];
|
|
495
|
+
/** Session start timestamp */
|
|
496
|
+
protected startTime?: number;
|
|
497
|
+
/** Word count in current session */
|
|
498
|
+
protected wordCount: number;
|
|
499
|
+
/**
|
|
500
|
+
* Create a new BaseTranscriber instance
|
|
501
|
+
* @param config - Transcription configuration
|
|
502
|
+
*/
|
|
503
|
+
constructor(config: TranscriptionConfig);
|
|
504
|
+
/**
|
|
505
|
+
* Initialize the provider (takes no arguments here; the configuration is passed to the constructor)
|
|
506
|
+
* Must be implemented by concrete providers
|
|
507
|
+
*/
|
|
508
|
+
abstract initialize(): Promise<void>;
|
|
509
|
+
/**
|
|
510
|
+
* Start transcription
|
|
511
|
+
* Must be implemented by concrete providers
|
|
512
|
+
*/
|
|
513
|
+
abstract start(): Promise<void>;
|
|
514
|
+
/**
|
|
515
|
+
* Stop transcription
|
|
516
|
+
* Must be implemented by concrete providers
|
|
517
|
+
*/
|
|
518
|
+
abstract stop(): Promise<void>;
|
|
519
|
+
/**
|
|
520
|
+
* Pause transcription
|
|
521
|
+
* Must be implemented by concrete providers
|
|
522
|
+
*/
|
|
523
|
+
abstract pause(): void;
|
|
524
|
+
/**
|
|
525
|
+
* Resume transcription after pause
|
|
526
|
+
* Must be implemented by concrete providers
|
|
527
|
+
*/
|
|
528
|
+
abstract resume(): void;
|
|
529
|
+
/**
|
|
530
|
+
* Send audio data to the provider
|
|
531
|
+
* Must be implemented by concrete providers
|
|
532
|
+
* @param audioData - Raw audio data
|
|
533
|
+
*/
|
|
534
|
+
abstract sendAudio(audioData: ArrayBuffer): void;
|
|
535
|
+
/**
|
|
536
|
+
* Check if the provider is supported in the current environment
|
|
537
|
+
* Must be implemented by concrete providers
|
|
538
|
+
*/
|
|
539
|
+
abstract isSupported(): boolean;
|
|
540
|
+
/**
|
|
541
|
+
* Clean up resources and connections
|
|
542
|
+
* Must be implemented by concrete providers
|
|
543
|
+
*/
|
|
544
|
+
abstract cleanup(): Promise<void>;
|
|
545
|
+
/**
|
|
546
|
+
* Get the current session state
|
|
547
|
+
*/
|
|
548
|
+
getState(): SessionState;
|
|
549
|
+
/**
|
|
550
|
+
* Get session metadata
|
|
551
|
+
*/
|
|
552
|
+
getMetadata(): SessionMetadata;
|
|
553
|
+
/**
|
|
554
|
+
* Get recorded audio data
|
|
555
|
+
* @returns Combined audio data or null if not recording
|
|
556
|
+
*/
|
|
557
|
+
getRecording(): ArrayBuffer | null;
|
|
558
|
+
/**
|
|
559
|
+
* Update session state and emit state change event
|
|
560
|
+
* @param newState - New session state
|
|
561
|
+
*/
|
|
562
|
+
protected setState(newState: SessionState): void;
|
|
563
|
+
/**
|
|
564
|
+
* Handle incoming transcription result
|
|
565
|
+
* @param result - Transcription result from provider
|
|
566
|
+
*/
|
|
567
|
+
protected handleTranscript(result: TranscriptionResult): void;
|
|
568
|
+
/**
|
|
569
|
+
* Handle errors and emit error event
|
|
570
|
+
* @param error - Error to handle
|
|
571
|
+
*/
|
|
572
|
+
protected handleError(error: Error | TranscriptionError): void;
|
|
573
|
+
/**
|
|
574
|
+
* Validate configuration
|
|
575
|
+
* @throws TranscriptionError if configuration is invalid
|
|
576
|
+
*/
|
|
577
|
+
protected validateConfig(): void;
|
|
578
|
+
/**
|
|
579
|
+
* Record audio data if recording is enabled
|
|
580
|
+
* @param data - Audio data to record
|
|
581
|
+
*/
|
|
582
|
+
protected recordAudioData(data: ArrayBuffer): void;
|
|
583
|
+
/**
|
|
584
|
+
* Calculate session duration
|
|
585
|
+
* @returns Duration in milliseconds
|
|
586
|
+
*/
|
|
587
|
+
protected calculateDuration(): number;
|
|
588
|
+
/**
|
|
589
|
+
* Clear recording data
|
|
590
|
+
*/
|
|
591
|
+
protected clearRecording(): void;
|
|
592
|
+
/**
|
|
593
|
+
* Reset session state for new session
|
|
594
|
+
*/
|
|
595
|
+
protected resetSession(): void;
|
|
596
|
+
/**
|
|
597
|
+
* Generate a unique session ID
|
|
598
|
+
*/
|
|
599
|
+
private generateSessionId;
|
|
600
|
+
/**
|
|
601
|
+
* Initialize session metadata
|
|
602
|
+
*/
|
|
603
|
+
private initializeMetadata;
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
/**
|
|
607
|
+
* Raw session export data structure, as returned by TranscriptionSession.exportRaw()
|
|
608
|
+
*/
|
|
609
|
+
interface SessionExport {
|
|
610
|
+
/** Session metadata */
|
|
611
|
+
metadata: SessionMetadata;
|
|
612
|
+
/** All transcription results */
|
|
613
|
+
transcripts: TranscriptionResult[];
|
|
614
|
+
/** Concatenated final text */
|
|
615
|
+
fullText: string;
|
|
616
|
+
/** Recorded audio data if available */
|
|
617
|
+
audioData?: ArrayBuffer;
|
|
618
|
+
}
|
|
619
|
+
/**
|
|
620
|
+
* Manages a transcription session with support for recording and transcript management
|
|
621
|
+
*/
|
|
622
|
+
declare class TranscriptionSession {
|
|
623
|
+
/** Unique session identifier */
|
|
624
|
+
readonly id: string;
|
|
625
|
+
/** Transcription provider instance */
|
|
626
|
+
readonly provider: ITranscriptionProvider;
|
|
627
|
+
/** Session configuration */
|
|
628
|
+
private config;
|
|
629
|
+
/** Collected transcription results */
|
|
630
|
+
private transcripts;
|
|
631
|
+
/** Current session state */
|
|
632
|
+
private state;
|
|
633
|
+
/** Max duration timer */
|
|
634
|
+
private maxDurationTimer?;
|
|
635
|
+
/** Silence timeout timer */
|
|
636
|
+
private silenceTimer?;
|
|
637
|
+
/** Session start timestamp */
|
|
638
|
+
private startTime?;
|
|
639
|
+
/**
|
|
640
|
+
* Create a new TranscriptionSession
|
|
641
|
+
* @param provider - Transcription provider to use
|
|
642
|
+
* @param sessionConfig - Session configuration options
|
|
643
|
+
*/
|
|
644
|
+
constructor(provider: ITranscriptionProvider, sessionConfig?: SessionConfig);
|
|
645
|
+
/**
|
|
646
|
+
* Start the transcription session
|
|
647
|
+
*/
|
|
648
|
+
start(): Promise<void>;
|
|
649
|
+
/**
|
|
650
|
+
* Stop the transcription session
|
|
651
|
+
*/
|
|
652
|
+
stop(): Promise<void>;
|
|
653
|
+
/**
|
|
654
|
+
* Pause the transcription session
|
|
655
|
+
*/
|
|
656
|
+
pause(): void;
|
|
657
|
+
/**
|
|
658
|
+
* Resume the transcription session
|
|
659
|
+
*/
|
|
660
|
+
resume(): void;
|
|
661
|
+
/**
|
|
662
|
+
* Add a transcription result to the session
|
|
663
|
+
* @param result - Transcription result to add
|
|
664
|
+
*/
|
|
665
|
+
addTranscript(result: TranscriptionResult): void;
|
|
666
|
+
/**
|
|
667
|
+
* Get transcription results
|
|
668
|
+
* @param finalOnly - If true, return only final results
|
|
669
|
+
*/
|
|
670
|
+
getTranscripts(finalOnly?: boolean): TranscriptionResult[];
|
|
671
|
+
/**
|
|
672
|
+
* Get concatenated text from all final transcripts
|
|
673
|
+
*/
|
|
674
|
+
getFullText(): string;
|
|
675
|
+
/**
|
|
676
|
+
* Get the current session state
|
|
677
|
+
*/
|
|
678
|
+
getState(): SessionState;
|
|
679
|
+
/**
|
|
680
|
+
* Export session data in raw format
|
|
681
|
+
*/
|
|
682
|
+
exportRaw(): SessionExport;
|
|
683
|
+
/**
|
|
684
|
+
* Export session data in specified format
|
|
685
|
+
* @param format - Export format (json, text, srt, vtt, csv)
|
|
686
|
+
*/
|
|
687
|
+
export(format?: ExportFormat): ExportResult;
|
|
688
|
+
/**
|
|
689
|
+
* Get session statistics
|
|
690
|
+
*/
|
|
691
|
+
getStatistics(): SessionStatistics;
|
|
692
|
+
/**
|
|
693
|
+
* Clear all transcripts
|
|
694
|
+
*/
|
|
695
|
+
clear(): void;
|
|
696
|
+
/**
|
|
697
|
+
* Get the total word count from final transcripts
|
|
698
|
+
*/
|
|
699
|
+
private getWordCount;
|
|
700
|
+
/**
|
|
701
|
+
* Set up session timers (max duration, silence timeout)
|
|
702
|
+
*/
|
|
703
|
+
private setupTimers;
|
|
704
|
+
/**
|
|
705
|
+
* Reset the silence timeout timer
|
|
706
|
+
*/
|
|
707
|
+
private resetSilenceTimer;
|
|
708
|
+
/**
|
|
709
|
+
* Clear all timers
|
|
710
|
+
*/
|
|
711
|
+
private clearTimers;
|
|
712
|
+
/**
|
|
713
|
+
* Generate a unique session ID
|
|
714
|
+
*/
|
|
715
|
+
private generateSessionId;
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
/**
|
|
719
|
+
* Session manager options
|
|
720
|
+
*/
|
|
721
|
+
interface SessionManagerOptions {
|
|
722
|
+
/** Default session configuration */
|
|
723
|
+
defaultConfig?: SessionConfig;
|
|
724
|
+
/** Maximum number of concurrent sessions */
|
|
725
|
+
maxSessions?: number;
|
|
726
|
+
}
|
|
727
|
+
/**
|
|
728
|
+
* Manages multiple transcription sessions
|
|
729
|
+
*/
|
|
730
|
+
declare class SessionManager {
|
|
731
|
+
/** Active sessions map */
|
|
732
|
+
private sessions;
|
|
733
|
+
/** Currently active session ID */
|
|
734
|
+
private activeSessionId;
|
|
735
|
+
/** Default session configuration */
|
|
736
|
+
private defaultConfig;
|
|
737
|
+
/** Maximum number of sessions */
|
|
738
|
+
private maxSessions;
|
|
739
|
+
/**
|
|
740
|
+
* Create a new SessionManager
|
|
741
|
+
* @param options - Manager configuration
|
|
742
|
+
*/
|
|
743
|
+
constructor(options?: SessionManagerOptions);
|
|
744
|
+
/**
|
|
745
|
+
* Create a new transcription session
|
|
746
|
+
* @param provider - Transcription provider instance
|
|
747
|
+
* @param config - Session configuration (merged with defaults)
|
|
748
|
+
* @returns New TranscriptionSession instance
|
|
749
|
+
*/
|
|
750
|
+
createSession(provider: ITranscriptionProvider, config?: SessionConfig): TranscriptionSession;
|
|
751
|
+
/**
|
|
752
|
+
* Get session by ID
|
|
753
|
+
* @param sessionId - Session ID
|
|
754
|
+
* @returns TranscriptionSession or null
|
|
755
|
+
*/
|
|
756
|
+
getSession(sessionId: string): TranscriptionSession | null;
|
|
757
|
+
/**
|
|
758
|
+
* Get the currently active session
|
|
759
|
+
* @returns Active TranscriptionSession or null
|
|
760
|
+
*/
|
|
761
|
+
getActiveSession(): TranscriptionSession | null;
|
|
762
|
+
/**
|
|
763
|
+
* Set the active session
|
|
764
|
+
* @param sessionId - Session ID to make active
|
|
765
|
+
*/
|
|
766
|
+
setActiveSession(sessionId: string): void;
|
|
767
|
+
/**
|
|
768
|
+
* Get all sessions
|
|
769
|
+
* @returns Array of all sessions
|
|
770
|
+
*/
|
|
771
|
+
getAllSessions(): TranscriptionSession[];
|
|
772
|
+
/**
|
|
773
|
+
* Delete a session
|
|
774
|
+
* @param sessionId - Session ID to delete
|
|
775
|
+
*/
|
|
776
|
+
deleteSession(sessionId: string): Promise<void>;
|
|
777
|
+
/**
|
|
778
|
+
* Clear all sessions
|
|
779
|
+
*/
|
|
780
|
+
clearAllSessions(): Promise<void>;
|
|
781
|
+
/**
|
|
782
|
+
* Get statistics for all sessions
|
|
783
|
+
* @returns Session statistics
|
|
784
|
+
*/
|
|
785
|
+
getSessionStats(): SessionStats;
|
|
786
|
+
/**
|
|
787
|
+
* Export a session to specified format
|
|
788
|
+
* @param sessionId - Session ID
|
|
789
|
+
* @param format - Export format
|
|
790
|
+
* @returns Export result
|
|
791
|
+
*/
|
|
792
|
+
exportSession(sessionId: string, format: ExportFormat): ExportResult;
|
|
793
|
+
/**
|
|
794
|
+
* Import a session from data
|
|
795
|
+
* @param data - Session import data
|
|
796
|
+
* @param provider - Provider instance for the session
|
|
797
|
+
* @returns Imported session
|
|
798
|
+
*/
|
|
799
|
+
importSession(data: SessionImport, provider: ITranscriptionProvider): TranscriptionSession;
|
|
800
|
+
/**
|
|
801
|
+
* Check if a session exists
|
|
802
|
+
* @param sessionId - Session ID
|
|
803
|
+
* @returns True if session exists
|
|
804
|
+
*/
|
|
805
|
+
hasSession(sessionId: string): boolean;
|
|
806
|
+
/**
|
|
807
|
+
* Get session count
|
|
808
|
+
* @returns Number of sessions
|
|
809
|
+
*/
|
|
810
|
+
getSessionCount(): number;
|
|
811
|
+
/**
|
|
812
|
+
* Get sessions by state
|
|
813
|
+
* @param state - Session state to filter by
|
|
814
|
+
* @returns Array of sessions with matching state
|
|
815
|
+
*/
|
|
816
|
+
getSessionsByState(state: SessionState): TranscriptionSession[];
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
/**
|
|
820
|
+
* Global augmentation exposing the Web Speech API constructors (standard and webkit-prefixed) on Window
|
|
821
|
+
*/
|
|
822
|
+
declare global {
|
|
823
|
+
interface Window {
|
|
824
|
+
SpeechRecognition: typeof SpeechRecognition;
|
|
825
|
+
webkitSpeechRecognition: typeof SpeechRecognition;
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
/**
|
|
829
|
+
* SpeechRecognition interfaces for TypeScript
|
|
830
|
+
*/
|
|
831
|
+
interface SpeechRecognitionEvent extends Event {
|
|
832
|
+
resultIndex: number;
|
|
833
|
+
results: SpeechRecognitionResultList;
|
|
834
|
+
}
|
|
835
|
+
/**
 * Array-like collection of `SpeechRecognitionResult` entries, addressable
 * either through `item()` or through a numeric index.
 */
interface SpeechRecognitionResultList {
    /** Number of results in the list */
    length: number;
    /**
     * Get the result at a position
     * @param index - Position of the result
     * @returns Result stored at `index`
     */
    item(index: number): SpeechRecognitionResult;
    /** Bracket access to the result at `index` */
    [index: number]: SpeechRecognitionResult;
}
|
|
840
|
+
/**
 * Single recognition result: an array-like collection of
 * `SpeechRecognitionAlternative` entries plus a finality flag.
 */
interface SpeechRecognitionResult {
    /** Number of alternatives in this result */
    length: number;
    /**
     * Get the alternative at a position
     * @param index - Position of the alternative
     * @returns Alternative stored at `index`
     */
    item(index: number): SpeechRecognitionAlternative;
    /** Bracket access to the alternative at `index` */
    [index: number]: SpeechRecognitionAlternative;
    /**
     * Whether this result is final.
     * NOTE(review): presumably `false` marks an interim result — confirm.
     */
    isFinal: boolean;
}
|
|
846
|
+
/**
 * One candidate transcription within a `SpeechRecognitionResult`.
 */
interface SpeechRecognitionAlternative {
    /** Recognized text */
    transcript: string;
    /**
     * Confidence value reported for this alternative.
     * NOTE(review): presumably in the range 0-1 as in the Web Speech API — confirm.
     */
    confidence: number;
}
|
|
850
|
+
/**
 * Event type received by the `SpeechRecognition.onerror` handler.
 */
interface SpeechRecognitionErrorEvent extends Event {
    /**
     * Error identifier.
     * NOTE(review): presumably one of the Web Speech API error codes
     * (e.g. `no-speech`, `not-allowed`) — confirm.
     */
    error: string;
    /** Error message text */
    message: string;
}
|
|
854
|
+
/**
 * Instance shape of a speech recognition object: configuration properties,
 * nullable event handler slots, and the start/stop/abort controls.
 *
 * Every handler is invoked with `this` bound to the recognition instance and
 * may be set to `null` to remove it.
 */
interface SpeechRecognition extends EventTarget {
    /**
     * Continuous-recognition flag.
     * NOTE(review): presumably keeps recognition running across pauses — confirm.
     */
    continuous: boolean;
    /** Whether interim (non-final) results are requested */
    interimResults: boolean;
    /**
     * Recognition language.
     * NOTE(review): presumably a BCP 47 language tag — confirm.
     */
    lang: string;
    /** Maximum number of alternatives per result */
    maxAlternatives: number;
    /** Grammar list; deliberately left untyped (`unknown`) in this declaration */
    grammars: unknown;
    /** Handler for the `start` event */
    onstart: ((this: SpeechRecognition, ev: Event) => void) | null;
    /** Handler for the `end` event */
    onend: ((this: SpeechRecognition, ev: Event) => void) | null;
    /** Handler for the `error` event; receives a `SpeechRecognitionErrorEvent` */
    onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => void) | null;
    /** Handler for the `result` event; receives a `SpeechRecognitionEvent` */
    onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => void) | null;
    /** Handler for the `speechstart` event */
    onspeechstart: ((this: SpeechRecognition, ev: Event) => void) | null;
    /** Handler for the `speechend` event */
    onspeechend: ((this: SpeechRecognition, ev: Event) => void) | null;
    /** Handler for the `audiostart` event */
    onaudiostart: ((this: SpeechRecognition, ev: Event) => void) | null;
    /** Handler for the `audioend` event */
    onaudioend: ((this: SpeechRecognition, ev: Event) => void) | null;
    /** Start recognition */
    start(): void;
    /** Stop recognition */
    stop(): void;
    /**
     * Abort recognition.
     * NOTE(review): presumably discards pending results, unlike `stop()` — confirm.
     */
    abort(): void;
}
|
|
872
|
+
/**
 * Ambient declaration of the `SpeechRecognition` constructor value.
 * `new SpeechRecognition()` takes no arguments and yields an object of the
 * `SpeechRecognition` interface type.
 */
declare let SpeechRecognition: {
    prototype: SpeechRecognition;
    new (): SpeechRecognition;
};
|
|
876
|
+
/**
 * Transcription provider using browser's native Web Speech API.
 * Works in Chrome, Edge, and Safari without requiring API keys.
 *
 * Extends `BaseTranscriber`. Private members are listed without types, so
 * their descriptions below rely on the member names and the original
 * comments only.
 */
declare class WebSpeechProvider extends BaseTranscriber {
    /** Speech recognition instance */
    private recognition;
    /** Media stream from microphone */
    private mediaStream;
    /** Audio context for analysis */
    private audioContext;
    /** Audio analyser for VAD */
    private analyser;
    /**
     * Handle used for audio level monitoring.
     * NOTE(review): the name suggests a timer/interval handle rather than a
     * script processor node — confirm against the implementation.
     */
    private audioLevelInterval;
    /** Whether recognition is being restarted automatically */
    private isRestarting;
    /** Retry count for auto-restart */
    private retryCount;
    /** Maximum retry attempts */
    private readonly maxRetries;
    /** Provider capabilities */
    static readonly capabilities: ProviderCapabilities;
    /**
     * Create a new WebSpeechProvider
     * @param config - Transcription configuration; `provider` is optional
     * here, every other field is taken from `TranscriptionConfig`
     */
    constructor(config: Omit<TranscriptionConfig, 'provider'> & {
        provider?: TranscriptionProvider;
    });
    /**
     * Check if Web Speech API is supported in the current environment
     * @returns True if the API is supported
     */
    isSupported(): boolean;
    /**
     * Initialize the Web Speech API provider
     * @returns Promise that settles when initialization has finished
     */
    initialize(): Promise<void>;
    /**
     * Start transcription
     * @returns Promise that settles when the start request has completed
     */
    start(): Promise<void>;
    /**
     * Stop transcription
     * @returns Promise that settles when the stop request has completed
     */
    stop(): Promise<void>;
    /**
     * Pause transcription (synchronous)
     */
    pause(): void;
    /**
     * Resume transcription (synchronous)
     */
    resume(): void;
    /**
     * Send audio data - not supported by Web Speech API
     * @param _audioData - Audio data (unused)
     */
    sendAudio(_audioData: ArrayBuffer): void;
    /**
     * Clean up all resources
     * @returns Promise that settles when cleanup has finished
     */
    cleanup(): Promise<void>;
    /**
     * Get provider capabilities
     * @returns Capabilities of this provider
     */
    getCapabilities(): ProviderCapabilities;
    /**
     * Set up event handlers for speech recognition
     */
    private setupEventHandlers;
    /**
     * Process speech recognition results
     */
    private processRecognitionResult;
    /**
     * Handle recognition end event
     */
    private handleRecognitionEnd;
    /**
     * Handle recognition errors
     */
    private handleRecognitionError;
    /**
     * Request microphone access
     */
    private getMicrophoneAccess;
    /**
     * Stop media stream tracks
     */
    private stopMediaStream;
    /**
     * Set up audio level monitoring for VAD
     */
    private setupAudioLevelMonitoring;
    /**
     * Stop audio level monitoring
     */
    private stopAudioLevelMonitoring;
}
|
|
976
|
+
|
|
977
|
+
/**
 * Deepgram-specific configuration options.
 * Every field is optional.
 */
interface DeepgramOptions {
    /** Deepgram model to use */
    model?: 'nova-2' | 'nova' | 'enhanced' | 'base';
    /** Model tier */
    tier?: 'nova' | 'enhanced' | 'base';
    /**
     * Model version: `'latest'` or any other version string.
     * `(string & {})` still accepts every string but keeps the `'latest'`
     * literal visible to editor completion; a plain `'latest' | string`
     * collapses to `string`.
     */
    version?: 'latest' | (string & {});
    /** Enable punctuation */
    punctuate?: boolean;
    /** Enable speaker diarization */
    diarize?: boolean;
    /** Enable multichannel processing */
    multichannel?: boolean;
    /** Number of alternative transcripts */
    alternatives?: number;
    /** Convert numbers to numerals */
    numerals?: boolean;
    /** Search terms to boost */
    search?: string[];
    /**
     * Words to replace.
     * NOTE(review): the expected entry format (e.g. `find:replacement`) is
     * not visible in this declaration — confirm against the implementation.
     */
    replace?: string[];
    /** Keywords to boost */
    keywords?: string[];
    /** Endpointing timeout in milliseconds */
    endpointing?: number;
    /** Enable smart formatting */
    smartFormat?: boolean;
}
|
|
1008
|
+
/**
 * Transcription provider using Deepgram's WebSocket streaming API.
 * Provides high-accuracy transcription with word-level timestamps.
 *
 * Extends `BaseTranscriber`. Private members are listed without types, so
 * their descriptions below rely on the member names and the original
 * comments only.
 */
declare class DeepgramProvider extends BaseTranscriber {
    /** WebSocket connection */
    private socket;
    /** Media stream from microphone */
    private mediaStream;
    /** Audio context for processing */
    private audioContext;
    /** Audio processor node */
    private processor;
    /** Connection attempt counter */
    private connectionAttempts;
    /** Maximum reconnection attempts */
    private readonly maxRetries;
    /** Reconnection timeout */
    private reconnectTimeout;
    /** Keep-alive interval */
    private keepAliveInterval;
    /** Flag indicating if connection is ready */
    private isConnectionReady;
    /** Flag for intentional close */
    private isIntentionalClose;
    /** Provider capabilities */
    static readonly capabilities: ProviderCapabilities;
    /**
     * Create a new DeepgramProvider
     * @param config - Transcription configuration with API key; `provider`
     * is optional here, every other field is taken from `TranscriptionConfig`
     */
    constructor(config: Omit<TranscriptionConfig, 'provider'> & {
        provider?: TranscriptionProvider;
    });
    /**
     * Check if Deepgram provider is supported
     * @returns True if the provider is supported
     */
    isSupported(): boolean;
    /**
     * Initialize the Deepgram provider
     * @returns Promise that settles when initialization has finished
     */
    initialize(): Promise<void>;
    /**
     * Start transcription
     * @returns Promise that settles when the start request has completed
     */
    start(): Promise<void>;
    /**
     * Stop transcription
     * @returns Promise that settles when the stop request has completed
     */
    stop(): Promise<void>;
    /**
     * Pause transcription (synchronous)
     */
    pause(): void;
    /**
     * Resume transcription (synchronous)
     */
    resume(): void;
    /**
     * Send audio data through WebSocket
     * @param audioData - Raw audio data as ArrayBuffer
     */
    sendAudio(audioData: ArrayBuffer): void;
    /**
     * Clean up all resources
     * @returns Promise that settles when cleanup has finished
     */
    cleanup(): Promise<void>;
    /**
     * Get provider capabilities
     * @returns Capabilities of this provider
     */
    getCapabilities(): ProviderCapabilities;
    /**
     * Build WebSocket URL with query parameters
     */
    private buildWebSocketUrl;
    /**
     * Set up WebSocket connection
     */
    private setupWebSocket;
    /**
     * Handle WebSocket open event
     */
    private handleWebSocketOpen;
    /**
     * Handle incoming WebSocket messages
     */
    private handleWebSocketMessage;
    /**
     * Process transcription result from Deepgram
     */
    private processTranscriptionResult;
    /**
     * Handle Deepgram-specific errors
     */
    private handleDeepgramError;
    /**
     * Handle WebSocket error
     */
    private handleWebSocketError;
    /**
     * Handle WebSocket close
     */
    private handleWebSocketClose;
    /**
     * Attempt to reconnect
     */
    private reconnect;
    /**
     * Start keep-alive interval
     */
    private startKeepAlive;
    /**
     * Stop keep-alive interval
     */
    private stopKeepAlive;
    /**
     * Request microphone access
     */
    private getMicrophoneAccess;
    /**
     * Set up audio processing pipeline
     */
    private setupAudioProcessing;
    /**
     * Stop audio processing
     */
    private stopAudioProcessing;
    /**
     * Convert Float32 audio samples to Int16
     */
    private convertFloat32ToInt16;
    /**
     * Close WebSocket connection
     */
    private closeWebSocket;
    /**
     * Stop media stream tracks
     */
    private stopMediaStream;
}
|
|
1148
|
+
|
|
1149
|
+
/**
 * AssemblyAI-specific configuration options.
 * Every field is optional.
 */
interface AssemblyAIOptions {
    /** Keywords to boost recognition */
    wordBoost?: string[];
    /** Boost parameter strength */
    boostParam?: 'low' | 'default' | 'high';
    /** Disable partial (interim) transcripts */
    disablePartialTranscripts?: boolean;
    /** Auto-format text */
    formatText?: boolean;
    /** Enable punctuation */
    punctuate?: boolean;
    /** Include disfluencies (um, uh) */
    disfluencies?: boolean;
    /** Enable multichannel processing */
    multichannel?: boolean;
    /** Enable dual channel processing */
    dualChannel?: boolean;
    /** Enable speaker labels */
    speakerLabels?: boolean;
    /** Expected number of speakers */
    speakersExpected?: number;
    /** Enable entity detection */
    entityDetection?: boolean;
    /** Enable sentiment analysis */
    sentimentAnalysis?: boolean;
    /** Enable auto highlights */
    autoHighlights?: boolean;
    /** Enable content safety detection */
    contentSafety?: boolean;
}
|
|
1182
|
+
/**
 * Transcription provider using AssemblyAI's real-time WebSocket API.
 * Provides high-accuracy transcription with advanced features.
 *
 * Extends `BaseTranscriber`. Private members are listed without types, so
 * their descriptions below rely on the member names and the original
 * comments only.
 */
declare class AssemblyAIProvider extends BaseTranscriber {
    /** WebSocket connection */
    private socket;
    /** Media stream from microphone */
    private mediaStream;
    /** Audio context for processing */
    private audioContext;
    /** Audio processor node */
    private processor;
    /** Session token for WebSocket authentication */
    private sessionToken;
    /** Connection attempt counter */
    private connectionAttempts;
    /** Maximum reconnection attempts */
    private readonly maxRetries;
    /** Reconnection timeout */
    private reconnectTimeout;
    /** Flag indicating if connection is ready */
    private isConnectionReady;
    /** Flag for intentional close */
    private isIntentionalClose;
    /** Session ID from AssemblyAI */
    private sessionId;
    /** Provider capabilities */
    static readonly capabilities: ProviderCapabilities;
    /**
     * Create a new AssemblyAIProvider
     * @param config - Transcription configuration with API key; `provider`
     * is optional here, every other field is taken from `TranscriptionConfig`
     */
    constructor(config: Omit<TranscriptionConfig, 'provider'> & {
        provider?: TranscriptionProvider;
    });
    /**
     * Check if AssemblyAI provider is supported
     * @returns True if the provider is supported
     */
    isSupported(): boolean;
    /**
     * Initialize the AssemblyAI provider
     * @returns Promise that settles when initialization has finished
     */
    initialize(): Promise<void>;
    /**
     * Start transcription
     * @returns Promise that settles when the start request has completed
     */
    start(): Promise<void>;
    /**
     * Stop transcription
     * @returns Promise that settles when the stop request has completed
     */
    stop(): Promise<void>;
    /**
     * Pause transcription (synchronous)
     */
    pause(): void;
    /**
     * Resume transcription (synchronous)
     */
    resume(): void;
    /**
     * Send audio data through WebSocket
     * @param audioData - Raw audio data as ArrayBuffer
     */
    sendAudio(audioData: ArrayBuffer): void;
    /**
     * Clean up all resources
     * @returns Promise that settles when cleanup has finished
     */
    cleanup(): Promise<void>;
    /**
     * Get provider capabilities
     * @returns Capabilities of this provider
     */
    getCapabilities(): ProviderCapabilities;
    /**
     * Get temporary session token from AssemblyAI
     */
    private getSessionToken;
    /**
     * Set up WebSocket connection
     */
    private setupWebSocket;
    /**
     * Handle WebSocket open event
     */
    private handleWebSocketOpen;
    /**
     * Handle incoming WebSocket messages
     */
    private handleWebSocketMessage;
    /**
     * Handle SessionBegins message
     */
    private handleSessionBegins;
    /**
     * Handle partial (interim) transcript
     */
    private handlePartialTranscript;
    /**
     * Handle final transcript
     */
    private handleFinalTranscript;
    /**
     * Handle session terminated message
     */
    private handleSessionTerminated;
    /**
     * Handle AssemblyAI-specific errors
     */
    private handleAssemblyAIError;
    /**
     * Handle WebSocket error
     */
    private handleWebSocketError;
    /**
     * Handle WebSocket close
     */
    private handleWebSocketClose;
    /**
     * Attempt to reconnect
     */
    private reconnect;
    /**
     * Request microphone access
     */
    private getMicrophoneAccess;
    /**
     * Set up audio processing pipeline
     */
    private setupAudioProcessing;
    /**
     * Stop audio processing
     */
    private stopAudioProcessing;
    /**
     * Convert Float32 audio samples to PCM16 (Int16)
     */
    private convertFloat32ToPCM16;
    /**
     * Encode ArrayBuffer to base64
     */
    private encodeAudioToBase64;
    /**
     * Send audio data message
     */
    private sendAudioMessage;
    /**
     * Send terminate session message
     */
    private sendTerminateMessage;
    /**
     * Close WebSocket connection
     */
    private closeWebSocket;
    /**
     * Stop media stream tracks
     */
    private stopMediaStream;
}
|
|
1340
|
+
|
|
1341
|
+
/**
 * Static utility class for audio processing operations.
 * Provides conversion, resampling, and format utilities.
 *
 * Every member is static; the class is not meant to carry instance state.
 */
declare class AudioProcessor {
    /**
     * Convert Float32 audio samples to Int16
     * @param buffer - Input Float32Array
     * @returns Int16Array of converted samples
     */
    static convertFloat32ToInt16(buffer: Float32Array): Int16Array;
    /**
     * Convert Int16 audio samples to Float32
     * @param buffer - Input Int16Array
     * @returns Float32Array of converted samples
     */
    static convertInt16ToFloat32(buffer: Int16Array): Float32Array;
    /**
     * Resample audio buffer to different sample rate.
     * Uses linear interpolation.
     * @param buffer - Input audio buffer
     * @param fromRate - Source sample rate
     * @param toRate - Target sample rate
     * @returns Resampled Float32Array
     */
    static resampleBuffer(buffer: Float32Array, fromRate: number, toRate: number): Float32Array;
    /**
     * Downsample audio buffer (optimized for reducing sample rate)
     * @param buffer - Input audio buffer
     * @param fromRate - Source sample rate
     * @param toRate - Target sample rate
     * @returns Downsampled Float32Array
     */
    static downsampleBuffer(buffer: Float32Array, fromRate: number, toRate: number): Float32Array;
    /**
     * Upsample audio buffer (optimized for increasing sample rate)
     * @param buffer - Input audio buffer
     * @param fromRate - Source sample rate
     * @param toRate - Target sample rate
     * @returns Upsampled Float32Array
     */
    static upsampleBuffer(buffer: Float32Array, fromRate: number, toRate: number): Float32Array;
    /**
     * Normalize audio buffer to peak amplitude of 1.0
     * @param buffer - Input audio buffer
     * @returns Normalized Float32Array
     */
    static normalizeBuffer(buffer: Float32Array): Float32Array;
    /**
     * Apply gain to audio buffer
     * @param buffer - Input audio buffer
     * @param gain - Gain multiplier
     * @returns Processed Float32Array
     */
    static applyGain(buffer: Float32Array, gain: number): Float32Array;
    /**
     * Mix two audio buffers together
     * @param buffer1 - First audio buffer
     * @param buffer2 - Second audio buffer
     * @param ratio - Optional mix ratio (0-1, where 0.5 is equal mix)
     * @returns Mixed Float32Array
     */
    static mixBuffers(buffer1: Float32Array, buffer2: Float32Array, ratio?: number): Float32Array;
    /**
     * Convert AudioBuffer to WAV format
     * @param audioBuffer - Web Audio API AudioBuffer
     * @param sampleRate - Output sample rate (defaults to buffer's sample rate)
     * @returns WAV file as ArrayBuffer
     */
    static bufferToWav(audioBuffer: AudioBuffer, sampleRate?: number): ArrayBuffer;
    /**
     * Create WAV file header
     * @param dataLength - Length of audio data in bytes
     * @param sampleRate - Sample rate
     * @param channels - Number of channels
     * @param bitDepth - Bits per sample
     * @returns WAV header as ArrayBuffer
     */
    static createWavHeader(dataLength: number, sampleRate: number, channels: number, bitDepth: number): ArrayBuffer;
    /**
     * Write string to DataView (private helper; original parameters were
     * documented as `view`, `offset`, and `string`)
     */
    private static writeString;
    /**
     * Convert raw PCM Float32 array to WAV ArrayBuffer
     * @param samples - Float32Array of audio samples
     * @param sampleRate - Sample rate
     * @returns WAV file as ArrayBuffer
     */
    static float32ToWav(samples: Float32Array, sampleRate: number): ArrayBuffer;
}
|
|
1435
|
+
|
|
1436
|
+
/**
 * Voice Activity Detection options.
 * Every field is optional; the documented defaults come from the original
 * field comments.
 */
interface VADOptions {
    /** Energy threshold for speech detection (0-1, default: 0.01) */
    threshold?: number;
    /** Minimum duration of speech in ms to trigger start (default: 300) */
    minSpeechDuration?: number;
    /** Minimum duration of silence in ms to trigger end (default: 500) */
    minSilenceDuration?: number;
    /** Size of energy history buffer (default: 10) */
    historySize?: number;
    /** Callback when speech starts */
    onSpeechStart?: () => void;
    /** Callback when speech ends */
    onSpeechEnd?: () => void;
    /** Callback for volume level changes; receives the new level */
    onVolumeChange?: (level: number) => void;
}
|
|
1455
|
+
/**
 * Voice Activity Detector using energy-based detection.
 * Detects speech and silence in audio streams.
 *
 * Private members are listed without types, so their descriptions below
 * rely on the member names and the original comments only.
 */
declare class VoiceActivityDetector {
    /** Energy threshold for speech detection */
    private threshold;
    /** Minimum speech duration in ms */
    private minSpeechDuration;
    /** Minimum silence duration in ms */
    private minSilenceDuration;
    /** Energy history buffer */
    private energyHistory;
    /** History buffer size */
    private historySize;
    /** Current speaking state */
    private isSpeaking;
    /** Speech start time */
    private speechStartTime;
    /** Silence start time */
    private silenceStartTime;
    /** Speech start callback */
    private onSpeechStart?;
    /** Speech end callback */
    private onSpeechEnd?;
    /** Volume change callback */
    private onVolumeChange?;
    /** Last processed timestamp */
    private lastProcessTime;
    /**
     * Create a new VoiceActivityDetector
     * @param options - Optional VAD configuration options
     */
    constructor(options?: VADOptions);
    /**
     * Process audio data and detect voice activity
     * @param audioData - Audio samples as Float32Array
     * @returns Current speaking state
     */
    processAudio(audioData: Float32Array): boolean;
    /**
     * Calculate RMS (Root Mean Square) energy of audio buffer
     * @param buffer - Audio samples
     * @returns RMS energy value (0-1)
     */
    calculateRMSEnergy(buffer: Float32Array): number;
    /**
     * Calculate adaptive threshold based on energy history
     * @returns Adaptive threshold value
     */
    calculateAdaptiveThreshold(): number;
    /**
     * Reset detector state
     */
    reset(): void;
    /**
     * Update threshold value
     * @param threshold - New threshold (0-1)
     */
    setThreshold(threshold: number): void;
    /**
     * Get average energy from history
     * @returns Average energy value
     */
    getAverageEnergy(): number;
    /**
     * Check if speech is currently detected
     * @returns Speaking state
     */
    isSpeechDetected(): boolean;
    /**
     * Get current threshold
     * @returns Threshold value
     */
    getThreshold(): number;
    /**
     * Update callbacks
     * @param callbacks - New callback functions; each of `onSpeechStart`,
     * `onSpeechEnd`, and `onVolumeChange` is optional
     */
    setCallbacks(callbacks: {
        onSpeechStart?: () => void;
        onSpeechEnd?: () => void;
        onVolumeChange?: (level: number) => void;
    }): void;
}
|
|
1540
|
+
|
|
1541
|
+
/**
 * Audio level monitor options.
 * Every field is optional.
 */
interface AudioLevelMonitorOptions {
    /** Smoothing factor for level calculation (0-1, default: 0.8) */
    smoothingFactor?: number;
    /** Callback for level changes; receives the new level */
    onLevelChange?: (level: number) => void;
}
|
|
1550
|
+
/**
 * Monitors and reports audio levels with smoothing.
 *
 * Private members are listed without types, so their descriptions below
 * rely on the member names and the original comments only.
 */
declare class AudioLevelMonitor {
    /** Smoothing factor (0-1, higher = smoother) */
    private smoothingFactor;
    /** Current smoothed level */
    private currentLevel;
    /** Peak level since last reset */
    private peakLevel;
    /** Level change callback */
    private onLevelChange?;
    /**
     * Create a new AudioLevelMonitor
     * @param options - Optional monitor configuration
     */
    constructor(options?: AudioLevelMonitorOptions);
    /**
     * Process audio data and update levels
     * @param audioData - Audio samples as Float32Array
     * @returns Current smoothed level
     */
    processAudio(audioData: Float32Array): number;
    /**
     * Calculate RMS level of audio buffer
     * @param buffer - Audio samples
     * @returns Level value (0-1)
     */
    calculateLevel(buffer: Float32Array): number;
    /**
     * Get current smoothed level
     * @returns Current level (0-1)
     */
    getCurrentLevel(): number;
    /**
     * Get peak level since last reset
     * @returns Peak level (0-1)
     */
    getPeakLevel(): number;
    /**
     * Reset current and peak levels
     */
    reset(): void;
    /**
     * Reset only the peak level
     */
    resetPeak(): void;
    /**
     * Convert current level to decibels
     * @returns Level in dB (typically -60 to 0)
     */
    getDecibels(): number;
    /**
     * Convert specific level to decibels
     * @param level - Level value (0-1)
     * @returns Level in dB
     */
    static toDecibels(level: number): number;
    /**
     * Convert decibels to linear level
     * @param db - Level in decibels
     * @returns Linear level (0-1)
     */
    static fromDecibels(db: number): number;
    /**
     * Set smoothing factor
     * @param factor - Smoothing factor (0-1)
     */
    setSmoothingFactor(factor: number): void;
    /**
     * Get current smoothing factor
     * @returns Smoothing factor
     */
    getSmoothingFactor(): number;
    /**
     * Set level change callback
     * @param callback - Callback function, or `undefined` (accepted by the
     * signature; presumably clears the callback — confirm)
     */
    setOnLevelChange(callback: ((level: number) => void) | undefined): void;
    /**
     * Get level as percentage (0-100)
     * @returns Level percentage
     */
    getLevelPercentage(): number;
}
|
|
1635
|
+
|
|
1636
|
+
/**
 * Circular buffer manager for audio chunks.
 * Efficiently manages audio data for streaming applications.
 *
 * Private members are listed without types, so their descriptions below
 * rely on the member names and the original comments only.
 */
declare class AudioBufferManager {
    /** Maximum number of chunks to store */
    private bufferSize;
    /** Buffer storage */
    private buffer;
    /** Write position */
    private writeIndex;
    /** Read position */
    private readIndex;
    /** Number of available chunks */
    private count;
    /**
     * Create a new AudioBufferManager
     * @param bufferSize - Optional maximum number of chunks to store
     */
    constructor(bufferSize?: number);
    /**
     * Write a chunk to the buffer
     * @param chunk - Audio data chunk
     */
    write(chunk: Float32Array): void;
    /**
     * Read and remove chunks from the buffer
     * @param numChunks - Number of chunks to read (default: all available)
     * @returns Array of audio chunks
     */
    read(numChunks?: number): Float32Array[];
    /**
     * Read chunks without removing them
     * @param numChunks - Number of chunks to peek (default: all available)
     * @returns Array of audio chunks
     */
    peek(numChunks?: number): Float32Array[];
    /**
     * Clear all data from buffer
     */
    clear(): void;
    /**
     * Get number of available chunks
     * @returns Number of chunks in buffer
     */
    getAvailableChunks(): number;
    /**
     * Check if buffer is full
     * @returns True if buffer is full
     */
    isFull(): boolean;
    /**
     * Check if buffer is empty
     * @returns True if buffer is empty
     */
    isEmpty(): boolean;
    /**
     * Concatenate multiple chunks into a single buffer
     * @param chunks - Array of audio chunks
     * @returns Single concatenated Float32Array
     */
    concatenateChunks(chunks: Float32Array[]): Float32Array;
    /**
     * Get all data as a single concatenated buffer
     * @returns Concatenated Float32Array
     */
    getAll(): Float32Array;
    /**
     * Get total number of samples across all chunks
     * @returns Total sample count
     */
    getTotalSamples(): number;
    /**
     * Get buffer capacity
     * @returns Maximum number of chunks
     */
    getCapacity(): number;
    /**
     * Resize the buffer
     * @param newSize - New buffer size
     */
    resize(newSize: number): void;
}
|
|
1719
|
+
|
|
1720
|
+
/**
 * Audio recording format: `'raw'` or `'wav'`.
 */
type AudioFormat = 'raw' | 'wav';
|
|
1724
|
+
/**
|
|
1725
|
+
* Records audio chunks and exports to various formats
|
|
1726
|
+
*/
|
|
1727
|
+
declare class AudioRecorder {
|
|
1728
|
+
/** Recorded audio chunks */
|
|
1729
|
+
private audioChunks;
|
|
1730
|
+
/** Sample rate */
|
|
1731
|
+
private sampleRate;
|
|
1732
|
+
/** Recording state */
|
|
1733
|
+
private isRecording;
|
|
1734
|
+
/** Recording start time */
|
|
1735
|
+
private startTime;
|
|
1736
|
+
/**
|
|
1737
|
+
* Create a new AudioRecorder
|
|
1738
|
+
* @param sampleRate - Sample rate for recording
|
|
1739
|
+
*/
|
|
1740
|
+
constructor(sampleRate?: number);
|
|
1741
|
+
/**
|
|
1742
|
+
* Start recording
|
|
1743
|
+
*/
|
|
1744
|
+
start(): void;
|
|
1745
|
+
/**
|
|
1746
|
+
* Record an audio chunk
|
|
1747
|
+
* @param audioData - Audio data to record
|
|
1748
|
+
*/
|
|
1749
|
+
recordChunk(audioData: Float32Array): void;
|
|
1750
|
+
/**
|
|
1751
|
+
* Stop recording and return all recorded audio
|
|
1752
|
+
* @returns Complete audio as Float32Array
|
|
1753
|
+
*/
|
|
1754
|
+
stop(): Float32Array;
|
|
1755
|
+
/**
|
|
1756
|
+
* Clear all recorded audio
|
|
1757
|
+
*/
|
|
1758
|
+
clear(): void;
|
|
1759
|
+
/**
|
|
1760
|
+
* Export recording to specified format
|
|
1761
|
+
* @param format - Output format ('raw' or 'wav')
|
|
1762
|
+
* @returns Audio data as ArrayBuffer
|
|
1763
|
+
*/
|
|
1764
|
+
export(format?: AudioFormat): ArrayBuffer;
|
|
1765
|
+
/**
|
|
1766
|
+
* Get recording duration in seconds
|
|
1767
|
+
* @returns Duration in seconds
|
|
1768
|
+
*/
|
|
1769
|
+
getDuration(): number;
|
|
1770
|
+
/**
|
|
1771
|
+
* Get number of recorded chunks
|
|
1772
|
+
* @returns Chunk count
|
|
1773
|
+
*/
|
|
1774
|
+
getChunkCount(): number;
|
|
1775
|
+
/**
|
|
1776
|
+
* Check if currently recording
|
|
1777
|
+
* @returns Recording state
|
|
1778
|
+
*/
|
|
1779
|
+
getIsRecording(): boolean;
|
|
1780
|
+
/**
|
|
1781
|
+
* Get sample rate
|
|
1782
|
+
* @returns Sample rate
|
|
1783
|
+
*/
|
|
1784
|
+
getSampleRate(): number;
|
|
1785
|
+
/**
|
|
1786
|
+
* Set sample rate (only effective before recording starts)
|
|
1787
|
+
* @param sampleRate - New sample rate
|
|
1788
|
+
*/
|
|
1789
|
+
setSampleRate(sampleRate: number): void;
|
|
1790
|
+
/**
|
|
1791
|
+
* Get total number of recorded samples
|
|
1792
|
+
* @returns Sample count
|
|
1793
|
+
*/
|
|
1794
|
+
getTotalSamples(): number;
|
|
1795
|
+
/**
|
|
1796
|
+
* Get recording start time
|
|
1797
|
+
* @returns Start timestamp or null
|
|
1798
|
+
*/
|
|
1799
|
+
getStartTime(): number | null;
|
|
1800
|
+
/**
|
|
1801
|
+
* Get elapsed recording time in milliseconds
|
|
1802
|
+
* @returns Elapsed time in ms
|
|
1803
|
+
*/
|
|
1804
|
+
getElapsedTime(): number;
|
|
1805
|
+
/**
|
|
1806
|
+
* Get combined audio data
|
|
1807
|
+
* @returns Concatenated Float32Array
|
|
1808
|
+
*/
|
|
1809
|
+
private getCombinedAudio;
|
|
1810
|
+
/**
|
|
1811
|
+
* Create a Blob from the recording
|
|
1812
|
+
* @param format - Output format
|
|
1813
|
+
* @returns Blob with audio data
|
|
1814
|
+
*/
|
|
1815
|
+
toBlob(format?: AudioFormat): Blob;
|
|
1816
|
+
/**
|
|
1817
|
+
* Create a data URL from the recording
|
|
1818
|
+
* @param format - Output format
|
|
1819
|
+
* @returns Data URL string
|
|
1820
|
+
*/
|
|
1821
|
+
toDataURL(format?: AudioFormat): string;
|
|
1822
|
+
/**
|
|
1823
|
+
* Download the recording
|
|
1824
|
+
* @param filename - Output filename
|
|
1825
|
+
* @param format - Output format
|
|
1826
|
+
*/
|
|
1827
|
+
download(filename?: string, format?: AudioFormat): void;
|
|
1828
|
+
}
|
|
1829
|
+
|
|
1830
|
+
/**
|
|
1831
|
+
* Interface for storage adapters
|
|
1832
|
+
* Provides abstraction for different storage backends
|
|
1833
|
+
*/
|
|
1834
|
+
interface StorageAdapter {
|
|
1835
|
+
/**
|
|
1836
|
+
* Save data to storage
|
|
1837
|
+
* @param key - Storage key
|
|
1838
|
+
* @param data - Data to store
|
|
1839
|
+
*/
|
|
1840
|
+
save(key: string, data: unknown): Promise<void>;
|
|
1841
|
+
/**
|
|
1842
|
+
* Load data from storage
|
|
1843
|
+
* @param key - Storage key
|
|
1844
|
+
* @returns Stored data or null if not found
|
|
1845
|
+
*/
|
|
1846
|
+
load(key: string): Promise<unknown | null>;
|
|
1847
|
+
/**
|
|
1848
|
+
* Delete data from storage
|
|
1849
|
+
* @param key - Storage key
|
|
1850
|
+
*/
|
|
1851
|
+
delete(key: string): Promise<void>;
|
|
1852
|
+
/**
|
|
1853
|
+
* List all keys in storage
|
|
1854
|
+
* @returns Array of keys
|
|
1855
|
+
*/
|
|
1856
|
+
list(): Promise<string[]>;
|
|
1857
|
+
/**
|
|
1858
|
+
* Check if key exists in storage
|
|
1859
|
+
* @param key - Storage key
|
|
1860
|
+
* @returns True if key exists
|
|
1861
|
+
*/
|
|
1862
|
+
exists(key: string): Promise<boolean>;
|
|
1863
|
+
}
|
|
1864
|
+
/**
|
|
1865
|
+
* Storage adapter using browser localStorage
|
|
1866
|
+
* Suitable for small data (<5MB)
|
|
1867
|
+
*/
|
|
1868
|
+
declare class LocalStorageAdapter implements StorageAdapter {
|
|
1869
|
+
/** Prefix for all keys */
|
|
1870
|
+
private prefix;
|
|
1871
|
+
/**
|
|
1872
|
+
* Create a new LocalStorageAdapter
|
|
1873
|
+
* @param prefix - Key prefix (default: 'live-transcribe')
|
|
1874
|
+
*/
|
|
1875
|
+
constructor(prefix?: string);
|
|
1876
|
+
/**
|
|
1877
|
+
* Get prefixed key
|
|
1878
|
+
*/
|
|
1879
|
+
private getKey;
|
|
1880
|
+
save(key: string, data: unknown): Promise<void>;
|
|
1881
|
+
load(key: string): Promise<unknown | null>;
|
|
1882
|
+
delete(key: string): Promise<void>;
|
|
1883
|
+
list(): Promise<string[]>;
|
|
1884
|
+
exists(key: string): Promise<boolean>;
|
|
1885
|
+
}
|
|
1886
|
+
/**
|
|
1887
|
+
* In-memory storage adapter
|
|
1888
|
+
* Useful for testing or temporary storage
|
|
1889
|
+
*/
|
|
1890
|
+
declare class MemoryStorageAdapter implements StorageAdapter {
|
|
1891
|
+
private storage;
|
|
1892
|
+
save(key: string, data: unknown): Promise<void>;
|
|
1893
|
+
load(key: string): Promise<unknown | null>;
|
|
1894
|
+
delete(key: string): Promise<void>;
|
|
1895
|
+
list(): Promise<string[]>;
|
|
1896
|
+
exists(key: string): Promise<boolean>;
|
|
1897
|
+
/**
|
|
1898
|
+
* Clear all data
|
|
1899
|
+
*/
|
|
1900
|
+
clear(): void;
|
|
1901
|
+
/**
|
|
1902
|
+
* Get storage size
|
|
1903
|
+
*/
|
|
1904
|
+
size(): number;
|
|
1905
|
+
}
|
|
1906
|
+
|
|
1907
|
+
/**
|
|
1908
|
+
* Session data for JSON export
|
|
1909
|
+
*/
|
|
1910
|
+
interface SessionData$4 {
|
|
1911
|
+
metadata: SessionMetadata;
|
|
1912
|
+
transcripts: TranscriptionResult[];
|
|
1913
|
+
}
|
|
1914
|
+
/**
|
|
1915
|
+
* Exports transcription sessions to JSON format
|
|
1916
|
+
*/
|
|
1917
|
+
declare class JSONExporter {
|
|
1918
|
+
/**
|
|
1919
|
+
* Export session to minified JSON string
|
|
1920
|
+
* @param session - Session data to export
|
|
1921
|
+
* @returns JSON string
|
|
1922
|
+
*/
|
|
1923
|
+
static export(session: SessionData$4): string;
|
|
1924
|
+
/**
|
|
1925
|
+
* Export session to formatted/pretty JSON string
|
|
1926
|
+
* @param session - Session data to export
|
|
1927
|
+
* @param indent - Indentation spaces (default: 2)
|
|
1928
|
+
* @returns Formatted JSON string
|
|
1929
|
+
*/
|
|
1930
|
+
static exportPretty(session: SessionData$4, indent?: number): string;
|
|
1931
|
+
/**
|
|
1932
|
+
* Parse JSON and validate structure
|
|
1933
|
+
* @param json - JSON string to parse
|
|
1934
|
+
* @returns Parsed session export data
|
|
1935
|
+
*/
|
|
1936
|
+
static parse(json: string): SessionExportData;
|
|
1937
|
+
}
|
|
1938
|
+
|
|
1939
|
+
/**
|
|
1940
|
+
* Session data for text export
|
|
1941
|
+
*/
|
|
1942
|
+
interface SessionData$3 {
|
|
1943
|
+
transcripts: TranscriptionResult[];
|
|
1944
|
+
}
|
|
1945
|
+
/**
|
|
1946
|
+
* Exports transcription sessions to plain text format
|
|
1947
|
+
*/
|
|
1948
|
+
declare class TextExporter {
|
|
1949
|
+
/**
|
|
1950
|
+
* Export session transcripts to plain text
|
|
1951
|
+
* @param session - Session data to export
|
|
1952
|
+
* @param options - Export options
|
|
1953
|
+
* @returns Plain text string
|
|
1954
|
+
*/
|
|
1955
|
+
static export(session: SessionData$3, options?: TextExportOptions): string;
|
|
1956
|
+
/**
|
|
1957
|
+
* Export as continuous text without any formatting
|
|
1958
|
+
* @param session - Session data to export
|
|
1959
|
+
* @returns Plain text string
|
|
1960
|
+
*/
|
|
1961
|
+
static exportPlain(session: SessionData$3): string;
|
|
1962
|
+
}
|
|
1963
|
+
|
|
1964
|
+
/**
|
|
1965
|
+
* Session data for SRT export
|
|
1966
|
+
*/
|
|
1967
|
+
interface SessionData$2 {
|
|
1968
|
+
transcripts: TranscriptionResult[];
|
|
1969
|
+
}
|
|
1970
|
+
/**
|
|
1971
|
+
* Exports transcription sessions to SRT (SubRip) subtitle format
|
|
1972
|
+
*/
|
|
1973
|
+
declare class SRTExporter {
|
|
1974
|
+
/**
|
|
1975
|
+
* Export session transcripts to SRT format
|
|
1976
|
+
* @param session - Session data to export
|
|
1977
|
+
* @returns SRT formatted string
|
|
1978
|
+
*/
|
|
1979
|
+
static export(session: SessionData$2): string;
|
|
1980
|
+
/**
|
|
1981
|
+
* Format milliseconds to SRT timestamp format (HH:MM:SS,mmm)
|
|
1982
|
+
* @param ms - Time in milliseconds
|
|
1983
|
+
* @returns Formatted timestamp
|
|
1984
|
+
*/
|
|
1985
|
+
private static formatTime;
|
|
1986
|
+
/**
|
|
1987
|
+
* Pad number with leading zeros
|
|
1988
|
+
* @param num - Number to pad
|
|
1989
|
+
* @param length - Target length
|
|
1990
|
+
* @returns Padded string
|
|
1991
|
+
*/
|
|
1992
|
+
private static pad;
|
|
1993
|
+
}
|
|
1994
|
+
|
|
1995
|
+
/**
|
|
1996
|
+
* Session data for VTT export
|
|
1997
|
+
*/
|
|
1998
|
+
interface SessionData$1 {
|
|
1999
|
+
transcripts: TranscriptionResult[];
|
|
2000
|
+
}
|
|
2001
|
+
/**
|
|
2002
|
+
* Exports transcription sessions to WebVTT subtitle format
|
|
2003
|
+
*/
|
|
2004
|
+
declare class VTTExporter {
|
|
2005
|
+
/**
|
|
2006
|
+
* Export session transcripts to WebVTT format
|
|
2007
|
+
* @param session - Session data to export
|
|
2008
|
+
* @returns WebVTT formatted string
|
|
2009
|
+
*/
|
|
2010
|
+
static export(session: SessionData$1): string;
|
|
2011
|
+
/**
|
|
2012
|
+
* Export with cue identifiers
|
|
2013
|
+
* @param session - Session data to export
|
|
2014
|
+
* @param cuePrefix - Prefix for cue identifiers
|
|
2015
|
+
* @returns WebVTT formatted string with cue IDs
|
|
2016
|
+
*/
|
|
2017
|
+
static exportWithCues(session: SessionData$1, cuePrefix?: string): string;
|
|
2018
|
+
/**
|
|
2019
|
+
* Format milliseconds to WebVTT timestamp format (HH:MM:SS.mmm)
|
|
2020
|
+
* @param ms - Time in milliseconds
|
|
2021
|
+
* @returns Formatted timestamp
|
|
2022
|
+
*/
|
|
2023
|
+
private static formatTime;
|
|
2024
|
+
/**
|
|
2025
|
+
* Pad number with leading zeros
|
|
2026
|
+
*/
|
|
2027
|
+
private static pad;
|
|
2028
|
+
}
|
|
2029
|
+
|
|
2030
|
+
/**
|
|
2031
|
+
* Session data for CSV export
|
|
2032
|
+
*/
|
|
2033
|
+
interface SessionData {
|
|
2034
|
+
transcripts: TranscriptionResult[];
|
|
2035
|
+
}
|
|
2036
|
+
/**
|
|
2037
|
+
* Exports transcription sessions to CSV format
|
|
2038
|
+
*/
|
|
2039
|
+
declare class CSVExporter {
|
|
2040
|
+
/**
|
|
2041
|
+
* Export session transcripts to CSV format
|
|
2042
|
+
* @param session - Session data to export
|
|
2043
|
+
* @param options - Export options
|
|
2044
|
+
* @returns CSV formatted string
|
|
2045
|
+
*/
|
|
2046
|
+
static export(session: SessionData, options?: CSVExportOptions): string;
|
|
2047
|
+
/**
|
|
2048
|
+
* Export only final transcripts
|
|
2049
|
+
* @param session - Session data to export
|
|
2050
|
+
* @param options - Export options
|
|
2051
|
+
* @returns CSV formatted string
|
|
2052
|
+
*/
|
|
2053
|
+
static exportFinalOnly(session: SessionData, options?: CSVExportOptions): string;
|
|
2054
|
+
/**
|
|
2055
|
+
* Get field value from transcript
|
|
2056
|
+
* @param transcript - Transcription result
|
|
2057
|
+
* @param field - Field name
|
|
2058
|
+
* @returns Field value
|
|
2059
|
+
*/
|
|
2060
|
+
private static getFieldValue;
|
|
2061
|
+
/**
|
|
2062
|
+
* Escape field for CSV format
|
|
2063
|
+
* @param field - Field value
|
|
2064
|
+
* @param delimiter - CSV delimiter
|
|
2065
|
+
* @returns Escaped field
|
|
2066
|
+
*/
|
|
2067
|
+
private static escapeField;
|
|
2068
|
+
}
|
|
2069
|
+
|
|
2070
|
+
/**
|
|
2071
|
+
* Validation error details
|
|
2072
|
+
*/
|
|
2073
|
+
interface ValidationError {
|
|
2074
|
+
/** Field that failed validation */
|
|
2075
|
+
field: string;
|
|
2076
|
+
/** Error message */
|
|
2077
|
+
message: string;
|
|
2078
|
+
/** Error code */
|
|
2079
|
+
code: string;
|
|
2080
|
+
}
|
|
2081
|
+
/**
|
|
2082
|
+
* Validation warning details
|
|
2083
|
+
*/
|
|
2084
|
+
interface ValidationWarning {
|
|
2085
|
+
/** Field with warning */
|
|
2086
|
+
field: string;
|
|
2087
|
+
/** Warning message */
|
|
2088
|
+
message: string;
|
|
2089
|
+
}
|
|
2090
|
+
/**
|
|
2091
|
+
* Result of validation
|
|
2092
|
+
*/
|
|
2093
|
+
interface ValidationResult {
|
|
2094
|
+
/** Whether validation passed */
|
|
2095
|
+
valid: boolean;
|
|
2096
|
+
/** List of errors */
|
|
2097
|
+
errors: ValidationError[];
|
|
2098
|
+
/** List of warnings */
|
|
2099
|
+
warnings?: ValidationWarning[];
|
|
2100
|
+
}
|
|
2101
|
+
/**
|
|
2102
|
+
* Validate transcription configuration
|
|
2103
|
+
* @param config - Configuration to validate
|
|
2104
|
+
* @returns Validation result
|
|
2105
|
+
*/
|
|
2106
|
+
declare function validateTranscriptionConfig(config: TranscriptionConfig): ValidationResult;
|
|
2107
|
+
/**
|
|
2108
|
+
* Validate audio configuration
|
|
2109
|
+
* @param config - Audio config to validate
|
|
2110
|
+
* @returns Validation result
|
|
2111
|
+
*/
|
|
2112
|
+
declare function validateAudioConfig(config: AudioConfig): ValidationResult;
|
|
2113
|
+
/**
|
|
2114
|
+
* Validate session configuration
|
|
2115
|
+
* @param config - Session config to validate
|
|
2116
|
+
* @returns Validation result
|
|
2117
|
+
*/
|
|
2118
|
+
declare function validateSessionConfig(config: SessionConfig): ValidationResult;
|
|
2119
|
+
/**
|
|
2120
|
+
* Validate BCP-47 language code
|
|
2121
|
+
* @param code - Language code to validate
|
|
2122
|
+
* @returns True if valid
|
|
2123
|
+
*/
|
|
2124
|
+
declare function validateLanguageCode(code: string): boolean;
|
|
2125
|
+
/**
|
|
2126
|
+
* Validate API key format for a provider
|
|
2127
|
+
* @param provider - Transcription provider
|
|
2128
|
+
* @param key - API key to validate
|
|
2129
|
+
* @returns Validation result
|
|
2130
|
+
*/
|
|
2131
|
+
declare function validateApiKey(provider: TranscriptionProvider, key?: string): ValidationResult;
|
|
2132
|
+
|
|
2133
|
+
/**
|
|
2134
|
+
* Browser information
|
|
2135
|
+
*/
|
|
2136
|
+
interface BrowserInfo {
|
|
2137
|
+
/** Browser name */
|
|
2138
|
+
name: string;
|
|
2139
|
+
/** Browser version */
|
|
2140
|
+
version: string;
|
|
2141
|
+
/** Operating system */
|
|
2142
|
+
os: string;
|
|
2143
|
+
/** Whether device is mobile */
|
|
2144
|
+
isMobile: boolean;
|
|
2145
|
+
}
|
|
2146
|
+
/**
|
|
2147
|
+
* Support check result
|
|
2148
|
+
*/
|
|
2149
|
+
interface SupportCheck {
|
|
2150
|
+
/** Whether feature is supported */
|
|
2151
|
+
supported: boolean;
|
|
2152
|
+
/** Details about support status */
|
|
2153
|
+
details: string;
|
|
2154
|
+
/** Fallback recommendation if not supported */
|
|
2155
|
+
fallback?: string;
|
|
2156
|
+
}
|
|
2157
|
+
/**
|
|
2158
|
+
* Full compatibility report
|
|
2159
|
+
*/
|
|
2160
|
+
interface CompatibilityReport {
|
|
2161
|
+
/** Browser information */
|
|
2162
|
+
browser: BrowserInfo;
|
|
2163
|
+
/** Web Speech API support */
|
|
2164
|
+
webSpeechAPI: SupportCheck;
|
|
2165
|
+
/** WebSocket support */
|
|
2166
|
+
webSocket: SupportCheck;
|
|
2167
|
+
/** Media devices support */
|
|
2168
|
+
mediaDevices: SupportCheck;
|
|
2169
|
+
/** Audio context support */
|
|
2170
|
+
audioContext: SupportCheck;
|
|
2171
|
+
/** Overall compatibility */
|
|
2172
|
+
overallCompatible: boolean;
|
|
2173
|
+
/** Recommendations for improvement */
|
|
2174
|
+
recommendations: string[];
|
|
2175
|
+
}
|
|
2176
|
+
/**
|
|
2177
|
+
* Get browser information
|
|
2178
|
+
* @returns Browser info object
|
|
2179
|
+
*/
|
|
2180
|
+
declare function getBrowserInfo(): BrowserInfo;
|
|
2181
|
+
/**
|
|
2182
|
+
* Check Web Speech API support
|
|
2183
|
+
* @returns Support check result
|
|
2184
|
+
*/
|
|
2185
|
+
declare function checkWebSpeechAPISupport(): SupportCheck;
|
|
2186
|
+
/**
|
|
2187
|
+
* Check WebSocket support
|
|
2188
|
+
* @returns Support check result
|
|
2189
|
+
*/
|
|
2190
|
+
declare function checkWebSocketSupport(): SupportCheck;
|
|
2191
|
+
/**
|
|
2192
|
+
* Check media devices support
|
|
2193
|
+
* @returns Support check result
|
|
2194
|
+
*/
|
|
2195
|
+
declare function checkMediaDevicesSupport(): SupportCheck;
|
|
2196
|
+
/**
|
|
2197
|
+
* Check AudioContext support
|
|
2198
|
+
* @returns Support check result
|
|
2199
|
+
*/
|
|
2200
|
+
declare function checkAudioContextSupport(): SupportCheck;
|
|
2201
|
+
/**
|
|
2202
|
+
* Get comprehensive compatibility report
|
|
2203
|
+
* @returns Full compatibility report
|
|
2204
|
+
*/
|
|
2205
|
+
declare function getFullCompatibilityReport(): CompatibilityReport;
|
|
2206
|
+
|
|
2207
|
+
/**
|
|
2208
|
+
* Retry options
|
|
2209
|
+
*/
|
|
2210
|
+
interface RetryOptions {
|
|
2211
|
+
/** Maximum number of attempts (default: 3) */
|
|
2212
|
+
maxAttempts?: number;
|
|
2213
|
+
/** Initial delay in ms (default: 1000) */
|
|
2214
|
+
delay?: number;
|
|
2215
|
+
/** Backoff strategy (default: 'exponential') */
|
|
2216
|
+
backoff?: 'linear' | 'exponential';
|
|
2217
|
+
/** Maximum delay in ms (default: 30000) */
|
|
2218
|
+
maxDelay?: number;
|
|
2219
|
+
/** Predicate that receives the thrown error and returns whether to retry */
|
|
2220
|
+
shouldRetry?: (error: unknown) => boolean;
|
|
2221
|
+
}
|
|
2222
|
+
/**
|
|
2223
|
+
* Create a debounced function
|
|
2224
|
+
* @param func - Function to debounce
|
|
2225
|
+
* @param wait - Wait time in ms
|
|
2226
|
+
* @returns Debounced function
|
|
2227
|
+
*/
|
|
2228
|
+
declare function debounce<T extends (...args: any[]) => unknown>(func: T, wait: number): (...args: Parameters<T>) => void;
|
|
2229
|
+
/**
|
|
2230
|
+
* Create a throttled function
|
|
2231
|
+
* @param func - Function to throttle
|
|
2232
|
+
* @param limit - Minimum interval in ms
|
|
2233
|
+
* @returns Throttled function
|
|
2234
|
+
*/
|
|
2235
|
+
declare function throttle<T extends (...args: any[]) => unknown>(func: T, limit: number): (...args: Parameters<T>) => void;
|
|
2236
|
+
/**
|
|
2237
|
+
* Async sleep utility
|
|
2238
|
+
* @param ms - Milliseconds to sleep
|
|
2239
|
+
* @returns Promise that resolves after delay
|
|
2240
|
+
*/
|
|
2241
|
+
declare function sleep(ms: number): Promise<void>;
|
|
2242
|
+
/**
|
|
2243
|
+
* Add timeout to a promise
|
|
2244
|
+
* @param promise - Promise to wrap
|
|
2245
|
+
* @param ms - Timeout in milliseconds
|
|
2246
|
+
* @param message - Error message on timeout
|
|
2247
|
+
* @returns Promise that rejects if timeout exceeded
|
|
2248
|
+
*/
|
|
2249
|
+
declare function timeout<T>(promise: Promise<T>, ms: number, message?: string): Promise<T>;
|
|
2250
|
+
/**
|
|
2251
|
+
* Retry a function with backoff
|
|
2252
|
+
* @param fn - Async function to retry
|
|
2253
|
+
* @param options - Retry options
|
|
2254
|
+
* @returns Promise with result
|
|
2255
|
+
*/
|
|
2256
|
+
declare function retry<T>(fn: () => Promise<T>, options?: RetryOptions): Promise<T>;
|
|
2257
|
+
/**
|
|
2258
|
+
* Create a cancellable timeout
|
|
2259
|
+
* @param ms - Timeout in milliseconds
|
|
2260
|
+
* @returns Object with promise and cancel function
|
|
2261
|
+
*/
|
|
2262
|
+
declare function cancellableTimeout(ms: number): {
|
|
2263
|
+
promise: Promise<void>;
|
|
2264
|
+
cancel: () => void;
|
|
2265
|
+
};
|
|
2266
|
+
/**
|
|
2267
|
+
* Execute function at regular intervals
|
|
2268
|
+
* @param fn - Function to execute
|
|
2269
|
+
* @param interval - Interval in ms
|
|
2270
|
+
* @param immediate - Execute immediately on start
|
|
2271
|
+
* @returns Function to stop the interval
|
|
2272
|
+
*/
|
|
2273
|
+
declare function setIntervalAsync(fn: () => Promise<void>, interval: number, immediate?: boolean): () => void;
|
|
2274
|
+
|
|
2275
|
+
/**
|
|
2276
|
+
* Timestamp format options
|
|
2277
|
+
*/
|
|
2278
|
+
type TimestampFormat = 'srt' | 'vtt' | 'readable' | 'iso' | 'ms';
|
|
2279
|
+
/**
|
|
2280
|
+
* Display options for transcript formatting
|
|
2281
|
+
*/
|
|
2282
|
+
interface DisplayOptions {
|
|
2283
|
+
/** Show timestamps */
|
|
2284
|
+
showTimestamps?: boolean;
|
|
2285
|
+
/** Show confidence scores */
|
|
2286
|
+
showConfidence?: boolean;
|
|
2287
|
+
/** Highlight interim results */
|
|
2288
|
+
highlightInterim?: boolean;
|
|
2289
|
+
/** Maximum text length */
|
|
2290
|
+
maxLength?: number;
|
|
2291
|
+
}
|
|
2292
|
+
/**
|
|
2293
|
+
* Format duration in milliseconds to readable string
|
|
2294
|
+
* @param ms - Duration in milliseconds
|
|
2295
|
+
* @returns Formatted string (e.g., "2m 30s")
|
|
2296
|
+
*/
|
|
2297
|
+
declare function formatDuration(ms: number): string;
|
|
2298
|
+
/**
|
|
2299
|
+
* Format timestamp to specified format
|
|
2300
|
+
* @param ms - Time in milliseconds
|
|
2301
|
+
* @param format - Output format
|
|
2302
|
+
* @returns Formatted timestamp
|
|
2303
|
+
*/
|
|
2304
|
+
declare function formatTimestamp(ms: number, format?: TimestampFormat): string;
|
|
2305
|
+
/**
|
|
2306
|
+
* Format confidence score as percentage
|
|
2307
|
+
* @param confidence - Confidence value (0-1)
|
|
2308
|
+
* @returns Formatted percentage string
|
|
2309
|
+
*/
|
|
2310
|
+
declare function formatConfidence(confidence: number): string;
|
|
2311
|
+
/**
|
|
2312
|
+
* Format file size in bytes to human-readable string
|
|
2313
|
+
* @param bytes - Size in bytes
|
|
2314
|
+
* @returns Formatted size string
|
|
2315
|
+
*/
|
|
2316
|
+
declare function formatFileSize(bytes: number): string;
|
|
2317
|
+
/**
|
|
2318
|
+
* Format transcripts for display
|
|
2319
|
+
* @param results - Transcription results
|
|
2320
|
+
* @param options - Display options
|
|
2321
|
+
* @returns Formatted string
|
|
2322
|
+
*/
|
|
2323
|
+
declare function formatTranscriptForDisplay(results: TranscriptionResult[], options?: DisplayOptions): string;
|
|
2324
|
+
/**
|
|
2325
|
+
* Format transcript as plain text
|
|
2326
|
+
* @param results - Transcription results
|
|
2327
|
+
* @param finalOnly - Only include final results
|
|
2328
|
+
* @returns Plain text string
|
|
2329
|
+
*/
|
|
2330
|
+
declare function formatAsPlainText(results: TranscriptionResult[], finalOnly?: boolean): string;
|
|
2331
|
+
/**
|
|
2332
|
+
* Format number with thousand separators
|
|
2333
|
+
* @param num - Number to format
|
|
2334
|
+
* @returns Formatted string
|
|
2335
|
+
*/
|
|
2336
|
+
declare function formatNumber(num: number): string;
|
|
2337
|
+
/**
|
|
2338
|
+
* Truncate text with ellipsis
|
|
2339
|
+
* @param text - Text to truncate
|
|
2340
|
+
* @param maxLength - Maximum length
|
|
2341
|
+
* @returns Truncated text
|
|
2342
|
+
*/
|
|
2343
|
+
declare function truncateText(text: string, maxLength: number): string;
|
|
2344
|
+
|
|
2345
|
+
/**
|
|
2346
|
+
* Language information
|
|
2347
|
+
*/
|
|
2348
|
+
interface LanguageInfo {
|
|
2349
|
+
/** Language code (e.g., 'en-US') */
|
|
2350
|
+
code: string;
|
|
2351
|
+
/** English name */
|
|
2352
|
+
name: string;
|
|
2353
|
+
/** Native name */
|
|
2354
|
+
nativeName: string;
|
|
2355
|
+
/** Provider that supports this language */
|
|
2356
|
+
provider: TranscriptionProvider;
|
|
2357
|
+
}
|
|
2358
|
+
/**
|
|
2359
|
+
* Get supported languages for a provider
|
|
2360
|
+
* @param provider - Transcription provider
|
|
2361
|
+
* @returns Array of language info
|
|
2362
|
+
*/
|
|
2363
|
+
declare function getSupportedLanguages(provider: TranscriptionProvider): LanguageInfo[];
|
|
2364
|
+
/**
|
|
2365
|
+
* Normalize language code to standard format
|
|
2366
|
+
* @param code - Language code to normalize
|
|
2367
|
+
* @returns Normalized code
|
|
2368
|
+
*/
|
|
2369
|
+
declare function normalizeLanguageCode(code: string): string;
|
|
2370
|
+
/**
|
|
2371
|
+
* Get human-readable language name
|
|
2372
|
+
* @param code - Language code
|
|
2373
|
+
* @returns Language name or code if unknown
|
|
2374
|
+
*/
|
|
2375
|
+
declare function getLanguageName(code: string): string;
|
|
2376
|
+
/**
|
|
2377
|
+
* Get native language name
|
|
2378
|
+
* @param code - Language code
|
|
2379
|
+
* @returns Native name or code if unknown
|
|
2380
|
+
*/
|
|
2381
|
+
declare function getNativeLanguageName(code: string): string;
|
|
2382
|
+
/**
|
|
2383
|
+
* Detect browser language
|
|
2384
|
+
* @returns Detected language code
|
|
2385
|
+
*/
|
|
2386
|
+
declare function detectBrowserLanguage(): string;
|
|
2387
|
+
/**
|
|
2388
|
+
* Check if language is supported by provider
|
|
2389
|
+
* @param code - Language code
|
|
2390
|
+
* @param provider - Transcription provider
|
|
2391
|
+
* @returns True if supported
|
|
2392
|
+
*/
|
|
2393
|
+
declare function isLanguageSupported(code: string, provider: TranscriptionProvider): boolean;
|
|
2394
|
+
/**
|
|
2395
|
+
* Get best matching language for provider
|
|
2396
|
+
* @param code - Preferred language code
|
|
2397
|
+
* @param provider - Transcription provider
|
|
2398
|
+
* @returns Best matching language code or default
|
|
2399
|
+
*/
|
|
2400
|
+
declare function getBestMatchingLanguage(code: string, provider: TranscriptionProvider): string;
|
|
2401
|
+
|
|
2402
|
+
/**
|
|
2403
|
+
* Generate a unique ID
|
|
2404
|
+
* @param prefix - Optional prefix
|
|
2405
|
+
* @returns Unique ID string
|
|
2406
|
+
*/
|
|
2407
|
+
declare function generateId(prefix?: string): string;
|
|
2408
|
+
/**
|
|
2409
|
+
* Deep clone an object
|
|
2410
|
+
* @param obj - Object to clone
|
|
2411
|
+
* @returns Cloned object
|
|
2412
|
+
*/
|
|
2413
|
+
declare function deepClone<T>(obj: T): T;
|
|
2414
|
+
/**
|
|
2415
|
+
* Deep merge multiple objects
|
|
2416
|
+
* @param objects - Objects to merge
|
|
2417
|
+
* @returns Merged object
|
|
2418
|
+
*/
|
|
2419
|
+
declare function mergeDeep<T extends object>(...objects: Partial<T>[]): T;
|
|
2420
|
+
/**
|
|
2421
|
+
* Check if value is a function
|
|
2422
|
+
* @param value - Value to check
|
|
2423
|
+
* @returns True if function
|
|
2424
|
+
*/
|
|
2425
|
+
declare function isFunction(value: unknown): value is (...args: unknown[]) => unknown;
|
|
2426
|
+
/**
|
|
2427
|
+
* Check if value is a plain object
|
|
2428
|
+
* @param value - Value to check
|
|
2429
|
+
* @returns True if value is a plain object
|
|
2430
|
+
*/
|
|
2431
|
+
declare function isObject(value: unknown): value is Record<string, unknown>;
|
|
2432
|
+
/**
|
|
2433
|
+
* Check if value is empty
|
|
2434
|
+
* @param value - Value to check
|
|
2435
|
+
* @returns True if empty
|
|
2436
|
+
*/
|
|
2437
|
+
declare function isEmpty(value: unknown): boolean;
|
|
2438
|
+
/**
|
|
2439
|
+
* Pick specific properties from object
|
|
2440
|
+
* @param obj - Source object
|
|
2441
|
+
* @param keys - Keys to pick
|
|
2442
|
+
* @returns New object with selected keys
|
|
2443
|
+
*/
|
|
2444
|
+
declare function pick<T extends object, K extends keyof T>(obj: T, keys: K[]): Pick<T, K>;
|
|
2445
|
+
/**
|
|
2446
|
+
* Omit specific properties from object
|
|
2447
|
+
* @param obj - Source object
|
|
2448
|
+
* @param keys - Keys to omit
|
|
2449
|
+
* @returns New object without selected keys
|
|
2450
|
+
*/
|
|
2451
|
+
declare function omit<T extends object, K extends keyof T>(obj: T, keys: K[]): Omit<T, K>;
|
|
2452
|
+
/**
|
|
2453
|
+
* Create a promise that resolves after a condition is met
|
|
2454
|
+
* @param condition - Function that returns true when condition is met
|
|
2455
|
+
* @param interval - Check interval in ms
|
|
2456
|
+
* @param timeout - Maximum wait time in ms
|
|
2457
|
+
* @returns Promise that resolves when condition is met
|
|
2458
|
+
*/
|
|
2459
|
+
declare function waitFor(condition: () => boolean, interval?: number, timeout?: number): Promise<void>;
|
|
2460
|
+
/**
|
|
2461
|
+
* Group array items by key
|
|
2462
|
+
* @param array - Array to group
|
|
2463
|
+
* @param keyFn - Function to get key from item
|
|
2464
|
+
* @returns Grouped object
|
|
2465
|
+
*/
|
|
2466
|
+
declare function groupBy<T, K extends string | number>(array: T[], keyFn: (item: T) => K): Record<K, T[]>;
|
|
2467
|
+
/**
|
|
2468
|
+
* Ensure value is within range
|
|
2469
|
+
* @param value - Value to clamp
|
|
2470
|
+
* @param min - Minimum value
|
|
2471
|
+
* @param max - Maximum value
|
|
2472
|
+
* @returns Clamped value
|
|
2473
|
+
*/
|
|
2474
|
+
declare function clamp(value: number, min: number, max: number): number;
|
|
2475
|
+
/**
|
|
2476
|
+
* Round number to specified decimal places
|
|
2477
|
+
* @param value - Number to round
|
|
2478
|
+
* @param decimals - Number of decimal places
|
|
2479
|
+
* @returns Rounded number
|
|
2480
|
+
*/
|
|
2481
|
+
declare function round(value: number, decimals?: number): number;
|
|
2482
|
+
|
|
2483
|
+
/**
|
|
2484
|
+
* Audio format information
|
|
2485
|
+
*/
|
|
2486
|
+
interface AudioFormatInfo {
|
|
2487
|
+
/** Detected format */
|
|
2488
|
+
format: string;
|
|
2489
|
+
/** Sample rate in Hz */
|
|
2490
|
+
sampleRate?: number;
|
|
2491
|
+
/** Number of channels */
|
|
2492
|
+
channels?: number;
|
|
2493
|
+
/** Bit depth */
|
|
2494
|
+
bitDepth?: number;
|
|
2495
|
+
/** Duration in seconds */
|
|
2496
|
+
duration?: number;
|
|
2497
|
+
}
|
|
2498
|
+
/**
|
|
2499
|
+
* Calculate audio bitrate
|
|
2500
|
+
* @param sampleRate - Sample rate in Hz
|
|
2501
|
+
* @param bitDepth - Bits per sample
|
|
2502
|
+
* @param channels - Number of channels
|
|
2503
|
+
* @returns Bitrate in bits per second
|
|
2504
|
+
*/
|
|
2505
|
+
declare function calculateBitrate(sampleRate: number, bitDepth: number, channels: number): number;
|
|
2506
|
+
/**
|
|
2507
|
+
* Estimate audio file size
|
|
2508
|
+
* @param durationMs - Duration in milliseconds
|
|
2509
|
+
* @param config - Audio configuration
|
|
2510
|
+
* @returns Estimated size in bytes
|
|
2511
|
+
*/
|
|
2512
|
+
declare function estimateAudioSize(durationMs: number, config: AudioConfig): number;
|
|
2513
|
+
/**
|
|
2514
|
+
* Get optimal buffer size for sample rate
|
|
2515
|
+
* @param sampleRate - Sample rate in Hz
|
|
2516
|
+
* @returns Optimal buffer size (power of 2)
|
|
2517
|
+
*/
|
|
2518
|
+
declare function getOptimalBufferSize(sampleRate: number): number;
|
|
2519
|
+
/**
|
|
2520
|
+
* Validate and detect audio format from ArrayBuffer
|
|
2521
|
+
* @param data - Audio data
|
|
2522
|
+
* @returns Audio format information
|
|
2523
|
+
*/
|
|
2524
|
+
declare function validateAudioFormat(data: ArrayBuffer): AudioFormatInfo;
|
|
2525
|
+
/**
|
|
2526
|
+
* Calculate RMS (Root Mean Square) of audio buffer
|
|
2527
|
+
* @param buffer - Audio samples
|
|
2528
|
+
* @returns RMS value (0-1)
|
|
2529
|
+
*/
|
|
2530
|
+
declare function calculateRMS(buffer: Float32Array): number;
|
|
2531
|
+
/**
|
|
2532
|
+
* Calculate peak amplitude of audio buffer
|
|
2533
|
+
* @param buffer - Audio samples
|
|
2534
|
+
* @returns Peak value (0-1)
|
|
2535
|
+
*/
|
|
2536
|
+
declare function calculatePeak(buffer: Float32Array): number;
|
|
2537
|
+
/**
|
|
2538
|
+
* Convert decibels to linear amplitude
|
|
2539
|
+
* @param db - Value in decibels
|
|
2540
|
+
* @returns Linear amplitude
|
|
2541
|
+
*/
|
|
2542
|
+
declare function dbToLinear(db: number): number;
|
|
2543
|
+
/**
|
|
2544
|
+
* Convert linear amplitude to decibels
|
|
2545
|
+
* @param linear - Linear amplitude
|
|
2546
|
+
* @returns Value in decibels
|
|
2547
|
+
*/
|
|
2548
|
+
declare function linearToDb(linear: number): number;
|
|
2549
|
+
/**
|
|
2550
|
+
* Check if audio data contains silence
|
|
2551
|
+
* @param buffer - Audio samples
|
|
2552
|
+
* @param threshold - Silence threshold (default: 0.001)
|
|
2553
|
+
* @returns True if mostly silence
|
|
2554
|
+
*/
|
|
2555
|
+
declare function isSilence(buffer: Float32Array, threshold?: number): boolean;
|
|
2556
|
+
/**
|
|
2557
|
+
* Get audio constraints for getUserMedia
|
|
2558
|
+
* @param config - Audio configuration
|
|
2559
|
+
* @returns MediaTrackConstraints for the audio track
|
|
2560
|
+
*/
|
|
2561
|
+
declare function getAudioConstraints(config?: AudioConfig): MediaTrackConstraints;
|
|
2562
|
+
/**
|
|
2563
|
+
* Calculate audio duration from samples
|
|
2564
|
+
* @param samples - Number of samples
|
|
2565
|
+
* @param sampleRate - Sample rate in Hz
|
|
2566
|
+
* @returns Duration in milliseconds
|
|
2567
|
+
*/
|
|
2568
|
+
declare function samplesToDuration(samples: number, sampleRate: number): number;
|
|
2569
|
+
/**
|
|
2570
|
+
* Calculate samples from duration
|
|
2571
|
+
* @param durationMs - Duration in milliseconds
|
|
2572
|
+
* @param sampleRate - Sample rate in Hz
|
|
2573
|
+
* @returns Number of samples
|
|
2574
|
+
*/
|
|
2575
|
+
declare function durationToSamples(durationMs: number, sampleRate: number): number;
|
|
2576
|
+
|
|
2577
|
+
/**
|
|
2578
|
+
* Live Transcribe - Professional live speech transcription library
|
|
2579
|
+
* @module live-transcribe
|
|
2580
|
+
*/
|
|
2581
|
+
|
|
2582
|
+
/**
|
|
2583
|
+
* Library version
|
|
2584
|
+
*/
|
|
2585
|
+
declare const VERSION = "0.1.0";
|
|
2586
|
+
/**
|
|
2587
|
+
* Library name
|
|
2588
|
+
*/
|
|
2589
|
+
declare const LIBRARY_NAME = "live-transcribe";
|
|
2590
|
+
/**
|
|
2591
|
+
* Options accepted by quickStart for simplified initialization; every field is optional
|
|
2592
|
+
*/
|
|
2593
|
+
interface QuickStartOptions {
|
|
2594
|
+
/** Transcription provider to use (default: auto-detect) */
|
|
2595
|
+
provider?: TranscriptionProvider;
|
|
2596
|
+
/** API key for cloud providers */
|
|
2597
|
+
apiKey?: string;
|
|
2598
|
+
/** Language code (default: 'en-US') */
|
|
2599
|
+
language?: string;
|
|
2600
|
+
/** Callback for transcript events; receives the TranscriptionResult */
|
|
2601
|
+
onTranscript?: (result: TranscriptionResult) => void;
|
|
2602
|
+
/** Callback for error events; receives the TranscriptionError */
|
|
2603
|
+
onError?: (error: TranscriptionError) => void;
|
|
2604
|
+
/** Callback when transcription starts; takes no arguments */
|
|
2605
|
+
onStart?: () => void;
|
|
2606
|
+
/** Callback when transcription stops; takes no arguments */
|
|
2607
|
+
onStop?: () => void;
|
|
2608
|
+
/** Enable interim results (default: true) */
|
|
2609
|
+
interimResults?: boolean;
|
|
2610
|
+
/** Enable audio recording (NOTE(review): default is not documented here - confirm) */
|
|
2611
|
+
recordAudio?: boolean;
|
|
2612
|
+
}
|
|
2613
|
+
/**
|
|
2614
|
+
* Create a transcription provider instance from a configuration object
|
|
2615
|
+
* @param config - Transcription configuration
|
|
2616
|
+
* @returns Configured provider instance, typed as ITranscriptionProvider
|
|
2617
|
+
* @example
|
|
2618
|
+
* ```typescript
|
|
2619
|
+
* const provider = createTranscriber({
|
|
2620
|
+
* provider: TranscriptionProvider.WebSpeechAPI,
|
|
2621
|
+
* language: 'en-US'
|
|
2622
|
+
* });
|
|
2623
|
+
* ```
|
|
2624
|
+
*/
|
|
2625
|
+
declare function createTranscriber(config: TranscriptionConfig): ITranscriptionProvider;
|
|
2626
|
+
/**
|
|
2627
|
+
* Create a transcription session with a configured provider
|
|
2628
|
+
* @param config - Transcription configuration
|
|
2629
|
+
* @param sessionConfig - Optional session configuration
|
|
2630
|
+
* @returns Configured TranscriptionSession (returned synchronously, not wrapped in a Promise)
|
|
2631
|
+
* @example
|
|
2632
|
+
* ```typescript
|
|
2633
|
+
* const session = createSession({
|
|
2634
|
+
* provider: TranscriptionProvider.Deepgram,
|
|
2635
|
+
* apiKey: 'your-api-key',
|
|
2636
|
+
* language: 'en-US'
|
|
2637
|
+
* }, {
|
|
2638
|
+
* recordAudio: true
|
|
2639
|
+
* });
|
|
2640
|
+
* ```
|
|
2641
|
+
*/
|
|
2642
|
+
declare function createSession(config: TranscriptionConfig, sessionConfig?: SessionConfig): TranscriptionSession;
|
|
2643
|
+
/**
|
|
2644
|
+
* Quick start transcription with minimal configuration
|
|
2645
|
+
* Auto-detects the best available provider and handles initialization
|
|
2646
|
+
* @param options - Optional quick start options
|
|
2647
|
+
* @returns Promise resolving to a ready-to-use TranscriptionSession
|
|
2648
|
+
* @example
|
|
2649
|
+
* ```typescript
|
|
2650
|
+
* const session = await quickStart({
|
|
2651
|
+
* language: 'en-US',
|
|
2652
|
+
* onTranscript: (result) => console.log(result.text),
|
|
2653
|
+
* onError: (error) => console.error(error)
|
|
2654
|
+
* });
|
|
2655
|
+
*
|
|
2656
|
+
* // Session is already started and transcribing
|
|
2657
|
+
* // Stop when done:
|
|
2658
|
+
* await session.stop();
|
|
2659
|
+
* ```
|
|
2660
|
+
*/
|
|
2661
|
+
declare function quickStart(options?: QuickStartOptions): Promise<TranscriptionSession>;
|
|
2662
|
+
/**
|
|
2663
|
+
* Check if a specific provider is supported in the current environment
|
|
2664
|
+
* @param provider - Provider to check
|
|
2665
|
+
* @returns True if the provider is supported, false otherwise
|
|
2666
|
+
*/
|
|
2667
|
+
declare function isProviderSupported(provider: TranscriptionProvider): boolean;
|
|
2668
|
+
/**
|
|
2669
|
+
* Get the list of providers supported in the current environment
|
|
2670
|
+
* @returns Array of supported TranscriptionProvider values
|
|
2671
|
+
*/
|
|
2672
|
+
declare function getSupportedProviders(): TranscriptionProvider[];
|
|
2673
|
+
/**
* Aggregate object exported as the module's default export.
* Bundles the version constants, the factory and support-check functions,
* and the provider, session and session-manager values declared in this file.
* NOTE(review): VERSION and LIBRARY_NAME are typed as plain string here,
* whereas the standalone constants keep their literal types.
*/
declare const _default: {
|
|
2674
|
+
VERSION: string;
|
|
2675
|
+
LIBRARY_NAME: string;
|
|
2676
|
+
createTranscriber: typeof createTranscriber;
|
|
2677
|
+
createSession: typeof createSession;
|
|
2678
|
+
quickStart: typeof quickStart;
|
|
2679
|
+
isProviderSupported: typeof isProviderSupported;
|
|
2680
|
+
getSupportedProviders: typeof getSupportedProviders;
|
|
2681
|
+
TranscriptionProvider: typeof TranscriptionProvider;
|
|
2682
|
+
WebSpeechProvider: typeof WebSpeechProvider;
|
|
2683
|
+
DeepgramProvider: typeof DeepgramProvider;
|
|
2684
|
+
AssemblyAIProvider: typeof AssemblyAIProvider;
|
|
2685
|
+
TranscriptionSession: typeof TranscriptionSession;
|
|
2686
|
+
SessionManager: typeof SessionManager;
|
|
2687
|
+
};
|
|
2688
|
+
|
|
2689
|
+
export { type AssemblyAIOptions, AssemblyAIProvider, AudioBufferManager, type AudioConfig, AudioEncoding, type AudioFormat, type AudioFormatInfo, AudioLevelMonitor, type AudioLevelMonitorOptions, AudioProcessor, AudioRecorder, BaseTranscriber, type BrowserInfo, type CSVExportOptions, CSVExporter, type CompatibilityReport, DEFAULT_AUDIO_CONFIG, DEFAULT_SESSION_CONFIG, DEFAULT_TRANSCRIPTION_CONFIG, type DeepgramOptions, DeepgramProvider, type DisplayOptions, ErrorCode, EventEmitter, type ExportFormat, type ExportResult, type ITranscriptionProvider, JSONExporter, LIBRARY_NAME, type LanguageInfo, LocalStorageAdapter, MemoryStorageAdapter, type MergeOptions, type ProviderCapabilities, type ProviderInfo, type QuickStartOptions, type RetryOptions, SRTExporter, type SessionConfig, type SessionExport, type SessionExportData, type SessionImport, SessionManager, type SessionManagerOptions, type SessionMetadata, SessionState, type SessionStatistics, type SessionStats, type StorageAdapter, type SupportCheck, type TextExportOptions, TextExporter, type TimestampFormat, type TranscriptionConfig, TranscriptionError, type TranscriptionEventName, type TranscriptionEvents, TranscriptionProvider, type TranscriptionResult, type TranscriptionSegment, TranscriptionSession, type VADOptions, VERSION, VTTExporter, type ValidationError, type ValidationResult, type ValidationWarning, VoiceActivityDetector, WebSpeechProvider, type Word, calculateBitrate, calculatePeak, calculateRMS, cancellableTimeout, checkAudioContextSupport, checkMediaDevicesSupport, checkWebSocketSupport, checkWebSpeechAPISupport, clamp, createSession, createTranscriber, dbToLinear, debounce, deepClone, _default as default, detectBrowserLanguage, durationToSamples, estimateAudioSize, formatAsPlainText, formatConfidence, formatDuration, formatFileSize, formatNumber, formatTimestamp, formatTranscriptForDisplay, generateId, getAudioConstraints, getBestMatchingLanguage, getBrowserInfo, getFullCompatibilityReport, 
getLanguageName, getNativeLanguageName, getOptimalBufferSize, getSupportedLanguages, getSupportedProviders, groupBy, isEmpty, isFunction, isLanguageSupported, isObject, isProviderSupported, isSilence, linearToDb, mergeDeep, normalizeLanguageCode, omit, pick, quickStart, retry, round, samplesToDuration, setIntervalAsync, sleep, throttle, timeout, truncateText, validateApiKey, validateAudioConfig, validateAudioFormat, validateLanguageCode, validateSessionConfig, validateTranscriptionConfig, waitFor };
|