@360labs/live-transcribe 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/LICENSE +21 -0
- package/README.md +170 -0
- package/dist/index.d.mts +2689 -0
- package/dist/index.d.ts +2689 -0
- package/dist/index.js +4777 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +4682 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +86 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,4777 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
/* @360labs/live-transcribe - MIT License - Built by 360labs */
|
|
6
|
+
|
|
7
|
+
// src/core/EventEmitter.ts
|
|
8
|
+
var EventEmitter = class {
  constructor() {
    /** Map of event name -> Set of listener callbacks. */
    this.events = /* @__PURE__ */ new Map();
  }
  /**
   * Subscribe to an event.
   * @param event - Event name to subscribe to
   * @param listener - Callback function
   * @returns this, for chaining
   */
  on(event, listener) {
    let bucket = this.events.get(event);
    if (bucket === void 0) {
      bucket = /* @__PURE__ */ new Set();
      this.events.set(event, bucket);
    }
    bucket.add(listener);
    return this;
  }
  /**
   * Unsubscribe from an event.
   * @param event - Event name to unsubscribe from
   * @param listener - Callback function to remove
   * @returns this, for chaining
   */
  off(event, listener) {
    const bucket = this.events.get(event);
    if (bucket !== void 0) {
      bucket.delete(listener);
      // Drop empty buckets so eventNames() only reports events with live listeners.
      if (bucket.size === 0) {
        this.events.delete(event);
      }
    }
    return this;
  }
  /**
   * Subscribe to an event for one-time notification.
   * @param event - Event name to subscribe to
   * @param listener - Callback function
   * @returns this, for chaining
   */
  once(event, listener) {
    const wrapper = (...args) => {
      // Remove before invoking so a re-entrant emit cannot fire it twice.
      this.off(event, wrapper);
      listener.apply(this, args);
    };
    return this.on(event, wrapper);
  }
  /**
   * Emit an event to all subscribers.
   * @param event - Event name to emit
   * @param args - Arguments to pass to listeners
   * @returns true if at least one listener was notified
   */
  emit(event, ...args) {
    const bucket = this.events.get(event);
    if (bucket === void 0 || bucket.size === 0) {
      return false;
    }
    for (const listener of bucket) {
      try {
        listener.apply(this, args);
      } catch (error) {
        // A throwing listener must not prevent the remaining listeners from running.
        console.error(`Error in event listener for "${String(event)}":`, error);
      }
    }
    return true;
  }
  /**
   * Remove all listeners for an event, or all listeners if no event specified.
   * @param event - Optional event name
   * @returns this, for chaining
   */
  removeAllListeners(event) {
    if (event === void 0) {
      this.events.clear();
    } else {
      this.events.delete(event);
    }
    return this;
  }
  /**
   * Get the number of listeners for an event.
   * @param event - Event name
   * @returns Listener count (0 when the event has no subscribers)
   */
  listenerCount(event) {
    const bucket = this.events.get(event);
    return bucket === void 0 ? 0 : bucket.size;
  }
  /**
   * Get all event names that currently have listeners.
   */
  eventNames() {
    return [...this.events.keys()];
  }
};
|
|
97
|
+
|
|
98
|
+
// src/types/config.ts
|
|
99
|
+
// String-enum of supported transcription backends.
var TranscriptionProvider = /* @__PURE__ */ Object.assign(TranscriptionProvider || {}, {
  WebSpeechAPI: "web-speech",
  Deepgram: "deepgram",
  AssemblyAI: "assemblyai",
  Custom: "custom"
});
|
|
106
|
+
// String-enum of supported raw-audio encodings.
var AudioEncoding = /* @__PURE__ */ Object.assign(AudioEncoding || {}, {
  LINEAR16: "linear16",
  MULAW: "mulaw",
  ALAW: "alaw",
  OPUS: "opus"
});
|
|
113
|
+
// Default audio capture settings: 16 kHz, mono, 16-bit linear PCM.
var DEFAULT_AUDIO_CONFIG = {
  sampleRate: 16e3,
  // 16000 Hz
  channels: 1,
  // mono
  bitDepth: 16,
  // bits per sample
  encoding: "linear16" /* LINEAR16 */
};
|
|
119
|
+
// Default transcription options, merged under caller-supplied config.
var DEFAULT_TRANSCRIPTION_CONFIG = {
  language: "en-US",
  // BCP-47 language tag
  interimResults: true,
  // emit partial (non-final) results as they arrive
  profanityFilter: false,
  punctuation: true
};
|
|
125
|
+
|
|
126
|
+
// src/types/session.ts
|
|
127
|
+
// String-enum of transcription-session lifecycle states.
var SessionState = /* @__PURE__ */ Object.assign(SessionState || {}, {
  IDLE: "idle",
  INITIALIZING: "initializing",
  ACTIVE: "active",
  PAUSED: "paused",
  STOPPING: "stopping",
  STOPPED: "stopped",
  ERROR: "error"
});
|
|
137
|
+
// Default session options; a value of 0 disables the corresponding limit.
var DEFAULT_SESSION_CONFIG = {
  recordAudio: false,
  maxDuration: 0,
  // 0 means no limit
  silenceTimeout: 0,
  // 0 means no auto-stop
  enableVAD: false,
  // voice activity detection disabled by default
  vadThreshold: 0.5
};
|
|
146
|
+
|
|
147
|
+
// src/types/events.ts
|
|
148
|
+
// String-enum of machine-readable error codes used by TranscriptionError.
var ErrorCode = /* @__PURE__ */ Object.assign(ErrorCode || {}, {
  INITIALIZATION_FAILED: "initialization_failed",
  CONNECTION_FAILED: "connection_failed",
  AUTHENTICATION_FAILED: "authentication_failed",
  MICROPHONE_ACCESS_DENIED: "microphone_access_denied",
  UNSUPPORTED_BROWSER: "unsupported_browser",
  NETWORK_ERROR: "network_error",
  PROVIDER_ERROR: "provider_error",
  INVALID_CONFIG: "invalid_config",
  SESSION_EXPIRED: "session_expired",
  UNKNOWN_ERROR: "unknown_error"
});
|
|
161
|
+
/**
 * Error type carrying a machine-readable code, the originating provider,
 * and optional underlying details.
 */
var TranscriptionError = class _TranscriptionError extends Error {
  /**
   * @param message - Human-readable error description
   * @param code - Machine-readable error code (see ErrorCode)
   * @param provider - Provider that raised the error, if known
   * @param details - Underlying error or extra context
   */
  constructor(message, code, provider, details) {
    super(message);
    this.name = "TranscriptionError";
    Object.assign(this, { code, provider, details });
    // V8-only API: trim the constructor frame from the captured stack trace.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _TranscriptionError);
    }
  }
};
|
|
173
|
+
|
|
174
|
+
// src/core/BaseTranscriber.ts
|
|
175
|
+
// Abstract base for provider-specific transcribers: owns session state,
// word counting, audio-chunk buffering, and error normalization.
// Emits: "stateChange", "transcript", "final", "interim", "error".
var BaseTranscriber = class extends EventEmitter {
  /**
   * Create a new BaseTranscriber instance
   * @param config - Transcription configuration; merged over DEFAULT_TRANSCRIPTION_CONFIG
   * @throws TranscriptionError (INVALID_CONFIG) when provider/apiKey validation fails
   */
  constructor(config) {
    super();
    /** Current session state */
    this.state = "idle" /* IDLE */;
    /** Recorded audio chunks */
    this.audioRecording = [];
    /** Word count in current session */
    this.wordCount = 0;
    this.config = { ...DEFAULT_TRANSCRIPTION_CONFIG, ...config };
    this.sessionMetadata = this.initializeMetadata();
    this.validateConfig();
  }
  // ==================== Public Helper Methods ====================
  /**
   * Get the current session state
   */
  getState() {
    return this.state;
  }
  /**
   * Get session metadata
   * Returns a shallow copy with duration and wordCount refreshed on each call.
   */
  getMetadata() {
    return {
      ...this.sessionMetadata,
      duration: this.calculateDuration(),
      wordCount: this.wordCount
    };
  }
  /**
   * Get recorded audio data
   * Concatenates all buffered chunks into a single ArrayBuffer.
   * @returns Combined audio data or null if nothing was recorded
   */
  getRecording() {
    if (this.audioRecording.length === 0) {
      return null;
    }
    const totalLength = this.audioRecording.reduce((acc, chunk) => acc + chunk.byteLength, 0);
    const combined = new ArrayBuffer(totalLength);
    const view = new Uint8Array(combined);
    let offset = 0;
    for (const chunk of this.audioRecording) {
      view.set(new Uint8Array(chunk), offset);
      offset += chunk.byteLength;
    }
    return combined;
  }
  // ==================== Protected Helper Methods ====================
  /**
   * Update session state and emit state change event
   * Side effects: records startTime on first transition to ACTIVE, and
   * endTime/duration on transition to STOPPED or ERROR. Emits "stateChange"
   * only when the state actually changed.
   * @param newState - New session state
   */
  setState(newState) {
    const previousState = this.state;
    this.state = newState;
    if (newState === "active" /* ACTIVE */ && !this.startTime) {
      this.startTime = Date.now();
      this.sessionMetadata.startTime = this.startTime;
    }
    if (newState === "stopped" /* STOPPED */ || newState === "error" /* ERROR */) {
      this.sessionMetadata.endTime = Date.now();
      this.sessionMetadata.duration = this.calculateDuration();
    }
    if (previousState !== newState) {
      this.emit("stateChange", newState);
    }
  }
  /**
   * Handle incoming transcription result
   * Accumulates wordCount for final results, then emits "transcript" plus
   * either "final" or "interim" depending on result.isFinal.
   * @param result - Transcription result from provider
   */
  handleTranscript(result) {
    if (result.isFinal && result.text) {
      const words = result.text.trim().split(/\s+/).filter((w) => w.length > 0);
      this.wordCount += words.length;
    }
    this.emit("transcript", result);
    if (result.isFinal) {
      this.emit("final", result);
    } else {
      this.emit("interim", result);
    }
  }
  /**
   * Handle errors and emit error event
   * Wraps non-TranscriptionError values as UNKNOWN_ERROR, moves the session
   * to ERROR state, then emits "error".
   * @param error - Error to handle
   */
  handleError(error) {
    let transcriptionError;
    if (error instanceof TranscriptionError) {
      transcriptionError = error;
    } else {
      transcriptionError = new TranscriptionError(
        error.message,
        "unknown_error" /* UNKNOWN_ERROR */,
        this.config.provider,
        error
      );
    }
    this.setState("error" /* ERROR */);
    this.emit("error", transcriptionError);
  }
  /**
   * Validate configuration
   * Requires a provider; cloud providers ("deepgram", "assemblyai") also
   * require an apiKey.
   * @throws TranscriptionError if configuration is invalid
   */
  validateConfig() {
    if (!this.config.provider) {
      throw new TranscriptionError(
        "Provider must be specified in configuration",
        "invalid_config" /* INVALID_CONFIG */
      );
    }
    const cloudProviders = ["deepgram", "assemblyai"];
    if (cloudProviders.includes(this.config.provider) && !this.config.apiKey) {
      throw new TranscriptionError(
        `API key is required for ${this.config.provider} provider`,
        "invalid_config" /* INVALID_CONFIG */,
        this.config.provider
      );
    }
  }
  /**
   * Append a copy of an audio chunk to the in-memory recording buffer.
   * NOTE(review): no recordAudio flag is checked here despite the original
   * "if recording is enabled" wording — callers appear responsible for
   * gating; confirm before relying on it. The chunk is copied (slice(0))
   * so later mutation of the caller's buffer does not corrupt the recording.
   * @param data - Audio data to record
   */
  recordAudioData(data) {
    this.audioRecording.push(data.slice(0));
  }
  /**
   * Calculate session duration
   * Uses sessionMetadata.endTime when set (session ended), otherwise "now".
   * @returns Duration in milliseconds (0 if the session never became active)
   */
  calculateDuration() {
    if (!this.startTime) {
      return 0;
    }
    const endTime = this.sessionMetadata.endTime || Date.now();
    return endTime - this.startTime;
  }
  /**
   * Clear recording data
   */
  clearRecording() {
    this.audioRecording = [];
  }
  /**
   * Reset session state for new session
   * Note: bypasses setState(), so no "stateChange" event is emitted.
   */
  resetSession() {
    this.state = "idle" /* IDLE */;
    this.startTime = void 0;
    this.wordCount = 0;
    this.audioRecording = [];
    this.sessionMetadata = this.initializeMetadata();
  }
  // ==================== Private Helper Methods ====================
  /**
   * Generate a unique session ID
   * Produces a UUIDv4-shaped string via Math.random — not cryptographically
   * secure; suitable for session labels only.
   */
  generateSessionId() {
    return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
      const r = Math.random() * 16 | 0;
      const v = c === "x" ? r : r & 3 | 8;
      return v.toString(16);
    });
  }
  /**
   * Initialize session metadata
   * startTime stays 0 until the session first becomes ACTIVE (see setState).
   */
  initializeMetadata() {
    return {
      id: this.generateSessionId(),
      startTime: 0,
      wordCount: 0,
      provider: this.config.provider
    };
  }
};
|
|
359
|
+
|
|
360
|
+
// src/utils/exporters/JSONExporter.ts
|
|
361
|
+
// src/utils/exporters/JSONExporter.ts
var JSONExporter = class _JSONExporter {
  /**
   * Build the versioned export envelope shared by export() and exportPretty().
   * (Previously this object literal was duplicated in both methods.)
   * @param session - Session data to export
   * @returns Plain object ready for JSON.stringify
   */
  static buildExportData(session) {
    return {
      version: "1.0.0",
      session: {
        metadata: session.metadata,
        transcripts: session.transcripts,
        config: {}
      },
      exportedAt: Date.now()
    };
  }
  /**
   * Export session to minified JSON string
   * @param session - Session data to export
   * @returns JSON string
   */
  static export(session) {
    return JSON.stringify(_JSONExporter.buildExportData(session));
  }
  /**
   * Export session to formatted/pretty JSON string
   * @param session - Session data to export
   * @param indent - Indentation spaces (default: 2)
   * @returns Formatted JSON string
   */
  static exportPretty(session, indent = 2) {
    return JSON.stringify(_JSONExporter.buildExportData(session), null, indent);
  }
  /**
   * Parse JSON and validate structure
   * @param json - JSON string to parse
   * @returns Parsed session export data
   * @throws Error when the payload lacks the version/session envelope
   */
  static parse(json) {
    const data = JSON.parse(json);
    if (!data.version || !data.session) {
      throw new Error("Invalid session export format");
    }
    return data;
  }
};
|
|
410
|
+
|
|
411
|
+
// src/utils/exporters/TextExporter.ts
|
|
412
|
+
// src/utils/exporters/TextExporter.ts
var TextExporter = class {
  /**
   * Export session transcripts to plain text.
   * Only final transcripts are included; each line is optionally prefixed
   * with a timestamp and speaker and suffixed with a confidence percentage.
   * @param session - Session data to export
   * @param options - Export options
   * @returns Plain text string
   */
  static export(session, options = {}) {
    const {
      includeTimestamps = false,
      includeSpeakers = false,
      includeConfidence = false,
      paragraphBreaks = false
    } = options;
    const formatted = session.transcripts
      .filter((t) => t.isFinal)
      .map((t) => {
        const pieces = [];
        if (includeTimestamps && t.timestamp) {
          // HH:MM:SS slice of the ISO timestamp.
          pieces.push(`[${new Date(t.timestamp).toISOString().substr(11, 8)}] `);
        }
        if (includeSpeakers && t.speaker) {
          pieces.push(`${t.speaker}: `);
        }
        pieces.push(t.text);
        if (includeConfidence && t.confidence !== void 0) {
          pieces.push(` (${Math.round(t.confidence * 100)}%)`);
        }
        return pieces.join("");
      });
    return formatted.join(paragraphBreaks ? "\n\n" : " ").trim();
  }
  /**
   * Export as continuous text without any formatting.
   * @param session - Session data to export
   * @returns Plain text string of all final transcript texts joined by spaces
   */
  static exportPlain(session) {
    const finals = [];
    for (const t of session.transcripts) {
      if (t.isFinal) {
        finals.push(t.text);
      }
    }
    return finals.join(" ").trim();
  }
};
|
|
455
|
+
|
|
456
|
+
// src/utils/exporters/SRTExporter.ts
|
|
457
|
+
// src/utils/exporters/SRTExporter.ts
var SRTExporter = class {
  /**
   * Export session transcripts to SRT (SubRip) format.
   * Cue times come from word-level timings when available; otherwise the
   * duration is estimated at 150 words/minute (minimum 1 second) and cues
   * are laid out sequentially with a 100 ms gap.
   * @param session - Session data to export
   * @returns SRT formatted string
   */
  static export(session) {
    const output = [];
    let index = 1;
    let cursor = 0;
    for (const transcript of session.transcripts) {
      if (!transcript.isFinal) continue;
      if (!transcript.text.trim()) continue;
      let begin = cursor;
      let finish;
      const words = transcript.words;
      if (words && words.length > 0) {
        begin = words[0].start;
        finish = words[words.length - 1].end;
      } else {
        const count = transcript.text.split(/\s+/).length;
        const estimatedMs = count / 150 * 60 * 1e3;
        finish = begin + Math.max(estimatedMs, 1e3);
      }
      output.push(
        String(index),
        `${this.formatTime(begin)} --> ${this.formatTime(finish)}`,
        transcript.text,
        ""
      );
      index += 1;
      cursor = finish + 100;
    }
    return output.join("\n");
  }
  /**
   * Format milliseconds to SRT timestamp format (HH:MM:SS,mmm).
   * @param ms - Time in milliseconds
   * @returns Formatted timestamp
   */
  static formatTime(ms) {
    const totalSeconds = Math.floor(ms / 1e3);
    const hh = this.pad(Math.floor(totalSeconds / 3600), 2);
    const mm = this.pad(Math.floor(totalSeconds % 3600 / 60), 2);
    const ss = this.pad(totalSeconds % 60, 2);
    const mmm = this.pad(Math.floor(ms % 1e3), 3);
    return `${hh}:${mm}:${ss},${mmm}`;
  }
  /**
   * Pad number with leading zeros.
   * @param num - Number to pad
   * @param length - Target length
   * @returns Padded string
   */
  static pad(num, length) {
    return `${num}`.padStart(length, "0");
  }
};
|
|
512
|
+
|
|
513
|
+
// src/utils/exporters/VTTExporter.ts
|
|
514
|
+
// src/utils/exporters/VTTExporter.ts
var VTTExporter = class _VTTExporter {
  /**
   * Compute the [startTime, endTime] for one transcript cue.
   * Word-level timings are used when present; otherwise the duration is
   * estimated at 150 words/minute (minimum 1 second) starting at
   * fallbackStart. (Previously duplicated verbatim in export() and
   * exportWithCues().)
   * @param transcript - Transcription result (text must be non-empty)
   * @param fallbackStart - Start time (ms) to use when no word timings exist
   * @returns { startTime, endTime } in milliseconds
   */
  static computeTiming(transcript, fallbackStart) {
    const words = transcript.words;
    if (words && words.length > 0) {
      return { startTime: words[0].start, endTime: words[words.length - 1].end };
    }
    const wordCount = transcript.text.split(/\s+/).length;
    const durationMs = wordCount / 150 * 60 * 1e3;
    return { startTime: fallbackStart, endTime: fallbackStart + Math.max(durationMs, 1e3) };
  }
  /**
   * Export session transcripts to WebVTT format
   * @param session - Session data to export
   * @returns WebVTT formatted string
   */
  static export(session) {
    const lines = ["WEBVTT", ""];
    let currentTime = 0;
    for (const transcript of session.transcripts) {
      if (!transcript.isFinal || !transcript.text.trim()) continue;
      const { startTime, endTime } = _VTTExporter.computeTiming(transcript, currentTime);
      lines.push(`${this.formatTime(startTime)} --> ${this.formatTime(endTime)}`);
      if (transcript.speaker) {
        // WebVTT voice span tags the cue with its speaker.
        lines.push(`<v ${transcript.speaker}>${transcript.text}`);
      } else {
        lines.push(transcript.text);
      }
      lines.push("");
      // 100 ms gap between sequentially laid-out cues.
      currentTime = endTime + 100;
    }
    return lines.join("\n");
  }
  /**
   * Export with cue identifiers
   * @param session - Session data to export
   * @param cuePrefix - Prefix for cue identifiers
   * @returns WebVTT formatted string with cue IDs
   */
  static exportWithCues(session, cuePrefix = "cue") {
    const lines = ["WEBVTT", ""];
    let cueNumber = 1;
    let currentTime = 0;
    for (const transcript of session.transcripts) {
      if (!transcript.isFinal || !transcript.text.trim()) continue;
      const { startTime, endTime } = _VTTExporter.computeTiming(transcript, currentTime);
      lines.push(`${cuePrefix}-${cueNumber}`);
      lines.push(`${this.formatTime(startTime)} --> ${this.formatTime(endTime)}`);
      lines.push(transcript.text);
      lines.push("");
      cueNumber++;
      currentTime = endTime + 100;
    }
    return lines.join("\n");
  }
  /**
   * Format milliseconds to WebVTT timestamp format (HH:MM:SS.mmm)
   * @param ms - Time in milliseconds
   * @returns Formatted timestamp
   */
  static formatTime(ms) {
    const totalSeconds = Math.floor(ms / 1e3);
    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor(totalSeconds % 3600 / 60);
    const seconds = totalSeconds % 60;
    const milliseconds = Math.floor(ms % 1e3);
    return `${this.pad(hours, 2)}:${this.pad(minutes, 2)}:${this.pad(seconds, 2)}.${this.pad(milliseconds, 3)}`;
  }
  /**
   * Pad number with leading zeros
   */
  static pad(num, length) {
    return String(num).padStart(length, "0");
  }
};
|
|
599
|
+
|
|
600
|
+
// src/utils/exporters/CSVExporter.ts
|
|
601
|
+
// src/utils/exporters/CSVExporter.ts
var CSVExporter = class {
  /**
   * Export session transcripts to CSV format
   * @param session - Session data to export
   * @param options - Export options (delimiter, includeHeaders, columns)
   * @returns CSV formatted string
   */
  static export(session, options = {}) {
    const {
      delimiter = ",",
      includeHeaders = true,
      columns = ["timestamp", "text", "isFinal", "confidence", "speaker"]
    } = options;
    const rows = [];
    if (includeHeaders) {
      rows.push(columns.map((col) => this.escapeField(col, delimiter)).join(delimiter));
    }
    for (const transcript of session.transcripts) {
      const values = columns.map((col) => {
        const value = this.getFieldValue(transcript, col);
        return this.escapeField(String(value), delimiter);
      });
      rows.push(values.join(delimiter));
    }
    return rows.join("\n");
  }
  /**
   * Export only final transcripts
   * @param session - Session data to export
   * @param options - Export options
   * @returns CSV formatted string
   */
  static exportFinalOnly(session, options = {}) {
    const filteredSession = {
      transcripts: session.transcripts.filter((t) => t.isFinal)
    };
    return this.export(filteredSession, options);
  }
  /**
   * Get field value from transcript
   * @param transcript - Transcription result
   * @param field - Field name
   * @returns Field value ("" when the field is absent)
   */
  static getFieldValue(transcript, field) {
    switch (field) {
      case "timestamp":
        // BUG FIX: was `|| ""`, which discarded a legitimate 0 value.
        return transcript.timestamp ?? "";
      case "text":
        return transcript.text || "";
      case "isFinal":
        return transcript.isFinal;
      case "confidence":
        return transcript.confidence !== void 0 ? transcript.confidence : "";
      case "speaker":
        return transcript.speaker || "";
      case "language":
        return transcript.language || "";
      case "wordCount":
        return transcript.text ? transcript.text.split(/\s+/).filter((w) => w).length : 0;
      case "startTime":
        // BUG FIX: was `|| ""`, which turned a valid start time of 0 into "".
        return transcript.words?.[0]?.start ?? "";
      case "endTime":
        // BUG FIX: was `|| ""`, same falsy-zero problem as startTime.
        return transcript.words?.[transcript.words.length - 1]?.end ?? "";
      default:
        return "";
    }
  }
  /**
   * Escape field for CSV format (RFC 4180 quoting: wrap in quotes and
   * double any embedded quotes when the field contains the delimiter,
   * a quote, or a line break)
   * @param field - Field value
   * @param delimiter - CSV delimiter
   * @returns Escaped field
   */
  static escapeField(field, delimiter) {
    if (field.includes(delimiter) || field.includes('"') || field.includes("\n") || field.includes("\r")) {
      const escaped = field.replace(/"/g, '""');
      return `"${escaped}"`;
    }
    return field;
  }
};
|
|
683
|
+
|
|
684
|
+
// src/core/TranscriptionSession.ts
|
|
685
|
+
var TranscriptionSession = class {
|
|
686
|
+
/**
|
|
687
|
+
* Create a new TranscriptionSession
|
|
688
|
+
* @param provider - Transcription provider to use
|
|
689
|
+
* @param sessionConfig - Session configuration options
|
|
690
|
+
*/
|
|
691
|
+
constructor(provider, sessionConfig = {}) {
|
|
692
|
+
/** Collected transcription results */
|
|
693
|
+
this.transcripts = [];
|
|
694
|
+
/** Current session state */
|
|
695
|
+
this.state = "idle" /* IDLE */;
|
|
696
|
+
this.provider = provider;
|
|
697
|
+
this.config = { ...DEFAULT_SESSION_CONFIG, ...sessionConfig };
|
|
698
|
+
this.id = this.generateSessionId();
|
|
699
|
+
}
|
|
700
|
+
/**
|
|
701
|
+
* Start the transcription session
|
|
702
|
+
*/
|
|
703
|
+
async start() {
|
|
704
|
+
if (this.state !== "idle" /* IDLE */ && this.state !== "stopped" /* STOPPED */) {
|
|
705
|
+
throw new Error(`Cannot start session in state: ${this.state}`);
|
|
706
|
+
}
|
|
707
|
+
this.state = "initializing" /* INITIALIZING */;
|
|
708
|
+
this.startTime = Date.now();
|
|
709
|
+
try {
|
|
710
|
+
await this.provider.start();
|
|
711
|
+
this.state = "active" /* ACTIVE */;
|
|
712
|
+
this.setupTimers();
|
|
713
|
+
} catch (error) {
|
|
714
|
+
this.state = "error" /* ERROR */;
|
|
715
|
+
throw error;
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
/**
|
|
719
|
+
* Stop the transcription session
|
|
720
|
+
*/
|
|
721
|
+
async stop() {
|
|
722
|
+
if (this.state === "stopped" /* STOPPED */ || this.state === "idle" /* IDLE */) {
|
|
723
|
+
return;
|
|
724
|
+
}
|
|
725
|
+
this.state = "stopping" /* STOPPING */;
|
|
726
|
+
this.clearTimers();
|
|
727
|
+
try {
|
|
728
|
+
await this.provider.stop();
|
|
729
|
+
} finally {
|
|
730
|
+
this.state = "stopped" /* STOPPED */;
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
/**
|
|
734
|
+
* Pause the transcription session
|
|
735
|
+
*/
|
|
736
|
+
pause() {
|
|
737
|
+
if (this.state !== "active" /* ACTIVE */) {
|
|
738
|
+
return;
|
|
739
|
+
}
|
|
740
|
+
this.provider.pause();
|
|
741
|
+
this.state = "paused" /* PAUSED */;
|
|
742
|
+
this.clearTimers();
|
|
743
|
+
}
|
|
744
|
+
/**
|
|
745
|
+
* Resume the transcription session
|
|
746
|
+
*/
|
|
747
|
+
resume() {
|
|
748
|
+
if (this.state !== "paused" /* PAUSED */) {
|
|
749
|
+
return;
|
|
750
|
+
}
|
|
751
|
+
this.provider.resume();
|
|
752
|
+
this.state = "active" /* ACTIVE */;
|
|
753
|
+
this.setupTimers();
|
|
754
|
+
}
|
|
755
|
+
/**
|
|
756
|
+
* Add a transcription result to the session
|
|
757
|
+
* @param result - Transcription result to add
|
|
758
|
+
*/
|
|
759
|
+
addTranscript(result) {
|
|
760
|
+
this.transcripts.push(result);
|
|
761
|
+
this.resetSilenceTimer();
|
|
762
|
+
}
|
|
763
|
+
/**
|
|
764
|
+
* Get transcription results
|
|
765
|
+
* @param finalOnly - If true, return only final results
|
|
766
|
+
*/
|
|
767
|
+
getTranscripts(finalOnly = false) {
|
|
768
|
+
if (finalOnly) {
|
|
769
|
+
return this.transcripts.filter((t) => t.isFinal);
|
|
770
|
+
}
|
|
771
|
+
return [...this.transcripts];
|
|
772
|
+
}
|
|
773
|
+
/**
|
|
774
|
+
* Get concatenated text from all final transcripts
|
|
775
|
+
*/
|
|
776
|
+
getFullText() {
|
|
777
|
+
return this.transcripts.filter((t) => t.isFinal).map((t) => t.text).join(" ").trim();
|
|
778
|
+
}
|
|
779
|
+
/**
|
|
780
|
+
* Get the current session state
|
|
781
|
+
*/
|
|
782
|
+
getState() {
|
|
783
|
+
return this.state;
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Export session data in raw format
|
|
787
|
+
*/
|
|
788
|
+
exportRaw() {
|
|
789
|
+
const metadata = {
|
|
790
|
+
id: this.id,
|
|
791
|
+
startTime: this.startTime || 0,
|
|
792
|
+
endTime: this.state === "stopped" /* STOPPED */ ? Date.now() : void 0,
|
|
793
|
+
duration: this.startTime ? Date.now() - this.startTime : 0,
|
|
794
|
+
wordCount: this.getWordCount(),
|
|
795
|
+
provider: this.provider.getState()
|
|
796
|
+
};
|
|
797
|
+
return {
|
|
798
|
+
metadata,
|
|
799
|
+
transcripts: this.getTranscripts(),
|
|
800
|
+
fullText: this.getFullText()
|
|
801
|
+
};
|
|
802
|
+
}
|
|
803
|
+
/**
|
|
804
|
+
* Export session data in specified format
|
|
805
|
+
* @param format - Export format (json, text, srt, vtt, csv)
|
|
806
|
+
*/
|
|
807
|
+
export(format = "json") {
|
|
808
|
+
const transcripts = this.getTranscripts(true);
|
|
809
|
+
const sessionData = { transcripts };
|
|
810
|
+
const rawExport = this.exportRaw();
|
|
811
|
+
let data;
|
|
812
|
+
let mimeType;
|
|
813
|
+
let extension;
|
|
814
|
+
switch (format) {
|
|
815
|
+
case "json":
|
|
816
|
+
data = JSONExporter.export({ metadata: rawExport.metadata, transcripts });
|
|
817
|
+
mimeType = "application/json";
|
|
818
|
+
extension = "json";
|
|
819
|
+
break;
|
|
820
|
+
case "text":
|
|
821
|
+
data = TextExporter.export(sessionData, {});
|
|
822
|
+
mimeType = "text/plain";
|
|
823
|
+
extension = "txt";
|
|
824
|
+
break;
|
|
825
|
+
case "srt":
|
|
826
|
+
data = SRTExporter.export(sessionData);
|
|
827
|
+
mimeType = "text/plain";
|
|
828
|
+
extension = "srt";
|
|
829
|
+
break;
|
|
830
|
+
case "vtt":
|
|
831
|
+
data = VTTExporter.export(sessionData);
|
|
832
|
+
mimeType = "text/vtt";
|
|
833
|
+
extension = "vtt";
|
|
834
|
+
break;
|
|
835
|
+
case "csv":
|
|
836
|
+
data = CSVExporter.export(sessionData, {});
|
|
837
|
+
mimeType = "text/csv";
|
|
838
|
+
extension = "csv";
|
|
839
|
+
break;
|
|
840
|
+
default:
|
|
841
|
+
data = JSONExporter.export({ metadata: rawExport.metadata, transcripts });
|
|
842
|
+
mimeType = "application/json";
|
|
843
|
+
extension = "json";
|
|
844
|
+
}
|
|
845
|
+
return {
|
|
846
|
+
format,
|
|
847
|
+
data,
|
|
848
|
+
filename: `transcript-${this.id}.${extension}`,
|
|
849
|
+
mimeType
|
|
850
|
+
};
|
|
851
|
+
}
|
|
852
|
+
/**
|
|
853
|
+
* Get session statistics
|
|
854
|
+
*/
|
|
855
|
+
getStatistics() {
|
|
856
|
+
const transcripts = this.getTranscripts(true);
|
|
857
|
+
const wordCount = this.getWordCount();
|
|
858
|
+
const durationMs = this.startTime ? Date.now() - this.startTime : 0;
|
|
859
|
+
let totalConfidence = 0;
|
|
860
|
+
let confCount = 0;
|
|
861
|
+
for (const t of transcripts) {
|
|
862
|
+
if (t.confidence !== void 0) {
|
|
863
|
+
totalConfidence += t.confidence;
|
|
864
|
+
confCount++;
|
|
865
|
+
}
|
|
866
|
+
}
|
|
867
|
+
const averageConfidence = confCount > 0 ? totalConfidence / confCount : 0;
|
|
868
|
+
const durationMinutes = durationMs / 6e4;
|
|
869
|
+
const speakingRate = durationMinutes > 0 ? wordCount / durationMinutes : 0;
|
|
870
|
+
return {
|
|
871
|
+
wordCount,
|
|
872
|
+
averageConfidence,
|
|
873
|
+
speakingRate,
|
|
874
|
+
silencePeriods: 0,
|
|
875
|
+
// Would require VAD tracking
|
|
876
|
+
durationMs,
|
|
877
|
+
transcriptCount: transcripts.length
|
|
878
|
+
};
|
|
879
|
+
}
|
|
880
|
+
/**
|
|
881
|
+
* Clear all transcripts
|
|
882
|
+
*/
|
|
883
|
+
clear() {
|
|
884
|
+
this.transcripts = [];
|
|
885
|
+
}
|
|
886
|
+
/**
|
|
887
|
+
* Get the total word count from final transcripts
|
|
888
|
+
*/
|
|
889
|
+
getWordCount() {
|
|
890
|
+
return this.transcripts.filter((t) => t.isFinal).reduce((count, t) => {
|
|
891
|
+
const words = t.text.trim().split(/\s+/).filter((w) => w.length > 0);
|
|
892
|
+
return count + words.length;
|
|
893
|
+
}, 0);
|
|
894
|
+
}
|
|
895
|
+
/**
|
|
896
|
+
* Set up session timers (max duration, silence timeout)
|
|
897
|
+
*/
|
|
898
|
+
setupTimers() {
|
|
899
|
+
if (this.config.maxDuration > 0) {
|
|
900
|
+
this.maxDurationTimer = setTimeout(() => {
|
|
901
|
+
void this.stop();
|
|
902
|
+
}, this.config.maxDuration);
|
|
903
|
+
}
|
|
904
|
+
this.resetSilenceTimer();
|
|
905
|
+
}
|
|
906
|
+
/**
|
|
907
|
+
* Reset the silence timeout timer
|
|
908
|
+
*/
|
|
909
|
+
resetSilenceTimer() {
|
|
910
|
+
if (this.silenceTimer) {
|
|
911
|
+
clearTimeout(this.silenceTimer);
|
|
912
|
+
}
|
|
913
|
+
if (this.config.silenceTimeout > 0 && this.state === "active" /* ACTIVE */) {
|
|
914
|
+
this.silenceTimer = setTimeout(() => {
|
|
915
|
+
void this.stop();
|
|
916
|
+
}, this.config.silenceTimeout);
|
|
917
|
+
}
|
|
918
|
+
}
|
|
919
|
+
/**
|
|
920
|
+
* Clear all timers
|
|
921
|
+
*/
|
|
922
|
+
clearTimers() {
|
|
923
|
+
if (this.maxDurationTimer) {
|
|
924
|
+
clearTimeout(this.maxDurationTimer);
|
|
925
|
+
this.maxDurationTimer = void 0;
|
|
926
|
+
}
|
|
927
|
+
if (this.silenceTimer) {
|
|
928
|
+
clearTimeout(this.silenceTimer);
|
|
929
|
+
this.silenceTimer = void 0;
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
/**
|
|
933
|
+
* Generate a unique session ID
|
|
934
|
+
*/
|
|
935
|
+
generateSessionId() {
|
|
936
|
+
return "session-" + "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
|
|
937
|
+
const r = Math.random() * 16 | 0;
|
|
938
|
+
const v = c === "x" ? r : r & 3 | 8;
|
|
939
|
+
return v.toString(16);
|
|
940
|
+
});
|
|
941
|
+
}
|
|
942
|
+
};
|
|
943
|
+
|
|
944
|
+
// src/core/SessionManager.ts
|
|
945
|
+
var SessionManager = class {
  /**
   * Create a new SessionManager
   * @param options - Manager configuration; supports `defaultConfig`
   *   (merged into every created session) and `maxSessions` (default 10)
   */
  constructor(options = {}) {
    /** Active sessions map (session id -> TranscriptionSession) */
    this.sessions = /* @__PURE__ */ new Map();
    /** Currently active session ID (null when no session exists) */
    this.activeSessionId = null;
    this.defaultConfig = options.defaultConfig || {};
    this.maxSessions = options.maxSessions || 10;
  }
  /**
   * Create a new transcription session
   * @param provider - Transcription provider instance
   * @param config - Session configuration (merged over defaultConfig;
   *   per-call values win)
   * @returns New TranscriptionSession instance
   * @throws Error when the maxSessions limit has been reached
   */
  createSession(provider, config) {
    if (this.sessions.size >= this.maxSessions) {
      throw new Error(`Maximum number of sessions (${this.maxSessions}) reached`);
    }
    const mergedConfig = { ...this.defaultConfig, ...config };
    const session = new TranscriptionSession(provider, mergedConfig);
    this.sessions.set(session.id, session);
    // The first session created automatically becomes the active one.
    if (!this.activeSessionId) {
      this.activeSessionId = session.id;
    }
    return session;
  }
  /**
   * Get session by ID
   * @param sessionId - Session ID
   * @returns TranscriptionSession or null when unknown
   */
  getSession(sessionId) {
    return this.sessions.get(sessionId) || null;
  }
  /**
   * Get the currently active session
   * @returns Active TranscriptionSession or null
   */
  getActiveSession() {
    if (!this.activeSessionId) {
      return null;
    }
    return this.sessions.get(this.activeSessionId) || null;
  }
  /**
   * Set the active session
   * @param sessionId - Session ID to make active
   * @throws Error when the session ID is unknown
   */
  setActiveSession(sessionId) {
    if (!this.sessions.has(sessionId)) {
      throw new Error(`Session ${sessionId} not found`);
    }
    this.activeSessionId = sessionId;
  }
  /**
   * Get all sessions
   * @returns Array of all sessions (insertion order)
   */
  getAllSessions() {
    return Array.from(this.sessions.values());
  }
  /**
   * Delete a session, stopping it first if it is active or paused.
   * No-op when the session ID is unknown.
   * @param sessionId - Session ID to delete
   */
  async deleteSession(sessionId) {
    const session = this.sessions.get(sessionId);
    if (!session) {
      return;
    }
    if (session.getState() === "active" /* ACTIVE */ || session.getState() === "paused" /* PAUSED */) {
      await session.stop();
    }
    this.sessions.delete(sessionId);
    if (this.activeSessionId === sessionId) {
      this.activeSessionId = null;
      // Promote the oldest remaining session (Map iteration order) to active.
      const remaining = this.sessions.keys().next();
      if (!remaining.done) {
        this.activeSessionId = remaining.value;
      }
    }
  }
  /**
   * Clear all sessions, stopping any active/paused ones concurrently first.
   */
  async clearAllSessions() {
    const stopPromises = [];
    for (const session of this.sessions.values()) {
      const state = session.getState();
      if (state === "active" /* ACTIVE */ || state === "paused" /* PAUSED */) {
        stopPromises.push(session.stop());
      }
    }
    await Promise.all(stopPromises);
    this.sessions.clear();
    this.activeSessionId = null;
  }
  /**
   * Get aggregate statistics across all sessions
   * @returns totalSessions, activeSessions, totalTranscripts, totalDuration
   *   (ms), and averageConfidence (mean of per-session averages over sessions
   *   that reported a non-zero confidence)
   */
  getSessionStats() {
    let totalTranscripts = 0;
    let totalDuration = 0;
    let totalConfidence = 0;
    let confidenceCount = 0;
    let activeSessions = 0;
    for (const session of this.sessions.values()) {
      const state = session.getState();
      if (state === "active" /* ACTIVE */) {
        activeSessions++;
      }
      const stats = session.getStatistics();
      totalTranscripts += stats.transcriptCount;
      totalDuration += stats.durationMs;
      if (stats.averageConfidence > 0) {
        totalConfidence += stats.averageConfidence;
        confidenceCount++;
      }
    }
    return {
      totalSessions: this.sessions.size,
      activeSessions,
      totalTranscripts,
      totalDuration,
      averageConfidence: confidenceCount > 0 ? totalConfidence / confidenceCount : 0
    };
  }
  /**
   * Export a session to the specified format
   * @param sessionId - Session ID
   * @param format - Export format ("json" | "text" | "srt" | "vtt" | "csv")
   * @returns Export result with { format, data, filename, mimeType }
   * @throws Error for unknown session IDs or unsupported formats
   */
  exportSession(sessionId, format) {
    const session = this.sessions.get(sessionId);
    if (!session) {
      throw new Error(`Session ${sessionId} not found`);
    }
    const exportData = session.exportRaw();
    const sessionData = {
      metadata: exportData.metadata,
      transcripts: exportData.transcripts
    };
    let data;
    let mimeType;
    let extension;
    switch (format) {
      case "json":
        data = JSONExporter.exportPretty(sessionData);
        mimeType = "application/json";
        extension = "json";
        break;
      case "text":
        data = TextExporter.export(sessionData);
        mimeType = "text/plain";
        extension = "txt";
        break;
      case "srt":
        data = SRTExporter.export(sessionData);
        mimeType = "application/x-subrip";
        extension = "srt";
        break;
      case "vtt":
        data = VTTExporter.export(sessionData);
        mimeType = "text/vtt";
        extension = "vtt";
        break;
      case "csv":
        data = CSVExporter.export(sessionData);
        mimeType = "text/csv";
        extension = "csv";
        break;
      default:
        // Unlike TranscriptionSession.export, unknown formats are rejected here.
        throw new Error(`Unsupported export format: ${format}`);
    }
    return {
      format,
      data,
      filename: `transcript-${sessionId}.${extension}`,
      mimeType
    };
  }
  /**
   * Import a session from previously exported data
   * @param data - Session import data; expects `transcripts` (iterable) and
   *   optionally `config` — presumably the shape produced by exportRaw;
   *   NOTE(review): `data.transcripts` is iterated without a guard, confirm
   *   callers always provide it
   * @param provider - Provider instance for the new session
   * @returns Imported session (registered but NOT made active)
   * @throws Error when the maxSessions limit has been reached
   */
  importSession(data, provider) {
    if (this.sessions.size >= this.maxSessions) {
      throw new Error(`Maximum number of sessions (${this.maxSessions}) reached`);
    }
    const session = new TranscriptionSession(provider, data.config || {});
    for (const transcript of data.transcripts) {
      session.addTranscript(transcript);
    }
    this.sessions.set(session.id, session);
    return session;
  }
  /**
   * Check if a session exists
   * @param sessionId - Session ID
   * @returns True if session exists
   */
  hasSession(sessionId) {
    return this.sessions.has(sessionId);
  }
  /**
   * Get session count
   * @returns Number of sessions
   */
  getSessionCount() {
    return this.sessions.size;
  }
  /**
   * Get sessions by state
   * @param state - Session state to filter by
   * @returns Array of sessions with matching state
   */
  getSessionsByState(state) {
    return this.getAllSessions().filter((session) => session.getState() === state);
  }
};
|
|
1174
|
+
|
|
1175
|
+
// src/providers/WebSpeechProvider.ts
|
|
1176
|
+
var _WebSpeechProvider = class _WebSpeechProvider extends BaseTranscriber {
  /**
   * Create a new WebSpeechProvider (browser Web Speech API backend).
   * @param config - Transcription configuration; `provider` is forced to
   *   "web-speech" regardless of the incoming value
   */
  constructor(config) {
    super({ ...config, provider: "web-speech" /* WebSpeechAPI */ });
    /** Speech recognition instance (null until initialize()) */
    this.recognition = null;
    /** Media stream from microphone */
    this.mediaStream = null;
    /** Audio context for analysis (VAD only) */
    this.audioContext = null;
    /** Audio analyser for VAD */
    this.analyser = null;
    /** Interval handle for audio level monitoring */
    this.audioLevelInterval = null;
    /** Whether recognition is being restarted automatically */
    this.isRestarting = false;
    /** Retry count for auto-restart after unexpected recognition end */
    this.retryCount = 0;
    /** Maximum retry attempts before giving up and stopping */
    this.maxRetries = 3;
  }
  /**
   * Check if the Web Speech API is supported in the current environment.
   * Always false outside a browser (no `window`).
   */
  isSupported() {
    if (typeof window === "undefined") {
      return false;
    }
    return !!(window.SpeechRecognition || window.webkitSpeechRecognition);
  }
  /**
   * Initialize the provider: request microphone access, construct the
   * SpeechRecognition instance, wire event handlers and (optionally) VAD.
   * @throws TranscriptionError when the API is unsupported or mic access fails
   */
  async initialize() {
    if (!this.isSupported()) {
      throw new TranscriptionError(
        "Web Speech API is not supported in this browser",
        "unsupported_browser" /* UNSUPPORTED_BROWSER */,
        "web-speech" /* WebSpeechAPI */
      );
    }
    this.setState("initializing" /* INITIALIZING */);
    try {
      // Mic access is requested up front even though the Web Speech API
      // captures audio itself — this surfaces permission errors early and
      // provides the stream used for audio-level monitoring.
      await this.getMicrophoneAccess();
      const SpeechRecognitionClass = window.SpeechRecognition || window.webkitSpeechRecognition;
      this.recognition = new SpeechRecognitionClass();
      this.recognition.continuous = true;
      this.recognition.interimResults = this.config.interimResults ?? true;
      this.recognition.lang = this.config.language || "en-US";
      this.recognition.maxAlternatives = 1;
      this.setupEventHandlers();
      if (this.config.providerOptions?.enableVAD) {
        this.setupAudioLevelMonitoring();
      }
    } catch (error) {
      this.handleError(error);
      throw error;
    }
  }
  /**
   * Start transcription. Lazily initializes on first call; a no-op when
   * already active. "already started" errors from the API are swallowed.
   */
  async start() {
    if (!this.recognition) {
      await this.initialize();
    }
    if (this.state === "active" /* ACTIVE */) {
      return;
    }
    try {
      this.recognition.start();
      this.retryCount = 0;
    } catch (error) {
      if (error.message?.includes("already started")) {
        return;
      }
      this.handleError(error);
      throw error;
    }
  }
  /**
   * Stop transcription, release the mic stream, and emit "stop".
   * No-op when already stopped or never started.
   */
  async stop() {
    if (this.state === "stopped" /* STOPPED */ || this.state === "idle" /* IDLE */) {
      return;
    }
    this.setState("stopping" /* STOPPING */);
    // Prevent the onend handler from auto-restarting recognition.
    this.isRestarting = false;
    try {
      if (this.recognition) {
        this.recognition.stop();
      }
      this.stopAudioLevelMonitoring();
      this.stopMediaStream();
      this.setState("stopped" /* STOPPED */);
      this.emit("stop");
    } catch (error) {
      this.handleError(error);
    }
  }
  /**
   * Pause transcription (stops recognition but keeps the mic stream).
   * No-op unless currently active.
   */
  pause() {
    if (this.state !== "active" /* ACTIVE */) {
      return;
    }
    this.isRestarting = false;
    try {
      if (this.recognition) {
        this.recognition.stop();
      }
      this.setState("paused" /* PAUSED */);
      this.emit("pause");
    } catch (error) {
      this.handleError(error);
    }
  }
  /**
   * Resume transcription after pause(). No-op unless currently paused.
   */
  resume() {
    if (this.state !== "paused" /* PAUSED */) {
      return;
    }
    try {
      if (this.recognition) {
        this.recognition.start();
      }
      this.setState("active" /* ACTIVE */);
      this.emit("resume");
    } catch (error) {
      this.handleError(error);
    }
  }
  /**
   * Send audio data - not supported by Web Speech API; logs a warning only.
   * @param _audioData - Audio data (unused)
   */
  sendAudio(_audioData) {
    console.warn("WebSpeechProvider does not support external audio input. Audio data is captured directly from the microphone.");
  }
  /**
   * Clean up all resources: recognition instance (handlers detached),
   * audio monitoring, AudioContext, mic stream, and event listeners.
   * Errors from stop()/close() are deliberately ignored during teardown.
   */
  async cleanup() {
    this.isRestarting = false;
    if (this.recognition) {
      try {
        this.recognition.stop();
      } catch {
      }
      // Detach handlers so a late-firing event cannot touch freed state.
      this.recognition.onstart = null;
      this.recognition.onend = null;
      this.recognition.onerror = null;
      this.recognition.onresult = null;
      this.recognition.onspeechstart = null;
      this.recognition.onspeechend = null;
      this.recognition = null;
    }
    this.stopAudioLevelMonitoring();
    if (this.audioContext) {
      try {
        await this.audioContext.close();
      } catch {
      }
      this.audioContext = null;
      this.analyser = null;
    }
    this.stopMediaStream();
    this.removeAllListeners();
  }
  /**
   * Get provider capabilities (static, shared across instances).
   */
  getCapabilities() {
    return _WebSpeechProvider.capabilities;
  }
  // ==================== Private Methods ====================
  /**
   * Wire SpeechRecognition lifecycle events to provider state/events.
   */
  setupEventHandlers() {
    if (!this.recognition) return;
    this.recognition.onstart = () => {
      this.setState("active" /* ACTIVE */);
      this.emit("start");
    };
    this.recognition.onend = () => {
      this.handleRecognitionEnd();
    };
    this.recognition.onerror = (event) => {
      this.handleRecognitionError(event);
    };
    this.recognition.onresult = (event) => {
      this.processRecognitionResult(event);
    };
    this.recognition.onspeechstart = () => {
      this.emit("speech");
    };
    this.recognition.onspeechend = () => {
      this.emit("silence");
    };
  }
  /**
   * Convert SpeechRecognition results into TranscriptionResult objects
   * and forward each to handleTranscript. Only the first alternative of
   * each result is used (maxAlternatives = 1).
   */
  processRecognitionResult(event) {
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const result = event.results[i];
      const alternative = result[0];
      if (!alternative) continue;
      const transcriptionResult = {
        text: alternative.transcript,
        isFinal: result.isFinal,
        confidence: alternative.confidence,
        timestamp: Date.now(),
        language: this.config.language
      };
      this.handleTranscript(transcriptionResult);
    }
  }
  /**
   * Handle the recognition "end" event. Browsers end continuous recognition
   * spontaneously, so while ACTIVE we auto-restart (up to maxRetries, after
   * a 100ms delay); otherwise we transition to STOPPED.
   * NOTE(review): retryCount is only reset in start(), so repeated
   * spontaneous ends across one session share the same budget — confirm
   * this is intended.
   */
  handleRecognitionEnd() {
    if (this.state === "stopping" /* STOPPING */ || this.state === "stopped" /* STOPPED */ || this.state === "paused" /* PAUSED */ || this.state === "error" /* ERROR */) {
      return;
    }
    if (this.state === "active" /* ACTIVE */ && !this.isRestarting) {
      this.isRestarting = true;
      if (this.retryCount < this.maxRetries) {
        this.retryCount++;
        setTimeout(() => {
          if (this.recognition && this.state === "active" /* ACTIVE */) {
            try {
              this.recognition.start();
            } catch (error) {
              this.handleError(error);
            }
          }
          this.isRestarting = false;
        }, 100);
      } else {
        this.setState("stopped" /* STOPPED */);
        this.emit("stop");
      }
    }
  }
  /**
   * Map SpeechRecognition error events to TranscriptionErrors.
   * "no-speech" and "aborted" are benign and ignored; fatal errors
   * (mic denied, service blocked) go through handleError, others are
   * emitted as non-fatal "error" events.
   */
  handleRecognitionError(event) {
    let errorCode;
    let shouldStop = false;
    switch (event.error) {
      case "no-speech":
        console.warn("No speech detected");
        return;
      case "audio-capture":
      case "not-allowed":
        errorCode = "microphone_access_denied" /* MICROPHONE_ACCESS_DENIED */;
        shouldStop = true;
        break;
      case "network":
        errorCode = "network_error" /* NETWORK_ERROR */;
        break;
      case "aborted":
        return;
      case "service-not-allowed":
        errorCode = "unsupported_browser" /* UNSUPPORTED_BROWSER */;
        shouldStop = true;
        break;
      default:
        errorCode = "provider_error" /* PROVIDER_ERROR */;
    }
    const error = new TranscriptionError(
      event.message || `Speech recognition error: ${event.error}`,
      errorCode,
      "web-speech" /* WebSpeechAPI */,
      { originalError: event.error }
    );
    if (shouldStop) {
      this.handleError(error);
    } else {
      this.emit("error", error);
    }
  }
  /**
   * Request microphone access with echo cancellation, noise suppression
   * and auto gain; sample rate defaults to 16 kHz.
   * @throws TranscriptionError (MICROPHONE_ACCESS_DENIED) on failure
   */
  async getMicrophoneAccess() {
    try {
      const constraints = {
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
          sampleRate: this.config.audioConfig?.sampleRate || 16e3
        }
      };
      this.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
    } catch (error) {
      const err = error;
      throw new TranscriptionError(
        err.message || "Microphone access denied",
        "microphone_access_denied" /* MICROPHONE_ACCESS_DENIED */,
        "web-speech" /* WebSpeechAPI */,
        error
      );
    }
  }
  /**
   * Stop all media stream tracks and release the stream reference.
   */
  stopMediaStream() {
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
  }
  /**
   * Set up audio level monitoring for VAD: an AnalyserNode samples the
   * mic stream every 100ms and the normalized (0..1) average level is
   * emitted as "audioLevel" while the provider is active. Failures are
   * non-fatal (warning only).
   */
  setupAudioLevelMonitoring() {
    if (!this.mediaStream) return;
    try {
      this.audioContext = new AudioContext();
      const source = this.audioContext.createMediaStreamSource(this.mediaStream);
      this.analyser = this.audioContext.createAnalyser();
      this.analyser.fftSize = 256;
      source.connect(this.analyser);
      const dataArray = new Uint8Array(this.analyser.frequencyBinCount);
      this.audioLevelInterval = setInterval(() => {
        if (this.analyser && this.state === "active" /* ACTIVE */) {
          this.analyser.getByteFrequencyData(dataArray);
          const average = dataArray.reduce((a, b) => a + b, 0) / dataArray.length;
          const normalizedLevel = average / 255;
          this.emit("audioLevel", normalizedLevel);
        }
      }, 100);
    } catch (error) {
      console.warn("Failed to set up audio level monitoring:", error);
    }
  }
  /**
   * Stop audio level monitoring and clear the interval handle.
   */
  stopAudioLevelMonitoring() {
    if (this.audioLevelInterval) {
      clearInterval(this.audioLevelInterval);
      this.audioLevelInterval = null;
    }
  }
};
|
|
1535
|
+
/**
 * Provider capabilities (static): interim results only; no word timestamps,
 * diarization, punctuation control, or language detection. The language list
 * is the subset this package advertises, not an exhaustive browser list.
 */
_WebSpeechProvider.capabilities = {
  supportsInterim: true,
  supportsWordTimestamps: false,
  supportsSpeakerDiarization: false,
  supportsPunctuation: false,
  supportsLanguageDetection: false,
  supportedLanguages: [
    "en-US",
    "en-GB",
    "en-AU",
    "en-CA",
    "en-IN",
    "es-ES",
    "es-MX",
    "fr-FR",
    "de-DE",
    "it-IT",
    "pt-BR",
    "pt-PT",
    "ja-JP",
    "ko-KR",
    "zh-CN",
    "zh-TW",
    "ru-RU",
    "ar-SA",
    "hi-IN",
    "nl-NL"
  ]
};
// Public alias for the class (the underscore name exists so static members
// can be referenced safely inside the class body after minification).
var WebSpeechProvider = _WebSpeechProvider;
|
|
1566
|
+
|
|
1567
|
+
// src/providers/DeepgramProvider.ts
|
|
1568
|
+
var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
1569
|
+
var KEEP_ALIVE_INTERVAL = 5e3;
|
|
1570
|
+
var RECONNECT_DELAY = 1e3;
|
|
1571
|
+
var _DeepgramProvider = class _DeepgramProvider extends BaseTranscriber {
|
|
1572
|
+
/**
|
|
1573
|
+
* Create a new DeepgramProvider
|
|
1574
|
+
* @param config - Transcription configuration with API key
|
|
1575
|
+
*/
|
|
1576
|
+
constructor(config) {
|
|
1577
|
+
super({ ...config, provider: "deepgram" /* Deepgram */ });
|
|
1578
|
+
/** WebSocket connection */
|
|
1579
|
+
this.socket = null;
|
|
1580
|
+
/** Media stream from microphone */
|
|
1581
|
+
this.mediaStream = null;
|
|
1582
|
+
/** Audio context for processing */
|
|
1583
|
+
this.audioContext = null;
|
|
1584
|
+
/** Audio processor node */
|
|
1585
|
+
this.processor = null;
|
|
1586
|
+
/** Connection attempt counter */
|
|
1587
|
+
this.connectionAttempts = 0;
|
|
1588
|
+
/** Maximum reconnection attempts */
|
|
1589
|
+
this.maxRetries = 3;
|
|
1590
|
+
/** Reconnection timeout */
|
|
1591
|
+
this.reconnectTimeout = null;
|
|
1592
|
+
/** Keep-alive interval */
|
|
1593
|
+
this.keepAliveInterval = null;
|
|
1594
|
+
/** Flag indicating if connection is ready */
|
|
1595
|
+
this.isConnectionReady = false;
|
|
1596
|
+
/** Flag for intentional close */
|
|
1597
|
+
this.isIntentionalClose = false;
|
|
1598
|
+
}
|
|
1599
|
+
/**
|
|
1600
|
+
* Check if Deepgram provider is supported
|
|
1601
|
+
*/
|
|
1602
|
+
isSupported() {
|
|
1603
|
+
if (typeof window === "undefined") {
|
|
1604
|
+
return typeof WebSocket !== "undefined";
|
|
1605
|
+
}
|
|
1606
|
+
return !!(typeof WebSocket !== "undefined" && navigator.mediaDevices && typeof navigator.mediaDevices.getUserMedia === "function" && (window.AudioContext || window.webkitAudioContext));
|
|
1607
|
+
}
|
|
1608
|
+
/**
|
|
1609
|
+
* Initialize the Deepgram provider
|
|
1610
|
+
*/
|
|
1611
|
+
async initialize() {
|
|
1612
|
+
if (!this.config.apiKey) {
|
|
1613
|
+
throw new TranscriptionError(
|
|
1614
|
+
"Deepgram API key is required",
|
|
1615
|
+
"invalid_config" /* INVALID_CONFIG */,
|
|
1616
|
+
"deepgram" /* Deepgram */
|
|
1617
|
+
);
|
|
1618
|
+
}
|
|
1619
|
+
if (!this.isSupported()) {
|
|
1620
|
+
throw new TranscriptionError(
|
|
1621
|
+
"Deepgram provider is not supported in this environment",
|
|
1622
|
+
"unsupported_browser" /* UNSUPPORTED_BROWSER */,
|
|
1623
|
+
"deepgram" /* Deepgram */
|
|
1624
|
+
);
|
|
1625
|
+
}
|
|
1626
|
+
this.setState("initializing" /* INITIALIZING */);
|
|
1627
|
+
this.isConnectionReady = false;
|
|
1628
|
+
this.isIntentionalClose = false;
|
|
1629
|
+
try {
|
|
1630
|
+
await this.setupWebSocket();
|
|
1631
|
+
} catch (error) {
|
|
1632
|
+
this.handleError(error);
|
|
1633
|
+
throw error;
|
|
1634
|
+
}
|
|
1635
|
+
}
|
|
1636
|
+
/**
|
|
1637
|
+
* Start transcription
|
|
1638
|
+
*/
|
|
1639
|
+
async start() {
|
|
1640
|
+
if (!this.socket || !this.isConnectionReady) {
|
|
1641
|
+
await this.initialize();
|
|
1642
|
+
}
|
|
1643
|
+
if (this.state === "active" /* ACTIVE */) {
|
|
1644
|
+
return;
|
|
1645
|
+
}
|
|
1646
|
+
try {
|
|
1647
|
+
await this.getMicrophoneAccess();
|
|
1648
|
+
this.setupAudioProcessing();
|
|
1649
|
+
this.setState("active" /* ACTIVE */);
|
|
1650
|
+
this.emit("start");
|
|
1651
|
+
} catch (error) {
|
|
1652
|
+
this.handleError(error);
|
|
1653
|
+
throw error;
|
|
1654
|
+
}
|
|
1655
|
+
}
|
|
1656
|
+
/**
|
|
1657
|
+
* Stop transcription
|
|
1658
|
+
*/
|
|
1659
|
+
async stop() {
|
|
1660
|
+
if (this.state === "stopped" /* STOPPED */ || this.state === "idle" /* IDLE */) {
|
|
1661
|
+
return;
|
|
1662
|
+
}
|
|
1663
|
+
this.setState("stopping" /* STOPPING */);
|
|
1664
|
+
this.isIntentionalClose = true;
|
|
1665
|
+
this.stopKeepAlive();
|
|
1666
|
+
if (this.socket && this.socket.readyState === WebSocket.OPEN) {
|
|
1667
|
+
try {
|
|
1668
|
+
this.socket.send(JSON.stringify({ type: "CloseStream" }));
|
|
1669
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
1670
|
+
} catch {
|
|
1671
|
+
}
|
|
1672
|
+
}
|
|
1673
|
+
this.closeWebSocket();
|
|
1674
|
+
this.stopAudioProcessing();
|
|
1675
|
+
this.stopMediaStream();
|
|
1676
|
+
this.setState("stopped" /* STOPPED */);
|
|
1677
|
+
this.emit("stop");
|
|
1678
|
+
}
|
|
1679
|
+
/**
|
|
1680
|
+
* Pause transcription
|
|
1681
|
+
*/
|
|
1682
|
+
pause() {
|
|
1683
|
+
if (this.state !== "active" /* ACTIVE */) {
|
|
1684
|
+
return;
|
|
1685
|
+
}
|
|
1686
|
+
this.stopAudioProcessing();
|
|
1687
|
+
this.setState("paused" /* PAUSED */);
|
|
1688
|
+
this.emit("pause");
|
|
1689
|
+
}
|
|
1690
|
+
/**
|
|
1691
|
+
* Resume transcription
|
|
1692
|
+
*/
|
|
1693
|
+
resume() {
|
|
1694
|
+
if (this.state !== "paused" /* PAUSED */) {
|
|
1695
|
+
return;
|
|
1696
|
+
}
|
|
1697
|
+
if (this.mediaStream && this.audioContext) {
|
|
1698
|
+
this.setupAudioProcessing();
|
|
1699
|
+
}
|
|
1700
|
+
this.setState("active" /* ACTIVE */);
|
|
1701
|
+
this.emit("resume");
|
|
1702
|
+
}
|
|
1703
|
+
/**
|
|
1704
|
+
* Send audio data through WebSocket
|
|
1705
|
+
* @param audioData - Raw audio data as ArrayBuffer
|
|
1706
|
+
*/
|
|
1707
|
+
sendAudio(audioData) {
|
|
1708
|
+
if (this.socket && this.socket.readyState === WebSocket.OPEN && this.state === "active" /* ACTIVE */) {
|
|
1709
|
+
this.socket.send(audioData);
|
|
1710
|
+
this.recordAudioData(audioData);
|
|
1711
|
+
}
|
|
1712
|
+
}
|
|
1713
|
+
/**
 * Clean up all resources: pending reconnect timer, keep-alive, socket,
 * audio pipeline, AudioContext, and microphone tracks. Also detaches all
 * event listeners, so the instance cannot be reused for notifications
 * after this call.
 */
async cleanup() {
  // Prevent close/error handlers from reconnecting during teardown.
  this.isIntentionalClose = true;
  this.stopKeepAlive();
  if (this.reconnectTimeout) {
    clearTimeout(this.reconnectTimeout);
    this.reconnectTimeout = null;
  }
  this.closeWebSocket();
  this.stopAudioProcessing();
  if (this.audioContext) {
    try {
      await this.audioContext.close();
    } catch {
      // AudioContext may already be closed; ignore.
    }
    this.audioContext = null;
  }
  this.stopMediaStream();
  this.connectionAttempts = 0;
  this.isConnectionReady = false;
  this.removeAllListeners();
}
|
|
1737
|
+
/**
 * Get the static capability descriptor for the Deepgram provider.
 * @returns Shared capabilities object (not a copy -- callers must not mutate).
 */
getCapabilities() {
  return _DeepgramProvider.capabilities;
}
|
|
1743
|
+
// ==================== Private Methods ====================
|
|
1744
|
+
/**
|
|
1745
|
+
* Build WebSocket URL with query parameters
|
|
1746
|
+
*/
|
|
1747
|
+
buildWebSocketUrl() {
|
|
1748
|
+
const params = new URLSearchParams();
|
|
1749
|
+
const options = this.config.providerOptions || {};
|
|
1750
|
+
params.set("model", options.model || "nova-2");
|
|
1751
|
+
if (options.version) params.set("version", options.version);
|
|
1752
|
+
if (options.tier) params.set("tier", options.tier);
|
|
1753
|
+
if (this.config.language) {
|
|
1754
|
+
params.set("language", this.config.language);
|
|
1755
|
+
}
|
|
1756
|
+
params.set("punctuate", String(this.config.punctuation ?? options.punctuate ?? true));
|
|
1757
|
+
params.set("interim_results", String(this.config.interimResults ?? true));
|
|
1758
|
+
if (options.diarize) params.set("diarize", "true");
|
|
1759
|
+
if (options.multichannel) params.set("multichannel", "true");
|
|
1760
|
+
if (options.alternatives) params.set("alternatives", String(options.alternatives));
|
|
1761
|
+
if (options.numerals) params.set("numerals", "true");
|
|
1762
|
+
if (options.smartFormat) params.set("smart_format", "true");
|
|
1763
|
+
if (options.endpointing !== void 0) params.set("endpointing", String(options.endpointing));
|
|
1764
|
+
if (options.keywords?.length) {
|
|
1765
|
+
options.keywords.forEach((k) => params.append("keywords", k));
|
|
1766
|
+
}
|
|
1767
|
+
if (options.search?.length) {
|
|
1768
|
+
options.search.forEach((s) => params.append("search", s));
|
|
1769
|
+
}
|
|
1770
|
+
params.set("encoding", "linear16");
|
|
1771
|
+
params.set("sample_rate", String(this.config.audioConfig?.sampleRate || 16e3));
|
|
1772
|
+
params.set("channels", String(this.config.audioConfig?.channels || 1));
|
|
1773
|
+
return `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
1774
|
+
}
|
|
1775
|
+
/**
|
|
1776
|
+
* Set up WebSocket connection
|
|
1777
|
+
*/
|
|
1778
|
+
async setupWebSocket() {
|
|
1779
|
+
return new Promise((resolve, reject) => {
|
|
1780
|
+
const url = this.buildWebSocketUrl();
|
|
1781
|
+
this.socket = new WebSocket(url, ["token", this.config.apiKey]);
|
|
1782
|
+
this.socket.binaryType = "arraybuffer";
|
|
1783
|
+
const connectionTimeout = setTimeout(() => {
|
|
1784
|
+
reject(new TranscriptionError(
|
|
1785
|
+
"WebSocket connection timeout",
|
|
1786
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
1787
|
+
"deepgram" /* Deepgram */
|
|
1788
|
+
));
|
|
1789
|
+
}, 1e4);
|
|
1790
|
+
this.socket.onopen = () => {
|
|
1791
|
+
clearTimeout(connectionTimeout);
|
|
1792
|
+
this.handleWebSocketOpen();
|
|
1793
|
+
resolve();
|
|
1794
|
+
};
|
|
1795
|
+
this.socket.onmessage = (event) => {
|
|
1796
|
+
this.handleWebSocketMessage(event);
|
|
1797
|
+
};
|
|
1798
|
+
this.socket.onerror = (event) => {
|
|
1799
|
+
clearTimeout(connectionTimeout);
|
|
1800
|
+
this.handleWebSocketError(event);
|
|
1801
|
+
reject(new TranscriptionError(
|
|
1802
|
+
"WebSocket connection error",
|
|
1803
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
1804
|
+
"deepgram" /* Deepgram */
|
|
1805
|
+
));
|
|
1806
|
+
};
|
|
1807
|
+
this.socket.onclose = (event) => {
|
|
1808
|
+
clearTimeout(connectionTimeout);
|
|
1809
|
+
this.handleWebSocketClose(event);
|
|
1810
|
+
};
|
|
1811
|
+
});
|
|
1812
|
+
}
|
|
1813
|
+
/**
 * Handle WebSocket open: mark the connection ready, reset the retry
 * counter, and start the periodic KeepAlive messages.
 */
handleWebSocketOpen() {
  this.isConnectionReady = true;
  this.connectionAttempts = 0;
  this.startKeepAlive();
}
|
|
1821
|
+
/**
|
|
1822
|
+
* Handle incoming WebSocket messages
|
|
1823
|
+
*/
|
|
1824
|
+
handleWebSocketMessage(event) {
|
|
1825
|
+
try {
|
|
1826
|
+
const message = JSON.parse(event.data);
|
|
1827
|
+
switch (message.type) {
|
|
1828
|
+
case "Results":
|
|
1829
|
+
this.processTranscriptionResult(message);
|
|
1830
|
+
break;
|
|
1831
|
+
case "Metadata":
|
|
1832
|
+
break;
|
|
1833
|
+
case "SpeechStarted":
|
|
1834
|
+
this.emit("speech");
|
|
1835
|
+
break;
|
|
1836
|
+
case "UtteranceEnd":
|
|
1837
|
+
this.emit("silence");
|
|
1838
|
+
break;
|
|
1839
|
+
case "Error":
|
|
1840
|
+
this.handleDeepgramError(message);
|
|
1841
|
+
break;
|
|
1842
|
+
}
|
|
1843
|
+
} catch (error) {
|
|
1844
|
+
console.error("Failed to parse Deepgram message:", error);
|
|
1845
|
+
}
|
|
1846
|
+
}
|
|
1847
|
+
/**
|
|
1848
|
+
* Process transcription result from Deepgram
|
|
1849
|
+
*/
|
|
1850
|
+
processTranscriptionResult(message) {
|
|
1851
|
+
if (!message.channel?.alternatives?.length) {
|
|
1852
|
+
return;
|
|
1853
|
+
}
|
|
1854
|
+
const alternative = message.channel.alternatives[0];
|
|
1855
|
+
const isFinal = message.is_final ?? message.speech_final ?? false;
|
|
1856
|
+
if (!alternative.transcript && !isFinal) {
|
|
1857
|
+
return;
|
|
1858
|
+
}
|
|
1859
|
+
const words = alternative.words?.map((w) => ({
|
|
1860
|
+
text: w.word,
|
|
1861
|
+
start: Math.round(w.start * 1e3),
|
|
1862
|
+
// Convert to milliseconds
|
|
1863
|
+
end: Math.round(w.end * 1e3),
|
|
1864
|
+
confidence: w.confidence
|
|
1865
|
+
}));
|
|
1866
|
+
const result = {
|
|
1867
|
+
text: alternative.transcript,
|
|
1868
|
+
isFinal,
|
|
1869
|
+
confidence: alternative.confidence,
|
|
1870
|
+
timestamp: Date.now(),
|
|
1871
|
+
words,
|
|
1872
|
+
language: this.config.language
|
|
1873
|
+
};
|
|
1874
|
+
this.handleTranscript(result);
|
|
1875
|
+
}
|
|
1876
|
+
/**
|
|
1877
|
+
* Handle Deepgram-specific errors
|
|
1878
|
+
*/
|
|
1879
|
+
handleDeepgramError(message) {
|
|
1880
|
+
let errorCode = "provider_error" /* PROVIDER_ERROR */;
|
|
1881
|
+
const errorMessage = message.error?.message || "Unknown Deepgram error";
|
|
1882
|
+
if (errorMessage.toLowerCase().includes("unauthorized") || errorMessage.toLowerCase().includes("invalid api key")) {
|
|
1883
|
+
errorCode = "authentication_failed" /* AUTHENTICATION_FAILED */;
|
|
1884
|
+
} else if (errorMessage.toLowerCase().includes("rate limit")) {
|
|
1885
|
+
errorCode = "provider_error" /* PROVIDER_ERROR */;
|
|
1886
|
+
}
|
|
1887
|
+
const error = new TranscriptionError(
|
|
1888
|
+
errorMessage,
|
|
1889
|
+
errorCode,
|
|
1890
|
+
"deepgram" /* Deepgram */,
|
|
1891
|
+
message.error
|
|
1892
|
+
);
|
|
1893
|
+
this.emit("error", error);
|
|
1894
|
+
}
|
|
1895
|
+
/**
|
|
1896
|
+
* Handle WebSocket error
|
|
1897
|
+
*/
|
|
1898
|
+
handleWebSocketError(_event) {
|
|
1899
|
+
if (!this.isIntentionalClose && this.connectionAttempts < this.maxRetries) {
|
|
1900
|
+
this.reconnect();
|
|
1901
|
+
} else {
|
|
1902
|
+
const error = new TranscriptionError(
|
|
1903
|
+
"WebSocket connection error",
|
|
1904
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
1905
|
+
"deepgram" /* Deepgram */
|
|
1906
|
+
);
|
|
1907
|
+
this.handleError(error);
|
|
1908
|
+
}
|
|
1909
|
+
}
|
|
1910
|
+
/**
|
|
1911
|
+
* Handle WebSocket close
|
|
1912
|
+
*/
|
|
1913
|
+
handleWebSocketClose(event) {
|
|
1914
|
+
this.isConnectionReady = false;
|
|
1915
|
+
if (!this.isIntentionalClose && this.state === "active" /* ACTIVE */) {
|
|
1916
|
+
if (this.connectionAttempts < this.maxRetries) {
|
|
1917
|
+
this.reconnect();
|
|
1918
|
+
} else {
|
|
1919
|
+
const error = new TranscriptionError(
|
|
1920
|
+
`WebSocket closed unexpectedly: ${event.code} ${event.reason}`,
|
|
1921
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
1922
|
+
"deepgram" /* Deepgram */
|
|
1923
|
+
);
|
|
1924
|
+
this.handleError(error);
|
|
1925
|
+
}
|
|
1926
|
+
}
|
|
1927
|
+
}
|
|
1928
|
+
/**
|
|
1929
|
+
* Attempt to reconnect
|
|
1930
|
+
*/
|
|
1931
|
+
reconnect() {
|
|
1932
|
+
this.connectionAttempts++;
|
|
1933
|
+
const delay = RECONNECT_DELAY * Math.pow(2, this.connectionAttempts - 1);
|
|
1934
|
+
this.reconnectTimeout = setTimeout(async () => {
|
|
1935
|
+
try {
|
|
1936
|
+
await this.setupWebSocket();
|
|
1937
|
+
if (this.mediaStream) {
|
|
1938
|
+
this.setupAudioProcessing();
|
|
1939
|
+
}
|
|
1940
|
+
} catch (error) {
|
|
1941
|
+
if (this.connectionAttempts < this.maxRetries) {
|
|
1942
|
+
this.reconnect();
|
|
1943
|
+
} else {
|
|
1944
|
+
this.handleError(error);
|
|
1945
|
+
}
|
|
1946
|
+
}
|
|
1947
|
+
}, delay);
|
|
1948
|
+
}
|
|
1949
|
+
/**
|
|
1950
|
+
* Start keep-alive interval
|
|
1951
|
+
*/
|
|
1952
|
+
startKeepAlive() {
|
|
1953
|
+
this.keepAliveInterval = setInterval(() => {
|
|
1954
|
+
if (this.socket && this.socket.readyState === WebSocket.OPEN) {
|
|
1955
|
+
this.socket.send(JSON.stringify({ type: "KeepAlive" }));
|
|
1956
|
+
}
|
|
1957
|
+
}, KEEP_ALIVE_INTERVAL);
|
|
1958
|
+
}
|
|
1959
|
+
/**
|
|
1960
|
+
* Stop keep-alive interval
|
|
1961
|
+
*/
|
|
1962
|
+
stopKeepAlive() {
|
|
1963
|
+
if (this.keepAliveInterval) {
|
|
1964
|
+
clearInterval(this.keepAliveInterval);
|
|
1965
|
+
this.keepAliveInterval = null;
|
|
1966
|
+
}
|
|
1967
|
+
}
|
|
1968
|
+
/**
|
|
1969
|
+
* Request microphone access
|
|
1970
|
+
*/
|
|
1971
|
+
async getMicrophoneAccess() {
|
|
1972
|
+
try {
|
|
1973
|
+
const sampleRate = this.config.audioConfig?.sampleRate || 16e3;
|
|
1974
|
+
this.mediaStream = await navigator.mediaDevices.getUserMedia({
|
|
1975
|
+
audio: {
|
|
1976
|
+
sampleRate,
|
|
1977
|
+
channelCount: this.config.audioConfig?.channels || 1,
|
|
1978
|
+
echoCancellation: true,
|
|
1979
|
+
noiseSuppression: true,
|
|
1980
|
+
autoGainControl: true
|
|
1981
|
+
}
|
|
1982
|
+
});
|
|
1983
|
+
} catch (error) {
|
|
1984
|
+
throw new TranscriptionError(
|
|
1985
|
+
"Microphone access denied",
|
|
1986
|
+
"microphone_access_denied" /* MICROPHONE_ACCESS_DENIED */,
|
|
1987
|
+
"deepgram" /* Deepgram */,
|
|
1988
|
+
error
|
|
1989
|
+
);
|
|
1990
|
+
}
|
|
1991
|
+
}
|
|
1992
|
+
/**
 * Set up the audio processing pipeline: microphone stream -> media
 * stream source -> ScriptProcessorNode (4096-sample buffers, mono) ->
 * destination. Each buffer is converted to Int16 PCM and streamed while
 * the state is ACTIVE.
 * NOTE(review): ScriptProcessorNode is deprecated in the Web Audio API;
 * an AudioWorklet migration would avoid main-thread audio callbacks.
 */
setupAudioProcessing() {
  if (!this.mediaStream) return;
  // webkitAudioContext covers older Safari.
  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  this.audioContext = new AudioContextClass({
    sampleRate: this.config.audioConfig?.sampleRate || 16e3
  });
  const source = this.audioContext.createMediaStreamSource(this.mediaStream);
  this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
  this.processor.onaudioprocess = (event) => {
    if (this.state !== "active" /* ACTIVE */) return;
    const inputData = event.inputBuffer.getChannelData(0);
    const int16Data = this.convertFloat32ToInt16(inputData);
    this.sendAudio(int16Data.buffer);
  };
  source.connect(this.processor);
  // Connect to destination so the processor node actually runs.
  this.processor.connect(this.audioContext.destination);
}
|
|
2012
|
+
/**
|
|
2013
|
+
* Stop audio processing
|
|
2014
|
+
*/
|
|
2015
|
+
stopAudioProcessing() {
|
|
2016
|
+
if (this.processor) {
|
|
2017
|
+
this.processor.disconnect();
|
|
2018
|
+
this.processor.onaudioprocess = null;
|
|
2019
|
+
this.processor = null;
|
|
2020
|
+
}
|
|
2021
|
+
}
|
|
2022
|
+
/**
|
|
2023
|
+
* Convert Float32 audio samples to Int16
|
|
2024
|
+
*/
|
|
2025
|
+
convertFloat32ToInt16(buffer) {
|
|
2026
|
+
const int16 = new Int16Array(buffer.length);
|
|
2027
|
+
for (let i = 0; i < buffer.length; i++) {
|
|
2028
|
+
const s = Math.max(-1, Math.min(1, buffer[i]));
|
|
2029
|
+
int16[i] = s < 0 ? s * 32768 : s * 32767;
|
|
2030
|
+
}
|
|
2031
|
+
return int16;
|
|
2032
|
+
}
|
|
2033
|
+
/**
|
|
2034
|
+
* Close WebSocket connection
|
|
2035
|
+
*/
|
|
2036
|
+
closeWebSocket() {
|
|
2037
|
+
if (this.socket) {
|
|
2038
|
+
this.socket.onopen = null;
|
|
2039
|
+
this.socket.onmessage = null;
|
|
2040
|
+
this.socket.onerror = null;
|
|
2041
|
+
this.socket.onclose = null;
|
|
2042
|
+
if (this.socket.readyState === WebSocket.OPEN || this.socket.readyState === WebSocket.CONNECTING) {
|
|
2043
|
+
this.socket.close();
|
|
2044
|
+
}
|
|
2045
|
+
this.socket = null;
|
|
2046
|
+
}
|
|
2047
|
+
}
|
|
2048
|
+
/**
|
|
2049
|
+
* Stop media stream tracks
|
|
2050
|
+
*/
|
|
2051
|
+
stopMediaStream() {
|
|
2052
|
+
if (this.mediaStream) {
|
|
2053
|
+
this.mediaStream.getTracks().forEach((track) => track.stop());
|
|
2054
|
+
this.mediaStream = null;
|
|
2055
|
+
}
|
|
2056
|
+
}
|
|
2057
|
+
};
|
|
2058
|
+
/**
 * Provider capabilities (static, shared by all instances).
 * supportedLanguages mirrors Deepgram model coverage -- TODO confirm
 * against the current Deepgram language matrix when bumping models.
 */
_DeepgramProvider.capabilities = {
  supportsInterim: true,
  supportsWordTimestamps: true,
  supportsSpeakerDiarization: true,
  supportsPunctuation: true,
  supportsLanguageDetection: true,
  supportedLanguages: [
    "en", "en-US", "en-GB", "en-AU", "en-IN",
    "es", "es-ES", "es-419",
    "fr", "fr-FR", "fr-CA",
    "de", "de-DE",
    "it", "it-IT",
    "pt", "pt-BR", "pt-PT",
    "nl", "nl-NL",
    "ja", "ja-JP",
    "ko", "ko-KR",
    "zh", "zh-CN", "zh-TW",
    "ru", "ru-RU",
    "uk", "uk-UA",
    "hi", "hi-IN",
    "tr", "tr-TR",
    "pl", "pl-PL",
    "sv", "sv-SE",
    "da", "da-DK",
    "no", "no-NO",
    "fi", "fi-FI"
  ]
};
// Public alias for the internal class binding produced by the bundler.
var DeepgramProvider = _DeepgramProvider;
|
|
2114
|
+
|
|
2115
|
+
// src/providers/AssemblyAIProvider.ts
// REST endpoint that mints short-lived realtime session tokens.
var ASSEMBLYAI_AUTH_URL = "https://api.assemblyai.com/v2/realtime/token";
// Realtime transcription WebSocket endpoint.
var ASSEMBLYAI_WS_URL = "wss://api.assemblyai.com/v2/realtime/ws";
// Hz; passed as sample_rate to the socket and used for capture -- assumes
// AssemblyAI realtime expects 16 kHz mono PCM16 (TODO confirm).
var SAMPLE_RATE = 16e3;
// ms; base delay for exponential reconnect backoff.
var RECONNECT_DELAY2 = 1e3;
|
|
2120
|
+
var _AssemblyAIProvider = class _AssemblyAIProvider extends BaseTranscriber {
|
|
2121
|
+
/**
 * Create a new AssemblyAIProvider.
 * Only initializes instance fields; no network or audio work happens
 * until initialize()/start().
 * @param config - Transcription configuration with API key
 */
constructor(config) {
  // Force the provider tag so downstream error reporting is consistent.
  super({ ...config, provider: "assemblyai" /* AssemblyAI */ });
  /** WebSocket connection */
  this.socket = null;
  /** Media stream from microphone */
  this.mediaStream = null;
  /** Audio context for processing */
  this.audioContext = null;
  /** Audio processor node */
  this.processor = null;
  /** Session token for WebSocket authentication (fetched per session) */
  this.sessionToken = null;
  /** Connection attempt counter */
  this.connectionAttempts = 0;
  /** Maximum reconnection attempts */
  this.maxRetries = 3;
  /** Reconnection timeout */
  this.reconnectTimeout = null;
  /** Flag indicating if connection is ready (set on SessionBegins) */
  this.isConnectionReady = false;
  /** Flag for intentional close (suppresses reconnects) */
  this.isIntentionalClose = false;
  /** Session ID from AssemblyAI */
  this.sessionId = null;
}
}
|
|
2150
|
+
/**
|
|
2151
|
+
* Check if AssemblyAI provider is supported
|
|
2152
|
+
*/
|
|
2153
|
+
isSupported() {
|
|
2154
|
+
if (typeof window === "undefined") {
|
|
2155
|
+
return typeof WebSocket !== "undefined" && typeof fetch !== "undefined";
|
|
2156
|
+
}
|
|
2157
|
+
return !!(typeof WebSocket !== "undefined" && typeof fetch !== "undefined" && navigator.mediaDevices && typeof navigator.mediaDevices.getUserMedia === "function" && (window.AudioContext || window.webkitAudioContext));
|
|
2158
|
+
}
|
|
2159
|
+
/**
 * Initialize the AssemblyAI provider: validate configuration and
 * environment, fetch a temporary realtime token, and open the WebSocket.
 * @throws TranscriptionError(INVALID_CONFIG) when no API key is set
 * @throws TranscriptionError(UNSUPPORTED_BROWSER) when the environment
 *         lacks required APIs
 */
async initialize() {
  if (!this.config.apiKey) {
    throw new TranscriptionError(
      "AssemblyAI API key is required",
      "invalid_config" /* INVALID_CONFIG */,
      "assemblyai" /* AssemblyAI */
    );
  }
  if (!this.isSupported()) {
    throw new TranscriptionError(
      "AssemblyAI provider is not supported in this environment",
      "unsupported_browser" /* UNSUPPORTED_BROWSER */,
      "assemblyai" /* AssemblyAI */
    );
  }
  this.setState("initializing" /* INITIALIZING */);
  this.isConnectionReady = false;
  this.isIntentionalClose = false;
  try {
    // Token must be obtained before the socket URL can be built.
    this.sessionToken = await this.getSessionToken();
    await this.setupWebSocket();
  } catch (error) {
    // Report through the error channel, then rethrow to the caller.
    this.handleError(error);
    throw error;
  }
}
}
|
|
2188
|
+
/**
 * Start transcription: (re)initialize the connection if needed, acquire
 * the microphone, attach the audio pipeline, and enter ACTIVE state.
 * No-op when already ACTIVE. Emits "start" on success.
 */
async start() {
  if (!this.socket || !this.isConnectionReady) {
    await this.initialize();
  }
  if (this.state === "active" /* ACTIVE */) {
    return;
  }
  try {
    await this.getMicrophoneAccess();
    this.setupAudioProcessing();
    this.setState("active" /* ACTIVE */);
    this.emit("start");
  } catch (error) {
    this.handleError(error);
    throw error;
  }
}
}
|
|
2208
|
+
/**
|
|
2209
|
+
* Stop transcription
|
|
2210
|
+
*/
|
|
2211
|
+
async stop() {
|
|
2212
|
+
if (this.state === "stopped" /* STOPPED */ || this.state === "idle" /* IDLE */) {
|
|
2213
|
+
return;
|
|
2214
|
+
}
|
|
2215
|
+
this.setState("stopping" /* STOPPING */);
|
|
2216
|
+
this.isIntentionalClose = true;
|
|
2217
|
+
this.sendTerminateMessage();
|
|
2218
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
2219
|
+
this.closeWebSocket();
|
|
2220
|
+
this.stopAudioProcessing();
|
|
2221
|
+
this.stopMediaStream();
|
|
2222
|
+
this.setState("stopped" /* STOPPED */);
|
|
2223
|
+
this.emit("stop");
|
|
2224
|
+
}
|
|
2225
|
+
/**
|
|
2226
|
+
* Pause transcription
|
|
2227
|
+
*/
|
|
2228
|
+
pause() {
|
|
2229
|
+
if (this.state !== "active" /* ACTIVE */) {
|
|
2230
|
+
return;
|
|
2231
|
+
}
|
|
2232
|
+
this.stopAudioProcessing();
|
|
2233
|
+
this.setState("paused" /* PAUSED */);
|
|
2234
|
+
this.emit("pause");
|
|
2235
|
+
}
|
|
2236
|
+
/**
|
|
2237
|
+
* Resume transcription
|
|
2238
|
+
*/
|
|
2239
|
+
resume() {
|
|
2240
|
+
if (this.state !== "paused" /* PAUSED */) {
|
|
2241
|
+
return;
|
|
2242
|
+
}
|
|
2243
|
+
if (this.mediaStream && this.audioContext) {
|
|
2244
|
+
this.setupAudioProcessing();
|
|
2245
|
+
}
|
|
2246
|
+
this.setState("active" /* ACTIVE */);
|
|
2247
|
+
this.emit("resume");
|
|
2248
|
+
}
|
|
2249
|
+
/**
|
|
2250
|
+
* Send audio data through WebSocket
|
|
2251
|
+
* @param audioData - Raw audio data as ArrayBuffer
|
|
2252
|
+
*/
|
|
2253
|
+
sendAudio(audioData) {
|
|
2254
|
+
if (this.socket && this.socket.readyState === WebSocket.OPEN && this.state === "active" /* ACTIVE */) {
|
|
2255
|
+
const base64Audio = this.encodeAudioToBase64(audioData);
|
|
2256
|
+
this.sendAudioMessage(base64Audio);
|
|
2257
|
+
this.recordAudioData(audioData);
|
|
2258
|
+
}
|
|
2259
|
+
}
|
|
2260
|
+
/**
 * Clean up all resources: pending reconnect timer, socket, audio
 * pipeline, AudioContext, microphone tracks, and session credentials.
 * Detaches all listeners, so the instance cannot be reused for
 * notifications afterwards.
 */
async cleanup() {
  // Prevent close/error handlers from reconnecting during teardown.
  this.isIntentionalClose = true;
  if (this.reconnectTimeout) {
    clearTimeout(this.reconnectTimeout);
    this.reconnectTimeout = null;
  }
  this.closeWebSocket();
  this.stopAudioProcessing();
  if (this.audioContext) {
    try {
      await this.audioContext.close();
    } catch {
      // AudioContext may already be closed; ignore.
    }
    this.audioContext = null;
  }
  this.stopMediaStream();
  this.sessionToken = null;
  this.sessionId = null;
  this.connectionAttempts = 0;
  this.isConnectionReady = false;
  this.removeAllListeners();
}
}
|
|
2285
|
+
/**
 * Get the static capability descriptor for the AssemblyAI provider.
 * @returns Shared capabilities object (not a copy -- callers must not mutate).
 */
getCapabilities() {
  return _AssemblyAIProvider.capabilities;
}
}
|
|
2291
|
+
// ==================== Private Methods ====================
/**
 * Get a temporary realtime session token from AssemblyAI (1 hour TTL).
 * Token-based auth keeps the long-lived API key out of the WebSocket URL.
 * @returns The session token string
 * @throws TranscriptionError(AUTHENTICATION_FAILED) on HTTP 401
 * @throws TranscriptionError(PROVIDER_ERROR) on other non-OK responses
 * @throws TranscriptionError(NETWORK_ERROR) when the request itself fails
 */
async getSessionToken() {
  try {
    const response = await fetch(ASSEMBLYAI_AUTH_URL, {
      method: "POST",
      headers: {
        "authorization": this.config.apiKey,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ expires_in: 3600 })
    });
    if (!response.ok) {
      if (response.status === 401) {
        throw new TranscriptionError(
          "Invalid AssemblyAI API key",
          "authentication_failed" /* AUTHENTICATION_FAILED */,
          "assemblyai" /* AssemblyAI */
        );
      }
      throw new TranscriptionError(
        `Failed to get session token: ${response.statusText}`,
        "provider_error" /* PROVIDER_ERROR */,
        "assemblyai" /* AssemblyAI */
      );
    }
    const data = await response.json();
    return data.token;
  } catch (error) {
    // Re-throw our own classified errors unchanged; wrap everything else
    // (fetch/JSON failures) as a network error with the original cause.
    if (error instanceof TranscriptionError) {
      throw error;
    }
    throw new TranscriptionError(
      "Failed to authenticate with AssemblyAI",
      "network_error" /* NETWORK_ERROR */,
      "assemblyai" /* AssemblyAI */,
      error
    );
  }
}
}
|
|
2333
|
+
/**
|
|
2334
|
+
* Set up WebSocket connection
|
|
2335
|
+
*/
|
|
2336
|
+
async setupWebSocket() {
|
|
2337
|
+
return new Promise((resolve, reject) => {
|
|
2338
|
+
const url = `${ASSEMBLYAI_WS_URL}?sample_rate=${SAMPLE_RATE}&token=${this.sessionToken}`;
|
|
2339
|
+
this.socket = new WebSocket(url);
|
|
2340
|
+
const connectionTimeout = setTimeout(() => {
|
|
2341
|
+
reject(new TranscriptionError(
|
|
2342
|
+
"WebSocket connection timeout",
|
|
2343
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
2344
|
+
"assemblyai" /* AssemblyAI */
|
|
2345
|
+
));
|
|
2346
|
+
}, 1e4);
|
|
2347
|
+
this.socket.onopen = () => {
|
|
2348
|
+
clearTimeout(connectionTimeout);
|
|
2349
|
+
this.handleWebSocketOpen();
|
|
2350
|
+
resolve();
|
|
2351
|
+
};
|
|
2352
|
+
this.socket.onmessage = (event) => {
|
|
2353
|
+
this.handleWebSocketMessage(event);
|
|
2354
|
+
};
|
|
2355
|
+
this.socket.onerror = (event) => {
|
|
2356
|
+
clearTimeout(connectionTimeout);
|
|
2357
|
+
this.handleWebSocketError(event);
|
|
2358
|
+
reject(new TranscriptionError(
|
|
2359
|
+
"WebSocket connection error",
|
|
2360
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
2361
|
+
"assemblyai" /* AssemblyAI */
|
|
2362
|
+
));
|
|
2363
|
+
};
|
|
2364
|
+
this.socket.onclose = (event) => {
|
|
2365
|
+
clearTimeout(connectionTimeout);
|
|
2366
|
+
this.handleWebSocketClose(event);
|
|
2367
|
+
};
|
|
2368
|
+
});
|
|
2369
|
+
}
|
|
2370
|
+
/**
 * Handle WebSocket open: reset the retry counter. The connection is not
 * considered ready until the server's SessionBegins message arrives
 * (see handleSessionBegins).
 */
handleWebSocketOpen() {
  this.connectionAttempts = 0;
}
}
|
|
2376
|
+
/**
|
|
2377
|
+
* Handle incoming WebSocket messages
|
|
2378
|
+
*/
|
|
2379
|
+
handleWebSocketMessage(event) {
|
|
2380
|
+
try {
|
|
2381
|
+
const message = JSON.parse(event.data);
|
|
2382
|
+
switch (message.message_type) {
|
|
2383
|
+
case "SessionBegins":
|
|
2384
|
+
this.handleSessionBegins(message);
|
|
2385
|
+
break;
|
|
2386
|
+
case "PartialTranscript":
|
|
2387
|
+
this.handlePartialTranscript(message);
|
|
2388
|
+
break;
|
|
2389
|
+
case "FinalTranscript":
|
|
2390
|
+
this.handleFinalTranscript(message);
|
|
2391
|
+
break;
|
|
2392
|
+
case "SessionTerminated":
|
|
2393
|
+
this.handleSessionTerminated();
|
|
2394
|
+
break;
|
|
2395
|
+
case "error":
|
|
2396
|
+
this.handleAssemblyAIError(message);
|
|
2397
|
+
break;
|
|
2398
|
+
}
|
|
2399
|
+
} catch (error) {
|
|
2400
|
+
console.error("Failed to parse AssemblyAI message:", error);
|
|
2401
|
+
}
|
|
2402
|
+
}
|
|
2403
|
+
/**
 * Handle the SessionBegins message: record the server-assigned session
 * id and mark the connection ready for audio.
 */
handleSessionBegins(message) {
  this.sessionId = message.session_id || null;
  this.isConnectionReady = true;
}
}
|
|
2410
|
+
/**
|
|
2411
|
+
* Handle partial (interim) transcript
|
|
2412
|
+
*/
|
|
2413
|
+
handlePartialTranscript(message) {
|
|
2414
|
+
if (!message.text) return;
|
|
2415
|
+
const result = {
|
|
2416
|
+
text: message.text,
|
|
2417
|
+
isFinal: false,
|
|
2418
|
+
confidence: message.confidence,
|
|
2419
|
+
timestamp: Date.now(),
|
|
2420
|
+
language: this.config.language
|
|
2421
|
+
};
|
|
2422
|
+
this.handleTranscript(result);
|
|
2423
|
+
}
|
|
2424
|
+
/**
|
|
2425
|
+
* Handle final transcript
|
|
2426
|
+
*/
|
|
2427
|
+
handleFinalTranscript(message) {
|
|
2428
|
+
if (!message.text) return;
|
|
2429
|
+
const words = message.words?.map((w) => ({
|
|
2430
|
+
text: w.text,
|
|
2431
|
+
start: w.start,
|
|
2432
|
+
end: w.end,
|
|
2433
|
+
confidence: w.confidence
|
|
2434
|
+
}));
|
|
2435
|
+
const result = {
|
|
2436
|
+
text: message.text,
|
|
2437
|
+
isFinal: true,
|
|
2438
|
+
confidence: message.confidence,
|
|
2439
|
+
timestamp: Date.now(),
|
|
2440
|
+
words,
|
|
2441
|
+
language: this.config.language
|
|
2442
|
+
};
|
|
2443
|
+
this.handleTranscript(result);
|
|
2444
|
+
}
|
|
2445
|
+
/**
|
|
2446
|
+
* Handle session terminated message
|
|
2447
|
+
*/
|
|
2448
|
+
handleSessionTerminated() {
|
|
2449
|
+
this.isConnectionReady = false;
|
|
2450
|
+
if (!this.isIntentionalClose) {
|
|
2451
|
+
this.setState("stopped" /* STOPPED */);
|
|
2452
|
+
this.emit("stop");
|
|
2453
|
+
}
|
|
2454
|
+
}
|
|
2455
|
+
/**
 * Map an AssemblyAI "error" message onto a TranscriptionError and emit
 * it. Classification is by substring of the (lowercased) error text:
 * auth problems, insufficient credits (kept generic), expired sessions;
 * everything else stays PROVIDER_ERROR.
 */
handleAssemblyAIError(message) {
  let errorCode = "provider_error" /* PROVIDER_ERROR */;
  const errorMessage = message.error || "Unknown AssemblyAI error";
  if (errorMessage.toLowerCase().includes("unauthorized") || errorMessage.toLowerCase().includes("invalid token")) {
    errorCode = "authentication_failed" /* AUTHENTICATION_FAILED */;
  } else if (errorMessage.toLowerCase().includes("insufficient credits")) {
    // Intentionally kept as the generic provider code.
    errorCode = "provider_error" /* PROVIDER_ERROR */;
  } else if (errorMessage.toLowerCase().includes("session expired")) {
    errorCode = "session_expired" /* SESSION_EXPIRED */;
  }
  const error = new TranscriptionError(
    errorMessage,
    errorCode,
    "assemblyai" /* AssemblyAI */,
    message
  );
  this.emit("error", error);
}
}
|
|
2476
|
+
/**
|
|
2477
|
+
* Handle WebSocket error
|
|
2478
|
+
*/
|
|
2479
|
+
handleWebSocketError(_event) {
|
|
2480
|
+
if (!this.isIntentionalClose && this.connectionAttempts < this.maxRetries) {
|
|
2481
|
+
this.reconnect();
|
|
2482
|
+
} else {
|
|
2483
|
+
const error = new TranscriptionError(
|
|
2484
|
+
"WebSocket connection error",
|
|
2485
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
2486
|
+
"assemblyai" /* AssemblyAI */
|
|
2487
|
+
);
|
|
2488
|
+
this.handleError(error);
|
|
2489
|
+
}
|
|
2490
|
+
}
|
|
2491
|
+
/**
|
|
2492
|
+
* Handle WebSocket close
|
|
2493
|
+
*/
|
|
2494
|
+
handleWebSocketClose(event) {
|
|
2495
|
+
this.isConnectionReady = false;
|
|
2496
|
+
if (!this.isIntentionalClose && this.state === "active" /* ACTIVE */) {
|
|
2497
|
+
if (this.connectionAttempts < this.maxRetries) {
|
|
2498
|
+
this.reconnect();
|
|
2499
|
+
} else {
|
|
2500
|
+
const error = new TranscriptionError(
|
|
2501
|
+
`WebSocket closed unexpectedly: ${event.code} ${event.reason}`,
|
|
2502
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
2503
|
+
"assemblyai" /* AssemblyAI */
|
|
2504
|
+
);
|
|
2505
|
+
this.handleError(error);
|
|
2506
|
+
}
|
|
2507
|
+
}
|
|
2508
|
+
}
|
|
2509
|
+
/**
|
|
2510
|
+
* Attempt to reconnect
|
|
2511
|
+
*/
|
|
2512
|
+
reconnect() {
|
|
2513
|
+
this.connectionAttempts++;
|
|
2514
|
+
const delay = RECONNECT_DELAY2 * Math.pow(2, this.connectionAttempts - 1);
|
|
2515
|
+
this.reconnectTimeout = setTimeout(async () => {
|
|
2516
|
+
try {
|
|
2517
|
+
this.sessionToken = await this.getSessionToken();
|
|
2518
|
+
await this.setupWebSocket();
|
|
2519
|
+
if (this.mediaStream) {
|
|
2520
|
+
this.setupAudioProcessing();
|
|
2521
|
+
}
|
|
2522
|
+
} catch (error) {
|
|
2523
|
+
if (this.connectionAttempts < this.maxRetries) {
|
|
2524
|
+
this.reconnect();
|
|
2525
|
+
} else {
|
|
2526
|
+
this.handleError(error);
|
|
2527
|
+
}
|
|
2528
|
+
}
|
|
2529
|
+
}, delay);
|
|
2530
|
+
}
|
|
2531
|
+
/**
|
|
2532
|
+
* Request microphone access
|
|
2533
|
+
*/
|
|
2534
|
+
async getMicrophoneAccess() {
|
|
2535
|
+
try {
|
|
2536
|
+
this.mediaStream = await navigator.mediaDevices.getUserMedia({
|
|
2537
|
+
audio: {
|
|
2538
|
+
sampleRate: SAMPLE_RATE,
|
|
2539
|
+
channelCount: 1,
|
|
2540
|
+
echoCancellation: true,
|
|
2541
|
+
noiseSuppression: true,
|
|
2542
|
+
autoGainControl: true
|
|
2543
|
+
}
|
|
2544
|
+
});
|
|
2545
|
+
} catch (error) {
|
|
2546
|
+
throw new TranscriptionError(
|
|
2547
|
+
"Microphone access denied",
|
|
2548
|
+
"microphone_access_denied" /* MICROPHONE_ACCESS_DENIED */,
|
|
2549
|
+
"assemblyai" /* AssemblyAI */,
|
|
2550
|
+
error
|
|
2551
|
+
);
|
|
2552
|
+
}
|
|
2553
|
+
}
|
|
2554
|
+
/**
 * Set up the audio processing pipeline: microphone stream -> media
 * stream source -> ScriptProcessorNode (4096-sample buffers, mono) ->
 * destination. Each buffer is converted to PCM16 and streamed while the
 * state is ACTIVE.
 * NOTE(review): ScriptProcessorNode is deprecated in the Web Audio API;
 * an AudioWorklet migration would avoid main-thread audio callbacks.
 */
setupAudioProcessing() {
  if (!this.mediaStream) return;
  // webkitAudioContext covers older Safari.
  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  this.audioContext = new AudioContextClass({ sampleRate: SAMPLE_RATE });
  const source = this.audioContext.createMediaStreamSource(this.mediaStream);
  this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
  this.processor.onaudioprocess = (event) => {
    if (this.state !== "active" /* ACTIVE */) return;
    const inputData = event.inputBuffer.getChannelData(0);
    const pcmData = this.convertFloat32ToPCM16(inputData);
    this.sendAudio(pcmData);
  };
  source.connect(this.processor);
  // Connect to destination so the processor node actually runs.
  this.processor.connect(this.audioContext.destination);
}
}
|
|
2572
|
+
/**
|
|
2573
|
+
* Stop audio processing
|
|
2574
|
+
*/
|
|
2575
|
+
stopAudioProcessing() {
|
|
2576
|
+
if (this.processor) {
|
|
2577
|
+
this.processor.disconnect();
|
|
2578
|
+
this.processor.onaudioprocess = null;
|
|
2579
|
+
this.processor = null;
|
|
2580
|
+
}
|
|
2581
|
+
}
|
|
2582
|
+
/**
|
|
2583
|
+
* Convert Float32 audio samples to PCM16 (Int16)
|
|
2584
|
+
*/
|
|
2585
|
+
convertFloat32ToPCM16(float32Array) {
|
|
2586
|
+
const int16 = new Int16Array(float32Array.length);
|
|
2587
|
+
for (let i = 0; i < float32Array.length; i++) {
|
|
2588
|
+
const s = Math.max(-1, Math.min(1, float32Array[i]));
|
|
2589
|
+
int16[i] = s < 0 ? s * 32768 : s * 32767;
|
|
2590
|
+
}
|
|
2591
|
+
return int16.buffer;
|
|
2592
|
+
}
|
|
2593
|
+
/**
|
|
2594
|
+
* Encode ArrayBuffer to base64
|
|
2595
|
+
*/
|
|
2596
|
+
encodeAudioToBase64(arrayBuffer) {
|
|
2597
|
+
const bytes = new Uint8Array(arrayBuffer);
|
|
2598
|
+
let binary = "";
|
|
2599
|
+
for (let i = 0; i < bytes.byteLength; i++) {
|
|
2600
|
+
binary += String.fromCharCode(bytes[i]);
|
|
2601
|
+
}
|
|
2602
|
+
return btoa(binary);
|
|
2603
|
+
}
|
|
2604
|
+
/**
 * Send a chunk of base64-encoded audio over the transcription WebSocket.
 * The message is silently dropped if the socket is absent or not OPEN,
 * so callers may stream without checking connection state.
 * @param base64Audio - Base64-encoded PCM16 audio payload
 */
sendAudioMessage(base64Audio) {
  if (this.socket && this.socket.readyState === WebSocket.OPEN) {
    this.socket.send(JSON.stringify({ audio_data: base64Audio }));
  }
}
|
|
2612
|
+
/**
 * Ask the remote service to terminate the transcription session.
 * Sends a { terminate_session: true } control message; dropped silently
 * when the socket is absent or not OPEN.
 */
sendTerminateMessage() {
  if (this.socket && this.socket.readyState === WebSocket.OPEN) {
    this.socket.send(JSON.stringify({ terminate_session: true }));
  }
}
|
|
2620
|
+
/**
 * Close the WebSocket connection and release it.
 * All event handlers are nulled out BEFORE close() is called so that the
 * close/error events triggered by closing do not re-enter session logic.
 * close() is only invoked while the socket is still CONNECTING or OPEN.
 */
closeWebSocket() {
  if (this.socket) {
    this.socket.onopen = null;
    this.socket.onmessage = null;
    this.socket.onerror = null;
    this.socket.onclose = null;
    // Closing an already CLOSING/CLOSED socket would be a no-op at best.
    if (this.socket.readyState === WebSocket.OPEN || this.socket.readyState === WebSocket.CONNECTING) {
      this.socket.close();
    }
    this.socket = null;
  }
}
|
|
2635
|
+
/**
 * Stop every track on the captured microphone stream and release the
 * stream reference so the input device is freed. Safe to call when no
 * stream is held.
 */
stopMediaStream() {
  if (this.mediaStream) {
    this.mediaStream.getTracks().forEach((track) => track.stop());
    this.mediaStream = null;
  }
}
|
|
2644
|
+
};
|
|
2645
|
+
/**
 * Provider capabilities advertised by the AssemblyAI realtime provider.
 * NOTE(review): supportedLanguages lists English variants only — confirm
 * against the provider's current realtime language support.
 */
_AssemblyAIProvider.capabilities = {
  supportsInterim: true,
  supportsWordTimestamps: true,
  supportsSpeakerDiarization: true,
  supportsPunctuation: true,
  supportsLanguageDetection: false,
  supportedLanguages: ["en", "en-US", "en-GB", "en-AU"]
};
// Public alias for the internal binding (bundler output pattern: the class
// is declared as _AssemblyAIProvider so static fields can be attached above).
var AssemblyAIProvider = _AssemblyAIProvider;
|
|
2655
|
+
|
|
2656
|
+
// src/utils/AudioProcessor.ts
|
|
2657
|
+
var AudioProcessor = class {
  /**
   * Convert Float32 samples in [-1, 1] to signed 16-bit PCM.
   * @param buffer - Input Float32Array
   * @returns Int16Array of converted samples
   */
  static convertFloat32ToInt16(buffer) {
    const int16 = new Int16Array(buffer.length);
    for (let i = 0; i < buffer.length; i++) {
      // Clamp, then scale asymmetrically: -1 -> -32768, +1 -> +32767.
      const s = Math.max(-1, Math.min(1, buffer[i]));
      int16[i] = s < 0 ? s * 32768 : s * 32767;
    }
    return int16;
  }
  /**
   * Convert signed 16-bit PCM samples to Float32 in [-1, 1).
   * @param buffer - Input Int16Array
   * @returns Float32Array of converted samples
   */
  static convertInt16ToFloat32(buffer) {
    const float32 = new Float32Array(buffer.length);
    for (let i = 0; i < buffer.length; i++) {
      float32[i] = buffer[i] / 32768;
    }
    return float32;
  }
  /**
   * Resample an audio buffer to a different sample rate using linear
   * interpolation between neighbouring source samples.
   * @param buffer - Input audio buffer
   * @param fromRate - Source sample rate in Hz
   * @param toRate - Target sample rate in Hz
   * @returns Resampled Float32Array (the input itself when the rates match)
   */
  static resampleBuffer(buffer, fromRate, toRate) {
    if (fromRate === toRate) {
      return buffer;
    }
    const ratio = toRate / fromRate;
    const newLength = Math.round(buffer.length * ratio);
    const output = new Float32Array(newLength);
    for (let i = 0; i < newLength; i++) {
      const srcPosition = i / ratio;
      const srcIndex = Math.floor(srcPosition);
      const fraction = srcPosition - srcIndex;
      // The `|| 0` / `|| sample1` fallbacks guard reads past the buffer end.
      const sample1 = buffer[srcIndex] || 0;
      const sample2 = buffer[srcIndex + 1] || sample1;
      output[i] = sample1 + (sample2 - sample1) * fraction;
    }
    return output;
  }
  /**
   * Downsample an audio buffer by averaging each window of source samples
   * (simple box filter, which also reduces aliasing versus decimation).
   * @param buffer - Input audio buffer
   * @param fromRate - Source sample rate in Hz
   * @param toRate - Target sample rate in Hz
   * @returns Downsampled Float32Array (the input itself when fromRate <= toRate)
   */
  static downsampleBuffer(buffer, fromRate, toRate) {
    if (fromRate <= toRate) {
      return buffer;
    }
    const ratio = fromRate / toRate;
    const newLength = Math.ceil(buffer.length / ratio);
    const output = new Float32Array(newLength);
    for (let i = 0; i < newLength; i++) {
      const startIdx = Math.floor(i * ratio);
      const endIdx = Math.min(Math.floor((i + 1) * ratio), buffer.length);
      let sum = 0;
      const count = endIdx - startIdx;
      for (let j = startIdx; j < endIdx; j++) {
        sum += buffer[j];
      }
      output[i] = count > 0 ? sum / count : 0;
    }
    return output;
  }
  /**
   * Upsample an audio buffer via linear interpolation.
   * @param buffer - Input audio buffer
   * @param fromRate - Source sample rate in Hz
   * @param toRate - Target sample rate in Hz
   * @returns Upsampled Float32Array (the input itself when fromRate >= toRate)
   */
  static upsampleBuffer(buffer, fromRate, toRate) {
    if (fromRate >= toRate) {
      return buffer;
    }
    return this.resampleBuffer(buffer, fromRate, toRate);
  }
  /**
   * Normalize an audio buffer so its peak amplitude is 1.0.
   * @param buffer - Input audio buffer
   * @returns Normalized Float32Array (the input itself when silent, to avoid
   *          dividing by zero)
   */
  static normalizeBuffer(buffer) {
    let peak = 0;
    for (let i = 0; i < buffer.length; i++) {
      const abs = Math.abs(buffer[i]);
      if (abs > peak) peak = abs;
    }
    if (peak === 0) {
      return buffer;
    }
    const output = new Float32Array(buffer.length);
    for (let i = 0; i < buffer.length; i++) {
      output[i] = buffer[i] / peak;
    }
    return output;
  }
  /**
   * Apply a gain multiplier, clamping the result to [-1, 1].
   * @param buffer - Input audio buffer
   * @param gain - Gain multiplier
   * @returns Processed Float32Array
   */
  static applyGain(buffer, gain) {
    const output = new Float32Array(buffer.length);
    for (let i = 0; i < buffer.length; i++) {
      output[i] = Math.max(-1, Math.min(1, buffer[i] * gain));
    }
    return output;
  }
  /**
   * Mix two audio buffers. Missing samples in the shorter buffer are
   * treated as silence; the result is clamped to [-1, 1].
   * @param buffer1 - First audio buffer
   * @param buffer2 - Second audio buffer
   * @param ratio - Mix ratio (0-1, where 0.5 is an equal mix)
   * @returns Mixed Float32Array
   */
  static mixBuffers(buffer1, buffer2, ratio = 0.5) {
    const length = Math.max(buffer1.length, buffer2.length);
    const output = new Float32Array(length);
    for (let i = 0; i < length; i++) {
      const s1 = i < buffer1.length ? buffer1[i] : 0;
      const s2 = i < buffer2.length ? buffer2[i] : 0;
      output[i] = Math.max(-1, Math.min(1, s1 * (1 - ratio) + s2 * ratio));
    }
    return output;
  }
  /**
   * Convert an AudioBuffer to a mono 16-bit WAV file.
   * Only channel 0 is exported, so the WAV header always declares 1 channel.
   * (Previously the header advertised audioBuffer.numberOfChannels while only
   * mono data was written, producing a corrupt file for multichannel input.)
   * @param audioBuffer - Web Audio API AudioBuffer
   * @param sampleRate - Output sample rate (defaults to the buffer's rate)
   * @returns WAV file as ArrayBuffer
   */
  static bufferToWav(audioBuffer, sampleRate) {
    const outputRate = sampleRate || audioBuffer.sampleRate;
    const sourceData = audioBuffer.getChannelData(0);
    const channelData = outputRate !== audioBuffer.sampleRate ? this.resampleBuffer(sourceData, audioBuffer.sampleRate, outputRate) : new Float32Array(sourceData);
    return this.encodeWav(channelData, outputRate);
  }
  /**
   * Encode mono Float32 samples as a complete 16-bit PCM WAV file.
   * Shared by bufferToWav() and float32ToWav().
   * @param samples - Mono Float32Array of audio samples
   * @param sampleRate - Sample rate in Hz
   * @returns WAV file as ArrayBuffer (44-byte header + little-endian PCM16)
   */
  static encodeWav(samples, sampleRate) {
    const int16Data = this.convertFloat32ToInt16(samples);
    const dataLength = int16Data.length * 2;
    const header = this.createWavHeader(dataLength, sampleRate, 1, 16);
    const wav = new ArrayBuffer(44 + dataLength);
    // Copy the header in one shot, then append the PCM payload.
    new Uint8Array(wav, 0, 44).set(new Uint8Array(header));
    const view = new DataView(wav);
    for (let i = 0; i < int16Data.length; i++) {
      view.setInt16(44 + i * 2, int16Data[i], true);
    }
    return wav;
  }
  /**
   * Create a canonical 44-byte RIFF/WAVE header for PCM audio.
   * @param dataLength - Length of the audio data in bytes
   * @param sampleRate - Sample rate in Hz
   * @param channels - Number of channels
   * @param bitDepth - Bits per sample
   * @returns WAV header as ArrayBuffer
   */
  static createWavHeader(dataLength, sampleRate, channels, bitDepth) {
    const header = new ArrayBuffer(44);
    const view = new DataView(header);
    const byteRate = sampleRate * channels * (bitDepth / 8);
    const blockAlign = channels * (bitDepth / 8);
    this.writeString(view, 0, "RIFF");
    view.setUint32(4, 36 + dataLength, true); // RIFF chunk size
    this.writeString(view, 8, "WAVE");
    this.writeString(view, 12, "fmt ");
    view.setUint32(16, 16, true); // fmt chunk size
    view.setUint16(20, 1, true); // audio format 1 = PCM
    view.setUint16(22, channels, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, byteRate, true);
    view.setUint16(32, blockAlign, true);
    view.setUint16(34, bitDepth, true);
    this.writeString(view, 36, "data");
    view.setUint32(40, dataLength, true);
    return header;
  }
  /**
   * Write an ASCII string into a DataView byte by byte.
   * @param view - DataView to write to
   * @param offset - Byte offset
   * @param string - String to write
   */
  static writeString(view, offset, string) {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  }
  /**
   * Convert raw mono PCM Float32 samples to a WAV ArrayBuffer.
   * @param samples - Float32Array of audio samples
   * @param sampleRate - Sample rate in Hz
   * @returns WAV file as ArrayBuffer
   */
  static float32ToWav(samples, sampleRate) {
    return this.encodeWav(samples, sampleRate);
  }
};
|
|
2884
|
+
|
|
2885
|
+
// src/utils/VoiceActivityDetector.ts
|
|
2886
|
+
var VoiceActivityDetector = class {
  /**
   * Energy-based voice activity detector with onset/offset debouncing.
   * @param options - VAD configuration options
   */
  constructor(options = {}) {
    // Rolling window of recent RMS energy values.
    this.energyHistory = [];
    // True once sustained speech has been confirmed.
    this.isSpeaking = false;
    // Timestamp when candidate speech was first observed (null when idle).
    this.speechStartTime = null;
    // Timestamp when candidate silence was first observed (null when idle).
    this.silenceStartTime = null;
    // Timestamp of the most recent processAudio() call.
    this.lastProcessTime = 0;
    this.threshold = options.threshold ?? 0.01;
    this.minSpeechDuration = options.minSpeechDuration ?? 300;
    this.minSilenceDuration = options.minSilenceDuration ?? 500;
    this.historySize = options.historySize ?? 10;
    this.onSpeechStart = options.onSpeechStart;
    this.onSpeechEnd = options.onSpeechEnd;
    this.onVolumeChange = options.onVolumeChange;
  }
  /**
   * Feed a block of samples into the detector and update the speaking state.
   * Speech starts only after the energy stays above threshold for
   * minSpeechDuration ms; it ends only after minSilenceDuration ms of quiet.
   * @param audioData - Audio samples as Float32Array
   * @returns Current speaking state
   */
  processAudio(audioData) {
    const now = Date.now();
    const energy = this.calculateRMSEnergy(audioData);
    this.energyHistory.push(energy);
    if (this.energyHistory.length > this.historySize) {
      this.energyHistory.shift();
    }
    this.onVolumeChange?.(energy);
    if (energy > this.threshold) {
      // Sound present: cancel any pending offset timer, debounce onset.
      this.silenceStartTime = null;
      if (!this.isSpeaking) {
        if (this.speechStartTime === null) {
          this.speechStartTime = now;
        } else if (now - this.speechStartTime >= this.minSpeechDuration) {
          this.isSpeaking = true;
          this.speechStartTime = null;
          this.onSpeechStart?.();
        }
      }
    } else {
      // Quiet: cancel any pending onset timer, debounce offset.
      this.speechStartTime = null;
      if (this.isSpeaking) {
        if (this.silenceStartTime === null) {
          this.silenceStartTime = now;
        } else if (now - this.silenceStartTime >= this.minSilenceDuration) {
          this.isSpeaking = false;
          this.silenceStartTime = null;
          this.onSpeechEnd?.();
        }
      }
    }
    this.lastProcessTime = now;
    return this.isSpeaking;
  }
  /**
   * Compute the RMS (root mean square) energy of an audio buffer.
   * @param buffer - Audio samples
   * @returns RMS energy value (0-1)
   */
  calculateRMSEnergy(buffer) {
    let sumOfSquares = 0;
    for (const sample of buffer) {
      sumOfSquares += sample * sample;
    }
    return Math.sqrt(sumOfSquares / buffer.length);
  }
  /**
   * Derive an adaptive threshold from the recent energy history
   * (1.5x the average plus a small floor).
   * @returns Adaptive threshold value
   */
  calculateAdaptiveThreshold() {
    if (this.energyHistory.length === 0) {
      return this.threshold;
    }
    return this.getAverageEnergy() * 1.5 + 5e-3;
  }
  /** Reset all detector state back to idle. */
  reset() {
    this.energyHistory = [];
    this.isSpeaking = false;
    this.speechStartTime = null;
    this.silenceStartTime = null;
    this.lastProcessTime = 0;
  }
  /**
   * Update the detection threshold, clamped into [0, 1].
   * @param threshold - New threshold (0-1)
   */
  setThreshold(threshold) {
    this.threshold = Math.max(0, Math.min(1, threshold));
  }
  /**
   * Average energy over the current history window.
   * @returns Average energy value (0 when the history is empty)
   */
  getAverageEnergy() {
    const count = this.energyHistory.length;
    if (count === 0) {
      return 0;
    }
    return this.energyHistory.reduce((total, value) => total + value, 0) / count;
  }
  /**
   * Whether speech is currently detected.
   * @returns Speaking state
   */
  isSpeechDetected() {
    return this.isSpeaking;
  }
  /**
   * Current detection threshold.
   * @returns Threshold value
   */
  getThreshold() {
    return this.threshold;
  }
  /**
   * Replace any of the event callbacks; omitted keys are left unchanged.
   * @param callbacks - New callback functions
   */
  setCallbacks(callbacks) {
    if (callbacks.onSpeechStart !== void 0) {
      this.onSpeechStart = callbacks.onSpeechStart;
    }
    if (callbacks.onSpeechEnd !== void 0) {
      this.onSpeechEnd = callbacks.onSpeechEnd;
    }
    if (callbacks.onVolumeChange !== void 0) {
      this.onVolumeChange = callbacks.onVolumeChange;
    }
  }
};
|
|
3036
|
+
|
|
3037
|
+
// src/utils/AudioLevelMonitor.ts
|
|
3038
|
+
var AudioLevelMonitor = class {
  /**
   * Smoothed RMS level meter for streaming audio.
   * @param options - Monitor configuration
   */
  constructor(options = {}) {
    // Exponentially smoothed level, updated on every processAudio() call.
    this.currentLevel = 0;
    // Highest smoothed level observed since the last reset.
    this.peakLevel = 0;
    this.smoothingFactor = options.smoothingFactor ?? 0.8;
    this.onLevelChange = options.onLevelChange;
  }
  /**
   * Feed a block of samples into the meter and update the smoothed level.
   * @param audioData - Audio samples as Float32Array
   * @returns Current smoothed level
   */
  processAudio(audioData) {
    const instant = this.calculateLevel(audioData);
    const alpha = this.smoothingFactor;
    // Exponential moving average: a higher alpha reacts more slowly.
    this.currentLevel = alpha * this.currentLevel + (1 - alpha) * instant;
    if (this.peakLevel < this.currentLevel) {
      this.peakLevel = this.currentLevel;
    }
    this.onLevelChange?.(this.currentLevel);
    return this.currentLevel;
  }
  /**
   * Compute the RMS level of an audio buffer.
   * @param buffer - Audio samples
   * @returns Level value (0-1)
   */
  calculateLevel(buffer) {
    let sumOfSquares = 0;
    for (const sample of buffer) {
      sumOfSquares += sample * sample;
    }
    return Math.sqrt(sumOfSquares / buffer.length);
  }
  /**
   * Current smoothed level.
   * @returns Current level (0-1)
   */
  getCurrentLevel() {
    return this.currentLevel;
  }
  /**
   * Peak level since the last reset.
   * @returns Peak level (0-1)
   */
  getPeakLevel() {
    return this.peakLevel;
  }
  /** Reset both the current and peak levels to zero. */
  reset() {
    this.currentLevel = 0;
    this.peakLevel = 0;
  }
  /** Reset only the peak level. */
  resetPeak() {
    this.peakLevel = 0;
  }
  /**
   * Current level expressed in decibels.
   * @returns Level in dB (-Infinity when silent)
   */
  getDecibels() {
    if (this.currentLevel <= 0) {
      return -Infinity;
    }
    return 20 * Math.log10(this.currentLevel);
  }
  /**
   * Convert a linear level to decibels.
   * @param level - Level value (0-1)
   * @returns Level in dB (-Infinity for non-positive input)
   */
  static toDecibels(level) {
    if (level <= 0) {
      return -Infinity;
    }
    return 20 * Math.log10(level);
  }
  /**
   * Convert decibels to a linear level.
   * @param db - Level in decibels
   * @returns Linear level (0-1)
   */
  static fromDecibels(db) {
    return Math.pow(10, db / 20);
  }
  /**
   * Update the smoothing factor, clamped into [0, 1].
   * @param factor - Smoothing factor (0-1)
   */
  setSmoothingFactor(factor) {
    this.smoothingFactor = Math.min(1, Math.max(0, factor));
  }
  /**
   * Current smoothing factor.
   * @returns Smoothing factor
   */
  getSmoothingFactor() {
    return this.smoothingFactor;
  }
  /**
   * Set the level-change callback invoked on every processAudio() call.
   * @param callback - Callback function
   */
  setOnLevelChange(callback) {
    this.onLevelChange = callback;
  }
  /**
   * Current level as a percentage, capped at 100.
   * @returns Level percentage (0-100)
   */
  getLevelPercentage() {
    return Math.min(100, this.currentLevel * 100);
  }
};
|
|
3164
|
+
|
|
3165
|
+
// src/utils/AudioBufferManager.ts
|
|
3166
|
+
var AudioBufferManager = class {
  /**
   * Fixed-capacity ring buffer of Float32 audio chunks; when full, the
   * oldest chunk is overwritten.
   * @param bufferSize - Maximum number of chunks to store
   */
  constructor(bufferSize = 100) {
    // Backing slot array (one chunk per slot).
    this.buffer = new Array(bufferSize);
    // Next slot to write into.
    this.writeIndex = 0;
    // Oldest readable slot.
    this.readIndex = 0;
    // Number of chunks currently stored.
    this.count = 0;
    this.bufferSize = bufferSize;
  }
  /**
   * Store a copy of a chunk, overwriting the oldest one when full.
   * @param chunk - Audio data chunk
   */
  write(chunk) {
    this.buffer[this.writeIndex] = new Float32Array(chunk);
    this.writeIndex = (this.writeIndex + 1) % this.bufferSize;
    if (this.count === this.bufferSize) {
      // Full: the write just clobbered the oldest chunk, so advance the
      // read cursor past it.
      this.readIndex = (this.readIndex + 1) % this.bufferSize;
    } else {
      this.count += 1;
    }
  }
  /**
   * Remove and return chunks from the front of the buffer.
   * @param numChunks - Number of chunks to read (default: all available)
   * @returns Array of audio chunks
   */
  read(numChunks) {
    const limit = numChunks === void 0 ? this.count : Math.min(numChunks, this.count);
    const result = [];
    for (let taken = 0; taken < limit; taken++) {
      const chunk = this.buffer[this.readIndex];
      if (chunk) {
        result.push(chunk);
      }
      this.readIndex = (this.readIndex + 1) % this.bufferSize;
    }
    this.count -= limit;
    return result;
  }
  /**
   * Return chunks from the front of the buffer without removing them.
   * @param numChunks - Number of chunks to peek (default: all available)
   * @returns Array of audio chunks
   */
  peek(numChunks) {
    const limit = numChunks === void 0 ? this.count : Math.min(numChunks, this.count);
    const result = [];
    let cursor = this.readIndex;
    for (let taken = 0; taken < limit; taken++) {
      const chunk = this.buffer[cursor];
      if (chunk) {
        result.push(chunk);
      }
      cursor = (cursor + 1) % this.bufferSize;
    }
    return result;
  }
  /** Discard all stored chunks and reset the cursors. */
  clear() {
    this.buffer = new Array(this.bufferSize);
    this.writeIndex = 0;
    this.readIndex = 0;
    this.count = 0;
  }
  /**
   * Number of chunks currently available.
   * @returns Number of chunks in the buffer
   */
  getAvailableChunks() {
    return this.count;
  }
  /**
   * Whether the buffer is at capacity.
   * @returns True if the buffer is full
   */
  isFull() {
    return this.count >= this.bufferSize;
  }
  /**
   * Whether the buffer holds no chunks.
   * @returns True if the buffer is empty
   */
  isEmpty() {
    return this.count === 0;
  }
  /**
   * Concatenate chunks into a single contiguous buffer.
   * @param chunks - Array of audio chunks
   * @returns Single concatenated Float32Array
   */
  concatenateChunks(chunks) {
    if (chunks.length === 0) {
      return new Float32Array(0);
    }
    let totalLength = 0;
    for (const chunk of chunks) {
      totalLength += chunk.length;
    }
    const result = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      result.set(chunk, offset);
      offset += chunk.length;
    }
    return result;
  }
  /**
   * All buffered audio as one contiguous buffer (non-destructive).
   * @returns Concatenated Float32Array
   */
  getAll() {
    return this.concatenateChunks(this.peek());
  }
  /**
   * Total number of samples across all buffered chunks.
   * @returns Total sample count
   */
  getTotalSamples() {
    let total = 0;
    let cursor = this.readIndex;
    for (let i = 0; i < this.count; i++) {
      const chunk = this.buffer[cursor];
      if (chunk) {
        total += chunk.length;
      }
      cursor = (cursor + 1) % this.bufferSize;
    }
    return total;
  }
  /**
   * Maximum number of chunks the buffer can hold.
   * @returns Buffer capacity
   */
  getCapacity() {
    return this.bufferSize;
  }
  /**
   * Change the buffer capacity, keeping the newest chunks that fit.
   * @param newSize - New buffer size
   */
  resize(newSize) {
    if (newSize === this.bufferSize) return;
    const retained = this.read();
    this.bufferSize = newSize;
    this.buffer = new Array(newSize);
    this.writeIndex = 0;
    this.readIndex = 0;
    this.count = 0;
    // Re-insert only the most recent chunks that fit the new capacity.
    for (const chunk of retained.slice(-newSize)) {
      this.write(chunk);
    }
  }
};
|
|
3328
|
+
|
|
3329
|
+
// src/utils/AudioRecorder.ts
|
|
3330
|
+
var AudioRecorder = class {
|
|
3331
|
+
/**
|
|
3332
|
+
* Create a new AudioRecorder
|
|
3333
|
+
* @param sampleRate - Sample rate for recording
|
|
3334
|
+
*/
|
|
3335
|
+
constructor(sampleRate = 16e3) {
|
|
3336
|
+
/** Recorded audio chunks */
|
|
3337
|
+
this.audioChunks = [];
|
|
3338
|
+
/** Recording state */
|
|
3339
|
+
this.isRecording = false;
|
|
3340
|
+
/** Recording start time */
|
|
3341
|
+
this.startTime = null;
|
|
3342
|
+
this.sampleRate = sampleRate;
|
|
3343
|
+
}
|
|
3344
|
+
/**
|
|
3345
|
+
* Start recording
|
|
3346
|
+
*/
|
|
3347
|
+
start() {
|
|
3348
|
+
this.audioChunks = [];
|
|
3349
|
+
this.isRecording = true;
|
|
3350
|
+
this.startTime = Date.now();
|
|
3351
|
+
}
|
|
3352
|
+
/**
|
|
3353
|
+
* Record an audio chunk
|
|
3354
|
+
* @param audioData - Audio data to record
|
|
3355
|
+
*/
|
|
3356
|
+
recordChunk(audioData) {
|
|
3357
|
+
if (this.isRecording) {
|
|
3358
|
+
this.audioChunks.push(new Float32Array(audioData));
|
|
3359
|
+
}
|
|
3360
|
+
}
|
|
3361
|
+
/**
|
|
3362
|
+
* Stop recording and return all recorded audio
|
|
3363
|
+
* @returns Complete audio as Float32Array
|
|
3364
|
+
*/
|
|
3365
|
+
stop() {
|
|
3366
|
+
this.isRecording = false;
|
|
3367
|
+
return this.getCombinedAudio();
|
|
3368
|
+
}
|
|
3369
|
+
/**
|
|
3370
|
+
* Clear all recorded audio
|
|
3371
|
+
*/
|
|
3372
|
+
clear() {
|
|
3373
|
+
this.audioChunks = [];
|
|
3374
|
+
this.startTime = null;
|
|
3375
|
+
}
|
|
3376
|
+
/**
|
|
3377
|
+
* Export recording to specified format
|
|
3378
|
+
* @param format - Output format ('raw' or 'wav')
|
|
3379
|
+
* @returns Audio data as ArrayBuffer
|
|
3380
|
+
*/
|
|
3381
|
+
export(format = "wav") {
|
|
3382
|
+
const audioData = this.getCombinedAudio();
|
|
3383
|
+
if (format === "raw") {
|
|
3384
|
+
return audioData.buffer;
|
|
3385
|
+
}
|
|
3386
|
+
return AudioProcessor.float32ToWav(audioData, this.sampleRate);
|
|
3387
|
+
}
|
|
3388
|
+
/**
|
|
3389
|
+
* Get recording duration in seconds
|
|
3390
|
+
* @returns Duration in seconds
|
|
3391
|
+
*/
|
|
3392
|
+
getDuration() {
|
|
3393
|
+
const totalSamples = this.audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
|
|
3394
|
+
return totalSamples / this.sampleRate;
|
|
3395
|
+
}
|
|
3396
|
+
/**
|
|
3397
|
+
* Get number of recorded chunks
|
|
3398
|
+
* @returns Chunk count
|
|
3399
|
+
*/
|
|
3400
|
+
getChunkCount() {
|
|
3401
|
+
return this.audioChunks.length;
|
|
3402
|
+
}
|
|
3403
|
+
/**
|
|
3404
|
+
* Check if currently recording
|
|
3405
|
+
* @returns Recording state
|
|
3406
|
+
*/
|
|
3407
|
+
getIsRecording() {
|
|
3408
|
+
return this.isRecording;
|
|
3409
|
+
}
|
|
3410
|
+
/**
|
|
3411
|
+
* Get sample rate
|
|
3412
|
+
* @returns Sample rate
|
|
3413
|
+
*/
|
|
3414
|
+
getSampleRate() {
|
|
3415
|
+
return this.sampleRate;
|
|
3416
|
+
}
|
|
3417
|
+
/**
|
|
3418
|
+
* Set sample rate (only effective before recording starts)
|
|
3419
|
+
* @param sampleRate - New sample rate
|
|
3420
|
+
*/
|
|
3421
|
+
setSampleRate(sampleRate) {
|
|
3422
|
+
if (!this.isRecording) {
|
|
3423
|
+
this.sampleRate = sampleRate;
|
|
3424
|
+
}
|
|
3425
|
+
}
|
|
3426
|
+
/**
|
|
3427
|
+
* Get total number of recorded samples
|
|
3428
|
+
* @returns Sample count
|
|
3429
|
+
*/
|
|
3430
|
+
getTotalSamples() {
|
|
3431
|
+
return this.audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
|
|
3432
|
+
}
|
|
3433
|
+
/**
|
|
3434
|
+
* Get recording start time
|
|
3435
|
+
* @returns Start timestamp or null
|
|
3436
|
+
*/
|
|
3437
|
+
getStartTime() {
|
|
3438
|
+
return this.startTime;
|
|
3439
|
+
}
|
|
3440
|
+
/**
|
|
3441
|
+
* Get elapsed recording time in milliseconds
|
|
3442
|
+
* @returns Elapsed time in ms
|
|
3443
|
+
*/
|
|
3444
|
+
getElapsedTime() {
|
|
3445
|
+
if (!this.startTime) return 0;
|
|
3446
|
+
return Date.now() - this.startTime;
|
|
3447
|
+
}
|
|
3448
|
+
/**
|
|
3449
|
+
* Get combined audio data
|
|
3450
|
+
* @returns Concatenated Float32Array
|
|
3451
|
+
*/
|
|
3452
|
+
getCombinedAudio() {
|
|
3453
|
+
if (this.audioChunks.length === 0) {
|
|
3454
|
+
return new Float32Array(0);
|
|
3455
|
+
}
|
|
3456
|
+
const totalLength = this.audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
|
|
3457
|
+
const combined = new Float32Array(totalLength);
|
|
3458
|
+
let offset = 0;
|
|
3459
|
+
for (const chunk of this.audioChunks) {
|
|
3460
|
+
combined.set(chunk, offset);
|
|
3461
|
+
offset += chunk.length;
|
|
3462
|
+
}
|
|
3463
|
+
return combined;
|
|
3464
|
+
}
|
|
3465
|
+
/**
 * Create a Blob from the recording.
 * Serialization is delegated to this.export(format); only "wav" gets a
 * dedicated MIME type, every other format is tagged as a generic octet
 * stream.
 * @param format - Output format (default "wav")
 * @returns Blob with audio data
 */
toBlob(format = "wav") {
  const buffer = this.export(format);
  const mimeType = format === "wav" ? "audio/wav" : "application/octet-stream";
  return new Blob([buffer], { type: mimeType });
}
|
|
3475
|
+
/**
 * Create a URL for the recording.
 *
 * NOTE(review): despite the name this returns an *object* URL
 * (URL.createObjectURL), not a base64 `data:` URL. Callers must release
 * it with URL.revokeObjectURL when done — download() below does so.
 * @param format - Output format (default "wav")
 * @returns Object URL string
 */
toDataURL(format = "wav") {
  const blob = this.toBlob(format);
  return URL.createObjectURL(blob);
}
|
|
3484
|
+
/**
|
|
3485
|
+
* Download the recording
|
|
3486
|
+
* @param filename - Output filename
|
|
3487
|
+
* @param format - Output format
|
|
3488
|
+
*/
|
|
3489
|
+
download(filename = "recording", format = "wav") {
|
|
3490
|
+
const url = this.toDataURL(format);
|
|
3491
|
+
const extension = format === "wav" ? ".wav" : ".raw";
|
|
3492
|
+
const link = document.createElement("a");
|
|
3493
|
+
link.href = url;
|
|
3494
|
+
link.download = filename + extension;
|
|
3495
|
+
link.click();
|
|
3496
|
+
URL.revokeObjectURL(url);
|
|
3497
|
+
}
|
|
3498
|
+
};
|
|
3499
|
+
|
|
3500
|
+
// src/utils/StorageAdapter.ts
|
|
3501
|
+
var LocalStorageAdapter = class {
  /**
   * Storage adapter backed by window.localStorage. All keys are
   * namespaced with a prefix so multiple adapters can coexist.
   * @param prefix - Key prefix (default: 'live-transcribe')
   */
  constructor(prefix = "live-transcribe") {
    this.prefix = prefix;
  }
  /**
   * Get prefixed key
   */
  getKey(key) {
    return `${this.prefix}:${key}`;
  }
  /**
   * Throw when window.localStorage is unavailable (Node.js / SSR).
   * Centralizes the guard that was previously duplicated verbatim in
   * every async method; throwing inside an async method still surfaces
   * as a rejected promise, so caller-visible behavior is unchanged.
   */
  assertAvailable() {
    if (typeof window === "undefined" || !window.localStorage) {
      throw new Error("localStorage is not available");
    }
  }
  /** Serialize `data` as JSON and store it under the prefixed key. */
  async save(key, data) {
    this.assertAvailable();
    const serialized = JSON.stringify(data);
    localStorage.setItem(this.getKey(key), serialized);
  }
  /** Load and parse the value for `key`; null when absent. */
  async load(key) {
    this.assertAvailable();
    const data = localStorage.getItem(this.getKey(key));
    if (data === null) {
      return null;
    }
    return JSON.parse(data);
  }
  /** Remove the entry for `key` (no-op when absent). */
  async delete(key) {
    this.assertAvailable();
    localStorage.removeItem(this.getKey(key));
  }
  /** List all keys under this adapter's prefix, with the prefix stripped. */
  async list() {
    this.assertAvailable();
    const keys = [];
    const prefixWithColon = `${this.prefix}:`;
    for (let i = 0; i < localStorage.length; i++) {
      const key = localStorage.key(i);
      if (key?.startsWith(prefixWithColon)) {
        keys.push(key.substring(prefixWithColon.length));
      }
    }
    return keys;
  }
  /** Whether an entry exists for `key`. */
  async exists(key) {
    this.assertAvailable();
    return localStorage.getItem(this.getKey(key)) !== null;
  }
};
|
|
3559
|
+
var MemoryStorageAdapter = class {
  /** In-memory storage adapter; values are isolated by JSON deep copies. */
  constructor() {
    this.storage = /* @__PURE__ */ new Map();
  }
  /** Store a deep copy so later mutation of `data` does not leak in. */
  async save(key, data) {
    const snapshot = JSON.parse(JSON.stringify(data));
    this.storage.set(key, snapshot);
  }
  /** @returns a deep copy of the stored value, or null when absent. */
  async load(key) {
    if (!this.storage.has(key)) {
      return null;
    }
    return JSON.parse(JSON.stringify(this.storage.get(key)));
  }
  /** Remove an entry (no-op when absent). */
  async delete(key) {
    this.storage.delete(key);
  }
  /** All stored keys, in insertion order. */
  async list() {
    return [...this.storage.keys()];
  }
  /** Whether a value is stored under `key`. */
  async exists(key) {
    return this.storage.has(key);
  }
  /**
   * Clear all data
   */
  clear() {
    this.storage.clear();
  }
  /**
   * Get storage size
   */
  size() {
    return this.storage.size;
  }
};
|
|
3595
|
+
|
|
3596
|
+
// src/utils/validators.ts
|
|
3597
|
+
/**
 * Validate a transcription configuration object.
 * Collects every problem rather than failing fast: `errors` block use,
 * `warnings` are advisory. Audio sub-config problems from
 * validateAudioConfig are merged in.
 * @param config - Transcription configuration to check
 * @returns { valid, errors, warnings? }
 */
function validateTranscriptionConfig(config) {
  const errors = [];
  const warnings = [];
  const { provider, apiKey, language, audioConfig } = config;
  if (!provider) {
    errors.push({
      field: "provider",
      message: "Provider is required",
      code: "REQUIRED_FIELD"
    });
  } else if (!Object.values(TranscriptionProvider).includes(provider)) {
    errors.push({
      field: "provider",
      message: `Invalid provider: ${provider}`,
      code: "INVALID_PROVIDER"
    });
  }
  // Cloud providers require credentials; the local Web Speech API does not.
  const isCloudProvider = provider === "deepgram" /* Deepgram */ || provider === "assemblyai" /* AssemblyAI */;
  if (isCloudProvider) {
    if (!apiKey) {
      errors.push({
        field: "apiKey",
        message: `API key is required for ${provider} provider`,
        code: "REQUIRED_API_KEY"
      });
    } else if (apiKey.length < 10) {
      warnings.push({
        field: "apiKey",
        message: "API key seems too short"
      });
    }
  }
  if (language && !validateLanguageCode(language)) {
    errors.push({
      field: "language",
      message: `Invalid language code: ${language}`,
      code: "INVALID_LANGUAGE"
    });
  }
  if (audioConfig) {
    const nested = validateAudioConfig(audioConfig);
    errors.push(...nested.errors);
    if (nested.warnings) {
      warnings.push(...nested.warnings);
    }
  }
  return {
    valid: errors.length === 0,
    errors,
    warnings: warnings.length > 0 ? warnings : void 0
  };
}
|
|
3648
|
+
/**
 * Validate an audio configuration object. Unset fields are skipped;
 * out-of-range values produce errors, unusual-but-legal sample rates
 * produce a warning.
 * @param config - Audio configuration to check
 * @returns { valid, errors, warnings? }
 */
function validateAudioConfig(config) {
  const errors = [];
  const warnings = [];
  const { sampleRate, channels, bitDepth, encoding } = config;
  if (sampleRate !== void 0) {
    if (sampleRate < 8e3 || sampleRate > 48e3) {
      errors.push({
        field: "audioConfig.sampleRate",
        message: "Sample rate must be between 8000 and 48000",
        code: "INVALID_SAMPLE_RATE"
      });
    } else if (![16e3, 44100, 48e3].includes(sampleRate)) {
      // Legal but uncommon rates may be rejected by individual providers.
      warnings.push({
        field: "audioConfig.sampleRate",
        message: "Non-standard sample rate may not be supported by all providers"
      });
    }
  }
  if (channels !== void 0 && (channels < 1 || channels > 2)) {
    errors.push({
      field: "audioConfig.channels",
      message: "Channels must be 1 or 2",
      code: "INVALID_CHANNELS"
    });
  }
  if (bitDepth !== void 0 && ![8, 16, 24].includes(bitDepth)) {
    errors.push({
      field: "audioConfig.bitDepth",
      message: "Bit depth must be 8, 16, or 24",
      code: "INVALID_BIT_DEPTH"
    });
  }
  if (encoding !== void 0 && !Object.values(AudioEncoding).includes(encoding)) {
    errors.push({
      field: "audioConfig.encoding",
      message: `Invalid encoding: ${encoding}`,
      code: "INVALID_ENCODING"
    });
  }
  return {
    valid: errors.length === 0,
    errors,
    warnings: warnings.length > 0 ? warnings : void 0
  };
}
|
|
3692
|
+
/**
 * Validate session-level settings: durations must be non-negative,
 * the VAD threshold must sit in [0, 1]. Unset fields are skipped.
 * @param config - Session configuration to check
 * @returns { valid, errors, warnings? }
 */
function validateSessionConfig(config) {
  const errors = [];
  const warnings = [];
  // Both duration-like fields share the same "not negative" rule.
  const durationRules = [
    ["maxDuration", "Max duration must be positive", "INVALID_MAX_DURATION"],
    ["silenceTimeout", "Silence timeout must be positive", "INVALID_SILENCE_TIMEOUT"]
  ];
  for (const [field, message, code] of durationRules) {
    const value = config[field];
    if (value !== void 0 && value < 0) {
      errors.push({ field, message, code });
    }
  }
  const vad = config.vadThreshold;
  if (vad !== void 0 && (vad < 0 || vad > 1)) {
    errors.push({
      field: "vadThreshold",
      message: "VAD threshold must be between 0 and 1",
      code: "INVALID_VAD_THRESHOLD"
    });
  }
  return {
    valid: errors.length === 0,
    errors,
    warnings: warnings.length > 0 ? warnings : void 0
  };
}
|
|
3722
|
+
/**
 * Loosely check a BCP-47-style language tag: 2-3 letter language,
 * optional region, optional 4-letter script, optional trailing region
 * or 3-digit UN M49 area code. Case-insensitive.
 * @param code - Language tag to test
 * @returns true when the tag matches the pattern
 */
function validateLanguageCode(code) {
  const bcp47ish = /^[a-z]{2,3}(-[A-Z]{2})?(-[A-Za-z]{4})?(-[A-Z]{2}|-[0-9]{3})?$/i;
  return bcp47ish.test(code);
}
|
|
3726
|
+
/**
 * Validate an API key for a given provider. The local Web Speech API
 * needs no key; cloud providers require one, and keys shorter than 20
 * characters are flagged as malformed.
 * @param provider - Provider identifier
 * @param key - API key to check
 * @returns { valid, errors }
 */
function validateApiKey(provider, key) {
  if (provider === "web-speech" /* WebSpeechAPI */) {
    return { valid: true, errors: [] };
  }
  const errors = [];
  if (!key) {
    errors.push({
      field: "apiKey",
      message: `API key is required for ${provider}`,
      code: "MISSING_API_KEY"
    });
    return { valid: false, errors };
  }
  // Per-provider "too short" messages; providers absent from this table
  // get no length check (matches the original switch's default).
  const shortKeyMessages = {
    ["deepgram" /* Deepgram */]: "Deepgram API key appears to be too short",
    ["assemblyai" /* AssemblyAI */]: "AssemblyAI API key appears to be too short"
  };
  const message = shortKeyMessages[provider];
  if (message !== void 0 && key.length < 20) {
    errors.push({
      field: "apiKey",
      message,
      code: "INVALID_API_KEY_FORMAT"
    });
  }
  return {
    valid: errors.length === 0,
    errors
  };
}
|
|
3764
|
+
|
|
3765
|
+
// src/utils/browserCheck.ts
|
|
3766
|
+
/**
 * Identify the current runtime. Outside a browser it reports Node.js
 * with the process version/platform; in a browser it sniffs the user
 * agent for name, version, OS, and a mobile heuristic.
 * @returns { name, version, os, isMobile }
 */
function getBrowserInfo() {
  if (typeof window === "undefined" || typeof navigator === "undefined") {
    const hasProcess = typeof process !== "undefined";
    return {
      name: "Node.js",
      version: hasProcess ? process.version : "unknown",
      os: hasProcess ? process.platform : "unknown",
      isMobile: false
    };
  }
  const ua = navigator.userAgent;
  const versionOf = (re) => ua.match(re)?.[1] || "unknown";
  let name = "Unknown";
  let version = "unknown";
  // Order matters: Edge and Chrome both contain "Chrome/", Safari's UA
  // is a substring of Chrome's, so check the most specific tokens first.
  if (ua.includes("Firefox/")) {
    name = "Firefox";
    version = versionOf(/Firefox\/(\d+\.\d+)/);
  } else if (ua.includes("Edg/")) {
    name = "Edge";
    version = versionOf(/Edg\/(\d+\.\d+)/);
  } else if (ua.includes("Chrome/")) {
    name = "Chrome";
    version = versionOf(/Chrome\/(\d+\.\d+)/);
  } else if (ua.includes("Safari/") && !ua.includes("Chrome")) {
    name = "Safari";
    version = versionOf(/Version\/(\d+\.\d+)/);
  }
  let os = "Unknown";
  if (ua.includes("Windows")) os = "Windows";
  else if (ua.includes("Mac OS")) os = "macOS";
  else if (ua.includes("Linux")) os = "Linux";
  else if (ua.includes("Android")) os = "Android";
  else if (ua.includes("iOS") || ua.includes("iPhone") || ua.includes("iPad")) os = "iOS";
  const isMobile = /Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(ua);
  return { name, version, os, isMobile };
}
|
|
3800
|
+
/**
 * Probe Web Speech API availability, including the webkit-prefixed
 * variant. Outside a browser it reports unsupported with a Node.js
 * fallback hint.
 * @returns { supported, details, fallback? }
 */
function checkWebSpeechAPISupport() {
  if (typeof window === "undefined") {
    return {
      supported: false,
      details: "Web Speech API is only available in browser environments",
      fallback: "Use Deepgram or AssemblyAI provider in Node.js"
    };
  }
  const standard = window.SpeechRecognition;
  const prefixed = window.webkitSpeechRecognition;
  if (standard || prefixed) {
    const webkitOnly = !standard && !!prefixed;
    return {
      supported: true,
      details: webkitOnly ? "Supported via webkit prefix" : "Fully supported"
    };
  }
  return {
    supported: false,
    details: "Web Speech API is not supported in this browser",
    fallback: "Use Deepgram or AssemblyAI provider instead"
  };
}
|
|
3822
|
+
/**
 * Probe for a global WebSocket constructor (browser or modern Node).
 * @returns { supported, details, fallback? }
 */
function checkWebSocketSupport() {
  if (typeof WebSocket === "undefined") {
    return {
      supported: false,
      details: "WebSocket is not supported",
      fallback: "Upgrade to a modern browser"
    };
  }
  return {
    supported: true,
    details: "WebSocket is fully supported"
  };
}
|
|
3836
|
+
/**
 * Probe navigator.mediaDevices capabilities. getUserMedia is the
 * requirement for "supported"; enumerateDevices only changes the
 * detail message.
 * @returns { supported, details, fallback? }
 */
function checkMediaDevicesSupport() {
  if (typeof navigator === "undefined") {
    return {
      supported: false,
      details: "Navigator API is not available",
      fallback: "Run in a browser environment"
    };
  }
  const devices = navigator.mediaDevices;
  const hasGetUserMedia = !!(devices && devices.getUserMedia);
  const hasEnumerateDevices = !!(devices && devices.enumerateDevices);
  if (hasGetUserMedia) {
    return {
      supported: true,
      details: hasEnumerateDevices ? "Full media devices support" : "getUserMedia supported, enumerateDevices not available"
    };
  }
  return {
    supported: false,
    details: "Media devices API is not supported",
    fallback: "Use HTTPS and a modern browser"
  };
}
|
|
3864
|
+
/**
 * Probe AudioContext availability (standard or webkit-prefixed) and
 * whether AudioWorklet is exposed.
 * @returns { supported, details, fallback? }
 */
function checkAudioContextSupport() {
  if (typeof window === "undefined") {
    return {
      supported: false,
      details: "AudioContext is only available in browser environments"
    };
  }
  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  if (AudioContextClass) {
    // BUGFIX: probe the resolved constructor, not the bare `AudioContext`
    // global. In browsers exposing only `webkitAudioContext`, the original
    // `"audioWorklet" in AudioContext.prototype` threw a ReferenceError.
    const hasWorklet = "audioWorklet" in AudioContextClass.prototype;
    return {
      supported: true,
      details: hasWorklet ? "Full AudioContext support with AudioWorklet" : "AudioContext supported (no AudioWorklet)"
    };
  }
  return {
    supported: false,
    details: "AudioContext is not supported",
    fallback: "Use a modern browser"
  };
}
|
|
3885
|
+
/**
 * Aggregate every capability probe into one report.
 * `overallCompatible` requires media-device access plus at least one
 * transcription transport (Web Speech API or WebSocket). Recommendation
 * order is fixed: speech, media, audio, then the mobile caveat.
 * @returns full compatibility report object
 */
function getFullCompatibilityReport() {
  const browser = getBrowserInfo();
  const webSpeechAPI = checkWebSpeechAPISupport();
  const webSocket = checkWebSocketSupport();
  const mediaDevices = checkMediaDevicesSupport();
  const audioContext = checkAudioContextSupport();
  const recommendations = [];
  if (!webSpeechAPI.supported) {
    recommendations.push("Consider using Chrome, Edge, or Safari for Web Speech API support");
  }
  if (!mediaDevices.supported) {
    recommendations.push("Ensure HTTPS is enabled and grant microphone permissions");
  }
  if (!audioContext.supported) {
    recommendations.push("Update to a modern browser for audio processing support");
  }
  const overallCompatible = (webSpeechAPI.supported || webSocket.supported) && mediaDevices.supported;
  if (browser.isMobile) {
    recommendations.push("Mobile support may vary; consider testing on desktop for best results");
  }
  return {
    browser,
    webSpeechAPI,
    webSocket,
    mediaDevices,
    audioContext,
    overallCompatible,
    recommendations
  };
}
|
|
3915
|
+
|
|
3916
|
+
// src/utils/timing.ts
|
|
3917
|
+
/**
 * Return a debounced wrapper: only the last call within a quiet period
 * of `wait` ms actually invokes `func`.
 * @param func - Function to debounce
 * @param wait - Quiet period in milliseconds
 * @returns Debounced function
 */
function debounce(func, wait) {
  let pending = null;
  return function debounced(...args) {
    if (pending !== null) {
      clearTimeout(pending);
    }
    pending = setTimeout(() => {
      func.apply(null, args);
      pending = null;
    }, wait);
  };
}
|
|
3929
|
+
/**
 * Return a throttled wrapper: `func` runs at most once per `limit` ms.
 * A call inside the window schedules one trailing invocation with the
 * most recent arguments.
 * @param func - Function to throttle
 * @param limit - Minimum interval between invocations (ms)
 * @returns Throttled function
 */
function throttle(func, limit) {
  let lastRun = 0;
  let trailing = null;
  return function throttled(...args) {
    const now = Date.now();
    const elapsed = now - lastRun;
    if (elapsed >= limit) {
      lastRun = now;
      func.apply(null, args);
      return;
    }
    if (trailing === null) {
      trailing = setTimeout(() => {
        lastRun = Date.now();
        func.apply(null, args);
        trailing = null;
      }, limit - elapsed);
    }
  };
}
|
|
3947
|
+
/**
 * Resolve after `ms` milliseconds.
 * @param ms - Delay in milliseconds
 * @returns Promise that resolves once the delay elapses
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
3950
|
+
/**
 * Race `promise` against a deadline: reject with Error(`message`) after
 * `ms` ms unless the promise settles first. The timer is cleared either
 * way so it never keeps the event loop alive.
 * @param promise - Promise to guard
 * @param ms - Deadline in milliseconds
 * @param message - Error message used on timeout
 * @returns Promise settling like `promise`, or rejecting on timeout
 */
function timeout(promise, ms, message = "Operation timed out") {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      reject(new Error(message));
    }, ms);
    const settle = (fn) => (value) => {
      clearTimeout(timer);
      fn(value);
    };
    promise.then(settle(resolve), settle(reject));
  });
}
|
|
3964
|
+
/**
 * Run `fn` with retries. Waits between attempts, growing the delay
 * exponentially (capped at maxDelay) or linearly. Stops early when
 * shouldRetry rejects the error; the last error is rethrown once
 * attempts are exhausted.
 * @param fn - Async function to attempt
 * @param options - { maxAttempts, delay, backoff, maxDelay, shouldRetry }
 * @returns Result of the first successful attempt
 */
async function retry(fn, options = {}) {
  const {
    maxAttempts = 3,
    delay = 1e3,
    backoff = "exponential",
    maxDelay = 3e4,
    shouldRetry = () => true
  } = options;
  let wait = delay;
  let lastError;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (attempt === maxAttempts || !shouldRetry(error)) {
        throw lastError;
      }
      // Inlined sleep to keep this block self-contained.
      await new Promise((resolve) => setTimeout(resolve, wait));
      wait = backoff === "exponential" ? Math.min(wait * 2, maxDelay) : Math.min(wait + delay, maxDelay);
    }
  }
  throw lastError;
}
|
|
3992
|
+
/**
 * A timeout that can be aborted. `promise` resolves after `ms` ms;
 * calling `cancel` clears the timer and rejects the promise with
 * "Timeout cancelled" (a no-op if it already resolved).
 * @param ms - Delay in milliseconds
 * @returns { promise, cancel }
 */
function cancellableTimeout(ms) {
  let timer;
  let rejectFn;
  const promise = new Promise((resolve, reject) => {
    rejectFn = reject;
    timer = setTimeout(resolve, ms);
  });
  return {
    promise,
    cancel: () => {
      clearTimeout(timer);
      rejectFn(new Error("Timeout cancelled"));
    }
  };
}
|
|
4005
|
+
/**
 * Repeatedly run an async function, waiting for each run to finish
 * before scheduling the next (unlike setInterval, runs never overlap).
 * Errors are logged and do not stop the loop.
 * @param fn - Async function to run
 * @param interval - Delay between the end of one run and the next (ms)
 * @param immediate - Run once right away instead of after the first delay
 * @returns Stop function that cancels future runs
 */
function setIntervalAsync(fn, interval, immediate = false) {
  let active = true;
  let timer;
  const tick = async () => {
    if (!active) return;
    try {
      await fn();
    } catch (error) {
      console.error("Interval function error:", error);
    }
    if (active) {
      timer = setTimeout(tick, interval);
    }
  };
  if (immediate) {
    void tick();
  } else {
    timer = setTimeout(tick, interval);
  }
  return () => {
    active = false;
    clearTimeout(timer);
  };
}
|
|
4029
|
+
|
|
4030
|
+
// src/utils/formatters.ts
|
|
4031
|
+
/**
 * Format a millisecond duration as a compact human string:
 * "Xh Ym", "Xm Ys", or "Xs". Negative input clamps to "0s".
 * Seconds are dropped once hours are shown.
 * @param ms - Duration in milliseconds
 * @returns Human-readable duration
 */
function formatDuration(ms) {
  if (ms < 0) return "0s";
  const totalSeconds = Math.floor(ms / 1e3);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const hours = Math.floor(totalMinutes / 60);
  if (hours > 0) {
    return `${hours}h ${totalMinutes % 60}m`;
  }
  if (totalMinutes > 0) {
    return `${totalMinutes}m ${totalSeconds % 60}s`;
  }
  return `${totalSeconds}s`;
}
|
|
4046
|
+
/**
 * Format a millisecond timestamp in one of several styles:
 * "srt" (HH:MM:SS,mmm), "vtt" (HH:MM:SS.mmm), "readable" (M:SS or
 * H:MM:SS), "iso" (ISO 8601), or "ms" (raw number as string).
 * Negative input clamps to 0; unknown formats fall back to "ms".
 * @param ms - Timestamp in milliseconds
 * @param format - One of "srt" | "vtt" | "readable" | "iso" | "ms"
 * @returns Formatted timestamp string
 */
function formatTimestamp(ms, format = "readable") {
  const clamped = ms < 0 ? 0 : ms;
  const totalSeconds = Math.floor(clamped / 1e3);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor(totalSeconds % 3600 / 60);
  const seconds = totalSeconds % 60;
  const millis = Math.floor(clamped % 1e3);
  const pad = (n, width = 2) => String(n).padStart(width, "0");
  switch (format) {
    case "srt":
      return `${pad(hours)}:${pad(minutes)}:${pad(seconds)},${pad(millis, 3)}`;
    case "vtt":
      return `${pad(hours)}:${pad(minutes)}:${pad(seconds)}.${pad(millis, 3)}`;
    case "readable":
      return hours > 0 ? `${hours}:${pad(minutes)}:${pad(seconds)}` : `${minutes}:${pad(seconds)}`;
    case "iso":
      return new Date(clamped).toISOString();
    case "ms":
    default:
      return String(clamped);
  }
}
|
|
4072
|
+
/**
 * Format a confidence score in [0, 1] as a rounded percentage.
 * Out-of-range values yield "N/A". (NaN passes the range check and
 * formats as "NaN%", matching the original comparison semantics.)
 * @param confidence - Score in [0, 1]
 * @returns Percentage string or "N/A"
 */
function formatConfidence(confidence) {
  if (confidence < 0 || confidence > 1) {
    return "N/A";
  }
  const percent = Math.round(confidence * 100);
  return `${percent}%`;
}
|
|
4078
|
+
/**
 * Format a byte count with binary (1024) units: "B" gets no decimals,
 * larger units show one. Negative input clamps to "0 B".
 * @param bytes - Size in bytes
 * @returns Human-readable size string
 */
function formatFileSize(bytes) {
  if (bytes < 0) return "0 B";
  const units = ["B", "KB", "MB", "GB", "TB"];
  let idx = 0;
  let value = bytes;
  while (value >= 1024 && idx < units.length - 1) {
    value /= 1024;
    idx += 1;
  }
  const decimals = idx > 0 ? 1 : 0;
  return `${value.toFixed(decimals)} ${units[idx]}`;
}
|
|
4089
|
+
/**
 * Render transcription results as display lines, one result per line.
 * Optional prefixes in fixed order: [timestamp] Speaker: (confidence).
 * Interim (non-final) results can be wrapped in asterisks. Output
 * longer than maxLength is truncated with "...".
 * @param results - Transcription results ({ text, isFinal, speaker?, ... })
 * @param options - { showTimestamps, showConfidence, highlightInterim, maxLength }
 * @returns Newline-joined display string
 */
function formatTranscriptForDisplay(results, options = {}) {
  const {
    showTimestamps = false,
    showConfidence = false,
    highlightInterim = false,
    maxLength
  } = options;
  const lines = results.map((result) => {
    const annotations = [];
    if (showTimestamps && result.timestamp) {
      annotations.push(`[${formatTimestamp(result.timestamp, "readable")}]`);
    }
    if (result.speaker) {
      annotations.push(`${result.speaker}:`);
    }
    if (showConfidence && result.confidence !== void 0) {
      annotations.push(`(${formatConfidence(result.confidence)})`);
    }
    const body = highlightInterim && !result.isFinal ? `*${result.text}*` : result.text;
    return annotations.length > 0 ? `${annotations.join(" ")} ${body}` : body;
  });
  let output = lines.join("\n");
  if (maxLength && output.length > maxLength) {
    output = output.substring(0, maxLength - 3) + "...";
  }
  return output;
}
|
|
4122
|
+
/**
 * Join result texts into one space-separated string, trimmed.
 * @param results - Transcription results
 * @param finalOnly - When true (default), skip interim results
 * @returns Plain-text transcript
 */
function formatAsPlainText(results, finalOnly = true) {
  const selected = finalOnly ? results.filter((r) => r.isFinal) : results;
  const joined = selected.map((r) => r.text).join(" ");
  return joined.trim();
}
|
|
4126
|
+
/**
 * Format a number with locale-aware grouping separators.
 * Uses the host environment's default locale, so output can vary
 * between runtimes (e.g. "1,234" vs "1.234").
 * @param num - Number to format
 * @returns Localized string representation
 */
function formatNumber(num) {
  return num.toLocaleString();
}
|
|
4129
|
+
/**
 * Truncate text to at most `maxLength` characters, replacing the tail
 * with "...". (Kept `substring` rather than `slice`: for maxLength < 3
 * the negative end index must clamp to 0, not count from the end.)
 * @param text - Input string
 * @param maxLength - Maximum output length
 * @returns Original or truncated string
 */
function truncateText(text, maxLength) {
  if (text.length <= maxLength) {
    return text;
  }
  return text.substring(0, maxLength - 3) + "...";
}
|
|
4133
|
+
|
|
4134
|
+
// src/utils/languageUtils.ts
|
|
4135
|
+
// BCP-47 code -> display names. `name` is the English label; `nativeName`
// is the label in the language itself (non-ASCII text appears as escape
// sequences emitted by the bundler). Bare codes ("en") and regioned codes
// ("en-US") are separate entries.
var LANGUAGE_MAP = {
  "en": { name: "English", nativeName: "English" },
  "en-US": { name: "English (US)", nativeName: "English (US)" },
  "en-GB": { name: "English (UK)", nativeName: "English (UK)" },
  "en-AU": { name: "English (Australia)", nativeName: "English (Australia)" },
  "en-CA": { name: "English (Canada)", nativeName: "English (Canada)" },
  "en-IN": { name: "English (India)", nativeName: "English (India)" },
  "es": { name: "Spanish", nativeName: "Espa\xF1ol" },
  "es-ES": { name: "Spanish (Spain)", nativeName: "Espa\xF1ol (Espa\xF1a)" },
  "es-MX": { name: "Spanish (Mexico)", nativeName: "Espa\xF1ol (M\xE9xico)" },
  "es-419": { name: "Spanish (Latin America)", nativeName: "Espa\xF1ol (Latinoam\xE9rica)" },
  "fr": { name: "French", nativeName: "Fran\xE7ais" },
  "fr-FR": { name: "French (France)", nativeName: "Fran\xE7ais (France)" },
  "fr-CA": { name: "French (Canada)", nativeName: "Fran\xE7ais (Canada)" },
  "de": { name: "German", nativeName: "Deutsch" },
  "de-DE": { name: "German (Germany)", nativeName: "Deutsch (Deutschland)" },
  "it": { name: "Italian", nativeName: "Italiano" },
  "it-IT": { name: "Italian (Italy)", nativeName: "Italiano (Italia)" },
  "pt": { name: "Portuguese", nativeName: "Portugu\xEAs" },
  "pt-BR": { name: "Portuguese (Brazil)", nativeName: "Portugu\xEAs (Brasil)" },
  "pt-PT": { name: "Portuguese (Portugal)", nativeName: "Portugu\xEAs (Portugal)" },
  "nl": { name: "Dutch", nativeName: "Nederlands" },
  "nl-NL": { name: "Dutch (Netherlands)", nativeName: "Nederlands (Nederland)" },
  "ja": { name: "Japanese", nativeName: "\u65E5\u672C\u8A9E" },
  "ja-JP": { name: "Japanese (Japan)", nativeName: "\u65E5\u672C\u8A9E (\u65E5\u672C)" },
  "ko": { name: "Korean", nativeName: "\uD55C\uAD6D\uC5B4" },
  "ko-KR": { name: "Korean (Korea)", nativeName: "\uD55C\uAD6D\uC5B4 (\uB300\uD55C\uBBFC\uAD6D)" },
  "zh": { name: "Chinese", nativeName: "\u4E2D\u6587" },
  "zh-CN": { name: "Chinese (Simplified)", nativeName: "\u4E2D\u6587 (\u7B80\u4F53)" },
  "zh-TW": { name: "Chinese (Traditional)", nativeName: "\u4E2D\u6587 (\u7E41\u9AD4)" },
  "ru": { name: "Russian", nativeName: "\u0420\u0443\u0441\u0441\u043A\u0438\u0439" },
  "ru-RU": { name: "Russian (Russia)", nativeName: "\u0420\u0443\u0441\u0441\u043A\u0438\u0439 (\u0420\u043E\u0441\u0441\u0438\u044F)" },
  "ar": { name: "Arabic", nativeName: "\u0627\u0644\u0639\u0631\u0628\u064A\u0629" },
  "ar-SA": { name: "Arabic (Saudi Arabia)", nativeName: "\u0627\u0644\u0639\u0631\u0628\u064A\u0629 (\u0627\u0644\u0633\u0639\u0648\u062F\u064A\u0629)" },
  "hi": { name: "Hindi", nativeName: "\u0939\u093F\u0928\u094D\u0926\u0940" },
  "hi-IN": { name: "Hindi (India)", nativeName: "\u0939\u093F\u0928\u094D\u0926\u0940 (\u092D\u093E\u0930\u0924)" },
  "tr": { name: "Turkish", nativeName: "T\xFCrk\xE7e" },
  "tr-TR": { name: "Turkish (Turkey)", nativeName: "T\xFCrk\xE7e (T\xFCrkiye)" },
  "pl": { name: "Polish", nativeName: "Polski" },
  "pl-PL": { name: "Polish (Poland)", nativeName: "Polski (Polska)" },
  "uk": { name: "Ukrainian", nativeName: "\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430" },
  "uk-UA": { name: "Ukrainian (Ukraine)", nativeName: "\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430 (\u0423\u043A\u0440\u0430\u0457\u043D\u0430)" },
  "sv": { name: "Swedish", nativeName: "Svenska" },
  "sv-SE": { name: "Swedish (Sweden)", nativeName: "Svenska (Sverige)" },
  "da": { name: "Danish", nativeName: "Dansk" },
  "da-DK": { name: "Danish (Denmark)", nativeName: "Dansk (Danmark)" },
  "no": { name: "Norwegian", nativeName: "Norsk" },
  "no-NO": { name: "Norwegian (Norway)", nativeName: "Norsk (Norge)" },
  "fi": { name: "Finnish", nativeName: "Suomi" },
  "fi-FI": { name: "Finnish (Finland)", nativeName: "Suomi (Suomi)" }
};
|
|
4186
|
+
// Language codes accepted by each provider. The computed keys are
// TranscriptionProvider enum values inlined by the bundler (the original
// member names survive in the /* ... */ comments). Custom providers
// declare their own languages, hence the empty list.
var PROVIDER_LANGUAGES = {
  ["web-speech" /* WebSpeechAPI */]: [
    "en-US", "en-GB", "en-AU", "en-CA", "en-IN",
    "es-ES", "es-MX",
    "fr-FR",
    "de-DE",
    "it-IT",
    "pt-BR", "pt-PT",
    "ja-JP",
    "ko-KR",
    "zh-CN", "zh-TW",
    "ru-RU",
    "ar-SA",
    "hi-IN",
    "nl-NL"
  ],
  ["deepgram" /* Deepgram */]: [
    "en", "en-US", "en-GB", "en-AU", "en-IN",
    "es", "es-ES", "es-419",
    "fr", "fr-FR", "fr-CA",
    "de", "de-DE",
    "it", "it-IT",
    "pt", "pt-BR", "pt-PT",
    "nl", "nl-NL",
    "ja", "ja-JP",
    "ko", "ko-KR",
    "zh", "zh-CN", "zh-TW",
    "ru", "ru-RU",
    "uk", "uk-UA",
    "hi", "hi-IN",
    "tr", "tr-TR",
    "pl", "pl-PL",
    "sv", "sv-SE",
    "da", "da-DK",
    "no", "no-NO",
    "fi", "fi-FI"
  ],
  ["assemblyai" /* AssemblyAI */]: [
    "en", "en-US", "en-GB", "en-AU"
  ],
  ["custom" /* Custom */]: []
};
|
|
4264
|
+
// Returns the language descriptors a provider supports. Each entry carries
// the code, English name, native name (falling back to the raw code when
// the code is not in LANGUAGE_MAP), and the provider it came from.
function getSupportedLanguages(provider) {
  const codes = PROVIDER_LANGUAGES[provider] || [];
  return codes.map((code) => {
    const entry = LANGUAGE_MAP[code] || { name: code, nativeName: code };
    return { code, name: entry.name, nativeName: entry.nativeName, provider };
  });
}
|
|
4276
|
+
// Normalizes a language tag to canonical casing: lowercase language,
// uppercase region, hyphen-separated ("en_us" -> "en-US"). A bare language
// stays bare and lowercase; a falsy input falls back to "en-US".
function normalizeLanguageCode(code) {
  if (!code) {
    return "en-US";
  }
  const pieces = code.trim().split(/[-_]/);
  const language = pieces[0].toLowerCase();
  return pieces.length === 1 ? language : `${language}-${pieces[1].toUpperCase()}`;
}
|
|
4287
|
+
// English display name for a language code; returns the input unchanged
// when the normalized code is not in LANGUAGE_MAP.
function getLanguageName(code) {
  const entry = LANGUAGE_MAP[normalizeLanguageCode(code)];
  return entry?.name || code;
}
|
|
4292
|
+
// Native-script display name for a language code; returns the input
// unchanged when the normalized code is not in LANGUAGE_MAP.
function getNativeLanguageName(code) {
  const entry = LANGUAGE_MAP[normalizeLanguageCode(code)];
  return entry?.nativeName || code;
}
|
|
4297
|
+
// Best-effort detection of the user's preferred language. Falls back to
// "en-US" outside a browser (no `navigator`) or when the browser reports
// nothing. `userLanguage` is a legacy IE property kept for compatibility.
function detectBrowserLanguage() {
  if (typeof navigator === "undefined") {
    return "en-US";
  }
  const preferred = navigator.language || navigator.userLanguage || "en-US";
  return normalizeLanguageCode(preferred);
}
|
|
4304
|
+
// True when the provider accepts the code exactly, or accepts the bare
// base language of a regional code (e.g. "en" for "en-NZ").
function isLanguageSupported(code, provider) {
  const accepted = PROVIDER_LANGUAGES[provider] || [];
  const normalized = normalizeLanguageCode(code);
  if (accepted.includes(normalized)) {
    return true;
  }
  return accepted.includes(normalized.split("-")[0]);
}
|
|
4313
|
+
// Picks the closest code the provider supports, in priority order:
// exact match -> bare base language -> any regional variant of the base
// language -> "en-US" if available -> the provider's first code -> "en-US".
function getBestMatchingLanguage(code, provider) {
  const candidates = PROVIDER_LANGUAGES[provider] || [];
  const normalized = normalizeLanguageCode(code);
  if (candidates.includes(normalized)) {
    return normalized;
  }
  const base = normalized.split("-")[0];
  if (candidates.includes(base)) {
    return base;
  }
  const regional = candidates.find((candidate) => candidate.startsWith(`${base}-`));
  if (regional) {
    return regional;
  }
  if (candidates.includes("en-US")) {
    return "en-US";
  }
  return candidates[0] || "en-US";
}
|
|
4329
|
+
|
|
4330
|
+
// src/utils/helpers.ts
|
|
4331
|
+
// Builds a loosely-unique id from the current time (base36) plus a short
// random suffix, optionally prefixed ("prefix-<time>-<rand>"). Not
// cryptographically secure — intended for session/transcript labels only.
function generateId(prefix) {
  const stamp = Date.now().toString(36);
  const suffix = Math.random().toString(36).substring(2, 9);
  const base = `${stamp}-${suffix}`;
  return prefix ? `${prefix}-${base}` : base;
}
|
|
4337
|
+
// Recursively copies a value. Primitives and functions are returned as-is;
// arrays, plain objects, Date, RegExp, Map, Set, ArrayBuffer and typed
// arrays are duplicated. Circular references are NOT handled and will
// recurse forever — callers must pass acyclic data.
function deepClone(obj) {
  if (obj === null || typeof obj !== "object") {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => deepClone(item));
  }
  if (obj instanceof Date) {
    return new Date(obj.getTime());
  }
  // BUGFIX: RegExp/Map/Set/typed-array values previously fell through to
  // the plain-object path and were cloned as empty (or index-keyed) plain
  // objects, silently losing their contents.
  if (obj instanceof RegExp) {
    return new RegExp(obj.source, obj.flags);
  }
  if (obj instanceof Map) {
    const copy = new Map();
    for (const [k, v] of obj) {
      copy.set(deepClone(k), deepClone(v));
    }
    return copy;
  }
  if (obj instanceof Set) {
    const copy = new Set();
    for (const v of obj) {
      copy.add(deepClone(v));
    }
    return copy;
  }
  if (obj instanceof ArrayBuffer) {
    return obj.slice(0);
  }
  if (ArrayBuffer.isView(obj)) {
    // Copies only this view's bytes; works for typed arrays and DataView.
    return new obj.constructor(
      obj.buffer.slice(obj.byteOffset, obj.byteOffset + obj.byteLength)
    );
  }
  const cloned = {};
  for (const key in obj) {
    if (Object.prototype.hasOwnProperty.call(obj, key)) {
      cloned[key] = deepClone(obj[key]);
    }
  }
  return cloned;
}
|
|
4358
|
+
// Merges any number of objects into a fresh result, left to right. Plain
// objects at the same key are merged recursively; everything else (arrays,
// primitives, class instances) is overwritten by the later source. Falsy
// sources are skipped.
function mergeDeep(...objects) {
  const isPlainObject = (v) => v !== null && typeof v === "object" && !Array.isArray(v);
  const merged = {};
  for (const source of objects) {
    if (!source) continue;
    for (const [key, value] of Object.entries(source)) {
      const current = merged[key];
      merged[key] = isPlainObject(value) && isPlainObject(current)
        ? mergeDeep(current, value)
        : value;
    }
  }
  return merged;
}
|
|
4378
|
+
// Type guard: true only for callable values.
function isFunction(value) {
  const kind = typeof value;
  return kind === "function";
}
|
|
4381
|
+
// Type guard: true for non-null, non-array objects (plain objects, class
// instances, Dates, etc.).
function isObject(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
4384
|
+
// "Emptiness" check: null/undefined, "", [], and {} (no own enumerable
// keys) are empty; every other value — including 0 and false — is not.
function isEmpty(value) {
  if (value == null) {
    return true;
  }
  if (typeof value === "string" || Array.isArray(value)) {
    return value.length === 0;
  }
  if (typeof value === "object") {
    return Object.keys(value).length === 0;
  }
  return false;
}
|
|
4391
|
+
// Returns a new object containing only the requested keys that the source
// actually owns; keys absent from the source are silently skipped.
function pick(obj, keys) {
  const owned = keys.filter((key) => Object.prototype.hasOwnProperty.call(obj, key));
  return Object.fromEntries(owned.map((key) => [key, obj[key]]));
}
|
|
4400
|
+
// Returns a shallow copy of the source with the listed keys removed.
// Spread (rather than Object.keys) keeps symbol-keyed properties intact.
function omit(obj, keys) {
  const kept = { ...obj };
  keys.forEach((unwanted) => {
    delete kept[unwanted];
  });
  return kept;
}
|
|
4407
|
+
// Polls `condition` every `interval` ms and resolves once it returns truthy,
// rejecting with "Timeout waiting for condition" after `timeoutMs`.
// BUGFIX: if `condition` threw inside the setTimeout callback, the error
// escaped as an unhandled exception and the promise never settled; the
// throw is now caught and forwarded as a rejection.
function waitFor(condition, interval = 100, timeoutMs = 5e3) {
  return new Promise((resolve, reject) => {
    const startTime = Date.now();
    const check = () => {
      let satisfied;
      try {
        satisfied = condition();
      } catch (err) {
        reject(err);
        return;
      }
      if (satisfied) {
        resolve();
        return;
      }
      if (Date.now() - startTime > timeoutMs) {
        reject(new Error("Timeout waiting for condition"));
        return;
      }
      setTimeout(check, interval);
    };
    check();
  });
}
|
|
4424
|
+
// Partitions `array` into buckets keyed by `keyFn(item)`, preserving the
// original order of items within each bucket.
function groupBy(array, keyFn) {
  const buckets = {};
  for (const item of array) {
    const bucket = keyFn(item);
    if (!buckets[bucket]) {
      buckets[bucket] = [];
    }
    buckets[bucket].push(item);
  }
  return buckets;
}
|
|
4434
|
+
// Restricts `value` to [min, max]. The Math.min/Math.max composition is
// kept deliberately: it defines the (degenerate) min > max and NaN cases
// exactly as before.
function clamp(value, min, max) {
  const atLeastMin = Math.max(value, min);
  return Math.min(atLeastMin, max);
}
|
|
4437
|
+
// Rounds `value` to `decimals` decimal places (half-away-from-zero via
// Math.round after scaling). Subject to the usual binary floating-point
// representation caveats.
function round(value, decimals = 0) {
  const scale = 10 ** decimals;
  return Math.round(value * scale) / scale;
}
|
|
4441
|
+
|
|
4442
|
+
// src/utils/audioUtils.ts
|
|
4443
|
+
// Uncompressed PCM bitrate in bits per second.
function calculateBitrate(sampleRate, bitDepth, channels) {
  const bitsPerSecondPerChannel = sampleRate * bitDepth;
  return bitsPerSecondPerChannel * channels;
}
|
|
4446
|
+
// Estimated raw PCM size in bytes for `durationMs` of audio. Missing (or
// falsy) config fields default to 16 kHz, 16-bit, mono.
function estimateAudioSize(durationMs, config) {
  const rate = config.sampleRate || 16e3;
  const depth = config.bitDepth || 16;
  const channelCount = config.channels || 1;
  const bytesPerSecond = rate * depth * channelCount / 8;
  const seconds = durationMs / 1e3;
  return Math.ceil(seconds * bytesPerSecond);
}
|
|
4454
|
+
// Smallest power-of-two buffer (256..16384) holding at least ~20 ms of
// audio at `sampleRate`.
function getOptimalBufferSize(sampleRate) {
  const target = sampleRate * 0.02; // ~20 ms worth of samples
  let candidate = 256;
  while (candidate < target && candidate < 16384) {
    candidate <<= 1;
  }
  return candidate < 256 ? 256 : candidate;
}
|
|
4462
|
+
// Sniffs an ArrayBuffer: when it carries a RIFF/WAVE header (>= 44 bytes)
// the parsed WAV metadata is returned; otherwise it is treated as raw PCM.
function validateAudioFormat(data) {
  const view = new DataView(data);
  const tagAt = (offset) => String.fromCharCode(
    view.getUint8(offset),
    view.getUint8(offset + 1),
    view.getUint8(offset + 2),
    view.getUint8(offset + 3)
  );
  if (data.byteLength >= 44 && tagAt(0) === "RIFF" && tagAt(8) === "WAVE") {
    return parseWavHeader(view);
  }
  return { format: "raw" };
}
|
|
4475
|
+
// Reads WAV metadata from a canonical 44-byte header (little-endian fields
// at fixed offsets). NOTE(review): assumes the fmt chunk sits at the
// standard position; WAV files with extra chunks would be misread — the
// caller only hands this RIFF/WAVE-tagged buffers. Returns { format: "wav" }
// alone if any read falls outside the buffer.
function parseWavHeader(view) {
  try {
    const channels = view.getUint16(22, true);
    const sampleRate = view.getUint32(24, true);
    const bitDepth = view.getUint16(34, true);
    const dataSize = view.getUint32(40, true);
    const bytesPerSecond = sampleRate * channels * (bitDepth / 8);
    return {
      format: "wav",
      sampleRate,
      channels,
      bitDepth,
      duration: dataSize / bytesPerSecond
    };
  } catch {
    return { format: "wav" };
  }
}
|
|
4493
|
+
// Root-mean-square amplitude of a sample buffer.
// BUGFIX: an empty buffer previously produced NaN (sqrt(0/0)); it now
// returns 0, the natural energy of no samples.
function calculateRMS(buffer) {
  if (buffer.length === 0) {
    return 0;
  }
  let sumSquares = 0;
  for (let i = 0; i < buffer.length; i++) {
    sumSquares += buffer[i] * buffer[i];
  }
  return Math.sqrt(sumSquares / buffer.length);
}
|
|
4500
|
+
// Largest absolute sample value in the buffer (0 for an empty buffer).
function calculatePeak(buffer) {
  let peak = 0;
  for (const sample of buffer) {
    const magnitude = Math.abs(sample);
    peak = Math.max(peak, magnitude);
  }
  return peak;
}
|
|
4508
|
+
// Converts decibels to linear amplitude (0 dB -> 1, +20 dB -> 10).
function dbToLinear(db) {
  return 10 ** (db / 20);
}
|
|
4511
|
+
// Converts linear amplitude to decibels; non-positive amplitudes map to
// -Infinity (silence has no finite dB level).
function linearToDb(linear) {
  if (linear <= 0) {
    return -Infinity;
  }
  return Math.log10(linear) * 20;
}
|
|
4515
|
+
// True when the buffer's RMS energy is below `threshold` (default 1e-3).
// The RMS computation is inlined so this predicate is self-contained.
function isSilence(buffer, threshold = 1e-3) {
  let energy = 0;
  for (let i = 0; i < buffer.length; i++) {
    energy += buffer[i] * buffer[i];
  }
  const rms = Math.sqrt(energy / buffer.length);
  return rms < threshold;
}
|
|
4519
|
+
// Builds a MediaTrackConstraints-shaped object for microphone capture.
// Falsy config fields fall back to 16 kHz mono; browser audio cleanup
// (echo cancellation, noise suppression, AGC) is always requested.
function getAudioConstraints(config = {}) {
  const processing = {
    echoCancellation: true,
    noiseSuppression: true,
    autoGainControl: true
  };
  return {
    sampleRate: config.sampleRate || 16e3,
    channelCount: config.channels || 1,
    ...processing
  };
}
|
|
4528
|
+
// Duration in milliseconds represented by `samples` at `sampleRate`.
// (Division happens first, exactly as before, to keep float results
// bit-identical.)
function samplesToDuration(samples, sampleRate) {
  const seconds = samples / sampleRate;
  return seconds * 1e3;
}
|
|
4531
|
+
// Whole number of samples covering `durationMs` at `sampleRate`
// (rounded to the nearest sample).
function durationToSamples(durationMs, sampleRate) {
  const seconds = durationMs / 1e3;
  return Math.round(seconds * sampleRate);
}
|
|
4534
|
+
|
|
4535
|
+
// src/index.ts
|
|
4536
|
+
// Semantic version of this build; keep in sync with package.json.
var VERSION = "0.1.0";
// Human-readable package identifier (used by the default-export bundle).
var LIBRARY_NAME = "live-transcribe";
|
|
4538
|
+
// Factory: instantiates the provider implementation named in
// `config.provider`. Deepgram and AssemblyAI require `config.apiKey`;
// "custom" and unknown providers always throw a TranscriptionError with
// code "invalid_config". (The /* ... */ annotations mirror the original
// TypeScript enum members.)
function createTranscriber(config) {
  const { provider } = config;
  if (provider === "web-speech" /* WebSpeechAPI */) {
    return new WebSpeechProvider(config);
  }
  if (provider === "deepgram" /* Deepgram */) {
    if (!config.apiKey) {
      throw new TranscriptionError(
        "API key is required for Deepgram provider",
        "invalid_config" /* INVALID_CONFIG */,
        "deepgram" /* Deepgram */
      );
    }
    return new DeepgramProvider(config);
  }
  if (provider === "assemblyai" /* AssemblyAI */) {
    if (!config.apiKey) {
      throw new TranscriptionError(
        "API key is required for AssemblyAI provider",
        "invalid_config" /* INVALID_CONFIG */,
        "assemblyai" /* AssemblyAI */
      );
    }
    return new AssemblyAIProvider(config);
  }
  if (provider === "custom" /* Custom */) {
    throw new TranscriptionError(
      "Custom provider requires manual implementation",
      "invalid_config" /* INVALID_CONFIG */
    );
  }
  throw new TranscriptionError(
    `Unsupported provider: ${provider}`,
    "invalid_config" /* INVALID_CONFIG */
  );
}
|
|
4572
|
+
// Convenience factory: builds a provider from `config` and wraps it in a
// TranscriptionSession configured by `sessionConfig`.
function createSession(config, sessionConfig) {
  return new TranscriptionSession(createTranscriber(config), sessionConfig);
}
|
|
4576
|
+
/**
 * One-call setup: picks a provider, wires event callbacks, initializes the
 * transcriber, and starts a session.
 *
 * Provider selection when none is requested: an API key implies Deepgram;
 * otherwise, in a browser, the Web Speech API is used if present; anything
 * else throws a TranscriptionError.
 *
 * @param options - provider/apiKey/language plus optional onTranscript,
 *   onError, onStart, onStop callbacks; interimResults (default true) and
 *   recordAudio (default false).
 * @returns the started TranscriptionSession.
 * @throws TranscriptionError when no provider can be chosen, when
 *   createTranscriber rejects the config, or when initialize/start fails.
 */
async function quickStart(options = {}) {
  const {
    provider: requestedProvider,
    apiKey,
    language = "en-US",
    onTranscript,
    onError,
    onStart,
    onStop,
    interimResults = true,
    recordAudio = false
  } = options;
  let provider = requestedProvider;
  if (!provider) {
    if (apiKey) {
      // A key with no explicit provider defaults to Deepgram.
      provider = "deepgram" /* Deepgram */;
    } else if (typeof window !== "undefined") {
      // Browser: fall back to the native Web Speech API if available.
      const hasWebSpeech = !!(window.SpeechRecognition || window.webkitSpeechRecognition);
      if (hasWebSpeech) {
        provider = "web-speech" /* WebSpeechAPI */;
      } else {
        throw new TranscriptionError(
          "No speech recognition available. Provide an API key for Deepgram or AssemblyAI.",
          "unsupported_browser" /* UNSUPPORTED_BROWSER */
        );
      }
    } else {
      // Node.js has no auto-detectable provider without a key.
      throw new TranscriptionError(
        "Provider must be specified in Node.js environment",
        "invalid_config" /* INVALID_CONFIG */
      );
    }
  }
  const transcriptionConfig = {
    provider,
    apiKey,
    language,
    interimResults
  };
  const sessionConfig = {
    recordAudio
  };
  const transcriber = createTranscriber(transcriptionConfig);
  const session = new TranscriptionSession(transcriber, sessionConfig);
  // Callbacks are attached BEFORE initialize/start so no early event is lost.
  if (onTranscript) {
    transcriber.on("transcript", onTranscript);
  }
  if (onError) {
    transcriber.on("error", onError);
  }
  if (onStart) {
    transcriber.on("start", onStart);
  }
  if (onStop) {
    transcriber.on("stop", onStop);
  }
  try {
    await transcriber.initialize();
    await session.start();
  } catch (error) {
    // Surface startup failures through onError (TranscriptionError only),
    // then rethrow so the caller's await also rejects.
    if (onError && error instanceof TranscriptionError) {
      onError(error);
    }
    throw error;
  }
  return session;
}
|
|
4643
|
+
// Whether the named provider can run in the current environment, as
// reported by a throwaway provider instance. The placeholder "test" key
// only satisfies construction; it is never sent anywhere.
function isProviderSupported(provider) {
  if (provider === "web-speech" /* WebSpeechAPI */) {
    return new WebSpeechProvider({ provider }).isSupported();
  }
  if (provider === "deepgram" /* Deepgram */) {
    return new DeepgramProvider({ provider, apiKey: "test" }).isSupported();
  }
  if (provider === "assemblyai" /* AssemblyAI */) {
    return new AssemblyAIProvider({ provider, apiKey: "test" }).isSupported();
  }
  return false;
}
|
|
4655
|
+
// Lists the built-in providers usable in the current environment, in the
// fixed order web-speech, deepgram, assemblyai.
function getSupportedProviders() {
  const builtIns = [
    "web-speech" /* WebSpeechAPI */,
    "deepgram" /* Deepgram */,
    "assemblyai" /* AssemblyAI */
  ];
  return builtIns.filter((candidate) => isProviderSupported(candidate));
}
|
|
4668
|
+
// Default export: convenience namespace bundling the primary entry points
// and classes, so `import lib from "@360labs/live-transcribe"` works in
// addition to named imports.
var index_default = {
  VERSION,
  LIBRARY_NAME,
  createTranscriber,
  createSession,
  quickStart,
  isProviderSupported,
  getSupportedProviders,
  TranscriptionProvider,
  WebSpeechProvider,
  DeepgramProvider,
  AssemblyAIProvider,
  TranscriptionSession,
  SessionManager
};
|
|
4683
|
+
|
|
4684
|
+
// CommonJS export surface, alphabetized by the bundler.
// Classes, providers, and enums:
exports.AssemblyAIProvider = AssemblyAIProvider;
exports.AudioBufferManager = AudioBufferManager;
exports.AudioEncoding = AudioEncoding;
exports.AudioLevelMonitor = AudioLevelMonitor;
exports.AudioProcessor = AudioProcessor;
exports.AudioRecorder = AudioRecorder;
exports.BaseTranscriber = BaseTranscriber;
exports.CSVExporter = CSVExporter;
exports.DEFAULT_AUDIO_CONFIG = DEFAULT_AUDIO_CONFIG;
exports.DEFAULT_SESSION_CONFIG = DEFAULT_SESSION_CONFIG;
exports.DEFAULT_TRANSCRIPTION_CONFIG = DEFAULT_TRANSCRIPTION_CONFIG;
exports.DeepgramProvider = DeepgramProvider;
exports.ErrorCode = ErrorCode;
exports.EventEmitter = EventEmitter;
exports.JSONExporter = JSONExporter;
exports.LIBRARY_NAME = LIBRARY_NAME;
exports.LocalStorageAdapter = LocalStorageAdapter;
exports.MemoryStorageAdapter = MemoryStorageAdapter;
exports.SRTExporter = SRTExporter;
exports.SessionManager = SessionManager;
exports.SessionState = SessionState;
exports.TextExporter = TextExporter;
exports.TranscriptionError = TranscriptionError;
exports.TranscriptionProvider = TranscriptionProvider;
exports.TranscriptionSession = TranscriptionSession;
exports.VERSION = VERSION;
exports.VTTExporter = VTTExporter;
exports.VoiceActivityDetector = VoiceActivityDetector;
exports.WebSpeechProvider = WebSpeechProvider;
// Utility functions:
exports.calculateBitrate = calculateBitrate;
exports.calculatePeak = calculatePeak;
exports.calculateRMS = calculateRMS;
exports.cancellableTimeout = cancellableTimeout;
exports.checkAudioContextSupport = checkAudioContextSupport;
exports.checkMediaDevicesSupport = checkMediaDevicesSupport;
exports.checkWebSocketSupport = checkWebSocketSupport;
exports.checkWebSpeechAPISupport = checkWebSpeechAPISupport;
exports.clamp = clamp;
exports.createSession = createSession;
exports.createTranscriber = createTranscriber;
exports.dbToLinear = dbToLinear;
exports.debounce = debounce;
exports.deepClone = deepClone;
// ESM-interop default export (the index_default namespace object).
exports.default = index_default;
exports.detectBrowserLanguage = detectBrowserLanguage;
exports.durationToSamples = durationToSamples;
exports.estimateAudioSize = estimateAudioSize;
exports.formatAsPlainText = formatAsPlainText;
exports.formatConfidence = formatConfidence;
exports.formatDuration = formatDuration;
exports.formatFileSize = formatFileSize;
exports.formatNumber = formatNumber;
exports.formatTimestamp = formatTimestamp;
exports.formatTranscriptForDisplay = formatTranscriptForDisplay;
exports.generateId = generateId;
exports.getAudioConstraints = getAudioConstraints;
exports.getBestMatchingLanguage = getBestMatchingLanguage;
exports.getBrowserInfo = getBrowserInfo;
exports.getFullCompatibilityReport = getFullCompatibilityReport;
exports.getLanguageName = getLanguageName;
exports.getNativeLanguageName = getNativeLanguageName;
exports.getOptimalBufferSize = getOptimalBufferSize;
exports.getSupportedLanguages = getSupportedLanguages;
exports.getSupportedProviders = getSupportedProviders;
exports.groupBy = groupBy;
exports.isEmpty = isEmpty;
exports.isFunction = isFunction;
exports.isLanguageSupported = isLanguageSupported;
exports.isObject = isObject;
exports.isProviderSupported = isProviderSupported;
exports.isSilence = isSilence;
exports.linearToDb = linearToDb;
exports.mergeDeep = mergeDeep;
exports.normalizeLanguageCode = normalizeLanguageCode;
exports.omit = omit;
exports.pick = pick;
exports.quickStart = quickStart;
exports.retry = retry;
exports.round = round;
exports.samplesToDuration = samplesToDuration;
exports.setIntervalAsync = setIntervalAsync;
exports.sleep = sleep;
exports.throttle = throttle;
exports.timeout = timeout;
exports.truncateText = truncateText;
exports.validateApiKey = validateApiKey;
exports.validateAudioConfig = validateAudioConfig;
exports.validateAudioFormat = validateAudioFormat;
exports.validateLanguageCode = validateLanguageCode;
exports.validateSessionConfig = validateSessionConfig;
exports.validateTranscriptionConfig = validateTranscriptionConfig;
exports.waitFor = waitFor;
|
|
4776
|
+
//# sourceMappingURL=index.js.map
|
|
4777
|
+
//# sourceMappingURL=index.js.map
|