@sridhar-mani/whisper-web-transcriber 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,433 @@
1
/**
 * Browser-side live transcription driven by a whisper WASM build.
 *
 * Typical flow: `await loadModel()` (which runs `initialize()` on demand),
 * then `await startRecording()`, and finally `stopRecording()` / `destroy()`.
 * Transcribed text is delivered through `config.onTranscription`.
 *
 * The class talks to several globals that are expected to be provided by the
 * page or by scripts it injects: `window.Module` (the WASM runtime),
 * `window.loadRemote` (from helpers.js) and the optional inlined sources
 * `window.LIBSTREAM_CODE`, `window.LIBSTREAM_WORKER_CODE`,
 * `window.HELPERS_CODE` and `window.COI_SERVICEWORKER_CODE`.
 */
class WhisperTranscriber {
    /**
     * @param {Object} [config]
     * @param {string} [config.modelUrl] Explicit model URL; defaults to the MODEL_URLS entry for `modelSize`.
     * @param {string} [config.modelSize='base-en-q5_1'] Key into MODEL_URLS / MODEL_SIZES.
     * @param {number} [config.sampleRate=16000] Sample rate requested for the AudioContext.
     * @param {number} [config.audioIntervalMs=5000] Timeslice passed to MediaRecorder.start().
     * @param {function(string): void} [config.onTranscription] Receives each transcribed text.
     * @param {function(number): void} [config.onProgress] Receives model download progress (0-100).
     * @param {function(string): void} [config.onStatus] Receives human readable status messages.
     * @param {boolean} [config.debug=false] Enables console logging through log().
     */
    constructor(config = {}) {
        this.instance = null;
        this.mediaRecorder = null;
        this.audioContext = null;
        this.isRecording = false;
        this.audio = null;
        this.audio0 = null;
        this.Module = null;
        this.modelLoaded = false;
        this.initPromise = null;
        // Handle of the active transcription polling interval (at most one at a time).
        this.pollTimer = null;
        const modelSize = config.modelSize || 'base-en-q5_1';
        this.config = {
            modelUrl: config.modelUrl || WhisperTranscriber.MODEL_URLS[modelSize],
            modelSize: modelSize,
            sampleRate: config.sampleRate || 16000,
            audioIntervalMs: config.audioIntervalMs || 5000,
            onTranscription: config.onTranscription || (() => { }),
            onProgress: config.onProgress || (() => { }),
            onStatus: config.onStatus || (() => { }),
            debug: config.debug || false,
        };
        // Only warns about missing cross-origin isolation; nothing is registered here.
        this.registerServiceWorkerIfNeeded();
    }
    /**
     * Writes a debug message to the console when `config.debug` is enabled.
     * @param {*} message
     */
    log(message) {
        if (this.config.debug) {
            console.log('[WhisperTranscriber]', message);
        }
    }
    /**
     * Warns (once, at construction) when the page is not cross-origin isolated
     * and inlined service worker code is available to help the user fix it.
     * A service worker cannot be registered from here: it has to be served
     * from the page's own origin.
     */
    async registerServiceWorkerIfNeeded() {
        if (window.crossOriginIsolated || !window.COI_SERVICEWORKER_CODE) {
            return;
        }
        console.warn('[WhisperTranscriber] SharedArrayBuffer is not available. ' +
            'To enable it, you need to serve your site with COOP/COEP headers or use a service worker.\n' +
            'You can get the service worker code by calling: transcriber.getServiceWorkerCode()');
    }
    /**
     * Returns the COI service worker code that users need to save and serve from their domain
     * @returns {string|null} The inlined code, or null when it is not available.
     */
    getServiceWorkerCode() {
        return window.COI_SERVICEWORKER_CODE || null;
    }
    /**
     * Helper to generate instructions for setting up Cross-Origin Isolation
     * @returns {string} Setup instructions, or a confirmation when isolation is already active.
     */
    getCrossOriginIsolationInstructions() {
        if (window.crossOriginIsolated) {
            return 'Cross-Origin Isolation is already enabled! No action needed.';
        }
        const swCode = this.getServiceWorkerCode();
        return `
Cross-Origin Isolation Setup Required
=====================================

WhisperTranscriber requires SharedArrayBuffer, which needs Cross-Origin Isolation.

Option 1: Server Headers (Recommended)
--------------------------------------
Configure your server to send these headers:
Cross-Origin-Embedder-Policy: require-corp
Cross-Origin-Opener-Policy: same-origin

Option 2: Service Worker
------------------------
1. Save the following code as 'coi-serviceworker.js' in your website root:

${swCode ? '--- START SERVICE WORKER CODE ---\n' + swCode + '\n--- END SERVICE WORKER CODE ---' : '[Service worker code not available]'}

2. Register the service worker by adding this to your HTML:
<script src="/coi-serviceworker.js"></script>

3. Reload the page after registration.

Current Status:
- crossOriginIsolated: ${window.crossOriginIsolated}
- SharedArrayBuffer available: ${typeof SharedArrayBuffer !== 'undefined'}
`.trim();
    }
    /**
     * Base path from which non-inlined assets are loaded.
     * @returns {string}
     */
    getScriptBasePath() {
        // Always use local src/ directory for all assets
        return '/src/';
    }
    /**
     * Downloads a script as text.
     * @param {string} url
     * @returns {Promise<string>}
     * @throws {Error} When the server answers with a non-2xx status, so that an
     *   error page is never mistaken for script source.
     */
    async fetchScriptText(url) {
        const response = await fetch(url);
        if (!response.ok) {
            throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
        }
        return response.text();
    }
    /**
     * Wraps JavaScript source in a blob URL.
     * @param {string} code
     * @returns {string}
     */
    createBlobUrl(code) {
        return URL.createObjectURL(new Blob([code], { type: 'application/javascript' }));
    }
    /**
     * Appends a <script> element to the document head.
     * @param {string} src
     * @param {string} errorMessage Message of the Error used when loading fails.
     * @returns {Promise<void>} Resolves on `load`, rejects on `error`.
     */
    appendScript(src, errorMessage) {
        return new Promise((resolve, reject) => {
            const script = document.createElement('script');
            script.src = src;
            script.onload = () => resolve();
            script.onerror = () => reject(new Error(errorMessage));
            document.head.appendChild(script);
        });
    }
    /**
     * Returns a native function of the WASM module, falling back to `cwrap`
     * when the module does not expose it as a property.
     * @param {'init'|'set_audio'|'get_transcribed'|'set_status'} name
     * @returns {Function}
     */
    getNativeFunction(name) {
        if (this.Module[name]) {
            return this.Module[name];
        }
        const signatures = {
            init: ['number', ['string']],
            set_audio: ['', ['number', 'array']],
            get_transcribed: ['string', []],
            set_status: ['', ['string']],
        };
        const [returnType, argTypes] = signatures[name];
        return this.Module.cwrap(name, returnType, argTypes);
    }
    /**
     * Fetches a worker script and starts it from a blob URL.
     * @param {string} url
     * @returns {Promise<Worker>}
     * @throws {Error} When the script cannot be fetched.
     */
    async createWorkerFromURL(url) {
        const workerCode = await this.fetchScriptText(url);
        return new Worker(this.createBlobUrl(workerCode));
    }
    /**
     * Prepares the worker blob URL, installs `window.Module` and loads the
     * WASM glue script.
     * @returns {Promise<void>} Resolves once the runtime is initialized and the
     *   whisper functions are available on `this.Module`.
     */
    async loadWasmModule() {
        if (window.LIBSTREAM_WORKER_CODE) {
            this.log('Using inlined worker code');
            window.__whisperWorkerBlobUrl = this.createBlobUrl(window.LIBSTREAM_WORKER_CODE);
            this.log('Worker blob URL created from inlined code');
        }
        else {
            // Fallback to fetching worker
            try {
                const workerCode = await this.fetchScriptText(this.getScriptBasePath() + 'libstream.worker.js');
                // Store the blob URL for later use by locateFile()
                window.__whisperWorkerBlobUrl = this.createBlobUrl(workerCode);
                this.log('Worker script loaded and blob URL created');
            }
            catch (error) {
                this.log('Failed to pre-fetch worker: ' + error);
                // Continue anyway, it might work with direct loading
            }
        }
        return new Promise((resolve, reject) => {
            // Configure Module before the script loads
            window.Module = {
                locateFile: (path) => {
                    // If it's the worker and we have a blob URL, use it
                    if (path === 'libstream.worker.js' && window.__whisperWorkerBlobUrl) {
                        return window.__whisperWorkerBlobUrl;
                    }
                    return this.getScriptBasePath() + path;
                },
                onRuntimeInitialized: () => {
                    this.log('WASM runtime initialized');
                    // Deferred so the module can finish setting up its own functions first.
                    setTimeout(() => {
                        const module = window.Module;
                        if (!module) {
                            reject(new Error('Module not available after runtime initialized'));
                            return;
                        }
                        try {
                            this.Module = module;
                            // Set up the whisper functions if they don't exist
                            ['init', 'set_audio', 'get_transcribed', 'set_status'].forEach((name) => {
                                if (!module[name]) {
                                    module[name] = this.getNativeFunction(name);
                                }
                            });
                        }
                        catch (error) {
                            // Without this the promise would never settle.
                            reject(error);
                            return;
                        }
                        this.log('WASM module loaded and functions initialized');
                        resolve();
                    }, 100);
                }
            };
            let scriptUrl;
            if (window.LIBSTREAM_CODE) {
                this.log('Using inlined libstream code');
                scriptUrl = this.createBlobUrl(window.LIBSTREAM_CODE);
            }
            else {
                scriptUrl = this.getScriptBasePath() + 'libstream.js';
            }
            // Success is signalled through onRuntimeInitialized, not the load event.
            this.appendScript(scriptUrl, 'Failed to load WASM module').catch(reject);
        });
    }
    /**
     * Loads helpers.js, either from the inlined source or from the base path.
     * @returns {Promise<void>}
     */
    async loadHelpers() {
        let scriptUrl;
        if (window.HELPERS_CODE) {
            this.log('Using inlined helpers code');
            scriptUrl = this.createBlobUrl(window.HELPERS_CODE);
        }
        else {
            scriptUrl = this.getScriptBasePath() + 'helpers.js';
        }
        return this.appendScript(scriptUrl, 'Failed to load helpers');
    }
    /**
     * Best-effort load of coi-serviceworker.js when SharedArrayBuffer is
     * missing. Never rejects: a failure is only logged.
     * @returns {Promise<void>}
     */
    async loadCOIServiceWorker() {
        if (typeof SharedArrayBuffer !== 'undefined') {
            this.log('SharedArrayBuffer already available');
            return;
        }
        const src = this.getScriptBasePath() + 'coi-serviceworker.js';
        return this.appendScript(src, 'Failed to load COI service worker').then(() => {
            this.log('COI service worker loaded');
        }, () => {
            // Continue anyway
            this.log('Failed to load COI service worker - SharedArrayBuffer may not be available');
        });
    }
    /**
     * Loads helpers and the WASM runtime. Concurrent and repeated calls share
     * one attempt; a failed attempt is forgotten so a later call can retry.
     * @returns {Promise<void>}
     */
    async initialize() {
        if (this.initPromise) {
            return this.initPromise;
        }
        const attempt = (async () => {
            try {
                // Try to load COI service worker first for SharedArrayBuffer support
                await this.loadCOIServiceWorker();
                // Set up global variables required by helpers.js
                window.dbVersion = 1;
                window.dbName = 'whisper.transcriber.models';
                await this.loadHelpers();
                this.log('Helpers loaded');
                await this.loadWasmModule();
                this.log('WASM module initialized');
                this.config.onStatus('Ready to load model');
            }
            catch (error) {
                this.log('Failed to initialize: ' + error);
                throw error;
            }
        })();
        this.initPromise = attempt;
        // Do not keep a rejected promise cached, otherwise every later call fails forever.
        attempt.catch(() => {
            if (this.initPromise === attempt) {
                this.initPromise = null;
            }
        });
        return attempt;
    }
    /**
     * Downloads (or reads from cache, via helpers.js `loadRemote`) the model
     * and stores it in the WASM file system as `whisper.bin`.
     * @returns {Promise<void>}
     * @throws {Error} When no model URL is configured, loading is cancelled,
     *   or the model cannot be written to the WASM file system.
     */
    async loadModel() {
        if (this.modelLoaded) {
            this.log('Model already loaded');
            return;
        }
        await this.initialize();
        if (!this.Module && window.Module) {
            // destroy() dropped our reference but the runtime is still loaded.
            this.Module = window.Module;
        }
        const url = this.config.modelUrl;
        if (!url) {
            throw new Error(`No model URL configured for model size "${this.config.modelSize}"`);
        }
        return new Promise((resolve, reject) => {
            const size_mb = WhisperTranscriber.MODEL_SIZES[this.config.modelSize];
            this.config.onStatus('Loading model...');
            const storeFS = (fname, buf) => {
                try {
                    try {
                        this.Module.FS_unlink(fname);
                    }
                    catch (e) {
                        // File doesn't exist, ignore
                    }
                    this.Module.FS_createDataFile("/", fname, buf, true, true);
                }
                catch (error) {
                    // Surface the failure instead of leaving the promise pending.
                    reject(error);
                    return;
                }
                this.log(`Model stored: ${fname}, size: ${buf.length}`);
                this.modelLoaded = true;
                this.config.onStatus('Model loaded successfully');
                resolve();
            };
            const cbProgress = (progress) => {
                this.config.onProgress(Math.round(progress * 100));
            };
            const cbCancel = () => {
                this.config.onStatus('Model loading cancelled');
                reject(new Error('Model loading cancelled'));
            };
            const cbPrint = (msg) => {
                this.log(msg);
            };
            // Use the global loadRemote function from helpers.js
            window.loadRemote(url, 'whisper.bin', size_mb, cbProgress, storeFS, cbCancel, cbPrint);
        });
    }
    /**
     * Stops every track of a media stream so the capture device is released.
     * @param {MediaStream} stream
     */
    releaseStream(stream) {
        stream.getTracks().forEach((track) => track.stop());
    }
    /**
     * Releases everything a recording session owns: the polling timer, the
     * recorder (and its stream) and the audio context. Does not touch the
     * whisper status or emit status callbacks.
     */
    teardownRecording() {
        this.isRecording = false;
        if (this.pollTimer !== null) {
            clearInterval(this.pollTimer);
            this.pollTimer = null;
        }
        const recorder = this.mediaRecorder;
        this.mediaRecorder = null;
        if (recorder) {
            // stop() throws InvalidStateError on a recorder that is already inactive.
            if (recorder.state !== 'inactive') {
                recorder.stop();
            }
            if (recorder.stream) {
                this.releaseStream(recorder.stream);
            }
        }
        const context = this.audioContext;
        this.audioContext = null;
        if (context) {
            const closing = context.close();
            if (closing && typeof closing.catch === 'function') {
                closing.catch((error) => this.log('Failed to close audio context: ' + error));
            }
        }
    }
    /**
     * Decodes everything recorded so far and hands the samples to whisper.
     * The chunks are always decoded from the first one on.
     * @param {Blob[]} chunks Recorder output collected since the recording started.
     */
    submitRecordedAudio(chunks) {
        const blob = new Blob(chunks, { type: 'audio/ogg; codecs=opus' });
        const reader = new FileReader();
        reader.onerror = () => this.log('Failed to read recorded audio: ' + reader.error);
        reader.onload = (event) => {
            const context = this.audioContext;
            if (!context) {
                return;
            }
            const buf = new Uint8Array(event.target.result);
            context.decodeAudioData(buf.buffer).then((audioBuffer) => {
                const offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                const source = offlineContext.createBufferSource();
                source.buffer = audioBuffer;
                source.connect(offlineContext.destination);
                source.start(0);
                return offlineContext.startRendering();
            }).then((renderedBuffer) => {
                if (!this.isRecording) {
                    // Stopped while decoding; stopRecording() already cleared the buffers.
                    return;
                }
                this.audio = renderedBuffer.getChannelData(0);
                const previousLength = this.audio0 == null ? 0 : this.audio0.length;
                const audioAll = new Float32Array(previousLength + this.audio.length);
                if (this.audio0 != null) {
                    audioAll.set(this.audio0, 0);
                }
                audioAll.set(this.audio, previousLength);
                if (this.instance) {
                    this.getNativeFunction('set_audio')(this.instance, audioAll);
                }
            }).catch((error) => {
                this.log('Failed to decode recorded audio: ' + error);
            });
        };
        reader.readAsArrayBuffer(blob);
    }
    /**
     * Handles the recorder's `stop` event. A stop requested through
     * stopRecording() only releases the stream; a stop that happens while we
     * still consider ourselves recording tears the session down and starts a
     * fresh one.
     * @param {MediaRecorder} recorder The recorder that fired the event.
     */
    handleRecorderStop(recorder) {
        if (recorder.stream) {
            this.releaseStream(recorder.stream);
        }
        if (!this.isRecording || this.mediaRecorder !== recorder) {
            return;
        }
        this.log('Recorder stopped unexpectedly, restarting');
        // startRecording() refuses to run while isRecording is set, so reset first.
        this.teardownRecording();
        setTimeout(() => {
            this.startRecording().catch((error) => {
                this.log('Failed to restart recording: ' + error);
            });
        }, 0);
    }
    /**
     * Starts capturing microphone audio and polling for transcriptions.
     * Does nothing when a recording is already in progress.
     * @returns {Promise<void>}
     * @throws {Error} When the model is not loaded, whisper cannot be
     *   initialized, or the microphone/recorder cannot be started.
     */
    async startRecording() {
        if (!this.modelLoaded) {
            throw new Error('Model not loaded. Call loadModel() first.');
        }
        if (this.isRecording) {
            this.log('Already recording');
            return;
        }
        // Initialize whisper instance
        if (!this.instance) {
            this.instance = this.getNativeFunction('init')('whisper.bin');
            if (!this.instance) {
                throw new Error('Failed to initialize Whisper');
            }
            this.log('Whisper instance initialized');
        }
        // NOTE(review): channelCount / echoCancellation / autoGainControl / noiseSuppression
        // used to be passed here, but they are MediaTrackConstraints, not AudioContext
        // options, and were ignored. They are intentionally not moved to getUserMedia()
        // because that would change what is captured.
        const context = new AudioContext({ sampleRate: this.config.sampleRate });
        this.audioContext = context;
        this.getNativeFunction('set_status')("");
        this.isRecording = true;
        this.config.onStatus('Recording...');
        const chunks = [];
        let stream = null;
        try {
            stream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
            if (!this.isRecording || this.audioContext !== context) {
                // stopRecording() ran (or another session started) while the
                // permission prompt was pending: do not leave the microphone on.
                this.releaseStream(stream);
                return;
            }
            const recorder = new MediaRecorder(stream);
            this.mediaRecorder = recorder;
            recorder.ondataavailable = (e) => {
                if (this.mediaRecorder !== recorder) {
                    // Final chunk of a session that has already been stopped.
                    return;
                }
                chunks.push(e.data);
                this.submitRecordedAudio(chunks);
            };
            recorder.onstop = () => this.handleRecorderStop(recorder);
            recorder.start(this.config.audioIntervalMs);
            // Start transcription polling
            this.startTranscriptionPolling();
        }
        catch (error) {
            if (stream) {
                this.releaseStream(stream);
            }
            if (this.audioContext === context) {
                // Still our session: release the context and reset the state.
                this.teardownRecording();
            }
            this.config.onStatus('Error: ' + error.message);
            throw error;
        }
    }
    /**
     * Polls whisper every 100 ms and forwards non-trivial results to
     * `config.onTranscription`. Replaces any poller that is still running.
     */
    startTranscriptionPolling() {
        if (this.pollTimer !== null) {
            clearInterval(this.pollTimer);
        }
        const timer = setInterval(() => {
            if (!this.isRecording) {
                clearInterval(timer);
                if (this.pollTimer === timer) {
                    this.pollTimer = null;
                }
                return;
            }
            const transcribed = this.getNativeFunction('get_transcribed')();
            if (transcribed != null && transcribed.length > 1) {
                this.config.onTranscription(transcribed);
            }
        }, 100);
        this.pollTimer = timer;
    }
    /**
     * Stops the current recording, releases the microphone and audio context
     * and reports 'Stopped'. Does nothing when not recording.
     */
    stopRecording() {
        if (!this.isRecording) {
            this.log('Not recording');
            return;
        }
        this.getNativeFunction('set_status')("paused");
        this.audio0 = null;
        this.audio = null;
        this.teardownRecording();
        this.config.onStatus('Stopped');
    }
    /**
     * Stops recording and drops the references to the whisper instance and
     * the WASM module. The model has to be loaded again before reuse.
     */
    destroy() {
        this.stopRecording();
        this.instance = null;
        this.Module = null;
        this.modelLoaded = false;
    }
}
420
/**
 * Download URL of each bundled model, keyed by the `modelSize` config value.
 * All files live in the same Hugging Face repository.
 */
WhisperTranscriber.MODEL_URLS = (() => {
    const repository = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/';
    return {
        'tiny.en': repository + 'ggml-tiny.en.bin',
        'base.en': repository + 'ggml-base.en.bin',
        'tiny-en-q5_1': repository + 'ggml-tiny.en-q5_1.bin',
        'base-en-q5_1': repository + 'ggml-base.en-q5_1.bin',
    };
})();
426
/**
 * Size figure for each bundled model, keyed like MODEL_URLS. The value is
 * passed to `loadRemote` as its `size_mb` argument.
 */
WhisperTranscriber.MODEL_SIZES = {};
[
    ['tiny.en', 75],
    ['base.en', 142],
    ['tiny-en-q5_1', 31],
    ['base-en-q5_1', 57],
].forEach(([modelName, megabytes]) => {
    WhisperTranscriber.MODEL_SIZES[modelName] = megabytes;
});
432
+
433
+ export { WhisperTranscriber };