@sridhar-mani/whisper-web-transcriber 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +323 -0
- package/dist/index.bundled.js +441 -0
- package/dist/index.bundled.min.js +1 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.esm.js +433 -0
- package/dist/index.js +441 -0
- package/dist/index.min.js +1 -0
- package/package.json +57 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Whisper Web Transcriber
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# Whisper Web Transcriber
|
|
2
|
+
|
|
3
|
+
Real-time audio transcription in the browser using OpenAI's Whisper model via WebAssembly. This package provides an easy-to-use API for integrating speech-to-text capabilities into web applications without any server-side processing.
|
|
4
|
+
|
|
5
|
+
**[Live Demo](https://demoproject1-jz33savrh-perception30s-projects.vercel.app)** 🎙️ | **[Live Usage on Real Site](https://interviewhacker.ai/)** 🚀
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- 🎙️ Real-time audio transcription from microphone
|
|
10
|
+
- 🌐 Runs entirely in the browser (no server required)
|
|
11
|
+
- 📦 Multiple Whisper model options (tiny, base, quantized versions)
|
|
12
|
+
- 💾 Automatic model caching in IndexedDB
|
|
13
|
+
- 🔧 Simple, promise-based API
|
|
14
|
+
- 📱 Works on all modern browsers with WebAssembly support
|
|
15
|
+
- 🌍 Platform-independent (the same WASM binary runs on every operating system)
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
### NPM Package
|
|
20
|
+
```bash
|
|
21
|
+
npm install whisper-web-transcriber
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Or using yarn:
|
|
25
|
+
```bash
|
|
26
|
+
yarn add whisper-web-transcriber
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### CDN Usage (Bundled Version)
|
|
30
|
+
```html
|
|
31
|
+
<!-- Single file with all dependencies included -->
|
|
32
|
+
<script src="https://unpkg.com/whisper-web-transcriber/dist/index.bundled.min.js"></script>
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
### Using NPM Package
|
|
39
|
+
```javascript
|
|
40
|
+
import { WhisperTranscriber } from 'whisper-web-transcriber';
|
|
41
|
+
|
|
42
|
+
const transcriber = new WhisperTranscriber({
|
|
43
|
+
modelSize: 'base-en-q5_1',
|
|
44
|
+
onTranscription: (text) => {
|
|
45
|
+
console.log('Transcribed:', text);
|
|
46
|
+
}
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
await transcriber.loadModel();
|
|
50
|
+
await transcriber.startRecording();
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Using CDN (Bundled Version)
|
|
54
|
+
```html
|
|
55
|
+
<script src="https://unpkg.com/whisper-web-transcriber/dist/index.bundled.min.js"></script>
|
|
56
|
+
<script>
|
|
57
|
+
const transcriber = new WhisperTranscriber.WhisperTranscriber({
|
|
58
|
+
modelSize: 'base-en-q5_1',
|
|
59
|
+
onTranscription: (text) => {
|
|
60
|
+
console.log('Transcribed:', text);
|
|
61
|
+
}
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
transcriber.loadModel().then(() => {
|
|
65
|
+
transcriber.startRecording();
|
|
66
|
+
});
|
|
67
|
+
</script>
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## API Reference
|
|
71
|
+
|
|
72
|
+
### Constructor Options
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
interface WhisperConfig {
|
|
76
|
+
modelUrl?: string; // Custom model URL (optional)
|
|
77
|
+
modelSize?: 'tiny.en' | 'base.en' | 'tiny-en-q5_1' | 'base-en-q5_1';
|
|
78
|
+
sampleRate?: number; // Audio sample rate (default: 16000)
|
|
79
|
+
audioIntervalMs?: number; // Audio processing interval (default: 5000ms)
|
|
80
|
+
onTranscription?: (text: string) => void;
|
|
81
|
+
onProgress?: (progress: number) => void;
|
|
82
|
+
onStatus?: (status: string) => void;
|
|
83
|
+
debug?: boolean; // Enable debug logging (default: false)
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Methods
|
|
88
|
+
|
|
89
|
+
- `loadModel(): Promise<void>` - Downloads and initializes the Whisper model
|
|
90
|
+
- `startRecording(): Promise<void>` - Starts microphone recording and transcription
|
|
91
|
+
- `stopRecording(): void` - Stops recording
|
|
92
|
+
- `destroy(): void` - Cleans up resources
|
|
93
|
+
- `getServiceWorkerCode(): string | null` - Returns the COI service worker code (bundled version only)
|
|
94
|
+
- `getCrossOriginIsolationInstructions(): string` - Returns setup instructions for Cross-Origin Isolation
|
|
95
|
+
|
|
96
|
+
## Model Options
|
|
97
|
+
|
|
98
|
+
| Model | Size | Description |
|
|
99
|
+
|-------|------|-------------|
|
|
100
|
+
| `tiny.en` | 75 MB | Fastest, lower accuracy |
|
|
101
|
+
| `base.en` | 142 MB | Better accuracy, slower |
|
|
102
|
+
| `tiny-en-q5_1` | 31 MB | Quantized tiny model, smaller size |
|
|
103
|
+
| `base-en-q5_1` | 57 MB | Quantized base model, good balance |
|
|
104
|
+
|
|
105
|
+
## Browser Requirements
|
|
106
|
+
|
|
107
|
+
- WebAssembly support
|
|
108
|
+
- SharedArrayBuffer support (requires Cross-Origin Isolation)
|
|
109
|
+
- Microphone access permission
|
|
110
|
+
- Modern browser (Chrome 90+, Firefox 89+, Safari 15+, Edge 90+)
|
|
111
|
+
|
|
112
|
+
## Cross-Origin Isolation Setup
|
|
113
|
+
|
|
114
|
+
WhisperTranscriber requires SharedArrayBuffer, which needs Cross-Origin Isolation. You have two options:
|
|
115
|
+
|
|
116
|
+
### Option 1: Server Headers (Recommended)
|
|
117
|
+
Configure your server to send these headers:
|
|
118
|
+
```
|
|
119
|
+
Cross-Origin-Embedder-Policy: require-corp
|
|
120
|
+
Cross-Origin-Opener-Policy: same-origin
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Option 2: Service Worker
|
|
124
|
+
If you can't modify server headers, use the included service worker:
|
|
125
|
+
|
|
126
|
+
**For NPM users:**
|
|
127
|
+
```html
|
|
128
|
+
<!-- Include at the top of your HTML -->
|
|
129
|
+
<script src="node_modules/whisper-web-transcriber/dist/coi-serviceworker.js"></script>
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**For CDN users:**
|
|
133
|
+
```javascript
|
|
134
|
+
// Get the service worker code
|
|
135
|
+
const transcriber = new WhisperTranscriber.WhisperTranscriber();
|
|
136
|
+
const swCode = transcriber.getServiceWorkerCode();
|
|
137
|
+
|
|
138
|
+
// Save swCode as 'coi-serviceworker.js' on YOUR domain
|
|
139
|
+
// Then include it in your HTML:
|
|
140
|
+
// <script src="/coi-serviceworker.js"></script>
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Important:** Service workers must be served from the same origin as your page. CDN users cannot directly use the service worker from unpkg.
|
|
144
|
+
|
|
145
|
+
### Serving with proper headers
|
|
146
|
+
|
|
147
|
+
**For local development:**
|
|
148
|
+
```bash
|
|
149
|
+
npm run demo
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**For production (examples):**
|
|
153
|
+
|
|
154
|
+
Vercel (`vercel.json`):
|
|
155
|
+
```json
|
|
156
|
+
{
|
|
157
|
+
"headers": [
|
|
158
|
+
{
|
|
159
|
+
"source": "/(.*)",
|
|
160
|
+
"headers": [
|
|
161
|
+
{
|
|
162
|
+
"key": "Cross-Origin-Embedder-Policy",
|
|
163
|
+
"value": "require-corp"
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
"key": "Cross-Origin-Opener-Policy",
|
|
167
|
+
"value": "same-origin"
|
|
168
|
+
}
|
|
169
|
+
]
|
|
170
|
+
}
|
|
171
|
+
]
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Nginx:
|
|
176
|
+
```nginx
|
|
177
|
+
add_header Cross-Origin-Embedder-Policy "require-corp" always;
|
|
178
|
+
add_header Cross-Origin-Opener-Policy "same-origin" always;
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Complete Examples
|
|
182
|
+
|
|
183
|
+
### Example 1: Using NPM Package
|
|
184
|
+
|
|
185
|
+
```html
|
|
186
|
+
<!DOCTYPE html>
|
|
187
|
+
<html>
|
|
188
|
+
<head>
|
|
189
|
+
<title>Whisper Transcriber - NPM Version</title>
|
|
190
|
+
<!-- Include service worker for Cross-Origin Isolation -->
|
|
191
|
+
<script src="node_modules/whisper-web-transcriber/dist/coi-serviceworker.js"></script>
|
|
192
|
+
</head>
|
|
193
|
+
<body>
|
|
194
|
+
<button id="start">Start Recording</button>
|
|
195
|
+
<button id="stop">Stop Recording</button>
|
|
196
|
+
<div id="transcription"></div>
|
|
197
|
+
|
|
198
|
+
<script type="module">
|
|
199
|
+
import { WhisperTranscriber } from './node_modules/whisper-web-transcriber/dist/index.esm.js';
|
|
200
|
+
|
|
201
|
+
const transcriber = new WhisperTranscriber({
|
|
202
|
+
modelSize: 'tiny-en-q5_1',
|
|
203
|
+
onTranscription: (text) => {
|
|
204
|
+
document.getElementById('transcription').textContent += text + ' ';
|
|
205
|
+
}
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
document.getElementById('start').onclick = async () => {
|
|
209
|
+
await transcriber.loadModel();
|
|
210
|
+
await transcriber.startRecording();
|
|
211
|
+
};
|
|
212
|
+
|
|
213
|
+
document.getElementById('stop').onclick = () => {
|
|
214
|
+
transcriber.stopRecording();
|
|
215
|
+
};
|
|
216
|
+
</script>
|
|
217
|
+
</body>
|
|
218
|
+
</html>
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Example 2: Using CDN (Bundled Version)
|
|
222
|
+
|
|
223
|
+
```html
|
|
224
|
+
<!DOCTYPE html>
|
|
225
|
+
<html>
|
|
226
|
+
<head>
|
|
227
|
+
<title>Whisper Transcriber - CDN Version</title>
|
|
228
|
+
<!-- Note: You still need to handle Cross-Origin Isolation -->
|
|
229
|
+
<!-- Either configure server headers OR save and include the service worker -->
|
|
230
|
+
</head>
|
|
231
|
+
<body>
|
|
232
|
+
<button id="start">Start Recording</button>
|
|
233
|
+
<button id="stop">Stop Recording</button>
|
|
234
|
+
<div id="transcription"></div>
|
|
235
|
+
|
|
236
|
+
<!-- Single script include -->
|
|
237
|
+
<script src="https://unpkg.com/whisper-web-transcriber/dist/index.bundled.min.js"></script>
|
|
238
|
+
<script>
|
|
239
|
+
const transcriber = new WhisperTranscriber.WhisperTranscriber({
|
|
240
|
+
modelSize: 'tiny-en-q5_1',
|
|
241
|
+
onTranscription: (text) => {
|
|
242
|
+
document.getElementById('transcription').textContent += text + ' ';
|
|
243
|
+
}
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
// Check if Cross-Origin Isolation is enabled
|
|
247
|
+
if (!window.crossOriginIsolated) {
|
|
248
|
+
console.log(transcriber.getCrossOriginIsolationInstructions());
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
document.getElementById('start').onclick = async () => {
|
|
252
|
+
await transcriber.loadModel();
|
|
253
|
+
await transcriber.startRecording();
|
|
254
|
+
};
|
|
255
|
+
|
|
256
|
+
document.getElementById('stop').onclick = () => {
|
|
257
|
+
transcriber.stopRecording();
|
|
258
|
+
};
|
|
259
|
+
</script>
|
|
260
|
+
</body>
|
|
261
|
+
</html>
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
## Bundled vs Standard Version
|
|
266
|
+
|
|
267
|
+
### Bundled Version (`index.bundled.js`)
|
|
268
|
+
- ✅ **Single file** - All workers and dependencies included
|
|
269
|
+
- ✅ **CDN-friendly** - No CORS issues with web workers
|
|
270
|
+
- ✅ **Zero configuration** - Works out of the box (except for Cross-Origin Isolation)
|
|
271
|
+
- ❌ **Larger initial download** - ~220KB unminified, ~95KB minified
|
|
272
|
+
- 📦 **Best for**: Quick prototypes, CDN usage, simple deployments
|
|
273
|
+
|
|
274
|
+
### Standard Version (`index.js`)
|
|
275
|
+
- ✅ **Smaller initial size** - Core library only
|
|
276
|
+
- ✅ **Modular loading** - Workers loaded on demand
|
|
277
|
+
- ❌ **Requires all files** - Must serve worker files from the same origin
|
|
278
|
+
- ❌ **More complex setup** - Need to copy files from node_modules
|
|
279
|
+
- 📦 **Best for**: Production apps with bundlers, optimized loading
|
|
280
|
+
|
|
281
|
+
## Performance Considerations
|
|
282
|
+
|
|
283
|
+
- Transcription is CPU-intensive
|
|
284
|
+
- Larger models provide better accuracy but require more processing power
|
|
285
|
+
- Quantized models (Q5_1) offer a good balance between size and quality
|
|
286
|
+
- The first model load may take a while because the model has to be downloaded (models are cached afterward)
|
|
287
|
+
|
|
288
|
+
## Troubleshooting
|
|
289
|
+
|
|
290
|
+
### "SharedArrayBuffer is not defined"
|
|
291
|
+
You need to enable Cross-Origin Isolation. See the [Cross-Origin Isolation Setup](#cross-origin-isolation-setup) section.
|
|
292
|
+
|
|
293
|
+
### "Failed to load worker" when using CDN
|
|
294
|
+
Use the bundled version (`index.bundled.min.js`) instead of the standard version.
|
|
295
|
+
|
|
296
|
+
### "Microphone access denied"
|
|
297
|
+
Ensure your site is served over HTTPS (or localhost) and the user has granted microphone permissions.
|
|
298
|
+
|
|
299
|
+
### Service worker not working
|
|
300
|
+
- Service workers must be served from the same origin as your page
|
|
301
|
+
- Check browser console for specific error messages
|
|
302
|
+
- Ensure the service worker file is accessible at the correct path
|
|
303
|
+
|
|
304
|
+
## Technical Details
|
|
305
|
+
|
|
306
|
+
Built using:
|
|
307
|
+
- [whisper.cpp](https://github.com/ggerganov/whisper.cpp) compiled to WebAssembly
|
|
308
|
+
- Web Audio API for microphone access
|
|
309
|
+
- IndexedDB for model caching
|
|
310
|
+
- Service Worker for Cross-Origin Isolation
|
|
311
|
+
|
|
312
|
+
## License
|
|
313
|
+
|
|
314
|
+
MIT
|
|
315
|
+
|
|
316
|
+
## Contributing
|
|
317
|
+
|
|
318
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
319
|
+
|
|
320
|
+
## Acknowledgments
|
|
321
|
+
|
|
322
|
+
- [whisper.cpp](https://github.com/ggerganov/whisper.cpp) by Georgi Gerganov
|
|
323
|
+
- [OpenAI Whisper](https://github.com/openai/whisper) for the original model
|