@timur00kh/whisper.wasm 0.0.7 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +158 -22
- package/dist/audio/AudioConverter.d.ts +31 -0
- package/dist/audio/AudioConverter.d.ts.map +1 -0
- package/dist/audio/index.d.ts +10 -0
- package/dist/audio/index.d.ts.map +1 -0
- package/dist/audio/types.d.ts +93 -0
- package/dist/audio/types.d.ts.map +1 -0
- package/dist/index.cjs.js +1 -1
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.es.js +596 -244
- package/dist/index.umd.js +12 -12
- package/dist/libmain-CWYJvMY5.js +13 -0
- package/dist/libmain-D9-QM3iM.mjs +2198 -0
- package/dist/utils/timeoutError.d.ts +5 -0
- package/dist/utils/timeoutError.d.ts.map +1 -0
- package/dist/whisper/TranscriptionSession.d.ts +6 -0
- package/dist/whisper/TranscriptionSession.d.ts.map +1 -1
- package/dist/whisper/WhisperWasmService.d.ts +3 -1
- package/dist/whisper/WhisperWasmService.d.ts.map +1 -1
- package/package.json +7 -4
- package/dist/libmain-D50HCaHR.js +0 -13
- package/dist/libmain-DyRJqz-4.mjs +0 -2198
package/LICENSE
CHANGED
package/README.md
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
# Whisper.wasm
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[CI](https://github.com/timur00kh/whisper.wasm/actions/workflows/ci.yml)
|
|
4
|
+
[npm version](https://www.npmjs.com/package/@timur00kh/whisper.wasm)
|
|
5
|
+
[npm downloads](https://www.npmjs.com/package/@timur00kh/whisper.wasm)
|
|
6
|
+
[License](LICENSE)
|
|
7
|
+
[GitHub repository](https://github.com/timur00kh/whisper.wasm)
|
|
8
|
+
[Issues](https://github.com/timur00kh/whisper.wasm/issues)
|
|
9
|
+
[Releases](https://github.com/timur00kh/whisper.wasm/releases)
|
|
10
|
+
[Live demo](https://timur00kh.github.io/whisper.wasm/)
|
|
11
|
+
|
|
12
|
+
A TypeScript wrapper for [whisper.cpp](https://github.com/ggml-org/whisper.cpp) that brings OpenAI's Whisper speech recognition to the browser using WebAssembly.
|
|
13
|
+
|
|
14
|
+
> Note: Node.js support is experimental / untested at the moment. (The core WASM layer may work, but browser-specific helpers like the AudioConverter require Web APIs.)
|
|
4
15
|
|
|
5
16
|
## Features
|
|
6
17
|
|
|
@@ -8,10 +19,10 @@ A TypeScript wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
|
|
|
8
19
|
- ⚡ **WebAssembly performance** - runs directly in the browser
|
|
9
20
|
- 🌍 **Multi-language support** with automatic language detection
|
|
10
21
|
- 🔄 **Translation capabilities** - translate speech to English
|
|
11
|
-
- 📱 **Cross-platform** -
|
|
22
|
+
- 📱 **Cross-platform** - browser-first; Node.js is experimental / untested
|
|
12
23
|
- 🧠 **Multiple model sizes** - from tiny to large models
|
|
13
24
|
- 🎯 **Streaming transcription** - real-time audio processing
|
|
14
|
-
-
|
|
25
|
+
- 🎵 **Audio conversion helpers (browser-only)** - convert files / mic / `<audio>` to 16kHz `Float32Array` for Whisper
|
|
15
26
|
|
|
16
27
|
## Installation
|
|
17
28
|
|
|
@@ -24,11 +35,11 @@ npm install @timur00kh/whisper.wasm@canary
|
|
|
24
35
|
### Basic Usage
|
|
25
36
|
|
|
26
37
|
```typescript
|
|
27
|
-
import { WhisperWasmService, ModelManager } from '@timur00kh/whisper.wasm';
|
|
38
|
+
import { WhisperWasmService, ModelManager, convertFromFile } from '@timur00kh/whisper.wasm';
|
|
28
39
|
|
|
29
40
|
// Initialize the service
|
|
30
41
|
const whisper = new WhisperWasmService({ logLevel: 1 });
|
|
31
|
-
const modelManager = new ModelManager();
|
|
42
|
+
const modelManager = new ModelManager({ logLevel: 1 });
|
|
32
43
|
|
|
33
44
|
// Check WASM support
|
|
34
45
|
const isSupported = await whisper.checkWasmSupport();
|
|
@@ -37,14 +48,18 @@ if (!isSupported) {
|
|
|
37
48
|
}
|
|
38
49
|
|
|
39
50
|
// Load a model
|
|
40
|
-
const modelData = await modelManager.loadModel('base'); //
|
|
41
|
-
await whisper.
|
|
51
|
+
const modelData = await modelManager.loadModel('base'); // e.g. 'tiny', 'small', 'medium-q5_0', 'large-q5_0'
|
|
52
|
+
await whisper.initModel(modelData);
|
|
42
53
|
|
|
43
54
|
// Create a transcription session for streaming
|
|
44
55
|
const session = whisper.createSession();
|
|
45
56
|
|
|
57
|
+
// Convert an audio/video file to 16kHz Float32Array (browser-only helper)
|
|
58
|
+
// (e.g. a File from <input type="file" />)
|
|
59
|
+
const { audioData } = await convertFromFile(file, { normalize: true });
|
|
60
|
+
|
|
46
61
|
// Process audio in chunks
|
|
47
|
-
const stream = session.
|
|
62
|
+
const stream = session.streaming(audioData, {
|
|
48
63
|
language: 'en',
|
|
49
64
|
threads: 4,
|
|
50
65
|
translate: false,
|
|
@@ -61,7 +76,7 @@ for await (const segment of stream) {
|
|
|
61
76
|
```typescript
|
|
62
77
|
import { ModelManager, getAllModels } from '@timur00kh/whisper.wasm';
|
|
63
78
|
|
|
64
|
-
const modelManager = new ModelManager();
|
|
79
|
+
const modelManager = new ModelManager({ logLevel: 1 });
|
|
65
80
|
|
|
66
81
|
// Get available models
|
|
67
82
|
const availableModels = await modelManager.getAvailableModels();
|
|
@@ -100,7 +115,7 @@ new WhisperWasmService(options?: {
|
|
|
100
115
|
|
|
101
116
|
Checks if WebAssembly is supported in the current environment.
|
|
102
117
|
|
|
103
|
-
##### `
|
|
118
|
+
##### `initModel(model: Uint8Array): Promise<void>`
|
|
104
119
|
|
|
105
120
|
Loads a Whisper model from binary data.
|
|
106
121
|
|
|
@@ -135,22 +150,48 @@ Creates a new transcription session for streaming audio.
|
|
|
135
150
|
|
|
136
151
|
Manages Whisper model loading and caching.
|
|
137
152
|
|
|
153
|
+
#### Constructor
|
|
154
|
+
|
|
155
|
+
```typescript
|
|
156
|
+
new ModelManager(options?: {
|
|
157
|
+
logLevel: LoggerLevelsType;
|
|
158
|
+
})
|
|
159
|
+
```
|
|
160
|
+
|
|
138
161
|
#### Methods
|
|
139
162
|
|
|
140
|
-
##### `getAvailableModels(): Promise<
|
|
163
|
+
##### `getAvailableModels(): Promise<WhisperModel[]>`
|
|
141
164
|
|
|
142
165
|
Returns information about available models.
|
|
143
166
|
|
|
144
|
-
##### `loadModel(modelId:
|
|
167
|
+
##### `loadModel(modelId: ModelID, saveToIndexedDB?: boolean, onProgress?: (progress: number) => void): Promise<Uint8Array>`
|
|
145
168
|
|
|
146
169
|
Loads a model by ID.
|
|
147
170
|
|
|
148
171
|
**Parameters:**
|
|
149
172
|
|
|
150
|
-
- `modelId`: Model identifier (
|
|
151
|
-
- `
|
|
173
|
+
- `modelId`: Model identifier (see “Supported Models” / `getAllModels()`)
|
|
174
|
+
- `saveToIndexedDB`: Whether to use/save cached model in IndexedDB (browser-only)
|
|
152
175
|
- `onProgress`: Progress callback function
|
|
153
176
|
|
|
177
|
+
##### `loadModelByUrl(modelUrl: string, onProgress?: (progress: number) => void): Promise<Uint8Array>`
|
|
178
|
+
|
|
179
|
+
Loads a model from a URL and optionally caches it by URL in IndexedDB.
|
|
180
|
+
|
|
181
|
+
Security note: do **not** pass untrusted URLs here (see “Security & Privacy”).
|
|
182
|
+
|
|
183
|
+
##### `getAvailableModelsSync(): WhisperModel[]`
|
|
184
|
+
|
|
185
|
+
Returns the available model list without checking the cache (sync).
|
|
186
|
+
|
|
187
|
+
##### `getModelConfig(modelId: ModelID): WhisperModel | undefined`
|
|
188
|
+
|
|
189
|
+
Returns model configuration by ID (from the built-in config).
|
|
190
|
+
|
|
191
|
+
##### `getCacheInfo(): Promise<{ count: number; totalSize: number }>`
|
|
192
|
+
|
|
193
|
+
Returns basic IndexedDB cache statistics.
|
|
194
|
+
|
|
154
195
|
##### `clearCache(): Promise<void>`
|
|
155
196
|
|
|
156
197
|
Clears the model cache.
|
|
@@ -161,19 +202,31 @@ Handles streaming audio transcription.
|
|
|
161
202
|
|
|
162
203
|
#### Methods
|
|
163
204
|
|
|
164
|
-
##### `
|
|
205
|
+
##### `streaming(audioData: Float32Array, options?: ITranscriptionSessionOptions): AsyncIterableIterator<WhisperWasmServiceCallbackParams>`
|
|
165
206
|
|
|
166
207
|
Processes audio data in streaming fashion.
|
|
167
208
|
|
|
209
|
+
##### `streamimg(audioData: Float32Array, options?: ITranscriptionSessionOptions): AsyncIterableIterator<WhisperWasmServiceCallbackParams>`
|
|
210
|
+
|
|
211
|
+
Deprecated alias for `streaming(...)`.
|
|
212
|
+
|
|
213
|
+
Notes:
|
|
214
|
+
|
|
215
|
+
- `streamimg(...)` is a deprecated alias; prefer `streaming(...)`.
|
|
216
|
+
|
|
168
217
|
## Supported Models
|
|
169
218
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
|
175
|
-
|
|
|
176
|
-
|
|
|
219
|
+
The full model list is defined in the library config and is available via `getAllModels()` / `ModelManager.getAvailableModels()`.
|
|
220
|
+
|
|
221
|
+
Below are a few common examples (sizes are taken from the current config):
|
|
222
|
+
|
|
223
|
+
| Model ID | Size (MB) | Notes |
|
|
224
|
+
| ------------- | --------- | ----------------------- |
|
|
225
|
+
| `tiny` | 75 | Multilingual |
|
|
226
|
+
| `base` | 142 | Multilingual |
|
|
227
|
+
| `small` | 466 | Multilingual |
|
|
228
|
+
| `medium-q5_0` | 515 | Multilingual, quantized |
|
|
229
|
+
| `large-q5_0` | 1030 | Multilingual, quantized |
|
|
177
230
|
|
|
178
231
|
## Browser Support
|
|
179
232
|
|
|
@@ -182,6 +235,58 @@ Processes audio data in streaming fashion.
|
|
|
182
235
|
- **Safari**: 11+
|
|
183
236
|
- **Edge**: 16+
|
|
184
237
|
|
|
238
|
+
## Security & Privacy
|
|
239
|
+
|
|
240
|
+
### Untrusted URLs (SSRF / DoS)
|
|
241
|
+
|
|
242
|
+
This library performs network requests in a few places:
|
|
243
|
+
|
|
244
|
+
- `ModelManager.loadModel(...)` / `loadModelByUrl(...)` uses `fetch()` to download model binaries.
|
|
245
|
+
- `convertFromAudioElement(audioEl)` may try `fetch(audioEl.src)` (subject to CORS).
|
|
246
|
+
|
|
247
|
+
If you are using this library in **Node.js / server-side** environments, do **not** pass untrusted URLs into these APIs. Apply your own controls at the application layer (domain allowlist, proxying, signed URLs, request timeouts, response size limits).
|
|
248
|
+
|
|
249
|
+
### IndexedDB caching
|
|
250
|
+
|
|
251
|
+
Model caching uses IndexedDB (browser-only). Models can be large; be mindful of per-origin storage quotas and provide a way for users to clear the cache (`ModelManager.clearCache()`).
|
|
252
|
+
|
|
253
|
+
## FAQ
|
|
254
|
+
|
|
255
|
+
### Q: Why is my transcription stopping unexpectedly?
|
|
256
|
+
|
|
257
|
+
A: This is usually related to WebAssembly execution being terminated by the browser due to resource management policies, low battery, or background tab throttling. Use the `restartModelOnError: true` option to automatically restart the model when this happens.
|
|
258
|
+
|
|
259
|
+
### Q: Can I use this in a background tab?
|
|
260
|
+
|
|
261
|
+
A: Some browsers may throttle or pause WebAssembly execution in background tabs. Consider using the `restartModelOnError` option and implementing visibility change listeners to handle this.
|
|
262
|
+
|
|
263
|
+
### Q: Why is the first transcription slower?
|
|
264
|
+
|
|
265
|
+
A: The first transcription includes model initialization time. Subsequent transcriptions with the same model will be faster.
|
|
266
|
+
|
|
267
|
+
### Q: Can I transcribe audio in real-time?
|
|
268
|
+
|
|
269
|
+
A: Yes! Use the `TranscriptionSession` with streaming audio data. For real-time applications, consider using the `tiny` or `base` models for better performance.
|
|
270
|
+
|
|
271
|
+
### Q: What audio formats are supported?
|
|
272
|
+
|
|
273
|
+
A: Whisper expects `Float32Array` audio data at 16kHz. You can either prepare it yourself, or use the built-in **AudioConverter** helpers (browser-only):
|
|
274
|
+
|
|
275
|
+
- `convertFromFile(file)` - audio/video files supported by the browser decoder
|
|
276
|
+
- `convertFromArrayBuffer(buffer)` - decode & convert from an ArrayBuffer
|
|
277
|
+
- `convertFromFloat32Array(data, { inputSampleRate? })` - resample if needed
|
|
278
|
+
- `convertFromMediaStream(stream)` - microphone / capture stream (requires `MediaRecorder`)
|
|
279
|
+
- `convertFromAudioElement(audioEl)` - tries `fetch(audioEl.src)` (CORS), otherwise `captureStream()` fallback (browser support varies)
|
|
280
|
+
|
|
281
|
+
Notes:
|
|
282
|
+
|
|
283
|
+
- AudioConverter uses Web APIs (`Web Audio`, `MediaRecorder`), so it does **not** run in Node.js.
|
|
284
|
+
- `<audio>` conversion may require proper CORS headers to allow `fetch()` of the audio URL.
|
|
285
|
+
|
|
286
|
+
### Q: How do I handle errors gracefully?
|
|
287
|
+
|
|
288
|
+
A: Use try-catch blocks around transcription calls and implement the `restartModelOnError` option for automatic recovery from WebAssembly execution issues.
|
|
289
|
+
|
|
185
290
|
## Demo
|
|
186
291
|
|
|
187
292
|
Try the interactive demo:
|
|
@@ -199,11 +304,42 @@ npm run dev:demo
|
|
|
199
304
|
The demo includes:
|
|
200
305
|
|
|
201
306
|
- Audio file upload and processing
|
|
307
|
+
- Transcription from `<audio>` element
|
|
308
|
+
- Microphone recording (Start/Stop) and transcription
|
|
202
309
|
- Model selection and loading
|
|
203
310
|
- Real-time transcription with progress
|
|
204
311
|
- Language detection and translation
|
|
205
312
|
- Streaming audio support
|
|
206
313
|
|
|
314
|
+
## Changelog
|
|
315
|
+
|
|
316
|
+
For detailed information about changes, new features, and bug fixes, see our [changelog documentation](docs/changelog/).
|
|
317
|
+
|
|
318
|
+
### Recent Updates
|
|
319
|
+
|
|
320
|
+
- **[feature-streaming-api-and-transcribe-done](docs/changelog/feature-streaming-api-and-transcribe-done.md)** - Added `streaming()` API + clarified completion semantics
|
|
321
|
+
- **[feature-restart-on-timeout](docs/changelog/feature-restart-on-timeout.md)** - Added timeout handling, error recovery, and enhanced demo application
|
|
322
|
+
- **[feature-audio-converter](docs/changelog/feature-audio-converter.md)** - Added AudioConverter helpers + demo integration (files, mic, `<audio>`)
|
|
323
|
+
|
|
324
|
+
## Release
|
|
325
|
+
|
|
326
|
+
Stable releases are triggered by pushing a git tag `vX.Y.Z` (created by `npm version`).
|
|
327
|
+
|
|
328
|
+
```bash
|
|
329
|
+
# on main (or after merging to main)
|
|
330
|
+
npm version patch # or: minor / major
|
|
331
|
+
git push origin main --follow-tags
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
This will:
|
|
335
|
+
|
|
336
|
+
- publish the package to npm (`latest`)
|
|
337
|
+
- create a **draft** GitHub Release with auto-generated release notes (based on merged PRs) and attach build artifacts (`wasm/`, `dist/`)
|
|
338
|
+
|
|
339
|
+
To keep release notes clean, use meaningful PR titles and add labels such as:
|
|
340
|
+
`feature`, `enhancement`, `bug`, `fix`, `docs`, `refactor`, `chore`, `tests`, `ci`, `dependencies`.
|
|
341
|
+
Use `skip-changelog` to exclude a PR from the generated notes.
|
|
342
|
+
|
|
207
343
|
## Development
|
|
208
344
|
|
|
209
345
|
### Prerequisites
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { AudioFormat, AudioConverterOptions, AudioConversionResult, AudioConverterCallbacks } from './types';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Checks whether the Web Audio API is supported
|
|
5
|
+
*/
|
|
6
|
+
export declare function isWebAudioSupported(): boolean;
|
|
7
|
+
/**
|
|
8
|
+
* Returns the list of supported formats
|
|
9
|
+
*/
|
|
10
|
+
export declare function getSupportedFormats(): AudioFormat[];
|
|
11
|
+
/**
|
|
12
|
+
* Converts audio from a file (supports both audio and video files)
|
|
13
|
+
*/
|
|
14
|
+
export declare function convertFromFile(file: File, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
|
|
15
|
+
/**
|
|
16
|
+
* Converts audio from a MediaStream (microphone)
|
|
17
|
+
*/
|
|
18
|
+
export declare function convertFromMediaStream(stream: MediaStream, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
|
|
19
|
+
/**
|
|
20
|
+
* Converts audio from an HTMLAudioElement
|
|
21
|
+
*/
|
|
22
|
+
export declare function convertFromAudioElement(element: HTMLAudioElement, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
|
|
23
|
+
/**
|
|
24
|
+
* Converts audio from a Float32Array
|
|
25
|
+
*/
|
|
26
|
+
export declare function convertFromFloat32Array(data: Float32Array, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
|
|
27
|
+
/**
|
|
28
|
+
* Converts audio from an ArrayBuffer
|
|
29
|
+
*/
|
|
30
|
+
export declare function convertFromArrayBuffer(buffer: ArrayBuffer, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
|
|
31
|
+
//# sourceMappingURL=AudioConverter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AudioConverter.d.ts","sourceRoot":"","sources":["../../src/audio/AudioConverter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAGH,OAAO,EACL,WAAW,EACX,KAAK,qBAAqB,EAC1B,KAAK,qBAAqB,EAC1B,KAAK,uBAAuB,EAC7B,MAAM,SAAS,CAAC;AAuCjB;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAW7C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,WAAW,EAAE,CAiBnD;AAED;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,IAAI,EACV,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAmChC;AAED;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,WAAW,EACnB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAgChC;AAED;;GAEG;AACH,wBAAsB,uBAAuB,CAC3C,OAAO,EAAE,gBAAgB,EACzB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAwEhC;AAED;;GAEG;AACH,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,YAAY,EAClB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAmDhC;AAED;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,WAAW,EACnB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CA8BhC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio converter module for whisper.wasm
|
|
3
|
+
*
|
|
4
|
+
* This module provides utilities for converting various audio formats
|
|
5
|
+
* and sources into the Float32Array format required by whisper.wasm.
|
|
6
|
+
*/
|
|
7
|
+
export { convertFromFile, convertFromMediaStream, convertFromAudioElement, convertFromFloat32Array, convertFromArrayBuffer, isWebAudioSupported, getSupportedFormats, } from './AudioConverter';
|
|
8
|
+
export { AudioFormat } from './types';
|
|
9
|
+
export type { AudioInfo, AudioConverterOptions, AudioConversionResult, AudioConverterCallbacks, AudioSource, AudioContextConfig, } from './types';
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/audio/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,uBAAuB,EACvB,uBAAuB,EACvB,sBAAsB,EACtB,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,kBAAkB,CAAC;AAG1B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,YAAY,EACV,SAAS,EACT,qBAAqB,EACrB,qBAAqB,EACrB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,GACnB,MAAM,SAAS,CAAC"}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { LoggerLevelsType } from '../utils/Logger';
|
|
2
|
+
|
|
3
|
+
export interface AudioInfo {
|
|
4
|
+
/** Sample rate in Hz */
|
|
5
|
+
sampleRate: number;
|
|
6
|
+
/** Duration in seconds */
|
|
7
|
+
duration: number;
|
|
8
|
+
/** Number of audio channels */
|
|
9
|
+
channels: number;
|
|
10
|
+
/** Audio bit depth */
|
|
11
|
+
bitDepth?: number;
|
|
12
|
+
/** Audio format/container */
|
|
13
|
+
format?: string;
|
|
14
|
+
}
|
|
15
|
+
export interface AudioConverterOptions {
|
|
16
|
+
/** Target sample rate (default: 16000 for Whisper) */
|
|
17
|
+
targetSampleRate?: number;
|
|
18
|
+
/** Target number of channels (default: 1 for mono) */
|
|
19
|
+
targetChannels?: number;
|
|
20
|
+
/**
|
|
21
|
+
* Sample rate for Float32Array inputs.
|
|
22
|
+
* If omitted, Float32Array is assumed to already be at targetSampleRate.
|
|
23
|
+
*/
|
|
24
|
+
inputSampleRate?: number;
|
|
25
|
+
/** Whether to normalize audio levels */
|
|
26
|
+
normalize?: boolean;
|
|
27
|
+
/** Whether to apply noise reduction (basic) */
|
|
28
|
+
noiseReduction?: boolean;
|
|
29
|
+
/**
|
|
30
|
+
* Log level for debugging.
|
|
31
|
+
* Prefer numeric LoggerLevelsType for consistency with the library.
|
|
32
|
+
* String values are also accepted for convenience.
|
|
33
|
+
*/
|
|
34
|
+
logLevel?: LoggerLevelsType | 'ERROR' | 'WARN' | 'INFO' | 'DEBUG';
|
|
35
|
+
/**
|
|
36
|
+
* Optional AbortSignal to cancel long operations (recording, fetch, etc).
|
|
37
|
+
* Implementations should treat abort as a cancellation and reject.
|
|
38
|
+
*/
|
|
39
|
+
signal?: AbortSignal;
|
|
40
|
+
/**
|
|
41
|
+
* For MediaStream / captureStream based conversions: how long to record
|
|
42
|
+
* before auto-stopping. If omitted, defaults are applied.
|
|
43
|
+
*/
|
|
44
|
+
recordingDurationMs?: number;
|
|
45
|
+
}
|
|
46
|
+
export interface AudioConversionResult {
|
|
47
|
+
/** Converted audio data as Float32Array */
|
|
48
|
+
audioData: Float32Array;
|
|
49
|
+
/** Audio metadata */
|
|
50
|
+
audioInfo: AudioInfo;
|
|
51
|
+
/** Conversion warnings/notes */
|
|
52
|
+
warnings?: string[];
|
|
53
|
+
}
|
|
54
|
+
export type ProgressCallback = (progress: number, message: string) => void;
|
|
55
|
+
export type ErrorCallback = (error: Error) => void;
|
|
56
|
+
export interface AudioConverterCallbacks {
|
|
57
|
+
onProgress?: ProgressCallback;
|
|
58
|
+
onError?: ErrorCallback;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Supported audio input formats
|
|
62
|
+
*/
|
|
63
|
+
export declare enum AudioFormat {
|
|
64
|
+
MP3 = "mp3",
|
|
65
|
+
WAV = "wav",
|
|
66
|
+
OGG = "ogg",
|
|
67
|
+
M4A = "m4a",
|
|
68
|
+
AAC = "aac",
|
|
69
|
+
FLAC = "flac",
|
|
70
|
+
MP4 = "mp4",
|
|
71
|
+
WEBM = "webm",
|
|
72
|
+
AVI = "avi",
|
|
73
|
+
MOV = "mov",
|
|
74
|
+
MKV = "mkv",
|
|
75
|
+
RAW_PCM = "raw_pcm",
|
|
76
|
+
MICROPHONE = "microphone",
|
|
77
|
+
AUDIO_ELEMENT = "audio_element"
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Audio source types
|
|
81
|
+
*/
|
|
82
|
+
export type AudioSource = File | Blob | ArrayBuffer | Float32Array | AudioBuffer | HTMLAudioElement | MediaStream;
|
|
83
|
+
/**
|
|
84
|
+
* Audio conversion context for browser environment
|
|
85
|
+
*/
|
|
86
|
+
export interface AudioContextConfig {
|
|
87
|
+
sampleRate?: number;
|
|
88
|
+
channelCount?: number;
|
|
89
|
+
echoCancellation?: boolean;
|
|
90
|
+
autoGainControl?: boolean;
|
|
91
|
+
noiseSuppression?: boolean;
|
|
92
|
+
}
|
|
93
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/audio/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAExD,MAAM,WAAW,SAAS;IACxB,wBAAwB;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,0BAA0B;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,sBAAsB;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,sDAAsD;IACtD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,sDAAsD;IACtD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,wCAAwC;IACxC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+CAA+C;IAC/C,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,gBAAgB,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAElE;;;OAGG;IACH,MAAM,CAAC,EAAE,WAAW,CAAC;IAErB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,qBAAqB;IACpC,2CAA2C;IAC3C,SAAS,EAAE,YAAY,CAAC;IACxB,qBAAqB;IACrB,SAAS,EAAE,SAAS,CAAC;IACrB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,MAAM,gBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;AAC3E,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;AAEnD,MAAM,WAAW,uBAAuB;IACtC,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED;;GAEG;AACH,oBAAY,WAAW;IAErB,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,IAAI,SAAS;IAGb,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IAGX,OAAO,YAAY;IAGnB,UAAU,eAAe;IACzB,aAAa,kBAAkB;CAChC;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GACnB,IAAI,GACJ,IAAI,GACJ,WAAW,GACX,YAAY,GACZ,WAAW,GACX,gBAAgB,GAChB,WAAW,CAAC;AAEhB;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B"}
|
package/dist/index.cjs.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const p=class p{constructor(e=p.levels.INFO,t=""){this.level=e,this.prefix=t}debug(...e){this.level<=p.levels.DEBUG&&console.debug(`[${this.prefix}] [DEBUG]`,...e)}info(...e){this.level<=p.levels.INFO&&console.info(`[${this.prefix}] [INFO]`,...e)}warn(...e){this.level<=p.levels.WARN&&console.warn(`[${this.prefix}] [WARN]`,...e)}error(...e){this.level<=p.levels.ERROR&&console.error(`[${this.prefix}] [ERROR]`,...e)}setLevel(e){this.level=e}getLevel(){return this.level}};p.levels={DEBUG:0,INFO:1,WARN:2,ERROR:3};let b=p;const B=async()=>WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,10,1,8,0,65,0,253,15,253,98,11])),T={language:"auto",threads:4,translate:!1};function M(l){const e=String(l).trim().replace(",","."),t=e.split(":").map(Number);if(t.some(Number.isNaN))throw new Error(`Bad time: "${l}"`);let r=0,a=0,i=0;if(t.length===3)[r,a]=t,i=parseFloat(e.split(":").pop()||"0");else if(t.length===2)[a]=t,i=parseFloat(e.split(":").pop()||"0");else throw new Error(`Bad time format: "${l}"`);return Math.floor(((r*60+a)*60+i)*1e3)}function R(l){const t=/^\s*\[?\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*-->\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*\]?\s*(.*)\s*$/.exec(l);if(!t)throw new Error("Line does not match VTT-like pattern: "+l);const r=t[1],a=t[2],i=t[3]||"",n=M(r),s=M(a);if(s<n)throw new Error("End time is before start time");return{startMs:n,endMs:s,start:r,end:a,text:i}}function q(l){return new Promise(e=>setTimeout(e,l))}function W(l,e=16e3*100){const t=[];for(let r=0;r<l.length;r+=e)t.push(l.subarray(r,r+e));return t}class L{constructor(e,t){this.whisperService=e,this.logger=new b((t==null?void 0:t.logLevel)||b.levels.ERROR,"TranscriptionSession")}async*streamimg(e,t={}){const r=W(e);let a=0;for await(const i of r){const n=[];let 
s=null,o=!1,h,d=0;for(this.whisperService.transcribe(i,c=>{d=c.timeEnd,c.timeStart+=a,c.timeEnd+=a,s?(s(c),s=null):n.push(c)},t).then(()=>{o=!0,a+=d,s==null||s(void 0)}).catch(c=>{h=c});;){if(h)throw h;if(o)break;if(n.length)yield n.shift();else{const c=await new Promise(f=>s=f);c&&(yield c)}}t.sleepMsBetweenChunks&&await q(t.sleepMsBetweenChunks)}}}class x extends EventTarget{on(e,t){return this.addEventListener(e,t),()=>this.removeEventListener(e,t)}emit(e,t){this.dispatchEvent(new CustomEvent(e,{detail:t}))}}class A{constructor(e){this.wasmModule=null,this.instance=null,this.modelFileName="whisper.bin",this.isTranscribing=!1,this.bus=new x,this.logger=new b((e==null?void 0:e.logLevel)??b.levels.ERROR,"WhisperWasmService"),e!=null&&e.init&&this.loadWasmScript()}async checkWasmSupport(){return await B()}async loadWasmScript(){this.wasmModule=await(await Promise.resolve().then(()=>require("./libmain-D50HCaHR.js"))).default({print:(e,...t)=>{this.logger.debug(t),e.startsWith("[")?(this.logger.info(e),this.bus.emit("transcribe",e)):(this.logger.debug(e),this.bus.emit("system_info",e))},printErr:(e,...t)=>{this.logger.debug(t),this.logger.warn(e),this.bus.emit("transcribeError",e)}})}async loadWasmModule(e){if(!await this.checkWasmSupport())throw new Error("WASM is not supported");return this.wasmModule&&(this.wasmModule.FS_unlink(this.modelFileName),this.wasmModule.free()),await this.loadWasmScript(),await q(100),this.storeFS(this.modelFileName,e),this.instance=this.wasmModule.init(this.modelFileName),Promise.resolve()}storeFS(e,t){if(!this.wasmModule)throw new Error("WASM module not loaded");try{this.wasmModule.FS_unlink(e)}catch{}this.wasmModule.FS_createDataFile("/",e,t,!0,!0,!0)}async transcribe(e,t,r={}){if(this.isTranscribing)throw new Error("Already transcribing");if(!this.wasmModule)throw new Error("WASM module not loaded");if(!this.instance)throw new Error("WASM instance not loaded");const a=120;e.length>16e3*a&&this.logger.warn("It's not recommended to 
transcribe audio data that is longer than 120 seconds"),this.isTranscribing=!0;const{language:i="auto",threads:n=4,translate:s=!1}={...T,...r},o=[],h=Date.now();return this.wasmModule.full_default(this.instance,e,i,n,s),await new Promise((d,c)=>{const f=this.bus.on("transcribe",g=>{const{startMs:w,endMs:y,text:_}=R(g.detail),v={timeStart:w,timeEnd:y,text:_,raw:g.detail};o.push(v),t==null||t(v)}),u=setTimeout(()=>{this.isTranscribing=!1,f(),m(),this.logger.error("Transcribe timeout"),c(new Error("Transcribe timeout"))},a*2*1e3),m=this.bus.on("transcribeError",g=>{this.isTranscribing=!1,f(),m(),clearTimeout(u),this.logger.debug("Transcribe error",g.detail),d({segments:o,transcribeDurationMs:Date.now()-h})})})}createSession(){return new L(this,{logLevel:this.logger.getLevel()})}}const S={"tiny.en":{id:"tiny.en",name:"Tiny English",size:75,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin"},tiny:{id:"tiny",name:"Tiny Multilingual",size:75,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"},"base.en":{id:"base.en",name:"Base English",size:142,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"},base:{id:"base",name:"Base Multilingual",size:142,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"},"small.en":{id:"small.en",name:"Small English",size:466,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin"},small:{id:"small",name:"Small Multilingual",size:466,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin"},"tiny.en-q5_1":{id:"tiny.en-q5_1",name:"Tiny English 
(Q5_1)",size:31,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin"},"tiny-q5_1":{id:"tiny-q5_1",name:"Tiny Multilingual (Q5_1)",size:31,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin"},"base.en-q5_1":{id:"base.en-q5_1",name:"Base English (Q5_1)",size:57,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin"},"base-q5_1":{id:"base-q5_1",name:"Base Multilingual (Q5_1)",size:57,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin"},"small.en-q5_1":{id:"small.en-q5_1",name:"Small English (Q5_1)",size:182,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q5_1.bin"},"small-q5_1":{id:"small-q5_1",name:"Small Multilingual (Q5_1)",size:182,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin"},"medium.en-q5_0":{id:"medium.en-q5_0",name:"Medium English (Q5_0)",size:515,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q5_0.bin"},"medium-q5_0":{id:"medium-q5_0",name:"Medium Multilingual (Q5_0)",size:515,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin"},"large-q5_0":{id:"large-q5_0",name:"Large Multilingual (Q5_0)",size:1030,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-q5_0.bin"}};function z(){return Object.values(S).map(({url:l,...e})=>e)}function E(l){return S[l]}class F{constructor(e={logLevel:b.levels.ERROR}){this.cacheEnabled=!0,this.models=z(),this.logger=new b(e.logLevel,"ModelManager")}async loadModel(e,t=!0,r){var m;const a=E(e);if(!a)throw new Error(`Model ${e} not found in 
config`);if(this.cacheEnabled&&t){const g=await this.getCachedModel(e);if(g)return this.logger.info(`Model ${e} loaded from cache`),r&&r(100),g}this.logger.info(`Loading model ${e} from ${a.url}`);const i=await fetch(a.url);if(!i.ok)throw new Error(`Failed to load model: ${i.statusText}`);const n=i.headers.get("content-length"),s=n?parseInt(n,10):0;let o=0;const h=(m=i.body)==null?void 0:m.getReader();if(!h)throw new Error("Response body is not readable");const d=[];try{let g=!1;for(;!g;){const w=await h.read();if(g=w.done,!g&&w.value&&(d.push(w.value),o+=w.value.length,r&&s>0)){const y=Math.round(o/s*100);r(y)}}}finally{h.releaseLock()}const c=d.reduce((g,w)=>g+w.length,0),f=new Uint8Array(c);let u=0;for(const g of d)f.set(g,u),u+=g.length;return this.cacheEnabled&&t&&await this.saveModelToCache(e,f),r&&r(100),f}async loadModelByUrl(e,t){var r;try{if(this.cacheEnabled){const u=await this.getCachedModelByUrl(e);if(u)return this.logger.info(`WASM module loaded from cache by URL: ${e}`),t&&t(100),u}this.logger.info(`Loading WASM module from URL: ${e}`);const a=await fetch(e);if(!a.ok)throw new Error(`Failed to load WASM module: ${a.statusText}`);const i=a.headers.get("content-length"),n=i?parseInt(i,10):0;let s=0;const o=(r=a.body)==null?void 0:r.getReader();if(!o)throw new Error("Response body is not readable");const h=[];try{let u=!1;for(;!u;){const m=await o.read();if(u=m.done,!u&&m.value&&(h.push(m.value),s+=m.value.length,t&&n>0)){const g=Math.round(s/n*100);t(g)}}}finally{o.releaseLock()}const d=h.reduce((u,m)=>u+m.length,0),c=new Uint8Array(d);let f=0;for(const u of h)c.set(u,f),f+=u.length;return this.cacheEnabled&&await this.saveModelToCacheByUrl(e,c),t&&t(100),c}catch(a){throw this.logger.error(a),new Error("Failed to load WASM module")}}async getCachedModelByUrl(e){try{const a=(await this.openIndexedDB()).transaction(["modelsByUrl"],"readonly").objectStore("modelsByUrl");return new Promise((i,n)=>{const s=a.get(e);s.onsuccess=()=>{const 
o=s.result;o&&o.data?i(o.data):i(null)},s.onerror=()=>n(s.error)})}catch(t){return this.logger.error("Error reading model from cache by URL:",t),null}}async saveModelToCacheByUrl(e,t){try{const i=(await this.openIndexedDB()).transaction(["modelsByUrl"],"readwrite").objectStore("modelsByUrl");await new Promise((n,s)=>{const o=i.put({url:e,data:t,timestamp:Date.now(),size:t.length});o.onsuccess=()=>n(),o.onerror=()=>s(o.error)}),this.logger.info(`Model saved to cache by URL: ${e}`)}catch(r){this.logger.error("Error saving model to cache by URL:",r)}}async getAvailableModels(){const e=[...this.models];if(!this.cacheEnabled)return e;try{const t=await this.getCachedModelNames();return e.map(r=>({...r,cached:t.includes(r.id)}))}catch(t){return this.logger.error("Error checking cache status:",t),e}}getAvailableModelsSync(){return[...this.models]}getModelConfig(e){return E(e)}async saveModelToCache(e,t){try{const i=(await this.openIndexedDB()).transaction(["models"],"readwrite").objectStore("models");await new Promise((n,s)=>{const o=i.put({name:e,data:t,timestamp:Date.now(),size:t.length});o.onsuccess=()=>n(),o.onerror=()=>s(o.error)}),this.logger.info(`Model ${e} saved to cache`)}catch(r){this.logger.error("Error saving model to cache:",r)}}async getCachedModel(e){try{const a=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((i,n)=>{const s=a.get(e);s.onsuccess=()=>{const o=s.result;o&&o.data?i(o.data):i(null)},s.onerror=()=>n(s.error)})}catch(t){return this.logger.error("Error getting cached model:",t),null}}async getCachedModelNames(){try{const r=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((a,i)=>{const n=r.getAllKeys();n.onsuccess=()=>{const s=n.result;a(s)},n.onerror=()=>i(n.error)})}catch(e){return this.logger.error("Error getting cached model names:",e),[]}}async openIndexedDB(){return new Promise((e,t)=>{const 
r=indexedDB.open("WhisperModels",2);r.onerror=()=>t(r.error),r.onsuccess=()=>e(r.result),r.onupgradeneeded=a=>{const i=a.target.result;if(!i.objectStoreNames.contains("models")){const n=i.createObjectStore("models",{keyPath:"name"});n.createIndex("timestamp","timestamp",{unique:!1}),n.createIndex("size","size",{unique:!1})}if(!i.objectStoreNames.contains("modelsByUrl")){const n=i.createObjectStore("modelsByUrl",{keyPath:"url"});n.createIndex("timestamp","timestamp",{unique:!1}),n.createIndex("size","size",{unique:!1})}}})}async clearCache(){try{const t=(await this.openIndexedDB()).transaction(["models","modelsByUrl"],"readwrite"),r=t.objectStore("models");await new Promise((i,n)=>{const s=r.clear();s.onsuccess=()=>i(),s.onerror=()=>n(s.error)});const a=t.objectStore("modelsByUrl");await new Promise((i,n)=>{const s=a.clear();s.onsuccess=()=>i(),s.onerror=()=>n(s.error)}),this.logger.info("Model cache cleared")}catch(e){this.logger.error("Error clearing cache:",e)}}async getCacheInfo(){try{const r=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((a,i)=>{const n=r.getAll();n.onsuccess=()=>{const s=n.result,o=s.reduce((h,d)=>h+(d.size||0),0);a({count:s.length,totalSize:o})},n.onerror=()=>i(n.error)})}catch(e){return this.logger.error("Error getting cache info:",e),{count:0,totalSize:0}}}}exports.ModelManager=F;exports.WhisperWasmService=A;exports.getAllModels=z;
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const M=class M{constructor(t=M.levels.INFO,e=""){this.level=t,this.prefix=e}debug(...t){this.level<=M.levels.DEBUG&&console.debug(`[${this.prefix}] [DEBUG]`,...t)}info(...t){this.level<=M.levels.INFO&&console.info(`[${this.prefix}] [INFO]`,...t)}warn(...t){this.level<=M.levels.WARN&&console.warn(`[${this.prefix}] [WARN]`,...t)}error(...t){this.level<=M.levels.ERROR&&console.error(`[${this.prefix}] [ERROR]`,...t)}setLevel(t){this.level=t}getLevel(){return this.level}};M.levels={DEBUG:0,INFO:1,WARN:2,ERROR:3};let y=M;const L=async()=>WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,10,1,8,0,65,0,253,15,253,98,11])),W={language:"auto",threads:4,translate:!1};function F(n){const t=String(n).trim().replace(",","."),e=t.split(":").map(Number);if(e.some(Number.isNaN))throw new Error(`Bad time: "${n}"`);let r=0,o=0,i=0;if(e.length===3)[r,o]=e,i=parseFloat(t.split(":").pop()||"0");else if(e.length===2)[o]=e,i=parseFloat(t.split(":").pop()||"0");else throw new Error(`Bad time format: "${n}"`);return Math.floor(((r*60+o)*60+i)*1e3)}function D(n){const e=/^\s*\[?\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*-->\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*\]?\s*(.*)\s*$/.exec(n);if(!e)throw new Error("Line does not match VTT-like pattern: "+n);const r=e[1],o=e[2],i=e[3]||"",s=F(r),a=F(o);if(a<s)throw new Error("End time is before start time");return{startMs:s,endMs:a,start:r,end:o,text:i}}function x(n){return new Promise(t=>setTimeout(t,n))}function I(n,t){let e=null,r=!1,o=null,i=null;return{timeoutError:()=>new Promise((u,l)=>{i=u,o=l,e=setTimeout(()=>{!r&&o&&(r=!0,o(new Error(t)))},n)}),clear:()=>{e&&(clearTimeout(e),e=null),i&&(i(),i=null),r=!0,o=null}}}function $(n,t=16e3*100){const e=[];for(let r=0;r<n.length;r+=t)e.push(n.subarray(r,r+t));return e}class 
U{constructor(t,e){this.whisperService=t,this.logger=new y((e==null?void 0:e.logLevel)||y.levels.ERROR,"TranscriptionSession")}async*streaming(t,e={}){const{timeoutMs:r=3e4}=e,o=$(t);let i=0;for await(const s of o){const a=[];let u=null,l=!1,d,h=0;const{timeoutError:c,clear:m}=I(r,"Transcribe timeout"),w=()=>this.whisperService.transcribe(s,g=>{h=g.timeEnd,g.timeStart+=i,g.timeEnd+=i,this.logger.debug("Transcription segment in session:",g),u?(u(g),u=null):a.push(g),m()},e).then(()=>{this.logger.debug("Transcription done in session then"),l=!0,i+=h,m(),u==null||u(void 0)}).catch(g=>{this.logger.debug("Transcription error in session catch:",g),d=g,m(),u==null||u(void 0)});for(w();;){if(d){if(e.restartModelOnError){this.whisperService.restartModel(),w();continue}throw d}if(l)break;if(a.length)yield a.shift();else try{const g=await Promise.race([new Promise(f=>u=f),c()]);g&&(yield g)}catch(g){d=g}}e.sleepMsBetweenChunks&&await x(e.sleepMsBetweenChunks)}}async*streamimg(t,e={}){yield*this.streaming(t,e)}}class N extends EventTarget{on(t,e){return this.addEventListener(t,e),()=>this.removeEventListener(t,e)}emit(t,e){this.dispatchEvent(new CustomEvent(t,{detail:e}))}}class j{constructor(t){this.wasmModule=null,this.instance=null,this.modelFileName="whisper.bin",this.isTranscribing=!1,this.bus=new N,this.modelData=null,this.logger=new y((t==null?void 0:t.logLevel)??y.levels.ERROR,"WhisperWasmService"),t!=null&&t.init&&this.loadWasmScript()}async checkWasmSupport(){return await L()}async loadWasmScript(){this.wasmModule=await(await Promise.resolve().then(()=>require("./libmain-CWYJvMY5.js"))).default({print:(t,...e)=>{e.length>0&&this.logger.debug(e),t.startsWith("[")?(this.logger.info(t),this.bus.emit("transcribe",t)):(this.logger.debug(t),this.bus.emit("system_info",t))},printErr:(t,...e)=>{e.length>0&&this.logger.debug(e),this.logger.warn(t),this.bus.emit("transcribeError",t)}})}async initModel(t){if(!await this.checkWasmSupport())throw new Error("WASM is not 
supported");return this.modelData=t,this.wasmModule&&(this.wasmModule.FS_unlink(this.modelFileName),this.wasmModule.free()),await this.loadWasmScript(),await x(100),this.storeFS(this.modelFileName,t),this.instance=this.wasmModule.init(this.modelFileName),Promise.resolve()}restartModel(){if(!this.modelData)throw new Error("Model not loaded");return this.initModel(this.modelData)}storeFS(t,e){if(!this.wasmModule)throw new Error("WASM module not loaded");try{this.wasmModule.FS_unlink(t)}catch{}this.wasmModule.FS_createDataFile("/",t,e,!0,!0,!0)}async transcribe(t,e,r={}){if(this.isTranscribing)throw new Error("Already transcribing");if(!this.wasmModule)throw new Error("WASM module not loaded");if(!this.instance)throw new Error("WASM instance not loaded");const o=120;t.length>16e3*o&&this.logger.warn("It's not recommended to transcribe audio data that is longer than 120 seconds"),this.isTranscribing=!0;const{language:i="auto",threads:s=4,translate:a=!1}={...W,...r},u=[],l=Date.now();return this.wasmModule.full_default(this.instance,t,i,s,a),await new Promise((d,h)=>{const c=this.bus.on("transcribe",g=>{const{startMs:f,endMs:v,text:B}=D(g.detail),q={timeStart:f,timeEnd:v,text:B,raw:g.detail};u.push(q),e==null||e(q)}),m=setTimeout(()=>{this.isTranscribing=!1,c(),w(),this.logger.error("Transcribe timeout"),h(new Error("Transcribe timeout")),this.bus.emit("transcribeError","Transcribe timeout")},o*2*1e3),w=this.bus.on("transcribeError",g=>{this.isTranscribing=!1,c(),w(),clearTimeout(m),this.logger.debug("Transcribe error",g.detail),d({segments:u,transcribeDurationMs:Date.now()-l})})})}createSession(){return new U(this,{logLevel:this.logger.getLevel()})}}const O={"tiny.en":{id:"tiny.en",name:"Tiny English",size:75,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin"},tiny:{id:"tiny",name:"Tiny 
Multilingual",size:75,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"},"base.en":{id:"base.en",name:"Base English",size:142,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"},base:{id:"base",name:"Base Multilingual",size:142,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"},"small.en":{id:"small.en",name:"Small English",size:466,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin"},small:{id:"small",name:"Small Multilingual",size:466,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin"},"tiny.en-q5_1":{id:"tiny.en-q5_1",name:"Tiny English (Q5_1)",size:31,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin"},"tiny-q5_1":{id:"tiny-q5_1",name:"Tiny Multilingual (Q5_1)",size:31,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin"},"base.en-q5_1":{id:"base.en-q5_1",name:"Base English (Q5_1)",size:57,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin"},"base-q5_1":{id:"base-q5_1",name:"Base Multilingual (Q5_1)",size:57,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin"},"small.en-q5_1":{id:"small.en-q5_1",name:"Small English (Q5_1)",size:182,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q5_1.bin"},"small-q5_1":{id:"small-q5_1",name:"Small Multilingual (Q5_1)",size:182,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin"},"medium.en-q5_0":{id:"medium.en-q5_0",name:"Medium English 
(Q5_0)",size:515,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q5_0.bin"},"medium-q5_0":{id:"medium-q5_0",name:"Medium Multilingual (Q5_0)",size:515,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin"},"large-q5_0":{id:"large-q5_0",name:"Large Multilingual (Q5_0)",size:1030,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-q5_0.bin"}};function _(){return Object.values(O).map(({url:n,...t})=>t)}function z(n){return O[n]}class H{constructor(t={logLevel:y.levels.ERROR}){this.cacheEnabled=!0,this.models=_(),this.logger=new y(t.logLevel,"ModelManager")}async loadModel(t,e=!0,r){var w;const o=z(t);if(!o)throw new Error(`Model ${t} not found in config`);if(this.cacheEnabled&&e){const g=await this.getCachedModel(t);if(g)return this.logger.info(`Model ${t} loaded from cache`),r&&r(100),g}this.logger.info(`Loading model ${t} from ${o.url}`);const i=await fetch(o.url);if(!i.ok)throw new Error(`Failed to load model: ${i.statusText}`);const s=i.headers.get("content-length"),a=s?parseInt(s,10):0;let u=0;const l=(w=i.body)==null?void 0:w.getReader();if(!l)throw new Error("Response body is not readable");const d=[];try{let g=!1;for(;!g;){const f=await l.read();if(g=f.done,!g&&f.value&&(d.push(f.value),u+=f.value.length,r&&a>0)){const v=Math.round(u/a*100);r(v)}}}finally{l.releaseLock()}const h=d.reduce((g,f)=>g+f.length,0),c=new Uint8Array(h);let m=0;for(const g of d)c.set(g,m),m+=g.length;return this.cacheEnabled&&e&&await this.saveModelToCache(t,c),r&&r(100),c}async loadModelByUrl(t,e){var r;try{if(this.cacheEnabled){const m=await this.getCachedModelByUrl(t);if(m)return this.logger.info(`WASM module loaded from cache by URL: ${t}`),e&&e(100),m}this.logger.info(`Loading WASM module from URL: ${t}`);const o=await fetch(t);if(!o.ok)throw new Error(`Failed to load WASM module: 
${o.statusText}`);const i=o.headers.get("content-length"),s=i?parseInt(i,10):0;let a=0;const u=(r=o.body)==null?void 0:r.getReader();if(!u)throw new Error("Response body is not readable");const l=[];try{let m=!1;for(;!m;){const w=await u.read();if(m=w.done,!m&&w.value&&(l.push(w.value),a+=w.value.length,e&&s>0)){const g=Math.round(a/s*100);e(g)}}}finally{u.releaseLock()}const d=l.reduce((m,w)=>m+w.length,0),h=new Uint8Array(d);let c=0;for(const m of l)h.set(m,c),c+=m.length;return this.cacheEnabled&&await this.saveModelToCacheByUrl(t,h),e&&e(100),h}catch(o){throw this.logger.error(o),new Error("Failed to load WASM module")}}async getCachedModelByUrl(t){try{const o=(await this.openIndexedDB()).transaction(["modelsByUrl"],"readonly").objectStore("modelsByUrl");return new Promise((i,s)=>{const a=o.get(t);a.onsuccess=()=>{const u=a.result;u&&u.data?i(u.data):i(null)},a.onerror=()=>s(a.error)})}catch(e){return this.logger.error("Error reading model from cache by URL:",e),null}}async saveModelToCacheByUrl(t,e){try{const i=(await this.openIndexedDB()).transaction(["modelsByUrl"],"readwrite").objectStore("modelsByUrl");await new Promise((s,a)=>{const u=i.put({url:t,data:e,timestamp:Date.now(),size:e.length});u.onsuccess=()=>s(),u.onerror=()=>a(u.error)}),this.logger.info(`Model saved to cache by URL: ${t}`)}catch(r){this.logger.error("Error saving model to cache by URL:",r)}}async getAvailableModels(){const t=[...this.models];if(!this.cacheEnabled)return t;try{const e=await this.getCachedModelNames();return t.map(r=>({...r,cached:e.includes(r.id)}))}catch(e){return this.logger.error("Error checking cache status:",e),t}}getAvailableModelsSync(){return[...this.models]}getModelConfig(t){return z(t)}async saveModelToCache(t,e){try{const i=(await this.openIndexedDB()).transaction(["models"],"readwrite").objectStore("models");await new Promise((s,a)=>{const 
u=i.put({name:t,data:e,timestamp:Date.now(),size:e.length});u.onsuccess=()=>s(),u.onerror=()=>a(u.error)}),this.logger.info(`Model ${t} saved to cache`)}catch(r){this.logger.error("Error saving model to cache:",r)}}async getCachedModel(t){try{const o=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((i,s)=>{const a=o.get(t);a.onsuccess=()=>{const u=a.result;u&&u.data?i(u.data):i(null)},a.onerror=()=>s(a.error)})}catch(e){return this.logger.error("Error getting cached model:",e),null}}async getCachedModelNames(){try{const r=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((o,i)=>{const s=r.getAllKeys();s.onsuccess=()=>{const a=s.result;o(a)},s.onerror=()=>i(s.error)})}catch(t){return this.logger.error("Error getting cached model names:",t),[]}}async openIndexedDB(){return new Promise((t,e)=>{const r=indexedDB.open("WhisperModels",2);r.onerror=()=>e(r.error),r.onsuccess=()=>t(r.result),r.onupgradeneeded=o=>{const i=o.target.result;if(!i.objectStoreNames.contains("models")){const s=i.createObjectStore("models",{keyPath:"name"});s.createIndex("timestamp","timestamp",{unique:!1}),s.createIndex("size","size",{unique:!1})}if(!i.objectStoreNames.contains("modelsByUrl")){const s=i.createObjectStore("modelsByUrl",{keyPath:"url"});s.createIndex("timestamp","timestamp",{unique:!1}),s.createIndex("size","size",{unique:!1})}}})}async clearCache(){try{const e=(await this.openIndexedDB()).transaction(["models","modelsByUrl"],"readwrite"),r=e.objectStore("models");await new Promise((i,s)=>{const a=r.clear();a.onsuccess=()=>i(),a.onerror=()=>s(a.error)});const o=e.objectStore("modelsByUrl");await new Promise((i,s)=>{const a=o.clear();a.onsuccess=()=>i(),a.onerror=()=>s(a.error)}),this.logger.info("Model cache cleared")}catch(t){this.logger.error("Error clearing cache:",t)}}async getCacheInfo(){try{const r=(await 
this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((o,i)=>{const s=r.getAll();s.onsuccess=()=>{const a=s.result,u=a.reduce((l,d)=>l+(d.size||0),0);o({count:a.length,totalSize:u})},s.onerror=()=>i(s.error)})}catch(t){return this.logger.error("Error getting cache info:",t),{count:0,totalSize:0}}}}var p=(n=>(n.MP3="mp3",n.WAV="wav",n.OGG="ogg",n.M4A="m4a",n.AAC="aac",n.FLAC="flac",n.MP4="mp4",n.WEBM="webm",n.AVI="avi",n.MOV="mov",n.MKV="mkv",n.RAW_PCM="raw_pcm",n.MICROPHONE="microphone",n.AUDIO_ELEMENT="audio_element",n))(p||{});const S={targetSampleRate:16e3,targetChannels:1,inputSampleRate:16e3,normalize:!0,noiseReduction:!1,logLevel:y.levels.ERROR,signal:void 0,recordingDurationMs:1e4};function Q(n){return typeof n=="number"?n:n?y.levels[n]:y.levels.ERROR}function R(n){return new y(Q(n.logLevel),"AudioConverter")}function E(n){if(n!=null&&n.aborted)throw new DOMException("Aborted","AbortError")}function A(){return typeof window>"u"?!1:!!(window.AudioContext||window.webkitAudioContext||window.OfflineAudioContext||window.webkitOfflineAudioContext)}function V(){return[p.MP3,p.WAV,p.OGG,p.M4A,p.AAC,p.FLAC,p.MP4,p.WEBM,p.AVI,p.MOV,p.MKV,p.RAW_PCM,p.MICROPHONE,p.AUDIO_ELEMENT]}async function G(n,t={},e={}){var s,a,u,l,d;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info(`Converting file: ${n.name}`),(s=e.onProgress)==null||s.call(e,0,`Loading file: ${n.name}`);const h=await Y(n);(a=e.onProgress)==null||a.call(e,20,"File loaded, decoding..."),E(r.signal);const c=await C(h,r,e,o);(u=e.onProgress)==null||u.call(e,40,"Audio decoded, processing...");const m=await P(c,r,e,o,i);return(l=e.onProgress)==null||l.call(e,100,"Conversion completed"),o.info("File conversion completed successfully"),m}catch(h){throw o.error("File conversion failed:",h),(d=e.onError)==null||d.call(e,h),h}}async function T(n,t={},e={}){var s,a,u,l,d;if(!A())throw new Error("Web 
Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info("Converting from MediaStream"),(s=e.onProgress)==null||s.call(e,0,"Starting recording...");const h=await re(n,r,e,o);(a=e.onProgress)==null||a.call(e,50,"Recording completed, decoding...");const c=await h.arrayBuffer(),m=await C(c,r,e,o);(u=e.onProgress)==null||u.call(e,70,"Audio decoded, processing...");const w=await P(m,r,e,o,i);return(l=e.onProgress)==null||l.call(e,100,"Conversion completed"),w}catch(h){throw o.error("MediaStream conversion failed:",h),(d=e.onError)==null||d.call(e,h),h}}async function K(n,t={},e={}){var s,a,u,l,d,h;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info("Converting from HTMLAudioElement"),(s=e.onProgress)==null||s.call(e,0,"Capturing audio from element...");const c=n.srcObject;if(c&&c instanceof MediaStream){i.push("Using HTMLAudioElement.srcObject MediaStream");const f=await T(c,r,e);return{...f,warnings:[...i,...f.warnings??[]]}}const m=n.currentSrc||n.src;if(m)try{(a=e.onProgress)==null||a.call(e,10,"Fetching audio source...");const f=await ne(m,r.signal);(u=e.onProgress)==null||u.call(e,30,"Fetched, decoding...");const v=await C(f,r,e,o);(l=e.onProgress)==null||l.call(e,60,"Decoded, processing...");const B=await P(v,r,e,o,i);return(d=e.onProgress)==null||d.call(e,100,"Conversion completed"),B}catch(f){if((f==null?void 0:f.name)==="AbortError")throw f;i.push(`Failed to fetch element src (CORS?) 
– falling back to captureStream: ${f.message}`)}const w=n.captureStream||n.mozCaptureStream;if(typeof w!="function")throw new Error("Unable to capture audio from HTMLAudioElement: no srcObject, fetch failed, and captureStream() is not supported");i.push("Using HTMLAudioElement.captureStream() fallback");const g=w.call(n);try{const f=await T(g,r,e);return{...f,warnings:[...i,...f.warnings??[]]}}finally{g.getTracks().forEach(f=>f.stop())}}catch(c){throw o.error("HTMLAudioElement conversion failed:",c),(h=e.onError)==null||h.call(e,c),c}}async function J(n,t={},e={}){var i,s,a,u;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r);try{E(r.signal),o.info("Converting from Float32Array"),(i=e.onProgress)==null||i.call(e,0,"Processing Float32Array...");const l=window.AudioContext||window.webkitAudioContext,d=r.inputSampleRate??r.targetSampleRate,h=new l({sampleRate:d});try{const c=h.createBuffer(1,n.length,h.sampleRate);c.getChannelData(0).set(n),(s=e.onProgress)==null||s.call(e,30,"AudioBuffer created, processing...");const w=[];d!==r.targetSampleRate&&w.push(`Float32Array sample rate (${d}Hz) will be converted to ${r.targetSampleRate}Hz`);const g=await P(c,r,e,o,w);return(a=e.onProgress)==null||a.call(e,100,"Conversion completed"),o.info("Float32Array conversion completed successfully"),g}finally{try{await h.close()}catch{}}}catch(l){throw o.error("Float32Array conversion failed:",l),(u=e.onError)==null||u.call(e,l),l}}async function X(n,t={},e={}){var s,a,u,l;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info("Converting from ArrayBuffer"),(s=e.onProgress)==null||s.call(e,0,"Processing ArrayBuffer...");const d=await C(n,r,e,o);(a=e.onProgress)==null||a.call(e,40,"Audio decoded, processing...");const h=await P(d,r,e,o,i);return(u=e.onProgress)==null||u.call(e,100,"Conversion completed"),o.info("ArrayBuffer conversion completed 
successfully"),h}catch(d){throw o.error("ArrayBuffer conversion failed:",d),(l=e.onError)==null||l.call(e,d),d}}async function Y(n){return new Promise((t,e)=>{const r=new FileReader;r.onload=o=>{var i;return t((i=o.target)==null?void 0:i.result)},r.onerror=o=>e(o),r.readAsArrayBuffer(n)})}async function C(n,t,e,r){var s;(s=e.onProgress)==null||s.call(e,15,"Decoding audio data");const o=window.AudioContext||window.webkitAudioContext,i=new o({sampleRate:t.targetSampleRate});try{return await i.decodeAudioData(n)}catch(a){throw r.error("Audio decoding failed:",a),new Error(`Failed to decode audio: ${a.message}`)}finally{try{await i.close()}catch{}}}async function P(n,t,e,r,o){var a,u,l,d;(a=e.onProgress)==null||a.call(e,50,"Converting audio format..."),te(n,t,o);const i=await Z(n,t,e);(u=e.onProgress)==null||u.call(e,70,"Converting to Float32Array...");const s=b(i);return(l=e.onProgress)==null||l.call(e,80,"Applying effects..."),t.normalize&&k(s),t.noiseReduction&&ee(s),(d=e.onProgress)==null||d.call(e,90,"Finalizing..."),{audioData:s,audioInfo:{sampleRate:i.sampleRate,duration:i.duration,channels:i.numberOfChannels,bitDepth:32,format:"float32"},warnings:o.length>0?o:void 0}}async function Z(n,t,e){var s;(s=e.onProgress)==null||s.call(e,60,"Converting audio format...");const r=window.OfflineAudioContext||window.webkitOfflineAudioContext,o=new r(t.targetChannels,Math.floor(n.length*t.targetSampleRate/n.sampleRate),t.targetSampleRate),i=o.createBufferSource();return i.buffer=n,i.connect(o.destination),i.start(0),await o.startRendering()}function b(n){if(n.numberOfChannels===1)return n.getChannelData(0);{const t=n.getChannelData(0),e=n.getChannelData(1),r=new Float32Array(t.length);for(let o=0;o<t.length;o++)r[o]=(t[o]+e[o])/2;return r}}function k(n){let t=0;for(let e=0;e<n.length;e++)t=Math.max(t,Math.abs(n[e]));if(t>0){const e=.95/t;for(let r=0;r<n.length;r++)n[r]*=e}}function ee(n){const t=new Float32Array(n.length),e=3;for(let r=0;r<n.length;r++){let o=0,i=0;for(let 
s=Math.max(0,r-e);s<=Math.min(n.length-1,r+e);s++)o+=n[s],i++;t[r]=o/i}n.set(t)}function te(n,t,e){n.numberOfChannels>2&&e.push(`Audio has ${n.numberOfChannels} channels, will be mixed to mono`),n.sampleRate!==t.targetSampleRate&&e.push(`Audio sample rate (${n.sampleRate}Hz) will be converted to ${t.targetSampleRate}Hz`)}async function re(n,t,e,r){var w;if(typeof window>"u")throw new Error("MediaStream recording is only supported in browser environments");if(!window.MediaRecorder)throw new Error("MediaRecorder is not supported in this browser");const o=window.MediaRecorder,s=["audio/webm;codecs=opus","audio/webm","audio/ogg;codecs=opus","audio/ogg"].find(g=>o.isTypeSupported(g)),a=new o(n,s?{mimeType:s}:void 0),u=[],l=new Promise((g,f)=>{a.ondataavailable=v=>{v.data&&v.data.size>0&&u.push(v.data)},a.onerror=()=>f(new Error("MediaRecorder error")),a.onstop=()=>{const v=s||a.mimeType||"application/octet-stream";g(new Blob(u,{type:v}))}}),d=t.signal,h=()=>{try{a.state!=="inactive"&&a.stop()}catch{}};d==null||d.addEventListener("abort",h,{once:!0});const c=t.recordingDurationMs??1e4,m=setTimeout(()=>{try{a.state!=="inactive"&&a.stop()}catch{}},c);(w=e.onProgress)==null||w.call(e,20,"Recording audio..."),r.debug("Starting MediaRecorder",{mimeType:s??a.mimeType,durationMs:c}),a.start(250);try{return await l}finally{clearTimeout(m),d==null||d.removeEventListener("abort",h)}}async function ne(n,t){E(t);const e=await fetch(n,{signal:t});if(!e.ok)throw new Error(`Failed to fetch (${e.status}): ${e.statusText}`);return await e.arrayBuffer()}exports.AudioFormat=p;exports.ModelManager=H;exports.WhisperWasmService=j;exports.convertFromArrayBuffer=X;exports.convertFromAudioElement=K;exports.convertFromFile=G;exports.convertFromFloat32Array=J;exports.convertFromMediaStream=T;exports.getAllModels=_;exports.getSupportedFormats=V;exports.isWebAudioSupported=A;
|
package/dist/index.d.ts
CHANGED
|
@@ -2,4 +2,8 @@ export { WhisperWasmService } from './whisper/WhisperWasmService';
|
|
|
2
2
|
export type { WhisperWasmModule } from './whisper/types';
|
|
3
3
|
export { ModelManager } from './whisper/ModelManager';
|
|
4
4
|
export { getAllModels } from './whisper/ModelConfig';
|
|
5
|
+
export type { ModelID, WhisperModel } from './whisper/ModelConfig';
|
|
6
|
+
export { convertFromFile, convertFromMediaStream, convertFromAudioElement, convertFromFloat32Array, convertFromArrayBuffer, isWebAudioSupported, getSupportedFormats, } from './audio/AudioConverter';
|
|
7
|
+
export { AudioFormat } from './audio/types';
|
|
8
|
+
export type { AudioInfo, AudioConverterOptions, AudioConversionResult, AudioConverterCallbacks, AudioSource, AudioContextConfig, } from './audio/types';
|
|
5
9
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,YAAY,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,YAAY,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGnE,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,uBAAuB,EACvB,uBAAuB,EACvB,sBAAsB,EACtB,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC5C,YAAY,EACV,SAAS,EACT,qBAAqB,EACrB,qBAAqB,EACrB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,GACnB,MAAM,eAAe,CAAC"}
|