@timur00kh/whisper.wasm 0.0.6 → 0.0.7-canary.2d7c1b3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 whisper.wasm
3
+ Copyright (c) 2025-2026 Timur Kh (timur00kh)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy of
6
6
  this software and associated documentation files (the "Software"), to deal in
package/README.md CHANGED
@@ -1,6 +1,17 @@
1
1
  # Whisper.wasm
2
2
 
3
- A TypeScript wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) that brings OpenAI's Whisper speech recognition to the browser and Node.js using WebAssembly.
3
+ [![CI](https://github.com/timur00kh/whisper.wasm/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/timur00kh/whisper.wasm/actions/workflows/ci.yml)
4
+ [![npm](https://img.shields.io/npm/v/%40timur00kh%2Fwhisper.wasm?color=blue)](https://www.npmjs.com/package/@timur00kh/whisper.wasm)
5
+ [![npm downloads](https://img.shields.io/npm/dm/%40timur00kh%2Fwhisper.wasm)](https://www.npmjs.com/package/@timur00kh/whisper.wasm)
6
+ [![license](https://img.shields.io/npm/l/%40timur00kh%2Fwhisper.wasm)](LICENSE)
7
+ [![GitHub stars](https://img.shields.io/github/stars/timur00kh/whisper.wasm?style=social)](https://github.com/timur00kh/whisper.wasm)
8
+ [![issues](https://img.shields.io/github/issues/timur00kh/whisper.wasm)](https://github.com/timur00kh/whisper.wasm/issues)
9
+ [![release date](https://img.shields.io/github/release-date/timur00kh/whisper.wasm)](https://github.com/timur00kh/whisper.wasm/releases)
10
+ [![GitHub Pages](https://img.shields.io/badge/demo-GitHub%20Pages-blue)](https://timur00kh.github.io/whisper.wasm/)
11
+
12
+ A TypeScript wrapper for [whisper.cpp](https://github.com/ggml-org/whisper.cpp) that brings OpenAI's Whisper speech recognition to the browser using WebAssembly.
13
+
14
+ > Note: Node.js support is experimental / untested at the moment. (The core WASM layer may work, but browser-specific helpers like the AudioConverter require Web APIs.)
4
15
 
5
16
  ## Features
6
17
 
@@ -8,15 +19,15 @@ A TypeScript wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
8
19
  - ⚡ **WebAssembly performance** - runs directly in the browser
9
20
  - 🌍 **Multi-language support** with automatic language detection
10
21
  - 🔄 **Translation capabilities** - translate speech to English
11
- - 📱 **Cross-platform** - works in browsers and Node.js
22
+ - 📱 **Cross-platform** - browser-first; Node.js is experimental / untested
12
23
  - 🧠 **Multiple model sizes** - from tiny to large models
13
24
  - 🎯 **Streaming transcription** - real-time audio processing
14
- - 📦 **Zero dependencies** - no external libraries required
25
+ - 🎵 **Audio conversion helpers (browser-only)** - convert files / mic / `<audio>` to 16kHz `Float32Array` for Whisper
15
26
 
16
27
  ## Installation
17
28
 
18
29
  ```bash
19
- npm install @timur00kh/whisper.wasm
30
+ npm install @timur00kh/whisper.wasm@canary
20
31
  ```
21
32
 
22
33
  ## Quick Start
@@ -24,7 +35,7 @@ npm install @timur00kh/whisper.wasm
24
35
  ### Basic Usage
25
36
 
26
37
  ```typescript
27
- import { WhisperWasmService, ModelManager } from 'whisper.wasm';
38
+ import { WhisperWasmService, ModelManager, convertFromFile } from '@timur00kh/whisper.wasm';
28
39
 
29
40
  // Initialize the service
30
41
  const whisper = new WhisperWasmService({ logLevel: 1 });
@@ -38,11 +49,15 @@ if (!isSupported) {
38
49
 
39
50
  // Load a model
40
51
  const modelData = await modelManager.loadModel('base'); // or 'tiny', 'small', 'medium', 'large'
41
- await whisper.loadWasmModule(modelData);
52
+ await whisper.initModel(modelData);
42
53
 
43
54
  // Create a transcription session for streaming
44
55
  const session = whisper.createSession();
45
56
 
57
+ // Convert an audio/video file to 16kHz Float32Array (browser-only helper)
58
+ // (e.g. a File from <input type="file" />)
59
+ const { audioData } = await convertFromFile(file, { normalize: true });
60
+
46
61
  // Process audio in chunks
47
62
  const stream = session.streamimg(audioData, {
48
63
  language: 'en',
@@ -59,7 +74,7 @@ for await (const segment of stream) {
59
74
  ### Model Management
60
75
 
61
76
  ```typescript
62
- import { ModelManager, getAllModels } from 'whisper.wasm';
77
+ import { ModelManager, getAllModels } from '@timur00kh/whisper.wasm';
63
78
 
64
79
  const modelManager = new ModelManager();
65
80
 
@@ -100,7 +115,7 @@ new WhisperWasmService(options?: {
100
115
 
101
116
  Checks if WebAssembly is supported in the current environment.
102
117
 
103
- ##### `loadWasmModule(model: Uint8Array): Promise<void>`
118
+ ##### `initModel(model: Uint8Array): Promise<void>`
104
119
 
105
120
  Loads a Whisper model from binary data.
106
121
 
@@ -182,6 +197,43 @@ Processes audio data in streaming fashion.
182
197
  - **Safari**: 11+
183
198
  - **Edge**: 16+
184
199
 
200
+ ## FAQ
201
+
202
+ ### Q: Why is my transcription stopping unexpectedly?
203
+
204
+ A: This is usually related to WebAssembly execution being terminated by the browser due to resource management policies, low battery, or background tab throttling. Use the `restartModelOnError: true` option to automatically restart the model when this happens.
205
+
206
+ ### Q: Can I use this in a background tab?
207
+
208
+ A: Some browsers may throttle or pause WebAssembly execution in background tabs. Consider using the `restartModelOnError` option and implementing visibility change listeners to handle this.
209
+
210
+ ### Q: Why is the first transcription slower?
211
+
212
+ A: The first transcription includes model initialization time. Subsequent transcriptions with the same model will be faster.
213
+
214
+ ### Q: Can I transcribe audio in real-time?
215
+
216
+ A: Yes! Use the `TranscriptionSession` with streaming audio data. For real-time applications, consider using the `tiny` or `base` models for better performance.
217
+
218
+ ### Q: What audio formats are supported?
219
+
220
+ A: Whisper expects `Float32Array` audio data at 16kHz. You can either prepare it yourself, or use the built-in **AudioConverter** helpers (browser-only):
221
+
222
+ - `convertFromFile(file)` - audio/video files supported by the browser decoder
223
+ - `convertFromArrayBuffer(buffer)` - decode & convert from an ArrayBuffer
224
+ - `convertFromFloat32Array(data, { inputSampleRate? })` - resample if needed
225
+ - `convertFromMediaStream(stream)` - microphone / capture stream (requires `MediaRecorder`)
226
+ - `convertFromAudioElement(audioEl)` - tries `fetch(audioEl.src)` (CORS), otherwise `captureStream()` fallback (browser support varies)
227
+
228
+ Notes:
229
+
230
+ - AudioConverter uses Web APIs (`Web Audio`, `MediaRecorder`), so it does **not** run in Node.js.
231
+ - `<audio>` conversion may require proper CORS headers to allow `fetch()` of the audio URL.
232
+
233
+ ### Q: How do I handle errors gracefully?
234
+
235
+ A: Use try-catch blocks around transcription calls and enable the `restartModelOnError` option for automatic recovery from WebAssembly execution issues.
236
+
185
237
  ## Demo
186
238
 
187
239
  Try the interactive demo:
@@ -199,11 +251,22 @@ npm run dev:demo
199
251
  The demo includes:
200
252
 
201
253
  - Audio file upload and processing
254
+ - Transcription from `<audio>` element
255
+ - Microphone recording (Start/Stop) and transcription
202
256
  - Model selection and loading
203
257
  - Real-time transcription with progress
204
258
  - Language detection and translation
205
259
  - Streaming audio support
206
260
 
261
+ ## Changelog
262
+
263
+ For detailed information about changes, new features, and bug fixes, see our [changelog documentation](docs/changelog/).
264
+
265
+ ### Recent Updates
266
+
267
+ - **[feature-restart-on-timeout](docs/changelog/feature-restart-on-timeout.md)** - Added timeout handling, error recovery, and enhanced demo application
268
+ - **[feature-audio-converter](docs/changelog/feature-audio-converter.md)** - Added AudioConverter helpers + demo integration (files, mic, `<audio>`)
269
+
207
270
  ## Development
208
271
 
209
272
  ### Prerequisites
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=setup.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"setup.d.ts","sourceRoot":"","sources":["../../src/__tests__/setup.ts"],"names":[],"mappings":""}
@@ -0,0 +1,31 @@
1
+ import { AudioFormat, AudioConverterOptions, AudioConversionResult, AudioConverterCallbacks } from './types';
2
+
3
+ /**
4
+ * Checks whether the Web Audio API is supported
5
+ */
6
+ export declare function isWebAudioSupported(): boolean;
7
+ /**
8
+ * Returns the list of supported formats
9
+ */
10
+ export declare function getSupportedFormats(): AudioFormat[];
11
+ /**
12
+ * Converts audio from a file (supports both audio and video files)
13
+ */
14
+ export declare function convertFromFile(file: File, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
15
+ /**
16
+ * Converts audio from a MediaStream (microphone)
17
+ */
18
+ export declare function convertFromMediaStream(stream: MediaStream, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
19
+ /**
20
+ * Converts audio from an HTMLAudioElement
21
+ */
22
+ export declare function convertFromAudioElement(element: HTMLAudioElement, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
23
+ /**
24
+ * Converts audio from a Float32Array
25
+ */
26
+ export declare function convertFromFloat32Array(data: Float32Array, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
27
+ /**
28
+ * Converts audio from an ArrayBuffer
29
+ */
30
+ export declare function convertFromArrayBuffer(buffer: ArrayBuffer, options?: AudioConverterOptions, callbacks?: AudioConverterCallbacks): Promise<AudioConversionResult>;
31
+ //# sourceMappingURL=AudioConverter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AudioConverter.d.ts","sourceRoot":"","sources":["../../src/audio/AudioConverter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAGH,OAAO,EACL,WAAW,EACX,KAAK,qBAAqB,EAC1B,KAAK,qBAAqB,EAC1B,KAAK,uBAAuB,EAC7B,MAAM,SAAS,CAAC;AAuCjB;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAW7C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,WAAW,EAAE,CAiBnD;AAED;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,IAAI,EACV,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAmChC;AAED;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,WAAW,EACnB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAgChC;AAED;;GAEG;AACH,wBAAsB,uBAAuB,CAC3C,OAAO,EAAE,gBAAgB,EACzB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAwEhC;AAED;;GAEG;AACH,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,YAAY,EAClB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CAmDhC;AAED;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,WAAW,EACnB,OAAO,GAAE,qBAA0B,EACnC,SAAS,GAAE,uBAA4B,GACtC,OAAO,CAAC,qBAAqB,CAAC,CA8BhC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Audio converter module for whisper.wasm
3
+ *
4
+ * This module provides utilities for converting various audio formats
5
+ * and sources into the Float32Array format required by whisper.wasm.
6
+ */
7
+ export { convertFromFile, convertFromMediaStream, convertFromAudioElement, convertFromFloat32Array, convertFromArrayBuffer, isWebAudioSupported, getSupportedFormats, } from './AudioConverter';
8
+ export { AudioFormat } from './types';
9
+ export type { AudioInfo, AudioConverterOptions, AudioConversionResult, AudioConverterCallbacks, AudioSource, AudioContextConfig, } from './types';
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/audio/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,uBAAuB,EACvB,uBAAuB,EACvB,sBAAsB,EACtB,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,kBAAkB,CAAC;AAG1B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,YAAY,EACV,SAAS,EACT,qBAAqB,EACrB,qBAAqB,EACrB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,GACnB,MAAM,SAAS,CAAC"}
@@ -0,0 +1,93 @@
1
+ import { LoggerLevelsType } from '../utils/Logger';
2
+
3
+ export interface AudioInfo {
4
+ /** Sample rate in Hz */
5
+ sampleRate: number;
6
+ /** Duration in seconds */
7
+ duration: number;
8
+ /** Number of audio channels */
9
+ channels: number;
10
+ /** Audio bit depth */
11
+ bitDepth?: number;
12
+ /** Audio format/container */
13
+ format?: string;
14
+ }
15
+ export interface AudioConverterOptions {
16
+ /** Target sample rate (default: 16000 for Whisper) */
17
+ targetSampleRate?: number;
18
+ /** Target number of channels (default: 1 for mono) */
19
+ targetChannels?: number;
20
+ /**
21
+ * Sample rate for Float32Array inputs.
22
+ * If omitted, Float32Array is assumed to already be at targetSampleRate.
23
+ */
24
+ inputSampleRate?: number;
25
+ /** Whether to normalize audio levels */
26
+ normalize?: boolean;
27
+ /** Whether to apply noise reduction (basic) */
28
+ noiseReduction?: boolean;
29
+ /**
30
+ * Log level for debugging.
31
+ * Prefer numeric LoggerLevelsType for consistency with the library.
32
+ * String values are also accepted for convenience.
33
+ */
34
+ logLevel?: LoggerLevelsType | 'ERROR' | 'WARN' | 'INFO' | 'DEBUG';
35
+ /**
36
+ * Optional AbortSignal to cancel long operations (recording, fetch, etc).
37
+ * Implementations should treat abort as a cancellation and reject.
38
+ */
39
+ signal?: AbortSignal;
40
+ /**
41
+ * For MediaStream / captureStream based conversions: how long to record
42
+ * before auto-stopping. If omitted, defaults are applied.
43
+ */
44
+ recordingDurationMs?: number;
45
+ }
46
+ export interface AudioConversionResult {
47
+ /** Converted audio data as Float32Array */
48
+ audioData: Float32Array;
49
+ /** Audio metadata */
50
+ audioInfo: AudioInfo;
51
+ /** Conversion warnings/notes */
52
+ warnings?: string[];
53
+ }
54
+ export type ProgressCallback = (progress: number, message: string) => void;
55
+ export type ErrorCallback = (error: Error) => void;
56
+ export interface AudioConverterCallbacks {
57
+ onProgress?: ProgressCallback;
58
+ onError?: ErrorCallback;
59
+ }
60
+ /**
61
+ * Supported input formats and source kinds: audio file formats, container formats (mp4, webm, avi, mov, mkv), raw PCM, and microphone / audio-element sources
62
+ */
63
+ export declare enum AudioFormat {
64
+ MP3 = "mp3",
65
+ WAV = "wav",
66
+ OGG = "ogg",
67
+ M4A = "m4a",
68
+ AAC = "aac",
69
+ FLAC = "flac",
70
+ MP4 = "mp4",
71
+ WEBM = "webm",
72
+ AVI = "avi",
73
+ MOV = "mov",
74
+ MKV = "mkv",
75
+ RAW_PCM = "raw_pcm",
76
+ MICROPHONE = "microphone",
77
+ AUDIO_ELEMENT = "audio_element"
78
+ }
79
+ /**
80
+ * Audio source types
81
+ */
82
+ export type AudioSource = File | Blob | ArrayBuffer | Float32Array | AudioBuffer | HTMLAudioElement | MediaStream;
83
+ /**
84
+ * Audio conversion context for browser environment
85
+ */
86
+ export interface AudioContextConfig {
87
+ sampleRate?: number;
88
+ channelCount?: number;
89
+ echoCancellation?: boolean;
90
+ autoGainControl?: boolean;
91
+ noiseSuppression?: boolean;
92
+ }
93
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/audio/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAExD,MAAM,WAAW,SAAS;IACxB,wBAAwB;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,0BAA0B;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,sBAAsB;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,sDAAsD;IACtD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,sDAAsD;IACtD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,wCAAwC;IACxC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+CAA+C;IAC/C,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,gBAAgB,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAElE;;;OAGG;IACH,MAAM,CAAC,EAAE,WAAW,CAAC;IAErB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,qBAAqB;IACpC,2CAA2C;IAC3C,SAAS,EAAE,YAAY,CAAC;IACxB,qBAAqB;IACrB,SAAS,EAAE,SAAS,CAAC;IACrB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,MAAM,gBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;AAC3E,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;AAEnD,MAAM,WAAW,uBAAuB;IACtC,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED;;GAEG;AACH,oBAAY,WAAW;IAErB,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,IAAI,SAAS;IAGb,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,GAAG,QAAQ;IAGX,OAAO,YAAY;IAGnB,UAAU,eAAe;IACzB,aAAa,kBAAkB;CAChC;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GACnB,IAAI,GACJ,IAAI,GACJ,WAAW,GACX,YAAY,GACZ,WAAW,GACX,gBAAgB,GAChB,WAAW,CAAC;AAEhB;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B"}
package/dist/index.cjs.js CHANGED
@@ -1 +1 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const p=class p{constructor(e=p.levels.INFO,t=""){this.level=e,this.prefix=t}debug(...e){this.level<=p.levels.DEBUG&&console.debug(`[${this.prefix}] [DEBUG]`,...e)}info(...e){this.level<=p.levels.INFO&&console.info(`[${this.prefix}] [INFO]`,...e)}warn(...e){this.level<=p.levels.WARN&&console.warn(`[${this.prefix}] [WARN]`,...e)}error(...e){this.level<=p.levels.ERROR&&console.error(`[${this.prefix}] [ERROR]`,...e)}setLevel(e){this.level=e}getLevel(){return this.level}};p.levels={DEBUG:0,INFO:1,WARN:2,ERROR:3};let b=p;const B=async()=>WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,10,1,8,0,65,0,253,15,253,98,11])),T={language:"auto",threads:4,translate:!1};function M(l){const e=String(l).trim().replace(",","."),t=e.split(":").map(Number);if(t.some(Number.isNaN))throw new Error(`Bad time: "${l}"`);let r=0,a=0,i=0;if(t.length===3)[r,a]=t,i=parseFloat(e.split(":").pop()||"0");else if(t.length===2)[a]=t,i=parseFloat(e.split(":").pop()||"0");else throw new Error(`Bad time format: "${l}"`);return Math.floor(((r*60+a)*60+i)*1e3)}function R(l){const t=/^\s*\[?\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*-->\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*\]?\s*(.*)\s*$/.exec(l);if(!t)throw new Error("Line does not match VTT-like pattern: "+l);const r=t[1],a=t[2],i=t[3]||"",n=M(r),s=M(a);if(s<n)throw new Error("End time is before start time");return{startMs:n,endMs:s,start:r,end:a,text:i}}function q(l){return new Promise(e=>setTimeout(e,l))}function W(l,e=16e3*100){const t=[];for(let r=0;r<l.length;r+=e)t.push(l.subarray(r,r+e));return t}class L{constructor(e,t){this.whisperService=e,this.logger=new b((t==null?void 0:t.logLevel)||b.levels.ERROR,"TranscriptionSession")}async*streamimg(e,t={}){const r=W(e);let a=0;for await(const i of r){const n=[];let 
s=null,o=!1,h,d=0;for(this.whisperService.transcribe(i,c=>{d=c.timeEnd,c.timeStart+=a,c.timeEnd+=a,s?(s(c),s=null):n.push(c)},t).then(()=>{o=!0,a+=d,s==null||s(void 0)}).catch(c=>{h=c});;){if(h)throw h;if(o)break;if(n.length)yield n.shift();else{const c=await new Promise(f=>s=f);c&&(yield c)}}t.sleepMsBetweenChunks&&await q(t.sleepMsBetweenChunks)}}}class x extends EventTarget{on(e,t){return this.addEventListener(e,t),()=>this.removeEventListener(e,t)}emit(e,t){this.dispatchEvent(new CustomEvent(e,{detail:t}))}}class A{constructor(e){this.wasmModule=null,this.instance=null,this.modelFileName="whisper.bin",this.isTranscribing=!1,this.bus=new x,this.logger=new b((e==null?void 0:e.logLevel)??b.levels.ERROR,"WhisperWasmService"),e!=null&&e.init&&this.loadWasmScript()}async checkWasmSupport(){return await B()}async loadWasmScript(){this.wasmModule=await(await Promise.resolve().then(()=>require("./libmain-D50HCaHR.js"))).default({print:(e,...t)=>{this.logger.debug(t),e.startsWith("[")?(this.logger.info(e),this.bus.emit("transcribe",e)):(this.logger.debug(e),this.bus.emit("system_info",e))},printErr:(e,...t)=>{this.logger.debug(t),this.logger.warn(e),this.bus.emit("transcribeError",e)}})}async loadWasmModule(e){if(!await this.checkWasmSupport())throw new Error("WASM is not supported");return this.wasmModule&&(this.wasmModule.FS_unlink(this.modelFileName),this.wasmModule.free()),await this.loadWasmScript(),await q(100),this.storeFS(this.modelFileName,e),this.instance=this.wasmModule.init(this.modelFileName),Promise.resolve()}storeFS(e,t){if(!this.wasmModule)throw new Error("WASM module not loaded");try{this.wasmModule.FS_unlink(e)}catch{}this.wasmModule.FS_createDataFile("/",e,t,!0,!0,!0)}async transcribe(e,t,r={}){if(this.isTranscribing)throw new Error("Already transcribing");if(!this.wasmModule)throw new Error("WASM module not loaded");if(!this.instance)throw new Error("WASM instance not loaded");const a=120;e.length>16e3*a&&this.logger.warn("It's not recommended to 
transcribe audio data that is longer than 120 seconds"),this.isTranscribing=!0;const{language:i,threads:n,translate:s}={...T,...r},o=[],h=Date.now();return this.wasmModule.full_default(this.instance,e,i,n,s),await new Promise((d,c)=>{const f=this.bus.on("transcribe",g=>{const{startMs:w,endMs:y,text:_}=R(g.detail),v={timeStart:w,timeEnd:y,text:_,raw:g.detail};o.push(v),t==null||t(v)}),u=setTimeout(()=>{this.isTranscribing=!1,f(),m(),this.logger.error("Transcribe timeout"),c(new Error("Transcribe timeout"))},a*2*1e3),m=this.bus.on("transcribeError",g=>{this.isTranscribing=!1,f(),m(),clearTimeout(u),this.logger.debug("Transcribe error",g.detail),d({segments:o,transcribeDurationMs:Date.now()-h})})})}createSession(){return new L(this,{logLevel:this.logger.getLevel()})}}const S={"tiny.en":{id:"tiny.en",name:"Tiny English",size:75,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin"},tiny:{id:"tiny",name:"Tiny Multilingual",size:75,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"},"base.en":{id:"base.en",name:"Base English",size:142,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"},base:{id:"base",name:"Base Multilingual",size:142,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"},"small.en":{id:"small.en",name:"Small English",size:466,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin"},small:{id:"small",name:"Small Multilingual",size:466,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin"},"tiny.en-q5_1":{id:"tiny.en-q5_1",name:"Tiny English (Q5_1)",size:31,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin"},"tiny-q5_1":{id:"tiny-q5_1",name:"Tiny 
Multilingual (Q5_1)",size:31,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin"},"base.en-q5_1":{id:"base.en-q5_1",name:"Base English (Q5_1)",size:57,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin"},"base-q5_1":{id:"base-q5_1",name:"Base Multilingual (Q5_1)",size:57,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin"},"small.en-q5_1":{id:"small.en-q5_1",name:"Small English (Q5_1)",size:182,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q5_1.bin"},"small-q5_1":{id:"small-q5_1",name:"Small Multilingual (Q5_1)",size:182,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin"},"medium.en-q5_0":{id:"medium.en-q5_0",name:"Medium English (Q5_0)",size:515,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q5_0.bin"},"medium-q5_0":{id:"medium-q5_0",name:"Medium Multilingual (Q5_0)",size:515,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin"},"large-q5_0":{id:"large-q5_0",name:"Large Multilingual (Q5_0)",size:1030,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-q5_0.bin"}};function z(){return Object.values(S).map(({url:l,...e})=>e)}function E(l){return S[l]}class F{constructor(e={logLevel:b.levels.ERROR}){this.cacheEnabled=!0,this.models=z(),this.logger=new b(e.logLevel,"ModelManager")}async loadModel(e,t=!0,r){var m;const a=E(e);if(!a)throw new Error(`Model ${e} not found in config`);if(this.cacheEnabled&&t){const g=await this.getCachedModel(e);if(g)return this.logger.info(`Model ${e} loaded from cache`),r&&r(100),g}this.logger.info(`Loading model ${e} from 
${a.url}`);const i=await fetch(a.url);if(!i.ok)throw new Error(`Failed to load model: ${i.statusText}`);const n=i.headers.get("content-length"),s=n?parseInt(n,10):0;let o=0;const h=(m=i.body)==null?void 0:m.getReader();if(!h)throw new Error("Response body is not readable");const d=[];try{let g=!1;for(;!g;){const w=await h.read();if(g=w.done,!g&&w.value&&(d.push(w.value),o+=w.value.length,r&&s>0)){const y=Math.round(o/s*100);r(y)}}}finally{h.releaseLock()}const c=d.reduce((g,w)=>g+w.length,0),f=new Uint8Array(c);let u=0;for(const g of d)f.set(g,u),u+=g.length;return this.cacheEnabled&&t&&await this.saveModelToCache(e,f),r&&r(100),f}async loadModelByUrl(e,t){var r;try{if(this.cacheEnabled){const u=await this.getCachedModelByUrl(e);if(u)return this.logger.info(`WASM module loaded from cache by URL: ${e}`),t&&t(100),u}this.logger.info(`Loading WASM module from URL: ${e}`);const a=await fetch(e);if(!a.ok)throw new Error(`Failed to load WASM module: ${a.statusText}`);const i=a.headers.get("content-length"),n=i?parseInt(i,10):0;let s=0;const o=(r=a.body)==null?void 0:r.getReader();if(!o)throw new Error("Response body is not readable");const h=[];try{let u=!1;for(;!u;){const m=await o.read();if(u=m.done,!u&&m.value&&(h.push(m.value),s+=m.value.length,t&&n>0)){const g=Math.round(s/n*100);t(g)}}}finally{o.releaseLock()}const d=h.reduce((u,m)=>u+m.length,0),c=new Uint8Array(d);let f=0;for(const u of h)c.set(u,f),f+=u.length;return this.cacheEnabled&&await this.saveModelToCacheByUrl(e,c),t&&t(100),c}catch(a){throw this.logger.error(a),new Error("Failed to load WASM module")}}async getCachedModelByUrl(e){try{const a=(await this.openIndexedDB()).transaction(["modelsByUrl"],"readonly").objectStore("modelsByUrl");return new Promise((i,n)=>{const s=a.get(e);s.onsuccess=()=>{const o=s.result;o&&o.data?i(o.data):i(null)},s.onerror=()=>n(s.error)})}catch(t){return this.logger.error("Error reading model from cache by URL:",t),null}}async saveModelToCacheByUrl(e,t){try{const i=(await 
this.openIndexedDB()).transaction(["modelsByUrl"],"readwrite").objectStore("modelsByUrl");await new Promise((n,s)=>{const o=i.put({url:e,data:t,timestamp:Date.now(),size:t.length});o.onsuccess=()=>n(),o.onerror=()=>s(o.error)}),this.logger.info(`Model saved to cache by URL: ${e}`)}catch(r){this.logger.error("Error saving model to cache by URL:",r)}}async getAvailableModels(){const e=[...this.models];if(!this.cacheEnabled)return e;try{const t=await this.getCachedModelNames();return e.map(r=>({...r,cached:t.includes(r.id)}))}catch(t){return this.logger.error("Error checking cache status:",t),e}}getAvailableModelsSync(){return[...this.models]}getModelConfig(e){return E(e)}async saveModelToCache(e,t){try{const i=(await this.openIndexedDB()).transaction(["models"],"readwrite").objectStore("models");await new Promise((n,s)=>{const o=i.put({name:e,data:t,timestamp:Date.now(),size:t.length});o.onsuccess=()=>n(),o.onerror=()=>s(o.error)}),this.logger.info(`Model ${e} saved to cache`)}catch(r){this.logger.error("Error saving model to cache:",r)}}async getCachedModel(e){try{const a=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((i,n)=>{const s=a.get(e);s.onsuccess=()=>{const o=s.result;o&&o.data?i(o.data):i(null)},s.onerror=()=>n(s.error)})}catch(t){return this.logger.error("Error getting cached model:",t),null}}async getCachedModelNames(){try{const r=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((a,i)=>{const n=r.getAllKeys();n.onsuccess=()=>{const s=n.result;a(s)},n.onerror=()=>i(n.error)})}catch(e){return this.logger.error("Error getting cached model names:",e),[]}}async openIndexedDB(){return new Promise((e,t)=>{const r=indexedDB.open("WhisperModels",2);r.onerror=()=>t(r.error),r.onsuccess=()=>e(r.result),r.onupgradeneeded=a=>{const i=a.target.result;if(!i.objectStoreNames.contains("models")){const 
n=i.createObjectStore("models",{keyPath:"name"});n.createIndex("timestamp","timestamp",{unique:!1}),n.createIndex("size","size",{unique:!1})}if(!i.objectStoreNames.contains("modelsByUrl")){const n=i.createObjectStore("modelsByUrl",{keyPath:"url"});n.createIndex("timestamp","timestamp",{unique:!1}),n.createIndex("size","size",{unique:!1})}}})}async clearCache(){try{const t=(await this.openIndexedDB()).transaction(["models","modelsByUrl"],"readwrite"),r=t.objectStore("models");await new Promise((i,n)=>{const s=r.clear();s.onsuccess=()=>i(),s.onerror=()=>n(s.error)});const a=t.objectStore("modelsByUrl");await new Promise((i,n)=>{const s=a.clear();s.onsuccess=()=>i(),s.onerror=()=>n(s.error)}),this.logger.info("Model cache cleared")}catch(e){this.logger.error("Error clearing cache:",e)}}async getCacheInfo(){try{const r=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((a,i)=>{const n=r.getAll();n.onsuccess=()=>{const s=n.result,o=s.reduce((h,d)=>h+(d.size||0),0);a({count:s.length,totalSize:o})},n.onerror=()=>i(n.error)})}catch(e){return this.logger.error("Error getting cache info:",e),{count:0,totalSize:0}}}}exports.ModelManager=F;exports.WhisperWasmService=A;exports.getAllModels=z;
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const M=class M{constructor(t=M.levels.INFO,e=""){this.level=t,this.prefix=e}debug(...t){this.level<=M.levels.DEBUG&&console.debug(`[${this.prefix}] [DEBUG]`,...t)}info(...t){this.level<=M.levels.INFO&&console.info(`[${this.prefix}] [INFO]`,...t)}warn(...t){this.level<=M.levels.WARN&&console.warn(`[${this.prefix}] [WARN]`,...t)}error(...t){this.level<=M.levels.ERROR&&console.error(`[${this.prefix}] [ERROR]`,...t)}setLevel(t){this.level=t}getLevel(){return this.level}};M.levels={DEBUG:0,INFO:1,WARN:2,ERROR:3};let y=M;const L=async()=>WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,10,1,8,0,65,0,253,15,253,98,11])),W={language:"auto",threads:4,translate:!1};function F(n){const t=String(n).trim().replace(",","."),e=t.split(":").map(Number);if(e.some(Number.isNaN))throw new Error(`Bad time: "${n}"`);let r=0,o=0,i=0;if(e.length===3)[r,o]=e,i=parseFloat(t.split(":").pop()||"0");else if(e.length===2)[o]=e,i=parseFloat(t.split(":").pop()||"0");else throw new Error(`Bad time format: "${n}"`);return Math.floor(((r*60+o)*60+i)*1e3)}function D(n){const e=/^\s*\[?\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*-->\s*([0-9]{1,2}:[0-9]{2}:(?:[0-9]{2}[.,][0-9]{1,3})|[0-9]{1,2}:[0-9]{2}[.,][0-9]{1,3})\s*\]?\s*(.*)\s*$/.exec(n);if(!e)throw new Error("Line does not match VTT-like pattern: "+n);const r=e[1],o=e[2],i=e[3]||"",s=F(r),a=F(o);if(a<s)throw new Error("End time is before start time");return{startMs:s,endMs:a,start:r,end:o,text:i}}function x(n){return new Promise(t=>setTimeout(t,n))}function I(n,t){let e=null,r=!1,o=null,i=null;return{timeoutError:()=>new Promise((u,l)=>{i=u,o=l,e=setTimeout(()=>{!r&&o&&(r=!0,o(new Error(t)))},n)}),clear:()=>{e&&(clearTimeout(e),e=null),i&&(i(),i=null),r=!0,o=null}}}function $(n,t=16e3*100){const e=[];for(let r=0;r<n.length;r+=t)e.push(n.subarray(r,r+t));return e}class 
U{constructor(t,e){this.whisperService=t,this.logger=new y((e==null?void 0:e.logLevel)||y.levels.ERROR,"TranscriptionSession")}async*streamimg(t,e={}){const{timeoutMs:r=3e4}=e,o=$(t);let i=0;for await(const s of o){const a=[];let u=null,l=!1,d,h=0;const{timeoutError:c,clear:m}=I(r,"Transcribe timeout"),w=()=>this.whisperService.transcribe(s,g=>{h=g.timeEnd,g.timeStart+=i,g.timeEnd+=i,this.logger.debug("Transcription segment in session:",g),u?(u(g),u=null):a.push(g),m()},e).then(()=>{this.logger.debug("Transcription done in session then"),l=!0,i+=h,m(),u==null||u(void 0)}).catch(g=>{this.logger.debug("Transcription error in session catch:",g),d=g,m(),u==null||u(void 0)});for(w();;){if(d){if(e.restartModelOnError){this.whisperService.restartModel(),w();continue}throw d}if(l)break;if(a.length)yield a.shift();else try{const g=await Promise.race([new Promise(f=>u=f),c()]);g&&(yield g)}catch(g){d=g}}e.sleepMsBetweenChunks&&await x(e.sleepMsBetweenChunks)}}}class N extends EventTarget{on(t,e){return this.addEventListener(t,e),()=>this.removeEventListener(t,e)}emit(t,e){this.dispatchEvent(new CustomEvent(t,{detail:e}))}}class j{constructor(t){this.wasmModule=null,this.instance=null,this.modelFileName="whisper.bin",this.isTranscribing=!1,this.bus=new N,this.modelData=null,this.logger=new y((t==null?void 0:t.logLevel)??y.levels.ERROR,"WhisperWasmService"),t!=null&&t.init&&this.loadWasmScript()}async checkWasmSupport(){return await L()}async loadWasmScript(){this.wasmModule=await(await Promise.resolve().then(()=>require("./libmain-CWYJvMY5.js"))).default({print:(t,...e)=>{this.logger.debug(e),t.startsWith("[")?(this.logger.info(t),this.bus.emit("transcribe",t)):(this.logger.debug(t),this.bus.emit("system_info",t))},printErr:(t,...e)=>{this.logger.debug(e),this.logger.warn(t),this.bus.emit("transcribeError",t)}})}async initModel(t){if(!await this.checkWasmSupport())throw new Error("WASM is not supported");return 
this.modelData=t,this.wasmModule&&(this.wasmModule.FS_unlink(this.modelFileName),this.wasmModule.free()),await this.loadWasmScript(),await x(100),this.storeFS(this.modelFileName,t),this.instance=this.wasmModule.init(this.modelFileName),Promise.resolve()}restartModel(){if(!this.modelData)throw new Error("Model not loaded");return this.initModel(this.modelData)}storeFS(t,e){if(!this.wasmModule)throw new Error("WASM module not loaded");try{this.wasmModule.FS_unlink(t)}catch{}this.wasmModule.FS_createDataFile("/",t,e,!0,!0,!0)}async transcribe(t,e,r={}){if(this.isTranscribing)throw new Error("Already transcribing");if(!this.wasmModule)throw new Error("WASM module not loaded");if(!this.instance)throw new Error("WASM instance not loaded");const o=120;t.length>16e3*o&&this.logger.warn("It's not recommended to transcribe audio data that is longer than 120 seconds"),this.isTranscribing=!0;const{language:i="auto",threads:s=4,translate:a=!1}={...W,...r},u=[],l=Date.now();return this.wasmModule.full_default(this.instance,t,i,s,a),await new Promise((d,h)=>{const c=this.bus.on("transcribe",g=>{const{startMs:f,endMs:v,text:B}=D(g.detail),q={timeStart:f,timeEnd:v,text:B,raw:g.detail};u.push(q),e==null||e(q)}),m=setTimeout(()=>{this.isTranscribing=!1,c(),w(),this.logger.error("Transcribe timeout"),h(new Error("Transcribe timeout")),this.bus.emit("transcribeError","Transcribe timeout")},o*2*1e3),w=this.bus.on("transcribeError",g=>{this.isTranscribing=!1,c(),w(),clearTimeout(m),this.logger.debug("Transcribe error",g.detail),d({segments:u,transcribeDurationMs:Date.now()-l})})})}createSession(){return new U(this,{logLevel:this.logger.getLevel()})}}const O={"tiny.en":{id:"tiny.en",name:"Tiny English",size:75,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin"},tiny:{id:"tiny",name:"Tiny 
Multilingual",size:75,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"},"base.en":{id:"base.en",name:"Base English",size:142,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"},base:{id:"base",name:"Base Multilingual",size:142,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"},"small.en":{id:"small.en",name:"Small English",size:466,language:"en",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin"},small:{id:"small",name:"Small Multilingual",size:466,language:"multilingual",quantized:!1,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin"},"tiny.en-q5_1":{id:"tiny.en-q5_1",name:"Tiny English (Q5_1)",size:31,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin"},"tiny-q5_1":{id:"tiny-q5_1",name:"Tiny Multilingual (Q5_1)",size:31,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin"},"base.en-q5_1":{id:"base.en-q5_1",name:"Base English (Q5_1)",size:57,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin"},"base-q5_1":{id:"base-q5_1",name:"Base Multilingual (Q5_1)",size:57,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin"},"small.en-q5_1":{id:"small.en-q5_1",name:"Small English (Q5_1)",size:182,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q5_1.bin"},"small-q5_1":{id:"small-q5_1",name:"Small Multilingual (Q5_1)",size:182,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin"},"medium.en-q5_0":{id:"medium.en-q5_0",name:"Medium English 
(Q5_0)",size:515,language:"en",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q5_0.bin"},"medium-q5_0":{id:"medium-q5_0",name:"Medium Multilingual (Q5_0)",size:515,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin"},"large-q5_0":{id:"large-q5_0",name:"Large Multilingual (Q5_0)",size:1030,language:"multilingual",quantized:!0,url:"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-q5_0.bin"}};function _(){return Object.values(O).map(({url:n,...t})=>t)}function z(n){return O[n]}class H{constructor(t={logLevel:y.levels.ERROR}){this.cacheEnabled=!0,this.models=_(),this.logger=new y(t.logLevel,"ModelManager")}async loadModel(t,e=!0,r){var w;const o=z(t);if(!o)throw new Error(`Model ${t} not found in config`);if(this.cacheEnabled&&e){const g=await this.getCachedModel(t);if(g)return this.logger.info(`Model ${t} loaded from cache`),r&&r(100),g}this.logger.info(`Loading model ${t} from ${o.url}`);const i=await fetch(o.url);if(!i.ok)throw new Error(`Failed to load model: ${i.statusText}`);const s=i.headers.get("content-length"),a=s?parseInt(s,10):0;let u=0;const l=(w=i.body)==null?void 0:w.getReader();if(!l)throw new Error("Response body is not readable");const d=[];try{let g=!1;for(;!g;){const f=await l.read();if(g=f.done,!g&&f.value&&(d.push(f.value),u+=f.value.length,r&&a>0)){const v=Math.round(u/a*100);r(v)}}}finally{l.releaseLock()}const h=d.reduce((g,f)=>g+f.length,0),c=new Uint8Array(h);let m=0;for(const g of d)c.set(g,m),m+=g.length;return this.cacheEnabled&&e&&await this.saveModelToCache(t,c),r&&r(100),c}async loadModelByUrl(t,e){var r;try{if(this.cacheEnabled){const m=await this.getCachedModelByUrl(t);if(m)return this.logger.info(`WASM module loaded from cache by URL: ${t}`),e&&e(100),m}this.logger.info(`Loading WASM module from URL: ${t}`);const o=await fetch(t);if(!o.ok)throw new Error(`Failed to load WASM module: 
${o.statusText}`);const i=o.headers.get("content-length"),s=i?parseInt(i,10):0;let a=0;const u=(r=o.body)==null?void 0:r.getReader();if(!u)throw new Error("Response body is not readable");const l=[];try{let m=!1;for(;!m;){const w=await u.read();if(m=w.done,!m&&w.value&&(l.push(w.value),a+=w.value.length,e&&s>0)){const g=Math.round(a/s*100);e(g)}}}finally{u.releaseLock()}const d=l.reduce((m,w)=>m+w.length,0),h=new Uint8Array(d);let c=0;for(const m of l)h.set(m,c),c+=m.length;return this.cacheEnabled&&await this.saveModelToCacheByUrl(t,h),e&&e(100),h}catch(o){throw this.logger.error(o),new Error("Failed to load WASM module")}}async getCachedModelByUrl(t){try{const o=(await this.openIndexedDB()).transaction(["modelsByUrl"],"readonly").objectStore("modelsByUrl");return new Promise((i,s)=>{const a=o.get(t);a.onsuccess=()=>{const u=a.result;u&&u.data?i(u.data):i(null)},a.onerror=()=>s(a.error)})}catch(e){return this.logger.error("Error reading model from cache by URL:",e),null}}async saveModelToCacheByUrl(t,e){try{const i=(await this.openIndexedDB()).transaction(["modelsByUrl"],"readwrite").objectStore("modelsByUrl");await new Promise((s,a)=>{const u=i.put({url:t,data:e,timestamp:Date.now(),size:e.length});u.onsuccess=()=>s(),u.onerror=()=>a(u.error)}),this.logger.info(`Model saved to cache by URL: ${t}`)}catch(r){this.logger.error("Error saving model to cache by URL:",r)}}async getAvailableModels(){const t=[...this.models];if(!this.cacheEnabled)return t;try{const e=await this.getCachedModelNames();return t.map(r=>({...r,cached:e.includes(r.id)}))}catch(e){return this.logger.error("Error checking cache status:",e),t}}getAvailableModelsSync(){return[...this.models]}getModelConfig(t){return z(t)}async saveModelToCache(t,e){try{const i=(await this.openIndexedDB()).transaction(["models"],"readwrite").objectStore("models");await new Promise((s,a)=>{const 
u=i.put({name:t,data:e,timestamp:Date.now(),size:e.length});u.onsuccess=()=>s(),u.onerror=()=>a(u.error)}),this.logger.info(`Model ${t} saved to cache`)}catch(r){this.logger.error("Error saving model to cache:",r)}}async getCachedModel(t){try{const o=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((i,s)=>{const a=o.get(t);a.onsuccess=()=>{const u=a.result;u&&u.data?i(u.data):i(null)},a.onerror=()=>s(a.error)})}catch(e){return this.logger.error("Error getting cached model:",e),null}}async getCachedModelNames(){try{const r=(await this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((o,i)=>{const s=r.getAllKeys();s.onsuccess=()=>{const a=s.result;o(a)},s.onerror=()=>i(s.error)})}catch(t){return this.logger.error("Error getting cached model names:",t),[]}}async openIndexedDB(){return new Promise((t,e)=>{const r=indexedDB.open("WhisperModels",2);r.onerror=()=>e(r.error),r.onsuccess=()=>t(r.result),r.onupgradeneeded=o=>{const i=o.target.result;if(!i.objectStoreNames.contains("models")){const s=i.createObjectStore("models",{keyPath:"name"});s.createIndex("timestamp","timestamp",{unique:!1}),s.createIndex("size","size",{unique:!1})}if(!i.objectStoreNames.contains("modelsByUrl")){const s=i.createObjectStore("modelsByUrl",{keyPath:"url"});s.createIndex("timestamp","timestamp",{unique:!1}),s.createIndex("size","size",{unique:!1})}}})}async clearCache(){try{const e=(await this.openIndexedDB()).transaction(["models","modelsByUrl"],"readwrite"),r=e.objectStore("models");await new Promise((i,s)=>{const a=r.clear();a.onsuccess=()=>i(),a.onerror=()=>s(a.error)});const o=e.objectStore("modelsByUrl");await new Promise((i,s)=>{const a=o.clear();a.onsuccess=()=>i(),a.onerror=()=>s(a.error)}),this.logger.info("Model cache cleared")}catch(t){this.logger.error("Error clearing cache:",t)}}async getCacheInfo(){try{const r=(await 
this.openIndexedDB()).transaction(["models"],"readonly").objectStore("models");return new Promise((o,i)=>{const s=r.getAll();s.onsuccess=()=>{const a=s.result,u=a.reduce((l,d)=>l+(d.size||0),0);o({count:a.length,totalSize:u})},s.onerror=()=>i(s.error)})}catch(t){return this.logger.error("Error getting cache info:",t),{count:0,totalSize:0}}}}var p=(n=>(n.MP3="mp3",n.WAV="wav",n.OGG="ogg",n.M4A="m4a",n.AAC="aac",n.FLAC="flac",n.MP4="mp4",n.WEBM="webm",n.AVI="avi",n.MOV="mov",n.MKV="mkv",n.RAW_PCM="raw_pcm",n.MICROPHONE="microphone",n.AUDIO_ELEMENT="audio_element",n))(p||{});const S={targetSampleRate:16e3,targetChannels:1,inputSampleRate:16e3,normalize:!0,noiseReduction:!1,logLevel:y.levels.ERROR,signal:void 0,recordingDurationMs:1e4};function Q(n){return typeof n=="number"?n:n?y.levels[n]:y.levels.ERROR}function R(n){return new y(Q(n.logLevel),"AudioConverter")}function E(n){if(n!=null&&n.aborted)throw new DOMException("Aborted","AbortError")}function A(){return typeof window>"u"?!1:!!(window.AudioContext||window.webkitAudioContext||window.OfflineAudioContext||window.webkitOfflineAudioContext)}function V(){return[p.MP3,p.WAV,p.OGG,p.M4A,p.AAC,p.FLAC,p.MP4,p.WEBM,p.AVI,p.MOV,p.MKV,p.RAW_PCM,p.MICROPHONE,p.AUDIO_ELEMENT]}async function G(n,t={},e={}){var s,a,u,l,d;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info(`Converting file: ${n.name}`),(s=e.onProgress)==null||s.call(e,0,`Loading file: ${n.name}`);const h=await Y(n);(a=e.onProgress)==null||a.call(e,20,"File loaded, decoding..."),E(r.signal);const c=await C(h,r,e,o);(u=e.onProgress)==null||u.call(e,40,"Audio decoded, processing...");const m=await P(c,r,e,o,i);return(l=e.onProgress)==null||l.call(e,100,"Conversion completed"),o.info("File conversion completed successfully"),m}catch(h){throw o.error("File conversion failed:",h),(d=e.onError)==null||d.call(e,h),h}}async function T(n,t={},e={}){var s,a,u,l,d;if(!A())throw new Error("Web 
Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info("Converting from MediaStream"),(s=e.onProgress)==null||s.call(e,0,"Starting recording...");const h=await re(n,r,e,o);(a=e.onProgress)==null||a.call(e,50,"Recording completed, decoding...");const c=await h.arrayBuffer(),m=await C(c,r,e,o);(u=e.onProgress)==null||u.call(e,70,"Audio decoded, processing...");const w=await P(m,r,e,o,i);return(l=e.onProgress)==null||l.call(e,100,"Conversion completed"),w}catch(h){throw o.error("MediaStream conversion failed:",h),(d=e.onError)==null||d.call(e,h),h}}async function K(n,t={},e={}){var s,a,u,l,d,h;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info("Converting from HTMLAudioElement"),(s=e.onProgress)==null||s.call(e,0,"Capturing audio from element...");const c=n.srcObject;if(c&&c instanceof MediaStream){i.push("Using HTMLAudioElement.srcObject MediaStream");const f=await T(c,r,e);return{...f,warnings:[...i,...f.warnings??[]]}}const m=n.currentSrc||n.src;if(m)try{(a=e.onProgress)==null||a.call(e,10,"Fetching audio source...");const f=await ne(m,r.signal);(u=e.onProgress)==null||u.call(e,30,"Fetched, decoding...");const v=await C(f,r,e,o);(l=e.onProgress)==null||l.call(e,60,"Decoded, processing...");const B=await P(v,r,e,o,i);return(d=e.onProgress)==null||d.call(e,100,"Conversion completed"),B}catch(f){if((f==null?void 0:f.name)==="AbortError")throw f;i.push(`Failed to fetch element src (CORS?) 
– falling back to captureStream: ${f.message}`)}const w=n.captureStream||n.mozCaptureStream;if(typeof w!="function")throw new Error("Unable to capture audio from HTMLAudioElement: no srcObject, fetch failed, and captureStream() is not supported");i.push("Using HTMLAudioElement.captureStream() fallback");const g=w.call(n);try{const f=await T(g,r,e);return{...f,warnings:[...i,...f.warnings??[]]}}finally{g.getTracks().forEach(f=>f.stop())}}catch(c){throw o.error("HTMLAudioElement conversion failed:",c),(h=e.onError)==null||h.call(e,c),c}}async function J(n,t={},e={}){var i,s,a,u;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r);try{E(r.signal),o.info("Converting from Float32Array"),(i=e.onProgress)==null||i.call(e,0,"Processing Float32Array...");const l=window.AudioContext||window.webkitAudioContext,d=r.inputSampleRate??r.targetSampleRate,h=new l({sampleRate:d});try{const c=h.createBuffer(1,n.length,h.sampleRate);c.getChannelData(0).set(n),(s=e.onProgress)==null||s.call(e,30,"AudioBuffer created, processing...");const w=[];d!==r.targetSampleRate&&w.push(`Float32Array sample rate (${d}Hz) will be converted to ${r.targetSampleRate}Hz`);const g=await P(c,r,e,o,w);return(a=e.onProgress)==null||a.call(e,100,"Conversion completed"),o.info("Float32Array conversion completed successfully"),g}finally{try{await h.close()}catch{}}}catch(l){throw o.error("Float32Array conversion failed:",l),(u=e.onError)==null||u.call(e,l),l}}async function X(n,t={},e={}){var s,a,u,l;if(!A())throw new Error("Web Audio API is not supported in this browser");const r={...S,...t},o=R(r),i=[];try{E(r.signal),o.info("Converting from ArrayBuffer"),(s=e.onProgress)==null||s.call(e,0,"Processing ArrayBuffer...");const d=await C(n,r,e,o);(a=e.onProgress)==null||a.call(e,40,"Audio decoded, processing...");const h=await P(d,r,e,o,i);return(u=e.onProgress)==null||u.call(e,100,"Conversion completed"),o.info("ArrayBuffer conversion completed 
successfully"),h}catch(d){throw o.error("ArrayBuffer conversion failed:",d),(l=e.onError)==null||l.call(e,d),d}}async function Y(n){return new Promise((t,e)=>{const r=new FileReader;r.onload=o=>{var i;return t((i=o.target)==null?void 0:i.result)},r.onerror=o=>e(o),r.readAsArrayBuffer(n)})}async function C(n,t,e,r){var s;(s=e.onProgress)==null||s.call(e,15,"Decoding audio data");const o=window.AudioContext||window.webkitAudioContext,i=new o({sampleRate:t.targetSampleRate});try{return await i.decodeAudioData(n)}catch(a){throw r.error("Audio decoding failed:",a),new Error(`Failed to decode audio: ${a.message}`)}finally{try{await i.close()}catch{}}}async function P(n,t,e,r,o){var a,u,l,d;(a=e.onProgress)==null||a.call(e,50,"Converting audio format..."),te(n,t,o);const i=await Z(n,t,e);(u=e.onProgress)==null||u.call(e,70,"Converting to Float32Array...");const s=b(i);return(l=e.onProgress)==null||l.call(e,80,"Applying effects..."),t.normalize&&k(s),t.noiseReduction&&ee(s),(d=e.onProgress)==null||d.call(e,90,"Finalizing..."),{audioData:s,audioInfo:{sampleRate:i.sampleRate,duration:i.duration,channels:i.numberOfChannels,bitDepth:32,format:"float32"},warnings:o.length>0?o:void 0}}async function Z(n,t,e){var s;(s=e.onProgress)==null||s.call(e,60,"Converting audio format...");const r=window.OfflineAudioContext||window.webkitOfflineAudioContext,o=new r(t.targetChannels,Math.floor(n.length*t.targetSampleRate/n.sampleRate),t.targetSampleRate),i=o.createBufferSource();return i.buffer=n,i.connect(o.destination),i.start(0),await o.startRendering()}function b(n){if(n.numberOfChannels===1)return n.getChannelData(0);{const t=n.getChannelData(0),e=n.getChannelData(1),r=new Float32Array(t.length);for(let o=0;o<t.length;o++)r[o]=(t[o]+e[o])/2;return r}}function k(n){let t=0;for(let e=0;e<n.length;e++)t=Math.max(t,Math.abs(n[e]));if(t>0){const e=.95/t;for(let r=0;r<n.length;r++)n[r]*=e}}function ee(n){const t=new Float32Array(n.length),e=3;for(let r=0;r<n.length;r++){let o=0,i=0;for(let 
s=Math.max(0,r-e);s<=Math.min(n.length-1,r+e);s++)o+=n[s],i++;t[r]=o/i}n.set(t)}function te(n,t,e){n.numberOfChannels>2&&e.push(`Audio has ${n.numberOfChannels} channels, will be mixed to mono`),n.sampleRate!==t.targetSampleRate&&e.push(`Audio sample rate (${n.sampleRate}Hz) will be converted to ${t.targetSampleRate}Hz`)}async function re(n,t,e,r){var w;if(typeof window>"u")throw new Error("MediaStream recording is only supported in browser environments");if(!window.MediaRecorder)throw new Error("MediaRecorder is not supported in this browser");const o=window.MediaRecorder,s=["audio/webm;codecs=opus","audio/webm","audio/ogg;codecs=opus","audio/ogg"].find(g=>o.isTypeSupported(g)),a=new o(n,s?{mimeType:s}:void 0),u=[],l=new Promise((g,f)=>{a.ondataavailable=v=>{v.data&&v.data.size>0&&u.push(v.data)},a.onerror=()=>f(new Error("MediaRecorder error")),a.onstop=()=>{const v=s||a.mimeType||"application/octet-stream";g(new Blob(u,{type:v}))}}),d=t.signal,h=()=>{try{a.state!=="inactive"&&a.stop()}catch{}};d==null||d.addEventListener("abort",h,{once:!0});const c=t.recordingDurationMs??1e4,m=setTimeout(()=>{try{a.state!=="inactive"&&a.stop()}catch{}},c);(w=e.onProgress)==null||w.call(e,20,"Recording audio..."),r.debug("Starting MediaRecorder",{mimeType:s??a.mimeType,durationMs:c}),a.start(250);try{return await l}finally{clearTimeout(m),d==null||d.removeEventListener("abort",h)}}async function ne(n,t){E(t);const e=await fetch(n,{signal:t});if(!e.ok)throw new Error(`Failed to fetch (${e.status}): ${e.statusText}`);return await e.arrayBuffer()}exports.AudioFormat=p;exports.ModelManager=H;exports.WhisperWasmService=j;exports.convertFromArrayBuffer=X;exports.convertFromAudioElement=K;exports.convertFromFile=G;exports.convertFromFloat32Array=J;exports.convertFromMediaStream=T;exports.getAllModels=_;exports.getSupportedFormats=V;exports.isWebAudioSupported=A;
package/dist/index.d.ts CHANGED
@@ -2,4 +2,7 @@ export { WhisperWasmService } from './whisper/WhisperWasmService';
2
2
  export type { WhisperWasmModule } from './whisper/types';
3
3
  export { ModelManager } from './whisper/ModelManager';
4
4
  export { getAllModels } from './whisper/ModelConfig';
5
+ export { convertFromFile, convertFromMediaStream, convertFromAudioElement, convertFromFloat32Array, convertFromArrayBuffer, isWebAudioSupported, getSupportedFormats, } from './audio/AudioConverter';
6
+ export { AudioFormat } from './audio/types';
7
+ export type { AudioInfo, AudioConverterOptions, AudioConversionResult, AudioConverterCallbacks, AudioSource, AudioContextConfig, } from './audio/types';
5
8
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,YAAY,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,YAAY,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAGrD,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,uBAAuB,EACvB,uBAAuB,EACvB,sBAAsB,EACtB,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC5C,YAAY,EACV,SAAS,EACT,qBAAqB,EACrB,qBAAqB,EACrB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,GACnB,MAAM,eAAe,CAAC"}