whisper-cpp-node 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,353 +1,353 @@
1
- # whisper-cpp-node
2
-
3
- Node.js bindings for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) - fast speech-to-text with GPU acceleration.
4
-
5
- ## Features
6
-
7
- - **Fast**: Native whisper.cpp performance with GPU acceleration
8
- - **Cross-platform**: macOS (Metal), Windows (Vulkan)
9
- - **Core ML**: Optional Apple Neural Engine support for 3x+ speedup (macOS)
10
- - **OpenVINO**: Optional Intel CPU/GPU encoder acceleration (Windows/Linux)
11
- - **Streaming VAD**: Built-in Silero voice activity detection
12
- - **TypeScript**: Full type definitions included
13
- - **Self-contained**: No external dependencies, just install and use
14
-
15
- ## Requirements
16
-
17
- **macOS:**
18
- - macOS 13.3+ (Ventura or later)
19
- - Apple Silicon (M1/M2/M3/M4)
20
- - Node.js 18+
21
-
22
- **Windows:**
23
- - Windows 10/11 (x64)
24
- - Node.js 18+
25
- - Vulkan-capable GPU (optional, for GPU acceleration)
26
-
27
- ## Installation
28
-
29
- ```bash
30
- npm install whisper-cpp-node
31
- # or
32
- pnpm add whisper-cpp-node
33
- ```
34
-
35
- The platform-specific binary is automatically installed:
36
- - macOS ARM64: `@whisper-cpp-node/darwin-arm64`
37
- - Windows x64: `@whisper-cpp-node/win32-x64`
38
-
39
- ## Quick Start
40
-
41
- ### File-based transcription
42
-
43
- ```typescript
44
- import {
45
- createWhisperContext,
46
- transcribeAsync,
47
- } from "whisper-cpp-node";
48
-
49
- // Create a context with your model
50
- const ctx = createWhisperContext({
51
- model: "./models/ggml-base.en.bin",
52
- use_gpu: true,
53
- });
54
-
55
- // Transcribe audio file
56
- const result = await transcribeAsync(ctx, {
57
- fname_inp: "./audio.wav",
58
- language: "en",
59
- });
60
-
61
- // Result: { segments: [["00:00:00,000", "00:00:02,500", " Hello world"], ...] }
62
- for (const [start, end, text] of result.segments) {
63
- console.log(`[${start} --> ${end}]${text}`);
64
- }
65
-
66
- // Clean up
67
- ctx.free();
68
- ```
69
-
70
- ### Buffer-based transcription
71
-
72
- ```typescript
73
- import {
74
- createWhisperContext,
75
- transcribeAsync,
76
- } from "whisper-cpp-node";
77
-
78
- const ctx = createWhisperContext({
79
- model: "./models/ggml-base.en.bin",
80
- use_gpu: true,
81
- });
82
-
83
- // Pass raw PCM audio (16kHz, mono, float32)
84
- const pcmData = new Float32Array(/* your audio samples */);
85
- const result = await transcribeAsync(ctx, {
86
- pcmf32: pcmData,
87
- language: "en",
88
- });
89
-
90
- for (const [start, end, text] of result.segments) {
91
- console.log(`[${start} --> ${end}]${text}`);
92
- }
93
-
94
- ctx.free();
95
- ```
96
-
97
- ### Streaming transcription
98
-
99
- Get real-time output as audio is processed. The `on_new_segment` callback fires for each segment as it's generated, while the final callback still receives all segments at completion (backward compatible):
100
-
101
- ```typescript
102
- import { createWhisperContext, transcribe } from "whisper-cpp-node";
103
-
104
- const ctx = createWhisperContext({
105
- model: "./models/ggml-base.en.bin",
106
- });
107
-
108
- transcribe(ctx, {
109
- fname_inp: "./long-audio.wav",
110
- language: "en",
111
-
112
- // Called for each segment as it's generated
113
- on_new_segment: (segment) => {
114
- console.log(`[${segment.start}]${segment.text}`);
115
- },
116
- }, (err, result) => {
117
- // Final callback still receives ALL segments at completion
118
- console.log(`Done! ${result.segments.length} segments`);
119
- ctx.free();
120
- });
121
- ```
122
-
123
- ## API
124
-
125
- ### `createWhisperContext(options)`
126
-
127
- Create a persistent context for transcription.
128
-
129
- ```typescript
130
- interface WhisperContextOptions {
131
- model: string; // Path to GGML model file (required)
132
- use_gpu?: boolean; // Enable GPU acceleration (default: true)
133
- // Uses Metal on macOS, Vulkan on Windows
134
- use_coreml?: boolean; // Enable Core ML on macOS (default: false)
135
- use_openvino?: boolean; // Enable OpenVINO encoder on Intel (default: false)
136
- openvino_device?: string; // OpenVINO device: 'CPU', 'GPU', 'NPU' (default: 'CPU')
137
- openvino_model_path?: string; // Path to OpenVINO encoder model (auto-derived)
138
- openvino_cache_dir?: string; // Cache dir for compiled OpenVINO models
139
- flash_attn?: boolean; // Enable Flash Attention (default: false)
140
- gpu_device?: number; // GPU device index (default: 0)
141
- dtw?: string; // DTW preset for word timestamps
142
- no_prints?: boolean; // Suppress log output (default: false)
143
- }
144
- ```
145
-
146
- ### `transcribeAsync(context, options)`
147
-
148
- Transcribe audio (Promise-based). Accepts either a file path or PCM buffer.
149
-
150
- ```typescript
151
- // File input
152
- interface TranscribeOptionsFile {
153
- fname_inp: string; // Path to audio file
154
- // ... common options
155
- }
156
-
157
- // Buffer input
158
- interface TranscribeOptionsBuffer {
159
- pcmf32: Float32Array; // Raw PCM (16kHz, mono, float32, -1.0 to 1.0)
160
- // ... common options
161
- }
162
-
163
- // Common options (partial list - see types.ts for full options)
164
- interface TranscribeOptionsBase {
165
- // Language
166
- language?: string; // Language code ('en', 'zh', 'auto')
167
- translate?: boolean; // Translate to English
168
- detect_language?: boolean; // Auto-detect language
169
-
170
- // Threading
171
- n_threads?: number; // CPU threads (default: 4)
172
- n_processors?: number; // Parallel processors
173
-
174
- // Audio processing
175
- offset_ms?: number; // Start offset in ms
176
- duration_ms?: number; // Duration to process (0 = all)
177
-
178
- // Output control
179
- no_timestamps?: boolean; // Disable timestamps
180
- max_len?: number; // Max segment length (chars)
181
- max_tokens?: number; // Max tokens per segment
182
- split_on_word?: boolean; // Split on word boundaries
183
- token_timestamps?: boolean; // Include token-level timestamps
184
-
185
- // Sampling
186
- temperature?: number; // Sampling temperature (0.0 = greedy)
187
- beam_size?: number; // Beam search size (-1 = greedy)
188
- best_of?: number; // Best-of-N sampling
189
-
190
- // Thresholds
191
- entropy_thold?: number; // Entropy threshold
192
- logprob_thold?: number; // Log probability threshold
193
- no_speech_thold?: number; // No-speech probability threshold
194
-
195
- // Context
196
- prompt?: string; // Initial prompt text
197
- no_context?: boolean; // Don't use previous context
198
-
199
- // VAD preprocessing
200
- vad?: boolean; // Enable VAD preprocessing
201
- vad_model?: string; // Path to VAD model
202
- vad_threshold?: number; // VAD threshold (0.0-1.0)
203
- vad_min_speech_duration_ms?: number;
204
- vad_min_silence_duration_ms?: number;
205
- vad_speech_pad_ms?: number;
206
-
207
- // Callbacks
208
- progress_callback?: (progress: number) => void;
209
- on_new_segment?: (segment: StreamingSegment) => void; // Streaming callback
210
- }
211
-
212
- // Streaming segment (passed to on_new_segment callback)
213
- interface StreamingSegment {
214
- start: string; // Start timestamp "HH:MM:SS,mmm"
215
- end: string; // End timestamp
216
- text: string; // Transcribed text
217
- segment_index: number; // 0-based index
218
- is_partial: boolean; // Reserved for future use
219
- tokens?: StreamingToken[]; // Only if token_timestamps enabled
220
- }
221
-
222
- // Result
223
- interface TranscribeResult {
224
- segments: TranscriptSegment[];
225
- }
226
-
227
- // Segment is a tuple: [start, end, text]
228
- type TranscriptSegment = [string, string, string];
229
- // Example: ["00:00:00,000", "00:00:02,500", " Hello world"]
230
- ```
231
-
232
- ### `createVadContext(options)`
233
-
234
- Create a voice activity detection context for streaming audio.
235
-
236
- ```typescript
237
- interface VadContextOptions {
238
- model: string; // Path to Silero VAD model
239
- threshold?: number; // Speech threshold (default: 0.5)
240
- n_threads?: number; // Number of threads (default: 1)
241
- no_prints?: boolean; // Suppress log output
242
- }
243
-
244
- interface VadContext {
245
- getWindowSamples(): number; // Returns 512 (32ms at 16kHz)
246
- getSampleRate(): number; // Returns 16000
247
- process(samples: Float32Array): number; // Returns probability 0.0-1.0
248
- reset(): void; // Reset LSTM state
249
- free(): void; // Release resources
250
- }
251
- ```
252
-
253
- #### VAD Example
254
-
255
- ```typescript
256
- import { createVadContext } from "whisper-cpp-node";
257
-
258
- const vad = createVadContext({
259
- model: "./models/ggml-silero-v6.2.0.bin",
260
- threshold: 0.5,
261
- });
262
-
263
- const windowSize = vad.getWindowSamples(); // 512 samples
264
-
265
- // Process audio in 32ms chunks
266
- function processAudioChunk(samples: Float32Array) {
267
- const probability = vad.process(samples);
268
- if (probability >= 0.5) {
269
- console.log("Speech detected!", probability);
270
- }
271
- }
272
-
273
- // Reset when starting new audio stream
274
- vad.reset();
275
-
276
- // Clean up when done
277
- vad.free();
278
- ```
279
-
280
- ## Core ML Acceleration (macOS)
281
-
282
- For 3x+ faster encoding on Apple Silicon:
283
-
284
- 1. Generate a Core ML model:
285
- ```bash
286
- pip install ane_transformers openai-whisper coremltools
287
- ./models/generate-coreml-model.sh base.en
288
- ```
289
-
290
- 2. Place it next to your GGML model:
291
- ```
292
- models/ggml-base.en.bin
293
- models/ggml-base.en-encoder.mlmodelc/
294
- ```
295
-
296
- 3. Enable Core ML:
297
- ```typescript
298
- const ctx = createWhisperContext({
299
- model: "./models/ggml-base.en.bin",
300
- use_coreml: true,
301
- });
302
- ```
303
-
304
- ## OpenVINO Acceleration (Intel)
305
-
306
- For faster encoder inference on Intel CPUs and GPUs (requires build with OpenVINO support):
307
-
308
- 1. Install OpenVINO and convert the model:
309
- ```bash
310
- pip install openvino openvino-dev
311
- python models/convert-whisper-to-openvino.py --model base.en
312
- ```
313
-
314
- 2. The OpenVINO model files are placed next to your GGML model:
315
- ```
316
- models/ggml-base.en.bin
317
- models/ggml-base.en-encoder-openvino.xml
318
- models/ggml-base.en-encoder-openvino.bin
319
- ```
320
-
321
- 3. Enable OpenVINO:
322
- ```typescript
323
- const ctx = createWhisperContext({
324
- model: "./models/ggml-base.en.bin",
325
- use_openvino: true,
326
- openvino_device: "CPU", // or "GPU" for Intel iGPU
327
- openvino_cache_dir: "./openvino_cache", // optional, speeds up init
328
- });
329
- ```
330
-
331
- **Note:** OpenVINO support requires the addon to be built with `-DADDON_OPENVINO=ON`.
332
-
333
- ## Models
334
-
335
- Download models from [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp):
336
-
337
- ```bash
338
- # Base English model (~150MB)
339
- curl -L -o models/ggml-base.en.bin \
340
- https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
341
-
342
- # Large v3 Turbo quantized (~500MB)
343
- curl -L -o models/ggml-large-v3-turbo-q4_0.bin \
344
- https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q4_0.bin
345
-
346
- # Silero VAD model (for streaming VAD)
347
- curl -L -o models/ggml-silero-v6.2.0.bin \
348
- https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-silero-v6.2.0.bin
349
- ```
350
-
351
- ## License
352
-
353
- MIT
1
+ # whisper-cpp-node
2
+
3
+ Node.js bindings for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) - fast speech-to-text with GPU acceleration.
4
+
5
+ ## Features
6
+
7
+ - **Fast**: Native whisper.cpp performance with GPU acceleration
8
+ - **Cross-platform**: macOS (Metal), Windows (Vulkan)
9
+ - **Core ML**: Optional Apple Neural Engine support for 3x+ speedup (macOS)
10
+ - **OpenVINO**: Optional Intel CPU/GPU encoder acceleration (Windows/Linux)
11
+ - **Streaming VAD**: Built-in Silero voice activity detection
12
+ - **TypeScript**: Full type definitions included
13
+ - **Self-contained**: No external dependencies, just install and use
14
+
15
+ ## Requirements
16
+
17
+ **macOS:**
18
+ - macOS 13.3+ (Ventura or later)
19
+ - Apple Silicon (M1/M2/M3/M4)
20
+ - Node.js 18+
21
+
22
+ **Windows:**
23
+ - Windows 10/11 (x64)
24
+ - Node.js 18+
25
+ - Vulkan-capable GPU (optional, for GPU acceleration)
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ npm install whisper-cpp-node
31
+ # or
32
+ pnpm add whisper-cpp-node
33
+ ```
34
+
35
+ The platform-specific binary is automatically installed:
36
+ - macOS ARM64: `@whisper-cpp-node/darwin-arm64`
37
+ - Windows x64: `@whisper-cpp-node/win32-x64`
38
+
39
+ ## Quick Start
40
+
41
+ ### File-based transcription
42
+
43
+ ```typescript
44
+ import {
45
+ createWhisperContext,
46
+ transcribeAsync,
47
+ } from "whisper-cpp-node";
48
+
49
+ // Create a context with your model
50
+ const ctx = createWhisperContext({
51
+ model: "./models/ggml-base.en.bin",
52
+ use_gpu: true,
53
+ });
54
+
55
+ // Transcribe audio file
56
+ const result = await transcribeAsync(ctx, {
57
+ fname_inp: "./audio.wav",
58
+ language: "en",
59
+ });
60
+
61
+ // Result: { segments: [{ start: "00:00:00,000", end: "00:00:02,500", text: " Hello world" }, ...] }
62
+ for (const { start, end, text } of result.segments) {
63
+ console.log(`[${start} --> ${end}]${text}`);
64
+ }
65
+
66
+ // Clean up
67
+ ctx.free();
68
+ ```
69
+
70
+ ### Buffer-based transcription
71
+
72
+ ```typescript
73
+ import {
74
+ createWhisperContext,
75
+ transcribeAsync,
76
+ } from "whisper-cpp-node";
77
+
78
+ const ctx = createWhisperContext({
79
+ model: "./models/ggml-base.en.bin",
80
+ use_gpu: true,
81
+ });
82
+
83
+ // Pass raw PCM audio (16kHz, mono, float32)
84
+ const pcmData = new Float32Array(/* your audio samples */);
85
+ const result = await transcribeAsync(ctx, {
86
+ pcmf32: pcmData,
87
+ language: "en",
88
+ });
89
+
90
+ for (const { start, end, text } of result.segments) {
91
+ console.log(`[${start} --> ${end}]${text}`);
92
+ }
93
+
94
+ ctx.free();
95
+ ```
96
+
97
+ ### Streaming transcription
98
+
99
+ Get real-time output as audio is processed. The `on_new_segment` callback fires for each segment as it's generated, while the final callback still receives all segments at completion (backward compatible):
100
+
101
+ ```typescript
102
+ import { createWhisperContext, transcribe } from "whisper-cpp-node";
103
+
104
+ const ctx = createWhisperContext({
105
+ model: "./models/ggml-base.en.bin",
106
+ });
107
+
108
+ transcribe(ctx, {
109
+ fname_inp: "./long-audio.wav",
110
+ language: "en",
111
+
112
+ // Called for each segment as it's generated
113
+ on_new_segment: (segment) => {
114
+ console.log(`[${segment.start}]${segment.text}`);
115
+ },
116
+ }, (err, result) => {
117
+ // Final callback still receives ALL segments at completion
118
+ console.log(`Done! ${result.segments.length} segments`);
119
+ ctx.free();
120
+ });
121
+ ```
122
+
123
+ ## API
124
+
125
+ ### `createWhisperContext(options)`
126
+
127
+ Create a persistent context for transcription.
128
+
129
+ ```typescript
130
+ interface WhisperContextOptions {
131
+ model: string; // Path to GGML model file (required)
132
+ use_gpu?: boolean; // Enable GPU acceleration (default: true)
133
+ // Uses Metal on macOS, Vulkan on Windows
134
+ use_coreml?: boolean; // Enable Core ML on macOS (default: false)
135
+ use_openvino?: boolean; // Enable OpenVINO encoder on Intel (default: false)
136
+ openvino_device?: string; // OpenVINO device: 'CPU', 'GPU', 'NPU' (default: 'CPU')
137
+ openvino_model_path?: string; // Path to OpenVINO encoder model (auto-derived)
138
+ openvino_cache_dir?: string; // Cache dir for compiled OpenVINO models
139
+ flash_attn?: boolean; // Enable Flash Attention (default: false)
140
+ gpu_device?: number; // GPU device index (default: 0)
141
+ dtw?: string; // DTW preset for word timestamps
142
+ no_prints?: boolean; // Suppress log output (default: false)
143
+ }
144
+ ```
145
+
146
+ ### `transcribeAsync(context, options)`
147
+
148
+ Transcribe audio (Promise-based). Accepts either a file path or PCM buffer.
149
+
150
+ ```typescript
151
+ // File input
152
+ interface TranscribeOptionsFile {
153
+ fname_inp: string; // Path to audio file
154
+ // ... common options
155
+ }
156
+
157
+ // Buffer input
158
+ interface TranscribeOptionsBuffer {
159
+ pcmf32: Float32Array; // Raw PCM (16kHz, mono, float32, -1.0 to 1.0)
160
+ // ... common options
161
+ }
162
+
163
+ // Common options (partial list - see types.ts for full options)
164
+ interface TranscribeOptionsBase {
165
+ // Language
166
+ language?: string; // Language code ('en', 'zh', 'auto')
167
+ translate?: boolean; // Translate to English
168
+ detect_language?: boolean; // Auto-detect language
169
+
170
+ // Threading
171
+ n_threads?: number; // CPU threads (default: 4)
172
+ n_processors?: number; // Parallel processors
173
+
174
+ // Audio processing
175
+ offset_ms?: number; // Start offset in ms
176
+ duration_ms?: number; // Duration to process (0 = all)
177
+
178
+ // Output control
179
+ no_timestamps?: boolean; // Disable timestamps
180
+ max_len?: number; // Max segment length (chars)
181
+ max_tokens?: number; // Max tokens per segment
182
+ split_on_word?: boolean; // Split on word boundaries
183
+ token_timestamps?: boolean; // Include token-level timestamps
184
+
185
+ // Sampling
186
+ temperature?: number; // Sampling temperature (0.0 = greedy)
187
+ beam_size?: number; // Beam search size (-1 = greedy)
188
+ best_of?: number; // Best-of-N sampling
189
+
190
+ // Thresholds
191
+ entropy_thold?: number; // Entropy threshold
192
+ logprob_thold?: number; // Log probability threshold
193
+ no_speech_thold?: number; // No-speech probability threshold
194
+
195
+ // Context
196
+ prompt?: string; // Initial prompt text
197
+ no_context?: boolean; // Don't use previous context
198
+
199
+ // VAD preprocessing
200
+ vad?: boolean; // Enable VAD preprocessing
201
+ vad_model?: string; // Path to VAD model
202
+ vad_threshold?: number; // VAD threshold (0.0-1.0)
203
+ vad_min_speech_duration_ms?: number;
204
+ vad_min_silence_duration_ms?: number;
205
+ vad_speech_pad_ms?: number;
206
+
207
+ // Callbacks
208
+ progress_callback?: (progress: number) => void;
209
+ on_new_segment?: (segment: StreamingSegment) => void; // Streaming callback
210
+ }
211
+
212
+ // Streaming segment (passed to on_new_segment callback)
213
+ interface StreamingSegment {
214
+ start: string; // Start timestamp "HH:MM:SS,mmm"
215
+ end: string; // End timestamp
216
+ text: string; // Transcribed text
217
+ segment_index: number; // 0-based index
218
+ is_partial: boolean; // Reserved for future use
219
+ tokens?: StreamingToken[]; // Only if token_timestamps enabled
220
+ }
221
+
222
+ // Result
223
+ interface TranscribeResult {
224
+ segments: TranscriptSegment[];
225
+ }
226
+
227
+ // Segment is a tuple: [start, end, text]
228
+ type TranscriptSegment = [string, string, string];
229
+ // Example: ["00:00:00,000", "00:00:02,500", " Hello world"]
230
+ ```
231
+
232
+ ### `createVadContext(options)`
233
+
234
+ Create a voice activity detection context for streaming audio.
235
+
236
+ ```typescript
237
+ interface VadContextOptions {
238
+ model: string; // Path to Silero VAD model
239
+ threshold?: number; // Speech threshold (default: 0.5)
240
+ n_threads?: number; // Number of threads (default: 1)
241
+ no_prints?: boolean; // Suppress log output
242
+ }
243
+
244
+ interface VadContext {
245
+ getWindowSamples(): number; // Returns 512 (32ms at 16kHz)
246
+ getSampleRate(): number; // Returns 16000
247
+ process(samples: Float32Array): number; // Returns probability 0.0-1.0
248
+ reset(): void; // Reset LSTM state
249
+ free(): void; // Release resources
250
+ }
251
+ ```
252
+
253
+ #### VAD Example
254
+
255
+ ```typescript
256
+ import { createVadContext } from "whisper-cpp-node";
257
+
258
+ const vad = createVadContext({
259
+ model: "./models/ggml-silero-v6.2.0.bin",
260
+ threshold: 0.5,
261
+ });
262
+
263
+ const windowSize = vad.getWindowSamples(); // 512 samples
264
+
265
+ // Process audio in 32ms chunks
266
+ function processAudioChunk(samples: Float32Array) {
267
+ const probability = vad.process(samples);
268
+ if (probability >= 0.5) {
269
+ console.log("Speech detected!", probability);
270
+ }
271
+ }
272
+
273
+ // Reset when starting new audio stream
274
+ vad.reset();
275
+
276
+ // Clean up when done
277
+ vad.free();
278
+ ```
279
+
280
+ ## Core ML Acceleration (macOS)
281
+
282
+ For 3x+ faster encoding on Apple Silicon:
283
+
284
+ 1. Generate a Core ML model:
285
+ ```bash
286
+ pip install ane_transformers openai-whisper coremltools
287
+ ./models/generate-coreml-model.sh base.en
288
+ ```
289
+
290
+ 2. Place it next to your GGML model:
291
+ ```
292
+ models/ggml-base.en.bin
293
+ models/ggml-base.en-encoder.mlmodelc/
294
+ ```
295
+
296
+ 3. Enable Core ML:
297
+ ```typescript
298
+ const ctx = createWhisperContext({
299
+ model: "./models/ggml-base.en.bin",
300
+ use_coreml: true,
301
+ });
302
+ ```
303
+
304
+ ## OpenVINO Acceleration (Intel)
305
+
306
+ For faster encoder inference on Intel CPUs and GPUs (requires build with OpenVINO support):
307
+
308
+ 1. Install OpenVINO and convert the model:
309
+ ```bash
310
+ pip install openvino openvino-dev
311
+ python models/convert-whisper-to-openvino.py --model base.en
312
+ ```
313
+
314
+ 2. The OpenVINO model files are placed next to your GGML model:
315
+ ```
316
+ models/ggml-base.en.bin
317
+ models/ggml-base.en-encoder-openvino.xml
318
+ models/ggml-base.en-encoder-openvino.bin
319
+ ```
320
+
321
+ 3. Enable OpenVINO:
322
+ ```typescript
323
+ const ctx = createWhisperContext({
324
+ model: "./models/ggml-base.en.bin",
325
+ use_openvino: true,
326
+ openvino_device: "CPU", // or "GPU" for Intel iGPU
327
+ openvino_cache_dir: "./openvino_cache", // optional, speeds up init
328
+ });
329
+ ```
330
+
331
+ **Note:** OpenVINO support requires the addon to be built with `-DADDON_OPENVINO=ON`.
332
+
333
+ ## Models
334
+
335
+ Download models from [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp):
336
+
337
+ ```bash
338
+ # Base English model (~150MB)
339
+ curl -L -o models/ggml-base.en.bin \
340
+ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
341
+
342
+ # Large v3 Turbo quantized (~500MB)
343
+ curl -L -o models/ggml-large-v3-turbo-q4_0.bin \
344
+ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q4_0.bin
345
+
346
+ # Silero VAD model (for streaming VAD)
347
+ curl -L -o models/ggml-silero-v6.2.0.bin \
348
+ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-silero-v6.2.0.bin
349
+ ```
350
+
351
+ ## License
352
+
353
+ MIT
package/dist/types.d.ts CHANGED
@@ -34,8 +34,27 @@ export interface WhisperContextOptions {
34
34
  * Can speed up init time, especially for GPU, by caching compiled 'blobs'
35
35
  */
36
36
  openvino_cache_dir?: string;
37
- /** DTW alignment preset for word-level timestamps (e.g., 'base.en', 'small', 'large.v3') */
37
+ /**
38
+ * DTW alignment preset for word-level timestamps.
39
+ *
40
+ * Named presets: 'tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en',
41
+ * 'medium', 'medium.en', 'large.v1', 'large.v2', 'large.v3', 'large.v3.turbo'
42
+ *
43
+ * Dynamic presets:
44
+ * 'top-N' - Use all heads from top N text layers (plain averaging)
45
+ * 'top-N-norm' - Use top N layers + L2 norm filtering (keeps dtw_norm_top_k best heads)
46
+ *
47
+ * @example
48
+ * dtw: 'large.v3.turbo' // named preset
49
+ * dtw: 'top-4' // top 4 layers, all heads averaged
50
+ * dtw: 'top-4-norm' // top 4 layers, L2 norm selects best heads
51
+ */
38
52
  dtw?: string;
53
+ /**
54
+ * Number of attention heads to keep after L2 norm filtering (default: 10).
55
+ * Only used when dtw is set to a 'top-N-norm' preset.
56
+ */
57
+ dtw_norm_top_k?: number;
39
58
  /** Suppress whisper.cpp log output (default: false) */
40
59
  no_prints?: boolean;
41
60
  }
@@ -159,12 +178,18 @@ export interface TranscribeOptionsBuffer extends TranscribeOptionsBase {
159
178
  */
160
179
  export type TranscribeOptions = TranscribeOptionsFile | TranscribeOptionsBuffer;
161
180
  /**
162
- * Transcription result segment (tuple format)
163
- * [0]: Start time in format "HH:MM:SS,mmm"
164
- * [1]: End time in format "HH:MM:SS,mmm"
165
- * [2]: Transcribed text
181
+ * Transcription result segment
166
182
  */
167
- export type TranscriptSegment = [start: string, end: string, text: string];
183
+ export interface TranscriptSegment {
184
+ /** Start time in format "HH:MM:SS,mmm" */
185
+ start: string;
186
+ /** End time in format "HH:MM:SS,mmm" */
187
+ end: string;
188
+ /** Transcribed text */
189
+ text: string;
190
+ /** Token-level data (only present when token_timestamps is enabled) */
191
+ tokens?: StreamingToken[];
192
+ }
168
193
  /**
169
194
  * Token information for streaming callbacks
170
195
  */
@@ -173,10 +198,16 @@ export interface StreamingToken {
173
198
  text: string;
174
199
  /** Token probability (0.0 to 1.0) */
175
200
  probability: number;
176
- /** Token timestamp start (in centiseconds from audio start, only if token_timestamps enabled) */
177
- t0?: number;
178
- /** Token timestamp end (in centiseconds from audio start, only if token_timestamps enabled) */
179
- t1?: number;
201
+ /** Token timestamp start (in centiseconds from audio start) */
202
+ t0: number;
203
+ /** Token timestamp end (in centiseconds from audio start) */
204
+ t1: number;
205
+ /**
206
+ * DTW-aligned timestamp (in centiseconds from audio start).
207
+ * Only meaningful when the context was created with a DTW preset.
208
+ * Typically more accurate than t0/t1 for word-level alignment.
209
+ */
210
+ t_dtw: number;
180
211
  }
181
212
  /**
182
213
  * Segment data passed to streaming callback
@@ -199,8 +230,10 @@ export interface StreamingSegment {
199
230
  * Transcription result
200
231
  */
201
232
  export interface TranscribeResult {
202
- /** Array of transcript segments as [start, end, text] tuples */
233
+ /** Array of transcript segments */
203
234
  segments: TranscriptSegment[];
235
+ /** Detected language (when detect_language is true) */
236
+ language?: string;
204
237
  }
205
238
  /**
206
239
  * Options for creating a VadContext
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8CAA8C;IAC9C,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oCAAoC;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;;;;OAKG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,4FAA4F;IAC5F,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,uDAAuD;IACvD,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IAEpC,+CAA+C;IAC/C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAG1B,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mDAAmD;IACnD,YAAY,CAAC,EAAE,MAAM,CAAC;IAGtB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IAGnB,mCAAmC;IACnC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,0BAA0B;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0CAA0C;IAC1C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qCAAqC;IACrC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oDAAoD;IACpD,aAAa,CAAC,EAAE,OAAO,CAAC;IAGxB,8CAA8C;IAC9C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oCAAoC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8CAA8C;IAC9C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mCAAmC;IACnC,WAAW,CAAC,EAAE,OAAO,CAAC;IAGtB,qCAAqC;IACrC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gCAAgC;IAChC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,sCAAsC;IACtC,eAAe,CAAC,EAAE,MAAM,CAAC;IAGzB,sCAAsC;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,iCAAiC;IACjC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,6BAA6B;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,iCAAiC;IACjC,YAAY,CAAC,EAAE,OAAO,CAAC;IAGvB,iCAAiC;IACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,oDAAoD;IACpD,WAAW
,CAAC,EAAE,OAAO,CAAC;IAGtB,2BAA2B;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qBAAqB;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4BAA4B;IAC5B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,uBAAuB;IACvB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAG3B,+BAA+B;IAC/B,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,8CAA8C;IAC9C,0BAA0B,CAAC,EAAE,MAAM,CAAC;IACpC,+CAA+C;IAC/C,2BAA2B,CAAC,EAAE,MAAM,CAAC;IACrC,yCAAyC;IACzC,yBAAyB,CAAC,EAAE,MAAM,CAAC;IACnC,qCAAqC;IACrC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,gCAAgC;IAChC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAG7B,mDAAmD;IACnD,iBAAiB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IAE/C;;;;OAIG;IACH,cAAc,CAAC,EAAE,CAAC,OAAO,EAAE,gBAAgB,KAAK,IAAI,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,qBAAsB,SAAQ,qBAAqB;IAClE,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,KAAK,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,qBAAqB;IACpE,uEAAuE;IACvE,MAAM,EAAE,YAAY,CAAC;IACrB,SAAS,CAAC,EAAE,KAAK,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,qBAAqB,GAAG,uBAAuB,CAAC;AAEhF;;;;;GAKG;AACH,MAAM,MAAM,iBAAiB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;AAE3E;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,iBAAiB;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,qCAAqC;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,iGAAiG;IACjG,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,+FAA+F;IAC/F,EAAE,CAAC,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,0CAA0C;IAC1C,KAAK,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,GAAG,EAAE,MAAM,CAAC;IACZ,wCAAwC;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,8BAA8B;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,yDAAyD;IACzD,UAAU,EAAE,OAAO,CAAC;IACpB,iEAAiE;IACjE,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gEAAgE;IAChE,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,wCAAwC;IACxC,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oCAAoC;IACpC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yCAAyC;IACzC,aAAa,IAAI,MAAM,CAAC;IACxB,qCAAqC;IACrC,cAAc,IAAI,OA
AO,CAAC;IAC1B,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,KAAK,OAAO,EAAE,qBAAqB,GAAG,cAAc,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8CAA8C;IAC9C,gBAAgB,IAAI,MAAM,CAAC;IAC3B,8CAA8C;IAC9C,aAAa,IAAI,MAAM,CAAC;IACxB,iEAAiE;IACjE,OAAO,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAAC;IACvC,oCAAoC;IACpC,KAAK,IAAI,IAAI,CAAC;IACd,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,CAC/B,KAAK,EAAE,KAAK,GAAG,IAAI,EACnB,MAAM,CAAC,EAAE,gBAAgB,KACtB,IAAI,CAAC;AAEV;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,cAAc,EAAE,yBAAyB,CAAC;IAC1C,UAAU,EAAE,qBAAqB,CAAC;IAClC,UAAU,EAAE,CACV,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,EAC1B,QAAQ,EAAE,kBAAkB,KACzB,IAAI,CAAC;IACV,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8CAA8C;IAC9C,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oCAAoC;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;;;;OAKG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B;;;;;;;;;;;;;;OAcG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IAEpC,+CAA+C;IAC/C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAG1B,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mDAAmD;IACnD,YAAY,CAAC,EAAE,MAAM,CAAC;IAGtB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IAGnB,mCAAmC;IACnC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,0BAA0B;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0CAA0C;IAC1C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qCAAqC;IACrC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oDAAoD;IACpD,aAAa,CAAC,EAAE,OAAO,CAAC;IAGxB,8CAA8C;IAC9C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oCAAoC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8CAA8C;IAC9C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mCAAmC;IACnC,WAAW,CAAC,EAAE,OAAO,CAAC;IAGtB,qCAAqC;IACrC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gCAAgC;IAChC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,sCAAsC;IACtC,eAAe,CAAC,EAAE,MAAM,CAAC;IAGzB,sCAAsC;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,iCAAiC;IACjC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,6BAA6B;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,iCAAiC;IACjC,YAAY,CAAC,EAAE,OAAO,CAAC;IAGvB,iCAAiC;I
ACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,oDAAoD;IACpD,WAAW,CAAC,EAAE,OAAO,CAAC;IAGtB,2BAA2B;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qBAAqB;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4BAA4B;IAC5B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,uBAAuB;IACvB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAG3B,+BAA+B;IAC/B,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,8CAA8C;IAC9C,0BAA0B,CAAC,EAAE,MAAM,CAAC;IACpC,+CAA+C;IAC/C,2BAA2B,CAAC,EAAE,MAAM,CAAC;IACrC,yCAAyC;IACzC,yBAAyB,CAAC,EAAE,MAAM,CAAC;IACnC,qCAAqC;IACrC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,gCAAgC;IAChC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAG7B,mDAAmD;IACnD,iBAAiB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IAE/C;;;;OAIG;IACH,cAAc,CAAC,EAAE,CAAC,OAAO,EAAE,gBAAgB,KAAK,IAAI,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,qBAAsB,SAAQ,qBAAqB;IAClE,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,KAAK,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,qBAAqB;IACpE,uEAAuE;IACvE,MAAM,EAAE,YAAY,CAAC;IACrB,SAAS,CAAC,EAAE,KAAK,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,qBAAqB,GAAG,uBAAuB,CAAC;AAEhF;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,0CAA0C;IAC1C,KAAK,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,GAAG,EAAE,MAAM,CAAC;IACZ,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,uEAAuE;IACvE,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,iBAAiB;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,qCAAqC;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,+DAA+D;IAC/D,EAAE,EAAE,MAAM,CAAC;IACX,6DAA6D;IAC7D,EAAE,EAAE,MAAM,CAAC;IACX;;;;OAIG;IACH,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,0CAA0C;IAC1C,KAAK,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,GAAG,EAAE,MAAM,CAAC;IACZ,wCAAwC;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,8BAA8B;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,yDAAyD;IACzD,UAAU,EAAE,OAAO,CAAC;IACpB,iEAAiE;IACjE,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,mCAAmC;IACnC,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,uDAAuD;IACvD,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,wCAAwC;IACxC,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,
CAAC,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oCAAoC;IACpC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yCAAyC;IACzC,aAAa,IAAI,MAAM,CAAC;IACxB,qCAAqC;IACrC,cAAc,IAAI,OAAO,CAAC;IAC1B,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,KAAK,OAAO,EAAE,qBAAqB,GAAG,cAAc,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8CAA8C;IAC9C,gBAAgB,IAAI,MAAM,CAAC;IAC3B,8CAA8C;IAC9C,aAAa,IAAI,MAAM,CAAC;IACxB,iEAAiE;IACjE,OAAO,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAAC;IACvC,oCAAoC;IACpC,KAAK,IAAI,IAAI,CAAC;IACd,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,CAC/B,KAAK,EAAE,KAAK,GAAG,IAAI,EACnB,MAAM,CAAC,EAAE,gBAAgB,KACtB,IAAI,CAAC;AAEV;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,cAAc,EAAE,yBAAyB,CAAC;IAC1C,UAAU,EAAE,qBAAqB,CAAC;IAClC,UAAU,EAAE,CACV,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,EAC1B,QAAQ,EAAE,kBAAkB,KACzB,IAAI,CAAC;IACV,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "whisper-cpp-node",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "Node.js bindings for whisper.cpp - fast speech-to-text with GPU acceleration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -21,8 +21,8 @@
21
21
  "dist"
22
22
  ],
23
23
  "optionalDependencies": {
24
- "@whisper-cpp-node/darwin-arm64": "0.2.2",
25
- "@whisper-cpp-node/win32-x64": "0.2.3"
24
+ "@whisper-cpp-node/darwin-arm64": "0.2.3",
25
+ "@whisper-cpp-node/win32-x64": "0.2.4"
26
26
  },
27
27
  "devDependencies": {
28
28
  "@types/node": "^20.0.0",