@whisper-cpp-node/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +160 -0
- package/dist/index.d.ts +54 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +68 -0
- package/dist/index.js.map +1 -0
- package/dist/loader.d.ts +6 -0
- package/dist/loader.d.ts.map +1 -0
- package/dist/loader.js +80 -0
- package/dist/loader.js.map +1 -0
- package/dist/types.d.ts +141 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +50 -0
package/README.md
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# @whisper-cpp-node/core
|
|
2
|
+
|
|
3
|
+
Node.js bindings for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) - fast speech-to-text on Apple Silicon with Core ML and Metal support.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Fast**: Native whisper.cpp performance with Metal GPU acceleration
|
|
8
|
+
- **Core ML**: Optional Apple Neural Engine support for 3x+ speedup
|
|
9
|
+
- **Streaming VAD**: Built-in Silero voice activity detection
|
|
10
|
+
- **TypeScript**: Full type definitions included
|
|
11
|
+
- **Self-contained**: No external dependencies, just install and use
|
|
12
|
+
|
|
13
|
+
## Requirements
|
|
14
|
+
|
|
15
|
+
- macOS 13.3+ (Ventura or later)
|
|
16
|
+
- Apple Silicon (M1/M2/M3/M4)
|
|
17
|
+
- Node.js 18+
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npm install @whisper-cpp-node/core
|
|
23
|
+
# or
|
|
24
|
+
pnpm add @whisper-cpp-node/core
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The platform-specific binary (`@whisper-cpp-node/darwin-arm64`) is installed automatically as an optional dependency.
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
import {
|
|
33
|
+
createWhisperContext,
|
|
34
|
+
transcribeAsync,
|
|
35
|
+
} from "@whisper-cpp-node/core";
|
|
36
|
+
|
|
37
|
+
// Create a context with your model
|
|
38
|
+
const ctx = createWhisperContext({
|
|
39
|
+
model: "./models/ggml-base.en.bin",
|
|
40
|
+
use_gpu: true,
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
// Transcribe audio
|
|
44
|
+
const result = await transcribeAsync(ctx, {
|
|
45
|
+
fname_inp: "./audio.wav",
|
|
46
|
+
language: "en",
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
console.log(result.segments);
|
|
50
|
+
|
|
51
|
+
// Clean up
|
|
52
|
+
ctx.free();
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## API
|
|
56
|
+
|
|
57
|
+
### `createWhisperContext(options)`
|
|
58
|
+
|
|
59
|
+
Create a persistent context for transcription.
|
|
60
|
+
|
|
61
|
+
```typescript
|
|
62
|
+
interface WhisperContextOptions {
|
|
63
|
+
model: string; // Path to GGML model file (required)
|
|
64
|
+
use_gpu?: boolean; // Enable GPU acceleration (default: true)
|
|
65
|
+
use_coreml?: boolean; // Enable Core ML on macOS (default: false)
|
|
66
|
+
flash_attn?: boolean; // Enable Flash Attention (default: false)
|
|
67
|
+
gpu_device?: number; // GPU device index (default: 0)
|
|
68
|
+
dtw?: string; // DTW preset for word timestamps
|
|
69
|
+
no_prints?: boolean; // Suppress log output (default: false)
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### `transcribeAsync(context, options)`
|
|
74
|
+
|
|
75
|
+
Transcribe audio file (Promise-based).
|
|
76
|
+
|
|
77
|
+
```typescript
|
|
78
|
+
interface TranscribeOptions {
|
|
79
|
+
fname_inp: string; // Path to audio file (required)
|
|
80
|
+
language?: string; // Language code (e.g., 'en', 'zh', 'auto')
|
|
81
|
+
translate?: boolean; // Translate to English
|
|
82
|
+
n_threads?: number; // Number of threads
|
|
83
|
+
no_timestamps?: boolean; // Disable timestamps
|
|
84
|
+
// ... see dist/types.d.ts for full options
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
interface TranscribeResult {
|
|
88
|
+
segments: Array<[ // each segment is a [start, end, text] tuple, not an object
|
|
89
|
+
start: string, // "HH:MM:SS,mmm"
|
|
90
|
+
end: string, // "HH:MM:SS,mmm"
|
|
91
|
+
text: string
|
|
92
|
+
]>;
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### `createVadContext(options)`
|
|
97
|
+
|
|
98
|
+
Create a voice activity detection context.
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
interface VadContextOptions {
|
|
102
|
+
model: string; // Path to Silero VAD model
|
|
103
|
+
threshold?: number; // Speech threshold (default: 0.5)
|
|
104
|
+
n_threads?: number; // Number of threads (default: 1)
|
|
105
|
+
no_prints?: boolean; // Suppress log output
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Usage
|
|
109
|
+
const vad = createVadContext({
|
|
110
|
+
model: "./models/ggml-silero-v6.2.0.bin",
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
const samples = new Float32Array(vad.getWindowSamples());
|
|
114
|
+
// ... fill samples with 16kHz audio
|
|
115
|
+
const probability = vad.process(samples);
|
|
116
|
+
|
|
117
|
+
vad.free();
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Core ML Acceleration
|
|
121
|
+
|
|
122
|
+
For 3x+ faster encoding on Apple Silicon:
|
|
123
|
+
|
|
124
|
+
1. Generate a Core ML model (the script is part of the whisper.cpp repository, so run this from a whisper.cpp checkout):
|
|
125
|
+
```bash
|
|
126
|
+
pip install ane_transformers openai-whisper coremltools
|
|
127
|
+
./models/generate-coreml-model.sh base.en
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
2. Place it next to your GGML model:
|
|
131
|
+
```
|
|
132
|
+
models/ggml-base.en.bin
|
|
133
|
+
models/ggml-base.en-encoder.mlmodelc/
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
3. Enable Core ML:
|
|
137
|
+
```typescript
|
|
138
|
+
const ctx = createWhisperContext({
|
|
139
|
+
model: "./models/ggml-base.en.bin",
|
|
140
|
+
use_coreml: true,
|
|
141
|
+
});
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Models
|
|
145
|
+
|
|
146
|
+
Download models from [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp):
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# Base English model (~150MB)
|
|
150
|
+
curl -L -o models/ggml-base.en.bin \
|
|
151
|
+
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
|
|
152
|
+
|
|
153
|
+
# Large v3 Turbo quantized (~500MB)
|
|
154
|
+
curl -L -o models/ggml-large-v3-turbo-q4_0.bin \
|
|
155
|
+
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q4_0.bin
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## License
|
|
159
|
+
|
|
160
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import type { WhisperContext, WhisperContextOptions, VadContext, VadContextOptions, TranscribeOptions, TranscribeResult } from "./types";
|
|
2
|
+
export type { WhisperContextOptions, VadContextOptions, TranscribeOptions, TranscribeResult, TranscriptSegment, WhisperContext, VadContext, WhisperContextConstructor, VadContextConstructor, } from "./types";
|
|
3
|
+
export declare const WhisperContextClass: import("./types").WhisperContextConstructor;
|
|
4
|
+
export declare const VadContextClass: import("./types").VadContextConstructor;
|
|
5
|
+
export declare const transcribe: (context: WhisperContext, options: TranscribeOptions, callback: import("./types").TranscribeCallback) => void;
|
|
6
|
+
export declare const transcribeAsync: (context: WhisperContext, options: TranscribeOptions) => Promise<TranscribeResult>;
|
|
7
|
+
/**
|
|
8
|
+
* Create a new WhisperContext
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* const ctx = createWhisperContext({
|
|
13
|
+
* model: './models/ggml-base.en.bin',
|
|
14
|
+
* use_gpu: true,
|
|
15
|
+
* use_coreml: true,
|
|
16
|
+
* });
|
|
17
|
+
*
|
|
18
|
+
* const result = await transcribeAsync(ctx, {
|
|
19
|
+
* fname_inp: './audio.wav',
|
|
20
|
+
* language: 'en',
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* console.log(result.segments);
|
|
24
|
+
* ctx.free();
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
27
|
+
export declare function createWhisperContext(options: WhisperContextOptions): WhisperContext;
|
|
28
|
+
/**
|
|
29
|
+
* Create a new VadContext for voice activity detection
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```typescript
|
|
33
|
+
* const vad = createVadContext({
|
|
34
|
+
* model: './models/ggml-silero-v6.2.0.bin',
|
|
35
|
+
* threshold: 0.5,
|
|
36
|
+
* });
|
|
37
|
+
*
|
|
38
|
+
* const samples = new Float32Array(vad.getWindowSamples());
|
|
39
|
+
* const probability = vad.process(samples);
|
|
40
|
+
*
|
|
41
|
+
* vad.free();
|
|
42
|
+
* ```
|
|
43
|
+
*/
|
|
44
|
+
export declare function createVadContext(options: VadContextOptions): VadContext;
|
|
45
|
+
declare const _default: {
|
|
46
|
+
WhisperContext: import("./types").WhisperContextConstructor;
|
|
47
|
+
VadContext: import("./types").VadContextConstructor;
|
|
48
|
+
transcribe: (context: WhisperContext, options: TranscribeOptions, callback: import("./types").TranscribeCallback) => void;
|
|
49
|
+
transcribeAsync: (context: WhisperContext, options: TranscribeOptions) => Promise<TranscribeResult>;
|
|
50
|
+
createWhisperContext: typeof createWhisperContext;
|
|
51
|
+
createVadContext: typeof createVadContext;
|
|
52
|
+
};
|
|
53
|
+
export default _default;
|
|
54
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAEV,cAAc,EACd,qBAAqB,EACrB,UAAU,EACV,iBAAiB,EACjB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAGjB,YAAY,EACV,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,SAAS,CAAC;AAMjB,eAAO,MAAM,mBAAmB,6CAAuB,CAAC;AACxD,eAAO,MAAM,eAAe,yCAAmB,CAAC;AAGhD,eAAO,MAAM,UAAU,+GAAmB,CAAC;AAG3C,eAAO,MAAM,eAAe,EAAkC,CAC5D,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,KACvB,OAAO,CAAC,gBAAgB,CAAC,CAAC;AAE/B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,qBAAqB,GAC7B,cAAc,CAEhB;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAEvE;;;;;+BAhDU,cAAc,WACd,iBAAiB,KACvB,OAAO,CAAC,gBAAgB,CAAC;;;;AAiD9B,wBAOE"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.transcribeAsync = exports.transcribe = exports.VadContextClass = exports.WhisperContextClass = void 0;
|
|
4
|
+
exports.createWhisperContext = createWhisperContext;
|
|
5
|
+
exports.createVadContext = createVadContext;
|
|
6
|
+
const util_1 = require("util");
|
|
7
|
+
const loader_1 = require("./loader");
|
|
8
|
+
// Load the native addon for this platform once, when this module is first required (loadNativeAddon throws on failure)
|
|
9
|
+
const addon = (0, loader_1.loadNativeAddon)();
|
|
10
|
+
// Export the native constructors under *Class names so they do not clash with the WhisperContext / VadContext type exports
|
|
11
|
+
exports.WhisperContextClass = addon.WhisperContext;
|
|
12
|
+
exports.VadContextClass = addon.VadContext;
|
|
13
|
+
// Callback-based transcribe(context, options, callback), re-exported straight from the addon
|
|
14
|
+
exports.transcribe = addon.transcribe;
|
|
15
|
+
// Promise-returning variant for async/await: util.promisify supplies the trailing (error, result) callback
|
|
16
|
+
exports.transcribeAsync = (0, util_1.promisify)(addon.transcribe);
|
|
17
|
+
/**
|
|
18
|
+
* Create a new WhisperContext by passing `options` unchanged to the native addon's WhisperContext constructor
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const ctx = createWhisperContext({
|
|
23
|
+
* model: './models/ggml-base.en.bin',
|
|
24
|
+
* use_gpu: true,
|
|
25
|
+
* use_coreml: true,
|
|
26
|
+
* });
|
|
27
|
+
*
|
|
28
|
+
* const result = await transcribeAsync(ctx, {
|
|
29
|
+
* fname_inp: './audio.wav',
|
|
30
|
+
* language: 'en',
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* console.log(result.segments);
|
|
34
|
+
* ctx.free(); // release the context once it is no longer needed
|
|
35
|
+
* ```
|
|
36
|
+
*/
|
|
37
|
+
function createWhisperContext(options) {
|
|
38
|
+
return new addon.WhisperContext(options);
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Create a new VadContext for voice activity detection by passing `options` unchanged to the native addon's VadContext constructor
|
|
42
|
+
*
|
|
43
|
+
* @example
|
|
44
|
+
* ```typescript
|
|
45
|
+
* const vad = createVadContext({
|
|
46
|
+
* model: './models/ggml-silero-v6.2.0.bin',
|
|
47
|
+
* threshold: 0.5,
|
|
48
|
+
* });
|
|
49
|
+
*
|
|
50
|
+
* const samples = new Float32Array(vad.getWindowSamples()); // size the buffer from the context instead of hard-coding it
|
|
51
|
+
* const probability = vad.process(samples); // speech probability for this window
|
|
52
|
+
*
|
|
53
|
+
* vad.free(); // release the context once it is no longer needed
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
56
|
+
function createVadContext(options) {
|
|
57
|
+
return new addon.VadContext(options);
|
|
58
|
+
}
|
|
59
|
+
// Default export bundling the same API in one object; here the native constructors keep their WhisperContext / VadContext names
|
|
60
|
+
exports.default = {
|
|
61
|
+
WhisperContext: addon.WhisperContext,
|
|
62
|
+
VadContext: addon.VadContext,
|
|
63
|
+
transcribe: addon.transcribe,
|
|
64
|
+
transcribeAsync: exports.transcribeAsync,
|
|
65
|
+
createWhisperContext,
|
|
66
|
+
createVadContext,
|
|
67
|
+
};
|
|
68
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AA6DA,oDAIC;AAkBD,4CAEC;AArFD,+BAAiC;AACjC,qCAA2C;AAwB3C,oBAAoB;AACpB,MAAM,KAAK,GAAiB,IAAA,wBAAe,GAAE,CAAC;AAE9C,oEAAoE;AACvD,QAAA,mBAAmB,GAAG,KAAK,CAAC,cAAc,CAAC;AAC3C,QAAA,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC;AAEhD,qCAAqC;AACxB,QAAA,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;AAE3C,sCAAsC;AACzB,QAAA,eAAe,GAAG,IAAA,gBAAS,EAAC,KAAK,CAAC,UAAU,CAG3B,CAAC;AAE/B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,SAAgB,oBAAoB,CAClC,OAA8B;IAE9B,OAAO,IAAI,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,SAAgB,gBAAgB,CAAC,OAA0B;IACzD,OAAO,IAAI,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;AACvC,CAAC;AAED,wCAAwC;AACxC,kBAAe;IACb,cAAc,EAAE,KAAK,CAAC,cAAc;IACpC,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,eAAe,EAAf,uBAAe;IACf,oBAAoB;IACpB,gBAAgB;CACjB,CAAC"}
|
package/dist/loader.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../src/loader.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AA6D5C;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CA4B9C"}
|
package/dist/loader.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.loadNativeAddon = loadNativeAddon;
|
|
4
|
+
const os_1 = require("os");
|
|
5
|
+
const path_1 = require("path");
|
|
6
|
+
const fs_1 = require("fs");
|
|
7
|
+
/**
|
|
8
|
+
* Supported platform-arch combinations: maps a "<platform>-<arch>" key to the npm package that ships that platform's whisper.node binary
|
|
9
|
+
*/
|
|
10
|
+
const SUPPORTED_PLATFORMS = {
|
|
11
|
+
"darwin-arm64": "@whisper-cpp-node/darwin-arm64",
|
|
12
|
+
// Future: add more platforms (commented out for now, so these keys are currently reported as unsupported)
|
|
13
|
+
// "darwin-x64": "@whisper-cpp-node/darwin-x64",
|
|
14
|
+
// "linux-x64": "@whisper-cpp-node/linux-x64",
|
|
15
|
+
// "win32-x64": "@whisper-cpp-node/win32-x64",
|
|
16
|
+
};
|
|
17
|
+
/**
|
|
18
|
+
* Get the platform key for the current system: os.platform() and os.arch() joined by "-" (e.g. "darwin-arm64")
|
|
19
|
+
*/
|
|
20
|
+
function getPlatformKey() {
|
|
21
|
+
return `${(0, os_1.platform)()}-${(0, os_1.arch)()}`;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Get the platform-specific package name from SUPPORTED_PLATFORMS; throws an Error listing the supported keys when the current platform has no entry
|
|
25
|
+
*/
|
|
26
|
+
function getPlatformPackage() {
|
|
27
|
+
const platformKey = getPlatformKey();
|
|
28
|
+
const packageName = SUPPORTED_PLATFORMS[platformKey];
|
|
29
|
+
if (!packageName) {
|
|
30
|
+
const supported = Object.keys(SUPPORTED_PLATFORMS).join(", ");
|
|
31
|
+
throw new Error(`Unsupported platform: ${platformKey}. ` +
|
|
32
|
+
`Supported platforms: ${supported}`);
|
|
33
|
+
}
|
|
34
|
+
return packageName;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Try to find the binary in workspace development paths; returns the first candidate that exists on disk, or null when none does
|
|
38
|
+
*/
|
|
39
|
+
function tryWorkspacePath() {
|
|
40
|
+
const platformKey = getPlatformKey();
|
|
41
|
+
// In monorepo development, the binary is in a sibling package directory named after the platform key
|
|
42
|
+
const possiblePaths = [
|
|
43
|
+
// From dist/ folder, two levels up from __dirname: ../../<platformKey>/whisper.node
|
|
44
|
+
(0, path_1.join)(__dirname, "..", "..", platformKey, "whisper.node"),
|
|
45
|
+
// Three levels up from __dirname: ../../../<platformKey>/whisper.node (NOTE(review): originally described as the src/ ts-node case - confirm layout)
|
|
46
|
+
(0, path_1.join)(__dirname, "..", "..", "..", platformKey, "whisper.node"),
|
|
47
|
+
];
|
|
48
|
+
for (const p of possiblePaths) {
|
|
49
|
+
if ((0, fs_1.existsSync)(p)) {
|
|
50
|
+
return p;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return null;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Load the native addon for the current platform. Throws if the platform is unsupported, or if whisper.node cannot be found or loaded from the installed package.
|
|
57
|
+
*/
|
|
58
|
+
function loadNativeAddon() {
|
|
59
|
+
const packageName = getPlatformPackage();
|
|
60
|
+
// First, try workspace development path (checked before the installed package, so a binary found there wins)
|
|
61
|
+
const workspacePath = tryWorkspacePath();
|
|
62
|
+
if (workspacePath) {
|
|
63
|
+
return require(workspacePath);
|
|
64
|
+
}
|
|
65
|
+
// Then try the installed package: resolve "<packageName>/whisper.node" through Node's module resolution
|
|
66
|
+
try {
|
|
67
|
+
const binaryPath = require.resolve((0, path_1.join)(packageName, "whisper.node"));
|
|
68
|
+
return require(binaryPath);
|
|
69
|
+
}
|
|
70
|
+
catch (error) {
|
|
71
|
+
const err = error;
|
|
72
|
+
if (err.code === "MODULE_NOT_FOUND") {
|
|
73
|
+
throw new Error(`Native binary not found. Please ensure ${packageName} is installed.\n` +
|
|
74
|
+
`Try running: npm install ${packageName}\n` +
|
|
75
|
+
`Original error: ${err.message}`);
|
|
76
|
+
}
|
|
77
|
+
throw new Error(`Failed to load native addon from ${packageName}: ${err.message}`);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
//# sourceMappingURL=loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.js","sourceRoot":"","sources":["../src/loader.ts"],"names":[],"mappings":";;AAmEA,0CA4BC;AA/FD,2BAAoC;AACpC,+BAA4B;AAC5B,2BAAgC;AAGhC;;GAEG;AACH,MAAM,mBAAmB,GAA2B;IAClD,cAAc,EAAE,gCAAgC;IAChD,6BAA6B;IAC7B,gDAAgD;IAChD,8CAA8C;IAC9C,8CAA8C;CAC/C,CAAC;AAEF;;GAEG;AACH,SAAS,cAAc;IACrB,OAAO,GAAG,IAAA,aAAQ,GAAE,IAAI,IAAA,SAAI,GAAE,EAAE,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IACrC,MAAM,WAAW,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;IAErD,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9D,MAAM,IAAI,KAAK,CACb,yBAAyB,WAAW,IAAI;YACtC,wBAAwB,SAAS,EAAE,CACtC,CAAC;IACJ,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB;IACvB,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,4DAA4D;IAC5D,MAAM,aAAa,GAAG;QACpB,kDAAkD;QAClD,IAAA,WAAI,EAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,cAAc,CAAC;QACxD,mEAAmE;QACnE,IAAA,WAAI,EAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,cAAc,CAAC;KAC/D,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;QAC9B,IAAI,IAAA,eAAU,EAAC,CAAC,CAAC,EAAE,CAAC;YAClB,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAgB,eAAe;IAC7B,MAAM,WAAW,GAAG,kBAAkB,EAAE,CAAC;IAEzC,wCAAwC;IACxC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAC;IACzC,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,OAAO,CAAC,aAAa,CAAiB,CAAC;IAChD,CAAC;IAED,iCAAiC;IACjC,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,IAAA,WAAI,EAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;QACtE,OAAO,OAAO,CAAC,UAAU,CAAiB,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,KAA8B,CAAC;QAE3C,IAAI,GAAG,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACb,0CAA0C,WAAW,kBAAkB;gBACrE,4BAA4B,WAAW,IAAI;gBAC3C,mBAAmB,GAAG,CAAC,OAAO,EAAE,CACnC,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,KAAK,CACb,oCAAoC,WAAW,KAAK,GAAG,CAAC,OAAO,EAAE,CAClE,CAAC;IACJ,CAAC;AACH,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Options for creating a WhisperContext
|
|
3
|
+
*/
|
|
4
|
+
export interface WhisperContextOptions {
|
|
5
|
+
/** Path to the GGML model file */
|
|
6
|
+
model: string;
|
|
7
|
+
/** Enable GPU acceleration (default: true) */
|
|
8
|
+
use_gpu?: boolean;
|
|
9
|
+
/** Enable Flash Attention (default: false) */
|
|
10
|
+
flash_attn?: boolean;
|
|
11
|
+
/** GPU device index (default: 0) */
|
|
12
|
+
gpu_device?: number;
|
|
13
|
+
/** Enable Core ML acceleration on macOS (default: false) */
|
|
14
|
+
use_coreml?: boolean;
|
|
15
|
+
/** DTW alignment preset for word-level timestamps (e.g., 'base.en', 'small', 'large.v3') */
|
|
16
|
+
dtw?: string;
|
|
17
|
+
/** Suppress whisper.cpp log output (default: false) */
|
|
18
|
+
no_prints?: boolean;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Options for transcription
|
|
22
|
+
*/
|
|
23
|
+
export interface TranscribeOptions {
|
|
24
|
+
/** Path to the audio file */
|
|
25
|
+
fname_inp: string;
|
|
26
|
+
/** Language code (e.g., 'en', 'zh', 'auto') */
|
|
27
|
+
language?: string;
|
|
28
|
+
/** Translate to English */
|
|
29
|
+
translate?: boolean;
|
|
30
|
+
/** Number of threads to use */
|
|
31
|
+
n_threads?: number;
|
|
32
|
+
/** Number of processors */
|
|
33
|
+
n_processors?: number;
|
|
34
|
+
/** Disable timestamps in output */
|
|
35
|
+
no_timestamps?: boolean;
|
|
36
|
+
/** Detect language automatically */
|
|
37
|
+
detect_language?: boolean;
|
|
38
|
+
/** Single segment mode */
|
|
39
|
+
single_segment?: boolean;
|
|
40
|
+
/** Maximum segment length (0 = no limit) */
|
|
41
|
+
max_len?: number;
|
|
42
|
+
/** Maximum tokens per segment (0 = no limit) */
|
|
43
|
+
max_tokens?: number;
|
|
44
|
+
/** Maximum context size (-1 = default) */
|
|
45
|
+
max_context?: number;
|
|
46
|
+
/** Temperature for sampling */
|
|
47
|
+
temperature?: number;
|
|
48
|
+
/** Temperature increment for fallback */
|
|
49
|
+
temperature_inc?: number;
|
|
50
|
+
/** Best of N sampling */
|
|
51
|
+
best_of?: number;
|
|
52
|
+
/** Beam size (-1 = greedy) */
|
|
53
|
+
beam_size?: number;
|
|
54
|
+
/** Entropy threshold */
|
|
55
|
+
entropy_thold?: number;
|
|
56
|
+
/** Log probability threshold */
|
|
57
|
+
logprob_thold?: number;
|
|
58
|
+
/** No speech threshold */
|
|
59
|
+
no_speech_thold?: number;
|
|
60
|
+
/** Initial prompt text */
|
|
61
|
+
prompt?: string;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Transcription result segment (tuple format)
|
|
65
|
+
* [0]: Start time in format "HH:MM:SS,mmm"
|
|
66
|
+
* [1]: End time in format "HH:MM:SS,mmm"
|
|
67
|
+
* [2]: Transcribed text
|
|
68
|
+
*/
|
|
69
|
+
export type TranscriptSegment = [start: string, end: string, text: string];
|
|
70
|
+
/**
|
|
71
|
+
* Transcription result
|
|
72
|
+
*/
|
|
73
|
+
export interface TranscribeResult {
|
|
74
|
+
/** Array of transcript segments as [start, end, text] tuples */
|
|
75
|
+
segments: TranscriptSegment[];
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Options for creating a VadContext
|
|
79
|
+
*/
|
|
80
|
+
export interface VadContextOptions {
|
|
81
|
+
/** Path to the Silero VAD model file */
|
|
82
|
+
model: string;
|
|
83
|
+
/** Speech detection threshold (default: 0.5) */
|
|
84
|
+
threshold?: number;
|
|
85
|
+
/** Number of threads (default: 1) */
|
|
86
|
+
n_threads?: number;
|
|
87
|
+
/** Suppress model loading prints */
|
|
88
|
+
no_prints?: boolean;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* WhisperContext class for persistent model context
|
|
92
|
+
*/
|
|
93
|
+
export interface WhisperContext {
|
|
94
|
+
/** Get whisper.cpp system info string */
|
|
95
|
+
getSystemInfo(): string;
|
|
96
|
+
/** Check if model is multilingual */
|
|
97
|
+
isMultilingual(): boolean;
|
|
98
|
+
/** Free the context and release resources */
|
|
99
|
+
free(): void;
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* WhisperContext constructor type
|
|
103
|
+
*/
|
|
104
|
+
export interface WhisperContextConstructor {
|
|
105
|
+
new (options: WhisperContextOptions): WhisperContext;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* VadContext class for voice activity detection
|
|
109
|
+
*/
|
|
110
|
+
export interface VadContext {
|
|
111
|
+
/** Get the required window size in samples */
|
|
112
|
+
getWindowSamples(): number;
|
|
113
|
+
/** Get the expected sample rate (16000 Hz) */
|
|
114
|
+
getSampleRate(): number;
|
|
115
|
+
/** Process audio samples and return speech probability [0, 1] */
|
|
116
|
+
process(samples: Float32Array): number;
|
|
117
|
+
/** Reset the internal LSTM state */
|
|
118
|
+
reset(): void;
|
|
119
|
+
/** Free the context and release resources */
|
|
120
|
+
free(): void;
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* VadContext constructor type
|
|
124
|
+
*/
|
|
125
|
+
export interface VadContextConstructor {
|
|
126
|
+
new (options: VadContextOptions): VadContext;
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* Transcribe callback function signature
|
|
130
|
+
*/
|
|
131
|
+
export type TranscribeCallback = (error: Error | null, result?: TranscribeResult) => void;
|
|
132
|
+
/**
|
|
133
|
+
* Native addon interface
|
|
134
|
+
*/
|
|
135
|
+
export interface WhisperAddon {
|
|
136
|
+
WhisperContext: WhisperContextConstructor;
|
|
137
|
+
VadContext: VadContextConstructor;
|
|
138
|
+
transcribe: (context: WhisperContext, options: TranscribeOptions, callback: TranscribeCallback) => void;
|
|
139
|
+
whisper: Record<string, unknown>;
|
|
140
|
+
}
|
|
141
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8CAA8C;IAC9C,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oCAAoC;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,4FAA4F;IAC5F,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,uDAAuD;IACvD,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,+CAA+C;IAC/C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mCAAmC;IACnC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,0BAA0B;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4CAA4C;IAC5C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0CAA0C;IAC1C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wBAAwB;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gCAAgC;IAChC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,0BAA0B;IAC1B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;GAKG;AACH,MAAM,MAAM,iBAAiB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;AAE3E;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gEAAgE;IAChE,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,wCAAwC;IACxC,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oCAAoC;IACpC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yCAAyC;IACzC,aAAa,IAAI,MAAM,CAAC;IACxB,qCAAqC;IACrC,cAAc,IAAI,OAAO,CAAC;IAC1B,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,KAAK,OAAO,EAAE,qBAAqB,GAAG,cAAc,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8CAA8C;IAC
9C,gBAAgB,IAAI,MAAM,CAAC;IAC3B,8CAA8C;IAC9C,aAAa,IAAI,MAAM,CAAC;IACxB,iEAAiE;IACjE,OAAO,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAAC;IACvC,oCAAoC;IACpC,KAAK,IAAI,IAAI,CAAC;IACd,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,CAC/B,KAAK,EAAE,KAAK,GAAG,IAAI,EACnB,MAAM,CAAC,EAAE,gBAAgB,KACtB,IAAI,CAAC;AAEV;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,cAAc,EAAE,yBAAyB,CAAC;IAC1C,UAAU,EAAE,qBAAqB,CAAC;IAClC,UAAU,EAAE,CACV,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,EAC1B,QAAQ,EAAE,kBAAkB,KACzB,IAAI,CAAC;IACV,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@whisper-cpp-node/core",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Node.js bindings for whisper.cpp - fast speech-to-text on Apple Silicon",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/niconicoye/whisper.cpp",
|
|
9
|
+
"directory": "npm/packages/core"
|
|
10
|
+
},
|
|
11
|
+
"main": "dist/index.js",
|
|
12
|
+
"types": "dist/index.d.ts",
|
|
13
|
+
"exports": {
|
|
14
|
+
".": {
|
|
15
|
+
"types": "./dist/index.d.ts",
|
|
16
|
+
"require": "./dist/index.js",
|
|
17
|
+
"import": "./dist/index.js"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"dist"
|
|
22
|
+
],
|
|
23
|
+
"optionalDependencies": {
|
|
24
|
+
"@whisper-cpp-node/darwin-arm64": "0.1.0"
|
|
25
|
+
},
|
|
26
|
+
"devDependencies": {
|
|
27
|
+
"@types/node": "^20.0.0",
|
|
28
|
+
"typescript": "^5.3.0"
|
|
29
|
+
},
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=18.0.0"
|
|
32
|
+
},
|
|
33
|
+
"keywords": [
|
|
34
|
+
"whisper",
|
|
35
|
+
"whisper.cpp",
|
|
36
|
+
"speech-to-text",
|
|
37
|
+
"transcription",
|
|
38
|
+
"audio",
|
|
39
|
+
"asr",
|
|
40
|
+
"apple-silicon",
|
|
41
|
+
"coreml",
|
|
42
|
+
"metal"
|
|
43
|
+
],
|
|
44
|
+
"publishConfig": {
|
|
45
|
+
"access": "public"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"build": "tsc"
|
|
49
|
+
}
|
|
50
|
+
}
|