@whisper-cpp-node/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,160 @@
1
+ # @whisper-cpp-node/core
2
+
3
+ Node.js bindings for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) - fast speech-to-text on Apple Silicon with Core ML and Metal support.
4
+
5
+ ## Features
6
+
7
+ - **Fast**: Native whisper.cpp performance with Metal GPU acceleration
8
+ - **Core ML**: Optional Apple Neural Engine support for 3x+ faster encoding
9
+ - **Streaming VAD**: Built-in Silero voice activity detection
10
+ - **TypeScript**: Full type definitions included
11
+ - **Self-contained**: Prebuilt native binary; no compilation or system dependencies, just install and use
12
+
13
+ ## Requirements
14
+
15
+ - macOS 13.3+ (Ventura or later)
16
+ - Apple Silicon (M1/M2/M3/M4)
17
+ - Node.js 18+
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ npm install @whisper-cpp-node/core
23
+ # or
24
+ pnpm add @whisper-cpp-node/core
25
+ ```
26
+
27
+ The platform-specific binary (`@whisper-cpp-node/darwin-arm64`) is installed automatically as an optional dependency.
28
+
29
+ ## Quick Start
30
+
31
+ ```typescript
32
+ import {
33
+ createWhisperContext,
34
+ transcribeAsync,
35
+ } from "@whisper-cpp-node/core";
36
+
37
+ // Create a context with your model
38
+ const ctx = createWhisperContext({
39
+ model: "./models/ggml-base.en.bin",
40
+ use_gpu: true,
41
+ });
42
+
43
+ // Transcribe audio
44
+ const result = await transcribeAsync(ctx, {
45
+ fname_inp: "./audio.wav",
46
+ language: "en",
47
+ });
48
+
49
+ console.log(result.segments);
50
+
51
+ // Clean up
52
+ ctx.free();
53
+ ```
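+ 
+ If you prefer Node-style callbacks, the underlying `transcribe` export takes the same context and options plus an `(error, result)` callback. A minimal sketch (model and audio paths are placeholders):
+ 
+ ```typescript
+ import { createWhisperContext, transcribe } from "@whisper-cpp-node/core";
+ 
+ const ctx = createWhisperContext({ model: "./models/ggml-base.en.bin" });
+ 
+ transcribe(ctx, { fname_inp: "./audio.wav", language: "en" }, (error, result) => {
+   if (error) throw error;
+   console.log(result?.segments);
+   ctx.free();
+ });
+ ```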
54
+
55
+ ## API
56
+
57
+ ### `createWhisperContext(options)`
58
+
59
+ Create a persistent context for transcription. Call `free()` on the returned context when you are done to release the model.
60
+
61
+ ```typescript
62
+ interface WhisperContextOptions {
63
+ model: string; // Path to GGML model file (required)
64
+ use_gpu?: boolean; // Enable GPU acceleration (default: true)
65
+ use_coreml?: boolean; // Enable Core ML on macOS (default: false)
66
+ flash_attn?: boolean; // Enable Flash Attention (default: false)
67
+ gpu_device?: number; // GPU device index (default: 0)
68
+ dtw?: string; // DTW preset for word timestamps
69
+ no_prints?: boolean; // Suppress log output (default: false)
70
+ }
71
+ ```
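+ 
+ The returned context also exposes `getSystemInfo()`, `isMultilingual()`, and `free()` (see `types.ts`). A minimal sketch, assuming the base English model from the Models section below:
+ 
+ ```typescript
+ import { createWhisperContext } from "@whisper-cpp-node/core";
+ 
+ const ctx = createWhisperContext({ model: "./models/ggml-base.en.bin" });
+ 
+ console.log(ctx.getSystemInfo());  // whisper.cpp build/system info string
+ console.log(ctx.isMultilingual()); // false for *.en models
+ 
+ ctx.free(); // always release the native context when done
+ ```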
72
+
73
+ ### `transcribeAsync(context, options)`
74
+
75
+ Transcribe an audio file. This is a Promise-based wrapper (via `util.promisify`) around the callback-based `transcribe` export.
76
+
77
+ ```typescript
78
+ interface TranscribeOptions {
79
+ fname_inp: string; // Path to audio file (required)
80
+ language?: string; // Language code (e.g., 'en', 'zh', 'auto')
81
+ translate?: boolean; // Translate to English
82
+ n_threads?: number; // Number of threads
83
+ no_timestamps?: boolean; // Disable timestamps
84
+ // ... see types.ts for full options
85
+ }
86
+
87
+ // Times are formatted as "HH:MM:SS,mmm"
88
+ type TranscriptSegment = [start: string, end: string, text: string];
89
+ 
90
+ interface TranscribeResult {
91
+   /** Segments as [start, end, text] tuples */
92
+   segments: TranscriptSegment[];
93
+ }
94
+ ```
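+ 
+ Because segments are tuples, destructure them when formatting output. A short sketch (model and audio paths are placeholders):
+ 
+ ```typescript
+ import { createWhisperContext, transcribeAsync } from "@whisper-cpp-node/core";
+ 
+ const ctx = createWhisperContext({ model: "./models/ggml-base.en.bin" });
+ 
+ const { segments } = await transcribeAsync(ctx, {
+   fname_inp: "./audio.wav",
+   language: "en",
+ });
+ 
+ for (const [start, end, text] of segments) {
+   console.log(`[${start} --> ${end}]${text}`);
+ }
+ 
+ ctx.free();
+ ```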
95
+
96
+ ### `createVadContext(options)`
97
+
98
+ Create a voice activity detection context.
99
+
100
+ ```typescript
101
+ interface VadContextOptions {
102
+ model: string; // Path to Silero VAD model
103
+ threshold?: number; // Speech threshold (default: 0.5)
104
+ n_threads?: number; // Number of threads (default: 1)
105
+ no_prints?: boolean; // Suppress log output
106
+ }
107
+
108
+ // Usage
109
+ const vad = createVadContext({
110
+ model: "./models/ggml-silero-v6.2.0.bin",
111
+ });
112
+
113
+ const samples = new Float32Array(vad.getWindowSamples());
114
+ // ... fill samples with 16kHz audio
115
+ const probability = vad.process(samples);
116
+
117
+ vad.free();
118
+ ```
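+ 
+ For streaming, feed the detector fixed-size windows and reset its state between utterances. A sketch, where `pcm` stands in for 16 kHz mono samples you have already decoded (here just a silent buffer):
+ 
+ ```typescript
+ import { createVadContext } from "@whisper-cpp-node/core";
+ 
+ const vad = createVadContext({ model: "./models/ggml-silero-v6.2.0.bin" });
+ const windowSize = vad.getWindowSamples(); // e.g. 512 samples at 16 kHz
+ 
+ const pcm = new Float32Array(16000); // one second of 16 kHz audio (replace with real samples)
+ 
+ for (let offset = 0; offset + windowSize <= pcm.length; offset += windowSize) {
+   const probability = vad.process(pcm.subarray(offset, offset + windowSize));
+   if (probability > 0.5) {
+     // speech detected in this window
+   }
+ }
+ 
+ vad.reset(); // clear the internal state before the next stream
+ vad.free();
+ ```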
119
+
120
+ ## Core ML Acceleration
121
+
122
+ For 3x+ faster encoding on Apple Silicon:
123
+
124
+ 1. Generate a Core ML model (using the conversion script from the whisper.cpp repository):
125
+ ```bash
126
+ pip install ane_transformers openai-whisper coremltools
127
+ ./models/generate-coreml-model.sh base.en
128
+ ```
129
+
130
+ 2. Place it next to your GGML model:
131
+ ```
132
+ models/ggml-base.en.bin
133
+ models/ggml-base.en-encoder.mlmodelc/
134
+ ```
135
+
136
+ 3. Enable Core ML:
137
+ ```typescript
138
+ const ctx = createWhisperContext({
139
+ model: "./models/ggml-base.en.bin",
140
+ use_coreml: true,
141
+ });
142
+ ```
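+ 
+ whisper.cpp derives the Core ML encoder path from the GGML model path using the naming convention in step 2. If you only want to opt in when the converted encoder is actually present, a small sketch (the fallback logic is illustrative, not part of the library):
+ 
+ ```typescript
+ import { existsSync } from "fs";
+ import { createWhisperContext } from "@whisper-cpp-node/core";
+ 
+ const model = "./models/ggml-base.en.bin";
+ // ggml-<name>.bin -> ggml-<name>-encoder.mlmodelc (see step 2)
+ const coremlEncoder = model.replace(/\.bin$/, "-encoder.mlmodelc");
+ 
+ const ctx = createWhisperContext({
+   model,
+   use_coreml: existsSync(coremlEncoder), // enable Core ML only if the encoder exists
+ });
+ ```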
143
+
144
+ ## Models
145
+
146
+ Download models from [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp):
147
+
148
+ ```bash
149
+ # Base English model (~150MB)
150
+ curl -L -o models/ggml-base.en.bin \
151
+ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
152
+
153
+ # Large v3 Turbo quantized (~500MB)
154
+ curl -L -o models/ggml-large-v3-turbo-q4_0.bin \
155
+ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q4_0.bin
156
+ ```
157
+
158
+ ## License
159
+
160
+ MIT
package/dist/index.d.ts ADDED
@@ -0,0 +1,54 @@
1
+ import type { WhisperContext, WhisperContextOptions, VadContext, VadContextOptions, TranscribeOptions, TranscribeResult } from "./types";
2
+ export type { WhisperContextOptions, VadContextOptions, TranscribeOptions, TranscribeResult, TranscriptSegment, WhisperContext, VadContext, WhisperContextConstructor, VadContextConstructor, } from "./types";
3
+ export declare const WhisperContextClass: import("./types").WhisperContextConstructor;
4
+ export declare const VadContextClass: import("./types").VadContextConstructor;
5
+ export declare const transcribe: (context: WhisperContext, options: TranscribeOptions, callback: import("./types").TranscribeCallback) => void;
6
+ export declare const transcribeAsync: (context: WhisperContext, options: TranscribeOptions) => Promise<TranscribeResult>;
7
+ /**
8
+ * Create a new WhisperContext
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * const ctx = createWhisperContext({
13
+ * model: './models/ggml-base.en.bin',
14
+ * use_gpu: true,
15
+ * use_coreml: true,
16
+ * });
17
+ *
18
+ * const result = await transcribeAsync(ctx, {
19
+ * fname_inp: './audio.wav',
20
+ * language: 'en',
21
+ * });
22
+ *
23
+ * console.log(result.segments);
24
+ * ctx.free();
25
+ * ```
26
+ */
27
+ export declare function createWhisperContext(options: WhisperContextOptions): WhisperContext;
28
+ /**
29
+ * Create a new VadContext for voice activity detection
30
+ *
31
+ * @example
32
+ * ```typescript
33
+ * const vad = createVadContext({
34
+ * model: './models/ggml-silero-v6.2.0.bin',
35
+ * threshold: 0.5,
36
+ * });
37
+ *
38
+ * const samples = new Float32Array(512);
39
+ * const probability = vad.process(samples);
40
+ *
41
+ * vad.free();
42
+ * ```
43
+ */
44
+ export declare function createVadContext(options: VadContextOptions): VadContext;
45
+ declare const _default: {
46
+ WhisperContext: import("./types").WhisperContextConstructor;
47
+ VadContext: import("./types").VadContextConstructor;
48
+ transcribe: (context: WhisperContext, options: TranscribeOptions, callback: import("./types").TranscribeCallback) => void;
49
+ transcribeAsync: (context: WhisperContext, options: TranscribeOptions) => Promise<TranscribeResult>;
50
+ createWhisperContext: typeof createWhisperContext;
51
+ createVadContext: typeof createVadContext;
52
+ };
53
+ export default _default;
54
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAEV,cAAc,EACd,qBAAqB,EACrB,UAAU,EACV,iBAAiB,EACjB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAGjB,YAAY,EACV,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,SAAS,CAAC;AAMjB,eAAO,MAAM,mBAAmB,6CAAuB,CAAC;AACxD,eAAO,MAAM,eAAe,yCAAmB,CAAC;AAGhD,eAAO,MAAM,UAAU,+GAAmB,CAAC;AAG3C,eAAO,MAAM,eAAe,EAAkC,CAC5D,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,KACvB,OAAO,CAAC,gBAAgB,CAAC,CAAC;AAE/B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,qBAAqB,GAC7B,cAAc,CAEhB;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAEvE;;;;;+BAhDU,cAAc,WACd,iBAAiB,KACvB,OAAO,CAAC,gBAAgB,CAAC;;;;AAiD9B,wBAOE"}
package/dist/index.js ADDED
@@ -0,0 +1,68 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.transcribeAsync = exports.transcribe = exports.VadContextClass = exports.WhisperContextClass = void 0;
4
+ exports.createWhisperContext = createWhisperContext;
5
+ exports.createVadContext = createVadContext;
6
+ const util_1 = require("util");
7
+ const loader_1 = require("./loader");
8
+ // Load native addon
9
+ const addon = (0, loader_1.loadNativeAddon)();
10
+ // Export native constructors with different names to avoid conflict
11
+ exports.WhisperContextClass = addon.WhisperContext;
12
+ exports.VadContextClass = addon.VadContext;
13
+ // Original callback-based transcribe
14
+ exports.transcribe = addon.transcribe;
15
+ // Promisified version for async/await
16
+ exports.transcribeAsync = (0, util_1.promisify)(addon.transcribe);
17
+ /**
18
+ * Create a new WhisperContext
19
+ *
20
+ * @example
21
+ * ```typescript
22
+ * const ctx = createWhisperContext({
23
+ * model: './models/ggml-base.en.bin',
24
+ * use_gpu: true,
25
+ * use_coreml: true,
26
+ * });
27
+ *
28
+ * const result = await transcribeAsync(ctx, {
29
+ * fname_inp: './audio.wav',
30
+ * language: 'en',
31
+ * });
32
+ *
33
+ * console.log(result.segments);
34
+ * ctx.free();
35
+ * ```
36
+ */
37
+ function createWhisperContext(options) {
38
+ return new addon.WhisperContext(options);
39
+ }
40
+ /**
41
+ * Create a new VadContext for voice activity detection
42
+ *
43
+ * @example
44
+ * ```typescript
45
+ * const vad = createVadContext({
46
+ * model: './models/ggml-silero-v6.2.0.bin',
47
+ * threshold: 0.5,
48
+ * });
49
+ *
50
+ * const samples = new Float32Array(512);
51
+ * const probability = vad.process(samples);
52
+ *
53
+ * vad.free();
54
+ * ```
55
+ */
56
+ function createVadContext(options) {
57
+ return new addon.VadContext(options);
58
+ }
59
+ // Default export with all functionality
60
+ exports.default = {
61
+ WhisperContext: addon.WhisperContext,
62
+ VadContext: addon.VadContext,
63
+ transcribe: addon.transcribe,
64
+ transcribeAsync: exports.transcribeAsync,
65
+ createWhisperContext,
66
+ createVadContext,
67
+ };
68
+ //# sourceMappingURL=index.js.map
package/dist/index.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AA6DA,oDAIC;AAkBD,4CAEC;AArFD,+BAAiC;AACjC,qCAA2C;AAwB3C,oBAAoB;AACpB,MAAM,KAAK,GAAiB,IAAA,wBAAe,GAAE,CAAC;AAE9C,oEAAoE;AACvD,QAAA,mBAAmB,GAAG,KAAK,CAAC,cAAc,CAAC;AAC3C,QAAA,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC;AAEhD,qCAAqC;AACxB,QAAA,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;AAE3C,sCAAsC;AACzB,QAAA,eAAe,GAAG,IAAA,gBAAS,EAAC,KAAK,CAAC,UAAU,CAG3B,CAAC;AAE/B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,SAAgB,oBAAoB,CAClC,OAA8B;IAE9B,OAAO,IAAI,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,SAAgB,gBAAgB,CAAC,OAA0B;IACzD,OAAO,IAAI,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;AACvC,CAAC;AAED,wCAAwC;AACxC,kBAAe;IACb,cAAc,EAAE,KAAK,CAAC,cAAc;IACpC,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,eAAe,EAAf,uBAAe;IACf,oBAAoB;IACpB,gBAAgB;CACjB,CAAC"}
package/dist/loader.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ import type { WhisperAddon } from "./types";
2
+ /**
3
+ * Load the native addon for the current platform
4
+ */
5
+ export declare function loadNativeAddon(): WhisperAddon;
6
+ //# sourceMappingURL=loader.d.ts.map
package/dist/loader.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../src/loader.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AA6D5C;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CA4B9C"}
package/dist/loader.js ADDED
@@ -0,0 +1,80 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.loadNativeAddon = loadNativeAddon;
4
+ const os_1 = require("os");
5
+ const path_1 = require("path");
6
+ const fs_1 = require("fs");
7
+ /**
8
+ * Supported platform-arch combinations
9
+ */
10
+ const SUPPORTED_PLATFORMS = {
11
+ "darwin-arm64": "@whisper-cpp-node/darwin-arm64",
12
+ // Future: add more platforms
13
+ // "darwin-x64": "@whisper-cpp-node/darwin-x64",
14
+ // "linux-x64": "@whisper-cpp-node/linux-x64",
15
+ // "win32-x64": "@whisper-cpp-node/win32-x64",
16
+ };
17
+ /**
18
+ * Get the platform key for current system
19
+ */
20
+ function getPlatformKey() {
21
+ return `${(0, os_1.platform)()}-${(0, os_1.arch)()}`;
22
+ }
23
+ /**
24
+ * Get the platform-specific package name
25
+ */
26
+ function getPlatformPackage() {
27
+ const platformKey = getPlatformKey();
28
+ const packageName = SUPPORTED_PLATFORMS[platformKey];
29
+ if (!packageName) {
30
+ const supported = Object.keys(SUPPORTED_PLATFORMS).join(", ");
31
+ throw new Error(`Unsupported platform: ${platformKey}. ` +
32
+ `Supported platforms: ${supported}`);
33
+ }
34
+ return packageName;
35
+ }
36
+ /**
37
+ * Try to find the binary in workspace development paths
38
+ */
39
+ function tryWorkspacePath() {
40
+ const platformKey = getPlatformKey();
41
+ // In monorepo development, the binary is in sibling package
42
+ const possiblePaths = [
43
+ // From dist/ folder: ../darwin-arm64/whisper.node
44
+ (0, path_1.join)(__dirname, "..", "..", platformKey, "whisper.node"),
45
+ // From src/ folder during ts-node: ../../darwin-arm64/whisper.node
46
+ (0, path_1.join)(__dirname, "..", "..", "..", platformKey, "whisper.node"),
47
+ ];
48
+ for (const p of possiblePaths) {
49
+ if ((0, fs_1.existsSync)(p)) {
50
+ return p;
51
+ }
52
+ }
53
+ return null;
54
+ }
55
+ /**
56
+ * Load the native addon for the current platform
57
+ */
58
+ function loadNativeAddon() {
59
+ const packageName = getPlatformPackage();
60
+ // First, try workspace development path
61
+ const workspacePath = tryWorkspacePath();
62
+ if (workspacePath) {
63
+ return require(workspacePath);
64
+ }
65
+ // Then try the installed package
66
+ try {
67
+ const binaryPath = require.resolve((0, path_1.join)(packageName, "whisper.node"));
68
+ return require(binaryPath);
69
+ }
70
+ catch (error) {
71
+ const err = error;
72
+ if (err.code === "MODULE_NOT_FOUND") {
73
+ throw new Error(`Native binary not found. Please ensure ${packageName} is installed.\n` +
74
+ `Try running: npm install ${packageName}\n` +
75
+ `Original error: ${err.message}`);
76
+ }
77
+ throw new Error(`Failed to load native addon from ${packageName}: ${err.message}`);
78
+ }
79
+ }
80
+ //# sourceMappingURL=loader.js.map
package/dist/loader.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.js","sourceRoot":"","sources":["../src/loader.ts"],"names":[],"mappings":";;AAmEA,0CA4BC;AA/FD,2BAAoC;AACpC,+BAA4B;AAC5B,2BAAgC;AAGhC;;GAEG;AACH,MAAM,mBAAmB,GAA2B;IAClD,cAAc,EAAE,gCAAgC;IAChD,6BAA6B;IAC7B,gDAAgD;IAChD,8CAA8C;IAC9C,8CAA8C;CAC/C,CAAC;AAEF;;GAEG;AACH,SAAS,cAAc;IACrB,OAAO,GAAG,IAAA,aAAQ,GAAE,IAAI,IAAA,SAAI,GAAE,EAAE,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IACrC,MAAM,WAAW,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;IAErD,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9D,MAAM,IAAI,KAAK,CACb,yBAAyB,WAAW,IAAI;YACtC,wBAAwB,SAAS,EAAE,CACtC,CAAC;IACJ,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB;IACvB,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,4DAA4D;IAC5D,MAAM,aAAa,GAAG;QACpB,kDAAkD;QAClD,IAAA,WAAI,EAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,cAAc,CAAC;QACxD,mEAAmE;QACnE,IAAA,WAAI,EAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,cAAc,CAAC;KAC/D,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;QAC9B,IAAI,IAAA,eAAU,EAAC,CAAC,CAAC,EAAE,CAAC;YAClB,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAgB,eAAe;IAC7B,MAAM,WAAW,GAAG,kBAAkB,EAAE,CAAC;IAEzC,wCAAwC;IACxC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAC;IACzC,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,OAAO,CAAC,aAAa,CAAiB,CAAC;IAChD,CAAC;IAED,iCAAiC;IACjC,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,IAAA,WAAI,EAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;QACtE,OAAO,OAAO,CAAC,UAAU,CAAiB,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,KAA8B,CAAC;QAE3C,IAAI,GAAG,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACb,0CAA0C,WAAW,kBAAkB;gBACrE,4BAA4B,WAAW,IAAI;gBAC3C,mBAAmB,GAAG,CAAC,OAAO,EAAE,CACnC,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,KAAK,CACb,oCAAoC,WAAW,KAAK,GAAG,CAAC,OAAO,EAAE,CAClE,CAAC;IACJ,CAAC;AACH,CAAC"}
package/dist/types.d.ts ADDED
@@ -0,0 +1,141 @@
1
+ /**
2
+ * Options for creating a WhisperContext
3
+ */
4
+ export interface WhisperContextOptions {
5
+ /** Path to the GGML model file */
6
+ model: string;
7
+ /** Enable GPU acceleration (default: true) */
8
+ use_gpu?: boolean;
9
+ /** Enable Flash Attention (default: false) */
10
+ flash_attn?: boolean;
11
+ /** GPU device index (default: 0) */
12
+ gpu_device?: number;
13
+ /** Enable Core ML acceleration on macOS (default: false) */
14
+ use_coreml?: boolean;
15
+ /** DTW alignment preset for word-level timestamps (e.g., 'base.en', 'small', 'large.v3') */
16
+ dtw?: string;
17
+ /** Suppress whisper.cpp log output (default: false) */
18
+ no_prints?: boolean;
19
+ }
20
+ /**
21
+ * Options for transcription
22
+ */
23
+ export interface TranscribeOptions {
24
+ /** Path to the audio file */
25
+ fname_inp: string;
26
+ /** Language code (e.g., 'en', 'zh', 'auto') */
27
+ language?: string;
28
+ /** Translate to English */
29
+ translate?: boolean;
30
+ /** Number of threads to use */
31
+ n_threads?: number;
32
+ /** Number of processors */
33
+ n_processors?: number;
34
+ /** Disable timestamps in output */
35
+ no_timestamps?: boolean;
36
+ /** Detect language automatically */
37
+ detect_language?: boolean;
38
+ /** Single segment mode */
39
+ single_segment?: boolean;
40
+ /** Maximum segment length (0 = no limit) */
41
+ max_len?: number;
42
+ /** Maximum tokens per segment (0 = no limit) */
43
+ max_tokens?: number;
44
+ /** Maximum context size (-1 = default) */
45
+ max_context?: number;
46
+ /** Temperature for sampling */
47
+ temperature?: number;
48
+ /** Temperature increment for fallback */
49
+ temperature_inc?: number;
50
+ /** Best of N sampling */
51
+ best_of?: number;
52
+ /** Beam size (-1 = greedy) */
53
+ beam_size?: number;
54
+ /** Entropy threshold */
55
+ entropy_thold?: number;
56
+ /** Log probability threshold */
57
+ logprob_thold?: number;
58
+ /** No speech threshold */
59
+ no_speech_thold?: number;
60
+ /** Initial prompt text */
61
+ prompt?: string;
62
+ }
63
+ /**
64
+ * Transcription result segment (tuple format)
65
+ * [0]: Start time in format "HH:MM:SS,mmm"
66
+ * [1]: End time in format "HH:MM:SS,mmm"
67
+ * [2]: Transcribed text
68
+ */
69
+ export type TranscriptSegment = [start: string, end: string, text: string];
70
+ /**
71
+ * Transcription result
72
+ */
73
+ export interface TranscribeResult {
74
+ /** Array of transcript segments as [start, end, text] tuples */
75
+ segments: TranscriptSegment[];
76
+ }
77
+ /**
78
+ * Options for creating a VadContext
79
+ */
80
+ export interface VadContextOptions {
81
+ /** Path to the Silero VAD model file */
82
+ model: string;
83
+ /** Speech detection threshold (default: 0.5) */
84
+ threshold?: number;
85
+ /** Number of threads (default: 1) */
86
+ n_threads?: number;
87
+ /** Suppress model loading prints */
88
+ no_prints?: boolean;
89
+ }
90
+ /**
91
+ * WhisperContext class for persistent model context
92
+ */
93
+ export interface WhisperContext {
94
+ /** Get whisper.cpp system info string */
95
+ getSystemInfo(): string;
96
+ /** Check if model is multilingual */
97
+ isMultilingual(): boolean;
98
+ /** Free the context and release resources */
99
+ free(): void;
100
+ }
101
+ /**
102
+ * WhisperContext constructor type
103
+ */
104
+ export interface WhisperContextConstructor {
105
+ new (options: WhisperContextOptions): WhisperContext;
106
+ }
107
+ /**
108
+ * VadContext class for voice activity detection
109
+ */
110
+ export interface VadContext {
111
+ /** Get the required window size in samples */
112
+ getWindowSamples(): number;
113
+ /** Get the expected sample rate (16000 Hz) */
114
+ getSampleRate(): number;
115
+ /** Process audio samples and return speech probability [0, 1] */
116
+ process(samples: Float32Array): number;
117
+ /** Reset the internal LSTM state */
118
+ reset(): void;
119
+ /** Free the context and release resources */
120
+ free(): void;
121
+ }
122
+ /**
123
+ * VadContext constructor type
124
+ */
125
+ export interface VadContextConstructor {
126
+ new (options: VadContextOptions): VadContext;
127
+ }
128
+ /**
129
+ * Transcribe callback function signature
130
+ */
131
+ export type TranscribeCallback = (error: Error | null, result?: TranscribeResult) => void;
132
+ /**
133
+ * Native addon interface
134
+ */
135
+ export interface WhisperAddon {
136
+ WhisperContext: WhisperContextConstructor;
137
+ VadContext: VadContextConstructor;
138
+ transcribe: (context: WhisperContext, options: TranscribeOptions, callback: TranscribeCallback) => void;
139
+ whisper: Record<string, unknown>;
140
+ }
141
+ //# sourceMappingURL=types.d.ts.map
package/dist/types.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8CAA8C;IAC9C,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oCAAoC;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,4FAA4F;IAC5F,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,uDAAuD;IACvD,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,+CAA+C;IAC/C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mCAAmC;IACnC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,0BAA0B;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4CAA4C;IAC5C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0CAA0C;IAC1C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wBAAwB;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gCAAgC;IAChC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,0BAA0B;IAC1B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;GAKG;AACH,MAAM,MAAM,iBAAiB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;AAE3E;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gEAAgE;IAChE,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,wCAAwC;IACxC,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oCAAoC;IACpC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yCAAyC;IACzC,aAAa,IAAI,MAAM,CAAC;IACxB,qCAAqC;IACrC,cAAc,IAAI,OAAO,CAAC;IAC1B,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,KAAK,OAAO,EAAE,qBAAqB,GAAG,cAAc,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8CAA8C;IAC9C,gBAAgB,IAAI,MAAM,CAAC;IAC3B,8CAA8C;IAC9C,aAAa,IAAI,MAAM,CAAC;IACxB,iEAAiE;IACjE,OAAO,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAAC;IACvC,oCAAoC;IACpC,KAAK,IAAI,IAAI,CAAC;IACd,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,CAC/B,KAAK,EAAE,KAAK,GAAG,IAAI,EACnB,MAAM,CAAC,EAAE,gBAAgB,KACtB,IAAI,CAAC;AAEV;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,cAAc,EAAE,yBAAyB,CAAC;IAC1C,UAAU,EAAE,qBAAqB,CAAC;IAClC,UAAU,EAAE,CACV,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,EAC1B,QAAQ,EAAE,kBAAkB,KACzB,IAAI,CAAC;IACV,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC"}
package/dist/types.js ADDED
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=types.js.map
package/dist/types.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "name": "@whisper-cpp-node/core",
3
+ "version": "0.1.0",
4
+ "description": "Node.js bindings for whisper.cpp - fast speech-to-text on Apple Silicon",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/niconicoye/whisper.cpp",
9
+ "directory": "npm/packages/core"
10
+ },
11
+ "main": "dist/index.js",
12
+ "types": "dist/index.d.ts",
13
+ "exports": {
14
+ ".": {
15
+ "types": "./dist/index.d.ts",
16
+ "require": "./dist/index.js",
17
+ "import": "./dist/index.mjs"
18
+ }
19
+ },
20
+ "files": [
21
+ "dist"
22
+ ],
23
+ "optionalDependencies": {
24
+ "@whisper-cpp-node/darwin-arm64": "0.1.0"
25
+ },
26
+ "devDependencies": {
27
+ "@types/node": "^20.0.0",
28
+ "typescript": "^5.3.0"
29
+ },
30
+ "engines": {
31
+ "node": ">=18.0.0"
32
+ },
33
+ "keywords": [
34
+ "whisper",
35
+ "whisper.cpp",
36
+ "speech-to-text",
37
+ "transcription",
38
+ "audio",
39
+ "asr",
40
+ "apple-silicon",
41
+ "coreml",
42
+ "metal"
43
+ ],
44
+ "publishConfig": {
45
+ "access": "public"
46
+ },
47
+ "scripts": {
48
+ "build": "tsc"
49
+ }
50
+ }