whisper-cpp-node 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +316 -0
- package/dist/index.d.ts +54 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +68 -0
- package/dist/index.js.map +1 -0
- package/dist/loader.d.ts +6 -0
- package/dist/loader.d.ts.map +1 -0
- package/dist/loader.js +80 -0
- package/dist/loader.js.map +1 -0
- package/dist/types.d.ts +233 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +56 -0
package/README.md
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
# whisper-cpp-node
|
|
2
|
+
|
|
3
|
+
Node.js bindings for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) - fast speech-to-text with GPU acceleration.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Fast**: Native whisper.cpp performance with GPU acceleration
|
|
8
|
+
- **Cross-platform**: macOS (Metal), Windows (Vulkan)
|
|
9
|
+
- **Core ML**: Optional Apple Neural Engine support for 3x+ speedup (macOS)
|
|
10
|
+
- **OpenVINO**: Optional Intel CPU/GPU encoder acceleration (Windows/Linux)
|
|
11
|
+
- **Streaming VAD**: Built-in Silero voice activity detection
|
|
12
|
+
- **TypeScript**: Full type definitions included
|
|
13
|
+
- **Self-contained**: No external dependencies, just install and use
|
|
14
|
+
|
|
15
|
+
## Requirements
|
|
16
|
+
|
|
17
|
+
**macOS:**
|
|
18
|
+
- macOS 13.3+ (Ventura or later)
|
|
19
|
+
- Apple Silicon (M1/M2/M3/M4)
|
|
20
|
+
- Node.js 18+
|
|
21
|
+
|
|
22
|
+
**Windows:**
|
|
23
|
+
- Windows 10/11 (x64)
|
|
24
|
+
- Node.js 18+
|
|
25
|
+
- Vulkan-capable GPU (optional, for GPU acceleration)
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npm install whisper-cpp-node
|
|
31
|
+
# or
|
|
32
|
+
pnpm add whisper-cpp-node
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
The platform-specific binary is automatically installed:
|
|
36
|
+
- macOS ARM64: `@whisper-cpp-node/darwin-arm64`
|
|
37
|
+
- Windows x64: `@whisper-cpp-node/win32-x64`
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
### File-based transcription
|
|
42
|
+
|
|
43
|
+
```typescript
|
|
44
|
+
import {
|
|
45
|
+
createWhisperContext,
|
|
46
|
+
transcribeAsync,
|
|
47
|
+
} from "whisper-cpp-node";
|
|
48
|
+
|
|
49
|
+
// Create a context with your model
|
|
50
|
+
const ctx = createWhisperContext({
|
|
51
|
+
model: "./models/ggml-base.en.bin",
|
|
52
|
+
use_gpu: true,
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
// Transcribe audio file
|
|
56
|
+
const result = await transcribeAsync(ctx, {
|
|
57
|
+
fname_inp: "./audio.wav",
|
|
58
|
+
language: "en",
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
// Result: { segments: [["00:00:00,000", "00:00:02,500", " Hello world"], ...] }
|
|
62
|
+
for (const [start, end, text] of result.segments) {
|
|
63
|
+
console.log(`[${start} --> ${end}]${text}`);
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Clean up
|
|
67
|
+
ctx.free();
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Buffer-based transcription
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
import {
|
|
74
|
+
createWhisperContext,
|
|
75
|
+
transcribeAsync,
|
|
76
|
+
} from "whisper-cpp-node";
|
|
77
|
+
|
|
78
|
+
const ctx = createWhisperContext({
|
|
79
|
+
model: "./models/ggml-base.en.bin",
|
|
80
|
+
use_gpu: true,
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
// Pass raw PCM audio (16kHz, mono, float32)
|
|
84
|
+
const pcmData = new Float32Array(/* your audio samples */);
|
|
85
|
+
const result = await transcribeAsync(ctx, {
|
|
86
|
+
pcmf32: pcmData,
|
|
87
|
+
language: "en",
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
for (const [start, end, text] of result.segments) {
|
|
91
|
+
console.log(`[${start} --> ${end}]${text}`);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
ctx.free();
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## API
|
|
98
|
+
|
|
99
|
+
### `createWhisperContext(options)`
|
|
100
|
+
|
|
101
|
+
Create a persistent context for transcription.
|
|
102
|
+
|
|
103
|
+
```typescript
|
|
104
|
+
interface WhisperContextOptions {
|
|
105
|
+
model: string; // Path to GGML model file (required)
|
|
106
|
+
use_gpu?: boolean; // Enable GPU acceleration (default: true)
|
|
107
|
+
// Uses Metal on macOS, Vulkan on Windows
|
|
108
|
+
use_coreml?: boolean; // Enable Core ML on macOS (default: false)
|
|
109
|
+
use_openvino?: boolean; // Enable OpenVINO encoder on Intel (default: false)
|
|
110
|
+
openvino_device?: string; // OpenVINO device: 'CPU', 'GPU', 'NPU' (default: 'CPU')
|
|
111
|
+
openvino_model_path?: string; // Path to OpenVINO encoder model (auto-derived)
|
|
112
|
+
openvino_cache_dir?: string; // Cache dir for compiled OpenVINO models
|
|
113
|
+
flash_attn?: boolean; // Enable Flash Attention (default: false)
|
|
114
|
+
gpu_device?: number; // GPU device index (default: 0)
|
|
115
|
+
dtw?: string; // DTW preset for word timestamps
|
|
116
|
+
no_prints?: boolean; // Suppress log output (default: false)
|
|
117
|
+
}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### `transcribeAsync(context, options)`
|
|
121
|
+
|
|
122
|
+
Transcribe audio (Promise-based). Accepts either a file path or PCM buffer.
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
// File input
|
|
126
|
+
interface TranscribeOptionsFile {
|
|
127
|
+
fname_inp: string; // Path to audio file
|
|
128
|
+
// ... common options
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// Buffer input
|
|
132
|
+
interface TranscribeOptionsBuffer {
|
|
133
|
+
pcmf32: Float32Array; // Raw PCM (16kHz, mono, float32, -1.0 to 1.0)
|
|
134
|
+
// ... common options
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Common options (partial list - see types.ts for full options)
|
|
138
|
+
interface TranscribeOptionsBase {
|
|
139
|
+
// Language
|
|
140
|
+
language?: string; // Language code ('en', 'zh', 'auto')
|
|
141
|
+
translate?: boolean; // Translate to English
|
|
142
|
+
detect_language?: boolean; // Auto-detect language
|
|
143
|
+
|
|
144
|
+
// Threading
|
|
145
|
+
n_threads?: number; // CPU threads (default: 4)
|
|
146
|
+
n_processors?: number; // Parallel processors
|
|
147
|
+
|
|
148
|
+
// Audio processing
|
|
149
|
+
offset_ms?: number; // Start offset in ms
|
|
150
|
+
duration_ms?: number; // Duration to process (0 = all)
|
|
151
|
+
|
|
152
|
+
// Output control
|
|
153
|
+
no_timestamps?: boolean; // Disable timestamps
|
|
154
|
+
max_len?: number; // Max segment length (chars)
|
|
155
|
+
max_tokens?: number; // Max tokens per segment
|
|
156
|
+
split_on_word?: boolean; // Split on word boundaries
|
|
157
|
+
token_timestamps?: boolean; // Include token-level timestamps
|
|
158
|
+
|
|
159
|
+
// Sampling
|
|
160
|
+
temperature?: number; // Sampling temperature (0.0 = greedy)
|
|
161
|
+
beam_size?: number; // Beam search size (-1 = greedy)
|
|
162
|
+
best_of?: number; // Best-of-N sampling
|
|
163
|
+
|
|
164
|
+
// Thresholds
|
|
165
|
+
entropy_thold?: number; // Entropy threshold
|
|
166
|
+
logprob_thold?: number; // Log probability threshold
|
|
167
|
+
no_speech_thold?: number; // No-speech probability threshold
|
|
168
|
+
|
|
169
|
+
// Context
|
|
170
|
+
prompt?: string; // Initial prompt text
|
|
171
|
+
no_context?: boolean; // Don't use previous context
|
|
172
|
+
|
|
173
|
+
// VAD preprocessing
|
|
174
|
+
vad?: boolean; // Enable VAD preprocessing
|
|
175
|
+
vad_model?: string; // Path to VAD model
|
|
176
|
+
vad_threshold?: number; // VAD threshold (0.0-1.0)
|
|
177
|
+
vad_min_speech_duration_ms?: number;
|
|
178
|
+
vad_min_silence_duration_ms?: number;
|
|
179
|
+
vad_speech_pad_ms?: number;
|
|
180
|
+
|
|
181
|
+
// Callbacks
|
|
182
|
+
progress_callback?: (progress: number) => void;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// Result
|
|
186
|
+
interface TranscribeResult {
|
|
187
|
+
segments: TranscriptSegment[];
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// Segment is a tuple: [start, end, text]
|
|
191
|
+
type TranscriptSegment = [string, string, string];
|
|
192
|
+
// Example: ["00:00:00,000", "00:00:02,500", " Hello world"]
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### `createVadContext(options)`
|
|
196
|
+
|
|
197
|
+
Create a voice activity detection context for streaming audio.
|
|
198
|
+
|
|
199
|
+
```typescript
|
|
200
|
+
interface VadContextOptions {
|
|
201
|
+
model: string; // Path to Silero VAD model
|
|
202
|
+
threshold?: number; // Speech threshold (default: 0.5)
|
|
203
|
+
n_threads?: number; // Number of threads (default: 1)
|
|
204
|
+
no_prints?: boolean; // Suppress log output
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
interface VadContext {
|
|
208
|
+
getWindowSamples(): number; // Returns 512 (32ms at 16kHz)
|
|
209
|
+
getSampleRate(): number; // Returns 16000
|
|
210
|
+
process(samples: Float32Array): number; // Returns probability 0.0-1.0
|
|
211
|
+
reset(): void; // Reset LSTM state
|
|
212
|
+
free(): void; // Release resources
|
|
213
|
+
}
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
#### VAD Example
|
|
217
|
+
|
|
218
|
+
```typescript
|
|
219
|
+
import { createVadContext } from "whisper-cpp-node";
|
|
220
|
+
|
|
221
|
+
const vad = createVadContext({
|
|
222
|
+
model: "./models/ggml-silero-v6.2.0.bin",
|
|
223
|
+
threshold: 0.5,
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
const windowSize = vad.getWindowSamples(); // 512 samples
|
|
227
|
+
|
|
228
|
+
// Process audio in 32ms chunks
|
|
229
|
+
function processAudioChunk(samples: Float32Array) {
|
|
230
|
+
const probability = vad.process(samples);
|
|
231
|
+
if (probability >= 0.5) {
|
|
232
|
+
console.log("Speech detected!", probability);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// Reset when starting new audio stream
|
|
237
|
+
vad.reset();
|
|
238
|
+
|
|
239
|
+
// Clean up when done
|
|
240
|
+
vad.free();
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Core ML Acceleration (macOS)
|
|
244
|
+
|
|
245
|
+
For 3x+ faster encoding on Apple Silicon:
|
|
246
|
+
|
|
247
|
+
1. Generate a Core ML model:
|
|
248
|
+
```bash
|
|
249
|
+
pip install ane_transformers openai-whisper coremltools
|
|
250
|
+
./models/generate-coreml-model.sh base.en
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
2. Place it next to your GGML model:
|
|
254
|
+
```
|
|
255
|
+
models/ggml-base.en.bin
|
|
256
|
+
models/ggml-base.en-encoder.mlmodelc/
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
3. Enable Core ML:
|
|
260
|
+
```typescript
|
|
261
|
+
const ctx = createWhisperContext({
|
|
262
|
+
model: "./models/ggml-base.en.bin",
|
|
263
|
+
use_coreml: true,
|
|
264
|
+
});
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## OpenVINO Acceleration (Intel)
|
|
268
|
+
|
|
269
|
+
For faster encoder inference on Intel CPUs and GPUs (requires build with OpenVINO support):
|
|
270
|
+
|
|
271
|
+
1. Install OpenVINO and convert the model:
|
|
272
|
+
```bash
|
|
273
|
+
pip install openvino openvino-dev
|
|
274
|
+
python models/convert-whisper-to-openvino.py --model base.en
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
2. The OpenVINO model files are placed next to your GGML model:
|
|
278
|
+
```
|
|
279
|
+
models/ggml-base.en.bin
|
|
280
|
+
models/ggml-base.en-encoder-openvino.xml
|
|
281
|
+
models/ggml-base.en-encoder-openvino.bin
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
3. Enable OpenVINO:
|
|
285
|
+
```typescript
|
|
286
|
+
const ctx = createWhisperContext({
|
|
287
|
+
model: "./models/ggml-base.en.bin",
|
|
288
|
+
use_openvino: true,
|
|
289
|
+
openvino_device: "CPU", // or "GPU" for Intel iGPU
|
|
290
|
+
openvino_cache_dir: "./openvino_cache", // optional, speeds up init
|
|
291
|
+
});
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
**Note:** OpenVINO support requires the addon to be built with `-DADDON_OPENVINO=ON`.
|
|
295
|
+
|
|
296
|
+
## Models
|
|
297
|
+
|
|
298
|
+
Download models from [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp):
|
|
299
|
+
|
|
300
|
+
```bash
|
|
301
|
+
# Base English model (~150MB)
|
|
302
|
+
curl -L -o models/ggml-base.en.bin \
|
|
303
|
+
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
|
|
304
|
+
|
|
305
|
+
# Large v3 Turbo quantized (~500MB)
|
|
306
|
+
curl -L -o models/ggml-large-v3-turbo-q4_0.bin \
|
|
307
|
+
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q4_0.bin
|
|
308
|
+
|
|
309
|
+
# Silero VAD model (for streaming VAD)
|
|
310
|
+
curl -L -o models/ggml-silero-v6.2.0.bin \
|
|
311
|
+
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-silero-v6.2.0.bin
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
## License
|
|
315
|
+
|
|
316
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * Public type surface of whisper-cpp-node (generated declaration file).
 * Runtime implementations live in index.js, which loads the platform-specific
 * native addon via ./loader.
 */
import type { WhisperContext, WhisperContextOptions, VadContext, VadContextOptions, TranscribeOptions, TranscribeResult } from "./types";
export type { WhisperContextOptions, VadContextOptions, TranscribeOptions, TranscribeOptionsBase, TranscribeOptionsFile, TranscribeOptionsBuffer, TranscribeResult, TranscriptSegment, WhisperContext, VadContext, WhisperContextConstructor, VadContextConstructor, } from "./types";
/** Native WhisperContext constructor, exported under a distinct name to avoid colliding with the WhisperContext interface. */
export declare const WhisperContextClass: import("./types").WhisperContextConstructor;
/** Native VadContext constructor, exported under a distinct name to avoid colliding with the VadContext interface. */
export declare const VadContextClass: import("./types").VadContextConstructor;
/** Callback-style transcription as exposed by the native addon. */
export declare const transcribe: (context: WhisperContext, options: TranscribeOptions, callback: import("./types").TranscribeCallback) => void;
/** Promise-based transcription (promisified form of `transcribe`). */
export declare const transcribeAsync: (context: WhisperContext, options: TranscribeOptions) => Promise<TranscribeResult>;
/**
 * Create a new WhisperContext
 *
 * @example
 * ```typescript
 * const ctx = createWhisperContext({
 *   model: './models/ggml-base.en.bin',
 *   use_gpu: true,
 *   use_coreml: true,
 * });
 *
 * const result = await transcribeAsync(ctx, {
 *   fname_inp: './audio.wav',
 *   language: 'en',
 * });
 *
 * console.log(result.segments);
 * ctx.free();
 * ```
 */
export declare function createWhisperContext(options: WhisperContextOptions): WhisperContext;
/**
 * Create a new VadContext for voice activity detection
 *
 * @example
 * ```typescript
 * const vad = createVadContext({
 *   model: './models/ggml-silero-v6.2.0.bin',
 *   threshold: 0.5,
 * });
 *
 * const samples = new Float32Array(512);
 * const probability = vad.process(samples);
 *
 * vad.free();
 * ```
 */
export declare function createVadContext(options: VadContextOptions): VadContext;
/** Default export bundling the constructors and helper functions in a single object. */
declare const _default: {
    WhisperContext: import("./types").WhisperContextConstructor;
    VadContext: import("./types").VadContextConstructor;
    transcribe: (context: WhisperContext, options: TranscribeOptions, callback: import("./types").TranscribeCallback) => void;
    transcribeAsync: (context: WhisperContext, options: TranscribeOptions) => Promise<TranscribeResult>;
    createWhisperContext: typeof createWhisperContext;
    createVadContext: typeof createVadContext;
};
export default _default;
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAEV,cAAc,EACd,qBAAqB,EACrB,UAAU,EACV,iBAAiB,EACjB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAGjB,YAAY,EACV,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,SAAS,CAAC;AAMjB,eAAO,MAAM,mBAAmB,6CAAuB,CAAC;AACxD,eAAO,MAAM,eAAe,yCAAmB,CAAC;AAGhD,eAAO,MAAM,UAAU,+GAAmB,CAAC;AAG3C,eAAO,MAAM,eAAe,EAAkC,CAC5D,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,KACvB,OAAO,CAAC,gBAAgB,CAAC,CAAC;AAE/B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,qBAAqB,GAC7B,cAAc,CAEhB;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAEvE;;;;;+BAhDU,cAAc,WACd,iBAAiB,KACvB,OAAO,CAAC,gBAAgB,CAAC;;;;AAiD9B,wBAOE"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.transcribeAsync = exports.transcribe = exports.VadContextClass = exports.WhisperContextClass = void 0;
|
|
4
|
+
exports.createWhisperContext = createWhisperContext;
|
|
5
|
+
exports.createVadContext = createVadContext;
|
|
6
|
+
const util_1 = require("util");
|
|
7
|
+
const loader_1 = require("./loader");
|
|
8
|
+
// Load native addon
|
|
9
|
+
const addon = (0, loader_1.loadNativeAddon)();
|
|
10
|
+
// Export native constructors with different names to avoid conflict
|
|
11
|
+
exports.WhisperContextClass = addon.WhisperContext;
|
|
12
|
+
exports.VadContextClass = addon.VadContext;
|
|
13
|
+
// Original callback-based transcribe
|
|
14
|
+
exports.transcribe = addon.transcribe;
|
|
15
|
+
// Promisified version for async/await
|
|
16
|
+
exports.transcribeAsync = (0, util_1.promisify)(addon.transcribe);
|
|
17
|
+
/**
|
|
18
|
+
* Create a new WhisperContext
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const ctx = createWhisperContext({
|
|
23
|
+
* model: './models/ggml-base.en.bin',
|
|
24
|
+
* use_gpu: true,
|
|
25
|
+
* use_coreml: true,
|
|
26
|
+
* });
|
|
27
|
+
*
|
|
28
|
+
* const result = await transcribeAsync(ctx, {
|
|
29
|
+
* fname_inp: './audio.wav',
|
|
30
|
+
* language: 'en',
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* console.log(result.segments);
|
|
34
|
+
* ctx.free();
|
|
35
|
+
* ```
|
|
36
|
+
*/
|
|
37
|
+
function createWhisperContext(options) {
|
|
38
|
+
return new addon.WhisperContext(options);
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Create a new VadContext for voice activity detection
|
|
42
|
+
*
|
|
43
|
+
* @example
|
|
44
|
+
* ```typescript
|
|
45
|
+
* const vad = createVadContext({
|
|
46
|
+
* model: './models/ggml-silero-v6.2.0.bin',
|
|
47
|
+
* threshold: 0.5,
|
|
48
|
+
* });
|
|
49
|
+
*
|
|
50
|
+
* const samples = new Float32Array(512);
|
|
51
|
+
* const probability = vad.process(samples);
|
|
52
|
+
*
|
|
53
|
+
* vad.free();
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
56
|
+
function createVadContext(options) {
|
|
57
|
+
return new addon.VadContext(options);
|
|
58
|
+
}
|
|
59
|
+
// Default export with all functionality
|
|
60
|
+
exports.default = {
|
|
61
|
+
WhisperContext: addon.WhisperContext,
|
|
62
|
+
VadContext: addon.VadContext,
|
|
63
|
+
transcribe: addon.transcribe,
|
|
64
|
+
transcribeAsync: exports.transcribeAsync,
|
|
65
|
+
createWhisperContext,
|
|
66
|
+
createVadContext,
|
|
67
|
+
};
|
|
68
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAgEA,oDAIC;AAkBD,4CAEC;AAxFD,+BAAiC;AACjC,qCAA2C;AA2B3C,oBAAoB;AACpB,MAAM,KAAK,GAAiB,IAAA,wBAAe,GAAE,CAAC;AAE9C,oEAAoE;AACvD,QAAA,mBAAmB,GAAG,KAAK,CAAC,cAAc,CAAC;AAC3C,QAAA,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC;AAEhD,qCAAqC;AACxB,QAAA,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;AAE3C,sCAAsC;AACzB,QAAA,eAAe,GAAG,IAAA,gBAAS,EAAC,KAAK,CAAC,UAAU,CAG3B,CAAC;AAE/B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,SAAgB,oBAAoB,CAClC,OAA8B;IAE9B,OAAO,IAAI,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,SAAgB,gBAAgB,CAAC,OAA0B;IACzD,OAAO,IAAI,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;AACvC,CAAC;AAED,wCAAwC;AACxC,kBAAe;IACb,cAAc,EAAE,KAAK,CAAC,cAAc;IACpC,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,eAAe,EAAf,uBAAe;IACf,oBAAoB;IACpB,gBAAgB;CACjB,CAAC"}
|
package/dist/loader.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../src/loader.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AA6D5C;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CA4B9C"}
|
package/dist/loader.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.loadNativeAddon = loadNativeAddon;
|
|
4
|
+
const os_1 = require("os");
|
|
5
|
+
const path_1 = require("path");
|
|
6
|
+
const fs_1 = require("fs");
|
|
7
|
+
/**
|
|
8
|
+
* Supported platform-arch combinations
|
|
9
|
+
*/
|
|
10
|
+
const SUPPORTED_PLATFORMS = {
|
|
11
|
+
"darwin-arm64": "@whisper-cpp-node/darwin-arm64",
|
|
12
|
+
"win32-x64": "@whisper-cpp-node/win32-x64",
|
|
13
|
+
// Future: add more platforms
|
|
14
|
+
// "darwin-x64": "@whisper-cpp-node/darwin-x64",
|
|
15
|
+
// "linux-x64": "@whisper-cpp-node/linux-x64",
|
|
16
|
+
};
|
|
17
|
+
/**
|
|
18
|
+
* Get the platform key for current system
|
|
19
|
+
*/
|
|
20
|
+
function getPlatformKey() {
|
|
21
|
+
return `${(0, os_1.platform)()}-${(0, os_1.arch)()}`;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Get the platform-specific package name
|
|
25
|
+
*/
|
|
26
|
+
function getPlatformPackage() {
|
|
27
|
+
const platformKey = getPlatformKey();
|
|
28
|
+
const packageName = SUPPORTED_PLATFORMS[platformKey];
|
|
29
|
+
if (!packageName) {
|
|
30
|
+
const supported = Object.keys(SUPPORTED_PLATFORMS).join(", ");
|
|
31
|
+
throw new Error(`Unsupported platform: ${platformKey}. ` +
|
|
32
|
+
`Supported platforms: ${supported}`);
|
|
33
|
+
}
|
|
34
|
+
return packageName;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Try to find the binary in workspace development paths
|
|
38
|
+
*/
|
|
39
|
+
function tryWorkspacePath() {
|
|
40
|
+
const platformKey = getPlatformKey();
|
|
41
|
+
// In monorepo development, the binary is in sibling package
|
|
42
|
+
const possiblePaths = [
|
|
43
|
+
// From dist/ folder: ../darwin-arm64/whisper.node
|
|
44
|
+
(0, path_1.join)(__dirname, "..", "..", platformKey, "whisper.node"),
|
|
45
|
+
// From src/ folder during ts-node: ../../darwin-arm64/whisper.node
|
|
46
|
+
(0, path_1.join)(__dirname, "..", "..", "..", platformKey, "whisper.node"),
|
|
47
|
+
];
|
|
48
|
+
for (const p of possiblePaths) {
|
|
49
|
+
if ((0, fs_1.existsSync)(p)) {
|
|
50
|
+
return p;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return null;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Load the native addon for the current platform
|
|
57
|
+
*/
|
|
58
|
+
function loadNativeAddon() {
|
|
59
|
+
const packageName = getPlatformPackage();
|
|
60
|
+
// First, try workspace development path
|
|
61
|
+
const workspacePath = tryWorkspacePath();
|
|
62
|
+
if (workspacePath) {
|
|
63
|
+
return require(workspacePath);
|
|
64
|
+
}
|
|
65
|
+
// Then try the installed package
|
|
66
|
+
try {
|
|
67
|
+
const binaryPath = require.resolve((0, path_1.join)(packageName, "whisper.node"));
|
|
68
|
+
return require(binaryPath);
|
|
69
|
+
}
|
|
70
|
+
catch (error) {
|
|
71
|
+
const err = error;
|
|
72
|
+
if (err.code === "MODULE_NOT_FOUND") {
|
|
73
|
+
throw new Error(`Native binary not found. Please ensure ${packageName} is installed.\n` +
|
|
74
|
+
`Try running: npm install ${packageName}\n` +
|
|
75
|
+
`Original error: ${err.message}`);
|
|
76
|
+
}
|
|
77
|
+
throw new Error(`Failed to load native addon from ${packageName}: ${err.message}`);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
//# sourceMappingURL=loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.js","sourceRoot":"","sources":["../src/loader.ts"],"names":[],"mappings":";;AAmEA,0CA4BC;AA/FD,2BAAoC;AACpC,+BAA4B;AAC5B,2BAAgC;AAGhC;;GAEG;AACH,MAAM,mBAAmB,GAA2B;IAClD,cAAc,EAAE,gCAAgC;IAChD,WAAW,EAAE,6BAA6B;IAC1C,6BAA6B;IAC7B,gDAAgD;IAChD,8CAA8C;CAC/C,CAAC;AAEF;;GAEG;AACH,SAAS,cAAc;IACrB,OAAO,GAAG,IAAA,aAAQ,GAAE,IAAI,IAAA,SAAI,GAAE,EAAE,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IACrC,MAAM,WAAW,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;IAErD,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9D,MAAM,IAAI,KAAK,CACb,yBAAyB,WAAW,IAAI;YACtC,wBAAwB,SAAS,EAAE,CACtC,CAAC;IACJ,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB;IACvB,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,4DAA4D;IAC5D,MAAM,aAAa,GAAG;QACpB,kDAAkD;QAClD,IAAA,WAAI,EAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,cAAc,CAAC;QACxD,mEAAmE;QACnE,IAAA,WAAI,EAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,cAAc,CAAC;KAC/D,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;QAC9B,IAAI,IAAA,eAAU,EAAC,CAAC,CAAC,EAAE,CAAC;YAClB,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAgB,eAAe;IAC7B,MAAM,WAAW,GAAG,kBAAkB,EAAE,CAAC;IAEzC,wCAAwC;IACxC,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAC;IACzC,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,OAAO,CAAC,aAAa,CAAiB,CAAC;IAChD,CAAC;IAED,iCAAiC;IACjC,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,IAAA,WAAI,EAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;QACtE,OAAO,OAAO,CAAC,UAAU,CAAiB,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,KAA8B,CAAC;QAE3C,IAAI,GAAG,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACb,0CAA0C,WAAW,kBAAkB;gBACrE,4BAA4B,WAAW,IAAI;gBAC3C,mBAAmB,GAAG,CAAC,OAAO,EAAE,CACnC,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,KAAK,CACb,oCAAoC,WAAW,KAAK,GAAG,CAAC,OAAO,EAAE,CAClE,CAAC;IACJ,CAAC;AACH,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Options for creating a WhisperContext
|
|
3
|
+
*/
|
|
4
|
+
export interface WhisperContextOptions {
|
|
5
|
+
/** Path to the GGML model file */
|
|
6
|
+
model: string;
|
|
7
|
+
/** Enable GPU acceleration (default: true) */
|
|
8
|
+
use_gpu?: boolean;
|
|
9
|
+
/** Enable Flash Attention (default: false) */
|
|
10
|
+
flash_attn?: boolean;
|
|
11
|
+
/** GPU device index (default: 0) */
|
|
12
|
+
gpu_device?: number;
|
|
13
|
+
/** Enable Core ML acceleration on macOS (default: false) */
|
|
14
|
+
use_coreml?: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Enable OpenVINO encoder acceleration (Intel CPUs/GPUs, default: false)
|
|
17
|
+
* Requires build with -DADDON_OPENVINO=ON and OpenVINO runtime installed.
|
|
18
|
+
* The OpenVINO encoder model must exist alongside the GGML model
|
|
19
|
+
* (e.g., ggml-base.en-encoder-openvino.xml for ggml-base.en.bin)
|
|
20
|
+
*/
|
|
21
|
+
use_openvino?: boolean;
|
|
22
|
+
/**
|
|
23
|
+
* Path to OpenVINO encoder model (optional)
|
|
24
|
+
* If not specified, derived from the GGML model path with "-encoder-openvino.xml" suffix
|
|
25
|
+
*/
|
|
26
|
+
openvino_model_path?: string;
|
|
27
|
+
/**
|
|
28
|
+
* OpenVINO device to run encoder inference on (default: "CPU")
|
|
29
|
+
* Options: "CPU", "GPU", "NPU", etc.
|
|
30
|
+
*/
|
|
31
|
+
openvino_device?: string;
|
|
32
|
+
/**
|
|
33
|
+
* OpenVINO cache directory for compiled models (optional)
|
|
34
|
+
* Can speed up init time, especially for GPU, by caching compiled 'blobs'
|
|
35
|
+
*/
|
|
36
|
+
openvino_cache_dir?: string;
|
|
37
|
+
/** DTW alignment preset for word-level timestamps (e.g., 'base.en', 'small', 'large.v3') */
|
|
38
|
+
dtw?: string;
|
|
39
|
+
/** Suppress whisper.cpp log output (default: false) */
|
|
40
|
+
no_prints?: boolean;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Base transcription options (shared between file and buffer input)
|
|
44
|
+
*/
|
|
45
|
+
export interface TranscribeOptionsBase {
|
|
46
|
+
/** Language code (e.g., 'en', 'zh', 'auto') */
|
|
47
|
+
language?: string;
|
|
48
|
+
/** Translate to English */
|
|
49
|
+
translate?: boolean;
|
|
50
|
+
/** Detect language automatically */
|
|
51
|
+
detect_language?: boolean;
|
|
52
|
+
/** Number of threads to use */
|
|
53
|
+
n_threads?: number;
|
|
54
|
+
/** Number of processors for parallel processing */
|
|
55
|
+
n_processors?: number;
|
|
56
|
+
/** Start offset in milliseconds */
|
|
57
|
+
offset_ms?: number;
|
|
58
|
+
/** Duration to process in milliseconds (0 = all) */
|
|
59
|
+
duration_ms?: number;
|
|
60
|
+
/** Audio context size */
|
|
61
|
+
audio_ctx?: number;
|
|
62
|
+
/** Disable timestamps in output */
|
|
63
|
+
no_timestamps?: boolean;
|
|
64
|
+
/** Single segment mode */
|
|
65
|
+
single_segment?: boolean;
|
|
66
|
+
/** Maximum segment length in characters (0 = no limit) */
|
|
67
|
+
max_len?: number;
|
|
68
|
+
/** Maximum tokens per segment (0 = no limit) */
|
|
69
|
+
max_tokens?: number;
|
|
70
|
+
/** Maximum context size (-1 = default) */
|
|
71
|
+
max_context?: number;
|
|
72
|
+
/** Split segments on word boundaries */
|
|
73
|
+
split_on_word?: boolean;
|
|
74
|
+
/** Include token-level timestamps */
|
|
75
|
+
token_timestamps?: boolean;
|
|
76
|
+
/** Word timestamp threshold */
|
|
77
|
+
word_thold?: number;
|
|
78
|
+
/** Use comma in timestamp format (default: true) */
|
|
79
|
+
comma_in_time?: boolean;
|
|
80
|
+
/** Temperature for sampling (0.0 = greedy) */
|
|
81
|
+
temperature?: number;
|
|
82
|
+
/** Temperature increment for fallback */
|
|
83
|
+
temperature_inc?: number;
|
|
84
|
+
/** Best of N sampling candidates */
|
|
85
|
+
best_of?: number;
|
|
86
|
+
/** Beam size for beam search (-1 = greedy) */
|
|
87
|
+
beam_size?: number;
|
|
88
|
+
/** Disable temperature fallback */
|
|
89
|
+
no_fallback?: boolean;
|
|
90
|
+
/** Entropy threshold for fallback */
|
|
91
|
+
entropy_thold?: number;
|
|
92
|
+
/** Log probability threshold */
|
|
93
|
+
logprob_thold?: number;
|
|
94
|
+
/** No speech probability threshold */
|
|
95
|
+
no_speech_thold?: number;
|
|
96
|
+
/** Initial prompt text for context */
|
|
97
|
+
prompt?: string;
|
|
98
|
+
/** Don't use previous context */
|
|
99
|
+
no_context?: boolean;
|
|
100
|
+
/** Suppress blank outputs */
|
|
101
|
+
suppress_blank?: boolean;
|
|
102
|
+
/** Suppress non-speech tokens */
|
|
103
|
+
suppress_nst?: boolean;
|
|
104
|
+
/** Enable speaker diarization */
|
|
105
|
+
diarize?: boolean;
|
|
106
|
+
/** Enable tinydiarize for speaker turn detection */
|
|
107
|
+
tinydiarize?: boolean;
|
|
108
|
+
/** Print special tokens */
|
|
109
|
+
print_special?: boolean;
|
|
110
|
+
/** Print progress */
|
|
111
|
+
print_progress?: boolean;
|
|
112
|
+
/** Print realtime output */
|
|
113
|
+
print_realtime?: boolean;
|
|
114
|
+
/** Print timestamps */
|
|
115
|
+
print_timestamps?: boolean;
|
|
116
|
+
/** Enable VAD preprocessing */
|
|
117
|
+
vad?: boolean;
|
|
118
|
+
/** Path to VAD model */
|
|
119
|
+
vad_model?: string;
|
|
120
|
+
/** VAD speech detection threshold (0.0-1.0) */
|
|
121
|
+
vad_threshold?: number;
|
|
122
|
+
/** Minimum speech duration in milliseconds */
|
|
123
|
+
vad_min_speech_duration_ms?: number;
|
|
124
|
+
/** Minimum silence duration in milliseconds */
|
|
125
|
+
vad_min_silence_duration_ms?: number;
|
|
126
|
+
/** Maximum speech duration in seconds */
|
|
127
|
+
vad_max_speech_duration_s?: number;
|
|
128
|
+
/** Speech padding in milliseconds */
|
|
129
|
+
vad_speech_pad_ms?: number;
|
|
130
|
+
/** VAD samples overlap ratio */
|
|
131
|
+
vad_samples_overlap?: number;
|
|
132
|
+
/** Progress callback function (progress: 0-100) */
|
|
133
|
+
progress_callback?: (progress: number) => void;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Transcription options with file input
|
|
137
|
+
*/
|
|
138
|
+
export interface TranscribeOptionsFile extends TranscribeOptionsBase {
|
|
139
|
+
/** Path to the audio file */
|
|
140
|
+
fname_inp: string;
|
|
141
|
+
pcmf32?: never;
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Transcription options with PCM buffer input
|
|
145
|
+
*/
|
|
146
|
+
export interface TranscribeOptionsBuffer extends TranscribeOptionsBase {
|
|
147
|
+
/** Raw PCM audio samples (16kHz, mono, float32, values -1.0 to 1.0) */
|
|
148
|
+
pcmf32: Float32Array;
|
|
149
|
+
fname_inp?: never;
|
|
150
|
+
}
|
|
151
|
+
/**
 * Options for transcription - either file path or PCM buffer.
 * The `never` markers on each variant make the two inputs mutually exclusive,
 * so callers cannot pass both `fname_inp` and `pcmf32`.
 */
export type TranscribeOptions = TranscribeOptionsFile | TranscribeOptionsBuffer;
|
|
155
|
+
/**
 * Transcription result segment (labeled tuple):
 * [0]: Start time in format "HH:MM:SS,mmm"
 * [1]: End time in format "HH:MM:SS,mmm"
 * [2]: Transcribed text
 */
export type TranscriptSegment = [start: string, end: string, text: string];
|
|
162
|
+
/**
 * Transcription result.
 */
export interface TranscribeResult {
  /** Array of transcript segments as [start, end, text] tuples */
  segments: TranscriptSegment[];
}
|
|
169
|
+
/**
 * Options for creating a VadContext.
 */
export interface VadContextOptions {
  /** Path to the Silero VAD model file */
  model: string;
  /** Speech detection threshold (default: 0.5) */
  threshold?: number;
  /** Number of threads (default: 1) */
  n_threads?: number;
  /** Suppress model loading prints */
  no_prints?: boolean;
}
|
|
182
|
+
/**
 * WhisperContext class for a persistent model context, so the model
 * is loaded once and reused across transcriptions.
 */
export interface WhisperContext {
  /** Get whisper.cpp system info string */
  getSystemInfo(): string;
  /** Check if the loaded model is multilingual */
  isMultilingual(): boolean;
  /** Free the context and release native resources; do not use the context afterwards */
  free(): void;
}
|
|
193
|
+
/**
 * WhisperContext constructor type (implemented by the native addon).
 */
export interface WhisperContextConstructor {
  new (options: WhisperContextOptions): WhisperContext;
}
|
|
199
|
+
/**
 * VadContext class for voice activity detection (Silero VAD).
 */
export interface VadContext {
  /** Get the required window size in samples */
  getWindowSamples(): number;
  /** Get the expected sample rate (16000 Hz) */
  getSampleRate(): number;
  /** Process one window of audio samples and return speech probability [0, 1] */
  process(samples: Float32Array): number;
  /** Reset the internal LSTM state (e.g. between independent audio streams) */
  reset(): void;
  /** Free the context and release native resources */
  free(): void;
}
|
|
214
|
+
/**
 * VadContext constructor type (implemented by the native addon).
 */
export interface VadContextConstructor {
  new (options: VadContextOptions): VadContext;
}
|
|
220
|
+
/**
 * Transcribe callback function signature (Node-style error-first callback).
 */
export type TranscribeCallback = (error: Error | null, result?: TranscribeResult) => void;
|
|
224
|
+
/**
 * Native addon interface.
 */
export interface WhisperAddon {
  /** Constructor for persistent whisper model contexts */
  WhisperContext: WhisperContextConstructor;
  /** Constructor for voice-activity-detection contexts */
  VadContext: VadContextConstructor;
  /** Asynchronous transcription entry point; reports completion via the error-first callback */
  transcribe: (context: WhisperContext, options: TranscribeOptions, callback: TranscribeCallback) => void;
  /** Raw whisper namespace object exported by the addon — contents not visible here; NOTE(review): confirm shape */
  whisper: Record<string, unknown>;
}
|
|
233
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8CAA8C;IAC9C,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oCAAoC;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4DAA4D;IAC5D,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;;;;OAKG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,4FAA4F;IAC5F,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,uDAAuD;IACvD,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IAEpC,+CAA+C;IAC/C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAG1B,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mDAAmD;IACnD,YAAY,CAAC,EAAE,MAAM,CAAC;IAGtB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IAGnB,mCAAmC;IACnC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,0BAA0B;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0CAA0C;IAC1C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qCAAqC;IACrC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oDAAoD;IACpD,aAAa,CAAC,EAAE,OAAO,CAAC;IAGxB,8CAA8C;IAC9C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oCAAoC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8CAA8C;IAC9C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mCAAmC;IACnC,WAAW,CAAC,EAAE,OAAO,CAAC;IAGtB,qCAAqC;IACrC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gCAAgC;IAChC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,sCAAsC;IACtC,eAAe,CAAC,EAAE,MAAM,CAAC;IAGzB,sCAAsC;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,iCAAiC;IACjC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,6BAA6B;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,iCAAiC;IACjC,YAAY,CAAC,EAAE,OAAO,CAAC;IAGvB,iCAAiC;IACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,oDAAoD;IACpD,WAAW,C
AAC,EAAE,OAAO,CAAC;IAGtB,2BAA2B;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qBAAqB;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,4BAA4B;IAC5B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,uBAAuB;IACvB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAG3B,+BAA+B;IAC/B,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,8CAA8C;IAC9C,0BAA0B,CAAC,EAAE,MAAM,CAAC;IACpC,+CAA+C;IAC/C,2BAA2B,CAAC,EAAE,MAAM,CAAC;IACrC,yCAAyC;IACzC,yBAAyB,CAAC,EAAE,MAAM,CAAC;IACnC,qCAAqC;IACrC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,gCAAgC;IAChC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAG7B,mDAAmD;IACnD,iBAAiB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,qBAAsB,SAAQ,qBAAqB;IAClE,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,KAAK,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,qBAAqB;IACpE,uEAAuE;IACvE,MAAM,EAAE,YAAY,CAAC;IACrB,SAAS,CAAC,EAAE,KAAK,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,qBAAqB,GAAG,uBAAuB,CAAC;AAEhF;;;;;GAKG;AACH,MAAM,MAAM,iBAAiB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;AAE3E;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gEAAgE;IAChE,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,wCAAwC;IACxC,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oCAAoC;IACpC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yCAAyC;IACzC,aAAa,IAAI,MAAM,CAAC;IACxB,qCAAqC;IACrC,cAAc,IAAI,OAAO,CAAC;IAC1B,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,KAAK,OAAO,EAAE,qBAAqB,GAAG,cAAc,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8CAA8C;IAC9C,gBAAgB,IAAI,MAAM,CAAC;IAC3B,8CAA8C;IAC9C,aAAa,IAAI,MAAM,CAAC;IACxB,iEAAiE;IACjE,OAAO,CAAC,OAAO,EAAE,YAAY,GAAG,MAAM,CAAC;IACvC,oCAAoC;IACpC,KAAK,IAAI,IAAI,CAAC;IACd,6CAA6C;IAC7C,IAAI,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,OAAO,EAAE,iBAAiB,GAAG,UAAU,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,CAC/B,KAAK,EAAE,KAAK,GAAG,IAAI,EACnB,MAAM,CAAC,
EAAE,gBAAgB,KACtB,IAAI,CAAC;AAEV;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,cAAc,EAAE,yBAAyB,CAAC;IAC1C,UAAU,EAAE,qBAAqB,CAAC;IAClC,UAAU,EAAE,CACV,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,iBAAiB,EAC1B,QAAQ,EAAE,kBAAkB,KACzB,IAAI,CAAC;IACV,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "whisper-cpp-node",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Node.js bindings for whisper.cpp - fast speech-to-text with GPU acceleration",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/predict-woo/whisper.cpp",
|
|
9
|
+
"directory": "npm/packages/whisper-cpp-node"
|
|
10
|
+
},
|
|
11
|
+
"main": "dist/index.js",
|
|
12
|
+
"types": "dist/index.d.ts",
|
|
13
|
+
"exports": {
|
|
14
|
+
".": {
|
|
15
|
+
"types": "./dist/index.d.ts",
|
|
16
|
+
"require": "./dist/index.js",
|
|
17
|
+
"import": "./dist/index.mjs"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"dist"
|
|
22
|
+
],
|
|
23
|
+
"optionalDependencies": {
|
|
24
|
+
"@whisper-cpp-node/darwin-arm64": "0.2.1",
|
|
25
|
+
"@whisper-cpp-node/win32-x64": "0.2.0"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^20.0.0",
|
|
29
|
+
"typescript": "^5.3.0"
|
|
30
|
+
},
|
|
31
|
+
"engines": {
|
|
32
|
+
"node": ">=18.0.0"
|
|
33
|
+
},
|
|
34
|
+
"keywords": [
|
|
35
|
+
"whisper",
|
|
36
|
+
"whisper.cpp",
|
|
37
|
+
"speech-to-text",
|
|
38
|
+
"transcription",
|
|
39
|
+
"audio",
|
|
40
|
+
"asr",
|
|
41
|
+
"apple-silicon",
|
|
42
|
+
"coreml",
|
|
43
|
+
"metal",
|
|
44
|
+
"windows",
|
|
45
|
+
"vulkan",
|
|
46
|
+
"openvino",
|
|
47
|
+
"gpu",
|
|
48
|
+
"intel"
|
|
49
|
+
],
|
|
50
|
+
"publishConfig": {
|
|
51
|
+
"access": "public"
|
|
52
|
+
},
|
|
53
|
+
"scripts": {
|
|
54
|
+
"build": "tsc"
|
|
55
|
+
}
|
|
56
|
+
}
|