node-av 4.0.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +23 -0
  2. package/binding.gyp +19 -11
  3. package/dist/api/bitstream-filter.d.ts +13 -12
  4. package/dist/api/bitstream-filter.js +33 -29
  5. package/dist/api/bitstream-filter.js.map +1 -1
  6. package/dist/api/decoder.d.ts +211 -96
  7. package/dist/api/decoder.js +396 -375
  8. package/dist/api/decoder.js.map +1 -1
  9. package/dist/api/demuxer.d.ts +10 -10
  10. package/dist/api/demuxer.js +7 -10
  11. package/dist/api/demuxer.js.map +1 -1
  12. package/dist/api/encoder.d.ts +155 -122
  13. package/dist/api/encoder.js +368 -541
  14. package/dist/api/encoder.js.map +1 -1
  15. package/dist/api/filter-complex.d.ts +769 -0
  16. package/dist/api/filter-complex.js +1596 -0
  17. package/dist/api/filter-complex.js.map +1 -0
  18. package/dist/api/filter-presets.d.ts +68 -0
  19. package/dist/api/filter-presets.js +96 -0
  20. package/dist/api/filter-presets.js.map +1 -1
  21. package/dist/api/filter.d.ts +183 -113
  22. package/dist/api/filter.js +347 -365
  23. package/dist/api/filter.js.map +1 -1
  24. package/dist/api/fmp4-stream.d.ts +18 -2
  25. package/dist/api/fmp4-stream.js +45 -4
  26. package/dist/api/fmp4-stream.js.map +1 -1
  27. package/dist/api/hardware.d.ts +47 -0
  28. package/dist/api/hardware.js +45 -0
  29. package/dist/api/hardware.js.map +1 -1
  30. package/dist/api/index.d.ts +2 -0
  31. package/dist/api/index.js +3 -0
  32. package/dist/api/index.js.map +1 -1
  33. package/dist/api/io-stream.d.ts +3 -3
  34. package/dist/api/io-stream.js.map +1 -1
  35. package/dist/api/muxer.d.ts +10 -10
  36. package/dist/api/muxer.js +6 -6
  37. package/dist/api/muxer.js.map +1 -1
  38. package/dist/api/pipeline.d.ts +2 -2
  39. package/dist/api/pipeline.js +22 -22
  40. package/dist/api/pipeline.js.map +1 -1
  41. package/dist/api/rtp-stream.d.ts +5 -2
  42. package/dist/api/rtp-stream.js +33 -4
  43. package/dist/api/rtp-stream.js.map +1 -1
  44. package/dist/api/types.d.ts +63 -7
  45. package/dist/api/utilities/audio-sample.d.ts +10 -0
  46. package/dist/api/utilities/audio-sample.js +10 -0
  47. package/dist/api/utilities/audio-sample.js.map +1 -1
  48. package/dist/api/utilities/channel-layout.d.ts +1 -0
  49. package/dist/api/utilities/channel-layout.js +1 -0
  50. package/dist/api/utilities/channel-layout.js.map +1 -1
  51. package/dist/api/utilities/image.d.ts +38 -0
  52. package/dist/api/utilities/image.js +38 -0
  53. package/dist/api/utilities/image.js.map +1 -1
  54. package/dist/api/utilities/index.d.ts +1 -0
  55. package/dist/api/utilities/index.js +2 -0
  56. package/dist/api/utilities/index.js.map +1 -1
  57. package/dist/api/utilities/media-type.d.ts +1 -0
  58. package/dist/api/utilities/media-type.js +1 -0
  59. package/dist/api/utilities/media-type.js.map +1 -1
  60. package/dist/api/utilities/pixel-format.d.ts +3 -0
  61. package/dist/api/utilities/pixel-format.js +3 -0
  62. package/dist/api/utilities/pixel-format.js.map +1 -1
  63. package/dist/api/utilities/sample-format.d.ts +5 -0
  64. package/dist/api/utilities/sample-format.js +5 -0
  65. package/dist/api/utilities/sample-format.js.map +1 -1
  66. package/dist/api/utilities/scheduler.d.ts +21 -52
  67. package/dist/api/utilities/scheduler.js +20 -58
  68. package/dist/api/utilities/scheduler.js.map +1 -1
  69. package/dist/api/utilities/streaming.d.ts +32 -1
  70. package/dist/api/utilities/streaming.js +32 -1
  71. package/dist/api/utilities/streaming.js.map +1 -1
  72. package/dist/api/utilities/timestamp.d.ts +14 -0
  73. package/dist/api/utilities/timestamp.js +14 -0
  74. package/dist/api/utilities/timestamp.js.map +1 -1
  75. package/dist/api/utilities/whisper-model.d.ts +310 -0
  76. package/dist/api/utilities/whisper-model.js +528 -0
  77. package/dist/api/utilities/whisper-model.js.map +1 -0
  78. package/dist/api/whisper.d.ts +324 -0
  79. package/dist/api/whisper.js +362 -0
  80. package/dist/api/whisper.js.map +1 -0
  81. package/dist/constants/constants.d.ts +3 -1
  82. package/dist/constants/constants.js +1 -0
  83. package/dist/constants/constants.js.map +1 -1
  84. package/dist/ffmpeg/index.d.ts +3 -3
  85. package/dist/ffmpeg/index.js +3 -3
  86. package/dist/ffmpeg/utils.d.ts +27 -0
  87. package/dist/ffmpeg/utils.js +28 -16
  88. package/dist/ffmpeg/utils.js.map +1 -1
  89. package/dist/lib/binding.d.ts +4 -4
  90. package/dist/lib/binding.js.map +1 -1
  91. package/dist/lib/codec-parameters.d.ts +47 -1
  92. package/dist/lib/codec-parameters.js +55 -0
  93. package/dist/lib/codec-parameters.js.map +1 -1
  94. package/dist/lib/fifo.d.ts +416 -0
  95. package/dist/lib/fifo.js +453 -0
  96. package/dist/lib/fifo.js.map +1 -0
  97. package/dist/lib/frame.d.ts +96 -1
  98. package/dist/lib/frame.js +139 -1
  99. package/dist/lib/frame.js.map +1 -1
  100. package/dist/lib/index.d.ts +1 -0
  101. package/dist/lib/index.js +2 -0
  102. package/dist/lib/index.js.map +1 -1
  103. package/dist/lib/native-types.d.ts +29 -2
  104. package/dist/lib/rational.d.ts +18 -0
  105. package/dist/lib/rational.js +19 -0
  106. package/dist/lib/rational.js.map +1 -1
  107. package/dist/lib/types.d.ts +23 -1
  108. package/install/check.js +2 -2
  109. package/package.json +31 -21
@@ -0,0 +1,324 @@
1
+ import type { Frame } from '../lib/frame.js';
2
+ import type { WhisperModelName, WhisperVADModelName } from './utilities/whisper-model.js';
3
+ /**
4
+ * Transcribed audio segment from Whisper.
5
+ *
6
+ * Represents a single transcribed segment with timing information.
7
+ * Start and end times are in milliseconds from the beginning of the audio.
8
+ */
9
+ export interface WhisperSegment {
10
+ /**
11
+ * Start time of the segment in milliseconds.
12
+ */
13
+ start: number;
14
+ /**
15
+ * End time of the segment in milliseconds.
16
+ */
17
+ end: number;
18
+ /**
19
+ * Transcribed text content.
20
+ */
21
+ text: string;
22
+ /**
23
+ * Indicates if this segment represents a speaker turn.
24
+ * Only available when VAD (Voice Activity Detection) is enabled.
25
+ */
26
+ turn?: boolean;
27
+ }
28
+ /**
29
+ * Options for configuring Whisper transcriber.
30
+ *
31
+ * Controls model selection, language, GPU acceleration, VAD, and output behavior.
32
+ */
33
+ export interface WhisperTranscriberOptions {
34
+ /**
35
+ * Path to whisper.cpp GGML model file.
36
+ *
37
+ * Required. Download models using {@link WhisperDownloader}.
38
+ * ```
39
+ */
40
+ model: WhisperModelName;
41
+ /**
42
+ * Path to VAD (Voice Activity Detection) model file.
43
+ *
44
+ * Optional. Enables better audio segmentation using Silero VAD.
45
+ * Download VAD models using {@link WhisperDownloader.downloadVADModel}.
46
+ * ```
47
+ */
48
+ vadModel?: WhisperVADModelName;
49
+ /**
50
+ * Directory where models will be downloaded if not already present.
51
+ *
52
+ * @default '<PROJECT_DIR>/models'
53
+ */
54
+ modelDir?: string;
55
+ /**
56
+ * Language code for transcription.
57
+ *
58
+ * Use 'auto' for automatic language detection.
59
+ *
60
+ * @default 'auto'
61
+ */
62
+ language?: string;
63
+ /**
64
+ * Audio queue size in seconds.
65
+ *
66
+ * Maximum duration of audio buffered before processing.
67
+ * Increase when using VAD for better segmentation.
68
+ *
69
+ * @default 3
70
+ */
71
+ queue?: number;
72
+ /**
73
+ * Enable GPU acceleration for processing.
74
+ *
75
+ * Requires whisper.cpp built with GPU support (CUDA/Vulkan/Metal).
76
+ *
77
+ * @default true
78
+ */
79
+ useGpu?: boolean;
80
+ /**
81
+ * GPU device index to use.
82
+ *
83
+ * Only relevant when multiple GPUs are available.
84
+ *
85
+ * @default 0
86
+ */
87
+ gpuDevice?: number;
88
+ /**
89
+ * VAD threshold for voice activity detection.
90
+ *
91
+ * Higher values are more conservative (less likely to detect speech).
92
+ * Range: 0.0 to 1.0
93
+ *
94
+ * @default 0.5
95
+ */
96
+ vadThreshold?: number;
97
+ /**
98
+ * Minimum speech duration for VAD in seconds.
99
+ *
100
+ * Audio chunks shorter than this will be filtered out.
101
+ *
102
+ * @default 0.1
103
+ */
104
+ vadMinSpeechDuration?: number;
105
+ /**
106
+ * Minimum silence duration for VAD in seconds.
107
+ *
108
+ * Silence shorter than this won't trigger segment boundaries.
109
+ *
110
+ * @default 0.5
111
+ */
112
+ vadMinSilenceDuration?: number;
113
+ }
114
+ /**
115
+ * High-level Whisper transcriber for automatic speech recognition.
116
+ *
117
+ * Provides streaming audio transcription using OpenAI's Whisper model via whisper.cpp.
118
+ * Supports GPU acceleration, VAD (Voice Activity Detection), and real-time processing.
119
+ * Built on FFmpeg's whisper filter with automatic frame metadata extraction.
120
+ *
121
+ * Features:
122
+ * - Real-time streaming transcription
123
+ * - GPU acceleration (CUDA/Vulkan/Metal)
124
+ * - Voice Activity Detection for better segmentation
125
+ * - Automatic language detection
126
+ * - Type-safe transcription segments
127
+ * - Frame-based API for flexible integration
128
+ *
129
+ * @example
130
+ * ```typescript
131
+ * import { Demuxer, Decoder, WhisperTranscriber } from 'node-av/api';
132
+ * import { WhisperDownloader } from 'node-av/api/utilities/whisper-model';
133
+ *
134
+ * // Download model
135
+ * const modelPath = await WhisperDownloader.downloadModel({
136
+ * model: 'base.en',
137
+ * outputPath: './models'
138
+ * });
139
+ *
140
+ * // Open audio and create decoder
141
+ * await using input = await Demuxer.open('podcast.mp3');
142
+ * using decoder = await Decoder.create(input.audio());
143
+ *
144
+ * // Create transcriber
145
+ * await using transcriber = await WhisperTranscriber.create({
146
+ * model: modelPath,
147
+ * language: 'en'
148
+ * });
149
+ *
150
+ * // Transcribe using decoded frames
151
+ * for await (const segment of transcriber.transcribe(decoder.frames(input.packets()))) {
152
+ * const timestamp = `[${(segment.start / 1000).toFixed(1)}s - ${(segment.end / 1000).toFixed(1)}s]`;
153
+ * console.log(`${timestamp}: ${segment.text}`);
154
+ * }
155
+ * ```
156
+ *
157
+ * @example
158
+ * ```typescript
159
+ * // Real-time microphone transcription with VAD
160
+ * import { Demuxer, Decoder, WhisperTranscriber } from 'node-av/api';
161
+ * import { WhisperDownloader } from 'node-av/api/utilities/whisper-model';
162
+ *
163
+ * // Download VAD model
164
+ * const vadPath = await WhisperDownloader.downloadVADModel('silero-v5.1.2', './models');
165
+ *
166
+ * // Setup transcriber with VAD
167
+ * await using transcriber = await WhisperTranscriber.create({
168
+ * model: './models/ggml-medium.bin',
169
+ * language: 'en',
170
+ * queue: 10,
171
+ * vadModel: vadPath,
172
+ * vadThreshold: 0.5
173
+ * });
174
+ *
175
+ * // Live transcription from decoded audio frames
176
+ * using decoder = await Decoder.create(microphoneStream);
177
+ * for await (const segment of transcriber.transcribe(decoder.frames(microphonePackets))) {
178
+ * if (segment.turn) {
179
+ * console.log('\n--- New speaker turn ---');
180
+ * }
181
+ * console.log(segment.text);
182
+ * }
183
+ * ```
184
+ *
185
+ * @see {@link WhisperDownloader} For downloading Whisper and VAD models
186
+ * @see {@link Decoder} For audio decoding
187
+ * @see {@link Demuxer} For reading media files
188
+ */
189
+ export declare class WhisperTranscriber implements Disposable {
190
+ private options;
191
+ private isClosed;
192
+ /**
193
+ * @param options - Transcriber configuration
194
+ *
195
+ * Use {@link create} factory method instead
196
+ *
197
+ * @internal
198
+ */
199
+ private constructor();
200
+ /**
201
+ * Create a Whisper transcriber instance.
202
+ *
203
+ * Initializes the transcriber with the specified model and configuration.
204
+ * The transcriber can then process audio frames from any source.
205
+ *
206
+ * @param options - Transcriber configuration
207
+ *
208
+ * @returns Configured transcriber instance
209
+ *
210
+ * @throws {Error} If model file does not exist
211
+ *
212
+ * @throws {Error} If VAD model file does not exist (when vadModel specified)
213
+ *
214
+ * @example
215
+ * ```typescript
216
+ * import { WhisperTranscriber } from 'node-av/api';
217
+ *
218
+ * // Create transcriber with basic options
219
+ * await using transcriber = await WhisperTranscriber.create({
220
+ * model: './models/ggml-base.en.bin',
221
+ * language: 'en'
222
+ * });
223
+ * ```
224
+ *
225
+ * @example
226
+ * ```typescript
227
+ * // Create transcriber with GPU and VAD support
228
+ * await using transcriber = await WhisperTranscriber.create({
229
+ * model: './models/ggml-base.bin',
230
+ * language: 'auto',
231
+ * useGpu: true,
232
+ * gpuDevice: 0,
233
+ * vadModel: './models/ggml-silero-v5.1.2.bin',
234
+ * vadThreshold: 0.5,
235
+ * queue: 10
236
+ * });
237
+ * ```
238
+ */
239
+ static create(options: WhisperTranscriberOptions): Promise<WhisperTranscriber>;
240
+ /**
241
+ * Transcribe audio frames to text segments.
242
+ *
243
+ * Processes audio frames through the Whisper filter and yields transcribed segments.
244
+ * Each segment contains start/end timestamps and the transcribed text.
245
+ * Reads metadata directly from frame metadata tags (lavfi.whisper.text, lavfi.whisper.duration).
246
+ *
247
+ * The generator continues until the input stream ends or close() is called.
248
+ * Always use with `for await...of` to properly handle async iteration.
249
+ *
250
+ * @param frames - Audio frames (from Decoder.frames()) or single frame to transcribe
251
+ *
252
+ * @yields {WhisperSegment} Transcribed audio segments with timing and text
253
+ *
254
+ * @throws {FFmpegError} If filter initialization fails
255
+ *
256
+ * @example
257
+ * ```typescript
258
+ * import { Demuxer, Decoder, WhisperTranscriber } from 'node-av/api';
259
+ *
260
+ * await using input = await Demuxer.open('podcast.mp3');
261
+ * using decoder = await Decoder.create(input.audio());
262
+ * await using transcriber = await WhisperTranscriber.create({
263
+ * model: './models/ggml-base.en.bin',
264
+ * language: 'en'
265
+ * });
266
+ *
267
+ * // Transcribe decoded frames
268
+ * for await (const segment of transcriber.transcribe(decoder.frames(input.packets()))) {
269
+ * console.log(`[${segment.start}ms]: ${segment.text}`);
270
+ * }
271
+ * ```
272
+ *
273
+ * @example
274
+ * ```typescript
275
+ * // With custom timing format
276
+ * const audioFrames = decoder.frames(input.packets());
277
+ * for await (const segment of transcriber.transcribe(audioFrames)) {
278
+ * const startSec = (segment.start / 1000).toFixed(2);
279
+ * const endSec = (segment.end / 1000).toFixed(2);
280
+ * console.log(`[${startSec}s - ${endSec}s]: ${segment.text}`);
281
+ * }
282
+ * ```
283
+ *
284
+ * @example
285
+ * ```typescript
286
+ * // Process single frame
287
+ * using frame = decoder.decodeSync(packet);
288
+ * for await (const segment of transcriber.transcribe(frame)) {
289
+ * console.log(`Transcribed: ${segment.text}`);
290
+ * }
291
+ * ```
292
+ */
293
+ transcribe(frames: AsyncIterable<Frame | null> | Frame | null): AsyncGenerator<WhisperSegment, void, unknown>;
294
+ /**
295
+ * Close transcriber and clean up resources.
296
+ *
297
+ * Releases filter graph and stops frame processing.
298
+ * Called automatically when using `await using` syntax.
299
+ *
300
+ * @example
301
+ * ```typescript
302
+ * // Automatic cleanup
303
+ * {
304
+ * await using transcriber = await WhisperTranscriber.create(options);
305
+ * // Use transcriber
306
+ * } // Automatically calls close()
307
+ *
308
+ * // Manual cleanup
309
+ * const transcriber = await WhisperTranscriber.create(options);
310
+ * try {
311
+ * // Use transcriber
312
+ * } finally {
313
+ * await transcriber.close();
314
+ * }
315
+ * ```
316
+ */
317
+ close(): void;
318
+ /**
319
+ * Symbol.asyncDispose implementation for `await using` syntax.
320
+ *
321
+ * @internal
322
+ */
323
+ [Symbol.dispose](): void;
324
+ }
@@ -0,0 +1,362 @@
1
+ var __addDisposableResource = (this && this.__addDisposableResource) || function (env, value, async) {
2
+ if (value !== null && value !== void 0) {
3
+ if (typeof value !== "object" && typeof value !== "function") throw new TypeError("Object expected.");
4
+ var dispose, inner;
5
+ if (async) {
6
+ if (!Symbol.asyncDispose) throw new TypeError("Symbol.asyncDispose is not defined.");
7
+ dispose = value[Symbol.asyncDispose];
8
+ }
9
+ if (dispose === void 0) {
10
+ if (!Symbol.dispose) throw new TypeError("Symbol.dispose is not defined.");
11
+ dispose = value[Symbol.dispose];
12
+ if (async) inner = dispose;
13
+ }
14
+ if (typeof dispose !== "function") throw new TypeError("Object not disposable.");
15
+ if (inner) dispose = function() { try { inner.call(this); } catch (e) { return Promise.reject(e); } };
16
+ env.stack.push({ value: value, dispose: dispose, async: async });
17
+ }
18
+ else if (async) {
19
+ env.stack.push({ async: true });
20
+ }
21
+ return value;
22
+ };
23
+ var __disposeResources = (this && this.__disposeResources) || (function (SuppressedError) {
24
+ return function (env) {
25
+ function fail(e) {
26
+ env.error = env.hasError ? new SuppressedError(e, env.error, "An error was suppressed during disposal.") : e;
27
+ env.hasError = true;
28
+ }
29
+ var r, s = 0;
30
+ function next() {
31
+ while (r = env.stack.pop()) {
32
+ try {
33
+ if (!r.async && s === 1) return s = 0, env.stack.push(r), Promise.resolve().then(next);
34
+ if (r.dispose) {
35
+ var result = r.dispose.call(r.value);
36
+ if (r.async) return s |= 2, Promise.resolve(result).then(next, function(e) { fail(e); return next(); });
37
+ }
38
+ else s |= 1;
39
+ }
40
+ catch (e) {
41
+ fail(e);
42
+ }
43
+ }
44
+ if (s === 1) return env.hasError ? Promise.reject(env.error) : Promise.resolve();
45
+ if (env.hasError) throw env.error;
46
+ }
47
+ return next();
48
+ };
49
+ })(typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
50
+ var e = new Error(message);
51
+ return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
52
+ });
53
+ import { FilterPreset } from './filter-presets.js';
54
+ import { FilterAPI } from './filter.js';
55
+ import { WhisperDownloader } from './utilities/whisper-model.js';
56
+ /**
57
+ * High-level Whisper transcriber for automatic speech recognition.
58
+ *
59
+ * Provides streaming audio transcription using OpenAI's Whisper model via whisper.cpp.
60
+ * Supports GPU acceleration, VAD (Voice Activity Detection), and real-time processing.
61
+ * Built on FFmpeg's whisper filter with automatic frame metadata extraction.
62
+ *
63
+ * Features:
64
+ * - Real-time streaming transcription
65
+ * - GPU acceleration (CUDA/Vulkan/Metal)
66
+ * - Voice Activity Detection for better segmentation
67
+ * - Automatic language detection
68
+ * - Type-safe transcription segments
69
+ * - Frame-based API for flexible integration
70
+ *
71
+ * @example
72
+ * ```typescript
73
+ * import { Demuxer, Decoder, WhisperTranscriber } from 'node-av/api';
74
+ * import { WhisperDownloader } from 'node-av/api/utilities/whisper-model';
75
+ *
76
+ * // Download model
77
+ * const modelPath = await WhisperDownloader.downloadModel({
78
+ * model: 'base.en',
79
+ * outputPath: './models'
80
+ * });
81
+ *
82
+ * // Open audio and create decoder
83
+ * await using input = await Demuxer.open('podcast.mp3');
84
+ * using decoder = await Decoder.create(input.audio());
85
+ *
86
+ * // Create transcriber
87
+ * await using transcriber = await WhisperTranscriber.create({
88
+ * model: modelPath,
89
+ * language: 'en'
90
+ * });
91
+ *
92
+ * // Transcribe using decoded frames
93
+ * for await (const segment of transcriber.transcribe(decoder.frames(input.packets()))) {
94
+ * const timestamp = `[${(segment.start / 1000).toFixed(1)}s - ${(segment.end / 1000).toFixed(1)}s]`;
95
+ * console.log(`${timestamp}: ${segment.text}`);
96
+ * }
97
+ * ```
98
+ *
99
+ * @example
100
+ * ```typescript
101
+ * // Real-time microphone transcription with VAD
102
+ * import { Demuxer, Decoder, WhisperTranscriber } from 'node-av/api';
103
+ * import { WhisperDownloader } from 'node-av/api/utilities/whisper-model';
104
+ *
105
+ * // Download VAD model
106
+ * const vadPath = await WhisperDownloader.downloadVADModel('silero-v5.1.2', './models');
107
+ *
108
+ * // Setup transcriber with VAD
109
+ * await using transcriber = await WhisperTranscriber.create({
110
+ * model: './models/ggml-medium.bin',
111
+ * language: 'en',
112
+ * queue: 10,
113
+ * vadModel: vadPath,
114
+ * vadThreshold: 0.5
115
+ * });
116
+ *
117
+ * // Live transcription from decoded audio frames
118
+ * using decoder = await Decoder.create(microphoneStream);
119
+ * for await (const segment of transcriber.transcribe(decoder.frames(microphonePackets))) {
120
+ * if (segment.turn) {
121
+ * console.log('\n--- New speaker turn ---');
122
+ * }
123
+ * console.log(segment.text);
124
+ * }
125
+ * ```
126
+ *
127
+ * @see {@link WhisperDownloader} For downloading Whisper and VAD models
128
+ * @see {@link Decoder} For audio decoding
129
+ * @see {@link Demuxer} For reading media files
130
+ */
131
+ export class WhisperTranscriber {
132
+ options;
133
+ isClosed = false;
134
+ /**
135
+ * @param options - Transcriber configuration
136
+ *
137
+ * Use {@link create} factory method instead
138
+ *
139
+ * @internal
140
+ */
141
+ constructor(options) {
142
+ this.options = options;
143
+ }
144
+ /**
145
+ * Create a Whisper transcriber instance.
146
+ *
147
+ * Initializes the transcriber with the specified model and configuration.
148
+ * The transcriber can then process audio frames from any source.
149
+ *
150
+ * @param options - Transcriber configuration
151
+ *
152
+ * @returns Configured transcriber instance
153
+ *
154
+ * @throws {Error} If model file does not exist
155
+ *
156
+ * @throws {Error} If VAD model file does not exist (when vadModel specified)
157
+ *
158
+ * @example
159
+ * ```typescript
160
+ * import { WhisperTranscriber } from 'node-av/api';
161
+ *
162
+ * // Create transcriber with basic options
163
+ * await using transcriber = await WhisperTranscriber.create({
164
+ * model: './models/ggml-base.en.bin',
165
+ * language: 'en'
166
+ * });
167
+ * ```
168
+ *
169
+ * @example
170
+ * ```typescript
171
+ * // Create transcriber with GPU and VAD support
172
+ * await using transcriber = await WhisperTranscriber.create({
173
+ * model: './models/ggml-base.bin',
174
+ * language: 'auto',
175
+ * useGpu: true,
176
+ * gpuDevice: 0,
177
+ * vadModel: './models/ggml-silero-v5.1.2.bin',
178
+ * vadThreshold: 0.5,
179
+ * queue: 10
180
+ * });
181
+ * ```
182
+ */
183
+ static async create(options) {
184
+ const modelsToDownload = [options.model, options.vadModel].filter(Boolean);
185
+ const [modelPath, vadModelPath] = await WhisperDownloader.downloadModels(modelsToDownload, options.modelDir);
186
+ const fullOptions = {
187
+ model: modelPath,
188
+ vadModel: vadModelPath,
189
+ modelDir: options.modelDir ?? WhisperDownloader.DEFAULT_MODEL_PATH,
190
+ language: options.language ?? 'auto',
191
+ queue: options.queue ?? 3,
192
+ useGpu: options.useGpu ?? true,
193
+ gpuDevice: options.gpuDevice ?? 0,
194
+ vadThreshold: options.vadThreshold ?? 0.5,
195
+ vadMinSpeechDuration: options.vadMinSpeechDuration ?? 0.1,
196
+ vadMinSilenceDuration: options.vadMinSilenceDuration ?? 0.5,
197
+ };
198
+ return new WhisperTranscriber(fullOptions);
199
+ }
200
+ /**
201
+ * Transcribe audio frames to text segments.
202
+ *
203
+ * Processes audio frames through the Whisper filter and yields transcribed segments.
204
+ * Each segment contains start/end timestamps and the transcribed text.
205
+ * Reads metadata directly from frame metadata tags (lavfi.whisper.text, lavfi.whisper.duration).
206
+ *
207
+ * The generator continues until the input stream ends or close() is called.
208
+ * Always use with `for await...of` to properly handle async iteration.
209
+ *
210
+ * @param frames - Audio frames (from Decoder.frames()) or single frame to transcribe
211
+ *
212
+ * @yields {WhisperSegment} Transcribed audio segments with timing and text
213
+ *
214
+ * @throws {FFmpegError} If filter initialization fails
215
+ *
216
+ * @example
217
+ * ```typescript
218
+ * import { Demuxer, Decoder, WhisperTranscriber } from 'node-av/api';
219
+ *
220
+ * await using input = await Demuxer.open('podcast.mp3');
221
+ * using decoder = await Decoder.create(input.audio());
222
+ * await using transcriber = await WhisperTranscriber.create({
223
+ * model: './models/ggml-base.en.bin',
224
+ * language: 'en'
225
+ * });
226
+ *
227
+ * // Transcribe decoded frames
228
+ * for await (const segment of transcriber.transcribe(decoder.frames(input.packets()))) {
229
+ * console.log(`[${segment.start}ms]: ${segment.text}`);
230
+ * }
231
+ * ```
232
+ *
233
+ * @example
234
+ * ```typescript
235
+ * // With custom timing format
236
+ * const audioFrames = decoder.frames(input.packets());
237
+ * for await (const segment of transcriber.transcribe(audioFrames)) {
238
+ * const startSec = (segment.start / 1000).toFixed(2);
239
+ * const endSec = (segment.end / 1000).toFixed(2);
240
+ * console.log(`[${startSec}s - ${endSec}s]: ${segment.text}`);
241
+ * }
242
+ * ```
243
+ *
244
+ * @example
245
+ * ```typescript
246
+ * // Process single frame
247
+ * using frame = decoder.decodeSync(packet);
248
+ * for await (const segment of transcriber.transcribe(frame)) {
249
+ * console.log(`Transcribed: ${segment.text}`);
250
+ * }
251
+ * ```
252
+ */
253
+ async *transcribe(frames) {
254
+ const env_1 = { stack: [], error: void 0, hasError: false };
255
+ try {
256
+ const chain = FilterPreset.chain()
257
+ .whisper({
258
+ model: this.options.model,
259
+ language: this.options.language,
260
+ queue: this.options.queue,
261
+ useGpu: this.options.useGpu,
262
+ gpuDevice: this.options.gpuDevice,
263
+ vadModel: this.options.vadModel,
264
+ vadThreshold: this.options.vadThreshold,
265
+ vadMinSpeechDuration: this.options.vadMinSpeechDuration,
266
+ vadMinSilenceDuration: this.options.vadMinSilenceDuration,
267
+ })
268
+ .build();
269
+ // Create filter API
270
+ const filter = __addDisposableResource(env_1, FilterAPI.create(chain, {
271
+ allowReinit: true,
272
+ dropOnChange: false,
273
+ }), false);
274
+ // Track cumulative time for start/end timestamps
275
+ let cumulativeTime = 0; // in milliseconds
276
+ const filterGenerator = filter.frames(frames);
277
+ // Decode and process frames through filter
278
+ for await (const frame_1 of filterGenerator) {
279
+ const env_2 = { stack: [], error: void 0, hasError: false };
280
+ try {
281
+ const frame = __addDisposableResource(env_2, frame_1, false);
282
+ if (this.isClosed) {
283
+ break;
284
+ }
285
+ if (!frame?.isAudio()) {
286
+ continue;
287
+ }
288
+ // Get frame metadata
289
+ const metadata = frame.getMetadata();
290
+ const text = metadata.get('lavfi.whisper.text');
291
+ const durationStr = metadata.get('lavfi.whisper.duration');
292
+ if (text?.trim()) {
293
+ // Parse duration (in seconds)
294
+ const duration = durationStr ? parseFloat(durationStr) * 1000 : 0;
295
+ // Yield transcribed segment
296
+ yield {
297
+ start: cumulativeTime,
298
+ end: cumulativeTime + duration,
299
+ text: text.trim(),
300
+ };
301
+ // Update cumulative time
302
+ if (duration > 0) {
303
+ cumulativeTime += duration;
304
+ }
305
+ }
306
+ }
307
+ catch (e_1) {
308
+ env_2.error = e_1;
309
+ env_2.hasError = true;
310
+ }
311
+ finally {
312
+ __disposeResources(env_2);
313
+ }
314
+ }
315
+ }
316
+ catch (e_2) {
317
+ env_1.error = e_2;
318
+ env_1.hasError = true;
319
+ }
320
+ finally {
321
+ __disposeResources(env_1);
322
+ }
323
+ }
324
+ /**
325
+ * Close transcriber and clean up resources.
326
+ *
327
+ * Releases filter graph and stops frame processing.
328
+ * Called automatically when using `await using` syntax.
329
+ *
330
+ * @example
331
+ * ```typescript
332
+ * // Automatic cleanup
333
+ * {
334
+ * await using transcriber = await WhisperTranscriber.create(options);
335
+ * // Use transcriber
336
+ * } // Automatically calls close()
337
+ *
338
+ * // Manual cleanup
339
+ * const transcriber = await WhisperTranscriber.create(options);
340
+ * try {
341
+ * // Use transcriber
342
+ * } finally {
343
+ * await transcriber.close();
344
+ * }
345
+ * ```
346
+ */
347
+ close() {
348
+ if (this.isClosed) {
349
+ return;
350
+ }
351
+ this.isClosed = true;
352
+ }
353
+ /**
354
+ * Symbol.asyncDispose implementation for `await using` syntax.
355
+ *
356
+ * @internal
357
+ */
358
+ [Symbol.dispose]() {
359
+ this.close();
360
+ }
361
+ }
362
+ //# sourceMappingURL=whisper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"whisper.js","sourceRoot":"","sources":["../../src/api/whisper.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AAkIjE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0EG;AACH,MAAM,OAAO,kBAAkB;IACrB,OAAO,CAAsC;IAC7C,QAAQ,GAAG,KAAK,CAAC;IAEzB;;;;;;OAMG;IACH,YAAoB,OAA4C;QAC9D,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAsCG;IACH,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAAkC;QACpD,MAAM,gBAAgB,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAA+C,CAAC;QACzH,MAAM,CAAC,SAAS,EAAE,YAAY,CAAC,GAAG,MAAM,iBAAiB,CAAC,cAAc,CAAC,gBAAgB,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE7G,MAAM,WAAW,GAAwC;YACvD,KAAK,EAAE,SAA6B;YACpC,QAAQ,EAAE,YAAmC;YAC7C,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,iBAAiB,CAAC,kBAAkB;YAClE,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,MAAM;YACpC,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC;YACzB,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI;YAC9B,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,CAAC;YACjC,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,GAAG;YACzC,oBAAoB,EAAE,OAAO,CAAC,oBAAoB,IAAI,GAAG;YACzD,qBAAqB,EAAE,OAAO,CAAC,qBAAqB,IAAI,GAAG;SAC5D,CAAC;QAEF,OAAO,IAAI,kBAAkB,CAAC,WAAW,CAAC,CAAC;IAC7C,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAoDG;IACH,KAAK,CAAC,CAAC,UAAU,CAAC,MAAkD;;;YAClE,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,EAAE;iBAC/B,OAAO,CAAC;gBACP,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;gBACzB,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ;gBAC/B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;gBACzB,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM;gBAC3B,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS;gBACjC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ;gBAC/B,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY;gBACvC,oBAAoB,EAAE,IAAI,CAAC,OAAO,CAAC,oBAAoB;gBACvD,qBAAqB,EAAE,IAAI,CAAC,OAAO,CAAC,qBAAqB;aAC1D,CAAC;iBACD,KAAK,EAAE,CAAC;YAEX,oBAAoB;YACpB,MAAM,MAAM,kCAAG,SAAS,CAAC,MAAM,CAAC,KAAK,EAAE;gBACrC,WAAW,EAAE,IAAI;gBACjB,YAAY,EAAE,KAAK;aACpB,CAAC,QAAA,CAAC
;YAEH,iDAAiD;YACjD,IAAI,cAAc,GAAG,CAAC,CAAC,CAAC,kBAAkB;YAC1C,MAAM,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAE9C,2CAA2C;YAC3C,IAAI,KAAK,mBAAiB,eAAe,EAAE,CAAC;;;0BAA3B,KAAK,iDAAA;oBACpB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;wBAClB,MAAM;oBACR,CAAC;oBAED,IAAI,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC;wBACtB,SAAS;oBACX,CAAC;oBAED,qBAAqB;oBACrB,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;oBACrC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;oBAChD,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;oBAE3D,IAAI,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC;wBACjB,8BAA8B;wBAC9B,MAAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;wBAElE,4BAA4B;wBAC5B,MAAM;4BACJ,KAAK,EAAE,cAAc;4BACrB,GAAG,EAAE,cAAc,GAAG,QAAQ;4BAC9B,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE;yBAClB,CAAC;wBAEF,yBAAyB;wBACzB,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;4BACjB,cAAc,IAAI,QAAQ,CAAC;wBAC7B,CAAC;oBACH,CAAC;;;;;;;;;aACF;;;;;;;;;KACF;IAED;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,KAAK;QACH,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,OAAO;QACT,CAAC;QAED,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,CAAC,MAAM,CAAC,OAAO,CAAC;QACd,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,CAAC;CACF"}