npm - node-av - Versions diffs - 6.0.0-beta.8 → 6.0.0 - Mend

node-av 6.0.0-beta.8 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/README.md +2 -1
package/dist/api/bitstream-filter.js +0 -2
package/dist/api/bitstream-filter.js.map +1 -1
package/dist/api/decoder.d.ts +169 -9
package/dist/api/decoder.js +351 -43
package/dist/api/decoder.js.map +1 -1
package/dist/api/encoder.d.ts +129 -0
package/dist/api/encoder.js +288 -41
package/dist/api/encoder.js.map +1 -1
package/dist/api/filter.js +0 -2
package/dist/api/filter.js.map +1 -1
package/dist/api/muxer.d.ts +19 -0
package/dist/api/muxer.js +44 -24
package/dist/api/muxer.js.map +1 -1
package/dist/api/rtp-stream.d.ts +14 -11
package/dist/api/rtp-stream.js +22 -47
package/dist/api/rtp-stream.js.map +1 -1
package/dist/api/scaler.js.map +1 -1
package/dist/api/utilities/async-queue.js +0 -2
package/dist/api/utilities/async-queue.js.map +1 -1
package/dist/api/utilities/codec-format.d.ts +87 -0
package/dist/api/utilities/codec-format.js +117 -0
package/dist/api/utilities/codec-format.js.map +1 -0
package/dist/api/utilities/index.d.ts +1 -0
package/dist/api/utilities/index.js +2 -0
package/dist/api/utilities/index.js.map +1 -1
package/dist/api/utilities/whisper-model.d.ts +20 -0
package/dist/api/utilities/whisper-model.js +61 -2
package/dist/api/utilities/whisper-model.js.map +1 -1
package/dist/constants/bsf-options.d.ts +8 -1
package/dist/constants/constants.d.ts +2 -0
package/dist/constants/constants.js +2 -0
package/dist/constants/constants.js.map +1 -1
package/dist/constants/format-options.d.ts +4 -0
package/dist/lib/binding.d.ts +2 -0
package/dist/lib/binding.js.map +1 -1
package/dist/lib/frame.d.ts +8 -0
package/dist/lib/frame.js +10 -0
package/dist/lib/frame.js.map +1 -1
package/dist/lib/native-types.d.ts +12 -0
package/dist/lib/packet.d.ts +8 -0
package/dist/lib/packet.js +10 -0
package/dist/lib/packet.js.map +1 -1
package/dist/lib/utilities.d.ts +45 -0
package/dist/lib/utilities.js +49 -0
package/dist/lib/utilities.js.map +1 -1
package/package.json +16 -16

package/dist/api/encoder.d.ts CHANGED Viewed

@@ -90,6 +90,38 @@ export interface EncoderOptions<C = unknown> {
      * @default FFmpeg default (both methods, codec chooses best)
      */
     threadType?: AVThreadType;
+    /**
+     * Automatically resample incoming audio to a format the codec supports.
+     *
+     * Audio encoders only accept specific sample rates, sample formats, and channel
+     * layouts (e.g. libmp3lame rejects 96 kHz; AAC needs planar `fltp`). When `true`,
+     * the encoder transparently converts each frame to the nearest supported
+     * sample rate / sample format / channel layout (like `ffmpeg`'s automatic
+     * `aresample`). When `false` (default), an unsupported input raises a descriptive
+     * error instead — keeping behaviour explicit and 1:1 with the codec.
+     *
+     * Has no effect on video.
+     *
+     * @default false
+     */
+    autoResample?: boolean;
+    /**
+     * Automatically convert incoming video to a pixel format the codec supports.
+     *
+     * Video encoders only accept specific pixel formats (e.g. libx264 wants planar
+     * YUV like `yuv420p` and rejects `rgb24`). When `true`, the encoder transparently
+     * converts each frame to the least-loss supported pixel format via swscale (like
+     * `ffmpeg`'s automatic `format` filter), keeping the same resolution. When `false`
+     * (default), an unsupported input raises a descriptive error instead — keeping
+     * behaviour explicit and 1:1 with the codec.
+     *
+     * Resolution is never changed (the encoder already adopts the frame's dimensions),
+     * and hardware frames are left untouched (their format is negotiated via the
+     * hardware frames context). Has no effect on audio.
+     *
+     * @default false
+     */
+    autoFormat?: boolean;
     /**
      * Additional codec-specific options.
      *
@@ -174,6 +206,16 @@ export declare class Encoder implements Disposable {
     private opts?;
     private options;
     private audioFrameBuffer?;
+    private autoResample; // Audio auto-resampling switch, taken from `options.autoResample` (false when omitted)
+    private audioResampler?; // Resampler created on the first audio frame when its format must be converted
+    private resampledFrame?; // Lazily allocated output frame, reused by every resample and drain call
+    private audioInputLayout?; // Default layout substituted for input frames whose channel order is unspecified
+    private autoFormat; // Video auto-conversion switch, taken from `options.autoFormat` (false when omitted)
+    private videoScaler?; // Pixel-format converter created on the first video frame when conversion is needed
+    private scaledFrame?; // Lazily allocated output frame, reused by every pixel-format conversion
+    private videoTargetFormat?; // Pixel format the converter outputs; only set when a converter is created
+    private supportsParamChange?; // Cached result of the AV_CODEC_CAP_PARAM_CHANGE capability check
+    private encoderChannels?; // Cached channel count of the codec context's channel layout
     private inputQueue;
     private outputQueue;
     private workerPromise;
@@ -1156,6 +1198,93 @@ export declare class Encoder implements Disposable {
      * @internal
      */
     private setupHardwareAcceleration;
+    /**
+     * Configure the codec context's audio parameters from the first frame.
+     *
+     * Audio encoders only accept specific sample rates / sample formats / channel
+     * layouts. This picks codec-supported targets; if they differ from the input it
+     * either sets up a resampler (when `autoResample`) or throws a descriptive error.
+     *
+     * @param frame - First audio frame
+     *
+     * @throws {Error} If the input is unsupported and `autoResample` is disabled
+     *
+     * @throws {FFmpegError} If the resampler fails to configure
+     *
+     * @internal
+     */
+    private setupAudioParams;
+    /**
+     * Lazily allocate the reused resampler output frame.
+     *
+     * @returns The allocated output frame
+     *
+     * @internal
+     */
+    private getResampleFrame;
+    /**
+     * Resample an incoming audio frame to the codec's target format.
+     *
+     * Reuses a single output frame; `swr_convert_frame` allocates/sizes its buffer.
+     * The (fixed-frame-size) audio FIFO copies the samples and re-stamps PTS, so the
+     * reused frame and its carried timing are only relevant on the non-FIFO path.
+     *
+     * @param frame - Source audio frame
+     *
+     * @returns The resampled frame (owned by the encoder, reused across calls)
+     *
+     * @internal
+     */
+    private resampleAudio;
+    /**
+     * Pull the samples still buffered inside the resampler (rate-conversion delay).
+     *
+     * The samples are written into the encoder-owned, reused output frame. This
+     * method only returns that frame; the caller is responsible for handing it to
+     * the audio FIFO or the codec.
+     *
+     * @returns The drained frame (reused across calls), or null when no resampler is
+     * configured or no samples were produced
+     *
+     * @internal
+     */
+    private drainResampler;
+    /**
+     * Configure the codec context's pixel format from the first video frame.
+     *
+     * Video encoders only accept specific pixel formats. This keeps the input format
+     * when the codec accepts it; otherwise it either sets up a swscale converter to
+     * the least-loss supported format (when `autoFormat`) or throws a descriptive
+     * error. Hardware frames are left untouched - their format is negotiated through
+     * the hardware frames context, not swscale.
+     *
+     * @param frame - First video frame
+     *
+     * @throws {Error} If the input is unsupported and `autoFormat` is disabled
+     *
+     * @throws {FFmpegError} If the converter fails to configure
+     *
+     * @internal
+     */
+    private setupVideoFormat;
+    /**
+     * Lazily allocate the reused scaler output frame.
+     *
+     * @returns The allocated output frame
+     *
+     * @internal
+     */
+    private getScaledFrame;
+    /**
+     * Convert an incoming video frame to the codec's target pixel format.
+     *
+     * Reuses a single output frame; `sws_scale_frame` allocates/sizes its buffer.
+     * Resolution is unchanged - only the pixel format differs. Timing is carried over
+     * explicitly so the encoder's PTS rescale stays correct.
+     *
+     * @param frame - Source video frame
+     *
+     * @returns The converted frame (owned by the encoder, reused across calls)
+     *
+     * @internal
+     */
+    private scaleVideo;
     /**
      * Prepare frame for encoding.
      *

package/dist/api/encoder.js CHANGED Viewed

@@ -50,7 +50,8 @@ var __disposeResources = (this && this.__disposeResources) || (function (Suppres
     var e = new Error(message);
     return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
 });
-import { AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, AV_CODEC_CAP_PARAM_CHANGE, AV_CODEC_FLAG_COPY_OPAQUE, AV_CODEC_FLAG_FRAME_DURATION, AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, AV_PICTURE_TYPE_NONE, AV_PIX_FMT_NONE, AV_PKT_FLAG_TRUSTED, AVCHROMA_LOC_UNSPECIFIED, AVERROR_EAGAIN, AVERROR_ENCODER_NOT_FOUND, AVERROR_EOF, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_VIDEO, EOF, } from '../constants/constants.js';
+/* eslint-disable @stylistic/indent-binary-ops */
+import { AV_CHANNEL_ORDER_UNSPEC, AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, AV_CODEC_CAP_PARAM_CHANGE, AV_CODEC_FLAG_COPY_OPAQUE, AV_CODEC_FLAG_FRAME_DURATION, AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, AV_PICTURE_TYPE_NONE, AV_PIX_FMT_NONE, AV_PKT_FLAG_TRUSTED, AVCHROMA_LOC_UNSPECIFIED, AVERROR_EAGAIN, AVERROR_ENCODER_NOT_FOUND, AVERROR_EOF, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_VIDEO, EOF, SWS_BILINEAR, } from '../constants/constants.js';
 import { CodecContext } from '../lib/codec-context.js';
 import { Codec } from '../lib/codec.js';
 import { Dictionary } from '../lib/dictionary.js';
@@ -58,10 +59,13 @@ import { FFmpegError } from '../lib/error.js';
 import { Frame } from '../lib/frame.js';
 import { Packet } from '../lib/packet.js';
 import { Rational } from '../lib/rational.js';
-import { avRescaleQ } from '../lib/utilities.js';
+import { SoftwareResampleContext } from '../lib/software-resample-context.js';
+import { SoftwareScaleContext } from '../lib/software-scale-context.js';
+import { avChannelLayoutDefault, avGetPixFmtName, avGetSampleFmtName, avRescaleQ } from '../lib/utilities.js';
 import { AudioFrameBuffer } from './audio-frame-buffer.js';
 import { FRAME_THREAD_QUEUE_SIZE, PACKET_THREAD_QUEUE_SIZE } from './constants.js';
 import { AsyncQueue } from './utilities/async-queue.js';
+import { pickSupportedLayout, pickSupportedPixelFormat, pickSupportedRate, pickSupportedSampleFormat } from './utilities/codec-format.js';
 import { SchedulerControl } from './utilities/scheduler.js';
 import { parseBitrate } from './utils.js';
 /**
@@ -132,6 +136,16 @@ export class Encoder {
     opts;
     options;
     audioFrameBuffer;
+    autoResample; // Audio auto-resampling switch, taken from `options.autoResample` (false when omitted)
+    audioResampler; // Resampler created on the first audio frame when its format must be converted
+    resampledFrame; // Lazily allocated output frame, reused by every resample and drain call
+    audioInputLayout; // Default layout substituted for input frames whose channel order is unspecified
+    autoFormat; // Video auto-conversion switch, taken from `options.autoFormat` (false when omitted)
+    videoScaler; // Pixel-format converter created on the first video frame when conversion is needed
+    scaledFrame; // Lazily allocated output frame, reused by every pixel-format conversion
+    videoTargetFormat; // Pixel format the converter outputs; only set when a converter is created
+    supportsParamChange; // Cached result of the AV_CODEC_CAP_PARAM_CHANGE capability check
+    encoderChannels; // Cached channel count of the codec context's channel layout
     // Worker pattern for push-based processing
     inputQueue;
     outputQueue;
@@ -154,6 +168,8 @@ export class Encoder {
         this.codec = codec;
         this.options = options;
         this.opts = opts;
+        this.autoResample = options.autoResample ?? false;
+        this.autoFormat = options.autoFormat ?? false;
         this.packet = new Packet();
         this.packet.alloc();
         this.inputQueue = new AsyncQueue(FRAME_THREAD_QUEUE_SIZE, (f) => f.free());
@@ -736,8 +752,15 @@ export class Encoder {
         // Open encoder if not already done
         this.initializePromise ??= this.initialize(frame);
         await this.initializePromise;
+        // Give an unspecified-layout frame the concrete native layout the codec was
+        // opened with (and the resampler configured for), so both accept it.
+        if (this.audioInputLayout) {
+            frame.channelLayout = this.audioInputLayout;
+        }
+        // Convert to the codec's format first (audio resample / video pixfmt).
+        const input = this.audioResampler ? this.resampleAudio(frame) : this.videoScaler ? this.scaleVideo(frame) : frame;
         // Prepare frame for encoding (set quality, validate channel count)
-        this.prepareFrameForEncoding(frame);
+        this.prepareFrameForEncoding(input);
         const encode = async (newFrame) => {
             const sendRet = await this.codecContext.sendFrame(newFrame);
             if (sendRet < 0 && sendRet !== AVERROR_EOF) {
@@ -747,10 +770,10 @@ export class Encoder {
         };
         if (this.audioFrameBuffer) {
             // Push frame into buffer - actual sending happens in receive()
-            await this.audioFrameBuffer.push(frame);
+            await this.audioFrameBuffer.push(input);
         }
         else {
-            await encode(frame);
+            await encode(input);
         }
     }
     /**
@@ -804,8 +827,15 @@ export class Encoder {
         if (!this.initialized) {
             this.initializeSync(frame);
         }
+        // Give an unspecified-layout frame the concrete native layout the codec was
+        // opened with (and the resampler configured for), so both accept it.
+        if (this.audioInputLayout) {
+            frame.channelLayout = this.audioInputLayout;
+        }
+        // Convert to the codec's format first (audio resample / video pixfmt).
+        const input = this.audioResampler ? this.resampleAudio(frame) : this.videoScaler ? this.scaleVideo(frame) : frame;
         // Prepare frame for encoding (set quality, validate channel count)
-        this.prepareFrameForEncoding(frame);
+        this.prepareFrameForEncoding(input);
         const encode = (newFrame) => {
             const sendRet = this.codecContext.sendFrameSync(newFrame);
             if (sendRet < 0 && sendRet !== AVERROR_EOF) {
@@ -815,10 +845,10 @@ export class Encoder {
         };
         if (this.audioFrameBuffer) {
             // Push frame into buffer - actual sending happens in receiveSync()
-            this.audioFrameBuffer.pushSync(frame);
+            this.audioFrameBuffer.pushSync(input);
         }
         else {
-            encode(frame);
+            encode(input);
         }
     }
     /**
@@ -1183,6 +1213,16 @@ export class Encoder {
         if (this.isClosed || !this.initialized) {
             return;
         }
+        // Drain samples buffered inside the resampler into the FIFO/encoder first.
+        const drained = this.drainResampler();
+        if (drained) {
+            if (this.audioFrameBuffer) {
+                await this.audioFrameBuffer.push(drained);
+            }
+            else {
+                await this.codecContext.sendFrame(drained);
+            }
+        }
         // If using AudioFrameBuffer, flush remaining buffered samples first
         if (this.audioFrameBuffer && this.audioFrameBuffer.size > 0) {
             // Pull any remaining partial frame (may be less than frameSize)
@@ -1243,6 +1283,16 @@ export class Encoder {
         if (this.isClosed || !this.initialized) {
             return;
         }
+        // Drain samples buffered inside the resampler into the FIFO/encoder first.
+        const drained = this.drainResampler();
+        if (drained) {
+            if (this.audioFrameBuffer) {
+                this.audioFrameBuffer.pushSync(drained);
+            }
+            else {
+                this.codecContext.sendFrameSync(drained);
+            }
+        }
         // If using AudioFrameBuffer, flush remaining buffered samples first
         if (this.audioFrameBuffer && this.audioFrameBuffer.size > 0) {
             // Pull any remaining partial frame (may be less than frameSize)
@@ -1602,18 +1652,22 @@ export class Encoder {
             return;
         }
         this.isClosed = true;
-        // Close queues
         this.inputQueue.close();
         this.outputQueue.close();
-        // Free any frames/packets left buffered on an aborted/early-closed pipeline.
         this.inputQueue.clear();
         this.outputQueue.clear();
         this.packet.free();
-        this.codecContext.freeContext();
-        // Release the audio frame buffer (owns a native Frame + AudioFifo) used by
-        // fixed-frame-size audio encoders.
         this.audioFrameBuffer?.[Symbol.dispose]();
         this.audioFrameBuffer = undefined;
+        this.audioResampler?.[Symbol.dispose]();
+        this.audioResampler = undefined;
+        this.resampledFrame?.free();
+        this.resampledFrame = undefined;
+        this.videoScaler?.[Symbol.dispose]();
+        this.videoScaler = undefined;
+        this.scaledFrame?.free();
+        this.scaledFrame = undefined;
+        this.codecContext.freeContext();
         this.initialized = false;
     }
     /**
@@ -1806,7 +1860,8 @@ export class Encoder {
             }
             this.codecContext.width = frame.width;
             this.codecContext.height = frame.height;
-            this.codecContext.pixelFormat = frame.format;
+            // Pick a codec-supported pixel format (converting on demand when autoFormat).
+            this.setupVideoFormat(frame);
             this.codecContext.sampleAspectRatio = frame.sampleAspectRatio;
             this.codecContext.colorRange = frame.colorRange;
             this.codecContext.colorPrimaries = frame.colorPrimaries;
@@ -1818,12 +1873,8 @@ export class Encoder {
             }
         }
         else {
-            // Audio: Always use frame timebase (which is typically 1/sample_rate)
-            // This ensures correct PTS progression for audio frames
-            this.codecContext.timeBase = frame.timeBase;
-            this.codecContext.sampleRate = frame.sampleRate;
-            this.codecContext.sampleFormat = frame.format;
-            this.codecContext.channelLayout = frame.channelLayout;
+            // Audio: pick codec-supported sample rate/format/layout (resampling on demand).
+            this.setupAudioParams(frame);
         }
         // Setup hardware acceleration with validation
         this.setupHardwareAcceleration(frame);
@@ -1899,7 +1950,8 @@ export class Encoder {
             }
             this.codecContext.width = frame.width;
             this.codecContext.height = frame.height;
-            this.codecContext.pixelFormat = frame.format;
+            // Pick a codec-supported pixel format (converting on demand when autoFormat).
+            this.setupVideoFormat(frame);
             this.codecContext.sampleAspectRatio = frame.sampleAspectRatio;
             this.codecContext.colorRange = frame.colorRange;
             this.codecContext.colorPrimaries = frame.colorPrimaries;
@@ -1911,12 +1963,8 @@ export class Encoder {
             }
         }
         else {
-            // Audio: Always use frame timebase (which is typically 1/sample_rate)
-            // This ensures correct PTS progression for audio frames
-            this.codecContext.timeBase = frame.timeBase;
-            this.codecContext.sampleRate = frame.sampleRate;
-            this.codecContext.sampleFormat = frame.format;
-            this.codecContext.channelLayout = frame.channelLayout;
+            // Audio: pick codec-supported sample rate/format/layout (resampling on demand).
+            this.setupAudioParams(frame);
         }
         // Setup hardware acceleration with validation
         this.setupHardwareAcceleration(frame);
@@ -2009,6 +2057,199 @@ export class Encoder {
             }
         }
     }
+    /**
+     * Configure the codec context's audio parameters from the first frame.
+     *
+     * Audio encoders only accept specific sample rates / sample formats / channel
+     * layouts. This picks codec-supported targets; if they differ from the input it
+     * either sets up a resampler (when `autoResample`) or throws a descriptive error.
+     *
+     * @param frame - First audio frame
+     *
+     * @throws {Error} If the input is unsupported and `autoResample` is disabled
+     *
+     * @throws {FFmpegError} If the resampler fails to configure
+     *
+     * @internal
+     */
+    setupAudioParams(frame) {
+        // Always use frame timebase (typically 1/sample_rate) for correct audio PTS.
+        this.codecContext.timeBase = frame.timeBase;
+        const inRate = frame.sampleRate;
+        const inFmt = frame.format;
+        // Codec open and swr both need a concrete layout. PCM/raw frames often carry
+        // an unspecified layout (order UNSPEC, mask 0); normalize it to the canonical
+        // native layout and re-apply it to each incoming frame (see encode()) so it
+        // matches the opened codec context / resampler input.
+        let inLayout = frame.channelLayout;
+        if (inLayout.order === AV_CHANNEL_ORDER_UNSPEC) {
+            inLayout = avChannelLayoutDefault(inLayout.nbChannels);
+            this.audioInputLayout = inLayout;
+        }
+        const targetRate = pickSupportedRate(inRate, this.codec.supportedSamplerates);
+        const targetFmt = pickSupportedSampleFormat(inFmt, this.codec.sampleFormats);
+        const targetLayout = pickSupportedLayout(inLayout, this.codec.channelLayouts);
+        const needsResample = targetRate !== inRate || targetFmt !== inFmt || targetLayout.nbChannels !== inLayout.nbChannels;
+        if (needsResample && !this.autoResample) {
+            const rates = this.codec.supportedSamplerates;
+            throw new Error(`Encoder '${this.codec.name}' does not support the input audio format ` +
+                `(${inRate} Hz, ${avGetSampleFmtName(inFmt) ?? inFmt}, ${inLayout.nbChannels}ch)` +
+                (rates && rates.length > 0 ? `. Supported sample rates: ${rates.join(', ')}` : '') +
+                '. Set { autoResample: true } on the encoder, or convert the input with an aresample/aformat filter first.');
+        }
+        this.codecContext.sampleRate = targetRate;
+        this.codecContext.sampleFormat = targetFmt;
+        this.codecContext.channelLayout = targetLayout;
+        if (needsResample) {
+            const swr = new SoftwareResampleContext();
+            FFmpegError.throwIfError(swr.allocSetOpts2(targetLayout, targetFmt, targetRate, inLayout, inFmt, inRate), 'Failed to configure audio resampler');
+            FFmpegError.throwIfError(swr.init(), 'Failed to initialize audio resampler');
+            this.audioResampler = swr;
+        }
+    }
+    /**
+     * Lazily allocate the reused resampler output frame.
+     *
+     * @returns The allocated output frame
+     *
+     * @internal
+     */
+    getResampleFrame() {
+        if (!this.resampledFrame) {
+            this.resampledFrame = new Frame();
+            this.resampledFrame.alloc();
+        }
+        return this.resampledFrame;
+    }
+    /**
+     * Resample an incoming audio frame to the codec's target format.
+     *
+     * Reuses a single output frame; `swr_convert_frame` allocates/sizes its buffer.
+     * The (fixed-frame-size) audio FIFO copies the samples and re-stamps PTS, so the
+     * reused frame and its carried timing are only relevant on the non-FIFO path.
+     *
+     * @param frame - Source audio frame
+     *
+     * @returns The resampled frame (owned by the encoder, reused across calls)
+     *
+     * @internal
+     */
+    resampleAudio(frame) {
+        const out = this.getResampleFrame();
+        out.unref();
+        out.format = this.codecContext.sampleFormat;
+        out.sampleRate = this.codecContext.sampleRate;
+        out.channelLayout = this.codecContext.channelLayout;
+        FFmpegError.throwIfError(this.audioResampler.convertFrame(out, frame), 'Failed to resample audio frame');
+        out.timeBase = frame.timeBase;
+        out.pts = frame.pts;
+        return out;
+    }
+    /**
+     * Drain samples buffered inside the resampler (rate-conversion delay) into the
+     * encoder path. Returns the drained frame if any, else null.
+     *
+     * @returns The drained frame (reused), or null when the resampler is empty
+     *
+     * @internal
+     */
+    drainResampler() {
+        if (!this.audioResampler) {
+            return null;
+        }
+        const out = this.getResampleFrame();
+        out.unref();
+        out.format = this.codecContext.sampleFormat;
+        out.sampleRate = this.codecContext.sampleRate;
+        out.channelLayout = this.codecContext.channelLayout;
+        const ret = this.audioResampler.convertFrame(out, null);
+        if (ret < 0 || out.nbSamples <= 0) {
+            return null;
+        }
+        return out;
+    }
+    /**
+     * Configure the codec context's pixel format from the first video frame.
+     *
+     * Video encoders only accept specific pixel formats. This keeps the input format
+     * when the codec accepts it; otherwise it either sets up a swscale converter to
+     * the least-loss supported format (when `autoFormat`) or throws a descriptive
+     * error. Hardware frames are left untouched - their format is negotiated through
+     * the hardware frames context, not swscale.
+     *
+     * @param frame - First video frame
+     *
+     * @throws {Error} If the input is unsupported and `autoFormat` is disabled
+     *
+     * @throws {FFmpegError} If the converter fails to configure
+     *
+     * @internal
+     */
+    setupVideoFormat(frame) {
+        const inFmt = frame.format;
+        // Hardware frames carry a hw pixfmt negotiated via hw_frames_ctx; swscale can't
+        // touch them - leave the format untouched.
+        if (frame.isHwFrame()) {
+            this.codecContext.pixelFormat = inFmt;
+            return;
+        }
+        const targetFmt = pickSupportedPixelFormat(inFmt, this.codec.pixelFormats);
+        const needsConversion = targetFmt !== inFmt;
+        if (needsConversion && !this.autoFormat) {
+            const supported = this.codec.pixelFormats;
+            throw new Error(`Encoder '${this.codec.name}' does not support the input pixel format ` +
+                `(${avGetPixFmtName(inFmt) ?? inFmt}). Supported: ${supported.map((f) => avGetPixFmtName(f) ?? f).join(', ')}` +
+                '. Set { autoFormat: true } on the encoder, or convert the input with a scale/format filter first.');
+        }
+        this.codecContext.pixelFormat = targetFmt;
+        // Set up a same-size swscale converter when the codec needs a different format.
+        if (needsConversion) {
+            this.videoTargetFormat = targetFmt;
+            const sws = new SoftwareScaleContext();
+            sws.getContext(frame.width, frame.height, inFmt, frame.width, frame.height, targetFmt, SWS_BILINEAR);
+            FFmpegError.throwIfError(sws.initContext(), 'Failed to configure pixel-format converter');
+            this.videoScaler = sws;
+        }
+    }
+    /**
+     * Lazily allocate the reused scaler output frame.
+     *
+     * @returns The allocated output frame
+     *
+     * @internal
+     */
+    getScaledFrame() {
+        if (!this.scaledFrame) {
+            this.scaledFrame = new Frame();
+            this.scaledFrame.alloc();
+        }
+        return this.scaledFrame;
+    }
+    /**
+     * Convert an incoming video frame to the codec's target pixel format.
+     *
+     * Reuses a single output frame; `sws_scale_frame` allocates/sizes its buffer.
+     * Resolution is unchanged - only the pixel format differs. Timing is carried over
+     * explicitly so the encoder's PTS rescale stays correct.
+     *
+     * @param frame - Source video frame
+     *
+     * @returns The converted frame (owned by the encoder, reused across calls)
+     *
+     * @internal
+     */
+    scaleVideo(frame) {
+        const out = this.getScaledFrame();
+        out.unref();
+        out.format = this.videoTargetFormat;
+        out.width = frame.width;
+        out.height = frame.height;
+        FFmpegError.throwIfError(this.videoScaler.scaleFrameSync(out, frame), 'Failed to convert video frame format');
+        out.timeBase = frame.timeBase;
+        out.pts = frame.pts;
+        out.duration = frame.duration;
+        return out;
+    }
     /**
      * Prepare frame for encoding.
      *
@@ -2038,24 +2279,26 @@ export class Encoder {
         // - Audio: frame.timeBase from first frame (typically 1/sample_rate)
         const encoderTimebase = this.codecContext.timeBase;
         const oldTimebase = frame.timeBase;
+        const pts = frame.pts;
+        const duration = frame.duration;
         // IMPORTANT: Calculate duration BEFORE converting frame timebase
         // This matches FFmpeg's video_sync_process() which calculates:
         //   duration = frame->duration * av_q2d(frame->time_base) / av_q2d(ofp->tb_out)
         // We need the OLD timebase to convert duration properly
         let frameDuration;
-        if (frame.duration && frame.duration > 0n) {
+        if (duration && duration > 0n) {
             // Convert duration from frame timebase to encoder timebase
             // This ensures encoder gets correct frame duration for timestamps
-            frameDuration = avRescaleQ(frame.duration, oldTimebase, encoderTimebase);
+            frameDuration = avRescaleQ(duration, oldTimebase, encoderTimebase);
         }
         else {
             // Default to 1 (constant frame rate behavior)
             // Matches FFmpeg's CFR mode: frame->duration = 1
             frameDuration = 1n;
         }
-        if (frame.pts !== null && frame.pts !== undefined) {
+        if (pts !== null && pts !== undefined) {
             // Convert PTS to encoder timebase
-            frame.pts = avRescaleQ(frame.pts, oldTimebase, encoderTimebase);
+            frame.pts = avRescaleQ(pts, oldTimebase, encoderTimebase);
             // IMPORTANT: Set frame timebase to encoder timebase
             // FFmpeg does this in adjust_frame_pts_to_encoder_tb(): frame->time_base = tb_dst
             // This ensures encoder gets frames with correct timebase (1/framerate for video, 1/sample_rate for audio)
@@ -2066,22 +2309,26 @@ export class Encoder {
         // based on vsync_method (CFR: 1, VFR: calculated, PASSTHROUGH: calculated)
         // Since we don't have automatic filter like FFmpeg, we always set it here
         frame.duration = frameDuration;
-        if (this.codecContext.codecType === AVMEDIA_TYPE_VIDEO) {
+        const codecType = this.codecContext.codecType;
+        if (codecType === AVMEDIA_TYPE_VIDEO) {
             // Video: Set frame quality from encoder's global quality
             // Only set if encoder has globalQuality configured and frame doesn't already have quality set
-            if (this.codecContext.globalQuality > 0 && frame.quality <= 0) {
-                frame.quality = this.codecContext.globalQuality;
+            const globalQuality = this.codecContext.globalQuality;
+            if (globalQuality > 0 && frame.quality <= 0) {
+                frame.quality = globalQuality;
             }
         }
-        else if (this.codecContext.codecType === AVMEDIA_TYPE_AUDIO) {
+        else if (codecType === AVMEDIA_TYPE_AUDIO) {
             // Audio: Validate channel count consistency
-            // If encoder doesn't support AV_CODEC_CAP_PARAM_CHANGE, channel count must remain constant
-            const supportsParamChange = this.codec.hasCapabilities(AV_CODEC_CAP_PARAM_CHANGE);
-            if (!supportsParamChange) {
-                const encoderChannels = this.codecContext.channelLayout.nbChannels;
+            // If encoder doesn't support AV_CODEC_CAP_PARAM_CHANGE, channel count must remain
+            // constant. The capability and the encoder's channel count are stable after open,
+            // so resolve them once instead of per frame.
+            this.supportsParamChange ??= this.codec.hasCapabilities(AV_CODEC_CAP_PARAM_CHANGE);
+            if (!this.supportsParamChange) {
+                this.encoderChannels ??= this.codecContext.channelLayout.nbChannels;
                 const frameChannels = frame.channelLayout?.nbChannels ?? 0;
-                if (encoderChannels !== frameChannels) {
-                    throw new Error(`Audio channel count changed (${encoderChannels} -> ${frameChannels}) and encoder '${this.codec.name}' does not support parameter changes`);
+                if (this.encoderChannels !== frameChannels) {
+                    throw new Error(`Audio channel count changed (${this.encoderChannels} -> ${frameChannels}) and encoder '${this.codec.name}' does not support parameter changes`);
                 }
             }
         }