node-av 6.0.0-beta.8 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +2 -1
  2. package/dist/api/bitstream-filter.js +0 -2
  3. package/dist/api/bitstream-filter.js.map +1 -1
  4. package/dist/api/decoder.d.ts +169 -9
  5. package/dist/api/decoder.js +351 -43
  6. package/dist/api/decoder.js.map +1 -1
  7. package/dist/api/encoder.d.ts +129 -0
  8. package/dist/api/encoder.js +288 -41
  9. package/dist/api/encoder.js.map +1 -1
  10. package/dist/api/filter.js +0 -2
  11. package/dist/api/filter.js.map +1 -1
  12. package/dist/api/muxer.d.ts +19 -0
  13. package/dist/api/muxer.js +44 -24
  14. package/dist/api/muxer.js.map +1 -1
  15. package/dist/api/rtp-stream.d.ts +14 -11
  16. package/dist/api/rtp-stream.js +22 -47
  17. package/dist/api/rtp-stream.js.map +1 -1
  18. package/dist/api/scaler.js.map +1 -1
  19. package/dist/api/utilities/async-queue.js +0 -2
  20. package/dist/api/utilities/async-queue.js.map +1 -1
  21. package/dist/api/utilities/codec-format.d.ts +87 -0
  22. package/dist/api/utilities/codec-format.js +117 -0
  23. package/dist/api/utilities/codec-format.js.map +1 -0
  24. package/dist/api/utilities/index.d.ts +1 -0
  25. package/dist/api/utilities/index.js +2 -0
  26. package/dist/api/utilities/index.js.map +1 -1
  27. package/dist/api/utilities/whisper-model.d.ts +20 -0
  28. package/dist/api/utilities/whisper-model.js +61 -2
  29. package/dist/api/utilities/whisper-model.js.map +1 -1
  30. package/dist/constants/bsf-options.d.ts +8 -1
  31. package/dist/constants/constants.d.ts +2 -0
  32. package/dist/constants/constants.js +2 -0
  33. package/dist/constants/constants.js.map +1 -1
  34. package/dist/constants/format-options.d.ts +4 -0
  35. package/dist/lib/binding.d.ts +2 -0
  36. package/dist/lib/binding.js.map +1 -1
  37. package/dist/lib/frame.d.ts +8 -0
  38. package/dist/lib/frame.js +10 -0
  39. package/dist/lib/frame.js.map +1 -1
  40. package/dist/lib/native-types.d.ts +12 -0
  41. package/dist/lib/packet.d.ts +8 -0
  42. package/dist/lib/packet.js +10 -0
  43. package/dist/lib/packet.js.map +1 -1
  44. package/dist/lib/utilities.d.ts +45 -0
  45. package/dist/lib/utilities.js +49 -0
  46. package/dist/lib/utilities.js.map +1 -1
  47. package/package.json +16 -16
@@ -90,6 +90,38 @@ export interface EncoderOptions<C = unknown> {
90
90
  * @default FFmpeg default (both methods, codec chooses best)
91
91
  */
92
92
  threadType?: AVThreadType;
93
+ /**
94
+ * Automatically resample incoming audio to a format the codec supports.
95
+ *
96
+ * Audio encoders only accept specific sample rates, sample formats, and channel
97
+ * layouts (e.g. libmp3lame rejects 96 kHz; AAC needs planar `fltp`). When `true`,
98
+ * the encoder transparently converts each frame to the nearest supported
99
+ * sample rate / sample format / channel layout (like `ffmpeg`'s automatic
100
+ * `aresample`). When `false` (default), an unsupported input raises a descriptive
101
+ * error instead — keeping behaviour explicit and 1:1 with the codec.
102
+ *
103
+ * Has no effect on video.
104
+ *
105
+ * @default false
106
+ */
107
+ autoResample?: boolean;
108
+ /**
109
+ * Automatically convert incoming video to a pixel format the codec supports.
110
+ *
111
+ * Video encoders only accept specific pixel formats (e.g. libx264 wants planar
112
+ * YUV like `yuv420p` and rejects `rgb24`). When `true`, the encoder transparently
113
+ * converts each frame to the least-loss supported pixel format via swscale (like
114
+ * `ffmpeg`'s automatic `format` filter), keeping the same resolution. When `false`
115
+ * (default), an unsupported input raises a descriptive error instead — keeping
116
+ * behaviour explicit and 1:1 with the codec.
117
+ *
118
+ * Resolution is never changed (the encoder already adopts the frame's dimensions),
119
+ * and hardware frames are left untouched (their format is negotiated via the
120
+ * hardware frames context). Has no effect on audio.
121
+ *
122
+ * @default false
123
+ */
124
+ autoFormat?: boolean;
93
125
  /**
94
126
  * Additional codec-specific options.
95
127
  *
@@ -174,6 +206,16 @@ export declare class Encoder implements Disposable {
174
206
  private opts?;
175
207
  private options;
176
208
  private audioFrameBuffer?;
209
+ private autoResample;
210
+ private audioResampler?;
211
+ private resampledFrame?;
212
+ private audioInputLayout?;
213
+ private autoFormat;
214
+ private videoScaler?;
215
+ private scaledFrame?;
216
+ private videoTargetFormat?;
217
+ private supportsParamChange?;
218
+ private encoderChannels?;
177
219
  private inputQueue;
178
220
  private outputQueue;
179
221
  private workerPromise;
@@ -1156,6 +1198,93 @@ export declare class Encoder implements Disposable {
1156
1198
  * @internal
1157
1199
  */
1158
1200
  private setupHardwareAcceleration;
1201
+ /**
1202
+ * Configure the codec context's audio parameters from the first frame.
1203
+ *
1204
+ * Audio encoders only accept specific sample rates / sample formats / channel
1205
+ * layouts. This picks codec-supported targets; if they differ from the input it
1206
+ * either sets up a resampler (when `autoResample`) or throws a descriptive error.
1207
+ *
1208
+ * @param frame - First audio frame
1209
+ *
1210
+ * @throws {Error} If the input is unsupported and `autoResample` is disabled
1211
+ *
1212
+ * @throws {FFmpegError} If the resampler fails to configure
1213
+ *
1214
+ * @internal
1215
+ */
1216
+ private setupAudioParams;
1217
+ /**
1218
+ * Lazily allocate the reused resampler output frame.
1219
+ *
1220
+ * @returns The allocated output frame
1221
+ *
1222
+ * @internal
1223
+ */
1224
+ private getResampleFrame;
1225
+ /**
1226
+ * Resample an incoming audio frame to the codec's target format.
1227
+ *
1228
+ * Reuses a single output frame; `swr_convert_frame` allocates/sizes its buffer.
1229
+ * The (fixed-frame-size) audio FIFO copies the samples and re-stamps PTS, so the
1230
+ * reused frame and its carried timing are only relevant on the non-FIFO path.
1231
+ *
1232
+ * @param frame - Source audio frame
1233
+ *
1234
+ * @returns The resampled frame (owned by the encoder, reused across calls)
1235
+ *
1236
+ * @internal
1237
+ */
1238
+ private resampleAudio;
1239
+ /**
1240
+ * Drain samples buffered inside the resampler (rate-conversion delay) into the
1241
+ * encoder path. Returns the drained frame if any, else null.
1242
+ *
1243
+ * @returns The drained frame (reused), or null when the resampler is empty
1244
+ *
1245
+ * @internal
1246
+ */
1247
+ private drainResampler;
1248
+ /**
1249
+ * Configure the codec context's pixel format from the first video frame.
1250
+ *
1251
+ * Video encoders only accept specific pixel formats. This keeps the input format
1252
+ * when the codec accepts it; otherwise it either sets up a swscale converter to
1253
+ * the least-loss supported format (when `autoFormat`) or throws a descriptive
1254
+ * error. Hardware frames are left untouched - their format is negotiated through
1255
+ * the hardware frames context, not swscale.
1256
+ *
1257
+ * @param frame - First video frame
1258
+ *
1259
+ * @throws {Error} If the input is unsupported and `autoFormat` is disabled
1260
+ *
1261
+ * @throws {FFmpegError} If the converter fails to configure
1262
+ *
1263
+ * @internal
1264
+ */
1265
+ private setupVideoFormat;
1266
+ /**
1267
+ * Lazily allocate the reused scaler output frame.
1268
+ *
1269
+ * @returns The allocated output frame
1270
+ *
1271
+ * @internal
1272
+ */
1273
+ private getScaledFrame;
1274
+ /**
1275
+ * Convert an incoming video frame to the codec's target pixel format.
1276
+ *
1277
+ * Reuses a single output frame; `sws_scale_frame` allocates/sizes its buffer.
1278
+ * Resolution is unchanged - only the pixel format differs. Timing is carried over
1279
+ * explicitly so the encoder's PTS rescale stays correct.
1280
+ *
1281
+ * @param frame - Source video frame
1282
+ *
1283
+ * @returns The converted frame (owned by the encoder, reused across calls)
1284
+ *
1285
+ * @internal
1286
+ */
1287
+ private scaleVideo;
1159
1288
  /**
1160
1289
  * Prepare frame for encoding.
1161
1290
  *
@@ -50,7 +50,8 @@ var __disposeResources = (this && this.__disposeResources) || (function (Suppres
50
50
  var e = new Error(message);
51
51
  return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
52
52
  });
53
- import { AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, AV_CODEC_CAP_PARAM_CHANGE, AV_CODEC_FLAG_COPY_OPAQUE, AV_CODEC_FLAG_FRAME_DURATION, AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, AV_PICTURE_TYPE_NONE, AV_PIX_FMT_NONE, AV_PKT_FLAG_TRUSTED, AVCHROMA_LOC_UNSPECIFIED, AVERROR_EAGAIN, AVERROR_ENCODER_NOT_FOUND, AVERROR_EOF, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_VIDEO, EOF, } from '../constants/constants.js';
53
+ /* eslint-disable @stylistic/indent-binary-ops */
54
+ import { AV_CHANNEL_ORDER_UNSPEC, AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, AV_CODEC_CAP_PARAM_CHANGE, AV_CODEC_FLAG_COPY_OPAQUE, AV_CODEC_FLAG_FRAME_DURATION, AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, AV_PICTURE_TYPE_NONE, AV_PIX_FMT_NONE, AV_PKT_FLAG_TRUSTED, AVCHROMA_LOC_UNSPECIFIED, AVERROR_EAGAIN, AVERROR_ENCODER_NOT_FOUND, AVERROR_EOF, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_VIDEO, EOF, SWS_BILINEAR, } from '../constants/constants.js';
54
55
  import { CodecContext } from '../lib/codec-context.js';
55
56
  import { Codec } from '../lib/codec.js';
56
57
  import { Dictionary } from '../lib/dictionary.js';
@@ -58,10 +59,13 @@ import { FFmpegError } from '../lib/error.js';
58
59
  import { Frame } from '../lib/frame.js';
59
60
  import { Packet } from '../lib/packet.js';
60
61
  import { Rational } from '../lib/rational.js';
61
- import { avRescaleQ } from '../lib/utilities.js';
62
+ import { SoftwareResampleContext } from '../lib/software-resample-context.js';
63
+ import { SoftwareScaleContext } from '../lib/software-scale-context.js';
64
+ import { avChannelLayoutDefault, avGetPixFmtName, avGetSampleFmtName, avRescaleQ } from '../lib/utilities.js';
62
65
  import { AudioFrameBuffer } from './audio-frame-buffer.js';
63
66
  import { FRAME_THREAD_QUEUE_SIZE, PACKET_THREAD_QUEUE_SIZE } from './constants.js';
64
67
  import { AsyncQueue } from './utilities/async-queue.js';
68
+ import { pickSupportedLayout, pickSupportedPixelFormat, pickSupportedRate, pickSupportedSampleFormat } from './utilities/codec-format.js';
65
69
  import { SchedulerControl } from './utilities/scheduler.js';
66
70
  import { parseBitrate } from './utils.js';
67
71
  /**
@@ -132,6 +136,16 @@ export class Encoder {
132
136
  opts;
133
137
  options;
134
138
  audioFrameBuffer;
139
+ autoResample;
140
+ audioResampler;
141
+ resampledFrame;
142
+ audioInputLayout;
143
+ autoFormat;
144
+ videoScaler;
145
+ scaledFrame;
146
+ videoTargetFormat;
147
+ supportsParamChange;
148
+ encoderChannels;
135
149
  // Worker pattern for push-based processing
136
150
  inputQueue;
137
151
  outputQueue;
@@ -154,6 +168,8 @@ export class Encoder {
154
168
  this.codec = codec;
155
169
  this.options = options;
156
170
  this.opts = opts;
171
+ this.autoResample = options.autoResample ?? false;
172
+ this.autoFormat = options.autoFormat ?? false;
157
173
  this.packet = new Packet();
158
174
  this.packet.alloc();
159
175
  this.inputQueue = new AsyncQueue(FRAME_THREAD_QUEUE_SIZE, (f) => f.free());
@@ -736,8 +752,15 @@ export class Encoder {
736
752
  // Open encoder if not already done
737
753
  this.initializePromise ??= this.initialize(frame);
738
754
  await this.initializePromise;
755
+ // Give an unspecified-layout frame the concrete native layout the codec was
756
+ // opened with (and the resampler configured for), so both accept it.
757
+ if (this.audioInputLayout) {
758
+ frame.channelLayout = this.audioInputLayout;
759
+ }
760
+ // Convert to the codec's format first (audio resample / video pixfmt).
761
+ const input = this.audioResampler ? this.resampleAudio(frame) : this.videoScaler ? this.scaleVideo(frame) : frame;
739
762
  // Prepare frame for encoding (set quality, validate channel count)
740
- this.prepareFrameForEncoding(frame);
763
+ this.prepareFrameForEncoding(input);
741
764
  const encode = async (newFrame) => {
742
765
  const sendRet = await this.codecContext.sendFrame(newFrame);
743
766
  if (sendRet < 0 && sendRet !== AVERROR_EOF) {
@@ -747,10 +770,10 @@ export class Encoder {
747
770
  };
748
771
  if (this.audioFrameBuffer) {
749
772
  // Push frame into buffer - actual sending happens in receive()
750
- await this.audioFrameBuffer.push(frame);
773
+ await this.audioFrameBuffer.push(input);
751
774
  }
752
775
  else {
753
- await encode(frame);
776
+ await encode(input);
754
777
  }
755
778
  }
756
779
  /**
@@ -804,8 +827,15 @@ export class Encoder {
804
827
  if (!this.initialized) {
805
828
  this.initializeSync(frame);
806
829
  }
830
+ // Give an unspecified-layout frame the concrete native layout the codec was
831
+ // opened with (and the resampler configured for), so both accept it.
832
+ if (this.audioInputLayout) {
833
+ frame.channelLayout = this.audioInputLayout;
834
+ }
835
+ // Convert to the codec's format first (audio resample / video pixfmt).
836
+ const input = this.audioResampler ? this.resampleAudio(frame) : this.videoScaler ? this.scaleVideo(frame) : frame;
807
837
  // Prepare frame for encoding (set quality, validate channel count)
808
- this.prepareFrameForEncoding(frame);
838
+ this.prepareFrameForEncoding(input);
809
839
  const encode = (newFrame) => {
810
840
  const sendRet = this.codecContext.sendFrameSync(newFrame);
811
841
  if (sendRet < 0 && sendRet !== AVERROR_EOF) {
@@ -815,10 +845,10 @@ export class Encoder {
815
845
  };
816
846
  if (this.audioFrameBuffer) {
817
847
  // Push frame into buffer - actual sending happens in receiveSync()
818
- this.audioFrameBuffer.pushSync(frame);
848
+ this.audioFrameBuffer.pushSync(input);
819
849
  }
820
850
  else {
821
- encode(frame);
851
+ encode(input);
822
852
  }
823
853
  }
824
854
  /**
@@ -1183,6 +1213,16 @@ export class Encoder {
1183
1213
  if (this.isClosed || !this.initialized) {
1184
1214
  return;
1185
1215
  }
1216
+ // Drain samples buffered inside the resampler into the FIFO/encoder first.
1217
+ const drained = this.drainResampler();
1218
+ if (drained) {
1219
+ if (this.audioFrameBuffer) {
1220
+ await this.audioFrameBuffer.push(drained);
1221
+ }
1222
+ else {
1223
+ await this.codecContext.sendFrame(drained);
1224
+ }
1225
+ }
1186
1226
  // If using AudioFrameBuffer, flush remaining buffered samples first
1187
1227
  if (this.audioFrameBuffer && this.audioFrameBuffer.size > 0) {
1188
1228
  // Pull any remaining partial frame (may be less than frameSize)
@@ -1243,6 +1283,16 @@ export class Encoder {
1243
1283
  if (this.isClosed || !this.initialized) {
1244
1284
  return;
1245
1285
  }
1286
+ // Drain samples buffered inside the resampler into the FIFO/encoder first.
1287
+ const drained = this.drainResampler();
1288
+ if (drained) {
1289
+ if (this.audioFrameBuffer) {
1290
+ this.audioFrameBuffer.pushSync(drained);
1291
+ }
1292
+ else {
1293
+ this.codecContext.sendFrameSync(drained);
1294
+ }
1295
+ }
1246
1296
  // If using AudioFrameBuffer, flush remaining buffered samples first
1247
1297
  if (this.audioFrameBuffer && this.audioFrameBuffer.size > 0) {
1248
1298
  // Pull any remaining partial frame (may be less than frameSize)
@@ -1602,18 +1652,22 @@ export class Encoder {
1602
1652
  return;
1603
1653
  }
1604
1654
  this.isClosed = true;
1605
- // Close queues
1606
1655
  this.inputQueue.close();
1607
1656
  this.outputQueue.close();
1608
- // Free any frames/packets left buffered on an aborted/early-closed pipeline.
1609
1657
  this.inputQueue.clear();
1610
1658
  this.outputQueue.clear();
1611
1659
  this.packet.free();
1612
- this.codecContext.freeContext();
1613
- // Release the audio frame buffer (owns a native Frame + AudioFifo) used by
1614
- // fixed-frame-size audio encoders.
1615
1660
  this.audioFrameBuffer?.[Symbol.dispose]();
1616
1661
  this.audioFrameBuffer = undefined;
1662
+ this.audioResampler?.[Symbol.dispose]();
1663
+ this.audioResampler = undefined;
1664
+ this.resampledFrame?.free();
1665
+ this.resampledFrame = undefined;
1666
+ this.videoScaler?.[Symbol.dispose]();
1667
+ this.videoScaler = undefined;
1668
+ this.scaledFrame?.free();
1669
+ this.scaledFrame = undefined;
1670
+ this.codecContext.freeContext();
1617
1671
  this.initialized = false;
1618
1672
  }
1619
1673
  /**
@@ -1806,7 +1860,8 @@ export class Encoder {
1806
1860
  }
1807
1861
  this.codecContext.width = frame.width;
1808
1862
  this.codecContext.height = frame.height;
1809
- this.codecContext.pixelFormat = frame.format;
1863
+ // Pick a codec-supported pixel format (converting on demand when autoFormat).
1864
+ this.setupVideoFormat(frame);
1810
1865
  this.codecContext.sampleAspectRatio = frame.sampleAspectRatio;
1811
1866
  this.codecContext.colorRange = frame.colorRange;
1812
1867
  this.codecContext.colorPrimaries = frame.colorPrimaries;
@@ -1818,12 +1873,8 @@ export class Encoder {
1818
1873
  }
1819
1874
  }
1820
1875
  else {
1821
- // Audio: Always use frame timebase (which is typically 1/sample_rate)
1822
- // This ensures correct PTS progression for audio frames
1823
- this.codecContext.timeBase = frame.timeBase;
1824
- this.codecContext.sampleRate = frame.sampleRate;
1825
- this.codecContext.sampleFormat = frame.format;
1826
- this.codecContext.channelLayout = frame.channelLayout;
1876
+ // Audio: pick codec-supported sample rate/format/layout (resampling on demand).
1877
+ this.setupAudioParams(frame);
1827
1878
  }
1828
1879
  // Setup hardware acceleration with validation
1829
1880
  this.setupHardwareAcceleration(frame);
@@ -1899,7 +1950,8 @@ export class Encoder {
1899
1950
  }
1900
1951
  this.codecContext.width = frame.width;
1901
1952
  this.codecContext.height = frame.height;
1902
- this.codecContext.pixelFormat = frame.format;
1953
+ // Pick a codec-supported pixel format (converting on demand when autoFormat).
1954
+ this.setupVideoFormat(frame);
1903
1955
  this.codecContext.sampleAspectRatio = frame.sampleAspectRatio;
1904
1956
  this.codecContext.colorRange = frame.colorRange;
1905
1957
  this.codecContext.colorPrimaries = frame.colorPrimaries;
@@ -1911,12 +1963,8 @@ export class Encoder {
1911
1963
  }
1912
1964
  }
1913
1965
  else {
1914
- // Audio: Always use frame timebase (which is typically 1/sample_rate)
1915
- // This ensures correct PTS progression for audio frames
1916
- this.codecContext.timeBase = frame.timeBase;
1917
- this.codecContext.sampleRate = frame.sampleRate;
1918
- this.codecContext.sampleFormat = frame.format;
1919
- this.codecContext.channelLayout = frame.channelLayout;
1966
+ // Audio: pick codec-supported sample rate/format/layout (resampling on demand).
1967
+ this.setupAudioParams(frame);
1920
1968
  }
1921
1969
  // Setup hardware acceleration with validation
1922
1970
  this.setupHardwareAcceleration(frame);
@@ -2009,6 +2057,199 @@ export class Encoder {
2009
2057
  }
2010
2058
  }
2011
2059
  }
2060
+ /**
2061
+ * Configure the codec context's audio parameters from the first frame.
2062
+ *
2063
+ * Audio encoders only accept specific sample rates / sample formats / channel
2064
+ * layouts. This picks codec-supported targets; if they differ from the input it
2065
+ * either sets up a resampler (when `autoResample`) or throws a descriptive error.
2066
+ *
2067
+ * @param frame - First audio frame
2068
+ *
2069
+ * @throws {Error} If the input is unsupported and `autoResample` is disabled
2070
+ *
2071
+ * @throws {FFmpegError} If the resampler fails to configure
2072
+ *
2073
+ * @internal
2074
+ */
2075
+ setupAudioParams(frame) {
2076
+ // Always use frame timebase (typically 1/sample_rate) for correct audio PTS.
2077
+ this.codecContext.timeBase = frame.timeBase;
2078
+ const inRate = frame.sampleRate;
2079
+ const inFmt = frame.format;
2080
+ // Codec open and swr both need a concrete layout. PCM/raw frames often carry
2081
+ // an unspecified layout (order UNSPEC, mask 0); normalize it to the canonical
2082
+ // native layout and re-apply it to each incoming frame (see encode()) so it
2083
+ // matches the opened codec context / resampler input.
2084
+ let inLayout = frame.channelLayout;
2085
+ if (inLayout.order === AV_CHANNEL_ORDER_UNSPEC) {
2086
+ inLayout = avChannelLayoutDefault(inLayout.nbChannels);
2087
+ this.audioInputLayout = inLayout;
2088
+ }
2089
+ const targetRate = pickSupportedRate(inRate, this.codec.supportedSamplerates);
2090
+ const targetFmt = pickSupportedSampleFormat(inFmt, this.codec.sampleFormats);
2091
+ const targetLayout = pickSupportedLayout(inLayout, this.codec.channelLayouts);
2092
+ const needsResample = targetRate !== inRate || targetFmt !== inFmt || targetLayout.nbChannels !== inLayout.nbChannels;
2093
+ if (needsResample && !this.autoResample) {
2094
+ const rates = this.codec.supportedSamplerates;
2095
+ throw new Error(`Encoder '${this.codec.name}' does not support the input audio format ` +
2096
+ `(${inRate} Hz, ${avGetSampleFmtName(inFmt) ?? inFmt}, ${inLayout.nbChannels}ch)` +
2097
+ (rates && rates.length > 0 ? `. Supported sample rates: ${rates.join(', ')}` : '') +
2098
+ '. Set { autoResample: true } on the encoder, or convert the input with an aresample/aformat filter first.');
2099
+ }
2100
+ this.codecContext.sampleRate = targetRate;
2101
+ this.codecContext.sampleFormat = targetFmt;
2102
+ this.codecContext.channelLayout = targetLayout;
2103
+ if (needsResample) {
2104
+ const swr = new SoftwareResampleContext();
2105
+ FFmpegError.throwIfError(swr.allocSetOpts2(targetLayout, targetFmt, targetRate, inLayout, inFmt, inRate), 'Failed to configure audio resampler');
2106
+ FFmpegError.throwIfError(swr.init(), 'Failed to initialize audio resampler');
2107
+ this.audioResampler = swr;
2108
+ }
2109
+ }
2110
+ /**
2111
+ * Lazily allocate the reused resampler output frame.
2112
+ *
2113
+ * @returns The allocated output frame
2114
+ *
2115
+ * @internal
2116
+ */
2117
+ getResampleFrame() {
2118
+ if (!this.resampledFrame) {
2119
+ this.resampledFrame = new Frame();
2120
+ this.resampledFrame.alloc();
2121
+ }
2122
+ return this.resampledFrame;
2123
+ }
2124
+ /**
2125
+ * Resample an incoming audio frame to the codec's target format.
2126
+ *
2127
+ * Reuses a single output frame; `swr_convert_frame` allocates/sizes its buffer.
2128
+ * The (fixed-frame-size) audio FIFO copies the samples and re-stamps PTS, so the
2129
+ * reused frame and its carried timing are only relevant on the non-FIFO path.
2130
+ *
2131
+ * @param frame - Source audio frame
2132
+ *
2133
+ * @returns The resampled frame (owned by the encoder, reused across calls)
2134
+ *
2135
+ * @internal
2136
+ */
2137
+ resampleAudio(frame) {
2138
+ const out = this.getResampleFrame();
2139
+ out.unref();
2140
+ out.format = this.codecContext.sampleFormat;
2141
+ out.sampleRate = this.codecContext.sampleRate;
2142
+ out.channelLayout = this.codecContext.channelLayout;
2143
+ FFmpegError.throwIfError(this.audioResampler.convertFrame(out, frame), 'Failed to resample audio frame');
2144
+ out.timeBase = frame.timeBase;
2145
+ out.pts = frame.pts;
2146
+ return out;
2147
+ }
2148
+ /**
2149
+ * Drain samples buffered inside the resampler (rate-conversion delay) into the
2150
+ * encoder path. Returns the drained frame if any, else null.
2151
+ *
2152
+ * @returns The drained frame (reused), or null when the resampler is empty
2153
+ *
2154
+ * @internal
2155
+ */
2156
+ drainResampler() {
2157
+ if (!this.audioResampler) {
2158
+ return null;
2159
+ }
2160
+ const out = this.getResampleFrame();
2161
+ out.unref();
2162
+ out.format = this.codecContext.sampleFormat;
2163
+ out.sampleRate = this.codecContext.sampleRate;
2164
+ out.channelLayout = this.codecContext.channelLayout;
2165
+ const ret = this.audioResampler.convertFrame(out, null);
2166
+ if (ret < 0 || out.nbSamples <= 0) {
2167
+ return null;
2168
+ }
2169
+ return out;
2170
+ }
2171
+ /**
2172
+ * Configure the codec context's pixel format from the first video frame.
2173
+ *
2174
+ * Video encoders only accept specific pixel formats. This keeps the input format
2175
+ * when the codec accepts it; otherwise it either sets up a swscale converter to
2176
+ * the least-loss supported format (when `autoFormat`) or throws a descriptive
2177
+ * error. Hardware frames are left untouched - their format is negotiated through
2178
+ * the hardware frames context, not swscale.
2179
+ *
2180
+ * @param frame - First video frame
2181
+ *
2182
+ * @throws {Error} If the input is unsupported and `autoFormat` is disabled
2183
+ *
2184
+ * @throws {FFmpegError} If the converter fails to configure
2185
+ *
2186
+ * @internal
2187
+ */
2188
+ setupVideoFormat(frame) {
2189
+ const inFmt = frame.format;
2190
+ // Hardware frames carry a hw pixfmt negotiated via hw_frames_ctx; swscale can't
2191
+ // touch them - leave the format untouched.
2192
+ if (frame.isHwFrame()) {
2193
+ this.codecContext.pixelFormat = inFmt;
2194
+ return;
2195
+ }
2196
+ const targetFmt = pickSupportedPixelFormat(inFmt, this.codec.pixelFormats);
2197
+ const needsConversion = targetFmt !== inFmt;
2198
+ if (needsConversion && !this.autoFormat) {
2199
+ const supported = this.codec.pixelFormats;
2200
+ throw new Error(`Encoder '${this.codec.name}' does not support the input pixel format ` +
2201
+ `(${avGetPixFmtName(inFmt) ?? inFmt}). Supported: ${supported.map((f) => avGetPixFmtName(f) ?? f).join(', ')}` +
2202
+ '. Set { autoFormat: true } on the encoder, or convert the input with a scale/format filter first.');
2203
+ }
2204
+ this.codecContext.pixelFormat = targetFmt;
2205
+ // Set up a same-size swscale converter when the codec needs a different format.
2206
+ if (needsConversion) {
2207
+ this.videoTargetFormat = targetFmt;
2208
+ const sws = new SoftwareScaleContext();
2209
+ sws.getContext(frame.width, frame.height, inFmt, frame.width, frame.height, targetFmt, SWS_BILINEAR);
2210
+ FFmpegError.throwIfError(sws.initContext(), 'Failed to configure pixel-format converter');
2211
+ this.videoScaler = sws;
2212
+ }
2213
+ }
2214
+ /**
2215
+ * Lazily allocate the reused scaler output frame.
2216
+ *
2217
+ * @returns The allocated output frame
2218
+ *
2219
+ * @internal
2220
+ */
2221
+ getScaledFrame() {
2222
+ if (!this.scaledFrame) {
2223
+ this.scaledFrame = new Frame();
2224
+ this.scaledFrame.alloc();
2225
+ }
2226
+ return this.scaledFrame;
2227
+ }
2228
+ /**
2229
+ * Convert an incoming video frame to the codec's target pixel format.
2230
+ *
2231
+ * Reuses a single output frame; `sws_scale_frame` allocates/sizes its buffer.
2232
+ * Resolution is unchanged - only the pixel format differs. Timing is carried over
2233
+ * explicitly so the encoder's PTS rescale stays correct.
2234
+ *
2235
+ * @param frame - Source video frame
2236
+ *
2237
+ * @returns The converted frame (owned by the encoder, reused across calls)
2238
+ *
2239
+ * @internal
2240
+ */
2241
+ scaleVideo(frame) {
2242
+ const out = this.getScaledFrame();
2243
+ out.unref();
2244
+ out.format = this.videoTargetFormat;
2245
+ out.width = frame.width;
2246
+ out.height = frame.height;
2247
+ FFmpegError.throwIfError(this.videoScaler.scaleFrameSync(out, frame), 'Failed to convert video frame format');
2248
+ out.timeBase = frame.timeBase;
2249
+ out.pts = frame.pts;
2250
+ out.duration = frame.duration;
2251
+ return out;
2252
+ }
2012
2253
  /**
2013
2254
  * Prepare frame for encoding.
2014
2255
  *
@@ -2038,24 +2279,26 @@ export class Encoder {
2038
2279
  // - Audio: frame.timeBase from first frame (typically 1/sample_rate)
2039
2280
  const encoderTimebase = this.codecContext.timeBase;
2040
2281
  const oldTimebase = frame.timeBase;
2282
+ const pts = frame.pts;
2283
+ const duration = frame.duration;
2041
2284
  // IMPORTANT: Calculate duration BEFORE converting frame timebase
2042
2285
  // This matches FFmpeg's video_sync_process() which calculates:
2043
2286
  // duration = frame->duration * av_q2d(frame->time_base) / av_q2d(ofp->tb_out)
2044
2287
  // We need the OLD timebase to convert duration properly
2045
2288
  let frameDuration;
2046
- if (frame.duration && frame.duration > 0n) {
2289
+ if (duration && duration > 0n) {
2047
2290
  // Convert duration from frame timebase to encoder timebase
2048
2291
  // This ensures encoder gets correct frame duration for timestamps
2049
- frameDuration = avRescaleQ(frame.duration, oldTimebase, encoderTimebase);
2292
+ frameDuration = avRescaleQ(duration, oldTimebase, encoderTimebase);
2050
2293
  }
2051
2294
  else {
2052
2295
  // Default to 1 (constant frame rate behavior)
2053
2296
  // Matches FFmpeg's CFR mode: frame->duration = 1
2054
2297
  frameDuration = 1n;
2055
2298
  }
2056
- if (frame.pts !== null && frame.pts !== undefined) {
2299
+ if (pts !== null && pts !== undefined) {
2057
2300
  // Convert PTS to encoder timebase
2058
- frame.pts = avRescaleQ(frame.pts, oldTimebase, encoderTimebase);
2301
+ frame.pts = avRescaleQ(pts, oldTimebase, encoderTimebase);
2059
2302
  // IMPORTANT: Set frame timebase to encoder timebase
2060
2303
  // FFmpeg does this in adjust_frame_pts_to_encoder_tb(): frame->time_base = tb_dst
2061
2304
  // This ensures encoder gets frames with correct timebase (1/framerate for video, 1/sample_rate for audio)
@@ -2066,22 +2309,26 @@ export class Encoder {
2066
2309
  // based on vsync_method (CFR: 1, VFR: calculated, PASSTHROUGH: calculated)
2067
2310
  // Since we don't have automatic filter like FFmpeg, we always set it here
2068
2311
  frame.duration = frameDuration;
2069
- if (this.codecContext.codecType === AVMEDIA_TYPE_VIDEO) {
2312
+ const codecType = this.codecContext.codecType;
2313
+ if (codecType === AVMEDIA_TYPE_VIDEO) {
2070
2314
  // Video: Set frame quality from encoder's global quality
2071
2315
  // Only set if encoder has globalQuality configured and frame doesn't already have quality set
2072
- if (this.codecContext.globalQuality > 0 && frame.quality <= 0) {
2073
- frame.quality = this.codecContext.globalQuality;
2316
+ const globalQuality = this.codecContext.globalQuality;
2317
+ if (globalQuality > 0 && frame.quality <= 0) {
2318
+ frame.quality = globalQuality;
2074
2319
  }
2075
2320
  }
2076
- else if (this.codecContext.codecType === AVMEDIA_TYPE_AUDIO) {
2321
+ else if (codecType === AVMEDIA_TYPE_AUDIO) {
2077
2322
  // Audio: Validate channel count consistency
2078
- // If encoder doesn't support AV_CODEC_CAP_PARAM_CHANGE, channel count must remain constant
2079
- const supportsParamChange = this.codec.hasCapabilities(AV_CODEC_CAP_PARAM_CHANGE);
2080
- if (!supportsParamChange) {
2081
- const encoderChannels = this.codecContext.channelLayout.nbChannels;
2323
+ // If encoder doesn't support AV_CODEC_CAP_PARAM_CHANGE, channel count must remain
2324
+ // constant. The capability and the encoder's channel count are stable after open,
2325
+ // so resolve them once instead of per frame.
2326
+ this.supportsParamChange ??= this.codec.hasCapabilities(AV_CODEC_CAP_PARAM_CHANGE);
2327
+ if (!this.supportsParamChange) {
2328
+ this.encoderChannels ??= this.codecContext.channelLayout.nbChannels;
2082
2329
  const frameChannels = frame.channelLayout?.nbChannels ?? 0;
2083
- if (encoderChannels !== frameChannels) {
2084
- throw new Error(`Audio channel count changed (${encoderChannels} -> ${frameChannels}) and encoder '${this.codec.name}' does not support parameter changes`);
2330
+ if (this.encoderChannels !== frameChannels) {
2331
+ throw new Error(`Audio channel count changed (${this.encoderChannels} -> ${frameChannels}) and encoder '${this.codec.name}' does not support parameter changes`);
2085
2332
  }
2086
2333
  }
2087
2334
  }