node-av 6.0.0-beta.8 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/api/bitstream-filter.js +0 -2
- package/dist/api/bitstream-filter.js.map +1 -1
- package/dist/api/decoder.d.ts +169 -9
- package/dist/api/decoder.js +351 -43
- package/dist/api/decoder.js.map +1 -1
- package/dist/api/encoder.d.ts +129 -0
- package/dist/api/encoder.js +288 -41
- package/dist/api/encoder.js.map +1 -1
- package/dist/api/filter.js +0 -2
- package/dist/api/filter.js.map +1 -1
- package/dist/api/muxer.d.ts +19 -0
- package/dist/api/muxer.js +44 -24
- package/dist/api/muxer.js.map +1 -1
- package/dist/api/rtp-stream.d.ts +14 -11
- package/dist/api/rtp-stream.js +22 -47
- package/dist/api/rtp-stream.js.map +1 -1
- package/dist/api/scaler.js.map +1 -1
- package/dist/api/utilities/async-queue.js +0 -2
- package/dist/api/utilities/async-queue.js.map +1 -1
- package/dist/api/utilities/codec-format.d.ts +87 -0
- package/dist/api/utilities/codec-format.js +117 -0
- package/dist/api/utilities/codec-format.js.map +1 -0
- package/dist/api/utilities/index.d.ts +1 -0
- package/dist/api/utilities/index.js +2 -0
- package/dist/api/utilities/index.js.map +1 -1
- package/dist/api/utilities/whisper-model.d.ts +20 -0
- package/dist/api/utilities/whisper-model.js +61 -2
- package/dist/api/utilities/whisper-model.js.map +1 -1
- package/dist/constants/bsf-options.d.ts +8 -1
- package/dist/constants/constants.d.ts +2 -0
- package/dist/constants/constants.js +2 -0
- package/dist/constants/constants.js.map +1 -1
- package/dist/constants/format-options.d.ts +4 -0
- package/dist/lib/binding.d.ts +2 -0
- package/dist/lib/binding.js.map +1 -1
- package/dist/lib/frame.d.ts +8 -0
- package/dist/lib/frame.js +10 -0
- package/dist/lib/frame.js.map +1 -1
- package/dist/lib/native-types.d.ts +12 -0
- package/dist/lib/packet.d.ts +8 -0
- package/dist/lib/packet.js +10 -0
- package/dist/lib/packet.js.map +1 -1
- package/dist/lib/utilities.d.ts +45 -0
- package/dist/lib/utilities.js +49 -0
- package/dist/lib/utilities.js.map +1 -1
- package/package.json +16 -16
package/dist/api/encoder.d.ts
CHANGED
|
@@ -90,6 +90,38 @@ export interface EncoderOptions<C = unknown> {
|
|
|
90
90
|
* @default FFmpeg default (both methods, codec chooses best)
|
|
91
91
|
*/
|
|
92
92
|
threadType?: AVThreadType;
|
|
93
|
+
/**
|
|
94
|
+
* Automatically resample incoming audio to a format the codec supports.
|
|
95
|
+
*
|
|
96
|
+
* Audio encoders only accept specific sample rates, sample formats, and channel
|
|
97
|
+
* layouts (e.g. libmp3lame rejects 96 kHz; AAC needs planar `fltp`). When `true`,
|
|
98
|
+
* the encoder transparently converts each frame to the nearest supported
|
|
99
|
+
* sample rate / sample format / channel layout (like `ffmpeg`'s automatic
|
|
100
|
+
* `aresample`). When `false` (default), an unsupported input raises a descriptive
|
|
101
|
+
* error instead — keeping behaviour explicit and 1:1 with the codec.
|
|
102
|
+
*
|
|
103
|
+
* Has no effect on video.
|
|
104
|
+
*
|
|
105
|
+
* @default false
|
|
106
|
+
*/
|
|
107
|
+
autoResample?: boolean;
|
|
108
|
+
/**
|
|
109
|
+
* Automatically convert incoming video to a pixel format the codec supports.
|
|
110
|
+
*
|
|
111
|
+
* Video encoders only accept specific pixel formats (e.g. libx264 wants planar
|
|
112
|
+
* YUV like `yuv420p` and rejects `rgb24`). When `true`, the encoder transparently
|
|
113
|
+
* converts each frame to the least-loss supported pixel format via swscale (like
|
|
114
|
+
* `ffmpeg`'s automatic `format` filter), keeping the same resolution. When `false`
|
|
115
|
+
* (default), an unsupported input raises a descriptive error instead — keeping
|
|
116
|
+
* behaviour explicit and 1:1 with the codec.
|
|
117
|
+
*
|
|
118
|
+
* Resolution is never changed (the encoder already adopts the frame's dimensions),
|
|
119
|
+
* and hardware frames are left untouched (their format is negotiated via the
|
|
120
|
+
* hardware frames context). Has no effect on audio.
|
|
121
|
+
*
|
|
122
|
+
* @default false
|
|
123
|
+
*/
|
|
124
|
+
autoFormat?: boolean;
|
|
93
125
|
/**
|
|
94
126
|
* Additional codec-specific options.
|
|
95
127
|
*
|
|
@@ -174,6 +206,16 @@ export declare class Encoder implements Disposable {
|
|
|
174
206
|
private opts?;
|
|
175
207
|
private options;
|
|
176
208
|
private audioFrameBuffer?;
|
|
209
|
+
private autoResample;
|
|
210
|
+
private audioResampler?;
|
|
211
|
+
private resampledFrame?;
|
|
212
|
+
private audioInputLayout?;
|
|
213
|
+
private autoFormat;
|
|
214
|
+
private videoScaler?;
|
|
215
|
+
private scaledFrame?;
|
|
216
|
+
private videoTargetFormat?;
|
|
217
|
+
private supportsParamChange?;
|
|
218
|
+
private encoderChannels?;
|
|
177
219
|
private inputQueue;
|
|
178
220
|
private outputQueue;
|
|
179
221
|
private workerPromise;
|
|
@@ -1156,6 +1198,93 @@ export declare class Encoder implements Disposable {
|
|
|
1156
1198
|
* @internal
|
|
1157
1199
|
*/
|
|
1158
1200
|
private setupHardwareAcceleration;
|
|
1201
|
+
/**
|
|
1202
|
+
* Configure the codec context's audio parameters from the first frame.
|
|
1203
|
+
*
|
|
1204
|
+
* Audio encoders only accept specific sample rates / sample formats / channel
|
|
1205
|
+
* layouts. This picks codec-supported targets; if they differ from the input it
|
|
1206
|
+
* either sets up a resampler (when `autoResample`) or throws a descriptive error.
|
|
1207
|
+
*
|
|
1208
|
+
* @param frame - First audio frame
|
|
1209
|
+
*
|
|
1210
|
+
* @throws {Error} If the input is unsupported and `autoResample` is disabled
|
|
1211
|
+
*
|
|
1212
|
+
* @throws {FFmpegError} If the resampler fails to configure
|
|
1213
|
+
*
|
|
1214
|
+
* @internal
|
|
1215
|
+
*/
|
|
1216
|
+
private setupAudioParams;
|
|
1217
|
+
/**
|
|
1218
|
+
* Lazily allocate the reused resampler output frame.
|
|
1219
|
+
*
|
|
1220
|
+
* @returns The allocated output frame
|
|
1221
|
+
*
|
|
1222
|
+
* @internal
|
|
1223
|
+
*/
|
|
1224
|
+
private getResampleFrame;
|
|
1225
|
+
/**
|
|
1226
|
+
* Resample an incoming audio frame to the codec's target format.
|
|
1227
|
+
*
|
|
1228
|
+
* Reuses a single output frame; `swr_convert_frame` allocates/sizes its buffer.
|
|
1229
|
+
* The (fixed-frame-size) audio FIFO copies the samples and re-stamps PTS, so the
|
|
1230
|
+
* reused frame and its carried timing are only relevant on the non-FIFO path.
|
|
1231
|
+
*
|
|
1232
|
+
* @param frame - Source audio frame
|
|
1233
|
+
*
|
|
1234
|
+
* @returns The resampled frame (owned by the encoder, reused across calls)
|
|
1235
|
+
*
|
|
1236
|
+
* @internal
|
|
1237
|
+
*/
|
|
1238
|
+
private resampleAudio;
|
|
1239
|
+
/**
|
|
1240
|
+
* Drain samples buffered inside the resampler (rate-conversion delay) into the
|
|
1241
|
+
* encoder path. Returns the drained frame if any, else null.
|
|
1242
|
+
*
|
|
1243
|
+
* @returns The drained frame (reused), or null when the resampler is empty
|
|
1244
|
+
*
|
|
1245
|
+
* @internal
|
|
1246
|
+
*/
|
|
1247
|
+
private drainResampler;
|
|
1248
|
+
/**
|
|
1249
|
+
* Configure the codec context's pixel format from the first video frame.
|
|
1250
|
+
*
|
|
1251
|
+
* Video encoders only accept specific pixel formats. This keeps the input format
|
|
1252
|
+
* when the codec accepts it; otherwise it either sets up a swscale converter to
|
|
1253
|
+
* the least-loss supported format (when `autoFormat`) or throws a descriptive
|
|
1254
|
+
* error. Hardware frames are left untouched - their format is negotiated through
|
|
1255
|
+
* the hardware frames context, not swscale.
|
|
1256
|
+
*
|
|
1257
|
+
* @param frame - First video frame
|
|
1258
|
+
*
|
|
1259
|
+
* @throws {Error} If the input is unsupported and `autoFormat` is disabled
|
|
1260
|
+
*
|
|
1261
|
+
* @throws {FFmpegError} If the converter fails to configure
|
|
1262
|
+
*
|
|
1263
|
+
* @internal
|
|
1264
|
+
*/
|
|
1265
|
+
private setupVideoFormat;
|
|
1266
|
+
/**
|
|
1267
|
+
* Lazily allocate the reused scaler output frame.
|
|
1268
|
+
*
|
|
1269
|
+
* @returns The allocated output frame
|
|
1270
|
+
*
|
|
1271
|
+
* @internal
|
|
1272
|
+
*/
|
|
1273
|
+
private getScaledFrame;
|
|
1274
|
+
/**
|
|
1275
|
+
* Convert an incoming video frame to the codec's target pixel format.
|
|
1276
|
+
*
|
|
1277
|
+
* Reuses a single output frame; `sws_scale_frame` allocates/sizes its buffer.
|
|
1278
|
+
* Resolution is unchanged - only the pixel format differs. Timing is carried over
|
|
1279
|
+
* explicitly so the encoder's PTS rescale stays correct.
|
|
1280
|
+
*
|
|
1281
|
+
* @param frame - Source video frame
|
|
1282
|
+
*
|
|
1283
|
+
* @returns The converted frame (owned by the encoder, reused across calls)
|
|
1284
|
+
*
|
|
1285
|
+
* @internal
|
|
1286
|
+
*/
|
|
1287
|
+
private scaleVideo;
|
|
1159
1288
|
/**
|
|
1160
1289
|
* Prepare frame for encoding.
|
|
1161
1290
|
*
|
package/dist/api/encoder.js
CHANGED
|
@@ -50,7 +50,8 @@ var __disposeResources = (this && this.__disposeResources) || (function (Suppres
|
|
|
50
50
|
var e = new Error(message);
|
|
51
51
|
return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
|
|
52
52
|
});
|
|
53
|
-
|
|
53
|
+
/* eslint-disable @stylistic/indent-binary-ops */
|
|
54
|
+
import { AV_CHANNEL_ORDER_UNSPEC, AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, AV_CODEC_CAP_PARAM_CHANGE, AV_CODEC_FLAG_COPY_OPAQUE, AV_CODEC_FLAG_FRAME_DURATION, AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, AV_PICTURE_TYPE_NONE, AV_PIX_FMT_NONE, AV_PKT_FLAG_TRUSTED, AVCHROMA_LOC_UNSPECIFIED, AVERROR_EAGAIN, AVERROR_ENCODER_NOT_FOUND, AVERROR_EOF, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_VIDEO, EOF, SWS_BILINEAR, } from '../constants/constants.js';
|
|
54
55
|
import { CodecContext } from '../lib/codec-context.js';
|
|
55
56
|
import { Codec } from '../lib/codec.js';
|
|
56
57
|
import { Dictionary } from '../lib/dictionary.js';
|
|
@@ -58,10 +59,13 @@ import { FFmpegError } from '../lib/error.js';
|
|
|
58
59
|
import { Frame } from '../lib/frame.js';
|
|
59
60
|
import { Packet } from '../lib/packet.js';
|
|
60
61
|
import { Rational } from '../lib/rational.js';
|
|
61
|
-
import {
|
|
62
|
+
import { SoftwareResampleContext } from '../lib/software-resample-context.js';
|
|
63
|
+
import { SoftwareScaleContext } from '../lib/software-scale-context.js';
|
|
64
|
+
import { avChannelLayoutDefault, avGetPixFmtName, avGetSampleFmtName, avRescaleQ } from '../lib/utilities.js';
|
|
62
65
|
import { AudioFrameBuffer } from './audio-frame-buffer.js';
|
|
63
66
|
import { FRAME_THREAD_QUEUE_SIZE, PACKET_THREAD_QUEUE_SIZE } from './constants.js';
|
|
64
67
|
import { AsyncQueue } from './utilities/async-queue.js';
|
|
68
|
+
import { pickSupportedLayout, pickSupportedPixelFormat, pickSupportedRate, pickSupportedSampleFormat } from './utilities/codec-format.js';
|
|
65
69
|
import { SchedulerControl } from './utilities/scheduler.js';
|
|
66
70
|
import { parseBitrate } from './utils.js';
|
|
67
71
|
/**
|
|
@@ -132,6 +136,16 @@ export class Encoder {
|
|
|
132
136
|
opts;
|
|
133
137
|
options;
|
|
134
138
|
audioFrameBuffer;
|
|
139
|
+
autoResample;
|
|
140
|
+
audioResampler;
|
|
141
|
+
resampledFrame;
|
|
142
|
+
audioInputLayout;
|
|
143
|
+
autoFormat;
|
|
144
|
+
videoScaler;
|
|
145
|
+
scaledFrame;
|
|
146
|
+
videoTargetFormat;
|
|
147
|
+
supportsParamChange;
|
|
148
|
+
encoderChannels;
|
|
135
149
|
// Worker pattern for push-based processing
|
|
136
150
|
inputQueue;
|
|
137
151
|
outputQueue;
|
|
@@ -154,6 +168,8 @@ export class Encoder {
|
|
|
154
168
|
this.codec = codec;
|
|
155
169
|
this.options = options;
|
|
156
170
|
this.opts = opts;
|
|
171
|
+
this.autoResample = options.autoResample ?? false;
|
|
172
|
+
this.autoFormat = options.autoFormat ?? false;
|
|
157
173
|
this.packet = new Packet();
|
|
158
174
|
this.packet.alloc();
|
|
159
175
|
this.inputQueue = new AsyncQueue(FRAME_THREAD_QUEUE_SIZE, (f) => f.free());
|
|
@@ -736,8 +752,15 @@ export class Encoder {
|
|
|
736
752
|
// Open encoder if not already done
|
|
737
753
|
this.initializePromise ??= this.initialize(frame);
|
|
738
754
|
await this.initializePromise;
|
|
755
|
+
// Give an unspecified-layout frame the concrete native layout the codec was
|
|
756
|
+
// opened with (and the resampler configured for), so both accept it.
|
|
757
|
+
if (this.audioInputLayout) {
|
|
758
|
+
frame.channelLayout = this.audioInputLayout;
|
|
759
|
+
}
|
|
760
|
+
// Convert to the codec's format first (audio resample / video pixfmt).
|
|
761
|
+
const input = this.audioResampler ? this.resampleAudio(frame) : this.videoScaler ? this.scaleVideo(frame) : frame;
|
|
739
762
|
// Prepare frame for encoding (set quality, validate channel count)
|
|
740
|
-
this.prepareFrameForEncoding(
|
|
763
|
+
this.prepareFrameForEncoding(input);
|
|
741
764
|
const encode = async (newFrame) => {
|
|
742
765
|
const sendRet = await this.codecContext.sendFrame(newFrame);
|
|
743
766
|
if (sendRet < 0 && sendRet !== AVERROR_EOF) {
|
|
@@ -747,10 +770,10 @@ export class Encoder {
|
|
|
747
770
|
};
|
|
748
771
|
if (this.audioFrameBuffer) {
|
|
749
772
|
// Push frame into buffer - actual sending happens in receive()
|
|
750
|
-
await this.audioFrameBuffer.push(
|
|
773
|
+
await this.audioFrameBuffer.push(input);
|
|
751
774
|
}
|
|
752
775
|
else {
|
|
753
|
-
await encode(
|
|
776
|
+
await encode(input);
|
|
754
777
|
}
|
|
755
778
|
}
|
|
756
779
|
/**
|
|
@@ -804,8 +827,15 @@ export class Encoder {
|
|
|
804
827
|
if (!this.initialized) {
|
|
805
828
|
this.initializeSync(frame);
|
|
806
829
|
}
|
|
830
|
+
// Give an unspecified-layout frame the concrete native layout the codec was
|
|
831
|
+
// opened with (and the resampler configured for), so both accept it.
|
|
832
|
+
if (this.audioInputLayout) {
|
|
833
|
+
frame.channelLayout = this.audioInputLayout;
|
|
834
|
+
}
|
|
835
|
+
// Convert to the codec's format first (audio resample / video pixfmt).
|
|
836
|
+
const input = this.audioResampler ? this.resampleAudio(frame) : this.videoScaler ? this.scaleVideo(frame) : frame;
|
|
807
837
|
// Prepare frame for encoding (set quality, validate channel count)
|
|
808
|
-
this.prepareFrameForEncoding(
|
|
838
|
+
this.prepareFrameForEncoding(input);
|
|
809
839
|
const encode = (newFrame) => {
|
|
810
840
|
const sendRet = this.codecContext.sendFrameSync(newFrame);
|
|
811
841
|
if (sendRet < 0 && sendRet !== AVERROR_EOF) {
|
|
@@ -815,10 +845,10 @@ export class Encoder {
|
|
|
815
845
|
};
|
|
816
846
|
if (this.audioFrameBuffer) {
|
|
817
847
|
// Push frame into buffer - actual sending happens in receiveSync()
|
|
818
|
-
this.audioFrameBuffer.pushSync(
|
|
848
|
+
this.audioFrameBuffer.pushSync(input);
|
|
819
849
|
}
|
|
820
850
|
else {
|
|
821
|
-
encode(
|
|
851
|
+
encode(input);
|
|
822
852
|
}
|
|
823
853
|
}
|
|
824
854
|
/**
|
|
@@ -1183,6 +1213,16 @@ export class Encoder {
|
|
|
1183
1213
|
if (this.isClosed || !this.initialized) {
|
|
1184
1214
|
return;
|
|
1185
1215
|
}
|
|
1216
|
+
// Drain samples buffered inside the resampler into the FIFO/encoder first.
|
|
1217
|
+
const drained = this.drainResampler();
|
|
1218
|
+
if (drained) {
|
|
1219
|
+
if (this.audioFrameBuffer) {
|
|
1220
|
+
await this.audioFrameBuffer.push(drained);
|
|
1221
|
+
}
|
|
1222
|
+
else {
|
|
1223
|
+
await this.codecContext.sendFrame(drained);
|
|
1224
|
+
}
|
|
1225
|
+
}
|
|
1186
1226
|
// If using AudioFrameBuffer, flush remaining buffered samples first
|
|
1187
1227
|
if (this.audioFrameBuffer && this.audioFrameBuffer.size > 0) {
|
|
1188
1228
|
// Pull any remaining partial frame (may be less than frameSize)
|
|
@@ -1243,6 +1283,16 @@ export class Encoder {
|
|
|
1243
1283
|
if (this.isClosed || !this.initialized) {
|
|
1244
1284
|
return;
|
|
1245
1285
|
}
|
|
1286
|
+
// Drain samples buffered inside the resampler into the FIFO/encoder first.
|
|
1287
|
+
const drained = this.drainResampler();
|
|
1288
|
+
if (drained) {
|
|
1289
|
+
if (this.audioFrameBuffer) {
|
|
1290
|
+
this.audioFrameBuffer.pushSync(drained);
|
|
1291
|
+
}
|
|
1292
|
+
else {
|
|
1293
|
+
this.codecContext.sendFrameSync(drained);
|
|
1294
|
+
}
|
|
1295
|
+
}
|
|
1246
1296
|
// If using AudioFrameBuffer, flush remaining buffered samples first
|
|
1247
1297
|
if (this.audioFrameBuffer && this.audioFrameBuffer.size > 0) {
|
|
1248
1298
|
// Pull any remaining partial frame (may be less than frameSize)
|
|
@@ -1602,18 +1652,22 @@ export class Encoder {
|
|
|
1602
1652
|
return;
|
|
1603
1653
|
}
|
|
1604
1654
|
this.isClosed = true;
|
|
1605
|
-
// Close queues
|
|
1606
1655
|
this.inputQueue.close();
|
|
1607
1656
|
this.outputQueue.close();
|
|
1608
|
-
// Free any frames/packets left buffered on an aborted/early-closed pipeline.
|
|
1609
1657
|
this.inputQueue.clear();
|
|
1610
1658
|
this.outputQueue.clear();
|
|
1611
1659
|
this.packet.free();
|
|
1612
|
-
this.codecContext.freeContext();
|
|
1613
|
-
// Release the audio frame buffer (owns a native Frame + AudioFifo) used by
|
|
1614
|
-
// fixed-frame-size audio encoders.
|
|
1615
1660
|
this.audioFrameBuffer?.[Symbol.dispose]();
|
|
1616
1661
|
this.audioFrameBuffer = undefined;
|
|
1662
|
+
this.audioResampler?.[Symbol.dispose]();
|
|
1663
|
+
this.audioResampler = undefined;
|
|
1664
|
+
this.resampledFrame?.free();
|
|
1665
|
+
this.resampledFrame = undefined;
|
|
1666
|
+
this.videoScaler?.[Symbol.dispose]();
|
|
1667
|
+
this.videoScaler = undefined;
|
|
1668
|
+
this.scaledFrame?.free();
|
|
1669
|
+
this.scaledFrame = undefined;
|
|
1670
|
+
this.codecContext.freeContext();
|
|
1617
1671
|
this.initialized = false;
|
|
1618
1672
|
}
|
|
1619
1673
|
/**
|
|
@@ -1806,7 +1860,8 @@ export class Encoder {
|
|
|
1806
1860
|
}
|
|
1807
1861
|
this.codecContext.width = frame.width;
|
|
1808
1862
|
this.codecContext.height = frame.height;
|
|
1809
|
-
|
|
1863
|
+
// Pick a codec-supported pixel format (converting on demand when autoFormat).
|
|
1864
|
+
this.setupVideoFormat(frame);
|
|
1810
1865
|
this.codecContext.sampleAspectRatio = frame.sampleAspectRatio;
|
|
1811
1866
|
this.codecContext.colorRange = frame.colorRange;
|
|
1812
1867
|
this.codecContext.colorPrimaries = frame.colorPrimaries;
|
|
@@ -1818,12 +1873,8 @@ export class Encoder {
|
|
|
1818
1873
|
}
|
|
1819
1874
|
}
|
|
1820
1875
|
else {
|
|
1821
|
-
// Audio:
|
|
1822
|
-
|
|
1823
|
-
this.codecContext.timeBase = frame.timeBase;
|
|
1824
|
-
this.codecContext.sampleRate = frame.sampleRate;
|
|
1825
|
-
this.codecContext.sampleFormat = frame.format;
|
|
1826
|
-
this.codecContext.channelLayout = frame.channelLayout;
|
|
1876
|
+
// Audio: pick codec-supported sample rate/format/layout (resampling on demand).
|
|
1877
|
+
this.setupAudioParams(frame);
|
|
1827
1878
|
}
|
|
1828
1879
|
// Setup hardware acceleration with validation
|
|
1829
1880
|
this.setupHardwareAcceleration(frame);
|
|
@@ -1899,7 +1950,8 @@ export class Encoder {
|
|
|
1899
1950
|
}
|
|
1900
1951
|
this.codecContext.width = frame.width;
|
|
1901
1952
|
this.codecContext.height = frame.height;
|
|
1902
|
-
|
|
1953
|
+
// Pick a codec-supported pixel format (converting on demand when autoFormat).
|
|
1954
|
+
this.setupVideoFormat(frame);
|
|
1903
1955
|
this.codecContext.sampleAspectRatio = frame.sampleAspectRatio;
|
|
1904
1956
|
this.codecContext.colorRange = frame.colorRange;
|
|
1905
1957
|
this.codecContext.colorPrimaries = frame.colorPrimaries;
|
|
@@ -1911,12 +1963,8 @@ export class Encoder {
|
|
|
1911
1963
|
}
|
|
1912
1964
|
}
|
|
1913
1965
|
else {
|
|
1914
|
-
// Audio:
|
|
1915
|
-
|
|
1916
|
-
this.codecContext.timeBase = frame.timeBase;
|
|
1917
|
-
this.codecContext.sampleRate = frame.sampleRate;
|
|
1918
|
-
this.codecContext.sampleFormat = frame.format;
|
|
1919
|
-
this.codecContext.channelLayout = frame.channelLayout;
|
|
1966
|
+
// Audio: pick codec-supported sample rate/format/layout (resampling on demand).
|
|
1967
|
+
this.setupAudioParams(frame);
|
|
1920
1968
|
}
|
|
1921
1969
|
// Setup hardware acceleration with validation
|
|
1922
1970
|
this.setupHardwareAcceleration(frame);
|
|
@@ -2009,6 +2057,199 @@ export class Encoder {
|
|
|
2009
2057
|
}
|
|
2010
2058
|
}
|
|
2011
2059
|
}
|
|
2060
|
+
/**
|
|
2061
|
+
* Configure the codec context's audio parameters from the first frame.
|
|
2062
|
+
*
|
|
2063
|
+
* Audio encoders only accept specific sample rates / sample formats / channel
|
|
2064
|
+
* layouts. This picks codec-supported targets; if they differ from the input it
|
|
2065
|
+
* either sets up a resampler (when `autoResample`) or throws a descriptive error.
|
|
2066
|
+
*
|
|
2067
|
+
* @param frame - First audio frame
|
|
2068
|
+
*
|
|
2069
|
+
* @throws {Error} If the input is unsupported and `autoResample` is disabled
|
|
2070
|
+
*
|
|
2071
|
+
* @throws {FFmpegError} If the resampler fails to configure
|
|
2072
|
+
*
|
|
2073
|
+
* @internal
|
|
2074
|
+
*/
|
|
2075
|
+
setupAudioParams(frame) {
|
|
2076
|
+
// Always use frame timebase (typically 1/sample_rate) for correct audio PTS.
|
|
2077
|
+
this.codecContext.timeBase = frame.timeBase;
|
|
2078
|
+
const inRate = frame.sampleRate;
|
|
2079
|
+
const inFmt = frame.format;
|
|
2080
|
+
// Codec open and swr both need a concrete layout. PCM/raw frames often carry
|
|
2081
|
+
// an unspecified layout (order UNSPEC, mask 0); normalize it to the canonical
|
|
2082
|
+
// native layout and re-apply it to each incoming frame (see encode()) so it
|
|
2083
|
+
// matches the opened codec context / resampler input.
|
|
2084
|
+
let inLayout = frame.channelLayout;
|
|
2085
|
+
if (inLayout.order === AV_CHANNEL_ORDER_UNSPEC) {
|
|
2086
|
+
inLayout = avChannelLayoutDefault(inLayout.nbChannels);
|
|
2087
|
+
this.audioInputLayout = inLayout;
|
|
2088
|
+
}
|
|
2089
|
+
const targetRate = pickSupportedRate(inRate, this.codec.supportedSamplerates);
|
|
2090
|
+
const targetFmt = pickSupportedSampleFormat(inFmt, this.codec.sampleFormats);
|
|
2091
|
+
const targetLayout = pickSupportedLayout(inLayout, this.codec.channelLayouts);
|
|
2092
|
+
const needsResample = targetRate !== inRate || targetFmt !== inFmt || targetLayout.nbChannels !== inLayout.nbChannels;
|
|
2093
|
+
if (needsResample && !this.autoResample) {
|
|
2094
|
+
const rates = this.codec.supportedSamplerates;
|
|
2095
|
+
throw new Error(`Encoder '${this.codec.name}' does not support the input audio format ` +
|
|
2096
|
+
`(${inRate} Hz, ${avGetSampleFmtName(inFmt) ?? inFmt}, ${inLayout.nbChannels}ch)` +
|
|
2097
|
+
(rates && rates.length > 0 ? `. Supported sample rates: ${rates.join(', ')}` : '') +
|
|
2098
|
+
'. Set { autoResample: true } on the encoder, or convert the input with an aresample/aformat filter first.');
|
|
2099
|
+
}
|
|
2100
|
+
this.codecContext.sampleRate = targetRate;
|
|
2101
|
+
this.codecContext.sampleFormat = targetFmt;
|
|
2102
|
+
this.codecContext.channelLayout = targetLayout;
|
|
2103
|
+
if (needsResample) {
|
|
2104
|
+
const swr = new SoftwareResampleContext();
|
|
2105
|
+
FFmpegError.throwIfError(swr.allocSetOpts2(targetLayout, targetFmt, targetRate, inLayout, inFmt, inRate), 'Failed to configure audio resampler');
|
|
2106
|
+
FFmpegError.throwIfError(swr.init(), 'Failed to initialize audio resampler');
|
|
2107
|
+
this.audioResampler = swr;
|
|
2108
|
+
}
|
|
2109
|
+
}
|
|
2110
|
+
/**
|
|
2111
|
+
* Lazily allocate the reused resampler output frame.
|
|
2112
|
+
*
|
|
2113
|
+
* @returns The allocated output frame
|
|
2114
|
+
*
|
|
2115
|
+
* @internal
|
|
2116
|
+
*/
|
|
2117
|
+
getResampleFrame() {
|
|
2118
|
+
if (!this.resampledFrame) {
|
|
2119
|
+
this.resampledFrame = new Frame();
|
|
2120
|
+
this.resampledFrame.alloc();
|
|
2121
|
+
}
|
|
2122
|
+
return this.resampledFrame;
|
|
2123
|
+
}
|
|
2124
|
+
/**
|
|
2125
|
+
* Resample an incoming audio frame to the codec's target format.
|
|
2126
|
+
*
|
|
2127
|
+
* Reuses a single output frame; `swr_convert_frame` allocates/sizes its buffer.
|
|
2128
|
+
* The (fixed-frame-size) audio FIFO copies the samples and re-stamps PTS, so the
|
|
2129
|
+
* reused frame and its carried timing are only relevant on the non-FIFO path.
|
|
2130
|
+
*
|
|
2131
|
+
* @param frame - Source audio frame
|
|
2132
|
+
*
|
|
2133
|
+
* @returns The resampled frame (owned by the encoder, reused across calls)
|
|
2134
|
+
*
|
|
2135
|
+
* @internal
|
|
2136
|
+
*/
|
|
2137
|
+
resampleAudio(frame) {
|
|
2138
|
+
const out = this.getResampleFrame();
|
|
2139
|
+
out.unref();
|
|
2140
|
+
out.format = this.codecContext.sampleFormat;
|
|
2141
|
+
out.sampleRate = this.codecContext.sampleRate;
|
|
2142
|
+
out.channelLayout = this.codecContext.channelLayout;
|
|
2143
|
+
FFmpegError.throwIfError(this.audioResampler.convertFrame(out, frame), 'Failed to resample audio frame');
|
|
2144
|
+
out.timeBase = frame.timeBase;
|
|
2145
|
+
out.pts = frame.pts;
|
|
2146
|
+
return out;
|
|
2147
|
+
}
|
|
2148
|
+
/**
|
|
2149
|
+
* Drain samples buffered inside the resampler (rate-conversion delay) into the
|
|
2150
|
+
* encoder path. Returns the drained frame if any, else null.
|
|
2151
|
+
*
|
|
2152
|
+
* @returns The drained frame (reused), or null when the resampler is empty
|
|
2153
|
+
*
|
|
2154
|
+
* @internal
|
|
2155
|
+
*/
|
|
2156
|
+
drainResampler() {
|
|
2157
|
+
if (!this.audioResampler) {
|
|
2158
|
+
return null;
|
|
2159
|
+
}
|
|
2160
|
+
const out = this.getResampleFrame();
|
|
2161
|
+
out.unref();
|
|
2162
|
+
out.format = this.codecContext.sampleFormat;
|
|
2163
|
+
out.sampleRate = this.codecContext.sampleRate;
|
|
2164
|
+
out.channelLayout = this.codecContext.channelLayout;
|
|
2165
|
+
const ret = this.audioResampler.convertFrame(out, null);
|
|
2166
|
+
if (ret < 0 || out.nbSamples <= 0) {
|
|
2167
|
+
return null;
|
|
2168
|
+
}
|
|
2169
|
+
return out;
|
|
2170
|
+
}
|
|
2171
|
+
/**
|
|
2172
|
+
* Configure the codec context's pixel format from the first video frame.
|
|
2173
|
+
*
|
|
2174
|
+
* Video encoders only accept specific pixel formats. This keeps the input format
|
|
2175
|
+
* when the codec accepts it; otherwise it either sets up a swscale converter to
|
|
2176
|
+
* the least-loss supported format (when `autoFormat`) or throws a descriptive
|
|
2177
|
+
* error. Hardware frames are left untouched - their format is negotiated through
|
|
2178
|
+
* the hardware frames context, not swscale.
|
|
2179
|
+
*
|
|
2180
|
+
* @param frame - First video frame
|
|
2181
|
+
*
|
|
2182
|
+
* @throws {Error} If the input is unsupported and `autoFormat` is disabled
|
|
2183
|
+
*
|
|
2184
|
+
* @throws {FFmpegError} If the converter fails to configure
|
|
2185
|
+
*
|
|
2186
|
+
* @internal
|
|
2187
|
+
*/
|
|
2188
|
+
setupVideoFormat(frame) {
|
|
2189
|
+
const inFmt = frame.format;
|
|
2190
|
+
// Hardware frames carry a hw pixfmt negotiated via hw_frames_ctx; swscale can't
|
|
2191
|
+
// touch them - leave the format untouched.
|
|
2192
|
+
if (frame.isHwFrame()) {
|
|
2193
|
+
this.codecContext.pixelFormat = inFmt;
|
|
2194
|
+
return;
|
|
2195
|
+
}
|
|
2196
|
+
const targetFmt = pickSupportedPixelFormat(inFmt, this.codec.pixelFormats);
|
|
2197
|
+
const needsConversion = targetFmt !== inFmt;
|
|
2198
|
+
if (needsConversion && !this.autoFormat) {
|
|
2199
|
+
const supported = this.codec.pixelFormats;
|
|
2200
|
+
throw new Error(`Encoder '${this.codec.name}' does not support the input pixel format ` +
|
|
2201
|
+
`(${avGetPixFmtName(inFmt) ?? inFmt}). Supported: ${supported.map((f) => avGetPixFmtName(f) ?? f).join(', ')}` +
|
|
2202
|
+
'. Set { autoFormat: true } on the encoder, or convert the input with a scale/format filter first.');
|
|
2203
|
+
}
|
|
2204
|
+
this.codecContext.pixelFormat = targetFmt;
|
|
2205
|
+
// Set up a same-size swscale converter when the codec needs a different format.
|
|
2206
|
+
if (needsConversion) {
|
|
2207
|
+
this.videoTargetFormat = targetFmt;
|
|
2208
|
+
const sws = new SoftwareScaleContext();
|
|
2209
|
+
sws.getContext(frame.width, frame.height, inFmt, frame.width, frame.height, targetFmt, SWS_BILINEAR);
|
|
2210
|
+
FFmpegError.throwIfError(sws.initContext(), 'Failed to configure pixel-format converter');
|
|
2211
|
+
this.videoScaler = sws;
|
|
2212
|
+
}
|
|
2213
|
+
}
|
|
2214
|
+
/**
|
|
2215
|
+
* Lazily allocate the reused scaler output frame.
|
|
2216
|
+
*
|
|
2217
|
+
* @returns The allocated output frame
|
|
2218
|
+
*
|
|
2219
|
+
* @internal
|
|
2220
|
+
*/
|
|
2221
|
+
getScaledFrame() {
|
|
2222
|
+
if (!this.scaledFrame) {
|
|
2223
|
+
this.scaledFrame = new Frame();
|
|
2224
|
+
this.scaledFrame.alloc();
|
|
2225
|
+
}
|
|
2226
|
+
return this.scaledFrame;
|
|
2227
|
+
}
|
|
2228
|
+
/**
|
|
2229
|
+
* Convert an incoming video frame to the codec's target pixel format.
|
|
2230
|
+
*
|
|
2231
|
+
* Reuses a single output frame; `sws_scale_frame` allocates/sizes its buffer.
|
|
2232
|
+
* Resolution is unchanged - only the pixel format differs. Timing is carried over
|
|
2233
|
+
* explicitly so the encoder's PTS rescale stays correct.
|
|
2234
|
+
*
|
|
2235
|
+
* @param frame - Source video frame
|
|
2236
|
+
*
|
|
2237
|
+
* @returns The converted frame (owned by the encoder, reused across calls)
|
|
2238
|
+
*
|
|
2239
|
+
* @internal
|
|
2240
|
+
*/
|
|
2241
|
+
scaleVideo(frame) {
|
|
2242
|
+
const out = this.getScaledFrame();
|
|
2243
|
+
out.unref();
|
|
2244
|
+
out.format = this.videoTargetFormat;
|
|
2245
|
+
out.width = frame.width;
|
|
2246
|
+
out.height = frame.height;
|
|
2247
|
+
FFmpegError.throwIfError(this.videoScaler.scaleFrameSync(out, frame), 'Failed to convert video frame format');
|
|
2248
|
+
out.timeBase = frame.timeBase;
|
|
2249
|
+
out.pts = frame.pts;
|
|
2250
|
+
out.duration = frame.duration;
|
|
2251
|
+
return out;
|
|
2252
|
+
}
|
|
2012
2253
|
/**
|
|
2013
2254
|
* Prepare frame for encoding.
|
|
2014
2255
|
*
|
|
@@ -2038,24 +2279,26 @@ export class Encoder {
|
|
|
2038
2279
|
// - Audio: frame.timeBase from first frame (typically 1/sample_rate)
|
|
2039
2280
|
const encoderTimebase = this.codecContext.timeBase;
|
|
2040
2281
|
const oldTimebase = frame.timeBase;
|
|
2282
|
+
const pts = frame.pts;
|
|
2283
|
+
const duration = frame.duration;
|
|
2041
2284
|
// IMPORTANT: Calculate duration BEFORE converting frame timebase
|
|
2042
2285
|
// This matches FFmpeg's video_sync_process() which calculates:
|
|
2043
2286
|
// duration = frame->duration * av_q2d(frame->time_base) / av_q2d(ofp->tb_out)
|
|
2044
2287
|
// We need the OLD timebase to convert duration properly
|
|
2045
2288
|
let frameDuration;
|
|
2046
|
-
if (
|
|
2289
|
+
if (duration && duration > 0n) {
|
|
2047
2290
|
// Convert duration from frame timebase to encoder timebase
|
|
2048
2291
|
// This ensures encoder gets correct frame duration for timestamps
|
|
2049
|
-
frameDuration = avRescaleQ(
|
|
2292
|
+
frameDuration = avRescaleQ(duration, oldTimebase, encoderTimebase);
|
|
2050
2293
|
}
|
|
2051
2294
|
else {
|
|
2052
2295
|
// Default to 1 (constant frame rate behavior)
|
|
2053
2296
|
// Matches FFmpeg's CFR mode: frame->duration = 1
|
|
2054
2297
|
frameDuration = 1n;
|
|
2055
2298
|
}
|
|
2056
|
-
if (
|
|
2299
|
+
if (pts !== null && pts !== undefined) {
|
|
2057
2300
|
// Convert PTS to encoder timebase
|
|
2058
|
-
frame.pts = avRescaleQ(
|
|
2301
|
+
frame.pts = avRescaleQ(pts, oldTimebase, encoderTimebase);
|
|
2059
2302
|
// IMPORTANT: Set frame timebase to encoder timebase
|
|
2060
2303
|
// FFmpeg does this in adjust_frame_pts_to_encoder_tb(): frame->time_base = tb_dst
|
|
2061
2304
|
// This ensures encoder gets frames with correct timebase (1/framerate for video, 1/sample_rate for audio)
|
|
@@ -2066,22 +2309,26 @@ export class Encoder {
|
|
|
2066
2309
|
// based on vsync_method (CFR: 1, VFR: calculated, PASSTHROUGH: calculated)
|
|
2067
2310
|
// Since we don't have automatic filter like FFmpeg, we always set it here
|
|
2068
2311
|
frame.duration = frameDuration;
|
|
2069
|
-
|
|
2312
|
+
const codecType = this.codecContext.codecType;
|
|
2313
|
+
if (codecType === AVMEDIA_TYPE_VIDEO) {
|
|
2070
2314
|
// Video: Set frame quality from encoder's global quality
|
|
2071
2315
|
// Only set if encoder has globalQuality configured and frame doesn't already have quality set
|
|
2072
|
-
|
|
2073
|
-
|
|
2316
|
+
const globalQuality = this.codecContext.globalQuality;
|
|
2317
|
+
if (globalQuality > 0 && frame.quality <= 0) {
|
|
2318
|
+
frame.quality = globalQuality;
|
|
2074
2319
|
}
|
|
2075
2320
|
}
|
|
2076
|
-
else if (
|
|
2321
|
+
else if (codecType === AVMEDIA_TYPE_AUDIO) {
|
|
2077
2322
|
// Audio: Validate channel count consistency
|
|
2078
|
-
// If encoder doesn't support AV_CODEC_CAP_PARAM_CHANGE, channel count must remain
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2323
|
+
// If encoder doesn't support AV_CODEC_CAP_PARAM_CHANGE, channel count must remain
|
|
2324
|
+
// constant. The capability and the encoder's channel count are stable after open,
|
|
2325
|
+
// so resolve them once instead of per frame.
|
|
2326
|
+
this.supportsParamChange ??= this.codec.hasCapabilities(AV_CODEC_CAP_PARAM_CHANGE);
|
|
2327
|
+
if (!this.supportsParamChange) {
|
|
2328
|
+
this.encoderChannels ??= this.codecContext.channelLayout.nbChannels;
|
|
2082
2329
|
const frameChannels = frame.channelLayout?.nbChannels ?? 0;
|
|
2083
|
-
if (encoderChannels !== frameChannels) {
|
|
2084
|
-
throw new Error(`Audio channel count changed (${encoderChannels} -> ${frameChannels}) and encoder '${this.codec.name}' does not support parameter changes`);
|
|
2330
|
+
if (this.encoderChannels !== frameChannels) {
|
|
2331
|
+
throw new Error(`Audio channel count changed (${this.encoderChannels} -> ${frameChannels}) and encoder '${this.codec.name}' does not support parameter changes`);
|
|
2085
2332
|
}
|
|
2086
2333
|
}
|
|
2087
2334
|
}
|