@invintusmedia/tomp4 1.4.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,502 +0,0 @@
1
- /**
2
- * H.264 I-Frame Encoder (Baseline Profile, CAVLC)
3
- *
4
- * Encodes a single YUV frame as an H.264 IDR (keyframe) using
5
- * Baseline profile with CAVLC entropy coding. Produces SPS, PPS,
6
- * and IDR slice NAL units.
7
- *
8
- * Used by the HLS clipper for smart-rendering: the decoded frame
9
- * at the clip start is re-encoded as a new keyframe.
10
- *
11
- * @module codecs/h264-encoder
12
- */
13
-
14
- import { forwardDCT4x4, forwardHadamard4x4, forwardHadamard2x2, quantize4x4, clip255 } from './h264-transform.js';
15
- import { scanOrder4x4 } from './h264-tables.js';
16
- import { getCoeffToken, getTotalZeros, getTotalZerosChromaDC, getRunBefore, encodeLevels } from './h264-cavlc-tables.js';
17
-
18
- // ── Bitstream Writer ──────────────────────────────────────
19
-
20
/**
 * MSB-first bit writer backed by a Uint8Array that grows on demand.
 *
 * Bits are OR-ed into zero-initialised bytes, so the buffer must never be
 * pre-filled with anything other than zeros.
 */
class BitstreamWriter {
  /**
   * @param {number} [capacity=65536] - Initial buffer size in bytes (may be 0).
   */
  constructor(capacity = 65536) {
    this.data = new Uint8Array(capacity);
    this.bytePos = 0;
    this.bitPos = 0; // bits already written in the current byte (0-7)
  }

  /** Enlarge the backing buffer, preserving everything written so far. */
  _grow() {
    // Doubling alone never leaves length 0, which used to drop every write
    // silently (out-of-range typed-array stores are no-ops).
    const grown = new Uint8Array(Math.max(this.data.length * 2, 16));
    grown.set(this.data);
    this.data = grown;
  }

  /**
   * Append a single bit (only the least significant bit of `bit` is used).
   * @param {number} bit
   */
  writeBit(bit) {
    if (this.bytePos >= this.data.length) this._grow();
    this.data[this.bytePos] |= (bit & 1) << (7 - this.bitPos);
    this.bitPos++;
    if (this.bitPos === 8) {
      this.bitPos = 0;
      this.bytePos++;
    }
  }

  /**
   * Append the low `n` bits of `value`, most significant bit first.
   * @param {number} value
   * @param {number} n - Number of bits to write (0-32).
   */
  writeBits(value, n) {
    for (let i = n - 1; i >= 0; i--) {
      this.writeBit((value >> i) & 1);
    }
  }

  /**
   * Unsigned Exp-Golomb: (numBits - 1) zero bits followed by value + 1.
   * @param {number} value - Integer in 0..2^32-2.
   * @throws {RangeError} If value is not an integer in the encodable range.
   */
  writeUE(value) {
    // A negative value used to emit zero bits and silently corrupt the stream.
    if (!Number.isInteger(value) || value < 0 || value > 0xfffffffe) {
      throw new RangeError(`writeUE: value out of range: ${value}`);
    }
    const codeNum = value + 1;
    const numBits = 32 - Math.clz32(codeNum);
    for (let i = 1; i < numBits; i++) this.writeBit(0);
    this.writeBits(codeNum, numBits);
  }

  /**
   * Signed Exp-Golomb: positive k maps to 2k - 1, non-positive k maps to -2k.
   * @param {number} value
   */
  writeSE(value) {
    this.writeUE(value > 0 ? 2 * value - 1 : -2 * value);
  }

  /** Write RBSP trailing bits (stop bit + zero padding to a byte boundary). */
  writeTrailingBits() {
    this.writeBit(1);
    while (this.bitPos !== 0) this.writeBit(0);
  }

  /**
   * Copy of the bytes written so far; a partially filled last byte is included.
   * @returns {Uint8Array}
   */
  toUint8Array() {
    const len = this.bitPos > 0 ? this.bytePos + 1 : this.bytePos;
    return this.data.slice(0, len);
  }
}
80
-
81
- // ── Emulation Prevention ──────────────────────────────────
82
-
83
/**
 * Insert emulation prevention bytes so the payload never contains
 * 00 00 00, 00 00 01, 00 00 02 or 00 00 03.
 *
 * A 0x03 is emitted before any byte <= 3 that follows two consecutive zero
 * bytes *in the output*. The zero run is reset after each inserted 0x03;
 * checking the input bytes instead would escape overlapping runs twice and
 * produce a 0x03 that a decoder would not strip.
 *
 * @param {Uint8Array} rbsp - Raw payload (NAL header byte included by callers here).
 * @returns {Uint8Array} Escaped payload.
 */
function addEmulationPrevention(rbsp) {
  const out = [];
  let zeroRun = 0; // consecutive 0x00 bytes at the tail of `out`
  for (let i = 0; i < rbsp.length; i++) {
    const byte = rbsp[i];
    if (zeroRun >= 2 && byte <= 3) {
      out.push(0x03); // emulation prevention byte
      zeroRun = 0;
    }
    out.push(byte);
    zeroRun = byte === 0 ? zeroRun + 1 : 0;
  }
  return new Uint8Array(out);
}
93
-
94
- // ── CAVLC Tables ──────────────────────────────────────────
95
-
96
- // coeff_token VLC tables (Table 9-5)
97
- // Indexed by [nC_range][TotalCoeff][TrailingOnes] → [code, length]
98
- // nC_range: 0 (0-1), 1 (2-3), 2 (4-7), 3 (8+)
99
- // This is a large table; we include the most common entries.
100
- // Format: cavlcCoeffToken[nC][totalCoeff][trailingOnes] = [code, codelen]
101
-
102
/**
 * Build the local coeff_token lookup.
 *
 * Layout: table[nCRange][totalCoeff][trailingOnes] = [code, codeLength].
 * Only the first two nC ranges (0 <= nC < 2 and 2 <= nC < 4) are present.
 * A fresh nested array is returned on every call.
 *
 * @returns {Array<Array<Array<Array<number>>>>}
 */
function buildCoeffTokenTable() {
  // 0 <= nC < 2
  const lowRange = [
    [[1, 1]],
    [[5, 6], [1, 2]],
    [[7, 8], [4, 6], [1, 3]],
    [[7, 9], [6, 8], [5, 7], [3, 5]],
    [[7, 10], [6, 9], [5, 8], [3, 6]],
    [[7, 11], [6, 10], [5, 9], [4, 7]],
    [[15, 13], [6, 11], [5, 10], [4, 8]],
    [[11, 13], [14, 13], [5, 11], [4, 9]],
    [[8, 13], [10, 13], [13, 13], [4, 10]],
    [[15, 14], [14, 14], [9, 13], [4, 11]],
    [[11, 14], [10, 14], [13, 14], [12, 13]],
    [[15, 15], [14, 15], [9, 14], [12, 14]],
    [[11, 15], [10, 15], [13, 15], [8, 14]],
    [[15, 16], [1, 15], [9, 15], [12, 15]],
    [[11, 16], [14, 16], [13, 16], [8, 15]],
    [[7, 16], [10, 16], [9, 16], [12, 16]],
    [[4, 16], [6, 16], [5, 16], [8, 16]],
  ];

  // 2 <= nC < 4
  const midRange = [
    [[3, 2]],
    [[11, 6], [2, 2]],
    [[7, 6], [7, 5], [3, 3]],
    [[7, 7], [10, 6], [9, 6], [5, 4]],
    [[7, 8], [6, 6], [5, 6], [4, 4]],
    [[4, 8], [6, 7], [5, 7], [6, 5]],
    [[7, 9], [6, 8], [5, 8], [8, 6]],
    [[15, 11], [6, 9], [5, 9], [4, 6]],
    [[11, 11], [14, 11], [13, 11], [4, 7]],
    [[15, 12], [10, 11], [9, 11], [4, 8]],
    [[11, 12], [14, 12], [13, 12], [12, 11]],
    [[8, 12], [10, 12], [9, 12], [8, 11]],
    [[15, 13], [14, 13], [13, 13], [12, 12]],
    [[11, 13], [10, 13], [9, 13], [12, 13]],
    [[7, 13], [11, 14], [6, 13], [8, 13]],
    [[9, 14], [8, 14], [10, 14], [1, 13]],
    [[7, 14], [6, 14], [5, 14], [4, 14]],
  ];

  return [lowRange, midRange];
}

// Module-level instance of the table above.
const CAVLC_COEFF_TOKEN = buildCoeffTokenTable();
148
-
149
- // ── Intra 16x16 Prediction for Encoder ────────────────────
150
-
151
/**
 * DC predictor for a 16x16 luma block: the rounded mean of whichever
 * neighbouring edges are available, or 128 when neither is.
 *
 * @param {ArrayLike<number>} above - 16 samples from the row above the block.
 * @param {ArrayLike<number>} left - 16 samples from the column left of the block.
 * @param {boolean} hasAbove - Whether `above` should contribute.
 * @param {boolean} hasLeft - Whether `left` should contribute.
 * @returns {number} Predicted sample value.
 */
function predictDC16x16(above, left, hasAbove, hasLeft) {
  const edges = [];
  if (hasAbove) edges.push(above);
  if (hasLeft) edges.push(left);

  if (edges.length === 0) return 128;

  let total = 0;
  for (const edge of edges) {
    for (let k = 0; k < 16; k++) total += edge[k];
  }

  const samples = edges.length * 16;
  // Add half the divisor before dividing so the truncation rounds to nearest.
  return ((total + (samples >> 1)) / samples) | 0;
}
157
-
158
- // ── H.264 I-Frame Encoder ─────────────────────────────────
159
-
160
/**
 * Single-frame H.264 encoder: emits SPS, PPS and one IDR slice made of
 * I_16x16 macroblocks with DC luma prediction and no chroma residual.
 */
export class H264Encoder {
  /**
   * Encode a YUV frame as H.264 IDR NAL units.
   *
   * Only whole macroblocks are coded: `width >> 4` by `height >> 4`, so any
   * remainder on the right/bottom edge is dropped. U and V are accepted for
   * interface symmetry but are not read by the current macroblock coder.
   *
   * @param {Uint8Array} Y - Luma plane (width * height)
   * @param {Uint8Array} U - Chroma U plane ((width/2) * (height/2))
   * @param {Uint8Array} V - Chroma V plane ((width/2) * (height/2))
   * @param {number} width - Frame width (must be multiple of 16)
   * @param {number} height - Frame height (must be multiple of 16)
   * @param {number} [qp=26] - Quantization parameter (0-51, lower = better quality)
   * @returns {Array<Uint8Array>} Array of NAL units [SPS, PPS, IDR]
   * @throws {RangeError} If qp is not an integer in 0..51 or the frame is
   *   smaller than one macroblock.
   */
  encode(Y, U, V, width, height, qp = 26) {
    // Out-of-range or fractional qp indexes past the DC scaling table and
    // yields NaN-derived coefficients; fail loudly instead.
    if (!Number.isInteger(qp) || qp < 0 || qp > 51) {
      throw new RangeError(`qp must be an integer in 0..51, got ${qp}`);
    }

    const mbW = width >> 4;
    const mbH = height >> 4;
    // A zero macroblock count would make the SPS encode "-1" dimensions.
    if (mbW < 1 || mbH < 1) {
      throw new RangeError(`frame must be at least 16x16, got ${width}x${height}`);
    }

    const sps = this._buildSPS(width, height);
    const pps = this._buildPPS();
    const idr = this._buildIDRSlice(Y, U, V, width, height, mbW, mbH, qp);

    return [sps, pps, idr];
  }

  // ── SPS (Baseline Profile) ──────────────────────────────

  /**
   * Build the sequence parameter set NAL unit (header byte included).
   *
   * @param {number} width - Frame width in pixels.
   * @param {number} height - Frame height in pixels.
   * @returns {Uint8Array} Escaped SPS NAL unit.
   */
  _buildSPS(width, height) {
    const mbW = width >> 4;
    const mbH = height >> 4;
    const bs = new BitstreamWriter(64);

    // NAL header: forbidden_zero_bit=0, nal_ref_idc=3, nal_unit_type=7
    bs.writeBits(0x67, 8);

    // profile_idc=66 (Baseline)
    bs.writeBits(66, 8);
    // constraint_set1_flag=1 (0x40); every other constraint/reserved bit 0
    bs.writeBits(0x40, 8);
    // level_idc=40 (4.0)
    bs.writeBits(40, 8);
    // seq_parameter_set_id=0
    bs.writeUE(0);
    // log2_max_frame_num_minus4=0
    bs.writeUE(0);
    // pic_order_cnt_type=0
    bs.writeUE(0);
    // log2_max_pic_order_cnt_lsb_minus4=0
    bs.writeUE(0);
    // max_num_ref_frames=0 (I-only)
    bs.writeUE(0);
    // gaps_in_frame_num_value_allowed_flag=0
    bs.writeBit(0);
    // pic_width_in_mbs_minus1
    bs.writeUE(mbW - 1);
    // pic_height_in_map_units_minus1
    bs.writeUE(mbH - 1);
    // frame_mbs_only_flag=1
    bs.writeBit(1);
    // direct_8x8_inference_flag=0
    bs.writeBit(0);
    // frame_cropping_flag=0
    bs.writeBit(0);
    // vui_parameters_present_flag=0
    bs.writeBit(0);

    bs.writeTrailingBits();
    return addEmulationPrevention(bs.toUint8Array());
  }

  // ── PPS ─────────────────────────────────────────────────

  /**
   * Build the picture parameter set NAL unit (header byte included).
   *
   * @returns {Uint8Array} Escaped PPS NAL unit.
   */
  _buildPPS() {
    const bs = new BitstreamWriter(32);

    // NAL header: nal_ref_idc=3, nal_unit_type=8
    bs.writeBits(0x68, 8);

    // pic_parameter_set_id=0
    bs.writeUE(0);
    // seq_parameter_set_id=0
    bs.writeUE(0);
    // entropy_coding_mode_flag=0 (CAVLC)
    bs.writeBit(0);
    // bottom_field_pic_order_in_frame_present_flag=0
    bs.writeBit(0);
    // num_slice_groups_minus1=0
    bs.writeUE(0);
    // num_ref_idx_l0_default_active_minus1=0
    bs.writeUE(0);
    // num_ref_idx_l1_default_active_minus1=0
    bs.writeUE(0);
    // weighted_pred_flag=0
    bs.writeBit(0);
    // weighted_bipred_idc=0
    bs.writeBits(0, 2);
    // pic_init_qp_minus26=0
    bs.writeSE(0);
    // pic_init_qs_minus26=0
    bs.writeSE(0);
    // chroma_qp_index_offset=0
    bs.writeSE(0);
    // deblocking_filter_control_present_flag=1
    bs.writeBit(1);
    // constrained_intra_pred_flag=0
    bs.writeBit(0);
    // redundant_pic_cnt_present_flag=0
    bs.writeBit(0);

    bs.writeTrailingBits();
    return addEmulationPrevention(bs.toUint8Array());
  }

  // ── IDR Slice ───────────────────────────────────────────

  /**
   * Build the IDR slice NAL unit covering every macroblock of the frame.
   *
   * @param {Uint8Array} Y - Luma plane.
   * @param {Uint8Array} U - Chroma U plane (forwarded, unused downstream).
   * @param {Uint8Array} V - Chroma V plane (forwarded, unused downstream).
   * @param {number} width - Frame width in pixels (also the luma stride).
   * @param {number} height - Frame height in pixels.
   * @param {number} mbW - Macroblocks per row.
   * @param {number} mbH - Macroblock rows.
   * @param {number} qp - Slice quantization parameter.
   * @returns {Uint8Array} Escaped IDR slice NAL unit.
   */
  _buildIDRSlice(Y, U, V, width, height, mbW, mbH, qp) {
    const bs = new BitstreamWriter(width * height); // generous initial capacity

    // Number of coded AC coefficients for every 4x4 luma block of the frame,
    // in raster order. A decoder derives nC for each residual block from its
    // left/above neighbours, so the encoder must track the same counts.
    const nzCounts = new Uint8Array(mbW * 4 * mbH * 4);

    // NAL header: nal_ref_idc=3, nal_unit_type=5 (IDR)
    bs.writeBits(0x65, 8);

    // Slice header
    bs.writeUE(0);      // first_mb_in_slice=0
    bs.writeUE(7);      // slice_type=7 (I, all MBs)
    bs.writeUE(0);      // pic_parameter_set_id=0
    bs.writeBits(0, 4); // frame_num=0 (log2_max_frame_num=4 bits)
    bs.writeUE(0);      // idr_pic_id=0
    bs.writeBits(0, 4); // pic_order_cnt_lsb=0 (4 bits)
    // dec_ref_pic_marking: no_output_of_prior=0, long_term_ref=0
    bs.writeBit(0);
    bs.writeBit(0);
    // slice_qp_delta (PPS declares pic_init_qp = 26)
    bs.writeSE(qp - 26);
    // deblocking: disable_deblocking_filter_idc=1 (disabled for simplicity)
    bs.writeUE(1);

    // Encode macroblocks in raster order
    for (let mbY = 0; mbY < mbH; mbY++) {
      for (let mbX = 0; mbX < mbW; mbX++) {
        this._encodeMB(bs, Y, U, V, width, height, mbX, mbY, mbW, qp, nzCounts);
      }
    }

    bs.writeTrailingBits();
    return addEmulationPrevention(bs.toUint8Array());
  }

  // ── Macroblock Encoding ─────────────────────────────────

  /**
   * Encode one macroblock as I_16x16 with DC luma prediction.
   *
   * @param {BitstreamWriter} bs - Output bitstream.
   * @param {Uint8Array} Y - Luma plane.
   * @param {Uint8Array} U - Chroma U plane (unused: no chroma residual is coded).
   * @param {Uint8Array} V - Chroma V plane (unused: no chroma residual is coded).
   * @param {number} width - Frame width in pixels (luma stride).
   * @param {number} height - Frame height in pixels (unused).
   * @param {number} mbX - Macroblock column.
   * @param {number} mbY - Macroblock row.
   * @param {number} mbW - Macroblocks per row.
   * @param {number} qp - Quantization parameter.
   * @param {Uint8Array|null} [nzCounts=null] - Frame-wide per-4x4-block AC
   *   coefficient counts (raster order, mbW * 4 entries per row), read to
   *   derive nC and updated with this macroblock's counts. When omitted,
   *   nC falls back to 0 for every block.
   */
  _encodeMB(bs, Y, U, V, width, height, mbX, mbY, mbW, qp, nzCounts = null) {
    const strideY = width;
    const hasAbove = mbY > 0;
    const hasLeft = mbX > 0;

    // Neighbour samples for prediction.
    // NOTE(review): these are source samples; a decoder predicts from
    // reconstructed samples, so prediction can drift — confirm intent.
    const above = new Uint8Array(16);
    const left = new Uint8Array(16);
    if (hasAbove) {
      for (let i = 0; i < 16; i++) above[i] = Y[(mbY * 16 - 1) * strideY + mbX * 16 + i];
    }
    if (hasLeft) {
      for (let i = 0; i < 16; i++) left[i] = Y[(mbY * 16 + i) * strideY + mbX * 16 - 1];
    }

    // Use I_16x16 with DC prediction (mode 2)
    const dcPred = predictDC16x16(above, left, hasAbove, hasLeft);

    // Transform + quantize each 4x4 block; `blk` is the RASTER index here.
    const dcCoeffs = new Int32Array(16); // DC values for Hadamard
    const acBlocks = [];                 // AC coefficients per block (raster index)
    let hasAC = false;

    for (let blk = 0; blk < 16; blk++) {
      const bx = (blk & 3) * 4;
      const by = (blk >> 2) * 4;

      const residual = new Int32Array(16);
      for (let y = 0; y < 4; y++) {
        for (let x = 0; x < 4; x++) {
          const px = mbX * 16 + bx + x;
          const py = mbY * 16 + by + y;
          residual[y * 4 + x] = Y[py * strideY + px] - dcPred;
        }
      }

      const coeffs = forwardDCT4x4(residual);
      // NOTE(review): nothing here applies a zig-zag scan; _encodeCavlcBlock
      // expects scan order — presumably quantize4x4 handles it, verify.
      const quantized = quantize4x4(coeffs, qp);

      // NOTE(review): this feeds the already-quantized DC into the Hadamard
      // stage, which is then quantized again below — confirm against the
      // scaling of quantize4x4 / forwardHadamard4x4.
      dcCoeffs[blk] = quantized[0];

      const ac = new Int32Array(15);
      for (let i = 1; i < 16; i++) ac[i - 1] = quantized[i];
      acBlocks.push(ac);

      if (!hasAC) hasAC = ac.some((level) => level !== 0);
    }

    // Hadamard transform on DC coefficients
    const dcHadamard = forwardHadamard4x4(dcCoeffs);
    // Quantize DC (simplified: divide by QP step)
    const dcQuantized = new Int32Array(16);
    const qpDiv6 = (qp / 6) | 0;
    const qpMod6 = qp % 6;
    const dcMF = [13107, 11916, 10082, 9362, 8192, 7282][qpMod6];
    for (let i = 0; i < 16; i++) {
      const sign = dcHadamard[i] < 0 ? -1 : 1;
      dcQuantized[i] = sign * ((Math.abs(dcHadamard[i]) * dcMF + (1 << (15 + qpDiv6)) / 3) >> (16 + qpDiv6));
    }

    // mb_type for I_16x16 in an I slice:
    //   1 + predMode + cbpChroma * 4 + (cbpLuma > 0 ? 12 : 0)
    const cbpLuma = hasAC ? 15 : 0;
    const cbpChroma = 0; // simplified: skip chroma residual
    const predMode = 2;  // DC
    const mbType = 1 + predMode + cbpChroma * 4 + (cbpLuma > 0 ? 12 : 0);
    bs.writeUE(mbType);

    // intra_chroma_pred_mode = 0 (DC) — required for ALL intra MBs
    bs.writeUE(0);

    // mb_qp_delta = 0 (every MB uses the slice QP)
    bs.writeSE(0);

    // nC for the 4x4 block at frame block coordinates (col, row): the
    // rounded mean of the left/above counts when both exist, the single
    // available one otherwise, 0 when neither exists.
    const blocksPerRow = mbW * 4;
    const blkCol0 = mbX * 4;
    const blkRow0 = mbY * 4;
    const predictNC = (col, row) => {
      if (nzCounts === null) return 0;
      const hasA = col > 0;
      const hasB = row > 0;
      const nA = hasA ? nzCounts[row * blocksPerRow + col - 1] : 0;
      const nB = hasB ? nzCounts[(row - 1) * blocksPerRow + col] : 0;
      if (hasA && hasB) return (nA + nB + 1) >> 1;
      if (hasA) return nA;
      return hasB ? nB : 0;
    };

    // DC Hadamard block: nC is derived as for luma block index 0. Its
    // coefficient count is not recorded; only AC counts feed later nC values.
    this._encodeCavlcBlock(bs, dcQuantized, 16, predictNC(blkCol0, blkRow0));

    // AC blocks (only present when cbpLuma != 0). They are written in
    // luma4x4BlkIdx order — four 4x4 blocks per 8x8 quadrant — not raster
    // order, so map each index back to the raster slot used above.
    if (cbpLuma > 0) {
      for (let blkIdx = 0; blkIdx < 16; blkIdx++) {
        const col4 = ((blkIdx >> 2) & 1) * 2 + (blkIdx & 1);
        const row4 = (blkIdx >> 3) * 2 + ((blkIdx >> 1) & 1);
        const col = blkCol0 + col4;
        const row = blkRow0 + row4;

        const coded = this._encodeCavlcBlock(
          bs, acBlocks[row4 * 4 + col4], 15, predictNC(col, row)
        );
        if (nzCounts !== null) nzCounts[row * blocksPerRow + col] = coded;
      }
    }
    // When cbpLuma is 0 the counts stay at their initial 0.

    // Chroma: cbpChroma=0, so no chroma residual is written.
  }

  // ── CAVLC Block Encoding ────────────────────────────────

  /**
   * Encode one residual block with CAVLC using the imported VLC helpers
   * (coeff_token, levels, total_zeros, run_before).
   *
   * @param {BitstreamWriter} bs - Output bitstream
   * @param {Int32Array} coeffs - Quantized coefficients in scan order
   * @param {number} maxCoeff - Maximum coefficients (16 for 4x4, 15 for AC)
   * @param {number} nC - Predicted number of non-zero coefficients
   * @returns {number} Number of non-zero coefficients written (TotalCoeff).
   */
  _encodeCavlcBlock(bs, coeffs, maxCoeff, nC) {
    // Step 1: collect non-zero coefficients from the highest scan position down.
    const nonZeroValues = [];    // level values in reverse scan order
    const nonZeroPositions = []; // matching scan positions
    for (let i = maxCoeff - 1; i >= 0; i--) {
      if (coeffs[i] !== 0) {
        nonZeroValues.push(coeffs[i]);
        nonZeroPositions.push(i);
      }
    }
    const totalCoeff = nonZeroValues.length;

    // Trailing ones: up to three consecutive +/-1 levels at the high end of
    // the scan, i.e. at the start of nonZeroValues.
    let trailingOnes = 0;
    while (
      trailingOnes < Math.min(totalCoeff, 3) &&
      Math.abs(nonZeroValues[trailingOnes]) === 1
    ) {
      trailingOnes++;
    }

    // Step 2: coeff_token
    const [ctBits, ctLen] = getCoeffToken(totalCoeff, trailingOnes, nC);
    bs.writeBits(ctBits, ctLen);

    if (totalCoeff === 0) return 0;

    // Step 3: trailing ones sign flags (0 = positive, 1 = negative)
    for (let i = 0; i < trailingOnes; i++) {
      bs.writeBit(nonZeroValues[i] < 0 ? 1 : 0);
    }

    // Step 4: remaining levels, still in reverse scan order
    if (totalCoeff > trailingOnes) {
      const remainingLevels = nonZeroValues.slice(trailingOnes);
      const { bits: levelBits, lengths: levelLens } = encodeLevels(
        remainingLevels, trailingOnes, totalCoeff
      );
      for (let i = 0; i < levelBits.length; i++) {
        bs.writeBits(levelBits[i], levelLens[i]);
      }
    }

    // Steps 5-6 apply only when the block is not completely full.
    if (totalCoeff < maxCoeff) {
      // Zeros at or below the last non-zero scan position.
      const lastNonZeroPos = nonZeroPositions[0];
      const totalZeros = lastNonZeroPos + 1 - totalCoeff;

      const [tzBits, tzLen] = getTotalZeros(totalCoeff, totalZeros);
      bs.writeBits(tzBits, tzLen);

      // run_before for every coefficient except the lowest one, whose
      // position is implied by the zeros left over.
      let zerosLeft = totalZeros;
      for (let i = 0; i < totalCoeff - 1 && zerosLeft > 0; i++) {
        // Zeros between this coefficient and the next lower non-zero one.
        const run = nonZeroPositions[i] - nonZeroPositions[i + 1] - 1;

        const [rbBits, rbLen] = getRunBefore(zerosLeft, run);
        bs.writeBits(rbBits, rbLen);
        zerosLeft -= run;
      }
    }

    return totalCoeff;
  }
}

export default H264Encoder;