@invintusmedia/tomp4 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,838 @@
1
+ /**
2
+ * Browser-only transcoding using WebCodecs API
3
+ *
4
+ * This module provides hardware-accelerated video transcoding in the browser.
5
+ * It uses WebCodecs for decoding and encoding, achieving faster-than-realtime performance.
6
+ *
7
+ * Supports both MPEG-TS and MP4 input files.
8
+ *
9
+ * @example
10
+ * import { transcode } from 'tomp4';
11
+ *
12
+ * const result = await transcode(videoData, {
13
+ * width: 1280,
14
+ * height: 720,
15
+ * bitrate: 2_000_000,
16
+ * onProgress: (msg) => console.log(msg)
17
+ * });
18
+ *
19
+ * @module transcode
20
+ * @browser-only
21
+ */
22
+
23
+ import { TSParser } from './parsers/mpegts.js';
24
+ import { TSMuxer } from './muxers/mpegts.js';
25
+
26
// Re-export TSMuxer so consumers can build MPEG-TS output without
// importing the muxer submodule directly.
export { TSMuxer };
28
+
29
+ // ============================================
30
+ // Format Detection
31
+ // ============================================
32
+
33
/**
 * Heuristically detect MPEG-TS input.
 *
 * Accepts the buffer when it begins with the TS sync byte (0x47), or when
 * two sync bytes appear exactly one 188-byte packet apart somewhere inside
 * the first packet's worth of data (tolerates leading garbage).
 *
 * @param {Uint8Array} data - Candidate file contents
 * @returns {boolean} true when the data looks like an MPEG transport stream
 */
function isMpegTs(data) {
  if (data.length < 4) return false;
  if (data[0] === 0x47) return true;

  const scanEnd = Math.min(188, data.length);
  for (let pos = 0; pos < scanEnd; pos++) {
    const syncHere = data[pos] === 0x47;
    const nextPacketVisible = pos + 188 < data.length;
    if (syncHere && nextPacketVisible && data[pos + 188] === 0x47) {
      return true;
    }
  }
  return false;
}
41
+
42
/**
 * Detect MP4/ISO-BMFF input by checking for an 'ftyp' box type code
 * at byte offset 4 (the first four bytes are the box size).
 *
 * @param {Uint8Array} data - Candidate file contents
 * @returns {boolean} true when the file starts with an 'ftyp' box
 */
function isMp4(data) {
  if (data.length < 8) return false;
  const FTYP = [0x66, 0x74, 0x79, 0x70]; // 'f', 't', 'y', 'p'
  return FTYP.every((byte, i) => data[4 + i] === byte);
}
47
+
48
+ // ============================================
49
+ // MP4 Parser (for transcoding input)
50
+ // ============================================
51
+
52
/**
 * Simple MP4 parser that extracts video/audio samples for transcoding.
 *
 * Handles progressive (non-fragmented) MP4 only: requires top-level 'moov'
 * and 'mdat' boxes and walks the sample tables (stsd/stsz/stco|co64/stsc/
 * stts/ctts/stss) to recover per-sample file offsets, sizes, and timestamps.
 * Video samples are split into AVCC-prefixed NAL units; audio samples are
 * kept as raw payloads. All timestamps are converted to a 90 kHz clock.
 */
class MP4Parser {
  constructor() {
    // Video access units: { pts, dts, isKey, nalUnits } (90 kHz timestamps)
    this.videoAccessUnits = [];
    // Audio access units: { pts, data } with raw sample payloads (90 kHz)
    this.audioAccessUnits = [];
    this.videoWidth = null;
    this.videoHeight = null;
    this.audioSampleRate = null;
    this.audioChannels = null;
    // Raw SPS/PPS NAL units extracted from the avcC box
    this.sps = null;
    this.pps = null;
  }

  /**
   * Parse a complete MP4 file and populate the access-unit lists.
   * @param {Uint8Array} data - Entire MP4 file contents
   * @throws {Error} if no 'moov' or no 'mdat' box is found
   */
  parse(data) {
    this.data = data;
    this.view = new DataView(data.buffer, data.byteOffset, data.byteLength);

    // Find moov and mdat boxes at the top level.
    // NOTE(review): box sizes of 0 ("extends to end of file") and 1
    // (64-bit largesize) are not handled; both hit the `size < 8` bail-out.
    let offset = 0;
    let moov = null, mdat = null, mdatOffset = 0;

    while (offset + 8 <= data.length) {
      const size = this.view.getUint32(offset);
      const type = this.readString(offset + 4, 4);

      if (size < 8) break;

      if (type === 'moov') {
        moov = { offset, size };
      } else if (type === 'mdat') {
        // Only the last mdat is remembered. Sample extraction below uses
        // absolute stco/co64 file offsets, so mdatOffset itself is only
        // threaded through for reference and never consumed.
        mdat = { offset, size };
        mdatOffset = offset + 8;
      }

      offset += size;
    }

    if (!moov) throw new Error('No moov box found in MP4');
    if (!mdat) throw new Error('No mdat box found in MP4');

    // Parse moov to get track info
    this.parseMoov(moov.offset + 8, moov.offset + moov.size, mdatOffset);
  }

  // Read `len` bytes at `offset` as an ASCII string (used for box type codes).
  readString(offset, len) {
    let s = '';
    for (let i = 0; i < len; i++) s += String.fromCharCode(this.data[offset + i]);
    return s;
  }

  // Big-endian unsigned 32-bit read.
  readUint32(offset) {
    return this.view.getUint32(offset);
  }

  // Big-endian unsigned 16-bit read.
  readUint16(offset) {
    return this.view.getUint16(offset);
  }

  /**
   * Walk the children of 'moov': read the movie timescale from 'mvhd' and
   * descend into each 'trak'. Note the movie-level timescale is read but
   * not used afterward — per-track timing uses the media timescale from
   * each track's own 'mdhd'.
   */
  parseMoov(start, end, mdatOffset) {
    let offset = start;
    let timescale = 1000;

    while (offset + 8 <= end) {
      const size = this.readUint32(offset);
      const type = this.readString(offset + 4, 4);

      if (size < 8) break;

      if (type === 'mvhd') {
        // Movie header - get timescale (field offset differs by box version)
        const version = this.data[offset + 8];
        timescale = version === 1
          ? this.readUint32(offset + 28)
          : this.readUint32(offset + 20);
      } else if (type === 'trak') {
        this.parseTrak(offset + 8, offset + size, mdatOffset);
      }

      offset += size;
    }
  }

  /**
   * Parse one 'trak': find the handler type ('vide'/'soun') from 'hdlr',
   * the media timescale from 'mdhd', and the sample table ('stbl') nested
   * inside 'minf'; then hand the sample table to parseStbl.
   */
  parseTrak(start, end, mdatOffset) {
    let offset = start;
    let trackType = null;
    let mediaTimescale = 1000;
    let stbl = null;

    while (offset + 8 <= end) {
      const size = this.readUint32(offset);
      const type = this.readString(offset + 4, 4);

      if (size < 8) break;

      if (type === 'mdia') {
        // Parse mdia to find handler type and stbl
        let mdiaOff = offset + 8;
        const mdiaEnd = offset + size;

        while (mdiaOff + 8 <= mdiaEnd) {
          const mSize = this.readUint32(mdiaOff);
          const mType = this.readString(mdiaOff + 4, 4);

          if (mSize < 8) break;

          if (mType === 'mdhd') {
            // Media header: timescale offset depends on box version
            const version = this.data[mdiaOff + 8];
            mediaTimescale = version === 1
              ? this.readUint32(mdiaOff + 28)
              : this.readUint32(mdiaOff + 20);
          } else if (mType === 'hdlr') {
            // Handler type at offset 16: 'vide' or 'soun'
            trackType = this.readString(mdiaOff + 16, 4);
          } else if (mType === 'minf') {
            // Find stbl in minf
            let minfOff = mdiaOff + 8;
            const minfEnd = mdiaOff + mSize;

            while (minfOff + 8 <= minfEnd) {
              const sSize = this.readUint32(minfOff);
              const sType = this.readString(minfOff + 4, 4);

              if (sSize < 8) break;

              if (sType === 'stbl') {
                stbl = { offset: minfOff + 8, end: minfOff + sSize };
              }

              minfOff += sSize;
            }
          }

          mdiaOff += mSize;
        }
      }

      offset += size;
    }

    if (stbl && trackType) {
      this.parseStbl(stbl.offset, stbl.end, trackType, mediaTimescale, mdatOffset);
    }
  }

  /**
   * Parse a sample table: collect the child boxes, read codec parameters
   * from 'stsd', build the flat sample list, and extract samples from the
   * file. Bails out silently if any required table is missing.
   */
  parseStbl(start, end, trackType, timescale, mdatOffset) {
    let offset = start;

    // Sample table data
    let stsd = null;
    let stsz = null; // sample sizes
    let stco = null; // chunk offsets
    let stsc = null; // sample-to-chunk
    let stts = null; // time-to-sample
    let ctts = null; // composition time offsets
    let stss = null; // sync samples (keyframes)

    while (offset + 8 <= end) {
      const size = this.readUint32(offset);
      const type = this.readString(offset + 4, 4);

      if (size < 8) break;

      if (type === 'stsd') stsd = { offset: offset + 8, size: size - 8 };
      else if (type === 'stsz') stsz = { offset: offset + 8, size: size - 8 };
      else if (type === 'stco') stco = { offset: offset + 8, size: size - 8, is64: false };
      else if (type === 'co64') stco = { offset: offset + 8, size: size - 8, is64: true };
      else if (type === 'stsc') stsc = { offset: offset + 8, size: size - 8 };
      else if (type === 'stts') stts = { offset: offset + 8, size: size - 8 };
      else if (type === 'ctts') ctts = { offset: offset + 8, size: size - 8 };
      else if (type === 'stss') stss = { offset: offset + 8, size: size - 8 };

      offset += size;
    }

    if (!stsd || !stsz || !stco || !stsc || !stts) return;

    // Parse sample description for codec info
    if (trackType === 'vide') {
      this.parseVideoStsd(stsd.offset, stsd.size);
    } else if (trackType === 'soun') {
      this.parseAudioStsd(stsd.offset, stsd.size);
    }

    // Build sample list
    const samples = this.buildSampleList(stsz, stco, stsc, stts, ctts, stss, timescale);

    // Extract samples from mdat
    if (trackType === 'vide') {
      this.extractVideoSamples(samples);
    } else if (trackType === 'soun') {
      this.extractAudioSamples(samples);
    }
  }

  /**
   * Parse the first video sample-description entry: record coded
   * width/height and locate the 'avcC' configuration box.
   * Assumes an AVC visual sample entry layout: width/height at offsets
   * 32/34 within the entry, child boxes after the fixed 86-byte header.
   */
  parseVideoStsd(offset, size) {
    // Skip version/flags and entry count
    const entryCount = this.readUint32(offset + 4);
    if (entryCount < 1) return;

    let off = offset + 8;
    const entrySize = this.readUint32(off);
    const codec = this.readString(off + 4, 4);

    // Visual sample entry: skip to width/height
    this.videoWidth = this.readUint16(off + 32);
    this.videoHeight = this.readUint16(off + 34);

    // Find avcC box
    let boxOff = off + 86; // Skip visual sample entry header
    const boxEnd = off + entrySize;

    while (boxOff + 8 <= boxEnd) {
      const boxSize = this.readUint32(boxOff);
      const boxType = this.readString(boxOff + 4, 4);

      if (boxSize < 8) break;

      if (boxType === 'avcC') {
        this.parseAvcC(boxOff + 8, boxSize - 8);
        break;
      }

      boxOff += boxSize;
    }
  }

  /**
   * Parse an AVCDecoderConfigurationRecord (ISO/IEC 14496-15) and store
   * the first SPS and PPS NAL units. Only the first of each is kept.
   * The NAL length size is decoded but not enforced here; samples are
   * assumed to use 4-byte length prefixes (see parseAvccNalUnits).
   */
  parseAvcC(offset, size) {
    // avcC structure
    const configVersion = this.data[offset];
    const avcProfile = this.data[offset + 1];
    const profileCompat = this.data[offset + 2];
    const avcLevel = this.data[offset + 3];
    const nalLengthSize = (this.data[offset + 4] & 0x03) + 1;

    // SPS
    const numSps = this.data[offset + 5] & 0x1f;
    let off = offset + 6;

    if (numSps > 0) {
      const spsLen = this.readUint16(off);
      this.sps = this.data.slice(off + 2, off + 2 + spsLen);
      off += 2 + spsLen;
    }

    // PPS
    const numPps = this.data[off];
    off++;

    if (numPps > 0) {
      const ppsLen = this.readUint16(off);
      this.pps = this.data.slice(off + 2, off + 2 + ppsLen);
    }
  }

  /**
   * Parse the first audio sample-description entry: channel count at
   * entry offset 24, sample rate stored as 16.16 fixed point at offset 32
   * (hence the >> 16).
   */
  parseAudioStsd(offset, size) {
    // Skip version/flags and entry count
    const entryCount = this.readUint32(offset + 4);
    if (entryCount < 1) return;

    let off = offset + 8;
    const entrySize = this.readUint32(off);
    const codec = this.readString(off + 4, 4);

    // Audio sample entry
    this.audioChannels = this.readUint16(off + 24);
    this.audioSampleRate = this.readUint32(off + 32) >> 16;
  }

  /**
   * Flatten the sample tables into one list of
   * { offset, size, pts, dts, isKey } records with 90 kHz timestamps.
   *
   * @returns {Array<Object>} samples in decode order
   */
  buildSampleList(stsz, stco, stsc, stts, ctts, stss, timescale) {
    const samples = [];

    // Parse sample sizes (stsz: a default size of 0 means per-sample sizes)
    const defaultSize = this.readUint32(stsz.offset + 4);
    const sampleCount = this.readUint32(stsz.offset + 8);
    const sizes = [];

    if (defaultSize === 0) {
      for (let i = 0; i < sampleCount; i++) {
        sizes.push(this.readUint32(stsz.offset + 12 + i * 4));
      }
    } else {
      for (let i = 0; i < sampleCount; i++) sizes.push(defaultSize);
    }

    // Parse chunk offsets (absolute file offsets)
    const chunkCount = this.readUint32(stco.offset + 4);
    const chunkOffsets = [];

    for (let i = 0; i < chunkCount; i++) {
      if (stco.is64) {
        // 64-bit offsets (co64) — combined via multiply since JS numbers
        // hold integers exactly up to 2^53
        const hi = this.readUint32(stco.offset + 8 + i * 8);
        const lo = this.readUint32(stco.offset + 12 + i * 8);
        chunkOffsets.push(hi * 0x100000000 + lo);
      } else {
        chunkOffsets.push(this.readUint32(stco.offset + 8 + i * 4));
      }
    }

    // Parse sample-to-chunk (run-length: each entry applies from
    // firstChunk until the next entry's firstChunk)
    const stscEntryCount = this.readUint32(stsc.offset + 4);
    const stscEntries = [];

    for (let i = 0; i < stscEntryCount; i++) {
      stscEntries.push({
        firstChunk: this.readUint32(stsc.offset + 8 + i * 12),
        samplesPerChunk: this.readUint32(stsc.offset + 12 + i * 12),
        sampleDescIdx: this.readUint32(stsc.offset + 16 + i * 12)
      });
    }

    // Parse time-to-sample (run-length DTS deltas in media timescale)
    const sttsEntryCount = this.readUint32(stts.offset + 4);
    const sttsEntries = [];

    for (let i = 0; i < sttsEntryCount; i++) {
      sttsEntries.push({
        count: this.readUint32(stts.offset + 8 + i * 8),
        delta: this.readUint32(stts.offset + 12 + i * 8)
      });
    }

    // Parse composition time offsets (optional; read as signed to cover
    // version-1 ctts — version-0 values are small enough not to conflict)
    const cttsOffsets = [];
    if (ctts) {
      const cttsEntryCount = this.readUint32(ctts.offset + 4);
      let sampleIdx = 0;

      for (let i = 0; i < cttsEntryCount; i++) {
        const count = this.readUint32(ctts.offset + 8 + i * 8);
        const offset = this.view.getInt32(ctts.offset + 12 + i * 8);

        for (let j = 0; j < count; j++) {
          cttsOffsets[sampleIdx++] = offset;
        }
      }
    }

    // Parse sync samples (optional)
    const syncSamples = new Set();
    if (stss) {
      const stssEntryCount = this.readUint32(stss.offset + 4);
      for (let i = 0; i < stssEntryCount; i++) {
        syncSamples.add(this.readUint32(stss.offset + 8 + i * 4) - 1); // 0-indexed
      }
    }

    // Build sample offsets from chunk info
    const sampleOffsets = [];
    let sampleIdx = 0;
    let stscIdx = 0;

    for (let chunkIdx = 0; chunkIdx < chunkCount; chunkIdx++) {
      // Find which stsc entry applies to this chunk (firstChunk is 1-based)
      while (stscIdx + 1 < stscEntries.length &&
             stscEntries[stscIdx + 1].firstChunk <= chunkIdx + 1) {
        stscIdx++;
      }

      const samplesInChunk = stscEntries[stscIdx].samplesPerChunk;
      let chunkOffset = chunkOffsets[chunkIdx];

      // Samples are laid out back-to-back inside each chunk
      for (let i = 0; i < samplesInChunk && sampleIdx < sampleCount; i++) {
        sampleOffsets.push(chunkOffset);
        chunkOffset += sizes[sampleIdx];
        sampleIdx++;
      }
    }

    // Build final sample list with timestamps
    let dts = 0;
    let sttsIdx = 0;
    let sttsRemaining = sttsEntries[0]?.count || 0;

    for (let i = 0; i < sampleCount; i++) {
      const cts = cttsOffsets[i] || 0;
      const pts = dts + cts;

      samples.push({
        offset: sampleOffsets[i],
        size: sizes[i],
        pts: Math.round(pts / timescale * 90000), // Convert to 90kHz
        dts: Math.round(dts / timescale * 90000),
        // NOTE(review): ISO/IEC 14496-12 says an absent stss means EVERY
        // sample is a sync sample; here only sample 0 is marked key in
        // that case — confirm downstream impact (transcode() re-derives
        // video keyframes from NAL type 5 rather than this flag).
        isKey: stss ? syncSamples.has(i) : (i === 0) // If no stss, assume first frame is key
      });

      // Advance DTS
      if (sttsIdx < sttsEntries.length) {
        dts += sttsEntries[sttsIdx].delta;
        sttsRemaining--;

        if (sttsRemaining === 0 && sttsIdx + 1 < sttsEntries.length) {
          sttsIdx++;
          sttsRemaining = sttsEntries[sttsIdx].count;
        }
      }
    }

    return samples;
  }

  /**
   * Slice each video sample out of the file and split it into NAL units;
   * samples that yield no usable NAL units are dropped.
   */
  extractVideoSamples(samples) {
    for (const sample of samples) {
      // Read sample data and parse NAL units
      const sampleData = this.data.slice(sample.offset, sample.offset + sample.size);
      const nalUnits = this.parseAvccNalUnits(sampleData);

      if (nalUnits.length > 0) {
        this.videoAccessUnits.push({
          pts: sample.pts,
          dts: sample.dts,
          isKey: sample.isKey,
          nalUnits
        });
      }
    }
  }

  /**
   * Split an AVCC sample into NAL units. Assumes 4-byte big-endian length
   * prefixes; stops at the first zero/invalid or truncated length.
   * @returns {Uint8Array[]} NAL unit payloads without length prefixes
   */
  parseAvccNalUnits(data) {
    // AVCC format: 4-byte length prefix (usually) followed by NAL unit
    const nalUnits = [];
    let offset = 0;

    while (offset + 4 <= data.length) {
      const len = (data[offset] << 24) | (data[offset + 1] << 16) |
                  (data[offset + 2] << 8) | data[offset + 3];

      if (len <= 0 || offset + 4 + len > data.length) break;

      nalUnits.push(data.slice(offset + 4, offset + 4 + len));
      offset += 4 + len;
    }

    return nalUnits;
  }

  // Slice each audio sample out of the file as an opaque payload.
  extractAudioSamples(samples) {
    for (const sample of samples) {
      const sampleData = this.data.slice(sample.offset, sample.offset + sample.size);

      this.audioAccessUnits.push({
        pts: sample.pts,
        data: sampleData
      });
    }
  }

  // No-op: kept for interface parity with TSParser (see transcode()).
  finalize() {
    // Nothing needed - parsing is synchronous
  }
}
504
+
505
+ // ============================================
506
+ // WebCodecs Support Check
507
+ // ============================================
508
+
509
/**
 * Feature-detect the WebCodecs API surface this module relies on.
 * @returns {boolean} true when decoder, encoder, frame, and chunk
 *   constructors are all available in the current environment
 */
export function isWebCodecsSupported() {
  const required = ['VideoDecoder', 'VideoEncoder', 'VideoFrame', 'EncodedVideoChunk'];
  return required.every((name) => typeof globalThis[name] !== 'undefined');
}
519
+
520
/**
 * Assert that the WebCodecs primitives exist, throwing a descriptive
 * error when the current environment cannot run the transcoder.
 * @throws {Error} when any required WebCodecs constructor is missing
 */
function requireWebCodecs() {
  const available =
    typeof VideoDecoder !== 'undefined' &&
    typeof VideoEncoder !== 'undefined' &&
    typeof VideoFrame !== 'undefined' &&
    typeof EncodedVideoChunk !== 'undefined';
  if (!available) {
    throw new Error('WebCodecs API not available. This feature requires a modern browser (Chrome 94+, Edge 94+, or Safari 16.4+).');
  }
}
528
+
529
+ // ============================================
530
+ // Utility Functions
531
+ // ============================================
532
+
533
/**
 * Concatenate a list of byte arrays into one newly allocated Uint8Array.
 * @param {Uint8Array[]} arrays - Chunks to join, in order
 * @returns {Uint8Array} buffer containing every chunk back-to-back
 */
function concat(arrays) {
  let total = 0;
  for (const chunk of arrays) total += chunk.length;

  const out = new Uint8Array(total);
  let cursor = 0;
  for (const chunk of arrays) {
    out.set(chunk, cursor);
    cursor += chunk.length;
  }
  return out;
}
540
+
541
/**
 * Build an AVCDecoderConfigurationRecord ('avcC' payload, ISO/IEC 14496-15)
 * from a single SPS and a single PPS NAL unit.
 *
 * Profile/compatibility/level bytes are copied from SPS bytes 1-3;
 * lengthSizeMinusOne is fixed at 3 (4-byte NAL length prefixes).
 *
 * @param {Uint8Array} sps - Raw SPS NAL unit
 * @param {Uint8Array} pps - Raw PPS NAL unit
 * @returns {Uint8Array} avcC record bytes
 */
function createAvcC(sps, pps) {
  const head = new Uint8Array([
    0x01,                 // configurationVersion
    sps[1],               // AVCProfileIndication
    sps[2],               // profile_compatibility
    sps[3],               // AVCLevelIndication
    0xff,                 // reserved + lengthSizeMinusOne = 3
    0xe1,                 // reserved + numOfSequenceParameterSets = 1
    (sps.length >> 8) & 0xff, sps.length & 0xff
  ]);
  const ppsHead = new Uint8Array([0x01, (pps.length >> 8) & 0xff, pps.length & 0xff]);

  const record = new Uint8Array(head.length + sps.length + ppsHead.length + pps.length);
  let pos = 0;
  record.set(head, pos); pos += head.length;
  record.set(sps, pos); pos += sps.length;
  record.set(ppsHead, pos); pos += ppsHead.length;
  record.set(pps, pos);
  return record;
}
548
+
549
+ // ============================================
550
+ // Main Transcode Function
551
+ // ============================================
552
+
553
/**
 * Transcode video using WebCodecs (browser-only)
 *
 * Supports both MPEG-TS and MP4 input files. Video is decoded and
 * re-encoded to H.264 (optionally rescaled via OffscreenCanvas); audio is
 * passed through as AAC wrapped in ADTS. Output is MPEG-TS.
 *
 * @param {Uint8Array} data - Input video data (MPEG-TS or MP4)
 * @param {Object} [options] - Transcode options
 * @param {number} [options.width] - Output width (default: same as input)
 * @param {number} [options.height] - Output height (default: same as input)
 * @param {number} [options.bitrate=1_000_000] - Output bitrate in bps
 * @param {number} [options.keyFrameInterval=30] - Keyframe interval in frames
 * @param {Function} [options.onProgress] - Progress callback (message: string)
 * @returns {Promise<Uint8Array>} - Transcoded MPEG-TS data
 * @throws {Error} if WebCodecs is unavailable, the input format is not
 *   recognized, the input has no video, or no SPS/PPS can be found
 *
 * @example
 * const output = await transcode(videoData, {
 *   width: 640,
 *   height: 360,
 *   bitrate: 1_000_000,
 *   onProgress: msg => console.log(msg)
 * });
 */
export async function transcode(data, options = {}) {
  requireWebCodecs();

  const log = options.onProgress || (() => {});
  const {
    bitrate = 1_000_000,
    keyFrameInterval = 30
  } = options;

  // Detect input format and parse
  let parser;
  let sps = null, pps = null;

  if (isMp4(data)) {
    log('Parsing input MP4...');
    parser = new MP4Parser();
    parser.parse(data);
    parser.finalize();

    // Get SPS/PPS directly from MP4 parser
    sps = parser.sps;
    pps = parser.pps;
  } else if (isMpegTs(data)) {
    log('Parsing input MPEG-TS...');
    parser = new TSParser();
    parser.parse(data);
    parser.finalize();

    // Find SPS/PPS in NAL units (H.264 NAL type 7 = SPS, 8 = PPS);
    // first occurrence of each wins.
    for (const au of parser.videoAccessUnits) {
      for (const nal of au.nalUnits) {
        const t = nal[0] & 0x1f;
        if (t === 7 && !sps) sps = nal;
        if (t === 8 && !pps) pps = nal;
      }
      if (sps && pps) break;
    }
  } else {
    throw new Error('Unsupported input format. Expected MPEG-TS or MP4.');
  }

  if (!parser.videoAccessUnits || parser.videoAccessUnits.length === 0) {
    throw new Error('No video found in input');
  }

  log(`Found ${parser.videoAccessUnits.length} video frames`);

  // Check for audio
  const hasAudio = parser.audioAccessUnits && parser.audioAccessUnits.length > 0;
  if (hasAudio) {
    log(`Found ${parser.audioAccessUnits.length} audio frames (will passthrough)`);
  }

  if (!sps || !pps) {
    throw new Error('No SPS/PPS found in input');
  }

  // Parse source dimensions from SPS (simplified — falls back to 1080p
  // when the parser did not record dimensions)
  const srcW = parser.videoWidth || 1920;
  const srcH = parser.videoHeight || 1080;
  const outW = options.width || srcW;
  const outH = options.height || srcH;

  // "avc1.PPCCLL" built from SPS profile_idc, constraint flags, level_idc
  const codecStr = `avc1.${sps[1].toString(16).padStart(2, '0')}${sps[2].toString(16).padStart(2, '0')}${sps[3].toString(16).padStart(2, '0')}`;
  log(`Source: ${codecStr}, ${srcW}×${srcH}`);
  log(`Output: ${outW}×${outH} @ ${(bitrate / 1000).toFixed(0)} kbps`);

  // Get base PTS and estimate FPS from first/last PTS (90 kHz clock).
  // NOTE(review): a single-frame input yields duration 0 and
  // estFps = Infinity (frameDuration 0, framerate Infinity below) —
  // confirm inputs always carry at least two frames.
  const basePts = parser.videoAccessUnits[0]?.pts || 0;
  const lastPts = parser.videoAccessUnits[parser.videoAccessUnits.length - 1]?.pts || basePts;
  const duration = (lastPts - basePts) / 90000;
  const estFps = parser.videoAccessUnits.length / duration;
  const videoAUs = parser.videoAccessUnits;

  log(`Duration: ${duration.toFixed(2)}s, FPS: ${estFps.toFixed(1)}`);

  // Setup muxer
  const tsMuxer = new TSMuxer();
  tsMuxer.setHasAudio(hasAudio);

  // Pre-add audio samples (passthrough: wrap raw AAC in ADTS headers)
  if (hasAudio) {
    const audioSampleRate = parser.audioSampleRate || 44100;
    const audioChannels = parser.audioChannels || 2;

    // ADTS sampling_frequency_index table; falls back to index 4 (44100)
    const SAMPLE_RATES = [96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350];
    const sampleRateIdx = SAMPLE_RATES.indexOf(audioSampleRate);
    const sri = sampleRateIdx >= 0 ? sampleRateIdx : 4;

    for (const audioAu of parser.audioAccessUnits) {
      const rawAac = audioAu.data;
      const frameLen = 7 + rawAac.length; // 7-byte ADTS header + payload
      const adts = new Uint8Array(frameLen);

      // ADTS header: MPEG-4, no CRC, profile AAC-LC, frame length split
      // across bytes 3-5, buffer fullness set to "VBR" (all ones)
      adts[0] = 0xFF;
      adts[1] = 0xF1;
      adts[2] = (1 << 6) | (sri << 2) | ((audioChannels >> 2) & 0x01);
      adts[3] = ((audioChannels & 0x03) << 6) | ((frameLen >> 11) & 0x03);
      adts[4] = (frameLen >> 3) & 0xFF;
      adts[5] = ((frameLen & 0x07) << 5) | 0x1F;
      adts[6] = 0xFC;
      adts.set(rawAac, 7);

      // Adjust PTS to start at 0
      const pts90k = audioAu.pts - basePts;
      tsMuxer.addAudioSample(adts, Math.max(0, pts90k));
    }
  }

  // Setup encoder
  let gotSpsPps = false;
  let encodedCount = 0;

  const encoder = new VideoEncoder({
    output: (chunk, meta) => {
      // First chunk carries the decoder config; pull the encoder's own
      // SPS/PPS out of the avcC description for the muxer.
      if (meta?.decoderConfig?.description && !gotSpsPps) {
        const desc = new Uint8Array(meta.decoderConfig.description);
        const numSps = desc[5] & 0x1f;
        let off = 6;
        const spsLen = (desc[off] << 8) | desc[off + 1];
        const encSps = desc.slice(off + 2, off + 2 + spsLen);
        off += 2 + spsLen;
        const numPps = desc[off++];
        const ppsLen = (desc[off] << 8) | desc[off + 1];
        const encPps = desc.slice(off + 2, off + 2 + ppsLen);
        tsMuxer.setSpsPps(encSps, encPps);
        gotSpsPps = true;
      }

      // Hand the encoded chunk to the muxer (timestamp µs → 90 kHz)
      const data = new Uint8Array(chunk.byteLength);
      chunk.copyTo(data);
      const pts90k = Math.round(chunk.timestamp * 90 / 1000);
      tsMuxer.addVideoSample(data, chunk.type === 'key', pts90k);
      encodedCount++;
    },
    // NOTE(review): throwing inside a WebCodecs error callback is not
    // catchable by the caller of transcode(); consider rejecting instead.
    error: e => { throw new Error(`Encoder error: ${e.message}`); }
  });

  encoder.configure({
    codec: 'avc1.4d001f', // H.264 Main profile, level 3.1
    width: outW,
    height: outH,
    bitrate,
    framerate: Math.round(estFps),
    latencyMode: 'realtime',
    avc: { format: 'avc' } // AVCC output (length-prefixed NALs)
  });

  // Setup decoder - decoupled from encoder for parallel operation
  const avcC = createAvcC(sps, pps);
  const needsScale = srcW !== outW || srcH !== outH;

  // Use OffscreenCanvas for scaling
  const canvas = needsScale ? new OffscreenCanvas(outW, outH) : null;
  const ctx = canvas ? canvas.getContext('2d', { alpha: false }) : null;

  let processedCount = 0;
  let decodedCount = 0;
  const frameDuration = Math.round(1_000_000 / estFps); // µs per frame
  const startTime = performance.now();

  // Process frames directly - simple pipeline: optionally rescale via the
  // canvas, feed the encoder, and close both frames to release decoder
  // resources promptly.
  const processFrame = (frame, isKey) => {
    let outFrame;
    if (needsScale) {
      ctx.drawImage(frame, 0, 0, outW, outH);
      outFrame = new VideoFrame(canvas, {
        timestamp: frame.timestamp,
        duration: frame.duration || frameDuration
      });
    } else {
      outFrame = new VideoFrame(frame, {
        timestamp: frame.timestamp,
        duration: frame.duration || frameDuration
      });
    }
    encoder.encode(outFrame, { keyFrame: isKey });
    outFrame.close();
    frame.close();
    processedCount++;

    if (processedCount % 200 === 0) {
      // NOTE(review): encodeStartTime is declared further down; this
      // closure only runs from decoder output, which follows that
      // declaration, so the const is initialized by the time it is read.
      const elapsed = (performance.now() - encodeStartTime) / 1000;
      log(`Processed ${processedCount}/${videoAUs.length} @ ${(processedCount / elapsed).toFixed(0)} fps`);
    }
  };

  // Start encoding processor (runs in parallel with decoding)

  const decoder = new VideoDecoder({
    output: (frame) => {
      // Force an encoder keyframe every keyFrameInterval decoded frames
      const isKey = decodedCount % keyFrameInterval === 0;
      processFrame(frame, isKey);
      decodedCount++;
    },
    // NOTE(review): same uncatchable-throw concern as the encoder above.
    error: e => { throw new Error(`Decoder error: ${e.message}`); }
  });

  decoder.configure({
    codec: codecStr,
    codedWidth: srcW,
    codedHeight: srcH,
    description: avcC // presence of avcC selects AVCC-format input chunks
  });

  // Decode and encode frames
  log('Transcoding...');
  const encodeStartTime = performance.now();

  for (let i = 0; i < videoAUs.length; i++) {
    const au = videoAUs[i];

    // Build AVCC-formatted NAL units — only slice data (type 1) and IDR
    // (type 5) NALs are fed to the decoder; SPS/PPS travel via avcC.
    const nalParts = [];
    let isKey = false;

    for (const nal of au.nalUnits) {
      const t = nal[0] & 0x1f;
      if (t === 5) isKey = true;
      if (t === 1 || t === 5) {
        const len = nal.length;
        nalParts.push(new Uint8Array([(len >> 24) & 0xff, (len >> 16) & 0xff, (len >> 8) & 0xff, len & 0xff]));
        nalParts.push(nal);
      }
    }

    if (nalParts.length > 0) {
      const data = concat(nalParts);
      // Normalize PTS to start at 0 and convert 90 kHz → microseconds
      const pts90k = au.pts !== undefined ? au.pts : basePts + i * (90000 / estFps);
      const timestamp = Math.max(0, Math.round((pts90k - basePts) / 90 * 1000));

      decoder.decode(new EncodedVideoChunk({
        type: isKey ? 'key' : 'delta',
        timestamp,
        duration: frameDuration,
        data
      }));
    }

    // Yield periodically to let encoder catch up and prevent UI freeze
    if (i % 100 === 0) await new Promise(r => setTimeout(r, 0));
  }

  // Flush decoder and encoder (decoder first so every frame reaches the
  // encoder before its flush)
  log('Flushing decoder...');
  await decoder.flush();
  decoder.close();

  log(`Flushing encoder (${encoder.encodeQueueSize} frames queued)...`);
  await encoder.flush();
  encoder.close();

  // Flush remaining audio
  tsMuxer.flush();

  const totalTime = (performance.now() - startTime) / 1000;
  log(`Transcoded ${processedCount} frames in ${totalTime.toFixed(2)}s (${(duration / totalTime).toFixed(1)}x realtime)`);

  return tsMuxer.build();
}
836
+
837
// Default export: the same function as the named `transcode` export,
// enabling `import transcode from '...'` usage.
export default transcode;