@invintusmedia/tomp4 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tomp4.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * toMp4.js v1.2.0
2
+ * toMp4.js v1.3.0
3
3
  * Convert MPEG-TS and fMP4 to standard MP4
4
4
  * https://github.com/TVWIT/toMp4.js
5
5
  * MIT License
@@ -111,24 +111,23 @@
111
111
  // This is the time the decoder needs to process but player shouldn't display
112
112
  const prerollPts = Math.max(0, startPts - keyframePts);
113
113
 
114
- // Clip audio to the REQUESTED time range (not from keyframe)
115
- // Audio doesn't need keyframe pre-roll
116
- const audioStartPts = startPts;
117
- const audioEndPts = Math.min(endPts, lastFramePts + 90000); // Include audio slightly past last video
114
+ // Clip audio from KEYFRAME time (same as video) so A/V stays in sync
115
+ // even on players that ignore edit lists. The edit list will skip the
116
+ // audio preroll on compliant players, just like it does for video.
117
+ const audioStartPts = keyframePts;
118
+ const audioEndPts = Math.min(endPts, lastFramePts + 90000);
118
119
  const clippedAudio = audioAUs.filter(au => au.pts >= audioStartPts && au.pts < audioEndPts);
119
120
 
120
- // Normalize video timestamps so keyframe starts at 0
121
+ // Normalize both video and audio to the same base (keyframe PTS)
122
+ // so they share a common timeline regardless of edit list support
121
123
  const offset = keyframePts;
122
124
  for (const au of clippedVideo) {
123
125
  au.pts -= offset;
124
126
  au.dts -= offset;
125
127
  }
126
128
 
127
- // Normalize audio timestamps so it starts at 0 (matching video playback start after preroll)
128
- // Audio doesn't have preroll, so it should start at PTS 0 to sync with video after edit list
129
- const audioOffset = audioStartPts; // Use requested start, not keyframe
130
129
  for (const au of clippedAudio) {
131
- au.pts -= audioOffset;
130
+ au.pts -= offset;
132
131
  }
133
132
 
134
133
  return {
@@ -323,144 +322,644 @@
323
322
  // ============================================
324
323
  /**
325
324
  * fMP4 to Standard MP4 Converter
326
- *
327
- * Converts a fragmented MP4 file to a standard MP4 container
328
- * by extracting samples from fragments and rebuilding the moov box.
329
- *
325
+ *
326
+ * Converts fragmented MP4 data to standard MP4 and supports
327
+ * sample-level clipping for fMP4 inputs.
328
+ *
330
329
  * @module fmp4/converter
331
330
  */
332
331
 
333
332
  import {
334
- parseBoxes, findBox, parseChildBoxes, createBox,
335
- parseTfhd, parseTrun
333
+ parseBoxes,
334
+ findBox,
335
+ parseChildBoxes,
336
+ createBox,
337
+ parseTfhd,
338
+ parseTfdt,
339
+ parseTrun,
340
+ getMovieTimescale,
336
341
  } from './utils.js';
337
342
 
338
- // ============================================
339
- // Moov Rebuilding Functions
340
- // ============================================
343
/**
 * Create a "full box": a box whose payload begins with a one-byte version
 * followed by a 24-bit big-endian flags field.
 *
 * @param {string} type - Four-character box type, passed through to createBox
 * @param {number} version - Value stored in the first header byte
 * @param {number} flags - Flags value; only the low 24 bits are written
 * @param {...Uint8Array} payloads - Additional payload parts forwarded to createBox after the header
 * @returns {*} Whatever createBox returns for the assembled parts
 */
function createFullBox(type, version, flags, ...payloads) {
  // Byte 0 is the version, bytes 1-3 carry the flags from most to least significant.
  const versionAndFlags = Uint8Array.of(
    version,
    (flags >> 16) & 0xff,
    (flags >> 8) & 0xff,
    flags & 0xff,
  );
  return createBox(type, versionAndFlags, ...payloads);
}
351
+
352
/**
 * Add up the `duration` of every sample.
 *
 * @param {Iterable<{duration?: number}>} samples - Samples to total
 * @returns {number} Sum of the durations; a missing or falsy duration counts as 0
 */
function sumSampleDurations(samples) {
  let accumulated = 0;
  for (const { duration } of samples) {
    accumulated += duration || 0;
  }
  return accumulated;
}
357
+
358
/**
 * Rescale a tick count from a track's timescale to the movie timescale.
 *
 * @param {number} value - Tick count expressed in the track timescale
 * @param {number} trackTimescale - Ticks per second of the track
 * @param {number} movieTimescale - Ticks per second of the movie
 * @returns {number} The rounded rescaled value, or `value` unchanged when
 *   either timescale is falsy (so no division by zero is attempted)
 */
function toMovieTimescale(value, trackTimescale, movieTimescale) {
  const canRescale = Boolean(trackTimescale) && Boolean(movieTimescale);
  if (canRescale) {
    return Math.round((value * movieTimescale) / trackTimescale);
  }
  return value;
}
362
+
363
/**
 * Decide whether a sample should be treated as a sync sample.
 *
 * @param {{flags?: number|null}} sample - Sample whose `flags` field is inspected
 * @returns {boolean} true when `flags` is absent (undefined or null) or when
 *   bit 16 of `flags` is clear; false when bit 16 is set
 */
function isSyncSample(sample) {
  const { flags } = sample;
  // Samples with no flags at all are assumed to be sync samples.
  if (flags == null) return true;
  const nonSyncBit = (flags >> 16) & 0x1;
  return nonSyncBit === 0;
}
368
+
369
/**
 * Read the per-track defaults from the bytes of a `trex` box.
 *
 * The offsets used here are relative to the start of `trexData`; they line up
 * with a buffer that still includes the 8-byte box header and 4-byte
 * version/flags field (presumably how parseChildBoxes exposes `data` — confirm).
 *
 * @param {Uint8Array} trexData - Bytes of the trex box
 * @returns {{trackId: number, defaultSampleDuration: number, defaultSampleSize: number, defaultSampleFlags: number}}
 *   Big-endian 32-bit values read at offsets 12, 20, 24 and 28
 */
function parseTrex(trexData) {
  const { buffer, byteOffset, byteLength } = trexData;
  const reader = new DataView(buffer, byteOffset, byteLength);
  const u32At = (position) => reader.getUint32(position);

  return {
    trackId: u32At(12),
    defaultSampleDuration: u32At(20),
    defaultSampleSize: u32At(24),
    defaultSampleFlags: u32At(28),
  };
}
378
+
379
/**
 * Collect the `trex` defaults declared under `moov/mvex`, keyed by track ID.
 *
 * @param {object} moovBox - Parsed moov box, as accepted by parseChildBoxes
 * @returns {Map<number, object>} Track ID → parsed trex defaults; empty when
 *   the moov has no mvex child
 */
function extractTrexDefaults(moovBox) {
  const defaultsByTrack = new Map();

  const mvexBox = findBox(parseChildBoxes(moovBox), 'mvex');
  if (!mvexBox) return defaultsByTrack;

  for (const entry of parseChildBoxes(mvexBox)) {
    if (entry.type === 'trex') {
      const parsed = parseTrex(entry.data);
      // A later trex for the same track replaces an earlier one.
      defaultsByTrack.set(parsed.trackId, parsed);
    }
  }
  return defaultsByTrack;
}
393
+
394
/**
 * Gather per-track metadata (ID, media timescale, handler type) from the
 * `trak` boxes of a moov box.
 *
 * @param {object} moovBox - Parsed moov box, as accepted by parseChildBoxes
 * @returns {{trackMetadata: Map<number, {trackId: number, timescale: number, handlerType: string}>, trackOrder: number[]}}
 *   Metadata keyed by track ID, plus the track IDs in the order their trak
 *   boxes were encountered. A trak without a tkhd is omitted.
 */
function extractTrackMetadata(moovBox) {
  const trackMetadata = new Map();
  const trackOrder = [];

  // tkhd (track ID) and mdhd (timescale) keep the field of interest at the
  // same position: offset 20 for version 0 and offset 28 otherwise.
  const readVersionedField = (bytes) => {
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    return bytes[8] === 0 ? view.getUint32(20) : view.getUint32(28);
  };

  for (const trak of parseChildBoxes(moovBox)) {
    if (trak.type !== 'trak') continue;

    let trackId = null;
    let timescale = 0;
    let handlerType = 'unknown';

    for (const node of parseChildBoxes(trak)) {
      switch (node.type) {
        case 'tkhd':
          trackId = readVersionedField(node.data);
          break;
        case 'mdia':
          for (const inner of parseChildBoxes(node)) {
            if (inner.type === 'mdhd') {
              timescale = readVersionedField(inner.data);
            } else if (inner.type === 'hdlr' && inner.data.byteLength >= 20) {
              // Four-character handler code stored at bytes 16..19.
              handlerType = String.fromCharCode(
                inner.data[16],
                inner.data[17],
                inner.data[18],
                inner.data[19],
              );
            }
          }
          break;
        default:
          break;
      }
    }

    if (trackId === null) continue;

    trackMetadata.set(trackId, {
      trackId,
      // A zero or missing timescale falls back to 90000.
      timescale: timescale || 90000,
      handlerType,
    });
    trackOrder.push(trackId);
  }

  return { trackMetadata, trackOrder };
}
441
+
442
/**
 * Make a shallow copy of a sample restricted to the fields this module uses.
 *
 * @param {object} sample - Source sample
 * @returns {{duration: number, size: number, flags: *, compositionTimeOffset: number, dts: number, pts: number, byteOffset: number}}
 *   New object; every numeric field defaults to 0 when falsy, while `flags`
 *   is copied as-is (including undefined)
 */
function cloneSample(sample) {
  const {
    duration,
    size,
    flags,
    compositionTimeOffset,
    dts,
    pts,
    byteOffset,
  } = sample;

  return {
    duration: duration || 0,
    size: size || 0,
    flags,
    compositionTimeOffset: compositionTimeOffset || 0,
    dts: dts || 0,
    pts: pts || 0,
    byteOffset: byteOffset || 0,
  };
}
453
+
454
/**
 * Produce copies of the samples with their timestamps shifted by `baseDts`.
 *
 * @param {object[]} samples - Samples to copy; the inputs are not modified
 * @param {number} baseDts - Amount subtracted from both `dts` and `pts`
 * @returns {object[]} New sample objects (see cloneSample) with rebased times
 */
function normalizeSamples(samples, baseDts) {
  return samples.map((sample) => {
    const copy = cloneSample(sample);
    return {
      ...copy,
      dts: copy.dts - baseDts,
      pts: copy.pts - baseDts,
    };
  });
}
462
+
463
/**
 * Select the video samples needed to play [startTick, endTick).
 *
 * The selection starts at the nearest sync sample at or before the first
 * requested sample, so the result may include samples that precede
 * `startTick`; `mediaTime` reports how far into the selection the requested
 * start lies.
 *
 * @param {object[]} samples - Track samples (pts/dts/duration in track ticks)
 * @param {number} startTick - Requested start, in track ticks
 * @param {number} endTick - Requested end in track ticks; non-finite means "to the end"
 * @returns {{samples: object[], mediaTime: number, playbackDuration: number}}
 *   Rebased copies of the selected samples, the offset of the requested start
 *   within them, and the playable duration. All-empty when nothing matches.
 */
function clipVideoSamples(samples, startTick, endTick) {
  const emptyResult = () => ({ samples: [], mediaTime: 0, playbackDuration: 0 });
  if (!samples.length) return emptyResult();

  // First sample whose presentation interval extends past the requested start.
  const requestedStartIndex = samples.findIndex(
    (sample) => (sample.pts || 0) + (sample.duration || 0) > startTick,
  );
  if (requestedStartIndex === -1) return emptyResult();

  // Walk back to a sync sample; if there is none, start at the requested sample.
  let decodeStartIndex = requestedStartIndex;
  let probe = requestedStartIndex;
  while (probe >= 0 && !isSyncSample(samples[probe])) probe--;
  if (probe >= 0) decodeStartIndex = probe;

  const bounded = Number.isFinite(endTick);
  let endIndex = samples.length;
  if (bounded) {
    // Stop at the first sample (from the decode start) presented at or after the end.
    endIndex = decodeStartIndex;
    while (endIndex < samples.length && (samples[endIndex].pts || 0) < endTick) {
      endIndex++;
    }
  }
  if (endIndex <= decodeStartIndex) return emptyResult();

  const chosen = samples.slice(decodeStartIndex, endIndex);
  const baseDts = chosen[0].dts || 0;
  const mediaTime = Math.max(0, startTick - baseDts);
  const rebased = normalizeSamples(chosen, baseDts);

  // Playable time is what remains after skipping the preroll, capped by the request.
  const maxPlayable = Math.max(0, sumSampleDurations(rebased) - mediaTime);
  const requested = bounded ? Math.max(0, endTick - startTick) : maxPlayable;

  return {
    samples: rebased,
    mediaTime,
    playbackDuration: Math.min(requested, maxPlayable),
  };
}
516
+
517
/**
 * Select the samples of a non-video track that fall in [startTick, endTick).
 *
 * @param {object[]} samples - Track samples (pts/dts/duration in track ticks)
 * @param {number} startTick - Requested start, in track ticks
 * @param {number} endTick - Requested end in track ticks; non-finite means "to the end"
 * @returns {{samples: object[], mediaTime: number, playbackDuration: number}}
 *   Rebased copies of the selected samples, a `mediaTime` of 0, and the
 *   playable duration. All-empty when nothing matches.
 */
function clipNonVideoSamples(samples, startTick, endTick) {
  const emptyResult = () => ({ samples: [], mediaTime: 0, playbackDuration: 0 });
  if (!samples.length) return emptyResult();

  // Skip the leading samples presented before the requested start.
  let startIndex = 0;
  for (const sample of samples) {
    if ((sample.pts || 0) < startTick) startIndex++;
    else break;
  }
  if (startIndex >= samples.length) return emptyResult();

  const bounded = Number.isFinite(endTick);
  let endIndex = samples.length;
  if (bounded) {
    endIndex = startIndex;
    while (endIndex < samples.length && (samples[endIndex].pts || 0) < endTick) {
      endIndex++;
    }
  }
  if (endIndex <= startIndex) return emptyResult();

  const chosen = samples.slice(startIndex, endIndex);
  const rebased = normalizeSamples(chosen, chosen[0].dts || 0);
  const decodeDuration = sumSampleDurations(rebased);
  const requested = bounded ? Math.max(0, endTick - startTick) : decodeDuration;

  return {
    samples: rebased,
    mediaTime: 0,
    playbackDuration: Math.min(requested, decodeDuration),
  };
}
556
+
557
/**
 * Apply an optional time-range clip to every track.
 *
 * Without a finite startTime/endTime, each non-empty track in `tracks` is
 * updated in place (samples rebased to start at 0) and `tracks` itself is
 * returned. With a clip, a new Map is returned holding only tracks that still
 * have samples; the first video track with samples is clipped from its decode
 * start, and every other track is clipped from that same point so all tracks
 * share one timeline.
 *
 * @param {Map<number, object>} tracks - Track ID → track info (timescale, handlerType, samples, ...)
 * @param {{startTime?: number, endTime?: number}} [options] - Clip bounds in seconds
 * @returns {Map<number, object>} The updated or newly built track map
 */
function applyClipToTracks(tracks, options = {}) {
  const { startTime, endTime } = options;
  const hasStart = Number.isFinite(startTime);
  const hasEnd = Number.isFinite(endTime);

  if (!(hasStart || hasEnd)) {
    // No clipping requested: just rebase each track to its first DTS.
    for (const track of tracks.values()) {
      if (!track.samples.length) continue;
      const firstDts = track.samples[0].dts || 0;
      track.samples = normalizeSamples(track.samples, firstDts);
      track.mediaTime = 0;
      track.playbackDuration = sumSampleDurations(track.samples);
    }
    return tracks;
  }

  const startSec = hasStart ? Math.max(0, startTime) : 0;
  const endSec = hasEnd ? Math.max(startSec, endTime) : Infinity;
  const endTickFor = (timescale) => (
    Number.isFinite(endSec) ? Math.round(endSec * timescale) : Infinity
  );

  // The first video track that actually has samples drives the preroll.
  let videoTrackId = null;
  for (const [candidateId, candidate] of tracks) {
    if (candidate.handlerType === 'vide' && candidate.samples.length > 0) {
      videoTrackId = candidateId;
      break;
    }
  }

  const clipped = new Map();
  let videoPrerollSec = 0;

  // Pass 1: clip video and learn how much preroll precedes the requested start.
  if (videoTrackId !== null) {
    const videoTrack = tracks.get(videoTrackId);
    if (videoTrack && videoTrack.samples.length) {
      const videoClip = clipVideoSamples(
        videoTrack.samples,
        Math.round(startSec * videoTrack.timescale),
        endTickFor(videoTrack.timescale),
      );

      if (videoClip.samples.length) {
        videoPrerollSec = videoClip.mediaTime / videoTrack.timescale;
        clipped.set(videoTrackId, {
          ...videoTrack,
          samples: videoClip.samples,
          mediaTime: videoClip.mediaTime,
          playbackDuration: videoClip.playbackDuration,
          chunkOffsets: [],
        });
      }
    }
  }

  // Pass 2: clip the remaining tracks starting from the video decode start,
  // so A/V stays aligned even when a player ignores edit lists.
  for (const [trackId, track] of tracks) {
    if (trackId === videoTrackId || !track.samples.length) continue;

    const adjustedStartSec = Math.max(0, startSec - videoPrerollSec);
    const trackClip = clipNonVideoSamples(
      track.samples,
      Math.round(adjustedStartSec * track.timescale),
      endTickFor(track.timescale),
    );
    if (!trackClip.samples.length) continue;

    // Same preroll as video, expressed in this track's own timescale.
    const prerollTicks = Math.round(videoPrerollSec * track.timescale);
    const totalTicks = sumSampleDurations(trackClip.samples);

    clipped.set(trackId, {
      ...track,
      samples: trackClip.samples,
      mediaTime: prerollTicks,
      playbackDuration: Math.max(0, totalTicks - prerollTicks),
      chunkOffsets: [],
    });
  }

  return clipped;
}
634
+
635
/**
 * Walk the top-level boxes, collecting per-track samples from every `moof`
 * and concatenating the payloads of all `mdat` boxes into one buffer.
 *
 * Each collected sample carries its decode/presentation time and a
 * `byteOffset` into the combined mdat buffer.
 *
 * @param {object[]} boxes - Top-level boxes (type, offset, data)
 * @param {Map<number, object>} trackMetadata - Track ID → { timescale, handlerType }
 * @param {Map<number, object>} trexDefaults - Track ID → trex defaults handed to parseTfhd
 * @returns {{tracks: Map<number, object>, combinedMdat: Uint8Array}}
 */
function collectTrackSamples(boxes, trackMetadata, trexDefaults) {
  const tracks = new Map();
  const mdatChunks = [];
  let combinedMdatOffset = 0;

  // Offset of the first mdat after boxes[moofIndex]; -1 when another moof
  // (or the end of the list) is reached first.
  const locateFollowingMdat = (moofIndex) => {
    for (let k = moofIndex + 1; k < boxes.length; k++) {
      const candidate = boxes[k];
      if (candidate.type === 'mdat') return candidate.offset;
      if (candidate.type === 'moof') return -1;
    }
    return -1;
  };

  for (let boxIndex = 0; boxIndex < boxes.length; boxIndex++) {
    const box = boxes[boxIndex];

    if (box.type === 'mdat') {
      // Drop the first 8 bytes (assumes a compact size+type header — TODO confirm for 64-bit sizes).
      const payload = box.data.subarray(8);
      mdatChunks.push({ data: payload, offset: combinedMdatOffset });
      combinedMdatOffset += payload.byteLength;
      continue;
    }
    if (box.type !== 'moof') continue;

    const moofChildren = parseChildBoxes(box);
    const moofStart = box.offset;

    const nextMdatOffset = locateFollowingMdat(boxIndex);
    if (nextMdatOffset < 0) continue;
    const mdatContentStartAbs = nextMdatOffset + 8;

    for (const traf of moofChildren) {
      if (traf.type !== 'traf') continue;

      const trafChildren = parseChildBoxes(traf);
      const tfhdBox = findBox(trafChildren, 'tfhd');
      if (!tfhdBox) continue;

      const tfhdBytes = tfhdBox.data;
      const trackId = new DataView(
        tfhdBytes.buffer,
        tfhdBytes.byteOffset,
        tfhdBytes.byteLength,
      ).getUint32(12);
      const tfhd = parseTfhd(tfhdBytes, trexDefaults.get(trackId) || {});
      const tfdtBox = findBox(trafChildren, 'tfdt');
      let decodeTime = tfdtBox ? parseTfdt(tfdtBox.data) : 0;
      // Absolute position just past the previous trun's data in this traf.
      let runDataCursorAbs = null;

      if (!tracks.has(trackId)) {
        const meta = trackMetadata.get(trackId) || {};
        tracks.set(trackId, {
          trackId,
          timescale: meta.timescale || 90000,
          handlerType: meta.handlerType || 'unknown',
          samples: [],
          chunkOffsets: [],
          mediaTime: 0,
          playbackDuration: 0,
        });
      }
      const track = tracks.get(trackId);

      for (const run of trafChildren) {
        if (run.type !== 'trun') continue;
        const { samples, dataOffset, flags } = parseTrun(run.data, tfhd);

        let runSize = 0;
        for (const sample of samples) runSize += sample.size || 0;

        // Resolve where this run's data starts: an explicit data offset wins,
        // then continuation after the previous run, then the mdat payload start.
        let dataStartAbs = mdatContentStartAbs;
        if (flags & 0x1) {
          const baseAbs = (tfhd.flags & 0x1) ? tfhd.baseDataOffset : moofStart;
          dataStartAbs = baseAbs + dataOffset;
        } else if (runDataCursorAbs !== null) {
          dataStartAbs = runDataCursorAbs;
        }

        // Translate the absolute file position into the combined mdat buffer.
        let sampleByteOffset = combinedMdatOffset + Math.max(0, dataStartAbs - mdatContentStartAbs);
        for (const sample of samples) {
          const dts = decodeTime;
          track.samples.push({
            ...sample,
            dts,
            pts: dts + (sample.compositionTimeOffset || 0),
            byteOffset: sampleByteOffset,
          });
          decodeTime += sample.duration || 0;
          sampleByteOffset += sample.size || 0;
        }

        runDataCursorAbs = dataStartAbs + runSize;
      }
    }
  }

  const combinedMdat = new Uint8Array(combinedMdatOffset);
  for (const { data, offset } of mdatChunks) {
    combinedMdat.set(data, offset);
  }

  return { tracks, combinedMdat };
}
732
+
733
/**
 * Build a new mdat payload that stores each track's samples contiguously,
 * one chunk per track.
 *
 * Side effects: sets every processed track's `chunkOffsets` and rewrites each
 * sample's `byteOffset` to its position in the returned buffer.
 *
 * @param {Map<number, object>} tracks - Track ID → track info with `samples`
 * @param {number[]} trackOrder - Preferred track order; tracks not listed are appended
 * @param {Uint8Array} sourceMdat - Buffer the current sample byteOffsets refer to
 * @returns {Uint8Array} The repacked sample data
 * @throws {Error} When a sample's byte range falls outside `sourceMdat`
 */
function rebuildMdatContent(tracks, trackOrder, sourceMdat) {
  const listed = trackOrder.filter((id) => tracks.has(id));
  const unlisted = [...tracks.keys()].filter((id) => !listed.includes(id));
  const orderedTrackIds = [...listed, ...unlisted];

  let totalSize = 0;
  for (const id of orderedTrackIds) {
    for (const sample of tracks.get(id).samples) {
      totalSize += sample.size || 0;
    }
  }

  const mdatData = new Uint8Array(totalSize);
  let writeOffset = 0;

  for (const trackId of orderedTrackIds) {
    const track = tracks.get(trackId);
    if (!track) continue;
    if (!track.samples.length) {
      track.chunkOffsets = [];
      continue;
    }

    // All of a track's samples go into a single chunk starting here.
    track.chunkOffsets = [{ offset: writeOffset, sampleCount: track.samples.length }];
    for (const sample of track.samples) {
      const start = sample.byteOffset || 0;
      const byteCount = sample.size || 0;
      const end = start + byteCount;
      if (start < 0 || end > sourceMdat.byteLength) {
        throw new Error(`Invalid sample byte range for track ${trackId}: ${start}-${end}`);
      }
      mdatData.set(sourceMdat.subarray(start, end), writeOffset);
      sample.byteOffset = writeOffset;
      writeOffset += byteCount;
    }
  }

  return mdatData;
}
770
+
771
/**
 * Work out each track's playback duration and the overall movie duration.
 *
 * Side effects: stores `playbackDuration` (track ticks) and `movieDuration`
 * (movie ticks) on every track.
 *
 * @param {Iterable<[number, object]>} tracks - [trackId, track] pairs (e.g. a Map)
 * @param {number} movieTimescale - Movie timescale used for the conversion
 * @returns {number} The longest track duration, in movie timescale units
 */
function calculateMovieDuration(tracks, movieTimescale) {
  let longest = 0;
  for (const entry of tracks) {
    const track = entry[1];
    // Used when no positive playbackDuration was recorded: total sample
    // duration minus the leading mediaTime that playback skips.
    const derived = Math.max(0, sumSampleDurations(track.samples) - (track.mediaTime || 0));
    const playable = track.playbackDuration > 0 ? track.playbackDuration : derived;

    track.playbackDuration = playable;
    track.movieDuration = toMovieTimescale(playable, track.timescale, movieTimescale);
    longest = Math.max(longest, track.movieDuration);
  }
  return longest;
}
341
782
 
342
783
  function rebuildMvhd(mvhdBox, duration) {
343
784
  const data = new Uint8Array(mvhdBox.data);
344
785
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
345
786
  const version = data[8];
346
787
  const durationOffset = version === 0 ? 24 : 32;
347
- if (version === 0) view.setUint32(durationOffset, duration);
348
- else { view.setUint32(durationOffset, 0); view.setUint32(durationOffset + 4, duration); }
788
+ if (version === 0) {
789
+ view.setUint32(durationOffset, duration);
790
+ } else {
791
+ view.setUint32(durationOffset, 0);
792
+ view.setUint32(durationOffset + 4, duration);
793
+ }
349
794
  return data;
350
795
  }
351
796
 
352
/**
 * Copy a tkhd box and overwrite its duration field.
 *
 * Version 0 stores a 32-bit duration at offset 28. Any other version stores a
 * 64-bit duration at offset 36, written here as two big-endian 32-bit words
 * so durations of 2^32 ticks or more are no longer truncated.
 *
 * @param {{data: Uint8Array}} tkhdBox - Box whose `data` holds the full tkhd bytes
 * @param {{movieDuration?: number}} [trackInfo] - Track info; its `movieDuration` is used when not null/undefined
 * @param {number} maxMovieDuration - Fallback duration when the track has no `movieDuration`
 * @returns {Uint8Array} A modified copy; `tkhdBox.data` is left untouched
 */
function rebuildTkhd(tkhdBox, trackInfo, maxMovieDuration) {
  const data = new Uint8Array(tkhdBox.data);
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
  const version = data[8];
  const duration = trackInfo?.movieDuration ?? maxMovieDuration;
  if (version === 0) {
    view.setUint32(28, duration);
  } else {
    // Upper word is only non-zero once the value exceeds 32 bits; the lower
    // word relies on setUint32 keeping the low 32 bits of the number.
    const highWord = duration > 0xffffffff ? Math.floor(duration / 0x100000000) : 0;
    view.setUint32(36, highWord);
    view.setUint32(40, duration);
  }
  return data;
}
362
809
 
363
/**
 * Copy an mdhd box and set its duration to the sum of the track's sample
 * durations (track timescale units).
 *
 * Version 0 stores a 32-bit duration at offset 24. Any other version stores a
 * 64-bit duration at offset 32, written here as two big-endian 32-bit words
 * so durations of 2^32 ticks or more are no longer truncated.
 *
 * @param {{data: Uint8Array}} mdhdBox - Box whose `data` holds the full mdhd bytes
 * @param {{samples?: object[]}} [trackInfo] - Track info; a missing track or sample list yields duration 0
 * @returns {Uint8Array} A modified copy; `mdhdBox.data` is left untouched
 */
function rebuildMdhd(mdhdBox, trackInfo) {
  const data = new Uint8Array(mdhdBox.data);
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
  const version = data[8];
  const duration = sumSampleDurations(trackInfo?.samples || []);
  const durationOffset = version === 0 ? 24 : 32;
  if (version === 0) {
    view.setUint32(durationOffset, duration);
  } else {
    // Upper word is only non-zero once the value exceeds 32 bits; the lower
    // word relies on setUint32 keeping the low 32 bits of the number.
    const highWord = duration > 0xffffffff ? Math.floor(duration / 0x100000000) : 0;
    view.setUint32(durationOffset, highWord);
    view.setUint32(durationOffset + 4, duration);
  }
  return data;
}
374
824
 
375
825
  function rebuildStbl(stblBox, trackInfo) {
376
826
  const stblChildren = parseChildBoxes(stblBox);
377
827
  const newParts = [];
378
- for (const child of stblChildren) if (child.type === 'stsd') { newParts.push(child.data); break; }
828
+ for (const child of stblChildren) {
829
+ if (child.type === 'stsd') {
830
+ newParts.push(child.data);
831
+ break;
832
+ }
833
+ }
834
+
379
835
  const samples = trackInfo?.samples || [];
380
836
  const chunkOffsets = trackInfo?.chunkOffsets || [];
381
837
 
382
838
  // stts
383
839
  const sttsEntries = [];
384
- let curDur = null, count = 0;
385
- for (const s of samples) {
386
- const d = s.duration || 0;
387
- if (d === curDur) count++;
388
- else { if (curDur !== null) sttsEntries.push({ count, duration: curDur }); curDur = d; count = 1; }
840
+ let currentDuration = null;
841
+ let currentCount = 0;
842
+ for (const sample of samples) {
843
+ const duration = sample.duration || 0;
844
+ if (duration === currentDuration) currentCount++;
845
+ else {
846
+ if (currentDuration !== null) {
847
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
848
+ }
849
+ currentDuration = duration;
850
+ currentCount = 1;
851
+ }
852
+ }
853
+ if (currentDuration !== null) {
854
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
389
855
  }
390
- if (curDur !== null) sttsEntries.push({ count, duration: curDur });
391
856
  const sttsData = new Uint8Array(8 + sttsEntries.length * 8);
392
857
  const sttsView = new DataView(sttsData.buffer);
393
858
  sttsView.setUint32(4, sttsEntries.length);
394
- let off = 8;
395
- for (const e of sttsEntries) { sttsView.setUint32(off, e.count); sttsView.setUint32(off + 4, e.duration); off += 8; }
859
+ let offset = 8;
860
+ for (const entry of sttsEntries) {
861
+ sttsView.setUint32(offset, entry.count);
862
+ sttsView.setUint32(offset + 4, entry.duration);
863
+ offset += 8;
864
+ }
396
865
  newParts.push(createBox('stts', sttsData));
397
866
 
398
867
  // stsc
399
868
  const stscEntries = [];
400
869
  if (chunkOffsets.length > 0) {
401
- let currentSampleCount = chunkOffsets[0].sampleCount, firstChunk = 1;
870
+ let currentSampleCount = chunkOffsets[0].sampleCount;
871
+ let firstChunk = 1;
402
872
  for (let i = 1; i <= chunkOffsets.length; i++) {
403
873
  const sampleCount = i < chunkOffsets.length ? chunkOffsets[i].sampleCount : -1;
404
874
  if (sampleCount !== currentSampleCount) {
405
- stscEntries.push({ firstChunk, samplesPerChunk: currentSampleCount, sampleDescriptionIndex: 1 });
406
- firstChunk = i + 1; currentSampleCount = sampleCount;
875
+ stscEntries.push({
876
+ firstChunk,
877
+ samplesPerChunk: currentSampleCount,
878
+ sampleDescriptionIndex: 1,
879
+ });
880
+ firstChunk = i + 1;
881
+ currentSampleCount = sampleCount;
407
882
  }
408
883
  }
409
- } else stscEntries.push({ firstChunk: 1, samplesPerChunk: samples.length, sampleDescriptionIndex: 1 });
884
+ }
410
885
  const stscData = new Uint8Array(8 + stscEntries.length * 12);
411
886
  const stscView = new DataView(stscData.buffer);
412
887
  stscView.setUint32(4, stscEntries.length);
413
- off = 8;
414
- for (const e of stscEntries) { stscView.setUint32(off, e.firstChunk); stscView.setUint32(off + 4, e.samplesPerChunk); stscView.setUint32(off + 8, e.sampleDescriptionIndex); off += 12; }
888
+ offset = 8;
889
+ for (const entry of stscEntries) {
890
+ stscView.setUint32(offset, entry.firstChunk);
891
+ stscView.setUint32(offset + 4, entry.samplesPerChunk);
892
+ stscView.setUint32(offset + 8, entry.sampleDescriptionIndex);
893
+ offset += 12;
894
+ }
415
895
  newParts.push(createBox('stsc', stscData));
416
896
 
417
897
  // stsz
418
898
  const stszData = new Uint8Array(12 + samples.length * 4);
419
899
  const stszView = new DataView(stszData.buffer);
420
900
  stszView.setUint32(8, samples.length);
421
- off = 12;
422
- for (const s of samples) { stszView.setUint32(off, s.size || 0); off += 4; }
901
+ offset = 12;
902
+ for (const sample of samples) {
903
+ stszView.setUint32(offset, sample.size || 0);
904
+ offset += 4;
905
+ }
423
906
  newParts.push(createBox('stsz', stszData));
424
907
 
425
908
  // stco
426
- const numChunks = chunkOffsets.length || 1;
427
- const stcoData = new Uint8Array(8 + numChunks * 4);
909
+ const stcoData = new Uint8Array(8 + chunkOffsets.length * 4);
428
910
  const stcoView = new DataView(stcoData.buffer);
429
- stcoView.setUint32(4, numChunks);
430
- for (let i = 0; i < numChunks; i++) stcoView.setUint32(8 + i * 4, chunkOffsets[i]?.offset || 0);
911
+ stcoView.setUint32(4, chunkOffsets.length);
912
+ for (let i = 0; i < chunkOffsets.length; i++) {
913
+ stcoView.setUint32(8 + i * 4, chunkOffsets[i].offset || 0);
914
+ }
431
915
  newParts.push(createBox('stco', stcoData));
432
916
 
433
917
  // ctts
434
- const hasCtts = samples.some(s => s.compositionTimeOffset);
918
+ const hasCtts = samples.some((sample) => sample.compositionTimeOffset);
435
919
  if (hasCtts) {
436
920
  const cttsEntries = [];
437
- let curOff = null; count = 0;
438
- for (const s of samples) {
439
- const o = s.compositionTimeOffset || 0;
440
- if (o === curOff) count++;
441
- else { if (curOff !== null) cttsEntries.push({ count, offset: curOff }); curOff = o; count = 1; }
921
+ let currentOffset = null;
922
+ currentCount = 0;
923
+ for (const sample of samples) {
924
+ const compositionOffset = sample.compositionTimeOffset || 0;
925
+ if (compositionOffset === currentOffset) currentCount++;
926
+ else {
927
+ if (currentOffset !== null) {
928
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
929
+ }
930
+ currentOffset = compositionOffset;
931
+ currentCount = 1;
932
+ }
933
+ }
934
+ if (currentOffset !== null) {
935
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
442
936
  }
443
- if (curOff !== null) cttsEntries.push({ count, offset: curOff });
444
937
  const cttsData = new Uint8Array(8 + cttsEntries.length * 8);
445
938
  const cttsView = new DataView(cttsData.buffer);
446
939
  cttsView.setUint32(4, cttsEntries.length);
447
- off = 8;
448
- for (const e of cttsEntries) { cttsView.setUint32(off, e.count); cttsView.setInt32(off + 4, e.offset); off += 8; }
940
+ offset = 8;
941
+ for (const entry of cttsEntries) {
942
+ cttsView.setUint32(offset, entry.count);
943
+ cttsView.setInt32(offset + 4, entry.offset);
944
+ offset += 8;
945
+ }
449
946
  newParts.push(createBox('ctts', cttsData));
450
947
  }
451
948
 
452
- // stss
949
+ // stss (video sync samples)
453
950
  const syncSamples = [];
454
951
  for (let i = 0; i < samples.length; i++) {
455
- const flags = samples[i].flags;
456
- if (flags !== undefined) { if (!((flags >> 16) & 0x1)) syncSamples.push(i + 1); }
952
+ if (isSyncSample(samples[i])) syncSamples.push(i + 1);
457
953
  }
458
954
  if (syncSamples.length > 0 && syncSamples.length < samples.length) {
459
955
  const stssData = new Uint8Array(8 + syncSamples.length * 4);
460
956
  const stssView = new DataView(stssData.buffer);
461
957
  stssView.setUint32(4, syncSamples.length);
462
- off = 8;
463
- for (const n of syncSamples) { stssView.setUint32(off, n); off += 4; }
958
+ offset = 8;
959
+ for (const sampleNumber of syncSamples) {
960
+ stssView.setUint32(offset, sampleNumber);
961
+ offset += 4;
962
+ }
464
963
  newParts.push(createBox('stss', stssData));
465
964
  }
466
965
 
@@ -477,169 +976,151 @@
477
976
  return createBox('minf', ...newParts);
478
977
  }
479
978
 
480
- function rebuildMdia(mdiaBox, trackInfo, maxDuration) {
979
/**
 * Rebuild an `mdia` box for the flattened (non-fragmented) output.
 *
 * The `minf` and `mdhd` children are regenerated from the collected track
 * data; every other child is copied through unchanged, in its original order.
 *
 * @param {object} mdiaBox - Parsed `mdia` box, as accepted by parseChildBoxes
 * @param {object} trackInfo - Collected data for the track that owns this box
 * @returns {Uint8Array} Serialized `mdia` box (whatever createBox returns)
 */
function rebuildMdia(mdiaBox, trackInfo) {
  const rebuiltChildren = [];

  for (const box of parseChildBoxes(mdiaBox)) {
    switch (box.type) {
      case 'minf':
        rebuiltChildren.push(rebuildMinf(box, trackInfo));
        break;
      case 'mdhd':
        rebuiltChildren.push(rebuildMdhd(box, trackInfo));
        break;
      default:
        // Not affected by de-fragmentation: keep the original bytes.
        rebuiltChildren.push(box.data);
    }
  }

  return createBox('mdia', ...rebuiltChildren);
}
490
989
 
491
- function rebuildTrak(trakBox, trackIdMap, maxDuration) {
990
/**
 * Rebuild a `trak` box for the flattened output.
 *
 * The track id is read from the track header and used to look up the collected
 * samples. Any existing `edts` child is discarded and replaced by a freshly
 * generated single-entry edit list placed right after the track header.
 *
 * @param {object} trakBox - Parsed `trak` box, as accepted by parseChildBoxes
 * @param {Map<number, object>} trackInfoMap - Collected track data keyed by track id
 * @param {number} maxMovieDuration - Fallback edit duration, used when the
 *   track carries no `movieDuration` of its own
 * @returns {Uint8Array|null} Serialized `trak` box, or null when the box has
 *   no `tkhd`, the track id is unknown, or the track has no samples
 */
function rebuildTrak(trakBox, trackInfoMap, maxMovieDuration) {
  const children = parseChildBoxes(trakBox);

  // Locate the track id. If several tkhd boxes are present the last one wins.
  let trackId = null;
  for (const box of children) {
    if (box.type === 'tkhd') {
      const header = new DataView(box.data.buffer, box.data.byteOffset, box.data.byteLength);
      // Byte 8 (first byte after the 8-byte box header) selects where the id
      // sits: offset 20 when it is 0, offset 28 otherwise.
      const idOffset = box.data[8] === 0 ? 20 : 28;
      trackId = header.getUint32(idOffset);
    }
  }
  if (trackId === null) return null;

  const trackInfo = trackInfoMap.get(trackId);
  if (!trackInfo || !trackInfo.samples.length) return null;

  // Re-emit the children: drop edts, regenerate mdia/tkhd, copy the rest.
  const parts = [];
  for (const box of children) {
    switch (box.type) {
      case 'edts':
        break;
      case 'mdia':
        parts.push(rebuildMdia(box, trackInfo));
        break;
      case 'tkhd':
        parts.push(rebuildTkhd(box, trackInfo, maxMovieDuration));
        break;
      default:
        parts.push(box.data);
    }
  }

  // Edit list payload, in write order: entry count (1), segment duration,
  // media time, then the two 16-bit rate fields (1 and 0).
  const segmentDuration = trackInfo.movieDuration ?? maxMovieDuration;
  const mediaTime = Math.max(0, Math.round(trackInfo.mediaTime || 0));
  const elstPayload = new Uint8Array(16);
  const writer = new DataView(elstPayload.buffer);
  writer.setUint32(0, 1);
  writer.setUint32(4, segmentDuration);
  writer.setInt32(8, mediaTime);
  writer.setUint16(12, 1);
  writer.setUint16(14, 0);
  const edts = createBox('edts', createFullBox('elst', 0, 0, elstPayload));

  // Insert directly after the first serialized tkhd (recognised by its
  // four-character type at bytes 4-7); with no tkhd the edts goes first.
  let insertAt = 0;
  for (let i = 0; i < parts.length; i++) {
    const part = parts[i];
    const isTkhd =
      part.length >= 8 && String.fromCharCode(part[4], part[5], part[6], part[7]) === 'tkhd';
    if (isTkhd) {
      insertAt = i + 1;
      break;
    }
  }
  parts.splice(insertAt, 0, edts);

  return createBox('trak', ...parts);
}
522
1029
 
523
1030
  function updateStcoOffsets(output, ftypSize, moovSize) {
524
1031
  const mdatContentOffset = ftypSize + moovSize + 8;
525
1032
  const view = new DataView(output.buffer, output.byteOffset, output.byteLength);
1033
+
526
1034
  function scan(start, end) {
527
- let pos = start;
528
- while (pos + 8 <= end) {
529
- const size = view.getUint32(pos);
1035
+ let position = start;
1036
+ while (position + 8 <= end) {
1037
+ const size = view.getUint32(position);
530
1038
  if (size < 8) break;
531
- const type = String.fromCharCode(output[pos + 4], output[pos + 5], output[pos + 6], output[pos + 7]);
1039
+ const type = String.fromCharCode(
1040
+ output[position + 4],
1041
+ output[position + 5],
1042
+ output[position + 6],
1043
+ output[position + 7],
1044
+ );
1045
+
532
1046
  if (type === 'stco') {
533
- const entryCount = view.getUint32(pos + 12);
1047
+ const entryCount = view.getUint32(position + 12);
534
1048
  for (let i = 0; i < entryCount; i++) {
535
- const entryPos = pos + 16 + i * 4;
536
- view.setUint32(entryPos, mdatContentOffset + view.getUint32(entryPos));
1049
+ const entryPos = position + 16 + i * 4;
1050
+ const relativeOffset = view.getUint32(entryPos);
1051
+ view.setUint32(entryPos, mdatContentOffset + relativeOffset);
537
1052
  }
538
- } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) scan(pos + 8, pos + size);
539
- pos += size;
1053
+ } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) {
1054
+ scan(position + 8, position + size);
1055
+ }
1056
+
1057
+ position += size;
540
1058
  }
541
1059
  }
1060
+
542
1061
  scan(0, output.byteLength);
543
1062
  }
544
1063
 
545
- // ============================================
546
- // Main Converter Function
547
- // ============================================
548
-
549
1064
  /**
550
1065
  * Convert fragmented MP4 to standard MP4
551
1066
  * @param {Uint8Array} fmp4Data - fMP4 data
1067
+ * @param {object} [options] - Optional clip settings
1068
+ * @param {number} [options.startTime] - Clip start time (seconds)
1069
+ * @param {number} [options.endTime] - Clip end time (seconds)
552
1070
  * @returns {Uint8Array} Standard MP4 data
553
1071
  */
554
- function convertFmp4ToMp4(fmp4Data) {
1072
+ // Shared rebuild functions — also used by mp4-clip.js for standard MP4 clipping
1073
+ {
1074
+ applyClipToTracks,
1075
+ rebuildMdatContent,
1076
+ calculateMovieDuration,
1077
+ rebuildTrak,
1078
+ rebuildMvhd,
1079
+ updateStcoOffsets,
1080
+ };
1081
+
1082
+ function convertFmp4ToMp4(fmp4Data, options = {}) {
555
1083
  const boxes = parseBoxes(fmp4Data);
556
1084
  const ftyp = findBox(boxes, 'ftyp');
557
1085
  const moov = findBox(boxes, 'moov');
558
1086
  if (!ftyp || !moov) throw new Error('Invalid fMP4: missing ftyp or moov');
559
1087
 
560
- const moovChildren = parseChildBoxes(moov);
561
- const originalTrackIds = [];
562
- for (const child of moovChildren) {
563
- if (child.type === 'trak') {
564
- const trakChildren = parseChildBoxes(child);
565
- for (const tc of trakChildren) {
566
- if (tc.type === 'tkhd') {
567
- const view = new DataView(tc.data.buffer, tc.data.byteOffset, tc.data.byteLength);
568
- originalTrackIds.push(tc.data[8] === 0 ? view.getUint32(20) : view.getUint32(28));
569
- }
570
- }
571
- }
572
- }
1088
+ const movieTimescale = getMovieTimescale(moov);
1089
+ const { trackMetadata, trackOrder } = extractTrackMetadata(moov);
1090
+ const trexDefaults = extractTrexDefaults(moov);
1091
+ const { tracks, combinedMdat } = collectTrackSamples(boxes, trackMetadata, trexDefaults);
573
1092
 
574
- const tracks = new Map();
575
- const mdatChunks = [];
576
- let combinedMdatOffset = 0;
1093
+ if (tracks.size === 0) throw new Error('Invalid fMP4: no track fragments found');
577
1094
 
578
- for (let i = 0; i < boxes.length; i++) {
579
- const box = boxes[i];
580
- if (box.type === 'moof') {
581
- const moofChildren = parseChildBoxes(box);
582
- const moofStart = box.offset;
583
- let nextMdatOffset = 0;
584
- for (let j = i + 1; j < boxes.length; j++) {
585
- if (boxes[j].type === 'mdat') { nextMdatOffset = boxes[j].offset; break; }
586
- if (boxes[j].type === 'moof') break;
587
- }
588
- for (const child of moofChildren) {
589
- if (child.type === 'traf') {
590
- const trafChildren = parseChildBoxes(child);
591
- const tfhd = findBox(trafChildren, 'tfhd');
592
- const trun = findBox(trafChildren, 'trun');
593
- if (tfhd && trun) {
594
- const tfhdInfo = parseTfhd(tfhd.data);
595
- const { samples, dataOffset } = parseTrun(trun.data, tfhdInfo);
596
- if (!tracks.has(tfhdInfo.trackId)) tracks.set(tfhdInfo.trackId, { samples: [], chunkOffsets: [] });
597
- const track = tracks.get(tfhdInfo.trackId);
598
- const chunkOffset = combinedMdatOffset + (moofStart + dataOffset) - (nextMdatOffset + 8);
599
- track.chunkOffsets.push({ offset: chunkOffset, sampleCount: samples.length });
600
- track.samples.push(...samples);
601
- }
602
- }
603
- }
604
- } else if (box.type === 'mdat') {
605
- mdatChunks.push({ data: box.data.subarray(8), offset: combinedMdatOffset });
606
- combinedMdatOffset += box.data.subarray(8).byteLength;
607
- }
608
- }
609
-
610
- const totalMdatSize = mdatChunks.reduce((sum, c) => sum + c.data.byteLength, 0);
611
- const combinedMdat = new Uint8Array(totalMdatSize);
612
- for (const chunk of mdatChunks) combinedMdat.set(chunk.data, chunk.offset);
613
-
614
- const trackIdMap = new Map();
615
- const fmp4TrackIds = Array.from(tracks.keys()).sort((a, b) => a - b);
616
- for (let i = 0; i < fmp4TrackIds.length && i < originalTrackIds.length; i++) {
617
- trackIdMap.set(originalTrackIds[i], tracks.get(fmp4TrackIds[i]));
1095
+ const clippedTracks = applyClipToTracks(tracks, options);
1096
+ if (clippedTracks.size === 0) {
1097
+ throw new Error('Clip range produced no samples');
618
1098
  }
619
1099
 
620
- let maxDuration = 0;
621
- for (const [, track] of tracks) {
622
- let dur = 0;
623
- for (const s of track.samples) dur += s.duration || 0;
624
- maxDuration = Math.max(maxDuration, dur);
625
- }
1100
+ const rebuiltMdat = rebuildMdatContent(clippedTracks, trackOrder, combinedMdat);
1101
+ const maxMovieDuration = calculateMovieDuration(clippedTracks, movieTimescale);
626
1102
 
1103
+ const moovChildren = parseChildBoxes(moov);
627
1104
  const newMoovParts = [];
628
1105
  for (const child of moovChildren) {
629
1106
  if (child.type === 'mvex') continue;
630
- if (child.type === 'trak') newMoovParts.push(rebuildTrak(child, trackIdMap, maxDuration));
631
- else if (child.type === 'mvhd') newMoovParts.push(rebuildMvhd(child, maxDuration));
632
- else newMoovParts.push(child.data);
1107
+ if (child.type === 'trak') {
1108
+ const trak = rebuildTrak(child, clippedTracks, maxMovieDuration);
1109
+ if (trak) newMoovParts.push(trak);
1110
+ } else if (child.type === 'mvhd') {
1111
+ newMoovParts.push(rebuildMvhd(child, maxMovieDuration));
1112
+ } else {
1113
+ newMoovParts.push(child.data);
1114
+ }
633
1115
  }
634
1116
 
635
1117
  const newMoov = createBox('moov', ...newMoovParts);
636
- const newMdat = createBox('mdat', combinedMdat);
1118
+ const newMdat = createBox('mdat', rebuiltMdat);
637
1119
  const output = new Uint8Array(ftyp.size + newMoov.byteLength + newMdat.byteLength);
638
1120
  output.set(ftyp.data, 0);
639
1121
  output.set(newMoov, ftyp.size);
640
1122
  output.set(newMdat, ftyp.size + newMoov.byteLength);
641
1123
  updateStcoOffsets(output, ftyp.size, newMoov.byteLength);
642
-
643
1124
  return output;
644
1125
  }
645
1126
 
@@ -705,7 +1186,7 @@
705
1186
  toMp4.isMpegTs = isMpegTs;
706
1187
  toMp4.isFmp4 = isFmp4;
707
1188
  toMp4.isStandardMp4 = isStandardMp4;
708
- toMp4.version = '1.2.0';
1189
+ toMp4.version = '1.3.0';
709
1190
 
710
1191
  return toMp4;
711
1192
  });