@invintusmedia/tomp4 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tomp4.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * toMp4.js v1.1.1
2
+ * toMp4.js v1.2.1
3
3
  * Convert MPEG-TS and fMP4 to standard MP4
4
4
  * https://github.com/TVWIT/toMp4.js
5
5
  * MIT License
@@ -323,144 +323,615 @@
323
323
  // ============================================
324
324
  /**
325
325
  * fMP4 to Standard MP4 Converter
326
- *
327
- * Converts a fragmented MP4 file to a standard MP4 container
328
- * by extracting samples from fragments and rebuilding the moov box.
329
- *
326
+ *
327
+ * Converts fragmented MP4 data to standard MP4 and supports
328
+ * sample-level clipping for fMP4 inputs.
329
+ *
330
330
  * @module fmp4/converter
331
331
  */
332
332
 
333
333
  import {
334
- parseBoxes, findBox, parseChildBoxes, createBox,
335
- parseTfhd, parseTrun
334
+ parseBoxes,
335
+ findBox,
336
+ parseChildBoxes,
337
+ createBox,
338
+ parseTfhd,
339
+ parseTfdt,
340
+ parseTrun,
341
+ getMovieTimescale,
336
342
  } from './utils.js';
337
343
 
338
- // ============================================
339
- // Moov Rebuilding Functions
340
- // ============================================
344
+ function createFullBox(type, version, flags, ...payloads) {
345
+ const header = new Uint8Array(4);
346
+ header[0] = version;
347
+ header[1] = (flags >> 16) & 0xff;
348
+ header[2] = (flags >> 8) & 0xff;
349
+ header[3] = flags & 0xff;
350
+ return createBox(type, header, ...payloads);
351
+ }
352
+
353
+ function sumSampleDurations(samples) {
354
+ let total = 0;
355
+ for (const sample of samples) total += sample.duration || 0;
356
+ return total;
357
+ }
358
+
359
+ function toMovieTimescale(value, trackTimescale, movieTimescale) {
360
+ if (!trackTimescale || !movieTimescale) return value;
361
+ return Math.round((value * movieTimescale) / trackTimescale);
362
+ }
363
+
364
+ function isSyncSample(sample) {
365
+ const flags = sample.flags;
366
+ if (flags === undefined || flags === null) return true;
367
+ return ((flags >> 16) & 0x1) === 0;
368
+ }
369
+
370
+ function parseTrex(trexData) {
371
+ const view = new DataView(trexData.buffer, trexData.byteOffset, trexData.byteLength);
372
+ return {
373
+ trackId: view.getUint32(12),
374
+ defaultSampleDuration: view.getUint32(20),
375
+ defaultSampleSize: view.getUint32(24),
376
+ defaultSampleFlags: view.getUint32(28),
377
+ };
378
+ }
379
+
380
+ function extractTrexDefaults(moovBox) {
381
+ const defaults = new Map();
382
+ const moovChildren = parseChildBoxes(moovBox);
383
+ const mvex = findBox(moovChildren, 'mvex');
384
+ if (!mvex) return defaults;
385
+
386
+ const mvexChildren = parseChildBoxes(mvex);
387
+ for (const child of mvexChildren) {
388
+ if (child.type !== 'trex') continue;
389
+ const trex = parseTrex(child.data);
390
+ defaults.set(trex.trackId, trex);
391
+ }
392
+ return defaults;
393
+ }
394
+
395
+ function extractTrackMetadata(moovBox) {
396
+ const trackMetadata = new Map();
397
+ const trackOrder = [];
398
+ const moovChildren = parseChildBoxes(moovBox);
399
+
400
+ for (const child of moovChildren) {
401
+ if (child.type !== 'trak') continue;
402
+ const trakChildren = parseChildBoxes(child);
403
+
404
+ let trackId = null;
405
+ let timescale = 0;
406
+ let handlerType = 'unknown';
407
+
408
+ for (const trakChild of trakChildren) {
409
+ if (trakChild.type === 'tkhd') {
410
+ const view = new DataView(trakChild.data.buffer, trakChild.data.byteOffset, trakChild.data.byteLength);
411
+ trackId = trakChild.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
412
+ } else if (trakChild.type === 'mdia') {
413
+ const mdiaChildren = parseChildBoxes(trakChild);
414
+ for (const mdiaChild of mdiaChildren) {
415
+ if (mdiaChild.type === 'mdhd') {
416
+ const view = new DataView(mdiaChild.data.buffer, mdiaChild.data.byteOffset, mdiaChild.data.byteLength);
417
+ timescale = mdiaChild.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
418
+ } else if (mdiaChild.type === 'hdlr' && mdiaChild.data.byteLength >= 20) {
419
+ handlerType = String.fromCharCode(
420
+ mdiaChild.data[16],
421
+ mdiaChild.data[17],
422
+ mdiaChild.data[18],
423
+ mdiaChild.data[19],
424
+ );
425
+ }
426
+ }
427
+ }
428
+ }
429
+
430
+ if (trackId !== null) {
431
+ trackMetadata.set(trackId, {
432
+ trackId,
433
+ timescale: timescale || 90000,
434
+ handlerType,
435
+ });
436
+ trackOrder.push(trackId);
437
+ }
438
+ }
439
+
440
+ return { trackMetadata, trackOrder };
441
+ }
442
+
443
+ function cloneSample(sample) {
444
+ return {
445
+ duration: sample.duration || 0,
446
+ size: sample.size || 0,
447
+ flags: sample.flags,
448
+ compositionTimeOffset: sample.compositionTimeOffset || 0,
449
+ dts: sample.dts || 0,
450
+ pts: sample.pts || 0,
451
+ byteOffset: sample.byteOffset || 0,
452
+ };
453
+ }
454
+
455
+ function normalizeSamples(samples, baseDts) {
456
+ return samples.map((sample) => {
457
+ const next = cloneSample(sample);
458
+ next.dts -= baseDts;
459
+ next.pts -= baseDts;
460
+ return next;
461
+ });
462
+ }
463
+
464
+ function clipVideoSamples(samples, startTick, endTick) {
465
+ if (!samples.length) {
466
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
467
+ }
468
+
469
+ let requestedStartIndex = samples.length;
470
+ for (let i = 0; i < samples.length; i++) {
471
+ const sampleEnd = (samples[i].pts || 0) + (samples[i].duration || 0);
472
+ if (sampleEnd > startTick) {
473
+ requestedStartIndex = i;
474
+ break;
475
+ }
476
+ }
477
+ if (requestedStartIndex >= samples.length) {
478
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
479
+ }
480
+
481
+ let decodeStartIndex = requestedStartIndex;
482
+ for (let i = requestedStartIndex; i >= 0; i--) {
483
+ if (isSyncSample(samples[i])) {
484
+ decodeStartIndex = i;
485
+ break;
486
+ }
487
+ }
488
+
489
+ let endIndex = samples.length;
490
+ if (Number.isFinite(endTick)) {
491
+ for (let i = decodeStartIndex; i < samples.length; i++) {
492
+ if ((samples[i].pts || 0) >= endTick) {
493
+ endIndex = i;
494
+ break;
495
+ }
496
+ }
497
+ }
498
+ if (endIndex <= decodeStartIndex) {
499
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
500
+ }
501
+
502
+ const selected = samples.slice(decodeStartIndex, endIndex);
503
+ const decodeStartDts = selected[0].dts || 0;
504
+ const mediaTime = Math.max(0, startTick - decodeStartDts);
505
+ const normalized = normalizeSamples(selected, decodeStartDts);
506
+ const decodeDuration = sumSampleDurations(normalized);
507
+ const maxPlayable = Math.max(0, decodeDuration - mediaTime);
508
+ const requested = Number.isFinite(endTick) ? Math.max(0, endTick - startTick) : maxPlayable;
509
+ const playbackDuration = Math.min(requested, maxPlayable);
510
+
511
+ return {
512
+ samples: normalized,
513
+ mediaTime,
514
+ playbackDuration,
515
+ };
516
+ }
517
+
518
+ function clipNonVideoSamples(samples, startTick, endTick) {
519
+ if (!samples.length) {
520
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
521
+ }
522
+
523
+ let startIndex = 0;
524
+ while (startIndex < samples.length && (samples[startIndex].pts || 0) < startTick) {
525
+ startIndex++;
526
+ }
527
+ if (startIndex >= samples.length) {
528
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
529
+ }
530
+
531
+ let endIndex = samples.length;
532
+ if (Number.isFinite(endTick)) {
533
+ for (let i = startIndex; i < samples.length; i++) {
534
+ if ((samples[i].pts || 0) >= endTick) {
535
+ endIndex = i;
536
+ break;
537
+ }
538
+ }
539
+ }
540
+ if (endIndex <= startIndex) {
541
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
542
+ }
543
+
544
+ const selected = samples.slice(startIndex, endIndex);
545
+ const decodeStartDts = selected[0].dts || 0;
546
+ const normalized = normalizeSamples(selected, decodeStartDts);
547
+ const decodeDuration = sumSampleDurations(normalized);
548
+ const requested = Number.isFinite(endTick) ? Math.max(0, endTick - startTick) : decodeDuration;
549
+ const playbackDuration = Math.min(requested, decodeDuration);
550
+
551
+ return {
552
+ samples: normalized,
553
+ mediaTime: 0,
554
+ playbackDuration,
555
+ };
556
+ }
557
+
558
+ function applyClipToTracks(tracks, options = {}) {
559
+ const hasStart = Number.isFinite(options.startTime);
560
+ const hasEnd = Number.isFinite(options.endTime);
561
+ if (!hasStart && !hasEnd) {
562
+ for (const [, track] of tracks) {
563
+ if (!track.samples.length) continue;
564
+ const baseDts = track.samples[0].dts || 0;
565
+ track.samples = normalizeSamples(track.samples, baseDts);
566
+ track.mediaTime = 0;
567
+ track.playbackDuration = sumSampleDurations(track.samples);
568
+ }
569
+ return tracks;
570
+ }
571
+
572
+ const startSec = hasStart ? Math.max(0, options.startTime) : 0;
573
+ const endSec = hasEnd ? Math.max(startSec, options.endTime) : Infinity;
574
+
575
+ let videoTrackId = null;
576
+ for (const [trackId, track] of tracks) {
577
+ if (track.handlerType === 'vide' && track.samples.length > 0) {
578
+ videoTrackId = trackId;
579
+ break;
580
+ }
581
+ }
582
+
583
+ const clipped = new Map();
584
+ for (const [trackId, track] of tracks) {
585
+ if (!track.samples.length) continue;
586
+
587
+ const startTick = Math.round(startSec * track.timescale);
588
+ const endTick = Number.isFinite(endSec) ? Math.round(endSec * track.timescale) : Infinity;
589
+ const clip = trackId === videoTrackId
590
+ ? clipVideoSamples(track.samples, startTick, endTick)
591
+ : clipNonVideoSamples(track.samples, startTick, endTick);
592
+
593
+ if (!clip.samples.length) continue;
594
+
595
+ clipped.set(trackId, {
596
+ ...track,
597
+ samples: clip.samples,
598
+ mediaTime: clip.mediaTime,
599
+ playbackDuration: clip.playbackDuration,
600
+ chunkOffsets: [],
601
+ });
602
+ }
603
+
604
+ return clipped;
605
+ }
606
+
607
+ function collectTrackSamples(boxes, trackMetadata, trexDefaults) {
608
+ const tracks = new Map();
609
+ const mdatChunks = [];
610
+ let combinedMdatOffset = 0;
611
+
612
+ for (let i = 0; i < boxes.length; i++) {
613
+ const box = boxes[i];
614
+ if (box.type === 'moof') {
615
+ const moofChildren = parseChildBoxes(box);
616
+ const moofStart = box.offset;
617
+
618
+ let nextMdatOffset = -1;
619
+ for (let j = i + 1; j < boxes.length; j++) {
620
+ if (boxes[j].type === 'mdat') {
621
+ nextMdatOffset = boxes[j].offset;
622
+ break;
623
+ }
624
+ if (boxes[j].type === 'moof') break;
625
+ }
626
+ if (nextMdatOffset < 0) continue;
627
+
628
+ const mdatContentStartAbs = nextMdatOffset + 8;
629
+
630
+ for (const child of moofChildren) {
631
+ if (child.type !== 'traf') continue;
632
+
633
+ const trafChildren = parseChildBoxes(child);
634
+ const tfhdBox = findBox(trafChildren, 'tfhd');
635
+ if (!tfhdBox) continue;
636
+
637
+ const tfhdView = new DataView(tfhdBox.data.buffer, tfhdBox.data.byteOffset, tfhdBox.data.byteLength);
638
+ const trackId = tfhdView.getUint32(12);
639
+ const tfhd = parseTfhd(tfhdBox.data, trexDefaults.get(trackId) || {});
640
+ const tfdtBox = findBox(trafChildren, 'tfdt');
641
+ let decodeTime = tfdtBox ? parseTfdt(tfdtBox.data) : 0;
642
+ let runDataCursorAbs = null;
643
+
644
+ if (!tracks.has(trackId)) {
645
+ const meta = trackMetadata.get(trackId) || {};
646
+ tracks.set(trackId, {
647
+ trackId,
648
+ timescale: meta.timescale || 90000,
649
+ handlerType: meta.handlerType || 'unknown',
650
+ samples: [],
651
+ chunkOffsets: [],
652
+ mediaTime: 0,
653
+ playbackDuration: 0,
654
+ });
655
+ }
656
+ const track = tracks.get(trackId);
657
+
658
+ for (const trafChild of trafChildren) {
659
+ if (trafChild.type !== 'trun') continue;
660
+ const { samples, dataOffset, flags } = parseTrun(trafChild.data, tfhd);
661
+ const runSize = samples.reduce((sum, sample) => sum + (sample.size || 0), 0);
662
+
663
+ let dataStartAbs;
664
+ if (flags & 0x1) {
665
+ const baseAbs = (tfhd.flags & 0x1) ? tfhd.baseDataOffset : moofStart;
666
+ dataStartAbs = baseAbs + dataOffset;
667
+ } else if (runDataCursorAbs !== null) {
668
+ dataStartAbs = runDataCursorAbs;
669
+ } else {
670
+ dataStartAbs = mdatContentStartAbs;
671
+ }
672
+
673
+ let sampleByteOffset = combinedMdatOffset + Math.max(0, dataStartAbs - mdatContentStartAbs);
674
+ for (const sample of samples) {
675
+ const dts = decodeTime;
676
+ const pts = dts + (sample.compositionTimeOffset || 0);
677
+ track.samples.push({
678
+ ...sample,
679
+ dts,
680
+ pts,
681
+ byteOffset: sampleByteOffset,
682
+ });
683
+ decodeTime += sample.duration || 0;
684
+ sampleByteOffset += sample.size || 0;
685
+ }
686
+
687
+ runDataCursorAbs = dataStartAbs + runSize;
688
+ }
689
+ }
690
+ } else if (box.type === 'mdat') {
691
+ const data = box.data.subarray(8);
692
+ mdatChunks.push({ data, offset: combinedMdatOffset });
693
+ combinedMdatOffset += data.byteLength;
694
+ }
695
+ }
696
+
697
+ const combinedMdat = new Uint8Array(combinedMdatOffset);
698
+ for (const chunk of mdatChunks) {
699
+ combinedMdat.set(chunk.data, chunk.offset);
700
+ }
701
+
702
+ return { tracks, combinedMdat };
703
+ }
704
+
705
+ function rebuildMdatContent(tracks, trackOrder, sourceMdat) {
706
+ const orderedTrackIds = trackOrder.filter((trackId) => tracks.has(trackId));
707
+ for (const trackId of tracks.keys()) {
708
+ if (!orderedTrackIds.includes(trackId)) orderedTrackIds.push(trackId);
709
+ }
710
+
711
+ let totalSize = 0;
712
+ for (const trackId of orderedTrackIds) {
713
+ const track = tracks.get(trackId);
714
+ for (const sample of track.samples) totalSize += sample.size || 0;
715
+ }
716
+
717
+ const mdatData = new Uint8Array(totalSize);
718
+ let writeOffset = 0;
719
+
720
+ for (const trackId of orderedTrackIds) {
721
+ const track = tracks.get(trackId);
722
+ if (!track || !track.samples.length) {
723
+ if (track) track.chunkOffsets = [];
724
+ continue;
725
+ }
726
+
727
+ track.chunkOffsets = [{ offset: writeOffset, sampleCount: track.samples.length }];
728
+ for (const sample of track.samples) {
729
+ const start = sample.byteOffset || 0;
730
+ const end = start + (sample.size || 0);
731
+ if (start < 0 || end > sourceMdat.byteLength) {
732
+ throw new Error(`Invalid sample byte range for track ${trackId}: ${start}-${end}`);
733
+ }
734
+ mdatData.set(sourceMdat.subarray(start, end), writeOffset);
735
+ sample.byteOffset = writeOffset;
736
+ writeOffset += sample.size || 0;
737
+ }
738
+ }
739
+
740
+ return mdatData;
741
+ }
742
+
743
+ function calculateMovieDuration(tracks, movieTimescale) {
744
+ let maxDuration = 0;
745
+ for (const [, track] of tracks) {
746
+ const fallback = Math.max(0, sumSampleDurations(track.samples) - (track.mediaTime || 0));
747
+ const playbackDuration = track.playbackDuration > 0 ? track.playbackDuration : fallback;
748
+ track.playbackDuration = playbackDuration;
749
+ track.movieDuration = toMovieTimescale(playbackDuration, track.timescale, movieTimescale);
750
+ maxDuration = Math.max(maxDuration, track.movieDuration);
751
+ }
752
+ return maxDuration;
753
+ }
341
754
 
342
755
  function rebuildMvhd(mvhdBox, duration) {
343
756
  const data = new Uint8Array(mvhdBox.data);
344
757
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
345
758
  const version = data[8];
346
759
  const durationOffset = version === 0 ? 24 : 32;
347
- if (version === 0) view.setUint32(durationOffset, duration);
348
- else { view.setUint32(durationOffset, 0); view.setUint32(durationOffset + 4, duration); }
760
+ if (version === 0) {
761
+ view.setUint32(durationOffset, duration);
762
+ } else {
763
+ view.setUint32(durationOffset, 0);
764
+ view.setUint32(durationOffset + 4, duration);
765
+ }
349
766
  return data;
350
767
  }
351
768
 
352
- function rebuildTkhd(tkhdBox, trackInfo, maxDuration) {
769
+ function rebuildTkhd(tkhdBox, trackInfo, maxMovieDuration) {
353
770
  const data = new Uint8Array(tkhdBox.data);
354
771
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
355
772
  const version = data[8];
356
- let trackDuration = maxDuration;
357
- if (trackInfo) { trackDuration = 0; for (const s of trackInfo.samples) trackDuration += s.duration || 0; }
358
- if (version === 0) view.setUint32(28, trackDuration);
359
- else { view.setUint32(36, 0); view.setUint32(40, trackDuration); }
773
+ const duration = trackInfo?.movieDuration ?? maxMovieDuration;
774
+ if (version === 0) view.setUint32(28, duration);
775
+ else {
776
+ view.setUint32(36, 0);
777
+ view.setUint32(40, duration);
778
+ }
360
779
  return data;
361
780
  }
362
781
 
363
- function rebuildMdhd(mdhdBox, trackInfo, maxDuration) {
782
+ function rebuildMdhd(mdhdBox, trackInfo) {
364
783
  const data = new Uint8Array(mdhdBox.data);
365
784
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
366
785
  const version = data[8];
367
- let trackDuration = 0;
368
- if (trackInfo) for (const s of trackInfo.samples) trackDuration += s.duration || 0;
786
+ const duration = sumSampleDurations(trackInfo?.samples || []);
369
787
  const durationOffset = version === 0 ? 24 : 32;
370
- if (version === 0) view.setUint32(durationOffset, trackDuration);
371
- else { view.setUint32(durationOffset, 0); view.setUint32(durationOffset + 4, trackDuration); }
788
+ if (version === 0) {
789
+ view.setUint32(durationOffset, duration);
790
+ } else {
791
+ view.setUint32(durationOffset, 0);
792
+ view.setUint32(durationOffset + 4, duration);
793
+ }
372
794
  return data;
373
795
  }
374
796
 
375
797
  function rebuildStbl(stblBox, trackInfo) {
376
798
  const stblChildren = parseChildBoxes(stblBox);
377
799
  const newParts = [];
378
- for (const child of stblChildren) if (child.type === 'stsd') { newParts.push(child.data); break; }
800
+ for (const child of stblChildren) {
801
+ if (child.type === 'stsd') {
802
+ newParts.push(child.data);
803
+ break;
804
+ }
805
+ }
806
+
379
807
  const samples = trackInfo?.samples || [];
380
808
  const chunkOffsets = trackInfo?.chunkOffsets || [];
381
809
 
382
810
  // stts
383
811
  const sttsEntries = [];
384
- let curDur = null, count = 0;
385
- for (const s of samples) {
386
- const d = s.duration || 0;
387
- if (d === curDur) count++;
388
- else { if (curDur !== null) sttsEntries.push({ count, duration: curDur }); curDur = d; count = 1; }
812
+ let currentDuration = null;
813
+ let currentCount = 0;
814
+ for (const sample of samples) {
815
+ const duration = sample.duration || 0;
816
+ if (duration === currentDuration) currentCount++;
817
+ else {
818
+ if (currentDuration !== null) {
819
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
820
+ }
821
+ currentDuration = duration;
822
+ currentCount = 1;
823
+ }
824
+ }
825
+ if (currentDuration !== null) {
826
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
389
827
  }
390
- if (curDur !== null) sttsEntries.push({ count, duration: curDur });
391
828
  const sttsData = new Uint8Array(8 + sttsEntries.length * 8);
392
829
  const sttsView = new DataView(sttsData.buffer);
393
830
  sttsView.setUint32(4, sttsEntries.length);
394
- let off = 8;
395
- for (const e of sttsEntries) { sttsView.setUint32(off, e.count); sttsView.setUint32(off + 4, e.duration); off += 8; }
831
+ let offset = 8;
832
+ for (const entry of sttsEntries) {
833
+ sttsView.setUint32(offset, entry.count);
834
+ sttsView.setUint32(offset + 4, entry.duration);
835
+ offset += 8;
836
+ }
396
837
  newParts.push(createBox('stts', sttsData));
397
838
 
398
839
  // stsc
399
840
  const stscEntries = [];
400
841
  if (chunkOffsets.length > 0) {
401
- let currentSampleCount = chunkOffsets[0].sampleCount, firstChunk = 1;
842
+ let currentSampleCount = chunkOffsets[0].sampleCount;
843
+ let firstChunk = 1;
402
844
  for (let i = 1; i <= chunkOffsets.length; i++) {
403
845
  const sampleCount = i < chunkOffsets.length ? chunkOffsets[i].sampleCount : -1;
404
846
  if (sampleCount !== currentSampleCount) {
405
- stscEntries.push({ firstChunk, samplesPerChunk: currentSampleCount, sampleDescriptionIndex: 1 });
406
- firstChunk = i + 1; currentSampleCount = sampleCount;
847
+ stscEntries.push({
848
+ firstChunk,
849
+ samplesPerChunk: currentSampleCount,
850
+ sampleDescriptionIndex: 1,
851
+ });
852
+ firstChunk = i + 1;
853
+ currentSampleCount = sampleCount;
407
854
  }
408
855
  }
409
- } else stscEntries.push({ firstChunk: 1, samplesPerChunk: samples.length, sampleDescriptionIndex: 1 });
856
+ }
410
857
  const stscData = new Uint8Array(8 + stscEntries.length * 12);
411
858
  const stscView = new DataView(stscData.buffer);
412
859
  stscView.setUint32(4, stscEntries.length);
413
- off = 8;
414
- for (const e of stscEntries) { stscView.setUint32(off, e.firstChunk); stscView.setUint32(off + 4, e.samplesPerChunk); stscView.setUint32(off + 8, e.sampleDescriptionIndex); off += 12; }
860
+ offset = 8;
861
+ for (const entry of stscEntries) {
862
+ stscView.setUint32(offset, entry.firstChunk);
863
+ stscView.setUint32(offset + 4, entry.samplesPerChunk);
864
+ stscView.setUint32(offset + 8, entry.sampleDescriptionIndex);
865
+ offset += 12;
866
+ }
415
867
  newParts.push(createBox('stsc', stscData));
416
868
 
417
869
  // stsz
418
870
  const stszData = new Uint8Array(12 + samples.length * 4);
419
871
  const stszView = new DataView(stszData.buffer);
420
872
  stszView.setUint32(8, samples.length);
421
- off = 12;
422
- for (const s of samples) { stszView.setUint32(off, s.size || 0); off += 4; }
873
+ offset = 12;
874
+ for (const sample of samples) {
875
+ stszView.setUint32(offset, sample.size || 0);
876
+ offset += 4;
877
+ }
423
878
  newParts.push(createBox('stsz', stszData));
424
879
 
425
880
  // stco
426
- const numChunks = chunkOffsets.length || 1;
427
- const stcoData = new Uint8Array(8 + numChunks * 4);
881
+ const stcoData = new Uint8Array(8 + chunkOffsets.length * 4);
428
882
  const stcoView = new DataView(stcoData.buffer);
429
- stcoView.setUint32(4, numChunks);
430
- for (let i = 0; i < numChunks; i++) stcoView.setUint32(8 + i * 4, chunkOffsets[i]?.offset || 0);
883
+ stcoView.setUint32(4, chunkOffsets.length);
884
+ for (let i = 0; i < chunkOffsets.length; i++) {
885
+ stcoView.setUint32(8 + i * 4, chunkOffsets[i].offset || 0);
886
+ }
431
887
  newParts.push(createBox('stco', stcoData));
432
888
 
433
889
  // ctts
434
- const hasCtts = samples.some(s => s.compositionTimeOffset);
890
+ const hasCtts = samples.some((sample) => sample.compositionTimeOffset);
435
891
  if (hasCtts) {
436
892
  const cttsEntries = [];
437
- let curOff = null; count = 0;
438
- for (const s of samples) {
439
- const o = s.compositionTimeOffset || 0;
440
- if (o === curOff) count++;
441
- else { if (curOff !== null) cttsEntries.push({ count, offset: curOff }); curOff = o; count = 1; }
893
+ let currentOffset = null;
894
+ currentCount = 0;
895
+ for (const sample of samples) {
896
+ const compositionOffset = sample.compositionTimeOffset || 0;
897
+ if (compositionOffset === currentOffset) currentCount++;
898
+ else {
899
+ if (currentOffset !== null) {
900
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
901
+ }
902
+ currentOffset = compositionOffset;
903
+ currentCount = 1;
904
+ }
905
+ }
906
+ if (currentOffset !== null) {
907
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
442
908
  }
443
- if (curOff !== null) cttsEntries.push({ count, offset: curOff });
444
909
  const cttsData = new Uint8Array(8 + cttsEntries.length * 8);
445
910
  const cttsView = new DataView(cttsData.buffer);
446
911
  cttsView.setUint32(4, cttsEntries.length);
447
- off = 8;
448
- for (const e of cttsEntries) { cttsView.setUint32(off, e.count); cttsView.setInt32(off + 4, e.offset); off += 8; }
912
+ offset = 8;
913
+ for (const entry of cttsEntries) {
914
+ cttsView.setUint32(offset, entry.count);
915
+ cttsView.setInt32(offset + 4, entry.offset);
916
+ offset += 8;
917
+ }
449
918
  newParts.push(createBox('ctts', cttsData));
450
919
  }
451
920
 
452
- // stss
921
+ // stss (video sync samples)
453
922
  const syncSamples = [];
454
923
  for (let i = 0; i < samples.length; i++) {
455
- const flags = samples[i].flags;
456
- if (flags !== undefined) { if (!((flags >> 16) & 0x1)) syncSamples.push(i + 1); }
924
+ if (isSyncSample(samples[i])) syncSamples.push(i + 1);
457
925
  }
458
926
  if (syncSamples.length > 0 && syncSamples.length < samples.length) {
459
927
  const stssData = new Uint8Array(8 + syncSamples.length * 4);
460
928
  const stssView = new DataView(stssData.buffer);
461
929
  stssView.setUint32(4, syncSamples.length);
462
- off = 8;
463
- for (const n of syncSamples) { stssView.setUint32(off, n); off += 4; }
930
+ offset = 8;
931
+ for (const sampleNumber of syncSamples) {
932
+ stssView.setUint32(offset, sampleNumber);
933
+ offset += 4;
934
+ }
464
935
  newParts.push(createBox('stss', stssData));
465
936
  }
466
937
 
@@ -477,169 +948,141 @@
477
948
  return createBox('minf', ...newParts);
478
949
  }
479
950
 
480
- function rebuildMdia(mdiaBox, trackInfo, maxDuration) {
951
+ function rebuildMdia(mdiaBox, trackInfo) {
481
952
  const mdiaChildren = parseChildBoxes(mdiaBox);
482
953
  const newParts = [];
483
954
  for (const child of mdiaChildren) {
484
955
  if (child.type === 'minf') newParts.push(rebuildMinf(child, trackInfo));
485
- else if (child.type === 'mdhd') newParts.push(rebuildMdhd(child, trackInfo, maxDuration));
956
+ else if (child.type === 'mdhd') newParts.push(rebuildMdhd(child, trackInfo));
486
957
  else newParts.push(child.data);
487
958
  }
488
959
  return createBox('mdia', ...newParts);
489
960
  }
490
961
 
491
- function rebuildTrak(trakBox, trackIdMap, maxDuration) {
962
+ function rebuildTrak(trakBox, trackInfoMap, maxMovieDuration) {
492
963
  const trakChildren = parseChildBoxes(trakBox);
493
- let trackId = 1;
964
+ let trackId = null;
494
965
  for (const child of trakChildren) {
495
- if (child.type === 'tkhd') {
496
- const view = new DataView(child.data.buffer, child.data.byteOffset, child.data.byteLength);
497
- trackId = child.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
498
- }
966
+ if (child.type !== 'tkhd') continue;
967
+ const view = new DataView(child.data.buffer, child.data.byteOffset, child.data.byteLength);
968
+ trackId = child.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
499
969
  }
500
- const trackInfo = trackIdMap.get(trackId);
970
+ if (trackId === null) return null;
971
+
972
+ const trackInfo = trackInfoMap.get(trackId);
973
+ if (!trackInfo || !trackInfo.samples.length) return null;
974
+
501
975
  const newParts = [];
502
- let hasEdts = false;
503
976
  for (const child of trakChildren) {
504
- if (child.type === 'edts') { hasEdts = true; newParts.push(child.data); }
505
- else if (child.type === 'mdia') newParts.push(rebuildMdia(child, trackInfo, maxDuration));
506
- else if (child.type === 'tkhd') newParts.push(rebuildTkhd(child, trackInfo, maxDuration));
977
+ if (child.type === 'edts') continue;
978
+ if (child.type === 'mdia') newParts.push(rebuildMdia(child, trackInfo));
979
+ else if (child.type === 'tkhd') newParts.push(rebuildTkhd(child, trackInfo, maxMovieDuration));
507
980
  else newParts.push(child.data);
508
981
  }
509
- if (!hasEdts && trackInfo) {
510
- let trackDuration = 0;
511
- for (const s of trackInfo.samples) trackDuration += s.duration || 0;
512
- const elstData = new Uint8Array(20);
513
- const elstView = new DataView(elstData.buffer);
514
- elstView.setUint32(4, 1); elstView.setUint32(8, maxDuration); elstView.setInt32(12, 0); elstView.setInt16(16, 1);
515
- const elst = createBox('elst', elstData);
516
- const edts = createBox('edts', elst);
517
- const tkhdIndex = newParts.findIndex(p => p.length >= 8 && String.fromCharCode(p[4], p[5], p[6], p[7]) === 'tkhd');
518
- if (tkhdIndex >= 0) newParts.splice(tkhdIndex + 1, 0, edts);
519
- }
982
+
983
+ const elstPayload = new Uint8Array(16);
984
+ const elstView = new DataView(elstPayload.buffer);
985
+ elstView.setUint32(0, 1);
986
+ elstView.setUint32(4, trackInfo.movieDuration ?? maxMovieDuration);
987
+ elstView.setInt32(8, Math.max(0, Math.round(trackInfo.mediaTime || 0)));
988
+ elstView.setUint16(12, 1);
989
+ elstView.setUint16(14, 0);
990
+ const elst = createFullBox('elst', 0, 0, elstPayload);
991
+ const edts = createBox('edts', elst);
992
+
993
+ const tkhdIndex = newParts.findIndex((part) =>
994
+ part.length >= 8 && String.fromCharCode(part[4], part[5], part[6], part[7]) === 'tkhd',
995
+ );
996
+ if (tkhdIndex >= 0) newParts.splice(tkhdIndex + 1, 0, edts);
997
+ else newParts.unshift(edts);
998
+
520
999
  return createBox('trak', ...newParts);
521
1000
  }
522
1001
 
523
1002
  function updateStcoOffsets(output, ftypSize, moovSize) {
524
1003
  const mdatContentOffset = ftypSize + moovSize + 8;
525
1004
  const view = new DataView(output.buffer, output.byteOffset, output.byteLength);
1005
+
526
1006
  function scan(start, end) {
527
- let pos = start;
528
- while (pos + 8 <= end) {
529
- const size = view.getUint32(pos);
1007
+ let position = start;
1008
+ while (position + 8 <= end) {
1009
+ const size = view.getUint32(position);
530
1010
  if (size < 8) break;
531
- const type = String.fromCharCode(output[pos + 4], output[pos + 5], output[pos + 6], output[pos + 7]);
1011
+ const type = String.fromCharCode(
1012
+ output[position + 4],
1013
+ output[position + 5],
1014
+ output[position + 6],
1015
+ output[position + 7],
1016
+ );
1017
+
532
1018
  if (type === 'stco') {
533
- const entryCount = view.getUint32(pos + 12);
1019
+ const entryCount = view.getUint32(position + 12);
534
1020
  for (let i = 0; i < entryCount; i++) {
535
- const entryPos = pos + 16 + i * 4;
536
- view.setUint32(entryPos, mdatContentOffset + view.getUint32(entryPos));
1021
+ const entryPos = position + 16 + i * 4;
1022
+ const relativeOffset = view.getUint32(entryPos);
1023
+ view.setUint32(entryPos, mdatContentOffset + relativeOffset);
537
1024
  }
538
- } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) scan(pos + 8, pos + size);
539
- pos += size;
1025
+ } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) {
1026
+ scan(position + 8, position + size);
1027
+ }
1028
+
1029
+ position += size;
540
1030
  }
541
1031
  }
1032
+
542
1033
  scan(0, output.byteLength);
543
1034
  }
544
1035
 
545
- // ============================================
546
- // Main Converter Function
547
- // ============================================
548
-
549
1036
  /**
550
1037
  * Convert fragmented MP4 to standard MP4
551
1038
  * @param {Uint8Array} fmp4Data - fMP4 data
1039
+ * @param {object} [options] - Optional clip settings
1040
+ * @param {number} [options.startTime] - Clip start time (seconds)
1041
+ * @param {number} [options.endTime] - Clip end time (seconds)
552
1042
  * @returns {Uint8Array} Standard MP4 data
553
1043
  */
554
- function convertFmp4ToMp4(fmp4Data) {
1044
+ function convertFmp4ToMp4(fmp4Data, options = {}) {
555
1045
  const boxes = parseBoxes(fmp4Data);
556
1046
  const ftyp = findBox(boxes, 'ftyp');
557
1047
  const moov = findBox(boxes, 'moov');
558
1048
  if (!ftyp || !moov) throw new Error('Invalid fMP4: missing ftyp or moov');
559
1049
 
560
- const moovChildren = parseChildBoxes(moov);
561
- const originalTrackIds = [];
562
- for (const child of moovChildren) {
563
- if (child.type === 'trak') {
564
- const trakChildren = parseChildBoxes(child);
565
- for (const tc of trakChildren) {
566
- if (tc.type === 'tkhd') {
567
- const view = new DataView(tc.data.buffer, tc.data.byteOffset, tc.data.byteLength);
568
- originalTrackIds.push(tc.data[8] === 0 ? view.getUint32(20) : view.getUint32(28));
569
- }
570
- }
571
- }
572
- }
573
-
574
- const tracks = new Map();
575
- const mdatChunks = [];
576
- let combinedMdatOffset = 0;
577
-
578
- for (let i = 0; i < boxes.length; i++) {
579
- const box = boxes[i];
580
- if (box.type === 'moof') {
581
- const moofChildren = parseChildBoxes(box);
582
- const moofStart = box.offset;
583
- let nextMdatOffset = 0;
584
- for (let j = i + 1; j < boxes.length; j++) {
585
- if (boxes[j].type === 'mdat') { nextMdatOffset = boxes[j].offset; break; }
586
- if (boxes[j].type === 'moof') break;
587
- }
588
- for (const child of moofChildren) {
589
- if (child.type === 'traf') {
590
- const trafChildren = parseChildBoxes(child);
591
- const tfhd = findBox(trafChildren, 'tfhd');
592
- const trun = findBox(trafChildren, 'trun');
593
- if (tfhd && trun) {
594
- const tfhdInfo = parseTfhd(tfhd.data);
595
- const { samples, dataOffset } = parseTrun(trun.data, tfhdInfo);
596
- if (!tracks.has(tfhdInfo.trackId)) tracks.set(tfhdInfo.trackId, { samples: [], chunkOffsets: [] });
597
- const track = tracks.get(tfhdInfo.trackId);
598
- const chunkOffset = combinedMdatOffset + (moofStart + dataOffset) - (nextMdatOffset + 8);
599
- track.chunkOffsets.push({ offset: chunkOffset, sampleCount: samples.length });
600
- track.samples.push(...samples);
601
- }
602
- }
603
- }
604
- } else if (box.type === 'mdat') {
605
- mdatChunks.push({ data: box.data.subarray(8), offset: combinedMdatOffset });
606
- combinedMdatOffset += box.data.subarray(8).byteLength;
607
- }
608
- }
1050
+ const movieTimescale = getMovieTimescale(moov);
1051
+ const { trackMetadata, trackOrder } = extractTrackMetadata(moov);
1052
+ const trexDefaults = extractTrexDefaults(moov);
1053
+ const { tracks, combinedMdat } = collectTrackSamples(boxes, trackMetadata, trexDefaults);
609
1054
 
610
- const totalMdatSize = mdatChunks.reduce((sum, c) => sum + c.data.byteLength, 0);
611
- const combinedMdat = new Uint8Array(totalMdatSize);
612
- for (const chunk of mdatChunks) combinedMdat.set(chunk.data, chunk.offset);
1055
+ if (tracks.size === 0) throw new Error('Invalid fMP4: no track fragments found');
613
1056
 
614
- const trackIdMap = new Map();
615
- const fmp4TrackIds = Array.from(tracks.keys()).sort((a, b) => a - b);
616
- for (let i = 0; i < fmp4TrackIds.length && i < originalTrackIds.length; i++) {
617
- trackIdMap.set(originalTrackIds[i], tracks.get(fmp4TrackIds[i]));
1057
+ const clippedTracks = applyClipToTracks(tracks, options);
1058
+ if (clippedTracks.size === 0) {
1059
+ throw new Error('Clip range produced no samples');
618
1060
  }
619
1061
 
620
- let maxDuration = 0;
621
- for (const [, track] of tracks) {
622
- let dur = 0;
623
- for (const s of track.samples) dur += s.duration || 0;
624
- maxDuration = Math.max(maxDuration, dur);
625
- }
1062
+ const rebuiltMdat = rebuildMdatContent(clippedTracks, trackOrder, combinedMdat);
1063
+ const maxMovieDuration = calculateMovieDuration(clippedTracks, movieTimescale);
626
1064
 
1065
+ const moovChildren = parseChildBoxes(moov);
627
1066
  const newMoovParts = [];
628
1067
  for (const child of moovChildren) {
629
1068
  if (child.type === 'mvex') continue;
630
- if (child.type === 'trak') newMoovParts.push(rebuildTrak(child, trackIdMap, maxDuration));
631
- else if (child.type === 'mvhd') newMoovParts.push(rebuildMvhd(child, maxDuration));
632
- else newMoovParts.push(child.data);
1069
+ if (child.type === 'trak') {
1070
+ const trak = rebuildTrak(child, clippedTracks, maxMovieDuration);
1071
+ if (trak) newMoovParts.push(trak);
1072
+ } else if (child.type === 'mvhd') {
1073
+ newMoovParts.push(rebuildMvhd(child, maxMovieDuration));
1074
+ } else {
1075
+ newMoovParts.push(child.data);
1076
+ }
633
1077
  }
634
1078
 
635
1079
  const newMoov = createBox('moov', ...newMoovParts);
636
- const newMdat = createBox('mdat', combinedMdat);
1080
+ const newMdat = createBox('mdat', rebuiltMdat);
637
1081
  const output = new Uint8Array(ftyp.size + newMoov.byteLength + newMdat.byteLength);
638
1082
  output.set(ftyp.data, 0);
639
1083
  output.set(newMoov, ftyp.size);
640
1084
  output.set(newMdat, ftyp.size + newMoov.byteLength);
641
1085
  updateStcoOffsets(output, ftyp.size, newMoov.byteLength);
642
-
643
1086
  return output;
644
1087
  }
645
1088
 
@@ -705,7 +1148,7 @@
705
1148
  toMp4.isMpegTs = isMpegTs;
706
1149
  toMp4.isFmp4 = isFmp4;
707
1150
  toMp4.isStandardMp4 = isStandardMp4;
708
- toMp4.version = '1.1.1';
1151
+ toMp4.version = '1.2.1';
709
1152
 
710
1153
  return toMp4;
711
1154
  });