@invintusmedia/tomp4 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,143 +1,643 @@
1
1
  /**
2
2
  * fMP4 to Standard MP4 Converter
3
- *
4
- * Converts a fragmented MP4 file to a standard MP4 container
5
- * by extracting samples from fragments and rebuilding the moov box.
6
- *
3
+ *
4
+ * Converts fragmented MP4 data to standard MP4 and supports
5
+ * sample-level clipping for fMP4 inputs.
6
+ *
7
7
  * @module fmp4/converter
8
8
  */
9
9
 
10
10
  import {
11
- parseBoxes, findBox, parseChildBoxes, createBox,
12
- parseTfhd, parseTrun
11
+ parseBoxes,
12
+ findBox,
13
+ parseChildBoxes,
14
+ createBox,
15
+ parseTfhd,
16
+ parseTfdt,
17
+ parseTrun,
18
+ getMovieTimescale,
13
19
  } from './utils.js';
14
20
 
15
- // ============================================
16
- // Moov Rebuilding Functions
17
- // ============================================
21
+ function createFullBox(type, version, flags, ...payloads) {
22
+ const header = new Uint8Array(4);
23
+ header[0] = version;
24
+ header[1] = (flags >> 16) & 0xff;
25
+ header[2] = (flags >> 8) & 0xff;
26
+ header[3] = flags & 0xff;
27
+ return createBox(type, header, ...payloads);
28
+ }
29
+
30
+ function sumSampleDurations(samples) {
31
+ let total = 0;
32
+ for (const sample of samples) total += sample.duration || 0;
33
+ return total;
34
+ }
35
+
36
+ function toMovieTimescale(value, trackTimescale, movieTimescale) {
37
+ if (!trackTimescale || !movieTimescale) return value;
38
+ return Math.round((value * movieTimescale) / trackTimescale);
39
+ }
40
+
41
+ function isSyncSample(sample) {
42
+ const flags = sample.flags;
43
+ if (flags === undefined || flags === null) return true;
44
+ return ((flags >> 16) & 0x1) === 0;
45
+ }
46
+
47
+ function parseTrex(trexData) {
48
+ const view = new DataView(trexData.buffer, trexData.byteOffset, trexData.byteLength);
49
+ return {
50
+ trackId: view.getUint32(12),
51
+ defaultSampleDuration: view.getUint32(20),
52
+ defaultSampleSize: view.getUint32(24),
53
+ defaultSampleFlags: view.getUint32(28),
54
+ };
55
+ }
56
+
57
+ function extractTrexDefaults(moovBox) {
58
+ const defaults = new Map();
59
+ const moovChildren = parseChildBoxes(moovBox);
60
+ const mvex = findBox(moovChildren, 'mvex');
61
+ if (!mvex) return defaults;
62
+
63
+ const mvexChildren = parseChildBoxes(mvex);
64
+ for (const child of mvexChildren) {
65
+ if (child.type !== 'trex') continue;
66
+ const trex = parseTrex(child.data);
67
+ defaults.set(trex.trackId, trex);
68
+ }
69
+ return defaults;
70
+ }
71
+
72
+ function extractTrackMetadata(moovBox) {
73
+ const trackMetadata = new Map();
74
+ const trackOrder = [];
75
+ const moovChildren = parseChildBoxes(moovBox);
76
+
77
+ for (const child of moovChildren) {
78
+ if (child.type !== 'trak') continue;
79
+ const trakChildren = parseChildBoxes(child);
80
+
81
+ let trackId = null;
82
+ let timescale = 0;
83
+ let handlerType = 'unknown';
84
+
85
+ for (const trakChild of trakChildren) {
86
+ if (trakChild.type === 'tkhd') {
87
+ const view = new DataView(trakChild.data.buffer, trakChild.data.byteOffset, trakChild.data.byteLength);
88
+ trackId = trakChild.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
89
+ } else if (trakChild.type === 'mdia') {
90
+ const mdiaChildren = parseChildBoxes(trakChild);
91
+ for (const mdiaChild of mdiaChildren) {
92
+ if (mdiaChild.type === 'mdhd') {
93
+ const view = new DataView(mdiaChild.data.buffer, mdiaChild.data.byteOffset, mdiaChild.data.byteLength);
94
+ timescale = mdiaChild.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
95
+ } else if (mdiaChild.type === 'hdlr' && mdiaChild.data.byteLength >= 20) {
96
+ handlerType = String.fromCharCode(
97
+ mdiaChild.data[16],
98
+ mdiaChild.data[17],
99
+ mdiaChild.data[18],
100
+ mdiaChild.data[19],
101
+ );
102
+ }
103
+ }
104
+ }
105
+ }
106
+
107
+ if (trackId !== null) {
108
+ trackMetadata.set(trackId, {
109
+ trackId,
110
+ timescale: timescale || 90000,
111
+ handlerType,
112
+ });
113
+ trackOrder.push(trackId);
114
+ }
115
+ }
116
+
117
+ return { trackMetadata, trackOrder };
118
+ }
119
+
120
+ function cloneSample(sample) {
121
+ return {
122
+ duration: sample.duration || 0,
123
+ size: sample.size || 0,
124
+ flags: sample.flags,
125
+ compositionTimeOffset: sample.compositionTimeOffset || 0,
126
+ dts: sample.dts || 0,
127
+ pts: sample.pts || 0,
128
+ byteOffset: sample.byteOffset || 0,
129
+ };
130
+ }
131
+
132
+ function normalizeSamples(samples, baseDts) {
133
+ return samples.map((sample) => {
134
+ const next = cloneSample(sample);
135
+ next.dts -= baseDts;
136
+ next.pts -= baseDts;
137
+ return next;
138
+ });
139
+ }
140
+
141
+ function clipVideoSamples(samples, startTick, endTick) {
142
+ if (!samples.length) {
143
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
144
+ }
145
+
146
+ let requestedStartIndex = samples.length;
147
+ for (let i = 0; i < samples.length; i++) {
148
+ const sampleEnd = (samples[i].pts || 0) + (samples[i].duration || 0);
149
+ if (sampleEnd > startTick) {
150
+ requestedStartIndex = i;
151
+ break;
152
+ }
153
+ }
154
+ if (requestedStartIndex >= samples.length) {
155
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
156
+ }
157
+
158
+ let decodeStartIndex = requestedStartIndex;
159
+ for (let i = requestedStartIndex; i >= 0; i--) {
160
+ if (isSyncSample(samples[i])) {
161
+ decodeStartIndex = i;
162
+ break;
163
+ }
164
+ }
165
+
166
+ let endIndex = samples.length;
167
+ if (Number.isFinite(endTick)) {
168
+ for (let i = decodeStartIndex; i < samples.length; i++) {
169
+ if ((samples[i].pts || 0) >= endTick) {
170
+ endIndex = i;
171
+ break;
172
+ }
173
+ }
174
+ }
175
+ if (endIndex <= decodeStartIndex) {
176
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
177
+ }
178
+
179
+ const selected = samples.slice(decodeStartIndex, endIndex);
180
+ const decodeStartDts = selected[0].dts || 0;
181
+ const mediaTime = Math.max(0, startTick - decodeStartDts);
182
+ const normalized = normalizeSamples(selected, decodeStartDts);
183
+ const decodeDuration = sumSampleDurations(normalized);
184
+ const maxPlayable = Math.max(0, decodeDuration - mediaTime);
185
+ const requested = Number.isFinite(endTick) ? Math.max(0, endTick - startTick) : maxPlayable;
186
+ const playbackDuration = Math.min(requested, maxPlayable);
187
+
188
+ return {
189
+ samples: normalized,
190
+ mediaTime,
191
+ playbackDuration,
192
+ };
193
+ }
194
+
195
+ function clipNonVideoSamples(samples, startTick, endTick) {
196
+ if (!samples.length) {
197
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
198
+ }
199
+
200
+ let startIndex = 0;
201
+ while (startIndex < samples.length && (samples[startIndex].pts || 0) < startTick) {
202
+ startIndex++;
203
+ }
204
+ if (startIndex >= samples.length) {
205
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
206
+ }
207
+
208
+ let endIndex = samples.length;
209
+ if (Number.isFinite(endTick)) {
210
+ for (let i = startIndex; i < samples.length; i++) {
211
+ if ((samples[i].pts || 0) >= endTick) {
212
+ endIndex = i;
213
+ break;
214
+ }
215
+ }
216
+ }
217
+ if (endIndex <= startIndex) {
218
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
219
+ }
220
+
221
+ const selected = samples.slice(startIndex, endIndex);
222
+ const decodeStartDts = selected[0].dts || 0;
223
+ const normalized = normalizeSamples(selected, decodeStartDts);
224
+ const decodeDuration = sumSampleDurations(normalized);
225
+ const requested = Number.isFinite(endTick) ? Math.max(0, endTick - startTick) : decodeDuration;
226
+ const playbackDuration = Math.min(requested, decodeDuration);
227
+
228
+ return {
229
+ samples: normalized,
230
+ mediaTime: 0,
231
+ playbackDuration,
232
+ };
233
+ }
234
+
235
+ function applyClipToTracks(tracks, options = {}) {
236
+ const hasStart = Number.isFinite(options.startTime);
237
+ const hasEnd = Number.isFinite(options.endTime);
238
+ if (!hasStart && !hasEnd) {
239
+ for (const [, track] of tracks) {
240
+ if (!track.samples.length) continue;
241
+ const baseDts = track.samples[0].dts || 0;
242
+ track.samples = normalizeSamples(track.samples, baseDts);
243
+ track.mediaTime = 0;
244
+ track.playbackDuration = sumSampleDurations(track.samples);
245
+ }
246
+ return tracks;
247
+ }
248
+
249
+ const startSec = hasStart ? Math.max(0, options.startTime) : 0;
250
+ const endSec = hasEnd ? Math.max(startSec, options.endTime) : Infinity;
251
+
252
+ let videoTrackId = null;
253
+ for (const [trackId, track] of tracks) {
254
+ if (track.handlerType === 'vide' && track.samples.length > 0) {
255
+ videoTrackId = trackId;
256
+ break;
257
+ }
258
+ }
259
+
260
+ // First pass: clip video to determine preroll duration
261
+ const clipped = new Map();
262
+ let videoPrerollSec = 0;
263
+
264
+ if (videoTrackId !== null) {
265
+ const vTrack = tracks.get(videoTrackId);
266
+ if (vTrack && vTrack.samples.length) {
267
+ const startTick = Math.round(startSec * vTrack.timescale);
268
+ const endTick = Number.isFinite(endSec) ? Math.round(endSec * vTrack.timescale) : Infinity;
269
+ const clip = clipVideoSamples(vTrack.samples, startTick, endTick);
270
+
271
+ if (clip.samples.length) {
272
+ videoPrerollSec = clip.mediaTime / vTrack.timescale;
273
+ clipped.set(videoTrackId, {
274
+ ...vTrack,
275
+ samples: clip.samples,
276
+ mediaTime: clip.mediaTime,
277
+ playbackDuration: clip.playbackDuration,
278
+ chunkOffsets: [],
279
+ });
280
+ }
281
+ }
282
+ }
283
+
284
+ // Second pass: clip non-video tracks, including audio from the video's
285
+ // decode start (keyframe) so A/V stays in sync without edit lists
286
+ for (const [trackId, track] of tracks) {
287
+ if (!track.samples.length || trackId === videoTrackId) continue;
288
+
289
+ const adjustedStartSec = Math.max(0, startSec - videoPrerollSec);
290
+ const startTick = Math.round(adjustedStartSec * track.timescale);
291
+ const endTick = Number.isFinite(endSec) ? Math.round(endSec * track.timescale) : Infinity;
292
+ const clip = clipNonVideoSamples(track.samples, startTick, endTick);
293
+
294
+ if (!clip.samples.length) continue;
295
+
296
+ // Audio preroll matches video preroll so both tracks share the same timeline
297
+ const audioPreroll = Math.round(videoPrerollSec * track.timescale);
298
+ const totalDur = sumSampleDurations(clip.samples);
299
+ const playbackDuration = Math.max(0, totalDur - audioPreroll);
300
+
301
+ clipped.set(trackId, {
302
+ ...track,
303
+ samples: clip.samples,
304
+ mediaTime: audioPreroll,
305
+ playbackDuration,
306
+ chunkOffsets: [],
307
+ });
308
+ }
309
+
310
+ return clipped;
311
+ }
312
+
313
+ function collectTrackSamples(boxes, trackMetadata, trexDefaults) {
314
+ const tracks = new Map();
315
+ const mdatChunks = [];
316
+ let combinedMdatOffset = 0;
317
+
318
+ for (let i = 0; i < boxes.length; i++) {
319
+ const box = boxes[i];
320
+ if (box.type === 'moof') {
321
+ const moofChildren = parseChildBoxes(box);
322
+ const moofStart = box.offset;
323
+
324
+ let nextMdatOffset = -1;
325
+ for (let j = i + 1; j < boxes.length; j++) {
326
+ if (boxes[j].type === 'mdat') {
327
+ nextMdatOffset = boxes[j].offset;
328
+ break;
329
+ }
330
+ if (boxes[j].type === 'moof') break;
331
+ }
332
+ if (nextMdatOffset < 0) continue;
333
+
334
+ const mdatContentStartAbs = nextMdatOffset + 8;
335
+
336
+ for (const child of moofChildren) {
337
+ if (child.type !== 'traf') continue;
338
+
339
+ const trafChildren = parseChildBoxes(child);
340
+ const tfhdBox = findBox(trafChildren, 'tfhd');
341
+ if (!tfhdBox) continue;
342
+
343
+ const tfhdView = new DataView(tfhdBox.data.buffer, tfhdBox.data.byteOffset, tfhdBox.data.byteLength);
344
+ const trackId = tfhdView.getUint32(12);
345
+ const tfhd = parseTfhd(tfhdBox.data, trexDefaults.get(trackId) || {});
346
+ const tfdtBox = findBox(trafChildren, 'tfdt');
347
+ let decodeTime = tfdtBox ? parseTfdt(tfdtBox.data) : 0;
348
+ let runDataCursorAbs = null;
349
+
350
+ if (!tracks.has(trackId)) {
351
+ const meta = trackMetadata.get(trackId) || {};
352
+ tracks.set(trackId, {
353
+ trackId,
354
+ timescale: meta.timescale || 90000,
355
+ handlerType: meta.handlerType || 'unknown',
356
+ samples: [],
357
+ chunkOffsets: [],
358
+ mediaTime: 0,
359
+ playbackDuration: 0,
360
+ });
361
+ }
362
+ const track = tracks.get(trackId);
363
+
364
+ for (const trafChild of trafChildren) {
365
+ if (trafChild.type !== 'trun') continue;
366
+ const { samples, dataOffset, flags } = parseTrun(trafChild.data, tfhd);
367
+ const runSize = samples.reduce((sum, sample) => sum + (sample.size || 0), 0);
368
+
369
+ let dataStartAbs;
370
+ if (flags & 0x1) {
371
+ const baseAbs = (tfhd.flags & 0x1) ? tfhd.baseDataOffset : moofStart;
372
+ dataStartAbs = baseAbs + dataOffset;
373
+ } else if (runDataCursorAbs !== null) {
374
+ dataStartAbs = runDataCursorAbs;
375
+ } else {
376
+ dataStartAbs = mdatContentStartAbs;
377
+ }
378
+
379
+ let sampleByteOffset = combinedMdatOffset + Math.max(0, dataStartAbs - mdatContentStartAbs);
380
+ for (const sample of samples) {
381
+ const dts = decodeTime;
382
+ const pts = dts + (sample.compositionTimeOffset || 0);
383
+ track.samples.push({
384
+ ...sample,
385
+ dts,
386
+ pts,
387
+ byteOffset: sampleByteOffset,
388
+ });
389
+ decodeTime += sample.duration || 0;
390
+ sampleByteOffset += sample.size || 0;
391
+ }
392
+
393
+ runDataCursorAbs = dataStartAbs + runSize;
394
+ }
395
+ }
396
+ } else if (box.type === 'mdat') {
397
+ const data = box.data.subarray(8);
398
+ mdatChunks.push({ data, offset: combinedMdatOffset });
399
+ combinedMdatOffset += data.byteLength;
400
+ }
401
+ }
402
+
403
+ const combinedMdat = new Uint8Array(combinedMdatOffset);
404
+ for (const chunk of mdatChunks) {
405
+ combinedMdat.set(chunk.data, chunk.offset);
406
+ }
407
+
408
+ return { tracks, combinedMdat };
409
+ }
410
+
411
+ function rebuildMdatContent(tracks, trackOrder, sourceMdat) {
412
+ const orderedTrackIds = trackOrder.filter((trackId) => tracks.has(trackId));
413
+ for (const trackId of tracks.keys()) {
414
+ if (!orderedTrackIds.includes(trackId)) orderedTrackIds.push(trackId);
415
+ }
416
+
417
+ let totalSize = 0;
418
+ for (const trackId of orderedTrackIds) {
419
+ const track = tracks.get(trackId);
420
+ for (const sample of track.samples) totalSize += sample.size || 0;
421
+ }
422
+
423
+ const mdatData = new Uint8Array(totalSize);
424
+ let writeOffset = 0;
425
+
426
+ for (const trackId of orderedTrackIds) {
427
+ const track = tracks.get(trackId);
428
+ if (!track || !track.samples.length) {
429
+ if (track) track.chunkOffsets = [];
430
+ continue;
431
+ }
432
+
433
+ track.chunkOffsets = [{ offset: writeOffset, sampleCount: track.samples.length }];
434
+ for (const sample of track.samples) {
435
+ const start = sample.byteOffset || 0;
436
+ const end = start + (sample.size || 0);
437
+ if (start < 0 || end > sourceMdat.byteLength) {
438
+ throw new Error(`Invalid sample byte range for track ${trackId}: ${start}-${end}`);
439
+ }
440
+ mdatData.set(sourceMdat.subarray(start, end), writeOffset);
441
+ sample.byteOffset = writeOffset;
442
+ writeOffset += sample.size || 0;
443
+ }
444
+ }
445
+
446
+ return mdatData;
447
+ }
448
+
449
+ function calculateMovieDuration(tracks, movieTimescale) {
450
+ let maxDuration = 0;
451
+ for (const [, track] of tracks) {
452
+ const fallback = Math.max(0, sumSampleDurations(track.samples) - (track.mediaTime || 0));
453
+ const playbackDuration = track.playbackDuration > 0 ? track.playbackDuration : fallback;
454
+ track.playbackDuration = playbackDuration;
455
+ track.movieDuration = toMovieTimescale(playbackDuration, track.timescale, movieTimescale);
456
+ maxDuration = Math.max(maxDuration, track.movieDuration);
457
+ }
458
+ return maxDuration;
459
+ }
18
460
 
19
461
  function rebuildMvhd(mvhdBox, duration) {
20
462
  const data = new Uint8Array(mvhdBox.data);
21
463
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
22
464
  const version = data[8];
23
465
  const durationOffset = version === 0 ? 24 : 32;
24
- if (version === 0) view.setUint32(durationOffset, duration);
25
- else { view.setUint32(durationOffset, 0); view.setUint32(durationOffset + 4, duration); }
466
+ if (version === 0) {
467
+ view.setUint32(durationOffset, duration);
468
+ } else {
469
+ view.setUint32(durationOffset, 0);
470
+ view.setUint32(durationOffset + 4, duration);
471
+ }
26
472
  return data;
27
473
  }
28
474
 
29
- function rebuildTkhd(tkhdBox, trackInfo, maxDuration) {
475
+ function rebuildTkhd(tkhdBox, trackInfo, maxMovieDuration) {
30
476
  const data = new Uint8Array(tkhdBox.data);
31
477
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
32
478
  const version = data[8];
33
- let trackDuration = maxDuration;
34
- if (trackInfo) { trackDuration = 0; for (const s of trackInfo.samples) trackDuration += s.duration || 0; }
35
- if (version === 0) view.setUint32(28, trackDuration);
36
- else { view.setUint32(36, 0); view.setUint32(40, trackDuration); }
479
+ const duration = trackInfo?.movieDuration ?? maxMovieDuration;
480
+ if (version === 0) view.setUint32(28, duration);
481
+ else {
482
+ view.setUint32(36, 0);
483
+ view.setUint32(40, duration);
484
+ }
37
485
  return data;
38
486
  }
39
487
 
40
- function rebuildMdhd(mdhdBox, trackInfo, maxDuration) {
488
+ function rebuildMdhd(mdhdBox, trackInfo) {
41
489
  const data = new Uint8Array(mdhdBox.data);
42
490
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
43
491
  const version = data[8];
44
- let trackDuration = 0;
45
- if (trackInfo) for (const s of trackInfo.samples) trackDuration += s.duration || 0;
492
+ const duration = sumSampleDurations(trackInfo?.samples || []);
46
493
  const durationOffset = version === 0 ? 24 : 32;
47
- if (version === 0) view.setUint32(durationOffset, trackDuration);
48
- else { view.setUint32(durationOffset, 0); view.setUint32(durationOffset + 4, trackDuration); }
494
+ if (version === 0) {
495
+ view.setUint32(durationOffset, duration);
496
+ } else {
497
+ view.setUint32(durationOffset, 0);
498
+ view.setUint32(durationOffset + 4, duration);
499
+ }
49
500
  return data;
50
501
  }
51
502
 
52
503
  function rebuildStbl(stblBox, trackInfo) {
53
504
  const stblChildren = parseChildBoxes(stblBox);
54
505
  const newParts = [];
55
- for (const child of stblChildren) if (child.type === 'stsd') { newParts.push(child.data); break; }
506
+ for (const child of stblChildren) {
507
+ if (child.type === 'stsd') {
508
+ newParts.push(child.data);
509
+ break;
510
+ }
511
+ }
512
+
56
513
  const samples = trackInfo?.samples || [];
57
514
  const chunkOffsets = trackInfo?.chunkOffsets || [];
58
515
 
59
516
  // stts
60
517
  const sttsEntries = [];
61
- let curDur = null, count = 0;
62
- for (const s of samples) {
63
- const d = s.duration || 0;
64
- if (d === curDur) count++;
65
- else { if (curDur !== null) sttsEntries.push({ count, duration: curDur }); curDur = d; count = 1; }
518
+ let currentDuration = null;
519
+ let currentCount = 0;
520
+ for (const sample of samples) {
521
+ const duration = sample.duration || 0;
522
+ if (duration === currentDuration) currentCount++;
523
+ else {
524
+ if (currentDuration !== null) {
525
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
526
+ }
527
+ currentDuration = duration;
528
+ currentCount = 1;
529
+ }
530
+ }
531
+ if (currentDuration !== null) {
532
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
66
533
  }
67
- if (curDur !== null) sttsEntries.push({ count, duration: curDur });
68
534
  const sttsData = new Uint8Array(8 + sttsEntries.length * 8);
69
535
  const sttsView = new DataView(sttsData.buffer);
70
536
  sttsView.setUint32(4, sttsEntries.length);
71
- let off = 8;
72
- for (const e of sttsEntries) { sttsView.setUint32(off, e.count); sttsView.setUint32(off + 4, e.duration); off += 8; }
537
+ let offset = 8;
538
+ for (const entry of sttsEntries) {
539
+ sttsView.setUint32(offset, entry.count);
540
+ sttsView.setUint32(offset + 4, entry.duration);
541
+ offset += 8;
542
+ }
73
543
  newParts.push(createBox('stts', sttsData));
74
544
 
75
545
  // stsc
76
546
  const stscEntries = [];
77
547
  if (chunkOffsets.length > 0) {
78
- let currentSampleCount = chunkOffsets[0].sampleCount, firstChunk = 1;
548
+ let currentSampleCount = chunkOffsets[0].sampleCount;
549
+ let firstChunk = 1;
79
550
  for (let i = 1; i <= chunkOffsets.length; i++) {
80
551
  const sampleCount = i < chunkOffsets.length ? chunkOffsets[i].sampleCount : -1;
81
552
  if (sampleCount !== currentSampleCount) {
82
- stscEntries.push({ firstChunk, samplesPerChunk: currentSampleCount, sampleDescriptionIndex: 1 });
83
- firstChunk = i + 1; currentSampleCount = sampleCount;
553
+ stscEntries.push({
554
+ firstChunk,
555
+ samplesPerChunk: currentSampleCount,
556
+ sampleDescriptionIndex: 1,
557
+ });
558
+ firstChunk = i + 1;
559
+ currentSampleCount = sampleCount;
84
560
  }
85
561
  }
86
- } else stscEntries.push({ firstChunk: 1, samplesPerChunk: samples.length, sampleDescriptionIndex: 1 });
562
+ }
87
563
  const stscData = new Uint8Array(8 + stscEntries.length * 12);
88
564
  const stscView = new DataView(stscData.buffer);
89
565
  stscView.setUint32(4, stscEntries.length);
90
- off = 8;
91
- for (const e of stscEntries) { stscView.setUint32(off, e.firstChunk); stscView.setUint32(off + 4, e.samplesPerChunk); stscView.setUint32(off + 8, e.sampleDescriptionIndex); off += 12; }
566
+ offset = 8;
567
+ for (const entry of stscEntries) {
568
+ stscView.setUint32(offset, entry.firstChunk);
569
+ stscView.setUint32(offset + 4, entry.samplesPerChunk);
570
+ stscView.setUint32(offset + 8, entry.sampleDescriptionIndex);
571
+ offset += 12;
572
+ }
92
573
  newParts.push(createBox('stsc', stscData));
93
574
 
94
575
  // stsz
95
576
  const stszData = new Uint8Array(12 + samples.length * 4);
96
577
  const stszView = new DataView(stszData.buffer);
97
578
  stszView.setUint32(8, samples.length);
98
- off = 12;
99
- for (const s of samples) { stszView.setUint32(off, s.size || 0); off += 4; }
579
+ offset = 12;
580
+ for (const sample of samples) {
581
+ stszView.setUint32(offset, sample.size || 0);
582
+ offset += 4;
583
+ }
100
584
  newParts.push(createBox('stsz', stszData));
101
585
 
102
586
  // stco
103
- const numChunks = chunkOffsets.length || 1;
104
- const stcoData = new Uint8Array(8 + numChunks * 4);
587
+ const stcoData = new Uint8Array(8 + chunkOffsets.length * 4);
105
588
  const stcoView = new DataView(stcoData.buffer);
106
- stcoView.setUint32(4, numChunks);
107
- for (let i = 0; i < numChunks; i++) stcoView.setUint32(8 + i * 4, chunkOffsets[i]?.offset || 0);
589
+ stcoView.setUint32(4, chunkOffsets.length);
590
+ for (let i = 0; i < chunkOffsets.length; i++) {
591
+ stcoView.setUint32(8 + i * 4, chunkOffsets[i].offset || 0);
592
+ }
108
593
  newParts.push(createBox('stco', stcoData));
109
594
 
110
595
  // ctts
111
- const hasCtts = samples.some(s => s.compositionTimeOffset);
596
+ const hasCtts = samples.some((sample) => sample.compositionTimeOffset);
112
597
  if (hasCtts) {
113
598
  const cttsEntries = [];
114
- let curOff = null; count = 0;
115
- for (const s of samples) {
116
- const o = s.compositionTimeOffset || 0;
117
- if (o === curOff) count++;
118
- else { if (curOff !== null) cttsEntries.push({ count, offset: curOff }); curOff = o; count = 1; }
599
+ let currentOffset = null;
600
+ currentCount = 0;
601
+ for (const sample of samples) {
602
+ const compositionOffset = sample.compositionTimeOffset || 0;
603
+ if (compositionOffset === currentOffset) currentCount++;
604
+ else {
605
+ if (currentOffset !== null) {
606
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
607
+ }
608
+ currentOffset = compositionOffset;
609
+ currentCount = 1;
610
+ }
611
+ }
612
+ if (currentOffset !== null) {
613
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
119
614
  }
120
- if (curOff !== null) cttsEntries.push({ count, offset: curOff });
121
615
  const cttsData = new Uint8Array(8 + cttsEntries.length * 8);
122
616
  const cttsView = new DataView(cttsData.buffer);
123
617
  cttsView.setUint32(4, cttsEntries.length);
124
- off = 8;
125
- for (const e of cttsEntries) { cttsView.setUint32(off, e.count); cttsView.setInt32(off + 4, e.offset); off += 8; }
618
+ offset = 8;
619
+ for (const entry of cttsEntries) {
620
+ cttsView.setUint32(offset, entry.count);
621
+ cttsView.setInt32(offset + 4, entry.offset);
622
+ offset += 8;
623
+ }
126
624
  newParts.push(createBox('ctts', cttsData));
127
625
  }
128
626
 
129
- // stss
627
+ // stss (video sync samples)
130
628
  const syncSamples = [];
131
629
  for (let i = 0; i < samples.length; i++) {
132
- const flags = samples[i].flags;
133
- if (flags !== undefined) { if (!((flags >> 16) & 0x1)) syncSamples.push(i + 1); }
630
+ if (isSyncSample(samples[i])) syncSamples.push(i + 1);
134
631
  }
135
632
  if (syncSamples.length > 0 && syncSamples.length < samples.length) {
136
633
  const stssData = new Uint8Array(8 + syncSamples.length * 4);
137
634
  const stssView = new DataView(stssData.buffer);
138
635
  stssView.setUint32(4, syncSamples.length);
139
- off = 8;
140
- for (const n of syncSamples) { stssView.setUint32(off, n); off += 4; }
636
+ offset = 8;
637
+ for (const sampleNumber of syncSamples) {
638
+ stssView.setUint32(offset, sampleNumber);
639
+ offset += 4;
640
+ }
141
641
  newParts.push(createBox('stss', stssData));
142
642
  }
143
643
 
@@ -154,169 +654,151 @@ function rebuildMinf(minfBox, trackInfo) {
154
654
  return createBox('minf', ...newParts);
155
655
  }
156
656
 
/**
 * Rebuild an `mdia` box for the non-fragmented output.
 *
 * The `minf` and `mdhd` children are regenerated from the collected track
 * data; every other child is copied through unchanged, in original order.
 *
 * @param {object} mdiaBox - Parsed `mdia` box (as produced by the box parser)
 * @param {object} trackInfo - Collected sample data for the owning track
 * @returns {Uint8Array} Serialized `mdia` box
 */
function rebuildMdia(mdiaBox, trackInfo) {
  const rebuiltChildren = [];

  for (const box of parseChildBoxes(mdiaBox)) {
    switch (box.type) {
      case 'minf':
        rebuiltChildren.push(rebuildMinf(box, trackInfo));
        break;
      case 'mdhd':
        rebuiltChildren.push(rebuildMdhd(box, trackInfo));
        break;
      default:
        // Anything else (e.g. hdlr) is passed through byte-for-byte.
        rebuiltChildren.push(box.data);
    }
  }

  return createBox('mdia', ...rebuiltChildren);
}
167
667
 
/**
 * Rebuild one `trak` box for the non-fragmented output.
 *
 * The track is looked up by the track_ID stored in its `tkhd`. Any existing
 * `edts` child is dropped and replaced by a freshly written single-entry
 * edit list placed right after `tkhd`.
 *
 * @param {object} trakBox - Parsed `trak` box
 * @param {Map<number, object>} trackInfoMap - Track data keyed by track ID
 * @param {number} maxMovieDuration - Fallback edit duration, used when the
 *   track info carries no `movieDuration`; also forwarded to `rebuildTkhd`
 * @returns {Uint8Array|null} Serialized `trak`, or null when the box has no
 *   `tkhd`, the track is unknown, or the track has no samples
 */
function rebuildTrak(trakBox, trackInfoMap, maxMovieDuration) {
  const trakChildren = parseChildBoxes(trakBox);

  // track_ID sits after the creation/modification times, which are 32-bit in
  // version 0 and 64-bit otherwise - hence byte 20 vs byte 28 of the box.
  const readTrackId = ({ data }) => {
    const tkhdView = new DataView(data.buffer, data.byteOffset, data.byteLength);
    const idOffset = data[8] === 0 ? 20 : 28;
    return tkhdView.getUint32(idOffset);
  };

  // If several tkhd boxes are present, the last one decides the track ID.
  let trackId = null;
  for (const box of trakChildren) {
    if (box.type === 'tkhd') trackId = readTrackId(box);
  }
  if (trackId === null) return null;

  const trackInfo = trackInfoMap.get(trackId);
  if (!trackInfo) return null;
  if (!trackInfo.samples.length) return null;

  const rebuiltChildren = [];
  for (const box of trakChildren) {
    switch (box.type) {
      case 'edts':
        // Dropped: a new edit list is generated below.
        break;
      case 'mdia':
        rebuiltChildren.push(rebuildMdia(box, trackInfo));
        break;
      case 'tkhd':
        rebuiltChildren.push(rebuildTkhd(box, trackInfo, maxMovieDuration));
        break;
      default:
        rebuiltChildren.push(box.data);
    }
  }

  // Version-0 elst body: entry_count, then one entry made of
  // segment_duration, media_time, media_rate_integer, media_rate_fraction.
  const segmentDuration = trackInfo.movieDuration ?? maxMovieDuration;
  const mediaTime = Math.max(0, Math.round(trackInfo.mediaTime || 0));
  const elstBody = new Uint8Array(16);
  const elstFields = new DataView(elstBody.buffer);
  elstFields.setUint32(0, 1);
  elstFields.setUint32(4, segmentDuration);
  elstFields.setInt32(8, mediaTime);
  elstFields.setUint16(12, 1);
  elstFields.setUint16(14, 0);
  const edts = createBox('edts', createFullBox('elst', 0, 0, elstBody));

  // Locate the rebuilt tkhd by its fourcc and insert edts directly after it;
  // when none is found the edit list goes first.
  const isTkhd = (bytes) =>
    bytes.length >= 8 &&
    String.fromCharCode(bytes[4], bytes[5], bytes[6], bytes[7]) === 'tkhd';
  const tkhdAt = rebuiltChildren.findIndex(isTkhd);
  const insertAt = tkhdAt >= 0 ? tkhdAt + 1 : 0;
  rebuiltChildren.splice(insertAt, 0, edts);

  return createBox('trak', ...rebuiltChildren);
}
199
707
 
/**
 * Rebase every 32-bit chunk offset (`stco`) in an assembled MP4, in place.
 *
 * The `stco` entries are expected to hold offsets relative to the start of
 * the mdat payload; each one is turned into an absolute file offset by adding
 * `ftypSize + moovSize + 8` (the 8 bytes being the mdat box header).
 *
 * The walk descends only through moov/trak/mdia/minf/stbl. It is bounded so
 * that malformed input cannot make it touch bytes outside the box being
 * processed:
 *  - a child box is never scanned past the end of its parent, and
 *  - an `stco` entry count is capped to the entries that fit in the box.
 *
 * @param {Uint8Array} output - Complete file buffer (ftyp + moov + mdat); modified in place
 * @param {number} ftypSize - Size in bytes of the ftyp box
 * @param {number} moovSize - Size in bytes of the moov box
 * @returns {void}
 * @throws {RangeError} If a rebased offset does not fit in 32 bits. Entries
 *   visited before the failing one have already been rewritten.
 */
function updateStcoOffsets(output, ftypSize, moovSize) {
  const MAX_UINT32 = 0xffffffff;
  const CONTAINER_TYPES = new Set(['moov', 'trak', 'mdia', 'minf', 'stbl']);
  const mdatContentOffset = ftypSize + moovSize + 8;
  const view = new DataView(output.buffer, output.byteOffset, output.byteLength);

  function patchStco(boxStart, boxEnd) {
    // Box header (8) + version/flags (4) + entry_count (4) precede the entries.
    const entriesStart = boxStart + 16;
    if (entriesStart > boxEnd) return;

    // Never trust the declared count beyond what the box can actually hold.
    const declaredCount = view.getUint32(boxStart + 12);
    const capacity = Math.floor((boxEnd - entriesStart) / 4);
    const entryCount = Math.min(declaredCount, capacity);

    for (let i = 0; i < entryCount; i++) {
      const entryPos = entriesStart + i * 4;
      const absoluteOffset = mdatContentOffset + view.getUint32(entryPos);
      if (absoluteOffset > MAX_UINT32) {
        // setUint32 would silently wrap and yield a corrupt chunk table.
        throw new RangeError(
          `stco chunk offset ${absoluteOffset} exceeds the 32-bit limit`,
        );
      }
      view.setUint32(entryPos, absoluteOffset);
    }
  }

  function scan(start, end) {
    let position = start;
    while (position + 8 <= end) {
      const size = view.getUint32(position);
      // Sizes 0 ("to end of file") and 1 (64-bit size) are not walked.
      if (size < 8) break;

      // Clamp so an oversized child cannot leak into its siblings.
      const boxEnd = Math.min(position + size, end);
      const type = String.fromCharCode(
        output[position + 4],
        output[position + 5],
        output[position + 6],
        output[position + 7],
      );

      if (type === 'stco') {
        patchStco(position, boxEnd);
      } else if (CONTAINER_TYPES.has(type)) {
        scan(position + 8, boxEnd);
      }

      position += size;
    }
  }

  scan(0, output.byteLength);
}
221
741
 
222
- // ============================================
223
- // Main Converter Function
224
- // ============================================
225
-
226
742
  /**
227
743
  * Convert fragmented MP4 to standard MP4
228
744
  * @param {Uint8Array} fmp4Data - fMP4 data
745
+ * @param {object} [options] - Optional clip settings
746
+ * @param {number} [options.startTime] - Clip start time (seconds)
747
+ * @param {number} [options.endTime] - Clip end time (seconds)
229
748
  * @returns {Uint8Array} Standard MP4 data
230
749
  */
231
- export function convertFmp4ToMp4(fmp4Data) {
750
+ // Shared rebuild functions — also used by mp4-clip.js for standard MP4 clipping
751
+ export {
752
+ applyClipToTracks,
753
+ rebuildMdatContent,
754
+ calculateMovieDuration,
755
+ rebuildTrak,
756
+ rebuildMvhd,
757
+ updateStcoOffsets,
758
+ };
759
+
760
+ export function convertFmp4ToMp4(fmp4Data, options = {}) {
232
761
  const boxes = parseBoxes(fmp4Data);
233
762
  const ftyp = findBox(boxes, 'ftyp');
234
763
  const moov = findBox(boxes, 'moov');
235
764
  if (!ftyp || !moov) throw new Error('Invalid fMP4: missing ftyp or moov');
236
765
 
237
- const moovChildren = parseChildBoxes(moov);
238
- const originalTrackIds = [];
239
- for (const child of moovChildren) {
240
- if (child.type === 'trak') {
241
- const trakChildren = parseChildBoxes(child);
242
- for (const tc of trakChildren) {
243
- if (tc.type === 'tkhd') {
244
- const view = new DataView(tc.data.buffer, tc.data.byteOffset, tc.data.byteLength);
245
- originalTrackIds.push(tc.data[8] === 0 ? view.getUint32(20) : view.getUint32(28));
246
- }
247
- }
248
- }
249
- }
766
+ const movieTimescale = getMovieTimescale(moov);
767
+ const { trackMetadata, trackOrder } = extractTrackMetadata(moov);
768
+ const trexDefaults = extractTrexDefaults(moov);
769
+ const { tracks, combinedMdat } = collectTrackSamples(boxes, trackMetadata, trexDefaults);
250
770
 
251
- const tracks = new Map();
252
- const mdatChunks = [];
253
- let combinedMdatOffset = 0;
771
+ if (tracks.size === 0) throw new Error('Invalid fMP4: no track fragments found');
254
772
 
255
- for (let i = 0; i < boxes.length; i++) {
256
- const box = boxes[i];
257
- if (box.type === 'moof') {
258
- const moofChildren = parseChildBoxes(box);
259
- const moofStart = box.offset;
260
- let nextMdatOffset = 0;
261
- for (let j = i + 1; j < boxes.length; j++) {
262
- if (boxes[j].type === 'mdat') { nextMdatOffset = boxes[j].offset; break; }
263
- if (boxes[j].type === 'moof') break;
264
- }
265
- for (const child of moofChildren) {
266
- if (child.type === 'traf') {
267
- const trafChildren = parseChildBoxes(child);
268
- const tfhd = findBox(trafChildren, 'tfhd');
269
- const trun = findBox(trafChildren, 'trun');
270
- if (tfhd && trun) {
271
- const tfhdInfo = parseTfhd(tfhd.data);
272
- const { samples, dataOffset } = parseTrun(trun.data, tfhdInfo);
273
- if (!tracks.has(tfhdInfo.trackId)) tracks.set(tfhdInfo.trackId, { samples: [], chunkOffsets: [] });
274
- const track = tracks.get(tfhdInfo.trackId);
275
- const chunkOffset = combinedMdatOffset + (moofStart + dataOffset) - (nextMdatOffset + 8);
276
- track.chunkOffsets.push({ offset: chunkOffset, sampleCount: samples.length });
277
- track.samples.push(...samples);
278
- }
279
- }
280
- }
281
- } else if (box.type === 'mdat') {
282
- mdatChunks.push({ data: box.data.subarray(8), offset: combinedMdatOffset });
283
- combinedMdatOffset += box.data.subarray(8).byteLength;
284
- }
285
- }
286
-
287
- const totalMdatSize = mdatChunks.reduce((sum, c) => sum + c.data.byteLength, 0);
288
- const combinedMdat = new Uint8Array(totalMdatSize);
289
- for (const chunk of mdatChunks) combinedMdat.set(chunk.data, chunk.offset);
290
-
291
- const trackIdMap = new Map();
292
- const fmp4TrackIds = Array.from(tracks.keys()).sort((a, b) => a - b);
293
- for (let i = 0; i < fmp4TrackIds.length && i < originalTrackIds.length; i++) {
294
- trackIdMap.set(originalTrackIds[i], tracks.get(fmp4TrackIds[i]));
773
+ const clippedTracks = applyClipToTracks(tracks, options);
774
+ if (clippedTracks.size === 0) {
775
+ throw new Error('Clip range produced no samples');
295
776
  }
296
777
 
297
- let maxDuration = 0;
298
- for (const [, track] of tracks) {
299
- let dur = 0;
300
- for (const s of track.samples) dur += s.duration || 0;
301
- maxDuration = Math.max(maxDuration, dur);
302
- }
778
+ const rebuiltMdat = rebuildMdatContent(clippedTracks, trackOrder, combinedMdat);
779
+ const maxMovieDuration = calculateMovieDuration(clippedTracks, movieTimescale);
303
780
 
781
+ const moovChildren = parseChildBoxes(moov);
304
782
  const newMoovParts = [];
305
783
  for (const child of moovChildren) {
306
784
  if (child.type === 'mvex') continue;
307
- if (child.type === 'trak') newMoovParts.push(rebuildTrak(child, trackIdMap, maxDuration));
308
- else if (child.type === 'mvhd') newMoovParts.push(rebuildMvhd(child, maxDuration));
309
- else newMoovParts.push(child.data);
785
+ if (child.type === 'trak') {
786
+ const trak = rebuildTrak(child, clippedTracks, maxMovieDuration);
787
+ if (trak) newMoovParts.push(trak);
788
+ } else if (child.type === 'mvhd') {
789
+ newMoovParts.push(rebuildMvhd(child, maxMovieDuration));
790
+ } else {
791
+ newMoovParts.push(child.data);
792
+ }
310
793
  }
311
794
 
312
795
  const newMoov = createBox('moov', ...newMoovParts);
313
- const newMdat = createBox('mdat', combinedMdat);
796
+ const newMdat = createBox('mdat', rebuiltMdat);
314
797
  const output = new Uint8Array(ftyp.size + newMoov.byteLength + newMdat.byteLength);
315
798
  output.set(ftyp.data, 0);
316
799
  output.set(newMoov, ftyp.size);
317
800
  output.set(newMdat, ftyp.size + newMoov.byteLength);
318
801
  updateStcoOffsets(output, ftyp.size, newMoov.byteLength);
319
-
320
802
  return output;
321
803
  }
322
804