@invintusmedia/tomp4 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,143 +1,614 @@
1
1
  /**
2
2
  * fMP4 to Standard MP4 Converter
3
- *
4
- * Converts a fragmented MP4 file to a standard MP4 container
5
- * by extracting samples from fragments and rebuilding the moov box.
6
- *
3
+ *
4
+ * Converts fragmented MP4 data to standard MP4 and supports
5
+ * sample-level clipping for fMP4 inputs.
6
+ *
7
7
  * @module fmp4/converter
8
8
  */
9
9
 
10
10
  import {
11
- parseBoxes, findBox, parseChildBoxes, createBox,
12
- parseTfhd, parseTrun
11
+ parseBoxes,
12
+ findBox,
13
+ parseChildBoxes,
14
+ createBox,
15
+ parseTfhd,
16
+ parseTfdt,
17
+ parseTrun,
18
+ getMovieTimescale,
13
19
  } from './utils.js';
14
20
 
15
- // ============================================
16
- // Moov Rebuilding Functions
17
- // ============================================
21
+ function createFullBox(type, version, flags, ...payloads) {
22
+ const header = new Uint8Array(4);
23
+ header[0] = version;
24
+ header[1] = (flags >> 16) & 0xff;
25
+ header[2] = (flags >> 8) & 0xff;
26
+ header[3] = flags & 0xff;
27
+ return createBox(type, header, ...payloads);
28
+ }
29
+
30
+ function sumSampleDurations(samples) {
31
+ let total = 0;
32
+ for (const sample of samples) total += sample.duration || 0;
33
+ return total;
34
+ }
35
+
36
+ function toMovieTimescale(value, trackTimescale, movieTimescale) {
37
+ if (!trackTimescale || !movieTimescale) return value;
38
+ return Math.round((value * movieTimescale) / trackTimescale);
39
+ }
40
+
41
+ function isSyncSample(sample) {
42
+ const flags = sample.flags;
43
+ if (flags === undefined || flags === null) return true;
44
+ return ((flags >> 16) & 0x1) === 0;
45
+ }
46
+
47
+ function parseTrex(trexData) {
48
+ const view = new DataView(trexData.buffer, trexData.byteOffset, trexData.byteLength);
49
+ return {
50
+ trackId: view.getUint32(12),
51
+ defaultSampleDuration: view.getUint32(20),
52
+ defaultSampleSize: view.getUint32(24),
53
+ defaultSampleFlags: view.getUint32(28),
54
+ };
55
+ }
56
+
57
+ function extractTrexDefaults(moovBox) {
58
+ const defaults = new Map();
59
+ const moovChildren = parseChildBoxes(moovBox);
60
+ const mvex = findBox(moovChildren, 'mvex');
61
+ if (!mvex) return defaults;
62
+
63
+ const mvexChildren = parseChildBoxes(mvex);
64
+ for (const child of mvexChildren) {
65
+ if (child.type !== 'trex') continue;
66
+ const trex = parseTrex(child.data);
67
+ defaults.set(trex.trackId, trex);
68
+ }
69
+ return defaults;
70
+ }
71
+
72
+ function extractTrackMetadata(moovBox) {
73
+ const trackMetadata = new Map();
74
+ const trackOrder = [];
75
+ const moovChildren = parseChildBoxes(moovBox);
76
+
77
+ for (const child of moovChildren) {
78
+ if (child.type !== 'trak') continue;
79
+ const trakChildren = parseChildBoxes(child);
80
+
81
+ let trackId = null;
82
+ let timescale = 0;
83
+ let handlerType = 'unknown';
84
+
85
+ for (const trakChild of trakChildren) {
86
+ if (trakChild.type === 'tkhd') {
87
+ const view = new DataView(trakChild.data.buffer, trakChild.data.byteOffset, trakChild.data.byteLength);
88
+ trackId = trakChild.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
89
+ } else if (trakChild.type === 'mdia') {
90
+ const mdiaChildren = parseChildBoxes(trakChild);
91
+ for (const mdiaChild of mdiaChildren) {
92
+ if (mdiaChild.type === 'mdhd') {
93
+ const view = new DataView(mdiaChild.data.buffer, mdiaChild.data.byteOffset, mdiaChild.data.byteLength);
94
+ timescale = mdiaChild.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
95
+ } else if (mdiaChild.type === 'hdlr' && mdiaChild.data.byteLength >= 20) {
96
+ handlerType = String.fromCharCode(
97
+ mdiaChild.data[16],
98
+ mdiaChild.data[17],
99
+ mdiaChild.data[18],
100
+ mdiaChild.data[19],
101
+ );
102
+ }
103
+ }
104
+ }
105
+ }
106
+
107
+ if (trackId !== null) {
108
+ trackMetadata.set(trackId, {
109
+ trackId,
110
+ timescale: timescale || 90000,
111
+ handlerType,
112
+ });
113
+ trackOrder.push(trackId);
114
+ }
115
+ }
116
+
117
+ return { trackMetadata, trackOrder };
118
+ }
119
+
120
+ function cloneSample(sample) {
121
+ return {
122
+ duration: sample.duration || 0,
123
+ size: sample.size || 0,
124
+ flags: sample.flags,
125
+ compositionTimeOffset: sample.compositionTimeOffset || 0,
126
+ dts: sample.dts || 0,
127
+ pts: sample.pts || 0,
128
+ byteOffset: sample.byteOffset || 0,
129
+ };
130
+ }
131
+
132
+ function normalizeSamples(samples, baseDts) {
133
+ return samples.map((sample) => {
134
+ const next = cloneSample(sample);
135
+ next.dts -= baseDts;
136
+ next.pts -= baseDts;
137
+ return next;
138
+ });
139
+ }
140
+
141
+ function clipVideoSamples(samples, startTick, endTick) {
142
+ if (!samples.length) {
143
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
144
+ }
145
+
146
+ let requestedStartIndex = samples.length;
147
+ for (let i = 0; i < samples.length; i++) {
148
+ const sampleEnd = (samples[i].pts || 0) + (samples[i].duration || 0);
149
+ if (sampleEnd > startTick) {
150
+ requestedStartIndex = i;
151
+ break;
152
+ }
153
+ }
154
+ if (requestedStartIndex >= samples.length) {
155
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
156
+ }
157
+
158
+ let decodeStartIndex = requestedStartIndex;
159
+ for (let i = requestedStartIndex; i >= 0; i--) {
160
+ if (isSyncSample(samples[i])) {
161
+ decodeStartIndex = i;
162
+ break;
163
+ }
164
+ }
165
+
166
+ let endIndex = samples.length;
167
+ if (Number.isFinite(endTick)) {
168
+ for (let i = decodeStartIndex; i < samples.length; i++) {
169
+ if ((samples[i].pts || 0) >= endTick) {
170
+ endIndex = i;
171
+ break;
172
+ }
173
+ }
174
+ }
175
+ if (endIndex <= decodeStartIndex) {
176
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
177
+ }
178
+
179
+ const selected = samples.slice(decodeStartIndex, endIndex);
180
+ const decodeStartDts = selected[0].dts || 0;
181
+ const mediaTime = Math.max(0, startTick - decodeStartDts);
182
+ const normalized = normalizeSamples(selected, decodeStartDts);
183
+ const decodeDuration = sumSampleDurations(normalized);
184
+ const maxPlayable = Math.max(0, decodeDuration - mediaTime);
185
+ const requested = Number.isFinite(endTick) ? Math.max(0, endTick - startTick) : maxPlayable;
186
+ const playbackDuration = Math.min(requested, maxPlayable);
187
+
188
+ return {
189
+ samples: normalized,
190
+ mediaTime,
191
+ playbackDuration,
192
+ };
193
+ }
194
+
195
+ function clipNonVideoSamples(samples, startTick, endTick) {
196
+ if (!samples.length) {
197
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
198
+ }
199
+
200
+ let startIndex = 0;
201
+ while (startIndex < samples.length && (samples[startIndex].pts || 0) < startTick) {
202
+ startIndex++;
203
+ }
204
+ if (startIndex >= samples.length) {
205
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
206
+ }
207
+
208
+ let endIndex = samples.length;
209
+ if (Number.isFinite(endTick)) {
210
+ for (let i = startIndex; i < samples.length; i++) {
211
+ if ((samples[i].pts || 0) >= endTick) {
212
+ endIndex = i;
213
+ break;
214
+ }
215
+ }
216
+ }
217
+ if (endIndex <= startIndex) {
218
+ return { samples: [], mediaTime: 0, playbackDuration: 0 };
219
+ }
220
+
221
+ const selected = samples.slice(startIndex, endIndex);
222
+ const decodeStartDts = selected[0].dts || 0;
223
+ const normalized = normalizeSamples(selected, decodeStartDts);
224
+ const decodeDuration = sumSampleDurations(normalized);
225
+ const requested = Number.isFinite(endTick) ? Math.max(0, endTick - startTick) : decodeDuration;
226
+ const playbackDuration = Math.min(requested, decodeDuration);
227
+
228
+ return {
229
+ samples: normalized,
230
+ mediaTime: 0,
231
+ playbackDuration,
232
+ };
233
+ }
234
+
235
+ function applyClipToTracks(tracks, options = {}) {
236
+ const hasStart = Number.isFinite(options.startTime);
237
+ const hasEnd = Number.isFinite(options.endTime);
238
+ if (!hasStart && !hasEnd) {
239
+ for (const [, track] of tracks) {
240
+ if (!track.samples.length) continue;
241
+ const baseDts = track.samples[0].dts || 0;
242
+ track.samples = normalizeSamples(track.samples, baseDts);
243
+ track.mediaTime = 0;
244
+ track.playbackDuration = sumSampleDurations(track.samples);
245
+ }
246
+ return tracks;
247
+ }
248
+
249
+ const startSec = hasStart ? Math.max(0, options.startTime) : 0;
250
+ const endSec = hasEnd ? Math.max(startSec, options.endTime) : Infinity;
251
+
252
+ let videoTrackId = null;
253
+ for (const [trackId, track] of tracks) {
254
+ if (track.handlerType === 'vide' && track.samples.length > 0) {
255
+ videoTrackId = trackId;
256
+ break;
257
+ }
258
+ }
259
+
260
+ const clipped = new Map();
261
+ for (const [trackId, track] of tracks) {
262
+ if (!track.samples.length) continue;
263
+
264
+ const startTick = Math.round(startSec * track.timescale);
265
+ const endTick = Number.isFinite(endSec) ? Math.round(endSec * track.timescale) : Infinity;
266
+ const clip = trackId === videoTrackId
267
+ ? clipVideoSamples(track.samples, startTick, endTick)
268
+ : clipNonVideoSamples(track.samples, startTick, endTick);
269
+
270
+ if (!clip.samples.length) continue;
271
+
272
+ clipped.set(trackId, {
273
+ ...track,
274
+ samples: clip.samples,
275
+ mediaTime: clip.mediaTime,
276
+ playbackDuration: clip.playbackDuration,
277
+ chunkOffsets: [],
278
+ });
279
+ }
280
+
281
+ return clipped;
282
+ }
283
+
284
+ function collectTrackSamples(boxes, trackMetadata, trexDefaults) {
285
+ const tracks = new Map();
286
+ const mdatChunks = [];
287
+ let combinedMdatOffset = 0;
288
+
289
+ for (let i = 0; i < boxes.length; i++) {
290
+ const box = boxes[i];
291
+ if (box.type === 'moof') {
292
+ const moofChildren = parseChildBoxes(box);
293
+ const moofStart = box.offset;
294
+
295
+ let nextMdatOffset = -1;
296
+ for (let j = i + 1; j < boxes.length; j++) {
297
+ if (boxes[j].type === 'mdat') {
298
+ nextMdatOffset = boxes[j].offset;
299
+ break;
300
+ }
301
+ if (boxes[j].type === 'moof') break;
302
+ }
303
+ if (nextMdatOffset < 0) continue;
304
+
305
+ const mdatContentStartAbs = nextMdatOffset + 8;
306
+
307
+ for (const child of moofChildren) {
308
+ if (child.type !== 'traf') continue;
309
+
310
+ const trafChildren = parseChildBoxes(child);
311
+ const tfhdBox = findBox(trafChildren, 'tfhd');
312
+ if (!tfhdBox) continue;
313
+
314
+ const tfhdView = new DataView(tfhdBox.data.buffer, tfhdBox.data.byteOffset, tfhdBox.data.byteLength);
315
+ const trackId = tfhdView.getUint32(12);
316
+ const tfhd = parseTfhd(tfhdBox.data, trexDefaults.get(trackId) || {});
317
+ const tfdtBox = findBox(trafChildren, 'tfdt');
318
+ let decodeTime = tfdtBox ? parseTfdt(tfdtBox.data) : 0;
319
+ let runDataCursorAbs = null;
320
+
321
+ if (!tracks.has(trackId)) {
322
+ const meta = trackMetadata.get(trackId) || {};
323
+ tracks.set(trackId, {
324
+ trackId,
325
+ timescale: meta.timescale || 90000,
326
+ handlerType: meta.handlerType || 'unknown',
327
+ samples: [],
328
+ chunkOffsets: [],
329
+ mediaTime: 0,
330
+ playbackDuration: 0,
331
+ });
332
+ }
333
+ const track = tracks.get(trackId);
334
+
335
+ for (const trafChild of trafChildren) {
336
+ if (trafChild.type !== 'trun') continue;
337
+ const { samples, dataOffset, flags } = parseTrun(trafChild.data, tfhd);
338
+ const runSize = samples.reduce((sum, sample) => sum + (sample.size || 0), 0);
339
+
340
+ let dataStartAbs;
341
+ if (flags & 0x1) {
342
+ const baseAbs = (tfhd.flags & 0x1) ? tfhd.baseDataOffset : moofStart;
343
+ dataStartAbs = baseAbs + dataOffset;
344
+ } else if (runDataCursorAbs !== null) {
345
+ dataStartAbs = runDataCursorAbs;
346
+ } else {
347
+ dataStartAbs = mdatContentStartAbs;
348
+ }
349
+
350
+ let sampleByteOffset = combinedMdatOffset + Math.max(0, dataStartAbs - mdatContentStartAbs);
351
+ for (const sample of samples) {
352
+ const dts = decodeTime;
353
+ const pts = dts + (sample.compositionTimeOffset || 0);
354
+ track.samples.push({
355
+ ...sample,
356
+ dts,
357
+ pts,
358
+ byteOffset: sampleByteOffset,
359
+ });
360
+ decodeTime += sample.duration || 0;
361
+ sampleByteOffset += sample.size || 0;
362
+ }
363
+
364
+ runDataCursorAbs = dataStartAbs + runSize;
365
+ }
366
+ }
367
+ } else if (box.type === 'mdat') {
368
+ const data = box.data.subarray(8);
369
+ mdatChunks.push({ data, offset: combinedMdatOffset });
370
+ combinedMdatOffset += data.byteLength;
371
+ }
372
+ }
373
+
374
+ const combinedMdat = new Uint8Array(combinedMdatOffset);
375
+ for (const chunk of mdatChunks) {
376
+ combinedMdat.set(chunk.data, chunk.offset);
377
+ }
378
+
379
+ return { tracks, combinedMdat };
380
+ }
381
+
382
+ function rebuildMdatContent(tracks, trackOrder, sourceMdat) {
383
+ const orderedTrackIds = trackOrder.filter((trackId) => tracks.has(trackId));
384
+ for (const trackId of tracks.keys()) {
385
+ if (!orderedTrackIds.includes(trackId)) orderedTrackIds.push(trackId);
386
+ }
387
+
388
+ let totalSize = 0;
389
+ for (const trackId of orderedTrackIds) {
390
+ const track = tracks.get(trackId);
391
+ for (const sample of track.samples) totalSize += sample.size || 0;
392
+ }
393
+
394
+ const mdatData = new Uint8Array(totalSize);
395
+ let writeOffset = 0;
396
+
397
+ for (const trackId of orderedTrackIds) {
398
+ const track = tracks.get(trackId);
399
+ if (!track || !track.samples.length) {
400
+ if (track) track.chunkOffsets = [];
401
+ continue;
402
+ }
403
+
404
+ track.chunkOffsets = [{ offset: writeOffset, sampleCount: track.samples.length }];
405
+ for (const sample of track.samples) {
406
+ const start = sample.byteOffset || 0;
407
+ const end = start + (sample.size || 0);
408
+ if (start < 0 || end > sourceMdat.byteLength) {
409
+ throw new Error(`Invalid sample byte range for track ${trackId}: ${start}-${end}`);
410
+ }
411
+ mdatData.set(sourceMdat.subarray(start, end), writeOffset);
412
+ sample.byteOffset = writeOffset;
413
+ writeOffset += sample.size || 0;
414
+ }
415
+ }
416
+
417
+ return mdatData;
418
+ }
419
+
420
+ function calculateMovieDuration(tracks, movieTimescale) {
421
+ let maxDuration = 0;
422
+ for (const [, track] of tracks) {
423
+ const fallback = Math.max(0, sumSampleDurations(track.samples) - (track.mediaTime || 0));
424
+ const playbackDuration = track.playbackDuration > 0 ? track.playbackDuration : fallback;
425
+ track.playbackDuration = playbackDuration;
426
+ track.movieDuration = toMovieTimescale(playbackDuration, track.timescale, movieTimescale);
427
+ maxDuration = Math.max(maxDuration, track.movieDuration);
428
+ }
429
+ return maxDuration;
430
+ }
18
431
 
19
432
  function rebuildMvhd(mvhdBox, duration) {
20
433
  const data = new Uint8Array(mvhdBox.data);
21
434
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
22
435
  const version = data[8];
23
436
  const durationOffset = version === 0 ? 24 : 32;
24
- if (version === 0) view.setUint32(durationOffset, duration);
25
- else { view.setUint32(durationOffset, 0); view.setUint32(durationOffset + 4, duration); }
437
+ if (version === 0) {
438
+ view.setUint32(durationOffset, duration);
439
+ } else {
440
+ view.setUint32(durationOffset, 0);
441
+ view.setUint32(durationOffset + 4, duration);
442
+ }
26
443
  return data;
27
444
  }
28
445
 
29
- function rebuildTkhd(tkhdBox, trackInfo, maxDuration) {
446
+ function rebuildTkhd(tkhdBox, trackInfo, maxMovieDuration) {
30
447
  const data = new Uint8Array(tkhdBox.data);
31
448
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
32
449
  const version = data[8];
33
- let trackDuration = maxDuration;
34
- if (trackInfo) { trackDuration = 0; for (const s of trackInfo.samples) trackDuration += s.duration || 0; }
35
- if (version === 0) view.setUint32(28, trackDuration);
36
- else { view.setUint32(36, 0); view.setUint32(40, trackDuration); }
450
+ const duration = trackInfo?.movieDuration ?? maxMovieDuration;
451
+ if (version === 0) view.setUint32(28, duration);
452
+ else {
453
+ view.setUint32(36, 0);
454
+ view.setUint32(40, duration);
455
+ }
37
456
  return data;
38
457
  }
39
458
 
40
- function rebuildMdhd(mdhdBox, trackInfo, maxDuration) {
459
+ function rebuildMdhd(mdhdBox, trackInfo) {
41
460
  const data = new Uint8Array(mdhdBox.data);
42
461
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
43
462
  const version = data[8];
44
- let trackDuration = 0;
45
- if (trackInfo) for (const s of trackInfo.samples) trackDuration += s.duration || 0;
463
+ const duration = sumSampleDurations(trackInfo?.samples || []);
46
464
  const durationOffset = version === 0 ? 24 : 32;
47
- if (version === 0) view.setUint32(durationOffset, trackDuration);
48
- else { view.setUint32(durationOffset, 0); view.setUint32(durationOffset + 4, trackDuration); }
465
+ if (version === 0) {
466
+ view.setUint32(durationOffset, duration);
467
+ } else {
468
+ view.setUint32(durationOffset, 0);
469
+ view.setUint32(durationOffset + 4, duration);
470
+ }
49
471
  return data;
50
472
  }
51
473
 
52
474
  function rebuildStbl(stblBox, trackInfo) {
53
475
  const stblChildren = parseChildBoxes(stblBox);
54
476
  const newParts = [];
55
- for (const child of stblChildren) if (child.type === 'stsd') { newParts.push(child.data); break; }
477
+ for (const child of stblChildren) {
478
+ if (child.type === 'stsd') {
479
+ newParts.push(child.data);
480
+ break;
481
+ }
482
+ }
483
+
56
484
  const samples = trackInfo?.samples || [];
57
485
  const chunkOffsets = trackInfo?.chunkOffsets || [];
58
486
 
59
487
  // stts
60
488
  const sttsEntries = [];
61
- let curDur = null, count = 0;
62
- for (const s of samples) {
63
- const d = s.duration || 0;
64
- if (d === curDur) count++;
65
- else { if (curDur !== null) sttsEntries.push({ count, duration: curDur }); curDur = d; count = 1; }
489
+ let currentDuration = null;
490
+ let currentCount = 0;
491
+ for (const sample of samples) {
492
+ const duration = sample.duration || 0;
493
+ if (duration === currentDuration) currentCount++;
494
+ else {
495
+ if (currentDuration !== null) {
496
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
497
+ }
498
+ currentDuration = duration;
499
+ currentCount = 1;
500
+ }
501
+ }
502
+ if (currentDuration !== null) {
503
+ sttsEntries.push({ count: currentCount, duration: currentDuration });
66
504
  }
67
- if (curDur !== null) sttsEntries.push({ count, duration: curDur });
68
505
  const sttsData = new Uint8Array(8 + sttsEntries.length * 8);
69
506
  const sttsView = new DataView(sttsData.buffer);
70
507
  sttsView.setUint32(4, sttsEntries.length);
71
- let off = 8;
72
- for (const e of sttsEntries) { sttsView.setUint32(off, e.count); sttsView.setUint32(off + 4, e.duration); off += 8; }
508
+ let offset = 8;
509
+ for (const entry of sttsEntries) {
510
+ sttsView.setUint32(offset, entry.count);
511
+ sttsView.setUint32(offset + 4, entry.duration);
512
+ offset += 8;
513
+ }
73
514
  newParts.push(createBox('stts', sttsData));
74
515
 
75
516
  // stsc
76
517
  const stscEntries = [];
77
518
  if (chunkOffsets.length > 0) {
78
- let currentSampleCount = chunkOffsets[0].sampleCount, firstChunk = 1;
519
+ let currentSampleCount = chunkOffsets[0].sampleCount;
520
+ let firstChunk = 1;
79
521
  for (let i = 1; i <= chunkOffsets.length; i++) {
80
522
  const sampleCount = i < chunkOffsets.length ? chunkOffsets[i].sampleCount : -1;
81
523
  if (sampleCount !== currentSampleCount) {
82
- stscEntries.push({ firstChunk, samplesPerChunk: currentSampleCount, sampleDescriptionIndex: 1 });
83
- firstChunk = i + 1; currentSampleCount = sampleCount;
524
+ stscEntries.push({
525
+ firstChunk,
526
+ samplesPerChunk: currentSampleCount,
527
+ sampleDescriptionIndex: 1,
528
+ });
529
+ firstChunk = i + 1;
530
+ currentSampleCount = sampleCount;
84
531
  }
85
532
  }
86
- } else stscEntries.push({ firstChunk: 1, samplesPerChunk: samples.length, sampleDescriptionIndex: 1 });
533
+ }
87
534
  const stscData = new Uint8Array(8 + stscEntries.length * 12);
88
535
  const stscView = new DataView(stscData.buffer);
89
536
  stscView.setUint32(4, stscEntries.length);
90
- off = 8;
91
- for (const e of stscEntries) { stscView.setUint32(off, e.firstChunk); stscView.setUint32(off + 4, e.samplesPerChunk); stscView.setUint32(off + 8, e.sampleDescriptionIndex); off += 12; }
537
+ offset = 8;
538
+ for (const entry of stscEntries) {
539
+ stscView.setUint32(offset, entry.firstChunk);
540
+ stscView.setUint32(offset + 4, entry.samplesPerChunk);
541
+ stscView.setUint32(offset + 8, entry.sampleDescriptionIndex);
542
+ offset += 12;
543
+ }
92
544
  newParts.push(createBox('stsc', stscData));
93
545
 
94
546
  // stsz
95
547
  const stszData = new Uint8Array(12 + samples.length * 4);
96
548
  const stszView = new DataView(stszData.buffer);
97
549
  stszView.setUint32(8, samples.length);
98
- off = 12;
99
- for (const s of samples) { stszView.setUint32(off, s.size || 0); off += 4; }
550
+ offset = 12;
551
+ for (const sample of samples) {
552
+ stszView.setUint32(offset, sample.size || 0);
553
+ offset += 4;
554
+ }
100
555
  newParts.push(createBox('stsz', stszData));
101
556
 
102
557
  // stco
103
- const numChunks = chunkOffsets.length || 1;
104
- const stcoData = new Uint8Array(8 + numChunks * 4);
558
+ const stcoData = new Uint8Array(8 + chunkOffsets.length * 4);
105
559
  const stcoView = new DataView(stcoData.buffer);
106
- stcoView.setUint32(4, numChunks);
107
- for (let i = 0; i < numChunks; i++) stcoView.setUint32(8 + i * 4, chunkOffsets[i]?.offset || 0);
560
+ stcoView.setUint32(4, chunkOffsets.length);
561
+ for (let i = 0; i < chunkOffsets.length; i++) {
562
+ stcoView.setUint32(8 + i * 4, chunkOffsets[i].offset || 0);
563
+ }
108
564
  newParts.push(createBox('stco', stcoData));
109
565
 
110
566
  // ctts
111
- const hasCtts = samples.some(s => s.compositionTimeOffset);
567
+ const hasCtts = samples.some((sample) => sample.compositionTimeOffset);
112
568
  if (hasCtts) {
113
569
  const cttsEntries = [];
114
- let curOff = null; count = 0;
115
- for (const s of samples) {
116
- const o = s.compositionTimeOffset || 0;
117
- if (o === curOff) count++;
118
- else { if (curOff !== null) cttsEntries.push({ count, offset: curOff }); curOff = o; count = 1; }
570
+ let currentOffset = null;
571
+ currentCount = 0;
572
+ for (const sample of samples) {
573
+ const compositionOffset = sample.compositionTimeOffset || 0;
574
+ if (compositionOffset === currentOffset) currentCount++;
575
+ else {
576
+ if (currentOffset !== null) {
577
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
578
+ }
579
+ currentOffset = compositionOffset;
580
+ currentCount = 1;
581
+ }
582
+ }
583
+ if (currentOffset !== null) {
584
+ cttsEntries.push({ count: currentCount, offset: currentOffset });
119
585
  }
120
- if (curOff !== null) cttsEntries.push({ count, offset: curOff });
121
586
  const cttsData = new Uint8Array(8 + cttsEntries.length * 8);
122
587
  const cttsView = new DataView(cttsData.buffer);
123
588
  cttsView.setUint32(4, cttsEntries.length);
124
- off = 8;
125
- for (const e of cttsEntries) { cttsView.setUint32(off, e.count); cttsView.setInt32(off + 4, e.offset); off += 8; }
589
+ offset = 8;
590
+ for (const entry of cttsEntries) {
591
+ cttsView.setUint32(offset, entry.count);
592
+ cttsView.setInt32(offset + 4, entry.offset);
593
+ offset += 8;
594
+ }
126
595
  newParts.push(createBox('ctts', cttsData));
127
596
  }
128
597
 
129
- // stss
598
+ // stss (video sync samples)
130
599
  const syncSamples = [];
131
600
  for (let i = 0; i < samples.length; i++) {
132
- const flags = samples[i].flags;
133
- if (flags !== undefined) { if (!((flags >> 16) & 0x1)) syncSamples.push(i + 1); }
601
+ if (isSyncSample(samples[i])) syncSamples.push(i + 1);
134
602
  }
135
603
  if (syncSamples.length > 0 && syncSamples.length < samples.length) {
136
604
  const stssData = new Uint8Array(8 + syncSamples.length * 4);
137
605
  const stssView = new DataView(stssData.buffer);
138
606
  stssView.setUint32(4, syncSamples.length);
139
- off = 8;
140
- for (const n of syncSamples) { stssView.setUint32(off, n); off += 4; }
607
+ offset = 8;
608
+ for (const sampleNumber of syncSamples) {
609
+ stssView.setUint32(offset, sampleNumber);
610
+ offset += 4;
611
+ }
141
612
  newParts.push(createBox('stss', stssData));
142
613
  }
143
614
 
@@ -154,169 +625,141 @@ function rebuildMinf(minfBox, trackInfo) {
154
625
  return createBox('minf', ...newParts);
155
626
  }
156
627
 
157
- function rebuildMdia(mdiaBox, trackInfo, maxDuration) {
628
+ function rebuildMdia(mdiaBox, trackInfo) {
158
629
  const mdiaChildren = parseChildBoxes(mdiaBox);
159
630
  const newParts = [];
160
631
  for (const child of mdiaChildren) {
161
632
  if (child.type === 'minf') newParts.push(rebuildMinf(child, trackInfo));
162
- else if (child.type === 'mdhd') newParts.push(rebuildMdhd(child, trackInfo, maxDuration));
633
+ else if (child.type === 'mdhd') newParts.push(rebuildMdhd(child, trackInfo));
163
634
  else newParts.push(child.data);
164
635
  }
165
636
  return createBox('mdia', ...newParts);
166
637
  }
167
638
 
168
- function rebuildTrak(trakBox, trackIdMap, maxDuration) {
639
+ function rebuildTrak(trakBox, trackInfoMap, maxMovieDuration) {
169
640
  const trakChildren = parseChildBoxes(trakBox);
170
- let trackId = 1;
641
+ let trackId = null;
171
642
  for (const child of trakChildren) {
172
- if (child.type === 'tkhd') {
173
- const view = new DataView(child.data.buffer, child.data.byteOffset, child.data.byteLength);
174
- trackId = child.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
175
- }
643
+ if (child.type !== 'tkhd') continue;
644
+ const view = new DataView(child.data.buffer, child.data.byteOffset, child.data.byteLength);
645
+ trackId = child.data[8] === 0 ? view.getUint32(20) : view.getUint32(28);
176
646
  }
177
- const trackInfo = trackIdMap.get(trackId);
647
+ if (trackId === null) return null;
648
+
649
+ const trackInfo = trackInfoMap.get(trackId);
650
+ if (!trackInfo || !trackInfo.samples.length) return null;
651
+
178
652
  const newParts = [];
179
- let hasEdts = false;
180
653
  for (const child of trakChildren) {
181
- if (child.type === 'edts') { hasEdts = true; newParts.push(child.data); }
182
- else if (child.type === 'mdia') newParts.push(rebuildMdia(child, trackInfo, maxDuration));
183
- else if (child.type === 'tkhd') newParts.push(rebuildTkhd(child, trackInfo, maxDuration));
654
+ if (child.type === 'edts') continue;
655
+ if (child.type === 'mdia') newParts.push(rebuildMdia(child, trackInfo));
656
+ else if (child.type === 'tkhd') newParts.push(rebuildTkhd(child, trackInfo, maxMovieDuration));
184
657
  else newParts.push(child.data);
185
658
  }
186
- if (!hasEdts && trackInfo) {
187
- let trackDuration = 0;
188
- for (const s of trackInfo.samples) trackDuration += s.duration || 0;
189
- const elstData = new Uint8Array(20);
190
- const elstView = new DataView(elstData.buffer);
191
- elstView.setUint32(4, 1); elstView.setUint32(8, maxDuration); elstView.setInt32(12, 0); elstView.setInt16(16, 1);
192
- const elst = createBox('elst', elstData);
193
- const edts = createBox('edts', elst);
194
- const tkhdIndex = newParts.findIndex(p => p.length >= 8 && String.fromCharCode(p[4], p[5], p[6], p[7]) === 'tkhd');
195
- if (tkhdIndex >= 0) newParts.splice(tkhdIndex + 1, 0, edts);
196
- }
659
+
660
+ const elstPayload = new Uint8Array(16);
661
+ const elstView = new DataView(elstPayload.buffer);
662
+ elstView.setUint32(0, 1);
663
+ elstView.setUint32(4, trackInfo.movieDuration ?? maxMovieDuration);
664
+ elstView.setInt32(8, Math.max(0, Math.round(trackInfo.mediaTime || 0)));
665
+ elstView.setUint16(12, 1);
666
+ elstView.setUint16(14, 0);
667
+ const elst = createFullBox('elst', 0, 0, elstPayload);
668
+ const edts = createBox('edts', elst);
669
+
670
+ const tkhdIndex = newParts.findIndex((part) =>
671
+ part.length >= 8 && String.fromCharCode(part[4], part[5], part[6], part[7]) === 'tkhd',
672
+ );
673
+ if (tkhdIndex >= 0) newParts.splice(tkhdIndex + 1, 0, edts);
674
+ else newParts.unshift(edts);
675
+
197
676
  return createBox('trak', ...newParts);
198
677
  }
199
678
 
200
679
  function updateStcoOffsets(output, ftypSize, moovSize) {
201
680
  const mdatContentOffset = ftypSize + moovSize + 8;
202
681
  const view = new DataView(output.buffer, output.byteOffset, output.byteLength);
682
+
203
683
  function scan(start, end) {
204
- let pos = start;
205
- while (pos + 8 <= end) {
206
- const size = view.getUint32(pos);
684
+ let position = start;
685
+ while (position + 8 <= end) {
686
+ const size = view.getUint32(position);
207
687
  if (size < 8) break;
208
- const type = String.fromCharCode(output[pos + 4], output[pos + 5], output[pos + 6], output[pos + 7]);
688
+ const type = String.fromCharCode(
689
+ output[position + 4],
690
+ output[position + 5],
691
+ output[position + 6],
692
+ output[position + 7],
693
+ );
694
+
209
695
  if (type === 'stco') {
210
- const entryCount = view.getUint32(pos + 12);
696
+ const entryCount = view.getUint32(position + 12);
211
697
  for (let i = 0; i < entryCount; i++) {
212
- const entryPos = pos + 16 + i * 4;
213
- view.setUint32(entryPos, mdatContentOffset + view.getUint32(entryPos));
698
+ const entryPos = position + 16 + i * 4;
699
+ const relativeOffset = view.getUint32(entryPos);
700
+ view.setUint32(entryPos, mdatContentOffset + relativeOffset);
214
701
  }
215
- } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) scan(pos + 8, pos + size);
216
- pos += size;
702
+ } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) {
703
+ scan(position + 8, position + size);
704
+ }
705
+
706
+ position += size;
217
707
  }
218
708
  }
709
+
219
710
  scan(0, output.byteLength);
220
711
  }
221
712
 
222
- // ============================================
223
- // Main Converter Function
224
- // ============================================
225
-
226
713
  /**
227
714
  * Convert fragmented MP4 to standard MP4
228
715
  * @param {Uint8Array} fmp4Data - fMP4 data
716
+ * @param {object} [options] - Optional clip settings
717
+ * @param {number} [options.startTime] - Clip start time (seconds)
718
+ * @param {number} [options.endTime] - Clip end time (seconds)
229
719
  * @returns {Uint8Array} Standard MP4 data
230
720
  */
231
- export function convertFmp4ToMp4(fmp4Data) {
721
+ export function convertFmp4ToMp4(fmp4Data, options = {}) {
232
722
  const boxes = parseBoxes(fmp4Data);
233
723
  const ftyp = findBox(boxes, 'ftyp');
234
724
  const moov = findBox(boxes, 'moov');
235
725
  if (!ftyp || !moov) throw new Error('Invalid fMP4: missing ftyp or moov');
236
726
 
237
- const moovChildren = parseChildBoxes(moov);
238
- const originalTrackIds = [];
239
- for (const child of moovChildren) {
240
- if (child.type === 'trak') {
241
- const trakChildren = parseChildBoxes(child);
242
- for (const tc of trakChildren) {
243
- if (tc.type === 'tkhd') {
244
- const view = new DataView(tc.data.buffer, tc.data.byteOffset, tc.data.byteLength);
245
- originalTrackIds.push(tc.data[8] === 0 ? view.getUint32(20) : view.getUint32(28));
246
- }
247
- }
248
- }
249
- }
727
+ const movieTimescale = getMovieTimescale(moov);
728
+ const { trackMetadata, trackOrder } = extractTrackMetadata(moov);
729
+ const trexDefaults = extractTrexDefaults(moov);
730
+ const { tracks, combinedMdat } = collectTrackSamples(boxes, trackMetadata, trexDefaults);
250
731
 
251
- const tracks = new Map();
252
- const mdatChunks = [];
253
- let combinedMdatOffset = 0;
732
+ if (tracks.size === 0) throw new Error('Invalid fMP4: no track fragments found');
254
733
 
255
- for (let i = 0; i < boxes.length; i++) {
256
- const box = boxes[i];
257
- if (box.type === 'moof') {
258
- const moofChildren = parseChildBoxes(box);
259
- const moofStart = box.offset;
260
- let nextMdatOffset = 0;
261
- for (let j = i + 1; j < boxes.length; j++) {
262
- if (boxes[j].type === 'mdat') { nextMdatOffset = boxes[j].offset; break; }
263
- if (boxes[j].type === 'moof') break;
264
- }
265
- for (const child of moofChildren) {
266
- if (child.type === 'traf') {
267
- const trafChildren = parseChildBoxes(child);
268
- const tfhd = findBox(trafChildren, 'tfhd');
269
- const trun = findBox(trafChildren, 'trun');
270
- if (tfhd && trun) {
271
- const tfhdInfo = parseTfhd(tfhd.data);
272
- const { samples, dataOffset } = parseTrun(trun.data, tfhdInfo);
273
- if (!tracks.has(tfhdInfo.trackId)) tracks.set(tfhdInfo.trackId, { samples: [], chunkOffsets: [] });
274
- const track = tracks.get(tfhdInfo.trackId);
275
- const chunkOffset = combinedMdatOffset + (moofStart + dataOffset) - (nextMdatOffset + 8);
276
- track.chunkOffsets.push({ offset: chunkOffset, sampleCount: samples.length });
277
- track.samples.push(...samples);
278
- }
279
- }
280
- }
281
- } else if (box.type === 'mdat') {
282
- mdatChunks.push({ data: box.data.subarray(8), offset: combinedMdatOffset });
283
- combinedMdatOffset += box.data.subarray(8).byteLength;
284
- }
285
- }
286
-
287
- const totalMdatSize = mdatChunks.reduce((sum, c) => sum + c.data.byteLength, 0);
288
- const combinedMdat = new Uint8Array(totalMdatSize);
289
- for (const chunk of mdatChunks) combinedMdat.set(chunk.data, chunk.offset);
290
-
291
- const trackIdMap = new Map();
292
- const fmp4TrackIds = Array.from(tracks.keys()).sort((a, b) => a - b);
293
- for (let i = 0; i < fmp4TrackIds.length && i < originalTrackIds.length; i++) {
294
- trackIdMap.set(originalTrackIds[i], tracks.get(fmp4TrackIds[i]));
734
+ const clippedTracks = applyClipToTracks(tracks, options);
735
+ if (clippedTracks.size === 0) {
736
+ throw new Error('Clip range produced no samples');
295
737
  }
296
738
 
297
- let maxDuration = 0;
298
- for (const [, track] of tracks) {
299
- let dur = 0;
300
- for (const s of track.samples) dur += s.duration || 0;
301
- maxDuration = Math.max(maxDuration, dur);
302
- }
739
+ const rebuiltMdat = rebuildMdatContent(clippedTracks, trackOrder, combinedMdat);
740
+ const maxMovieDuration = calculateMovieDuration(clippedTracks, movieTimescale);
303
741
 
742
+ const moovChildren = parseChildBoxes(moov);
304
743
  const newMoovParts = [];
305
744
  for (const child of moovChildren) {
306
745
  if (child.type === 'mvex') continue;
307
- if (child.type === 'trak') newMoovParts.push(rebuildTrak(child, trackIdMap, maxDuration));
308
- else if (child.type === 'mvhd') newMoovParts.push(rebuildMvhd(child, maxDuration));
309
- else newMoovParts.push(child.data);
746
+ if (child.type === 'trak') {
747
+ const trak = rebuildTrak(child, clippedTracks, maxMovieDuration);
748
+ if (trak) newMoovParts.push(trak);
749
+ } else if (child.type === 'mvhd') {
750
+ newMoovParts.push(rebuildMvhd(child, maxMovieDuration));
751
+ } else {
752
+ newMoovParts.push(child.data);
753
+ }
310
754
  }
311
755
 
312
756
  const newMoov = createBox('moov', ...newMoovParts);
313
- const newMdat = createBox('mdat', combinedMdat);
757
+ const newMdat = createBox('mdat', rebuiltMdat);
314
758
  const output = new Uint8Array(ftyp.size + newMoov.byteLength + newMdat.byteLength);
315
759
  output.set(ftyp.data, 0);
316
760
  output.set(newMoov, ftyp.size);
317
761
  output.set(newMdat, ftyp.size + newMoov.byteLength);
318
762
  updateStcoOffsets(output, ftyp.size, newMoov.byteLength);
319
-
320
763
  return output;
321
764
  }
322
765