@invintusmedia/tomp4 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ts-to-mp4.js CHANGED
@@ -22,1047 +22,9 @@
22
22
  * ❌ E-AC-3 (0x87)
23
23
  */
24
24
 
25
- // Stream type info
26
/**
 * Lookup table keyed by the MPEG-TS `stream_type` byte.
 *
 * Each entry carries a human-readable codec `name` and a `supported` flag
 * telling whether this converter handles that codec.
 */
const STREAM_TYPES = {};
for (const [streamType, name, supported] of [
  [0x01, 'MPEG-1 Video', false],
  [0x02, 'MPEG-2 Video', false],
  [0x03, 'MPEG-1 Audio (MP3)', false],
  [0x04, 'MPEG-2 Audio', false],
  [0x0F, 'AAC', true],
  [0x11, 'AAC-LATM', true],
  [0x1B, 'H.264/AVC', true],
  [0x24, 'H.265/HEVC', true],
  [0x81, 'AC-3 (Dolby)', false],
  [0x87, 'E-AC-3', false],
]) {
  STREAM_TYPES[streamType] = { name, supported };
}
25
+ import { TSParser, getCodecInfo } from './parsers/mpegts.js';
26
+ import { MP4Muxer } from './muxers/mp4.js';
38
27
 
39
- // ============================================
40
- // MP4 BOX HELPERS
41
- // ============================================
42
/**
 * Serialises one MP4 box: a 32-bit big-endian size, the four-character
 * type code, then every payload concatenated in argument order.
 *
 * @param {string} type - Four-character box type; one byte is written per character.
 * @param {...Uint8Array} payloads - Byte arrays that make up the box body.
 * @returns {Uint8Array} The complete box, header included.
 */
function createBox(type, ...payloads) {
  const HEADER_BYTES = 8;
  const totalBytes = payloads.reduce((sum, part) => sum + part.byteLength, HEADER_BYTES);
  const box = new Uint8Array(totalBytes);

  // The size field counts the header itself as well as the body.
  new DataView(box.buffer).setUint32(0, totalBytes);
  for (let k = 0; k < 4; k++) {
    box[4 + k] = type.charCodeAt(k);
  }

  let cursor = HEADER_BYTES;
  payloads.forEach((part) => {
    box.set(part, cursor);
    cursor += part.byteLength;
  });
  return box;
}
59
-
60
/**
 * Serialises an MP4 "full box": a regular box whose body starts with a
 * one-byte version followed by a 24-bit big-endian flags field.
 *
 * @param {string} type - Four-character box type.
 * @param {number} version - Value stored in the version byte.
 * @param {number} flags - Flags; only the low 24 bits are written.
 * @param {...Uint8Array} payloads - Byte arrays appended after version/flags.
 * @returns {Uint8Array} The complete box as produced by createBox.
 */
function createFullBox(type, version, flags, ...payloads) {
  const versionAndFlags = Uint8Array.of(
    version,
    (flags >> 16) & 0xFF,
    (flags >> 8) & 0xFF,
    flags & 0xFF
  );
  return createBox(type, versionAndFlags, ...payloads);
}
68
-
69
- // ============================================
70
- // MPEG-TS PARSER
71
- // ============================================
72
/** Size in bytes of a single MPEG transport stream packet. */
const TS_PACKET_SIZE = 188;
/** Sync byte that starts every transport stream packet. */
const TS_SYNC_BYTE = 0x47;
/** PID on which the Program Association Table is carried. */
const PAT_PID = 0x0000;

/**
 * Minimal MPEG-TS demuxer.
 *
 * Feed a complete transport stream to `parse()`, then call `finalize()`.
 * Afterwards `videoAccessUnits` holds `{ nalUnits, pts, dts }` records and
 * `audioAccessUnits` holds `{ data, pts }` records (ADTS headers removed),
 * with timestamps in 90 kHz ticks rebased so the earliest PTS is 0.
 *
 * Only the first video and first audio elementary stream listed in the PMT
 * are collected. Timestamp wrap-around (33-bit rollover) is not handled.
 */
class TSParser {
  constructor() {
    this.pmtPid = null;
    this.videoPid = null;
    this.audioPid = null;
    this.videoStreamType = null;
    this.audioStreamType = null;
    this.videoPesBuffer = [];
    this.audioPesBuffer = [];
    this.videoAccessUnits = [];
    this.audioAccessUnits = [];
    this.videoPts = [];
    this.videoDts = [];
    this.audioPts = [];
    this.lastAudioPts = null;    // Running audio timestamp for PES packets without a PTS
    this.adtsPartial = null;     // Incomplete ADTS frame carried over from the previous PES
    this.audioSampleRate = null; // Detected from the first ADTS header
    this.audioChannels = null;
    this.debug = { packets: 0, patFound: false, pmtFound: false };
  }

  /**
   * Splits `data` into 188-byte packets and dispatches each one.
   * Bytes before the first sync byte are skipped (count stored in
   * `debug.skippedBytes`); on sync loss the next sync byte is searched for.
   *
   * @param {Uint8Array} data - Raw transport stream bytes.
   */
  parse(data) {
    let offset = 0;
    while (offset < data.byteLength && data[offset] !== TS_SYNC_BYTE) offset++;
    if (offset > 0) this.debug.skippedBytes = offset;

    while (offset + TS_PACKET_SIZE <= data.byteLength) {
      if (data[offset] !== TS_SYNC_BYTE) {
        // Lost sync: jump to the next candidate sync byte.
        const nextSync = data.indexOf(TS_SYNC_BYTE, offset + 1);
        if (nextSync === -1) break;
        offset = nextSync;
        continue;
      }
      this.parsePacket(data.subarray(offset, offset + TS_PACKET_SIZE));
      this.debug.packets++;
      offset += TS_PACKET_SIZE;
    }
  }

  /**
   * Decodes one packet header and routes the payload by PID.
   *
   * @param {Uint8Array} packet - Exactly one transport stream packet.
   */
  parsePacket(packet) {
    const pid = ((packet[1] & 0x1F) << 8) | packet[2];
    const payloadStart = (packet[1] & 0x40) !== 0;
    const adaptationField = (packet[3] & 0x30) >> 4;
    let payloadOffset = 4;
    if (adaptationField === 2 || adaptationField === 3) {
      const adaptLen = packet[4];
      payloadOffset = 5 + adaptLen;
      if (payloadOffset >= TS_PACKET_SIZE) return; // Invalid adaptation field
    }
    if (adaptationField === 2) return; // Adaptation field only, no payload
    if (payloadOffset >= packet.length) return;

    const payload = packet.subarray(payloadOffset);
    if (payload.length === 0) return;

    if (pid === PAT_PID) this.parsePAT(payload);
    else if (pid === this.pmtPid) this.parsePMT(payload);
    else if (pid === this.videoPid) this.collectPES(payload, payloadStart, 'video');
    else if (pid === this.audioPid) this.collectPES(payload, payloadStart, 'audio');
  }

  /**
   * Reads the PAT and stores the PMT PID of the first real program
   * (program number 0 is skipped).
   *
   * @param {Uint8Array} payload - Packet payload starting at the pointer field.
   */
  parsePAT(payload) {
    if (payload.length < 12) return;
    const sectionStart = payload[0] + 1; // skip pointer field
    if (sectionStart + 8 > payload.length) return;

    // Bound the program loop by section_length so that stuffing bytes and the
    // CRC are never interpreted as a program entry.
    const sectionLength = ((payload[sectionStart + 1] & 0x0F) << 8) | payload[sectionStart + 2];
    const entriesEnd = Math.min(payload.length, sectionStart + 3 + sectionLength) - 4; // -4 for CRC

    // table_id + section_length + transport_stream_id + version + section_number + last_section_number
    let offset = sectionStart + 8;

    while (offset + 4 <= entriesEnd) {
      const programNum = (payload[offset] << 8) | payload[offset + 1];
      const pmtPid = ((payload[offset + 2] & 0x1F) << 8) | payload[offset + 3];
      if (programNum !== 0 && pmtPid !== 0) {
        this.pmtPid = pmtPid;
        this.debug.patFound = true;
        break;
      }
      offset += 4;
    }
  }

  /**
   * Reads the PMT and records the first video PID and first audio PID found.
   * Codec support is not checked here; only the stream type is stored.
   *
   * @param {Uint8Array} payload - Packet payload starting at the pointer field.
   */
  parsePMT(payload) {
    if (payload.length < 16) return;
    let offset = payload[0] + 1; // pointer field
    if (offset + 12 > payload.length) return;

    // table_id
    offset++;

    const sectionLength = ((payload[offset] & 0x0F) << 8) | payload[offset + 1];
    offset += 2;

    // program_number(2) + version(1) + section_number(1) + last_section(1)
    offset += 5;

    // PCR_PID (2)
    offset += 2;

    // program_info_length
    if (offset + 2 > payload.length) return;
    const programInfoLength = ((payload[offset] & 0x0F) << 8) | payload[offset + 1];
    offset += 2 + programInfoLength;

    // End of stream entries (before CRC)
    const sectionEnd = Math.min(payload.length - 4, 1 + payload[0] + 3 + sectionLength - 4);

    while (offset + 5 <= sectionEnd) {
      const streamType = payload[offset];
      const elementaryPid = ((payload[offset + 1] & 0x1F) << 8) | payload[offset + 2];
      const esInfoLength = ((payload[offset + 3] & 0x0F) << 8) | payload[offset + 4];

      // Video types: 0x01=MPEG-1, 0x02=MPEG-2, 0x1B=H.264, 0x24=HEVC
      if (!this.videoPid && (streamType === 0x01 || streamType === 0x02 || streamType === 0x1B || streamType === 0x24)) {
        this.videoPid = elementaryPid;
        this.videoStreamType = streamType;
        this.debug.pmtFound = true;
      }
      // Audio types: 0x03=MPEG-1, 0x04=MPEG-2, 0x0F=AAC, 0x11=AAC-LATM, 0x81=AC3, 0x87=EAC3
      else if (!this.audioPid && (streamType === 0x03 || streamType === 0x04 || streamType === 0x0F || streamType === 0x11 || streamType === 0x81 || streamType === 0x87)) {
        this.audioPid = elementaryPid;
        this.audioStreamType = streamType;
      }

      offset += 5 + esInfoLength;
    }
  }

  /**
   * Accumulates packet payloads for one PES packet. When a new PES starts,
   * the previously buffered one is flushed to `processPES`.
   *
   * @param {Uint8Array} payload - Payload of the current packet.
   * @param {boolean} isStart - payload_unit_start_indicator of the packet.
   * @param {string} type - 'video' or 'audio'.
   */
  collectPES(payload, isStart, type) {
    const buffer = type === 'video' ? this.videoPesBuffer : this.audioPesBuffer;
    if (isStart) {
      if (type === 'audio') this.debug.audioPesStarts = (this.debug.audioPesStarts || 0) + 1;
      if (buffer.length > 0) this.processPES(this.concatenateBuffers(buffer), type);
      buffer.length = 0;
    }
    buffer.push(payload.slice());
  }

  /**
   * Parses a reassembled PES packet: validates the start code, reads the
   * optional PTS/DTS and hands the elementary stream payload on.
   *
   * @param {Uint8Array} pesData - Complete PES packet.
   * @param {string} type - 'video' or 'audio'.
   */
  processPES(pesData, type) {
    if (pesData.length < 9) return;
    if (pesData[0] !== 0 || pesData[1] !== 0 || pesData[2] !== 1) return;
    const flags = pesData[7];
    const headerDataLength = pesData[8];
    let pts = null, dts = null;
    // Only decode a timestamp when all five of its bytes are present.
    if ((flags & 0x80) && pesData.length >= 14) pts = this.parsePTS(pesData, 9);
    if ((flags & 0x40) && pesData.length >= 19) dts = this.parsePTS(pesData, 14);
    const payload = pesData.subarray(9 + headerDataLength);
    if (type === 'video') this.processVideoPayload(payload, pts, dts);
    else this.processAudioPayload(payload, pts);
  }

  /**
   * Decodes a 33-bit PES timestamp (PTS or DTS) stored in five bytes.
   *
   * The value is assembled with multiplication rather than `<<` because
   * JavaScript bitwise operators work on 32-bit signed integers and would
   * corrupt any timestamp of 2^31 or more.
   *
   * @param {Uint8Array} data - Buffer containing the timestamp.
   * @param {number} offset - Index of the first timestamp byte.
   * @returns {number} Timestamp in 90 kHz ticks, 0 .. 2^33 - 1.
   */
  parsePTS(data, offset) {
    const high = (data[offset] & 0x0E) >> 1;                        // bits 32..30
    const mid = (data[offset + 1] << 7) | (data[offset + 2] >> 1);  // bits 29..15
    const low = (data[offset + 3] << 7) | (data[offset + 4] >> 1);  // bits 14..0
    return high * 0x40000000 + mid * 0x8000 + low;
  }

  /**
   * Stores one video access unit. Payloads without NAL units or without a
   * PTS are ignored; a missing DTS defaults to the PTS.
   */
  processVideoPayload(payload, pts, dts) {
    const nalUnits = this.extractNALUnits(payload);
    if (nalUnits.length > 0 && pts !== null) {
      this.videoAccessUnits.push({ nalUnits, pts, dts: dts !== null ? dts : pts });
      this.videoPts.push(pts);
      this.videoDts.push(dts !== null ? dts : pts);
    }
  }

  /**
   * Splits an Annex B byte stream on 3- and 4-byte start codes.
   *
   * @param {Uint8Array} data - Elementary stream bytes.
   * @returns {Uint8Array[]} NAL units without their start codes (views into `data`).
   */
  extractNALUnits(data) {
    const nalUnits = [];
    let i = 0;
    while (i < data.length - 3) {
      if (data[i] === 0 && data[i + 1] === 0) {
        let startCodeLen = 0;
        if (data[i + 2] === 1) startCodeLen = 3;
        else if (data[i + 2] === 0 && i + 3 < data.length && data[i + 3] === 1) startCodeLen = 4;
        if (startCodeLen > 0) {
          let end = i + startCodeLen;
          while (end < data.length - 2) {
            if (data[end] === 0 && data[end + 1] === 0 &&
                (data[end + 2] === 1 || (data[end + 2] === 0 && end + 3 < data.length && data[end + 3] === 1))) break;
            end++;
          }
          // No further start code: the NAL unit runs to the end of the buffer.
          if (end >= data.length - 2) end = data.length;
          const nalUnit = data.subarray(i + startCodeLen, end);
          if (nalUnit.length > 0) nalUnits.push(nalUnit);
          i = end;
          continue;
        }
      }
      i++;
    }
    return nalUnits;
  }

  /**
   * Extracts ADTS frames from one audio PES payload and timestamps them.
   * The first frame gets the PES PTS (or the running timestamp when the PES
   * has none); each following frame advances by 1024 samples.
   */
  processAudioPayload(payload, pts) {
    const frames = this.extractADTSFrames(payload);

    this.debug.audioPesCount = (this.debug.audioPesCount || 0) + 1;
    this.debug.audioFramesInPes = (this.debug.audioFramesInPes || 0) + frames.length;

    if (pts !== null) {
      this.lastAudioPts = pts;
    } else if (this.lastAudioPts !== null) {
      pts = this.lastAudioPts;
    } else {
      // No PTS seen yet, so these frames cannot be placed on the timeline.
      this.debug.audioSkipped = (this.debug.audioSkipped || 0) + frames.length;
      return;
    }

    // PTS step per frame from the detected sample rate (48000 when unknown)
    const sampleRate = this.audioSampleRate || 48000;
    const ptsIncrement = Math.round(1024 * 90000 / sampleRate);

    for (const frame of frames) {
      this.audioAccessUnits.push({ data: frame.data, pts });
      this.audioPts.push(pts);
      pts += ptsIncrement;
      this.lastAudioPts = pts;
    }
  }

  /**
   * Scans `data` for ADTS frames. Bytes left over from the previous call are
   * prepended first; an incomplete frame (or header fragment) at the end is
   * kept in `adtsPartial` for the next call.
   *
   * @param {Uint8Array} data - Audio elementary stream bytes.
   * @returns {{header: Uint8Array, data: Uint8Array}[]} Complete frames, split
   *   into ADTS header and raw payload.
   */
  extractADTSFrames(data) {
    // ADTS sampling_frequency_index table
    const SAMPLE_RATES = [96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350];
    const MIN_HEADER = 7;

    const frames = [];
    let i = 0;

    if (this.adtsPartial && this.adtsPartial.length > 0) {
      const combined = new Uint8Array(this.adtsPartial.length + data.length);
      combined.set(this.adtsPartial);
      combined.set(data, this.adtsPartial.length);
      data = combined;
      this.adtsPartial = null;
    }

    while (i + MIN_HEADER <= data.length) {
      // 12-bit syncword 0xFFF with the two layer bits equal to 0
      if (data[i] === 0xFF && (data[i + 1] & 0xF6) === 0xF0) {
        const protectionAbsent = data[i + 1] & 0x01;
        const headerSize = protectionAbsent ? 7 : 9;
        const frameLength = ((data[i + 3] & 0x03) << 11) | (data[i + 4] << 3) | ((data[i + 5] & 0xE0) >> 5);

        // A frame must be longer than its own header; otherwise this is a false sync.
        if (frameLength > headerSize) {
          if (!this.audioSampleRate) {
            const samplingFreqIndex = (data[i + 2] & 0x3C) >> 2;
            const channelConfig = ((data[i + 2] & 0x01) << 2) | ((data[i + 3] & 0xC0) >> 6);
            if (samplingFreqIndex < SAMPLE_RATES.length) {
              this.audioSampleRate = SAMPLE_RATES[samplingFreqIndex];
              this.audioChannels = channelConfig;
            }
          }

          if (i + frameLength <= data.length) {
            frames.push({ header: data.subarray(i, i + headerSize), data: data.subarray(i + headerSize, i + frameLength) });
            i += frameLength;
            continue;
          }
          // Frame continues in the next PES packet.
          this.adtsPartial = data.slice(i);
          return frames;
        }
      }
      i++;
    }

    // Fewer than 7 bytes remain: keep them when they may be the beginning of
    // a header so that a frame straddling two PES packets is not lost.
    for (let j = i; j < data.length; j++) {
      const secondByteOk = j + 1 >= data.length || (data[j + 1] & 0xF6) === 0xF0;
      if (data[j] === 0xFF && secondByteOk) {
        this.adtsPartial = data.slice(j);
        break;
      }
    }
    return frames;
  }

  /**
   * Joins several byte arrays into one newly allocated array.
   *
   * @param {Uint8Array[]} buffers
   * @returns {Uint8Array}
   */
  concatenateBuffers(buffers) {
    const totalLength = buffers.reduce((sum, b) => sum + b.length, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const buf of buffers) {
      result.set(buf, offset);
      offset += buf.length;
    }
    return result;
  }

  /**
   * Flushes the PES packets still buffered for each stream and rebases all
   * timestamps so that the earliest PTS becomes 0.
   */
  finalize() {
    if (this.videoPesBuffer.length > 0) this.processPES(this.concatenateBuffers(this.videoPesBuffer), 'video');
    if (this.audioPesBuffer.length > 0) this.processPES(this.concatenateBuffers(this.audioPesBuffer), 'audio');
    this.normalizeTimestamps();
  }

  /**
   * Subtracts the smallest video/audio PTS from every stored timestamp.
   * The offset is recorded in `debug.timestampOffset`.
   */
  normalizeTimestamps() {
    // Plain loops instead of Math.min(...array): spreading a long recording's
    // timestamps exceeds the engine's argument limit and throws RangeError.
    let minPts = Infinity;
    for (const pts of this.videoPts) {
      if (pts < minPts) minPts = pts;
    }
    for (const pts of this.audioPts) {
      if (pts < minPts) minPts = pts;
    }

    // Nothing to do without timestamps or when already starting at 0
    if (minPts === Infinity || minPts === 0) return;

    for (let i = 0; i < this.videoPts.length; i++) this.videoPts[i] -= minPts;
    for (let i = 0; i < this.videoDts.length; i++) this.videoDts[i] -= minPts;
    for (let i = 0; i < this.audioPts.length; i++) this.audioPts[i] -= minPts;

    for (const au of this.videoAccessUnits) {
      au.pts -= minPts;
      au.dts -= minPts;
    }
    for (const au of this.audioAccessUnits) {
      au.pts -= minPts;
    }

    this.debug.timestampOffset = minPts;
    this.debug.timestampNormalized = true;
  }
}
408
-
409
- // ============================================
410
- // MP4 BUILDER
411
- // ============================================
412
- // Parse H.264 SPS to extract video dimensions
413
/**
 * Extracts the cropped picture size from an H.264 sequence parameter set.
 *
 * @param {Uint8Array} sps - SPS NAL unit including its one-byte NAL header,
 *   without a start code. Emulation-prevention bytes may still be present.
 * @returns {{width: number, height: number}} Picture size in pixels. Falls
 *   back to 1920x1080 when the SPS is missing, truncated or malformed.
 */
function parseSPS(sps) {
  // Default fallback
  const result = { width: 1920, height: 1080 };
  if (!sps || sps.length < 4) return result;

  // Remove emulation-prevention bytes (the 0x03 in 00 00 03) so that bit
  // positions match the RBSP syntax.
  const rbsp = [];
  let zeroRun = 0;
  for (let i = 0; i < sps.length; i++) {
    const byte = sps[i];
    if (zeroRun >= 2 && byte === 0x03) {
      zeroRun = 0;
      continue;
    }
    rbsp.push(byte);
    zeroRun = byte === 0 ? zeroRun + 1 : 0;
  }

  const totalBits = rbsp.length * 8;
  const profile = rbsp[1];
  // Skip NAL header, profile_idc, constraint flags and level_idc.
  let bitPos = 4 * 8;

  const getBit = () => {
    if (bitPos >= totalBits) throw new RangeError('SPS is truncated');
    const bit = (rbsp[bitPos >> 3] >> (7 - (bitPos & 7))) & 1;
    bitPos++;
    return bit;
  };
  // Unsigned Exp-Golomb
  const readUE = () => {
    let zeros = 0;
    while (getBit() === 0) {
      zeros++;
      if (zeros > 31) throw new RangeError('Invalid Exp-Golomb code in SPS');
    }
    let suffix = 0;
    for (let i = 0; i < zeros; i++) suffix = suffix * 2 + getBit();
    return Math.pow(2, zeros) - 1 + suffix;
  };
  // Signed Exp-Golomb
  const readSE = () => {
    const val = readUE();
    return val % 2 === 1 ? (val + 1) / 2 : -(val / 2);
  };
  // A scaling list stops carrying deltas once nextScale becomes 0.
  const skipScalingList = (size) => {
    let lastScale = 8;
    let nextScale = 8;
    for (let j = 0; j < size; j++) {
      if (nextScale !== 0) {
        nextScale = (((lastScale + readSE()) % 256) + 256) % 256;
      }
      lastScale = nextScale === 0 ? lastScale : nextScale;
    }
  };

  try {
    readUE(); // seq_parameter_set_id

    let chromaFormat = 1; // 4:2:0 when not signalled
    let separateColourPlane = 0;

    // Profiles that carry chroma_format_idc and related fields
    const extendedProfiles = [100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135];
    if (extendedProfiles.includes(profile)) {
      chromaFormat = readUE();
      if (chromaFormat === 3) separateColourPlane = getBit();
      readUE(); // bit_depth_luma_minus8
      readUE(); // bit_depth_chroma_minus8
      getBit(); // qpprime_y_zero_transform_bypass_flag
      if (getBit()) { // seq_scaling_matrix_present_flag
        const listCount = chromaFormat !== 3 ? 8 : 12;
        for (let i = 0; i < listCount; i++) {
          if (getBit()) skipScalingList(i < 6 ? 16 : 64); // seq_scaling_list_present_flag
        }
      }
    }

    readUE(); // log2_max_frame_num_minus4
    const pocType = readUE();
    if (pocType === 0) {
      readUE(); // log2_max_pic_order_cnt_lsb_minus4
    } else if (pocType === 1) {
      getBit(); // delta_pic_order_always_zero_flag
      readSE(); // offset_for_non_ref_pic
      readSE(); // offset_for_top_to_bottom_field
      const numRefFrames = readUE();
      for (let i = 0; i < numRefFrames; i++) readSE();
    }

    readUE(); // max_num_ref_frames
    getBit(); // gaps_in_frame_num_value_allowed_flag

    const picWidthMbs = readUE() + 1;
    const picHeightMapUnits = readUE() + 1;
    const frameMbsOnly = getBit();

    if (!frameMbsOnly) getBit(); // mb_adaptive_frame_field_flag
    getBit(); // direct_8x8_inference_flag

    let cropLeft = 0, cropRight = 0, cropTop = 0, cropBottom = 0;
    if (getBit()) { // frame_cropping_flag
      cropLeft = readUE();
      cropRight = readUE();
      cropTop = readUE();
      cropBottom = readUE();
    }

    // Crop offsets are expressed in units that depend on the chroma
    // subsampling and on whether the stream is coded as fields.
    const chromaArrayType = separateColourPlane ? 0 : chromaFormat;
    const subWidthC = chromaArrayType === 1 || chromaArrayType === 2 ? 2 : 1;
    const subHeightC = chromaArrayType === 1 ? 2 : 1;
    const fieldFactor = 2 - frameMbsOnly;
    const cropUnitX = subWidthC;
    const cropUnitY = subHeightC * fieldFactor;

    const width = picWidthMbs * 16 - (cropLeft + cropRight) * cropUnitX;
    const height = fieldFactor * picHeightMapUnits * 16 - (cropTop + cropBottom) * cropUnitY;

    if (width > 0 && height > 0) {
      result.width = width;
      result.height = height;
    }
  } catch (e) {
    // Deliberate best effort: a truncated or malformed SPS keeps the defaults.
  }

  return result;
}
502
-
503
- class MP4Builder {
504
- constructor(parser) {
505
- this.parser = parser;
506
- this.videoTimescale = 90000;
507
- // Use detected sample rate or default to 48000
508
- this.audioTimescale = parser.audioSampleRate || 48000;
509
- this.audioSampleDuration = 1024;
510
- this.videoDimensions = null;
511
- }
512
-
513
- getVideoDimensions() {
514
- if (this.videoDimensions) return this.videoDimensions;
515
-
516
- // Find SPS NAL unit
517
- for (const au of this.parser.videoAccessUnits) {
518
- for (const nalUnit of au.nalUnits) {
519
- const nalType = nalUnit[0] & 0x1F;
520
- if (nalType === 7) {
521
- this.videoDimensions = parseSPS(nalUnit);
522
- return this.videoDimensions;
523
- }
524
- }
525
- }
526
-
527
- // Fallback
528
- this.videoDimensions = { width: 1920, height: 1080 };
529
- return this.videoDimensions;
530
- }
531
-
532
- build() {
533
- const mdatContent = this.buildMdatContent();
534
- const moov = this.buildMoov(mdatContent.byteLength);
535
- const ftyp = this.buildFtyp();
536
- const mdatOffset = ftyp.byteLength + moov.byteLength + 8;
537
- this.updateChunkOffsets(moov, mdatOffset);
538
- const mdat = createBox('mdat', mdatContent);
539
- const result = new Uint8Array(ftyp.byteLength + moov.byteLength + mdat.byteLength);
540
- result.set(ftyp, 0);
541
- result.set(moov, ftyp.byteLength);
542
- result.set(mdat, ftyp.byteLength + moov.byteLength);
543
- return result;
544
- }
545
-
546
- buildFtyp() {
547
- const data = new Uint8Array(16);
548
- data[0] = 'i'.charCodeAt(0); data[1] = 's'.charCodeAt(0); data[2] = 'o'.charCodeAt(0); data[3] = 'm'.charCodeAt(0);
549
- data[7] = 1;
550
- data[8] = 'i'.charCodeAt(0); data[9] = 's'.charCodeAt(0); data[10] = 'o'.charCodeAt(0); data[11] = 'm'.charCodeAt(0);
551
- data[12] = 'a'.charCodeAt(0); data[13] = 'v'.charCodeAt(0); data[14] = 'c'.charCodeAt(0); data[15] = '1'.charCodeAt(0);
552
- return createBox('ftyp', data);
553
- }
554
-
555
- buildMdatContent() {
556
- const chunks = [];
557
- this.videoSampleSizes = [];
558
- this.videoSampleOffsets = [];
559
- let currentOffset = 0;
560
- for (const au of this.parser.videoAccessUnits) {
561
- this.videoSampleOffsets.push(currentOffset);
562
- let sampleSize = 0;
563
- for (const nalUnit of au.nalUnits) {
564
- const prefixed = new Uint8Array(4 + nalUnit.length);
565
- new DataView(prefixed.buffer).setUint32(0, nalUnit.length);
566
- prefixed.set(nalUnit, 4);
567
- chunks.push(prefixed);
568
- sampleSize += prefixed.length;
569
- }
570
- this.videoSampleSizes.push(sampleSize);
571
- currentOffset += sampleSize;
572
- }
573
- this.videoChunkOffset = 0;
574
- this.audioChunkOffset = currentOffset;
575
- this.audioSampleSizes = [];
576
- for (const frame of this.parser.audioAccessUnits) {
577
- chunks.push(frame.data);
578
- this.audioSampleSizes.push(frame.data.length);
579
- currentOffset += frame.data.length;
580
- }
581
- const totalSize = chunks.reduce((sum, c) => sum + c.length, 0);
582
- const result = new Uint8Array(totalSize);
583
- let offset = 0;
584
- for (const chunk of chunks) { result.set(chunk, offset); offset += chunk.length; }
585
- return result;
586
- }
587
-
588
- buildMoov(mdatSize) {
589
- const mvhd = this.buildMvhd();
590
- const videoTrak = this.buildVideoTrak();
591
- const audioTrak = this.buildAudioTrak();
592
- const udta = this.buildUdta();
593
- return createBox('moov', mvhd, videoTrak, audioTrak, udta);
594
- }
595
-
596
- buildUdta() {
597
- const toolName = 'toMp4.js';
598
- const toolBytes = new TextEncoder().encode(toolName);
599
- const dataBox = new Uint8Array(16 + toolBytes.length);
600
- const dataView = new DataView(dataBox.buffer);
601
- dataView.setUint32(0, 16 + toolBytes.length);
602
- dataBox[4] = 'd'.charCodeAt(0); dataBox[5] = 'a'.charCodeAt(0); dataBox[6] = 't'.charCodeAt(0); dataBox[7] = 'a'.charCodeAt(0);
603
- dataView.setUint32(8, 1); dataView.setUint32(12, 0);
604
- dataBox.set(toolBytes, 16);
605
- const tooBox = createBox('©too', dataBox);
606
- const ilst = createBox('ilst', tooBox);
607
- const hdlrData = new Uint8Array(21);
608
- hdlrData[4] = 'm'.charCodeAt(0); hdlrData[5] = 'd'.charCodeAt(0); hdlrData[6] = 'i'.charCodeAt(0); hdlrData[7] = 'r'.charCodeAt(0);
609
- const metaHdlr = createFullBox('hdlr', 0, 0, hdlrData);
610
- const meta = createFullBox('meta', 0, 0, new Uint8Array(0), metaHdlr, ilst);
611
- return createBox('udta', meta);
612
- }
613
-
614
- buildMvhd() {
615
- const data = new Uint8Array(96);
616
- const view = new DataView(data.buffer);
617
- view.setUint32(8, this.videoTimescale);
618
- view.setUint32(12, this.calculateVideoDuration());
619
- view.setUint32(16, 0x00010000);
620
- view.setUint16(20, 0x0100);
621
- view.setUint32(32, 0x00010000);
622
- view.setUint32(48, 0x00010000);
623
- view.setUint32(64, 0x40000000);
624
- view.setUint32(92, 258);
625
- return createFullBox('mvhd', 0, 0, data);
626
- }
627
-
628
- calculateVideoDuration() {
629
- if (this.parser.videoDts.length < 2) return 0;
630
- const firstDts = this.parser.videoDts[0];
631
- const lastDts = this.parser.videoDts[this.parser.videoDts.length - 1];
632
- const avgDuration = (lastDts - firstDts) / (this.parser.videoDts.length - 1);
633
- return Math.round(lastDts - firstDts + avgDuration);
634
- }
635
-
636
- buildVideoTrak() {
637
- const edts = this.buildVideoEdts();
638
- if (edts) {
639
- return createBox('trak', this.buildVideoTkhd(), edts, this.buildVideoMdia());
640
- }
641
- return createBox('trak', this.buildVideoTkhd(), this.buildVideoMdia());
642
- }
643
-
644
- // Build edit list to fix A/V sync
645
- // The elst box tells the player where media actually starts
646
- buildVideoEdts() {
647
- // Get first video PTS (presentation time)
648
- if (this.parser.videoAccessUnits.length === 0) return null;
649
-
650
- const firstAU = this.parser.videoAccessUnits[0];
651
- const firstVideoPts = firstAU.pts;
652
-
653
- // If video starts at 0, no edit needed
654
- if (firstVideoPts === 0) return null;
655
-
656
- // Create elst box: tells player to start at firstVideoPts in the media
657
- // This compensates for CTTS offset making video appear to start late
658
- const duration = this.calculateVideoDuration();
659
- const mediaTime = firstVideoPts; // Start playback at this media time
660
-
661
- // elst entry: segment_duration (4), media_time (4), media_rate (4)
662
- const elstData = new Uint8Array(16);
663
- const view = new DataView(elstData.buffer);
664
- view.setUint32(0, 1); // entry count
665
- view.setUint32(4, duration); // segment duration in movie timescale
666
- view.setInt32(8, mediaTime); // media time - where to start
667
- view.setUint16(12, 1); // media rate integer (1.0)
668
- view.setUint16(14, 0); // media rate fraction
669
-
670
- const elst = createFullBox('elst', 0, 0, elstData);
671
- return createBox('edts', elst);
672
- }
673
-
674
- buildVideoTkhd() {
675
- const { width, height } = this.getVideoDimensions();
676
- const data = new Uint8Array(80);
677
- const view = new DataView(data.buffer);
678
- view.setUint32(8, 256);
679
- view.setUint32(16, this.calculateVideoDuration());
680
- view.setUint16(32, 0);
681
- view.setUint32(36, 0x00010000);
682
- view.setUint32(52, 0x00010000);
683
- view.setUint32(68, 0x40000000);
684
- view.setUint32(72, width << 16);
685
- view.setUint32(76, height << 16);
686
- return createFullBox('tkhd', 0, 3, data);
687
- }
688
-
689
- buildVideoMdia() {
690
- return createBox('mdia', this.buildVideoMdhd(), this.buildVideoHdlr(), this.buildVideoMinf());
691
- }
692
-
693
- buildVideoMdhd() {
694
- const data = new Uint8Array(20);
695
- const view = new DataView(data.buffer);
696
- view.setUint32(8, this.videoTimescale);
697
- view.setUint32(12, this.calculateVideoDuration());
698
- view.setUint16(16, 0x55C4);
699
- return createFullBox('mdhd', 0, 0, data);
700
- }
701
-
702
- buildVideoHdlr() {
703
- const data = new Uint8Array(21);
704
- data[4] = 'v'.charCodeAt(0); data[5] = 'i'.charCodeAt(0); data[6] = 'd'.charCodeAt(0); data[7] = 'e'.charCodeAt(0);
705
- return createFullBox('hdlr', 0, 0, data);
706
- }
707
-
708
- buildVideoMinf() {
709
- return createBox('minf', this.buildVmhd(), this.buildDinf(), this.buildVideoStbl());
710
- }
711
-
712
- buildVmhd() { return createFullBox('vmhd', 0, 1, new Uint8Array(8)); }
713
-
714
- buildDinf() {
715
- const urlBox = createFullBox('url ', 0, 1, new Uint8Array(0));
716
- const dref = createFullBox('dref', 0, 0, new Uint8Array([0, 0, 0, 1]), urlBox);
717
- return createBox('dinf', dref);
718
- }
719
-
720
- buildVideoStbl() {
721
- const boxes = [this.buildVideoStsd(), this.buildVideoStts(), this.buildVideoCtts(), this.buildVideoStsc(), this.buildVideoStsz(), this.buildVideoStco()];
722
- const stss = this.buildVideoStss();
723
- if (stss) boxes.push(stss);
724
- return createBox('stbl', ...boxes);
725
- }
726
-
727
- buildVideoStsd() {
728
- const { width, height } = this.getVideoDimensions();
729
- const avcC = this.buildAvcC();
730
- const btrtData = new Uint8Array(12);
731
- const btrtView = new DataView(btrtData.buffer);
732
- btrtView.setUint32(4, 2000000); btrtView.setUint32(8, 2000000);
733
- const btrt = createBox('btrt', btrtData);
734
- const paspData = new Uint8Array(8);
735
- const paspView = new DataView(paspData.buffer);
736
- paspView.setUint32(0, 1); paspView.setUint32(4, 1);
737
- const pasp = createBox('pasp', paspData);
738
- const avc1Data = new Uint8Array(78 + avcC.byteLength + btrt.byteLength + pasp.byteLength);
739
- const view = new DataView(avc1Data.buffer);
740
- view.setUint16(6, 1); view.setUint16(24, width); view.setUint16(26, height);
741
- view.setUint32(28, 0x00480000); view.setUint32(32, 0x00480000);
742
- view.setUint16(40, 1); view.setUint16(74, 0x0018); view.setInt16(76, -1);
743
- avc1Data.set(avcC, 78); avc1Data.set(btrt, 78 + avcC.byteLength); avc1Data.set(pasp, 78 + avcC.byteLength + btrt.byteLength);
744
- const avc1 = createBox('avc1', avc1Data);
745
- const stsdHeader = new Uint8Array(4);
746
- new DataView(stsdHeader.buffer).setUint32(0, 1);
747
- return createFullBox('stsd', 0, 0, stsdHeader, avc1);
748
- }
749
-
750
- buildAvcC() {
751
- let sps = null, pps = null;
752
- for (const au of this.parser.videoAccessUnits) {
753
- for (const nalUnit of au.nalUnits) {
754
- const nalType = nalUnit[0] & 0x1F;
755
- if (nalType === 7 && !sps) sps = nalUnit;
756
- if (nalType === 8 && !pps) pps = nalUnit;
757
- if (sps && pps) break;
758
- }
759
- if (sps && pps) break;
760
- }
761
- if (!sps || !pps) {
762
- sps = new Uint8Array([0x67, 0x64, 0x00, 0x1f, 0xac, 0xd9, 0x40, 0x78, 0x02, 0x27, 0xe5, 0xc0, 0x44, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0xf0, 0x3c, 0x60, 0xc6, 0x58]);
763
- pps = new Uint8Array([0x68, 0xeb, 0xe3, 0xcb, 0x22, 0xc0]);
764
- }
765
- const data = new Uint8Array(11 + sps.length + pps.length);
766
- const view = new DataView(data.buffer);
767
- data[0] = 1; data[1] = sps[1]; data[2] = sps[2]; data[3] = sps[3]; data[4] = 0xFF; data[5] = 0xE1;
768
- view.setUint16(6, sps.length); data.set(sps, 8);
769
- data[8 + sps.length] = 1; view.setUint16(9 + sps.length, pps.length); data.set(pps, 11 + sps.length);
770
- return createBox('avcC', data);
771
- }
772
-
773
- buildVideoStts() {
774
- const entries = [];
775
- let lastDuration = -1, count = 0;
776
- for (let i = 0; i < this.parser.videoDts.length; i++) {
777
- const duration = i < this.parser.videoDts.length - 1
778
- ? this.parser.videoDts[i + 1] - this.parser.videoDts[i]
779
- : (entries.length > 0 ? entries[entries.length - 1].duration : 3003);
780
- if (duration === lastDuration) count++;
781
- else { if (count > 0) entries.push({ count, duration: lastDuration }); lastDuration = duration; count = 1; }
782
- }
783
- if (count > 0) entries.push({ count, duration: lastDuration });
784
- const data = new Uint8Array(4 + entries.length * 8);
785
- const view = new DataView(data.buffer);
786
- view.setUint32(0, entries.length);
787
- for (let i = 0; i < entries.length; i++) { view.setUint32(4 + i * 8, entries[i].count); view.setUint32(8 + i * 8, entries[i].duration); }
788
- return createFullBox('stts', 0, 0, data);
789
- }
790
-
791
- buildVideoCtts() {
792
- const entries = [];
793
- for (const au of this.parser.videoAccessUnits) {
794
- const cts = au.pts - au.dts;
795
- if (entries.length > 0 && entries[entries.length - 1].offset === cts) entries[entries.length - 1].count++;
796
- else entries.push({ count: 1, offset: cts });
797
- }
798
- const data = new Uint8Array(4 + entries.length * 8);
799
- const view = new DataView(data.buffer);
800
- view.setUint32(0, entries.length);
801
- for (let i = 0; i < entries.length; i++) { view.setUint32(4 + i * 8, entries[i].count); view.setUint32(8 + i * 8, entries[i].offset); }
802
- return createFullBox('ctts', 0, 0, data);
803
- }
804
-
805
- buildVideoStsc() {
806
- const data = new Uint8Array(4 + 12);
807
- const view = new DataView(data.buffer);
808
- view.setUint32(0, 1); view.setUint32(4, 1); view.setUint32(8, this.videoSampleSizes.length); view.setUint32(12, 1);
809
- return createFullBox('stsc', 0, 0, data);
810
- }
811
-
812
- buildVideoStsz() {
813
- const data = new Uint8Array(8 + this.videoSampleSizes.length * 4);
814
- const view = new DataView(data.buffer);
815
- view.setUint32(0, 0); view.setUint32(4, this.videoSampleSizes.length);
816
- for (let i = 0; i < this.videoSampleSizes.length; i++) view.setUint32(8 + i * 4, this.videoSampleSizes[i]);
817
- return createFullBox('stsz', 0, 0, data);
818
- }
819
-
820
- buildVideoStco() {
821
- const data = new Uint8Array(8);
822
- const view = new DataView(data.buffer);
823
- view.setUint32(0, 1); view.setUint32(4, 0);
824
- return createFullBox('stco', 0, 0, data);
825
- }
826
-
827
- buildVideoStss() {
828
- const keyframes = [];
829
- for (let i = 0; i < this.parser.videoAccessUnits.length; i++) {
830
- for (const nalUnit of this.parser.videoAccessUnits[i].nalUnits) {
831
- if ((nalUnit[0] & 0x1F) === 5) { keyframes.push(i + 1); break; }
832
- }
833
- }
834
- if (keyframes.length === 0) return null;
835
- const data = new Uint8Array(4 + keyframes.length * 4);
836
- const view = new DataView(data.buffer);
837
- view.setUint32(0, keyframes.length);
838
- for (let i = 0; i < keyframes.length; i++) view.setUint32(4 + i * 4, keyframes[i]);
839
- return createFullBox('stss', 0, 0, data);
840
- }
841
-
842
- buildAudioTrak() {
843
- const edts = this.buildAudioEdts();
844
- if (edts) {
845
- return createBox('trak', this.buildAudioTkhd(), edts, this.buildAudioMdia());
846
- }
847
- return createBox('trak', this.buildAudioTkhd(), this.buildAudioMdia());
848
- }
849
-
850
- // Build edit list for audio to sync with video
851
- buildAudioEdts() {
852
- if (this.parser.audioPts.length === 0) return null;
853
-
854
- const firstAudioPts = this.parser.audioPts[0];
855
-
856
- // If audio starts at 0, no edit needed
857
- if (firstAudioPts === 0) return null;
858
-
859
- // Convert audio PTS (90kHz) to audio timescale (48kHz)
860
- const mediaTime = Math.round(firstAudioPts * this.audioTimescale / 90000);
861
- const duration = this.audioSampleSizes.length * this.audioSampleDuration;
862
-
863
- const elstData = new Uint8Array(16);
864
- const view = new DataView(elstData.buffer);
865
- view.setUint32(0, 1); // entry count
866
- view.setUint32(4, Math.round(duration * this.videoTimescale / this.audioTimescale)); // segment duration in movie timescale
867
- view.setInt32(8, mediaTime); // media time
868
- view.setUint16(12, 1); // media rate integer
869
- view.setUint16(14, 0); // media rate fraction
870
-
871
- const elst = createFullBox('elst', 0, 0, elstData);
872
- return createBox('edts', elst);
873
- }
874
-
875
- buildAudioTkhd() {
876
- const data = new Uint8Array(80);
877
- const view = new DataView(data.buffer);
878
- view.setUint32(8, 257);
879
- const audioDuration = this.audioSampleSizes.length * this.audioSampleDuration;
880
- view.setUint32(16, Math.round(audioDuration * this.videoTimescale / this.audioTimescale));
881
- view.setUint16(32, 0x0100);
882
- view.setUint32(36, 0x00010000); view.setUint32(52, 0x00010000); view.setUint32(68, 0x40000000);
883
- return createFullBox('tkhd', 0, 3, data);
884
- }
885
-
886
- buildAudioMdia() { return createBox('mdia', this.buildAudioMdhd(), this.buildAudioHdlr(), this.buildAudioMinf()); }
887
-
888
- buildAudioMdhd() {
889
- const data = new Uint8Array(20);
890
- const view = new DataView(data.buffer);
891
- view.setUint32(8, this.audioTimescale);
892
- view.setUint32(12, this.audioSampleSizes.length * this.audioSampleDuration);
893
- view.setUint16(16, 0x55C4);
894
- return createFullBox('mdhd', 0, 0, data);
895
- }
896
-
897
- buildAudioHdlr() {
898
- const data = new Uint8Array(21);
899
- data[4] = 's'.charCodeAt(0); data[5] = 'o'.charCodeAt(0); data[6] = 'u'.charCodeAt(0); data[7] = 'n'.charCodeAt(0);
900
- return createFullBox('hdlr', 0, 0, data);
901
- }
902
-
903
- buildAudioMinf() { return createBox('minf', this.buildSmhd(), this.buildDinf(), this.buildAudioStbl()); }
904
- buildSmhd() { return createFullBox('smhd', 0, 0, new Uint8Array(4)); }
905
-
906
- buildAudioStbl() {
907
- return createBox('stbl', this.buildAudioStsd(), this.buildAudioStts(), this.buildAudioStsc(), this.buildAudioStsz(), this.buildAudioStco());
908
- }
909
-
910
- buildAudioStsd() {
911
- const esds = this.buildEsds();
912
- const channels = this.parser.audioChannels || 2;
913
- const mp4aData = new Uint8Array(28 + esds.byteLength);
914
- const view = new DataView(mp4aData.buffer);
915
- view.setUint16(6, 1);
916
- view.setUint16(16, channels); // channel count
917
- view.setUint16(18, 16); // sample size
918
- view.setUint32(24, this.audioTimescale << 16);
919
- mp4aData.set(esds, 28);
920
- const mp4a = createBox('mp4a', mp4aData);
921
- const stsdHeader = new Uint8Array(4);
922
- new DataView(stsdHeader.buffer).setUint32(0, 1);
923
- return createFullBox('stsd', 0, 0, stsdHeader, mp4a);
924
- }
925
-
926
- buildEsds() {
927
- // Build AudioSpecificConfig based on detected parameters
928
- const SAMPLE_RATE_INDEX = {
929
- 96000: 0, 88200: 1, 64000: 2, 48000: 3, 44100: 4, 32000: 5,
930
- 24000: 6, 22050: 7, 16000: 8, 12000: 9, 11025: 10, 8000: 11, 7350: 12
931
- };
932
-
933
- const sampleRate = this.audioTimescale;
934
- const channels = this.parser.audioChannels || 2;
935
- const samplingFreqIndex = SAMPLE_RATE_INDEX[sampleRate] ?? 4; // Default to 44100
936
-
937
- // AudioSpecificConfig: 5 bits objType + 4 bits freqIndex + 4 bits channels + 3 bits padding
938
- // AAC-LC = 2
939
- const audioConfig = ((2 << 11) | (samplingFreqIndex << 7) | (channels << 3)) & 0xFFFF;
940
- const audioConfigHigh = (audioConfig >> 8) & 0xFF;
941
- const audioConfigLow = audioConfig & 0xFF;
942
-
943
- const data = new Uint8Array([
944
- 0x00, 0x00, 0x00, 0x00, // version/flags
945
- 0x03, 0x19, // ES_Descriptor tag + length
946
- 0x00, 0x02, // ES_ID
947
- 0x00, // flags
948
- 0x04, 0x11, // DecoderConfigDescriptor tag + length
949
- 0x40, // objectTypeIndication (AAC)
950
- 0x15, // streamType (audio) + upstream + reserved
951
- 0x00, 0x00, 0x00, // bufferSizeDB
952
- 0x00, 0x01, 0xF4, 0x00, // maxBitrate
953
- 0x00, 0x01, 0xF4, 0x00, // avgBitrate
954
- 0x05, 0x02, // DecoderSpecificInfo tag + length
955
- audioConfigHigh, audioConfigLow, // AudioSpecificConfig
956
- 0x06, 0x01, 0x02 // SLConfigDescriptor
957
- ]);
958
- return createBox('esds', data);
959
- }
960
-
961
- buildAudioStts() {
962
- // Use actual PTS differences for accurate timing (like video does)
963
- const audioPts = this.parser.audioPts;
964
-
965
- // If we don't have PTS data, fall back to constant duration
966
- if (audioPts.length < 2) {
967
- const data = new Uint8Array(12);
968
- const view = new DataView(data.buffer);
969
- view.setUint32(0, 1);
970
- view.setUint32(4, this.audioSampleSizes.length);
971
- view.setUint32(8, this.audioSampleDuration);
972
- return createFullBox('stts', 0, 0, data);
973
- }
974
-
975
- // Convert 90kHz PTS to audio timescale (48kHz)
976
- // PTS is in 90kHz, we need durations in 48kHz
977
- const entries = [];
978
- let lastDuration = -1, count = 0;
979
-
980
- for (let i = 0; i < audioPts.length; i++) {
981
- let duration;
982
- if (i < audioPts.length - 1) {
983
- // Calculate actual duration from PTS difference
984
- const ptsDiff = audioPts[i + 1] - audioPts[i];
985
- // Convert from 90kHz to 48kHz: duration = ptsDiff * 48000 / 90000
986
- duration = Math.round(ptsDiff * this.audioTimescale / 90000);
987
- } else {
988
- // Last frame - use standard AAC frame duration
989
- duration = this.audioSampleDuration;
990
- }
991
-
992
- // Clamp to reasonable values (handle discontinuities)
993
- if (duration <= 0 || duration > this.audioSampleDuration * 2) {
994
- duration = this.audioSampleDuration;
995
- }
996
-
997
- if (duration === lastDuration) {
998
- count++;
999
- } else {
1000
- if (count > 0) entries.push({ count, duration: lastDuration });
1001
- lastDuration = duration;
1002
- count = 1;
1003
- }
1004
- }
1005
- if (count > 0) entries.push({ count, duration: lastDuration });
1006
-
1007
- const data = new Uint8Array(4 + entries.length * 8);
1008
- const view = new DataView(data.buffer);
1009
- view.setUint32(0, entries.length);
1010
- for (let i = 0; i < entries.length; i++) {
1011
- view.setUint32(4 + i * 8, entries[i].count);
1012
- view.setUint32(8 + i * 8, entries[i].duration);
1013
- }
1014
- return createFullBox('stts', 0, 0, data);
1015
- }
1016
-
1017
- buildAudioStsc() {
1018
- const data = new Uint8Array(4 + 12);
1019
- const view = new DataView(data.buffer);
1020
- view.setUint32(0, 1); view.setUint32(4, 1); view.setUint32(8, this.audioSampleSizes.length); view.setUint32(12, 1);
1021
- return createFullBox('stsc', 0, 0, data);
1022
- }
1023
-
1024
- buildAudioStsz() {
1025
- const data = new Uint8Array(8 + this.audioSampleSizes.length * 4);
1026
- const view = new DataView(data.buffer);
1027
- view.setUint32(0, 0); view.setUint32(4, this.audioSampleSizes.length);
1028
- for (let i = 0; i < this.audioSampleSizes.length; i++) view.setUint32(8 + i * 4, this.audioSampleSizes[i]);
1029
- return createFullBox('stsz', 0, 0, data);
1030
- }
1031
-
1032
- buildAudioStco() {
1033
- const data = new Uint8Array(8);
1034
- const view = new DataView(data.buffer);
1035
- view.setUint32(0, 1); view.setUint32(4, 0);
1036
- return createFullBox('stco', 0, 0, data);
1037
- }
1038
-
1039
- updateChunkOffsets(moov, mdatOffset) { this.updateStcoInBox(moov, mdatOffset, 0); }
1040
-
1041
- updateStcoInBox(data, mdatOffset, trackIndex) {
1042
- let offset = 8;
1043
- while (offset < data.byteLength - 8) {
1044
- const view = new DataView(data.buffer, data.byteOffset + offset);
1045
- const size = view.getUint32(0);
1046
- const type = String.fromCharCode(data[offset+4], data[offset+5], data[offset+6], data[offset+7]);
1047
- if (size < 8 || offset + size > data.byteLength) break;
1048
- if (type === 'stco') {
1049
- view.setUint32(16, trackIndex === 0 ? mdatOffset + this.videoChunkOffset : mdatOffset + this.audioChunkOffset);
1050
- trackIndex++;
1051
- } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) {
1052
- trackIndex = this.updateStcoInBox(data.subarray(offset, offset + size), mdatOffset, trackIndex);
1053
- }
1054
- offset += size;
1055
- }
1056
- return trackIndex;
1057
- }
1058
- }
1059
-
1060
/**
 * Look up the descriptive entry for an MPEG-TS stream type.
 *
 * @param {number} streamType - Stream type value used as a key into STREAM_TYPES
 * @returns {{name: string, supported: boolean}} The matching STREAM_TYPES entry,
 *   or an "Unknown (0x..)" placeholder marked as unsupported
 */
function getCodecInfo(streamType) {
  const known = STREAM_TYPES[streamType];
  if (known) return known;

  const hex = streamType?.toString(16);
  return { name: `Unknown (0x${hex})`, supported: false };
}
1066
28
 
1067
29
  /**
1068
30
  * Check if a video access unit contains a keyframe (IDR NAL unit)
@@ -1076,7 +38,9 @@ function isKeyframe(accessUnit) {
1076
38
  }
1077
39
 
1078
40
  /**
1079
- * Clip access units to a time range, snapping to keyframes
41
+ * Clip access units to a time range, snapping to keyframes for decode
42
+ * but using edit list for precise playback timing
43
+ *
1080
44
  * @param {Array} videoAUs - Video access units
1081
45
  * @param {Array} audioAUs - Audio access units
1082
46
  * @param {number} startTime - Start time in seconds
@@ -1088,34 +52,52 @@ function clipAccessUnits(videoAUs, audioAUs, startTime, endTime) {
1088
52
  const startPts = startTime * PTS_PER_SECOND;
1089
53
  const endPts = endTime * PTS_PER_SECOND;
1090
54
 
1091
- // Find keyframe at or before startTime
1092
- let startIdx = 0;
55
+ // Find keyframe at or before startTime (needed for decoding)
56
+ let keyframeIdx = 0;
1093
57
  for (let i = 0; i < videoAUs.length; i++) {
1094
58
  if (videoAUs[i].pts > startPts) break;
1095
- if (isKeyframe(videoAUs[i])) startIdx = i;
59
+ if (isKeyframe(videoAUs[i])) keyframeIdx = i;
1096
60
  }
1097
61
 
1098
- // Find first frame after endTime
62
+ // Find first frame at or after endTime
1099
63
  let endIdx = videoAUs.length;
1100
- for (let i = startIdx; i < videoAUs.length; i++) {
64
+ for (let i = keyframeIdx; i < videoAUs.length; i++) {
1101
65
  if (videoAUs[i].pts >= endPts) {
1102
66
  endIdx = i;
1103
67
  break;
1104
68
  }
1105
69
  }
1106
70
 
1107
- // Clip video
1108
- const clippedVideo = videoAUs.slice(startIdx, endIdx);
71
+ // Clip video starting from keyframe (for proper decoding)
72
+ const clippedVideo = videoAUs.slice(keyframeIdx, endIdx);
1109
73
 
1110
- // Get actual PTS range from clipped video
1111
- const actualStartPts = clippedVideo.length > 0 ? clippedVideo[0].pts : 0;
1112
- const actualEndPts = clippedVideo.length > 0 ? clippedVideo[clippedVideo.length - 1].pts : 0;
74
+ if (clippedVideo.length === 0) {
75
+ return {
76
+ video: [],
77
+ audio: [],
78
+ actualStartTime: startTime,
79
+ actualEndTime: endTime,
80
+ offset: 0,
81
+ preroll: 0
82
+ };
83
+ }
1113
84
 
1114
- // Clip audio to match video time range
1115
- const clippedAudio = audioAUs.filter(au => au.pts >= actualStartPts && au.pts <= actualEndPts);
85
+ // Get PTS of keyframe and requested start
86
+ const keyframePts = clippedVideo[0].pts;
87
+ const lastFramePts = clippedVideo[clippedVideo.length - 1].pts;
1116
88
 
1117
- // Normalize timestamps so clip starts at 0
1118
- const offset = actualStartPts;
89
+ // Pre-roll: time between keyframe and requested start
90
+ // This is the time the decoder needs to process but player shouldn't display
91
+ const prerollPts = Math.max(0, startPts - keyframePts);
92
+
93
+ // Clip audio to the REQUESTED time range (not from keyframe)
94
+ // Audio doesn't need keyframe pre-roll
95
+ const audioStartPts = startPts;
96
+ const audioEndPts = Math.min(endPts, lastFramePts);
97
+ const clippedAudio = audioAUs.filter(au => au.pts >= audioStartPts && au.pts < audioEndPts);
98
+
99
+ // Normalize all timestamps so keyframe starts at 0
100
+ const offset = keyframePts;
1119
101
  for (const au of clippedVideo) {
1120
102
  au.pts -= offset;
1121
103
  au.dts -= offset;
@@ -1127,9 +109,12 @@ function clipAccessUnits(videoAUs, audioAUs, startTime, endTime) {
1127
109
  return {
1128
110
  video: clippedVideo,
1129
111
  audio: clippedAudio,
1130
- actualStartTime: actualStartPts / PTS_PER_SECOND,
1131
- actualEndTime: actualEndPts / PTS_PER_SECOND,
1132
- offset
112
+ actualStartTime: keyframePts / PTS_PER_SECOND, // Where decode starts (keyframe)
113
+ actualEndTime: lastFramePts / PTS_PER_SECOND,
114
+ requestedStartTime: startTime, // Where playback should start
115
+ requestedEndTime: endTime,
116
+ offset,
117
+ preroll: prerollPts // Edit list will use this to skip pre-roll frames during playback
1133
118
  };
1134
119
  }
1135
120
 
@@ -1263,6 +248,9 @@ export function convertTsToMp4(tsData, options = {}) {
1263
248
 
1264
249
  log(`Processing...`, { phase: 'convert', percent: 70 });
1265
250
 
251
+ // Track preroll for edit list (used for precise clipping)
252
+ let clipPreroll = 0;
253
+
1266
254
  // Apply time range clipping if specified
1267
255
  if (options.startTime !== undefined || options.endTime !== undefined) {
1268
256
  const startTime = options.startTime || 0;
@@ -1277,24 +265,31 @@ export function convertTsToMp4(tsData, options = {}) {
1277
265
 
1278
266
  parser.videoAccessUnits = clipResult.video;
1279
267
  parser.audioAccessUnits = clipResult.audio;
268
+ clipPreroll = clipResult.preroll;
1280
269
 
1281
270
  // Update PTS arrays to match
1282
271
  parser.videoPts = clipResult.video.map(au => au.pts);
1283
272
  parser.videoDts = clipResult.video.map(au => au.dts);
1284
273
  parser.audioPts = clipResult.audio.map(au => au.pts);
1285
274
 
1286
- log(`Clipped: ${clipResult.actualStartTime.toFixed(2)}s - ${clipResult.actualEndTime.toFixed(2)}s (${clipResult.video.length} video, ${clipResult.audio.length} audio frames)`, { phase: 'convert', percent: 80 });
275
+ const prerollMs = (clipPreroll / 90).toFixed(0);
276
+ const endTimeStr = clipResult.requestedEndTime === Infinity ? 'end' : clipResult.requestedEndTime.toFixed(2) + 's';
277
+ const clipDuration = clipResult.requestedEndTime === Infinity
278
+ ? (clipResult.actualEndTime - clipResult.requestedStartTime).toFixed(2)
279
+ : (clipResult.requestedEndTime - clipResult.requestedStartTime).toFixed(2);
280
+ log(`Clipped: ${clipResult.requestedStartTime.toFixed(2)}s - ${endTimeStr} (${clipDuration}s, ${prerollMs}ms preroll)`, { phase: 'convert', percent: 80 });
1287
281
  }
1288
282
 
1289
283
  log(`Building MP4...`, { phase: 'convert', percent: 85 });
1290
- const builder = new MP4Builder(parser);
1291
- const { width, height } = builder.getVideoDimensions();
284
+ const muxer = new MP4Muxer(parser, { preroll: clipPreroll });
285
+ const { width, height } = muxer.getVideoDimensions();
1292
286
  log(`Dimensions: ${width}x${height}`);
1293
287
 
1294
- const result = builder.build();
288
+ const result = muxer.build();
1295
289
  log(`Complete`, { phase: 'convert', percent: 100 });
1296
290
  return result;
1297
291
  }
1298
292
 
293
+ export { TSParser };
1299
294
  export default convertTsToMp4;
1300
295