@invintusmedia/tomp4 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tomp4.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * toMp4.js v1.0.5
2
+ * toMp4.js v1.0.7
3
3
  * Convert MPEG-TS and fMP4 to standard MP4
4
4
  * https://github.com/TVWIT/toMp4.js
5
5
  * MIT License
@@ -43,1047 +43,9 @@
43
43
  * ❌ E-AC-3 (0x87)
44
44
  */
45
45
 
46
/**
 * Lookup table keyed by the MPEG-TS `stream_type` byte found in the PMT.
 * Each entry carries a human-readable codec name and whether this library
 * can remux that codec. The table and its entries are frozen because it is
 * shared, read-only data that no caller should be able to alter.
 */
const STREAM_TYPES = Object.freeze({
  0x01: Object.freeze({ name: 'MPEG-1 Video', supported: false }),
  0x02: Object.freeze({ name: 'MPEG-2 Video', supported: false }),
  0x03: Object.freeze({ name: 'MPEG-1 Audio (MP3)', supported: false }),
  0x04: Object.freeze({ name: 'MPEG-2 Audio', supported: false }),
  0x0F: Object.freeze({ name: 'AAC', supported: true }),
  0x11: Object.freeze({ name: 'AAC-LATM', supported: true }),
  0x1B: Object.freeze({ name: 'H.264/AVC', supported: true }),
  0x24: Object.freeze({ name: 'H.265/HEVC', supported: true }),
  0x81: Object.freeze({ name: 'AC-3 (Dolby)', supported: false }),
  0x87: Object.freeze({ name: 'E-AC-3', supported: false }),
});
46
+ import { TSParser, getCodecInfo } from './parsers/mpegts.js';
47
+ import { MP4Muxer } from './muxers/mp4.js';
59
48
 
60
- // ============================================
61
- // MP4 BOX HELPERS
62
- // ============================================
63
/**
 * Serialize a single MP4 box: a 32-bit big-endian size, a four-character
 * type code, then every payload concatenated in order.
 *
 * @param {string} type - Exactly four characters, each with a char code
 *   that fits in one byte (e.g. 'moov', '©too').
 * @param {...Uint8Array} payloads - Byte arrays appended after the header.
 * @returns {Uint8Array} The complete box, header included.
 * @throws {TypeError} If `type` is not a four-character string.
 * @throws {RangeError} If a type character does not fit in one byte, or the
 *   total size does not fit in the 32-bit size field.
 */
function createBox(type, ...payloads) {
  if (typeof type !== 'string' || type.length !== 4) {
    throw new TypeError(`Box type must be a 4-character string, got ${JSON.stringify(type)}`);
  }
  const typeCodes = [];
  for (let i = 0; i < 4; i++) {
    const code = type.charCodeAt(i);
    // A wider code unit would be silently truncated by the Uint8Array store.
    if (code > 0xFF) {
      throw new RangeError(`Box type ${JSON.stringify(type)} contains a character that does not fit in one byte`);
    }
    typeCodes.push(code);
  }

  let size = 8; // 4 bytes size + 4 bytes type
  for (const payload of payloads) size += payload.byteLength;
  // setUint32 wraps modulo 2^32, which would write a corrupt size field.
  if (size > 0xFFFFFFFF) {
    throw new RangeError(`Box '${type}' is ${size} bytes, which exceeds the 32-bit size field`);
  }

  const box = new Uint8Array(size);
  new DataView(box.buffer).setUint32(0, size);
  box.set(typeCodes, 4);

  let offset = 8;
  for (const payload of payloads) {
    box.set(payload, offset);
    offset += payload.byteLength;
  }
  return box;
}
80
-
81
/**
 * Build a "full" box: a regular box whose content begins with a one-byte
 * version followed by a 24-bit big-endian flags field.
 *
 * @param {string} type - Four-character box type, forwarded to createBox.
 * @param {number} version - Stored in the first header byte.
 * @param {number} flags - Only the low 24 bits are written.
 * @param {...Uint8Array} payloads - Content placed after version/flags.
 * @returns {Uint8Array} The serialized box.
 */
function createFullBox(type, version, flags, ...payloads) {
  const versionAndFlags = Uint8Array.of(
    version,
    (flags >> 16) & 0xFF,
    (flags >> 8) & 0xFF,
    flags & 0xFF,
  );
  return createBox(type, versionAndFlags, ...payloads);
}
89
-
90
- // ============================================
91
- // MPEG-TS PARSER
92
- // ============================================
93
/** Length in bytes of one transport stream packet; the parser steps through the input in units of this size. */
const TS_PACKET_SIZE = 188;

/** Value expected in the first byte of every packet; used to find and re-find packet boundaries. */
const TS_SYNC_BYTE = 0x47;

/** PID whose payload is handed to the PAT parser. */
const PAT_PID = 0;
96
-
97
/**
 * Demultiplexer for an MPEG transport stream held in memory.
 *
 * Feed the bytes to `parse()`, then call `finalize()`. Afterwards the parser
 * exposes the first video and first audio elementary stream it found as
 * lists of access units (`videoAccessUnits`, `audioAccessUnits`) plus
 * parallel timestamp arrays (`videoPts`, `videoDts`, `audioPts`), all in
 * 90 kHz ticks and shifted so the earliest timestamp is 0.
 *
 * Timestamps are decoded as full 33-bit values. Wrap-around of the 33-bit
 * counter inside one stream is not compensated.
 */
class TSParser {
  constructor() {
    this.pmtPid = null;
    this.videoPid = null;
    this.audioPid = null;
    this.videoStreamType = null;
    this.audioStreamType = null;
    this.videoPesBuffer = [];
    this.audioPesBuffer = [];
    this.videoAccessUnits = [];
    this.audioAccessUnits = [];
    this.videoPts = [];
    this.videoDts = [];
    this.audioPts = [];
    this.lastAudioPts = null; // Running audio timestamp, advanced per emitted frame
    this.adtsPartial = null; // Tail of an ADTS frame that was cut off by the end of a PES
    this.audioSampleRate = null; // Taken from the first usable ADTS header
    this.audioChannels = null;
    this.debug = { packets: 0, patFound: false, pmtFound: false };
  }

  /**
   * Walk the buffer packet by packet and dispatch each one.
   * Bytes before the first sync byte are skipped (count stored in
   * `debug.skippedBytes`); if sync is lost later the parser searches
   * forward for the next sync byte.
   *
   * @param {Uint8Array} data - Raw transport stream bytes.
   */
  parse(data) {
    let offset = 0;
    while (offset < data.byteLength && data[offset] !== TS_SYNC_BYTE) offset++;
    if (offset > 0) this.debug.skippedBytes = offset;

    while (offset + TS_PACKET_SIZE <= data.byteLength) {
      if (data[offset] !== TS_SYNC_BYTE) {
        const nextSync = data.indexOf(TS_SYNC_BYTE, offset + 1);
        if (nextSync === -1) break;
        offset = nextSync;
        continue;
      }
      this.parsePacket(data.subarray(offset, offset + TS_PACKET_SIZE));
      this.debug.packets++;
      offset += TS_PACKET_SIZE;
    }
  }

  /**
   * Decode one packet header, skip any adaptation field and route the
   * payload by PID (PAT, PMT, selected video PID, selected audio PID).
   *
   * @param {Uint8Array} packet - One packet starting at its sync byte.
   */
  parsePacket(packet) {
    const pid = ((packet[1] & 0x1F) << 8) | packet[2];
    const payloadStart = (packet[1] & 0x40) !== 0;
    const adaptationField = (packet[3] & 0x30) >> 4;

    let payloadOffset = 4;
    if (adaptationField === 2 || adaptationField === 3) {
      const adaptLen = packet[4];
      payloadOffset = 5 + adaptLen;
      if (payloadOffset >= TS_PACKET_SIZE) return; // Adaptation field claims the whole packet
    }
    if (adaptationField === 2) return; // Adaptation field only, no payload
    if (payloadOffset >= packet.length) return;

    const payload = packet.subarray(payloadOffset);
    if (payload.length === 0) return;

    if (pid === PAT_PID) this.parsePAT(payload);
    else if (pid === this.pmtPid) this.parsePMT(payload);
    else if (pid === this.videoPid) this.collectPES(payload, payloadStart, 'video');
    else if (pid === this.audioPid) this.collectPES(payload, payloadStart, 'audio');
  }

  /**
   * Read the program association table and remember the PMT PID of the
   * first entry whose program number and PID are both non-zero.
   *
   * @param {Uint8Array} payload - Packet payload beginning with the pointer field.
   */
  parsePAT(payload) {
    if (payload.length < 12) return;
    let offset = payload[0] + 1; // skip pointer field
    if (offset + 8 > payload.length) return;

    // Skip the 8-byte section header that precedes the program entries.
    offset += 8;

    while (offset + 4 <= payload.length - 4) { // last 4 bytes are reserved for the CRC
      const programNum = (payload[offset] << 8) | payload[offset + 1];
      const pmtPid = ((payload[offset + 2] & 0x1F) << 8) | payload[offset + 3];
      if (programNum !== 0 && pmtPid !== 0) {
        this.pmtPid = pmtPid;
        this.debug.patFound = true;
        break;
      }
      offset += 4;
    }
  }

  /**
   * Read the program map table and select the first video stream and the
   * first audio stream listed. Codec support is not judged here; the
   * stream type is only recorded.
   *
   * @param {Uint8Array} payload - Packet payload beginning with the pointer field.
   */
  parsePMT(payload) {
    if (payload.length < 16) return;
    let offset = payload[0] + 1; // skip pointer field
    if (offset + 12 > payload.length) return;

    offset++; // table_id

    const sectionLength = ((payload[offset] & 0x0F) << 8) | payload[offset + 1];
    offset += 2;

    offset += 5; // program_number(2) + version(1) + section_number(1) + last_section(1)
    offset += 2; // PCR_PID

    if (offset + 2 > payload.length) return;
    const programInfoLength = ((payload[offset] & 0x0F) << 8) | payload[offset + 1];
    offset += 2 + programInfoLength;

    // End of the stream entries: stop before the CRC and never read past this packet.
    const sectionEnd = Math.min(payload.length - 4, 1 + payload[0] + 3 + sectionLength - 4);

    while (offset + 5 <= sectionEnd) {
      const streamType = payload[offset];
      const elementaryPid = ((payload[offset + 1] & 0x1F) << 8) | payload[offset + 2];
      const esInfoLength = ((payload[offset + 3] & 0x0F) << 8) | payload[offset + 4];

      // Video types: 0x01=MPEG-1, 0x02=MPEG-2, 0x1B=H.264, 0x24=HEVC
      const isVideo = streamType === 0x01 || streamType === 0x02 ||
        streamType === 0x1B || streamType === 0x24;
      // Audio types: 0x03=MPEG-1, 0x04=MPEG-2, 0x0F=AAC, 0x11=AAC-LATM, 0x81=AC3, 0x87=EAC3
      const isAudio = streamType === 0x03 || streamType === 0x04 || streamType === 0x0F ||
        streamType === 0x11 || streamType === 0x81 || streamType === 0x87;

      if (!this.videoPid && isVideo) {
        this.videoPid = elementaryPid;
        this.videoStreamType = streamType;
        this.debug.pmtFound = true;
      } else if (!this.audioPid && isAudio) {
        this.audioPid = elementaryPid;
        this.audioStreamType = streamType;
      }

      offset += 5 + esInfoLength;
    }
  }

  /**
   * Accumulate packet payloads belonging to one PES packet. When a packet
   * flagged as the start of a new PES arrives, the previously buffered PES
   * is assembled and processed first.
   *
   * @param {Uint8Array} payload - Payload bytes of the current packet.
   * @param {boolean} isStart - True when this packet begins a new PES packet.
   * @param {string} type - 'video' or 'audio'.
   */
  collectPES(payload, isStart, type) {
    const buffer = type === 'video' ? this.videoPesBuffer : this.audioPesBuffer;
    if (isStart) {
      if (type === 'audio') this.debug.audioPesStarts = (this.debug.audioPesStarts || 0) + 1;
      if (buffer.length > 0) this.processPES(this.concatenateBuffers(buffer), type);
      buffer.length = 0;
    }
    // Copy: the payload is a view into the caller's buffer.
    buffer.push(payload.slice());
  }

  /**
   * Parse a complete PES packet: validate the start code, read the optional
   * PTS/DTS and forward the elementary stream bytes to the video or audio handler.
   *
   * @param {Uint8Array} pesData - Reassembled PES packet.
   * @param {string} type - 'video' or 'audio'.
   */
  processPES(pesData, type) {
    if (pesData.length < 9) return;
    if (pesData[0] !== 0 || pesData[1] !== 0 || pesData[2] !== 1) return;

    const flags = pesData[7];
    const headerDataLength = pesData[8];
    let pts = null;
    let dts = null;
    if (flags & 0x80) pts = this.parsePTS(pesData, 9);
    if (flags & 0x40) dts = this.parsePTS(pesData, 14);

    const payload = pesData.subarray(9 + headerDataLength);
    if (type === 'video') this.processVideoPayload(payload, pts, dts);
    else this.processAudioPayload(payload, pts);
  }

  /**
   * Decode the 33-bit timestamp stored in five bytes of a PES header.
   *
   * The top three bits are combined by multiplication rather than a shift:
   * JavaScript bitwise operators work on 32-bit signed integers, so shifting
   * those bits into positions 30-32 would overflow and corrupt (or negate)
   * any timestamp at or above 2^31.
   *
   * @param {Uint8Array} data - Buffer containing the PES header.
   * @param {number} offset - Index of the first of the five timestamp bytes.
   * @returns {number} Timestamp in the range 0 .. 2^33 - 1.
   */
  parsePTS(data, offset) {
    const high3 = (data[offset] & 0x0E) >> 1; // bits 32..30
    const low30 = (data[offset + 1] << 22) |
      ((data[offset + 2] & 0xFE) << 14) |
      (data[offset + 3] << 7) |
      ((data[offset + 4] & 0xFE) >> 1); // bits 29..0, always non-negative
    return high3 * 0x40000000 + low30;
  }

  /**
   * Split a video PES payload into NAL units and store them as one access
   * unit. Payloads without a PTS, or without any NAL unit, are dropped.
   * When no DTS is present the PTS is used as the DTS.
   */
  processVideoPayload(payload, pts, dts) {
    const nalUnits = this.extractNALUnits(payload);
    if (nalUnits.length > 0 && pts !== null) {
      const effectiveDts = dts !== null ? dts : pts;
      this.videoAccessUnits.push({ nalUnits, pts, dts: effectiveDts });
      this.videoPts.push(pts);
      this.videoDts.push(effectiveDts);
    }
  }

  /**
   * Split an Annex B byte stream at its 3- or 4-byte start codes.
   *
   * @param {Uint8Array} data - Elementary stream bytes.
   * @returns {Uint8Array[]} Views into `data`, one per non-empty NAL unit,
   *   start codes excluded.
   */
  extractNALUnits(data) {
    const nalUnits = [];
    let i = 0;
    while (i < data.length - 3) {
      if (data[i] === 0 && data[i + 1] === 0) {
        let startCodeLen = 0;
        if (data[i + 2] === 1) startCodeLen = 3;
        else if (data[i + 2] === 0 && i + 3 < data.length && data[i + 3] === 1) startCodeLen = 4;

        if (startCodeLen > 0) {
          // Scan forward to the next start code, or to the end of the data.
          let end = i + startCodeLen;
          while (end < data.length - 2) {
            if (data[end] === 0 && data[end + 1] === 0 &&
                (data[end + 2] === 1 || (data[end + 2] === 0 && end + 3 < data.length && data[end + 3] === 1))) break;
            end++;
          }
          if (end >= data.length - 2) end = data.length;

          const nalUnit = data.subarray(i + startCodeLen, end);
          if (nalUnit.length > 0) nalUnits.push(nalUnit);
          i = end;
          continue;
        }
      }
      i++;
    }
    return nalUnits;
  }

  /**
   * Split an audio PES payload into ADTS frames and store each frame with a
   * timestamp. The first frame takes the PES PTS (or continues from the last
   * assigned timestamp when the PES has none); following frames advance by
   * 1024 samples at the detected sample rate (48000 assumed until detected).
   * Frames seen before any PTS is known are counted in `debug.audioSkipped`
   * and discarded.
   */
  processAudioPayload(payload, pts) {
    const frames = this.extractADTSFrames(payload);

    this.debug.audioPesCount = (this.debug.audioPesCount || 0) + 1;
    this.debug.audioFramesInPes = (this.debug.audioFramesInPes || 0) + frames.length;

    if (pts !== null) {
      this.lastAudioPts = pts;
    } else if (this.lastAudioPts !== null) {
      pts = this.lastAudioPts;
    } else {
      this.debug.audioSkipped = (this.debug.audioSkipped || 0) + frames.length;
      return;
    }

    const sampleRate = this.audioSampleRate || 48000;
    const ptsIncrement = Math.round(1024 * 90000 / sampleRate);

    for (const frame of frames) {
      this.audioAccessUnits.push({ data: frame.data, pts });
      this.audioPts.push(pts);
      pts += ptsIncrement;
      this.lastAudioPts = pts;
    }
  }

  /**
   * Locate ADTS frames in `data`. A frame that is cut off by the end of the
   * buffer is kept in `adtsPartial` and prepended to the next call's data.
   * The sample rate and channel configuration are captured from the first
   * header that has a non-zero frame length and a known sample-rate index.
   *
   * @param {Uint8Array} data - Audio elementary stream bytes.
   * @returns {{header: Uint8Array, data: Uint8Array}[]} One entry per complete frame.
   */
  extractADTSFrames(data) {
    // Sample rates indexed by the ADTS sampling_frequency_index field.
    const SAMPLE_RATES = [96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350];

    const frames = [];
    let i = 0;

    if (this.adtsPartial && this.adtsPartial.length > 0) {
      const combined = new Uint8Array(this.adtsPartial.length + data.length);
      combined.set(this.adtsPartial);
      combined.set(data, this.adtsPartial.length);
      data = combined;
      this.adtsPartial = null;
    }

    while (i < data.length - 7) {
      if (data[i] === 0xFF && (data[i + 1] & 0xF0) === 0xF0) {
        const protectionAbsent = data[i + 1] & 0x01;
        const frameLength = ((data[i + 3] & 0x03) << 11) | (data[i + 4] << 3) | ((data[i + 5] & 0xE0) >> 5);

        if (!this.audioSampleRate && frameLength > 0) {
          const samplingFreqIndex = (data[i + 2] & 0x3C) >> 2;
          const channelConfig = ((data[i + 2] & 0x01) << 2) | ((data[i + 3] & 0xC0) >> 6);
          if (samplingFreqIndex < SAMPLE_RATES.length) {
            this.audioSampleRate = SAMPLE_RATES[samplingFreqIndex];
            this.audioChannels = channelConfig;
          }
        }

        if (frameLength > 0) {
          if (i + frameLength <= data.length) {
            // Header is 7 bytes, or 9 when a CRC is present.
            const headerSize = protectionAbsent ? 7 : 9;
            frames.push({
              header: data.subarray(i, i + headerSize),
              data: data.subarray(i + headerSize, i + frameLength),
            });
            i += frameLength;
            continue;
          } else {
            this.adtsPartial = data.slice(i);
            break;
          }
        }
      }
      i++;
    }
    return frames;
  }

  /**
   * Join a list of byte arrays into one new array.
   *
   * @param {Uint8Array[]} buffers
   * @returns {Uint8Array}
   */
  concatenateBuffers(buffers) {
    const totalLength = buffers.reduce((sum, b) => sum + b.length, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const buf of buffers) {
      result.set(buf, offset);
      offset += buf.length;
    }
    return result;
  }

  /**
   * Flush the PES packets still sitting in the buffers, then shift all
   * timestamps so the earliest one becomes 0.
   */
  finalize() {
    if (this.videoPesBuffer.length > 0) this.processPES(this.concatenateBuffers(this.videoPesBuffer), 'video');
    if (this.audioPesBuffer.length > 0) this.processPES(this.concatenateBuffers(this.audioPesBuffer), 'audio');
    this.normalizeTimestamps();
  }

  /**
   * Subtract the smallest PTS found in either stream from every stored
   * timestamp (arrays and access units). Records the offset in
   * `debug.timestampOffset`. Does nothing when there are no timestamps or
   * the minimum is already 0.
   */
  normalizeTimestamps() {
    // Plain loops instead of Math.min(...array): spreading a long stream's
    // timestamps into call arguments can exceed the engine's argument limit.
    let minPts = Infinity;
    for (const pts of this.videoPts) {
      if (pts < minPts) minPts = pts;
    }
    for (const pts of this.audioPts) {
      if (pts < minPts) minPts = pts;
    }

    if (minPts === Infinity || minPts === 0) return;

    for (let i = 0; i < this.videoPts.length; i++) this.videoPts[i] -= minPts;
    for (let i = 0; i < this.videoDts.length; i++) this.videoDts[i] -= minPts;
    for (let i = 0; i < this.audioPts.length; i++) this.audioPts[i] -= minPts;

    for (const au of this.videoAccessUnits) {
      au.pts -= minPts;
      au.dts -= minPts;
    }
    for (const au of this.audioAccessUnits) {
      au.pts -= minPts;
    }

    this.debug.timestampOffset = minPts;
    this.debug.timestampNormalized = true;
  }
}
429
-
430
- // ============================================
431
- // MP4 BUILDER
432
- // ============================================
433
/**
 * Extract the cropped picture size from an H.264 sequence parameter set.
 *
 * Parsing is best-effort: whenever the SPS is missing, too short, truncated
 * or yields a non-positive size, the default 1920x1080 is returned instead
 * of throwing.
 *
 * @param {Uint8Array} sps - SPS NAL unit including its one-byte NAL header,
 *   without a start code.
 * @returns {{width: number, height: number}} Picture size in pixels.
 */
function parseSPS(sps) {
  const result = { width: 1920, height: 1080 };
  if (!sps || sps.length < 4) return result;

  // Drop emulation prevention bytes (the 0x03 in 00 00 03) so that bit
  // positions match the syntax elements.
  const rbsp = [];
  let zeroRun = 0;
  for (let i = 0; i < sps.length; i++) {
    const byte = sps[i];
    if (zeroRun >= 2 && byte === 0x03) {
      zeroRun = 0;
      continue;
    }
    rbsp.push(byte);
    zeroRun = byte === 0 ? zeroRun + 1 : 0;
  }

  // Byte 0 is the NAL header, then profile_idc, constraint flags, level_idc.
  const profile = rbsp[1];
  const totalBits = rbsp.length * 8;
  let bitPos = 4 * 8;

  const getBit = () => {
    // Running out of data must abort the parse, not read as zeros.
    if (bitPos >= totalBits) throw new RangeError('SPS ended unexpectedly');
    const bit = (rbsp[bitPos >> 3] >> (7 - (bitPos & 7))) & 1;
    bitPos++;
    return bit;
  };

  // Unsigned Exp-Golomb code.
  const readUE = () => {
    let zeros = 0;
    while (getBit() === 0) {
      zeros++;
      if (zeros > 31) throw new RangeError('Invalid Exp-Golomb code in SPS');
    }
    let suffix = 0;
    for (let i = 0; i < zeros; i++) suffix = suffix * 2 + getBit();
    return 2 ** zeros - 1 + suffix;
  };

  // Signed Exp-Golomb code.
  const readSE = () => {
    const val = readUE();
    return val % 2 === 1 ? (val + 1) / 2 : -(val / 2);
  };

  // Consume one scaling list. Deltas stop being coded once the running
  // scale reaches 0, so the number of values read is data dependent.
  const skipScalingList = (size) => {
    let lastScale = 8;
    let nextScale = 8;
    for (let j = 0; j < size; j++) {
      if (nextScale !== 0) {
        const delta = readSE();
        nextScale = (((lastScale + delta) % 256) + 256) % 256;
      }
      if (nextScale !== 0) lastScale = nextScale;
    }
  };

  // profile_idc values whose SPS carries the chroma/bit-depth/scaling fields.
  const EXTENDED_PROFILES = [100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135];

  try {
    readUE(); // seq_parameter_set_id

    let chromaFormat = 1; // value implied when the field is absent
    let separateColourPlane = 0;
    if (EXTENDED_PROFILES.includes(profile)) {
      chromaFormat = readUE();
      if (chromaFormat === 3) separateColourPlane = getBit();
      readUE(); // bit_depth_luma_minus8
      readUE(); // bit_depth_chroma_minus8
      getBit(); // qpprime_y_zero_transform_bypass_flag
      if (getBit()) { // seq_scaling_matrix_present_flag
        const listCount = chromaFormat !== 3 ? 8 : 12;
        for (let i = 0; i < listCount; i++) {
          if (getBit()) skipScalingList(i < 6 ? 16 : 64); // seq_scaling_list_present_flag
        }
      }
    }

    readUE(); // log2_max_frame_num_minus4
    const pocType = readUE();
    if (pocType === 0) {
      readUE(); // log2_max_pic_order_cnt_lsb_minus4
    } else if (pocType === 1) {
      getBit(); // delta_pic_order_always_zero_flag
      readSE(); // offset_for_non_ref_pic
      readSE(); // offset_for_top_to_bottom_field
      const refFramesInCycle = readUE();
      for (let i = 0; i < refFramesInCycle; i++) readSE();
    }

    readUE(); // max_num_ref_frames
    getBit(); // gaps_in_frame_num_value_allowed_flag

    const picWidthMbs = readUE() + 1;
    const picHeightMapUnits = readUE() + 1;
    const frameMbsOnly = getBit();

    if (!frameMbsOnly) getBit(); // mb_adaptive_frame_field_flag
    getBit(); // direct_8x8_inference_flag

    let cropLeft = 0;
    let cropRight = 0;
    let cropTop = 0;
    let cropBottom = 0;
    if (getBit()) { // frame_cropping_flag
      cropLeft = readUE();
      cropRight = readUE();
      cropTop = readUE();
      cropBottom = readUE();
    }

    // Crop offsets are counted in units that depend on chroma subsampling
    // and, vertically, on whether the stream is coded as fields.
    const chromaArrayType = separateColourPlane ? 0 : chromaFormat;
    const subWidthC = chromaArrayType === 1 || chromaArrayType === 2 ? 2 : 1;
    const subHeightC = chromaArrayType === 1 ? 2 : 1;
    const cropUnitX = subWidthC;
    const cropUnitY = subHeightC * (2 - frameMbsOnly);

    const width = picWidthMbs * 16 - (cropLeft + cropRight) * cropUnitX;
    const height = (2 - frameMbsOnly) * picHeightMapUnits * 16 - (cropTop + cropBottom) * cropUnitY;
    if (width <= 0 || height <= 0) throw new RangeError('SPS describes an empty picture');

    result.width = width;
    result.height = height;
  } catch (e) {
    // Deliberate best-effort: keep the default size when the SPS cannot be parsed.
  }

  return result;
}
523
-
524
- class MP4Builder {
525
- constructor(parser) {
526
- this.parser = parser;
527
- this.videoTimescale = 90000;
528
- // Use detected sample rate or default to 48000
529
- this.audioTimescale = parser.audioSampleRate || 48000;
530
- this.audioSampleDuration = 1024;
531
- this.videoDimensions = null;
532
- }
533
-
534
- getVideoDimensions() {
535
- if (this.videoDimensions) return this.videoDimensions;
536
-
537
- // Find SPS NAL unit
538
- for (const au of this.parser.videoAccessUnits) {
539
- for (const nalUnit of au.nalUnits) {
540
- const nalType = nalUnit[0] & 0x1F;
541
- if (nalType === 7) {
542
- this.videoDimensions = parseSPS(nalUnit);
543
- return this.videoDimensions;
544
- }
545
- }
546
- }
547
-
548
- // Fallback
549
- this.videoDimensions = { width: 1920, height: 1080 };
550
- return this.videoDimensions;
551
- }
552
-
553
- build() {
554
- const mdatContent = this.buildMdatContent();
555
- const moov = this.buildMoov(mdatContent.byteLength);
556
- const ftyp = this.buildFtyp();
557
- const mdatOffset = ftyp.byteLength + moov.byteLength + 8;
558
- this.updateChunkOffsets(moov, mdatOffset);
559
- const mdat = createBox('mdat', mdatContent);
560
- const result = new Uint8Array(ftyp.byteLength + moov.byteLength + mdat.byteLength);
561
- result.set(ftyp, 0);
562
- result.set(moov, ftyp.byteLength);
563
- result.set(mdat, ftyp.byteLength + moov.byteLength);
564
- return result;
565
- }
566
-
567
- buildFtyp() {
568
- const data = new Uint8Array(16);
569
- data[0] = 'i'.charCodeAt(0); data[1] = 's'.charCodeAt(0); data[2] = 'o'.charCodeAt(0); data[3] = 'm'.charCodeAt(0);
570
- data[7] = 1;
571
- data[8] = 'i'.charCodeAt(0); data[9] = 's'.charCodeAt(0); data[10] = 'o'.charCodeAt(0); data[11] = 'm'.charCodeAt(0);
572
- data[12] = 'a'.charCodeAt(0); data[13] = 'v'.charCodeAt(0); data[14] = 'c'.charCodeAt(0); data[15] = '1'.charCodeAt(0);
573
- return createBox('ftyp', data);
574
- }
575
-
576
- buildMdatContent() {
577
- const chunks = [];
578
- this.videoSampleSizes = [];
579
- this.videoSampleOffsets = [];
580
- let currentOffset = 0;
581
- for (const au of this.parser.videoAccessUnits) {
582
- this.videoSampleOffsets.push(currentOffset);
583
- let sampleSize = 0;
584
- for (const nalUnit of au.nalUnits) {
585
- const prefixed = new Uint8Array(4 + nalUnit.length);
586
- new DataView(prefixed.buffer).setUint32(0, nalUnit.length);
587
- prefixed.set(nalUnit, 4);
588
- chunks.push(prefixed);
589
- sampleSize += prefixed.length;
590
- }
591
- this.videoSampleSizes.push(sampleSize);
592
- currentOffset += sampleSize;
593
- }
594
- this.videoChunkOffset = 0;
595
- this.audioChunkOffset = currentOffset;
596
- this.audioSampleSizes = [];
597
- for (const frame of this.parser.audioAccessUnits) {
598
- chunks.push(frame.data);
599
- this.audioSampleSizes.push(frame.data.length);
600
- currentOffset += frame.data.length;
601
- }
602
- const totalSize = chunks.reduce((sum, c) => sum + c.length, 0);
603
- const result = new Uint8Array(totalSize);
604
- let offset = 0;
605
- for (const chunk of chunks) { result.set(chunk, offset); offset += chunk.length; }
606
- return result;
607
- }
608
-
609
- buildMoov(mdatSize) {
610
- const mvhd = this.buildMvhd();
611
- const videoTrak = this.buildVideoTrak();
612
- const audioTrak = this.buildAudioTrak();
613
- const udta = this.buildUdta();
614
- return createBox('moov', mvhd, videoTrak, audioTrak, udta);
615
- }
616
-
617
- buildUdta() {
618
- const toolName = 'toMp4.js';
619
- const toolBytes = new TextEncoder().encode(toolName);
620
- const dataBox = new Uint8Array(16 + toolBytes.length);
621
- const dataView = new DataView(dataBox.buffer);
622
- dataView.setUint32(0, 16 + toolBytes.length);
623
- dataBox[4] = 'd'.charCodeAt(0); dataBox[5] = 'a'.charCodeAt(0); dataBox[6] = 't'.charCodeAt(0); dataBox[7] = 'a'.charCodeAt(0);
624
- dataView.setUint32(8, 1); dataView.setUint32(12, 0);
625
- dataBox.set(toolBytes, 16);
626
- const tooBox = createBox('©too', dataBox);
627
- const ilst = createBox('ilst', tooBox);
628
- const hdlrData = new Uint8Array(21);
629
- hdlrData[4] = 'm'.charCodeAt(0); hdlrData[5] = 'd'.charCodeAt(0); hdlrData[6] = 'i'.charCodeAt(0); hdlrData[7] = 'r'.charCodeAt(0);
630
- const metaHdlr = createFullBox('hdlr', 0, 0, hdlrData);
631
- const meta = createFullBox('meta', 0, 0, new Uint8Array(0), metaHdlr, ilst);
632
- return createBox('udta', meta);
633
- }
634
-
635
- buildMvhd() {
636
- const data = new Uint8Array(96);
637
- const view = new DataView(data.buffer);
638
- view.setUint32(8, this.videoTimescale);
639
- view.setUint32(12, this.calculateVideoDuration());
640
- view.setUint32(16, 0x00010000);
641
- view.setUint16(20, 0x0100);
642
- view.setUint32(32, 0x00010000);
643
- view.setUint32(48, 0x00010000);
644
- view.setUint32(64, 0x40000000);
645
- view.setUint32(92, 258);
646
- return createFullBox('mvhd', 0, 0, data);
647
- }
648
-
649
- calculateVideoDuration() {
650
- if (this.parser.videoDts.length < 2) return 0;
651
- const firstDts = this.parser.videoDts[0];
652
- const lastDts = this.parser.videoDts[this.parser.videoDts.length - 1];
653
- const avgDuration = (lastDts - firstDts) / (this.parser.videoDts.length - 1);
654
- return Math.round(lastDts - firstDts + avgDuration);
655
- }
656
-
657
- buildVideoTrak() {
658
- const edts = this.buildVideoEdts();
659
- if (edts) {
660
- return createBox('trak', this.buildVideoTkhd(), edts, this.buildVideoMdia());
661
- }
662
- return createBox('trak', this.buildVideoTkhd(), this.buildVideoMdia());
663
- }
664
-
665
- // Build edit list to fix A/V sync
666
- // The elst box tells the player where media actually starts
667
- buildVideoEdts() {
668
- // Get first video PTS (presentation time)
669
- if (this.parser.videoAccessUnits.length === 0) return null;
670
-
671
- const firstAU = this.parser.videoAccessUnits[0];
672
- const firstVideoPts = firstAU.pts;
673
-
674
- // If video starts at 0, no edit needed
675
- if (firstVideoPts === 0) return null;
676
-
677
- // Create elst box: tells player to start at firstVideoPts in the media
678
- // This compensates for CTTS offset making video appear to start late
679
- const duration = this.calculateVideoDuration();
680
- const mediaTime = firstVideoPts; // Start playback at this media time
681
-
682
- // elst entry: segment_duration (4), media_time (4), media_rate (4)
683
- const elstData = new Uint8Array(16);
684
- const view = new DataView(elstData.buffer);
685
- view.setUint32(0, 1); // entry count
686
- view.setUint32(4, duration); // segment duration in movie timescale
687
- view.setInt32(8, mediaTime); // media time - where to start
688
- view.setUint16(12, 1); // media rate integer (1.0)
689
- view.setUint16(14, 0); // media rate fraction
690
-
691
- const elst = createFullBox('elst', 0, 0, elstData);
692
- return createBox('edts', elst);
693
- }
694
-
695
- buildVideoTkhd() {
696
- const { width, height } = this.getVideoDimensions();
697
- const data = new Uint8Array(80);
698
- const view = new DataView(data.buffer);
699
- view.setUint32(8, 256);
700
- view.setUint32(16, this.calculateVideoDuration());
701
- view.setUint16(32, 0);
702
- view.setUint32(36, 0x00010000);
703
- view.setUint32(52, 0x00010000);
704
- view.setUint32(68, 0x40000000);
705
- view.setUint32(72, width << 16);
706
- view.setUint32(76, height << 16);
707
- return createFullBox('tkhd', 0, 3, data);
708
- }
709
-
710
- buildVideoMdia() {
711
- return createBox('mdia', this.buildVideoMdhd(), this.buildVideoHdlr(), this.buildVideoMinf());
712
- }
713
-
714
- buildVideoMdhd() {
715
- const data = new Uint8Array(20);
716
- const view = new DataView(data.buffer);
717
- view.setUint32(8, this.videoTimescale);
718
- view.setUint32(12, this.calculateVideoDuration());
719
- view.setUint16(16, 0x55C4);
720
- return createFullBox('mdhd', 0, 0, data);
721
- }
722
-
723
- buildVideoHdlr() {
724
- const data = new Uint8Array(21);
725
- data[4] = 'v'.charCodeAt(0); data[5] = 'i'.charCodeAt(0); data[6] = 'd'.charCodeAt(0); data[7] = 'e'.charCodeAt(0);
726
- return createFullBox('hdlr', 0, 0, data);
727
- }
728
-
729
- buildVideoMinf() {
730
- return createBox('minf', this.buildVmhd(), this.buildDinf(), this.buildVideoStbl());
731
- }
732
-
733
- buildVmhd() { return createFullBox('vmhd', 0, 1, new Uint8Array(8)); }
734
-
735
- buildDinf() {
736
- const urlBox = createFullBox('url ', 0, 1, new Uint8Array(0));
737
- const dref = createFullBox('dref', 0, 0, new Uint8Array([0, 0, 0, 1]), urlBox);
738
- return createBox('dinf', dref);
739
- }
740
-
741
- buildVideoStbl() {
742
- const boxes = [this.buildVideoStsd(), this.buildVideoStts(), this.buildVideoCtts(), this.buildVideoStsc(), this.buildVideoStsz(), this.buildVideoStco()];
743
- const stss = this.buildVideoStss();
744
- if (stss) boxes.push(stss);
745
- return createBox('stbl', ...boxes);
746
- }
747
-
748
- buildVideoStsd() {
749
- const { width, height } = this.getVideoDimensions();
750
- const avcC = this.buildAvcC();
751
- const btrtData = new Uint8Array(12);
752
- const btrtView = new DataView(btrtData.buffer);
753
- btrtView.setUint32(4, 2000000); btrtView.setUint32(8, 2000000);
754
- const btrt = createBox('btrt', btrtData);
755
- const paspData = new Uint8Array(8);
756
- const paspView = new DataView(paspData.buffer);
757
- paspView.setUint32(0, 1); paspView.setUint32(4, 1);
758
- const pasp = createBox('pasp', paspData);
759
- const avc1Data = new Uint8Array(78 + avcC.byteLength + btrt.byteLength + pasp.byteLength);
760
- const view = new DataView(avc1Data.buffer);
761
- view.setUint16(6, 1); view.setUint16(24, width); view.setUint16(26, height);
762
- view.setUint32(28, 0x00480000); view.setUint32(32, 0x00480000);
763
- view.setUint16(40, 1); view.setUint16(74, 0x0018); view.setInt16(76, -1);
764
- avc1Data.set(avcC, 78); avc1Data.set(btrt, 78 + avcC.byteLength); avc1Data.set(pasp, 78 + avcC.byteLength + btrt.byteLength);
765
- const avc1 = createBox('avc1', avc1Data);
766
- const stsdHeader = new Uint8Array(4);
767
- new DataView(stsdHeader.buffer).setUint32(0, 1);
768
- return createFullBox('stsd', 0, 0, stsdHeader, avc1);
769
- }
770
-
771
- buildAvcC() {
772
- let sps = null, pps = null;
773
- for (const au of this.parser.videoAccessUnits) {
774
- for (const nalUnit of au.nalUnits) {
775
- const nalType = nalUnit[0] & 0x1F;
776
- if (nalType === 7 && !sps) sps = nalUnit;
777
- if (nalType === 8 && !pps) pps = nalUnit;
778
- if (sps && pps) break;
779
- }
780
- if (sps && pps) break;
781
- }
782
- if (!sps || !pps) {
783
- sps = new Uint8Array([0x67, 0x64, 0x00, 0x1f, 0xac, 0xd9, 0x40, 0x78, 0x02, 0x27, 0xe5, 0xc0, 0x44, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0xf0, 0x3c, 0x60, 0xc6, 0x58]);
784
- pps = new Uint8Array([0x68, 0xeb, 0xe3, 0xcb, 0x22, 0xc0]);
785
- }
786
- const data = new Uint8Array(11 + sps.length + pps.length);
787
- const view = new DataView(data.buffer);
788
- data[0] = 1; data[1] = sps[1]; data[2] = sps[2]; data[3] = sps[3]; data[4] = 0xFF; data[5] = 0xE1;
789
- view.setUint16(6, sps.length); data.set(sps, 8);
790
- data[8 + sps.length] = 1; view.setUint16(9 + sps.length, pps.length); data.set(pps, 11 + sps.length);
791
- return createBox('avcC', data);
792
- }
793
-
794
- buildVideoStts() {
795
- const entries = [];
796
- let lastDuration = -1, count = 0;
797
- for (let i = 0; i < this.parser.videoDts.length; i++) {
798
- const duration = i < this.parser.videoDts.length - 1
799
- ? this.parser.videoDts[i + 1] - this.parser.videoDts[i]
800
- : (entries.length > 0 ? entries[entries.length - 1].duration : 3003);
801
- if (duration === lastDuration) count++;
802
- else { if (count > 0) entries.push({ count, duration: lastDuration }); lastDuration = duration; count = 1; }
803
- }
804
- if (count > 0) entries.push({ count, duration: lastDuration });
805
- const data = new Uint8Array(4 + entries.length * 8);
806
- const view = new DataView(data.buffer);
807
- view.setUint32(0, entries.length);
808
- for (let i = 0; i < entries.length; i++) { view.setUint32(4 + i * 8, entries[i].count); view.setUint32(8 + i * 8, entries[i].duration); }
809
- return createFullBox('stts', 0, 0, data);
810
- }
811
-
812
- buildVideoCtts() {
813
- const entries = [];
814
- for (const au of this.parser.videoAccessUnits) {
815
- const cts = au.pts - au.dts;
816
- if (entries.length > 0 && entries[entries.length - 1].offset === cts) entries[entries.length - 1].count++;
817
- else entries.push({ count: 1, offset: cts });
818
- }
819
- const data = new Uint8Array(4 + entries.length * 8);
820
- const view = new DataView(data.buffer);
821
- view.setUint32(0, entries.length);
822
- for (let i = 0; i < entries.length; i++) { view.setUint32(4 + i * 8, entries[i].count); view.setUint32(8 + i * 8, entries[i].offset); }
823
- return createFullBox('ctts', 0, 0, data);
824
- }
825
-
826
- buildVideoStsc() {
827
- const data = new Uint8Array(4 + 12);
828
- const view = new DataView(data.buffer);
829
- view.setUint32(0, 1); view.setUint32(4, 1); view.setUint32(8, this.videoSampleSizes.length); view.setUint32(12, 1);
830
- return createFullBox('stsc', 0, 0, data);
831
- }
832
-
833
- buildVideoStsz() {
834
- const data = new Uint8Array(8 + this.videoSampleSizes.length * 4);
835
- const view = new DataView(data.buffer);
836
- view.setUint32(0, 0); view.setUint32(4, this.videoSampleSizes.length);
837
- for (let i = 0; i < this.videoSampleSizes.length; i++) view.setUint32(8 + i * 4, this.videoSampleSizes[i]);
838
- return createFullBox('stsz', 0, 0, data);
839
- }
840
-
841
- buildVideoStco() {
842
- const data = new Uint8Array(8);
843
- const view = new DataView(data.buffer);
844
- view.setUint32(0, 1); view.setUint32(4, 0);
845
- return createFullBox('stco', 0, 0, data);
846
- }
847
-
848
- buildVideoStss() {
849
- const keyframes = [];
850
- for (let i = 0; i < this.parser.videoAccessUnits.length; i++) {
851
- for (const nalUnit of this.parser.videoAccessUnits[i].nalUnits) {
852
- if ((nalUnit[0] & 0x1F) === 5) { keyframes.push(i + 1); break; }
853
- }
854
- }
855
- if (keyframes.length === 0) return null;
856
- const data = new Uint8Array(4 + keyframes.length * 4);
857
- const view = new DataView(data.buffer);
858
- view.setUint32(0, keyframes.length);
859
- for (let i = 0; i < keyframes.length; i++) view.setUint32(4 + i * 4, keyframes[i]);
860
- return createFullBox('stss', 0, 0, data);
861
- }
862
-
863
- buildAudioTrak() {
864
- const edts = this.buildAudioEdts();
865
- if (edts) {
866
- return createBox('trak', this.buildAudioTkhd(), edts, this.buildAudioMdia());
867
- }
868
- return createBox('trak', this.buildAudioTkhd(), this.buildAudioMdia());
869
- }
870
-
871
- // Build edit list for audio to sync with video
872
- buildAudioEdts() {
873
- if (this.parser.audioPts.length === 0) return null;
874
-
875
- const firstAudioPts = this.parser.audioPts[0];
876
-
877
- // If audio starts at 0, no edit needed
878
- if (firstAudioPts === 0) return null;
879
-
880
- // Convert audio PTS (90kHz) to audio timescale (48kHz)
881
- const mediaTime = Math.round(firstAudioPts * this.audioTimescale / 90000);
882
- const duration = this.audioSampleSizes.length * this.audioSampleDuration;
883
-
884
- const elstData = new Uint8Array(16);
885
- const view = new DataView(elstData.buffer);
886
- view.setUint32(0, 1); // entry count
887
- view.setUint32(4, Math.round(duration * this.videoTimescale / this.audioTimescale)); // segment duration in movie timescale
888
- view.setInt32(8, mediaTime); // media time
889
- view.setUint16(12, 1); // media rate integer
890
- view.setUint16(14, 0); // media rate fraction
891
-
892
- const elst = createFullBox('elst', 0, 0, elstData);
893
- return createBox('edts', elst);
894
- }
895
-
896
- buildAudioTkhd() {
897
- const data = new Uint8Array(80);
898
- const view = new DataView(data.buffer);
899
- view.setUint32(8, 257);
900
- const audioDuration = this.audioSampleSizes.length * this.audioSampleDuration;
901
- view.setUint32(16, Math.round(audioDuration * this.videoTimescale / this.audioTimescale));
902
- view.setUint16(32, 0x0100);
903
- view.setUint32(36, 0x00010000); view.setUint32(52, 0x00010000); view.setUint32(68, 0x40000000);
904
- return createFullBox('tkhd', 0, 3, data);
905
- }
906
-
907
- buildAudioMdia() { return createBox('mdia', this.buildAudioMdhd(), this.buildAudioHdlr(), this.buildAudioMinf()); }
908
-
909
- buildAudioMdhd() {
910
- const data = new Uint8Array(20);
911
- const view = new DataView(data.buffer);
912
- view.setUint32(8, this.audioTimescale);
913
- view.setUint32(12, this.audioSampleSizes.length * this.audioSampleDuration);
914
- view.setUint16(16, 0x55C4);
915
- return createFullBox('mdhd', 0, 0, data);
916
- }
917
-
918
- buildAudioHdlr() {
919
- const data = new Uint8Array(21);
920
- data[4] = 's'.charCodeAt(0); data[5] = 'o'.charCodeAt(0); data[6] = 'u'.charCodeAt(0); data[7] = 'n'.charCodeAt(0);
921
- return createFullBox('hdlr', 0, 0, data);
922
- }
923
-
924
- buildAudioMinf() { return createBox('minf', this.buildSmhd(), this.buildDinf(), this.buildAudioStbl()); }
925
- buildSmhd() { return createFullBox('smhd', 0, 0, new Uint8Array(4)); }
926
-
927
- buildAudioStbl() {
928
- return createBox('stbl', this.buildAudioStsd(), this.buildAudioStts(), this.buildAudioStsc(), this.buildAudioStsz(), this.buildAudioStco());
929
- }
930
-
931
- buildAudioStsd() {
932
- const esds = this.buildEsds();
933
- const channels = this.parser.audioChannels || 2;
934
- const mp4aData = new Uint8Array(28 + esds.byteLength);
935
- const view = new DataView(mp4aData.buffer);
936
- view.setUint16(6, 1);
937
- view.setUint16(16, channels); // channel count
938
- view.setUint16(18, 16); // sample size
939
- view.setUint32(24, this.audioTimescale << 16);
940
- mp4aData.set(esds, 28);
941
- const mp4a = createBox('mp4a', mp4aData);
942
- const stsdHeader = new Uint8Array(4);
943
- new DataView(stsdHeader.buffer).setUint32(0, 1);
944
- return createFullBox('stsd', 0, 0, stsdHeader, mp4a);
945
- }
946
-
947
- buildEsds() {
948
- // Build AudioSpecificConfig based on detected parameters
949
- const SAMPLE_RATE_INDEX = {
950
- 96000: 0, 88200: 1, 64000: 2, 48000: 3, 44100: 4, 32000: 5,
951
- 24000: 6, 22050: 7, 16000: 8, 12000: 9, 11025: 10, 8000: 11, 7350: 12
952
- };
953
-
954
- const sampleRate = this.audioTimescale;
955
- const channels = this.parser.audioChannels || 2;
956
- const samplingFreqIndex = SAMPLE_RATE_INDEX[sampleRate] ?? 4; // Default to 44100
957
-
958
- // AudioSpecificConfig: 5 bits objType + 4 bits freqIndex + 4 bits channels + 3 bits padding
959
- // AAC-LC = 2
960
- const audioConfig = ((2 << 11) | (samplingFreqIndex << 7) | (channels << 3)) & 0xFFFF;
961
- const audioConfigHigh = (audioConfig >> 8) & 0xFF;
962
- const audioConfigLow = audioConfig & 0xFF;
963
-
964
- const data = new Uint8Array([
965
- 0x00, 0x00, 0x00, 0x00, // version/flags
966
- 0x03, 0x19, // ES_Descriptor tag + length
967
- 0x00, 0x02, // ES_ID
968
- 0x00, // flags
969
- 0x04, 0x11, // DecoderConfigDescriptor tag + length
970
- 0x40, // objectTypeIndication (AAC)
971
- 0x15, // streamType (audio) + upstream + reserved
972
- 0x00, 0x00, 0x00, // bufferSizeDB
973
- 0x00, 0x01, 0xF4, 0x00, // maxBitrate
974
- 0x00, 0x01, 0xF4, 0x00, // avgBitrate
975
- 0x05, 0x02, // DecoderSpecificInfo tag + length
976
- audioConfigHigh, audioConfigLow, // AudioSpecificConfig
977
- 0x06, 0x01, 0x02 // SLConfigDescriptor
978
- ]);
979
- return createBox('esds', data);
980
- }
981
-
982
- buildAudioStts() {
983
- // Use actual PTS differences for accurate timing (like video does)
984
- const audioPts = this.parser.audioPts;
985
-
986
- // If we don't have PTS data, fall back to constant duration
987
- if (audioPts.length < 2) {
988
- const data = new Uint8Array(12);
989
- const view = new DataView(data.buffer);
990
- view.setUint32(0, 1);
991
- view.setUint32(4, this.audioSampleSizes.length);
992
- view.setUint32(8, this.audioSampleDuration);
993
- return createFullBox('stts', 0, 0, data);
994
- }
995
-
996
- // Convert 90kHz PTS to audio timescale (48kHz)
997
- // PTS is in 90kHz, we need durations in 48kHz
998
- const entries = [];
999
- let lastDuration = -1, count = 0;
1000
-
1001
- for (let i = 0; i < audioPts.length; i++) {
1002
- let duration;
1003
- if (i < audioPts.length - 1) {
1004
- // Calculate actual duration from PTS difference
1005
- const ptsDiff = audioPts[i + 1] - audioPts[i];
1006
- // Convert from 90kHz to 48kHz: duration = ptsDiff * 48000 / 90000
1007
- duration = Math.round(ptsDiff * this.audioTimescale / 90000);
1008
- } else {
1009
- // Last frame - use standard AAC frame duration
1010
- duration = this.audioSampleDuration;
1011
- }
1012
-
1013
- // Clamp to reasonable values (handle discontinuities)
1014
- if (duration <= 0 || duration > this.audioSampleDuration * 2) {
1015
- duration = this.audioSampleDuration;
1016
- }
1017
-
1018
- if (duration === lastDuration) {
1019
- count++;
1020
- } else {
1021
- if (count > 0) entries.push({ count, duration: lastDuration });
1022
- lastDuration = duration;
1023
- count = 1;
1024
- }
1025
- }
1026
- if (count > 0) entries.push({ count, duration: lastDuration });
1027
-
1028
- const data = new Uint8Array(4 + entries.length * 8);
1029
- const view = new DataView(data.buffer);
1030
- view.setUint32(0, entries.length);
1031
- for (let i = 0; i < entries.length; i++) {
1032
- view.setUint32(4 + i * 8, entries[i].count);
1033
- view.setUint32(8 + i * 8, entries[i].duration);
1034
- }
1035
- return createFullBox('stts', 0, 0, data);
1036
- }
1037
-
1038
- buildAudioStsc() {
1039
- const data = new Uint8Array(4 + 12);
1040
- const view = new DataView(data.buffer);
1041
- view.setUint32(0, 1); view.setUint32(4, 1); view.setUint32(8, this.audioSampleSizes.length); view.setUint32(12, 1);
1042
- return createFullBox('stsc', 0, 0, data);
1043
- }
1044
-
1045
- buildAudioStsz() {
1046
- const data = new Uint8Array(8 + this.audioSampleSizes.length * 4);
1047
- const view = new DataView(data.buffer);
1048
- view.setUint32(0, 0); view.setUint32(4, this.audioSampleSizes.length);
1049
- for (let i = 0; i < this.audioSampleSizes.length; i++) view.setUint32(8 + i * 4, this.audioSampleSizes[i]);
1050
- return createFullBox('stsz', 0, 0, data);
1051
- }
1052
-
1053
- buildAudioStco() {
1054
- const data = new Uint8Array(8);
1055
- const view = new DataView(data.buffer);
1056
- view.setUint32(0, 1); view.setUint32(4, 0);
1057
- return createFullBox('stco', 0, 0, data);
1058
- }
1059
-
1060
- updateChunkOffsets(moov, mdatOffset) { this.updateStcoInBox(moov, mdatOffset, 0); }
1061
-
1062
- updateStcoInBox(data, mdatOffset, trackIndex) {
1063
- let offset = 8;
1064
- while (offset < data.byteLength - 8) {
1065
- const view = new DataView(data.buffer, data.byteOffset + offset);
1066
- const size = view.getUint32(0);
1067
- const type = String.fromCharCode(data[offset+4], data[offset+5], data[offset+6], data[offset+7]);
1068
- if (size < 8 || offset + size > data.byteLength) break;
1069
- if (type === 'stco') {
1070
- view.setUint32(16, trackIndex === 0 ? mdatOffset + this.videoChunkOffset : mdatOffset + this.audioChunkOffset);
1071
- trackIndex++;
1072
- } else if (['moov', 'trak', 'mdia', 'minf', 'stbl'].includes(type)) {
1073
- trackIndex = this.updateStcoInBox(data.subarray(offset, offset + size), mdatOffset, trackIndex);
1074
- }
1075
- offset += size;
1076
- }
1077
- return trackIndex;
1078
- }
1079
- }
1080
-
1081
/**
 * Look up the descriptive entry for a stream type in STREAM_TYPES.
 *
 * @param {number} streamType - Stream type identifier used as the lookup key.
 * @returns {{name: string, supported: boolean}} The matching STREAM_TYPES
 *   entry, or an "Unknown (0x..)" placeholder marked unsupported when there
 *   is no entry.
 */
function getCodecInfo(streamType) {
    const knownEntry = STREAM_TYPES[streamType];
    if (knownEntry) {
        return knownEntry;
    }
    return { name: `Unknown (0x${streamType?.toString(16)})`, supported: false };
}
1087
49
 
1088
50
  /**
1089
51
  * Check if a video access unit contains a keyframe (IDR NAL unit)
@@ -1097,7 +59,9 @@
1097
59
  }
1098
60
 
1099
61
  /**
1100
- * Clip access units to a time range, snapping to keyframes
62
+ * Clip access units to a time range, snapping to keyframes for decode
63
+ * but using edit list for precise playback timing
64
+ *
1101
65
  * @param {Array} videoAUs - Video access units
1102
66
  * @param {Array} audioAUs - Audio access units
1103
67
  * @param {number} startTime - Start time in seconds
@@ -1109,34 +73,52 @@
1109
73
  const startPts = startTime * PTS_PER_SECOND;
1110
74
  const endPts = endTime * PTS_PER_SECOND;
1111
75
 
1112
- // Find keyframe at or before startTime
1113
- let startIdx = 0;
76
+ // Find keyframe at or before startTime (needed for decoding)
77
+ let keyframeIdx = 0;
1114
78
  for (let i = 0; i < videoAUs.length; i++) {
1115
79
  if (videoAUs[i].pts > startPts) break;
1116
- if (isKeyframe(videoAUs[i])) startIdx = i;
80
+ if (isKeyframe(videoAUs[i])) keyframeIdx = i;
1117
81
  }
1118
82
 
1119
- // Find first frame after endTime
83
+ // Find first frame at or after endTime
1120
84
  let endIdx = videoAUs.length;
1121
- for (let i = startIdx; i < videoAUs.length; i++) {
85
+ for (let i = keyframeIdx; i < videoAUs.length; i++) {
1122
86
  if (videoAUs[i].pts >= endPts) {
1123
87
  endIdx = i;
1124
88
  break;
1125
89
  }
1126
90
  }
1127
91
 
1128
- // Clip video
1129
- const clippedVideo = videoAUs.slice(startIdx, endIdx);
92
+ // Clip video starting from keyframe (for proper decoding)
93
+ const clippedVideo = videoAUs.slice(keyframeIdx, endIdx);
94
+
95
+ if (clippedVideo.length === 0) {
96
+ return {
97
+ video: [],
98
+ audio: [],
99
+ actualStartTime: startTime,
100
+ actualEndTime: endTime,
101
+ offset: 0,
102
+ preroll: 0
103
+ };
104
+ }
105
+
106
+ // Get PTS of the keyframe (decode start) and of the last included frame
107
+ const keyframePts = clippedVideo[0].pts;
108
+ const lastFramePts = clippedVideo[clippedVideo.length - 1].pts;
1130
109
 
1131
- // Get actual PTS range from clipped video
1132
- const actualStartPts = clippedVideo.length > 0 ? clippedVideo[0].pts : 0;
1133
- const actualEndPts = clippedVideo.length > 0 ? clippedVideo[clippedVideo.length - 1].pts : 0;
110
+ // Pre-roll: time between keyframe and requested start
111
+ // This is the time the decoder needs to process but player shouldn't display
112
+ const prerollPts = Math.max(0, startPts - keyframePts);
1134
113
 
1135
- // Clip audio to match video time range
1136
- const clippedAudio = audioAUs.filter(au => au.pts >= actualStartPts && au.pts <= actualEndPts);
114
+ // Clip audio to the REQUESTED time range (not from keyframe)
115
+ // Audio doesn't need keyframe pre-roll
116
+ const audioStartPts = startPts;
117
+ const audioEndPts = Math.min(endPts, lastFramePts);
118
+ const clippedAudio = audioAUs.filter(au => au.pts >= audioStartPts && au.pts < audioEndPts);
1137
119
 
1138
- // Normalize timestamps so clip starts at 0
1139
- const offset = actualStartPts;
120
+ // Normalize all timestamps so keyframe starts at 0
121
+ const offset = keyframePts;
1140
122
  for (const au of clippedVideo) {
1141
123
  au.pts -= offset;
1142
124
  au.dts -= offset;
@@ -1148,9 +130,12 @@
1148
130
  return {
1149
131
  video: clippedVideo,
1150
132
  audio: clippedAudio,
1151
- actualStartTime: actualStartPts / PTS_PER_SECOND,
1152
- actualEndTime: actualEndPts / PTS_PER_SECOND,
1153
- offset
133
+ actualStartTime: keyframePts / PTS_PER_SECOND, // Where decode starts (keyframe)
134
+ actualEndTime: lastFramePts / PTS_PER_SECOND,
135
+ requestedStartTime: startTime, // Where playback should start
136
+ requestedEndTime: endTime,
137
+ offset,
138
+ preroll: prerollPts // Edit list will use this to skip pre-roll frames during playback
1154
139
  };
1155
140
  }
1156
141
 
@@ -1284,6 +269,9 @@
1284
269
 
1285
270
  log(`Processing...`, { phase: 'convert', percent: 70 });
1286
271
 
272
+ // Track preroll for edit list (used for precise clipping)
273
+ let clipPreroll = 0;
274
+
1287
275
  // Apply time range clipping if specified
1288
276
  if (options.startTime !== undefined || options.endTime !== undefined) {
1289
277
  const startTime = options.startTime || 0;
@@ -1298,25 +286,32 @@
1298
286
 
1299
287
  parser.videoAccessUnits = clipResult.video;
1300
288
  parser.audioAccessUnits = clipResult.audio;
289
+ clipPreroll = clipResult.preroll;
1301
290
 
1302
291
  // Update PTS arrays to match
1303
292
  parser.videoPts = clipResult.video.map(au => au.pts);
1304
293
  parser.videoDts = clipResult.video.map(au => au.dts);
1305
294
  parser.audioPts = clipResult.audio.map(au => au.pts);
1306
295
 
1307
- log(`Clipped: ${clipResult.actualStartTime.toFixed(2)}s - ${clipResult.actualEndTime.toFixed(2)}s (${clipResult.video.length} video, ${clipResult.audio.length} audio frames)`, { phase: 'convert', percent: 80 });
296
+ const prerollMs = (clipPreroll / 90).toFixed(0);
297
+ const endTimeStr = clipResult.requestedEndTime === Infinity ? 'end' : clipResult.requestedEndTime.toFixed(2) + 's';
298
+ const clipDuration = clipResult.requestedEndTime === Infinity
299
+ ? (clipResult.actualEndTime - clipResult.requestedStartTime).toFixed(2)
300
+ : (clipResult.requestedEndTime - clipResult.requestedStartTime).toFixed(2);
301
+ log(`Clipped: ${clipResult.requestedStartTime.toFixed(2)}s - ${endTimeStr} (${clipDuration}s, ${prerollMs}ms preroll)`, { phase: 'convert', percent: 80 });
1308
302
  }
1309
303
 
1310
304
  log(`Building MP4...`, { phase: 'convert', percent: 85 });
1311
- const builder = new MP4Builder(parser);
1312
- const { width, height } = builder.getVideoDimensions();
305
+ const muxer = new MP4Muxer(parser, { preroll: clipPreroll });
306
+ const { width, height } = muxer.getVideoDimensions();
1313
307
  log(`Dimensions: ${width}x${height}`);
1314
308
 
1315
- const result = builder.build();
309
+ const result = muxer.build();
1316
310
  log(`Complete`, { phase: 'convert', percent: 100 });
1317
311
  return result;
1318
312
  }
1319
313
 
314
+ { TSParser };
1320
315
  default convertTsToMp4;
1321
316
 
1322
317
  // ============================================
@@ -1757,7 +752,7 @@
1757
752
  toMp4.isMpegTs = isMpegTs;
1758
753
  toMp4.isFmp4 = isFmp4;
1759
754
  toMp4.isStandardMp4 = isStandardMp4;
1760
- toMp4.version = '1.0.5';
755
+ toMp4.version = '1.0.7';
1761
756
 
1762
757
  return toMp4;
1763
758
  });