@zenvor/hls.js 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/LICENSE +28 -0
  2. package/README.md +472 -0
  3. package/dist/hls-demo.js +26995 -0
  4. package/dist/hls-demo.js.map +1 -0
  5. package/dist/hls.d.mts +4204 -0
  6. package/dist/hls.d.ts +4204 -0
  7. package/dist/hls.js +40050 -0
  8. package/dist/hls.js.d.ts +4204 -0
  9. package/dist/hls.js.map +1 -0
  10. package/dist/hls.light.js +27145 -0
  11. package/dist/hls.light.js.map +1 -0
  12. package/dist/hls.light.min.js +2 -0
  13. package/dist/hls.light.min.js.map +1 -0
  14. package/dist/hls.light.mjs +26392 -0
  15. package/dist/hls.light.mjs.map +1 -0
  16. package/dist/hls.min.js +2 -0
  17. package/dist/hls.min.js.map +1 -0
  18. package/dist/hls.mjs +38956 -0
  19. package/dist/hls.mjs.map +1 -0
  20. package/dist/hls.worker.js +2 -0
  21. package/dist/hls.worker.js.map +1 -0
  22. package/package.json +143 -0
  23. package/src/config.ts +794 -0
  24. package/src/controller/abr-controller.ts +1019 -0
  25. package/src/controller/algo-data-controller.ts +794 -0
  26. package/src/controller/audio-stream-controller.ts +1099 -0
  27. package/src/controller/audio-track-controller.ts +454 -0
  28. package/src/controller/base-playlist-controller.ts +438 -0
  29. package/src/controller/base-stream-controller.ts +2526 -0
  30. package/src/controller/buffer-controller.ts +2015 -0
  31. package/src/controller/buffer-operation-queue.ts +159 -0
  32. package/src/controller/cap-level-controller.ts +367 -0
  33. package/src/controller/cmcd-controller.ts +422 -0
  34. package/src/controller/content-steering-controller.ts +622 -0
  35. package/src/controller/eme-controller.ts +1617 -0
  36. package/src/controller/error-controller.ts +627 -0
  37. package/src/controller/fps-controller.ts +146 -0
  38. package/src/controller/fragment-finders.ts +256 -0
  39. package/src/controller/fragment-tracker.ts +567 -0
  40. package/src/controller/gap-controller.ts +719 -0
  41. package/src/controller/id3-track-controller.ts +488 -0
  42. package/src/controller/interstitial-player.ts +302 -0
  43. package/src/controller/interstitials-controller.ts +2895 -0
  44. package/src/controller/interstitials-schedule.ts +698 -0
  45. package/src/controller/latency-controller.ts +294 -0
  46. package/src/controller/level-controller.ts +776 -0
  47. package/src/controller/stream-controller.ts +1597 -0
  48. package/src/controller/subtitle-stream-controller.ts +508 -0
  49. package/src/controller/subtitle-track-controller.ts +617 -0
  50. package/src/controller/timeline-controller.ts +677 -0
  51. package/src/crypt/aes-crypto.ts +36 -0
  52. package/src/crypt/aes-decryptor.ts +339 -0
  53. package/src/crypt/decrypter-aes-mode.ts +4 -0
  54. package/src/crypt/decrypter.ts +225 -0
  55. package/src/crypt/fast-aes-key.ts +39 -0
  56. package/src/define-plugin.d.ts +17 -0
  57. package/src/demux/audio/aacdemuxer.ts +126 -0
  58. package/src/demux/audio/ac3-demuxer.ts +170 -0
  59. package/src/demux/audio/adts.ts +249 -0
  60. package/src/demux/audio/base-audio-demuxer.ts +205 -0
  61. package/src/demux/audio/dolby.ts +21 -0
  62. package/src/demux/audio/mp3demuxer.ts +85 -0
  63. package/src/demux/audio/mpegaudio.ts +177 -0
  64. package/src/demux/chunk-cache.ts +42 -0
  65. package/src/demux/dummy-demuxed-track.ts +13 -0
  66. package/src/demux/inject-worker.ts +75 -0
  67. package/src/demux/mp4demuxer.ts +234 -0
  68. package/src/demux/sample-aes.ts +198 -0
  69. package/src/demux/transmuxer-interface.ts +449 -0
  70. package/src/demux/transmuxer-worker.ts +221 -0
  71. package/src/demux/transmuxer.ts +560 -0
  72. package/src/demux/tsdemuxer.ts +1256 -0
  73. package/src/demux/video/avc-video-parser.ts +401 -0
  74. package/src/demux/video/base-video-parser.ts +198 -0
  75. package/src/demux/video/exp-golomb.ts +153 -0
  76. package/src/demux/video/hevc-video-parser.ts +736 -0
  77. package/src/empty-es.js +5 -0
  78. package/src/empty.js +3 -0
  79. package/src/errors.ts +107 -0
  80. package/src/events.ts +548 -0
  81. package/src/exports-default.ts +3 -0
  82. package/src/exports-named.ts +81 -0
  83. package/src/hls.ts +1613 -0
  84. package/src/is-supported.ts +54 -0
  85. package/src/loader/date-range.ts +207 -0
  86. package/src/loader/fragment-loader.ts +403 -0
  87. package/src/loader/fragment.ts +487 -0
  88. package/src/loader/interstitial-asset-list.ts +162 -0
  89. package/src/loader/interstitial-event.ts +337 -0
  90. package/src/loader/key-loader.ts +439 -0
  91. package/src/loader/level-details.ts +203 -0
  92. package/src/loader/level-key.ts +259 -0
  93. package/src/loader/load-stats.ts +17 -0
  94. package/src/loader/m3u8-parser.ts +1072 -0
  95. package/src/loader/playlist-loader.ts +839 -0
  96. package/src/polyfills/number.ts +15 -0
  97. package/src/remux/aac-helper.ts +81 -0
  98. package/src/remux/mp4-generator.ts +1380 -0
  99. package/src/remux/mp4-remuxer.ts +1261 -0
  100. package/src/remux/passthrough-remuxer.ts +434 -0
  101. package/src/task-loop.ts +130 -0
  102. package/src/types/algo.ts +44 -0
  103. package/src/types/buffer.ts +105 -0
  104. package/src/types/component-api.ts +20 -0
  105. package/src/types/demuxer.ts +208 -0
  106. package/src/types/events.ts +574 -0
  107. package/src/types/fragment-tracker.ts +23 -0
  108. package/src/types/level.ts +268 -0
  109. package/src/types/loader.ts +198 -0
  110. package/src/types/media-playlist.ts +92 -0
  111. package/src/types/network-details.ts +3 -0
  112. package/src/types/remuxer.ts +104 -0
  113. package/src/types/track.ts +12 -0
  114. package/src/types/transmuxer.ts +46 -0
  115. package/src/types/tuples.ts +6 -0
  116. package/src/types/vtt.ts +11 -0
  117. package/src/utils/arrays.ts +22 -0
  118. package/src/utils/attr-list.ts +192 -0
  119. package/src/utils/binary-search.ts +46 -0
  120. package/src/utils/buffer-helper.ts +173 -0
  121. package/src/utils/cea-608-parser.ts +1413 -0
  122. package/src/utils/chunker.ts +41 -0
  123. package/src/utils/codecs.ts +314 -0
  124. package/src/utils/cues.ts +96 -0
  125. package/src/utils/discontinuities.ts +174 -0
  126. package/src/utils/encryption-methods-util.ts +21 -0
  127. package/src/utils/error-helper.ts +95 -0
  128. package/src/utils/event-listener-helper.ts +16 -0
  129. package/src/utils/ewma-bandwidth-estimator.ts +97 -0
  130. package/src/utils/ewma.ts +43 -0
  131. package/src/utils/fetch-loader.ts +331 -0
  132. package/src/utils/global.ts +2 -0
  133. package/src/utils/hash.ts +10 -0
  134. package/src/utils/hdr.ts +67 -0
  135. package/src/utils/hex.ts +32 -0
  136. package/src/utils/imsc1-ttml-parser.ts +261 -0
  137. package/src/utils/keysystem-util.ts +45 -0
  138. package/src/utils/level-helper.ts +629 -0
  139. package/src/utils/logger.ts +120 -0
  140. package/src/utils/media-option-attributes.ts +49 -0
  141. package/src/utils/mediacapabilities-helper.ts +301 -0
  142. package/src/utils/mediakeys-helper.ts +210 -0
  143. package/src/utils/mediasource-helper.ts +37 -0
  144. package/src/utils/mp4-tools.ts +1473 -0
  145. package/src/utils/number.ts +3 -0
  146. package/src/utils/numeric-encoding-utils.ts +26 -0
  147. package/src/utils/output-filter.ts +46 -0
  148. package/src/utils/rendition-helper.ts +505 -0
  149. package/src/utils/safe-json-stringify.ts +22 -0
  150. package/src/utils/texttrack-utils.ts +164 -0
  151. package/src/utils/time-ranges.ts +17 -0
  152. package/src/utils/timescale-conversion.ts +46 -0
  153. package/src/utils/utf8-utils.ts +18 -0
  154. package/src/utils/variable-substitution.ts +105 -0
  155. package/src/utils/vttcue.ts +384 -0
  156. package/src/utils/vttparser.ts +497 -0
  157. package/src/utils/webvtt-parser.ts +166 -0
  158. package/src/utils/xhr-loader.ts +337 -0
  159. package/src/version.ts +1 -0
@@ -0,0 +1,1261 @@
1
+ import AAC from './aac-helper';
2
+ import MP4 from './mp4-generator';
3
+ import { ErrorDetails, ErrorTypes } from '../errors';
4
+ import { Events } from '../events';
5
+ import { PlaylistLevelType } from '../types/loader';
6
+ import { type ILogger, Logger } from '../utils/logger';
7
+ import {
8
+ timestampToString,
9
+ toMsFromMpegTsClock,
10
+ } from '../utils/timescale-conversion';
11
+ import type { HlsConfig } from '../config';
12
+ import type { HlsEventEmitter } from '../events';
13
+ import type { SourceBufferName } from '../types/buffer';
14
+ import type {
15
+ AudioSample,
16
+ DemuxedAudioTrack,
17
+ DemuxedMetadataTrack,
18
+ DemuxedUserdataTrack,
19
+ DemuxedVideoTrack,
20
+ VideoSample,
21
+ } from '../types/demuxer';
22
+ import type {
23
+ InitSegmentData,
24
+ Mp4Sample,
25
+ RemuxedMetadata,
26
+ RemuxedTrack,
27
+ RemuxedUserdata,
28
+ Remuxer,
29
+ RemuxerResult,
30
+ } from '../types/remuxer';
31
+ import type { TrackSet } from '../types/track';
32
+ import type { TypeSupported } from '../utils/codecs';
33
+ import type {
34
+ RationalTimestamp,
35
+ TimestampOffset,
36
+ } from '../utils/timescale-conversion';
37
+
38
// Longest run of missing audio the remuxer will fill before giving up (ms).
const MAX_SILENT_FRAME_DURATION = 10 * 1000; // 10 seconds
// Fixed PCM sample count per encoded frame for each supported audio codec.
const AAC_SAMPLES_PER_FRAME = 1024;
const MPEG_AUDIO_SAMPLE_PER_FRAME = 1152;
const AC3_SAMPLES_PER_FRAME = 1536;

// Browser major versions sniffed once from navigator.userAgent in the
// constructor (null = not yet probed, 0 = no match). chromeVersion gates
// Chrome-specific timestamp-continuity workarounds in remuxVideo;
// safariWebkitVersion's use is outside this chunk — confirm before relying on it.
let chromeVersion: number | null = null;
let safariWebkitVersion: number | null = null;
45
+
46
+ function createMp4Sample(
47
+ isKeyframe: boolean,
48
+ duration: number,
49
+ size: number,
50
+ cts: number,
51
+ ): Mp4Sample {
52
+ return {
53
+ duration,
54
+ size,
55
+ cts,
56
+ flags: {
57
+ isLeading: 0,
58
+ isDependedOn: 0,
59
+ hasRedundancy: 0,
60
+ degradPrio: 0,
61
+ dependsOn: isKeyframe ? 2 : 1,
62
+ isNonSync: isKeyframe ? 0 : 1,
63
+ },
64
+ };
65
+ }
66
export default class MP4Remuxer extends Logger implements Remuxer {
  private readonly observer: HlsEventEmitter;
  private readonly config: HlsConfig;
  private readonly typeSupported: TypeSupported;
  // True once generateIS() has produced an init segment for the current tracks.
  private ISGenerated: boolean = false;
  // First PTS/DTS of the demuxing context; remuxed timestamps are rebased
  // against these offsets.
  private _initPTS: TimestampOffset | null = null;
  private _initDTS: TimestampOffset | null = null;
  // Expected start timestamps of the next fragment, carried over from the
  // previous remux pass (null when no contiguous reference exists).
  private nextVideoTs: number | null = null;
  private nextAudioTs: number | null = null;
  // Video sample duration from the previous pass, reused as a fallback average.
  private videoSampleDuration: number | null = null;
  // Whether the incoming fragment is expected to continue the previous one
  // without a timestamp discontinuity.
  private isAudioContiguous: boolean = false;
  private isVideoContiguous: boolean = false;
  // Video configuration captured when the init segment was generated; any
  // change in these values forces init segment regeneration in remux().
  private videoTrackConfig?: {
    width?: number;
    height?: number;
    pixelRatio?: [number, number];
  };
83
+
84
+ constructor(
85
+ observer: HlsEventEmitter,
86
+ config: HlsConfig,
87
+ typeSupported: TypeSupported,
88
+ logger: ILogger,
89
+ ) {
90
+ super('mp4-remuxer', logger);
91
+ this.observer = observer;
92
+ this.config = config;
93
+ this.typeSupported = typeSupported;
94
+ this.ISGenerated = false;
95
+
96
+ if (chromeVersion === null) {
97
+ const userAgent = navigator.userAgent || '';
98
+ const result = userAgent.match(/Chrome\/(\d+)/i);
99
+ chromeVersion = result ? parseInt(result[1]) : 0;
100
+ }
101
+ if (safariWebkitVersion === null) {
102
+ const result = navigator.userAgent.match(/Safari\/(\d+)/i);
103
+ safariWebkitVersion = result ? parseInt(result[1]) : 0;
104
+ }
105
+ }
106
+
107
  destroy() {
    // Drop references so they can be garbage-collected. The readonly/typed
    // fields are intentionally nulled out (hence the ts-ignore); the instance
    // must not be used after destroy().
    // @ts-ignore
    this.config = this.videoTrackConfig = this._initPTS = this._initDTS = null;
  }
111
+
112
+ resetTimeStamp(defaultTimeStamp: TimestampOffset | null) {
113
+ const initPTS = this._initPTS;
114
+ if (
115
+ !initPTS ||
116
+ !defaultTimeStamp ||
117
+ defaultTimeStamp.trackId !== initPTS.trackId ||
118
+ defaultTimeStamp.baseTime !== initPTS.baseTime ||
119
+ defaultTimeStamp.timescale !== initPTS.timescale
120
+ ) {
121
+ this.log(
122
+ `Reset initPTS: ${initPTS ? timestampToString(initPTS) : initPTS} > ${defaultTimeStamp ? timestampToString(defaultTimeStamp) : defaultTimeStamp}`,
123
+ );
124
+ }
125
+
126
+ this._initPTS = this._initDTS = defaultTimeStamp;
127
+ }
128
+
129
+ resetNextTimestamp() {
130
+ this.log('reset next timestamp');
131
+ this.isVideoContiguous = false;
132
+ this.isAudioContiguous = false;
133
+ }
134
+
135
+ resetInitSegment() {
136
+ this.log('ISGenerated flag reset');
137
+ this.ISGenerated = false;
138
+ this.videoTrackConfig = undefined;
139
+ }
140
+
141
+ getVideoStartPts(videoSamples: VideoSample[]) {
142
+ // Get the minimum PTS value relative to the first sample's PTS, normalized for 33-bit wrapping
143
+ let rolloverDetected = false;
144
+ const firstPts = videoSamples[0].pts;
145
+ const startPTS = videoSamples.reduce((minPTS, sample) => {
146
+ let pts = sample.pts;
147
+ let delta = pts - minPTS;
148
+ if (delta < -4294967296) {
149
+ // 2^32, see PTSNormalize for reasoning, but we're hitting a rollover here, and we don't want that to impact the timeOffset calculation
150
+ rolloverDetected = true;
151
+ pts = normalizePts(pts, firstPts);
152
+ delta = pts - minPTS;
153
+ }
154
+ if (delta > 0) {
155
+ return minPTS;
156
+ }
157
+ return pts;
158
+ }, firstPts);
159
+ if (rolloverDetected) {
160
+ this.debug('PTS rollover detected');
161
+ }
162
+ return startPTS;
163
+ }
164
+
165
  /**
   * Remux one pass of demuxed TS tracks into MP4 fragments.
   *
   * Produces (as available): a regenerated init segment, remuxed audio and
   * video tracks, and flushed ID3/text cue samples. Audio is intentionally
   * remuxed before video so remuxVideo can use the audio end timestamp
   * computed by remuxAudio.
   *
   * @param timeOffset - playlist position of the fragment's first timestamp.
   * @param accurateTimeOffset - whether timeOffset is trusted for initPTS computation.
   * @param flush - end of segment; remux whatever samples are buffered.
   * @param playlistType - which playlist this fragment belongs to (audio-only affects remuxAudio).
   */
  remux(
    audioTrack: DemuxedAudioTrack,
    videoTrack: DemuxedVideoTrack,
    id3Track: DemuxedMetadataTrack,
    textTrack: DemuxedUserdataTrack,
    timeOffset: number,
    accurateTimeOffset: boolean,
    flush: boolean,
    playlistType: PlaylistLevelType,
  ): RemuxerResult {
    let video: RemuxedTrack | undefined;
    let audio: RemuxedTrack | undefined;
    let initSegment: InitSegmentData | undefined;
    let text: RemuxedUserdata | undefined;
    let id3: RemuxedMetadata | undefined;
    let independent: boolean | undefined;
    let audioTimeOffset = timeOffset;
    let videoTimeOffset = timeOffset;

    // If we're remuxing audio and video progressively, wait until we've received enough samples for each track before proceeding.
    // This is done to synchronize the audio and video streams. We know if the current segment will have samples if the "pid"
    // parameter is greater than -1. The pid is set when the PMT is parsed, which contains the tracks list.
    // However, if the initSegment has already been generated, or we've reached the end of a segment (flush),
    // then we can remux one track without waiting for the other.
    const hasAudio = audioTrack.pid > -1;
    const hasVideo = videoTrack.pid > -1;
    const length = videoTrack.samples.length;
    const enoughAudioSamples = audioTrack.samples.length > 0;
    // Video needs at least 2 samples (to derive a duration) unless flushing.
    const enoughVideoSamples = (flush && length > 0) || length > 1;
    const canRemuxAvc =
      ((!hasAudio || enoughAudioSamples) &&
        (!hasVideo || enoughVideoSamples)) ||
      this.ISGenerated ||
      flush;

    if (canRemuxAvc) {
      if (this.ISGenerated) {
        // Regenerate the init segment when the video configuration changed,
        // when video appeared without a recorded config, or when audio shows
        // up for the first time (no audio timestamp carried over yet).
        const config = this.videoTrackConfig;
        if (
          (config &&
            (videoTrack.width !== config.width ||
              videoTrack.height !== config.height ||
              videoTrack.pixelRatio?.[0] !== config.pixelRatio?.[0] ||
              videoTrack.pixelRatio?.[1] !== config.pixelRatio?.[1])) ||
          (!config && enoughVideoSamples) ||
          (this.nextAudioTs === null && enoughAudioSamples)
        ) {
          this.resetInitSegment();
        }
      }
      if (!this.ISGenerated) {
        initSegment = this.generateIS(
          audioTrack,
          videoTrack,
          timeOffset,
          accurateTimeOffset,
        );
      }

      const isVideoContiguous = this.isVideoContiguous;
      let firstKeyFrameIndex = -1;
      let firstKeyFramePTS;

      if (enoughVideoSamples) {
        firstKeyFrameIndex = findKeyframeIndex(videoTrack.samples);
        // On a discontinuity, optionally drop leading non-keyframe samples so
        // the fragment starts on an independently decodable frame.
        if (!isVideoContiguous && this.config.forceKeyFrameOnDiscontinuity) {
          independent = true;
          if (firstKeyFrameIndex > 0) {
            this.warn(
              `Dropped ${firstKeyFrameIndex} out of ${length} video samples due to a missing keyframe`,
            );
            const startPTS = this.getVideoStartPts(videoTrack.samples);
            videoTrack.samples = videoTrack.samples.slice(firstKeyFrameIndex);
            videoTrack.dropped += firstKeyFrameIndex;
            // Shift the video time offset forward by the dropped duration.
            videoTimeOffset +=
              (videoTrack.samples[0].pts - startPTS) /
              videoTrack.inputTimeScale;
            firstKeyFramePTS = videoTimeOffset;
          } else if (firstKeyFrameIndex === -1) {
            this.warn(`No keyframe found out of ${length} video samples`);
            independent = false;
          }
        }
      }

      if (this.ISGenerated) {
        if (enoughAudioSamples && enoughVideoSamples) {
          // timeOffset is expected to be the offset of the first timestamp of this fragment (first DTS)
          // if first audio DTS is not aligned with first video DTS then we need to take that into account
          // when providing timeOffset to remuxAudio / remuxVideo. if we don't do that, there might be a permanent / small
          // drift between audio and video streams
          const startPTS = this.getVideoStartPts(videoTrack.samples);
          const tsDelta =
            normalizePts(audioTrack.samples[0].pts, startPTS) - startPTS;
          const audiovideoTimestampDelta = tsDelta / videoTrack.inputTimeScale;
          audioTimeOffset += Math.max(0, audiovideoTimestampDelta);
          videoTimeOffset += Math.max(0, -audiovideoTimestampDelta);
        }

        // Purposefully remuxing audio before video, so that remuxVideo can use nextAudioPts, which is calculated in remuxAudio.
        if (enoughAudioSamples) {
          // if initSegment was generated without audio samples, regenerate it again
          if (!audioTrack.samplerate) {
            this.warn('regenerate InitSegment as audio detected');
            initSegment = this.generateIS(
              audioTrack,
              videoTrack,
              timeOffset,
              accurateTimeOffset,
            );
          }
          audio = this.remuxAudio(
            audioTrack,
            audioTimeOffset,
            this.isAudioContiguous,
            accurateTimeOffset,
            hasVideo ||
              enoughVideoSamples ||
              playlistType === PlaylistLevelType.AUDIO
              ? videoTimeOffset
              : undefined,
          );
          if (enoughVideoSamples) {
            const audioTrackLength = audio ? audio.endPTS - audio.startPTS : 0;
            // if initSegment was generated without video samples, regenerate it again
            if (!videoTrack.inputTimeScale) {
              this.warn('regenerate InitSegment as video detected');
              initSegment = this.generateIS(
                audioTrack,
                videoTrack,
                timeOffset,
                accurateTimeOffset,
              );
            }
            video = this.remuxVideo(
              videoTrack,
              videoTimeOffset,
              isVideoContiguous,
              audioTrackLength,
            );
          }
        } else if (enoughVideoSamples) {
          video = this.remuxVideo(
            videoTrack,
            videoTimeOffset,
            isVideoContiguous,
            0,
          );
        }
        if (video) {
          video.firstKeyFrame = firstKeyFrameIndex;
          video.independent = firstKeyFrameIndex !== -1;
          video.firstKeyFramePTS = firstKeyFramePTS;
        }
      }
    }

    // Allow ID3 and text to remux, even if more audio/video samples are required
    if (this.ISGenerated && this._initPTS && this._initDTS) {
      if (id3Track.samples.length) {
        id3 = flushTextTrackMetadataCueSamples(
          id3Track,
          timeOffset,
          this._initPTS,
          this._initDTS,
        );
      }

      if (textTrack.samples.length) {
        text = flushTextTrackUserdataCueSamples(
          textTrack,
          timeOffset,
          this._initPTS,
        );
      }
    }

    return {
      audio,
      video,
      initSegment,
      independent,
      text,
      id3,
    };
  }
351
+
352
+ computeInitPts(
353
+ basetime: number,
354
+ timescale: number,
355
+ presentationTime: number,
356
+ type: 'audio' | 'video',
357
+ ): number {
358
+ const offset = Math.round(presentationTime * timescale);
359
+ let timestamp = normalizePts(basetime, offset);
360
+ if (timestamp < offset + timescale) {
361
+ this.log(
362
+ `Adjusting PTS for rollover in timeline near ${(offset - timestamp) / timescale} ${type}`,
363
+ );
364
+ while (timestamp < offset + timescale) {
365
+ timestamp += 8589934592;
366
+ }
367
+ }
368
+ return timestamp - offset;
369
+ }
370
+
371
  /**
   * Generate MP4 init segments for whichever tracks are ready (audio needs a
   * decoder config and samples; video needs SPS/PPS and samples), and — when
   * permitted — compute and store the shared initPTS/initDTS offsets.
   *
   * @param timeOffset - playlist position used when computing initPTS.
   * @param accurateTimeOffset - allows recomputing initPTS even when one is already set.
   * @returns init segment data per track, or undefined when no track is ready yet.
   */
  generateIS(
    audioTrack: DemuxedAudioTrack,
    videoTrack: DemuxedVideoTrack,
    timeOffset: number,
    accurateTimeOffset: boolean,
  ): InitSegmentData | undefined {
    const audioSamples = audioTrack.samples;
    const videoSamples = videoTrack.samples;
    const typeSupported = this.typeSupported;
    const tracks: TrackSet = {};
    const _initPTS = this._initPTS;
    // Recompute initPTS when none exists yet, or when the offset is accurate.
    let computePTSDTS = !_initPTS || accurateTimeOffset;
    let container = 'audio/mp4';
    let initPTS: number | undefined;
    let initDTS: number | undefined;
    let timescale: number | undefined;
    let trackId: number = -1;

    if (computePTSDTS) {
      // Start at Infinity so Math.min picks the first computed candidate below.
      initPTS = initDTS = Infinity;
    }

    if (audioTrack.config && audioSamples.length) {
      // let's use audio sampling rate as MP4 time scale.
      // rationale is that there is a integer nb of audio frames per audio sample (1024 for AAC)
      // using audio sampling rate here helps having an integer MP4 frame duration
      // this avoids potential rounding issue and AV sync issue
      audioTrack.timescale = audioTrack.samplerate;
      switch (audioTrack.segmentCodec) {
        case 'mp3':
          if (typeSupported.mpeg) {
            // Chrome and Safari
            container = 'audio/mpeg';
            audioTrack.codec = '';
          } else if (typeSupported.mp3) {
            // Firefox
            audioTrack.codec = 'mp3';
          }
          break;

        case 'ac3':
          audioTrack.codec = 'ac-3';
          break;
      }
      tracks.audio = {
        id: 'audio',
        container: container,
        codec: audioTrack.codec,
        // Raw MPEG audio is passed through as-is, so no MP4 init segment.
        initSegment:
          audioTrack.segmentCodec === 'mp3' && typeSupported.mpeg
            ? new Uint8Array(0)
            : MP4.initSegment([audioTrack]),
        metadata: {
          channelCount: audioTrack.channelCount,
        },
      };
      if (computePTSDTS) {
        trackId = audioTrack.id;
        timescale = audioTrack.inputTimeScale;
        if (!_initPTS || timescale !== _initPTS.timescale) {
          // remember first PTS of this demuxing context. for audio, PTS = DTS
          initPTS = initDTS = this.computeInitPts(
            audioSamples[0].pts,
            timescale,
            timeOffset,
            'audio',
          );
        } else {
          // Existing initPTS already uses this timescale; keep it.
          computePTSDTS = false;
        }
      }
    }

    if (videoTrack.sps && videoTrack.pps && videoSamples.length) {
      // let's use input time scale as MP4 video timescale
      // we use input time scale straight away to avoid rounding issues on frame duration / cts computation
      videoTrack.timescale = videoTrack.inputTimeScale;
      tracks.video = {
        id: 'main',
        container: 'video/mp4',
        codec: videoTrack.codec,
        initSegment: MP4.initSegment([videoTrack]),
        metadata: {
          width: videoTrack.width,
          height: videoTrack.height,
        },
      };
      if (computePTSDTS) {
        trackId = videoTrack.id;
        timescale = videoTrack.inputTimeScale;
        if (!_initPTS || timescale !== _initPTS.timescale) {
          // Use the rollover-normalized minimum PTS / first DTS as candidates,
          // keeping the smaller of the audio and video values.
          const basePTS = this.getVideoStartPts(videoSamples);
          const baseDTS = normalizePts(videoSamples[0].dts, basePTS);
          const videoInitDTS = this.computeInitPts(
            baseDTS,
            timescale,
            timeOffset,
            'video',
          );
          const videoInitPTS = this.computeInitPts(
            basePTS,
            timescale,
            timeOffset,
            'video',
          );
          initDTS = Math.min(initDTS as number, videoInitDTS);
          initPTS = Math.min(initPTS as number, videoInitPTS);
        } else {
          computePTSDTS = false;
        }
      }
      // Remember the configuration this init segment was built from so remux()
      // can detect changes and regenerate.
      this.videoTrackConfig = {
        width: videoTrack.width,
        height: videoTrack.height,
        pixelRatio: videoTrack.pixelRatio,
      };
    }

    if (Object.keys(tracks).length) {
      this.ISGenerated = true;
      if (computePTSDTS) {
        if (_initPTS) {
          this.warn(
            `Timestamps at playlist time: ${accurateTimeOffset ? '' : '~'}${timeOffset} ${initPTS! / timescale!} != initPTS: ${_initPTS.baseTime / _initPTS.timescale} (${_initPTS.baseTime}/${_initPTS.timescale}) trackId: ${_initPTS.trackId}`,
          );
        }
        this.log(
          `Found initPTS at playlist time: ${timeOffset} offset: ${initPTS! / timescale!} (${initPTS}/${timescale}) trackId: ${trackId}`,
        );
        this._initPTS = {
          baseTime: initPTS as number,
          timescale: timescale as number,
          trackId: trackId as number,
        };
        this._initDTS = {
          baseTime: initDTS as number,
          timescale: timescale as number,
          trackId: trackId as number,
        };
      } else {
        // initPTS was not (re)computed this pass; don't report one.
        initPTS = timescale = undefined;
      }

      return {
        tracks,
        initPTS,
        timescale,
        trackId,
      };
    }
    // No track ready: implicitly returns undefined.
  }
522
+
523
+ remuxVideo(
524
+ track: DemuxedVideoTrack,
525
+ timeOffset: number,
526
+ contiguous: boolean,
527
+ audioTrackLength: number,
528
+ ): RemuxedTrack | undefined {
529
+ const timeScale: number = track.inputTimeScale;
530
+ const inputSamples: Array<VideoSample> = track.samples;
531
+ const outputSamples: Array<Mp4Sample> = [];
532
+ const nbSamples = inputSamples.length;
533
+ const initPTS = this._initPTS as RationalTimestamp;
534
+ const initTime = (initPTS.baseTime * timeScale) / initPTS.timescale;
535
+ let nextVideoTs = this.nextVideoTs;
536
+ let offset = 8;
537
+ let mp4SampleDuration = this.videoSampleDuration;
538
+ let firstDTS;
539
+ let lastDTS;
540
+ let minPTS: number = Number.POSITIVE_INFINITY;
541
+ let maxPTS: number = Number.NEGATIVE_INFINITY;
542
+ let sortSamples = false;
543
+
544
+ // if parsed fragment is contiguous with last one, let's use last DTS value as reference
545
+ if (!contiguous || nextVideoTs === null) {
546
+ const pts = initTime + timeOffset * timeScale;
547
+ const cts =
548
+ inputSamples[0].pts -
549
+ normalizePts(inputSamples[0].dts, inputSamples[0].pts);
550
+ if (
551
+ chromeVersion &&
552
+ nextVideoTs !== null &&
553
+ Math.abs(pts - cts - (nextVideoTs + initTime)) < 15000
554
+ ) {
555
+ // treat as contigous to adjust samples that would otherwise produce video buffer gaps in Chrome
556
+ contiguous = true;
557
+ } else {
558
+ // if not contiguous, let's use target timeOffset
559
+ nextVideoTs = pts - cts - initTime;
560
+ }
561
+ }
562
+
563
+ // PTS is coded on 33bits, and can loop from -2^32 to 2^32
564
+ // PTSNormalize will make PTS/DTS value monotonic, we use last known DTS value as reference value
565
+ const nextVideoPts = nextVideoTs + initTime;
566
+ for (let i = 0; i < nbSamples; i++) {
567
+ const sample = inputSamples[i];
568
+ sample.pts = normalizePts(sample.pts, nextVideoPts);
569
+ sample.dts = normalizePts(sample.dts, nextVideoPts);
570
+ if (sample.dts < inputSamples[i > 0 ? i - 1 : i].dts) {
571
+ sortSamples = true;
572
+ }
573
+ }
574
+
575
+ // sort video samples by DTS then PTS then demux id order
576
+ if (sortSamples) {
577
+ inputSamples.sort(function (a, b) {
578
+ const deltadts = a.dts - b.dts;
579
+ const deltapts = a.pts - b.pts;
580
+ return deltadts || deltapts;
581
+ });
582
+ }
583
+
584
+ // Get first/last DTS
585
+ firstDTS = inputSamples[0].dts;
586
+ lastDTS = inputSamples[inputSamples.length - 1].dts;
587
+
588
+ // Sample duration (as expected by trun MP4 boxes), should be the delta between sample DTS
589
+ // set this constant duration as being the avg delta between consecutive DTS.
590
+ const inputDuration = lastDTS - firstDTS;
591
+ const averageSampleDuration = inputDuration
592
+ ? Math.round(inputDuration / (nbSamples - 1))
593
+ : mp4SampleDuration || track.inputTimeScale / 30;
594
+
595
+ // if fragment are contiguous, detect hole/overlapping between fragments
596
+ if (contiguous) {
597
+ // check timestamp continuity across consecutive fragments (this is to remove inter-fragment gap/hole)
598
+ const delta = firstDTS - nextVideoPts;
599
+ const foundHole = delta > averageSampleDuration;
600
+ const foundOverlap = delta < -1;
601
+ if (foundHole || foundOverlap) {
602
+ if (foundHole) {
603
+ this.warn(
604
+ `${(track.segmentCodec || '').toUpperCase()}: ${toMsFromMpegTsClock(
605
+ delta,
606
+ true,
607
+ )} ms (${delta}dts) hole between fragments detected at ${timeOffset.toFixed(
608
+ 3,
609
+ )}`,
610
+ );
611
+ } else {
612
+ this.warn(
613
+ `${(track.segmentCodec || '').toUpperCase()}: ${toMsFromMpegTsClock(
614
+ -delta,
615
+ true,
616
+ )} ms (${delta}dts) overlapping between fragments detected at ${timeOffset.toFixed(
617
+ 3,
618
+ )}`,
619
+ );
620
+ }
621
+ if (
622
+ !foundOverlap ||
623
+ nextVideoPts >= inputSamples[0].pts ||
624
+ chromeVersion
625
+ ) {
626
+ firstDTS = nextVideoPts;
627
+ const firstPTS = inputSamples[0].pts - delta;
628
+ if (foundHole) {
629
+ inputSamples[0].dts = firstDTS;
630
+ inputSamples[0].pts = firstPTS;
631
+ } else {
632
+ let isPTSOrderRetained = true;
633
+ for (let i = 0; i < inputSamples.length; i++) {
634
+ if (inputSamples[i].dts > firstPTS && isPTSOrderRetained) {
635
+ break;
636
+ }
637
+
638
+ const prevPTS = inputSamples[i].pts;
639
+ inputSamples[i].dts -= delta;
640
+ inputSamples[i].pts -= delta;
641
+
642
+ // check to see if this sample's PTS order has changed
643
+ // relative to the next one
644
+ if (i < inputSamples.length - 1) {
645
+ const nextSamplePTS = inputSamples[i + 1].pts;
646
+ const currentSamplePTS = inputSamples[i].pts;
647
+
648
+ const currentOrder = nextSamplePTS <= currentSamplePTS;
649
+ const prevOrder = nextSamplePTS <= prevPTS;
650
+
651
+ isPTSOrderRetained = currentOrder == prevOrder;
652
+ }
653
+ }
654
+ }
655
+ this.log(
656
+ `Video: Initial PTS/DTS adjusted: ${toMsFromMpegTsClock(
657
+ firstPTS,
658
+ true,
659
+ )}/${toMsFromMpegTsClock(
660
+ firstDTS,
661
+ true,
662
+ )}, delta: ${toMsFromMpegTsClock(delta, true)} ms`,
663
+ );
664
+ }
665
+ }
666
+ }
667
+
668
+ firstDTS = Math.max(0, firstDTS);
669
+
670
+ let nbNalu = 0;
671
+ let naluLen = 0;
672
+ let dtsStep = firstDTS;
673
+ for (let i = 0; i < nbSamples; i++) {
674
+ // compute total/avc sample length and nb of NAL units
675
+ const sample = inputSamples[i];
676
+ const units = sample.units;
677
+ const nbUnits = units.length;
678
+ let sampleLen = 0;
679
+ for (let j = 0; j < nbUnits; j++) {
680
+ sampleLen += units[j].data.length;
681
+ }
682
+
683
+ naluLen += sampleLen;
684
+ nbNalu += nbUnits;
685
+ sample.length = sampleLen;
686
+
687
+ // ensure sample monotonic DTS
688
+ if (sample.dts < dtsStep) {
689
+ sample.dts = dtsStep;
690
+ dtsStep += (averageSampleDuration / 4) | 0 || 1;
691
+ } else {
692
+ dtsStep = sample.dts;
693
+ }
694
+
695
+ minPTS = Math.min(sample.pts, minPTS);
696
+ maxPTS = Math.max(sample.pts, maxPTS);
697
+ }
698
+ lastDTS = inputSamples[nbSamples - 1].dts;
699
+
700
+ /* concatenate the video data and construct the mdat in place
701
+ (need 8 more bytes to fill length and mpdat type) */
702
+ const mdatSize = naluLen + 4 * nbNalu + 8;
703
+ let mdat;
704
+ try {
705
+ mdat = new Uint8Array(mdatSize);
706
+ } catch (err) {
707
+ this.observer.emit(Events.ERROR, Events.ERROR, {
708
+ type: ErrorTypes.MUX_ERROR,
709
+ details: ErrorDetails.REMUX_ALLOC_ERROR,
710
+ fatal: false,
711
+ error: err,
712
+ bytes: mdatSize,
713
+ reason: `fail allocating video mdat ${mdatSize}`,
714
+ });
715
+ return;
716
+ }
717
+ const view = new DataView(mdat.buffer);
718
+ view.setUint32(0, mdatSize);
719
+ mdat.set(MP4.types.mdat, 4);
720
+
721
+ let stretchedLastFrame = false;
722
+ let minDtsDelta = Number.POSITIVE_INFINITY;
723
+ let minPtsDelta = Number.POSITIVE_INFINITY;
724
+ let maxDtsDelta = Number.NEGATIVE_INFINITY;
725
+ let maxPtsDelta = Number.NEGATIVE_INFINITY;
726
+ for (let i = 0; i < nbSamples; i++) {
727
+ const VideoSample = inputSamples[i];
728
+ const VideoSampleUnits = VideoSample.units;
729
+ let mp4SampleLength = 0;
730
+ // convert NALU bitstream to MP4 format (prepend NALU with size field)
731
+ for (let j = 0, nbUnits = VideoSampleUnits.length; j < nbUnits; j++) {
732
+ const unit = VideoSampleUnits[j];
733
+ const unitData = unit.data;
734
+ const unitDataLen = unit.data.byteLength;
735
+ view.setUint32(offset, unitDataLen);
736
+ offset += 4;
737
+ mdat.set(unitData, offset);
738
+ offset += unitDataLen;
739
+ mp4SampleLength += 4 + unitDataLen;
740
+ }
741
+
742
+ // expected sample duration is the Decoding Timestamp diff of consecutive samples
743
+ let ptsDelta;
744
+ if (i < nbSamples - 1) {
745
+ mp4SampleDuration = inputSamples[i + 1].dts - VideoSample.dts;
746
+ ptsDelta = inputSamples[i + 1].pts - VideoSample.pts;
747
+ } else {
748
+ const config = this.config;
749
+ const lastFrameDuration =
750
+ i > 0
751
+ ? VideoSample.dts - inputSamples[i - 1].dts
752
+ : averageSampleDuration;
753
+ ptsDelta =
754
+ i > 0
755
+ ? VideoSample.pts - inputSamples[i - 1].pts
756
+ : averageSampleDuration;
757
+ if (config.stretchShortVideoTrack && this.nextAudioTs !== null) {
758
+ // In some cases, a segment's audio track duration may exceed the video track duration.
759
+ // Since we've already remuxed audio, and we know how long the audio track is, we look to
760
+ // see if the delta to the next segment is longer than maxBufferHole.
761
+ // If so, playback would potentially get stuck, so we artificially inflate
762
+ // the duration of the last frame to minimize any potential gap between segments.
763
+ const gapTolerance = Math.floor(config.maxBufferHole * timeScale);
764
+ const deltaToFrameEnd =
765
+ (audioTrackLength
766
+ ? minPTS + audioTrackLength * timeScale
767
+ : this.nextAudioTs + initTime) - VideoSample.pts;
768
+ if (deltaToFrameEnd > gapTolerance) {
769
+ // We subtract lastFrameDuration from deltaToFrameEnd to try to prevent any video
770
+ // frame overlap. maxBufferHole should be >> lastFrameDuration anyway.
771
+ mp4SampleDuration = deltaToFrameEnd - lastFrameDuration;
772
+ if (mp4SampleDuration < 0) {
773
+ mp4SampleDuration = lastFrameDuration;
774
+ } else {
775
+ stretchedLastFrame = true;
776
+ }
777
+ this.log(
778
+ `It is approximately ${
779
+ deltaToFrameEnd / 90
780
+ } ms to the next segment; using duration ${
781
+ mp4SampleDuration / 90
782
+ } ms for the last video frame.`,
783
+ );
784
+ } else {
785
+ mp4SampleDuration = lastFrameDuration;
786
+ }
787
+ } else {
788
+ mp4SampleDuration = lastFrameDuration;
789
+ }
790
+ }
791
+ const compositionTimeOffset = Math.round(
792
+ VideoSample.pts - VideoSample.dts,
793
+ );
794
+ minDtsDelta = Math.min(minDtsDelta, mp4SampleDuration);
795
+ maxDtsDelta = Math.max(maxDtsDelta, mp4SampleDuration);
796
+ minPtsDelta = Math.min(minPtsDelta, ptsDelta);
797
+ maxPtsDelta = Math.max(maxPtsDelta, ptsDelta);
798
+
799
+ outputSamples.push(
800
+ createMp4Sample(
801
+ VideoSample.key,
802
+ mp4SampleDuration,
803
+ mp4SampleLength,
804
+ compositionTimeOffset,
805
+ ),
806
+ );
807
+ }
808
+
809
+ if (outputSamples.length) {
810
+ if (chromeVersion) {
811
+ if (chromeVersion < 70) {
812
+ // Chrome workaround, mark first sample as being a Random Access Point (keyframe) to avoid sourcebuffer append issue
813
+ // https://code.google.com/p/chromium/issues/detail?id=229412
814
+ const flags = outputSamples[0].flags;
815
+ flags.dependsOn = 2;
816
+ flags.isNonSync = 0;
817
+ }
818
+ } else if (safariWebkitVersion) {
819
+ // Fix for "CNN special report, with CC" in test-streams (Safari browser only)
820
+ // Ignore DTS when frame durations are irregular. Safari MSE does not handle this leading to gaps.
821
+ if (
822
+ maxPtsDelta - minPtsDelta < maxDtsDelta - minDtsDelta &&
823
+ averageSampleDuration / maxDtsDelta < 0.025 &&
824
+ outputSamples[0].cts === 0
825
+ ) {
826
+ this.warn(
827
+ 'Found irregular gaps in sample duration. Using PTS instead of DTS to determine MP4 sample duration.',
828
+ );
829
+ let dts = firstDTS;
830
+ for (let i = 0, len = outputSamples.length; i < len; i++) {
831
+ const nextDts = dts + outputSamples[i].duration;
832
+ const pts = dts + outputSamples[i].cts;
833
+ if (i < len - 1) {
834
+ const nextPts = nextDts + outputSamples[i + 1].cts;
835
+ outputSamples[i].duration = nextPts - pts;
836
+ } else {
837
+ outputSamples[i].duration = i
838
+ ? outputSamples[i - 1].duration
839
+ : averageSampleDuration;
840
+ }
841
+ outputSamples[i].cts = 0;
842
+ dts = nextDts;
843
+ }
844
+ }
845
+ }
846
+ }
847
+ // next AVC/HEVC sample DTS should be equal to last sample DTS + last sample duration (in PES timescale)
848
+ mp4SampleDuration =
849
+ stretchedLastFrame || !mp4SampleDuration
850
+ ? averageSampleDuration
851
+ : mp4SampleDuration;
852
+ const endDTS = lastDTS + mp4SampleDuration;
853
+ this.nextVideoTs = nextVideoTs = endDTS - initTime;
854
+ this.videoSampleDuration = mp4SampleDuration;
855
+ this.isVideoContiguous = true;
856
+ const moof = MP4.moof(
857
+ track.sequenceNumber++,
858
+ firstDTS,
859
+ Object.assign(track, {
860
+ samples: outputSamples,
861
+ }),
862
+ );
863
+ const type: SourceBufferName = 'video';
864
+ const data = {
865
+ data1: moof,
866
+ data2: mdat,
867
+ startPTS: (minPTS - initTime) / timeScale,
868
+ endPTS: (maxPTS + mp4SampleDuration - initTime) / timeScale,
869
+ startDTS: (firstDTS - initTime) / timeScale,
870
+ endDTS: nextVideoTs / timeScale,
871
+ type,
872
+ hasAudio: false,
873
+ hasVideo: true,
874
+ nb: outputSamples.length,
875
+ dropped: track.dropped,
876
+ };
877
+ track.samples = [];
878
+ track.dropped = 0;
879
+ return data;
880
+ }
881
+
882
+ getSamplesPerFrame(track: DemuxedAudioTrack) {
883
+ switch (track.segmentCodec) {
884
+ case 'mp3':
885
+ return MPEG_AUDIO_SAMPLE_PER_FRAME;
886
+ case 'ac3':
887
+ return AC3_SAMPLES_PER_FRAME;
888
+ default:
889
+ return AAC_SAMPLES_PER_FRAME;
890
+ }
891
+ }
892
+
893
+ remuxAudio(
894
+ track: DemuxedAudioTrack,
895
+ timeOffset: number,
896
+ contiguous: boolean,
897
+ accurateTimeOffset: boolean,
898
+ videoTimeOffset?: number,
899
+ ): RemuxedTrack | undefined {
900
+ const inputTimeScale: number = track.inputTimeScale;
901
+ const mp4timeScale: number = track.samplerate
902
+ ? track.samplerate
903
+ : inputTimeScale;
904
+ const scaleFactor: number = inputTimeScale / mp4timeScale;
905
+ const mp4SampleDuration: number = this.getSamplesPerFrame(track);
906
+ const inputSampleDuration: number = mp4SampleDuration * scaleFactor;
907
+ const initPTS = this._initPTS as RationalTimestamp;
908
+ const rawMPEG: boolean =
909
+ track.segmentCodec === 'mp3' && this.typeSupported.mpeg;
910
+ const outputSamples: Array<Mp4Sample> = [];
911
+ const alignedWithVideo = videoTimeOffset !== undefined;
912
+
913
+ let inputSamples: Array<AudioSample> = track.samples;
914
+ let offset: number = rawMPEG ? 0 : 8;
915
+ let nextAudioTs: number = this.nextAudioTs || -1;
916
+
917
+ // window.audioSamples ? window.audioSamples.push(inputSamples.map(s => s.pts)) : (window.audioSamples = [inputSamples.map(s => s.pts)]);
918
+
919
+ // for audio samples, also consider consecutive fragments as being contiguous (even if a level switch occurs),
920
+ // for sake of clarity:
921
+ // consecutive fragments are frags with
922
+ // - less than 100ms gaps between new time offset (if accurate) and next expected PTS OR
923
+ // - less than 20 audio frames distance
924
+ // contiguous fragments are consecutive fragments from same quality level (same level, new SN = old SN + 1)
925
+ // this helps ensuring audio continuity
926
+ // and this also avoids audio glitches/cut when switching quality, or reporting wrong duration on first audio frame
927
+ const initTime = (initPTS.baseTime * inputTimeScale) / initPTS.timescale;
928
+ const timeOffsetMpegTS = initTime + timeOffset * inputTimeScale;
929
+ this.isAudioContiguous = contiguous =
930
+ contiguous ||
931
+ ((inputSamples.length &&
932
+ nextAudioTs > 0 &&
933
+ ((accurateTimeOffset &&
934
+ Math.abs(timeOffsetMpegTS - (nextAudioTs + initTime)) < 9000) ||
935
+ Math.abs(
936
+ normalizePts(inputSamples[0].pts, timeOffsetMpegTS) -
937
+ (nextAudioTs + initTime),
938
+ ) <
939
+ 20 * inputSampleDuration)) as boolean);
940
+
941
+ // compute normalized PTS
942
+ inputSamples.forEach(function (sample) {
943
+ sample.pts = normalizePts(sample.pts, timeOffsetMpegTS);
944
+ });
945
+
946
+ if (!contiguous || nextAudioTs < 0) {
947
+ const sampleCount = inputSamples.length;
948
+ // filter out sample with negative PTS that are not playable anyway
949
+ // if we don't remove these negative samples, they will shift all audio samples forward.
950
+ // leading to audio overlap between current / next fragment
951
+ inputSamples = inputSamples.filter((sample) => sample.pts >= 0);
952
+
953
+ if (sampleCount !== inputSamples.length) {
954
+ this.warn(
955
+ `Removed ${inputSamples.length - sampleCount} of ${sampleCount} samples (initPTS ${initTime} / ${inputTimeScale})`,
956
+ );
957
+ }
958
+
959
+ // in case all samples have negative PTS, and have been filtered out, return now
960
+ if (!inputSamples.length) {
961
+ return;
962
+ }
963
+
964
+ if (videoTimeOffset === 0) {
965
+ // Set the start to match video so that start gaps larger than inputSampleDuration are filled with silence
966
+ nextAudioTs = 0;
967
+ } else if (accurateTimeOffset && !alignedWithVideo) {
968
+ // When not seeking, not live, and LevelDetails.PTSKnown, use fragment start as predicted next audio PTS
969
+ nextAudioTs = Math.max(0, timeOffsetMpegTS - initTime);
970
+ } else {
971
+ // if frags are not contiguous and if we cant trust time offset, let's use first sample PTS as next audio PTS
972
+ nextAudioTs = inputSamples[0].pts - initTime;
973
+ }
974
+ }
975
+
976
+ // If the audio track is missing samples, the frames seem to get "left-shifted" within the
977
+ // resulting mp4 segment, causing sync issues and leaving gaps at the end of the audio segment.
978
+ // In an effort to prevent this from happening, we inject frames here where there are gaps.
979
+ // When possible, we inject a silent frame; when that's not possible, we duplicate the last
980
+ // frame.
981
+
982
+ if (track.segmentCodec === 'aac') {
983
+ const maxAudioFramesDrift = this.config.maxAudioFramesDrift;
984
+ for (
985
+ let i = 0, nextPts = nextAudioTs + initTime;
986
+ i < inputSamples.length;
987
+ i++
988
+ ) {
989
+ // First, let's see how far off this frame is from where we expect it to be
990
+ const sample = inputSamples[i];
991
+ const pts = sample.pts;
992
+ const delta = pts - nextPts;
993
+ const duration = Math.abs((1000 * delta) / inputTimeScale);
994
+
995
+ // When remuxing with video, if we're overlapping by more than a duration, drop this sample to stay in sync
996
+ if (
997
+ delta <= -maxAudioFramesDrift * inputSampleDuration &&
998
+ alignedWithVideo
999
+ ) {
1000
+ if (i === 0) {
1001
+ this.warn(
1002
+ `Audio frame @ ${(pts / inputTimeScale).toFixed(
1003
+ 3,
1004
+ )}s overlaps marker by ${Math.round(
1005
+ (1000 * delta) / inputTimeScale,
1006
+ )} ms.`,
1007
+ );
1008
+ this.nextAudioTs = nextAudioTs = pts - initTime;
1009
+ nextPts = pts;
1010
+ }
1011
+ } // eslint-disable-line brace-style
1012
+
1013
+ // Insert missing frames if:
1014
+ // 1: We're more than maxAudioFramesDrift frame away
1015
+ // 2: Not more than MAX_SILENT_FRAME_DURATION away
1016
+ // 3: currentTime (aka nextPtsNorm) is not 0
1017
+ // 4: remuxing with video (videoTimeOffset !== undefined)
1018
+ else if (
1019
+ delta >= maxAudioFramesDrift * inputSampleDuration &&
1020
+ duration < MAX_SILENT_FRAME_DURATION &&
1021
+ alignedWithVideo
1022
+ ) {
1023
+ let missing = Math.round(delta / inputSampleDuration);
1024
+ // Adjust nextPts so that silent samples are aligned with media pts. This will prevent media samples from
1025
+ // later being shifted if nextPts is based on timeOffset and delta is not a multiple of inputSampleDuration.
1026
+ nextPts = pts - missing * inputSampleDuration;
1027
+ while (nextPts < 0 && missing && inputSampleDuration) {
1028
+ missing--;
1029
+ nextPts += inputSampleDuration;
1030
+ }
1031
+ if (i === 0) {
1032
+ this.nextAudioTs = nextAudioTs = nextPts - initTime;
1033
+ }
1034
+ this.warn(
1035
+ `Injecting ${missing} audio frames @ ${(
1036
+ (nextPts - initTime) /
1037
+ inputTimeScale
1038
+ ).toFixed(3)}s due to ${Math.round(
1039
+ (1000 * delta) / inputTimeScale,
1040
+ )} ms gap.`,
1041
+ );
1042
+ for (let j = 0; j < missing; j++) {
1043
+ let fillFrame = AAC.getSilentFrame(
1044
+ track.parsedCodec || track.manifestCodec || track.codec,
1045
+ track.channelCount,
1046
+ );
1047
+ if (!fillFrame) {
1048
+ this.log(
1049
+ 'Unable to get silent frame for given audio codec; duplicating last frame instead.',
1050
+ );
1051
+ fillFrame = sample.unit.subarray();
1052
+ }
1053
+ inputSamples.splice(i, 0, {
1054
+ unit: fillFrame,
1055
+ pts: nextPts,
1056
+ });
1057
+ nextPts += inputSampleDuration;
1058
+ i++;
1059
+ }
1060
+ }
1061
+ sample.pts = nextPts;
1062
+ nextPts += inputSampleDuration;
1063
+ }
1064
+ }
1065
+ let firstPTS: number | null = null;
1066
+ let lastPTS: number | null = null;
1067
+ let mdat: any;
1068
+ let mdatSize: number = 0;
1069
+ let sampleLength: number = inputSamples.length;
1070
+ while (sampleLength--) {
1071
+ mdatSize += inputSamples[sampleLength].unit.byteLength;
1072
+ }
1073
+ for (let j = 0, nbSamples = inputSamples.length; j < nbSamples; j++) {
1074
+ const audioSample = inputSamples[j];
1075
+ const unit = audioSample.unit;
1076
+ let pts = audioSample.pts;
1077
+ if (lastPTS !== null) {
1078
+ // If we have more than one sample, set the duration of the sample to the "real" duration; the PTS diff with
1079
+ // the previous sample
1080
+ const prevSample = outputSamples[j - 1];
1081
+ prevSample.duration = Math.round((pts - lastPTS) / scaleFactor);
1082
+ } else {
1083
+ if (contiguous && track.segmentCodec === 'aac') {
1084
+ // set PTS/DTS to expected PTS/DTS
1085
+ pts = nextAudioTs + initTime;
1086
+ }
1087
+ // remember first PTS of our audioSamples
1088
+ firstPTS = pts;
1089
+ if (mdatSize > 0) {
1090
+ /* concatenate the audio data and construct the mdat in place
1091
+ (need 8 more bytes to fill length and mdat type) */
1092
+ mdatSize += offset;
1093
+ try {
1094
+ mdat = new Uint8Array(mdatSize);
1095
+ } catch (err) {
1096
+ this.observer.emit(Events.ERROR, Events.ERROR, {
1097
+ type: ErrorTypes.MUX_ERROR,
1098
+ details: ErrorDetails.REMUX_ALLOC_ERROR,
1099
+ fatal: false,
1100
+ error: err,
1101
+ bytes: mdatSize,
1102
+ reason: `fail allocating audio mdat ${mdatSize}`,
1103
+ });
1104
+ return;
1105
+ }
1106
+ if (!rawMPEG) {
1107
+ const view = new DataView(mdat.buffer);
1108
+ view.setUint32(0, mdatSize);
1109
+ mdat.set(MP4.types.mdat, 4);
1110
+ }
1111
+ } else {
1112
+ // no audio samples
1113
+ return;
1114
+ }
1115
+ }
1116
+ mdat.set(unit, offset);
1117
+ const unitLen = unit.byteLength;
1118
+ offset += unitLen;
1119
+ // Default the sample's duration to the computed mp4SampleDuration, which will either be 1024 for AAC or 1152 for MPEG
1120
+ // In the case that we have 1 sample, this will be the duration. If we have more than one sample, the duration
1121
+ // becomes the PTS diff with the previous sample
1122
+ outputSamples.push(createMp4Sample(true, mp4SampleDuration, unitLen, 0));
1123
+ lastPTS = pts;
1124
+ }
1125
+
1126
+ // We could end up with no audio samples if all input samples were overlapping with the previously remuxed ones
1127
+ const nbSamples = outputSamples.length;
1128
+ if (!nbSamples) {
1129
+ return;
1130
+ }
1131
+
1132
+ // The next audio sample PTS should be equal to last sample PTS + duration
1133
+ const lastSample = outputSamples[outputSamples.length - 1];
1134
+ nextAudioTs = (lastPTS as number) - initTime;
1135
+ this.nextAudioTs = nextAudioTs + scaleFactor * lastSample.duration;
1136
+
1137
+ // Set the track samples from inputSamples to outputSamples before remuxing
1138
+ const moof = rawMPEG
1139
+ ? new Uint8Array(0)
1140
+ : MP4.moof(
1141
+ track.sequenceNumber++,
1142
+ firstPTS! / scaleFactor,
1143
+ Object.assign({}, track, { samples: outputSamples }),
1144
+ );
1145
+
1146
+ // Clear the track samples. This also clears the samples array in the demuxer, since the reference is shared
1147
+ track.samples = [];
1148
+ const start = (firstPTS! - initTime) / inputTimeScale;
1149
+ const end = this.nextAudioTs / inputTimeScale;
1150
+ const type: SourceBufferName = 'audio';
1151
+ const audioData = {
1152
+ data1: moof,
1153
+ data2: mdat,
1154
+ startPTS: start,
1155
+ endPTS: end,
1156
+ startDTS: start,
1157
+ endDTS: end,
1158
+ type,
1159
+ hasAudio: true,
1160
+ hasVideo: false,
1161
+ nb: nbSamples,
1162
+ };
1163
+
1164
+ this.isAudioContiguous = true;
1165
+ return audioData;
1166
+ }
1167
+ }
1168
+
1169
+ export function normalizePts(value: number, reference: number | null): number {
1170
+ let offset;
1171
+ if (reference === null) {
1172
+ return value;
1173
+ }
1174
+
1175
+ if (reference < value) {
1176
+ // - 2^33
1177
+ offset = -8589934592;
1178
+ } else {
1179
+ // + 2^33
1180
+ offset = 8589934592;
1181
+ }
1182
+ /* PTS is 33bit (from 0 to 2^33 -1)
1183
+ if diff between value and reference is bigger than half of the amplitude (2^32) then it means that
1184
+ PTS looping occured. fill the gap */
1185
+ while (Math.abs(value - reference) > 4294967296) {
1186
+ value += offset;
1187
+ }
1188
+
1189
+ return value;
1190
+ }
1191
+
1192
+ function findKeyframeIndex(samples: Array<VideoSample>): number {
1193
+ for (let i = 0; i < samples.length; i++) {
1194
+ if (samples[i].key) {
1195
+ return i;
1196
+ }
1197
+ }
1198
+ return -1;
1199
+ }
1200
+
1201
+ export function flushTextTrackMetadataCueSamples(
1202
+ track: DemuxedMetadataTrack,
1203
+ timeOffset: number,
1204
+ initPTS: TimestampOffset,
1205
+ initDTS: TimestampOffset,
1206
+ ): RemuxedMetadata | undefined {
1207
+ const length = track.samples.length;
1208
+ if (!length) {
1209
+ return;
1210
+ }
1211
+ const inputTimeScale = track.inputTimeScale;
1212
+ for (let index = 0; index < length; index++) {
1213
+ const sample = track.samples[index];
1214
+ // setting id3 pts, dts to relative time
1215
+ // using this._initPTS and this._initDTS to calculate relative time
1216
+ sample.pts =
1217
+ normalizePts(
1218
+ sample.pts - (initPTS.baseTime * inputTimeScale) / initPTS.timescale,
1219
+ timeOffset * inputTimeScale,
1220
+ ) / inputTimeScale;
1221
+ sample.dts =
1222
+ normalizePts(
1223
+ sample.dts - (initDTS.baseTime * inputTimeScale) / initDTS.timescale,
1224
+ timeOffset * inputTimeScale,
1225
+ ) / inputTimeScale;
1226
+ }
1227
+ const samples = track.samples;
1228
+ track.samples = [];
1229
+ return {
1230
+ samples,
1231
+ };
1232
+ }
1233
+
1234
+ export function flushTextTrackUserdataCueSamples(
1235
+ track: DemuxedUserdataTrack,
1236
+ timeOffset: number,
1237
+ initPTS: RationalTimestamp,
1238
+ ): RemuxedUserdata | undefined {
1239
+ const length = track.samples.length;
1240
+ if (!length) {
1241
+ return;
1242
+ }
1243
+
1244
+ const inputTimeScale = track.inputTimeScale;
1245
+ for (let index = 0; index < length; index++) {
1246
+ const sample = track.samples[index];
1247
+ // setting text pts, dts to relative time
1248
+ // using this._initPTS and this._initDTS to calculate relative time
1249
+ sample.pts =
1250
+ normalizePts(
1251
+ sample.pts - (initPTS.baseTime * inputTimeScale) / initPTS.timescale,
1252
+ timeOffset * inputTimeScale,
1253
+ ) / inputTimeScale;
1254
+ }
1255
+ track.samples.sort((a, b) => a.pts - b.pts);
1256
+ const samples = track.samples;
1257
+ track.samples = [];
1258
+ return {
1259
+ samples,
1260
+ };
1261
+ }