@kenzuya/mediabunny 1.26.0 → 1.28.6

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that public registry.
Files changed (238)
  1. package/README.md +1 -1
  2. package/dist/bundles/{mediabunny.mjs → mediabunny.js} +21963 -21390
  3. package/dist/bundles/mediabunny.min.js +490 -0
  4. package/dist/modules/shared/mp3-misc.d.ts.map +1 -1
  5. package/dist/modules/src/adts/adts-demuxer.d.ts +6 -6
  6. package/dist/modules/src/adts/adts-demuxer.d.ts.map +1 -1
  7. package/dist/modules/src/adts/adts-muxer.d.ts +4 -4
  8. package/dist/modules/src/adts/adts-muxer.d.ts.map +1 -1
  9. package/dist/modules/src/adts/adts-reader.d.ts +1 -1
  10. package/dist/modules/src/adts/adts-reader.d.ts.map +1 -1
  11. package/dist/modules/src/avi/avi-demuxer.d.ts +44 -0
  12. package/dist/modules/src/avi/avi-demuxer.d.ts.map +1 -0
  13. package/dist/modules/src/avi/avi-misc.d.ts +88 -0
  14. package/dist/modules/src/avi/avi-misc.d.ts.map +1 -0
  15. package/dist/modules/src/avi/avi-muxer.d.ts +45 -0
  16. package/dist/modules/src/avi/avi-muxer.d.ts.map +1 -0
  17. package/dist/modules/src/avi/riff-writer.d.ts +26 -0
  18. package/dist/modules/src/avi/riff-writer.d.ts.map +1 -0
  19. package/dist/modules/src/codec-data.d.ts +8 -3
  20. package/dist/modules/src/codec-data.d.ts.map +1 -1
  21. package/dist/modules/src/codec.d.ts +10 -10
  22. package/dist/modules/src/codec.d.ts.map +1 -1
  23. package/dist/modules/src/conversion.d.ts +33 -16
  24. package/dist/modules/src/conversion.d.ts.map +1 -1
  25. package/dist/modules/src/custom-coder.d.ts +8 -8
  26. package/dist/modules/src/custom-coder.d.ts.map +1 -1
  27. package/dist/modules/src/demuxer.d.ts +3 -3
  28. package/dist/modules/src/demuxer.d.ts.map +1 -1
  29. package/dist/modules/src/encode.d.ts +8 -8
  30. package/dist/modules/src/encode.d.ts.map +1 -1
  31. package/dist/modules/src/flac/flac-demuxer.d.ts +7 -7
  32. package/dist/modules/src/flac/flac-demuxer.d.ts.map +1 -1
  33. package/dist/modules/src/flac/flac-misc.d.ts +3 -3
  34. package/dist/modules/src/flac/flac-misc.d.ts.map +1 -1
  35. package/dist/modules/src/flac/flac-muxer.d.ts +5 -5
  36. package/dist/modules/src/flac/flac-muxer.d.ts.map +1 -1
  37. package/dist/modules/src/id3.d.ts +3 -3
  38. package/dist/modules/src/id3.d.ts.map +1 -1
  39. package/dist/modules/src/index.d.ts +20 -20
  40. package/dist/modules/src/index.d.ts.map +1 -1
  41. package/dist/modules/src/input-format.d.ts +22 -0
  42. package/dist/modules/src/input-format.d.ts.map +1 -1
  43. package/dist/modules/src/input-track.d.ts +8 -8
  44. package/dist/modules/src/input-track.d.ts.map +1 -1
  45. package/dist/modules/src/input.d.ts +12 -12
  46. package/dist/modules/src/isobmff/isobmff-boxes.d.ts +2 -2
  47. package/dist/modules/src/isobmff/isobmff-boxes.d.ts.map +1 -1
  48. package/dist/modules/src/isobmff/isobmff-demuxer.d.ts +12 -12
  49. package/dist/modules/src/isobmff/isobmff-demuxer.d.ts.map +1 -1
  50. package/dist/modules/src/isobmff/isobmff-misc.d.ts.map +1 -1
  51. package/dist/modules/src/isobmff/isobmff-muxer.d.ts +11 -11
  52. package/dist/modules/src/isobmff/isobmff-muxer.d.ts.map +1 -1
  53. package/dist/modules/src/isobmff/isobmff-reader.d.ts +2 -2
  54. package/dist/modules/src/isobmff/isobmff-reader.d.ts.map +1 -1
  55. package/dist/modules/src/matroska/ebml.d.ts +3 -3
  56. package/dist/modules/src/matroska/ebml.d.ts.map +1 -1
  57. package/dist/modules/src/matroska/matroska-demuxer.d.ts +13 -13
  58. package/dist/modules/src/matroska/matroska-demuxer.d.ts.map +1 -1
  59. package/dist/modules/src/matroska/matroska-input.d.ts +33 -0
  60. package/dist/modules/src/matroska/matroska-input.d.ts.map +1 -0
  61. package/dist/modules/src/matroska/matroska-misc.d.ts.map +1 -1
  62. package/dist/modules/src/matroska/matroska-muxer.d.ts +5 -5
  63. package/dist/modules/src/matroska/matroska-muxer.d.ts.map +1 -1
  64. package/dist/modules/src/media-sink.d.ts +5 -5
  65. package/dist/modules/src/media-sink.d.ts.map +1 -1
  66. package/dist/modules/src/media-source.d.ts +22 -4
  67. package/dist/modules/src/media-source.d.ts.map +1 -1
  68. package/dist/modules/src/metadata.d.ts +2 -2
  69. package/dist/modules/src/metadata.d.ts.map +1 -1
  70. package/dist/modules/src/misc.d.ts +5 -4
  71. package/dist/modules/src/misc.d.ts.map +1 -1
  72. package/dist/modules/src/mp3/mp3-demuxer.d.ts +7 -7
  73. package/dist/modules/src/mp3/mp3-demuxer.d.ts.map +1 -1
  74. package/dist/modules/src/mp3/mp3-muxer.d.ts +4 -4
  75. package/dist/modules/src/mp3/mp3-muxer.d.ts.map +1 -1
  76. package/dist/modules/src/mp3/mp3-reader.d.ts +2 -2
  77. package/dist/modules/src/mp3/mp3-reader.d.ts.map +1 -1
  78. package/dist/modules/src/mp3/mp3-writer.d.ts +1 -1
  79. package/dist/modules/src/mp3/mp3-writer.d.ts.map +1 -1
  80. package/dist/modules/src/muxer.d.ts +4 -4
  81. package/dist/modules/src/muxer.d.ts.map +1 -1
  82. package/dist/modules/src/ogg/ogg-demuxer.d.ts +7 -7
  83. package/dist/modules/src/ogg/ogg-demuxer.d.ts.map +1 -1
  84. package/dist/modules/src/ogg/ogg-misc.d.ts +1 -1
  85. package/dist/modules/src/ogg/ogg-misc.d.ts.map +1 -1
  86. package/dist/modules/src/ogg/ogg-muxer.d.ts +5 -5
  87. package/dist/modules/src/ogg/ogg-muxer.d.ts.map +1 -1
  88. package/dist/modules/src/ogg/ogg-reader.d.ts +1 -1
  89. package/dist/modules/src/ogg/ogg-reader.d.ts.map +1 -1
  90. package/dist/modules/src/output-format.d.ts +51 -6
  91. package/dist/modules/src/output-format.d.ts.map +1 -1
  92. package/dist/modules/src/output.d.ts +13 -13
  93. package/dist/modules/src/output.d.ts.map +1 -1
  94. package/dist/modules/src/packet.d.ts +1 -1
  95. package/dist/modules/src/packet.d.ts.map +1 -1
  96. package/dist/modules/src/pcm.d.ts.map +1 -1
  97. package/dist/modules/src/reader.d.ts +2 -2
  98. package/dist/modules/src/reader.d.ts.map +1 -1
  99. package/dist/modules/src/sample.d.ts +57 -15
  100. package/dist/modules/src/sample.d.ts.map +1 -1
  101. package/dist/modules/src/source.d.ts +3 -3
  102. package/dist/modules/src/source.d.ts.map +1 -1
  103. package/dist/modules/src/subtitles.d.ts +1 -1
  104. package/dist/modules/src/subtitles.d.ts.map +1 -1
  105. package/dist/modules/src/target.d.ts +2 -2
  106. package/dist/modules/src/target.d.ts.map +1 -1
  107. package/dist/modules/src/tsconfig.tsbuildinfo +1 -1
  108. package/dist/modules/src/wave/riff-writer.d.ts +1 -1
  109. package/dist/modules/src/wave/riff-writer.d.ts.map +1 -1
  110. package/dist/modules/src/wave/wave-demuxer.d.ts +6 -6
  111. package/dist/modules/src/wave/wave-demuxer.d.ts.map +1 -1
  112. package/dist/modules/src/wave/wave-muxer.d.ts +4 -4
  113. package/dist/modules/src/wave/wave-muxer.d.ts.map +1 -1
  114. package/dist/modules/src/writer.d.ts +1 -1
  115. package/dist/modules/src/writer.d.ts.map +1 -1
  116. package/dist/packages/eac3/eac3.wasm +0 -0
  117. package/dist/packages/eac3/mediabunny-eac3.js +1058 -0
  118. package/dist/packages/eac3/mediabunny-eac3.min.js +44 -0
  119. package/dist/packages/mp3-encoder/mediabunny-mp3-encoder.js +694 -0
  120. package/dist/packages/mp3-encoder/mediabunny-mp3-encoder.min.js +58 -0
  121. package/dist/packages/mpeg4/mediabunny-mpeg4.js +1198 -0
  122. package/dist/packages/mpeg4/mediabunny-mpeg4.min.js +44 -0
  123. package/dist/packages/mpeg4/xvid.wasm +0 -0
  124. package/package.json +18 -57
  125. package/dist/bundles/mediabunny.cjs +0 -26140
  126. package/dist/bundles/mediabunny.min.cjs +0 -147
  127. package/dist/bundles/mediabunny.min.mjs +0 -146
  128. package/dist/mediabunny.d.ts +0 -3319
  129. package/dist/modules/shared/mp3-misc.js +0 -147
  130. package/dist/modules/src/adts/adts-demuxer.js +0 -239
  131. package/dist/modules/src/adts/adts-muxer.js +0 -80
  132. package/dist/modules/src/adts/adts-reader.js +0 -63
  133. package/dist/modules/src/codec-data.js +0 -1730
  134. package/dist/modules/src/codec.js +0 -869
  135. package/dist/modules/src/conversion.js +0 -1459
  136. package/dist/modules/src/custom-coder.js +0 -117
  137. package/dist/modules/src/demuxer.js +0 -12
  138. package/dist/modules/src/encode.js +0 -442
  139. package/dist/modules/src/flac/flac-demuxer.js +0 -504
  140. package/dist/modules/src/flac/flac-misc.js +0 -135
  141. package/dist/modules/src/flac/flac-muxer.js +0 -222
  142. package/dist/modules/src/id3.js +0 -848
  143. package/dist/modules/src/index.js +0 -28
  144. package/dist/modules/src/input-format.js +0 -480
  145. package/dist/modules/src/input-track.js +0 -372
  146. package/dist/modules/src/input.js +0 -188
  147. package/dist/modules/src/isobmff/isobmff-boxes.js +0 -1480
  148. package/dist/modules/src/isobmff/isobmff-demuxer.js +0 -2618
  149. package/dist/modules/src/isobmff/isobmff-misc.js +0 -20
  150. package/dist/modules/src/isobmff/isobmff-muxer.js +0 -966
  151. package/dist/modules/src/isobmff/isobmff-reader.js +0 -72
  152. package/dist/modules/src/matroska/ebml.js +0 -653
  153. package/dist/modules/src/matroska/matroska-demuxer.js +0 -2133
  154. package/dist/modules/src/matroska/matroska-misc.js +0 -20
  155. package/dist/modules/src/matroska/matroska-muxer.js +0 -1017
  156. package/dist/modules/src/media-sink.js +0 -1736
  157. package/dist/modules/src/media-source.js +0 -1825
  158. package/dist/modules/src/metadata.js +0 -193
  159. package/dist/modules/src/misc.js +0 -623
  160. package/dist/modules/src/mp3/mp3-demuxer.js +0 -285
  161. package/dist/modules/src/mp3/mp3-muxer.js +0 -123
  162. package/dist/modules/src/mp3/mp3-reader.js +0 -26
  163. package/dist/modules/src/mp3/mp3-writer.js +0 -78
  164. package/dist/modules/src/muxer.js +0 -50
  165. package/dist/modules/src/node.d.ts +0 -9
  166. package/dist/modules/src/node.d.ts.map +0 -1
  167. package/dist/modules/src/node.js +0 -9
  168. package/dist/modules/src/ogg/ogg-demuxer.js +0 -763
  169. package/dist/modules/src/ogg/ogg-misc.js +0 -78
  170. package/dist/modules/src/ogg/ogg-muxer.js +0 -353
  171. package/dist/modules/src/ogg/ogg-reader.js +0 -65
  172. package/dist/modules/src/output-format.js +0 -527
  173. package/dist/modules/src/output.js +0 -300
  174. package/dist/modules/src/packet.js +0 -182
  175. package/dist/modules/src/pcm.js +0 -85
  176. package/dist/modules/src/reader.js +0 -236
  177. package/dist/modules/src/sample.js +0 -1056
  178. package/dist/modules/src/source.js +0 -1182
  179. package/dist/modules/src/subtitles.js +0 -575
  180. package/dist/modules/src/target.js +0 -140
  181. package/dist/modules/src/wave/riff-writer.js +0 -30
  182. package/dist/modules/src/wave/wave-demuxer.js +0 -447
  183. package/dist/modules/src/wave/wave-muxer.js +0 -318
  184. package/dist/modules/src/writer.js +0 -370
  185. package/src/adts/adts-demuxer.ts +0 -331
  186. package/src/adts/adts-muxer.ts +0 -111
  187. package/src/adts/adts-reader.ts +0 -85
  188. package/src/codec-data.ts +0 -2078
  189. package/src/codec.ts +0 -1092
  190. package/src/conversion.ts +0 -2112
  191. package/src/custom-coder.ts +0 -197
  192. package/src/demuxer.ts +0 -24
  193. package/src/encode.ts +0 -739
  194. package/src/flac/flac-demuxer.ts +0 -730
  195. package/src/flac/flac-misc.ts +0 -164
  196. package/src/flac/flac-muxer.ts +0 -320
  197. package/src/id3.ts +0 -925
  198. package/src/index.ts +0 -221
  199. package/src/input-format.ts +0 -541
  200. package/src/input-track.ts +0 -529
  201. package/src/input.ts +0 -235
  202. package/src/isobmff/isobmff-boxes.ts +0 -1719
  203. package/src/isobmff/isobmff-demuxer.ts +0 -3190
  204. package/src/isobmff/isobmff-misc.ts +0 -29
  205. package/src/isobmff/isobmff-muxer.ts +0 -1348
  206. package/src/isobmff/isobmff-reader.ts +0 -91
  207. package/src/matroska/ebml.ts +0 -730
  208. package/src/matroska/matroska-demuxer.ts +0 -2481
  209. package/src/matroska/matroska-misc.ts +0 -29
  210. package/src/matroska/matroska-muxer.ts +0 -1276
  211. package/src/media-sink.ts +0 -2179
  212. package/src/media-source.ts +0 -2243
  213. package/src/metadata.ts +0 -320
  214. package/src/misc.ts +0 -798
  215. package/src/mp3/mp3-demuxer.ts +0 -383
  216. package/src/mp3/mp3-muxer.ts +0 -166
  217. package/src/mp3/mp3-reader.ts +0 -34
  218. package/src/mp3/mp3-writer.ts +0 -120
  219. package/src/muxer.ts +0 -88
  220. package/src/node.ts +0 -11
  221. package/src/ogg/ogg-demuxer.ts +0 -1053
  222. package/src/ogg/ogg-misc.ts +0 -116
  223. package/src/ogg/ogg-muxer.ts +0 -497
  224. package/src/ogg/ogg-reader.ts +0 -93
  225. package/src/output-format.ts +0 -945
  226. package/src/output.ts +0 -488
  227. package/src/packet.ts +0 -263
  228. package/src/pcm.ts +0 -112
  229. package/src/reader.ts +0 -323
  230. package/src/sample.ts +0 -1461
  231. package/src/source.ts +0 -1688
  232. package/src/subtitles.ts +0 -711
  233. package/src/target.ts +0 -204
  234. package/src/tsconfig.json +0 -16
  235. package/src/wave/riff-writer.ts +0 -36
  236. package/src/wave/wave-demuxer.ts +0 -529
  237. package/src/wave/wave-muxer.ts +0 -371
  238. package/src/writer.ts +0 -490
package/src/conversion.ts DELETED
@@ -1,2112 +0,0 @@
- /*!
- * Copyright (c) 2025-present, Vanilagy and contributors
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at https://mozilla.org/MPL/2.0/.
- */
-
- import {
- AUDIO_CODECS,
- AudioCodec,
- NON_PCM_AUDIO_CODECS,
- SUBTITLE_CODECS,
- SubtitleCodec,
- VIDEO_CODECS,
- VideoCodec,
- } from './codec';
- import {
- getEncodableAudioCodecs,
- getFirstEncodableVideoCodec,
- Quality,
- QUALITY_HIGH,
- VideoEncodingConfig,
- } from './encode';
- import { Input } from './input';
- import { InputAudioTrack, InputSubtitleTrack, InputTrack, InputVideoTrack } from './input-track';
- import {
- AudioSampleSink,
- CanvasSink,
- EncodedPacketSink,
- VideoSampleSink,
- } from './media-sink';
- import {
- AudioSource,
- EncodedVideoPacketSource,
- EncodedAudioPacketSource,
- SubtitleSource,
- TextSubtitleSource,
- VideoSource,
- VideoSampleSource,
- AudioSampleSource,
- } from './media-source';
- import {
- assert,
- clamp,
- isIso639Dash2LanguageCode,
- MaybePromise,
- normalizeRotation,
- promiseWithResolvers,
- Rotation,
- } from './misc';
- import { Output, SubtitleTrackMetadata, TrackType } from './output';
- import { Mp4OutputFormat } from './output-format';
- import { AudioSample, clampCropRectangle, validateCropRectangle, VideoSample } from './sample';
- import { MetadataTags, validateMetadataTags } from './metadata';
- import { formatCuesToAss, formatCuesToSrt, formatCuesToWebVTT, SubtitleCue } from './subtitles';
- import { NullTarget } from './target';
-
- /**
- * The options for media file conversion.
- * @group Conversion
- * @public
- */
- export type ConversionOptions = {
- /** The input file. */
- input: Input;
- /** The output file. */
- output: Output;
-
- /**
- * Video-specific options. When passing an object, the same options are applied to all video tracks. When passing a
- * function, it will be invoked for each video track and is expected to return or resolve to the options
- * for that specific track. The function is passed an instance of {@link InputVideoTrack} as well as a number `n`,
- * which is the 1-based index of the track in the list of all video tracks.
- */
- video?: ConversionVideoOptions
- | ((track: InputVideoTrack, n: number) => MaybePromise<ConversionVideoOptions | undefined>);
-
- /**
- * Audio-specific options. When passing an object, the same options are applied to all audio tracks. When passing a
- * function, it will be invoked for each audio track and is expected to return or resolve to the options
- * for that specific track. The function is passed an instance of {@link InputAudioTrack} as well as a number `n`,
- * which is the 1-based index of the track in the list of all audio tracks.
- */
- audio?: ConversionAudioOptions
- | ((track: InputAudioTrack, n: number) => MaybePromise<ConversionAudioOptions | undefined>);
-
- /**
- * Subtitle-specific options. When passing an object, the same options are applied to all subtitle tracks. When passing a
- * function, it will be invoked for each subtitle track and is expected to return or resolve to the options
- * for that specific track. The function is passed an instance of {@link InputSubtitleTrack} as well as a number `n`,
- * which is the 1-based index of the track in the list of all subtitle tracks.
- */
- subtitle?: ConversionSubtitleOptions
- | ((track: InputSubtitleTrack, n: number) => MaybePromise<ConversionSubtitleOptions | undefined>);
-
- /** Options to trim the input file. */
- trim?: {
- /** The time in the input file in seconds at which the output file should start. Must be less than `end`. */
- start: number;
- /** The time in the input file in seconds at which the output file should end. Must be greater than `start`. */
- end: number;
- };
-
- /**
- * An object or a callback that returns or resolves to an object containing the descriptive metadata tags that
- * should be written to the output file. If a function is passed, it will be passed the tags of the input file as
- * its first argument, allowing you to modify, augment or extend them.
- *
- * If no function is set, the input's metadata tags will be copied to the output.
- */
- tags?: MetadataTags | ((inputTags: MetadataTags) => MaybePromise<MetadataTags>);
-
- /**
- * Whether to show potential console warnings about discarded tracks after calling `Conversion.init()`, defaults to
- * `true`. Set this to `false` if you're properly handling the `discardedTracks` and `isValid` fields already and
- * want to keep the console output clean.
- */
- showWarnings?: boolean;
- };
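
For reference, a minimal sketch of how a `ConversionOptions` object of the shape above is meant to be used. The `ALL_FORMATS`, `BlobSource` and `BufferTarget` constructions are assumed from the package's usual public API and are not part of this diff; adjust them to your actual source/target.

    import {
        Input, Output, Conversion, Mp4OutputFormat,
        ALL_FORMATS, BlobSource, BufferTarget,
    } from 'mediabunny';

    declare const file: File; // some media file you already have

    // Assumed setup: read from a Blob/File, write an MP4 into memory.
    const input = new Input({ formats: ALL_FORMATS, source: new BlobSource(file) });
    const output = new Output({ format: new Mp4OutputFormat(), target: new BufferTarget() });

    const conversion = await Conversion.init({
        input,
        output,
        // Per-track function form: keep only the first video track; `n` is 1-based.
        video: (track, n) => (n === 1 ? {} : { discard: true }),
        // Trim the output to the input range [10 s, 70 s).
        trim: { start: 10, end: 70 },
        // We handle discarded tracks ourselves (see the isValid/discardedTracks sketch below).
        showWarnings: false,
    });
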
-
- /**
- * Video-specific options.
- * @group Conversion
- * @public
- */
- export type ConversionVideoOptions = {
- /** If `true`, all video tracks will be discarded and will not be present in the output. */
- discard?: boolean;
- /**
- * The desired width of the output video in pixels, defaulting to the video's natural display width. If height
- * is not set, it will be deduced automatically based on aspect ratio.
- */
- width?: number;
- /**
- * The desired height of the output video in pixels, defaulting to the video's natural display height. If width
- * is not set, it will be deduced automatically based on aspect ratio.
- */
- height?: number;
- /**
- * The fitting algorithm in case both width and height are set, or if the input video changes its size over time.
- *
- * - `'fill'` will stretch the image to fill the entire box, potentially altering aspect ratio.
- * - `'contain'` will contain the entire image within the box while preserving aspect ratio. This may lead to
- * letterboxing.
- * - `'cover'` will scale the image until the entire box is filled, while preserving aspect ratio.
- */
- fit?: 'fill' | 'contain' | 'cover';
- /**
- * The angle in degrees to rotate the input video by, clockwise. Rotation is applied before cropping and resizing.
- * This rotation is _in addition to_ the natural rotation of the input video as specified in input file's metadata.
- */
- rotate?: Rotation;
- /**
- * Specifies the rectangular region of the input video to crop to. The crop region will automatically be clamped to
- * the dimensions of the input video track. Cropping is performed after rotation but before resizing.
- */
- crop?: {
- /** The distance in pixels from the left edge of the source frame to the left edge of the crop rectangle. */
- left: number;
- /** The distance in pixels from the top edge of the source frame to the top edge of the crop rectangle. */
- top: number;
- /** The width in pixels of the crop rectangle. */
- width: number;
- /** The height in pixels of the crop rectangle. */
- height: number;
- };
- /**
- * The desired frame rate of the output video, in hertz. If not specified, the original input frame rate will
- * be used (which may be variable).
- */
- frameRate?: number;
- /** The desired output video codec. */
- codec?: VideoCodec;
- /** The desired bitrate of the output video. */
- bitrate?: number | Quality;
- /**
- * Whether to discard or keep the transparency information of the input video. The default is `'discard'`. Note that
- * for `'keep'` to produce a transparent video, you must use an output config that supports it, such as WebM with
- * VP9.
- */
- alpha?: 'discard' | 'keep';
- /**
- * The interval, in seconds, of how often frames are encoded as a key frame. The default is 5 seconds. Frequent key
- * frames improve seeking behavior but increase file size. When using multiple video tracks, you should give them
- * all the same key frame interval.
- *
- * Setting this fields forces a transcode.
- */
- keyFrameInterval?: number;
- /** When `true`, video will always be re-encoded instead of directly copying over the encoded samples. */
- forceTranscode?: boolean;
- /**
- * Allows for custom user-defined processing of video frames, e.g. for applying overlays, color transformations, or
- * timestamp modifications. Will be called for each input video sample after transformations and frame rate
- * corrections.
- *
- * Must return a {@link VideoSample} or a `CanvasImageSource`, an array of them, or `null` for dropping the frame.
- * When non-timestamped data is returned, the timestamp and duration from the source sample will be used. Rotation
- * metadata of the returned sample will be ignored.
- *
- * This function can also be used to manually resize frames. When doing so, you should signal the post-process
- * dimensions using the `processedWidth` and `processedHeight` fields, which enables the encoder to better know what
- * to expect. If these fields aren't set, Mediabunny will assume you won't perform any resizing.
- */
- process?: (sample: VideoSample) => MaybePromise<
- CanvasImageSource | VideoSample | (CanvasImageSource | VideoSample)[] | null
- >;
- /**
- * An optional hint specifying the width of video samples returned by the `process` function, for better
- * encoder configuration.
- */
- processedWidth?: number;
- /**
- * An optional hint specifying the height of video samples returned by the `process` function, for better
- * encoder configuration.
- */
- processedHeight?: number;
- };
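
A sketch of how these video fields combine, assuming `ConversionVideoOptions` is re-exported from the package root; the crop and resize numbers are arbitrary illustrative values.

    import type { ConversionVideoOptions } from 'mediabunny';

    const videoOptions: ConversionVideoOptions = {
        rotate: 90,                      // applied on top of the file's own rotation metadata
        crop: { left: 0, top: 140, width: 1920, height: 800 }, // after rotation, before resizing
        width: 1280,
        height: 720,
        fit: 'cover',                    // required because both width and height are set
        frameRate: 30,
        codec: 'av1',
        bitrate: 2_000_000,              // alternatively a Quality such as QUALITY_HIGH
        keyFrameInterval: 2,             // setting this forces a transcode
        // Drop every frame in the first second; everything else passes through unchanged.
        // No manual resizing happens here, so processedWidth/processedHeight are not needed.
        process: sample => (sample.timestamp < 1 ? null : sample),
    };
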
-
- /**
- * Audio-specific options.
- * @group Conversion
- * @public
- */
- export type ConversionAudioOptions = {
- /** If `true`, all audio tracks will be discarded and will not be present in the output. */
- discard?: boolean;
- /** The desired channel count of the output audio. */
- numberOfChannels?: number;
- /** The desired sample rate of the output audio, in hertz. */
- sampleRate?: number;
- /** The desired output audio codec. */
- codec?: AudioCodec;
- /** The desired bitrate of the output audio. */
- bitrate?: number | Quality;
- /** When `true`, audio will always be re-encoded instead of directly copying over the encoded samples. */
- forceTranscode?: boolean;
- /**
- * Allows for custom user-defined processing of audio samples, e.g. for applying audio effects, transformations, or
- * timestamp modifications. Will be called for each input audio sample after remixing and resampling.
- *
- * Must return an {@link AudioSample}, an array of them, or `null` for dropping the sample.
- *
- * This function can also be used to manually perform remixing or resampling. When doing so, you should signal the
- * post-process parameters using the `processedNumberOfChannels` and `processedSampleRate` fields, which enables the
- * encoder to better know what to expect. If these fields aren't set, Mediabunny will assume you won't perform
- * remixing or resampling.
- */
- process?: (sample: AudioSample) => MaybePromise<
- AudioSample | AudioSample[] | null
- >;
- /**
- * An optional hint specifying the channel count of audio samples returned by the `process` function, for better
- * encoder configuration.
- */
- processedNumberOfChannels?: number;
- /**
- * An optional hint specifying the sample rate of audio samples returned by the `process` function, for better
- * encoder configuration.
- */
- processedSampleRate?: number;
- };
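
A corresponding audio sketch with illustrative values; whether the requested codec can actually be encoded depends on the environment.

    import type { ConversionAudioOptions } from 'mediabunny';

    const audioOptions: ConversionAudioOptions = {
        numberOfChannels: 2,   // downmix to stereo
        sampleRate: 48000,     // resample to 48 kHz
        codec: 'aac',
        bitrate: 128_000,
        // Drop all audio samples before the 1-second mark; the rest pass through unchanged.
        // No manual remixing/resampling happens here, so the processed* hints are not needed.
        process: sample => (sample.timestamp < 1 ? null : sample),
    };
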
-
- /**
- * Subtitle-specific options.
- * @group Conversion
- * @public
- */
- export type ConversionSubtitleOptions = {
- /** If `true`, all subtitle tracks will be discarded and will not be present in the output. */
- discard?: boolean;
- /** The desired output subtitle codec. */
- codec?: SubtitleCodec;
- };
-
- const validateVideoOptions = (videoOptions: ConversionVideoOptions | undefined) => {
- if (videoOptions !== undefined && (!videoOptions || typeof videoOptions !== 'object')) {
- throw new TypeError('options.video, when provided, must be an object.');
- }
- if (videoOptions?.discard !== undefined && typeof videoOptions.discard !== 'boolean') {
- throw new TypeError('options.video.discard, when provided, must be a boolean.');
- }
- if (videoOptions?.forceTranscode !== undefined && typeof videoOptions.forceTranscode !== 'boolean') {
- throw new TypeError('options.video.forceTranscode, when provided, must be a boolean.');
- }
- if (videoOptions?.codec !== undefined && !VIDEO_CODECS.includes(videoOptions.codec)) {
- throw new TypeError(
- `options.video.codec, when provided, must be one of: ${VIDEO_CODECS.join(', ')}.`,
- );
- }
- if (
- videoOptions?.bitrate !== undefined
- && !(videoOptions.bitrate instanceof Quality)
- && (!Number.isInteger(videoOptions.bitrate) || videoOptions.bitrate <= 0)
- ) {
- throw new TypeError('options.video.bitrate, when provided, must be a positive integer or a quality.');
- }
- if (
- videoOptions?.width !== undefined
- && (!Number.isInteger(videoOptions.width) || videoOptions.width <= 0)
- ) {
- throw new TypeError('options.video.width, when provided, must be a positive integer.');
- }
- if (
- videoOptions?.height !== undefined
- && (!Number.isInteger(videoOptions.height) || videoOptions.height <= 0)
- ) {
- throw new TypeError('options.video.height, when provided, must be a positive integer.');
- }
- if (videoOptions?.fit !== undefined && !['fill', 'contain', 'cover'].includes(videoOptions.fit)) {
- throw new TypeError('options.video.fit, when provided, must be one of \'fill\', \'contain\', or \'cover\'.');
- }
- if (
- videoOptions?.width !== undefined
- && videoOptions.height !== undefined
- && videoOptions.fit === undefined
- ) {
- throw new TypeError(
- 'When both options.video.width and options.video.height are provided, options.video.fit must also be'
- + ' provided.',
- );
- }
- if (videoOptions?.rotate !== undefined && ![0, 90, 180, 270].includes(videoOptions.rotate)) {
- throw new TypeError('options.video.rotate, when provided, must be 0, 90, 180 or 270.');
- }
- if (videoOptions?.crop !== undefined) {
- validateCropRectangle(videoOptions.crop, 'options.video.');
- }
- if (
- videoOptions?.frameRate !== undefined
- && (!Number.isFinite(videoOptions.frameRate) || videoOptions.frameRate <= 0)
- ) {
- throw new TypeError('options.video.frameRate, when provided, must be a finite positive number.');
- }
- if (videoOptions?.alpha !== undefined && !['discard', 'keep'].includes(videoOptions.alpha)) {
- throw new TypeError('options.video.alpha, when provided, must be either \'discard\' or \'keep\'.');
- }
- if (
- videoOptions?.keyFrameInterval !== undefined
- && (!Number.isFinite(videoOptions.keyFrameInterval) || videoOptions.keyFrameInterval < 0)
- ) {
- throw new TypeError('options.video.keyFrameInterval, when provided, must be a non-negative number.');
- }
- if (videoOptions?.process !== undefined && typeof videoOptions.process !== 'function') {
- throw new TypeError('options.video.process, when provided, must be a function.');
- }
- if (
- videoOptions?.processedWidth !== undefined
- && (!Number.isInteger(videoOptions.processedWidth) || videoOptions.processedWidth <= 0)
- ) {
- throw new TypeError('options.video.processedWidth, when provided, must be a positive integer.');
- }
- if (
- videoOptions?.processedHeight !== undefined
- && (!Number.isInteger(videoOptions.processedHeight) || videoOptions.processedHeight <= 0)
- ) {
- throw new TypeError('options.video.processedHeight, when provided, must be a positive integer.');
- }
- };
-
- const validateAudioOptions = (audioOptions: ConversionAudioOptions | undefined) => {
- if (audioOptions !== undefined && (!audioOptions || typeof audioOptions !== 'object')) {
- throw new TypeError('options.audio, when provided, must be an object.');
- }
- if (audioOptions?.discard !== undefined && typeof audioOptions.discard !== 'boolean') {
- throw new TypeError('options.audio.discard, when provided, must be a boolean.');
- }
- if (audioOptions?.forceTranscode !== undefined && typeof audioOptions.forceTranscode !== 'boolean') {
- throw new TypeError('options.audio.forceTranscode, when provided, must be a boolean.');
- }
- if (audioOptions?.codec !== undefined && !AUDIO_CODECS.includes(audioOptions.codec)) {
- throw new TypeError(
- `options.audio.codec, when provided, must be one of: ${AUDIO_CODECS.join(', ')}.`,
- );
- }
- if (
- audioOptions?.bitrate !== undefined
- && !(audioOptions.bitrate instanceof Quality)
- && (!Number.isInteger(audioOptions.bitrate) || audioOptions.bitrate <= 0)
- ) {
- throw new TypeError('options.audio.bitrate, when provided, must be a positive integer or a quality.');
- }
- if (
- audioOptions?.numberOfChannels !== undefined
- && (!Number.isInteger(audioOptions.numberOfChannels) || audioOptions.numberOfChannels <= 0)
- ) {
- throw new TypeError('options.audio.numberOfChannels, when provided, must be a positive integer.');
- }
- if (
- audioOptions?.sampleRate !== undefined
- && (!Number.isInteger(audioOptions.sampleRate) || audioOptions.sampleRate <= 0)
- ) {
- throw new TypeError('options.audio.sampleRate, when provided, must be a positive integer.');
- }
- if (audioOptions?.process !== undefined && typeof audioOptions.process !== 'function') {
- throw new TypeError('options.audio.process, when provided, must be a function.');
- }
- if (
- audioOptions?.processedNumberOfChannels !== undefined
- && (!Number.isInteger(audioOptions.processedNumberOfChannels) || audioOptions.processedNumberOfChannels <= 0)
- ) {
- throw new TypeError('options.audio.processedNumberOfChannels, when provided, must be a positive integer.');
- }
- if (
- audioOptions?.processedSampleRate !== undefined
- && (!Number.isInteger(audioOptions.processedSampleRate) || audioOptions.processedSampleRate <= 0)
- ) {
- throw new TypeError('options.audio.processedSampleRate, when provided, must be a positive integer.');
- }
- };
-
- const validateSubtitleOptions = (subtitleOptions: ConversionSubtitleOptions | undefined) => {
- if (subtitleOptions !== undefined && (!subtitleOptions || typeof subtitleOptions !== 'object')) {
- throw new TypeError('options.subtitle, when provided, must be an object.');
- }
- if (subtitleOptions?.discard !== undefined && typeof subtitleOptions.discard !== 'boolean') {
- throw new TypeError('options.subtitle.discard, when provided, must be a boolean.');
- }
- if (subtitleOptions?.codec !== undefined && !SUBTITLE_CODECS.includes(subtitleOptions.codec)) {
- throw new TypeError(
- `options.subtitle.codec, when provided, must be one of: ${SUBTITLE_CODECS.join(', ')}.`,
- );
- }
- };
-
- const FALLBACK_NUMBER_OF_CHANNELS = 2;
- const FALLBACK_SAMPLE_RATE = 48000;
-
- /**
- * An input track that was discarded (excluded) from a {@link Conversion} alongside the discard reason.
- * @group Conversion
- * @public
- */
- export type DiscardedTrack = {
- /** The track that was discarded. */
- track: InputTrack;
- /**
- * The reason for discarding the track.
- *
- * - `'discarded_by_user'`: You discarded this track by setting `discard: true`.
- * - `'max_track_count_reached'`: The output had no more room for another track.
- * - `'max_track_count_of_type_reached'`: The output had no more room for another track of this type, or the output
- * doesn't support this track type at all.
- * - `'unknown_source_codec'`: We don't know the codec of the input track and therefore don't know what to do
- * with it.
- * - `'undecodable_source_codec'`: The input track's codec is known, but we are unable to decode it.
- * - `'no_encodable_target_codec'`: We can't find a codec that we are able to encode and that can be contained
- * within the output format. This reason can be hit if the environment doesn't support the necessary encoders, or if
- * you requested a codec that cannot be contained within the output format.
- */
- reason:
- | 'discarded_by_user'
- | 'max_track_count_reached'
- | 'max_track_count_of_type_reached'
- | 'unknown_source_codec'
- | 'undecodable_source_codec'
- | 'no_encodable_target_codec';
- };
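
In practice these reasons surface on `conversion.discardedTracks` after `Conversion.init()`; a short sketch, reusing the `input` and `output` from the earlier example:

    const conversion = await Conversion.init({ input, output });

    if (!conversion.isValid) {
        for (const { track, reason } of conversion.discardedTracks) {
            console.warn(`Track ${track.id} (${track.type}) was discarded: ${reason}`);
        }
        // e.g. 'no_encodable_target_codec' when targeting MP3 without the
        // @mediabunny/mp3-encoder extension registered
    }
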
-
- /**
- * Represents a media file conversion process, used to convert one media file into another. In addition to conversion,
- * this class can be used to resize and rotate video, resample audio, drop tracks, or trim to a specific time range.
- * @group Conversion
- * @public
- */
- export class Conversion {
- /** The input file. */
- readonly input: Input;
- /** The output file. */
- readonly output: Output;
-
- /** @internal */
- _options: ConversionOptions;
- /** @internal */
- _startTimestamp: number;
- /** @internal */
- _endTimestamp: number;
-
- /** @internal */
- _addedCounts: Record<TrackType, number> = {
- video: 0,
- audio: 0,
- subtitle: 0,
- };
-
- /** @internal */
- _totalTrackCount = 0;
-
- /** @internal */
- _trackPromises: Promise<void>[] = [];
-
- /** @internal */
- _started: Promise<void>;
- /** @internal */
- _start: () => void;
- /** @internal */
- _executed = false;
-
- /** @internal */
- _synchronizer = new TrackSynchronizer();
-
- /** @internal */
- _totalDuration: number | null = null;
- /** @internal */
- _maxTimestamps = new Map<number, number>(); // Track ID -> timestamp
-
- /** @internal */
- _canceled = false;
-
- /** @internal */
- _externalSubtitleSources: Array<{
- source: SubtitleSource;
- metadata: SubtitleTrackMetadata;
- contentProvider?: () => Promise<void>;
- }> = [];
-
- /**
- * A callback that is fired whenever the conversion progresses. Returns a number between 0 and 1, indicating the
- * completion of the conversion. Note that a progress of 1 doesn't necessarily mean the conversion is complete;
- * the conversion is complete once `execute()` resolves.
- *
- * In order for progress to be computed, this property must be set before `execute` is called.
- */
- onProgress?: (progress: number) => unknown = undefined;
- /** @internal */
- _computeProgress = false;
- /** @internal */
- _lastProgress = 0;
-
- /**
- * Whether this conversion, as it has been configured, is valid and can be executed. If this field is `false`, check
- * the `discardedTracks` field for reasons.
- */
- isValid = false;
- /** The list of tracks that are included in the output file. */
- readonly utilizedTracks: InputTrack[] = [];
- /** The list of tracks from the input file that have been discarded, alongside the discard reason. */
- readonly discardedTracks: DiscardedTrack[] = [];
-
- /** Initializes a new conversion process without starting the conversion. */
- static async init(options: ConversionOptions) {
- const conversion = new Conversion(options);
- await conversion._init();
-
- return conversion;
- }
-
- /** Creates a new Conversion instance (duh). */
- private constructor(options: ConversionOptions) {
- if (!options || typeof options !== 'object') {
- throw new TypeError('options must be an object.');
- }
- if (!(options.input instanceof Input)) {
- throw new TypeError('options.input must be an Input.');
- }
- if (!(options.output instanceof Output)) {
- throw new TypeError('options.output must be an Output.');
- }
- if (
- options.output._tracks.length > 0
- || Object.keys(options.output._metadataTags).length > 0
- || options.output.state !== 'pending'
- ) {
- throw new TypeError('options.output must be fresh: no tracks or metadata tags added and not started.');
- }
-
- if (typeof options.video !== 'function') {
- validateVideoOptions(options.video);
- }
-
- if (typeof options.audio !== 'function') {
- validateAudioOptions(options.audio);
- }
-
- if (typeof options.subtitle !== 'function') {
- validateSubtitleOptions(options.subtitle);
- }
-
- if (options.trim !== undefined && (!options.trim || typeof options.trim !== 'object')) {
- throw new TypeError('options.trim, when provided, must be an object.');
- }
- if (options.trim?.start !== undefined && (!Number.isFinite(options.trim.start) || options.trim.start < 0)) {
- throw new TypeError('options.trim.start, when provided, must be a non-negative number.');
- }
- if (options.trim?.end !== undefined && (!Number.isFinite(options.trim.end) || options.trim.end < 0)) {
- throw new TypeError('options.trim.end, when provided, must be a non-negative number.');
- }
- if (
- options.trim?.start !== undefined
- && options.trim.end !== undefined
- && options.trim.start >= options.trim.end) {
- throw new TypeError('options.trim.start must be less than options.trim.end.');
- }
- if (
- options.tags !== undefined
- && (typeof options.tags !== 'object' || !options.tags)
- && typeof options.tags !== 'function'
- ) {
- throw new TypeError('options.tags, when provided, must be an object or a function.');
- }
- if (typeof options.tags === 'object') {
- validateMetadataTags(options.tags);
- }
- if (options.showWarnings !== undefined && typeof options.showWarnings !== 'boolean') {
- throw new TypeError('options.showWarnings, when provided, must be a boolean.');
- }
-
- this._options = options;
- this.input = options.input;
- this.output = options.output;
-
- this._startTimestamp = options.trim?.start ?? 0;
- this._endTimestamp = options.trim?.end ?? Infinity;
-
- const { promise: started, resolve: start } = promiseWithResolvers();
- this._started = started;
- this._start = start;
- }
-
- /** @internal */
- async _init() {
- const inputTracks = await this.input.getTracks();
- const outputTrackCounts = this.output.format.getSupportedTrackCounts();
-
- let nVideo = 1;
- let nAudio = 1;
- let nSubtitle = 1;
-
- for (const track of inputTracks) {
- let trackOptions: ConversionVideoOptions | ConversionAudioOptions | ConversionSubtitleOptions | undefined = undefined;
- if (track.isVideoTrack()) {
- if (this._options.video) {
- if (typeof this._options.video === 'function') {
- trackOptions = await this._options.video(track, nVideo);
- validateVideoOptions(trackOptions);
- nVideo++;
- } else {
- trackOptions = this._options.video;
- }
- }
- } else if (track.isAudioTrack()) {
- if (this._options.audio) {
- if (typeof this._options.audio === 'function') {
- trackOptions = await this._options.audio(track, nAudio);
- validateAudioOptions(trackOptions);
- nAudio++;
- } else {
- trackOptions = this._options.audio;
- }
- }
- } else if (track.isSubtitleTrack()) {
- if (this._options.subtitle) {
- if (typeof this._options.subtitle === 'function') {
- trackOptions = await this._options.subtitle(track, nSubtitle);
- validateSubtitleOptions(trackOptions);
- nSubtitle++;
- } else {
- trackOptions = this._options.subtitle;
- }
- }
- } else {
- assert(false);
- }
-
- if (trackOptions?.discard) {
- this.discardedTracks.push({
- track,
- reason: 'discarded_by_user',
- });
- continue;
- }
-
- if (this._totalTrackCount === outputTrackCounts.total.max) {
- this.discardedTracks.push({
- track,
- reason: 'max_track_count_reached',
- });
- continue;
- }
-
- if (this._addedCounts[track.type] === outputTrackCounts[track.type].max) {
- this.discardedTracks.push({
- track,
- reason: 'max_track_count_of_type_reached',
- });
- continue;
- }
-
- if (track.isVideoTrack()) {
- await this._processVideoTrack(track, (trackOptions ?? {}) as ConversionVideoOptions);
- } else if (track.isAudioTrack()) {
- await this._processAudioTrack(track, (trackOptions ?? {}) as ConversionAudioOptions);
- } else if (track.isSubtitleTrack()) {
- await this._processSubtitleTrack(track, (trackOptions ?? {}) as ConversionSubtitleOptions);
- }
- }
-
- // Now, let's deal with metadata tags
-
- const inputTags = await this.input.getMetadataTags();
- let outputTags: MetadataTags;
-
- if (this._options.tags) {
- const result = typeof this._options.tags === 'function'
- ? await this._options.tags(inputTags)
- : this._options.tags;
- validateMetadataTags(result);
-
- outputTags = result;
- } else {
- outputTags = inputTags;
- }
-
- // Somewhat dirty but pragmatic
- const inputAndOutputFormatMatch = (await this.input.getFormat()).mimeType === this.output.format.mimeType;
- const rawTagsAreUnchanged = inputTags.raw === outputTags.raw;
-
- if (inputTags.raw && rawTagsAreUnchanged && !inputAndOutputFormatMatch) {
- // If the input and output formats aren't the same, copying over raw metadata tags makes no sense and only
- // results in junk tags, so let's cut them out.
- delete outputTags.raw;
- }
-
- this.output.setMetadataTags(outputTags);
-
- // Let's check if the conversion can actually be executed
- this.isValid = this._totalTrackCount >= outputTrackCounts.total.min
- && this._addedCounts.video >= outputTrackCounts.video.min
- && this._addedCounts.audio >= outputTrackCounts.audio.min
- && this._addedCounts.subtitle >= outputTrackCounts.subtitle.min;
-
- if (this._options.showWarnings ?? true) {
- const warnElements: unknown[] = [];
-
- const unintentionallyDiscardedTracks = this.discardedTracks.filter(x => x.reason !== 'discarded_by_user');
- if (unintentionallyDiscardedTracks.length > 0) {
- // Let's give the user a notice/warning about discarded tracks so they aren't confused
- warnElements.push(
- 'Some tracks had to be discarded from the conversion:', unintentionallyDiscardedTracks,
- );
- }
-
- if (!this.isValid) {
- warnElements.push('\n\n' + this._getInvalidityExplanation().join(''));
- }
-
- if (warnElements.length > 0) {
- console.warn(...warnElements);
- }
- }
- }
-
- /** @internal */
- _getInvalidityExplanation() {
- const elements: string[] = [];
-
- if (this.discardedTracks.length === 0) {
- elements.push(
- 'Due to missing tracks, this conversion cannot be executed.',
- );
- } else {
- const encodabilityIsTheProblem = this.discardedTracks.every(x =>
- x.reason === 'discarded_by_user' || x.reason === 'no_encodable_target_codec',
- );
-
- elements.push(
- 'Due to discarded tracks, this conversion cannot be executed.',
- );
-
- if (encodabilityIsTheProblem) {
- const codecs = this.discardedTracks.flatMap((x) => {
- if (x.reason === 'discarded_by_user') return [];
-
- if (x.track.type === 'video') {
- return this.output.format.getSupportedVideoCodecs();
- } else if (x.track.type === 'audio') {
- return this.output.format.getSupportedAudioCodecs();
- } else {
- return this.output.format.getSupportedSubtitleCodecs();
- }
- });
-
- if (codecs.length === 1) {
- elements.push(
- `\nTracks were discarded because your environment is not able to encode '${codecs[0]}'.`,
- );
- } else {
- elements.push(
- '\nTracks were discarded because your environment is not able to encode any of the following'
- + ` codecs: ${codecs.map(x => `'${x}'`).join(', ')}.`,
- );
- }
-
- if (codecs.includes('mp3')) {
- elements.push(
- `\nThe @mediabunny/mp3-encoder extension package provides support for encoding MP3.`,
- );
- }
- } else {
- elements.push('\nCheck the discardedTracks field for more info.');
- }
- }
-
- return elements;
- }
-
- /**
- * Adds an external subtitle track to the output. This can be called after `init()` but before `execute()`.
- * This is useful for adding subtitle tracks from separate files that are not part of the input video.
- *
- * @param source - The subtitle source to add
- * @param metadata - Optional metadata for the subtitle track
- * @param contentProvider - Optional async function that will be called after the output starts to add content to the subtitle source
- */
- addExternalSubtitleTrack(
- source: SubtitleSource,
- metadata: SubtitleTrackMetadata = {},
- contentProvider?: () => Promise<void>,
- ) {
- if (this._executed) {
- throw new Error('Cannot add subtitle tracks after conversion has been executed.');
- }
- if (this.output.state !== 'pending') {
- throw new Error('Cannot add subtitle tracks after output has been started.');
- }
-
- // Check track count limits
- const outputTrackCounts = this.output.format.getSupportedTrackCounts();
- const currentSubtitleCount = this._addedCounts.subtitle + this._externalSubtitleSources.length;
-
- if (currentSubtitleCount >= outputTrackCounts.subtitle.max) {
- throw new Error(
- `Cannot add more subtitle tracks. Maximum of ${outputTrackCounts.subtitle.max} subtitle track(s) allowed.`,
- );
- }
-
- const totalTrackCount = this._totalTrackCount + this._externalSubtitleSources.length + 1;
- if (totalTrackCount > outputTrackCounts.total.max) {
- throw new Error(
- `Cannot add more tracks. Maximum of ${outputTrackCounts.total.max} total track(s) allowed.`,
- );
- }
-
- this._externalSubtitleSources.push({ source, metadata, contentProvider });
-
- // Update validity check to include external subtitles
- this.isValid = this._totalTrackCount + this._externalSubtitleSources.length >= outputTrackCounts.total.min
- && this._addedCounts.video >= outputTrackCounts.video.min
- && this._addedCounts.audio >= outputTrackCounts.audio.min
- && this._addedCounts.subtitle + this._externalSubtitleSources.length >= outputTrackCounts.subtitle.min;
- }
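
A sketch of attaching a sidecar subtitle file through this method. The `TextSubtitleSource` constructor argument, the `add()`/`close()` calls and the `language` metadata field are assumptions based on the package's media-source API and should be checked against the actual documentation.

    const subtitleSource = new TextSubtitleSource('webvtt'); // assumed constructor signature

    conversion.addExternalSubtitleTrack(
        subtitleSource,
        { language: 'eng' }, // assumed metadata field
        // Content provider: runs once the output has started and the track is connected.
        async () => {
            const vtt = await fetch('/subs/en.vtt').then(r => r.text());
            await subtitleSource.add(vtt);
            subtitleSource.close();
        },
    );
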
-
- /**
- * Executes the conversion process. Resolves once conversion is complete.
- *
- * Will throw if `isValid` is `false`.
- */
- async execute() {
- if (!this.isValid) {
- throw new Error(
- 'Cannot execute this conversion because its output configuration is invalid. Make sure to always check'
- + ' the isValid field before executing a conversion.\n'
- + this._getInvalidityExplanation().join(''),
- );
- }
-
- if (this._executed) {
- throw new Error('Conversion cannot be executed twice.');
- }
- this._executed = true;
-
- if (this.onProgress) {
- this._computeProgress = true;
- this._totalDuration = Math.min(
- (await this.input.computeDuration()) - this._startTimestamp,
- this._endTimestamp - this._startTimestamp,
- );
-
- for (const track of this.utilizedTracks) {
- this._maxTimestamps.set(track.id, 0);
- }
-
- this.onProgress?.(0);
- }
-
- // Add external subtitle tracks before starting the output
- for (const { source, metadata } of this._externalSubtitleSources) {
- this.output.addSubtitleTrack(source, metadata);
- }
-
- await this.output.start();
- this._start();
-
- // Now that output has started and tracks are connected, run content providers
- const contentProviderPromises = this._externalSubtitleSources
- .filter(s => s.contentProvider)
- .map(s => s.contentProvider!());
-
- if (contentProviderPromises.length > 0) {
- this._trackPromises.push(...contentProviderPromises);
- }
-
- try {
- await Promise.all(this._trackPromises);
- } catch (error) {
- if (!this._canceled) {
- // Make sure to cancel to stop other encoding processes and clean up resources
- void this.cancel();
- }
-
- throw error;
- }
-
- if (this._canceled) {
- await new Promise(() => {}); // Never resolve
- }
-
- await this.output.finalize();
-
- if (this._computeProgress) {
- this.onProgress?.(1);
- }
- }
-
- /** Cancels the conversion process. Does nothing if the conversion is already complete. */
- async cancel() {
- if (this.output.state === 'finalizing' || this.output.state === 'finalized') {
- return;
- }
-
- if (this._canceled) {
- console.warn('Conversion already canceled.');
- return;
- }
-
- this._canceled = true;
- await this.output.cancel();
- }
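
Putting it together, the intended call sequence around `execute()` looks roughly like this (a sketch using only the members shown above):

    conversion.onProgress = progress => {
        // progress is 0..1; the callback must be assigned before execute(),
        // otherwise progress is never computed
        console.log(`Converting: ${(progress * 100).toFixed(1)} %`);
    };

    if (!conversion.isValid) {
        console.warn('Conversion not executable:', conversion.discardedTracks);
    } else {
        await conversion.execute(); // resolves once the output has been finalized
    }

    // To abort a running conversion from elsewhere (e.g. a UI button):
    // await conversion.cancel();
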
940
-
941
- /** @internal */
942
- async _processVideoTrack(track: InputVideoTrack, trackOptions: ConversionVideoOptions) {
943
- const sourceCodec = track.codec;
944
- if (!sourceCodec) {
945
- this.discardedTracks.push({
946
- track,
947
- reason: 'unknown_source_codec',
948
- });
949
- return;
950
- }
951
-
952
- let videoSource: VideoSource;
953
-
954
- const totalRotation = normalizeRotation(track.rotation + (trackOptions.rotate ?? 0));
955
- const outputSupportsRotation = this.output.format.supportsVideoRotationMetadata;
956
-
957
- const [rotatedWidth, rotatedHeight] = totalRotation % 180 === 0
958
- ? [track.codedWidth, track.codedHeight]
959
- : [track.codedHeight, track.codedWidth];
960
-
961
- const crop = trackOptions.crop;
962
- if (crop) {
963
- clampCropRectangle(crop, rotatedWidth, rotatedHeight);
964
- }
965
-
966
- const [originalWidth, originalHeight] = crop
967
- ? [crop.width, crop.height]
968
- : [rotatedWidth, rotatedHeight];
969
-
970
- let width = originalWidth;
971
- let height = originalHeight;
972
- const aspectRatio = width / height;
973
-
974
- // A lot of video encoders require that the dimensions be multiples of 2
975
- const ceilToMultipleOfTwo = (value: number) => Math.ceil(value / 2) * 2;
976
-
977
- if (trackOptions.width !== undefined && trackOptions.height === undefined) {
978
- width = ceilToMultipleOfTwo(trackOptions.width);
979
- height = ceilToMultipleOfTwo(Math.round(width / aspectRatio));
980
- } else if (trackOptions.width === undefined && trackOptions.height !== undefined) {
981
- height = ceilToMultipleOfTwo(trackOptions.height);
982
- width = ceilToMultipleOfTwo(Math.round(height * aspectRatio));
983
- } else if (trackOptions.width !== undefined && trackOptions.height !== undefined) {
984
- width = ceilToMultipleOfTwo(trackOptions.width);
985
- height = ceilToMultipleOfTwo(trackOptions.height);
986
- }
987
-
988
- const firstTimestamp = await track.getFirstTimestamp();
989
- const needsTranscode = !!trackOptions.forceTranscode
990
- || this._startTimestamp > 0
991
- || firstTimestamp < 0
992
- || !!trackOptions.frameRate
993
- || trackOptions.keyFrameInterval !== undefined
994
- || trackOptions.process !== undefined;
995
- let needsRerender = width !== originalWidth
996
- || height !== originalHeight
997
- // TODO This is suboptimal: Forcing a rerender when both rotation and process are set is not
998
- // performance-optimal, but right now there's no other way because we can't change the track rotation
999
- // metadata after the output has already started. Should be possible with API changes in v2, though!
1000
- || (totalRotation !== 0 && (!outputSupportsRotation || trackOptions.process !== undefined))
1001
- || !!crop;
1002
-
1003
- const alpha = trackOptions.alpha ?? 'discard';
1004
-
1005
- let videoCodecs = this.output.format.getSupportedVideoCodecs();
1006
- if (
1007
- !needsTranscode
1008
- && !trackOptions.bitrate
1009
- && !needsRerender
1010
- && videoCodecs.includes(sourceCodec)
1011
- && (!trackOptions.codec || trackOptions.codec === sourceCodec)
1012
- ) {
1013
- // Fast path, we can simply copy over the encoded packets
1014
-
1015
- const source = new EncodedVideoPacketSource(sourceCodec);
1016
- videoSource = source;
1017
-
1018
- this._trackPromises.push((async () => {
1019
- await this._started;
1020
-
1021
- const sink = new EncodedPacketSink(track);
1022
- const decoderConfig = await track.getDecoderConfig();
1023
- const meta: EncodedVideoChunkMetadata = { decoderConfig: decoderConfig ?? undefined };
1024
- const endPacket = Number.isFinite(this._endTimestamp)
1025
- ? await sink.getPacket(this._endTimestamp, { metadataOnly: true }) ?? undefined
1026
- : undefined;
1027
-
1028
- for await (const packet of sink.packets(undefined, endPacket, { verifyKeyPackets: true })) {
1029
- if (this._canceled) {
1030
- return;
1031
- }
1032
-
1033
- if (alpha === 'discard') {
1034
- // Feels hacky given that the rest of the packet is readonly. But, works for now.
1035
- delete packet.sideData.alpha;
1036
- delete packet.sideData.alphaByteLength;
1037
- }
1038
-
1039
- this._reportProgress(track.id, packet.timestamp);
1040
- await source.add(packet, meta);
1041
-
1042
- if (this._synchronizer.shouldWait(track.id, packet.timestamp)) {
1043
- await this._synchronizer.wait(packet.timestamp);
1044
- }
1045
- }
1046
-
1047
- source.close();
1048
- this._synchronizer.closeTrack(track.id);
1049
- })());
1050
- } else {
1051
- // We need to decode & reencode the video
1052
-
1053
- const canDecode = await track.canDecode();
1054
- if (!canDecode) {
1055
- this.discardedTracks.push({
1056
- track,
1057
- reason: 'undecodable_source_codec',
1058
- });
1059
- return;
1060
- }
1061
-
1062
- if (trackOptions.codec) {
1063
- videoCodecs = videoCodecs.filter(codec => codec === trackOptions.codec);
1064
- }
1065
-
1066
- const bitrate = trackOptions.bitrate ?? QUALITY_HIGH;
1067
-
1068
- const encodableCodec = await getFirstEncodableVideoCodec(videoCodecs, {
1069
- width: trackOptions.process && trackOptions.processedWidth
1070
- ? trackOptions.processedWidth
1071
- : width,
1072
- height: trackOptions.process && trackOptions.processedHeight
1073
- ? trackOptions.processedHeight
1074
- : height,
1075
- bitrate,
1076
- });
1077
- if (!encodableCodec) {
1078
- this.discardedTracks.push({
1079
- track,
1080
- reason: 'no_encodable_target_codec',
1081
- });
1082
- return;
1083
- }
1084
-
1085
- const encodingConfig: VideoEncodingConfig = {
1086
- codec: encodableCodec,
1087
- bitrate,
1088
- keyFrameInterval: trackOptions.keyFrameInterval,
1089
- sizeChangeBehavior: trackOptions.fit ?? 'passThrough',
1090
- alpha,
1091
- };
1092
-
1093
- const source = new VideoSampleSource(encodingConfig);
1094
- videoSource = source;
1095
-
1096
- if (!needsRerender) {
1097
- // If we're directly passing decoded samples back to the encoder, sometimes the encoder may error due
1098
- // to lack of support for certain video frame formats, like when HDR is at play. To check for this, we
1099
- // first try to pass a single frame to the encoder to see how it behaves. If it throws, we then fall
1100
- // back to the rerender path.
1101
- //
1102
- // Creating a new temporary Output is sort of hacky, but due to a lack of an isolated encoder API right
1103
- // now, this is the simplest way. Will refactor in the future! TODO
1104
-
1105
- const tempOutput = new Output({
1106
- format: new Mp4OutputFormat(), // Supports all video codecs
1107
- target: new NullTarget(),
1108
- });
1109
-
1110
- const tempSource = new VideoSampleSource(encodingConfig);
1111
- tempOutput.addVideoTrack(tempSource);
1112
-
1113
- await tempOutput.start();
1114
-
1115
- const sink = new VideoSampleSink(track);
1116
- const firstSample = await sink.getSample(firstTimestamp); // Let's just use the first sample
1117
-
1118
- if (firstSample) {
1119
- try {
1120
- await tempSource.add(firstSample);
1121
- firstSample.close();
1122
- await tempOutput.finalize();
1123
- } catch (error) {
1124
- console.info('Error when probing encoder support. Falling back to rerender path.', error);
1125
- needsRerender = true;
1126
- void tempOutput.cancel();
1127
- }
1128
- } else {
1129
- await tempOutput.cancel();
1130
- }
1131
- }
1132
-
1133
- if (needsRerender) {
1134
- this._trackPromises.push((async () => {
1135
- await this._started;
1136
-
1137
- const sink = new CanvasSink(track, {
1138
- width,
1139
- height,
1140
- fit: trackOptions.fit ?? 'fill',
1141
- rotation: totalRotation, // Bake the rotation into the output
1142
- crop: trackOptions.crop,
1143
- poolSize: 1,
1144
- alpha: alpha === 'keep',
1145
- });
1146
- const iterator = sink.canvases(this._startTimestamp, this._endTimestamp);
1147
- const frameRate = trackOptions.frameRate;
1148
-
1149
- let lastCanvas: HTMLCanvasElement | OffscreenCanvas | null = null;
1150
- let lastCanvasTimestamp: number | null = null;
1151
- let lastCanvasEndTimestamp: number | null = null;
1152
-
1153
- /** Repeats the last sample to pad out the time until the specified timestamp. */
1154
- const padFrames = async (until: number) => {
1155
- assert(lastCanvas);
1156
- assert(frameRate !== undefined);
1157
-
1158
- const frameDifference = Math.round((until - lastCanvasTimestamp!) * frameRate);
1159
-
1160
- for (let i = 1; i < frameDifference; i++) {
1161
- const sample = new VideoSample(lastCanvas, {
1162
- timestamp: lastCanvasTimestamp! + i / frameRate,
1163
- duration: 1 / frameRate,
1164
- });
1165
- await this._registerVideoSample(track, trackOptions, source, sample);
1166
- sample.close();
1167
- }
1168
- };
1169
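// The loop below aligns each canvas timestamp to the frame grid and relies on padFrames to fill gaps.
// Worked example (illustrative), with frameRate = 30: a decoded frame at t ≈ 0.034 s aligns to
// floor(0.034 * 30) / 30 = 1/30 ≈ 0.0333 s. If the previously emitted frame sits at 0.0333 s and the next
// decoded frame aligns to 3/30 = 0.1 s, padFrames(0.1) computes round((0.1 - 0.0333) * 30) = 2 and
// therefore re-emits the previous canvas once, at 0.0333 + 1/30 ≈ 0.0667 s, before the new frame is added.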
-
1170
- for await (const { canvas, timestamp, duration } of iterator) {
1171
- if (this._canceled) {
1172
- return;
1173
- }
1174
-
1175
- let adjustedSampleTimestamp = Math.max(timestamp - this._startTimestamp, 0);
1176
- lastCanvasEndTimestamp = adjustedSampleTimestamp + duration;
1177
-
1178
- if (frameRate !== undefined) {
1179
- // Logic for skipping/repeating frames when a frame rate is set
1180
- const alignedTimestamp = Math.floor(adjustedSampleTimestamp * frameRate) / frameRate;
1181
-
1182
- if (lastCanvas !== null) {
1183
- if (alignedTimestamp <= lastCanvasTimestamp!) {
1184
- lastCanvas = canvas;
1185
- lastCanvasTimestamp = alignedTimestamp;
1186
-
1187
- // Skip this sample, since we already added one for this frame
1188
- continue;
1189
- } else {
1190
- // Check if we may need to repeat the previous frame
1191
- await padFrames(alignedTimestamp);
1192
- }
1193
- }
1194
-
1195
- adjustedSampleTimestamp = alignedTimestamp;
1196
- }
1197
-
1198
- const sample = new VideoSample(canvas, {
1199
- timestamp: adjustedSampleTimestamp,
1200
- duration: frameRate !== undefined ? 1 / frameRate : duration,
1201
- });
1202
- await this._registerVideoSample(track, trackOptions, source, sample);
1203
- sample.close();
1204
-
1205
- if (frameRate !== undefined) {
1206
- lastCanvas = canvas;
1207
- lastCanvasTimestamp = adjustedSampleTimestamp;
1208
- }
1209
- }
1210
-
1211
- if (lastCanvas) {
1212
- assert(lastCanvasEndTimestamp !== null);
1213
- assert(frameRate !== undefined);
1214
-
1215
- // If necessary, pad until the end timestamp of the last sample
1216
- await padFrames(Math.floor(lastCanvasEndTimestamp * frameRate) / frameRate);
1217
- }
1218
-
1219
- source.close();
1220
- this._synchronizer.closeTrack(track.id);
1221
- })());
1222
- } else {
1223
- this._trackPromises.push((async () => {
1224
- await this._started;
1225
-
1226
- const sink = new VideoSampleSink(track);
1227
- const frameRate = trackOptions.frameRate;
1228
-
1229
- let lastSample: VideoSample | null = null;
1230
- let lastSampleTimestamp: number | null = null;
1231
- let lastSampleEndTimestamp: number | null = null;
1232
-
1233
- /** Repeats the last sample to pad out the time until the specified timestamp. */
1234
- const padFrames = async (until: number) => {
1235
- assert(lastSample);
1236
- assert(frameRate !== undefined);
1237
-
1238
- const frameDifference = Math.round((until - lastSampleTimestamp!) * frameRate);
1239
-
1240
- for (let i = 1; i < frameDifference; i++) {
1241
- lastSample.setTimestamp(lastSampleTimestamp! + i / frameRate);
1242
- lastSample.setDuration(1 / frameRate);
1243
- await this._registerVideoSample(track, trackOptions, source, lastSample);
1244
- }
1245
-
1246
- lastSample.close();
1247
- };
1248
-
1249
- for await (const sample of sink.samples(this._startTimestamp, this._endTimestamp)) {
1250
- if (this._canceled) {
1251
- lastSample?.close();
1252
- return;
1253
- }
1254
-
1255
- let adjustedSampleTimestamp = Math.max(sample.timestamp - this._startTimestamp, 0);
1256
- lastSampleEndTimestamp = adjustedSampleTimestamp + sample.duration;
1257
-
1258
- if (frameRate !== undefined) {
1259
- // Logic for skipping/repeating frames when a frame rate is set
1260
- const alignedTimestamp = Math.floor(adjustedSampleTimestamp * frameRate) / frameRate;
1261
-
1262
- if (lastSample !== null) {
1263
- if (alignedTimestamp <= lastSampleTimestamp!) {
1264
- lastSample.close();
1265
- lastSample = sample;
1266
- lastSampleTimestamp = alignedTimestamp;
1267
-
1268
- // Skip this sample, since we already added one for this frame
1269
- continue;
1270
- } else {
1271
- // Check if we may need to repeat the previous frame
1272
- await padFrames(alignedTimestamp);
1273
- }
1274
- }
1275
-
1276
- adjustedSampleTimestamp = alignedTimestamp;
1277
- sample.setDuration(1 / frameRate);
1278
- }
1279
-
1280
- sample.setTimestamp(adjustedSampleTimestamp);
1281
- await this._registerVideoSample(track, trackOptions, source, sample);
1282
-
1283
- if (frameRate !== undefined) {
1284
- lastSample = sample;
1285
- lastSampleTimestamp = adjustedSampleTimestamp;
1286
- } else {
1287
- sample.close();
1288
- }
1289
- }
1290
-
1291
- if (lastSample) {
1292
- assert(lastSampleEndTimestamp !== null);
1293
- assert(frameRate !== undefined);
1294
-
1295
- // If necessary, pad until the end timestamp of the last sample
1296
- await padFrames(Math.floor(lastSampleEndTimestamp * frameRate) / frameRate);
1297
- }
1298
-
1299
- source.close();
1300
- this._synchronizer.closeTrack(track.id);
1301
- })());
1302
- }
1303
- }
1304
-
1305
- this.output.addVideoTrack(videoSource, {
1306
- frameRate: trackOptions.frameRate,
1307
- // TODO: This condition can be removed when all demuxers properly homogenize to BCP47 in v2
1308
- languageCode: isIso639Dash2LanguageCode(track.languageCode) ? track.languageCode : undefined,
1309
- name: track.name ?? undefined,
1310
- disposition: track.disposition,
1311
- rotation: needsRerender ? 0 : totalRotation, // Rerendering will bake the rotation into the output
1312
- });
1313
- this._addedCounts.video++;
1314
- this._totalTrackCount++;
1315
-
1316
- this.utilizedTracks.push(track);
1317
- }
1318
-
1319
- /** @internal */
1320
- async _registerVideoSample(
1321
- track: InputVideoTrack,
1322
- trackOptions: ConversionVideoOptions,
1323
- source: VideoSampleSource,
1324
- sample: VideoSample,
1325
- ) {
1326
- if (this._canceled) {
1327
- return;
1328
- }
1329
-
1330
- this._reportProgress(track.id, sample.timestamp);
1331
-
1332
- let finalSamples: VideoSample[];
1333
- if (!trackOptions.process) {
1334
- finalSamples = [sample];
1335
- } else {
1336
- let processed = trackOptions.process(sample);
1337
- if (processed instanceof Promise) processed = await processed;
1338
-
1339
- if (!Array.isArray(processed)) {
1340
- processed = processed === null ? [] : [processed];
1341
- }
1342
-
1343
- finalSamples = processed.map((x) => {
1344
- if (x instanceof VideoSample) {
1345
- return x;
1346
- }
1347
-
1348
- if (typeof VideoFrame !== 'undefined' && x instanceof VideoFrame) {
1349
- return new VideoSample(x);
1350
- }
1351
-
1352
- // Calling the VideoSample constructor here will automatically handle input validation for us
1353
- // (it throws for any invalid argument).
1354
- return new VideoSample(x, {
1355
- timestamp: sample.timestamp,
1356
- duration: sample.duration,
1357
- });
1358
- });
1359
- }
1360
-
1361
- for (const finalSample of finalSamples) {
1362
- if (this._canceled) {
1363
- break;
1364
- }
1365
-
1366
- await source.add(finalSample);
1367
-
1368
- if (this._synchronizer.shouldWait(track.id, finalSample.timestamp)) {
1369
- await this._synchronizer.wait(finalSample.timestamp);
1370
- }
1371
- }
1372
-
1373
- for (const finalSample of finalSamples) {
1374
- if (finalSample !== sample) {
1375
- finalSample.close();
1376
- }
1377
- }
1378
- }
1379
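// Illustrative usage of the handling above (hypothetical callback, not part of the library): a process
// function that drops every second frame by returning null, which the code above turns into an empty
// sample list for that input:
//
//   let frameIndex = 0;
//   const process = (sample: VideoSample) => (frameIndex++ % 2 === 0 ? sample : null);
//
// Returning the input sample itself is safe here because only samples other than the original are closed
// after being written.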
-
1380
- /** @internal */
1381
- async _processAudioTrack(track: InputAudioTrack, trackOptions: ConversionAudioOptions) {
1382
- const sourceCodec = track.codec;
1383
- if (!sourceCodec) {
1384
- this.discardedTracks.push({
1385
- track,
1386
- reason: 'unknown_source_codec',
1387
- });
1388
- return;
1389
- }
1390
-
1391
- let audioSource: AudioSource;
1392
-
1393
- const originalNumberOfChannels = track.numberOfChannels;
1394
- const originalSampleRate = track.sampleRate;
1395
-
1396
- const firstTimestamp = await track.getFirstTimestamp();
1397
-
1398
- let numberOfChannels = trackOptions.numberOfChannels ?? originalNumberOfChannels;
1399
- let sampleRate = trackOptions.sampleRate ?? originalSampleRate;
1400
- let needsResample = numberOfChannels !== originalNumberOfChannels
1401
- || sampleRate !== originalSampleRate
1402
- || this._startTimestamp > 0
1403
- || firstTimestamp < 0;
1404
-
1405
- let audioCodecs = this.output.format.getSupportedAudioCodecs();
1406
- if (
1407
- !trackOptions.forceTranscode
1408
- && !trackOptions.bitrate
1409
- && !needsResample
1410
- && audioCodecs.includes(sourceCodec)
1411
- && (!trackOptions.codec || trackOptions.codec === sourceCodec)
1412
- && !trackOptions.process
1413
- ) {
1414
- // Fast path, we can simply copy over the encoded packets
1415
-
1416
- const source = new EncodedAudioPacketSource(sourceCodec);
1417
- audioSource = source;
1418
-
1419
- this._trackPromises.push((async () => {
1420
- await this._started;
1421
-
1422
- const sink = new EncodedPacketSink(track);
1423
- const decoderConfig = await track.getDecoderConfig();
1424
- const meta: EncodedAudioChunkMetadata = { decoderConfig: decoderConfig ?? undefined };
1425
- const endPacket = Number.isFinite(this._endTimestamp)
1426
- ? await sink.getPacket(this._endTimestamp, { metadataOnly: true }) ?? undefined
1427
- : undefined;
1428
-
1429
- for await (const packet of sink.packets(undefined, endPacket)) {
1430
- if (this._canceled) {
1431
- return;
1432
- }
1433
-
1434
- this._reportProgress(track.id, packet.timestamp);
1435
- await source.add(packet, meta);
1436
-
1437
- if (this._synchronizer.shouldWait(track.id, packet.timestamp)) {
1438
- await this._synchronizer.wait(packet.timestamp);
1439
- }
1440
- }
1441
-
1442
- source.close();
1443
- this._synchronizer.closeTrack(track.id);
1444
- })());
1445
- } else {
1446
- // We need to decode & reencode the audio
1447
-
1448
- const canDecode = await track.canDecode();
1449
- if (!canDecode) {
1450
- this.discardedTracks.push({
1451
- track,
1452
- reason: 'undecodable_source_codec',
1453
- });
1454
- return;
1455
- }
1456
-
1457
- let codecOfChoice: AudioCodec | null = null;
1458
-
1459
- if (trackOptions.codec) {
1460
- audioCodecs = audioCodecs.filter(codec => codec === trackOptions.codec);
1461
- }
1462
-
1463
- const bitrate = trackOptions.bitrate ?? QUALITY_HIGH;
1464
-
1465
- const encodableCodecs = await getEncodableAudioCodecs(audioCodecs, {
1466
- numberOfChannels: trackOptions.process && trackOptions.processedNumberOfChannels
1467
- ? trackOptions.processedNumberOfChannels
1468
- : numberOfChannels,
1469
- sampleRate: trackOptions.process && trackOptions.processedSampleRate
1470
- ? trackOptions.processedSampleRate
1471
- : sampleRate,
1472
- bitrate,
1473
- });
1474
-
1475
- if (
1476
- !encodableCodecs.some(codec => (NON_PCM_AUDIO_CODECS as readonly string[]).includes(codec))
1477
- && audioCodecs.some(codec => (NON_PCM_AUDIO_CODECS as readonly string[]).includes(codec))
1478
- && (numberOfChannels !== FALLBACK_NUMBER_OF_CHANNELS || sampleRate !== FALLBACK_SAMPLE_RATE)
1479
- ) {
1480
- // We could not find a compatible non-PCM codec despite the container supporting them. This can be
1481
- // caused by strange channel count or sample rate configurations. Therefore, let's try again but with
1482
- // fallback parameters.
1483
-
1484
- const encodableCodecsWithDefaultParams = await getEncodableAudioCodecs(audioCodecs, {
1485
- numberOfChannels: FALLBACK_NUMBER_OF_CHANNELS,
1486
- sampleRate: FALLBACK_SAMPLE_RATE,
1487
- bitrate,
1488
- });
1489
-
1490
- const nonPcmCodec = encodableCodecsWithDefaultParams
1491
- .find(codec => (NON_PCM_AUDIO_CODECS as readonly string[]).includes(codec));
1492
- if (nonPcmCodec) {
1493
- // We are able to encode using a non-PCM codec, but it'll require resampling
1494
- needsResample = true;
1495
- codecOfChoice = nonPcmCodec;
1496
- numberOfChannels = FALLBACK_NUMBER_OF_CHANNELS;
1497
- sampleRate = FALLBACK_SAMPLE_RATE;
1498
- }
1499
- } else {
1500
- codecOfChoice = encodableCodecs[0] ?? null;
1501
- }
1502
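// Illustrative scenario for the fallback above (FALLBACK_NUMBER_OF_CHANNELS and FALLBACK_SAMPLE_RATE are
// constants defined elsewhere in this file): a source with an unusual layout, say 3 channels at 22050 Hz,
// may not be encodable with any non-PCM codec the container supports. Retrying with the fallback channel
// count and sample rate can succeed, at the cost of forcing the audio to be resampled to those parameters.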
-
1503
- if (codecOfChoice === null) {
1504
- this.discardedTracks.push({
1505
- track,
1506
- reason: 'no_encodable_target_codec',
1507
- });
1508
- return;
1509
- }
1510
-
1511
- if (needsResample) {
1512
- audioSource = this._resampleAudio(
1513
- track,
1514
- trackOptions,
1515
- codecOfChoice,
1516
- numberOfChannels,
1517
- sampleRate,
1518
- bitrate,
1519
- );
1520
- } else {
1521
- const source = new AudioSampleSource({
1522
- codec: codecOfChoice,
1523
- bitrate,
1524
- });
1525
- audioSource = source;
1526
-
1527
- this._trackPromises.push((async () => {
1528
- await this._started;
1529
-
1530
- const sink = new AudioSampleSink(track);
1531
- for await (const sample of sink.samples(undefined, this._endTimestamp)) {
1532
- if (this._canceled) {
1533
- return;
1534
- }
1535
-
1536
- await this._registerAudioSample(track, trackOptions, source, sample);
1537
- sample.close();
1538
- }
1539
-
1540
- source.close();
1541
- this._synchronizer.closeTrack(track.id);
1542
- })());
1543
- }
1544
- }
1545
-
1546
- this.output.addAudioTrack(audioSource, {
1547
- // TODO: This condition can be removed when all demuxers properly homogenize to BCP47 in v2
1548
- languageCode: isIso639Dash2LanguageCode(track.languageCode) ? track.languageCode : undefined,
1549
- name: track.name ?? undefined,
1550
- disposition: track.disposition,
1551
- });
1552
- this._addedCounts.audio++;
1553
- this._totalTrackCount++;
1554
-
1555
- this.utilizedTracks.push(track);
1556
- }
1557
-
1558
-
1559
- /** @internal */
1560
- async _registerAudioSample(
1561
- track: InputAudioTrack,
1562
- trackOptions: ConversionAudioOptions,
1563
- source: AudioSampleSource,
1564
- sample: AudioSample,
1565
- ) {
1566
- if (this._canceled) {
1567
- return;
1568
- }
1569
-
1570
- this._reportProgress(track.id, sample.timestamp);
1571
-
1572
- let finalSamples: AudioSample[];
1573
- if (!trackOptions.process) {
1574
- finalSamples = [sample];
1575
- } else {
1576
- let processed = trackOptions.process(sample);
1577
- if (processed instanceof Promise) processed = await processed;
1578
-
1579
- if (!Array.isArray(processed)) {
1580
- processed = processed === null ? [] : [processed];
1581
- }
1582
-
1583
- if (!processed.every(x => x instanceof AudioSample)) {
1584
- throw new TypeError(
1585
- 'The audio process function must return an AudioSample, null, or an array of AudioSamples.',
1586
- );
1587
- }
1588
-
1589
- finalSamples = processed;
1590
- }
1591
-
1592
- for (const finalSample of finalSamples) {
1593
- if (this._canceled) {
1594
- break;
1595
- }
1596
-
1597
- await source.add(finalSample);
1598
-
1599
- if (this._synchronizer.shouldWait(track.id, finalSample.timestamp)) {
1600
- await this._synchronizer.wait(finalSample.timestamp);
1601
- }
1602
- }
1603
-
1604
- for (const finalSample of finalSamples) {
1605
- if (finalSample !== sample) {
1606
- finalSample.close();
1607
- }
1608
- }
1609
- }
1610
-
1611
- /** @internal */
1612
- async _processSubtitleTrack(track: InputSubtitleTrack, trackOptions: ConversionSubtitleOptions) {
1613
- const sourceCodec = track.codec;
1614
- if (!sourceCodec) {
1615
- this.discardedTracks.push({
1616
- track,
1617
- reason: 'unknown_source_codec',
1618
- });
1619
- return;
1620
- }
1621
-
1622
- // Determine target codec
1623
- let targetCodec = trackOptions.codec ?? sourceCodec;
1624
- const supportedCodecs = this.output.format.getSupportedSubtitleCodecs();
1625
-
1626
- // Check if target codec is supported by output format
1627
- if (!supportedCodecs.includes(targetCodec)) {
1628
- // Try to use source codec if no specific codec was requested
1629
- if (!trackOptions.codec && supportedCodecs.includes(sourceCodec)) {
1630
- targetCodec = sourceCodec;
1631
- } else {
1632
- // If a specific codec was requested but isn't supported, or the source codec isn't supported, discard
1633
- this.discardedTracks.push({
1634
- track,
1635
- reason: 'no_encodable_target_codec',
1636
- });
1637
- return;
1638
- }
1639
- }
1640
-
1641
- // Create subtitle source
1642
- const subtitleSource = new TextSubtitleSource(targetCodec);
1643
-
1644
- // Add track promise to extract and add subtitle cues
1645
- this._trackPromises.push((async () => {
1646
- await this._started;
1647
-
1648
- let subtitleText: string;
1649
-
1650
- // If no trim or codec conversion is needed, use the efficient export method
1651
- if (this._startTimestamp === 0 && !Number.isFinite(this._endTimestamp) && targetCodec === sourceCodec) {
1652
- subtitleText = await track.exportToText();
1653
- } else {
1654
- // Extract and adjust cues for trim/conversion
1655
- const cues: SubtitleCue[] = [];
1656
- for await (const cue of track.getCues()) {
1657
- const cueEndTime = cue.timestamp + cue.duration;
1658
-
1659
- // Apply trim if needed
1660
- if (this._startTimestamp > 0 || Number.isFinite(this._endTimestamp)) {
1661
- // Skip cues completely outside trim range
1662
- if (cueEndTime <= this._startTimestamp || cue.timestamp >= this._endTimestamp) {
1663
- continue;
1664
- }
1665
-
1666
- // Adjust cue timing
1667
- const adjustedTimestamp = Math.max(cue.timestamp - this._startTimestamp, 0);
1668
- const adjustedEndTime = Math.min(cueEndTime - this._startTimestamp, this._endTimestamp - this._startTimestamp);
1669
-
1670
- cues.push({
1671
- ...cue,
1672
- timestamp: adjustedTimestamp,
1673
- duration: adjustedEndTime - adjustedTimestamp,
1674
- });
1675
- } else {
1676
- cues.push(cue);
1677
- }
1678
-
1679
- if (this._canceled) {
1680
- return;
1681
- }
1682
- }
1683
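// Worked example for the trim handling above (illustrative): with _startTimestamp = 10 s and
// _endTimestamp = 20 s, a cue starting at 9.5 s with a 2 s duration is kept, shifted to start at 0 s, and
// clipped to a 1.5 s duration; a cue starting at 19.5 s with a 3 s duration stays at 9.5 s with its
// duration clipped to 0.5 s; a cue starting at 21 s is skipped entirely.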
-
1684
- // Convert to target format
1685
- if (targetCodec === 'srt') {
1686
- subtitleText = formatCuesToSrt(cues);
1687
- } else if (targetCodec === 'webvtt') {
1688
- subtitleText = formatCuesToWebVTT(cues);
1689
- } else if (targetCodec === 'ass' || targetCodec === 'ssa') {
1690
- subtitleText = formatCuesToAss(cues, '');
1691
- } else {
1692
- // For other formats (tx3g, ttml), export from track
1693
- subtitleText = await track.exportToText(targetCodec);
1694
- }
1695
- }
1696
-
1697
- await subtitleSource.add(subtitleText);
1698
- subtitleSource.close();
1699
- })());
1700
-
1701
- this.output.addSubtitleTrack(subtitleSource, {
1702
- languageCode: isIso639Dash2LanguageCode(track.languageCode) ? track.languageCode : undefined,
1703
- name: track.name ?? undefined,
1704
- });
1705
- this._addedCounts.subtitle++;
1706
- this._totalTrackCount++;
1707
-
1708
- this.utilizedTracks.push(track);
1709
- }
1710
-
1711
- /** @internal */
1712
- _resampleAudio(
1713
- track: InputAudioTrack,
1714
- trackOptions: ConversionAudioOptions,
1715
- codec: AudioCodec,
1716
- targetNumberOfChannels: number,
1717
- targetSampleRate: number,
1718
- bitrate: number | Quality,
1719
- ) {
1720
- const source = new AudioSampleSource({
1721
- codec,
1722
- bitrate,
1723
- });
1724
-
1725
- this._trackPromises.push((async () => {
1726
- await this._started;
1727
-
1728
- const resampler = new AudioResampler({
1729
- targetNumberOfChannels,
1730
- targetSampleRate,
1731
- startTime: this._startTimestamp,
1732
- endTime: this._endTimestamp,
1733
- onSample: sample => this._registerAudioSample(track, trackOptions, source, sample),
1734
- });
1735
-
1736
- const sink = new AudioSampleSink(track);
1737
- const iterator = sink.samples(this._startTimestamp, this._endTimestamp);
1738
-
1739
- for await (const sample of iterator) {
1740
- if (this._canceled) {
1741
- return;
1742
- }
1743
-
1744
- await resampler.add(sample);
1745
- }
1746
-
1747
- await resampler.finalize();
1748
-
1749
- source.close();
1750
- this._synchronizer.closeTrack(track.id);
1751
- })());
1752
-
1753
- return source;
1754
- }
1755
-
1756
- /** @internal */
1757
- _reportProgress(trackId: number, endTimestamp: number) {
1758
- if (!this._computeProgress) {
1759
- return;
1760
- }
1761
- assert(this._totalDuration !== null);
1762
-
1763
- this._maxTimestamps.set(
1764
- trackId,
1765
- Math.max(endTimestamp, this._maxTimestamps.get(trackId)!),
1766
- );
1767
-
1768
- const minTimestamp = Math.min(...this._maxTimestamps.values());
1769
- const newProgress = clamp(minTimestamp / this._totalDuration, 0, 1);
1770
-
1771
- if (newProgress !== this._lastProgress) {
1772
- this._lastProgress = newProgress;
1773
- this.onProgress?.(newProgress);
1774
- }
1775
- }
1776
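// Worked example (illustrative): with two tracks whose highest reported timestamps are 30 s and 24 s and a
// total duration of 60 s, progress is clamp(24 / 60, 0, 1) = 0.4; onProgress only fires when that value
// actually changes.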
- }
1777
-
1778
- const MAX_TIMESTAMP_GAP = 5;
1779
-
1780
- /**
1781
- * Utility class for synchronizing multiple track packet consumers with one another. We don't want one consumer to get
1782
- * too out-of-sync with the others, as that may lead to a large number of packets that need to be internally buffered
1783
- * before they can be written. Therefore, we use this class to slow down a consumer if it is too far ahead of the
1784
- * slowest consumer.
1785
- */
1786
- class TrackSynchronizer {
1787
- maxTimestamps = new Map<number, number>(); // Track ID -> timestamp
1788
- resolvers: {
1789
- timestamp: number;
1790
- resolve: () => void;
1791
- }[] = [];
1792
-
1793
- computeMinAndMaybeResolve() {
1794
- let newMin = Infinity;
1795
- for (const [, timestamp] of this.maxTimestamps) {
1796
- newMin = Math.min(newMin, timestamp);
1797
- }
1798
-
1799
- for (let i = 0; i < this.resolvers.length; i++) {
1800
- const entry = this.resolvers[i]!;
1801
-
1802
- if (entry.timestamp - newMin < MAX_TIMESTAMP_GAP) {
1803
- // The gap has gotten small enough again, so the consumer can continue
1804
- entry.resolve();
1805
- this.resolvers.splice(i, 1);
1806
- i--;
1807
- }
1808
- }
1809
-
1810
- return newMin;
1811
- }
1812
-
1813
- shouldWait(trackId: number, timestamp: number) {
1814
- this.maxTimestamps.set(trackId, Math.max(timestamp, this.maxTimestamps.get(trackId) ?? -Infinity));
1815
-
1816
- const newMin = this.computeMinAndMaybeResolve();
1817
- return timestamp - newMin >= MAX_TIMESTAMP_GAP; // Should wait if it is too far ahead of the slowest consumer
1818
- }
1819
-
1820
- wait(timestamp: number) {
1821
- const { promise, resolve } = promiseWithResolvers();
1822
-
1823
- this.resolvers.push({
1824
- timestamp,
1825
- resolve,
1826
- });
1827
-
1828
- return promise;
1829
- }
1830
-
1831
- closeTrack(trackId: number) {
1832
- this.maxTimestamps.delete(trackId);
1833
- this.computeMinAndMaybeResolve();
1834
- }
1835
- }
1836
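// Worked example (illustrative), with MAX_TIMESTAMP_GAP = 5: if the video consumer has reached 12 s while
// the audio consumer is still at 6 s, shouldWait(videoTrackId, 12) returns true (12 - 6 >= 5) and the video
// consumer parks itself via wait(12). Once the audio consumer reports 8 s, the gap shrinks to 4 s and
// computeMinAndMaybeResolve() releases the parked resolver.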
-
1837
- /**
1838
- * Utility class to handle audio resampling, handling both sample rate resampling as well as channel up/downmixing.
1839
- * The advantage of doing this manually, rather than using OfflineAudioContext to do it for us, is the artifact-free
1840
- * handling of putting multiple resampled audio samples back to back, which produces flaky results when using
1841
- * OfflineAudioContext.
1842
- */
1843
- export class AudioResampler {
1844
- sourceSampleRate: number | null = null;
1845
- targetSampleRate: number;
1846
- sourceNumberOfChannels: number | null = null;
1847
- targetNumberOfChannels: number;
1848
- startTime: number;
1849
- endTime: number;
1850
- onSample: (sample: AudioSample) => Promise<void>;
1851
-
1852
- bufferSizeInFrames: number;
1853
- bufferSizeInSamples: number;
1854
- outputBuffer: Float32Array;
1855
- /** Start frame of current buffer */
1856
- bufferStartFrame: number;
1857
- /** The highest index written to in the current buffer */
1858
- maxWrittenFrame: number;
1859
- channelMixer!: (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => number;
1860
- tempSourceBuffer!: Float32Array;
1861
-
1862
- constructor(options: {
1863
- targetSampleRate: number;
1864
- targetNumberOfChannels: number;
1865
- startTime: number;
1866
- endTime: number;
1867
- onSample: (sample: AudioSample) => Promise<void>;
1868
- }) {
1869
- this.targetSampleRate = options.targetSampleRate;
1870
- this.targetNumberOfChannels = options.targetNumberOfChannels;
1871
- this.startTime = options.startTime;
1872
- this.endTime = options.endTime;
1873
- this.onSample = options.onSample;
1874
-
1875
- this.bufferSizeInFrames = Math.floor(this.targetSampleRate * 5.0); // 5 seconds
1876
- this.bufferSizeInSamples = this.bufferSizeInFrames * this.targetNumberOfChannels;
1877
-
1878
- this.outputBuffer = new Float32Array(this.bufferSizeInSamples);
1879
- this.bufferStartFrame = 0;
1880
- this.maxWrittenFrame = -1;
1881
- }
1882
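// Worked example (illustrative): for a 48000 Hz stereo target, the buffer spans 240000 frames (5 seconds),
// i.e. 480000 interleaved float32 values (~1.9 MB), before finalizeCurrentBuffer() flushes it as a single
// AudioSample.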
-
1883
- /**
1884
- * Sets up the channel mixer to handle up/downmixing in the case where input and output channel counts don't match.
1885
- */
1886
- doChannelMixerSetup(): void {
1887
- assert(this.sourceNumberOfChannels !== null);
1888
-
1889
- const sourceNum = this.sourceNumberOfChannels;
1890
- const targetNum = this.targetNumberOfChannels;
1891
-
1892
- // Logic taken from
1893
- // https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Basic_concepts_behind_Web_Audio_API
1894
- // Most of the mapping functions are branchless.
1895
-
1896
- if (sourceNum === 1 && targetNum === 2) {
1897
- // Mono to Stereo: M -> L, M -> R
1898
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
1899
- return sourceData[sourceFrameIndex * sourceNum]!;
1900
- };
1901
- } else if (sourceNum === 1 && targetNum === 4) {
1902
- // Mono to Quad: M -> L, M -> R, 0 -> SL, 0 -> SR
1903
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1904
- return sourceData[sourceFrameIndex * sourceNum]! * +(targetChannelIndex < 2);
1905
- };
1906
- } else if (sourceNum === 1 && targetNum === 6) {
1907
- // Mono to 5.1: 0 -> L, 0 -> R, M -> C, 0 -> LFE, 0 -> SL, 0 -> SR
1908
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1909
- return sourceData[sourceFrameIndex * sourceNum]! * +(targetChannelIndex === 2);
1910
- };
1911
- } else if (sourceNum === 2 && targetNum === 1) {
1912
- // Stereo to Mono: 0.5 * (L + R)
1913
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
1914
- const baseIdx = sourceFrameIndex * sourceNum;
1915
- return 0.5 * (sourceData[baseIdx]! + sourceData[baseIdx + 1]!);
1916
- };
1917
- } else if (sourceNum === 2 && targetNum === 4) {
1918
- // Stereo to Quad: L -> L, R -> R, 0 -> SL, 0 -> SR
1919
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1920
- return sourceData[sourceFrameIndex * sourceNum + targetChannelIndex]! * +(targetChannelIndex < 2);
1921
- };
1922
- } else if (sourceNum === 2 && targetNum === 6) {
1923
- // Stereo to 5.1: L -> L, R -> R, 0 -> C, 0 -> LFE, 0 -> SL, 0 -> SR
1924
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1925
- return sourceData[sourceFrameIndex * sourceNum + targetChannelIndex]! * +(targetChannelIndex < 2);
1926
- };
1927
- } else if (sourceNum === 4 && targetNum === 1) {
1928
- // Quad to Mono: 0.25 * (L + R + SL + SR)
1929
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
1930
- const baseIdx = sourceFrameIndex * sourceNum;
1931
- return 0.25 * (
1932
- sourceData[baseIdx]! + sourceData[baseIdx + 1]!
1933
- + sourceData[baseIdx + 2]! + sourceData[baseIdx + 3]!
1934
- );
1935
- };
1936
- } else if (sourceNum === 4 && targetNum === 2) {
1937
- // Quad to Stereo: 0.5 * (L + SL), 0.5 * (R + SR)
1938
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1939
- const baseIdx = sourceFrameIndex * sourceNum;
1940
- return 0.5 * (
1941
- sourceData[baseIdx + targetChannelIndex]!
1942
- + sourceData[baseIdx + targetChannelIndex + 2]!
1943
- );
1944
- };
1945
- } else if (sourceNum === 4 && targetNum === 6) {
1946
- // Quad to 5.1: L -> L, R -> R, 0 -> C, 0 -> LFE, SL -> SL, SR -> SR
1947
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1948
- const baseIdx = sourceFrameIndex * sourceNum;
1949
-
1950
- // It's a bit harder to do this one branchlessly
1951
- if (targetChannelIndex < 2) return sourceData[baseIdx + targetChannelIndex]!; // L, R
1952
- if (targetChannelIndex === 2 || targetChannelIndex === 3) return 0; // C, LFE
1953
- return sourceData[baseIdx + targetChannelIndex - 2]!; // SL, SR
1954
- };
1955
- } else if (sourceNum === 6 && targetNum === 1) {
1956
- // 5.1 to Mono: sqrt(1/2) * (L + R) + C + 0.5 * (SL + SR)
1957
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number) => {
1958
- const baseIdx = sourceFrameIndex * sourceNum;
1959
- return Math.SQRT1_2 * (sourceData[baseIdx]! + sourceData[baseIdx + 1]!)
1960
- + sourceData[baseIdx + 2]!
1961
- + 0.5 * (sourceData[baseIdx + 4]! + sourceData[baseIdx + 5]!);
1962
- };
1963
- } else if (sourceNum === 6 && targetNum === 2) {
1964
- // 5.1 to Stereo: L + sqrt(1/2) * (C + SL), R + sqrt(1/2) * (C + SR)
1965
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1966
- const baseIdx = sourceFrameIndex * sourceNum;
1967
- return sourceData[baseIdx + targetChannelIndex]!
1968
- + Math.SQRT1_2 * (sourceData[baseIdx + 2]! + sourceData[baseIdx + targetChannelIndex + 4]!);
1969
- };
1970
- } else if (sourceNum === 6 && targetNum === 4) {
1971
- // 5.1 to Quad: L + sqrt(1/2) * C, R + sqrt(1/2) * C, SL, SR
1972
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1973
- const baseIdx = sourceFrameIndex * sourceNum;
1974
-
1975
- // It's a bit harder to do this one branchlessly
1976
- if (targetChannelIndex < 2) {
1977
- return sourceData[baseIdx + targetChannelIndex]! + Math.SQRT1_2 * sourceData[baseIdx + 2]!;
1978
- }
1979
- return sourceData[baseIdx + targetChannelIndex + 2]!; // SL, SR
1980
- };
1981
- } else {
1982
- // Discrete fallback: direct mapping with zero-fill or drop
1983
- this.channelMixer = (sourceData: Float32Array, sourceFrameIndex: number, targetChannelIndex: number) => {
1984
- return targetChannelIndex < sourceNum
1985
- ? sourceData[sourceFrameIndex * sourceNum + targetChannelIndex]!
1986
- : 0;
1987
- };
1988
- }
1989
- }
1990
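// Worked example for the 5.1 -> stereo mapping above (illustrative): for a frame with L = 0.2, R = 0.1,
// C = 0.4, LFE = 0.3, SL = 0.05, SR = 0.05, the mixer yields outL = 0.2 + SQRT1_2 * (0.4 + 0.05) ≈ 0.518
// and outR = 0.1 + SQRT1_2 * (0.4 + 0.05) ≈ 0.418; the LFE channel is intentionally dropped, matching the
// Web Audio down-mix conventions linked above.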
-
1991
- ensureTempBufferSize(requiredSamples: number): void {
1992
- let length = this.tempSourceBuffer.length;
1993
-
1994
- while (length < requiredSamples) {
1995
- length *= 2;
1996
- }
1997
-
1998
- if (length !== this.tempSourceBuffer.length) {
1999
- const newBuffer = new Float32Array(length);
2000
- newBuffer.set(this.tempSourceBuffer);
2001
- this.tempSourceBuffer = newBuffer;
2002
- }
2003
- }
2004
-
2005
- async add(audioSample: AudioSample) {
2006
- if (this.sourceSampleRate === null) {
2007
- // This is the first sample, so let's initialize the missing data. Initializing the sample rate from the decoded
2008
- // sample is more reliable than using the file's metadata, because decoders are free to emit any sample rate
2009
- // they see fit.
2010
- this.sourceSampleRate = audioSample.sampleRate;
2011
- this.sourceNumberOfChannels = audioSample.numberOfChannels;
2012
-
2013
- // Pre-allocate temporary buffer for source data
2014
- this.tempSourceBuffer = new Float32Array(this.sourceSampleRate * this.sourceNumberOfChannels);
2015
-
2016
- this.doChannelMixerSetup();
2017
- }
2018
-
2019
- const requiredSamples = audioSample.numberOfFrames * audioSample.numberOfChannels;
2020
- this.ensureTempBufferSize(requiredSamples);
2021
-
2022
- // Copy the audio data to the temp buffer
2023
- const sourceDataSize = audioSample.allocationSize({ planeIndex: 0, format: 'f32' });
2024
- const sourceView = new Float32Array(this.tempSourceBuffer.buffer, 0, sourceDataSize / 4);
2025
- audioSample.copyTo(sourceView, { planeIndex: 0, format: 'f32' });
2026
-
2027
- const inputStartTime = audioSample.timestamp - this.startTime;
2028
- const inputDuration = audioSample.numberOfFrames / this.sourceSampleRate;
2029
- const inputEndTime = Math.min(inputStartTime + inputDuration, this.endTime - this.startTime);
2030
-
2031
- // Compute which output frames are affected by this sample
2032
- const outputStartFrame = Math.floor(inputStartTime * this.targetSampleRate);
2033
- const outputEndFrame = Math.ceil(inputEndTime * this.targetSampleRate);
2034
-
2035
- for (let outputFrame = outputStartFrame; outputFrame < outputEndFrame; outputFrame++) {
2036
- if (outputFrame < this.bufferStartFrame) {
2037
- continue; // Skip writes to the past
2038
- }
2039
-
2040
- while (outputFrame >= this.bufferStartFrame + this.bufferSizeInFrames) {
2041
- // The write is after the current buffer, so finalize it
2042
- await this.finalizeCurrentBuffer();
2043
- this.bufferStartFrame += this.bufferSizeInFrames;
2044
- }
2045
-
2046
- const bufferFrameIndex = outputFrame - this.bufferStartFrame;
2047
- assert(bufferFrameIndex < this.bufferSizeInFrames);
2048
-
2049
- const outputTime = outputFrame / this.targetSampleRate;
2050
- const inputTime = outputTime - inputStartTime;
2051
- const sourcePosition = inputTime * this.sourceSampleRate;
2052
-
2053
- const sourceLowerFrame = Math.floor(sourcePosition);
2054
- const sourceUpperFrame = Math.ceil(sourcePosition);
2055
- const fraction = sourcePosition - sourceLowerFrame;
2056
-
2057
- // Process each output channel
2058
- for (let targetChannel = 0; targetChannel < this.targetNumberOfChannels; targetChannel++) {
2059
- let lowerSample = 0;
2060
- let upperSample = 0;
2061
-
2062
- if (sourceLowerFrame >= 0 && sourceLowerFrame < audioSample.numberOfFrames) {
2063
- lowerSample = this.channelMixer(sourceView, sourceLowerFrame, targetChannel);
2064
- }
2065
-
2066
- if (sourceUpperFrame >= 0 && sourceUpperFrame < audioSample.numberOfFrames) {
2067
- upperSample = this.channelMixer(sourceView, sourceUpperFrame, targetChannel);
2068
- }
2069
-
2070
- // For resampling, we do naive linear interpolation to find the in-between sample. This produces
2071
- // suboptimal results especially for downsampling (for which a low-pass filter would first need to be
2072
- // applied), but AudioContext doesn't do this either, so, whatever, for now.
2073
- const outputSample = lowerSample + fraction * (upperSample - lowerSample);
2074
-
2075
- // Write to output buffer (interleaved)
2076
- const outputIndex = bufferFrameIndex * this.targetNumberOfChannels + targetChannel;
2077
- this.outputBuffer[outputIndex]! += outputSample; // Add in case of overlapping samples
2078
- }
2079
-
2080
- this.maxWrittenFrame = Math.max(this.maxWrittenFrame, bufferFrameIndex);
2081
- }
2082
- }
2083
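// Worked example for the interpolation above (illustrative): resampling 48000 Hz audio to 44100 Hz, output
// frame 100 sits at 100 / 44100 s ≈ 2.268 ms, which maps to sourcePosition = (100 / 44100) * 48000 ≈ 108.84.
// Each target channel is therefore computed as mixed(108) + 0.84 * (mixed(109) - mixed(108)), where mixed(n)
// is the channel-mixed value of source frame n.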
-
2084
- async finalizeCurrentBuffer() {
2085
- if (this.maxWrittenFrame < 0) {
2086
- return; // Nothing to finalize
2087
- }
2088
-
2089
- const samplesWritten = (this.maxWrittenFrame + 1) * this.targetNumberOfChannels;
2090
-
2091
- const outputData = new Float32Array(samplesWritten);
2092
- outputData.set(this.outputBuffer.subarray(0, samplesWritten));
2093
-
2094
- const timestampSeconds = this.bufferStartFrame / this.targetSampleRate;
2095
- const audioSample = new AudioSample({
2096
- format: 'f32',
2097
- sampleRate: this.targetSampleRate,
2098
- numberOfChannels: this.targetNumberOfChannels,
2099
- timestamp: timestampSeconds,
2100
- data: outputData,
2101
- });
2102
-
2103
- await this.onSample(audioSample);
2104
-
2105
- this.outputBuffer.fill(0);
2106
- this.maxWrittenFrame = -1;
2107
- }
2108
-
2109
- finalize() {
2110
- return this.finalizeCurrentBuffer();
2111
- }
2112
- }