avbridge 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/CHANGELOG.md +73 -0
  2. package/dist/{chunk-6UUT4BEA.cjs → chunk-2IJ66NTD.cjs} +13 -20
  3. package/dist/chunk-2IJ66NTD.cjs.map +1 -0
  4. package/dist/{chunk-XKPSTC34.cjs → chunk-2XW2O3YI.cjs} +5 -20
  5. package/dist/chunk-2XW2O3YI.cjs.map +1 -0
  6. package/dist/chunk-5KVLE6YI.js +167 -0
  7. package/dist/chunk-5KVLE6YI.js.map +1 -0
  8. package/dist/{chunk-2PGRFCWB.js → chunk-CPJLFFCC.js} +8 -18
  9. package/dist/chunk-CPJLFFCC.js.map +1 -0
  10. package/dist/chunk-CPZ7PXAM.cjs +240 -0
  11. package/dist/chunk-CPZ7PXAM.cjs.map +1 -0
  12. package/dist/{chunk-QQXBPW72.js → chunk-E76AMWI4.js} +4 -18
  13. package/dist/chunk-E76AMWI4.js.map +1 -0
  14. package/dist/{chunk-NV7ILLWH.js → chunk-KY2GPCT7.js} +347 -665
  15. package/dist/chunk-KY2GPCT7.js.map +1 -0
  16. package/dist/chunk-LUFA47FP.js +19 -0
  17. package/dist/chunk-LUFA47FP.js.map +1 -0
  18. package/dist/chunk-Q2VUO52Z.cjs +374 -0
  19. package/dist/chunk-Q2VUO52Z.cjs.map +1 -0
  20. package/dist/chunk-QDJLQR53.cjs +22 -0
  21. package/dist/chunk-QDJLQR53.cjs.map +1 -0
  22. package/dist/chunk-S4WAZC2T.cjs +173 -0
  23. package/dist/chunk-S4WAZC2T.cjs.map +1 -0
  24. package/dist/chunk-SMH6IOP2.js +368 -0
  25. package/dist/chunk-SMH6IOP2.js.map +1 -0
  26. package/dist/chunk-SR3MPV4D.js +237 -0
  27. package/dist/chunk-SR3MPV4D.js.map +1 -0
  28. package/dist/{chunk-7RGG6ME7.cjs → chunk-TBW26OPP.cjs} +365 -688
  29. package/dist/chunk-TBW26OPP.cjs.map +1 -0
  30. package/dist/chunk-X2K3GIWE.js +235 -0
  31. package/dist/chunk-X2K3GIWE.js.map +1 -0
  32. package/dist/chunk-ZCUXHW55.cjs +242 -0
  33. package/dist/chunk-ZCUXHW55.cjs.map +1 -0
  34. package/dist/element-browser.js +799 -493
  35. package/dist/element-browser.js.map +1 -1
  36. package/dist/element.cjs +58 -4
  37. package/dist/element.cjs.map +1 -1
  38. package/dist/element.d.cts +38 -0
  39. package/dist/element.d.ts +38 -0
  40. package/dist/element.js +57 -3
  41. package/dist/element.js.map +1 -1
  42. package/dist/index.cjs +523 -393
  43. package/dist/index.cjs.map +1 -1
  44. package/dist/index.js +494 -366
  45. package/dist/index.js.map +1 -1
  46. package/dist/libav-demux-H2GS46GH.cjs +27 -0
  47. package/dist/libav-demux-H2GS46GH.cjs.map +1 -0
  48. package/dist/libav-demux-OWZ4T2YW.js +6 -0
  49. package/dist/libav-demux-OWZ4T2YW.js.map +1 -0
  50. package/dist/{libav-import-GST2AMPL.cjs → libav-import-2ZVKV2E7.cjs} +2 -2
  51. package/dist/{libav-import-GST2AMPL.cjs.map → libav-import-2ZVKV2E7.cjs.map} +1 -1
  52. package/dist/{libav-import-2JURFHEW.js → libav-import-6MGLCXVQ.js} +2 -2
  53. package/dist/{libav-import-2JURFHEW.js.map → libav-import-6MGLCXVQ.js.map} +1 -1
  54. package/dist/player.cjs +601 -470
  55. package/dist/player.cjs.map +1 -1
  56. package/dist/player.d.cts +50 -0
  57. package/dist/player.d.ts +50 -0
  58. package/dist/player.js +580 -449
  59. package/dist/player.js.map +1 -1
  60. package/dist/remux-OBSMIENG.cjs +35 -0
  61. package/dist/remux-OBSMIENG.cjs.map +1 -0
  62. package/dist/remux-WBYIZBBX.js +10 -0
  63. package/dist/remux-WBYIZBBX.js.map +1 -0
  64. package/dist/source-4TZ6KMNV.js +4 -0
  65. package/dist/{source-F656KYYV.js.map → source-4TZ6KMNV.js.map} +1 -1
  66. package/dist/source-7YLO6E7X.cjs +29 -0
  67. package/dist/{source-73CAH6HW.cjs.map → source-7YLO6E7X.cjs.map} +1 -1
  68. package/dist/source-MTX5ELUZ.js +4 -0
  69. package/dist/{source-QJR3OHTW.js.map → source-MTX5ELUZ.js.map} +1 -1
  70. package/dist/source-VFLXLOCN.cjs +29 -0
  71. package/dist/{source-VB74JQ7Z.cjs.map → source-VFLXLOCN.cjs.map} +1 -1
  72. package/dist/subtitles-4T74JRGT.js +4 -0
  73. package/dist/subtitles-4T74JRGT.js.map +1 -0
  74. package/dist/subtitles-QUH4LPI4.cjs +29 -0
  75. package/dist/subtitles-QUH4LPI4.cjs.map +1 -0
  76. package/package.json +1 -1
  77. package/src/convert/remux.ts +1 -35
  78. package/src/convert/transcode-libav.ts +691 -0
  79. package/src/convert/transcode.ts +12 -4
  80. package/src/element/avbridge-player.ts +16 -0
  81. package/src/element/avbridge-video.ts +54 -0
  82. package/src/errors.ts +6 -0
  83. package/src/player.ts +15 -16
  84. package/src/strategies/fallback/decoder.ts +96 -173
  85. package/src/strategies/fallback/index.ts +19 -2
  86. package/src/strategies/fallback/libav-import.ts +9 -1
  87. package/src/strategies/fallback/video-renderer.ts +107 -0
  88. package/src/strategies/hybrid/decoder.ts +88 -180
  89. package/src/strategies/hybrid/index.ts +17 -2
  90. package/src/strategies/native.ts +6 -3
  91. package/src/strategies/remux/index.ts +14 -2
  92. package/src/strategies/remux/pipeline.ts +72 -12
  93. package/src/subtitles/render.ts +8 -0
  94. package/src/util/libav-demux.ts +405 -0
  95. package/dist/chunk-2PGRFCWB.js.map +0 -1
  96. package/dist/chunk-6UUT4BEA.cjs.map +0 -1
  97. package/dist/chunk-7RGG6ME7.cjs.map +0 -1
  98. package/dist/chunk-NV7ILLWH.js.map +0 -1
  99. package/dist/chunk-QQXBPW72.js.map +0 -1
  100. package/dist/chunk-XKPSTC34.cjs.map +0 -1
  101. package/dist/source-73CAH6HW.cjs +0 -28
  102. package/dist/source-F656KYYV.js +0 -3
  103. package/dist/source-QJR3OHTW.js +0 -3
  104. package/dist/source-VB74JQ7Z.cjs +0 -28
@@ -1,4 +1,5 @@
1
1
  import type { ClockSource } from "./audio-output.js";
2
+ import { SubtitleOverlay } from "../../subtitles/render.js";
2
3
 
3
4
  /**
4
5
  * Renders decoded `VideoFrame`s into a 2D canvas overlaid on the user's
@@ -47,6 +48,16 @@ export class VideoRenderer {
47
48
  /** Cumulative count of ticks where PTS mode painted a frame. */
48
49
  private ticksPainted = 0;
49
50
 
51
+ /**
52
+ * Subtitle overlay div attached to the stage wrapper alongside the
53
+ * canvas. Created lazily when subtitle tracks are attached via the
54
+ * target's `<track>` children. Canvas strategies (hybrid, fallback)
55
+ * hide the <video>, so we can't rely on the browser's native cue
56
+ * rendering; we read TextTrack.cues and render into this overlay.
57
+ */
58
+ private subtitleOverlay: SubtitleOverlay | null = null;
59
+ private subtitleTrack: TextTrack | null = null;
60
+
50
61
  /**
51
62
  * Calibration offset (microseconds) between video PTS and audio clock.
52
63
  * Video PTS and AudioContext.currentTime can drift ~0.1% relative to
@@ -111,6 +122,15 @@ export class VideoRenderer {
111
122
  }
112
123
  target.style.visibility = "hidden";
113
124
 
125
+ // Create a subtitle overlay on the same parent as the canvas so cues
126
+ // appear over the rendered video. Shows nothing until a TextTrack
127
+ // gets attached via attachSubtitleTracks.
128
+ const overlayParent = parent instanceof HTMLElement ? parent : document.body;
129
+ this.subtitleOverlay = new SubtitleOverlay(overlayParent);
130
+ // Watch for <track> children on the target <video>. When one is
131
+ // added, grab its TextTrack and poll cues from it each tick.
132
+ this.watchTextTracks(target);
133
+
114
134
  const ctx = this.canvas.getContext("2d");
115
135
  if (!ctx) throw new Error("video renderer: failed to acquire 2D context");
116
136
  this.ctx = ctx;
@@ -156,10 +176,95 @@ export class VideoRenderer {
156
176
  }
157
177
  }
158
178
 
179
/**
 * Watch the target <video>'s textTracks list and adopt the first
 * `subtitles`/`captions` track as the cue source for the overlay.
 *
 * Tracks that already exist are considered immediately; later ones are
 * picked up through the list's `addtrack` event. Once a track has been
 * adopted, further calls are no-ops.
 *
 * @param target The <video> element whose text tracks are observed.
 */
private watchTextTracks(target: HTMLVideoElement): void {
  const debug = (message: string): void => {
    if (isDebug()) {
      // eslint-disable-next-line no-console
      console.log(message);
    }
  };

  const pick = (): void => {
    // The addtrack listener outlives destroy(). Once the renderer is torn
    // down the native <video> is visible again, so we must not grab new
    // tracks and force them to "hidden" behind the browser's back.
    if (this.destroyed || this.subtitleTrack) return;

    const tracks = target.textTracks;
    debug(`[avbridge:subs] watchTextTracks pick() — ${tracks.length} tracks`);

    for (let i = 0; i < tracks.length; i++) {
      const t = tracks[i];
      debug(`[avbridge:subs] track ${i}: kind=${t.kind} mode=${t.mode} cues=${t.cues?.length ?? 0}`);
      if (t.kind !== "subtitles" && t.kind !== "captions") continue;

      this.subtitleTrack = t;
      t.mode = "hidden"; // hidden means "cues available via API, don't render"
      debug(`[avbridge:subs] picked track, mode=hidden`);

      // Locate the <track> element that owns this TextTrack by identity.
      // Building a `track[srclang="…"]` selector from t.language breaks on
      // quotes in the value and is ambiguous when the language is empty or
      // shared by several tracks.
      const trackEl =
        Array.from(target.querySelectorAll("track")).find((el) => el.track === t) ?? null;
      if (trackEl) {
        // Purely diagnostic: report when the cue file finished loading or failed.
        trackEl.addEventListener("load", () => {
          debug(`[avbridge:subs] track element loaded, cues=${t.cues?.length ?? 0}`);
        });
        trackEl.addEventListener("error", (ev) => {
          // eslint-disable-next-line no-console
          console.warn(`[avbridge:subs] track element error:`, ev);
        });
      }
      break;
    }
  };

  pick();
  if (typeof target.textTracks.addEventListener === "function") {
    target.textTracks.addEventListener("addtrack", () => {
      debug("[avbridge:subs] addtrack event fired");
      pick();
    });
  }
}
235
+
236
/** Set once the first cue-availability debug line has been logged. */
private _loggedCues = false;

/**
 * Sync the subtitle overlay with the current clock position.
 *
 * Reads `this.clock.now()`, looks for the first cue of the adopted
 * TextTrack whose [startTime, endTime) interval contains it, strips
 * markup tags from its text and hands the result to the overlay. When no
 * cue is active, or the track currently exposes no cues, the overlay is
 * cleared.
 *
 * NOTE(review): assumes `clock.now()` is in seconds like cue times — confirm.
 */
private updateSubtitles(): void {
  const overlay = this.subtitleOverlay;
  const track = this.subtitleTrack;
  if (!overlay || !track) return;

  const cues = track.cues;
  if (!cues || cues.length === 0) {
    // `cues` is null while the track's mode is "disabled". Clear the
    // overlay so the last rendered cue does not stay on screen forever.
    overlay.setText("");
    return;
  }

  if (isDebug() && !this._loggedCues) {
    this._loggedCues = true;
    // eslint-disable-next-line no-console
    console.log(`[avbridge:subs] cues available: ${cues.length}, first start=${cues[0].startTime}, last end=${cues[cues.length-1].endTime}`);
  }

  const now = this.clock.now();
  let activeText = "";
  for (let i = 0; i < cues.length; i++) {
    const cue = cues[i];
    // End bound is exclusive, matching native cue activity, so back-to-back
    // cues hand over cleanly at the boundary.
    if (now >= cue.startTime && now < cue.endTime) {
      // Only VTTCue carries `text`; other cue kinds render as empty.
      activeText = (cue as Partial<VTTCue>).text ?? "";
      break;
    }
  }

  // Strip VTT tags for plain rendering (e.g. <c.en> voice tags)
  overlay.setText(activeText.replace(/<[^>]+>/g, ""));
}
261
+
159
262
  private tick(): void {
160
263
  if (this.destroyed) return;
161
264
  this.rafHandle = requestAnimationFrame(this.tick);
162
265
 
266
+ this.updateSubtitles();
267
+
163
268
  if (this.queue.length === 0) return;
164
269
 
165
270
  const playing = this.clock.isPlaying();
@@ -331,6 +436,8 @@ export class VideoRenderer {
331
436
  this.destroyed = true;
332
437
  if (this.rafHandle != null) cancelAnimationFrame(this.rafHandle);
333
438
  this.flush();
439
+ if (this.subtitleOverlay) { this.subtitleOverlay.destroy(); this.subtitleOverlay = null; }
440
+ this.subtitleTrack = null;
334
441
  this.canvas.remove();
335
442
  this.target.style.visibility = "";
336
443
  }
@@ -20,10 +20,17 @@ import { AudioOutput } from "../fallback/audio-output.js";
20
20
  import type { MediaContext } from "../../types.js";
21
21
  import { dbg } from "../../util/debug.js";
22
22
  import { pickLibavVariant } from "../fallback/variant-routing.js";
23
+ import {
24
+ sanitizePacketTimestamp,
25
+ sanitizeFrameTimestamp,
26
+ libavFrameToInterleavedFloat32,
27
+ } from "../../util/libav-demux.js";
23
28
 
24
29
  export interface HybridDecoderHandles {
25
30
  destroy(): Promise<void>;
26
31
  seek(timeSec: number): Promise<void>;
32
+ /** Swap the active audio track — rebuilds the libav audio decoder + reseeks. */
33
+ setAudioTrack(trackId: number, timeSec: number): Promise<void>;
27
34
  stats(): Record<string, unknown>;
28
35
  onFatalError(handler: (reason: string) => void): void;
29
36
  }
@@ -52,7 +59,14 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
52
59
  const readPkt = await libav.av_packet_alloc();
53
60
  const [fmt_ctx, streams] = await libav.ff_init_demuxer_file(opts.filename);
54
61
  const videoStream = streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_VIDEO) ?? null;
55
- const audioStream = streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO) ?? null;
62
+ // Audio stream is mutable (setAudioTrack swaps it). Prefer the id the
63
+ // probe layer listed first so both entry points agree.
64
+ const firstAudioTrackId = opts.context.audioTracks[0]?.id;
65
+ let audioStream: LibavStream | null =
66
+ (firstAudioTrackId != null
67
+ ? streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO && s.index === firstAudioTrackId)
68
+ : undefined) ??
69
+ streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO) ?? null;
56
70
 
57
71
  if (!videoStream && !audioStream) {
58
72
  throw new Error("hybrid decoder: file has no decodable streams");
@@ -388,6 +402,79 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
388
402
  try { await inputHandle.detach(); } catch { /* ignore */ }
389
403
  },
390
404
 
405
/**
 * Switch audio decoding to the audio stream whose index equals `trackId`
 * and restart the pump at `timeSec`.
 *
 * No-op when that stream is already active; warns and returns when no
 * audio stream has that index. If the new decoder cannot be created the
 * audio output is switched to no-audio and the pump still restarts.
 */
async setAudioTrack(trackId, timeSec) {
  if (audioStream && audioStream.index === trackId) return;

  const target = streams.find(
    (s) => s.index === trackId && s.codec_type === libav.AVMEDIA_TYPE_AUDIO,
  );
  if (!target) {
    console.warn("[avbridge] hybrid: setAudioTrack — no stream with id", trackId);
    return;
  }

  // Claim a fresh pump token, then wait for the in-flight pump to settle
  // before touching decoder state.
  const token = ++pumpToken;
  if (pumpRunning) {
    try {
      await pumpRunning;
    } catch {
      /* ignore */
    }
  }
  if (destroyed) return;

  // Release the previous audio decoder (best effort) ...
  if (audioDec) {
    try {
      await libav.ff_free_decoder?.(audioDec.c, audioDec.pkt, audioDec.frame);
    } catch {
      /* ignore */
    }
    audioDec = null;
  }

  // ... and bring up one for the requested stream.
  try {
    const [, c, pkt, frame] = await libav.ff_init_decoder(target.codec_id, {
      codecpar: target.codecpar,
    });
    audioDec = { c, pkt, frame };
    audioTimeBase =
      target.time_base_num && target.time_base_den
        ? [target.time_base_num, target.time_base_den]
        : undefined;
  } catch (err) {
    console.warn(
      "[avbridge] hybrid: setAudioTrack init failed — switching to no-audio:",
      (err as Error).message,
    );
    audioDec = null;
    opts.audio.setNoAudio();
  }

  audioStream = target;

  // Move the demuxer back to the playback position so the new track
  // starts from there. Stream index -1 with a microsecond timestamp.
  try {
    const seekUs = Math.floor(timeSec * 1_000_000);
    const [lo, hi] = libav.f64toi64
      ? libav.f64toi64(seekUs)
      : [seekUs | 0, Math.floor(seekUs / 0x100000000)];
    const seekFlags = libav.AVSEEK_FLAG_BACKWARD ?? 0;
    await libav.av_seek_frame(fmt_ctx, -1, lo, hi, seekFlags);
  } catch (err) {
    console.warn("[avbridge] hybrid: setAudioTrack seek failed:", err);
  }

  // The demuxer moved back to a keyframe, so the video side is flushed too.
  try {
    if (videoDecoder && videoDecoder.state === "configured") {
      await videoDecoder.flush();
    }
  } catch {
    /* ignore */
  }
  await flushBSF();

  // Restart both synthetic timestamp counters at the resume position.
  const resumeUs = Math.round(timeSec * 1_000_000);
  syntheticVideoUs = resumeUs;
  syntheticAudioUs = resumeUs;

  pumpRunning = pumpLoop(token).catch((err) =>
    console.error("[avbridge] hybrid pump failed (post-setAudioTrack):", err),
  );
},
477
+
391
478
  async seek(timeSec) {
392
479
  const newToken = ++pumpToken;
393
480
  if (pumpRunning) {
@@ -459,185 +546,6 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
459
546
  // time_base to avoid overflow.
460
547
  // ─────────────────────────────────────────────────────────────────────────────
461
548
 
462
- function sanitizePacketTimestamp(
463
- pkt: LibavPacket,
464
- nextUs: () => number,
465
- fallbackTimeBase?: [number, number],
466
- ): void {
467
- const lo = pkt.pts ?? 0;
468
- const hi = pkt.ptshi ?? 0;
469
- const isInvalid = (hi === -2147483648 && lo === 0) || !Number.isFinite(lo);
470
- if (isInvalid) {
471
- const us = nextUs();
472
- pkt.pts = us;
473
- pkt.ptshi = 0;
474
- pkt.time_base_num = 1;
475
- pkt.time_base_den = 1_000_000;
476
- return;
477
- }
478
- const tb = fallbackTimeBase ?? [1, 1_000_000];
479
- const pts64 = hi * 0x100000000 + lo;
480
- const us = Math.round((pts64 * 1_000_000 * tb[0]) / tb[1]);
481
- if (Number.isFinite(us) && Math.abs(us) <= Number.MAX_SAFE_INTEGER) {
482
- pkt.pts = us;
483
- pkt.ptshi = us < 0 ? -1 : 0;
484
- pkt.time_base_num = 1;
485
- pkt.time_base_den = 1_000_000;
486
- return;
487
- }
488
- const fallback = nextUs();
489
- pkt.pts = fallback;
490
- pkt.ptshi = 0;
491
- pkt.time_base_num = 1;
492
- pkt.time_base_den = 1_000_000;
493
- }
494
-
495
- // Frame timestamp sanitizer (same as fallback/decoder.ts, for audio frames)
496
- function sanitizeFrameTimestamp(
497
- frame: LibavFrame,
498
- nextUs: () => number,
499
- fallbackTimeBase?: [number, number],
500
- ): void {
501
- const lo = frame.pts ?? 0;
502
- const hi = frame.ptshi ?? 0;
503
- const isInvalid = (hi === -2147483648 && lo === 0) || !Number.isFinite(lo);
504
- if (isInvalid) {
505
- const us = nextUs();
506
- frame.pts = us;
507
- frame.ptshi = 0;
508
- return;
509
- }
510
- const tb = fallbackTimeBase ?? [1, 1_000_000];
511
- const pts64 = hi * 0x100000000 + lo;
512
- const us = Math.round((pts64 * 1_000_000 * tb[0]) / tb[1]);
513
- if (Number.isFinite(us) && Math.abs(us) <= Number.MAX_SAFE_INTEGER) {
514
- frame.pts = us;
515
- frame.ptshi = us < 0 ? -1 : 0;
516
- return;
517
- }
518
- const fallback = nextUs();
519
- frame.pts = fallback;
520
- frame.ptshi = 0;
521
- }
522
-
523
- // ─────────────────────────────────────────────────────────────────────────────
524
- // Audio frame → interleaved Float32 (duplicated from fallback/decoder.ts)
525
- // ─────────────────────────────────────────────────────────────────────────────
526
-
527
- const AV_SAMPLE_FMT_U8 = 0;
528
- const AV_SAMPLE_FMT_S16 = 1;
529
- const AV_SAMPLE_FMT_S32 = 2;
530
- const AV_SAMPLE_FMT_FLT = 3;
531
- const AV_SAMPLE_FMT_U8P = 5;
532
- const AV_SAMPLE_FMT_S16P = 6;
533
- const AV_SAMPLE_FMT_S32P = 7;
534
- const AV_SAMPLE_FMT_FLTP = 8;
535
-
536
- interface InterleavedSamples {
537
- data: Float32Array;
538
- channels: number;
539
- sampleRate: number;
540
- }
541
-
542
- function libavFrameToInterleavedFloat32(frame: LibavFrame): InterleavedSamples | null {
543
- const channels = frame.channels ?? frame.ch_layout_nb_channels ?? 1;
544
- const sampleRate = frame.sample_rate ?? 44100;
545
- const nbSamples = frame.nb_samples ?? 0;
546
- if (nbSamples === 0) return null;
547
-
548
- const out = new Float32Array(nbSamples * channels);
549
-
550
- switch (frame.format) {
551
- case AV_SAMPLE_FMT_FLTP: {
552
- const planes = ensurePlanes(frame.data, channels);
553
- for (let ch = 0; ch < channels; ch++) {
554
- const plane = asFloat32(planes[ch]);
555
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i];
556
- }
557
- return { data: out, channels, sampleRate };
558
- }
559
- case AV_SAMPLE_FMT_FLT: {
560
- const flat = asFloat32(frame.data);
561
- for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i];
562
- return { data: out, channels, sampleRate };
563
- }
564
- case AV_SAMPLE_FMT_S16P: {
565
- const planes = ensurePlanes(frame.data, channels);
566
- for (let ch = 0; ch < channels; ch++) {
567
- const plane = asInt16(planes[ch]);
568
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i] / 32768;
569
- }
570
- return { data: out, channels, sampleRate };
571
- }
572
- case AV_SAMPLE_FMT_S16: {
573
- const flat = asInt16(frame.data);
574
- for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i] / 32768;
575
- return { data: out, channels, sampleRate };
576
- }
577
- case AV_SAMPLE_FMT_S32P: {
578
- const planes = ensurePlanes(frame.data, channels);
579
- for (let ch = 0; ch < channels; ch++) {
580
- const plane = asInt32(planes[ch]);
581
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i] / 2147483648;
582
- }
583
- return { data: out, channels, sampleRate };
584
- }
585
- case AV_SAMPLE_FMT_S32: {
586
- const flat = asInt32(frame.data);
587
- for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i] / 2147483648;
588
- return { data: out, channels, sampleRate };
589
- }
590
- case AV_SAMPLE_FMT_U8P: {
591
- const planes = ensurePlanes(frame.data, channels);
592
- for (let ch = 0; ch < channels; ch++) {
593
- const plane = asUint8(planes[ch]);
594
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = (plane[i] - 128) / 128;
595
- }
596
- return { data: out, channels, sampleRate };
597
- }
598
- case AV_SAMPLE_FMT_U8: {
599
- const flat = asUint8(frame.data);
600
- for (let i = 0; i < nbSamples * channels; i++) out[i] = (flat[i] - 128) / 128;
601
- return { data: out, channels, sampleRate };
602
- }
603
- default:
604
- return null;
605
- }
606
- }
607
-
608
- function ensurePlanes(data: unknown, channels: number): unknown[] {
609
- if (Array.isArray(data)) return data;
610
- const arr = data as { length: number; subarray?: (a: number, b: number) => unknown };
611
- const len = arr.length;
612
- const perChannel = Math.floor(len / channels);
613
- const planes: unknown[] = [];
614
- for (let ch = 0; ch < channels; ch++) {
615
- planes.push(arr.subarray ? arr.subarray(ch * perChannel, (ch + 1) * perChannel) : arr);
616
- }
617
- return planes;
618
- }
619
-
620
- function asFloat32(x: unknown): Float32Array {
621
- if (x instanceof Float32Array) return x;
622
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
623
- return new Float32Array(ta.buffer, ta.byteOffset, ta.byteLength / 4);
624
- }
625
- function asInt16(x: unknown): Int16Array {
626
- if (x instanceof Int16Array) return x;
627
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
628
- return new Int16Array(ta.buffer, ta.byteOffset, ta.byteLength / 2);
629
- }
630
- function asInt32(x: unknown): Int32Array {
631
- if (x instanceof Int32Array) return x;
632
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
633
- return new Int32Array(ta.buffer, ta.byteOffset, ta.byteLength / 4);
634
- }
635
- function asUint8(x: unknown): Uint8Array {
636
- if (x instanceof Uint8Array) return x;
637
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
638
- return new Uint8Array(ta.buffer, ta.byteOffset, ta.byteLength);
639
- }
640
-
641
549
  // ─────────────────────────────────────────────────────────────────────────────
642
550
  // Bridge loader
643
551
  // ─────────────────────────────────────────────────────────────────────────────
@@ -137,8 +137,23 @@ export async function createHybridSession(
137
137
  await doSeek(time);
138
138
  },
139
139
 
140
- async setAudioTrack(_id) {
141
- // Post-MVP for hybrid strategy
140
/**
 * Switch the hybrid session to another audio track.
 *
 * Unknown ids are logged and ignored. Otherwise playback is paused, the
 * decoder handles swap the track at the current audio clock position,
 * the audio output and renderer queue are reset, and playback resumes
 * once buffered if it was running before.
 */
async setAudioTrack(id) {
  const known = ctx.audioTracks.some((track) => track.id === id);
  if (!known) {
    console.warn("[avbridge] hybrid: setAudioTrack — unknown track id", id);
    return;
  }

  // Snapshot play state and position before pausing.
  const resumePlayback = audio.isPlaying();
  const position = audio.now();

  await audio.pause().catch(() => {});
  // A failed swap is logged, not rethrown; the reset below still runs.
  await handles.setAudioTrack(id, position).catch((err) =>
    console.warn("[avbridge] hybrid: handles.setAudioTrack failed:", err),
  );
  await audio.reset(position);
  renderer.flush();

  if (!resumePlayback) return;
  await waitForBuffer();
  await audio.start();
},
143
158
 
144
159
  async setSubtitleTrack(_id) {
@@ -49,10 +49,13 @@ export async function createNativeSession(
49
49
  video.currentTime = time;
50
50
  },
51
51
  async setAudioTrack(id) {
52
- // HTMLMediaElement.audioTracks is not exposed in all browsers, so we
53
- // try-catch and no-op if not available.
52
+ // HTMLMediaElement.audioTracks isn't exposed on all browsers (Chrome
53
+ // needs the MediaCapabilities flag for many containers). Best-effort:
54
+ // try by string id match first, then by index. If the list doesn't
55
+ // exist, silently no-op — the user will still hear whatever track the
56
+ // browser picked by default.
54
57
  const tracks = (video as unknown as { audioTracks?: { length: number; [i: number]: { id: string; enabled: boolean } } }).audioTracks;
55
- if (!tracks) return;
58
+ if (!tracks || tracks.length === 0) return;
56
59
  for (let i = 0; i < tracks.length; i++) {
57
60
  tracks[i].enabled = tracks[i].id === String(id) || i === id;
58
61
  }
@@ -66,8 +66,20 @@ export async function createRemuxSession(
66
66
  const wasPlaying = !video.paused;
67
67
  await pipeline.seek(time, wasPlaying || wantPlay);
68
68
  },
69
- async setAudioTrack(_id) {
70
- // v1: single-track output. Multi-audio remuxing is post-MVP.
69
/**
 * Switch the remux session to another audio track.
 *
 * Unknown ids are logged and ignored. Otherwise the pipeline is asked to
 * switch at the video element's current time, auto-playing when the
 * element is playing or a play was already requested.
 */
async setAudioTrack(id) {
  const known = context.audioTracks.some((track) => track.id === id);
  if (!known) {
    console.warn("[avbridge] remux: setAudioTrack — unknown track id", id);
    return;
  }

  const wasPlaying = !video.paused;
  const time = video.currentTime || 0;

  // Started or not, the pipeline call is the same; the session is simply
  // marked as started first.
  // NOTE(review): pipeline.setAudioTrack returns without starting a pump
  // when `id` is already the selected track, yet `started` is set anyway —
  // confirm play()/seek() still start the pipeline in that case.
  started = true;
  await pipeline.setAudioTrack(id, time, wasPlaying || wantPlay);
},
72
84
  async setSubtitleTrack(id) {
73
85
  const tracks = video.textTracks;
@@ -26,6 +26,11 @@ export interface RemuxPipeline {
26
26
  seek(time: number, autoPlay?: boolean): Promise<void>;
27
27
  /** Update the autoplay intent mid-flight — used when play() arrives after seek() but before the MseSink has been constructed. */
28
28
  setAutoPlay(autoPlay: boolean): void;
29
+ /**
30
+ * Switch the active audio track. Tears down the current Output, rebuilds
31
+ * with the new audio source, and resumes pumping at the given time.
32
+ */
33
+ setAudioTrack(trackId: number, timeSec: number, autoPlay: boolean): Promise<void>;
29
34
  destroy(): Promise<void>;
30
35
  stats(): Record<string, unknown>;
31
36
  }
@@ -37,7 +42,6 @@ export async function createRemuxPipeline(
37
42
  const mb = await import("mediabunny");
38
43
 
39
44
  const videoTrackInfo = ctx.videoTracks[0];
40
- const audioTrackInfo = ctx.audioTracks[0];
41
45
  if (!videoTrackInfo) throw new Error("remux: source has no video track");
42
46
 
43
47
  // Map avbridge codec names back to mediabunny's enum strings.
@@ -45,7 +49,6 @@ export async function createRemuxPipeline(
45
49
  if (!mbVideoCodec) {
46
50
  throw new Error(`remux: video codec "${videoTrackInfo.codec}" is not supported by mediabunny output`);
47
51
  }
48
- const mbAudioCodec = audioTrackInfo ? avbridgeAudioToMediabunny(audioTrackInfo.codec) : null;
49
52
 
50
53
  // Open the input. URL sources go through mediabunny's UrlSource so the
51
54
  // muxer streams via Range requests instead of buffering the whole file.
@@ -55,23 +58,52 @@ export async function createRemuxPipeline(
55
58
  });
56
59
  const allTracks = await input.getTracks();
57
60
  const inputVideo = allTracks.find((t) => t.id === videoTrackInfo.id && t.isVideoTrack());
58
- const inputAudio = audioTrackInfo
59
- ? allTracks.find((t) => t.id === audioTrackInfo.id && t.isAudioTrack())
60
- : null;
61
61
  if (!inputVideo || !inputVideo.isVideoTrack()) {
62
62
  throw new Error("remux: video track not found in input");
63
63
  }
64
- if (audioTrackInfo && (!inputAudio || !inputAudio.isAudioTrack())) {
65
- throw new Error("remux: audio track not found in input");
66
- }
67
64
 
68
- // Pull WebCodecs decoder configs once — used as `meta` on the first packet.
65
+ // Pull the video WebCodecs decoder config once — used as `meta` on the
66
+ // first packet after every Output rebuild.
69
67
  const videoConfig = await inputVideo.getDecoderConfig();
70
- const audioConfig = inputAudio && inputAudio.isAudioTrack() ? await inputAudio.getDecoderConfig() : null;
71
68
 
72
- // Packet sinks (input side) — reused across seeks.
69
+ // Packet sink for video — reused across seeks.
73
70
  const videoSink = new mb.EncodedPacketSink(inputVideo);
74
- const audioSink = inputAudio?.isAudioTrack() ? new mb.EncodedPacketSink(inputAudio) : null;
71
+
72
+ // Audio selection is mutable: setAudioTrack() can swap it. The selected
73
+ // audio derived state (input track, codec, sink, config) is rebuilt via
74
+ // rebuildAudio() whenever the id changes.
75
+ type InputAudioTrack = InstanceType<typeof mb.InputAudioTrack>;
76
+ type AudioDecCfg = Awaited<ReturnType<InputAudioTrack["getDecoderConfig"]>>;
77
+
78
+ let selectedAudioTrackId: number | null = ctx.audioTracks[0]?.id ?? null;
79
+ let inputAudio: InputAudioTrack | null = null;
80
+ let mbAudioCodec: ReturnType<typeof avbridgeAudioToMediabunny> | null = null;
81
+ let audioSink: InstanceType<typeof mb.EncodedPacketSink> | null = null;
82
+ let audioConfig: AudioDecCfg | null = null;
83
+
84
/**
 * Recompute the audio-side state (`inputAudio`, `mbAudioCodec`,
 * `audioSink`, `audioConfig`) for the current `selectedAudioTrackId`.
 *
 * With no selection, all four are cleared. Otherwise the new values are
 * built first and committed together only after every step, including
 * the awaited decoder-config lookup, has succeeded. A failure therefore
 * leaves the previous state untouched rather than half-switched.
 *
 * @throws Error when the selected id is not in `ctx.audioTracks`, or the
 *   matching input track is missing or not an audio track.
 */
async function rebuildAudio(): Promise<void> {
  if (selectedAudioTrackId == null) {
    inputAudio = null;
    mbAudioCodec = null;
    audioSink = null;
    audioConfig = null;
    return;
  }

  const wantedId = selectedAudioTrackId;
  const trackInfo = ctx.audioTracks.find((t) => t.id === wantedId);
  if (!trackInfo) {
    throw new Error(`remux: no audio track with id ${wantedId}`);
  }
  const newInput = allTracks.find((t) => t.id === trackInfo.id && t.isAudioTrack());
  if (!newInput || !newInput.isAudioTrack()) {
    throw new Error("remux: audio track not found in input");
  }

  // Build everything that can throw or reject before mutating shared state.
  const newCodec = avbridgeAudioToMediabunny(trackInfo.codec);
  const newSink = new mb.EncodedPacketSink(newInput);
  const newConfig = await newInput.getDecoderConfig();

  inputAudio = newInput;
  mbAudioCodec = newCodec;
  audioSink = newSink;
  audioConfig = newConfig;
}
105
+
106
+ await rebuildAudio();
75
107
 
76
108
  // MSE sink — created lazily on first output write, reused across seeks.
77
109
  let sink: MseSink | null = null;
@@ -254,6 +286,34 @@ export async function createRemuxPipeline(
254
286
  pendingAutoPlay = autoPlay;
255
287
  if (sink) sink.setPlayOnSeek(autoPlay);
256
288
  },
289
/**
 * Switch the active audio track and restart pumping at `time`.
 *
 * No-op when `trackId` is already selected; unknown ids are logged and
 * ignored. If the audio state for the new track cannot be built, the
 * previous selection is restored, so a later retry is not swallowed by
 * the same-id check and the restarted pump uses matching state.
 */
async setAudioTrack(trackId, time, autoPlay) {
  if (selectedAudioTrackId === trackId) return;
  if (!ctx.audioTracks.some((t) => t.id === trackId)) {
    console.warn("[avbridge] remux: setAudioTrack — unknown track id", trackId);
    return;
  }

  // Stop the current pump. The next pumpLoop() will build a fresh
  // Output that uses the newly-selected audio source.
  pumpToken++;

  const previousTrackId = selectedAudioTrackId;
  selectedAudioTrackId = trackId;
  try {
    await rebuildAudio();
  } catch (err) {
    console.warn(
      "[avbridge] remux: rebuildAudio failed:",
      err instanceof Error ? err.message : err,
    );
    // Roll back: re-derive the audio state for the previous selection so
    // the id and its derived state agree again.
    selectedAudioTrackId = previousTrackId;
    try {
      await rebuildAudio();
    } catch (rollbackErr) {
      console.warn(
        "[avbridge] remux: rebuildAudio rollback failed:",
        rollbackErr instanceof Error ? rollbackErr.message : rollbackErr,
      );
    }
  }

  // Tear down the existing MseSink — the audio codec may have changed,
  // and the SourceBuffer's mime is fixed at construction time. The next
  // createOutput will recompute `getMimeType()` and the write handler
  // will lazily build a new sink.
  if (sink) {
    try {
      sink.destroy();
    } catch {
      /* ignore */
    }
    sink = null;
  }

  pendingAutoPlay = autoPlay;
  pendingStartTime = time;
  // The pump was stopped above, so it is restarted even when the switch
  // itself failed and was rolled back.
  pumpLoop(++pumpToken, time).catch((err) => {
    // eslint-disable-next-line no-console
    console.error("[avbridge] remux pipeline setAudioTrack pump failed:", err);
  });
},
257
317
  async destroy() {
258
318
  destroyed = true;
259
319
  pumpToken++;
@@ -32,6 +32,14 @@ export class SubtitleOverlay {
32
32
  this.el.textContent = active?.text ?? "";
33
33
  }
34
34
 
35
/**
 * Show `text` in the overlay as-is, bypassing the loadVtt/update path.
 *
 * @param text Plain text to display; an empty string clears the overlay.
 */
setText(text: string): void {
  // Invoked from a per-frame render tick, so skip the DOM write when the
  // content is already up to date.
  if (this.el.textContent === text) return;
  this.el.textContent = text;
}
42
+
35
43
  destroy(): void {
36
44
  this.el.remove();
37
45
  this.cues = [];