avbridge 2.3.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/CHANGELOG.md +114 -0
  2. package/dist/{chunk-6UUT4BEA.cjs → chunk-2IJ66NTD.cjs} +13 -20
  3. package/dist/chunk-2IJ66NTD.cjs.map +1 -0
  4. package/dist/{chunk-XKPSTC34.cjs → chunk-2XW2O3YI.cjs} +5 -20
  5. package/dist/chunk-2XW2O3YI.cjs.map +1 -0
  6. package/dist/chunk-5KVLE6YI.js +167 -0
  7. package/dist/chunk-5KVLE6YI.js.map +1 -0
  8. package/dist/{chunk-7RGG6ME7.cjs → chunk-6SOFJV44.cjs} +422 -688
  9. package/dist/chunk-6SOFJV44.cjs.map +1 -0
  10. package/dist/{chunk-2PGRFCWB.js → chunk-CPJLFFCC.js} +8 -18
  11. package/dist/chunk-CPJLFFCC.js.map +1 -0
  12. package/dist/chunk-CPZ7PXAM.cjs +240 -0
  13. package/dist/chunk-CPZ7PXAM.cjs.map +1 -0
  14. package/dist/{chunk-QQXBPW72.js → chunk-E76AMWI4.js} +4 -18
  15. package/dist/chunk-E76AMWI4.js.map +1 -0
  16. package/dist/chunk-LUFA47FP.js +19 -0
  17. package/dist/chunk-LUFA47FP.js.map +1 -0
  18. package/dist/{chunk-NV7ILLWH.js → chunk-OGYHFY6K.js} +404 -665
  19. package/dist/chunk-OGYHFY6K.js.map +1 -0
  20. package/dist/chunk-Q2VUO52Z.cjs +374 -0
  21. package/dist/chunk-Q2VUO52Z.cjs.map +1 -0
  22. package/dist/chunk-QDJLQR53.cjs +22 -0
  23. package/dist/chunk-QDJLQR53.cjs.map +1 -0
  24. package/dist/chunk-S4WAZC2T.cjs +173 -0
  25. package/dist/chunk-S4WAZC2T.cjs.map +1 -0
  26. package/dist/chunk-SMH6IOP2.js +368 -0
  27. package/dist/chunk-SMH6IOP2.js.map +1 -0
  28. package/dist/chunk-SR3MPV4D.js +237 -0
  29. package/dist/chunk-SR3MPV4D.js.map +1 -0
  30. package/dist/chunk-X2K3GIWE.js +235 -0
  31. package/dist/chunk-X2K3GIWE.js.map +1 -0
  32. package/dist/chunk-ZCUXHW55.cjs +242 -0
  33. package/dist/chunk-ZCUXHW55.cjs.map +1 -0
  34. package/dist/element-browser.js +883 -492
  35. package/dist/element-browser.js.map +1 -1
  36. package/dist/element.cjs +88 -6
  37. package/dist/element.cjs.map +1 -1
  38. package/dist/element.d.cts +51 -1
  39. package/dist/element.d.ts +51 -1
  40. package/dist/element.js +87 -5
  41. package/dist/element.js.map +1 -1
  42. package/dist/index.cjs +523 -393
  43. package/dist/index.cjs.map +1 -1
  44. package/dist/index.d.cts +2 -2
  45. package/dist/index.d.ts +2 -2
  46. package/dist/index.js +494 -366
  47. package/dist/index.js.map +1 -1
  48. package/dist/libav-demux-H2GS46GH.cjs +27 -0
  49. package/dist/libav-demux-H2GS46GH.cjs.map +1 -0
  50. package/dist/libav-demux-OWZ4T2YW.js +6 -0
  51. package/dist/libav-demux-OWZ4T2YW.js.map +1 -0
  52. package/dist/{libav-import-GST2AMPL.cjs → libav-import-2ZVKV2E7.cjs} +2 -2
  53. package/dist/{libav-import-GST2AMPL.cjs.map → libav-import-2ZVKV2E7.cjs.map} +1 -1
  54. package/dist/{libav-import-2JURFHEW.js → libav-import-6MGLCXVQ.js} +2 -2
  55. package/dist/{libav-import-2JURFHEW.js.map → libav-import-6MGLCXVQ.js.map} +1 -1
  56. package/dist/{player-B6WB74RD.d.ts → player-DGXeCNfD.d.cts} +41 -1
  57. package/dist/{player-B6WB74RD.d.cts → player-DGXeCNfD.d.ts} +41 -1
  58. package/dist/player.cjs +731 -472
  59. package/dist/player.cjs.map +1 -1
  60. package/dist/player.d.cts +229 -120
  61. package/dist/player.d.ts +229 -120
  62. package/dist/player.js +710 -451
  63. package/dist/player.js.map +1 -1
  64. package/dist/remux-OBSMIENG.cjs +35 -0
  65. package/dist/remux-OBSMIENG.cjs.map +1 -0
  66. package/dist/remux-WBYIZBBX.js +10 -0
  67. package/dist/remux-WBYIZBBX.js.map +1 -0
  68. package/dist/source-4TZ6KMNV.js +4 -0
  69. package/dist/{source-F656KYYV.js.map → source-4TZ6KMNV.js.map} +1 -1
  70. package/dist/source-7YLO6E7X.cjs +29 -0
  71. package/dist/{source-73CAH6HW.cjs.map → source-7YLO6E7X.cjs.map} +1 -1
  72. package/dist/source-MTX5ELUZ.js +4 -0
  73. package/dist/{source-QJR3OHTW.js.map → source-MTX5ELUZ.js.map} +1 -1
  74. package/dist/source-VFLXLOCN.cjs +29 -0
  75. package/dist/{source-VB74JQ7Z.cjs.map → source-VFLXLOCN.cjs.map} +1 -1
  76. package/dist/subtitles-4T74JRGT.js +4 -0
  77. package/dist/subtitles-4T74JRGT.js.map +1 -0
  78. package/dist/subtitles-QUH4LPI4.cjs +29 -0
  79. package/dist/subtitles-QUH4LPI4.cjs.map +1 -0
  80. package/package.json +1 -1
  81. package/src/convert/remux.ts +1 -35
  82. package/src/convert/transcode-libav.ts +691 -0
  83. package/src/convert/transcode.ts +12 -4
  84. package/src/element/avbridge-player.ts +100 -0
  85. package/src/element/avbridge-video.ts +140 -3
  86. package/src/element/player-styles.ts +12 -0
  87. package/src/errors.ts +6 -0
  88. package/src/player.ts +15 -16
  89. package/src/strategies/fallback/decoder.ts +96 -173
  90. package/src/strategies/fallback/index.ts +46 -2
  91. package/src/strategies/fallback/libav-import.ts +9 -1
  92. package/src/strategies/fallback/video-renderer.ts +107 -0
  93. package/src/strategies/hybrid/decoder.ts +88 -180
  94. package/src/strategies/hybrid/index.ts +35 -2
  95. package/src/strategies/native.ts +6 -3
  96. package/src/strategies/remux/index.ts +14 -2
  97. package/src/strategies/remux/pipeline.ts +72 -12
  98. package/src/subtitles/render.ts +8 -0
  99. package/src/types.ts +32 -0
  100. package/src/util/libav-demux.ts +405 -0
  101. package/src/util/time-ranges.ts +40 -0
  102. package/dist/chunk-2PGRFCWB.js.map +0 -1
  103. package/dist/chunk-6UUT4BEA.cjs.map +0 -1
  104. package/dist/chunk-7RGG6ME7.cjs.map +0 -1
  105. package/dist/chunk-NV7ILLWH.js.map +0 -1
  106. package/dist/chunk-QQXBPW72.js.map +0 -1
  107. package/dist/chunk-XKPSTC34.cjs.map +0 -1
  108. package/dist/source-73CAH6HW.cjs +0 -28
  109. package/dist/source-F656KYYV.js +0 -3
  110. package/dist/source-QJR3OHTW.js +0 -3
  111. package/dist/source-VB74JQ7Z.cjs +0 -28
@@ -29,11 +29,22 @@ import { AudioOutput } from "./audio-output.js";
29
29
  import type { MediaContext } from "../../types.js";
30
30
  import { pickLibavVariant } from "./variant-routing.js";
31
31
  import { dbg } from "../../util/debug.js";
32
+ import {
33
+ sanitizeFrameTimestamp,
34
+ libavFrameToInterleavedFloat32,
35
+ } from "../../util/libav-demux.js";
32
36
 
33
37
  export interface DecoderHandles {
34
38
  destroy(): Promise<void>;
35
39
  /** Seek to the given time in seconds. Returns once the new pump has been kicked off. */
36
40
  seek(timeSec: number): Promise<void>;
41
+ /**
42
+ * Switch the active audio track. The decoder tears down the current audio
43
+ * decoder, initializes one for the stream whose container id matches
44
+ * `trackId` (== libav `stream.index`), seeks the demuxer to `timeSec`, and
45
+ * restarts the pump. No-op if the track is already active.
46
+ */
47
+ setAudioTrack(trackId: number, timeSec: number): Promise<void>;
37
48
  stats(): Record<string, unknown>;
38
49
  }
39
50
 
@@ -63,7 +74,15 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
63
74
 
64
75
  const [fmt_ctx, streams] = await libav.ff_init_demuxer_file(opts.filename);
65
76
  const videoStream = streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_VIDEO) ?? null;
66
- const audioStream = streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO) ?? null;
77
+ // Audio stream is mutable so setAudioTrack() can swap it. Default to the
78
+ // track the context picked first (matches probe ordering). We resolve by
79
+ // container id so the selection survives stream reordering.
80
+ const firstAudioTrackId = opts.context.audioTracks[0]?.id;
81
+ let audioStream: LibavStream | null =
82
+ (firstAudioTrackId != null
83
+ ? streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO && s.index === firstAudioTrackId)
84
+ : undefined) ??
85
+ streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO) ?? null;
67
86
 
68
87
  if (!videoStream && !audioStream) {
69
88
  throw new Error("fallback decoder: file has no decodable streams");
@@ -376,7 +395,7 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
376
395
 
377
396
  for (const f of frames) {
378
397
  if (myToken !== pumpToken || destroyed) return;
379
- const bridgeOpts = sanitizeFrameTimestamp(
398
+ sanitizeFrameTimestamp(
380
399
  f,
381
400
  () => {
382
401
  const ts = syntheticVideoUs;
@@ -385,8 +404,10 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
385
404
  },
386
405
  videoTimeBase,
387
406
  );
407
+ // sanitizeFrameTimestamp normalizes pts to µs, so the bridge can
408
+ // always use the 1/1e6 timebase.
388
409
  try {
389
- const vf = bridge.laFrameToVideoFrame(f, bridgeOpts);
410
+ const vf = bridge.laFrameToVideoFrame(f, { timeBase: [1, 1_000_000] });
390
411
  opts.renderer.enqueue(vf);
391
412
  videoFramesDecoded++;
392
413
  } catch (err) {
@@ -455,6 +476,78 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
455
476
  try { await inputHandle.detach(); } catch { /* ignore */ }
456
477
  },
457
478
 
479
+ async setAudioTrack(trackId, timeSec) {
480
+ if (audioStream && audioStream.index === trackId) return;
481
+ const newStream = streams.find(
482
+ (s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO && s.index === trackId,
483
+ );
484
+ if (!newStream) {
485
+ console.warn("[avbridge] fallback: setAudioTrack — no stream with id", trackId);
486
+ return;
487
+ }
488
+
489
+ // Stop the pump before touching libav state. Same discipline as seek().
490
+ const newToken = ++pumpToken;
491
+ if (pumpRunning) {
492
+ try { await pumpRunning; } catch { /* ignore */ }
493
+ }
494
+ if (destroyed) return;
495
+
496
+ // Tear down the old audio decoder and init a fresh one for the new stream.
497
+ if (audioDec) {
498
+ try { await libav.ff_free_decoder?.(audioDec.c, audioDec.pkt, audioDec.frame); } catch { /* ignore */ }
499
+ audioDec = null;
500
+ }
501
+ try {
502
+ const [, c, pkt, frame] = await libav.ff_init_decoder(newStream.codec_id, {
503
+ codecpar: newStream.codecpar,
504
+ });
505
+ audioDec = { c, pkt, frame };
506
+ audioTimeBase = newStream.time_base_num && newStream.time_base_den
507
+ ? [newStream.time_base_num, newStream.time_base_den]
508
+ : undefined;
509
+ } catch (err) {
510
+ console.warn(
511
+ "[avbridge] fallback: setAudioTrack init failed — falling back to no-audio mode:",
512
+ (err as Error).message,
513
+ );
514
+ audioDec = null;
515
+ opts.audio.setNoAudio();
516
+ }
517
+
518
+ audioStream = newStream;
519
+
520
+ // Re-seek so packets resume from the user's current position for the
521
+ // new track (and the same video position).
522
+ try {
523
+ const tsUs = Math.floor(timeSec * 1_000_000);
524
+ const [tsLo, tsHi] = libav.f64toi64
525
+ ? libav.f64toi64(tsUs)
526
+ : [tsUs | 0, Math.floor(tsUs / 0x100000000)];
527
+ await libav.av_seek_frame(
528
+ fmt_ctx,
529
+ -1,
530
+ tsLo,
531
+ tsHi,
532
+ libav.AVSEEK_FLAG_BACKWARD ?? 0,
533
+ );
534
+ } catch (err) {
535
+ console.warn("[avbridge] fallback: setAudioTrack seek failed:", err);
536
+ }
537
+
538
+ // Flush the video decoder too — we just moved the demuxer back to a
539
+ // keyframe boundary.
540
+ try { if (videoDec) await libav.avcodec_flush_buffers?.(videoDec.c); } catch { /* ignore */ }
541
+ await flushBSF();
542
+
543
+ syntheticVideoUs = Math.round(timeSec * 1_000_000);
544
+ syntheticAudioUs = Math.round(timeSec * 1_000_000);
545
+
546
+ pumpRunning = pumpLoop(newToken).catch((err) =>
547
+ console.error("[avbridge] fallback pump failed (post-setAudioTrack):", err),
548
+ );
549
+ },
550
+
458
551
  async seek(timeSec) {
459
552
  // Cancel the current pump and wait for it to actually exit before
460
553
  // we start moving file pointers around — concurrent ff_decode_multi
@@ -537,176 +630,6 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
537
630
  };
538
631
  }
539
632
 
540
- // ─────────────────────────────────────────────────────────────────────────────
541
- // Frame timestamp sanitizer.
542
- //
543
- // libav can hand back decoded frames with `pts = AV_NOPTS_VALUE` (encoded as
544
- // ptshi = -2147483648, pts = 0) for inputs whose demuxer can't determine
545
- // presentation times. AVI is the canonical example. The bridge's
546
- // `laFrameToVideoFrame` then multiplies pts × 1e6 × tbNum / tbDen and
547
- // overflows int64, throwing "Value is outside the 'long long' value range".
548
- //
549
- // Fix: replace any invalid pts with a synthetic microsecond counter, force
550
- // the frame's pts/ptshi to that value, and tell the bridge to use a 1/1e6
551
- // timebase so it does an identity conversion.
552
- // ─────────────────────────────────────────────────────────────────────────────
553
-
554
- interface BridgeOpts {
555
- timeBase?: [number, number];
556
- transfer?: boolean;
557
- }
558
-
559
- function sanitizeFrameTimestamp(
560
- frame: LibavFrame,
561
- nextUs: () => number,
562
- fallbackTimeBase?: [number, number],
563
- ): BridgeOpts {
564
- const lo = frame.pts ?? 0;
565
- const hi = frame.ptshi ?? 0;
566
- const isInvalid = (hi === -2147483648 && lo === 0) || !Number.isFinite(lo);
567
- if (isInvalid) {
568
- const us = nextUs();
569
- frame.pts = us;
570
- frame.ptshi = 0;
571
- return { timeBase: [1, 1_000_000] };
572
- }
573
- const tb = fallbackTimeBase ?? [1, 1_000_000];
574
- const pts64 = hi * 0x100000000 + lo;
575
- const us = Math.round((pts64 * 1_000_000 * tb[0]) / tb[1]);
576
- if (Number.isFinite(us) && Math.abs(us) <= Number.MAX_SAFE_INTEGER) {
577
- frame.pts = us;
578
- frame.ptshi = us < 0 ? -1 : 0;
579
- return { timeBase: [1, 1_000_000] };
580
- }
581
- const fallback = nextUs();
582
- frame.pts = fallback;
583
- frame.ptshi = 0;
584
- return { timeBase: [1, 1_000_000] };
585
- }
586
-
587
- // ─────────────────────────────────────────────────────────────────────────────
588
- // libav decoded `Frame` → interleaved Float32Array (the format AudioOutput
589
- // schedules).
590
- // ─────────────────────────────────────────────────────────────────────────────
591
-
592
- const AV_SAMPLE_FMT_U8 = 0;
593
- const AV_SAMPLE_FMT_S16 = 1;
594
- const AV_SAMPLE_FMT_S32 = 2;
595
- const AV_SAMPLE_FMT_FLT = 3;
596
- const AV_SAMPLE_FMT_U8P = 5;
597
- const AV_SAMPLE_FMT_S16P = 6;
598
- const AV_SAMPLE_FMT_S32P = 7;
599
- const AV_SAMPLE_FMT_FLTP = 8;
600
-
601
- interface InterleavedSamples {
602
- data: Float32Array;
603
- channels: number;
604
- sampleRate: number;
605
- }
606
-
607
- function libavFrameToInterleavedFloat32(frame: LibavFrame): InterleavedSamples | null {
608
- const channels = frame.channels ?? frame.ch_layout_nb_channels ?? 1;
609
- const sampleRate = frame.sample_rate ?? 44100;
610
- const nbSamples = frame.nb_samples ?? 0;
611
- if (nbSamples === 0) return null;
612
-
613
- const out = new Float32Array(nbSamples * channels);
614
-
615
- switch (frame.format) {
616
- case AV_SAMPLE_FMT_FLTP: {
617
- const planes = ensurePlanes(frame.data, channels);
618
- for (let ch = 0; ch < channels; ch++) {
619
- const plane = asFloat32(planes[ch]);
620
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i];
621
- }
622
- return { data: out, channels, sampleRate };
623
- }
624
- case AV_SAMPLE_FMT_FLT: {
625
- const flat = asFloat32(frame.data);
626
- for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i];
627
- return { data: out, channels, sampleRate };
628
- }
629
- case AV_SAMPLE_FMT_S16P: {
630
- const planes = ensurePlanes(frame.data, channels);
631
- for (let ch = 0; ch < channels; ch++) {
632
- const plane = asInt16(planes[ch]);
633
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i] / 32768;
634
- }
635
- return { data: out, channels, sampleRate };
636
- }
637
- case AV_SAMPLE_FMT_S16: {
638
- const flat = asInt16(frame.data);
639
- for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i] / 32768;
640
- return { data: out, channels, sampleRate };
641
- }
642
- case AV_SAMPLE_FMT_S32P: {
643
- const planes = ensurePlanes(frame.data, channels);
644
- for (let ch = 0; ch < channels; ch++) {
645
- const plane = asInt32(planes[ch]);
646
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i] / 2147483648;
647
- }
648
- return { data: out, channels, sampleRate };
649
- }
650
- case AV_SAMPLE_FMT_S32: {
651
- const flat = asInt32(frame.data);
652
- for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i] / 2147483648;
653
- return { data: out, channels, sampleRate };
654
- }
655
- case AV_SAMPLE_FMT_U8P: {
656
- const planes = ensurePlanes(frame.data, channels);
657
- for (let ch = 0; ch < channels; ch++) {
658
- const plane = asUint8(planes[ch]);
659
- for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = (plane[i] - 128) / 128;
660
- }
661
- return { data: out, channels, sampleRate };
662
- }
663
- case AV_SAMPLE_FMT_U8: {
664
- const flat = asUint8(frame.data);
665
- for (let i = 0; i < nbSamples * channels; i++) out[i] = (flat[i] - 128) / 128;
666
- return { data: out, channels, sampleRate };
667
- }
668
- default:
669
- if (!(globalThis as { __avbridgeLoggedSampleFmt?: number }).__avbridgeLoggedSampleFmt) {
670
- (globalThis as { __avbridgeLoggedSampleFmt?: number }).__avbridgeLoggedSampleFmt = frame.format;
671
- console.warn(`[avbridge] unsupported audio sample format from libav: ${frame.format}`);
672
- }
673
- return null;
674
- }
675
- }
676
-
677
- function ensurePlanes(data: unknown, channels: number): unknown[] {
678
- if (Array.isArray(data)) return data;
679
- const arr = data as { length: number; subarray?: (a: number, b: number) => unknown };
680
- const len = arr.length;
681
- const perChannel = Math.floor(len / channels);
682
- const planes: unknown[] = [];
683
- for (let ch = 0; ch < channels; ch++) {
684
- planes.push(arr.subarray ? arr.subarray(ch * perChannel, (ch + 1) * perChannel) : arr);
685
- }
686
- return planes;
687
- }
688
-
689
- function asFloat32(x: unknown): Float32Array {
690
- if (x instanceof Float32Array) return x;
691
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
692
- return new Float32Array(ta.buffer, ta.byteOffset, ta.byteLength / 4);
693
- }
694
- function asInt16(x: unknown): Int16Array {
695
- if (x instanceof Int16Array) return x;
696
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
697
- return new Int16Array(ta.buffer, ta.byteOffset, ta.byteLength / 2);
698
- }
699
- function asInt32(x: unknown): Int32Array {
700
- if (x instanceof Int32Array) return x;
701
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
702
- return new Int32Array(ta.buffer, ta.byteOffset, ta.byteLength / 4);
703
- }
704
- function asUint8(x: unknown): Uint8Array {
705
- if (x instanceof Uint8Array) return x;
706
- const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
707
- return new Uint8Array(ta.buffer, ta.byteOffset, ta.byteLength);
708
- }
709
-
710
633
  // ─────────────────────────────────────────────────────────────────────────────
711
634
  // Bridge loader (lazy via the static-import wrapper).
712
635
  // ─────────────────────────────────────────────────────────────────────────────
@@ -3,6 +3,7 @@ import { VideoRenderer } from "./video-renderer.js";
3
3
  import { AudioOutput } from "./audio-output.js";
4
4
  import { startDecoder, type DecoderHandles } from "./decoder.js";
5
5
  import { dbg } from "../../util/debug.js";
6
+ import { makeTimeRanges } from "../../util/time-ranges.js";
6
7
 
7
8
  /**
8
9
  * Fallback strategy session.
@@ -127,6 +128,30 @@ export async function createFallbackSession(
127
128
  get: () => ctx.duration ?? NaN,
128
129
  });
129
130
  }
131
+ // Synthesize HTMLMediaElement parity surfaces that the canvas strategies
132
+ // can't otherwise answer truthfully (the inner <video> has no src, so
133
+ // its own readyState/seekable are zero/empty).
134
+ //
135
+ // readyState: HAVE_NOTHING (0) until the first frame lands; then
136
+ // HAVE_CURRENT_DATA (2) once the cold-start gate is released (both
137
+ // audio+video ready). Simplified from the full five-level spec — we
138
+ // don't distinguish HAVE_FUTURE_DATA vs HAVE_ENOUGH_DATA since our
139
+ // pump semantics make those essentially the same state.
140
+ Object.defineProperty(target, "readyState", {
141
+ configurable: true,
142
+ get: (): number => {
143
+ if (!renderer.hasFrames()) return 0; // HAVE_NOTHING
144
+ if (!audio.isPlaying() && audio.bufferAhead() <= 0 && !audio.isNoAudio()) return 1; // HAVE_METADATA
145
+ return 2; // HAVE_CURRENT_DATA (or better — but 2 is the honest lower bound)
146
+ },
147
+ });
148
+ // seekable: a progressive source is fully seekable once we have duration.
149
+ Object.defineProperty(target, "seekable", {
150
+ configurable: true,
151
+ get: () => makeTimeRanges(ctx.duration && Number.isFinite(ctx.duration) && ctx.duration > 0
152
+ ? [[0, ctx.duration]]
153
+ : []),
154
+ });
130
155
 
131
156
  /**
132
157
  * Wait until the decoder has produced enough buffered output to start
@@ -256,8 +281,25 @@ export async function createFallbackSession(
256
281
  await doSeek(time);
257
282
  },
258
283
 
259
- async setAudioTrack(_id) {
260
- // Multi-track audio is post-MVP for the fallback strategy.
284
+ async setAudioTrack(id) {
285
+ // Verify the id refers to a real track.
286
+ if (!ctx.audioTracks.some((t) => t.id === id)) {
287
+ console.warn("[avbridge] fallback: setAudioTrack — unknown track id", id);
288
+ return;
289
+ }
290
+ const wasPlaying = audio.isPlaying();
291
+ const currentTime = audio.now();
292
+ // Suspend audio, rebuild the decoder + seek, reset audio output, re-gate.
293
+ await audio.pause().catch(() => {});
294
+ await handles.setAudioTrack(id, currentTime).catch((err) =>
295
+ console.warn("[avbridge] fallback: handles.setAudioTrack failed:", err),
296
+ );
297
+ await audio.reset(currentTime);
298
+ renderer.flush();
299
+ if (wasPlaying) {
300
+ await waitForBuffer();
301
+ await audio.start();
302
+ }
261
303
  },
262
304
 
263
305
  async setSubtitleTrack(_id) {
@@ -277,6 +319,8 @@ export async function createFallbackSession(
277
319
  delete (target as unknown as Record<string, unknown>).paused;
278
320
  delete (target as unknown as Record<string, unknown>).volume;
279
321
  delete (target as unknown as Record<string, unknown>).muted;
322
+ delete (target as unknown as Record<string, unknown>).readyState;
323
+ delete (target as unknown as Record<string, unknown>).seekable;
280
324
  } catch { /* ignore */ }
281
325
  },
282
326
 
@@ -23,5 +23,13 @@ export interface BridgeModule {
23
23
  audioStreamToConfig(libav: unknown, stream: unknown): Promise<AudioDecoderConfig | null>;
24
24
  packetToEncodedVideoChunk(pkt: unknown, stream: unknown): EncodedVideoChunk;
25
25
  packetToEncodedAudioChunk(pkt: unknown, stream: unknown): EncodedAudioChunk;
26
- libavFrameToVideoFrame?(frame: unknown, stream: unknown): VideoFrame | null;
26
+ /**
27
+ * Convert a libav-decoded frame (software OR hardware decode) into a
28
+ * WebCodecs VideoFrame. `opts.timeBase` overrides the frame's per-packet
29
+ * timebase; useful when callers have already normalized pts to µs.
30
+ */
31
+ laFrameToVideoFrame(
32
+ frame: unknown,
33
+ opts?: { timeBase?: [number, number]; transfer?: boolean },
34
+ ): VideoFrame;
27
35
  }
@@ -1,4 +1,5 @@
1
1
  import type { ClockSource } from "./audio-output.js";
2
+ import { SubtitleOverlay } from "../../subtitles/render.js";
2
3
 
3
4
  /**
4
5
  * Renders decoded `VideoFrame`s into a 2D canvas overlaid on the user's
@@ -47,6 +48,16 @@ export class VideoRenderer {
47
48
  /** Cumulative count of ticks where PTS mode painted a frame. */
48
49
  private ticksPainted = 0;
49
50
 
51
+ /**
52
+ * Subtitle overlay div attached to the stage wrapper alongside the
53
+ * canvas. Created lazily when subtitle tracks are attached via the
54
+ * target's `<track>` children. Canvas strategies (hybrid, fallback)
55
+ * hide the <video>, so we can't rely on the browser's native cue
56
+ * rendering; we read TextTrack.cues and render into this overlay.
57
+ */
58
+ private subtitleOverlay: SubtitleOverlay | null = null;
59
+ private subtitleTrack: TextTrack | null = null;
60
+
50
61
  /**
51
62
  * Calibration offset (microseconds) between video PTS and audio clock.
52
63
  * Video PTS and AudioContext.currentTime can drift ~0.1% relative to
@@ -111,6 +122,15 @@ export class VideoRenderer {
111
122
  }
112
123
  target.style.visibility = "hidden";
113
124
 
125
+ // Create a subtitle overlay on the same parent as the canvas so cues
126
+ // appear over the rendered video. Shows nothing until a TextTrack
127
+ // gets attached via attachSubtitleTracks.
128
+ const overlayParent = parent instanceof HTMLElement ? parent : document.body;
129
+ this.subtitleOverlay = new SubtitleOverlay(overlayParent);
130
+ // Watch for <track> children on the target <video>. When one is
131
+ // added, grab its TextTrack and poll cues from it each tick.
132
+ this.watchTextTracks(target);
133
+
114
134
  const ctx = this.canvas.getContext("2d");
115
135
  if (!ctx) throw new Error("video renderer: failed to acquire 2D context");
116
136
  this.ctx = ctx;
@@ -156,10 +176,95 @@ export class VideoRenderer {
156
176
  }
157
177
  }
158
178
 
179
+ /**
180
+ * Watch the target <video>'s textTracks list. When a track is added,
181
+ * grab it and start polling cues on each render tick. Existing tracks
182
+ * (if any) are picked up immediately.
183
+ */
184
+ private watchTextTracks(target: HTMLVideoElement): void {
185
+ const pick = () => {
186
+ if (this.subtitleTrack) return;
187
+ const tracks = target.textTracks;
188
+ if (isDebug()) {
189
+ // eslint-disable-next-line no-console
190
+ console.log(`[avbridge:subs] watchTextTracks pick() — ${tracks.length} tracks`);
191
+ }
192
+ for (let i = 0; i < tracks.length; i++) {
193
+ const t = tracks[i];
194
+ if (isDebug()) {
195
+ // eslint-disable-next-line no-console
196
+ console.log(`[avbridge:subs] track ${i}: kind=${t.kind} mode=${t.mode} cues=${t.cues?.length ?? 0}`);
197
+ }
198
+ if (t.kind === "subtitles" || t.kind === "captions") {
199
+ this.subtitleTrack = t;
200
+ t.mode = "hidden"; // hidden means "cues available via API, don't render"
201
+ if (isDebug()) {
202
+ // eslint-disable-next-line no-console
203
+ console.log(`[avbridge:subs] picked track, mode=hidden`);
204
+ }
205
+ // Listen for cue load completion
206
+ const trackEl = target.querySelector(`track[srclang="${t.language}"]`) as HTMLTrackElement | null;
207
+ if (trackEl) {
208
+ trackEl.addEventListener("load", () => {
209
+ if (isDebug()) {
210
+ // eslint-disable-next-line no-console
211
+ console.log(`[avbridge:subs] track element loaded, cues=${t.cues?.length ?? 0}`);
212
+ }
213
+ });
214
+ trackEl.addEventListener("error", (ev) => {
215
+ // eslint-disable-next-line no-console
216
+ console.warn(`[avbridge:subs] track element error:`, ev);
217
+ });
218
+ }
219
+ break;
220
+ }
221
+ }
222
+ };
223
+ pick();
224
+ if (typeof target.textTracks.addEventListener === "function") {
225
+ target.textTracks.addEventListener("addtrack", (e) => {
226
+ if (isDebug()) {
227
+ // eslint-disable-next-line no-console
228
+ console.log("[avbridge:subs] addtrack event fired");
229
+ }
230
+ void e;
231
+ pick();
232
+ });
233
+ }
234
+ }
235
+
236
+ private _loggedCues = false;
237
+
238
+ /** Find the active cue (if any) for the given media time. */
239
+ private updateSubtitles(): void {
240
+ if (!this.subtitleOverlay || !this.subtitleTrack) return;
241
+ const cues = this.subtitleTrack.cues;
242
+ if (!cues || cues.length === 0) return;
243
+ if (isDebug() && !this._loggedCues) {
244
+ this._loggedCues = true;
245
+ // eslint-disable-next-line no-console
246
+ console.log(`[avbridge:subs] cues available: ${cues.length}, first start=${cues[0].startTime}, last end=${cues[cues.length-1].endTime}`);
247
+ }
248
+ const t = this.clock.now();
249
+ let activeText = "";
250
+ for (let i = 0; i < cues.length; i++) {
251
+ const c = cues[i];
252
+ if (t >= c.startTime && t <= c.endTime) {
253
+ const vttCue = c as VTTCue & { text?: string };
254
+ activeText = vttCue.text ?? "";
255
+ break;
256
+ }
257
+ }
258
+ // Strip VTT tags for plain rendering (e.g. <c.en> voice tags)
259
+ this.subtitleOverlay.setText(activeText.replace(/<[^>]+>/g, ""));
260
+ }
261
+
159
262
  private tick(): void {
160
263
  if (this.destroyed) return;
161
264
  this.rafHandle = requestAnimationFrame(this.tick);
162
265
 
266
+ this.updateSubtitles();
267
+
163
268
  if (this.queue.length === 0) return;
164
269
 
165
270
  const playing = this.clock.isPlaying();
@@ -331,6 +436,8 @@ export class VideoRenderer {
331
436
  this.destroyed = true;
332
437
  if (this.rafHandle != null) cancelAnimationFrame(this.rafHandle);
333
438
  this.flush();
439
+ if (this.subtitleOverlay) { this.subtitleOverlay.destroy(); this.subtitleOverlay = null; }
440
+ this.subtitleTrack = null;
334
441
  this.canvas.remove();
335
442
  this.target.style.visibility = "";
336
443
  }