avbridge 2.12.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +177 -0
  2. package/README.md +33 -0
  3. package/dist/{avi-EQE6AR75.cjs → avi-32UABODO.cjs} +12 -4
  4. package/dist/avi-32UABODO.cjs.map +1 -0
  5. package/dist/{avi-Y3N325WZ.cjs → avi-5BPR6QUX.cjs} +12 -4
  6. package/dist/avi-5BPR6QUX.cjs.map +1 -0
  7. package/dist/{avi-NNHH4AAA.js → avi-BLIH7KKV.js} +12 -4
  8. package/dist/avi-BLIH7KKV.js.map +1 -0
  9. package/dist/{avi-S7EY54YA.js → avi-GX2H34IQ.js} +12 -4
  10. package/dist/avi-GX2H34IQ.js.map +1 -0
  11. package/dist/{chunk-2LNXMGT6.js → chunk-5CX7BVVV.js} +5 -5
  12. package/dist/{chunk-2LNXMGT6.js.map → chunk-5CX7BVVV.js.map} +1 -1
  13. package/dist/{chunk-5Y5BTB5D.js → chunk-B76QWPFM.js} +3 -3
  14. package/dist/{chunk-5Y5BTB5D.js.map → chunk-B76QWPFM.js.map} +1 -1
  15. package/dist/{chunk-GJBNLPGI.cjs → chunk-E5MAM2P4.cjs} +9 -9
  16. package/dist/{chunk-GJBNLPGI.cjs.map → chunk-E5MAM2P4.cjs.map} +1 -1
  17. package/dist/{chunk-7EF4VTUS.cjs → chunk-OFJYEITB.cjs} +489 -113
  18. package/dist/chunk-OFJYEITB.cjs.map +1 -0
  19. package/dist/{chunk-HBHSUGNI.cjs → chunk-VLI3Y6IJ.cjs} +5 -5
  20. package/dist/{chunk-HBHSUGNI.cjs.map → chunk-VLI3Y6IJ.cjs.map} +1 -1
  21. package/dist/{chunk-Z26PXRUY.js → chunk-VOC24LYF.js} +486 -110
  22. package/dist/chunk-VOC24LYF.js.map +1 -0
  23. package/dist/element-browser.js +492 -130
  24. package/dist/element-browser.js.map +1 -1
  25. package/dist/element.cjs +3 -3
  26. package/dist/element.js +2 -2
  27. package/dist/index.cjs +18 -18
  28. package/dist/index.js +6 -6
  29. package/dist/player.cjs +658 -170
  30. package/dist/player.cjs.map +1 -1
  31. package/dist/player.d.cts +36 -4
  32. package/dist/player.d.ts +36 -4
  33. package/dist/player.js +658 -170
  34. package/dist/player.js.map +1 -1
  35. package/dist/{remux-VPKCLHHM.cjs → remux-NSBJFMLG.cjs} +9 -9
  36. package/dist/{remux-VPKCLHHM.cjs.map → remux-NSBJFMLG.cjs.map} +1 -1
  37. package/dist/{remux-7TA4FKTY.js → remux-PHUHO3VV.js} +4 -4
  38. package/dist/{remux-7TA4FKTY.js.map → remux-PHUHO3VV.js.map} +1 -1
  39. package/package.json +1 -1
  40. package/src/element/avbridge-player.ts +223 -43
  41. package/src/probe/avi.ts +34 -2
  42. package/src/strategies/fallback/audio-output.ts +164 -35
  43. package/src/strategies/fallback/decoder.ts +467 -60
  44. package/src/strategies/fallback/video-renderer.ts +209 -29
  45. package/src/strategies/hybrid/decoder.ts +56 -28
  46. package/src/strategies/remux/pipeline.ts +12 -3
  47. package/vendor/libav/avbridge/libav-6.8.8.0-avbridge.wasm.mjs +1 -1
  48. package/vendor/libav/avbridge/libav-6.8.8.0-avbridge.wasm.wasm +0 -0
  49. package/vendor/libav/avbridge/libav-avbridge.mjs +1 -1
  50. package/dist/avi-EQE6AR75.cjs.map +0 -1
  51. package/dist/avi-NNHH4AAA.js.map +0 -1
  52. package/dist/avi-S7EY54YA.js.map +0 -1
  53. package/dist/avi-Y3N325WZ.cjs.map +0 -1
  54. package/dist/chunk-7EF4VTUS.cjs.map +0 -1
  55. package/dist/chunk-Z26PXRUY.js.map +0 -1
@@ -38,7 +38,21 @@ export class VideoRenderer {
38
38
  private framesPainted = 0;
39
39
  private framesDroppedLate = 0;
40
40
  private framesDroppedOverflow = 0;
41
+ /** True once the head frame has been painted as a pre-roll poster
42
+ * since the last flush. Used to ensure pre-roll paints exactly one
43
+ * frame (held static) during the post-seek discard window. */
41
44
  private prerolled = false;
45
+ /** PTS (µs) of the most recently painted frame. Used as the calibration
46
+ * reference on the first post-flush snap: the pre-roll path paints one
47
+ * frame *before* PTS-based playback starts, so the queue head's PTS at
48
+ * first PTS-based paint is the *next* frame, off by one frameDur from
49
+ * the actually-displayed frame. Calibrating against the painted frame
50
+ * instead of the queue head removes that one-frame offset and yields
51
+ * calib ≈ 0 instead of +frameDur. */
52
+ private lastPaintedPtsUs = 0;
53
+ private hasLastPaintedPts = false;
54
+ /** Audio-clock reading (ms) at the previous paint, for overlay Δaud. */
55
+ private lastPaintAudMs = 0;
42
56
  /** Wall-clock time of the last paint, in ms (performance.now()). */
43
57
  private lastPaintWall = 0;
44
58
  /** Minimum ms between paints — paces video at roughly source fps. */
@@ -141,24 +155,39 @@ export class VideoRenderer {
141
155
  this.rafHandle = requestAnimationFrame(this.tick);
142
156
  }
143
157
 
144
- /** True once at least one frame has been enqueued. */
158
+ /**
159
+ * True once at least one frame has been enqueued *since the last flush*.
160
+ * Used by `readyState` — initial cold-start reports HAVE_NOTHING until
161
+ * any frame has arrived, and after a seek we want the same semantics
162
+ * (HAVE_NOTHING until post-seek frames arrive), so the cumulative
163
+ * `framesPainted > 0` that used to live here was wrong: it kept the
164
+ * state "true forever" after the first frame ever, so post-seek
165
+ * `waitForBuffer()` would exit immediately with an empty queue and
166
+ * leave video frozen while audio kept going.
167
+ */
145
168
  hasFrames(): boolean {
146
- return this.queue.length > 0 || this.framesPainted > 0;
169
+ return this.queue.length > 0 || this.hasEverEnqueuedSinceFlush;
147
170
  }
148
171
 
172
+ private hasEverEnqueuedSinceFlush = false;
173
+
149
174
  /** Current depth of the frame queue. Used by the decoder for backpressure. */
150
175
  queueDepth(): number {
151
176
  return this.queue.length;
152
177
  }
153
178
 
154
179
  /**
155
- * Soft cap for decoder backpressure. The decoder pump throttles when
156
- * `queueDepth() >= queueHighWater`. Set high enough that normal decode
157
- * bursts don't trigger the renderer's overflow-drop loop (which runs at
158
- * every paint), but low enough that the decoder doesn't run unboundedly
159
- * ahead. The hard cap in `enqueue()` is 64.
180
+ * Cap the decoder may fill the queue up to. Used by the decoder's
181
+ * enqueue-side discard logic (it closes new frames instead of pushing
182
+ * them when this is reached). Sized so a long post-seek catch-up
183
+ * fits: the decoder produces frames at PTS T_kf onwards rapidly
184
+ * while the demuxer is chewing through pre-target audio; if the
185
+ * queue can hold the whole post-seek burst, the renderer plays
186
+ * smoothly from pre-roll without a frozen-video gap when audio.start
187
+ * fires. At ~340 KB per SD frame the cap is ~85 MB peak; at HD it's
188
+ * larger but still bounded.
160
189
  */
161
- readonly queueHighWater = 30;
190
+ readonly queueHighWater = 256;
162
191
 
163
192
  enqueue(frame: VideoFrame): void {
164
193
  if (this.destroyed) {
@@ -166,13 +195,16 @@ export class VideoRenderer {
166
195
  return;
167
196
  }
168
197
  this.queue.push(frame);
198
+ this.hasEverEnqueuedSinceFlush = true;
169
199
  if (this.queue.length === 1 && this.framesPainted === 0) {
170
200
  this.resolveFirstFrame();
171
201
  }
172
- // Hard cap. Should rarely trigger because the decoder backs off at
173
- // queueHighWater (30) and the drift correction trims gently. This is
174
- // the last-resort defense against runaway producers.
175
- while (this.queue.length > 60) {
202
+ // Hard cap. The decoder's enqueue-side discard at `queueHighWater`
203
+ // is the primary defense; this `+8` margin is just safety for a
204
+ // racy producer. Drops the OLDEST frames, which during catch-up
205
+ // would mean losing the frames closest to the seek target — so the
206
+ // decoder should be tuned to never reach this.
207
+ while (this.queue.length > this.queueHighWater + 8) {
176
208
  this.queue.shift()?.close();
177
209
  this.framesDroppedOverflow++;
178
210
  }
@@ -271,14 +303,27 @@ export class VideoRenderer {
271
303
 
272
304
  const playing = this.clock.isPlaying();
273
305
 
274
- // Pre-roll: paint the very first frame as a poster while audio buffers.
306
+ // Pre-roll: paint the head frame ONCE as a poster while audio buffers.
307
+ //
308
+ // Safety invariant (load-bearing): with the decoder.ts content-clock
309
+ // fix (POSTMORTEMS 2026-06-01), pre-target frames are discarded at
310
+ // the decoder/enqueue boundary, so queue[0] here is guaranteed to be
311
+ // a near-target frame — never the keyframe-to-target preroll sequence
312
+ // that previously caused the post-seek fast-forward when painted.
313
+ //
314
+ // Paint at most ONE frame and hold it (gate via `prerolled`). Do NOT
315
+ // shift the queue: when audio unfreezes and `playing` becomes true,
316
+ // the regular PTS loop below will paint this same frame again and
317
+ // shift it out. That second paint is a no-op visually (same pixels)
318
+ // so there's no flicker.
319
+ //
320
+ // If the queue is empty (decoder still grinding through the post-seek
321
+ // discard window), just return — last pre-flush frame stays on canvas
322
+ // as the freeze poster, which is the safe fallback.
275
323
  if (!playing) {
276
- if (!this.prerolled) {
277
- const head = this.queue.shift()!;
278
- this.paint(head);
279
- head.close();
324
+ if (!this.prerolled && this.queue.length > 0) {
280
325
  this.prerolled = true;
281
- this.lastPaintWall = performance.now();
326
+ this.paint(this.queue[0]);
282
327
  }
283
328
  return;
284
329
  }
@@ -300,16 +345,81 @@ export class VideoRenderer {
300
345
  // plus a small rate drift (~7ms/s). We snap the offset on first paint
301
346
  // and re-snap every 10 seconds. Between snaps, max drift is ~70ms
302
347
  // (under 2 frames at 24fps, below lip-sync perception threshold).
348
+ //
349
+ // Two cases for the *first* snap after flush:
350
+ // - Anchor `rawAudioNowUs` against `clock.now()` (default for the
351
+ // periodic 10s re-snap) drifts with the audio clock — including
352
+ // decode-stall lag accumulated between `audio.start()` and the
353
+ // first frame's arrival. On a slow seek where the first frame
354
+ // lands 1–2s after audio resumed, this captures the lag as a
355
+ // permanent offset and the video stays that far behind audio.
356
+ // - For the *first* snap post-flush we instead use the audio's
357
+ // **anchor time** (`mediaTimeOfAnchor`, == the seek target / 0
358
+ // on cold start). That gives `headTs − seekTarget` ≈ keyframe
359
+ // offset (usually < 100ms), independent of decode delay.
303
360
  const wallNow = performance.now();
304
- if (!this.ptsCalibrated || wallNow - this.lastCalibrationWall > 10_000) {
305
- this.ptsCalibrationUs = headTs - rawAudioNowUs;
361
+ // First snap after flush/cold-start anchors against the audio's
362
+ // *master-clock reference* (= `mediaTimeOfAnchor`, == the rebased
363
+ // audio first-chunk PTS), NOT `clock.now()`. `clock.now()` includes
364
+ // wall-clock-drifted elapsed time between `audio.start()` and the
365
+ // first paint — on a slow seek where the first frame lands 1-2 s
366
+ // after audio resumed, that decode delay gets baked into the
367
+ // calibration as a permanent video-lag offset. See POSTMORTEMS.md
368
+ // (2026-04-13). The periodic re-snap continues to use `rawAudioNow`
369
+ // as the original design intended — a stateless independent snap
370
+ // every 10 s bounds drift to ~70 ms at the documented ~7 ms/s rate,
371
+ // below the lip-sync perception threshold. Do *not* introduce a
372
+ // smoothed / EMA / bounded-delta variant here: the measured offset
373
+ // includes the current calibration, which produces a feedback loop
374
+ // (postmortem 2026-04-13, hypothesis 3).
375
+ if (!this.ptsCalibrated) {
376
+ const anchorUs = (this.clock.anchorTime?.() ?? this.clock.now()) * 1_000_000;
377
+ // Reference frame for calibration: prefer the pre-rolled frame's
378
+ // PTS over the queue head, since the pre-rolled frame is what the
379
+ // user is *actually looking at* the moment audio starts. The queue
380
+ // head at this point is the NEXT frame (PTS == prerolled + frameDur),
381
+ // and calibrating against it bakes that one-frame offset into the
382
+ // calibration permanently. With the painted-frame reference, calib
383
+ // ≈ 0 when video keyframe lands at the seek target.
384
+ const referencePtsUs = this.hasLastPaintedPts ? this.lastPaintedPtsUs : headTs;
385
+ this.ptsCalibrationUs = referencePtsUs - anchorUs;
306
386
  this.ptsCalibrated = true;
307
387
  this.lastCalibrationWall = wallNow;
388
+ if (isDebug()) {
389
+ // eslint-disable-next-line no-console
390
+ console.log(
391
+ `[avbridge:renderer] CALIB-FIRST audioAnchor=${(anchorUs / 1000).toFixed(1)}ms ` +
392
+ `prerolledPTS=${this.hasLastPaintedPts ? (this.lastPaintedPtsUs / 1000).toFixed(1) : "n/a"}ms ` +
393
+ `queueHeadPTS=${(headTs / 1000).toFixed(1)}ms ` +
394
+ `rawAudioNow=${(rawAudioNowUs / 1000).toFixed(1)}ms ` +
395
+ `→ calib=${(this.ptsCalibrationUs / 1000).toFixed(1)}ms`,
396
+ );
397
+ }
398
+ } else if (wallNow - this.lastCalibrationWall > 10_000) {
399
+ const oldCalib = this.ptsCalibrationUs;
400
+ this.ptsCalibrationUs = headTs - rawAudioNowUs;
401
+ this.lastCalibrationWall = wallNow;
402
+ if (isDebug()) {
403
+ // eslint-disable-next-line no-console
404
+ console.log(
405
+ `[avbridge:renderer] CALIB-RESNAP ` +
406
+ `headPTS=${(headTs / 1000).toFixed(1)}ms rawAudioNow=${(rawAudioNowUs / 1000).toFixed(1)}ms ` +
407
+ `calib ${(oldCalib / 1000).toFixed(1)}ms → ${(this.ptsCalibrationUs / 1000).toFixed(1)}ms ` +
408
+ `(Δ=${((this.ptsCalibrationUs - oldCalib) / 1000).toFixed(1)}ms after 10s)`,
409
+ );
410
+ }
308
411
  }
309
412
 
310
413
  const audioNowUs = rawAudioNowUs + this.ptsCalibrationUs;
311
- const frameDurationUs = this.paintIntervalMs * 1000;
312
- const deadlineUs = audioNowUs + frameDurationUs;
414
+ // Paint the frame whose PTS is at or just before audioNow. A frame
415
+ // at PTS P should be the displayed frame from the moment audio
416
+ // reaches P, *not* from P − frameDur. The previous code used
417
+ // `deadline = audioNow + frameDur`, which painted frames up to one
418
+ // source-frame ahead of audio — a steady ~40 ms video-leads-audio
419
+ // offset that the user perceived as "fast-forward then normal."
420
+ // With `deadline = audioNow`, paints land exactly at the frame's
421
+ // start of display interval; lip sync matches.
422
+ const deadlineUs = audioNowUs;
313
423
 
314
424
  let bestIdx = -1;
315
425
  for (let i = 0; i < this.queue.length; i++) {
@@ -341,20 +451,27 @@ export class VideoRenderer {
341
451
  return;
342
452
  }
343
453
 
344
- // Only drop frames that are more than 2 frame-durations behind.
345
- const dropThresholdUs = audioNowUs - frameDurationUs * 2;
454
+ // Audio-sync skip: when `bestIdx > 0` there are multiple frames in
455
+ // the queue whose PTS is at or before the deadline. Drop everything before `bestIdx`
456
+ // and paint the latest paintable frame. See POSTMORTEMS.md
457
+ // 2026-05-31 coda for the rationale.
458
+ const _relaxDrop =
459
+ (globalThis as { AVBRIDGE_RELAX_DROP?: boolean }).AVBRIDGE_RELAX_DROP === true;
346
460
  let dropped = 0;
347
- while (bestIdx > 0) {
348
- const ts = this.queue[0].timestamp ?? 0;
349
- if (ts < dropThresholdUs) {
461
+ const initialBestIdx = bestIdx;
462
+ if (!_relaxDrop) {
463
+ while (bestIdx > 0) {
350
464
  this.queue.shift()?.close();
351
465
  this.framesDroppedLate++;
352
466
  bestIdx--;
353
467
  dropped++;
354
- } else {
355
- break;
356
468
  }
357
469
  }
470
+ const paintTs = this.queue[0]?.timestamp ?? 0;
471
+ if (isDebug()) {
472
+ // eslint-disable-next-line no-console
473
+ console.log(`[TRACE] PAINT bestIdx_initial=${initialBestIdx} dropped=${dropped} paintPts=${(paintTs / 1000).toFixed(1)}ms audioNow=${(audioNowUs / 1000).toFixed(1)}ms deadline=${(deadlineUs / 1000).toFixed(1)}ms queueLen=${this.queue.length} wall=${performance.now().toFixed(0)}`);
474
+ }
358
475
 
359
476
  this.ticksPainted++;
360
477
 
@@ -402,6 +519,51 @@ export class VideoRenderer {
402
519
  }
403
520
  try {
404
521
  this.ctx.drawImage(frame, 0, 0, this.canvas.width, this.canvas.height);
522
+
523
+ // Debug overlay (gated on AVBRIDGE_DEBUG). Draws frame info on top
524
+ // of the painted frame so the user can SEE what's actually
525
+ // displayed and at what rate. Three time domains:
526
+ // pts — source content time (from frame.timestamp)
527
+ // aud — audio media clock (clock.now() × 1000)
528
+ // wall — performance.now() (monotonic browser clock)
529
+ // Plus the per-paint deltas. If `Δpts > Δwall` sustained across
530
+ // multiple frames, that's real fast-forward; if it alternates
531
+ // 33/50ms on a 25fps source, that's 3:2 pulldown judder. (See
532
+ // POSTMORTEMS 2026-06-01 for why this overlay was load-bearing
533
+ // when diagnosing the post-seek fast-forward.)
534
+ if (isDebug()) {
535
+ const wallNow = performance.now();
536
+ const audNowMs = this.clock.now() * 1000;
537
+ const ptsMs = (frame.timestamp ?? 0) / 1000;
538
+ const dWall = this.lastPaintWall > 0 ? wallNow - this.lastPaintWall : 0;
539
+ const dAud = this.lastPaintAudMs > 0 ? audNowMs - this.lastPaintAudMs : 0;
540
+ const dPts = this.hasLastPaintedPts ? ptsMs - this.lastPaintedPtsUs / 1000 : 0;
541
+ this.ctx.save();
542
+ this.ctx.font = "bold 18px monospace";
543
+ const lines = [
544
+ `#${this.framesPainted + 1} pts=${ptsMs.toFixed(0)} aud=${audNowMs.toFixed(0)} wall=${wallNow.toFixed(0)}`,
545
+ `Δpts=${dPts.toFixed(0)} Δaud=${dAud.toFixed(0)} Δwall=${dWall.toFixed(0)}`,
546
+ ];
547
+ const lineHeight = 22;
548
+ const padTop = 6;
549
+ const stripH = padTop + lineHeight * lines.length;
550
+ this.ctx.fillStyle = "rgba(0,0,0,0.7)";
551
+ this.ctx.fillRect(0, 0, this.canvas.width, stripH);
552
+ this.ctx.fillStyle = "#0f0";
553
+ for (let i = 0; i < lines.length; i++) {
554
+ this.ctx.fillText(lines[i], 8, padTop + lineHeight * (i + 1) - 4);
555
+ }
556
+ this.ctx.restore();
557
+ }
558
+
559
+ // Record the just-painted frame's PTS so the next paint's overlay
560
+ // Δpts and the first post-flush calibration (CALIB-FIRST) have a reference. Must run
561
+ // unconditionally — `hasLastPaintedPts`/`lastPaintedPtsUs` are read
562
+ // by the calibration path in tick() too, not just the overlay.
563
+ this.lastPaintedPtsUs = frame.timestamp ?? 0;
564
+ this.hasLastPaintedPts = true;
565
+ this.lastPaintAudMs = this.clock.now() * 1000;
566
+
405
567
  this.framesPainted++;
406
568
  } catch (err) {
407
569
  // Log only once so a structurally broken frame format doesn't spam
@@ -418,7 +580,9 @@ export class VideoRenderer {
418
580
  const count = this.queue.length;
419
581
  while (this.queue.length > 0) this.queue.shift()?.close();
420
582
  this.prerolled = false;
583
+ this.hasLastPaintedPts = false; // calibration ref doesn't carry across seek
421
584
  this.ptsCalibrated = false; // recalibrate at new seek position
585
+ this.hasEverEnqueuedSinceFlush = false; // so waitForBuffer() waits for post-flush frames
422
586
  if (isDebug() && count > 0) {
423
587
  // eslint-disable-next-line no-console
424
588
  console.log(`[avbridge:renderer] FLUSH discarded=${count} painted=${this.framesPainted} drops=${this.framesDroppedLate}`);
@@ -426,11 +590,27 @@ export class VideoRenderer {
426
590
  }
427
591
 
428
592
  stats(): Record<string, unknown> {
593
+ // Queue span — the gap between the oldest and newest queued frame's
594
+ // PTS, in ms. If this collapses while audio keeps advancing, the
595
+ // producer has stalled. If it stays wide with stale head, the
596
+ // producer is bursting faster than realtime but the renderer can't
597
+ // catch up.
598
+ let queueSpanMs = 0;
599
+ let queueHeadMs = 0;
600
+ let queueTailMs = 0;
601
+ if (this.queue.length > 0) {
602
+ queueHeadMs = Math.round((this.queue[0].timestamp ?? 0) / 1000);
603
+ queueTailMs = Math.round((this.queue[this.queue.length - 1].timestamp ?? 0) / 1000);
604
+ queueSpanMs = Math.max(0, queueTailMs - queueHeadMs);
605
+ }
429
606
  return {
430
607
  framesPainted: this.framesPainted,
431
608
  framesDroppedLate: this.framesDroppedLate,
432
609
  framesDroppedOverflow: this.framesDroppedOverflow,
433
610
  queueDepth: this.queue.length,
611
+ queueHeadMs,
612
+ queueTailMs,
613
+ queueSpanMs,
434
614
  };
435
615
  }
436
616
 
@@ -22,7 +22,6 @@ import { dbg } from "../../util/debug.js";
22
22
  import { pickLibavVariant } from "../fallback/variant-routing.js";
23
23
  import {
24
24
  sanitizePacketTimestamp,
25
- sanitizeFrameTimestamp,
26
25
  libavFrameToInterleavedFloat32,
27
26
  packetPtsSec,
28
27
  } from "../../util/libav-demux.js";
@@ -165,6 +164,7 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
165
164
  // ── Bitstream filter for MPEG-4 Part 2 packed B-frames ───────────────
166
165
  let bsfCtx: number | null = null;
167
166
  let bsfPkt: number | null = null;
167
+ let bsfRequiredButMissing = false;
168
168
  if (videoStream && opts.context.videoTracks[0]?.codec === "mpeg4") {
169
169
  try {
170
170
  bsfCtx = await libav.av_bsf_list_parse_str_js("mpeg4_unpack_bframes");
@@ -175,15 +175,23 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
175
175
  bsfPkt = await libav.av_packet_alloc();
176
176
  dbg.info("bsf", "mpeg4_unpack_bframes BSF active (hybrid)");
177
177
  } else {
178
- // eslint-disable-next-line no-console
179
- console.warn("[avbridge] mpeg4_unpack_bframes BSF not available in hybrid decoder");
178
+ bsfRequiredButMissing = true;
180
179
  bsfCtx = null;
181
180
  }
182
181
  } catch (err) {
183
- // eslint-disable-next-line no-console
184
- console.warn("[avbridge] hybrid: failed to init BSF:", (err as Error).message);
182
+ bsfRequiredButMissing = true;
185
183
  bsfCtx = null;
186
184
  bsfPkt = null;
185
+ dbg.warn("bsf", `hybrid: mpeg4_unpack_bframes BSF init failed: ${(err as Error).message}`);
186
+ }
187
+ if (bsfRequiredButMissing) {
188
+ // eslint-disable-next-line no-console
189
+ console.error(
190
+ "[avbridge] MPEG-4 Part 2 (DivX/Xvid) detected but mpeg4_unpack_bframes " +
191
+ "BSF is unavailable in this libav variant. Files with packed B-frames " +
192
+ "will play with incorrect frame ordering. Rebuild the libav variant " +
193
+ "with the `avbsf` fragment included.",
194
+ );
187
195
  }
188
196
  }
189
197
 
@@ -193,7 +201,13 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
193
201
  for (const pkt of packets) {
194
202
  await libav.ff_copyin_packet(bsfPkt, pkt);
195
203
  const sendErr = await libav.av_bsf_send_packet(bsfCtx, bsfPkt);
196
- if (sendErr < 0) { out.push(pkt); continue; }
204
+ if (sendErr < 0) {
205
+ // BSF rejected — DON'T pass the original through. Its buffer may
206
+ // have been transferred into the worker by ff_copyin_packet, so
207
+ // re-posting it would throw DataCloneError on a detached
208
+ // ArrayBuffer. See fallback/decoder.ts for the full explanation.
209
+ continue;
210
+ }
197
211
  while (true) {
198
212
  const recvErr = await libav.av_bsf_receive_packet(bsfCtx, bsfPkt);
199
213
  if (recvErr < 0) break;
@@ -206,10 +220,18 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
206
220
  async function flushBSF(): Promise<void> {
207
221
  if (!bsfCtx || !bsfPkt) return;
208
222
  try {
209
- await libav.av_bsf_send_packet(bsfCtx, 0);
210
- while (true) {
211
- const err = await libav.av_bsf_receive_packet(bsfCtx, bsfPkt);
212
- if (err < 0) break;
223
+ // Use av_bsf_flush to reset the BSF without putting it in EOF mode.
224
+ // See the matching comment in src/strategies/fallback/decoder.ts —
225
+ // sending NULL as the flush signal puts the BSF into EOF state so
226
+ // subsequent sends fail, which corrupts the post-seek pipeline with
227
+ // detached-buffer DataCloneErrors.
228
+ if (libav.av_bsf_flush) {
229
+ await libav.av_bsf_flush(bsfCtx);
230
+ } else {
231
+ while (true) {
232
+ const err = await libav.av_bsf_receive_packet(bsfCtx, bsfPkt);
233
+ if (err < 0) break;
234
+ }
213
235
  }
214
236
  } catch { /* ignore */ }
215
237
  }
@@ -225,8 +247,9 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
225
247
  let videoChunksFed = 0;
226
248
  let bufferedUntilSec = 0;
227
249
 
250
+ // Synthetic video timestamp for packets with AV_NOPTS_VALUE (audio
251
+ // uses the packet PTS directly — see decodeAudioBatch).
228
252
  let syntheticVideoUs = 0;
229
- let syntheticAudioUs = 0;
230
253
 
231
254
  const videoTrackInfo = opts.context.videoTracks.find((t) => t.id === videoStream?.index);
232
255
  const videoFps = videoTrackInfo?.fps && videoTrackInfo.fps > 0 ? videoTrackInfo.fps : 30;
@@ -277,7 +300,7 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
277
300
  // 10-50 ms. Processing audio first ensures the audio scheduler is
278
301
  // fed before video decode starts, reducing perceived stutter.
279
302
  if (audioDec && audioPackets && audioPackets.length > 0) {
280
- await decodeAudioBatch(audioPackets, myToken);
303
+ await decodeAudioBatch(audioPackets, myToken, /*flush*/ false, audioTimeBase);
281
304
  }
282
305
  if (myToken !== pumpToken || destroyed) return;
283
306
 
@@ -340,9 +363,23 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
340
363
  }
341
364
  }
342
365
 
343
- async function decodeAudioBatch(pkts: LibavPacket[], myToken: number, flush = false) {
366
+ async function decodeAudioBatch(
367
+ pkts: LibavPacket[],
368
+ myToken: number,
369
+ flush = false,
370
+ tb?: [number, number],
371
+ ) {
344
372
  if (!audioDec || destroyed || myToken !== pumpToken) return;
345
373
 
374
+ // Capture packet-level PTS before decode (same rationale as fallback
375
+ // decoder — see POSTMORTEMS.md 2026-05-31: libav's reported
376
+ // `frame.pts` is unreliable for some container/codec combinations;
377
+ // the demuxer's packet PTS is reliable). For mp3/aac the packet→frame
378
+ // mapping is 1:1, so the PTS array aligns with `allFrames`.
379
+ const pktPtsSec: (number | null)[] = pkts.map((p) =>
380
+ tb ? packetPtsSec(p, tb) : null,
381
+ );
382
+
346
383
  // For heavy codecs (DTS, AC3), decode in small sub-batches and yield
347
384
  // between them so the event loop can run rAF for video painting.
348
385
  // Each ff_decode_multi call is a blocking WASM invocation.
@@ -386,22 +423,13 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
386
423
  if (myToken !== pumpToken || destroyed) return;
387
424
  const frames = allFrames;
388
425
 
389
- for (const f of frames) {
426
+ for (let i = 0; i < frames.length; i++) {
390
427
  if (myToken !== pumpToken || destroyed) return;
391
- sanitizeFrameTimestamp(
392
- f,
393
- () => {
394
- const ts = syntheticAudioUs;
395
- const samples = f.nb_samples ?? 1024;
396
- const sampleRate = f.sample_rate ?? 44100;
397
- syntheticAudioUs += Math.round((samples * 1_000_000) / sampleRate);
398
- return ts;
399
- },
400
- audioTimeBase,
401
- );
428
+ const f = frames[i];
402
429
  const samples = libavFrameToInterleavedFloat32(f);
403
430
  if (samples) {
404
- opts.audio.schedule(samples.data, samples.channels, samples.sampleRate);
431
+ const pts = pktPtsSec[i] ?? null;
432
+ opts.audio.schedule(samples.data, samples.channels, samples.sampleRate, pts);
405
433
  audioFramesDecoded++;
406
434
  }
407
435
  }
@@ -499,7 +527,6 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
499
527
  await flushBSF();
500
528
 
501
529
  syntheticVideoUs = Math.round(timeSec * 1_000_000);
502
- syntheticAudioUs = Math.round(timeSec * 1_000_000);
503
530
 
504
531
  pumpRunning = pumpLoop(newToken).catch((err) =>
505
532
  console.error("[avbridge] hybrid pump failed (post-setAudioTrack):", err),
@@ -543,7 +570,6 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
543
570
  await flushBSF();
544
571
 
545
572
  syntheticVideoUs = Math.round(timeSec * 1_000_000);
546
- syntheticAudioUs = Math.round(timeSec * 1_000_000);
547
573
 
548
574
  pumpRunning = pumpLoop(newToken).catch((err) =>
549
575
  console.error("[avbridge] hybrid pump failed (post-seek):", err),
@@ -562,6 +588,7 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
562
588
  videoChunksFed,
563
589
  audioFramesDecoded,
564
590
  bsfApplied: bsfCtx ? ["mpeg4_unpack_bframes"] : [],
591
+ bsfMissing: bsfRequiredButMissing ? ["mpeg4_unpack_bframes"] : [],
565
592
  videoDecodeQueueSize: videoDecoder?.decodeQueueSize ?? 0,
566
593
  // Confirmed transport info — see fallback decoder for the pattern.
567
594
  _transport: inputHandle.transport === "http-range" ? "http-range" : "memory",
@@ -687,6 +714,7 @@ interface LibavRuntime {
687
714
  av_bsf_init(ctx: number): Promise<number>;
688
715
  av_bsf_send_packet(ctx: number, pkt: number): Promise<number>;
689
716
  av_bsf_receive_packet(ctx: number, pkt: number): Promise<number>;
717
+ av_bsf_flush?(ctx: number): Promise<void>;
690
718
  av_bsf_free(ctx: number): Promise<void>;
691
719
  ff_copyin_packet(pktPtr: number, packet: LibavPacket): Promise<void>;
692
720
  ff_copyout_packet(pkt: number): Promise<LibavPacket>;
@@ -129,6 +129,13 @@ export async function createRemuxPipeline(
129
129
  }
130
130
 
131
131
  let mimePromise: Promise<string> | null = null;
132
+ // Capture the active pump token at the moment this output was created.
133
+ // A subsequent seek bumps `pumpToken`, and any in-flight write from this
134
+ // (now-stale) output must drop its chunk instead of appending to the
135
+ // SourceBuffer — otherwise stale fragments land at their original
136
+ // timestamps, the deferred seek applies against the wrong buffered
137
+ // range, and the video snaps to the end of the stale range.
138
+ const myToken = pumpToken;
132
139
 
133
140
  const writable = new WritableStream<{
134
141
  type: "write";
@@ -136,11 +143,13 @@ export async function createRemuxPipeline(
136
143
  position: number;
137
144
  }>({
138
145
  write: async (chunk) => {
139
- if (destroyed) return;
146
+ if (destroyed || pumpToken !== myToken) return;
140
147
  if (!sink) {
141
148
  const mime = await (mimePromise ??= output.getMimeType());
149
+ if (destroyed || pumpToken !== myToken) return;
142
150
  sink = new MseSink({ mime, video });
143
151
  await sink.ready();
152
+ if (destroyed || pumpToken !== myToken) return;
144
153
  // Apply deferred seek + autoPlay for the initial start.
145
154
  if (pendingStartTime > 0) {
146
155
  sink.invalidate(pendingStartTime);
@@ -148,10 +157,10 @@ export async function createRemuxPipeline(
148
157
  sink.setPlayOnSeek(pendingAutoPlay);
149
158
  }
150
159
  // Backpressure: wait for the SourceBuffer append queue to drain.
151
- while (sink && !destroyed && (sink.queueLength() > 10 || sink.bufferedAhead() > 60 || sink.totalBuffered() > 120)) {
160
+ while (sink && !destroyed && pumpToken === myToken && (sink.queueLength() > 10 || sink.bufferedAhead() > 60 || sink.totalBuffered() > 120)) {
152
161
  await new Promise((r) => setTimeout(r, 500));
153
162
  }
154
- if (destroyed) return;
163
+ if (destroyed || pumpToken !== myToken) return;
155
164
  sink.append(chunk.data);
156
165
  stats.bytesWritten += chunk.data.byteLength;
157
166
  stats.fragments++;