avbridge 2.12.1 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,8 +29,15 @@ import { AudioOutput } from "./audio-output.js";
29
29
  import type { MediaContext } from "../../types.js";
30
30
  import { pickLibavVariant } from "./variant-routing.js";
31
31
  import { dbg } from "../../util/debug.js";
32
+
33
+ /** True when `globalThis.AVBRIDGE_DEBUG` is set. Used to gate verbose
34
+ * per-packet / per-frame trace lines that are useful for debugging
35
+ * post-seek pts behavior but unreadable in normal use. */
36
+ function isDebug(): boolean {
37
+ return typeof globalThis !== "undefined"
38
+ && !!(globalThis as Record<string, unknown>).AVBRIDGE_DEBUG;
39
+ }
32
40
  import {
33
- sanitizeFrameTimestamp,
34
41
  libavFrameToInterleavedFloat32,
35
42
  packetPtsSec,
36
43
  } from "../../util/libav-demux.js";
@@ -275,9 +282,41 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
275
282
  let watchdogSlowWarned = false;
276
283
  let watchdogOverflowWarned = false;
277
284
 
278
- // Synthetic timestamp counters. Reset on seek.
279
- let syntheticVideoUs = 0;
280
- let syntheticAudioUs = 0;
285
+ // Content clock for video frames. Tracks the last frame's content time
286
+ // in µs. The invariant per emit:
287
+ // - raw libav pts valid → lastContentUs = raw_pts (sync to truth)
288
+ // - raw libav pts NOPTS → lastContentUs += frameStep (extend by one frame)
289
+ // This makes synthetic labels always relative to the *immediately
290
+ // preceding* frame's real content, self-correcting at every valid pts.
291
+ // -1 means "unanchored" — pre-anchor NOPTS frames are discarded outright
292
+ // because we don't know where the decoder actually landed. The anchor
293
+ // is established at the first valid raw pts post-seek.
294
+ //
295
+ // This replaced an older "synthetic counter reset to seekTarget on seek"
296
+ // path which stamped NOPTS preroll frames with the user's requested seek
297
+ // time — producing labels 4+ seconds ahead of actual content, dropping
298
+ // every valid-pts frame as a "regression", and surfacing as a ~2s
299
+ // post-seek fast-forward as the slow-advancing synthetic counter slowly
300
+ // converged with real content. See POSTMORTEMS.md (2026-06-01).
301
+ let lastContentUs = -1;
302
+ let firstValidPtsLoggedSinceSeek = false;
303
+
304
+ // Diagnostic: first post-seek audio packet's PTS. Logged once per seek
305
+ // so the operator can see the demuxer's actual content alignment vs
306
+ // the user's click. With PTS-based audio scheduling, audio packets
307
+ // with PTS before the seek target *naturally* don't get scheduled
308
+ // (their computed ctxStart falls in the past) — no manual trim needed.
309
+ let seenFirstAudioPacketSinceSeek = false;
310
+ let seekTargetSec = 0;
311
+ // Post-seek diagnostic counters. Capture raw pts/dts/pos for the first
312
+ // ~N packets and frames after each seek so we can tell whether libav
313
+ // hands us a valid pts at seek landing, when (if ever) it becomes
314
+ // valid mid-stream, and whether sanitize's NOPTS fallback is firing.
315
+ let diagPktsLoggedSinceSeek = 0;
316
+ let diagFramesLoggedSinceSeek = 0;
317
+ let diagFrameKeysDumped = false;
318
+ const DIAG_MAX_PKTS = 100;
319
+ const DIAG_MAX_FRAMES = 300;
281
320
 
282
321
  // Throughput instrumentation — answers "is the decoder keeping up?".
283
322
  // All counters are cumulative since bootstrap (not reset on seek), so
@@ -345,6 +384,37 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
345
384
  for (const pkt of videoPackets) {
346
385
  const sec = packetPtsSec(pkt, videoTimeBase);
347
386
  if (sec != null && sec > bufferedUntilSec) bufferedUntilSec = sec;
387
+ // [DIAG-PKT] Raw pre-sanitize packet fields for the first N
388
+ // post-seek video packets. Most important question: does the
389
+ // FIRST packet after av_seek_frame carry a valid pts? If yes,
390
+ // we can anchor synthetic counter to that — cheap & robust.
391
+ // If no, fall back to pkt_pos → AVI index → chunk_idx × frameDur.
392
+ if (isDebug() && diagPktsLoggedSinceSeek < DIAG_MAX_PKTS) {
393
+ const rawHi = (pkt as { ptshi?: number }).ptshi ?? 0;
394
+ const rawLo = pkt.pts ?? 0;
395
+ const isInvalidPts = (rawHi === -2147483648 && rawLo === 0);
396
+ const rawPts64 = isInvalidPts ? null : (rawHi * 0x100000000 + rawLo);
397
+ const rawSec = rawPts64 != null && videoTimeBase
398
+ ? (rawPts64 * videoTimeBase[0]) / videoTimeBase[1]
399
+ : null;
400
+ const pktKeys = diagPktsLoggedSinceSeek === 0
401
+ ? `[keys: ${Object.keys(pkt).join(",")}]`
402
+ : "";
403
+ // eslint-disable-next-line no-console
404
+ console.log(
405
+ `[DIAG-PKT] vidx=${diagPktsLoggedSinceSeek} ` +
406
+ `pts=${isInvalidPts ? "NOPTS" : rawPts64} ` +
407
+ `pts_sec=${rawSec != null ? rawSec.toFixed(3) : "n/a"} ` +
408
+ `ptshi=${rawHi} ptslo=${rawLo} ` +
409
+ `flags=0x${(pkt.flags ?? 0).toString(16)} ` +
410
+ `keyframe=${((pkt.flags ?? 0) & 1) ? "Y" : "N"} ` +
411
+ `stream=${pkt.stream_index} ` +
412
+ `dataLen=${pkt.data?.length ?? 0} ` +
413
+ `seekTarget=${seekTargetSec.toFixed(3)} ` +
414
+ pktKeys,
415
+ );
416
+ diagPktsLoggedSinceSeek++;
417
+ }
348
418
  }
349
419
  }
350
420
  if (audioPackets && audioTimeBase) {
@@ -352,6 +422,23 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
352
422
  const sec = packetPtsSec(pkt, audioTimeBase);
353
423
  if (sec != null && sec > bufferedUntilSec) bufferedUntilSec = sec;
354
424
  }
425
+ // Diagnostic: log the first post-seek audio packet's PTS. With
426
+ // PTS-based scheduling, packets whose PTS is before the seek
427
+ // target won't be played (AudioOutput skips them silently), so
428
+ // this is informational only — it tells you how far off the
429
+ // demuxer's seek granularity is from the user's click.
430
+ if (!seenFirstAudioPacketSinceSeek && audioPackets.length > 0) {
431
+ const firstSec = packetPtsSec(audioPackets[0], audioTimeBase);
432
+ if (firstSec != null && Number.isFinite(firstSec)) {
433
+ seenFirstAudioPacketSinceSeek = true;
434
+ dbg.info("av-anchor",
435
+ `seek-target=${seekTargetSec.toFixed(3)}s, ` +
436
+ `first-audio-pkt-pts=${firstSec.toFixed(3)}s ` +
437
+ `(Δ=${((firstSec - seekTargetSec) * 1000).toFixed(1)}ms — ` +
438
+ `pre-target packets will be skipped by AudioOutput)`,
439
+ );
440
+ }
441
+ }
355
442
  }
356
443
 
357
444
  // Decode audio BEFORE video. On software-decode-bound content
@@ -363,7 +450,7 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
363
450
  // packet for cook/mp3/aac, so doing it first barely delays
364
451
  // video decoding at all.
365
452
  if (audioDec && audioPackets && audioPackets.length > 0) {
366
- await decodeAudioBatch(audioPackets, myToken);
453
+ await decodeAudioBatch(audioPackets, myToken, /*flush*/ false, audioTimeBase);
367
454
  }
368
455
  if (myToken !== pumpToken || destroyed) return;
369
456
  if (videoDec && videoPackets && videoPackets.length > 0) {
@@ -431,21 +518,22 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
431
518
  }
432
519
  }
433
520
 
434
- // Throttle: don't run too far ahead of playback. Two backpressure
435
- // signals:
436
- // - Audio buffer (mediaTimeOfNext - now()) > 2 sec — we have
437
- // plenty of audio scheduled.
438
- // - Renderer queue depth >= queueHighWater — the canvas can't
439
- // drain fast enough. Without this, fast software decode of
440
- // small frames piles up in the renderer and overflows.
521
+ // Throttle: only on audio buffer (mediaTimeOfNext - now() > 2 s).
522
+ // Renderer queue backpressure is enforced at the *enqueue* side in
523
+ // `decodeVideoBatch` — when the queue is at `queueHighWater`, the
524
+ // freshly decoded VideoFrame is closed without being enqueued, so
525
+ // the decoder keeps consuming packets in order. That preserves the
526
+ // reference-frame state needed to decode P/B frames cleanly during
527
+ // post-seek catch-up. Throttling the *pump* on queue depth here
528
+ // would block demuxer reads, which would also stall audio packet
529
+ // processing and starve `audio.bufferAhead()`.
441
530
  {
442
531
  const _throttleStart = performance.now();
443
532
  let _throttled = false;
444
533
  while (
445
534
  !destroyed &&
446
535
  myToken === pumpToken &&
447
- (opts.audio.bufferAhead() > 2.0 ||
448
- opts.renderer.queueDepth() >= opts.renderer.queueHighWater)
536
+ opts.audio.bufferAhead() > 2.0
449
537
  ) {
450
538
  _throttled = true;
451
539
  await new Promise((r) => setTimeout(r, 50));
@@ -494,31 +582,160 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
494
582
 
495
583
  for (const f of frames) {
496
584
  if (myToken !== pumpToken || destroyed) return;
497
- sanitizeFrameTimestamp(
498
- f,
499
- () => {
500
- // Anchor the synthetic timestamp to the last emitted frame's
501
- // pts + one frame step. A plain counter (the old behavior)
502
- // started at 0 and only advanced on invalid frames, which
503
- // made the occasional AV_NOPTS_VALUE output get assigned a
504
- // timestamp near the stream start causing the renderer to
505
- // paint backwards and drop healthy frames around it. Anchoring
506
- // to `lastEmittedPtsUs` keeps invalid frames monotonic with
507
- // their valid neighbors.
508
- const base =
509
- lastEmittedPtsUs >= 0
510
- ? lastEmittedPtsUs + videoFrameStepUs
511
- : syntheticVideoUs;
512
- syntheticVideoUs = base + videoFrameStepUs;
513
- return base;
514
- },
515
- videoTimeBase,
516
- );
517
- // sanitizeFrameTimestamp normalizes pts to µs, so the bridge can
518
- // always use the 1/1e6 timebase.
519
- const _fPts = (f.ptshi ?? 0) * 0x100000000 + (f.pts ?? 0);
585
+ // [DIAG-FRAME] Capture raw pre-sanitize fields. One-shot key dump
586
+ // on first frame post-seek so we can see which fields libav
587
+ // actually exposes (best_effort_timestamp? pkt_dts? pkt_pos?).
588
+ const _diagShouldLog = isDebug() && diagFramesLoggedSinceSeek < DIAG_MAX_FRAMES;
589
+ const _diagRawHi = f.ptshi ?? 0;
590
+ const _diagRawLo = f.pts ?? 0;
591
+ const _diagInvalid = (_diagRawHi === -2147483648 && _diagRawLo === 0);
592
+ const _diagRawPts64 = _diagInvalid ? null : (_diagRawHi * 0x100000000 + _diagRawLo);
593
+ const _diagRawSec = _diagRawPts64 != null && videoTimeBase
594
+ ? (_diagRawPts64 * videoTimeBase[0]) / videoTimeBase[1]
595
+ : null;
596
+ if (_diagShouldLog && !diagFrameKeysDumped) {
597
+ diagFrameKeysDumped = true;
598
+ const allKeys = Object.keys(f);
599
+ const fieldDump: Record<string, unknown> = {};
600
+ for (const k of allKeys) {
601
+ const v = (f as unknown as Record<string, unknown>)[k];
602
+ // Skip the data buffer; everything else is metadata.
603
+ if (k === "data") continue;
604
+ if (typeof v === "object" && v !== null && "length" in (v as object)) continue;
605
+ fieldDump[k] = v;
606
+ }
607
+ // eslint-disable-next-line no-console
608
+ console.log(`[DIAG-FRAME] FIRST FRAME post-seek — all keys: ${allKeys.join(",")}`);
609
+ // eslint-disable-next-line no-console
610
+ console.log(`[DIAG-FRAME] FIRST FRAME field dump:`, fieldDump);
611
+ }
612
+ // Convert raw libav pts (in stream timebase) to µs, or null if NOPTS.
613
+ let rawUs: number | null = null;
614
+ if (!_diagInvalid && _diagRawPts64 != null) {
615
+ const tb = videoTimeBase ?? [1, 1_000_000];
616
+ const us = Math.round((_diagRawPts64 * 1_000_000 * tb[0]) / tb[1]);
617
+ if (Number.isFinite(us) && Math.abs(us) <= Number.MAX_SAFE_INTEGER) {
618
+ rawUs = us;
619
+ }
620
+ }
621
+
622
+ // Forward declare _diagLog so PRE-ANCHOR-DROP can call it.
623
+ // Final pts isn't known until after the anchor/step block, so we pass
624
+ // it as a parameter rather than closing over a `let`.
625
+ const _diagLog = (decision: string, finalPtsUs: number, sanFallback: boolean): void => {
626
+ if (!_diagShouldLog) return;
627
+ const ptsSrc = sanFallback
628
+ ? `SYNTHETIC(${_diagInvalid ? "NOPTS" : "invalid-range"})`
629
+ : "LIBAV";
630
+ // eslint-disable-next-line no-console
631
+ console.log(
632
+ `[DIAG-FRAME] vidx=${diagFramesLoggedSinceSeek} ` +
633
+ `raw_pts=${_diagInvalid ? "NOPTS" : _diagRawPts64} ` +
634
+ `raw_pts_sec=${_diagRawSec != null ? _diagRawSec.toFixed(3) : "n/a"} ` +
635
+ `pts_src=${ptsSrc} ` +
636
+ `final_pts_us=${finalPtsUs} ` +
637
+ `final_pts_sec=${(finalPtsUs / 1_000_000).toFixed(3)} ` +
638
+ `seekTarget=${seekTargetSec.toFixed(3)} ` +
639
+ `offset_to_target_ms=${((finalPtsUs / 1000) - (seekTargetSec * 1000)).toFixed(1)} ` +
640
+ `lastEmittedPts_us=${lastEmittedPtsUs} ` +
641
+ `decision=${decision}`,
642
+ );
643
+ diagFramesLoggedSinceSeek++;
644
+ };
645
+
646
+ // Anchor + step invariant.
647
+ // - Unanchored (post-seek, no valid pts seen yet) AND NOPTS frame
648
+ // → discard outright. We don't know where the decoder landed, so
649
+ // stamping a synthetic label would be a lie (this was the source
650
+ // of the post-seek fast-forward bug).
651
+ // - First valid raw pts → anchor `lastContentUs` to it. The pipeline
652
+ // below will then drop this and subsequent frames as pre-target
653
+ // until content reaches seekTarget.
654
+ // - Anchored AND valid → sync `lastContentUs` to truth.
655
+ // - Anchored AND NOPTS → step `lastContentUs += frameStep`.
656
+ let _diagSanFallbackFired = false;
657
+ const seekTargetUs = Math.round(seekTargetSec * 1_000_000);
658
+ if (lastContentUs < 0) {
659
+ if (rawUs == null) {
660
+ // Cold-start keyframe special case. At seekTargetSec === 0 the
661
+ // demuxer guarantees the very first emitted keyframe is content
662
+ // 0 (container start=0.000000). Anchoring there directly avoids
663
+ // discarding the opening I-frame — without this, cold start
664
+ // loses 1-2 frames and the first paint is ~80ms late.
665
+ //
666
+ // STRICTLY gated to seekTarget === 0. The seek path proved
667
+ // correct via the offset-ground-truth experiment (POSTMORTEMS
668
+ // 2026-06-01); this branch must not change its behavior.
669
+ //
670
+ // Why keyframe-pin instead of back-computing from the first
671
+ // valid pts: the I/P/B reorder is densest at the stream head,
672
+ // so `firstValidPts − N × frameStep` is off by however many
673
+ // early B-frames the decoder dropped. The keyframe identity
674
+ // (`f.key_frame === 1`) is the only signal that doesn't depend
675
+ // on frame-spacing assumptions.
676
+ const isColdStartKeyframe =
677
+ seekTargetSec === 0
678
+ && (f as { key_frame?: number }).key_frame === 1;
679
+ if (isColdStartKeyframe) {
680
+ lastContentUs = 0;
681
+ _diagSanFallbackFired = true;
682
+ // Fall through: the frame gets labeled 0 and runs through
683
+ // the regression/pre-target/enqueue pipeline normally.
684
+ } else {
685
+ // Pre-anchor NOPTS: discard. Decoder retains the frame internally
686
+ // as a reference — we just don't expose it to the renderer.
687
+ _diagLog("PRE-ANCHOR-DROP", 0, true);
688
+ continue;
689
+ }
690
+ } else {
691
+ // First valid raw pts post-seek = the anchor.
692
+ lastContentUs = rawUs;
693
+ if (!firstValidPtsLoggedSinceSeek) {
694
+ firstValidPtsLoggedSinceSeek = true;
695
+ if (isDebug()) {
696
+ // eslint-disable-next-line no-console
697
+ console.log(
698
+ `[avbridge:decoder] post-seek anchor established: ` +
699
+ `first valid raw pts = ${(rawUs / 1000).toFixed(1)}ms ` +
700
+ `(seekTarget = ${(seekTargetSec * 1000).toFixed(1)}ms, ` +
701
+ `Δ = ${((rawUs - seekTargetUs) / 1000).toFixed(1)}ms)`,
702
+ );
703
+ }
704
+ // Guard: if the first valid pts is at or beyond the seek
705
+ // target, the pre-anchor NOPTS frames we already discarded
706
+ // may have straddled the target. In normal AVI MPEG-4 seeks,
707
+ // the demuxer lands well before the target (previous
708
+ // keyframe), so this shouldn't happen — log a warning if it
709
+ // does so we know to implement a pkt_pos→AVI-index
710
+ // back-computation path. The cold-start case (seekTarget=0)
711
+ // is handled by the keyframe-pin branch above and shouldn't
712
+ // reach this warning.
713
+ if (rawUs >= seekTargetUs) {
714
+ // eslint-disable-next-line no-console
715
+ console.warn(
716
+ `[avbridge:decoder] first valid raw pts ≥ seek target — ` +
717
+ `pre-anchor NOPTS frames may have straddled the target ` +
718
+ `and been mis-discarded. First painted frame may be late ` +
719
+ `by up to one keyframe interval.`,
720
+ );
721
+ }
722
+ }
723
+ }
724
+ } else {
725
+ if (rawUs != null) {
726
+ lastContentUs = rawUs; // sync to truth on every valid pts
727
+ } else {
728
+ lastContentUs += videoFrameStepUs; // extend from last truth
729
+ _diagSanFallbackFired = true;
730
+ }
731
+ }
732
+ // Write the content label into the frame so the bridge sees it.
733
+ f.pts = lastContentUs;
734
+ f.ptshi = lastContentUs < 0 ? -1 : 0;
735
+ const _fPts = lastContentUs;
520
736
  if (_fPts > newestVideoPtsUs) newestVideoPtsUs = _fPts;
521
737
  if (lastEmittedPtsUs >= 0 && _fPts < lastEmittedPtsUs) {
738
+ _diagLog("REGRESSED-DROP", _fPts, _diagSanFallbackFired);
522
739
  // Decoder emitted a frame with lower PTS than the previous
523
740
  // output. Dropping out-of-order frames here is the right move:
524
741
  // the renderer's paint loop assumes monotonic queue order and
@@ -545,20 +762,70 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
545
762
  continue; // skip enqueue
546
763
  }
547
764
  lastEmittedPtsUs = _fPts;
765
+ // Decode-to-display: after a seek the demuxer lands at the
766
+ // keyframe ≤ click target and the decoder produces frames
767
+ // starting there. Pre-target frames are still DECODED (they're
768
+ // reference frames for later P/B decodes) but they MUST NOT be
769
+ // displayed — otherwise the renderer paints them in a brief
770
+ // fast-forward burst as it catches up to audio (T_click). Drop
771
+ // them at the enqueue boundary; the decoder doesn't care.
772
+ //
773
+ // Tolerance of one frame duration: source frames are quantized
774
+ // (PTS = N × frameStep) but the user's click is arbitrary, so
775
+ // the frame nearest the click is typically a few ms *before* it.
776
+ // Convention (matches `<video>.currentTime = T` and ffplay):
777
+ // display the frame at the largest PTS ≤ T.
778
+ const targetUs = Math.round(seekTargetSec * 1_000_000);
779
+ if (_fPts < targetUs - videoFrameStepUs) {
780
+ _diagLog("PRE-TARGET-DROP", _fPts, _diagSanFallbackFired);
781
+ continue;
782
+ }
548
783
  try {
549
784
  const vf = bridge.laFrameToVideoFrame(f, { timeBase: [1, 1_000_000] });
550
- opts.renderer.enqueue(vf);
785
+ // Renderer-queue backpressure at the enqueue side. Discarding
786
+ // here (rather than throttling the pump on `queueHighWater`)
787
+ // keeps the decoder consuming packets sequentially so its
788
+ // reference-frame state stays intact — essential during
789
+ // post-seek catch-up, when the pump must continue reading
790
+ // packets to advance the demuxer past pre-target audio. Without
791
+ // sequential decode, the next batch's P/B frames decode against
792
+ // a stale reference and produce gray + glitchy output until
793
+ // the next keyframe.
794
+ if (opts.renderer.queueDepth() >= opts.renderer.queueHighWater) {
795
+ vf.close();
796
+ _diagLog("OVERFLOW-DROP", _fPts, _diagSanFallbackFired);
797
+ } else {
798
+ opts.renderer.enqueue(vf);
799
+ _diagLog("ENQUEUED", _fPts, _diagSanFallbackFired);
800
+ }
551
801
  videoFramesDecoded++;
552
802
  } catch (err) {
553
803
  if (videoFramesDecoded === 0) {
554
804
  console.warn("[avbridge] laFrameToVideoFrame failed:", err);
555
805
  }
806
+ _diagLog("BRIDGE-ERROR", _fPts, _diagSanFallbackFired);
556
807
  }
557
808
  }
558
809
  }
559
810
 
560
- async function decodeAudioBatch(pkts: LibavPacket[], myToken: number, flush = false) {
811
+ async function decodeAudioBatch(
812
+ pkts: LibavPacket[],
813
+ myToken: number,
814
+ flush = false,
815
+ tb?: [number, number],
816
+ ) {
561
817
  if (!audioDec || destroyed || myToken !== pumpToken) return;
818
+ // Capture the packet-level PTS *before* decoding. libav's reported
819
+ // `frame.pts` after decode is unreliable for mp3-in-AVI (returns a
820
+ // value that doesn't agree with the stream's reported time base —
821
+ // see POSTMORTEMS.md 2026-05-31). The demuxer's packet PTS is
822
+ // reliable, and for mp3/aac the packet→frame mapping is 1:1, so we
823
+ // forward each packet's PTS to the matching output frame. For codecs
824
+ // where the mapping isn't 1:1, the trailing frames fall back to a
825
+ // synthetic running counter — same behavior as before this change.
826
+ const pktPtsSec: (number | null)[] = pkts.map((p) =>
827
+ tb ? packetPtsSec(p, tb) : null,
828
+ );
562
829
  let frames: LibavFrame[];
563
830
  const _t0 = performance.now();
564
831
  try {
@@ -577,22 +844,21 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
577
844
  audioDecodeBatches++;
578
845
  if (myToken !== pumpToken || destroyed) return;
579
846
 
580
- for (const f of frames) {
847
+ for (let i = 0; i < frames.length; i++) {
581
848
  if (myToken !== pumpToken || destroyed) return;
582
- sanitizeFrameTimestamp(
583
- f,
584
- () => {
585
- const ts = syntheticAudioUs;
586
- const samples = f.nb_samples ?? 1024;
587
- const sampleRate = f.sample_rate ?? 44100;
588
- syntheticAudioUs += Math.round((samples * 1_000_000) / sampleRate);
589
- return ts;
590
- },
591
- audioTimeBase,
592
- );
849
+ const f = frames[i];
593
850
  const samples = libavFrameToInterleavedFloat32(f);
594
851
  if (samples) {
595
- opts.audio.schedule(samples.data, samples.channels, samples.sampleRate);
852
+ const pts = pktPtsSec[i] ?? null;
853
+ if (isDebug()) {
854
+ const dur = samples.data.length / samples.channels / samples.sampleRate;
855
+ // Log every frame — we need to see what happens around seeks.
856
+ // Also surface explicitly when the per-frame PTS is null, which
857
+ // would route the chunk to the LEGACY rebase path in AudioOutput.
858
+ // eslint-disable-next-line no-console
859
+ console.log(`[TRACE-DEC] audio frame #${audioFramesDecoded} pts=${pts != null ? pts.toFixed(4) : "NULL"} dur=${dur.toFixed(4)} samples=${samples.data.length / samples.channels} sr=${samples.sampleRate} ch=${samples.channels} pktsIn=${pkts.length} framesOut=${frames.length}`);
860
+ }
861
+ opts.audio.schedule(samples.data, samples.channels, samples.sampleRate, pts);
596
862
  audioFramesDecoded++;
597
863
  }
598
864
  }
@@ -682,9 +948,9 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
682
948
  try { if (videoDec) await libav.avcodec_flush_buffers?.(videoDec.c); } catch { /* ignore */ }
683
949
  await flushBSF();
684
950
 
685
- syntheticVideoUs = Math.round(timeSec * 1_000_000);
686
- syntheticAudioUs = Math.round(timeSec * 1_000_000);
951
+ lastContentUs = -1;
687
952
  lastEmittedPtsUs = -1;
953
+ firstValidPtsLoggedSinceSeek = false;
688
954
 
689
955
  pumpRunning = pumpLoop(newToken).catch((err) =>
690
956
  console.error("[avbridge] fallback pump failed (post-setAudioTrack):", err),
@@ -692,6 +958,10 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
692
958
  },
693
959
 
694
960
  async seek(timeSec) {
961
+ if (isDebug()) {
962
+ // eslint-disable-next-line no-console
963
+ console.log(`[SEEK] target=${timeSec.toFixed(3)}s (${(timeSec * 1000).toFixed(0)}ms) wall=${performance.now().toFixed(0)}`);
964
+ }
695
965
  // Cancel the current pump and wait for it to actually exit before
696
966
  // we start moving file pointers around — concurrent ff_decode_multi
697
967
  // and av_seek_frame on the same context would be a recipe for memory
@@ -739,11 +1009,19 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
739
1009
  } catch { /* ignore */ }
740
1010
  await flushBSF();
741
1011
 
742
- // Reset synthetic timestamp counters to the seek target so newly
743
- // decoded frames start at the right media time.
744
- syntheticVideoUs = Math.round(timeSec * 1_000_000);
745
- syntheticAudioUs = Math.round(timeSec * 1_000_000);
1012
+ // Reset the content clock to "unanchored". The next decode loop
1013
+ // will discard NOPTS frames until the first valid libav pts
1014
+ // establishes a real anchor, then label every frame relative to
1015
+ // truth. Do NOT set anything to seekTarget here — that lie was
1016
+ // the post-seek fast-forward bug.
1017
+ lastContentUs = -1;
746
1018
  lastEmittedPtsUs = -1;
1019
+ firstValidPtsLoggedSinceSeek = false;
1020
+ seenFirstAudioPacketSinceSeek = false;
1021
+ seekTargetSec = timeSec;
1022
+ diagPktsLoggedSinceSeek = 0;
1023
+ diagFramesLoggedSinceSeek = 0;
1024
+ diagFrameKeysDumped = false;
747
1025
 
748
1026
  // The renderer & audio output are reset by the fallback session
749
1027
  // wrapper that called us — see strategies/fallback/index.ts.