avbridge 2.12.1 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +101 -0
- package/README.md +33 -0
- package/dist/{chunk-UM6WCSGL.cjs → chunk-OFJYEITB.cjs} +356 -91
- package/dist/chunk-OFJYEITB.cjs.map +1 -0
- package/dist/{chunk-BN7BRTLY.js → chunk-VOC24LYF.js} +357 -92
- package/dist/chunk-VOC24LYF.js.map +1 -0
- package/dist/element-browser.js +354 -111
- package/dist/element-browser.js.map +1 -1
- package/dist/element.cjs +2 -2
- package/dist/element.js +1 -1
- package/dist/index.cjs +8 -8
- package/dist/index.js +1 -1
- package/dist/player.cjs +457 -135
- package/dist/player.cjs.map +1 -1
- package/dist/player.d.cts +35 -4
- package/dist/player.d.ts +35 -4
- package/dist/player.js +457 -135
- package/dist/player.js.map +1 -1
- package/package.json +1 -1
- package/src/element/avbridge-player.ts +136 -28
- package/src/strategies/fallback/audio-output.ts +164 -35
- package/src/strategies/fallback/decoder.ts +336 -58
- package/src/strategies/fallback/video-renderer.ts +176 -34
- package/src/strategies/hybrid/decoder.ts +22 -19
- package/src/strategies/remux/pipeline.ts +12 -3
- package/dist/chunk-BN7BRTLY.js.map +0 -1
- package/dist/chunk-UM6WCSGL.cjs.map +0 -1
|
@@ -29,8 +29,15 @@ import { AudioOutput } from "./audio-output.js";
|
|
|
29
29
|
import type { MediaContext } from "../../types.js";
|
|
30
30
|
import { pickLibavVariant } from "./variant-routing.js";
|
|
31
31
|
import { dbg } from "../../util/debug.js";
|
|
32
|
+
|
|
33
|
+
/** True when `globalThis.AVBRIDGE_DEBUG` is set. Used to gate verbose
|
|
34
|
+
* per-packet / per-frame trace lines that are useful for debugging
|
|
35
|
+
* post-seek pts behavior but unreadable in normal use. */
|
|
36
|
+
function isDebug(): boolean {
|
|
37
|
+
return typeof globalThis !== "undefined"
|
|
38
|
+
&& !!(globalThis as Record<string, unknown>).AVBRIDGE_DEBUG;
|
|
39
|
+
}
|
|
32
40
|
import {
|
|
33
|
-
sanitizeFrameTimestamp,
|
|
34
41
|
libavFrameToInterleavedFloat32,
|
|
35
42
|
packetPtsSec,
|
|
36
43
|
} from "../../util/libav-demux.js";
|
|
@@ -275,9 +282,41 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
275
282
|
let watchdogSlowWarned = false;
|
|
276
283
|
let watchdogOverflowWarned = false;
|
|
277
284
|
|
|
278
|
-
//
|
|
279
|
-
|
|
280
|
-
|
|
285
|
+
// Content clock for video frames. Tracks the last frame's content time
|
|
286
|
+
// in µs. The invariant per emit:
|
|
287
|
+
// - raw libav pts valid → lastContentUs = raw_pts (sync to truth)
|
|
288
|
+
// - raw libav pts NOPTS → lastContentUs += frameStep (extend by one frame)
|
|
289
|
+
// This makes synthetic labels always relative to the *immediately
|
|
290
|
+
// preceding* frame's real content, self-correcting at every valid pts.
|
|
291
|
+
// -1 means "unanchored" — pre-anchor NOPTS frames are discarded outright
|
|
292
|
+
// because we don't know where the decoder actually landed. The anchor
|
|
293
|
+
// is established at the first valid raw pts post-seek.
|
|
294
|
+
//
|
|
295
|
+
// This replaced an older "synthetic counter reset to seekTarget on seek"
|
|
296
|
+
// path which stamped NOPTS preroll frames with the user's requested seek
|
|
297
|
+
// time — producing labels 4+ seconds ahead of actual content, dropping
|
|
298
|
+
// every valid-pts frame as a "regression", and surfacing as a ~2s
|
|
299
|
+
// post-seek fast-forward while the slow-advancing synthetic counter
|
|
300
|
+
// converged with real content. See POSTMORTEMS.md (2026-06-01).
|
|
301
|
+
let lastContentUs = -1;
|
|
302
|
+
let firstValidPtsLoggedSinceSeek = false;
|
|
303
|
+
|
|
304
|
+
// Diagnostic: first post-seek audio packet's PTS. Logged once per seek
|
|
305
|
+
// so the operator can see the demuxer's actual content alignment vs
|
|
306
|
+
// the user's click. With PTS-based audio scheduling, audio packets
|
|
307
|
+
// with PTS before the seek target *naturally* don't get scheduled
|
|
308
|
+
// (their computed ctxStart falls in the past) — no manual trim needed.
|
|
309
|
+
let seenFirstAudioPacketSinceSeek = false;
|
|
310
|
+
let seekTargetSec = 0;
|
|
311
|
+
// Post-seek diagnostic counters. Capture raw pts/dts/pos for the first
|
|
312
|
+
// ~N packets and frames after each seek so we can tell whether libav
|
|
313
|
+
// hands us a valid pts at seek landing, when (if ever) it becomes
|
|
314
|
+
// valid mid-stream, and whether the synthetic NOPTS fallback is firing.
|
|
315
|
+
let diagPktsLoggedSinceSeek = 0;
|
|
316
|
+
let diagFramesLoggedSinceSeek = 0;
|
|
317
|
+
let diagFrameKeysDumped = false;
|
|
318
|
+
const DIAG_MAX_PKTS = 100;
|
|
319
|
+
const DIAG_MAX_FRAMES = 300;
|
|
281
320
|
|
|
282
321
|
// Throughput instrumentation — answers "is the decoder keeping up?".
|
|
283
322
|
// All counters are cumulative since bootstrap (not reset on seek), so
|
|
@@ -345,6 +384,37 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
345
384
|
for (const pkt of videoPackets) {
|
|
346
385
|
const sec = packetPtsSec(pkt, videoTimeBase);
|
|
347
386
|
if (sec != null && sec > bufferedUntilSec) bufferedUntilSec = sec;
|
|
387
|
+
// [DIAG-PKT] Raw, unmodified packet fields for the first N
|
|
388
|
+
// post-seek video packets. Most important question: does the
|
|
389
|
+
// FIRST packet after av_seek_frame carry a valid pts? If yes,
|
|
390
|
+
// we can anchor synthetic counter to that — cheap & robust.
|
|
391
|
+
// If no, fall back to pkt_pos → AVI index → chunk_idx × frameDur.
|
|
392
|
+
if (isDebug() && diagPktsLoggedSinceSeek < DIAG_MAX_PKTS) {
|
|
393
|
+
const rawHi = (pkt as { ptshi?: number }).ptshi ?? 0;
|
|
394
|
+
const rawLo = pkt.pts ?? 0;
|
|
395
|
+
const isInvalidPts = (rawHi === -2147483648 && rawLo === 0);
|
|
396
|
+
const rawPts64 = isInvalidPts ? null : (rawHi * 0x100000000 + rawLo);
|
|
397
|
+
const rawSec = rawPts64 != null && videoTimeBase
|
|
398
|
+
? (rawPts64 * videoTimeBase[0]) / videoTimeBase[1]
|
|
399
|
+
: null;
|
|
400
|
+
const pktKeys = diagPktsLoggedSinceSeek === 0
|
|
401
|
+
? `[keys: ${Object.keys(pkt).join(",")}]`
|
|
402
|
+
: "";
|
|
403
|
+
// eslint-disable-next-line no-console
|
|
404
|
+
console.log(
|
|
405
|
+
`[DIAG-PKT] vidx=${diagPktsLoggedSinceSeek} ` +
|
|
406
|
+
`pts=${isInvalidPts ? "NOPTS" : rawPts64} ` +
|
|
407
|
+
`pts_sec=${rawSec != null ? rawSec.toFixed(3) : "n/a"} ` +
|
|
408
|
+
`ptshi=${rawHi} ptslo=${rawLo} ` +
|
|
409
|
+
`flags=0x${(pkt.flags ?? 0).toString(16)} ` +
|
|
410
|
+
`keyframe=${((pkt.flags ?? 0) & 1) ? "Y" : "N"} ` +
|
|
411
|
+
`stream=${pkt.stream_index} ` +
|
|
412
|
+
`dataLen=${pkt.data?.length ?? 0} ` +
|
|
413
|
+
`seekTarget=${seekTargetSec.toFixed(3)} ` +
|
|
414
|
+
pktKeys,
|
|
415
|
+
);
|
|
416
|
+
diagPktsLoggedSinceSeek++;
|
|
417
|
+
}
|
|
348
418
|
}
|
|
349
419
|
}
|
|
350
420
|
if (audioPackets && audioTimeBase) {
|
|
@@ -352,6 +422,23 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
352
422
|
const sec = packetPtsSec(pkt, audioTimeBase);
|
|
353
423
|
if (sec != null && sec > bufferedUntilSec) bufferedUntilSec = sec;
|
|
354
424
|
}
|
|
425
|
+
// Diagnostic: log the first post-seek audio packet's PTS. With
|
|
426
|
+
// PTS-based scheduling, packets whose PTS is before the seek
|
|
427
|
+
// target won't be played (AudioOutput skips them silently), so
|
|
428
|
+
// this is informational only — it tells you how far off the
|
|
429
|
+
// demuxer's seek granularity is from the user's click.
|
|
430
|
+
if (!seenFirstAudioPacketSinceSeek && audioPackets.length > 0) {
|
|
431
|
+
const firstSec = packetPtsSec(audioPackets[0], audioTimeBase);
|
|
432
|
+
if (firstSec != null && Number.isFinite(firstSec)) {
|
|
433
|
+
seenFirstAudioPacketSinceSeek = true;
|
|
434
|
+
dbg.info("av-anchor",
|
|
435
|
+
`seek-target=${seekTargetSec.toFixed(3)}s, ` +
|
|
436
|
+
`first-audio-pkt-pts=${firstSec.toFixed(3)}s ` +
|
|
437
|
+
`(Δ=${((firstSec - seekTargetSec) * 1000).toFixed(1)}ms — ` +
|
|
438
|
+
`pre-target packets will be skipped by AudioOutput)`,
|
|
439
|
+
);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
355
442
|
}
|
|
356
443
|
|
|
357
444
|
// Decode audio BEFORE video. On software-decode-bound content
|
|
@@ -363,7 +450,7 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
363
450
|
// packet for cook/mp3/aac, so doing it first barely delays
|
|
364
451
|
// video decoding at all.
|
|
365
452
|
if (audioDec && audioPackets && audioPackets.length > 0) {
|
|
366
|
-
await decodeAudioBatch(audioPackets, myToken);
|
|
453
|
+
await decodeAudioBatch(audioPackets, myToken, /*flush*/ false, audioTimeBase);
|
|
367
454
|
}
|
|
368
455
|
if (myToken !== pumpToken || destroyed) return;
|
|
369
456
|
if (videoDec && videoPackets && videoPackets.length > 0) {
|
|
@@ -431,21 +518,22 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
431
518
|
}
|
|
432
519
|
}
|
|
433
520
|
|
|
434
|
-
// Throttle:
|
|
435
|
-
//
|
|
436
|
-
//
|
|
437
|
-
//
|
|
438
|
-
//
|
|
439
|
-
//
|
|
440
|
-
//
|
|
521
|
+
// Throttle: only on audio buffer (mediaTimeOfNext - now() > 2 s).
|
|
522
|
+
// Renderer queue backpressure is enforced at the *enqueue* side in
|
|
523
|
+
// `decodeVideoBatch` — when the queue is at `queueHighWater`, the
|
|
524
|
+
// freshly decoded VideoFrame is closed without being enqueued, so
|
|
525
|
+
// the decoder keeps consuming packets in order. That preserves the
|
|
526
|
+
// reference-frame state needed to decode P/B frames cleanly during
|
|
527
|
+
// post-seek catch-up. Throttling the *pump* on queue depth here
|
|
528
|
+
// would block demuxer reads, which would also stall audio packet
|
|
529
|
+
// processing and starve `audio.bufferAhead()`.
|
|
441
530
|
{
|
|
442
531
|
const _throttleStart = performance.now();
|
|
443
532
|
let _throttled = false;
|
|
444
533
|
while (
|
|
445
534
|
!destroyed &&
|
|
446
535
|
myToken === pumpToken &&
|
|
447
|
-
|
|
448
|
-
opts.renderer.queueDepth() >= opts.renderer.queueHighWater)
|
|
536
|
+
opts.audio.bufferAhead() > 2.0
|
|
449
537
|
) {
|
|
450
538
|
_throttled = true;
|
|
451
539
|
await new Promise((r) => setTimeout(r, 50));
|
|
@@ -494,31 +582,160 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
494
582
|
|
|
495
583
|
for (const f of frames) {
|
|
496
584
|
if (myToken !== pumpToken || destroyed) return;
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
585
|
+
// [DIAG-FRAME] Capture raw libav fields before relabeling. One-shot key dump
|
|
586
|
+
// on first frame post-seek so we can see which fields libav
|
|
587
|
+
// actually exposes (best_effort_timestamp? pkt_dts? pkt_pos?).
|
|
588
|
+
const _diagShouldLog = isDebug() && diagFramesLoggedSinceSeek < DIAG_MAX_FRAMES;
|
|
589
|
+
const _diagRawHi = f.ptshi ?? 0;
|
|
590
|
+
const _diagRawLo = f.pts ?? 0;
|
|
591
|
+
const _diagInvalid = (_diagRawHi === -2147483648 && _diagRawLo === 0);
|
|
592
|
+
const _diagRawPts64 = _diagInvalid ? null : (_diagRawHi * 0x100000000 + _diagRawLo);
|
|
593
|
+
const _diagRawSec = _diagRawPts64 != null && videoTimeBase
|
|
594
|
+
? (_diagRawPts64 * videoTimeBase[0]) / videoTimeBase[1]
|
|
595
|
+
: null;
|
|
596
|
+
if (_diagShouldLog && !diagFrameKeysDumped) {
|
|
597
|
+
diagFrameKeysDumped = true;
|
|
598
|
+
const allKeys = Object.keys(f);
|
|
599
|
+
const fieldDump: Record<string, unknown> = {};
|
|
600
|
+
for (const k of allKeys) {
|
|
601
|
+
const v = (f as unknown as Record<string, unknown>)[k];
|
|
602
|
+
// Skip the data buffer; everything else is metadata.
|
|
603
|
+
if (k === "data") continue;
|
|
604
|
+
if (typeof v === "object" && v !== null && "length" in (v as object)) continue;
|
|
605
|
+
fieldDump[k] = v;
|
|
606
|
+
}
|
|
607
|
+
// eslint-disable-next-line no-console
|
|
608
|
+
console.log(`[DIAG-FRAME] FIRST FRAME post-seek — all keys: ${allKeys.join(",")}`);
|
|
609
|
+
// eslint-disable-next-line no-console
|
|
610
|
+
console.log(`[DIAG-FRAME] FIRST FRAME field dump:`, fieldDump);
|
|
611
|
+
}
|
|
612
|
+
// Convert raw libav pts (in stream timebase) to µs, or null if NOPTS.
|
|
613
|
+
let rawUs: number | null = null;
|
|
614
|
+
if (!_diagInvalid && _diagRawPts64 != null) {
|
|
615
|
+
const tb = videoTimeBase ?? [1, 1_000_000];
|
|
616
|
+
const us = Math.round((_diagRawPts64 * 1_000_000 * tb[0]) / tb[1]);
|
|
617
|
+
if (Number.isFinite(us) && Math.abs(us) <= Number.MAX_SAFE_INTEGER) {
|
|
618
|
+
rawUs = us;
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
// Define _diagLog up front so the PRE-ANCHOR-DROP path can call it.
|
|
623
|
+
// Final pts isn't known until after the anchor/step block, so we pass
|
|
624
|
+
// it as a parameter rather than closing over a `let`.
|
|
625
|
+
const _diagLog = (decision: string, finalPtsUs: number, sanFallback: boolean): void => {
|
|
626
|
+
if (!_diagShouldLog) return;
|
|
627
|
+
const ptsSrc = sanFallback
|
|
628
|
+
? `SYNTHETIC(${_diagInvalid ? "NOPTS" : "invalid-range"})`
|
|
629
|
+
: "LIBAV";
|
|
630
|
+
// eslint-disable-next-line no-console
|
|
631
|
+
console.log(
|
|
632
|
+
`[DIAG-FRAME] vidx=${diagFramesLoggedSinceSeek} ` +
|
|
633
|
+
`raw_pts=${_diagInvalid ? "NOPTS" : _diagRawPts64} ` +
|
|
634
|
+
`raw_pts_sec=${_diagRawSec != null ? _diagRawSec.toFixed(3) : "n/a"} ` +
|
|
635
|
+
`pts_src=${ptsSrc} ` +
|
|
636
|
+
`final_pts_us=${finalPtsUs} ` +
|
|
637
|
+
`final_pts_sec=${(finalPtsUs / 1_000_000).toFixed(3)} ` +
|
|
638
|
+
`seekTarget=${seekTargetSec.toFixed(3)} ` +
|
|
639
|
+
`offset_to_target_ms=${((finalPtsUs / 1000) - (seekTargetSec * 1000)).toFixed(1)} ` +
|
|
640
|
+
`lastEmittedPts_us=${lastEmittedPtsUs} ` +
|
|
641
|
+
`decision=${decision}`,
|
|
642
|
+
);
|
|
643
|
+
diagFramesLoggedSinceSeek++;
|
|
644
|
+
};
|
|
645
|
+
|
|
646
|
+
// Anchor + step invariant.
|
|
647
|
+
// - Unanchored (post-seek, no valid pts seen yet) AND NOPTS frame
|
|
648
|
+
// → discard outright. We don't know where the decoder landed, so
|
|
649
|
+
// stamping a synthetic label would be a lie (this was the source
|
|
650
|
+
// of the post-seek fast-forward bug).
|
|
651
|
+
// - First valid raw pts → anchor `lastContentUs` to it. The pipeline
|
|
652
|
+
// below will then drop this and subsequent frames as pre-target
|
|
653
|
+
// until content reaches seekTarget.
|
|
654
|
+
// - Anchored AND valid → sync `lastContentUs` to truth.
|
|
655
|
+
// - Anchored AND NOPTS → step `lastContentUs += frameStep`.
|
|
656
|
+
let _diagSanFallbackFired = false;
|
|
657
|
+
const seekTargetUs = Math.round(seekTargetSec * 1_000_000);
|
|
658
|
+
if (lastContentUs < 0) {
|
|
659
|
+
if (rawUs == null) {
|
|
660
|
+
// Cold-start keyframe special case. At seekTargetSec === 0 the
|
|
661
|
+
// demuxer guarantees the very first emitted keyframe is content
|
|
662
|
+
// 0 (container start=0.000000). Anchoring there directly avoids
|
|
663
|
+
// discarding the opening I-frame — without this, cold start
|
|
664
|
+
// loses 1-2 frames and the first paint is ~80ms late.
|
|
665
|
+
//
|
|
666
|
+
// STRICTLY gated to seekTarget === 0. The seek path proved
|
|
667
|
+
// correct via the offset-ground-truth experiment (POSTMORTEMS
|
|
668
|
+
// 2026-06-01); this branch must not change its behavior.
|
|
669
|
+
//
|
|
670
|
+
// Why keyframe-pin instead of back-computing from the first
|
|
671
|
+
// valid pts: the I/P/B reorder is densest at the stream head,
|
|
672
|
+
// so `firstValidPts − N × frameStep` is off by however many
|
|
673
|
+
// early B-frames the decoder dropped. The keyframe identity
|
|
674
|
+
// (`f.key_frame === 1`) is the only signal that doesn't depend
|
|
675
|
+
// on frame-spacing assumptions.
|
|
676
|
+
const isColdStartKeyframe =
|
|
677
|
+
seekTargetSec === 0
|
|
678
|
+
&& (f as { key_frame?: number }).key_frame === 1;
|
|
679
|
+
if (isColdStartKeyframe) {
|
|
680
|
+
lastContentUs = 0;
|
|
681
|
+
_diagSanFallbackFired = true;
|
|
682
|
+
// Fall through: the frame gets labeled 0 and runs through
|
|
683
|
+
// the regression/pre-target/enqueue pipeline normally.
|
|
684
|
+
} else {
|
|
685
|
+
// Pre-anchor NOPTS: discard. Decoder retains the frame internally
|
|
686
|
+
// as a reference — we just don't expose it to the renderer.
|
|
687
|
+
_diagLog("PRE-ANCHOR-DROP", 0, true);
|
|
688
|
+
continue;
|
|
689
|
+
}
|
|
690
|
+
} else {
|
|
691
|
+
// First valid raw pts post-seek = the anchor.
|
|
692
|
+
lastContentUs = rawUs;
|
|
693
|
+
if (!firstValidPtsLoggedSinceSeek) {
|
|
694
|
+
firstValidPtsLoggedSinceSeek = true;
|
|
695
|
+
if (isDebug()) {
|
|
696
|
+
// eslint-disable-next-line no-console
|
|
697
|
+
console.log(
|
|
698
|
+
`[avbridge:decoder] post-seek anchor established: ` +
|
|
699
|
+
`first valid raw pts = ${(rawUs / 1000).toFixed(1)}ms ` +
|
|
700
|
+
`(seekTarget = ${(seekTargetSec * 1000).toFixed(1)}ms, ` +
|
|
701
|
+
`Δ = ${((rawUs - seekTargetUs) / 1000).toFixed(1)}ms)`,
|
|
702
|
+
);
|
|
703
|
+
}
|
|
704
|
+
// Guard: if the first valid pts is at or beyond the seek
|
|
705
|
+
// target, the pre-anchor NOPTS frames we already discarded
|
|
706
|
+
// may have straddled the target. In normal AVI MPEG-4 seeks,
|
|
707
|
+
// the demuxer lands well before the target (previous
|
|
708
|
+
// keyframe), so this shouldn't happen — log a warning if it
|
|
709
|
+
// does so we know to implement a pkt_pos→AVI-index
|
|
710
|
+
// back-computation path. The cold-start case (seekTarget=0)
|
|
711
|
+
// is handled by the keyframe-pin branch above and shouldn't
|
|
712
|
+
// reach this warning.
|
|
713
|
+
if (rawUs >= seekTargetUs) {
|
|
714
|
+
// eslint-disable-next-line no-console
|
|
715
|
+
console.warn(
|
|
716
|
+
`[avbridge:decoder] first valid raw pts ≥ seek target — ` +
|
|
717
|
+
`pre-anchor NOPTS frames may have straddled the target ` +
|
|
718
|
+
`and been mis-discarded. First painted frame may be late ` +
|
|
719
|
+
`by up to one keyframe interval.`,
|
|
720
|
+
);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
} else {
|
|
725
|
+
if (rawUs != null) {
|
|
726
|
+
lastContentUs = rawUs; // sync to truth on every valid pts
|
|
727
|
+
} else {
|
|
728
|
+
lastContentUs += videoFrameStepUs; // extend from last truth
|
|
729
|
+
_diagSanFallbackFired = true;
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
// Write the content label into the frame so the bridge sees it.
|
|
733
|
+
f.pts = lastContentUs;
|
|
734
|
+
f.ptshi = lastContentUs < 0 ? -1 : 0;
|
|
735
|
+
const _fPts = lastContentUs;
|
|
520
736
|
if (_fPts > newestVideoPtsUs) newestVideoPtsUs = _fPts;
|
|
521
737
|
if (lastEmittedPtsUs >= 0 && _fPts < lastEmittedPtsUs) {
|
|
738
|
+
_diagLog("REGRESSED-DROP", _fPts, _diagSanFallbackFired);
|
|
522
739
|
// Decoder emitted a frame with lower PTS than the previous
|
|
523
740
|
// output. Dropping out-of-order frames here is the right move:
|
|
524
741
|
// the renderer's paint loop assumes monotonic queue order and
|
|
@@ -545,20 +762,70 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
545
762
|
continue; // skip enqueue
|
|
546
763
|
}
|
|
547
764
|
lastEmittedPtsUs = _fPts;
|
|
765
|
+
// Decode-to-display: after a seek the demuxer lands at the
|
|
766
|
+
// keyframe ≤ click target and the decoder produces frames
|
|
767
|
+
// starting there. Pre-target frames are still DECODED (they're
|
|
768
|
+
// reference frames for later P/B decodes) but they MUST NOT be
|
|
769
|
+
// displayed — otherwise the renderer paints them in a brief
|
|
770
|
+
// fast-forward burst as it catches up to audio (T_click). Drop
|
|
771
|
+
// them at the enqueue boundary; the decoder doesn't care.
|
|
772
|
+
//
|
|
773
|
+
// Tolerance of one frame duration: source frames are quantized
|
|
774
|
+
// (PTS = N × frameStep) but the user's click is arbitrary, so
|
|
775
|
+
// the frame nearest the click is typically a few ms *before* it.
|
|
776
|
+
// Convention (matches `<video>.currentTime = T` and ffplay):
|
|
777
|
+
// display the frame at the largest PTS ≤ T.
|
|
778
|
+
const targetUs = Math.round(seekTargetSec * 1_000_000);
|
|
779
|
+
if (_fPts < targetUs - videoFrameStepUs) {
|
|
780
|
+
_diagLog("PRE-TARGET-DROP", _fPts, _diagSanFallbackFired);
|
|
781
|
+
continue;
|
|
782
|
+
}
|
|
548
783
|
try {
|
|
549
784
|
const vf = bridge.laFrameToVideoFrame(f, { timeBase: [1, 1_000_000] });
|
|
550
|
-
|
|
785
|
+
// Renderer-queue backpressure at the enqueue side. Discarding
|
|
786
|
+
// here (rather than throttling the pump on `queueHighWater`)
|
|
787
|
+
// keeps the decoder consuming packets sequentially so its
|
|
788
|
+
// reference-frame state stays intact — essential during
|
|
789
|
+
// post-seek catch-up, when the pump must continue reading
|
|
790
|
+
// packets to advance the demuxer past pre-target audio. Without
|
|
791
|
+
// sequential decode, the next batch's P/B frames decode against
|
|
792
|
+
// a stale reference and produce gray + glitchy output until
|
|
793
|
+
// the next keyframe.
|
|
794
|
+
if (opts.renderer.queueDepth() >= opts.renderer.queueHighWater) {
|
|
795
|
+
vf.close();
|
|
796
|
+
_diagLog("OVERFLOW-DROP", _fPts, _diagSanFallbackFired);
|
|
797
|
+
} else {
|
|
798
|
+
opts.renderer.enqueue(vf);
|
|
799
|
+
_diagLog("ENQUEUED", _fPts, _diagSanFallbackFired);
|
|
800
|
+
}
|
|
551
801
|
videoFramesDecoded++;
|
|
552
802
|
} catch (err) {
|
|
553
803
|
if (videoFramesDecoded === 0) {
|
|
554
804
|
console.warn("[avbridge] laFrameToVideoFrame failed:", err);
|
|
555
805
|
}
|
|
806
|
+
_diagLog("BRIDGE-ERROR", _fPts, _diagSanFallbackFired);
|
|
556
807
|
}
|
|
557
808
|
}
|
|
558
809
|
}
|
|
559
810
|
|
|
560
|
-
async function decodeAudioBatch(
|
|
811
|
+
async function decodeAudioBatch(
|
|
812
|
+
pkts: LibavPacket[],
|
|
813
|
+
myToken: number,
|
|
814
|
+
flush = false,
|
|
815
|
+
tb?: [number, number],
|
|
816
|
+
) {
|
|
561
817
|
if (!audioDec || destroyed || myToken !== pumpToken) return;
|
|
818
|
+
// Capture the packet-level PTS *before* decoding. libav's reported
|
|
819
|
+
// `frame.pts` after decode is unreliable for mp3-in-AVI (returns a
|
|
820
|
+
// value that doesn't agree with the stream's reported time base —
|
|
821
|
+
// see POSTMORTEMS.md 2026-05-31). The demuxer's packet PTS is
|
|
822
|
+
// reliable, and for mp3/aac the packet→frame mapping is 1:1, so we
|
|
823
|
+
// forward each packet's PTS to the matching output frame. For codecs
|
|
824
|
+
// where the mapping isn't 1:1, the trailing frames fall back to a
|
|
825
|
+
// null PTS (AudioOutput's legacy rebase path) rather than a packet PTS.
|
|
826
|
+
const pktPtsSec: (number | null)[] = pkts.map((p) =>
|
|
827
|
+
tb ? packetPtsSec(p, tb) : null,
|
|
828
|
+
);
|
|
562
829
|
let frames: LibavFrame[];
|
|
563
830
|
const _t0 = performance.now();
|
|
564
831
|
try {
|
|
@@ -577,22 +844,21 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
577
844
|
audioDecodeBatches++;
|
|
578
845
|
if (myToken !== pumpToken || destroyed) return;
|
|
579
846
|
|
|
580
|
-
for (
|
|
847
|
+
for (let i = 0; i < frames.length; i++) {
|
|
581
848
|
if (myToken !== pumpToken || destroyed) return;
|
|
582
|
-
|
|
583
|
-
f,
|
|
584
|
-
() => {
|
|
585
|
-
const ts = syntheticAudioUs;
|
|
586
|
-
const samples = f.nb_samples ?? 1024;
|
|
587
|
-
const sampleRate = f.sample_rate ?? 44100;
|
|
588
|
-
syntheticAudioUs += Math.round((samples * 1_000_000) / sampleRate);
|
|
589
|
-
return ts;
|
|
590
|
-
},
|
|
591
|
-
audioTimeBase,
|
|
592
|
-
);
|
|
849
|
+
const f = frames[i];
|
|
593
850
|
const samples = libavFrameToInterleavedFloat32(f);
|
|
594
851
|
if (samples) {
|
|
595
|
-
|
|
852
|
+
const pts = pktPtsSec[i] ?? null;
|
|
853
|
+
if (isDebug()) {
|
|
854
|
+
const dur = samples.data.length / samples.channels / samples.sampleRate;
|
|
855
|
+
// Log every frame — we need to see what happens around seeks.
|
|
856
|
+
// Also surface explicitly when the per-frame PTS is null, which
|
|
857
|
+
// would route the chunk to the LEGACY rebase path in AudioOutput.
|
|
858
|
+
// eslint-disable-next-line no-console
|
|
859
|
+
console.log(`[TRACE-DEC] audio frame #${audioFramesDecoded} pts=${pts != null ? pts.toFixed(4) : "NULL"} dur=${dur.toFixed(4)} samples=${samples.data.length / samples.channels} sr=${samples.sampleRate} ch=${samples.channels} pktsIn=${pkts.length} framesOut=${frames.length}`);
|
|
860
|
+
}
|
|
861
|
+
opts.audio.schedule(samples.data, samples.channels, samples.sampleRate, pts);
|
|
596
862
|
audioFramesDecoded++;
|
|
597
863
|
}
|
|
598
864
|
}
|
|
@@ -682,9 +948,9 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
682
948
|
try { if (videoDec) await libav.avcodec_flush_buffers?.(videoDec.c); } catch { /* ignore */ }
|
|
683
949
|
await flushBSF();
|
|
684
950
|
|
|
685
|
-
|
|
686
|
-
syntheticAudioUs = Math.round(timeSec * 1_000_000);
|
|
951
|
+
lastContentUs = -1;
|
|
687
952
|
lastEmittedPtsUs = -1;
|
|
953
|
+
firstValidPtsLoggedSinceSeek = false;
|
|
688
954
|
|
|
689
955
|
pumpRunning = pumpLoop(newToken).catch((err) =>
|
|
690
956
|
console.error("[avbridge] fallback pump failed (post-setAudioTrack):", err),
|
|
@@ -692,6 +958,10 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
692
958
|
},
|
|
693
959
|
|
|
694
960
|
async seek(timeSec) {
|
|
961
|
+
if (isDebug()) {
|
|
962
|
+
// eslint-disable-next-line no-console
|
|
963
|
+
console.log(`[SEEK] target=${timeSec.toFixed(3)}s (${(timeSec * 1000).toFixed(0)}ms) wall=${performance.now().toFixed(0)}`);
|
|
964
|
+
}
|
|
695
965
|
// Cancel the current pump and wait for it to actually exit before
|
|
696
966
|
// we start moving file pointers around — concurrent ff_decode_multi
|
|
697
967
|
// and av_seek_frame on the same context would be a recipe for memory
|
|
@@ -739,11 +1009,19 @@ export async function startDecoder(opts: StartDecoderOptions): Promise<DecoderHa
|
|
|
739
1009
|
} catch { /* ignore */ }
|
|
740
1010
|
await flushBSF();
|
|
741
1011
|
|
|
742
|
-
// Reset
|
|
743
|
-
//
|
|
744
|
-
|
|
745
|
-
|
|
1012
|
+
// Reset the content clock to "unanchored". The next decode loop
|
|
1013
|
+
// will discard NOPTS frames until the first valid libav pts
|
|
1014
|
+
// establishes a real anchor, then label every frame relative to
|
|
1015
|
+
// truth. Do NOT set anything to seekTarget here — that lie was
|
|
1016
|
+
// the post-seek fast-forward bug.
|
|
1017
|
+
lastContentUs = -1;
|
|
746
1018
|
lastEmittedPtsUs = -1;
|
|
1019
|
+
firstValidPtsLoggedSinceSeek = false;
|
|
1020
|
+
seenFirstAudioPacketSinceSeek = false;
|
|
1021
|
+
seekTargetSec = timeSec;
|
|
1022
|
+
diagPktsLoggedSinceSeek = 0;
|
|
1023
|
+
diagFramesLoggedSinceSeek = 0;
|
|
1024
|
+
diagFrameKeysDumped = false;
|
|
747
1025
|
|
|
748
1026
|
// The renderer & audio output are reset by the fallback session
|
|
749
1027
|
// wrapper that called us — see strategies/fallback/index.ts.
|