avbridge 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +73 -0
- package/dist/{chunk-6UUT4BEA.cjs → chunk-2IJ66NTD.cjs} +13 -20
- package/dist/chunk-2IJ66NTD.cjs.map +1 -0
- package/dist/{chunk-XKPSTC34.cjs → chunk-2XW2O3YI.cjs} +5 -20
- package/dist/chunk-2XW2O3YI.cjs.map +1 -0
- package/dist/chunk-5KVLE6YI.js +167 -0
- package/dist/chunk-5KVLE6YI.js.map +1 -0
- package/dist/{chunk-2PGRFCWB.js → chunk-CPJLFFCC.js} +8 -18
- package/dist/chunk-CPJLFFCC.js.map +1 -0
- package/dist/chunk-CPZ7PXAM.cjs +240 -0
- package/dist/chunk-CPZ7PXAM.cjs.map +1 -0
- package/dist/{chunk-QQXBPW72.js → chunk-E76AMWI4.js} +4 -18
- package/dist/chunk-E76AMWI4.js.map +1 -0
- package/dist/{chunk-NV7ILLWH.js → chunk-KY2GPCT7.js} +347 -665
- package/dist/chunk-KY2GPCT7.js.map +1 -0
- package/dist/chunk-LUFA47FP.js +19 -0
- package/dist/chunk-LUFA47FP.js.map +1 -0
- package/dist/chunk-Q2VUO52Z.cjs +374 -0
- package/dist/chunk-Q2VUO52Z.cjs.map +1 -0
- package/dist/chunk-QDJLQR53.cjs +22 -0
- package/dist/chunk-QDJLQR53.cjs.map +1 -0
- package/dist/chunk-S4WAZC2T.cjs +173 -0
- package/dist/chunk-S4WAZC2T.cjs.map +1 -0
- package/dist/chunk-SMH6IOP2.js +368 -0
- package/dist/chunk-SMH6IOP2.js.map +1 -0
- package/dist/chunk-SR3MPV4D.js +237 -0
- package/dist/chunk-SR3MPV4D.js.map +1 -0
- package/dist/{chunk-7RGG6ME7.cjs → chunk-TBW26OPP.cjs} +365 -688
- package/dist/chunk-TBW26OPP.cjs.map +1 -0
- package/dist/chunk-X2K3GIWE.js +235 -0
- package/dist/chunk-X2K3GIWE.js.map +1 -0
- package/dist/chunk-ZCUXHW55.cjs +242 -0
- package/dist/chunk-ZCUXHW55.cjs.map +1 -0
- package/dist/element-browser.js +799 -493
- package/dist/element-browser.js.map +1 -1
- package/dist/element.cjs +58 -4
- package/dist/element.cjs.map +1 -1
- package/dist/element.d.cts +38 -0
- package/dist/element.d.ts +38 -0
- package/dist/element.js +57 -3
- package/dist/element.js.map +1 -1
- package/dist/index.cjs +523 -393
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +494 -366
- package/dist/index.js.map +1 -1
- package/dist/libav-demux-H2GS46GH.cjs +27 -0
- package/dist/libav-demux-H2GS46GH.cjs.map +1 -0
- package/dist/libav-demux-OWZ4T2YW.js +6 -0
- package/dist/libav-demux-OWZ4T2YW.js.map +1 -0
- package/dist/{libav-import-GST2AMPL.cjs → libav-import-2ZVKV2E7.cjs} +2 -2
- package/dist/{libav-import-GST2AMPL.cjs.map → libav-import-2ZVKV2E7.cjs.map} +1 -1
- package/dist/{libav-import-2JURFHEW.js → libav-import-6MGLCXVQ.js} +2 -2
- package/dist/{libav-import-2JURFHEW.js.map → libav-import-6MGLCXVQ.js.map} +1 -1
- package/dist/player.cjs +601 -470
- package/dist/player.cjs.map +1 -1
- package/dist/player.d.cts +50 -0
- package/dist/player.d.ts +50 -0
- package/dist/player.js +580 -449
- package/dist/player.js.map +1 -1
- package/dist/remux-OBSMIENG.cjs +35 -0
- package/dist/remux-OBSMIENG.cjs.map +1 -0
- package/dist/remux-WBYIZBBX.js +10 -0
- package/dist/remux-WBYIZBBX.js.map +1 -0
- package/dist/source-4TZ6KMNV.js +4 -0
- package/dist/{source-F656KYYV.js.map → source-4TZ6KMNV.js.map} +1 -1
- package/dist/source-7YLO6E7X.cjs +29 -0
- package/dist/{source-73CAH6HW.cjs.map → source-7YLO6E7X.cjs.map} +1 -1
- package/dist/source-MTX5ELUZ.js +4 -0
- package/dist/{source-QJR3OHTW.js.map → source-MTX5ELUZ.js.map} +1 -1
- package/dist/source-VFLXLOCN.cjs +29 -0
- package/dist/{source-VB74JQ7Z.cjs.map → source-VFLXLOCN.cjs.map} +1 -1
- package/dist/subtitles-4T74JRGT.js +4 -0
- package/dist/subtitles-4T74JRGT.js.map +1 -0
- package/dist/subtitles-QUH4LPI4.cjs +29 -0
- package/dist/subtitles-QUH4LPI4.cjs.map +1 -0
- package/package.json +1 -1
- package/src/convert/remux.ts +1 -35
- package/src/convert/transcode-libav.ts +691 -0
- package/src/convert/transcode.ts +12 -4
- package/src/element/avbridge-player.ts +16 -0
- package/src/element/avbridge-video.ts +54 -0
- package/src/errors.ts +6 -0
- package/src/player.ts +15 -16
- package/src/strategies/fallback/decoder.ts +96 -173
- package/src/strategies/fallback/index.ts +19 -2
- package/src/strategies/fallback/libav-import.ts +9 -1
- package/src/strategies/fallback/video-renderer.ts +107 -0
- package/src/strategies/hybrid/decoder.ts +88 -180
- package/src/strategies/hybrid/index.ts +17 -2
- package/src/strategies/native.ts +6 -3
- package/src/strategies/remux/index.ts +14 -2
- package/src/strategies/remux/pipeline.ts +72 -12
- package/src/subtitles/render.ts +8 -0
- package/src/util/libav-demux.ts +405 -0
- package/dist/chunk-2PGRFCWB.js.map +0 -1
- package/dist/chunk-6UUT4BEA.cjs.map +0 -1
- package/dist/chunk-7RGG6ME7.cjs.map +0 -1
- package/dist/chunk-NV7ILLWH.js.map +0 -1
- package/dist/chunk-QQXBPW72.js.map +0 -1
- package/dist/chunk-XKPSTC34.cjs.map +0 -1
- package/dist/source-73CAH6HW.cjs +0 -28
- package/dist/source-F656KYYV.js +0 -3
- package/dist/source-QJR3OHTW.js +0 -3
- package/dist/source-VB74JQ7Z.cjs +0 -28
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ClockSource } from "./audio-output.js";
|
|
2
|
+
import { SubtitleOverlay } from "../../subtitles/render.js";
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Renders decoded `VideoFrame`s into a 2D canvas overlaid on the user's
|
|
@@ -47,6 +48,16 @@ export class VideoRenderer {
|
|
|
47
48
|
/** Cumulative count of ticks where PTS mode painted a frame. */
|
|
48
49
|
private ticksPainted = 0;
|
|
49
50
|
|
|
51
|
+
/**
|
|
52
|
+
* Subtitle overlay div attached to the stage wrapper alongside the
|
|
53
|
+
* canvas. Created lazily when subtitle tracks are attached via the
|
|
54
|
+
* target's `<track>` children. Canvas strategies (hybrid, fallback)
|
|
55
|
+
* hide the <video>, so we can't rely on the browser's native cue
|
|
56
|
+
* rendering; we read TextTrack.cues and render into this overlay.
|
|
57
|
+
*/
|
|
58
|
+
private subtitleOverlay: SubtitleOverlay | null = null;
|
|
59
|
+
private subtitleTrack: TextTrack | null = null;
|
|
60
|
+
|
|
50
61
|
/**
|
|
51
62
|
* Calibration offset (microseconds) between video PTS and audio clock.
|
|
52
63
|
* Video PTS and AudioContext.currentTime can drift ~0.1% relative to
|
|
@@ -111,6 +122,15 @@ export class VideoRenderer {
|
|
|
111
122
|
}
|
|
112
123
|
target.style.visibility = "hidden";
|
|
113
124
|
|
|
125
|
+
// Create a subtitle overlay on the same parent as the canvas so cues
|
|
126
|
+
// appear over the rendered video. Shows nothing until a TextTrack
|
|
127
|
+
// gets attached via attachSubtitleTracks.
|
|
128
|
+
const overlayParent = parent instanceof HTMLElement ? parent : document.body;
|
|
129
|
+
this.subtitleOverlay = new SubtitleOverlay(overlayParent);
|
|
130
|
+
// Watch for <track> children on the target <video>. When one is
|
|
131
|
+
// added, grab its TextTrack and poll cues from it each tick.
|
|
132
|
+
this.watchTextTracks(target);
|
|
133
|
+
|
|
114
134
|
const ctx = this.canvas.getContext("2d");
|
|
115
135
|
if (!ctx) throw new Error("video renderer: failed to acquire 2D context");
|
|
116
136
|
this.ctx = ctx;
|
|
@@ -156,10 +176,95 @@ export class VideoRenderer {
|
|
|
156
176
|
}
|
|
157
177
|
}
|
|
158
178
|
|
|
179
|
+
/**
 * Bind the renderer to the first subtitle/caption `TextTrack` exposed by
 * the target `<video>`.
 *
 * Tracks already present are inspected immediately; otherwise the
 * `addtrack` event on `target.textTracks` triggers another attempt. Only
 * one track is ever picked: once `this.subtitleTrack` is set, later calls
 * are no-ops. The picked track is switched to `mode = "hidden"` so its
 * cues are exposed through the API without the browser painting them.
 *
 * @param target The `<video>` element whose text tracks are observed.
 */
private watchTextTracks(target: HTMLVideoElement): void {
  const pick = (): void => {
    // A late `addtrack` can fire after destroy(); never re-acquire a track
    // (or flip its mode) once the renderer has been torn down.
    if (this.destroyed || this.subtitleTrack) return;

    const tracks = target.textTracks;
    if (isDebug()) {
      // eslint-disable-next-line no-console
      console.log(`[avbridge:subs] watchTextTracks pick() — ${tracks.length} tracks`);
    }

    for (let i = 0; i < tracks.length; i++) {
      const t = tracks[i];
      if (isDebug()) {
        // eslint-disable-next-line no-console
        console.log(`[avbridge:subs] track ${i}: kind=${t.kind} mode=${t.mode} cues=${t.cues?.length ?? 0}`);
      }
      if (t.kind !== "subtitles" && t.kind !== "captions") continue;

      this.subtitleTrack = t;
      t.mode = "hidden"; // hidden means "cues available via API, don't render"
      if (isDebug()) {
        // eslint-disable-next-line no-console
        console.log(`[avbridge:subs] picked track, mode=hidden`);
      }

      // Locate the owning <track> element by identity rather than by a
      // `srclang` attribute selector: several tracks may share a language,
      // the language may be empty, and interpolating it into a selector is
      // unsafe for unusual values. Tracks created via addTextTrack() have
      // no element, in which case there is nothing to listen on.
      const trackEl =
        Array.from(target.querySelectorAll("track")).find((el) => el.track === t) ?? null;
      if (trackEl) {
        trackEl.addEventListener("load", () => {
          if (isDebug()) {
            // eslint-disable-next-line no-console
            console.log(`[avbridge:subs] track element loaded, cues=${t.cues?.length ?? 0}`);
          }
        });
        trackEl.addEventListener("error", (ev) => {
          // eslint-disable-next-line no-console
          console.warn(`[avbridge:subs] track element error:`, ev);
        });
      }
      break;
    }
  };

  pick();

  // Feature-detect: not every TextTrackList implementation is an EventTarget.
  if (typeof target.textTracks.addEventListener === "function") {
    target.textTracks.addEventListener("addtrack", () => {
      if (isDebug()) {
        // eslint-disable-next-line no-console
        console.log("[avbridge:subs] addtrack event fired");
      }
      pick();
    });
  }
}
|
|
235
|
+
|
|
236
|
+
private _loggedCues = false;
|
|
237
|
+
|
|
238
|
+
/**
 * Push the text of the cue active at the current clock time into the
 * subtitle overlay. Called once per render tick.
 *
 * - No overlay or no picked track: nothing to do.
 * - Track has no cues (null or empty list): the overlay is cleared so text
 *   from a previously active cue cannot linger.
 * - Otherwise the first cue with `startTime <= now < endTime` wins; the end
 *   bound is exclusive, so back-to-back cues hand over cleanly at the
 *   boundary instead of the earlier one sticking for an extra tick.
 *
 * NOTE(review): `this.clock.now()` is compared directly against cue times,
 * so it is assumed to be in seconds — confirm against ClockSource.
 */
private updateSubtitles(): void {
  const overlay = this.subtitleOverlay;
  const track = this.subtitleTrack;
  if (!overlay || !track) return;

  const cues = track.cues;
  if (!cues || cues.length === 0) {
    // setText only touches the DOM when the value changes, so this is cheap.
    overlay.setText("");
    return;
  }

  if (isDebug() && !this._loggedCues) {
    this._loggedCues = true;
    // eslint-disable-next-line no-console
    console.log(`[avbridge:subs] cues available: ${cues.length}, first start=${cues[0].startTime}, last end=${cues[cues.length-1].endTime}`);
  }

  const now = this.clock.now();
  let activeText = "";
  for (let i = 0; i < cues.length; i++) {
    const cue = cues[i];
    if (now >= cue.startTime && now < cue.endTime) {
      // Generic TextTrackCue has no `text`; VTTCue does.
      activeText = (cue as VTTCue & { text?: string }).text ?? "";
      break;
    }
  }

  // Strip VTT tags for plain rendering (e.g. <c.en> voice tags)
  overlay.setText(activeText.replace(/<[^>]+>/g, ""));
}
|
|
261
|
+
|
|
159
262
|
private tick(): void {
|
|
160
263
|
if (this.destroyed) return;
|
|
161
264
|
this.rafHandle = requestAnimationFrame(this.tick);
|
|
162
265
|
|
|
266
|
+
this.updateSubtitles();
|
|
267
|
+
|
|
163
268
|
if (this.queue.length === 0) return;
|
|
164
269
|
|
|
165
270
|
const playing = this.clock.isPlaying();
|
|
@@ -331,6 +436,8 @@ export class VideoRenderer {
|
|
|
331
436
|
this.destroyed = true;
|
|
332
437
|
if (this.rafHandle != null) cancelAnimationFrame(this.rafHandle);
|
|
333
438
|
this.flush();
|
|
439
|
+
if (this.subtitleOverlay) { this.subtitleOverlay.destroy(); this.subtitleOverlay = null; }
|
|
440
|
+
this.subtitleTrack = null;
|
|
334
441
|
this.canvas.remove();
|
|
335
442
|
this.target.style.visibility = "";
|
|
336
443
|
}
|
|
@@ -20,10 +20,17 @@ import { AudioOutput } from "../fallback/audio-output.js";
|
|
|
20
20
|
import type { MediaContext } from "../../types.js";
|
|
21
21
|
import { dbg } from "../../util/debug.js";
|
|
22
22
|
import { pickLibavVariant } from "../fallback/variant-routing.js";
|
|
23
|
+
import {
|
|
24
|
+
sanitizePacketTimestamp,
|
|
25
|
+
sanitizeFrameTimestamp,
|
|
26
|
+
libavFrameToInterleavedFloat32,
|
|
27
|
+
} from "../../util/libav-demux.js";
|
|
23
28
|
|
|
24
29
|
export interface HybridDecoderHandles {
|
|
25
30
|
destroy(): Promise<void>;
|
|
26
31
|
seek(timeSec: number): Promise<void>;
|
|
32
|
+
/** Swap the active audio track — rebuilds the libav audio decoder + reseeks. */
|
|
33
|
+
setAudioTrack(trackId: number, timeSec: number): Promise<void>;
|
|
27
34
|
stats(): Record<string, unknown>;
|
|
28
35
|
onFatalError(handler: (reason: string) => void): void;
|
|
29
36
|
}
|
|
@@ -52,7 +59,14 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
|
|
|
52
59
|
const readPkt = await libav.av_packet_alloc();
|
|
53
60
|
const [fmt_ctx, streams] = await libav.ff_init_demuxer_file(opts.filename);
|
|
54
61
|
const videoStream = streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_VIDEO) ?? null;
|
|
55
|
-
|
|
62
|
+
// Audio stream is mutable (setAudioTrack swaps it). Prefer the id the
|
|
63
|
+
// probe layer listed first so both entry points agree.
|
|
64
|
+
const firstAudioTrackId = opts.context.audioTracks[0]?.id;
|
|
65
|
+
let audioStream: LibavStream | null =
|
|
66
|
+
(firstAudioTrackId != null
|
|
67
|
+
? streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO && s.index === firstAudioTrackId)
|
|
68
|
+
: undefined) ??
|
|
69
|
+
streams.find((s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO) ?? null;
|
|
56
70
|
|
|
57
71
|
if (!videoStream && !audioStream) {
|
|
58
72
|
throw new Error("hybrid decoder: file has no decodable streams");
|
|
@@ -388,6 +402,79 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
|
|
|
388
402
|
try { await inputHandle.detach(); } catch { /* ignore */ }
|
|
389
403
|
},
|
|
390
404
|
|
|
405
|
+
/**
 * Switch the libav audio decoder to the stream whose index is `trackId`
 * and restart the pump at `timeSec`.
 *
 * No-op when that stream is already active; warns and returns when no
 * audio stream has that index. If the new decoder cannot be initialised,
 * the output is switched to no-audio and the pump is still restarted.
 */
async setAudioTrack(trackId, timeSec) {
  if (audioStream && audioStream.index === trackId) return;

  const nextStream = streams.find(
    (s) => s.codec_type === libav.AVMEDIA_TYPE_AUDIO && s.index === trackId,
  );
  if (!nextStream) {
    console.warn("[avbridge] hybrid: setAudioTrack — no stream with id", trackId);
    return;
  }

  // Invalidate the running pump, then wait for it to wind down.
  const token = ++pumpToken;
  if (pumpRunning) {
    try {
      await pumpRunning;
    } catch {
      /* ignore */
    }
  }
  if (destroyed) return;

  // Release the previous audio decoder before building its replacement.
  if (audioDec) {
    try {
      await libav.ff_free_decoder?.(audioDec.c, audioDec.pkt, audioDec.frame);
    } catch {
      /* ignore */
    }
    audioDec = null;
  }

  try {
    const [, c, pkt, frame] = await libav.ff_init_decoder(nextStream.codec_id, {
      codecpar: nextStream.codecpar,
    });
    audioDec = { c, pkt, frame };
    audioTimeBase =
      nextStream.time_base_num && nextStream.time_base_den
        ? [nextStream.time_base_num, nextStream.time_base_den]
        : undefined;
  } catch (err) {
    console.warn(
      "[avbridge] hybrid: setAudioTrack init failed — switching to no-audio:",
      (err as Error).message,
    );
    audioDec = null;
    opts.audio.setNoAudio();
  }

  audioStream = nextStream;

  // Reposition the demuxer at the requested time (stream -1, microseconds
  // split into low/high 32-bit halves).
  try {
    const tsUs = Math.floor(timeSec * 1_000_000);
    const [tsLo, tsHi] = libav.f64toi64
      ? libav.f64toi64(tsUs)
      : [tsUs | 0, Math.floor(tsUs / 0x100000000)];
    await libav.av_seek_frame(fmt_ctx, -1, tsLo, tsHi, libav.AVSEEK_FLAG_BACKWARD ?? 0);
  } catch (err) {
    console.warn("[avbridge] hybrid: setAudioTrack seek failed:", err);
  }

  // The demuxer moved, so the video side is flushed as well.
  try {
    if (videoDecoder && videoDecoder.state === "configured") {
      await videoDecoder.flush();
    }
  } catch {
    /* ignore */
  }
  await flushBSF();

  // Both synthetic timestamp counters restart from the same point.
  const restartUs = Math.round(timeSec * 1_000_000);
  syntheticVideoUs = restartUs;
  syntheticAudioUs = restartUs;

  pumpRunning = pumpLoop(token).catch((err) =>
    console.error("[avbridge] hybrid pump failed (post-setAudioTrack):", err),
  );
},
|
|
477
|
+
|
|
391
478
|
async seek(timeSec) {
|
|
392
479
|
const newToken = ++pumpToken;
|
|
393
480
|
if (pumpRunning) {
|
|
@@ -459,185 +546,6 @@ export async function startHybridDecoder(opts: StartHybridDecoderOptions): Promi
|
|
|
459
546
|
// time_base to avoid overflow.
|
|
460
547
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
461
548
|
|
|
462
|
-
function sanitizePacketTimestamp(
|
|
463
|
-
pkt: LibavPacket,
|
|
464
|
-
nextUs: () => number,
|
|
465
|
-
fallbackTimeBase?: [number, number],
|
|
466
|
-
): void {
|
|
467
|
-
const lo = pkt.pts ?? 0;
|
|
468
|
-
const hi = pkt.ptshi ?? 0;
|
|
469
|
-
const isInvalid = (hi === -2147483648 && lo === 0) || !Number.isFinite(lo);
|
|
470
|
-
if (isInvalid) {
|
|
471
|
-
const us = nextUs();
|
|
472
|
-
pkt.pts = us;
|
|
473
|
-
pkt.ptshi = 0;
|
|
474
|
-
pkt.time_base_num = 1;
|
|
475
|
-
pkt.time_base_den = 1_000_000;
|
|
476
|
-
return;
|
|
477
|
-
}
|
|
478
|
-
const tb = fallbackTimeBase ?? [1, 1_000_000];
|
|
479
|
-
const pts64 = hi * 0x100000000 + lo;
|
|
480
|
-
const us = Math.round((pts64 * 1_000_000 * tb[0]) / tb[1]);
|
|
481
|
-
if (Number.isFinite(us) && Math.abs(us) <= Number.MAX_SAFE_INTEGER) {
|
|
482
|
-
pkt.pts = us;
|
|
483
|
-
pkt.ptshi = us < 0 ? -1 : 0;
|
|
484
|
-
pkt.time_base_num = 1;
|
|
485
|
-
pkt.time_base_den = 1_000_000;
|
|
486
|
-
return;
|
|
487
|
-
}
|
|
488
|
-
const fallback = nextUs();
|
|
489
|
-
pkt.pts = fallback;
|
|
490
|
-
pkt.ptshi = 0;
|
|
491
|
-
pkt.time_base_num = 1;
|
|
492
|
-
pkt.time_base_den = 1_000_000;
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
// Frame timestamp sanitizer (same as fallback/decoder.ts, for audio frames)
|
|
496
|
-
function sanitizeFrameTimestamp(
|
|
497
|
-
frame: LibavFrame,
|
|
498
|
-
nextUs: () => number,
|
|
499
|
-
fallbackTimeBase?: [number, number],
|
|
500
|
-
): void {
|
|
501
|
-
const lo = frame.pts ?? 0;
|
|
502
|
-
const hi = frame.ptshi ?? 0;
|
|
503
|
-
const isInvalid = (hi === -2147483648 && lo === 0) || !Number.isFinite(lo);
|
|
504
|
-
if (isInvalid) {
|
|
505
|
-
const us = nextUs();
|
|
506
|
-
frame.pts = us;
|
|
507
|
-
frame.ptshi = 0;
|
|
508
|
-
return;
|
|
509
|
-
}
|
|
510
|
-
const tb = fallbackTimeBase ?? [1, 1_000_000];
|
|
511
|
-
const pts64 = hi * 0x100000000 + lo;
|
|
512
|
-
const us = Math.round((pts64 * 1_000_000 * tb[0]) / tb[1]);
|
|
513
|
-
if (Number.isFinite(us) && Math.abs(us) <= Number.MAX_SAFE_INTEGER) {
|
|
514
|
-
frame.pts = us;
|
|
515
|
-
frame.ptshi = us < 0 ? -1 : 0;
|
|
516
|
-
return;
|
|
517
|
-
}
|
|
518
|
-
const fallback = nextUs();
|
|
519
|
-
frame.pts = fallback;
|
|
520
|
-
frame.ptshi = 0;
|
|
521
|
-
}
|
|
522
|
-
|
|
523
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
524
|
-
// Audio frame → interleaved Float32 (duplicated from fallback/decoder.ts)
|
|
525
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
526
|
-
|
|
527
|
-
const AV_SAMPLE_FMT_U8 = 0;
|
|
528
|
-
const AV_SAMPLE_FMT_S16 = 1;
|
|
529
|
-
const AV_SAMPLE_FMT_S32 = 2;
|
|
530
|
-
const AV_SAMPLE_FMT_FLT = 3;
|
|
531
|
-
const AV_SAMPLE_FMT_U8P = 5;
|
|
532
|
-
const AV_SAMPLE_FMT_S16P = 6;
|
|
533
|
-
const AV_SAMPLE_FMT_S32P = 7;
|
|
534
|
-
const AV_SAMPLE_FMT_FLTP = 8;
|
|
535
|
-
|
|
536
|
-
interface InterleavedSamples {
|
|
537
|
-
data: Float32Array;
|
|
538
|
-
channels: number;
|
|
539
|
-
sampleRate: number;
|
|
540
|
-
}
|
|
541
|
-
|
|
542
|
-
function libavFrameToInterleavedFloat32(frame: LibavFrame): InterleavedSamples | null {
|
|
543
|
-
const channels = frame.channels ?? frame.ch_layout_nb_channels ?? 1;
|
|
544
|
-
const sampleRate = frame.sample_rate ?? 44100;
|
|
545
|
-
const nbSamples = frame.nb_samples ?? 0;
|
|
546
|
-
if (nbSamples === 0) return null;
|
|
547
|
-
|
|
548
|
-
const out = new Float32Array(nbSamples * channels);
|
|
549
|
-
|
|
550
|
-
switch (frame.format) {
|
|
551
|
-
case AV_SAMPLE_FMT_FLTP: {
|
|
552
|
-
const planes = ensurePlanes(frame.data, channels);
|
|
553
|
-
for (let ch = 0; ch < channels; ch++) {
|
|
554
|
-
const plane = asFloat32(planes[ch]);
|
|
555
|
-
for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i];
|
|
556
|
-
}
|
|
557
|
-
return { data: out, channels, sampleRate };
|
|
558
|
-
}
|
|
559
|
-
case AV_SAMPLE_FMT_FLT: {
|
|
560
|
-
const flat = asFloat32(frame.data);
|
|
561
|
-
for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i];
|
|
562
|
-
return { data: out, channels, sampleRate };
|
|
563
|
-
}
|
|
564
|
-
case AV_SAMPLE_FMT_S16P: {
|
|
565
|
-
const planes = ensurePlanes(frame.data, channels);
|
|
566
|
-
for (let ch = 0; ch < channels; ch++) {
|
|
567
|
-
const plane = asInt16(planes[ch]);
|
|
568
|
-
for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i] / 32768;
|
|
569
|
-
}
|
|
570
|
-
return { data: out, channels, sampleRate };
|
|
571
|
-
}
|
|
572
|
-
case AV_SAMPLE_FMT_S16: {
|
|
573
|
-
const flat = asInt16(frame.data);
|
|
574
|
-
for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i] / 32768;
|
|
575
|
-
return { data: out, channels, sampleRate };
|
|
576
|
-
}
|
|
577
|
-
case AV_SAMPLE_FMT_S32P: {
|
|
578
|
-
const planes = ensurePlanes(frame.data, channels);
|
|
579
|
-
for (let ch = 0; ch < channels; ch++) {
|
|
580
|
-
const plane = asInt32(planes[ch]);
|
|
581
|
-
for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = plane[i] / 2147483648;
|
|
582
|
-
}
|
|
583
|
-
return { data: out, channels, sampleRate };
|
|
584
|
-
}
|
|
585
|
-
case AV_SAMPLE_FMT_S32: {
|
|
586
|
-
const flat = asInt32(frame.data);
|
|
587
|
-
for (let i = 0; i < nbSamples * channels; i++) out[i] = flat[i] / 2147483648;
|
|
588
|
-
return { data: out, channels, sampleRate };
|
|
589
|
-
}
|
|
590
|
-
case AV_SAMPLE_FMT_U8P: {
|
|
591
|
-
const planes = ensurePlanes(frame.data, channels);
|
|
592
|
-
for (let ch = 0; ch < channels; ch++) {
|
|
593
|
-
const plane = asUint8(planes[ch]);
|
|
594
|
-
for (let i = 0; i < nbSamples; i++) out[i * channels + ch] = (plane[i] - 128) / 128;
|
|
595
|
-
}
|
|
596
|
-
return { data: out, channels, sampleRate };
|
|
597
|
-
}
|
|
598
|
-
case AV_SAMPLE_FMT_U8: {
|
|
599
|
-
const flat = asUint8(frame.data);
|
|
600
|
-
for (let i = 0; i < nbSamples * channels; i++) out[i] = (flat[i] - 128) / 128;
|
|
601
|
-
return { data: out, channels, sampleRate };
|
|
602
|
-
}
|
|
603
|
-
default:
|
|
604
|
-
return null;
|
|
605
|
-
}
|
|
606
|
-
}
|
|
607
|
-
|
|
608
|
-
function ensurePlanes(data: unknown, channels: number): unknown[] {
|
|
609
|
-
if (Array.isArray(data)) return data;
|
|
610
|
-
const arr = data as { length: number; subarray?: (a: number, b: number) => unknown };
|
|
611
|
-
const len = arr.length;
|
|
612
|
-
const perChannel = Math.floor(len / channels);
|
|
613
|
-
const planes: unknown[] = [];
|
|
614
|
-
for (let ch = 0; ch < channels; ch++) {
|
|
615
|
-
planes.push(arr.subarray ? arr.subarray(ch * perChannel, (ch + 1) * perChannel) : arr);
|
|
616
|
-
}
|
|
617
|
-
return planes;
|
|
618
|
-
}
|
|
619
|
-
|
|
620
|
-
function asFloat32(x: unknown): Float32Array {
|
|
621
|
-
if (x instanceof Float32Array) return x;
|
|
622
|
-
const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
|
|
623
|
-
return new Float32Array(ta.buffer, ta.byteOffset, ta.byteLength / 4);
|
|
624
|
-
}
|
|
625
|
-
function asInt16(x: unknown): Int16Array {
|
|
626
|
-
if (x instanceof Int16Array) return x;
|
|
627
|
-
const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
|
|
628
|
-
return new Int16Array(ta.buffer, ta.byteOffset, ta.byteLength / 2);
|
|
629
|
-
}
|
|
630
|
-
function asInt32(x: unknown): Int32Array {
|
|
631
|
-
if (x instanceof Int32Array) return x;
|
|
632
|
-
const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
|
|
633
|
-
return new Int32Array(ta.buffer, ta.byteOffset, ta.byteLength / 4);
|
|
634
|
-
}
|
|
635
|
-
function asUint8(x: unknown): Uint8Array {
|
|
636
|
-
if (x instanceof Uint8Array) return x;
|
|
637
|
-
const ta = x as { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
|
|
638
|
-
return new Uint8Array(ta.buffer, ta.byteOffset, ta.byteLength);
|
|
639
|
-
}
|
|
640
|
-
|
|
641
549
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
642
550
|
// Bridge loader
|
|
643
551
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -137,8 +137,23 @@ export async function createHybridSession(
|
|
|
137
137
|
await doSeek(time);
|
|
138
138
|
},
|
|
139
139
|
|
|
140
|
-
async setAudioTrack(
|
|
141
|
-
|
|
140
|
+
/**
 * Switch the hybrid session to audio track `id`, resuming playback
 * afterwards if audio was playing when the switch was requested.
 * Unknown ids are logged and ignored.
 */
async setAudioTrack(id) {
  const known = ctx.audioTracks.some((t) => t.id === id);
  if (!known) {
    console.warn("[avbridge] hybrid: setAudioTrack — unknown track id", id);
    return;
  }

  // Capture the playback state before anything is paused or reset.
  const shouldResume = audio.isPlaying();
  const resumeAt = audio.now();

  await audio.pause().catch(() => {});
  await handles.setAudioTrack(id, resumeAt).catch((err) =>
    console.warn("[avbridge] hybrid: handles.setAudioTrack failed:", err),
  );
  await audio.reset(resumeAt);
  renderer.flush();

  if (!shouldResume) return;
  await waitForBuffer();
  await audio.start();
},
|
|
143
158
|
|
|
144
159
|
async setSubtitleTrack(_id) {
|
package/src/strategies/native.ts
CHANGED
|
@@ -49,10 +49,13 @@ export async function createNativeSession(
|
|
|
49
49
|
video.currentTime = time;
|
|
50
50
|
},
|
|
51
51
|
async setAudioTrack(id) {
|
|
52
|
-
// HTMLMediaElement.audioTracks
|
|
53
|
-
//
|
|
52
|
+
// HTMLMediaElement.audioTracks isn't exposed on all browsers (Chrome
|
|
53
|
+
// needs the MediaCapabilities flag for many containers). Best-effort:
|
|
54
|
+
// try by string id match first, then by index. If the list doesn't
|
|
55
|
+
// exist, silently no-op — the user will still hear whatever track the
|
|
56
|
+
// browser picked by default.
|
|
54
57
|
const tracks = (video as unknown as { audioTracks?: { length: number; [i: number]: { id: string; enabled: boolean } } }).audioTracks;
|
|
55
|
-
if (!tracks) return;
|
|
58
|
+
if (!tracks || tracks.length === 0) return;
|
|
56
59
|
for (let i = 0; i < tracks.length; i++) {
|
|
57
60
|
tracks[i].enabled = tracks[i].id === String(id) || i === id;
|
|
58
61
|
}
|
|
@@ -66,8 +66,20 @@ export async function createRemuxSession(
|
|
|
66
66
|
const wasPlaying = !video.paused;
|
|
67
67
|
await pipeline.seek(time, wasPlaying || wantPlay);
|
|
68
68
|
},
|
|
69
|
-
async setAudioTrack(
|
|
70
|
-
|
|
69
|
+
/**
 * Switch the remux session to audio track `id` at the video's current
 * position. Unknown ids are logged and ignored.
 */
async setAudioTrack(id) {
  if (!context.audioTracks.some((t) => t.id === id)) {
    console.warn("[avbridge] remux: setAudioTrack — unknown track id", id);
    return;
  }
  const wasPlaying = !video.paused;
  const time = video.currentTime || 0;
  // Whether or not the session had started, the same pipeline call is made,
  // so the session is marked as started here.
  // NOTE(review): pipeline.setAudioTrack() returns early when `id` is already
  // the selected track; in that case `started` becomes true without a pump
  // having been launched — confirm play()/seek() cope with that.
  started = true;
  await pipeline.setAudioTrack(id, time, wasPlaying || wantPlay);
},
|
|
72
84
|
async setSubtitleTrack(id) {
|
|
73
85
|
const tracks = video.textTracks;
|
|
@@ -26,6 +26,11 @@ export interface RemuxPipeline {
|
|
|
26
26
|
seek(time: number, autoPlay?: boolean): Promise<void>;
|
|
27
27
|
/** Update the autoplay intent mid-flight — used when play() arrives after seek() but before the MseSink has been constructed. */
|
|
28
28
|
setAutoPlay(autoPlay: boolean): void;
|
|
29
|
+
/**
|
|
30
|
+
* Switch the active audio track. Tears down the current Output, rebuilds
|
|
31
|
+
* with the new audio source, and resumes pumping at the given time.
|
|
32
|
+
*/
|
|
33
|
+
setAudioTrack(trackId: number, timeSec: number, autoPlay: boolean): Promise<void>;
|
|
29
34
|
destroy(): Promise<void>;
|
|
30
35
|
stats(): Record<string, unknown>;
|
|
31
36
|
}
|
|
@@ -37,7 +42,6 @@ export async function createRemuxPipeline(
|
|
|
37
42
|
const mb = await import("mediabunny");
|
|
38
43
|
|
|
39
44
|
const videoTrackInfo = ctx.videoTracks[0];
|
|
40
|
-
const audioTrackInfo = ctx.audioTracks[0];
|
|
41
45
|
if (!videoTrackInfo) throw new Error("remux: source has no video track");
|
|
42
46
|
|
|
43
47
|
// Map avbridge codec names back to mediabunny's enum strings.
|
|
@@ -45,7 +49,6 @@ export async function createRemuxPipeline(
|
|
|
45
49
|
if (!mbVideoCodec) {
|
|
46
50
|
throw new Error(`remux: video codec "${videoTrackInfo.codec}" is not supported by mediabunny output`);
|
|
47
51
|
}
|
|
48
|
-
const mbAudioCodec = audioTrackInfo ? avbridgeAudioToMediabunny(audioTrackInfo.codec) : null;
|
|
49
52
|
|
|
50
53
|
// Open the input. URL sources go through mediabunny's UrlSource so the
|
|
51
54
|
// muxer streams via Range requests instead of buffering the whole file.
|
|
@@ -55,23 +58,52 @@ export async function createRemuxPipeline(
|
|
|
55
58
|
});
|
|
56
59
|
const allTracks = await input.getTracks();
|
|
57
60
|
const inputVideo = allTracks.find((t) => t.id === videoTrackInfo.id && t.isVideoTrack());
|
|
58
|
-
const inputAudio = audioTrackInfo
|
|
59
|
-
? allTracks.find((t) => t.id === audioTrackInfo.id && t.isAudioTrack())
|
|
60
|
-
: null;
|
|
61
61
|
if (!inputVideo || !inputVideo.isVideoTrack()) {
|
|
62
62
|
throw new Error("remux: video track not found in input");
|
|
63
63
|
}
|
|
64
|
-
if (audioTrackInfo && (!inputAudio || !inputAudio.isAudioTrack())) {
|
|
65
|
-
throw new Error("remux: audio track not found in input");
|
|
66
|
-
}
|
|
67
64
|
|
|
68
|
-
// Pull WebCodecs decoder
|
|
65
|
+
// Pull the video WebCodecs decoder config once — used as `meta` on the
|
|
66
|
+
// first packet after every Output rebuild.
|
|
69
67
|
const videoConfig = await inputVideo.getDecoderConfig();
|
|
70
|
-
const audioConfig = inputAudio && inputAudio.isAudioTrack() ? await inputAudio.getDecoderConfig() : null;
|
|
71
68
|
|
|
72
|
-
// Packet
|
|
69
|
+
// Packet sink for video — reused across seeks.
|
|
73
70
|
const videoSink = new mb.EncodedPacketSink(inputVideo);
|
|
74
|
-
|
|
71
|
+
|
|
72
|
+
// Audio selection is mutable: setAudioTrack() can swap it. The selected
|
|
73
|
+
// audio derived state (input track, codec, sink, config) is rebuilt via
|
|
74
|
+
// rebuildAudio() whenever the id changes.
|
|
75
|
+
type InputAudioTrack = InstanceType<typeof mb.InputAudioTrack>;
|
|
76
|
+
type AudioDecCfg = Awaited<ReturnType<InputAudioTrack["getDecoderConfig"]>>;
|
|
77
|
+
|
|
78
|
+
let selectedAudioTrackId: number | null = ctx.audioTracks[0]?.id ?? null;
|
|
79
|
+
let inputAudio: InputAudioTrack | null = null;
|
|
80
|
+
let mbAudioCodec: ReturnType<typeof avbridgeAudioToMediabunny> | null = null;
|
|
81
|
+
let audioSink: InstanceType<typeof mb.EncodedPacketSink> | null = null;
|
|
82
|
+
let audioConfig: AudioDecCfg | null = null;
|
|
83
|
+
|
|
84
|
+
/**
 * Recompute the audio-derived state (`inputAudio`, `mbAudioCodec`,
 * `audioSink`, `audioConfig`) for `selectedAudioTrackId`.
 *
 * With no selection, all four are cleared. Otherwise every value is
 * resolved into locals first and committed together, so a rejection from
 * `getDecoderConfig()` leaves the previous, consistent state in place
 * instead of a new input/sink paired with the old config.
 *
 * @throws Error if the selected id is not in `ctx.audioTracks`, or has no
 *   matching audio track in the opened input.
 */
async function rebuildAudio(): Promise<void> {
  if (selectedAudioTrackId == null) {
    inputAudio = null;
    mbAudioCodec = null;
    audioSink = null;
    audioConfig = null;
    return;
  }

  const wantedId = selectedAudioTrackId;
  const trackInfo = ctx.audioTracks.find((t) => t.id === wantedId);
  if (!trackInfo) {
    throw new Error(`remux: no audio track with id ${wantedId}`);
  }
  const newInput = allTracks.find((t) => t.id === trackInfo.id && t.isAudioTrack());
  if (!newInput || !newInput.isAudioTrack()) {
    throw new Error("remux: audio track not found in input");
  }

  const newCodec = avbridgeAudioToMediabunny(trackInfo.codec);
  const newSink = new mb.EncodedPacketSink(newInput);
  const newConfig = await newInput.getDecoderConfig();

  // The selection may have changed while the config was being fetched;
  // a stale result must not overwrite the state for the newer selection.
  if (selectedAudioTrackId !== wantedId) return;

  inputAudio = newInput;
  mbAudioCodec = newCodec;
  audioSink = newSink;
  audioConfig = newConfig;
}
|
|
105
|
+
|
|
106
|
+
await rebuildAudio();
|
|
75
107
|
|
|
76
108
|
// MSE sink — created lazily on first output write, reused across seeks.
|
|
77
109
|
let sink: MseSink | null = null;
|
|
@@ -254,6 +286,34 @@ export async function createRemuxPipeline(
|
|
|
254
286
|
pendingAutoPlay = autoPlay;
|
|
255
287
|
if (sink) sink.setPlayOnSeek(autoPlay);
|
|
256
288
|
},
|
|
289
|
+
/**
 * Switch the pipeline to audio track `trackId` and restart pumping at
 * `time`.
 *
 * No-op when `trackId` is already selected; unknown ids are logged and
 * ignored. If the new track's audio state cannot be built, the selection
 * is rolled back to the previous track (or to no audio if that fails too)
 * so `selectedAudioTrackId` always matches the audio state in use and a
 * later retry with the same id is not short-circuited.
 */
async setAudioTrack(trackId, time, autoPlay) {
  if (selectedAudioTrackId === trackId) return;
  if (!ctx.audioTracks.some((t) => t.id === trackId)) {
    console.warn("[avbridge] remux: setAudioTrack — unknown track id", trackId);
    return;
  }

  const previousTrackId = selectedAudioTrackId;

  // Stop the current pump. The next pumpLoop() will build a fresh
  // Output that uses the newly-selected audio source.
  pumpToken++;
  selectedAudioTrackId = trackId;
  try {
    await rebuildAudio();
  } catch (err) {
    console.warn(
      "[avbridge] remux: rebuildAudio failed:",
      err instanceof Error ? err.message : String(err),
    );
    // Roll back so the recorded selection matches the state actually used.
    selectedAudioTrackId = previousTrackId;
    try {
      await rebuildAudio();
    } catch {
      // Previous track is unusable as well: continue without audio.
      // rebuildAudio() only clears state for a null selection.
      selectedAudioTrackId = null;
      await rebuildAudio();
    }
  }

  // destroy() may have run while the audio state was being rebuilt.
  if (destroyed) return;

  // Tear down the existing MseSink — the audio codec may have changed,
  // and the SourceBuffer's mime is fixed at construction time. The next
  // createOutput will recompute `getMimeType()` and the write handler
  // will lazily build a new sink.
  if (sink) {
    try { sink.destroy(); } catch { /* ignore */ }
    sink = null;
  }
  pendingAutoPlay = autoPlay;
  pendingStartTime = time;
  pumpLoop(++pumpToken, time).catch((err) => {
    // eslint-disable-next-line no-console
    console.error("[avbridge] remux pipeline setAudioTrack pump failed:", err);
  });
},
|
|
257
317
|
async destroy() {
|
|
258
318
|
destroyed = true;
|
|
259
319
|
pumpToken++;
|
package/src/subtitles/render.ts
CHANGED
|
@@ -32,6 +32,14 @@ export class SubtitleOverlay {
|
|
|
32
32
|
this.el.textContent = active?.text ?? "";
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
+
/**
 * Replace the overlay's text directly, without going through the
 * loadVtt/update cue path.
 */
setText(text: string): void {
  // Called from the rAF tick (~60Hz): skip the DOM write when nothing changed.
  if (this.el.textContent === text) return;
  this.el.textContent = text;
}
|
|
42
|
+
|
|
35
43
|
destroy(): void {
|
|
36
44
|
this.el.remove();
|
|
37
45
|
this.cues = [];
|