realtime-avatar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +132 -0
- package/CLAUDE.md +17 -0
- package/LICENSE +21 -0
- package/README.md +254 -0
- package/dist/api-keys.d.ts +26 -0
- package/dist/api-keys.d.ts.map +1 -0
- package/dist/api-keys.js +88 -0
- package/dist/api-keys.js.map +1 -0
- package/dist/browser/audio.d.ts +65 -0
- package/dist/browser/audio.d.ts.map +1 -0
- package/dist/browser/audio.js +154 -0
- package/dist/browser/audio.js.map +1 -0
- package/dist/browser/boomerang.d.ts +38 -0
- package/dist/browser/boomerang.d.ts.map +1 -0
- package/dist/browser/boomerang.js +85 -0
- package/dist/browser/boomerang.js.map +1 -0
- package/dist/browser/index.d.ts +8 -0
- package/dist/browser/index.d.ts.map +1 -0
- package/dist/browser/index.js +8 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/media-session.d.ts +43 -0
- package/dist/browser/media-session.d.ts.map +1 -0
- package/dist/browser/media-session.js +169 -0
- package/dist/browser/media-session.js.map +1 -0
- package/dist/browser/player.d.ts +162 -0
- package/dist/browser/player.d.ts.map +1 -0
- package/dist/browser/player.js +514 -0
- package/dist/browser/player.js.map +1 -0
- package/dist/browser/view.d.ts +47 -0
- package/dist/browser/view.d.ts.map +1 -0
- package/dist/browser/view.js +7 -0
- package/dist/browser/view.js.map +1 -0
- package/dist/browser/webrtc.d.ts +21 -0
- package/dist/browser/webrtc.d.ts.map +1 -0
- package/dist/browser/webrtc.js +149 -0
- package/dist/browser/webrtc.js.map +1 -0
- package/dist/browser/yuv-canvas.d.ts +13 -0
- package/dist/browser/yuv-canvas.d.ts.map +1 -0
- package/dist/browser/yuv-canvas.js +95 -0
- package/dist/browser/yuv-canvas.js.map +1 -0
- package/dist/client.d.ts +195 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +440 -0
- package/dist/client.js.map +1 -0
- package/dist/errors.d.ts +33 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +73 -0
- package/dist/errors.js.map +1 -0
- package/dist/generated/openapi.d.ts +1523 -0
- package/dist/generated/openapi.d.ts.map +1 -0
- package/dist/generated/openapi.js +6 -0
- package/dist/generated/openapi.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/media.d.ts +40 -0
- package/dist/media.d.ts.map +1 -0
- package/dist/media.js +4 -0
- package/dist/media.js.map +1 -0
- package/dist/mux.d.ts +104 -0
- package/dist/mux.d.ts.map +1 -0
- package/dist/mux.js +290 -0
- package/dist/mux.js.map +1 -0
- package/dist/platform.d.ts +163 -0
- package/dist/platform.d.ts.map +1 -0
- package/dist/platform.js +5 -0
- package/dist/platform.js.map +1 -0
- package/dist/react/index.d.ts +5 -0
- package/dist/react/index.d.ts.map +1 -0
- package/dist/react/index.js +5 -0
- package/dist/react/index.js.map +1 -0
- package/dist/react/provider.d.ts +37 -0
- package/dist/react/provider.d.ts.map +1 -0
- package/dist/react/provider.js +33 -0
- package/dist/react/provider.js.map +1 -0
- package/dist/react/realtime.d.ts +74 -0
- package/dist/react/realtime.d.ts.map +1 -0
- package/dist/react/realtime.js +105 -0
- package/dist/react/realtime.js.map +1 -0
- package/dist/react/session.d.ts +91 -0
- package/dist/react/session.d.ts.map +1 -0
- package/dist/react/session.js +322 -0
- package/dist/react/session.js.map +1 -0
- package/dist/react/stage.d.ts +23 -0
- package/dist/react/stage.d.ts.map +1 -0
- package/dist/react/stage.js +62 -0
- package/dist/react/stage.js.map +1 -0
- package/dist/schemas.d.ts +59 -0
- package/dist/schemas.d.ts.map +1 -0
- package/dist/schemas.js +58 -0
- package/dist/schemas.js.map +1 -0
- package/dist/server.d.ts +2 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +8 -0
- package/dist/server.js.map +1 -0
- package/dist/session-socket.d.ts +96 -0
- package/dist/session-socket.d.ts.map +1 -0
- package/dist/session-socket.js +299 -0
- package/dist/session-socket.js.map +1 -0
- package/dist/session.d.ts +107 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +192 -0
- package/dist/session.js.map +1 -0
- package/dist/types.d.ts +24 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +94 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import type { RealtimeTurnStream } from "../client";
|
|
2
|
+
import type { AvatarTurnEventSource } from "../session-socket";
|
|
3
|
+
import { type PlayoutDelay } from "./audio";
|
|
4
|
+
/**
 * Feature check for h264 avatar playback: reports whether the WebCodecs
 * `VideoDecoder` and `EncodedVideoChunk` constructors are both available in
 * the current environment.
 */
export declare function supportsH264Playback(): boolean;
|
|
6
|
+
/** Playback state reported through `AvatarPlayHandlers.onState`. */
export type AvatarPlayerState =
    | "idle"
    | "thinking"
    | "speaking"
    | "done"
    | "error";
|
|
7
|
+
/** Per-turn playback counters; reset at the start of every `play()` call. */
export type AvatarPlayerMetrics = {
    /** Number of video frames received so far in this turn. */
    frames: number;
    /**
     * Frames that were received but not shown: late frames dropped to stay in
     * sync with the audio clock, plus frames whose decode failed or that
     * arrived as h264 in an environment without WebCodecs support.
     */
    droppedFrames: number;
    /** Milliseconds from turn start to the first audio chunk; `null` until one arrives. */
    firstAudioMs: number | null;
    /** Milliseconds from turn start to the first video event; `null` until one arrives. */
    firstVideoMs: number | null;
    /**
     * Milliseconds from turn start to the first frame actually DRAWN on the
     * running audio clock. This is the time-to-first-frame the user perceives,
     * since audio is already playing by then.
     */
    firstFrameDrawnMs: number | null;
    /** Audio currently buffered, in ms, as reported after the latest audio chunk. */
    audioQueueMs: number;
    /** Running total of audio underrun (gaps the scheduler had to cover), in ms. */
    audioUnderrunMs: number;
    /** Frame width taken from the most recent video event header. */
    width: number | null;
    /** Frame height taken from the most recent video event header. */
    height: number | null;
};
|
|
26
|
+
/** Optional callbacks invoked while a turn is playing. */
export type AvatarPlayHandlers = {
    /**
     * Assistant text as it streams in. `delta` holds the newly received text
     * and `full` the reply accumulated so far. When the final text arrives the
     * callback fires once more with an empty `delta` and the complete reply.
     */
    onText?: (delta: string, full: string) => void;
    /** Receives the player's playback state as the turn progresses. */
    onState?: (state: AvatarPlayerState) => void;
    /** Receives a metrics snapshot after each audio or video event. */
    onMetrics?: (metrics: AvatarPlayerMetrics) => void;
};
|
|
32
|
+
/** Where source-video playback ended, as reported by the server's `done` event. */
export type AvatarSourceVideoState = {
    /** Ping-pong cursor; send it back as `source_start_frame` on the next turn. */
    cursor: number;
    /** Index of the source frame the render stopped on. */
    index: number;
    /** Playback direction reported alongside `index` (presumably the ping-pong direction at the stop point). */
    direction: "forward" | "reverse";
    /** Number of source frames in the cache. */
    frames: number;
};
|
|
42
|
+
/** Result that `AvatarPlayer.play()` resolves with once a turn has finished. */
export type AvatarPlaySummary = {
    /** Assistant reply text accumulated over the turn. */
    text: string;
    /** Frame count taken from the `done` event; `0` when no `done` event was seen. */
    frames: number;
    /** Elapsed ms from the `done` event, or the locally measured duration when that is absent or zero. */
    elapsedMs: number;
    /** Snapshot of the player metrics taken when the turn finished. */
    metrics: AvatarPlayerMetrics;
    /** Only on source-video turns: the position from which the idle loop can resume seamlessly. */
    sourceVideo?: AvatarSourceVideoState;
};
|
|
50
|
+
/** Construction options for `AvatarPlayer`. */
export type AvatarPlayerOptions = {
    /** PCM sample rate handed to the audio scheduler. Defaults to 16000. */
    sampleRate?: number;
    /**
     * How long audio is held back before it starts playing.
     *
     * `"adaptive"` (the default) starts audio as soon as the first video frame
     * is decodable, or after a short cap on audio-only turns, which minimises
     * the perceived first-frame latency. Passing a number reproduces the
     * legacy fixed playout delay, in ms.
     */
    playoutDelayMs?: PlayoutDelay;
};
|
|
60
|
+
/**
 * Plays an avatar turn stream into a canvas, timing video against the audio
 * clock.
 *
 * The player covers what an integrator would otherwise have to build by hand:
 * it schedules PCM audio, decodes and queues video frames, and runs a
 * `requestAnimationFrame` loop that draws each frame at its audio-aligned
 * presentation time, dropping late frames so lip-sync does not drift.
 *
 *   const player = new AvatarPlayer();
 *   player.attach(canvasEl);
 *   await player.play(await session.chat("who are you?"));
 */
export declare class AvatarPlayer {
    /** PCM sample rate passed to each turn's audio scheduler. */
    private readonly sampleRate;
    /** Playout delay setting: `"adaptive"` or a fixed number of ms. */
    private readonly playoutDelay;
    private canvas;
    private scheduler;
    private renderer;
    private queue;
    private pendingJpeg;
    private decoding;
    /** WebCodecs decoder used for h264 video; configured on first use. */
    private videoDecoder;
    private rafHandle;
    private state;
    private metricsState;
    /** AudioContext created by `unlock()` and reused from turn to turn. */
    private audioContext;
    /** Recording tap, created on demand and handed to each turn's audio scheduler. */
    private audioTap;
    private playStartedAt;
    constructor(options?: AvatarPlayerOptions);
    /** Set, replace or (with `null`) clear the canvas the player draws into. */
    attach(canvas: HTMLCanvasElement | null): void;
    /**
     * Unlock audio; call it from inside a user gesture (click/tap).
     *
     * Browsers create an AudioContext in the suspended state and only allow it
     * to resume during a gesture, so a turn that starts later (for example a
     * livestream auto-reaction fired by a timer or effect) would otherwise be
     * silent. Call this on the first user interaction; repeated calls are
     * harmless. Where no AudioContext implementation exists it does nothing.
     */
    unlock(): Promise<void>;
    /** A copy of the current turn's metrics. */
    get metrics(): AvatarPlayerMetrics;
    /**
     * A live `MediaStream` of the audio the player schedules to the speakers.
     * Combine it with `canvas.captureStream()` and `MediaRecorder` to record a
     * turn as it played. The tap is additive (speaker output is unchanged) and
     * is kept across turns. Returns `null` while the player has no
     * AudioContext; call `unlock()` from a user gesture first.
     */
    captureAudioStream(): MediaStream | null;
    /**
     * Play one turn through to the end. Any source of turn events works: an
     * HTTP `RealtimeTurnStream` or a `RealtimeSessionSocket.turn()` source.
     * The promise resolves with a summary after the stream has ended. Pass the
     * same `signal` that was used to create the stream so that playback and
     * the network request are aborted together.
     */
    play(stream: RealtimeTurnStream | AvatarTurnEventSource, handlers?: AvatarPlayHandlers & {
        signal?: AbortSignal;
    }): Promise<AvatarPlaySummary>;
    /**
     * Runs after the stream has ended: keeps the audio clock and render loop
     * going until buffered audio has played out and the video queue is empty,
     * so the tail of the turn is rendered rather than freezing on the last
     * frame.
     */
    private drainPlayback;
    /** Stop playback: cancel the render clock, close audio, empty the queue and clear the canvas. */
    stop(): void;
    /**
     * Release everything, including the persistent AudioContext. Intended for
     * unmount; `unlock()` has to be called again before another turn can play
     * with sound.
     */
    dispose(): void;
    private handleEvent;
    private queueVideo;
    /**
     * Turns the pending-JPEG queue into bitmaps, a few frames at a time and
     * outside the stream loop. Decoding in parallel matters: awaiting
     * `createImageBitmap` one frame at a time can take longer than the
     * per-frame realtime budget for tall (e.g. 832px) frames, leaving decode
     * permanently behind the audio clock with an empty render queue — the
     * "audio plays but video freezes" stall.
     *
     * Decoded frames enter the render queue in pts order whichever decode
     * finishes first. Nothing is dropped for lateness here; that is left to
     * the audio-clocked render loop.
     */
    private pumpJpegDecode;
    /**
     * Sends h264 access units to a WebCodecs decoder. Every server chunk is a
     * self-contained Annex-B stream (AUD + SPS/PPS + IDR + P-frames), so
     * decoding can begin at any chunk and a dropped chunk does not corrupt
     * later ones. The resulting VideoFrames go into the same pts-ordered queue
     * that the render clock drains.
     */
    private queueH264;
    /**
     * Adds a decoded frame to the render queue while keeping it sorted by pts.
     * A queued frame gives the audio something to lip-sync against, so this is
     * also the point where adaptive playout releases held audio.
     */
    private insertOrdered;
    private startVideoClock;
    private draw;
    private setState;
}
|
|
162
|
+
//# sourceMappingURL=player.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"player.d.ts","sourceRoot":"","sources":["../../src/browser/player.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAC;AACpD,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAuB,KAAK,YAAY,EAAE,MAAM,SAAS,CAAC;AAcjE,yEAAyE;AACzE,wBAAgB,oBAAoB,IAAI,OAAO,CAE9C;AAED,MAAM,MAAM,iBAAiB,GAAG,MAAM,GAAG,UAAU,GAAG,UAAU,GAAG,MAAM,GAAG,OAAO,CAAC;AAEpF,MAAM,MAAM,mBAAmB,GAAG;IAChC,6CAA6C;IAC7C,MAAM,EAAE,MAAM,CAAC;IACf,2DAA2D;IAC3D,aAAa,EAAE,MAAM,CAAC;IACtB,mDAAmD;IACnD,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,mDAAmD;IACnD,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B;6EACyE;IACzE,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,uCAAuC;IACvC,YAAY,EAAE,MAAM,CAAC;IACrB,gFAAgF;IAChF,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,uFAAuF;IACvF,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IAC/C,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,iBAAiB,KAAK,IAAI,CAAC;IAC7C,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,mBAAmB,KAAK,IAAI,CAAC;CACpD,CAAC;AAEF,2EAA2E;AAC3E,MAAM,MAAM,sBAAsB,GAAG;IACnC,2EAA2E;IAC3E,MAAM,EAAE,MAAM,CAAC;IACf,gDAAgD;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,SAAS,GAAG,SAAS,CAAC;IACjC,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,mBAAmB,CAAC;IAC7B,+EAA+E;IAC/E,WAAW,CAAC,EAAE,sBAAsB,CAAC;CACtC,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,YAAY,CAAC;CAC/B,CAAC;AAEF;;;;;;;;;;;GAWG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAe;IAC5C,OAAO,CAAC,MAAM,CAAkC;IAChD,OAAO,CAAC,SAAS,CAAoC;IACrD,OAAO,CAAC,QAAQ,CAAmC;IACnD,OAAO,CAAC,KAAK,CAAqB;IAClC,OAAO,CAAC,WAAW,CAA0B;IAC7C,OAAO,CAAC,QAAQ,CAAS;IACzB,uEAAuE;IACvE,OAAO,CAAC,YAAY,CAA6B;IACjD,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,KAAK,CAA6B;IAC1C,OAAO,CAAC,YAAY,CAAuC;IAC3D,qEAAqE;IACrE,OAAO,CAAC,YAAY,CAA6B;IACjD,gFAAgF;IAChF,OAAO
,CAAC,QAAQ,CAAgD;IAChE,OAAO,CAAC,aAAa,CAAK;gBAEd,OAAO,GAAE,mBAAwB;IAK7C,yDAAyD;IACzD,MAAM,CAAC,MAAM,EAAE,iBAAiB,GAAG,IAAI,GAAG,IAAI;IAI9C;;;;;;OAMG;IACG,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAY7B,IAAI,OAAO,IAAI,mBAAmB,CAEjC;IAED;;;;;;OAMG;IACH,kBAAkB,IAAI,WAAW,GAAG,IAAI;IAOxC;;;;;OAKG;IACG,IAAI,CACR,MAAM,EAAE,kBAAkB,GAAG,qBAAqB,EAClD,QAAQ,GAAE,kBAAkB,GAAG;QAAE,MAAM,CAAC,EAAE,WAAW,CAAA;KAAO,GAC3D,OAAO,CAAC,iBAAiB,CAAC;IA+C7B;;;;OAIG;YACW,aAAa;IAoB3B,gFAAgF;IAChF,IAAI,IAAI,IAAI;IAsBZ;mFAC+E;IAC/E,OAAO,IAAI,IAAI;YAQD,WAAW;IA4DzB,OAAO,CAAC,UAAU;IA0ClB;;;;;;;;;;;OAWG;YACW,cAAc;IA2B5B;;;;;;OAMG;IACH,OAAO,CAAC,SAAS;IA+CjB;;mEAE+D;IAC/D,OAAO,CAAC,aAAa;IAiBrB,OAAO,CAAC,eAAe;IA8BvB,OAAO,CAAC,IAAI;IAaZ,OAAO,CAAC,QAAQ;CAKjB"}
|
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
import { Pcm16AudioScheduler } from "./audio";
|
|
2
|
+
import { I420CanvasRenderer } from "./yuv-canvas";
|
|
3
|
+
/**
 * Feature-detects WebCodecs for the h264 avatar stream: both the
 * `VideoDecoder` and `EncodedVideoChunk` constructors must be defined.
 *
 * @returns {boolean} `true` when both constructors are available.
 */
export function supportsH264Playback() {
  const hasDecoder = typeof VideoDecoder === "function";
  const hasChunk = typeof EncodedVideoChunk === "function";
  return hasDecoder && hasChunk;
}
|
|
7
|
+
/**
|
|
8
|
+
* Renders an avatar turn stream to a canvas with audio-clocked video playback.
|
|
9
|
+
*
|
|
10
|
+
* This is the piece every integrator would otherwise hand-roll: it schedules
|
|
11
|
+
* PCM audio, decodes/queues video frames, and drives a `requestAnimationFrame`
|
|
12
|
+
* clock that draws each frame on its audio-aligned presentation time, dropping
|
|
13
|
+
* late frames so lip-sync never drifts.
|
|
14
|
+
*
|
|
15
|
+
* const player = new AvatarPlayer();
|
|
16
|
+
* player.attach(canvasEl);
|
|
17
|
+
* await player.play(await session.chat("who are you?"));
|
|
18
|
+
*/
|
|
19
|
+
export class AvatarPlayer {
|
|
20
|
+
sampleRate;
|
|
21
|
+
playoutDelay;
|
|
22
|
+
canvas = null;
|
|
23
|
+
scheduler = null;
|
|
24
|
+
renderer = null;
|
|
25
|
+
queue = [];
|
|
26
|
+
pendingJpeg = [];
|
|
27
|
+
decoding = false;
|
|
28
|
+
/** Lazily-configured WebCodecs decoder for `codec: "h264"` streams. */
|
|
29
|
+
videoDecoder = null;
|
|
30
|
+
rafHandle = null;
|
|
31
|
+
state = "idle";
|
|
32
|
+
metricsState = emptyMetrics();
|
|
33
|
+
/** Persistent, gesture-unlocked AudioContext reused across turns. */
|
|
34
|
+
audioContext = null;
|
|
35
|
+
/** Lazily-created recording tap; every scheduled audio buffer also feeds it. */
|
|
36
|
+
audioTap = null;
|
|
37
|
+
playStartedAt = 0;
|
|
38
|
+
constructor(options = {}) {
|
|
39
|
+
this.sampleRate = options.sampleRate ?? 16_000;
|
|
40
|
+
this.playoutDelay = options.playoutDelayMs ?? "adaptive";
|
|
41
|
+
}
|
|
42
|
+
/** Bind (or rebind) the canvas the player draws into. */
|
|
43
|
+
attach(canvas) {
|
|
44
|
+
this.canvas = canvas;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Unlock audio from inside a user gesture (click/tap). Browsers start an
|
|
48
|
+
* AudioContext suspended and only let it resume during a gesture; turns that
|
|
49
|
+
* fire later (e.g. a livestream auto-reaction from a timer/effect) would
|
|
50
|
+
* otherwise play silently. Call this from the first user interaction. Safe to
|
|
51
|
+
* call repeatedly. Resolves once the context is running (or no-ops off-DOM).
|
|
52
|
+
*/
|
|
53
|
+
async unlock() {
|
|
54
|
+
const AudioContextCtor = typeof window !== "undefined"
|
|
55
|
+
? window.AudioContext || window.webkitAudioContext
|
|
56
|
+
: undefined;
|
|
57
|
+
if (!AudioContextCtor)
|
|
58
|
+
return;
|
|
59
|
+
this.audioContext ??= new AudioContextCtor({ latencyHint: "interactive" });
|
|
60
|
+
if (this.audioContext.state !== "running") {
|
|
61
|
+
await this.audioContext.resume().catch(() => { });
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
get metrics() {
|
|
65
|
+
return { ...this.metricsState };
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* A live `MediaStream` carrying everything the player schedules to the
|
|
69
|
+
* speakers — pair it with `canvas.captureStream()` + `MediaRecorder` to
|
|
70
|
+
* record a turn exactly as it played. The tap is additive (speaker output is
|
|
71
|
+
* unchanged) and persists across turns. Returns `null` until the player has
|
|
72
|
+
* an AudioContext — call `unlock()` (any user gesture) first.
|
|
73
|
+
*/
|
|
74
|
+
captureAudioStream() {
|
|
75
|
+
const context = this.audioContext;
|
|
76
|
+
if (!context || typeof context.createMediaStreamDestination !== "function")
|
|
77
|
+
return null;
|
|
78
|
+
this.audioTap ??= context.createMediaStreamDestination();
|
|
79
|
+
return this.audioTap.stream;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Play a turn to completion. Accepts anything that exposes turn events —
|
|
83
|
+
* an HTTP `RealtimeTurnStream` or a `RealtimeSessionSocket.turn()` source.
|
|
84
|
+
* Resolves with a summary when the stream ends. Pass the same `signal` used
|
|
85
|
+
* to create the stream so `stop()` and the network abort together.
|
|
86
|
+
*/
|
|
87
|
+
async play(stream, handlers = {}) {
|
|
88
|
+
this.stop();
|
|
89
|
+
const startMs = now();
|
|
90
|
+
this.playStartedAt = startMs;
|
|
91
|
+
const signal = handlers.signal;
|
|
92
|
+
this.metricsState = emptyMetrics();
|
|
93
|
+
this.scheduler = new Pcm16AudioScheduler(this.sampleRate, this.playoutDelay, this.audioContext, {}, this.audioTap);
|
|
94
|
+
await this.scheduler.prepare();
|
|
95
|
+
this.setState("thinking", handlers);
|
|
96
|
+
this.startVideoClock();
|
|
97
|
+
let text = "";
|
|
98
|
+
let frames = 0;
|
|
99
|
+
let elapsedMs = 0;
|
|
100
|
+
let sourceVideo;
|
|
101
|
+
let streamErrored = false;
|
|
102
|
+
try {
|
|
103
|
+
for await (const event of stream.events) {
|
|
104
|
+
if (signal?.aborted)
|
|
105
|
+
break;
|
|
106
|
+
const handled = await this.handleEvent(event, startMs, text, handlers);
|
|
107
|
+
text = handled.text;
|
|
108
|
+
if (handled.frames !== undefined)
|
|
109
|
+
frames = handled.frames;
|
|
110
|
+
if (handled.elapsedMs !== undefined)
|
|
111
|
+
elapsedMs = handled.elapsedMs;
|
|
112
|
+
if (handled.sourceVideo)
|
|
113
|
+
sourceVideo = handled.sourceVideo;
|
|
114
|
+
if (handled.done)
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
catch (error) {
|
|
119
|
+
streamErrored = true;
|
|
120
|
+
throw error;
|
|
121
|
+
}
|
|
122
|
+
finally {
|
|
123
|
+
// CRITICAL: the network stream ends well before playback does — there is
|
|
124
|
+
// ~playoutDelayMs of buffered audio plus a video queue still waiting for
|
|
125
|
+
// its presentation time. Do NOT tear the scheduler down here, or the
|
|
126
|
+
// render clock loses its time source and the buffered video tail freezes
|
|
127
|
+
// on the last drawn frame while the audio keeps playing. Instead keep the
|
|
128
|
+
// clock alive and drain to the end of playback first (unless aborted or
|
|
129
|
+
// the stream errored).
|
|
130
|
+
if (!signal?.aborted && !streamErrored) {
|
|
131
|
+
await this.drainPlayback(signal);
|
|
132
|
+
}
|
|
133
|
+
this.scheduler?.close();
|
|
134
|
+
this.scheduler = null;
|
|
135
|
+
}
|
|
136
|
+
this.setState("done", handlers);
|
|
137
|
+
return { text, frames, elapsedMs: elapsedMs || now() - startMs, metrics: this.metrics, sourceVideo };
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* After the stream ends, keep the audio clock + render loop running until the
|
|
141
|
+
* buffered audio has fully played out and the video queue has drained, so the
|
|
142
|
+
* tail of the turn actually renders instead of freezing on the last frame.
|
|
143
|
+
*/
|
|
144
|
+
async drainPlayback(signal) {
|
|
145
|
+
const scheduler = this.scheduler;
|
|
146
|
+
if (!scheduler)
|
|
147
|
+
return;
|
|
148
|
+
const deadline = now() + 12_000; // hard cap so we never hang
|
|
149
|
+
while (now() < deadline) {
|
|
150
|
+
if (signal?.aborted)
|
|
151
|
+
return;
|
|
152
|
+
// Finish decoding anything still pending (JPEG pool or WebCodecs queue).
|
|
153
|
+
if (this.pendingJpeg.length || this.decoding || (this.videoDecoder?.decodeQueueSize ?? 0) > 0) {
|
|
154
|
+
await sleep(30);
|
|
155
|
+
continue;
|
|
156
|
+
}
|
|
157
|
+
const mediaTime = scheduler.mediaTimeSeconds;
|
|
158
|
+
const lastPts = this.queue.length ? this.queue[this.queue.length - 1].pts : null;
|
|
159
|
+
const audioRemainingMs = scheduler.queuedMs;
|
|
160
|
+
const videoDrained = lastPts === null || (mediaTime !== null && mediaTime >= lastPts);
|
|
161
|
+
if (videoDrained && audioRemainingMs <= 30)
|
|
162
|
+
return; // everything played out
|
|
163
|
+
await sleep(30);
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
/** Stop playback: cancel the clock, close audio, clear the queue and canvas. */
|
|
167
|
+
stop() {
|
|
168
|
+
if (this.rafHandle !== null) {
|
|
169
|
+
cancelAnimationFrame(this.rafHandle);
|
|
170
|
+
this.rafHandle = null;
|
|
171
|
+
}
|
|
172
|
+
this.queue.forEach(closeQueuedFrame);
|
|
173
|
+
this.queue = [];
|
|
174
|
+
this.pendingJpeg = [];
|
|
175
|
+
if (this.videoDecoder && this.videoDecoder.state !== "closed") {
|
|
176
|
+
try {
|
|
177
|
+
this.videoDecoder.close();
|
|
178
|
+
}
|
|
179
|
+
catch {
|
|
180
|
+
// already closing
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
this.videoDecoder = null;
|
|
184
|
+
this.scheduler?.close();
|
|
185
|
+
this.scheduler = null;
|
|
186
|
+
this.renderer?.reset();
|
|
187
|
+
clearCanvas(this.canvas);
|
|
188
|
+
}
|
|
189
|
+
/** Fully release resources, including the persistent AudioContext. Call on
|
|
190
|
+
* unmount; after this, unlock() must be called again before the next turn. */
|
|
191
|
+
dispose() {
|
|
192
|
+
this.stop();
|
|
193
|
+
this.audioTap = null; // bound to the context being closed below
|
|
194
|
+
const context = this.audioContext;
|
|
195
|
+
this.audioContext = null;
|
|
196
|
+
if (context && context.state !== "closed")
|
|
197
|
+
void context.close();
|
|
198
|
+
}
|
|
199
|
+
async handleEvent(event, startMs, priorText, handlers) {
|
|
200
|
+
const header = event.header;
|
|
201
|
+
switch (header.type) {
|
|
202
|
+
case "start":
|
|
203
|
+
this.setState("speaking", handlers);
|
|
204
|
+
return { text: priorText };
|
|
205
|
+
case "text_delta": {
|
|
206
|
+
const delta = new TextDecoder().decode(event.payload);
|
|
207
|
+
const text = priorText + delta;
|
|
208
|
+
handlers.onText?.(delta, text);
|
|
209
|
+
return { text };
|
|
210
|
+
}
|
|
211
|
+
case "text_done": {
|
|
212
|
+
const decoded = new TextDecoder().decode(event.payload);
|
|
213
|
+
const text = decoded || priorText;
|
|
214
|
+
if (decoded)
|
|
215
|
+
handlers.onText?.("", text);
|
|
216
|
+
return { text };
|
|
217
|
+
}
|
|
218
|
+
case "audio": {
|
|
219
|
+
const scheduled = await this.scheduler?.schedule(event.payload);
|
|
220
|
+
this.metricsState.firstAudioMs ??= Math.round(now() - startMs);
|
|
221
|
+
if (scheduled) {
|
|
222
|
+
this.metricsState.audioQueueMs = scheduled.queuedMs;
|
|
223
|
+
if (scheduled.underrunMs > 0)
|
|
224
|
+
this.metricsState.audioUnderrunMs += scheduled.underrunMs;
|
|
225
|
+
}
|
|
226
|
+
this.setState("speaking", handlers);
|
|
227
|
+
handlers.onMetrics?.(this.metrics);
|
|
228
|
+
return { text: priorText };
|
|
229
|
+
}
|
|
230
|
+
case "video":
|
|
231
|
+
this.queueVideo(header, event.payload);
|
|
232
|
+
this.metricsState.firstVideoMs ??= Math.round(now() - startMs);
|
|
233
|
+
handlers.onMetrics?.(this.metrics);
|
|
234
|
+
return { text: priorText };
|
|
235
|
+
case "done":
|
|
236
|
+
return {
|
|
237
|
+
text: priorText,
|
|
238
|
+
frames: header.frames,
|
|
239
|
+
elapsedMs: header.elapsedMs,
|
|
240
|
+
done: true,
|
|
241
|
+
sourceVideo: header.sourceVideo,
|
|
242
|
+
};
|
|
243
|
+
case "error":
|
|
244
|
+
throw new Error(header.message);
|
|
245
|
+
default:
|
|
246
|
+
return { text: priorText };
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
queueVideo(header, payload) {
|
|
250
|
+
this.metricsState.width = header.width;
|
|
251
|
+
this.metricsState.height = header.height;
|
|
252
|
+
if (header.pixelFormat === "h264") {
|
|
253
|
+
this.queueH264(header, payload);
|
|
254
|
+
return;
|
|
255
|
+
}
|
|
256
|
+
if (header.pixelFormat === "jpeg") {
|
|
257
|
+
// Enqueue COPIES of each frame's bytes and decode them off the event loop.
|
|
258
|
+
// Two reasons this must be a copy, not a subarray view:
|
|
259
|
+
// 1. the stream's read buffer is reused, so a deferred decode of a view
|
|
260
|
+
// could read bytes that a later chunk has already overwritten;
|
|
261
|
+
// 2. decoding inline here (await per frame) would block the stream's
|
|
262
|
+
// for-await loop, starving the audio scheduler and stalling playback.
|
|
263
|
+
let offset = 0;
|
|
264
|
+
for (let i = 0; i < header.frames; i += 1) {
|
|
265
|
+
const size = header.frameSizes?.[i] ?? 0;
|
|
266
|
+
this.pendingJpeg.push({
|
|
267
|
+
pts: (header.startFrame + i) / header.fps,
|
|
268
|
+
bytes: payload.slice(offset, offset + size),
|
|
269
|
+
width: header.width,
|
|
270
|
+
height: header.height,
|
|
271
|
+
});
|
|
272
|
+
offset += size;
|
|
273
|
+
}
|
|
274
|
+
this.metricsState.frames += header.frames;
|
|
275
|
+
void this.pumpJpegDecode();
|
|
276
|
+
return;
|
|
277
|
+
}
|
|
278
|
+
const frameBytes = header.frameBytes ?? 0;
|
|
279
|
+
const frames = [];
|
|
280
|
+
for (let i = 0; i < header.frames; i += 1) {
|
|
281
|
+
const start = i * frameBytes;
|
|
282
|
+
// i420 frames are drawn straight from the queue; copy so a reused stream
|
|
283
|
+
// buffer can't corrupt a not-yet-drawn frame.
|
|
284
|
+
frames.push({ kind: "i420", pts: (header.startFrame + i) / header.fps, frame: payload.slice(start, start + frameBytes), width: header.width, height: header.height });
|
|
285
|
+
}
|
|
286
|
+
this.queue.push(...frames);
|
|
287
|
+
this.metricsState.frames += frames.length;
|
|
288
|
+
if (frames.length)
|
|
289
|
+
this.scheduler?.startPlayout();
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Decodes the pending-JPEG queue into bitmaps with bounded concurrency, off
|
|
293
|
+
* the stream loop. Concurrency matters: serial `await createImageBitmap` for
|
|
294
|
+
* tall (e.g. 832px) frames can exceed the per-frame realtime budget, so decode
|
|
295
|
+
* falls permanently behind the audio clock and the render queue starves —
|
|
296
|
+
* which is exactly the "audio plays but video freezes" stall. Decoding a few
|
|
297
|
+
* frames in parallel keeps throughput ahead of realtime.
|
|
298
|
+
*
|
|
299
|
+
* Frames are inserted into the render queue in pts order regardless of which
|
|
300
|
+
* decode finishes first, and we never drop here — the audio-clocked render
|
|
301
|
+
* loop is the single place that drops late frames.
|
|
302
|
+
*/
|
|
303
|
+
async pumpJpegDecode() {
|
|
304
|
+
if (this.decoding)
|
|
305
|
+
return;
|
|
306
|
+
this.decoding = true;
|
|
307
|
+
const CONCURRENCY = 4;
|
|
308
|
+
try {
|
|
309
|
+
while (this.pendingJpeg.length) {
|
|
310
|
+
const batch = this.pendingJpeg.splice(0, CONCURRENCY);
|
|
311
|
+
const decoded = await Promise.all(batch.map(async (frame) => {
|
|
312
|
+
try {
|
|
313
|
+
const bitmap = await decodeJpegFrame(frame.bytes);
|
|
314
|
+
return { kind: "bitmap", pts: frame.pts, bitmap, width: frame.width, height: frame.height };
|
|
315
|
+
}
|
|
316
|
+
catch {
|
|
317
|
+
return null;
|
|
318
|
+
}
|
|
319
|
+
}));
|
|
320
|
+
for (const frame of decoded) {
|
|
321
|
+
if (frame)
|
|
322
|
+
this.insertOrdered(frame);
|
|
323
|
+
else
|
|
324
|
+
this.metricsState.droppedFrames += 1;
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
finally {
|
|
329
|
+
this.decoding = false;
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Feed h264 access units to a WebCodecs decoder. Each server chunk is a
|
|
334
|
+
* self-contained Annex-B stream (AUD + SPS/PPS + IDR + P-frames), so any
|
|
335
|
+
* chunk can start decode and dropped chunks never corrupt later ones.
|
|
336
|
+
* Decoded VideoFrames land in the same pts-ordered queue the render clock
|
|
337
|
+
* already drains; hardware decode happens off the event loop.
|
|
338
|
+
*/
|
|
339
|
+
queueH264(header, payload) {
|
|
340
|
+
if (!supportsH264Playback()) {
|
|
341
|
+
this.metricsState.droppedFrames += header.frames;
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
344
|
+
if (!this.videoDecoder || this.videoDecoder.state === "closed") {
|
|
345
|
+
this.videoDecoder = new VideoDecoder({
|
|
346
|
+
output: (frame) => {
|
|
347
|
+
this.insertOrdered({
|
|
348
|
+
kind: "videoframe",
|
|
349
|
+
pts: frame.timestamp / 1_000_000,
|
|
350
|
+
frame,
|
|
351
|
+
width: header.width,
|
|
352
|
+
height: header.height,
|
|
353
|
+
});
|
|
354
|
+
},
|
|
355
|
+
error: () => {
|
|
356
|
+
// A decoder fault drops the rest of this turn's h264 frames; audio
|
|
357
|
+
// keeps playing and the next turn reconfigures a fresh decoder.
|
|
358
|
+
this.videoDecoder = null;
|
|
359
|
+
},
|
|
360
|
+
});
|
|
361
|
+
// Annex-B is implied when no description is attached. The stream is
|
|
362
|
+
// baseline profile level 3.1 (see avtr1_modal/realtime/h264.py).
|
|
363
|
+
this.videoDecoder.configure({ codec: "avc1.42001f", optimizeForLatency: true });
|
|
364
|
+
}
|
|
365
|
+
let offset = 0;
|
|
366
|
+
for (let i = 0; i < header.frames; i += 1) {
|
|
367
|
+
const size = header.frameSizes?.[i] ?? 0;
|
|
368
|
+
const bytes = payload.slice(offset, offset + size);
|
|
369
|
+
offset += size;
|
|
370
|
+
try {
|
|
371
|
+
this.videoDecoder.decode(new EncodedVideoChunk({
|
|
372
|
+
// Chunk-leading AUs carry the IDR; the rest are P-frames.
|
|
373
|
+
type: i === 0 ? "key" : "delta",
|
|
374
|
+
timestamp: Math.round(((header.startFrame + i) / header.fps) * 1_000_000),
|
|
375
|
+
data: bytes,
|
|
376
|
+
}));
|
|
377
|
+
}
|
|
378
|
+
catch {
|
|
379
|
+
this.metricsState.droppedFrames += 1;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
this.metricsState.frames += header.frames;
|
|
383
|
+
}
|
|
384
|
+
/** Insert a decoded frame into the render queue keeping it sorted by pts.
|
|
385
|
+
* A frame in the queue means there is something to lip-sync against, so
|
|
386
|
+
* this is also where adaptive playout releases held audio. */
|
|
387
|
+
insertOrdered(frame) {
|
|
388
|
+
this.scheduler?.startPlayout();
|
|
389
|
+
const queue = this.queue;
|
|
390
|
+
if (queue.length === 0 || queue[queue.length - 1].pts <= frame.pts) {
|
|
391
|
+
queue.push(frame);
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
let lo = 0;
|
|
395
|
+
let hi = queue.length;
|
|
396
|
+
while (lo < hi) {
|
|
397
|
+
const mid = (lo + hi) >> 1;
|
|
398
|
+
if (queue[mid].pts <= frame.pts)
|
|
399
|
+
lo = mid + 1;
|
|
400
|
+
else
|
|
401
|
+
hi = mid;
|
|
402
|
+
}
|
|
403
|
+
queue.splice(lo, 0, frame);
|
|
404
|
+
}
|
|
405
|
+
startVideoClock() {
|
|
406
|
+
if (this.rafHandle !== null)
|
|
407
|
+
cancelAnimationFrame(this.rafHandle);
|
|
408
|
+
const tick = () => {
|
|
409
|
+
const mediaTime = this.scheduler?.mediaTimeSeconds;
|
|
410
|
+
const canvas = this.canvas;
|
|
411
|
+
if (mediaTime !== null && mediaTime !== undefined && canvas) {
|
|
412
|
+
let drawable = null;
|
|
413
|
+
let dropped = 0;
|
|
414
|
+
while (this.queue.length && this.queue[0].pts <= mediaTime + 0.03) {
|
|
415
|
+
if (drawable) {
|
|
416
|
+
closeQueuedFrame(drawable);
|
|
417
|
+
dropped += 1;
|
|
418
|
+
}
|
|
419
|
+
drawable = this.queue.shift() ?? null;
|
|
420
|
+
}
|
|
421
|
+
while (this.queue.length > 36 && this.queue[0].pts < mediaTime - 0.08) {
|
|
422
|
+
closeQueuedFrame(this.queue.shift() ?? null);
|
|
423
|
+
dropped += 1;
|
|
424
|
+
}
|
|
425
|
+
if (dropped)
|
|
426
|
+
this.metricsState.droppedFrames += dropped;
|
|
427
|
+
if (drawable) {
|
|
428
|
+
this.metricsState.firstFrameDrawnMs ??= Math.round(now() - this.playStartedAt);
|
|
429
|
+
this.draw(canvas, drawable);
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
this.rafHandle = requestAnimationFrame(tick);
|
|
433
|
+
};
|
|
434
|
+
this.rafHandle = requestAnimationFrame(tick);
|
|
435
|
+
}
|
|
436
|
+
draw(canvas, frame) {
|
|
437
|
+
if (frame.kind === "bitmap") {
|
|
438
|
+
drawBitmapFrame(canvas, frame);
|
|
439
|
+
return;
|
|
440
|
+
}
|
|
441
|
+
if (frame.kind === "videoframe") {
|
|
442
|
+
drawVideoFrame(canvas, frame);
|
|
443
|
+
return;
|
|
444
|
+
}
|
|
445
|
+
this.renderer ??= new I420CanvasRenderer();
|
|
446
|
+
this.renderer.draw(canvas, frame.frame, frame.width, frame.height, Math.round(frame.pts * 1_000_000));
|
|
447
|
+
}
|
|
448
|
+
setState(state, handlers) {
|
|
449
|
+
if (this.state === state)
|
|
450
|
+
return;
|
|
451
|
+
this.state = state;
|
|
452
|
+
handlers.onState?.(state);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
/**
 * Build a fresh metrics record: counters and durations start at zero,
 * values that have not been measured yet start as null.
 *
 * @returns {object} A new object on every call.
 */
function emptyMetrics() {
    const frameCounters = { frames: 0, droppedFrames: 0 };
    const firstEvents = { firstAudioMs: null, firstVideoMs: null, firstFrameDrawnMs: null };
    const audioTotals = { audioQueueMs: 0, audioUnderrunMs: 0 };
    const dimensions = { width: null, height: null };
    return { ...frameCounters, ...firstEvents, ...audioTotals, ...dimensions };
}
|
|
468
|
+
/**
 * Release the decoded image held by a queued frame that will not be drawn.
 * Null/undefined and frames of any other kind are ignored.
 *
 * @param {object|null|undefined} frame Queue entry to dispose of.
 */
function closeQueuedFrame(frame) {
    switch (frame?.kind) {
        case "bitmap":
            frame.bitmap.close();
            break;
        case "videoframe":
            frame.frame.close();
            break;
        default:
            // Nothing closable is attached to this entry.
            break;
    }
}
|
|
474
|
+
/**
 * Paint a decoded video frame onto the canvas, then release it.
 *
 * The frame is closed on every path, including when resizing, context
 * lookup or drawing throws, so a failed paint cannot leave it held open.
 * Any such error still propagates to the caller.
 *
 * @param {object} canvas Target canvas; resized to the frame when they differ.
 * @param {object} frame Queue entry with `frame`, `width` and `height`.
 */
function drawVideoFrame(canvas, frame) {
    try {
        // Assign dimensions only on change.
        if (canvas.width !== frame.width)
            canvas.width = frame.width;
        if (canvas.height !== frame.height)
            canvas.height = frame.height;
        const context = canvas.getContext("2d", { alpha: false });
        // A missing 2d context skips the paint; the frame is still released.
        context?.drawImage(frame.frame, 0, 0, frame.width, frame.height);
    }
    finally {
        frame.frame.close();
    }
}
|
|
483
|
+
/**
 * Wipe the canvas, if there is one: clear its full area through the 2d
 * context when available, then re-assign its own width.
 *
 * @param {object|null|undefined} canvas Canvas to clear; falsy is a no-op.
 */
function clearCanvas(canvas) {
    if (canvas) {
        const ctx = canvas.getContext("2d");
        if (ctx !== null && ctx !== undefined) {
            ctx.clearRect(0, 0, canvas.width, canvas.height);
        }
        // eslint-disable-next-line no-self-assign -- reset draw state cheaply
        canvas.width = canvas.width;
    }
}
|
|
491
|
+
/**
 * Paint a decoded bitmap frame onto the canvas, then release it.
 *
 * The bitmap is closed on every path, including when resizing, context
 * lookup or drawing throws, so a failed paint cannot leave it held open.
 * Any such error still propagates to the caller.
 *
 * @param {object} canvas Target canvas; resized to the frame when they differ.
 * @param {object} frame Queue entry with `bitmap`, `width` and `height`.
 */
function drawBitmapFrame(canvas, frame) {
    try {
        // Assign dimensions only on change.
        if (canvas.width !== frame.width)
            canvas.width = frame.width;
        if (canvas.height !== frame.height)
            canvas.height = frame.height;
        const context = canvas.getContext("2d", { alpha: false });
        // A missing 2d context skips the paint; the bitmap is still released.
        context?.drawImage(frame.bitmap, 0, 0, frame.width, frame.height);
    }
    finally {
        frame.bitmap.close();
    }
}
|
|
500
|
+
/**
 * Decode one JPEG frame into an image bitmap.
 *
 * @param {Uint8Array} bytes Encoded JPEG data; copied before decoding.
 * @returns {Promise<ImageBitmap>} Whatever `createImageBitmap` resolves to.
 * @throws {Error} When `createImageBitmap` is not available.
 */
async function decodeJpegFrame(bytes) {
    const canDecode = typeof createImageBitmap === "function";
    if (!canDecode) {
        throw new Error("This browser does not support realtime JPEG frame decoding.");
    }
    // One bulk copy via `.slice()` so the Blob owns its bytes (the underlying
    // stream buffer is reused); this avoids an element-wise copy.
    const owned = bytes.slice();
    const jpeg = new Blob([owned], { type: "image/jpeg" });
    return createImageBitmap(jpeg);
}
|
|
508
|
+
/**
 * Current time in milliseconds: `performance.now()` when a `performance`
 * global exists, otherwise `Date.now()`.
 *
 * @returns {number} Millisecond timestamp.
 */
function now() {
    if (typeof performance === "undefined") {
        return Date.now();
    }
    return performance.now();
}
|
|
511
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
514
|
+
//# sourceMappingURL=player.js.map
|