@mcut/media 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,431 @@
1
+ import { Input, OutputFormat, Quality } from "mediabunny";
2
+ import { AssetId, AssetRef, Project, TimeMap } from "@mcut/timeline";
3
+ import { FrameSource } from "@mcut/compositor";
4
+
5
+ //#region src/probe.d.ts
6
+ /** A media source: a Blob (or File) or a URL string, as accepted by `inputFor`. */ type MediaSourceLike = Blob | string;
7
+ /** Open a Mediabunny `Input` over a Blob/File or a (blob:/http:) URL string. */
8
+ declare function inputFor(src: MediaSourceLike): Input;
9
+ /** Result of `probeMedia`: duration, track presence, and optional dimensions/MIME type. */ interface MediaProbe {
10
+ /** Duration in milliseconds (per the `Ms` suffix). */ durationMs: number;
11
+ hasVideo: boolean;
12
+ hasAudio: boolean;
13
+ /** NOTE(review): width/height are optional — presumably only set when a video track exists; confirm. */ width?: number;
14
+ height?: number;
15
+ mimeType?: string;
16
+ }
17
+ /** Read duration, dimensions, and track layout of an audio/video file; resolves to a `MediaProbe`. */
18
+ declare function probeMedia(src: MediaSourceLike): Promise<MediaProbe>;
19
+ /** Read the intrinsic dimensions of an image URL (browser only); resolves to `{ width, height }`. */
20
+ declare function probeImage(src: string): Promise<{
21
+ width: number;
22
+ height: number;
23
+ }>;
24
+ /**
25
+ * Turn a dropped/picked file into a probed {@link AssetRef} ready for the
26
+ * `addAsset` command. Creates an object URL for `src` — callers own its
27
+ * lifetime (revoke it when the asset is removed). `hash` (SHA-256) is the
28
+ * asset's stable identity for persistence/relink; very large files skip hashing.
29
+ */
30
+ declare function createAssetFromFile(file: File): Promise<AssetRef>;
31
+ //#endregion
32
+ //#region src/thumbnails.d.ts
33
+ /** Options for `getVideoThumbnail` and `getVideoThumbnailUrl`. */ interface ThumbnailOptions {
34
+ /** Thumbnail width in px (height follows aspect ratio). Default 160. */
35
+ width?: number;
36
+ /** Source time to sample, in ms. Default 0. */
37
+ timeMs?: number;
38
+ }
39
+ /** Extract a single poster frame from a video as a canvas. Resolves to `null` for audio-only files. */
40
+ declare function getVideoThumbnail(src: MediaSourceLike, options?: ThumbnailOptions): Promise<HTMLCanvasElement | OffscreenCanvas | null>;
41
+ /** A poster frame as a data URL (handy for `<img>` in media bins). NOTE(review): `null` presumably mirrors `getVideoThumbnail` (audio-only file) — confirm. */
42
+ declare function getVideoThumbnailUrl(src: MediaSourceLike, options?: ThumbnailOptions): Promise<string | null>;
43
+ //#endregion
44
+ //#region src/extract-audio.d.ts
45
+ /** Options for `extractAudioToWav`. */ interface ExtractAudioOptions {
46
+ /** Output sample rate in Hz. Default 16000 — small uploads, ideal for speech-to-text APIs. */
47
+ sampleRate?: number;
48
+ /** Output channel count. Default 1 (mono). */
49
+ numberOfChannels?: number;
50
+ /** Progress callback. NOTE(review): `progress` is presumably a 0–1 fraction — confirm. */ onProgress?: (progress: number) => void;
51
+ }
52
+ /** Error: the file's audio exists but this browser has no decoder for its codec. */
53
+ declare class AudioNotDecodableError extends Error {
54
+ /** NOTE(review): `codec` is presumably the undecodable codec's identifier (may be `undefined`) — confirm. */ constructor(codec: string | undefined);
55
+ }
56
+ /**
57
+ * Extract a file's audio track to a PCM WAV blob, fully client-side.
58
+ * Resolves to `null` when the file has no audio track. The default
59
+ * 16 kHz/mono output keeps uploads to transcription APIs small.
60
+ *
61
+ * Resilience: when the resampled conversion fails (Mediabunny's resampler /
62
+ * channel mixer can throw "Assertion failed." on unusual source layouts),
63
+ * retry once WITHOUT resampling — the WAV is bigger but transcription APIs
64
+ * accept any PCM rate. Undecodable codecs fail fast with a clear error
65
+ * (NOTE(review): presumably `AudioNotDecodableError` — confirm). */
66
+ declare function extractAudioToWav(src: MediaSourceLike, options?: ExtractAudioOptions): Promise<Blob | null>;
67
+ //#endregion
68
+ //#region src/encoders.d.ts
69
+ /**
70
+ * Register WASM fallback encoders for codecs the runtime cannot encode
71
+ * natively — currently that's AAC on Firefox (no AudioEncoder AAC support), via
72
+ * `@mediabunny/aac-encoder` (FFmpeg-based). The import is dynamic so
73
+ * browsers with native AAC never download the WASM. Idempotent (safe to call
74
+ * repeatedly); export paths await it before probing codecs.
75
+ */
76
+ declare function ensureFallbackAudioEncoders(): Promise<void>;
77
+ //#endregion
78
+ //#region src/preview-pool.d.ts
79
+ /** One media item the preview pool should have live (see `getActiveMediaItems`). */ interface ActiveMediaItem {
80
+ assetId: AssetId;
81
+ kind: 'video' | 'audio';
82
+ /** Media-local time in ms (after trim and time remap). */
83
+ sourceTimeMs: number;
84
+ /** Element playback speed at the playhead (timeMap slope; 1 unmapped, 0 frozen). */
85
+ rate: number;
86
+ /** Effective volume (element volume × fades; 0 when element/track muted). */
87
+ volume: number;
88
+ /**
89
+ * The element plays its source backward. Media elements reject negative
90
+ * rates, so the pool seek-chases these like a scrub (frames come from the
91
+ * scrub cache) and their preview audio is muted; export is exact.
92
+ */
93
+ reversed?: boolean;
94
+ }
95
+ /** Compute the media items the preview pool should have live at `timeMs` for `project`. */
96
+ declare function getActiveMediaItems(project: Project, timeMs: number): ActiveMediaItem[];
97
+ /** Transport and volume state passed to `PreviewMediaPool.sync`. */ interface PreviewSyncOptions {
98
+ isPlaying: boolean;
99
+ playbackRate: number;
100
+ /** Global volume multiplier (0–1). */
101
+ masterVolume: number;
102
+ /** NOTE(review): presumably a global mute that overrides `masterVolume` — confirm. */ muted: boolean;
103
+ }
104
+ /**
104
+ * The approximate, low-latency {@link FrameSource} used for interactive
105
+ * preview: one pooled `<video>`/`<audio>` element per media asset, kept in
106
+ * sync with the playback clock, plus decoded `ImageBitmap`s for images.
107
+ * Audio plays through the media elements themselves (no Web Audio graph);
108
+ * the deterministic export pipeline is a separate implementation.
109
+ *
110
+ * Known approximation: two simultaneously-active timeline elements sharing one
111
+ * asset share one media element, so they render the same source frame.
112
+ */
113
+ declare class PreviewMediaPool implements FrameSource {
114
+ private resolveAsset;
115
+ private media;
116
+ private images;
117
+ private scrubCaches;
118
+ private decodedVideos;
119
+ private disposed;
120
+ /** Transport state from the last sync(); steers mid-seek frame choice. */
121
+ private playing;
122
+ /** `resolveAsset` maps an asset id to its `AssetRef`, or `undefined` when unknown. */ constructor(resolveAsset: (assetId: AssetId) => AssetRef | undefined);
123
+ /** Drawable frame for `assetId` at `sourceTimeMs`. NOTE(review): `null` presumably means no frame is available yet — confirm. */ getFrame(assetId: AssetId, sourceTimeMs: number): CanvasImageSource | null;
124
+ /**
125
+ * Reconcile pooled media elements with the items active under the
126
+ * playhead. Called by the playback loop every frame and on seeks.
127
+ */
128
+ sync(items: ActiveMediaItem[], options: PreviewSyncOptions): void;
129
+ /** Reload an errored element from the asset's current src, rate-limited. */
130
+ private recoverMediaElement;
131
+ /**
132
+ * Seek unless one is already in flight. Restarting an in-flight seek aborts
133
+ * its decode, and on long-GOP sources (seek latency above the drift
134
+ * tolerance) that loops forever: no seek ever completes, playback degrades
135
+ * to scrub-cache frames, and a paused preview can stay black. Letting the
136
+ * seek land also coalesces scrubbing to the latest playhead position.
137
+ */
138
+ private requestSeek;
139
+ /** Fold a completed seek into the element's latency estimate. */
140
+ private settleSeek;
141
+ /** Pause everything (e.g. when the player unmounts a project). */
142
+ pauseAll(): void;
143
+ /** NOTE(review): presumably releases pooled elements/caches and marks the pool unusable (see private `disposed`) — confirm. */ dispose(): void;
144
+ private ensureScrubCache;
145
+ private ensureMediaElement;
146
+ private getDecodedVideoFrame;
147
+ private ensureDecodedVideoState;
148
+ private requestDecodedVideoFrame;
149
+ private decodeVideoFrame;
150
+ private trimDecodedVideoFrames;
151
+ private loadImage;
152
+ }
154
+ //#endregion
155
+ //#region src/container-formats.d.ts
156
+ /**
157
+ * The container-format registry — export's counterpart to the timeline's
158
+ * element-type registry. A container format is an output vocabulary entry:
159
+ * its id (also the default file extension), a UI label, and a factory for
160
+ * the Mediabunny {@link OutputFormat} that muxes it.
161
+ *
162
+ * The built-in formats (mp4, webm, mkv) register through this exact API, so
163
+ * community formats are first-class: they show up in `listContainerFormats`
164
+ * (which the export dialog renders), pass WebCodecs support probing via
165
+ * `getExportSupport`, and export via `exportProject({ format: id })`.
166
+ *
167
+ * Register custom formats at module load, before the export UI mounts.
168
+ */
169
+ /** One registered container format: id, label, extension, MIME type, and output-format factory. */ interface ContainerFormatEntry {
170
+ /** Registry key, accepted as `ExportProjectOptions.format`. */
171
+ id: string;
172
+ /** UI label (e.g. `'MP4'`). */
173
+ label: string;
174
+ /** Suggested file extension, without the dot. */
175
+ extension: string;
176
+ /** Output MIME type (e.g. `'video/mp4'`). */
177
+ mimeType: string;
178
+ /** Build a fresh Mediabunny `OutputFormat` for one export. */
179
+ createOutputFormat: () => OutputFormat;
180
+ }
181
+ /** Add `entry` to the container-format registry. NOTE(review): behavior on a duplicate `id` is not visible here — confirm. */ declare function registerContainerFormat(entry: ContainerFormatEntry): void;
182
+ /** Look up a container format by `id`; `undefined` presumably means no such format is registered. */ declare function getContainerFormat(id: string): ContainerFormatEntry | undefined;
183
+ /** Every registered container format, in registration order (built-ins first). The export dialog renders this list. */
184
+ declare function listContainerFormats(): ContainerFormatEntry[];
185
+ //#endregion
186
+ //#region src/export-types.d.ts
187
+ /** Progress payload delivered to `ExportProjectOptions.onProgress`. */ interface ExportProgress {
188
+ /** 0–1 across the whole export. */
189
+ progress: number;
190
+ /** Which stage of the export is currently running. */ phase: 'audio' | 'video' | 'finalize';
191
+ }
192
+ /** A registered container format id (built-ins: mp4, webm, mkv). The `(string & {})` arm admits any custom id while keeping the literal ids visible to editor autocomplete. */
193
+ type ContainerFormatId = 'mp4' | 'webm' | 'mkv' | (string & {});
194
+ /**
195
+ * A font face the export worker registers into its own `FontFaceSet` before
196
+ * rendering: workers do not see `document.fonts`, so faces loaded on the
197
+ * main thread are invisible to an `OffscreenCanvas` in a worker. `source` is
198
+ * either the face's binary or a URL the worker can fetch.
199
+ */
200
+ interface ExportFontFaceInit {
201
+ /** NOTE(review): presumably the CSS font-family name the face registers under — confirm. */ family: string;
202
+ /** CSS font-weight descriptor (e.g. "400", "100 900" for variable). */
203
+ weight?: string;
204
+ /** CSS font-style descriptor (e.g. "italic"). */
205
+ style?: string;
206
+ /** CSS unicode-range descriptor (Google Fonts ships per-subset faces). */
207
+ unicodeRange?: string;
208
+ /** The face's binary (`ArrayBuffer`) or a URL string the worker can fetch. */ source: ArrayBuffer | string;
209
+ }
210
+ /** Options for `exportProject`; every field is optional. */ interface ExportProjectOptions {
211
+ /** Container format id from the registry. Default `'mp4'`. */
212
+ format?: ContainerFormatId;
213
+ /** Video bitrate in bits/s or a mediabunny `Quality`. Default `QUALITY_HIGH`. */
214
+ videoBitrate?: number | Quality;
215
+ /**
216
+ * Font faces for text/caption rendering inside the export worker. Without
217
+ * them the worker draws text with system fallback faces (web fonts loaded
218
+ * on the main thread don't exist in worker scope).
219
+ */
220
+ fonts?: ExportFontFaceInit[];
221
+ /** Progress callback; receives an `ExportProgress`. */ onProgress?: (progress: ExportProgress) => void;
222
+ /** NOTE(review): presumably aborts the export when signalled — confirm. */ signal?: AbortSignal;
223
+ }
224
+ /** What `exportProject` resolves to. */ interface ExportResult {
225
+ /** NOTE(review): presumably the encoded output file — confirm. */ blob: Blob;
226
+ /** Suggested file extension from the format's registry entry. */
227
+ extension: string;
228
+ }
229
+ //#endregion
230
+ //#region src/export-core.d.ts
231
+ /** Can this browser encode video (and audio) for the given format? NOTE(review): an omitted `format` presumably defaults to `'mp4'` like `ExportProjectOptions.format` — confirm. */
232
+ declare function getExportSupport(format?: ContainerFormatId): Promise<{
233
+ video: boolean;
234
+ audio: boolean;
235
+ }>;
236
+ //#endregion
237
+ //#region src/export.d.ts
238
+ /**
239
+ * Render a project to a video file, fully client-side and deterministically.
240
+ * Resolves to an `ExportResult`.
241
+ * The audio mix renders first on the main thread (`OfflineAudioContext` and
242
+ * the time-stretch worklet don't exist in workers), then the frame
243
+ * decode→composite→encode→mux pipeline runs in a dedicated worker so the
244
+ * editor stays responsive; environments without workers (Node/Bun, spawn
245
+ * failure) fall back to running the same pipeline in-context.
246
+ */
247
+ declare function exportProject(project: Project, options?: ExportProjectOptions): Promise<ExportResult>;
248
+ //#endregion
249
+ //#region src/filmstrip.d.ts
250
+ /** Options for `getFilmstrip`; only `frameCount` is required. */ interface FilmstripOptions {
251
+ /** Number of evenly spaced frames. */
252
+ frameCount: number;
253
+ /** Width of each frame in px (height follows aspect). Default 80. */
254
+ frameWidth?: number;
255
+ /** Source range to sample, in ms (`startMs`–`endMs`). Defaults to the whole file. */
256
+ startMs?: number;
257
+ endMs?: number;
258
+ }
259
+ /** What `getFilmstrip` resolves to: the strip canvas plus its per-frame geometry and timestamps. */ interface Filmstrip {
260
+ /** All frames drawn side-by-side, `frameCount × frameWidth` wide. */
261
+ canvas: HTMLCanvasElement | OffscreenCanvas;
262
+ frameWidth: number;
263
+ frameHeight: number;
264
+ frameCount: number;
265
+ /** Source timestamp of each frame, in ms. */
266
+ timestampsMs: number[];
267
+ }
268
+ /**
269
+ * Sample evenly spaced poster frames into one horizontal strip — the
270
+ * filmstrip background of timeline video clips. Resolves to `null` for files
271
+ * without a video track.
272
+ */
273
+ declare function getFilmstrip(src: MediaSourceLike, options: FilmstripOptions): Promise<Filmstrip | null>;
274
+ //#endregion
275
+ //#region src/audio-peaks.d.ts
276
+ /** Options for `extractAudioPeaks`. */ interface AudioPeaksOptions {
277
+ /** Number of peak buckets across the range. Default 256. */
278
+ buckets?: number;
279
+ /** Source range in ms (`startMs`–`endMs`). Defaults to the whole file. */
280
+ startMs?: number;
281
+ endMs?: number;
282
+ }
283
+ /** What `extractAudioPeaks` resolves to. */ interface AudioPeaks {
284
+ /** Max |sample| per bucket, 0–1. */
285
+ peaks: Float32Array;
286
+ /** NOTE(review): presumably the duration (ms) of the analyzed range — confirm whether it is the range or the whole file. */ durationMs: number;
287
+ }
288
+ /** Fold `samples` into `buckets` max-|amplitude| bins and return them as a new `Float32Array` (pure; unit-tested). */
289
+ declare function bucketPeaks(samples: Float32Array, buckets: number): Float32Array;
290
+ /**
291
+ * Decode a file's audio and reduce it to waveform peaks for timeline clip
292
+ * rendering. Resolves to `null` when the file has no audio track. Browser-only
293
+ * (WebCodecs decode via Mediabunny).
294
+ */
295
+ declare function extractAudioPeaks(src: MediaSourceLike, options?: AudioPeaksOptions): Promise<AudioPeaks | null>;
296
+ //#endregion
297
+ //#region src/media-store.d.ts
298
+ /**
299
+ * Content-addressed media persistence on OPFS (the OpenCut pattern: media
300
+ * blobs live in the Origin Private File System keyed by content hash;
301
+ * project JSON stores `asset.hash` and re-binds `src` on load). Hash-keyed
302
+ * storage dedupes repeated imports and gives relink a stable identity.
303
+ *
304
+ * Callers fall back to their own storage (e.g. IndexedDB keyed by asset id)
305
+ * when OPFS is unavailable or a file was imported without a hash.
306
+ */
307
+ /** Largest file size, in bytes, that we hash/persist (WebCrypto digest needs the full buffer). */
308
+ declare const MAX_HASHABLE_BYTES: number;
309
+ /** NOTE(review): presumably feature-detects OPFS availability for the media store — confirm. */ declare function isMediaStoreSupported(): boolean;
310
+ /** SHA-256 hex digest of a blob's content; resolves to `null` when the blob is too large to hash. */
311
+ declare function hashBlob(blob: Blob): Promise<string | null>;
312
+ /** Persist a blob under its hash. No-op when already stored (same content). NOTE(review): the meaning of the resolved boolean is not documented here — confirm (success flag vs. newly written). */
313
+ declare function saveMediaBlob(hash: string, blob: Blob): Promise<boolean>;
314
+ /** Load the blob stored under `hash`. NOTE(review): `null` presumably means nothing is stored for that hash (or the store is unavailable) — confirm. */ declare function loadMediaBlob(hash: string): Promise<Blob | null>;
315
+ /** Delete stored blobs whose hash is not in `keep`. Resolves to the number of blobs removed. */
316
+ declare function pruneMediaBlobs(keep: ReadonlySet<string>): Promise<number>;
317
+ //#endregion
318
+ //#region src/scrub-cache.d.ts
319
+ /**
320
+ * Scrub frame cache (Diffusion Studio's recipe): a binary-searched ring of
321
+ * downscaled frames captured opportunistically while a media element plays
322
+ * or sits on a decoded frame. While the element is mid-seek the preview
323
+ * serves the nearest cached frame instead of flashing black/stale — preview
324
+ * never blocks on decode.
325
+ */
326
+ declare class ScrubFrameCache {
327
+ /** NOTE(review): presumably the capacity after which the oldest frame is evicted (see `order`) — confirm. */ private maxFrames;
328
+ /** Frames closer together than this are considered duplicates. */
329
+ private minGapMs;
330
+ /** Sorted by timeMs for binary search. */
331
+ private frames;
332
+ /** Insertion order for FIFO eviction. */
333
+ private order;
334
+ constructor(maxFrames?: number, /** Frames closer together than this (in ms) are considered duplicates. */
335
+
336
+ minGapMs?: number);
337
+ /** Capture the element's current frame if this instant isn't cached yet. */
338
+ capture(source: HTMLVideoElement, timeMs: number): void;
339
+ /** The cached frame nearest `timeMs`, or null when the cache is empty. */
340
+ nearest(timeMs: number): OffscreenCanvas | null;
341
+ /** NOTE(review): presumably the number of frames currently cached — confirm. */ get size(): number;
342
+ /** NOTE(review): presumably drops every cached frame — confirm. */ clear(): void;
343
+ /** First index whose frame time is >= timeMs. */
344
+ private indexAtOrAfter;
345
+ }
346
+ //#endregion
347
+ //#region src/time-stretch.d.ts
348
+ /**
349
+ * Pitch-preserving time-stretch for export audio.
350
+ *
351
+ * Preview already preserves pitch — media elements default
352
+ * `preservesPitch = true` when `playbackRate` changes — so without this the
353
+ * export would chipmunk where the preview didn't. Constant-speed clips
354
+ * (linear two-keyframe timeMaps, which is everything setElementSpeed
355
+ * produces, including its split halves) stretch through here; variable
356
+ * ramps keep the per-buffer `playbackRate` fallback.
357
+ *
358
+ * Engine: Signalsmith Stretch (WASM, notably higher quality and faster than
359
+ * phase-vocoder/WSOLA approaches), driven through the offline buffer driver
360
+ * in signalsmith-offline.ts — no Web Audio required, so the same path runs
361
+ * on the main thread, in workers, and under Bun tests. Failures reject and
362
+ * the export falls back to per-buffer `playbackRate` at the call site.
363
+ */
364
+ /** The constant playback speed a timeMap encodes, as returned by `constantSpeedOf`. */ interface ConstantSpeed {
365
+ /** Source ms consumed per output ms. */
366
+ rate: number;
367
+ /** First source offset (ms relative to trimStart) the map plays. */
368
+ sourceStartOffsetMs: number;
369
+ /** Source ms consumed in total. */
370
+ sourceSpanMs: number;
371
+ }
372
+ /** The constant speed a timeMap encodes, or null when it's a ramp/freeze. NOTE(review): the result for an `undefined` timeMap is not visible here — confirm. */
373
+ declare function constantSpeedOf(timeMap: TimeMap | undefined): ConstantSpeed | null;
374
+ /** Two-channel PCM (`left`/`right` sample arrays) plus its sample rate; input and output of `stretchStereo`. */ interface StereoData {
375
+ left: Float32Array;
376
+ right: Float32Array;
377
+ /** PCM sample rate; drives the offline stretch render. */
378
+ sampleRate: number;
379
+ }
380
+ /**
381
+ * Stretch stereo PCM by `tempo` (2 = twice as fast, half as long) with
382
+ * pitch preserved. Output length ≈ input length / tempo.
383
+ */
384
+ declare function stretchStereo(data: StereoData, tempo: number): Promise<StereoData>;
385
+ //#endregion
386
+ //#region src/audio-sync.d.ts
387
+ /**
388
+ * Audio-waveform autosync for multicam: two recordings of the same room
389
+ * align where their loudness envelopes correlate best. RMS envelopes at
390
+ * 100Hz (10ms buckets) give talking-head-grade sync; normalized
391
+ * cross-correlation over a bounded lag search finds the offset, and the
392
+ * peak-vs-noise ratio doubles as a confidence score.
393
+ */
394
+ /** What `findSyncOffsetMs` resolves to: the offset and how far to trust it. */ interface SyncResult {
395
+ /** How much B starts AFTER A in real time, in ms (negative = B started first). */
396
+ offsetMs: number;
397
+ /** Peak correlation ÷ runner-up — <1.3 means "don't trust this". */
398
+ confidence: number;
399
+ }
400
+ /** Options for `findSyncOffsetMs` and `extractEnvelope`. */ interface AudioSyncOptions {
401
+ /** Seconds of audio analyzed from each source. Default 60. */
402
+ windowS?: number;
403
+ /** Largest |offset| considered, in seconds. Default 30. */
404
+ maxLagS?: number;
405
+ /** Envelope rate (buckets per second). Default 100 (10ms resolution). */
406
+ rateHz?: number;
407
+ /** NOTE(review): presumably aborts decoding/analysis when signalled — confirm. */ signal?: AbortSignal;
408
+ }
409
+ /**
410
+ * Normalized cross-correlation of two zero-meaned envelopes. Returns the lag
411
+ * (in buckets) that best aligns `b` to `a`: positive lag means b's content
412
+ * happens LATER in its own file, i.e. b started recording earlier.
413
+ * Pure — unit-testable without decoding. Note: this sign convention is the opposite of `SyncResult.offsetMs` (negative = B started first).
414
+ */
415
+ declare function crossCorrelateEnvelopes(a: Float32Array, b: Float32Array, maxLagBuckets: number): {
416
+ lag: number;
417
+ confidence: number;
418
+ };
419
+ /** RMS envelope of the first `windowS` seconds at `rateHz` buckets/second. Only `windowS`, `rateHz` and `signal` are read from the options. NOTE(review): `null` presumably means the source has no audio — confirm. */
420
+ declare function extractEnvelope(src: MediaSourceLike, {
421
+ windowS,
422
+ rateHz,
423
+ signal
424
+ }?: AudioSyncOptions): Promise<Float32Array | null>;
425
+ /**
426
+ * The sync offset between two recordings: how many ms after A's recording
427
+ * started did B's start. Resolves to null when either source has no audio.
428
+ */
429
+ declare function findSyncOffsetMs(a: MediaSourceLike, b: MediaSourceLike, options?: AudioSyncOptions): Promise<SyncResult | null>;
430
+ //#endregion
431
+ export { type ActiveMediaItem, AudioNotDecodableError, type AudioPeaks, type AudioPeaksOptions, type AudioSyncOptions, type ConstantSpeed, type ContainerFormatEntry, type ContainerFormatId, type ExportFontFaceInit, type ExportProgress, type ExportProjectOptions, type ExportResult, type ExtractAudioOptions, type Filmstrip, type FilmstripOptions, MAX_HASHABLE_BYTES, type MediaProbe, type MediaSourceLike, PreviewMediaPool, type PreviewSyncOptions, ScrubFrameCache, type StereoData, type SyncResult, type ThumbnailOptions, bucketPeaks, constantSpeedOf, createAssetFromFile, crossCorrelateEnvelopes, ensureFallbackAudioEncoders, exportProject, extractAudioPeaks, extractAudioToWav, extractEnvelope, findSyncOffsetMs, getActiveMediaItems, getContainerFormat, getExportSupport, getFilmstrip, getVideoThumbnail, getVideoThumbnailUrl, hashBlob, inputFor, isMediaStoreSupported, listContainerFormats, loadMediaBlob, probeImage, probeMedia, pruneMediaBlobs, registerContainerFormat, saveMediaBlob, stretchStereo };