npm - @mcut/media - Versions diffs - 0.1.0-alpha.0 - Mend

@mcut/media 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/LICENSE +187 -0
package/README.md +11 -0
package/dist/export-core-B8z3duRc.js +605 -0
package/dist/export-worker.d.ts +1 -0
package/dist/export-worker.js +87 -0
package/dist/index.d.ts +431 -0
package/dist/index.js +1384 -0
package/package.json +56 -0

package/dist/index.d.ts (added)

@@ -0,0 +1,431 @@
+import { Input, OutputFormat, Quality } from "mediabunny";
+import { AssetId, AssetRef, Project, TimeMap } from "@mcut/timeline";
+import { FrameSource } from "@mcut/compositor";
+//#region src/probe.d.ts
+type MediaSourceLike = Blob | string; // A media source: a Blob/File, or a URL string (inputFor accepts blob:/http: URLs).
+/**
+ * Open a Mediabunny input over a Blob/File or a (blob:/http:) URL.
+ *
+ * @param src - The Blob/File or URL string to read from.
+ * @returns The Mediabunny `Input` for `src`. The call is synchronous: it
+ *   returns the `Input` itself, not a promise.
+ */
+declare function inputFor(src: MediaSourceLike): Input;
+interface MediaProbe { // What probeMedia resolves to: duration, track layout, and dimensions of a file.
+  durationMs: number; // Total duration; milliseconds, going by the `Ms` suffix.
+  hasVideo: boolean;
+  hasAudio: boolean;
+  width?: number; // NOTE(review): optional — presumably absent when there is no video track; confirm.
+  height?: number; // NOTE(review): optional — presumably absent when there is no video track; confirm.
+  mimeType?: string; // NOTE(review): optional — when it is omitted is not visible from this declaration.
+}
+/**
+ * Read duration, dimensions, and track layout of an audio/video file.
+ *
+ * @param src - Blob/File or URL string of the file to inspect.
+ * @returns A promise for the file's {@link MediaProbe}.
+ */
+declare function probeMedia(src: MediaSourceLike): Promise<MediaProbe>;
+/**
+ * Read intrinsic dimensions of an image URL (browser only).
+ *
+ * Unlike {@link probeMedia}, this accepts a URL string only, not a Blob.
+ *
+ * @param src - URL of the image.
+ * @returns A promise for the image's `width` and `height`.
+ */
+declare function probeImage(src: string): Promise<{
+  width: number;
+  height: number;
+}>;
+/**
+ * Turn a dropped/picked file into a probed {@link AssetRef} ready for the
+ * `addAsset` command. Creates an object URL for `src` — callers own its
+ * lifetime (revoke when the asset is removed). `hash` (SHA-256) is the
+ * asset's stable identity for persistence/relink; very large files skip it
+ * (presumably those over {@link MAX_HASHABLE_BYTES} — confirm in the source).
+ *
+ * @param file - The file the user dropped or picked.
+ * @returns A promise for the probed asset reference.
+ */
+declare function createAssetFromFile(file: File): Promise<AssetRef>;
+//#endregion
+//#region src/thumbnails.d.ts
+interface ThumbnailOptions { // Options for getVideoThumbnail / getVideoThumbnailUrl; every field is optional.
+  /** Thumbnail width in px (height follows aspect ratio). Default 160. */
+  width?: number;
+  /** Source time to sample (ms, going by the `Ms` suffix). Default 0. */
+  timeMs?: number;
+}
+/**
+ * Extract a single poster frame from a video. Returns `null` for audio-only files.
+ *
+ * @param src - Blob/File or URL string of the video.
+ * @param options - Thumbnail width and sample time; see {@link ThumbnailOptions}.
+ * @returns A promise for a canvas holding the frame (`HTMLCanvasElement` or
+ *   `OffscreenCanvas`), or `null` for audio-only files.
+ */
+declare function getVideoThumbnail(src: MediaSourceLike, options?: ThumbnailOptions): Promise<HTMLCanvasElement | OffscreenCanvas | null>;
+/**
+ * A poster frame as a data URL (handy for `<img>` in media bins).
+ *
+ * @param src - Blob/File or URL string of the video.
+ * @param options - Thumbnail width and sample time; see {@link ThumbnailOptions}.
+ * @returns A promise for the data URL, or `null`.
+ *   NOTE(review): `null` presumably mirrors {@link getVideoThumbnail}
+ *   (audio-only files) — confirm.
+ */
+declare function getVideoThumbnailUrl(src: MediaSourceLike, options?: ThumbnailOptions): Promise<string | null>;
+//#endregion
+//#region src/extract-audio.d.ts
+interface ExtractAudioOptions { // Options for extractAudioToWav; every field is optional.
+  /** Output sample rate in Hz. Default 16000 — small uploads, ideal for speech-to-text APIs. */
+  sampleRate?: number;
+  /** Output channel count. Default 1 (mono). */
+  numberOfChannels?: number;
+  onProgress?: (progress: number) => void; // Progress callback. NOTE(review): the range of `progress` is not stated here (presumably 0–1) — confirm.
+}
+/**
+ * The file's audio exists but this browser has no decoder for its codec.
+ *
+ * The constructor takes the codec, or `undefined`.
+ * NOTE(review): `undefined` presumably means the codec could not be
+ * identified, and this is presumably the "clear error" that
+ * {@link extractAudioToWav} documents for undecodable codecs — confirm both.
+ */
+declare class AudioNotDecodableError extends Error {
+  constructor(codec: string | undefined);
+}
+/**
+ * Extract a file's audio track to a PCM WAV blob, fully client-side.
+ * Returns `null` when the file has no audio track. The default
+ * 16 kHz/mono output keeps uploads to transcription APIs small.
+ *
+ * Resilience: when the resampled conversion fails (Mediabunny's resampler /
+ * channel mixer can throw "Assertion failed." on unusual source layouts),
+ * retry once WITHOUT resampling — the WAV is bigger but transcription APIs
+ * accept any PCM rate. Undecodable codecs fail fast with a clear error.
+ *
+ * @param src - Blob/File or URL string of the file to read.
+ * @param options - Output sample rate, channel count, and progress callback;
+ *   see {@link ExtractAudioOptions}.
+ * @returns A promise for the WAV blob, or `null` when there is no audio
+ *   track. After the no-resampling retry the blob may not be at the
+ *   requested sample rate.
+ */
+declare function extractAudioToWav(src: MediaSourceLike, options?: ExtractAudioOptions): Promise<Blob | null>;
+//#endregion
+//#region src/encoders.d.ts
+/**
+ * Register WASM fallback encoders for codecs the runtime cannot encode
+ * natively — today that's AAC on Firefox (no AudioEncoder AAC support), via
+ * `@mediabunny/aac-encoder` (FFmpeg-based). The import is dynamic so
+ * browsers with native AAC never download the WASM. Idempotent; export
+ * paths await it before probing codecs.
+ *
+ * @returns A promise to await before probing codecs; because the call is
+ *   idempotent it can be awaited from every export path.
+ */
+declare function ensureFallbackAudioEncoders(): Promise<void>;
+//#endregion
+//#region src/preview-pool.d.ts
+interface ActiveMediaItem { // One media item the preview pool should have live; produced by getActiveMediaItems.
+  assetId: AssetId;
+  kind: 'video' | 'audio';
+  /** Media-local time (after trim and time remap). */
+  sourceTimeMs: number;
+  /** Element playback speed at the playhead (timeMap slope; 1 unmapped, 0 frozen). */
+  rate: number;
+  /** Effective volume (element volume × fades; 0 when element/track muted). */
+  volume: number;
+  /**
+   * The element plays its source backward. Media elements reject negative
+   * rates, so the pool seek-chases these like a scrub (frames come from the
+   * scrub cache) and their preview audio is muted; export is exact.
+   * Optional: the flag may be left out entirely.
+   */
+  reversed?: boolean;
+}
+/**
+ * The media items the preview pool should have live at `timeMs`.
+ *
+ * @param project - The project whose timeline is inspected.
+ * @param timeMs - The time to evaluate, in ms.
+ * @returns The {@link ActiveMediaItem}s for that time; the array has the
+ *   type {@link PreviewMediaPool.sync} takes as `items`.
+ */
+declare function getActiveMediaItems(project: Project, timeMs: number): ActiveMediaItem[];
+interface PreviewSyncOptions { // Transport state handed to PreviewMediaPool.sync alongside the active items.
+  isPlaying: boolean;
+  playbackRate: number;
+  /** Global volume multiplier (0–1). */
+  masterVolume: number;
+  muted: boolean;
+}
+/**
+ * The approximate, low-latency {@link FrameSource} used for interactive
+ * preview: one pooled `<video>`/`<audio>` element per media asset, kept in
+ * sync with the playback clock, plus decoded `ImageBitmap`s for images.
+ * Audio plays through the media elements themselves (no Web Audio graph);
+ * the deterministic export pipeline is a separate implementation.
+ *
+ * Known approximation: two simultaneously-active elements sharing one asset
+ * share one media element, so they render the same source frame.
+ *
+ * Public surface: the constructor takes `resolveAsset`, a lookup from asset
+ * id to {@link AssetRef} that may return `undefined`; `getFrame` returns a
+ * drawable for an asset at a source time, or `null`; `sync` reconciles the
+ * pool with the items under the playhead; `pauseAll` and `dispose` take no
+ * arguments. Everything else is private.
+ * NOTE(review): when exactly `getFrame` yields `null`, and what `dispose`
+ * releases, are not visible from this declaration — confirm in the source.
+ */
+declare class PreviewMediaPool implements FrameSource {
+  private resolveAsset;
+  private media;
+  private images;
+  private scrubCaches;
+  private decodedVideos;
+  private disposed;
+  /** Transport state from the last sync(); steers mid-seek frame choice. */
+  private playing;
+  constructor(resolveAsset: (assetId: AssetId) => AssetRef | undefined);
+  getFrame(assetId: AssetId, sourceTimeMs: number): CanvasImageSource | null;
+  /**
+   * Reconcile pooled media elements with the items active under the
+   * playhead. Called by the playback loop every frame and on seeks.
+   *
+   * @param items - The items that should be live, e.g. from
+   *   {@link getActiveMediaItems}.
+   * @param options - Current transport state (playing, rate, volume, mute).
+   */
+  sync(items: ActiveMediaItem[], options: PreviewSyncOptions): void;
+  /** Reload an errored element from the asset's current src, rate-limited. */
+  private recoverMediaElement;
+  /**
+   * Seek unless one is already in flight. Restarting an in-flight seek aborts
+   * its decode, and on long-GOP sources (seek latency above the drift
+   * tolerance) that loops forever: no seek ever completes, playback degrades
+   * to scrub-cache frames, and a paused preview can stay black. Letting the
+   * seek land also coalesces scrubbing to the latest playhead position.
+   */
+  private requestSeek;
+  /** Fold a completed seek into the element's latency estimate. */
+  private settleSeek;
+  /** Pause everything (e.g. when the player unmounts a project). */
+  pauseAll(): void;
+  dispose(): void;
+  private ensureScrubCache;
+  private ensureMediaElement;
+  private getDecodedVideoFrame;
+  private ensureDecodedVideoState;
+  private requestDecodedVideoFrame;
+  private decodeVideoFrame;
+  private trimDecodedVideoFrames;
+  private loadImage;
+}
+//#endregion
+//#region src/container-formats.d.ts
+/**
+ * The container-format registry — export's counterpart to the timeline's
+ * element-type registry. A container format is an output vocabulary entry:
+ * its id (also the default file extension), a UI label, and a factory for
+ * the Mediabunny {@link OutputFormat} that muxes it.
+ *
+ * The built-in formats (mp4, webm, mkv) register through this exact API, so
+ * community formats are first-class: they show up in `listContainerFormats`
+ * (which the export dialog renders), pass WebCodecs support probing via
+ * `getExportSupport`, and export via `exportProject({ format: id })`.
+ *
+ * Register custom formats at module load, before the export UI mounts.
+ */
+interface ContainerFormatEntry {
+  /** Registry key, accepted as `ExportProjectOptions.format`. */
+  id: string;
+  /** UI label (e.g. `'MP4'`). */
+  label: string;
+  /**
+   * Suggested file extension, without the dot.
+   * NOTE(review): the overview on this interface calls `id` "also the
+   * default file extension", yet this is a separate required field.
+   * `ExportResult.extension` is documented as coming from the registry
+   * entry, so this field — not `id` — looks like the one callers receive;
+   * confirm.
+   */
+  extension: string;
+  /** Output MIME type (e.g. `'video/mp4'`). */
+  mimeType: string;
+  /** Build a fresh Mediabunny output format for one export. */
+  createOutputFormat: () => OutputFormat;
+}
+declare function registerContainerFormat(entry: ContainerFormatEntry): void; // Adds `entry` to the container-format registry. NOTE(review): behavior for an already-registered `id` is not visible here.
+declare function getContainerFormat(id: string): ContainerFormatEntry | undefined; // Looks up a format by registry id; the type allows `undefined` (presumably for ids that were never registered).
+/**
+ * Every registered container format, in registration order (built-ins first).
+ *
+ * @returns The entries the export dialog renders (per the registry notes on
+ *   {@link ContainerFormatEntry}).
+ */
+declare function listContainerFormats(): ContainerFormatEntry[];
+//#endregion
+//#region src/export-types.d.ts
+interface ExportProgress { // Payload of ExportProjectOptions.onProgress.
+  /** 0–1 across the whole export. */
+  progress: number;
+  phase: 'audio' | 'video' | 'finalize'; // Current stage. NOTE(review): exportProject documents audio rendering before the frame pipeline; 'finalize' presumably comes last — confirm.
+}
+/**
+ * A registered container format id (built-ins: mp4, webm, mkv).
+ *
+ * The `(string & {})` arm keeps the type open to any string (custom
+ * registered ids) without collapsing the union to plain `string`, so the
+ * built-in literals still show up in editor completion.
+ */
+type ContainerFormatId = 'mp4' | 'webm' | 'mkv' | (string & {});
+/**
+ * A font face the export worker registers into its own `FontFaceSet` before
+ * rendering: workers do not see `document.fonts`, so faces loaded on the
+ * main thread are invisible to an `OffscreenCanvas` in a worker. `source` is
+ * either the face's binary or a URL the worker can fetch.
+ *
+ * Supplied to an export through `ExportProjectOptions.fonts`. Only `family`
+ * and `source` are required; the descriptors are optional.
+ */
+interface ExportFontFaceInit {
+  family: string;
+  /** CSS font-weight descriptor (e.g. "400", "100 900" for variable). */
+  weight?: string;
+  /** CSS font-style descriptor (e.g. "italic"). */
+  style?: string;
+  /** CSS unicode-range descriptor (Google Fonts ships per-subset faces). */
+  unicodeRange?: string;
+  source: ArrayBuffer | string; // The face's binary (ArrayBuffer) or a URL the worker can fetch (string).
+}
+interface ExportProjectOptions { // Options for exportProject; every field is optional.
+  /** Container format id from the registry. Default `'mp4'`. */
+  format?: ContainerFormatId;
+  /** Video bitrate in bits/s or a mediabunny `Quality`. Default `QUALITY_HIGH`. */
+  videoBitrate?: number | Quality;
+  /**
+   * Font faces for text/caption rendering inside the export worker. Without
+   * them the worker draws text with system fallback faces (web fonts loaded
+   * on the main thread don't exist in worker scope).
+   */
+  fonts?: ExportFontFaceInit[];
+  onProgress?: (progress: ExportProgress) => void; // Called with an ExportProgress (overall 0–1 progress plus the current phase).
+  signal?: AbortSignal; // NOTE(review): presumably aborts the export when signalled — confirm how the abort is reported to the caller.
+}
+interface ExportResult { // What exportProject resolves to.
+  blob: Blob; // The rendered video file.
+  /** Suggested file extension from the format's registry entry. */
+  extension: string;
+}
+//#endregion
+//#region src/export-core.d.ts
+/**
+ * Can this browser encode video (and audio) for the given format?
+ *
+ * @param format - Container format id to check.
+ *   NOTE(review): the default when omitted is not visible here — presumably
+ *   `'mp4'`, matching `ExportProjectOptions.format`; confirm.
+ * @returns A promise for two flags: whether video, and whether audio, can be
+ *   encoded for that format.
+ */
+declare function getExportSupport(format?: ContainerFormatId): Promise<{
+  video: boolean;
+  audio: boolean;
+}>;
+//#endregion
+//#region src/export.d.ts
+/**
+ * Render a project to a video file, fully client-side and deterministically.
+ *
+ * The audio mix renders first on the main thread (`OfflineAudioContext` and
+ * the time-stretch worklet don't exist in workers), then the frame
+ * decode→composite→encode→mux pipeline runs in a dedicated worker so the
+ * editor stays responsive; environments without workers (Node/Bun, spawn
+ * failure) fall back to running the same pipeline in-context.
+ *
+ * @param project - The project to render.
+ * @param options - Format, bitrate, fonts, progress callback, and abort
+ *   signal; see {@link ExportProjectOptions}.
+ * @returns A promise for the rendered blob plus its suggested file extension.
+ */
+declare function exportProject(project: Project, options?: ExportProjectOptions): Promise<ExportResult>;
+//#endregion
+//#region src/filmstrip.d.ts
+interface FilmstripOptions { // Options for getFilmstrip; only `frameCount` is required.
+  /** Number of evenly spaced frames. */
+  frameCount: number;
+  /** Width of each frame in px (height follows aspect). Default 80. */
+  frameWidth?: number;
+  /** Source range to sample. Defaults to the whole file. */
+  startMs?: number;
+  endMs?: number; // Other end of the sampled source range (pairs with `startMs`; same default rule).
+}
+interface Filmstrip { // What getFilmstrip resolves to: the strip canvas plus the geometry needed to slice it.
+  /** All frames drawn side-by-side, `frameCount × frameWidth` wide. */
+  canvas: HTMLCanvasElement | OffscreenCanvas;
+  frameWidth: number;
+  frameHeight: number;
+  frameCount: number;
+  /** Source timestamp of each frame, in ms. */
+  timestampsMs: number[];
+}
+/**
+ * Sample evenly spaced poster frames into one horizontal strip — the
+ * filmstrip background of timeline video clips. Returns `null` for files
+ * without a video track.
+ *
+ * @param src - Blob/File or URL string of the video.
+ * @param options - Frame count (required), frame width, and source range;
+ *   see {@link FilmstripOptions}. Unlike the other helpers in this file,
+ *   the options argument itself is required.
+ * @returns A promise for the {@link Filmstrip}, or `null` when the file has
+ *   no video track.
+ */
+declare function getFilmstrip(src: MediaSourceLike, options: FilmstripOptions): Promise<Filmstrip | null>;
+//#endregion
+//#region src/audio-peaks.d.ts
+interface AudioPeaksOptions { // Options for extractAudioPeaks; every field is optional.
+  /** Number of peak buckets across the range. Default 256. */
+  buckets?: number;
+  /** Source range. Defaults to the whole file. */
+  startMs?: number;
+  endMs?: number; // Other end of the source range (pairs with `startMs`; same default rule).
+}
+interface AudioPeaks { // What extractAudioPeaks resolves to.
+  /** Max |sample| per bucket, 0–1. */
+  peaks: Float32Array;
+  durationMs: number; // NOTE(review): a duration in ms — whether of the analysed range or of the whole file is not visible here; confirm.
+}
+/**
+ * Fold samples into `buckets` max-|amplitude| bins (pure; unit-tested).
+ *
+ * @param samples - The samples to reduce.
+ * @param buckets - Number of bins to fold them into.
+ * @returns The per-bin max-|amplitude| values.
+ */
+declare function bucketPeaks(samples: Float32Array, buckets: number): Float32Array;
+/**
+ * Decode a file's audio and reduce it to waveform peaks for timeline clip
+ * rendering. Returns `null` when the file has no audio track. Browser-only
+ * (WebCodecs decode via Mediabunny).
+ *
+ * @param src - Blob/File or URL string of the file to read.
+ * @param options - Bucket count and source range; see
+ *   {@link AudioPeaksOptions}.
+ * @returns A promise for the {@link AudioPeaks}, or `null` when there is no
+ *   audio track.
+ */
+declare function extractAudioPeaks(src: MediaSourceLike, options?: AudioPeaksOptions): Promise<AudioPeaks | null>;
+//#endregion
+//#region src/media-store.d.ts
+/**
+ * Content-addressed media persistence on OPFS (the OpenCut pattern: media
+ * blobs live in the Origin Private File System keyed by content hash;
+ * project JSON stores `asset.hash` and re-binds `src` on load). Hash-keyed
+ * storage dedupes repeated imports and gives relink a stable identity.
+ *
+ * Callers fall back to their own storage (e.g. IndexedDB keyed by asset id)
+ * when OPFS is unavailable or a file was imported without a hash.
+ */
+/**
+ * Largest file we hash/persist (WebCrypto digest needs the full buffer).
+ * A size in bytes, going by the name; the value itself is not part of this
+ * declaration. {@link hashBlob} resolves to `null` for blobs too large to hash.
+ */
+declare const MAX_HASHABLE_BYTES: number;
+declare function isMediaStoreSupported(): boolean; // NOTE(review): presumably reports whether OPFS is available here (callers fall back to their own storage when it is not) — confirm.
+/**
+ * SHA-256 hex of a blob's content, or null when too large to hash.
+ *
+ * @param blob - The content to hash.
+ * @returns A promise for the hex digest, or `null` for oversized blobs
+ *   (see {@link MAX_HASHABLE_BYTES}).
+ */
+declare function hashBlob(blob: Blob): Promise<string | null>;
+/**
+ * Persist a blob under its hash. No-op when already stored (same content).
+ *
+ * @param hash - Content hash used as the storage key (see {@link hashBlob}).
+ * @param blob - The content to store.
+ * @returns NOTE(review): the meaning of the boolean is not documented here —
+ *   presumably whether the blob is stored afterwards; confirm, including
+ *   what the no-op case resolves to.
+ */
+declare function saveMediaBlob(hash: string, blob: Blob): Promise<boolean>;
+declare function loadMediaBlob(hash: string): Promise<Blob | null>; // Loads the blob stored under `hash`; the type allows `null` (presumably when nothing is stored under it — confirm).
+/**
+ * Delete stored blobs whose hash is not in `keep`. Returns removed count.
+ *
+ * @param keep - Hashes to retain; every other stored blob is deleted.
+ * @returns A promise for the number of blobs removed.
+ */
+declare function pruneMediaBlobs(keep: ReadonlySet<string>): Promise<number>;
+//#endregion
+//#region src/scrub-cache.d.ts
+/**
+ * Scrub frame cache (Diffusion Studio's recipe): a binary-searched ring of
+ * downscaled frames captured opportunistically while a media element plays
+ * or sits on a decoded frame. While the element is mid-seek the preview
+ * serves the nearest cached frame instead of flashing black/stale — preview
+ * never blocks on decode.
+ *
+ * Both constructor arguments are optional; their default values are not
+ * visible in this declaration.
+ * NOTE(review): `maxFrames` presumably caps the ring before FIFO eviction
+ * kicks in, and `size` presumably counts the cached frames — confirm.
+ */
+declare class ScrubFrameCache {
+  private maxFrames;
+  /** Frames closer together than this are considered duplicates. */
+  private minGapMs;
+  /** Sorted by timeMs for binary search. */
+  private frames;
+  /** Insertion order for FIFO eviction. */
+  private order;
+  constructor(maxFrames?: number, /** Frames closer together than this are considered duplicates. */
+  minGapMs?: number);
+  /**
+   * Capture the element's current frame if this instant isn't cached yet.
+   *
+   * @param source - The video element to grab the frame from.
+   * @param timeMs - The time the frame is filed under, in ms.
+   */
+  capture(source: HTMLVideoElement, timeMs: number): void;
+  /** The cached frame nearest `timeMs`, or null when the cache is empty. */
+  nearest(timeMs: number): OffscreenCanvas | null;
+  get size(): number;
+  clear(): void;
+  /** First index whose frame time is >= timeMs. */
+  private indexAtOrAfter;
+}
+//#endregion
+//#region src/time-stretch.d.ts
+/**
+ * Pitch-preserving time-stretch for export audio.
+ *
+ * Preview already preserves pitch — media elements default
+ * `preservesPitch = true` when `playbackRate` changes — so without this the
+ * export would chipmunk where the preview didn't. Constant-speed clips
+ * (linear two-keyframe timeMaps, which is everything setElementSpeed
+ * produces, including its split halves) stretch through here; variable
+ * ramps keep the per-buffer `playbackRate` fallback.
+ *
+ * Engine: Signalsmith Stretch (WASM, notably higher quality and faster than
+ * phase-vocoder/WSOLA approaches), driven through the offline buffer driver
+ * in signalsmith-offline.ts — no Web Audio required, so the same path runs
+ * on the main thread, in workers, and under Bun tests. Failures reject and
+ * the export falls back to per-buffer `playbackRate` at the call site.
+ *
+ * `ConstantSpeed` itself is what {@link constantSpeedOf} extracts from a
+ * timeMap that encodes one constant speed.
+ */
+interface ConstantSpeed {
+  /** Source ms consumed per output ms. */
+  rate: number;
+  /** First source offset (ms relative to trimStart) the map plays. */
+  sourceStartOffsetMs: number;
+  /** Source ms consumed in total. */
+  sourceSpanMs: number;
+}
+/**
+ * The constant speed a timeMap encodes, or null when it's a ramp/freeze.
+ *
+ * @param timeMap - The time map to inspect; `undefined` is accepted.
+ *   NOTE(review): what an `undefined` map yields is not visible here —
+ *   confirm.
+ * @returns The {@link ConstantSpeed}, or `null` for a ramp/freeze.
+ */
+declare function constantSpeedOf(timeMap: TimeMap | undefined): ConstantSpeed | null;
+interface StereoData { // Two-channel PCM: the type stretchStereo both takes and resolves to.
+  left: Float32Array;
+  right: Float32Array;
+  /** PCM sample rate; drives the offline stretch render. */
+  sampleRate: number;
+}
+/**
+ * Stretch stereo PCM by `tempo` (2 = twice as fast, half as long) with
+ * pitch preserved. Output length ≈ input / tempo.
+ *
+ * @param data - The stereo PCM to stretch, with its sample rate.
+ * @param tempo - Speed factor; values above 1 shorten the audio.
+ * @returns A promise for the stretched stereo PCM. The time-stretch notes in
+ *   this file state that failures reject, leaving the fallback to the caller.
+ */
+declare function stretchStereo(data: StereoData, tempo: number): Promise<StereoData>;
+//#endregion
+//#region src/audio-sync.d.ts
+/**
+ * Audio-waveform autosync for multicam: two recordings of the same room
+ * align where their loudness envelopes correlate best. RMS envelopes at
+ * 100Hz (10ms buckets) give talking-head-grade sync; normalized
+ * cross-correlation over a bounded lag search finds the offset, and the
+ * peak-vs-noise ratio doubles as a confidence score.
+ *
+ * `SyncResult` itself is what {@link findSyncOffsetMs} resolves to.
+ */
+interface SyncResult {
+  /** How much B starts AFTER A in real time (negative = B started first). */
+  offsetMs: number;
+  /** Peak correlation ÷ runner-up — <1.3 means "don't trust this". */
+  confidence: number;
+}
+interface AudioSyncOptions { // Options for findSyncOffsetMs and extractEnvelope; every field is optional.
+  /** Seconds of audio analyzed from each source. Default 60. */
+  windowS?: number;
+  /** Largest |offset| considered, in seconds. Default 30. */
+  maxLagS?: number;
+  /** Envelope rate (buckets per second). Default 100 (10ms resolution). */
+  rateHz?: number;
+  signal?: AbortSignal; // NOTE(review): presumably cancels the analysis when signalled — confirm how the abort surfaces.
+}
+/**
+ * Normalized cross-correlation of two zero-meaned envelopes. Returns the lag
+ * (in buckets) that best aligns `b` to `a`: positive lag means b's content
+ * happens LATER in its own file, i.e. b started recording earlier.
+ * Pure — unit-testable without decoding.
+ *
+ * Note the sign convention: a positive `lag` means b started EARLIER, while
+ * a positive `SyncResult.offsetMs` means B started LATER.
+ *
+ * @param a - Reference envelope.
+ * @param b - Envelope to align against `a`.
+ * @param maxLagBuckets - Bound of the lag search, in buckets.
+ * @returns The best `lag` in buckets, plus a `confidence` score.
+ */
+declare function crossCorrelateEnvelopes(a: Float32Array, b: Float32Array, maxLagBuckets: number): {
+  lag: number;
+  confidence: number;
+};
+/**
+ * RMS envelope of the first `windowS` seconds at `rateHz` buckets/second.
+ *
+ * Only `windowS`, `rateHz`, and `signal` are read from the options;
+ * `maxLagS` is not destructured here.
+ *
+ * @param src - Blob/File or URL string of the recording.
+ * @returns A promise for the envelope, or `null`.
+ *   NOTE(review): `null` presumably means the source has no audio, matching
+ *   {@link findSyncOffsetMs} — confirm.
+ */
+declare function extractEnvelope(src: MediaSourceLike, {
+  windowS,
+  rateHz,
+  signal
+}?: AudioSyncOptions): Promise<Float32Array | null>;
+/**
+ * The sync offset between two recordings: how many ms after A's recording
+ * started did B's start. Null when either source has no audio.
+ *
+ * @param a - Recording A (the reference).
+ * @param b - Recording B.
+ * @param options - Analysis window, lag bound, envelope rate, and abort
+ *   signal; see {@link AudioSyncOptions}.
+ * @returns A promise for the offset and its confidence, or `null` when
+ *   either source has no audio. A `confidence` below 1.3 is documented on
+ *   {@link SyncResult} as not trustworthy.
+ */
+declare function findSyncOffsetMs(a: MediaSourceLike, b: MediaSourceLike, options?: AudioSyncOptions): Promise<SyncResult | null>;
+//#endregion
+export { type ActiveMediaItem, AudioNotDecodableError, type AudioPeaks, type AudioPeaksOptions, type AudioSyncOptions, type ConstantSpeed, type ContainerFormatEntry, type ContainerFormatId, type ExportFontFaceInit, type ExportProgress, type ExportProjectOptions, type ExportResult, type ExtractAudioOptions, type Filmstrip, type FilmstripOptions, MAX_HASHABLE_BYTES, type MediaProbe, type MediaSourceLike, PreviewMediaPool, type PreviewSyncOptions, ScrubFrameCache, type StereoData, type SyncResult, type ThumbnailOptions, bucketPeaks, constantSpeedOf, createAssetFromFile, crossCorrelateEnvelopes, ensureFallbackAudioEncoders, exportProject, extractAudioPeaks, extractAudioToWav, extractEnvelope, findSyncOffsetMs, getActiveMediaItems, getContainerFormat, getExportSupport, getFilmstrip, getVideoThumbnail, getVideoThumbnailUrl, hashBlob, inputFor, isMediaStoreSupported, listContainerFormats, loadMediaBlob, probeImage, probeMedia, pruneMediaBlobs, registerContainerFormat, saveMediaBlob, stretchStereo };