npm - @omnimedia/omnitool - Versions diffs - 1.1.0-94 → 1.1.0-96 - Mend

@omnimedia/omnitool 1.1.0-94 → 1.1.0-96

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@omnimedia/omnitool",
-	"version": "1.1.0-94",
+	"version": "1.1.0-96",
 	"description": "open source video processing tools",
 	"license": "MIT",
 	"author": "Przemysław Gałęzki",

package/s/features/bg-remover/bg-remover.ts ADDED Viewed

@@ -0,0 +1,26 @@
+import {queue} from "@e280/stz"
+import {Comrade, LoggerTap, tune} from "@e280/comrade"
+import {BgRemoverOptions, BgRemoverSchematic, RemoverOptions} from "./types.js"
+export async function makeBgRemover({spec, workerUrl, onLoading}: BgRemoverOptions) {
+	const thread = await Comrade.thread<BgRemoverSchematic>({
+		label: "OmnitoolBgRemover",
+		workerUrl,
+		tap: new LoggerTap(),
+		setupHost: () => ({
+			loading: async loading => onLoading(loading),
+		}),
+	})
+	await thread.work.prepare(spec)
+	return {
+		remove: queue(async(input: RemoverOptions) =>
+			await thread.work.remove[tune]({transfer: [input.frame]})(input.frame)
+		),
+		dispose: () => thread.terminate()
+	}
+}

package/s/features/bg-remover/default-spec.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import {PipelineSpec} from "../parts/types.js"
+export const defaultBgRemoverSpec = (): PipelineSpec => ({
+	model: "Xenova/modnet",
+	dtype: "auto",
+	device: "webgpu"
+})

package/s/features/bg-remover/types.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import {AsSchematic} from "@e280/comrade"
+import {Loading, PipelineSpec} from "../parts/types.js"
+export type BgRemoverSchematic = AsSchematic<{
+	work: {
+		prepare(spec: PipelineSpec): Promise<void>
+		remove(request: VideoFrame): Promise<VideoFrame>
+	},
+	host: {
+		loading(load: Loading): Promise<void>
+	}
+}>
+export type RemoverOptions = {
+	frame: VideoFrame
+}
+export type BgRemoverModels = "onnx-community/ISNet-ONNX" | "Xenova/modnet" | "briaai/RMBG-1.4"
+export type BgRemoverOptions = {
+	spec: PipelineSpec
+	workerUrl: URL | string
+	onLoading: (loading: Loading) => void
+}

package/s/features/bg-remover/worker.bundle.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import {defer, once} from "@e280/stz"
+import {Comrade, Host} from "@e280/comrade"
+import {BackgroundRemovalPipeline} from "@huggingface/transformers"
+import {PipelineSpec} from "../parts/types.js"
+import {BgRemoverSchematic} from "./types.js"
+import {loadPipe} from "../parts/load-pipe.js"
+const deferred = defer<{spec: PipelineSpec, pipe: BackgroundRemovalPipeline}>()
+const makePrepare = (host: Host<BgRemoverSchematic>) => once(async(spec: PipelineSpec) => {
+	deferred.resolve({
+		spec,
+		pipe: await loadPipe({
+			spec,
+			task: "background-removal",
+			onLoading: loading => host.loading(loading),
+		}) as BackgroundRemovalPipeline
+	})
+})
+const canvas = new OffscreenCanvas(1920, 1080)
+const ctx = canvas.getContext("2d")
+await Comrade.worker<BgRemoverSchematic>(shell => {
+	const prepare = makePrepare(shell.host)
+	return {
+		prepare,
+		async remove(request) {
+			const {pipe} = await deferred.promise
+			canvas.width = request.displayWidth
+			canvas.height = request.displayHeight
+			ctx?.drawImage(request, 0, 0)
+			const output = await pipe(canvas)
+			const mask = output[0]
+			const frame = new VideoFrame(mask.toCanvas(), {
+				timestamp: request.timestamp,
+				duration: request.duration ?? undefined,
+			})
+			request.close()
+			shell.transfer = [frame]
+			return frame
+		}
+	}
+})

package/s/features/{speech/transcribe/parts → parts}/load-pipe.ts RENAMED Viewed

@@ -1,12 +1,11 @@
 import {pipeline} from "@huggingface/transformers"
+import {PipeOptions} from "./types.js"
-import {TranscriberPipeOptions} from "../types.js"
-export async function loadPipe(options: TranscriberPipeOptions) {
+export async function loadPipe(options: PipeOptions) {
 	const {spec, onLoading} = options
-	const pipe = await pipeline("automatic-speech-recognition", spec.model, {
+	const pipe = await pipeline(options.task, spec.model, {
 		device: spec.device,
 		dtype: spec.dtype,
 		progress_callback: (data: any) => {
@@ -16,4 +15,3 @@ export async function loadPipe(options: TranscriberPipeOptions) {
 	return pipe
 }

package/s/features/parts/types.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import {DataType, DeviceType, TaskType} from "@huggingface/transformers"
+export type Loading = {
+	total: number
+	progress: number
+}
+export type PipelineSpec<Extras extends object = {}> = {
+	model: string
+	dtype: DataType
+	device: DeviceType
+} & Extras
+export type PipeOptions = {
+	spec: PipelineSpec
+	task: TaskType
+	onLoading: (loading: Loading) => void
+}

package/s/features/speech/transcribe/default-spec.ts CHANGED Viewed

@@ -3,8 +3,8 @@ import {TranscriberSpec} from "./types.js"
 export const defaultTranscriberSpec = (): TranscriberSpec => ({
 	model: "onnx-community/whisper-tiny_timestamped",
-	dtype: "q4",
-	device: "wasm",
+	dtype: "auto",
+	device: "webgpu",
 	chunkLength: 20,
 	strideLength: 3,
 })

package/s/features/speech/transcribe/types.ts CHANGED Viewed

@@ -1,7 +1,8 @@
 import {AsSchematic} from "@e280/comrade"
-import {DataType, DeviceType, Pipeline} from "@huggingface/transformers"
+import {Pipeline} from "@huggingface/transformers"
+import {Loading, PipelineSpec} from "../../parts/types.js"
 import {Driver} from "../../../driver/driver.js"
 export type TranscriberSchematic = AsSchematic<{
@@ -17,11 +18,6 @@ export type TranscriberSchematic = AsSchematic<{
 	}
 }>
-export type Loading = {
-	total: number
-	progress: number
-}
 export type TranscribeOptions = {
 	pipe: Pipeline
 	spec: TranscriberSpec
@@ -29,11 +25,6 @@ export type TranscribeOptions = {
 	callbacks: TranscriptionCallbacks
 }
-export type TranscriberPipeOptions = {
-	spec: TranscriberSpec
-	onLoading: (loading: Loading) => void
-}
 export type SpeechTime = [start: number, end: number]
 export type TranscriptWord = {
@@ -48,13 +39,12 @@ export type Transcription = {
 	chunks: TranscriptWord[]
 }
-export type TranscriberSpec = {
-	model: string
-	dtype: DataType
-	device: DeviceType
+export type TranscriberModels = "onnx-community/whisper-tiny_timestamped"
+export type TranscriberSpec = PipelineSpec<{
 	chunkLength: number
 	strideLength: number
-}
+}>
 export type TranscriptionOptions = {
 	source: Blob

package/s/features/speech/transcribe/worker.bundle.ts CHANGED Viewed

@@ -1,9 +1,9 @@
 import {defer, once} from "@e280/stz"
 import {Comrade, Host} from "@e280/comrade"
-import {Pipeline} from "@huggingface/transformers"
+import {AutomaticSpeechRecognitionPipeline, Pipeline} from "@huggingface/transformers"
-import {loadPipe} from "./parts/load-pipe.js"
+import {loadPipe} from "../../parts/load-pipe.js"
 import {transcribe} from "./parts/transcribe.js"
 import {TranscriberSchematic, TranscriberSpec} from "./types.js"
@@ -14,8 +14,9 @@ const makePrepare = (host: Host<TranscriberSchematic>) => once(async(spec: Trans
 		spec,
 		pipe: await loadPipe({
 			spec,
+			task: "automatic-speech-recognition",
 			onLoading: loading => host.loading(loading),
-		}),
+		}) as AutomaticSpeechRecognitionPipeline
 	})
 })

package/s/index.html.ts CHANGED Viewed

@@ -96,7 +96,7 @@ export default ssg.page(import.meta.url, async orb => ({
 						<p>Build timeline and run the playback engine.</p>
 					</header>
 					<div class="demo-controls">
-						<input type="file" accept="video/*,audio/*" />
+						<input type="file" accept="video/*,audio/*,image/*" />
 					</div>
 					<div class="player-canvas"></div>
 					<div class="player">
@@ -120,7 +120,7 @@ export default ssg.page(import.meta.url, async orb => ({
 						<p>Build timeline and export a render.</p>
 					</header>
 					<div class="demo-controls">
-						<input type="file" accept="video/*,audio/*" />
+						<input type="file" accept="video/*,audio/*,image/*" />
 						<button data-action="export" disabled>Export</button>
 					</div>
 					<div class="demo-progress">

package/s/timeline/parts/captions.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import {TransformOptions, Vec2} from "../types.js"
 import {Transcription, TranscriptSegment} from "../../features/speech/transcribe/types.js"
 export type CaptionOptions = {
+	itemId?: Item.Caption["itemId"]
 	start?: number
 	duration?: number
 	styles?: TextStyleOptions

package/s/timeline/parts/item.ts CHANGED Viewed

@@ -21,7 +21,8 @@ export enum Kind {
 	Transition,
 	TextStyle,
 	Filter,
-	Caption
+	Caption,
+	Image
 }
 export enum Effect {
@@ -89,7 +90,16 @@ export namespace Item {
 		spatialId?: Id
 		animationIds?: Id[]
 		filterIds?: Id[]
-		captionId?: Id
+	}
+	export type Image = {
+		id: Id
+		kind: Kind.Image
+		mediaHash: Hash
+		duration: number
+		spatialId?: Id
+		animationIds?: Id[]
+		filterIds?: Id[]
 	}
 	export type Audio = {
@@ -99,7 +109,6 @@ export namespace Item {
 		start: number
 		duration: number
 		gain?: number
-		captionId?: Id
 	}
 	export type Text = {
@@ -118,6 +127,7 @@ export namespace Item {
 		id: Id
 		kind: Kind.Caption
 		transcript: Transcription
+		itemId?: Id
 		start: number
 		duration: number
 		maxChars?: number
@@ -140,6 +150,7 @@ export namespace Item {
 		| Sequence
 		| Stack
 		| Video
+		| Image
 		| Audio
 		| Text
 		| Caption
@@ -154,8 +165,8 @@ export namespace Item {
 export type ContainerItem = Item.Sequence | Item.Stack
 export type NonContainerItem = Exclude<Item.Any, ContainerItem>
-export type FilterableItem = Item.Sequence | Item.Stack | Item.Video | Item.Text | Item.Caption
-export type VisualAnimatableItem = Item.Video | Item.Text | Item.Caption
+export type FilterableItem = Item.Sequence | Item.Stack | Item.Video | Item.Image | Item.Text | Item.Caption
+export type VisualAnimatableItem = Item.Video | Item.Image | Item.Text | Item.Caption
 export type PlayableItem = Item.Any & {
 	start: Ms

package/s/timeline/parts/media.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import {loadDecoderSource} from "../../driver/utils/load-decoder-source.js"
 export class Media {
 	duration = 0
+	isImage = false
 	hasVideo = false
 	hasAudio = false
@@ -14,6 +15,12 @@ export class Media {
 	static async analyze(datafile: Datafile) {
 		const media = new this(datafile)
+		if (this.#isImage(datafile)) {
+			media.isImage = true
+			return media
+		}
 		const duration = (await this.duration(datafile.url)) * 1000
 		media.duration = duration
 		const {video, audio} = await this.#has(datafile.url)
@@ -22,6 +29,10 @@ export class Media {
 		return media
 	}
+	static #isImage(datafile: Datafile) {
+		return datafile.blob.type.startsWith("image/")
+	}
 	static async duration(source: DecoderSource) {
 		const input = new Input({
 			formats: ALL_FORMATS,

package/s/timeline/renderers/parts/handy.ts CHANGED Viewed

@@ -15,6 +15,7 @@ type WalkAtCallbacks = {
 	sequence: (x: Item.Sequence, localTime: Ms, ancestors: AncestorAt[]) => void
 	stack: (x: Item.Stack, localTime: Ms, ancestors: AncestorAt[]) => void
 	video: (x: Item.Video, localTime: Ms, ancestors: AncestorAt[]) => void
+	image: (x: Item.Image, localTime: Ms, ancestors: AncestorAt[]) => void
 	text: (x: Item.Text, localTime: Ms, ancestors: AncestorAt[]) => void
 	caption: (x: Item.Caption, localTime: Ms, ancestors: AncestorAt[]) => void
 	audio: (x: Item.Audio, localTime: Ms, ancestors: AncestorAt[]) => void
@@ -24,6 +25,7 @@ type WalkCallbacks = {
 	sequence?: (x: Item.Sequence, matrix: Mat6, ancestors: AncestorAt[]) => void
 	stack?: (x: Item.Stack, matrix: Mat6, ancestors: AncestorAt[]) => void
 	video?: (x: Item.Video, matrix: Mat6, ancestors: AncestorAt[]) => void
+	image?: (x: Item.Image, matrix: Mat6, ancestors: AncestorAt[]) => void
 	text?: (x: Item.Text, matrix: Mat6, ancestors: AncestorAt[]) => void
 	caption?: (x: Item.Caption, matrix: Mat6, ancestors: AncestorAt[]) => void
 	audio?: (x: Item.Audio) => void
@@ -53,6 +55,7 @@ export function itemsAt(p: Props): At[] {
 		sequence: () => { },
 		stack: () => { },
 		video: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
+		image: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
 		text: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
 		caption: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
 		audio: (item, localTime, ancestors) => results.push({ item, localTime, ancestors })
@@ -74,6 +77,7 @@ export function itemsFrom(p: FromProps): At[] {
 		sequence: () => { },
 		stack: () => { },
 		video: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
+		image: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
 		text: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
 		caption: (item, localTime, ancestors) => results.push({ item, localTime, ancestors }),
 		audio: (item, localTime, ancestors) => results.push({ item, localTime, ancestors })
@@ -176,6 +180,10 @@ export function walk(
 			callbacks.video?.(item, currentMatrix, ancestors)
 			break
+		case Kind.Image:
+			callbacks.image?.(item, currentMatrix, ancestors)
+			break
 		case Kind.Text:
 			callbacks.text?.(item, currentMatrix, ancestors)
 			break
@@ -246,6 +254,10 @@ function walkAt(
 			callbacks.video(item, time, ancestors)
 			break
+		case Kind.Image:
+			callbacks.image(item, time, ancestors)
+			break
 		case Kind.Text:
 			callbacks.text(item, time, ancestors)
 			break
@@ -318,6 +330,10 @@ function walkFrom(
 			callbacks.video(item, from, ancestors)
 			break
+		case Kind.Image:
+			callbacks.image(item, from, ancestors)
+			break
 		case Kind.Text:
 			callbacks.text(item, from, ancestors)
 			break

package/s/timeline/renderers/parts/samplers/visual/parts/defaults.ts CHANGED Viewed

@@ -1,9 +1,11 @@
-import {VideoSink} from "./sink.js"
+import {VideoSink} from "./video-sink.js"
+import {ImageSink} from "./image-sink.js"
 import {Ms} from "../../../../../../units/ms.js"
 import {Item} from "../../../../../parts/item.js"
 export type VideoSampler = (item: Item.Video, time: Ms) => Promise<VideoFrame | undefined>
+export type ImageSampler = (item: Item.Image, time: Ms) => Promise<VideoFrame | undefined>
 export function createDefaultVideoSampler(sink: VideoSink): VideoSampler {
 	return async (item, time) => {
@@ -15,3 +17,7 @@ export function createDefaultVideoSampler(sink: VideoSink): VideoSampler {
 		return frame ?? undefined
 	}
 }
+export function createDefaultImageSampler(sink: ImageSink): ImageSampler {
+	return async (_item, time) => await sink.getFrame(_item.mediaHash, time)
+}

package/s/timeline/renderers/parts/samplers/visual/parts/image-sink.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import {Ms} from "../../../../../../units/ms.js"
+import {Hash} from "../../../../../parts/basics.js"
+import {DecoderSource} from "../../../../../../driver/fns/schematic.js"
+type CachedImage = {
+	bitmap: ImageBitmap
+}
+export class ImageSink {
+	readonly #images = new Map<Hash, CachedImage>()
+	constructor(
+		private resolveMedia: (hash: string) => DecoderSource,
+	) {}
+	async getFrame(hash: Hash, time: Ms) {
+		const image = await this.#getImage(hash)
+		return new VideoFrame(image.bitmap, {
+			timestamp: Math.round(time * 1000),
+		})
+	}
+	async #getImage(hash: Hash) {
+		const existing = this.#images.get(hash)
+		if (existing)
+			return existing
+		const source = this.resolveMedia(hash)
+		const blob = source instanceof Blob
+			? source
+			: await fetch(source).then(response => response.blob())
+		const image = {bitmap: await createImageBitmap(blob)}
+		this.#images.set(hash, image)
+		return image
+	}
+	disposeAll() {
+		for (const image of this.#images.values())
+			image.bitmap.close()
+		this.#images.clear()
+	}
+	dispose(hash: Hash) {
+		const image = this.#images.get(hash)
+		image?.bitmap.close()
+		this.#images.delete(hash)
+	}
+}

package/s/timeline/renderers/parts/samplers/visual/parts/sample.ts CHANGED Viewed

@@ -49,6 +49,13 @@ export async function sampleVisual(
 			return frame ? [{kind: "image", frame, matrix, alpha, crop, filters, id: item.id}] : []
 		}
+		case Kind.Image: {
+			if (time < 0 || time >= item.duration) return []
+			const frame = await ctx.imageSampler(item, time)
+			return frame ? [{kind: "image", frame, matrix, alpha, crop, filters, id: item.id}] : []
+		}
 		case Kind.Text: {
 			if (time < 0 || time >= item.duration) return []

package/s/timeline/renderers/parts/samplers/visual/parts/types.ts CHANGED Viewed

@@ -1,9 +1,10 @@
-import {VideoSampler} from "./defaults.js"
+import {ImageSampler, VideoSampler} from "./defaults.js"
 import {Item} from "../../../../../parts/item.js"
 import {TimelineFile} from "../../../../../parts/basics.js"
 export type SampleContext = {
+	imageSampler: ImageSampler
 	videoSampler: VideoSampler
 	timeline: TimelineFile
 	items: Map<number, Item.Any>

package/s/timeline/renderers/parts/samplers/visual/sampler.ts CHANGED Viewed

@@ -1,17 +1,20 @@
-import {VideoSink} from "./parts/sink.js"
+import {VideoSink} from "./parts/video-sink.js"
+import {ImageSink} from "./parts/image-sink.js"
 import {sampleVisual} from "./parts/sample.js"
 import {Ms} from "../../../../../units/ms.js"
 import {TimelineFile} from "../../../../parts/basics.js"
 import {DecoderSource} from "../../../../../driver/fns/schematic.js"
-import {createDefaultVideoSampler, VideoSampler} from "./parts/defaults.js"
+import {createDefaultImageSampler, createDefaultVideoSampler, VideoSampler} from "./parts/defaults.js"
 export function createVisualSampler(
 	resolveMedia: (hash: string) => DecoderSource,
 	sampleVideo?: VideoSampler
 ) {
-	const sink = new VideoSink(resolveMedia)
-	const videoSampler = sampleVideo ?? createDefaultVideoSampler(sink)
+	const imageSink = new ImageSink(resolveMedia)
+	const videoSink = new VideoSink(resolveMedia)
+	const imageSampler = createDefaultImageSampler(imageSink)
+	const videoSampler = sampleVideo ?? createDefaultVideoSampler(videoSink)
 	return {
 		async sample(timeline: TimelineFile, timecode: Ms) {
@@ -21,7 +24,7 @@ export function createVisualSampler(
 			if (!root)
 				return []
-			return sampleVisual({videoSampler, timeline, items}, root, timecode, [])
+			return sampleVisual({imageSampler, videoSampler, timeline, items}, root, timecode, [])
 		}
 	}
 }

package/s/timeline/sugar/helpers.ts CHANGED Viewed

@@ -56,6 +56,15 @@ export function video(
 	return o => o.video(media, options)
 }
+export function image(
+	media: Media,
+	options?: {
+		duration?: number
+	}
+): Build<Item.Image> {
+	return o => o.image(media, options)
+}
 export function audio(
 	media: Media,
 	options?: {

package/s/timeline/sugar/o.ts CHANGED Viewed

@@ -269,6 +269,25 @@ export class O {
 		return item
 	}
+	image = (
+		media: Media,
+		options?: {
+			duration?: number
+		}): Item.Image => {
+		if(!media.isImage)
+			throw new Error(`Image error: media "${media.datafile.filename}" is not an image.`)
+		const item: Item.Image = {
+			kind: Kind.Image,
+			id: this.getId(),
+			mediaHash: media.datafile.checksum.hash,
+			duration: options?.duration ?? 2000
+		}
+		this.register(item)
+		return item
+	}
 	audio = (
 		media: Media,
 		options?: {
@@ -322,6 +341,7 @@ export class O {
 			id: this.getId(),
 			kind: Kind.Caption,
 			transcript,
+			itemId: options?.itemId,
 			start,
 			duration,
 			maxChars: options?.maxChars,
@@ -343,10 +363,10 @@ export class O {
 		const action = ((item: CaptionSourceItem, transcript: Transcription, options?: CaptionOptions): Item.Stack => {
 			const caption = make(transcript, {
 				...options,
+				itemId: item.id,
 				start: options?.start ?? item.start,
 				duration: options?.duration ?? item.duration,
 			})
-			this.set<CaptionSourceItem>(item.id, {captionId: caption.id})
 			return this.stack(caption, item)
 		}) as CaptionAction