@omnimedia/omnitool 1.1.0-1 → 1.1.0-10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. package/README.md +1 -1
  2. package/package.json +12 -9
  3. package/s/context.ts +1 -1
  4. package/s/demo/demo.bundle.ts +6 -2
  5. package/s/demo/demo.css +5 -0
  6. package/s/demo/routines/filmstrip-test.ts +2 -2
  7. package/s/demo/routines/transcode-test.ts +4 -2
  8. package/s/demo/routines/transcriber-test.ts +34 -0
  9. package/s/demo/routines/transitions-test.ts +43 -0
  10. package/s/driver/driver.ts +17 -9
  11. package/s/driver/fns/host.ts +7 -6
  12. package/s/driver/fns/schematic.ts +45 -22
  13. package/s/driver/fns/work.ts +163 -151
  14. package/s/driver/utils/load-decoder-source.ts +3 -4
  15. package/s/features/speech/transcribe/default-spec.ts +11 -0
  16. package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
  17. package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
  18. package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
  19. package/s/features/speech/transcribe/transcriber.ts +46 -0
  20. package/s/features/speech/transcribe/types.ts +82 -0
  21. package/s/features/speech/transcribe/worker.bundle.ts +40 -0
  22. package/s/features/transition/parts/fragment.ts +24 -0
  23. package/s/features/transition/parts/types.ts +94 -0
  24. package/s/features/transition/parts/uniforms.ts +29 -0
  25. package/s/features/transition/parts/vertex.ts +31 -0
  26. package/s/features/transition/transition.ts +60 -0
  27. package/s/index.html.ts +6 -1
  28. package/s/timeline/index.ts +1 -0
  29. package/s/timeline/parts/basics.ts +1 -1
  30. package/s/timeline/parts/compositor/export.ts +77 -0
  31. package/s/timeline/parts/compositor/parts/html-tree.ts +37 -0
  32. package/s/timeline/parts/compositor/parts/schedulers.ts +85 -0
  33. package/s/timeline/parts/compositor/parts/tree-builder.ts +184 -0
  34. package/s/timeline/parts/compositor/parts/webcodecs-tree.ts +30 -0
  35. package/s/timeline/parts/compositor/playback.ts +81 -0
  36. package/s/timeline/parts/compositor/samplers/html.ts +115 -0
  37. package/s/timeline/parts/compositor/samplers/webcodecs.ts +60 -0
  38. package/s/timeline/parts/item.ts +38 -6
  39. package/s/timeline/parts/media.ts +21 -0
  40. package/s/timeline/parts/waveform.ts +1 -1
  41. package/s/timeline/sugar/builders.ts +102 -0
  42. package/s/timeline/sugar/o.ts +117 -27
  43. package/s/timeline/sugar/omni-test.ts +2 -2
  44. package/s/timeline/sugar/omni.ts +21 -11
  45. package/s/timeline/types.ts +29 -0
  46. package/s/timeline/utils/audio-stream.ts +15 -0
  47. package/s/timeline/utils/checksum.ts +2 -1
  48. package/s/timeline/utils/matrix.ts +33 -0
  49. package/s/timeline/utils/video-cursor.ts +40 -0
  50. package/s/tools/common/loader.ts +26 -0
  51. package/s/tools/common/transformer-pipeline.ts +26 -0
  52. package/s/tools/speech-recognition/common/model.ts +26 -0
  53. package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
  54. package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
  55. package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
  56. package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
  57. package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
  58. package/s/tools/speech-recognition/whisper/tool.ts +70 -0
  59. package/x/context.js +1 -1
  60. package/x/context.js.map +1 -1
  61. package/x/demo/demo.bundle.js +6 -2
  62. package/x/demo/demo.bundle.js.map +1 -1
  63. package/x/demo/demo.bundle.min.js +39 -37
  64. package/x/demo/demo.bundle.min.js.map +4 -4
  65. package/x/demo/demo.css +5 -0
  66. package/x/demo/routines/filmstrip-test.d.ts +1 -1
  67. package/x/demo/routines/filmstrip-test.js +2 -2
  68. package/x/demo/routines/filmstrip-test.js.map +1 -1
  69. package/x/demo/routines/transcode-test.js +4 -2
  70. package/x/demo/routines/transcode-test.js.map +1 -1
  71. package/x/demo/routines/transcriber-test.d.ts +4 -0
  72. package/x/demo/routines/transcriber-test.js +33 -0
  73. package/x/demo/routines/transcriber-test.js.map +1 -0
  74. package/x/demo/routines/transitions-test.d.ts +5 -0
  75. package/x/demo/routines/transitions-test.js +35 -0
  76. package/x/demo/routines/transitions-test.js.map +1 -0
  77. package/x/driver/driver.d.ts +3 -5
  78. package/x/driver/driver.js +16 -9
  79. package/x/driver/driver.js.map +1 -1
  80. package/x/driver/driver.worker.bundle.min.js +2537 -148
  81. package/x/driver/driver.worker.bundle.min.js.map +4 -4
  82. package/x/driver/fns/host.d.ts +9 -2
  83. package/x/driver/fns/host.js +3 -3
  84. package/x/driver/fns/host.js.map +1 -1
  85. package/x/driver/fns/schematic.d.ts +39 -21
  86. package/x/driver/fns/work.d.ts +11 -4
  87. package/x/driver/fns/work.js +111 -102
  88. package/x/driver/fns/work.js.map +1 -1
  89. package/x/driver/utils/load-decoder-source.d.ts +2 -1
  90. package/x/driver/utils/load-decoder-source.js +2 -3
  91. package/x/driver/utils/load-decoder-source.js.map +1 -1
  92. package/x/features/speech/transcribe/default-spec.d.ts +2 -0
  93. package/x/features/speech/transcribe/default-spec.js +8 -0
  94. package/x/features/speech/transcribe/default-spec.js.map +1 -0
  95. package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
  96. package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
  97. package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
  98. package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
  99. package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
  100. package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
  101. package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
  102. package/x/features/speech/transcribe/parts/transcribe.js +56 -0
  103. package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
  104. package/x/features/speech/transcribe/transcriber.d.ts +5 -0
  105. package/x/features/speech/transcribe/transcriber.js +33 -0
  106. package/x/features/speech/transcribe/transcriber.js.map +1 -0
  107. package/x/features/speech/transcribe/types.d.ts +66 -0
  108. package/x/features/speech/transcribe/types.js +2 -0
  109. package/x/features/speech/transcribe/types.js.map +1 -0
  110. package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
  111. package/x/features/speech/transcribe/worker.bundle.js +33 -0
  112. package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
  113. package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
  114. package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
  115. package/x/features/transition/parts/fragment.d.ts +1 -0
  116. package/x/features/transition/parts/fragment.js +25 -0
  117. package/x/features/transition/parts/fragment.js.map +1 -0
  118. package/x/features/transition/parts/types.d.ts +23 -0
  119. package/x/features/transition/parts/types.js +2 -0
  120. package/x/features/transition/parts/types.js.map +1 -0
  121. package/x/features/transition/parts/uniforms.d.ts +31 -0
  122. package/x/features/transition/parts/uniforms.js +27 -0
  123. package/x/features/transition/parts/uniforms.js.map +1 -0
  124. package/x/features/transition/parts/vertex.d.ts +1 -0
  125. package/x/features/transition/parts/vertex.js +32 -0
  126. package/x/features/transition/parts/vertex.js.map +1 -0
  127. package/x/features/transition/transition.d.ts +5 -0
  128. package/x/features/transition/transition.js +50 -0
  129. package/x/features/transition/transition.js.map +1 -0
  130. package/x/index.html +13 -3
  131. package/x/index.html.js +6 -1
  132. package/x/index.html.js.map +1 -1
  133. package/x/timeline/index.d.ts +1 -0
  134. package/x/timeline/index.js +1 -0
  135. package/x/timeline/index.js.map +1 -1
  136. package/x/timeline/parts/basics.d.ts +1 -1
  137. package/x/timeline/parts/compositor/export.d.ts +9 -0
  138. package/x/timeline/parts/compositor/export.js +64 -0
  139. package/x/timeline/parts/compositor/export.js.map +1 -0
  140. package/x/timeline/parts/compositor/parts/html-tree.d.ts +3 -0
  141. package/x/timeline/parts/compositor/parts/html-tree.js +40 -0
  142. package/x/timeline/parts/compositor/parts/html-tree.js.map +1 -0
  143. package/x/timeline/parts/compositor/parts/schedulers.d.ts +15 -0
  144. package/x/timeline/parts/compositor/parts/schedulers.js +64 -0
  145. package/x/timeline/parts/compositor/parts/schedulers.js.map +1 -0
  146. package/x/timeline/parts/compositor/parts/tree-builder.d.ts +37 -0
  147. package/x/timeline/parts/compositor/parts/tree-builder.js +147 -0
  148. package/x/timeline/parts/compositor/parts/tree-builder.js.map +1 -0
  149. package/x/timeline/parts/compositor/parts/webcodecs-tree.d.ts +3 -0
  150. package/x/timeline/parts/compositor/parts/webcodecs-tree.js +28 -0
  151. package/x/timeline/parts/compositor/parts/webcodecs-tree.js.map +1 -0
  152. package/x/timeline/parts/compositor/playback.d.ts +19 -0
  153. package/x/timeline/parts/compositor/playback.js +71 -0
  154. package/x/timeline/parts/compositor/playback.js.map +1 -0
  155. package/x/timeline/parts/compositor/samplers/html.d.ts +3 -0
  156. package/x/timeline/parts/compositor/samplers/html.js +106 -0
  157. package/x/timeline/parts/compositor/samplers/html.js.map +1 -0
  158. package/x/timeline/parts/compositor/samplers/webcodecs.d.ts +2 -0
  159. package/x/timeline/parts/compositor/samplers/webcodecs.js +55 -0
  160. package/x/timeline/parts/compositor/samplers/webcodecs.js.map +1 -0
  161. package/x/timeline/parts/item.d.ts +34 -8
  162. package/x/timeline/parts/item.js +6 -3
  163. package/x/timeline/parts/item.js.map +1 -1
  164. package/x/timeline/parts/media.d.ts +3 -0
  165. package/x/timeline/parts/media.js +17 -0
  166. package/x/timeline/parts/media.js.map +1 -1
  167. package/x/timeline/parts/waveform.js +1 -1
  168. package/x/timeline/parts/waveform.js.map +1 -1
  169. package/x/timeline/sugar/builders.d.ts +1 -0
  170. package/x/timeline/sugar/builders.js +104 -0
  171. package/x/timeline/sugar/builders.js.map +1 -0
  172. package/x/timeline/sugar/o.d.ts +23 -5
  173. package/x/timeline/sugar/o.js +93 -27
  174. package/x/timeline/sugar/o.js.map +1 -1
  175. package/x/timeline/sugar/omni-test.js +1 -1
  176. package/x/timeline/sugar/omni-test.js.map +1 -1
  177. package/x/timeline/sugar/omni.d.ts +5 -2
  178. package/x/timeline/sugar/omni.js +17 -9
  179. package/x/timeline/sugar/omni.js.map +1 -1
  180. package/x/timeline/types.d.ts +24 -0
  181. package/x/timeline/types.js +2 -0
  182. package/x/timeline/types.js.map +1 -0
  183. package/x/timeline/utils/audio-stream.d.ts +6 -0
  184. package/x/timeline/utils/audio-stream.js +17 -0
  185. package/x/timeline/utils/audio-stream.js.map +1 -0
  186. package/x/timeline/utils/checksum.js +2 -1
  187. package/x/timeline/utils/checksum.js.map +1 -1
  188. package/x/timeline/utils/matrix.d.ts +8 -0
  189. package/x/timeline/utils/matrix.js +26 -0
  190. package/x/timeline/utils/matrix.js.map +1 -0
  191. package/x/timeline/utils/video-cursor.d.ts +10 -0
  192. package/x/timeline/utils/video-cursor.js +36 -0
  193. package/x/timeline/utils/video-cursor.js.map +1 -0
  194. package/x/tools/common/loader.d.ts +19 -0
  195. package/x/tools/common/loader.js +18 -0
  196. package/x/tools/common/loader.js.map +1 -0
  197. package/x/tools/common/transformer-pipeline.d.ts +8 -0
  198. package/x/tools/common/transformer-pipeline.js +24 -0
  199. package/x/tools/common/transformer-pipeline.js.map +1 -0
  200. package/x/tools/speech-recognition/common/model.d.ts +14 -0
  201. package/x/tools/speech-recognition/common/model.js +16 -0
  202. package/x/tools/speech-recognition/common/model.js.map +1 -0
  203. package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
  204. package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
  205. package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
  206. package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
  207. package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
  208. package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
  209. package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
  210. package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
  211. package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
  212. package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
  213. package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
  214. package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
  215. package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
  216. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
  217. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
  218. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
  219. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
  220. package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
  221. package/x/tools/speech-recognition/whisper/tool.js +63 -0
  222. package/x/tools/speech-recognition/whisper/tool.js.map +1 -0

package/s/timeline/sugar/o.ts
@@ -1,51 +1,119 @@
 
-import {MapG} from "@e280/stz"
-import {Id} from "../parts/basics.js"
 import {Media} from "../parts/media.js"
+import {Id, TimelineFile} from "../parts/basics.js"
 import {Effect, Item, Kind} from "../parts/item.js"
+import {Transform, TransformOptions, Vec2} from "../types.js"
 
 export class O {
 	#nextId = 0
-	#items = new MapG<Id, Item.Any>()
+
+	constructor(public state: {project: TimelineFile}) {}
+
+	require<T extends Item.Any>(id: Id): T {
+		const item = this.state.project.items.find(item => item.id === id)
+		return item as T
+	}
 
 	#getId() {
 		return this.#nextId++
 	}
 
-	register(item: Item.Any) {
-		if (!this.#items.has(item.id))
-			this.#items.set(item.id, item)
-		return item.id
+	#mutate(fn: (project: TimelineFile) => TimelineFile) {
+		this.state.project = fn(this.state.project)
+	}
+
+	spatial = (transform: Transform): Item.Spatial => {
+		const item: Item.Spatial = {
+			id: this.#getId(),
+			kind: Kind.Spatial,
+			transform
+		}
+		return item
+	}
+
+	sequence = (...items: Item.Any[]): Item.Any => {
+		const item = {
+			id: this.#getId(),
+			kind: Kind.Sequence,
+			childrenIds: items.map(item => item.id)
+		} as Item.Sequence
+
+		this.#mutate(state => {
+			state.items.push(item, ...items)
+			return state
+		})
+
+		return item
 	}
 
-	get items() {
-		return [...this.#items.values()]
+	stack = (...items: Item.Any[]): Item.Any => {
+		const item = {
+			kind: Kind.Stack,
+			id: this.#getId(),
+			childrenIds: items.map(item => item.id)
+		} as Item.Stack
+
+		this.#mutate(state => {
+			state.items.push(item, ...items)
+			return state
+		})
+
+		return item
 	}
 
-	sequence = (...items: Item.Any[]): Item.Sequence => ({
-		id: this.#getId(),
-		kind: Kind.Sequence,
-		children: items.map(item => this.register(item)),
-	})
+	video = (
+		media: Media,
+		options?: {
+			start?: number,
+			duration?: number
+		}): Item.Video => {
 
-	stack = (...items: Item.Any[]): Item.Stack => ({
-		id: this.#getId(),
-		kind: Kind.Stack,
-		children: items.map(item => this.register(item)),
-	})
+		if(!media.hasVideo)
+			throw new Error(`Video clip error: media "${media.datafile.filename}" has no video track.`)
 
-	clip = (media: Media, start?: number, duration?: number): Item.Clip => ({
-		id: this.#getId(),
-		kind: Kind.Clip,
-		mediaHash: media.datafile.checksum.hash,
-		start: start ?? 0,
-		duration: duration ?? media.duration,
-	})
+		const item: Item.Video = {
+			kind: Kind.Video,
+			id: this.#getId(),
+			mediaHash: media.datafile.checksum.hash,
+			start: options?.start ?? 0,
+			duration: options?.duration ?? media.duration
+		}
+
+		return item
+	}
+
+	audio = (
+		media: Media,
+		options?: {
+			start?: number,
+			duration?: number
+		}): Item.Audio => {
+
+		if(!media.hasAudio)
+			throw new Error(`Audio clip error: media "${media.datafile.filename}" has no audio track.`)
+
+		const item: Item.Audio = {
+			kind: Kind.Audio,
+			id: this.#getId(),
+			mediaHash: media.datafile.checksum.hash,
+			start: options?.start ?? 0,
+			duration: options?.duration ?? media.duration
+		}
+
+		return item
+	}
 
 	text = (content: string): Item.Text => ({
 		id: this.#getId(),
-		kind: Kind.Text,
 		content,
+		kind: Kind.Text,
+		color: "#FFFFF"
+	})
+
+	gap = (duration: number): Item.Gap => ({
+		id: this.#getId(),
+		kind: Kind.Gap,
+		duration
 	})
 
 	transition = {
@@ -56,5 +124,27 @@ export class O {
 			duration,
 		}),
 	}
+
+	transform = (options?: TransformOptions): Transform => {
+		const position: Vec2 = [
+			options?.position?.[0] ?? 0,
+			options?.position?.[1] ?? 0
+		]
+		const scale: Vec2 = [
+			options?.scale?.[0] ?? 1,
+			options?.scale?.[1] ?? 1
+		]
+		const rotation = options?.rotation ?? 0
+		return [position, scale, rotation]
+	}
+
+	addChildren(parent: Item.Stack | Item.Sequence, ...items: Item.Any[]) {
+		this.#mutate(state => {
+			const parentItem = state.items.find(({id}) => id === parent.id) as Item.Stack
+			parentItem.childrenIds.push(...items.map(item => item.id))
+			state.items.push(...items)
+			return state
+		})
+	}
 }
 

package/s/timeline/sugar/omni-test.ts
@@ -22,10 +22,10 @@ const {mediaA, mediaB} = await omni.load({
 //
 
 const timeline = omni.timeline(o => o.sequence(
-	o.clip(mediaA),
+	o.video(mediaA),
 	o.transition.crossfade(600),
 	o.stack(
-		o.clip(mediaB),
+		o.video(mediaB),
 		o.text("hello world"),
 	),
 ))
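
The omni-test.ts change above shows the minimal path through the new builder. A slightly fuller sketch of the same API, for orientation only (not code from the package; `media` stands in for a Media returned by omni.load, and the numeric start/duration values are arbitrary):

const timeline = omni.timeline(o => {
	const root = o.stack(
		o.video(media, {start: 0, duration: 2000}),  // throws if the media has no video track
		o.audio(media),                              // defaults: start 0, full media duration
		o.text("caption"),
	)
	// stacks and sequences can be extended after creation
	o.addChildren(root, o.gap(500))
	return root
})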

package/s/timeline/sugar/omni.ts
@@ -2,12 +2,15 @@
 import {O} from "./o.js"
 import {Item} from "../parts/item.js"
 import {Media} from "../parts/media.js"
-import {TimelineFile} from "../parts/basics.js"
 import {Datafile} from "../utils/datafile.js"
+import {TimelineFile} from "../parts/basics.js"
+import {Export} from "../parts/compositor/export.js"
 import {ResourcePool} from "../parts/resource-pool.js"
+import {RenderConfig} from "../../driver/fns/schematic.js"
 
 export class Omni {
 	resources = new ResourcePool()
+	#export = new Export()
 
 	load = async<S extends Record<string, Promise<Datafile>>>(spec: S) => {
 		return Object.fromEntries(await Promise.all(Object.entries(spec).map(
@@ -15,16 +18,23 @@ export class Omni {
 		))) as {[K in keyof S]: Media}
 	}
 
-	timeline = (fn: (o: O) => Item.Sequence): TimelineFile => {
-		const o = new O()
-		const sequence = fn(o)
-		return {
-			format: "timeline",
-			info: "https://omniclip.app/",
-			version: 0,
-			root: o.register(sequence),
-			items: o.items,
-		}
+	timeline = (fn: (o: O) => Item.Any): TimelineFile => {
+		const o = new O({
+			project: {
+				format: "timeline",
+				info: "https://omniclip.app/",
+				version: 0,
+				items: [],
+				rootId: 0
+			}
+		})
+		const root = fn(o)
+		o.state.project.rootId = root.id
+		return o.state.project
+	}
+
+	render = async (timeline: TimelineFile, config: RenderConfig) => {
+		await this.#export.render(timeline)
 	}
 }
 

package/s/timeline/types.ts
@@ -0,0 +1,29 @@
+export type Interpolation = "linear" | "catmullRom"
+export type Keyframe<Value = number> = [time: number, value: Value]
+export type Keyframes<Value = number> = Keyframe<Value>[]
+export type Vec2 = [x: number, y: number]
+export type Transform = [position: Vec2, scale: Vec2, rotation: number]
+
+export type TrackVec2 = {
+	x: Keyframes
+	y: Keyframes
+}
+
+export type Anim<T> = {
+	terp: Interpolation
+	track: T
+}
+
+export type Animations = Anim<TrackTransform>
+
+export type TrackTransform = {
+	position: TrackVec2
+	scale: TrackVec2
+	rotation: Keyframes
+}
+
+export type TransformOptions = {
+	position?: Vec2
+	scale?: Vec2
+	rotation?: number
+}
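
The timeline types above are plain tuples plus keyframe tracks. An illustrative set of values that satisfies them (not from the package; keyframe times are assumed to be in milliseconds, and rotation is in degrees as matrix.ts below converts it with rotDeg * Math.PI / 180):

import {Transform, Anim, TrackTransform} from "./types.js"

// position (20, 40), uniform 1.5x scale, 90 degree rotation
const still: Transform = [[20, 40], [1.5, 1.5], 90]

// animate x from 0 to 100 over the first second, hold everything else
const anim: Anim<TrackTransform> = {
	terp: "linear",
	track: {
		position: {x: [[0, 0], [1000, 100]], y: [[0, 40]]},
		scale: {x: [[0, 1.5]], y: [[0, 1.5]]},
		rotation: [[0, 90]],
	},
}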

package/s/timeline/utils/audio-stream.ts
@@ -0,0 +1,15 @@
+export class AudioStream {
+	constructor(private reader: ReadableStreamDefaultReader<AudioData>) {}
+
+	async *stream(): AsyncGenerator<AudioData> {
+		while (true) {
+			const {done, value: hit} = await this.reader.read()
+			if (done) {
+				break
+			}
+			yield hit
+		}
+	}
+
+	cancel = async () => await this.reader.cancel()
+}
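
AudioStream is a thin async-generator wrapper over a WebCodecs AudioData reader. A sketch of how it might be consumed (illustrative; `decodedAudio` is an assumed ReadableStream<AudioData>, e.g. from the driver's decode path):

const stream = new AudioStream(decodedAudio.getReader())

for await (const audioData of stream.stream()) {
	// ...copy samples out, mix, or feed an encoder...
	audioData.close()  // AudioData frames must be closed by the consumer
}
await stream.cancel()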

package/s/timeline/utils/checksum.ts
@@ -10,7 +10,8 @@ export class Checksum {
 	) {}
 
 	static async make(data: Uint8Array) {
-		const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data))
+		const data2 = new Uint8Array(data)
+		const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data2))
 		const hash = Hex.fromBytes(bytes)
 		const nickname = Thumbprint.sigil.fromBytes(bytes)
 		return new this(data, bytes, hash, nickname)

package/s/timeline/utils/matrix.ts
@@ -0,0 +1,33 @@
+import {Matrix} from "pixi.js"
+import {Transform} from "../types.js"
+
+export const transformToMat6 = (t: Transform): Mat6 => {
+	const [pos, scl, rotDeg] = t
+	const [x, y] = pos
+	const [sx, sy] = scl
+	const r = rotDeg * Math.PI / 180
+	const cos = Math.cos(r)
+	const sin = Math.sin(r)
+	return [cos * sx, sin * sx, -sin * sy, cos * sy, x, y]
+}
+
+export const mat6ToMatrix = ([a, b, c, d, tx, ty]: Mat6): Matrix =>
+	new Matrix(a, b, c, d, tx, ty)
+
+export const transformToMatrix = (t: Transform) => mat6ToMatrix(transformToMat6(t))
+
+export const mul6 = (local: Mat6, parent: Mat6): Mat6 => {
+	const [a1, b1, c1, d1, tx1, ty1] = local
+	const [a2, b2, c2, d2, tx2, ty2] = parent
+	return [
+		a1 * a2 + c1 * b2,
+		b1 * a2 + d1 * b2,
+		a1 * c2 + c1 * d2,
+		b1 * c2 + d1 * d2,
+		a1 * tx2 + c1 * ty2 + tx1,
+		b1 * tx2 + d1 * ty2 + ty1
+	]
+}
+
+export const I6: Mat6 = [1, 0, 0, 1, 0, 0]
+export type Mat6 = [a: number, b: number, c: number, d: number, tx: number, ty: number]
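
Mat6 packs the 2D affine transform into a 6-tuple in the same argument order pixi.js Matrix(a, b, c, d, tx, ty) takes. A quick worked example, following the formulas above:

// 90 degree rotation with scale (2, 3) at position (10, 20); cos 90 = 0, sin 90 = 1
transformToMat6([[10, 20], [2, 3], 90])
// -> [0, 2, -3, 0, 10, 20]  (up to floating-point error in Math.cos(Math.PI / 2))

// an identity parent leaves the local matrix unchanged
mul6([0, 2, -3, 0, 10, 20], I6)
// -> [0, 2, -3, 0, 10, 20]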

package/s/timeline/utils/video-cursor.ts
@@ -0,0 +1,40 @@
+/**
+ * A stateful, forward-only frame cursor for a single clip instance.
+ * It efficiently reads a video stream to find the frame nearest to a target timestamp.
+ */
+
+export class VideoCursor {
+	constructor(private reader: ReadableStreamDefaultReader<VideoFrame>) {}
+
+	async atOrNear(targetUs: number): Promise<VideoFrame | undefined> {
+		let prev: VideoFrame | null = null
+		while (true) {
+			const {done, value: hit} = await this.reader.read()
+
+			if (done) {
+				const out = prev ? new VideoFrame(prev) : undefined
+				prev?.close()
+				return out
+			}
+
+			const hitUs = hit.timestamp ?? 0
+			if (hitUs >= targetUs) {
+				const prevUs = prev?.timestamp ?? Number.NEGATIVE_INFINITY
+				const usePrev = !!prev && Math.abs(prevUs - targetUs) < Math.abs(hitUs - targetUs)
+
+				const chosen = usePrev ? prev! : hit
+				const other = usePrev ? hit : prev
+
+				const copy = new VideoFrame(chosen)
+				chosen.close()
+				other?.close()
+				return copy
+			}
+
+			prev?.close()
+			prev = hit
+		}
+	}
+
+	cancel = async () => await this.reader.cancel()
+}
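
VideoCursor reads strictly forward, cloning the chosen frame and closing everything else, so the caller owns exactly one VideoFrame per call. A usage sketch (illustrative; `decodedVideo` is an assumed ReadableStream<VideoFrame> and `ctx` an assumed canvas 2D context):

const cursor = new VideoCursor(decodedVideo.getReader())

// targets are in microseconds, matching VideoFrame.timestamp
const frame = await cursor.atOrNear(1_000_000)
if (frame) {
	ctx.drawImage(frame, 0, 0)
	frame.close()
}

// later targets must only increase; earlier frames have already been consumed
const next = await cursor.atOrNear(2_000_000)
next?.close()

await cursor.cancel()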

package/s/tools/common/loader.ts
@@ -0,0 +1,26 @@
+import {pub, Pub} from "@e280/stz"
+import {ProgressItem} from "../speech-recognition/whisper/parts/types.js"
+
+export interface LoaderEvents {
+	onModelLoadProgress: Pub<ProgressItem[]>
+	onTpsUpdate: Pub<[number]>
+}
+
+export abstract class Loader {
+	tps = 0
+
+	static loaderEvents = {
+		onModelLoadProgress: pub<ProgressItem[]>(),
+		onTpsUpdate: pub<[number]>()
+	}
+
+	constructor(public readonly name: string, public model: string) {}
+
+	abstract init(): Promise<void>
+
+	abstract setModel(model: string): void
+
+	setTps(value: number) {
+		this.tps = value
+	}
+}

package/s/tools/common/transformer-pipeline.ts
@@ -0,0 +1,26 @@
+//@ts-ignore
+import {pipeline} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"
+
+import {ProgressCallback} from "../speech-recognition/whisper/parts/types.js"
+
+export class PipelineFactory {
+	instance: any = null
+	model: string | null = null
+
+	constructor(public task: string) {}
+
+	async createInstance(model: string, progressCallback?: ProgressCallback) {
+		this.model = model
+		return this.instance = await pipeline(this.task, this.model, {
+			dtype: {
+				encoder_model:
+					this.model === "onnx-community/whisper-large-v3-turbo"
+						? "fp16"
+						: "fp32",
+				decoder_model_merged: "q4",
+			},
+			device: "webgpu",
+			progress_callback: progressCallback,
+		})
+	}
+}
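
PipelineFactory just records the task and model and hands back a transformers.js pipeline; the reuse-or-rebuild decision lives in the whisper work function further down. A hedged sketch of creating an ASR instance (the model id is taken from the SpeechRecognizerModels type below; loading fetches ONNX weights over the network and targets WebGPU, per the options above):

const factory = new PipelineFactory("automatic-speech-recognition")
const transcriber = await factory.createInstance(
	"onnx-community/whisper-tiny_timestamped",
	data => console.log("load progress", data),
)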

package/s/tools/speech-recognition/common/model.ts
@@ -0,0 +1,26 @@
+import {pub} from "@e280/stz"
+
+import {Loader} from "../../common/loader.js"
+import {DecoderSource} from "../../../driver/fns/schematic.js"
+import {SpeechRecognizerModels, Word, WordGroup} from "../whisper/parts/types.js"
+
+export abstract class SpeechRecognizer extends Loader {
+	multilingual = true
+
+	static speechRecognizerEvents = {
+		onTranscriptionChunk: pub<Word[]>(),
+		onTranscribeProgress: pub<[number]>()
+	}
+
+	abstract transcribe(input: DecoderSource): Promise<WordGroup>
+
+	setMultilingual(value: boolean) {
+		this.multilingual = value
+	}
+
+	detectLanguage?(input: Blob | AudioBuffer): Promise<string>
+
+	setModel(value: SpeechRecognizerModels) {
+		this.model = value
+	}
+}

package/s/tools/speech-recognition/whisper/fns/host.ts
@@ -0,0 +1,25 @@
+
+import {Comrade} from "@e280/comrade"
+import {ProgressItem} from "../parts/types.js"
+import {SpeechRecognizerHostEvents, WhisperSchematic} from "./schematic.js"
+
+export const setupWhisperHost = (events: SpeechRecognizerHostEvents) => (
+	Comrade.host<WhisperSchematic>(_shell => ({
+		async updateModelLoadProgress(item) {
+			events.onModelLoadProgress.pub(item)
+		},
+		async deliverTranscriptionChunk(chunk) {
+			events.onTranscriptionChunk.pub({
+				text: chunk.text,
+				timestamp: chunk.timestamp
+			})
+		},
+		async updateTps(value) {
+			events.onTpsUpdate.pub(value)
+		},
+		async updateTranscribeProgress(value) {
+			events.onTranscribeProgress(value)
+		}
+	}))
+)
+

package/s/tools/speech-recognition/whisper/fns/schematic.ts
@@ -0,0 +1,23 @@
+import {Pub} from "@e280/stz"
+import {AsSchematic} from "@e280/comrade"
+
+import {LoaderEvents} from "../../../common/loader.js"
+import {ProgressItem, TranscriptionChunk, TranscriptionMessage, TranscriptionResult, Word} from "../parts/types.js"
+
+export type WhisperSchematic = AsSchematic<{
+	work: {
+		transcribe(input: TranscriptionMessage): Promise<TranscriptionResult | null>
+	},
+
+	host: {
+		updateModelLoadProgress(item: ProgressItem): Promise<void>
+		deliverTranscriptionChunk(chunk: TranscriptionChunk): Promise<void>
+		updateTps(value: number): Promise<void>
+		updateTranscribeProgress(value: number): Promise<void>
+	}
+}>
+
+export interface SpeechRecognizerHostEvents extends LoaderEvents {
+	onTranscriptionChunk: Pub<Word[]>
+	onTranscribeProgress: Pub<[number]>
+}
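
The schematic splits the whisper tool across the worker boundary: work exposes transcribe, host exposes the progress and chunk callbacks. One way the host events could be wired up with pub() from @e280/stz, mirroring the pattern in loader.ts above (illustrative only; the actual wiring presumably lives in whisper/tool.ts, which is not expanded in this diff):

import {pub} from "@e280/stz"
import {setupWhisperHost} from "./host.js"
import {ProgressItem, Word} from "../parts/types.js"

// channels the worker-side callbacks publish into
const events = {
	onModelLoadProgress: pub<ProgressItem[]>(),
	onTpsUpdate: pub<[number]>(),
	onTranscriptionChunk: pub<Word[]>(),
	onTranscribeProgress: pub<[number]>(),
}

const host = setupWhisperHost(events)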

package/s/tools/speech-recognition/whisper/fns/work.ts
@@ -0,0 +1,91 @@
+import {Comrade} from "@e280/comrade"
+//@ts-ignore
+import {pipeline, WhisperTextStreamer} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"
+
+import {WhisperSchematic} from "./schematic.js"
+import {TranscriptionChunk} from "../parts/types.js"
+import {PipelineFactory} from "../../../common/transformer-pipeline.js"
+
+// TODO suspicious globals, probably bad
+const pipeline = new PipelineFactory("automatic-speech-recognition")
+let transcriber: any
+
+export const setupWhisperWork = Comrade.work<WhisperSchematic>(shell => ({
+	async transcribe({audio, model, language, duration}) {
+		const isDistil = model.startsWith("distil-whisper/")
+
+		if(!pipeline.model || pipeline.model !== model) {
+			pipeline.instance?.dispose()?.()
+			pipeline.instance = null
+			transcriber = await pipeline.createInstance(
+				model,
+				(data) => {
+					if(data.progress)
+						shell.host.updateModelLoadProgress({
+							id: data.file,
+							progress: data.progress
+						})
+				}
+			)
+		}
+
+		const timePrecision =
+			transcriber.processor.feature_extractor.config.chunk_length /
+			transcriber.model.config.max_source_positions
+
+		const chunkLength = isDistil ? 20 : 30
+		const strideLength = isDistil ? 3 : 5
+
+		let chunkCount = 0
+		let startTime: number | null = null
+		let tokenCount = 0
+		let tps = 0
+
+		const chunkDuration = chunkLength - strideLength
+
+		const estimateProgress = () => {
+			const audioProgressSeconds = chunkCount * chunkDuration
+			return Math.min(audioProgressSeconds / duration, 1)
+		}
+
+		const streamer = new WhisperTextStreamer(transcriber.tokenizer, {
+			time_precision: timePrecision,
+			token_callback_function: () => {
+				startTime ??= performance.now()
+				if (++tokenCount > 1) {
+					tps = (tokenCount / (performance.now() - startTime)) * 1000
+					shell.host.updateTps(tps)
+				}
+			},
+			callback_function: (textChunk: any) => {
+				shell.host.deliverTranscriptionChunk(textChunk)
+			},
+			on_finalize: () => {
+				startTime = null
+				tokenCount = 0
+				chunkCount++
+				const progress = estimateProgress()
+				shell.host.updateTranscribeProgress(progress)
+			},
+		})
+
+		const output = await transcriber(audio, {
+			top_k: 0,
+			do_sample: false,
+			chunk_length_s: chunkLength,
+			stride_length_s: strideLength,
+			language,
+			task: "transcribe",
+			return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
+			force_full_sequences: false,
+			streamer,
+		})
+
+		if (!output) return null
+
+		return {
+			tps,
+			...output,
+		}
+	}
+}))
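
The transcription progress reported from on_finalize is a plain arithmetic estimate: each finalized chunk advances the estimate by chunkLength - strideLength seconds of audio. Worked through for a non-distil model (30 s chunks, 5 s stride) on a 100-second input:

// chunkDuration = 30 - 5 = 25 seconds of new audio per finalized chunk
// after 2 chunks: estimateProgress() = Math.min(2 * 25 / 100, 1) = 0.5
// after 4 chunks: Math.min(4 * 25 / 100, 1) = 1, i.e. the estimate saturates at 100%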

package/s/tools/speech-recognition/whisper/parts/types.ts
@@ -0,0 +1,38 @@
+export interface ProgressItem {
+	id: string
+	progress: number
+}
+
+export type Word = {
+	text: string
+	timestamp: [start: number, end: number]
+}
+
+export type WordGroup = Word[]
+export type Transcript = WordGroup[]
+
+export interface TranscriptionChunk {
+	text: string
+	offset: number
+	timestamp: [number, number]
+	finalised: boolean
+}
+
+export interface TranscriptionMessage {
+	audio: Float32Array
+	model: string
+	subtask: string | null
+	language: string | null
+	duration: number
+}
+
+export interface TranscriptionResult {
+	text: string
+	chunks: TranscriptionChunk[]
+	tps: number
+}
+
+export type ProgressCallback = (data: any) => void
+
+export type SpeechRecognizerModels = "onnx-community/whisper-tiny_timestamped"
+export type SpeechRecognizerSubtasks = "transcribe"

package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts
@@ -0,0 +1,7 @@
+import {Comrade} from "@e280/comrade"
+
+import {setupWhisperWork} from "../fns/work.js"
+import {WhisperSchematic} from "../fns/schematic.js"
+
+await Comrade.worker<WhisperSchematic>(setupWhisperWork)
+