@omnimedia/omnitool 1.1.0-3 → 1.1.0-5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/package.json +11 -9
  2. package/s/context.ts +1 -1
  3. package/s/demo/demo.bundle.ts +6 -2
  4. package/s/demo/routines/filmstrip-test.ts +2 -2
  5. package/s/demo/routines/transcriber-test.ts +34 -0
  6. package/s/demo/routines/transitions-test.ts +43 -0
  7. package/s/driver/fns/host.ts +7 -6
  8. package/s/driver/fns/schematic.ts +1 -1
  9. package/s/driver/fns/work.ts +116 -119
  10. package/s/driver/utils/load-decoder-source.ts +3 -4
  11. package/s/features/speech/transcribe/default-spec.ts +11 -0
  12. package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
  13. package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
  14. package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
  15. package/s/features/speech/transcribe/transcriber.ts +46 -0
  16. package/s/features/speech/transcribe/types.ts +82 -0
  17. package/s/features/speech/transcribe/worker.bundle.ts +40 -0
  18. package/s/features/transition/parts/fragment.ts +24 -0
  19. package/s/features/transition/parts/types.ts +94 -0
  20. package/s/features/transition/parts/uniforms.ts +29 -0
  21. package/s/features/transition/parts/vertex.ts +31 -0
  22. package/s/features/transition/transition.ts +60 -0
  23. package/s/timeline/utils/checksum.ts +2 -1
  24. package/s/tools/common/loader.ts +26 -0
  25. package/s/tools/common/transformer-pipeline.ts +26 -0
  26. package/s/tools/speech-recognition/common/model.ts +26 -0
  27. package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
  28. package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
  29. package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
  30. package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
  31. package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
  32. package/s/tools/speech-recognition/whisper/tool.ts +70 -0
  33. package/x/context.js +1 -1
  34. package/x/context.js.map +1 -1
  35. package/x/demo/demo.bundle.js +6 -2
  36. package/x/demo/demo.bundle.js.map +1 -1
  37. package/x/demo/demo.bundle.min.js +6 -6
  38. package/x/demo/demo.bundle.min.js.map +4 -4
  39. package/x/demo/routines/filmstrip-test.d.ts +1 -1
  40. package/x/demo/routines/filmstrip-test.js +2 -2
  41. package/x/demo/routines/filmstrip-test.js.map +1 -1
  42. package/x/demo/routines/transcriber-test.d.ts +4 -0
  43. package/x/demo/routines/transcriber-test.js +33 -0
  44. package/x/demo/routines/transcriber-test.js.map +1 -0
  45. package/x/demo/routines/transitions-test.d.ts +5 -0
  46. package/x/demo/routines/transitions-test.js +35 -0
  47. package/x/demo/routines/transitions-test.js.map +1 -0
  48. package/x/driver/driver.worker.bundle.min.js +80 -80
  49. package/x/driver/driver.worker.bundle.min.js.map +4 -4
  50. package/x/driver/fns/host.js +3 -3
  51. package/x/driver/fns/host.js.map +1 -1
  52. package/x/driver/fns/schematic.d.ts +1 -1
  53. package/x/driver/fns/work.js +8 -8
  54. package/x/driver/fns/work.js.map +1 -1
  55. package/x/driver/utils/load-decoder-source.d.ts +2 -1
  56. package/x/driver/utils/load-decoder-source.js +2 -3
  57. package/x/driver/utils/load-decoder-source.js.map +1 -1
  58. package/x/features/speech/transcribe/default-spec.d.ts +2 -0
  59. package/x/features/speech/transcribe/default-spec.js +8 -0
  60. package/x/features/speech/transcribe/default-spec.js.map +1 -0
  61. package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
  62. package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
  63. package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
  64. package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
  65. package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
  66. package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
  67. package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
  68. package/x/features/speech/transcribe/parts/transcribe.js +56 -0
  69. package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
  70. package/x/features/speech/transcribe/transcriber.d.ts +5 -0
  71. package/x/features/speech/transcribe/transcriber.js +33 -0
  72. package/x/features/speech/transcribe/transcriber.js.map +1 -0
  73. package/x/features/speech/transcribe/types.d.ts +66 -0
  74. package/x/features/speech/transcribe/types.js +2 -0
  75. package/x/features/speech/transcribe/types.js.map +1 -0
  76. package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
  77. package/x/features/speech/transcribe/worker.bundle.js +33 -0
  78. package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
  79. package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
  80. package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
  81. package/x/features/transition/parts/fragment.d.ts +1 -0
  82. package/x/features/transition/parts/fragment.js +25 -0
  83. package/x/features/transition/parts/fragment.js.map +1 -0
  84. package/x/features/transition/parts/types.d.ts +23 -0
  85. package/x/features/transition/parts/types.js +2 -0
  86. package/x/features/transition/parts/types.js.map +1 -0
  87. package/x/features/transition/parts/uniforms.d.ts +31 -0
  88. package/x/features/transition/parts/uniforms.js +27 -0
  89. package/x/features/transition/parts/uniforms.js.map +1 -0
  90. package/x/features/transition/parts/vertex.d.ts +1 -0
  91. package/x/features/transition/parts/vertex.js +32 -0
  92. package/x/features/transition/parts/vertex.js.map +1 -0
  93. package/x/features/transition/transition.d.ts +5 -0
  94. package/x/features/transition/transition.js +50 -0
  95. package/x/features/transition/transition.js.map +1 -0
  96. package/x/index.html +2 -2
  97. package/x/timeline/utils/checksum.js +2 -1
  98. package/x/timeline/utils/checksum.js.map +1 -1
  99. package/x/tools/common/loader.d.ts +19 -0
  100. package/x/tools/common/loader.js +18 -0
  101. package/x/tools/common/loader.js.map +1 -0
  102. package/x/tools/common/transformer-pipeline.d.ts +8 -0
  103. package/x/tools/common/transformer-pipeline.js +24 -0
  104. package/x/tools/common/transformer-pipeline.js.map +1 -0
  105. package/x/tools/speech-recognition/common/model.d.ts +14 -0
  106. package/x/tools/speech-recognition/common/model.js +16 -0
  107. package/x/tools/speech-recognition/common/model.js.map +1 -0
  108. package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
  109. package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
  110. package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
  111. package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
  112. package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
  113. package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
  114. package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
  115. package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
  116. package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
  117. package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
  118. package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
  119. package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
  120. package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
  121. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
  122. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
  123. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
  124. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
  125. package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
  126. package/x/tools/speech-recognition/whisper/tool.js +63 -0
  127. package/x/tools/speech-recognition/whisper/tool.js.map +1 -0
@@ -0,0 +1,82 @@

import {AsSchematic} from "@e280/comrade"
import {DataType, DeviceType, Pipeline} from "@huggingface/transformers"

import {Driver} from "../../../driver/driver.js"

/**
 * Comrade wire-contract for the transcriber worker.
 * The work side exposes prepare/transcribe; the host side receives
 * model-loading progress, periodic reports, and streamed text.
 */
export type TranscriberSchematic = AsSchematic<{
	work: {
		prepare(spec: TranscriberSpec): Promise<void>
		transcribe(request: TranscriptionRequest): Promise<Transcription>
	},

	host: {
		loading(load: Loading): Promise<void>
		deliverReport(report: TranscriptionReport): Promise<void>
		deliverTranscription(transcription: string): Promise<void>
	}
}>

/** Model download/load progress: `progress` out of `total`. */
export type Loading = {
	total: number
	progress: number
}

/** Everything the worker-side transcribe step needs in one bag. */
export type TranscribeOptions = {
	pipe: Pipeline
	spec: TranscriberSpec
	request: TranscriptionRequest
	callbacks: TranscriptionCallbacks
}

/** Options for loading the transcriber pipeline. */
export type TranscriberPipeOptions = {
	spec: TranscriberSpec
	onLoading: (loading: Loading) => void
}

/** [start, end] timestamp pair. */
export type SpeechTime = [start: number, end: number]

/** Full transcription result: whole text plus per-chunk timestamps. */
export type Transcription = {
	text: string
	chunks: {
		text: string
		timestamp: SpeechTime
	}[]
}

/** Model selection and chunking parameters for the transcriber. */
export type TranscriberSpec = {
	model: string
	dtype: DataType
	device: DeviceType

	// chunk/stride lengths — presumably seconds, as in whisper pipelines;
	// TODO confirm against parts/transcribe.ts usage
	chunkLength: number
	strideLength: number
}

/** Caller-facing options: the audio source, language, and streaming callbacks. */
export type TranscriptionOptions = {
	source: Blob

	// NOTE(review): null presumably means "auto-detect language" — confirm
	language: string | null
} & TranscriptionCallbacks

/** Payload sent to the worker: raw audio samples plus language/duration. */
export type TranscriptionRequest = {
	audio: ArrayBufferLike
	language: string | null
	duration: number
}

/** Periodic progress report emitted while transcribing. */
export type TranscriptionReport = {
	progress: number
	tokensPerSecond: number
}

/** Callbacks fired while a transcription is in flight. */
export type TranscriptionCallbacks = {
	onReport: (report: TranscriptionReport) => void
	onTranscription: (transcription: string) => void
}

/** Options for constructing the host-side Transcriber. */
export type TranscriberOptions = {
	driver: Driver
	spec: TranscriberSpec
	workerUrl: URL | string
	onLoading: (loading: Loading) => void
}

@@ -0,0 +1,40 @@

import {defer, once} from "@e280/stz"
import {Comrade, Host} from "@e280/comrade"
import {Pipeline} from "@huggingface/transformers"

import {loadPipe} from "./parts/load-pipe.js"
import {transcribe} from "./parts/transcribe.js"
import {TranscriberSchematic, TranscriberSpec} from "./types.js"

// resolved once `prepare` has loaded the pipeline;
// `transcribe` awaits this, so calls made before prepare simply wait
const deferred = defer<{pipe: Pipeline, spec: TranscriberSpec}>()

// wrapped in `once` so repeat prepare calls are no-ops
// and the pipeline is loaded a single time
const makePrepare = (host: Host<TranscriberSchematic>) => once(async(spec: TranscriberSpec) => {
	deferred.resolve({
		spec,
		pipe: await loadPipe({
			spec,
			// forward model-loading progress to the host
			onLoading: loading => host.loading(loading),
		}),
	})
})

// worker entrypoint: register the work-side handlers of the schematic
await Comrade.worker<TranscriberSchematic>(shell => {
	const prepare = makePrepare(shell.host)
	return {
		prepare,
		async transcribe(request) {
			const {pipe, spec} = await deferred.promise
			return transcribe({
				pipe,
				spec,
				request,
				// stream reports and text back to the host as they arrive
				callbacks: {
					onReport: report => shell.host.deliverReport(report),
					onTranscription: transcription => shell.host.deliverTranscription(transcription),
				},
			})
		}
	}
})

@@ -0,0 +1,24 @@
1
+ export const fragment = (glsl: string) => `
2
+ precision highp float;
3
+ varying vec2 vTextureCoord;
4
+ varying vec2 _uv;
5
+ uniform sampler2D from, to;
6
+ uniform float progress, ratio, _fromR, _toR;
7
+ uniform float customUniform;
8
+
9
+ vec4 getFromColor(vec2 uv){
10
+ return texture2D(from, .5+(uv-.5)*vec2(max(ratio/_fromR,1.), max(_fromR/ratio,1.)));
11
+ }
12
+ vec4 getToColor(vec2 uv){
13
+ return texture2D(to, .5+(uv-.5)*vec2(max(ratio/_toR,1.), max(_toR/ratio,1.)));
14
+ }
15
+
16
+ // gl-transition code here
17
+ ${glsl}
18
+ // gl-transition code end
19
+
20
+ void main(){
21
+ vec2 uv = vTextureCoord.xy;
22
+ gl_FragColor = transition(vTextureCoord);
23
+ }
24
+ `
@@ -0,0 +1,94 @@
import {Renderer} from "pixi.js"

/** Options for makeTransition: which named transition, and the pixi renderer to draw with. */
export interface TransitionOptions {
	name: Transition
	renderer: Renderer
}

/** Per-frame render options: the two frames, blend progress, and output size. */
export interface TransitionRendererOptions {
	from: VideoFrame
	to: VideoFrame

	// presumably 0..1 blend factor fed to the shader's `progress` uniform — confirm
	progress: number
	width: number
	height: number
}

/** Shape of one entry from the `gl-transitions` package. */
export interface GLTransition {
	author: string
	createdAt: string

	// the transition's fragment-shader body (declares `transition(vec2)`)
	glsl: string
	license: string
	name: Transition
	updatedAt: string

	// param-name → default value; shape varies per transition
	defaultParams: any

	// param-name → glsl type string (e.g. "float", "vec2")
	paramsTypes: any
}

/** Names of all supported gl-transitions. */
export type Transition =
	| "Bounce"
	| "BowTieHorizontal"
	| "BowTieVertical"
	| "ButterflyWaveScrawler"
	| "CircleCrop"
	| "ColourDistance"
	| "CrazyParametricFun"
	| "CrossZoom"
	| "Directional"
	| "DoomScreenTransition"
	| "Dreamy"
	| "DreamyZoom"
	| "GlitchDisplace"
	| "GlitchMemories"
	| "GridFlip"
	| "InvertedPageCurl"
	| "LinearBlur"
	| "Mosaic"
	| "PolkaDotsCurtain"
	| "Radial"
	| "SimpleZoom"
	| "StereoViewer"
	| "Swirl"
	| "WaterDrop"
	| "ZoomInCircles"
	| "angular"
	| "burn"
	| "cannabisleaf"
	| "circle"
	| "circleopen"
	| "colorphase"
	| "crosshatch"
	| "crosswarp"
	| "cube"
	| "directionalwarp"
	| "directionalwipe"
	| "displacement"
	| "doorway"
	| "fade"
	| "fadecolor"
	| "fadegrayscale"
	| "flyeye"
	| "heart"
	| "hexagonalize"
	| "kaleidoscope"
	| "luma"
	| "luminance_melt"
	| "morph"
	| "multiply_blend"
	| "perlin"
	| "pinwheel"
	| "pixelize"
	| "polar_function"
	| "randomsquares"
	| "ripple"
	| "rotate_scale_fade"
	| "squareswire"
	| "squeeze"
	| "swap"
	| "undulatingBurnOut"
	| "wind"
	| "windowblinds"
	| "windowslice"
	| "wipeDown"
	| "wipeLeft"
	| "wipeRight"
	| "wipeUp"
@@ -0,0 +1,29 @@
1
+ import {GLTransition} from "./types.js"
2
+
3
+ export const uniforms = {
4
+ custom: (transition: GLTransition) => Object.fromEntries(
5
+ Object.entries(transition.defaultParams).map(([name, value]) => [
6
+ name,
7
+ {
8
+ value,
9
+ type: getUniformType(transition.paramsTypes[name])
10
+ }
11
+ ])
12
+ ),
13
+ basics: {
14
+ _fromR: {value: 1, type: "f32"},
15
+ _toR: {value: 1, type: "f32"},
16
+ ratio: {value: 1, type: "f32"},
17
+ progress: {value: 0, type: "f32"},
18
+ customUniform: {value: 0, type: "f32"},
19
+ }
20
+ }
21
+
22
+ const getUniformType = (type: string) => {
23
+ if(type === "f32" || type === "i32") {
24
+ return type
25
+ } else if(type === "float") {
26
+ return "f32"
27
+ }
28
+ else return `${type}<f32>`
29
+ }
@@ -0,0 +1,31 @@
/**
 * Vertex shader shared by all transition filters: standard pixi v8 filter
 * boilerplate (position/texcoord from uOutputFrame/uInputSize/uOutputTexture)
 * plus the `_uv` varying that gl-transition fragment code expects.
 */
export const vertex = `
in vec2 aPosition;
varying vec2 _uv; // gl-transition
uniform mat3 projectionMatrix;
uniform vec4 uInputSize;
uniform vec4 uOutputFrame;
out vec2 vTextureCoord;
uniform vec4 uOutputTexture;

vec4 filterVertexPosition( void )
{
vec2 position = aPosition * uOutputFrame.zw + uOutputFrame.xy;

position.x = position.x * (2.0 / uOutputTexture.x) - 1.0;
position.y = position.y * (2.0*uOutputTexture.z / uOutputTexture.y) - uOutputTexture.z;

return vec4(position, 0.0, 1.0);
}

vec2 filterTextureCoord( void )
{
return aPosition * (uOutputFrame.zw * uInputSize.zw);
}

void main(void)
{
gl_Position = filterVertexPosition();
vTextureCoord = filterTextureCoord();
_uv = vec2(0.5, 0.5) * (aPosition +vec2(1.0, 1.0)); // gl-transition
}
`
@@ -0,0 +1,60 @@
1
+ //@ts-ignore
2
+ import transitions from "gl-transitions"
3
+ import {Filter, GlProgram, Sprite, Texture, ImageSource} from "pixi.js"
4
+
5
+ import {vertex} from "./parts/vertex.js"
6
+ import {uniforms} from "./parts/uniforms.js"
7
+ import {fragment} from "./parts/fragment.js"
8
+ import {GLTransition, TransitionOptions, TransitionRendererOptions} from "./parts/types.js"
9
+
10
+ export function makeTransition({name, renderer}: TransitionOptions) {
11
+ const transition = transitions.find((t: GLTransition) => t.name === name) as GLTransition
12
+ const transitionSprite = new Sprite()
13
+ const transitionTexture = new Texture()
14
+ const sourceFrom = new ImageSource({})
15
+ const sourceTo = new ImageSource({})
16
+
17
+ const filter = new Filter({
18
+ glProgram: new GlProgram({
19
+ vertex,
20
+ fragment: fragment(transition.glsl),
21
+ }),
22
+ resources: {
23
+ from: sourceFrom,
24
+ to: sourceTo,
25
+ uniforms: {
26
+ ...uniforms.basics,
27
+ ...uniforms.custom(transition)
28
+ }
29
+ }
30
+ })
31
+
32
+ transitionSprite.filters = [filter]
33
+
34
+ return {
35
+ render({width, height, from, to, progress}: TransitionRendererOptions) {
36
+ if(transitionSprite.width !== width || transitionSprite.height !== height) {
37
+ transitionSprite.setSize({width, height})
38
+ transitionTexture.source.resize(width, height)
39
+ }
40
+
41
+ sourceFrom.resource = from
42
+ sourceTo.resource = to
43
+ sourceFrom.update()
44
+ sourceTo.update()
45
+
46
+ filter.resources.uniforms.uniforms.progress = progress
47
+
48
+ renderer.render({
49
+ container: transitionSprite,
50
+ target: transitionTexture,
51
+ clear: false,
52
+ width,
53
+ height
54
+ })
55
+
56
+ return transitionTexture
57
+ }
58
+ }
59
+ }
60
+
@@ -10,7 +10,8 @@ export class Checksum {
10
10
  ) {}
11
11
 
12
12
  static async make(data: Uint8Array) {
13
- const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data))
13
+ const data2 = new Uint8Array(data)
14
+ const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data2))
14
15
  const hash = Hex.fromBytes(bytes)
15
16
  const nickname = Thumbprint.sigil.fromBytes(bytes)
16
17
  return new this(data, bytes, hash, nickname)
@@ -0,0 +1,26 @@
import {pub, Pub} from "@e280/stz"
import {ProgressItem} from "../speech-recognition/whisper/parts/types.js"

/** Events a loader publishes: model download/load progress and tokens/sec updates. */
export interface LoaderEvents {
	onModelLoadProgress: Pub<ProgressItem[]>
	onTpsUpdate: Pub<[number]>
}

/**
 * Base class for tools that load an ml model.
 * Tracks the current model name and the last observed decode speed.
 */
export abstract class Loader {
	// last reported decode speed, in tokens per second
	tps = 0

	// NOTE(review): static, so these pubs are shared across ALL loaders,
	// not scoped per instance — confirm that is intended
	static loaderEvents = {
		onModelLoadProgress: pub<ProgressItem[]>(),
		onTpsUpdate: pub<[number]>()
	}

	constructor(public readonly name: string, public model: string) {}

	// perform any async setup (e.g. fetching/warming the model)
	abstract init(): Promise<void>

	// switch to a different model identifier
	abstract setModel(model: string): void

	// record the latest tokens-per-second measurement
	setTps(value: number) {
		this.tps = value
	}
}
@@ -0,0 +1,26 @@
//@ts-ignore
import {pipeline} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"

import {ProgressCallback} from "../speech-recognition/whisper/parts/types.js"

/**
 * Holds a single transformers.js pipeline instance for one task,
 * remembering which model it was created with so callers can detect
 * when a different model requires a rebuild.
 */
export class PipelineFactory {
	// the live pipeline, or null before createInstance has run
	instance: any = null

	// the model id the current instance was built for
	model: string | null = null

	constructor(public task: string) {}

	/**
	 * Create the pipeline for `model`, caching it on `this.instance`.
	 * @param progressCallback forwarded to transformers as progress_callback
	 */
	async createInstance(model: string, progressCallback?: ProgressCallback) {
		this.model = model
		return this.instance = await pipeline(this.task, this.model, {
			dtype: {
				// large-v3-turbo's encoder runs at fp16; all others fp32
				encoder_model:
					this.model === "onnx-community/whisper-large-v3-turbo"
						? "fp16"
						: "fp32",
				decoder_model_merged: "q4",
			},
			device: "webgpu",
			progress_callback: progressCallback,
		})
	}
}
@@ -0,0 +1,26 @@
import {pub} from "@e280/stz"

import {Loader} from "../../common/loader.js"
import {DecoderSource} from "../../../driver/fns/schematic.js"
import {SpeechRecognizerModels, Word, WordGroup} from "../whisper/parts/types.js"

/**
 * Base class for speech-recognition tools.
 * Extends Loader with transcription events and a multilingual toggle.
 */
export abstract class SpeechRecognizer extends Loader {
	multilingual = true

	// NOTE(review): static, shared across all recognizer instances — confirm intended
	static speechRecognizerEvents = {
		onTranscriptionChunk: pub<Word[]>(),
		onTranscribeProgress: pub<[number]>()
	}

	// run recognition over the input, producing timestamped words
	abstract transcribe(input: DecoderSource): Promise<WordGroup>

	setMultilingual(value: boolean) {
		this.multilingual = value
	}

	// optional capability: implementations may support language detection
	detectLanguage?(input: Blob | AudioBuffer): Promise<string>

	setModel(value: SpeechRecognizerModels) {
		this.model = value
	}
}
@@ -0,0 +1,25 @@
1
+
2
+ import {Comrade} from "@e280/comrade"
3
+ import {ProgressItem} from "../parts/types.js"
4
+ import {SpeechRecognizerHostEvents, WhisperSchematic} from "./schematic.js"
5
+
6
+ export const setupWhisperHost = (events: SpeechRecognizerHostEvents) => (
7
+ Comrade.host<WhisperSchematic>(_shell => ({
8
+ async updateModelLoadProgress(item) {
9
+ events.onModelLoadProgress.pub(item)
10
+ },
11
+ async deliverTranscriptionChunk(chunk) {
12
+ events.onTranscriptionChunk.pub({
13
+ text: chunk.text,
14
+ timestamp: chunk.timestamp
15
+ })
16
+ },
17
+ async updateTps(value) {
18
+ events.onTpsUpdate.pub(value)
19
+ },
20
+ async updateTranscribeProgress(value) {
21
+ events.onTranscribeProgress(value)
22
+ }
23
+ }))
24
+ )
25
+
@@ -0,0 +1,23 @@
import {Pub} from "@e280/stz"
import {AsSchematic} from "@e280/comrade"

import {LoaderEvents} from "../../../common/loader.js"
import {ProgressItem, TranscriptionChunk, TranscriptionMessage, TranscriptionResult, Word} from "../parts/types.js"

/**
 * Comrade wire-contract for the whisper worker.
 * Work side: transcribe. Host side: load progress, streamed chunks,
 * tokens/sec updates, and transcription progress.
 */
export type WhisperSchematic = AsSchematic<{
	work: {
		// resolves null when the underlying transcriber produced no output
		transcribe(input: TranscriptionMessage): Promise<TranscriptionResult | null>
	},

	host: {
		updateModelLoadProgress(item: ProgressItem): Promise<void>
		deliverTranscriptionChunk(chunk: TranscriptionChunk): Promise<void>
		updateTps(value: number): Promise<void>

		// value is the estimated transcription progress (0..1)
		updateTranscribeProgress(value: number): Promise<void>
	}
}>

/** Loader events plus transcription-specific pubs consumed by the whisper host. */
export interface SpeechRecognizerHostEvents extends LoaderEvents {
	onTranscriptionChunk: Pub<Word[]>
	onTranscribeProgress: Pub<[number]>
}
@@ -0,0 +1,91 @@
1
+ import {Comrade} from "@e280/comrade"
2
+ //@ts-ignore
3
+ import {pipeline, WhisperTextStreamer} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"
4
+
5
+ import {WhisperSchematic} from "./schematic.js"
6
+ import {TranscriptionChunk} from "../parts/types.js"
7
+ import {PipelineFactory} from "../../../common/transformer-pipeline.js"
8
+
9
+ // TODO suspicious globals, probably bad
10
+ const pipeline = new PipelineFactory("automatic-speech-recognition")
11
+ let transcriber: any
12
+
13
+ export const setupWhisperWork = Comrade.work<WhisperSchematic>(shell => ({
14
+ async transcribe({audio, model, language, duration}) {
15
+ const isDistil = model.startsWith("distil-whisper/")
16
+
17
+ if(!pipeline.model || pipeline.model !== model) {
18
+ pipeline.instance?.dispose()?.()
19
+ pipeline.instance = null
20
+ transcriber = await pipeline.createInstance(
21
+ model,
22
+ (data) => {
23
+ if(data.progress)
24
+ shell.host.updateModelLoadProgress({
25
+ id: data.file,
26
+ progress: data.progress
27
+ })
28
+ }
29
+ )
30
+ }
31
+
32
+ const timePrecision =
33
+ transcriber.processor.feature_extractor.config.chunk_length /
34
+ transcriber.model.config.max_source_positions
35
+
36
+ const chunkLength = isDistil ? 20 : 30
37
+ const strideLength = isDistil ? 3 : 5
38
+
39
+ let chunkCount = 0
40
+ let startTime: number | null = null
41
+ let tokenCount = 0
42
+ let tps = 0
43
+
44
+ const chunkDuration = chunkLength - strideLength
45
+
46
+ const estimateProgress = () => {
47
+ const audioProgressSeconds = chunkCount * chunkDuration
48
+ return Math.min(audioProgressSeconds / duration, 1)
49
+ }
50
+
51
+ const streamer = new WhisperTextStreamer(transcriber.tokenizer, {
52
+ time_precision: timePrecision,
53
+ token_callback_function: () => {
54
+ startTime ??= performance.now()
55
+ if (++tokenCount > 1) {
56
+ tps = (tokenCount / (performance.now() - startTime)) * 1000
57
+ shell.host.updateTps(tps)
58
+ }
59
+ },
60
+ callback_function: (textChunk: any) => {
61
+ shell.host.deliverTranscriptionChunk(textChunk)
62
+ },
63
+ on_finalize: () => {
64
+ startTime = null
65
+ tokenCount = 0
66
+ chunkCount++
67
+ const progress = estimateProgress()
68
+ shell.host.updateTranscribeProgress(progress)
69
+ },
70
+ })
71
+
72
+ const output = await transcriber(audio, {
73
+ top_k: 0,
74
+ do_sample: false,
75
+ chunk_length_s: chunkLength,
76
+ stride_length_s: strideLength,
77
+ language,
78
+ task: "transcribe",
79
+ return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
80
+ force_full_sequences: false,
81
+ streamer,
82
+ })
83
+
84
+ if (!output) return null
85
+
86
+ return {
87
+ tps,
88
+ ...output,
89
+ }
90
+ }
91
+ }))
@@ -0,0 +1,38 @@
/** One file's download/load progress, identified by file id. */
export interface ProgressItem {
	id: string
	progress: number
}

/** A transcribed word (or text span) with its [start, end] timestamps. */
export type Word = {
	text: string
	timestamp: [start: number, end: number]
}

// a run of words; a transcript is a list of such runs
export type WordGroup = Word[]
export type Transcript = WordGroup[]

/** A chunk streamed out of the whisper text streamer. */
export interface TranscriptionChunk {
	text: string
	offset: number
	timestamp: [number, number]
	finalised: boolean
}

/** Request payload for the whisper worker's transcribe call. */
export interface TranscriptionMessage {
	audio: Float32Array
	model: string
	subtask: string | null

	// NOTE(review): null presumably lets the model auto-detect — confirm
	language: string | null

	// total audio duration, used by the worker to estimate progress
	duration: number
}

/** Final transcription output plus the last observed tokens/sec. */
export interface TranscriptionResult {
	text: string
	chunks: TranscriptionChunk[]
	tps: number
}

/** Receives raw events from transformers' progress_callback. */
export type ProgressCallback = (data: any) => void

export type SpeechRecognizerModels = "onnx-community/whisper-tiny_timestamped"
export type SpeechRecognizerSubtasks = "transcribe"
@@ -0,0 +1,7 @@
import {Comrade} from "@e280/comrade"

import {setupWhisperWork} from "../fns/work.js"
import {WhisperSchematic} from "../fns/schematic.js"

// worker-bundle entrypoint: register the whisper work-side handlers
await Comrade.worker<WhisperSchematic>(setupWhisperWork)
