@omnimedia/omnitool 1.1.0-3 → 1.1.0-5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/package.json +11 -9
  2. package/s/context.ts +1 -1
  3. package/s/demo/demo.bundle.ts +6 -2
  4. package/s/demo/routines/filmstrip-test.ts +2 -2
  5. package/s/demo/routines/transcriber-test.ts +34 -0
  6. package/s/demo/routines/transitions-test.ts +43 -0
  7. package/s/driver/fns/host.ts +7 -6
  8. package/s/driver/fns/schematic.ts +1 -1
  9. package/s/driver/fns/work.ts +116 -119
  10. package/s/driver/utils/load-decoder-source.ts +3 -4
  11. package/s/features/speech/transcribe/default-spec.ts +11 -0
  12. package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
  13. package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
  14. package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
  15. package/s/features/speech/transcribe/transcriber.ts +46 -0
  16. package/s/features/speech/transcribe/types.ts +82 -0
  17. package/s/features/speech/transcribe/worker.bundle.ts +40 -0
  18. package/s/features/transition/parts/fragment.ts +24 -0
  19. package/s/features/transition/parts/types.ts +94 -0
  20. package/s/features/transition/parts/uniforms.ts +29 -0
  21. package/s/features/transition/parts/vertex.ts +31 -0
  22. package/s/features/transition/transition.ts +60 -0
  23. package/s/timeline/utils/checksum.ts +2 -1
  24. package/s/tools/common/loader.ts +26 -0
  25. package/s/tools/common/transformer-pipeline.ts +26 -0
  26. package/s/tools/speech-recognition/common/model.ts +26 -0
  27. package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
  28. package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
  29. package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
  30. package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
  31. package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
  32. package/s/tools/speech-recognition/whisper/tool.ts +70 -0
  33. package/x/context.js +1 -1
  34. package/x/context.js.map +1 -1
  35. package/x/demo/demo.bundle.js +6 -2
  36. package/x/demo/demo.bundle.js.map +1 -1
  37. package/x/demo/demo.bundle.min.js +6 -6
  38. package/x/demo/demo.bundle.min.js.map +4 -4
  39. package/x/demo/routines/filmstrip-test.d.ts +1 -1
  40. package/x/demo/routines/filmstrip-test.js +2 -2
  41. package/x/demo/routines/filmstrip-test.js.map +1 -1
  42. package/x/demo/routines/transcriber-test.d.ts +4 -0
  43. package/x/demo/routines/transcriber-test.js +33 -0
  44. package/x/demo/routines/transcriber-test.js.map +1 -0
  45. package/x/demo/routines/transitions-test.d.ts +5 -0
  46. package/x/demo/routines/transitions-test.js +35 -0
  47. package/x/demo/routines/transitions-test.js.map +1 -0
  48. package/x/driver/driver.worker.bundle.min.js +80 -80
  49. package/x/driver/driver.worker.bundle.min.js.map +4 -4
  50. package/x/driver/fns/host.js +3 -3
  51. package/x/driver/fns/host.js.map +1 -1
  52. package/x/driver/fns/schematic.d.ts +1 -1
  53. package/x/driver/fns/work.js +8 -8
  54. package/x/driver/fns/work.js.map +1 -1
  55. package/x/driver/utils/load-decoder-source.d.ts +2 -1
  56. package/x/driver/utils/load-decoder-source.js +2 -3
  57. package/x/driver/utils/load-decoder-source.js.map +1 -1
  58. package/x/features/speech/transcribe/default-spec.d.ts +2 -0
  59. package/x/features/speech/transcribe/default-spec.js +8 -0
  60. package/x/features/speech/transcribe/default-spec.js.map +1 -0
  61. package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
  62. package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
  63. package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
  64. package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
  65. package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
  66. package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
  67. package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
  68. package/x/features/speech/transcribe/parts/transcribe.js +56 -0
  69. package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
  70. package/x/features/speech/transcribe/transcriber.d.ts +5 -0
  71. package/x/features/speech/transcribe/transcriber.js +33 -0
  72. package/x/features/speech/transcribe/transcriber.js.map +1 -0
  73. package/x/features/speech/transcribe/types.d.ts +66 -0
  74. package/x/features/speech/transcribe/types.js +2 -0
  75. package/x/features/speech/transcribe/types.js.map +1 -0
  76. package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
  77. package/x/features/speech/transcribe/worker.bundle.js +33 -0
  78. package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
  79. package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
  80. package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
  81. package/x/features/transition/parts/fragment.d.ts +1 -0
  82. package/x/features/transition/parts/fragment.js +25 -0
  83. package/x/features/transition/parts/fragment.js.map +1 -0
  84. package/x/features/transition/parts/types.d.ts +23 -0
  85. package/x/features/transition/parts/types.js +2 -0
  86. package/x/features/transition/parts/types.js.map +1 -0
  87. package/x/features/transition/parts/uniforms.d.ts +31 -0
  88. package/x/features/transition/parts/uniforms.js +27 -0
  89. package/x/features/transition/parts/uniforms.js.map +1 -0
  90. package/x/features/transition/parts/vertex.d.ts +1 -0
  91. package/x/features/transition/parts/vertex.js +32 -0
  92. package/x/features/transition/parts/vertex.js.map +1 -0
  93. package/x/features/transition/transition.d.ts +5 -0
  94. package/x/features/transition/transition.js +50 -0
  95. package/x/features/transition/transition.js.map +1 -0
  96. package/x/index.html +2 -2
  97. package/x/timeline/utils/checksum.js +2 -1
  98. package/x/timeline/utils/checksum.js.map +1 -1
  99. package/x/tools/common/loader.d.ts +19 -0
  100. package/x/tools/common/loader.js +18 -0
  101. package/x/tools/common/loader.js.map +1 -0
  102. package/x/tools/common/transformer-pipeline.d.ts +8 -0
  103. package/x/tools/common/transformer-pipeline.js +24 -0
  104. package/x/tools/common/transformer-pipeline.js.map +1 -0
  105. package/x/tools/speech-recognition/common/model.d.ts +14 -0
  106. package/x/tools/speech-recognition/common/model.js +16 -0
  107. package/x/tools/speech-recognition/common/model.js.map +1 -0
  108. package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
  109. package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
  110. package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
  111. package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
  112. package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
  113. package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
  114. package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
  115. package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
  116. package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
  117. package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
  118. package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
  119. package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
  120. package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
  121. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
  122. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
  123. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
  124. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
  125. package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
  126. package/x/tools/speech-recognition/whisper/tool.js +63 -0
  127. package/x/tools/speech-recognition/whisper/tool.js.map +1 -0
@@ -0,0 +1,82 @@

import {AsSchematic} from "@e280/comrade"
import {DataType, DeviceType, Pipeline} from "@huggingface/transformers"

import {Driver} from "../../../driver/driver.js"

/**
 * Comrade wire-contract for the transcriber worker.
 * The work side exposes prepare/transcribe; the host side receives
 * model-loading progress, periodic reports, and streamed text.
 */
export type TranscriberSchematic = AsSchematic<{
	work: {
		prepare(spec: TranscriberSpec): Promise<void>
		transcribe(request: TranscriptionRequest): Promise<Transcription>
	},

	host: {
		loading(load: Loading): Promise<void>
		deliverReport(report: TranscriptionReport): Promise<void>
		deliverTranscription(transcription: string): Promise<void>
	}
}>

/** Model download/load progress: `progress` out of `total`. */
export type Loading = {
	total: number
	progress: number
}

/** Everything the worker-side transcribe step needs in one bag. */
export type TranscribeOptions = {
	pipe: Pipeline
	spec: TranscriberSpec
	request: TranscriptionRequest
	callbacks: TranscriptionCallbacks
}

/** Options for loading the transcriber pipeline. */
export type TranscriberPipeOptions = {
	spec: TranscriberSpec
	onLoading: (loading: Loading) => void
}

/** [start, end] timestamp pair. */
export type SpeechTime = [start: number, end: number]

/** Full transcription result: whole text plus per-chunk timestamps. */
export type Transcription = {
	text: string
	chunks: {
		text: string
		timestamp: SpeechTime
	}[]
}

/** Model selection and chunking parameters for the transcriber. */
export type TranscriberSpec = {
	model: string
	dtype: DataType
	device: DeviceType

	// chunk/stride lengths — presumably seconds, as in whisper pipelines;
	// TODO confirm against parts/transcribe.ts usage
	chunkLength: number
	strideLength: number
}

/** Caller-facing options: the audio source, language, and streaming callbacks. */
export type TranscriptionOptions = {
	source: Blob

	// NOTE(review): null presumably means "auto-detect language" — confirm
	language: string | null
} & TranscriptionCallbacks

/** Payload sent to the worker: raw audio samples plus language/duration. */
export type TranscriptionRequest = {
	audio: ArrayBufferLike
	language: string | null
	duration: number
}

/** Periodic progress report emitted while transcribing. */
export type TranscriptionReport = {
	progress: number
	tokensPerSecond: number
}

/** Callbacks fired while a transcription is in flight. */
export type TranscriptionCallbacks = {
	onReport: (report: TranscriptionReport) => void
	onTranscription: (transcription: string) => void
}

/** Options for constructing the host-side Transcriber. */
export type TranscriberOptions = {
	driver: Driver
	spec: TranscriberSpec
	workerUrl: URL | string
	onLoading: (loading: Loading) => void
}

@@ -0,0 +1,40 @@

import {defer, once} from "@e280/stz"
import {Comrade, Host} from "@e280/comrade"
import {Pipeline} from "@huggingface/transformers"

import {loadPipe} from "./parts/load-pipe.js"
import {transcribe} from "./parts/transcribe.js"
import {TranscriberSchematic, TranscriberSpec} from "./types.js"

// resolved once `prepare` has loaded the pipeline;
// `transcribe` awaits this, so calls made before prepare simply wait
const deferred = defer<{pipe: Pipeline, spec: TranscriberSpec}>()

// wrapped in `once` so repeat prepare calls are no-ops
// and the pipeline is loaded a single time
const makePrepare = (host: Host<TranscriberSchematic>) => once(async(spec: TranscriberSpec) => {
	deferred.resolve({
		spec,
		pipe: await loadPipe({
			spec,
			// forward model-loading progress to the host
			onLoading: loading => host.loading(loading),
		}),
	})
})

// worker entrypoint: register the work-side handlers of the schematic
await Comrade.worker<TranscriberSchematic>(shell => {
	const prepare = makePrepare(shell.host)
	return {
		prepare,
		async transcribe(request) {
			const {pipe, spec} = await deferred.promise
			return transcribe({
				pipe,
				spec,
				request,
				// stream reports and text back to the host as they arrive
				callbacks: {
					onReport: report => shell.host.deliverReport(report),
					onTranscription: transcription => shell.host.deliverTranscription(transcription),
				},
			})
		}
	}
})

@@ -0,0 +1,24 @@
1
+ export const fragment = (glsl: string) => `
2
+ precision highp float;
3
+ varying vec2 vTextureCoord;
4
+ varying vec2 _uv;
5
+ uniform sampler2D from, to;
6
+ uniform float progress, ratio, _fromR, _toR;
7
+ uniform float customUniform;
8
+
9
+ vec4 getFromColor(vec2 uv){
10
+ return texture2D(from, .5+(uv-.5)*vec2(max(ratio/_fromR,1.), max(_fromR/ratio,1.)));
11
+ }
12
+ vec4 getToColor(vec2 uv){
13
+ return texture2D(to, .5+(uv-.5)*vec2(max(ratio/_toR,1.), max(_toR/ratio,1.)));
14
+ }
15
+
16
+ // gl-transition code here
17
+ ${glsl}
18
+ // gl-transition code end
19
+
20
+ void main(){
21
+ vec2 uv = vTextureCoord.xy;
22
+ gl_FragColor = transition(vTextureCoord);
23
+ }
24
+ `
@@ -0,0 +1,94 @@
import {Renderer} from "pixi.js"

/** Options for makeTransition: which named transition, and the pixi renderer to draw with. */
export interface TransitionOptions {
	name: Transition
	renderer: Renderer
}

/** Per-frame render options: the two frames, blend progress, and output size. */
export interface TransitionRendererOptions {
	from: VideoFrame
	to: VideoFrame

	// presumably 0..1 blend factor fed to the shader's `progress` uniform — confirm
	progress: number
	width: number
	height: number
}

/** Shape of one entry from the `gl-transitions` package. */
export interface GLTransition {
	author: string
	createdAt: string

	// the transition's fragment-shader body (declares `transition(vec2)`)
	glsl: string
	license: string
	name: Transition
	updatedAt: string

	// param-name → default value; shape varies per transition
	defaultParams: any

	// param-name → glsl type string (e.g. "float", "vec2")
	paramsTypes: any
}

/** Names of all supported gl-transitions. */
export type Transition =
	| "Bounce"
	| "BowTieHorizontal"
	| "BowTieVertical"
	| "ButterflyWaveScrawler"
	| "CircleCrop"
	| "ColourDistance"
	| "CrazyParametricFun"
	| "CrossZoom"
	| "Directional"
	| "DoomScreenTransition"
	| "Dreamy"
	| "DreamyZoom"
	| "GlitchDisplace"
	| "GlitchMemories"
	| "GridFlip"
	| "InvertedPageCurl"
	| "LinearBlur"
	| "Mosaic"
	| "PolkaDotsCurtain"
	| "Radial"
	| "SimpleZoom"
	| "StereoViewer"
	| "Swirl"
	| "WaterDrop"
	| "ZoomInCircles"
	| "angular"
	| "burn"
	| "cannabisleaf"
	| "circle"
	| "circleopen"
	| "colorphase"
	| "crosshatch"
	| "crosswarp"
	| "cube"
	| "directionalwarp"
	| "directionalwipe"
	| "displacement"
	| "doorway"
	| "fade"
	| "fadecolor"
	| "fadegrayscale"
	| "flyeye"
	| "heart"
	| "hexagonalize"
	| "kaleidoscope"
	| "luma"
	| "luminance_melt"
	| "morph"
	| "multiply_blend"
	| "perlin"
	| "pinwheel"
	| "pixelize"
	| "polar_function"
	| "randomsquares"
	| "ripple"
	| "rotate_scale_fade"
	| "squareswire"
	| "squeeze"
	| "swap"
	| "undulatingBurnOut"
	| "wind"
	| "windowblinds"
	| "windowslice"
	| "wipeDown"
	| "wipeLeft"
	| "wipeRight"
	| "wipeUp"
@@ -0,0 +1,29 @@
1
+ import {GLTransition} from "./types.js"
2
+
3
+ export const uniforms = {
4
+ custom: (transition: GLTransition) => Object.fromEntries(
5
+ Object.entries(transition.defaultParams).map(([name, value]) => [
6
+ name,
7
+ {
8
+ value,
9
+ type: getUniformType(transition.paramsTypes[name])
10
+ }
11
+ ])
12
+ ),
13
+ basics: {
14
+ _fromR: {value: 1, type: "f32"},
15
+ _toR: {value: 1, type: "f32"},
16
+ ratio: {value: 1, type: "f32"},
17
+ progress: {value: 0, type: "f32"},
18
+ customUniform: {value: 0, type: "f32"},
19
+ }
20
+ }
21
+
22
+ const getUniformType = (type: string) => {
23
+ if(type === "f32" || type === "i32") {
24
+ return type
25
+ } else if(type === "float") {
26
+ return "f32"
27
+ }
28
+ else return `${type}<f32>`
29
+ }
@@ -0,0 +1,31 @@
/**
 * Vertex shader shared by all transition filters: standard pixi v8 filter
 * boilerplate (position/texcoord from uOutputFrame/uInputSize/uOutputTexture)
 * plus the `_uv` varying that gl-transition fragment code expects.
 */
export const vertex = `
in vec2 aPosition;
varying vec2 _uv; // gl-transition
uniform mat3 projectionMatrix;
uniform vec4 uInputSize;
uniform vec4 uOutputFrame;
out vec2 vTextureCoord;
uniform vec4 uOutputTexture;

vec4 filterVertexPosition( void )
{
vec2 position = aPosition * uOutputFrame.zw + uOutputFrame.xy;

position.x = position.x * (2.0 / uOutputTexture.x) - 1.0;
position.y = position.y * (2.0*uOutputTexture.z / uOutputTexture.y) - uOutputTexture.z;

return vec4(position, 0.0, 1.0);
}

vec2 filterTextureCoord( void )
{
return aPosition * (uOutputFrame.zw * uInputSize.zw);
}

void main(void)
{
gl_Position = filterVertexPosition();
vTextureCoord = filterTextureCoord();
_uv = vec2(0.5, 0.5) * (aPosition +vec2(1.0, 1.0)); // gl-transition
}
`
@@ -0,0 +1,60 @@
1
+ //@ts-ignore
2
+ import transitions from "gl-transitions"
3
+ import {Filter, GlProgram, Sprite, Texture, ImageSource} from "pixi.js"
4
+
5
+ import {vertex} from "./parts/vertex.js"
6
+ import {uniforms} from "./parts/uniforms.js"
7
+ import {fragment} from "./parts/fragment.js"
8
+ import {GLTransition, TransitionOptions, TransitionRendererOptions} from "./parts/types.js"
9
+
10
+ export function makeTransition({name, renderer}: TransitionOptions) {
11
+ const transition = transitions.find((t: GLTransition) => t.name === name) as GLTransition
12
+ const transitionSprite = new Sprite()
13
+ const transitionTexture = new Texture()
14
+ const sourceFrom = new ImageSource({})
15
+ const sourceTo = new ImageSource({})
16
+
17
+ const filter = new Filter({
18
+ glProgram: new GlProgram({
19
+ vertex,
20
+ fragment: fragment(transition.glsl),
21
+ }),
22
+ resources: {
23
+ from: sourceFrom,
24
+ to: sourceTo,
25
+ uniforms: {
26
+ ...uniforms.basics,
27
+ ...uniforms.custom(transition)
28
+ }
29
+ }
30
+ })
31
+
32
+ transitionSprite.filters = [filter]
33
+
34
+ return {
35
+ render({width, height, from, to, progress}: TransitionRendererOptions) {
36
+ if(transitionSprite.width !== width || transitionSprite.height !== height) {
37
+ transitionSprite.setSize({width, height})
38
+ transitionTexture.source.resize(width, height)
39
+ }
40
+
41
+ sourceFrom.resource = from
42
+ sourceTo.resource = to
43
+ sourceFrom.update()
44
+ sourceTo.update()
45
+
46
+ filter.resources.uniforms.uniforms.progress = progress
47
+
48
+ renderer.render({
49
+ container: transitionSprite,
50
+ target: transitionTexture,
51
+ clear: false,
52
+ width,
53
+ height
54
+ })
55
+
56
+ return transitionTexture
57
+ }
58
+ }
59
+ }
60
+
@@ -10,7 +10,8 @@ export class Checksum {
10
10
  ) {}
11
11
 
12
12
  static async make(data: Uint8Array) {
13
- const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data))
13
+ const data2 = new Uint8Array(data)
14
+ const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data2))
14
15
  const hash = Hex.fromBytes(bytes)
15
16
  const nickname = Thumbprint.sigil.fromBytes(bytes)
16
17
  return new this(data, bytes, hash, nickname)
@@ -0,0 +1,26 @@
import {pub, Pub} from "@e280/stz"
import {ProgressItem} from "../speech-recognition/whisper/parts/types.js"

/** Events a loader publishes: model download/load progress and tokens/sec updates. */
export interface LoaderEvents {
	onModelLoadProgress: Pub<ProgressItem[]>
	onTpsUpdate: Pub<[number]>
}

/**
 * Base class for tools that load an ml model.
 * Tracks the current model name and the last observed decode speed.
 */
export abstract class Loader {
	// last reported decode speed, in tokens per second
	tps = 0

	// NOTE(review): static, so these pubs are shared across ALL loaders,
	// not scoped per instance — confirm that is intended
	static loaderEvents = {
		onModelLoadProgress: pub<ProgressItem[]>(),
		onTpsUpdate: pub<[number]>()
	}

	constructor(public readonly name: string, public model: string) {}

	// perform any async setup (e.g. fetching/warming the model)
	abstract init(): Promise<void>

	// switch to a different model identifier
	abstract setModel(model: string): void

	// record the latest tokens-per-second measurement
	setTps(value: number) {
		this.tps = value
	}
}
@@ -0,0 +1,26 @@
//@ts-ignore
import {pipeline} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"

import {ProgressCallback} from "../speech-recognition/whisper/parts/types.js"

/**
 * Holds a single transformers.js pipeline instance for one task,
 * remembering which model it was created with so callers can detect
 * when a different model requires a rebuild.
 */
export class PipelineFactory {
	// the live pipeline, or null before createInstance has run
	instance: any = null

	// the model id the current instance was built for
	model: string | null = null

	constructor(public task: string) {}

	/**
	 * Create the pipeline for `model`, caching it on `this.instance`.
	 * @param progressCallback forwarded to transformers as progress_callback
	 */
	async createInstance(model: string, progressCallback?: ProgressCallback) {
		this.model = model
		return this.instance = await pipeline(this.task, this.model, {
			dtype: {
				// large-v3-turbo's encoder runs at fp16; all others fp32
				encoder_model:
					this.model === "onnx-community/whisper-large-v3-turbo"
						? "fp16"
						: "fp32",
				decoder_model_merged: "q4",
			},
			device: "webgpu",
			progress_callback: progressCallback,
		})
	}
}
@@ -0,0 +1,26 @@
import {pub} from "@e280/stz"

import {Loader} from "../../common/loader.js"
import {DecoderSource} from "../../../driver/fns/schematic.js"
import {SpeechRecognizerModels, Word, WordGroup} from "../whisper/parts/types.js"

/**
 * Base class for speech-recognition tools.
 * Extends Loader with transcription events and a multilingual toggle.
 */
export abstract class SpeechRecognizer extends Loader {
	multilingual = true

	// NOTE(review): static, shared across all recognizer instances — confirm intended
	static speechRecognizerEvents = {
		onTranscriptionChunk: pub<Word[]>(),
		onTranscribeProgress: pub<[number]>()
	}

	// run recognition over the input, producing timestamped words
	abstract transcribe(input: DecoderSource): Promise<WordGroup>

	setMultilingual(value: boolean) {
		this.multilingual = value
	}

	// optional capability: implementations may support language detection
	detectLanguage?(input: Blob | AudioBuffer): Promise<string>

	setModel(value: SpeechRecognizerModels) {
		this.model = value
	}
}
@@ -0,0 +1,25 @@
1
+
2
+ import {Comrade} from "@e280/comrade"
3
+ import {ProgressItem} from "../parts/types.js"
4
+ import {SpeechRecognizerHostEvents, WhisperSchematic} from "./schematic.js"
5
+
6
+ export const setupWhisperHost = (events: SpeechRecognizerHostEvents) => (
7
+ Comrade.host<WhisperSchematic>(_shell => ({
8
+ async updateModelLoadProgress(item) {
9
+ events.onModelLoadProgress.pub(item)
10
+ },
11
+ async deliverTranscriptionChunk(chunk) {
12
+ events.onTranscriptionChunk.pub({
13
+ text: chunk.text,
14
+ timestamp: chunk.timestamp
15
+ })
16
+ },
17
+ async updateTps(value) {
18
+ events.onTpsUpdate.pub(value)
19
+ },
20
+ async updateTranscribeProgress(value) {
21
+ events.onTranscribeProgress(value)
22
+ }
23
+ }))
24
+ )
25
+
@@ -0,0 +1,23 @@
import {Pub} from "@e280/stz"
import {AsSchematic} from "@e280/comrade"

import {LoaderEvents} from "../../../common/loader.js"
import {ProgressItem, TranscriptionChunk, TranscriptionMessage, TranscriptionResult, Word} from "../parts/types.js"

/**
 * Comrade wire-contract for the whisper worker.
 * Work side: transcribe. Host side: load progress, streamed chunks,
 * tokens/sec updates, and transcription progress.
 */
export type WhisperSchematic = AsSchematic<{
	work: {
		// resolves null when the underlying transcriber produced no output
		transcribe(input: TranscriptionMessage): Promise<TranscriptionResult | null>
	},

	host: {
		updateModelLoadProgress(item: ProgressItem): Promise<void>
		deliverTranscriptionChunk(chunk: TranscriptionChunk): Promise<void>
		updateTps(value: number): Promise<void>

		// value is the estimated transcription progress (0..1)
		updateTranscribeProgress(value: number): Promise<void>
	}
}>

/** Loader events plus transcription-specific pubs consumed by the whisper host. */
export interface SpeechRecognizerHostEvents extends LoaderEvents {
	onTranscriptionChunk: Pub<Word[]>
	onTranscribeProgress: Pub<[number]>
}
@@ -0,0 +1,91 @@
1
+ import {Comrade} from "@e280/comrade"
2
+ //@ts-ignore
3
+ import {pipeline, WhisperTextStreamer} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"
4
+
5
+ import {WhisperSchematic} from "./schematic.js"
6
+ import {TranscriptionChunk} from "../parts/types.js"
7
+ import {PipelineFactory} from "../../../common/transformer-pipeline.js"
8
+
9
+ // TODO suspicious globals, probably bad
10
+ const pipeline = new PipelineFactory("automatic-speech-recognition")
11
+ let transcriber: any
12
+
13
+ export const setupWhisperWork = Comrade.work<WhisperSchematic>(shell => ({
14
+ async transcribe({audio, model, language, duration}) {
15
+ const isDistil = model.startsWith("distil-whisper/")
16
+
17
+ if(!pipeline.model || pipeline.model !== model) {
18
+ pipeline.instance?.dispose()?.()
19
+ pipeline.instance = null
20
+ transcriber = await pipeline.createInstance(
21
+ model,
22
+ (data) => {
23
+ if(data.progress)
24
+ shell.host.updateModelLoadProgress({
25
+ id: data.file,
26
+ progress: data.progress
27
+ })
28
+ }
29
+ )
30
+ }
31
+
32
+ const timePrecision =
33
+ transcriber.processor.feature_extractor.config.chunk_length /
34
+ transcriber.model.config.max_source_positions
35
+
36
+ const chunkLength = isDistil ? 20 : 30
37
+ const strideLength = isDistil ? 3 : 5
38
+
39
+ let chunkCount = 0
40
+ let startTime: number | null = null
41
+ let tokenCount = 0
42
+ let tps = 0
43
+
44
+ const chunkDuration = chunkLength - strideLength
45
+
46
+ const estimateProgress = () => {
47
+ const audioProgressSeconds = chunkCount * chunkDuration
48
+ return Math.min(audioProgressSeconds / duration, 1)
49
+ }
50
+
51
+ const streamer = new WhisperTextStreamer(transcriber.tokenizer, {
52
+ time_precision: timePrecision,
53
+ token_callback_function: () => {
54
+ startTime ??= performance.now()
55
+ if (++tokenCount > 1) {
56
+ tps = (tokenCount / (performance.now() - startTime)) * 1000
57
+ shell.host.updateTps(tps)
58
+ }
59
+ },
60
+ callback_function: (textChunk: any) => {
61
+ shell.host.deliverTranscriptionChunk(textChunk)
62
+ },
63
+ on_finalize: () => {
64
+ startTime = null
65
+ tokenCount = 0
66
+ chunkCount++
67
+ const progress = estimateProgress()
68
+ shell.host.updateTranscribeProgress(progress)
69
+ },
70
+ })
71
+
72
+ const output = await transcriber(audio, {
73
+ top_k: 0,
74
+ do_sample: false,
75
+ chunk_length_s: chunkLength,
76
+ stride_length_s: strideLength,
77
+ language,
78
+ task: "transcribe",
79
+ return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
80
+ force_full_sequences: false,
81
+ streamer,
82
+ })
83
+
84
+ if (!output) return null
85
+
86
+ return {
87
+ tps,
88
+ ...output,
89
+ }
90
+ }
91
+ }))
@@ -0,0 +1,38 @@
/** One file's download/load progress, identified by file id. */
export interface ProgressItem {
	id: string
	progress: number
}

/** A transcribed word (or text span) with its [start, end] timestamps. */
export type Word = {
	text: string
	timestamp: [start: number, end: number]
}

// a run of words; a transcript is a list of such runs
export type WordGroup = Word[]
export type Transcript = WordGroup[]

/** A chunk streamed out of the whisper text streamer. */
export interface TranscriptionChunk {
	text: string
	offset: number
	timestamp: [number, number]
	finalised: boolean
}

/** Request payload for the whisper worker's transcribe call. */
export interface TranscriptionMessage {
	audio: Float32Array
	model: string
	subtask: string | null

	// NOTE(review): null presumably lets the model auto-detect — confirm
	language: string | null

	// total audio duration, used by the worker to estimate progress
	duration: number
}

/** Final transcription output plus the last observed tokens/sec. */
export interface TranscriptionResult {
	text: string
	chunks: TranscriptionChunk[]
	tps: number
}

/** Receives raw events from transformers' progress_callback. */
export type ProgressCallback = (data: any) => void

export type SpeechRecognizerModels = "onnx-community/whisper-tiny_timestamped"
export type SpeechRecognizerSubtasks = "transcribe"
@@ -0,0 +1,7 @@
import {Comrade} from "@e280/comrade"

import {setupWhisperWork} from "../fns/work.js"
import {WhisperSchematic} from "../fns/schematic.js"

// worker-bundle entrypoint: register the whisper work-side handlers
await Comrade.worker<WhisperSchematic>(setupWhisperWork)
