@omnimedia/omnitool 1.1.0-3 → 1.1.0-5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/package.json +11 -9
  2. package/s/context.ts +1 -1
  3. package/s/demo/demo.bundle.ts +6 -2
  4. package/s/demo/routines/filmstrip-test.ts +2 -2
  5. package/s/demo/routines/transcriber-test.ts +34 -0
  6. package/s/demo/routines/transitions-test.ts +43 -0
  7. package/s/driver/fns/host.ts +7 -6
  8. package/s/driver/fns/schematic.ts +1 -1
  9. package/s/driver/fns/work.ts +116 -119
  10. package/s/driver/utils/load-decoder-source.ts +3 -4
  11. package/s/features/speech/transcribe/default-spec.ts +11 -0
  12. package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
  13. package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
  14. package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
  15. package/s/features/speech/transcribe/transcriber.ts +46 -0
  16. package/s/features/speech/transcribe/types.ts +82 -0
  17. package/s/features/speech/transcribe/worker.bundle.ts +40 -0
  18. package/s/features/transition/parts/fragment.ts +24 -0
  19. package/s/features/transition/parts/types.ts +94 -0
  20. package/s/features/transition/parts/uniforms.ts +29 -0
  21. package/s/features/transition/parts/vertex.ts +31 -0
  22. package/s/features/transition/transition.ts +60 -0
  23. package/s/timeline/utils/checksum.ts +2 -1
  24. package/s/tools/common/loader.ts +26 -0
  25. package/s/tools/common/transformer-pipeline.ts +26 -0
  26. package/s/tools/speech-recognition/common/model.ts +26 -0
  27. package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
  28. package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
  29. package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
  30. package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
  31. package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
  32. package/s/tools/speech-recognition/whisper/tool.ts +70 -0
  33. package/x/context.js +1 -1
  34. package/x/context.js.map +1 -1
  35. package/x/demo/demo.bundle.js +6 -2
  36. package/x/demo/demo.bundle.js.map +1 -1
  37. package/x/demo/demo.bundle.min.js +6 -6
  38. package/x/demo/demo.bundle.min.js.map +4 -4
  39. package/x/demo/routines/filmstrip-test.d.ts +1 -1
  40. package/x/demo/routines/filmstrip-test.js +2 -2
  41. package/x/demo/routines/filmstrip-test.js.map +1 -1
  42. package/x/demo/routines/transcriber-test.d.ts +4 -0
  43. package/x/demo/routines/transcriber-test.js +33 -0
  44. package/x/demo/routines/transcriber-test.js.map +1 -0
  45. package/x/demo/routines/transitions-test.d.ts +5 -0
  46. package/x/demo/routines/transitions-test.js +35 -0
  47. package/x/demo/routines/transitions-test.js.map +1 -0
  48. package/x/driver/driver.worker.bundle.min.js +80 -80
  49. package/x/driver/driver.worker.bundle.min.js.map +4 -4
  50. package/x/driver/fns/host.js +3 -3
  51. package/x/driver/fns/host.js.map +1 -1
  52. package/x/driver/fns/schematic.d.ts +1 -1
  53. package/x/driver/fns/work.js +8 -8
  54. package/x/driver/fns/work.js.map +1 -1
  55. package/x/driver/utils/load-decoder-source.d.ts +2 -1
  56. package/x/driver/utils/load-decoder-source.js +2 -3
  57. package/x/driver/utils/load-decoder-source.js.map +1 -1
  58. package/x/features/speech/transcribe/default-spec.d.ts +2 -0
  59. package/x/features/speech/transcribe/default-spec.js +8 -0
  60. package/x/features/speech/transcribe/default-spec.js.map +1 -0
  61. package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
  62. package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
  63. package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
  64. package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
  65. package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
  66. package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
  67. package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
  68. package/x/features/speech/transcribe/parts/transcribe.js +56 -0
  69. package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
  70. package/x/features/speech/transcribe/transcriber.d.ts +5 -0
  71. package/x/features/speech/transcribe/transcriber.js +33 -0
  72. package/x/features/speech/transcribe/transcriber.js.map +1 -0
  73. package/x/features/speech/transcribe/types.d.ts +66 -0
  74. package/x/features/speech/transcribe/types.js +2 -0
  75. package/x/features/speech/transcribe/types.js.map +1 -0
  76. package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
  77. package/x/features/speech/transcribe/worker.bundle.js +33 -0
  78. package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
  79. package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
  80. package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
  81. package/x/features/transition/parts/fragment.d.ts +1 -0
  82. package/x/features/transition/parts/fragment.js +25 -0
  83. package/x/features/transition/parts/fragment.js.map +1 -0
  84. package/x/features/transition/parts/types.d.ts +23 -0
  85. package/x/features/transition/parts/types.js +2 -0
  86. package/x/features/transition/parts/types.js.map +1 -0
  87. package/x/features/transition/parts/uniforms.d.ts +31 -0
  88. package/x/features/transition/parts/uniforms.js +27 -0
  89. package/x/features/transition/parts/uniforms.js.map +1 -0
  90. package/x/features/transition/parts/vertex.d.ts +1 -0
  91. package/x/features/transition/parts/vertex.js +32 -0
  92. package/x/features/transition/parts/vertex.js.map +1 -0
  93. package/x/features/transition/transition.d.ts +5 -0
  94. package/x/features/transition/transition.js +50 -0
  95. package/x/features/transition/transition.js.map +1 -0
  96. package/x/index.html +2 -2
  97. package/x/timeline/utils/checksum.js +2 -1
  98. package/x/timeline/utils/checksum.js.map +1 -1
  99. package/x/tools/common/loader.d.ts +19 -0
  100. package/x/tools/common/loader.js +18 -0
  101. package/x/tools/common/loader.js.map +1 -0
  102. package/x/tools/common/transformer-pipeline.d.ts +8 -0
  103. package/x/tools/common/transformer-pipeline.js +24 -0
  104. package/x/tools/common/transformer-pipeline.js.map +1 -0
  105. package/x/tools/speech-recognition/common/model.d.ts +14 -0
  106. package/x/tools/speech-recognition/common/model.js +16 -0
  107. package/x/tools/speech-recognition/common/model.js.map +1 -0
  108. package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
  109. package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
  110. package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
  111. package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
  112. package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
  113. package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
  114. package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
  115. package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
  116. package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
  117. package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
  118. package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
  119. package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
  120. package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
  121. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
  122. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
  123. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
  124. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
  125. package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
  126. package/x/tools/speech-recognition/whisper/tool.js +63 -0
  127. package/x/tools/speech-recognition/whisper/tool.js.map +1 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
 	"name": "@omnimedia/omnitool",
-	"version": "1.1.0-3",
+	"version": "1.1.0-5",
 	"description": "open source video processing tools",
 	"license": "MIT",
 	"author": "Przemysław Gałęzki",
@@ -23,20 +23,22 @@
 		"test-debug": "node inspect x/tests.test.js"
 	},
 	"devDependencies": {
-		"@e280/science": "^0.0.5",
-		"@e280/scute": "^0.0.0-6",
-		"@types/node": "^24.0.14",
+		"@e280/science": "^0.0.6",
+		"@e280/scute": "^0.0.0-7",
+		"@types/node": "^24.2.0",
 		"http-server": "^14.1.1",
 		"npm-run-all": "^4.1.5",
-		"typescript": "^5.8.3"
+		"typescript": "^5.9.2"
 	},
 	"dependencies": {
 		"@benev/slate": "^0.3.9",
-		"@e280/comrade": "^0.0.0-18",
-		"@e280/renraku": "^0.5.0-19",
-		"@e280/stz": "^0.0.0-22",
+		"@e280/comrade": "^0.0.0-23",
+		"@e280/renraku": "^0.5.0-29",
+		"@e280/stz": "^0.0.0-34",
+		"@huggingface/transformers": "^3.7.1",
 		"comrade": "^0.0.3",
-		"mediabunny": "^1.1.1",
+		"gl-transitions": "^1.43.0",
+		"mediabunny": "^1.4.4",
 		"mp4-muxer": "^5.2.1",
 		"pixi.js": "^8.10.1",
 		"wavesurfer.js": "^7.10.0",
package/s/context.ts CHANGED
@@ -1,6 +1,6 @@
 import {Driver} from "./driver/driver.js"
 
-const workerUrl = new URL("../driver/driver.worker.bundle.js", import.meta.url)
+const workerUrl = new URL("../driver/driver.worker.bundle.min.js", import.meta.url)
 
 export const context = {
 	driver: Driver.setup({workerUrl})
package/s/demo/demo.bundle.ts CHANGED
@@ -2,6 +2,7 @@
 import {context} from "../context.js"
 import {waveformTest} from "./routines/waveform-test.js"
 import {filmstripTest} from "./routines/filmstrip-test.js"
+import {transcriberTest} from "./routines/transcriber-test.js"
 import {setupTranscodeTest} from "./routines/transcode-test.js"
 
 const driver = await context.driver
@@ -14,6 +15,7 @@ fetchButton?.addEventListener("click", startDemoFetch)
 importButton?.addEventListener("click", startDemoImport)
 
 waveformTest()
+const transcriber = await transcriberTest(driver)
 
 // hello world test
 {
@@ -26,9 +28,11 @@ waveformTest()
 async function startDemoImport()
 {
 	const [fileHandle] = await window.showOpenFilePicker()
-	const transcode = setupTranscodeTest(driver, fileHandle)
-	await filmstripTest(fileHandle)
+	const file = await fileHandle.getFile()
+	const transcode = setupTranscodeTest(driver, file)
+	await filmstripTest(file)
 	run(transcode, fileHandle.name)
+	await transcriber.transcribe(file)
 }
 
 async function startDemoFetch()
package/s/demo/routines/filmstrip-test.ts CHANGED
@@ -1,6 +1,6 @@
 import {Filmstrip} from "../../timeline/parts/filmstrip.js"
 
-export async function filmstripTest(fileHandle: FileSystemFileHandle) {
+export async function filmstripTest(file: File) {
 	const rangeSlider = document.querySelector(".range") as HTMLInputElement
 	const rangeView = document.querySelector(".range-view")!
 	const rangeSizeSlider = document.querySelector(".range-size")! as HTMLInputElement
@@ -10,7 +10,7 @@ export async function filmstripTest(fileHandle: FileSystemFileHandle) {
 	const FPS_10 = 1000/10 / 1000
 	let rangeSize = 0.5
 	const filmstrip = await Filmstrip.init(
-		fileHandle,
+		file,
 		{
 			onChange(tiles) {
 				// Sort by time (optional, for clean ordering)
package/s/demo/routines/transcriber-test.ts ADDED
@@ -0,0 +1,34 @@
+import {Driver} from "../../driver/driver.js"
+import {makeTranscriber} from "../../features/speech/transcribe/transcriber.js"
+
+export async function transcriberTest(driver: Driver) {
+	const transcriber = await makeTranscriber({
+		driver,
+		spec: {
+			model: "onnx-community/whisper-tiny_timestamped",
+			device: "webgpu",
+			strideLength: 5,
+			chunkLength: 30,
+			dtype: "fp32"
+		},
+		workerUrl: new URL("/features/speech/transcribe/worker.bundle.min.js", import.meta.url),
+		onLoading({progress, total}) {
+			console.log(progress, total, "total")
+		},
+	})
+	return {
+		transcribe: async (file: File) => {
+			const result = await transcriber.transcribe({
+				source: file,
+				language: "english",
+				onReport(report) {
+					console.log("report", report)
+				},
+				onTranscription(transcription) {
+					console.log("transcript", transcription)
+				}
+			})
+			console.log(result, "transcript result")
+		}
+	}
+}
package/s/demo/routines/transitions-test.ts ADDED
@@ -0,0 +1,43 @@
+import {Application, Sprite} from "pixi.js"
+
+import {Driver} from "../../driver/driver.js"
+import {DecoderSource} from "../../driver/fns/schematic.js"
+import {makeTransition} from "../../features/transition/transition.js"
+
+export async function setupTransitionsTest(driver: Driver, source: DecoderSource) {
+	const app = new Application()
+	await app.init({width: 300, height: 300, preference: "webgl"})
+	const sprite = new Sprite({width: 300, height: 300})
+
+	app.stage.addChild(sprite)
+
+	document.body.appendChild(app.canvas)
+	const transition = makeTransition({name: "circle", renderer: app.renderer})
+
+	async function run() {
+		const readables = driver.decode({
+			source,
+			async onFrame(frame) {
+				const texture = transition.render({
+					from: frame,
+					to: frame,
+					progress: 0.7,
+					width: app.canvas.width,
+					height: app.canvas.height
+				})
+				sprite.texture = texture
+				return frame
+			}
+		})
+
+		await driver.encode({
+			readables,
+			config: {
+				audio: {codec: "opus", bitrate: 128000},
+				video: {codec: "vp9", bitrate: 1000000}
+			}
+		})
+	}
+
+	return {run}
+}
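
Read alongside the new gl-transitions dependency in package.json, this demo routine shows the whole surface of the new transition feature: makeTransition binds a gl-transitions shader (selected by name) to a pixi.js renderer, and render blends two decoded frames at a given progress, returning a texture. The following is an editorial sketch of that usage, not part of the diff — import paths follow the demo routine's location under package/s/demo/routines/, and the VideoFrame inputs are assumed to arrive from driver.decode's onFrame callback as above:

import {Application} from "pixi.js"
import {makeTransition} from "../../features/transition/transition.js"

// sketch only: bind the "circle" gl-transitions shader to a pixi renderer
const app = new Application()
await app.init({width: 300, height: 300, preference: "webgl"})
const transition = makeTransition({name: "circle", renderer: app.renderer})

// assumption: two frames captured from driver.decode's onFrame, as in the demo
declare const frameA: VideoFrame
declare const frameB: VideoFrame

// blend halfway between the two frames; the result can be shown via sprite.texture
const texture = transition.render({
	from: frameA,
	to: frameB,
	progress: 0.5,
	width: app.canvas.width,
	height: app.canvas.height,
})
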
package/s/driver/fns/host.ts CHANGED
@@ -3,10 +3,11 @@ import {Comrade} from "@e280/comrade"
 import {Machina} from "../parts/machina.js"
 import {DriverSchematic} from "./schematic.js"
 
-export const setupDriverHost = (machina: Machina) => Comrade.host<DriverSchematic>(({work}, rig) => ({
-
-	async world() {
-		machina.count++
-	}
-}))
+export const setupDriverHost = (machina: Machina) => (
+	Comrade.host<DriverSchematic>(_shell => ({
+		async world() {
+			machina.count++
+		},
+	}))
+)
 
package/s/driver/fns/schematic.ts CHANGED
@@ -36,7 +36,7 @@ export interface EncoderInput {
 	}
 }
 
-export type DecoderSource = FileSystemFileHandle | string
+export type DecoderSource = Blob | string | URL
 
 export interface DecoderInput {
 	source: DecoderSource
package/s/driver/fns/work.ts CHANGED
@@ -1,142 +1,139 @@
 import {Comrade} from "@e280/comrade"
-import {
-	Input, ALL_FORMATS, VideoSampleSink, Output, Mp4OutputFormat, VideoSampleSource, VideoSample,
-	AudioSampleSink, AudioSampleSource, AudioSample, StreamTarget, BlobSource, UrlSource
-} from "mediabunny"
 import {autoDetectRenderer, Container, Renderer, Sprite, Text, Texture, DOMAdapter, WebWorkerAdapter} from "pixi.js"
+import {Input, ALL_FORMATS, VideoSampleSink, Output, Mp4OutputFormat, VideoSampleSource, VideoSample, AudioSampleSink, AudioSampleSource, AudioSample, StreamTarget, BlobSource, UrlSource} from "mediabunny"
 
 import {Composition, DriverSchematic, Layer, Transform} from "./schematic.js"
 
 DOMAdapter.set(WebWorkerAdapter)
 
-export const setupDriverWork = Comrade.work<DriverSchematic>(({host}, rig) => ({
-
-	async hello() {
-		await host.world()
-	},
-
-	async decode({source, video, audio}) {
-		const loadSource = async () => {
-			if(source instanceof FileSystemFileHandle) {
-				const file = await source.getFile()
-				return new BlobSource(file)
-			} else {
-				return new UrlSource(source)
+export const setupDriverWork = (
+	Comrade.work<DriverSchematic>(shell => ({
+		async hello() {
+			await shell.host.world()
+		},
+
+		async decode({source, video, audio}) {
+			const loadSource = async () => {
+				if(source instanceof Blob) {
+					return new BlobSource(source)
+				} else {
+					return new UrlSource(source)
+				}
 			}
-		}
-		const input = new Input({
-			source: await loadSource(),
-			formats: ALL_FORMATS
-		})
-
-		const [videoTrack, audioTrack] = await Promise.all([
-			input.getPrimaryVideoTrack(),
-			input.getPrimaryAudioTrack()
-		])
-
-		const videoDecodable = await videoTrack?.canDecode()
-		const audioDecodable = await audioTrack?.canDecode()
-
-		const videoWriter = video.getWriter()
-		const audioWriter = audio.getWriter()
-
-		await Promise.all([
-			(async () => {
-				if (videoDecodable && videoTrack) {
-					const sink = new VideoSampleSink(videoTrack)
-					for await (const sample of sink.samples()) {
-						const frame = sample.toVideoFrame()
-						await videoWriter.write(frame)
+			const input = new Input({
+				source: await loadSource(),
+				formats: ALL_FORMATS
+			})
+
+			const [videoTrack, audioTrack] = await Promise.all([
+				input.getPrimaryVideoTrack(),
+				input.getPrimaryAudioTrack()
+			])
+
+			const videoDecodable = await videoTrack?.canDecode()
+			const audioDecodable = await audioTrack?.canDecode()
+
+			const videoWriter = video.getWriter()
+			const audioWriter = audio.getWriter()
+
+			await Promise.all([
+				(async () => {
+					if (videoDecodable && videoTrack) {
+						const sink = new VideoSampleSink(videoTrack)
+						for await (const sample of sink.samples()) {
+							const frame = sample.toVideoFrame()
+							await videoWriter.write(frame)
+							sample.close()
+							frame.close()
+						}
+						await videoWriter.close()
+					}
+				})(),
+				(async () => {
+					if (audioDecodable && audioTrack) {
+						const sink = new AudioSampleSink(audioTrack)
+						for await (const sample of sink.samples()) {
+							const frame = sample.toAudioData()
+							await audioWriter.write(frame)
+							sample.close()
+							frame.close()
+						}
+						await audioWriter.close()
+					}
+				})()
+			])
+		},
+
+		async encode({readables, config, bridge}) {
+			const output = new Output({
+				format: new Mp4OutputFormat(),
+				target: new StreamTarget(bridge, {chunked: true})
+			})
+			const videoSource = new VideoSampleSource(config.video)
+			output.addVideoTrack(videoSource)
+			// since AudioSample is not transferable it fails to transfer encoder bitrate config
+			// so it needs to be hardcoded not set through constants eg QUALITY_LOW
+			const audioSource = new AudioSampleSource(config.audio)
+			output.addAudioTrack(audioSource)
+
+			await output.start()
+
+			const videoReader = readables.video.getReader()
+			const audioReader = readables.audio.getReader()
+
+			await Promise.all([
+				(async () => {
+					while (true) {
+						const {done, value} = await videoReader.read()
+						if (done) break
+						const sample = new VideoSample(value)
+						await videoSource.add(sample)
 						sample.close()
-						frame.close()
 					}
-					await videoWriter.close()
-				}
-			})(),
-			(async () => {
-				if (audioDecodable && audioTrack) {
-					const sink = new AudioSampleSink(audioTrack)
-					for await (const sample of sink.samples()) {
-						const frame = sample.toAudioData()
-						await audioWriter.write(frame)
+				})(),
+				(async () => {
+					while (true) {
+						const {done, value} = await audioReader.read()
+						if (done) break
+						const sample = new AudioSample(value)
+						await audioSource.add(sample)
 						sample.close()
-						frame.close()
+						value.close()
 					}
-					await audioWriter.close()
-				}
-			})()
-		])
-	},
-
-	async encode({readables, config, bridge}) {
-		const output = new Output({
-			format: new Mp4OutputFormat(),
-			target: new StreamTarget(bridge, {chunked: true})
-		})
-		const videoSource = new VideoSampleSource(config.video)
-		output.addVideoTrack(videoSource)
-		// since AudioSample is not transferable it fails to transfer encoder bitrate config
-		// so it needs to be hardcoded not set through constants eg QUALITY_LOW
-		const audioSource = new AudioSampleSource(config.audio)
-		output.addAudioTrack(audioSource)
-
-		await output.start()
-
-		const videoReader = readables.video.getReader()
-		const audioReader = readables.audio.getReader()
-
-		await Promise.all([
-			(async () => {
-				while (true) {
-					const {done, value} = await videoReader.read()
-					if (done) break
-					const sample = new VideoSample(value)
-					await videoSource.add(sample)
-					sample.close()
-				}
-			})(),
-			(async () => {
-				while (true) {
-					const {done, value} = await audioReader.read()
-					if (done) break
-					const sample = new AudioSample(value)
-					await audioSource.add(sample)
-					sample.close()
-					value.close()
-				}
-			})()
-		])
+				})()
+			])
 
-		await output.finalize()
-	},
+			await output.finalize()
+		},
 
-	async composite(composition) {
-		const {stage, renderer} = await renderPIXI(1920, 1080)
-		stage.removeChildren()
+		async composite(composition) {
+			const {stage, renderer} = await renderPIXI(1920, 1080)
+			stage.removeChildren()
 
-		const {baseFrame, disposables} = await renderLayer(composition, stage)
-		renderer.render(stage)
+			const {baseFrame, disposables} = await renderLayer(composition, stage)
+			renderer.render(stage)
 
-		// make sure browser support webgl/webgpu otherwise it might take much longer to construct frame
-		// if its very slow on eg edge try chrome
-		const frame = new VideoFrame(renderer.canvas, {
-			timestamp: baseFrame?.timestamp,
-			duration: baseFrame?.duration ?? undefined,
-		})
+			// make sure browser support webgl/webgpu otherwise it might take much longer to construct frame
+			// if its very slow on eg edge try chrome
+			const frame = new VideoFrame(renderer.canvas, {
+				timestamp: baseFrame?.timestamp,
+				duration: baseFrame?.duration ?? undefined,
+			})
 
-		baseFrame?.close()
-		renderer.clear()
-
-		for (const disposable of disposables) {
-			disposable.destroy(true)
-		}
+			baseFrame?.close()
+			renderer.clear()
 
-		rig.transfer = [frame]
-		return frame
-	}
-}))
+			for (const disposable of disposables) {
+				disposable.destroy(true)
+			}
 
+			shell.transfer = [frame]
+			return frame
+		}
+	}))
+)
 
+// TODO suspicious global, probably bad
 let pixi: {
 	renderer: Renderer
 	stage: Container
package/s/driver/utils/load-decoder-source.ts CHANGED
@@ -2,10 +2,9 @@ import {BlobSource, UrlSource} from "mediabunny"
 import {DecoderSource} from "../fns/schematic.js"
 
 // only streamable sources
-export async function loadDecoderSource(source: DecoderSource) {
-	if(source instanceof FileSystemFileHandle) {
-		const file = await source.getFile()
-		return new BlobSource(file)
+export async function loadDecoderSource(source: DecoderSource): Promise<UrlSource | BlobSource> {
+	if(source instanceof Blob) {
+		return new BlobSource(source)
 	} else {
 		return new UrlSource(source)
 	}
package/s/features/speech/transcribe/default-spec.ts ADDED
@@ -0,0 +1,11 @@
+
+import {TranscriberSpec} from "./types.js"
+
+export const defaultTranscriberSpec = (): TranscriberSpec => ({
+	model: "onnx-community/whisper-tiny_timestamped",
+	dtype: "q4",
+	device: "wasm",
+	chunkLength: 20,
+	strideLength: 3,
+})
+
package/s/features/speech/transcribe/parts/load-pipe.ts ADDED
@@ -0,0 +1,19 @@
+
+import {pipeline} from "@huggingface/transformers"
+
+import {TranscriberPipeOptions} from "../types.js"
+
+export async function loadPipe(options: TranscriberPipeOptions) {
+	const {spec, onLoading} = options
+
+	const pipe = await pipeline("automatic-speech-recognition", spec.model, {
+		device: spec.device,
+		dtype: spec.dtype,
+		progress_callback: (data: any) => {
+			onLoading({total: data.total, progress: data.progress})
+		},
+	})
+
+	return pipe
+}
+
package/s/features/speech/transcribe/parts/prep-audio.ts ADDED
@@ -0,0 +1,23 @@
+
+import {Driver} from "../../../../driver/driver.js"
+
+export async function prepAudio(driver: Driver, source: Blob) {
+	const arrayBuffer = await source.arrayBuffer()
+	const audioCTX = new AudioContext({sampleRate: 16000})
+	const audioData = await audioCTX.decodeAudioData(arrayBuffer)
+	let audio: Float32Array
+	if (audioData.numberOfChannels === 2) {
+		const SCALING_FACTOR = Math.sqrt(2)
+		const left = audioData.getChannelData(0)
+		const right = audioData.getChannelData(1)
+		audio = new Float32Array(left.length)
+		for (let i = 0; i < audioData.length; ++i) {
+			audio[i] = (SCALING_FACTOR * (left[i] + right[i])) / 2
+		}
+	} else {
+		audio = audioData.getChannelData(0)
+	}
+	const duration = await driver.getAudioDuration(source)
+	return {audio, duration}
+}
+
package/s/features/speech/transcribe/parts/transcribe.ts ADDED
@@ -0,0 +1,70 @@
+
+import {WhisperTextStreamer} from "@huggingface/transformers"
+import {TranscribeOptions} from "../types.js"
+
+export async function transcribe(options: TranscribeOptions) {
+	const {pipe, spec, request, callbacks} = options
+
+	if (!pipe.processor.feature_extractor)
+		throw new Error("no feature_extractor")
+
+	const timePrecision = (
+		pipe.processor.feature_extractor?.config.chunk_length /
+		// @ts-ignore
+		pipe.model.config.max_source_positions
+	)
+
+	let chunkCount = 0
+	let startTime: number | null = null
+	let tokenCount = 0
+	let tokensPerSecond = 0
+
+	const chunkDuration = spec.chunkLength - spec.strideLength
+
+	const calculateProgress = () => {
+		const audioProgressSeconds = chunkCount * chunkDuration
+		return Math.min(audioProgressSeconds / request.duration, 1)
+	}
+
+	// TODO type error on pipe.tokenizer
+	const tokenizer = pipe.tokenizer as any
+
+	const streamer = new WhisperTextStreamer(tokenizer, {
+		time_precision: timePrecision,
+		token_callback_function: () => {
+			startTime ??= performance.now()
+			if (++tokenCount > 1) {
+				tokensPerSecond = (tokenCount / (performance.now() - startTime)) * 1000
+			}
+		},
+		callback_function: (textChunk: any) => {
+			// TODO
+			callbacks.onTranscription(textChunk)
+			callbacks.onReport({tokensPerSecond, progress: calculateProgress()})
+		},
+		on_finalize: () => {
+			startTime = null
+			tokenCount = 0
+			chunkCount++
+			callbacks.onReport({tokensPerSecond, progress: calculateProgress()})
+		},
+	})
+
+	const result = await pipe(new Float32Array(request.audio), {
+		top_k: 0,
+		do_sample: false,
+		chunk_length_s: spec.chunkLength,
+		stride_length_s: spec.strideLength,
+		language: request.language,
+		task: "transcribe",
+		return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
+		force_full_sequences: false,
+		streamer,
+	})
+
+	return {
+		text: result.text,
+		chunks: result.chunks
+	}
+}
+
package/s/features/speech/transcribe/transcriber.ts ADDED
@@ -0,0 +1,46 @@
+
+import {Comrade} from "@e280/comrade"
+import {coalesce, queue, sub} from "@e280/stz"
+
+import {prepAudio} from "./parts/prep-audio.js"
+import {TranscriberOptions, TranscriberSchematic, TranscriptionOptions, TranscriptionReport} from "./types.js"
+
+export async function makeTranscriber({driver, spec, workerUrl, onLoading}: TranscriberOptions) {
+	const onReport = sub<[report: TranscriptionReport]>()
+	const onTranscription = sub<[transcription: string]>()
+
+	const thread = await Comrade.thread<TranscriberSchematic>({
+		label: "OmnitoolSpeechTranscriber",
+		workerUrl,
+		setupHost: () => ({
+			loading: async loading => onLoading(loading),
+			deliverReport: async report => onReport.pub(report),
+			deliverTranscription: async transcription => onTranscription.pub(transcription),
+		}),
+	})
+
+	await thread.work.prepare(spec)
+
+	return {
+		transcribe: queue(async(info: TranscriptionOptions) => {
+			const {source, language} = info
+			const {audio, duration} = await prepAudio(driver, source)
+
+			const detachCallbacks = coalesce(
+				onReport(info.onReport),
+				onTranscription(info.onTranscription),
+			)
+
+			const result = await thread.work.transcribe({
+				duration,
+				language,
+				audio: audio.buffer,
+			})
+
+			detachCallbacks()
+			return result
+		}),
+		dispose: thread.terminate()
+	}
+}
+
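
Taken together, the transcription flow is: makeTranscriber spins up a comrade worker thread, prepAudio decodes the source to 16 kHz mono Float32 samples on the main thread (downmixing stereo as sqrt(2)*(L+R)/2), and the worker runs the Whisper pipeline, streaming loading progress, reports, and text back through the host callbacks. The following is an editorial consumption sketch, not part of the diff — it mirrors the demo routine above, and the import paths assume a module living under package/s/demo/:

import {context} from "../context.js"
import {makeTranscriber} from "../features/speech/transcribe/transcriber.js"
import {defaultTranscriberSpec} from "../features/speech/transcribe/default-spec.js"

// assumption: a file obtained e.g. via showOpenFilePicker, as in the demo
declare const file: File

const driver = await context.driver
const transcriber = await makeTranscriber({
	driver,
	spec: defaultTranscriberSpec(), // or override model/device/dtype as the demo does
	workerUrl: new URL("/features/speech/transcribe/worker.bundle.min.js", import.meta.url),
	onLoading: ({progress, total}) => console.log("loading model", progress, total),
})

// transcribe is wrapped in queue(), so concurrent calls run one at a time
const result = await transcriber.transcribe({
	source: file, // any Blob; prepAudio decodes and downmixes it
	language: "english",
	onReport: ({tokensPerSecond, progress}) => console.log(tokensPerSecond, progress),
	onTranscription: text => console.log(text),
})
console.log(result.text, result.chunks)
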