@omnimedia/omnitool 1.1.0-1 → 1.1.0-10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (222)
  1. package/README.md +1 -1
  2. package/package.json +12 -9
  3. package/s/context.ts +1 -1
  4. package/s/demo/demo.bundle.ts +6 -2
  5. package/s/demo/demo.css +5 -0
  6. package/s/demo/routines/filmstrip-test.ts +2 -2
  7. package/s/demo/routines/transcode-test.ts +4 -2
  8. package/s/demo/routines/transcriber-test.ts +34 -0
  9. package/s/demo/routines/transitions-test.ts +43 -0
  10. package/s/driver/driver.ts +17 -9
  11. package/s/driver/fns/host.ts +7 -6
  12. package/s/driver/fns/schematic.ts +45 -22
  13. package/s/driver/fns/work.ts +163 -151
  14. package/s/driver/utils/load-decoder-source.ts +3 -4
  15. package/s/features/speech/transcribe/default-spec.ts +11 -0
  16. package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
  17. package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
  18. package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
  19. package/s/features/speech/transcribe/transcriber.ts +46 -0
  20. package/s/features/speech/transcribe/types.ts +82 -0
  21. package/s/features/speech/transcribe/worker.bundle.ts +40 -0
  22. package/s/features/transition/parts/fragment.ts +24 -0
  23. package/s/features/transition/parts/types.ts +94 -0
  24. package/s/features/transition/parts/uniforms.ts +29 -0
  25. package/s/features/transition/parts/vertex.ts +31 -0
  26. package/s/features/transition/transition.ts +60 -0
  27. package/s/index.html.ts +6 -1
  28. package/s/timeline/index.ts +1 -0
  29. package/s/timeline/parts/basics.ts +1 -1
  30. package/s/timeline/parts/compositor/export.ts +77 -0
  31. package/s/timeline/parts/compositor/parts/html-tree.ts +37 -0
  32. package/s/timeline/parts/compositor/parts/schedulers.ts +85 -0
  33. package/s/timeline/parts/compositor/parts/tree-builder.ts +184 -0
  34. package/s/timeline/parts/compositor/parts/webcodecs-tree.ts +30 -0
  35. package/s/timeline/parts/compositor/playback.ts +81 -0
  36. package/s/timeline/parts/compositor/samplers/html.ts +115 -0
  37. package/s/timeline/parts/compositor/samplers/webcodecs.ts +60 -0
  38. package/s/timeline/parts/item.ts +38 -6
  39. package/s/timeline/parts/media.ts +21 -0
  40. package/s/timeline/parts/waveform.ts +1 -1
  41. package/s/timeline/sugar/builders.ts +102 -0
  42. package/s/timeline/sugar/o.ts +117 -27
  43. package/s/timeline/sugar/omni-test.ts +2 -2
  44. package/s/timeline/sugar/omni.ts +21 -11
  45. package/s/timeline/types.ts +29 -0
  46. package/s/timeline/utils/audio-stream.ts +15 -0
  47. package/s/timeline/utils/checksum.ts +2 -1
  48. package/s/timeline/utils/matrix.ts +33 -0
  49. package/s/timeline/utils/video-cursor.ts +40 -0
  50. package/s/tools/common/loader.ts +26 -0
  51. package/s/tools/common/transformer-pipeline.ts +26 -0
  52. package/s/tools/speech-recognition/common/model.ts +26 -0
  53. package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
  54. package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
  55. package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
  56. package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
  57. package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
  58. package/s/tools/speech-recognition/whisper/tool.ts +70 -0
  59. package/x/context.js +1 -1
  60. package/x/context.js.map +1 -1
  61. package/x/demo/demo.bundle.js +6 -2
  62. package/x/demo/demo.bundle.js.map +1 -1
  63. package/x/demo/demo.bundle.min.js +39 -37
  64. package/x/demo/demo.bundle.min.js.map +4 -4
  65. package/x/demo/demo.css +5 -0
  66. package/x/demo/routines/filmstrip-test.d.ts +1 -1
  67. package/x/demo/routines/filmstrip-test.js +2 -2
  68. package/x/demo/routines/filmstrip-test.js.map +1 -1
  69. package/x/demo/routines/transcode-test.js +4 -2
  70. package/x/demo/routines/transcode-test.js.map +1 -1
  71. package/x/demo/routines/transcriber-test.d.ts +4 -0
  72. package/x/demo/routines/transcriber-test.js +33 -0
  73. package/x/demo/routines/transcriber-test.js.map +1 -0
  74. package/x/demo/routines/transitions-test.d.ts +5 -0
  75. package/x/demo/routines/transitions-test.js +35 -0
  76. package/x/demo/routines/transitions-test.js.map +1 -0
  77. package/x/driver/driver.d.ts +3 -5
  78. package/x/driver/driver.js +16 -9
  79. package/x/driver/driver.js.map +1 -1
  80. package/x/driver/driver.worker.bundle.min.js +2537 -148
  81. package/x/driver/driver.worker.bundle.min.js.map +4 -4
  82. package/x/driver/fns/host.d.ts +9 -2
  83. package/x/driver/fns/host.js +3 -3
  84. package/x/driver/fns/host.js.map +1 -1
  85. package/x/driver/fns/schematic.d.ts +39 -21
  86. package/x/driver/fns/work.d.ts +11 -4
  87. package/x/driver/fns/work.js +111 -102
  88. package/x/driver/fns/work.js.map +1 -1
  89. package/x/driver/utils/load-decoder-source.d.ts +2 -1
  90. package/x/driver/utils/load-decoder-source.js +2 -3
  91. package/x/driver/utils/load-decoder-source.js.map +1 -1
  92. package/x/features/speech/transcribe/default-spec.d.ts +2 -0
  93. package/x/features/speech/transcribe/default-spec.js +8 -0
  94. package/x/features/speech/transcribe/default-spec.js.map +1 -0
  95. package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
  96. package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
  97. package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
  98. package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
  99. package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
  100. package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
  101. package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
  102. package/x/features/speech/transcribe/parts/transcribe.js +56 -0
  103. package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
  104. package/x/features/speech/transcribe/transcriber.d.ts +5 -0
  105. package/x/features/speech/transcribe/transcriber.js +33 -0
  106. package/x/features/speech/transcribe/transcriber.js.map +1 -0
  107. package/x/features/speech/transcribe/types.d.ts +66 -0
  108. package/x/features/speech/transcribe/types.js +2 -0
  109. package/x/features/speech/transcribe/types.js.map +1 -0
  110. package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
  111. package/x/features/speech/transcribe/worker.bundle.js +33 -0
  112. package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
  113. package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
  114. package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
  115. package/x/features/transition/parts/fragment.d.ts +1 -0
  116. package/x/features/transition/parts/fragment.js +25 -0
  117. package/x/features/transition/parts/fragment.js.map +1 -0
  118. package/x/features/transition/parts/types.d.ts +23 -0
  119. package/x/features/transition/parts/types.js +2 -0
  120. package/x/features/transition/parts/types.js.map +1 -0
  121. package/x/features/transition/parts/uniforms.d.ts +31 -0
  122. package/x/features/transition/parts/uniforms.js +27 -0
  123. package/x/features/transition/parts/uniforms.js.map +1 -0
  124. package/x/features/transition/parts/vertex.d.ts +1 -0
  125. package/x/features/transition/parts/vertex.js +32 -0
  126. package/x/features/transition/parts/vertex.js.map +1 -0
  127. package/x/features/transition/transition.d.ts +5 -0
  128. package/x/features/transition/transition.js +50 -0
  129. package/x/features/transition/transition.js.map +1 -0
  130. package/x/index.html +13 -3
  131. package/x/index.html.js +6 -1
  132. package/x/index.html.js.map +1 -1
  133. package/x/timeline/index.d.ts +1 -0
  134. package/x/timeline/index.js +1 -0
  135. package/x/timeline/index.js.map +1 -1
  136. package/x/timeline/parts/basics.d.ts +1 -1
  137. package/x/timeline/parts/compositor/export.d.ts +9 -0
  138. package/x/timeline/parts/compositor/export.js +64 -0
  139. package/x/timeline/parts/compositor/export.js.map +1 -0
  140. package/x/timeline/parts/compositor/parts/html-tree.d.ts +3 -0
  141. package/x/timeline/parts/compositor/parts/html-tree.js +40 -0
  142. package/x/timeline/parts/compositor/parts/html-tree.js.map +1 -0
  143. package/x/timeline/parts/compositor/parts/schedulers.d.ts +15 -0
  144. package/x/timeline/parts/compositor/parts/schedulers.js +64 -0
  145. package/x/timeline/parts/compositor/parts/schedulers.js.map +1 -0
  146. package/x/timeline/parts/compositor/parts/tree-builder.d.ts +37 -0
  147. package/x/timeline/parts/compositor/parts/tree-builder.js +147 -0
  148. package/x/timeline/parts/compositor/parts/tree-builder.js.map +1 -0
  149. package/x/timeline/parts/compositor/parts/webcodecs-tree.d.ts +3 -0
  150. package/x/timeline/parts/compositor/parts/webcodecs-tree.js +28 -0
  151. package/x/timeline/parts/compositor/parts/webcodecs-tree.js.map +1 -0
  152. package/x/timeline/parts/compositor/playback.d.ts +19 -0
  153. package/x/timeline/parts/compositor/playback.js +71 -0
  154. package/x/timeline/parts/compositor/playback.js.map +1 -0
  155. package/x/timeline/parts/compositor/samplers/html.d.ts +3 -0
  156. package/x/timeline/parts/compositor/samplers/html.js +106 -0
  157. package/x/timeline/parts/compositor/samplers/html.js.map +1 -0
  158. package/x/timeline/parts/compositor/samplers/webcodecs.d.ts +2 -0
  159. package/x/timeline/parts/compositor/samplers/webcodecs.js +55 -0
  160. package/x/timeline/parts/compositor/samplers/webcodecs.js.map +1 -0
  161. package/x/timeline/parts/item.d.ts +34 -8
  162. package/x/timeline/parts/item.js +6 -3
  163. package/x/timeline/parts/item.js.map +1 -1
  164. package/x/timeline/parts/media.d.ts +3 -0
  165. package/x/timeline/parts/media.js +17 -0
  166. package/x/timeline/parts/media.js.map +1 -1
  167. package/x/timeline/parts/waveform.js +1 -1
  168. package/x/timeline/parts/waveform.js.map +1 -1
  169. package/x/timeline/sugar/builders.d.ts +1 -0
  170. package/x/timeline/sugar/builders.js +104 -0
  171. package/x/timeline/sugar/builders.js.map +1 -0
  172. package/x/timeline/sugar/o.d.ts +23 -5
  173. package/x/timeline/sugar/o.js +93 -27
  174. package/x/timeline/sugar/o.js.map +1 -1
  175. package/x/timeline/sugar/omni-test.js +1 -1
  176. package/x/timeline/sugar/omni-test.js.map +1 -1
  177. package/x/timeline/sugar/omni.d.ts +5 -2
  178. package/x/timeline/sugar/omni.js +17 -9
  179. package/x/timeline/sugar/omni.js.map +1 -1
  180. package/x/timeline/types.d.ts +24 -0
  181. package/x/timeline/types.js +2 -0
  182. package/x/timeline/types.js.map +1 -0
  183. package/x/timeline/utils/audio-stream.d.ts +6 -0
  184. package/x/timeline/utils/audio-stream.js +17 -0
  185. package/x/timeline/utils/audio-stream.js.map +1 -0
  186. package/x/timeline/utils/checksum.js +2 -1
  187. package/x/timeline/utils/checksum.js.map +1 -1
  188. package/x/timeline/utils/matrix.d.ts +8 -0
  189. package/x/timeline/utils/matrix.js +26 -0
  190. package/x/timeline/utils/matrix.js.map +1 -0
  191. package/x/timeline/utils/video-cursor.d.ts +10 -0
  192. package/x/timeline/utils/video-cursor.js +36 -0
  193. package/x/timeline/utils/video-cursor.js.map +1 -0
  194. package/x/tools/common/loader.d.ts +19 -0
  195. package/x/tools/common/loader.js +18 -0
  196. package/x/tools/common/loader.js.map +1 -0
  197. package/x/tools/common/transformer-pipeline.d.ts +8 -0
  198. package/x/tools/common/transformer-pipeline.js +24 -0
  199. package/x/tools/common/transformer-pipeline.js.map +1 -0
  200. package/x/tools/speech-recognition/common/model.d.ts +14 -0
  201. package/x/tools/speech-recognition/common/model.js +16 -0
  202. package/x/tools/speech-recognition/common/model.js.map +1 -0
  203. package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
  204. package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
  205. package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
  206. package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
  207. package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
  208. package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
  209. package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
  210. package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
  211. package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
  212. package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
  213. package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
  214. package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
  215. package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
  216. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
  217. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
  218. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
  219. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
  220. package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
  221. package/x/tools/speech-recognition/whisper/tool.js +63 -0
  222. package/x/tools/speech-recognition/whisper/tool.js.map +1 -0
@@ -1,142 +1,141 @@
1
1
  import {Comrade} from "@e280/comrade"
2
- import {
3
- Input, ALL_FORMATS, VideoSampleSink, Output, Mp4OutputFormat, VideoSampleSource, VideoSample,
4
- AudioSampleSink, AudioSampleSource, AudioSample, StreamTarget, BlobSource, UrlSource
5
- } from "mediabunny"
6
- import {autoDetectRenderer, Container, Renderer, Sprite, Text, Texture, DOMAdapter, WebWorkerAdapter} from "pixi.js"
2
+ import {autoDetectRenderer, Container, Renderer, Sprite, Text, Texture, DOMAdapter, WebWorkerAdapter, Matrix} from "pixi.js"
3
+ import {Input, ALL_FORMATS, VideoSampleSink, Output, Mp4OutputFormat, VideoSampleSource, VideoSample, AudioSampleSink, AudioSampleSource, AudioSample, StreamTarget, BlobSource, UrlSource} from "mediabunny"
7
4
 
8
- import {Composition, DriverSchematic, Layer, Transform} from "./schematic.js"
5
+ import {Mat6, mat6ToMatrix} from "../../timeline/utils/matrix.js"
6
+ import {makeTransition} from "../../features/transition/transition.js"
7
+ import {Composition, DecoderSource, DriverSchematic, Layer} from "./schematic.js"
9
8
 
10
9
  DOMAdapter.set(WebWorkerAdapter)
11
10
 
12
- export const setupDriverWork = Comrade.work<DriverSchematic>(({host}, rig) => ({
13
-
14
- async hello() {
15
- await host.world()
16
- },
11
+ const loadSource = async (source: DecoderSource) => {
12
+ if(source instanceof Blob) {
13
+ return new BlobSource(source)
14
+ } else {
15
+ return new UrlSource(source)
16
+ }
17
+ }
17
18
 
18
- async decode({source, video, audio}) {
19
- const loadSource = async () => {
20
- if(source instanceof FileSystemFileHandle) {
21
- const file = await source.getFile()
22
- return new BlobSource(file)
23
- } else {
24
- return new UrlSource(source)
19
+ export const setupDriverWork = (
20
+ Comrade.work<DriverSchematic>(shell => ({
21
+ async hello() {
22
+ await shell.host.world()
23
+ },
24
+
25
+ async decodeAudio({source, audio, start, end}) {
26
+ const input = new Input({
27
+ source: await loadSource(source),
28
+ formats: ALL_FORMATS
29
+ })
30
+
31
+ const audioTrack = await input.getPrimaryAudioTrack()
32
+ const audioDecodable = await audioTrack?.canDecode()
33
+ const audioWriter = audio.getWriter()
34
+
35
+ if (audioDecodable && audioTrack) {
36
+ const sink = new AudioSampleSink(audioTrack)
37
+ for await (const sample of sink.samples(start, end)) {
38
+ const frame = sample.toAudioData()
39
+ await audioWriter.write(frame)
40
+ sample.close()
41
+ frame.close()
42
+ }
43
+ await audioWriter.close()
25
44
  }
26
- }
27
- const input = new Input({
28
- source: await loadSource(),
29
- formats: ALL_FORMATS
30
- })
31
-
32
- const [videoTrack, audioTrack] = await Promise.all([
33
- input.getPrimaryVideoTrack(),
34
- input.getPrimaryAudioTrack()
35
- ])
36
-
37
- const videoDecodable = await videoTrack?.canDecode()
38
- const audioDecodable = await audioTrack?.canDecode()
39
-
40
- const videoWriter = video.getWriter()
41
- const audioWriter = audio.getWriter()
42
-
43
- await Promise.all([
44
- (async () => {
45
- if (videoDecodable && videoTrack) {
46
- const sink = new VideoSampleSink(videoTrack)
47
- for await (const sample of sink.samples()) {
48
- const frame = sample.toVideoFrame()
49
- await videoWriter.write(frame)
45
+ },
46
+
47
+ async decodeVideo({source, video, start, end}) {
48
+ const input = new Input({
49
+ source: await loadSource(source),
50
+ formats: ALL_FORMATS
51
+ })
52
+
53
+ const videoTrack = await input.getPrimaryVideoTrack()
54
+ const videoDecodable = await videoTrack?.canDecode()
55
+ const videoWriter = video.getWriter()
56
+
57
+ if (videoDecodable && videoTrack) {
58
+ const sink = new VideoSampleSink(videoTrack)
59
+ for await (const sample of sink.samples(start, end)) {
60
+ const frame = sample.toVideoFrame()
61
+ await videoWriter.write(frame)
62
+ sample.close()
63
+ frame.close()
64
+ }
65
+ await videoWriter.close()
66
+ }
67
+ },
68
+
69
+ async encode({video, audio, config, bridge}) {
70
+ const output = new Output({
71
+ format: new Mp4OutputFormat(),
72
+ target: new StreamTarget(bridge, {chunked: true})
73
+ })
74
+ // since AudioSample is not transferable it fails to transfer encoder bitrate config
75
+ // so it needs to be hardcoded not set through constants eg QUALITY_LOW
76
+
77
+ const promises = []
78
+
79
+ if(video) {
80
+ const videoSource = new VideoSampleSource(config.video)
81
+ output.addVideoTrack(videoSource)
82
+ const videoReader = video.getReader()
83
+ promises.push((async () => {
84
+ while (true) {
85
+ const {done, value} = await videoReader.read()
86
+ if (done) break
87
+ const sample = new VideoSample(value)
88
+ await videoSource.add(sample)
50
89
  sample.close()
51
- frame.close()
52
90
  }
53
- await videoWriter.close()
54
- }
55
- })(),
56
- (async () => {
57
- if (audioDecodable && audioTrack) {
58
- const sink = new AudioSampleSink(audioTrack)
59
- for await (const sample of sink.samples()) {
60
- const frame = sample.toAudioData()
61
- await audioWriter.write(frame)
91
+ })())
92
+ }
93
+
94
+ if(audio) {
95
+ const audioSource = new AudioSampleSource(config.audio)
96
+ output.addAudioTrack(audioSource)
97
+ const audioReader = audio.getReader()
98
+ promises.push((async () => {
99
+ while (true) {
100
+ const {done, value} = await audioReader.read()
101
+ if (done) break
102
+ const sample = new AudioSample(value)
103
+ await audioSource.add(sample)
62
104
  sample.close()
63
- frame.close()
105
+ value.close()
64
106
  }
65
- await audioWriter.close()
66
- }
67
- })()
68
- ])
69
- },
70
-
71
- async encode({readables, config, bridge}) {
72
- const output = new Output({
73
- format: new Mp4OutputFormat(),
74
- target: new StreamTarget(bridge, {chunked: true})
75
- })
76
- const videoSource = new VideoSampleSource(config.video)
77
- output.addVideoTrack(videoSource)
78
- // since AudioSample is not transferable it fails to transfer encoder bitrate config
79
- // so it needs to be hardcoded not set through constants eg QUALITY_LOW
80
- const audioSource = new AudioSampleSource(config.audio)
81
- output.addAudioTrack(audioSource)
82
-
83
- await output.start()
84
-
85
- const videoReader = readables.video.getReader()
86
- const audioReader = readables.audio.getReader()
87
-
88
- await Promise.all([
89
- (async () => {
90
- while (true) {
91
- const {done, value} = await videoReader.read()
92
- if (done) break
93
- const sample = new VideoSample(value)
94
- await videoSource.add(sample)
95
- sample.close()
96
- }
97
- })(),
98
- (async () => {
99
- while (true) {
100
- const {done, value} = await audioReader.read()
101
- if (done) break
102
- const sample = new AudioSample(value)
103
- await audioSource.add(sample)
104
- sample.close()
105
- value.close()
106
- }
107
- })()
108
- ])
107
+ })())
108
+ }
109
109
 
110
- await output.finalize()
111
- },
110
+ await output.start()
111
+ await Promise.all(promises)
112
+ await output.finalize()
113
+ },
112
114
 
113
- async composite(composition) {
114
- const {stage, renderer} = await renderPIXI(1920, 1080)
115
- stage.removeChildren()
115
+ async composite(composition) {
116
+ const {stage, renderer} = await renderPIXI(1920, 1080)
117
+ stage.removeChildren()
116
118
 
117
- const {baseFrame, disposables} = await renderLayer(composition, stage)
118
- renderer.render(stage)
119
+ const {dispose} = await renderLayer(composition, stage)
120
+ renderer.render(stage)
119
121
 
120
- // make sure browser support webgl/webgpu otherwise it might take much longer to construct frame
121
- // if its very slow on eg edge try chrome
122
- const frame = new VideoFrame(renderer.canvas, {
123
- timestamp: baseFrame?.timestamp,
124
- duration: baseFrame?.duration ?? undefined,
125
- })
122
+ // make sure browser support webgl/webgpu otherwise it might take much longer to construct frame
123
+ // if its very slow on eg edge try chrome
124
+ const frame = new VideoFrame(renderer.canvas, {
125
+ timestamp: 0,
126
+ duration: 0,
127
+ })
126
128
 
127
- baseFrame?.close()
128
- renderer.clear()
129
+ renderer.clear()
130
+ dispose()
129
131
 
130
- for (const disposable of disposables) {
131
- disposable.destroy(true)
132
+ shell.transfer = [frame]
133
+ return frame
132
134
  }
135
+ }))
136
+ )
133
137
 
134
- rig.transfer = [frame]
135
- return frame
136
- }
137
- }))
138
-
139
-
138
+ // TODO suspicious global, probably bad
140
139
  let pixi: {
141
140
  renderer: Renderer
142
141
  stage: Container
@@ -160,46 +159,43 @@ async function renderPIXI(width: number, height: number) {
160
159
  return pixi
161
160
  }
162
161
 
162
+ const transitions: Map<string, ReturnType<typeof makeTransition>> = new Map()
163
+
163
164
  type RenderableObject = Sprite | Text | Texture
164
165
 
165
166
  async function renderLayer(
166
167
  layer: Layer | Composition,
167
168
  parent: Container,
168
- disposables: RenderableObject[] = []
169
169
  ) {
170
170
  if (Array.isArray(layer)) {
171
- let baseFrame: VideoFrame | undefined
171
+ const disposers: (() => void)[] = []
172
172
  for (const child of layer) {
173
- const result = await renderLayer(child, parent, disposables)
174
- baseFrame ??= result.baseFrame
173
+ const result = await renderLayer(child, parent)
174
+ disposers.push(result.dispose)
175
175
  }
176
- return {baseFrame, disposables}
177
- }
178
-
179
- if (!isRenderableLayer(layer)) {
180
- console.warn('Invalid layer', layer)
181
- return {disposables}
176
+ return {dispose: () => disposers.forEach(d => d())}
182
177
  }
183
178
 
184
179
  switch (layer.kind) {
185
180
  case 'text':
186
- return renderTextLayer(layer, parent, disposables)
181
+ return renderTextLayer(layer, parent)
187
182
  case 'image':
188
- return renderImageLayer(layer, parent, disposables)
183
+ return renderImageLayer(layer, parent)
184
+ case 'transition':
185
+ return renderTransitionLayer(layer, parent)
186
+ case 'gap': {
187
+ pixi?.renderer.clear()
188
+ return {dispose: () => {}}
189
+ }
189
190
  default:
190
191
  console.warn('Unknown layer kind', (layer as any).kind)
191
- return {disposables}
192
+ return {dispose: () => {}}
192
193
  }
193
194
  }
194
195
 
195
- function isRenderableLayer(layer: any): layer is Layer {
196
- return !!layer && typeof layer === 'object' && typeof layer.kind === 'string'
197
- }
198
-
199
196
  function renderTextLayer(
200
197
  layer: Extract<Layer, {kind: 'text'}>,
201
198
  parent: Container,
202
- disposables: RenderableObject[]
203
199
  ) {
204
200
  const text = new Text({
205
201
  text: layer.content,
@@ -209,29 +205,45 @@ function renderTextLayer(
209
205
  fill: layer.color ?? 'white'
210
206
  }
211
207
  })
212
- applyTransform(text, layer)
208
+ applyTransform(text, layer.matrix)
213
209
  parent.addChild(text)
214
- disposables.push(text)
215
- return {disposables}
210
+ return {dispose: () => text.destroy(true)}
216
211
  }
217
212
 
218
213
  function renderImageLayer(
219
214
  layer: Extract<Layer, {kind: 'image'}>,
220
215
  parent: Container,
221
- disposables: RenderableObject[]
222
216
  ) {
223
217
  const texture = Texture.from(layer.frame)
224
218
  const sprite = new Sprite(texture)
225
- applyTransform(sprite, layer)
219
+ applyTransform(sprite, layer.matrix)
220
+ parent.addChild(sprite)
221
+ return {dispose: () => {
222
+ sprite.destroy(true)
223
+ texture.destroy(true)
224
+ layer.frame.close()
225
+ }}
226
+ }
227
+
228
+ function renderTransitionLayer(
229
+ {from, to, progress, name}: Extract<Layer, {kind: 'transition'}>,
230
+ parent: Container,
231
+ ) {
232
+ const transition = transitions.get(name) ??
233
+ (transitions.set(name, makeTransition({
234
+ name: "circle",
235
+ renderer: pixi!.renderer
236
+ })),
237
+ transitions.get(name)!
238
+ )
239
+ const texture = transition.render({from, to, progress, width: from.displayWidth, height: from.displayHeight})
240
+ const sprite = new Sprite(texture)
226
241
  parent.addChild(sprite)
227
- disposables.push(sprite, texture)
228
- return {baseFrame: layer.frame, disposables}
242
+ return {dispose: () => sprite.destroy(false)}
229
243
  }
230
244
 
231
- function applyTransform(target: Sprite | Text, t: Transform = {}) {
232
- if(t.x) target.x = t.x
233
- if(t.y) target.y = t.y
234
- if(t.scale) target.scale.set(t.scale)
235
- if(t.opacity) target.alpha = t.opacity
236
- if(t.anchor && 'anchor' in target) target.anchor.set(t.anchor)
245
+ function applyTransform(target: Sprite | Text, worldMatrix?: Mat6) {
246
+ if (!worldMatrix) return
247
+ const mx = mat6ToMatrix(worldMatrix)
248
+ target.setFromMatrix(mx)
237
249
  }
@@ -2,10 +2,9 @@ import {BlobSource, UrlSource} from "mediabunny"
2
2
  import {DecoderSource} from "../fns/schematic.js"
3
3
 
4
4
  // only streamable sources
5
- export async function loadDecoderSource(source: DecoderSource) {
6
- if(source instanceof FileSystemFileHandle) {
7
- const file = await source.getFile()
8
- return new BlobSource(file)
5
+ export async function loadDecoderSource(source: DecoderSource): Promise<UrlSource | BlobSource> {
6
+ if(source instanceof Blob) {
7
+ return new BlobSource(source)
9
8
  } else {
10
9
  return new UrlSource(source)
11
10
  }
@@ -0,0 +1,11 @@
1
+
2
+ import {TranscriberSpec} from "./types.js"
3
+
4
+ export const defaultTranscriberSpec = (): TranscriberSpec => ({
5
+ model: "onnx-community/whisper-tiny_timestamped",
6
+ dtype: "q4",
7
+ device: "wasm",
8
+ chunkLength: 20,
9
+ strideLength: 3,
10
+ })
11
+
@@ -0,0 +1,19 @@
1
+
2
+ import {pipeline} from "@huggingface/transformers"
3
+
4
+ import {TranscriberPipeOptions} from "../types.js"
5
+
6
+ export async function loadPipe(options: TranscriberPipeOptions) {
7
+ const {spec, onLoading} = options
8
+
9
+ const pipe = await pipeline("automatic-speech-recognition", spec.model, {
10
+ device: spec.device,
11
+ dtype: spec.dtype,
12
+ progress_callback: (data: any) => {
13
+ onLoading({total: data.total, progress: data.progress})
14
+ },
15
+ })
16
+
17
+ return pipe
18
+ }
19
+
@@ -0,0 +1,23 @@
1
+
2
+ import {Driver} from "../../../../driver/driver.js"
3
+
4
+ export async function prepAudio(driver: Driver, source: Blob) {
5
+ const arrayBuffer = await source.arrayBuffer()
6
+ const audioCTX = new AudioContext({sampleRate: 16000})
7
+ const audioData = await audioCTX.decodeAudioData(arrayBuffer)
8
+ let audio: Float32Array
9
+ if (audioData.numberOfChannels === 2) {
10
+ const SCALING_FACTOR = Math.sqrt(2)
11
+ const left = audioData.getChannelData(0)
12
+ const right = audioData.getChannelData(1)
13
+ audio = new Float32Array(left.length)
14
+ for (let i = 0; i < audioData.length; ++i) {
15
+ audio[i] = (SCALING_FACTOR * (left[i] + right[i])) / 2
16
+ }
17
+ } else {
18
+ audio = audioData.getChannelData(0)
19
+ }
20
+ const duration = await driver.getAudioDuration(source)
21
+ return {audio, duration}
22
+ }
23
+
@@ -0,0 +1,70 @@
1
+
2
+ import {WhisperTextStreamer} from "@huggingface/transformers"
3
+ import {TranscribeOptions} from "../types.js"
4
+
5
+ export async function transcribe(options: TranscribeOptions) {
6
+ const {pipe, spec, request, callbacks} = options
7
+
8
+ if (!pipe.processor.feature_extractor)
9
+ throw new Error("no feature_extractor")
10
+
11
+ const timePrecision = (
12
+ pipe.processor.feature_extractor?.config.chunk_length /
13
+ // @ts-ignore
14
+ pipe.model.config.max_source_positions
15
+ )
16
+
17
+ let chunkCount = 0
18
+ let startTime: number | null = null
19
+ let tokenCount = 0
20
+ let tokensPerSecond = 0
21
+
22
+ const chunkDuration = spec.chunkLength - spec.strideLength
23
+
24
+ const calculateProgress = () => {
25
+ const audioProgressSeconds = chunkCount * chunkDuration
26
+ return Math.min(audioProgressSeconds / request.duration, 1)
27
+ }
28
+
29
+ // TODO type error on pipe.tokenizer
30
+ const tokenizer = pipe.tokenizer as any
31
+
32
+ const streamer = new WhisperTextStreamer(tokenizer, {
33
+ time_precision: timePrecision,
34
+ token_callback_function: () => {
35
+ startTime ??= performance.now()
36
+ if (++tokenCount > 1) {
37
+ tokensPerSecond = (tokenCount / (performance.now() - startTime)) * 1000
38
+ }
39
+ },
40
+ callback_function: (textChunk: any) => {
41
+ // TODO
42
+ callbacks.onTranscription(textChunk)
43
+ callbacks.onReport({tokensPerSecond, progress: calculateProgress()})
44
+ },
45
+ on_finalize: () => {
46
+ startTime = null
47
+ tokenCount = 0
48
+ chunkCount++
49
+ callbacks.onReport({tokensPerSecond, progress: calculateProgress()})
50
+ },
51
+ })
52
+
53
+ const result = await pipe(new Float32Array(request.audio), {
54
+ top_k: 0,
55
+ do_sample: false,
56
+ chunk_length_s: spec.chunkLength,
57
+ stride_length_s: spec.strideLength,
58
+ language: request.language,
59
+ task: "transcribe",
60
+ return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
61
+ force_full_sequences: false,
62
+ streamer,
63
+ })
64
+
65
+ return {
66
+ text: result.text,
67
+ chunks: result.chunks
68
+ }
69
+ }
70
+
@@ -0,0 +1,46 @@
1
+
2
+ import {Comrade} from "@e280/comrade"
3
+ import {coalesce, queue, sub} from "@e280/stz"
4
+
5
+ import {prepAudio} from "./parts/prep-audio.js"
6
+ import {TranscriberOptions, TranscriberSchematic, TranscriptionOptions, TranscriptionReport} from "./types.js"
7
+
8
+ export async function makeTranscriber({driver, spec, workerUrl, onLoading}: TranscriberOptions) {
9
+ const onReport = sub<[report: TranscriptionReport]>()
10
+ const onTranscription = sub<[transcription: string]>()
11
+
12
+ const thread = await Comrade.thread<TranscriberSchematic>({
13
+ label: "OmnitoolSpeechTranscriber",
14
+ workerUrl,
15
+ setupHost: () => ({
16
+ loading: async loading => onLoading(loading),
17
+ deliverReport: async report => onReport.pub(report),
18
+ deliverTranscription: async transcription => onTranscription.pub(transcription),
19
+ }),
20
+ })
21
+
22
+ await thread.work.prepare(spec)
23
+
24
+ return {
25
+ transcribe: queue(async(info: TranscriptionOptions) => {
26
+ const {source, language} = info
27
+ const {audio, duration} = await prepAudio(driver, source)
28
+
29
+ const detachCallbacks = coalesce(
30
+ onReport(info.onReport),
31
+ onTranscription(info.onTranscription),
32
+ )
33
+
34
+ const result = await thread.work.transcribe({
35
+ duration,
36
+ language,
37
+ audio: audio.buffer,
38
+ })
39
+
40
+ detachCallbacks()
41
+ return result
42
+ }),
43
+ dispose: thread.terminate()
44
+ }
45
+ }
46
+
@@ -0,0 +1,82 @@
1
+
2
+ import {AsSchematic} from "@e280/comrade"
3
+ import {DataType, DeviceType, Pipeline} from "@huggingface/transformers"
4
+
5
+ import {Driver} from "../../../driver/driver.js"
6
+
7
+ export type TranscriberSchematic = AsSchematic<{
8
+ work: {
9
+ prepare(spec: TranscriberSpec): Promise<void>
10
+ transcribe(request: TranscriptionRequest): Promise<Transcription>
11
+ },
12
+
13
+ host: {
14
+ loading(load: Loading): Promise<void>
15
+ deliverReport(report: TranscriptionReport): Promise<void>
16
+ deliverTranscription(transcription: string): Promise<void>
17
+ }
18
+ }>
19
+
20
+ export type Loading = {
21
+ total: number
22
+ progress: number
23
+ }
24
+
25
+ export type TranscribeOptions = {
26
+ pipe: Pipeline
27
+ spec: TranscriberSpec
28
+ request: TranscriptionRequest
29
+ callbacks: TranscriptionCallbacks
30
+ }
31
+
32
+ export type TranscriberPipeOptions = {
33
+ spec: TranscriberSpec
34
+ onLoading: (loading: Loading) => void
35
+ }
36
+
37
+ export type SpeechTime = [start: number, end: number]
38
+
39
+ export type Transcription = {
40
+ text: string
41
+ chunks: {
42
+ text: string
43
+ timestamp: SpeechTime
44
+ }[]
45
+ }
46
+
47
+ export type TranscriberSpec = {
48
+ model: string
49
+ dtype: DataType
50
+ device: DeviceType
51
+ chunkLength: number
52
+ strideLength: number
53
+ }
54
+
55
+ export type TranscriptionOptions = {
56
+ source: Blob
57
+ language: string | null
58
+ } & TranscriptionCallbacks
59
+
60
+ export type TranscriptionRequest = {
61
+ audio: ArrayBufferLike
62
+ language: string | null
63
+ duration: number
64
+ }
65
+
66
+ export type TranscriptionReport = {
67
+ progress: number
68
+ tokensPerSecond: number
69
+ }
70
+
71
+ export type TranscriptionCallbacks = {
72
+ onReport: (report: TranscriptionReport) => void
73
+ onTranscription: (transcription: string) => void
74
+ }
75
+
76
+ export type TranscriberOptions = {
77
+ driver: Driver
78
+ spec: TranscriberSpec
79
+ workerUrl: URL | string
80
+ onLoading: (loading: Loading) => void
81
+ }
82
+