@omnimedia/omnitool 1.1.0-1 → 1.1.0-10
This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
- package/README.md +1 -1
- package/package.json +12 -9
- package/s/context.ts +1 -1
- package/s/demo/demo.bundle.ts +6 -2
- package/s/demo/demo.css +5 -0
- package/s/demo/routines/filmstrip-test.ts +2 -2
- package/s/demo/routines/transcode-test.ts +4 -2
- package/s/demo/routines/transcriber-test.ts +34 -0
- package/s/demo/routines/transitions-test.ts +43 -0
- package/s/driver/driver.ts +17 -9
- package/s/driver/fns/host.ts +7 -6
- package/s/driver/fns/schematic.ts +45 -22
- package/s/driver/fns/work.ts +163 -151
- package/s/driver/utils/load-decoder-source.ts +3 -4
- package/s/features/speech/transcribe/default-spec.ts +11 -0
- package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
- package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
- package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
- package/s/features/speech/transcribe/transcriber.ts +46 -0
- package/s/features/speech/transcribe/types.ts +82 -0
- package/s/features/speech/transcribe/worker.bundle.ts +40 -0
- package/s/features/transition/parts/fragment.ts +24 -0
- package/s/features/transition/parts/types.ts +94 -0
- package/s/features/transition/parts/uniforms.ts +29 -0
- package/s/features/transition/parts/vertex.ts +31 -0
- package/s/features/transition/transition.ts +60 -0
- package/s/index.html.ts +6 -1
- package/s/timeline/index.ts +1 -0
- package/s/timeline/parts/basics.ts +1 -1
- package/s/timeline/parts/compositor/export.ts +77 -0
- package/s/timeline/parts/compositor/parts/html-tree.ts +37 -0
- package/s/timeline/parts/compositor/parts/schedulers.ts +85 -0
- package/s/timeline/parts/compositor/parts/tree-builder.ts +184 -0
- package/s/timeline/parts/compositor/parts/webcodecs-tree.ts +30 -0
- package/s/timeline/parts/compositor/playback.ts +81 -0
- package/s/timeline/parts/compositor/samplers/html.ts +115 -0
- package/s/timeline/parts/compositor/samplers/webcodecs.ts +60 -0
- package/s/timeline/parts/item.ts +38 -6
- package/s/timeline/parts/media.ts +21 -0
- package/s/timeline/parts/waveform.ts +1 -1
- package/s/timeline/sugar/builders.ts +102 -0
- package/s/timeline/sugar/o.ts +117 -27
- package/s/timeline/sugar/omni-test.ts +2 -2
- package/s/timeline/sugar/omni.ts +21 -11
- package/s/timeline/types.ts +29 -0
- package/s/timeline/utils/audio-stream.ts +15 -0
- package/s/timeline/utils/checksum.ts +2 -1
- package/s/timeline/utils/matrix.ts +33 -0
- package/s/timeline/utils/video-cursor.ts +40 -0
- package/s/tools/common/loader.ts +26 -0
- package/s/tools/common/transformer-pipeline.ts +26 -0
- package/s/tools/speech-recognition/common/model.ts +26 -0
- package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
- package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
- package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
- package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
- package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
- package/s/tools/speech-recognition/whisper/tool.ts +70 -0
- package/x/context.js +1 -1
- package/x/context.js.map +1 -1
- package/x/demo/demo.bundle.js +6 -2
- package/x/demo/demo.bundle.js.map +1 -1
- package/x/demo/demo.bundle.min.js +39 -37
- package/x/demo/demo.bundle.min.js.map +4 -4
- package/x/demo/demo.css +5 -0
- package/x/demo/routines/filmstrip-test.d.ts +1 -1
- package/x/demo/routines/filmstrip-test.js +2 -2
- package/x/demo/routines/filmstrip-test.js.map +1 -1
- package/x/demo/routines/transcode-test.js +4 -2
- package/x/demo/routines/transcode-test.js.map +1 -1
- package/x/demo/routines/transcriber-test.d.ts +4 -0
- package/x/demo/routines/transcriber-test.js +33 -0
- package/x/demo/routines/transcriber-test.js.map +1 -0
- package/x/demo/routines/transitions-test.d.ts +5 -0
- package/x/demo/routines/transitions-test.js +35 -0
- package/x/demo/routines/transitions-test.js.map +1 -0
- package/x/driver/driver.d.ts +3 -5
- package/x/driver/driver.js +16 -9
- package/x/driver/driver.js.map +1 -1
- package/x/driver/driver.worker.bundle.min.js +2537 -148
- package/x/driver/driver.worker.bundle.min.js.map +4 -4
- package/x/driver/fns/host.d.ts +9 -2
- package/x/driver/fns/host.js +3 -3
- package/x/driver/fns/host.js.map +1 -1
- package/x/driver/fns/schematic.d.ts +39 -21
- package/x/driver/fns/work.d.ts +11 -4
- package/x/driver/fns/work.js +111 -102
- package/x/driver/fns/work.js.map +1 -1
- package/x/driver/utils/load-decoder-source.d.ts +2 -1
- package/x/driver/utils/load-decoder-source.js +2 -3
- package/x/driver/utils/load-decoder-source.js.map +1 -1
- package/x/features/speech/transcribe/default-spec.d.ts +2 -0
- package/x/features/speech/transcribe/default-spec.js +8 -0
- package/x/features/speech/transcribe/default-spec.js.map +1 -0
- package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
- package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
- package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
- package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
- package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
- package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
- package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
- package/x/features/speech/transcribe/parts/transcribe.js +56 -0
- package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
- package/x/features/speech/transcribe/transcriber.d.ts +5 -0
- package/x/features/speech/transcribe/transcriber.js +33 -0
- package/x/features/speech/transcribe/transcriber.js.map +1 -0
- package/x/features/speech/transcribe/types.d.ts +66 -0
- package/x/features/speech/transcribe/types.js +2 -0
- package/x/features/speech/transcribe/types.js.map +1 -0
- package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
- package/x/features/speech/transcribe/worker.bundle.js +33 -0
- package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
- package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
- package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
- package/x/features/transition/parts/fragment.d.ts +1 -0
- package/x/features/transition/parts/fragment.js +25 -0
- package/x/features/transition/parts/fragment.js.map +1 -0
- package/x/features/transition/parts/types.d.ts +23 -0
- package/x/features/transition/parts/types.js +2 -0
- package/x/features/transition/parts/types.js.map +1 -0
- package/x/features/transition/parts/uniforms.d.ts +31 -0
- package/x/features/transition/parts/uniforms.js +27 -0
- package/x/features/transition/parts/uniforms.js.map +1 -0
- package/x/features/transition/parts/vertex.d.ts +1 -0
- package/x/features/transition/parts/vertex.js +32 -0
- package/x/features/transition/parts/vertex.js.map +1 -0
- package/x/features/transition/transition.d.ts +5 -0
- package/x/features/transition/transition.js +50 -0
- package/x/features/transition/transition.js.map +1 -0
- package/x/index.html +13 -3
- package/x/index.html.js +6 -1
- package/x/index.html.js.map +1 -1
- package/x/timeline/index.d.ts +1 -0
- package/x/timeline/index.js +1 -0
- package/x/timeline/index.js.map +1 -1
- package/x/timeline/parts/basics.d.ts +1 -1
- package/x/timeline/parts/compositor/export.d.ts +9 -0
- package/x/timeline/parts/compositor/export.js +64 -0
- package/x/timeline/parts/compositor/export.js.map +1 -0
- package/x/timeline/parts/compositor/parts/html-tree.d.ts +3 -0
- package/x/timeline/parts/compositor/parts/html-tree.js +40 -0
- package/x/timeline/parts/compositor/parts/html-tree.js.map +1 -0
- package/x/timeline/parts/compositor/parts/schedulers.d.ts +15 -0
- package/x/timeline/parts/compositor/parts/schedulers.js +64 -0
- package/x/timeline/parts/compositor/parts/schedulers.js.map +1 -0
- package/x/timeline/parts/compositor/parts/tree-builder.d.ts +37 -0
- package/x/timeline/parts/compositor/parts/tree-builder.js +147 -0
- package/x/timeline/parts/compositor/parts/tree-builder.js.map +1 -0
- package/x/timeline/parts/compositor/parts/webcodecs-tree.d.ts +3 -0
- package/x/timeline/parts/compositor/parts/webcodecs-tree.js +28 -0
- package/x/timeline/parts/compositor/parts/webcodecs-tree.js.map +1 -0
- package/x/timeline/parts/compositor/playback.d.ts +19 -0
- package/x/timeline/parts/compositor/playback.js +71 -0
- package/x/timeline/parts/compositor/playback.js.map +1 -0
- package/x/timeline/parts/compositor/samplers/html.d.ts +3 -0
- package/x/timeline/parts/compositor/samplers/html.js +106 -0
- package/x/timeline/parts/compositor/samplers/html.js.map +1 -0
- package/x/timeline/parts/compositor/samplers/webcodecs.d.ts +2 -0
- package/x/timeline/parts/compositor/samplers/webcodecs.js +55 -0
- package/x/timeline/parts/compositor/samplers/webcodecs.js.map +1 -0
- package/x/timeline/parts/item.d.ts +34 -8
- package/x/timeline/parts/item.js +6 -3
- package/x/timeline/parts/item.js.map +1 -1
- package/x/timeline/parts/media.d.ts +3 -0
- package/x/timeline/parts/media.js +17 -0
- package/x/timeline/parts/media.js.map +1 -1
- package/x/timeline/parts/waveform.js +1 -1
- package/x/timeline/parts/waveform.js.map +1 -1
- package/x/timeline/sugar/builders.d.ts +1 -0
- package/x/timeline/sugar/builders.js +104 -0
- package/x/timeline/sugar/builders.js.map +1 -0
- package/x/timeline/sugar/o.d.ts +23 -5
- package/x/timeline/sugar/o.js +93 -27
- package/x/timeline/sugar/o.js.map +1 -1
- package/x/timeline/sugar/omni-test.js +1 -1
- package/x/timeline/sugar/omni-test.js.map +1 -1
- package/x/timeline/sugar/omni.d.ts +5 -2
- package/x/timeline/sugar/omni.js +17 -9
- package/x/timeline/sugar/omni.js.map +1 -1
- package/x/timeline/types.d.ts +24 -0
- package/x/timeline/types.js +2 -0
- package/x/timeline/types.js.map +1 -0
- package/x/timeline/utils/audio-stream.d.ts +6 -0
- package/x/timeline/utils/audio-stream.js +17 -0
- package/x/timeline/utils/audio-stream.js.map +1 -0
- package/x/timeline/utils/checksum.js +2 -1
- package/x/timeline/utils/checksum.js.map +1 -1
- package/x/timeline/utils/matrix.d.ts +8 -0
- package/x/timeline/utils/matrix.js +26 -0
- package/x/timeline/utils/matrix.js.map +1 -0
- package/x/timeline/utils/video-cursor.d.ts +10 -0
- package/x/timeline/utils/video-cursor.js +36 -0
- package/x/timeline/utils/video-cursor.js.map +1 -0
- package/x/tools/common/loader.d.ts +19 -0
- package/x/tools/common/loader.js +18 -0
- package/x/tools/common/loader.js.map +1 -0
- package/x/tools/common/transformer-pipeline.d.ts +8 -0
- package/x/tools/common/transformer-pipeline.js +24 -0
- package/x/tools/common/transformer-pipeline.js.map +1 -0
- package/x/tools/speech-recognition/common/model.d.ts +14 -0
- package/x/tools/speech-recognition/common/model.js +16 -0
- package/x/tools/speech-recognition/common/model.js.map +1 -0
- package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
- package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
- package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
- package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
- package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
- package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
- package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
- package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
- package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
- package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
- package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
- package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
- package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
- package/x/tools/speech-recognition/whisper/tool.js +63 -0
- package/x/tools/speech-recognition/whisper/tool.js.map +1 -0
package/s/driver/fns/work.ts
CHANGED
@@ -1,142 +1,141 @@
 import {Comrade} from "@e280/comrade"
-import {
-
-	AudioSampleSink, AudioSampleSource, AudioSample, StreamTarget, BlobSource, UrlSource
-} from "mediabunny"
-import {autoDetectRenderer, Container, Renderer, Sprite, Text, Texture, DOMAdapter, WebWorkerAdapter} from "pixi.js"
+import {autoDetectRenderer, Container, Renderer, Sprite, Text, Texture, DOMAdapter, WebWorkerAdapter, Matrix} from "pixi.js"
+import {Input, ALL_FORMATS, VideoSampleSink, Output, Mp4OutputFormat, VideoSampleSource, VideoSample, AudioSampleSink, AudioSampleSource, AudioSample, StreamTarget, BlobSource, UrlSource} from "mediabunny"
 
-import {
+import {Mat6, mat6ToMatrix} from "../../timeline/utils/matrix.js"
+import {makeTransition} from "../../features/transition/transition.js"
+import {Composition, DecoderSource, DriverSchematic, Layer} from "./schematic.js"
 
 DOMAdapter.set(WebWorkerAdapter)
 
-
-
-
-
-
+const loadSource = async (source: DecoderSource) => {
+	if(source instanceof Blob) {
+		return new BlobSource(source)
+	} else {
+		return new UrlSource(source)
+	}
+}
 
-
-
-
-
-
-
-
+export const setupDriverWork = (
+	Comrade.work<DriverSchematic>(shell => ({
+		async hello() {
+			await shell.host.world()
+		},
+
+		async decodeAudio({source, audio, start, end}) {
+			const input = new Input({
+				source: await loadSource(source),
+				formats: ALL_FORMATS
+			})
+
+			const audioTrack = await input.getPrimaryAudioTrack()
+			const audioDecodable = await audioTrack?.canDecode()
+			const audioWriter = audio.getWriter()
+
+			if (audioDecodable && audioTrack) {
+				const sink = new AudioSampleSink(audioTrack)
+				for await (const sample of sink.samples(start, end)) {
+					const frame = sample.toAudioData()
+					await audioWriter.write(frame)
+					sample.close()
+					frame.close()
+				}
+				await audioWriter.close()
 			}
-		}
-
-
-
-
-
-
-
-			input.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+		},
+
+		async decodeVideo({source, video, start, end}) {
+			const input = new Input({
+				source: await loadSource(source),
+				formats: ALL_FORMATS
+			})
+
+			const videoTrack = await input.getPrimaryVideoTrack()
+			const videoDecodable = await videoTrack?.canDecode()
+			const videoWriter = video.getWriter()
+
+			if (videoDecodable && videoTrack) {
+				const sink = new VideoSampleSink(videoTrack)
+				for await (const sample of sink.samples(start, end)) {
+					const frame = sample.toVideoFrame()
+					await videoWriter.write(frame)
+					sample.close()
+					frame.close()
+				}
+				await videoWriter.close()
+			}
+		},
+
+		async encode({video, audio, config, bridge}) {
+			const output = new Output({
+				format: new Mp4OutputFormat(),
+				target: new StreamTarget(bridge, {chunked: true})
+			})
+			// since AudioSample is not transferable it fails to transfer encoder bitrate config
+			// so it needs to be hardcoded not set through constants eg QUALITY_LOW
+
+			const promises = []
+
+			if(video) {
+				const videoSource = new VideoSampleSource(config.video)
+				output.addVideoTrack(videoSource)
+				const videoReader = video.getReader()
+				promises.push((async () => {
+					while (true) {
+						const {done, value} = await videoReader.read()
+						if (done) break
+						const sample = new VideoSample(value)
+						await videoSource.add(sample)
 						sample.close()
-						frame.close()
 					}
-
-
-
-				(
-
-
-
-
-
+				})())
+			}
+
+			if(audio) {
+				const audioSource = new AudioSampleSource(config.audio)
+				output.addAudioTrack(audioSource)
+				const audioReader = audio.getReader()
+				promises.push((async () => {
+					while (true) {
+						const {done, value} = await audioReader.read()
+						if (done) break
+						const sample = new AudioSample(value)
+						await audioSource.add(sample)
 						sample.close()
-
+						value.close()
 					}
-
-
-				})()
-			])
-		},
-
-		async encode({readables, config, bridge}) {
-			const output = new Output({
-				format: new Mp4OutputFormat(),
-				target: new StreamTarget(bridge, {chunked: true})
-			})
-			const videoSource = new VideoSampleSource(config.video)
-			output.addVideoTrack(videoSource)
-			// since AudioSample is not transferable it fails to transfer encoder bitrate config
-			// so it needs to be hardcoded not set through constants eg QUALITY_LOW
-			const audioSource = new AudioSampleSource(config.audio)
-			output.addAudioTrack(audioSource)
-
-			await output.start()
-
-			const videoReader = readables.video.getReader()
-			const audioReader = readables.audio.getReader()
-
-			await Promise.all([
-				(async () => {
-					while (true) {
-						const {done, value} = await videoReader.read()
-						if (done) break
-						const sample = new VideoSample(value)
-						await videoSource.add(sample)
-						sample.close()
-					}
-				})(),
-				(async () => {
-					while (true) {
-						const {done, value} = await audioReader.read()
-						if (done) break
-						const sample = new AudioSample(value)
-						await audioSource.add(sample)
-						sample.close()
-						value.close()
-					}
-				})()
-			])
+				})())
+			}
 
-
-
+			await output.start()
+			await Promise.all(promises)
+			await output.finalize()
+		},
 
-
-
-
+		async composite(composition) {
+			const {stage, renderer} = await renderPIXI(1920, 1080)
+			stage.removeChildren()
 
-
-
+			const {dispose} = await renderLayer(composition, stage)
+			renderer.render(stage)
 
-
-
-
-
-
-
+			// make sure browser support webgl/webgpu otherwise it might take much longer to construct frame
+			// if its very slow on eg edge try chrome
+			const frame = new VideoFrame(renderer.canvas, {
+				timestamp: 0,
+				duration: 0,
+			})
 
-
-
+			renderer.clear()
+			dispose()
 
-
-
+			shell.transfer = [frame]
+			return frame
 		}
+	}))
+)
 
-
-		return frame
-	}
-}))
-
-
+// TODO suspicious global, probably bad
 let pixi: {
 	renderer: Renderer
 	stage: Container
@@ -160,46 +159,43 @@ async function renderPIXI(width: number, height: number) {
 	return pixi
 }
 
+const transitions: Map<string, ReturnType<typeof makeTransition>> = new Map()
+
 type RenderableObject = Sprite | Text | Texture
 
 async function renderLayer(
 	layer: Layer | Composition,
 	parent: Container,
-	disposables: RenderableObject[] = []
 ) {
 	if (Array.isArray(layer)) {
-
+		const disposers: (() => void)[] = []
 		for (const child of layer) {
-			const result = await renderLayer(child, parent
-
+			const result = await renderLayer(child, parent)
+			disposers.push(result.dispose)
 		}
-		return {
-		}
-
-	if (!isRenderableLayer(layer)) {
-		console.warn('Invalid layer', layer)
-		return {disposables}
+		return {dispose: () => disposers.forEach(d => d())}
 	}
 
 	switch (layer.kind) {
 		case 'text':
-			return renderTextLayer(layer, parent
+			return renderTextLayer(layer, parent)
 		case 'image':
-			return renderImageLayer(layer, parent
+			return renderImageLayer(layer, parent)
+		case 'transition':
+			return renderTransitionLayer(layer, parent)
+		case 'gap': {
+			pixi?.renderer.clear()
+			return {dispose: () => {}}
+		}
 		default:
 			console.warn('Unknown layer kind', (layer as any).kind)
-			return {
+			return {dispose: () => {}}
 	}
 }
 
-function isRenderableLayer(layer: any): layer is Layer {
-	return !!layer && typeof layer === 'object' && typeof layer.kind === 'string'
-}
-
 function renderTextLayer(
 	layer: Extract<Layer, {kind: 'text'}>,
 	parent: Container,
-	disposables: RenderableObject[]
 ) {
 	const text = new Text({
 		text: layer.content,
@@ -209,29 +205,45 @@ function renderTextLayer(
 		fill: layer.color ?? 'white'
 	}
 	})
-	applyTransform(text, layer)
+	applyTransform(text, layer.matrix)
 	parent.addChild(text)
-
-	return {disposables}
+	return {dispose: () => text.destroy(true)}
 }
 
 function renderImageLayer(
 	layer: Extract<Layer, {kind: 'image'}>,
 	parent: Container,
-	disposables: RenderableObject[]
 ) {
 	const texture = Texture.from(layer.frame)
 	const sprite = new Sprite(texture)
-	applyTransform(sprite, layer)
+	applyTransform(sprite, layer.matrix)
+	parent.addChild(sprite)
+	return {dispose: () => {
+		sprite.destroy(true)
+		texture.destroy(true)
+		layer.frame.close()
+	}}
+}
+
+function renderTransitionLayer(
+	{from, to, progress, name}: Extract<Layer, {kind: 'transition'}>,
+	parent: Container,
+) {
+	const transition = transitions.get(name) ??
+		(transitions.set(name, makeTransition({
+			name: "circle",
+			renderer: pixi!.renderer
+		})),
+		transitions.get(name)!
+	)
+	const texture = transition.render({from, to, progress, width: from.displayWidth, height: from.displayHeight})
+	const sprite = new Sprite(texture)
 	parent.addChild(sprite)
-
-	return {baseFrame: layer.frame, disposables}
+	return {dispose: () => sprite.destroy(false)}
 }
 
-function applyTransform(target: Sprite | Text,
-
-
-
-	if(t.opacity) target.alpha = t.opacity
-	if(t.anchor && 'anchor' in target) target.anchor.set(t.anchor)
+function applyTransform(target: Sprite | Text, worldMatrix?: Mat6) {
+	if (!worldMatrix) return
+	const mx = mat6ToMatrix(worldMatrix)
+	target.setFromMatrix(mx)
 }
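
In short, the rework above splits decoding into decodeAudio/decodeVideo with a shared inline loadSource helper, makes both encode tracks optional (each reader pump loop is pushed into a promises array that is awaited between output.start() and output.finalize()), and has renderLayer return a dispose() closure per layer instead of threading a disposables array through every call. A minimal standalone sketch of the pump pattern (the pump name and generic signature are illustrative, not part of the package):

	// sketch: drain a ReadableStream of closable media samples into an encoder track
	async function pump<T extends {close(): void}>(
		readable: ReadableStream<T>,
		add: (value: T) => Promise<void>,
	) {
		const reader = readable.getReader()
		while (true) {
			const {done, value} = await reader.read()
			if (done) break
			await add(value!) // hand the sample to the encoder track
			value!.close()    // release the underlying VideoFrame/AudioData
		}
	}
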
package/s/driver/utils/load-decoder-source.ts
CHANGED
@@ -2,10 +2,9 @@ import {BlobSource, UrlSource} from "mediabunny"
 import {DecoderSource} from "../fns/schematic.js"
 
 // only streamable sources
-export async function loadDecoderSource(source: DecoderSource) {
-	if(source instanceof
-
-		return new BlobSource(file)
+export async function loadDecoderSource(source: DecoderSource): Promise<UrlSource | BlobSource> {
+	if(source instanceof Blob) {
+		return new BlobSource(source)
 	} else {
 		return new UrlSource(source)
 	}
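
The fix here is small but real: the old body appears to have referenced a stale `file` binding with a narrower instanceof check (truncated above), while the new version checks `source instanceof Blob`, returns the source itself, and pins the return type. A hedged usage sketch (the file name and URL are placeholders):

	// sketch: File extends Blob, so uploads and remote URLs both work
	const bytes = new Uint8Array([/* ... */])
	const fromFile = await loadDecoderSource(new File([bytes], "clip.mp4"))  // BlobSource
	const fromUrl = await loadDecoderSource("https://example.com/clip.mp4") // UrlSource
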
package/s/features/speech/transcribe/parts/load-pipe.ts
ADDED
@@ -0,0 +1,19 @@
+
+import {pipeline} from "@huggingface/transformers"
+
+import {TranscriberPipeOptions} from "../types.js"
+
+export async function loadPipe(options: TranscriberPipeOptions) {
+	const {spec, onLoading} = options
+
+	const pipe = await pipeline("automatic-speech-recognition", spec.model, {
+		device: spec.device,
+		dtype: spec.dtype,
+		progress_callback: (data: any) => {
+			onLoading({total: data.total, progress: data.progress})
+		},
+	})
+
+	return pipe
+}
+
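
loadPipe is a thin wrapper over the transformers.js pipeline() factory that forwards model-download progress back to the host. A hedged sketch of a call (the model id, dtype, and device are example values, not necessarily the package defaults in default-spec.ts):

	const pipe = await loadPipe({
		spec: {
			model: "onnx-community/whisper-tiny.en", // example model id
			dtype: "q8",
			device: "webgpu",
			chunkLength: 30,
			strideLength: 5,
		},
		onLoading: ({progress, total}) =>
			console.log(`model download: ${progress}/${total}`),
	})
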
package/s/features/speech/transcribe/parts/prep-audio.ts
ADDED
@@ -0,0 +1,23 @@
+
+import {Driver} from "../../../../driver/driver.js"
+
+export async function prepAudio(driver: Driver, source: Blob) {
+	const arrayBuffer = await source.arrayBuffer()
+	const audioCTX = new AudioContext({sampleRate: 16000})
+	const audioData = await audioCTX.decodeAudioData(arrayBuffer)
+	let audio: Float32Array
+	if (audioData.numberOfChannels === 2) {
+		const SCALING_FACTOR = Math.sqrt(2)
+		const left = audioData.getChannelData(0)
+		const right = audioData.getChannelData(1)
+		audio = new Float32Array(left.length)
+		for (let i = 0; i < audioData.length; ++i) {
+			audio[i] = (SCALING_FACTOR * (left[i] + right[i])) / 2
+		}
+	} else {
+		audio = audioData.getChannelData(0)
+	}
+	const duration = await driver.getAudioDuration(source)
+	return {audio, duration}
+}
+
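
The stereo branch downmixes to mono with a √2 factor rather than a plain average: for roughly uncorrelated channels, (L + R)/2 halves the signal power, and scaling by √2 restores it. The AudioContext is constructed at 16 kHz because that is the input rate Whisper's feature extractor expects. The per-sample formula, restated:

	// power-preserving stereo-to-mono downmix: mono = √2 · (L + R) / 2
	const downmix = (l: number, r: number) => (Math.SQRT2 * (l + r)) / 2
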
package/s/features/speech/transcribe/parts/transcribe.ts
ADDED
@@ -0,0 +1,70 @@
+
+import {WhisperTextStreamer} from "@huggingface/transformers"
+import {TranscribeOptions} from "../types.js"
+
+export async function transcribe(options: TranscribeOptions) {
+	const {pipe, spec, request, callbacks} = options
+
+	if (!pipe.processor.feature_extractor)
+		throw new Error("no feature_extractor")
+
+	const timePrecision = (
+		pipe.processor.feature_extractor?.config.chunk_length /
+		// @ts-ignore
+		pipe.model.config.max_source_positions
+	)
+
+	let chunkCount = 0
+	let startTime: number | null = null
+	let tokenCount = 0
+	let tokensPerSecond = 0
+
+	const chunkDuration = spec.chunkLength - spec.strideLength
+
+	const calculateProgress = () => {
+		const audioProgressSeconds = chunkCount * chunkDuration
+		return Math.min(audioProgressSeconds / request.duration, 1)
+	}
+
+	// TODO type error on pipe.tokenizer
+	const tokenizer = pipe.tokenizer as any
+
+	const streamer = new WhisperTextStreamer(tokenizer, {
+		time_precision: timePrecision,
+		token_callback_function: () => {
+			startTime ??= performance.now()
+			if (++tokenCount > 1) {
+				tokensPerSecond = (tokenCount / (performance.now() - startTime)) * 1000
+			}
+		},
+		callback_function: (textChunk: any) => {
+			// TODO
+			callbacks.onTranscription(textChunk)
+			callbacks.onReport({tokensPerSecond, progress: calculateProgress()})
+		},
+		on_finalize: () => {
+			startTime = null
+			tokenCount = 0
+			chunkCount++
+			callbacks.onReport({tokensPerSecond, progress: calculateProgress()})
+		},
+	})
+
+	const result = await pipe(new Float32Array(request.audio), {
+		top_k: 0,
+		do_sample: false,
+		chunk_length_s: spec.chunkLength,
+		stride_length_s: spec.strideLength,
+		language: request.language,
+		task: "transcribe",
+		return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
+		force_full_sequences: false,
+		streamer,
+	})
+
+	return {
+		text: result.text,
+		chunks: result.chunks
+	}
+}
+
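
Since return_timestamps: "word" suppresses the streamer's chunk-boundary callbacks (as the inline comment notes), progress is estimated from finalized windows instead: each finalized chunk advances through chunkLength - strideLength seconds of audio. Restated with illustrative numbers:

	// progress after n finalized chunks, e.g. 30 s chunks with a 5 s stride
	const progressAfter = (n: number, durationSeconds: number, chunk = 30, stride = 5) =>
		Math.min((n * (chunk - stride)) / durationSeconds, 1)

	progressAfter(2, 60) // 2 × 25 s of a 60 s clip ≈ 0.83
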
package/s/features/speech/transcribe/transcriber.ts
ADDED
@@ -0,0 +1,46 @@
+
+import {Comrade} from "@e280/comrade"
+import {coalesce, queue, sub} from "@e280/stz"
+
+import {prepAudio} from "./parts/prep-audio.js"
+import {TranscriberOptions, TranscriberSchematic, TranscriptionOptions, TranscriptionReport} from "./types.js"
+
+export async function makeTranscriber({driver, spec, workerUrl, onLoading}: TranscriberOptions) {
+	const onReport = sub<[report: TranscriptionReport]>()
+	const onTranscription = sub<[transcription: string]>()
+
+	const thread = await Comrade.thread<TranscriberSchematic>({
+		label: "OmnitoolSpeechTranscriber",
+		workerUrl,
+		setupHost: () => ({
+			loading: async loading => onLoading(loading),
+			deliverReport: async report => onReport.pub(report),
+			deliverTranscription: async transcription => onTranscription.pub(transcription),
+		}),
+	})
+
+	await thread.work.prepare(spec)
+
+	return {
+		transcribe: queue(async(info: TranscriptionOptions) => {
+			const {source, language} = info
+			const {audio, duration} = await prepAudio(driver, source)
+
+			const detachCallbacks = coalesce(
+				onReport(info.onReport),
+				onTranscription(info.onTranscription),
+			)
+
+			const result = await thread.work.transcribe({
+				duration,
+				language,
+				audio: audio.buffer,
+			})
+
+			detachCallbacks()
+			return result
+		}),
+		dispose: thread.terminate()
+	}
+}
+
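
makeTranscriber owns the worker thread, serializes jobs with queue(), and attaches the caller's callbacks to the pub/sub channels only for the duration of each request. One caveat worth flagging: `dispose` is assigned the result of calling thread.terminate(), not the function itself, so unless terminate() returns a disposer the thread is torn down eagerly. A hedged end-to-end sketch (the worker path and spec import are assumptions; the package ships worker.bundle and default-spec modules that plausibly fill these roles):

	// sketch: `driver` is an existing Driver, `audioBlob` any Blob with decodable audio
	const transcriber = await makeTranscriber({
		driver,
		spec: defaultSpec, // assumed import from features/speech/transcribe/default-spec.js
		workerUrl: new URL("./worker.bundle.min.js", import.meta.url), // assumed path
		onLoading: ({progress, total}) => console.log("loading model", progress, total),
	})

	const result = await transcriber.transcribe({
		source: audioBlob,
		language: null, // null lets the model detect the language
		onReport: ({progress, tokensPerSecond}) => console.log(progress, tokensPerSecond),
		onTranscription: text => console.log(text),
	})
	console.log(result.text, result.chunks)
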
package/s/features/speech/transcribe/types.ts
ADDED
@@ -0,0 +1,82 @@
+
+import {AsSchematic} from "@e280/comrade"
+import {DataType, DeviceType, Pipeline} from "@huggingface/transformers"
+
+import {Driver} from "../../../driver/driver.js"
+
+export type TranscriberSchematic = AsSchematic<{
+	work: {
+		prepare(spec: TranscriberSpec): Promise<void>
+		transcribe(request: TranscriptionRequest): Promise<Transcription>
+	},
+
+	host: {
+		loading(load: Loading): Promise<void>
+		deliverReport(report: TranscriptionReport): Promise<void>
+		deliverTranscription(transcription: string): Promise<void>
+	}
+}>
+
+export type Loading = {
+	total: number
+	progress: number
+}
+
+export type TranscribeOptions = {
+	pipe: Pipeline
+	spec: TranscriberSpec
+	request: TranscriptionRequest
+	callbacks: TranscriptionCallbacks
+}
+
+export type TranscriberPipeOptions = {
+	spec: TranscriberSpec
+	onLoading: (loading: Loading) => void
+}
+
+export type SpeechTime = [start: number, end: number]
+
+export type Transcription = {
+	text: string
+	chunks: {
+		text: string
+		timestamp: SpeechTime
+	}[]
+}
+
+export type TranscriberSpec = {
+	model: string
+	dtype: DataType
+	device: DeviceType
+	chunkLength: number
+	strideLength: number
+}
+
+export type TranscriptionOptions = {
+	source: Blob
+	language: string | null
+} & TranscriptionCallbacks
+
+export type TranscriptionRequest = {
+	audio: ArrayBufferLike
+	language: string | null
+	duration: number
+}
+
+export type TranscriptionReport = {
+	progress: number
+	tokensPerSecond: number
+}
+
+export type TranscriptionCallbacks = {
+	onReport: (report: TranscriptionReport) => void
+	onTranscription: (transcription: string) => void
+}
+
+export type TranscriberOptions = {
+	driver: Driver
+	spec: TranscriberSpec
+	workerUrl: URL | string
+	onLoading: (loading: Loading) => void
+}
+
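
The schematic splits the wire protocol by direction: work functions (prepare, transcribe) execute inside the worker, while host functions (loading, deliverReport, deliverTranscription) are how the worker calls back out. A minimal sketch of the callbacks half a caller supplies:

	// sketch: a TranscriptionCallbacks implementation that logs progress
	const callbacks: TranscriptionCallbacks = {
		onReport: ({progress, tokensPerSecond}) =>
			console.log(`${Math.round(progress * 100)}% @ ${tokensPerSecond.toFixed(1)} tok/s`),
		onTranscription: text => console.log(text),
	}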