@omnimedia/omnitool 1.1.0-3 → 1.1.0-5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +11 -9
- package/s/context.ts +1 -1
- package/s/demo/demo.bundle.ts +6 -2
- package/s/demo/routines/filmstrip-test.ts +2 -2
- package/s/demo/routines/transcriber-test.ts +34 -0
- package/s/demo/routines/transitions-test.ts +43 -0
- package/s/driver/fns/host.ts +7 -6
- package/s/driver/fns/schematic.ts +1 -1
- package/s/driver/fns/work.ts +116 -119
- package/s/driver/utils/load-decoder-source.ts +3 -4
- package/s/features/speech/transcribe/default-spec.ts +11 -0
- package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
- package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
- package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
- package/s/features/speech/transcribe/transcriber.ts +46 -0
- package/s/features/speech/transcribe/types.ts +82 -0
- package/s/features/speech/transcribe/worker.bundle.ts +40 -0
- package/s/features/transition/parts/fragment.ts +24 -0
- package/s/features/transition/parts/types.ts +94 -0
- package/s/features/transition/parts/uniforms.ts +29 -0
- package/s/features/transition/parts/vertex.ts +31 -0
- package/s/features/transition/transition.ts +60 -0
- package/s/timeline/utils/checksum.ts +2 -1
- package/s/tools/common/loader.ts +26 -0
- package/s/tools/common/transformer-pipeline.ts +26 -0
- package/s/tools/speech-recognition/common/model.ts +26 -0
- package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
- package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
- package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
- package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
- package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
- package/s/tools/speech-recognition/whisper/tool.ts +70 -0
- package/x/context.js +1 -1
- package/x/context.js.map +1 -1
- package/x/demo/demo.bundle.js +6 -2
- package/x/demo/demo.bundle.js.map +1 -1
- package/x/demo/demo.bundle.min.js +6 -6
- package/x/demo/demo.bundle.min.js.map +4 -4
- package/x/demo/routines/filmstrip-test.d.ts +1 -1
- package/x/demo/routines/filmstrip-test.js +2 -2
- package/x/demo/routines/filmstrip-test.js.map +1 -1
- package/x/demo/routines/transcriber-test.d.ts +4 -0
- package/x/demo/routines/transcriber-test.js +33 -0
- package/x/demo/routines/transcriber-test.js.map +1 -0
- package/x/demo/routines/transitions-test.d.ts +5 -0
- package/x/demo/routines/transitions-test.js +35 -0
- package/x/demo/routines/transitions-test.js.map +1 -0
- package/x/driver/driver.worker.bundle.min.js +80 -80
- package/x/driver/driver.worker.bundle.min.js.map +4 -4
- package/x/driver/fns/host.js +3 -3
- package/x/driver/fns/host.js.map +1 -1
- package/x/driver/fns/schematic.d.ts +1 -1
- package/x/driver/fns/work.js +8 -8
- package/x/driver/fns/work.js.map +1 -1
- package/x/driver/utils/load-decoder-source.d.ts +2 -1
- package/x/driver/utils/load-decoder-source.js +2 -3
- package/x/driver/utils/load-decoder-source.js.map +1 -1
- package/x/features/speech/transcribe/default-spec.d.ts +2 -0
- package/x/features/speech/transcribe/default-spec.js +8 -0
- package/x/features/speech/transcribe/default-spec.js.map +1 -0
- package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
- package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
- package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
- package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
- package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
- package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
- package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
- package/x/features/speech/transcribe/parts/transcribe.js +56 -0
- package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
- package/x/features/speech/transcribe/transcriber.d.ts +5 -0
- package/x/features/speech/transcribe/transcriber.js +33 -0
- package/x/features/speech/transcribe/transcriber.js.map +1 -0
- package/x/features/speech/transcribe/types.d.ts +66 -0
- package/x/features/speech/transcribe/types.js +2 -0
- package/x/features/speech/transcribe/types.js.map +1 -0
- package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
- package/x/features/speech/transcribe/worker.bundle.js +33 -0
- package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
- package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
- package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
- package/x/features/transition/parts/fragment.d.ts +1 -0
- package/x/features/transition/parts/fragment.js +25 -0
- package/x/features/transition/parts/fragment.js.map +1 -0
- package/x/features/transition/parts/types.d.ts +23 -0
- package/x/features/transition/parts/types.js +2 -0
- package/x/features/transition/parts/types.js.map +1 -0
- package/x/features/transition/parts/uniforms.d.ts +31 -0
- package/x/features/transition/parts/uniforms.js +27 -0
- package/x/features/transition/parts/uniforms.js.map +1 -0
- package/x/features/transition/parts/vertex.d.ts +1 -0
- package/x/features/transition/parts/vertex.js +32 -0
- package/x/features/transition/parts/vertex.js.map +1 -0
- package/x/features/transition/transition.d.ts +5 -0
- package/x/features/transition/transition.js +50 -0
- package/x/features/transition/transition.js.map +1 -0
- package/x/index.html +2 -2
- package/x/timeline/utils/checksum.js +2 -1
- package/x/timeline/utils/checksum.js.map +1 -1
- package/x/tools/common/loader.d.ts +19 -0
- package/x/tools/common/loader.js +18 -0
- package/x/tools/common/loader.js.map +1 -0
- package/x/tools/common/transformer-pipeline.d.ts +8 -0
- package/x/tools/common/transformer-pipeline.js +24 -0
- package/x/tools/common/transformer-pipeline.js.map +1 -0
- package/x/tools/speech-recognition/common/model.d.ts +14 -0
- package/x/tools/speech-recognition/common/model.js +16 -0
- package/x/tools/speech-recognition/common/model.js.map +1 -0
- package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
- package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
- package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
- package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
- package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
- package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
- package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
- package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
- package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
- package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
- package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
- package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
- package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
- package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
- package/x/tools/speech-recognition/whisper/tool.js +63 -0
- package/x/tools/speech-recognition/whisper/tool.js.map +1 -0
|
import {AsSchematic} from "@e280/comrade"
import {DataType, DeviceType, Pipeline} from "@huggingface/transformers"

import {Driver} from "../../../driver/driver.js"

/**
 * Comrade rpc schematic between the transcriber host and its worker.
 *  - `work`: calls the host makes into the worker
 *  - `host`: calls the worker makes back to the host
 */
export type TranscriberSchematic = AsSchematic<{
	work: {
		// load the whisper pipeline described by the spec
		prepare(spec: TranscriberSpec): Promise<void>

		// run speech-to-text on the prepared pipeline
		transcribe(request: TranscriptionRequest): Promise<Transcription>
	},

	host: {
		// model-load progress updates
		loading(load: Loading): Promise<void>

		// progress/speed report while a transcription is running
		deliverReport(report: TranscriptionReport): Promise<void>

		// partial transcription text delivered during the run
		deliverTranscription(transcription: string): Promise<void>
	}
}>

/** model-loading progress: `progress` out of `total` */
export type Loading = {
	total: number
	progress: number
}

/** inputs for one transcription run inside the worker */
export type TranscribeOptions = {
	pipe: Pipeline
	spec: TranscriberSpec
	request: TranscriptionRequest
	callbacks: TranscriptionCallbacks
}

/** inputs for loading the transformers pipeline */
export type TranscriberPipeOptions = {
	spec: TranscriberSpec
	onLoading: (loading: Loading) => void
}

/** [start, end] timestamp pair — presumably seconds; confirm against whisper output */
export type SpeechTime = [start: number, end: number]

/** final transcription: full text plus timestamped chunks */
export type Transcription = {
	text: string
	chunks: {
		text: string
		timestamp: SpeechTime
	}[]
}

/** which model/precision/device to run, and the chunking parameters */
export type TranscriberSpec = {
	model: string
	dtype: DataType
	device: DeviceType
	chunkLength: number
	strideLength: number
}

/** user-facing options: the audio source and an optional language hint */
export type TranscriptionOptions = {
	source: Blob
	language: string | null
} & TranscriptionCallbacks

/** payload sent to the worker: raw audio samples, language, total duration */
export type TranscriptionRequest = {
	audio: ArrayBufferLike
	language: string | null
	duration: number
}

/** periodic progress report (progress is 0..1 — TODO confirm range) */
export type TranscriptionReport = {
	progress: number
	tokensPerSecond: number
}

/** callbacks fired while a transcription is streaming */
export type TranscriptionCallbacks = {
	onReport: (report: TranscriptionReport) => void
	onTranscription: (transcription: string) => void
}

/** options for constructing the host-side transcriber facade */
export type TranscriberOptions = {
	driver: Driver
	spec: TranscriberSpec
	workerUrl: URL | string
	onLoading: (loading: Loading) => void
}
import {defer, once} from "@e280/stz"
import {Comrade, Host} from "@e280/comrade"
import {Pipeline} from "@huggingface/transformers"

import {loadPipe} from "./parts/load-pipe.js"
import {transcribe} from "./parts/transcribe.js"
import {TranscriberSchematic, TranscriberSpec} from "./types.js"

// module-level deferred that gates transcribe() until prepare() has
// finished loading the pipeline (resolved exactly once, below)
const deferred = defer<{pipe: Pipeline, spec: TranscriberSpec}>()

// `once` ensures the pipeline is only loaded a single time even if the
// host calls prepare() repeatedly; loading progress is forwarded to the host
const makePrepare = (host: Host<TranscriberSchematic>) => once(async(spec: TranscriberSpec) => {
	deferred.resolve({
		spec,
		pipe: await loadPipe({
			spec,
			onLoading: loading => host.loading(loading),
		}),
	})
})

// top-level await: this file is the worker entrypoint bundle
await Comrade.worker<TranscriberSchematic>(shell => {
	const prepare = makePrepare(shell.host)
	return {
		prepare,
		async transcribe(request) {
			// blocks here until prepare() has resolved the deferred
			const {pipe, spec} = await deferred.promise
			return transcribe({
				pipe,
				spec,
				request,
				callbacks: {
					onReport: report => shell.host.deliverReport(report),
					onTranscription: transcription => shell.host.deliverTranscription(transcription),
				},
			})
		}
	}
})
/**
 * Wrap a gl-transition glsl snippet into a complete pixi fragment shader.
 *
 * Supplies the getFromColor/getToColor helpers the gl-transition contract
 * expects (each samples its texture with aspect-ratio correction from
 * ratio/_fromR/_toR), then calls the snippet's transition() in main().
 *
 * NOTE(review): the `uv` local in main() is unused — transition() is fed
 * vTextureCoord directly; confirm whether `uv` (or `_uv`) was intended.
 *
 * @param glsl the transition's glsl source, which must define transition(vec2)
 * @returns the full fragment shader source
 */
export const fragment = (glsl: string) => `
precision highp float;
varying vec2 vTextureCoord;
varying vec2 _uv;
uniform sampler2D from, to;
uniform float progress, ratio, _fromR, _toR;
uniform float customUniform;

vec4 getFromColor(vec2 uv){
	return texture2D(from, .5+(uv-.5)*vec2(max(ratio/_fromR,1.), max(_fromR/ratio,1.)));
}
vec4 getToColor(vec2 uv){
	return texture2D(to, .5+(uv-.5)*vec2(max(ratio/_toR,1.), max(_toR/ratio,1.)));
}

// gl-transition code here
${glsl}
// gl-transition code end

void main(){
	vec2 uv = vTextureCoord.xy;
	gl_FragColor = transition(vTextureCoord);
}
`
import {Renderer} from "pixi.js"

/** inputs for makeTransition: which gl-transition to use, on which renderer */
export interface TransitionOptions {
	name: Transition
	renderer: Renderer
}

/** inputs for rendering one blended frame at a given progress */
export interface TransitionRendererOptions {
	from: VideoFrame
	to: VideoFrame
	progress: number
	width: number
	height: number
}

/** shape of one record from the `gl-transitions` package */
export interface GLTransition {
	author: string
	createdAt: string
	glsl: string
	license: string
	name: Transition
	updatedAt: string
	defaultParams: any
	paramsTypes: any
}

/** names of the supported gl-transitions (matches the package's `name` field) */
export type Transition =
	| "Bounce"
	| "BowTieHorizontal"
	| "BowTieVertical"
	| "ButterflyWaveScrawler"
	| "CircleCrop"
	| "ColourDistance"
	| "CrazyParametricFun"
	| "CrossZoom"
	| "Directional"
	| "DoomScreenTransition"
	| "Dreamy"
	| "DreamyZoom"
	| "GlitchDisplace"
	| "GlitchMemories"
	| "GridFlip"
	| "InvertedPageCurl"
	| "LinearBlur"
	| "Mosaic"
	| "PolkaDotsCurtain"
	| "Radial"
	| "SimpleZoom"
	| "StereoViewer"
	| "Swirl"
	| "WaterDrop"
	| "ZoomInCircles"
	| "angular"
	| "burn"
	| "cannabisleaf"
	| "circle"
	| "circleopen"
	| "colorphase"
	| "crosshatch"
	| "crosswarp"
	| "cube"
	| "directionalwarp"
	| "directionalwipe"
	| "displacement"
	| "doorway"
	| "fade"
	| "fadecolor"
	| "fadegrayscale"
	| "flyeye"
	| "heart"
	| "hexagonalize"
	| "kaleidoscope"
	| "luma"
	| "luminance_melt"
	| "morph"
	| "multiply_blend"
	| "perlin"
	| "pinwheel"
	| "pixelize"
	| "polar_function"
	| "randomsquares"
	| "ripple"
	| "rotate_scale_fade"
	| "squareswire"
	| "squeeze"
	| "swap"
	| "undulatingBurnOut"
	| "wind"
	| "windowblinds"
	| "windowslice"
	| "wipeDown"
	| "wipeLeft"
	| "wipeRight"
	| "wipeUp"
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import {GLTransition} from "./types.js"
|
|
2
|
+
|
|
3
|
+
export const uniforms = {
|
|
4
|
+
custom: (transition: GLTransition) => Object.fromEntries(
|
|
5
|
+
Object.entries(transition.defaultParams).map(([name, value]) => [
|
|
6
|
+
name,
|
|
7
|
+
{
|
|
8
|
+
value,
|
|
9
|
+
type: getUniformType(transition.paramsTypes[name])
|
|
10
|
+
}
|
|
11
|
+
])
|
|
12
|
+
),
|
|
13
|
+
basics: {
|
|
14
|
+
_fromR: {value: 1, type: "f32"},
|
|
15
|
+
_toR: {value: 1, type: "f32"},
|
|
16
|
+
ratio: {value: 1, type: "f32"},
|
|
17
|
+
progress: {value: 0, type: "f32"},
|
|
18
|
+
customUniform: {value: 0, type: "f32"},
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
const getUniformType = (type: string) => {
|
|
23
|
+
if(type === "f32" || type === "i32") {
|
|
24
|
+
return type
|
|
25
|
+
} else if(type === "float") {
|
|
26
|
+
return "f32"
|
|
27
|
+
}
|
|
28
|
+
else return `${type}<f32>`
|
|
29
|
+
}
|
|
/**
 * Vertex shader for the gl-transition pixi filter.
 *
 * Standard pixi v8 filter vertex shader (filterVertexPosition /
 * filterTextureCoord using uOutputFrame, uOutputTexture, uInputSize),
 * extended with the `_uv` varying that the gl-transition contract expects
 * (aPosition mapped from [-1,1] into [0,1]).
 */
export const vertex = `
in vec2 aPosition;
varying vec2 _uv; // gl-transition
uniform mat3 projectionMatrix;
uniform vec4 uInputSize;
uniform vec4 uOutputFrame;
out vec2 vTextureCoord;
uniform vec4 uOutputTexture;

vec4 filterVertexPosition( void )
{
	vec2 position = aPosition * uOutputFrame.zw + uOutputFrame.xy;

	position.x = position.x * (2.0 / uOutputTexture.x) - 1.0;
	position.y = position.y * (2.0*uOutputTexture.z / uOutputTexture.y) - uOutputTexture.z;

	return vec4(position, 0.0, 1.0);
}

vec2 filterTextureCoord( void )
{
	return aPosition * (uOutputFrame.zw * uInputSize.zw);
}

void main(void)
{
	gl_Position = filterVertexPosition();
	vTextureCoord = filterTextureCoord();
	_uv = vec2(0.5, 0.5) * (aPosition +vec2(1.0, 1.0)); // gl-transition
}
`
//@ts-ignore
import transitions from "gl-transitions"
import {Filter, GlProgram, Sprite, Texture, ImageSource} from "pixi.js"

import {vertex} from "./parts/vertex.js"
import {uniforms} from "./parts/uniforms.js"
import {fragment} from "./parts/fragment.js"
import {GLTransition, TransitionOptions, TransitionRendererOptions} from "./parts/types.js"

/**
 * Build a reusable renderer for one named gl-transition.
 *
 * Sets up a sprite carrying a pixi Filter whose fragment shader embeds the
 * transition's glsl, then returns a `render` function that blends two
 * VideoFrames at a given progress into a cached texture.
 *
 * NOTE(review): `.find()` returns undefined for an unknown name; the `as`
 * cast hides that, so an invalid name would only fail later when
 * `transition.glsl` is read — consider an explicit check.
 */
export function makeTransition({name, renderer}: TransitionOptions) {
	const transition = transitions.find((t: GLTransition) => t.name === name) as GLTransition
	// sprite + target texture are created once and reused across renders
	const transitionSprite = new Sprite()
	const transitionTexture = new Texture()
	// empty ImageSources; each render swaps in the from/to VideoFrames
	const sourceFrom = new ImageSource({})
	const sourceTo = new ImageSource({})

	const filter = new Filter({
		glProgram: new GlProgram({
			vertex,
			fragment: fragment(transition.glsl),
		}),
		resources: {
			from: sourceFrom,
			to: sourceTo,
			uniforms: {
				...uniforms.basics,
				...uniforms.custom(transition)
			}
		}
	})

	transitionSprite.filters = [filter]

	return {
		/**
		 * Blend `from` and `to` at `progress` into the cached texture.
		 * @returns the (reused) texture containing the blended frame
		 */
		render({width, height, from, to, progress}: TransitionRendererOptions) {
			// only resize when dimensions actually change
			if(transitionSprite.width !== width || transitionSprite.height !== height) {
				transitionSprite.setSize({width, height})
				transitionTexture.source.resize(width, height)
			}

			// point the shader's samplers at the new frames and re-upload
			sourceFrom.resource = from
			sourceTo.resource = to
			sourceFrom.update()
			sourceTo.update()

			// nested path is pixi v8's resources.uniforms.uniforms layout
			filter.resources.uniforms.uniforms.progress = progress

			renderer.render({
				container: transitionSprite,
				target: transitionTexture,
				clear: false,
				width,
				height
			})

			// NOTE(review): the same texture object is returned every call —
			// callers must consume it before the next render overwrites it
			return transitionTexture
		}
	}
}
@@ -10,7 +10,8 @@ export class Checksum {
|
|
|
10
10
|
) {}
|
|
11
11
|
|
|
12
12
|
static async make(data: Uint8Array) {
|
|
13
|
-
const
|
|
13
|
+
const data2 = new Uint8Array(data)
|
|
14
|
+
const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data2))
|
|
14
15
|
const hash = Hex.fromBytes(bytes)
|
|
15
16
|
const nickname = Thumbprint.sigil.fromBytes(bytes)
|
|
16
17
|
return new this(data, bytes, hash, nickname)
|
|
import {pub, Pub} from "@e280/stz"
import {ProgressItem} from "../speech-recognition/whisper/parts/types.js"

/** events a loader-backed tool can publish */
export interface LoaderEvents {
	onModelLoadProgress: Pub<ProgressItem[]>
	onTpsUpdate: Pub<[number]>
}

/**
 * Base class for tools that load a model and report tokens/second.
 */
export abstract class Loader {
	// most recent tokens-per-second measurement
	tps = 0

	// NOTE(review): static — these pubs are shared by ALL loader
	// subclasses/instances, not per-instance; confirm that's intended
	static loaderEvents = {
		onModelLoadProgress: pub<ProgressItem[]>(),
		onTpsUpdate: pub<[number]>()
	}

	constructor(public readonly name: string, public model: string) {}

	// perform whatever async setup the tool needs (e.g. model download)
	abstract init(): Promise<void>

	// switch to a different model id
	abstract setModel(model: string): void

	setTps(value: number) {
		this.tps = value
	}
}
//@ts-ignore
import {pipeline} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"

import {ProgressCallback} from "../speech-recognition/whisper/parts/types.js"

/**
 * Builds and caches one transformers.js pipeline for a given task.
 *
 * NOTE(review): transformers.js is imported from a CDN url, so this requires
 * network access at runtime — confirm that's intended for the worker bundle.
 */
export class PipelineFactory {
	// the live pipeline instance, null until createInstance resolves
	instance: any = null
	// model id the current instance was built with
	model: string | null = null

	constructor(public task: string) {}

	/**
	 * Build a pipeline for the given model and cache it on this.instance.
	 * @param model huggingface model id
	 * @param progressCallback forwarded as transformers.js progress_callback
	 */
	async createInstance(model: string, progressCallback?: ProgressCallback) {
		this.model = model
		return this.instance = await pipeline(this.task, this.model, {
			// whisper-large-v3-turbo's encoder runs fp16, others fp32;
			// the merged decoder is always 4-bit quantized — TODO confirm
			dtype: {
				encoder_model:
					this.model === "onnx-community/whisper-large-v3-turbo"
						? "fp16"
						: "fp32",
				decoder_model_merged: "q4",
			},
			device: "webgpu",
			progress_callback: progressCallback,
		})
	}
}
import {pub} from "@e280/stz"

import {Loader} from "../../common/loader.js"
import {DecoderSource} from "../../../driver/fns/schematic.js"
import {SpeechRecognizerModels, Word, WordGroup} from "../whisper/parts/types.js"

/**
 * Base class for speech-to-text tools (e.g. whisper).
 */
export abstract class SpeechRecognizer extends Loader {
	multilingual = true

	// NOTE(review): static — shared by every SpeechRecognizer subclass, not
	// per-instance (mirrors Loader.loaderEvents)
	static speechRecognizerEvents = {
		onTranscriptionChunk: pub<Word[]>(),
		onTranscribeProgress: pub<[number]>()
	}

	// run speech-to-text over the given source, resolving timestamped words
	abstract transcribe(input: DecoderSource): Promise<WordGroup>

	setMultilingual(value: boolean) {
		this.multilingual = value
	}

	// optional capability — not every recognizer can detect language
	detectLanguage?(input: Blob | AudioBuffer): Promise<string>

	setModel(value: SpeechRecognizerModels) {
		this.model = value
	}
}
@@ -0,0 +1,25 @@
|
|
|
1
|
+
|
|
2
|
+
import {Comrade} from "@e280/comrade"
|
|
3
|
+
import {ProgressItem} from "../parts/types.js"
|
|
4
|
+
import {SpeechRecognizerHostEvents, WhisperSchematic} from "./schematic.js"
|
|
5
|
+
|
|
6
|
+
export const setupWhisperHost = (events: SpeechRecognizerHostEvents) => (
|
|
7
|
+
Comrade.host<WhisperSchematic>(_shell => ({
|
|
8
|
+
async updateModelLoadProgress(item) {
|
|
9
|
+
events.onModelLoadProgress.pub(item)
|
|
10
|
+
},
|
|
11
|
+
async deliverTranscriptionChunk(chunk) {
|
|
12
|
+
events.onTranscriptionChunk.pub({
|
|
13
|
+
text: chunk.text,
|
|
14
|
+
timestamp: chunk.timestamp
|
|
15
|
+
})
|
|
16
|
+
},
|
|
17
|
+
async updateTps(value) {
|
|
18
|
+
events.onTpsUpdate.pub(value)
|
|
19
|
+
},
|
|
20
|
+
async updateTranscribeProgress(value) {
|
|
21
|
+
events.onTranscribeProgress(value)
|
|
22
|
+
}
|
|
23
|
+
}))
|
|
24
|
+
)
|
|
25
|
+
|
|
import {Pub} from "@e280/stz"
import {AsSchematic} from "@e280/comrade"

import {LoaderEvents} from "../../../common/loader.js"
import {ProgressItem, TranscriptionChunk, TranscriptionMessage, TranscriptionResult, Word} from "../parts/types.js"

/**
 * Comrade rpc schematic for the whisper worker.
 *  - `work`: calls the host makes into the worker
 *  - `host`: streaming updates the worker sends back
 */
export type WhisperSchematic = AsSchematic<{
	work: {
		// run one transcription; null when the model produced no output
		transcribe(input: TranscriptionMessage): Promise<TranscriptionResult | null>
	},

	host: {
		// per-file model download progress
		updateModelLoadProgress(item: ProgressItem): Promise<void>
		// a streamed chunk of transcribed text with its timestamp
		deliverTranscriptionChunk(chunk: TranscriptionChunk): Promise<void>
		// latest tokens-per-second measurement
		updateTps(value: number): Promise<void>
		// estimated overall transcription progress (0..1)
		updateTranscribeProgress(value: number): Promise<void>
	}
}>

/** event pubs the whisper host forwards worker updates into */
export interface SpeechRecognizerHostEvents extends LoaderEvents {
	onTranscriptionChunk: Pub<Word[]>
	onTranscribeProgress: Pub<[number]>
}
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import {Comrade} from "@e280/comrade"
|
|
2
|
+
//@ts-ignore
|
|
3
|
+
import {pipeline, WhisperTextStreamer} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"
|
|
4
|
+
|
|
5
|
+
import {WhisperSchematic} from "./schematic.js"
|
|
6
|
+
import {TranscriptionChunk} from "../parts/types.js"
|
|
7
|
+
import {PipelineFactory} from "../../../common/transformer-pipeline.js"
|
|
8
|
+
|
|
9
|
+
// TODO suspicious globals, probably bad
|
|
10
|
+
const pipeline = new PipelineFactory("automatic-speech-recognition")
|
|
11
|
+
let transcriber: any
|
|
12
|
+
|
|
13
|
+
export const setupWhisperWork = Comrade.work<WhisperSchematic>(shell => ({
|
|
14
|
+
async transcribe({audio, model, language, duration}) {
|
|
15
|
+
const isDistil = model.startsWith("distil-whisper/")
|
|
16
|
+
|
|
17
|
+
if(!pipeline.model || pipeline.model !== model) {
|
|
18
|
+
pipeline.instance?.dispose()?.()
|
|
19
|
+
pipeline.instance = null
|
|
20
|
+
transcriber = await pipeline.createInstance(
|
|
21
|
+
model,
|
|
22
|
+
(data) => {
|
|
23
|
+
if(data.progress)
|
|
24
|
+
shell.host.updateModelLoadProgress({
|
|
25
|
+
id: data.file,
|
|
26
|
+
progress: data.progress
|
|
27
|
+
})
|
|
28
|
+
}
|
|
29
|
+
)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const timePrecision =
|
|
33
|
+
transcriber.processor.feature_extractor.config.chunk_length /
|
|
34
|
+
transcriber.model.config.max_source_positions
|
|
35
|
+
|
|
36
|
+
const chunkLength = isDistil ? 20 : 30
|
|
37
|
+
const strideLength = isDistil ? 3 : 5
|
|
38
|
+
|
|
39
|
+
let chunkCount = 0
|
|
40
|
+
let startTime: number | null = null
|
|
41
|
+
let tokenCount = 0
|
|
42
|
+
let tps = 0
|
|
43
|
+
|
|
44
|
+
const chunkDuration = chunkLength - strideLength
|
|
45
|
+
|
|
46
|
+
const estimateProgress = () => {
|
|
47
|
+
const audioProgressSeconds = chunkCount * chunkDuration
|
|
48
|
+
return Math.min(audioProgressSeconds / duration, 1)
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
const streamer = new WhisperTextStreamer(transcriber.tokenizer, {
|
|
52
|
+
time_precision: timePrecision,
|
|
53
|
+
token_callback_function: () => {
|
|
54
|
+
startTime ??= performance.now()
|
|
55
|
+
if (++tokenCount > 1) {
|
|
56
|
+
tps = (tokenCount / (performance.now() - startTime)) * 1000
|
|
57
|
+
shell.host.updateTps(tps)
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
callback_function: (textChunk: any) => {
|
|
61
|
+
shell.host.deliverTranscriptionChunk(textChunk)
|
|
62
|
+
},
|
|
63
|
+
on_finalize: () => {
|
|
64
|
+
startTime = null
|
|
65
|
+
tokenCount = 0
|
|
66
|
+
chunkCount++
|
|
67
|
+
const progress = estimateProgress()
|
|
68
|
+
shell.host.updateTranscribeProgress(progress)
|
|
69
|
+
},
|
|
70
|
+
})
|
|
71
|
+
|
|
72
|
+
const output = await transcriber(audio, {
|
|
73
|
+
top_k: 0,
|
|
74
|
+
do_sample: false,
|
|
75
|
+
chunk_length_s: chunkLength,
|
|
76
|
+
stride_length_s: strideLength,
|
|
77
|
+
language,
|
|
78
|
+
task: "transcribe",
|
|
79
|
+
return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
|
|
80
|
+
force_full_sequences: false,
|
|
81
|
+
streamer,
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
if (!output) return null
|
|
85
|
+
|
|
86
|
+
return {
|
|
87
|
+
tps,
|
|
88
|
+
...output,
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}))
|
|
/** per-file model download progress (id is the file name) */
export interface ProgressItem {
	id: string
	progress: number
}

/** one transcribed word with its [start, end] timestamp */
export type Word = {
	text: string
	timestamp: [start: number, end: number]
}

export type WordGroup = Word[]
export type Transcript = WordGroup[]

/** a streamed chunk of transcription text */
export interface TranscriptionChunk {
	text: string
	offset: number
	timestamp: [number, number]
	finalised: boolean
}

/** request payload sent to the whisper worker */
export interface TranscriptionMessage {
	audio: Float32Array
	model: string
	subtask: string | null
	language: string | null
	duration: number
}

/** final transcription result, including the measured tokens/second */
export interface TranscriptionResult {
	text: string
	chunks: TranscriptionChunk[]
	tps: number
}

/** transformers.js progress_callback payload — shape untyped upstream */
export type ProgressCallback = (data: any) => void

export type SpeechRecognizerModels = "onnx-community/whisper-tiny_timestamped"
export type SpeechRecognizerSubtasks = "transcribe"