@omnimedia/omnitool 1.1.0-1 → 1.1.0-10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. package/README.md +1 -1
  2. package/package.json +12 -9
  3. package/s/context.ts +1 -1
  4. package/s/demo/demo.bundle.ts +6 -2
  5. package/s/demo/demo.css +5 -0
  6. package/s/demo/routines/filmstrip-test.ts +2 -2
  7. package/s/demo/routines/transcode-test.ts +4 -2
  8. package/s/demo/routines/transcriber-test.ts +34 -0
  9. package/s/demo/routines/transitions-test.ts +43 -0
  10. package/s/driver/driver.ts +17 -9
  11. package/s/driver/fns/host.ts +7 -6
  12. package/s/driver/fns/schematic.ts +45 -22
  13. package/s/driver/fns/work.ts +163 -151
  14. package/s/driver/utils/load-decoder-source.ts +3 -4
  15. package/s/features/speech/transcribe/default-spec.ts +11 -0
  16. package/s/features/speech/transcribe/parts/load-pipe.ts +19 -0
  17. package/s/features/speech/transcribe/parts/prep-audio.ts +23 -0
  18. package/s/features/speech/transcribe/parts/transcribe.ts +70 -0
  19. package/s/features/speech/transcribe/transcriber.ts +46 -0
  20. package/s/features/speech/transcribe/types.ts +82 -0
  21. package/s/features/speech/transcribe/worker.bundle.ts +40 -0
  22. package/s/features/transition/parts/fragment.ts +24 -0
  23. package/s/features/transition/parts/types.ts +94 -0
  24. package/s/features/transition/parts/uniforms.ts +29 -0
  25. package/s/features/transition/parts/vertex.ts +31 -0
  26. package/s/features/transition/transition.ts +60 -0
  27. package/s/index.html.ts +6 -1
  28. package/s/timeline/index.ts +1 -0
  29. package/s/timeline/parts/basics.ts +1 -1
  30. package/s/timeline/parts/compositor/export.ts +77 -0
  31. package/s/timeline/parts/compositor/parts/html-tree.ts +37 -0
  32. package/s/timeline/parts/compositor/parts/schedulers.ts +85 -0
  33. package/s/timeline/parts/compositor/parts/tree-builder.ts +184 -0
  34. package/s/timeline/parts/compositor/parts/webcodecs-tree.ts +30 -0
  35. package/s/timeline/parts/compositor/playback.ts +81 -0
  36. package/s/timeline/parts/compositor/samplers/html.ts +115 -0
  37. package/s/timeline/parts/compositor/samplers/webcodecs.ts +60 -0
  38. package/s/timeline/parts/item.ts +38 -6
  39. package/s/timeline/parts/media.ts +21 -0
  40. package/s/timeline/parts/waveform.ts +1 -1
  41. package/s/timeline/sugar/builders.ts +102 -0
  42. package/s/timeline/sugar/o.ts +117 -27
  43. package/s/timeline/sugar/omni-test.ts +2 -2
  44. package/s/timeline/sugar/omni.ts +21 -11
  45. package/s/timeline/types.ts +29 -0
  46. package/s/timeline/utils/audio-stream.ts +15 -0
  47. package/s/timeline/utils/checksum.ts +2 -1
  48. package/s/timeline/utils/matrix.ts +33 -0
  49. package/s/timeline/utils/video-cursor.ts +40 -0
  50. package/s/tools/common/loader.ts +26 -0
  51. package/s/tools/common/transformer-pipeline.ts +26 -0
  52. package/s/tools/speech-recognition/common/model.ts +26 -0
  53. package/s/tools/speech-recognition/whisper/fns/host.ts +25 -0
  54. package/s/tools/speech-recognition/whisper/fns/schematic.ts +23 -0
  55. package/s/tools/speech-recognition/whisper/fns/work.ts +91 -0
  56. package/s/tools/speech-recognition/whisper/parts/types.ts +38 -0
  57. package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts +7 -0
  58. package/s/tools/speech-recognition/whisper/tool.ts +70 -0
  59. package/x/context.js +1 -1
  60. package/x/context.js.map +1 -1
  61. package/x/demo/demo.bundle.js +6 -2
  62. package/x/demo/demo.bundle.js.map +1 -1
  63. package/x/demo/demo.bundle.min.js +39 -37
  64. package/x/demo/demo.bundle.min.js.map +4 -4
  65. package/x/demo/demo.css +5 -0
  66. package/x/demo/routines/filmstrip-test.d.ts +1 -1
  67. package/x/demo/routines/filmstrip-test.js +2 -2
  68. package/x/demo/routines/filmstrip-test.js.map +1 -1
  69. package/x/demo/routines/transcode-test.js +4 -2
  70. package/x/demo/routines/transcode-test.js.map +1 -1
  71. package/x/demo/routines/transcriber-test.d.ts +4 -0
  72. package/x/demo/routines/transcriber-test.js +33 -0
  73. package/x/demo/routines/transcriber-test.js.map +1 -0
  74. package/x/demo/routines/transitions-test.d.ts +5 -0
  75. package/x/demo/routines/transitions-test.js +35 -0
  76. package/x/demo/routines/transitions-test.js.map +1 -0
  77. package/x/driver/driver.d.ts +3 -5
  78. package/x/driver/driver.js +16 -9
  79. package/x/driver/driver.js.map +1 -1
  80. package/x/driver/driver.worker.bundle.min.js +2537 -148
  81. package/x/driver/driver.worker.bundle.min.js.map +4 -4
  82. package/x/driver/fns/host.d.ts +9 -2
  83. package/x/driver/fns/host.js +3 -3
  84. package/x/driver/fns/host.js.map +1 -1
  85. package/x/driver/fns/schematic.d.ts +39 -21
  86. package/x/driver/fns/work.d.ts +11 -4
  87. package/x/driver/fns/work.js +111 -102
  88. package/x/driver/fns/work.js.map +1 -1
  89. package/x/driver/utils/load-decoder-source.d.ts +2 -1
  90. package/x/driver/utils/load-decoder-source.js +2 -3
  91. package/x/driver/utils/load-decoder-source.js.map +1 -1
  92. package/x/features/speech/transcribe/default-spec.d.ts +2 -0
  93. package/x/features/speech/transcribe/default-spec.js +8 -0
  94. package/x/features/speech/transcribe/default-spec.js.map +1 -0
  95. package/x/features/speech/transcribe/parts/load-pipe.d.ts +2 -0
  96. package/x/features/speech/transcribe/parts/load-pipe.js +13 -0
  97. package/x/features/speech/transcribe/parts/load-pipe.js.map +1 -0
  98. package/x/features/speech/transcribe/parts/prep-audio.d.ts +5 -0
  99. package/x/features/speech/transcribe/parts/prep-audio.js +21 -0
  100. package/x/features/speech/transcribe/parts/prep-audio.js.map +1 -0
  101. package/x/features/speech/transcribe/parts/transcribe.d.ts +5 -0
  102. package/x/features/speech/transcribe/parts/transcribe.js +56 -0
  103. package/x/features/speech/transcribe/parts/transcribe.js.map +1 -0
  104. package/x/features/speech/transcribe/transcriber.d.ts +5 -0
  105. package/x/features/speech/transcribe/transcriber.js +33 -0
  106. package/x/features/speech/transcribe/transcriber.js.map +1 -0
  107. package/x/features/speech/transcribe/types.d.ts +66 -0
  108. package/x/features/speech/transcribe/types.js +2 -0
  109. package/x/features/speech/transcribe/types.js.map +1 -0
  110. package/x/features/speech/transcribe/worker.bundle.d.ts +1 -0
  111. package/x/features/speech/transcribe/worker.bundle.js +33 -0
  112. package/x/features/speech/transcribe/worker.bundle.js.map +1 -0
  113. package/x/features/speech/transcribe/worker.bundle.min.js +2916 -0
  114. package/x/features/speech/transcribe/worker.bundle.min.js.map +7 -0
  115. package/x/features/transition/parts/fragment.d.ts +1 -0
  116. package/x/features/transition/parts/fragment.js +25 -0
  117. package/x/features/transition/parts/fragment.js.map +1 -0
  118. package/x/features/transition/parts/types.d.ts +23 -0
  119. package/x/features/transition/parts/types.js +2 -0
  120. package/x/features/transition/parts/types.js.map +1 -0
  121. package/x/features/transition/parts/uniforms.d.ts +31 -0
  122. package/x/features/transition/parts/uniforms.js +27 -0
  123. package/x/features/transition/parts/uniforms.js.map +1 -0
  124. package/x/features/transition/parts/vertex.d.ts +1 -0
  125. package/x/features/transition/parts/vertex.js +32 -0
  126. package/x/features/transition/parts/vertex.js.map +1 -0
  127. package/x/features/transition/transition.d.ts +5 -0
  128. package/x/features/transition/transition.js +50 -0
  129. package/x/features/transition/transition.js.map +1 -0
  130. package/x/index.html +13 -3
  131. package/x/index.html.js +6 -1
  132. package/x/index.html.js.map +1 -1
  133. package/x/timeline/index.d.ts +1 -0
  134. package/x/timeline/index.js +1 -0
  135. package/x/timeline/index.js.map +1 -1
  136. package/x/timeline/parts/basics.d.ts +1 -1
  137. package/x/timeline/parts/compositor/export.d.ts +9 -0
  138. package/x/timeline/parts/compositor/export.js +64 -0
  139. package/x/timeline/parts/compositor/export.js.map +1 -0
  140. package/x/timeline/parts/compositor/parts/html-tree.d.ts +3 -0
  141. package/x/timeline/parts/compositor/parts/html-tree.js +40 -0
  142. package/x/timeline/parts/compositor/parts/html-tree.js.map +1 -0
  143. package/x/timeline/parts/compositor/parts/schedulers.d.ts +15 -0
  144. package/x/timeline/parts/compositor/parts/schedulers.js +64 -0
  145. package/x/timeline/parts/compositor/parts/schedulers.js.map +1 -0
  146. package/x/timeline/parts/compositor/parts/tree-builder.d.ts +37 -0
  147. package/x/timeline/parts/compositor/parts/tree-builder.js +147 -0
  148. package/x/timeline/parts/compositor/parts/tree-builder.js.map +1 -0
  149. package/x/timeline/parts/compositor/parts/webcodecs-tree.d.ts +3 -0
  150. package/x/timeline/parts/compositor/parts/webcodecs-tree.js +28 -0
  151. package/x/timeline/parts/compositor/parts/webcodecs-tree.js.map +1 -0
  152. package/x/timeline/parts/compositor/playback.d.ts +19 -0
  153. package/x/timeline/parts/compositor/playback.js +71 -0
  154. package/x/timeline/parts/compositor/playback.js.map +1 -0
  155. package/x/timeline/parts/compositor/samplers/html.d.ts +3 -0
  156. package/x/timeline/parts/compositor/samplers/html.js +106 -0
  157. package/x/timeline/parts/compositor/samplers/html.js.map +1 -0
  158. package/x/timeline/parts/compositor/samplers/webcodecs.d.ts +2 -0
  159. package/x/timeline/parts/compositor/samplers/webcodecs.js +55 -0
  160. package/x/timeline/parts/compositor/samplers/webcodecs.js.map +1 -0
  161. package/x/timeline/parts/item.d.ts +34 -8
  162. package/x/timeline/parts/item.js +6 -3
  163. package/x/timeline/parts/item.js.map +1 -1
  164. package/x/timeline/parts/media.d.ts +3 -0
  165. package/x/timeline/parts/media.js +17 -0
  166. package/x/timeline/parts/media.js.map +1 -1
  167. package/x/timeline/parts/waveform.js +1 -1
  168. package/x/timeline/parts/waveform.js.map +1 -1
  169. package/x/timeline/sugar/builders.d.ts +1 -0
  170. package/x/timeline/sugar/builders.js +104 -0
  171. package/x/timeline/sugar/builders.js.map +1 -0
  172. package/x/timeline/sugar/o.d.ts +23 -5
  173. package/x/timeline/sugar/o.js +93 -27
  174. package/x/timeline/sugar/o.js.map +1 -1
  175. package/x/timeline/sugar/omni-test.js +1 -1
  176. package/x/timeline/sugar/omni-test.js.map +1 -1
  177. package/x/timeline/sugar/omni.d.ts +5 -2
  178. package/x/timeline/sugar/omni.js +17 -9
  179. package/x/timeline/sugar/omni.js.map +1 -1
  180. package/x/timeline/types.d.ts +24 -0
  181. package/x/timeline/types.js +2 -0
  182. package/x/timeline/types.js.map +1 -0
  183. package/x/timeline/utils/audio-stream.d.ts +6 -0
  184. package/x/timeline/utils/audio-stream.js +17 -0
  185. package/x/timeline/utils/audio-stream.js.map +1 -0
  186. package/x/timeline/utils/checksum.js +2 -1
  187. package/x/timeline/utils/checksum.js.map +1 -1
  188. package/x/timeline/utils/matrix.d.ts +8 -0
  189. package/x/timeline/utils/matrix.js +26 -0
  190. package/x/timeline/utils/matrix.js.map +1 -0
  191. package/x/timeline/utils/video-cursor.d.ts +10 -0
  192. package/x/timeline/utils/video-cursor.js +36 -0
  193. package/x/timeline/utils/video-cursor.js.map +1 -0
  194. package/x/tools/common/loader.d.ts +19 -0
  195. package/x/tools/common/loader.js +18 -0
  196. package/x/tools/common/loader.js.map +1 -0
  197. package/x/tools/common/transformer-pipeline.d.ts +8 -0
  198. package/x/tools/common/transformer-pipeline.js +24 -0
  199. package/x/tools/common/transformer-pipeline.js.map +1 -0
  200. package/x/tools/speech-recognition/common/model.d.ts +14 -0
  201. package/x/tools/speech-recognition/common/model.js +16 -0
  202. package/x/tools/speech-recognition/common/model.js.map +1 -0
  203. package/x/tools/speech-recognition/whisper/fns/host.d.ts +13 -0
  204. package/x/tools/speech-recognition/whisper/fns/host.js +19 -0
  205. package/x/tools/speech-recognition/whisper/fns/host.js.map +1 -0
  206. package/x/tools/speech-recognition/whisper/fns/schematic.d.ts +19 -0
  207. package/x/tools/speech-recognition/whisper/fns/schematic.js +2 -0
  208. package/x/tools/speech-recognition/whisper/fns/schematic.js.map +1 -0
  209. package/x/tools/speech-recognition/whisper/fns/work.d.ts +12 -0
  210. package/x/tools/speech-recognition/whisper/fns/work.js +74 -0
  211. package/x/tools/speech-recognition/whisper/fns/work.js.map +1 -0
  212. package/x/tools/speech-recognition/whisper/parts/types.d.ts +31 -0
  213. package/x/tools/speech-recognition/whisper/parts/types.js +2 -0
  214. package/x/tools/speech-recognition/whisper/parts/types.js.map +1 -0
  215. package/x/tools/speech-recognition/whisper/parts/worker.bundle.d.ts +1 -0
  216. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js +4 -0
  217. package/x/tools/speech-recognition/whisper/parts/worker.bundle.js.map +1 -0
  218. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js +8 -0
  219. package/x/tools/speech-recognition/whisper/parts/worker.bundle.min.js.map +7 -0
  220. package/x/tools/speech-recognition/whisper/tool.d.ts +12 -0
  221. package/x/tools/speech-recognition/whisper/tool.js +63 -0
  222. package/x/tools/speech-recognition/whisper/tool.js.map +1 -0

package/s/timeline/sugar/o.ts
@@ -1,51 +1,119 @@
 
-import {MapG} from "@e280/stz"
-import {Id} from "../parts/basics.js"
 import {Media} from "../parts/media.js"
+import {Id, TimelineFile} from "../parts/basics.js"
 import {Effect, Item, Kind} from "../parts/item.js"
+import {Transform, TransformOptions, Vec2} from "../types.js"
 
 export class O {
 	#nextId = 0
-	#items = new MapG<Id, Item.Any>()
+
+	constructor(public state: {project: TimelineFile}) {}
+
+	require<T extends Item.Any>(id: Id): T {
+		const item = this.state.project.items.find(item => item.id === id)
+		return item as T
+	}
 
 	#getId() {
 		return this.#nextId++
 	}
 
-	register(item: Item.Any) {
-		if (!this.#items.has(item.id))
-			this.#items.set(item.id, item)
-		return item.id
+	#mutate(fn: (project: TimelineFile) => TimelineFile) {
+		this.state.project = fn(this.state.project)
+	}
+
+	spatial = (transform: Transform): Item.Spatial => {
+		const item: Item.Spatial = {
+			id: this.#getId(),
+			kind: Kind.Spatial,
+			transform
+		}
+		return item
+	}
+
+	sequence = (...items: Item.Any[]): Item.Any => {
+		const item = {
+			id: this.#getId(),
+			kind: Kind.Sequence,
+			childrenIds: items.map(item => item.id)
+		} as Item.Sequence
+
+		this.#mutate(state => {
+			state.items.push(item, ...items)
+			return state
+		})
+
+		return item
 	}
 
-	get items() {
-		return [...this.#items.values()]
+	stack = (...items: Item.Any[]): Item.Any => {
+		const item = {
+			kind: Kind.Stack,
+			id: this.#getId(),
+			childrenIds: items.map(item => item.id)
+		} as Item.Stack
+
+		this.#mutate(state => {
+			state.items.push(item, ...items)
+			return state
+		})
+
+		return item
 	}
 
-	sequence = (...items: Item.Any[]): Item.Sequence => ({
-		id: this.#getId(),
-		kind: Kind.Sequence,
-		children: items.map(item => this.register(item)),
-	})
+	video = (
+		media: Media,
+		options?: {
+			start?: number,
+			duration?: number
+		}): Item.Video => {
 
-	stack = (...items: Item.Any[]): Item.Stack => ({
-		id: this.#getId(),
-		kind: Kind.Stack,
-		children: items.map(item => this.register(item)),
-	})
+		if(!media.hasVideo)
+			throw new Error(`Video clip error: media "${media.datafile.filename}" has no video track.`)
 
-	clip = (media: Media, start?: number, duration?: number): Item.Clip => ({
-		id: this.#getId(),
-		kind: Kind.Clip,
-		mediaHash: media.datafile.checksum.hash,
-		start: start ?? 0,
-		duration: duration ?? media.duration,
-	})
+		const item: Item.Video = {
+			kind: Kind.Video,
+			id: this.#getId(),
+			mediaHash: media.datafile.checksum.hash,
+			start: options?.start ?? 0,
+			duration: options?.duration ?? media.duration
+		}
+
+		return item
+	}
+
+	audio = (
+		media: Media,
+		options?: {
+			start?: number,
+			duration?: number
+		}): Item.Audio => {
+
+		if(!media.hasAudio)
+			throw new Error(`Audio clip error: media "${media.datafile.filename}" has no audio track.`)
+
+		const item: Item.Audio = {
+			kind: Kind.Audio,
+			id: this.#getId(),
+			mediaHash: media.datafile.checksum.hash,
+			start: options?.start ?? 0,
+			duration: options?.duration ?? media.duration
+		}
+
+		return item
+	}
 
 	text = (content: string): Item.Text => ({
 		id: this.#getId(),
-		kind: Kind.Text,
 		content,
+		kind: Kind.Text,
+		color: "#FFFFF"
+	})
+
+	gap = (duration: number): Item.Gap => ({
+		id: this.#getId(),
+		kind: Kind.Gap,
+		duration
 	})
 
 	transition = {
@@ -56,5 +124,27 @@ export class O {
 			duration,
 		}),
 	}
+
+	transform = (options?: TransformOptions): Transform => {
+		const position: Vec2 = [
+			options?.position?.[0] ?? 0,
+			options?.position?.[1] ?? 0
+		]
+		const scale: Vec2 = [
+			options?.scale?.[0] ?? 1,
+			options?.scale?.[1] ?? 1
+		]
+		const rotation = options?.rotation ?? 0
+		return [position, scale, rotation]
+	}
+
+	addChildren(parent: Item.Stack | Item.Sequence, ...items: Item.Any[]) {
+		this.#mutate(state => {
+			const parentItem = state.items.find(({id}) => id === parent.id) as Item.Stack
+			parentItem.childrenIds.push(...items.map(item => item.id))
+			state.items.push(...items)
+			return state
+		})
+	}
 }
 

package/s/timeline/sugar/omni-test.ts
@@ -22,10 +22,10 @@ const {mediaA, mediaB} = await omni.load({
 //
 
 const timeline = omni.timeline(o => o.sequence(
-	o.clip(mediaA),
+	o.video(mediaA),
 	o.transition.crossfade(600),
 	o.stack(
-		o.clip(mediaB),
+		o.video(mediaB),
 		o.text("hello world"),
 	),
 ))
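
The omni-test.ts change above shows the minimal path through the new builder. A slightly fuller sketch of the same API, for orientation only (not code from the package; `media` stands in for a Media returned by omni.load, and the numeric start/duration values are arbitrary):

const timeline = omni.timeline(o => {
	const root = o.stack(
		o.video(media, {start: 0, duration: 2000}),  // throws if the media has no video track
		o.audio(media),                              // defaults: start 0, full media duration
		o.text("caption"),
	)
	// stacks and sequences can be extended after creation
	o.addChildren(root, o.gap(500))
	return root
})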

package/s/timeline/sugar/omni.ts
@@ -2,12 +2,15 @@
 import {O} from "./o.js"
 import {Item} from "../parts/item.js"
 import {Media} from "../parts/media.js"
-import {TimelineFile} from "../parts/basics.js"
 import {Datafile} from "../utils/datafile.js"
+import {TimelineFile} from "../parts/basics.js"
+import {Export} from "../parts/compositor/export.js"
 import {ResourcePool} from "../parts/resource-pool.js"
+import {RenderConfig} from "../../driver/fns/schematic.js"
 
 export class Omni {
 	resources = new ResourcePool()
+	#export = new Export()
 
 	load = async<S extends Record<string, Promise<Datafile>>>(spec: S) => {
 		return Object.fromEntries(await Promise.all(Object.entries(spec).map(
@@ -15,16 +18,23 @@ export class Omni {
 		))) as {[K in keyof S]: Media}
 	}
 
-	timeline = (fn: (o: O) => Item.Sequence): TimelineFile => {
-		const o = new O()
-		const sequence = fn(o)
-		return {
-			format: "timeline",
-			info: "https://omniclip.app/",
-			version: 0,
-			root: o.register(sequence),
-			items: o.items,
-		}
+	timeline = (fn: (o: O) => Item.Any): TimelineFile => {
+		const o = new O({
+			project: {
+				format: "timeline",
+				info: "https://omniclip.app/",
+				version: 0,
+				items: [],
+				rootId: 0
+			}
+		})
+		const root = fn(o)
+		o.state.project.rootId = root.id
+		return o.state.project
+	}
+
+	render = async (timeline: TimelineFile, config: RenderConfig) => {
+		await this.#export.render(timeline)
 	}
 }
 

package/s/timeline/types.ts
@@ -0,0 +1,29 @@
+export type Interpolation = "linear" | "catmullRom"
+export type Keyframe<Value = number> = [time: number, value: Value]
+export type Keyframes<Value = number> = Keyframe<Value>[]
+export type Vec2 = [x: number, y: number]
+export type Transform = [position: Vec2, scale: Vec2, rotation: number]
+
+export type TrackVec2 = {
+	x: Keyframes
+	y: Keyframes
+}
+
+export type Anim<T> = {
+	terp: Interpolation
+	track: T
+}
+
+export type Animations = Anim<TrackTransform>
+
+export type TrackTransform = {
+	position: TrackVec2
+	scale: TrackVec2
+	rotation: Keyframes
+}
+
+export type TransformOptions = {
+	position?: Vec2
+	scale?: Vec2
+	rotation?: number
+}
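
The timeline types above are plain tuples plus keyframe tracks. An illustrative set of values that satisfies them (not from the package; keyframe times are assumed to be in milliseconds, and rotation is in degrees as matrix.ts below converts it with rotDeg * Math.PI / 180):

import {Transform, Anim, TrackTransform} from "./types.js"

// position (20, 40), uniform 1.5x scale, 90 degree rotation
const still: Transform = [[20, 40], [1.5, 1.5], 90]

// animate x from 0 to 100 over the first second, hold everything else
const anim: Anim<TrackTransform> = {
	terp: "linear",
	track: {
		position: {x: [[0, 0], [1000, 100]], y: [[0, 40]]},
		scale: {x: [[0, 1.5]], y: [[0, 1.5]]},
		rotation: [[0, 90]],
	},
}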

package/s/timeline/utils/audio-stream.ts
@@ -0,0 +1,15 @@
+export class AudioStream {
+	constructor(private reader: ReadableStreamDefaultReader<AudioData>) {}
+
+	async *stream(): AsyncGenerator<AudioData> {
+		while (true) {
+			const {done, value: hit} = await this.reader.read()
+			if (done) {
+				break
+			}
+			yield hit
+		}
+	}
+
+	cancel = async () => await this.reader.cancel()
+}
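
AudioStream is a thin async-generator wrapper over a WebCodecs AudioData reader. A sketch of how it might be consumed (illustrative; `decodedAudio` is an assumed ReadableStream<AudioData>, e.g. from the driver's decode path):

const stream = new AudioStream(decodedAudio.getReader())

for await (const audioData of stream.stream()) {
	// ...copy samples out, mix, or feed an encoder...
	audioData.close()  // AudioData frames must be closed by the consumer
}
await stream.cancel()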

package/s/timeline/utils/checksum.ts
@@ -10,7 +10,8 @@ export class Checksum {
 	) {}
 
 	static async make(data: Uint8Array) {
-		const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data))
+		const data2 = new Uint8Array(data)
+		const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", data2))
 		const hash = Hex.fromBytes(bytes)
 		const nickname = Thumbprint.sigil.fromBytes(bytes)
 		return new this(data, bytes, hash, nickname)

package/s/timeline/utils/matrix.ts
@@ -0,0 +1,33 @@
+import {Matrix} from "pixi.js"
+import {Transform} from "../types.js"
+
+export const transformToMat6 = (t: Transform): Mat6 => {
+	const [pos, scl, rotDeg] = t
+	const [x, y] = pos
+	const [sx, sy] = scl
+	const r = rotDeg * Math.PI / 180
+	const cos = Math.cos(r)
+	const sin = Math.sin(r)
+	return [cos * sx, sin * sx, -sin * sy, cos * sy, x, y]
+}
+
+export const mat6ToMatrix = ([a, b, c, d, tx, ty]: Mat6): Matrix =>
+	new Matrix(a, b, c, d, tx, ty)
+
+export const transformToMatrix = (t: Transform) => mat6ToMatrix(transformToMat6(t))
+
+export const mul6 = (local: Mat6, parent: Mat6): Mat6 => {
+	const [a1, b1, c1, d1, tx1, ty1] = local
+	const [a2, b2, c2, d2, tx2, ty2] = parent
+	return [
+		a1 * a2 + c1 * b2,
+		b1 * a2 + d1 * b2,
+		a1 * c2 + c1 * d2,
+		b1 * c2 + d1 * d2,
+		a1 * tx2 + c1 * ty2 + tx1,
+		b1 * tx2 + d1 * ty2 + ty1
+	]
+}
+
+export const I6: Mat6 = [1, 0, 0, 1, 0, 0]
+export type Mat6 = [a: number, b: number, c: number, d: number, tx: number, ty: number]
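
Mat6 packs the 2D affine transform into a 6-tuple in the same argument order pixi.js Matrix(a, b, c, d, tx, ty) takes. A quick worked example, following the formulas above:

// 90 degree rotation with scale (2, 3) at position (10, 20); cos 90 = 0, sin 90 = 1
transformToMat6([[10, 20], [2, 3], 90])
// -> [0, 2, -3, 0, 10, 20]  (up to floating-point error in Math.cos(Math.PI / 2))

// an identity parent leaves the local matrix unchanged
mul6([0, 2, -3, 0, 10, 20], I6)
// -> [0, 2, -3, 0, 10, 20]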

package/s/timeline/utils/video-cursor.ts
@@ -0,0 +1,40 @@
+/**
+ * A stateful, forward-only frame cursor for a single clip instance.
+ * It efficiently reads a video stream to find the frame nearest to a target timestamp.
+ */
+
+export class VideoCursor {
+	constructor(private reader: ReadableStreamDefaultReader<VideoFrame>) {}
+
+	async atOrNear(targetUs: number): Promise<VideoFrame | undefined> {
+		let prev: VideoFrame | null = null
+		while (true) {
+			const {done, value: hit} = await this.reader.read()
+
+			if (done) {
+				const out = prev ? new VideoFrame(prev) : undefined
+				prev?.close()
+				return out
+			}
+
+			const hitUs = hit.timestamp ?? 0
+			if (hitUs >= targetUs) {
+				const prevUs = prev?.timestamp ?? Number.NEGATIVE_INFINITY
+				const usePrev = !!prev && Math.abs(prevUs - targetUs) < Math.abs(hitUs - targetUs)
+
+				const chosen = usePrev ? prev! : hit
+				const other = usePrev ? hit : prev
+
+				const copy = new VideoFrame(chosen)
+				chosen.close()
+				other?.close()
+				return copy
+			}
+
+			prev?.close()
+			prev = hit
+		}
+	}
+
+	cancel = async () => await this.reader.cancel()
+}
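
VideoCursor reads strictly forward, cloning the chosen frame and closing everything else, so the caller owns exactly one VideoFrame per call. A usage sketch (illustrative; `decodedVideo` is an assumed ReadableStream<VideoFrame> and `ctx` an assumed canvas 2D context):

const cursor = new VideoCursor(decodedVideo.getReader())

// targets are in microseconds, matching VideoFrame.timestamp
const frame = await cursor.atOrNear(1_000_000)
if (frame) {
	ctx.drawImage(frame, 0, 0)
	frame.close()
}

// later targets must only increase; earlier frames have already been consumed
const next = await cursor.atOrNear(2_000_000)
next?.close()

await cursor.cancel()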

package/s/tools/common/loader.ts
@@ -0,0 +1,26 @@
+import {pub, Pub} from "@e280/stz"
+import {ProgressItem} from "../speech-recognition/whisper/parts/types.js"
+
+export interface LoaderEvents {
+	onModelLoadProgress: Pub<ProgressItem[]>
+	onTpsUpdate: Pub<[number]>
+}
+
+export abstract class Loader {
+	tps = 0
+
+	static loaderEvents = {
+		onModelLoadProgress: pub<ProgressItem[]>(),
+		onTpsUpdate: pub<[number]>()
+	}
+
+	constructor(public readonly name: string, public model: string) {}
+
+	abstract init(): Promise<void>
+
+	abstract setModel(model: string): void
+
+	setTps(value: number) {
+		this.tps = value
+	}
+}

package/s/tools/common/transformer-pipeline.ts
@@ -0,0 +1,26 @@
+//@ts-ignore
+import {pipeline} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"
+
+import {ProgressCallback} from "../speech-recognition/whisper/parts/types.js"
+
+export class PipelineFactory {
+	instance: any = null
+	model: string | null = null
+
+	constructor(public task: string) {}
+
+	async createInstance(model: string, progressCallback?: ProgressCallback) {
+		this.model = model
+		return this.instance = await pipeline(this.task, this.model, {
+			dtype: {
+				encoder_model:
+					this.model === "onnx-community/whisper-large-v3-turbo"
+						? "fp16"
+						: "fp32",
+				decoder_model_merged: "q4",
+			},
+			device: "webgpu",
+			progress_callback: progressCallback,
+		})
+	}
+}
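
PipelineFactory just records the task and model and hands back a transformers.js pipeline; the reuse-or-rebuild decision lives in the whisper work function further down. A hedged sketch of creating an ASR instance (the model id is taken from the SpeechRecognizerModels type below; loading fetches ONNX weights over the network and targets WebGPU, per the options above):

const factory = new PipelineFactory("automatic-speech-recognition")
const transcriber = await factory.createInstance(
	"onnx-community/whisper-tiny_timestamped",
	data => console.log("load progress", data),
)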

package/s/tools/speech-recognition/common/model.ts
@@ -0,0 +1,26 @@
+import {pub} from "@e280/stz"
+
+import {Loader} from "../../common/loader.js"
+import {DecoderSource} from "../../../driver/fns/schematic.js"
+import {SpeechRecognizerModels, Word, WordGroup} from "../whisper/parts/types.js"
+
+export abstract class SpeechRecognizer extends Loader {
+	multilingual = true
+
+	static speechRecognizerEvents = {
+		onTranscriptionChunk: pub<Word[]>(),
+		onTranscribeProgress: pub<[number]>()
+	}
+
+	abstract transcribe(input: DecoderSource): Promise<WordGroup>
+
+	setMultilingual(value: boolean) {
+		this.multilingual = value
+	}
+
+	detectLanguage?(input: Blob | AudioBuffer): Promise<string>
+
+	setModel(value: SpeechRecognizerModels) {
+		this.model = value
+	}
+}

package/s/tools/speech-recognition/whisper/fns/host.ts
@@ -0,0 +1,25 @@
+
+import {Comrade} from "@e280/comrade"
+import {ProgressItem} from "../parts/types.js"
+import {SpeechRecognizerHostEvents, WhisperSchematic} from "./schematic.js"
+
+export const setupWhisperHost = (events: SpeechRecognizerHostEvents) => (
+	Comrade.host<WhisperSchematic>(_shell => ({
+		async updateModelLoadProgress(item) {
+			events.onModelLoadProgress.pub(item)
+		},
+		async deliverTranscriptionChunk(chunk) {
+			events.onTranscriptionChunk.pub({
+				text: chunk.text,
+				timestamp: chunk.timestamp
+			})
+		},
+		async updateTps(value) {
+			events.onTpsUpdate.pub(value)
+		},
+		async updateTranscribeProgress(value) {
+			events.onTranscribeProgress(value)
+		}
+	}))
+)
+

package/s/tools/speech-recognition/whisper/fns/schematic.ts
@@ -0,0 +1,23 @@
+import {Pub} from "@e280/stz"
+import {AsSchematic} from "@e280/comrade"
+
+import {LoaderEvents} from "../../../common/loader.js"
+import {ProgressItem, TranscriptionChunk, TranscriptionMessage, TranscriptionResult, Word} from "../parts/types.js"
+
+export type WhisperSchematic = AsSchematic<{
+	work: {
+		transcribe(input: TranscriptionMessage): Promise<TranscriptionResult | null>
+	},
+
+	host: {
+		updateModelLoadProgress(item: ProgressItem): Promise<void>
+		deliverTranscriptionChunk(chunk: TranscriptionChunk): Promise<void>
+		updateTps(value: number): Promise<void>
+		updateTranscribeProgress(value: number): Promise<void>
+	}
+}>
+
+export interface SpeechRecognizerHostEvents extends LoaderEvents {
+	onTranscriptionChunk: Pub<Word[]>
+	onTranscribeProgress: Pub<[number]>
+}
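
The schematic splits the whisper tool across the worker boundary: work exposes transcribe, host exposes the progress and chunk callbacks. One way the host events could be wired up with pub() from @e280/stz, mirroring the pattern in loader.ts above (illustrative only; the actual wiring presumably lives in whisper/tool.ts, which is not expanded in this diff):

import {pub} from "@e280/stz"
import {setupWhisperHost} from "./host.js"
import {ProgressItem, Word} from "../parts/types.js"

// channels the worker-side callbacks publish into
const events = {
	onModelLoadProgress: pub<ProgressItem[]>(),
	onTpsUpdate: pub<[number]>(),
	onTranscriptionChunk: pub<Word[]>(),
	onTranscribeProgress: pub<[number]>(),
}

const host = setupWhisperHost(events)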

package/s/tools/speech-recognition/whisper/fns/work.ts
@@ -0,0 +1,91 @@
+import {Comrade} from "@e280/comrade"
+//@ts-ignore
+import {pipeline, WhisperTextStreamer} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.0/dist/transformers.min.js"
+
+import {WhisperSchematic} from "./schematic.js"
+import {TranscriptionChunk} from "../parts/types.js"
+import {PipelineFactory} from "../../../common/transformer-pipeline.js"
+
+// TODO suspicious globals, probably bad
+const pipeline = new PipelineFactory("automatic-speech-recognition")
+let transcriber: any
+
+export const setupWhisperWork = Comrade.work<WhisperSchematic>(shell => ({
+	async transcribe({audio, model, language, duration}) {
+		const isDistil = model.startsWith("distil-whisper/")
+
+		if(!pipeline.model || pipeline.model !== model) {
+			pipeline.instance?.dispose()?.()
+			pipeline.instance = null
+			transcriber = await pipeline.createInstance(
+				model,
+				(data) => {
+					if(data.progress)
+						shell.host.updateModelLoadProgress({
+							id: data.file,
+							progress: data.progress
+						})
+				}
+			)
+		}
+
+		const timePrecision =
+			transcriber.processor.feature_extractor.config.chunk_length /
+			transcriber.model.config.max_source_positions
+
+		const chunkLength = isDistil ? 20 : 30
+		const strideLength = isDistil ? 3 : 5
+
+		let chunkCount = 0
+		let startTime: number | null = null
+		let tokenCount = 0
+		let tps = 0
+
+		const chunkDuration = chunkLength - strideLength
+
+		const estimateProgress = () => {
+			const audioProgressSeconds = chunkCount * chunkDuration
+			return Math.min(audioProgressSeconds / duration, 1)
+		}
+
+		const streamer = new WhisperTextStreamer(transcriber.tokenizer, {
+			time_precision: timePrecision,
+			token_callback_function: () => {
+				startTime ??= performance.now()
+				if (++tokenCount > 1) {
+					tps = (tokenCount / (performance.now() - startTime)) * 1000
+					shell.host.updateTps(tps)
+				}
+			},
+			callback_function: (textChunk: any) => {
+				shell.host.deliverTranscriptionChunk(textChunk)
+			},
+			on_finalize: () => {
+				startTime = null
+				tokenCount = 0
+				chunkCount++
+				const progress = estimateProgress()
+				shell.host.updateTranscribeProgress(progress)
+			},
+		})
+
+		const output = await transcriber(audio, {
+			top_k: 0,
+			do_sample: false,
+			chunk_length_s: chunkLength,
+			stride_length_s: strideLength,
+			language,
+			task: "transcribe",
+			return_timestamps: "word", // if using "word" the on_chunk_start & end is not called thus we cant retrieve timestamps, only after whole thing finishes
+			force_full_sequences: false,
+			streamer,
+		})
+
+		if (!output) return null
+
+		return {
+			tps,
+			...output,
+		}
+	}
+}))
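
The transcription progress reported from on_finalize is a plain arithmetic estimate: each finalized chunk advances the estimate by chunkLength - strideLength seconds of audio. Worked through for a non-distil model (30 s chunks, 5 s stride) on a 100-second input:

// chunkDuration = 30 - 5 = 25 seconds of new audio per finalized chunk
// after 2 chunks: estimateProgress() = Math.min(2 * 25 / 100, 1) = 0.5
// after 4 chunks: Math.min(4 * 25 / 100, 1) = 1, i.e. the estimate saturates at 100%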

package/s/tools/speech-recognition/whisper/parts/types.ts
@@ -0,0 +1,38 @@
+export interface ProgressItem {
+	id: string
+	progress: number
+}
+
+export type Word = {
+	text: string
+	timestamp: [start: number, end: number]
+}
+
+export type WordGroup = Word[]
+export type Transcript = WordGroup[]
+
+export interface TranscriptionChunk {
+	text: string
+	offset: number
+	timestamp: [number, number]
+	finalised: boolean
+}
+
+export interface TranscriptionMessage {
+	audio: Float32Array
+	model: string
+	subtask: string | null
+	language: string | null
+	duration: number
+}
+
+export interface TranscriptionResult {
+	text: string
+	chunks: TranscriptionChunk[]
+	tps: number
+}
+
+export type ProgressCallback = (data: any) => void
+
+export type SpeechRecognizerModels = "onnx-community/whisper-tiny_timestamped"
+export type SpeechRecognizerSubtasks = "transcribe"

package/s/tools/speech-recognition/whisper/parts/worker.bundle.ts
@@ -0,0 +1,7 @@
+import {Comrade} from "@e280/comrade"
+
+import {setupWhisperWork} from "../fns/work.js"
+import {WhisperSchematic} from "../fns/schematic.js"
+
+await Comrade.worker<WhisperSchematic>(setupWhisperWork)
+