varg.ai-sdk 0.1.1 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246)
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +43 -10
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +58 -68
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -169
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -437
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -105
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -145
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -125
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -136
  217. package/cli/commands/find.ts +0 -58
  218. package/cli/commands/help.ts +0 -70
  219. package/cli/commands/list.ts +0 -49
  220. package/cli/commands/run.ts +0 -237
  221. package/cli/commands/which.ts +0 -66
  222. package/cli/discover.ts +0 -66
  223. package/cli/index.ts +0 -33
  224. package/cli/runner.ts +0 -65
  225. package/cli/types.ts +0 -49
  226. package/cli/ui.ts +0 -185
  227. package/index.ts +0 -75
  228. package/lib/README.md +0 -144
  229. package/lib/ai-sdk/fal.ts +0 -106
  230. package/lib/ai-sdk/replicate.ts +0 -107
  231. package/lib/elevenlabs.ts +0 -382
  232. package/lib/fal.ts +0 -467
  233. package/lib/ffmpeg.ts +0 -467
  234. package/lib/fireworks.ts +0 -235
  235. package/lib/groq.ts +0 -246
  236. package/lib/higgsfield.ts +0 -176
  237. package/lib/remotion/SKILL.md +0 -823
  238. package/lib/remotion/cli.ts +0 -115
  239. package/lib/remotion/functions.ts +0 -283
  240. package/lib/remotion/index.ts +0 -19
  241. package/lib/remotion/templates.ts +0 -73
  242. package/lib/replicate.ts +0 -304
  243. package/output.txt +0 -1
  244. package/test-import.ts +0 -7
  245. package/test-services.ts +0 -97
  246. package/utilities/s3.ts +0 -147
@@ -0,0 +1,817 @@
1
+ import { $ } from "bun";
2
+ import { ffprobe, multipleOf2 } from "./ffmpeg";
3
+ import {
4
+ getImageOverlayFilter,
5
+ getImageOverlayPositionFilter,
6
+ getNewsTitleFilter,
7
+ getOverlayFilter,
8
+ getSlideInTextFilter,
9
+ getSubtitleFilter,
10
+ getTitleFilter,
11
+ getVideoFilterWithTrim,
12
+ processLayer,
13
+ } from "./layers";
14
+ import type {
15
+ AudioLayer,
16
+ AudioNormalizationOptions,
17
+ AudioTrack,
18
+ Clip,
19
+ DetachedAudioLayer,
20
+ EditlyConfig,
21
+ ImageOverlayLayer,
22
+ Layer,
23
+ NewsTitleLayer,
24
+ ProcessedClip,
25
+ SlideInTextLayer,
26
+ SubtitleLayer,
27
+ TitleLayer,
28
+ VideoLayer,
29
+ } from "./types";
30
+
31
+ export * from "./types";
32
+
33
// Output geometry and frame rate used by editly() when neither the config
// nor the first probed video layer supplies a value.
const DEFAULT_WIDTH = 1280;
const DEFAULT_HEIGHT = 720;
const DEFAULT_FPS = 30;

// Per-clip fallbacks used by processClips() when a clip (and the config
// defaults) leave duration / transition unspecified.
const DEFAULT_DURATION = 4;
const DEFAULT_TRANSITION = { name: "fade", duration: 0.5 };
38
+
39
/**
 * Probes a media file and resolves to the `duration` field that ffprobe
 * reports for it.
 *
 * @param path - Location of the media file handed to `ffprobe`.
 * @returns The probed duration.
 */
async function getVideoDuration(path: string): Promise<number> {
  const { duration } = await ffprobe(path);
  return duration;
}
43
+
44
/**
 * Finds the first layer of type "video" (scanning clips in order, then each
 * clip's layers in order) and probes it for width, height and fps.
 *
 * @param clips - Raw clips as supplied in the config.
 * @returns The probed `{ width, height, fps }`, or an empty object when no
 *   clip contains a video layer.
 */
async function getFirstVideoInfo(clips: Clip[]): Promise<{
  width?: number;
  height?: number;
  fps?: number;
}> {
  const firstVideo = clips
    .flatMap((clip) => clip.layers)
    .find((layer) => layer.type === "video");

  if (!firstVideo) {
    return {};
  }

  const { width, height, fps } = await ffprobe((firstVideo as VideoLayer).path);
  return { width, height, fps };
}
59
+
60
/**
 * Merges configured defaults into a layer.
 *
 * Precedence, lowest to highest: `defaults.layer`, then
 * `defaults.layerType[layer.type]`, then the layer's own properties.
 *
 * @param layer - The layer as written in the clip.
 * @param defaults - The config's `defaults` section, if any.
 * @returns The same `layer` object when `defaults` is falsy; otherwise a new
 *   merged object.
 */
function applyLayerDefaults(
  layer: Layer,
  defaults: EditlyConfig["defaults"],
): Layer {
  if (defaults) {
    const merged = {
      ...(defaults.layer ?? {}),
      ...(defaults.layerType?.[layer.type] ?? {}),
      ...layer,
    };
    return merged as Layer;
  }

  return layer;
}
71
+
72
/**
 * Normalises raw clips into `ProcessedClip`s with concrete layers, duration
 * and transition.
 *
 * - Every layer gets the configured layer defaults applied.
 * - Duration is `clip.duration`, else `defaults.duration`, else
 *   `DEFAULT_DURATION`. When `clip.duration` is falsy and the clip has a
 *   video layer, the first such layer decides instead: `cutTo - cutFrom`,
 *   with `cutTo` defaulting to the probed file duration and `cutFrom` to 0.
 * - `transition: null` on a clip means "no transition" (name "none",
 *   duration 0). Otherwise name/duration fall back to the configured default
 *   transition, and both audio curves fall back to "tri".
 *
 * @param clips - Raw clips from the config.
 * @param defaults - The config's `defaults` section, if any.
 * @returns One processed clip per input clip, in the same order.
 */
async function processClips(
  clips: Clip[],
  defaults: EditlyConfig["defaults"],
): Promise<ProcessedClip[]> {
  const fallbackDuration = defaults?.duration ?? DEFAULT_DURATION;
  const fallbackTransition = defaults?.transition ?? DEFAULT_TRANSITION;
  const result: ProcessedClip[] = [];

  for (const clip of clips) {
    const layers = clip.layers.map((rawLayer) =>
      applyLayerDefaults(rawLayer, defaults),
    );

    let duration = clip.duration ?? fallbackDuration;

    // Without an explicit clip duration, the first video layer dictates it.
    if (!clip.duration) {
      const firstVideo = layers.find((l) => l.type === "video") as
        | VideoLayer
        | undefined;
      if (firstVideo) {
        const probedDuration = await getVideoDuration(firstVideo.path);
        const from = firstVideo.cutFrom ?? 0;
        const to = firstVideo.cutTo ?? probedDuration;
        duration = to - from;
      }
    }

    const requested = clip.transition;
    const transition: ProcessedClip["transition"] =
      requested === null
        ? {
            name: "none",
            duration: 0,
            audioOutCurve: "tri",
            audioInCurve: "tri",
          }
        : {
            name: requested?.name ?? fallbackTransition.name ?? "fade",
            duration: requested?.duration ?? fallbackTransition.duration ?? 0.5,
            audioOutCurve: requested?.audioOutCurve ?? "tri",
            audioInCurve: requested?.audioInCurve ?? "tri",
          };

    result.push({ layers, duration, transition });
  }

  return result;
}
120
+
121
/**
 * A video layer counts as an overlay as soon as any of `width`, `height`,
 * `left` or `top` is set (i.e. is not `undefined`).
 *
 * @param layer - Layer to classify.
 * @returns `true` for a positioned/sized video layer, `false` otherwise
 *   (including every non-video layer).
 */
function isVideoOverlayLayer(layer: Layer): boolean {
  if (layer.type !== "video") {
    return false;
  }

  const { width, height, left, top } = layer as VideoLayer;
  return [width, height, left, top].some((value) => value !== undefined);
}
131
+
132
/**
 * Tells whether a layer is an image overlay, i.e. its `type` is exactly
 * "image-overlay".
 */
function isImageOverlayLayer(layer: Layer): boolean {
  const { type } = layer;
  return type === "image-overlay";
}
135
+
136
/**
 * Tells whether a layer is composited on top of the finished timeline rather
 * than being part of a clip's base chain: either a video overlay or an image
 * overlay.
 */
function isOverlayLayer(layer: Layer): boolean {
  if (isVideoOverlayLayer(layer)) {
    return true;
  }
  return isImageOverlayLayer(layer);
}
139
+
140
/**
 * Builds the filter-graph fragment for the base (non-overlay) layers of one
 * clip.
 *
 * Layers are handled in order. Each one is passed to `processLayer`; when
 * that yields a filter, the filter is appended and becomes the current base
 * label. Text-style layers ("title", "subtitle", "news-title",
 * "slide-in-text") are then drawn on top of the current base label and
 * produce a new label named `<prefix><clipIndex>_<layerIndex>`.
 *
 * @param clip - The processed clip whose layers are rendered.
 * @param clipIndex - Index of the clip, used only to build unique labels.
 * @param width - Output width passed through to the layer filters.
 * @param height - Output height passed through to the layer filters.
 * @param inputOffset - ffmpeg input index to assign to the first file input
 *   this clip introduces.
 * @returns The filters and file inputs added by this clip, the label of the
 *   clip's final video stream, the next free input index, and one
 *   `videoSources` entry per base video layer that contributed a file input.
 */
function buildBaseClipFilter(
  clip: ProcessedClip,
  clipIndex: number,
  width: number,
  height: number,
  inputOffset: number,
): {
  filters: string[];
  inputs: string[];
  outputLabel: string;
  nextInputOffset: number;
  videoSources: {
    inputIndex: number;
    cutFrom: number;
    mixVolume?: number | string;
  }[];
} {
  const filters: string[] = [];
  const inputs: string[] = [];
  const videoSources: {
    inputIndex: number;
    cutFrom: number;
    mixVolume?: number | string;
  }[] = [];
  let currentLabel = "";
  let nextInput = inputOffset;

  const baseLayers = clip.layers.filter((l) => l && !isOverlayLayer(l));

  for (const [layerIndex, layer] of baseLayers.entries()) {
    const processedLayer = processLayer(
      layer,
      nextInput,
      width,
      height,
      clip.duration,
      false,
    );

    if (processedLayer) {
      const inputsBefore = inputs.length;
      for (const { path } of processedLayer.inputs) {
        if (path) {
          inputs.push(path);
        }
      }
      const consumedFileInput = inputs.length > inputsBefore;

      filters.push(processedLayer.filterComplex);
      currentLabel = processedLayer.outputLabel;

      // A layer takes up exactly one ffmpeg input slot when it contributed
      // at least one file path.
      if (consumedFileInput) {
        if (layer.type === "video" && !isVideoOverlayLayer(layer)) {
          const { cutFrom, mixVolume } = layer as VideoLayer;
          videoSources.push({
            inputIndex: nextInput,
            cutFrom: cutFrom ?? 0,
            mixVolume,
          });
        }
        nextInput++;
      }
    }

    // Text-style layers draw over whatever the current base label is.
    let labelPrefix = "";
    let textFilter = "";
    switch (layer.type) {
      case "title":
        labelPrefix = "title";
        textFilter = getTitleFilter(
          layer as TitleLayer,
          currentLabel,
          width,
          height,
          clip.duration,
        );
        break;
      case "subtitle":
        labelPrefix = "sub";
        textFilter = getSubtitleFilter(
          layer as SubtitleLayer,
          currentLabel,
          width,
          height,
          clip.duration,
        );
        break;
      case "news-title":
        labelPrefix = "news";
        textFilter = getNewsTitleFilter(
          layer as NewsTitleLayer,
          currentLabel,
          width,
          height,
          clip.duration,
        );
        break;
      case "slide-in-text":
        labelPrefix = "slide";
        textFilter = getSlideInTextFilter(
          layer as SlideInTextLayer,
          currentLabel,
          width,
          height,
          clip.duration,
        );
        break;
      default:
        break;
    }

    if (labelPrefix) {
      const textLabel = `${labelPrefix}${clipIndex}_${layerIndex}`;
      filters.push(`${textFilter}[${textLabel}]`);
      currentLabel = textLabel;
    }
  }

  return {
    filters,
    inputs,
    outputLabel: currentLabel,
    nextInputOffset: nextInput,
    videoSources,
  };
}
266
+
267
/**
 * Gathers every video overlay layer across all clips, keyed by file path.
 *
 * The first layer seen for a path is the one stored; later clips using the
 * same path only add their clip duration to `totalDuration`.
 *
 * @param clips - Processed clips in timeline order.
 * @returns Map from overlay path to its first layer and summed clip duration.
 */
function collectContinuousVideoOverlays(
  clips: ProcessedClip[],
): Map<string, { layer: VideoLayer; totalDuration: number }> {
  const byPath = new Map<
    string,
    { layer: VideoLayer; totalDuration: number }
  >();

  for (const { layers, duration } of clips) {
    for (const candidate of layers) {
      if (!candidate || !isVideoOverlayLayer(candidate)) continue;

      const overlay = candidate as VideoLayer;
      const entry = byPath.get(overlay.path);
      if (entry) {
        entry.totalDuration += duration;
        continue;
      }
      byPath.set(overlay.path, { layer: overlay, totalDuration: duration });
    }
  }

  return byPath;
}
294
+
295
/**
 * Gathers every image overlay layer across all clips, keyed by file path.
 *
 * The first layer seen for a path is the one stored; later clips using the
 * same path only add their clip duration to `totalDuration`.
 *
 * @param clips - Processed clips in timeline order.
 * @returns Map from overlay path to its first layer and summed clip duration.
 */
function collectImageOverlays(
  clips: ProcessedClip[],
): Map<string, { layer: ImageOverlayLayer; totalDuration: number }> {
  const byPath = new Map<
    string,
    { layer: ImageOverlayLayer; totalDuration: number }
  >();

  for (const { layers, duration } of clips) {
    for (const candidate of layers) {
      if (!candidate || !isImageOverlayLayer(candidate)) continue;

      const overlay = candidate as ImageOverlayLayer;
      const entry = byPath.get(overlay.path);
      if (entry) {
        entry.totalDuration += duration;
        continue;
      }
      byPath.set(overlay.path, { layer: overlay, totalDuration: duration });
    }
  }

  return byPath;
}
322
+
323
/**
 * Lists every "audio" and "detached-audio" layer together with the timeline
 * position at which it should start.
 *
 * The timeline cursor advances by each clip's duration and, for every clip
 * except the last, steps back by that clip's transition duration (clips
 * overlap while transitioning). "audio" layers start at the clip start;
 * "detached-audio" layers additionally add their own `start` offset
 * (default 0).
 *
 * @param clips - Processed clips in timeline order.
 * @returns Audio layers in encounter order, each with its `clipStartTime`.
 */
function collectAudioLayers(
  clips: ProcessedClip[],
): { layer: AudioLayer | DetachedAudioLayer; clipStartTime: number }[] {
  const found: {
    layer: AudioLayer | DetachedAudioLayer;
    clipStartTime: number;
  }[] = [];
  const lastIndex = clips.length - 1;
  let cursor = 0;

  for (const [index, clip] of clips.entries()) {
    if (!clip) continue;

    for (const layer of clip.layers) {
      if (!layer) continue;

      if (layer.type === "audio") {
        found.push({ layer: layer as AudioLayer, clipStartTime: cursor });
      } else if (layer.type === "detached-audio") {
        const detachedLayer = layer as DetachedAudioLayer;
        found.push({
          layer: detachedLayer,
          clipStartTime: cursor + (detachedLayer.start ?? 0),
        });
      }
    }

    cursor += clip.duration;
    if (index < lastIndex) {
      cursor -= clip.transition.duration;
    }
  }

  return found;
}
360
+
361
/**
 * Builds the filter that joins two labelled video streams.
 *
 * A transition named "none", or one with a non-positive duration, becomes a
 * plain `concat`; anything else becomes an `xfade` with the given name,
 * duration and offset. Both variants end with `settb=1/30` so that chained
 * xfades see a consistent timebase.
 *
 * @param fromLabel - Label of the outgoing stream.
 * @param toLabel - Label of the incoming stream.
 * @param transitionName - xfade transition name, or "none".
 * @param transitionDuration - Length of the cross-fade.
 * @param offset - Position in the outgoing stream at which the xfade starts.
 * @param outputLabel - Label given to the joined stream.
 * @returns A single filter-graph statement.
 */
function buildTransitionFilter(
  fromLabel: string,
  toLabel: string,
  transitionName: string,
  transitionDuration: number,
  offset: number,
  outputLabel: string,
): string {
  const isHardCut = transitionName === "none" || transitionDuration <= 0;
  const joinFilter = isHardCut
    ? "concat=n=2:v=1:a=0"
    : `xfade=transition=${transitionName}:duration=${transitionDuration}:offset=${offset}`;

  return `[${fromLabel}][${toLabel}]${joinFilter},settb=1/30[${outputLabel}]`;
}
376
+
377
/**
 * Describes the audio of one base video layer as it should appear in the
 * final mix.
 */
interface VideoSourceAudio {
  /** ffmpeg input index of the video file whose audio stream is used. */
  inputIndex: number;
  /** Timeline position at which the audio starts (converted to ms for adelay). */
  startTime: number;
  /** Length of audio taken from the source, starting at `cutFrom`. */
  duration: number;
  /** Offset into the source at which the audio excerpt begins. */
  cutFrom: number;
  /** Per-layer volume; takes precedence over the global clips volume. */
  mixVolume?: number | string;
  /** Length of the fade-out applied at the end of the excerpt, if any. */
  fadeOutDuration?: number;
  /** afade curve for the fade-out; "tri" when omitted. */
  fadeOutCurve?: string;
  /** Length of the fade-in applied at the start of the excerpt, if any. */
  fadeInDuration?: number;
  /** afade curve for the fade-in; "tri" when omitted. */
  fadeInCurve?: string;
}

/**
 * Builds the filter chain shared by standalone audio tracks and per-clip
 * audio layers: optional trim, optional volume, then a delay that places the
 * audio on the timeline.
 */
function buildDelayedAudioChain(
  inputIdx: number,
  label: string,
  delaySeconds: number,
  source: { cutFrom?: number; cutTo?: number; mixVolume?: number | string },
): string {
  let chain = `[${inputIdx}:a]`;
  if (source.cutFrom !== undefined || source.cutTo !== undefined) {
    const start = source.cutFrom ?? 0;
    // 999999 acts as "until the end" when only cutFrom is given.
    const end = source.cutTo ?? 999999;
    chain += `atrim=start=${start}:end=${end},asetpts=PTS-STARTPTS,`;
  }
  if (source.mixVolume !== undefined) {
    chain += `volume=${source.mixVolume},`;
  }
  // adelay takes milliseconds; the value is repeated once per channel.
  const delayMs = Math.round(delaySeconds * 1000);
  chain += `adelay=${delayMs}|${delayMs}[${label}]`;
  return chain;
}

/**
 * Builds the audio part of the filter graph and the list of extra audio
 * inputs it needs.
 *
 * Sources are added in this order, each becoming one labelled stream:
 *   1. audio of base video layers (`videoSourceAudio`) - included when
 *      `keepSourceAudio` is set or the layer has a non-zero `mixVolume`;
 *   2. the background file `audioFilePath` (looped and trimmed to
 *      `totalDuration` when `loopAudio` is set);
 *   3. `audioTracks`;
 *   4. `clipAudioLayers`.
 * A single stream is passed through `anull`; several are combined with
 * `amix` (normalize=0). Optional `dynaudnorm` and output `volume` filters
 * are appended afterwards.
 *
 * @param videoInputCount - Number of ffmpeg inputs already used; the first
 *   audio file added here gets this input index.
 * @param audioTracks - Standalone audio tracks placed at their `start`.
 * @param clipAudioLayers - Audio layers with their timeline start.
 * @param totalDuration - Length of the finished video.
 * @param audioFilePath - Optional background audio file.
 * @param loopAudio - Whether the background audio is looped.
 * @param keepSourceAudio - Whether audio of base video layers is kept.
 * @param outputVolume - Volume applied to the final mix; `0` mutes it.
 * @param videoSourceAudio - Audio descriptors of base video layers.
 * @param clipsAudioVolume - Volume for video-layer audio lacking `mixVolume`.
 * @param audioNorm - Normalisation options; applied when an object is given
 *   and its `enable` is not `false`.
 * @returns The extra inputs, the filter string and the output label
 *   ("aout"), or `null` when there is no audio at all.
 */
function buildAudioFilter(
  videoInputCount: number,
  audioTracks: AudioTrack[],
  clipAudioLayers: {
    layer: AudioLayer | DetachedAudioLayer;
    clipStartTime: number;
  }[],
  totalDuration: number,
  audioFilePath?: string,
  loopAudio?: boolean,
  keepSourceAudio?: boolean,
  outputVolume?: number | string,
  videoSourceAudio?: VideoSourceAudio[],
  clipsAudioVolume?: number | string,
  audioNorm?: AudioNormalizationOptions,
): { inputs: string[]; filter: string; outputLabel: string } | null {
  const audioInputs: string[] = [];
  const filterParts: string[] = [];
  const mixLabels: string[] = [];
  let inputIdx = videoInputCount;

  // 1. Audio belonging to base video layers. These reuse the video's own
  //    input index, so they add no entries to audioInputs.
  for (const [i, src] of (videoSourceAudio ?? []).entries()) {
    const { inputIndex, startTime, duration, cutFrom, mixVolume } = src;

    const shouldInclude =
      keepSourceAudio || (mixVolume !== undefined && mixVolume !== 0);
    if (!shouldInclude) continue;

    const label = `vsrc${i}`;
    let audioFilter = `[${inputIndex}:a]`;
    audioFilter += `atrim=${cutFrom}:${cutFrom + duration},asetpts=PTS-STARTPTS,`;

    const volume = mixVolume ?? clipsAudioVolume;
    if (volume !== undefined) {
      audioFilter += `volume=${volume},`;
    }
    if (src.fadeInDuration) {
      audioFilter += `afade=t=in:st=0:d=${src.fadeInDuration}:curve=${src.fadeInCurve ?? "tri"},`;
    }
    if (src.fadeOutDuration) {
      const fadeOutStart = duration - src.fadeOutDuration;
      audioFilter += `afade=t=out:st=${fadeOutStart}:d=${src.fadeOutDuration}:curve=${src.fadeOutCurve ?? "tri"},`;
    }
    const delayMs = Math.round(startTime * 1000);
    audioFilter += `adelay=${delayMs}|${delayMs}`;
    audioFilter += `[${label}]`;
    filterParts.push(audioFilter);
    mixLabels.push(label);
  }

  // 2. Background audio file.
  if (audioFilePath) {
    audioInputs.push(audioFilePath);
    const label = `abg${inputIdx}`;
    if (loopAudio) {
      filterParts.push(
        `[${inputIdx}:a]aloop=loop=-1:size=2e9,atrim=0:${totalDuration}[${label}]`,
      );
    } else {
      filterParts.push(`[${inputIdx}:a]anull[${label}]`);
    }
    mixLabels.push(label);
    inputIdx++;
  }

  // 3. Standalone audio tracks.
  for (const [i, track] of audioTracks.entries()) {
    audioInputs.push(track.path);
    const label = `atrk${i}`;
    filterParts.push(
      buildDelayedAudioChain(inputIdx, label, track.start ?? 0, track),
    );
    mixLabels.push(label);
    inputIdx++;
  }

  // 4. Audio layers declared inside clips.
  for (const [i, { layer, clipStartTime }] of clipAudioLayers.entries()) {
    audioInputs.push(layer.path);
    const label = `aclip${i}`;
    filterParts.push(
      buildDelayedAudioChain(inputIdx, label, clipStartTime, layer),
    );
    mixLabels.push(label);
    inputIdx++;
  }

  if (mixLabels.length === 0) {
    return null;
  }

  let postFilters = "";
  if (audioNorm && audioNorm.enable !== false) {
    const gaussSize = audioNorm.gaussSize ?? 5;
    const maxGain = audioNorm.maxGain ?? 25;
    postFilters += `,dynaudnorm=g=${gaussSize}:maxgain=${maxGain}`;
  }
  // Explicit presence check: a truthiness test would drop outputVolume 0,
  // i.e. silently ignore a request to mute the output.
  if (outputVolume !== undefined && outputVolume !== "") {
    postFilters += `,volume=${outputVolume}`;
  }

  if (mixLabels.length === 1) {
    return {
      inputs: audioInputs,
      filter: `${filterParts.join(";")};[${mixLabels[0]}]anull${postFilters}[aout]`,
      outputLabel: "aout",
    };
  }

  const mixInputs = mixLabels.map((l) => `[${l}]`).join("");
  return {
    inputs: audioInputs,
    filter: `${filterParts.join(";")};${mixInputs}amix=inputs=${mixLabels.length}:normalize=0${postFilters}[aout]`,
    outputLabel: "aout",
  };
}
529
+
530
/**
 * Renders the clips described by `config` into one file at `config.outPath`
 * using a single ffmpeg invocation.
 *
 * Outline:
 *   1. Resolve output width/height/fps (config, else first video layer,
 *      else module defaults).
 *   2. Normalise clips (defaults, durations, transitions).
 *   3. Build each clip's base filter chain and join the clips with
 *      transitions.
 *   4. Composite video overlays, then image overlays, over the joined
 *      timeline.
 *   5. Build the audio mix and run ffmpeg.
 *
 * @param config - The edit description; `clips` and `outPath` are required.
 * @throws Error when `config.clips` is missing or empty.
 * @throws Error when ffmpeg exits with a non-zero status; the message
 *   carries the exit code and whatever ffmpeg wrote to stderr.
 */
export async function editly(config: EditlyConfig): Promise<void> {
  const {
    outPath,
    clips: clipsIn,
    defaults,
    audioFilePath,
    audioTracks = [],
    loopAudio,
    keepSourceAudio,
    clipsAudioVolume,
    outputVolume,
    audioNorm,
    customOutputArgs,
    verbose,
    fast,
  } = config;

  if (!clipsIn || clipsIn.length === 0) {
    throw new Error("At least one clip is required");
  }

  // --- Output geometry -----------------------------------------------------
  const firstVideoInfo = await getFirstVideoInfo(clipsIn);
  let width = config.width ?? firstVideoInfo.width ?? DEFAULT_WIDTH;
  let height = config.height ?? firstVideoInfo.height ?? DEFAULT_HEIGHT;
  const fps = config.fps ?? firstVideoInfo.fps ?? DEFAULT_FPS;

  width = multipleOf2(width);
  height = multipleOf2(height);

  if (fast) {
    // Preview mode: keep the aspect ratio but shrink the frame so that
    // width * height is roughly 320 * 320.
    const aspectRatio = width / height;
    width = multipleOf2(Math.round(320 * Math.sqrt(aspectRatio)));
    height = multipleOf2(Math.round(320 * Math.sqrt(1 / aspectRatio)));
  }

  if (verbose) {
    console.log(`Output: ${width}x${height} @ ${fps}fps`);
  }

  const clips = await processClips(clipsIn, defaults);

  // --- Overlay inputs ------------------------------------------------------
  // Overlay files are registered first, so they occupy the lowest ffmpeg
  // input indices; clip inputs follow, audio-only inputs come last.
  const continuousVideoOverlays = collectContinuousVideoOverlays(clips);
  const imageOverlays = collectImageOverlays(clips);
  const overlayInputs: string[] = [];
  const videoOverlayInputMap = new Map<string, number>();
  const imageOverlayInputMap = new Map<string, number>();

  for (const [path] of continuousVideoOverlays) {
    videoOverlayInputMap.set(path, overlayInputs.length);
    overlayInputs.push(path);
  }

  for (const [path] of imageOverlays) {
    imageOverlayInputMap.set(path, overlayInputs.length);
    overlayInputs.push(path);
  }

  // --- Per-clip base chains ------------------------------------------------
  const allFilters: string[] = [];
  const allInputs: string[] = [...overlayInputs];
  const clipOutputLabels: string[] = [];
  const videoSourceAudio: VideoSourceAudio[] = [];
  let inputOffset = overlayInputs.length;
  let currentClipTime = 0;

  for (const [i, clip] of clips.entries()) {
    const result = buildBaseClipFilter(clip, i, width, height, inputOffset);

    allFilters.push(...result.filters);
    allInputs.push(...result.inputs);
    clipOutputLabels.push(result.outputLabel);

    for (const { inputIndex, cutFrom, mixVolume } of result.videoSources) {
      // The fade-in mirrors the previous clip's transition; the fade-out
      // uses this clip's own transition.
      const prevClip = i > 0 ? clips[i - 1] : null;
      const fadeInDuration = prevClip ? prevClip.transition.duration : 0;
      const fadeInCurve = prevClip?.transition.audioInCurve ?? "tri";
      const fadeOutDuration = clip.transition.duration;
      const fadeOutCurve = clip.transition.audioOutCurve ?? "tri";

      videoSourceAudio.push({
        inputIndex,
        startTime: currentClipTime,
        duration: clip.duration,
        cutFrom,
        mixVolume,
        fadeInDuration: fadeInDuration > 0 ? fadeInDuration : undefined,
        fadeInCurve,
        fadeOutDuration: fadeOutDuration > 0 ? fadeOutDuration : undefined,
        fadeOutCurve,
      });
    }

    inputOffset = result.nextInputOffset;
    // Consecutive clips overlap for the length of the transition, so every
    // clip but the last advances the clock by duration minus transition.
    currentClipTime += clip.duration;
    if (i < clips.length - 1) {
      currentClipTime -= clip.transition.duration;
    }
  }

  // --- Join clips with transitions -----------------------------------------
  let finalVideoLabel = clipOutputLabels[0] ?? "v0";

  if (clipOutputLabels.length > 1) {
    let currentLabel = clipOutputLabels[0] ?? "v0";
    let accumulatedDuration = clips[0]?.duration ?? 0;

    for (let i = 0; i < clips.length - 1; i++) {
      const nextLabel = clipOutputLabels[i + 1] ?? `v${i + 1}`;
      const clip = clips[i];
      const nextClip = clips[i + 1];
      if (!clip) continue;
      const transition = clip.transition;
      const outputLabel = i === clips.length - 2 ? "vfinal" : `vmix${i}`;

      // The transition starts `transition.duration` before the end of what
      // has been joined so far.
      const offset = Math.max(0, accumulatedDuration - transition.duration);

      allFilters.push(
        buildTransitionFilter(
          currentLabel,
          nextLabel,
          transition.name,
          transition.duration,
          offset,
          outputLabel,
        ),
      );

      accumulatedDuration = offset + (nextClip?.duration ?? 0);
      currentLabel = outputLabel;
    }

    finalVideoLabel = "vfinal";
  }

  // Total length: sum of clip durations minus the transition overlaps
  // between neighbouring clips.
  let totalDuration = 0;
  for (const clip of clips) {
    totalDuration += clip.duration;
  }
  for (let i = 0; i < clips.length - 1; i++) {
    const clip = clips[i];
    if (clip) {
      totalDuration -= clip.transition.duration;
    }
  }

  // --- Video overlays ------------------------------------------------------
  if (continuousVideoOverlays.size > 0) {
    let currentBase = finalVideoLabel;
    let overlayIdx = 0;

    for (const [path, { layer }] of continuousVideoOverlays) {
      const inputIndex = videoOverlayInputMap.get(path);
      if (inputIndex === undefined) continue;

      const trimmedLabel = `ovfinal${overlayIdx}`;
      const layerFilter = getVideoFilterWithTrim(
        layer,
        inputIndex,
        width,
        height,
        0,
        totalDuration,
        trimmedLabel,
        true,
      );
      allFilters.push(layerFilter.filterComplex);

      const outputLabel = `vwithov${overlayIdx}`;
      const overlayFilter = getOverlayFilter(
        currentBase,
        trimmedLabel,
        layer,
        width,
        height,
        outputLabel,
      );
      allFilters.push(overlayFilter);

      currentBase = outputLabel;
      overlayIdx++;
    }

    finalVideoLabel = currentBase;
  }

  // --- Image overlays ------------------------------------------------------
  if (imageOverlays.size > 0) {
    let currentBase = finalVideoLabel;
    let imgOverlayIdx = 0;

    for (const [path, { layer }] of imageOverlays) {
      const inputIndex = imageOverlayInputMap.get(path);
      if (inputIndex === undefined) continue;

      const imgFilter = getImageOverlayFilter(
        layer,
        inputIndex,
        width,
        height,
        totalDuration,
      );
      allFilters.push(imgFilter.filterComplex);

      const outputLabel = `vwithimgov${imgOverlayIdx}`;
      const positionFilter = getImageOverlayPositionFilter(
        currentBase,
        imgFilter.outputLabel,
        layer,
        width,
        height,
        outputLabel,
      );
      allFilters.push(positionFilter);

      currentBase = outputLabel;
      imgOverlayIdx++;
    }

    finalVideoLabel = currentBase;
  }

  // --- Audio ---------------------------------------------------------------
  const clipAudioLayers = collectAudioLayers(clips);
  // Audio-only inputs are numbered after every video/overlay input.
  const videoInputCount = allInputs.length;
  const audioFilter = buildAudioFilter(
    videoInputCount,
    audioTracks,
    clipAudioLayers,
    totalDuration,
    audioFilePath,
    loopAudio,
    keepSourceAudio,
    outputVolume,
    videoSourceAudio,
    clipsAudioVolume,
    audioNorm,
  );

  if (audioFilter) {
    allInputs.push(...audioFilter.inputs);
    allFilters.push(audioFilter.filter);
  }

  // --- ffmpeg command line -------------------------------------------------
  const inputArgs = allInputs.flatMap((input) => ["-i", input]);
  const filterComplex = allFilters.join(";");

  const outputArgs = customOutputArgs ?? [
    "-c:v",
    "libx264",
    "-preset",
    fast ? "ultrafast" : "medium",
    "-crf",
    "18",
    "-pix_fmt",
    "yuv420p",
    "-movflags",
    "+faststart",
  ];

  const mapArgs = audioFilter
    ? ["-map", `[${finalVideoLabel}]`, "-map", `[${audioFilter.outputLabel}]`]
    : ["-map", `[${finalVideoLabel}]`];

  const ffmpegArgs = [
    "-hide_banner",
    "-loglevel",
    verbose ? "info" : "error",
    ...inputArgs,
    "-filter_complex",
    filterComplex,
    ...mapArgs,
    "-r",
    String(fps),
    ...outputArgs,
    "-y",
    outPath,
  ];

  if (verbose) {
    console.log("ffmpeg", ffmpegArgs.join(" "));
    console.log("\nFilter complex:\n", filterComplex.split(";").join(";\n"));
  }

  // Bun's shell rejects the promise on a non-zero exit unless .nothrow() is
  // used, which would make the exit-code check below unreachable. Opt out so
  // failures are reported here, together with ffmpeg's captured stderr.
  const result = await $`ffmpeg ${ffmpegArgs}`.quiet().nothrow();

  if (result.exitCode !== 0) {
    const stderrText = result.stderr.toString().trim();
    throw new Error(
      `ffmpeg failed with exit code ${result.exitCode}${stderrText ? `:\n${stderrText}` : ""}`,
    );
  }

  console.log(`Output: ${outPath}`);
}
816
+
817
+ export default editly;