varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +48 -8
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -227
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -493
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -112
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -187
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -135
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -201
  217. package/index.ts +0 -38
  218. package/lib/README.md +0 -144
  219. package/lib/ai-sdk/fal.ts +0 -106
  220. package/lib/ai-sdk/replicate.ts +0 -107
  221. package/lib/elevenlabs.ts +0 -382
  222. package/lib/fal.ts +0 -478
  223. package/lib/ffmpeg.ts +0 -467
  224. package/lib/fireworks.ts +0 -235
  225. package/lib/groq.ts +0 -246
  226. package/lib/higgsfield.ts +0 -176
  227. package/lib/remotion/SKILL.md +0 -823
  228. package/lib/remotion/cli.ts +0 -115
  229. package/lib/remotion/functions.ts +0 -283
  230. package/lib/remotion/index.ts +0 -19
  231. package/lib/remotion/templates.ts +0 -73
  232. package/lib/replicate.ts +0 -304
  233. package/output.txt +0 -1
  234. package/test-import.ts +0 -7
  235. package/test-services.ts +0 -97
  236. package/utilities/s3.ts +0 -147
@@ -0,0 +1,817 @@
1
+ import { $ } from "bun";
2
+ import { ffprobe, multipleOf2 } from "./ffmpeg";
3
+ import {
4
+ getImageOverlayFilter,
5
+ getImageOverlayPositionFilter,
6
+ getNewsTitleFilter,
7
+ getOverlayFilter,
8
+ getSlideInTextFilter,
9
+ getSubtitleFilter,
10
+ getTitleFilter,
11
+ getVideoFilterWithTrim,
12
+ processLayer,
13
+ } from "./layers";
14
+ import type {
15
+ AudioLayer,
16
+ AudioNormalizationOptions,
17
+ AudioTrack,
18
+ Clip,
19
+ DetachedAudioLayer,
20
+ EditlyConfig,
21
+ ImageOverlayLayer,
22
+ Layer,
23
+ NewsTitleLayer,
24
+ ProcessedClip,
25
+ SlideInTextLayer,
26
+ SubtitleLayer,
27
+ TitleLayer,
28
+ VideoLayer,
29
+ } from "./types";
30
+
31
+ export * from "./types";
32
+
33
+ const DEFAULT_DURATION = 4; // fallback clip duration when neither the clip nor defaults.duration sets one (presumably seconds — confirm)
34
+ const DEFAULT_TRANSITION = { name: "fade", duration: 0.5 }; // fallback transition when defaults.transition is absent
35
+ const DEFAULT_FPS = 30; // output frame rate when neither config.fps nor the first probed video provides one
36
+ const DEFAULT_WIDTH = 1280; // output width when neither config.width nor the first probed video provides one
37
+ const DEFAULT_HEIGHT = 720; // output height when neither config.height nor the first probed video provides one
38
+
39
/**
 * Resolves to the `duration` field reported by `ffprobe` for the given file.
 *
 * @param path - Media file handed to `ffprobe`.
 * @returns The probed duration.
 */
async function getVideoDuration(path: string): Promise<number> {
  const { duration } = await ffprobe(path);
  return duration;
}
43
+
44
/**
 * Probes the first layer of type `"video"` (scanning clips in order, and
 * layers in order within each clip) and reports its width, height and fps.
 *
 * @param clips - Clips to scan.
 * @returns The probed values, or an empty object when no clip has a video layer.
 */
async function getFirstVideoInfo(clips: Clip[]): Promise<{
  width?: number;
  height?: number;
  fps?: number;
}> {
  const firstVideo = clips
    .flatMap((clip) => clip.layers)
    .find((layer) => layer.type === "video");

  if (!firstVideo) return {};

  const { width, height, fps } = await ffprobe((firstVideo as VideoLayer).path);
  return { width, height, fps };
}
59
+
60
/**
 * Merges configured defaults into a layer.
 *
 * Precedence, lowest to highest: `defaults.layer`, then
 * `defaults.layerType[layer.type]`, then the layer's own properties.
 *
 * @param layer - Layer as supplied by the caller.
 * @param defaults - The config's `defaults` section; may be undefined.
 * @returns The same layer object when `defaults` is falsy, otherwise a new merged object.
 */
function applyLayerDefaults(
  layer: Layer,
  defaults: EditlyConfig["defaults"],
): Layer {
  if (defaults) {
    const sharedDefaults = defaults.layer ?? {};
    const perTypeDefaults = defaults.layerType?.[layer.type] ?? {};
    return { ...sharedDefaults, ...perTypeDefaults, ...layer } as Layer;
  }

  return layer;
}
71
+
72
/**
 * Normalizes raw clips into `ProcessedClip`s: applies layer defaults, resolves
 * each clip's duration, and fills in a complete transition object.
 *
 * Duration: an explicit truthy `clip.duration` wins. Otherwise the first video
 * layer (if any) is probed and the duration becomes `cutTo - cutFrom`, with
 * `cutFrom` defaulting to 0 and `cutTo` to the probed length. With no video
 * layer the default duration is used.
 *
 * Transition: `clip.transition === null` yields a zero-length `"none"`
 * transition; otherwise missing fields fall back to the configured/default
 * transition and `"tri"` audio curves.
 *
 * @param clips - Clips as supplied in the config.
 * @param defaults - The config's `defaults` section; may be undefined.
 * @returns One processed clip per input clip, in the same order.
 */
async function processClips(
  clips: Clip[],
  defaults: EditlyConfig["defaults"],
): Promise<ProcessedClip[]> {
  const fallbackDuration = defaults?.duration ?? DEFAULT_DURATION;
  const fallbackTransition = defaults?.transition ?? DEFAULT_TRANSITION;
  const result: ProcessedClip[] = [];

  for (const clip of clips) {
    const layers = clip.layers.map((rawLayer) =>
      applyLayerDefaults(rawLayer, defaults),
    );

    let duration = clip.duration ?? fallbackDuration;
    if (!clip.duration) {
      // Only the first video layer determines the clip length.
      const firstVideo = layers.find((l) => l.type === "video") as
        | VideoLayer
        | undefined;
      if (firstVideo) {
        const probedLength = await getVideoDuration(firstVideo.path);
        const from = firstVideo.cutFrom ?? 0;
        const to = firstVideo.cutTo ?? probedLength;
        duration = to - from;
      }
    }

    const requested = clip.transition;
    const transition: ProcessedClip["transition"] =
      requested === null
        ? {
            name: "none",
            duration: 0,
            audioOutCurve: "tri",
            audioInCurve: "tri",
          }
        : {
            name: requested?.name ?? fallbackTransition.name ?? "fade",
            duration: requested?.duration ?? fallbackTransition.duration ?? 0.5,
            audioOutCurve: requested?.audioOutCurve ?? "tri",
            audioInCurve: requested?.audioInCurve ?? "tri",
          };

    result.push({ layers, duration, transition });
  }

  return result;
}
120
+
121
/**
 * A video layer counts as an overlay when any of `width`, `height`, `left`
 * or `top` is set to something other than `undefined`.
 *
 * @param layer - Layer to classify.
 * @returns `true` for a video layer with at least one of those fields defined.
 */
function isVideoOverlayLayer(layer: Layer): boolean {
  if (layer.type !== "video") return false;

  const { width, height, left, top } = layer as VideoLayer;
  return [width, height, left, top].some((value) => value !== undefined);
}
131
+
132
/**
 * @param layer - Layer to classify.
 * @returns `true` when the layer's type is `"image-overlay"`.
 */
function isImageOverlayLayer(layer: Layer): boolean {
  const { type } = layer;
  return type === "image-overlay";
}
135
+
136
/**
 * @param layer - Layer to classify.
 * @returns `true` when the layer is either a video overlay or an image overlay.
 */
function isOverlayLayer(layer: Layer): boolean {
  if (isVideoOverlayLayer(layer)) return true;
  return isImageOverlayLayer(layer);
}
139
+
140
/**
 * Builds the filter chain for one clip's non-overlay ("base") layers.
 *
 * Each base layer is passed to `processLayer`; any file paths it reports are
 * appended to `inputs`, and the running input index advances by one when the
 * layer contributed at least one file. Text-style layers (`title`, `subtitle`,
 * `news-title`, `slide-in-text`) are additionally chained onto the current
 * output label using labels of the form `<prefix><clipIndex>_<layerIndex>`.
 *
 * @param clip - Processed clip whose layers are rendered.
 * @param clipIndex - Index of the clip, used only to build unique labels.
 * @param width - Output width forwarded to the layer filter builders.
 * @param height - Output height forwarded to the layer filter builders.
 * @param inputOffset - Input index assigned to the first file this clip adds.
 * @returns The filters and input paths produced, the label of the clip's final
 *   stream, the next free input index, and the video inputs whose audio may be
 *   mixed later.
 */
function buildBaseClipFilter(
  clip: ProcessedClip,
  clipIndex: number,
  width: number,
  height: number,
  inputOffset: number,
): {
  filters: string[];
  inputs: string[];
  outputLabel: string;
  nextInputOffset: number;
  videoSources: {
    inputIndex: number;
    cutFrom: number;
    mixVolume?: number | string;
  }[];
} {
  const filters: string[] = [];
  const inputs: string[] = [];
  const videoSources: {
    inputIndex: number;
    cutFrom: number;
    mixVolume?: number | string;
  }[] = [];
  let currentLabel = "";
  let nextInput = inputOffset;

  // Appends a text filter fed by the current label and makes its output current.
  const chainTextFilter = (
    filterExpr: string,
    labelPrefix: string,
    layerIdx: number,
  ): void => {
    const chainedLabel = `${labelPrefix}${clipIndex}_${layerIdx}`;
    filters.push(`${filterExpr}[${chainedLabel}]`);
    currentLabel = chainedLabel;
  };

  const baseLayers = clip.layers.filter((l) => l && !isOverlayLayer(l));

  for (const [i, layer] of baseLayers.entries()) {
    if (!layer) continue;

    const layerFilter = processLayer(
      layer,
      nextInput,
      width,
      height,
      clip.duration,
      false,
    );

    if (layerFilter) {
      const inputCountBefore = inputs.length;
      for (const { path } of layerFilter.inputs) {
        if (path) inputs.push(path);
      }
      const consumedFileInput = inputs.length > inputCountBefore;

      filters.push(layerFilter.filterComplex);
      currentLabel = layerFilter.outputLabel;

      if (consumedFileInput) {
        if (layer.type === "video" && !isVideoOverlayLayer(layer)) {
          const { cutFrom, mixVolume } = layer as VideoLayer;
          videoSources.push({
            inputIndex: nextInput,
            cutFrom: cutFrom ?? 0,
            mixVolume,
          });
        }
        // One index step per layer, regardless of how many paths it reported.
        nextInput++;
      }
    }

    switch (layer.type) {
      case "title":
        chainTextFilter(
          getTitleFilter(
            layer as TitleLayer,
            currentLabel,
            width,
            height,
            clip.duration,
          ),
          "title",
          i,
        );
        break;
      case "subtitle":
        chainTextFilter(
          getSubtitleFilter(
            layer as SubtitleLayer,
            currentLabel,
            width,
            height,
            clip.duration,
          ),
          "sub",
          i,
        );
        break;
      case "news-title":
        chainTextFilter(
          getNewsTitleFilter(
            layer as NewsTitleLayer,
            currentLabel,
            width,
            height,
            clip.duration,
          ),
          "news",
          i,
        );
        break;
      case "slide-in-text":
        chainTextFilter(
          getSlideInTextFilter(
            layer as SlideInTextLayer,
            currentLabel,
            width,
            height,
            clip.duration,
          ),
          "slide",
          i,
        );
        break;
      default:
        break;
    }
  }

  return {
    filters,
    inputs,
    outputLabel: currentLabel,
    nextInputOffset: nextInput,
    videoSources,
  };
}
266
+
267
/**
 * Groups video overlay layers by file path across all clips.
 *
 * The first layer seen for a path is the one kept; `totalDuration` is the sum
 * of the durations of every clip in which that path appears as an overlay.
 *
 * @param clips - Processed clips to scan.
 * @returns Map from overlay path to its first layer and accumulated duration.
 */
function collectContinuousVideoOverlays(
  clips: ProcessedClip[],
): Map<string, { layer: VideoLayer; totalDuration: number }> {
  const byPath = new Map<
    string,
    { layer: VideoLayer; totalDuration: number }
  >();

  for (const { layers, duration } of clips) {
    for (const candidate of layers) {
      if (!candidate || !isVideoOverlayLayer(candidate)) continue;

      const overlay = candidate as VideoLayer;
      const seen = byPath.get(overlay.path);
      if (seen === undefined) {
        byPath.set(overlay.path, { layer: overlay, totalDuration: duration });
        continue;
      }
      seen.totalDuration += duration;
    }
  }

  return byPath;
}
294
+
295
/**
 * Groups image overlay layers by file path across all clips.
 *
 * The first layer seen for a path is the one kept; `totalDuration` is the sum
 * of the durations of every clip in which that path appears.
 *
 * @param clips - Processed clips to scan.
 * @returns Map from image path to its first layer and accumulated duration.
 */
function collectImageOverlays(
  clips: ProcessedClip[],
): Map<string, { layer: ImageOverlayLayer; totalDuration: number }> {
  const byPath = new Map<
    string,
    { layer: ImageOverlayLayer; totalDuration: number }
  >();

  for (const { layers, duration } of clips) {
    for (const candidate of layers) {
      if (!candidate || !isImageOverlayLayer(candidate)) continue;

      const overlay = candidate as ImageOverlayLayer;
      const seen = byPath.get(overlay.path);
      if (seen === undefined) {
        byPath.set(overlay.path, { layer: overlay, totalDuration: duration });
        continue;
      }
      seen.totalDuration += duration;
    }
  }

  return byPath;
}
322
+
323
/**
 * Collects every `audio` and `detached-audio` layer together with the time at
 * which it should start on the output timeline.
 *
 * A clip's start is the running sum of the previous clips' durations minus
 * their transition durations. `audio` layers start with their clip;
 * `detached-audio` layers start at the clip start plus `layer.start` (0 when
 * unset).
 *
 * @param clips - Processed clips in playback order.
 * @returns Audio layers in encounter order, each with its start time.
 */
function collectAudioLayers(
  clips: ProcessedClip[],
): { layer: AudioLayer | DetachedAudioLayer; clipStartTime: number }[] {
  const collected: {
    layer: AudioLayer | DetachedAudioLayer;
    clipStartTime: number;
  }[] = [];
  const lastIndex = clips.length - 1;
  let timelineCursor = 0;

  clips.forEach((clip, index) => {
    if (!clip) return;

    for (const layer of clip.layers) {
      if (!layer) continue;

      if (layer.type === "audio") {
        collected.push({
          layer: layer as AudioLayer,
          clipStartTime: timelineCursor,
        });
      }
      if (layer.type === "detached-audio") {
        const detachedLayer = layer as DetachedAudioLayer;
        collected.push({
          layer: detachedLayer,
          clipStartTime: timelineCursor + (detachedLayer.start ?? 0),
        });
      }
    }

    // Add, then subtract, as two separate steps.
    timelineCursor += clip.duration;
    if (index < lastIndex) {
      timelineCursor -= clip.transition.duration;
    }
  });

  return collected;
}
360
+
361
/**
 * Builds the filter expression that joins two video streams.
 *
 * A `"none"` transition or a non-positive duration produces a plain `concat`;
 * anything else produces an `xfade` with the given name, duration and offset.
 * Both forms end with `settb=1/30` so chained xfades share a timebase.
 *
 * @param fromLabel - Label of the outgoing stream.
 * @param toLabel - Label of the incoming stream.
 * @param transitionName - xfade transition name, or `"none"`.
 * @param transitionDuration - Length of the transition.
 * @param offset - Offset at which the xfade starts.
 * @param outputLabel - Label given to the joined stream.
 * @returns The filter expression including input and output labels.
 */
function buildTransitionFilter(
  fromLabel: string,
  toLabel: string,
  transitionName: string,
  transitionDuration: number,
  offset: number,
  outputLabel: string,
): string {
  const isHardCut = transitionName === "none" || transitionDuration <= 0;
  const joinFilter = isHardCut
    ? "concat=n=2:v=1:a=0"
    : `xfade=transition=${transitionName}:duration=${transitionDuration}:offset=${offset}`;

  return `[${fromLabel}][${toLabel}]${joinFilter},settb=1/30[${outputLabel}]`;
}
376
+
377
/** Audio taken from a base video layer, placed on the output timeline. */
interface VideoSourceAudio {
  /** ffmpeg input index of the video file. */
  inputIndex: number;
  /** Where the clip starts on the output timeline (multiplied by 1000 for `adelay`). */
  startTime: number;
  /** Length of the clip; the audio is trimmed to `cutFrom .. cutFrom + duration`. */
  duration: number;
  /** Offset into the source at which the audio starts. */
  cutFrom: number;
  /** Per-layer volume; takes precedence over `clipsAudioVolume`. */
  mixVolume?: number | string;
  /** Fade-out length applied at the end of the clip's audio. */
  fadeOutDuration?: number;
  /** `afade` curve for the fade-out (defaults to `"tri"`). */
  fadeOutCurve?: string;
  /** Fade-in length applied at the start of the clip's audio. */
  fadeInDuration?: number;
  /** `afade` curve for the fade-in (defaults to `"tri"`). */
  fadeInCurve?: string;
}

/**
 * Builds the audio part of the filter graph and the list of extra audio inputs.
 *
 * Sources are added in this order, each becoming one labelled stream:
 *   1. audio of base video layers (`vsrcN`) — only when `keepSourceAudio` is
 *      set or the layer has a non-zero `mixVolume`; these reuse existing video
 *      inputs and add no new input;
 *   2. the background file `audioFilePath` (`abgN`), looped and trimmed to
 *      `totalDuration` when `loopAudio` is set;
 *   3. `audioTracks` (`atrkN`);
 *   4. per-clip audio layers (`aclipN`).
 * Extra inputs are numbered from `videoInputCount` upwards in that same order,
 * so the caller must append `inputs` to the ffmpeg command right after the
 * video inputs.
 *
 * The streams are mixed with `amix` (or passed through `anull` when there is
 * only one), then optionally normalized with `dynaudnorm` and scaled by
 * `outputVolume`.
 *
 * @param videoInputCount - Number of ffmpeg inputs already in use.
 * @param audioTracks - Arbitrary audio tracks with optional trim/volume/start.
 * @param clipAudioLayers - Audio layers with their timeline start.
 * @param totalDuration - Output length, used to trim a looped background file.
 * @param audioFilePath - Optional background audio file.
 * @param loopAudio - Loop the background file for the whole output.
 * @param keepSourceAudio - Include the audio of every base video layer.
 * @param outputVolume - Final volume; `0` mutes the output.
 * @param videoSourceAudio - Base video layers whose audio may be mixed in.
 * @param clipsAudioVolume - Volume for video-layer audio lacking `mixVolume`.
 * @param audioNorm - Normalization options; applied unless `enable === false`.
 * @returns Inputs, filter string and output label, or `null` when there is no
 *   audio source at all.
 */
function buildAudioFilter(
  videoInputCount: number,
  audioTracks: AudioTrack[],
  clipAudioLayers: {
    layer: AudioLayer | DetachedAudioLayer;
    clipStartTime: number;
  }[],
  totalDuration: number,
  audioFilePath?: string,
  loopAudio?: boolean,
  keepSourceAudio?: boolean,
  outputVolume?: number | string,
  videoSourceAudio?: VideoSourceAudio[],
  clipsAudioVolume?: number | string,
  audioNorm?: AudioNormalizationOptions,
): { inputs: string[]; filter: string; outputLabel: string } | null {
  const audioInputs: string[] = [];
  const filterParts: string[] = [];
  const mixLabels: string[] = [];
  let inputIdx = videoInputCount;

  if (videoSourceAudio && videoSourceAudio.length > 0) {
    for (let i = 0; i < videoSourceAudio.length; i++) {
      const src = videoSourceAudio[i]!;
      const { inputIndex, startTime, duration, cutFrom, mixVolume } = src;

      const shouldInclude =
        keepSourceAudio || (mixVolume !== undefined && mixVolume !== 0);
      if (!shouldInclude) continue;

      const label = `vsrc${i}`;
      let audioFilter = `[${inputIndex}:a]`;
      audioFilter += `atrim=${cutFrom}:${cutFrom + duration},asetpts=PTS-STARTPTS,`;

      const volume = mixVolume ?? clipsAudioVolume;
      if (volume !== undefined) {
        audioFilter += `volume=${volume},`;
      }
      if (src.fadeInDuration) {
        audioFilter += `afade=t=in:st=0:d=${src.fadeInDuration}:curve=${src.fadeInCurve ?? "tri"},`;
      }
      if (src.fadeOutDuration) {
        const fadeOutStart = duration - src.fadeOutDuration;
        audioFilter += `afade=t=out:st=${fadeOutStart}:d=${src.fadeOutDuration}:curve=${src.fadeOutCurve ?? "tri"},`;
      }
      // adelay takes milliseconds, one value per channel.
      const delayMs = Math.round(startTime * 1000);
      audioFilter += `adelay=${delayMs}|${delayMs}`;
      audioFilter += `[${label}]`;
      filterParts.push(audioFilter);
      mixLabels.push(label);
    }
  }

  if (audioFilePath) {
    audioInputs.push(audioFilePath);
    const label = `abg${inputIdx}`;
    if (loopAudio) {
      filterParts.push(
        `[${inputIdx}:a]aloop=loop=-1:size=2e9,atrim=0:${totalDuration}[${label}]`,
      );
    } else {
      filterParts.push(`[${inputIdx}:a]anull[${label}]`);
    }
    mixLabels.push(label);
    inputIdx++;
  }

  for (let i = 0; i < audioTracks.length; i++) {
    const track = audioTracks[i]!;
    audioInputs.push(track.path);
    const label = `atrk${i}`;

    let audioFilter = `[${inputIdx}:a]`;
    if (track.cutFrom !== undefined || track.cutTo !== undefined) {
      const start = track.cutFrom ?? 0;
      const end = track.cutTo ?? 999999;
      audioFilter += `atrim=start=${start}:end=${end},asetpts=PTS-STARTPTS,`;
    }
    if (track.mixVolume !== undefined) {
      audioFilter += `volume=${track.mixVolume},`;
    }
    const startMs = Math.round((track.start ?? 0) * 1000);
    audioFilter += `adelay=${startMs}|${startMs}`;
    audioFilter += `[${label}]`;

    filterParts.push(audioFilter);
    mixLabels.push(label);
    inputIdx++;
  }

  for (let i = 0; i < clipAudioLayers.length; i++) {
    const { layer, clipStartTime } = clipAudioLayers[i]!;
    audioInputs.push(layer.path);
    const label = `aclip${i}`;

    let audioFilter = `[${inputIdx}:a]`;
    if (layer.cutFrom !== undefined || layer.cutTo !== undefined) {
      const start = layer.cutFrom ?? 0;
      const end = layer.cutTo ?? 999999;
      audioFilter += `atrim=start=${start}:end=${end},asetpts=PTS-STARTPTS,`;
    }
    if (layer.mixVolume !== undefined) {
      audioFilter += `volume=${layer.mixVolume},`;
    }
    const startMs = Math.round(clipStartTime * 1000);
    audioFilter += `adelay=${startMs}|${startMs}`;
    audioFilter += `[${label}]`;

    filterParts.push(audioFilter);
    mixLabels.push(label);
    inputIdx++;
  }

  if (mixLabels.length === 0) {
    return null;
  }

  let postFilters = "";
  if (audioNorm && audioNorm.enable !== false) {
    const gaussSize = audioNorm.gaussSize ?? 5;
    const maxGain = audioNorm.maxGain ?? 25;
    postFilters += `,dynaudnorm=g=${gaussSize}:maxgain=${maxGain}`;
  }
  // Explicit null/empty check rather than truthiness: an outputVolume of 0 is
  // a legitimate request to mute and must still emit the volume stage.
  if (outputVolume != null && outputVolume !== "") {
    postFilters += `,volume=${outputVolume}`;
  }

  if (mixLabels.length === 1) {
    return {
      inputs: audioInputs,
      filter: `${filterParts.join(";")};[${mixLabels[0]}]anull${postFilters}[aout]`,
      outputLabel: "aout",
    };
  }

  const mixInputs = mixLabels.map((l) => `[${l}]`).join("");
  return {
    inputs: audioInputs,
    filter: `${filterParts.join(";")};${mixInputs}amix=inputs=${mixLabels.length}:normalize=0${postFilters}[aout]`,
    outputLabel: "aout",
  };
}
529
+
530
/**
 * Renders an editly-style composition to `config.outPath` with a single
 * ffmpeg invocation.
 *
 * Steps:
 *   1. Resolve output size and fps from the config, else from the first video
 *      layer, else from the module defaults; round dimensions with
 *      `multipleOf2`. In `fast` mode the size is shrunk while keeping the
 *      aspect ratio.
 *   2. Normalize clips (defaults, durations, transitions).
 *   3. Register overlay files (video, then image) as the first ffmpeg inputs.
 *   4. Build each clip's base filter chain and record which video inputs may
 *      contribute audio, with fades matching the neighbouring transitions.
 *   5. Join the clips with xfade/concat, then composite the overlays over the
 *      whole timeline.
 *   6. Build the audio graph and run ffmpeg.
 *
 * @param config - Composition description; `clips` must be non-empty.
 * @throws Error when no clips are given, or when ffmpeg exits with a non-zero
 *   code (the message carries the exit code and ffmpeg's stderr, if any).
 */
export async function editly(config: EditlyConfig): Promise<void> {
  const {
    outPath,
    clips: clipsIn,
    defaults,
    audioFilePath,
    audioTracks = [],
    loopAudio,
    keepSourceAudio,
    clipsAudioVolume,
    outputVolume,
    audioNorm,
    customOutputArgs,
    verbose,
    fast,
  } = config;

  if (!clipsIn || clipsIn.length === 0) {
    throw new Error("At least one clip is required");
  }

  const firstVideoInfo = await getFirstVideoInfo(clipsIn);
  let width = config.width ?? firstVideoInfo.width ?? DEFAULT_WIDTH;
  let height = config.height ?? firstVideoInfo.height ?? DEFAULT_HEIGHT;
  const fps = config.fps ?? firstVideoInfo.fps ?? DEFAULT_FPS;

  width = multipleOf2(width);
  height = multipleOf2(height);

  if (fast) {
    // Shrink to roughly 320x320 worth of pixels while preserving aspect ratio.
    const aspectRatio = width / height;
    width = multipleOf2(Math.round(320 * Math.sqrt(aspectRatio)));
    height = multipleOf2(Math.round(320 * Math.sqrt(1 / aspectRatio)));
  }

  if (verbose) {
    console.log(`Output: ${width}x${height} @ ${fps}fps`);
  }

  const clips = await processClips(clipsIn, defaults);

  // Overlay files come first in the ffmpeg input list: videos, then images.
  const continuousVideoOverlays = collectContinuousVideoOverlays(clips);
  const imageOverlays = collectImageOverlays(clips);
  const overlayInputs: string[] = [];
  const videoOverlayInputMap = new Map<string, number>();
  const imageOverlayInputMap = new Map<string, number>();

  for (const [path] of continuousVideoOverlays) {
    videoOverlayInputMap.set(path, overlayInputs.length);
    overlayInputs.push(path);
  }

  for (const [path] of imageOverlays) {
    imageOverlayInputMap.set(path, overlayInputs.length);
    overlayInputs.push(path);
  }

  const allFilters: string[] = [];
  const allInputs: string[] = [...overlayInputs];
  const clipOutputLabels: string[] = [];
  const videoSourceAudio: VideoSourceAudio[] = [];
  let inputOffset = overlayInputs.length;
  let currentClipTime = 0;

  for (const [i, clip] of clips.entries()) {
    const result = buildBaseClipFilter(clip, i, width, height, inputOffset);

    allFilters.push(...result.filters);
    allInputs.push(...result.inputs);
    clipOutputLabels.push(result.outputLabel);

    for (const { inputIndex, cutFrom, mixVolume } of result.videoSources) {
      // Fade in over the previous clip's transition, fade out over this one's.
      // NOTE(review): the last clip also gets a fade-out of its own transition
      // duration even though no clip follows it — confirm this is intended.
      const prevClip = i > 0 ? clips[i - 1] : null;
      const fadeInDuration = prevClip ? prevClip.transition.duration : 0;
      const fadeInCurve = prevClip?.transition.audioInCurve ?? "tri";
      const fadeOutDuration = clip.transition.duration;
      const fadeOutCurve = clip.transition.audioOutCurve ?? "tri";

      videoSourceAudio.push({
        inputIndex,
        startTime: currentClipTime,
        duration: clip.duration,
        cutFrom,
        mixVolume,
        fadeInDuration: fadeInDuration > 0 ? fadeInDuration : undefined,
        fadeInCurve,
        fadeOutDuration: fadeOutDuration > 0 ? fadeOutDuration : undefined,
        fadeOutCurve,
      });
    }

    inputOffset = result.nextInputOffset;
    currentClipTime += clip.duration;
    if (i < clips.length - 1) {
      // Consecutive clips overlap for the length of the transition.
      currentClipTime -= clip.transition.duration;
    }
  }

  let finalVideoLabel = clipOutputLabels[0] ?? "v0";

  if (clipOutputLabels.length > 1) {
    let currentLabel = clipOutputLabels[0] ?? "v0";
    let accumulatedDuration = clips[0]?.duration ?? 0;

    for (let i = 0; i < clips.length - 1; i++) {
      const nextLabel = clipOutputLabels[i + 1] ?? `v${i + 1}`;
      const clip = clips[i];
      const nextClip = clips[i + 1];
      if (!clip) continue;
      const transition = clip.transition;
      const outputLabel = i === clips.length - 2 ? "vfinal" : `vmix${i}`;

      // The transition starts `transition.duration` before the joined stream ends.
      const offset = Math.max(0, accumulatedDuration - transition.duration);

      allFilters.push(
        buildTransitionFilter(
          currentLabel,
          nextLabel,
          transition.name,
          transition.duration,
          offset,
          outputLabel,
        ),
      );

      accumulatedDuration = offset + (nextClip?.duration ?? 0);
      currentLabel = outputLabel;
    }

    finalVideoLabel = "vfinal";
  }

  // Total length: all clip durations minus every transition except the last clip's.
  let totalDuration = 0;
  for (const clip of clips) {
    totalDuration += clip.duration;
  }
  for (let i = 0; i < clips.length - 1; i++) {
    const clip = clips[i];
    if (clip) {
      totalDuration -= clip.transition.duration;
    }
  }

  if (continuousVideoOverlays.size > 0) {
    let currentBase = finalVideoLabel;
    let overlayIdx = 0;

    for (const [path, { layer }] of continuousVideoOverlays) {
      const inputIndex = videoOverlayInputMap.get(path);
      if (inputIndex === undefined) continue;

      const trimmedLabel = `ovfinal${overlayIdx}`;
      const layerFilter = getVideoFilterWithTrim(
        layer,
        inputIndex,
        width,
        height,
        0,
        totalDuration,
        trimmedLabel,
        true,
      );
      allFilters.push(layerFilter.filterComplex);

      const outputLabel = `vwithov${overlayIdx}`;
      const overlayFilter = getOverlayFilter(
        currentBase,
        trimmedLabel,
        layer,
        width,
        height,
        outputLabel,
      );
      allFilters.push(overlayFilter);

      currentBase = outputLabel;
      overlayIdx++;
    }

    finalVideoLabel = currentBase;
  }

  if (imageOverlays.size > 0) {
    let currentBase = finalVideoLabel;
    let imgOverlayIdx = 0;

    for (const [path, { layer }] of imageOverlays) {
      const inputIndex = imageOverlayInputMap.get(path);
      if (inputIndex === undefined) continue;

      const imgFilter = getImageOverlayFilter(
        layer,
        inputIndex,
        width,
        height,
        totalDuration,
      );
      allFilters.push(imgFilter.filterComplex);

      const outputLabel = `vwithimgov${imgOverlayIdx}`;
      const positionFilter = getImageOverlayPositionFilter(
        currentBase,
        imgFilter.outputLabel,
        layer,
        width,
        height,
        outputLabel,
      );
      allFilters.push(positionFilter);

      currentBase = outputLabel;
      imgOverlayIdx++;
    }

    finalVideoLabel = currentBase;
  }

  const clipAudioLayers = collectAudioLayers(clips);
  // Audio-only inputs are numbered after every video/overlay input.
  const videoInputCount = allInputs.length;
  const audioFilter = buildAudioFilter(
    videoInputCount,
    audioTracks,
    clipAudioLayers,
    totalDuration,
    audioFilePath,
    loopAudio,
    keepSourceAudio,
    outputVolume,
    videoSourceAudio,
    clipsAudioVolume,
    audioNorm,
  );

  if (audioFilter) {
    allInputs.push(...audioFilter.inputs);
    allFilters.push(audioFilter.filter);
  }

  const inputArgs = allInputs.flatMap((input) => ["-i", input]);
  const filterComplex = allFilters.join(";");

  const outputArgs = customOutputArgs ?? [
    "-c:v",
    "libx264",
    "-preset",
    fast ? "ultrafast" : "medium",
    "-crf",
    "18",
    "-pix_fmt",
    "yuv420p",
    "-movflags",
    "+faststart",
  ];

  const mapArgs = audioFilter
    ? ["-map", `[${finalVideoLabel}]`, "-map", `[${audioFilter.outputLabel}]`]
    : ["-map", `[${finalVideoLabel}]`];

  const ffmpegArgs = [
    "-hide_banner",
    "-loglevel",
    verbose ? "info" : "error",
    ...inputArgs,
    "-filter_complex",
    filterComplex,
    ...mapArgs,
    "-r",
    String(fps),
    ...outputArgs,
    "-y",
    outPath,
  ];

  if (verbose) {
    console.log("ffmpeg", ffmpegArgs.join(" "));
    console.log("\nFilter complex:\n", filterComplex.split(";").join(";\n"));
  }

  // Bun's shell throws on a non-zero exit unless `.nothrow()` is set; without
  // it the exit-code check below is unreachable and callers get a generic
  // shell error instead of the ffmpeg-specific one.
  const result = await $`ffmpeg ${ffmpegArgs}`.quiet().nothrow();

  if (result.exitCode !== 0) {
    const stderrText = result.stderr.toString().trim();
    throw new Error(
      stderrText
        ? `ffmpeg failed with exit code ${result.exitCode}: ${stderrText}`
        : `ffmpeg failed with exit code ${result.exitCode}`,
    );
  }

  console.log(`Output: ${outPath}`);
}

export default editly;