varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +48 -8
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -227
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -493
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -112
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -187
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -135
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -201
  217. package/index.ts +0 -38
  218. package/lib/README.md +0 -144
  219. package/lib/ai-sdk/fal.ts +0 -106
  220. package/lib/ai-sdk/replicate.ts +0 -107
  221. package/lib/elevenlabs.ts +0 -382
  222. package/lib/fal.ts +0 -478
  223. package/lib/ffmpeg.ts +0 -467
  224. package/lib/fireworks.ts +0 -235
  225. package/lib/groq.ts +0 -246
  226. package/lib/higgsfield.ts +0 -176
  227. package/lib/remotion/SKILL.md +0 -823
  228. package/lib/remotion/cli.ts +0 -115
  229. package/lib/remotion/functions.ts +0 -283
  230. package/lib/remotion/index.ts +0 -19
  231. package/lib/remotion/templates.ts +0 -73
  232. package/lib/replicate.ts +0 -304
  233. package/output.txt +0 -1
  234. package/test-import.ts +0 -7
  235. package/test-services.ts +0 -97
  236. package/utilities/s3.ts +0 -147
@@ -0,0 +1,297 @@
1
+ import { writeFileSync } from "node:fs";
2
+ import { groq } from "@ai-sdk/groq";
3
+ import { experimental_transcribe as transcribe } from "ai";
4
+ import { z } from "zod";
5
+ import type { CaptionsProps, VargElement } from "../types";
6
+ import type { RenderContext } from "./context";
7
+ import { addTask, completeTask, startTask } from "./progress";
8
+ import { renderSpeech } from "./speech";
9
+
/**
 * One word-level timestamp entry as read from the transcription response
 * body. `start` and `end` are later handed to `formatTime`, which treats
 * them as seconds.
 */
const groqWordSchema = z.object({
  word: z.string(),
  start: z.number(),
  end: z.number(),
});

/** The part of the raw response body this module reads; `words` may be missing. */
const groqResponseSchema = z.object({
  words: groqWordSchema.array().optional(),
});

/** Static type of a validated word entry. */
type GroqWord = z.infer<typeof groqWordSchema>;
21
+
/**
 * Formats a time offset as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * The offset is first rounded to a whole number of milliseconds and every
 * field is derived from that integer. Truncating the fractional part directly
 * (`(seconds % 1) * 1000`) loses a millisecond whenever the binary float sits
 * just below the intended value (e.g. 1.001 s would print as `,000`).
 *
 * @param seconds - Offset in seconds. Negative or non-finite values are
 *   clamped to zero so the output is always a well-formed timestamp.
 * @returns The timestamp with zero-padded hours, minutes, seconds and millis.
 */
function formatTime(seconds: number): string {
  const totalMillis = Number.isFinite(seconds)
    ? Math.max(0, Math.round(seconds * 1000))
    : 0;

  const hours = Math.floor(totalMillis / 3600000);
  const minutes = Math.floor((totalMillis % 3600000) / 60000);
  const secs = Math.floor((totalMillis % 60000) / 1000);
  const millis = totalMillis % 1000;

  const pad = (value: number, width: number): string =>
    String(value).padStart(width, "0");

  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(secs, 2)},${pad(millis, 3)}`;
}
31
+
/**
 * Serialises word timestamps into SRT text, one cue per word.
 *
 * Cues are numbered from 1 in input order; each cue is the index line, the
 * `start --> end` timing line, the trimmed word, and a blank separator line.
 *
 * @param words - Word entries with `start`/`end` offsets.
 * @returns The SRT document, or an empty string when `words` is empty.
 */
export function convertToSRT(words: GroqWord[]): string {
  return words
    .map((entry, position) => {
      const timing = `${formatTime(entry.start)} --> ${formatTime(entry.end)}`;
      return `${position + 1}\n${timing}\n${entry.word.trim()}\n\n`;
    })
    .join("");
}
48
+
/** A single cue produced by `parseSrt`. */
interface SrtEntry {
  /** Cue number parsed from the first line of the block. */
  index: number;
  /** Cue start, in seconds (hours, minutes, seconds and millis combined). */
  start: number;
  /** Cue end, in seconds. */
  end: number;
  /** Every line after the timing line, joined with `\n`. */
  text: string;
}
55
+
/**
 * Values interpolated into the `Style: Default,...` line of the generated
 * ASS file by `convertSrtToAss`.
 */
interface SubtitleStyle {
  /** Written to the `Fontname` column. */
  fontName: string;
  /** Written to the `Fontsize` column. */
  fontSize: number;
  /** Written to `PrimaryColour`; presets use `&H...` colour strings. */
  primaryColor: string;
  /** Written to `OutlineColour`. */
  outlineColor: string;
  /** Written to `BackColour`. */
  backColor: string;
  /** Emitted as `-1` when true and `0` when false in the `Bold` column. */
  bold: boolean;
  /** Written to the `Outline` column. */
  outline: number;
  /** Written to the `Shadow` column. */
  shadow: number;
  /** Written to the `MarginV` column. */
  marginV: number;
  /** Written to the `Alignment` column. */
  alignment: number;
}
68
+
/**
 * Built-in caption looks, keyed by preset name. `renderCaptions` looks a
 * preset up by `props.style` and falls back to `tiktok`.
 *
 * All presets share the same outline colour and alignment value, so those
 * two fields are defined once and spread into each entry.
 */
const STYLE_PRESETS: Record<string, SubtitleStyle> = (() => {
  const shared = { outlineColor: "&H000000", alignment: 2 };

  return {
    tiktok: {
      ...shared,
      fontName: "Montserrat",
      fontSize: 32,
      primaryColor: "&HFFFFFF",
      backColor: "&H80000000",
      bold: true,
      outline: 3,
      shadow: 0,
      marginV: 50,
    },
    karaoke: {
      ...shared,
      fontName: "Arial",
      fontSize: 28,
      primaryColor: "&H00FFFF",
      backColor: "&H00000000",
      bold: true,
      outline: 2,
      shadow: 1,
      marginV: 40,
    },
    bounce: {
      ...shared,
      fontName: "Impact",
      fontSize: 36,
      primaryColor: "&HFFFFFF",
      backColor: "&H00000000",
      bold: false,
      outline: 4,
      shadow: 2,
      marginV: 60,
    },
    typewriter: {
      ...shared,
      fontName: "Courier New",
      fontSize: 24,
      primaryColor: "&H00FF00",
      backColor: "&H80000000",
      bold: false,
      outline: 1,
      shadow: 0,
      marginV: 30,
    },
  };
})();
119
+
/**
 * Parses SRT text into cue entries.
 *
 * Line endings are normalised to `\n` before splitting. Without that, a file
 * saved with CRLF endings never contains `\n\n`, so the whole document is
 * treated as a single block and every cue after the first ends up inside the
 * first cue's text (with stray `\r` characters).
 *
 * Blocks with fewer than three lines, or whose second line is not a
 * `HH:MM:SS,mmm --> HH:MM:SS,mmm` timing line (`.` is accepted in place of
 * `,`), are skipped.
 *
 * @param content - Raw SRT document.
 * @returns Parsed cues in document order; `start`/`end` are in seconds.
 */
function parseSrt(content: string): SrtEntry[] {
  const timingPattern =
    /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/;

  const toSeconds = (h: string, m: string, s: string, ms: string): number =>
    Number.parseInt(h, 10) * 3600 +
    Number.parseInt(m, 10) * 60 +
    Number.parseInt(s, 10) +
    Number.parseInt(ms, 10) / 1000;

  // Accept CRLF and lone-CR files by converting them to LF up front.
  const normalized = content.replace(/\r\n?/g, "\n");

  const entries: SrtEntry[] = [];
  for (const block of normalized.trim().split(/\n{2,}/)) {
    const lines = block.split("\n");
    if (lines.length < 3) continue;

    const timeMatch = (lines[1] || "").match(timingPattern);
    if (!timeMatch) continue;

    const [, h1, m1, s1, ms1, h2, m2, s2, ms2] = timeMatch;
    // Narrows the captures for strict index checking; the pattern guarantees
    // all eight groups whenever it matches.
    if (!h1 || !m1 || !s1 || !ms1 || !h2 || !m2 || !s2 || !ms2) continue;

    entries.push({
      index: Number.parseInt(lines[0] || "0", 10),
      start: toSeconds(h1, m1, s1, ms1),
      end: toSeconds(h2, m2, s2, ms2),
      text: lines.slice(2).join("\n"),
    });
  }

  return entries;
}
157
+
/**
 * Formats a time offset as an ASS timestamp (`H:MM:SS.cc`).
 *
 * The offset is rounded to whole centiseconds before the fields are split
 * out. Truncating `(seconds % 1) * 100` instead drops a centisecond whenever
 * the float lands just below the intended value (0.29 s would print `.28`).
 *
 * @param seconds - Offset in seconds. Negative or non-finite values are
 *   clamped to zero.
 * @returns The timestamp with unpadded hours and two-digit remaining fields.
 */
function formatAssTime(seconds: number): string {
  const totalCentis = Number.isFinite(seconds)
    ? Math.max(0, Math.round(seconds * 100))
    : 0;

  const h = Math.floor(totalCentis / 360000);
  const m = Math.floor((totalCentis % 360000) / 6000);
  const s = Math.floor((totalCentis % 6000) / 100);
  const cs = totalCentis % 100;

  const two = (value: number): string => String(value).padStart(2, "0");

  return `${h}:${two(m)}:${two(s)}.${two(cs)}`;
}
166
+
/**
 * Builds an ASS document from SRT text using a single `Default` style.
 *
 * The header (script info, one style line filled from `style`, and the
 * events format line) is followed by one `Dialogue:` line per parsed cue.
 * Line breaks inside a cue are written as the ASS `\N` escape.
 *
 * @param srtContent - SRT document to convert.
 * @param style - Values substituted into the `Style:` line.
 * @returns The complete ASS document text.
 */
function convertSrtToAss(srtContent: string, style: SubtitleStyle): string {
  const styleLine = `Style: Default,${style.fontName},${style.fontSize},${style.primaryColor},&H000000FF,${style.outlineColor},${style.backColor},${style.bold ? -1 : 0},0,0,0,100,100,0,0,1,${style.outline},${style.shadow},${style.alignment},10,10,${style.marginV},1`;

  // The trailing empty element makes the header end with a newline so the
  // first dialogue line starts on its own line.
  const assHeader = [
    "[Script Info]",
    "Title: Generated Subtitles",
    "ScriptType: v4.00+",
    "WrapStyle: 0",
    "ScaledBorderAndShadow: yes",
    "YCbCr Matrix: TV.601",
    "",
    "[V4+ Styles]",
    "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
    styleLine,
    "",
    "[Events]",
    "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
    "",
  ].join("\n");

  const dialogueLines: string[] = [];
  for (const cue of parseSrt(srtContent)) {
    const from = formatAssTime(cue.start);
    const to = formatAssTime(cue.end);
    const body = cue.text.replace(/\n/g, "\\N");
    dialogueLines.push(`Dialogue: 0,${from},${to},Default,,0,0,0,,${body}`);
  }

  return assHeader + dialogueLines.join("\n");
}
195
+
/**
 * Converts a hex colour to the `&HBBGGRR` form used in ASS style lines.
 *
 * - Strings already starting with `&H` are returned unchanged.
 * - `RRGGBB` and the `RGB` shorthand are accepted, with or without a leading
 *   `#`, in any letter case; the shorthand is expanded by doubling each digit.
 * - Anything else (wrong length, non-hex characters) falls back to white
 *   rather than emitting an invalid colour code.
 *
 * @param color - Colour string supplied by the caller.
 * @returns Upper-cased ASS colour string.
 */
function colorToAss(color: string): string {
  if (color.startsWith("&H")) return color;

  const match = /^#?([0-9a-f]{3}|[0-9a-f]{6})$/i.exec(color.trim());
  const digits = match?.[1];
  if (!digits) return "&HFFFFFF";

  const hex =
    digits.length === 3
      ? Array.from(digits, (digit) => digit + digit).join("")
      : digits;

  const r = hex.substring(0, 2);
  const g = hex.substring(2, 4);
  const b = hex.substring(4, 6);
  // ASS stores the channels in blue-green-red order.
  return `&H${b}${g}${r}`.toUpperCase();
}
208
+
/** Paths returned by `renderCaptions`. */
export interface CaptionsResult {
  /** Location of the ASS file written for this render. */
  assPath: string;
  /** The SRT file that was read, or the one written after transcription. */
  srtPath?: string;
  /** Path of the rendered speech audio; only set when `src` is a speech element. */
  audioPath?: string;
}
214
+
/**
 * Produces an ASS subtitle file for a captions element.
 *
 * The subtitle text comes from one of three places:
 * - `props.srt`: path of an SRT file, read as-is;
 * - `props.src` as a string: also treated as the path of an SRT file;
 * - `props.src` as a speech element: the speech is rendered, transcribed
 *   with word timestamps, and converted to one SRT cue per word. When the
 *   response carries no usable word list, a single 0-5 s cue holding the
 *   full transcript is used instead.
 *
 * The SRT text is then converted to ASS using the preset named by
 * `props.style` (default `tiktok`), with `props.fontSize` and `props.color`
 * overriding the preset when given. Files written here are appended to
 * `ctx.tempFiles`.
 *
 * @param element - The captions element to render.
 * @param ctx - Render context; `progress` and `tempFiles` are used here.
 * @returns Paths of the ASS file and, when applicable, the SRT and audio files.
 * @throws Error when neither `srt` nor `src` is set, or when `src` is an
 *   element other than speech.
 */
export async function renderCaptions(
  element: VargElement<"captions">,
  ctx: RenderContext,
): Promise<CaptionsResult> {
  const props = element.props as CaptionsProps;

  // A timestamp alone is not unique: two renders started in the same
  // millisecond would write to the same path and clobber each other. The
  // random suffix makes the name unique per call while keeping the
  // `varg-captions-<timestamp>` prefix.
  const tempId = `${Date.now()}-${crypto.randomUUID()}`;

  let srtContent: string;
  let srtPath: string | undefined;
  let audioPath: string | undefined;

  if (props.srt) {
    srtContent = await Bun.file(props.srt).text();
    srtPath = props.srt;
  } else if (props.src) {
    if (typeof props.src === "string") {
      srtContent = await Bun.file(props.src).text();
      srtPath = props.src;
    } else if (props.src.type === "speech") {
      const speechResult = await renderSpeech(props.src, ctx);
      audioPath = speechResult.path;

      const transcribeTaskId = ctx.progress
        ? addTask(ctx.progress, "transcribe", "groq-whisper")
        : null;
      if (transcribeTaskId && ctx.progress)
        startTask(ctx.progress, transcribeTaskId);

      const audioData = await Bun.file(speechResult.path).arrayBuffer();

      const result = await transcribe({
        model: groq.transcription("whisper-large-v3"),
        audio: new Uint8Array(audioData),
        providerOptions: {
          groq: {
            responseFormat: "verbose_json",
            timestampGranularities: ["word"],
          },
        },
      });

      if (transcribeTaskId && ctx.progress)
        completeTask(ctx.progress, transcribeTaskId);

      // Word timestamps are read from the raw response body and validated
      // before use; a failed parse is handled like a missing word list.
      const rawBody = (result.responses[0] as { body?: unknown })?.body;
      const parsed = groqResponseSchema.safeParse(rawBody);
      const words = parsed.success ? parsed.data.words : undefined;

      if (!words || words.length === 0) {
        srtContent = `1\n00:00:00,000 --> 00:00:05,000\n${result.text}\n`;
      } else {
        srtContent = convertToSRT(words);
      }

      srtPath = `/tmp/varg-captions-${tempId}.srt`;
      writeFileSync(srtPath, srtContent);
      ctx.tempFiles.push(srtPath);
    } else {
      throw new Error(
        "Captions src must be a path to SRT file or Speech element",
      );
    }
  } else {
    throw new Error("Captions element requires either 'srt' or 'src' prop");
  }

  const styleName = props.style ?? "tiktok";
  const baseStyle = STYLE_PRESETS[styleName] ?? STYLE_PRESETS.tiktok!;

  const style: SubtitleStyle = {
    ...baseStyle,
    fontSize: props.fontSize ?? baseStyle.fontSize,
    primaryColor: props.color
      ? colorToAss(props.color)
      : baseStyle.primaryColor,
  };

  const assContent = convertSrtToAss(srtContent, style);
  const assPath = `/tmp/varg-captions-${tempId}.ass`;
  writeFileSync(assPath, assContent);
  ctx.tempFiles.push(assPath);

  return { assPath, srtPath, audioPath };
}
@@ -0,0 +1,248 @@
1
+ import type {
2
+ AudioLayer,
3
+ Clip,
4
+ FillColorLayer,
5
+ ImageLayer,
6
+ ImageOverlayLayer,
7
+ Layer,
8
+ VideoLayer,
9
+ } from "../../ai-sdk/providers/editly/types";
10
+ import type {
11
+ AnimateProps,
12
+ ClipProps,
13
+ ImageProps,
14
+ SpeechProps,
15
+ VargElement,
16
+ VargNode,
17
+ VideoProps,
18
+ } from "../types";
19
+ import { renderAnimate } from "./animate";
20
+ import type { RenderContext } from "./context";
21
+ import { renderImage } from "./image";
22
+ import { renderPackshot } from "./packshot";
23
+ import { renderSlider } from "./slider";
24
+ import { renderSpeech } from "./speech";
25
+ import { renderSplit } from "./split";
26
+ import { renderSubtitle } from "./subtitle";
27
+ import { renderSwipe } from "./swipe";
28
+ import { renderTitle } from "./title";
29
+ import { renderVideo } from "./video";
30
+
/**
 * Turns the children of a clip into editly layers.
 *
 * Every recognised child is started in document order and the results are
 * awaited together, so the returned layers keep the order of `children`.
 * Title and subtitle children yield a layer immediately; all other handled
 * kinds resolve to a file path first. Children that are not element objects,
 * or whose `type` is not handled below, contribute nothing.
 *
 * @param children - Child nodes of the clip element.
 * @param ctx - Render context forwarded to the individual renderers.
 * @returns The resolved layers, in child order.
 */
async function renderClipLayers(
  children: VargNode[],
  ctx: RenderContext,
): Promise<Layer[]> {
  // Holds ready layers and in-flight ones side by side; `Promise.all`
  // passes plain values through unchanged.
  const queued: Array<Layer | Promise<Layer>> = [];

  // Layer for a rendered video file that carries no placement or trim data.
  const plainVideo = (path: string): Layer =>
    ({
      type: "video",
      path,
    }) as VideoLayer;

  for (const node of children) {
    if (!node || typeof node !== "object" || !("type" in node)) continue;

    const element = node as VargElement;

    switch (element.type) {
      case "image": {
        const props = element.props as ImageProps;
        // Any explicit placement or size turns the image into an overlay.
        const hasPosition =
          props.left !== undefined ||
          props.top !== undefined ||
          props.width !== undefined ||
          props.height !== undefined;

        queued.push(
          renderImage(element as VargElement<"image">, ctx).then(
            (path): Layer => {
              if (hasPosition) {
                return {
                  type: "image-overlay",
                  path,
                  zoomDirection: props.zoom,
                  width: props.width,
                  height: props.height,
                  position: { x: props.left ?? 0, y: props.top ?? 0 },
                } as ImageOverlayLayer;
              }
              return {
                type: "image",
                path,
                resizeMode: props.resize,
                zoomDirection: props.zoom,
              } as ImageLayer;
            },
          ),
        );
        break;
      }

      case "video": {
        const props = element.props as VideoProps;
        queued.push(
          renderVideo(element as VargElement<"video">, ctx).then(
            (path): Layer =>
              ({
                type: "video",
                path,
                resizeMode: props.resize,
                cutFrom: props.cutFrom,
                cutTo: props.cutTo,
                // Source audio is muted unless keepAudio is set.
                mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
                left: props.left,
                top: props.top,
                width: props.width,
                height: props.height,
              }) as VideoLayer,
          ),
        );
        break;
      }

      case "animate": {
        const props = element.props as AnimateProps;
        queued.push(
          renderAnimate(element as VargElement<"animate">, ctx).then(
            (path): Layer =>
              ({
                type: "video",
                path,
                left: props.left,
                top: props.top,
                width: props.width,
                height: props.height,
              }) as VideoLayer,
          ),
        );
        break;
      }

      case "title":
        queued.push(renderTitle(element as VargElement<"title">));
        break;

      case "subtitle":
        queued.push(renderSubtitle(element as VargElement<"subtitle">));
        break;

      case "speech": {
        const props = element.props as SpeechProps;
        queued.push(
          renderSpeech(element as VargElement<"speech">, ctx).then(
            (result): Layer =>
              ({
                type: "audio",
                path: result.path,
                mixVolume: props.volume ?? 1,
              }) as AudioLayer,
          ),
        );
        break;
      }

      case "split":
        queued.push(
          renderSplit(element as VargElement<"split">, ctx).then(plainVideo),
        );
        break;

      case "slider":
        queued.push(
          renderSlider(element as VargElement<"slider">, ctx).then(plainVideo),
        );
        break;

      case "swipe":
        queued.push(
          renderSwipe(element as VargElement<"swipe">, ctx).then(plainVideo),
        );
        break;

      case "packshot":
        queued.push(
          renderPackshot(element as VargElement<"packshot">, ctx).then(
            plainVideo,
          ),
        );
        break;
    }
  }

  return Promise.all(queued);
}
217
+
/**
 * Renders a clip element into an editly `Clip`.
 *
 * When the clip has layers but none of them is an image, a fill colour, or a
 * video without explicit placement/size, a black fill-colour layer is put
 * first.
 *
 * @param element - The clip element; its children become the layers.
 * @param ctx - Render context forwarded to the layer renderers.
 * @returns The clip with its layers, a numeric duration (otherwise
 *   `undefined`), and the transition (`null` when not set).
 */
export async function renderClip(
  element: VargElement<"clip">,
  ctx: RenderContext,
): Promise<Clip> {
  const props = element.props as ClipProps;
  const layers = await renderClipLayers(element.children, ctx);

  // A video with any of left/top/width/height set counts as an overlay.
  const isPlacedVideo = (video: VideoLayer): boolean =>
    [video.left, video.top, video.width, video.height].some(
      (value) => value !== undefined,
    );

  const isBaseLayer = (layer: Layer): boolean => {
    switch (layer.type) {
      case "image":
      case "fill-color":
        return true;
      case "video":
        return !isPlacedVideo(layer as VideoLayer);
      default:
        return false;
    }
  };

  if (layers.length > 0 && !layers.some(isBaseLayer)) {
    layers.unshift({ type: "fill-color", color: "#000000" } as FillColorLayer);
  }

  const duration =
    typeof props.duration === "number" ? props.duration : undefined;
  const transition = props.transition ?? null;

  return { layers, duration, transition };
}
@@ -0,0 +1,17 @@
1
+ import type { generateImage } from "ai";
2
+ import type { fileCache } from "../../ai-sdk/file-cache";
3
+ import type { generateVideo } from "../../ai-sdk/generate-video";
4
+ import type { ProgressTracker } from "./progress";
5
+
/** State handed to every renderer during a render. */
export interface RenderContext {
  /** Output width. NOTE(review): presumably pixels; confirm against the caller. */
  width: number;
  /** Output height. NOTE(review): presumably pixels; confirm against the caller. */
  height: number;
  /** Output frame rate. */
  fps: number;
  /** Optional cache, typed as the return value of `fileCache`. */
  cache?: ReturnType<typeof fileCache>;
  /** Image generation function; has the signature of `generateImage` from `ai`. */
  generateImage: typeof generateImage;
  /** Video generation function; has the signature of the SDK's `generateVideo`. */
  generateVideo: typeof generateVideo;
  /** Paths of files created during the render; renderers append to this list. */
  tempFiles: string[];
  /** Optional progress tracker; renderers register and complete tasks on it. */
  progress?: ProgressTracker;
  /** In-memory deduplication for concurrent renders of the same element */
  pending: Map<string, Promise<string>>;
}
@@ -0,0 +1,109 @@
1
+ import type { generateImage } from "ai";
2
+ import { File } from "../../ai-sdk/file";
3
+ import type {
4
+ ImageInput,
5
+ ImagePrompt,
6
+ ImageProps,
7
+ VargElement,
8
+ } from "../types";
9
+ import type { RenderContext } from "./context";
10
+ import { addTask, completeTask, startTask } from "./progress";
11
+ import { computeCacheKey, toFileUrl } from "./utils";
12
+
/**
 * Resolves one prompt image to raw bytes.
 *
 * Byte arrays are returned as-is. A string is fetched after passing it
 * through `toFileUrl`. An image element is rendered first and the resulting
 * path is fetched the same way.
 *
 * @param input - Bytes, a location string, or an image element.
 * @param ctx - Render context used when an element has to be rendered.
 * @returns The image bytes.
 */
async function resolveImageInput(
  input: ImageInput,
  ctx: RenderContext,
): Promise<Uint8Array> {
  if (input instanceof Uint8Array) {
    return input;
  }

  const location =
    typeof input === "string" ? input : await renderImage(input, ctx);

  const fetched = await fetch(toFileUrl(location));
  const buffer = await fetched.arrayBuffer();
  return new Uint8Array(buffer);
}
28
+
/**
 * Prepares an image prompt for the generation call.
 *
 * A plain string is passed through. A structured prompt has all of its
 * images resolved to bytes concurrently, keeping their order, and its text
 * carried over unchanged.
 *
 * @param prompt - Text prompt, or text plus reference images.
 * @param ctx - Render context forwarded to `resolveImageInput`.
 * @returns The string itself, or `{ text, images }` with byte-array images.
 */
async function resolvePrompt(
  prompt: ImagePrompt,
  ctx: RenderContext,
): Promise<string | { text?: string; images: Uint8Array[] }> {
  if (typeof prompt !== "string") {
    const images = await Promise.all(
      prompt.images.map((source) => resolveImageInput(source, ctx)),
    );
    return { text: prompt.text, images };
  }

  return prompt;
}
41
+
/**
 * Resolves an image element to a file path.
 *
 * An element with `src` resolves to that value directly. Otherwise the image
 * is generated from `prompt` with `model`, written to a temp file, and that
 * path is returned. Generations are shared through `ctx.pending`: a second
 * call whose cache key serialises to the same string receives the promise of
 * the first instead of starting another generation.
 *
 * @param element - The image element to render.
 * @param ctx - Render context (generator, progress, temp files, pending map).
 * @returns Path of the image.
 * @throws Error when neither `src` nor `prompt` is set, when `prompt` is set
 *   without `model`, or when generation returns no image data.
 */
export async function renderImage(
  element: VargElement<"image">,
  ctx: RenderContext,
): Promise<string> {
  const props = element.props as ImageProps;
  if (props.src) return props.src;

  const { prompt, model } = props;
  if (!prompt) {
    throw new Error("Image element requires either 'prompt' or 'src'");
  }
  if (!model) {
    throw new Error("Image element requires 'model' prop when using prompt");
  }

  // The serialised cache key identifies the element in the pending map.
  const cacheKey = computeCacheKey(element);
  const dedupeKey = JSON.stringify(cacheKey);

  const inFlight = ctx.pending.get(dedupeKey);
  if (inFlight) return inFlight;

  const generate = async (): Promise<string> => {
    const resolvedPrompt = await resolvePrompt(prompt, ctx);

    const modelId = typeof model === "string" ? model : model.modelId;
    const taskId = ctx.progress
      ? addTask(ctx.progress, "image", modelId)
      : null;
    if (taskId && ctx.progress) startTask(ctx.progress, taskId);

    // `cacheKey` is not part of the declared parameter type, hence the cast.
    const generation = await ctx.generateImage({
      model,
      prompt: resolvedPrompt,
      aspectRatio: props.aspectRatio,
      n: 1,
      cacheKey,
    } as Parameters<typeof generateImage>[0]);

    if (taskId && ctx.progress) completeTask(ctx.progress, taskId);

    const imageData = generation.images[0]?.uint8Array;
    if (!imageData) {
      throw new Error("Image generation returned no image data");
    }

    const tempPath = await File.toTemp({
      uint8Array: imageData,
      mimeType: "image/png",
    });
    ctx.tempFiles.push(tempPath);

    return tempPath;
  };

  // Start the work first, then publish the promise so later callers reuse it.
  const renderPromise = generate();
  ctx.pending.set(dedupeKey, renderPromise);

  return renderPromise;
}
@@ -0,0 +1,22 @@
1
+ export { renderAnimate } from "./animate";
2
+ export { renderCaptions } from "./captions";
3
+ export { renderClip } from "./clip";
4
+ export type { RenderContext } from "./context";
5
+ export { renderImage } from "./image";
6
+ export { renderPackshot } from "./packshot";
7
+ export {
8
+ createProgressTracker,
9
+ type GenerationType,
10
+ type ProgressTask,
11
+ type ProgressTracker,
12
+ TIME_ESTIMATES,
13
+ } from "./progress";
14
+ export { renderRoot } from "./render";
15
+ export { renderSlider } from "./slider";
16
+ export type { SpeechResult } from "./speech";
17
+ export { renderSpeech } from "./speech";
18
+ export { renderSplit } from "./split";
19
+ export { renderSwipe } from "./swipe";
20
+ export { renderTitle } from "./title";
21
+ export { computeCacheKey, getTextContent } from "./utils";
22
+ export { renderVideo } from "./video";