varg.ai-sdk 0.1.1 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +43 -10
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +58 -68
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -169
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -437
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -105
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -145
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -125
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -136
  217. package/cli/commands/find.ts +0 -58
  218. package/cli/commands/help.ts +0 -70
  219. package/cli/commands/list.ts +0 -49
  220. package/cli/commands/run.ts +0 -237
  221. package/cli/commands/which.ts +0 -66
  222. package/cli/discover.ts +0 -66
  223. package/cli/index.ts +0 -33
  224. package/cli/runner.ts +0 -65
  225. package/cli/types.ts +0 -49
  226. package/cli/ui.ts +0 -185
  227. package/index.ts +0 -75
  228. package/lib/README.md +0 -144
  229. package/lib/ai-sdk/fal.ts +0 -106
  230. package/lib/ai-sdk/replicate.ts +0 -107
  231. package/lib/elevenlabs.ts +0 -382
  232. package/lib/fal.ts +0 -467
  233. package/lib/ffmpeg.ts +0 -467
  234. package/lib/fireworks.ts +0 -235
  235. package/lib/groq.ts +0 -246
  236. package/lib/higgsfield.ts +0 -176
  237. package/lib/remotion/SKILL.md +0 -823
  238. package/lib/remotion/cli.ts +0 -115
  239. package/lib/remotion/functions.ts +0 -283
  240. package/lib/remotion/index.ts +0 -19
  241. package/lib/remotion/templates.ts +0 -73
  242. package/lib/replicate.ts +0 -304
  243. package/output.txt +0 -1
  244. package/test-import.ts +0 -7
  245. package/test-services.ts +0 -97
  246. package/utilities/s3.ts +0 -147
@@ -0,0 +1,297 @@
1
+ import { writeFileSync } from "node:fs";
2
+ import { groq } from "@ai-sdk/groq";
3
+ import { experimental_transcribe as transcribe } from "ai";
4
+ import { z } from "zod";
5
+ import type { CaptionsProps, VargElement } from "../types";
6
+ import type { RenderContext } from "./context";
7
+ import { addTask, completeTask, startTask } from "./progress";
8
+ import { renderSpeech } from "./speech";
9
+
10
/** One word-level timestamp entry as validated from the transcription response body. */
const groqWordSchema = z.object({
  word: z.string(),
  start: z.number(),
  end: z.number(),
});

/**
 * The part of the raw transcription response body that this module reads.
 * `words` is optional, so a body without word timestamps still validates.
 */
const groqResponseSchema = z.object({
  words: z.array(groqWordSchema).optional(),
});

/** A validated word together with its `start` / `end` offsets. */
type GroqWord = z.infer<typeof groqWordSchema>;
21
+
22
/**
 * Formats an offset in seconds as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * The offset is converted to a whole number of milliseconds before it is split
 * into fields. Flooring the fractional part directly loses a millisecond to
 * binary floating-point error (for example `1.001 % 1` is slightly below
 * `0.001`, which used to render as `,000`).
 *
 * Negative and non-finite inputs are clamped to zero so the result is always a
 * well-formed timestamp.
 *
 * @param seconds - Offset in seconds.
 * @returns The zero-padded timestamp string.
 */
function formatTime(seconds: number): string {
  const totalMillis = Number.isFinite(seconds)
    ? Math.max(0, Math.round(seconds * 1000))
    : 0;

  const hours = Math.floor(totalMillis / 3600000);
  const minutes = Math.floor((totalMillis % 3600000) / 60000);
  const secs = Math.floor((totalMillis % 60000) / 1000);
  const millis = totalMillis % 1000;

  const pad = (value: number, width: number): string =>
    String(value).padStart(width, "0");

  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(secs, 2)},${pad(millis, 3)}`;
}
31
+
32
/**
 * Serialises word-level timestamps as SRT text with one cue per word.
 *
 * Cues are numbered from 1 in input order. Each cue consists of the index line,
 * the `start --> end` line, the trimmed word, and a blank separator line.
 *
 * @param words - Words with their start/end offsets.
 * @returns The SRT document, or an empty string when `words` is empty.
 */
export function convertToSRT(words: GroqWord[]): string {
  return words
    .map((entry, position) => {
      const from = formatTime(entry.start);
      const to = formatTime(entry.end);
      return `${position + 1}\n${from} --> ${to}\n${entry.word.trim()}\n\n`;
    })
    .join("");
}
48
+
49
/** A single subtitle cue as produced by `parseSrt`. */
interface SrtEntry {
  /** Cue number parsed from the first line of the cue block. */
  index: number;
  /** Cue start offset, in seconds. */
  start: number;
  /** Cue end offset, in seconds. */
  end: number;
  /** Cue text; the lines of a multi-line cue are joined with "\n". */
  text: string;
}
55
+
56
/**
 * Values interpolated into the `Style: Default,...` line of the generated ASS
 * file. Each field maps to the ASS style column named in its comment.
 */
interface SubtitleStyle {
  /** `Fontname` column. */
  fontName: string;
  /** `Fontsize` column. */
  fontSize: number;
  /** `PrimaryColour` column, already in ASS `&H...` notation. */
  primaryColor: string;
  /** `OutlineColour` column, already in ASS `&H...` notation. */
  outlineColor: string;
  /** `BackColour` column, already in ASS `&H...` notation. */
  backColor: string;
  /** `Bold` column; written as -1 when true and 0 when false. */
  bold: boolean;
  /** `Outline` column. */
  outline: number;
  /** `Shadow` column. */
  shadow: number;
  /** `MarginV` column. */
  marginV: number;
  /** `Alignment` column. */
  alignment: number;
}
68
+
69
/**
 * Built-in caption looks, keyed by the name accepted in the captions `style`
 * prop. Colours are stored in ASS `&H...` notation and are written into the
 * style line unchanged.
 */
const STYLE_PRESETS: Record<string, SubtitleStyle> = {
  // White bold Montserrat with a half-value alpha back colour.
  tiktok: {
    fontName: "Montserrat",
    fontSize: 32,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H80000000",
    bold: true,
    outline: 3,
    shadow: 0,
    marginV: 50,
    alignment: 2,
  },

  // Bold Arial using &H00FFFF as the primary colour.
  karaoke: {
    fontName: "Arial",
    fontSize: 28,
    primaryColor: "&H00FFFF",
    outlineColor: "&H000000",
    backColor: "&H00000000",
    bold: true,
    outline: 2,
    shadow: 1,
    marginV: 40,
    alignment: 2,
  },

  // Largest preset: Impact with the thickest outline and shadow.
  bounce: {
    fontName: "Impact",
    fontSize: 36,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H00000000",
    bold: false,
    outline: 4,
    shadow: 2,
    marginV: 60,
    alignment: 2,
  },

  // Smallest preset: Courier New using &H00FF00 as the primary colour.
  typewriter: {
    fontName: "Courier New",
    fontSize: 24,
    primaryColor: "&H00FF00",
    outlineColor: "&H000000",
    backColor: "&H80000000",
    bold: false,
    outline: 1,
    shadow: 0,
    marginV: 30,
    alignment: 2,
  },
};
119
+
120
/**
 * Parses SRT text into cue entries.
 *
 * Line endings are normalised to "\n" first, so files saved with CRLF (or bare
 * CR) endings split into cues correctly; previously such input never matched
 * the blank-line separator and collapsed into a single cue. Separator lines
 * that contain only spaces or tabs are treated as blank.
 *
 * A block is skipped when it has fewer than three lines or when its second
 * line is not a `HH:MM:SS,mmm --> HH:MM:SS,mmm` range ("." is also accepted as
 * the millisecond separator).
 *
 * @param content - Raw SRT document.
 * @returns The cues in document order; empty when nothing could be parsed.
 */
function parseSrt(content: string): SrtEntry[] {
  const timeRange =
    /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/;

  const toSeconds = (h: string, m: string, s: string, ms: string): number =>
    Number.parseInt(h, 10) * 3600 +
    Number.parseInt(m, 10) * 60 +
    Number.parseInt(s, 10) +
    Number.parseInt(ms, 10) / 1000;

  const normalized = content.replace(/\r\n?/g, "\n").trim();
  const entries: SrtEntry[] = [];

  for (const block of normalized.split(/\n(?:[ \t]*\n)+/)) {
    const lines = block.split("\n");
    if (lines.length < 3) continue;

    const timeMatch = (lines[1] || "").match(timeRange);
    if (!timeMatch) continue;

    const [, h1, m1, s1, ms1, h2, m2, s2, ms2] = timeMatch;
    // Every group is mandatory in the pattern; this guard narrows the types.
    if (!h1 || !m1 || !s1 || !ms1 || !h2 || !m2 || !s2 || !ms2) continue;

    entries.push({
      index: Number.parseInt(lines[0] || "0", 10),
      start: toSeconds(h1, m1, s1, ms1),
      end: toSeconds(h2, m2, s2, ms2),
      text: lines.slice(2).join("\n"),
    });
  }

  return entries;
}
157
+
158
/**
 * Formats an offset in seconds as an ASS timestamp (`H:MM:SS.cc`).
 *
 * The offset is rounded to whole milliseconds and then truncated to
 * centiseconds. Deriving the centiseconds from `(seconds % 1) * 100` directly
 * is off by one for values such as `1.15`, whose fractional part is stored as
 * `0.1499999...` and used to render as `.14`.
 *
 * Negative and non-finite inputs are clamped to zero.
 *
 * @param seconds - Offset in seconds.
 * @returns The timestamp with an unpadded hour field and two-digit remaining fields.
 */
function formatAssTime(seconds: number): string {
  const totalMillis = Number.isFinite(seconds)
    ? Math.max(0, Math.round(seconds * 1000))
    : 0;
  const totalCentis = Math.floor(totalMillis / 10);

  const h = Math.floor(totalCentis / 360000);
  const m = Math.floor((totalCentis % 360000) / 6000);
  const s = Math.floor((totalCentis % 6000) / 100);
  const cs = totalCentis % 100;

  const two = (value: number): string => String(value).padStart(2, "0");

  return `${h}:${two(m)}:${two(s)}.${two(cs)}`;
}
166
+
167
/**
 * Builds a complete ASS document from SRT text and a single style.
 *
 * The output has a fixed `[Script Info]` section, a `[V4+ Styles]` section
 * containing one style named `Default` filled from `style`, and an `[Events]`
 * section with one `Dialogue` line per parsed cue. Line breaks inside a cue
 * are written as the ASS `\N` escape.
 *
 * @param srtContent - SRT text to convert.
 * @param style - Values for the `Default` style line.
 * @returns The ASS document as a string.
 */
function convertSrtToAss(srtContent: string, style: SubtitleStyle): string {
  // Column order follows the "Format:" line of the styles section below.
  const styleColumns = [
    "Default",
    style.fontName,
    style.fontSize,
    style.primaryColor,
    "&H000000FF",
    style.outlineColor,
    style.backColor,
    style.bold ? -1 : 0,
    0,
    0,
    0,
    100,
    100,
    0,
    0,
    1,
    style.outline,
    style.shadow,
    style.alignment,
    10,
    10,
    style.marginV,
    1,
  ];

  const headerLines = [
    "[Script Info]",
    "Title: Generated Subtitles",
    "ScriptType: v4.00+",
    "WrapStyle: 0",
    "ScaledBorderAndShadow: yes",
    "YCbCr Matrix: TV.601",
    "",
    "[V4+ Styles]",
    "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
    `Style: ${styleColumns.join(",")}`,
    "",
    "[Events]",
    "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
    // Trailing empty item so the header ends with a newline.
    "",
  ];

  const dialogueLines: string[] = [];
  for (const cue of parseSrt(srtContent)) {
    const from = formatAssTime(cue.start);
    const to = formatAssTime(cue.end);
    const body = cue.text.replace(/\n/g, "\\N");
    dialogueLines.push(`Dialogue: 0,${from},${to},Default,,0,0,0,,${body}`);
  }

  return headerLines.join("\n") + dialogueLines.join("\n");
}
195
+
196
/**
 * Converts a CSS-style hex colour to ASS colour notation (`&HBBGGRR`).
 *
 * Accepted inputs:
 * - a value that already starts with `&H`, returned as given (after trimming);
 * - `RRGGBB` or `#RRGGBB`;
 * - the `RGB` / `#RGB` shorthand, expanded by doubling each digit.
 *
 * Anything else falls back to white (`&HFFFFFF`). Hex digits are validated
 * because the previous length-only check sliced any six-character string, so
 * an input such as "yellow" produced the invalid colour `&HOWLLYE`.
 *
 * @param color - Colour string supplied by the caller.
 * @returns The colour in upper-case ASS notation.
 */
function colorToAss(color: string): string {
  const fallback = "&HFFFFFF";
  const trimmed = color.trim();
  if (trimmed.startsWith("&H")) return trimmed;

  let hex = trimmed.replace(/^#/, "");
  if (/^[0-9a-f]{3}$/i.test(hex)) {
    hex = hex.replace(/./g, (digit) => digit + digit);
  }
  if (!/^[0-9a-f]{6}$/i.test(hex)) return fallback;

  const r = hex.slice(0, 2);
  const g = hex.slice(2, 4);
  const b = hex.slice(4, 6);
  // ASS stores the channels in blue-green-red order.
  return `&H${b}${g}${r}`.toUpperCase();
}
208
+
209
/** Paths produced by `renderCaptions`. */
export interface CaptionsResult {
  /** Path of the generated ASS subtitle file; always set. */
  assPath: string;
  /** Path of the SRT file that was read, or of the one written after transcription. */
  srtPath?: string;
  /** Path of the rendered speech audio; set only when captions came from a speech element. */
  audioPath?: string;
}
214
+
215
/**
 * Renders a captions element to an ASS subtitle file.
 *
 * The subtitle text comes from one of three sources, checked in this order:
 * 1. `props.srt`: path of an SRT file, read as-is;
 * 2. `props.src` as a string: also treated as the path of an SRT file;
 * 3. `props.src` as a speech element: the speech is rendered, transcribed with
 *    word-level timestamps, and converted to SRT (one cue per word). When the
 *    response carries no word timestamps, the whole transcript becomes a
 *    single cue spanning the first five seconds.
 *
 * The resolved style is the named preset (default `tiktok`, also used when the
 * name is unknown) with `props.fontSize` and `props.color` applied on top.
 *
 * Files written here are registered in `ctx.tempFiles`. Their names combine a
 * timestamp with a random UUID: a timestamp alone collides when two captions
 * elements are rendered within the same millisecond, letting one overwrite the
 * other's output.
 *
 * @param element - The captions element to render.
 * @param ctx - Shared render context (progress tracker, temp-file registry).
 * @returns Paths of the ASS file and, where applicable, the SRT and audio files.
 * @throws Error when neither `srt` nor `src` is set, or when `src` is neither a
 *   string nor a speech element.
 */
export async function renderCaptions(
  element: VargElement<"captions">,
  ctx: RenderContext,
): Promise<CaptionsResult> {
  const props = element.props as CaptionsProps;

  // Unique per call so concurrent renders never share an output path.
  const tempPath = (extension: string): string =>
    `/tmp/varg-captions-${Date.now()}-${crypto.randomUUID()}.${extension}`;

  let srtContent: string;
  let srtPath: string | undefined;
  let audioPath: string | undefined;

  if (props.srt) {
    srtContent = await Bun.file(props.srt).text();
    srtPath = props.srt;
  } else if (props.src) {
    if (typeof props.src === "string") {
      srtContent = await Bun.file(props.src).text();
      srtPath = props.src;
    } else if (props.src.type === "speech") {
      const speechResult = await renderSpeech(props.src, ctx);
      audioPath = speechResult.path;

      const transcribeTaskId = ctx.progress
        ? addTask(ctx.progress, "transcribe", "groq-whisper")
        : null;
      if (transcribeTaskId && ctx.progress)
        startTask(ctx.progress, transcribeTaskId);

      const audioData = await Bun.file(speechResult.path).arrayBuffer();

      const result = await transcribe({
        model: groq.transcription("whisper-large-v3"),
        audio: new Uint8Array(audioData),
        providerOptions: {
          groq: {
            responseFormat: "verbose_json",
            timestampGranularities: ["word"],
          },
        },
      });

      if (transcribeTaskId && ctx.progress)
        completeTask(ctx.progress, transcribeTaskId);

      // Word timestamps are only present in the raw provider response body.
      const rawBody = (result.responses[0] as { body?: unknown })?.body;
      const parsed = groqResponseSchema.safeParse(rawBody);
      const words = parsed.success ? parsed.data.words : undefined;

      if (!words || words.length === 0) {
        // No word timing available: show the full transcript as one cue.
        srtContent = `1\n00:00:00,000 --> 00:00:05,000\n${result.text}\n`;
      } else {
        srtContent = convertToSRT(words);
      }

      srtPath = tempPath("srt");
      writeFileSync(srtPath, srtContent);
      ctx.tempFiles.push(srtPath);
    } else {
      throw new Error(
        "Captions src must be a path to SRT file or Speech element",
      );
    }
  } else {
    throw new Error("Captions element requires either 'srt' or 'src' prop");
  }

  const styleName = props.style ?? "tiktok";
  const baseStyle = STYLE_PRESETS[styleName] ?? STYLE_PRESETS.tiktok!;

  const style: SubtitleStyle = {
    ...baseStyle,
    fontSize: props.fontSize ?? baseStyle.fontSize,
    primaryColor: props.color
      ? colorToAss(props.color)
      : baseStyle.primaryColor,
  };

  const assContent = convertSrtToAss(srtContent, style);
  const assPath = tempPath("ass");
  writeFileSync(assPath, assContent);
  ctx.tempFiles.push(assPath);

  return { assPath, srtPath, audioPath };
}
@@ -0,0 +1,248 @@
1
+ import type {
2
+ AudioLayer,
3
+ Clip,
4
+ FillColorLayer,
5
+ ImageLayer,
6
+ ImageOverlayLayer,
7
+ Layer,
8
+ VideoLayer,
9
+ } from "../../ai-sdk/providers/editly/types";
10
+ import type {
11
+ AnimateProps,
12
+ ClipProps,
13
+ ImageProps,
14
+ SpeechProps,
15
+ VargElement,
16
+ VargNode,
17
+ VideoProps,
18
+ } from "../types";
19
+ import { renderAnimate } from "./animate";
20
+ import type { RenderContext } from "./context";
21
+ import { renderImage } from "./image";
22
+ import { renderPackshot } from "./packshot";
23
+ import { renderSlider } from "./slider";
24
+ import { renderSpeech } from "./speech";
25
+ import { renderSplit } from "./split";
26
+ import { renderSubtitle } from "./subtitle";
27
+ import { renderSwipe } from "./swipe";
28
+ import { renderTitle } from "./title";
29
+ import { renderVideo } from "./video";
30
+
31
/**
 * A clip layer collected while walking a clip's children: either still being
 * produced by an asynchronous renderer (`async`) or already built (`sync`).
 */
type PendingLayer =
  | { type: "async"; promise: Promise<Layer> }
  | { type: "sync"; layer: Layer };
34
+
35
/**
 * Turns the children of a clip into editly layers.
 *
 * Every supported child is started in document order; asynchronous renderers
 * run concurrently and are awaited together, and the returned layers keep the
 * children's order. Children that are not element objects, or whose type is
 * not handled here, are skipped.
 *
 * @param children - Child nodes of the clip element.
 * @param ctx - Shared render context passed to each renderer.
 * @returns The layers, in child order.
 */
async function renderClipLayers(
  children: VargNode[],
  ctx: RenderContext,
): Promise<Layer[]> {
  // Wraps a renderer that resolves to a file path as a video layer without placement.
  const plainVideo = (rendered: Promise<string>): PendingLayer => ({
    type: "async",
    promise: rendered.then((path) => ({ type: "video", path }) as VideoLayer),
  });

  // Starts the renderer for one element; undefined means "no layer for this type".
  const toPending = (element: VargElement): PendingLayer | undefined => {
    switch (element.type) {
      case "image": {
        const props = element.props as ImageProps;
        // Any explicit placement or size turns the image into an overlay.
        const positioned = !(
          props.left === undefined &&
          props.top === undefined &&
          props.width === undefined &&
          props.height === undefined
        );
        const rendered = renderImage(element as VargElement<"image">, ctx);

        return {
          type: "async",
          promise: rendered.then((path) => {
            if (positioned) {
              return {
                type: "image-overlay",
                path,
                zoomDirection: props.zoom,
                width: props.width,
                height: props.height,
                position: { x: props.left ?? 0, y: props.top ?? 0 },
              } as ImageOverlayLayer;
            }
            return {
              type: "image",
              path,
              resizeMode: props.resize,
              zoomDirection: props.zoom,
            } as ImageLayer;
          }),
        };
      }

      case "video": {
        const props = element.props as VideoProps;
        const rendered = renderVideo(element as VargElement<"video">, ctx);

        return {
          type: "async",
          promise: rendered.then(
            (path) =>
              ({
                type: "video",
                path,
                resizeMode: props.resize,
                cutFrom: props.cutFrom,
                cutTo: props.cutTo,
                // The source audio is muted unless keepAudio is set.
                mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
                left: props.left,
                top: props.top,
                width: props.width,
                height: props.height,
              }) as VideoLayer,
          ),
        };
      }

      case "animate": {
        const props = element.props as AnimateProps;
        const rendered = renderAnimate(element as VargElement<"animate">, ctx);

        return {
          type: "async",
          promise: rendered.then(
            (path) =>
              ({
                type: "video",
                path,
                left: props.left,
                top: props.top,
                width: props.width,
                height: props.height,
              }) as VideoLayer,
          ),
        };
      }

      case "title":
        return {
          type: "sync",
          layer: renderTitle(element as VargElement<"title">),
        };

      case "subtitle":
        return {
          type: "sync",
          layer: renderSubtitle(element as VargElement<"subtitle">),
        };

      case "speech": {
        const props = element.props as SpeechProps;
        const rendered = renderSpeech(element as VargElement<"speech">, ctx);

        return {
          type: "async",
          promise: rendered.then(
            (result) =>
              ({
                type: "audio",
                path: result.path,
                mixVolume: props.volume ?? 1,
              }) as AudioLayer,
          ),
        };
      }

      case "split":
        return plainVideo(renderSplit(element as VargElement<"split">, ctx));

      case "slider":
        return plainVideo(renderSlider(element as VargElement<"slider">, ctx));

      case "swipe":
        return plainVideo(renderSwipe(element as VargElement<"swipe">, ctx));

      case "packshot":
        return plainVideo(
          renderPackshot(element as VargElement<"packshot">, ctx),
        );

      default:
        return undefined;
    }
  };

  const pending: PendingLayer[] = [];
  for (const child of children) {
    if (!child || typeof child !== "object" || !("type" in child)) continue;

    const entry = toPending(child as VargElement);
    if (entry) pending.push(entry);
  }

  const layers = await Promise.all(
    pending.map((entry) =>
      entry.type === "sync" ? entry.layer : entry.promise,
    ),
  );

  return layers;
}
217
+
218
/**
 * Renders a clip element into an editly `Clip`.
 *
 * When the clip has layers but none of them can act as a base (an image, a
 * fill colour, or a video without explicit position or size), a black fill
 * layer is inserted first.
 *
 * @param element - The clip element to render.
 * @param ctx - Shared render context.
 * @returns The clip. `duration` is set only when the prop is a number, and
 *   `transition` defaults to null.
 */
export async function renderClip(
  element: VargElement<"clip">,
  ctx: RenderContext,
): Promise<Clip> {
  const props = element.props as ClipProps;
  const layers = await renderClipLayers(element.children, ctx);

  // A layer can act as the base unless it is a video carrying placement or size.
  const canBeBase = (layer: Layer): boolean => {
    if (layer.type === "image" || layer.type === "fill-color") return true;
    if (layer.type !== "video") return false;

    const video = layer as VideoLayer;
    return (
      video.left === undefined &&
      video.top === undefined &&
      video.width === undefined &&
      video.height === undefined
    );
  };

  if (layers.length > 0 && !layers.some(canBeBase)) {
    const blackFill = { type: "fill-color", color: "#000000" } as FillColorLayer;
    layers.unshift(blackFill);
  }

  const duration =
    typeof props.duration === "number" ? props.duration : undefined;
  const transition = props.transition ?? null;

  return { layers, duration, transition };
}
@@ -0,0 +1,17 @@
1
+ import type { generateImage } from "ai";
2
+ import type { fileCache } from "../../ai-sdk/file-cache";
3
+ import type { generateVideo } from "../../ai-sdk/generate-video";
4
+ import type { ProgressTracker } from "./progress";
5
+
6
/** State shared by all renderers during one render. */
export interface RenderContext {
  /** Output width (presumably pixels; confirm against the caller that builds the context). */
  width: number;
  /** Output height (presumably pixels; confirm against the caller that builds the context). */
  height: number;
  /** Output frame rate. */
  fps: number;
  /** Optional file cache instance, as returned by `fileCache`. */
  cache?: ReturnType<typeof fileCache>;
  /** Image generation function used by the image renderer. */
  generateImage: typeof generateImage;
  /** Video generation function; same signature as the SDK's `generateVideo`. */
  generateVideo: typeof generateVideo;
  /** Paths of temporary files created while rendering; renderers append to it. */
  tempFiles: string[];
  /** Optional progress tracker that renderers report their tasks to. */
  progress?: ProgressTracker;
  /** In-memory deduplication for concurrent renders of the same element. */
  pending: Map<string, Promise<string>>;
}
@@ -0,0 +1,109 @@
1
+ import type { generateImage } from "ai";
2
+ import { File } from "../../ai-sdk/file";
3
+ import type {
4
+ ImageInput,
5
+ ImagePrompt,
6
+ ImageProps,
7
+ VargElement,
8
+ } from "../types";
9
+ import type { RenderContext } from "./context";
10
+ import { addTask, completeTask, startTask } from "./progress";
11
+ import { computeCacheKey, toFileUrl } from "./utils";
12
+
13
/**
 * Resolves an image input to raw bytes.
 *
 * - A `Uint8Array` is returned unchanged.
 * - A string is converted with `toFileUrl` and fetched.
 * - Any other input is treated as an image element: it is rendered first and
 *   the resulting path is fetched the same way.
 *
 * A response with an error status (400 or above) is now rejected; previously
 * the body of an error page was returned as if it were image data.
 *
 * @param input - Bytes, a path/URL string, or an image element.
 * @param ctx - Shared render context, used when an element must be rendered.
 * @returns The image bytes.
 * @throws Error when the fetch responds with an error status.
 */
async function resolveImageInput(
  input: ImageInput,
  ctx: RenderContext,
): Promise<Uint8Array> {
  if (input instanceof Uint8Array) {
    return input;
  }

  const location =
    typeof input === "string" ? input : await renderImage(input, ctx);

  const response = await fetch(toFileUrl(location));
  // Checked by status rather than `ok` so only real error responses are
  // rejected, whatever status a non-HTTP fetch reports.
  if (response.status >= 400) {
    throw new Error(
      `Failed to load image input "${location}": ${response.status} ${response.statusText}`,
    );
  }

  return new Uint8Array(await response.arrayBuffer());
}
28
+
29
/**
 * Resolves an image prompt into the form passed to the image generator.
 *
 * A string prompt is returned unchanged. For a structured prompt, every entry
 * of `images` is resolved to bytes concurrently and `text` is carried over.
 *
 * @param prompt - The prompt from the image element's props.
 * @param ctx - Shared render context, forwarded to the image-input resolver.
 * @returns The prompt string, or `{ text, images }` with the images as bytes.
 */
async function resolvePrompt(
  prompt: ImagePrompt,
  ctx: RenderContext,
): Promise<string | { text?: string; images: Uint8Array[] }> {
  if (typeof prompt !== "string") {
    const inFlight = prompt.images.map((image) =>
      resolveImageInput(image, ctx),
    );
    const images = await Promise.all(inFlight);
    return { text: prompt.text, images };
  }

  return prompt;
}
41
+
42
/**
 * Renders an image element and returns the path of the resulting image.
 *
 * When `src` is set it is returned directly. Otherwise the element needs both
 * `prompt` and `model`: the prompt is resolved, the image is generated through
 * `ctx.generateImage`, and the bytes are written to a temporary file that is
 * registered in `ctx.tempFiles`.
 *
 * Renders are deduplicated through `ctx.pending`, keyed by the element's
 * serialised cache key, so concurrent requests for the same element share one
 * generation. A failed render is removed from that map again: callers already
 * waiting still see the rejection, but a later attempt starts a fresh
 * generation instead of receiving the stale failure.
 *
 * @param element - The image element to render.
 * @param ctx - Shared render context.
 * @returns Path (or `src` value) of the image.
 * @throws Error when neither `prompt` nor `src` is set, when `model` is missing
 *   for a prompt, or when the generator returns no image data.
 */
export async function renderImage(
  element: VargElement<"image">,
  ctx: RenderContext,
): Promise<string> {
  const props = element.props as ImageProps;

  if (props.src) {
    return props.src;
  }

  const prompt = props.prompt;
  if (!prompt) {
    throw new Error("Image element requires either 'prompt' or 'src'");
  }

  const model = props.model;
  if (!model) {
    throw new Error("Image element requires 'model' prop when using prompt");
  }

  // Compute cache key for deduplication
  const cacheKey = computeCacheKey(element);
  const cacheKeyStr = JSON.stringify(cacheKey);

  // Reuse the in-flight (or finished) render of an identical element.
  const pendingRender = ctx.pending.get(cacheKeyStr);
  if (pendingRender) {
    return pendingRender;
  }

  const renderPromise = (async () => {
    const resolvedPrompt = await resolvePrompt(prompt, ctx);

    const modelId = typeof model === "string" ? model : model.modelId;
    const taskId = ctx.progress
      ? addTask(ctx.progress, "image", modelId)
      : null;
    if (taskId && ctx.progress) startTask(ctx.progress, taskId);

    // `cacheKey` is not part of the SDK's parameter type, hence the assertion.
    const { images } = await ctx.generateImage({
      model,
      prompt: resolvedPrompt,
      aspectRatio: props.aspectRatio,
      n: 1,
      cacheKey,
    } as Parameters<typeof generateImage>[0]);

    if (taskId && ctx.progress) completeTask(ctx.progress, taskId);

    const firstImage = images[0];
    if (!firstImage?.uint8Array) {
      throw new Error("Image generation returned no image data");
    }

    const tempPath = await File.toTemp({
      uint8Array: firstImage.uint8Array,
      mimeType: "image/png",
    });
    ctx.tempFiles.push(tempPath);

    return tempPath;
  })();

  ctx.pending.set(cacheKeyStr, renderPromise);

  // Drop a failed render so it is not handed out again. The identity check
  // keeps a newer entry for the same key intact.
  renderPromise.catch(() => {
    if (ctx.pending.get(cacheKeyStr) === renderPromise) {
      ctx.pending.delete(cacheKeyStr);
    }
  });

  return renderPromise;
}
@@ -0,0 +1,22 @@
1
+ export { renderAnimate } from "./animate";
2
+ export { renderCaptions } from "./captions";
3
+ export { renderClip } from "./clip";
4
+ export type { RenderContext } from "./context";
5
+ export { renderImage } from "./image";
6
+ export { renderPackshot } from "./packshot";
7
+ export {
8
+ createProgressTracker,
9
+ type GenerationType,
10
+ type ProgressTask,
11
+ type ProgressTracker,
12
+ TIME_ESTIMATES,
13
+ } from "./progress";
14
+ export { renderRoot } from "./render";
15
+ export { renderSlider } from "./slider";
16
+ export type { SpeechResult } from "./speech";
17
+ export { renderSpeech } from "./speech";
18
+ export { renderSplit } from "./split";
19
+ export { renderSwipe } from "./swipe";
20
+ export { renderTitle } from "./title";
21
+ export { computeCacheKey, getTextContent } from "./utils";
22
+ export { renderVideo } from "./video";