vargai 0.3.2 → 0.4.0-alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/biome.json +6 -1
  2. package/docs/index.html +1130 -0
  3. package/docs/prompting.md +326 -0
  4. package/docs/react.md +834 -0
  5. package/package.json +10 -4
  6. package/src/cli/commands/index.ts +1 -4
  7. package/src/cli/commands/render.tsx +94 -0
  8. package/src/cli/index.ts +3 -2
  9. package/src/react/cli.ts +52 -0
  10. package/src/react/elements.ts +146 -0
  11. package/src/react/examples/branching.tsx +66 -0
  12. package/src/react/examples/captions-demo.tsx +37 -0
  13. package/src/react/examples/character-video.tsx +84 -0
  14. package/src/react/examples/grid.tsx +53 -0
  15. package/src/react/examples/layouts-demo.tsx +57 -0
  16. package/src/react/examples/madi.tsx +60 -0
  17. package/src/react/examples/music-test.tsx +35 -0
  18. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  19. package/src/react/examples/orange-portrait.tsx +41 -0
  20. package/src/react/examples/split-element-demo.tsx +60 -0
  21. package/src/react/examples/split-layout-demo.tsx +60 -0
  22. package/src/react/examples/split.tsx +41 -0
  23. package/src/react/examples/video-grid.tsx +46 -0
  24. package/src/react/index.ts +43 -0
  25. package/src/react/layouts/grid.tsx +28 -0
  26. package/src/react/layouts/index.ts +2 -0
  27. package/src/react/layouts/split.tsx +20 -0
  28. package/src/react/react.test.ts +309 -0
  29. package/src/react/render.ts +21 -0
  30. package/src/react/renderers/animate.ts +59 -0
  31. package/src/react/renderers/captions.ts +297 -0
  32. package/src/react/renderers/clip.ts +248 -0
  33. package/src/react/renderers/context.ts +17 -0
  34. package/src/react/renderers/image.ts +109 -0
  35. package/src/react/renderers/index.ts +22 -0
  36. package/src/react/renderers/music.ts +60 -0
  37. package/src/react/renderers/packshot.ts +84 -0
  38. package/src/react/renderers/progress.ts +173 -0
  39. package/src/react/renderers/render.ts +319 -0
  40. package/src/react/renderers/slider.ts +69 -0
  41. package/src/react/renderers/speech.ts +53 -0
  42. package/src/react/renderers/split.ts +91 -0
  43. package/src/react/renderers/subtitle.ts +16 -0
  44. package/src/react/renderers/swipe.ts +75 -0
  45. package/src/react/renderers/title.ts +17 -0
  46. package/src/react/renderers/utils.ts +124 -0
  47. package/src/react/renderers/video.ts +127 -0
  48. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  49. package/src/react/runtime/jsx-runtime.ts +35 -0
  50. package/src/react/types.ts +235 -0
  51. package/src/studio/index.ts +26 -0
  52. package/src/studio/scanner.ts +102 -0
  53. package/src/studio/server.ts +554 -0
  54. package/src/studio/stages.ts +251 -0
  55. package/src/studio/step-renderer.ts +279 -0
  56. package/src/studio/types.ts +60 -0
  57. package/src/studio/ui/cache.html +303 -0
  58. package/src/studio/ui/index.html +1820 -0
  59. package/tsconfig.cli.json +8 -0
  60. package/tsconfig.json +3 -1
  61. package/bun.lock +0 -1255
  62. package/docs/plan.md +0 -66
  63. package/docs/todo.md +0 -14
  64. /package/docs/{varg-sdk.md → sdk.md} +0 -0
@@ -0,0 +1,309 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { existsSync, unlinkSync } from "node:fs";
3
+ import { fal } from "../ai-sdk/providers/fal";
4
+ import {
5
+ Animate,
6
+ Captions,
7
+ Clip,
8
+ Image,
9
+ Packshot,
10
+ Render,
11
+ render,
12
+ Slider,
13
+ Split,
14
+ Swipe,
15
+ Title,
16
+ Video,
17
+ } from "./index";
18
+
19
+ describe("varg-react elements", () => {
20
+ test("Render creates correct element structure", () => {
21
+ const element = Render({
22
+ width: 1280,
23
+ height: 720,
24
+ fps: 30,
25
+ children: [],
26
+ });
27
+
28
+ expect(element.type).toBe("render");
29
+ expect(element.props.width).toBe(1280);
30
+ expect(element.props.height).toBe(720);
31
+ expect(element.props.fps).toBe(30);
32
+ });
33
+
34
+ test("Video creates correct element structure", () => {
35
+ const element = Video({
36
+ prompt: "ocean waves",
37
+ model: fal.videoModel("wan-2.5"),
38
+ });
39
+
40
+ expect(element.type).toBe("video");
41
+ expect(element.props.prompt).toBe("ocean waves");
42
+ });
43
+
44
+ test("Clip creates correct element structure", () => {
45
+ const element = Clip({
46
+ duration: 5,
47
+ transition: { name: "fade", duration: 0.5 },
48
+ children: [],
49
+ });
50
+
51
+ expect(element.type).toBe("clip");
52
+ expect(element.props.duration).toBe(5);
53
+ expect(element.props.transition).toEqual({ name: "fade", duration: 0.5 });
54
+ });
55
+
56
+ test("Image creates correct element structure", () => {
57
+ const element = Image({
58
+ prompt: "fat tiger on couch",
59
+ model: fal.imageModel("flux-schnell"),
60
+ aspectRatio: "16:9",
61
+ zoom: "in",
62
+ });
63
+
64
+ expect(element.type).toBe("image");
65
+ expect(element.props.prompt).toBe("fat tiger on couch");
66
+ expect(element.props.aspectRatio).toBe("16:9");
67
+ expect(element.props.zoom).toBe("in");
68
+ });
69
+
70
+ test("Title creates correct element with text children", () => {
71
+ const element = Title({
72
+ position: "bottom",
73
+ color: "#ffffff",
74
+ children: "I'M IN DANGER",
75
+ });
76
+
77
+ expect(element.type).toBe("title");
78
+ expect(element.props.position).toBe("bottom");
79
+ expect(element.props.color).toBe("#ffffff");
80
+ expect(element.children).toContain("I'M IN DANGER");
81
+ });
82
+
83
+ test("Animate creates correct element with nested image", () => {
84
+ const image = Image({ prompt: "luigi in wheelchair" });
85
+ const element = Animate({
86
+ image,
87
+ model: fal.videoModel("wan-2.5"),
88
+ motion: "wheels spinning fast",
89
+ duration: 5,
90
+ });
91
+
92
+ expect(element.type).toBe("animate");
93
+ expect(element.props.image).toBe(image);
94
+ expect(element.props.motion).toBe("wheels spinning fast");
95
+ expect(element.props.duration).toBe(5);
96
+ });
97
+
98
+ test("nested composition builds correct tree", () => {
99
+ const root = Render({
100
+ width: 1080,
101
+ height: 1920,
102
+ children: [
103
+ Clip({
104
+ duration: 5,
105
+ children: [
106
+ Image({
107
+ prompt: "ralph wiggum",
108
+ model: fal.imageModel("flux-schnell"),
109
+ }),
110
+ Title({ children: "HELLO" }),
111
+ ],
112
+ }),
113
+ Clip({
114
+ duration: 3,
115
+ transition: { name: "fade", duration: 0.3 },
116
+ children: [
117
+ Image({
118
+ prompt: "fat tiger",
119
+ model: fal.imageModel("flux-schnell"),
120
+ }),
121
+ ],
122
+ }),
123
+ ],
124
+ });
125
+
126
+ expect(root.type).toBe("render");
127
+ expect(root.children.length).toBe(2);
128
+
129
+ const clip1 = root.children[0] as ReturnType<typeof Clip>;
130
+ expect(clip1.type).toBe("clip");
131
+ expect(clip1.children.length).toBe(2);
132
+
133
+ const clip2 = root.children[1] as ReturnType<typeof Clip>;
134
+ expect(clip2.type).toBe("clip");
135
+ expect(clip2.props.transition).toEqual({ name: "fade", duration: 0.3 });
136
+ });
137
+ });
138
+
139
+ describe("varg-react render", () => {
140
+ test("render throws on non-render root", async () => {
141
+ const clip = Clip({ duration: 5, children: [] });
142
+
143
+ expect(render(clip)).rejects.toThrow("Root element must be <Render>");
144
+ });
145
+
146
+ test("render requires model prop for image with prompt", async () => {
147
+ const root = Render({
148
+ width: 720,
149
+ height: 720,
150
+ children: [
151
+ Clip({
152
+ duration: 3,
153
+ children: [Image({ prompt: "test image without model" })],
154
+ }),
155
+ ],
156
+ });
157
+
158
+ expect(render(root)).rejects.toThrow("model");
159
+ });
160
+ });
161
+
162
+ describe("layout renderers", () => {
163
+ const testImage1 = "media/cyberpunk-street.png";
164
+ const testImage2 = "media/fal-coffee-shop.png";
165
+ const outPath = "output/layout-test.mp4";
166
+
167
+ test("Split renders side-by-side images", async () => {
168
+ const root = Render({
169
+ width: 1280,
170
+ height: 720,
171
+ children: [
172
+ Clip({
173
+ duration: 2,
174
+ children: [
175
+ Split({
176
+ direction: "horizontal",
177
+ children: [
178
+ Image({ src: testImage1 }),
179
+ Image({ src: testImage2 }),
180
+ ],
181
+ }),
182
+ ],
183
+ }),
184
+ ],
185
+ });
186
+
187
+ const result = await render(root, { output: outPath, quiet: true });
188
+ expect(result).toBeInstanceOf(Uint8Array);
189
+ expect(result.length).toBeGreaterThan(0);
190
+ expect(existsSync(outPath)).toBe(true);
191
+ unlinkSync(outPath);
192
+ });
193
+
194
+ test(
195
+ "Slider renders with slide transitions",
196
+ async () => {
197
+ const root = Render({
198
+ width: 1280,
199
+ height: 720,
200
+ children: [
201
+ Clip({
202
+ duration: 4,
203
+ children: [
204
+ Slider({
205
+ direction: "horizontal",
206
+ children: [
207
+ Image({ src: testImage1 }),
208
+ Image({ src: testImage2 }),
209
+ ],
210
+ }),
211
+ ],
212
+ }),
213
+ ],
214
+ });
215
+
216
+ const result = await render(root, { output: outPath, quiet: true });
217
+ expect(result).toBeInstanceOf(Uint8Array);
218
+ expect(existsSync(outPath)).toBe(true);
219
+ unlinkSync(outPath);
220
+ },
221
+ { timeout: 30000 },
222
+ );
223
+
224
+ test(
225
+ "Swipe renders with swipe animation",
226
+ async () => {
227
+ const root = Render({
228
+ width: 1280,
229
+ height: 720,
230
+ children: [
231
+ Clip({
232
+ duration: 4,
233
+ children: [
234
+ Swipe({
235
+ direction: "left",
236
+ interval: 2,
237
+ children: [
238
+ Image({ src: testImage1 }),
239
+ Image({ src: testImage2 }),
240
+ ],
241
+ }),
242
+ ],
243
+ }),
244
+ ],
245
+ });
246
+
247
+ const result = await render(root, { output: outPath, quiet: true });
248
+ expect(result).toBeInstanceOf(Uint8Array);
249
+ expect(existsSync(outPath)).toBe(true);
250
+ unlinkSync(outPath);
251
+ },
252
+ { timeout: 30000 },
253
+ );
254
+
255
+ test("Packshot renders end card with logo and cta", async () => {
256
+ const root = Render({
257
+ width: 1280,
258
+ height: 720,
259
+ children: [
260
+ Clip({
261
+ duration: 3,
262
+ children: [
263
+ Packshot({
264
+ background: "#1a1a2e",
265
+ logo: testImage1,
266
+ logoPosition: "center",
267
+ logoSize: "40%",
268
+ cta: "Subscribe Now!",
269
+ ctaPosition: "bottom",
270
+ ctaColor: "#FFD700",
271
+ duration: 3,
272
+ }),
273
+ ],
274
+ }),
275
+ ],
276
+ });
277
+
278
+ const result = await render(root, { output: outPath, quiet: true });
279
+ expect(result).toBeInstanceOf(Uint8Array);
280
+ expect(existsSync(outPath)).toBe(true);
281
+ unlinkSync(outPath);
282
+ });
283
+
284
+ test(
285
+ "Captions burns subtitles from SRT file",
286
+ async () => {
287
+ const root = Render({
288
+ width: 1280,
289
+ height: 720,
290
+ children: [
291
+ Clip({
292
+ duration: 3,
293
+ children: [Image({ src: testImage1 })],
294
+ }),
295
+ Captions({
296
+ srt: "media/dora-test.srt",
297
+ style: "tiktok",
298
+ }),
299
+ ],
300
+ });
301
+
302
+ const result = await render(root, { output: outPath, quiet: true });
303
+ expect(result).toBeInstanceOf(Uint8Array);
304
+ expect(existsSync(outPath)).toBe(true);
305
+ unlinkSync(outPath);
306
+ },
307
+ { timeout: 30000 },
308
+ );
309
+ });
@@ -0,0 +1,21 @@
1
+ import { renderRoot } from "./renderers";
2
+ import type { RenderOptions, VargElement } from "./types";
3
+
4
+ export async function render(
5
+ element: VargElement,
6
+ options: RenderOptions = {},
7
+ ): Promise<Uint8Array> {
8
+ if (element.type !== "render") {
9
+ throw new Error("Root element must be <Render>");
10
+ }
11
+
12
+ return renderRoot(element as VargElement<"render">, options);
13
+ }
14
+
15
+ export const renderStream = {
16
+ async *stream(element: VargElement, options: RenderOptions = {}) {
17
+ yield { type: "start", progress: 0 };
18
+ const result = await render(element, options);
19
+ yield { type: "complete", progress: 100, result };
20
+ },
21
+ };
@@ -0,0 +1,59 @@
1
+ import { File } from "../../ai-sdk/file";
2
+ import type { generateVideo } from "../../ai-sdk/generate-video";
3
+ import type { AnimateProps, VargElement } from "../types";
4
+ import type { RenderContext } from "./context";
5
+ import { renderImage } from "./image";
6
+ import { addTask, completeTask, startTask } from "./progress";
7
+ import { computeCacheKey, resolvePath } from "./utils";
8
+
9
+ export async function renderAnimate(
10
+ element: VargElement<"animate">,
11
+ ctx: RenderContext,
12
+ ): Promise<string> {
13
+ const props = element.props as AnimateProps;
14
+
15
+ let imagePath: string;
16
+ if (props.src) {
17
+ imagePath = props.src;
18
+ } else if (props.image) {
19
+ if (props.image.type !== "image") {
20
+ throw new Error(
21
+ `Animate 'image' prop must be an <Image /> element, got <${props.image.type} />`,
22
+ );
23
+ }
24
+ imagePath = await renderImage(props.image as VargElement<"image">, ctx);
25
+ } else {
26
+ throw new Error("Animate element requires either 'src' or 'image' prop");
27
+ }
28
+
29
+ const model = props.model;
30
+ if (!model) {
31
+ throw new Error("Animate element requires 'model' prop");
32
+ }
33
+
34
+ const imageData = await Bun.file(resolvePath(imagePath)).arrayBuffer();
35
+ const cacheKey = computeCacheKey(element);
36
+
37
+ const modelId = typeof model === "string" ? model : model.modelId;
38
+ const taskId = ctx.progress
39
+ ? addTask(ctx.progress, "animate", modelId)
40
+ : null;
41
+ if (taskId && ctx.progress) startTask(ctx.progress, taskId);
42
+
43
+ const { video } = await ctx.generateVideo({
44
+ model,
45
+ prompt: {
46
+ text: props.motion ?? "",
47
+ images: [new Uint8Array(imageData)],
48
+ },
49
+ duration: props.duration ?? 5,
50
+ cacheKey,
51
+ } as Parameters<typeof generateVideo>[0]);
52
+
53
+ if (taskId && ctx.progress) completeTask(ctx.progress, taskId);
54
+
55
+ const tempPath = await File.toTemp(video);
56
+ ctx.tempFiles.push(tempPath);
57
+
58
+ return tempPath;
59
+ }
@@ -0,0 +1,297 @@
1
+ import { writeFileSync } from "node:fs";
2
+ import { groq } from "@ai-sdk/groq";
3
+ import { experimental_transcribe as transcribe } from "ai";
4
+ import { z } from "zod";
5
+ import type { CaptionsProps, VargElement } from "../types";
6
+ import type { RenderContext } from "./context";
7
+ import { addTask, completeTask, startTask } from "./progress";
8
+ import { renderSpeech } from "./speech";
9
+
10
+ const groqWordSchema = z.object({
11
+ word: z.string(),
12
+ start: z.number(),
13
+ end: z.number(),
14
+ });
15
+
16
+ const groqResponseSchema = z.object({
17
+ words: z.array(groqWordSchema).optional(),
18
+ });
19
+
20
+ type GroqWord = z.infer<typeof groqWordSchema>;
21
+
22
+ // Helper function to convert words to SRT format
23
+ function formatTime(seconds: number): string {
24
+ const hours = Math.floor(seconds / 3600);
25
+ const minutes = Math.floor((seconds % 3600) / 60);
26
+ const secs = Math.floor(seconds % 60);
27
+ const millis = Math.floor((seconds % 1) * 1000);
28
+
29
+ return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(2, "0")}:${String(secs).padStart(2, "0")},${String(millis).padStart(3, "0")}`;
30
+ }
31
+
32
+ export function convertToSRT(words: GroqWord[]): string {
33
+ let srt = "";
34
+ let index = 1;
35
+
36
+ for (const word of words) {
37
+ const startTime = formatTime(word.start);
38
+ const endTime = formatTime(word.end);
39
+
40
+ srt += `${index}\n`;
41
+ srt += `${startTime} --> ${endTime}\n`;
42
+ srt += `${word.word.trim()}\n\n`;
43
+ index++;
44
+ }
45
+
46
+ return srt;
47
+ }
48
+
49
+ interface SrtEntry {
50
+ index: number;
51
+ start: number;
52
+ end: number;
53
+ text: string;
54
+ }
55
+
56
+ interface SubtitleStyle {
57
+ fontName: string;
58
+ fontSize: number;
59
+ primaryColor: string;
60
+ outlineColor: string;
61
+ backColor: string;
62
+ bold: boolean;
63
+ outline: number;
64
+ shadow: number;
65
+ marginV: number;
66
+ alignment: number;
67
+ }
68
+
69
+ const STYLE_PRESETS: Record<string, SubtitleStyle> = {
70
+ tiktok: {
71
+ fontName: "Montserrat",
72
+ fontSize: 32,
73
+ primaryColor: "&HFFFFFF",
74
+ outlineColor: "&H000000",
75
+ backColor: "&H80000000",
76
+ bold: true,
77
+ outline: 3,
78
+ shadow: 0,
79
+ marginV: 50,
80
+ alignment: 2,
81
+ },
82
+ karaoke: {
83
+ fontName: "Arial",
84
+ fontSize: 28,
85
+ primaryColor: "&H00FFFF",
86
+ outlineColor: "&H000000",
87
+ backColor: "&H00000000",
88
+ bold: true,
89
+ outline: 2,
90
+ shadow: 1,
91
+ marginV: 40,
92
+ alignment: 2,
93
+ },
94
+ bounce: {
95
+ fontName: "Impact",
96
+ fontSize: 36,
97
+ primaryColor: "&HFFFFFF",
98
+ outlineColor: "&H000000",
99
+ backColor: "&H00000000",
100
+ bold: false,
101
+ outline: 4,
102
+ shadow: 2,
103
+ marginV: 60,
104
+ alignment: 2,
105
+ },
106
+ typewriter: {
107
+ fontName: "Courier New",
108
+ fontSize: 24,
109
+ primaryColor: "&H00FF00",
110
+ outlineColor: "&H000000",
111
+ backColor: "&H80000000",
112
+ bold: false,
113
+ outline: 1,
114
+ shadow: 0,
115
+ marginV: 30,
116
+ alignment: 2,
117
+ },
118
+ };
119
+
120
+ function parseSrt(content: string): SrtEntry[] {
121
+ const entries: SrtEntry[] = [];
122
+ const blocks = content.trim().split(/\n\n+/);
123
+
124
+ for (const block of blocks) {
125
+ const lines = block.split("\n");
126
+ if (lines.length < 3) continue;
127
+
128
+ const index = Number.parseInt(lines[0] || "0", 10);
129
+ const timeLine = lines[1] || "";
130
+ const timeMatch = timeLine.match(
131
+ /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
132
+ );
133
+
134
+ if (!timeMatch) continue;
135
+
136
+ const [, h1, m1, s1, ms1, h2, m2, s2, ms2] = timeMatch;
137
+ if (!h1 || !m1 || !s1 || !ms1 || !h2 || !m2 || !s2 || !ms2) continue;
138
+
139
+ const start =
140
+ Number.parseInt(h1, 10) * 3600 +
141
+ Number.parseInt(m1, 10) * 60 +
142
+ Number.parseInt(s1, 10) +
143
+ Number.parseInt(ms1, 10) / 1000;
144
+
145
+ const end =
146
+ Number.parseInt(h2, 10) * 3600 +
147
+ Number.parseInt(m2, 10) * 60 +
148
+ Number.parseInt(s2, 10) +
149
+ Number.parseInt(ms2, 10) / 1000;
150
+
151
+ const text = lines.slice(2).join("\n");
152
+ entries.push({ index, start, end, text });
153
+ }
154
+
155
+ return entries;
156
+ }
157
+
158
+ function formatAssTime(seconds: number): string {
159
+ const h = Math.floor(seconds / 3600);
160
+ const m = Math.floor((seconds % 3600) / 60);
161
+ const s = Math.floor(seconds % 60);
162
+ const cs = Math.floor((seconds % 1) * 100);
163
+
164
+ return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(cs).padStart(2, "0")}`;
165
+ }
166
+
167
+ function convertSrtToAss(srtContent: string, style: SubtitleStyle): string {
168
+ const assHeader = `[Script Info]
169
+ Title: Generated Subtitles
170
+ ScriptType: v4.00+
171
+ WrapStyle: 0
172
+ ScaledBorderAndShadow: yes
173
+ YCbCr Matrix: TV.601
174
+
175
+ [V4+ Styles]
176
+ Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
177
+ Style: Default,${style.fontName},${style.fontSize},${style.primaryColor},&H000000FF,${style.outlineColor},${style.backColor},${style.bold ? -1 : 0},0,0,0,100,100,0,0,1,${style.outline},${style.shadow},${style.alignment},10,10,${style.marginV},1
178
+
179
+ [Events]
180
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
181
+ `;
182
+
183
+ const entries = parseSrt(srtContent);
184
+ const assDialogues = entries
185
+ .map((entry) => {
186
+ const start = formatAssTime(entry.start);
187
+ const end = formatAssTime(entry.end);
188
+ const text = entry.text.replace(/\n/g, "\\N");
189
+ return `Dialogue: 0,${start},${end},Default,,0,0,0,,${text}`;
190
+ })
191
+ .join("\n");
192
+
193
+ return assHeader + assDialogues;
194
+ }
195
+
196
+ function colorToAss(color: string): string {
197
+ if (color.startsWith("&H")) return color;
198
+
199
+ const hex = color.replace("#", "");
200
+ if (hex.length === 6) {
201
+ const r = hex.substring(0, 2);
202
+ const g = hex.substring(2, 4);
203
+ const b = hex.substring(4, 6);
204
+ return `&H${b}${g}${r}`.toUpperCase();
205
+ }
206
+ return "&HFFFFFF";
207
+ }
208
+
209
+ export interface CaptionsResult {
210
+ assPath: string;
211
+ srtPath?: string;
212
+ audioPath?: string;
213
+ }
214
+
215
+ export async function renderCaptions(
216
+ element: VargElement<"captions">,
217
+ ctx: RenderContext,
218
+ ): Promise<CaptionsResult> {
219
+ const props = element.props as CaptionsProps;
220
+
221
+ let srtContent: string;
222
+ let srtPath: string | undefined;
223
+ let audioPath: string | undefined;
224
+
225
+ if (props.srt) {
226
+ srtContent = await Bun.file(props.srt).text();
227
+ srtPath = props.srt;
228
+ } else if (props.src) {
229
+ if (typeof props.src === "string") {
230
+ srtContent = await Bun.file(props.src).text();
231
+ srtPath = props.src;
232
+ } else if (props.src.type === "speech") {
233
+ const speechResult = await renderSpeech(props.src, ctx);
234
+ audioPath = speechResult.path;
235
+
236
+ const transcribeTaskId = ctx.progress
237
+ ? addTask(ctx.progress, "transcribe", "groq-whisper")
238
+ : null;
239
+ if (transcribeTaskId && ctx.progress)
240
+ startTask(ctx.progress, transcribeTaskId);
241
+
242
+ const audioData = await Bun.file(speechResult.path).arrayBuffer();
243
+
244
+ const result = await transcribe({
245
+ model: groq.transcription("whisper-large-v3"),
246
+ audio: new Uint8Array(audioData),
247
+ providerOptions: {
248
+ groq: {
249
+ responseFormat: "verbose_json",
250
+ timestampGranularities: ["word"],
251
+ },
252
+ },
253
+ });
254
+
255
+ if (transcribeTaskId && ctx.progress)
256
+ completeTask(ctx.progress, transcribeTaskId);
257
+
258
+ const rawBody = (result.responses[0] as { body?: unknown })?.body;
259
+ const parsed = groqResponseSchema.safeParse(rawBody);
260
+ const words = parsed.success ? parsed.data.words : undefined;
261
+
262
+ if (!words || words.length === 0) {
263
+ srtContent = `1\n00:00:00,000 --> 00:00:05,000\n${result.text}\n`;
264
+ } else {
265
+ srtContent = convertToSRT(words);
266
+ }
267
+
268
+ srtPath = `/tmp/varg-captions-${Date.now()}.srt`;
269
+ writeFileSync(srtPath, srtContent);
270
+ ctx.tempFiles.push(srtPath);
271
+ } else {
272
+ throw new Error(
273
+ "Captions src must be a path to SRT file or Speech element",
274
+ );
275
+ }
276
+ } else {
277
+ throw new Error("Captions element requires either 'srt' or 'src' prop");
278
+ }
279
+
280
+ const styleName = props.style ?? "tiktok";
281
+ const baseStyle = STYLE_PRESETS[styleName] ?? STYLE_PRESETS.tiktok!;
282
+
283
+ const style: SubtitleStyle = {
284
+ ...baseStyle,
285
+ fontSize: props.fontSize ?? baseStyle.fontSize,
286
+ primaryColor: props.color
287
+ ? colorToAss(props.color)
288
+ : baseStyle.primaryColor,
289
+ };
290
+
291
+ const assContent = convertSrtToAss(srtContent, style);
292
+ const assPath = `/tmp/varg-captions-${Date.now()}.ass`;
293
+ writeFileSync(assPath, assContent);
294
+ ctx.tempFiles.push(assPath);
295
+
296
+ return { assPath, srtPath, audioPath };
297
+ }