varg.ai-sdk 0.1.0 → 0.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +1 -1
- package/.env.example +3 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +10 -3
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +56 -209
- package/SKILLS.md +26 -10
- package/biome.json +7 -1
- package/bun.lock +1286 -0
- package/commitlint.config.js +22 -0
- package/docs/index.html +1130 -0
- package/docs/prompting.md +326 -0
- package/docs/react.md +834 -0
- package/docs/sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +48 -8
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +776 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +6 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/render.tsx +71 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +114 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/react/cli.ts +52 -0
- package/src/react/elements.ts +146 -0
- package/src/react/examples/branching.tsx +66 -0
- package/src/react/examples/captions-demo.tsx +37 -0
- package/src/react/examples/character-video.tsx +84 -0
- package/src/react/examples/grid.tsx +53 -0
- package/src/react/examples/layouts-demo.tsx +57 -0
- package/src/react/examples/madi.tsx +60 -0
- package/src/react/examples/music-test.tsx +35 -0
- package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
- package/src/react/examples/orange-portrait.tsx +41 -0
- package/src/react/examples/split-element-demo.tsx +60 -0
- package/src/react/examples/split-layout-demo.tsx +60 -0
- package/src/react/examples/split.tsx +41 -0
- package/src/react/examples/video-grid.tsx +46 -0
- package/src/react/index.ts +43 -0
- package/src/react/layouts/grid.tsx +28 -0
- package/src/react/layouts/index.ts +2 -0
- package/src/react/layouts/split.tsx +20 -0
- package/src/react/react.test.ts +309 -0
- package/src/react/render.ts +21 -0
- package/src/react/renderers/animate.ts +59 -0
- package/src/react/renderers/captions.ts +297 -0
- package/src/react/renderers/clip.ts +248 -0
- package/src/react/renderers/context.ts +17 -0
- package/src/react/renderers/image.ts +109 -0
- package/src/react/renderers/index.ts +22 -0
- package/src/react/renderers/music.ts +60 -0
- package/src/react/renderers/packshot.ts +84 -0
- package/src/react/renderers/progress.ts +173 -0
- package/src/react/renderers/render.ts +243 -0
- package/src/react/renderers/slider.ts +69 -0
- package/src/react/renderers/speech.ts +53 -0
- package/src/react/renderers/split.ts +91 -0
- package/src/react/renderers/subtitle.ts +16 -0
- package/src/react/renderers/swipe.ts +75 -0
- package/src/react/renderers/title.ts +17 -0
- package/src/react/renderers/utils.ts +124 -0
- package/src/react/renderers/video.ts +127 -0
- package/src/react/runtime/jsx-dev-runtime.ts +43 -0
- package/src/react/runtime/jsx-runtime.ts +35 -0
- package/src/react/types.ts +232 -0
- package/src/studio/index.ts +26 -0
- package/src/studio/scanner.ts +102 -0
- package/src/studio/server.ts +554 -0
- package/src/studio/stages.ts +251 -0
- package/src/studio/step-renderer.ts +279 -0
- package/src/studio/types.ts +60 -0
- package/src/studio/ui/cache.html +303 -0
- package/src/studio/ui/index.html +1820 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.cli.json +8 -0
- package/tsconfig.json +21 -3
- package/TEST_RESULTS.md +0 -122
- package/action/captions/SKILL.md +0 -170
- package/action/captions/index.ts +0 -227
- package/action/edit/SKILL.md +0 -235
- package/action/edit/index.ts +0 -493
- package/action/image/SKILL.md +0 -140
- package/action/image/index.ts +0 -112
- package/action/sync/SKILL.md +0 -136
- package/action/sync/index.ts +0 -187
- package/action/transcribe/SKILL.md +0 -179
- package/action/video/SKILL.md +0 -116
- package/action/video/index.ts +0 -135
- package/action/voice/SKILL.md +0 -125
- package/action/voice/index.ts +0 -201
- package/index.ts +0 -38
- package/lib/README.md +0 -144
- package/lib/ai-sdk/fal.ts +0 -106
- package/lib/ai-sdk/replicate.ts +0 -107
- package/lib/elevenlabs.ts +0 -382
- package/lib/fal.ts +0 -478
- package/lib/ffmpeg.ts +0 -467
- package/lib/fireworks.ts +0 -235
- package/lib/groq.ts +0 -246
- package/lib/higgsfield.ts +0 -176
- package/lib/remotion/SKILL.md +0 -823
- package/lib/remotion/cli.ts +0 -115
- package/lib/remotion/functions.ts +0 -283
- package/lib/remotion/index.ts +0 -19
- package/lib/remotion/templates.ts +0 -73
- package/lib/replicate.ts +0 -304
- package/output.txt +0 -1
- package/test-import.ts +0 -7
- package/test-services.ts +0 -97
- package/utilities/s3.ts +0 -147
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
import { writeFileSync } from "node:fs";
|
|
2
|
+
import { groq } from "@ai-sdk/groq";
|
|
3
|
+
import { experimental_transcribe as transcribe } from "ai";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import type { CaptionsProps, VargElement } from "../types";
|
|
6
|
+
import type { RenderContext } from "./context";
|
|
7
|
+
import { addTask, completeTask, startTask } from "./progress";
|
|
8
|
+
import { renderSpeech } from "./speech";
|
|
9
|
+
|
|
10
|
+
// Shape of one word-level timestamp entry in Groq's verbose_json
// transcription response. start/end are treated as seconds downstream
// (convertToSRT feeds them straight into formatTime).
const groqWordSchema = z.object({
  word: z.string(),
  start: z.number(),
  end: z.number(),
});

// Groq transcription response body. `words` may be absent (e.g. when word
// granularity is unavailable); renderCaptions then falls back to a single
// full-text cue.
const groqResponseSchema = z.object({
  words: z.array(groqWordSchema).optional(),
});

// Word entry type derived from the schema above.
type GroqWord = z.infer<typeof groqWordSchema>;
|
|
21
|
+
|
|
22
|
+
// Helper function to convert words to SRT format
|
|
23
|
+
function formatTime(seconds: number): string {
|
|
24
|
+
const hours = Math.floor(seconds / 3600);
|
|
25
|
+
const minutes = Math.floor((seconds % 3600) / 60);
|
|
26
|
+
const secs = Math.floor(seconds % 60);
|
|
27
|
+
const millis = Math.floor((seconds % 1) * 1000);
|
|
28
|
+
|
|
29
|
+
return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(2, "0")}:${String(secs).padStart(2, "0")},${String(millis).padStart(3, "0")}`;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export function convertToSRT(words: GroqWord[]): string {
|
|
33
|
+
let srt = "";
|
|
34
|
+
let index = 1;
|
|
35
|
+
|
|
36
|
+
for (const word of words) {
|
|
37
|
+
const startTime = formatTime(word.start);
|
|
38
|
+
const endTime = formatTime(word.end);
|
|
39
|
+
|
|
40
|
+
srt += `${index}\n`;
|
|
41
|
+
srt += `${startTime} --> ${endTime}\n`;
|
|
42
|
+
srt += `${word.word.trim()}\n\n`;
|
|
43
|
+
index++;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
return srt;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/** One parsed SRT cue. */
interface SrtEntry {
  // Cue sequence number as written in the SRT file.
  index: number;
  // Start time in seconds.
  start: number;
  // End time in seconds.
  end: number;
  // Cue text; may contain embedded newlines for multi-line cues.
  text: string;
}

/**
 * Visual style for generated ASS subtitles. Color fields use the ASS
 * &HBBGGRR form (blue-green-red order — see colorToAss).
 */
interface SubtitleStyle {
  fontName: string;
  fontSize: number;
  primaryColor: string;
  outlineColor: string;
  backColor: string;
  bold: boolean;
  // Outline thickness and shadow depth as written into the ASS Style line.
  outline: number;
  shadow: number;
  // Vertical margin (MarginV) in the ASS Style line.
  marginV: number;
  // ASS numpad alignment; 2 = bottom-center per the ASS spec.
  alignment: number;
}

// Built-in caption looks; renderCaptions falls back to "tiktok" when the
// requested style name is unknown.
const STYLE_PRESETS: Record<string, SubtitleStyle> = {
  tiktok: {
    fontName: "Montserrat",
    fontSize: 32,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H80000000",
    bold: true,
    outline: 3,
    shadow: 0,
    marginV: 50,
    alignment: 2,
  },
  karaoke: {
    fontName: "Arial",
    fontSize: 28,
    primaryColor: "&H00FFFF",
    outlineColor: "&H000000",
    backColor: "&H00000000",
    bold: true,
    outline: 2,
    shadow: 1,
    marginV: 40,
    alignment: 2,
  },
  bounce: {
    fontName: "Impact",
    fontSize: 36,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H00000000",
    bold: false,
    outline: 4,
    shadow: 2,
    marginV: 60,
    alignment: 2,
  },
  typewriter: {
    fontName: "Courier New",
    fontSize: 24,
    primaryColor: "&H00FF00",
    outlineColor: "&H000000",
    backColor: "&H80000000",
    bold: false,
    outline: 1,
    shadow: 0,
    marginV: 30,
    alignment: 2,
  },
};
|
|
119
|
+
|
|
120
|
+
function parseSrt(content: string): SrtEntry[] {
|
|
121
|
+
const entries: SrtEntry[] = [];
|
|
122
|
+
const blocks = content.trim().split(/\n\n+/);
|
|
123
|
+
|
|
124
|
+
for (const block of blocks) {
|
|
125
|
+
const lines = block.split("\n");
|
|
126
|
+
if (lines.length < 3) continue;
|
|
127
|
+
|
|
128
|
+
const index = Number.parseInt(lines[0] || "0", 10);
|
|
129
|
+
const timeLine = lines[1] || "";
|
|
130
|
+
const timeMatch = timeLine.match(
|
|
131
|
+
/(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
|
|
132
|
+
);
|
|
133
|
+
|
|
134
|
+
if (!timeMatch) continue;
|
|
135
|
+
|
|
136
|
+
const [, h1, m1, s1, ms1, h2, m2, s2, ms2] = timeMatch;
|
|
137
|
+
if (!h1 || !m1 || !s1 || !ms1 || !h2 || !m2 || !s2 || !ms2) continue;
|
|
138
|
+
|
|
139
|
+
const start =
|
|
140
|
+
Number.parseInt(h1, 10) * 3600 +
|
|
141
|
+
Number.parseInt(m1, 10) * 60 +
|
|
142
|
+
Number.parseInt(s1, 10) +
|
|
143
|
+
Number.parseInt(ms1, 10) / 1000;
|
|
144
|
+
|
|
145
|
+
const end =
|
|
146
|
+
Number.parseInt(h2, 10) * 3600 +
|
|
147
|
+
Number.parseInt(m2, 10) * 60 +
|
|
148
|
+
Number.parseInt(s2, 10) +
|
|
149
|
+
Number.parseInt(ms2, 10) / 1000;
|
|
150
|
+
|
|
151
|
+
const text = lines.slice(2).join("\n");
|
|
152
|
+
entries.push({ index, start, end, text });
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
return entries;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
function formatAssTime(seconds: number): string {
|
|
159
|
+
const h = Math.floor(seconds / 3600);
|
|
160
|
+
const m = Math.floor((seconds % 3600) / 60);
|
|
161
|
+
const s = Math.floor(seconds % 60);
|
|
162
|
+
const cs = Math.floor((seconds % 1) * 100);
|
|
163
|
+
|
|
164
|
+
return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(cs).padStart(2, "0")}`;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/**
 * Convert SRT subtitle text into a complete ASS document using the given
 * style. Emits the [Script Info]/[V4+ Styles] header once, then one
 * Dialogue line per SRT cue.
 */
function convertSrtToAss(srtContent: string, style: SubtitleStyle): string {
  // NOTE: the header is a template literal whose exact line layout is part
  // of the output format — keep it flush-left and do not reflow it.
  const assHeader = `[Script Info]
Title: Generated Subtitles
ScriptType: v4.00+
WrapStyle: 0
ScaledBorderAndShadow: yes
YCbCr Matrix: TV.601

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,${style.fontName},${style.fontSize},${style.primaryColor},&H000000FF,${style.outlineColor},${style.backColor},${style.bold ? -1 : 0},0,0,0,100,100,0,0,1,${style.outline},${style.shadow},${style.alignment},10,10,${style.marginV},1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`;

  const entries = parseSrt(srtContent);
  const assDialogues = entries
    .map((entry) => {
      const start = formatAssTime(entry.start);
      const end = formatAssTime(entry.end);
      // ASS uses \N for a hard line break within one Dialogue line.
      const text = entry.text.replace(/\n/g, "\\N");
      return `Dialogue: 0,${start},${end},Default,,0,0,0,,${text}`;
    })
    .join("\n");

  return assHeader + assDialogues;
}
|
|
195
|
+
|
|
196
|
+
function colorToAss(color: string): string {
|
|
197
|
+
if (color.startsWith("&H")) return color;
|
|
198
|
+
|
|
199
|
+
const hex = color.replace("#", "");
|
|
200
|
+
if (hex.length === 6) {
|
|
201
|
+
const r = hex.substring(0, 2);
|
|
202
|
+
const g = hex.substring(2, 4);
|
|
203
|
+
const b = hex.substring(4, 6);
|
|
204
|
+
return `&H${b}${g}${r}`.toUpperCase();
|
|
205
|
+
}
|
|
206
|
+
return "&HFFFFFF";
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
/** Result of rendering a <captions> element. */
export interface CaptionsResult {
  /** Path to the generated ASS subtitle file. */
  assPath: string;
  /** Path to the source or generated SRT file, when one exists. */
  srtPath?: string;
  /** Path to synthesized speech audio, when captions came from a speech element. */
  audioPath?: string;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Render a <captions> element to an ASS subtitle file.
 *
 * SRT content is obtained from, in priority order:
 *  - props.srt: path to an existing SRT file;
 *  - props.src as string: path to an SRT file;
 *  - props.src as a speech element: the speech is rendered to audio, then
 *    transcribed with Groq Whisper for word-level timestamps.
 *
 * The SRT is converted to ASS with the selected style preset and written to
 * a temp file registered in ctx.tempFiles for later cleanup.
 *
 * @throws when neither 'srt' nor a usable 'src' prop is provided.
 */
export async function renderCaptions(
  element: VargElement<"captions">,
  ctx: RenderContext,
): Promise<CaptionsResult> {
  const props = element.props as CaptionsProps;

  let srtContent: string;
  let srtPath: string | undefined;
  let audioPath: string | undefined;

  if (props.srt) {
    // Explicit SRT file path — read it as-is. (Bun runtime API.)
    srtContent = await Bun.file(props.srt).text();
    srtPath = props.srt;
  } else if (props.src) {
    if (typeof props.src === "string") {
      // String src is treated as an SRT file path too.
      srtContent = await Bun.file(props.src).text();
      srtPath = props.src;
    } else if (props.src.type === "speech") {
      // Speech element: synthesize audio first, then transcribe it.
      const speechResult = await renderSpeech(props.src, ctx);
      audioPath = speechResult.path;

      // Progress tracking is optional; only report when a tracker exists.
      const transcribeTaskId = ctx.progress
        ? addTask(ctx.progress, "transcribe", "groq-whisper")
        : null;
      if (transcribeTaskId && ctx.progress)
        startTask(ctx.progress, transcribeTaskId);

      const audioData = await Bun.file(speechResult.path).arrayBuffer();

      // verbose_json + word granularity is required to get per-word timestamps.
      const result = await transcribe({
        model: groq.transcription("whisper-large-v3"),
        audio: new Uint8Array(audioData),
        providerOptions: {
          groq: {
            responseFormat: "verbose_json",
            timestampGranularities: ["word"],
          },
        },
      });

      if (transcribeTaskId && ctx.progress)
        completeTask(ctx.progress, transcribeTaskId);

      // The word list lives in the raw provider response body; validate it
      // with zod rather than trusting its shape.
      const rawBody = (result.responses[0] as { body?: unknown })?.body;
      const parsed = groqResponseSchema.safeParse(rawBody);
      const words = parsed.success ? parsed.data.words : undefined;

      if (!words || words.length === 0) {
        // No word timestamps: fall back to one 5-second cue with the full text.
        srtContent = `1\n00:00:00,000 --> 00:00:05,000\n${result.text}\n`;
      } else {
        srtContent = convertToSRT(words);
      }

      // Persist the generated SRT next to other temp artifacts.
      // NOTE(review): /tmp is POSIX-specific — assumed fine for the target runtime.
      srtPath = `/tmp/varg-captions-${Date.now()}.srt`;
      writeFileSync(srtPath, srtContent);
      ctx.tempFiles.push(srtPath);
    } else {
      throw new Error(
        "Captions src must be a path to SRT file or Speech element",
      );
    }
  } else {
    throw new Error("Captions element requires either 'srt' or 'src' prop");
  }

  // Resolve the style preset, falling back to "tiktok" for unknown names.
  const styleName = props.style ?? "tiktok";
  const baseStyle = STYLE_PRESETS[styleName] ?? STYLE_PRESETS.tiktok!;

  // Per-element overrides on top of the preset.
  const style: SubtitleStyle = {
    ...baseStyle,
    fontSize: props.fontSize ?? baseStyle.fontSize,
    primaryColor: props.color
      ? colorToAss(props.color)
      : baseStyle.primaryColor,
  };

  const assContent = convertSrtToAss(srtContent, style);
  const assPath = `/tmp/varg-captions-${Date.now()}.ass`;
  writeFileSync(assPath, assContent);
  ctx.tempFiles.push(assPath);

  return { assPath, srtPath, audioPath };
}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
AudioLayer,
|
|
3
|
+
Clip,
|
|
4
|
+
FillColorLayer,
|
|
5
|
+
ImageLayer,
|
|
6
|
+
ImageOverlayLayer,
|
|
7
|
+
Layer,
|
|
8
|
+
VideoLayer,
|
|
9
|
+
} from "../../ai-sdk/providers/editly/types";
|
|
10
|
+
import type {
|
|
11
|
+
AnimateProps,
|
|
12
|
+
ClipProps,
|
|
13
|
+
ImageProps,
|
|
14
|
+
SpeechProps,
|
|
15
|
+
VargElement,
|
|
16
|
+
VargNode,
|
|
17
|
+
VideoProps,
|
|
18
|
+
} from "../types";
|
|
19
|
+
import { renderAnimate } from "./animate";
|
|
20
|
+
import type { RenderContext } from "./context";
|
|
21
|
+
import { renderImage } from "./image";
|
|
22
|
+
import { renderPackshot } from "./packshot";
|
|
23
|
+
import { renderSlider } from "./slider";
|
|
24
|
+
import { renderSpeech } from "./speech";
|
|
25
|
+
import { renderSplit } from "./split";
|
|
26
|
+
import { renderSubtitle } from "./subtitle";
|
|
27
|
+
import { renderSwipe } from "./swipe";
|
|
28
|
+
import { renderTitle } from "./title";
|
|
29
|
+
import { renderVideo } from "./video";
|
|
30
|
+
|
|
31
|
+
// A layer that is either already built (sync) or still rendering (async).
// Entries are collected in child order and awaited together, so layer order
// is preserved while async renders run in parallel.
type PendingLayer =
  | { type: "sync"; layer: Layer }
  | { type: "async"; promise: Promise<Layer> };

/**
 * Render the children of a <clip> element into editly layers.
 *
 * Each supported child type is dispatched to its renderer; non-element
 * children and unknown element types are skipped silently. Async renders
 * are started immediately and awaited in one batch at the end.
 */
async function renderClipLayers(
  children: VargNode[],
  ctx: RenderContext,
): Promise<Layer[]> {
  const pending: PendingLayer[] = [];

  for (const child of children) {
    // Skip text nodes, null/undefined, and anything that is not an element.
    if (!child || typeof child !== "object" || !("type" in child)) continue;

    const element = child as VargElement;

    switch (element.type) {
      case "image": {
        const props = element.props as ImageProps;
        // Any explicit geometry turns the image into a positioned overlay
        // rather than a full-frame base layer.
        const hasPosition =
          props.left !== undefined ||
          props.top !== undefined ||
          props.width !== undefined ||
          props.height !== undefined;

        pending.push({
          type: "async",
          promise: renderImage(element as VargElement<"image">, ctx).then(
            (path) =>
              hasPosition
                ? ({
                    type: "image-overlay",
                    path,
                    zoomDirection: props.zoom,
                    width: props.width,
                    height: props.height,
                    // Missing coordinates default to the top-left corner.
                    position: { x: props.left ?? 0, y: props.top ?? 0 },
                  } as ImageOverlayLayer)
                : ({
                    type: "image",
                    path,
                    resizeMode: props.resize,
                    zoomDirection: props.zoom,
                  } as ImageLayer),
          ),
        });
        break;
      }

      case "video": {
        const props = element.props as VideoProps;
        pending.push({
          type: "async",
          promise: renderVideo(element as VargElement<"video">, ctx).then(
            (path) =>
              ({
                type: "video",
                path,
                resizeMode: props.resize,
                cutFrom: props.cutFrom,
                cutTo: props.cutTo,
                // Source audio is muted unless keepAudio is set.
                mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
                left: props.left,
                top: props.top,
                width: props.width,
                height: props.height,
              }) as VideoLayer,
          ),
        });
        break;
      }

      case "animate": {
        const props = element.props as AnimateProps;
        // Animations render to a video file and are placed like a video layer.
        pending.push({
          type: "async",
          promise: renderAnimate(element as VargElement<"animate">, ctx).then(
            (path) =>
              ({
                type: "video",
                path,
                left: props.left,
                top: props.top,
                width: props.width,
                height: props.height,
              }) as VideoLayer,
          ),
        });
        break;
      }

      case "title": {
        // Titles are built synchronously — no generation step involved.
        pending.push({
          type: "sync",
          layer: renderTitle(element as VargElement<"title">),
        });
        break;
      }

      case "subtitle": {
        pending.push({
          type: "sync",
          layer: renderSubtitle(element as VargElement<"subtitle">),
        });
        break;
      }

      case "speech": {
        const props = element.props as SpeechProps;
        pending.push({
          type: "async",
          promise: renderSpeech(element as VargElement<"speech">, ctx).then(
            (result) =>
              ({
                type: "audio",
                path: result.path,
                mixVolume: props.volume ?? 1,
              }) as AudioLayer,
          ),
        });
        break;
      }

      // The composite renderers below all produce an intermediate video
      // file which is embedded as a plain video layer.
      case "split": {
        pending.push({
          type: "async",
          promise: renderSplit(element as VargElement<"split">, ctx).then(
            (path) =>
              ({
                type: "video",
                path,
              }) as VideoLayer,
          ),
        });
        break;
      }

      case "slider": {
        pending.push({
          type: "async",
          promise: renderSlider(element as VargElement<"slider">, ctx).then(
            (path) =>
              ({
                type: "video",
                path,
              }) as VideoLayer,
          ),
        });
        break;
      }

      case "swipe": {
        pending.push({
          type: "async",
          promise: renderSwipe(element as VargElement<"swipe">, ctx).then(
            (path) =>
              ({
                type: "video",
                path,
              }) as VideoLayer,
          ),
        });
        break;
      }

      case "packshot": {
        pending.push({
          type: "async",
          promise: renderPackshot(element as VargElement<"packshot">, ctx).then(
            (path) =>
              ({
                type: "video",
                path,
              }) as VideoLayer,
          ),
        });
        break;
      }
    }
  }

  // Await everything at once; Promise.all preserves child order.
  const layers = await Promise.all(
    pending.map((p) => (p.type === "sync" ? p.layer : p.promise)),
  );

  return layers;
}
|
|
217
|
+
|
|
218
|
+
export async function renderClip(
|
|
219
|
+
element: VargElement<"clip">,
|
|
220
|
+
ctx: RenderContext,
|
|
221
|
+
): Promise<Clip> {
|
|
222
|
+
const props = element.props as ClipProps;
|
|
223
|
+
const layers = await renderClipLayers(element.children, ctx);
|
|
224
|
+
|
|
225
|
+
const isOverlayVideo = (l: Layer) =>
|
|
226
|
+
l.type === "video" &&
|
|
227
|
+
((l as VideoLayer).left !== undefined ||
|
|
228
|
+
(l as VideoLayer).top !== undefined ||
|
|
229
|
+
(l as VideoLayer).width !== undefined ||
|
|
230
|
+
(l as VideoLayer).height !== undefined);
|
|
231
|
+
|
|
232
|
+
const hasBaseLayer = layers.some(
|
|
233
|
+
(l) =>
|
|
234
|
+
l.type === "image" ||
|
|
235
|
+
l.type === "fill-color" ||
|
|
236
|
+
(l.type === "video" && !isOverlayVideo(l)),
|
|
237
|
+
);
|
|
238
|
+
|
|
239
|
+
if (!hasBaseLayer && layers.length > 0) {
|
|
240
|
+
layers.unshift({ type: "fill-color", color: "#000000" } as FillColorLayer);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
return {
|
|
244
|
+
layers,
|
|
245
|
+
duration: typeof props.duration === "number" ? props.duration : undefined,
|
|
246
|
+
transition: props.transition ?? null,
|
|
247
|
+
};
|
|
248
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { generateImage } from "ai";
|
|
2
|
+
import type { fileCache } from "../../ai-sdk/file-cache";
|
|
3
|
+
import type { generateVideo } from "../../ai-sdk/generate-video";
|
|
4
|
+
import type { ProgressTracker } from "./progress";
|
|
5
|
+
|
|
6
|
+
/** Shared state and services passed to every element renderer. */
export interface RenderContext {
  // Output frame dimensions — presumably pixels; confirm against renderRoot.
  width: number;
  height: number;
  // Output frame rate.
  fps: number;
  /** Optional persistent file cache for generated assets. */
  cache?: ReturnType<typeof fileCache>;
  /** Injected generation functions (same signatures as the ai-sdk helpers). */
  generateImage: typeof generateImage;
  generateVideo: typeof generateVideo;
  /** Paths of temp files created while rendering; the owner cleans these up. */
  tempFiles: string[];
  /** Optional progress tracker for long-running generation tasks. */
  progress?: ProgressTracker;
  /** In-memory deduplication for concurrent renders of the same element */
  pending: Map<string, Promise<string>>;
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import type { generateImage } from "ai";
|
|
2
|
+
import { File } from "../../ai-sdk/file";
|
|
3
|
+
import type {
|
|
4
|
+
ImageInput,
|
|
5
|
+
ImagePrompt,
|
|
6
|
+
ImageProps,
|
|
7
|
+
VargElement,
|
|
8
|
+
} from "../types";
|
|
9
|
+
import type { RenderContext } from "./context";
|
|
10
|
+
import { addTask, completeTask, startTask } from "./progress";
|
|
11
|
+
import { computeCacheKey, toFileUrl } from "./utils";
|
|
12
|
+
|
|
13
|
+
async function resolveImageInput(
|
|
14
|
+
input: ImageInput,
|
|
15
|
+
ctx: RenderContext,
|
|
16
|
+
): Promise<Uint8Array> {
|
|
17
|
+
if (input instanceof Uint8Array) {
|
|
18
|
+
return input;
|
|
19
|
+
}
|
|
20
|
+
if (typeof input === "string") {
|
|
21
|
+
const response = await fetch(toFileUrl(input));
|
|
22
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
23
|
+
}
|
|
24
|
+
const path = await renderImage(input, ctx);
|
|
25
|
+
const response = await fetch(toFileUrl(path));
|
|
26
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
async function resolvePrompt(
|
|
30
|
+
prompt: ImagePrompt,
|
|
31
|
+
ctx: RenderContext,
|
|
32
|
+
): Promise<string | { text?: string; images: Uint8Array[] }> {
|
|
33
|
+
if (typeof prompt === "string") {
|
|
34
|
+
return prompt;
|
|
35
|
+
}
|
|
36
|
+
const resolvedImages = await Promise.all(
|
|
37
|
+
prompt.images.map((img) => resolveImageInput(img, ctx)),
|
|
38
|
+
);
|
|
39
|
+
return { text: prompt.text, images: resolvedImages };
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export async function renderImage(
|
|
43
|
+
element: VargElement<"image">,
|
|
44
|
+
ctx: RenderContext,
|
|
45
|
+
): Promise<string> {
|
|
46
|
+
const props = element.props as ImageProps;
|
|
47
|
+
|
|
48
|
+
if (props.src) {
|
|
49
|
+
return props.src;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const prompt = props.prompt;
|
|
53
|
+
if (!prompt) {
|
|
54
|
+
throw new Error("Image element requires either 'prompt' or 'src'");
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const model = props.model;
|
|
58
|
+
if (!model) {
|
|
59
|
+
throw new Error("Image element requires 'model' prop when using prompt");
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Compute cache key for deduplication
|
|
63
|
+
const cacheKey = computeCacheKey(element);
|
|
64
|
+
const cacheKeyStr = JSON.stringify(cacheKey);
|
|
65
|
+
|
|
66
|
+
// Check if this element is already being rendered (deduplication)
|
|
67
|
+
const pendingRender = ctx.pending.get(cacheKeyStr);
|
|
68
|
+
if (pendingRender) {
|
|
69
|
+
return pendingRender;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Create the render promise and store it for deduplication
|
|
73
|
+
const renderPromise = (async () => {
|
|
74
|
+
const resolvedPrompt = await resolvePrompt(prompt, ctx);
|
|
75
|
+
|
|
76
|
+
const modelId = typeof model === "string" ? model : model.modelId;
|
|
77
|
+
const taskId = ctx.progress
|
|
78
|
+
? addTask(ctx.progress, "image", modelId)
|
|
79
|
+
: null;
|
|
80
|
+
if (taskId && ctx.progress) startTask(ctx.progress, taskId);
|
|
81
|
+
|
|
82
|
+
const { images } = await ctx.generateImage({
|
|
83
|
+
model,
|
|
84
|
+
prompt: resolvedPrompt,
|
|
85
|
+
aspectRatio: props.aspectRatio,
|
|
86
|
+
n: 1,
|
|
87
|
+
cacheKey,
|
|
88
|
+
} as Parameters<typeof generateImage>[0]);
|
|
89
|
+
|
|
90
|
+
if (taskId && ctx.progress) completeTask(ctx.progress, taskId);
|
|
91
|
+
|
|
92
|
+
const firstImage = images[0];
|
|
93
|
+
if (!firstImage?.uint8Array) {
|
|
94
|
+
throw new Error("Image generation returned no image data");
|
|
95
|
+
}
|
|
96
|
+
const imageData = firstImage.uint8Array;
|
|
97
|
+
const tempPath = await File.toTemp({
|
|
98
|
+
uint8Array: imageData,
|
|
99
|
+
mimeType: "image/png",
|
|
100
|
+
});
|
|
101
|
+
ctx.tempFiles.push(tempPath);
|
|
102
|
+
|
|
103
|
+
return tempPath;
|
|
104
|
+
})();
|
|
105
|
+
|
|
106
|
+
ctx.pending.set(cacheKeyStr, renderPromise);
|
|
107
|
+
|
|
108
|
+
return renderPromise;
|
|
109
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// Barrel file for the renderers package: re-exports each element renderer,
// the shared RenderContext type, and progress-tracking utilities.
export { renderAnimate } from "./animate";
export { renderCaptions } from "./captions";
export { renderClip } from "./clip";
export type { RenderContext } from "./context";
export { renderImage } from "./image";
export { renderPackshot } from "./packshot";
export {
  createProgressTracker,
  type GenerationType,
  type ProgressTask,
  type ProgressTracker,
  TIME_ESTIMATES,
} from "./progress";
export { renderRoot } from "./render";
export { renderSlider } from "./slider";
export type { SpeechResult } from "./speech";
export { renderSpeech } from "./speech";
export { renderSplit } from "./split";
export { renderSwipe } from "./swipe";
export { renderTitle } from "./title";
export { computeCacheKey, getTextContent } from "./utils";
export { renderVideo } from "./video";
|