varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +48 -8
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -227
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -493
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -112
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -187
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -135
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -201
  217. package/index.ts +0 -38
  218. package/lib/README.md +0 -144
  219. package/lib/ai-sdk/fal.ts +0 -106
  220. package/lib/ai-sdk/replicate.ts +0 -107
  221. package/lib/elevenlabs.ts +0 -382
  222. package/lib/fal.ts +0 -478
  223. package/lib/ffmpeg.ts +0 -467
  224. package/lib/fireworks.ts +0 -235
  225. package/lib/groq.ts +0 -246
  226. package/lib/higgsfield.ts +0 -176
  227. package/lib/remotion/SKILL.md +0 -823
  228. package/lib/remotion/cli.ts +0 -115
  229. package/lib/remotion/functions.ts +0 -283
  230. package/lib/remotion/index.ts +0 -19
  231. package/lib/remotion/templates.ts +0 -73
  232. package/lib/replicate.ts +0 -304
  233. package/output.txt +0 -1
  234. package/test-import.ts +0 -7
  235. package/test-services.ts +0 -97
  236. package/utilities/s3.ts +0 -147
@@ -0,0 +1,261 @@
1
+ /**
2
+ * Captions/subtitles action
3
+ * Add captions to video from transcription
4
+ */
5
+
6
+ import { writeFileSync } from "node:fs";
7
+ import { z } from "zod";
8
+ import { captionStyleSchema, filePathSchema } from "../../core/schema/shared";
9
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
10
+ import { ffmpegProvider } from "../../providers/ffmpeg";
11
+ import { transcribe } from "./transcribe";
12
+
13
// Zod schema for the values the captions action accepts.
const captionsInputSchema = z.object({
  video: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  srt: filePathSchema.optional().describe("SRT file (optional)"),
  style: captionStyleSchema.default("default").describe("Caption style"),
});

// The action resolves to a plain string: the path of the captioned video.
const captionsOutputSchema = z.string().describe("Captioned video path");

// Input/output pair attached to the action definition below.
const schema: ZodSchema<
  typeof captionsInputSchema,
  typeof captionsOutputSchema
> = {
  input: captionsInputSchema,
  output: captionsOutputSchema,
};

/**
 * "captions" action definition.
 *
 * Declares no routes; execution forwards the validated inputs straight to
 * `addCaptions` and resolves with whatever path that function returns.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "captions",
  description: "Add captions/subtitles to video",
  schema,
  routes: [],
  execute: async ({ video, output, srt, style }) =>
    addCaptions({ video, output, srt, style }),
};
44
+
45
/**
 * Styling values written into the ASS "Style:" line.
 *
 * Every field is optional; `convertSrtToAss` substitutes its own default for
 * any field that is left out.
 */
export interface SubtitleStyle {
  fontName?: string;
  fontSize?: number;
  /** Colour literal copied verbatim into the ASS style line, e.g. "&HFFFFFF". */
  primaryColor?: string;
  outlineColor?: string;
  backColor?: string;
  bold?: boolean;
  /** Outline width; 0 is a legitimate value (no outline). */
  outline?: number;
  /** Shadow depth; 0 is a legitimate value (no shadow). */
  shadow?: number;
  marginV?: number;
  /** Alignment code; the presets use 2 for bottom center. */
  alignment?: number;
}

/** Options accepted by `addCaptions`. */
export interface AddCaptionsOptions {
  /** Path of the video to caption. */
  video: string;
  /** Path the captioned video is written to. */
  output: string;
  /** Existing SRT file; when omitted, one is generated by transcription. */
  srt?: string;
  /** Name of the built-in style preset (defaults to "default"). */
  style?: "default" | "tiktok" | "youtube";
}

/** Names of the built-in caption presets, derived from the public option type. */
type CaptionPreset = NonNullable<AddCaptionsOptions["style"]>;

/**
 * Built-in style presets.
 *
 * Keyed by the preset union rather than `string`, so the compiler requires one
 * entry per declared style and a lookup with a valid preset name is never
 * `undefined`.
 */
const STYLE_PRESETS: Record<CaptionPreset, SubtitleStyle> = {
  default: {
    fontName: "Arial",
    fontSize: 24,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    outline: 2,
    shadow: 1,
    marginV: 30,
    alignment: 2, // Bottom center
  },
  tiktok: {
    fontName: "Montserrat",
    fontSize: 32,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H80000000",
    bold: true,
    outline: 3,
    shadow: 0,
    marginV: 50,
    alignment: 2,
  },
  youtube: {
    fontName: "Roboto",
    fontSize: 28,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H40000000",
    outline: 2,
    shadow: 1,
    marginV: 40,
    alignment: 2,
  },
};
102
+
103
/**
 * Swap the extension of the last path segment for `suffix`.
 *
 * Only a dot inside the final segment counts as an extension, so a dotted
 * directory name is never touched. When the path has no extension the suffix
 * is appended, which guarantees the result differs from the input path.
 */
function replaceExtension(filePath: string, suffix: string): string {
  const extension = /\.[^./\\]+$/;
  return extension.test(filePath)
    ? filePath.replace(extension, suffix)
    : `${filePath}${suffix}`;
}

/**
 * Add captions to a video.
 *
 * When no SRT file is supplied, the audio track is extracted next to the video
 * (`<name>_audio.mp3`), transcribed with the "fireworks" provider, and the
 * resulting SRT is saved as `<name>.srt`. The SRT is then converted to a styled
 * ASS file beside it.
 *
 * @param options - video/output paths, optional SRT path and style preset name
 * @returns the `output` path
 * @throws Error("Transcription failed") when transcription yields no SRT text
 */
export async function addCaptions(
  options: AddCaptionsOptions,
): Promise<string> {
  const { video, output, srt: srtPath, style = "default" } = options;

  console.log(`[captions] adding captions to ${video}...`);

  // Generate an SRT file when the caller did not provide one.
  let srtFile = srtPath;
  if (!srtFile) {
    console.log("[captions] generating transcription...");

    // Derived paths must never equal the video path, otherwise the writes
    // below would clobber the source video (e.g. for an extension-less input).
    const audioPath = replaceExtension(video, "_audio.mp3");
    await ffmpegProvider.extractAudio(video, audioPath);

    const result = await transcribe({
      audioUrl: audioPath,
      provider: "fireworks",
      outputFormat: "srt",
    });

    if (!result.success || !result.srt) {
      throw new Error("Transcription failed");
    }

    srtFile = replaceExtension(video, ".srt");
    writeFileSync(srtFile, result.srt);
    console.log(`[captions] saved srt to ${srtFile}`);
  }

  // convertSrtToAss fills in defaults for missing fields, so an empty style is
  // a sufficient last-resort fallback.
  const styleConfig: SubtitleStyle =
    STYLE_PRESETS[style] ?? STYLE_PRESETS.default ?? {};

  // Convert SRT to ASS for styling (simplified - in production use a proper ASS library).
  // The ASS path must differ from the SRT path so a caller-supplied subtitle
  // file is never overwritten (e.g. "subs.SRT", "subs.txt", or an ".ass" input).
  const assCandidate = replaceExtension(srtFile, ".ass");
  const assFile = assCandidate === srtFile ? `${srtFile}.ass` : assCandidate;
  await convertSrtToAss(srtFile, assFile, styleConfig);

  // Burn subtitles into video using ffmpeg
  // NOTE(review): this is still a stub - the generated ASS file is not passed
  // to ffmpeg here, so the output is only a converted copy of the input.
  // A full implementation would use the subtitles filter.
  console.log(`[captions] burning subtitles...`);

  await ffmpegProvider.convertFormat({ input: video, output });

  console.log(`[captions] saved to ${output}`);
  return output;
}
166
+
167
/**
 * Convert an SRT file to an ASS file with a single "Default" style.
 *
 * Reads `srtPath`, parses it with `parseSrt`, and writes the ASS script
 * (header, one style line, one Dialogue line per cue) to `assPath`.
 *
 * @param srtPath - SRT file to read
 * @param assPath - ASS file to write (overwritten if it exists)
 * @param style - style values; missing fields fall back to built-in defaults
 */
async function convertSrtToAss(
  srtPath: string,
  assPath: string,
  style: SubtitleStyle,
): Promise<void> {
  const srtContent = await Bun.file(srtPath).text();

  // An empty string or a zero size/alignment would produce a broken style
  // line, so `||` is kept for those fields.
  const fontName = style.fontName || "Arial";
  const fontSize = style.fontSize || 24;
  const primaryColor = style.primaryColor || "&HFFFFFF";
  const outlineColor = style.outlineColor || "&H000000";
  const backColor = style.backColor || "&H00000000";
  const alignment = style.alignment || 2;
  // 0 is meaningful here (no outline / no shadow / no margin), so only a
  // missing value falls back. `||` turned the tiktok preset's `shadow: 0` into 1.
  const outline = style.outline ?? 2;
  const shadow = style.shadow ?? 1;
  const marginV = style.marginV ?? 30;

  const styleLine = `Style: Default,${fontName},${fontSize},${primaryColor},&H000000FF,${outlineColor},${backColor},${style.bold ? -1 : 0},0,0,0,100,100,0,0,1,${outline},${shadow},${alignment},10,10,${marginV},1`;

  // Built line by line so source indentation can never leak into the script.
  const assHeader = [
    "[Script Info]",
    "Title: Generated Subtitles",
    "ScriptType: v4.00+",
    "WrapStyle: 0",
    "ScaledBorderAndShadow: yes",
    "YCbCr Matrix: TV.601",
    "",
    "[V4+ Styles]",
    "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
    styleLine,
    "",
    "[Events]",
    "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
    "",
  ].join("\n");

  // One Dialogue line per cue; line breaks inside a cue become the ASS "\N"
  // marker (CRLF breaks included, so no stray "\r" reaches the output).
  const assDialogues = parseSrt(srtContent)
    .map((entry) => {
      const start = formatAssTime(entry.start);
      const end = formatAssTime(entry.end);
      const text = entry.text.replace(/\r?\n/g, "\\N");
      return `Dialogue: 0,${start},${end},Default,,0,0,0,,${text}`;
    })
    .join("\n");

  writeFileSync(assPath, assHeader + assDialogues);
}
206
+
207
/** One parsed SRT cue; `start` and `end` are seconds as computed by `parseTime`. */
interface SrtEntry {
  index: number;
  start: number;
  end: number;
  text: string;
}

/**
 * Parse SRT text into cues.
 *
 * Line endings are normalised first, so LF, CRLF and CR files all parse the
 * same way. Without this a CRLF file is never split on blank lines and
 * collapses into a single cue. Cues are separated by one or more blank (or
 * whitespace-only) lines. Blocks with fewer than three lines, or without a
 * valid `HH:MM:SS,mmm --> HH:MM:SS,mmm` line in second position, are skipped.
 *
 * @param content - raw SRT file content
 * @returns the cues in file order; empty when nothing could be parsed
 */
function parseSrt(content: string): SrtEntry[] {
  const timeRange =
    /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/;
  const normalized = content.replace(/\r\n?/g, "\n").trim();
  const entries: SrtEntry[] = [];

  for (const block of normalized.split(/\n(?:[ \t]*\n)+/)) {
    const lines = block.split("\n");
    if (lines.length < 3) continue;

    const index = Number.parseInt(lines[0] || "0", 10);
    const timeMatch = (lines[1] || "").match(timeRange);
    if (!timeMatch) continue;

    const [, h1, m1, s1, ms1, h2, m2, s2, ms2] = timeMatch;
    // Every group is mandatory in the pattern; this guard only narrows the types.
    if (!h1 || !m1 || !s1 || !ms1 || !h2 || !m2 || !s2 || !ms2) continue;

    entries.push({
      index,
      start: parseTime(h1, m1, s1, ms1),
      end: parseTime(h2, m2, s2, ms2),
      text: lines.slice(2).join("\n"),
    });
  }

  return entries;
}

/**
 * Combine the textual parts of an SRT timestamp into seconds.
 *
 * @param h - hours
 * @param m - minutes
 * @param s - seconds
 * @param ms - milliseconds
 * @returns the timestamp in (fractional) seconds
 */
function parseTime(h: string, m: string, s: string, ms: string): number {
  return (
    Number.parseInt(h, 10) * 3600 +
    Number.parseInt(m, 10) * 60 +
    Number.parseInt(s, 10) +
    Number.parseInt(ms, 10) / 1000
  );
}
251
+
252
/**
 * Format a time in seconds as an ASS timestamp (`H:MM:SS.cc`).
 *
 * The value is first rounded to whole centiseconds and then split with
 * integer arithmetic. Splitting the float directly loses a centisecond to
 * representation error (2.57 came out as ".56") and cannot carry into the
 * next second. Negative or non-finite input is formatted as zero.
 *
 * @param seconds - time offset in seconds
 * @returns the timestamp, with unpadded hours and two-digit minutes, seconds and centiseconds
 */
function formatAssTime(seconds: number): string {
  const totalCs = Number.isFinite(seconds)
    ? Math.max(0, Math.round(seconds * 100))
    : 0;

  const cs = totalCs % 100;
  const totalSeconds = Math.floor(totalCs / 100);
  const s = totalSeconds % 60;
  const m = Math.floor(totalSeconds / 60) % 60;
  const h = Math.floor(totalSeconds / 3600);

  const pad = (value: number): string => String(value).padStart(2, "0");
  return `${h}:${pad(m)}:${pad(s)}.${pad(cs)}`;
}
260
+
261
+ export default definition;
@@ -0,0 +1,298 @@
1
+ /**
2
+ * Video editing actions
3
+ * FFmpeg-based local video processing
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import { filePathSchema } from "../../core/schema/shared";
8
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
9
+ import { ffmpegProvider } from "../../providers/ffmpeg";
10
+
11
+ // ============================================================================
12
+ // Trim action
13
+ // ============================================================================
14
+
15
// Inputs shared by the trim action and its "cut" alias.
const trimInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  start: z.number().describe("Start time in seconds"),
  duration: z.number().optional().describe("Duration in seconds"),
});

// The action resolves to the path of the trimmed video.
const trimOutputSchema = z.string().describe("Trimmed video path");

const trimSchema: ZodSchema<typeof trimInputSchema, typeof trimOutputSchema> = {
  input: trimInputSchema,
  output: trimOutputSchema,
};

/**
 * "trim" action definition.
 *
 * Has no routes; execution hands the validated inputs to
 * `ffmpegProvider.trimVideo` and resolves with its result.
 */
export const trimDefinition: ActionDefinition<typeof trimSchema> = {
  type: "action",
  name: "trim",
  description: "Trim video to specific time range",
  schema: trimSchema,
  routes: [],
  execute: async ({ input, output, start, duration }) =>
    ffmpegProvider.trimVideo({ input, output, start, duration }),
};
40
+
41
+ // ============================================================================
42
+ // Cut action (alias for trim)
43
+ // ============================================================================
44
+
45
/**
 * "cut" action definition.
 *
 * Reuses the trim schema and declares no `execute` of its own; its single
 * route targets "trim".
 */
export const cutDefinition: ActionDefinition<typeof trimSchema> = {
  type: "action",
  name: "cut",
  description: "Cut video at specific point",
  schema: trimSchema,
  routes: [{ target: "trim" }],
};
52
+
53
+ // ============================================================================
54
+ // Merge action
55
+ // ============================================================================
56
+
57
// Inputs for merge: the source videos plus the path to write.
// NOTE(review): `inputs` is validated as plain strings, while every other
// path in this file uses filePathSchema - confirm that this is intentional.
const mergeInputSchema = z.object({
  inputs: z.array(z.string()).describe("Input video paths"),
  output: filePathSchema.describe("Output path"),
});

// The action resolves to the path of the merged video.
const mergeOutputSchema = z.string().describe("Merged video path");

const mergeSchema: ZodSchema<
  typeof mergeInputSchema,
  typeof mergeOutputSchema
> = {
  input: mergeInputSchema,
  output: mergeOutputSchema,
};

/**
 * "merge" action definition.
 *
 * Has no routes; execution passes the list of videos and the output path to
 * `ffmpegProvider.concatVideos`.
 */
export const mergeDefinition: ActionDefinition<typeof mergeSchema> = {
  type: "action",
  name: "merge",
  description: "Merge multiple videos together",
  schema: mergeSchema,
  routes: [],
  execute: async ({ inputs: videoInputs, output }) =>
    ffmpegProvider.concatVideos({ inputs: videoInputs, output }),
};
83
+
84
+ // ============================================================================
85
+ // Split action
86
+ // ============================================================================
87
+
88
// Inputs for split: the source video, the split points, and an output prefix.
const splitInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  timestamps: z.array(z.number()).describe("Split points in seconds"),
  outputPrefix: z.string().describe("Output filename prefix"),
});

// Unlike the other edit actions, split resolves to an array of output paths.
const splitOutputSchema = z.array(z.string());

const splitSchema: ZodSchema<
  typeof splitInputSchema,
  typeof splitOutputSchema
> = {
  input: splitInputSchema,
  output: splitOutputSchema,
};

/**
 * "split" action definition.
 *
 * Has no routes; execution delegates to `ffmpegProvider.splitAtTimestamps`.
 */
export const splitDefinition: ActionDefinition<typeof splitSchema> = {
  type: "action",
  name: "split",
  description: "Split video at timestamps",
  schema: splitSchema,
  routes: [],
  execute: async ({ input, timestamps, outputPrefix }) =>
    ffmpegProvider.splitAtTimestamps({ input, timestamps, outputPrefix }),
};
120
+
121
+ // ============================================================================
122
+ // Fade action
123
+ // ============================================================================
124
+
125
// Inputs for fade: source, destination, which end(s) to fade, and for how long.
const fadeInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  type: z.enum(["in", "out", "both"]).describe("Fade type"),
  duration: z.number().describe("Fade duration in seconds"),
});

// The action resolves to the path of the faded video.
const fadeOutputSchema = z.string().describe("Faded video path");

const fadeSchema: ZodSchema<typeof fadeInputSchema, typeof fadeOutputSchema> = {
  input: fadeInputSchema,
  output: fadeOutputSchema,
};

/**
 * "fade" action definition.
 *
 * Has no routes; execution delegates to `ffmpegProvider.fadeVideo`.
 */
export const fadeDefinition: ActionDefinition<typeof fadeSchema> = {
  type: "action",
  name: "fade",
  description: "Apply fade in/out effects",
  schema: fadeSchema,
  routes: [],
  execute: async ({ input, output, type, duration }) =>
    ffmpegProvider.fadeVideo({ input, output, type, duration }),
};
150
+
151
+ // ============================================================================
152
+ // Transition action
153
+ // ============================================================================
154
+
155
// Inputs for transition: two clips, the output path, the effect and its
// duration, and how to reconcile clips of different resolutions ("pad" by default).
const transitionInputSchema = z.object({
  input1: filePathSchema.describe("First video"),
  input2: filePathSchema.describe("Second video"),
  output: filePathSchema.describe("Output path"),
  transition: z
    .enum([
      "crossfade",
      "dissolve",
      "wipeleft",
      "wiperight",
      "slideup",
      "slidedown",
    ])
    .describe("Transition type"),
  duration: z.number().describe("Transition duration"),
  fit: z
    .enum(["pad", "crop", "blur", "stretch"])
    .default("pad")
    .describe("How to handle different resolutions"),
});

// The action resolves to the output path.
const transitionOutputSchema = z.string().describe("Output path");

const transitionSchema: ZodSchema<
  typeof transitionInputSchema,
  typeof transitionOutputSchema
> = {
  input: transitionInputSchema,
  output: transitionOutputSchema,
};

/**
 * "transition" action definition.
 *
 * Has no routes; execution delegates to `ffmpegProvider.xfadeVideos` with all
 * six validated inputs.
 */
export const transitionDefinition: ActionDefinition<typeof transitionSchema> = {
  type: "action",
  name: "transition",
  description: "Apply transition between two videos",
  schema: transitionSchema,
  routes: [],
  execute: async ({ input1, input2, output, transition, duration, fit }) =>
    ffmpegProvider.xfadeVideos({
      input1,
      input2,
      output,
      transition,
      duration,
      fit,
    }),
};
204
+
205
+ // ============================================================================
206
+ // Remove (audio) action
207
+ // ============================================================================
208
+
209
// Inputs for remove: source, destination, and which stream is targeted
// ("audio" by default).
const removeInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  what: z
    .enum(["audio", "video"])
    .default("audio")
    .describe("What to extract/remove"),
});

// The action resolves to the output path.
const removeOutputSchema = z.string().describe("Output path");

const removeSchema: ZodSchema<
  typeof removeInputSchema,
  typeof removeOutputSchema
> = {
  input: removeInputSchema,
  output: removeOutputSchema,
};

/**
 * "remove" action definition.
 *
 * With `what: "audio"` the call goes to `ffmpegProvider.extractAudio`; any
 * other value goes to `ffmpegProvider.convertFormat`.
 *
 * NOTE(review): the action is named "remove", yet the "audio" branch calls
 * `extractAudio`, and the other branch calls `convertFormat` with only
 * input/output - nothing visible here asks for a stream to be dropped.
 * Confirm the intended semantics against the provider.
 */
export const removeDefinition: ActionDefinition<typeof removeSchema> = {
  type: "action",
  name: "remove",
  description: "Remove audio from video or extract audio",
  schema: removeSchema,
  routes: [],
  execute: async ({ input, output, what }) =>
    what === "audio"
      ? ffmpegProvider.extractAudio(input, output)
      : ffmpegProvider.convertFormat({ input, output }),
};
245
+
246
/** Every edit action defined in this module, in declaration order. */
export const definitions = [
  trimDefinition,
  cutDefinition,
  mergeDefinition,
  splitDefinition,
  fadeDefinition,
  transitionDefinition,
  removeDefinition,
];
256
+
257
// Option/result types kept for backward compatibility. Each pair is derived
// from the matching ffmpeg provider method, so it tracks the provider signature.
type FfmpegProvider = typeof ffmpegProvider;

export type TrimOptions = Parameters<FfmpegProvider["trimVideo"]>[0];
export type TrimResult = Awaited<ReturnType<FfmpegProvider["trimVideo"]>>;

// "cut" is an alias of "trim", so it shares both types.
export type CutOptions = TrimOptions;
export type CutResult = TrimResult;

export type MergeOptions = Parameters<FfmpegProvider["concatVideos"]>[0];
export type MergeResult = Awaited<ReturnType<FfmpegProvider["concatVideos"]>>;

export type SplitOptions = Parameters<FfmpegProvider["splitAtTimestamps"]>[0];
export type SplitResult = Awaited<
  ReturnType<FfmpegProvider["splitAtTimestamps"]>
>;

export type FadeOptions = Parameters<FfmpegProvider["fadeVideo"]>[0];
export type FadeResult = Awaited<ReturnType<FfmpegProvider["fadeVideo"]>>;

export type TransitionOptions = Parameters<FfmpegProvider["xfadeVideos"]>[0];
export type TransitionResult = Awaited<
  ReturnType<FfmpegProvider["xfadeVideos"]>
>;

// "remove" is not backed by a single provider method, so its types are
// spelled out by hand.
export type RemoveOptions = {
  input: string;
  output: string;
  what?: "audio" | "video";
};
export type RemoveResult = string;
286
+
287
// Convenience wrappers: call the ffmpeg provider directly, without going
// through an action definition.

/** Trim a video; forwards to `ffmpegProvider.trimVideo`. */
export const trim = (options: TrimOptions) => ffmpegProvider.trimVideo(options);

/** Alias of `trim`. */
export const cut = trim;

/** Concatenate videos; forwards to `ffmpegProvider.concatVideos`. */
export const merge = (options: MergeOptions) =>
  ffmpegProvider.concatVideos(options);

/** Split a video at timestamps; forwards to `ffmpegProvider.splitAtTimestamps`. */
export const split = (options: SplitOptions) =>
  ffmpegProvider.splitAtTimestamps(options);

/** Apply a fade; forwards to `ffmpegProvider.fadeVideo`. */
export const fade = (options: FadeOptions) => ffmpegProvider.fadeVideo(options);

/** Join two videos with a transition; forwards to `ffmpegProvider.xfadeVideos`. */
export const transition = (options: TransitionOptions) =>
  ffmpegProvider.xfadeVideos(options);

/** The `execute` handler of the "remove" action, exposed as-is. */
export const remove = removeDefinition.execute;
297
+
298
+ export default trimDefinition;
@@ -0,0 +1,125 @@
1
+ /**
2
+ * Image generation action
3
+ * Routes to Fal or Higgsfield based on options
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import { imageSizeSchema } from "../../core/schema/shared";
8
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
9
+ import { falProvider } from "../../providers/fal";
10
+ import { higgsfieldProvider } from "../../providers/higgsfield";
11
+ import { storageProvider } from "../../providers/storage";
12
+
13
// Zod schema for the image action inputs: the prompt, an image size
// ("landscape_4_3" by default) and the provider ("fal" by default).
const imageInputSchema = z.object({
  prompt: z.string().describe("What to generate"),
  size: imageSizeSchema
    .default("landscape_4_3")
    .describe("Image size/aspect ratio"),
  provider: z
    .enum(["fal", "higgsfield"])
    .default("fal")
    .describe("Generation provider"),
});

// The action resolves to the generated image URL and, when an upload
// happened, the uploaded location.
const imageOutputSchema = z.object({
  imageUrl: z.string(),
  uploaded: z.string().optional(),
});

// Input/output pair attached to the action definition below.
const schema: ZodSchema<typeof imageInputSchema, typeof imageOutputSchema> = {
  input: imageInputSchema,
  output: imageOutputSchema,
};

/**
 * "image" action definition.
 *
 * Routes: "flux" when the provider is "fal" (priority 5), "soul" when the
 * provider is "higgsfield" (priority 10). Direct execution mirrors that split:
 * "higgsfield" goes to `generateWithSoul`, everything else to
 * `generateWithFal`. Only the Fal path receives `size`.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "image",
  description: "Generate image from text",
  schema,
  routes: [
    { target: "flux", when: { provider: "fal" }, priority: 5 },
    { target: "soul", when: { provider: "higgsfield" }, priority: 10 },
  ],
  execute: async ({ prompt, size, provider }) =>
    provider === "higgsfield"
      ? generateWithSoul(prompt)
      : generateWithFal(prompt, { imageSize: size }),
};
64
+
65
/** Result shared by both image generators. */
export interface ImageGenerationResult {
  /** URL of the generated image as returned by the provider. */
  imageUrl: string;
  /** Value returned by the storage upload; present only when `upload` was requested. */
  uploaded?: string;
}

/**
 * Generate an image through the Fal provider.
 *
 * @param prompt - text prompt
 * @param options - optional `imageSize` forwarded to Fal, and `upload` to also
 *   copy the image to storage under `images/fal/<timestamp>.png`
 * @returns the image URL, plus the upload result when `upload` is set
 * @throws Error("No image URL in result") when the response has no first image URL
 */
export async function generateWithFal(
  prompt: string,
  options: { imageSize?: string; upload?: boolean } = {},
): Promise<ImageGenerationResult> {
  console.log("[image] generating with fal");

  const response = await falProvider.generateImage({
    prompt,
    imageSize: options.imageSize,
  });

  // Only the first image of the response is used.
  const payload = response.data as { images?: Array<{ url?: string }> };
  const imageUrl = payload?.images?.[0]?.url;
  if (!imageUrl) {
    throw new Error("No image URL in result");
  }

  if (!options.upload) {
    return { imageUrl, uploaded: undefined };
  }

  // The object key is derived from the current time in milliseconds.
  const objectKey = `images/fal/${Date.now()}.png`;
  const uploaded = await storageProvider.uploadFromUrl(imageUrl, objectKey);
  console.log(`[image] uploaded to ${uploaded}`);

  return { imageUrl, uploaded };
}
97
+
98
/**
 * Generate an image through the Higgsfield "soul" model.
 *
 * @param prompt - text prompt
 * @param options - optional `styleId` forwarded to Higgsfield, and `upload` to
 *   also copy the image to storage under `images/soul/<timestamp>.png`
 * @returns the image URL, plus the upload result when `upload` is set
 * @throws Error("No image URL in result") when the first job has no raw result URL
 */
export async function generateWithSoul(
  prompt: string,
  options: { styleId?: string; upload?: boolean } = {},
): Promise<ImageGenerationResult> {
  console.log("[image] generating with higgsfield soul");

  const response = await higgsfieldProvider.generateSoul({
    prompt,
    styleId: options.styleId,
  });

  // Only the raw result of the first job is used.
  const imageUrl = response.jobs?.[0]?.results?.raw?.url;
  if (!imageUrl) {
    throw new Error("No image URL in result");
  }

  if (!options.upload) {
    return { imageUrl, uploaded: undefined };
  }

  // The object key is derived from the current time in milliseconds.
  const objectKey = `images/soul/${Date.now()}.png`;
  const uploaded = await storageProvider.uploadFromUrl(imageUrl, objectKey);
  console.log(`[image] uploaded to ${uploaded}`);

  return { imageUrl, uploaded };
}
124
+
125
+ export default definition;