varg.ai-sdk 0.1.1 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246)
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +43 -10
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +58 -68
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -169
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -437
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -105
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -145
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -125
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -136
  217. package/cli/commands/find.ts +0 -58
  218. package/cli/commands/help.ts +0 -70
  219. package/cli/commands/list.ts +0 -49
  220. package/cli/commands/run.ts +0 -237
  221. package/cli/commands/which.ts +0 -66
  222. package/cli/discover.ts +0 -66
  223. package/cli/index.ts +0 -33
  224. package/cli/runner.ts +0 -65
  225. package/cli/types.ts +0 -49
  226. package/cli/ui.ts +0 -185
  227. package/index.ts +0 -75
  228. package/lib/README.md +0 -144
  229. package/lib/ai-sdk/fal.ts +0 -106
  230. package/lib/ai-sdk/replicate.ts +0 -107
  231. package/lib/elevenlabs.ts +0 -382
  232. package/lib/fal.ts +0 -467
  233. package/lib/ffmpeg.ts +0 -467
  234. package/lib/fireworks.ts +0 -235
  235. package/lib/groq.ts +0 -246
  236. package/lib/higgsfield.ts +0 -176
  237. package/lib/remotion/SKILL.md +0 -823
  238. package/lib/remotion/cli.ts +0 -115
  239. package/lib/remotion/functions.ts +0 -283
  240. package/lib/remotion/index.ts +0 -19
  241. package/lib/remotion/templates.ts +0 -73
  242. package/lib/replicate.ts +0 -304
  243. package/output.txt +0 -1
  244. package/test-import.ts +0 -7
  245. package/test-services.ts +0 -97
  246. package/utilities/s3.ts +0 -147
@@ -0,0 +1,114 @@
1
+ /**
2
+ * Action definitions index
3
+ */
4
+
5
+ export type { AddCaptionsOptions, SubtitleStyle } from "./captions";
6
+ // Captions
7
+ export { addCaptions, definition as captions } from "./captions";
8
+ export type {
9
+ CutOptions,
10
+ CutResult,
11
+ FadeOptions,
12
+ FadeResult,
13
+ MergeOptions,
14
+ MergeResult,
15
+ RemoveOptions,
16
+ RemoveResult,
17
+ SplitOptions,
18
+ SplitResult,
19
+ TransitionOptions,
20
+ TransitionResult,
21
+ TrimOptions,
22
+ TrimResult,
23
+ } from "./edit";
24
+ // Video editing (FFmpeg)
25
+ export {
26
+ cut,
27
+ cutDefinition,
28
+ fade,
29
+ fadeDefinition,
30
+ merge,
31
+ mergeDefinition,
32
+ remove,
33
+ removeDefinition,
34
+ split,
35
+ splitDefinition,
36
+ transition,
37
+ transitionDefinition,
38
+ trim,
39
+ trimDefinition,
40
+ } from "./edit";
41
+ export type { ImageGenerationResult } from "./image";
42
+ // Image generation
43
+ export {
44
+ definition as image,
45
+ generateWithFal,
46
+ generateWithSoul,
47
+ } from "./image";
48
+ export type { GenerateMusicOptions, MusicResult } from "./music";
49
+ // Music generation
50
+ export { definition as music, generateMusic } from "./music";
51
+ export type { LipsyncOptions, LipsyncResult, Wav2LipOptions } from "./sync";
52
+ // Lip sync
53
+ export {
54
+ definition as sync,
55
+ lipsync,
56
+ lipsyncOverlay,
57
+ lipsyncWav2Lip,
58
+ } from "./sync";
59
+ export type { TranscribeOptions, TranscribeResult } from "./transcribe";
60
+ // Transcription
61
+ export {
62
+ definition as transcribe,
63
+ transcribe as transcribeAudio,
64
+ } from "./transcribe";
65
+ export type { UploadOptions, UploadResult } from "./upload";
66
+ // Upload
67
+ export { definition as uploadDef, upload } from "./upload";
68
+ export type { VideoGenerationResult } from "./video";
69
+ // Video generation
70
+ export {
71
+ definition as video,
72
+ generateVideoFromImage,
73
+ generateVideoFromText,
74
+ } from "./video";
75
+ export type { GenerateVoiceOptions, VoiceResult } from "./voice";
76
+ // Voice generation
77
+ export { definition as voice, generateVoice } from "./voice";
78
+
79
+ // All action definitions for auto-loading
80
+ import { definition as captionsDefinition } from "./captions";
81
+ import {
82
+ cutDefinition,
83
+ fadeDefinition,
84
+ mergeDefinition,
85
+ removeDefinition,
86
+ splitDefinition,
87
+ transitionDefinition,
88
+ trimDefinition,
89
+ } from "./edit";
90
+ import { definition as imageDefinition } from "./image";
91
+ import { definition as musicDefinition } from "./music";
92
+ import { definition as syncDefinition } from "./sync";
93
+ import { definition as transcribeDefinition } from "./transcribe";
94
+ import { definition as uploadDefinition } from "./upload";
95
+ import { definition as videoDefinition } from "./video";
96
+ import { definition as voiceDefinition } from "./voice";
97
+
98
+ export const allActions = [
99
+ videoDefinition,
100
+ imageDefinition,
101
+ voiceDefinition,
102
+ transcribeDefinition,
103
+ musicDefinition,
104
+ syncDefinition,
105
+ captionsDefinition,
106
+ trimDefinition,
107
+ cutDefinition,
108
+ mergeDefinition,
109
+ splitDefinition,
110
+ fadeDefinition,
111
+ transitionDefinition,
112
+ removeDefinition,
113
+ uploadDefinition,
114
+ ];
@@ -0,0 +1,205 @@
1
+ /**
2
+ * Music generation action
3
+ * Text-to-music via Fal/Sonauto
4
+ */
5
+
6
+ import { writeFile } from "node:fs/promises";
7
+ import { z } from "zod";
8
+ import { audioFormatSchema, filePathSchema } from "../../core/schema/shared";
9
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
10
+ import { falProvider } from "../../providers/fal";
11
+ import { storageProvider } from "../../providers/storage";
12
+
13
+ // Input schema with Zod
14
+ const musicInputSchema = z.object({
15
+ prompt: z.string().optional().describe("Description of music to generate"),
16
+ tags: z
17
+ .array(z.string())
18
+ .optional()
19
+ .describe("Style tags like 'rock', 'energetic'"),
20
+ lyrics: z.string().optional().describe("Optional lyrics prompt"),
21
+ format: audioFormatSchema.default("mp3").describe("Output format"),
22
+ numSongs: z
23
+ .union([z.literal(1), z.literal(2)])
24
+ .default(1)
25
+ .describe("Number of songs to generate"),
26
+ output: filePathSchema.optional().describe("Output file path"),
27
+ });
28
+
29
+ // Output schema with Zod
30
+ const musicOutputSchema = z.object({
31
+ seed: z.number(),
32
+ tags: z.array(z.string()).optional(),
33
+ lyrics: z.string().optional(),
34
+ audio: z.array(
35
+ z.object({
36
+ url: z.string(),
37
+ fileName: z.string(),
38
+ contentType: z.string(),
39
+ fileSize: z.number(),
40
+ }),
41
+ ),
42
+ uploadUrls: z.array(z.string()).optional(),
43
+ });
44
+
45
+ // Schema object for the definition
46
+ const schema: ZodSchema<typeof musicInputSchema, typeof musicOutputSchema> = {
47
+ input: musicInputSchema,
48
+ output: musicOutputSchema,
49
+ };
50
+
51
/**
 * Action definition for the `music` action.
 *
 * `execute` hands the action inputs to `generateMusic`, passing the `output`
 * input through as `outputPath`.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "music",
  description: "Generate music from text prompt or tags",
  schema,
  routes: [],
  execute: async ({ prompt, tags, lyrics, format, numSongs, output }) =>
    generateMusic({
      prompt,
      tags,
      lyrics,
      format,
      numSongs,
      outputPath: output,
    }),
};
68
+
69
+ // Types
70
+ export interface GenerateMusicOptions {
71
+ prompt?: string;
72
+ tags?: string[];
73
+ lyrics?: string;
74
+ seed?: number;
75
+ promptStrength?: number;
76
+ balanceStrength?: number;
77
+ numSongs?: 1 | 2;
78
+ format?: "flac" | "mp3" | "wav" | "ogg" | "m4a";
79
+ bitRate?: 128 | 192 | 256 | 320;
80
+ bpm?: number | "auto";
81
+ upload?: boolean;
82
+ outputPath?: string;
83
+ }
84
+
85
+ export interface MusicResult {
86
+ seed: number;
87
+ tags?: string[];
88
+ lyrics?: string;
89
+ audio: Array<{
90
+ url: string;
91
+ fileName: string;
92
+ contentType: string;
93
+ fileSize: number;
94
+ }>;
95
+ uploadUrls?: string[];
96
+ }
97
+
98
/**
 * Build the per-song output path used when more than one song is saved.
 *
 * Inserts `-<songNumber>` before the extension of the final path segment and
 * sets the extension to `ext`. When the final segment has no extension
 * (including dotfiles, or paths whose only dots are in directory names or a
 * leading `./`), `-<songNumber>.<ext>` is appended to the whole path instead.
 */
function numberedOutputPath(
  outputPath: string,
  songNumber: number,
  ext: string,
): string {
  const lastSeparator = Math.max(
    outputPath.lastIndexOf("/"),
    outputPath.lastIndexOf("\\"),
  );
  const lastDot = outputPath.lastIndexOf(".");
  // A dot only starts an extension when it lies inside the final segment and
  // is not that segment's first character.
  const hasExtension = lastDot > lastSeparator + 1;
  const stem = hasExtension ? outputPath.slice(0, lastDot) : outputPath;
  return `${stem}-${songNumber}.${ext}`;
}

/**
 * Generate music through `falProvider.textToMusic`.
 *
 * The provider response is normalised into a `MusicResult` (the `audio` field
 * is accepted either as a single object or as an array). Optionally each song
 * is downloaded to disk and/or re-uploaded through `storageProvider`.
 *
 * @param options - Generation settings. At least one of `prompt` or a
 *   non-empty `tags` array must be supplied. When `outputPath` is set the
 *   songs are written locally; with several songs each file gets a
 *   `-<n>.<format>` suffix. When `upload` is true each song is uploaded under
 *   a `music/<timestamp>-<n>.<format>` key.
 * @returns The normalised result, with `uploadUrls` filled in when `upload`
 *   is true.
 * @throws Error if neither a prompt nor any tag is given, or if downloading a
 *   generated song returns a non-OK HTTP status.
 */
export async function generateMusic(
  options: GenerateMusicOptions,
): Promise<MusicResult> {
  const {
    prompt,
    tags,
    lyrics,
    seed,
    promptStrength = 2,
    balanceStrength = 0.7,
    numSongs = 1,
    format = "mp3",
    bitRate,
    bpm = "auto",
    upload = false,
    outputPath,
  } = options;

  // An empty tag list carries no information, so it does not satisfy the
  // "prompt or tags" requirement.
  const hasTags = Array.isArray(tags) && tags.length > 0;
  if (!prompt && !hasTags) {
    throw new Error("Either prompt or tags is required");
  }

  console.log(`[music] generating ${numSongs} song(s)...`);
  if (prompt) console.log(`[music] prompt: ${prompt}`);
  if (tags && tags.length > 0) console.log(`[music] tags: ${tags.join(", ")}`);

  const result = await falProvider.textToMusic({
    prompt,
    tags,
    lyricsPrompt: lyrics,
    seed,
    promptStrength,
    balanceStrength,
    numSongs,
    outputFormat: format,
    outputBitRate: bitRate,
    bpm,
  });

  // Convert the provider's snake_case file descriptor to the camelCase shape
  // exposed by MusicResult.
  const toAudio = (raw: {
    url: string;
    file_name: string;
    content_type: string;
    file_size: number;
  }) => ({
    url: raw.url,
    fileName: raw.file_name,
    contentType: raw.content_type,
    fileSize: raw.file_size,
  });

  const rawAudio = result.data.audio;
  const musicResult: MusicResult = {
    seed: result.data.seed,
    tags: result.data.tags,
    lyrics: result.data.lyrics,
    audio: Array.isArray(rawAudio) ? rawAudio.map(toAudio) : [toAudio(rawAudio)],
  };

  // Save files locally if requested
  if (outputPath) {
    const songCount = musicResult.audio.length;
    for (const [index, audio] of musicResult.audio.entries()) {
      const filePath =
        songCount === 1
          ? outputPath
          : numberedOutputPath(outputPath, index + 1, format);

      const response = await fetch(audio.url);
      if (!response.ok) {
        // Without this check an HTTP error page would be written out as audio.
        throw new Error(
          `Failed to download ${audio.url}: ${response.status} ${response.statusText}`,
        );
      }
      const buffer = await response.arrayBuffer();
      await writeFile(filePath, Buffer.from(buffer));
      console.log(`[music] saved to ${filePath}`);
    }
  }

  // Upload to storage if requested
  if (upload) {
    const uploadUrls: string[] = [];
    for (const [index, audio] of musicResult.audio.entries()) {
      const objectKey = `music/${Date.now()}-${index + 1}.${format}`;
      const uploadUrl = await storageProvider.uploadFromUrl(
        audio.url,
        objectKey,
      );
      uploadUrls.push(uploadUrl);
      console.log(`[music] uploaded to ${uploadUrl}`);
    }
    musicResult.uploadUrls = uploadUrls;
  }

  return musicResult;
}
204
+
205
+ export default definition;
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Lip sync action
3
+ * Audio-to-video synchronization
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import {
8
+ filePathSchema,
9
+ resolutionSchema,
10
+ videoDurationStringSchema,
11
+ } from "../../core/schema/shared";
12
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
13
+ import { falProvider } from "../../providers/fal";
14
+ import { ffmpegProvider } from "../../providers/ffmpeg";
15
+
16
+ // Input schema with Zod
17
+ const syncInputSchema = z.object({
18
+ image: filePathSchema.describe("Input image"),
19
+ audio: filePathSchema.describe("Audio file"),
20
+ prompt: z.string().describe("Description of the scene"),
21
+ duration: videoDurationStringSchema.default("5").describe("Output duration"),
22
+ resolution: resolutionSchema.default("480p").describe("Output resolution"),
23
+ });
24
+
25
+ // Output schema with Zod
26
+ const syncOutputSchema = z.object({
27
+ videoUrl: z.string(),
28
+ });
29
+
30
+ // Schema object for the definition
31
+ const schema: ZodSchema<typeof syncInputSchema, typeof syncOutputSchema> = {
32
+ input: syncInputSchema,
33
+ output: syncOutputSchema,
34
+ };
35
+
36
/**
 * Action definition for the `sync` action.
 *
 * `execute` passes the image, audio, prompt, duration and resolution inputs
 * straight to `lipsync`.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "sync",
  description: "Lip sync audio to video/image",
  schema,
  routes: [],
  execute: async (inputs) =>
    lipsync({
      image: inputs.image,
      audio: inputs.audio,
      prompt: inputs.prompt,
      duration: inputs.duration,
      resolution: inputs.resolution,
    }),
};
47
+
48
+ // Types
49
+ export interface LipsyncOptions {
50
+ image: string;
51
+ audio: string;
52
+ prompt: string;
53
+ duration?: "5" | "10";
54
+ resolution?: "480p" | "720p" | "1080p";
55
+ }
56
+
57
+ export interface LipsyncResult {
58
+ videoUrl: string;
59
+ }
60
+
61
+ export interface Wav2LipOptions {
62
+ videoPath: string;
63
+ audioPath: string;
64
+ outputPath: string;
65
+ }
66
+
67
/**
 * Produce a lip-synced video by calling `falProvider.wan25`.
 *
 * @param options - Image, audio and scene prompt, plus optional duration
 *   (defaults to "5") and resolution (defaults to "480p").
 * @returns The URL of the generated video.
 * @throws Error when the provider result carries no video URL.
 */
export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
  const {
    image: imageUrl,
    audio: audioUrl,
    prompt,
    duration = "5",
    resolution = "480p",
  } = options;

  console.log("[sync] generating lip-synced video with wan-25...");

  const response = await falProvider.wan25({
    imageUrl,
    audioUrl,
    prompt,
    duration,
    resolution,
  });

  const videoUrl = response.data?.video?.url;
  if (videoUrl) {
    return { videoUrl };
  }
  throw new Error("No video URL in result");
}
90
+
91
/**
 * Overlay a lip-synced face onto the original video.
 *
 * Placeholder: no compositing is done yet. `originalVideo` is currently
 * unused; the lip-synced video is passed through
 * `ffmpegProvider.convertFormat` to `outputPath`.
 *
 * @returns The `outputPath` that was written.
 */
export async function lipsyncOverlay(options: {
  originalVideo: string;
  lipsyncedVideo: string;
  outputPath: string;
}): Promise<string> {
  const { lipsyncedVideo: input, outputPath: output } = options;

  console.log("[sync] overlaying lip-synced video...");

  // A real overlay needs more involved ffmpeg work; for now the lip-synced
  // clip is emitted as-is.
  await ffmpegProvider.convertFormat({ input, output });

  return output;
}
112
+
113
/**
 * Wav2Lip-style lip sync (placeholder for future implementation).
 *
 * No lip sync is performed yet: `options.audioPath` is ignored and the input
 * video is passed through `ffmpegProvider.convertFormat` to
 * `options.outputPath`. A warning is logged so callers can see that the
 * output is not lip-synced.
 *
 * @param options - Source video, audio (currently unused) and output path.
 * @returns The `outputPath` that was written.
 */
export async function lipsyncWav2Lip(options: Wav2LipOptions): Promise<string> {
  // The warning states what actually happens: nothing here calls wan-25.
  console.warn(
    "[sync] wav2lip not yet implemented, returning input video without lip sync",
  );

  await ffmpegProvider.convertFormat({
    input: options.videoPath,
    output: options.outputPath,
  });

  return options.outputPath;
}
127
+
128
+ export default definition;
@@ -1,63 +1,63 @@
1
- #!/usr/bin/env bun
2
-
3
1
  /**
4
- * audio transcription service
5
- * supports groq whisper, fireworks api, and future providers
2
+ * Transcription action
3
+ * Speech-to-text via Groq or Fireworks
6
4
  */
7
5
 
8
6
  import { writeFileSync } from "node:fs";
9
7
  import { toFile } from "groq-sdk/uploads";
10
- import type { ActionMeta } from "../../cli/types";
8
+ import { z } from "zod";
9
+ import {
10
+ filePathSchema,
11
+ transcriptionProviderSchema,
12
+ } from "../../core/schema/shared";
13
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
11
14
  import {
12
15
  convertFireworksToSRT,
13
- transcribeWithFireworks as fireworksTranscribe,
14
- } from "../../lib/fireworks";
15
- import { GROQ_MODELS, transcribeAudio as groqTranscribe } from "../../lib/groq";
16
+ fireworksProvider,
17
+ } from "../../providers/fireworks";
18
+ import { GROQ_MODELS, groqProvider } from "../../providers/groq";
19
+
20
+ // Input schema with Zod
21
+ const transcribeInputSchema = z.object({
22
+ audio: filePathSchema.describe("Audio/video file to transcribe"),
23
+ provider: transcriptionProviderSchema
24
+ .default("groq")
25
+ .describe("Transcription provider"),
26
+ output: filePathSchema.optional().describe("Output file path"),
27
+ });
28
+
29
+ // Output schema with Zod
30
+ const transcribeOutputSchema = z.object({
31
+ success: z.boolean(),
32
+ text: z.string().optional(),
33
+ srt: z.string().optional(),
34
+ error: z.string().optional(),
35
+ });
36
+
37
+ // Schema object for the definition
38
+ const schema: ZodSchema<
39
+ typeof transcribeInputSchema,
40
+ typeof transcribeOutputSchema
41
+ > = {
42
+ input: transcribeInputSchema,
43
+ output: transcribeOutputSchema,
44
+ };
16
45
 
17
- export const meta: ActionMeta = {
18
- name: "transcribe",
46
+ export const definition: ActionDefinition<typeof schema> = {
19
47
  type: "action",
20
- description: "speech to text transcription",
21
- inputType: "audio",
22
- outputType: "text",
23
- schema: {
24
- input: {
25
- type: "object",
26
- required: ["audio"],
27
- properties: {
28
- audio: {
29
- type: "string",
30
- format: "file-path",
31
- description: "audio/video file to transcribe",
32
- },
33
- provider: {
34
- type: "string",
35
- enum: ["groq", "fireworks"],
36
- default: "groq",
37
- description: "transcription provider",
38
- },
39
- output: {
40
- type: "string",
41
- format: "file-path",
42
- description: "output file path",
43
- },
44
- },
45
- },
46
- output: { type: "string", description: "transcribed text" },
47
- },
48
- async run(options) {
49
- const { audio, provider, output } = options as {
50
- audio: string;
51
- provider?: "groq" | "fireworks";
52
- output?: string;
53
- };
48
+ name: "transcribe",
49
+ description: "Speech to text transcription",
50
+ schema,
51
+ routes: [],
52
+ execute: async (inputs) => {
53
+ const { audio, provider, output } = inputs;
54
54
  return transcribe({ audioUrl: audio, provider, outputPath: output });
55
55
  },
56
56
  };
57
57
 
58
- // types
58
+ // Types
59
59
  export interface TranscribeOptions {
60
- audioUrl: string; // url or local file path
60
+ audioUrl: string;
61
61
  provider?: "groq" | "fireworks";
62
62
  model?: string;
63
63
  language?: string;
@@ -72,7 +72,7 @@ export interface TranscribeResult {
72
72
  error?: string;
73
73
  }
74
74
 
75
- // groq transcription
75
+ // Groq transcription
76
76
  async function transcribeWithGroq(
77
77
  audioUrl: string,
78
78
  options: {
@@ -84,16 +84,14 @@ async function transcribeWithGroq(
84
84
  try {
85
85
  console.log("[transcribe] using groq whisper...");
86
86
 
87
- // load audio file (local or remote)
87
+ // Load audio file
88
88
  let audioBuffer: ArrayBuffer;
89
89
  let fileName = "audio.mp3";
90
90
 
91
91
  if (audioUrl.startsWith("http://") || audioUrl.startsWith("https://")) {
92
- // fetch remote file
93
92
  const audioResponse = await fetch(audioUrl);
94
93
  audioBuffer = await audioResponse.arrayBuffer();
95
94
  } else {
96
- // read local file with bun
97
95
  const file = Bun.file(audioUrl);
98
96
  audioBuffer = await file.arrayBuffer();
99
97
  fileName = audioUrl.split("/").pop() || "audio.mp3";
@@ -101,8 +99,7 @@ async function transcribeWithGroq(
101
99
 
102
100
  const audioFile = await toFile(audioBuffer, fileName);
103
101
 
104
- // transcribe with groq
105
- const text = await groqTranscribe({
102
+ const text = await groqProvider.transcribeAudio({
106
103
  file: audioFile,
107
104
  model: options.model || GROQ_MODELS.WHISPER_LARGE,
108
105
  language: options.language,
@@ -111,10 +108,8 @@ async function transcribeWithGroq(
111
108
  console.log("[transcribe] groq transcription complete");
112
109
 
113
110
  if (options.outputFormat === "srt") {
114
- // groq returns plain text, so we need to convert to srt
115
- // for now just return text with warning
116
111
  console.warn(
117
- "[transcribe] groq returns plain text, use fireworks for srt format",
112
+ "[transcribe] groq returns plain text, use fireworks for SRT format",
118
113
  );
119
114
  return { success: true, text, srt: text };
120
115
  }
@@ -125,19 +120,19 @@ async function transcribeWithGroq(
125
120
  return {
126
121
  success: false,
127
122
  error:
128
- error instanceof Error ? error.message : "groq transcription failed",
123
+ error instanceof Error ? error.message : "Groq transcription failed",
129
124
  };
130
125
  }
131
126
  }
132
127
 
133
- // fireworks transcription (with srt support)
128
+ // Fireworks transcription (with SRT support)
134
129
  async function transcribeWithFireworks(
135
130
  audioUrl: string,
136
131
  ): Promise<TranscribeResult> {
137
132
  try {
138
133
  console.log("[transcribe] using fireworks api...");
139
134
 
140
- const data = await fireworksTranscribe({
135
+ const data = await fireworksProvider.transcribe({
141
136
  audioPath: audioUrl,
142
137
  });
143
138
 
@@ -152,12 +147,12 @@ async function transcribeWithFireworks(
152
147
  error:
153
148
  error instanceof Error
154
149
  ? error.message
155
- : "fireworks transcription failed",
150
+ : "Fireworks transcription failed",
156
151
  };
157
152
  }
158
153
  }
159
154
 
160
- // main transcription function
155
+ // Main transcription function
161
156
  export async function transcribe(
162
157
  options: TranscribeOptions,
163
158
  ): Promise<TranscribeResult> {
@@ -178,7 +173,6 @@ export async function transcribe(
178
173
 
179
174
  let result: TranscribeResult;
180
175
 
181
- // choose provider
182
176
  if (provider === "groq") {
183
177
  result = await transcribeWithGroq(audioUrl, {
184
178
  model,
@@ -188,10 +182,10 @@ export async function transcribe(
188
182
  } else if (provider === "fireworks") {
189
183
  result = await transcribeWithFireworks(audioUrl);
190
184
  } else {
191
- throw new Error(`unknown provider: ${provider}`);
185
+ throw new Error(`Unknown provider: ${provider}`);
192
186
  }
193
187
 
194
- // save to file if requested
188
+ // Save to file if requested
195
189
  if (result.success && outputPath) {
196
190
  const content = outputFormat === "srt" ? result.srt : result.text;
197
191
  if (content) {
@@ -203,8 +197,4 @@ export async function transcribe(
203
197
  return result;
204
198
  }
205
199
 
206
- // cli
207
- if (import.meta.main) {
208
- const { runCli } = await import("../../cli/runner");
209
- runCli(meta);
210
- }
200
+ export default definition;