varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +48 -8
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -227
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -493
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -112
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -187
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -135
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -201
  217. package/index.ts +0 -38
  218. package/lib/README.md +0 -144
  219. package/lib/ai-sdk/fal.ts +0 -106
  220. package/lib/ai-sdk/replicate.ts +0 -107
  221. package/lib/elevenlabs.ts +0 -382
  222. package/lib/fal.ts +0 -478
  223. package/lib/ffmpeg.ts +0 -467
  224. package/lib/fireworks.ts +0 -235
  225. package/lib/groq.ts +0 -246
  226. package/lib/higgsfield.ts +0 -176
  227. package/lib/remotion/SKILL.md +0 -823
  228. package/lib/remotion/cli.ts +0 -115
  229. package/lib/remotion/functions.ts +0 -283
  230. package/lib/remotion/index.ts +0 -19
  231. package/lib/remotion/templates.ts +0 -73
  232. package/lib/replicate.ts +0 -304
  233. package/output.txt +0 -1
  234. package/test-import.ts +0 -7
  235. package/test-services.ts +0 -97
  236. package/utilities/s3.ts +0 -147
@@ -0,0 +1,114 @@
1
+ /**
2
+ * Action definitions index
3
+ */
4
+
5
+ export type { AddCaptionsOptions, SubtitleStyle } from "./captions";
6
+ // Captions
7
+ export { addCaptions, definition as captions } from "./captions";
8
+ export type {
9
+ CutOptions,
10
+ CutResult,
11
+ FadeOptions,
12
+ FadeResult,
13
+ MergeOptions,
14
+ MergeResult,
15
+ RemoveOptions,
16
+ RemoveResult,
17
+ SplitOptions,
18
+ SplitResult,
19
+ TransitionOptions,
20
+ TransitionResult,
21
+ TrimOptions,
22
+ TrimResult,
23
+ } from "./edit";
24
+ // Video editing (FFmpeg)
25
+ export {
26
+ cut,
27
+ cutDefinition,
28
+ fade,
29
+ fadeDefinition,
30
+ merge,
31
+ mergeDefinition,
32
+ remove,
33
+ removeDefinition,
34
+ split,
35
+ splitDefinition,
36
+ transition,
37
+ transitionDefinition,
38
+ trim,
39
+ trimDefinition,
40
+ } from "./edit";
41
+ export type { ImageGenerationResult } from "./image";
42
+ // Image generation
43
+ export {
44
+ definition as image,
45
+ generateWithFal,
46
+ generateWithSoul,
47
+ } from "./image";
48
+ export type { GenerateMusicOptions, MusicResult } from "./music";
49
+ // Music generation
50
+ export { definition as music, generateMusic } from "./music";
51
+ export type { LipsyncOptions, LipsyncResult, Wav2LipOptions } from "./sync";
52
+ // Lip sync
53
+ export {
54
+ definition as sync,
55
+ lipsync,
56
+ lipsyncOverlay,
57
+ lipsyncWav2Lip,
58
+ } from "./sync";
59
+ export type { TranscribeOptions, TranscribeResult } from "./transcribe";
60
+ // Transcription
61
+ export {
62
+ definition as transcribe,
63
+ transcribe as transcribeAudio,
64
+ } from "./transcribe";
65
+ export type { UploadOptions, UploadResult } from "./upload";
66
+ // Upload
67
+ export { definition as uploadDef, upload } from "./upload";
68
+ export type { VideoGenerationResult } from "./video";
69
+ // Video generation
70
+ export {
71
+ definition as video,
72
+ generateVideoFromImage,
73
+ generateVideoFromText,
74
+ } from "./video";
75
+ export type { GenerateVoiceOptions, VoiceResult } from "./voice";
76
+ // Voice generation
77
+ export { definition as voice, generateVoice } from "./voice";
78
+
79
+ // All action definitions for auto-loading
80
+ import { definition as captionsDefinition } from "./captions";
81
+ import {
82
+ cutDefinition,
83
+ fadeDefinition,
84
+ mergeDefinition,
85
+ removeDefinition,
86
+ splitDefinition,
87
+ transitionDefinition,
88
+ trimDefinition,
89
+ } from "./edit";
90
+ import { definition as imageDefinition } from "./image";
91
+ import { definition as musicDefinition } from "./music";
92
+ import { definition as syncDefinition } from "./sync";
93
+ import { definition as transcribeDefinition } from "./transcribe";
94
+ import { definition as uploadDefinition } from "./upload";
95
+ import { definition as videoDefinition } from "./video";
96
+ import { definition as voiceDefinition } from "./voice";
97
+
/**
 * Every action definition exposed by this module, collected into a single
 * array for auto-loading.
 *
 * The array order is: generation/analysis actions, captions, the FFmpeg
 * editing actions, and finally upload.
 */
export const allActions = [
  // Generation and analysis
  videoDefinition,
  imageDefinition,
  voiceDefinition,
  transcribeDefinition,
  musicDefinition,
  syncDefinition,

  // Captions
  captionsDefinition,

  // Video editing (FFmpeg)
  trimDefinition,
  cutDefinition,
  mergeDefinition,
  splitDefinition,
  fadeDefinition,
  transitionDefinition,
  removeDefinition,

  // Upload
  uploadDefinition,
];
@@ -0,0 +1,205 @@
1
+ /**
2
+ * Music generation action
3
+ * Text-to-music via Fal/Sonauto
4
+ */
5
+
6
+ import { writeFile } from "node:fs/promises";
7
+ import { z } from "zod";
8
+ import { audioFormatSchema, filePathSchema } from "../../core/schema/shared";
9
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
10
+ import { falProvider } from "../../providers/fal";
11
+ import { storageProvider } from "../../providers/storage";
12
+
// Zod schema describing what the music action accepts.
const musicInputSchema = z.object({
  prompt: z.string().optional().describe("Description of music to generate"),
  tags: z
    .array(z.string())
    .optional()
    .describe("Style tags like 'rock', 'energetic'"),
  lyrics: z.string().optional().describe("Optional lyrics prompt"),
  format: audioFormatSchema.default("mp3").describe("Output format"),
  numSongs: z
    .union([z.literal(1), z.literal(2)])
    .default(1)
    .describe("Number of songs to generate"),
  output: filePathSchema.optional().describe("Output file path"),
});

// Metadata for a single generated audio file.
const generatedAudioSchema = z.object({
  url: z.string(),
  fileName: z.string(),
  contentType: z.string(),
  fileSize: z.number(),
});

// Zod schema describing what the music action returns.
const musicOutputSchema = z.object({
  seed: z.number(),
  tags: z.array(z.string()).optional(),
  lyrics: z.string().optional(),
  audio: z.array(generatedAudioSchema),
  uploadUrls: z.array(z.string()).optional(),
});

// Input/output schema pair attached to the action definition.
const schema: ZodSchema<typeof musicInputSchema, typeof musicOutputSchema> = {
  input: musicInputSchema,
  output: musicOutputSchema,
};
50
+
/**
 * Action definition for "music".
 *
 * `execute` forwards the action inputs to `generateMusic`, passing the
 * `output` input as the `outputPath` option.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "music",
  description: "Generate music from text prompt or tags",
  schema,
  routes: [],
  execute: async ({ prompt, tags, lyrics, format, numSongs, output }) =>
    generateMusic({
      prompt,
      tags,
      lyrics,
      format,
      numSongs,
      outputPath: output,
    }),
};
68
+
// Public option and result types

/** Options accepted by `generateMusic`. */
export interface GenerateMusicOptions {
  /** Text description of the music; either this or `tags` must be given. */
  prompt?: string;
  /** Style tags; either this or `prompt` must be given. */
  tags?: string[];
  /** Lyrics prompt, forwarded to the provider as `lyricsPrompt`. */
  lyrics?: string;
  /** Seed forwarded to the provider. */
  seed?: number;
  /** Forwarded to the provider; defaults to 2. */
  promptStrength?: number;
  /** Forwarded to the provider; defaults to 0.7. */
  balanceStrength?: number;
  /** Number of songs to generate; defaults to 1. */
  numSongs?: 1 | 2;
  /** Output audio format; defaults to "mp3". */
  format?: "flac" | "mp3" | "wav" | "ogg" | "m4a";
  /** Output bit rate, forwarded to the provider as `outputBitRate`. */
  bitRate?: 128 | 192 | 256 | 320;
  /** Beats per minute; defaults to "auto". */
  bpm?: number | "auto";
  /** When true, each generated file is also uploaded to storage. */
  upload?: boolean;
  /** Local path to save the generated audio to; nothing is saved when omitted. */
  outputPath?: string;
}

/** Result returned by `generateMusic`. */
export interface MusicResult {
  /** Seed reported by the provider. */
  seed: number;
  /** Tags reported by the provider. */
  tags?: string[];
  /** Lyrics reported by the provider. */
  lyrics?: string;
  /** One entry per generated audio file. */
  audio: {
    url: string;
    fileName: string;
    contentType: string;
    fileSize: number;
  }[];
  /** Storage URLs, set only when `upload` was requested. */
  uploadUrls?: string[];
}
97
+
/**
 * Build the path for the `index`-th song when several songs are saved.
 *
 * A trailing extension in the last path segment is replaced by
 * `-<index>.<ext>`; a path without an extension gets the suffix appended, so
 * every song always receives a distinct file name.
 */
function numberedOutputPath(
  outputPath: string,
  index: number,
  ext: string,
): string {
  // Exclude path separators so a dot inside a directory name is never
  // mistaken for the start of the file extension.
  const extension = /\.[^./\\]+$/.exec(outputPath);
  const stem = extension ? outputPath.slice(0, extension.index) : outputPath;
  return `${stem}-${index}.${ext}`;
}

/**
 * Generate music through the Fal text-to-music provider, optionally saving
 * the generated audio locally and/or uploading it to storage.
 *
 * @param options - Generation settings; at least one of `prompt` or `tags`
 *   must be provided.
 * @returns The seed, tags, lyrics and audio file metadata reported by the
 *   provider, plus `uploadUrls` when `upload` is true.
 * @throws Error when neither `prompt` nor `tags` is given, or when a
 *   generated file cannot be downloaded for saving.
 */
export async function generateMusic(
  options: GenerateMusicOptions,
): Promise<MusicResult> {
  const {
    prompt,
    tags,
    lyrics,
    seed,
    promptStrength = 2,
    balanceStrength = 0.7,
    numSongs = 1,
    format = "mp3",
    bitRate,
    bpm = "auto",
    upload = false,
    outputPath,
  } = options;

  if (!prompt && !tags) {
    throw new Error("Either prompt or tags is required");
  }

  console.log(`[music] generating ${numSongs} song(s)...`);
  if (prompt) console.log(`[music] prompt: ${prompt}`);
  if (tags) console.log(`[music] tags: ${tags.join(", ")}`);

  const result = await falProvider.textToMusic({
    prompt,
    tags,
    lyricsPrompt: lyrics,
    seed,
    promptStrength,
    balanceStrength,
    numSongs,
    outputFormat: format,
    outputBitRate: bitRate,
    bpm,
  });

  // The provider response may carry either an array of audio files or a
  // single audio object; normalize both into an array of camelCase entries.
  const musicResult: MusicResult = {
    seed: result.data.seed,
    tags: result.data.tags,
    lyrics: result.data.lyrics,
    audio: Array.isArray(result.data.audio)
      ? result.data.audio.map(
          (a: {
            url: string;
            file_name: string;
            content_type: string;
            file_size: number;
          }) => ({
            url: a.url,
            fileName: a.file_name,
            contentType: a.content_type,
            fileSize: a.file_size,
          }),
        )
      : [
          {
            url: result.data.audio.url,
            fileName: result.data.audio.file_name,
            contentType: result.data.audio.content_type,
            fileSize: result.data.audio.file_size,
          },
        ],
  };

  const ext = format || "wav";

  // Save files locally if requested
  if (outputPath) {
    const hasSeveral = musicResult.audio.length !== 1;
    for (const [i, audio] of musicResult.audio.entries()) {
      if (!audio) continue;

      // A single song keeps the caller's path verbatim; several songs each
      // get a numbered path so they cannot overwrite one another.
      const filePath = hasSeveral
        ? numberedOutputPath(outputPath, i + 1, ext)
        : outputPath;

      const response = await fetch(audio.url);
      if (!response.ok) {
        // Without this check an HTTP error body would be written as audio.
        throw new Error(
          `Failed to download audio from ${audio.url}: ${response.status} ${response.statusText}`,
        );
      }
      const buffer = await response.arrayBuffer();
      await writeFile(filePath, Buffer.from(buffer));
      console.log(`[music] saved to ${filePath}`);
    }
  }

  // Upload to storage if requested
  if (upload) {
    const uploadUrls: string[] = [];
    for (const [i, audio] of musicResult.audio.entries()) {
      if (!audio) continue;

      const objectKey = `music/${Date.now()}-${i + 1}.${ext}`;
      const uploadUrl = await storageProvider.uploadFromUrl(
        audio.url,
        objectKey,
      );
      uploadUrls.push(uploadUrl);
      console.log(`[music] uploaded to ${uploadUrl}`);
    }
    musicResult.uploadUrls = uploadUrls;
  }

  return musicResult;
}
204
+
205
+ export default definition;
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Lip sync action
3
+ * Audio-to-video synchronization
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import {
8
+ filePathSchema,
9
+ resolutionSchema,
10
+ videoDurationStringSchema,
11
+ } from "../../core/schema/shared";
12
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
13
+ import { falProvider } from "../../providers/fal";
14
+ import { ffmpegProvider } from "../../providers/ffmpeg";
15
+
// Zod schema describing what the sync action accepts.
const syncInputSchema = z.object({
  image: filePathSchema.describe("Input image"),
  audio: filePathSchema.describe("Audio file"),
  prompt: z.string().describe("Description of the scene"),
  duration: videoDurationStringSchema.default("5").describe("Output duration"),
  resolution: resolutionSchema.default("480p").describe("Output resolution"),
});

// Zod schema describing what the sync action returns.
const syncOutputSchema = z.object({ videoUrl: z.string() });

// Input/output schema pair attached to the action definition.
const schema: ZodSchema<typeof syncInputSchema, typeof syncOutputSchema> = {
  input: syncInputSchema,
  output: syncOutputSchema,
};
35
+
/**
 * Action definition for "sync".
 *
 * `execute` passes the image, audio, prompt, duration and resolution inputs
 * straight through to `lipsync`.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "sync",
  description: "Lip sync audio to video/image",
  schema,
  routes: [],
  execute: async (inputs) =>
    lipsync({
      image: inputs.image,
      audio: inputs.audio,
      prompt: inputs.prompt,
      duration: inputs.duration,
      resolution: inputs.resolution,
    }),
};
47
+
// Public option and result types

/** Options accepted by `lipsync`. */
export interface LipsyncOptions {
  /** Input image, forwarded to the provider as `imageUrl`. */
  image: string;
  /** Audio track, forwarded to the provider as `audioUrl`. */
  audio: string;
  /** Description of the scene. */
  prompt: string;
  /** Output duration; defaults to "5". */
  duration?: "5" | "10";
  /** Output resolution; defaults to "480p". */
  resolution?: "480p" | "720p" | "1080p";
}

/** Result returned by `lipsync`. */
export interface LipsyncResult {
  /** URL of the generated video. */
  videoUrl: string;
}

/** Options accepted by `lipsyncWav2Lip`. */
export interface Wav2LipOptions {
  /** Input video. */
  videoPath: string;
  /** Audio track; not read by the current placeholder implementation. */
  audioPath: string;
  /** Where the output video is written. */
  outputPath: string;
}
66
+
/**
 * Produce a lip-synced video from an image and an audio track using the
 * Wan-25 model on the Fal provider.
 *
 * @param options - Image, audio, scene prompt, and optional duration
 *   (default "5") and resolution (default "480p").
 * @returns The URL of the generated video.
 * @throws Error when the provider result contains no video URL.
 */
export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
  const {
    image: imageUrl,
    audio: audioUrl,
    prompt,
    duration = "5",
    resolution = "480p",
  } = options;

  console.log("[sync] generating lip-synced video with wan-25...");

  const { data } = await falProvider.wan25({
    imageUrl,
    audioUrl,
    prompt,
    duration,
    resolution,
  });

  const videoUrl = data?.video?.url;
  if (videoUrl) {
    return { videoUrl };
  }

  throw new Error("No video URL in result");
}
90
+
/**
 * Overlay a lip-synced face onto the original video.
 *
 * Placeholder: a real overlay would need more complex ffmpeg operations, so
 * for now the lip-synced video is simply converted to `outputPath` and
 * `originalVideo` is not used.
 *
 * @param options - Original video, lip-synced video, and output path.
 * @returns The output path.
 */
export async function lipsyncOverlay(options: {
  originalVideo: string;
  lipsyncedVideo: string;
  outputPath: string;
}): Promise<string> {
  const { lipsyncedVideo: input, outputPath: output } = options;

  console.log("[sync] overlaying lip-synced video...");

  await ffmpegProvider.convertFormat({ input, output });

  return output;
}
112
+
/**
 * Wav2Lip-style lip sync (placeholder for future implementation).
 *
 * No lip sync is performed yet: the input video is converted to
 * `outputPath` as-is and `audioPath` is not read.
 *
 * @param options - Input video path, audio path, and output path.
 * @returns The output path.
 */
export async function lipsyncWav2Lip(options: Wav2LipOptions): Promise<string> {
  // The previous warning claimed a wan-25 fallback, but this function never
  // calls wan-25; say what actually happens so callers are not misled.
  console.warn(
    "[sync] wav2lip not yet implemented, copying input video without lip sync",
  );

  // For now, just copy the video
  await ffmpegProvider.convertFormat({
    input: options.videoPath,
    output: options.outputPath,
  });

  return options.outputPath;
}
127
+
128
+ export default definition;
@@ -1,22 +1,63 @@
1
- #!/usr/bin/env bun
2
-
3
1
  /**
4
- * audio transcription service
5
- * supports groq whisper, fireworks api, and future providers
2
+ * Transcription action
3
+ * Speech-to-text via Groq or Fireworks
6
4
  */
7
5
 
8
6
  import { writeFileSync } from "node:fs";
9
- import { join } from "node:path";
10
7
  import { toFile } from "groq-sdk/uploads";
8
+ import { z } from "zod";
9
+ import {
10
+ filePathSchema,
11
+ transcriptionProviderSchema,
12
+ } from "../../core/schema/shared";
13
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
11
14
  import {
12
15
  convertFireworksToSRT,
13
- transcribeWithFireworks as fireworksTranscribe,
14
- } from "../../lib/fireworks";
15
- import { GROQ_MODELS, transcribeAudio as groqTranscribe } from "../../lib/groq";
16
+ fireworksProvider,
17
+ } from "../../providers/fireworks";
18
+ import { GROQ_MODELS, groqProvider } from "../../providers/groq";
19
+
20
+ // Input schema with Zod
21
+ const transcribeInputSchema = z.object({
22
+ audio: filePathSchema.describe("Audio/video file to transcribe"),
23
+ provider: transcriptionProviderSchema
24
+ .default("groq")
25
+ .describe("Transcription provider"),
26
+ output: filePathSchema.optional().describe("Output file path"),
27
+ });
28
+
29
+ // Output schema with Zod
30
+ const transcribeOutputSchema = z.object({
31
+ success: z.boolean(),
32
+ text: z.string().optional(),
33
+ srt: z.string().optional(),
34
+ error: z.string().optional(),
35
+ });
36
+
37
+ // Schema object for the definition
38
+ const schema: ZodSchema<
39
+ typeof transcribeInputSchema,
40
+ typeof transcribeOutputSchema
41
+ > = {
42
+ input: transcribeInputSchema,
43
+ output: transcribeOutputSchema,
44
+ };
45
+
46
+ export const definition: ActionDefinition<typeof schema> = {
47
+ type: "action",
48
+ name: "transcribe",
49
+ description: "Speech to text transcription",
50
+ schema,
51
+ routes: [],
52
+ execute: async (inputs) => {
53
+ const { audio, provider, output } = inputs;
54
+ return transcribe({ audioUrl: audio, provider, outputPath: output });
55
+ },
56
+ };
16
57
 
17
- // types
58
+ // Types
18
59
  export interface TranscribeOptions {
19
- audioUrl: string; // url or local file path
60
+ audioUrl: string;
20
61
  provider?: "groq" | "fireworks";
21
62
  model?: string;
22
63
  language?: string;
@@ -31,7 +72,7 @@ export interface TranscribeResult {
31
72
  error?: string;
32
73
  }
33
74
 
34
- // groq transcription
75
+ // Groq transcription
35
76
  async function transcribeWithGroq(
36
77
  audioUrl: string,
37
78
  options: {
@@ -43,16 +84,14 @@ async function transcribeWithGroq(
43
84
  try {
44
85
  console.log("[transcribe] using groq whisper...");
45
86
 
46
- // load audio file (local or remote)
87
+ // Load audio file
47
88
  let audioBuffer: ArrayBuffer;
48
89
  let fileName = "audio.mp3";
49
90
 
50
91
  if (audioUrl.startsWith("http://") || audioUrl.startsWith("https://")) {
51
- // fetch remote file
52
92
  const audioResponse = await fetch(audioUrl);
53
93
  audioBuffer = await audioResponse.arrayBuffer();
54
94
  } else {
55
- // read local file with bun
56
95
  const file = Bun.file(audioUrl);
57
96
  audioBuffer = await file.arrayBuffer();
58
97
  fileName = audioUrl.split("/").pop() || "audio.mp3";
@@ -60,8 +99,7 @@ async function transcribeWithGroq(
60
99
 
61
100
  const audioFile = await toFile(audioBuffer, fileName);
62
101
 
63
- // transcribe with groq
64
- const text = await groqTranscribe({
102
+ const text = await groqProvider.transcribeAudio({
65
103
  file: audioFile,
66
104
  model: options.model || GROQ_MODELS.WHISPER_LARGE,
67
105
  language: options.language,
@@ -70,10 +108,8 @@ async function transcribeWithGroq(
70
108
  console.log("[transcribe] groq transcription complete");
71
109
 
72
110
  if (options.outputFormat === "srt") {
73
- // groq returns plain text, so we need to convert to srt
74
- // for now just return text with warning
75
111
  console.warn(
76
- "[transcribe] groq returns plain text, use fireworks for srt format",
112
+ "[transcribe] groq returns plain text, use fireworks for SRT format",
77
113
  );
78
114
  return { success: true, text, srt: text };
79
115
  }
@@ -84,19 +120,19 @@ async function transcribeWithGroq(
84
120
  return {
85
121
  success: false,
86
122
  error:
87
- error instanceof Error ? error.message : "groq transcription failed",
123
+ error instanceof Error ? error.message : "Groq transcription failed",
88
124
  };
89
125
  }
90
126
  }
91
127
 
92
- // fireworks transcription (with srt support)
128
+ // Fireworks transcription (with SRT support)
93
129
  async function transcribeWithFireworks(
94
130
  audioUrl: string,
95
131
  ): Promise<TranscribeResult> {
96
132
  try {
97
133
  console.log("[transcribe] using fireworks api...");
98
134
 
99
- const data = await fireworksTranscribe({
135
+ const data = await fireworksProvider.transcribe({
100
136
  audioPath: audioUrl,
101
137
  });
102
138
 
@@ -111,12 +147,12 @@ async function transcribeWithFireworks(
111
147
  error:
112
148
  error instanceof Error
113
149
  ? error.message
114
- : "fireworks transcription failed",
150
+ : "Fireworks transcription failed",
115
151
  };
116
152
  }
117
153
  }
118
154
 
119
- // main transcription function
155
+ // Main transcription function
120
156
  export async function transcribe(
121
157
  options: TranscribeOptions,
122
158
  ): Promise<TranscribeResult> {
@@ -137,7 +173,6 @@ export async function transcribe(
137
173
 
138
174
  let result: TranscribeResult;
139
175
 
140
- // choose provider
141
176
  if (provider === "groq") {
142
177
  result = await transcribeWithGroq(audioUrl, {
143
178
  model,
@@ -147,10 +182,10 @@ export async function transcribe(
147
182
  } else if (provider === "fireworks") {
148
183
  result = await transcribeWithFireworks(audioUrl);
149
184
  } else {
150
- throw new Error(`unknown provider: ${provider}`);
185
+ throw new Error(`Unknown provider: ${provider}`);
151
186
  }
152
187
 
153
- // save to file if requested
188
+ // Save to file if requested
154
189
  if (result.success && outputPath) {
155
190
  const content = outputFormat === "srt" ? result.srt : result.text;
156
191
  if (content) {
@@ -162,66 +197,4 @@ export async function transcribe(
162
197
  return result;
163
198
  }
164
199
 
165
- // cli
166
- async function cli() {
167
- const args = process.argv.slice(2);
168
- const command = args[0];
169
-
170
- if (!command || command === "help") {
171
- console.log(`
172
- usage:
173
- bun run service/transcribe.ts <audioPath> [provider] [outputPath]
174
-
175
- arguments:
176
- audioPath - url or local path to audio file
177
- provider - groq (default) | fireworks
178
- outputPath - optional path to save transcription
179
-
180
- examples:
181
- bun run service/transcribe.ts https://example.com/audio.mp3
182
- bun run service/transcribe.ts media/dora.ogg groq
183
- bun run service/transcribe.ts https://example.com/audio.mp3 fireworks output.srt
184
- bun run service/transcribe.ts media/audio.mp3 groq output.txt
185
-
186
- providers:
187
- groq - ultra-fast whisper (text only, free tier available)
188
- fireworks - slower but includes srt timestamps (uses reels-srt api)
189
-
190
- environment:
191
- GROQ_API_KEY - your groq api key (for groq provider)
192
- `);
193
- process.exit(0);
194
- }
195
-
196
- try {
197
- const audioUrl = args[0];
198
- const provider = (args[1] || "groq") as "groq" | "fireworks";
199
- const outputPath = args[2];
200
-
201
- if (!audioUrl) {
202
- throw new Error("audioUrl is required");
203
- }
204
-
205
- const result = await transcribe({
206
- audioUrl,
207
- provider,
208
- outputFormat: provider === "fireworks" ? "srt" : "text",
209
- outputPath: outputPath || join(process.cwd(), "output.txt"),
210
- });
211
-
212
- if (result.success) {
213
- console.log("\ntranscription:");
214
- console.log(result.srt || result.text);
215
- } else {
216
- console.error(`\nerror: ${result.error}`);
217
- process.exit(1);
218
- }
219
- } catch (error) {
220
- console.error("[transcribe] error:", error);
221
- process.exit(1);
222
- }
223
- }
224
-
225
- if (import.meta.main) {
226
- cli();
227
- }
200
+ export default definition;