varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +48 -8
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -227
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -493
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -112
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -187
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -135
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -201
  217. package/index.ts +0 -38
  218. package/lib/README.md +0 -144
  219. package/lib/ai-sdk/fal.ts +0 -106
  220. package/lib/ai-sdk/replicate.ts +0 -107
  221. package/lib/elevenlabs.ts +0 -382
  222. package/lib/fal.ts +0 -478
  223. package/lib/ffmpeg.ts +0 -467
  224. package/lib/fireworks.ts +0 -235
  225. package/lib/groq.ts +0 -246
  226. package/lib/higgsfield.ts +0 -176
  227. package/lib/remotion/SKILL.md +0 -823
  228. package/lib/remotion/cli.ts +0 -115
  229. package/lib/remotion/functions.ts +0 -283
  230. package/lib/remotion/index.ts +0 -19
  231. package/lib/remotion/templates.ts +0 -73
  232. package/lib/replicate.ts +0 -304
  233. package/output.txt +0 -1
  234. package/test-import.ts +0 -7
  235. package/test-services.ts +0 -97
  236. package/utilities/s3.ts +0 -147
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Upload action
3
+ * Upload files to S3/R2 storage
4
+ */
5
+
6
+ import { existsSync } from "node:fs";
7
+ import { basename, extname } from "node:path";
8
+ import { z } from "zod";
9
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
10
+ import { storageProvider } from "../../providers/storage";
11
+
12
+ // Input schema
13
// Individual input fields, declared separately so each carries its own description.
const uploadFileField = z.string().describe("Local file path or URL to upload");
const uploadKeyField = z
  .string()
  .optional()
  .describe("Object key/path in storage (auto-generated if not provided)");

/** Validates the inputs accepted by the upload action. */
const uploadInputSchema = z.object({
  file: uploadFileField,
  key: uploadKeyField,
});

/** Validates the result produced by the upload action. */
const uploadOutputSchema = z.object({
  url: z.string().describe("Public URL of the uploaded file"),
  key: z.string().describe("Object key in storage"),
});

/** Input/output schema pair attached to the action definition. */
const schema: ZodSchema<typeof uploadInputSchema, typeof uploadOutputSchema> = {
  input: uploadInputSchema,
  output: uploadOutputSchema,
};
32
+
33
/** Optional settings accepted by `upload`. */
export interface UploadOptions {
  /** Object key to store the file under; a key is generated when omitted. */
  key?: string;
}

/** Value resolved by `upload`. */
export interface UploadResult {
  /** URL returned by the storage provider for the uploaded object. */
  url: string;
  /** Object key the file was stored under. */
  key: string;
}
41
+
42
/**
 * Generate a unique object key of the form
 * `uploads/<timestamp>-<random>-<name><ext>` for a local path or http(s) URL.
 *
 * For URLs, the query string, fragment, scheme and host are removed before the
 * file name is derived, so `https://host/a/clip.mp4?token=x` yields
 * `...-clip.mp4` rather than leaking `?token=x` into the key. When no usable
 * extension is present the key falls back to `.bin`.
 *
 * @param source Local file path or http(s) URL the key is derived from.
 * @returns Storage object key under the `uploads/` prefix.
 */
function generateObjectKey(source: string): string {
  const timestamp = Date.now();
  const random = Math.random().toString(36).slice(2, 8);

  let path = source;
  if (/^https?:\/\//i.test(source)) {
    // Drop the query string / fragment first: they may contain dots and slashes
    // that would otherwise be mistaken for an extension or a path segment.
    const cut = source.search(/[?#]/);
    const withoutQuery = cut === -1 ? source : source.slice(0, cut);
    // Drop scheme and authority so a bare host ("example.com") is never
    // interpreted as "<name>.<ext>".
    path = withoutQuery.replace(/^https?:\/\/[^/]*/i, "");
  }

  // extname() only inspects the final path segment, so the extension can never
  // contain a path separator. A lone "." (e.g. "file.") is treated as missing.
  const rawExt = extname(path);
  const ext = rawExt.length > 1 ? rawExt : ".bin";

  const name = basename(path, rawExt).slice(0, 20) || "file";
  return `uploads/${timestamp}-${random}-${name}${ext}`;
}
64
+
65
/**
 * Check whether a string is an http(s) URL.
 *
 * URI schemes are case-insensitive, so `HTTPS://...` is accepted as well as
 * `https://...`; anything else is treated by callers as a local path.
 *
 * @param str Candidate string.
 * @returns `true` when `str` starts with an `http://` or `https://` scheme.
 */
function isUrl(str: string): boolean {
  return /^https?:\/\//i.test(str);
}
71
+
72
/**
 * Upload a file to storage.
 *
 * URL sources are handed to `storageProvider.uploadFromUrl`; anything else is
 * treated as a local path, which must exist on disk.
 *
 * @param file Local file path or http(s) URL.
 * @param options Optional explicit object key.
 * @returns The URL reported by the storage provider and the key used.
 * @throws Error when `file` is a local path that does not exist.
 */
export async function upload(
  file: string,
  options: UploadOptions = {},
): Promise<UploadResult> {
  const key = options.key || generateObjectKey(file);
  const fromUrl = isUrl(file);

  // Local sources are verified up front so a missing file fails before any upload.
  if (!fromUrl && !existsSync(file)) {
    throw new Error(`File not found: ${file}`);
  }

  console.log(
    fromUrl
      ? `[upload] uploading from URL: ${file}`
      : `[upload] uploading local file: ${file}`,
  );

  const url = fromUrl
    ? await storageProvider.uploadFromUrl(file, key)
    : await storageProvider.uploadLocalFile(file, key);

  console.log(`[upload] uploaded to ${url}`);
  return { url, key };
}
98
+
99
/**
 * Action definition for "upload": validates inputs against `schema` and
 * delegates to `upload`. It declares no routes.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "upload",
  description: "Upload file to S3/R2 storage",
  schema,
  routes: [],
  execute: async (inputs) => upload(inputs.file, { key: inputs.key }),
};
110
+
111
+ export default definition;
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Video generation action
3
+ * Routes to appropriate video generation models based on input
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import {
8
+ aspectRatioSchema,
9
+ filePathSchema,
10
+ videoDurationSchema,
11
+ } from "../../core/schema/shared";
12
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
13
+ import { falProvider } from "../../providers/fal";
14
+ import { storageProvider } from "../../providers/storage";
15
+
16
+ // Input schema with Zod
17
// Individual input fields, built from the shared schemas.
const videoPromptField = z.string().describe("What to generate");
const videoImageField = filePathSchema
  .optional()
  .describe("Input image (enables image-to-video)");
const videoDurationField = videoDurationSchema
  .default(5)
  .describe("Video duration in seconds");
const videoAspectRatioField = aspectRatioSchema
  .default("16:9")
  .describe("Aspect ratio for text-to-video");

/** Validates the inputs accepted by the video action. */
const videoInputSchema = z.object({
  prompt: videoPromptField,
  image: videoImageField,
  duration: videoDurationField,
  aspectRatio: videoAspectRatioField,
});

/** Validates the result produced by the video action. */
const videoOutputSchema = z.object({
  videoUrl: z.string(),
  duration: z.number().optional(),
});

/** Input/output schema pair attached to the action definition. */
const schema: ZodSchema<typeof videoInputSchema, typeof videoOutputSchema> = {
  input: videoInputSchema,
  output: videoOutputSchema,
};
41
+
42
/**
 * Action definition for "video".
 *
 * When an `image` input is supplied the request goes to
 * `falProvider.imageToVideo`; otherwise `falProvider.textToVideo` is used.
 * Throws when the provider result carries no video URL.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "video",
  description: "Generate video from text or image",
  schema,
  routes: [
    {
      target: "kling",
      priority: 10,
    },
  ],
  execute: async (inputs) => {
    const { prompt, image, duration, aspectRatio } = inputs;

    console.log(
      image
        ? "[action/video] generating video from image"
        : "[action/video] generating video from text",
    );

    // Only the fields read below are described; the provider may return more.
    const result: { data?: { video?: { url?: string }; duration?: number } } =
      image
        ? await falProvider.imageToVideo({ prompt, imageUrl: image, duration })
        : await falProvider.textToVideo({ prompt, duration, aspectRatio });

    const videoUrl = result.data?.video?.url;
    if (!videoUrl) {
      throw new Error("No video URL in result");
    }

    return { videoUrl, duration: result.data?.duration };
  },
};
86
+
87
+ // Re-export types and functions for backward compatibility
88
/** Value resolved by `generateVideoFromImage` and `generateVideoFromText`. */
export interface VideoGenerationResult {
  /** URL of the generated video as reported by the provider. */
  videoUrl: string;
  /** Duration taken from the provider result, when present. */
  duration?: number;
  /** Storage URL of the re-uploaded copy; set only when upload was requested. */
  uploaded?: string;
}
93
+
94
/**
 * Generate a video from an input image via `falProvider.imageToVideo`.
 *
 * @param prompt Text prompt forwarded to the provider.
 * @param imageUrl Image reference forwarded to the provider as `imageUrl`.
 * @param options `duration` is forwarded to the provider; `upload` additionally
 *   copies the generated video into storage under `videos/generated/`.
 * @returns The generated video URL, the provider-reported duration, and the
 *   storage URL when an upload was performed.
 * @throws Error when the provider result carries no video URL.
 */
export async function generateVideoFromImage(
  prompt: string,
  imageUrl: string,
  options: { duration?: 5 | 10; upload?: boolean } = {},
): Promise<VideoGenerationResult> {
  const { duration, upload: shouldUpload } = options;

  console.log("[video] generating video from image");
  const result = await falProvider.imageToVideo({ prompt, imageUrl, duration });

  const videoUrl = result.data?.video?.url;
  if (!videoUrl) {
    throw new Error("No video URL in result");
  }

  let uploaded: string | undefined;
  if (shouldUpload) {
    // The object key is derived from the current time in milliseconds.
    uploaded = await storageProvider.uploadFromUrl(
      videoUrl,
      `videos/generated/${Date.now()}.mp4`,
    );
    console.log(`[video] uploaded to ${uploaded}`);
  }

  return { videoUrl, duration: result.data?.duration, uploaded };
}
126
+
127
/**
 * Generate a video from a text prompt via `falProvider.textToVideo`.
 *
 * @param prompt Text prompt forwarded to the provider.
 * @param options `duration` and `aspectRatio` are forwarded to the provider;
 *   `upload` additionally copies the generated video into storage under
 *   `videos/generated/`.
 * @returns The generated video URL, the provider-reported duration, and the
 *   storage URL when an upload was performed.
 * @throws Error when the provider result carries no video URL.
 */
export async function generateVideoFromText(
  prompt: string,
  options: {
    duration?: 5 | 10;
    upload?: boolean;
    aspectRatio?: "16:9" | "9:16" | "1:1";
  } = {},
): Promise<VideoGenerationResult> {
  const { duration, aspectRatio, upload: shouldUpload } = options;

  console.log("[video] generating video from text");
  const result = await falProvider.textToVideo({ prompt, duration, aspectRatio });

  const videoUrl = result.data?.video?.url;
  if (!videoUrl) {
    throw new Error("No video URL in result");
  }

  let uploaded: string | undefined;
  if (shouldUpload) {
    // The object key is derived from the current time in milliseconds.
    uploaded = await storageProvider.uploadFromUrl(
      videoUrl,
      `videos/generated/${Date.now()}.mp4`,
    );
    console.log(`[video] uploaded to ${uploaded}`);
  }

  return { videoUrl, duration: result.data?.duration, uploaded };
}
162
+
163
+ export default definition;
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Voice generation action
3
+ * Text-to-speech via ElevenLabs
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import { filePathSchema, voiceNameSchema } from "../../core/schema/shared";
8
+ import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
9
+ import { elevenlabsProvider, VOICES } from "../../providers/elevenlabs";
10
+ import { storageProvider } from "../../providers/storage";
11
+
12
+ // Input schema with Zod
13
// Individual input fields, built from the shared schemas.
const voiceTextField = z.string().describe("Text to convert to speech");
const voiceNameField = voiceNameSchema.default("rachel").describe("Voice to use");
const voiceOutputPathField = filePathSchema.optional().describe("Output file path");

/** Validates the inputs accepted by the voice action. */
const voiceInputSchema = z.object({
  text: voiceTextField,
  voice: voiceNameField,
  output: voiceOutputPathField,
});

/** Validates the result produced by the voice action. */
const voiceOutputSchema = z.object({
  audio: z.instanceof(Buffer),
  provider: z.string(),
  voiceId: z.string(),
  uploadUrl: z.string().optional(),
});

/** Input/output schema pair attached to the action definition. */
const schema: ZodSchema<typeof voiceInputSchema, typeof voiceOutputSchema> = {
  input: voiceInputSchema,
  output: voiceOutputSchema,
};
32
+
33
/**
 * Action definition for "voice": validates inputs against `schema` and
 * delegates to `generateVoice`, mapping the `output` input to `outputPath`.
 * It declares no routes.
 */
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "voice",
  description: "Text to speech generation",
  schema,
  routes: [],
  execute: async ({ text, voice, output }) =>
    generateVoice({ text, voice, outputPath: output }),
};
44
+
45
+ // Types
46
/** Options accepted by `generateVoice`. */
export interface GenerateVoiceOptions {
  /** Text to convert to speech. */
  text: string;
  /** Voice name or raw voice ID. */
  voice?: string;
  /** Speech provider; "elevenlabs" is the only accepted value. */
  provider?: "elevenlabs";
  /** When true (and `outputPath` is set), the written file is uploaded to storage. */
  upload?: boolean;
  /** Output path forwarded to the speech provider. */
  outputPath?: string;
}

/** Value resolved by `generateVoice`. */
export interface VoiceResult {
  /** Audio returned by the speech provider. */
  audio: Buffer;
  /** Provider that produced the audio. */
  provider: string;
  /** Voice ID that was sent to the provider. */
  voiceId: string;
  /** Storage URL of the uploaded audio; set only when an upload was performed. */
  uploadUrl?: string;
}
60
+
61
+ // Voice name to ID mapping
62
/**
 * Lower-case voice name -> voice ID lookup, derived from the `VOICES`
 * constants so each entry is keyed by the lower-cased constant name.
 */
const VOICE_MAP: Record<string, string> = Object.fromEntries(
  (
    [
      "RACHEL",
      "DOMI",
      "BELLA",
      "ANTONI",
      "ELLI",
      "JOSH",
      "ARNOLD",
      "ADAM",
      "SAM",
    ] as const
  ).map((name): [string, string] => [name.toLowerCase(), VOICES[name]]),
);
73
+
74
/**
 * Convert text to speech via `elevenlabsProvider.textToSpeech`.
 *
 * `voice` is matched case-insensitively against the known voice names; any
 * value that is not a known name is passed through unchanged as a voice ID.
 * When `upload` is set, the file written at `outputPath` is uploaded to
 * storage under `voice/`.
 *
 * @param options Text, voice, and optional output/upload settings.
 * @returns The audio buffer, provider name, resolved voice ID and, when an
 *   upload was performed, its storage URL.
 * @throws Error when `text` is empty.
 */
export async function generateVoice(
  options: GenerateVoiceOptions,
): Promise<VoiceResult> {
  const {
    text,
    voice = "rachel",
    provider = "elevenlabs",
    upload = false,
    outputPath,
  } = options;

  if (!text) {
    throw new Error("text is required");
  }

  console.log(`[voice] generating with ${provider} (${voice})...`);

  // Only consult the map's own entries: a plain-object lookup would otherwise
  // resolve names such as "constructor" to inherited Object.prototype members
  // and hand a non-string voice ID to the provider.
  const voiceKey = voice.toLowerCase();
  const mappedId = Object.prototype.hasOwnProperty.call(VOICE_MAP, voiceKey)
    ? VOICE_MAP[voiceKey]
    : undefined;
  const voiceId = mappedId || voice;

  const audio = await elevenlabsProvider.textToSpeech({
    text,
    voiceId,
    outputPath,
  });

  const result: VoiceResult = {
    audio,
    provider,
    voiceId,
  };

  // Upload to storage if requested. The upload reads the file at outputPath,
  // so without one there is nothing to upload; say so instead of silently
  // returning a result with no uploadUrl.
  if (upload) {
    if (outputPath) {
      const objectKey = `voice/${Date.now()}-${voice}.mp3`;
      const uploadUrl = await storageProvider.uploadLocalFile(
        outputPath,
        objectKey,
      );
      result.uploadUrl = uploadUrl;
      console.log(`[voice] uploaded to ${uploadUrl}`);
    } else {
      console.warn(
        "[voice] upload requested but no outputPath was provided; skipping upload",
      );
    }
  }

  return result;
}
118
+
119
+ export default definition;
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Definitions index
3
+ * Re-exports all actions, models, and skills
4
+ */
5
+
6
+ // Actions
7
+ export * from "./actions";
8
+ export { allActions } from "./actions";
9
+
10
+ // Models
11
+ export * from "./models";
12
+ export { allModels } from "./models";
13
+
14
+ // Skills
15
+ export * from "./skills";
16
+ export { allSkills } from "./skills";
17
+
18
+ // All definitions combined
19
+ import { allActions } from "./actions";
20
+ import { allModels } from "./models";
21
+ import { allSkills } from "./skills";
22
+
23
/** Every registered definition in one list: models first, then actions, then skills. */
export const allDefinitions = [
  ...allModels,
  ...allActions,
  ...allSkills,
];
@@ -0,0 +1,50 @@
1
+ /**
2
+ * ElevenLabs voice models
3
+ * Text-to-speech generation
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import { elevenLabsModelSchema, percentSchema } from "../../core/schema/shared";
8
+ import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
9
+
10
+ // Input schema with Zod
11
// Individual input fields, built from the shared schemas.
const elevenlabsTextField = z.string().describe("Text to convert to speech");
const elevenlabsVoiceIdField = z.string().optional().describe("Voice ID to use");
const elevenlabsModelIdField = elevenLabsModelSchema
  .default("eleven_multilingual_v2")
  .describe("TTS model to use");
const elevenlabsStabilityField = percentSchema
  .default(0.5)
  .describe("Voice stability (0-1)");
const elevenlabsSimilarityField = percentSchema
  .default(0.75)
  .describe("Voice similarity boost (0-1)");

/** Validates the inputs accepted by the ElevenLabs TTS model. */
const elevenlabsInputSchema = z.object({
  text: elevenlabsTextField,
  voice_id: elevenlabsVoiceIdField,
  model_id: elevenlabsModelIdField,
  stability: elevenlabsStabilityField,
  similarity_boost: elevenlabsSimilarityField,
});

/** Validates the model output: a single audio buffer. */
const elevenlabsOutputSchema = z.object({
  audio: z.instanceof(Buffer),
});

/** Input/output schema pair attached to the model definition. */
const schema: ZodSchema<
  typeof elevenlabsInputSchema,
  typeof elevenlabsOutputSchema
> = {
  input: elevenlabsInputSchema,
  output: elevenlabsOutputSchema,
};
36
+
37
/**
 * Model definition for "elevenlabs-tts".
 * Served by the "elevenlabs" provider only, mapped to its
 * `eleven_multilingual_v2` model.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "elevenlabs-tts",
  description: "ElevenLabs text-to-speech model for high-quality voice generation",
  providers: ["elevenlabs"],
  defaultProvider: "elevenlabs",
  providerModels: { elevenlabs: "eleven_multilingual_v2" },
  schema,
};
49
+
50
+ export default definition;
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Flux image generation model
3
+ * High-quality image generation from text
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import { imageSizeSchema } from "../../core/schema/shared";
8
+ import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
9
+
10
+ // Input schema with Zod
11
// Individual input fields.
const fluxPromptField = z.string().describe("Text description of the image");
const fluxImageSizeField = imageSizeSchema
  .default("landscape_4_3")
  .describe("Output image size/aspect");
const fluxInferenceStepsField = z
  .number()
  .int()
  .default(28)
  .describe("Number of inference steps");
const fluxGuidanceScaleField = z
  .number()
  .default(3.5)
  .describe("Guidance scale for generation");

/** Validates the inputs accepted by the Flux model. */
const fluxInputSchema = z.object({
  prompt: fluxPromptField,
  image_size: fluxImageSizeField,
  num_inference_steps: fluxInferenceStepsField,
  guidance_scale: fluxGuidanceScaleField,
});

/** One generated image entry in the model output. */
const fluxImageSchema = z.object({
  url: z.string(),
});

/** Validates the model output: a list of generated images. */
const fluxOutputSchema = z.object({
  images: z.array(fluxImageSchema),
});

/** Input/output schema pair attached to the model definition. */
const schema: ZodSchema<typeof fluxInputSchema, typeof fluxOutputSchema> = {
  input: fluxInputSchema,
  output: fluxOutputSchema,
};
41
+
42
/**
 * Model definition for "flux".
 * Available through the "fal" (default) and "replicate" providers;
 * `providerModels` maps each provider to its model identifier.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "flux",
  description: "Flux Pro image generation model for high-quality images from text",
  providers: ["fal", "replicate"],
  defaultProvider: "fal",
  providerModels: {
    fal: "fal-ai/flux-pro/v1.1",
    replicate: "black-forest-labs/flux-1.1-pro",
  },
  schema,
};
55
+
56
+ export default definition;
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Model definitions index
3
+ */
4
+
5
+ export { definition as elevenlabsTts } from "./elevenlabs";
6
+ export { definition as flux } from "./flux";
7
+ export { definition as kling } from "./kling";
8
+ export { definition as llama } from "./llama";
9
+ export { definition as nanoBananaPro } from "./nano-banana-pro";
10
+ export { definition as sonauto } from "./sonauto";
11
+ export { definition as soul } from "./soul";
12
+ export { definition as wan } from "./wan";
13
+ export { definition as whisper } from "./whisper";
14
+
15
+ // All model definitions for auto-loading
16
+ import { definition as elevenlabsDefinition } from "./elevenlabs";
17
+ import { definition as fluxDefinition } from "./flux";
18
+ import { definition as klingDefinition } from "./kling";
19
+ import { definition as llamaDefinition } from "./llama";
20
+ import { definition as nanoBananaProDefinition } from "./nano-banana-pro";
21
+ import { definition as sonautoDefinition } from "./sonauto";
22
+ import { definition as soulDefinition } from "./soul";
23
+ import { definition as wanDefinition } from "./wan";
24
+ import { definition as whisperDefinition } from "./whisper";
25
+
26
/**
 * Every model definition, collected for auto-loading.
 * The order below is kept as declared, in case consumers rely on it.
 */
export const allModels = [
  klingDefinition, // video
  fluxDefinition, // image
  nanoBananaProDefinition,
  wanDefinition,
  whisperDefinition,
  elevenlabsDefinition, // text-to-speech
  soulDefinition,
  sonautoDefinition,
  llamaDefinition, // text generation
];
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Kling video generation model
3
+ * High-quality video generation from text/image
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import {
8
+ aspectRatioSchema,
9
+ videoDurationSchema,
10
+ } from "../../core/schema/shared";
11
+ import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
12
+
13
+ // Input schema with Zod
14
// Individual input fields, built from the shared schemas.
const klingPromptField = z.string().describe("Text description of the video");
const klingImageUrlField = z
  .string()
  .url()
  .optional()
  .describe("Input image for image-to-video");
const klingDurationField = videoDurationSchema
  .default(5)
  .describe("Video duration in seconds");
const klingAspectRatioField = aspectRatioSchema
  .default("16:9")
  .describe("Output aspect ratio");

/** Validates the inputs accepted by the Kling model. */
const klingInputSchema = z.object({
  prompt: klingPromptField,
  image_url: klingImageUrlField,
  duration: klingDurationField,
  aspect_ratio: klingAspectRatioField,
});

/** Validates the model output: a single video with its URL. */
const klingOutputSchema = z.object({
  video: z.object({
    url: z.string(),
  }),
});

/** Input/output schema pair attached to the model definition. */
const schema: ZodSchema<typeof klingInputSchema, typeof klingOutputSchema> = {
  input: klingInputSchema,
  output: klingOutputSchema,
};
41
+
42
/**
 * Model definition for "kling".
 * Available through the "fal" (default) and "replicate" providers;
 * `providerModels` maps each provider to its model identifier.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "kling",
  description: "Kling video generation model for high-quality video from text or image",
  providers: ["fal", "replicate"],
  defaultProvider: "fal",
  providerModels: {
    fal: "fal-ai/kling-video/v2.5-turbo/pro",
    replicate: "fofr/kling-v1.5",
  },
  schema,
};
55
+
56
+ export default definition;
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Llama LLM model
3
+ * Fast inference via Groq
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
8
+
9
+ // Llama model variants schema
10
// Model identifiers accepted for the `model` input.
// NOTE(review): confirm each variant is still served by the provider.
const LLAMA_VARIANTS = [
  "llama-3.3-70b-versatile",
  "llama-3.1-8b-instant",
  "llama-3.1-70b-versatile",
] as const;

// Llama model variants schema
const llamaModelSchema = z.enum(LLAMA_VARIANTS);

// Chat message schema: a role plus its text content
const chatMessageSchema = z.object({
  role: z.enum(["system", "user", "assistant"]),
  content: z.string(),
});

// Individual input fields.
const llamaMessagesField = z.array(chatMessageSchema).describe("Chat messages array");
const llamaModelField = llamaModelSchema
  .default("llama-3.3-70b-versatile")
  .describe("Llama model variant");
const llamaTemperatureField = z.number().default(1).describe("Sampling temperature");
const llamaMaxTokensField = z
  .number()
  .int()
  .default(1024)
  .describe("Maximum output tokens");
const llamaStreamField = z.boolean().default(false).describe("Stream response");

/** Validates the inputs accepted by the Llama model. */
const llamaInputSchema = z.object({
  messages: llamaMessagesField,
  model: llamaModelField,
  temperature: llamaTemperatureField,
  max_tokens: llamaMaxTokensField,
  stream: llamaStreamField,
});

/** Validates the model output: the generated text as a plain string. */
const llamaOutputSchema = z.string().describe("Generated text response");

/** Input/output schema pair attached to the model definition. */
const schema: ZodSchema<typeof llamaInputSchema, typeof llamaOutputSchema> = {
  input: llamaInputSchema,
  output: llamaOutputSchema,
};
41
+
42
/**
 * Model definition for "llama".
 * Served by the "groq" provider only, mapped to its
 * `llama-3.3-70b-versatile` model.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "llama",
  description: "Meta Llama model for fast text generation via Groq",
  providers: ["groq"],
  defaultProvider: "groq",
  providerModels: { groq: "llama-3.3-70b-versatile" },
  schema,
};
53
+
54
+ export default definition;