vargai 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. package/.claude/settings.local.json +7 -0
  2. package/.env.example +27 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +125 -0
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +78 -0
  14. package/SKILLS.md +173 -0
  15. package/STRUCTURE.md +92 -0
  16. package/biome.json +34 -0
  17. package/bun.lock +1254 -0
  18. package/commitlint.config.js +22 -0
  19. package/docs/plan.md +66 -0
  20. package/docs/todo.md +14 -0
  21. package/docs/varg-sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +69 -0
  24. package/pipeline/cookbooks/SKILL.md +285 -0
  25. package/pipeline/cookbooks/remotion-video.md +585 -0
  26. package/pipeline/cookbooks/round-video-character.md +337 -0
  27. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  28. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  29. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  30. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  31. package/pipeline/cookbooks/talking-character.md +59 -0
  32. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  33. package/pipeline/cookbooks/trendwatching.md +156 -0
  34. package/plan.md +281 -0
  35. package/scripts/.gitkeep +0 -0
  36. package/src/ai-sdk/cache.ts +142 -0
  37. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  38. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  39. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  40. package/src/ai-sdk/examples/duet-video.ts +56 -0
  41. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  42. package/src/ai-sdk/examples/editly-test.ts +57 -0
  43. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  44. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  45. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  46. package/src/ai-sdk/examples/music-generation.ts +19 -0
  47. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  48. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  49. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  50. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  51. package/src/ai-sdk/examples/video-generation.ts +39 -0
  52. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  53. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  54. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  55. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  56. package/src/ai-sdk/file-cache.ts +112 -0
  57. package/src/ai-sdk/file.ts +238 -0
  58. package/src/ai-sdk/generate-element.ts +92 -0
  59. package/src/ai-sdk/generate-music.ts +46 -0
  60. package/src/ai-sdk/generate-video.ts +165 -0
  61. package/src/ai-sdk/index.ts +72 -0
  62. package/src/ai-sdk/music-model.ts +110 -0
  63. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  64. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  65. package/src/ai-sdk/providers/editly/index.ts +817 -0
  66. package/src/ai-sdk/providers/editly/layers.ts +772 -0
  67. package/src/ai-sdk/providers/editly/plan.md +144 -0
  68. package/src/ai-sdk/providers/editly/types.ts +328 -0
  69. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  70. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  71. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  72. package/src/ai-sdk/providers/openai.ts +251 -0
  73. package/src/ai-sdk/providers/replicate.ts +16 -0
  74. package/src/ai-sdk/video-model.ts +185 -0
  75. package/src/cli/commands/find.tsx +137 -0
  76. package/src/cli/commands/help.tsx +85 -0
  77. package/src/cli/commands/index.ts +9 -0
  78. package/src/cli/commands/list.tsx +238 -0
  79. package/src/cli/commands/run.tsx +511 -0
  80. package/src/cli/commands/which.tsx +253 -0
  81. package/src/cli/index.ts +112 -0
  82. package/src/cli/quiet.ts +44 -0
  83. package/src/cli/types.ts +32 -0
  84. package/src/cli/ui/components/Badge.tsx +29 -0
  85. package/src/cli/ui/components/DataTable.tsx +51 -0
  86. package/src/cli/ui/components/Header.tsx +23 -0
  87. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  88. package/src/cli/ui/components/KeyValue.tsx +33 -0
  89. package/src/cli/ui/components/OptionRow.tsx +81 -0
  90. package/src/cli/ui/components/Separator.tsx +23 -0
  91. package/src/cli/ui/components/StatusBox.tsx +108 -0
  92. package/src/cli/ui/components/VargBox.tsx +51 -0
  93. package/src/cli/ui/components/VargProgress.tsx +36 -0
  94. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  95. package/src/cli/ui/components/VargText.tsx +56 -0
  96. package/src/cli/ui/components/index.ts +19 -0
  97. package/src/cli/ui/index.ts +12 -0
  98. package/src/cli/ui/render.ts +35 -0
  99. package/src/cli/ui/theme.ts +63 -0
  100. package/src/cli/utils.ts +78 -0
  101. package/src/core/executor/executor.ts +201 -0
  102. package/src/core/executor/index.ts +13 -0
  103. package/src/core/executor/job.ts +214 -0
  104. package/src/core/executor/pipeline.ts +222 -0
  105. package/src/core/index.ts +11 -0
  106. package/src/core/registry/index.ts +9 -0
  107. package/src/core/registry/loader.ts +149 -0
  108. package/src/core/registry/registry.ts +221 -0
  109. package/src/core/registry/resolver.ts +206 -0
  110. package/src/core/schema/helpers.ts +134 -0
  111. package/src/core/schema/index.ts +8 -0
  112. package/src/core/schema/shared.ts +102 -0
  113. package/src/core/schema/types.ts +279 -0
  114. package/src/core/schema/validator.ts +92 -0
  115. package/src/definitions/actions/captions.ts +261 -0
  116. package/src/definitions/actions/edit.ts +298 -0
  117. package/src/definitions/actions/image.ts +125 -0
  118. package/src/definitions/actions/index.ts +114 -0
  119. package/src/definitions/actions/music.ts +205 -0
  120. package/src/definitions/actions/sync.ts +128 -0
  121. package/src/definitions/actions/transcribe.ts +200 -0
  122. package/src/definitions/actions/upload.ts +111 -0
  123. package/src/definitions/actions/video.ts +163 -0
  124. package/src/definitions/actions/voice.ts +119 -0
  125. package/src/definitions/index.ts +23 -0
  126. package/src/definitions/models/elevenlabs.ts +50 -0
  127. package/src/definitions/models/flux.ts +56 -0
  128. package/src/definitions/models/index.ts +36 -0
  129. package/src/definitions/models/kling.ts +56 -0
  130. package/src/definitions/models/llama.ts +54 -0
  131. package/src/definitions/models/nano-banana-pro.ts +102 -0
  132. package/src/definitions/models/sonauto.ts +68 -0
  133. package/src/definitions/models/soul.ts +65 -0
  134. package/src/definitions/models/wan.ts +54 -0
  135. package/src/definitions/models/whisper.ts +44 -0
  136. package/src/definitions/skills/index.ts +12 -0
  137. package/src/definitions/skills/talking-character.ts +87 -0
  138. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  139. package/src/index.ts +118 -0
  140. package/src/providers/apify.ts +269 -0
  141. package/src/providers/base.ts +264 -0
  142. package/src/providers/elevenlabs.ts +217 -0
  143. package/src/providers/fal.ts +392 -0
  144. package/src/providers/ffmpeg.ts +544 -0
  145. package/src/providers/fireworks.ts +193 -0
  146. package/src/providers/groq.ts +149 -0
  147. package/src/providers/higgsfield.ts +145 -0
  148. package/src/providers/index.ts +143 -0
  149. package/src/providers/replicate.ts +147 -0
  150. package/src/providers/storage.ts +206 -0
  151. package/src/tests/all.test.ts +509 -0
  152. package/src/tests/index.ts +33 -0
  153. package/src/tests/unit.test.ts +403 -0
  154. package/tsconfig.json +45 -0
package/src/definitions/models/nano-banana-pro.ts ADDED
@@ -0,0 +1,102 @@
+/**
+ * Nano Banana Pro image generation model (Google Gemini 3 Pro Image)
+ * High-quality image generation and editing from text or images
+ */
+
+import { z } from "zod";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Nano Banana Pro resolution options
+const nanoBananaResolutionSchema = z.enum(["1K", "2K", "4K"]);
+
+// Nano Banana Pro aspect ratio options (fal.ai API does not support "auto")
+const nanoBananaAspectRatioSchema = z.enum([
+  "21:9",
+  "16:9",
+  "3:2",
+  "4:3",
+  "5:4",
+  "1:1",
+  "4:5",
+  "3:4",
+  "2:3",
+  "9:16",
+]);
+
+// Output format options
+const nanoBananaOutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
+
+// Safety filter level options
+const nanoBananaSafetyFilterSchema = z.enum([
+  "block_only_high",
+  "block_medium_and_above",
+  "block_low_and_above",
+  "block_none",
+]);
+
+// Input schema with Zod
+const nanoBananaProInputSchema = z.object({
+  prompt: z.string().describe("Text description for generation or editing"),
+  image_urls: z
+    .array(z.string().url())
+    .optional()
+    .describe(
+      "Input image URLs for image-to-image editing (up to 14 images). If omitted, generates new image from prompt.",
+    ),
+  resolution: nanoBananaResolutionSchema
+    .default("1K")
+    .describe("Output resolution: 1K (1024px), 2K (2048px), or 4K"),
+  aspect_ratio: nanoBananaAspectRatioSchema
+    .default("1:1")
+    .describe("Output aspect ratio"),
+  output_format: nanoBananaOutputFormatSchema
+    .default("png")
+    .describe("Output image format"),
+  safety_filter_level: nanoBananaSafetyFilterSchema
+    .default("block_only_high")
+    .describe("Safety filter strictness level"),
+  num_images: z
+    .number()
+    .int()
+    .min(1)
+    .max(4)
+    .default(1)
+    .describe("Number of images to generate (1-4)"),
+});
+
+// Output schema with Zod
+const nanoBananaProOutputSchema = z.object({
+  images: z.array(
+    z.object({
+      url: z.string(),
+      file_name: z.string().optional(),
+      content_type: z.string().optional(),
+    }),
+  ),
+  description: z.string().optional(),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<
+  typeof nanoBananaProInputSchema,
+  typeof nanoBananaProOutputSchema
+> = {
+  input: nanoBananaProInputSchema,
+  output: nanoBananaProOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "nano-banana-pro",
+  description:
+    "Google Nano Banana Pro (Gemini 3 Pro Image) for text-to-image generation and image editing. Provide image_urls for editing, omit for generation.",
+  providers: ["fal", "replicate"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/nano-banana-pro",
+    replicate: "google/nano-banana-pro",
+  },
+  schema,
+};
+
+export default definition;
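
The Zod input schema above can be exercised directly through the exported definition; a minimal sketch, assuming a local relative import and an illustrative prompt (neither is part of the package):

import nanoBananaPro from "./nano-banana-pro";

// Parsing applies the declared defaults (resolution "1K", aspect_ratio "1:1", output_format "png", ...)
const input = nanoBananaPro.schema.input.parse({
  prompt: "a watercolor fox in a misty forest",
});
console.log(input.resolution); // "1K"
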
package/src/definitions/models/sonauto.ts ADDED
@@ -0,0 +1,68 @@
+/**
+ * Sonauto music generation model
+ * Text-to-music generation
+ */
+
+import { z } from "zod";
+import { audioFormatSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const sonautoInputSchema = z.object({
+  prompt: z.string().optional().describe("Music description"),
+  tags: z.array(z.string()).optional().describe("Style tags"),
+  lyrics_prompt: z.string().optional().describe("Lyrics to generate"),
+  num_songs: z
+    .union([z.literal(1), z.literal(2)])
+    .default(1)
+    .describe("Number of songs"),
+  output_format: audioFormatSchema.default("mp3").describe("Output format"),
+  bpm: z
+    .union([z.number(), z.literal("auto")])
+    .default("auto")
+    .describe("Beats per minute"),
+});
+
+// Output schema with Zod
+const sonautoOutputSchema = z.object({
+  seed: z.number(),
+  tags: z.array(z.string()).optional(),
+  lyrics: z.string().optional(),
+  audio: z.union([
+    z.array(
+      z.object({
+        url: z.string(),
+        file_name: z.string(),
+        content_type: z.string(),
+        file_size: z.number(),
+      }),
+    ),
+    z.object({
+      url: z.string(),
+      file_name: z.string(),
+      content_type: z.string(),
+      file_size: z.number(),
+    }),
+  ]),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof sonautoInputSchema, typeof sonautoOutputSchema> =
+  {
+    input: sonautoInputSchema,
+    output: sonautoOutputSchema,
+  };
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "sonauto",
+  description: "Sonauto model for text-to-music generation",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/sonauto/bark",
+  },
+  schema,
+};
+
+export default definition;
package/src/definitions/models/soul.ts ADDED
@@ -0,0 +1,65 @@
+/**
+ * Higgsfield Soul image generation model
+ * Character-focused image generation
+ */
+
+import { z } from "zod";
+import { soulQualitySchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Soul-specific dimension schema
+const soulDimensionSchema = z.enum([
+  "SQUARE_1024x1024",
+  "PORTRAIT_1152x2048",
+  "LANDSCAPE_2048x1152",
+]);
+
+// Soul-specific batch size schema
+const soulBatchSizeSchema = z.union([z.literal(1), z.literal(2), z.literal(4)]);
+
+// Input schema with Zod
+const soulInputSchema = z.object({
+  prompt: z.string().describe("Character description"),
+  width_and_height: soulDimensionSchema
+    .default("PORTRAIT_1152x2048")
+    .describe("Output dimensions"),
+  quality: soulQualitySchema.default("HD").describe("Output quality"),
+  style_id: z.string().optional().describe("Style preset ID"),
+  batch_size: soulBatchSizeSchema
+    .default(1)
+    .describe("Number of images to generate"),
+  enhance_prompt: z.boolean().default(false).describe("Enhance prompt with AI"),
+});
+
+// Output schema with Zod
+const soulOutputSchema = z.object({
+  jobs: z.array(
+    z.object({
+      results: z.object({
+        raw: z.object({
+          url: z.string(),
+        }),
+      }),
+    }),
+  ),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof soulInputSchema, typeof soulOutputSchema> = {
+  input: soulInputSchema,
+  output: soulOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "soul",
+  description: "Higgsfield Soul model for character-focused image generation",
+  providers: ["higgsfield"],
+  defaultProvider: "higgsfield",
+  providerModels: {
+    higgsfield: "/v1/text2image/soul",
+  },
+  schema,
+};
+
+export default definition;
package/src/definitions/models/wan.ts ADDED
@@ -0,0 +1,54 @@
+/**
+ * Wan-25 lip sync model
+ * Audio-driven video generation with lip sync
+ */
+
+import { z } from "zod";
+import {
+  resolutionSchema,
+  videoDurationStringSchema,
+} from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const wanInputSchema = z.object({
+  prompt: z.string().describe("Scene description"),
+  image_url: z.string().url().describe("Input image of the character"),
+  audio_url: z.string().url().describe("Audio file for lip sync"),
+  duration: videoDurationStringSchema
+    .default("5")
+    .describe("Video duration in seconds"),
+  resolution: resolutionSchema.default("480p").describe("Output resolution"),
+  negative_prompt: z
+    .string()
+    .optional()
+    .describe("What to avoid in generation"),
+});
+
+// Output schema with Zod
+const wanOutputSchema = z.object({
+  video: z.object({
+    url: z.string(),
+  }),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof wanInputSchema, typeof wanOutputSchema> = {
+  input: wanInputSchema,
+  output: wanOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "wan",
+  description: "Wan-25 model for audio-driven video generation with lip sync",
+  providers: ["fal", "replicate"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/wan-25-preview/image-to-video",
+    replicate: "wan-video/wan-2.5-i2v",
+  },
+  schema,
+};
+
+export default definition;
package/src/definitions/models/whisper.ts ADDED
@@ -0,0 +1,44 @@
+/**
+ * Whisper transcription model
+ * Speech-to-text transcription
+ */
+
+import { z } from "zod";
+import { filePathSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const whisperInputSchema = z.object({
+  file: filePathSchema.describe("Audio file to transcribe"),
+  language: z.string().optional().describe("Language code (e.g., 'en', 'es')"),
+  prompt: z
+    .string()
+    .optional()
+    .describe("Optional prompt to guide transcription"),
+  temperature: z.number().default(0).describe("Sampling temperature"),
+});
+
+// Output schema with Zod
+const whisperOutputSchema = z.string().describe("Transcribed text");
+
+// Schema object for the definition
+const schema: ZodSchema<typeof whisperInputSchema, typeof whisperOutputSchema> =
+  {
+    input: whisperInputSchema,
+    output: whisperOutputSchema,
+  };
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "whisper",
+  description: "OpenAI Whisper model for speech-to-text transcription",
+  providers: ["groq", "fireworks"],
+  defaultProvider: "groq",
+  providerModels: {
+    groq: "whisper-large-v3",
+    fireworks: "whisper-v3-large",
+  },
+  schema,
+};
+
+export default definition;
package/src/definitions/skills/index.ts ADDED
@@ -0,0 +1,12 @@
+/**
+ * Skill definitions index
+ */
+
+export { definition as talkingCharacter } from "./talking-character";
+export { definition as textToTiktok } from "./text-to-tiktok";
+
+// All skill definitions for auto-loading
+import { definition as talkingCharacterDefinition } from "./talking-character";
+import { definition as textToTiktokDefinition } from "./text-to-tiktok";
+
+export const allSkills = [talkingCharacterDefinition, textToTiktokDefinition];
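
The allSkills array above is the auto-load list consumed elsewhere in the package; a minimal sketch of indexing it by skill name (the package's actual registry lives in src/core/registry and is not shown in this diff):

import { allSkills } from "./index";

// Build a name -> definition lookup from the auto-load list
const skillByName = new Map(allSkills.map((skill) => [skill.name, skill] as const));

console.log([...skillByName.keys()]); // ["talking-character", "text-to-tiktok"]
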
package/src/definitions/skills/talking-character.ts ADDED
@@ -0,0 +1,87 @@
+/**
+ * Talking Character Skill
+ * Create a talking character video with lipsync and captions
+ */
+
+import { z } from "zod";
+import {
+  captionStyleSchema,
+  simpleVoiceSchema,
+  videoDurationSchema,
+} from "../../core/schema/shared";
+import type { SkillDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const talkingCharacterInputSchema = z.object({
+  text: z.string().describe("Script/text for the character to say"),
+  characterPrompt: z
+    .string()
+    .default("professional headshot of a friendly person, studio lighting")
+    .describe("Prompt to generate the character"),
+  voice: simpleVoiceSchema.default("sam").describe("Voice to use for speech"),
+  duration: videoDurationSchema.default(5).describe("Video duration"),
+  style: captionStyleSchema.default("tiktok").describe("Caption style"),
+});
+
+// Output schema with Zod
+const talkingCharacterOutputSchema = z.object({
+  videoUrl: z.string(),
+  characterImageUrl: z.string().optional(),
+  audioPath: z.string().optional(),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<
+  typeof talkingCharacterInputSchema,
+  typeof talkingCharacterOutputSchema
+> = {
+  input: talkingCharacterInputSchema,
+  output: talkingCharacterOutputSchema,
+};
+
+export const definition: SkillDefinition<typeof schema> = {
+  type: "skill",
+  name: "talking-character",
+  description: "Create a talking character video with lipsync and captions",
+  schema,
+  steps: [
+    {
+      name: "generate-character",
+      run: "image",
+      inputs: {
+        prompt: "$inputs.characterPrompt",
+        provider: "higgsfield",
+      },
+    },
+    {
+      name: "generate-voice",
+      run: "voice",
+      inputs: {
+        text: "$inputs.text",
+        voice: "$inputs.voice",
+        output: "output/voiceover.mp3",
+      },
+    },
+    {
+      name: "animate-character",
+      run: "sync",
+      inputs: {
+        image: "$results.generate-character.imageUrl",
+        audio: "output/voiceover.mp3",
+        prompt: "person talking naturally, professional demeanor",
+        duration: "$inputs.duration",
+      },
+    },
+    {
+      name: "add-captions",
+      run: "captions",
+      inputs: {
+        video: "$results.animate-character.videoUrl",
+        output: "output/final.mp4",
+        style: "$inputs.style",
+      },
+    },
+  ],
+};
+
+export default definition;
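
The step inputs above use string references of the form $inputs.<field> and $results.<step>.<key>, which are resolved at run time. A minimal sketch of how such references could be resolved against plain objects; this only illustrates the reference syntax and is not the package's actual executor (src/core/executor):

type Ctx = {
  inputs: Record<string, unknown>;
  results: Record<string, Record<string, unknown>>;
};

// Resolve "$inputs.text" or "$results.generate-voice.audioPath"; pass literals through unchanged.
function resolveRef(value: unknown, ctx: Ctx): unknown {
  if (typeof value !== "string" || !value.startsWith("$")) return value;
  const [root, ...path] = value.slice(1).split(".");
  let current: unknown = root === "inputs" ? ctx.inputs : ctx.results;
  for (const key of path) {
    current = (current as Record<string, unknown> | undefined)?.[key];
  }
  return current;
}

// e.g. resolveRef("$results.generate-character.imageUrl", ctx)
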
package/src/definitions/skills/text-to-tiktok.ts ADDED
@@ -0,0 +1,97 @@
+/**
+ * Text to TikTok Skill
+ * Turn text into a TikTok with AI-generated looping background and voiceover
+ */
+
+import { z } from "zod";
+import {
+  captionStyleSchema,
+  simpleVoiceSchema,
+} from "../../core/schema/shared";
+import type { SkillDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const textToTiktokInputSchema = z.object({
+  text: z.string().describe("Text content to convert to video"),
+  voice: simpleVoiceSchema.default("sam").describe("Voice for narration"),
+  backgroundPrompt: z
+    .string()
+    .default(
+      "POV from inside moving car driving through rainy city at night, motion blur on streetlights, cinematic",
+    )
+    .describe("Prompt for background video"),
+  captionStyle: captionStyleSchema.default("tiktok").describe("Caption style"),
+});
+
+// Output schema with Zod
+const textToTiktokOutputSchema = z.object({
+  videoUrl: z.string(),
+  voiceoverPath: z.string().optional(),
+  captionsPath: z.string().optional(),
+  backgroundVideoUrl: z.string().optional(),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<
+  typeof textToTiktokInputSchema,
+  typeof textToTiktokOutputSchema
+> = {
+  input: textToTiktokInputSchema,
+  output: textToTiktokOutputSchema,
+};
+
+export const definition: SkillDefinition<typeof schema> = {
+  type: "skill",
+  name: "text-to-tiktok",
+  description: "Turn text into a TikTok with looping background and voiceover",
+  schema,
+  steps: [
+    {
+      name: "generate-voiceover",
+      run: "voice",
+      inputs: {
+        text: "$inputs.text",
+        voice: "$inputs.voice",
+        output: "output/voiceover.mp3",
+      },
+    },
+    {
+      name: "transcribe",
+      run: "transcribe",
+      inputs: {
+        audio: "output/voiceover.mp3",
+        provider: "fireworks",
+        output: "output/captions.srt",
+      },
+    },
+    {
+      name: "generate-background-frame",
+      run: "image",
+      inputs: {
+        prompt: "$inputs.backgroundPrompt",
+        size: "portrait_16_9",
+      },
+    },
+    {
+      name: "generate-background-video",
+      run: "video",
+      inputs: {
+        prompt: "$inputs.backgroundPrompt",
+        image: "$results.generate-background-frame.imageUrl",
+        duration: 10,
+      },
+    },
+    {
+      name: "add-captions",
+      run: "captions",
+      inputs: {
+        video: "$results.generate-background-video.videoUrl",
+        output: "output/final.mp4",
+        srt: "output/captions.srt",
+        style: "$inputs.captionStyle",
+      },
+    },
+  ],
+};
+
+export default definition;
package/src/index.ts ADDED
@@ -0,0 +1,118 @@
+/**
+ * varg.ai SDK
+ * AI video generation and editing tools
+ */
+
+// Re-export external clients for convenience
+export { fal } from "@ai-sdk/fal";
+export { replicate } from "@ai-sdk/replicate";
+export { fal as falClient } from "@fal-ai/client";
+export { HiggsfieldClient } from "@higgsfield/client";
+// Core exports
+export * from "./core";
+export type {
+  ActionDefinition,
+  Definition,
+  ExecutionResult,
+  InferInput,
+  InferOutput,
+  Job,
+  JobStatus,
+  JsonSchema,
+  ModelDefinition,
+  Provider,
+  ProviderConfig,
+  RunOptions,
+  SchemaProperty,
+  SkillDefinition,
+  VargConfig,
+  ZodSchema,
+} from "./core/schema/types";
+// Definition exports
+export * from "./definitions";
+export type {
+  FireworksResponse,
+  FireworksWord,
+  ProbeResult,
+  ProviderResult,
+  StorageConfig,
+} from "./providers";
+// Provider exports (excluding transcribeAudio to avoid conflict with definitions)
+
+export {
+  addAudio,
+  // Base
+  BaseProvider,
+  BatchSize,
+  chatCompletion,
+  concatVideos,
+  convertFireworksToSRT,
+  convertFormat,
+  createSoulId,
+  downloadToFile,
+  // ElevenLabs
+  ElevenLabsProvider,
+  elevenlabsProvider,
+  ensureUrl,
+  extractAudio,
+  // Fal
+  FalProvider,
+  // FFmpeg
+  FFmpegProvider,
+  // Fireworks
+  FireworksProvider,
+  fadeVideo,
+  falProvider,
+  ffmpegProvider,
+  fireworksProvider,
+  GROQ_MODELS,
+  // Groq
+  GroqProvider,
+  generateImage,
+  generateMusicElevenlabs,
+  generatePresignedUrl,
+  generateSoul,
+  generateSoundEffect,
+  getExtension,
+  getPublicUrl,
+  getVideoDuration,
+  getVoice,
+  groqProvider,
+  // Higgsfield
+  HiggsfieldProvider,
+  higgsfieldProvider,
+  imageToImage,
+  imageToVideo,
+  listModels,
+  listSoulIds,
+  listSoulStyles,
+  listVoices,
+  MODELS,
+  ProviderRegistry,
+  probe,
+  providers,
+  // Replicate
+  ReplicateProvider,
+  replicateProvider,
+  resizeVideo,
+  runImage,
+  runModel,
+  runVideo,
+  SoulQuality,
+  SoulSize,
+  // Storage
+  StorageProvider,
+  splitAtTimestamps,
+  storageProvider,
+  textToMusic,
+  textToSpeech,
+  textToVideo,
+  transcribeWithFireworks,
+  trimVideo,
+  uploadBuffer,
+  uploadFile,
+  uploadFromUrl,
+  VOICES,
+  wan25,
+  xfadeVideos,
+} from "./providers";