vargai 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -0
- package/.env.example +27 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +125 -0
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +78 -0
- package/SKILLS.md +173 -0
- package/STRUCTURE.md +92 -0
- package/biome.json +34 -0
- package/bun.lock +1254 -0
- package/commitlint.config.js +22 -0
- package/docs/plan.md +66 -0
- package/docs/todo.md +14 -0
- package/docs/varg-sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +69 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +772 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +9 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +112 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/src/definitions/actions/transcribe.ts +200 -0
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.json +45 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Nano Banana Pro image generation model (Google Gemini 3 Pro Image)
|
|
3
|
+
* High-quality image generation and editing from text or images
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
8
|
+
|
|
9
|
+
/** Resolution tiers that may be requested from Nano Banana Pro. */
const nanoBananaResolutionValues = ["1K", "2K", "4K"] as const;
const nanoBananaResolutionSchema = z.enum(nanoBananaResolutionValues);
|
|
11
|
+
|
|
12
|
+
/**
 * Aspect ratios that may be requested from Nano Banana Pro, listed from
 * widest to tallest. "auto" is deliberately absent because the fal.ai API
 * does not support it.
 */
const nanoBananaAspectRatioValues = [
  "21:9",
  "16:9",
  "3:2",
  "4:3",
  "5:4",
  "1:1",
  "4:5",
  "3:4",
  "2:3",
  "9:16",
] as const;
const nanoBananaAspectRatioSchema = z.enum(nanoBananaAspectRatioValues);
|
|
25
|
+
|
|
26
|
+
/** Image container formats the caller may ask for. */
const nanoBananaOutputFormatValues = ["png", "jpeg", "webp"] as const;
const nanoBananaOutputFormatSchema = z.enum(nanoBananaOutputFormatValues);
|
|
28
|
+
|
|
29
|
+
/** Selectable safety-filter strictness levels. */
const nanoBananaSafetyFilterValues = [
  "block_only_high",
  "block_medium_and_above",
  "block_low_and_above",
  "block_none",
] as const;
const nanoBananaSafetyFilterSchema = z.enum(nanoBananaSafetyFilterValues);
|
|
36
|
+
|
|
37
|
+
/**
 * Input accepted by the Nano Banana Pro model.
 *
 * - `prompt` is the only required field.
 * - `image_urls` switches the call from generation to image-to-image editing;
 *   at most 14 URLs are accepted, matching the limit stated in its description.
 * - Every other field is optional and falls back to the default shown below.
 */
const nanoBananaProInputSchema = z.object({
  prompt: z.string().describe("Text description for generation or editing"),
  image_urls: z
    .array(z.string().url())
    // Enforce the documented 14-image ceiling during validation so an
    // oversized request is rejected locally instead of by the provider.
    .max(14)
    .optional()
    .describe(
      "Input image URLs for image-to-image editing (up to 14 images). If omitted, generates new image from prompt.",
    ),
  resolution: nanoBananaResolutionSchema
    .default("1K")
    .describe("Output resolution: 1K (1024px), 2K (2048px), or 4K"),
  aspect_ratio: nanoBananaAspectRatioSchema
    .default("1:1")
    .describe("Output aspect ratio"),
  output_format: nanoBananaOutputFormatSchema
    .default("png")
    .describe("Output image format"),
  safety_filter_level: nanoBananaSafetyFilterSchema
    .default("block_only_high")
    .describe("Safety filter strictness level"),
  num_images: z
    .number()
    .int()
    .min(1)
    .max(4)
    .default(1)
    .describe("Number of images to generate (1-4)"),
});
|
|
66
|
+
|
|
67
|
+
/** One generated image: only `url` is required, the metadata is optional. */
const nanoBananaProImageSchema = z.object({
  url: z.string(),
  file_name: z.string().optional(),
  content_type: z.string().optional(),
});

/** Result payload: a list of images plus an optional free-text description. */
const nanoBananaProOutputSchema = z.object({
  images: z.array(nanoBananaProImageSchema),
  description: z.string().optional(),
});
|
|
78
|
+
|
|
79
|
+
// Input/output schema pair consumed by the model definition.
type NanoBananaProInput = typeof nanoBananaProInputSchema;
type NanoBananaProOutput = typeof nanoBananaProOutputSchema;

const schema: ZodSchema<NanoBananaProInput, NanoBananaProOutput> = {
  input: nanoBananaProInputSchema,
  output: nanoBananaProOutputSchema,
};
|
|
87
|
+
|
|
88
|
+
const nanoBananaProDescription =
  "Google Nano Banana Pro (Gemini 3 Pro Image) for text-to-image generation and image editing. Provide image_urls for editing, omit for generation.";

/**
 * Model definition for Nano Banana Pro.
 * Served by fal (the default) and replicate; `providerModels` maps each
 * provider to its own model identifier.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "nano-banana-pro",
  description: nanoBananaProDescription,
  providers: ["fal", "replicate"],
  defaultProvider: "fal",
  providerModels: {
    fal: "fal-ai/nano-banana-pro",
    replicate: "google/nano-banana-pro",
  },
  schema,
};

export default definition;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sonauto music generation model
|
|
3
|
+
* Text-to-music generation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { audioFormatSchema } from "../../core/schema/shared";
|
|
8
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
|
|
10
|
+
/** Sonauto produces either one or two songs per request. */
const sonautoSongCountSchema = z.union([z.literal(1), z.literal(2)]);

/** Tempo is either an explicit number or the literal "auto". */
const sonautoBpmSchema = z.union([z.number(), z.literal("auto")]);

/**
 * Input accepted by the Sonauto model. Every field is optional or has a
 * default, so an empty object validates.
 */
const sonautoInputSchema = z.object({
  prompt: z.string().optional().describe("Music description"),
  tags: z.array(z.string()).optional().describe("Style tags"),
  lyrics_prompt: z.string().optional().describe("Lyrics to generate"),
  num_songs: sonautoSongCountSchema.default(1).describe("Number of songs"),
  output_format: audioFormatSchema.default("mp3").describe("Output format"),
  bpm: sonautoBpmSchema.default("auto").describe("Beats per minute"),
});
|
|
25
|
+
|
|
26
|
+
/** Descriptor of a single generated audio file. */
const sonautoAudioFileSchema = z.object({
  url: z.string(),
  file_name: z.string(),
  content_type: z.string(),
  file_size: z.number(),
});

/**
 * Result payload. `audio` is accepted either as a list of file descriptors
 * or as a single descriptor; the list form is tried first.
 */
const sonautoOutputSchema = z.object({
  seed: z.number(),
  tags: z.array(z.string()).optional(),
  lyrics: z.string().optional(),
  audio: z.union([z.array(sonautoAudioFileSchema), sonautoAudioFileSchema]),
});
|
|
48
|
+
|
|
49
|
+
// Input/output schema pair consumed by the model definition.
type SonautoInput = typeof sonautoInputSchema;
type SonautoOutput = typeof sonautoOutputSchema;

const schema: ZodSchema<SonautoInput, SonautoOutput> = {
  input: sonautoInputSchema,
  output: sonautoOutputSchema,
};
|
|
55
|
+
|
|
56
|
+
const sonautoDescription = "Sonauto model for text-to-music generation";

/**
 * Model definition for Sonauto. fal is the only provider listed and
 * therefore also the default.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "sonauto",
  description: sonautoDescription,
  providers: ["fal"],
  defaultProvider: "fal",
  providerModels: {
    fal: "fal-ai/sonauto/bark",
  },
  schema,
};

export default definition;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Higgsfield Soul image generation model
|
|
3
|
+
* Character-focused image generation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { soulQualitySchema } from "../../core/schema/shared";
|
|
8
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
|
|
10
|
+
/** Named output-size presets accepted by Soul (orientation plus pixel size). */
const soulDimensionValues = [
  "SQUARE_1024x1024",
  "PORTRAIT_1152x2048",
  "LANDSCAPE_2048x1152",
] as const;
const soulDimensionSchema = z.enum(soulDimensionValues);
|
|
16
|
+
|
|
17
|
+
/** Batch size is restricted to exactly 1, 2 or 4 (3 is not accepted). */
const soulBatchSizeSchema = z.union([
  z.literal(1),
  z.literal(2),
  z.literal(4),
]);
|
|
19
|
+
|
|
20
|
+
/**
 * Input accepted by the Soul model. Only `prompt` is required; `style_id`
 * is optional and every remaining field carries a default.
 */
const soulInputSchema = z.object({
  // Required free-text description of the character to render.
  prompt: z
    .string()
    .describe("Character description"),

  // Size preset; defaults to the portrait preset.
  width_and_height: soulDimensionSchema
    .default("PORTRAIT_1152x2048")
    .describe("Output dimensions"),

  quality: soulQualitySchema
    .default("HD")
    .describe("Output quality"),

  style_id: z
    .string()
    .optional()
    .describe("Style preset ID"),

  batch_size: soulBatchSizeSchema
    .default(1)
    .describe("Number of images to generate"),

  // Prompt enhancement is off unless requested.
  enhance_prompt: z
    .boolean()
    .default(false)
    .describe("Enhance prompt with AI"),
});
|
|
33
|
+
|
|
34
|
+
/** A single job entry; the image URL lives at `results.raw.url`. */
const soulJobSchema = z.object({
  results: z.object({
    raw: z.object({ url: z.string() }),
  }),
});

/** Result payload: a list of jobs. */
const soulOutputSchema = z.object({
  jobs: z.array(soulJobSchema),
});
|
|
46
|
+
|
|
47
|
+
// Input/output schema pair consumed by the model definition.
type SoulInput = typeof soulInputSchema;
type SoulOutput = typeof soulOutputSchema;

const schema: ZodSchema<SoulInput, SoulOutput> = {
  input: soulInputSchema,
  output: soulOutputSchema,
};
|
|
52
|
+
|
|
53
|
+
const soulDescription =
  "Higgsfield Soul model for character-focused image generation";

/**
 * Model definition for Soul. Higgsfield is the only provider listed; its
 * `providerModels` entry is an API path rather than a model slug.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "soul",
  description: soulDescription,
  providers: ["higgsfield"],
  defaultProvider: "higgsfield",
  providerModels: {
    higgsfield: "/v1/text2image/soul",
  },
  schema,
};

export default definition;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wan-25 lip sync model
|
|
3
|
+
* Audio-driven video generation with lip sync
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import {
|
|
8
|
+
resolutionSchema,
|
|
9
|
+
videoDurationStringSchema,
|
|
10
|
+
} from "../../core/schema/shared";
|
|
11
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
12
|
+
|
|
13
|
+
/**
 * Input accepted by the Wan model. `prompt`, `image_url` and `audio_url`
 * are required; both URL fields must parse as URLs.
 */
const wanInputSchema = z.object({
  prompt: z
    .string()
    .describe("Scene description"),

  image_url: z
    .string()
    .url()
    .describe("Input image of the character"),

  audio_url: z
    .string()
    .url()
    .describe("Audio file for lip sync"),

  // Duration uses the shared string-typed schema, hence the "5" default.
  duration: videoDurationStringSchema
    .default("5")
    .describe("Video duration in seconds"),

  resolution: resolutionSchema
    .default("480p")
    .describe("Output resolution"),

  negative_prompt: z
    .string()
    .optional()
    .describe("What to avoid in generation"),
});
|
|
27
|
+
|
|
28
|
+
/** Result payload: the generated video, addressed by `video.url`. */
const wanVideoSchema = z.object({ url: z.string() });

const wanOutputSchema = z.object({
  video: wanVideoSchema,
});
|
|
34
|
+
|
|
35
|
+
// Input/output schema pair consumed by the model definition.
type WanInput = typeof wanInputSchema;
type WanOutput = typeof wanOutputSchema;

const schema: ZodSchema<WanInput, WanOutput> = {
  input: wanInputSchema,
  output: wanOutputSchema,
};
|
|
40
|
+
|
|
41
|
+
const wanDescription =
  "Wan-25 model for audio-driven video generation with lip sync";

/**
 * Model definition for Wan. Served by fal (the default) and replicate;
 * `providerModels` maps each provider to its own model identifier.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "wan",
  description: wanDescription,
  providers: ["fal", "replicate"],
  defaultProvider: "fal",
  providerModels: {
    fal: "fal-ai/wan-25-preview/image-to-video",
    replicate: "wan-video/wan-2.5-i2v",
  },
  schema,
};

export default definition;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Whisper transcription model
|
|
3
|
+
* Speech-to-text transcription
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { filePathSchema } from "../../core/schema/shared";
|
|
8
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
|
|
10
|
+
/**
 * Input accepted by the Whisper transcription model.
 *
 * - `file` is required and validated by the shared `filePathSchema`.
 * - `language` and `prompt` are optional hints.
 * - `temperature` defaults to 0 and must lie within [0, 1], the range
 *   documented for Whisper-compatible transcription endpoints. Out-of-range
 *   values are rejected here rather than by the remote API.
 */
const whisperInputSchema = z.object({
  file: filePathSchema.describe("Audio file to transcribe"),
  language: z.string().optional().describe("Language code (e.g., 'en', 'es')"),
  prompt: z
    .string()
    .optional()
    .describe("Optional prompt to guide transcription"),
  temperature: z
    .number()
    .min(0)
    .max(1)
    .default(0)
    .describe("Sampling temperature"),
});
|
|
20
|
+
|
|
21
|
+
/** The output is the transcript as a bare string, not a wrapper object. */
const whisperOutputSchema = z
  .string()
  .describe("Transcribed text");
|
|
23
|
+
|
|
24
|
+
// Input/output schema pair consumed by the model definition.
type WhisperInput = typeof whisperInputSchema;
type WhisperOutput = typeof whisperOutputSchema;

const schema: ZodSchema<WhisperInput, WhisperOutput> = {
  input: whisperInputSchema,
  output: whisperOutputSchema,
};
|
|
30
|
+
|
|
31
|
+
const whisperDescription =
  "OpenAI Whisper model for speech-to-text transcription";

/**
 * Model definition for Whisper. Served by groq (the default) and fireworks;
 * the two providers use different identifiers for the model.
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "whisper",
  description: whisperDescription,
  providers: ["groq", "fireworks"],
  defaultProvider: "groq",
  providerModels: {
    groq: "whisper-large-v3",
    fireworks: "whisper-v3-large",
  },
  schema,
};

export default definition;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill definitions index
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// Each skill module is imported once; the same bindings back both the named
// re-exports and the aggregate list below.
import { definition as talkingCharacter } from "./talking-character";
import { definition as textToTiktok } from "./text-to-tiktok";

export { talkingCharacter, textToTiktok };

/** All bundled skill definitions, for auto-loading. */
export const allSkills = [talkingCharacter, textToTiktok];
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Talking Character Skill
|
|
3
|
+
* Create a talking character video with lipsync and captions
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import {
|
|
8
|
+
captionStyleSchema,
|
|
9
|
+
simpleVoiceSchema,
|
|
10
|
+
videoDurationSchema,
|
|
11
|
+
} from "../../core/schema/shared";
|
|
12
|
+
import type { SkillDefinition, ZodSchema } from "../../core/schema/types";
|
|
13
|
+
|
|
14
|
+
/**
 * Input accepted by the talking-character skill. Only `text` is required;
 * every other field has a default.
 */
const talkingCharacterInputSchema = z.object({
  // The script the character speaks.
  text: z
    .string()
    .describe("Script/text for the character to say"),

  // Image-generation prompt used when the caller does not describe a character.
  characterPrompt: z
    .string()
    .default("professional headshot of a friendly person, studio lighting")
    .describe("Prompt to generate the character"),

  voice: simpleVoiceSchema
    .default("sam")
    .describe("Voice to use for speech"),

  duration: videoDurationSchema
    .default(5)
    .describe("Video duration"),

  style: captionStyleSchema
    .default("tiktok")
    .describe("Caption style"),
});
|
|
25
|
+
|
|
26
|
+
/**
 * Result payload. `videoUrl` is always present; the character image URL and
 * audio path are optional.
 */
const talkingCharacterOutputSchema = z.object({
  videoUrl: z.string(),

  characterImageUrl: z
    .string()
    .optional(),

  audioPath: z
    .string()
    .optional(),
});
|
|
32
|
+
|
|
33
|
+
// Input/output schema pair consumed by the skill definition.
type TalkingCharacterInput = typeof talkingCharacterInputSchema;
type TalkingCharacterOutput = typeof talkingCharacterOutputSchema;

const schema: ZodSchema<TalkingCharacterInput, TalkingCharacterOutput> = {
  input: talkingCharacterInputSchema,
  output: talkingCharacterOutputSchema,
};
|
|
41
|
+
|
|
42
|
+
const talkingCharacterDescription =
  "Create a talking character video with lipsync and captions";

/**
 * Skill definition for "talking-character": four steps, each naming the
 * action it runs. String values starting with `$inputs.` or `$results.`
 * are placeholders; how they are resolved is not visible in this file.
 */
export const definition: SkillDefinition<typeof schema> = {
  type: "skill",
  name: "talking-character",
  description: talkingCharacterDescription,
  schema,
  steps: [
    // 1. Render the character still with the higgsfield provider.
    {
      name: "generate-character",
      run: "image",
      inputs: {
        prompt: "$inputs.characterPrompt",
        provider: "higgsfield",
      },
    },
    // 2. Synthesize the script to a fixed path that step 3 reads back.
    {
      name: "generate-voice",
      run: "voice",
      inputs: {
        text: "$inputs.text",
        voice: "$inputs.voice",
        output: "output/voiceover.mp3",
      },
    },
    // 3. Lip-sync the generated still against the voiceover file.
    {
      name: "animate-character",
      run: "sync",
      inputs: {
        image: "$results.generate-character.imageUrl",
        audio: "output/voiceover.mp3",
        prompt: "person talking naturally, professional demeanor",
        duration: "$inputs.duration",
      },
    },
    // 4. Caption the animated clip and write the final file.
    // NOTE(review): no srt/transcript input is passed here; presumably the
    // captions action derives one itself — confirm.
    {
      name: "add-captions",
      run: "captions",
      inputs: {
        video: "$results.animate-character.videoUrl",
        output: "output/final.mp4",
        style: "$inputs.style",
      },
    },
  ],
};

export default definition;
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text to TikTok Skill
|
|
3
|
+
* Turn text into a TikTok with AI-generated looping background and voiceover
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import {
|
|
8
|
+
captionStyleSchema,
|
|
9
|
+
simpleVoiceSchema,
|
|
10
|
+
} from "../../core/schema/shared";
|
|
11
|
+
import type { SkillDefinition, ZodSchema } from "../../core/schema/types";
|
|
12
|
+
|
|
13
|
+
/**
 * Input accepted by the text-to-tiktok skill. Only `text` is required;
 * every other field has a default.
 */
const textToTiktokInputSchema = z.object({
  text: z
    .string()
    .describe("Text content to convert to video"),

  voice: simpleVoiceSchema
    .default("sam")
    .describe("Voice for narration"),

  // Prompt used for the background when the caller supplies none.
  backgroundPrompt: z
    .string()
    .default(
      "POV from inside moving car driving through rainy city at night, motion blur on streetlights, cinematic",
    )
    .describe("Prompt for background video"),

  captionStyle: captionStyleSchema
    .default("tiktok")
    .describe("Caption style"),
});
|
|
25
|
+
|
|
26
|
+
/**
 * Result payload. `videoUrl` is always present; the remaining fields are
 * optional.
 */
const textToTiktokOutputSchema = z.object({
  videoUrl: z.string(),

  voiceoverPath: z
    .string()
    .optional(),

  captionsPath: z
    .string()
    .optional(),

  backgroundVideoUrl: z
    .string()
    .optional(),
});
|
|
33
|
+
|
|
34
|
+
// Input/output schema pair consumed by the skill definition.
type TextToTiktokInput = typeof textToTiktokInputSchema;
type TextToTiktokOutput = typeof textToTiktokOutputSchema;

const schema: ZodSchema<TextToTiktokInput, TextToTiktokOutput> = {
  input: textToTiktokInputSchema,
  output: textToTiktokOutputSchema,
};
|
|
42
|
+
|
|
43
|
+
const textToTiktokDescription =
  "Turn text into a TikTok with looping background and voiceover";

/**
 * Skill definition for "text-to-tiktok": five steps, each naming the action
 * it runs. String values starting with `$inputs.` or `$results.` are
 * placeholders; how they are resolved is not visible in this file.
 *
 * NOTE(review): only the transcribe step reads output/voiceover.mp3; neither
 * the video step nor the captions step receives it. Confirm that the
 * narration actually ends up in output/final.mp4.
 */
export const definition: SkillDefinition<typeof schema> = {
  type: "skill",
  name: "text-to-tiktok",
  description: textToTiktokDescription,
  schema,
  steps: [
    // 1. Narrate the text to a fixed path that step 2 reads back.
    {
      name: "generate-voiceover",
      run: "voice",
      inputs: {
        text: "$inputs.text",
        voice: "$inputs.voice",
        output: "output/voiceover.mp3",
      },
    },
    // 2. Transcribe the narration to an SRT file via the fireworks provider.
    {
      name: "transcribe",
      run: "transcribe",
      inputs: {
        audio: "output/voiceover.mp3",
        provider: "fireworks",
        output: "output/captions.srt",
      },
    },
    // 3. Generate the still frame that seeds the background video.
    {
      name: "generate-background-frame",
      run: "image",
      inputs: {
        prompt: "$inputs.backgroundPrompt",
        size: "portrait_16_9",
      },
    },
    // 4. Animate that frame; the duration is fixed at 10.
    {
      name: "generate-background-video",
      run: "video",
      inputs: {
        prompt: "$inputs.backgroundPrompt",
        image: "$results.generate-background-frame.imageUrl",
        duration: 10,
      },
    },
    // 5. Apply the SRT from step 2 to the background video and write the
    //    final file.
    {
      name: "add-captions",
      run: "captions",
      inputs: {
        video: "$results.generate-background-video.videoUrl",
        output: "output/final.mp4",
        srt: "output/captions.srt",
        style: "$inputs.captionStyle",
      },
    },
  ],
};

export default definition;
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* varg.ai SDK
|
|
3
|
+
* AI video generation and editing tools
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Re-export external clients for convenience
|
|
7
|
+
export { fal } from "@ai-sdk/fal";
|
|
8
|
+
export { replicate } from "@ai-sdk/replicate";
|
|
9
|
+
export { fal as falClient } from "@fal-ai/client";
|
|
10
|
+
export { HiggsfieldClient } from "@higgsfield/client";
|
|
11
|
+
// Core exports
|
|
12
|
+
export * from "./core";
|
|
13
|
+
export type {
|
|
14
|
+
ActionDefinition,
|
|
15
|
+
Definition,
|
|
16
|
+
ExecutionResult,
|
|
17
|
+
InferInput,
|
|
18
|
+
InferOutput,
|
|
19
|
+
Job,
|
|
20
|
+
JobStatus,
|
|
21
|
+
JsonSchema,
|
|
22
|
+
ModelDefinition,
|
|
23
|
+
Provider,
|
|
24
|
+
ProviderConfig,
|
|
25
|
+
RunOptions,
|
|
26
|
+
SchemaProperty,
|
|
27
|
+
SkillDefinition,
|
|
28
|
+
VargConfig,
|
|
29
|
+
ZodSchema,
|
|
30
|
+
} from "./core/schema/types";
|
|
31
|
+
// Definition exports
|
|
32
|
+
export * from "./definitions";
|
|
33
|
+
export type {
|
|
34
|
+
FireworksResponse,
|
|
35
|
+
FireworksWord,
|
|
36
|
+
ProbeResult,
|
|
37
|
+
ProviderResult,
|
|
38
|
+
StorageConfig,
|
|
39
|
+
} from "./providers";
|
|
40
|
+
// Provider exports (excluding transcribeAudio to avoid conflict with definitions)
|
|
41
|
+
|
|
42
|
+
export {
|
|
43
|
+
addAudio,
|
|
44
|
+
// Base
|
|
45
|
+
BaseProvider,
|
|
46
|
+
BatchSize,
|
|
47
|
+
chatCompletion,
|
|
48
|
+
concatVideos,
|
|
49
|
+
convertFireworksToSRT,
|
|
50
|
+
convertFormat,
|
|
51
|
+
createSoulId,
|
|
52
|
+
downloadToFile,
|
|
53
|
+
// ElevenLabs
|
|
54
|
+
ElevenLabsProvider,
|
|
55
|
+
elevenlabsProvider,
|
|
56
|
+
ensureUrl,
|
|
57
|
+
extractAudio,
|
|
58
|
+
// Fal
|
|
59
|
+
FalProvider,
|
|
60
|
+
// FFmpeg
|
|
61
|
+
FFmpegProvider,
|
|
62
|
+
// Fireworks
|
|
63
|
+
FireworksProvider,
|
|
64
|
+
fadeVideo,
|
|
65
|
+
falProvider,
|
|
66
|
+
ffmpegProvider,
|
|
67
|
+
fireworksProvider,
|
|
68
|
+
GROQ_MODELS,
|
|
69
|
+
// Groq
|
|
70
|
+
GroqProvider,
|
|
71
|
+
generateImage,
|
|
72
|
+
generateMusicElevenlabs,
|
|
73
|
+
generatePresignedUrl,
|
|
74
|
+
generateSoul,
|
|
75
|
+
generateSoundEffect,
|
|
76
|
+
getExtension,
|
|
77
|
+
getPublicUrl,
|
|
78
|
+
getVideoDuration,
|
|
79
|
+
getVoice,
|
|
80
|
+
groqProvider,
|
|
81
|
+
// Higgsfield
|
|
82
|
+
HiggsfieldProvider,
|
|
83
|
+
higgsfieldProvider,
|
|
84
|
+
imageToImage,
|
|
85
|
+
imageToVideo,
|
|
86
|
+
listModels,
|
|
87
|
+
listSoulIds,
|
|
88
|
+
listSoulStyles,
|
|
89
|
+
listVoices,
|
|
90
|
+
MODELS,
|
|
91
|
+
ProviderRegistry,
|
|
92
|
+
probe,
|
|
93
|
+
providers,
|
|
94
|
+
// Replicate
|
|
95
|
+
ReplicateProvider,
|
|
96
|
+
replicateProvider,
|
|
97
|
+
resizeVideo,
|
|
98
|
+
runImage,
|
|
99
|
+
runModel,
|
|
100
|
+
runVideo,
|
|
101
|
+
SoulQuality,
|
|
102
|
+
SoulSize,
|
|
103
|
+
// Storage
|
|
104
|
+
StorageProvider,
|
|
105
|
+
splitAtTimestamps,
|
|
106
|
+
storageProvider,
|
|
107
|
+
textToMusic,
|
|
108
|
+
textToSpeech,
|
|
109
|
+
textToVideo,
|
|
110
|
+
transcribeWithFireworks,
|
|
111
|
+
trimVideo,
|
|
112
|
+
uploadBuffer,
|
|
113
|
+
uploadFile,
|
|
114
|
+
uploadFromUrl,
|
|
115
|
+
VOICES,
|
|
116
|
+
wan25,
|
|
117
|
+
xfadeVideos,
|
|
118
|
+
} from "./providers";
|