vargai 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -0
- package/.env.example +27 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +125 -0
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +78 -0
- package/SKILLS.md +173 -0
- package/STRUCTURE.md +92 -0
- package/biome.json +34 -0
- package/bun.lock +1254 -0
- package/commitlint.config.js +22 -0
- package/docs/plan.md +66 -0
- package/docs/todo.md +14 -0
- package/docs/varg-sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +69 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +772 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +9 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +112 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/src/definitions/actions/transcribe.ts +200 -0
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.json +45 -0

package/src/definitions/actions/upload.ts
@@ -0,0 +1,111 @@
+/**
+ * Upload action
+ * Upload files to S3/R2 storage
+ */
+
+import { existsSync } from "node:fs";
+import { basename, extname } from "node:path";
+import { z } from "zod";
+import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
+import { storageProvider } from "../../providers/storage";
+
+// Input schema
+const uploadInputSchema = z.object({
+  file: z.string().describe("Local file path or URL to upload"),
+  key: z
+    .string()
+    .optional()
+    .describe("Object key/path in storage (auto-generated if not provided)"),
+});
+
+// Output schema
+const uploadOutputSchema = z.object({
+  url: z.string().describe("Public URL of the uploaded file"),
+  key: z.string().describe("Object key in storage"),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof uploadInputSchema, typeof uploadOutputSchema> = {
+  input: uploadInputSchema,
+  output: uploadOutputSchema,
+};
+
+export interface UploadOptions {
+  key?: string;
+}
+
+export interface UploadResult {
+  url: string;
+  key: string;
+}
+
+/**
+ * Generate a unique object key based on file info
+ */
+function generateObjectKey(source: string): string {
+  const timestamp = Date.now();
+  const random = Math.random().toString(36).slice(2, 8);
+
+  // Extract extension from source
+  let ext = extname(source);
+  if (!ext) {
+    // Try to guess from URL or default to .bin
+    if (source.includes(".")) {
+      const parts = source.split(".");
+      ext = `.${parts[parts.length - 1]?.split("?")[0] || "bin"}`;
+    } else {
+      ext = ".bin";
+    }
+  }
+
+  const name = basename(source, ext).slice(0, 20) || "file";
+  return `uploads/${timestamp}-${random}-${name}${ext}`;
+}
+
+/**
+ * Check if a string is a URL
+ */
+function isUrl(str: string): boolean {
+  return str.startsWith("http://") || str.startsWith("https://");
+}
+
+/**
+ * Upload a file to storage
+ */
+export async function upload(
+  file: string,
+  options: UploadOptions = {},
+): Promise<UploadResult> {
+  const key = options.key || generateObjectKey(file);
+
+  if (isUrl(file)) {
+    console.log(`[upload] uploading from URL: ${file}`);
+    const url = await storageProvider.uploadFromUrl(file, key);
+    console.log(`[upload] uploaded to ${url}`);
+    return { url, key };
+  }
+
+  // Local file
+  if (!existsSync(file)) {
+    throw new Error(`File not found: ${file}`);
+  }
+
+  console.log(`[upload] uploading local file: ${file}`);
+  const url = await storageProvider.uploadLocalFile(file, key);
+  console.log(`[upload] uploaded to ${url}`);
+  return { url, key };
+}
+
+export const definition: ActionDefinition<typeof schema> = {
+  type: "action",
+  name: "upload",
+  description: "Upload file to S3/R2 storage",
+  schema,
+  routes: [],
+  execute: async (inputs) => {
+    const { file, key } = inputs;
+    return upload(file, { key });
+  },
+};
+
+export default definition;
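
A minimal usage sketch for the exported upload() helper above, assuming the storage credentials that storageProvider reads from the environment are configured; the import path and file names are illustrative, not part of the package.

  // Upload a local render, letting generateObjectKey() pick a unique
  // "uploads/<timestamp>-<random>-<name><ext>" key.
  import { upload } from "vargai/src/definitions/actions/upload";

  const { url, key } = await upload("./renders/scene-1.mp4");
  console.log(url, key);

  // URLs skip the existsSync() check and go through storageProvider.uploadFromUrl():
  await upload("https://example.com/clip.mp4", { key: "uploads/clip.mp4" });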

package/src/definitions/actions/video.ts
@@ -0,0 +1,163 @@
+/**
+ * Video generation action
+ * Routes to appropriate video generation models based on input
+ */
+
+import { z } from "zod";
+import {
+  aspectRatioSchema,
+  filePathSchema,
+  videoDurationSchema,
+} from "../../core/schema/shared";
+import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
+import { falProvider } from "../../providers/fal";
+import { storageProvider } from "../../providers/storage";
+
+// Input schema with Zod
+const videoInputSchema = z.object({
+  prompt: z.string().describe("What to generate"),
+  image: filePathSchema
+    .optional()
+    .describe("Input image (enables image-to-video)"),
+  duration: videoDurationSchema
+    .default(5)
+    .describe("Video duration in seconds"),
+  aspectRatio: aspectRatioSchema
+    .default("16:9")
+    .describe("Aspect ratio for text-to-video"),
+});
+
+// Output schema with Zod
+const videoOutputSchema = z.object({
+  videoUrl: z.string(),
+  duration: z.number().optional(),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof videoInputSchema, typeof videoOutputSchema> = {
+  input: videoInputSchema,
+  output: videoOutputSchema,
+};
+
+export const definition: ActionDefinition<typeof schema> = {
+  type: "action",
+  name: "video",
+  description: "Generate video from text or image",
+  schema,
+  routes: [
+    {
+      target: "kling",
+      priority: 10,
+    },
+  ],
+  execute: async (inputs) => {
+    // inputs is now fully typed as VideoInput - no more `as` cast!
+    const { prompt, image, duration, aspectRatio } = inputs;
+
+    let result: { data?: { video?: { url?: string }; duration?: number } };
+
+    if (image) {
+      console.log("[action/video] generating video from image");
+      result = await falProvider.imageToVideo({
+        prompt,
+        imageUrl: image,
+        duration,
+      });
+    } else {
+      console.log("[action/video] generating video from text");
+      result = await falProvider.textToVideo({
+        prompt,
+        duration,
+        aspectRatio,
+      });
+    }
+
+    const videoUrl = result.data?.video?.url;
+    if (!videoUrl) {
+      throw new Error("No video URL in result");
+    }
+
+    return {
+      videoUrl,
+      duration: result.data?.duration,
+    };
+  },
+};
+
+// Re-export types and functions for backward compatibility
+export interface VideoGenerationResult {
+  videoUrl: string;
+  duration?: number;
+  uploaded?: string;
+}
+
+export async function generateVideoFromImage(
+  prompt: string,
+  imageUrl: string,
+  options: { duration?: 5 | 10; upload?: boolean } = {},
+): Promise<VideoGenerationResult> {
+  console.log("[video] generating video from image");
+
+  const result = await falProvider.imageToVideo({
+    prompt,
+    imageUrl,
+    duration: options.duration,
+  });
+
+  const videoUrl = result.data?.video?.url;
+  if (!videoUrl) {
+    throw new Error("No video URL in result");
+  }
+
+  let uploaded: string | undefined;
+  if (options.upload) {
+    const timestamp = Date.now();
+    const objectKey = `videos/generated/${timestamp}.mp4`;
+    uploaded = await storageProvider.uploadFromUrl(videoUrl, objectKey);
+    console.log(`[video] uploaded to ${uploaded}`);
+  }
+
+  return {
+    videoUrl,
+    duration: result.data?.duration,
+    uploaded,
+  };
+}
+
+export async function generateVideoFromText(
+  prompt: string,
+  options: {
+    duration?: 5 | 10;
+    upload?: boolean;
+    aspectRatio?: "16:9" | "9:16" | "1:1";
+  } = {},
+): Promise<VideoGenerationResult> {
+  console.log("[video] generating video from text");
+
+  const result = await falProvider.textToVideo({
+    prompt,
+    duration: options.duration,
+    aspectRatio: options.aspectRatio,
+  });
+
+  const videoUrl = result.data?.video?.url;
+  if (!videoUrl) {
+    throw new Error("No video URL in result");
+  }
+
+  let uploaded: string | undefined;
+  if (options.upload) {
+    const timestamp = Date.now();
+    const objectKey = `videos/generated/${timestamp}.mp4`;
+    uploaded = await storageProvider.uploadFromUrl(videoUrl, objectKey);
+    console.log(`[video] uploaded to ${uploaded}`);
+  }
+
+  return {
+    videoUrl,
+    duration: result.data?.duration,
+    uploaded,
+  };
+}
+
+export default definition;
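
A minimal sketch of the backward-compatible generateVideoFromText() helper above, assuming the fal.ai credentials that falProvider expects are configured; the prompt and import path are illustrative.

  import { generateVideoFromText } from "vargai/src/definitions/actions/video";

  // Text-to-video with upload enabled: when options.upload is true, the fal
  // result URL is mirrored into storage under videos/generated/<timestamp>.mp4.
  const result = await generateVideoFromText("a lion walking through snow", {
    duration: 5,
    aspectRatio: "9:16",
    upload: true,
  });
  console.log(result.videoUrl, result.uploaded);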

package/src/definitions/actions/voice.ts
@@ -0,0 +1,119 @@
+/**
+ * Voice generation action
+ * Text-to-speech via ElevenLabs
+ */
+
+import { z } from "zod";
+import { filePathSchema, voiceNameSchema } from "../../core/schema/shared";
+import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
+import { elevenlabsProvider, VOICES } from "../../providers/elevenlabs";
+import { storageProvider } from "../../providers/storage";
+
+// Input schema with Zod
+const voiceInputSchema = z.object({
+  text: z.string().describe("Text to convert to speech"),
+  voice: voiceNameSchema.default("rachel").describe("Voice to use"),
+  output: filePathSchema.optional().describe("Output file path"),
+});
+
+// Output schema with Zod
+const voiceOutputSchema = z.object({
+  audio: z.instanceof(Buffer),
+  provider: z.string(),
+  voiceId: z.string(),
+  uploadUrl: z.string().optional(),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof voiceInputSchema, typeof voiceOutputSchema> = {
+  input: voiceInputSchema,
+  output: voiceOutputSchema,
+};
+
+export const definition: ActionDefinition<typeof schema> = {
+  type: "action",
+  name: "voice",
+  description: "Text to speech generation",
+  schema,
+  routes: [],
+  execute: async (inputs) => {
+    const { text, voice, output } = inputs;
+    return generateVoice({ text, voice, outputPath: output });
+  },
+};
+
+// Types
+export interface GenerateVoiceOptions {
+  text: string;
+  voice?: string;
+  provider?: "elevenlabs";
+  upload?: boolean;
+  outputPath?: string;
+}
+
+export interface VoiceResult {
+  audio: Buffer;
+  provider: string;
+  voiceId: string;
+  uploadUrl?: string;
+}
+
+// Voice name to ID mapping
+const VOICE_MAP: Record<string, string> = {
+  rachel: VOICES.RACHEL,
+  domi: VOICES.DOMI,
+  bella: VOICES.BELLA,
+  antoni: VOICES.ANTONI,
+  elli: VOICES.ELLI,
+  josh: VOICES.JOSH,
+  arnold: VOICES.ARNOLD,
+  adam: VOICES.ADAM,
+  sam: VOICES.SAM,
+};
+
+export async function generateVoice(
+  options: GenerateVoiceOptions,
+): Promise<VoiceResult> {
+  const {
+    text,
+    voice = "rachel",
+    provider = "elevenlabs",
+    upload = false,
+    outputPath,
+  } = options;
+
+  if (!text) {
+    throw new Error("text is required");
+  }
+
+  console.log(`[voice] generating with ${provider} (${voice})...`);
+
+  const voiceId = VOICE_MAP[voice.toLowerCase()] || voice;
+
+  const audio = await elevenlabsProvider.textToSpeech({
+    text,
+    voiceId,
+    outputPath,
+  });
+
+  const result: VoiceResult = {
+    audio,
+    provider,
+    voiceId,
+  };
+
+  // Upload to storage if requested
+  if (upload && outputPath) {
+    const objectKey = `voice/${Date.now()}-${voice}.mp3`;
+    const uploadUrl = await storageProvider.uploadLocalFile(
+      outputPath,
+      objectKey,
+    );
+    result.uploadUrl = uploadUrl;
+    console.log(`[voice] uploaded to ${uploadUrl}`);
+  }
+
+  return result;
+}
+
+export default definition;
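
A minimal sketch of generateVoice() above, assuming an ElevenLabs API key is configured for elevenlabsProvider; note from the code that the storage upload only runs when both upload and outputPath are set. Import path and file names are illustrative.

  import { generateVoice } from "vargai/src/definitions/actions/voice";

  // "josh" is resolved to VOICES.JOSH via VOICE_MAP; unrecognized names are
  // passed through unchanged as raw ElevenLabs voice IDs.
  const { voiceId, uploadUrl } = await generateVoice({
    text: "Hello from vargai",
    voice: "josh",
    outputPath: "./out/hello.mp3",
    upload: true,
  });
  console.log(voiceId, uploadUrl);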

package/src/definitions/index.ts
@@ -0,0 +1,23 @@
+/**
+ * Definitions index
+ * Re-exports all actions, models, and skills
+ */
+
+// Actions
+export * from "./actions";
+export { allActions } from "./actions";
+
+// Models
+export * from "./models";
+export { allModels } from "./models";
+
+// Skills
+export * from "./skills";
+export { allSkills } from "./skills";
+
+// All definitions combined
+import { allActions } from "./actions";
+import { allModels } from "./models";
+import { allSkills } from "./skills";
+
+export const allDefinitions = [...allModels, ...allActions, ...allSkills];
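
Since allDefinitions is just the concatenation of models, actions, and skills, a consumer can split it back apart on the type discriminator each definition carries ("model" and "action" are confirmed by the files in this diff; "skill" is assumed by analogy). Import path is illustrative.

  import { allDefinitions } from "vargai/src/definitions";

  // Partition the combined definition list by its discriminator field.
  const models = allDefinitions.filter((d) => d.type === "model");
  const actions = allDefinitions.filter((d) => d.type === "action");
  console.log(models.length, actions.length);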

package/src/definitions/models/elevenlabs.ts
@@ -0,0 +1,50 @@
+/**
+ * ElevenLabs voice models
+ * Text-to-speech generation
+ */
+
+import { z } from "zod";
+import { elevenLabsModelSchema, percentSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const elevenlabsInputSchema = z.object({
+  text: z.string().describe("Text to convert to speech"),
+  voice_id: z.string().optional().describe("Voice ID to use"),
+  model_id: elevenLabsModelSchema
+    .default("eleven_multilingual_v2")
+    .describe("TTS model to use"),
+  stability: percentSchema.default(0.5).describe("Voice stability (0-1)"),
+  similarity_boost: percentSchema
+    .default(0.75)
+    .describe("Voice similarity boost (0-1)"),
+});
+
+// Output schema with Zod
+const elevenlabsOutputSchema = z.object({
+  audio: z.instanceof(Buffer),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<
+  typeof elevenlabsInputSchema,
+  typeof elevenlabsOutputSchema
+> = {
+  input: elevenlabsInputSchema,
+  output: elevenlabsOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "elevenlabs-tts",
+  description:
+    "ElevenLabs text-to-speech model for high-quality voice generation",
+  providers: ["elevenlabs"],
+  defaultProvider: "elevenlabs",
+  providerModels: {
+    elevenlabs: "eleven_multilingual_v2",
+  },
+  schema,
+};
+
+export default definition;

package/src/definitions/models/flux.ts
@@ -0,0 +1,56 @@
+/**
+ * Flux image generation model
+ * High-quality image generation from text
+ */
+
+import { z } from "zod";
+import { imageSizeSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const fluxInputSchema = z.object({
+  prompt: z.string().describe("Text description of the image"),
+  image_size: imageSizeSchema
+    .default("landscape_4_3")
+    .describe("Output image size/aspect"),
+  num_inference_steps: z
+    .number()
+    .int()
+    .default(28)
+    .describe("Number of inference steps"),
+  guidance_scale: z
+    .number()
+    .default(3.5)
+    .describe("Guidance scale for generation"),
+});
+
+// Output schema with Zod
+const fluxOutputSchema = z.object({
+  images: z.array(
+    z.object({
+      url: z.string(),
+    }),
+  ),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof fluxInputSchema, typeof fluxOutputSchema> = {
+  input: fluxInputSchema,
+  output: fluxOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "flux",
+  description:
+    "Flux Pro image generation model for high-quality images from text",
+  providers: ["fal", "replicate"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/flux-pro/v1.1",
+    replicate: "black-forest-labs/flux-1.1-pro",
+  },
+  schema,
+};
+
+export default definition;

package/src/definitions/models/index.ts
@@ -0,0 +1,36 @@
+/**
+ * Model definitions index
+ */
+
+export { definition as elevenlabsTts } from "./elevenlabs";
+export { definition as flux } from "./flux";
+export { definition as kling } from "./kling";
+export { definition as llama } from "./llama";
+export { definition as nanoBananaPro } from "./nano-banana-pro";
+export { definition as sonauto } from "./sonauto";
+export { definition as soul } from "./soul";
+export { definition as wan } from "./wan";
+export { definition as whisper } from "./whisper";
+
+// All model definitions for auto-loading
+import { definition as elevenlabsDefinition } from "./elevenlabs";
+import { definition as fluxDefinition } from "./flux";
+import { definition as klingDefinition } from "./kling";
+import { definition as llamaDefinition } from "./llama";
+import { definition as nanoBananaProDefinition } from "./nano-banana-pro";
+import { definition as sonautoDefinition } from "./sonauto";
+import { definition as soulDefinition } from "./soul";
+import { definition as wanDefinition } from "./wan";
+import { definition as whisperDefinition } from "./whisper";
+
+export const allModels = [
+  klingDefinition,
+  fluxDefinition,
+  nanoBananaProDefinition,
+  wanDefinition,
+  whisperDefinition,
+  elevenlabsDefinition,
+  soulDefinition,
+  sonautoDefinition,
+  llamaDefinition,
+];
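
Independent of the registry auto-loading this array feeds, a model definition can also be looked up directly by name, for example to inspect which provider-specific model IDs it routes to (a sketch; import path illustrative).

  import { allModels } from "vargai/src/definitions/models";

  const kling = allModels.find((m) => m.name === "kling");
  if (kling) {
    console.log(kling.defaultProvider); // "fal"
    console.log(kling.providerModels);  // { fal: "fal-ai/kling-video/v2.5-turbo/pro", replicate: "fofr/kling-v1.5" }
  }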

package/src/definitions/models/kling.ts
@@ -0,0 +1,56 @@
+/**
+ * Kling video generation model
+ * High-quality video generation from text/image
+ */
+
+import { z } from "zod";
+import {
+  aspectRatioSchema,
+  videoDurationSchema,
+} from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Input schema with Zod
+const klingInputSchema = z.object({
+  prompt: z.string().describe("Text description of the video"),
+  image_url: z
+    .string()
+    .url()
+    .optional()
+    .describe("Input image for image-to-video"),
+  duration: videoDurationSchema
+    .default(5)
+    .describe("Video duration in seconds"),
+  aspect_ratio: aspectRatioSchema
+    .default("16:9")
+    .describe("Output aspect ratio"),
+});
+
+// Output schema with Zod
+const klingOutputSchema = z.object({
+  video: z.object({
+    url: z.string(),
+  }),
+});
+
+// Schema object for the definition
+const schema: ZodSchema<typeof klingInputSchema, typeof klingOutputSchema> = {
+  input: klingInputSchema,
+  output: klingOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "kling",
+  description:
+    "Kling video generation model for high-quality video from text or image",
+  providers: ["fal", "replicate"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/kling-video/v2.5-turbo/pro",
+    replicate: "fofr/kling-v1.5",
+  },
+  schema,
+};
+
+export default definition;

package/src/definitions/models/llama.ts
@@ -0,0 +1,54 @@
+/**
+ * Llama LLM model
+ * Fast inference via Groq
+ */
+
+import { z } from "zod";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+// Llama model variants schema
+const llamaModelSchema = z.enum([
+  "llama-3.3-70b-versatile",
+  "llama-3.1-8b-instant",
+  "llama-3.1-70b-versatile",
+]);
+
+// Chat message schema
+const chatMessageSchema = z.object({
+  role: z.enum(["system", "user", "assistant"]),
+  content: z.string(),
+});
+
+// Input schema with Zod
+const llamaInputSchema = z.object({
+  messages: z.array(chatMessageSchema).describe("Chat messages array"),
+  model: llamaModelSchema
+    .default("llama-3.3-70b-versatile")
+    .describe("Llama model variant"),
+  temperature: z.number().default(1).describe("Sampling temperature"),
+  max_tokens: z.number().int().default(1024).describe("Maximum output tokens"),
+  stream: z.boolean().default(false).describe("Stream response"),
+});
+
+// Output schema with Zod
+const llamaOutputSchema = z.string().describe("Generated text response");
+
+// Schema object for the definition
+const schema: ZodSchema<typeof llamaInputSchema, typeof llamaOutputSchema> = {
+  input: llamaInputSchema,
+  output: llamaOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "llama",
+  description: "Meta Llama model for fast text generation via Groq",
+  providers: ["groq"],
+  defaultProvider: "groq",
+  providerModels: {
+    groq: "llama-3.3-70b-versatile",
+  },
+  schema,
+};
+
+export default definition;
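
The input schemas in these model files lean on Zod defaults, so callers only need to supply the required fields. A standalone sketch of that behaviour with a schema shaped like llamaInputSchema above (rebuilt inline here, since the original constant is not exported):

  import { z } from "zod";

  const input = z.object({
    messages: z.array(
      z.object({
        role: z.enum(["system", "user", "assistant"]),
        content: z.string(),
      }),
    ),
    model: z
      .enum(["llama-3.3-70b-versatile", "llama-3.1-8b-instant"])
      .default("llama-3.3-70b-versatile"),
    temperature: z.number().default(1),
    max_tokens: z.number().int().default(1024),
    stream: z.boolean().default(false),
  });

  // parse() fills in every default the caller omitted.
  const parsed = input.parse({
    messages: [{ role: "user", content: "Write a one-line TikTok hook." }],
  });
  // parsed.model === "llama-3.3-70b-versatile", parsed.temperature === 1, ...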