varg.ai-sdk 0.1.1 → 0.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +1 -1
- package/.env.example +3 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +10 -3
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +56 -209
- package/SKILLS.md +26 -10
- package/biome.json +7 -1
- package/bun.lock +1286 -0
- package/commitlint.config.js +22 -0
- package/docs/index.html +1130 -0
- package/docs/prompting.md +326 -0
- package/docs/react.md +834 -0
- package/docs/sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +43 -10
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +776 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +6 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/render.tsx +71 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +114 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +58 -68
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/react/cli.ts +52 -0
- package/src/react/elements.ts +146 -0
- package/src/react/examples/branching.tsx +66 -0
- package/src/react/examples/captions-demo.tsx +37 -0
- package/src/react/examples/character-video.tsx +84 -0
- package/src/react/examples/grid.tsx +53 -0
- package/src/react/examples/layouts-demo.tsx +57 -0
- package/src/react/examples/madi.tsx +60 -0
- package/src/react/examples/music-test.tsx +35 -0
- package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
- package/src/react/examples/orange-portrait.tsx +41 -0
- package/src/react/examples/split-element-demo.tsx +60 -0
- package/src/react/examples/split-layout-demo.tsx +60 -0
- package/src/react/examples/split.tsx +41 -0
- package/src/react/examples/video-grid.tsx +46 -0
- package/src/react/index.ts +43 -0
- package/src/react/layouts/grid.tsx +28 -0
- package/src/react/layouts/index.ts +2 -0
- package/src/react/layouts/split.tsx +20 -0
- package/src/react/react.test.ts +309 -0
- package/src/react/render.ts +21 -0
- package/src/react/renderers/animate.ts +59 -0
- package/src/react/renderers/captions.ts +297 -0
- package/src/react/renderers/clip.ts +248 -0
- package/src/react/renderers/context.ts +17 -0
- package/src/react/renderers/image.ts +109 -0
- package/src/react/renderers/index.ts +22 -0
- package/src/react/renderers/music.ts +60 -0
- package/src/react/renderers/packshot.ts +84 -0
- package/src/react/renderers/progress.ts +173 -0
- package/src/react/renderers/render.ts +243 -0
- package/src/react/renderers/slider.ts +69 -0
- package/src/react/renderers/speech.ts +53 -0
- package/src/react/renderers/split.ts +91 -0
- package/src/react/renderers/subtitle.ts +16 -0
- package/src/react/renderers/swipe.ts +75 -0
- package/src/react/renderers/title.ts +17 -0
- package/src/react/renderers/utils.ts +124 -0
- package/src/react/renderers/video.ts +127 -0
- package/src/react/runtime/jsx-dev-runtime.ts +43 -0
- package/src/react/runtime/jsx-runtime.ts +35 -0
- package/src/react/types.ts +232 -0
- package/src/studio/index.ts +26 -0
- package/src/studio/scanner.ts +102 -0
- package/src/studio/server.ts +554 -0
- package/src/studio/stages.ts +251 -0
- package/src/studio/step-renderer.ts +279 -0
- package/src/studio/types.ts +60 -0
- package/src/studio/ui/cache.html +303 -0
- package/src/studio/ui/index.html +1820 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.cli.json +8 -0
- package/tsconfig.json +21 -3
- package/TEST_RESULTS.md +0 -122
- package/action/captions/SKILL.md +0 -170
- package/action/captions/index.ts +0 -169
- package/action/edit/SKILL.md +0 -235
- package/action/edit/index.ts +0 -437
- package/action/image/SKILL.md +0 -140
- package/action/image/index.ts +0 -105
- package/action/sync/SKILL.md +0 -136
- package/action/sync/index.ts +0 -145
- package/action/transcribe/SKILL.md +0 -179
- package/action/video/SKILL.md +0 -116
- package/action/video/index.ts +0 -125
- package/action/voice/SKILL.md +0 -125
- package/action/voice/index.ts +0 -136
- package/cli/commands/find.ts +0 -58
- package/cli/commands/help.ts +0 -70
- package/cli/commands/list.ts +0 -49
- package/cli/commands/run.ts +0 -237
- package/cli/commands/which.ts +0 -66
- package/cli/discover.ts +0 -66
- package/cli/index.ts +0 -33
- package/cli/runner.ts +0 -65
- package/cli/types.ts +0 -49
- package/cli/ui.ts +0 -185
- package/index.ts +0 -75
- package/lib/README.md +0 -144
- package/lib/ai-sdk/fal.ts +0 -106
- package/lib/ai-sdk/replicate.ts +0 -107
- package/lib/elevenlabs.ts +0 -382
- package/lib/fal.ts +0 -467
- package/lib/ffmpeg.ts +0 -467
- package/lib/fireworks.ts +0 -235
- package/lib/groq.ts +0 -246
- package/lib/higgsfield.ts +0 -176
- package/lib/remotion/SKILL.md +0 -823
- package/lib/remotion/cli.ts +0 -115
- package/lib/remotion/functions.ts +0 -283
- package/lib/remotion/index.ts +0 -19
- package/lib/remotion/templates.ts +0 -73
- package/lib/replicate.ts +0 -304
- package/output.txt +0 -1
- package/test-import.ts +0 -7
- package/test-services.ts +0 -97
- package/utilities/s3.ts +0 -147
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Action definitions index
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export type { AddCaptionsOptions, SubtitleStyle } from "./captions";
|
|
6
|
+
// Captions
|
|
7
|
+
export { addCaptions, definition as captions } from "./captions";
|
|
8
|
+
export type {
|
|
9
|
+
CutOptions,
|
|
10
|
+
CutResult,
|
|
11
|
+
FadeOptions,
|
|
12
|
+
FadeResult,
|
|
13
|
+
MergeOptions,
|
|
14
|
+
MergeResult,
|
|
15
|
+
RemoveOptions,
|
|
16
|
+
RemoveResult,
|
|
17
|
+
SplitOptions,
|
|
18
|
+
SplitResult,
|
|
19
|
+
TransitionOptions,
|
|
20
|
+
TransitionResult,
|
|
21
|
+
TrimOptions,
|
|
22
|
+
TrimResult,
|
|
23
|
+
} from "./edit";
|
|
24
|
+
// Video editing (FFmpeg)
|
|
25
|
+
export {
|
|
26
|
+
cut,
|
|
27
|
+
cutDefinition,
|
|
28
|
+
fade,
|
|
29
|
+
fadeDefinition,
|
|
30
|
+
merge,
|
|
31
|
+
mergeDefinition,
|
|
32
|
+
remove,
|
|
33
|
+
removeDefinition,
|
|
34
|
+
split,
|
|
35
|
+
splitDefinition,
|
|
36
|
+
transition,
|
|
37
|
+
transitionDefinition,
|
|
38
|
+
trim,
|
|
39
|
+
trimDefinition,
|
|
40
|
+
} from "./edit";
|
|
41
|
+
export type { ImageGenerationResult } from "./image";
|
|
42
|
+
// Image generation
|
|
43
|
+
export {
|
|
44
|
+
definition as image,
|
|
45
|
+
generateWithFal,
|
|
46
|
+
generateWithSoul,
|
|
47
|
+
} from "./image";
|
|
48
|
+
export type { GenerateMusicOptions, MusicResult } from "./music";
|
|
49
|
+
// Music generation
|
|
50
|
+
export { definition as music, generateMusic } from "./music";
|
|
51
|
+
export type { LipsyncOptions, LipsyncResult, Wav2LipOptions } from "./sync";
|
|
52
|
+
// Lip sync
|
|
53
|
+
export {
|
|
54
|
+
definition as sync,
|
|
55
|
+
lipsync,
|
|
56
|
+
lipsyncOverlay,
|
|
57
|
+
lipsyncWav2Lip,
|
|
58
|
+
} from "./sync";
|
|
59
|
+
export type { TranscribeOptions, TranscribeResult } from "./transcribe";
|
|
60
|
+
// Transcription
|
|
61
|
+
export {
|
|
62
|
+
definition as transcribe,
|
|
63
|
+
transcribe as transcribeAudio,
|
|
64
|
+
} from "./transcribe";
|
|
65
|
+
export type { UploadOptions, UploadResult } from "./upload";
|
|
66
|
+
// Upload
|
|
67
|
+
export { definition as uploadDef, upload } from "./upload";
|
|
68
|
+
export type { VideoGenerationResult } from "./video";
|
|
69
|
+
// Video generation
|
|
70
|
+
export {
|
|
71
|
+
definition as video,
|
|
72
|
+
generateVideoFromImage,
|
|
73
|
+
generateVideoFromText,
|
|
74
|
+
} from "./video";
|
|
75
|
+
export type { GenerateVoiceOptions, VoiceResult } from "./voice";
|
|
76
|
+
// Voice generation
|
|
77
|
+
export { definition as voice, generateVoice } from "./voice";
|
|
78
|
+
|
|
79
|
+
// All action definitions for auto-loading
|
|
80
|
+
import { definition as captionsDefinition } from "./captions";
|
|
81
|
+
import {
|
|
82
|
+
cutDefinition,
|
|
83
|
+
fadeDefinition,
|
|
84
|
+
mergeDefinition,
|
|
85
|
+
removeDefinition,
|
|
86
|
+
splitDefinition,
|
|
87
|
+
transitionDefinition,
|
|
88
|
+
trimDefinition,
|
|
89
|
+
} from "./edit";
|
|
90
|
+
import { definition as imageDefinition } from "./image";
|
|
91
|
+
import { definition as musicDefinition } from "./music";
|
|
92
|
+
import { definition as syncDefinition } from "./sync";
|
|
93
|
+
import { definition as transcribeDefinition } from "./transcribe";
|
|
94
|
+
import { definition as uploadDefinition } from "./upload";
|
|
95
|
+
import { definition as videoDefinition } from "./video";
|
|
96
|
+
import { definition as voiceDefinition } from "./voice";
|
|
97
|
+
|
|
98
|
+
// Flat list of every action definition exported by this module, consumed
// by the registry's auto-loading. Adding a new action requires appending
// its definition here as well as re-exporting it above.
export const allActions = [
  videoDefinition,
  imageDefinition,
  voiceDefinition,
  transcribeDefinition,
  musicDefinition,
  syncDefinition,
  captionsDefinition,
  trimDefinition,
  cutDefinition,
  mergeDefinition,
  splitDefinition,
  fadeDefinition,
  transitionDefinition,
  removeDefinition,
  uploadDefinition,
];
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Music generation action
|
|
3
|
+
* Text-to-music via Fal/Sonauto
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { writeFile } from "node:fs/promises";
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import { audioFormatSchema, filePathSchema } from "../../core/schema/shared";
|
|
9
|
+
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
|
|
10
|
+
import { falProvider } from "../../providers/fal";
|
|
11
|
+
import { storageProvider } from "../../providers/storage";
|
|
12
|
+
|
|
13
|
+
// Input schema with Zod
|
|
14
|
+
const musicInputSchema = z.object({
|
|
15
|
+
prompt: z.string().optional().describe("Description of music to generate"),
|
|
16
|
+
tags: z
|
|
17
|
+
.array(z.string())
|
|
18
|
+
.optional()
|
|
19
|
+
.describe("Style tags like 'rock', 'energetic'"),
|
|
20
|
+
lyrics: z.string().optional().describe("Optional lyrics prompt"),
|
|
21
|
+
format: audioFormatSchema.default("mp3").describe("Output format"),
|
|
22
|
+
numSongs: z
|
|
23
|
+
.union([z.literal(1), z.literal(2)])
|
|
24
|
+
.default(1)
|
|
25
|
+
.describe("Number of songs to generate"),
|
|
26
|
+
output: filePathSchema.optional().describe("Output file path"),
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
// Output schema with Zod
|
|
30
|
+
const musicOutputSchema = z.object({
|
|
31
|
+
seed: z.number(),
|
|
32
|
+
tags: z.array(z.string()).optional(),
|
|
33
|
+
lyrics: z.string().optional(),
|
|
34
|
+
audio: z.array(
|
|
35
|
+
z.object({
|
|
36
|
+
url: z.string(),
|
|
37
|
+
fileName: z.string(),
|
|
38
|
+
contentType: z.string(),
|
|
39
|
+
fileSize: z.number(),
|
|
40
|
+
}),
|
|
41
|
+
),
|
|
42
|
+
uploadUrls: z.array(z.string()).optional(),
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
// Schema object for the definition
|
|
46
|
+
const schema: ZodSchema<typeof musicInputSchema, typeof musicOutputSchema> = {
|
|
47
|
+
input: musicInputSchema,
|
|
48
|
+
output: musicOutputSchema,
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
// Registry-facing action definition for music generation; delegates all
// work to generateMusic below.
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "music",
  description: "Generate music from text prompt or tags",
  schema,
  routes: [],
  execute: async (inputs) => {
    // Note the rename: the action's `output` input becomes
    // generateMusic's `outputPath` option.
    return generateMusic({
      prompt: inputs.prompt,
      tags: inputs.tags,
      lyrics: inputs.lyrics,
      format: inputs.format,
      numSongs: inputs.numSongs,
      outputPath: inputs.output,
    });
  },
};
|
|
68
|
+
|
|
69
|
+
// Types
|
|
70
|
+
// Types
export interface GenerateMusicOptions {
  // Free-text description of the music; required unless `tags` is given
  prompt?: string;
  // Style tags; required unless `prompt` is given
  tags?: string[];
  lyrics?: string;
  seed?: number;
  // Defaults to 2 inside generateMusic
  promptStrength?: number;
  // Defaults to 0.7 inside generateMusic
  balanceStrength?: number;
  // Defaults to 1 inside generateMusic
  numSongs?: 1 | 2;
  // Defaults to "mp3" inside generateMusic
  format?: "flac" | "mp3" | "wav" | "ogg" | "m4a";
  bitRate?: 128 | 192 | 256 | 320;
  bpm?: number | "auto";
  // When true, each generated track is uploaded via storageProvider
  upload?: boolean;
  // When set, each generated track is also written to this local path
  outputPath?: string;
}

export interface MusicResult {
  seed: number;
  tags?: string[];
  lyrics?: string;
  // Audio entries normalized to camelCase from the provider response
  audio: Array<{
    url: string;
    fileName: string;
    contentType: string;
    fileSize: number;
  }>;
  // Present only when `upload` was requested
  uploadUrls?: string[];
}
|
|
97
|
+
|
|
98
|
+
export async function generateMusic(
|
|
99
|
+
options: GenerateMusicOptions,
|
|
100
|
+
): Promise<MusicResult> {
|
|
101
|
+
const {
|
|
102
|
+
prompt,
|
|
103
|
+
tags,
|
|
104
|
+
lyrics,
|
|
105
|
+
seed,
|
|
106
|
+
promptStrength = 2,
|
|
107
|
+
balanceStrength = 0.7,
|
|
108
|
+
numSongs = 1,
|
|
109
|
+
format = "mp3",
|
|
110
|
+
bitRate,
|
|
111
|
+
bpm = "auto",
|
|
112
|
+
upload = false,
|
|
113
|
+
outputPath,
|
|
114
|
+
} = options;
|
|
115
|
+
|
|
116
|
+
if (!prompt && !tags) {
|
|
117
|
+
throw new Error("Either prompt or tags is required");
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
console.log(`[music] generating ${numSongs} song(s)...`);
|
|
121
|
+
if (prompt) console.log(`[music] prompt: ${prompt}`);
|
|
122
|
+
if (tags) console.log(`[music] tags: ${tags.join(", ")}`);
|
|
123
|
+
|
|
124
|
+
const result = await falProvider.textToMusic({
|
|
125
|
+
prompt,
|
|
126
|
+
tags,
|
|
127
|
+
lyricsPrompt: lyrics,
|
|
128
|
+
seed,
|
|
129
|
+
promptStrength,
|
|
130
|
+
balanceStrength,
|
|
131
|
+
numSongs,
|
|
132
|
+
outputFormat: format,
|
|
133
|
+
outputBitRate: bitRate,
|
|
134
|
+
bpm,
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
const musicResult: MusicResult = {
|
|
138
|
+
seed: result.data.seed,
|
|
139
|
+
tags: result.data.tags,
|
|
140
|
+
lyrics: result.data.lyrics,
|
|
141
|
+
audio: Array.isArray(result.data.audio)
|
|
142
|
+
? result.data.audio.map(
|
|
143
|
+
(a: {
|
|
144
|
+
url: string;
|
|
145
|
+
file_name: string;
|
|
146
|
+
content_type: string;
|
|
147
|
+
file_size: number;
|
|
148
|
+
}) => ({
|
|
149
|
+
url: a.url,
|
|
150
|
+
fileName: a.file_name,
|
|
151
|
+
contentType: a.content_type,
|
|
152
|
+
fileSize: a.file_size,
|
|
153
|
+
}),
|
|
154
|
+
)
|
|
155
|
+
: [
|
|
156
|
+
{
|
|
157
|
+
url: result.data.audio.url,
|
|
158
|
+
fileName: result.data.audio.file_name,
|
|
159
|
+
contentType: result.data.audio.content_type,
|
|
160
|
+
fileSize: result.data.audio.file_size,
|
|
161
|
+
},
|
|
162
|
+
],
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
// Save files locally if requested
|
|
166
|
+
if (outputPath) {
|
|
167
|
+
for (let i = 0; i < musicResult.audio.length; i++) {
|
|
168
|
+
const audio = musicResult.audio[i];
|
|
169
|
+
if (!audio) continue;
|
|
170
|
+
|
|
171
|
+
const ext = format || "wav";
|
|
172
|
+
const filePath =
|
|
173
|
+
musicResult.audio.length === 1
|
|
174
|
+
? outputPath
|
|
175
|
+
: outputPath.replace(/\.[^.]+$/, `-${i + 1}.${ext}`);
|
|
176
|
+
|
|
177
|
+
const response = await fetch(audio.url);
|
|
178
|
+
const buffer = await response.arrayBuffer();
|
|
179
|
+
await writeFile(filePath, Buffer.from(buffer));
|
|
180
|
+
console.log(`[music] saved to ${filePath}`);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
// Upload to storage if requested
|
|
185
|
+
if (upload) {
|
|
186
|
+
const uploadUrls: string[] = [];
|
|
187
|
+
for (let i = 0; i < musicResult.audio.length; i++) {
|
|
188
|
+
const audio = musicResult.audio[i];
|
|
189
|
+
if (!audio) continue;
|
|
190
|
+
|
|
191
|
+
const objectKey = `music/${Date.now()}-${i + 1}.${format || "wav"}`;
|
|
192
|
+
const uploadUrl = await storageProvider.uploadFromUrl(
|
|
193
|
+
audio.url,
|
|
194
|
+
objectKey,
|
|
195
|
+
);
|
|
196
|
+
uploadUrls.push(uploadUrl);
|
|
197
|
+
console.log(`[music] uploaded to ${uploadUrl}`);
|
|
198
|
+
}
|
|
199
|
+
musicResult.uploadUrls = uploadUrls;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
return musicResult;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
export default definition;
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lip sync action
|
|
3
|
+
* Audio-to-video synchronization
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import {
|
|
8
|
+
filePathSchema,
|
|
9
|
+
resolutionSchema,
|
|
10
|
+
videoDurationStringSchema,
|
|
11
|
+
} from "../../core/schema/shared";
|
|
12
|
+
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
|
|
13
|
+
import { falProvider } from "../../providers/fal";
|
|
14
|
+
import { ffmpegProvider } from "../../providers/ffmpeg";
|
|
15
|
+
|
|
16
|
+
// Input schema with Zod
const syncInputSchema = z.object({
  image: filePathSchema.describe("Input image"),
  audio: filePathSchema.describe("Audio file"),
  prompt: z.string().describe("Description of the scene"),
  duration: videoDurationStringSchema.default("5").describe("Output duration"),
  resolution: resolutionSchema.default("480p").describe("Output resolution"),
});

// Output schema with Zod
// Matches LipsyncResult: only the URL of the rendered video.
const syncOutputSchema = z.object({
  videoUrl: z.string(),
});

// Schema object for the definition
const schema: ZodSchema<typeof syncInputSchema, typeof syncOutputSchema> = {
  input: syncInputSchema,
  output: syncOutputSchema,
};
|
|
35
|
+
|
|
36
|
+
// Registry-facing action definition for lip sync; forwards the validated
// inputs straight to lipsync below.
export const definition: ActionDefinition<typeof schema> = {
  type: "action",
  name: "sync",
  description: "Lip sync audio to video/image",
  schema,
  routes: [],
  execute: async (inputs) => {
    const { image, audio, prompt, duration, resolution } = inputs;
    return lipsync({ image, audio, prompt, duration, resolution });
  },
};
|
|
47
|
+
|
|
48
|
+
// Types
|
|
49
|
+
// Types
export interface LipsyncOptions {
  image: string;
  audio: string;
  // Description of the scene, forwarded to the wan-25 model
  prompt: string;
  // Defaults to "5" inside lipsync
  duration?: "5" | "10";
  // Defaults to "480p" inside lipsync
  resolution?: "480p" | "720p" | "1080p";
}

export interface LipsyncResult {
  videoUrl: string;
}

export interface Wav2LipOptions {
  videoPath: string;
  audioPath: string;
  outputPath: string;
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Generate lip-synced video using Wan-25
|
|
69
|
+
*/
|
|
70
|
+
export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
|
|
71
|
+
const { image, audio, prompt, duration = "5", resolution = "480p" } = options;
|
|
72
|
+
|
|
73
|
+
console.log("[sync] generating lip-synced video with wan-25...");
|
|
74
|
+
|
|
75
|
+
const result = await falProvider.wan25({
|
|
76
|
+
imageUrl: image,
|
|
77
|
+
audioUrl: audio,
|
|
78
|
+
prompt,
|
|
79
|
+
duration,
|
|
80
|
+
resolution,
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
const videoUrl = result.data?.video?.url;
|
|
84
|
+
if (!videoUrl) {
|
|
85
|
+
throw new Error("No video URL in result");
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return { videoUrl };
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Overlay lip-synced face onto original video
|
|
93
|
+
*/
|
|
94
|
+
export async function lipsyncOverlay(options: {
|
|
95
|
+
originalVideo: string;
|
|
96
|
+
lipsyncedVideo: string;
|
|
97
|
+
outputPath: string;
|
|
98
|
+
}): Promise<string> {
|
|
99
|
+
const { lipsyncedVideo, outputPath } = options;
|
|
100
|
+
|
|
101
|
+
console.log("[sync] overlaying lip-synced video...");
|
|
102
|
+
|
|
103
|
+
// This would require more complex ffmpeg operations
|
|
104
|
+
// For now, just return the lip-synced video as-is
|
|
105
|
+
await ffmpegProvider.convertFormat({
|
|
106
|
+
input: lipsyncedVideo,
|
|
107
|
+
output: outputPath,
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
return outputPath;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Wav2Lip-style lip sync (placeholder for future implementation)
|
|
115
|
+
*/
|
|
116
|
+
export async function lipsyncWav2Lip(options: Wav2LipOptions): Promise<string> {
|
|
117
|
+
console.warn("[sync] wav2lip not yet implemented, using wan-25 fallback");
|
|
118
|
+
|
|
119
|
+
// For now, just copy the video
|
|
120
|
+
await ffmpegProvider.convertFormat({
|
|
121
|
+
input: options.videoPath,
|
|
122
|
+
output: options.outputPath,
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
return options.outputPath;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
export default definition;
|
|
@@ -1,63 +1,63 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* Transcription action
|
|
3
|
+
* Speech-to-text via Groq or Fireworks
|
|
6
4
|
*/
|
|
7
5
|
|
|
8
6
|
import { writeFileSync } from "node:fs";
|
|
9
7
|
import { toFile } from "groq-sdk/uploads";
|
|
10
|
-
import
|
|
8
|
+
import { z } from "zod";
|
|
9
|
+
import {
|
|
10
|
+
filePathSchema,
|
|
11
|
+
transcriptionProviderSchema,
|
|
12
|
+
} from "../../core/schema/shared";
|
|
13
|
+
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
|
|
11
14
|
import {
|
|
12
15
|
convertFireworksToSRT,
|
|
13
|
-
|
|
14
|
-
} from "../../
|
|
15
|
-
import { GROQ_MODELS,
|
|
16
|
+
fireworksProvider,
|
|
17
|
+
} from "../../providers/fireworks";
|
|
18
|
+
import { GROQ_MODELS, groqProvider } from "../../providers/groq";
|
|
19
|
+
|
|
20
|
+
// Input schema with Zod
|
|
21
|
+
const transcribeInputSchema = z.object({
|
|
22
|
+
audio: filePathSchema.describe("Audio/video file to transcribe"),
|
|
23
|
+
provider: transcriptionProviderSchema
|
|
24
|
+
.default("groq")
|
|
25
|
+
.describe("Transcription provider"),
|
|
26
|
+
output: filePathSchema.optional().describe("Output file path"),
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
// Output schema with Zod
|
|
30
|
+
const transcribeOutputSchema = z.object({
|
|
31
|
+
success: z.boolean(),
|
|
32
|
+
text: z.string().optional(),
|
|
33
|
+
srt: z.string().optional(),
|
|
34
|
+
error: z.string().optional(),
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
// Schema object for the definition
|
|
38
|
+
const schema: ZodSchema<
|
|
39
|
+
typeof transcribeInputSchema,
|
|
40
|
+
typeof transcribeOutputSchema
|
|
41
|
+
> = {
|
|
42
|
+
input: transcribeInputSchema,
|
|
43
|
+
output: transcribeOutputSchema,
|
|
44
|
+
};
|
|
16
45
|
|
|
17
|
-
export const
|
|
18
|
-
name: "transcribe",
|
|
46
|
+
export const definition: ActionDefinition<typeof schema> = {
|
|
19
47
|
type: "action",
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
required: ["audio"],
|
|
27
|
-
properties: {
|
|
28
|
-
audio: {
|
|
29
|
-
type: "string",
|
|
30
|
-
format: "file-path",
|
|
31
|
-
description: "audio/video file to transcribe",
|
|
32
|
-
},
|
|
33
|
-
provider: {
|
|
34
|
-
type: "string",
|
|
35
|
-
enum: ["groq", "fireworks"],
|
|
36
|
-
default: "groq",
|
|
37
|
-
description: "transcription provider",
|
|
38
|
-
},
|
|
39
|
-
output: {
|
|
40
|
-
type: "string",
|
|
41
|
-
format: "file-path",
|
|
42
|
-
description: "output file path",
|
|
43
|
-
},
|
|
44
|
-
},
|
|
45
|
-
},
|
|
46
|
-
output: { type: "string", description: "transcribed text" },
|
|
47
|
-
},
|
|
48
|
-
async run(options) {
|
|
49
|
-
const { audio, provider, output } = options as {
|
|
50
|
-
audio: string;
|
|
51
|
-
provider?: "groq" | "fireworks";
|
|
52
|
-
output?: string;
|
|
53
|
-
};
|
|
48
|
+
name: "transcribe",
|
|
49
|
+
description: "Speech to text transcription",
|
|
50
|
+
schema,
|
|
51
|
+
routes: [],
|
|
52
|
+
execute: async (inputs) => {
|
|
53
|
+
const { audio, provider, output } = inputs;
|
|
54
54
|
return transcribe({ audioUrl: audio, provider, outputPath: output });
|
|
55
55
|
},
|
|
56
56
|
};
|
|
57
57
|
|
|
58
|
-
//
|
|
58
|
+
// Types
|
|
59
59
|
export interface TranscribeOptions {
|
|
60
|
-
audioUrl: string;
|
|
60
|
+
audioUrl: string;
|
|
61
61
|
provider?: "groq" | "fireworks";
|
|
62
62
|
model?: string;
|
|
63
63
|
language?: string;
|
|
@@ -72,7 +72,7 @@ export interface TranscribeResult {
|
|
|
72
72
|
error?: string;
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
//
|
|
75
|
+
// Groq transcription
|
|
76
76
|
async function transcribeWithGroq(
|
|
77
77
|
audioUrl: string,
|
|
78
78
|
options: {
|
|
@@ -84,16 +84,14 @@ async function transcribeWithGroq(
|
|
|
84
84
|
try {
|
|
85
85
|
console.log("[transcribe] using groq whisper...");
|
|
86
86
|
|
|
87
|
-
//
|
|
87
|
+
// Load audio file
|
|
88
88
|
let audioBuffer: ArrayBuffer;
|
|
89
89
|
let fileName = "audio.mp3";
|
|
90
90
|
|
|
91
91
|
if (audioUrl.startsWith("http://") || audioUrl.startsWith("https://")) {
|
|
92
|
-
// fetch remote file
|
|
93
92
|
const audioResponse = await fetch(audioUrl);
|
|
94
93
|
audioBuffer = await audioResponse.arrayBuffer();
|
|
95
94
|
} else {
|
|
96
|
-
// read local file with bun
|
|
97
95
|
const file = Bun.file(audioUrl);
|
|
98
96
|
audioBuffer = await file.arrayBuffer();
|
|
99
97
|
fileName = audioUrl.split("/").pop() || "audio.mp3";
|
|
@@ -101,8 +99,7 @@ async function transcribeWithGroq(
|
|
|
101
99
|
|
|
102
100
|
const audioFile = await toFile(audioBuffer, fileName);
|
|
103
101
|
|
|
104
|
-
|
|
105
|
-
const text = await groqTranscribe({
|
|
102
|
+
const text = await groqProvider.transcribeAudio({
|
|
106
103
|
file: audioFile,
|
|
107
104
|
model: options.model || GROQ_MODELS.WHISPER_LARGE,
|
|
108
105
|
language: options.language,
|
|
@@ -111,10 +108,8 @@ async function transcribeWithGroq(
|
|
|
111
108
|
console.log("[transcribe] groq transcription complete");
|
|
112
109
|
|
|
113
110
|
if (options.outputFormat === "srt") {
|
|
114
|
-
// groq returns plain text, so we need to convert to srt
|
|
115
|
-
// for now just return text with warning
|
|
116
111
|
console.warn(
|
|
117
|
-
"[transcribe] groq returns plain text, use fireworks for
|
|
112
|
+
"[transcribe] groq returns plain text, use fireworks for SRT format",
|
|
118
113
|
);
|
|
119
114
|
return { success: true, text, srt: text };
|
|
120
115
|
}
|
|
@@ -125,19 +120,19 @@ async function transcribeWithGroq(
|
|
|
125
120
|
return {
|
|
126
121
|
success: false,
|
|
127
122
|
error:
|
|
128
|
-
error instanceof Error ? error.message : "
|
|
123
|
+
error instanceof Error ? error.message : "Groq transcription failed",
|
|
129
124
|
};
|
|
130
125
|
}
|
|
131
126
|
}
|
|
132
127
|
|
|
133
|
-
//
|
|
128
|
+
// Fireworks transcription (with SRT support)
|
|
134
129
|
async function transcribeWithFireworks(
|
|
135
130
|
audioUrl: string,
|
|
136
131
|
): Promise<TranscribeResult> {
|
|
137
132
|
try {
|
|
138
133
|
console.log("[transcribe] using fireworks api...");
|
|
139
134
|
|
|
140
|
-
const data = await
|
|
135
|
+
const data = await fireworksProvider.transcribe({
|
|
141
136
|
audioPath: audioUrl,
|
|
142
137
|
});
|
|
143
138
|
|
|
@@ -152,12 +147,12 @@ async function transcribeWithFireworks(
|
|
|
152
147
|
error:
|
|
153
148
|
error instanceof Error
|
|
154
149
|
? error.message
|
|
155
|
-
: "
|
|
150
|
+
: "Fireworks transcription failed",
|
|
156
151
|
};
|
|
157
152
|
}
|
|
158
153
|
}
|
|
159
154
|
|
|
160
|
-
//
|
|
155
|
+
// Main transcription function
|
|
161
156
|
export async function transcribe(
|
|
162
157
|
options: TranscribeOptions,
|
|
163
158
|
): Promise<TranscribeResult> {
|
|
@@ -178,7 +173,6 @@ export async function transcribe(
|
|
|
178
173
|
|
|
179
174
|
let result: TranscribeResult;
|
|
180
175
|
|
|
181
|
-
// choose provider
|
|
182
176
|
if (provider === "groq") {
|
|
183
177
|
result = await transcribeWithGroq(audioUrl, {
|
|
184
178
|
model,
|
|
@@ -188,10 +182,10 @@ export async function transcribe(
|
|
|
188
182
|
} else if (provider === "fireworks") {
|
|
189
183
|
result = await transcribeWithFireworks(audioUrl);
|
|
190
184
|
} else {
|
|
191
|
-
throw new Error(`
|
|
185
|
+
throw new Error(`Unknown provider: ${provider}`);
|
|
192
186
|
}
|
|
193
187
|
|
|
194
|
-
//
|
|
188
|
+
// Save to file if requested
|
|
195
189
|
if (result.success && outputPath) {
|
|
196
190
|
const content = outputFormat === "srt" ? result.srt : result.text;
|
|
197
191
|
if (content) {
|
|
@@ -203,8 +197,4 @@ export async function transcribe(
|
|
|
203
197
|
return result;
|
|
204
198
|
}
|
|
205
199
|
|
|
206
|
-
|
|
207
|
-
if (import.meta.main) {
|
|
208
|
-
const { runCli } = await import("../../cli/runner");
|
|
209
|
-
runCli(meta);
|
|
210
|
-
}
|
|
200
|
+
export default definition;
|