varg.ai-sdk 0.1.1 → 0.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +1 -1
- package/.env.example +3 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +10 -3
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +56 -209
- package/SKILLS.md +26 -10
- package/biome.json +7 -1
- package/bun.lock +1286 -0
- package/commitlint.config.js +22 -0
- package/docs/index.html +1130 -0
- package/docs/prompting.md +326 -0
- package/docs/react.md +834 -0
- package/docs/sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +43 -10
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +776 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +6 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/render.tsx +71 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +114 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +58 -68
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/react/cli.ts +52 -0
- package/src/react/elements.ts +146 -0
- package/src/react/examples/branching.tsx +66 -0
- package/src/react/examples/captions-demo.tsx +37 -0
- package/src/react/examples/character-video.tsx +84 -0
- package/src/react/examples/grid.tsx +53 -0
- package/src/react/examples/layouts-demo.tsx +57 -0
- package/src/react/examples/madi.tsx +60 -0
- package/src/react/examples/music-test.tsx +35 -0
- package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
- package/src/react/examples/orange-portrait.tsx +41 -0
- package/src/react/examples/split-element-demo.tsx +60 -0
- package/src/react/examples/split-layout-demo.tsx +60 -0
- package/src/react/examples/split.tsx +41 -0
- package/src/react/examples/video-grid.tsx +46 -0
- package/src/react/index.ts +43 -0
- package/src/react/layouts/grid.tsx +28 -0
- package/src/react/layouts/index.ts +2 -0
- package/src/react/layouts/split.tsx +20 -0
- package/src/react/react.test.ts +309 -0
- package/src/react/render.ts +21 -0
- package/src/react/renderers/animate.ts +59 -0
- package/src/react/renderers/captions.ts +297 -0
- package/src/react/renderers/clip.ts +248 -0
- package/src/react/renderers/context.ts +17 -0
- package/src/react/renderers/image.ts +109 -0
- package/src/react/renderers/index.ts +22 -0
- package/src/react/renderers/music.ts +60 -0
- package/src/react/renderers/packshot.ts +84 -0
- package/src/react/renderers/progress.ts +173 -0
- package/src/react/renderers/render.ts +243 -0
- package/src/react/renderers/slider.ts +69 -0
- package/src/react/renderers/speech.ts +53 -0
- package/src/react/renderers/split.ts +91 -0
- package/src/react/renderers/subtitle.ts +16 -0
- package/src/react/renderers/swipe.ts +75 -0
- package/src/react/renderers/title.ts +17 -0
- package/src/react/renderers/utils.ts +124 -0
- package/src/react/renderers/video.ts +127 -0
- package/src/react/runtime/jsx-dev-runtime.ts +43 -0
- package/src/react/runtime/jsx-runtime.ts +35 -0
- package/src/react/types.ts +232 -0
- package/src/studio/index.ts +26 -0
- package/src/studio/scanner.ts +102 -0
- package/src/studio/server.ts +554 -0
- package/src/studio/stages.ts +251 -0
- package/src/studio/step-renderer.ts +279 -0
- package/src/studio/types.ts +60 -0
- package/src/studio/ui/cache.html +303 -0
- package/src/studio/ui/index.html +1820 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.cli.json +8 -0
- package/tsconfig.json +21 -3
- package/TEST_RESULTS.md +0 -122
- package/action/captions/SKILL.md +0 -170
- package/action/captions/index.ts +0 -169
- package/action/edit/SKILL.md +0 -235
- package/action/edit/index.ts +0 -437
- package/action/image/SKILL.md +0 -140
- package/action/image/index.ts +0 -105
- package/action/sync/SKILL.md +0 -136
- package/action/sync/index.ts +0 -145
- package/action/transcribe/SKILL.md +0 -179
- package/action/video/SKILL.md +0 -116
- package/action/video/index.ts +0 -125
- package/action/voice/SKILL.md +0 -125
- package/action/voice/index.ts +0 -136
- package/cli/commands/find.ts +0 -58
- package/cli/commands/help.ts +0 -70
- package/cli/commands/list.ts +0 -49
- package/cli/commands/run.ts +0 -237
- package/cli/commands/which.ts +0 -66
- package/cli/discover.ts +0 -66
- package/cli/index.ts +0 -33
- package/cli/runner.ts +0 -65
- package/cli/types.ts +0 -49
- package/cli/ui.ts +0 -185
- package/index.ts +0 -75
- package/lib/README.md +0 -144
- package/lib/ai-sdk/fal.ts +0 -106
- package/lib/ai-sdk/replicate.ts +0 -107
- package/lib/elevenlabs.ts +0 -382
- package/lib/fal.ts +0 -467
- package/lib/ffmpeg.ts +0 -467
- package/lib/fireworks.ts +0 -235
- package/lib/groq.ts +0 -246
- package/lib/higgsfield.ts +0 -176
- package/lib/remotion/SKILL.md +0 -823
- package/lib/remotion/cli.ts +0 -115
- package/lib/remotion/functions.ts +0 -283
- package/lib/remotion/index.ts +0 -19
- package/lib/remotion/templates.ts +0 -73
- package/lib/replicate.ts +0 -304
- package/output.txt +0 -1
- package/test-import.ts +0 -7
- package/test-services.ts +0 -97
- package/utilities/s3.ts +0 -147
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Captions/subtitles action
|
|
3
|
+
* Add captions to video from transcription
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import { captionStyleSchema, filePathSchema } from "../../core/schema/shared";
|
|
9
|
+
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
|
|
10
|
+
import { ffmpegProvider } from "../../providers/ffmpeg";
|
|
11
|
+
import { transcribe } from "./transcribe";
|
|
12
|
+
|
|
13
|
+
// Zod schema describing the inputs accepted by the `captions` action.
// `srt` is optional: when omitted, addCaptions() generates one via transcription.
const captionsInputSchema = z.object({
  video: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  srt: filePathSchema.optional().describe("SRT file (optional)"),
  style: captionStyleSchema.default("default").describe("Caption style"),
});

// Output schema: the action resolves to the path of the captioned video.
const captionsOutputSchema = z.string().describe("Captioned video path");

// Paired input/output schemas consumed by the ActionDefinition below.
const schema: ZodSchema<
  typeof captionsInputSchema,
  typeof captionsOutputSchema
> = {
  input: captionsInputSchema,
  output: captionsOutputSchema,
};
|
|
32
|
+
|
|
33
|
+
export const definition: ActionDefinition<typeof schema> = {
|
|
34
|
+
type: "action",
|
|
35
|
+
name: "captions",
|
|
36
|
+
description: "Add captions/subtitles to video",
|
|
37
|
+
schema,
|
|
38
|
+
routes: [],
|
|
39
|
+
execute: async (inputs) => {
|
|
40
|
+
const { video, output, srt, style } = inputs;
|
|
41
|
+
return addCaptions({ video, output, srt, style });
|
|
42
|
+
},
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
// Types

/**
 * ASS (Advanced SubStation Alpha) style fields used when rendering subtitles.
 * Colors use the ASS "&H..." BGR hex notation (e.g. "&HFFFFFF" = white).
 */
export interface SubtitleStyle {
  fontName?: string;
  fontSize?: number;
  primaryColor?: string; // fill color (ASS &H BGR hex)
  outlineColor?: string; // outline color
  backColor?: string; // background/box color; may carry alpha (e.g. "&H80000000")
  bold?: boolean;
  outline?: number; // outline thickness
  shadow?: number; // shadow depth
  marginV?: number; // vertical margin
  alignment?: number; // ASS alignment code (2 = bottom center)
}

/** Options accepted by addCaptions(). */
export interface AddCaptionsOptions {
  video: string;
  output: string;
  srt?: string; // pre-made SRT; generated via transcription when omitted
  style?: "default" | "tiktok" | "youtube";
}
|
|
65
|
+
|
|
66
|
+
// Style presets keyed by the `style` option. Values feed the ASS
// [V4+ Styles] line built in convertSrtToAss(); colors are ASS "&H..."
// BGR hex strings.
const STYLE_PRESETS: Record<string, SubtitleStyle> = {
  default: {
    fontName: "Arial",
    fontSize: 24,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    outline: 2,
    shadow: 1,
    marginV: 30,
    alignment: 2, // Bottom center
  },
  // Bold, boxed, larger text pushed higher up — short-form video look.
  tiktok: {
    fontName: "Montserrat",
    fontSize: 32,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H80000000", // semi-transparent black box
    bold: true,
    outline: 3,
    shadow: 0,
    marginV: 50,
    alignment: 2,
  },
  youtube: {
    fontName: "Roboto",
    fontSize: 28,
    primaryColor: "&HFFFFFF",
    outlineColor: "&H000000",
    backColor: "&H40000000", // lighter semi-transparent box than tiktok
    outline: 2,
    shadow: 1,
    marginV: 40,
    alignment: 2,
  },
};
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Add captions to video
|
|
105
|
+
*/
|
|
106
|
+
export async function addCaptions(
|
|
107
|
+
options: AddCaptionsOptions,
|
|
108
|
+
): Promise<string> {
|
|
109
|
+
const { video, output, srt: srtPath, style = "default" } = options;
|
|
110
|
+
|
|
111
|
+
console.log(`[captions] adding captions to ${video}...`);
|
|
112
|
+
|
|
113
|
+
// Generate SRT if not provided
|
|
114
|
+
let srtFile = srtPath;
|
|
115
|
+
if (!srtFile) {
|
|
116
|
+
console.log("[captions] generating transcription...");
|
|
117
|
+
|
|
118
|
+
// Extract audio first
|
|
119
|
+
const audioPath = video.replace(/\.[^.]+$/, "_audio.mp3");
|
|
120
|
+
await ffmpegProvider.extractAudio(video, audioPath);
|
|
121
|
+
|
|
122
|
+
// Transcribe
|
|
123
|
+
const result = await transcribe({
|
|
124
|
+
audioUrl: audioPath,
|
|
125
|
+
provider: "fireworks",
|
|
126
|
+
outputFormat: "srt",
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
if (!result.success || !result.srt) {
|
|
130
|
+
throw new Error("Transcription failed");
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// Save SRT
|
|
134
|
+
srtFile = video.replace(/\.[^.]+$/, ".srt");
|
|
135
|
+
writeFileSync(srtFile, result.srt);
|
|
136
|
+
console.log(`[captions] saved srt to ${srtFile}`);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Get style preset (default is always defined)
|
|
140
|
+
const styleConfig = STYLE_PRESETS[style] ??
|
|
141
|
+
STYLE_PRESETS.default ?? {
|
|
142
|
+
fontName: "Arial",
|
|
143
|
+
fontSize: 24,
|
|
144
|
+
primaryColor: "&HFFFFFF",
|
|
145
|
+
outlineColor: "&H000000",
|
|
146
|
+
outline: 2,
|
|
147
|
+
shadow: 1,
|
|
148
|
+
marginV: 30,
|
|
149
|
+
alignment: 2,
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
// Convert SRT to ASS for styling (simplified - in production use a proper ASS library)
|
|
153
|
+
const assFile = srtFile.replace(".srt", ".ass");
|
|
154
|
+
await convertSrtToAss(srtFile, assFile, styleConfig);
|
|
155
|
+
|
|
156
|
+
// Burn subtitles into video using ffmpeg
|
|
157
|
+
// This is a simplified implementation - full implementation would use subtitles filter
|
|
158
|
+
console.log(`[captions] burning subtitles...`);
|
|
159
|
+
|
|
160
|
+
// For now, just copy the video (proper implementation would use subtitles filter)
|
|
161
|
+
await ffmpegProvider.convertFormat({ input: video, output });
|
|
162
|
+
|
|
163
|
+
console.log(`[captions] saved to ${output}`);
|
|
164
|
+
return output;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/**
|
|
168
|
+
* Convert SRT to ASS format with styling
|
|
169
|
+
*/
|
|
170
|
+
async function convertSrtToAss(
|
|
171
|
+
srtPath: string,
|
|
172
|
+
assPath: string,
|
|
173
|
+
style: SubtitleStyle,
|
|
174
|
+
): Promise<void> {
|
|
175
|
+
const srtContent = await Bun.file(srtPath).text();
|
|
176
|
+
|
|
177
|
+
// Parse SRT and convert to ASS
|
|
178
|
+
const assHeader = `[Script Info]
|
|
179
|
+
Title: Generated Subtitles
|
|
180
|
+
ScriptType: v4.00+
|
|
181
|
+
WrapStyle: 0
|
|
182
|
+
ScaledBorderAndShadow: yes
|
|
183
|
+
YCbCr Matrix: TV.601
|
|
184
|
+
|
|
185
|
+
[V4+ Styles]
|
|
186
|
+
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
|
187
|
+
Style: Default,${style.fontName || "Arial"},${style.fontSize || 24},${style.primaryColor || "&HFFFFFF"},&H000000FF,${style.outlineColor || "&H000000"},${style.backColor || "&H00000000"},${style.bold ? -1 : 0},0,0,0,100,100,0,0,1,${style.outline || 2},${style.shadow || 1},${style.alignment || 2},10,10,${style.marginV || 30},1
|
|
188
|
+
|
|
189
|
+
[Events]
|
|
190
|
+
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
191
|
+
`;
|
|
192
|
+
|
|
193
|
+
// Convert SRT entries to ASS dialogue lines
|
|
194
|
+
const entries = parseSrt(srtContent);
|
|
195
|
+
const assDialogues = entries
|
|
196
|
+
.map((entry) => {
|
|
197
|
+
const start = formatAssTime(entry.start);
|
|
198
|
+
const end = formatAssTime(entry.end);
|
|
199
|
+
const text = entry.text.replace(/\n/g, "\\N");
|
|
200
|
+
return `Dialogue: 0,${start},${end},Default,,0,0,0,,${text}`;
|
|
201
|
+
})
|
|
202
|
+
.join("\n");
|
|
203
|
+
|
|
204
|
+
writeFileSync(assPath, assHeader + assDialogues);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
interface SrtEntry {
|
|
208
|
+
index: number;
|
|
209
|
+
start: number;
|
|
210
|
+
end: number;
|
|
211
|
+
text: string;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
function parseSrt(content: string): SrtEntry[] {
|
|
215
|
+
const entries: SrtEntry[] = [];
|
|
216
|
+
const blocks = content.trim().split(/\n\n+/);
|
|
217
|
+
|
|
218
|
+
for (const block of blocks) {
|
|
219
|
+
const lines = block.split("\n");
|
|
220
|
+
if (lines.length < 3) continue;
|
|
221
|
+
|
|
222
|
+
const index = Number.parseInt(lines[0] || "0", 10);
|
|
223
|
+
const timeLine = lines[1] || "";
|
|
224
|
+
const timeMatch = timeLine.match(
|
|
225
|
+
/(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
|
|
226
|
+
);
|
|
227
|
+
|
|
228
|
+
if (!timeMatch) continue;
|
|
229
|
+
|
|
230
|
+
const [, h1, m1, s1, ms1, h2, m2, s2, ms2] = timeMatch;
|
|
231
|
+
if (!h1 || !m1 || !s1 || !ms1 || !h2 || !m2 || !s2 || !ms2) continue;
|
|
232
|
+
|
|
233
|
+
const start = parseTime(h1, m1, s1, ms1);
|
|
234
|
+
const end = parseTime(h2, m2, s2, ms2);
|
|
235
|
+
const text = lines.slice(2).join("\n");
|
|
236
|
+
|
|
237
|
+
entries.push({ index, start, end, text });
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
return entries;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
function parseTime(h: string, m: string, s: string, ms: string): number {
|
|
244
|
+
return (
|
|
245
|
+
Number.parseInt(h, 10) * 3600 +
|
|
246
|
+
Number.parseInt(m, 10) * 60 +
|
|
247
|
+
Number.parseInt(s, 10) +
|
|
248
|
+
Number.parseInt(ms, 10) / 1000
|
|
249
|
+
);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
function formatAssTime(seconds: number): string {
|
|
253
|
+
const h = Math.floor(seconds / 3600);
|
|
254
|
+
const m = Math.floor((seconds % 3600) / 60);
|
|
255
|
+
const s = Math.floor(seconds % 60);
|
|
256
|
+
const cs = Math.floor((seconds % 1) * 100);
|
|
257
|
+
|
|
258
|
+
return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(cs).padStart(2, "0")}`;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
export default definition;
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video editing actions
|
|
3
|
+
* FFmpeg-based local video processing
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { filePathSchema } from "../../core/schema/shared";
|
|
8
|
+
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
import { ffmpegProvider } from "../../providers/ffmpeg";
|
|
10
|
+
|
|
11
|
+
// ============================================================================
// Trim action — cut a video down to a [start, start+duration] window
// ============================================================================

const trimInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  start: z.number().describe("Start time in seconds"),
  // Omitted duration: behavior defined by ffmpegProvider.trimVideo —
  // presumably trims to end of video; TODO confirm against the provider.
  duration: z.number().optional().describe("Duration in seconds"),
});

const trimOutputSchema = z.string().describe("Trimmed video path");

const trimSchema: ZodSchema<typeof trimInputSchema, typeof trimOutputSchema> = {
  input: trimInputSchema,
  output: trimOutputSchema,
};

/** `trim` action: delegates to the local ffmpeg provider. */
export const trimDefinition: ActionDefinition<typeof trimSchema> = {
  type: "action",
  name: "trim",
  description: "Trim video to specific time range",
  schema: trimSchema,
  routes: [],
  execute: async (inputs) => {
    const { input, output, start, duration } = inputs;
    return ffmpegProvider.trimVideo({ input, output, start, duration });
  },
};

// ============================================================================
// Cut action (alias for trim)
// ============================================================================

/**
 * `cut` action: a pure alias — it has no execute of its own; the route
 * entry forwards invocations to `trim`.
 */
export const cutDefinition: ActionDefinition<typeof trimSchema> = {
  type: "action",
  name: "cut",
  description: "Cut video at specific point",
  schema: trimSchema,
  routes: [{ target: "trim" }],
};
|
|
52
|
+
|
|
53
|
+
// ============================================================================
// Merge action — concatenate multiple videos into one
// ============================================================================

const mergeInputSchema = z.object({
  inputs: z.array(z.string()).describe("Input video paths"),
  output: filePathSchema.describe("Output path"),
});

const mergeOutputSchema = z.string().describe("Merged video path");

const mergeSchema: ZodSchema<
  typeof mergeInputSchema,
  typeof mergeOutputSchema
> = {
  input: mergeInputSchema,
  output: mergeOutputSchema,
};

/** `merge` action: concatenates the inputs in the order given. */
export const mergeDefinition: ActionDefinition<typeof mergeSchema> = {
  type: "action",
  name: "merge",
  description: "Merge multiple videos together",
  schema: mergeSchema,
  routes: [],
  execute: async (inputs) => {
    // "inputs" is both the action payload and a field inside it;
    // rename on destructure to avoid the collision.
    const { inputs: videoInputs, output } = inputs;
    return ffmpegProvider.concatVideos({ inputs: videoInputs, output });
  },
};
|
|
83
|
+
|
|
84
|
+
// ============================================================================
// Split action — cut one video into segments at the given timestamps
// ============================================================================

const splitInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  timestamps: z.array(z.number()).describe("Split points in seconds"),
  outputPrefix: z.string().describe("Output filename prefix"),
});

// Output is an array of segment paths.
// NOTE(review): exact naming and segment count come from
// ffmpegProvider.splitAtTimestamps — confirm there.
const splitOutputSchema = z.array(z.string());

const splitSchema: ZodSchema<
  typeof splitInputSchema,
  typeof splitOutputSchema
> = {
  input: splitInputSchema,
  output: splitOutputSchema,
};

/** `split` action: delegates to the local ffmpeg provider. */
export const splitDefinition: ActionDefinition<typeof splitSchema> = {
  type: "action",
  name: "split",
  description: "Split video at timestamps",
  schema: splitSchema,
  routes: [],
  execute: async (inputs) => {
    const { input, timestamps, outputPrefix } = inputs;
    return ffmpegProvider.splitAtTimestamps({
      input,
      timestamps,
      outputPrefix,
    });
  },
};
|
|
120
|
+
|
|
121
|
+
// ============================================================================
// Fade action — fade the video in, out, or both
// ============================================================================

const fadeInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  type: z.enum(["in", "out", "both"]).describe("Fade type"),
  duration: z.number().describe("Fade duration in seconds"),
});

const fadeOutputSchema = z.string().describe("Faded video path");

const fadeSchema: ZodSchema<typeof fadeInputSchema, typeof fadeOutputSchema> = {
  input: fadeInputSchema,
  output: fadeOutputSchema,
};

/** `fade` action: delegates to the local ffmpeg provider. */
export const fadeDefinition: ActionDefinition<typeof fadeSchema> = {
  type: "action",
  name: "fade",
  description: "Apply fade in/out effects",
  schema: fadeSchema,
  routes: [],
  execute: async (inputs) => {
    const { input, output, type, duration } = inputs;
    return ffmpegProvider.fadeVideo({ input, output, type, duration });
  },
};
|
|
150
|
+
|
|
151
|
+
// ============================================================================
// Transition action — join two videos with a cross-transition
// ============================================================================

const transitionInputSchema = z.object({
  input1: filePathSchema.describe("First video"),
  input2: filePathSchema.describe("Second video"),
  output: filePathSchema.describe("Output path"),
  // Transition names presumably map onto ffmpeg xfade transitions
  // (the provider call is xfadeVideos) — confirm the mapping there.
  transition: z
    .enum([
      "crossfade",
      "dissolve",
      "wipeleft",
      "wiperight",
      "slideup",
      "slidedown",
    ])
    .describe("Transition type"),
  duration: z.number().describe("Transition duration"),
  // Strategy for reconciling mismatched input resolutions before the xfade.
  fit: z
    .enum(["pad", "crop", "blur", "stretch"])
    .default("pad")
    .describe("How to handle different resolutions"),
});

const transitionOutputSchema = z.string().describe("Output path");

const transitionSchema: ZodSchema<
  typeof transitionInputSchema,
  typeof transitionOutputSchema
> = {
  input: transitionInputSchema,
  output: transitionOutputSchema,
};

/** `transition` action: delegates to ffmpegProvider.xfadeVideos. */
export const transitionDefinition: ActionDefinition<typeof transitionSchema> = {
  type: "action",
  name: "transition",
  description: "Apply transition between two videos",
  schema: transitionSchema,
  routes: [],
  execute: async (inputs) => {
    const { input1, input2, output, transition, duration, fit } = inputs;
    return ffmpegProvider.xfadeVideos({
      input1,
      input2,
      output,
      transition,
      duration,
      fit,
    });
  },
};
|
|
204
|
+
|
|
205
|
+
// ============================================================================
// Remove (audio) action — strip or extract a stream from a video
// ============================================================================

const removeInputSchema = z.object({
  input: filePathSchema.describe("Input video"),
  output: filePathSchema.describe("Output path"),
  what: z
    .enum(["audio", "video"])
    .default("audio")
    .describe("What to extract/remove"),
});

const removeOutputSchema = z.string().describe("Output path");

const removeSchema: ZodSchema<
  typeof removeInputSchema,
  typeof removeOutputSchema
> = {
  input: removeInputSchema,
  output: removeOutputSchema,
};

/**
 * `remove` action.
 * - what === "audio": extracts the audio track to `output`.
 * - what === "video": converts/copies the file via convertFormat.
 *   NOTE(review): convertFormat does not obviously drop the audio stream —
 *   verify against ffmpegProvider; an "-an"-style option may be needed for
 *   this branch to actually remove audio as the description promises.
 */
export const removeDefinition: ActionDefinition<typeof removeSchema> = {
  type: "action",
  name: "remove",
  description: "Remove audio from video or extract audio",
  schema: removeSchema,
  routes: [],
  execute: async (inputs) => {
    const { input, output, what } = inputs;

    if (what === "audio") {
      return ffmpegProvider.extractAudio(input, output);
    }

    // what === "video": keep the video stream (intended to drop audio).
    return ffmpegProvider.convertFormat({ input, output });
  },
};
|
|
245
|
+
|
|
246
|
+
// All edit-action definitions, in registration order.
export const definitions = [
  trimDefinition,
  cutDefinition,
  mergeDefinition,
  splitDefinition,
  fadeDefinition,
  transitionDefinition,
  removeDefinition,
];
|
|
256
|
+
|
|
257
|
+
// Re-export option/result types for backward compatibility. They are
// derived from the provider signatures (Parameters/Awaited<ReturnType>)
// so they stay in lockstep with ffmpegProvider.
export type TrimOptions = Parameters<typeof ffmpegProvider.trimVideo>[0];
export type TrimResult = Awaited<ReturnType<typeof ffmpegProvider.trimVideo>>;
export type CutOptions = TrimOptions;
export type CutResult = TrimResult;
export type MergeOptions = Parameters<typeof ffmpegProvider.concatVideos>[0];
export type MergeResult = Awaited<
  ReturnType<typeof ffmpegProvider.concatVideos>
>;
export type SplitOptions = Parameters<
  typeof ffmpegProvider.splitAtTimestamps
>[0];
export type SplitResult = Awaited<
  ReturnType<typeof ffmpegProvider.splitAtTimestamps>
>;
export type FadeOptions = Parameters<typeof ffmpegProvider.fadeVideo>[0];
export type FadeResult = Awaited<ReturnType<typeof ffmpegProvider.fadeVideo>>;
export type TransitionOptions = Parameters<
  typeof ffmpegProvider.xfadeVideos
>[0];
export type TransitionResult = Awaited<
  ReturnType<typeof ffmpegProvider.xfadeVideos>
>;
// `remove` has no single provider call to derive from, so its shapes are
// spelled out explicitly here (mirrors removeInputSchema).
export type RemoveOptions = {
  input: string;
  output: string;
  what?: "audio" | "video";
};
export type RemoveResult = string;
|
|
286
|
+
|
|
287
|
+
// Convenience exports
// Thin call-through wrappers so consumers can invoke the provider
// operations directly without going through the ActionDefinition machinery.
export const trim = (opts: TrimOptions) => ffmpegProvider.trimVideo(opts);
// cut is intentionally the exact same operation as trim.
export const cut = trim;
export const merge = (opts: MergeOptions) => ffmpegProvider.concatVideos(opts);
export const split = (opts: SplitOptions) =>
  ffmpegProvider.splitAtTimestamps(opts);
export const fade = (opts: FadeOptions) => ffmpegProvider.fadeVideo(opts);
export const transition = (opts: TransitionOptions) =>
  ffmpegProvider.xfadeVideos(opts);
// remove reuses the action's execute so the "what" dispatch lives in one place.
export const remove = removeDefinition.execute;

export default trimDefinition;
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image generation action
|
|
3
|
+
* Routes to Fal or Higgsfield based on options
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { imageSizeSchema } from "../../core/schema/shared";
|
|
8
|
+
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
import { falProvider } from "../../providers/fal";
|
|
10
|
+
import { higgsfieldProvider } from "../../providers/higgsfield";
|
|
11
|
+
import { storageProvider } from "../../providers/storage";
|
|
12
|
+
|
|
13
|
+
// Input schema with Zod
const imageInputSchema = z.object({
  // Free-form text prompt forwarded to the selected provider.
  prompt: z.string().describe("What to generate"),
  // Size/aspect-ratio preset; note that only the Fal path forwards it
  // (generateWithSoul receives the prompt alone — see execute below).
  size: imageSizeSchema
    .default("landscape_4_3")
    .describe("Image size/aspect ratio"),
  // Backend selector; also drives route selection in the definition.
  provider: z
    .enum(["fal", "higgsfield"])
    .default("fal")
    .describe("Generation provider"),
});
|
|
24
|
+
|
|
25
|
+
// Output schema with Zod
const imageOutputSchema = z.object({
  // Provider-hosted URL of the generated image.
  imageUrl: z.string(),
  // Storage URL when the result was re-uploaded; absent otherwise.
  uploaded: z.string().optional(),
});
|
|
30
|
+
|
|
31
|
+
// Schema object for the definition
// Pairs the Zod input/output schemas in the ZodSchema shape that
// ActionDefinition expects.
const schema: ZodSchema<typeof imageInputSchema, typeof imageOutputSchema> = {
  input: imageInputSchema,
  output: imageOutputSchema,
};
|
|
36
|
+
|
|
37
|
+
export const definition: ActionDefinition<typeof schema> = {
|
|
38
|
+
type: "action",
|
|
39
|
+
name: "image",
|
|
40
|
+
description: "Generate image from text",
|
|
41
|
+
schema,
|
|
42
|
+
routes: [
|
|
43
|
+
{
|
|
44
|
+
target: "flux",
|
|
45
|
+
when: { provider: "fal" },
|
|
46
|
+
priority: 5,
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
target: "soul",
|
|
50
|
+
when: { provider: "higgsfield" },
|
|
51
|
+
priority: 10,
|
|
52
|
+
},
|
|
53
|
+
],
|
|
54
|
+
execute: async (inputs) => {
|
|
55
|
+
const { prompt, size, provider } = inputs;
|
|
56
|
+
|
|
57
|
+
if (provider === "higgsfield") {
|
|
58
|
+
return generateWithSoul(prompt);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
return generateWithFal(prompt, { imageSize: size });
|
|
62
|
+
},
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
// Result shape shared by both generation helpers below.
export interface ImageGenerationResult {
  // URL of the generated image as returned by the provider.
  imageUrl: string;
  // Storage URL when `upload: true` was passed; undefined otherwise.
  uploaded?: string;
}
|
|
69
|
+
|
|
70
|
+
export async function generateWithFal(
|
|
71
|
+
prompt: string,
|
|
72
|
+
options: { imageSize?: string; upload?: boolean } = {},
|
|
73
|
+
): Promise<ImageGenerationResult> {
|
|
74
|
+
console.log("[image] generating with fal");
|
|
75
|
+
|
|
76
|
+
const result = await falProvider.generateImage({
|
|
77
|
+
prompt,
|
|
78
|
+
imageSize: options.imageSize,
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
const imageUrl = (result.data as { images?: Array<{ url?: string }> })
|
|
82
|
+
?.images?.[0]?.url;
|
|
83
|
+
if (!imageUrl) {
|
|
84
|
+
throw new Error("No image URL in result");
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
let uploaded: string | undefined;
|
|
88
|
+
if (options.upload) {
|
|
89
|
+
const timestamp = Date.now();
|
|
90
|
+
const objectKey = `images/fal/${timestamp}.png`;
|
|
91
|
+
uploaded = await storageProvider.uploadFromUrl(imageUrl, objectKey);
|
|
92
|
+
console.log(`[image] uploaded to ${uploaded}`);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
return { imageUrl, uploaded };
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
export async function generateWithSoul(
|
|
99
|
+
prompt: string,
|
|
100
|
+
options: { styleId?: string; upload?: boolean } = {},
|
|
101
|
+
): Promise<ImageGenerationResult> {
|
|
102
|
+
console.log("[image] generating with higgsfield soul");
|
|
103
|
+
|
|
104
|
+
const result = await higgsfieldProvider.generateSoul({
|
|
105
|
+
prompt,
|
|
106
|
+
styleId: options.styleId,
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
const imageUrl = result.jobs?.[0]?.results?.raw?.url;
|
|
110
|
+
if (!imageUrl) {
|
|
111
|
+
throw new Error("No image URL in result");
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
let uploaded: string | undefined;
|
|
115
|
+
if (options.upload) {
|
|
116
|
+
const timestamp = Date.now();
|
|
117
|
+
const objectKey = `images/soul/${timestamp}.png`;
|
|
118
|
+
uploaded = await storageProvider.uploadFromUrl(imageUrl, objectKey);
|
|
119
|
+
console.log(`[image] uploaded to ${uploaded}`);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return { imageUrl, uploaded };
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
export default definition;
|