varg.ai-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/.claude/settings.local.json +7 -0
  2. package/.env.example +24 -0
  3. package/CLAUDE.md +118 -0
  4. package/README.md +231 -0
  5. package/SKILLS.md +157 -0
  6. package/STRUCTURE.md +92 -0
  7. package/TEST_RESULTS.md +122 -0
  8. package/action/captions/SKILL.md +170 -0
  9. package/action/captions/index.ts +227 -0
  10. package/action/edit/SKILL.md +235 -0
  11. package/action/edit/index.ts +493 -0
  12. package/action/image/SKILL.md +140 -0
  13. package/action/image/index.ts +112 -0
  14. package/action/sync/SKILL.md +136 -0
  15. package/action/sync/index.ts +187 -0
  16. package/action/transcribe/SKILL.md +179 -0
  17. package/action/transcribe/index.ts +227 -0
  18. package/action/video/SKILL.md +116 -0
  19. package/action/video/index.ts +135 -0
  20. package/action/voice/SKILL.md +125 -0
  21. package/action/voice/index.ts +201 -0
  22. package/biome.json +33 -0
  23. package/index.ts +38 -0
  24. package/lib/README.md +144 -0
  25. package/lib/ai-sdk/fal.ts +106 -0
  26. package/lib/ai-sdk/replicate.ts +107 -0
  27. package/lib/elevenlabs.ts +382 -0
  28. package/lib/fal.ts +478 -0
  29. package/lib/ffmpeg.ts +467 -0
  30. package/lib/fireworks.ts +235 -0
  31. package/lib/groq.ts +246 -0
  32. package/lib/higgsfield.ts +176 -0
  33. package/lib/remotion/SKILL.md +823 -0
  34. package/lib/remotion/cli.ts +115 -0
  35. package/lib/remotion/functions.ts +283 -0
  36. package/lib/remotion/index.ts +19 -0
  37. package/lib/remotion/templates.ts +73 -0
  38. package/lib/replicate.ts +304 -0
  39. package/output.txt +1 -0
  40. package/package.json +35 -0
  41. package/pipeline/cookbooks/SKILL.md +285 -0
  42. package/pipeline/cookbooks/remotion-video.md +585 -0
  43. package/pipeline/cookbooks/round-video-character.md +337 -0
  44. package/pipeline/cookbooks/talking-character.md +59 -0
  45. package/test-import.ts +7 -0
  46. package/test-services.ts +97 -0
  47. package/tsconfig.json +29 -0
  48. package/utilities/s3.ts +147 -0
@@ -0,0 +1,122 @@
1
+ # test results
2
+
3
+ ## ✅ both fal approaches working
4
+
5
+ ### approach 1: lib/ai-sdk/fal.ts (vercel ai sdk)
6
+
7
+ ```bash
8
+ $ bun run lib/ai-sdk/fal.ts generate_image "futuristic spaceship" "fal-ai/flux/dev" "16:9"
9
+
10
+ [ai-sdk/fal] generating image with fal-ai/flux/dev
11
+ [ai-sdk/fal] prompt: futuristic spaceship interior
12
+ [ai-sdk/fal] aspect ratio: 16:9
13
+ [ai-sdk/fal] completed!
14
+
15
+ image saved to: /tmp/fal-ai-sdk-1763772836608.png
16
+
17
+ metadata:
18
+ {
19
+ "images": [
20
+ {
21
+ "width": 1024,
22
+ "height": 576,
23
+ "contentType": "image/jpeg",
24
+ "nsfw": false
25
+ }
26
+ ]
27
+ }
28
+ ```
29
+
30
+ ✅ benefits:
31
+ - clean typed api
32
+ - auto image save + open
33
+ - aspect ratio support
34
+ - consistent with other ai-sdk providers
35
+
36
+ ### approach 2: lib/fal.ts (fal client direct)
37
+
38
+ ```bash
39
+ $ bun run lib/fal.ts generate_image "ancient temple ruins"
40
+
41
+ [fal] generating image with fal-ai/flux-pro/v1.1
42
+ [fal] prompt: ancient temple ruins at sunset
43
+ [fal] processing...
44
+ [fal] completed!
45
+
46
+ {
47
+ "data": {
48
+ "images": [
49
+ {
50
+ "url": "https://v3b.fal.media/files/b/koala/L5LYGCHZ4aZ_CKZsmPbUe.jpg",
51
+ "width": 1024,
52
+ "height": 768,
53
+ "content_type": "image/jpeg"
54
+ }
55
+ ],
56
+ "seed": 2946158106
57
+ }
58
+ }
59
+ ```
60
+
61
+ ✅ benefits:
62
+ - full api access
63
+ - queue updates
64
+ - video support
65
+ - custom parameters
66
+
67
+ ## cli tests ✅
68
+
69
+ all help menus working:
70
+
71
+ ```bash
72
+ bun run lib/ai-sdk/fal.ts # ✓
73
+ bun run lib/fal.ts # ✓
74
+ bun run lib/higgsfield.ts # ✓
75
+ bun run service/image.ts # ✓
76
+ bun run service/video.ts # ✓
77
+ bun run utilities/s3.ts # ✓
78
+ ```
79
+
80
+ ## library imports ✅
81
+
82
+ ```typescript
83
+ import { generateImage } from "./index"
84
+ import * as aiSdkFal from "./index"
85
+
86
+ // both approaches available
87
+ ```
88
+
89
+ ## actual generation tests ✅
90
+
91
+ successfully generated and opened:
92
+ - cyberpunk city (16:9, ai-sdk)
93
+ - spaceship interior (16:9, ai-sdk)
94
+ - temple ruins (4:3, fal client)
95
+ - aurora borealis (4:3, fal client)
96
+
97
+ all images ~15-20 seconds generation time
98
+
99
+ ## what works
100
+
101
+ 1. **dual fal implementations** - ai-sdk for simplicity, client for power ✓
102
+ 2. **all cli scripts executable** with proper help menus ✓
103
+ 3. **library imports functional** ✓
104
+ 4. **actual image generation working** ✓
105
+ 5. **automatic image opening** (ai-sdk version) ✓
106
+ 6. **queue progress updates** (fal client) ✓
107
+
108
+ ## file structure
109
+
110
+ ```
111
+ lib/
112
+ ├── ai-sdk/
113
+ │ └── fal.ts # vercel ai sdk approach
114
+ ├── fal.ts # fal client approach
115
+ └── higgsfield.ts # soul character generation
116
+ ```
117
+
118
+ ## recommendations
119
+
120
+ - **use lib/ai-sdk/fal.ts** for standard image generation
121
+ - **use lib/fal.ts** for video or advanced features
122
+ - **use `action/*/index.ts`** for high-level operations with s3 upload
@@ -0,0 +1,170 @@
1
+ ---
2
+ name: video-captions
3
+ description: add auto-generated or custom subtitles to videos using groq/fireworks transcription and ffmpeg overlay. use when adding captions, subtitles, or text overlays to videos for accessibility or social media.
4
+ allowed-tools: Read, Bash
5
+ ---
6
+
7
+ # video captions
8
+
9
+ automatically generate and overlay subtitles on videos with customizable styling.
10
+
11
+ ## features
12
+
13
+ - **auto-generation**: transcribe video audio using groq or fireworks
14
+ - **custom srt support**: use existing subtitle files
15
+ - **styling**: customize font, size, colors, position
16
+ - **word-level timing**: fireworks provides precise word timestamps
17
+ - **instant overlay**: ffmpeg-based subtitle rendering
18
+
19
+ ## usage
20
+
21
+ ### auto-generate captions
22
+ ```bash
23
+ bun run action/captions/index.ts <videoPath> [outputPath] [options]
24
+ ```
25
+
26
+ **basic example:**
27
+ ```bash
28
+ bun run action/captions/index.ts media/video.mp4
29
+ # outputs: media/video-captioned.mp4
30
+ ```
31
+
32
+ **with options:**
33
+ ```bash
34
+ bun run action/captions/index.ts media/video.mp4 output.mp4 --provider fireworks --font Arial --size 28
35
+ ```
36
+
37
+ ### use existing srt file
38
+ ```bash
39
+ bun run action/captions/index.ts media/video.mp4 output.mp4 --srt media/video.srt
40
+ ```
41
+
42
+ ## options
43
+
44
+ - `--srt <path>` - use existing srt file instead of auto-generating
45
+ - `--provider <name>` - groq or fireworks (default: fireworks)
46
+ - `--font <name>` - font name (default: Arial)
47
+ - `--size <number>` - font size (default: 24)
48
+ - `--color <hex>` - primary color in ASS format (default: &HFFFFFF white)
49
+ - `--outline <hex>` - outline color in ASS format (default: &H000000 black)
50
+
51
+ ## as library
52
+
53
+ ```typescript
54
+ import { addCaptions } from "./action/captions"
55
+
56
+ const result = await addCaptions({
57
+ videoPath: "media/video.mp4",
58
+ output: "captioned.mp4",
59
+ provider: "fireworks", // or "groq"
60
+ style: {
61
+ fontName: "Helvetica",
62
+ fontSize: 28,
63
+ primaryColor: "&HFFFFFF",
64
+ outlineColor: "&H000000",
65
+ bold: true,
66
+ alignment: 2, // bottom center
67
+ marginV: 20
68
+ }
69
+ })
70
+ ```
71
+
72
+ ## providers
73
+
74
+ ### fireworks (recommended)
75
+ - **word-level timestamps** for precise timing
76
+ - generates `.srt` format with detailed timing
77
+ - better for social media content
78
+ - slightly slower transcription
79
+
80
+ ### groq
81
+ - **ultra-fast** transcription
82
+ - plain text output (converted to srt)
83
+ - sentence-level timing
84
+ - great for quick previews
85
+
86
+ ## styling options
87
+
88
+ ```typescript
89
+ interface SubtitleStyle {
90
+ fontName?: string // default: Arial
91
+ fontSize?: number // default: 24
92
+ primaryColor?: string // default: &HFFFFFF (white)
93
+ outlineColor?: string // default: &H000000 (black)
94
+ bold?: boolean // default: true
95
+ alignment?: number // 1-9, default: 2 (bottom center)
96
+ marginV?: number // vertical margin, default: 20
97
+ }
98
+ ```
99
+
100
+ **alignment values:**
101
+ ```
102
+ 1 = bottom left 2 = bottom center 3 = bottom right
103
+ 4 = middle left 5 = middle center 6 = middle right
104
+ 7 = top left 8 = top center 9 = top right
105
+ ```
106
+
107
+ ## when to use
108
+
109
+ use this skill when:
110
+ - preparing videos for social media (tiktok, instagram, youtube)
111
+ - adding accessibility features
112
+ - creating educational or tutorial content
113
+ - you need word-level caption timing
114
+ - translating videos with custom srt files
115
+
116
+ ## typical workflow
117
+
118
+ 1. create or edit video
119
+ 2. add captions with auto-transcription (this service)
120
+ 3. customize style for platform
121
+ 4. prepare for social media (edit service)
122
+
123
+ ## examples
124
+
125
+ **tiktok/instagram style captions:**
126
+ ```bash
127
+ bun run action/captions/index.ts video.mp4 captioned.mp4 \
128
+ --provider fireworks \
129
+ --font "Arial Black" \
130
+ --size 32 \
131
+ --color "&H00FFFF"
132
+ ```
133
+
134
+ **professional style:**
135
+ ```bash
136
+ bun run action/captions/index.ts video.mp4 output.mp4 \
137
+ --provider fireworks \
138
+ --font "Helvetica" \
139
+ --size 24
140
+ ```
141
+
142
+ **with existing subtitles:**
143
+ ```bash
144
+ bun run action/captions/index.ts video.mp4 final.mp4 \
145
+ --srt custom-subtitles.srt \
146
+ --font "Arial" \
147
+ --size 26
148
+ ```
149
+
150
+ ## output
151
+
152
+ - generates `.srt` file if auto-transcribing
153
+ - creates new video file with burned-in subtitles
154
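+ - video is re-encoded to burn in the subtitles (ffmpeg default video settings), so quality stays close to the original but is not bit-identical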
155
+ - audio is copied without re-encoding
156
+
157
+ ## environment variables
158
+
159
+ required (for auto-transcription):
160
+ - `GROQ_API_KEY` - for groq provider
161
+ - `FIREWORKS_API_KEY` - for fireworks provider
162
+
163
+ **system requirements:**
164
+ - ffmpeg must be installed
165
+
166
+ ## processing time
167
+
168
+ - transcription: 5-30 seconds (depending on video length)
169
+ - overlay: 5-15 seconds (depending on video length)
170
+ - total: typically under 1 minute
@@ -0,0 +1,227 @@
1
+ #!/usr/bin/env bun
2
+
3
+ /**
4
+ * video captioning service
5
+ * generates and overlays subtitles on videos using ffmpeg
6
+ * supports auto-generation via groq/fireworks or custom srt files
7
+ */
8
+
9
+ import { existsSync } from "node:fs";
10
+ import ffmpeg from "fluent-ffmpeg";
11
+ import { transcribe } from "../transcribe";
12
+
13
// types

/** Input accepted by `addCaptions`. */
export interface AddCaptionsOptions {
  /** Path to the input video; must exist on disk. */
  videoPath: string;
  /** Optional existing srt file. When omitted, subtitles are auto-generated. */
  srtPath?: string;
  /** Path the captioned video is written to. */
  output: string;
  /** Transcription provider; only used if `srtPath` is not provided. */
  provider?: "groq" | "fireworks";
  /** Per-field overrides for the default subtitle style. */
  style?: SubtitleStyle;
}
21
+
22
/** Visual settings forwarded to ffmpeg's subtitle `force_style` option. */
export interface SubtitleStyle {
  /** Font family name. Default: Arial. */
  fontName?: string;
  /** Font size. Default: 24. */
  fontSize?: number;
  /** Text colour. Default: &HFFFFFF (white). */
  primaryColor?: string;
  /** Outline colour. Default: &H000000 (black). */
  outlineColor?: string;
  /** Whether the text is bold. Default: true. */
  bold?: boolean;
  /** Position on a 1-9 grid. Default: 2 (bottom center). */
  alignment?: number;
  /** Vertical margin. Default: 20. */
  marginV?: number;
}
31
+
32
/**
 * Fallback subtitle style: supplies a value for every `SubtitleStyle`
 * field that the caller leaves out.
 */
const DEFAULT_STYLE: Required<SubtitleStyle> = {
  fontName: "Arial",
  fontSize: 24,
  // white text ...
  primaryColor: "&HFFFFFF",
  // ... with a black outline
  outlineColor: "&H000000",
  bold: true,
  // 2 = bottom center
  alignment: 2,
  marginV: 20,
};
42
+
43
/**
 * Escapes a file path so it can be embedded as the filename argument of
 * ffmpeg's `subtitles` filter.
 *
 * Two rounds of backslash escaping are applied, following ffmpeg's notes on
 * filtergraph escaping: first for the filter option value (`\`, `'`, `:`),
 * then for the filtergraph description (`\`, `'`, `,`, `[`, `]`, `;`).
 * Plain paths such as `media/video.srt` come out unchanged.
 */
function escapeSubtitlesPath(filePath: string): string {
  const optionLevel = filePath.replace(/[\\':]/g, "\\$&");
  return optionLevel.replace(/[\\'\[\],;]/g, "\\$&");
}

/**
 * Builds the path of the auto-generated srt file that sits next to the video.
 *
 * Only a dot inside the file name (and not its first character) counts as an
 * extension, so extension-less inputs get `.srt` appended instead of the
 * video path itself being returned, and dots in directory names are ignored.
 */
function deriveSrtPath(videoPath: string): string {
  const lastSeparator = Math.max(
    videoPath.lastIndexOf("/"),
    videoPath.lastIndexOf("\\"),
  );
  const lastDot = videoPath.lastIndexOf(".");
  const hasExtension = lastDot > lastSeparator + 1;
  const stem = hasExtension ? videoPath.slice(0, lastDot) : videoPath;
  return `${stem}.srt`;
}

/**
 * Adds burned-in captions to a video.
 *
 * When `options.srtPath` is not given, an srt file is generated next to the
 * video by calling `transcribe` with the selected provider (default:
 * fireworks). The subtitles are then rendered onto the video with ffmpeg's
 * `subtitles` filter while the audio stream is copied without re-encoding.
 *
 * @param options - input video, output path and optional srt/provider/style
 * @returns the output path once ffmpeg has finished
 * @throws Error when `videoPath` or `output` is missing, when the video or
 *   srt file does not exist, or when transcription reports a failure; the
 *   returned promise rejects with ffmpeg's error if rendering fails
 */
export async function addCaptions(
  options: AddCaptionsOptions,
): Promise<string> {
  const { videoPath, srtPath, output, provider = "fireworks", style } = options;

  if (!videoPath) {
    throw new Error("videoPath is required");
  }
  if (!output) {
    throw new Error("output is required");
  }
  if (!existsSync(videoPath)) {
    throw new Error(`video file not found: ${videoPath}`);
  }

  console.log("[captions] adding captions to video...");

  // determine srt file path
  let finalSrtPath = srtPath;

  // if no srt file provided, auto-generate it from the video audio
  if (!finalSrtPath) {
    console.log(
      `[captions] no srt file provided, auto-generating with ${provider}...`,
    );

    const generatedSrtPath = deriveSrtPath(videoPath);

    const result = await transcribe({
      audioUrl: videoPath,
      provider,
      outputFormat: "srt",
      outputPath: generatedSrtPath,
    });

    if (!result.success) {
      throw new Error(`failed to generate subtitles: ${result.error}`);
    }

    finalSrtPath = generatedSrtPath;
    console.log(`[captions] generated subtitles at ${finalSrtPath}`);
  }

  if (!existsSync(finalSrtPath)) {
    throw new Error(`srt file not found: ${finalSrtPath}`);
  }

  // merge style with defaults field by field, so that an explicit
  // `undefined` (e.g. from a cli flag without a value) never reaches ffmpeg
  const finalStyle: Required<SubtitleStyle> = {
    fontName: style?.fontName ?? DEFAULT_STYLE.fontName,
    fontSize: style?.fontSize ?? DEFAULT_STYLE.fontSize,
    primaryColor: style?.primaryColor ?? DEFAULT_STYLE.primaryColor,
    outlineColor: style?.outlineColor ?? DEFAULT_STYLE.outlineColor,
    bold: style?.bold ?? DEFAULT_STYLE.bold,
    alignment: style?.alignment ?? DEFAULT_STYLE.alignment,
    marginV: style?.marginV ?? DEFAULT_STYLE.marginV,
  };

  // build subtitle filter with style; -1 is the "on" value used for Bold
  const forceStyle = [
    `FontName=${finalStyle.fontName}`,
    `FontSize=${finalStyle.fontSize}`,
    `PrimaryColour=${finalStyle.primaryColor}`,
    `OutlineColour=${finalStyle.outlineColor}`,
    `Bold=${finalStyle.bold ? -1 : 0}`,
    `Alignment=${finalStyle.alignment}`,
    `MarginV=${finalStyle.marginV}`,
  ].join(",");
  const subtitlesFilter = `subtitles=${escapeSubtitlesPath(finalSrtPath)}:force_style='${forceStyle}'`;

  console.log("[captions] overlaying subtitles on video...");

  // fluent-ffmpeg is event based, so wrap the run in a promise
  return new Promise<string>((resolve, reject) => {
    ffmpeg(videoPath)
      .videoFilters(subtitlesFilter)
      .outputOptions(["-c:a", "copy"]) // copy audio without re-encoding
      .output(output)
      .on("end", () => {
        console.log(`[captions] saved to ${output}`);
        resolve(output);
      })
      .on("error", (err: Error) => {
        console.error("[captions] error:", err);
        reject(err);
      })
      .run();
  });
}
116
+
117
// cli

/** Providers accepted by the `--provider` option. */
const CLI_PROVIDERS = ["groq", "fireworks"] as const;
type CliProvider = (typeof CLI_PROVIDERS)[number];

/** Result of parsing the command line (before defaults for the output path). */
interface ParsedCaptionArgs {
  videoPath: string | undefined;
  outputPath: string | undefined;
  srtPath: string | undefined;
  provider: CliProvider;
  style: SubtitleStyle;
}

/** Type guard: is `value` one of the supported transcription providers? */
function isCliProvider(value: string): value is CliProvider {
  return (CLI_PROVIDERS as readonly string[]).includes(value);
}

/**
 * Default output path: the input path with its extension replaced by
 * `-captioned.mp4`. Extension-less inputs get the suffix appended, so the
 * result can never equal the input path.
 */
function defaultOutputPath(videoPath: string): string {
  const lastSeparator = Math.max(
    videoPath.lastIndexOf("/"),
    videoPath.lastIndexOf("\\"),
  );
  const lastDot = videoPath.lastIndexOf(".");
  const stem = lastDot > lastSeparator + 1 ? videoPath.slice(0, lastDot) : videoPath;
  return `${stem}-captioned.mp4`;
}

/**
 * Splits the raw argument list into positionals (video path, output path)
 * and validated options.
 *
 * Positionals are collected only from arguments that are not options or
 * option values, so `video.mp4 --provider groq` no longer treats
 * `--provider` as the output path.
 *
 * @throws Error when an option is missing its value or the value is invalid
 */
function parseCaptionArgs(args: readonly string[]): ParsedCaptionArgs {
  const positionals: string[] = [];
  const style: SubtitleStyle = {};
  let srtPath: string | undefined;
  let provider: CliProvider = "fireworks";
  let index = 0;

  // consumes and returns the value that follows the current option flag
  const nextValue = (flag: string, expectation: string): string => {
    const value = args[index + 1];
    if (!value || value.startsWith("--")) {
      throw new Error(`${flag} requires ${expectation}`);
    }
    index += 1;
    return value;
  };

  for (; index < args.length; index += 1) {
    const arg = args[index];
    if (arg === undefined) {
      continue;
    }

    switch (arg) {
      case "--srt":
        srtPath = nextValue(arg, "a file path");
        break;
      case "--provider": {
        const name = nextValue(arg, "groq or fireworks");
        if (!isCliProvider(name)) {
          throw new Error(
            `unknown provider "${name}", expected groq or fireworks`,
          );
        }
        provider = name;
        break;
      }
      case "--font":
        style.fontName = nextValue(arg, "a font name");
        break;
      case "--size": {
        const size = Number.parseInt(nextValue(arg, "a number"), 10);
        if (!Number.isFinite(size) || size <= 0) {
          throw new Error("--size requires a number");
        }
        style.fontSize = size;
        break;
      }
      case "--color":
        style.primaryColor = nextValue(arg, "a hex color");
        break;
      case "--outline":
        style.outlineColor = nextValue(arg, "a hex color");
        break;
      default:
        if (arg.startsWith("--")) {
          // unknown options were always ignored; now at least say so
          console.warn(`[captions] ignoring unknown option: ${arg}`);
        } else {
          positionals.push(arg);
        }
    }
  }

  const [videoPath, outputPath] = positionals;
  return { videoPath, outputPath, srtPath, provider, style };
}

/**
 * Command line entry point: prints usage for `help`/`--help`/`-h` or no
 * arguments, otherwise parses the arguments and runs `addCaptions`.
 * Exits with status 1 when parsing or captioning fails.
 */
async function cli(): Promise<void> {
  const args = process.argv.slice(2);
  const command = args[0];

  if (
    !command ||
    command === "help" ||
    command === "--help" ||
    command === "-h"
  ) {
    console.log(`
usage:
  bun run action/captions/index.ts <videoPath> [outputPath] [options]

arguments:
  videoPath      - path to input video file
  outputPath     - path to output video (default: video-captioned.mp4)

options:
  --srt <path>       - use existing srt file instead of auto-generating
  --provider <name>  - groq | fireworks (default: fireworks)
  --font <name>      - font name (default: Arial)
  --size <number>    - font size (default: 24)
  --color <hex>      - primary color in ASS format (default: &HFFFFFF)
  --outline <hex>    - outline color in ASS format (default: &H000000)

examples:
  # auto-generate captions with fireworks
  bun run action/captions/index.ts media/fitness-demo.mp4

  # auto-generate with groq (faster, plain text)
  bun run action/captions/index.ts media/fitness-demo.mp4 output.mp4 --provider groq

  # use existing srt file
  bun run action/captions/index.ts media/fitness-demo.mp4 output.mp4 --srt media/fitness-demo.srt

  # customize style
  bun run action/captions/index.ts media/video.mp4 output.mp4 --font "Helvetica" --size 28

requirements:
  ffmpeg must be installed on your system
  brew install ffmpeg (macos)
  apt-get install ffmpeg (linux)
  `);
    process.exit(0);
  }

  try {
    const { videoPath, outputPath, srtPath, provider, style } =
      parseCaptionArgs(args);

    if (!videoPath) {
      throw new Error("videoPath is required");
    }

    // default output path
    const output = outputPath || defaultOutputPath(videoPath);

    await addCaptions({
      videoPath,
      srtPath,
      output,
      provider,
      style,
    });

    console.log("\ndone! video with captions saved to:", output);
  } catch (error) {
    console.error("[captions] error:", error);
    process.exit(1);
  }
}
224
+
225
// run the cli only when this file is the entry script, not when imported;
// cli() reports its own failures, so the returned promise is not awaited
if (import.meta.main) void cli();