varg.ai-sdk 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,8 +8,56 @@
8
8
 
9
9
  import { existsSync } from "node:fs";
10
10
  import ffmpeg from "fluent-ffmpeg";
11
+ import type { ActionMeta } from "../../cli/types";
11
12
  import { transcribe } from "../transcribe";
12
13
 
14
+ export const meta: ActionMeta = {
15
+ name: "captions",
16
+ type: "action",
17
+ description: "add subtitles to video",
18
+ inputType: "video",
19
+ outputType: "video",
20
+ schema: {
21
+ input: {
22
+ type: "object",
23
+ required: ["video", "output"],
24
+ properties: {
25
+ video: {
26
+ type: "string",
27
+ format: "file-path",
28
+ description: "input video file",
29
+ },
30
+ output: {
31
+ type: "string",
32
+ format: "file-path",
33
+ description: "output video path",
34
+ },
35
+ srt: {
36
+ type: "string",
37
+ format: "file-path",
38
+ description: "existing srt file (auto-generates if not provided)",
39
+ },
40
+ provider: {
41
+ type: "string",
42
+ enum: ["groq", "fireworks"],
43
+ default: "fireworks",
44
+ description: "transcription provider for auto-generation",
45
+ },
46
+ },
47
+ },
48
+ output: { type: "string", format: "file-path", description: "video path" },
49
+ },
50
+ async run(options) {
51
+ const { video, output, srt, provider } = options as {
52
+ video: string;
53
+ output: string;
54
+ srt?: string;
55
+ provider?: "groq" | "fireworks";
56
+ };
57
+ return addCaptions({ videoPath: video, output, srtPath: srt, provider });
58
+ },
59
+ };
60
+
13
61
  // types
14
62
  export interface AddCaptionsOptions {
15
63
  videoPath: string;
@@ -115,113 +163,7 @@ export async function addCaptions(
115
163
  }
116
164
 
117
165
  // cli
118
- async function cli() {
119
- const args = process.argv.slice(2);
120
- const command = args[0];
121
-
122
- if (!command || command === "help") {
123
- console.log(`
124
- usage:
125
- bun run service/captions.ts <videoPath> [outputPath] [options]
126
-
127
- arguments:
128
- videoPath - path to input video file
129
- outputPath - path to output video (default: video-captioned.mp4)
130
-
131
- options:
132
- --srt <path> - use existing srt file instead of auto-generating
133
- --provider <name> - groq | fireworks (default: fireworks)
134
- --font <name> - font name (default: Arial)
135
- --size <number> - font size (default: 24)
136
- --color <hex> - primary color in ASS format (default: &HFFFFFF)
137
- --outline <hex> - outline color in ASS format (default: &H000000)
138
-
139
- examples:
140
- # auto-generate captions with fireworks
141
- bun run service/captions.ts media/fitness-demo.mp4
142
-
143
- # auto-generate with groq (faster, plain text)
144
- bun run service/captions.ts media/fitness-demo.mp4 output.mp4 --provider groq
145
-
146
- # use existing srt file
147
- bun run service/captions.ts media/fitness-demo.mp4 output.mp4 --srt media/fitness-demo.srt
148
-
149
- # customize style
150
- bun run service/captions.ts media/video.mp4 output.mp4 --font "Helvetica" --size 28
151
-
152
- requirements:
153
- ffmpeg must be installed on your system
154
- brew install ffmpeg (macos)
155
- apt-get install ffmpeg (linux)
156
- `);
157
- process.exit(0);
158
- }
159
-
160
- try {
161
- const videoPath = args[0];
162
- let outputPath = args[1];
163
-
164
- if (!videoPath) {
165
- throw new Error("videoPath is required");
166
- }
167
-
168
- // parse options
169
- let srtPath: string | undefined;
170
- let provider: "groq" | "fireworks" = "fireworks";
171
- const style: SubtitleStyle = {};
172
-
173
- for (let i = 1; i < args.length; i++) {
174
- const arg = args[i];
175
-
176
- if (arg === "--srt") {
177
- srtPath = args[++i];
178
- } else if (arg === "--provider") {
179
- provider = args[++i] as "groq" | "fireworks";
180
- } else if (arg === "--font") {
181
- style.fontName = args[++i];
182
- } else if (arg === "--size") {
183
- const size = args[++i];
184
- if (!size) {
185
- throw new Error("--size requires a number");
186
- }
187
- style.fontSize = Number.parseInt(size, 10);
188
- } else if (arg === "--color") {
189
- const color = args[++i];
190
- if (!color) {
191
- throw new Error("--color requires a hex color");
192
- }
193
- style.primaryColor = color;
194
- } else if (arg === "--outline") {
195
- const outline = args[++i];
196
- if (!outline) {
197
- throw new Error("--outline requires a hex color");
198
- }
199
- style.outlineColor = outline;
200
- } else if (!arg?.startsWith("--") && !outputPath) {
201
- outputPath = arg;
202
- }
203
- }
204
-
205
- // default output path
206
- if (!outputPath) {
207
- outputPath = videoPath.replace(/\.[^.]+$/, "-captioned.mp4");
208
- }
209
-
210
- await addCaptions({
211
- videoPath,
212
- srtPath,
213
- output: outputPath,
214
- provider,
215
- style,
216
- });
217
-
218
- console.log("\ndone! video with captions saved to:", outputPath);
219
- } catch (error) {
220
- console.error("[captions] error:", error);
221
- process.exit(1);
222
- }
223
- }
224
-
225
166
  if (import.meta.main) {
226
- cli();
167
+ const { runCli } = await import("../../cli/runner");
168
+ runCli(meta);
227
169
  }
@@ -7,6 +7,7 @@
7
7
 
8
8
  import { existsSync } from "node:fs";
9
9
  import { extname } from "node:path";
10
+ import type { ActionMeta } from "../../cli/types";
10
11
  import {
11
12
  type AddAudioOptions,
12
13
  addAudio,
@@ -21,6 +22,67 @@ import {
21
22
  trimVideo,
22
23
  } from "../../lib/ffmpeg";
23
24
 
25
+ export const meta: ActionMeta = {
26
+ name: "edit",
27
+ type: "action",
28
+ description: "trim/resize video",
29
+ inputType: "video",
30
+ outputType: "video",
31
+ schema: {
32
+ input: {
33
+ type: "object",
34
+ required: ["input", "output"],
35
+ properties: {
36
+ input: {
37
+ type: "string",
38
+ format: "file-path",
39
+ description: "input video file",
40
+ },
41
+ output: {
42
+ type: "string",
43
+ format: "file-path",
44
+ description: "output video path",
45
+ },
46
+ start: {
47
+ type: "number",
48
+ description: "start time in seconds (for trim)",
49
+ },
50
+ duration: {
51
+ type: "number",
52
+ description: "duration in seconds (for trim)",
53
+ },
54
+ preset: {
55
+ type: "string",
56
+ enum: ["vertical", "square", "landscape", "4k"],
57
+ description: "resize preset",
58
+ },
59
+ },
60
+ },
61
+ output: { type: "string", format: "file-path", description: "video path" },
62
+ },
63
+ async run(options) {
64
+ const { input, output, start, duration, preset } = options as {
65
+ input: string;
66
+ output: string;
67
+ start?: number;
68
+ duration?: number;
69
+ preset?: "vertical" | "square" | "landscape" | "4k";
70
+ };
71
+ if (preset) {
72
+ return quickResize(input, output, preset);
73
+ }
74
+ if (start !== undefined) {
75
+ return quickTrim(
76
+ input,
77
+ output,
78
+ start,
79
+ duration ? start + duration : undefined,
80
+ );
81
+ }
82
+ throw new Error("specify --start for trim or --preset for resize");
83
+ },
84
+ };
85
+
24
86
  // types
25
87
  export interface EditPipelineStep {
26
88
  operation:
@@ -369,125 +431,7 @@ export async function mergeWithAudio(
369
431
  }
370
432
 
371
433
  // cli
372
- async function cli() {
373
- const args = process.argv.slice(2);
374
- const command = args[0];
375
-
376
- if (!command || command === "help") {
377
- console.log(`
378
- usage:
379
- bun run service/edit.ts <command> [args]
380
-
381
- commands:
382
- social <input> <output> <platform> [audioPath] prepare for social media
383
- montage <output> <clip1> <clip2> [clip3...] create montage from clips
384
- trim <input> <output> <start> [end] quick trim
385
- resize <input> <output> <preset> quick resize
386
- merge_audio <audio> <output> <video1> [video2...] merge videos with audio
387
-
388
- platforms:
389
- tiktok, instagram, youtube-shorts, youtube, twitter
390
-
391
- resize presets:
392
- vertical (9:16), square (1:1), landscape (16:9), 4k
393
-
394
- examples:
395
- bun run service/edit.ts social raw.mp4 tiktok.mp4 tiktok
396
- bun run service/edit.ts social raw.mp4 ig.mp4 instagram audio.mp3
397
- bun run service/edit.ts montage output.mp4 clip1.mp4 clip2.mp4 clip3.mp4
398
- bun run service/edit.ts trim long.mp4 short.mp4 10 30
399
- bun run service/edit.ts resize raw.mp4 vertical.mp4 vertical
400
- bun run service/edit.ts merge_audio song.mp3 final.mp4 clip1.mp4 clip2.mp4
401
- `);
402
- process.exit(0);
403
- }
404
-
405
- try {
406
- switch (command) {
407
- case "social": {
408
- const input = args[1];
409
- const output = args[2];
410
- const platform = args[3] as PrepareForSocialOptions["platform"];
411
- const withAudio = args[4];
412
-
413
- if (!input || !output || !platform) {
414
- throw new Error("input, output, and platform are required");
415
- }
416
-
417
- await prepareForSocial({ input, output, platform, withAudio });
418
- break;
419
- }
420
-
421
- case "montage": {
422
- const output = args[1];
423
- const clips = args.slice(2);
424
-
425
- if (!output || clips.length === 0) {
426
- throw new Error("output and at least one clip are required");
427
- }
428
-
429
- await createMontage({ clips, output });
430
- break;
431
- }
432
-
433
- case "trim": {
434
- const input = args[1];
435
- const output = args[2];
436
- const startArg = args[3];
437
- const endArg = args[4];
438
-
439
- if (!input || !output || !startArg) {
440
- throw new Error("input, output, and start are required");
441
- }
442
-
443
- const start = Number.parseFloat(startArg);
444
- const end = endArg ? Number.parseFloat(endArg) : undefined;
445
-
446
- if (Number.isNaN(start) || (endArg && Number.isNaN(end))) {
447
- throw new Error("start and end must be valid numbers");
448
- }
449
-
450
- await quickTrim(input, output, start, end);
451
- break;
452
- }
453
-
454
- case "resize": {
455
- const input = args[1];
456
- const output = args[2];
457
- const preset = args[3] as "vertical" | "square" | "landscape" | "4k";
458
-
459
- if (!input || !output || !preset) {
460
- throw new Error("input, output, and preset are required");
461
- }
462
-
463
- await quickResize(input, output, preset);
464
- break;
465
- }
466
-
467
- case "merge_audio": {
468
- const audio = args[1];
469
- const output = args[2];
470
- const videos = args.slice(3);
471
-
472
- if (!audio || !output || videos.length === 0) {
473
- throw new Error("audio, output, and at least one video are required");
474
- }
475
-
476
- await mergeWithAudio(videos, audio, output);
477
- break;
478
- }
479
-
480
- default:
481
- console.error(`unknown command: ${command}`);
482
- console.log("run 'bun run service/edit.ts help' for usage");
483
- process.exit(1);
484
- }
485
- } catch (error) {
486
- console.error("[edit] error:", error);
487
- process.exit(1);
488
- }
489
- }
490
-
491
434
  if (import.meta.main) {
492
- cli();
435
+ const { runCli } = await import("../../cli/runner");
436
+ runCli(meta);
493
437
  }
@@ -4,10 +4,44 @@
4
4
  * usage: bun run service/image.ts <command> <args>
5
5
  */
6
6
 
7
+ import type { ActionMeta } from "../../cli/types";
7
8
  import { generateImage } from "../../lib/fal";
8
9
  import { generateSoul } from "../../lib/higgsfield";
9
10
  import { uploadFromUrl } from "../../utilities/s3";
10
11
 
12
+ export const meta: ActionMeta = {
13
+ name: "image",
14
+ type: "action",
15
+ description: "generate image from text",
16
+ inputType: "text",
17
+ outputType: "image",
18
+ schema: {
19
+ input: {
20
+ type: "object",
21
+ required: ["prompt"],
22
+ properties: {
23
+ prompt: { type: "string", description: "what to generate" },
24
+ size: {
25
+ type: "string",
26
+ enum: [
27
+ "square_hd",
28
+ "landscape_4_3",
29
+ "portrait_4_3",
30
+ "landscape_16_9",
31
+ ],
32
+ default: "landscape_4_3",
33
+ description: "image size/aspect",
34
+ },
35
+ },
36
+ },
37
+ output: { type: "string", format: "file-path", description: "image path" },
38
+ },
39
+ async run(options) {
40
+ const { prompt, size } = options as { prompt: string; size?: string };
41
+ return generateWithFal(prompt, { model: size });
42
+ },
43
+ };
44
+
11
45
  export interface ImageGenerationResult {
12
46
  imageUrl: string;
13
47
  uploaded?: string;
@@ -64,49 +98,8 @@ export async function generateWithSoul(
64
98
  return { imageUrl, uploaded };
65
99
  }
66
100
 
67
- // cli runner
101
+ // cli
68
102
  if (import.meta.main) {
69
- const [command, ...args] = process.argv.slice(2);
70
-
71
- switch (command) {
72
- case "fal": {
73
- if (!args[0]) {
74
- console.log(`
75
- usage:
76
- bun run service/image.ts fal <prompt> [model] [upload]
77
- `);
78
- process.exit(1);
79
- }
80
- const falResult = await generateWithFal(args[0], {
81
- model: args[1],
82
- upload: args[2] === "true",
83
- });
84
- console.log(JSON.stringify(falResult, null, 2));
85
- break;
86
- }
87
-
88
- case "soul": {
89
- if (!args[0]) {
90
- console.log(`
91
- usage:
92
- bun run service/image.ts soul <prompt> [styleId] [upload]
93
- `);
94
- process.exit(1);
95
- }
96
- const soulResult = await generateWithSoul(args[0], {
97
- styleId: args[1],
98
- upload: args[2] === "true",
99
- });
100
- console.log(JSON.stringify(soulResult, null, 2));
101
- break;
102
- }
103
-
104
- default:
105
- console.log(`
106
- usage:
107
- bun run service/image.ts fal <prompt> [model] [upload]
108
- bun run service/image.ts soul <prompt> [styleId] [upload]
109
- `);
110
- process.exit(1);
111
- }
103
+ const { runCli } = await import("../../cli/runner");
104
+ runCli(meta);
112
105
  }
@@ -5,9 +5,57 @@
5
5
  * supports wav2lip, synclabs, and simple audio overlay
6
6
  */
7
7
 
8
+ import type { ActionMeta } from "../../cli/types";
8
9
  import { addAudio } from "../../lib/ffmpeg";
9
10
  import { runModel } from "../../lib/replicate";
10
11
 
12
+ export const meta: ActionMeta = {
13
+ name: "sync",
14
+ type: "action",
15
+ description: "sync audio to video (lipsync)",
16
+ inputType: "video+audio",
17
+ outputType: "video",
18
+ schema: {
19
+ input: {
20
+ type: "object",
21
+ required: ["video", "audio"],
22
+ properties: {
23
+ video: {
24
+ type: "string",
25
+ format: "file-path",
26
+ description: "input video file or url",
27
+ },
28
+ audio: {
29
+ type: "string",
30
+ format: "file-path",
31
+ description: "audio file or url to sync",
32
+ },
33
+ method: {
34
+ type: "string",
35
+ enum: ["wav2lip", "overlay"],
36
+ default: "overlay",
37
+ description: "sync method (wav2lip requires urls)",
38
+ },
39
+ output: {
40
+ type: "string",
41
+ format: "file-path",
42
+ description: "output video path",
43
+ },
44
+ },
45
+ },
46
+ output: { type: "string", format: "file-path", description: "video path" },
47
+ },
48
+ async run(options) {
49
+ const { video, audio, method, output } = options as {
50
+ video: string;
51
+ audio: string;
52
+ method?: "wav2lip" | "overlay";
53
+ output?: string;
54
+ };
55
+ return lipsync({ videoUrl: video, audioUrl: audio, method, output });
56
+ },
57
+ };
58
+
11
59
  // types
12
60
  export interface LipsyncOptions {
13
61
  videoUrl: string;
@@ -91,97 +139,7 @@ export async function lipsyncOverlay(
91
139
  }
92
140
 
93
141
  // cli
94
- async function cli() {
95
- const args = process.argv.slice(2);
96
- const command = args[0];
97
-
98
- if (!command || command === "help") {
99
- console.log(`
100
- usage:
101
- bun run service/sync.ts <command> [args]
102
-
103
- commands:
104
- sync <videoUrl> <audioUrl> [method] [output] sync video with audio
105
- wav2lip <videoUrl> <audioUrl> use wav2lip model
106
- overlay <videoPath> <audioPath> [output] simple audio overlay
107
- help show this help
108
-
109
- methods:
110
- wav2lip - ai-powered lipsync using replicate (url inputs)
111
- overlay - simple audio overlay using ffmpeg (local files)
112
-
113
- examples:
114
- bun run service/sync.ts sync video.mp4 audio.mp3 overlay output.mp4
115
- bun run service/sync.ts wav2lip https://example.com/video.mp4 https://example.com/audio.mp3
116
- bun run service/sync.ts overlay video.mp4 audio.mp3 synced.mp4
117
-
118
- environment:
119
- REPLICATE_API_TOKEN - required for wav2lip method
120
- `);
121
- process.exit(0);
122
- }
123
-
124
- try {
125
- switch (command) {
126
- case "sync": {
127
- const videoUrl = args[1];
128
- const audioUrl = args[2];
129
- const method = (args[3] || "overlay") as "wav2lip" | "overlay";
130
- const output = args[4];
131
-
132
- if (!videoUrl || !audioUrl) {
133
- throw new Error("videoUrl and audioUrl are required");
134
- }
135
-
136
- const result = await lipsync({
137
- videoUrl,
138
- audioUrl,
139
- method,
140
- output,
141
- });
142
-
143
- console.log(`[sync] result:`, result);
144
- break;
145
- }
146
-
147
- case "wav2lip": {
148
- const videoUrl = args[1];
149
- const audioUrl = args[2];
150
-
151
- if (!videoUrl || !audioUrl) {
152
- throw new Error("videoUrl and audioUrl are required");
153
- }
154
-
155
- const result = await lipsyncWav2Lip({ videoUrl, audioUrl });
156
- console.log(`[sync] result:`, result);
157
- break;
158
- }
159
-
160
- case "overlay": {
161
- const videoPath = args[1];
162
- const audioPath = args[2];
163
- const output = args[3];
164
-
165
- if (!videoPath || !audioPath) {
166
- throw new Error("videoPath and audioPath are required");
167
- }
168
-
169
- const result = await lipsyncOverlay(videoPath, audioPath, output);
170
- console.log(`[sync] result:`, result);
171
- break;
172
- }
173
-
174
- default:
175
- console.error(`unknown command: ${command}`);
176
- console.log(`run 'bun run service/sync.ts help' for usage`);
177
- process.exit(1);
178
- }
179
- } catch (error) {
180
- console.error(`[sync] error:`, error);
181
- process.exit(1);
182
- }
183
- }
184
-
185
142
  if (import.meta.main) {
186
- cli();
143
+ const { runCli } = await import("../../cli/runner");
144
+ runCli(meta);
187
145
  }