varg.ai-sdk 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,14 +6,55 @@
6
6
  */
7
7
 
8
8
  import { writeFileSync } from "node:fs";
9
- import { join } from "node:path";
10
9
  import { toFile } from "groq-sdk/uploads";
10
+ import type { ActionMeta } from "../../cli/types";
11
11
  import {
12
12
  convertFireworksToSRT,
13
13
  transcribeWithFireworks as fireworksTranscribe,
14
14
  } from "../../lib/fireworks";
15
15
  import { GROQ_MODELS, transcribeAudio as groqTranscribe } from "../../lib/groq";
16
16
 
17
+ export const meta: ActionMeta = {
18
+ name: "transcribe",
19
+ type: "action",
20
+ description: "speech to text transcription",
21
+ inputType: "audio",
22
+ outputType: "text",
23
+ schema: {
24
+ input: {
25
+ type: "object",
26
+ required: ["audio"],
27
+ properties: {
28
+ audio: {
29
+ type: "string",
30
+ format: "file-path",
31
+ description: "audio/video file to transcribe",
32
+ },
33
+ provider: {
34
+ type: "string",
35
+ enum: ["groq", "fireworks"],
36
+ default: "groq",
37
+ description: "transcription provider",
38
+ },
39
+ output: {
40
+ type: "string",
41
+ format: "file-path",
42
+ description: "output file path",
43
+ },
44
+ },
45
+ },
46
+ output: { type: "string", description: "transcribed text" },
47
+ },
48
+ async run(options) {
49
+ const { audio, provider, output } = options as {
50
+ audio: string;
51
+ provider?: "groq" | "fireworks";
52
+ output?: string;
53
+ };
54
+ return transcribe({ audioUrl: audio, provider, outputPath: output });
55
+ },
56
+ };
57
+
17
58
  // types
18
59
  export interface TranscribeOptions {
19
60
  audioUrl: string; // url or local file path
@@ -163,65 +204,7 @@ export async function transcribe(
163
204
  }
164
205
 
165
206
  // cli
166
- async function cli() {
167
- const args = process.argv.slice(2);
168
- const command = args[0];
169
-
170
- if (!command || command === "help") {
171
- console.log(`
172
- usage:
173
- bun run service/transcribe.ts <audioPath> [provider] [outputPath]
174
-
175
- arguments:
176
- audioPath - url or local path to audio file
177
- provider - groq (default) | fireworks
178
- outputPath - optional path to save transcription
179
-
180
- examples:
181
- bun run service/transcribe.ts https://example.com/audio.mp3
182
- bun run service/transcribe.ts media/dora.ogg groq
183
- bun run service/transcribe.ts https://example.com/audio.mp3 fireworks output.srt
184
- bun run service/transcribe.ts media/audio.mp3 groq output.txt
185
-
186
- providers:
187
- groq - ultra-fast whisper (text only, free tier available)
188
- fireworks - slower but includes srt timestamps (uses reels-srt api)
189
-
190
- environment:
191
- GROQ_API_KEY - your groq api key (for groq provider)
192
- `);
193
- process.exit(0);
194
- }
195
-
196
- try {
197
- const audioUrl = args[0];
198
- const provider = (args[1] || "groq") as "groq" | "fireworks";
199
- const outputPath = args[2];
200
-
201
- if (!audioUrl) {
202
- throw new Error("audioUrl is required");
203
- }
204
-
205
- const result = await transcribe({
206
- audioUrl,
207
- provider,
208
- outputFormat: provider === "fireworks" ? "srt" : "text",
209
- outputPath: outputPath || join(process.cwd(), "output.txt"),
210
- });
211
-
212
- if (result.success) {
213
- console.log("\ntranscription:");
214
- console.log(result.srt || result.text);
215
- } else {
216
- console.error(`\nerror: ${result.error}`);
217
- process.exit(1);
218
- }
219
- } catch (error) {
220
- console.error("[transcribe] error:", error);
221
- process.exit(1);
222
- }
223
- }
224
-
225
207
  if (import.meta.main) {
226
- cli();
208
+ const { runCli } = await import("../../cli/runner");
209
+ runCli(meta);
227
210
  }
@@ -4,9 +4,50 @@
4
4
  * usage: bun run service/video.ts <command> <args>
5
5
  */
6
6
 
7
+ import type { ActionMeta } from "../../cli/types";
7
8
  import { imageToVideo, textToVideo } from "../../lib/fal";
8
9
  import { uploadFromUrl } from "../../utilities/s3";
9
10
 
11
+ export const meta: ActionMeta = {
12
+ name: "video",
13
+ type: "action",
14
+ description: "generate video from text or image",
15
+ inputType: "text/image",
16
+ outputType: "video",
17
+ schema: {
18
+ input: {
19
+ type: "object",
20
+ required: ["prompt"],
21
+ properties: {
22
+ prompt: { type: "string", description: "what to generate" },
23
+ image: {
24
+ type: "string",
25
+ format: "file-path",
26
+ description: "input image (enables image-to-video)",
27
+ },
28
+ duration: {
29
+ type: "integer",
30
+ enum: [5, 10],
31
+ default: 5,
32
+ description: "video duration in seconds",
33
+ },
34
+ },
35
+ },
36
+ output: { type: "string", format: "file-path", description: "video path" },
37
+ },
38
+ async run(options) {
39
+ const { prompt, image, duration } = options as {
40
+ prompt: string;
41
+ image?: string;
42
+ duration?: 5 | 10;
43
+ };
44
+ if (image) {
45
+ return generateVideoFromImage(prompt, image, { duration });
46
+ }
47
+ return generateVideoFromText(prompt, { duration });
48
+ },
49
+ };
50
+
10
51
  export interface VideoGenerationResult {
11
52
  videoUrl: string;
12
53
  duration?: number;
@@ -77,59 +118,8 @@ export async function generateVideoFromText(
77
118
  };
78
119
  }
79
120
 
80
- // cli runner
121
+ // cli
81
122
  if (import.meta.main) {
82
- const [command, ...args] = process.argv.slice(2);
83
-
84
- switch (command) {
85
- case "from_image": {
86
- if (!args[0] || !args[1]) {
87
- console.log(`
88
- usage:
89
- bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
90
- `);
91
- process.exit(1);
92
- }
93
- const duration = args[2];
94
- if (duration && duration !== "5" && duration !== "10") {
95
- console.error("duration must be 5 or 10");
96
- process.exit(1);
97
- }
98
- const imgResult = await generateVideoFromImage(args[0], args[1], {
99
- duration: duration === "10" ? 10 : 5,
100
- upload: args[3] === "true",
101
- });
102
- console.log(JSON.stringify(imgResult, null, 2));
103
- break;
104
- }
105
-
106
- case "from_text": {
107
- if (!args[0]) {
108
- console.log(`
109
- usage:
110
- bun run service/video.ts from_text <prompt> [duration] [upload]
111
- `);
112
- process.exit(1);
113
- }
114
- const duration = args[1];
115
- if (duration && duration !== "5" && duration !== "10") {
116
- console.error("duration must be 5 or 10");
117
- process.exit(1);
118
- }
119
- const txtResult = await generateVideoFromText(args[0], {
120
- duration: duration === "10" ? 10 : 5,
121
- upload: args[2] === "true",
122
- });
123
- console.log(JSON.stringify(txtResult, null, 2));
124
- break;
125
- }
126
-
127
- default:
128
- console.log(`
129
- usage:
130
- bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
131
- bun run service/video.ts from_text <prompt> [duration] [upload]
132
- `);
133
- process.exit(1);
134
- }
123
+ const { runCli } = await import("../../cli/runner");
124
+ runCli(meta);
135
125
  }
@@ -5,9 +5,47 @@
5
5
  * supports elevenlabs and future providers
6
6
  */
7
7
 
8
+ import type { ActionMeta } from "../../cli/types";
8
9
  import { textToSpeech, VOICES } from "../../lib/elevenlabs";
9
10
  import { uploadFile } from "../../utilities/s3";
10
11
 
12
+ export const meta: ActionMeta = {
13
+ name: "voice",
14
+ type: "action",
15
+ description: "text to speech generation",
16
+ inputType: "text",
17
+ outputType: "audio",
18
+ schema: {
19
+ input: {
20
+ type: "object",
21
+ required: ["text"],
22
+ properties: {
23
+ text: { type: "string", description: "text to convert to speech" },
24
+ voice: {
25
+ type: "string",
26
+ enum: ["rachel", "domi", "bella", "antoni", "josh", "adam", "sam"],
27
+ default: "rachel",
28
+ description: "voice to use",
29
+ },
30
+ output: {
31
+ type: "string",
32
+ format: "file-path",
33
+ description: "output file path",
34
+ },
35
+ },
36
+ },
37
+ output: { type: "string", format: "file-path", description: "audio path" },
38
+ },
39
+ async run(options) {
40
+ const { text, voice, output } = options as {
41
+ text: string;
42
+ voice?: string;
43
+ output?: string;
44
+ };
45
+ return generateVoice({ text, voice, outputPath: output });
46
+ },
47
+ };
48
+
11
49
  // types
12
50
  export interface GenerateVoiceOptions {
13
51
  text: string;
@@ -92,110 +130,7 @@ export async function generateVoice(
92
130
  }
93
131
 
94
132
  // cli
95
- async function cli() {
96
- const args = process.argv.slice(2);
97
- const command = args[0];
98
-
99
- if (!command || command === "help") {
100
- console.log(`
101
- usage:
102
- bun run service/voice.ts <command> [args]
103
-
104
- commands:
105
- generate <text> [voice] [provider] [upload] generate voice from text
106
- elevenlabs <text> [voice] [upload] generate with elevenlabs
107
- help show this help
108
-
109
- examples:
110
- bun run service/voice.ts generate "hello world" rachel elevenlabs false
111
- bun run service/voice.ts elevenlabs "hello world" josh true
112
- bun run service/voice.ts generate "welcome to ai" bella
113
-
114
- available voices:
115
- rachel, domi, bella, antoni, elli, josh, arnold, adam, sam
116
-
117
- providers:
118
- elevenlabs (default)
119
-
120
- environment:
121
- ELEVENLABS_API_KEY - required for elevenlabs
122
- CLOUDFLARE_* - required for upload
123
- `);
124
- process.exit(0);
125
- }
126
-
127
- try {
128
- switch (command) {
129
- case "generate": {
130
- const text = args[1];
131
- const voice = args[2];
132
- const provider = (args[3] || "elevenlabs") as "elevenlabs";
133
- const upload = args[4] === "true";
134
-
135
- if (!text) {
136
- throw new Error("text is required");
137
- }
138
-
139
- const outputPath = `media/voice-${Date.now()}.mp3`;
140
-
141
- const result = await generateVoice({
142
- text,
143
- voice,
144
- provider,
145
- upload,
146
- outputPath,
147
- });
148
-
149
- console.log(`[voice] result:`, {
150
- provider: result.provider,
151
- voiceId: result.voiceId,
152
- audioSize: result.audio.length,
153
- outputPath,
154
- uploadUrl: result.uploadUrl,
155
- });
156
- break;
157
- }
158
-
159
- case "elevenlabs": {
160
- const text = args[1];
161
- const voice = args[2];
162
- const upload = args[3] === "true";
163
-
164
- if (!text) {
165
- throw new Error("text is required");
166
- }
167
-
168
- const outputPath = `media/voice-${Date.now()}.mp3`;
169
-
170
- const result = await generateVoice({
171
- text,
172
- voice,
173
- provider: "elevenlabs",
174
- upload,
175
- outputPath,
176
- });
177
-
178
- console.log(`[voice] result:`, {
179
- provider: result.provider,
180
- voiceId: result.voiceId,
181
- audioSize: result.audio.length,
182
- outputPath,
183
- uploadUrl: result.uploadUrl,
184
- });
185
- break;
186
- }
187
-
188
- default:
189
- console.error(`unknown command: ${command}`);
190
- console.log(`run 'bun run service/voice.ts help' for usage`);
191
- process.exit(1);
192
- }
193
- } catch (error) {
194
- console.error(`[voice] error:`, error);
195
- process.exit(1);
196
- }
197
- }
198
-
199
133
  if (import.meta.main) {
200
- cli();
134
+ const { runCli } = await import("../../cli/runner");
135
+ runCli(meta);
201
136
  }
@@ -0,0 +1,58 @@
1
+ /**
2
+ * varg find command
3
+ * fuzzy search by scanning filesystem
4
+ */
5
+
6
+ import { defineCommand } from "citty";
7
+ import { search } from "../discover";
8
+ import { box, c, header, separator } from "../ui";
9
+
10
+ export const findCmd = defineCommand({
11
+ meta: {
12
+ name: "find",
13
+ description: "fuzzy search for models/actions",
14
+ },
15
+ args: {
16
+ query: {
17
+ type: "positional",
18
+ description: "search query",
19
+ required: true,
20
+ },
21
+ },
22
+ async run({ args }) {
23
+ const query = args.query;
24
+
25
+ if (!query) {
26
+ console.error(`${c.red("error:")} search query required`);
27
+ console.log(`\nusage: ${c.cyan("varg find <query>")}`);
28
+ process.exit(1);
29
+ }
30
+
31
+ const results = await search(query);
32
+
33
+ if (results.length === 0) {
34
+ console.log(`\nno matches for "${query}"`);
35
+ console.log(`\ntry ${c.cyan("varg list")} to see all available actions`);
36
+ return;
37
+ }
38
+
39
+ const content: string[] = [];
40
+ content.push("");
41
+ content.push(header("MATCHES"));
42
+ content.push("");
43
+
44
+ for (const action of results) {
45
+ content.push(
46
+ ` ${c.cyan(action.name.padEnd(16))}${action.inputType} → ${action.outputType}`,
47
+ );
48
+ }
49
+
50
+ content.push("");
51
+ content.push(separator());
52
+ content.push("");
53
+ content.push(` run ${c.cyan("varg run <name> --help")} for usage`);
54
+ content.push("");
55
+
56
+ console.log(box(`search: "${query}"`, content));
57
+ },
58
+ });
@@ -0,0 +1,70 @@
1
+ /**
2
+ * varg help command
3
+ */
4
+
5
+ import { defineCommand } from "citty";
6
+ import { box, c, header, separator } from "../ui";
7
+
8
+ export const helpCmd = defineCommand({
9
+ meta: {
10
+ name: "help",
11
+ description: "show help",
12
+ },
13
+ run() {
14
+ const content: string[] = [];
15
+ content.push("");
16
+ content.push(" AI video infrastructure from your terminal.");
17
+ content.push("");
18
+ content.push(separator());
19
+ content.push("");
20
+ content.push(header("USAGE"));
21
+ content.push("");
22
+ content.push(` varg ${c.cyan("<command>")} [target] [options]`);
23
+ content.push("");
24
+ content.push(separator());
25
+ content.push("");
26
+ content.push(header("COMMANDS"));
27
+ content.push("");
28
+ content.push(` ${c.cyan("run".padEnd(12))}run a model or action`);
29
+ content.push(` ${c.cyan("list".padEnd(12))}discover what's available`);
30
+ content.push(
31
+ ` ${c.cyan("find".padEnd(12))}fuzzy search for models/actions`,
32
+ );
33
+ content.push(
34
+ ` ${c.cyan("which".padEnd(12))}inspect what's behind an action`,
35
+ );
36
+ content.push(` ${c.cyan("help".padEnd(12))}show this help`);
37
+ content.push("");
38
+ content.push(separator());
39
+ content.push("");
40
+ content.push(header("EXAMPLES"));
41
+ content.push("");
42
+ content.push(` ${c.dim("# generate video from text")}`);
43
+ content.push(` varg run kling --prompt "a cat dancing"`);
44
+ content.push("");
45
+ content.push(` ${c.dim("# animate an image")}`);
46
+ content.push(` varg run image-to-video --image ./cat.png`);
47
+ content.push("");
48
+ content.push(` ${c.dim("# transcribe audio")}`);
49
+ content.push(` varg run transcribe ./video.mp4`);
50
+ content.push("");
51
+ content.push(` ${c.dim("# see what's available")}`);
52
+ content.push(` varg list`);
53
+ content.push("");
54
+ content.push(separator());
55
+ content.push("");
56
+ content.push(header("ENVIRONMENT"));
57
+ content.push("");
58
+ content.push(` ${c.dim("FAL_KEY".padEnd(24))}fal.ai api key`);
59
+ content.push(
60
+ ` ${c.dim("REPLICATE_API_TOKEN".padEnd(24))}replicate api key`,
61
+ );
62
+ content.push(
63
+ ` ${c.dim("ELEVENLABS_API_KEY".padEnd(24))}elevenlabs api key`,
64
+ );
65
+ content.push(` ${c.dim("GROQ_API_KEY".padEnd(24))}groq api key`);
66
+ content.push("");
67
+
68
+ console.log(box("varg", content));
69
+ },
70
+ });
@@ -0,0 +1,49 @@
1
+ /**
2
+ * varg list command
3
+ * discover what's available by scanning filesystem
4
+ */
5
+
6
+ import { defineCommand } from "citty";
7
+ import { discoverActions } from "../discover";
8
+ import { box, c, header, separator, table } from "../ui";
9
+
10
+ export const listCmd = defineCommand({
11
+ meta: {
12
+ name: "list",
13
+ description: "discover what's available",
14
+ },
15
+ args: {
16
+ filter: {
17
+ type: "positional",
18
+ description: "filter by type",
19
+ required: false,
20
+ },
21
+ },
22
+ async run() {
23
+ const actions = await discoverActions();
24
+
25
+ const content: string[] = [];
26
+ content.push("");
27
+
28
+ content.push(header("ACTIONS"));
29
+ content.push("");
30
+
31
+ const rows = actions.map((a) => ({
32
+ name: a.name,
33
+ description:
34
+ `${a.inputType} → ${a.outputType}`.padEnd(20) + a.description,
35
+ }));
36
+
37
+ content.push(...table(rows));
38
+ content.push("");
39
+
40
+ content.push(separator());
41
+ content.push("");
42
+ content.push(
43
+ ` ${actions.length} actions · run ${c.cyan("varg run <action> --info")} for details`,
44
+ );
45
+ content.push("");
46
+
47
+ console.log(box("varg", content));
48
+ },
49
+ });