@vargai/sdk 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +24 -0
- package/CLAUDE.md +118 -0
- package/HIGGSFIELD_REWRITE_SUMMARY.md +300 -0
- package/README.md +231 -0
- package/SKILLS.md +157 -0
- package/STRUCTURE.md +92 -0
- package/TEST_RESULTS.md +122 -0
- package/action/captions/SKILL.md +170 -0
- package/action/captions/index.ts +169 -0
- package/action/edit/SKILL.md +235 -0
- package/action/edit/index.ts +437 -0
- package/action/image/SKILL.md +140 -0
- package/action/image/index.ts +105 -0
- package/action/sync/SKILL.md +136 -0
- package/action/sync/index.ts +145 -0
- package/action/transcribe/SKILL.md +179 -0
- package/action/transcribe/index.ts +210 -0
- package/action/video/SKILL.md +116 -0
- package/action/video/index.ts +125 -0
- package/action/voice/SKILL.md +125 -0
- package/action/voice/index.ts +136 -0
- package/biome.json +33 -0
- package/bun.lock +842 -0
- package/cli/commands/find.ts +58 -0
- package/cli/commands/help.ts +70 -0
- package/cli/commands/list.ts +49 -0
- package/cli/commands/run.ts +237 -0
- package/cli/commands/which.ts +66 -0
- package/cli/discover.ts +66 -0
- package/cli/index.ts +33 -0
- package/cli/runner.ts +65 -0
- package/cli/types.ts +49 -0
- package/cli/ui.ts +185 -0
- package/index.ts +75 -0
- package/lib/README.md +144 -0
- package/lib/ai-sdk/fal.ts +106 -0
- package/lib/ai-sdk/replicate.ts +107 -0
- package/lib/elevenlabs.ts +382 -0
- package/lib/fal.ts +467 -0
- package/lib/ffmpeg.ts +467 -0
- package/lib/fireworks.ts +235 -0
- package/lib/groq.ts +246 -0
- package/lib/higgsfield/MIGRATION.md +308 -0
- package/lib/higgsfield/README.md +273 -0
- package/lib/higgsfield/example.ts +228 -0
- package/lib/higgsfield/index.ts +241 -0
- package/lib/higgsfield/soul.ts +262 -0
- package/lib/higgsfield.ts +176 -0
- package/lib/remotion/SKILL.md +823 -0
- package/lib/remotion/cli.ts +115 -0
- package/lib/remotion/functions.ts +283 -0
- package/lib/remotion/index.ts +19 -0
- package/lib/remotion/templates.ts +73 -0
- package/lib/replicate.ts +304 -0
- package/output.txt +1 -0
- package/package.json +42 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/scripts/produce-menopause-campaign.sh +202 -0
- package/service/music/SKILL.md +229 -0
- package/service/music/index.ts +296 -0
- package/test-import.ts +7 -0
- package/test-services.ts +97 -0
- package/tsconfig.json +29 -0
- package/utilities/s3.ts +147 -0
package/lib/replicate.ts
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* replicate.com api wrapper for video/image generation
|
|
5
|
+
* supports models like minimax, kling, luma, stable diffusion
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import Replicate from "replicate";
|
|
9
|
+
|
|
10
|
+
const replicate = new Replicate({
|
|
11
|
+
auth: process.env.REPLICATE_API_TOKEN || "",
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
// types

/** Options accepted by runVideo(). */
export interface RunVideoOptions {
  /** model identifier in "owner/name" form, e.g. MODELS.VIDEO.MINIMAX */
  model: string;
  /** model-specific input payload (prompt, image url, etc.) */
  input: Record<string, unknown>;
}

/** Options accepted by runImage(). Structurally identical to RunVideoOptions. */
export interface RunImageOptions {
  /** model identifier in "owner/name" form, e.g. MODELS.IMAGE.FLUX_DEV */
  model: string;
  /** model-specific input payload (prompt, size, etc.) */
  input: Record<string, unknown>;
}
|
|
24
|
+
|
|
25
|
+
// core functions
|
|
26
|
+
export async function runModel(model: string, input: Record<string, unknown>) {
|
|
27
|
+
console.log(`[replicate] running ${model}...`);
|
|
28
|
+
|
|
29
|
+
try {
|
|
30
|
+
const output = await replicate.run(model as `${string}/${string}`, {
|
|
31
|
+
input,
|
|
32
|
+
});
|
|
33
|
+
console.log(`[replicate] completed`);
|
|
34
|
+
return output;
|
|
35
|
+
} catch (error) {
|
|
36
|
+
console.error(`[replicate] error:`, error);
|
|
37
|
+
throw error;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export async function runVideo(options: RunVideoOptions) {
|
|
42
|
+
const { model, input } = options;
|
|
43
|
+
|
|
44
|
+
if (!model || !input) {
|
|
45
|
+
throw new Error("model and input are required");
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
return await runModel(model, input);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export async function runImage(options: RunImageOptions) {
|
|
52
|
+
const { model, input } = options;
|
|
53
|
+
|
|
54
|
+
if (!model || !input) {
|
|
55
|
+
throw new Error("model and input are required");
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return await runModel(model, input);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// popular models
|
|
62
|
+
export const MODELS = {
|
|
63
|
+
// video generation
|
|
64
|
+
VIDEO: {
|
|
65
|
+
MINIMAX: "minimax/video-01",
|
|
66
|
+
KLING: "fofr/kling-v1.5",
|
|
67
|
+
LUMA: "fofr/ltx-video",
|
|
68
|
+
RUNWAY_GEN3: "replicate/runway-gen3-turbo",
|
|
69
|
+
WAN_2_5: "wan-video/wan-2.5-i2v",
|
|
70
|
+
},
|
|
71
|
+
// image generation
|
|
72
|
+
IMAGE: {
|
|
73
|
+
FLUX_PRO: "black-forest-labs/flux-1.1-pro",
|
|
74
|
+
FLUX_DEV: "black-forest-labs/flux-dev",
|
|
75
|
+
FLUX_SCHNELL: "black-forest-labs/flux-schnell",
|
|
76
|
+
STABLE_DIFFUSION: "stability-ai/sdxl",
|
|
77
|
+
},
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
// cli

/**
 * Command-line entry point. Parses process.argv and dispatches to the
 * runVideo/runImage helpers above, printing model output to stdout.
 *
 * Exits 0 after printing help, exits 1 on an unknown command or any error.
 */
async function cli() {
  const args = process.argv.slice(2);
  const command = args[0];

  // no command (or explicit "help") -> print usage and exit successfully
  if (!command || command === "help") {
    console.log(`
usage:
  bun run lib/replicate.ts <command> [args]

commands:
  video <model> <prompt> [imageUrl]    generate video
  image <model> <prompt>               generate image
  minimax <prompt> [imageUrl]          generate video with minimax-01
  kling <prompt> [imageUrl]            generate video with kling-v1.5
  wan <imageUrl> <audioUrl> <prompt>   generate talking video with wan 2.5
  flux <prompt>                        generate image with flux-dev
  list                                 list recent predictions
  get <predictionId>                   get prediction by id
  help                                 show this help

examples:
  bun run lib/replicate.ts minimax "person walking on beach"
  bun run lib/replicate.ts minimax "camera zoom in" https://example.com/img.jpg
  bun run lib/replicate.ts kling "cinematic city scene"
  bun run lib/replicate.ts wan https://image.jpg https://audio.mp3 "person talking to camera"
  bun run lib/replicate.ts flux "cyberpunk cityscape"
  bun run lib/replicate.ts video "minimax/video-01" "dancing robot"
  bun run lib/replicate.ts image "black-forest-labs/flux-dev" "sunset landscape"

environment:
  REPLICATE_API_TOKEN - your replicate api token
`);
    process.exit(0);
  }

  try {
    switch (command) {
      // minimax video-01: text-to-video, optional conditioning image
      case "minimax": {
        const prompt = args[1];
        const imageUrl = args[2];

        if (!prompt) {
          throw new Error("prompt is required");
        }

        const input: Record<string, unknown> = { prompt };
        if (imageUrl) {
          // minimax expects first_frame_image (not image) for conditioning
          input.first_frame_image = imageUrl;
        }

        const output = await runVideo({
          model: MODELS.VIDEO.MINIMAX,
          input,
        });

        console.log(`[replicate] minimax output:`, output);
        break;
      }

      // kling v1.5: text-to-video, optional conditioning image
      case "kling": {
        const prompt = args[1];
        const imageUrl = args[2];

        if (!prompt) {
          throw new Error("prompt is required");
        }

        const input: Record<string, unknown> = { prompt };
        if (imageUrl) {
          input.image = imageUrl;
        }

        const output = await runVideo({
          model: MODELS.VIDEO.KLING,
          input,
        });

        console.log(`[replicate] kling output:`, output);
        break;
      }

      // wan 2.5 image-to-video with audio (talking-head videos)
      case "wan": {
        const imageUrl = args[1];
        const audioUrl = args[2];
        const prompt = args[3];
        // optional trailing args: duration in seconds (default 10) and
        // resolution (default "480p")
        const duration = args[4] ? Number.parseInt(args[4], 10) : 10;
        const resolution = args[5] || "480p";

        if (!imageUrl || !audioUrl || !prompt) {
          throw new Error("imageUrl, audioUrl, and prompt are required");
        }

        const input: Record<string, unknown> = {
          image: imageUrl,
          audio: audioUrl,
          prompt,
          duration,
          resolution,
          enable_prompt_expansion: true,
        };

        console.log(`[replicate] running wan 2.5...`);
        console.log(`[replicate] this may take 3-5 minutes...`);

        const output = await runVideo({
          model: MODELS.VIDEO.WAN_2_5,
          input,
        });

        console.log(`[replicate] wan 2.5 output:`, output);
        break;
      }

      // show the 10 most recent predictions on the account
      case "list": {
        console.log(`[replicate] fetching recent predictions...`);
        const predictions = await replicate.predictions.list();

        console.log(`\nrecent predictions:\n`);
        for (const pred of predictions.results.slice(0, 10)) {
          console.log(`id: ${pred.id}`);
          console.log(`status: ${pred.status}`);
          console.log(`model: ${pred.version}`);
          console.log(`created: ${pred.created_at}`);
          if (pred.output) {
            // truncate the output preview to keep the listing readable
            console.log(
              `output: ${JSON.stringify(pred.output).substring(0, 100)}...`,
            );
          }
          console.log(`---`);
        }
        break;
      }

      // fetch a single prediction by id and report its status/output
      case "get": {
        const predictionId = args[1];

        if (!predictionId) {
          throw new Error("predictionId is required");
        }

        console.log(`[replicate] fetching prediction ${predictionId}...`);
        const prediction = await replicate.predictions.get(predictionId);

        console.log(`\nstatus: ${prediction.status}`);
        console.log(`model: ${prediction.version}`);
        console.log(`created: ${prediction.created_at}`);

        if (prediction.status === "succeeded") {
          console.log(`\noutput:`);
          console.log(JSON.stringify(prediction.output, null, 2));
        } else if (prediction.status === "failed") {
          console.log(`\nerror: ${prediction.error}`);
        } else {
          // any other status (starting/processing/canceled etc.)
          console.log(`\nstill processing...`);
        }
        break;
      }

      // flux-dev shortcut: text-to-image
      case "flux": {
        const prompt = args[1];

        if (!prompt) {
          throw new Error("prompt is required");
        }

        const output = await runImage({
          model: MODELS.IMAGE.FLUX_DEV,
          input: { prompt },
        });

        console.log(`[replicate] flux output:`, output);
        break;
      }

      // generic video: caller supplies the model id explicitly
      case "video": {
        const model = args[1];
        const prompt = args[2];
        const imageUrl = args[3];

        if (!model || !prompt) {
          throw new Error("model and prompt are required");
        }

        const input: Record<string, unknown> = { prompt };
        if (imageUrl) {
          input.image = imageUrl;
        }

        const output = await runVideo({ model, input });
        console.log(`[replicate] video output:`, output);
        break;
      }

      // generic image: caller supplies the model id explicitly
      case "image": {
        const model = args[1];
        const prompt = args[2];

        if (!model || !prompt) {
          throw new Error("model and prompt are required");
        }

        const output = await runImage({
          model,
          input: { prompt },
        });

        console.log(`[replicate] image output:`, output);
        break;
      }

      default:
        console.error(`unknown command: ${command}`);
        console.log(`run 'bun run lib/replicate.ts help' for usage`);
        process.exit(1);
    }
  } catch (error) {
    // any failure (argument validation or api error) is fatal for the cli
    console.error(`[replicate] error:`, error);
    process.exit(1);
  }
}
|
|
301
|
+
|
|
302
|
+
if (import.meta.main) {
|
|
303
|
+
cli();
|
|
304
|
+
}
|
package/output.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Let's say I've just joined the Roark team as a marketer and I want to add a new article to the website to boost our SEO. The devs won't get to this task anytime soon, but thanks to YoloCode AI, I can take care of it myself. Any changes I make show up instantly on the right, so I can see exactly how it will look in production before opening a pull request. So now I'm ready to submit and push this feature — and here we go!
|
package/package.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@vargai/sdk",
|
|
3
|
+
"module": "index.ts",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"bin": {
|
|
6
|
+
"varg": "./cli/index.ts"
|
|
7
|
+
},
|
|
8
|
+
"scripts": {
|
|
9
|
+
"lint": "biome check .",
|
|
10
|
+
"format": "biome format --write ."
|
|
11
|
+
},
|
|
12
|
+
"devDependencies": {
|
|
13
|
+
"@biomejs/biome": "^2.3.7",
|
|
14
|
+
"@types/bun": "latest"
|
|
15
|
+
},
|
|
16
|
+
"peerDependencies": {
|
|
17
|
+
"typescript": "^5"
|
|
18
|
+
},
|
|
19
|
+
"dependencies": {
|
|
20
|
+
"@ai-sdk/fal": "^1.0.23",
|
|
21
|
+
"@ai-sdk/replicate": "^1.0.18",
|
|
22
|
+
"@aws-sdk/client-s3": "^3.937.0",
|
|
23
|
+
"@aws-sdk/s3-request-presigner": "^3.937.0",
|
|
24
|
+
"@elevenlabs/elevenlabs-js": "^2.25.0",
|
|
25
|
+
"@fal-ai/client": "^1.7.2",
|
|
26
|
+
"@higgsfield/client": "^0.1.2",
|
|
27
|
+
"@remotion/cli": "^4.0.377",
|
|
28
|
+
"@types/fluent-ffmpeg": "^2.1.28",
|
|
29
|
+
"ai": "^5.0.98",
|
|
30
|
+
"citty": "^0.1.6",
|
|
31
|
+
"fluent-ffmpeg": "^2.1.3",
|
|
32
|
+
"groq-sdk": "^0.36.0",
|
|
33
|
+
"react": "^19.2.0",
|
|
34
|
+
"react-dom": "^19.2.0",
|
|
35
|
+
"remotion": "^4.0.377",
|
|
36
|
+
"replicate": "^1.4.0"
|
|
37
|
+
},
|
|
38
|
+
"version": "0.1.1",
|
|
39
|
+
"exports": {
|
|
40
|
+
".": "./index.ts"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: talking-character-pipeline
|
|
3
|
+
description: complete workflow to create talking character videos with lipsync and captions. use when creating ai character videos, talking avatars, narrated content, or social media character content with voiceover.
|
|
4
|
+
allowed-tools: Read, Bash
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# talking character pipeline
|
|
8
|
+
|
|
9
|
+
create professional talking character videos from scratch using the complete varg.ai sdk workflow.
|
|
10
|
+
|
|
11
|
+
## overview
|
|
12
|
+
|
|
13
|
+
this pipeline combines multiple services to create a fully produced talking character video:
|
|
14
|
+
1. character headshot generation
|
|
15
|
+
2. voiceover synthesis
|
|
16
|
+
3. character animation
|
|
17
|
+
4. lipsync
|
|
18
|
+
5. auto-generated captions
|
|
19
|
+
6. social media optimization
|
|
20
|
+
|
|
21
|
+
**total time**: ~4-5 minutes per video
|
|
22
|
+
|
|
23
|
+
## step-by-step workflow
|
|
24
|
+
|
|
25
|
+
### 1. create character headshot
|
|
26
|
+
```bash
|
|
27
|
+
bun run service/image.ts soul "professional headshot of a friendly person, studio lighting" true
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**output**: character image url + s3 url
|
|
31
|
+
**time**: ~30 seconds
|
|
32
|
+
|
|
33
|
+
**tip**: be specific about character appearance, lighting, and style for best results
|
|
34
|
+
|
|
35
|
+
### 2. generate voiceover
|
|
36
|
+
```bash
|
|
37
|
+
bun run service/voice.ts elevenlabs "hello world, this is my character speaking" rachel true
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
**output**: `media/voice-{timestamp}.mp3` + s3 url
|
|
41
|
+
**time**: ~10 seconds
|
|
42
|
+
|
|
43
|
+
**tip**: choose voice that matches character (rachel/bella for female, josh/antoni for male)
|
|
44
|
+
|
|
45
|
+
### 3. animate character
|
|
46
|
+
```bash
|
|
47
|
+
bun run service/video.ts from_image "person talking naturally, professional demeanor" <headshot_url> 5 true
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**output**: animated video url + s3 url
|
|
51
|
+
**time**: ~2-3 minutes
|
|
52
|
+
|
|
53
|
+
**tip**: use subtle motion prompts like "person talking naturally" or "slight head movement"
|
|
54
|
+
|
|
55
|
+
### 4. add lipsync
|
|
56
|
+
```bash
|
|
57
|
+
bun run service/sync.ts wav2lip <video_url> <audio_url>
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**output**: lipsynced video url
|
|
61
|
+
**time**: ~30 seconds
|
|
62
|
+
|
|
63
|
+
**tip**: wav2lip works best with close-up character shots and clear audio
|
|
64
|
+
|
|
65
|
+
### 5. add captions
|
|
66
|
+
```bash
|
|
67
|
+
bun run service/captions.ts <video_path> captioned.mp4 --provider fireworks
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**output**: `captioned.mp4` with subtitles
|
|
71
|
+
**time**: ~15 seconds (includes transcription)
|
|
72
|
+
|
|
73
|
+
**tip**: fireworks provider gives word-level timing for professional captions
|
|
74
|
+
|
|
75
|
+
### 6. prepare for social media
|
|
76
|
+
```bash
|
|
77
|
+
bun run service/edit.ts social captioned.mp4 final-tiktok.mp4 tiktok
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**output**: `final-tiktok.mp4` optimized for platform
|
|
81
|
+
**time**: ~5 seconds
|
|
82
|
+
|
|
83
|
+
**platforms**: tiktok, instagram, youtube-shorts, youtube, twitter
|
|
84
|
+
|
|
85
|
+
## complete example
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# step 1: generate character
|
|
89
|
+
bun run service/image.ts soul \
|
|
90
|
+
"professional business woman, friendly smile, studio lighting" \
|
|
91
|
+
true
|
|
92
|
+
|
|
93
|
+
# step 2: create voiceover
|
|
94
|
+
bun run service/voice.ts elevenlabs \
|
|
95
|
+
"welcome to our company. we're excited to show you our new product" \
|
|
96
|
+
rachel \
|
|
97
|
+
true
|
|
98
|
+
|
|
99
|
+
# step 3: animate character
|
|
100
|
+
bun run service/video.ts from_image \
|
|
101
|
+
"person talking professionally" \
|
|
102
|
+
https://your-s3-url/character.jpg \
|
|
103
|
+
5 \
|
|
104
|
+
true
|
|
105
|
+
|
|
106
|
+
# step 4: sync lips
|
|
107
|
+
bun run service/sync.ts wav2lip \
|
|
108
|
+
https://your-s3-url/animated.mp4 \
|
|
109
|
+
https://your-s3-url/voice.mp3
|
|
110
|
+
|
|
111
|
+
# step 5: add captions
|
|
112
|
+
bun run service/captions.ts \
|
|
113
|
+
synced-video.mp4 \
|
|
114
|
+
captioned.mp4 \
|
|
115
|
+
--provider fireworks \
|
|
116
|
+
--font "Arial Black" \
|
|
117
|
+
--size 32
|
|
118
|
+
|
|
119
|
+
# step 6: optimize for tiktok
|
|
120
|
+
bun run service/edit.ts social \
|
|
121
|
+
captioned.mp4 \
|
|
122
|
+
final-tiktok.mp4 \
|
|
123
|
+
tiktok
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## programmatic workflow
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
import { generateWithSoul } from "./service/image"
|
|
130
|
+
import { generateVoice } from "./service/voice"
|
|
131
|
+
import { generateVideoFromImage } from "./service/video"
|
|
132
|
+
import { lipsyncWav2Lip } from "./service/sync"
|
|
133
|
+
import { addCaptions } from "./service/captions"
|
|
134
|
+
import { prepareForSocial } from "./service/edit"
|
|
135
|
+
|
|
136
|
+
// 1. character
|
|
137
|
+
const character = await generateWithSoul(
|
|
138
|
+
"friendly business person, professional",
|
|
139
|
+
{ upload: true }
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
// 2. voice
|
|
143
|
+
const voice = await generateVoice({
|
|
144
|
+
text: "hello, welcome to our video",
|
|
145
|
+
voice: "rachel",
|
|
146
|
+
upload: true,
|
|
147
|
+
outputPath: "media/voice.mp3"
|
|
148
|
+
})
|
|
149
|
+
|
|
150
|
+
// 3. animate
|
|
151
|
+
const video = await generateVideoFromImage(
|
|
152
|
+
"person talking naturally",
|
|
153
|
+
character.uploaded!,
|
|
154
|
+
{ duration: 5, upload: true }
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
// 4. lipsync
|
|
158
|
+
const synced = await lipsyncWav2Lip({
|
|
159
|
+
videoUrl: video.uploaded!,
|
|
160
|
+
audioUrl: voice.uploadUrl!
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
// 5. captions
|
|
164
|
+
const captioned = await addCaptions({
|
|
165
|
+
videoPath: synced,
|
|
166
|
+
output: "captioned.mp4",
|
|
167
|
+
provider: "fireworks"
|
|
168
|
+
})
|
|
169
|
+
|
|
170
|
+
// 6. social media
|
|
171
|
+
const final = await prepareForSocial({
|
|
172
|
+
input: captioned,
|
|
173
|
+
output: "final.mp4",
|
|
174
|
+
platform: "tiktok"
|
|
175
|
+
})
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## use cases
|
|
179
|
+
|
|
180
|
+
### marketing content
|
|
181
|
+
- product announcements
|
|
182
|
+
- brand messaging
|
|
183
|
+
- explainer videos
|
|
184
|
+
- social media ads
|
|
185
|
+
|
|
186
|
+
### educational content
|
|
187
|
+
- course introductions
|
|
188
|
+
- tutorial narration
|
|
189
|
+
- lesson summaries
|
|
190
|
+
- educational social media
|
|
191
|
+
|
|
192
|
+
### social media
|
|
193
|
+
- tiktok character content
|
|
194
|
+
- instagram reels with narration
|
|
195
|
+
- youtube shorts
|
|
196
|
+
- twitter video posts
|
|
197
|
+
|
|
198
|
+
## tips for best results
|
|
199
|
+
|
|
200
|
+
**character creation:**
|
|
201
|
+
- be specific about appearance, expression, lighting
|
|
202
|
+
- "professional", "friendly", "casual" work well
|
|
203
|
+
- mention "studio lighting" for clean backgrounds
|
|
204
|
+
|
|
205
|
+
**voiceover:**
|
|
206
|
+
- write natural, conversational scripts
|
|
207
|
+
- add punctuation for natural pauses
|
|
208
|
+
- keep sentences short and clear
|
|
209
|
+
- match voice gender to character
|
|
210
|
+
|
|
211
|
+
**animation:**
|
|
212
|
+
- use subtle motion prompts
|
|
213
|
+
- 5 seconds is perfect for character talking shots
|
|
214
|
+
- avoid complex camera movements
|
|
215
|
+
|
|
216
|
+
**lipsync:**
|
|
217
|
+
- wav2lip works best with frontal face views
|
|
218
|
+
- ensure audio is clear and well-paced
|
|
219
|
+
- close-up shots give better results
|
|
220
|
+
|
|
221
|
+
**captions:**
|
|
222
|
+
- use fireworks for word-level timing
|
|
223
|
+
- larger font sizes (28-32) work better on mobile
|
|
224
|
+
- white text with black outline is most readable
|
|
225
|
+
|
|
226
|
+
**social media:**
|
|
227
|
+
- vertical (9:16) for tiktok/instagram/shorts
|
|
228
|
+
- landscape (16:9) for youtube/twitter
|
|
229
|
+
- keep total video under 60 seconds for best engagement
|
|
230
|
+
|
|
231
|
+
## estimated costs
|
|
232
|
+
|
|
233
|
+
per video (approximate):
|
|
234
|
+
- character image: $0.05 (higgsfield soul)
|
|
235
|
+
- voiceover: $0.10 (elevenlabs)
|
|
236
|
+
- animation: $0.20 (fal image-to-video)
|
|
237
|
+
- lipsync: $0.10 (replicate wav2lip)
|
|
238
|
+
- transcription: $0.02 (fireworks)
|
|
239
|
+
|
|
240
|
+
**total**: ~$0.47 per video
|
|
241
|
+
|
|
242
|
+
## troubleshooting
|
|
243
|
+
|
|
244
|
+
**character doesn't look consistent:**
|
|
245
|
+
- use higgsfield soul instead of fal for characters
|
|
246
|
+
- save character image and reuse for consistency
|
|
247
|
+
|
|
248
|
+
**lipsync doesn't match well:**
|
|
249
|
+
- ensure video shows face clearly
|
|
250
|
+
- use close-up shots
|
|
251
|
+
- check audio quality and clarity
|
|
252
|
+
|
|
253
|
+
**animation looks unnatural:**
|
|
254
|
+
- simplify motion prompt
|
|
255
|
+
- use "person talking naturally" or "slight movement"
|
|
256
|
+
- avoid dramatic camera movements
|
|
257
|
+
|
|
258
|
+
**captions are off-sync:**
|
|
259
|
+
- use fireworks provider for better timing
|
|
260
|
+
- check audio quality
|
|
261
|
+
- verify video fps is standard (24/30fps)
|
|
262
|
+
|
|
263
|
+
## required environment variables
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
HIGGSFIELD_API_KEY=hf_xxx
|
|
267
|
+
HIGGSFIELD_SECRET=secret_xxx
|
|
268
|
+
ELEVENLABS_API_KEY=el_xxx
|
|
269
|
+
FAL_API_KEY=fal_xxx
|
|
270
|
+
REPLICATE_API_TOKEN=r8_xxx
|
|
271
|
+
FIREWORKS_API_KEY=fw_xxx
|
|
272
|
+
CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
|
|
273
|
+
CLOUDFLARE_ACCESS_KEY_ID=xxx
|
|
274
|
+
CLOUDFLARE_ACCESS_SECRET=xxx
|
|
275
|
+
CLOUDFLARE_R2_BUCKET=m
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## next steps
|
|
279
|
+
|
|
280
|
+
after creating your talking character video:
|
|
281
|
+
- upload to social platforms
|
|
282
|
+
- analyze performance metrics
|
|
283
|
+
- iterate on character design and scripts
|
|
284
|
+
- create series with consistent character
|
|
285
|
+
- experiment with different voices and styles
|