@vibeframe/mcp-server 0.20.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +821 -862
  2. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -60,6 +60,7 @@ __export(remotion_exports, {
60
60
  compositeOverlay: () => compositeOverlay,
61
61
  compositeWithImage: () => compositeWithImage,
62
62
  ensureRemotionInstalled: () => ensureRemotionInstalled,
63
+ generateAnimatedCaptionComponent: () => generateAnimatedCaptionComponent,
63
64
  generateCaptionComponent: () => generateCaptionComponent,
64
65
  renderAndComposite: () => renderAndComposite,
65
66
  renderMotion: () => renderMotion,
@@ -491,6 +492,190 @@ export const ${name} = () => {
491
492
  `;
492
493
  return { code, name };
493
494
  }
495
+ function generateAnimatedCaptionComponent(options) {
496
+ const { groups, style, highlightColor, fontSize, position, width, fps, videoFileName } = options;
497
+ const name = videoFileName ? "VideoAnimatedCaption" : "AnimatedCaptionOverlay";
498
+ const groupsJSON = JSON.stringify(
499
+ groups.map((g) => ({
500
+ words: g.words.map((w) => ({ word: w.word, start: w.start, end: w.end })),
501
+ startTime: g.startTime,
502
+ endTime: g.endTime,
503
+ text: g.text
504
+ }))
505
+ );
506
+ const justifyContent = position === "top" ? "flex-start" : position === "center" ? "center" : "flex-end";
507
+ const paddingDir = position === "top" ? "paddingTop" : position === "bottom" ? "paddingBottom" : "";
508
+ const paddingVal = position === "center" ? "" : `${paddingDir}: 40,`;
509
+ const videoImport = videoFileName ? `, staticFile` : "";
510
+ const videoElement = videoFileName ? `<Video src={staticFile("${videoFileName}")} style={{ width: "100%", height: "100%" }} muted />` : "";
511
+ const videoMediaImport = videoFileName ? `import { Video } from "@remotion/media";
512
+ ` : "";
513
+ let wordRenderer;
514
+ switch (style) {
515
+ case "highlight":
516
+ wordRenderer = `
517
+ const isActive = currentTime >= w.start && currentTime < w.end;
518
+ const bgOpacity = isActive ? 1 : 0;
519
+ return (
520
+ <span
521
+ key={wi}
522
+ style={{
523
+ display: "inline-block",
524
+ padding: "2px 6px",
525
+ margin: "0 2px",
526
+ borderRadius: 4,
527
+ backgroundColor: isActive ? "${highlightColor}" : "transparent",
528
+ color: isActive ? "#000000" : "#FFFFFF",
529
+ transition: "background-color 0.1s",
530
+ fontWeight: "bold",
531
+ textShadow: isActive ? "none" : "2px 2px 4px rgba(0,0,0,0.8)",
532
+ }}
533
+ >
534
+ {w.word}
535
+ </span>
536
+ );`;
537
+ break;
538
+ case "bounce":
539
+ wordRenderer = `
540
+ const isActive = currentTime >= w.start && currentTime < w.end;
541
+ const entryFrame = w.start * ${fps};
542
+ const progress = Math.min(1, Math.max(0, (frame - entryFrame) / 5));
543
+ const springVal = isActive
544
+ ? 1 + Math.sin(progress * Math.PI) * 0.15
545
+ : 1;
546
+ const translateY = isActive
547
+ ? -Math.sin(progress * Math.PI) * 8
548
+ : 0;
549
+ return (
550
+ <span
551
+ key={wi}
552
+ style={{
553
+ display: "inline-block",
554
+ margin: "0 3px",
555
+ transform: \`scale(\${springVal}) translateY(\${translateY}px)\`,
556
+ color: isActive ? "${highlightColor}" : "#FFFFFF",
557
+ fontWeight: "bold",
558
+ textShadow: "2px 2px 4px rgba(0,0,0,0.8)",
559
+ }}
560
+ >
561
+ {w.word}
562
+ </span>
563
+ );`;
564
+ break;
565
+ case "pop-in":
566
+ wordRenderer = `
567
+ const entryFrame = w.start * ${fps};
568
+ const scale = frame >= entryFrame
569
+ ? Math.min(1, (frame - entryFrame) / 5)
570
+ : 0;
571
+ const isActive = currentTime >= w.start && currentTime < w.end;
572
+ return (
573
+ <span
574
+ key={wi}
575
+ style={{
576
+ display: "inline-block",
577
+ margin: "0 3px",
578
+ transform: \`scale(\${scale})\`,
579
+ opacity: scale,
580
+ color: isActive ? "${highlightColor}" : "#FFFFFF",
581
+ fontWeight: "bold",
582
+ textShadow: "2px 2px 4px rgba(0,0,0,0.8)",
583
+ }}
584
+ >
585
+ {w.word}
586
+ </span>
587
+ );`;
588
+ break;
589
+ case "neon":
590
+ wordRenderer = `
591
+ const isActive = currentTime >= w.start && currentTime < w.end;
592
+ const pulse = isActive ? 0.8 + Math.sin(frame * 0.3) * 0.2 : 0.5;
593
+ const glowSize = isActive ? 15 : 0;
594
+ return (
595
+ <span
596
+ key={wi}
597
+ style={{
598
+ display: "inline-block",
599
+ margin: "0 3px",
600
+ color: isActive ? "${highlightColor}" : "#FFFFFF",
601
+ fontWeight: "bold",
602
+ opacity: isActive ? 1 : pulse,
603
+ textShadow: isActive
604
+ ? \`0 0 \${glowSize}px ${highlightColor}, 0 0 \${glowSize * 2}px ${highlightColor}, 0 0 \${glowSize * 3}px ${highlightColor}\`
605
+ : "2px 2px 4px rgba(0,0,0,0.8)",
606
+ }}
607
+ >
608
+ {w.word}
609
+ </span>
610
+ );`;
611
+ break;
612
+ }
613
+ const code = `import { AbsoluteFill, useCurrentFrame, useVideoConfig${videoImport} } from "remotion";
614
+ ${videoMediaImport}
615
+ interface Word {
616
+ word: string;
617
+ start: number;
618
+ end: number;
619
+ }
620
+
621
+ interface WordGroup {
622
+ words: Word[];
623
+ startTime: number;
624
+ endTime: number;
625
+ text: string;
626
+ }
627
+
628
+ const groups: WordGroup[] = ${groupsJSON};
629
+
630
+ export const ${name} = () => {
631
+ const frame = useCurrentFrame();
632
+ const { fps } = useVideoConfig();
633
+ const currentTime = frame / fps;
634
+
635
+ const activeGroup = groups.find(
636
+ (g) => currentTime >= g.startTime && currentTime < g.endTime
637
+ );
638
+
639
+ const renderWord = (w: Word, wi: number) => {
640
+ ${wordRenderer}
641
+ };
642
+
643
+ return (
644
+ <AbsoluteFill>
645
+ ${videoElement}
646
+ {activeGroup && (
647
+ <AbsoluteFill
648
+ style={{
649
+ display: "flex",
650
+ justifyContent: "${justifyContent}",
651
+ alignItems: "center",
652
+ ${paddingVal}
653
+ }}
654
+ >
655
+ <div
656
+ style={{
657
+ fontSize: ${fontSize},
658
+ fontFamily: "Arial, Helvetica, sans-serif",
659
+ textAlign: "center" as const,
660
+ maxWidth: "${Math.round(width * 0.9)}px",
661
+ lineHeight: 1.5,
662
+ padding: "8px 16px",
663
+ display: "flex",
664
+ flexWrap: "wrap" as const,
665
+ justifyContent: "center",
666
+ gap: "0px",
667
+ }}
668
+ >
669
+ {activeGroup.words.map((w, wi) => renderWord(w, wi))}
670
+ </div>
671
+ </AbsoluteFill>
672
+ )}
673
+ </AbsoluteFill>
674
+ );
675
+ };
676
+ `;
677
+ return { code, name };
678
+ }
494
679
  async function compositeOverlay(options) {
495
680
  try {
496
681
  await execSafe("ffmpeg", [
@@ -1694,7 +1879,18 @@ function buildFFmpegArgs(clips, sources, presetSettings, outputPath, options, so
1694
1879
  if (hasAudio) {
1695
1880
  const audioTrimStart = clip.sourceStartOffset;
1696
1881
  const audioTrimEnd = clip.sourceStartOffset + clip.duration;
1697
- audioFilter = `[${srcIdx}:a]atrim=start=${audioTrimStart}:end=${audioTrimEnd},asetpts=PTS-STARTPTS`;
1882
+ const sourceDuration = source.duration || 0;
1883
+ const clipDuration = clip.duration;
1884
+ if (source.type === "audio" && sourceDuration > clipDuration && audioTrimStart === 0) {
1885
+ const tempo = sourceDuration / clipDuration;
1886
+ if (tempo <= 2) {
1887
+ audioFilter = `[${srcIdx}:a]atempo=${tempo.toFixed(4)},asetpts=PTS-STARTPTS`;
1888
+ } else {
1889
+ audioFilter = `[${srcIdx}:a]atrim=start=${audioTrimStart}:end=${audioTrimEnd},asetpts=PTS-STARTPTS`;
1890
+ }
1891
+ } else {
1892
+ audioFilter = `[${srcIdx}:a]atrim=start=${audioTrimStart}:end=${audioTrimEnd},asetpts=PTS-STARTPTS`;
1893
+ }
1698
1894
  } else {
1699
1895
  audioFilter = `anullsrc=r=48000:cl=stereo,atrim=0:${clip.duration},asetpts=PTS-STARTPTS`;
1700
1896
  }
@@ -1742,7 +1938,7 @@ function buildFFmpegArgs(clips, sources, presetSettings, outputPath, options, so
1742
1938
  return args;
1743
1939
  }
1744
1940
  function runFFmpegProcess(ffmpegPath, args, onProgress) {
1745
- return new Promise((resolve13, reject) => {
1941
+ return new Promise((resolve12, reject) => {
1746
1942
  const ffmpeg = spawn(ffmpegPath, args, {
1747
1943
  stdio: ["pipe", "pipe", "pipe"]
1748
1944
  });
@@ -1766,7 +1962,7 @@ function runFFmpegProcess(ffmpegPath, args, onProgress) {
1766
1962
  });
1767
1963
  ffmpeg.on("close", (code) => {
1768
1964
  if (code === 0) {
1769
- resolve13();
1965
+ resolve12();
1770
1966
  } else {
1771
1967
  const errorMatch = stderr.match(/Error.*$/m);
1772
1968
  const errorMsg = errorMatch ? errorMatch[0] : `FFmpeg exited with code ${code}`;
@@ -2487,8 +2683,11 @@ async function analyzeContent(api, content, targetDuration, options) {
2487
2683
  // ../ai-providers/dist/gemini/GeminiProvider.js
2488
2684
  var MODEL_MAP = {
2489
2685
  "flash": "gemini-2.5-flash-image",
2686
+ "3.1-flash": "gemini-3.1-flash-image-preview",
2687
+ "latest": "gemini-3.1-flash-image-preview",
2490
2688
  "pro": "gemini-3-pro-image-preview",
2491
2689
  "gemini-2.5-flash-image": "gemini-2.5-flash-image",
2690
+ "gemini-3.1-flash-image-preview": "gemini-3.1-flash-image-preview",
2492
2691
  "gemini-3-pro-image-preview": "gemini-3-pro-image-preview"
2493
2692
  };
2494
2693
  var GeminiProvider = class {
@@ -2523,36 +2722,57 @@ var GeminiProvider = class {
2523
2722
  };
2524
2723
  }
2525
2724
  try {
2526
- const model = options?.model || "veo-3.1-fast-generate-preview";
2725
+ const veoOpts = options ?? {};
2726
+ const model = veoOpts.model || "veo-3.1-fast-generate-preview";
2527
2727
  const aspectRatioMap = {
2528
2728
  "16:9": "16:9",
2529
2729
  "9:16": "9:16",
2530
2730
  "1:1": "1:1"
2531
2731
  };
2532
- const requestBody = {
2533
- instances: [{
2534
- prompt: prompt2
2535
- }],
2536
- parameters: {
2537
- aspectRatio: aspectRatioMap[options?.aspectRatio || "16:9"] || "16:9",
2538
- durationSeconds: Math.max(4, Math.min(8, options?.duration || 8))
2539
- }
2732
+ const parameters = {
2733
+ aspectRatio: aspectRatioMap[veoOpts.aspectRatio || "16:9"] || "16:9",
2734
+ durationSeconds: Math.max(4, Math.min(8, veoOpts.duration || 8))
2540
2735
  };
2541
- if (options?.referenceImage) {
2542
- const imageData = options.referenceImage;
2736
+ if (veoOpts.negativePrompt) {
2737
+ parameters.negativePrompt = veoOpts.negativePrompt;
2738
+ }
2739
+ if (veoOpts.resolution) {
2740
+ parameters.resolution = veoOpts.resolution;
2741
+ }
2742
+ if (veoOpts.personGeneration) {
2743
+ parameters.personGeneration = veoOpts.personGeneration;
2744
+ }
2745
+ const instance = { prompt: prompt2 };
2746
+ if (veoOpts.referenceImage) {
2747
+ const imageData = veoOpts.referenceImage;
2543
2748
  if (imageData.startsWith("data:")) {
2544
2749
  const base64 = imageData.split(",")[1];
2545
2750
  const mimeType = imageData.split(";")[0].split(":")[1];
2546
- requestBody.instances[0].image = {
2547
- bytesBase64Encoded: base64,
2548
- mimeType
2549
- };
2751
+ instance.image = { bytesBase64Encoded: base64, mimeType };
2550
2752
  } else if (imageData.startsWith("http")) {
2551
- requestBody.instances[0].image = {
2552
- gcsUri: imageData
2553
- };
2753
+ instance.image = { gcsUri: imageData };
2554
2754
  }
2555
2755
  }
2756
+ if (veoOpts.lastFrame) {
2757
+ const lastFrameData = veoOpts.lastFrame;
2758
+ if (lastFrameData.startsWith("data:")) {
2759
+ const base64 = lastFrameData.split(",")[1];
2760
+ const mimeType = lastFrameData.split(";")[0].split(":")[1];
2761
+ instance.lastFrame = { bytesBase64Encoded: base64, mimeType };
2762
+ } else if (lastFrameData.startsWith("http")) {
2763
+ instance.lastFrame = { gcsUri: lastFrameData };
2764
+ }
2765
+ }
2766
+ if (veoOpts.referenceImages && veoOpts.referenceImages.length > 0) {
2767
+ instance.referenceImages = veoOpts.referenceImages.slice(0, 3).map((img) => ({
2768
+ bytesBase64Encoded: img.base64,
2769
+ mimeType: img.mimeType
2770
+ }));
2771
+ }
2772
+ const requestBody = {
2773
+ instances: [instance],
2774
+ parameters
2775
+ };
2556
2776
  const response = await fetch(`${this.baseUrl}/models/${model}:predictLongRunning`, {
2557
2777
  method: "POST",
2558
2778
  headers: {
@@ -2687,7 +2907,70 @@ var GeminiProvider = class {
2687
2907
  * Sleep helper
2688
2908
  */
2689
2909
  sleep(ms) {
2690
- return new Promise((resolve13) => setTimeout(resolve13, ms));
2910
+ return new Promise((resolve12) => setTimeout(resolve12, ms));
2911
+ }
2912
+ /**
2913
+ * Extend a previously generated Veo video
2914
+ * Uses the operation name from a completed generation to create a continuation
2915
+ */
2916
+ async extendVideo(previousOperationName, prompt2, options) {
2917
+ if (!this.apiKey) {
2918
+ return {
2919
+ id: "",
2920
+ status: "failed",
2921
+ error: "Gemini API key not configured"
2922
+ };
2923
+ }
2924
+ try {
2925
+ const model = options?.model || "veo-3.1-generate-preview";
2926
+ const instance = {
2927
+ video: { previousOperationName }
2928
+ };
2929
+ if (prompt2) {
2930
+ instance.prompt = prompt2;
2931
+ }
2932
+ const requestBody = {
2933
+ instances: [instance],
2934
+ parameters: {
2935
+ durationSeconds: Math.max(4, Math.min(8, options?.duration || 6))
2936
+ }
2937
+ };
2938
+ const response = await fetch(`${this.baseUrl}/models/${model}:predictLongRunning`, {
2939
+ method: "POST",
2940
+ headers: {
2941
+ "Content-Type": "application/json",
2942
+ "x-goog-api-key": this.apiKey
2943
+ },
2944
+ body: JSON.stringify(requestBody)
2945
+ });
2946
+ if (!response.ok) {
2947
+ const errorText = await response.text();
2948
+ return {
2949
+ id: "",
2950
+ status: "failed",
2951
+ error: `Veo extend API error (${response.status}): ${errorText}`
2952
+ };
2953
+ }
2954
+ const data = await response.json();
2955
+ if (data.name) {
2956
+ return {
2957
+ id: data.name,
2958
+ status: "pending",
2959
+ progress: 0
2960
+ };
2961
+ }
2962
+ return {
2963
+ id: "",
2964
+ status: "failed",
2965
+ error: data.error?.message || "Unknown Veo extend error"
2966
+ };
2967
+ } catch (error) {
2968
+ return {
2969
+ id: "",
2970
+ status: "failed",
2971
+ error: error instanceof Error ? error.message : "Unknown error"
2972
+ };
2973
+ }
2691
2974
  }
2692
2975
  async cancelGeneration(_id) {
2693
2976
  return false;
@@ -2726,18 +3009,26 @@ var GeminiProvider = class {
2726
3009
  } else {
2727
3010
  imageConfig.aspectRatio = "1:1";
2728
3011
  }
2729
- if (options.resolution && isPro) {
2730
- imageConfig.imageSize = options.resolution;
3012
+ if (options.resolution) {
3013
+ if (options.resolution === "512px" || options.resolution === "1K" || isPro) {
3014
+ imageConfig.imageSize = options.resolution;
3015
+ }
2731
3016
  }
2732
3017
  const generationConfig = {
2733
3018
  responseModalities: ["TEXT", "IMAGE"],
2734
3019
  imageConfig
2735
3020
  };
3021
+ if (options.thinkingConfig) {
3022
+ generationConfig.thinkingConfig = options.thinkingConfig;
3023
+ }
2736
3024
  const payload = {
2737
3025
  contents: [{ parts: [{ text: prompt2 }] }],
2738
3026
  generationConfig
2739
3027
  };
2740
- if (options.grounding && isPro) {
3028
+ const is31Flash = modelId === "gemini-3.1-flash-image-preview";
3029
+ if (options.imageSearchGrounding && is31Flash) {
3030
+ payload.tools = [{ googleSearch: { searchTypes: { webSearch: {}, imageSearch: {} } } }];
3031
+ } else if (options.grounding && isPro) {
2741
3032
  payload.tools = [{ googleSearch: {} }];
2742
3033
  }
2743
3034
  const response = await fetch(`${this.baseUrl}/models/${modelId}:generateContent?key=${this.apiKey}`, {
@@ -2764,9 +3055,12 @@ var GeminiProvider = class {
2764
3055
  const data = await response.json();
2765
3056
  const parts = data.candidates?.[0]?.content?.parts;
2766
3057
  if (!parts || parts.length === 0) {
3058
+ const finishReason = data.candidates?.[0]?.finishReason;
3059
+ const blockReason = data.promptFeedback?.blockReason;
3060
+ const detail = blockReason ? `Blocked by safety filter: ${blockReason}` : finishReason ? `Model returned no image (finishReason: ${finishReason})` : "No content in response";
2767
3061
  return {
2768
3062
  success: false,
2769
- error: "No content in response"
3063
+ error: detail
2770
3064
  };
2771
3065
  }
2772
3066
  const images = [];
@@ -2873,9 +3167,12 @@ var GeminiProvider = class {
2873
3167
  const data = await response.json();
2874
3168
  const responseParts = data.candidates?.[0]?.content?.parts;
2875
3169
  if (!responseParts || responseParts.length === 0) {
3170
+ const finishReason = data.candidates?.[0]?.finishReason;
3171
+ const blockReason = data.promptFeedback?.blockReason;
3172
+ const detail = blockReason ? `Blocked by safety filter: ${blockReason}` : finishReason ? `Model returned no image (finishReason: ${finishReason})` : "No content in response";
2876
3173
  return {
2877
3174
  success: false,
2878
- error: "No content in response"
3175
+ error: detail
2879
3176
  };
2880
3177
  }
2881
3178
  const images = [];
@@ -3130,7 +3427,7 @@ Example response:
3130
3427
  [{"type":"trim","description":"Trim intro to 3 seconds","clipIds":["clip-1"],"params":{"newDuration":3},"confidence":0.9}]
3131
3428
 
3132
3429
  Respond with ONLY the JSON array, no other text.`;
3133
- const response = await fetch(`${this.baseUrl}/models/gemini-1.5-flash:generateContent?key=${this.apiKey}`, {
3430
+ const response = await fetch(`${this.baseUrl}/models/gemini-2.5-flash:generateContent?key=${this.apiKey}`, {
3134
3431
  method: "POST",
3135
3432
  headers: {
3136
3433
  "Content-Type": "application/json"
@@ -3257,7 +3554,7 @@ async function analyzeContent2(apiKey, content, targetDuration, options) {
3257
3554
  Authorization: `Bearer ${apiKey}`
3258
3555
  },
3259
3556
  body: JSON.stringify({
3260
- model: "gpt-4o",
3557
+ model: "gpt-5-mini",
3261
3558
  messages: [
3262
3559
  { role: "system", content: systemPrompt },
3263
3560
  { role: "user", content: buildStoryboardUserMessage(content) }
@@ -3309,7 +3606,7 @@ var OpenAIProvider = class {
3309
3606
  this.iconUrl = "/icons/openai.svg";
3310
3607
  this.isAvailable = true;
3311
3608
  this.baseUrl = "https://api.openai.com/v1";
3312
- this.model = "gpt-4o-mini";
3609
+ this.model = "gpt-5-mini";
3313
3610
  }
3314
3611
  async initialize(config2) {
3315
3612
  this.apiKey = config2.apiKey;
@@ -3616,7 +3913,7 @@ Each segment should be 3-10 seconds long.`;
3616
3913
  Authorization: `Bearer ${this.apiKey}`
3617
3914
  },
3618
3915
  body: JSON.stringify({
3619
- model: "gpt-4o",
3916
+ model: "gpt-5-mini",
3620
3917
  max_tokens: 4096,
3621
3918
  response_format: { type: "json_object" },
3622
3919
  messages: [
@@ -3650,7 +3947,7 @@ ${videoAnalysis}` }
3650
3947
  }
3651
3948
  }
3652
3949
  /**
3653
- * Generate a storyboard from script content using GPT-4o.
3950
+ * Generate a storyboard from script content using GPT-5-mini.
3654
3951
  * Alternative to ClaudeProvider.analyzeContent for when Claude is unavailable.
3655
3952
  */
3656
3953
  async analyzeContent(content, targetDuration, options) {
@@ -5575,7 +5872,7 @@ var ElevenLabsProvider = class {
5575
5872
  this.id = "elevenlabs";
5576
5873
  this.name = "ElevenLabs";
5577
5874
  this.description = "AI text-to-speech with natural voices and voice cloning";
5578
- this.capabilities = ["text-to-speech", "sound-generation", "audio-isolation", "voice-clone"];
5875
+ this.capabilities = ["text-to-speech", "sound-generation", "music-generation", "audio-isolation", "voice-clone"];
5579
5876
  this.iconUrl = "/icons/elevenlabs.svg";
5580
5877
  this.isAvailable = true;
5581
5878
  this.baseUrl = "https://api.elevenlabs.io/v1";
@@ -5633,7 +5930,7 @@ var ElevenLabsProvider = class {
5633
5930
  error: voiceError instanceof Error ? voiceError.message : String(voiceError)
5634
5931
  };
5635
5932
  }
5636
- const model = options.model || "eleven_multilingual_v2";
5933
+ const model = options.model || "eleven_v3";
5637
5934
  const response = await fetch(`${this.baseUrl}/text-to-speech/${voiceId}`, {
5638
5935
  method: "POST",
5639
5936
  headers: {
@@ -5746,6 +6043,60 @@ var ElevenLabsProvider = class {
5746
6043
  };
5747
6044
  }
5748
6045
  }
6046
+ /**
6047
+ * Generate music from text prompt
6048
+ */
6049
+ async generateMusic(prompt2, options = {}) {
6050
+ if (!this.apiKey) {
6051
+ return {
6052
+ success: false,
6053
+ error: "ElevenLabs API key not configured"
6054
+ };
6055
+ }
6056
+ try {
6057
+ const body = {
6058
+ prompt: prompt2,
6059
+ model_id: "music_v1"
6060
+ };
6061
+ if (options.duration !== void 0) {
6062
+ const duration = Math.max(3, Math.min(600, options.duration));
6063
+ body.music_length_ms = duration * 1e3;
6064
+ }
6065
+ if (options.forceInstrumental !== void 0) {
6066
+ body.force_instrumental = options.forceInstrumental;
6067
+ }
6068
+ if (options.seed !== void 0) {
6069
+ body.seed = options.seed;
6070
+ }
6071
+ const response = await fetch(`${this.baseUrl}/music`, {
6072
+ method: "POST",
6073
+ headers: {
6074
+ "xi-api-key": this.apiKey,
6075
+ "Content-Type": "application/json",
6076
+ Accept: "audio/mpeg"
6077
+ },
6078
+ body: JSON.stringify(body)
6079
+ });
6080
+ if (!response.ok) {
6081
+ const error = await response.text();
6082
+ return {
6083
+ success: false,
6084
+ error: `Music generation failed: ${error}`
6085
+ };
6086
+ }
6087
+ const arrayBuffer = await response.arrayBuffer();
6088
+ const audioBuffer = Buffer.from(arrayBuffer);
6089
+ return {
6090
+ success: true,
6091
+ audioBuffer
6092
+ };
6093
+ } catch (error) {
6094
+ return {
6095
+ success: false,
6096
+ error: error instanceof Error ? error.message : "Unknown error"
6097
+ };
6098
+ }
6099
+ }
5749
6100
  /**
5750
6101
  * Isolate vocals from audio
5751
6102
  * Separates the vocal track from the background music/noise
@@ -5896,7 +6247,7 @@ var OpenAIImageProvider = class {
5896
6247
  this.id = "openai-image";
5897
6248
  this.name = "OpenAI GPT Image";
5898
6249
  this.description = "AI image generation with GPT Image 1.5 (fastest, best quality)";
5899
- this.capabilities = ["text-to-image", "background-removal"];
6250
+ this.capabilities = ["text-to-image", "background-removal", "image-editing"];
5900
6251
  this.iconUrl = "/icons/openai.svg";
5901
6252
  this.isAvailable = true;
5902
6253
  this.baseUrl = "https://api.openai.com/v1";
@@ -6026,6 +6377,76 @@ var OpenAIImageProvider = class {
6026
6377
  quality: "high"
6027
6378
  });
6028
6379
  }
6380
+ /**
6381
+ * Edit images using GPT Image 1.5
6382
+ * Supports up to 16 input images with text instruction-based editing
6383
+ */
6384
+ async editImage(imageBuffers, prompt2, options = {}) {
6385
+ if (!this.apiKey) {
6386
+ return {
6387
+ success: false,
6388
+ error: "OpenAI API key not configured"
6389
+ };
6390
+ }
6391
+ try {
6392
+ const formData = new FormData();
6393
+ formData.append("model", options.model || DEFAULT_MODEL);
6394
+ formData.append("prompt", prompt2);
6395
+ for (const buf of imageBuffers) {
6396
+ const uint8Array = new Uint8Array(buf);
6397
+ formData.append("image[]", new Blob([uint8Array], { type: "image/png" }), "image.png");
6398
+ }
6399
+ if (options.mask) {
6400
+ const maskUint8 = new Uint8Array(options.mask);
6401
+ formData.append("mask", new Blob([maskUint8], { type: "image/png" }), "mask.png");
6402
+ }
6403
+ if (options.quality) {
6404
+ formData.append("quality", options.quality);
6405
+ }
6406
+ if (options.size) {
6407
+ formData.append("size", options.size);
6408
+ }
6409
+ const response = await fetch(`${this.baseUrl}/images/edits`, {
6410
+ method: "POST",
6411
+ headers: {
6412
+ Authorization: `Bearer ${this.apiKey}`
6413
+ },
6414
+ body: formData
6415
+ });
6416
+ if (!response.ok) {
6417
+ const errorText = await response.text();
6418
+ let errorMessage = `API error: ${response.status}`;
6419
+ try {
6420
+ const errorJson = JSON.parse(errorText);
6421
+ if (errorJson.error?.message) {
6422
+ errorMessage = errorJson.error.message;
6423
+ }
6424
+ } catch {
6425
+ if (errorText) {
6426
+ errorMessage = errorText.substring(0, 200);
6427
+ }
6428
+ }
6429
+ return {
6430
+ success: false,
6431
+ error: errorMessage
6432
+ };
6433
+ }
6434
+ const data = await response.json();
6435
+ return {
6436
+ success: true,
6437
+ images: data.data.map((img) => ({
6438
+ base64: img.b64_json,
6439
+ url: img.url,
6440
+ revisedPrompt: img.revised_prompt
6441
+ }))
6442
+ };
6443
+ } catch (error) {
6444
+ return {
6445
+ success: false,
6446
+ error: error instanceof Error ? error.message : "Unknown error"
6447
+ };
6448
+ }
6449
+ }
6029
6450
  /**
6030
6451
  * Create image variations (uses DALL-E 2)
6031
6452
  */
@@ -6039,7 +6460,7 @@ var OpenAIImageProvider = class {
6039
6460
  try {
6040
6461
  const formData = new FormData();
6041
6462
  const uint8Array = new Uint8Array(imageBuffer);
6042
- formData.append("image", new Blob([uint8Array]), "image.png");
6463
+ formData.append("image", new Blob([uint8Array], { type: "image/png" }), "image.png");
6043
6464
  formData.append("model", "dall-e-2");
6044
6465
  formData.append("n", String(options.n || 1));
6045
6466
  formData.append("size", options.size || "1024x1024");
@@ -6073,7 +6494,7 @@ var OpenAIImageProvider = class {
6073
6494
  var openaiImageProvider = new OpenAIImageProvider();
6074
6495
 
6075
6496
  // ../ai-providers/dist/runway/RunwayProvider.js
6076
- var DEFAULT_MODEL2 = "gen4_turbo";
6497
+ var DEFAULT_MODEL2 = "gen4.5";
6077
6498
  var RunwayProvider = class {
6078
6499
  constructor() {
6079
6500
  this.id = "runway";
@@ -6117,25 +6538,29 @@ var RunwayProvider = class {
6117
6538
  };
6118
6539
  const apiRatio = ratioMap[options?.aspectRatio || "16:9"] || "1280:720";
6119
6540
  const model = options?.model || DEFAULT_MODEL2;
6120
- if (!options?.referenceImage) {
6541
+ if (!options?.referenceImage && model !== "gen4.5") {
6121
6542
  return {
6122
6543
  id: "",
6123
6544
  status: "failed",
6124
- error: "Runway Gen-4 requires an input image. Use -i <image> to specify an image."
6545
+ error: `Runway ${model} requires an input image. Use -i <image> or switch to gen4.5 for text-to-video.`
6125
6546
  };
6126
6547
  }
6127
- const imageData = typeof options.referenceImage === "string" ? options.referenceImage : await this.blobToDataUri(options.referenceImage);
6548
+ const hasImage = !!options?.referenceImage;
6549
+ const endpoint = hasImage ? "image_to_video" : "text_to_video";
6128
6550
  const body = {
6129
6551
  model,
6130
6552
  promptText: prompt2,
6131
- promptImage: imageData,
6132
6553
  ratio: apiRatio,
6133
- duration: options?.duration === 10 ? 10 : 5
6554
+ duration: this.clampDuration(options?.duration, model)
6134
6555
  };
6556
+ if (hasImage) {
6557
+ const imageData = typeof options.referenceImage === "string" ? options.referenceImage : await this.blobToDataUri(options.referenceImage);
6558
+ body.promptImage = imageData;
6559
+ }
6135
6560
  if (options?.seed !== void 0) {
6136
6561
  body.seed = options.seed;
6137
6562
  }
6138
- const response = await fetch(`${this.baseUrl}/image_to_video`, {
6563
+ const response = await fetch(`${this.baseUrl}/${endpoint}`, {
6139
6564
  method: "POST",
6140
6565
  headers: {
6141
6566
  Authorization: `Bearer ${this.apiKey}`,
@@ -6302,6 +6727,16 @@ var RunwayProvider = class {
6302
6727
  error: "Generation timed out"
6303
6728
  };
6304
6729
  }
6730
+ /**
6731
+ * Clamp duration to valid range for the given model
6732
+ */
6733
+ clampDuration(duration, model) {
6734
+ if (model === "gen4.5") {
6735
+ const d = duration ?? 5;
6736
+ return Math.max(2, Math.min(10, Math.round(d)));
6737
+ }
6738
+ return duration === 10 ? 10 : 5;
6739
+ }
6305
6740
  /**
6306
6741
  * Convert Blob to data URI
6307
6742
  */
@@ -6315,7 +6750,7 @@ var RunwayProvider = class {
6315
6750
  * Sleep helper
6316
6751
  */
6317
6752
  sleep(ms) {
6318
- return new Promise((resolve13) => setTimeout(resolve13, ms));
6753
+ return new Promise((resolve12) => setTimeout(resolve12, ms));
6319
6754
  }
6320
6755
  };
6321
6756
  var runwayProvider = new RunwayProvider();
@@ -6323,7 +6758,7 @@ var runwayProvider = new RunwayProvider();
6323
6758
  // ../ai-providers/dist/kling/KlingProvider.js
6324
6759
  import { createHmac } from "node:crypto";
6325
6760
  var DEFAULT_MODEL3 = "kling-v2-5-turbo";
6326
- var STD_MODE_MODELS = ["kling-v2-5-turbo", "kling-v2-6"];
6761
+ var STD_MODE_MODELS = ["kling-v2-5-turbo", "kling-v2-6", "kling-v3", "kling-v3-omni"];
6327
6762
  var KlingProvider = class {
6328
6763
  constructor() {
6329
6764
  this.id = "kling";
@@ -6419,14 +6854,14 @@ var KlingProvider = class {
6419
6854
  return {
6420
6855
  id: "",
6421
6856
  status: "failed",
6422
- error: "Kling v2.5/v2.6 requires image URL, not base64. Upload image to a hosting service first."
6857
+ error: "Kling v2.5+ requires image URL, not base64. Upload image to a hosting service first."
6423
6858
  };
6424
6859
  }
6425
6860
  } else {
6426
6861
  return {
6427
6862
  id: "",
6428
6863
  status: "failed",
6429
- error: "Kling v2.5/v2.6 requires image URL, not Blob. Upload image to a hosting service first."
6864
+ error: "Kling v2.5+ requires image URL, not Blob. Upload image to a hosting service first."
6430
6865
  };
6431
6866
  }
6432
6867
  const response2 = await fetch(`${this.baseUrl}/videos/image2video`, {
@@ -6731,7 +7166,7 @@ var KlingProvider = class {
6731
7166
  * Sleep helper
6732
7167
  */
6733
7168
  sleep(ms) {
6734
- return new Promise((resolve13) => setTimeout(resolve13, ms));
7169
+ return new Promise((resolve12) => setTimeout(resolve12, ms));
6735
7170
  }
6736
7171
  };
6737
7172
  var klingProvider = new KlingProvider();
@@ -6743,7 +7178,7 @@ var GrokProvider = class {
6743
7178
  this.id = "grok";
6744
7179
  this.name = "xAI Grok Imagine";
6745
7180
  this.description = "AI video generation with Grok Imagine (native audio, 1-15 sec)";
6746
- this.capabilities = ["text-to-video", "image-to-video"];
7181
+ this.capabilities = ["text-to-video", "image-to-video", "text-to-image", "image-editing"];
6747
7182
  this.iconUrl = "/icons/xai.svg";
6748
7183
  this.isAvailable = true;
6749
7184
  this.baseUrl = "https://api.x.ai/v1";
@@ -6759,33 +7194,29 @@ var GrokProvider = class {
6759
7194
  return !!this.apiKey;
6760
7195
  }
6761
7196
  /**
6762
- * Generate video using Grok Imagine
7197
+ * Generate image using Grok Imagine
6763
7198
  */
6764
- async generateVideo(prompt2, options) {
7199
+ async generateImage(prompt2, options = {}) {
6765
7200
  if (!this.apiKey) {
6766
7201
  return {
6767
- id: "",
6768
- status: "failed",
7202
+ success: false,
6769
7203
  error: "xAI API key not configured. Set XAI_API_KEY environment variable."
6770
7204
  };
6771
7205
  }
6772
7206
  try {
6773
- const duration = Math.min(15, Math.max(1, options?.duration || 5));
6774
7207
  const body = {
6775
- model: DEFAULT_MODEL4,
7208
+ model: options.model || "grok-imagine-image",
6776
7209
  prompt: prompt2,
6777
- duration,
6778
- aspect_ratio: options?.aspectRatio || "16:9"
7210
+ n: options.n || 1,
7211
+ response_format: options.responseFormat || "url"
6779
7212
  };
6780
- if (options?.referenceImage) {
6781
- const imageData = options.referenceImage;
6782
- if (imageData.startsWith("http")) {
6783
- body.image_url = imageData;
6784
- } else if (imageData.startsWith("data:")) {
6785
- body.image = imageData;
6786
- }
7213
+ if (options.aspectRatio) {
7214
+ body.aspect_ratio = options.aspectRatio;
6787
7215
  }
6788
- const response = await fetch(`${this.baseUrl}/video/generations`, {
7216
+ if (options.resolution) {
7217
+ body.resolution = options.resolution;
7218
+ }
7219
+ const response = await fetch(`${this.baseUrl}/images/generations`, {
6789
7220
  method: "POST",
6790
7221
  headers: {
6791
7222
  "Content-Type": "application/json",
@@ -6795,210 +7226,29 @@ var GrokProvider = class {
6795
7226
  });
6796
7227
  if (!response.ok) {
6797
7228
  const errorText = await response.text();
6798
- return {
6799
- id: "",
6800
- status: "failed",
6801
- error: `Grok API error (${response.status}): ${errorText}`
6802
- };
6803
- }
6804
- const data = await response.json();
6805
- return {
6806
- id: data.id,
6807
- status: data.status === "completed" ? "completed" : "pending",
6808
- videoUrl: data.video_url
6809
- };
6810
- } catch (error) {
6811
- return {
6812
- id: "",
6813
- status: "failed",
6814
- error: error instanceof Error ? error.message : "Unknown error"
6815
- };
6816
- }
6817
- }
6818
- /**
6819
- * Get generation status
6820
- */
6821
- async getGenerationStatus(id) {
6822
- if (!this.apiKey) {
6823
- return {
6824
- id,
6825
- status: "failed",
6826
- error: "xAI API key not configured"
6827
- };
6828
- }
6829
- try {
6830
- const response = await fetch(`${this.baseUrl}/video/generations/${id}`, {
6831
- headers: {
6832
- Authorization: `Bearer ${this.apiKey}`
6833
- }
6834
- });
6835
- if (!response.ok) {
6836
- const errorText = await response.text();
6837
- return {
6838
- id,
6839
- status: "failed",
6840
- error: `Failed to get status: ${errorText}`
6841
- };
6842
- }
6843
- const data = await response.json();
6844
- const statusMap = {
6845
- pending: "pending",
6846
- processing: "processing",
6847
- completed: "completed",
6848
- failed: "failed"
6849
- };
6850
- return {
6851
- id: data.id,
6852
- status: statusMap[data.status] || "pending",
6853
- videoUrl: data.video_url,
6854
- error: data.error
6855
- };
6856
- } catch (error) {
6857
- return {
6858
- id,
6859
- status: "failed",
6860
- error: error instanceof Error ? error.message : "Unknown error"
6861
- };
6862
- }
6863
- }
6864
- /**
6865
- * Wait for generation to complete
6866
- */
6867
- async waitForCompletion(id, onProgress, maxWaitMs = 3e5) {
6868
- const startTime = Date.now();
6869
- while (Date.now() - startTime < maxWaitMs) {
6870
- const result = await this.getGenerationStatus(id);
6871
- if (onProgress) {
6872
- onProgress(result);
6873
- }
6874
- if (result.status === "completed" || result.status === "failed") {
6875
- return result;
6876
- }
6877
- await this.sleep(this.pollingInterval);
6878
- }
6879
- return {
6880
- id,
6881
- status: "failed",
6882
- error: "Generation timed out"
6883
- };
6884
- }
6885
- /**
6886
- * Cancel generation (if supported)
6887
- */
6888
- async cancelGeneration(id) {
6889
- if (!this.apiKey)
6890
- return false;
6891
- try {
6892
- const response = await fetch(`${this.baseUrl}/video/generations/${id}`, {
6893
- method: "DELETE",
6894
- headers: {
6895
- Authorization: `Bearer ${this.apiKey}`
6896
- }
6897
- });
6898
- return response.ok;
6899
- } catch {
6900
- return false;
6901
- }
6902
- }
6903
- sleep(ms) {
6904
- return new Promise((resolve13) => setTimeout(resolve13, ms));
6905
- }
6906
- };
6907
- var grokProvider = new GrokProvider();
6908
-
6909
- // ../ai-providers/dist/stability/StabilityProvider.js
6910
- var StabilityProvider = class {
6911
- constructor() {
6912
- this.id = "stability";
6913
- this.name = "Stability AI";
6914
- this.description = "Stable Diffusion image generation with SD3.5 and SDXL";
6915
- this.capabilities = ["background-removal", "upscale", "search-replace", "outpaint"];
6916
- this.iconUrl = "/icons/stability.svg";
6917
- this.isAvailable = true;
6918
- this.baseUrl = "https://api.stability.ai";
6919
- this.modelMapping = {
6920
- "sd3.5-large": ["sd3", "sd3.5-large"],
6921
- "sd3.5-large-turbo": ["sd3", "sd3.5-large-turbo"],
6922
- "sd3.5-medium": ["sd3", "sd3.5-medium"],
6923
- "sd3-large": ["sd3", "sd3-large"],
6924
- "sd3-medium": ["sd3", "sd3-medium"],
6925
- "stable-image-core": ["core", null],
6926
- "stable-image-ultra": ["ultra", null]
6927
- };
6928
- }
6929
- async initialize(config2) {
6930
- this.apiKey = config2.apiKey;
6931
- if (config2.baseUrl) {
6932
- this.baseUrl = config2.baseUrl;
6933
- }
6934
- }
6935
- isConfigured() {
6936
- return !!this.apiKey;
6937
- }
6938
- /**
6939
- * Generate images from text prompt using SD3.5 or newer models
6940
- */
6941
- async generateImage(prompt2, options = {}) {
6942
- if (!this.apiKey) {
6943
- return {
6944
- success: false,
6945
- error: "Stability AI API key not configured"
6946
- };
6947
- }
6948
- try {
6949
- const model = options.model || "sd3.5-large";
6950
- const [endpoint, modelParam] = this.modelMapping[model] || ["sd3", model];
6951
- const formData = new FormData();
6952
- formData.append("prompt", prompt2);
6953
- formData.append("output_format", options.outputFormat || "png");
6954
- if (modelParam) {
6955
- formData.append("model", modelParam);
6956
- }
6957
- if (options.negativePrompt) {
6958
- formData.append("negative_prompt", options.negativePrompt);
6959
- }
6960
- if (options.aspectRatio) {
6961
- formData.append("aspect_ratio", options.aspectRatio);
6962
- }
6963
- if (options.seed !== void 0) {
6964
- formData.append("seed", String(options.seed));
6965
- }
6966
- if (options.stylePreset) {
6967
- formData.append("style_preset", options.stylePreset);
6968
- }
6969
- const response = await fetch(`${this.baseUrl}/v2beta/stable-image/generate/${endpoint}`, {
6970
- method: "POST",
6971
- headers: {
6972
- Authorization: `Bearer ${this.apiKey}`,
6973
- Accept: "image/*"
6974
- },
6975
- body: formData
6976
- });
6977
- if (!response.ok) {
6978
- const errorText = await response.text();
6979
- let errorMessage;
7229
+ let errorMessage = `API error: ${response.status}`;
6980
7230
  try {
6981
- const errorData = JSON.parse(errorText);
6982
- errorMessage = errorData.message || errorData.error || errorText;
7231
+ const errorJson = JSON.parse(errorText);
7232
+ if (errorJson.error?.message) {
7233
+ errorMessage = errorJson.error.message;
7234
+ }
6983
7235
  } catch {
6984
- errorMessage = errorText;
7236
+ if (errorText) {
7237
+ errorMessage = errorText.substring(0, 200);
7238
+ }
6985
7239
  }
6986
7240
  return {
6987
7241
  success: false,
6988
- error: `API error (${response.status}): ${errorMessage}`
7242
+ error: errorMessage
6989
7243
  };
6990
7244
  }
6991
- const arrayBuffer = await response.arrayBuffer();
6992
- const base64 = Buffer.from(arrayBuffer).toString("base64");
6993
- const seed = response.headers.get("seed");
6994
- const finishReason = response.headers.get("finish-reason");
7245
+ const data = await response.json();
6995
7246
  return {
6996
7247
  success: true,
6997
- images: [{
6998
- base64,
6999
- seed: seed ? parseInt(seed) : void 0,
7000
- finishReason: finishReason || void 0
7001
- }]
7248
+ images: data.data.map((img) => ({
7249
+ url: img.url,
7250
+ base64: img.b64_json
7251
+ }))
7002
7252
  };
7003
7253
  } catch (error) {
7004
7254
  return {
@@ -7008,77 +7258,64 @@ var StabilityProvider = class {
7008
7258
  }
7009
7259
  }
7010
7260
  /**
7011
- * Generate image using legacy SDXL API (supports more options)
7261
+ * Edit image using Grok Imagine
7262
+ * Supports single image input with text instruction-based editing
7012
7263
  */
7013
- async generateImageSDXL(prompt2, options = {}) {
7264
+ async editImage(imageBuffer, prompt2, options = {}) {
7014
7265
  if (!this.apiKey) {
7015
7266
  return {
7016
7267
  success: false,
7017
- error: "Stability AI API key not configured"
7268
+ error: "xAI API key not configured. Set XAI_API_KEY environment variable."
7018
7269
  };
7019
7270
  }
7020
7271
  try {
7272
+ const base64 = imageBuffer.toString("base64");
7273
+ const dataUri = `data:image/png;base64,${base64}`;
7021
7274
  const body = {
7022
- text_prompts: [
7023
- { text: prompt2, weight: 1 }
7024
- ],
7025
- cfg_scale: options.cfgScale || 7,
7026
- steps: options.steps || 30,
7027
- samples: options.count || 1
7028
- };
7029
- if (options.negativePrompt) {
7030
- body.text_prompts.push({
7031
- text: options.negativePrompt,
7032
- weight: -1
7033
- });
7034
- }
7035
- if (options.seed !== void 0) {
7036
- body.seed = options.seed;
7037
- }
7038
- if (options.sampler) {
7039
- body.sampler = options.sampler;
7040
- }
7041
- if (options.stylePreset) {
7042
- body.style_preset = options.stylePreset;
7043
- }
7044
- if (options.size) {
7045
- const [width, height] = options.size.split("x").map(Number);
7046
- body.width = width;
7047
- body.height = height;
7048
- } else {
7049
- body.width = 1024;
7050
- body.height = 1024;
7275
+ model: options.model || "grok-imagine-image",
7276
+ prompt: prompt2,
7277
+ image: {
7278
+ url: dataUri,
7279
+ type: "image_url"
7280
+ },
7281
+ n: 1,
7282
+ response_format: options.responseFormat || "url"
7283
+ };
7284
+ if (options.aspectRatio) {
7285
+ body.aspect_ratio = options.aspectRatio;
7051
7286
  }
7052
- const response = await fetch(`${this.baseUrl}/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image`, {
7287
+ const response = await fetch(`${this.baseUrl}/images/edits`, {
7053
7288
  method: "POST",
7054
7289
  headers: {
7055
7290
  "Content-Type": "application/json",
7056
- Authorization: `Bearer ${this.apiKey}`,
7057
- Accept: "application/json"
7291
+ Authorization: `Bearer ${this.apiKey}`
7058
7292
  },
7059
7293
  body: JSON.stringify(body)
7060
7294
  });
7061
7295
  if (!response.ok) {
7062
7296
  const errorText = await response.text();
7063
- let errorMessage;
7297
+ let errorMessage = `API error: ${response.status}`;
7064
7298
  try {
7065
- const errorData = JSON.parse(errorText);
7066
- errorMessage = errorData.message || errorData.error || errorText;
7299
+ const errorJson = JSON.parse(errorText);
7300
+ if (errorJson.error?.message) {
7301
+ errorMessage = errorJson.error.message;
7302
+ }
7067
7303
  } catch {
7068
- errorMessage = errorText;
7304
+ if (errorText) {
7305
+ errorMessage = errorText.substring(0, 200);
7306
+ }
7069
7307
  }
7070
7308
  return {
7071
7309
  success: false,
7072
- error: `API error (${response.status}): ${errorMessage}`
7310
+ error: errorMessage
7073
7311
  };
7074
7312
  }
7075
7313
  const data = await response.json();
7076
7314
  return {
7077
7315
  success: true,
7078
- images: data.artifacts.map((a) => ({
7079
- base64: a.base64,
7080
- seed: a.seed,
7081
- finishReason: a.finishReason
7316
+ images: data.data.map((img) => ({
7317
+ url: img.url,
7318
+ base64: img.b64_json
7082
7319
  }))
7083
7320
  };
7084
7321
  } catch (error) {
@@ -7089,350 +7326,145 @@ var StabilityProvider = class {
7089
7326
  }
7090
7327
  }
7091
7328
  /**
7092
- * Image-to-image transformation
7093
- */
7094
- async imageToImage(imageData, prompt2, options = {}) {
7095
- if (!this.apiKey) {
7096
- return {
7097
- success: false,
7098
- error: "Stability AI API key not configured"
7099
- };
7100
- }
7101
- try {
7102
- const formData = new FormData();
7103
- const imageBlob = Buffer.isBuffer(imageData) ? new Blob([new Uint8Array(imageData)]) : imageData;
7104
- formData.append("image", imageBlob, "image.png");
7105
- formData.append("prompt", prompt2);
7106
- formData.append("output_format", options.outputFormat || "png");
7107
- formData.append("strength", String(options.strength || 0.35));
7108
- formData.append("mode", options.mode || "image-to-image");
7109
- if (options.negativePrompt) {
7110
- formData.append("negative_prompt", options.negativePrompt);
7111
- }
7112
- if (options.seed !== void 0) {
7113
- formData.append("seed", String(options.seed));
7114
- }
7115
- const response = await fetch(`${this.baseUrl}/v2beta/stable-image/generate/sd3`, {
7116
- method: "POST",
7117
- headers: {
7118
- Authorization: `Bearer ${this.apiKey}`,
7119
- Accept: "image/*"
7120
- },
7121
- body: formData
7122
- });
7123
- if (!response.ok) {
7124
- const errorText = await response.text();
7125
- return {
7126
- success: false,
7127
- error: `API error (${response.status}): ${errorText}`
7128
- };
7129
- }
7130
- const arrayBuffer = await response.arrayBuffer();
7131
- const base64 = Buffer.from(arrayBuffer).toString("base64");
7132
- return {
7133
- success: true,
7134
- images: [{ base64 }]
7135
- };
7136
- } catch (error) {
7137
- return {
7138
- success: false,
7139
- error: error instanceof Error ? error.message : "Unknown error"
7140
- };
7141
- }
7142
- }
7143
- /**
7144
- * Upscale image
7145
- */
7146
- async upscaleImage(imageData, options = {}) {
7147
- if (!this.apiKey) {
7148
- return {
7149
- success: false,
7150
- error: "Stability AI API key not configured"
7151
- };
7152
- }
7153
- try {
7154
- const formData = new FormData();
7155
- const imageBlob = Buffer.isBuffer(imageData) ? new Blob([new Uint8Array(imageData)]) : imageData;
7156
- formData.append("image", imageBlob, "image.png");
7157
- formData.append("output_format", options.outputFormat || "png");
7158
- const upscaleType = options.type || "fast";
7159
- let endpoint;
7160
- if (upscaleType === "creative") {
7161
- endpoint = `${this.baseUrl}/v2beta/stable-image/upscale/creative`;
7162
- if (options.creativity !== void 0) {
7163
- formData.append("creativity", String(options.creativity));
7164
- }
7165
- } else if (upscaleType === "conservative") {
7166
- endpoint = `${this.baseUrl}/v2beta/stable-image/upscale/conservative`;
7167
- } else {
7168
- endpoint = `${this.baseUrl}/v2beta/stable-image/upscale/fast`;
7169
- }
7170
- const response = await fetch(endpoint, {
7171
- method: "POST",
7172
- headers: {
7173
- Authorization: `Bearer ${this.apiKey}`,
7174
- Accept: "image/*"
7175
- },
7176
- body: formData
7177
- });
7178
- if (!response.ok) {
7179
- const errorText = await response.text();
7180
- return {
7181
- success: false,
7182
- error: `API error (${response.status}): ${errorText}`
7183
- };
7184
- }
7185
- const arrayBuffer = await response.arrayBuffer();
7186
- const base64 = Buffer.from(arrayBuffer).toString("base64");
7187
- return {
7188
- success: true,
7189
- images: [{ base64 }]
7190
- };
7191
- } catch (error) {
7192
- return {
7193
- success: false,
7194
- error: error instanceof Error ? error.message : "Unknown error"
7195
- };
7196
- }
7197
- }
7198
- /**
7199
- * Remove background from image
7200
- */
7201
- async removeBackground(imageData, outputFormat = "png") {
7202
- if (!this.apiKey) {
7203
- return {
7204
- success: false,
7205
- error: "Stability AI API key not configured"
7206
- };
7207
- }
7208
- try {
7209
- const formData = new FormData();
7210
- const imageBlob = Buffer.isBuffer(imageData) ? new Blob([new Uint8Array(imageData)]) : imageData;
7211
- formData.append("image", imageBlob, "image.png");
7212
- formData.append("output_format", outputFormat);
7213
- const response = await fetch(`${this.baseUrl}/v2beta/stable-image/edit/remove-background`, {
7214
- method: "POST",
7215
- headers: {
7216
- Authorization: `Bearer ${this.apiKey}`,
7217
- Accept: "image/*"
7218
- },
7219
- body: formData
7220
- });
7221
- if (!response.ok) {
7222
- const errorText = await response.text();
7223
- return {
7224
- success: false,
7225
- error: `API error (${response.status}): ${errorText}`
7226
- };
7227
- }
7228
- const arrayBuffer = await response.arrayBuffer();
7229
- const base64 = Buffer.from(arrayBuffer).toString("base64");
7230
- return {
7231
- success: true,
7232
- images: [{ base64 }]
7233
- };
7234
- } catch (error) {
7235
- return {
7236
- success: false,
7237
- error: error instanceof Error ? error.message : "Unknown error"
7238
- };
7239
- }
7240
- }
7241
- /**
7242
- * Inpaint/outpaint image
7329
+ * Generate video using Grok Imagine
7243
7330
  */
7244
- async inpaint(imageData, maskData, prompt2, options = {}) {
7331
+ async generateVideo(prompt2, options) {
7245
7332
  if (!this.apiKey) {
7246
7333
  return {
7247
- success: false,
7248
- error: "Stability AI API key not configured"
7334
+ id: "",
7335
+ status: "failed",
7336
+ error: "xAI API key not configured. Set XAI_API_KEY environment variable."
7249
7337
  };
7250
7338
  }
7251
7339
  try {
7252
- const formData = new FormData();
7253
- const imageBlob = Buffer.isBuffer(imageData) ? new Blob([new Uint8Array(imageData)]) : imageData;
7254
- const maskBlob = Buffer.isBuffer(maskData) ? new Blob([new Uint8Array(maskData)]) : maskData;
7255
- formData.append("image", imageBlob, "image.png");
7256
- formData.append("mask", maskBlob, "mask.png");
7257
- formData.append("prompt", prompt2);
7258
- formData.append("output_format", options.outputFormat || "png");
7259
- if (options.negativePrompt) {
7260
- formData.append("negative_prompt", options.negativePrompt);
7261
- }
7262
- if (options.seed !== void 0) {
7263
- formData.append("seed", String(options.seed));
7264
- }
7265
- const response = await fetch(`${this.baseUrl}/v2beta/stable-image/edit/inpaint`, {
7266
- method: "POST",
7267
- headers: {
7268
- Authorization: `Bearer ${this.apiKey}`,
7269
- Accept: "image/*"
7270
- },
7271
- body: formData
7272
- });
7273
- if (!response.ok) {
7274
- const errorText = await response.text();
7275
- return {
7276
- success: false,
7277
- error: `API error (${response.status}): ${errorText}`
7278
- };
7279
- }
7280
- const arrayBuffer = await response.arrayBuffer();
7281
- const base64 = Buffer.from(arrayBuffer).toString("base64");
7282
- return {
7283
- success: true,
7284
- images: [{ base64 }]
7285
- };
7286
- } catch (error) {
7287
- return {
7288
- success: false,
7289
- error: error instanceof Error ? error.message : "Unknown error"
7290
- };
7291
- }
7292
- }
7293
- /**
7294
- * Search and replace objects in an image using AI
7295
- */
7296
- async searchAndReplace(imageData, searchPrompt, replacePrompt, options = {}) {
7297
- if (!this.apiKey) {
7298
- return {
7299
- success: false,
7300
- error: "Stability AI API key not configured"
7340
+ const duration = Math.min(15, Math.max(1, options?.duration || 5));
7341
+ const body = {
7342
+ model: DEFAULT_MODEL4,
7343
+ prompt: prompt2,
7344
+ duration,
7345
+ aspect_ratio: options?.aspectRatio || "16:9"
7301
7346
  };
7302
- }
7303
- try {
7304
- const formData = new FormData();
7305
- const imageBlob = Buffer.isBuffer(imageData) ? new Blob([new Uint8Array(imageData)]) : imageData;
7306
- formData.append("image", imageBlob, "image.png");
7307
- formData.append("prompt", replacePrompt);
7308
- formData.append("search_prompt", searchPrompt);
7309
- formData.append("output_format", options.outputFormat || "png");
7310
- if (options.negativePrompt) {
7311
- formData.append("negative_prompt", options.negativePrompt);
7312
- }
7313
- if (options.seed !== void 0) {
7314
- formData.append("seed", String(options.seed));
7347
+ if (options?.referenceImage) {
7348
+ body.image = { url: options.referenceImage };
7315
7349
  }
7316
- const response = await fetch(`${this.baseUrl}/v2beta/stable-image/edit/search-and-replace`, {
7350
+ const response = await fetch(`${this.baseUrl}/videos/generations`, {
7317
7351
  method: "POST",
7318
7352
  headers: {
7319
- Authorization: `Bearer ${this.apiKey}`,
7320
- Accept: "image/*"
7353
+ "Content-Type": "application/json",
7354
+ Authorization: `Bearer ${this.apiKey}`
7321
7355
  },
7322
- body: formData
7356
+ body: JSON.stringify(body)
7323
7357
  });
7324
7358
  if (!response.ok) {
7325
7359
  const errorText = await response.text();
7326
- let errorMessage;
7327
- try {
7328
- const errorData = JSON.parse(errorText);
7329
- errorMessage = errorData.message || errorData.error || errorText;
7330
- } catch {
7331
- errorMessage = errorText;
7332
- }
7333
7360
  return {
7334
- success: false,
7335
- error: `API error (${response.status}): ${errorMessage}`
7361
+ id: "",
7362
+ status: "failed",
7363
+ error: `Grok API error (${response.status}): ${errorText}`
7336
7364
  };
7337
7365
  }
7338
- const arrayBuffer = await response.arrayBuffer();
7339
- const base64 = Buffer.from(arrayBuffer).toString("base64");
7340
- const seed = response.headers.get("seed");
7366
+ const data = await response.json();
7341
7367
  return {
7342
- success: true,
7343
- images: [{
7344
- base64,
7345
- seed: seed ? parseInt(seed) : void 0
7346
- }]
7368
+ id: data.request_id,
7369
+ status: "pending"
7347
7370
  };
7348
7371
  } catch (error) {
7349
7372
  return {
7350
- success: false,
7373
+ id: "",
7374
+ status: "failed",
7351
7375
  error: error instanceof Error ? error.message : "Unknown error"
7352
7376
  };
7353
7377
  }
7354
7378
  }
7355
7379
  /**
7356
- * Extend image canvas (outpainting)
7380
+ * Get generation status
7357
7381
  */
7358
- async outpaint(imageData, options = {}) {
7382
+ async getGenerationStatus(id) {
7359
7383
  if (!this.apiKey) {
7360
7384
  return {
7361
- success: false,
7362
- error: "Stability AI API key not configured"
7363
- };
7364
- }
7365
- const { left = 0, right = 0, up = 0, down = 0 } = options;
7366
- if (left === 0 && right === 0 && up === 0 && down === 0) {
7367
- return {
7368
- success: false,
7369
- error: "At least one direction (left, right, up, down) must be specified"
7385
+ id,
7386
+ status: "failed",
7387
+ error: "xAI API key not configured"
7370
7388
  };
7371
7389
  }
7372
7390
  try {
7373
- const formData = new FormData();
7374
- const imageBlob = Buffer.isBuffer(imageData) ? new Blob([new Uint8Array(imageData)]) : imageData;
7375
- formData.append("image", imageBlob, "image.png");
7376
- formData.append("output_format", options.outputFormat || "png");
7377
- if (left > 0) {
7378
- formData.append("left", String(Math.min(2e3, Math.max(0, left))));
7379
- }
7380
- if (right > 0) {
7381
- formData.append("right", String(Math.min(2e3, Math.max(0, right))));
7382
- }
7383
- if (up > 0) {
7384
- formData.append("up", String(Math.min(2e3, Math.max(0, up))));
7385
- }
7386
- if (down > 0) {
7387
- formData.append("down", String(Math.min(2e3, Math.max(0, down))));
7388
- }
7389
- if (options.prompt) {
7390
- formData.append("prompt", options.prompt);
7391
- }
7392
- if (options.creativity !== void 0) {
7393
- formData.append("creativity", String(Math.min(1, Math.max(0, options.creativity))));
7394
- }
7395
- const response = await fetch(`${this.baseUrl}/v2beta/stable-image/edit/outpaint`, {
7396
- method: "POST",
7391
+ const response = await fetch(`${this.baseUrl}/videos/${id}`, {
7397
7392
  headers: {
7398
- Authorization: `Bearer ${this.apiKey}`,
7399
- Accept: "image/*"
7400
- },
7401
- body: formData
7393
+ Authorization: `Bearer ${this.apiKey}`
7394
+ }
7402
7395
  });
7403
7396
  if (!response.ok) {
7404
7397
  const errorText = await response.text();
7405
- let errorMessage;
7406
- try {
7407
- const errorData = JSON.parse(errorText);
7408
- errorMessage = errorData.message || errorData.error || errorText;
7409
- } catch {
7410
- errorMessage = errorText;
7411
- }
7412
7398
  return {
7413
- success: false,
7414
- error: `API error (${response.status}): ${errorMessage}`
7399
+ id,
7400
+ status: "failed",
7401
+ error: `Failed to get status: ${errorText}`
7415
7402
  };
7416
7403
  }
7417
- const arrayBuffer = await response.arrayBuffer();
7418
- const base64 = Buffer.from(arrayBuffer).toString("base64");
7419
- const seed = response.headers.get("seed");
7404
+ const data = await response.json();
7405
+ const statusMap = {
7406
+ pending: "pending",
7407
+ done: "completed",
7408
+ expired: "failed"
7409
+ };
7420
7410
  return {
7421
- success: true,
7422
- images: [{
7423
- base64,
7424
- seed: seed ? parseInt(seed) : void 0
7425
- }]
7411
+ id,
7412
+ status: statusMap[data.status] || "pending",
7413
+ videoUrl: data.video?.url,
7414
+ error: data.status === "expired" ? "Generation expired" : void 0
7426
7415
  };
7427
7416
  } catch (error) {
7428
7417
  return {
7429
- success: false,
7418
+ id,
7419
+ status: "failed",
7430
7420
  error: error instanceof Error ? error.message : "Unknown error"
7431
7421
  };
7432
7422
  }
7433
7423
  }
7424
+ /**
7425
+ * Wait for generation to complete
7426
+ */
7427
+ async waitForCompletion(id, onProgress, maxWaitMs = 3e5) {
7428
+ const startTime = Date.now();
7429
+ while (Date.now() - startTime < maxWaitMs) {
7430
+ const result = await this.getGenerationStatus(id);
7431
+ if (onProgress) {
7432
+ onProgress(result);
7433
+ }
7434
+ if (result.status === "completed" || result.status === "failed") {
7435
+ return result;
7436
+ }
7437
+ await this.sleep(this.pollingInterval);
7438
+ }
7439
+ return {
7440
+ id,
7441
+ status: "failed",
7442
+ error: "Generation timed out"
7443
+ };
7444
+ }
7445
+ /**
7446
+ * Cancel generation (if supported)
7447
+ */
7448
+ async cancelGeneration(id) {
7449
+ if (!this.apiKey)
7450
+ return false;
7451
+ try {
7452
+ const response = await fetch(`${this.baseUrl}/videos/${id}`, {
7453
+ method: "DELETE",
7454
+ headers: {
7455
+ Authorization: `Bearer ${this.apiKey}`
7456
+ }
7457
+ });
7458
+ return response.ok;
7459
+ } catch {
7460
+ return false;
7461
+ }
7462
+ }
7463
+ sleep(ms) {
7464
+ return new Promise((resolve12) => setTimeout(resolve12, ms));
7465
+ }
7434
7466
  };
7435
- var stabilityProvider = new StabilityProvider();
7467
+ var grokProvider = new GrokProvider();
7436
7468
 
7437
7469
  // ../ai-providers/dist/replicate/ReplicateProvider.js
7438
7470
  var ReplicateProvider = class {
@@ -7688,7 +7720,7 @@ var ReplicateProvider = class {
7688
7720
  * Sleep helper
7689
7721
  */
7690
7722
  sleep(ms) {
7691
- return new Promise((resolve13) => setTimeout(resolve13, ms));
7723
+ return new Promise((resolve12) => setTimeout(resolve12, ms));
7692
7724
  }
7693
7725
  /**
7694
7726
  * Generate music from text prompt using MusicGen
@@ -7829,8 +7861,7 @@ var ReplicateProvider = class {
7829
7861
  Authorization: `Bearer ${this.apiToken}`
7830
7862
  },
7831
7863
  body: JSON.stringify({
7832
- version: "aa9adce25e29d8a8a6c6cddbd25627d7e11b9c1a4e3f8f7f7b7f5e88d5b8f4c1",
7833
- // resemble-enhance
7864
+ model: "lucataco/resemble-enhance",
7834
7865
  input
7835
7866
  })
7836
7867
  });
@@ -7993,8 +8024,7 @@ var ReplicateProvider = class {
7993
8024
  let modelVersion;
7994
8025
  if (point) {
7995
8026
  input.query_points = [[point[0], point[1], 0]];
7996
- modelVersion = "facebookresearch/co-tracker:a12e tried";
7997
- modelVersion = "0a0dcaf0c51e8d6a8b1a3e4c5f6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5";
8027
+ modelVersion = "facebookresearch/co-tracker";
7998
8028
  } else if (box) {
7999
8029
  input.box = [box[0], box[1], box[0] + box[2], box[1] + box[3]];
8000
8030
  modelVersion = "meta/sam-2:fe97b453a6455861e3bac769b441ca1f1086110da7466dbb65cf1eecfd60dc83";
@@ -8015,7 +8045,7 @@ var ReplicateProvider = class {
8015
8045
  Authorization: `Bearer ${this.apiToken}`
8016
8046
  },
8017
8047
  body: JSON.stringify({
8018
- version: modelVersion.split(":")[1] || modelVersion,
8048
+ ...modelVersion.includes(":") ? { version: modelVersion.split(":")[1] } : { model: modelVersion },
8019
8049
  input
8020
8050
  })
8021
8051
  });
@@ -8093,7 +8123,6 @@ var PROVIDER_ENV_VARS = {
8093
8123
  runway: "RUNWAY_API_SECRET",
8094
8124
  kling: "KLING_API_KEY",
8095
8125
  imgbb: "IMGBB_API_KEY",
8096
- stability: "STABILITY_API_KEY",
8097
8126
  replicate: "REPLICATE_API_TOKEN",
8098
8127
  xai: "XAI_API_KEY"
8099
8128
  };
@@ -8148,7 +8177,14 @@ async function getApiKeyFromConfig(providerKey) {
8148
8177
  }
8149
8178
 
8150
8179
  // ../cli/src/utils/api-key.ts
8151
- function findProjectRoot() {
8180
+ function loadEnv() {
8181
+ config({ path: resolve5(process.cwd(), ".env"), debug: false });
8182
+ const monorepoRoot = findMonorepoRoot();
8183
+ if (monorepoRoot && monorepoRoot !== process.cwd()) {
8184
+ config({ path: resolve5(monorepoRoot, ".env"), debug: false });
8185
+ }
8186
+ }
8187
+ function findMonorepoRoot() {
8152
8188
  let dir = process.cwd();
8153
8189
  while (dir !== "/") {
8154
8190
  try {
@@ -8158,18 +8194,14 @@ function findProjectRoot() {
8158
8194
  dir = resolve5(dir, "..");
8159
8195
  }
8160
8196
  }
8161
- return process.cwd();
8162
- }
8163
- function loadEnv() {
8164
- const projectRoot = findProjectRoot();
8165
- config({ path: resolve5(projectRoot, ".env") });
8197
+ return null;
8166
8198
  }
8167
8199
  async function prompt(question, hidden = false) {
8168
8200
  const rl = createInterface({
8169
8201
  input: process.stdin,
8170
8202
  output: process.stdout
8171
8203
  });
8172
- return new Promise((resolve13) => {
8204
+ return new Promise((resolve12) => {
8173
8205
  if (hidden && process.stdin.isTTY) {
8174
8206
  process.stdout.write(question);
8175
8207
  let input = "";
@@ -8183,7 +8215,7 @@ async function prompt(question, hidden = false) {
8183
8215
  process.stdin.removeListener("data", onData);
8184
8216
  process.stdout.write("\n");
8185
8217
  rl.close();
8186
- resolve13(input);
8218
+ resolve12(input);
8187
8219
  } else if (char === "") {
8188
8220
  process.exit(1);
8189
8221
  } else if (char === "\x7F" || char === "\b") {
@@ -8198,7 +8230,7 @@ async function prompt(question, hidden = false) {
8198
8230
  } else {
8199
8231
  rl.question(question, (answer) => {
8200
8232
  rl.close();
8201
- resolve13(answer);
8233
+ resolve12(answer);
8202
8234
  });
8203
8235
  }
8204
8236
  });
@@ -8215,7 +8247,6 @@ async function getApiKey(envVar, providerName, optionValue) {
8215
8247
  RUNWAY_API_SECRET: "runway",
8216
8248
  KLING_API_KEY: "kling",
8217
8249
  IMGBB_API_KEY: "imgbb",
8218
- STABILITY_API_KEY: "stability",
8219
8250
  REPLICATE_API_TOKEN: "replicate"
8220
8251
  };
8221
8252
  const providerKey = providerKeyMap[envVar];
@@ -8235,7 +8266,7 @@ async function getApiKey(envVar, providerName, optionValue) {
8235
8266
  }
8236
8267
  console.log();
8237
8268
  console.log(chalk2.yellow(`${providerName} API key not found.`));
8238
- console.log(chalk2.dim(`Set ${envVar} in .env, run 'vibe setup', or enter below.`));
8269
+ console.log(chalk2.dim(`Set ${envVar} in .env (current directory), run 'vibe setup', or enter below.`));
8239
8270
  console.log();
8240
8271
  const apiKey = await prompt(chalk2.cyan(`Enter ${providerName} API key: `), true);
8241
8272
  if (!apiKey || apiKey.trim() === "") {
@@ -8249,8 +8280,7 @@ async function getApiKey(envVar, providerName, optionValue) {
8249
8280
  return apiKey.trim();
8250
8281
  }
8251
8282
  async function saveApiKeyToEnv(envVar, apiKey) {
8252
- const projectRoot = findProjectRoot();
8253
- const envPath = resolve5(projectRoot, ".env");
8283
+ const envPath = resolve5(process.cwd(), ".env");
8254
8284
  let content = "";
8255
8285
  try {
8256
8286
  await access3(envPath);
@@ -8409,7 +8439,7 @@ async function detectSilencePeriodsWithGemini(videoPath, minDuration, options) {
8409
8439
  const totalDuration = await getVideoDuration(videoPath);
8410
8440
  const geminiApiKey = options.apiKey || await getApiKey("GOOGLE_API_KEY", "Google");
8411
8441
  if (!geminiApiKey) {
8412
- throw new Error("Google API key required for Gemini Video Understanding. Set GOOGLE_API_KEY or use --api-key");
8442
+ throw new Error("Google API key required for Gemini Video Understanding. Run 'vibe setup' or set GOOGLE_API_KEY in .env");
8413
8443
  }
8414
8444
  const gemini = new GeminiProvider();
8415
8445
  await gemini.initialize({ apiKey: geminiApiKey });
@@ -8666,7 +8696,7 @@ async function executeJumpCut(options) {
8666
8696
  }
8667
8697
  const openaiKey = apiKey || process.env.OPENAI_API_KEY;
8668
8698
  if (!openaiKey) {
8669
- return { success: false, error: "OpenAI API key required for Whisper transcription." };
8699
+ return { success: false, error: "OpenAI API key required for Whisper transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
8670
8700
  }
8671
8701
  try {
8672
8702
  const tmpDir = `/tmp/vibe_jumpcut_${Date.now()}`;
@@ -8811,7 +8841,7 @@ async function executeCaption(options) {
8811
8841
  }
8812
8842
  const openaiKey = apiKey || process.env.OPENAI_API_KEY;
8813
8843
  if (!openaiKey) {
8814
- return { success: false, error: "OpenAI API key required for Whisper transcription." };
8844
+ return { success: false, error: "OpenAI API key required for Whisper transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
8815
8845
  }
8816
8846
  try {
8817
8847
  const tmpDir = `/tmp/vibe_caption_${Date.now()}`;
@@ -9080,7 +9110,7 @@ ${textsToTranslate}`;
9080
9110
  if (provider === "openai") {
9081
9111
  const openaiKey = apiKey || process.env.OPENAI_API_KEY;
9082
9112
  if (!openaiKey) {
9083
- return { success: false, error: "OpenAI API key required for translation." };
9113
+ return { success: false, error: "OpenAI API key required for translation. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
9084
9114
  }
9085
9115
  const response = await fetch("https://api.openai.com/v1/chat/completions", {
9086
9116
  method: "POST",
@@ -9089,7 +9119,7 @@ ${textsToTranslate}`;
9089
9119
  Authorization: `Bearer ${openaiKey}`
9090
9120
  },
9091
9121
  body: JSON.stringify({
9092
- model: "gpt-4o-mini",
9122
+ model: "gpt-5-mini",
9093
9123
  messages: [{ role: "user", content: translatePrompt }],
9094
9124
  temperature: 0.3
9095
9125
  })
@@ -9102,7 +9132,7 @@ ${textsToTranslate}`;
9102
9132
  } else {
9103
9133
  const claudeKey = apiKey || process.env.ANTHROPIC_API_KEY;
9104
9134
  if (!claudeKey) {
9105
- return { success: false, error: "Anthropic API key required for translation." };
9135
+ return { success: false, error: "Anthropic API key required for translation. Run 'vibe setup' or set ANTHROPIC_API_KEY in .env" };
9106
9136
  }
9107
9137
  const response = await fetch("https://api.anthropic.com/v1/messages", {
9108
9138
  method: "POST",
@@ -9599,7 +9629,7 @@ async function executeGeminiVideo(options) {
9599
9629
  try {
9600
9630
  const apiKey = await getApiKey("GOOGLE_API_KEY", "Google");
9601
9631
  if (!apiKey) {
9602
- return { success: false, error: "Google API key required" };
9632
+ return { success: false, error: "Google API key required. Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
9603
9633
  }
9604
9634
  const isYouTube = options.source.includes("youtube.com") || options.source.includes("youtu.be");
9605
9635
  const modelMap = {
@@ -9649,7 +9679,7 @@ async function executeAnalyze(options) {
9649
9679
  try {
9650
9680
  const apiKey = await getApiKey("GOOGLE_API_KEY", "Google");
9651
9681
  if (!apiKey) {
9652
- return { success: false, error: "Google API key required" };
9682
+ return { success: false, error: "Google API key required. Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
9653
9683
  }
9654
9684
  const source = options.source;
9655
9685
  const isYouTube = source.includes("youtube.com") || source.includes("youtu.be");
@@ -9789,7 +9819,7 @@ async function executeReview(options) {
9789
9819
  }
9790
9820
  const apiKey = process.env.GOOGLE_API_KEY || await getApiKey("GOOGLE_API_KEY", "Google");
9791
9821
  if (!apiKey) {
9792
- return { success: false, error: "Google API key required for Gemini video review" };
9822
+ return { success: false, error: "Google API key required for Gemini video review. Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
9793
9823
  }
9794
9824
  let storyboardContext = "";
9795
9825
  if (storyboardPath) {
@@ -9896,7 +9926,7 @@ Score each category 1-10. For fixable issues, provide an FFmpeg filter in autoFi
9896
9926
  }
9897
9927
 
9898
9928
  // ../cli/src/commands/ai-image.ts
9899
- import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
9929
+ import { readFile as readFile8, writeFile as writeFile6, mkdir as mkdir4 } from "node:fs/promises";
9900
9930
  import { existsSync as existsSync5 } from "node:fs";
9901
9931
  import chalk5 from "chalk";
9902
9932
  import ora4 from "ora";
@@ -9917,7 +9947,7 @@ async function executeThumbnailBestFrame(options) {
9917
9947
  }
9918
9948
  const googleKey = apiKey || process.env.GOOGLE_API_KEY;
9919
9949
  if (!googleKey) {
9920
- return { success: false, error: "Google API key required for Gemini video analysis." };
9950
+ return { success: false, error: "Google API key required for Gemini video analysis. Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
9921
9951
  }
9922
9952
  try {
9923
9953
  const gemini = new GeminiProvider();
@@ -9926,7 +9956,9 @@ async function executeThumbnailBestFrame(options) {
9926
9956
  const analysisPrompt = prompt2 || 'Analyze this video and find the single best frame for a thumbnail. Look for frames that are visually striking, well-composed, emotionally engaging, and representative of the video content. Avoid blurry frames, transitions, or dark scenes. Return ONLY a JSON object: {"timestamp": <seconds as number>, "reason": "<brief explanation>"}';
9927
9957
  const modelMap = {
9928
9958
  flash: "gemini-3-flash-preview",
9959
+ latest: "gemini-2.5-flash",
9929
9960
  "flash-2.5": "gemini-2.5-flash",
9961
+ // backward compat
9930
9962
  pro: "gemini-2.5-pro"
9931
9963
  };
9932
9964
  const modelId = modelMap[model] || "gemini-3-flash-preview";
@@ -9968,7 +10000,7 @@ async function executeThumbnailBestFrame(options) {
9968
10000
  // src/tools/ai-analysis.ts
9969
10001
  var aiAnalysisTools = [
9970
10002
  {
9971
- name: "ai_analyze",
10003
+ name: "analyze_media",
9972
10004
  description: "Analyze media (image, video, or YouTube URL) using Gemini AI. Requires GOOGLE_API_KEY.",
9973
10005
  inputSchema: {
9974
10006
  type: "object",
@@ -9989,7 +10021,7 @@ var aiAnalysisTools = [
9989
10021
  }
9990
10022
  },
9991
10023
  {
9992
- name: "ai_gemini_video",
10024
+ name: "analyze_video",
9993
10025
  description: "Analyze video content using Gemini AI with temporal understanding. Requires GOOGLE_API_KEY.",
9994
10026
  inputSchema: {
9995
10027
  type: "object",
@@ -10010,7 +10042,7 @@ var aiAnalysisTools = [
10010
10042
  }
10011
10043
  },
10012
10044
  {
10013
- name: "ai_review",
10045
+ name: "analyze_review",
10014
10046
  description: "AI video review: analyzes quality, suggests fixes, and optionally auto-applies them. Requires GOOGLE_API_KEY.",
10015
10047
  inputSchema: {
10016
10048
  type: "object",
@@ -10030,7 +10062,7 @@ var aiAnalysisTools = [
10030
10062
  }
10031
10063
  },
10032
10064
  {
10033
- name: "ai_thumbnail",
10065
+ name: "generate_thumbnail",
10034
10066
  description: "Extract the best thumbnail frame from a video using Gemini AI analysis. Requires GOOGLE_API_KEY.",
10035
10067
  inputSchema: {
10036
10068
  type: "object",
@@ -10046,7 +10078,7 @@ var aiAnalysisTools = [
10046
10078
  ];
10047
10079
  async function handleAiAnalysisToolCall(name, args) {
10048
10080
  switch (name) {
10049
- case "ai_analyze": {
10081
+ case "analyze_media": {
10050
10082
  const result = await executeAnalyze({
10051
10083
  source: args.source,
10052
10084
  prompt: args.prompt,
@@ -10064,7 +10096,7 @@ async function handleAiAnalysisToolCall(name, args) {
10064
10096
  totalTokens: result.totalTokens
10065
10097
  });
10066
10098
  }
10067
- case "ai_gemini_video": {
10099
+ case "analyze_video": {
10068
10100
  const result = await executeGeminiVideo({
10069
10101
  source: args.source,
10070
10102
  prompt: args.prompt,
@@ -10081,7 +10113,7 @@ async function handleAiAnalysisToolCall(name, args) {
10081
10113
  totalTokens: result.totalTokens
10082
10114
  });
10083
10115
  }
10084
- case "ai_review": {
10116
+ case "analyze_review": {
10085
10117
  const result = await executeReview({
10086
10118
  videoPath: args.videoPath,
10087
10119
  storyboardPath: args.storyboardPath,
@@ -10098,7 +10130,7 @@ async function handleAiAnalysisToolCall(name, args) {
10098
10130
  outputPath: result.outputPath
10099
10131
  });
10100
10132
  }
10101
- case "ai_thumbnail": {
10133
+ case "generate_thumbnail": {
10102
10134
  const result = await executeThumbnailBestFrame({
10103
10135
  videoPath: args.videoPath,
10104
10136
  outputPath: args.outputPath,
@@ -10118,15 +10150,30 @@ async function handleAiAnalysisToolCall(name, args) {
10118
10150
  }
10119
10151
 
10120
10152
  // ../cli/src/commands/ai-script-pipeline.ts
10121
- import { readFile as readFile9, writeFile as writeFile7, mkdir as mkdir4, unlink, rename as rename2 } from "node:fs/promises";
10153
+ import { readFile as readFile9, writeFile as writeFile7, mkdir as mkdir5, unlink, rename as rename2 } from "node:fs/promises";
10122
10154
  import { resolve as resolve9, basename as basename3, extname as extname3 } from "node:path";
10123
10155
  import { existsSync as existsSync6 } from "node:fs";
10124
10156
  import chalk6 from "chalk";
10125
10157
  init_exec_safe();
10158
+
10159
+ // ../cli/src/commands/ai-helpers.ts
10160
+ async function downloadVideo(url, apiKey) {
10161
+ const headers = {};
10162
+ if (url.includes("generativelanguage.googleapis.com") && apiKey) {
10163
+ headers["x-goog-api-key"] = apiKey;
10164
+ }
10165
+ const response = await fetch(url, { headers, redirect: "follow" });
10166
+ if (!response.ok) {
10167
+ throw new Error(`Download failed (${response.status}): ${response.statusText}`);
10168
+ }
10169
+ return Buffer.from(await response.arrayBuffer());
10170
+ }
10171
+
10172
+ // ../cli/src/commands/ai-script-pipeline.ts
10126
10173
  var DEFAULT_VIDEO_RETRIES = 2;
10127
10174
  var RETRY_DELAY_MS = 5e3;
10128
10175
  function sleep(ms) {
10129
- return new Promise((resolve13) => setTimeout(resolve13, ms));
10176
+ return new Promise((resolve12) => setTimeout(resolve12, ms));
10130
10177
  }
10131
10178
  async function generateVideoWithRetryKling(kling, segment, options, maxRetries, onProgress) {
10132
10179
  const prompt2 = segment.visualStyle ? `${segment.visuals}. Style: ${segment.visualStyle}` : segment.visuals;
@@ -10193,6 +10240,37 @@ async function generateVideoWithRetryRunway(runway, segment, referenceImage, opt
10193
10240
  }
10194
10241
  return null;
10195
10242
  }
10243
+ async function generateVideoWithRetryVeo(gemini, segment, options, maxRetries, onProgress) {
10244
+ const prompt2 = segment.visualStyle ? `${segment.visuals}. Style: ${segment.visualStyle}` : segment.visuals;
10245
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
10246
+ try {
10247
+ const result = await gemini.generateVideo(prompt2, {
10248
+ prompt: prompt2,
10249
+ referenceImage: options.referenceImage,
10250
+ duration: options.duration,
10251
+ aspectRatio: options.aspectRatio,
10252
+ model: "veo-3.1-fast-generate-preview"
10253
+ });
10254
+ if (result.status !== "failed" && result.id) {
10255
+ return { operationName: result.id };
10256
+ }
10257
+ if (attempt < maxRetries) {
10258
+ onProgress?.(`\u26A0 Retry ${attempt + 1}/${maxRetries}...`);
10259
+ await sleep(RETRY_DELAY_MS);
10260
+ }
10261
+ } catch (err) {
10262
+ const errMsg = err instanceof Error ? err.message : String(err);
10263
+ if (attempt < maxRetries) {
10264
+ onProgress?.(`\u26A0 Error: ${errMsg.slice(0, 50)}... retry ${attempt + 1}/${maxRetries}`);
10265
+ await sleep(RETRY_DELAY_MS);
10266
+ } else {
10267
+ console.error(chalk6.dim(`
10268
+ [Veo error: ${errMsg}]`));
10269
+ }
10270
+ }
10271
+ }
10272
+ return null;
10273
+ }
10196
10274
  async function executeScriptToVideo(options) {
10197
10275
  const outputDir = options.outputDir || "script-video-output";
10198
10276
  try {
@@ -10201,17 +10279,17 @@ async function executeScriptToVideo(options) {
10201
10279
  if (storyboardProvider === "openai") {
10202
10280
  storyboardApiKey = await getApiKey("OPENAI_API_KEY", "OpenAI") ?? void 0;
10203
10281
  if (!storyboardApiKey) {
10204
- return { success: false, outputDir, scenes: 0, error: "OpenAI API key required for storyboard generation (--storyboard-provider openai)" };
10282
+ return { success: false, outputDir, scenes: 0, error: "OpenAI API key required for storyboard generation (--storyboard-provider openai). Run 'vibe setup' or set OPENAI_API_KEY in .env" };
10205
10283
  }
10206
10284
  } else if (storyboardProvider === "gemini") {
10207
10285
  storyboardApiKey = await getApiKey("GOOGLE_API_KEY", "Google") ?? void 0;
10208
10286
  if (!storyboardApiKey) {
10209
- return { success: false, outputDir, scenes: 0, error: "Google API key required for storyboard generation (--storyboard-provider gemini)" };
10287
+ return { success: false, outputDir, scenes: 0, error: "Google API key required for storyboard generation (--storyboard-provider gemini). Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
10210
10288
  }
10211
10289
  } else {
10212
10290
  storyboardApiKey = await getApiKey("ANTHROPIC_API_KEY", "Anthropic") ?? void 0;
10213
10291
  if (!storyboardApiKey) {
10214
- return { success: false, outputDir, scenes: 0, error: "Anthropic API key required for storyboard generation" };
10292
+ return { success: false, outputDir, scenes: 0, error: "Anthropic API key required for storyboard generation. Run 'vibe setup' or set ANTHROPIC_API_KEY in .env" };
10215
10293
  }
10216
10294
  }
10217
10295
  let imageApiKey;
@@ -10219,24 +10297,24 @@ async function executeScriptToVideo(options) {
10219
10297
  if (imageProvider === "openai" || imageProvider === "dalle") {
10220
10298
  imageApiKey = await getApiKey("OPENAI_API_KEY", "OpenAI") ?? void 0;
10221
10299
  if (!imageApiKey) {
10222
- return { success: false, outputDir, scenes: 0, error: "OpenAI API key required for image generation" };
10223
- }
10224
- } else if (imageProvider === "stability") {
10225
- imageApiKey = await getApiKey("STABILITY_API_KEY", "Stability AI") ?? void 0;
10226
- if (!imageApiKey) {
10227
- return { success: false, outputDir, scenes: 0, error: "Stability API key required for image generation" };
10300
+ return { success: false, outputDir, scenes: 0, error: "OpenAI API key required for image generation. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
10228
10301
  }
10229
10302
  } else if (imageProvider === "gemini") {
10230
10303
  imageApiKey = await getApiKey("GOOGLE_API_KEY", "Google") ?? void 0;
10231
10304
  if (!imageApiKey) {
10232
- return { success: false, outputDir, scenes: 0, error: "Google API key required for Gemini image generation" };
10305
+ return { success: false, outputDir, scenes: 0, error: "Google API key required for Gemini image generation. Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
10306
+ }
10307
+ } else if (imageProvider === "grok") {
10308
+ imageApiKey = await getApiKey("XAI_API_KEY", "xAI") ?? void 0;
10309
+ if (!imageApiKey) {
10310
+ return { success: false, outputDir, scenes: 0, error: "xAI API key required for Grok image generation. Run 'vibe setup' or set XAI_API_KEY in .env" };
10233
10311
  }
10234
10312
  }
10235
10313
  let elevenlabsApiKey;
10236
10314
  if (!options.noVoiceover) {
10237
10315
  elevenlabsApiKey = await getApiKey("ELEVENLABS_API_KEY", "ElevenLabs") ?? void 0;
10238
10316
  if (!elevenlabsApiKey) {
10239
- return { success: false, outputDir, scenes: 0, error: "ElevenLabs API key required for voiceover (or use noVoiceover option)" };
10317
+ return { success: false, outputDir, scenes: 0, error: "ElevenLabs API key required for voiceover (or use noVoiceover option). Run 'vibe setup' or set ELEVENLABS_API_KEY in .env" };
10240
10318
  }
10241
10319
  }
10242
10320
  let videoApiKey;
@@ -10244,18 +10322,23 @@ async function executeScriptToVideo(options) {
10244
10322
  if (options.generator === "kling") {
10245
10323
  videoApiKey = await getApiKey("KLING_API_KEY", "Kling") ?? void 0;
10246
10324
  if (!videoApiKey) {
10247
- return { success: false, outputDir, scenes: 0, error: "Kling API key required (or use imagesOnly option)" };
10325
+ return { success: false, outputDir, scenes: 0, error: "Kling API key required (or use imagesOnly option). Run 'vibe setup' or set KLING_API_KEY in .env" };
10326
+ }
10327
+ } else if (options.generator === "veo") {
10328
+ videoApiKey = await getApiKey("GOOGLE_API_KEY", "Google") ?? void 0;
10329
+ if (!videoApiKey) {
10330
+ return { success: false, outputDir, scenes: 0, error: "Google API key required for Veo video generation (or use imagesOnly option). Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
10248
10331
  }
10249
10332
  } else {
10250
10333
  videoApiKey = await getApiKey("RUNWAY_API_SECRET", "Runway") ?? void 0;
10251
10334
  if (!videoApiKey) {
10252
- return { success: false, outputDir, scenes: 0, error: "Runway API key required (or use imagesOnly option)" };
10335
+ return { success: false, outputDir, scenes: 0, error: "Runway API key required (or use imagesOnly option). Run 'vibe setup' or set RUNWAY_API_SECRET in .env" };
10253
10336
  }
10254
10337
  }
10255
10338
  }
10256
10339
  const absOutputDir = resolve9(process.cwd(), outputDir);
10257
10340
  if (!existsSync6(absOutputDir)) {
10258
- await mkdir4(absOutputDir, { recursive: true });
10341
+ await mkdir5(absOutputDir, { recursive: true });
10259
10342
  }
10260
10343
  let segments;
10261
10344
  const creativityOpts = { creativity: options.creativity };
@@ -10344,23 +10427,18 @@ async function executeScriptToVideo(options) {
10344
10427
  "9:16": "1024x1536",
10345
10428
  "1:1": "1024x1024"
10346
10429
  };
10347
- const stabilityAspectRatios = {
10348
- "16:9": "16:9",
10349
- "9:16": "9:16",
10350
- "1:1": "1:1"
10351
- };
10352
10430
  let openaiImageInstance;
10353
- let stabilityInstance;
10354
10431
  let geminiInstance;
10432
+ let grokInstance;
10355
10433
  if (imageProvider === "openai" || imageProvider === "dalle") {
10356
10434
  openaiImageInstance = new OpenAIImageProvider();
10357
10435
  await openaiImageInstance.initialize({ apiKey: imageApiKey });
10358
- } else if (imageProvider === "stability") {
10359
- stabilityInstance = new StabilityProvider();
10360
- await stabilityInstance.initialize({ apiKey: imageApiKey });
10361
10436
  } else if (imageProvider === "gemini") {
10362
10437
  geminiInstance = new GeminiProvider();
10363
10438
  await geminiInstance.initialize({ apiKey: imageApiKey });
10439
+ } else if (imageProvider === "grok") {
10440
+ grokInstance = new GrokProvider();
10441
+ await grokInstance.initialize({ apiKey: imageApiKey });
10364
10442
  }
10365
10443
  const imagePaths = [];
10366
10444
  for (let i = 0; i < segments.length; i++) {
@@ -10382,12 +10460,18 @@ async function executeScriptToVideo(options) {
10382
10460
  imageUrl = img.url;
10383
10461
  }
10384
10462
  }
10385
- } else if (imageProvider === "stability" && stabilityInstance) {
10386
- const imageResult = await stabilityInstance.generateImage(imagePrompt, {
10387
- aspectRatio: stabilityAspectRatios[options.aspectRatio || "16:9"] || "16:9",
10388
- model: "sd3.5-large"
10463
+ } else if (imageProvider === "gemini" && geminiInstance) {
10464
+ const imageResult = await geminiInstance.generateImage(imagePrompt, {
10465
+ aspectRatio: options.aspectRatio || "16:9"
10466
+ });
10467
+ if (imageResult.success && imageResult.images?.[0]?.base64) {
10468
+ imageBuffer = Buffer.from(imageResult.images[0].base64, "base64");
10469
+ }
10470
+ } else if (imageProvider === "grok" && grokInstance) {
10471
+ const imageResult = await grokInstance.generateImage(imagePrompt, {
10472
+ aspectRatio: options.aspectRatio || "16:9"
10389
10473
  });
10390
- if (imageResult.success && imageResult.images?.[0]) {
10474
+ if (imageResult.success && imageResult.images && imageResult.images.length > 0) {
10391
10475
  const img = imageResult.images[0];
10392
10476
  if (img.base64) {
10393
10477
  imageBuffer = Buffer.from(img.base64, "base64");
@@ -10395,13 +10479,6 @@ async function executeScriptToVideo(options) {
10395
10479
  imageUrl = img.url;
10396
10480
  }
10397
10481
  }
10398
- } else if (imageProvider === "gemini" && geminiInstance) {
10399
- const imageResult = await geminiInstance.generateImage(imagePrompt, {
10400
- aspectRatio: options.aspectRatio || "16:9"
10401
- });
10402
- if (imageResult.success && imageResult.images?.[0]?.base64) {
10403
- imageBuffer = Buffer.from(imageResult.images[0].base64, "base64");
10404
- }
10405
10482
  }
10406
10483
  const imagePath = resolve9(absOutputDir, `scene-${i + 1}.png`);
10407
10484
  if (imageBuffer) {
@@ -10451,8 +10528,53 @@ async function executeScriptToVideo(options) {
10451
10528
  const waitResult = await kling.waitForCompletion(taskResult.taskId, taskResult.type, void 0, 6e5);
10452
10529
  if (waitResult.status === "completed" && waitResult.videoUrl) {
10453
10530
  const videoPath = resolve9(absOutputDir, `scene-${i + 1}.mp4`);
10454
- const response = await fetch(waitResult.videoUrl);
10455
- const buffer = Buffer.from(await response.arrayBuffer());
10531
+ const buffer = await downloadVideo(waitResult.videoUrl, videoApiKey);
10532
+ await writeFile7(videoPath, buffer);
10533
+ const targetDuration = segment.duration;
10534
+ const actualVideoDuration = await getVideoDuration(videoPath);
10535
+ if (actualVideoDuration < targetDuration - 0.1) {
10536
+ const extendedPath = resolve9(absOutputDir, `scene-${i + 1}-extended.mp4`);
10537
+ await extendVideoNaturally(videoPath, targetDuration, extendedPath);
10538
+ await unlink(videoPath);
10539
+ await rename2(extendedPath, videoPath);
10540
+ }
10541
+ videoPaths.push(videoPath);
10542
+ result.videos.push(videoPath);
10543
+ } else {
10544
+ videoPaths.push("");
10545
+ result.failedScenes.push(i + 1);
10546
+ }
10547
+ } catch {
10548
+ videoPaths.push("");
10549
+ result.failedScenes.push(i + 1);
10550
+ }
10551
+ } else {
10552
+ videoPaths.push("");
10553
+ result.failedScenes.push(i + 1);
10554
+ }
10555
+ }
10556
+ } else if (options.generator === "veo") {
10557
+ const veo = new GeminiProvider();
10558
+ await veo.initialize({ apiKey: videoApiKey });
10559
+ for (let i = 0; i < segments.length; i++) {
10560
+ if (!imagePaths[i]) {
10561
+ videoPaths.push("");
10562
+ continue;
10563
+ }
10564
+ const segment = segments[i];
10565
+ const veoDuration = segment.duration > 6 ? 8 : segment.duration > 4 ? 6 : 4;
10566
+ const taskResult = await generateVideoWithRetryVeo(
10567
+ veo,
10568
+ segment,
10569
+ { duration: veoDuration, aspectRatio: options.aspectRatio || "16:9" },
10570
+ maxRetries
10571
+ );
10572
+ if (taskResult) {
10573
+ try {
10574
+ const waitResult = await veo.waitForVideoCompletion(taskResult.operationName, void 0, 3e5);
10575
+ if (waitResult.status === "completed" && waitResult.videoUrl) {
10576
+ const videoPath = resolve9(absOutputDir, `scene-${i + 1}.mp4`);
10577
+ const buffer = await downloadVideo(waitResult.videoUrl, videoApiKey);
10456
10578
  await writeFile7(videoPath, buffer);
10457
10579
  const targetDuration = segment.duration;
10458
10580
  const actualVideoDuration = await getVideoDuration(videoPath);
@@ -10504,8 +10626,7 @@ async function executeScriptToVideo(options) {
10504
10626
  const waitResult = await runway.waitForCompletion(taskResult.taskId, void 0, 3e5);
10505
10627
  if (waitResult.status === "completed" && waitResult.videoUrl) {
10506
10628
  const videoPath = resolve9(absOutputDir, `scene-${i + 1}.mp4`);
10507
- const response = await fetch(waitResult.videoUrl);
10508
- const buffer = Buffer.from(await response.arrayBuffer());
10629
+ const buffer = await downloadVideo(waitResult.videoUrl, videoApiKey);
10509
10630
  await writeFile7(videoPath, buffer);
10510
10631
  const targetDuration = segment.duration;
10511
10632
  const actualVideoDuration = await getVideoDuration(videoPath);
@@ -10659,7 +10780,7 @@ async function executeScriptToVideo(options) {
10659
10780
  }
10660
10781
 
10661
10782
  // ../cli/src/commands/ai-highlights.ts
10662
- import { readFile as readFile10, writeFile as writeFile8, mkdir as mkdir5 } from "node:fs/promises";
10783
+ import { readFile as readFile10, writeFile as writeFile8, mkdir as mkdir6 } from "node:fs/promises";
10663
10784
  import { resolve as resolve10, dirname as dirname2, basename as basename4, extname as extname4 } from "node:path";
10664
10785
  import { existsSync as existsSync7 } from "node:fs";
10665
10786
  import chalk7 from "chalk";
@@ -10702,7 +10823,7 @@ async function executeHighlights(options) {
10702
10823
  if (options.useGemini && isVideo) {
10703
10824
  const geminiApiKey = await getApiKey("GOOGLE_API_KEY", "Google");
10704
10825
  if (!geminiApiKey) {
10705
- return { success: false, highlights: [], totalDuration: 0, totalHighlightDuration: 0, error: "Google API key required for Gemini Video Understanding" };
10826
+ return { success: false, highlights: [], totalDuration: 0, totalHighlightDuration: 0, error: "Google API key required for Gemini Video Understanding. Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
10706
10827
  }
10707
10828
  sourceDuration = await ffprobeDuration(absPath);
10708
10829
  const gemini = new GeminiProvider();
@@ -10770,11 +10891,11 @@ Analyze both what is SHOWN (visual cues, actions, expressions) and what is SAID
10770
10891
  } else {
10771
10892
  const openaiApiKey = await getApiKey("OPENAI_API_KEY", "OpenAI");
10772
10893
  if (!openaiApiKey) {
10773
- return { success: false, highlights: [], totalDuration: 0, totalHighlightDuration: 0, error: "OpenAI API key required for Whisper transcription" };
10894
+ return { success: false, highlights: [], totalDuration: 0, totalHighlightDuration: 0, error: "OpenAI API key required for Whisper transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
10774
10895
  }
10775
10896
  const claudeApiKey = await getApiKey("ANTHROPIC_API_KEY", "Anthropic");
10776
10897
  if (!claudeApiKey) {
10777
- return { success: false, highlights: [], totalDuration: 0, totalHighlightDuration: 0, error: "Anthropic API key required for highlight analysis" };
10898
+ return { success: false, highlights: [], totalDuration: 0, totalHighlightDuration: 0, error: "Anthropic API key required for highlight analysis. Run 'vibe setup' or set ANTHROPIC_API_KEY in .env" };
10778
10899
  }
10779
10900
  let audioPath = absPath;
10780
10901
  let tempAudioPath = null;
@@ -10900,7 +11021,7 @@ async function executeAutoShorts(options) {
10900
11021
  if (options.useGemini) {
10901
11022
  const geminiApiKey = await getApiKey("GOOGLE_API_KEY", "Google");
10902
11023
  if (!geminiApiKey) {
10903
- return { success: false, shorts: [], error: "Google API key required for Gemini Video Understanding" };
11024
+ return { success: false, shorts: [], error: "Google API key required for Gemini Video Understanding. Run 'vibe setup' or set GOOGLE_API_KEY in .env" };
10904
11025
  }
10905
11026
  const gemini = new GeminiProvider();
10906
11027
  await gemini.initialize({ apiKey: geminiApiKey });
@@ -10971,11 +11092,11 @@ Analyze both VISUALS (expressions, actions, scene changes) and AUDIO (speech, re
10971
11092
  } else {
10972
11093
  const openaiApiKey = await getApiKey("OPENAI_API_KEY", "OpenAI");
10973
11094
  if (!openaiApiKey) {
10974
- return { success: false, shorts: [], error: "OpenAI API key required for transcription" };
11095
+ return { success: false, shorts: [], error: "OpenAI API key required for transcription. Run 'vibe setup' or set OPENAI_API_KEY in .env" };
10975
11096
  }
10976
11097
  const claudeApiKey = await getApiKey("ANTHROPIC_API_KEY", "Anthropic");
10977
11098
  if (!claudeApiKey) {
10978
- return { success: false, shorts: [], error: "Anthropic API key required for highlight detection" };
11099
+ return { success: false, shorts: [], error: "Anthropic API key required for highlight detection. Run 'vibe setup' or set ANTHROPIC_API_KEY in .env" };
10979
11100
  }
10980
11101
  const tempAudio = absPath.replace(/(\.[^.]+)$/, "-temp-audio.mp3");
10981
11102
  await execSafe("ffmpeg", ["-i", absPath, "-vn", "-acodec", "libmp3lame", "-q:a", "2", tempAudio, "-y"]);
@@ -11020,7 +11141,7 @@ Analyze both VISUALS (expressions, actions, scene changes) and AUDIO (speech, re
11020
11141
  }
11021
11142
  const outputDir = options.outputDir ? resolve10(process.cwd(), options.outputDir) : dirname2(absPath);
11022
11143
  if (options.outputDir && !existsSync7(outputDir)) {
11023
- await mkdir5(outputDir, { recursive: true });
11144
+ await mkdir6(outputDir, { recursive: true });
11024
11145
  }
11025
11146
  const result = {
11026
11147
  success: true,
@@ -11095,136 +11216,10 @@ Analyze both VISUALS (expressions, actions, scene changes) and AUDIO (speech, re
11095
11216
  }
11096
11217
  }
11097
11218
 
11098
- // ../cli/src/commands/ai-narrate.ts
11099
- import { readFile as readFile11, writeFile as writeFile9, mkdir as mkdir6 } from "node:fs/promises";
11100
- import { resolve as resolve11, dirname as dirname3, basename as basename5 } from "node:path";
11101
- import { existsSync as existsSync8 } from "node:fs";
11102
- import chalk8 from "chalk";
11103
- import ora6 from "ora";
11104
- init_exec_safe();
11105
- async function autoNarrate(options) {
11106
- const {
11107
- videoPath,
11108
- duration,
11109
- outputDir,
11110
- voice = "rachel",
11111
- style = "informative",
11112
- language = "en",
11113
- scriptProvider = "claude"
11114
- } = options;
11115
- const geminiApiKey = await getApiKey("GOOGLE_API_KEY", "Google");
11116
- if (!geminiApiKey) {
11117
- return { success: false, error: "GOOGLE_API_KEY required for video analysis" };
11118
- }
11119
- let claudeApiKey = null;
11120
- let openaiScriptApiKey = null;
11121
- if (scriptProvider === "openai") {
11122
- openaiScriptApiKey = await getApiKey("OPENAI_API_KEY", "OpenAI");
11123
- if (!openaiScriptApiKey) {
11124
- return { success: false, error: "OPENAI_API_KEY required for script generation" };
11125
- }
11126
- } else {
11127
- claudeApiKey = await getApiKey("ANTHROPIC_API_KEY", "Anthropic");
11128
- if (!claudeApiKey) {
11129
- return { success: false, error: "ANTHROPIC_API_KEY required for script generation" };
11130
- }
11131
- }
11132
- const elevenlabsApiKey = await getApiKey("ELEVENLABS_API_KEY", "ElevenLabs");
11133
- if (!elevenlabsApiKey) {
11134
- return { success: false, error: "ELEVENLABS_API_KEY required for TTS" };
11135
- }
11136
- try {
11137
- const gemini = new GeminiProvider();
11138
- await gemini.initialize({ apiKey: geminiApiKey });
11139
- const videoBuffer = await readFile11(videoPath);
11140
- const analysisPrompt = `Analyze this video in detail for narration purposes. Describe:
11141
- 1. What is happening visually (actions, movements, subjects)
11142
- 2. The setting and environment
11143
- 3. Any text or graphics visible
11144
- 4. The mood and tone of the content
11145
- 5. Key moments and their approximate timestamps
11146
-
11147
- Provide a detailed description that could be used to write a voiceover narration.
11148
- Focus on what viewers need to know to understand and appreciate the video.`;
11149
- const analysisResult = await gemini.analyzeVideo(videoBuffer, analysisPrompt, {
11150
- fps: 0.5,
11151
- // Lower FPS for cost optimization
11152
- lowResolution: duration > 60
11153
- // Use low res for longer videos
11154
- });
11155
- if (!analysisResult.success || !analysisResult.response) {
11156
- return { success: false, error: `Video analysis failed: ${analysisResult.error}` };
11157
- }
11158
- let scriptResult;
11159
- if (scriptProvider === "openai") {
11160
- const gpt = new OpenAIProvider();
11161
- await gpt.initialize({ apiKey: openaiScriptApiKey });
11162
- scriptResult = await gpt.generateNarrationScript(
11163
- analysisResult.response,
11164
- duration,
11165
- style,
11166
- language
11167
- );
11168
- } else {
11169
- const claude = new ClaudeProvider();
11170
- await claude.initialize({ apiKey: claudeApiKey });
11171
- scriptResult = await claude.generateNarrationScript(
11172
- analysisResult.response,
11173
- duration,
11174
- style,
11175
- language
11176
- );
11177
- if (!scriptResult.success && scriptResult.error?.includes("529")) {
11178
- const fallbackKey = await getApiKey("OPENAI_API_KEY", "OpenAI");
11179
- if (fallbackKey) {
11180
- console.error("\u26A0\uFE0F Claude overloaded, falling back to OpenAI...");
11181
- const gpt = new OpenAIProvider();
11182
- await gpt.initialize({ apiKey: fallbackKey });
11183
- scriptResult = await gpt.generateNarrationScript(
11184
- analysisResult.response,
11185
- duration,
11186
- style,
11187
- language
11188
- );
11189
- }
11190
- }
11191
- }
11192
- if (!scriptResult.success || !scriptResult.script) {
11193
- return { success: false, error: `Script generation failed: ${scriptResult.error}` };
11194
- }
11195
- const elevenlabs = new ElevenLabsProvider();
11196
- await elevenlabs.initialize({ apiKey: elevenlabsApiKey });
11197
- const ttsResult = await elevenlabs.textToSpeech(scriptResult.script, {
11198
- voiceId: voice
11199
- });
11200
- if (!ttsResult.success || !ttsResult.audioBuffer) {
11201
- return { success: false, error: `TTS generation failed: ${ttsResult.error}` };
11202
- }
11203
- if (!existsSync8(outputDir)) {
11204
- await mkdir6(outputDir, { recursive: true });
11205
- }
11206
- const audioPath = resolve11(outputDir, "auto-narration.mp3");
11207
- await writeFile9(audioPath, ttsResult.audioBuffer);
11208
- const scriptPath = resolve11(outputDir, "narration-script.txt");
11209
- await writeFile9(scriptPath, scriptResult.script, "utf-8");
11210
- return {
11211
- success: true,
11212
- audioPath,
11213
- script: scriptResult.script,
11214
- segments: scriptResult.segments
11215
- };
11216
- } catch (error) {
11217
- return {
11218
- success: false,
11219
- error: error instanceof Error ? error.message : "Unknown error in autoNarrate"
11220
- };
11221
- }
11222
- }
11223
-
11224
11219
  // src/tools/ai-pipelines.ts
11225
11220
  var aiPipelineTools = [
11226
11221
  {
11227
- name: "ai_script_to_video",
11222
+ name: "pipeline_script_to_video",
11228
11223
  description: "Full script-to-video pipeline: script -> storyboard -> images -> voiceover -> video. Requires multiple API keys depending on providers chosen.",
11229
11224
  inputSchema: {
11230
11225
  type: "object",
@@ -11240,7 +11235,7 @@ var aiPipelineTools = [
11240
11235
  },
11241
11236
  imageProvider: {
11242
11237
  type: "string",
11243
- enum: ["openai", "dalle", "stability", "gemini"],
11238
+ enum: ["openai", "gemini", "grok"],
11244
11239
  description: "Image generation provider (default: gemini)"
11245
11240
  },
11246
11241
  aspectRatio: {
@@ -11273,7 +11268,7 @@ var aiPipelineTools = [
11273
11268
  }
11274
11269
  },
11275
11270
  {
11276
- name: "ai_highlights",
11271
+ name: "pipeline_highlights",
11277
11272
  description: "Extract highlight clips from a longer video using AI analysis. Requires OPENAI_API_KEY+ANTHROPIC_API_KEY or GOOGLE_API_KEY (with --use-gemini).",
11278
11273
  inputSchema: {
11279
11274
  type: "object",
@@ -11297,8 +11292,8 @@ var aiPipelineTools = [
11297
11292
  }
11298
11293
  },
11299
11294
  {
11300
- name: "ai_auto_shorts",
11301
- description: "Automatically generate short-form content (Reels/TikTok/Shorts) from a longer video. Same API key requirements as ai_highlights.",
11295
+ name: "pipeline_auto_shorts",
11296
+ description: "Automatically generate short-form content (Reels/TikTok/Shorts) from a longer video. Same API key requirements as pipeline_highlights.",
11302
11297
  inputSchema: {
11303
11298
  type: "object",
11304
11299
  properties: {
@@ -11324,31 +11319,11 @@ var aiPipelineTools = [
11324
11319
  },
11325
11320
  required: ["video"]
11326
11321
  }
11327
- },
11328
- {
11329
- name: "ai_narrate",
11330
- description: "Auto-generate narration for a video: analyze content with Gemini, generate script, produce voiceover with ElevenLabs. Requires GOOGLE_API_KEY + ELEVENLABS_API_KEY.",
11331
- inputSchema: {
11332
- type: "object",
11333
- properties: {
11334
- videoPath: { type: "string", description: "Path to the input video file" },
11335
- duration: { type: "number", description: "Video duration in seconds (auto-detected if omitted)" },
11336
- outputDir: { type: "string", description: "Output directory for narration audio" },
11337
- voice: { type: "string", description: "ElevenLabs voice name (default: Rachel)" },
11338
- style: {
11339
- type: "string",
11340
- enum: ["informative", "energetic", "calm", "dramatic"],
11341
- description: "Narration style (default: informative)"
11342
- },
11343
- language: { type: "string", description: "Language code (default: en)" }
11344
- },
11345
- required: ["videoPath", "duration", "outputDir"]
11346
- }
11347
11322
  }
11348
11323
  ];
11349
11324
  async function handleAiPipelineToolCall(name, args) {
11350
11325
  switch (name) {
11351
- case "ai_script_to_video": {
11326
+ case "pipeline_script_to_video": {
11352
11327
  const result = await executeScriptToVideo({
11353
11328
  script: args.script,
11354
11329
  outputDir: args.outputDir,
@@ -11378,7 +11353,7 @@ async function handleAiPipelineToolCall(name, args) {
11378
11353
  failedScenes: result.failedScenes
11379
11354
  });
11380
11355
  }
11381
- case "ai_highlights": {
11356
+ case "pipeline_highlights": {
11382
11357
  const result = await executeHighlights({
11383
11358
  media: args.media,
11384
11359
  output: args.output,
@@ -11400,7 +11375,7 @@ async function handleAiPipelineToolCall(name, args) {
11400
11375
  projectPath: result.projectPath
11401
11376
  });
11402
11377
  }
11403
- case "ai_auto_shorts": {
11378
+ case "pipeline_auto_shorts": {
11404
11379
  const result = await executeAutoShorts({
11405
11380
  video: args.video,
11406
11381
  outputDir: args.outputDir,
@@ -11426,22 +11401,6 @@ async function handleAiPipelineToolCall(name, args) {
11426
11401
  }))
11427
11402
  });
11428
11403
  }
11429
- case "ai_narrate": {
11430
- const result = await autoNarrate({
11431
- videoPath: args.videoPath,
11432
- duration: args.duration,
11433
- outputDir: args.outputDir,
11434
- voice: args.voice,
11435
- style: args.style,
11436
- language: args.language
11437
- });
11438
- if (!result.success) return `Narration failed: ${result.error}`;
11439
- return JSON.stringify({
11440
- audioPath: result.audioPath,
11441
- script: result.script,
11442
- segments: result.segments?.length
11443
- });
11444
- }
11445
11404
  default:
11446
11405
  throw new Error(`Unknown AI pipeline tool: ${name}`);
11447
11406
  }
@@ -11482,8 +11441,8 @@ async function handleToolCall(name, args) {
11482
11441
  }
11483
11442
 
11484
11443
  // src/resources/index.ts
11485
- import { readFile as readFile12 } from "node:fs/promises";
11486
- import { resolve as resolve12 } from "node:path";
11444
+ import { readFile as readFile11 } from "node:fs/promises";
11445
+ import { resolve as resolve11 } from "node:path";
11487
11446
  var resources = [
11488
11447
  {
11489
11448
  uri: "vibe://project/current",
@@ -11518,8 +11477,8 @@ var resources = [
11518
11477
  ];
11519
11478
  var currentProjectPath = process.env.VIBE_PROJECT_PATH || null;
11520
11479
  async function loadProject2(projectPath) {
11521
- const absPath = resolve12(process.cwd(), projectPath);
11522
- const content = await readFile12(absPath, "utf-8");
11480
+ const absPath = resolve11(process.cwd(), projectPath);
11481
+ const content = await readFile11(absPath, "utf-8");
11523
11482
  const data = JSON.parse(content);
11524
11483
  return Project.fromJSON(data);
11525
11484
  }