npm - @farazirfan/costar-server-executor - Versions diffs - 1.7.37 → 1.7.39 - Mend

@farazirfan/costar-server-executor 1.7.37 → 1.7.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (253) hide show

package/skills/longform-video-generation/scripts/video_generator.py ADDED Viewed

@@ -0,0 +1,579 @@
+#!/usr/bin/env python3
+"""
+Long-Form Video Generator with Veo 3.1
+Generate videos longer than 8 seconds using Google's Veo 3.1 API.
+Outputs videos to /home/user/task directory (sandbox working directory).
+Progress is logged to /home/user/task/video_gen_progress_{timestamp}.log
+Usage:
+    python video_generator.py "scene 1" "scene 2" "scene 3"
+    python video_generator.py --enhanced --resolution 2K "scene 1" "scene 2"
+"""
+import os
+import sys
+import time
+import subprocess
+import argparse
+from pathlib import Path
+from typing import List, Optional
+from datetime import datetime
+from google import genai
+from google.genai import types
+class LongFormVideoGenerator:
+    """Generate long-form videos with Veo 3.1"""
+    def __init__(self, api_key: str, output_dir: str = "/home/user/task", temp_dir: str = "/home/user/task", progress_file: str = None):
+        self.api_key = api_key
+        self.output_dir = Path(output_dir)
+        self.temp_dir = Path(temp_dir)
+        self.client = genai.Client(api_key=api_key)
+        # Ensure directories exist (task directory should already exist from sandbox setup)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.temp_dir.mkdir(parents=True, exist_ok=True)
+        # Setup progress logging
+        if progress_file is None:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            progress_file = f"/home/user/task/video_gen_progress_{timestamp}.log"
+        self.progress_file = progress_file
+        self.log_progress("STARTED", "Video generation started")
+    def log_progress(self, status: str, message: str):
+        """Log progress to file"""
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        log_entry = f"[{timestamp}] {status}: {message}\n"
+        with open(self.progress_file, 'a') as f:
+            f.write(log_entry)
+            f.flush()  # Force write to disk immediately so clients can see updates
+        print(log_entry.strip())
+    def generate_reference_image(
+        self,
+        prompt: str,
+        output_path: str,
+        model: str = "gemini-3-pro-image-preview",
+        resolution: str = "2K",
+        aspect_ratio: str = "16:9"
+    ) -> str:
+        """Generate reference image using Nano Banana Pro"""
+        try:
+            self.log_progress("IMAGE_GEN", f"Generating reference image with {model}")
+            print(f"\nGenerating reference image with {model}")
+            print(f"Resolution: {resolution}, Aspect Ratio: {aspect_ratio}")
+            config = types.GenerateContentConfig(
+                response_modalities=['IMAGE'],
+                image_config=types.ImageConfig(aspect_ratio=aspect_ratio)
+            )
+            response = self.client.models.generate_content(
+                model=model,
+                contents=prompt,
+                config=config
+            )
+            for part in response.parts:
+                if image := part.as_image():
+                    image.save(output_path)
+                    self.log_progress("IMAGE_SAVED", f"Reference image saved: {output_path}")
+                    print(f"Reference image saved: {output_path}")
+                    return output_path
+            raise Exception("No image generated in response")
+        except Exception as e:
+            self.log_progress("FAILURE", f"Reference image generation failed: {str(e)}")
+            import traceback
+            self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+            raise
+        raise ValueError("No image generated")
+    def generate_video_clip(
+        self,
+        prompt: str,
+        scene_number: int,
+        reference_image: Optional[str] = None
+    ) -> str:
+        """Generate single 8-second video clip"""
+        self.log_progress("SCENE_START", f"Starting scene {scene_number}")
+        print(f"\n{'='*70}")
+        print(f"Scene {scene_number}")
+        print(f"{'='*70}")
+        print(f"Prompt: {prompt[:80]}...")
+        if reference_image:
+            print(f"Using reference image: {reference_image}")
+        try:
+            if reference_image and os.path.exists(reference_image):
+                print(f"Loading reference image...")
+                reference_img = types.Image.from_file(location=reference_image)
+                operation = self.client.models.generate_videos(
+                    model="veo-3.1-generate-preview",
+                    prompt=prompt,
+                    image=reference_img,
+                    config=types.GenerateVideosConfig(
+                        duration_seconds=8,
+                        aspect_ratio="16:9",
+                        resolution="720p"
+                    )
+                )
+            else:
+                operation = self.client.models.generate_videos(
+                    model="veo-3.1-generate-preview",
+                    prompt=prompt,
+                    config=types.GenerateVideosConfig(
+                        duration_seconds=8,
+                        aspect_ratio="16:9",
+                        resolution="720p"
+                    )
+                )
+            # Poll until ready
+            self.log_progress("SCENE_GENERATING", f"Scene {scene_number}: Generating video (2-3 minutes)...")
+            print(f"Generating video (2-3 minutes)...")
+            poll_count = 0
+            start_time = time.time()
+            try:
+                while not operation.done:
+                    elapsed = int(time.time() - start_time)
+                    # Log progress on every poll (every 10 seconds) so clients see updates
+                    self.log_progress("SCENE_PROGRESS", f"Scene {scene_number}: Still generating... ({elapsed}s elapsed)")
+                    print(f"Still generating... ({elapsed}s elapsed)")
+                    time.sleep(10)
+                    try:
+                        operation = self.client.operations.get(operation)
+                    except Exception as poll_error:
+                        self.log_progress("FAILURE", f"Scene {scene_number}: Error polling operation: {str(poll_error)}")
+                        raise
+                    poll_count += 1
+                    # Safety timeout: if polling for more than 10 minutes, something is wrong
+                    if elapsed > 600:
+                        raise Exception(f"Video generation timeout after {elapsed}s")
+                if not operation.response or not operation.response.generated_videos:
+                    raise Exception("Video generation failed - no video in response")
+            except Exception as poll_exception:
+                self.log_progress("FAILURE", f"Scene {scene_number}: Polling failed: {str(poll_exception)}")
+                import traceback
+                self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+                raise
+            # Download video
+            try:
+                self.log_progress("SCENE_DOWNLOADING", f"Scene {scene_number}: Downloading video...")
+                print(f"Downloading video...")
+                generated_video = operation.response.generated_videos[0]
+                video_path = str(self.temp_dir / f"scene_{scene_number:03d}.mp4")
+                self.client.files.download(file=generated_video.video)
+                generated_video.video.save(video_path)
+                self.log_progress("SCENE_COMPLETE", f"Scene {scene_number}: Video saved: {video_path}")
+                print(f"Video saved: {video_path}")
+                return video_path
+            except Exception as download_error:
+                self.log_progress("FAILURE", f"Scene {scene_number}: Download failed: {str(download_error)}")
+                import traceback
+                self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+                raise
+        except Exception as e:
+            error_msg = f"Scene {scene_number}: API Error - {str(e)}"
+            self.log_progress("FAILURE", error_msg)
+            print(f"API Error: {e}")
+            import traceback
+            self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+            raise
+    def extract_last_frame(self, video_path: str, output_path: str) -> str:
+        """Extract last frame from video"""
+        try:
+            print(f"Extracting last frame...")
+            result = subprocess.run(
+                ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
+                 '-of', 'default=noprint_wrappers=1:nokey=1', video_path],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode != 0:
+                raise Exception(f"ffprobe failed: {result.stderr}")
+            duration = float(result.stdout.strip())
+            result = subprocess.run([
+                'ffmpeg', '-ss', str(duration - 0.1), '-i', video_path,
+                '-vframes', '1', '-q:v', '2', output_path, '-y'
+            ], capture_output=True, check=True, text=True)
+            print(f"Frame extracted: {output_path}")
+            return output_path
+        except Exception as e:
+            self.log_progress("FAILURE", f"Frame extraction failed: {str(e)}")
+            import traceback
+            self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+            raise
+    def concatenate_videos_smooth(
+        self,
+        video_paths: List[str],
+        output_path: str,
+        transition_duration: float = 0.5
+    ) -> str:
+        """Concatenate videos with smooth crossfade transitions"""
+        print(f"\n{'='*70}")
+        print(f"Concatenating {len(video_paths)} videos with smooth transitions")
+        print(f"Transition duration: {transition_duration}s")
+        print(f"{'='*70}\n")
+        if len(video_paths) == 1:
+            subprocess.run(['cp', video_paths[0], output_path], check=True)
+            print(f"Single video copied")
+            return output_path
+        # For 2 videos
+        if len(video_paths) == 2:
+            offset = 8.0 - transition_duration
+            cmd = [
+                'ffmpeg',
+                '-i', video_paths[0],
+                '-i', video_paths[1],
+                '-filter_complex',
+                f'[0:v][1:v]xfade=transition=fade:duration={transition_duration}:offset={offset}[outv];'
+                f'[0:a][1:a]acrossfade=d={transition_duration}[outa]',
+                '-map', '[outv]',
+                '-map', '[outa]',
+                '-c:v', 'libx264',  # H.264 codec for web compatibility
+                '-preset', 'medium',  # Encoding speed/quality balance
+                '-crf', '23',  # Quality (18-28, lower = better quality)
+                '-pix_fmt', 'yuv420p',  # Ensure compatibility
+                '-c:a', 'aac',  # AAC audio codec
+                '-b:a', '128k',  # Audio bitrate
+                '-movflags', '+faststart',  # Enable web streaming (play before full download)
+                '-y', output_path
+            ]
+            subprocess.run(cmd, capture_output=True, check=True)
+            print(f"Videos concatenated with smooth transition")
+            return output_path
+        # For 3+ videos, chain xfades
+        filter_parts = []
+        audio_parts = []
+        for i in range(len(video_paths) - 1):
+            offset = (8.0 - transition_duration) * (i + 1)
+            if i == 0:
+                filter_parts.append(
+                    f'[0:v][1:v]xfade=transition=fade:duration={transition_duration}:offset={8.0-transition_duration}[v01]'
+                )
+                audio_parts.append(f'[0:a][1:a]acrossfade=d={transition_duration}[a01]')
+            else:
+                prev_label = f'v0{i}' if i == 1 else f'v{i-1}{i}'
+                curr_label = f'v{i}{i+1}'
+                filter_parts.append(
+                    f'[{prev_label}][{i+1}:v]xfade=transition=fade:duration={transition_duration}:offset={offset}[{curr_label}]'
+                )
+                prev_audio = f'a0{i}' if i == 1 else f'a{i-1}{i}'
+                curr_audio = f'a{i}{i+1}'
+                audio_parts.append(f'[{prev_audio}][{i+1}:a]acrossfade=d={transition_duration}[{curr_audio}]')
+        last_v = f'v{len(video_paths)-2}{len(video_paths)-1}'
+        last_a = f'a{len(video_paths)-2}{len(video_paths)-1}'
+        filter_complex = ';'.join(filter_parts + audio_parts)
+        input_args = []
+        for video in video_paths:
+            input_args.extend(['-i', video])
+        cmd = ['ffmpeg'] + input_args + [
+            '-filter_complex', filter_complex,
+            '-map', f'[{last_v}]',
+            '-map', f'[{last_a}]',
+            '-c:v', 'libx264',  # H.264 codec for web compatibility
+            '-preset', 'medium',  # Encoding speed/quality balance
+            '-crf', '23',  # Quality (18-28, lower = better quality)
+            '-pix_fmt', 'yuv420p',  # Ensure compatibility
+            '-c:a', 'aac',  # AAC audio codec
+            '-b:a', '128k',  # Audio bitrate
+            '-movflags', '+faststart',  # Enable web streaming (play before full download)
+            '-y', output_path
+        ]
+        try:
+            result = subprocess.run(cmd, capture_output=True, check=True, text=True)
+            print(f"Videos concatenated with smooth transitions")
+            return output_path
+        except subprocess.CalledProcessError as e:
+            error_msg = f"Video concatenation failed: {e.stderr if e.stderr else str(e)}"
+            self.log_progress("FAILURE", error_msg)
+            import traceback
+            self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+            raise Exception(error_msg) from e
+        except Exception as e:
+            self.log_progress("FAILURE", f"Video concatenation failed: {str(e)}")
+            import traceback
+            self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+            raise
+    def generate_longform_video(
+        self,
+        scenes: List[str],
+        output_filename: str = "output.mp4",
+        use_enhanced_first_frame: bool = False,
+        enhanced_resolution: str = "2K",
+        use_frame_chaining: bool = True,
+        smooth_transitions: bool = True,
+        transition_duration: float = 0.5
+    ) -> str:
+        """Generate long-form video"""
+        if not scenes:
+            raise ValueError("scenes parameter is required")
+        self.log_progress("INIT", f"Starting long-form video generation - {len(scenes)} scenes")
+        print("\n" + "="*70)
+        print(f"LONG-FORM VIDEO GENERATION - {len(scenes)} SCENES")
+        if use_enhanced_first_frame:
+            print(f"Enhanced mode: First frame with Nano Banana Pro ({enhanced_resolution})")
+            self.log_progress("CONFIG", f"Enhanced mode enabled: {enhanced_resolution}")
+        if smooth_transitions:
+            print(f"Smooth transitions enabled ({transition_duration}s crossfade)")
+            self.log_progress("CONFIG", f"Smooth transitions: {transition_duration}s crossfade")
+        print("="*70 + "\n")
+        video_paths = []
+        last_frame_path = None
+        for i, prompt in enumerate(scenes):
+            scene_num = i + 1
+            reference = last_frame_path
+            # Use Nano Banana Pro for first scene
+            if scene_num == 1 and use_enhanced_first_frame:
+                print(f"\nGENERATING HIGH-QUALITY FIRST FRAME")
+                print(f"Using Nano Banana Pro ({enhanced_resolution})")
+                image_prompt = f"""
+                Cinematic still frame for video scene.
+                {prompt}
+                High-resolution professional photography, dramatic lighting.
+                16:9 aspect ratio, photorealistic, ultra detailed.
+                """
+                reference = str(self.temp_dir / "pro_first_frame.png")
+                self.generate_reference_image(
+                    prompt=image_prompt,
+                    output_path=reference,
+                    model="gemini-3-pro-image-preview",
+                    resolution=enhanced_resolution,
+                    aspect_ratio="16:9"
+                )
+            # Generate video
+            video_path = self.generate_video_clip(prompt, scene_num, reference)
+            video_paths.append(video_path)
+            # Extract last frame for chaining
+            if use_frame_chaining and i < len(scenes) - 1:
+                frame_path = str(self.temp_dir / f"scene_{scene_num:03d}_last_frame.png")
+                last_frame_path = self.extract_last_frame(video_path, frame_path)
+            print(f"Scene {scene_num} complete")
+            if i < len(scenes) - 1:
+                print(f"Pausing 5 seconds...")
+                time.sleep(5)
+        # Concatenate
+        self.log_progress("CONCAT_START", "Concatenating video scenes...")
+        output_path = str(self.output_dir / output_filename)
+        if smooth_transitions and len(video_paths) > 1:
+            self.concatenate_videos_smooth(video_paths, output_path, transition_duration)
+        else:
+            # Simple concatenation
+            try:
+                list_file = str(self.temp_dir / 'concat_list.txt')
+                with open(list_file, 'w') as f:
+                    for video_path in video_paths:
+                        f.write(f"file '{os.path.abspath(video_path)}'\n")
+                result = subprocess.run([
+                    'ffmpeg', '-f', 'concat', '-safe', '0',
+                    '-i', list_file,
+                    '-c:v', 'libx264',  # Re-encode for optimization
+                    '-preset', 'medium',
+                    '-crf', '23',
+                    '-pix_fmt', 'yuv420p',
+                    '-c:a', 'aac',
+                    '-b:a', '128k',
+                    '-movflags', '+faststart',  # Enable web streaming
+                    '-y', output_path
+                ], capture_output=True, check=True, text=True)
+            except subprocess.CalledProcessError as e:
+                error_msg = f"Simple concatenation failed: {e.stderr if e.stderr else str(e)}"
+                self.log_progress("FAILURE", error_msg)
+                import traceback
+                self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+                raise Exception(error_msg) from e
+            except Exception as e:
+                self.log_progress("FAILURE", f"Simple concatenation failed: {str(e)}")
+                import traceback
+                self.log_progress("FAILURE", f"Traceback: {traceback.format_exc()}")
+                raise
+        # Get final info
+        try:
+            result = subprocess.run(
+                ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
+                 '-of', 'default=noprint_wrappers=1:nokey=1', output_path],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode != 0:
+                raise Exception(f"ffprobe failed: {result.stderr}")
+            duration = float(result.stdout.strip())
+        except Exception as e:
+            self.log_progress("WARNING", f"Could not get video duration: {str(e)} (continuing anyway)")
+            duration = 0.0  # Continue even if duration check fails
+        self.log_progress("COMPLETE", f"Video generation complete: {output_path} ({duration:.2f}s, {len(video_paths)} scenes)")
+        self.log_progress("SUCCESS", f"Video saved to: {output_path}")
+        print(f"\n{'='*70}")
+        print("GENERATION COMPLETE")
+        print(f"{'='*70}")
+        print(f"Location: {output_path}")
+        print(f"Duration: {duration:.2f} seconds")
+        print(f"Scenes: {len(video_paths)}")
+        if use_enhanced_first_frame:
+            print(f"Enhanced with Nano Banana Pro ({enhanced_resolution})")
+        if smooth_transitions:
+            print(f"Smooth transitions: {transition_duration}s crossfade")
+        print(f"{'='*70}\n")
+        return output_path
+def main():
+    """CLI entry point"""
+    parser = argparse.ArgumentParser(
+        description='Generate long-form videos using Veo 3.1',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python video_generator.py "Scene 1" "Scene 2" "Scene 3"
+  python video_generator.py --enhanced --resolution 2K "Scene 1" "Scene 2"
+  python video_generator.py -o my_video.mp4 "Scene 1" "Scene 2"
+        """
+    )
+    parser.add_argument(
+        'scenes',
+        nargs='+',
+        help='Scene prompts (at least one required)'
+    )
+    parser.add_argument(
+        '-o', '--output',
+        default='longform_video.mp4',
+        help='Output filename (default: longform_video.mp4)'
+    )
+    parser.add_argument(
+        '--enhanced',
+        action='store_true',
+        help='Use Nano Banana Pro for first frame'
+    )
+    parser.add_argument(
+        '--resolution',
+        choices=['1K', '2K', '4K'],
+        default='2K',
+        help='Enhanced frame resolution (default: 2K)'
+    )
+    parser.add_argument(
+        '--no-smooth',
+        action='store_true',
+        help='Disable smooth transitions'
+    )
+    parser.add_argument(
+        '--transition-duration',
+        type=float,
+        default=0.5,
+        help='Transition duration in seconds (default: 0.5)'
+    )
+    parser.add_argument(
+        '--no-frame-chaining',
+        action='store_true',
+        help='Disable frame chaining'
+    )
+    args = parser.parse_args()
+    # Get API key from environment
+    api_key = os.getenv("GEMINI_API_KEY") or os.getenv("VEO3_API_KEY")
+    print("="*70)
+    print("LONG-FORM VIDEO GENERATOR")
+    print("Generate videos longer than 8 seconds with Veo 3.1")
+    print("="*70)
+    print()
+    scenes = args.scenes
+    print(f"Generating video with {len(scenes)} scenes")
+    print()
+    for i, scene in enumerate(scenes, 1):
+        preview = scene[:80].replace('\n', ' ')
+        print(f"Scene {i}: {preview}...")
+    print()
+    # Create generator (uses /home/user/task by default in sandbox)
+    generator = LongFormVideoGenerator(api_key=api_key)
+    # Print progress file location
+    print(f"Progress log: {generator.progress_file}")
+    print()
+    try:
+        # Generate video
+        video_path = generator.generate_longform_video(
+            scenes=scenes,
+            output_filename=args.output,
+            use_enhanced_first_frame=args.enhanced,
+            enhanced_resolution=args.resolution,
+            use_frame_chaining=not args.no_frame_chaining,
+            smooth_transitions=not args.no_smooth,
+            transition_duration=args.transition_duration
+        )
+        print(f"\nSUCCESS! Your video is ready: {video_path}")
+        print(f"Progress log: {generator.progress_file}")
+    except Exception as e:
+        generator.log_progress("FAILURE", f"Video generation failed: {str(e)}")
+        print(f"\nERROR: {e}")
+        import traceback
+        full_traceback = traceback.format_exc()
+        generator.log_progress("FAILURE", f"Full traceback: {full_traceback}")
+        traceback.print_exc()
+        sys.exit(1)
+if __name__ == "__main__":
+    main()