@weirdfingers/baseboards 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/index.js +131 -11
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/templates/api/alembic/env.py +9 -1
- package/templates/api/alembic/versions/20250101_000000_initial_schema.py +107 -49
- package/templates/api/alembic/versions/20251022_174729_remove_provider_name_from_generations.py +7 -3
- package/templates/api/alembic/versions/20251023_165852_switch_to_declarative_base_and_mapping.py +57 -1
- package/templates/api/alembic/versions/20251202_000000_add_artifact_lineage.py +134 -0
- package/templates/api/alembic/versions/2025925_62735_add_seed_data_for_default_tenant.py +8 -5
- package/templates/api/config/generators.yaml +111 -0
- package/templates/api/src/boards/__init__.py +1 -1
- package/templates/api/src/boards/api/app.py +2 -1
- package/templates/api/src/boards/api/endpoints/tenant_registration.py +1 -1
- package/templates/api/src/boards/api/endpoints/uploads.py +150 -0
- package/templates/api/src/boards/auth/factory.py +1 -1
- package/templates/api/src/boards/dbmodels/__init__.py +8 -22
- package/templates/api/src/boards/generators/artifact_resolution.py +45 -12
- package/templates/api/src/boards/generators/implementations/fal/audio/__init__.py +16 -1
- package/templates/api/src/boards/generators/implementations/fal/audio/beatoven_music_generation.py +171 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/beatoven_sound_effect_generation.py +167 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/elevenlabs_sound_effects_v2.py +194 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/elevenlabs_tts_eleven_v3.py +209 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/fal_elevenlabs_tts_turbo_v2_5.py +206 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/fal_minimax_speech_26_hd.py +237 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/minimax_speech_2_6_turbo.py +1 -1
- package/templates/api/src/boards/generators/implementations/fal/image/__init__.py +30 -0
- package/templates/api/src/boards/generators/implementations/fal/image/clarity_upscaler.py +220 -0
- package/templates/api/src/boards/generators/implementations/fal/image/crystal_upscaler.py +173 -0
- package/templates/api/src/boards/generators/implementations/fal/image/fal_ideogram_character.py +227 -0
- package/templates/api/src/boards/generators/implementations/fal/image/flux_2.py +203 -0
- package/templates/api/src/boards/generators/implementations/fal/image/flux_2_edit.py +230 -0
- package/templates/api/src/boards/generators/implementations/fal/image/flux_2_pro.py +204 -0
- package/templates/api/src/boards/generators/implementations/fal/image/flux_2_pro_edit.py +221 -0
- package/templates/api/src/boards/generators/implementations/fal/image/gemini_25_flash_image.py +177 -0
- package/templates/api/src/boards/generators/implementations/fal/image/gpt_image_1_edit_image.py +182 -0
- package/templates/api/src/boards/generators/implementations/fal/image/gpt_image_1_mini.py +167 -0
- package/templates/api/src/boards/generators/implementations/fal/image/ideogram_character_edit.py +299 -0
- package/templates/api/src/boards/generators/implementations/fal/image/ideogram_v2.py +190 -0
- package/templates/api/src/boards/generators/implementations/fal/image/nano_banana_pro_edit.py +226 -0
- package/templates/api/src/boards/generators/implementations/fal/image/qwen_image.py +249 -0
- package/templates/api/src/boards/generators/implementations/fal/image/qwen_image_edit.py +244 -0
- package/templates/api/src/boards/generators/implementations/fal/video/__init__.py +42 -0
- package/templates/api/src/boards/generators/implementations/fal/video/bytedance_seedance_v1_pro_text_to_video.py +209 -0
- package/templates/api/src/boards/generators/implementations/fal/video/creatify_lipsync.py +161 -0
- package/templates/api/src/boards/generators/implementations/fal/video/fal_bytedance_seedance_v1_pro_image_to_video.py +222 -0
- package/templates/api/src/boards/generators/implementations/fal/video/fal_minimax_hailuo_02_standard_text_to_video.py +152 -0
- package/templates/api/src/boards/generators/implementations/fal/video/fal_pixverse_lipsync.py +197 -0
- package/templates/api/src/boards/generators/implementations/fal/video/fal_sora_2_text_to_video.py +173 -0
- package/templates/api/src/boards/generators/implementations/fal/video/infinitalk.py +221 -0
- package/templates/api/src/boards/generators/implementations/fal/video/kling_video_v2_5_turbo_pro_image_to_video.py +175 -0
- package/templates/api/src/boards/generators/implementations/fal/video/minimax_hailuo_2_3_pro_image_to_video.py +153 -0
- package/templates/api/src/boards/generators/implementations/fal/video/sora2_image_to_video.py +172 -0
- package/templates/api/src/boards/generators/implementations/fal/video/sora_2_image_to_video_pro.py +175 -0
- package/templates/api/src/boards/generators/implementations/fal/video/sora_2_text_to_video_pro.py +163 -0
- package/templates/api/src/boards/generators/implementations/fal/video/sync_lipsync_v2_pro.py +155 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veed_lipsync.py +174 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veo3.py +194 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veo31_first_last_frame_to_video.py +1 -1
- package/templates/api/src/boards/generators/implementations/fal/video/wan_pro_image_to_video.py +158 -0
- package/templates/api/src/boards/graphql/access_control.py +1 -1
- package/templates/api/src/boards/graphql/mutations/root.py +16 -4
- package/templates/api/src/boards/graphql/resolvers/board.py +0 -2
- package/templates/api/src/boards/graphql/resolvers/generation.py +10 -233
- package/templates/api/src/boards/graphql/resolvers/lineage.py +381 -0
- package/templates/api/src/boards/graphql/resolvers/upload.py +463 -0
- package/templates/api/src/boards/graphql/types/generation.py +62 -26
- package/templates/api/src/boards/middleware.py +1 -1
- package/templates/api/src/boards/storage/factory.py +2 -2
- package/templates/api/src/boards/tenant_isolation.py +9 -9
- package/templates/api/src/boards/workers/actors.py +10 -1
- package/templates/web/package.json +1 -1
- package/templates/web/src/app/boards/[boardId]/page.tsx +14 -5
- package/templates/web/src/app/lineage/[generationId]/page.tsx +233 -0
- package/templates/web/src/components/boards/ArtifactPreview.tsx +20 -1
- package/templates/web/src/components/boards/UploadArtifact.tsx +253 -0
package/templates/api/src/boards/generators/implementations/fal/video/fal_sora_2_text_to_video.py
ADDED
@@ -0,0 +1,173 @@
+"""
+Sora 2 text-to-video generator.
+
+Text-to-video endpoint for Sora 2, OpenAI's state-of-the-art video model capable of
+creating richly detailed, dynamic clips with audio from natural language prompts.
+
+Based on Fal AI's fal-ai/sora-2/text-to-video model.
+See: https://fal.ai/models/fal-ai/sora-2/text-to-video
+"""
+
+import os
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
+
+
+class Sora2TextToVideoInput(BaseModel):
+    """Input schema for Sora 2 text-to-video generation.
+
+    Artifact fields are automatically detected via type introspection
+    and resolved from generation IDs to artifact objects.
+    """
+
+    prompt: str = Field(
+        description="Text description of desired video",
+        min_length=1,
+        max_length=5000,
+    )
+    resolution: Literal["720p"] = Field(
+        default="720p",
+        description="Video output quality (currently only 720p is supported)",
+    )
+    aspect_ratio: Literal["9:16", "16:9"] = Field(
+        default="16:9",
+        description="Video dimensions",
+    )
+    duration: Literal[4, 8, 12] = Field(
+        default=4,
+        description="Video length in seconds",
+    )
+
+
+class FalSora2TextToVideoGenerator(BaseGenerator):
+    """Generator for text-to-video using Sora 2."""
+
+    name = "fal-sora-2-text-to-video"
+    description = (
+        "Fal: Sora 2 - OpenAI's state-of-the-art text-to-video with richly detailed, dynamic clips"
+    )
+    artifact_type = "video"
+
+    def get_input_schema(self) -> type[Sora2TextToVideoInput]:
+        """Return the input schema for this generator."""
+        return Sora2TextToVideoInput
+
+    async def generate(
+        self, inputs: Sora2TextToVideoInput, context: GeneratorExecutionContext
+    ) -> GeneratorResult:
+        """Generate video using fal.ai Sora 2 model."""
+        # Check for API key
+        if not os.getenv("FAL_KEY"):
+            raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")
+
+        # Import fal_client
+        try:
+            import fal_client
+        except ImportError as e:
+            raise ImportError(
+                "fal.ai SDK is required for FalSora2TextToVideoGenerator. "
+                "Install with: pip install weirdfingers-boards[generators-fal]"
+            ) from e
+
+        # Prepare arguments for fal.ai API
+        arguments = {
+            "prompt": inputs.prompt,
+            "resolution": inputs.resolution,
+            "aspect_ratio": inputs.aspect_ratio,
+            "duration": inputs.duration,
+        }
+
+        # Submit async job
+        handler = await fal_client.submit_async(
+            "fal-ai/sora-2/text-to-video",
+            arguments=arguments,
+        )
+
+        # Store external job ID
+        await context.set_external_job_id(handler.request_id)
+
+        # Stream progress updates
+        from .....progress.models import ProgressUpdate
+
+        event_count = 0
+        async for event in handler.iter_events(with_logs=True):
+            event_count += 1
+            # Sample every 3rd event to avoid spam
+            if event_count % 3 == 0:
+                # Extract logs if available
+                logs = getattr(event, "logs", None)
+                if logs:
+                    # Join log entries into a single message
+                    if isinstance(logs, list):
+                        message = " | ".join(str(log) for log in logs if log)
+                    else:
+                        message = str(logs)
+
+                    if message:
+                        await context.publish_progress(
+                            ProgressUpdate(
+                                job_id=handler.request_id,
+                                status="processing",
+                                progress=50.0,  # Approximate mid-point progress
+                                phase="processing",
+                                message=message,
+                            )
+                        )
+
+        # Get final result
+        result = await handler.get()
+
+        # Extract video from result
+        # fal.ai returns: {"video": {"url": "...", "content_type": "video/mp4",
+        #                  "width": ..., "height": ..., "duration": ..., "fps": ...}}
+        video_data = result.get("video")
+        if not video_data:
+            raise ValueError("No video returned from fal.ai API")
+
+        video_url = video_data.get("url")
+        if not video_url:
+            raise ValueError("Video missing URL in fal.ai response")
+
+        # Extract video metadata from response or use defaults
+        width = video_data.get("width")
+        height = video_data.get("height")
+        duration = video_data.get("duration")
+        fps = video_data.get("fps")
+
+        # If dimensions not provided, determine based on aspect ratio and resolution
+        if width is None or height is None:
+            # 720p dimensions
+            aspect_ratio_dimensions = {
+                "16:9": (1280, 720),
+                "9:16": (720, 1280),
+            }
+            width, height = aspect_ratio_dimensions.get(inputs.aspect_ratio, (1280, 720))
+
+        # Store video result
+        artifact = await context.store_video_result(
+            storage_url=video_url,
+            format="mp4",
+            width=width,
+            height=height,
+            duration=float(duration) if duration else float(inputs.duration),
+            fps=fps,
+            output_index=0,
+        )
+
+        return GeneratorResult(outputs=[artifact])
+
+    async def estimate_cost(self, inputs: Sora2TextToVideoInput) -> float:
+        """Estimate cost for Sora 2 generation.
+
+        Pricing information not provided in official documentation.
+        Estimated at $0.20 per video based on typical high-quality video generation costs.
+        Cost scales with duration.
+        """
+        # Approximate cost per video - Sora 2 is likely higher cost due to quality
+        base_cost = 0.20
+        # Scale by duration: 4s = 1x, 8s = 2x, 12s = 3x
+        duration_multiplier = inputs.duration / 4
+        return base_cost * duration_multiplier
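Reviewer note, not part of the diff: the input schema above pushes all range and enum enforcement into pydantic, so it can be exercised without a FAL_KEY or any network access. A minimal sketch, assuming the template's src layout exposes the module as boards.generators.implementations.fal.video.fal_sora_2_text_to_video:

from pydantic import ValidationError

from boards.generators.implementations.fal.video.fal_sora_2_text_to_video import (
    Sora2TextToVideoInput,
)

# Defaults come straight from the Field declarations: 720p, 16:9, 4 seconds.
inputs = Sora2TextToVideoInput(prompt="A paper boat drifting down a rain gutter")
print(inputs.model_dump())
# {'prompt': '...', 'resolution': '720p', 'aspect_ratio': '16:9', 'duration': 4}

# Literal[4, 8, 12] rejects unsupported durations before any API call is made.
try:
    Sora2TextToVideoInput(prompt="x", duration=16)
except ValidationError as exc:
    print(exc.errors()[0]["loc"])  # ('duration',)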
package/templates/api/src/boards/generators/implementations/fal/video/infinitalk.py
ADDED
@@ -0,0 +1,221 @@
+"""
+fal.ai infinitalk video generator.
+
+Generates talking avatar videos from an image and audio file. The avatar
+lip-syncs to the provided audio with natural facial expressions.
+
+Based on Fal AI's fal-ai/infinitalk model.
+See: https://fal.ai/models/fal-ai/infinitalk
+"""
+
+import os
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+from ....artifacts import AudioArtifact, ImageArtifact
+from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
+
+
+class InfinitalkInput(BaseModel):
+    """Input schema for infinitalk.
+
+    Artifact fields are automatically detected via type introspection
+    and resolved from generation IDs to artifact objects.
+    """
+
+    image: ImageArtifact = Field(
+        description=(
+            "Input image for the avatar. "
+            "If the aspect ratio doesn't match, it is resized and center cropped"
+        )
+    )
+    audio: AudioArtifact = Field(description="Audio file to synchronize with the avatar")
+    prompt: str = Field(description="Text prompt to guide video generation")
+    num_frames: int = Field(
+        default=145,
+        ge=41,
+        le=721,
+        description="Number of frames to generate",
+    )
+    resolution: Literal["480p", "720p"] = Field(
+        default="480p",
+        description="Output video resolution",
+    )
+    acceleration: Literal["none", "regular", "high"] = Field(
+        default="regular",
+        description="Acceleration level for generation speed",
+    )
+    seed: int = Field(
+        default=42,
+        description="Seed for reproducibility",
+    )
+
+
+class FalInfinitalkGenerator(BaseGenerator):
+    """Generator for talking avatar videos from image and audio."""
+
+    name = "fal-infinitalk"
+    description = "Fal: infinitalk - Generate talking avatar video from image and audio"
+    artifact_type = "video"
+
+    def get_input_schema(self) -> type[InfinitalkInput]:
+        """Return the input schema for this generator."""
+        return InfinitalkInput
+
+    async def generate(
+        self, inputs: InfinitalkInput, context: GeneratorExecutionContext
+    ) -> GeneratorResult:
+        """Generate talking avatar video using fal.ai infinitalk."""
+        # Check for API key
+        if not os.getenv("FAL_KEY"):
+            raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")
+
+        # Import fal_client
+        try:
+            import fal_client
+        except ImportError as e:
+            raise ImportError(
+                "fal.ai SDK is required for FalInfinitalkGenerator. "
+                "Install with: pip install weirdfingers-boards[generators-fal]"
+            ) from e
+
+        # Upload image and audio artifacts to Fal's public storage
+        # Fal API requires publicly accessible URLs
+        from ..utils import upload_artifacts_to_fal
+
+        # Upload image and audio separately
+        image_urls = await upload_artifacts_to_fal([inputs.image], context)
+        audio_urls = await upload_artifacts_to_fal([inputs.audio], context)
+
+        # Prepare arguments for fal.ai API
+        arguments = {
+            "image_url": image_urls[0],
+            "audio_url": audio_urls[0],
+            "prompt": inputs.prompt,
+            "num_frames": inputs.num_frames,
+            "resolution": inputs.resolution,
+            "acceleration": inputs.acceleration,
+            "seed": inputs.seed,
+        }
+
+        # Submit async job
+        handler = await fal_client.submit_async(
+            "fal-ai/infinitalk",
+            arguments=arguments,
+        )
+
+        # Store external job ID
+        await context.set_external_job_id(handler.request_id)
+
+        # Stream progress updates
+        from .....progress.models import ProgressUpdate
+
+        event_count = 0
+        async for event in handler.iter_events(with_logs=True):
+            event_count += 1
+            # Sample every 3rd event to avoid spam
+            if event_count % 3 == 0:
+                # Extract logs if available
+                logs = getattr(event, "logs", None)
+                if logs:
+                    # Join log entries into a single message
+                    if isinstance(logs, list):
+                        message = " | ".join(str(log) for log in logs if log)
+                    else:
+                        message = str(logs)
+
+                    if message:
+                        await context.publish_progress(
+                            ProgressUpdate(
+                                job_id=handler.request_id,
+                                status="processing",
+                                progress=50.0,  # Approximate mid-point progress
+                                phase="processing",
+                                message=message,
+                            )
+                        )
+
+        # Get final result
+        result = await handler.get()
+
+        # Extract video from result
+        # fal.ai returns: {"video": {"url": "...", "content_type": "video/mp4", ...}, "seed": 42}
+        video_data = result.get("video")
+
+        if not video_data:
+            raise ValueError("No video returned from fal.ai API")
+
+        video_url = video_data.get("url")
+        if not video_url:
+            raise ValueError("Video missing URL in fal.ai response")
+
+        # Extract format from content_type (e.g., "video/mp4" -> "mp4")
+        # Infinitalk always produces MP4 videos, so default to mp4
+        content_type = video_data.get("content_type", "video/mp4")
+        if content_type.startswith("video/"):
+            video_format = content_type.split("/")[-1]
+        else:
+            # If content_type is not a video mime type (e.g., application/octet-stream),
+            # default to mp4 since infinitalk only produces mp4 videos
+            video_format = "mp4"
+
+        # Store the video result
+        # Use input image dimensions and audio duration for metadata
+        # Estimate FPS based on num_frames and audio duration
+        fps = 30.0  # Default FPS
+        if inputs.audio.duration and inputs.audio.duration > 0:
+            fps = inputs.num_frames / inputs.audio.duration
+
+        # Parse resolution to get dimensions
+        width, height = self._parse_resolution(inputs.resolution)
+
+        artifact = await context.store_video_result(
+            storage_url=video_url,
+            format=video_format,
+            width=width,
+            height=height,
+            duration=inputs.audio.duration,
+            fps=int(fps),
+            output_index=0,
+        )
+
+        return GeneratorResult(outputs=[artifact])
+
+    def _parse_resolution(self, resolution: str) -> tuple[int, int]:
+        """Parse resolution string to width and height.
+
+        Args:
+            resolution: Resolution string like "480p" or "720p"
+
+        Returns:
+            Tuple of (width, height)
+        """
+        if resolution == "480p":
+            return (854, 480)
+        elif resolution == "720p":
+            return (1280, 720)
+        else:
+            # Default to 480p
+            return (854, 480)
+
+    async def estimate_cost(self, inputs: InfinitalkInput) -> float:
+        """Estimate cost for infinitalk generation in USD.
+
+        Pricing not specified in documentation, using estimate based on
+        typical video generation costs. Higher resolution and more frames
+        may increase cost.
+        """
+        # Base cost estimate per generation
+        base_cost = 0.10
+
+        # Adjust for resolution
+        if inputs.resolution == "720p":
+            base_cost *= 1.5
+
+        # Adjust for frame count (more frames = higher cost)
+        # Base estimate is for 145 frames
+        frame_multiplier = inputs.num_frames / 145.0
+        base_cost *= frame_multiplier
+
+        return base_cost
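Reviewer note, not part of the diff: unlike the Sora 2 endpoint, the infinitalk response carries no fps metadata, so the generator derives it from num_frames and the input audio's duration, falling back to 30.0. A standalone sketch of that arithmetic (the helper name is illustrative, not from the package):

def estimate_fps(num_frames: int, audio_duration: float | None) -> float:
    """Mirror the generator's fallback: 30.0 fps unless the audio duration is usable."""
    if audio_duration and audio_duration > 0:
        return num_frames / audio_duration
    return 30.0

print(estimate_fps(145, 5.0))   # 29.0 -- the 145-frame default over 5s of audio
print(estimate_fps(145, None))  # 30.0 -- fallback when duration metadata is missing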
package/templates/api/src/boards/generators/implementations/fal/video/kling_video_v2_5_turbo_pro_image_to_video.py
ADDED
@@ -0,0 +1,175 @@
+"""
+Kling v2.5 Turbo Pro image-to-video generator.
+
+Top-tier image-to-video generation with unparalleled motion fluidity, cinematic visuals,
+and exceptional prompt precision using Kling's v2.5 Turbo Pro model.
+
+Based on Fal AI's fal-ai/kling-video/v2.5-turbo/pro/image-to-video model.
+See: https://fal.ai/models/fal-ai/kling-video/v2.5-turbo/pro/image-to-video
+"""
+
+import os
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+from ....artifacts import ImageArtifact
+from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
+
+
+class KlingVideoV25TurboProImageToVideoInput(BaseModel):
+    """Input schema for Kling v2.5 Turbo Pro image-to-video generation.
+
+    Artifact fields (image_url) are automatically detected via type introspection
+    and resolved from generation IDs to ImageArtifact objects.
+    """
+
+    prompt: str = Field(
+        description="Text description of desired video content",
+        max_length=2500,
+    )
+    image_url: ImageArtifact = Field(
+        description="Source image for animation",
+    )
+    duration: Literal["5", "10"] = Field(
+        default="5",
+        description="Video length in seconds",
+    )
+    negative_prompt: str = Field(
+        default="blur, distort, and low quality",
+        description="Elements to exclude from output",
+        max_length=2500,
+    )
+    cfg_scale: float = Field(
+        default=0.5,
+        ge=0.0,
+        le=1.0,
+        description="Guidance strength controlling prompt adherence (0-1)",
+    )
+
+
+class FalKlingVideoV25TurboProImageToVideoGenerator(BaseGenerator):
+    """Generator for image-to-video using Kling v2.5 Turbo Pro."""
+
+    name = "fal-kling-video-v2-5-turbo-pro-image-to-video"
+    description = (
+        "Fal: Kling v2.5 Turbo Pro - top-tier image-to-video generation with cinematic visuals"
+    )
+    artifact_type = "video"
+
+    def get_input_schema(self) -> type[KlingVideoV25TurboProImageToVideoInput]:
+        """Return the input schema for this generator."""
+        return KlingVideoV25TurboProImageToVideoInput
+
+    async def generate(
+        self, inputs: KlingVideoV25TurboProImageToVideoInput, context: GeneratorExecutionContext
+    ) -> GeneratorResult:
+        """Generate video using fal.ai Kling v2.5 Turbo Pro image-to-video model."""
+        # Check for API key
+        if not os.getenv("FAL_KEY"):
+            raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")
+
+        # Import fal_client
+        try:
+            import fal_client
+        except ImportError as e:
+            raise ImportError(
+                "fal.ai SDK is required for FalKlingVideoV25TurboProImageToVideoGenerator. "
+                "Install with: pip install weirdfingers-boards[generators-fal]"
+            ) from e
+
+        # Upload image artifact to Fal's public storage
+        # Fal API requires publicly accessible URLs
+        from ..utils import upload_artifacts_to_fal
+
+        image_urls = await upload_artifacts_to_fal([inputs.image_url], context)
+
+        # Prepare arguments for fal.ai API
+        arguments = {
+            "prompt": inputs.prompt,
+            "image_url": image_urls[0],
+            "duration": inputs.duration,
+            "negative_prompt": inputs.negative_prompt,
+            "cfg_scale": inputs.cfg_scale,
+        }
+
+        # Submit async job
+        handler = await fal_client.submit_async(
+            "fal-ai/kling-video/v2.5-turbo/pro/image-to-video",
+            arguments=arguments,
+        )
+
+        # Store external job ID
+        await context.set_external_job_id(handler.request_id)
+
+        # Stream progress updates
+        from .....progress.models import ProgressUpdate
+
+        event_count = 0
+        async for event in handler.iter_events(with_logs=True):
+            event_count += 1
+            # Sample every 3rd event to avoid spam
+            if event_count % 3 == 0:
+                # Extract logs if available
+                logs = getattr(event, "logs", None)
+                if logs:
+                    # Join log entries into a single message
+                    if isinstance(logs, list):
+                        message = " | ".join(str(log) for log in logs if log)
+                    else:
+                        message = str(logs)
+
+                    if message:
+                        await context.publish_progress(
+                            ProgressUpdate(
+                                job_id=handler.request_id,
+                                status="processing",
+                                progress=50.0,  # Approximate mid-point progress
+                                phase="processing",
+                                message=message,
+                            )
+                        )
+
+        # Get final result
+        result = await handler.get()
+
+        # Extract video from result
+        # fal.ai returns: {"video": {"url": "...", "content_type": "video/mp4", ...}}
+        video_data = result.get("video")
+        if not video_data:
+            raise ValueError("No video returned from fal.ai API")
+
+        video_url = video_data.get("url")
+        if not video_url:
+            raise ValueError("Video missing URL in fal.ai response")
+
+        # Determine video dimensions based on input image
+        # Kling maintains the aspect ratio of the input image
+        # Use input image dimensions as reference
+        width = inputs.image_url.width
+        height = inputs.image_url.height
+
+        # Store video result
+        artifact = await context.store_video_result(
+            storage_url=video_url,
+            format="mp4",
+            width=width,
+            height=height,
+            duration=float(inputs.duration),  # Convert "5" or "10" to float
+            output_index=0,
+        )
+
+        return GeneratorResult(outputs=[artifact])
+
+    async def estimate_cost(self, inputs: KlingVideoV25TurboProImageToVideoInput) -> float:
+        """Estimate cost for Kling v2.5 Turbo Pro image-to-video generation.
+
+        Pricing information not provided in official documentation.
+        Estimated at $0.15 per video based on typical video generation costs.
+        Cost may vary based on duration and quality settings.
+        """
+        # Approximate cost per video
+        # 10-second videos may cost more than 5-second videos
+        base_cost = 0.15
+        duration_multiplier = 2.0 if inputs.duration == "10" else 1.0
+        return base_cost * duration_multiplier
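Reviewer note, not part of the diff: duration is declared as a string Literal ("5"/"10") and passed through to the endpoint as-is, then converted back with float() when the metadata is stored. The cost heuristic from estimate_cost in isolation (the function name is illustrative, not from the package):

def kling_cost_estimate(duration: str) -> float:
    """$0.15 base per clip, doubled for the 10-second option, as in estimate_cost above."""
    return 0.15 * (2.0 if duration == "10" else 1.0)

assert kling_cost_estimate("5") == 0.15
assert kling_cost_estimate("10") == 0.30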