@weirdfingers/baseboards 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +54 -28
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/templates/README.md +2 -0
- package/templates/api/.env.example +3 -0
- package/templates/api/config/generators.yaml +58 -0
- package/templates/api/pyproject.toml +1 -1
- package/templates/api/src/boards/__init__.py +1 -1
- package/templates/api/src/boards/api/endpoints/storage.py +85 -4
- package/templates/api/src/boards/api/endpoints/uploads.py +1 -2
- package/templates/api/src/boards/database/connection.py +98 -58
- package/templates/api/src/boards/generators/implementations/fal/audio/__init__.py +4 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/chatterbox_text_to_speech.py +176 -0
- package/templates/api/src/boards/generators/implementations/fal/audio/chatterbox_tts_turbo.py +195 -0
- package/templates/api/src/boards/generators/implementations/fal/image/__init__.py +14 -0
- package/templates/api/src/boards/generators/implementations/fal/image/bytedance_seedream_v45_edit.py +219 -0
- package/templates/api/src/boards/generators/implementations/fal/image/gemini_25_flash_image_edit.py +208 -0
- package/templates/api/src/boards/generators/implementations/fal/image/gpt_image_15_edit.py +216 -0
- package/templates/api/src/boards/generators/implementations/fal/image/gpt_image_1_5.py +177 -0
- package/templates/api/src/boards/generators/implementations/fal/image/reve_edit.py +178 -0
- package/templates/api/src/boards/generators/implementations/fal/image/reve_text_to_image.py +155 -0
- package/templates/api/src/boards/generators/implementations/fal/image/seedream_v45_text_to_image.py +180 -0
- package/templates/api/src/boards/generators/implementations/fal/video/__init__.py +18 -0
- package/templates/api/src/boards/generators/implementations/fal/video/kling_video_ai_avatar_v2_pro.py +168 -0
- package/templates/api/src/boards/generators/implementations/fal/video/kling_video_ai_avatar_v2_standard.py +159 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veed_fabric_1_0.py +180 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veo31.py +190 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veo31_fast.py +190 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veo31_fast_image_to_video.py +191 -0
- package/templates/api/src/boards/generators/implementations/fal/video/veo31_first_last_frame_to_video.py +13 -6
- package/templates/api/src/boards/generators/implementations/fal/video/wan_25_preview_image_to_video.py +212 -0
- package/templates/api/src/boards/generators/implementations/fal/video/wan_25_preview_text_to_video.py +208 -0
- package/templates/api/src/boards/generators/implementations/kie/__init__.py +11 -0
- package/templates/api/src/boards/generators/implementations/kie/base.py +316 -0
- package/templates/api/src/boards/generators/implementations/kie/image/__init__.py +3 -0
- package/templates/api/src/boards/generators/implementations/kie/image/nano_banana_edit.py +190 -0
- package/templates/api/src/boards/generators/implementations/kie/utils.py +98 -0
- package/templates/api/src/boards/generators/implementations/kie/video/__init__.py +8 -0
- package/templates/api/src/boards/generators/implementations/kie/video/veo3.py +161 -0
- package/templates/api/src/boards/graphql/resolvers/upload.py +1 -1
- package/templates/web/package.json +4 -1
- package/templates/web/src/app/boards/[boardId]/page.tsx +156 -24
- package/templates/web/src/app/globals.css +3 -0
- package/templates/web/src/app/layout.tsx +15 -5
- package/templates/web/src/components/boards/ArtifactInputSlots.tsx +9 -9
- package/templates/web/src/components/boards/ArtifactPreview.tsx +34 -18
- package/templates/web/src/components/boards/GenerationGrid.tsx +101 -7
- package/templates/web/src/components/boards/GenerationInput.tsx +21 -21
- package/templates/web/src/components/boards/GeneratorSelector.tsx +232 -30
- package/templates/web/src/components/boards/UploadArtifact.tsx +385 -75
- package/templates/web/src/components/header.tsx +3 -1
- package/templates/web/src/components/theme-provider.tsx +10 -0
- package/templates/web/src/components/theme-toggle.tsx +75 -0
- package/templates/web/src/components/ui/alert-dialog.tsx +157 -0
- package/templates/web/src/components/ui/toast.tsx +128 -0
- package/templates/web/src/components/ui/toaster.tsx +35 -0
- package/templates/web/src/components/ui/use-toast.ts +186 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fal.ai Chatterbox Text-to-Speech Turbo generator.
|
|
3
|
+
|
|
4
|
+
Generate expressive speech from text with paralinguistic controls like laughs,
|
|
5
|
+
sighs, coughs, and more. Supports voice cloning with custom audio samples.
|
|
6
|
+
|
|
7
|
+
Based on Fal AI's fal-ai/chatterbox/text-to-speech/turbo model.
|
|
8
|
+
See: https://fal.ai/models/fal-ai/chatterbox/text-to-speech/turbo
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
from typing import Literal
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, Field
|
|
15
|
+
|
|
16
|
+
from ....artifacts import AudioArtifact
|
|
17
|
+
from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
|
|
18
|
+
|
|
19
|
+
# Preset speaker voices accepted by the Chatterbox TTS endpoint.
ChatterboxVoice = Literal[
    "aaron", "abigail", "anaya", "andy", "archer",
    "brian", "chloe", "dylan", "emmanuel", "ethan",
    "evelyn", "gavin", "gordon", "ivan", "laura",
    "lucy", "madison", "marisol", "meera", "walter",
]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ChatterboxTtsTurboInput(BaseModel):
    """Input schema for Chatterbox TTS Turbo.

    Artifact fields are automatically detected via type introspection
    and resolved from generation IDs to artifact objects.
    """

    # Text to synthesize; may embed paralinguistic tags such as [laugh].
    text: str = Field(
        min_length=1,
        description=(
            "The text to be converted to speech. Supports paralinguistic tags: "
            "[clear throat], [sigh], [shush], [cough], [groan], [sniff], [gasp], "
            "[chuckle], [laugh]"
        ),
    )
    # Preset speaker; ignored by the API when a cloning clip is supplied.
    voice: ChatterboxVoice = Field(
        default="lucy", description="Preset voice to use for synthesis"
    )
    # Optional reference clip for voice cloning.
    audio_url: AudioArtifact | None = Field(
        default=None,
        description=(
            "Optional audio file (5-10 seconds) for voice cloning. "
            "If provided, this overrides the preset voice selection."
        ),
    )
    # Sampling temperature; larger values yield more varied delivery.
    temperature: float = Field(
        default=0.8,
        ge=0.05,
        le=2.0,
        description="Temperature for generation. Higher values create more varied speech patterns.",
    )
    # Fix the RNG seed to make repeated runs reproducible.
    seed: int | None = Field(
        default=None,
        description="Random seed for reproducible results. Set for consistent generations.",
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class FalChatterboxTtsTurboGenerator(BaseGenerator):
    """fal.ai generator wrapping the Chatterbox TTS Turbo speech model."""

    name = "fal-chatterbox-tts-turbo"
    artifact_type = "audio"
    description = (
        "Fal: Chatterbox TTS Turbo - "
        "Expressive text-to-speech with paralinguistic controls and voice cloning"
    )

    def get_input_schema(self) -> type[ChatterboxTtsTurboInput]:
        return ChatterboxTtsTurboInput

    async def generate(
        self, inputs: ChatterboxTtsTurboInput, context: GeneratorExecutionContext
    ) -> GeneratorResult:
        """Synthesize speech via fal.ai Chatterbox TTS Turbo and store the result."""
        # fal-client reads its credentials from the FAL_KEY environment variable.
        if not os.getenv("FAL_KEY"):
            raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")

        # Import lazily so this module loads without the optional SDK installed.
        try:
            import fal_client
        except ImportError as e:
            raise ImportError(
                "fal.ai SDK is required for FalChatterboxTtsTurboGenerator. "
                "Install with: pip install weirdfingers-boards[generators-fal]"
            ) from e

        # Build the request payload; optional fields are attached only when set.
        payload: dict[str, str | float | int] = {
            "text": inputs.text,
            "voice": inputs.voice,
            "temperature": inputs.temperature,
        }
        if inputs.seed is not None:
            payload["seed"] = inputs.seed

        # A cloning clip, when supplied, is pushed to Fal's public storage first
        # so the API receives a reachable URL.
        if inputs.audio_url is not None:
            from ..utils import upload_artifacts_to_fal

            uploaded = await upload_artifacts_to_fal([inputs.audio_url], context)
            payload["audio_url"] = uploaded[0]

        job = await fal_client.submit_async(
            "fal-ai/chatterbox/text-to-speech/turbo",
            arguments=payload,
        )

        # Record the remote request id so the job can be tracked externally.
        await context.set_external_job_id(job.request_id)

        from .....progress.models import ProgressUpdate

        # Relay every third log event as a progress update to limit noise.
        seen = 0
        async for event in job.iter_events(with_logs=True):
            seen += 1
            if seen % 3 != 0:
                continue

            raw_logs = getattr(event, "logs", None)
            if not raw_logs:
                continue

            if isinstance(raw_logs, list):
                note = " | ".join(str(entry) for entry in raw_logs if entry)
            else:
                note = str(raw_logs)

            if note:
                await context.publish_progress(
                    ProgressUpdate(
                        job_id=job.request_id,
                        status="processing",
                        progress=50.0,  # coarse mid-run placeholder
                        phase="processing",
                        message=note,
                    )
                )

        outcome = await job.get()

        # Expected response shape: {"audio": {"url": "..."}}
        audio_payload = outcome.get("audio")
        if audio_payload is None:
            raise ValueError("No audio data returned from fal.ai API")

        result_url = audio_payload.get("url")
        if not result_url:
            raise ValueError("Audio URL missing in fal.ai response")

        stored = await context.store_audio_result(
            storage_url=result_url,
            format="wav",  # Chatterbox TTS returns WAV format
            output_index=0,
        )
        return GeneratorResult(outputs=[stored])

    async def estimate_cost(self, inputs: ChatterboxTtsTurboInput) -> float:
        """Flat-rate estimate: roughly $0.03 per generation."""
        return 0.03
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Fal.ai image generators."""
|
|
2
2
|
|
|
3
|
+
from .bytedance_seedream_v45_edit import FalBytedanceSeedreamV45EditGenerator
|
|
3
4
|
from .clarity_upscaler import FalClarityUpscalerGenerator
|
|
4
5
|
from .crystal_upscaler import FalCrystalUpscalerGenerator
|
|
5
6
|
from .fal_ideogram_character import FalIdeogramCharacterGenerator
|
|
@@ -10,8 +11,11 @@ from .flux_2_pro_edit import FalFlux2ProEditGenerator
|
|
|
10
11
|
from .flux_pro_kontext import FalFluxProKontextGenerator
|
|
11
12
|
from .flux_pro_ultra import FalFluxProUltraGenerator
|
|
12
13
|
from .gemini_25_flash_image import FalGemini25FlashImageGenerator
|
|
14
|
+
from .gemini_25_flash_image_edit import FalGemini25FlashImageEditGenerator
|
|
15
|
+
from .gpt_image_1_5 import FalGptImage15Generator
|
|
13
16
|
from .gpt_image_1_edit_image import FalGptImage1EditImageGenerator
|
|
14
17
|
from .gpt_image_1_mini import FalGptImage1MiniGenerator
|
|
18
|
+
from .gpt_image_15_edit import FalGptImage15EditGenerator
|
|
15
19
|
from .ideogram_character_edit import FalIdeogramCharacterEditGenerator
|
|
16
20
|
from .ideogram_v2 import FalIdeogramV2Generator
|
|
17
21
|
from .imagen4_preview import FalImagen4PreviewGenerator
|
|
@@ -22,8 +26,12 @@ from .nano_banana_pro import FalNanoBananaProGenerator
|
|
|
22
26
|
from .nano_banana_pro_edit import FalNanoBananaProEditGenerator
|
|
23
27
|
from .qwen_image import FalQwenImageGenerator
|
|
24
28
|
from .qwen_image_edit import FalQwenImageEditGenerator
|
|
29
|
+
from .reve_edit import FalReveEditGenerator
|
|
30
|
+
from .reve_text_to_image import FalReveTextToImageGenerator
|
|
31
|
+
from .seedream_v45_text_to_image import FalSeedreamV45TextToImageGenerator
|
|
25
32
|
|
|
26
33
|
__all__ = [
|
|
34
|
+
"FalBytedanceSeedreamV45EditGenerator",
|
|
27
35
|
"FalClarityUpscalerGenerator",
|
|
28
36
|
"FalCrystalUpscalerGenerator",
|
|
29
37
|
"FalFlux2Generator",
|
|
@@ -32,7 +40,10 @@ __all__ = [
|
|
|
32
40
|
"FalFlux2ProEditGenerator",
|
|
33
41
|
"FalFluxProKontextGenerator",
|
|
34
42
|
"FalFluxProUltraGenerator",
|
|
43
|
+
"FalGemini25FlashImageEditGenerator",
|
|
35
44
|
"FalGemini25FlashImageGenerator",
|
|
45
|
+
"FalGptImage15EditGenerator",
|
|
46
|
+
"FalGptImage15Generator",
|
|
36
47
|
"FalGptImage1EditImageGenerator",
|
|
37
48
|
"FalGptImage1MiniGenerator",
|
|
38
49
|
"FalIdeogramCharacterGenerator",
|
|
@@ -46,4 +57,7 @@ __all__ = [
|
|
|
46
57
|
"FalNanoBananaProEditGenerator",
|
|
47
58
|
"FalQwenImageEditGenerator",
|
|
48
59
|
"FalQwenImageGenerator",
|
|
60
|
+
"FalReveEditGenerator",
|
|
61
|
+
"FalReveTextToImageGenerator",
|
|
62
|
+
"FalSeedreamV45TextToImageGenerator",
|
|
49
63
|
]
|
package/templates/api/src/boards/generators/implementations/fal/image/bytedance_seedream_v45_edit.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fal.ai ByteDance Seedream v4.5 Edit image editing generator.
|
|
3
|
+
|
|
4
|
+
Edit images using fal.ai's ByteDance Seedream v4.5 Edit model.
|
|
5
|
+
A new-generation image creation model that integrates image generation
|
|
6
|
+
and image editing capabilities into a single, unified architecture.
|
|
7
|
+
Supports editing up to 10 input images with a text prompt.
|
|
8
|
+
|
|
9
|
+
See: https://fal.ai/models/fal-ai/bytedance/seedream/v4.5/edit
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
from ....artifacts import ImageArtifact
|
|
18
|
+
from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
|
|
19
|
+
|
|
20
|
+
# Preset output dimensions accepted by the Seedream v4.5 edit endpoint.
ImageSizePreset = Literal[
    "square_hd", "square",
    "portrait_4_3", "portrait_16_9",
    "landscape_4_3", "landscape_16_9",
    "auto_2K", "auto_4K",
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BytedanceSeedreamV45EditInput(BaseModel):
    """Input schema for ByteDance Seedream v4.5 Edit.

    Artifact fields (like image_sources) are automatically detected via type
    introspection and resolved from generation IDs to ImageArtifact objects.
    """

    # Editing instruction applied to the supplied images.
    prompt: str = Field(description="The text prompt used to edit the image")
    # Source images; the endpoint accepts between one and ten.
    image_sources: list[ImageArtifact] = Field(
        min_length=1,
        max_length=10,
        description="List of input images for editing (up to 10 images)",
    )
    # How many edited variants to produce in one request.
    num_images: int = Field(
        default=1, ge=1, le=6, description="Number of images to generate"
    )
    # Optional size preset; the API defaults to 2048x2048 when omitted.
    image_size: ImageSizePreset | None = Field(
        default=None,
        description=(
            "The size of the generated image. Options: square_hd, square, "
            "portrait_4_3, portrait_16_9, landscape_4_3, landscape_16_9, "
            "auto_2K, auto_4K. Default is 2048x2048"
        ),
    )
    # Fix the RNG seed to make repeated runs reproducible.
    seed: int | None = Field(
        default=None,
        description="Random seed to control the stochasticity of image generation",
    )
    # When True the API inlines results as data URIs instead of hosted URLs.
    sync_mode: bool = Field(
        default=False,
        description=(
            "If True, the media will be returned as a data URI and the output "
            "data won't be available in the request history"
        ),
    )
    # Toggle server-side safety filtering of outputs.
    enable_safety_checker: bool = Field(
        default=True,
        description="Enables safety filtering on generated images",
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class FalBytedanceSeedreamV45EditGenerator(BaseGenerator):
    """ByteDance Seedream v4.5 Edit image editing generator using fal.ai."""

    name = "fal-bytedance-seedream-v45-edit"
    artifact_type = "image"
    description = "Fal: ByteDance Seedream v4.5 Edit - Unified image generation and editing"

    def get_input_schema(self) -> type[BytedanceSeedreamV45EditInput]:
        return BytedanceSeedreamV45EditInput

    async def generate(
        self, inputs: BytedanceSeedreamV45EditInput, context: GeneratorExecutionContext
    ) -> GeneratorResult:
        """Edit images using fal.ai ByteDance Seedream v4.5 Edit model.

        Raises:
            ValueError: if FAL_KEY is unset, or the API returns no images / a
                result entry without a URL.
            ImportError: if the optional fal-client SDK is not installed.
        """
        # Check for API key (fal-client uses FAL_KEY environment variable)
        if not os.getenv("FAL_KEY"):
            raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")

        # Import lazily so this module loads without the optional SDK installed.
        try:
            import fal_client
        except ImportError as e:
            raise ImportError(
                "fal.ai SDK is required for FalBytedanceSeedreamV45EditGenerator. "
                "Install with: pip install weirdfingers-boards[generators-fal]"
            ) from e

        # Upload image artifacts to Fal's public storage.
        # Fal API requires publicly accessible URLs, but our storage_url might be
        # a localhost URL or a private S3 bucket, so we push the inputs to Fal's
        # temporary storage first.
        from ..utils import upload_artifacts_to_fal

        image_urls = await upload_artifacts_to_fal(inputs.image_sources, context)

        # Prepare arguments for fal.ai API (typed, matching sibling fal generators).
        arguments: dict[str, object] = {
            "prompt": inputs.prompt,
            "image_urls": image_urls,
            "num_images": inputs.num_images,
            "sync_mode": inputs.sync_mode,
            "enable_safety_checker": inputs.enable_safety_checker,
        }

        # Optional parameters are attached only when explicitly provided.
        if inputs.image_size is not None:
            arguments["image_size"] = inputs.image_size
        if inputs.seed is not None:
            arguments["seed"] = inputs.seed

        # Submit async job and get a handler for polling/streaming.
        handler = await fal_client.submit_async(
            "fal-ai/bytedance/seedream/v4.5/edit",
            arguments=arguments,
        )

        # Store the external job ID for tracking
        await context.set_external_job_id(handler.request_id)

        from .....progress.models import ProgressUpdate

        # Stream progress updates (sample every 3rd event to avoid spam)
        event_count = 0
        async for event in handler.iter_events(with_logs=True):
            event_count += 1
            if event_count % 3 == 0:
                logs = getattr(event, "logs", None)
                if logs:
                    # Join log entries into a single message
                    if isinstance(logs, list):
                        message = " | ".join(str(log) for log in logs if log)
                    else:
                        message = str(logs)

                    if message:
                        await context.publish_progress(
                            ProgressUpdate(
                                job_id=handler.request_id,
                                status="processing",
                                progress=50.0,  # Approximate mid-point progress
                                phase="processing",
                                message=message,
                            )
                        )

        # Get final result
        result = await handler.get()

        # fal.ai returns:
        # {"images": [{"url": "...", "width": ..., "height": ..., ...}, ...]}
        images = result.get("images", [])
        if not images:
            raise ValueError("No images returned from fal.ai API")

        # Store each image using output_index
        artifacts = []
        for idx, image_data in enumerate(images):
            image_url = image_data.get("url")
            if not image_url:
                raise ValueError(f"Image {idx} missing URL in fal.ai response")

            # BUGFIX: use 'or' rather than a .get default so an *explicit* null
            # width/height in the API response also falls back to 2048 instead of
            # passing None through (matches the Gemini edit generator's handling).
            width = image_data.get("width") or 2048
            height = image_data.get("height") or 2048

            # Determine format from content_type or default to png; 'or' also
            # covers an explicit null content_type, which would otherwise crash
            # the substring checks below.
            content_type = image_data.get("content_type") or "image/png"
            if "jpeg" in content_type or "jpg" in content_type:
                format_type = "jpeg"
            elif "webp" in content_type:
                format_type = "webp"
            else:
                format_type = "png"

            # Store with appropriate output_index
            artifact = await context.store_image_result(
                storage_url=image_url,
                format=format_type,
                width=width,
                height=height,
                output_index=idx,
            )
            artifacts.append(artifact)

        return GeneratorResult(outputs=artifacts)

    async def estimate_cost(self, inputs: BytedanceSeedreamV45EditInput) -> float:
        """Estimate cost for ByteDance Seedream v4.5 Edit generation.

        Pricing not disclosed in documentation; uses a conservative $0.05 per
        requested image, based on similar high-quality image editing models.
        """
        per_image_cost = 0.05
        return per_image_cost * inputs.num_images
|
package/templates/api/src/boards/generators/implementations/fal/image/gemini_25_flash_image_edit.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Google Gemini 2.5 Flash Image edit image-to-image generator.
|
|
3
|
+
|
|
4
|
+
Google's state-of-the-art image generation and editing model available through fal.ai.
|
|
5
|
+
Performs image-to-image transformations and edits based on text prompts.
|
|
6
|
+
Supports multiple aspect ratios and output formats with batch generation up to 4 images.
|
|
7
|
+
|
|
8
|
+
Based on Fal AI's fal-ai/gemini-25-flash-image/edit model.
|
|
9
|
+
See: https://fal.ai/models/fal-ai/gemini-25-flash-image/edit
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
from ....artifacts import ImageArtifact
|
|
18
|
+
from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Gemini25FlashImageEditInput(BaseModel):
    """Input schema for Gemini 2.5 Flash Image edit generation.

    Artifact fields (like image_sources) are automatically detected via type
    introspection and resolved from generation IDs to ImageArtifact objects.
    """

    # Natural-language edit instruction applied to the source images.
    prompt: str = Field(
        min_length=3,
        max_length=5000,
        description="The editing instruction for image transformation",
    )
    # One or more source images to edit.
    image_sources: list[ImageArtifact] = Field(
        min_length=1,
        description="List of input images for editing (from previous generations)",
    )
    # How many edited variants to produce (API cap: 4).
    num_images: int = Field(
        default=1, ge=1, le=4, description="Number of images to generate (max 4)"
    )
    # Output aspect ratio; "auto" mirrors the input image's ratio.
    aspect_ratio: Literal[
        "auto", "21:9", "16:9", "3:2", "4:3", "5:4",
        "1:1", "4:5", "3:4", "2:3", "9:16",
    ] | None = Field(
        default="auto",
        description="Image aspect ratio. Default 'auto' uses input image's aspect ratio.",
    )
    # Encoding of the returned images.
    output_format: Literal["jpeg", "png", "webp"] = Field(
        default="png", description="Output image format"
    )
    # When True the API inlines results as data URIs instead of hosted URLs.
    sync_mode: bool = Field(
        default=False,
        description="Return media as data URI without request history storage",
    )
    # Experimental flag limiting the model to one generation per round.
    limit_generations: bool = Field(
        default=False,
        description="Restrict to single generation per round (experimental)",
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class FalGemini25FlashImageEditGenerator(BaseGenerator):
    """fal.ai wrapper around Google's Gemini 2.5 Flash Image edit model."""

    name = "fal-gemini-25-flash-image-edit"
    artifact_type = "image"
    description = "Fal: Gemini 2.5 Flash Image Edit - AI-powered image editing with Gemini"

    def get_input_schema(self) -> type[Gemini25FlashImageEditInput]:
        return Gemini25FlashImageEditInput

    async def generate(
        self, inputs: Gemini25FlashImageEditInput, context: GeneratorExecutionContext
    ) -> GeneratorResult:
        """Run an edit request on fal.ai and store every returned image."""
        # fal-client reads its credentials from the FAL_KEY environment variable.
        if not os.getenv("FAL_KEY"):
            raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")

        # Import lazily so this module loads without the optional SDK installed.
        try:
            import fal_client
        except ImportError as e:
            raise ImportError(
                "fal.ai SDK is required for FalGemini25FlashImageEditGenerator. "
                "Install with: pip install weirdfingers-boards[generators-fal]"
            ) from e

        # Input artifacts may sit behind localhost or private-bucket URLs, so
        # they are re-uploaded to Fal's temporary storage to obtain public URLs.
        from ..utils import upload_artifacts_to_fal

        source_urls = await upload_artifacts_to_fal(inputs.image_sources, context)

        request_args = {
            "prompt": inputs.prompt,
            "image_urls": source_urls,
            "num_images": inputs.num_images,
            "output_format": inputs.output_format,
            "sync_mode": inputs.sync_mode,
            "limit_generations": inputs.limit_generations,
        }
        if inputs.aspect_ratio is not None:
            request_args["aspect_ratio"] = inputs.aspect_ratio

        job = await fal_client.submit_async(
            "fal-ai/gemini-25-flash-image/edit",
            arguments=request_args,
        )

        # Record the remote request id so the job can be tracked externally.
        await context.set_external_job_id(job.request_id)

        from .....progress.models import ProgressUpdate

        # Forward a progress message for every third event so callers get
        # feedback without being flooded.
        tick = 0
        async for event in job.iter_events(with_logs=True):
            tick += 1
            if tick % 3:
                continue

            entries = getattr(event, "logs", None)
            if not entries:
                continue

            text = (
                " | ".join(str(item) for item in entries if item)
                if isinstance(entries, list)
                else str(entries)
            )
            if text:
                await context.publish_progress(
                    ProgressUpdate(
                        job_id=job.request_id,
                        status="processing",
                        progress=50.0,  # coarse mid-run placeholder
                        phase="processing",
                        message=text,
                    )
                )

        outcome = await job.get()

        # Expected response shape:
        # {"images": [{"url": ..., "width": ..., "height": ...}, ...],
        #  "description": "Text description from Gemini"}
        returned = outcome.get("images", [])
        if not returned:
            raise ValueError("No images returned from fal.ai API")

        stored = []
        for idx, item in enumerate(returned):
            url = item.get("url")
            if not url:
                raise ValueError(f"Image {idx} missing URL in fal.ai response")

            # 'or' guards against explicit nulls as well as missing keys.
            stored.append(
                await context.store_image_result(
                    storage_url=url,
                    format=inputs.output_format,
                    width=item.get("width") or 1024,
                    height=item.get("height") or 1024,
                    output_index=idx,
                )
            )

        return GeneratorResult(outputs=stored)

    async def estimate_cost(self, inputs: Gemini25FlashImageEditInput) -> float:
        """Estimate cost for Gemini 2.5 Flash Image edit generation.

        Pricing is not published by fal.ai; uses a placeholder of ~$0.039 per
        image, mirroring nano-banana (also Gemini-backed).
        """
        per_image_cost = 0.039
        return per_image_cost * inputs.num_images
|