npm - @_vrsen/openswarm - Versions diffs - 0.1.0 - Mend

@_vrsen/openswarm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (316) hide show

package/docs_agent/tools/utils/html_validation.py ADDED Viewed

@@ -0,0 +1,102 @@
+from __future__ import annotations
+import re
+from typing import Iterable, List
+from bs4 import BeautifulSoup
# Labels for every unsupported HTML/CSS feature, in the order they are
# reported back to callers by find_unsupported_html.
UNSUPPORTED_ISSUES_ORDER = [
    "flex or grid layout (display: flex/grid)",
    "positioning or floats (position/float)",
    "pseudo-elements (::before/::after)",
    "advanced selectors (#id, attribute selectors, sibling combinators, pseudo-classes)",
    "unsupported visual effects (background-image, gradients, box-shadow, border-radius, transform)",
    "unsupported units (em, rem, %, vh, vw)",
]
# Compiled regexes that flag an issue when any of them matches raw CSS text.
# The "advanced selectors" label has no entry here: it is detected per
# selector by _scan_css_selectors rather than by a text pattern.
_ISSUE_TO_PATTERNS = {
    "flex or grid layout (display: flex/grid)": [
        re.compile(r"display\s*:\s*(flex|grid)\b", re.IGNORECASE),
    ],
    "positioning or floats (position/float)": [
        re.compile(r"\bposition\s*:\s*(absolute|relative|fixed|sticky)\b", re.IGNORECASE),
        re.compile(r"\bfloat\s*:\s*(left|right|inline-start|inline-end)\b", re.IGNORECASE),
    ],
    "pseudo-elements (::before/::after)": [
        re.compile(r"::before\b", re.IGNORECASE),
        re.compile(r"::after\b", re.IGNORECASE),
    ],
    "unsupported visual effects (background-image, gradients, box-shadow, border-radius, transform)": [
        re.compile(r"\bbackground-image\s*:", re.IGNORECASE),
        re.compile(r"\bbox-shadow\s*:", re.IGNORECASE),
        re.compile(r"\bborder-radius\s*:", re.IGNORECASE),
        re.compile(r"\btransform\s*:", re.IGNORECASE),
        re.compile(r"gradient\s*\(", re.IGNORECASE),
    ],
    "unsupported units (em, rem, %, vh, vw)": [
        # `\b` is applied only to the alphabetic units. "%" is not a word
        # character, so a trailing `\b` after it would only match when a word
        # character follows, and ordinary values such as "50%;" or "50% "
        # would slip through undetected.
        re.compile(r"(-?\d*\.?\d+)\s*((?:em|rem|vh|vw)\b|%)", re.IGNORECASE),
    ],
}
def find_unsupported_html(html_content: str) -> List[str]:
    """Return the unsupported HTML/CSS features found in *html_content*.

    The markup is parsed with BeautifulSoup's ``html.parser``. The text of
    every ``<style>`` element is checked against both the CSS patterns and the
    selector rules; the ``style`` attribute of every tag is checked against
    the CSS patterns only. The result lists each detected issue once, in the
    order defined by ``UNSUPPORTED_ISSUES_ORDER``.
    """
    parsed = BeautifulSoup(html_content, "html.parser")
    found: set = set()

    # Embedded stylesheets: declarations and selectors are both inspected.
    for style_block in parsed.find_all("style"):
        stylesheet = style_block.get_text() or ""
        _scan_css_text(stylesheet, found)
        _scan_css_selectors(stylesheet, found)

    # Inline styles: declarations only, there are no selectors to inspect.
    for element in parsed.find_all(True):
        declarations = element.get("style", "")
        if not declarations:
            continue
        _scan_css_text(declarations, found)

    return [label for label in UNSUPPORTED_ISSUES_ORDER if label in found]
def build_unsupported_error(issues: Iterable[str]) -> str:
    """Format *issues* as an error message with one ``- `` bullet per issue."""
    bullet_lines = [f"- {issue}" for issue in issues]
    return "Error: Unsupported HTML/CSS detected:\n" + "\n".join(bullet_lines)
def _scan_css_text(css_text: str, issues: set) -> None:
    """Add to *issues* every label whose patterns match *css_text*.

    Labels already present in *issues* are skipped, so their patterns are not
    evaluated again. The set is mutated in place; nothing is returned.
    """
    for label, regexes in _ISSUE_TO_PATTERNS.items():
        if label not in issues:
            for regex in regexes:
                if regex.search(css_text):
                    issues.add(label)
                    break  # one hit is enough for this label
def _scan_css_selectors(css_text: str, issues: set) -> None:
    """Record the "advanced selectors" issue if any selector in *css_text* needs it.

    Does nothing when the issue is already recorded. Otherwise every selector
    yielded by ``_iter_selectors`` is checked with
    ``_selector_has_unsupported`` and scanning stops at the first hit.
    """
    label = "advanced selectors (#id, attribute selectors, sibling combinators, pseudo-classes)"
    if label in issues:
        return
    # any() short-circuits, so no further selectors are examined after a hit.
    if any(
        _selector_has_unsupported(candidate)
        for group in _iter_selectors(css_text)
        for candidate in group
    ):
        issues.add(label)
def _iter_selectors(css_text: str) -> Iterable[List[str]]:
    """Yield the selector list of each ``selector { ... }`` rule in *css_text*.

    A rule is everything up to a ``{`` followed by a brace-free body and a
    closing ``}``. The text before the ``{`` is split on commas and stripped;
    rules with no non-empty selector yield nothing.
    """
    rule_pattern = re.compile(r"([^{]+)\{[^}]*\}", re.DOTALL)
    for rule in rule_pattern.finditer(css_text):
        pieces = (piece.strip() for piece in rule.group(1).split(","))
        names = [piece for piece in pieces if piece]
        if names:
            yield names
def _selector_has_unsupported(selector: str) -> bool:
    """Return True if *selector* contains any of ``# [ ] + ~ :``."""
    return any(marker in selector for marker in "#[]+~:")

package/helpers.py ADDED Viewed

@@ -0,0 +1,59 @@
+import os
+from composio import Composio
+from composio_openai_agents import OpenAIAgentsProvider
+from dotenv import load_dotenv
# Runs at import time, before any of the os.getenv lookups below.
load_dotenv()
# Composio clients created so far, keyed by the API key that was set in the
# environment when each one was built.
_composio_clients: dict[str, Composio] = {}
+def get_composio_user_id() -> str | None:
+    for key in ("COMPOSIO_USER_ID", "USER_ID"):
+        value = os.getenv(key)
+        if value:
+            return str(value)
+    return None
def get_composio_client() -> Composio | None:
    """Return a cached Composio client for the current API key, or None.

    Returns None when ``COMPOSIO_API_KEY`` is unset or empty. Otherwise one
    client is created per distinct key value and reused on later calls. The
    key is used only as the cache key here; it is not passed to the
    constructor.
    """
    api_key = os.getenv("COMPOSIO_API_KEY")
    if not api_key:
        return None
    if api_key not in _composio_clients:
        _composio_clients[api_key] = Composio(provider=OpenAIAgentsProvider())
    return _composio_clients[api_key]
def execute_composio_tool(tool_name: str, arguments: dict):
    """Execute the Composio tool *tool_name* with *arguments* for the configured user.

    Returns an ``{"error": ...}`` dict instead of raising when the API key or
    the user id is missing from the environment; otherwise returns whatever
    ``composio.tools.execute`` returns.
    """
    client = get_composio_client()
    uid = get_composio_user_id()
    if not client:
        return {"error": "COMPOSIO_API_KEY is not set."}
    if uid:
        return client.tools.execute(
            tool_name,
            user_id=uid,
            arguments=arguments,
            dangerously_skip_version_check=True,
        )
    return {"error": "COMPOSIO_USER_ID is not set."}
def get_composio_tools(**kwargs):
    """Fetch Composio tools for the configured user, forwarding *kwargs*.

    Returns an ``{"error": ...}`` dict instead of raising when the API key or
    the user id is missing from the environment; otherwise returns the result
    of ``composio.tools.get(user_id, **kwargs)``.
    """
    client = get_composio_client()
    uid = get_composio_user_id()
    if not client:
        return {"error": "COMPOSIO_API_KEY is not set."}
    if uid:
        return client.tools.get(uid, **kwargs)
    return {"error": "COMPOSIO_USER_ID is not set."}
# Module-level values resolved once at import time; either may be None when
# the corresponding environment variable is not set.
user_id, composio = get_composio_user_id(), get_composio_client()

package/image_generation_agent/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .image_generation_agent import create_image_generation_agent

package/image_generation_agent/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file

package/image_generation_agent/__pycache__/image_generation_agent.cpython-312.pyc ADDED Viewed

Binary file

package/image_generation_agent/image_generation_agent.py ADDED Viewed

@@ -0,0 +1,31 @@
+from agency_swarm import Agent, ModelSettings
+from agency_swarm.tools import LoadFileAttachment
+from openai.types.shared.reasoning import Reasoning
+from config import get_default_model, is_openai_provider
def create_image_generation_agent() -> Agent:
    """Build the "Image Agent" used for image generation, editing, and composition.

    The agent uses the default model from ``config``. Reasoning settings are
    attached only when ``is_openai_provider()`` is true; otherwise
    ``reasoning`` is None.
    """
    model = get_default_model()
    reasoning = Reasoning(summary="auto", effort="medium") if is_openai_provider() else None
    settings = ModelSettings(reasoning=reasoning, truncation="auto")
    starters = [
        "Generate a clean product hero image for my landing page.",
        "Edit this uploaded photo to match a cinematic style.",
        "Create two variants: one with Gemini and one with OpenAI image model.",
        "Combine these images into a polished ad creative.",
    ]
    return Agent(
        name="Image Agent",
        description="A specialized agent for image generation, editing, and composition.",
        instructions="instructions.md",
        tools_folder="./tools",
        tools=[LoadFileAttachment],
        model=model,
        model_settings=settings,
        conversation_starters=starters,
    )
if __name__ == "__main__":
    # Manual smoke test: run the agent on its own in the terminal demo.
    from agency_swarm import Agency

    demo_agency = Agency(create_image_generation_agent())
    demo_agency.terminal_demo()

package/image_generation_agent/instructions.md ADDED Viewed

@@ -0,0 +1,80 @@
+# Role
+You are an Image Generation Specialist focused on producing high-quality images and edits.
+# Goals
+- Generate images that match user intent with strong visual quality.
+- Choose the best model for each request and explain that choice briefly.
+- Use reference images when consistency or precise composition is required.
+- Deliver outputs with clear delivery confirmations and visual previews.
+# Process
+## 1) Analyze Requirements
+1. Identify whether the task is generation, editing, or composition.
+2. Identify style, aspect ratio, realism level, and any mandatory elements.
+3. Determine if reference images are required for consistency.
+## 2) Select a Model
+1. **Prefer `gemini-2.5-flash-image` by default** for most generation and editing tasks. It is the fastest high-quality option for iterative workflows and rapid variants.
+2. **Use `gemini-3-pro-image-preview` for precision-first outputs** where detail quality matters more than speed:
+   - Text-heavy images (headlines, labels, typography)
+   - Complex product compositions with multiple visual constraints
+   - High-fidelity brand assets where prompt adherence is critical
+   - Large, highly detailed prompts with many constraints or style directives
+   - Complex and precise image editing tasks that require strict instruction following
+3. **Use `gpt-image-1.5` when OpenAI is explicitly requested** or when the user asks for model comparison against Gemini outputs.
+4. **Model-specific aspect-ratio awareness**:
+   - Gemini models support a broader AR set in these tools.
+   - `gpt-image-1.5` in this agent supports `1:1`, `2:3`, and `3:2`.
+   - If a requested AR is unsupported for the chosen model, switch to a compatible model and explain why.
+5. Use a single model by default unless the user explicitly asks for multi-model output.
+## 3) Execute with Tools
+1. Use `GenerateImages` for text-to-image generation.
+2. Use `EditImages` for reference-driven edits.
+3. Use `CombineImages` when compositing multiple image references into one output. Use it whenever the user wants to place elements from one image into another image, for example putting a company logo from one image onto a product in another image.
+4. Use `RemoveBackground` to strip the background from an image and produce a transparent PNG. Use this whenever the user asks to remove, cut out, or isolate the subject from its background.
+5. If user-uploaded files are provided, use those file references directly.
+6. Include the file path in your response for every final user-facing output image/file.
+## 4) Validate and Deliver
+1. Perform a mandatory QC pass after every generation/edit:
+   - Compare result against user requirements for composition, scale, lighting, artifacts, and missing elements.
+   - Record issues explicitly as pass/fail checks.
+   - Analyze the image as if the user had asked you, "What's wrong with this image?"
+2. If any issue is found, perform one automatic correction pass before final delivery:
+   - Use the same model for small fixes.
+   - Upgrade to `gemini-3-pro-image-preview` for precision/composition/complex-editing issues.
+3. After auto-fix, run QC again and report final status.
+4. If issues still remain, explicitly state that they remain and propose exactly one next change.
+## 5) Final File Delivery
+1. Include the file path in your response for every final user-facing output image/file.
+2. Deliver only after QC is complete.
+3. If multiple final variants are requested, list all paths together.
+4. Do not include paths for intermediate test renders unless the user explicitly asks for them.
+# Output Format
+- Keep responses concise and action-oriented.
+- Include:
+  - Model used (and upgrade reason if model changed)
+  - What was generated/edited
+  - Absolute output path(s) for each delivered file.
+  - A 2-5 bullet QC checklist with Pass/Fail status and what changed in auto-fix
+  - One optional improvement suggestion (only if a fully passing result has not yet been achieved)
+# Additional Notes
+- Do not sanitize or weaken user intent; pass requirements faithfully to generation tools.
+- Avoid unnecessary parallel generation unless user asks for multiple variants or comparisons.
+- Prefer continuity through references for character/product consistency across outputs.
+- If quality is insufficient with `gemini-2.5-flash-image`, retry with `gemini-3-pro-image-preview` before proposing a major prompt rewrite.
+- Never skip QC reporting, even if the result looks good at first glance.

package/image_generation_agent/tools/CombineImages.py ADDED Viewed

@@ -0,0 +1,211 @@
+"""Combine multiple image references into a single generated composition."""
+from io import BytesIO
+from typing import Literal
+import os
+from openai import OpenAI
+from PIL import Image
+from pydantic import Field, field_validator, model_validator
+from agency_swarm import BaseTool
+from .utils.image_io import (
+    get_images_dir,
+    build_variant_output_name,
+    resolve_image_reference,
+    save_image,
+    image_to_base64_jpeg,
+    build_multimodal_outputs,
+    extract_gemini_image_and_usage,
+    extract_openai_images_and_usage,
+    run_parallel_variants_sync,
+    validate_aspect_ratio_for_model,
+    get_openai_size_for_aspect_ratio,
+)
class CombineImages(BaseTool):
    """
    Combine multiple images into a single generated composition
    using the selected model and instruction.
    """

    # NOTE(review): the class docstring and the Field descriptions below are
    # kept verbatim; presumably the tool framework surfaces them to the model
    # as the tool schema, so edits there would change runtime behavior — confirm.
    product_name: str = Field(..., description="Product namespace for output files.")
    image_refs: list[str] = Field(
        ...,
        description="List of image references (URLs, absolute paths, or generated image names).",
    )
    text_instruction: str = Field(..., description="Instruction for how images should be combined.")
    output_file_name: str = Field(
        ...,
        description=(
            "Output image name (without extension) or output path. "
            "If a path is provided, the image is saved at that path."
        ),
    )
    model: Literal["gemini-2.5-flash-image", "gemini-3-pro-image-preview", "gpt-image-1.5"] = Field(
        default="gemini-2.5-flash-image",
        description="Image model to use for composition.",
    )
    aspect_ratio: Literal["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"] = Field(
        default="1:1",
        description="Target aspect ratio. Model compatibility is validated automatically.",
    )
    num_variants: int = Field(default=1, description="Number of variants to generate (1-4).")

    @field_validator("product_name", "text_instruction", "output_file_name")
    @classmethod
    def _not_blank(cls, value: str) -> str:
        """Reject empty or whitespace-only strings; the value is returned unchanged."""
        if not value.strip():
            raise ValueError("value must not be empty")
        return value

    @field_validator("image_refs")
    @classmethod
    def _validate_refs(cls, value: list[str]) -> list[str]:
        """Require at least two references, none of them blank."""
        if len(value) < 2:
            raise ValueError("image_refs must include at least two images")
        for item in value:
            if not item.strip():
                raise ValueError("image reference must not be empty")
        return value

    @field_validator("num_variants")
    @classmethod
    def _validate_variants(cls, value: int) -> int:
        """Require ``num_variants`` to be within 1..4 inclusive."""
        if value < 1 or value > 4:
            raise ValueError("num_variants must be between 1 and 4")
        return value

    @model_validator(mode="after")
    def _validate_model_aspect_ratio(self) -> "CombineImages":
        """Check the model/aspect-ratio pair once all fields are populated."""
        validate_aspect_ratio_for_model(self.model, self.aspect_ratio)
        return self

    def run(self) -> list:
        """Resolve the references, compose them with the selected model, and return the outputs.

        Models whose name starts with ``gemini-`` use the Gemini path; any
        other model uses the OpenAI path. Usage metadata returned by the
        backend is not included in the result.
        """
        images_dir = get_images_dir(self.product_name)
        # Only the first element of each resolved reference is used.
        reference_images = [resolve_image_reference(self.product_name, ref)[0] for ref in self.image_refs]
        compose = self._run_gemini if self.model.startswith("gemini-") else self._run_openai
        results, _usage_metadata = compose(images_dir, reference_images)
        return build_multimodal_outputs(results, "Image composition complete")

    def _run_gemini(self, images_dir, reference_images: list[Image.Image]):
        """Generate ``num_variants`` compositions with a Gemini model.

        Returns:
            ``(results, usage_metadata)``: one dict per saved image
            (``image_name``, ``file_path``, ``preview_b64``) and the token
            counts summed across the variants that produced an image.

        Raises:
            ValueError: if ``GOOGLE_API_KEY`` is not set.
            RuntimeError: if no variant produced an image.
        """
        # Imported inside the method, so the Google SDK is only loaded when
        # this path actually runs.
        from google import genai
        from google.genai.types import GenerateContentConfig, ImageConfig

        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY is not set. Add it to your .env to use image composition.")
        client = genai.Client(api_key=api_key)

        def compose_single_variant(idx: int):
            """Generate, save, and describe one variant; None when no image came back."""
            response = client.models.generate_content(
                model=self.model,
                contents=[*reference_images, self.text_instruction],
                config=GenerateContentConfig(
                    image_config=ImageConfig(aspect_ratio=self.aspect_ratio),
                ),
            )
            image, usage = extract_gemini_image_and_usage(response)
            if image is None:
                return None
            variant_name = build_variant_output_name(self.output_file_name, idx, self.num_variants)
            image_name, file_path = save_image(image, variant_name, images_dir)
            return {
                "image_name": image_name,
                "file_path": file_path,
                "preview_b64": image_to_base64_jpeg(image),
                "prompt_tokens": float(usage.get("prompt_token_count") or 0),
                "candidate_tokens": float(usage.get("candidates_token_count") or 0),
            }

        raw_results = run_parallel_variants_sync(compose_single_variant, self.num_variants)
        # compose_single_variant returns None for a variant without an image.
        # Drop such entries so one empty variant cannot crash the token
        # accounting below with an AttributeError.
        # NOTE(review): the helper may already filter these out — confirm.
        produced = [item for item in (raw_results or []) if item is not None]
        if not produced:
            raise RuntimeError("Gemini did not return any composed images.")

        results: list[dict] = []
        total_prompt_tokens = 0.0
        total_candidate_tokens = 0.0
        for item in produced:
            # Token counts feed the aggregate and are removed from the
            # per-image payload.
            total_prompt_tokens += item.pop("prompt_tokens")
            total_candidate_tokens += item.pop("candidate_tokens")
            results.append(item)
        usage_metadata = {
            "prompt_token_count": total_prompt_tokens,
            "candidates_token_count": total_candidate_tokens,
        }
        return results, usage_metadata

    def _run_openai(self, images_dir, reference_images: list[Image.Image]):
        """Generate compositions with the OpenAI image edit endpoint.

        Returns:
            ``(results, usage_metadata)``: one dict per saved image
            (``image_name``, ``file_path``, ``preview_b64``) and the usage
            data extracted from the response.

        Raises:
            RuntimeError: if ``OPENAI_API_KEY`` is not set or the API returned
                no images.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY is required for OpenAI image composition.")
        size = get_openai_size_for_aspect_ratio(self.aspect_ratio)

        # Each reference is re-encoded as an in-memory PNG and given a
        # ``.name`` attribute before being passed to the API.
        input_buffers: list[BytesIO] = []
        for idx, image in enumerate(reference_images, start=1):
            buffer = BytesIO()
            image.save(buffer, format="PNG")
            buffer.seek(0)
            buffer.name = f"reference_{idx}.png"
            input_buffers.append(buffer)
        try:
            client = OpenAI(api_key=api_key)
            response = client.images.edit(
                model=self.model,
                image=input_buffers,
                prompt=self.text_instruction,
                size=size,
                n=self.num_variants,
            )
        finally:
            # Release the buffers whether or not the request succeeded.
            for buffer in input_buffers:
                buffer.close()

        images, usage_metadata = extract_openai_images_and_usage(response)
        if not images:
            raise RuntimeError("OpenAI image API did not return composed images.")
        results: list[dict] = []
        for idx, image in enumerate(images, start=1):
            # Names are derived from the number of images actually returned,
            # not from the number requested.
            variant_name = build_variant_output_name(self.output_file_name, idx, len(images))
            image_name, file_path = save_image(image, variant_name, images_dir)
            results.append(
                {
                    "image_name": image_name,
                    "file_path": file_path,
                    "preview_b64": image_to_base64_jpeg(image),
                }
            )
        return results, usage_metadata
if __name__ == "__main__":
    # Example test scenario: overlay a logo onto a product shot using the
    # OpenAI model, with two image names as the references.
    demo_tool = CombineImages(
        product_name="Test_Product",
        image_refs=["hero_image_example_oai", "edited_image_example"],
        text_instruction="Apply logo on a product. Keep the original product image as is.",
        output_file_name="combined_example",
        model="gpt-image-1.5",
        aspect_ratio="1:1",
        num_variants=1,
    )
    try:
        outputs = demo_tool.run()
        print(outputs)
    except Exception as exc:
        # Demo entry point: report the failure instead of showing a traceback.
        print(f"Image composition failed: {exc}")