PyPI - cudag - Versions diffs - 0.3.10__py3-none-any.whl - Mend

cudag 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

cudag/__init__.py +334 -0
cudag/annotation/__init__.py +77 -0
cudag/annotation/codegen.py +648 -0
cudag/annotation/config.py +545 -0
cudag/annotation/loader.py +342 -0
cudag/annotation/scaffold.py +121 -0
cudag/annotation/transcription.py +296 -0
cudag/cli/__init__.py +5 -0
cudag/cli/main.py +315 -0
cudag/cli/new.py +873 -0
cudag/core/__init__.py +364 -0
cudag/core/button.py +137 -0
cudag/core/canvas.py +222 -0
cudag/core/config.py +70 -0
cudag/core/coords.py +233 -0
cudag/core/data_grid.py +804 -0
cudag/core/dataset.py +678 -0
cudag/core/distribution.py +136 -0
cudag/core/drawing.py +75 -0
cudag/core/fonts.py +156 -0
cudag/core/generator.py +163 -0
cudag/core/grid.py +367 -0
cudag/core/grounding_task.py +247 -0
cudag/core/icon.py +207 -0
cudag/core/iconlist_task.py +301 -0
cudag/core/models.py +1251 -0
cudag/core/random.py +130 -0
cudag/core/renderer.py +190 -0
cudag/core/screen.py +402 -0
cudag/core/scroll_task.py +254 -0
cudag/core/scrollable_grid.py +447 -0
cudag/core/state.py +110 -0
cudag/core/task.py +293 -0
cudag/core/taskbar.py +350 -0
cudag/core/text.py +212 -0
cudag/core/utils.py +82 -0
cudag/data/surnames.txt +5000 -0
cudag/modal_apps/__init__.py +4 -0
cudag/modal_apps/archive.py +103 -0
cudag/modal_apps/extract.py +138 -0
cudag/modal_apps/preprocess.py +529 -0
cudag/modal_apps/upload.py +317 -0
cudag/prompts/SYSTEM_PROMPT.txt +104 -0
cudag/prompts/__init__.py +33 -0
cudag/prompts/system.py +43 -0
cudag/prompts/tools.py +382 -0
cudag/py.typed +0 -0
cudag/schemas/filesystem.json +90 -0
cudag/schemas/test_record.schema.json +113 -0
cudag/schemas/train_record.schema.json +90 -0
cudag/server/__init__.py +21 -0
cudag/server/app.py +232 -0
cudag/server/services/__init__.py +9 -0
cudag/server/services/generator.py +128 -0
cudag/templates/scripts/archive.sh +35 -0
cudag/templates/scripts/build.sh +13 -0
cudag/templates/scripts/extract.sh +54 -0
cudag/templates/scripts/generate.sh +116 -0
cudag/templates/scripts/pre-commit.sh +44 -0
cudag/templates/scripts/preprocess.sh +46 -0
cudag/templates/scripts/upload.sh +63 -0
cudag/templates/scripts/verify.py +428 -0
cudag/validation/__init__.py +35 -0
cudag/validation/validate.py +508 -0
cudag-0.3.10.dist-info/METADATA +570 -0
cudag-0.3.10.dist-info/RECORD +69 -0
cudag-0.3.10.dist-info/WHEEL +4 -0
cudag-0.3.10.dist-info/entry_points.txt +2 -0
cudag-0.3.10.dist-info/licenses/LICENSE +66 -0

cudag/core/task.py ADDED Viewed

@@ -0,0 +1,293 @@
+# Copyright (c) 2025 Tylt LLC. All rights reserved.
+# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
+# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
+"""Base task class and data structures for VLM training samples.
+Tasks are the "Controller" in VLMGen's Screen/State/Renderer/Task architecture.
+Each task type (click-day, scroll-grid, etc.) defines:
+- How to generate prompts
+- What tool calls are expected
+- How to create state for rendering
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from pathlib import Path
+from random import Random
+from typing import TYPE_CHECKING, Any
+from cudag.prompts.tools import ToolCall, format_tool_call
+if TYPE_CHECKING:
+    from cudag.core.dataset import DatasetConfig
+    from cudag.core.renderer import BaseRenderer
+@dataclass
+class TaskSample:
+    """One training sample produced by a task.
+    Each instance corresponds to a single record of the generated dataset.
+    Several samples may point at the same image file.
+    """
+    id: str
+    """Identifier of the sample; expected to be unique."""
+    image_path: Path
+    """Location of the rendered image file on disk."""
+    human_prompt: str
+    """Instruction text for the human turn (no <image> prefix)."""
+    tool_call: ToolCall
+    """Tool call the model is expected to respond with."""
+    pixel_coords: tuple[int, int]
+    """Target position in pixels (used for real_coords in metadata)."""
+    metadata: dict[str, Any] = field(default_factory=dict)
+    """Extra task-specific metadata; task_type is added automatically."""
+    image_size: tuple[int, int] = (1000, 1000)
+    """Dimensions of the rendered image as (width, height)."""
+@dataclass
+class TestCase:
+    """Output of test case generation.
+    This represents a single test case for evaluating model accuracy.
+    Despite its name this is a plain data record, not a pytest/unittest
+    test class.
+    """
+    # The "Test" name prefix makes pytest try to collect this class from any
+    # test module that imports it (and warn because it has an __init__).
+    # Opting out keeps collection quiet. The attribute is unannotated, so
+    # @dataclass does not turn it into a field.
+    __test__ = False
+    test_id: str
+    """Unique identifier for this test case."""
+    screenshot: Path
+    """Path to the test screenshot."""
+    prompt: str
+    """The human instruction (without <image> prefix)."""
+    expected_action: dict[str, Any]
+    """Expected tool call as dict (for JSON serialization)."""
+    tolerance: tuple[int, int] | int
+    """Allowed coordinate tolerance in RU units: an (x, y) pair or a single int."""
+    metadata: dict[str, Any] = field(default_factory=dict)
+    """Test-specific metadata."""
+    pixel_coords: tuple[int, int] | None = None
+    """Original pixel coordinates (before normalization)."""
+@dataclass
+class TaskContext:
+    """Bundle of shared resources handed to task generation methods.
+    Carries the random number generator, the running sample index, the
+    output location, the task configuration and the dataset name.
+    """
+    rng: Random
+    """Random number generator; seeded so that runs are reproducible."""
+    index: int
+    """Index of the sample being generated (used when building IDs)."""
+    output_dir: Path
+    """Directory that receives the generated images."""
+    config: dict[str, Any]
+    """Configuration specific to the task."""
+    dataset_name: str
+    """Prefix used in generated IDs."""
+class BaseTask(ABC):
+    """Abstract base class for task types.
+    Subclass this to create new task types. Each task type defines:
+    - task_type: Unique identifier (e.g., "click-day", "scroll-grid")
+    - generate_samples(): How to generate training samples from one image (1:N)
+    - generate_tests(): How to generate test cases from one image (1:N)
+    The key insight is that one rendered image can produce MULTIPLE training
+    samples. For example, a claim window image can have:
+    - "Click the procedure code" → one coordinate
+    - "Click the fee column" → different coordinate
+    - "Scroll down in the grid" → scroll action
+    Note: generate_sample() and generate_test() are abstract, so a concrete
+    subclass must define both of them even when it overrides the plural
+    generate_samples() / generate_tests() variants (the example below omits
+    them for brevity).
+    Example:
+        class ClaimWindowTask(BaseTask):
+            task_type = "claim-window"
+            def generate_samples(self, ctx: TaskContext) -> list[TaskSample]:
+                # 1. Generate state and render ONCE
+                state = ClaimWindowState.generate(ctx.rng)
+                image, metadata = self.renderer.render(state)
+                image_path = self.save_image(image, ctx)
+                # 2. Derive MULTIPLE samples from this one image
+                samples = []
+                # Sample 1: Click procedure code
+                samples.append(TaskSample(
+                    id=self.build_id(ctx, "_click_code"),
+                    image_path=image_path,
+                    human_prompt="Click the procedure code",
+                    tool_call=ToolCall.left_click(code_coords),
+                    pixel_coords=code_coords,
+                    ...
+                ))
+                # Sample 2: Click fee
+                samples.append(TaskSample(
+                    id=self.build_id(ctx, "_click_fee"),
+                    image_path=image_path,  # SAME IMAGE
+                    human_prompt="Click the fee column",
+                    tool_call=ToolCall.left_click(fee_coords),
+                    pixel_coords=fee_coords,
+                    ...
+                ))
+                return samples
+    """
+    task_type: str
+    """Unique identifier for this task type (e.g., 'click-day', 'scroll-grid')."""
+    # Image modes the JPEG encoder can write as-is; anything else (RGBA, LA,
+    # P, ...) has to be converted to RGB before saving as JPEG.
+    _JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})
+    def __init__(
+        self, config: DatasetConfig | dict[str, Any], renderer: BaseRenderer[Any]
+    ) -> None:
+        """Initialize the task.
+        Args:
+            config: Task-specific configuration from generator.yaml (DatasetConfig or dict)
+            renderer: Renderer instance for generating images
+        """
+        self.config = config
+        self.renderer = renderer
+    def generate_samples(self, ctx: TaskContext) -> list[TaskSample]:
+        """Generate training samples from one rendered image.
+        Override this to generate multiple samples from a single render.
+        Default implementation calls generate_sample() once for backwards compat.
+        Args:
+            ctx: Task context with RNG, index, output directory, etc.
+        Returns:
+            List of TaskSample objects (can share the same image_path).
+        """
+        return [self.generate_sample(ctx)]
+    @abstractmethod
+    def generate_sample(self, ctx: TaskContext) -> TaskSample:
+        """Generate one training sample.
+        For simple 1:1 image-to-sample tasks, implement this.
+        For 1:N image-to-samples, override generate_samples() instead.
+        Args:
+            ctx: Task context with RNG, index, output directory, etc.
+        Returns:
+            TaskSample with all required fields populated.
+        """
+        pass
+    def generate_tests(self, ctx: TaskContext) -> list[TestCase]:
+        """Generate test cases from one rendered image.
+        Override this to generate multiple tests from a single render.
+        Default implementation calls generate_test() once for backwards compat.
+        Args:
+            ctx: Task context with RNG, index, output directory, etc.
+        Returns:
+            List of TestCase objects (can share the same screenshot).
+        """
+        return [self.generate_test(ctx)]
+    @abstractmethod
+    def generate_test(self, ctx: TaskContext) -> TestCase:
+        """Generate one test case.
+        For simple 1:1 image-to-test tasks, implement this.
+        For 1:N image-to-tests, override generate_tests() instead.
+        Args:
+            ctx: Task context with RNG, index, output directory, etc.
+        Returns:
+            TestCase with all required fields populated.
+        """
+        pass
+    def format_gpt_response(self, tool_call: ToolCall) -> str:
+        """Format the GPT response for this sample.
+        Override this to customize the response format (e.g., add <think> tags).
+        Args:
+            tool_call: The tool call to format
+        Returns:
+            Formatted string for the "gpt" conversation turn
+        """
+        return format_tool_call(tool_call)
+    def save_image(
+        self,
+        image: Any,  # PIL.Image.Image
+        ctx: TaskContext,
+        extension: str = "jpg",
+        quality: int = 85,
+        prefix: str | None = None,
+    ) -> Path:
+        """Save a generated image to the output directory.
+        The file is written to ``<ctx.output_dir>/images/`` (created when
+        missing) and named ``<prefix or dataset_name>_<index:05d>.<extension>``.
+        When saving as JPEG, an image whose mode JPEG cannot encode (for
+        example RGBA) is converted to RGB first instead of failing in save().
+        Args:
+            image: PIL Image to save
+            ctx: Task context
+            extension: Image format (default: jpg)
+            quality: JPEG quality (only passed to save() for jpg/jpeg)
+            prefix: Optional prefix for filename (e.g., "eval" for eval images)
+        Returns:
+            Path to saved image
+        """
+        images_dir = ctx.output_dir / "images"
+        images_dir.mkdir(parents=True, exist_ok=True)
+        if prefix:
+            filename = f"{prefix}_{ctx.index:05d}.{extension}"
+        else:
+            filename = f"{ctx.dataset_name}_{ctx.index:05d}.{extension}"
+        path = images_dir / filename
+        if extension.lower() in ("jpg", "jpeg"):
+            # Objects without a ``mode`` attribute are passed through untouched.
+            mode = getattr(image, "mode", None)
+            if mode is not None and mode not in self._JPEG_MODES:
+                # convert() returns a new image; the caller's object is not modified.
+                image = image.convert("RGB")
+            image.save(path, quality=quality)
+        else:
+            image.save(path)
+        return path
+    def build_id(self, ctx: TaskContext, suffix: str = "") -> str:
+        """Build a sample ID from context.
+        Args:
+            ctx: Task context
+            suffix: Optional suffix to add (e.g., "_task")
+        Returns:
+            ``<dataset_name>_<index:05d>`` followed by the suffix, if any
+        """
+        base = f"{ctx.dataset_name}_{ctx.index:05d}"
+        return f"{base}{suffix}" if suffix else base

cudag/core/taskbar.py ADDED Viewed

@@ -0,0 +1,350 @@
+# Copyright (c) 2025 Tylt LLC. All rights reserved.
+# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
+# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
+"""Taskbar primitive for Windows-style taskbar rendering.
+Provides reusable TaskbarState and TaskbarRenderer for any generator
+that needs a taskbar at the bottom of the screen.
+The taskbar includes:
+- Icons (configurable position, varying N, random order)
+- DateTime display (time + date in Windows format)
+Example:
+    from cudag.core import TaskbarState, TaskbarRenderer
+    # Generate random taskbar state
+    state = TaskbarState.generate(
+        rng=rng,
+        icon_config=annotation_config.get_element_by_label("taskbar"),
+    )
+    # Render onto existing image
+    renderer = TaskbarRenderer(assets_dir="assets")
+    metadata = renderer.render_onto(image, state)
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from datetime import date
+from pathlib import Path
+from random import Random
+from typing import Any
+from PIL import Image, ImageDraw, ImageFont
+from cudag.core.icon import IconPlacement, IconSpec, TASKBAR_ICON
+@dataclass
+class TaskbarState:
+    """Snapshot of what a Windows-style taskbar displays.
+    Holds the icon placements plus the clock text and where it is drawn.
+    """
+    icons: list[IconPlacement] = field(default_factory=list)
+    """Placements of the icons shown on the taskbar."""
+    datetime_text: str = ""
+    """Clock text, time and date on separate lines (e.g., '1:30 PM\\n12/15/2025')."""
+    datetime_position: tuple[int, int] = (0, 0)
+    """Anchor (x, y) of the clock text."""
+    @classmethod
+    def generate(
+        cls,
+        rng: Random,
+        icon_config: Any | None = None,
+        datetime_position: tuple[int, int] = (1868, 1043),
+        icon_spec: IconSpec | None = None,
+        taskbar_left_margin: int = 946,
+        taskbar_y_offset: int = 1042,
+        icon_gap: int = 8,
+        target_date: date | None = None,
+    ) -> "TaskbarState":
+        """Build a randomized taskbar state.
+        The clock text is drawn from ``rng`` first; icons are placed afterwards
+        and only when ``icon_config`` is given.
+        Args:
+            rng: Random number generator
+            icon_config: Object exposing ``icons``, ``vary_n`` and ``random_order``
+            datetime_position: Position for the clock text
+            icon_spec: Icon specification (TASKBAR_ICON when omitted)
+            taskbar_left_margin: X coordinate of the first icon
+            taskbar_y_offset: Y coordinate shared by all icons
+            icon_gap: Horizontal gap between neighbouring icons
+            target_date: Date to show instead of a random one
+        Returns:
+            TaskbarState with clock text and, if configured, icon placements
+        """
+        result = cls()
+        result.datetime_text = cls._generate_datetime(rng, target_date)
+        result.datetime_position = datetime_position
+        if icon_config is None:
+            return result
+        spec = icon_spec if icon_spec else TASKBAR_ICON
+        result.icons = cls._place_icons(
+            rng, icon_config, spec, taskbar_left_margin, taskbar_y_offset, icon_gap
+        )
+        return result
+    @classmethod
+    def _generate_datetime(cls, rng: Random, target_date: date | None = None) -> str:
+        """Produce the clock text: a 12-hour time line and an M/D/YYYY date line.
+        Args:
+            rng: Random number generator; always used for the time part
+            target_date: Date to show, or None to draw a random one
+        Returns:
+            Text such as "1:30 PM\\n12/15/2025"
+        """
+        # Draw order matters for reproducibility: hour, minute, AM/PM, then date.
+        hour = rng.randint(1, 12)
+        minute = rng.randint(0, 59)
+        meridiem = rng.choice(["AM", "PM"])
+        if target_date is None:
+            # Random day is capped at 28 so it is valid in every month.
+            month, day, year = (
+                rng.randint(1, 12),
+                rng.randint(1, 28),
+                rng.randint(2024, 2025),
+            )
+        else:
+            month, day, year = target_date.month, target_date.day, target_date.year
+        return f"{hour}:{minute:02d} {meridiem}\n{month}/{day}/{year}"
+    @classmethod
+    def _place_icons(
+        cls,
+        rng: Random,
+        icon_config: Any,
+        icon_spec: IconSpec,
+        left_margin: int,
+        y_offset: int,
+        gap: int,
+    ) -> list[IconPlacement]:
+        """Choose the icons to show and lay them out left to right."""
+        vary_n = getattr(icon_config, "vary_n", False)
+        random_order = getattr(icon_config, "random_order", False)
+        icons = getattr(icon_config, "icons", [])
+        if not icons:
+            return []
+        # Split the configured icons into always-present and droppable ones.
+        required: list[Any] = []
+        optional: list[Any] = []
+        for entry in icons:
+            bucket = required if getattr(entry, "required", False) else optional
+            bucket.append(entry)
+        if vary_n and optional:
+            # Keep a random subset of the optional icons: at least one, and at
+            # least 40% of them (rounded down).
+            fewest = max(1, int(len(optional) * 0.4))
+            count = rng.randint(fewest, len(optional))
+            chosen = required + rng.sample(optional, count)
+        else:
+            chosen = required + optional
+        if random_order:
+            rng.shuffle(chosen)
+        placements: list[IconPlacement] = []
+        cursor = left_margin
+        for entry in chosen:
+            label = getattr(entry, "label", "")
+            # Fall back to the label when there is no icon file id.
+            icon_id = getattr(entry, "icon_file_id", "") or label
+            placements.append(
+                IconPlacement(
+                    icon_id=icon_id,
+                    x=cursor,
+                    y=y_offset,
+                    spec=icon_spec,
+                    label=label,
+                )
+            )
+            cursor += icon_spec.width + gap
+        return placements
+    def get_icon_by_id(self, icon_id: str) -> IconPlacement | None:
+        """Return the first placement whose icon_id matches, or None."""
+        return next(
+            (placed for placed in self.icons if placed.icon_id == icon_id), None
+        )
+    def to_ground_truth(self) -> dict[str, Any]:
+        """Export the state as a ground-truth dict with "icons" and "datetime"."""
+        icon_records = []
+        for placed in self.icons:
+            icon_records.append(
+                {
+                    "id": placed.icon_id,
+                    "label": placed.label,
+                    "bounds": placed.bounds,
+                    "center": placed.center,
+                }
+            )
+        clock_record = {
+            "text": self.datetime_text,
+            "position": self.datetime_position,
+        }
+        return {"icons": icon_records, "datetime": clock_record}
+class TaskbarRenderer:
+    """Renderer for Windows-style taskbar.
+    Composites taskbar icons and datetime onto existing images.
+    Designed to be used as a mixin or called directly.
+    Caveat: an icon whose image cannot be found is skipped without error,
+    yet render_onto() still reports it in the returned ground truth.
+    """
+    # Alternate icon IDs mapped to the cache key they should resolve to.
+    # Defined once here instead of being rebuilt on every _draw_icon() call.
+    _ICON_ALIASES = {
+        "open_dental": "od",
+        "open-dental": "od",
+        "file_explorer": "explorer",
+        "microsoft_edge": "edge",
+    }
+    def __init__(
+        self,
+        assets_dir: Path | str = "assets",
+        datetime_font_size: int = 9,
+    ):
+        """Initialize the taskbar renderer.
+        No files are read here; assets are loaded lazily by load_assets().
+        Args:
+            assets_dir: Path to assets directory containing icons/taskbar/
+            datetime_font_size: Font size for datetime text
+        """
+        self.assets_dir = Path(assets_dir)
+        self.datetime_font_size = datetime_font_size
+        self._icon_cache: dict[str, Image.Image] = {}
+        self._datetime_font: ImageFont.FreeTypeFont | ImageFont.ImageFont | None = None
+        self._loaded = False
+    def load_assets(self) -> None:
+        """Load fonts and icon images (only on the first call).
+        Uses ``fonts/segoeui.ttf`` under the assets directory when present,
+        otherwise Pillow's default font. Every ``icons/taskbar/*.png`` file is
+        cached as an RGBA image under the ID derived from its filename.
+        """
+        if self._loaded:
+            return
+        # Load datetime font
+        font_path = self.assets_dir / "fonts" / "segoeui.ttf"
+        if font_path.exists():
+            self._datetime_font = ImageFont.truetype(
+                str(font_path), self.datetime_font_size
+            )
+        else:
+            self._datetime_font = ImageFont.load_default()
+        # Load taskbar icons
+        icons_dir = self.assets_dir / "icons" / "taskbar"
+        if icons_dir.exists():
+            # Sorted so that, when two filenames reduce to the same icon ID,
+            # the winner does not depend on filesystem listing order.
+            for icon_path in sorted(icons_dir.glob("*.png")):
+                icon_id = self._extract_icon_id(icon_path.stem)
+                # convert() yields an independent image, so the source file
+                # can be closed as soon as the block exits.
+                with Image.open(icon_path) as source:
+                    self._icon_cache[icon_id] = source.convert("RGBA")
+        self._loaded = True
+    def _extract_icon_id(self, filename: str) -> str:
+        """Extract icon ID from filename.
+        The name is lower-cased, then known prefixes are stripped (each at
+        most once, in a fixed order), then a trailing "clean" marker.
+        Examples:
+            icon-tb-od -> od
+            icon-od-clean -> od
+            taskbar_m365 -> m365
+            taskbar_open-dental -> open-dental
+        """
+        name = filename.lower()
+        for prefix in ("icon-", "icon_", "tb-", "tb_", "taskbar_", "taskbar-"):
+            if name.startswith(prefix):
+                name = name[len(prefix) :]
+        for suffix in ("-clean", "_clean"):
+            if name.endswith(suffix):
+                name = name[: -len(suffix)]
+        return name
+    def render_onto(
+        self,
+        image: Image.Image,
+        state: TaskbarState,
+    ) -> dict[str, Any]:
+        """Render taskbar onto an existing image.
+        Args:
+            image: PIL Image to render onto (modified in place)
+            state: TaskbarState with icons and datetime
+        Returns:
+            Metadata dict with icon positions and datetime info, taken from
+            state.to_ground_truth() (includes icons that had no image to draw)
+        """
+        self.load_assets()
+        draw = ImageDraw.Draw(image)
+        # Draw icons (paste with alpha compositing)
+        for icon in state.icons:
+            self._draw_icon(image, icon)
+        # Draw datetime
+        self._draw_datetime(draw, state)
+        return state.to_ground_truth()
+    def _draw_icon(self, image: Image.Image, icon: IconPlacement) -> None:
+        """Draw a single taskbar icon; do nothing if no image is cached for it.
+        The icon ID is matched case-insensitively, first directly and then
+        through the alias table.
+        """
+        icon_id = icon.icon_id.lower()
+        icon_img = self._icon_cache.get(icon_id)
+        if icon_img is None:
+            aliased_id = self._ICON_ALIASES.get(icon_id)
+            if aliased_id:
+                icon_img = self._icon_cache.get(aliased_id)
+        if icon_img is None:
+            return
+        # Ensure icon has alpha channel for compositing
+        if icon_img.mode != "RGBA":
+            icon_img = icon_img.convert("RGBA")
+        # Passing the RGBA icon as the mask makes paste() use its alpha band,
+        # whatever the destination mode; no separate RGB branch is needed.
+        image.paste(icon_img, (icon.x, icon.y), icon_img)
+    def _draw_datetime(self, draw: ImageDraw.ImageDraw, state: TaskbarState) -> None:
+        """Draw the datetime text, one line below the other.
+        Each line is centered horizontally on the x of state.datetime_position;
+        lines are spaced datetime_font_size + 2 pixels apart.
+        """
+        if not state.datetime_text or not self._datetime_font:
+            return
+        x, y = state.datetime_position
+        lines = state.datetime_text.split("\n")
+        line_height = self.datetime_font_size + 2
+        for i, line in enumerate(lines):
+            line_y = y + i * line_height
+            # Center align text
+            bbox = draw.textbbox((0, 0), line, font=self._datetime_font)
+            text_width = bbox[2] - bbox[0]
+            text_x = x - text_width // 2
+            draw.text((text_x, line_y), line, fill="black", font=self._datetime_font)