argus-cv 1.1.0__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of argus-cv has been flagged as potentially problematic; see the registry's advisory page for more details.

Files changed (35) hide show
  1. {argus_cv-1.1.0 → argus_cv-1.2.0}/CHANGELOG.md +16 -0
  2. {argus_cv-1.1.0 → argus_cv-1.2.0}/PKG-INFO +1 -1
  3. {argus_cv-1.1.0 → argus_cv-1.2.0}/pyproject.toml +1 -1
  4. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/__init__.py +1 -1
  5. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/cli.py +262 -35
  6. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/core/yolo.py +355 -9
  7. {argus_cv-1.1.0 → argus_cv-1.2.0}/tests/conftest.py +71 -0
  8. argus_cv-1.2.0/tests/test_classification.py +205 -0
  9. {argus_cv-1.1.0 → argus_cv-1.2.0}/.github/workflows/ci.yml +0 -0
  10. {argus_cv-1.1.0 → argus_cv-1.2.0}/.github/workflows/docs.yml +0 -0
  11. {argus_cv-1.1.0 → argus_cv-1.2.0}/.github/workflows/release.yml +0 -0
  12. {argus_cv-1.1.0 → argus_cv-1.2.0}/.gitignore +0 -0
  13. {argus_cv-1.1.0 → argus_cv-1.2.0}/README.md +0 -0
  14. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/assets/javascripts/extra.js +0 -0
  15. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/assets/stylesheets/extra.css +0 -0
  16. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/getting-started/installation.md +0 -0
  17. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/getting-started/quickstart.md +0 -0
  18. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/guides/datasets.md +0 -0
  19. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/guides/listing.md +0 -0
  20. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/guides/splitting.md +0 -0
  21. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/guides/stats.md +0 -0
  22. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/guides/viewer.md +0 -0
  23. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/index.md +0 -0
  24. {argus_cv-1.1.0 → argus_cv-1.2.0}/docs/reference/cli.md +0 -0
  25. {argus_cv-1.1.0 → argus_cv-1.2.0}/mkdocs.yml +0 -0
  26. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/__main__.py +0 -0
  27. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/commands/__init__.py +0 -0
  28. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/core/__init__.py +0 -0
  29. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/core/base.py +0 -0
  30. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/core/coco.py +0 -0
  31. {argus_cv-1.1.0 → argus_cv-1.2.0}/src/argus/core/split.py +0 -0
  32. {argus_cv-1.1.0 → argus_cv-1.2.0}/tests/test_list_command.py +0 -0
  33. {argus_cv-1.1.0 → argus_cv-1.2.0}/tests/test_split_command.py +0 -0
  34. {argus_cv-1.1.0 → argus_cv-1.2.0}/tests/test_stats_command.py +0 -0
  35. {argus_cv-1.1.0 → argus_cv-1.2.0}/uv.lock +0 -0
@@ -2,6 +2,22 @@
2
2
 
3
3
  <!-- version list -->
4
4
 
5
+ ## v1.2.0 (2026-01-15)
6
+
7
+ ### Code Style
8
+
9
+ - Fix ruff linting errors
10
+ ([`b2d5ea2`](https://github.com/pirnerjonas/argus/commit/b2d5ea2c4d0715a474d4ffaa5be60d0499d200a2))
11
+
12
+ - Remove unused pytest import in test_classification.py
13
+ ([`e22175a`](https://github.com/pirnerjonas/argus/commit/e22175a7378276dd2840754e672c7d4d1ed0e067))
14
+
15
+ ### Features
16
+
17
+ - Add classification dataset support with grid viewer
18
+ ([`8089bd3`](https://github.com/pirnerjonas/argus/commit/8089bd3367ede4c288b0276ac3da4d3ef9960c4d))
19
+
20
+
5
21
  ## v1.1.0 (2026-01-14)
6
22
 
7
23
  ### Code Style
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argus-cv
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: CLI tool for working with vision AI datasets
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: numpy>=1.24.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "argus-cv"
3
- version = "1.1.0"
3
+ version = "1.2.0"
4
4
  description = "CLI tool for working with vision AI datasets"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -1,3 +1,3 @@
1
1
  """Argus - Vision AI dataset toolkit."""
2
2
 
3
- __version__ = "1.1.0"
3
+ __version__ = "1.2.0"
@@ -12,6 +12,7 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
12
12
  from rich.table import Table
13
13
 
14
14
  from argus.core import COCODataset, Dataset, YOLODataset
15
+ from argus.core.base import TaskType
15
16
  from argus.core.split import (
16
17
  is_coco_unsplit,
17
18
  parse_ratio,
@@ -238,19 +239,29 @@ def view(
238
239
  help="Specific split to view (train, val, test).",
239
240
  ),
240
241
  ] = None,
242
+ max_classes: Annotated[
243
+ int | None,
244
+ typer.Option(
245
+ "--max-classes",
246
+ "-m",
247
+ help="Maximum classes to show in grid (classification only).",
248
+ ),
249
+ ] = None,
241
250
  ) -> None:
242
251
  """View annotated images in a dataset.
243
252
 
244
253
  Opens an interactive viewer to browse images with their annotations
245
254
  (bounding boxes and segmentation masks) overlaid.
246
255
 
256
+ For classification datasets, shows a grid view with one image per class.
257
+
247
258
  Controls:
248
- - Right Arrow / N: Next image
249
- - Left Arrow / P: Previous image
250
- - Mouse Wheel: Zoom in/out
251
- - Mouse Drag: Pan when zoomed
252
- - R: Reset zoom
253
- - T: Toggle annotations
259
+ - Right Arrow / N: Next image(s)
260
+ - Left Arrow / P: Previous image(s)
261
+ - Mouse Wheel: Zoom in/out (detection/segmentation only)
262
+ - Mouse Drag: Pan when zoomed (detection/segmentation only)
263
+ - R: Reset zoom / Reset to first images
264
+ - T: Toggle annotations (detection/segmentation only)
254
265
  - Q / ESC: Quit viewer
255
266
  """
256
267
  # Resolve path and validate
@@ -281,39 +292,76 @@ def view(
281
292
  )
282
293
  raise typer.Exit(1)
283
294
 
284
- # Get image paths
285
- with Progress(
286
- SpinnerColumn(),
287
- TextColumn("[progress.description]{task.description}"),
288
- console=console,
289
- transient=True,
290
- ) as progress:
291
- progress.add_task("Loading images...", total=None)
292
- image_paths = dataset.get_image_paths(split)
295
+ # Generate consistent colors for each class
296
+ class_colors = _generate_class_colors(dataset.class_names)
293
297
 
294
- if not image_paths:
295
- console.print("[yellow]No images found in the dataset.[/yellow]")
296
- return
298
+ # Handle classification datasets with grid viewer
299
+ if dataset.task == TaskType.CLASSIFICATION:
300
+ # Use first split if specified, otherwise let get_images_by_class handle it
301
+ view_split = split if split else (dataset.splits[0] if dataset.splits else None)
297
302
 
298
- console.print(
299
- f"[green]Found {len(image_paths)} images. "
300
- f"Opening viewer...[/green]\n"
301
- "[dim]Controls: ← / → or P / N to navigate, "
302
- "Mouse wheel to zoom, Drag to pan, R to reset, T to toggle annotations, "
303
- "Q / ESC to quit[/dim]"
304
- )
303
+ with Progress(
304
+ SpinnerColumn(),
305
+ TextColumn("[progress.description]{task.description}"),
306
+ console=console,
307
+ transient=True,
308
+ ) as progress:
309
+ progress.add_task("Loading images by class...", total=None)
310
+ images_by_class = dataset.get_images_by_class(view_split)
305
311
 
306
- # Generate consistent colors for each class
307
- class_colors = _generate_class_colors(dataset.class_names)
312
+ total_images = sum(len(imgs) for imgs in images_by_class.values())
313
+ if total_images == 0:
314
+ console.print("[yellow]No images found in the dataset.[/yellow]")
315
+ return
308
316
 
309
- # Create and run the interactive viewer
310
- viewer = _ImageViewer(
311
- image_paths=image_paths,
312
- dataset=dataset,
313
- class_colors=class_colors,
314
- window_name=f"Argus Viewer - {dataset_path.name}",
315
- )
316
- viewer.run()
317
+ num_classes = len(dataset.class_names)
318
+ display_classes = min(num_classes, max_classes) if max_classes else num_classes
319
+
320
+ console.print(
321
+ f"[green]Found {total_images} images across {num_classes} classes "
322
+ f"(showing {display_classes}). Opening grid viewer...[/green]\n"
323
+ "[dim]Controls: ← / → or P / N to navigate all classes, "
324
+ "R to reset, Q / ESC to quit[/dim]"
325
+ )
326
+
327
+ viewer = _ClassificationGridViewer(
328
+ images_by_class=images_by_class,
329
+ class_names=dataset.class_names,
330
+ class_colors=class_colors,
331
+ window_name=f"Argus Classification Viewer - {dataset_path.name}",
332
+ max_classes=max_classes,
333
+ )
334
+ viewer.run()
335
+ else:
336
+ # Detection/Segmentation viewer
337
+ with Progress(
338
+ SpinnerColumn(),
339
+ TextColumn("[progress.description]{task.description}"),
340
+ console=console,
341
+ transient=True,
342
+ ) as progress:
343
+ progress.add_task("Loading images...", total=None)
344
+ image_paths = dataset.get_image_paths(split)
345
+
346
+ if not image_paths:
347
+ console.print("[yellow]No images found in the dataset.[/yellow]")
348
+ return
349
+
350
+ console.print(
351
+ f"[green]Found {len(image_paths)} images. "
352
+ f"Opening viewer...[/green]\n"
353
+ "[dim]Controls: ← / → or P / N to navigate, "
354
+ "Mouse wheel to zoom, Drag to pan, R to reset, T to toggle annotations, "
355
+ "Q / ESC to quit[/dim]"
356
+ )
357
+
358
+ viewer = _ImageViewer(
359
+ image_paths=image_paths,
360
+ dataset=dataset,
361
+ class_colors=class_colors,
362
+ window_name=f"Argus Viewer - {dataset_path.name}",
363
+ )
364
+ viewer.run()
317
365
 
318
366
  console.print("[green]Viewer closed.[/green]")
319
367
 
@@ -656,6 +704,185 @@ class _ImageViewer:
656
704
  cv2.destroyAllWindows()
657
705
 
658
706
 
707
+ class _ClassificationGridViewer:
708
+ """Grid viewer for classification datasets showing one image per class."""
709
+
710
+ def __init__(
711
+ self,
712
+ images_by_class: dict[str, list[Path]],
713
+ class_names: list[str],
714
+ class_colors: dict[str, tuple[int, int, int]],
715
+ window_name: str,
716
+ max_classes: int | None = None,
717
+ tile_size: int = 300,
718
+ ):
719
+ # Limit classes if max_classes specified
720
+ if max_classes and len(class_names) > max_classes:
721
+ self.class_names = class_names[:max_classes]
722
+ else:
723
+ self.class_names = class_names
724
+
725
+ self.images_by_class = {
726
+ cls: images_by_class.get(cls, []) for cls in self.class_names
727
+ }
728
+ self.class_colors = class_colors
729
+ self.window_name = window_name
730
+ self.tile_size = tile_size
731
+
732
+ # Global image index (same for all classes)
733
+ self.current_index = 0
734
+
735
+ # Calculate max images across all classes
736
+ self.max_images = max(
737
+ len(imgs) for imgs in self.images_by_class.values()
738
+ ) if self.images_by_class else 0
739
+
740
+ # Calculate grid layout
741
+ self.cols, self.rows = self._calculate_grid_layout()
742
+
743
+ def _calculate_grid_layout(self) -> tuple[int, int]:
744
+ """Calculate optimal grid layout based on number of classes."""
745
+ n = len(self.class_names)
746
+ if n <= 0:
747
+ return 1, 1
748
+
749
+ # Try to make a roughly square grid
750
+ import math
751
+
752
+ cols = int(math.ceil(math.sqrt(n)))
753
+ rows = int(math.ceil(n / cols))
754
+ return cols, rows
755
+
756
+ def _create_tile(
757
+ self, class_name: str, image_path: Path | None, index: int, total: int
758
+ ) -> np.ndarray:
759
+ """Create a single tile for a class."""
760
+ tile = np.zeros((self.tile_size, self.tile_size, 3), dtype=np.uint8)
761
+
762
+ if image_path is not None and image_path.exists():
763
+ # Load and resize image
764
+ img = cv2.imread(str(image_path))
765
+ if img is not None:
766
+ # Resize maintaining aspect ratio
767
+ h, w = img.shape[:2]
768
+ scale = min(self.tile_size / w, self.tile_size / h)
769
+ new_w = int(w * scale)
770
+ new_h = int(h * scale)
771
+ resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
772
+
773
+ # Center in tile
774
+ x_offset = (self.tile_size - new_w) // 2
775
+ y_offset = (self.tile_size - new_h) // 2
776
+ tile[y_offset : y_offset + new_h, x_offset : x_offset + new_w] = resized
777
+
778
+ # Draw label at top: "class_name (N/M)"
779
+ if image_path is not None:
780
+ label = f"{class_name} ({index + 1}/{total})"
781
+ else:
782
+ label = f"{class_name} (-/{total})"
783
+
784
+ font = cv2.FONT_HERSHEY_SIMPLEX
785
+ font_scale = 0.5
786
+ thickness = 1
787
+ (label_w, label_h), baseline = cv2.getTextSize(
788
+ label, font, font_scale, thickness
789
+ )
790
+
791
+ # Semi-transparent background for label
792
+ overlay = tile.copy()
793
+ label_bg_height = label_h + baseline + 10
794
+ cv2.rectangle(overlay, (0, 0), (self.tile_size, label_bg_height), (0, 0, 0), -1)
795
+ cv2.addWeighted(overlay, 0.6, tile, 0.4, 0, tile)
796
+
797
+ cv2.putText(
798
+ tile,
799
+ label,
800
+ (5, label_h + 5),
801
+ font,
802
+ font_scale,
803
+ (255, 255, 255),
804
+ thickness,
805
+ )
806
+
807
+ # Draw thin border
808
+ border_end = self.tile_size - 1
809
+ cv2.rectangle(tile, (0, 0), (border_end, border_end), (80, 80, 80), 1)
810
+
811
+ return tile
812
+
813
+ def _compose_grid(self) -> np.ndarray:
814
+ """Compose all tiles into a single grid image."""
815
+ grid_h = self.rows * self.tile_size
816
+ grid_w = self.cols * self.tile_size
817
+ grid = np.zeros((grid_h, grid_w, 3), dtype=np.uint8)
818
+
819
+ for i, class_name in enumerate(self.class_names):
820
+ row = i // self.cols
821
+ col = i % self.cols
822
+
823
+ images = self.images_by_class[class_name]
824
+ total = len(images)
825
+
826
+ # Use global index - show black tile if class doesn't have this image
827
+ if self.current_index < total:
828
+ image_path = images[self.current_index]
829
+ display_index = self.current_index
830
+ else:
831
+ image_path = None
832
+ display_index = self.current_index
833
+
834
+ tile = self._create_tile(class_name, image_path, display_index, total)
835
+
836
+ y_start = row * self.tile_size
837
+ x_start = col * self.tile_size
838
+ y_end = y_start + self.tile_size
839
+ x_end = x_start + self.tile_size
840
+ grid[y_start:y_end, x_start:x_end] = tile
841
+
842
+ return grid
843
+
844
+ def _next_images(self) -> None:
845
+ """Advance to next image index."""
846
+ if self.max_images > 0:
847
+ self.current_index = min(self.current_index + 1, self.max_images - 1)
848
+
849
+ def _prev_images(self) -> None:
850
+ """Go back to previous image index."""
851
+ self.current_index = max(self.current_index - 1, 0)
852
+
853
+ def _reset_indices(self) -> None:
854
+ """Reset to first image."""
855
+ self.current_index = 0
856
+
857
+ def run(self) -> None:
858
+ """Run the interactive grid viewer."""
859
+ if not self.class_names:
860
+ console.print("[yellow]No classes to display.[/yellow]")
861
+ return
862
+
863
+ cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL)
864
+
865
+ while True:
866
+ # Compose and display grid
867
+ grid = self._compose_grid()
868
+ cv2.imshow(self.window_name, grid)
869
+
870
+ # Wait for input
871
+ key = cv2.waitKey(30) & 0xFF
872
+
873
+ # Handle keyboard input
874
+ if key == ord("q") or key == 27: # Q or ESC
875
+ break
876
+ elif key == ord("n") or key == 83 or key == 3: # N or Right arrow
877
+ self._next_images()
878
+ elif key == ord("p") or key == 81 or key == 2: # P or Left arrow
879
+ self._prev_images()
880
+ elif key == ord("r"): # R to reset
881
+ self._reset_indices()
882
+
883
+ cv2.destroyAllWindows()
884
+
885
+
659
886
  def _generate_class_colors(class_names: list[str]) -> dict[str, tuple[int, int, int]]:
660
887
  """Generate consistent colors for each class name.
661
888
 
@@ -12,9 +12,9 @@ from argus.core.base import Dataset, DatasetFormat, TaskType
12
12
  class YOLODataset(Dataset):
13
13
  """YOLO format dataset.
14
14
 
15
- Supports detection and segmentation tasks.
15
+ Supports detection, segmentation, and classification tasks.
16
16
 
17
- Structure:
17
+ Structure (detection/segmentation):
18
18
  dataset/
19
19
  ├── data.yaml (or *.yaml/*.yml with 'names' key)
20
20
  ├── images/
@@ -23,6 +23,19 @@ class YOLODataset(Dataset):
23
23
  └── labels/
24
24
  ├── train/
25
25
  └── val/
26
+
27
+ Structure (classification):
28
+ dataset/
29
+ ├── images/
30
+ │ ├── train/
31
+ │ │ ├── class1/
32
+ │ │ │ ├── img1.jpg
33
+ │ │ │ └── img2.jpg
34
+ │ │ └── class2/
35
+ │ │ └── img1.jpg
36
+ │ └── val/
37
+ │ ├── class1/
38
+ │ └── class2/
26
39
  """
27
40
 
28
41
  config_file: Path | None = None
@@ -43,8 +56,13 @@ class YOLODataset(Dataset):
43
56
  if not path.is_dir():
44
57
  return None
45
58
 
46
- # Try detection/segmentation (YAML-based)
47
- return cls._detect_yaml_based(path)
59
+ # Try detection/segmentation (YAML-based) first
60
+ result = cls._detect_yaml_based(path)
61
+ if result:
62
+ return result
63
+
64
+ # Try classification (directory-based structure)
65
+ return cls._detect_classification(path)
48
66
 
49
67
  @classmethod
50
68
  def _detect_yaml_based(cls, path: Path) -> "YOLODataset | None":
@@ -103,16 +121,121 @@ class YOLODataset(Dataset):
103
121
 
104
122
  return None
105
123
 
124
+ @classmethod
125
+ def _detect_classification(cls, path: Path) -> "YOLODataset | None":
126
+ """Detect classification dataset from directory structure.
127
+
128
+ Classification datasets can have two structures:
129
+
130
+ 1. Split structure:
131
+ images/{split}/class_name/image.jpg
132
+
133
+ 2. Flat structure (unsplit):
134
+ class_name/image.jpg
135
+
136
+ No YAML config required - class names inferred from directory names.
137
+
138
+ Args:
139
+ path: Directory path to check.
140
+
141
+ Returns:
142
+ YOLODataset if classification structure found, None otherwise.
143
+ """
144
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
145
+
146
+ # Try split structure first: images/{split}/class/
147
+ images_root = path / "images"
148
+ if images_root.is_dir():
149
+ splits: list[str] = []
150
+ class_names_set: set[str] = set()
151
+
152
+ for split_name in ["train", "val", "test"]:
153
+ split_dir = images_root / split_name
154
+ if not split_dir.is_dir():
155
+ continue
156
+
157
+ # Get subdirectories (potential class folders)
158
+ class_dirs = [d for d in split_dir.iterdir() if d.is_dir()]
159
+ if not class_dirs:
160
+ continue
161
+
162
+ # Check if at least one class dir contains images
163
+ has_images = False
164
+ for class_dir in class_dirs:
165
+ for f in class_dir.iterdir():
166
+ if f.suffix.lower() in image_extensions:
167
+ has_images = True
168
+ break
169
+ if has_images:
170
+ break
171
+
172
+ if has_images:
173
+ splits.append(split_name)
174
+ class_names_set.update(d.name for d in class_dirs)
175
+
176
+ if splits and class_names_set:
177
+ class_names = sorted(class_names_set)
178
+ return cls(
179
+ path=path,
180
+ task=TaskType.CLASSIFICATION,
181
+ num_classes=len(class_names),
182
+ class_names=class_names,
183
+ splits=splits,
184
+ config_file=None,
185
+ )
186
+
187
+ # Try flat structure: class_name/image.jpg (no images/ or split dirs)
188
+ # Check if root contains subdirectories with images
189
+ class_dirs = [d for d in path.iterdir() if d.is_dir()]
190
+
191
+ # Filter out common non-class directories
192
+ excluded_dirs = {"images", "labels", "annotations", ".git", "__pycache__"}
193
+ class_dirs = [d for d in class_dirs if d.name not in excluded_dirs]
194
+
195
+ if not class_dirs:
196
+ return None
197
+
198
+ # Check if these are class directories (contain images directly)
199
+ class_names_set = set()
200
+ for class_dir in class_dirs:
201
+ has_images = any(
202
+ f.suffix.lower() in image_extensions
203
+ for f in class_dir.iterdir()
204
+ if f.is_file()
205
+ )
206
+ if has_images:
207
+ class_names_set.add(class_dir.name)
208
+
209
+ # Need at least 2 classes to be a valid classification dataset
210
+ if len(class_names_set) < 2:
211
+ return None
212
+
213
+ class_names = sorted(class_names_set)
214
+ return cls(
215
+ path=path,
216
+ task=TaskType.CLASSIFICATION,
217
+ num_classes=len(class_names),
218
+ class_names=class_names,
219
+ splits=[], # No splits for flat structure
220
+ config_file=None,
221
+ )
222
+
106
223
  def get_instance_counts(self) -> dict[str, dict[str, int]]:
107
224
  """Get the number of annotation instances per class, per split.
108
225
 
109
- Parses all label files in labels/{split}/*.txt and counts
110
- occurrences of each class ID. For unsplit datasets, uses "unsplit"
111
- as the split name.
226
+ For detection/segmentation: Parses all label files in labels/{split}/*.txt
227
+ and counts occurrences of each class ID.
228
+
229
+ For classification: Counts images in each class directory
230
+ (1 image = 1 instance).
112
231
 
113
232
  Returns:
114
233
  Dictionary mapping split name to dict of class name to instance count.
115
234
  """
235
+ # Handle classification datasets differently
236
+ if self.task == TaskType.CLASSIFICATION:
237
+ return self._get_classification_instance_counts()
238
+
116
239
  counts: dict[str, dict[str, int]] = {}
117
240
 
118
241
  # Build class_id -> class_name mapping
@@ -162,15 +285,77 @@ class YOLODataset(Dataset):
162
285
 
163
286
  return counts
164
287
 
288
+ def _get_classification_instance_counts(self) -> dict[str, dict[str, int]]:
289
+ """Get instance counts for classification datasets.
290
+
291
+ Each image is one instance of its class.
292
+
293
+ Returns:
294
+ Dictionary mapping split name to dict of class name to image count.
295
+ """
296
+ counts: dict[str, dict[str, int]] = {}
297
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
298
+
299
+ # Handle flat structure (no splits)
300
+ if not self.splits:
301
+ split_counts: dict[str, int] = {}
302
+ for class_name in self.class_names:
303
+ class_dir = self.path / class_name
304
+ if not class_dir.is_dir():
305
+ split_counts[class_name] = 0
306
+ continue
307
+
308
+ image_count = sum(
309
+ 1
310
+ for f in class_dir.iterdir()
311
+ if f.suffix.lower() in image_extensions
312
+ )
313
+ split_counts[class_name] = image_count
314
+
315
+ counts["unsplit"] = split_counts
316
+ return counts
317
+
318
+ # Handle split structure
319
+ images_root = self.path / "images"
320
+ for split in self.splits:
321
+ split_dir = images_root / split
322
+ if not split_dir.is_dir():
323
+ continue
324
+
325
+ split_counts = {}
326
+ for class_name in self.class_names:
327
+ class_dir = split_dir / class_name
328
+ if not class_dir.is_dir():
329
+ split_counts[class_name] = 0
330
+ continue
331
+
332
+ image_count = sum(
333
+ 1
334
+ for f in class_dir.iterdir()
335
+ if f.suffix.lower() in image_extensions
336
+ )
337
+ split_counts[class_name] = image_count
338
+
339
+ counts[split] = split_counts
340
+
341
+ return counts
342
+
165
343
  def get_image_counts(self) -> dict[str, dict[str, int]]:
166
344
  """Get image counts per split, including background images.
167
345
 
168
- Counts label files in labels/{split}/*.txt. Empty files are
169
- counted as background images.
346
+ For detection/segmentation: Counts label files in labels/{split}/*.txt.
347
+ Empty files are counted as background images.
348
+
349
+ For classification: Counts total images across all class directories.
350
+ Background count is always 0 (no background concept in classification).
170
351
 
171
352
  Returns:
172
353
  Dictionary mapping split name to dict with "total" and "background" counts.
173
354
  """
355
+ # Handle classification datasets differently
356
+ if self.task == TaskType.CLASSIFICATION:
357
+ return self._get_classification_image_counts()
358
+
174
359
  counts: dict[str, dict[str, int]] = {}
175
360
 
176
361
  labels_root = self.path / "labels"
@@ -203,6 +388,56 @@ class YOLODataset(Dataset):
203
388
 
204
389
  return counts
205
390
 
391
+ def _get_classification_image_counts(self) -> dict[str, dict[str, int]]:
392
+ """Get image counts for classification datasets.
393
+
394
+ Returns:
395
+ Dictionary mapping split name to dict with "total" and "background" counts.
396
+ Background is always 0 for classification.
397
+ """
398
+ counts: dict[str, dict[str, int]] = {}
399
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
400
+
401
+ # Handle flat structure (no splits)
402
+ if not self.splits:
403
+ total = 0
404
+ for class_name in self.class_names:
405
+ class_dir = self.path / class_name
406
+ if not class_dir.is_dir():
407
+ continue
408
+
409
+ total += sum(
410
+ 1
411
+ for f in class_dir.iterdir()
412
+ if f.suffix.lower() in image_extensions
413
+ )
414
+
415
+ counts["unsplit"] = {"total": total, "background": 0}
416
+ return counts
417
+
418
+ # Handle split structure
419
+ images_root = self.path / "images"
420
+ for split in self.splits:
421
+ split_dir = images_root / split
422
+ if not split_dir.is_dir():
423
+ continue
424
+
425
+ total = 0
426
+ for class_name in self.class_names:
427
+ class_dir = split_dir / class_name
428
+ if not class_dir.is_dir():
429
+ continue
430
+
431
+ total += sum(
432
+ 1
433
+ for f in class_dir.iterdir()
434
+ if f.suffix.lower() in image_extensions
435
+ )
436
+
437
+ counts[split] = {"total": total, "background": 0}
438
+
439
+ return counts
440
+
206
441
  @classmethod
207
442
  def _detect_splits(cls, path: Path, config: dict) -> list[str]:
208
443
  """Detect available splits from config and filesystem.
@@ -301,6 +536,10 @@ class YOLODataset(Dataset):
301
536
  Returns:
302
537
  List of image file paths sorted alphabetically.
303
538
  """
539
+ # Handle classification datasets differently
540
+ if self.task == TaskType.CLASSIFICATION:
541
+ return self._get_classification_image_paths(split)
542
+
304
543
  image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
305
544
  images_root = self.path / "images"
306
545
  image_paths: list[Path] = []
@@ -345,6 +584,51 @@ class YOLODataset(Dataset):
345
584
 
346
585
  return sorted(image_paths, key=lambda p: p.name)
347
586
 
587
+ def _get_classification_image_paths(self, split: str | None = None) -> list[Path]:
588
+ """Get image paths for classification datasets.
589
+
590
+ Args:
591
+ split: Specific split to get images from. If None, returns all images.
592
+
593
+ Returns:
594
+ List of image file paths sorted alphabetically.
595
+ """
596
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
597
+ image_paths: list[Path] = []
598
+
599
+ # Handle flat structure (no splits)
600
+ if not self.splits:
601
+ for class_name in self.class_names:
602
+ class_dir = self.path / class_name
603
+ if not class_dir.is_dir():
604
+ continue
605
+
606
+ for img_file in class_dir.iterdir():
607
+ if img_file.suffix.lower() in image_extensions:
608
+ image_paths.append(img_file)
609
+
610
+ return sorted(image_paths, key=lambda p: p.name)
611
+
612
+ # Handle split structure
613
+ images_root = self.path / "images"
614
+ splits_to_search = [split] if split else self.splits
615
+
616
+ for s in splits_to_search:
617
+ split_dir = images_root / s
618
+ if not split_dir.is_dir():
619
+ continue
620
+
621
+ for class_name in self.class_names:
622
+ class_dir = split_dir / class_name
623
+ if not class_dir.is_dir():
624
+ continue
625
+
626
+ for img_file in class_dir.iterdir():
627
+ if img_file.suffix.lower() in image_extensions:
628
+ image_paths.append(img_file)
629
+
630
+ return sorted(image_paths, key=lambda p: p.name)
631
+
348
632
  def get_annotations_for_image(self, image_path: Path) -> list[dict]:
349
633
  """Get annotations for a specific image.
350
634
 
@@ -445,3 +729,65 @@ class YOLODataset(Dataset):
445
729
  pass
446
730
 
447
731
  return annotations
732
+
733
+ def get_images_by_class(self, split: str | None = None) -> dict[str, list[Path]]:
734
+ """Get images grouped by class for classification datasets.
735
+
736
+ Args:
737
+ split: Specific split to get images from. If None, uses first
738
+ available split or all images for flat structure.
739
+
740
+ Returns:
741
+ Dictionary mapping class name to list of image paths.
742
+ """
743
+ if self.task != TaskType.CLASSIFICATION:
744
+ return {}
745
+
746
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
747
+ images_by_class: dict[str, list[Path]] = {cls: [] for cls in self.class_names}
748
+
749
+ # Handle flat structure (no splits)
750
+ if not self.splits:
751
+ for class_name in self.class_names:
752
+ class_dir = self.path / class_name
753
+ if not class_dir.is_dir():
754
+ continue
755
+
756
+ for img_file in class_dir.iterdir():
757
+ if img_file.suffix.lower() in image_extensions:
758
+ images_by_class[class_name].append(img_file)
759
+
760
+ # Sort images within each class
761
+ for class_name in images_by_class:
762
+ images_by_class[class_name] = sorted(
763
+ images_by_class[class_name], key=lambda p: p.name
764
+ )
765
+
766
+ return images_by_class
767
+
768
+ # Handle split structure
769
+ images_root = self.path / "images"
770
+ default_splits = self.splits[:1] if self.splits else []
771
+ splits_to_search = [split] if split else default_splits
772
+
773
+ for s in splits_to_search:
774
+ split_dir = images_root / s
775
+ if not split_dir.is_dir():
776
+ continue
777
+
778
+ for class_name in self.class_names:
779
+ class_dir = split_dir / class_name
780
+ if not class_dir.is_dir():
781
+ continue
782
+
783
+ for img_file in class_dir.iterdir():
784
+ if img_file.suffix.lower() in image_extensions:
785
+ images_by_class[class_name].append(img_file)
786
+
787
+ # Sort images within each class for consistent ordering
788
+ for class_name in images_by_class:
789
+ images_by_class[class_name] = sorted(
790
+ images_by_class[class_name], key=lambda p: p.name
791
+ )
792
+
793
+ return images_by_class
@@ -389,3 +389,74 @@ names:
389
389
  (annotations_dir / "annotations.json").write_text(json.dumps(coco_data))
390
390
 
391
391
  return root_path
392
+
393
+
394
@pytest.fixture
def yolo_classification_dataset(tmp_path: Path) -> Path:
    """Create a valid YOLO classification dataset.

    Structure:
        dataset/
        └── images/
            ├── train/
            │   ├── cat/  (img001.jpg, img002.jpg)
            │   └── dog/  (img001.jpg)
            └── val/
                ├── cat/  (img001.jpg)
                └── dog/  (img001.jpg)
    """
    dataset_path = tmp_path / "yolo_classification"
    dataset_path.mkdir()

    # (split, class) -> image file names; each file holds dummy bytes.
    layout = {
        ("train", "cat"): ["img001.jpg", "img002.jpg"],
        ("train", "dog"): ["img001.jpg"],
        ("val", "cat"): ["img001.jpg"],
        ("val", "dog"): ["img001.jpg"],
    }
    for (split, cls), filenames in layout.items():
        class_dir = dataset_path / "images" / split / cls
        class_dir.mkdir(parents=True)
        for filename in filenames:
            (class_dir / filename).write_bytes(f"fake {cls}".encode())

    return dataset_path
433
+
434
+
435
@pytest.fixture
def yolo_classification_multiclass_dataset(tmp_path: Path) -> Path:
    """Create a YOLO classification dataset with more classes.

    Structure:
        dataset/
        └── images/
            └── train/
                ├── apple/   (1 image)
                ├── banana/  (2 images)
                ├── cherry/  (3 images)
                └── date/    (4 images)
    """
    dataset_path = tmp_path / "yolo_cls_multiclass"
    dataset_path.mkdir()

    classes = ["apple", "banana", "cherry", "date"]

    # enumerate replaces the quadratic classes.index(cls) lookup while
    # keeping the same varying image count per class (1..len(classes)).
    for num_images, cls in enumerate(classes, start=1):
        class_dir = dataset_path / "images" / "train" / cls
        class_dir.mkdir(parents=True)
        for i in range(num_images):
            (class_dir / f"img{i:03d}.jpg").write_bytes(b"fake image")

    return dataset_path
@@ -0,0 +1,205 @@
1
+ """Tests for YOLO classification dataset support."""
2
+
3
+ from pathlib import Path
4
+
5
+ from argus.core.base import DatasetFormat, TaskType
6
+ from argus.core.yolo import YOLODataset
7
+
8
+
9
+ class TestClassificationDetection:
10
+ """Tests for detecting YOLO classification datasets."""
11
+
12
+ def test_detect_classification_dataset(
13
+ self, yolo_classification_dataset: Path
14
+ ) -> None:
15
+ """Test that classification dataset is correctly detected."""
16
+ dataset = YOLODataset.detect(yolo_classification_dataset)
17
+
18
+ assert dataset is not None
19
+ assert dataset.task == TaskType.CLASSIFICATION
20
+ assert dataset.format == DatasetFormat.YOLO
21
+ assert set(dataset.class_names) == {"cat", "dog"}
22
+ assert dataset.num_classes == 2
23
+ assert set(dataset.splits) == {"train", "val"}
24
+
25
+ def test_detect_classification_multiclass(
26
+ self, yolo_classification_multiclass_dataset: Path
27
+ ) -> None:
28
+ """Test detection with multiple classes."""
29
+ dataset = YOLODataset.detect(yolo_classification_multiclass_dataset)
30
+
31
+ assert dataset is not None
32
+ assert dataset.task == TaskType.CLASSIFICATION
33
+ assert set(dataset.class_names) == {"apple", "banana", "cherry", "date"}
34
+ assert dataset.num_classes == 4
35
+ assert "train" in dataset.splits
36
+
37
+ def test_classification_no_yaml_required(
38
+ self, yolo_classification_dataset: Path
39
+ ) -> None:
40
+ """Test that classification datasets don't need a YAML config."""
41
+ dataset = YOLODataset.detect(yolo_classification_dataset)
42
+
43
+ assert dataset is not None
44
+ assert dataset.config_file is None
45
+
46
+ def test_classification_not_detected_for_detection(
47
+ self, yolo_detection_dataset: Path
48
+ ) -> None:
49
+ """Test that detection datasets are not classified as classification."""
50
+ dataset = YOLODataset.detect(yolo_detection_dataset)
51
+
52
+ assert dataset is not None
53
+ assert dataset.task != TaskType.CLASSIFICATION
54
+ assert dataset.task == TaskType.DETECTION
55
+
56
+
57
+ class TestClassificationGetImagesByClass:
58
+ """Tests for get_images_by_class method."""
59
+
60
+ def test_get_images_by_class(self, yolo_classification_dataset: Path) -> None:
61
+ """Test getting images grouped by class."""
62
+ dataset = YOLODataset.detect(yolo_classification_dataset)
63
+ assert dataset is not None
64
+
65
+ images_by_class = dataset.get_images_by_class("train")
66
+
67
+ assert "cat" in images_by_class
68
+ assert "dog" in images_by_class
69
+ assert len(images_by_class["cat"]) == 2
70
+ assert len(images_by_class["dog"]) == 1
71
+
72
+ def test_get_images_by_class_val_split(
73
+ self, yolo_classification_dataset: Path
74
+ ) -> None:
75
+ """Test getting images from val split."""
76
+ dataset = YOLODataset.detect(yolo_classification_dataset)
77
+ assert dataset is not None
78
+
79
+ images_by_class = dataset.get_images_by_class("val")
80
+
81
+ assert len(images_by_class["cat"]) == 1
82
+ assert len(images_by_class["dog"]) == 1
83
+
84
+ def test_get_images_by_class_default_split(
85
+ self, yolo_classification_dataset: Path
86
+ ) -> None:
87
+ """Test that None split uses first available split."""
88
+ dataset = YOLODataset.detect(yolo_classification_dataset)
89
+ assert dataset is not None
90
+
91
+ images_by_class = dataset.get_images_by_class(None)
92
+
93
+ # Should return images from first split (train)
94
+ total_images = sum(len(imgs) for imgs in images_by_class.values())
95
+ assert total_images > 0
96
+
97
+ def test_get_images_by_class_non_classification(
98
+ self, yolo_detection_dataset: Path
99
+ ) -> None:
100
+ """Test that non-classification datasets return empty dict."""
101
+ dataset = YOLODataset.detect(yolo_detection_dataset)
102
+ assert dataset is not None
103
+
104
+ images_by_class = dataset.get_images_by_class("train")
105
+
106
+ assert images_by_class == {}
107
+
108
+
109
+ class TestClassificationInstanceCounts:
110
+ """Tests for instance counts in classification datasets."""
111
+
112
+ def test_get_instance_counts(self, yolo_classification_dataset: Path) -> None:
113
+ """Test counting images per class per split."""
114
+ dataset = YOLODataset.detect(yolo_classification_dataset)
115
+ assert dataset is not None
116
+
117
+ counts = dataset.get_instance_counts()
118
+
119
+ assert "train" in counts
120
+ assert "val" in counts
121
+ assert counts["train"]["cat"] == 2
122
+ assert counts["train"]["dog"] == 1
123
+ assert counts["val"]["cat"] == 1
124
+ assert counts["val"]["dog"] == 1
125
+
126
+ def test_get_image_counts(self, yolo_classification_dataset: Path) -> None:
127
+ """Test total image counts per split."""
128
+ dataset = YOLODataset.detect(yolo_classification_dataset)
129
+ assert dataset is not None
130
+
131
+ counts = dataset.get_image_counts()
132
+
133
+ assert "train" in counts
134
+ assert "val" in counts
135
+ assert counts["train"]["total"] == 3 # 2 cat + 1 dog
136
+ assert counts["train"]["background"] == 0
137
+ assert counts["val"]["total"] == 2 # 1 cat + 1 dog
138
+ assert counts["val"]["background"] == 0
139
+
140
+
141
+ class TestClassificationImagePaths:
142
+ """Tests for get_image_paths with classification datasets."""
143
+
144
+ def test_get_image_paths_all(self, yolo_classification_dataset: Path) -> None:
145
+ """Test getting all image paths."""
146
+ dataset = YOLODataset.detect(yolo_classification_dataset)
147
+ assert dataset is not None
148
+
149
+ paths = dataset.get_image_paths()
150
+
151
+ assert len(paths) == 5 # 3 train + 2 val
152
+
153
+ def test_get_image_paths_train(self, yolo_classification_dataset: Path) -> None:
154
+ """Test getting image paths for train split."""
155
+ dataset = YOLODataset.detect(yolo_classification_dataset)
156
+ assert dataset is not None
157
+
158
+ paths = dataset.get_image_paths("train")
159
+
160
+ assert len(paths) == 3
161
+
162
+ def test_get_image_paths_val(self, yolo_classification_dataset: Path) -> None:
163
+ """Test getting image paths for val split."""
164
+ dataset = YOLODataset.detect(yolo_classification_dataset)
165
+ assert dataset is not None
166
+
167
+ paths = dataset.get_image_paths("val")
168
+
169
+ assert len(paths) == 2
170
+
171
+
172
+ class TestClassificationCLI:
173
+ """Tests for CLI integration with classification datasets."""
174
+
175
+ def test_list_shows_classification(
176
+ self, yolo_classification_dataset: Path
177
+ ) -> None:
178
+ """Test that list command shows classification datasets."""
179
+ from typer.testing import CliRunner
180
+
181
+ from argus.cli import app
182
+
183
+ runner = CliRunner()
184
+ result = runner.invoke(app, ["list", "-p", str(yolo_classification_dataset)])
185
+
186
+ assert result.exit_code == 0
187
+ assert "classification" in result.output.lower()
188
+ assert "yolo" in result.output.lower()
189
+
190
+ def test_stats_shows_classification_counts(
191
+ self, yolo_classification_dataset: Path
192
+ ) -> None:
193
+ """Test that stats command shows image counts per class."""
194
+ from typer.testing import CliRunner
195
+
196
+ from argus.cli import app
197
+
198
+ runner = CliRunner()
199
+ result = runner.invoke(
200
+ app, ["stats", "-d", str(yolo_classification_dataset)]
201
+ )
202
+
203
+ assert result.exit_code == 0
204
+ assert "cat" in result.output
205
+ assert "dog" in result.output
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes