PyPI - argus-cv - Versions diffs - 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

argus-cv 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of argus-cv might be problematic. Click here for more details.

Files changed (12)

argus/__init__.py +1 -1
argus/cli.py +698 -50
argus/core/__init__.py +3 -0
argus/core/base.py +1 -0
argus/core/coco.py +8 -6
argus/core/mask.py +648 -0
argus/core/yolo.py +376 -21
{argus_cv-1.1.0.dist-info → argus_cv-1.3.0.dist-info}/METADATA +1 -1
argus_cv-1.3.0.dist-info/RECORD +14 -0
argus_cv-1.1.0.dist-info/RECORD +0 -13
{argus_cv-1.1.0.dist-info → argus_cv-1.3.0.dist-info}/WHEEL +0 -0
{argus_cv-1.1.0.dist-info → argus_cv-1.3.0.dist-info}/entry_points.txt +0 -0

argus/core/yolo.py CHANGED Viewed

@@ -12,9 +12,9 @@ from argus.core.base import Dataset, DatasetFormat, TaskType
 class YOLODataset(Dataset):
     """YOLO format dataset.
-    Supports detection and segmentation tasks.
+    Supports detection, segmentation, and classification tasks.
-    Structure:
+    Structure (detection/segmentation):
         dataset/
         ├── data.yaml (or *.yaml/*.yml with 'names' key)
         ├── images/
@@ -23,6 +23,19 @@ class YOLODataset(Dataset):
         └── labels/
             ├── train/
             └── val/
+    Structure (classification):
+        dataset/
+        ├── images/
+        │   ├── train/
+        │   │   ├── class1/
+        │   │   │   ├── img1.jpg
+        │   │   │   └── img2.jpg
+        │   │   └── class2/
+        │   │       └── img1.jpg
+        │   └── val/
+        │       ├── class1/
+        │       └── class2/
     """
     config_file: Path | None = None
@@ -43,8 +56,13 @@ class YOLODataset(Dataset):
         if not path.is_dir():
             return None
-        # Try detection/segmentation (YAML-based)
-        return cls._detect_yaml_based(path)
+        # Try detection/segmentation (YAML-based) first
+        result = cls._detect_yaml_based(path)
+        if result:
+            return result
+        # Try classification (directory-based structure)
+        return cls._detect_classification(path)
     @classmethod
     def _detect_yaml_based(cls, path: Path) -> "YOLODataset | None":
@@ -71,6 +89,11 @@ class YOLODataset(Dataset):
                 if "names" not in config:
                     continue
+                # Skip if this looks like a mask dataset config
+                # (has ignore_index or palette keys which are mask-specific)
+                if "ignore_index" in config or "palette" in config:
+                    continue
                 names = config["names"]
                 # Extract class names
@@ -103,16 +126,121 @@ class YOLODataset(Dataset):
         return None
+    @classmethod
+    def _detect_classification(cls, path: Path) -> "YOLODataset | None":
+        """Detect classification dataset from directory structure.
+        Classification datasets can have two structures:
+        1. Split structure:
+            images/{split}/class_name/image.jpg
+        2. Flat structure (unsplit):
+            class_name/image.jpg
+        No YAML config required - class names inferred from directory names.
+        Args:
+            path: Directory path to check.
+        Returns:
+            YOLODataset if classification structure found, None otherwise.
+        """
+        image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
+        # Try split structure first: images/{split}/class/
+        images_root = path / "images"
+        if images_root.is_dir():
+            splits: list[str] = []
+            class_names_set: set[str] = set()
+            for split_name in ["train", "val", "test"]:
+                split_dir = images_root / split_name
+                if not split_dir.is_dir():
+                    continue
+                # Get subdirectories (potential class folders)
+                class_dirs = [d for d in split_dir.iterdir() if d.is_dir()]
+                if not class_dirs:
+                    continue
+                # Check if at least one class dir contains images
+                has_images = False
+                for class_dir in class_dirs:
+                    for f in class_dir.iterdir():
+                        if f.suffix.lower() in image_extensions:
+                            has_images = True
+                            break
+                    if has_images:
+                        break
+                if has_images:
+                    splits.append(split_name)
+                    class_names_set.update(d.name for d in class_dirs)
+            if splits and class_names_set:
+                class_names = sorted(class_names_set)
+                return cls(
+                    path=path,
+                    task=TaskType.CLASSIFICATION,
+                    num_classes=len(class_names),
+                    class_names=class_names,
+                    splits=splits,
+                    config_file=None,
+                )
+        # Try flat structure: class_name/image.jpg (no images/ or split dirs)
+        # Check if root contains subdirectories with images
+        class_dirs = [d for d in path.iterdir() if d.is_dir()]
+        # Filter out common non-class directories
+        excluded_dirs = {"images", "labels", "annotations", ".git", "__pycache__"}
+        class_dirs = [d for d in class_dirs if d.name not in excluded_dirs]
+        if not class_dirs:
+            return None
+        # Check if these are class directories (contain images directly)
+        class_names_set = set()
+        for class_dir in class_dirs:
+            has_images = any(
+                f.suffix.lower() in image_extensions
+                for f in class_dir.iterdir()
+                if f.is_file()
+            )
+            if has_images:
+                class_names_set.add(class_dir.name)
+        # Need at least 2 classes to be a valid classification dataset
+        if len(class_names_set) < 2:
+            return None
+        class_names = sorted(class_names_set)
+        return cls(
+            path=path,
+            task=TaskType.CLASSIFICATION,
+            num_classes=len(class_names),
+            class_names=class_names,
+            splits=[],  # No splits for flat structure
+            config_file=None,
+        )
     def get_instance_counts(self) -> dict[str, dict[str, int]]:
         """Get the number of annotation instances per class, per split.
-        Parses all label files in labels/{split}/*.txt and counts
-        occurrences of each class ID. For unsplit datasets, uses "unsplit"
-        as the split name.
+        For detection/segmentation: Parses all label files in labels/{split}/*.txt
+        and counts occurrences of each class ID.
+        For classification: Counts images in each class directory
+        (1 image = 1 instance).
         Returns:
             Dictionary mapping split name to dict of class name to instance count.
         """
+        # Handle classification datasets differently
+        if self.task == TaskType.CLASSIFICATION:
+            return self._get_classification_instance_counts()
         counts: dict[str, dict[str, int]] = {}
         # Build class_id -> class_name mapping
@@ -162,15 +290,77 @@ class YOLODataset(Dataset):
         return counts
+    def _get_classification_instance_counts(self) -> dict[str, dict[str, int]]:
+        """Get instance counts for classification datasets.
+        Each image is one instance of its class.
+        Returns:
+            Dictionary mapping split name to dict of class name to image count.
+        """
+        counts: dict[str, dict[str, int]] = {}
+        image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
+        # Handle flat structure (no splits)
+        if not self.splits:
+            split_counts: dict[str, int] = {}
+            for class_name in self.class_names:
+                class_dir = self.path / class_name
+                if not class_dir.is_dir():
+                    split_counts[class_name] = 0
+                    continue
+                image_count = sum(
+                    1
+                    for f in class_dir.iterdir()
+                    if f.suffix.lower() in image_extensions
+                )
+                split_counts[class_name] = image_count
+            counts["unsplit"] = split_counts
+            return counts
+        # Handle split structure
+        images_root = self.path / "images"
+        for split in self.splits:
+            split_dir = images_root / split
+            if not split_dir.is_dir():
+                continue
+            split_counts = {}
+            for class_name in self.class_names:
+                class_dir = split_dir / class_name
+                if not class_dir.is_dir():
+                    split_counts[class_name] = 0
+                    continue
+                image_count = sum(
+                    1
+                    for f in class_dir.iterdir()
+                    if f.suffix.lower() in image_extensions
+                )
+                split_counts[class_name] = image_count
+            counts[split] = split_counts
+        return counts
     def get_image_counts(self) -> dict[str, dict[str, int]]:
         """Get image counts per split, including background images.
-        Counts label files in labels/{split}/*.txt. Empty files are
-        counted as background images.
+        For detection/segmentation: Counts label files in labels/{split}/*.txt.
+        Empty files are counted as background images.
+        For classification: Counts total images across all class directories.
+        Background count is always 0 (no background concept in classification).
         Returns:
             Dictionary mapping split name to dict with "total" and "background" counts.
         """
+        # Handle classification datasets differently
+        if self.task == TaskType.CLASSIFICATION:
+            return self._get_classification_image_counts()
         counts: dict[str, dict[str, int]] = {}
         labels_root = self.path / "labels"
@@ -203,6 +393,56 @@ class YOLODataset(Dataset):
         return counts
+    def _get_classification_image_counts(self) -> dict[str, dict[str, int]]:
+        """Get image counts for classification datasets.
+        Returns:
+            Dictionary mapping split name to dict with "total" and "background" counts.
+            Background is always 0 for classification.
+        """
+        counts: dict[str, dict[str, int]] = {}
+        image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
+        # Handle flat structure (no splits)
+        if not self.splits:
+            total = 0
+            for class_name in self.class_names:
+                class_dir = self.path / class_name
+                if not class_dir.is_dir():
+                    continue
+                total += sum(
+                    1
+                    for f in class_dir.iterdir()
+                    if f.suffix.lower() in image_extensions
+                )
+            counts["unsplit"] = {"total": total, "background": 0}
+            return counts
+        # Handle split structure
+        images_root = self.path / "images"
+        for split in self.splits:
+            split_dir = images_root / split
+            if not split_dir.is_dir():
+                continue
+            total = 0
+            for class_name in self.class_names:
+                class_dir = split_dir / class_name
+                if not class_dir.is_dir():
+                    continue
+                total += sum(
+                    1
+                    for f in class_dir.iterdir()
+                    if f.suffix.lower() in image_extensions
+                )
+            counts[split] = {"total": total, "background": 0}
+        return counts
     @classmethod
     def _detect_splits(cls, path: Path, config: dict) -> list[str]:
         """Detect available splits from config and filesystem.
@@ -301,6 +541,10 @@ class YOLODataset(Dataset):
         Returns:
             List of image file paths sorted alphabetically.
         """
+        # Handle classification datasets differently
+        if self.task == TaskType.CLASSIFICATION:
+            return self._get_classification_image_paths(split)
         image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
         images_root = self.path / "images"
         image_paths: list[Path] = []
@@ -345,6 +589,51 @@ class YOLODataset(Dataset):
         return sorted(image_paths, key=lambda p: p.name)
+    def _get_classification_image_paths(self, split: str | None = None) -> list[Path]:
+        """Get image paths for classification datasets.
+        Args:
+            split: Specific split to get images from. If None, returns all images.
+        Returns:
+            List of image file paths sorted alphabetically.
+        """
+        image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
+        image_paths: list[Path] = []
+        # Handle flat structure (no splits)
+        if not self.splits:
+            for class_name in self.class_names:
+                class_dir = self.path / class_name
+                if not class_dir.is_dir():
+                    continue
+                for img_file in class_dir.iterdir():
+                    if img_file.suffix.lower() in image_extensions:
+                        image_paths.append(img_file)
+            return sorted(image_paths, key=lambda p: p.name)
+        # Handle split structure
+        images_root = self.path / "images"
+        splits_to_search = [split] if split else self.splits
+        for s in splits_to_search:
+            split_dir = images_root / s
+            if not split_dir.is_dir():
+                continue
+            for class_name in self.class_names:
+                class_dir = split_dir / class_name
+                if not class_dir.is_dir():
+                    continue
+                for img_file in class_dir.iterdir():
+                    if img_file.suffix.lower() in image_extensions:
+                        image_paths.append(img_file)
+        return sorted(image_paths, key=lambda p: p.name)
     def get_annotations_for_image(self, image_path: Path) -> list[dict]:
         """Get annotations for a specific image.
@@ -408,12 +697,14 @@ class YOLODataset(Dataset):
                             x = x_center - width / 2
                             y = y_center - height / 2
-                            annotations.append({
-                                "class_name": class_name,
-                                "class_id": class_id,
-                                "bbox": (x, y, width, height),
-                                "polygon": None,
-                            })
+                            annotations.append(
+                                {
+                                    "class_name": class_name,
+                                    "class_id": class_id,
+                                    "bbox": (x, y, width, height),
+                                    "polygon": None,
+                                }
+                            )
                         else:
                             # Segmentation: class x1 y1 x2 y2 ... xn yn
                             coords = [float(p) for p in parts[1:]]
@@ -431,12 +722,14 @@ class YOLODataset(Dataset):
                             width = max(xs) - x
                             height = max(ys) - y
-                            annotations.append({
-                                "class_name": class_name,
-                                "class_id": class_id,
-                                "bbox": (x, y, width, height),
-                                "polygon": polygon,
-                            })
+                            annotations.append(
+                                {
+                                    "class_name": class_name,
+                                    "class_id": class_id,
+                                    "bbox": (x, y, width, height),
+                                    "polygon": polygon,
+                                }
+                            )
                     except (ValueError, IndexError):
                         continue
@@ -445,3 +738,65 @@ class YOLODataset(Dataset):
             pass
         return annotations
+    def get_images_by_class(self, split: str | None = None) -> dict[str, list[Path]]:
+        """Get images grouped by class for classification datasets.
+        Args:
+            split: Specific split to get images from. If None, uses first
+                   available split or all images for flat structure.
+        Returns:
+            Dictionary mapping class name to list of image paths.
+        """
+        if self.task != TaskType.CLASSIFICATION:
+            return {}
+        image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
+        images_by_class: dict[str, list[Path]] = {cls: [] for cls in self.class_names}
+        # Handle flat structure (no splits)
+        if not self.splits:
+            for class_name in self.class_names:
+                class_dir = self.path / class_name
+                if not class_dir.is_dir():
+                    continue
+                for img_file in class_dir.iterdir():
+                    if img_file.suffix.lower() in image_extensions:
+                        images_by_class[class_name].append(img_file)
+            # Sort images within each class
+            for class_name in images_by_class:
+                images_by_class[class_name] = sorted(
+                    images_by_class[class_name], key=lambda p: p.name
+                )
+            return images_by_class
+        # Handle split structure
+        images_root = self.path / "images"
+        default_splits = self.splits[:1] if self.splits else []
+        splits_to_search = [split] if split else default_splits
+        for s in splits_to_search:
+            split_dir = images_root / s
+            if not split_dir.is_dir():
+                continue
+            for class_name in self.class_names:
+                class_dir = split_dir / class_name
+                if not class_dir.is_dir():
+                    continue
+                for img_file in class_dir.iterdir():
+                    if img_file.suffix.lower() in image_extensions:
+                        images_by_class[class_name].append(img_file)
+        # Sort images within each class for consistent ordering
+        for class_name in images_by_class:
+            images_by_class[class_name] = sorted(
+                images_by_class[class_name], key=lambda p: p.name
+            )
+        return images_by_class

{argus_cv-1.1.0.dist-info → argus_cv-1.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: argus-cv
-Version: 1.1.0
+Version: 1.3.0
 Summary: CLI tool for working with vision AI datasets
 Requires-Python: >=3.10
 Requires-Dist: numpy>=1.24.0

argus_cv-1.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+argus/__init__.py,sha256=277ASQvH6ZWVWUzOCVB7vAxn3OYciow4nVkjG16-kio,64
+argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
+argus/cli.py,sha256=th1Rgn1Sm9juWoavopEXLBT8XEh5lKzOMX-pccwvDgA,47904
+argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
+argus/core/__init__.py,sha256=II2wYJpGUUGGKOFZ5BCpMIBTfv0WP-F15U_xbpWGjk8,453
+argus/core/base.py,sha256=WBrB7XWz125YZ1UQfHQwsYAuIFY_XGEhG_0ybgPhn6s,3696
+argus/core/coco.py,sha256=atVurZV2T7cszydyD9GfDTWHGYDd-JNK5RD7nse5avc,15823
+argus/core/mask.py,sha256=m7Ztf4lAZx5ITpk3F3mETcvCC6hGydlxK0-2nCjeTfU,21835
+argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
+argus/core/yolo.py,sha256=tTc9jJzXcwa8LQ_s8nv-D_i2b9k_x-LT1O0eWr4sZ2k,28616
+argus_cv-1.3.0.dist-info/METADATA,sha256=1CCYLSnGHaAdS5jjwUuTJWRHu5OM_oFfvi1L_v3SkQw,1070
+argus_cv-1.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+argus_cv-1.3.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
+argus_cv-1.3.0.dist-info/RECORD,,

argus_cv-1.1.0.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-argus/__init__.py,sha256=_j7Q7azBU1noVs9msyPjAbXcNBN0lZamyibW_IP_mdc,64
-argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
-argus/cli.py,sha256=T2jA4LZIeg1_BJJyvHWRhC-DypSBl-9UJV2k8odGGok,25849
-argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
-argus/core/__init__.py,sha256=Plv_tk0Wq9OlGLDPOSQWxrd5cTwNK9kEZANTim3s23A,348
-argus/core/base.py,sha256=Vd_2xR6L3lhu9vHoyLeFTc0Dg59py_D9kaye1tta5Co,3678
-argus/core/coco.py,sha256=bJvOhBzwjsOU8DBijGDysnSPlprwetkPf4Z02UOmqw0,15757
-argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
-argus/core/yolo.py,sha256=KTWgmEguxKZ_C0WsMxUB-B-zbx_Oi1ieGDk3Osuh0xY,15876
-argus_cv-1.1.0.dist-info/METADATA,sha256=TovQhK7bNU8ip-3F2VzGbTzIjnxYXfuJ2ShbhLM5rvM,1070
-argus_cv-1.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-argus_cv-1.1.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
-argus_cv-1.1.0.dist-info/RECORD,,

{argus_cv-1.1.0.dist-info → argus_cv-1.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{argus_cv-1.1.0.dist-info → argus_cv-1.3.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

argus-cv 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

Potentially problematic release.

argus-cv 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl