PyPI - argus-cv - Versions diffs - 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl - Mend

argus-cv 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of argus-cv might be problematic. Click here for more details.

Files changed (12)

argus/__init__.py +1 -1
argus/cli.py +345 -1
argus/core/__init__.py +20 -0
argus/core/coco.py +46 -8
argus/core/convert.py +277 -0
argus/core/filter.py +670 -0
argus/core/yolo.py +29 -0
{argus_cv-1.4.0.dist-info → argus_cv-1.5.1.dist-info}/METADATA +1 -1
argus_cv-1.5.1.dist-info/RECORD +16 -0
argus_cv-1.4.0.dist-info/RECORD +0 -14
{argus_cv-1.4.0.dist-info → argus_cv-1.5.1.dist-info}/WHEEL +0 -0
{argus_cv-1.4.0.dist-info → argus_cv-1.5.1.dist-info}/entry_points.txt +0 -0

argus/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Argus - Vision AI dataset toolkit."""
-__version__ = "1.4.0"
+__version__ = "1.5.1"

argus/cli.py CHANGED Viewed

@@ -8,11 +8,23 @@ import cv2
 import numpy as np
 import typer
 from rich.console import Console
-from rich.progress import Progress, SpinnerColumn, TextColumn
+from rich.progress import (
+    BarColumn,
+    Progress,
+    SpinnerColumn,
+    TaskProgressColumn,
+    TextColumn,
+)
 from rich.table import Table
 from argus.core import COCODataset, Dataset, MaskDataset, YOLODataset
 from argus.core.base import DatasetFormat, TaskType
+from argus.core.convert import convert_mask_to_yolo_seg
+from argus.core.filter import (
+    filter_coco_dataset,
+    filter_mask_dataset,
+    filter_yolo_dataset,
+)
 from argus.core.split import (
     is_coco_unsplit,
     parse_ratio,
@@ -632,6 +644,338 @@ def split_dataset(
     )
+@app.command(name="convert")
+def convert_dataset(
+    input_path: Annotated[
+        Path,
+        typer.Option(
+            "--input-path",
+            "-i",
+            help="Path to the source dataset.",
+        ),
+    ] = Path("."),
+    output_path: Annotated[
+        Path,
+        typer.Option(
+            "--output-path",
+            "-o",
+            help="Output directory for converted dataset.",
+        ),
+    ] = Path("converted"),
+    to_format: Annotated[
+        str,
+        typer.Option(
+            "--to",
+            help="Target format (currently only 'yolo-seg' is supported).",
+        ),
+    ] = "yolo-seg",
+    epsilon_factor: Annotated[
+        float,
+        typer.Option(
+            "--epsilon-factor",
+            "-e",
+            help="Polygon simplification factor (Douglas-Peucker algorithm).",
+            min=0.0,
+            max=1.0,
+        ),
+    ] = 0.005,
+    min_area: Annotated[
+        float,
+        typer.Option(
+            "--min-area",
+            "-a",
+            help="Minimum contour area in pixels to include.",
+            min=0.0,
+        ),
+    ] = 100.0,
+) -> None:
+    """Convert a dataset from one format to another.
+    Currently supports converting MaskDataset to YOLO segmentation format.
+    Example:
+        uvx argus-cv convert -i /path/to/masks -o /path/to/output --to yolo-seg
+    """
+    # Validate format
+    if to_format != "yolo-seg":
+        console.print(
+            f"[red]Error: Unsupported target format '{to_format}'.[/red]\n"
+            "[yellow]Currently only 'yolo-seg' is supported.[/yellow]"
+        )
+        raise typer.Exit(1)
+    # Resolve and validate input path
+    input_path = input_path.resolve()
+    if not input_path.exists():
+        console.print(f"[red]Error: Path does not exist: {input_path}[/red]")
+        raise typer.Exit(1)
+    if not input_path.is_dir():
+        console.print(f"[red]Error: Path is not a directory: {input_path}[/red]")
+        raise typer.Exit(1)
+    # Detect source dataset - must be MaskDataset for yolo-seg conversion
+    dataset = MaskDataset.detect(input_path)
+    if not dataset:
+        console.print(
+            f"[red]Error: No MaskDataset found at {input_path}[/red]\n"
+            "[yellow]Ensure the path contains images/ + masks/ directories "
+            "(or equivalent patterns like img/+gt/ or leftImg8bit/+gtFine/).[/yellow]"
+        )
+        raise typer.Exit(1)
+    # Resolve output path
+    if not output_path.is_absolute():
+        output_path = input_path.parent / output_path
+    output_path = output_path.resolve()
+    # Check if output already exists
+    if output_path.exists() and any(output_path.iterdir()):
+        console.print(
+            f"[red]Error: Output directory already exists and is not empty: "
+            f"{output_path}[/red]"
+        )
+        raise typer.Exit(1)
+    # Show conversion info
+    console.print("[cyan]Converting MaskDataset to YOLO segmentation format[/cyan]")
+    console.print(f"  Source: {input_path}")
+    console.print(f"  Output: {output_path}")
+    console.print(f"  Classes: {dataset.num_classes}")
+    splits_str = ", ".join(dataset.splits) if dataset.splits else "unsplit"
+    console.print(f"  Splits: {splits_str}")
+    console.print()
+    # Run conversion with progress bar
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console,
+    ) as progress:
+        task = progress.add_task("Processing images...", total=None)
+        def update_progress(current: int, total: int) -> None:
+            progress.update(task, completed=current, total=total)
+        try:
+            stats = convert_mask_to_yolo_seg(
+                dataset=dataset,
+                output_path=output_path,
+                epsilon_factor=epsilon_factor,
+                min_area=min_area,
+                progress_callback=update_progress,
+            )
+        except Exception as exc:
+            console.print(f"[red]Error during conversion: {exc}[/red]")
+            raise typer.Exit(1) from exc
+    # Show results
+    console.print()
+    console.print("[green]Conversion complete![/green]")
+    console.print(f"  Images processed: {stats['images']}")
+    console.print(f"  Labels created: {stats['labels']}")
+    console.print(f"  Polygons extracted: {stats['polygons']}")
+    if stats["skipped"] > 0:
+        skipped = stats["skipped"]
+        console.print(f"  [yellow]Skipped: {skipped} (no mask or empty)[/yellow]")
+    if stats["warnings"] > 0:
+        console.print(f"  [yellow]Warnings: {stats['warnings']}[/yellow]")
+    console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")
+@app.command(name="filter")
+def filter_dataset(
+    dataset_path: Annotated[
+        Path,
+        typer.Option(
+            "--dataset-path",
+            "-d",
+            help="Path to the dataset root directory.",
+        ),
+    ] = Path("."),
+    output_path: Annotated[
+        Path,
+        typer.Option(
+            "--output",
+            "-o",
+            help="Output directory for filtered dataset.",
+        ),
+    ] = Path("filtered"),
+    classes: Annotated[
+        str,
+        typer.Option(
+            "--classes",
+            "-c",
+            help="Comma-separated list of class names to keep.",
+        ),
+    ] = "",
+    no_background: Annotated[
+        bool,
+        typer.Option(
+            "--no-background",
+            help="Exclude images with no annotations after filtering.",
+        ),
+    ] = False,
+    use_symlinks: Annotated[
+        bool,
+        typer.Option(
+            "--symlinks",
+            help="Use symlinks instead of copying images.",
+        ),
+    ] = False,
+) -> None:
+    """Filter a dataset by class names.
+    Creates a filtered copy of the dataset containing only the specified classes.
+    Class IDs are remapped to sequential values (0, 1, 2, ...).
+    Examples:
+        argus-cv filter -d dataset -o output --classes ball --no-background
+        argus-cv filter -d dataset -o output --classes ball,player
+        argus-cv filter -d dataset -o output --classes ball --symlinks
+    """
+    # Resolve path and validate
+    dataset_path = dataset_path.resolve()
+    if not dataset_path.exists():
+        console.print(f"[red]Error: Path does not exist: {dataset_path}[/red]")
+        raise typer.Exit(1)
+    if not dataset_path.is_dir():
+        console.print(f"[red]Error: Path is not a directory: {dataset_path}[/red]")
+        raise typer.Exit(1)
+    # Parse classes
+    if not classes:
+        console.print(
+            "[red]Error: No classes specified. "
+            "Use --classes to specify classes to keep.[/red]"
+        )
+        raise typer.Exit(1)
+    class_list = [c.strip() for c in classes.split(",") if c.strip()]
+    if not class_list:
+        console.print("[red]Error: No valid class names provided.[/red]")
+        raise typer.Exit(1)
+    # Detect dataset
+    dataset = _detect_dataset(dataset_path)
+    if not dataset:
+        console.print(
+            f"[red]Error: No dataset found at {dataset_path}[/red]\n"
+            "[yellow]Ensure the path points to a dataset root containing "
+            "data.yaml (YOLO), annotations/ folder (COCO), or "
+            "images/ + masks/ directories (Mask).[/yellow]"
+        )
+        raise typer.Exit(1)
+    # Validate classes exist in dataset
+    missing_classes = [c for c in class_list if c not in dataset.class_names]
+    if missing_classes:
+        available = ", ".join(dataset.class_names)
+        missing = ", ".join(missing_classes)
+        console.print(
+            f"[red]Error: Classes not found in dataset: {missing}[/red]\n"
+            f"[yellow]Available classes: {available}[/yellow]"
+        )
+        raise typer.Exit(1)
+    # Resolve output path
+    if not output_path.is_absolute():
+        output_path = dataset_path.parent / output_path
+    output_path = output_path.resolve()
+    # Check if output already exists
+    if output_path.exists() and any(output_path.iterdir()):
+        console.print(
+            f"[red]Error: Output directory already exists and is not empty: "
+            f"{output_path}[/red]"
+        )
+        raise typer.Exit(1)
+    # Show filter info
+    console.print(f"[cyan]Filtering {dataset.format.value.upper()} dataset[/cyan]")
+    console.print(f"  Source: {dataset_path}")
+    console.print(f"  Output: {output_path}")
+    console.print(f"  Classes to keep: {', '.join(class_list)}")
+    console.print(f"  Exclude background: {no_background}")
+    console.print(f"  Use symlinks: {use_symlinks}")
+    console.print()
+    # Run filtering with progress bar
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console,
+    ) as progress:
+        task = progress.add_task("Filtering dataset...", total=None)
+        def update_progress(current: int, total: int) -> None:
+            progress.update(task, completed=current, total=total)
+        try:
+            if dataset.format == DatasetFormat.YOLO:
+                assert isinstance(dataset, YOLODataset)
+                stats = filter_yolo_dataset(
+                    dataset=dataset,
+                    output_path=output_path,
+                    classes=class_list,
+                    no_background=no_background,
+                    use_symlinks=use_symlinks,
+                    progress_callback=update_progress,
+                )
+            elif dataset.format == DatasetFormat.COCO:
+                assert isinstance(dataset, COCODataset)
+                stats = filter_coco_dataset(
+                    dataset=dataset,
+                    output_path=output_path,
+                    classes=class_list,
+                    no_background=no_background,
+                    use_symlinks=use_symlinks,
+                    progress_callback=update_progress,
+                )
+            elif dataset.format == DatasetFormat.MASK:
+                assert isinstance(dataset, MaskDataset)
+                stats = filter_mask_dataset(
+                    dataset=dataset,
+                    output_path=output_path,
+                    classes=class_list,
+                    no_background=no_background,
+                    use_symlinks=use_symlinks,
+                    progress_callback=update_progress,
+                )
+            else:
+                console.print(
+                    f"[red]Error: Unsupported dataset format: {dataset.format}[/red]"
+                )
+                raise typer.Exit(1)
+        except ValueError as exc:
+            console.print(f"[red]Error: {exc}[/red]")
+            raise typer.Exit(1) from exc
+        except Exception as exc:
+            console.print(f"[red]Error during filtering: {exc}[/red]")
+            raise typer.Exit(1) from exc
+    # Show results
+    console.print()
+    console.print("[green]Filtering complete![/green]")
+    console.print(f"  Images: {stats.get('images', 0)}")
+    if "labels" in stats:
+        console.print(f"  Labels: {stats['labels']}")
+    if "annotations" in stats:
+        console.print(f"  Annotations: {stats['annotations']}")
+    if "masks" in stats:
+        console.print(f"  Masks: {stats['masks']}")
+    if stats.get("skipped", 0) > 0:
+        skipped = stats["skipped"]
+        console.print(f"  [yellow]Skipped: {skipped} (background images)[/yellow]")
+    console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")
 class _ImageViewer:
     """Interactive image viewer with zoom and pan support."""

argus/core/__init__.py CHANGED Viewed

@@ -2,6 +2,18 @@
 from argus.core.base import Dataset
 from argus.core.coco import COCODataset
+from argus.core.convert import (
+    ConversionParams,
+    Polygon,
+    convert_mask_to_yolo_labels,
+    convert_mask_to_yolo_seg,
+    mask_to_polygons,
+)
+from argus.core.filter import (
+    filter_coco_dataset,
+    filter_mask_dataset,
+    filter_yolo_dataset,
+)
 from argus.core.mask import ConfigurationError, MaskDataset
 from argus.core.split import split_coco_dataset, split_yolo_dataset
 from argus.core.yolo import YOLODataset
@@ -14,4 +26,12 @@ __all__ = [
     "ConfigurationError",
     "split_coco_dataset",
     "split_yolo_dataset",
+    "filter_yolo_dataset",
+    "filter_coco_dataset",
+    "filter_mask_dataset",
+    "ConversionParams",
+    "Polygon",
+    "mask_to_polygons",
+    "convert_mask_to_yolo_labels",
+    "convert_mask_to_yolo_seg",
 ]

argus/core/coco.py CHANGED Viewed

@@ -75,6 +75,13 @@ class COCODataset(Dataset):
         # Also check root directory for single annotation file
         annotation_files.extend(path.glob("*.json"))
+        # Check split directories for Roboflow COCO format
+        for split_name in ["train", "valid", "val", "test"]:
+            split_dir = path / split_name
+            if split_dir.is_dir():
+                annotation_files.extend(split_dir.glob("*annotations*.json"))
+                annotation_files.extend(split_dir.glob("*coco*.json"))
         # Filter to only include files that might be COCO annotations
         # (exclude package.json, tsconfig.json, etc.)
         filtered_files = []
@@ -185,8 +192,10 @@ class COCODataset(Dataset):
                     if isinstance(cat, dict) and "id" in cat and "name" in cat:
                         id_to_name[cat["id"]] = cat["name"]
-                # Determine split from filename
-                split = self._get_split_from_filename(ann_file.stem)
+                # Determine split from filename or parent directory
+                split = self._get_split_from_filename(
+                    ann_file.stem, ann_file.parent.name
+                )
                 # Count annotations per category
                 split_counts: dict[str, int] = counts.get(split, {})
@@ -224,7 +233,9 @@ class COCODataset(Dataset):
                 if not isinstance(data, dict):
                     continue
-                split = self._get_split_from_filename(ann_file.stem)
+                split = self._get_split_from_filename(
+                    ann_file.stem, ann_file.parent.name
+                )
                 images = data.get("images", [])
                 annotations = data.get("annotations", [])
@@ -256,11 +267,12 @@ class COCODataset(Dataset):
         return counts
     @staticmethod
-    def _get_split_from_filename(filename: str) -> str:
-        """Extract split name from annotation filename.
+    def _get_split_from_filename(filename: str, parent_dir: str | None = None) -> str:
+        """Extract split name from annotation filename or parent directory.
         Args:
             filename: Annotation file stem (without extension).
+            parent_dir: Optional parent directory name (for Roboflow COCO format).
         Returns:
             Split name (train, val, test) or 'train' as default.
@@ -272,6 +284,17 @@ class COCODataset(Dataset):
             return "val"
         elif "test" in name_lower:
             return "test"
+        # Check parent directory name (Roboflow COCO format)
+        if parent_dir:
+            parent_lower = parent_dir.lower()
+            if parent_lower == "train":
+                return "train"
+            elif parent_lower in ("val", "valid"):
+                return "val"
+            elif parent_lower == "test":
+                return "test"
         return "train"
     @classmethod
@@ -301,7 +324,7 @@ class COCODataset(Dataset):
     @classmethod
     def _detect_splits(cls, annotation_files: list[Path]) -> list[str]:
-        """Detect available splits from annotation filenames.
+        """Detect available splits from annotation filenames or parent directories.
         Args:
             annotation_files: List of annotation file paths.
@@ -313,13 +336,22 @@ class COCODataset(Dataset):
         for ann_file in annotation_files:
             name_lower = ann_file.stem.lower()
+            parent_lower = ann_file.parent.name.lower()
+            # Check filename first
             if "train" in name_lower and "train" not in splits:
                 splits.append("train")
             elif "val" in name_lower and "val" not in splits:
                 splits.append("val")
             elif "test" in name_lower and "test" not in splits:
                 splits.append("test")
+            # Check parent directory (Roboflow COCO format)
+            elif parent_lower == "train" and "train" not in splits:
+                splits.append("train")
+            elif parent_lower in ("val", "valid") and "val" not in splits:
+                splits.append("val")
+            elif parent_lower == "test" and "test" not in splits:
+                splits.append("test")
         # If no splits detected from filenames, default to train
         if not splits:
@@ -342,7 +374,9 @@ class COCODataset(Dataset):
         for ann_file in self.annotation_files:
             # Filter by split if specified
             if split:
-                file_split = self._get_split_from_filename(ann_file.stem)
+                file_split = self._get_split_from_filename(
+                    ann_file.stem, ann_file.parent.name
+                )
                 if file_split != split:
                     continue
@@ -354,7 +388,9 @@ class COCODataset(Dataset):
                     continue
                 images = data.get("images", [])
-                file_split = self._get_split_from_filename(ann_file.stem)
+                file_split = self._get_split_from_filename(
+                    ann_file.stem, ann_file.parent.name
+                )
                 for img in images:
                     if not isinstance(img, dict) or "file_name" not in img:
@@ -371,6 +407,8 @@ class COCODataset(Dataset):
                         self.path / "images" / file_name,
                         self.path / file_split / file_name,
                         self.path / file_name,
+                        # Roboflow format: images alongside annotations
+                        ann_file.parent / file_name,
                     ]
                     for img_path in possible_paths:

argus-cv 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

Potentially problematic release.

argus-cv 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl