argus-cv 1.5.0-py3-none-any.whl → 1.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of argus-cv has been flagged as potentially problematic.

argus/__init__.py CHANGED
@@ -1,3 +1,3 @@
  """Argus - Vision AI dataset toolkit."""
 
- __version__ = "1.5.0"
+ __version__ = "1.5.1"
argus/cli.py CHANGED
@@ -20,6 +20,11 @@ from rich.table import Table
  from argus.core import COCODataset, Dataset, MaskDataset, YOLODataset
  from argus.core.base import DatasetFormat, TaskType
  from argus.core.convert import convert_mask_to_yolo_seg
+ from argus.core.filter import (
+     filter_coco_dataset,
+     filter_mask_dataset,
+     filter_yolo_dataset,
+ )
  from argus.core.split import (
      is_coco_unsplit,
      parse_ratio,
@@ -781,6 +786,196 @@ def convert_dataset(
      console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")
 
 
+ @app.command(name="filter")
+ def filter_dataset(
+     dataset_path: Annotated[
+         Path,
+         typer.Option(
+             "--dataset-path",
+             "-d",
+             help="Path to the dataset root directory.",
+         ),
+     ] = Path("."),
+     output_path: Annotated[
+         Path,
+         typer.Option(
+             "--output",
+             "-o",
+             help="Output directory for filtered dataset.",
+         ),
+     ] = Path("filtered"),
+     classes: Annotated[
+         str,
+         typer.Option(
+             "--classes",
+             "-c",
+             help="Comma-separated list of class names to keep.",
+         ),
+     ] = "",
+     no_background: Annotated[
+         bool,
+         typer.Option(
+             "--no-background",
+             help="Exclude images with no annotations after filtering.",
+         ),
+     ] = False,
+     use_symlinks: Annotated[
+         bool,
+         typer.Option(
+             "--symlinks",
+             help="Use symlinks instead of copying images.",
+         ),
+     ] = False,
+ ) -> None:
+     """Filter a dataset by class names.
+
+     Creates a filtered copy of the dataset containing only the specified classes.
+     Class IDs are remapped to sequential values (0, 1, 2, ...).
+
+     Examples:
+         argus-cv filter -d dataset -o output --classes ball --no-background
+         argus-cv filter -d dataset -o output --classes ball,player
+         argus-cv filter -d dataset -o output --classes ball --symlinks
+     """
+     # Resolve path and validate
+     dataset_path = dataset_path.resolve()
+     if not dataset_path.exists():
+         console.print(f"[red]Error: Path does not exist: {dataset_path}[/red]")
+         raise typer.Exit(1)
+     if not dataset_path.is_dir():
+         console.print(f"[red]Error: Path is not a directory: {dataset_path}[/red]")
+         raise typer.Exit(1)
+
+     # Parse classes
+     if not classes:
+         console.print(
+             "[red]Error: No classes specified. "
+             "Use --classes to specify classes to keep.[/red]"
+         )
+         raise typer.Exit(1)
+
+     class_list = [c.strip() for c in classes.split(",") if c.strip()]
+     if not class_list:
+         console.print("[red]Error: No valid class names provided.[/red]")
+         raise typer.Exit(1)
+
+     # Detect dataset
+     dataset = _detect_dataset(dataset_path)
+     if not dataset:
+         console.print(
+             f"[red]Error: No dataset found at {dataset_path}[/red]\n"
+             "[yellow]Ensure the path points to a dataset root containing "
+             "data.yaml (YOLO), annotations/ folder (COCO), or "
+             "images/ + masks/ directories (Mask).[/yellow]"
+         )
+         raise typer.Exit(1)
+
+     # Validate classes exist in dataset
+     missing_classes = [c for c in class_list if c not in dataset.class_names]
+     if missing_classes:
+         available = ", ".join(dataset.class_names)
+         missing = ", ".join(missing_classes)
+         console.print(
+             f"[red]Error: Classes not found in dataset: {missing}[/red]\n"
+             f"[yellow]Available classes: {available}[/yellow]"
+         )
+         raise typer.Exit(1)
+
+     # Resolve output path
+     if not output_path.is_absolute():
+         output_path = dataset_path.parent / output_path
+     output_path = output_path.resolve()
+
+     # Check if output already exists
+     if output_path.exists() and any(output_path.iterdir()):
+         console.print(
+             f"[red]Error: Output directory already exists and is not empty: "
+             f"{output_path}[/red]"
+         )
+         raise typer.Exit(1)
+
+     # Show filter info
+     console.print(f"[cyan]Filtering {dataset.format.value.upper()} dataset[/cyan]")
+     console.print(f" Source: {dataset_path}")
+     console.print(f" Output: {output_path}")
+     console.print(f" Classes to keep: {', '.join(class_list)}")
+     console.print(f" Exclude background: {no_background}")
+     console.print(f" Use symlinks: {use_symlinks}")
+     console.print()
+
+     # Run filtering with progress bar
+     with Progress(
+         SpinnerColumn(),
+         TextColumn("[progress.description]{task.description}"),
+         BarColumn(),
+         TaskProgressColumn(),
+         console=console,
+     ) as progress:
+         task = progress.add_task("Filtering dataset...", total=None)
+
+         def update_progress(current: int, total: int) -> None:
+             progress.update(task, completed=current, total=total)
+
+         try:
+             if dataset.format == DatasetFormat.YOLO:
+                 assert isinstance(dataset, YOLODataset)
+                 stats = filter_yolo_dataset(
+                     dataset=dataset,
+                     output_path=output_path,
+                     classes=class_list,
+                     no_background=no_background,
+                     use_symlinks=use_symlinks,
+                     progress_callback=update_progress,
+                 )
+             elif dataset.format == DatasetFormat.COCO:
+                 assert isinstance(dataset, COCODataset)
+                 stats = filter_coco_dataset(
+                     dataset=dataset,
+                     output_path=output_path,
+                     classes=class_list,
+                     no_background=no_background,
+                     use_symlinks=use_symlinks,
+                     progress_callback=update_progress,
+                 )
+             elif dataset.format == DatasetFormat.MASK:
+                 assert isinstance(dataset, MaskDataset)
+                 stats = filter_mask_dataset(
+                     dataset=dataset,
+                     output_path=output_path,
+                     classes=class_list,
+                     no_background=no_background,
+                     use_symlinks=use_symlinks,
+                     progress_callback=update_progress,
+                 )
+             else:
+                 console.print(
+                     f"[red]Error: Unsupported dataset format: {dataset.format}[/red]"
+                 )
+                 raise typer.Exit(1)
+         except ValueError as exc:
+             console.print(f"[red]Error: {exc}[/red]")
+             raise typer.Exit(1) from exc
+         except Exception as exc:
+             console.print(f"[red]Error during filtering: {exc}[/red]")
+             raise typer.Exit(1) from exc
+
+     # Show results
+     console.print()
+     console.print("[green]Filtering complete![/green]")
+     console.print(f" Images: {stats.get('images', 0)}")
+     if "labels" in stats:
+         console.print(f" Labels: {stats['labels']}")
+     if "annotations" in stats:
+         console.print(f" Annotations: {stats['annotations']}")
+     if "masks" in stats:
+         console.print(f" Masks: {stats['masks']}")
+     if stats.get("skipped", 0) > 0:
+         skipped = stats["skipped"]
+         console.print(f" [yellow]Skipped: {skipped} (background images)[/yellow]")
+
+     console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")
+
+
  class _ImageViewer:
      """Interactive image viewer with zoom and pan support."""
 
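
The new filter command follows the same validate → detect → dispatch shape as the existing convert command: it resolves and checks the dataset path, parses the class list, detects the dataset format, then hands off to the matching filter function under a Rich progress bar. For reference, the invocations from the command's own docstring (dataset, output, and class names are placeholders):

    argus-cv filter -d dataset -o output --classes ball --no-background
    argus-cv filter -d dataset -o output --classes ball,player
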
argus/core/__init__.py CHANGED
@@ -9,6 +9,11 @@ from argus.core.convert import (
      convert_mask_to_yolo_seg,
      mask_to_polygons,
  )
+ from argus.core.filter import (
+     filter_coco_dataset,
+     filter_mask_dataset,
+     filter_yolo_dataset,
+ )
  from argus.core.mask import ConfigurationError, MaskDataset
  from argus.core.split import split_coco_dataset, split_yolo_dataset
  from argus.core.yolo import YOLODataset
@@ -21,6 +26,9 @@ __all__ = [
      "ConfigurationError",
      "split_coco_dataset",
      "split_yolo_dataset",
+     "filter_yolo_dataset",
+     "filter_coco_dataset",
+     "filter_mask_dataset",
      "ConversionParams",
      "Polygon",
      "mask_to_polygons",
argus/core/filter.py ADDED
@@ -0,0 +1,670 @@
+ """Dataset filtering utilities."""
+
+ import json
+ import shutil
+ from collections.abc import Callable
+ from pathlib import Path
+
+ import cv2
+ import numpy as np
+ import yaml
+
+ from argus.core.base import TaskType
+ from argus.core.coco import COCODataset
+ from argus.core.mask import MaskDataset
+ from argus.core.yolo import YOLODataset
+
+
+ def filter_yolo_dataset(
+     dataset: YOLODataset,
+     output_path: Path,
+     classes: list[str],
+     no_background: bool = False,
+     use_symlinks: bool = False,
+     progress_callback: Callable[[int, int], None] | None = None,
+ ) -> dict[str, int]:
+     """Filter a YOLO dataset by class names.
+
+     Args:
+         dataset: Source YOLODataset to filter.
+         output_path: Directory to write filtered dataset.
+         classes: List of class names to keep.
+         no_background: If True, exclude images with no annotations after filtering.
+         use_symlinks: If True, create symlinks instead of copying images.
+         progress_callback: Optional callback for progress updates (current, total).
+
+     Returns:
+         Dictionary with statistics: images, labels, skipped.
+     """
+     if dataset.task == TaskType.CLASSIFICATION:
+         return _filter_yolo_classification(
+             dataset, output_path, classes, use_symlinks, progress_callback
+         )
+     else:
+         return _filter_yolo_detection_segmentation(
+             dataset,
+             output_path,
+             classes,
+             no_background,
+             use_symlinks,
+             progress_callback,
+         )
+
+
+ def _filter_yolo_detection_segmentation(
+     dataset: YOLODataset,
+     output_path: Path,
+     classes: list[str],
+     no_background: bool,
+     use_symlinks: bool,
+     progress_callback: Callable[[int, int], None] | None,
+ ) -> dict[str, int]:
+     """Filter YOLO detection/segmentation dataset."""
+     # Build class ID mapping: old_id -> new_id
+     # New IDs are sequential starting from 0
+     old_to_new: dict[int, int] = {}
+     new_class_names: list[str] = []
+
+     for i, name in enumerate(dataset.class_names):
+         if name in classes:
+             new_id = len(new_class_names)
+             old_to_new[i] = new_id
+             new_class_names.append(name)
+
+     if not new_class_names:
+         raise ValueError(f"No matching classes found. Available: {dataset.class_names}")
+
+     # Create output structure
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     # Determine splits
+     splits = dataset.splits if dataset.splits else [""]
+     has_splits = bool(dataset.splits)
+
+     stats = {"images": 0, "labels": 0, "skipped": 0}
+
+     # Collect all image/label pairs
+     all_pairs: list[tuple[Path, Path, str]] = []
+     labels_root = dataset.path / "labels"
+
+     for split in splits:
+         if has_splits:
+             images_dir = dataset.path / "images" / split
+             labels_dir = labels_root / split
+         else:
+             images_dir = dataset.path / "images"
+             labels_dir = labels_root
+
+         if not images_dir.is_dir():
+             continue
+
+         for img_file in images_dir.iterdir():
+             if img_file.suffix.lower() not in {
+                 ".jpg",
+                 ".jpeg",
+                 ".png",
+                 ".bmp",
+                 ".tiff",
+                 ".webp",
+             }:
+                 continue
+
+             label_file = labels_dir / f"{img_file.stem}.txt"
+             all_pairs.append((img_file, label_file, split))
+
+     total = len(all_pairs)
+
+     for idx, (img_file, label_file, split) in enumerate(all_pairs):
+         if progress_callback:
+             progress_callback(idx, total)
+
+         # Read and filter label file
+         filtered_lines: list[str] = []
+
+         if label_file.exists():
+             with open(label_file, encoding="utf-8") as f:
+                 for line in f:
+                     line = line.strip()
+                     if not line:
+                         continue
+
+                     parts = line.split()
+                     if len(parts) < 5:
+                         continue
+
+                     try:
+                         old_class_id = int(parts[0])
+                     except ValueError:
+                         continue
+
+                     if old_class_id in old_to_new:
+                         new_class_id = old_to_new[old_class_id]
+                         parts[0] = str(new_class_id)
+                         filtered_lines.append(" ".join(parts))
+
+         # Skip if no annotations and no_background is True
+         if no_background and not filtered_lines:
+             stats["skipped"] += 1
+             continue
+
+         # Create output directories
+         if has_splits:
+             out_images_dir = output_path / "images" / split
+             out_labels_dir = output_path / "labels" / split
+         else:
+             out_images_dir = output_path / "images"
+             out_labels_dir = output_path / "labels"
+
+         out_images_dir.mkdir(parents=True, exist_ok=True)
+         out_labels_dir.mkdir(parents=True, exist_ok=True)
+
+         # Copy/symlink image
+         out_img = out_images_dir / img_file.name
+         if use_symlinks:
+             if not out_img.exists():
+                 out_img.symlink_to(img_file.resolve())
+         else:
+             if not out_img.exists():
+                 shutil.copy2(img_file, out_img)
+
+         # Write filtered label
+         out_label = out_labels_dir / f"{img_file.stem}.txt"
+         with open(out_label, "w", encoding="utf-8") as f:
+             f.write("\n".join(filtered_lines))
+             if filtered_lines:
+                 f.write("\n")
+
+         stats["images"] += 1
+         stats["labels"] += 1
+
+     if progress_callback:
+         progress_callback(total, total)
+
+     # Create data.yaml
+     _create_yolo_yaml(output_path, new_class_names, splits if has_splits else [])
+
+     return stats
+
+
+ def _filter_yolo_classification(
+     dataset: YOLODataset,
+     output_path: Path,
+     classes: list[str],
+     use_symlinks: bool,
+     progress_callback: Callable[[int, int], None] | None,
+ ) -> dict[str, int]:
+     """Filter YOLO classification dataset."""
+     # Filter to only requested classes that exist
+     new_class_names = [name for name in dataset.class_names if name in classes]
+
+     if not new_class_names:
+         raise ValueError(f"No matching classes found. Available: {dataset.class_names}")
+
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     stats = {"images": 0, "labels": 0, "skipped": 0}
+
+     # Count total images for progress
+     total = 0
+     if dataset.splits:
+         for split in dataset.splits:
+             for class_name in new_class_names:
+                 class_dir = dataset.path / "images" / split / class_name
+                 if class_dir.is_dir():
+                     total += sum(
+                         1
+                         for f in class_dir.iterdir()
+                         if f.suffix.lower()
+                         in {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
+                     )
+     else:
+         # Flat structure
+         for class_name in new_class_names:
+             class_dir = dataset.path / class_name
+             if class_dir.is_dir():
+                 total += sum(
+                     1
+                     for f in class_dir.iterdir()
+                     if f.suffix.lower()
+                     in {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
+                 )
+
+     current = 0
+
+     if dataset.splits:
+         for split in dataset.splits:
+             for class_name in new_class_names:
+                 src_dir = dataset.path / "images" / split / class_name
+                 dst_dir = output_path / "images" / split / class_name
+
+                 if not src_dir.is_dir():
+                     continue
+
+                 dst_dir.mkdir(parents=True, exist_ok=True)
+
+                 for img_file in src_dir.iterdir():
+                     if img_file.suffix.lower() not in {
+                         ".jpg",
+                         ".jpeg",
+                         ".png",
+                         ".bmp",
+                         ".tiff",
+                         ".webp",
+                     }:
+                         continue
+
+                     if progress_callback:
+                         progress_callback(current, total)
+                     current += 1
+
+                     dst_file = dst_dir / img_file.name
+                     if use_symlinks:
+                         if not dst_file.exists():
+                             dst_file.symlink_to(img_file.resolve())
+                     else:
+                         if not dst_file.exists():
+                             shutil.copy2(img_file, dst_file)
+
+                     stats["images"] += 1
+     else:
+         # Flat structure
+         for class_name in new_class_names:
+             src_dir = dataset.path / class_name
+             dst_dir = output_path / class_name
+
+             if not src_dir.is_dir():
+                 continue
+
+             dst_dir.mkdir(parents=True, exist_ok=True)
+
+             for img_file in src_dir.iterdir():
+                 if img_file.suffix.lower() not in {
+                     ".jpg",
+                     ".jpeg",
+                     ".png",
+                     ".bmp",
+                     ".tiff",
+                     ".webp",
+                 }:
+                     continue
+
+                 if progress_callback:
+                     progress_callback(current, total)
+                 current += 1
+
+                 dst_file = dst_dir / img_file.name
+                 if use_symlinks:
+                     if not dst_file.exists():
+                         dst_file.symlink_to(img_file.resolve())
+                 else:
+                     if not dst_file.exists():
+                         shutil.copy2(img_file, dst_file)
+
+                 stats["images"] += 1
+
+     if progress_callback:
+         progress_callback(total, total)
+
+     return stats
+
+
+ def _create_yolo_yaml(
+     output_path: Path, class_names: list[str], splits: list[str]
+ ) -> None:
+     """Create data.yaml for YOLO dataset."""
+     config: dict = {
+         "path": ".",
+         "names": {i: name for i, name in enumerate(class_names)},
+     }
+
+     if splits:
+         if "train" in splits:
+             config["train"] = "images/train"
+         if "val" in splits:
+             config["val"] = "images/val"
+         if "test" in splits:
+             config["test"] = "images/test"
+
+     with open(output_path / "data.yaml", "w", encoding="utf-8") as f:
+         yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+
+
+ def filter_coco_dataset(
+     dataset: COCODataset,
+     output_path: Path,
+     classes: list[str],
+     no_background: bool = False,
+     use_symlinks: bool = False,
+     progress_callback: Callable[[int, int], None] | None = None,
+ ) -> dict[str, int]:
+     """Filter a COCO dataset by class names.
+
+     Args:
+         dataset: Source COCODataset to filter.
+         output_path: Directory to write filtered dataset.
+         classes: List of class names to keep.
+         no_background: If True, exclude images with no annotations after filtering.
+         use_symlinks: If True, create symlinks instead of copying images.
+         progress_callback: Optional callback for progress updates (current, total).
+
+     Returns:
+         Dictionary with statistics: images, annotations, skipped.
+     """
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     stats = {"images": 0, "annotations": 0, "skipped": 0}
+
+     # Process each annotation file
+     for ann_file in dataset.annotation_files:
+         with open(ann_file, encoding="utf-8") as f:
+             data = json.load(f)
+
+         # Build category mappings
+         old_categories = data.get("categories", [])
+         old_id_to_name: dict[int, str] = {}
+         for cat in old_categories:
+             if isinstance(cat, dict) and "id" in cat and "name" in cat:
+                 old_id_to_name[cat["id"]] = cat["name"]
+
+         # Create new category list with remapped IDs
+         old_to_new: dict[int, int] = {}
+         new_categories: list[dict] = []
+         new_id = 1  # COCO IDs typically start at 1
+
+         for cat in old_categories:
+             if isinstance(cat, dict) and "name" in cat and cat["name"] in classes:
+                 old_id = cat["id"]
+                 old_to_new[old_id] = new_id
+                 new_cat = cat.copy()
+                 new_cat["id"] = new_id
+                 new_categories.append(new_cat)
+                 new_id += 1
+
+         if not new_categories:
+             raise ValueError(
+                 f"No matching classes found. Available: {list(old_id_to_name.values())}"
+             )
+
+         # Filter annotations
+         old_annotations = data.get("annotations", [])
+         new_annotations: list[dict] = []
+         images_with_annotations: set[int] = set()
+         new_ann_id = 1
+
+         for ann in old_annotations:
+             if not isinstance(ann, dict):
+                 continue
+
+             old_cat_id = ann.get("category_id")
+             if old_cat_id not in old_to_new:
+                 continue
+
+             new_ann = ann.copy()
+             new_ann["id"] = new_ann_id
+             new_ann["category_id"] = old_to_new[old_cat_id]
+             new_annotations.append(new_ann)
+             new_ann_id += 1
+             stats["annotations"] += 1
+
+             image_id = ann.get("image_id")
+             if image_id is not None:
+                 images_with_annotations.add(image_id)
+
+         # Filter images
+         old_images = data.get("images", [])
+         new_images: list[dict] = []
+         included_image_ids: set[int] = set()
+         new_img_id = 1
+
+         # Build image ID mapping for annotation update
+         old_to_new_img_id: dict[int, int] = {}
+
+         for img in old_images:
+             if not isinstance(img, dict) or "id" not in img:
+                 continue
+
+             old_img_id = img["id"]
+
+             # Skip if no_background and no annotations
+             if no_background and old_img_id not in images_with_annotations:
+                 stats["skipped"] += 1
+                 continue
+
+             old_to_new_img_id[old_img_id] = new_img_id
+             new_img = img.copy()
+             new_img["id"] = new_img_id
+             new_images.append(new_img)
+             included_image_ids.add(old_img_id)
+             new_img_id += 1
+             stats["images"] += 1
+
+         # Update annotation image IDs and filter out annotations for excluded images
+         final_annotations: list[dict] = []
+         for ann in new_annotations:
+             old_img_id = ann.get("image_id")
+             if old_img_id in old_to_new_img_id:
+                 ann["image_id"] = old_to_new_img_id[old_img_id]
+                 final_annotations.append(ann)
+
+         # Determine split from annotation file
+         split = COCODataset._get_split_from_filename(
+             ann_file.stem, ann_file.parent.name
+         )
+
+         # Check if this is Roboflow format (annotation in split directory)
+         is_roboflow = ann_file.parent.name.lower() in ("train", "valid", "val", "test")
+
+         # Create output annotation
+         new_data = data.copy()
+         new_data["categories"] = new_categories
+         new_data["annotations"] = final_annotations
+         new_data["images"] = new_images
+
+         # Write annotation file
+         if is_roboflow:
+             # Roboflow format: annotations in split directories
+             out_ann_dir = output_path / split
+             out_ann_dir.mkdir(parents=True, exist_ok=True)
+             out_ann_file = out_ann_dir / ann_file.name
+         else:
+             # Standard format: annotations in annotations/ directory
+             out_ann_dir = output_path / "annotations"
+             out_ann_dir.mkdir(parents=True, exist_ok=True)
+             out_ann_file = out_ann_dir / ann_file.name
+
+         with open(out_ann_file, "w", encoding="utf-8") as f:
+             json.dump(new_data, f, indent=2)
+
+         # Copy/symlink images
+         for img in old_images:
+             if not isinstance(img, dict) or "id" not in img:
+                 continue
+
+             if img["id"] not in included_image_ids:
+                 continue
+
+             file_name = img.get("file_name")
+             if not file_name:
+                 continue
+
+             # Find source image
+             possible_paths = [
+                 dataset.path / "images" / split / file_name,
+                 dataset.path / "images" / file_name,
+                 dataset.path / split / file_name,
+                 dataset.path / file_name,
+                 ann_file.parent / file_name,  # Roboflow format
+             ]
+
+             src_path = None
+             for p in possible_paths:
+                 if p.exists():
+                     src_path = p
+                     break
+
+             if src_path is None:
+                 continue
+
+             # Determine output directory
+             if is_roboflow:
+                 out_img_dir = output_path / split
+             else:
+                 out_img_dir = output_path / "images" / split
+             out_img_dir.mkdir(parents=True, exist_ok=True)
+
+             out_img = out_img_dir / file_name
+             if use_symlinks:
+                 if not out_img.exists():
+                     out_img.symlink_to(src_path.resolve())
+             else:
+                 if not out_img.exists():
+                     shutil.copy2(src_path, out_img)
+
+     return stats
+
+
+ def filter_mask_dataset(
+     dataset: MaskDataset,
+     output_path: Path,
+     classes: list[str],
+     no_background: bool = False,
+     use_symlinks: bool = False,
+     progress_callback: Callable[[int, int], None] | None = None,
+ ) -> dict[str, int]:
+     """Filter a mask dataset by class names.
+
+     Args:
+         dataset: Source MaskDataset to filter.
+         output_path: Directory to write filtered dataset.
+         classes: List of class names to keep.
+         no_background: If True, exclude images with no annotations after filtering.
+         use_symlinks: If True, create symlinks instead of copying images.
+         progress_callback: Optional callback for progress updates (current, total).
+
+     Returns:
+         Dictionary with statistics: images, masks, skipped.
+     """
+     # Build class ID mapping
+     old_mapping = dataset.get_class_mapping()
+     old_name_to_id: dict[str, int] = {name: id for id, name in old_mapping.items()}
+
+     # Create new mapping: old_id -> new_id
+     old_to_new: dict[int, int] = {}
+     new_class_names: list[str] = []
+
+     # Start from 0 for background, then 1, 2, ... for other classes
+     # If "background" is in classes, include it
+     new_id = 0
+     for name in classes:
+         if name in old_name_to_id:
+             old_id = old_name_to_id[name]
+             old_to_new[old_id] = new_id
+             new_class_names.append(name)
+             new_id += 1
+
+     if not new_class_names:
+         raise ValueError(
+             f"No matching classes found. Available: {list(old_mapping.values())}"
+         )
+
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     stats = {"images": 0, "masks": 0, "skipped": 0}
+
+     # Get all image paths
+     image_paths = dataset.get_image_paths()
+     total = len(image_paths)
+
+     for idx, img_path in enumerate(image_paths):
+         if progress_callback:
+             progress_callback(idx, total)
+
+         # Load mask
+         mask = dataset.load_mask(img_path)
+         if mask is None:
+             stats["skipped"] += 1
+             continue
+
+         # Create filtered mask
+         # Set all pixels to ignore_index first, then fill in kept classes
+         new_ignore_index = 255
+         new_mask = np.full(mask.shape, new_ignore_index, dtype=np.uint8)
+
+         has_annotations = False
+         for old_id, new_id in old_to_new.items():
+             mask_pixels = mask == old_id
+             if np.any(mask_pixels):
+                 has_annotations = True
+                 new_mask[mask_pixels] = new_id
+
+         # Skip if no_background and no kept annotations
+         if no_background and not has_annotations:
+             stats["skipped"] += 1
+             continue
+
+         # Determine split from image path
+         img_parts = img_path.parts
+         images_dir_idx = None
+         for i, part in enumerate(img_parts):
+             if part == dataset.images_dir:
+                 images_dir_idx = i
+                 break
+
+         if images_dir_idx is not None and images_dir_idx + 1 < len(img_parts) - 1:
+             split = img_parts[images_dir_idx + 1]
+             if split not in dataset.splits:
+                 split = None
+         else:
+             split = None
+
+         # Create output directories
+         if split:
+             out_images_dir = output_path / dataset.images_dir / split
+             out_masks_dir = output_path / dataset.masks_dir / split
+         else:
+             out_images_dir = output_path / dataset.images_dir
+             out_masks_dir = output_path / dataset.masks_dir
+
+         out_images_dir.mkdir(parents=True, exist_ok=True)
+         out_masks_dir.mkdir(parents=True, exist_ok=True)
+
+         # Copy/symlink image
+         out_img = out_images_dir / img_path.name
+         if use_symlinks:
+             if not out_img.exists():
+                 out_img.symlink_to(img_path.resolve())
+         else:
+             if not out_img.exists():
+                 shutil.copy2(img_path, out_img)
+
+         # Write filtered mask
+         mask_path = dataset.get_mask_path(img_path)
+         if mask_path:
+             out_mask = out_masks_dir / mask_path.name
+             cv2.imwrite(str(out_mask), new_mask)
+
+         stats["images"] += 1
+         stats["masks"] += 1
+
+     if progress_callback:
+         progress_callback(total, total)
+
+     # Create classes.yaml
+     _create_mask_classes_yaml(output_path, new_class_names, dataset.ignore_index)
+
+     return stats
+
+
+ def _create_mask_classes_yaml(
+     output_path: Path, class_names: list[str], ignore_index: int | None
+ ) -> None:
+     """Create classes.yaml for mask dataset."""
+     config: dict = {
+         "names": {i: name for i, name in enumerate(class_names)},
+     }
+
+     if ignore_index is not None:
+         config["ignore_index"] = 255  # Use standard ignore index
+
+     with open(output_path / "classes.yaml", "w", encoding="utf-8") as f:
+         yaml.dump(config, f, default_flow_style=False, sort_keys=False)
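
All three public entry points share the calling convention used by the CLI dispatch in argus/cli.py. A minimal programmatic sketch, assuming a YOLO dataset; the dataset root, class name, and the YOLODataset(path) constructor are illustrative assumptions, not shown in this diff:

    from pathlib import Path

    from argus.core import filter_yolo_dataset
    from argus.core.yolo import YOLODataset

    # Hypothetical dataset root and class name; YOLODataset(path) is an
    # assumed constructor used here for illustration.
    dataset = YOLODataset(Path("datasets/soccer"))

    stats = filter_yolo_dataset(
        dataset=dataset,
        output_path=Path("datasets/soccer_ball_only"),
        classes=["ball"],
        no_background=True,  # drop images left with no annotations
        use_symlinks=False,  # copy images rather than symlinking
        progress_callback=lambda cur, tot: print(f"{cur}/{tot}"),
    )
    print(stats)  # e.g. {"images": ..., "labels": ..., "skipped": ...}

Note that kept class IDs are remapped to sequential values starting at 0 and a fresh data.yaml is written, so downstream training configs should point at the filtered copy rather than reuse the original class map.
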
argus_cv-1.5.0.dist-info/METADATA → argus_cv-1.5.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: argus-cv
- Version: 1.5.0
+ Version: 1.5.1
  Summary: CLI tool for working with vision AI datasets
  Requires-Python: >=3.10
  Requires-Dist: numpy>=1.24.0
argus_cv-1.5.0.dist-info/RECORD → argus_cv-1.5.1.dist-info/RECORD CHANGED
@@ -1,15 +1,16 @@
- argus/__init__.py,sha256=-NDJwMF-NWlPd0dIFWTu3SjgVWZy8SJxBD9g3YQXfrY,64
+ argus/__init__.py,sha256=Yo7UDKujDodxid1b2g022IqmD1bwc9POtFSl8iolq5c,64
  argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
- argus/cli.py,sha256=hQ4t69E-clFvn9ZIeQ4Rf7cAqC0TgPtz1HEAFqNajcg,52706
+ argus/cli.py,sha256=Xri1KFVOMS-YhbwkRE0eB5HOf49kfQuftu0IJU4gAjA,59605
  argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
- argus/core/__init__.py,sha256=sP206E44GdnnjKwyWNvuWntvO7D8oy0qs1yUUaPDThI,738
+ argus/core/__init__.py,sha256=L5Onny8UjJtok5hOBKftqwnOhPvgQS5MZT_kgLpes1o,928
  argus/core/base.py,sha256=WBrB7XWz125YZ1UQfHQwsYAuIFY_XGEhG_0ybgPhn6s,3696
  argus/core/coco.py,sha256=V3Ifh6KUbifBTLefUuMxQkejgkwsPZNfKLn0newDZJ4,17539
  argus/core/convert.py,sha256=cHuw1E9B4vyozpikS2PJnFfiJ_eRMPIHblizyeZz1Ps,8471
+ argus/core/filter.py,sha256=7BRefzYcKIxU0GkFNHiJJAijc9UIhrvNKdYgXE_22ig,21945
  argus/core/mask.py,sha256=m7Ztf4lAZx5ITpk3F3mETcvCC6hGydlxK0-2nCjeTfU,21835
  argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
  argus/core/yolo.py,sha256=Vtw2sga40VooaRE8bmjKtr_aYhfoV7ZcVijFjg1DVwo,29644
- argus_cv-1.5.0.dist-info/METADATA,sha256=9iwY-3C6t-vzZOA9wBvrvIY10YBUaHgsDRN5x5Uk_8c,1353
- argus_cv-1.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- argus_cv-1.5.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
- argus_cv-1.5.0.dist-info/RECORD,,
+ argus_cv-1.5.1.dist-info/METADATA,sha256=71pcUGzx6s0uCiJjZNUh-4p-z50xCL5xqQC8-JXhNaI,1353
+ argus_cv-1.5.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ argus_cv-1.5.1.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
+ argus_cv-1.5.1.dist-info/RECORD,,