argus-cv 1.5.0__tar.gz → 1.5.2__tar.gz

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of argus-cv might be problematic.

Files changed (43)
  1. {argus_cv-1.5.0 → argus_cv-1.5.2}/CHANGELOG.md +16 -0
  2. {argus_cv-1.5.0 → argus_cv-1.5.2}/PKG-INFO +1 -1
  3. argus_cv-1.5.2/docs/guides/filtering.md +101 -0
  4. {argus_cv-1.5.0 → argus_cv-1.5.2}/mkdocs.yml +1 -0
  5. {argus_cv-1.5.0 → argus_cv-1.5.2}/pyproject.toml +1 -1
  6. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/__init__.py +1 -1
  7. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/cli.py +195 -0
  8. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/__init__.py +8 -0
  9. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/coco.py +5 -5
  10. argus_cv-1.5.2/src/argus/core/filter.py +670 -0
  11. argus_cv-1.5.2/tests/test_filter_command.py +659 -0
  12. {argus_cv-1.5.0 → argus_cv-1.5.2}/.github/workflows/ci.yml +0 -0
  13. {argus_cv-1.5.0 → argus_cv-1.5.2}/.github/workflows/docs.yml +0 -0
  14. {argus_cv-1.5.0 → argus_cv-1.5.2}/.github/workflows/release.yml +0 -0
  15. {argus_cv-1.5.0 → argus_cv-1.5.2}/.gitignore +0 -0
  16. {argus_cv-1.5.0 → argus_cv-1.5.2}/.pre-commit-config.yaml +0 -0
  17. {argus_cv-1.5.0 → argus_cv-1.5.2}/README.md +0 -0
  18. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/assets/javascripts/extra.js +0 -0
  19. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/assets/stylesheets/extra.css +0 -0
  20. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/getting-started/installation.md +0 -0
  21. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/getting-started/quickstart.md +0 -0
  22. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/guides/datasets.md +0 -0
  23. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/guides/listing.md +0 -0
  24. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/guides/splitting.md +0 -0
  25. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/guides/stats.md +0 -0
  26. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/guides/viewer.md +0 -0
  27. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/index.md +0 -0
  28. {argus_cv-1.5.0 → argus_cv-1.5.2}/docs/reference/cli.md +0 -0
  29. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/__main__.py +0 -0
  30. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/commands/__init__.py +0 -0
  31. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/base.py +0 -0
  32. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/convert.py +0 -0
  33. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/mask.py +0 -0
  34. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/split.py +0 -0
  35. {argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/yolo.py +0 -0
  36. {argus_cv-1.5.0 → argus_cv-1.5.2}/tests/conftest.py +0 -0
  37. {argus_cv-1.5.0 → argus_cv-1.5.2}/tests/test_classification.py +0 -0
  38. {argus_cv-1.5.0 → argus_cv-1.5.2}/tests/test_convert.py +0 -0
  39. {argus_cv-1.5.0 → argus_cv-1.5.2}/tests/test_list_command.py +0 -0
  40. {argus_cv-1.5.0 → argus_cv-1.5.2}/tests/test_mask.py +0 -0
  41. {argus_cv-1.5.0 → argus_cv-1.5.2}/tests/test_split_command.py +0 -0
  42. {argus_cv-1.5.0 → argus_cv-1.5.2}/tests/test_stats_command.py +0 -0
  43. {argus_cv-1.5.0 → argus_cv-1.5.2}/uv.lock +0 -0

{argus_cv-1.5.0 → argus_cv-1.5.2}/CHANGELOG.md

@@ -2,6 +2,22 @@

  <!-- version list -->

+ ## v1.5.2 (2026-02-03)
+
+ ### Bug Fixes
+
+ - Prevent double counting in COCO stats
+   ([`90436b1`](https://github.com/pirnerjonas/argus/commit/90436b13d64a50821a39ee3bb9468bfa59e8e0ea))
+
+
+ ## v1.5.1 (2026-01-28)
+
+ ### Bug Fixes
+
+ - Add missing documentation for filter command
+   ([`dc41fbb`](https://github.com/pirnerjonas/argus/commit/dc41fbb724faf2024ec9f1430e2af0a6af000d21))
+
+
  ## v1.5.0 (2026-01-28)

  ### Features

{argus_cv-1.5.0 → argus_cv-1.5.2}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: argus-cv
- Version: 1.5.0
+ Version: 1.5.2
  Summary: CLI tool for working with vision AI datasets
  Requires-Python: >=3.10
  Requires-Dist: numpy>=1.24.0

argus_cv-1.5.2/docs/guides/filtering.md

@@ -0,0 +1,101 @@
+ # Filtering datasets
+
+ Use `argus-cv filter` to create a filtered copy of a dataset containing only specified classes.
+
+ ## Basic usage
+
+ ```bash
+ argus-cv filter -d /datasets/coco -o /datasets/coco_filtered --classes person,car
+ ```
+
+ This creates a new dataset with only the `person` and `car` classes. Class IDs are automatically remapped to sequential values (0, 1, 2, ...).
+
+ ## Filter to a single class
+
+ ```bash
+ argus-cv filter -d /datasets/yolo -o /datasets/yolo_balls --classes ball
+ ```
+
+ ## Exclude background images
+
+ By default, images without annotations (after filtering) are kept. Use `--no-background` to exclude them:
+
+ ```bash
+ argus-cv filter -d /datasets/coco -o /datasets/coco_filtered --classes dog --no-background
+ ```
+
+ This is useful when you want a dataset with only images that contain your target class.
+
+ ## Use symlinks for faster filtering
+
+ For large datasets, use `--symlinks` to create symbolic links instead of copying images:
+
+ ```bash
+ argus-cv filter -d /datasets/large -o /datasets/filtered --classes cat --symlinks
+ ```
+
+ This saves disk space and speeds up the filtering process significantly.
+
+ ## Supported formats
+
+ The filter command works with all dataset formats:
+
+ | Format | Supported | Notes |
+ |--------|-----------|-------|
+ | YOLO Detection | Yes | Labels remapped to new class IDs |
+ | YOLO Segmentation | Yes | Polygon annotations preserved |
+ | YOLO Classification | Yes | Only selected class directories copied |
+ | COCO | Yes | Annotations and category IDs remapped |
+ | Mask | Yes | Pixel values remapped to new class IDs |
+
+ ## Output layout
+
+ The output preserves the original dataset structure with train/val/test splits.
+
+ YOLO output:
+
+ ```text
+ output/
+ ├── data.yaml
+ ├── images/
+ │   ├── train/
+ │   ├── val/
+ │   └── test/
+ └── labels/
+     ├── train/
+     ├── val/
+     └── test/
+ ```
+
+ COCO output:
+
+ ```text
+ output/
+ ├── annotations/
+ │   ├── instances_train.json
+ │   ├── instances_val.json
+ │   └── instances_test.json
+ └── images/
+     ├── train/
+     ├── val/
+     └── test/
+ ```
+
+ ## Class ID remapping
+
+ When filtering, class IDs are remapped to start from 0 and be sequential. For example:
+
+ | Original | Filtered |
+ |----------|----------|
+ | 0: person | (removed) |
+ | 1: car | 0: car |
+ | 2: dog | 1: dog |
+ | 3: cat | (removed) |
+
+ If you filter to keep only `car` and `dog`, the new dataset will have `car` as class 0 and `dog` as class 1.
+
+ ## Common errors
+
+ - "No classes specified": You must provide at least one class name with `--classes`.
+ - "Classes not found in dataset": Check the class names match exactly (case-sensitive). Use `argus-cv stats` to see available classes.
+ - "Output directory already exists": The output directory must be empty or non-existent.
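
The class-ID remapping described in the new guide maps kept classes to fresh sequential IDs and drops rows for removed classes. Here is a minimal stand-alone sketch of that idea for YOLO-style detection labels; it is illustrative only, not argus-cv source code, and the class names and label rows are made up:

```python
# Illustrative sketch of class-ID remapping for YOLO detection labels.
# Not argus-cv source code; the names and rows below are made-up examples.
original_classes = ["person", "car", "dog", "cat"]
keep = ["car", "dog"]

# Kept classes get new sequential IDs: car -> 0, dog -> 1.
remap = {original_classes.index(name): new_id for new_id, name in enumerate(keep)}


def filter_label_rows(rows: list[str]) -> list[str]:
    """Drop rows for removed classes and rewrite the class ID on kept rows."""
    out = []
    for row in rows:
        parts = row.split()
        if not parts:
            continue
        old_id = int(parts[0])
        if old_id in remap:
            out.append(" ".join([str(remap[old_id]), *parts[1:]]))
    return out


print(filter_label_rows(["0 0.1 0.1 0.3 0.3", "1 0.5 0.5 0.2 0.2", "2 0.7 0.7 0.1 0.1"]))
# ['0 0.5 0.5 0.2 0.2', '1 0.7 0.7 0.1 0.1']
```

This matches the remapping table in the guide: `person` and `cat` rows disappear, `car` becomes class 0 and `dog` becomes class 1.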

{argus_cv-1.5.0 → argus_cv-1.5.2}/mkdocs.yml

@@ -64,5 +64,6 @@ nav:
  - Stats and counts: guides/stats.md
  - Visual inspection: guides/viewer.md
  - Splitting datasets: guides/splitting.md
+ - Filtering datasets: guides/filtering.md
  - Reference:
  - CLI reference: reference/cli.md

{argus_cv-1.5.0 → argus_cv-1.5.2}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "argus-cv"
- version = "1.5.0"
+ version = "1.5.2"
  description = "CLI tool for working with vision AI datasets"
  readme = "README.md"
  requires-python = ">=3.10"

{argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/__init__.py

@@ -1,3 +1,3 @@
  """Argus - Vision AI dataset toolkit."""

- __version__ = "1.5.0"
+ __version__ = "1.5.2"

{argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/cli.py

@@ -20,6 +20,11 @@ from rich.table import Table
  from argus.core import COCODataset, Dataset, MaskDataset, YOLODataset
  from argus.core.base import DatasetFormat, TaskType
  from argus.core.convert import convert_mask_to_yolo_seg
+ from argus.core.filter import (
+     filter_coco_dataset,
+     filter_mask_dataset,
+     filter_yolo_dataset,
+ )
  from argus.core.split import (
      is_coco_unsplit,
      parse_ratio,

@@ -781,6 +786,196 @@ def convert_dataset(
      console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")


+ @app.command(name="filter")
+ def filter_dataset(
+     dataset_path: Annotated[
+         Path,
+         typer.Option(
+             "--dataset-path",
+             "-d",
+             help="Path to the dataset root directory.",
+         ),
+     ] = Path("."),
+     output_path: Annotated[
+         Path,
+         typer.Option(
+             "--output",
+             "-o",
+             help="Output directory for filtered dataset.",
+         ),
+     ] = Path("filtered"),
+     classes: Annotated[
+         str,
+         typer.Option(
+             "--classes",
+             "-c",
+             help="Comma-separated list of class names to keep.",
+         ),
+     ] = "",
+     no_background: Annotated[
+         bool,
+         typer.Option(
+             "--no-background",
+             help="Exclude images with no annotations after filtering.",
+         ),
+     ] = False,
+     use_symlinks: Annotated[
+         bool,
+         typer.Option(
+             "--symlinks",
+             help="Use symlinks instead of copying images.",
+         ),
+     ] = False,
+ ) -> None:
+     """Filter a dataset by class names.
+
+     Creates a filtered copy of the dataset containing only the specified classes.
+     Class IDs are remapped to sequential values (0, 1, 2, ...).
+
+     Examples:
+         argus-cv filter -d dataset -o output --classes ball --no-background
+         argus-cv filter -d dataset -o output --classes ball,player
+         argus-cv filter -d dataset -o output --classes ball --symlinks
+     """
+     # Resolve path and validate
+     dataset_path = dataset_path.resolve()
+     if not dataset_path.exists():
+         console.print(f"[red]Error: Path does not exist: {dataset_path}[/red]")
+         raise typer.Exit(1)
+     if not dataset_path.is_dir():
+         console.print(f"[red]Error: Path is not a directory: {dataset_path}[/red]")
+         raise typer.Exit(1)
+
+     # Parse classes
+     if not classes:
+         console.print(
+             "[red]Error: No classes specified. "
+             "Use --classes to specify classes to keep.[/red]"
+         )
+         raise typer.Exit(1)
+
+     class_list = [c.strip() for c in classes.split(",") if c.strip()]
+     if not class_list:
+         console.print("[red]Error: No valid class names provided.[/red]")
+         raise typer.Exit(1)
+
+     # Detect dataset
+     dataset = _detect_dataset(dataset_path)
+     if not dataset:
+         console.print(
+             f"[red]Error: No dataset found at {dataset_path}[/red]\n"
+             "[yellow]Ensure the path points to a dataset root containing "
+             "data.yaml (YOLO), annotations/ folder (COCO), or "
+             "images/ + masks/ directories (Mask).[/yellow]"
+         )
+         raise typer.Exit(1)
+
+     # Validate classes exist in dataset
+     missing_classes = [c for c in class_list if c not in dataset.class_names]
+     if missing_classes:
+         available = ", ".join(dataset.class_names)
+         missing = ", ".join(missing_classes)
+         console.print(
+             f"[red]Error: Classes not found in dataset: {missing}[/red]\n"
+             f"[yellow]Available classes: {available}[/yellow]"
+         )
+         raise typer.Exit(1)
+
+     # Resolve output path
+     if not output_path.is_absolute():
+         output_path = dataset_path.parent / output_path
+     output_path = output_path.resolve()
+
+     # Check if output already exists
+     if output_path.exists() and any(output_path.iterdir()):
+         console.print(
+             f"[red]Error: Output directory already exists and is not empty: "
+             f"{output_path}[/red]"
+         )
+         raise typer.Exit(1)
+
+     # Show filter info
+     console.print(f"[cyan]Filtering {dataset.format.value.upper()} dataset[/cyan]")
+     console.print(f"  Source: {dataset_path}")
+     console.print(f"  Output: {output_path}")
+     console.print(f"  Classes to keep: {', '.join(class_list)}")
+     console.print(f"  Exclude background: {no_background}")
+     console.print(f"  Use symlinks: {use_symlinks}")
+     console.print()
+
+     # Run filtering with progress bar
+     with Progress(
+         SpinnerColumn(),
+         TextColumn("[progress.description]{task.description}"),
+         BarColumn(),
+         TaskProgressColumn(),
+         console=console,
+     ) as progress:
+         task = progress.add_task("Filtering dataset...", total=None)
+
+         def update_progress(current: int, total: int) -> None:
+             progress.update(task, completed=current, total=total)
+
+         try:
+             if dataset.format == DatasetFormat.YOLO:
+                 assert isinstance(dataset, YOLODataset)
+                 stats = filter_yolo_dataset(
+                     dataset=dataset,
+                     output_path=output_path,
+                     classes=class_list,
+                     no_background=no_background,
+                     use_symlinks=use_symlinks,
+                     progress_callback=update_progress,
+                 )
+             elif dataset.format == DatasetFormat.COCO:
+                 assert isinstance(dataset, COCODataset)
+                 stats = filter_coco_dataset(
+                     dataset=dataset,
+                     output_path=output_path,
+                     classes=class_list,
+                     no_background=no_background,
+                     use_symlinks=use_symlinks,
+                     progress_callback=update_progress,
+                 )
+             elif dataset.format == DatasetFormat.MASK:
+                 assert isinstance(dataset, MaskDataset)
+                 stats = filter_mask_dataset(
+                     dataset=dataset,
+                     output_path=output_path,
+                     classes=class_list,
+                     no_background=no_background,
+                     use_symlinks=use_symlinks,
+                     progress_callback=update_progress,
+                 )
+             else:
+                 console.print(
+                     f"[red]Error: Unsupported dataset format: {dataset.format}[/red]"
+                 )
+                 raise typer.Exit(1)
+         except ValueError as exc:
+             console.print(f"[red]Error: {exc}[/red]")
+             raise typer.Exit(1) from exc
+         except Exception as exc:
+             console.print(f"[red]Error during filtering: {exc}[/red]")
+             raise typer.Exit(1) from exc
+
+     # Show results
+     console.print()
+     console.print("[green]Filtering complete![/green]")
+     console.print(f"  Images: {stats.get('images', 0)}")
+     if "labels" in stats:
+         console.print(f"  Labels: {stats['labels']}")
+     if "annotations" in stats:
+         console.print(f"  Annotations: {stats['annotations']}")
+     if "masks" in stats:
+         console.print(f"  Masks: {stats['masks']}")
+     if stats.get("skipped", 0) > 0:
+         skipped = stats["skipped"]
+         console.print(f"  [yellow]Skipped: {skipped} (background images)[/yellow]")
+
+     console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")
+
+
  class _ImageViewer:
      """Interactive image viewer with zoom and pan support."""

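
The new `filter` command hands the core filter functions a plain `(current, total)` callback and keeps the Rich progress bar entirely in the CLI layer. Below is a self-contained sketch of that wiring, with a made-up `fake_filter` worker standing in for `filter_yolo_dataset` and friends; it is illustrative only, not argus-cv code:

```python
# Minimal sketch of the progress-callback pattern used by the new filter command.
# The worker only knows about a plain (current, total) callback; the CLI layer
# owns the Rich progress bar. Generic example, not argus-cv source code.
from typing import Callable

from rich.console import Console
from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn

console = Console()


def fake_filter(items: list[str], progress_callback: Callable[[int, int], None]) -> None:
    """Stand-in for the real filter functions: do some work, report progress."""
    total = len(items)
    for i, _item in enumerate(items, start=1):
        progress_callback(i, total)


with Progress(
    SpinnerColumn(),
    TextColumn("[progress.description]{task.description}"),
    BarColumn(),
    TaskProgressColumn(),
    console=console,
) as progress:
    task = progress.add_task("Filtering dataset...", total=None)

    def update_progress(current: int, total: int) -> None:
        # Rich accepts a late total, so the display switches from an
        # indeterminate spinner to a percentage once the worker reports it.
        progress.update(task, completed=current, total=total)

    fake_filter([f"img_{i}.jpg" for i in range(100)], update_progress)
```

Keeping the callback signature this small lets the same core functions run without Rich installed in the calling context, which matches how the command dispatches to the YOLO, COCO, and Mask filter functions above.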

{argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/__init__.py

@@ -9,6 +9,11 @@ from argus.core.convert import (
      convert_mask_to_yolo_seg,
      mask_to_polygons,
  )
+ from argus.core.filter import (
+     filter_coco_dataset,
+     filter_mask_dataset,
+     filter_yolo_dataset,
+ )
  from argus.core.mask import ConfigurationError, MaskDataset
  from argus.core.split import split_coco_dataset, split_yolo_dataset
  from argus.core.yolo import YOLODataset

@@ -21,6 +26,9 @@ __all__ = [
      "ConfigurationError",
      "split_coco_dataset",
      "split_yolo_dataset",
+     "filter_yolo_dataset",
+     "filter_coco_dataset",
+     "filter_mask_dataset",
      "ConversionParams",
      "Polygon",
      "mask_to_polygons",

{argus_cv-1.5.0 → argus_cv-1.5.2}/src/argus/core/coco.py

@@ -65,22 +65,22 @@ class COCODataset(Dataset):
          Returns:
              List of annotation file paths.
          """
-         annotation_files = []
+         annotation_files: set[Path] = set()

          # Check annotations/ directory first
          annotations_dir = path / "annotations"
          if annotations_dir.is_dir():
-             annotation_files.extend(annotations_dir.glob("*.json"))
+             annotation_files.update(annotations_dir.glob("*.json"))

          # Also check root directory for single annotation file
-         annotation_files.extend(path.glob("*.json"))
+         annotation_files.update(path.glob("*.json"))

          # Check split directories for Roboflow COCO format
          for split_name in ["train", "valid", "val", "test"]:
              split_dir = path / split_name
              if split_dir.is_dir():
-                 annotation_files.extend(split_dir.glob("*annotations*.json"))
-                 annotation_files.extend(split_dir.glob("*coco*.json"))
+                 annotation_files.update(split_dir.glob("*annotations*.json"))
+                 annotation_files.update(split_dir.glob("*coco*.json"))

          # Filter to only include files that might be COCO annotations
          # (exclude package.json, tsconfig.json, etc.)
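
The coco.py hunk switches the annotation-file collection from a list to a set, which appears to be the "prevent double counting in COCO stats" fix noted in the changelog: a Roboflow-style export typically names its file `_annotations.coco.json`, which matches both split-directory glob patterns, so a list records the same path twice while a set keeps it once. A quick illustrative sketch of the effect (throwaway temp directory, not package code):

```python
# Sketch of why the list -> set change deduplicates annotation paths.
# "_annotations.coco.json" matches both of the split-directory glob patterns
# used above, so a list collects it twice while a set keeps a single path.
# Illustrative example only, run against a throwaway temp directory.
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    split_dir = Path(tmp) / "train"
    split_dir.mkdir()
    (split_dir / "_annotations.coco.json").write_text("{}")

    as_list: list[Path] = []
    as_list.extend(split_dir.glob("*annotations*.json"))
    as_list.extend(split_dir.glob("*coco*.json"))

    as_set: set[Path] = set()
    as_set.update(split_dir.glob("*annotations*.json"))
    as_set.update(split_dir.glob("*coco*.json"))

    print(len(as_list))  # 2 -> the same file is collected twice
    print(len(as_set))   # 1 -> duplicates collapse, so stats count it once
```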