argus-cv 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of argus-cv might be problematic. Click here for more details.
- argus/__init__.py +1 -1
- argus/cli.py +157 -14
- argus/core/__init__.py +12 -0
- argus/core/coco.py +46 -8
- argus/core/convert.py +277 -0
- argus/core/yolo.py +29 -0
- {argus_cv-1.3.0.dist-info → argus_cv-1.5.0.dist-info}/METADATA +9 -2
- argus_cv-1.5.0.dist-info/RECORD +15 -0
- argus_cv-1.3.0.dist-info/RECORD +0 -14
- {argus_cv-1.3.0.dist-info → argus_cv-1.5.0.dist-info}/WHEEL +0 -0
- {argus_cv-1.3.0.dist-info → argus_cv-1.5.0.dist-info}/entry_points.txt +0 -0
argus/__init__.py
CHANGED
argus/cli.py
CHANGED
|
@@ -8,11 +8,18 @@ import cv2
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import typer
|
|
10
10
|
from rich.console import Console
|
|
11
|
-
from rich.progress import
|
|
11
|
+
from rich.progress import (
|
|
12
|
+
BarColumn,
|
|
13
|
+
Progress,
|
|
14
|
+
SpinnerColumn,
|
|
15
|
+
TaskProgressColumn,
|
|
16
|
+
TextColumn,
|
|
17
|
+
)
|
|
12
18
|
from rich.table import Table
|
|
13
19
|
|
|
14
20
|
from argus.core import COCODataset, Dataset, MaskDataset, YOLODataset
|
|
15
21
|
from argus.core.base import DatasetFormat, TaskType
|
|
22
|
+
from argus.core.convert import convert_mask_to_yolo_seg
|
|
16
23
|
from argus.core.split import (
|
|
17
24
|
is_coco_unsplit,
|
|
18
25
|
parse_ratio,
|
|
@@ -534,13 +541,6 @@ def split_dataset(
|
|
|
534
541
|
help="Train/val/test ratio (e.g. 0.8,0.1,0.1).",
|
|
535
542
|
),
|
|
536
543
|
] = "0.8,0.1,0.1",
|
|
537
|
-
stratify: Annotated[
|
|
538
|
-
bool,
|
|
539
|
-
typer.Option(
|
|
540
|
-
"--stratify/--no-stratify",
|
|
541
|
-
help="Stratify by class distribution when splitting.",
|
|
542
|
-
),
|
|
543
|
-
] = True,
|
|
544
544
|
seed: Annotated[
|
|
545
545
|
int,
|
|
546
546
|
typer.Option(
|
|
@@ -593,9 +593,7 @@ def split_dataset(
|
|
|
593
593
|
) as progress:
|
|
594
594
|
progress.add_task("Creating YOLO splits...", total=None)
|
|
595
595
|
try:
|
|
596
|
-
counts = split_yolo_dataset(
|
|
597
|
-
dataset, output_path, ratios, stratify, seed
|
|
598
|
-
)
|
|
596
|
+
counts = split_yolo_dataset(dataset, output_path, ratios, True, seed)
|
|
599
597
|
except ValueError as exc:
|
|
600
598
|
console.print(f"[red]Error: {exc}[/red]")
|
|
601
599
|
raise typer.Exit(1) from exc
|
|
@@ -628,7 +626,7 @@ def split_dataset(
|
|
|
628
626
|
annotation_file,
|
|
629
627
|
output_path,
|
|
630
628
|
ratios,
|
|
631
|
-
|
|
629
|
+
True,
|
|
632
630
|
seed,
|
|
633
631
|
)
|
|
634
632
|
except ValueError as exc:
|
|
@@ -641,6 +639,148 @@ def split_dataset(
|
|
|
641
639
|
)
|
|
642
640
|
|
|
643
641
|
|
|
642
|
+
@app.command(name="convert")
def convert_dataset(
    input_path: Annotated[
        Path,
        typer.Option(
            "--input-path",
            "-i",
            help="Path to the source dataset.",
        ),
    ] = Path("."),
    output_path: Annotated[
        Path,
        typer.Option(
            "--output-path",
            "-o",
            help="Output directory for converted dataset.",
        ),
    ] = Path("converted"),
    to_format: Annotated[
        str,
        typer.Option(
            "--to",
            help="Target format (currently only 'yolo-seg' is supported).",
        ),
    ] = "yolo-seg",
    epsilon_factor: Annotated[
        float,
        typer.Option(
            "--epsilon-factor",
            "-e",
            help="Polygon simplification factor (Douglas-Peucker algorithm).",
            min=0.0,
            max=1.0,
        ),
    ] = 0.005,
    min_area: Annotated[
        float,
        typer.Option(
            "--min-area",
            "-a",
            help="Minimum contour area in pixels to include.",
            min=0.0,
        ),
    ] = 100.0,
) -> None:
    """Convert a dataset from one format to another.

    Currently supports converting MaskDataset to YOLO segmentation format.

    Example:
        uvx argus-cv convert -i /path/to/masks -o /path/to/output --to yolo-seg
    """
    # NOTE: the docstring above is kept unchanged on purpose; typer renders it
    # as the command's --help text. Every failure path below prints a message
    # and exits with status 1.

    # Validate format
    if to_format != "yolo-seg":
        console.print(
            f"[red]Error: Unsupported target format '{to_format}'.[/red]\n"
            "[yellow]Currently only 'yolo-seg' is supported.[/yellow]"
        )
        raise typer.Exit(1)

    # Resolve and validate input path
    input_path = input_path.resolve()
    if not input_path.exists():
        console.print(f"[red]Error: Path does not exist: {input_path}[/red]")
        raise typer.Exit(1)
    if not input_path.is_dir():
        console.print(f"[red]Error: Path is not a directory: {input_path}[/red]")
        raise typer.Exit(1)

    # Detect source dataset - must be MaskDataset for yolo-seg conversion
    dataset = MaskDataset.detect(input_path)
    if not dataset:
        console.print(
            f"[red]Error: No MaskDataset found at {input_path}[/red]\n"
            "[yellow]Ensure the path contains images/ + masks/ directories "
            "(or equivalent patterns like img/+gt/ or leftImg8bit/+gtFine/).[/yellow]"
        )
        raise typer.Exit(1)

    # Resolve output path: a relative output path is anchored next to the
    # input dataset (its parent directory), not at the current directory.
    if not output_path.is_absolute():
        output_path = input_path.parent / output_path
    output_path = output_path.resolve()

    # Check if output already exists
    if output_path.exists():
        # iterdir() raises on a non-directory, so reject that case explicitly
        # instead of letting the traceback reach the user.
        if not output_path.is_dir():
            console.print(
                f"[red]Error: Output path exists and is not a directory: "
                f"{output_path}[/red]"
            )
            raise typer.Exit(1)
        if any(output_path.iterdir()):
            console.print(
                f"[red]Error: Output directory already exists and is not empty: "
                f"{output_path}[/red]"
            )
            raise typer.Exit(1)

    # Show conversion info
    console.print("[cyan]Converting MaskDataset to YOLO segmentation format[/cyan]")
    console.print(f"  Source: {input_path}")
    console.print(f"  Output: {output_path}")
    console.print(f"  Classes: {dataset.num_classes}")
    splits_str = ", ".join(dataset.splits) if dataset.splits else "unsplit"
    console.print(f"  Splits: {splits_str}")
    console.print()

    # Run conversion with progress bar
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        console=console,
    ) as progress:
        # total is unknown until the converter reports it via the callback
        task = progress.add_task("Processing images...", total=None)

        def update_progress(current: int, total: int) -> None:
            progress.update(task, completed=current, total=total)

        try:
            stats = convert_mask_to_yolo_seg(
                dataset=dataset,
                output_path=output_path,
                epsilon_factor=epsilon_factor,
                min_area=min_area,
                progress_callback=update_progress,
            )
        except Exception as exc:
            # Top-level CLI boundary: report any conversion failure and exit.
            console.print(f"[red]Error during conversion: {exc}[/red]")
            raise typer.Exit(1) from exc

    # Show results
    console.print()
    console.print("[green]Conversion complete![/green]")
    console.print(f"  Images processed: {stats['images']}")
    console.print(f"  Labels created: {stats['labels']}")
    console.print(f"  Polygons extracted: {stats['polygons']}")

    if stats["skipped"] > 0:
        skipped = stats["skipped"]
        console.print(f"  [yellow]Skipped: {skipped} (no mask or empty)[/yellow]")
    if stats["warnings"] > 0:
        console.print(f"  [yellow]Warnings: {stats['warnings']}[/yellow]")

    console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")
|
|
782
|
+
|
|
783
|
+
|
|
644
784
|
class _ImageViewer:
|
|
645
785
|
"""Interactive image viewer with zoom and pan support."""
|
|
646
786
|
|
|
@@ -1352,9 +1492,12 @@ def _draw_annotations(
|
|
|
1352
1492
|
overlay = img.copy()
|
|
1353
1493
|
cv2.fillPoly(overlay, [pts], color)
|
|
1354
1494
|
cv2.addWeighted(overlay, 0.3, img, 0.7, 0, img)
|
|
1495
|
+
# Draw small points at polygon vertices
|
|
1496
|
+
for pt in pts:
|
|
1497
|
+
cv2.circle(img, tuple(pt), radius=3, color=color, thickness=-1)
|
|
1355
1498
|
|
|
1356
|
-
# Draw bounding box
|
|
1357
|
-
if bbox:
|
|
1499
|
+
# Draw bounding box (only for detection, not segmentation)
|
|
1500
|
+
if bbox and not polygon:
|
|
1358
1501
|
x, y, w, h = bbox
|
|
1359
1502
|
x1, y1 = int(x), int(y)
|
|
1360
1503
|
x2, y2 = int(x + w), int(y + h)
|
argus/core/__init__.py
CHANGED
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
from argus.core.base import Dataset
|
|
4
4
|
from argus.core.coco import COCODataset
|
|
5
|
+
from argus.core.convert import (
|
|
6
|
+
ConversionParams,
|
|
7
|
+
Polygon,
|
|
8
|
+
convert_mask_to_yolo_labels,
|
|
9
|
+
convert_mask_to_yolo_seg,
|
|
10
|
+
mask_to_polygons,
|
|
11
|
+
)
|
|
5
12
|
from argus.core.mask import ConfigurationError, MaskDataset
|
|
6
13
|
from argus.core.split import split_coco_dataset, split_yolo_dataset
|
|
7
14
|
from argus.core.yolo import YOLODataset
|
|
@@ -14,4 +21,9 @@ __all__ = [
|
|
|
14
21
|
"ConfigurationError",
|
|
15
22
|
"split_coco_dataset",
|
|
16
23
|
"split_yolo_dataset",
|
|
24
|
+
"ConversionParams",
|
|
25
|
+
"Polygon",
|
|
26
|
+
"mask_to_polygons",
|
|
27
|
+
"convert_mask_to_yolo_labels",
|
|
28
|
+
"convert_mask_to_yolo_seg",
|
|
17
29
|
]
|
argus/core/coco.py
CHANGED
|
@@ -75,6 +75,13 @@ class COCODataset(Dataset):
|
|
|
75
75
|
# Also check root directory for single annotation file
|
|
76
76
|
annotation_files.extend(path.glob("*.json"))
|
|
77
77
|
|
|
78
|
+
# Check split directories for Roboflow COCO format
|
|
79
|
+
for split_name in ["train", "valid", "val", "test"]:
|
|
80
|
+
split_dir = path / split_name
|
|
81
|
+
if split_dir.is_dir():
|
|
82
|
+
annotation_files.extend(split_dir.glob("*annotations*.json"))
|
|
83
|
+
annotation_files.extend(split_dir.glob("*coco*.json"))
|
|
84
|
+
|
|
78
85
|
# Filter to only include files that might be COCO annotations
|
|
79
86
|
# (exclude package.json, tsconfig.json, etc.)
|
|
80
87
|
filtered_files = []
|
|
@@ -185,8 +192,10 @@ class COCODataset(Dataset):
|
|
|
185
192
|
if isinstance(cat, dict) and "id" in cat and "name" in cat:
|
|
186
193
|
id_to_name[cat["id"]] = cat["name"]
|
|
187
194
|
|
|
188
|
-
# Determine split from filename
|
|
189
|
-
split = self._get_split_from_filename(
|
|
195
|
+
# Determine split from filename or parent directory
|
|
196
|
+
split = self._get_split_from_filename(
|
|
197
|
+
ann_file.stem, ann_file.parent.name
|
|
198
|
+
)
|
|
190
199
|
|
|
191
200
|
# Count annotations per category
|
|
192
201
|
split_counts: dict[str, int] = counts.get(split, {})
|
|
@@ -224,7 +233,9 @@ class COCODataset(Dataset):
|
|
|
224
233
|
if not isinstance(data, dict):
|
|
225
234
|
continue
|
|
226
235
|
|
|
227
|
-
split = self._get_split_from_filename(
|
|
236
|
+
split = self._get_split_from_filename(
|
|
237
|
+
ann_file.stem, ann_file.parent.name
|
|
238
|
+
)
|
|
228
239
|
|
|
229
240
|
images = data.get("images", [])
|
|
230
241
|
annotations = data.get("annotations", [])
|
|
@@ -256,11 +267,12 @@ class COCODataset(Dataset):
|
|
|
256
267
|
return counts
|
|
257
268
|
|
|
258
269
|
@staticmethod
|
|
259
|
-
def _get_split_from_filename(filename: str) -> str:
|
|
260
|
-
"""Extract split name from annotation filename.
|
|
270
|
+
def _get_split_from_filename(filename: str, parent_dir: str | None = None) -> str:
|
|
271
|
+
"""Extract split name from annotation filename or parent directory.
|
|
261
272
|
|
|
262
273
|
Args:
|
|
263
274
|
filename: Annotation file stem (without extension).
|
|
275
|
+
parent_dir: Optional parent directory name (for Roboflow COCO format).
|
|
264
276
|
|
|
265
277
|
Returns:
|
|
266
278
|
Split name (train, val, test) or 'train' as default.
|
|
@@ -272,6 +284,17 @@ class COCODataset(Dataset):
|
|
|
272
284
|
return "val"
|
|
273
285
|
elif "test" in name_lower:
|
|
274
286
|
return "test"
|
|
287
|
+
|
|
288
|
+
# Check parent directory name (Roboflow COCO format)
|
|
289
|
+
if parent_dir:
|
|
290
|
+
parent_lower = parent_dir.lower()
|
|
291
|
+
if parent_lower == "train":
|
|
292
|
+
return "train"
|
|
293
|
+
elif parent_lower in ("val", "valid"):
|
|
294
|
+
return "val"
|
|
295
|
+
elif parent_lower == "test":
|
|
296
|
+
return "test"
|
|
297
|
+
|
|
275
298
|
return "train"
|
|
276
299
|
|
|
277
300
|
@classmethod
|
|
@@ -301,7 +324,7 @@ class COCODataset(Dataset):
|
|
|
301
324
|
|
|
302
325
|
@classmethod
|
|
303
326
|
def _detect_splits(cls, annotation_files: list[Path]) -> list[str]:
|
|
304
|
-
"""Detect available splits from annotation filenames.
|
|
327
|
+
"""Detect available splits from annotation filenames or parent directories.
|
|
305
328
|
|
|
306
329
|
Args:
|
|
307
330
|
annotation_files: List of annotation file paths.
|
|
@@ -313,13 +336,22 @@ class COCODataset(Dataset):
|
|
|
313
336
|
|
|
314
337
|
for ann_file in annotation_files:
|
|
315
338
|
name_lower = ann_file.stem.lower()
|
|
339
|
+
parent_lower = ann_file.parent.name.lower()
|
|
316
340
|
|
|
341
|
+
# Check filename first
|
|
317
342
|
if "train" in name_lower and "train" not in splits:
|
|
318
343
|
splits.append("train")
|
|
319
344
|
elif "val" in name_lower and "val" not in splits:
|
|
320
345
|
splits.append("val")
|
|
321
346
|
elif "test" in name_lower and "test" not in splits:
|
|
322
347
|
splits.append("test")
|
|
348
|
+
# Check parent directory (Roboflow COCO format)
|
|
349
|
+
elif parent_lower == "train" and "train" not in splits:
|
|
350
|
+
splits.append("train")
|
|
351
|
+
elif parent_lower in ("val", "valid") and "val" not in splits:
|
|
352
|
+
splits.append("val")
|
|
353
|
+
elif parent_lower == "test" and "test" not in splits:
|
|
354
|
+
splits.append("test")
|
|
323
355
|
|
|
324
356
|
# If no splits detected from filenames, default to train
|
|
325
357
|
if not splits:
|
|
@@ -342,7 +374,9 @@ class COCODataset(Dataset):
|
|
|
342
374
|
for ann_file in self.annotation_files:
|
|
343
375
|
# Filter by split if specified
|
|
344
376
|
if split:
|
|
345
|
-
file_split = self._get_split_from_filename(
|
|
377
|
+
file_split = self._get_split_from_filename(
|
|
378
|
+
ann_file.stem, ann_file.parent.name
|
|
379
|
+
)
|
|
346
380
|
if file_split != split:
|
|
347
381
|
continue
|
|
348
382
|
|
|
@@ -354,7 +388,9 @@ class COCODataset(Dataset):
|
|
|
354
388
|
continue
|
|
355
389
|
|
|
356
390
|
images = data.get("images", [])
|
|
357
|
-
file_split = self._get_split_from_filename(
|
|
391
|
+
file_split = self._get_split_from_filename(
|
|
392
|
+
ann_file.stem, ann_file.parent.name
|
|
393
|
+
)
|
|
358
394
|
|
|
359
395
|
for img in images:
|
|
360
396
|
if not isinstance(img, dict) or "file_name" not in img:
|
|
@@ -371,6 +407,8 @@ class COCODataset(Dataset):
|
|
|
371
407
|
self.path / "images" / file_name,
|
|
372
408
|
self.path / file_split / file_name,
|
|
373
409
|
self.path / file_name,
|
|
410
|
+
# Roboflow format: images alongside annotations
|
|
411
|
+
ann_file.parent / file_name,
|
|
374
412
|
]
|
|
375
413
|
|
|
376
414
|
for img_path in possible_paths:
|
argus/core/convert.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""Conversion functions for dataset format transformation."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import shutil
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import cv2
|
|
10
|
+
import numpy as np
|
|
11
|
+
import yaml
|
|
12
|
+
from numpy.typing import NDArray
|
|
13
|
+
|
|
14
|
+
from argus.core.mask import MaskDataset
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ConversionParams:
    """Settings that control how one binary mask is turned into polygons.

    Attributes:
        class_id: Class ID stamped onto every polygon that is produced.
        epsilon_factor: Douglas-Peucker tolerance, given as a fraction of
            the contour perimeter.
        min_area: Contours whose area (in pixels) is below this value are
            dropped.
    """

    # Field order is part of the positional constructor signature.
    class_id: int = 0
    epsilon_factor: float = 0.005
    min_area: float = 100.0
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class Polygon:
    """One polygon annotation: a class ID plus its vertex list.

    Attributes:
        class_id: Class ID this polygon belongs to.
        points: Vertices as (x, y) pairs normalized to [0, 1].
    """

    class_id: int
    points: list[tuple[float, float]]

    def to_yolo(self) -> str:
        """Render the polygon as a single YOLO segmentation label line.

        Returns:
            A string shaped like "class_id x1 y1 x2 y2 ... xn yn", with
            every coordinate printed to six decimal places.
        """
        vertex_fields = [f"{px:.6f} {py:.6f}" for px, py in self.points]
        return f"{self.class_id} " + " ".join(vertex_fields)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def mask_to_polygons(
    mask: NDArray[np.uint8],
    params: ConversionParams | None = None,
) -> list[Polygon]:
    """Trace a binary mask and return its contours as simplified polygons.

    Args:
        mask: Binary mask (255 for foreground, 0 for background).
        params: Conversion parameters; a default ``ConversionParams()`` is
            used when this is None.

    Returns:
        One Polygon per retained contour. Coordinates are divided by the
        mask width/height, and each polygon carries ``params.class_id``.
    """
    settings = ConversionParams() if params is None else params
    height, width = mask.shape[:2]

    # Only the outermost contours are requested (cv2.RETR_EXTERNAL).
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    result: list[Polygon] = []
    for contour in contours:
        # Drop regions below the configured pixel area.
        if cv2.contourArea(contour) < settings.min_area:
            continue

        # Douglas-Peucker simplification; tolerance scales with perimeter.
        tolerance = settings.epsilon_factor * cv2.arcLength(contour, closed=True)
        approx = cv2.approxPolyDP(contour, tolerance, closed=True)

        # Fewer than three vertices cannot form a polygon.
        if len(approx) < 3:
            continue

        # Each entry of `approx` wraps a single (x, y) pair at index 0.
        normalized: list[tuple[float, float]] = [
            (vertex[0][0] / width, vertex[0][1] / height) for vertex in approx
        ]
        result.append(Polygon(class_id=settings.class_id, points=normalized))

    return result
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def convert_mask_to_yolo_labels(
    mask: np.ndarray,
    class_ids: list[int],
    epsilon_factor: float = 0.005,
    min_area: float = 100.0,
) -> list[str]:
    """Produce YOLO segmentation label lines from a class-indexed mask.

    Args:
        mask: Grayscale mask where pixel values represent class IDs.
        class_ids: Class IDs to extract; each is processed in the given order.
        epsilon_factor: Douglas-Peucker simplification factor.
        min_area: Minimum contour area in pixels.

    Returns:
        The YOLO label strings for all polygons found, grouped by class in
        the order of ``class_ids``.
    """
    label_lines: list[str] = []

    for cid in class_ids:
        # Isolate this class as a 0/255 uint8 mask for contour tracing.
        class_mask = (mask == cid).astype(np.uint8) * 255
        settings = ConversionParams(
            class_id=cid,
            epsilon_factor=epsilon_factor,
            min_area=min_area,
        )
        for polygon in mask_to_polygons(class_mask, settings):
            label_lines.append(polygon.to_yolo())

    return label_lines
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def convert_mask_to_yolo_seg(
    dataset: MaskDataset,
    output_path: Path,
    epsilon_factor: float = 0.005,
    min_area: float = 100.0,
    progress_callback: Callable[[int, int], None] | None = None,
) -> dict[str, int]:
    """Write a MaskDataset to disk as a YOLO segmentation dataset.

    For every split, images that yield at least one polygon are copied to
    ``images/<split>`` and a matching label file is written to
    ``labels/<split>``. A dataset without splits is written under "train".
    A ``data.yaml`` describing class names and split locations is written to
    the output root at the end.

    Args:
        dataset: Source MaskDataset to convert.
        output_path: Output directory for the YOLO dataset (created if needed).
        epsilon_factor: Douglas-Peucker simplification factor.
        min_area: Minimum contour area in pixels.
        progress_callback: Optional callback(current, total), invoked once
            per image before that image is processed.

    Returns:
        Dictionary with conversion statistics:
        - "images": Total images visited
        - "labels": Total label files created
        - "polygons": Total polygons extracted
        - "skipped": Images skipped (no mask, unreadable image, or no polygons)
        - "warnings": Number of image/mask dimension mismatches
    """
    stats = dict.fromkeys(("images", "labels", "polygons", "skipped", "warnings"), 0)

    output_path.mkdir(parents=True, exist_ok=True)

    # Class IDs (sorted) drive both data.yaml and the per-mask filtering.
    class_mapping = dataset.get_class_mapping()
    class_ids = sorted(class_mapping.keys())

    data_yaml: dict = {
        "path": ".",
        "names": {cid: class_mapping[cid] for cid in class_ids},
    }

    # A single None entry stands in for "no splits defined".
    splits = dataset.splits or [None]

    # Total is computed up front so the callback can report a fraction.
    total_images = sum(len(dataset.get_image_paths(name)) for name in splits)
    processed = 0

    for split in splits:
        split_name = split or "train"

        images_dir = output_path / "images" / split_name
        labels_dir = output_path / "labels" / split_name
        for target in (images_dir, labels_dir):
            target.mkdir(parents=True, exist_ok=True)

        data_yaml[split_name] = f"images/{split_name}"

        for image_path in dataset.get_image_paths(split):
            processed += 1
            if progress_callback:
                progress_callback(processed, total_images)

            stats["images"] += 1

            mask = dataset.load_mask(image_path)
            if mask is None:
                logger.warning(f"No mask found for {image_path.name}, skipping")
                stats["skipped"] += 1
                continue

            # The image is read only to validate it and compare dimensions.
            img = cv2.imread(str(image_path))
            if img is None:
                logger.warning(f"Could not load image {image_path.name}, skipping")
                stats["skipped"] += 1
                continue

            image_hw, mask_hw = img.shape[:2], mask.shape[:2]
            if image_hw != mask_hw:
                logger.warning(
                    f"Dimension mismatch for {image_path.name}: "
                    f"image={image_hw}, mask={mask_hw}"
                )
                stats["warnings"] += 1
                # Not fatal: conversion proceeds using the mask as-is.

            # Keep only known class IDs that are not the ignore index.
            present_ids: list[int] = []
            for value in np.unique(mask):
                if value == dataset.ignore_index or value not in class_ids:
                    continue
                present_ids.append(int(value))

            if not present_ids:
                logger.debug(f"Empty mask for {image_path.name}")
                stats["skipped"] += 1
                continue

            label_lines = convert_mask_to_yolo_labels(
                mask, present_ids, epsilon_factor, min_area
            )
            if not label_lines:
                # Every contour was filtered out.
                logger.debug(f"No valid polygons for {image_path.name}")
                stats["skipped"] += 1
                continue

            shutil.copy2(image_path, images_dir / image_path.name)

            label_file = labels_dir / f"{image_path.stem}.txt"
            label_file.write_text("\n".join(label_lines) + "\n")

            stats["labels"] += 1
            stats["polygons"] += len(label_lines)

    with open(output_path / "data.yaml", "w") as handle:
        yaml.dump(data_yaml, handle, default_flow_style=False, sort_keys=False)

    return stats
|
argus/core/yolo.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""YOLO dataset detection and handling."""
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
@@ -200,6 +201,12 @@ class YOLODataset(Dataset):
|
|
|
200
201
|
if not class_dirs:
|
|
201
202
|
return None
|
|
202
203
|
|
|
204
|
+
# Check if any directory contains COCO annotation files
|
|
205
|
+
# (Roboflow COCO datasets have train/valid/test dirs with JSON files)
|
|
206
|
+
for class_dir in class_dirs:
|
|
207
|
+
if cls._has_coco_annotation(class_dir):
|
|
208
|
+
return None # Not a classification dataset
|
|
209
|
+
|
|
203
210
|
# Check if these are class directories (contain images directly)
|
|
204
211
|
class_names_set = set()
|
|
205
212
|
for class_dir in class_dirs:
|
|
@@ -225,6 +232,28 @@ class YOLODataset(Dataset):
|
|
|
225
232
|
config_file=None,
|
|
226
233
|
)
|
|
227
234
|
|
|
235
|
+
@staticmethod
|
|
236
|
+
def _has_coco_annotation(directory: Path) -> bool:
|
|
237
|
+
"""Check if directory contains COCO annotation files.
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
directory: Directory to check.
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
True if COCO annotation files are found, False otherwise.
|
|
244
|
+
"""
|
|
245
|
+
for f in directory.glob("*.json"):
|
|
246
|
+
try:
|
|
247
|
+
with open(f, encoding="utf-8") as fp:
|
|
248
|
+
data = json.load(fp)
|
|
249
|
+
if isinstance(data, dict) and any(
|
|
250
|
+
k in data for k in ["images", "annotations", "categories"]
|
|
251
|
+
):
|
|
252
|
+
return True
|
|
253
|
+
except (json.JSONDecodeError, OSError):
|
|
254
|
+
pass
|
|
255
|
+
return False
|
|
256
|
+
|
|
228
257
|
def get_instance_counts(self) -> dict[str, dict[str, int]]:
|
|
229
258
|
"""Get the number of annotation instances per class, per split.
|
|
230
259
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: argus-cv
|
|
3
|
-
Version: 1.3.0
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: CLI tool for working with vision AI datasets
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Requires-Dist: numpy>=1.24.0
|
|
@@ -12,7 +12,7 @@ Description-Content-Type: text/markdown
|
|
|
12
12
|
|
|
13
13
|
# argus-cv
|
|
14
14
|
|
|
15
|
-
Vision AI dataset toolkit for working with YOLO and COCO datasets.
|
|
15
|
+
Vision AI dataset toolkit for working with YOLO, COCO, and semantic mask datasets.
|
|
16
16
|
|
|
17
17
|
**[Documentation](https://pirnerjonas.github.io/argus/)**
|
|
18
18
|
|
|
@@ -22,6 +22,13 @@ Vision AI dataset toolkit for working with YOLO and COCO datasets.
|
|
|
22
22
|
uvx argus-cv
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
+
## Highlights
|
|
26
|
+
|
|
27
|
+
- Detects YOLO, COCO, and folder-based semantic mask datasets.
|
|
28
|
+
- Reports per-class stats (pixel coverage for mask datasets).
|
|
29
|
+
- Interactive viewer with bounding boxes, polygons, or mask overlays.
|
|
30
|
+
- Split unsplit YOLO/COCO datasets into train/val/test.
|
|
31
|
+
|
|
25
32
|
## Usage
|
|
26
33
|
|
|
27
34
|
```bash
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
argus/__init__.py,sha256=-NDJwMF-NWlPd0dIFWTu3SjgVWZy8SJxBD9g3YQXfrY,64
|
|
2
|
+
argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
|
|
3
|
+
argus/cli.py,sha256=hQ4t69E-clFvn9ZIeQ4Rf7cAqC0TgPtz1HEAFqNajcg,52706
|
|
4
|
+
argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
|
|
5
|
+
argus/core/__init__.py,sha256=sP206E44GdnnjKwyWNvuWntvO7D8oy0qs1yUUaPDThI,738
|
|
6
|
+
argus/core/base.py,sha256=WBrB7XWz125YZ1UQfHQwsYAuIFY_XGEhG_0ybgPhn6s,3696
|
|
7
|
+
argus/core/coco.py,sha256=V3Ifh6KUbifBTLefUuMxQkejgkwsPZNfKLn0newDZJ4,17539
|
|
8
|
+
argus/core/convert.py,sha256=cHuw1E9B4vyozpikS2PJnFfiJ_eRMPIHblizyeZz1Ps,8471
|
|
9
|
+
argus/core/mask.py,sha256=m7Ztf4lAZx5ITpk3F3mETcvCC6hGydlxK0-2nCjeTfU,21835
|
|
10
|
+
argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
|
|
11
|
+
argus/core/yolo.py,sha256=Vtw2sga40VooaRE8bmjKtr_aYhfoV7ZcVijFjg1DVwo,29644
|
|
12
|
+
argus_cv-1.5.0.dist-info/METADATA,sha256=9iwY-3C6t-vzZOA9wBvrvIY10YBUaHgsDRN5x5Uk_8c,1353
|
|
13
|
+
argus_cv-1.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
+
argus_cv-1.5.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
|
|
15
|
+
argus_cv-1.5.0.dist-info/RECORD,,
|
argus_cv-1.3.0.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
argus/__init__.py,sha256=277ASQvH6ZWVWUzOCVB7vAxn3OYciow4nVkjG16-kio,64
|
|
2
|
-
argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
|
|
3
|
-
argus/cli.py,sha256=th1Rgn1Sm9juWoavopEXLBT8XEh5lKzOMX-pccwvDgA,47904
|
|
4
|
-
argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
|
|
5
|
-
argus/core/__init__.py,sha256=II2wYJpGUUGGKOFZ5BCpMIBTfv0WP-F15U_xbpWGjk8,453
|
|
6
|
-
argus/core/base.py,sha256=WBrB7XWz125YZ1UQfHQwsYAuIFY_XGEhG_0ybgPhn6s,3696
|
|
7
|
-
argus/core/coco.py,sha256=atVurZV2T7cszydyD9GfDTWHGYDd-JNK5RD7nse5avc,15823
|
|
8
|
-
argus/core/mask.py,sha256=m7Ztf4lAZx5ITpk3F3mETcvCC6hGydlxK0-2nCjeTfU,21835
|
|
9
|
-
argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
|
|
10
|
-
argus/core/yolo.py,sha256=tTc9jJzXcwa8LQ_s8nv-D_i2b9k_x-LT1O0eWr4sZ2k,28616
|
|
11
|
-
argus_cv-1.3.0.dist-info/METADATA,sha256=1CCYLSnGHaAdS5jjwUuTJWRHu5OM_oFfvi1L_v3SkQw,1070
|
|
12
|
-
argus_cv-1.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
13
|
-
argus_cv-1.3.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
|
|
14
|
-
argus_cv-1.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|