argus-cv 1.3.0-py3-none-any.whl → 1.5.0-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of argus-cv has been flagged by the registry scanner.

argus/__init__.py CHANGED
@@ -1,3 +1,3 @@
  """Argus - Vision AI dataset toolkit."""

- __version__ = "1.3.0"
+ __version__ = "1.5.0"
argus/cli.py CHANGED
@@ -8,11 +8,18 @@ import cv2
  import numpy as np
  import typer
  from rich.console import Console
- from rich.progress import Progress, SpinnerColumn, TextColumn
+ from rich.progress import (
+     BarColumn,
+     Progress,
+     SpinnerColumn,
+     TaskProgressColumn,
+     TextColumn,
+ )
  from rich.table import Table

  from argus.core import COCODataset, Dataset, MaskDataset, YOLODataset
  from argus.core.base import DatasetFormat, TaskType
+ from argus.core.convert import convert_mask_to_yolo_seg
  from argus.core.split import (
      is_coco_unsplit,
      parse_ratio,
@@ -534,13 +541,6 @@ def split_dataset(
              help="Train/val/test ratio (e.g. 0.8,0.1,0.1).",
          ),
      ] = "0.8,0.1,0.1",
-     stratify: Annotated[
-         bool,
-         typer.Option(
-             "--stratify/--no-stratify",
-             help="Stratify by class distribution when splitting.",
-         ),
-     ] = True,
      seed: Annotated[
          int,
          typer.Option(
@@ -593,9 +593,7 @@ def split_dataset(
      ) as progress:
          progress.add_task("Creating YOLO splits...", total=None)
          try:
-             counts = split_yolo_dataset(
-                 dataset, output_path, ratios, stratify, seed
-             )
+             counts = split_yolo_dataset(dataset, output_path, ratios, True, seed)
          except ValueError as exc:
              console.print(f"[red]Error: {exc}[/red]")
              raise typer.Exit(1) from exc
@@ -628,7 +626,7 @@ def split_dataset(
                  annotation_file,
                  output_path,
                  ratios,
-                 stratify,
+                 True,
                  seed,
              )
          except ValueError as exc:
@@ -641,6 +639,148 @@ def split_dataset(
      )


+ @app.command(name="convert")
+ def convert_dataset(
+     input_path: Annotated[
+         Path,
+         typer.Option(
+             "--input-path",
+             "-i",
+             help="Path to the source dataset.",
+         ),
+     ] = Path("."),
+     output_path: Annotated[
+         Path,
+         typer.Option(
+             "--output-path",
+             "-o",
+             help="Output directory for converted dataset.",
+         ),
+     ] = Path("converted"),
+     to_format: Annotated[
+         str,
+         typer.Option(
+             "--to",
+             help="Target format (currently only 'yolo-seg' is supported).",
+         ),
+     ] = "yolo-seg",
+     epsilon_factor: Annotated[
+         float,
+         typer.Option(
+             "--epsilon-factor",
+             "-e",
+             help="Polygon simplification factor (Douglas-Peucker algorithm).",
+             min=0.0,
+             max=1.0,
+         ),
+     ] = 0.005,
+     min_area: Annotated[
+         float,
+         typer.Option(
+             "--min-area",
+             "-a",
+             help="Minimum contour area in pixels to include.",
+             min=0.0,
+         ),
+     ] = 100.0,
+ ) -> None:
+     """Convert a dataset from one format to another.
+
+     Currently supports converting MaskDataset to YOLO segmentation format.
+
+     Example:
+         uvx argus-cv convert -i /path/to/masks -o /path/to/output --to yolo-seg
+     """
+     # Validate format
+     if to_format != "yolo-seg":
+         console.print(
+             f"[red]Error: Unsupported target format '{to_format}'.[/red]\n"
+             "[yellow]Currently only 'yolo-seg' is supported.[/yellow]"
+         )
+         raise typer.Exit(1)
+
+     # Resolve and validate input path
+     input_path = input_path.resolve()
+     if not input_path.exists():
+         console.print(f"[red]Error: Path does not exist: {input_path}[/red]")
+         raise typer.Exit(1)
+     if not input_path.is_dir():
+         console.print(f"[red]Error: Path is not a directory: {input_path}[/red]")
+         raise typer.Exit(1)
+
+     # Detect source dataset - must be MaskDataset for yolo-seg conversion
+     dataset = MaskDataset.detect(input_path)
+     if not dataset:
+         console.print(
+             f"[red]Error: No MaskDataset found at {input_path}[/red]\n"
+             "[yellow]Ensure the path contains images/ + masks/ directories "
+             "(or equivalent patterns like img/+gt/ or leftImg8bit/+gtFine/).[/yellow]"
+         )
+         raise typer.Exit(1)
+
+     # Resolve output path
+     if not output_path.is_absolute():
+         output_path = input_path.parent / output_path
+     output_path = output_path.resolve()
+
+     # Check if output already exists
+     if output_path.exists() and any(output_path.iterdir()):
+         console.print(
+             f"[red]Error: Output directory already exists and is not empty: "
+             f"{output_path}[/red]"
+         )
+         raise typer.Exit(1)
+
+     # Show conversion info
+     console.print("[cyan]Converting MaskDataset to YOLO segmentation format[/cyan]")
+     console.print(f"  Source: {input_path}")
+     console.print(f"  Output: {output_path}")
+     console.print(f"  Classes: {dataset.num_classes}")
+     splits_str = ", ".join(dataset.splits) if dataset.splits else "unsplit"
+     console.print(f"  Splits: {splits_str}")
+     console.print()
+
+     # Run conversion with progress bar
+     with Progress(
+         SpinnerColumn(),
+         TextColumn("[progress.description]{task.description}"),
+         BarColumn(),
+         TaskProgressColumn(),
+         console=console,
+     ) as progress:
+         task = progress.add_task("Processing images...", total=None)
+
+         def update_progress(current: int, total: int) -> None:
+             progress.update(task, completed=current, total=total)
+
+         try:
+             stats = convert_mask_to_yolo_seg(
+                 dataset=dataset,
+                 output_path=output_path,
+                 epsilon_factor=epsilon_factor,
+                 min_area=min_area,
+                 progress_callback=update_progress,
+             )
+         except Exception as exc:
+             console.print(f"[red]Error during conversion: {exc}[/red]")
+             raise typer.Exit(1) from exc
+
+     # Show results
+     console.print()
+     console.print("[green]Conversion complete![/green]")
+     console.print(f"  Images processed: {stats['images']}")
+     console.print(f"  Labels created: {stats['labels']}")
+     console.print(f"  Polygons extracted: {stats['polygons']}")
+
+     if stats["skipped"] > 0:
+         skipped = stats["skipped"]
+         console.print(f"  [yellow]Skipped: {skipped} (no mask or empty)[/yellow]")
+     if stats["warnings"] > 0:
+         console.print(f"  [yellow]Warnings: {stats['warnings']}[/yellow]")
+
+     console.print(f"\n[cyan]Output dataset: {output_path}[/cyan]")
+
+
  class _ImageViewer:
      """Interactive image viewer with zoom and pan support."""

@@ -1352,9 +1492,12 @@ def _draw_annotations(
              overlay = img.copy()
              cv2.fillPoly(overlay, [pts], color)
              cv2.addWeighted(overlay, 0.3, img, 0.7, 0, img)
+             # Draw small points at polygon vertices
+             for pt in pts:
+                 cv2.circle(img, tuple(pt), radius=3, color=color, thickness=-1)

-     # Draw bounding box
-     if bbox:
+     # Draw bounding box (only for detection, not segmentation)
+     if bbox and not polygon:
          x, y, w, h = bbox
          x1, y1 = int(x), int(y)
          x2, y2 = int(x + w), int(y + h)
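The new command body is self-contained, but the same conversion can be driven programmatically. A minimal sketch using only names visible in this diff (the paths are hypothetical):

```python
from pathlib import Path

from argus.core import MaskDataset
from argus.core.convert import convert_mask_to_yolo_seg

# Hypothetical paths; any images/ + masks/ style layout should work.
source = Path("data/masks_dataset")
dataset = MaskDataset.detect(source)
if not dataset:
    raise SystemExit(f"no MaskDataset found at {source}")

stats = convert_mask_to_yolo_seg(
    dataset=dataset,
    output_path=Path("data/yolo_seg"),
    epsilon_factor=0.005,  # CLI default: Douglas-Peucker factor
    min_area=100.0,        # CLI default: drop contours under 100 px
)
print(f"{stats['labels']} label files, {stats['polygons']} polygons")
```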
argus/core/__init__.py CHANGED
@@ -2,6 +2,13 @@

  from argus.core.base import Dataset
  from argus.core.coco import COCODataset
+ from argus.core.convert import (
+     ConversionParams,
+     Polygon,
+     convert_mask_to_yolo_labels,
+     convert_mask_to_yolo_seg,
+     mask_to_polygons,
+ )
  from argus.core.mask import ConfigurationError, MaskDataset
  from argus.core.split import split_coco_dataset, split_yolo_dataset
  from argus.core.yolo import YOLODataset
@@ -14,4 +21,9 @@ __all__ = [
      "ConfigurationError",
      "split_coco_dataset",
      "split_yolo_dataset",
+     "ConversionParams",
+     "Polygon",
+     "mask_to_polygons",
+     "convert_mask_to_yolo_labels",
+     "convert_mask_to_yolo_seg",
  ]
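With these re-exports in place, downstream code can import the conversion API from the package root, e.g.:

```python
from argus.core import ConversionParams, Polygon, mask_to_polygons
```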
argus/core/coco.py CHANGED
@@ -75,6 +75,13 @@ class COCODataset(Dataset):
          # Also check root directory for single annotation file
          annotation_files.extend(path.glob("*.json"))

+         # Check split directories for Roboflow COCO format
+         for split_name in ["train", "valid", "val", "test"]:
+             split_dir = path / split_name
+             if split_dir.is_dir():
+                 annotation_files.extend(split_dir.glob("*annotations*.json"))
+                 annotation_files.extend(split_dir.glob("*coco*.json"))
+
          # Filter to only include files that might be COCO annotations
          # (exclude package.json, tsconfig.json, etc.)
          filtered_files = []
@@ -185,8 +192,10 @@ class COCODataset(Dataset):
              if isinstance(cat, dict) and "id" in cat and "name" in cat:
                  id_to_name[cat["id"]] = cat["name"]

-             # Determine split from filename
-             split = self._get_split_from_filename(ann_file.stem)
+             # Determine split from filename or parent directory
+             split = self._get_split_from_filename(
+                 ann_file.stem, ann_file.parent.name
+             )

              # Count annotations per category
              split_counts: dict[str, int] = counts.get(split, {})
@@ -224,7 +233,9 @@ class COCODataset(Dataset):
              if not isinstance(data, dict):
                  continue

-             split = self._get_split_from_filename(ann_file.stem)
+             split = self._get_split_from_filename(
+                 ann_file.stem, ann_file.parent.name
+             )

              images = data.get("images", [])
              annotations = data.get("annotations", [])
@@ -256,11 +267,12 @@ class COCODataset(Dataset):
          return counts

      @staticmethod
-     def _get_split_from_filename(filename: str) -> str:
-         """Extract split name from annotation filename.
+     def _get_split_from_filename(filename: str, parent_dir: str | None = None) -> str:
+         """Extract split name from annotation filename or parent directory.

          Args:
              filename: Annotation file stem (without extension).
+             parent_dir: Optional parent directory name (for Roboflow COCO format).

          Returns:
              Split name (train, val, test) or 'train' as default.
@@ -272,6 +284,17 @@ class COCODataset(Dataset):
              return "val"
          elif "test" in name_lower:
              return "test"
+
+         # Check parent directory name (Roboflow COCO format)
+         if parent_dir:
+             parent_lower = parent_dir.lower()
+             if parent_lower == "train":
+                 return "train"
+             elif parent_lower in ("val", "valid"):
+                 return "val"
+             elif parent_lower == "test":
+                 return "test"
+
          return "train"

      @classmethod
@@ -301,7 +324,7 @@ class COCODataset(Dataset):

      @classmethod
      def _detect_splits(cls, annotation_files: list[Path]) -> list[str]:
-         """Detect available splits from annotation filenames.
+         """Detect available splits from annotation filenames or parent directories.

          Args:
              annotation_files: List of annotation file paths.
@@ -313,13 +336,22 @@ class COCODataset(Dataset):

          for ann_file in annotation_files:
              name_lower = ann_file.stem.lower()
+             parent_lower = ann_file.parent.name.lower()

+             # Check filename first
              if "train" in name_lower and "train" not in splits:
                  splits.append("train")
              elif "val" in name_lower and "val" not in splits:
                  splits.append("val")
              elif "test" in name_lower and "test" not in splits:
                  splits.append("test")
+             # Check parent directory (Roboflow COCO format)
+             elif parent_lower == "train" and "train" not in splits:
+                 splits.append("train")
+             elif parent_lower in ("val", "valid") and "val" not in splits:
+                 splits.append("val")
+             elif parent_lower == "test" and "test" not in splits:
+                 splits.append("test")

          # If no splits detected from filenames, default to train
          if not splits:
@@ -342,7 +374,9 @@ class COCODataset(Dataset):
          for ann_file in self.annotation_files:
              # Filter by split if specified
              if split:
-                 file_split = self._get_split_from_filename(ann_file.stem)
+                 file_split = self._get_split_from_filename(
+                     ann_file.stem, ann_file.parent.name
+                 )
                  if file_split != split:
                      continue

@@ -354,7 +388,9 @@ class COCODataset(Dataset):
                  continue

              images = data.get("images", [])
-             file_split = self._get_split_from_filename(ann_file.stem)
+             file_split = self._get_split_from_filename(
+                 ann_file.stem, ann_file.parent.name
+             )

              for img in images:
                  if not isinstance(img, dict) or "file_name" not in img:
@@ -371,6 +407,8 @@ class COCODataset(Dataset):
                      self.path / "images" / file_name,
                      self.path / file_split / file_name,
                      self.path / file_name,
+                     # Roboflow format: images alongside annotations
+                     ann_file.parent / file_name,
                  ]

                  for img_path in possible_paths:
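For illustration, how the extended split resolution behaves. `_get_split_from_filename` is a private helper; the calls below just trace the logic of the hunks above (Roboflow exports name the file `_annotations.coco.json` and encode the split in the directory):

```python
from argus.core.coco import COCODataset

# Filename takes precedence when it names a split:
COCODataset._get_split_from_filename("instances_val2017")           # -> "val"

# Roboflow layout: the stem is uninformative, the parent directory decides:
COCODataset._get_split_from_filename("_annotations.coco", "valid")  # -> "val"
COCODataset._get_split_from_filename("_annotations.coco", "test")   # -> "test"

# No hint anywhere falls back to the default:
COCODataset._get_split_from_filename("annotations")                 # -> "train"
```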
argus/core/convert.py ADDED
@@ -0,0 +1,277 @@
+ """Conversion functions for dataset format transformation."""
+
+ import logging
+ import shutil
+ from collections.abc import Callable
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ import cv2
+ import numpy as np
+ import yaml
+ from numpy.typing import NDArray
+
+ from argus.core.mask import MaskDataset
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class ConversionParams:
+     """Parameters for mask-to-polygon conversion.
+
+     Attributes:
+         class_id: Class ID for the resulting polygon.
+         epsilon_factor: Douglas-Peucker simplification factor (relative to perimeter).
+         min_area: Minimum contour area in pixels to include.
+     """
+
+     class_id: int = 0
+     epsilon_factor: float = 0.005
+     min_area: float = 100.0
+
+
+ @dataclass
+ class Polygon:
+     """A polygon annotation with class ID and normalized points.
+
+     Attributes:
+         class_id: Class ID for this polygon.
+         points: List of (x, y) points normalized to [0, 1].
+     """
+
+     class_id: int
+     points: list[tuple[float, float]]
+
+     def to_yolo(self) -> str:
+         """Convert to YOLO segmentation format string.
+
+         Returns:
+             String in format: "class_id x1 y1 x2 y2 ... xn yn"
+         """
+         coords = " ".join(f"{x:.6f} {y:.6f}" for x, y in self.points)
+         return f"{self.class_id} {coords}"
+
+
+ def mask_to_polygons(
+     mask: NDArray[np.uint8],
+     params: ConversionParams | None = None,
+ ) -> list[Polygon]:
+     """Convert a binary mask to simplified polygons.
+
+     Args:
+         mask: Binary mask (255 for foreground, 0 for background).
+         params: Conversion parameters. Uses defaults if None.
+
+     Returns:
+         List of Polygon objects with normalized coordinates.
+     """
+     if params is None:
+         params = ConversionParams()
+
+     h, w = mask.shape[:2]
+     polygons: list[Polygon] = []
+
+     # Find contours
+     contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     for contour in contours:
+         area = cv2.contourArea(contour)
+         if area < params.min_area:
+             continue
+
+         # Simplify polygon using Douglas-Peucker
+         perimeter = cv2.arcLength(contour, closed=True)
+         epsilon = params.epsilon_factor * perimeter
+         simplified = cv2.approxPolyDP(contour, epsilon, closed=True)
+
+         # Need at least 3 points for a valid polygon
+         if len(simplified) < 3:
+             continue
+
+         # Normalize coordinates to [0, 1]
+         points: list[tuple[float, float]] = []
+         for point in simplified:
+             x, y = point[0]
+             points.append((x / w, y / h))
+
+         polygons.append(Polygon(class_id=params.class_id, points=points))
+
+     return polygons
+
+
+ def convert_mask_to_yolo_labels(
+     mask: np.ndarray,
+     class_ids: list[int],
+     epsilon_factor: float = 0.005,
+     min_area: float = 100.0,
+ ) -> list[str]:
+     """Convert a multi-class mask to YOLO label lines.
+
+     Args:
+         mask: Grayscale mask where pixel values represent class IDs.
+         class_ids: List of class IDs to extract (excluding ignore index).
+         epsilon_factor: Douglas-Peucker simplification factor.
+         min_area: Minimum contour area in pixels.
+
+     Returns:
+         List of YOLO format label strings.
+     """
+     lines: list[str] = []
+
+     for class_id in class_ids:
+         # Create binary mask for this class
+         binary_mask = (mask == class_id).astype(np.uint8) * 255
+
+         params = ConversionParams(
+             class_id=class_id,
+             epsilon_factor=epsilon_factor,
+             min_area=min_area,
+         )
+         polygons = mask_to_polygons(binary_mask, params)
+         lines.extend(poly.to_yolo() for poly in polygons)
+
+     return lines
+
+
+ def convert_mask_to_yolo_seg(
+     dataset: MaskDataset,
+     output_path: Path,
+     epsilon_factor: float = 0.005,
+     min_area: float = 100.0,
+     progress_callback: Callable[[int, int], None] | None = None,
+ ) -> dict[str, int]:
+     """Convert a MaskDataset to YOLO segmentation format.
+
+     Args:
+         dataset: Source MaskDataset to convert.
+         output_path: Output directory for YOLO dataset.
+         epsilon_factor: Douglas-Peucker simplification factor.
+         min_area: Minimum contour area in pixels.
+         progress_callback: Optional callback(current, total) for progress updates.
+
+     Returns:
+         Dictionary with conversion statistics:
+             - "images": Total images processed
+             - "labels": Total label files created
+             - "polygons": Total polygons extracted
+             - "skipped": Images skipped (no mask or empty)
+             - "warnings": Number of warnings (dimension mismatch, etc.)
+     """
+     stats = {
+         "images": 0,
+         "labels": 0,
+         "polygons": 0,
+         "skipped": 0,
+         "warnings": 0,
+     }
+
+     # Create output directory structure
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     # Get class mapping and build id-to-name for data.yaml
+     class_mapping = dataset.get_class_mapping()
+     class_ids = sorted(class_mapping.keys())
+
+     # Build data.yaml content
+     data_yaml: dict = {
+         "path": ".",
+         "names": {i: class_mapping[i] for i in class_ids},
+     }
+
+     # Determine splits to process
+     splits = dataset.splits if dataset.splits else [None]
+
+     # Count total images for progress
+     total_images = 0
+     for split in splits:
+         total_images += len(dataset.get_image_paths(split))
+
+     current_image = 0
+
+     for split in splits:
+         split_name = split if split else "train"  # Default to train if unsplit
+
+         # Create directories
+         images_dir = output_path / "images" / split_name
+         labels_dir = output_path / "labels" / split_name
+         images_dir.mkdir(parents=True, exist_ok=True)
+         labels_dir.mkdir(parents=True, exist_ok=True)
+
+         # Add split to data.yaml
+         data_yaml[split_name] = f"images/{split_name}"
+
+         # Process images in this split
+         image_paths = dataset.get_image_paths(split)
+
+         for image_path in image_paths:
+             current_image += 1
+             if progress_callback:
+                 progress_callback(current_image, total_images)
+
+             stats["images"] += 1
+
+             # Load mask
+             mask = dataset.load_mask(image_path)
+             if mask is None:
+                 logger.warning(f"No mask found for {image_path.name}, skipping")
+                 stats["skipped"] += 1
+                 continue
+
+             # Load image to check dimensions
+             img = cv2.imread(str(image_path))
+             if img is None:
+                 logger.warning(f"Could not load image {image_path.name}, skipping")
+                 stats["skipped"] += 1
+                 continue
+
+             # Check dimension match
+             if img.shape[:2] != mask.shape[:2]:
+                 logger.warning(
+                     f"Dimension mismatch for {image_path.name}: "
+                     f"image={img.shape[:2]}, mask={mask.shape[:2]}"
+                 )
+                 stats["warnings"] += 1
+                 # Continue anyway - mask might still be usable
+
+             # Get unique class IDs present in this mask (excluding ignore index)
+             unique_ids = [
+                 int(v)
+                 for v in np.unique(mask)
+                 if v != dataset.ignore_index and v in class_ids
+             ]
+
+             if not unique_ids:
+                 # Empty mask (only background/ignored)
+                 logger.debug(f"Empty mask for {image_path.name}")
+                 stats["skipped"] += 1
+                 continue
+
+             # Convert mask to YOLO labels
+             label_lines = convert_mask_to_yolo_labels(
+                 mask, unique_ids, epsilon_factor, min_area
+             )
+
+             if not label_lines:
+                 # No polygons extracted (all contours too small)
+                 logger.debug(f"No valid polygons for {image_path.name}")
+                 stats["skipped"] += 1
+                 continue
+
+             # Copy image to output
+             dest_image = images_dir / image_path.name
+             shutil.copy2(image_path, dest_image)
+
+             # Write label file
+             label_file = labels_dir / f"{image_path.stem}.txt"
+             label_file.write_text("\n".join(label_lines) + "\n")
+
+             stats["labels"] += 1
+             stats["polygons"] += len(label_lines)
+
+     # Write data.yaml
+     data_yaml_path = output_path / "data.yaml"
+     with open(data_yaml_path, "w") as f:
+         yaml.dump(data_yaml, f, default_flow_style=False, sort_keys=False)
+
+     return stats
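A quick way to exercise the new module in isolation is to feed `mask_to_polygons` a synthetic binary mask. A minimal sketch; the exact vertex order depends on OpenCV's contour tracing, so treat the printed coordinates as illustrative:

```python
import cv2
import numpy as np

from argus.core.convert import ConversionParams, mask_to_polygons

# Synthetic 100x100 binary mask containing one filled rectangle.
mask = np.zeros((100, 100), dtype=np.uint8)
cv2.rectangle(mask, (20, 30), (79, 69), color=255, thickness=-1)

params = ConversionParams(class_id=2, epsilon_factor=0.01, min_area=50.0)
for poly in mask_to_polygons(mask, params):
    # One line per contour: "2 x1 y1 x2 y2 ..." with coordinates in [0, 1].
    print(poly.to_yolo())
```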
argus/core/yolo.py CHANGED
@@ -1,5 +1,6 @@
  """YOLO dataset detection and handling."""

+ import json
  from dataclasses import dataclass, field
  from pathlib import Path

@@ -200,6 +201,12 @@ class YOLODataset(Dataset):
          if not class_dirs:
              return None

+         # Check if any directory contains COCO annotation files
+         # (Roboflow COCO datasets have train/valid/test dirs with JSON files)
+         for class_dir in class_dirs:
+             if cls._has_coco_annotation(class_dir):
+                 return None  # Not a classification dataset
+
          # Check if these are class directories (contain images directly)
          class_names_set = set()
          for class_dir in class_dirs:
@@ -225,6 +232,28 @@ class YOLODataset(Dataset):
              config_file=None,
          )

+     @staticmethod
+     def _has_coco_annotation(directory: Path) -> bool:
+         """Check if directory contains COCO annotation files.
+
+         Args:
+             directory: Directory to check.
+
+         Returns:
+             True if COCO annotation files are found, False otherwise.
+         """
+         for f in directory.glob("*.json"):
+             try:
+                 with open(f, encoding="utf-8") as fp:
+                     data = json.load(fp)
+                 if isinstance(data, dict) and any(
+                     k in data for k in ["images", "annotations", "categories"]
+                 ):
+                     return True
+             except (json.JSONDecodeError, OSError):
+                 pass
+         return False
+
      def get_instance_counts(self) -> dict[str, dict[str, int]]:
          """Get the number of annotation instances per class, per split.

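The guard exists because a Roboflow COCO export has `train/`, `valid/`, and `test/` directories that look like classification class folders until you notice the JSON inside. A sketch of the check on such a hypothetical layout; `_has_coco_annotation` is a private helper shown for illustration:

```python
from pathlib import Path

from argus.core.yolo import YOLODataset

# Hypothetical Roboflow export: train/ holds images plus _annotations.coco.json.
if YOLODataset._has_coco_annotation(Path("roboflow_export/train")):
    # detect() now returns None here instead of treating train/valid/test
    # as classification class folders.
    print("COCO annotations found; not a YOLO classification dataset")
```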
argus_cv-1.3.0.dist-info/METADATA → argus_cv-1.5.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: argus-cv
- Version: 1.3.0
+ Version: 1.5.0
  Summary: CLI tool for working with vision AI datasets
  Requires-Python: >=3.10
  Requires-Dist: numpy>=1.24.0
@@ -12,7 +12,7 @@ Description-Content-Type: text/markdown

  # argus-cv

- Vision AI dataset toolkit for working with YOLO and COCO datasets.
+ Vision AI dataset toolkit for working with YOLO, COCO, and semantic mask datasets.

  **[Documentation](https://pirnerjonas.github.io/argus/)**

@@ -22,6 +22,13 @@ Vision AI dataset toolkit for working with YOLO and COCO datasets.
  uvx argus-cv
  ```

+ ## Highlights
+
+ - Detects YOLO, COCO, and folder-based semantic mask datasets.
+ - Reports per-class stats (pixel coverage for mask datasets).
+ - Interactive viewer with bounding boxes, polygons, or mask overlays.
+ - Splits unsplit YOLO/COCO datasets into train/val/test.
+
  ## Usage

  ```bash
argus_cv-1.5.0.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
+ argus/__init__.py,sha256=-NDJwMF-NWlPd0dIFWTu3SjgVWZy8SJxBD9g3YQXfrY,64
+ argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
+ argus/cli.py,sha256=hQ4t69E-clFvn9ZIeQ4Rf7cAqC0TgPtz1HEAFqNajcg,52706
+ argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
+ argus/core/__init__.py,sha256=sP206E44GdnnjKwyWNvuWntvO7D8oy0qs1yUUaPDThI,738
+ argus/core/base.py,sha256=WBrB7XWz125YZ1UQfHQwsYAuIFY_XGEhG_0ybgPhn6s,3696
+ argus/core/coco.py,sha256=V3Ifh6KUbifBTLefUuMxQkejgkwsPZNfKLn0newDZJ4,17539
+ argus/core/convert.py,sha256=cHuw1E9B4vyozpikS2PJnFfiJ_eRMPIHblizyeZz1Ps,8471
+ argus/core/mask.py,sha256=m7Ztf4lAZx5ITpk3F3mETcvCC6hGydlxK0-2nCjeTfU,21835
+ argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
+ argus/core/yolo.py,sha256=Vtw2sga40VooaRE8bmjKtr_aYhfoV7ZcVijFjg1DVwo,29644
+ argus_cv-1.5.0.dist-info/METADATA,sha256=9iwY-3C6t-vzZOA9wBvrvIY10YBUaHgsDRN5x5Uk_8c,1353
+ argus_cv-1.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ argus_cv-1.5.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
+ argus_cv-1.5.0.dist-info/RECORD,,
argus_cv-1.3.0.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
- argus/__init__.py,sha256=277ASQvH6ZWVWUzOCVB7vAxn3OYciow4nVkjG16-kio,64
- argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
- argus/cli.py,sha256=th1Rgn1Sm9juWoavopEXLBT8XEh5lKzOMX-pccwvDgA,47904
- argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
- argus/core/__init__.py,sha256=II2wYJpGUUGGKOFZ5BCpMIBTfv0WP-F15U_xbpWGjk8,453
- argus/core/base.py,sha256=WBrB7XWz125YZ1UQfHQwsYAuIFY_XGEhG_0ybgPhn6s,3696
- argus/core/coco.py,sha256=atVurZV2T7cszydyD9GfDTWHGYDd-JNK5RD7nse5avc,15823
- argus/core/mask.py,sha256=m7Ztf4lAZx5ITpk3F3mETcvCC6hGydlxK0-2nCjeTfU,21835
- argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
- argus/core/yolo.py,sha256=tTc9jJzXcwa8LQ_s8nv-D_i2b9k_x-LT1O0eWr4sZ2k,28616
- argus_cv-1.3.0.dist-info/METADATA,sha256=1CCYLSnGHaAdS5jjwUuTJWRHu5OM_oFfvi1L_v3SkQw,1070
- argus_cv-1.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- argus_cv-1.3.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
- argus_cv-1.3.0.dist-info/RECORD,,