argus-cv 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of argus-cv might be problematic. More details are available through the link on the original diff page (the link itself is not preserved in this text).

argus/core/yolo.py CHANGED
@@ -12,9 +12,9 @@ from argus.core.base import Dataset, DatasetFormat, TaskType
12
12
  class YOLODataset(Dataset):
13
13
  """YOLO format dataset.
14
14
 
15
- Supports detection and segmentation tasks.
15
+ Supports detection, segmentation, and classification tasks.
16
16
 
17
- Structure:
17
+ Structure (detection/segmentation):
18
18
  dataset/
19
19
  ├── data.yaml (or *.yaml/*.yml with 'names' key)
20
20
  ├── images/
@@ -23,6 +23,19 @@ class YOLODataset(Dataset):
23
23
  └── labels/
24
24
  ├── train/
25
25
  └── val/
26
+
27
+ Structure (classification):
28
+ dataset/
29
+ ├── images/
30
+ │ ├── train/
31
+ │ │ ├── class1/
32
+ │ │ │ ├── img1.jpg
33
+ │ │ │ └── img2.jpg
34
+ │ │ └── class2/
35
+ │ │ └── img1.jpg
36
+ │ └── val/
37
+ │ ├── class1/
38
+ │ └── class2/
26
39
  """
27
40
 
28
41
  config_file: Path | None = None
@@ -43,8 +56,13 @@ class YOLODataset(Dataset):
43
56
  if not path.is_dir():
44
57
  return None
45
58
 
46
- # Try detection/segmentation (YAML-based)
47
- return cls._detect_yaml_based(path)
59
+ # Try detection/segmentation (YAML-based) first
60
+ result = cls._detect_yaml_based(path)
61
+ if result:
62
+ return result
63
+
64
+ # Try classification (directory-based structure)
65
+ return cls._detect_classification(path)
48
66
 
49
67
  @classmethod
50
68
  def _detect_yaml_based(cls, path: Path) -> "YOLODataset | None":
@@ -71,6 +89,11 @@ class YOLODataset(Dataset):
71
89
  if "names" not in config:
72
90
  continue
73
91
 
92
+ # Skip if this looks like a mask dataset config
93
+ # (has ignore_index or palette keys which are mask-specific)
94
+ if "ignore_index" in config or "palette" in config:
95
+ continue
96
+
74
97
  names = config["names"]
75
98
 
76
99
  # Extract class names
@@ -103,16 +126,121 @@ class YOLODataset(Dataset):
103
126
 
104
127
  return None
105
128
 
129
+ @classmethod
130
+ def _detect_classification(cls, path: Path) -> "YOLODataset | None":
131
+ """Detect classification dataset from directory structure.
132
+
133
+ Classification datasets can have two structures:
134
+
135
+ 1. Split structure:
136
+ images/{split}/class_name/image.jpg
137
+
138
+ 2. Flat structure (unsplit):
139
+ class_name/image.jpg
140
+
141
+ No YAML config required - class names inferred from directory names.
142
+
143
+ Args:
144
+ path: Directory path to check.
145
+
146
+ Returns:
147
+ YOLODataset if classification structure found, None otherwise.
148
+ """
149
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
150
+
151
+ # Try split structure first: images/{split}/class/
152
+ images_root = path / "images"
153
+ if images_root.is_dir():
154
+ splits: list[str] = []
155
+ class_names_set: set[str] = set()
156
+
157
+ for split_name in ["train", "val", "test"]:
158
+ split_dir = images_root / split_name
159
+ if not split_dir.is_dir():
160
+ continue
161
+
162
+ # Get subdirectories (potential class folders)
163
+ class_dirs = [d for d in split_dir.iterdir() if d.is_dir()]
164
+ if not class_dirs:
165
+ continue
166
+
167
+ # Check if at least one class dir contains images
168
+ has_images = False
169
+ for class_dir in class_dirs:
170
+ for f in class_dir.iterdir():
171
+ if f.suffix.lower() in image_extensions:
172
+ has_images = True
173
+ break
174
+ if has_images:
175
+ break
176
+
177
+ if has_images:
178
+ splits.append(split_name)
179
+ class_names_set.update(d.name for d in class_dirs)
180
+
181
+ if splits and class_names_set:
182
+ class_names = sorted(class_names_set)
183
+ return cls(
184
+ path=path,
185
+ task=TaskType.CLASSIFICATION,
186
+ num_classes=len(class_names),
187
+ class_names=class_names,
188
+ splits=splits,
189
+ config_file=None,
190
+ )
191
+
192
+ # Try flat structure: class_name/image.jpg (no images/ or split dirs)
193
+ # Check if root contains subdirectories with images
194
+ class_dirs = [d for d in path.iterdir() if d.is_dir()]
195
+
196
+ # Filter out common non-class directories
197
+ excluded_dirs = {"images", "labels", "annotations", ".git", "__pycache__"}
198
+ class_dirs = [d for d in class_dirs if d.name not in excluded_dirs]
199
+
200
+ if not class_dirs:
201
+ return None
202
+
203
+ # Check if these are class directories (contain images directly)
204
+ class_names_set = set()
205
+ for class_dir in class_dirs:
206
+ has_images = any(
207
+ f.suffix.lower() in image_extensions
208
+ for f in class_dir.iterdir()
209
+ if f.is_file()
210
+ )
211
+ if has_images:
212
+ class_names_set.add(class_dir.name)
213
+
214
+ # Need at least 2 classes to be a valid classification dataset
215
+ if len(class_names_set) < 2:
216
+ return None
217
+
218
+ class_names = sorted(class_names_set)
219
+ return cls(
220
+ path=path,
221
+ task=TaskType.CLASSIFICATION,
222
+ num_classes=len(class_names),
223
+ class_names=class_names,
224
+ splits=[], # No splits for flat structure
225
+ config_file=None,
226
+ )
227
+
106
228
  def get_instance_counts(self) -> dict[str, dict[str, int]]:
107
229
  """Get the number of annotation instances per class, per split.
108
230
 
109
- Parses all label files in labels/{split}/*.txt and counts
110
- occurrences of each class ID. For unsplit datasets, uses "unsplit"
111
- as the split name.
231
+ For detection/segmentation: Parses all label files in labels/{split}/*.txt
232
+ and counts occurrences of each class ID.
233
+
234
+ For classification: Counts images in each class directory
235
+ (1 image = 1 instance).
112
236
 
113
237
  Returns:
114
238
  Dictionary mapping split name to dict of class name to instance count.
115
239
  """
240
+ # Handle classification datasets differently
241
+ if self.task == TaskType.CLASSIFICATION:
242
+ return self._get_classification_instance_counts()
243
+
116
244
  counts: dict[str, dict[str, int]] = {}
117
245
 
118
246
  # Build class_id -> class_name mapping
@@ -162,15 +290,77 @@ class YOLODataset(Dataset):
162
290
 
163
291
  return counts
164
292
 
293
+ def _get_classification_instance_counts(self) -> dict[str, dict[str, int]]:
294
+ """Get instance counts for classification datasets.
295
+
296
+ Each image is one instance of its class.
297
+
298
+ Returns:
299
+ Dictionary mapping split name to dict of class name to image count.
300
+ """
301
+ counts: dict[str, dict[str, int]] = {}
302
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
303
+
304
+ # Handle flat structure (no splits)
305
+ if not self.splits:
306
+ split_counts: dict[str, int] = {}
307
+ for class_name in self.class_names:
308
+ class_dir = self.path / class_name
309
+ if not class_dir.is_dir():
310
+ split_counts[class_name] = 0
311
+ continue
312
+
313
+ image_count = sum(
314
+ 1
315
+ for f in class_dir.iterdir()
316
+ if f.suffix.lower() in image_extensions
317
+ )
318
+ split_counts[class_name] = image_count
319
+
320
+ counts["unsplit"] = split_counts
321
+ return counts
322
+
323
+ # Handle split structure
324
+ images_root = self.path / "images"
325
+ for split in self.splits:
326
+ split_dir = images_root / split
327
+ if not split_dir.is_dir():
328
+ continue
329
+
330
+ split_counts = {}
331
+ for class_name in self.class_names:
332
+ class_dir = split_dir / class_name
333
+ if not class_dir.is_dir():
334
+ split_counts[class_name] = 0
335
+ continue
336
+
337
+ image_count = sum(
338
+ 1
339
+ for f in class_dir.iterdir()
340
+ if f.suffix.lower() in image_extensions
341
+ )
342
+ split_counts[class_name] = image_count
343
+
344
+ counts[split] = split_counts
345
+
346
+ return counts
347
+
165
348
  def get_image_counts(self) -> dict[str, dict[str, int]]:
166
349
  """Get image counts per split, including background images.
167
350
 
168
- Counts label files in labels/{split}/*.txt. Empty files are
169
- counted as background images.
351
+ For detection/segmentation: Counts label files in labels/{split}/*.txt.
352
+ Empty files are counted as background images.
353
+
354
+ For classification: Counts total images across all class directories.
355
+ Background count is always 0 (no background concept in classification).
170
356
 
171
357
  Returns:
172
358
  Dictionary mapping split name to dict with "total" and "background" counts.
173
359
  """
360
+ # Handle classification datasets differently
361
+ if self.task == TaskType.CLASSIFICATION:
362
+ return self._get_classification_image_counts()
363
+
174
364
  counts: dict[str, dict[str, int]] = {}
175
365
 
176
366
  labels_root = self.path / "labels"
@@ -203,6 +393,56 @@ class YOLODataset(Dataset):
203
393
 
204
394
  return counts
205
395
 
396
+ def _get_classification_image_counts(self) -> dict[str, dict[str, int]]:
397
+ """Get image counts for classification datasets.
398
+
399
+ Returns:
400
+ Dictionary mapping split name to dict with "total" and "background" counts.
401
+ Background is always 0 for classification.
402
+ """
403
+ counts: dict[str, dict[str, int]] = {}
404
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
405
+
406
+ # Handle flat structure (no splits)
407
+ if not self.splits:
408
+ total = 0
409
+ for class_name in self.class_names:
410
+ class_dir = self.path / class_name
411
+ if not class_dir.is_dir():
412
+ continue
413
+
414
+ total += sum(
415
+ 1
416
+ for f in class_dir.iterdir()
417
+ if f.suffix.lower() in image_extensions
418
+ )
419
+
420
+ counts["unsplit"] = {"total": total, "background": 0}
421
+ return counts
422
+
423
+ # Handle split structure
424
+ images_root = self.path / "images"
425
+ for split in self.splits:
426
+ split_dir = images_root / split
427
+ if not split_dir.is_dir():
428
+ continue
429
+
430
+ total = 0
431
+ for class_name in self.class_names:
432
+ class_dir = split_dir / class_name
433
+ if not class_dir.is_dir():
434
+ continue
435
+
436
+ total += sum(
437
+ 1
438
+ for f in class_dir.iterdir()
439
+ if f.suffix.lower() in image_extensions
440
+ )
441
+
442
+ counts[split] = {"total": total, "background": 0}
443
+
444
+ return counts
445
+
206
446
  @classmethod
207
447
  def _detect_splits(cls, path: Path, config: dict) -> list[str]:
208
448
  """Detect available splits from config and filesystem.
@@ -301,6 +541,10 @@ class YOLODataset(Dataset):
301
541
  Returns:
302
542
  List of image file paths sorted alphabetically.
303
543
  """
544
+ # Handle classification datasets differently
545
+ if self.task == TaskType.CLASSIFICATION:
546
+ return self._get_classification_image_paths(split)
547
+
304
548
  image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
305
549
  images_root = self.path / "images"
306
550
  image_paths: list[Path] = []
@@ -345,6 +589,51 @@ class YOLODataset(Dataset):
345
589
 
346
590
  return sorted(image_paths, key=lambda p: p.name)
347
591
 
592
+ def _get_classification_image_paths(self, split: str | None = None) -> list[Path]:
593
+ """Get image paths for classification datasets.
594
+
595
+ Args:
596
+ split: Specific split to get images from. If None, returns all images.
597
+
598
+ Returns:
599
+ List of image file paths sorted alphabetically.
600
+ """
601
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
602
+ image_paths: list[Path] = []
603
+
604
+ # Handle flat structure (no splits)
605
+ if not self.splits:
606
+ for class_name in self.class_names:
607
+ class_dir = self.path / class_name
608
+ if not class_dir.is_dir():
609
+ continue
610
+
611
+ for img_file in class_dir.iterdir():
612
+ if img_file.suffix.lower() in image_extensions:
613
+ image_paths.append(img_file)
614
+
615
+ return sorted(image_paths, key=lambda p: p.name)
616
+
617
+ # Handle split structure
618
+ images_root = self.path / "images"
619
+ splits_to_search = [split] if split else self.splits
620
+
621
+ for s in splits_to_search:
622
+ split_dir = images_root / s
623
+ if not split_dir.is_dir():
624
+ continue
625
+
626
+ for class_name in self.class_names:
627
+ class_dir = split_dir / class_name
628
+ if not class_dir.is_dir():
629
+ continue
630
+
631
+ for img_file in class_dir.iterdir():
632
+ if img_file.suffix.lower() in image_extensions:
633
+ image_paths.append(img_file)
634
+
635
+ return sorted(image_paths, key=lambda p: p.name)
636
+
348
637
  def get_annotations_for_image(self, image_path: Path) -> list[dict]:
349
638
  """Get annotations for a specific image.
350
639
 
@@ -408,12 +697,14 @@ class YOLODataset(Dataset):
408
697
  x = x_center - width / 2
409
698
  y = y_center - height / 2
410
699
 
411
- annotations.append({
412
- "class_name": class_name,
413
- "class_id": class_id,
414
- "bbox": (x, y, width, height),
415
- "polygon": None,
416
- })
700
+ annotations.append(
701
+ {
702
+ "class_name": class_name,
703
+ "class_id": class_id,
704
+ "bbox": (x, y, width, height),
705
+ "polygon": None,
706
+ }
707
+ )
417
708
  else:
418
709
  # Segmentation: class x1 y1 x2 y2 ... xn yn
419
710
  coords = [float(p) for p in parts[1:]]
@@ -431,12 +722,14 @@ class YOLODataset(Dataset):
431
722
  width = max(xs) - x
432
723
  height = max(ys) - y
433
724
 
434
- annotations.append({
435
- "class_name": class_name,
436
- "class_id": class_id,
437
- "bbox": (x, y, width, height),
438
- "polygon": polygon,
439
- })
725
+ annotations.append(
726
+ {
727
+ "class_name": class_name,
728
+ "class_id": class_id,
729
+ "bbox": (x, y, width, height),
730
+ "polygon": polygon,
731
+ }
732
+ )
440
733
 
441
734
  except (ValueError, IndexError):
442
735
  continue
@@ -445,3 +738,65 @@ class YOLODataset(Dataset):
445
738
  pass
446
739
 
447
740
  return annotations
741
+
742
+ def get_images_by_class(self, split: str | None = None) -> dict[str, list[Path]]:
743
+ """Get images grouped by class for classification datasets.
744
+
745
+ Args:
746
+ split: Specific split to get images from. If None, uses first
747
+ available split or all images for flat structure.
748
+
749
+ Returns:
750
+ Dictionary mapping class name to list of image paths.
751
+ """
752
+ if self.task != TaskType.CLASSIFICATION:
753
+ return {}
754
+
755
+ image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"}
756
+ images_by_class: dict[str, list[Path]] = {cls: [] for cls in self.class_names}
757
+
758
+ # Handle flat structure (no splits)
759
+ if not self.splits:
760
+ for class_name in self.class_names:
761
+ class_dir = self.path / class_name
762
+ if not class_dir.is_dir():
763
+ continue
764
+
765
+ for img_file in class_dir.iterdir():
766
+ if img_file.suffix.lower() in image_extensions:
767
+ images_by_class[class_name].append(img_file)
768
+
769
+ # Sort images within each class
770
+ for class_name in images_by_class:
771
+ images_by_class[class_name] = sorted(
772
+ images_by_class[class_name], key=lambda p: p.name
773
+ )
774
+
775
+ return images_by_class
776
+
777
+ # Handle split structure
778
+ images_root = self.path / "images"
779
+ default_splits = self.splits[:1] if self.splits else []
780
+ splits_to_search = [split] if split else default_splits
781
+
782
+ for s in splits_to_search:
783
+ split_dir = images_root / s
784
+ if not split_dir.is_dir():
785
+ continue
786
+
787
+ for class_name in self.class_names:
788
+ class_dir = split_dir / class_name
789
+ if not class_dir.is_dir():
790
+ continue
791
+
792
+ for img_file in class_dir.iterdir():
793
+ if img_file.suffix.lower() in image_extensions:
794
+ images_by_class[class_name].append(img_file)
795
+
796
+ # Sort images within each class for consistent ordering
797
+ for class_name in images_by_class:
798
+ images_by_class[class_name] = sorted(
799
+ images_by_class[class_name], key=lambda p: p.name
800
+ )
801
+
802
+ return images_by_class
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argus-cv
3
- Version: 1.1.0
3
+ Version: 1.3.0
4
4
  Summary: CLI tool for working with vision AI datasets
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: numpy>=1.24.0
@@ -0,0 +1,14 @@
1
+ argus/__init__.py,sha256=277ASQvH6ZWVWUzOCVB7vAxn3OYciow4nVkjG16-kio,64
2
+ argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
3
+ argus/cli.py,sha256=th1Rgn1Sm9juWoavopEXLBT8XEh5lKzOMX-pccwvDgA,47904
4
+ argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
5
+ argus/core/__init__.py,sha256=II2wYJpGUUGGKOFZ5BCpMIBTfv0WP-F15U_xbpWGjk8,453
6
+ argus/core/base.py,sha256=WBrB7XWz125YZ1UQfHQwsYAuIFY_XGEhG_0ybgPhn6s,3696
7
+ argus/core/coco.py,sha256=atVurZV2T7cszydyD9GfDTWHGYDd-JNK5RD7nse5avc,15823
8
+ argus/core/mask.py,sha256=m7Ztf4lAZx5ITpk3F3mETcvCC6hGydlxK0-2nCjeTfU,21835
9
+ argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
10
+ argus/core/yolo.py,sha256=tTc9jJzXcwa8LQ_s8nv-D_i2b9k_x-LT1O0eWr4sZ2k,28616
11
+ argus_cv-1.3.0.dist-info/METADATA,sha256=1CCYLSnGHaAdS5jjwUuTJWRHu5OM_oFfvi1L_v3SkQw,1070
12
+ argus_cv-1.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
13
+ argus_cv-1.3.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
14
+ argus_cv-1.3.0.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- argus/__init__.py,sha256=_j7Q7azBU1noVs9msyPjAbXcNBN0lZamyibW_IP_mdc,64
2
- argus/__main__.py,sha256=63ezHx8eL_lCMoZrCbKhmpao0fmdvYVw1chbknGg-oI,104
3
- argus/cli.py,sha256=T2jA4LZIeg1_BJJyvHWRhC-DypSBl-9UJV2k8odGGok,25849
4
- argus/commands/__init__.py,sha256=i2oor9hpVpF-_1qZWCGDLwwi1pZGJfZnUKJZ_NMBG18,30
5
- argus/core/__init__.py,sha256=Plv_tk0Wq9OlGLDPOSQWxrd5cTwNK9kEZANTim3s23A,348
6
- argus/core/base.py,sha256=Vd_2xR6L3lhu9vHoyLeFTc0Dg59py_D9kaye1tta5Co,3678
7
- argus/core/coco.py,sha256=bJvOhBzwjsOU8DBijGDysnSPlprwetkPf4Z02UOmqw0,15757
8
- argus/core/split.py,sha256=kEWtbdg6bH-WiNFf83HkqZD90EL4gsavw6JiefuAETs,10776
9
- argus/core/yolo.py,sha256=KTWgmEguxKZ_C0WsMxUB-B-zbx_Oi1ieGDk3Osuh0xY,15876
10
- argus_cv-1.1.0.dist-info/METADATA,sha256=TovQhK7bNU8ip-3F2VzGbTzIjnxYXfuJ2ShbhLM5rvM,1070
11
- argus_cv-1.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
- argus_cv-1.1.0.dist-info/entry_points.txt,sha256=dvJFH7BkrOxJnifSjPhwq1YCafPaqdngWyBuFYE73yY,43
13
- argus_cv-1.1.0.dist-info/RECORD,,