bplusplus 0.1.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of bplusplus might be problematic.
Files changed (95)
  1. bplusplus/__init__.py +5 -3
  2. bplusplus/{collect_images.py → collect.py} +3 -3
  3. bplusplus/prepare.py +573 -0
  4. bplusplus/train_validate.py +8 -64
  5. bplusplus/yolov5detect/__init__.py +1 -0
  6. bplusplus/yolov5detect/detect.py +444 -0
  7. bplusplus/yolov5detect/export.py +1530 -0
  8. bplusplus/yolov5detect/insect.yaml +8 -0
  9. bplusplus/yolov5detect/models/__init__.py +0 -0
  10. bplusplus/yolov5detect/models/common.py +1109 -0
  11. bplusplus/yolov5detect/models/experimental.py +130 -0
  12. bplusplus/yolov5detect/models/hub/anchors.yaml +56 -0
  13. bplusplus/yolov5detect/models/hub/yolov3-spp.yaml +52 -0
  14. bplusplus/yolov5detect/models/hub/yolov3-tiny.yaml +42 -0
  15. bplusplus/yolov5detect/models/hub/yolov3.yaml +52 -0
  16. bplusplus/yolov5detect/models/hub/yolov5-bifpn.yaml +49 -0
  17. bplusplus/yolov5detect/models/hub/yolov5-fpn.yaml +43 -0
  18. bplusplus/yolov5detect/models/hub/yolov5-p2.yaml +55 -0
  19. bplusplus/yolov5detect/models/hub/yolov5-p34.yaml +42 -0
  20. bplusplus/yolov5detect/models/hub/yolov5-p6.yaml +57 -0
  21. bplusplus/yolov5detect/models/hub/yolov5-p7.yaml +68 -0
  22. bplusplus/yolov5detect/models/hub/yolov5-panet.yaml +49 -0
  23. bplusplus/yolov5detect/models/hub/yolov5l6.yaml +61 -0
  24. bplusplus/yolov5detect/models/hub/yolov5m6.yaml +61 -0
  25. bplusplus/yolov5detect/models/hub/yolov5n6.yaml +61 -0
  26. bplusplus/yolov5detect/models/hub/yolov5s-LeakyReLU.yaml +50 -0
  27. bplusplus/yolov5detect/models/hub/yolov5s-ghost.yaml +49 -0
  28. bplusplus/yolov5detect/models/hub/yolov5s-transformer.yaml +49 -0
  29. bplusplus/yolov5detect/models/hub/yolov5s6.yaml +61 -0
  30. bplusplus/yolov5detect/models/hub/yolov5x6.yaml +61 -0
  31. bplusplus/yolov5detect/models/segment/yolov5l-seg.yaml +49 -0
  32. bplusplus/yolov5detect/models/segment/yolov5m-seg.yaml +49 -0
  33. bplusplus/yolov5detect/models/segment/yolov5n-seg.yaml +49 -0
  34. bplusplus/yolov5detect/models/segment/yolov5s-seg.yaml +49 -0
  35. bplusplus/yolov5detect/models/segment/yolov5x-seg.yaml +49 -0
  36. bplusplus/yolov5detect/models/tf.py +797 -0
  37. bplusplus/yolov5detect/models/yolo.py +495 -0
  38. bplusplus/yolov5detect/models/yolov5l.yaml +49 -0
  39. bplusplus/yolov5detect/models/yolov5m.yaml +49 -0
  40. bplusplus/yolov5detect/models/yolov5n.yaml +49 -0
  41. bplusplus/yolov5detect/models/yolov5s.yaml +49 -0
  42. bplusplus/yolov5detect/models/yolov5x.yaml +49 -0
  43. bplusplus/yolov5detect/utils/__init__.py +97 -0
  44. bplusplus/yolov5detect/utils/activations.py +134 -0
  45. bplusplus/yolov5detect/utils/augmentations.py +448 -0
  46. bplusplus/yolov5detect/utils/autoanchor.py +175 -0
  47. bplusplus/yolov5detect/utils/autobatch.py +70 -0
  48. bplusplus/yolov5detect/utils/aws/__init__.py +0 -0
  49. bplusplus/yolov5detect/utils/aws/mime.sh +26 -0
  50. bplusplus/yolov5detect/utils/aws/resume.py +41 -0
  51. bplusplus/yolov5detect/utils/aws/userdata.sh +27 -0
  52. bplusplus/yolov5detect/utils/callbacks.py +72 -0
  53. bplusplus/yolov5detect/utils/dataloaders.py +1385 -0
  54. bplusplus/yolov5detect/utils/docker/Dockerfile +73 -0
  55. bplusplus/yolov5detect/utils/docker/Dockerfile-arm64 +40 -0
  56. bplusplus/yolov5detect/utils/docker/Dockerfile-cpu +42 -0
  57. bplusplus/yolov5detect/utils/downloads.py +136 -0
  58. bplusplus/yolov5detect/utils/flask_rest_api/README.md +70 -0
  59. bplusplus/yolov5detect/utils/flask_rest_api/example_request.py +17 -0
  60. bplusplus/yolov5detect/utils/flask_rest_api/restapi.py +49 -0
  61. bplusplus/yolov5detect/utils/general.py +1294 -0
  62. bplusplus/yolov5detect/utils/google_app_engine/Dockerfile +25 -0
  63. bplusplus/yolov5detect/utils/google_app_engine/additional_requirements.txt +6 -0
  64. bplusplus/yolov5detect/utils/google_app_engine/app.yaml +16 -0
  65. bplusplus/yolov5detect/utils/loggers/__init__.py +476 -0
  66. bplusplus/yolov5detect/utils/loggers/clearml/README.md +222 -0
  67. bplusplus/yolov5detect/utils/loggers/clearml/__init__.py +0 -0
  68. bplusplus/yolov5detect/utils/loggers/clearml/clearml_utils.py +230 -0
  69. bplusplus/yolov5detect/utils/loggers/clearml/hpo.py +90 -0
  70. bplusplus/yolov5detect/utils/loggers/comet/README.md +250 -0
  71. bplusplus/yolov5detect/utils/loggers/comet/__init__.py +551 -0
  72. bplusplus/yolov5detect/utils/loggers/comet/comet_utils.py +151 -0
  73. bplusplus/yolov5detect/utils/loggers/comet/hpo.py +126 -0
  74. bplusplus/yolov5detect/utils/loggers/comet/optimizer_config.json +135 -0
  75. bplusplus/yolov5detect/utils/loggers/wandb/__init__.py +0 -0
  76. bplusplus/yolov5detect/utils/loggers/wandb/wandb_utils.py +210 -0
  77. bplusplus/yolov5detect/utils/loss.py +259 -0
  78. bplusplus/yolov5detect/utils/metrics.py +381 -0
  79. bplusplus/yolov5detect/utils/plots.py +517 -0
  80. bplusplus/yolov5detect/utils/segment/__init__.py +0 -0
  81. bplusplus/yolov5detect/utils/segment/augmentations.py +100 -0
  82. bplusplus/yolov5detect/utils/segment/dataloaders.py +366 -0
  83. bplusplus/yolov5detect/utils/segment/general.py +160 -0
  84. bplusplus/yolov5detect/utils/segment/loss.py +198 -0
  85. bplusplus/yolov5detect/utils/segment/metrics.py +225 -0
  86. bplusplus/yolov5detect/utils/segment/plots.py +152 -0
  87. bplusplus/yolov5detect/utils/torch_utils.py +482 -0
  88. bplusplus/yolov5detect/utils/triton.py +90 -0
  89. bplusplus-1.1.0.dist-info/METADATA +179 -0
  90. bplusplus-1.1.0.dist-info/RECORD +92 -0
  91. bplusplus/build_model.py +0 -38
  92. bplusplus-0.1.1.dist-info/METADATA +0 -97
  93. bplusplus-0.1.1.dist-info/RECORD +0 -8
  94. {bplusplus-0.1.1.dist-info → bplusplus-1.1.0.dist-info}/LICENSE +0 -0
  95. {bplusplus-0.1.1.dist-info → bplusplus-1.1.0.dist-info}/WHEEL +0 -0
bplusplus/__init__.py CHANGED
@@ -1,3 +1,5 @@
-from .build_model import build_model
-from .collect_images import Group, collect_images
-from .train_validate import train_validate
+from .collect import Group, collect
+from .train_validate import train, validate
+from .prepare import prepare
+from .yolov5detect.detect import run
+
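The 0.1.1 entry points (build_model, collect_images, train_validate) are replaced by collect, prepare, train, validate, and run. A minimal sketch of how these fit together, based only on the signatures visible in this diff — the species list, image count, and paths are illustrative, not from the package's documentation:

import bplusplus

# Illustrative GBIF query: keys must match the chosen Group value
search = {"scientificName": ["Vespula vulgaris", "Bombus terrestris"]}

# 1. Fetch images from GBIF, grouped by scientific name
bplusplus.collect(
    group_by_key=bplusplus.Group.scientificName,
    search_parameters=search,
    images_per_group=500,
    output_directory="raw_images",
)

# 2. Auto-label with the bundled YOLOv5 detector, split into train/test/valid
bplusplus.prepare(input_directory="raw_images", output_directory="dataset", with_background=True)

# 3. Train and validate a YOLOv8 model on the generated dataset.yaml
model = bplusplus.train(input_yaml="dataset/dataset.yaml", epochs=30)
metrics = bplusplus.validate(model, input_yaml="dataset/dataset.yaml")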
bplusplus/{collect_images.py → collect.py} RENAMED
@@ -13,7 +13,7 @@ class Group(str, Enum):
     scientificName="scientificName"
 
 #TODO add back support for fetching from dataset (or csvs)
-def collect_images(group_by_key: Group, search_parameters: dict[str, Any], images_per_group: int, output_directory: str):
+def collect(group_by_key: Group, search_parameters: dict[str, Any], images_per_group: int, output_directory: str):
 
     groups: list[str] = search_parameters[group_by_key.value]
 
@@ -26,12 +26,12 @@ def collect_images(group_by_key: Group, search_parameters: dict[str, Any], image
 
     print("Beginning to collect images from GBIF...")
     for group in groups:
-        print(f"Collecting images for {group}...")
+        # print(f"Collecting images for {group}...")
         occurrences_json = _fetch_occurrences(group_key=group_by_key, group_value=group, parameters=search_parameters, totalLimit=10000)
         optional_occurrences = map(lambda x: __parse_occurrence(x), occurrences_json)
         occurrences = list(filter(None, optional_occurrences))
 
-        print(f"{group} : {len(occurrences)} parseable occurrences fetched, will sample for {images_per_group}")
+        # print(f"{group} : {len(occurrences)} parseable occurrences fetched, will sample for {images_per_group}")
 
         random.seed(42) # for reproducibility
         sampled_occurrences = random.sample(occurrences, min(images_per_group, len(occurrences)))
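Note that collect() reads its group list out of search_parameters itself (groups = search_parameters[group_by_key.value]), so the dict must be keyed by the chosen Group value. A hedged example call, mirroring the internal use prepare() makes of it for background images (the count is illustrative):

from bplusplus import Group, collect

search = {"scientificName": ["Plantae"]}  # groups == ["Plantae"]
collect(
    group_by_key=Group.scientificName,
    search_parameters=search,
    images_per_group=100,  # samples at most this many parseable occurrences per group
    output_directory="backgrounds",
)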
bplusplus/prepare.py ADDED
@@ -0,0 +1,573 @@
+import os
+import random
+from typing import Any, Optional
+import requests
+import tempfile
+from .collect import Group, collect
+from pathlib import Path
+from .yolov5detect.detect import run
+import shutil
+from PIL import Image, ImageDraw, ImageFont
+from collections import defaultdict
+from prettytable import PrettyTable
+import matplotlib.pyplot as plt
+import requests
+from tqdm import tqdm
+import yaml
+
+def prepare(input_directory: str, output_directory: str, with_background: bool = False):
+
+    """
+    Prepares the dataset for training by performing the following steps:
+    1. Copies images from the input directory to a temporary directory.
+    2. Deletes corrupted images.
+    3. Downloads YOLOv5 weights if not already present.
+    4. Runs YOLOv5 inference to generate labels for the images.
+    5. Deletes orphaned images and inferences.
+    6. Updates labels based on class mapping.
+    7. Splits the data into train, test, and validation sets.
+    8. Counts the total number of images across all splits.
+    9. Makes a YAML configuration file for YOLOv8.
+
+    Args:
+        input_directory (str): The path to the input directory containing the images.
+        output_directory (str): The path to the output directory where the prepared dataset will be saved.
+    """
+
+    input_directory = Path(input_directory)
+    output_directory = Path(output_directory)
+
+    class_mapping = {}
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+
+        temp_dir_path = Path(temp_dir)
+        images_path = temp_dir_path / "images"
+        inference_path = temp_dir_path / "inference"
+        labels_path = temp_dir_path / "labels"
+
+        images_path.mkdir(parents=True, exist_ok=True)
+        inference_path.mkdir(parents=True, exist_ok=True)
+        labels_path.mkdir(parents=True, exist_ok=True)
+
+        for folder_directory in input_directory.iterdir():
+            images_names = []
+            if folder_directory.is_dir():
+                folder_name = folder_directory.name
+                for image_file in folder_directory.glob("*.jpg"):
+                    shutil.copy(image_file, images_path)
+                    image_name = image_file.name
+                    images_names.append(image_name)
+
+                class_mapping[folder_name] = images_names
+
+        original_image_count = len(list(images_path.glob("*.jpg"))) + len(list(images_path.glob("*.jpeg")))
+
+        __delete_corrupted_images(images_path)
+
+        current_dir = Path(__file__).resolve().parent
+        yaml_path = current_dir / 'yolov5detect' / 'insect.yaml'
+        weights_path = current_dir / 'yolov5detect' / 'acc94.pt'
+
+        github_release_url = 'https://github.com/Tvenver/Bplusplus/releases/download/v0.1.2/acc94.pt'
+
+        if not weights_path.exists():
+            __download_file_from_github_release(github_release_url, weights_path)
+
+        run(source=images_path, data=yaml_path, weights=weights_path, save_txt=True, project=temp_dir_path)
+
+        __delete_orphaned_images_and_inferences(images_path, inference_path, labels_path)
+        __delete_invalid_txt_files(images_path, inference_path, labels_path)
+        class_idxs = update_labels(class_mapping, labels_path)
+        __split_data(class_mapping, temp_dir_path, output_directory)
+
+        # __save_class_idx_to_file(class_idxs, output_directory)
+        final_image_count = count_images_across_splits(output_directory)
+        print(f"\nOut of {original_image_count} input images, {final_image_count} are eligible for detection. \nThese are saved across train, test and valid split in {output_directory}.")
+        __generate_sample_images_with_detections(output_directory, class_idxs)
+
+        if with_background:
+            print("\nCollecting and splitting background images.")
+
+            bg_images = int(final_image_count * 0.06)
+
+            search: dict[str, Any] = {
+                "scientificName": ["Plantae"]
+            }
+
+            collect(
+                group_by_key=Group.scientificName,
+                search_parameters=search,
+                images_per_group=bg_images,
+                output_directory=temp_dir_path
+            )
+
+            __delete_corrupted_images(temp_dir_path / "Plantae")
+
+            __split_background_images(temp_dir_path / "Plantae", output_directory)
+
+        __count_classes_and_output_table(output_directory, class_idxs)
+
+        __make_yaml_file(output_directory, class_idxs)
+
+def __delete_corrupted_images(images_path: Path):
+
+    """
+    Deletes corrupted images from the specified directory.
+
+    Args:
+        images_path (Path): The path to the directory containing images.
+
+    This function iterates through all the image files in the specified directory
+    and attempts to open each one. If an image file is found to be corrupted (i.e.,
+    it cannot be opened), the function deletes the corrupted image file.
+    """
+
+    for image_file in images_path.glob("*.jpg"):
+        try:
+            Image.open(image_file)
+        except IOError:
+            image_file.unlink()
+
+def __download_file_from_github_release(url, dest_path):
+
+    """
+    Downloads a file from a given GitHub release URL and saves it to the specified destination path,
+    with a progress bar displayed in the terminal.
+
+    Args:
+        url (str): The URL of the file to download.
+        dest_path (Path): The destination path where the file will be saved.
+
+    Raises:
+        Exception: If the file download fails.
+    """
+
+    response = requests.get(url, stream=True)
+    total_size = int(response.headers.get('content-length', 0))
+    block_size = 1024  # 1 Kibibyte
+    progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True)
+
+    if response.status_code == 200:
+        with open(dest_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=block_size):
+                progress_bar.update(len(chunk))
+                f.write(chunk)
+        progress_bar.close()
+    else:
+        progress_bar.close()
+        raise Exception(f"Failed to download file from {url}")
+
+def __delete_orphaned_images_and_inferences(images_path: Path, inference_path: Path, labels_path: Path):
+
+    """
+    Deletes orphaned images and their corresponding inference files if they do not have a label file.
+
+    Args:
+        images_path (Path): The path to the directory containing images.
+        inference_path (Path): The path to the directory containing inference files.
+        labels_path (Path): The path to the directory containing label files.
+
+    This function iterates through all the image files in the specified directory
+    and checks if there is a corresponding label file. If an image file does not
+    have a corresponding label file, the function deletes the orphaned image file
+    and its corresponding inference file.
+    """
+
+    for txt_file in labels_path.glob("*.txt"):
+        image_file_jpg = images_path / (txt_file.stem + ".jpg")
+        image_file_jpeg = images_path / (txt_file.stem + ".jpeg")
+        inference_file_jpg = inference_path / (txt_file.stem + ".jpg")
+        inference_file_jpeg = inference_path / (txt_file.stem + ".jpeg")
+
+        if not (image_file_jpg.exists() or image_file_jpeg.exists()):
+            print(f"Deleting {txt_file.name} - No corresponding image file")
+            txt_file.unlink()
+        elif not (inference_file_jpg.exists() or inference_file_jpeg.exists()):
+            print(f"Deleting {txt_file.name} - No corresponding inference file")
+            txt_file.unlink()
+
+    label_stems = {txt_file.stem for txt_file in labels_path.glob("*.txt")}
+    image_files = list(images_path.glob("*.jpg")) + list(images_path.glob("*.jpeg"))
+
+    for image_file in image_files:
+        if image_file.stem not in label_stems:
+            print(f"Deleting orphaned image: {image_file.name}")
+            image_file.unlink()
+
+            inference_file_jpg = inference_path / (image_file.stem + ".jpg")
+            inference_file_jpeg = inference_path / (image_file.stem + ".jpeg")
+
+            if inference_file_jpg.exists():
+                inference_file_jpg.unlink()
+                print(f"Deleted corresponding inference file: {inference_file_jpg.name}")
+            elif inference_file_jpeg.exists():
+                inference_file_jpeg.unlink()
+                print(f"Deleted corresponding inference file: {inference_file_jpeg.name}")
+
+    print("Orphaned images and inference files without corresponding labels have been deleted.")
+
+def __delete_invalid_txt_files(images_path: Path, inference_path: Path, labels_path: Path):
+
+    """
+    Deletes invalid text files and their corresponding image and inference files.
+
+    Args:
+        images_path (Path): The path to the directory containing images.
+        inference_path (Path): The path to the directory containing inference files.
+        labels_path (Path): The path to the directory containing label files.
+
+    This function iterates through all the text files in the specified directory
+    and checks whether each contains exactly one detection. Files with zero or
+    multiple detections are deleted along with their corresponding image and
+    inference files.
+    """
+
+    for txt_file in labels_path.glob("*.txt"):
+        with open(txt_file, 'r') as file:
+            lines = file.readlines()
+
+        if len(lines) == 0 or len(lines) > 1:
+            print(f"Deleting {txt_file.name} - Invalid file")
+            txt_file.unlink()
+
+            image_file_jpg = images_path / (txt_file.stem + ".jpg")
+            image_file_jpeg = images_path / (txt_file.stem + ".jpeg")
+            inference_file_jpg = inference_path / (txt_file.stem + ".jpg")
+            inference_file_jpeg = inference_path / (txt_file.stem + ".jpeg")
+
+            if image_file_jpg.exists():
+                image_file_jpg.unlink()
+                print(f"Deleted corresponding image file: {image_file_jpg.name}")
+            elif image_file_jpeg.exists():
+                image_file_jpeg.unlink()
+                print(f"Deleted corresponding image file: {image_file_jpeg.name}")
+
+            if inference_file_jpg.exists():
+                inference_file_jpg.unlink()
+                print(f"Deleted corresponding inference file: {inference_file_jpg.name}")
+            elif inference_file_jpeg.exists():
+                inference_file_jpeg.unlink()
+                print(f"Deleted corresponding inference file: {inference_file_jpeg.name}")
+
+    print("Invalid text files and their corresponding images and inference files have been deleted.")
+
+
+def __split_data(class_mapping: dict, temp_dir_path: Path, output_directory: Path):
+    """
+    Splits the data into train, test, and validation sets.
+
+    Args:
+        class_mapping (dict): A dictionary mapping class names to image file names.
+        temp_dir_path (Path): The path to the temporary directory containing the images.
+        output_directory (Path): The path to the output directory where the split data will be saved.
+    """
+    images_dir = temp_dir_path / "images"
+    labels_dir = temp_dir_path / "labels"
+
+    def create_dirs(split):
+        (output_directory / split).mkdir(parents=True, exist_ok=True)
+        (output_directory / split / "images").mkdir(parents=True, exist_ok=True)
+        (output_directory / split / "labels").mkdir(parents=True, exist_ok=True)
+
+    def copy_files(file_list, split):
+        for image_file in file_list:
+            image_file_path = images_dir / image_file
+
+            if not image_file_path.exists():
+                continue
+
+            shutil.copy(image_file_path, output_directory / split / "images" / image_file_path.name)
+
+            label_file = labels_dir / (image_file_path.stem + ".txt")
+            if label_file.exists():
+                shutil.copy(label_file, output_directory / split / "labels" / label_file.name)
+
+    for split in ["train", "test", "valid"]:
+        create_dirs(split)
+
+    for _, files in class_mapping.items():
+        random.shuffle(files)
+        num_files = len(files)
+
+        train_count = int(0.8 * num_files)
+        test_count = int(0.1 * num_files)
+        valid_count = num_files - train_count - test_count
+
+        train_files = files[:train_count]
+        test_files = files[train_count:train_count + test_count]
+        valid_files = files[train_count + test_count:]
+
+        copy_files(train_files, "train")
+        copy_files(test_files, "test")
+        copy_files(valid_files, "valid")
+
+    print("Data has been split into train, test, and valid.")
+
+def __save_class_idx_to_file(class_idxs: dict, output_directory: Path):
+    """
+    Saves the class indices to a file.
+
+    Args:
+        class_idxs (dict): A dictionary mapping class names to class indices.
+        output_directory (Path): The path to the output directory where the class index file will be saved.
+    """
+    class_idx_file = output_directory / "class_idx.txt"
+    with open(class_idx_file, 'w') as f:
+        for class_name, idx in class_idxs.items():
+            f.write(f"{class_name}: {idx}\n")
+    print(f"Class indices have been saved to {class_idx_file}")
+
+def __generate_sample_images_with_detections(main_dir: Path, class_idxs: dict):
+
+    """
+    Generates one sample image with multiple detections for each of train, test, valid, combining up to 6 images in one output.
+
+    Args:
+        main_dir (Path): The main directory containing the train, test, and valid splits.
+    """
+
+    def resize_and_contain(image, target_size):
+        image.thumbnail(target_size, Image.LANCZOS)
+        new_image = Image.new("RGB", target_size, (0, 0, 0))
+        new_image.paste(image, ((target_size[0] - image.width) // 2, (target_size[1] - image.height) // 2))
+        return new_image
+
+    def draw_bounding_boxes(image, labels_path, class_mapping, color_map):
+        draw = ImageDraw.Draw(image)
+        img_width, img_height = image.size
+        try:
+            font = ImageFont.truetype("DejaVuSans-Bold.ttf", 20)
+        except IOError:
+            font = ImageFont.load_default()
+
+        if labels_path.exists():
+            with open(labels_path, 'r') as label_file:
+                for line in label_file.readlines():
+                    parts = line.strip().split()
+                    class_idx = int(parts[0])
+                    center_x, center_y, width, height = map(float, parts[1:])
+                    x_min = int((center_x - width / 2) * img_width)
+                    y_min = int((center_y - height / 2) * img_height)
+                    x_max = int((center_x + width / 2) * img_width)
+                    y_max = int((center_y + height / 2) * img_height)
+                    class_name = class_mapping.get(class_idx, str(class_idx))
+                    color = color_map[class_idx]
+                    draw.rectangle([x_min, y_min, x_max, y_max], outline=color, width=3)
+                    draw.text((x_min, y_min - 20), class_name, fill=color, font=font)
+        return image
+
+    def combine_images(images, grid_size=(3, 2), target_size=(416, 416)):
+        resized_images = [resize_and_contain(img, target_size) for img in images]
+        width, height = target_size
+        combined_image = Image.new('RGB', (width * grid_size[0], height * grid_size[1]))
+
+        for i, img in enumerate(resized_images):
+            row = i // grid_size[0]
+            col = i % grid_size[0]
+            combined_image.paste(img, (col * width, row * height))
+
+        return combined_image
+
+    def generate_color_map(class_mapping):
+        colors = ['red', 'blue', 'green', 'purple', 'orange', 'yellow', 'pink', 'cyan', 'magenta']
+        color_map = {idx: random.choice(colors) for idx in class_mapping.keys()}
+        return color_map
+
+    splits = ['train', 'test', 'valid']
+    class_mapping = class_idxs
+    color_map = generate_color_map(class_mapping)
+
+    for split in splits:
+        images_dir = Path(main_dir) / split / 'images'
+        labels_dir = Path(main_dir) / split / 'labels'
+        image_files = list(images_dir.glob("*.jpg"))
+        if not image_files:
+            continue
+
+        sample_images = []
+        for image_file in image_files[:6]:
+            label_file = labels_dir / (image_file.stem + '.txt')
+            image = Image.open(image_file)
+            image_with_boxes = draw_bounding_boxes(image, label_file, class_mapping, color_map)
+            sample_images.append(image_with_boxes)
+
+        if sample_images:
+            combined_image = combine_images(sample_images, grid_size=(3, 2), target_size=(416, 416))
+            combined_image_path = Path(main_dir) / split / f"{split}_sample_with_detections.jpg"
+            combined_image.save(combined_image_path)
+
+
+def __split_background_images(background_dir: Path, output_directory: Path):
+    """
+    Splits the background images into train, test, and validation sets.
+
+    Args:
+        background_dir (Path): The path to the directory containing the background images.
+        output_directory (Path): The path to the output directory where the split background images will be saved.
+    """
+
+    image_files = list(Path(background_dir).glob("*.jpg"))
+    random.shuffle(image_files)
+
+    num_images = len(image_files)
+    train_split = int(0.8 * num_images)
+    valid_split = int(0.1 * num_images)
+
+    train_files = image_files[:train_split]
+    valid_files = image_files[train_split:train_split + valid_split]
+    test_files = image_files[train_split + valid_split:]
+
+    def copy_files(image_list, split):
+        for image_file in image_list:
+            shutil.copy(image_file, Path(output_directory) / split / 'images' / image_file.name)
+
+            label_file = Path(output_directory) / split / 'labels' / (image_file.stem + ".txt")
+            label_file.touch()
+
+    copy_files(train_files, 'train')
+    copy_files(valid_files, 'valid')
+    copy_files(test_files, 'test')
+
+    print(f"Background data has been split: {len(train_files)} train, {len(valid_files)} valid, {len(test_files)} test")
+
+
+def __count_classes_and_output_table(output_directory: Path, class_idxs: dict):
+    """
+    Counts the number of images per class and outputs a table.
+
+    Args:
+        output_directory (Path): The path to the output directory containing the split data.
+        class_idxs (dict): A dictionary mapping class indices to class names.
+    """
+
+    def count_classes_in_split(labels_dir):
+        class_counts = defaultdict(int)
+        for label_file in os.listdir(labels_dir):
+            if label_file.endswith(".txt"):
+                label_path = os.path.join(labels_dir, label_file)
+                with open(label_path, 'r') as f:
+                    lines = f.readlines()
+                    if not lines:
+                        # Count empty files as 'null' class (background images)
+                        class_counts['null'] += 1
+                    else:
+                        for line in lines:
+                            class_index = int(line.split()[0])
+                            class_counts[class_index] += 1
+        return class_counts
+
+    splits = ['train', 'test', 'valid']
+    total_counts = defaultdict(int)
+
+    table = PrettyTable()
+    table.field_names = ["Class", "Class Index", "Train Count", "Test Count", "Valid Count", "Total"]
+
+    split_counts = {split: defaultdict(int) for split in splits}
+
+    for split in splits:
+        labels_dir = output_directory / split / 'labels'
+        if not os.path.exists(labels_dir):
+            print(f"Warning: {labels_dir} does not exist, skipping {split}.")
+            continue
+
+        class_counts = count_classes_in_split(labels_dir)
+        for class_index, count in class_counts.items():
+            split_counts[split][class_index] = count
+            total_counts[class_index] += count
+
+    for class_index, total in total_counts.items():
+        class_name = class_idxs.get(class_index, "Background" if class_index == 'null' else f"Class {class_index}")
+        train_count = split_counts['train'].get(class_index, 0)
+        test_count = split_counts['test'].get(class_index, 0)
+        valid_count = split_counts['valid'].get(class_index, 0)
+        table.add_row([class_name, class_index, train_count, test_count, valid_count, total])
+
+    print(table)
+
+def update_labels(class_mapping: dict, labels_path: Path) -> dict:
+    """
+    Updates the labels based on the class mapping.
+
+    Args:
+        class_mapping (dict): A dictionary mapping class names to image file names.
+        labels_path (Path): The path to the directory containing the label files.
+
+    Returns:
+        dict: A dictionary mapping class indices to class names.
+    """
+    class_index_mapping = {}
+    class_index_definition = {}
+
+    for idx, (class_name, images) in enumerate(class_mapping.items()):
+        class_index_definition[idx] = class_name
+        for image_name in images:
+            class_index_mapping[image_name] = idx
+
+    for txt_file in labels_path.glob("*.txt"):
+        image_name_jpg = txt_file.stem + ".jpg"
+        image_name_jpeg = txt_file.stem + ".jpeg"
+
+        if image_name_jpg in class_index_mapping:
+            class_index = class_index_mapping[image_name_jpg]
+        elif image_name_jpeg in class_index_mapping:
+            class_index = class_index_mapping[image_name_jpeg]
+        else:
+            print(f"Warning: No corresponding image found for {txt_file.name}")
+            continue
+
+        with open(txt_file, 'r') as file:
+            lines = file.readlines()
+
+        updated_lines = []
+        for line in lines:
+            parts = line.split()
+            if len(parts) > 0:
+                parts[0] = str(class_index)
+                updated_lines.append(" ".join(parts))
+
+        with open(txt_file, 'w') as file:
+            file.write("\n".join(updated_lines))
+
+    print("Labels updated successfully")
+    return class_index_definition
+
+def count_images_across_splits(output_directory: Path) -> int:
+    """
+    Counts the total number of images across the train, test, and validation splits.
+
+    Args:
+        output_directory (Path): The path to the output directory containing the split data.
+
+    Returns:
+        int: The total number of images across all splits.
+    """
+    total_images = 0
+    for split in ['train', 'test', 'valid']:
+        split_dir = output_directory / split / 'images'
+        total_images += len(list(split_dir.glob("*.jpg"))) + len(list(split_dir.glob("*.jpeg")))
+
+    return total_images
+
+def __make_yaml_file(output_directory: Path, class_idxs: dict):
+    """
+    Creates a YAML configuration file for YOLOv8.
+
+    Args:
+        output_directory (Path): The path to the output directory where the YAML file will be saved.
+        class_idxs (dict): A dictionary mapping class indices to class names.
+    """
+
+    # Define the structure of the YAML file
+    yaml_content = {
+        'path': str(output_directory.resolve()),
+        'train': 'train/images',
+        'val': 'valid/images',
+        'test': 'test/images',
+        'names': {idx: name for idx, name in class_idxs.items()}
+    }
+
+    # Write the YAML content to a file
+    yaml_file_path = output_directory / 'dataset.yaml'
+    with open(yaml_file_path, 'w') as yaml_file:
+        yaml.dump(yaml_content, yaml_file, default_flow_style=False, sort_keys=False)
+
+    print(f"YOLOv8 YAML file created at {yaml_file_path}")
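For orientation, prepare() leaves output_directory in the standard YOLO detection layout (an 80/10/10 train/test/valid split per class, plus empty label files for any background images) and writes a dataset.yaml pointing at it. Assuming two input class folders, the result would look roughly like this — paths and class names are illustrative:

output_directory/
    train/   (images/, labels/)
    test/    (images/, labels/)
    valid/   (images/, labels/)
    dataset.yaml

# dataset.yaml, as written by __make_yaml_file:
path: /absolute/path/to/output_directory
train: train/images
val: valid/images
test: test/images
names:
  0: Vespula vulgaris
  1: Bombus terrestris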
bplusplus/train_validate.py CHANGED
@@ -1,67 +1,11 @@
-import os
-import random
-import shutil
-
 from ultralytics import YOLO
+from pathlib import Path
 
+def train(input_yaml: str, output_directory: str = Path(__file__).resolve().parent, epochs: int = 30, imgsz: int = 640, batch: int = 16):
+    model = YOLO('yolov8n.pt')
+    model.train(data=input_yaml, epochs=epochs, imgsz=imgsz, batch=batch, project=output_directory)
+    return model
 
-#split ratio defaults to 80% training 20% validation
-def train_validate(groups: list[str], dataset_path: str, output_directory: str, split_ratio: float = 0.8):
-
-    train_path = os.path.join(output_directory, 'train')  # Path to the training folder
-    val_path = os.path.join(output_directory, 'val')  # Path to the validation folder
-
-    # Create training and validation directories if they don't exist
-    os.makedirs(train_path, exist_ok=True)
-    os.makedirs(val_path, exist_ok=True)
-
-    # Walk through the dataset directory
-    # for root, dirs, files in os.walk(dataset_path):
-    for group in groups:
-        dataset_folder = os.path.join(dataset_path, group)
-        images = __files_in_folder(folder=dataset_folder)
-
-        # Shuffle the images
-        random.shuffle(images)
-
-        # Calculate the split index
-        split_index = int(len(images) * split_ratio)
-
-        # Split the images into training and validation sets
-        train_images = images[:split_index]
-        val_images = images[split_index:]
-
-        # Create destination folders if they don't exist
-        train_label_path = os.path.join(train_path, group)
-        val_label_path = os.path.join(val_path, group)
-        os.makedirs(train_label_path, exist_ok=True)
-        os.makedirs(val_label_path, exist_ok=True)
-
-        # Move images to the appropriate folders
-        for image in train_images:
-            src = os.path.join(dataset_folder, image)
-            dst = os.path.join(train_label_path, image)
-            shutil.move(src, dst)
-
-        for image in val_images:
-            src = os.path.join(dataset_folder, image)
-            dst = os.path.join(val_label_path, image)
-            shutil.move(src, dst)
-
-    print("Dataset splitting completed successfully.")
-
-    # Create a new YOLO model from scratch
-    model = YOLO(os.path.join(output_directory, 'yolov8n-cls.pt'))
-
-    # Define parameters for YOLO training; keep epochs, batch, and imgsz within system limits (memory, CPU, GPU, ...)
-    # Specify the path to the folder containing the val and train folders
-    data = output_directory
-    results = model.train(data=data, epochs=5, batch=16, imgsz=224, project=output_directory)
-
-    # batch is set to 1 to prevent a resizing bug - the bug doesn't emerge during training. A workaround for larger batch sizes could be a resizing step in advance.
-    model.val(batch=1)
-
-
-def __files_in_folder(folder: str) -> list[str]:
-    return [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
+def validate(model: YOLO, input_yaml: str, output_directory: str = Path(__file__).resolve().parent):
+    metrics = model.val(data=input_yaml, project=output_directory)
+    return metrics
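train() and validate() are now thin wrappers over Ultralytics YOLOv8: train() starts from the pretrained yolov8n.pt checkpoint (downloaded by ultralytics if absent) and returns the model, which validate() then evaluates against the same YAML. Note that both default output_directory to the installed package directory, so passing an explicit one is advisable. A minimal usage sketch, assuming a dataset.yaml produced by prepare():

from bplusplus import train, validate

model = train(input_yaml="dataset/dataset.yaml", output_directory="runs", epochs=30, imgsz=640, batch=16)
metrics = validate(model, input_yaml="dataset/dataset.yaml", output_directory="runs")
print(metrics.box.map50)  # mAP@0.5, one of the metrics ultralytics reports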
bplusplus/yolov5detect/__init__.py ADDED
@@ -0,0 +1 @@
+from .detect import run