bplusplus 0.1.1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bplusplus might be problematic.
- bplusplus/__init__.py +5 -3
- bplusplus/{collect_images.py → collect.py} +3 -3
- bplusplus/prepare.py +573 -0
- bplusplus/train_validate.py +8 -64
- bplusplus/yolov5detect/__init__.py +1 -0
- bplusplus/yolov5detect/detect.py +444 -0
- bplusplus/yolov5detect/export.py +1530 -0
- bplusplus/yolov5detect/insect.yaml +8 -0
- bplusplus/yolov5detect/models/__init__.py +0 -0
- bplusplus/yolov5detect/models/common.py +1109 -0
- bplusplus/yolov5detect/models/experimental.py +130 -0
- bplusplus/yolov5detect/models/hub/anchors.yaml +56 -0
- bplusplus/yolov5detect/models/hub/yolov3-spp.yaml +52 -0
- bplusplus/yolov5detect/models/hub/yolov3-tiny.yaml +42 -0
- bplusplus/yolov5detect/models/hub/yolov3.yaml +52 -0
- bplusplus/yolov5detect/models/hub/yolov5-bifpn.yaml +49 -0
- bplusplus/yolov5detect/models/hub/yolov5-fpn.yaml +43 -0
- bplusplus/yolov5detect/models/hub/yolov5-p2.yaml +55 -0
- bplusplus/yolov5detect/models/hub/yolov5-p34.yaml +42 -0
- bplusplus/yolov5detect/models/hub/yolov5-p6.yaml +57 -0
- bplusplus/yolov5detect/models/hub/yolov5-p7.yaml +68 -0
- bplusplus/yolov5detect/models/hub/yolov5-panet.yaml +49 -0
- bplusplus/yolov5detect/models/hub/yolov5l6.yaml +61 -0
- bplusplus/yolov5detect/models/hub/yolov5m6.yaml +61 -0
- bplusplus/yolov5detect/models/hub/yolov5n6.yaml +61 -0
- bplusplus/yolov5detect/models/hub/yolov5s-LeakyReLU.yaml +50 -0
- bplusplus/yolov5detect/models/hub/yolov5s-ghost.yaml +49 -0
- bplusplus/yolov5detect/models/hub/yolov5s-transformer.yaml +49 -0
- bplusplus/yolov5detect/models/hub/yolov5s6.yaml +61 -0
- bplusplus/yolov5detect/models/hub/yolov5x6.yaml +61 -0
- bplusplus/yolov5detect/models/segment/yolov5l-seg.yaml +49 -0
- bplusplus/yolov5detect/models/segment/yolov5m-seg.yaml +49 -0
- bplusplus/yolov5detect/models/segment/yolov5n-seg.yaml +49 -0
- bplusplus/yolov5detect/models/segment/yolov5s-seg.yaml +49 -0
- bplusplus/yolov5detect/models/segment/yolov5x-seg.yaml +49 -0
- bplusplus/yolov5detect/models/tf.py +797 -0
- bplusplus/yolov5detect/models/yolo.py +495 -0
- bplusplus/yolov5detect/models/yolov5l.yaml +49 -0
- bplusplus/yolov5detect/models/yolov5m.yaml +49 -0
- bplusplus/yolov5detect/models/yolov5n.yaml +49 -0
- bplusplus/yolov5detect/models/yolov5s.yaml +49 -0
- bplusplus/yolov5detect/models/yolov5x.yaml +49 -0
- bplusplus/yolov5detect/utils/__init__.py +97 -0
- bplusplus/yolov5detect/utils/activations.py +134 -0
- bplusplus/yolov5detect/utils/augmentations.py +448 -0
- bplusplus/yolov5detect/utils/autoanchor.py +175 -0
- bplusplus/yolov5detect/utils/autobatch.py +70 -0
- bplusplus/yolov5detect/utils/aws/__init__.py +0 -0
- bplusplus/yolov5detect/utils/aws/mime.sh +26 -0
- bplusplus/yolov5detect/utils/aws/resume.py +41 -0
- bplusplus/yolov5detect/utils/aws/userdata.sh +27 -0
- bplusplus/yolov5detect/utils/callbacks.py +72 -0
- bplusplus/yolov5detect/utils/dataloaders.py +1385 -0
- bplusplus/yolov5detect/utils/docker/Dockerfile +73 -0
- bplusplus/yolov5detect/utils/docker/Dockerfile-arm64 +40 -0
- bplusplus/yolov5detect/utils/docker/Dockerfile-cpu +42 -0
- bplusplus/yolov5detect/utils/downloads.py +136 -0
- bplusplus/yolov5detect/utils/flask_rest_api/README.md +70 -0
- bplusplus/yolov5detect/utils/flask_rest_api/example_request.py +17 -0
- bplusplus/yolov5detect/utils/flask_rest_api/restapi.py +49 -0
- bplusplus/yolov5detect/utils/general.py +1294 -0
- bplusplus/yolov5detect/utils/google_app_engine/Dockerfile +25 -0
- bplusplus/yolov5detect/utils/google_app_engine/additional_requirements.txt +6 -0
- bplusplus/yolov5detect/utils/google_app_engine/app.yaml +16 -0
- bplusplus/yolov5detect/utils/loggers/__init__.py +476 -0
- bplusplus/yolov5detect/utils/loggers/clearml/README.md +222 -0
- bplusplus/yolov5detect/utils/loggers/clearml/__init__.py +0 -0
- bplusplus/yolov5detect/utils/loggers/clearml/clearml_utils.py +230 -0
- bplusplus/yolov5detect/utils/loggers/clearml/hpo.py +90 -0
- bplusplus/yolov5detect/utils/loggers/comet/README.md +250 -0
- bplusplus/yolov5detect/utils/loggers/comet/__init__.py +551 -0
- bplusplus/yolov5detect/utils/loggers/comet/comet_utils.py +151 -0
- bplusplus/yolov5detect/utils/loggers/comet/hpo.py +126 -0
- bplusplus/yolov5detect/utils/loggers/comet/optimizer_config.json +135 -0
- bplusplus/yolov5detect/utils/loggers/wandb/__init__.py +0 -0
- bplusplus/yolov5detect/utils/loggers/wandb/wandb_utils.py +210 -0
- bplusplus/yolov5detect/utils/loss.py +259 -0
- bplusplus/yolov5detect/utils/metrics.py +381 -0
- bplusplus/yolov5detect/utils/plots.py +517 -0
- bplusplus/yolov5detect/utils/segment/__init__.py +0 -0
- bplusplus/yolov5detect/utils/segment/augmentations.py +100 -0
- bplusplus/yolov5detect/utils/segment/dataloaders.py +366 -0
- bplusplus/yolov5detect/utils/segment/general.py +160 -0
- bplusplus/yolov5detect/utils/segment/loss.py +198 -0
- bplusplus/yolov5detect/utils/segment/metrics.py +225 -0
- bplusplus/yolov5detect/utils/segment/plots.py +152 -0
- bplusplus/yolov5detect/utils/torch_utils.py +482 -0
- bplusplus/yolov5detect/utils/triton.py +90 -0
- bplusplus-1.1.0.dist-info/METADATA +179 -0
- bplusplus-1.1.0.dist-info/RECORD +92 -0
- bplusplus/build_model.py +0 -38
- bplusplus-0.1.1.dist-info/METADATA +0 -97
- bplusplus-0.1.1.dist-info/RECORD +0 -8
- {bplusplus-0.1.1.dist-info → bplusplus-1.1.0.dist-info}/LICENSE +0 -0
- {bplusplus-0.1.1.dist-info → bplusplus-1.1.0.dist-info}/WHEEL +0 -0
bplusplus/__init__.py
CHANGED
@@ -1,3 +1,5 @@
-from .
-from .
-from .
+from .collect import Group, collect
+from .train_validate import train, validate
+from .prepare import prepare
+from .yolov5detect.detect import run
+

bplusplus/{collect_images.py → collect.py}
RENAMED
@@ -13,7 +13,7 @@ class Group(str, Enum):
     scientificName="scientificName"
 
 #TODO add back support for fetching from dataset (or csvs)
-def
+def collect(group_by_key: Group, search_parameters: dict[str, Any], images_per_group: int, output_directory: str):
 
     groups: list[str] = search_parameters[group_by_key.value]
 
@@ -26,12 +26,12 @@ def collect_images(group_by_key: Group, search_parameters: dict[str, Any], image
 
     print("Beginning to collect images from GBIF...")
     for group in groups:
-        print(f"Collecting images for {group}...")
+        # print(f"Collecting images for {group}...")
         occurrences_json = _fetch_occurrences(group_key=group_by_key, group_value=group, parameters=search_parameters, totalLimit=10000)
         optional_occurrences = map(lambda x: __parse_occurrence(x), occurrences_json)
         occurrences = list(filter(None, optional_occurrences))
 
-        print(f"{group} : {len(occurrences)} parseable occurrences fetched, will sample for {images_per_group}")
+        # print(f"{group} : {len(occurrences)} parseable occurrences fetched, will sample for {images_per_group}")
 
         random.seed(42)  # for reproducibility
         sampled_occurrences = random.sample(occurrences, min(images_per_group, len(occurrences)))
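
Taken together, these hunks rename the public entry point from collect_images to collect and silence the per-group progress prints. A minimal usage sketch of the renamed API follows; the species names, image count, and paths are hypothetical examples, not values from this release:

from bplusplus import Group, collect

search = {
    "scientificName": ["Apis mellifera", "Bombus terrestris"]  # hypothetical example groups
}

collect(
    group_by_key=Group.scientificName,   # which search_parameters key defines the groups
    search_parameters=search,
    images_per_group=500,                # sampled from up to 10,000 fetched occurrences per group
    output_directory="./dataset",        # one subfolder of .jpg images per group is written here
)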
bplusplus/prepare.py
ADDED
import os
import random
from typing import Any, Optional
import requests
import tempfile
from .collect import Group, collect
from pathlib import Path
from .yolov5detect.detect import run
import shutil
from PIL import Image, ImageDraw, ImageFont
from collections import defaultdict
from prettytable import PrettyTable
import matplotlib.pyplot as plt
import requests
from tqdm import tqdm
import yaml

def prepare(input_directory: str, output_directory: str, with_background: bool = False):

    """
    Prepares the dataset for training by performing the following steps:
    1. Copies images from the input directory to a temporary directory.
    2. Deletes corrupted images.
    3. Downloads YOLOv5 weights if not already present.
    4. Runs YOLOv5 inference to generate labels for the images.
    5. Deletes orphaned images and inferences.
    6. Updates labels based on class mapping.
    7. Splits the data into train, test, and validation sets.
    8. Counts the total number of images across all splits.
    9. Makes a YAML configuration file for YOLOv8.

    Args:
        input_directory (str): The path to the input directory containing the images.
        output_directory (str): The path to the output directory where the prepared dataset will be saved.
    """

    input_directory = Path(input_directory)
    output_directory = Path(output_directory)

    class_mapping={}

    with tempfile.TemporaryDirectory() as temp_dir:

        temp_dir_path = Path(temp_dir)
        images_path = temp_dir_path / "images"
        inference_path = temp_dir_path / "inference"
        labels_path = temp_dir_path / "labels"

        images_path.mkdir(parents=True, exist_ok=True)
        inference_path.mkdir(parents=True, exist_ok=True)
        labels_path.mkdir(parents=True, exist_ok=True)

        for folder_directory in input_directory.iterdir():
            images_names = []
            if folder_directory.is_dir():
                folder_name = folder_directory.name
                for image_file in folder_directory.glob("*.jpg"):
                    shutil.copy(image_file, images_path)
                    image_name = image_file.name
                    images_names.append(image_name)

                class_mapping[folder_name] = images_names

        original_image_count = len(list(images_path.glob("*.jpg"))) + len(list(images_path.glob("*.jpeg")))

        __delete_corrupted_images(images_path)

        current_dir = Path(__file__).resolve().parent
        yaml_path = current_dir / 'yolov5detect' / 'insect.yaml'
        weights_path = current_dir / 'yolov5detect' / 'acc94.pt'

        github_release_url = 'https://github.com/Tvenver/Bplusplus/releases/download/v0.1.2/acc94.pt'

        if not weights_path.exists():
            __download_file_from_github_release(github_release_url, weights_path)

        run(source=images_path, data=yaml_path, weights=weights_path, save_txt=True, project=temp_dir_path)

        __delete_orphaned_images_and_inferences(images_path, inference_path, labels_path)
        __delete_invalid_txt_files(images_path, inference_path, labels_path)
        class_idxs = update_labels(class_mapping, labels_path)
        __split_data(class_mapping, temp_dir_path, output_directory)

        # __save_class_idx_to_file(class_idxs, output_directory)
        final_image_count = count_images_across_splits(output_directory)
        print(f"\nOut of {original_image_count} input images, {final_image_count} are eligible for detection. \nThese are saved across train, test and valid split in {output_directory}.")
        __generate_sample_images_with_detections(output_directory, class_idxs)

        if with_background:
            print("\nCollecting and splitting background images.")

            bg_images=int(final_image_count*0.06)

            search: dict[str, Any] = {
                "scientificName": ["Plantae"]
            }

            collect(
                group_by_key=Group.scientificName,
                search_parameters=search,
                images_per_group=bg_images,
                output_directory=temp_dir_path
            )

            __delete_corrupted_images(temp_dir_path / "Plantae")

            __split_background_images(temp_dir_path / "Plantae", output_directory)

        __count_classes_and_output_table(output_directory, class_idxs)

        __make_yaml_file(output_directory, class_idxs)

def __delete_corrupted_images(images_path: Path):

    """
    Deletes corrupted images from the specified directory.

    Args:
        images_path (Path): The path to the directory containing images.

    This function iterates through all the image files in the specified directory
    and attempts to open each one. If an image file is found to be corrupted (i.e.,
    it cannot be opened), the function deletes the corrupted image file.
    """

    for image_file in images_path.glob("*.jpg"):
        try:
            Image.open(image_file)
        except IOError:
            image_file.unlink()

def __download_file_from_github_release(url, dest_path):

    """
    Downloads a file from a given GitHub release URL and saves it to the specified destination path,
    with a progress bar displayed in the terminal.

    Args:
        url (str): The URL of the file to download.
        dest_path (Path): The destination path where the file will be saved.

    Raises:
        Exception: If the file download fails.
    """

    response = requests.get(url, stream=True)
    total_size = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 Kibibyte
    progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True)

    if response.status_code == 200:
        with open(dest_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=block_size):
                progress_bar.update(len(chunk))
                f.write(chunk)
        progress_bar.close()
    else:
        progress_bar.close()
        raise Exception(f"Failed to download file from {url}")

def __delete_orphaned_images_and_inferences(images_path: Path, inference_path: Path, labels_path: Path):

    """
    Deletes orphaned images and their corresponding inference files if they do not have a label file.

    Args:
        images_path (Path): The path to the directory containing images.
        inference_path (Path): The path to the directory containing inference files.
        labels_path (Path): The path to the directory containing label files.

    This function iterates through all the image files in the specified directory
    and checks if there is a corresponding label file. If an image file does not
    have a corresponding label file, the function deletes the orphaned image file
    and its corresponding inference file.
    """

    for txt_file in labels_path.glob("*.txt"):
        image_file_jpg = images_path / (txt_file.stem + ".jpg")
        image_file_jpeg = images_path / (txt_file.stem + ".jpeg")
        inference_file_jpg = inference_path / (txt_file.stem + ".jpg")
        inference_file_jpeg = inference_path / (txt_file.stem + ".jpeg")

        if not (image_file_jpg.exists() or image_file_jpeg.exists()):
            print(f"Deleting {txt_file.name} - No corresponding image file")
            txt_file.unlink()
        elif not (inference_file_jpg.exists() or inference_file_jpeg.exists()):
            print(f"Deleting {txt_file.name} - No corresponding inference file")
            txt_file.unlink()

    label_stems = {txt_file.stem for txt_file in labels_path.glob("*.txt")}
    image_files = list(images_path.glob("*.jpg")) + list(images_path.glob("*.jpeg"))

    for image_file in image_files:
        if image_file.stem not in label_stems:
            print(f"Deleting orphaned image: {image_file.name}")
            image_file.unlink()

            inference_file_jpg = inference_path / (image_file.stem + ".jpg")
            inference_file_jpeg = inference_path / (image_file.stem + ".jpeg")

            if inference_file_jpg.exists():
                inference_file_jpg.unlink()
                print(f"Deleted corresponding inference file: {inference_file_jpg.name}")
            elif inference_file_jpeg.exists():
                inference_file_jpeg.unlink()
                print(f"Deleted corresponding inference file: {inference_file_jpeg.name}")

    print("Orphaned images and inference files without corresponding labels have been deleted.")

def __delete_invalid_txt_files(images_path: Path, inference_path: Path, labels_path: Path):

    """
    Deletes invalid text files and their corresponding image and inference files.

    Args:
        images_path (Path): The path to the directory containing images.
        inference_path (Path): The path to the directory containing inference files.
        labels_path (Path): The path to the directory containing label files.

    This function iterates through all the text files in the specified directory
    and checks if they have 0 or more than one detections. If a text file is invalid,
    the function deletes the invalid text file and its corresponding image and inference files.
    """

    for txt_file in labels_path.glob("*.txt"):
        with open(txt_file, 'r') as file:
            lines = file.readlines()

        if len(lines) == 0 or len(lines) > 1:
            print(f"Deleting {txt_file.name} - Invalid file")
            txt_file.unlink()

            image_file_jpg = images_path / (txt_file.stem + ".jpg")
            image_file_jpeg = images_path / (txt_file.stem + ".jpeg")
            inference_file_jpg = inference_path / (txt_file.stem + ".jpg")
            inference_file_jpeg = inference_path / (txt_file.stem + ".jpeg")

            if image_file_jpg.exists():
                image_file_jpg.unlink()
                print(f"Deleted corresponding image file: {image_file_jpg.name}")
            elif image_file_jpeg.exists():
                image_file_jpeg.unlink()
                print(f"Deleted corresponding image file: {image_file_jpeg.name}")

            if inference_file_jpg.exists():
                inference_file_jpg.unlink()
                print(f"Deleted corresponding inference file: {inference_file_jpg.name}")
            elif inference_file_jpeg.exists():
                inference_file_jpeg.unlink()
                print(f"Deleted corresponding inference file: {inference_file_jpeg.name}")

    print("Invalid text files and their corresponding images and inference files have been deleted.")


def __split_data(class_mapping: dict, temp_dir_path: Path, output_directory: Path):
    """
    Splits the data into train, test, and validation sets.

    Args:
        class_mapping (dict): A dictionary mapping class names to image file names.
        temp_dir_path (Path): The path to the temporary directory containing the images.
        output_directory (Path): The path to the output directory where the split data will be saved.
    """
    images_dir = temp_dir_path / "images"
    labels_dir = temp_dir_path / "labels"

    def create_dirs(split):
        (output_directory / split).mkdir(parents=True, exist_ok=True)
        (output_directory / split / "images").mkdir(parents=True, exist_ok=True)
        (output_directory / split / "labels").mkdir(parents=True, exist_ok=True)

    def copy_files(file_list, split):
        for image_file in file_list:
            image_file_path = images_dir / image_file

            if not image_file_path.exists():
                continue

            shutil.copy(image_file_path, output_directory / split / "images" / image_file_path.name)

            label_file = labels_dir / (image_file_path.stem + ".txt")
            if label_file.exists():
                shutil.copy(label_file, output_directory / split / "labels" / label_file.name)

    for split in ["train", "test", "valid"]:
        create_dirs(split)

    for _, files in class_mapping.items():
        random.shuffle(files)
        num_files = len(files)

        train_count = int(0.8 * num_files)
        test_count = int(0.1 * num_files)
        valid_count = num_files - train_count - test_count

        train_files = files[:train_count]
        test_files = files[train_count:train_count + test_count]
        valid_files = files[train_count + test_count:]

        copy_files(train_files, "train")
        copy_files(test_files, "test")
        copy_files(valid_files, "valid")

    print("Data has been split into train, test, and valid.")

def __save_class_idx_to_file(class_idxs: dict, output_directory: Path):
    """
    Saves the class indices to a file.

    Args:
        class_idxs (dict): A dictionary mapping class names to class indices.
        output_directory (Path): The path to the output directory where the class index file will be saved.
    """
    class_idx_file = output_directory / "class_idx.txt"
    with open(class_idx_file, 'w') as f:
        for class_name, idx in class_idxs.items():
            f.write(f"{class_name}: {idx}\n")
    print(f"Class indices have been saved to {class_idx_file}")

def __generate_sample_images_with_detections(main_dir: Path, class_idxs: dict):

    """
    Generates one sample image with multiple detections for each of train, test, valid, combining up to 6 images in one output.

    Args:
        main_dir (str): The main directory containing the train, test, and valid splits.
    """

    def resize_and_contain(image, target_size):
        image.thumbnail(target_size, Image.LANCZOS)
        new_image = Image.new("RGB", target_size, (0, 0, 0))
        new_image.paste(image, ((target_size[0] - image.width) // 2, (target_size[1] - image.height) // 2))
        return new_image

    def draw_bounding_boxes(image, labels_path, class_mapping, color_map):
        draw = ImageDraw.Draw(image)
        img_width, img_height = image.size
        try:
            font = ImageFont.truetype("DejaVuSans-Bold.ttf", 20)
        except IOError:
            font = ImageFont.load_default()

        if labels_path.exists():
            with open(labels_path, 'r') as label_file:
                for line in label_file.readlines():
                    parts = line.strip().split()
                    class_idx = int(parts[0])
                    center_x, center_y, width, height = map(float, parts[1:])
                    x_min = int((center_x - width / 2) * img_width)
                    y_min = int((center_y - height / 2) * img_height)
                    x_max = int((center_x + width / 2) * img_width)
                    y_max = int((center_y + height / 2) * img_height)
                    class_name = class_mapping.get(class_idx, str(class_idx))
                    color = color_map[class_idx]
                    draw.rectangle([x_min, y_min, x_max, y_max], outline=color, width=3)
                    draw.text((x_min, y_min - 20), class_name, fill=color, font=font)
        return image

    def combine_images(images, grid_size=(3, 2), target_size=(416, 416)):
        resized_images = [resize_and_contain(img, target_size) for img in images]
        width, height = target_size
        combined_image = Image.new('RGB', (width * grid_size[0], height * grid_size[1]))

        for i, img in enumerate(resized_images):
            row = i // grid_size[0]
            col = i % grid_size[0]
            combined_image.paste(img, (col * width, row * height))

        return combined_image

    def generate_color_map(class_mapping):
        colors = ['red', 'blue', 'green', 'purple', 'orange', 'yellow', 'pink', 'cyan', 'magenta']
        color_map = {idx: random.choice(colors) for idx in class_mapping.keys()}
        return color_map

    splits = ['train', 'test', 'valid']
    class_mapping = class_idxs
    color_map = generate_color_map(class_mapping)

    for split in splits:
        images_dir = Path(main_dir) / split / 'images'
        labels_dir = Path(main_dir) / split / 'labels'
        image_files = list(images_dir.glob("*.jpg"))
        if not image_files:
            continue

        sample_images = []
        for image_file in image_files[:6]:
            label_file = labels_dir / (image_file.stem + '.txt')
            image = Image.open(image_file)
            image_with_boxes = draw_bounding_boxes(image, label_file, class_mapping, color_map)
            sample_images.append(image_with_boxes)

        if sample_images:
            combined_image = combine_images(sample_images, grid_size=(3, 2), target_size=(416, 416))
            combined_image_path = Path(main_dir) / split / f"{split}_sample_with_detections.jpg"
            combined_image.save(combined_image_path)


def __split_background_images(background_dir: Path, output_directory: Path):
    """
    Splits the background images into train, test, and validation sets.

    Args:
        temp_dir_path (Path): The path to the temporary directory containing the background images.
        output_directory (Path): The path to the output directory where the split background images will be saved.
    """

    image_files = list(Path(background_dir).glob("*.jpg"))
    random.shuffle(image_files)

    num_images = len(image_files)
    train_split = int(0.8 * num_images)
    valid_split = int(0.1 * num_images)

    train_files = image_files[:train_split]
    valid_files = image_files[train_split:train_split + valid_split]
    test_files = image_files[train_split + valid_split:]

    def copy_files(image_list, split):
        for image_file in image_list:
            shutil.copy(image_file, Path(output_directory) / split / 'images' / image_file.name)

            label_file = Path(output_directory) / split / 'labels' / (image_file.stem + ".txt")
            label_file.touch()

    copy_files(train_files, 'train')
    copy_files(valid_files, 'valid')
    copy_files(test_files, 'test')

    print(f"Background data has been split: {len(train_files)} train, {len(valid_files)} valid, {len(test_files)} test")


def __count_classes_and_output_table(output_directory: Path, class_idxs: dict):
    """
    Counts the number of images per class and outputs a table.

    Args:
        output_directory (Path): The path to the output directory containing the split data.
        class_idxs (dict): A dictionary mapping class indices to class names.
    """

    def count_classes_in_split(labels_dir):
        class_counts = defaultdict(int)
        for label_file in os.listdir(labels_dir):
            if label_file.endswith(".txt"):
                label_path = os.path.join(labels_dir, label_file)
                with open(label_path, 'r') as f:
                    lines = f.readlines()
                    if not lines:
                        # Count empty files as 'null' class (background images)
                        class_counts['null'] += 1
                    else:
                        for line in lines:
                            class_index = int(line.split()[0])
                            class_counts[class_index] += 1
        return class_counts

    splits = ['train', 'test', 'valid']
    total_counts = defaultdict(int)

    table = PrettyTable()
    table.field_names = ["Class", "Class Index", "Train Count", "Test Count", "Valid Count", "Total"]

    split_counts = {split: defaultdict(int) for split in splits}

    for split in splits:
        labels_dir = output_directory / split / 'labels'
        if not os.path.exists(labels_dir):
            print(f"Warning: {labels_dir} does not exist, skipping {split}.")
            continue

        class_counts = count_classes_in_split(labels_dir)
        for class_index, count in class_counts.items():
            split_counts[split][class_index] = count
            total_counts[class_index] += count

    for class_index, total in total_counts.items():
        class_name = class_idxs.get(class_index, "Background" if class_index == 'null' else f"Class {class_index}")
        train_count = split_counts['train'].get(class_index, 0)
        test_count = split_counts['test'].get(class_index, 0)
        valid_count = split_counts['valid'].get(class_index, 0)
        table.add_row([class_name, class_index, train_count, test_count, valid_count, total])

    print(table)

def update_labels(class_mapping: dict, labels_path: Path) -> dict:
    """
    Updates the labels based on the class mapping.

    Args:
        class_mapping (dict): A dictionary mapping class names to image file names.
        labels_path (Path): The path to the directory containing the label files.

    Returns:
        dict: A dictionary mapping class names to class indices.
    """
    class_index_mapping = {}
    class_index_definition = {}

    for idx, (class_name, images) in enumerate(class_mapping.items()):
        class_index_definition[idx] = class_name
        for image_name in images:
            class_index_mapping[image_name] = idx

    for txt_file in labels_path.glob("*.txt"):
        image_name_jpg = txt_file.stem + ".jpg"
        image_name_jpeg = txt_file.stem + ".jpeg"

        if image_name_jpg in class_index_mapping:
            class_index = class_index_mapping[image_name_jpg]
        elif image_name_jpeg in class_index_mapping:
            class_index = class_index_mapping[image_name_jpeg]
        else:
            print(f"Warning: No corresponding image found for {txt_file.name}")
            continue

        with open(txt_file, 'r') as file:
            lines = file.readlines()

        updated_lines = []
        for line in lines:
            parts = line.split()
            if len(parts) > 0:
                parts[0] = str(class_index)
                updated_lines.append(" ".join(parts))

        with open(txt_file, 'w') as file:
            file.write("\n".join(updated_lines))

    print(f"Labels updated successfully")
    return class_index_definition

def count_images_across_splits(output_directory: Path) -> int:
    """
    Counts the total number of images across train, test, and validation splits.

    Args:
        output_directory (Path): The path to the output directory containing the split data.

    Returns:
        int: The total number of images across all splits.
    """
    total_images = 0
    for split in ['train', 'test', 'valid']:
        split_dir = output_directory / split / 'images'
        total_images += len(list(split_dir.glob("*.jpg"))) + len(list(split_dir.glob("*.jpeg")))

    return total_images

def __make_yaml_file(output_directory: Path, class_idxs: dict):
    """
    Creates a YAML configuration file for YOLOv8.

    Args:
        output_directory (Path): The path to the output directory where the YAML file will be saved.
        class_idxs (dict): A dictionary mapping class indices to class names.
    """

    # Define the structure of the YAML file
    yaml_content = {
        'path': str(output_directory.resolve()),
        'train': 'train/images',
        'val': 'valid/images',
        'test': 'test/images',
        'names': {idx: name for idx, name in class_idxs.items()}
    }

    # Write the YAML content to a file
    yaml_file_path = output_directory / 'dataset.yaml'
    with open(yaml_file_path, 'w') as yaml_file:
        yaml.dump(yaml_content, yaml_file, default_flow_style=False, sort_keys=False)

    print(f"YOLOv8 YAML file created at {yaml_file_path}")
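
The new prepare() is the bridge between collect() output and YOLO training: it auto-labels each class folder with the bundled YOLOv5 detector, drops images that do not yield exactly one detection, optionally mixes in empty-label background images, and emits a YOLOv8-style dataset.yaml. A sketch of the intended call, with hypothetical paths:

from bplusplus import prepare

prepare(
    input_directory="./dataset",    # one subfolder of .jpg images per class, as collect() produces
    output_directory="./prepared",  # receives train/, test/, valid/ and dataset.yaml
    with_background=True,           # also fetches GBIF "Plantae" images (~6% of the final count) as backgrounds
)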
bplusplus/train_validate.py
CHANGED
@@ -1,67 +1,11 @@
-import os
-import random
-import shutil
-
 from ultralytics import YOLO
+from pathlib import Path
 
+def train(input_yaml: str, output_directory: str = Path(__file__).resolve().parent, epochs: int = 30, imgsz: int = 640, batch: int = 16):
+    model = YOLO('yolov8n.pt')
+    model.train(data=input_yaml, epochs=epochs , imgsz=imgsz, batch=batch, project=output_directory)
+    return model
 
-
-
-
-    train_path = os.path.join(output_directory, 'train')  # Path to the training folder
-    val_path = os.path.join(output_directory, 'val')  # Path to the validation folder
-
-    # Create training and validation directories if they don't exist
-    os.makedirs(train_path, exist_ok=True)
-    os.makedirs(val_path, exist_ok=True)
-
-    # Walk through the dataset directory
-    # for root, dirs, files in os.walk(dataset_path):
-    for group in groups:
-        dataset_folder = os.path.join(dataset_path, group)
-        images = __files_in_folder(folder=dataset_folder)
-
-        # Shuffle the images
-        random.shuffle(images)
-
-        # Calculate the split index
-        split_index = int(len(images) * split_ratio)
-
-        # Split the images into training and validation sets
-        train_images = images[:split_index]
-        val_images = images[split_index:]
-
-        # Create destination folders if they don't exist
-        train_label_path = os.path.join(train_path, group)
-        val_label_path = os.path.join(val_path, group)
-        os.makedirs(train_label_path, exist_ok=True)
-        os.makedirs(val_label_path, exist_ok=True)
-
-        # Move images to the appropriate folders
-        for image in train_images:
-            src = os.path.join(dataset_folder, image)
-            dst = os.path.join(train_label_path, image)
-            shutil.move(src, dst)
-
-        for image in val_images:
-            src = os.path.join(dataset_folder, image)
-            dst = os.path.join(val_label_path, image)
-            shutil.move(src, dst)
-
-    print("Dataset splitting completed successfully.")
-
-    # Create a new YOLO model from scratch
-    model = YOLO(os.path.join(output_directory,'yolov8n-cls.pt'))
-    #
-    #define parameters for YOLO training, be aware of epoch, batch, and imgsz, to not exceed system requirements (memory, CPU, GPU...)
-    #Specify path to folder where the val and train folder is located
-    data = output_directory
-    results = model.train(data=data, epochs=5, batch=16, imgsz=224, project=output_directory)
-
-    #batch is adjusted to 1 to prevent a resizing bug - in training this bug doesnt emerge. A work around for larger batch size could be a resizing step in advance.
-    model.val(batch=1)
-
-
-
-def __files_in_folder(folder: str) -> list[str]:
-    return [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
+def validate(model: YOLO, input_yaml: str, output_directory: str = Path(__file__).resolve().parent):
+    metrics = model.val(data=input_yaml, project=output_directory)
+    return metrics
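
This change replaces the old classification-style splitting and yolov8n-cls training with thin train() and validate() wrappers around Ultralytics YOLOv8, driven by the dataset.yaml that prepare() emits. A usage sketch based on the signatures above; the paths are hypothetical:

from bplusplus import train, validate

model = train(
    input_yaml="./prepared/dataset.yaml",  # produced by prepare()
    output_directory="./runs",
    epochs=30,
    imgsz=640,
    batch=16,
)

metrics = validate(model, input_yaml="./prepared/dataset.yaml", output_directory="./runs")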
bplusplus/yolov5detect/__init__.py
ADDED
from .detect import run