bplusplus 1.2.2__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bplusplus might be problematic. Click here for more details.
- bplusplus/__init__.py +3 -5
- bplusplus/inference.py +891 -0
- bplusplus/prepare.py +419 -652
- bplusplus/{hierarchical/test.py → test.py} +22 -9
- bplusplus/tracker.py +261 -0
- bplusplus/{hierarchical/train.py → train.py} +1 -1
- bplusplus-1.2.3.dist-info/METADATA +101 -0
- bplusplus-1.2.3.dist-info/RECORD +11 -0
- {bplusplus-1.2.2.dist-info → bplusplus-1.2.3.dist-info}/WHEEL +1 -1
- bplusplus/resnet/test.py +0 -473
- bplusplus/resnet/train.py +0 -329
- bplusplus/train_validate.py +0 -11
- bplusplus-1.2.2.dist-info/METADATA +0 -260
- bplusplus-1.2.2.dist-info/RECORD +0 -12
- {bplusplus-1.2.2.dist-info → bplusplus-1.2.3.dist-info}/LICENSE +0 -0
bplusplus/prepare.py
CHANGED
|
@@ -2,17 +2,12 @@ import os
|
|
|
2
2
|
import random
|
|
3
3
|
import shutil
|
|
4
4
|
import tempfile
|
|
5
|
-
from collections import defaultdict
|
|
6
5
|
from pathlib import Path
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional
|
|
8
7
|
|
|
9
|
-
import matplotlib.pyplot as plt
|
|
10
|
-
import numpy as np
|
|
11
8
|
import requests
|
|
12
9
|
import torch
|
|
13
|
-
import
|
|
14
|
-
from PIL import Image, ImageDraw, ImageFont
|
|
15
|
-
from prettytable import PrettyTable
|
|
10
|
+
from PIL import Image
|
|
16
11
|
from torch import serialization
|
|
17
12
|
from torch.nn import Module, ModuleDict, ModuleList
|
|
18
13
|
from torch.nn.modules.activation import LeakyReLU, ReLU, SiLU
|
|
@@ -31,394 +26,290 @@ from ultralytics.nn.modules.block import DFL
|
|
|
31
26
|
from ultralytics.nn.modules.conv import Conv
|
|
32
27
|
from ultralytics.nn.tasks import DetectionModel
|
|
33
28
|
|
|
34
|
-
from .collect import Group, collect
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def prepare(input_directory: str, output_directory: str, one_stage: bool = False, with_background: bool = False, size_filter: bool = False, sizes: list = None):
|
|
38
29
|
|
|
30
|
+
def prepare(input_directory: str, output_directory: str, img_size: int = 40):
|
|
39
31
|
"""
|
|
40
|
-
Prepares
|
|
41
|
-
1. Copies images from
|
|
42
|
-
2. Deletes corrupted images.
|
|
43
|
-
3.
|
|
44
|
-
4.
|
|
45
|
-
5.
|
|
46
|
-
6.
|
|
47
|
-
7. Splits the data into train, test, and validation sets.
|
|
48
|
-
8. Counts the total number of images across all splits.
|
|
49
|
-
9. Makes a YAML configuration file for YOLOv8.
|
|
32
|
+
Prepares a YOLO classification dataset by performing the following steps:
|
|
33
|
+
1. Copies images from input directory to temporary directory and creates class mapping.
|
|
34
|
+
2. Deletes corrupted images and downloads YOLO model weights if not present.
|
|
35
|
+
3. Runs YOLO inference to generate detection labels (bounding boxes) for the images.
|
|
36
|
+
4. Cleans up orphaned images, invalid labels, and updates labels with class indices.
|
|
37
|
+
5. Crops detected objects from images based on bounding boxes and resizes them.
|
|
38
|
+
6. Splits data into train/valid sets with classification folder structure (train/class_name/image.jpg).
|
|
50
39
|
|
|
51
40
|
Args:
|
|
52
41
|
input_directory (str): The path to the input directory containing the images.
|
|
53
|
-
output_directory (str): The path to the output directory where the prepared dataset will be saved.
|
|
42
|
+
output_directory (str): The path to the output directory where the prepared classification dataset will be saved.
|
|
43
|
+
img_size (int, optional): The target size for the smallest dimension of cropped images. Defaults to 40.
|
|
54
44
|
"""
|
|
55
|
-
|
|
56
45
|
input_directory = Path(input_directory)
|
|
57
46
|
output_directory = Path(output_directory)
|
|
58
47
|
|
|
59
|
-
|
|
48
|
+
print("="*60)
|
|
49
|
+
print("STARTING BPLUSPLUS DATASET PREPARATION")
|
|
50
|
+
print("="*60)
|
|
51
|
+
print(f"Input directory: {input_directory}")
|
|
52
|
+
print(f"Output directory: {output_directory}")
|
|
53
|
+
print(f"Target image size: {img_size}px (smallest dimension)")
|
|
54
|
+
print()
|
|
60
55
|
|
|
61
56
|
with tempfile.TemporaryDirectory() as temp_dir:
|
|
62
|
-
|
|
63
57
|
temp_dir_path = Path(temp_dir)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
images_path.mkdir(parents=True, exist_ok=True)
|
|
67
|
-
|
|
68
|
-
for folder_directory in input_directory.iterdir():
|
|
69
|
-
images_names = []
|
|
70
|
-
if folder_directory.is_dir():
|
|
71
|
-
folder_name = folder_directory.name
|
|
72
|
-
for image_file in folder_directory.glob("*.jpg"):
|
|
73
|
-
shutil.copy(image_file, images_path)
|
|
74
|
-
image_name = image_file.name
|
|
75
|
-
images_names.append(image_name)
|
|
76
|
-
|
|
77
|
-
class_mapping[folder_name] = images_names
|
|
78
|
-
|
|
79
|
-
original_image_count = len(list(images_path.glob("*.jpg"))) + len(list(images_path.glob("*.jpeg")))
|
|
80
|
-
|
|
81
|
-
__delete_corrupted_images(images_path)
|
|
82
|
-
|
|
83
|
-
current_dir = Path(__file__).resolve().parent
|
|
84
|
-
|
|
85
|
-
weights_path = current_dir / 'small-generic.pt'
|
|
86
|
-
|
|
87
|
-
github_release_url = 'https://github.com/orlandocloss/TwoStageInsectDetection/releases/download/models/small-generic.pt'
|
|
88
|
-
|
|
89
|
-
if not weights_path.exists():
|
|
90
|
-
__download_file_from_github_release(github_release_url, weights_path)
|
|
91
|
-
|
|
92
|
-
# Add all required classes to safe globals
|
|
93
|
-
serialization.add_safe_globals([
|
|
94
|
-
DetectionModel, Sequential, Conv, Conv2d, BatchNorm2d,
|
|
95
|
-
SiLU, ReLU, LeakyReLU, MaxPool2d, Linear, Dropout, Upsample,
|
|
96
|
-
Module, ModuleList, ModuleDict,
|
|
97
|
-
Bottleneck, C2f, SPPF, Detect, Concat, DFL,
|
|
98
|
-
# Add torch internal classes
|
|
99
|
-
torch.nn.parameter.Parameter,
|
|
100
|
-
torch.Tensor,
|
|
101
|
-
torch._utils._rebuild_tensor_v2,
|
|
102
|
-
torch._utils._rebuild_parameter
|
|
103
|
-
])
|
|
104
|
-
|
|
105
|
-
labels_path = temp_dir_path / "predict" / "labels"
|
|
58
|
+
print(f"Using temporary directory: {temp_dir_path}")
|
|
59
|
+
print()
|
|
106
60
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
except Exception as e:
|
|
154
|
-
print(f"Error during model prediction setup: {e}")
|
|
155
|
-
import traceback
|
|
156
|
-
traceback.print_exc()
|
|
157
|
-
|
|
158
|
-
if one_stage:
|
|
159
|
-
|
|
160
|
-
if size_filter and len(sizes) <= 2:
|
|
161
|
-
__filter_by_size(images_path, labels_path, sizes)
|
|
162
|
-
print(f"\nFiltered {len(list(images_path.glob('*.jpg')))} images by size out of {original_image_count} input images.\n NOTE: Some images may be filtered due to corruption or inaccurate labels.")
|
|
163
|
-
|
|
164
|
-
__delete_orphaned_images_and_inferences(images_path, labels_path)
|
|
165
|
-
__delete_invalid_txt_files(images_path, labels_path)
|
|
166
|
-
class_idxs = update_labels(class_mapping, labels_path)
|
|
167
|
-
__split_data(class_mapping, temp_dir_path, output_directory)
|
|
168
|
-
|
|
169
|
-
# __save_class_idx_to_file(class_idxs, output_directory)
|
|
170
|
-
final_image_count = count_images_across_splits(output_directory)
|
|
171
|
-
print(f"\nOut of {original_image_count} input images, {final_image_count} are eligible for detection. \nThese are saved across train, test and valid split in {output_directory}.")
|
|
172
|
-
__generate_sample_images_with_detections(output_directory, class_idxs)
|
|
173
|
-
|
|
174
|
-
if with_background:
|
|
175
|
-
print("\nCollecting and splitting background images.")
|
|
176
|
-
|
|
177
|
-
bg_images=int(final_image_count*0.06)
|
|
178
|
-
|
|
179
|
-
search: dict[str, Any] = {
|
|
180
|
-
"scientificName": ["Plantae"]
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
collect(
|
|
184
|
-
group_by_key=Group.scientificName,
|
|
185
|
-
search_parameters=search,
|
|
186
|
-
images_per_group=bg_images,
|
|
187
|
-
output_directory=temp_dir_path,
|
|
188
|
-
num_threads=3
|
|
189
|
-
)
|
|
61
|
+
# Step 1: Setup directories and copy images
|
|
62
|
+
print("STEP 1: Setting up directories and copying images...")
|
|
63
|
+
print("-" * 50)
|
|
64
|
+
class_mapping, original_image_count = _setup_directories_and_copy_images(
|
|
65
|
+
input_directory, temp_dir_path
|
|
66
|
+
)
|
|
67
|
+
print(f"✓ Step 1 completed: {original_image_count} images copied from {len(class_mapping)} classes")
|
|
68
|
+
print()
|
|
69
|
+
|
|
70
|
+
# Step 2-3: Clean images and setup model
|
|
71
|
+
print("STEP 2: Cleaning images and setting up YOLO model...")
|
|
72
|
+
print("-" * 50)
|
|
73
|
+
weights_path = _prepare_model_and_clean_images(temp_dir_path)
|
|
74
|
+
print(f"✓ Step 2 completed: Model ready at {weights_path}")
|
|
75
|
+
print()
|
|
76
|
+
|
|
77
|
+
# Step 4: Run YOLO inference
|
|
78
|
+
print("STEP 3: Running YOLO inference to detect objects...")
|
|
79
|
+
print("-" * 50)
|
|
80
|
+
labels_path = _run_yolo_inference(temp_dir_path, weights_path)
|
|
81
|
+
print(f"✓ Step 3 completed: Labels generated at {labels_path}")
|
|
82
|
+
print()
|
|
83
|
+
|
|
84
|
+
# Step 5-6: Clean up labels and update class mapping
|
|
85
|
+
print("STEP 4: Cleaning up orphaned files and processing labels...")
|
|
86
|
+
print("-" * 50)
|
|
87
|
+
class_idxs = _cleanup_and_process_labels(
|
|
88
|
+
temp_dir_path, labels_path, class_mapping
|
|
89
|
+
)
|
|
90
|
+
print(f"✓ Step 4 completed: Processed {len(class_idxs)} classes")
|
|
91
|
+
print()
|
|
92
|
+
|
|
93
|
+
# Step 7-9: Finalize dataset
|
|
94
|
+
print("STEP 5: Creating classification dataset with cropped images...")
|
|
95
|
+
print("-" * 50)
|
|
96
|
+
_finalize_dataset(
|
|
97
|
+
class_mapping, temp_dir_path, output_directory,
|
|
98
|
+
class_idxs, original_image_count, img_size
|
|
99
|
+
)
|
|
100
|
+
print("✓ Step 5 completed: Classification dataset ready!")
|
|
101
|
+
print()
|
|
102
|
+
|
|
103
|
+
print("="*60)
|
|
104
|
+
print("BPLUSPLUS DATASET PREPARATION COMPLETED SUCCESSFULLY!")
|
|
105
|
+
print("="*60)
|
|
190
106
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
__split_background_images(temp_dir_path / "Plantae", output_directory)
|
|
194
|
-
|
|
195
|
-
__count_classes_and_output_table(output_directory, class_idxs)
|
|
196
|
-
|
|
197
|
-
__make_yaml_file(output_directory, class_idxs)
|
|
198
|
-
else:
|
|
199
|
-
# try:
|
|
200
|
-
# sized_dir = temp_dir_path / "sized"
|
|
201
|
-
# sized_dir.mkdir(parents=True, exist_ok=True)
|
|
202
|
-
# __two_stage_update(class_mapping, filtered, sized_dir, images_path)
|
|
203
|
-
# __classification_split(sized_dir, output_directory)
|
|
204
|
-
# __count_classification_split(output_directory, class_mapping)
|
|
205
|
-
# except:
|
|
206
|
-
__delete_orphaned_images_and_inferences(images_path, labels_path)
|
|
207
|
-
__delete_invalid_txt_files(images_path, labels_path)
|
|
208
|
-
__classification_split(images_path, labels_path, output_directory, class_mapping)
|
|
209
|
-
__count_classification_split(output_directory, class_mapping)
|
|
210
|
-
|
|
211
|
-
def __count_classification_split(output_directory: str, class_mapping: dict):
|
|
107
|
+
def _setup_directories_and_copy_images(input_directory: Path, temp_dir_path: Path):
|
|
212
108
|
"""
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
class_mapping (dict): Dictionary mapping class names to image file names.
|
|
109
|
+
Sets up temporary directories and copies images from input directory.
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
tuple: (class_mapping dict, original_image_count int)
|
|
218
113
|
"""
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
114
|
+
images_path = temp_dir_path / "images"
|
|
115
|
+
images_path.mkdir(parents=True, exist_ok=True)
|
|
116
|
+
print(f" Created temporary images directory: {images_path}")
|
|
222
117
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
118
|
+
class_mapping = {}
|
|
119
|
+
total_copied = 0
|
|
120
|
+
|
|
121
|
+
print(" Scanning input directory for class folders...")
|
|
122
|
+
class_folders = [d for d in input_directory.iterdir() if d.is_dir()]
|
|
123
|
+
print(f" Found {len(class_folders)} class folders")
|
|
124
|
+
|
|
125
|
+
for folder_directory in class_folders:
|
|
126
|
+
images_names = []
|
|
127
|
+
if folder_directory.is_dir():
|
|
128
|
+
folder_name = folder_directory.name
|
|
129
|
+
image_files = list(folder_directory.glob("*.jpg"))
|
|
130
|
+
print(f" Copying {len(image_files)} images from class '{folder_name}'...")
|
|
131
|
+
|
|
132
|
+
for image_file in image_files:
|
|
133
|
+
shutil.copy(image_file, images_path)
|
|
134
|
+
image_name = image_file.name
|
|
135
|
+
images_names.append(image_name)
|
|
136
|
+
total_copied += 1
|
|
137
|
+
|
|
138
|
+
class_mapping[folder_name] = images_names
|
|
139
|
+
print(f" ✓ {len(images_names)} images copied for class '{folder_name}'")
|
|
140
|
+
|
|
141
|
+
original_image_count = len(list(images_path.glob("*.jpg"))) + len(list(images_path.glob("*.jpeg")))
|
|
142
|
+
print(f" Total images in temporary directory: {original_image_count}")
|
|
143
|
+
|
|
144
|
+
return class_mapping, original_image_count
|
|
145
|
+
|
|
146
|
+
def _prepare_model_and_clean_images(temp_dir_path: Path):
|
|
248
147
|
"""
|
|
249
|
-
|
|
250
|
-
cropping images according to their YOLO labels but preserving original class structure.
|
|
148
|
+
Cleans corrupted images and downloads/prepares the YOLO model.
|
|
251
149
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
labels_directory (str): Path to the directory containing YOLO label files.
|
|
255
|
-
output_directory (str): Path to the output directory where train and valid splits will be created.
|
|
256
|
-
class_mapping (dict): Dictionary mapping class names to image file names.
|
|
150
|
+
Returns:
|
|
151
|
+
Path: weights_path for the YOLO model
|
|
257
152
|
"""
|
|
258
|
-
|
|
259
|
-
labels_directory = Path(labels_directory)
|
|
260
|
-
output_directory = Path(output_directory)
|
|
153
|
+
images_path = temp_dir_path / "images"
|
|
261
154
|
|
|
262
|
-
#
|
|
263
|
-
|
|
264
|
-
|
|
155
|
+
# Clean corrupted images
|
|
156
|
+
print(" Checking for corrupted images...")
|
|
157
|
+
images_before = len(list(images_path.glob("*.jpg")))
|
|
158
|
+
__delete_corrupted_images(images_path)
|
|
159
|
+
images_after = len(list(images_path.glob("*.jpg")))
|
|
160
|
+
deleted_count = images_before - images_after
|
|
161
|
+
print(f" ✓ Cleaned {deleted_count} corrupted images ({images_after} images remain)")
|
|
265
162
|
|
|
266
|
-
|
|
267
|
-
|
|
163
|
+
# Setup model weights
|
|
164
|
+
current_dir = Path(__file__).resolve().parent
|
|
165
|
+
weights_path = current_dir / 'v11small-generic.pt'
|
|
166
|
+
github_release_url = 'https://github.com/Tvenver/Bplusplus/releases/download/v1.2.3/v11small-generic.pt'
|
|
268
167
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
(
|
|
272
|
-
(
|
|
273
|
-
print(f"
|
|
168
|
+
print(f" Checking for YOLO model weights at: {weights_path}")
|
|
169
|
+
if not weights_path.exists():
|
|
170
|
+
print(" Model weights not found, downloading from GitHub...")
|
|
171
|
+
__download_file_from_github_release(github_release_url, weights_path)
|
|
172
|
+
print(f" ✓ Model weights downloaded successfully")
|
|
173
|
+
else:
|
|
174
|
+
print(" ✓ Model weights already exist")
|
|
274
175
|
|
|
275
|
-
#
|
|
276
|
-
|
|
176
|
+
# Add all required classes to safe globals
|
|
177
|
+
serialization.add_safe_globals([
|
|
178
|
+
DetectionModel, Sequential, Conv, Conv2d, BatchNorm2d,
|
|
179
|
+
SiLU, ReLU, LeakyReLU, MaxPool2d, Linear, Dropout, Upsample,
|
|
180
|
+
Module, ModuleList, ModuleDict,
|
|
181
|
+
Bottleneck, C2f, SPPF, Detect, Concat, DFL,
|
|
182
|
+
# Add torch internal classes
|
|
183
|
+
torch.nn.parameter.Parameter,
|
|
184
|
+
torch.Tensor,
|
|
185
|
+
torch._utils._rebuild_tensor_v2,
|
|
186
|
+
torch._utils._rebuild_parameter
|
|
187
|
+
])
|
|
277
188
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
189
|
+
return weights_path
|
|
190
|
+
|
|
191
|
+
def _run_yolo_inference(temp_dir_path: Path, weights_path: Path):
|
|
192
|
+
"""
|
|
193
|
+
Runs YOLO inference on all images to generate labels.
|
|
281
194
|
|
|
282
|
-
|
|
195
|
+
Returns:
|
|
196
|
+
Path: labels_path where the generated labels are stored
|
|
197
|
+
"""
|
|
198
|
+
images_path = temp_dir_path / "images"
|
|
199
|
+
labels_path = temp_dir_path / "predict" / "labels"
|
|
283
200
|
|
|
284
|
-
|
|
285
|
-
print(f"
|
|
201
|
+
try:
|
|
202
|
+
print(f" Loading YOLO model from: {weights_path}")
|
|
203
|
+
model = YOLO(weights_path)
|
|
204
|
+
print(" ✓ YOLO model loaded successfully")
|
|
286
205
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
206
|
+
# Get list of all image files
|
|
207
|
+
image_files = list(images_path.glob('*.jpg'))
|
|
208
|
+
print(f" Found {len(image_files)} images to process with YOLO")
|
|
209
|
+
|
|
210
|
+
# Ensure predict directory exists
|
|
211
|
+
predict_dir = temp_dir_path / "predict"
|
|
212
|
+
predict_dir.mkdir(exist_ok=True)
|
|
213
|
+
labels_path.mkdir(parents=True, exist_ok=True)
|
|
214
|
+
print(f" Created prediction output directory: {predict_dir}")
|
|
215
|
+
|
|
216
|
+
result_count = 0
|
|
217
|
+
error_count = 0
|
|
218
|
+
|
|
219
|
+
print(" Starting YOLO inference...")
|
|
220
|
+
print(f" Progress: 0/{len(image_files)} images processed", end="", flush=True)
|
|
221
|
+
|
|
222
|
+
for i, img_path in enumerate(image_files, 1):
|
|
300
223
|
try:
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
img_width, img_height = img.size
|
|
313
|
-
x_min = int((x_center - width/2) * img_width)
|
|
314
|
-
y_min = int((y_center - height/2) * img_height)
|
|
315
|
-
x_max = int((x_center + width/2) * img_width)
|
|
316
|
-
y_max = int((y_center + height/2) * img_height)
|
|
317
|
-
|
|
318
|
-
x_min = max(0, x_min)
|
|
319
|
-
y_min = max(0, y_min)
|
|
320
|
-
x_max = min(img_width, x_max)
|
|
321
|
-
y_max = min(img_height, y_max)
|
|
322
|
-
|
|
323
|
-
img = img.crop((x_min, y_min, x_max, y_max))
|
|
224
|
+
results = model.predict(
|
|
225
|
+
source=str(img_path),
|
|
226
|
+
conf=0.35,
|
|
227
|
+
save=True,
|
|
228
|
+
save_txt=True,
|
|
229
|
+
project=temp_dir_path,
|
|
230
|
+
name="predict",
|
|
231
|
+
exist_ok=True,
|
|
232
|
+
verbose=False # Set to False to reduce YOLO's own output
|
|
233
|
+
)
|
|
324
234
|
|
|
325
|
-
|
|
326
|
-
if img_width < img_height:
|
|
327
|
-
# Width is smaller, make it 40
|
|
328
|
-
new_width = 40
|
|
329
|
-
new_height = int((img_height / img_width) * 40)
|
|
330
|
-
else:
|
|
331
|
-
# Height is smaller, make it 40
|
|
332
|
-
new_height = 40
|
|
333
|
-
new_width = int((img_width / img_height) * 40)
|
|
235
|
+
result_count += 1
|
|
334
236
|
|
|
335
|
-
#
|
|
336
|
-
|
|
237
|
+
# Update progress every 10% or every 100 images, whichever is smaller
|
|
238
|
+
update_interval = max(1, min(100, len(image_files) // 10))
|
|
239
|
+
if i % update_interval == 0 or i == len(image_files):
|
|
240
|
+
print(f"\r Progress: {i}/{len(image_files)} images processed", end="", flush=True)
|
|
337
241
|
|
|
338
|
-
valid_images.append((image_path, img, class_name))
|
|
339
242
|
except Exception as e:
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
for image_set, dest_dir in [(train_images, train_dir), (valid_images, valid_dir)]:
|
|
354
|
-
for orig_file, img, class_name in image_set:
|
|
355
|
-
output_path = dest_dir / class_name / (orig_file.stem + '.jpg')
|
|
243
|
+
error_count += 1
|
|
244
|
+
print(f"\n Error processing {img_path.name}: {e}")
|
|
245
|
+
continue
|
|
246
|
+
|
|
247
|
+
print() # New line after progress
|
|
248
|
+
print(f" ✓ YOLO inference completed: {result_count} successful, {error_count} failed")
|
|
249
|
+
|
|
250
|
+
# Verify labels were created
|
|
251
|
+
label_files = list(labels_path.glob("*.txt"))
|
|
252
|
+
print(f" Generated {len(label_files)} label files")
|
|
253
|
+
|
|
254
|
+
if len(label_files) == 0:
|
|
255
|
+
print("WARNING: No label files were created by the model prediction!")
|
|
356
256
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
img.save(output_path, format='JPEG', quality=95)
|
|
257
|
+
except Exception as e:
|
|
258
|
+
print(f"Error during model prediction setup: {e}")
|
|
259
|
+
import traceback
|
|
260
|
+
traceback.print_exc()
|
|
362
261
|
|
|
363
|
-
|
|
364
|
-
print(f"\nData split complete. Images saved to train and validation sets in {output_directory}")
|
|
365
|
-
for class_name in class_mapping:
|
|
366
|
-
train_count = len(list((train_dir / class_name).glob('*.*')))
|
|
367
|
-
valid_count = len(list((valid_dir / class_name).glob('*.*')))
|
|
368
|
-
print(f" - {class_name}: {train_count} images in train, {valid_count} images in valid")
|
|
262
|
+
return labels_path
|
|
369
263
|
|
|
370
|
-
def
|
|
264
|
+
def _cleanup_and_process_labels(temp_dir_path: Path, labels_path: Path, class_mapping: dict):
|
|
371
265
|
"""
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
labels_path (Path): The path to the directory containing labels.
|
|
377
|
-
sizes (list): A list of sizes to filter by.
|
|
266
|
+
Cleans up orphaned images and invalid labels, then creates class index mapping.
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
dict: class_idxs mapping class indices to class names
|
|
378
270
|
"""
|
|
379
|
-
|
|
380
|
-
"small": [0, 0.15],
|
|
381
|
-
"medium": [0.15, 0.3],
|
|
382
|
-
"large": [0.3, 1],
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
filtered_images = []
|
|
386
|
-
for image_file in images_path.glob("*.jpg"):
|
|
387
|
-
label_file = labels_path / (image_file.stem + ".txt")
|
|
388
|
-
image_name = image_file.name
|
|
389
|
-
|
|
390
|
-
if label_file.exists():
|
|
391
|
-
with open(label_file, 'r') as file:
|
|
392
|
-
lines = file.readlines()
|
|
393
|
-
if len(lines) != 1:
|
|
394
|
-
continue
|
|
395
|
-
else:
|
|
396
|
-
parts = lines[0].split()
|
|
397
|
-
_, _, width, height = map(float, parts[1:])
|
|
398
|
-
for size in sizes:
|
|
399
|
-
if width < size_map[size][1] and width >= size_map[size][0] and height < size_map[size][1] and height >= size_map[size][0]:
|
|
400
|
-
filtered_images.append(image_name)
|
|
271
|
+
images_path = temp_dir_path / "images"
|
|
401
272
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
273
|
+
print(" Cleaning up orphaned images and labels...")
|
|
274
|
+
images_before = len(list(images_path.glob("*.jpg")))
|
|
275
|
+
labels_before = len(list(labels_path.glob("*.txt")))
|
|
276
|
+
|
|
277
|
+
__delete_orphaned_images_and_inferences(images_path, labels_path)
|
|
278
|
+
__delete_invalid_txt_files(images_path, labels_path)
|
|
279
|
+
|
|
280
|
+
images_after = len(list(images_path.glob("*.jpg")))
|
|
281
|
+
labels_after = len(list(labels_path.glob("*.txt")))
|
|
282
|
+
|
|
283
|
+
deleted_images = images_before - images_after
|
|
284
|
+
deleted_labels = labels_before - labels_after
|
|
285
|
+
print(f" ✓ Cleaned up {deleted_images} orphaned images and {deleted_labels} invalid labels")
|
|
286
|
+
print(f" Final counts: {images_after} images, {labels_after} valid labels")
|
|
287
|
+
|
|
288
|
+
# Create class index mapping for classification
|
|
289
|
+
class_idxs = {}
|
|
290
|
+
for idx, class_name in enumerate(class_mapping.keys()):
|
|
291
|
+
class_idxs[idx] = class_name
|
|
292
|
+
|
|
293
|
+
print(f" Created class mapping for {len(class_idxs)} classes: {list(class_idxs.values())}")
|
|
294
|
+
|
|
295
|
+
return class_idxs
|
|
411
296
|
|
|
412
|
-
def
|
|
297
|
+
def _finalize_dataset(class_mapping: dict, temp_dir_path: Path, output_directory: Path,
|
|
298
|
+
class_idxs: dict, original_image_count: int, img_size: int):
|
|
413
299
|
"""
|
|
414
|
-
|
|
300
|
+
Finalizes the dataset by creating cropped classification images and splitting into train/valid sets.
|
|
415
301
|
"""
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
302
|
+
# Split data into train/valid with cropped classification images
|
|
303
|
+
__classification_split(class_mapping, temp_dir_path, output_directory, img_size)
|
|
304
|
+
|
|
305
|
+
# Generate final report
|
|
306
|
+
print(" Generating final statistics...")
|
|
307
|
+
final_image_count = count_images_across_splits(output_directory)
|
|
308
|
+
print(f" Dataset Statistics:")
|
|
309
|
+
print(f" - Original images: {original_image_count}")
|
|
310
|
+
print(f" - Final cropped images: {final_image_count}")
|
|
311
|
+
print(f" - Success rate: {final_image_count/original_image_count*100:.1f}%")
|
|
312
|
+
print(f" - Output directory: {output_directory}")
|
|
422
313
|
|
|
423
314
|
def __delete_corrupted_images(images_path: Path):
|
|
424
315
|
|
|
@@ -500,7 +391,7 @@ def __delete_orphaned_images_and_inferences(images_path: Path, labels_path: Path
|
|
|
500
391
|
# print(f"Deleting orphaned image: {image_file.name}")
|
|
501
392
|
image_file.unlink()
|
|
502
393
|
|
|
503
|
-
|
|
394
|
+
|
|
504
395
|
|
|
505
396
|
def __delete_invalid_txt_files(images_path: Path, labels_path: Path):
|
|
506
397
|
|
|
@@ -535,291 +426,188 @@ def __delete_invalid_txt_files(images_path: Path, labels_path: Path):
|
|
|
535
426
|
image_file_jpeg.unlink()
|
|
536
427
|
# print(f"Deleted corresponding image file: {image_file_jpeg.name}")
|
|
537
428
|
|
|
538
|
-
print("Invalid text files and their corresponding images files have been deleted.")
|
|
539
429
|
|
|
540
430
|
|
|
541
|
-
def __split_data(class_mapping: dict, temp_dir_path: Path, output_directory: Path):
|
|
542
|
-
"""
|
|
543
|
-
Splits the data into train, test, and validation sets.
|
|
544
431
|
|
|
432
|
+
def __classification_split(class_mapping: dict, temp_dir_path: Path, output_directory: Path, img_size: int):
|
|
433
|
+
"""
|
|
434
|
+
Splits the data into train and validation sets for classification tasks,
|
|
435
|
+
cropping images according to their YOLO labels but preserving original class structure.
|
|
436
|
+
|
|
545
437
|
Args:
|
|
546
438
|
class_mapping (dict): A dictionary mapping class names to image file names.
|
|
547
439
|
temp_dir_path (Path): The path to the temporary directory containing the images.
|
|
548
|
-
output_directory (Path): The path to the output directory where
|
|
440
|
+
output_directory (Path): The path to the output directory where train and valid splits will be created.
|
|
441
|
+
img_size (int): The target size for the smallest dimension of cropped images.
|
|
549
442
|
"""
|
|
550
443
|
images_dir = temp_dir_path / "images"
|
|
551
444
|
labels_dir = temp_dir_path / "predict" / "labels"
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
for
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
valid_files = files[train_count + test_count:]
|
|
585
|
-
|
|
586
|
-
copy_files(train_files, "train")
|
|
587
|
-
copy_files(test_files, "test")
|
|
588
|
-
copy_files(valid_files, "valid")
|
|
589
|
-
|
|
590
|
-
print("Data has been split into train, test, and valid.")
|
|
591
|
-
|
|
592
|
-
def __save_class_idx_to_file(class_idxs: dict, output_directory: Path):
|
|
593
|
-
"""
|
|
594
|
-
Saves the class indices to a file.
|
|
595
|
-
|
|
596
|
-
Args:
|
|
597
|
-
class_idxs (dict): A dictionary mapping class names to class indices.
|
|
598
|
-
output_directory (Path): The path to the output directory where the class index file will be saved.
|
|
599
|
-
"""
|
|
600
|
-
class_idx_file = output_directory / "class_idx.txt"
|
|
601
|
-
with open(class_idx_file, 'w') as f:
|
|
602
|
-
for class_name, idx in class_idxs.items():
|
|
603
|
-
f.write(f"{class_name}: {idx}\n")
|
|
604
|
-
print(f"Class indices have been saved to {class_idx_file}")
|
|
605
|
-
|
|
606
|
-
def __generate_sample_images_with_detections(main_dir: Path, class_idxs: dict):
|
|
607
|
-
|
|
608
|
-
"""
|
|
609
|
-
Generates one sample image with multiple detections for each of train, test, valid, combining up to 6 images in one output.
|
|
610
|
-
|
|
611
|
-
Args:
|
|
612
|
-
main_dir (str): The main directory containing the train, test, and valid splits.
|
|
613
|
-
"""
|
|
614
|
-
|
|
615
|
-
def resize_and_contain(image, target_size):
|
|
616
|
-
image.thumbnail(target_size, Image.LANCZOS)
|
|
617
|
-
new_image = Image.new("RGB", target_size, (0, 0, 0))
|
|
618
|
-
new_image.paste(image, ((target_size[0] - image.width) // 2, (target_size[1] - image.height) // 2))
|
|
619
|
-
return new_image
|
|
620
|
-
|
|
621
|
-
def draw_bounding_boxes(image, labels_path, class_mapping, color_map):
|
|
622
|
-
draw = ImageDraw.Draw(image)
|
|
623
|
-
img_width, img_height = image.size
|
|
624
|
-
try:
|
|
625
|
-
font = ImageFont.truetype("DejaVuSans-Bold.ttf", 20)
|
|
626
|
-
except IOError:
|
|
627
|
-
font = ImageFont.load_default()
|
|
628
|
-
|
|
629
|
-
if labels_path.exists():
|
|
630
|
-
with open(labels_path, 'r') as label_file:
|
|
631
|
-
for line in label_file.readlines():
|
|
632
|
-
parts = line.strip().split()
|
|
633
|
-
class_idx = int(parts[0])
|
|
634
|
-
center_x, center_y, width, height = map(float, parts[1:])
|
|
635
|
-
x_min = int((center_x - width / 2) * img_width)
|
|
636
|
-
y_min = int((center_y - height / 2) * img_height)
|
|
637
|
-
x_max = int((center_x + width / 2) * img_width)
|
|
638
|
-
y_max = int((center_y + height / 2) * img_height)
|
|
639
|
-
class_name = class_mapping.get(class_idx, str(class_idx))
|
|
640
|
-
color = color_map[class_idx]
|
|
641
|
-
draw.rectangle([x_min, y_min, x_max, y_max], outline=color, width=3)
|
|
642
|
-
draw.text((x_min, y_min - 20), class_name, fill=color, font=font)
|
|
643
|
-
return image
|
|
644
|
-
|
|
645
|
-
def combine_images(images, grid_size=(3, 2), target_size=(416, 416)):
|
|
646
|
-
resized_images = [resize_and_contain(img, target_size) for img in images]
|
|
647
|
-
width, height = target_size
|
|
648
|
-
combined_image = Image.new('RGB', (width * grid_size[0], height * grid_size[1]))
|
|
649
|
-
|
|
650
|
-
for i, img in enumerate(resized_images):
|
|
651
|
-
row = i // grid_size[0]
|
|
652
|
-
col = i % grid_size[0]
|
|
653
|
-
combined_image.paste(img, (col * width, row * height))
|
|
445
|
+
|
|
446
|
+
# Create train and valid directories
|
|
447
|
+
train_dir = output_directory / 'train'
|
|
448
|
+
valid_dir = output_directory / 'valid'
|
|
449
|
+
|
|
450
|
+
train_dir.mkdir(parents=True, exist_ok=True)
|
|
451
|
+
valid_dir.mkdir(parents=True, exist_ok=True)
|
|
452
|
+
|
|
453
|
+
# Create class directories based on class_mapping
|
|
454
|
+
print(f" Creating train and validation directories for {len(class_mapping)} classes...")
|
|
455
|
+
for class_name in class_mapping:
|
|
456
|
+
(train_dir / class_name).mkdir(exist_ok=True)
|
|
457
|
+
(valid_dir / class_name).mkdir(exist_ok=True)
|
|
458
|
+
print(f" ✓ Created directories for class: {class_name}")
|
|
459
|
+
|
|
460
|
+
# Process each class folder and its images
|
|
461
|
+
valid_images = []
|
|
462
|
+
|
|
463
|
+
# First, collect all valid label files
|
|
464
|
+
valid_label_stems = {label_file.stem for label_file in labels_dir.glob("*.txt")
|
|
465
|
+
if label_file.exists() and os.path.getsize(label_file) > 0}
|
|
466
|
+
|
|
467
|
+
print(f" Found {len(valid_label_stems)} valid label files for cropping")
|
|
468
|
+
|
|
469
|
+
print(" Starting image cropping and resizing...")
|
|
470
|
+
total_processed = 0
|
|
471
|
+
total_valid = 0
|
|
472
|
+
|
|
473
|
+
for class_name, image_names in class_mapping.items():
|
|
474
|
+
print(f" Processing class '{class_name}' ({len(image_names)} images)...")
|
|
475
|
+
class_processed = 0
|
|
476
|
+
class_valid = 0
|
|
654
477
|
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
478
|
+
for image_name in image_names:
|
|
479
|
+
# Check if the image exists in the images directory
|
|
480
|
+
image_path = images_dir / image_name
|
|
481
|
+
class_processed += 1
|
|
482
|
+
total_processed += 1
|
|
483
|
+
|
|
484
|
+
if not image_path.exists():
|
|
485
|
+
continue
|
|
486
|
+
|
|
487
|
+
# Skip images that don't have a valid label
|
|
488
|
+
if image_path.stem not in valid_label_stems:
|
|
489
|
+
continue
|
|
490
|
+
|
|
491
|
+
label_file = labels_dir / (image_path.stem + '.txt')
|
|
492
|
+
|
|
493
|
+
try:
|
|
494
|
+
img = Image.open(image_path)
|
|
495
|
+
|
|
496
|
+
if label_file.exists():
|
|
497
|
+
# If label exists, crop the image
|
|
498
|
+
with open(label_file, 'r') as f:
|
|
499
|
+
lines = f.readlines()
|
|
500
|
+
if lines:
|
|
501
|
+
parts = lines[0].strip().split()
|
|
502
|
+
if len(parts) >= 5:
|
|
503
|
+
x_center, y_center, width, height = map(float, parts[1:5])
|
|
504
|
+
|
|
505
|
+
img_width, img_height = img.size
|
|
506
|
+
x_min = int((x_center - width/2) * img_width)
|
|
507
|
+
y_min = int((y_center - height/2) * img_height)
|
|
508
|
+
x_max = int((x_center + width/2) * img_width)
|
|
509
|
+
y_max = int((y_center + height/2) * img_height)
|
|
510
|
+
|
|
511
|
+
x_min = max(0, x_min)
|
|
512
|
+
y_min = max(0, y_min)
|
|
513
|
+
x_max = min(img_width, x_max)
|
|
514
|
+
y_max = min(img_height, y_max)
|
|
515
|
+
|
|
516
|
+
img = img.crop((x_min, y_min, x_max, y_max))
|
|
517
|
+
|
|
518
|
+
img_width, img_height = img.size
|
|
519
|
+
if img_width < img_height:
|
|
520
|
+
# Width is smaller, set to img_size
|
|
521
|
+
new_width = img_size
|
|
522
|
+
new_height = int((img_height / img_width) * img_size)
|
|
523
|
+
else:
|
|
524
|
+
# Height is smaller, set to img_size
|
|
525
|
+
new_height = img_size
|
|
526
|
+
new_width = int((img_width / img_height) * img_size)
|
|
527
|
+
|
|
528
|
+
# Resize the image
|
|
529
|
+
img = img.resize((new_width, new_height), Image.LANCZOS)
|
|
530
|
+
|
|
531
|
+
valid_images.append((image_path, img, class_name))
|
|
532
|
+
class_valid += 1
|
|
533
|
+
total_valid += 1
|
|
534
|
+
except Exception as e:
|
|
535
|
+
print(f" Error processing {image_path}: {e}")
|
|
536
|
+
|
|
537
|
+
print(f" ✓ Class '{class_name}': {class_valid} valid images from {class_processed} processed")
|
|
538
|
+
|
|
539
|
+
print(f" ✓ Successfully processed {total_valid} valid images from {total_processed} total images")
|
|
540
|
+
|
|
541
|
+
# Shuffle and split images
|
|
542
|
+
print(" Shuffling and splitting images into train/validation sets...")
|
|
543
|
+
random.shuffle(valid_images)
|
|
544
|
+
split_idx = int(len(valid_images) * 0.9)
|
|
545
|
+
train_images = valid_images[:split_idx]
|
|
546
|
+
valid_images_split = valid_images[split_idx:]
|
|
547
|
+
|
|
548
|
+
print(f" Split: {len(train_images)} training images, {len(valid_images_split)} validation images")
|
|
549
|
+
|
|
550
|
+
# Save images to train/valid directories
|
|
551
|
+
print(" Saving cropped and resized images...")
|
|
552
|
+
saved_train = 0
|
|
553
|
+
saved_valid = 0
|
|
554
|
+
|
|
555
|
+
for image_set, dest_dir, split_name in [(train_images, train_dir, "train"), (valid_images_split, valid_dir, "valid")]:
|
|
556
|
+
print(f" Saving {len(image_set)} images to {split_name} set...")
|
|
557
|
+
for orig_file, img, class_name in image_set:
|
|
558
|
+
output_path = dest_dir / class_name / (orig_file.stem + '.jpg')
|
|
559
|
+
|
|
560
|
+
# Convert any non-RGB mode to RGB before saving
|
|
561
|
+
if img.mode != 'RGB':
|
|
562
|
+
img = img.convert('RGB')
|
|
563
|
+
|
|
564
|
+
img.save(output_path, format='JPEG', quality=95)
|
|
565
|
+
|
|
566
|
+
if split_name == "train":
|
|
567
|
+
saved_train += 1
|
|
568
|
+
else:
|
|
569
|
+
saved_valid += 1
|
|
570
|
+
|
|
571
|
+
print(f" ✓ Saved {saved_train} train images and {saved_valid} validation images")
|
|
572
|
+
|
|
573
|
+
# Print detailed summary table
|
|
574
|
+
print(f" Final dataset summary:")
|
|
575
|
+
print()
|
|
576
|
+
|
|
577
|
+
# Calculate column widths for proper alignment
|
|
578
|
+
max_class_name_length = max(len(class_name) for class_name in class_mapping.keys())
|
|
579
|
+
class_col_width = max(max_class_name_length, len("Class"))
|
|
580
|
+
|
|
581
|
+
# Print table header
|
|
582
|
+
print(f" {'Class':<{class_col_width}} | {'Train':<7} | {'Valid':<7} | {'Total':<7}")
|
|
583
|
+
print(f" {'-' * class_col_width}-+-{'-' * 7}-+-{'-' * 7}-+-{'-' * 7}")
|
|
584
|
+
|
|
585
|
+
# Print data for each class and calculate totals
|
|
586
|
+
total_train = 0
|
|
587
|
+
total_valid = 0
|
|
588
|
+
total_overall = 0
|
|
589
|
+
|
|
590
|
+
for class_name in sorted(class_mapping.keys()): # Sort for consistent output
|
|
591
|
+
train_count = len(list((train_dir / class_name).glob('*.*')))
|
|
592
|
+
valid_count = len(list((valid_dir / class_name).glob('*.*')))
|
|
593
|
+
class_total = train_count + valid_count
|
|
672
594
|
|
|
673
|
-
|
|
674
|
-
for image_file in image_files[:6]:
|
|
675
|
-
label_file = labels_dir / (image_file.stem + '.txt')
|
|
676
|
-
image = Image.open(image_file)
|
|
677
|
-
image_with_boxes = draw_bounding_boxes(image, label_file, class_mapping, color_map)
|
|
678
|
-
sample_images.append(image_with_boxes)
|
|
595
|
+
print(f" {class_name:<{class_col_width}} | {train_count:<7} | {valid_count:<7} | {class_total:<7}")
|
|
679
596
|
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
combined_image.save(combined_image_path)
|
|
597
|
+
total_train += train_count
|
|
598
|
+
total_valid += valid_count
|
|
599
|
+
total_overall += class_total
|
|
684
600
|
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
""
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
Args:
|
|
691
|
-
temp_dir_path (Path): The path to the temporary directory containing the background images.
|
|
692
|
-
output_directory (Path): The path to the output directory where the split background images will be saved.
|
|
693
|
-
"""
|
|
694
|
-
|
|
695
|
-
image_files = list(Path(background_dir).glob("*.jpg"))
|
|
696
|
-
random.shuffle(image_files)
|
|
697
|
-
|
|
698
|
-
num_images = len(image_files)
|
|
699
|
-
train_split = int(0.8 * num_images)
|
|
700
|
-
valid_split = int(0.1 * num_images)
|
|
701
|
-
|
|
702
|
-
train_files = image_files[:train_split]
|
|
703
|
-
valid_files = image_files[train_split:train_split + valid_split]
|
|
704
|
-
test_files = image_files[train_split + valid_split:]
|
|
705
|
-
|
|
706
|
-
def copy_files(image_list, split):
|
|
707
|
-
for image_file in image_list:
|
|
708
|
-
shutil.copy(image_file, Path(output_directory) / split / 'images' / image_file.name)
|
|
709
|
-
|
|
710
|
-
label_file = Path(output_directory) / split / 'labels' / (image_file.stem + ".txt")
|
|
711
|
-
label_file.touch()
|
|
712
|
-
|
|
713
|
-
copy_files(train_files, 'train')
|
|
714
|
-
copy_files(valid_files, 'valid')
|
|
715
|
-
copy_files(test_files, 'test')
|
|
716
|
-
|
|
717
|
-
print(f"Background data has been split: {len(train_files)} train, {len(valid_files)} valid, {len(test_files)} test")
|
|
601
|
+
# Print totals row
|
|
602
|
+
print(f" {'-' * class_col_width}-+-{'-' * 7}-+-{'-' * 7}-+-{'-' * 7}")
|
|
603
|
+
print(f" {'TOTAL':<{class_col_width}} | {total_train:<7} | {total_valid:<7} | {total_overall:<7}")
|
|
604
|
+
print()
|
|
718
605
|
|
|
719
|
-
|
|
720
|
-
def __count_classes_and_output_table(output_directory: Path, class_idxs: dict):
|
|
721
|
-
"""
|
|
722
|
-
Counts the number of images per class and outputs a table.
|
|
723
|
-
|
|
724
|
-
Args:
|
|
725
|
-
output_directory (Path): The path to the output directory containing the split data.
|
|
726
|
-
class_idxs (dict): A dictionary mapping class indices to class names.
|
|
727
|
-
"""
|
|
728
|
-
|
|
729
|
-
def count_classes_in_split(labels_dir):
|
|
730
|
-
class_counts = defaultdict(int)
|
|
731
|
-
for label_file in os.listdir(labels_dir):
|
|
732
|
-
if label_file.endswith(".txt"):
|
|
733
|
-
label_path = os.path.join(labels_dir, label_file)
|
|
734
|
-
with open(label_path, 'r') as f:
|
|
735
|
-
lines = f.readlines()
|
|
736
|
-
if not lines:
|
|
737
|
-
# Count empty files as 'null' class (background images)
|
|
738
|
-
class_counts['null'] += 1
|
|
739
|
-
else:
|
|
740
|
-
for line in lines:
|
|
741
|
-
class_index = int(line.split()[0])
|
|
742
|
-
class_counts[class_index] += 1
|
|
743
|
-
return class_counts
|
|
744
|
-
|
|
745
|
-
splits = ['train', 'test', 'valid']
|
|
746
|
-
total_counts = defaultdict(int)
|
|
747
|
-
|
|
748
|
-
table = PrettyTable()
|
|
749
|
-
table.field_names = ["Class", "Class Index", "Train Count", "Test Count", "Valid Count", "Total"]
|
|
750
|
-
|
|
751
|
-
split_counts = {split: defaultdict(int) for split in splits}
|
|
752
|
-
|
|
753
|
-
for split in splits:
|
|
754
|
-
labels_dir = output_directory / split / 'labels'
|
|
755
|
-
if not os.path.exists(labels_dir):
|
|
756
|
-
print(f"Warning: {labels_dir} does not exist, skipping {split}.")
|
|
757
|
-
continue
|
|
758
|
-
|
|
759
|
-
class_counts = count_classes_in_split(labels_dir)
|
|
760
|
-
for class_index, count in class_counts.items():
|
|
761
|
-
split_counts[split][class_index] = count
|
|
762
|
-
total_counts[class_index] += count
|
|
763
|
-
|
|
764
|
-
for class_index, total in total_counts.items():
|
|
765
|
-
class_name = class_idxs.get(class_index, "Background" if class_index == 'null' else f"Class {class_index}")
|
|
766
|
-
train_count = split_counts['train'].get(class_index, 0)
|
|
767
|
-
test_count = split_counts['test'].get(class_index, 0)
|
|
768
|
-
valid_count = split_counts['valid'].get(class_index, 0)
|
|
769
|
-
table.add_row([class_name, class_index, train_count, test_count, valid_count, total])
|
|
770
|
-
|
|
771
|
-
print(table)
|
|
772
|
-
|
|
773
|
-
def update_labels(class_mapping: dict, labels_path: Path) -> dict:
|
|
774
|
-
"""
|
|
775
|
-
Updates the labels based on the class mapping.
|
|
776
|
-
|
|
777
|
-
Args:
|
|
778
|
-
class_mapping (dict): A dictionary mapping class names to image file names.
|
|
779
|
-
labels_path (Path): The path to the directory containing the label files.
|
|
780
|
-
|
|
781
|
-
Returns:
|
|
782
|
-
dict: A dictionary mapping class names to class indices.
|
|
783
|
-
"""
|
|
784
|
-
class_index_mapping = {}
|
|
785
|
-
class_index_definition = {}
|
|
786
|
-
|
|
787
|
-
for idx, (class_name, images) in enumerate(class_mapping.items()):
|
|
788
|
-
class_index_definition[idx] = class_name
|
|
789
|
-
for image_name in images:
|
|
790
|
-
class_index_mapping[image_name] = idx
|
|
791
|
-
|
|
792
|
-
for txt_file in labels_path.glob("*.txt"):
|
|
793
|
-
image_name_jpg = txt_file.stem + ".jpg"
|
|
794
|
-
image_name_jpeg = txt_file.stem + ".jpeg"
|
|
795
|
-
|
|
796
|
-
if image_name_jpg in class_index_mapping:
|
|
797
|
-
class_index = class_index_mapping[image_name_jpg]
|
|
798
|
-
elif image_name_jpeg in class_index_mapping:
|
|
799
|
-
class_index = class_index_mapping[image_name_jpeg]
|
|
800
|
-
else:
|
|
801
|
-
print(f"Warning: No corresponding image found for {txt_file.name}")
|
|
802
|
-
continue
|
|
803
|
-
|
|
804
|
-
with open(txt_file, 'r') as file:
|
|
805
|
-
lines = file.readlines()
|
|
806
|
-
|
|
807
|
-
updated_lines = []
|
|
808
|
-
for line in lines:
|
|
809
|
-
parts = line.split()
|
|
810
|
-
if len(parts) > 0:
|
|
811
|
-
parts[0] = str(class_index)
|
|
812
|
-
updated_lines.append(" ".join(parts))
|
|
813
|
-
|
|
814
|
-
with open(txt_file, 'w') as file:
|
|
815
|
-
file.write("\n".join(updated_lines))
|
|
816
|
-
|
|
817
|
-
print(f"Labels updated successfully")
|
|
818
|
-
return class_index_definition
|
|
606
|
+
print(f" ✓ Classification dataset created successfully at: {output_directory}")
|
|
819
607
|
|
|
820
608
|
def count_images_across_splits(output_directory: Path) -> int:
|
|
821
609
|
"""
|
|
822
|
-
Counts the total number of images across train
|
|
610
|
+
Counts the total number of images across train and validation splits for classification dataset.
|
|
823
611
|
|
|
824
612
|
Args:
|
|
825
613
|
output_directory (Path): The path to the output directory containing the split data.
|
|
@@ -828,33 +616,12 @@ def count_images_across_splits(output_directory: Path) -> int:
|
|
|
828
616
|
int: The total number of images across all splits.
|
|
829
617
|
"""
|
|
830
618
|
total_images = 0
|
|
831
|
-
for split in ['train', '
|
|
832
|
-
split_dir = output_directory / split
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
Args:
|
|
842
|
-
output_directory (Path): The path to the output directory where the YAML file will be saved.
|
|
843
|
-
class_idxs (dict): A dictionary mapping class indices to class names.
|
|
844
|
-
"""
|
|
845
|
-
|
|
846
|
-
# Define the structure of the YAML file
|
|
847
|
-
yaml_content = {
|
|
848
|
-
'path': str(output_directory.resolve()),
|
|
849
|
-
'train': 'train/images',
|
|
850
|
-
'val': 'valid/images',
|
|
851
|
-
'test': 'test/images',
|
|
852
|
-
'names': {idx: name for idx, name in class_idxs.items()}
|
|
853
|
-
}
|
|
854
|
-
|
|
855
|
-
# Write the YAML content to a file
|
|
856
|
-
yaml_file_path = output_directory / 'dataset.yaml'
|
|
857
|
-
with open(yaml_file_path, 'w') as yaml_file:
|
|
858
|
-
yaml.dump(yaml_content, yaml_file, default_flow_style=False, sort_keys=False)
|
|
859
|
-
|
|
860
|
-
print(f"YOLOv8 YAML file created at {yaml_file_path}")
|
|
619
|
+
for split in ['train', 'valid']:
|
|
620
|
+
split_dir = output_directory / split
|
|
621
|
+
if split_dir.exists():
|
|
622
|
+
# Count all images in all class subdirectories
|
|
623
|
+
for class_dir in split_dir.iterdir():
|
|
624
|
+
if class_dir.is_dir():
|
|
625
|
+
total_images += len(list(class_dir.glob("*.jpg"))) + len(list(class_dir.glob("*.jpeg")))
|
|
626
|
+
|
|
627
|
+
return total_images
|