bplusplus 1.2.1__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bplusplus might be problematic.
- {bplusplus-1.2.1 → bplusplus-1.2.2}/PKG-INFO +14 -6
- {bplusplus-1.2.1 → bplusplus-1.2.2}/README.md +11 -4
- {bplusplus-1.2.1 → bplusplus-1.2.2}/pyproject.toml +1 -1
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/collect.py +2 -0
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/hierarchical/test.py +81 -83
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/hierarchical/train.py +28 -28
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/prepare.py +181 -59
- {bplusplus-1.2.1 → bplusplus-1.2.2}/LICENSE +0 -0
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/__init__.py +0 -0
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/resnet/test.py +0 -0
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/resnet/train.py +0 -0
- {bplusplus-1.2.1 → bplusplus-1.2.2}/src/bplusplus/train_validate.py +0 -0
--- bplusplus-1.2.1/PKG-INFO
+++ bplusplus-1.2.2/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: bplusplus
-Version: 1.2.1
+Version: 1.2.2
 Summary: A simple method to create AI models for biodiversity, with collect and prepare pipeline
 License: MIT
 Author: Titus Venverloo
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: prettytable (==3.7.0)
 Requires-Dist: pygbif (>=0.6.4,<0.7.0)
 Requires-Dist: requests (==2.25.1)
@@ -111,14 +112,21 @@ This function takes three arguments:
 - **sizes: list = None** - List of sizes to filter by. If None, all sizes will be used, ["large", "medium", "small"].
 
 ```python
-# Prepare data
+# Prepare data (one stage small insects)
 bplusplus.prepare(
     input_directory='/dataset/selected-species',
     output_directory='/dataset/prepared-data',
-    with_background=
-    one_stage=
+    with_background=True,
+    one_stage=True,
     size_filter=True,
-    sizes=["
+    sizes=["small"]
+)
+
+# Prepare data (two stage)
+bplusplus.prepare(
+    input_directory='/dataset/selected-species',
+    output_directory='/dataset/prepared-data',
+    one_stage=False
 )
 ```
 
--- bplusplus-1.2.1/README.md
+++ bplusplus-1.2.2/README.md
@@ -87,14 +87,21 @@ This function takes three arguments:
 - **sizes: list = None** - List of sizes to filter by. If None, all sizes will be used, ["large", "medium", "small"].
 
 ```python
-# Prepare data
+# Prepare data (one stage small insects)
 bplusplus.prepare(
     input_directory='/dataset/selected-species',
     output_directory='/dataset/prepared-data',
-    with_background=
-    one_stage=
+    with_background=True,
+    one_stage=True,
     size_filter=True,
-    sizes=["
+    sizes=["small"]
+)
+
+# Prepare data (two stage)
+bplusplus.prepare(
+    input_directory='/dataset/selected-species',
+    output_directory='/dataset/prepared-data',
+    one_stage=False
 )
 ```
 
--- bplusplus-1.2.1/pyproject.toml
+++ bplusplus-1.2.2/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bplusplus"
-version = "1.2.1"
+version = "1.2.2"
 description = "A simple method to create AI models for biodiversity, with collect and prepare pipeline"
 authors = ["Titus Venverloo <tvenver@mit.edu>", "Deniz Aydemir <deniz@aydemir.us>", "Orlando Closs <orlando.closs@wur.nl>", "Ase Hatveit <aase@mit.edu>"]
 license = "MIT"
--- bplusplus-1.2.1/src/bplusplus/collect.py
+++ bplusplus-1.2.2/src/bplusplus/collect.py
@@ -114,6 +114,8 @@ def __next_batch(parameters: dict[str, Any], total_limit: int, offset: int, curr
     parameters["limit"] = total_limit
     parameters["offset"] = offset
     parameters["mediaType"] = ["StillImage"]
+    parameters["basisOfRecord"] = ["HUMAN_OBSERVATION", "LIVING_SPECIMEN", "MACHINE_OBSERVATION", "OBSERVATION", "OCCURRENCE"]
+    parameters["lifeStage"] = ["Adult"]
     search = pygbif.occurrences.search(**parameters)
     occurrences = search["results"]
     if search["endOfRecords"] or len(current) >= total_limit:
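The two new filters narrow GBIF queries to observation-type records of adult individuals. For reference, the same query expressed as a standalone pygbif call might look like this (a hand-written sketch, not code from the package; the species name is a placeholder):

```python
import pygbif

# Mirror of the parameters collect.py now sets: still images only,
# observation-type records, adult life stage.
search = pygbif.occurrences.search(
    scientificName="Bombus terrestris",  # placeholder species
    mediaType=["StillImage"],
    basisOfRecord=["HUMAN_OBSERVATION", "LIVING_SPECIMEN", "MACHINE_OBSERVATION",
                   "OBSERVATION", "OCCURRENCE"],
    lifeStage=["Adult"],
    limit=50,
    offset=0,
)
print(len(search["results"]), search["endOfRecords"])
```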
--- bplusplus-1.2.1/src/bplusplus/hierarchical/test.py
+++ bplusplus-1.2.2/src/bplusplus/hierarchical/test.py
@@ -115,17 +115,17 @@ class HierarchicalInsectClassifier(nn.Module):
 def get_taxonomy(species_list):
     """
     Retrieves taxonomic information for a list of species from GBIF API.
-    Creates a hierarchical taxonomy dictionary with
+    Creates a hierarchical taxonomy dictionary with family, genus, and species relationships.
     """
     taxonomy = {1: [], 2: {}, 3: {}}
-
-
+    species_to_genus = {}
+    genus_to_family = {}
 
     logger.info(f"Building taxonomy from GBIF for {len(species_list)} species")
 
     print("\nTaxonomy Results:")
     print("-" * 80)
-    print(f"{'Species':<30} {'
+    print(f"{'Species':<30} {'Family':<20} {'Genus':<20} {'Status'}")
     print("-" * 80)
 
     for species_name in species_list:
@@ -136,23 +136,23 @@ def get_taxonomy(species_list):
 
         if data.get('status') == 'ACCEPTED' or data.get('status') == 'SYNONYM':
             family = data.get('family')
-
+            genus = data.get('genus')
 
-            if family and
+            if family and genus:
                 status = "OK"
 
-                print(f"{species_name:<30} {
+                print(f"{species_name:<30} {family:<20} {genus:<20} {status}")
 
-
-
+                species_to_genus[species_name] = genus
+                genus_to_family[genus] = family
 
-                if
-                taxonomy[1].append(
+                if family not in taxonomy[1]:
+                    taxonomy[1].append(family)
 
-                taxonomy[2][
-                taxonomy[3][species_name] =
+                taxonomy[2][genus] = family
+                taxonomy[3][species_name] = genus
             else:
-                error_msg = f"Species '{species_name}' found in GBIF but family and
+                error_msg = f"Species '{species_name}' found in GBIF but family and genus not found, could be spelling error in species, check GBIF"
                 logger.error(error_msg)
                 print(f"{species_name:<30} {'Not found':<20} {'Not found':<20} ERROR")
                 print(f"Error: {error_msg}")
@@ -174,24 +174,24 @@ def get_taxonomy(species_list):
     taxonomy[1] = sorted(list(set(taxonomy[1])))
     print("-" * 80)
 
-
-
+    num_families = len(taxonomy[1])
+    num_genera = len(taxonomy[2])
     num_species = len(taxonomy[3])
 
-    print("\nOrder indices:")
-    for i, order in enumerate(taxonomy[1]):
-        print(f" {i}: {order}")
-
     print("\nFamily indices:")
-    for i, family in enumerate(taxonomy[
+    for i, family in enumerate(taxonomy[1]):
         print(f" {i}: {family}")
 
+    print("\nGenus indices:")
+    for i, genus in enumerate(taxonomy[2].keys()):
+        print(f" {i}: {genus}")
+
     print("\nSpecies indices:")
     for i, species in enumerate(species_list):
         print(f" {i}: {species}")
 
-    logger.info(f"Taxonomy built: {
-    return taxonomy,
+    logger.info(f"Taxonomy built: {num_families} families, {num_genera} genera, {num_species} species")
+    return taxonomy, species_to_genus, genus_to_family
 
 def create_mappings(taxonomy):
     """Create index mappings from taxonomy"""
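Concretely, after this change get_taxonomy returns three structures shaped roughly as below (illustrative taxa, not output from the package):

```python
# Illustrative shapes of the values built above, with made-up taxa.
taxonomy = {
    1: ["Apidae", "Syrphidae"],                         # level 1: sorted family names
    2: {"Bombus": "Apidae", "Eristalis": "Syrphidae"},  # level 2: genus -> family
    3: {"Bombus terrestris": "Bombus",                  # level 3: species -> genus
        "Eristalis tenax": "Eristalis"},
}
species_to_genus = {"Bombus terrestris": "Bombus", "Eristalis tenax": "Eristalis"}
genus_to_family = {"Bombus": "Apidae", "Eristalis": "Syrphidae"}
```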
@@ -244,12 +244,12 @@ class TestTwoStage:
                 saved_species = checkpoint["species_list"]
                 print(f"Saved model was trained on: {', '.join(saved_species)}")
 
-                taxonomy,
+                taxonomy, species_to_genus, genus_to_family = get_taxonomy(species_names)
             else:
-                taxonomy,
+                taxonomy, species_to_genus, genus_to_family = get_taxonomy(species_names)
         else:
             state_dict = checkpoint
-            taxonomy,
+            taxonomy, species_to_genus, genus_to_family = get_taxonomy(species_names)
 
         level_to_idx, idx_to_level = create_mappings(taxonomy)
 
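create_mappings itself is unchanged and not shown in this diff; given the taxonomy layout above, a plausible sketch of what it produces (assumed, not the package's actual implementation) is:

```python
def create_mappings(taxonomy):
    """Assumed behavior: build name <-> index tables for each taxonomic level."""
    level_to_idx, idx_to_level = {}, {}
    for level, classes in taxonomy.items():
        # Level 1 is a list of names; levels 2 and 3 are dicts keyed by name.
        names = classes if isinstance(classes, list) else list(classes.keys())
        level_to_idx[level] = {name: i for i, name in enumerate(names)}
        idx_to_level[level] = {i: name for i, name in enumerate(names)}
    return level_to_idx, idx_to_level
```

This matches how the test code later uses it, e.g. `self.level_to_idx[2][genus_name]` to look up a genus index.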
@@ -259,8 +259,6 @@ class TestTwoStage:
         if hasattr(taxonomy, "items"):
             num_classes_per_level = [len(classes) if isinstance(classes, list) else len(classes.keys())
                                      for level, classes in taxonomy.items()]
-        else:
-            num_classes_per_level = [4, 5, 9] # Example values, adjust as needed
 
         print(f"Using model with class counts: {num_classes_per_level}")
 
@@ -296,8 +294,8 @@ class TestTwoStage:
         print("Model successfully loaded")
         print(f"Using species: {', '.join(species_names)}")
 
-        self.
-        self.
+        self.species_to_genus = species_to_genus
+        self.genus_to_family = genus_to_family
 
     def get_frames(self, test_dir):
         image_dir = os.path.join(test_dir, "images")
@@ -305,10 +303,10 @@ class TestTwoStage:
 
         predicted_frames = []
         predicted_family_frames = []
-
+        predicted_genus_frames = []
         true_species_frames = []
         true_family_frames = []
-
+        true_genus_frames = []
         image_names = []
 
         start_time = time.time() # Start timing
@@ -326,7 +324,7 @@ class TestTwoStage:
             detections = results[0].boxes
             predicted_frame = []
             predicted_family_frame = []
-
+            predicted_genus_frame = []
 
             if detections:
                 for box in detections:
@@ -346,13 +344,13 @@ class TestTwoStage:
                     outputs = self.classification_model(input_tensor)
 
                     # Get all taxonomic level predictions
-
-
+                    family_output = outputs[0] # First output is family (level 1)
+                    genus_output = outputs[1] # Second output is genus (level 2)
                     species_output = outputs[2] # Third output is species (level 3)
 
                     # Get prediction indices
-                    order_idx = order_output.argmax(dim=1).item()
                     family_idx = family_output.argmax(dim=1).item()
+                    genus_idx = genus_output.argmax(dim=1).item()
                     species_idx = species_output.argmax(dim=1).item()
 
                     img_height, img_width, _ = frame.shape
@@ -367,15 +365,15 @@ class TestTwoStage:
                     # Add predictions for each taxonomic level
                     predicted_frame.append([species_idx] + box_coords)
                     predicted_family_frame.append([family_idx] + box_coords)
-
+                    predicted_genus_frame.append([genus_idx] + box_coords)
 
             predicted_frames.append(predicted_frame if predicted_frame else [])
             predicted_family_frames.append(predicted_family_frame if predicted_family_frame else [])
-
+            predicted_genus_frames.append(predicted_genus_frame if predicted_genus_frame else [])
 
             true_species_frame = []
             true_family_frame = []
-
+            true_genus_frame = []
 
             if os.path.exists(label_path) and os.path.getsize(label_path) > 0:
                 with open(label_path, 'r') as f:
@@ -389,22 +387,22 @@ class TestTwoStage:
                         if species_idx < len(self.species_names):
                             species_name = self.species_names[species_idx]
 
-                            if species_name in self.
-
-                                # Get the index of the
-                                if 2 in self.level_to_idx and
-
-
+                            if species_name in self.species_to_genus:
+                                genus_name = self.species_to_genus[species_name]
+                                # Get the index of the genus in the level_to_idx mapping
+                                if 2 in self.level_to_idx and genus_name in self.level_to_idx[2]:
+                                    genus_idx = self.level_to_idx[2][genus_name]
+                                    true_genus_frame.append([genus_idx] + box_coords)
 
-                                if
-
-                                if 1 in self.level_to_idx and
-
-
+                                if genus_name in self.genus_to_family:
+                                    family_name = self.genus_to_family[genus_name]
+                                    if 1 in self.level_to_idx and family_name in self.level_to_idx[1]:
+                                        family_idx = self.level_to_idx[1][family_name]
+                                        true_family_frame.append([family_idx] + box_coords)
 
             true_species_frames.append(true_species_frame if true_species_frame else [])
             true_family_frames.append(true_family_frame if true_family_frame else [])
-
+            true_genus_frames.append(true_genus_frame if true_genus_frame else [])
 
             end_time = time.time() # End timing
 
@@ -416,42 +414,42 @@ class TestTwoStage:
             writer.writerow([
                 "Image Name",
                 "True Species Detections",
+                "True Genus Detections",
                 "True Family Detections",
-                "True Order Detections",
                 "Species Detections",
-                "
-                "
+                "Genus Detections",
+                "Family Detections"
             ])
 
-            for image_name, true_species,
+            for image_name, true_species, true_genus, true_family, species_pred, genus_pred, family_pred in zip(
                 image_names,
                 true_species_frames,
+                true_genus_frames,
                 true_family_frames,
-                true_order_frames,
                 predicted_frames,
-
-
+                predicted_genus_frames,
+                predicted_family_frames
             ):
                 writer.writerow([
                     image_name,
                     true_species,
+                    true_genus,
                     true_family,
-                    true_order,
                     species_pred,
-
-
+                    genus_pred,
+                    family_pred
                 ])
 
         print(f"Results saved to {output_file}")
-        return predicted_frames, true_species_frames, end_time - start_time, predicted_family_frames,
+        return predicted_frames, true_species_frames, end_time - start_time, predicted_genus_frames, predicted_family_frames, true_genus_frames, true_family_frames
 
     def run(self, test_dir):
         results = self.get_frames(test_dir)
         predicted_frames, true_species_frames, total_time = results[0], results[1], results[2]
-
-
-
-
+        predicted_genus_frames = results[3]
+        predicted_family_frames = results[4]
+        true_genus_frames = results[5]
+        true_family_frames = results[6]
 
         num_frames = len(os.listdir(os.path.join(test_dir, 'images')))
         avg_time_per_frame = total_time / num_frames
@@ -461,29 +459,29 @@ class TestTwoStage:
 
         self.calculate_metrics(
             predicted_frames, true_species_frames,
-
-
+            predicted_genus_frames, true_genus_frames,
+            predicted_family_frames, true_family_frames
         )
 
     def calculate_metrics(self, predicted_species_frames, true_species_frames,
-
-
+                          predicted_genus_frames, true_genus_frames,
+                          predicted_family_frames, true_family_frames):
         """Calculate metrics at all taxonomic levels"""
-        # Get list of species, families and
+        # Get list of species, families and genera
         species_list = self.species_names
-
-
+        genus_list = sorted(list(set(self.species_to_genus.values())))
+        family_list = sorted(list(set(self.genus_to_family.values())))
 
         # Print the index mappings we're using for evaluation
         print("\nUsing the following index mappings for evaluation:")
-        print("\nOrder indices:")
-        for i, order in enumerate(order_list):
-            print(f" {i}: {order}")
-
         print("\nFamily indices:")
         for i, family in enumerate(family_list):
             print(f" {i}: {family}")
 
+        print("\nGenus indices:")
+        for i, genus in enumerate(genus_list):
+            print(f" {i}: {genus}")
+
         print("\nSpecies indices:")
         for i, species in enumerate(species_list):
             print(f" {i}: {species}")
@@ -491,11 +489,11 @@ class TestTwoStage:
         # Dictionary to track prediction category counts for debugging
         prediction_counts = {
             "true_species_boxes": sum(len(frame) for frame in true_species_frames),
+            "true_genus_boxes": sum(len(frame) for frame in true_genus_frames),
             "true_family_boxes": sum(len(frame) for frame in true_family_frames),
-            "true_order_boxes": sum(len(frame) for frame in true_order_frames),
             "predicted_species": sum(len(frame) for frame in predicted_species_frames),
-            "
-            "
+            "predicted_genus": sum(len(frame) for frame in predicted_genus_frames),
+            "predicted_family": sum(len(frame) for frame in predicted_family_frames)
         }
 
         print(f"Prediction counts: {prediction_counts}")
@@ -504,11 +502,11 @@ class TestTwoStage:
         print("\n=== Species-level Metrics ===")
         self.get_metrics(predicted_species_frames, true_species_frames, species_list)
 
+        print("\n=== Genus-level Metrics ===")
+        self.get_metrics(predicted_genus_frames, true_genus_frames, genus_list)
+
         print("\n=== Family-level Metrics ===")
         self.get_metrics(predicted_family_frames, true_family_frames, family_list)
-
-        print("\n=== Order-level Metrics ===")
-        self.get_metrics(predicted_order_frames, true_order_frames, order_list)
 
     def get_metrics(self, predicted_frames, true_frames, labels):
         """Calculate metrics for object detection predictions"""
--- bplusplus-1.2.1/src/bplusplus/hierarchical/train.py
+++ bplusplus-1.2.2/src/bplusplus/hierarchical/train.py
@@ -144,17 +144,17 @@ def train_multitask(batch_size=4, epochs=30, patience=3, img_size=640, data_dir=
 def get_taxonomy(species_list):
     """
     Retrieves taxonomic information for a list of species from GBIF API.
-    Creates a hierarchical taxonomy dictionary with
+    Creates a hierarchical taxonomy dictionary with family, genus, and species relationships.
     """
     taxonomy = {1: [], 2: {}, 3: {}}
-
-
+    species_to_genus = {}
+    genus_to_family = {}
 
     logger.info(f"Building taxonomy from GBIF for {len(species_list)} species")
 
     print("\nTaxonomy Results:")
     print("-" * 80)
-    print(f"{'Species':<30} {'
+    print(f"{'Species':<30} {'Family':<20} {'Genus':<20} {'Status'}")
     print("-" * 80)
 
     for species_name in species_list:
@@ -165,23 +165,23 @@ def get_taxonomy(species_list):
 
         if data.get('status') == 'ACCEPTED' or data.get('status') == 'SYNONYM':
             family = data.get('family')
-
+            genus = data.get('genus')
 
-            if family and
+            if family and genus:
                 status = "OK"
 
-                print(f"{species_name:<30} {
+                print(f"{species_name:<30} {family:<20} {genus:<20} {status}")
 
-
-
+                species_to_genus[species_name] = genus
+                genus_to_family[genus] = family
 
-                if
-                taxonomy[1].append(
+                if family not in taxonomy[1]:
+                    taxonomy[1].append(family)
 
-                taxonomy[2][
-                taxonomy[3][species_name] =
+                taxonomy[2][genus] = family
+                taxonomy[3][species_name] = genus
             else:
-                error_msg = f"Species '{species_name}' found in GBIF but family and
+                error_msg = f"Species '{species_name}' found in GBIF but family and genus not found, could be spelling error in species, check GBIF"
                 logger.error(error_msg)
                 print(f"{species_name:<30} {'Not found':<20} {'Not found':<20} ERROR")
                 print(f"Error: {error_msg}")
@@ -203,23 +203,23 @@ def get_taxonomy(species_list):
     taxonomy[1] = sorted(list(set(taxonomy[1])))
     print("-" * 80)
 
-
-
+    num_families = len(taxonomy[1])
+    num_genera = len(taxonomy[2])
     num_species = len(taxonomy[3])
 
-    print("\nOrder indices:")
-    for i, order in enumerate(taxonomy[1]):
-        print(f" {i}: {order}")
-
     print("\nFamily indices:")
-    for i, family in enumerate(taxonomy[
+    for i, family in enumerate(taxonomy[1]):
         print(f" {i}: {family}")
 
+    print("\nGenus indices:")
+    for i, genus in enumerate(taxonomy[2].keys()):
+        print(f" {i}: {genus}")
+
     print("\nSpecies indices:")
     for i, species in enumerate(species_list):
         print(f" {i}: {species}")
 
-    logger.info(f"Taxonomy built: {
+    logger.info(f"Taxonomy built: {num_families} families, {num_genera} genera, {num_species} species")
     return taxonomy
 
 def get_species_from_directory(train_dir):
@@ -276,15 +276,15 @@ class InsectDataset(Dataset):
         self.level_to_idx = level_to_idx
         self.samples = []
 
-
-
+        species_to_genus = {species: genus for species, genus in taxonomy[3].items()}
+        genus_to_family = {genus: family for genus, family in taxonomy[2].items()}
 
         for species_name in os.listdir(root_dir):
             species_path = os.path.join(root_dir, species_name)
             if os.path.isdir(species_path):
-                if species_name in
-
-
+                if species_name in species_to_genus:
+                    genus_name = species_to_genus[species_name]
+                    family_name = genus_to_family[genus_name]
 
                     for img_file in os.listdir(species_path):
                         if img_file.endswith(('.jpg', '.png', '.jpeg')):
@@ -296,7 +296,7 @@ class InsectDataset(Dataset):
                                 # Only add valid images to samples
                                 self.samples.append({
                                     'image_path': img_path,
-                                    'labels': [
+                                    'labels': [family_name, genus_name, species_name]
                                 })
 
                     except Exception as e:
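The [family_name, genus_name, species_name] triple lines up one-to-one with the model's three output heads; turning it into integer targets via level_to_idx would look roughly like this (a self-contained sketch with made-up taxa and an assumed mapping, not the package's code):

```python
# Made-up mapping in the shape create_mappings is assumed to return.
level_to_idx = {
    1: {"Apidae": 0, "Syrphidae": 1},                   # family head
    2: {"Bombus": 0, "Eristalis": 1},                   # genus head
    3: {"Bombus terrestris": 0, "Eristalis tenax": 1},  # species head
}
labels = ["Apidae", "Bombus", "Bombus terrestris"]  # one sample's label triple
targets = [level_to_idx[level][name] for level, name in zip((1, 2, 3), labels)]
print(targets)  # [0, 0, 0]
```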
--- bplusplus-1.2.1/src/bplusplus/prepare.py
+++ bplusplus-1.2.2/src/bplusplus/prepare.py
@@ -94,19 +94,73 @@ def prepare(input_directory: str, output_directory: str, one_stage: bool = False
         DetectionModel, Sequential, Conv, Conv2d, BatchNorm2d,
         SiLU, ReLU, LeakyReLU, MaxPool2d, Linear, Dropout, Upsample,
         Module, ModuleList, ModuleDict,
-        Bottleneck, C2f, SPPF, Detect, Concat, DFL
+        Bottleneck, C2f, SPPF, Detect, Concat, DFL,
+        # Add torch internal classes
+        torch.nn.parameter.Parameter,
+        torch.Tensor,
+        torch._utils._rebuild_tensor_v2,
+        torch._utils._rebuild_parameter
     ])
-
-    model = YOLO(weights_path)
-    model.predict(images_path, conf=0.25, save=True, save_txt=True, project=temp_dir_path)
-    labels_path = temp_dir_path / "predict" / "labels"
 
-
-
-
+    labels_path = temp_dir_path / "predict" / "labels"
+
+    try:
+        print(f"Loading YOLO model from {weights_path}")
+        model = YOLO(weights_path)
+
+        # Get list of all image files
+        image_files = list(images_path.glob('*.jpg'))
+        print(f"Found {len(image_files)} images to process")
+
+        # Ensure predict directory exists
+        predict_dir = temp_dir_path / "predict"
+        predict_dir.mkdir(exist_ok=True)
+        labels_path.mkdir(parents=True, exist_ok=True)
+
+        result_count = 0
+        error_count = 0
+
+        for img_path in image_files:
+            try:
+                results = model.predict(
+                    source=str(img_path),
+                    conf=0.5,
+                    save=True,
+                    save_txt=True,
+                    project=temp_dir_path,
+                    name="predict",
+                    exist_ok=True,
+                    verbose=True
+                )
+
+                result_count += 1
+
+            except Exception as e:
+                error_count += 1
+                print(f"Error processing {img_path.name}: {e}")
+                continue
+
+        print(f"Model prediction completed: {result_count} successful, {error_count} failed")
+        print(f"Checking for labels in {labels_path}")
+
+        # Verify labels were created
+        label_files = list(labels_path.glob("*.txt"))
+        print(f"Found {len(label_files)} label files")
+
+        if len(label_files) == 0:
+            print("WARNING: No label files were created by the model prediction!")
+
+    except Exception as e:
+        print(f"Error during model prediction setup: {e}")
+        import traceback
+        traceback.print_exc()
 
     if one_stage:
 
+        if size_filter and len(sizes) <= 2:
+            __filter_by_size(images_path, labels_path, sizes)
+            print(f"\nFiltered {len(list(images_path.glob('*.jpg')))} images by size out of {original_image_count} input images.\n NOTE: Some images may be filtered due to corruption or inaccurate labels.")
+
         __delete_orphaned_images_and_inferences(images_path, labels_path)
         __delete_invalid_txt_files(images_path, labels_path)
         class_idxs = update_labels(class_mapping, labels_path)
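The classes added to this list are the ones PyTorch 2.4+ must be told about before a weights_only torch.load can unpickle a YOLO checkpoint; the surrounding call (cut off in this hunk) is presumably torch.serialization.add_safe_globals. A minimal standalone sketch of that pattern, with an assumed checkpoint path:

```python
import torch
from ultralytics.nn.tasks import DetectionModel  # class stored inside YOLO .pt files

# Allow-list the classes a trusted checkpoint is known to contain so that
# torch.load(..., weights_only=True) can rebuild them (PyTorch >= 2.4).
torch.serialization.add_safe_globals([
    DetectionModel,
    torch.nn.parameter.Parameter,
    torch.Tensor,
    torch._utils._rebuild_tensor_v2,
    torch._utils._rebuild_parameter,
])
checkpoint = torch.load("weights/yolo.pt", weights_only=True)  # assumed path
```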
@@ -142,15 +196,17 @@ def prepare(input_directory: str, output_directory: str, one_stage: bool = False
 
         __make_yaml_file(output_directory, class_idxs)
     else:
-        try:
-
-
-
-
-
-        except:
-
-
+        # try:
+        #     sized_dir = temp_dir_path / "sized"
+        #     sized_dir.mkdir(parents=True, exist_ok=True)
+        #     __two_stage_update(class_mapping, filtered, sized_dir, images_path)
+        #     __classification_split(sized_dir, output_directory)
+        #     __count_classification_split(output_directory, class_mapping)
+        # except:
+        __delete_orphaned_images_and_inferences(images_path, labels_path)
+        __delete_invalid_txt_files(images_path, labels_path)
+        __classification_split(images_path, labels_path, output_directory, class_mapping)
+        __count_classification_split(output_directory, class_mapping)
 
 def __count_classification_split(output_directory: str, class_mapping: dict):
     """
@@ -186,17 +242,21 @@ def __count_classification_split(output_directory: str, class_mapping: dict):
             class_counts[class_name]
         ])
     print(table)
-    print(f"Saved in {output_directory}")
+    # print(f"Saved in {output_directory}")
 
-def __classification_split(input_directory: str, output_directory: str):
+def __classification_split(input_directory: str, labels_directory: str, output_directory: str, class_mapping: dict):
     """
-    Splits the data into train and validation sets for classification tasks
+    Splits the data into train and validation sets for classification tasks,
+    cropping images according to their YOLO labels but preserving original class structure.
 
     Args:
-        input_directory (str): Path to the input directory containing
+        input_directory (str): Path to the input directory containing images.
+        labels_directory (str): Path to the directory containing YOLO label files.
         output_directory (str): Path to the output directory where train and valid splits will be created.
+        class_mapping (dict): Dictionary mapping class names to image file names.
     """
     input_directory = Path(input_directory)
+    labels_directory = Path(labels_directory)
     output_directory = Path(output_directory)
 
     # Create train and valid directories
@@ -206,45 +266,108 @@ def __classification_split(input_directory: str, output_directory: str):
     train_dir.mkdir(parents=True, exist_ok=True)
     valid_dir.mkdir(parents=True, exist_ok=True)
 
-    #
-    for
-        if not class_dir.is_dir():
-            continue
-
-        class_name = class_dir.name
-        print(f"Processing class: {class_name}")
-
-        # Create corresponding class directories in train and valid
+    # Create class directories based on class_mapping
+    for class_name in class_mapping:
         (train_dir / class_name).mkdir(exist_ok=True)
         (valid_dir / class_name).mkdir(exist_ok=True)
+        print(f"Created directory for class: {class_name}")
+
+    # Process each class folder and its images
+    valid_images = []
+
+    # First, collect all valid label files
+    valid_label_stems = {label_file.stem for label_file in labels_directory.glob("*.txt")
+                         if label_file.exists() and os.path.getsize(label_file) > 0}
+
+    print(f"Found {len(valid_label_stems)} valid label files")
+
+    for class_name, image_names in class_mapping.items():
+        print(f"Processing class: {class_name} with {len(image_names)} images")
 
-
-
-
-        if not image_files:
-            print(f"Warning: No images found in {class_dir}")
-            continue
+        for image_name in image_names:
+            # Check if the image exists directly in the input directory
+            image_path = input_directory / image_name
 
-
-
-
-
-
-
-
-
-        # Copy files to respective directories
-        for img_file in train_files:
-            shutil.copy(img_file, train_dir / class_name / img_file.name)
+            if not image_path.exists():
+                continue
+
+            # Skip images that don't have a valid label
+            if image_path.stem not in valid_label_stems:
+                continue
+
+            label_file = labels_directory / (image_path.stem + '.txt')
 
-
-
-
-
+            try:
+                img = Image.open(image_path)
+
+                if label_file.exists():
+                    # If label exists, crop the image
+                    with open(label_file, 'r') as f:
+                        lines = f.readlines()
+                        if lines:
+                            parts = lines[0].strip().split()
+                            if len(parts) >= 5:
+                                x_center, y_center, width, height = map(float, parts[1:5])
+
+                                img_width, img_height = img.size
+                                x_min = int((x_center - width/2) * img_width)
+                                y_min = int((y_center - height/2) * img_height)
+                                x_max = int((x_center + width/2) * img_width)
+                                y_max = int((y_center + height/2) * img_height)
+
+                                x_min = max(0, x_min)
+                                y_min = max(0, y_min)
+                                x_max = min(img_width, x_max)
+                                y_max = min(img_height, y_max)
+
+                                img = img.crop((x_min, y_min, x_max, y_max))
+
+                img_width, img_height = img.size
+                if img_width < img_height:
+                    # Width is smaller, make it 40
+                    new_width = 40
+                    new_height = int((img_height / img_width) * 40)
+                else:
+                    # Height is smaller, make it 40
+                    new_height = 40
+                    new_width = int((img_width / img_height) * 40)
+
+                #blur the image
+                img = img.resize((new_width, new_height), Image.LANCZOS)
+
+                valid_images.append((image_path, img, class_name))
+            except Exception as e:
+                print(f"Error processing {image_path}: {e}")
+
+    print(f"Successfully processed {len(valid_images)} valid images for classification")
+
+    # Shuffle and split images
+    random.shuffle(valid_images)
+    split_idx = int(len(valid_images) * 0.9)
+    train_images = valid_images[:split_idx]
+    valid_images = valid_images[split_idx:]
+
+    print(f"Split into {len(train_images)} training images and {len(valid_images)} validation images")
+
+    # Save images to train/valid directories
+    for image_set, dest_dir in [(train_images, train_dir), (valid_images, valid_dir)]:
+        for orig_file, img, class_name in image_set:
+            output_path = dest_dir / class_name / (orig_file.stem + '.jpg')
+
+            # Convert any non-RGB mode to RGB before saving
+            if img.mode != 'RGB':
+                img = img.convert('RGB')
+
+            img.save(output_path, format='JPEG', quality=95)
 
-
+    # Print summary
+    print(f"\nData split complete. Images saved to train and validation sets in {output_directory}")
+    for class_name in class_mapping:
+        train_count = len(list((train_dir / class_name).glob('*.*')))
+        valid_count = len(list((valid_dir / class_name).glob('*.*')))
+        print(f"  - {class_name}: {train_count} images in train, {valid_count} images in valid")
 
-def
+def __filter_by_size(images_path: Path, labels_path: Path, sizes: list):
     """
     Filters images by size and updates labels accordingly.
 
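The cropping arithmetic added above follows the standard YOLO label convention: each label line stores a class index plus a normalized box center and size, so pixel corners come from scaling, offsetting, and clamping. Isolated as a helper (a sketch for reference, not the package's code):

```python
def yolo_to_pixel_box(x_center, y_center, width, height, img_width, img_height):
    """Convert a normalized YOLO box (cx, cy, w, h in [0, 1]) to clamped pixel corners."""
    x_min = max(0, int((x_center - width / 2) * img_width))
    y_min = max(0, int((y_center - height / 2) * img_height))
    x_max = min(img_width, int((x_center + width / 2) * img_width))
    y_max = min(img_height, int((y_center + height / 2) * img_height))
    return x_min, y_min, x_max, y_max

# A box centered in a 640x480 image covering half of each dimension:
print(yolo_to_pixel_box(0.5, 0.5, 0.5, 0.5, 640, 480))  # (160, 120, 480, 360)
```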
@@ -285,7 +408,6 @@ def filter_by_size(images_path: Path, labels_path: Path, sizes: list):
            label_file.unlink()
        except FileNotFoundError:
            pass
-    return filtered_images
 
 def __two_stage_update(class_mapping: dict, filtered_images: Path, output_directory: Path, images_path: Path):
     """
@@ -367,7 +489,7 @@ def __delete_orphaned_images_and_inferences(images_path: Path, labels_path: Path
         image_file_jpeg = images_path / (txt_file.stem + ".jpeg")
 
         if not (image_file_jpg.exists() or image_file_jpeg.exists()):
-            print(f"Deleting {txt_file.name} - No corresponding image file")
+            # print(f"Deleting {txt_file.name} - No corresponding image file")
             txt_file.unlink()
 
     label_stems = {txt_file.stem for txt_file in labels_path.glob("*.txt")}
@@ -375,7 +497,7 @@ def __delete_orphaned_images_and_inferences(images_path: Path, labels_path: Path
 
     for image_file in image_files:
         if image_file.stem not in label_stems:
-            print(f"Deleting orphaned image: {image_file.name}")
+            # print(f"Deleting orphaned image: {image_file.name}")
             image_file.unlink()
 
     print("Orphaned images files without corresponding labels have been deleted.")
@@ -400,7 +522,7 @@ def __delete_invalid_txt_files(images_path: Path, labels_path: Path):
         lines = file.readlines()
 
         if len(lines) == 0 or len(lines) > 1:
-            print(f"Deleting {txt_file.name} - Invalid file")
+            # print(f"Deleting {txt_file.name} - Invalid file")
             txt_file.unlink()
 
         image_file_jpg = images_path / (txt_file.stem + ".jpg")
@@ -408,10 +530,10 @@ def __delete_invalid_txt_files(images_path: Path, labels_path: Path):
 
         if image_file_jpg.exists():
             image_file_jpg.unlink()
-            print(f"Deleted corresponding image file: {image_file_jpg.name}")
+            # print(f"Deleted corresponding image file: {image_file_jpg.name}")
         elif image_file_jpeg.exists():
             image_file_jpeg.unlink()
-            print(f"Deleted corresponding image file: {image_file_jpeg.name}")
+            # print(f"Deleted corresponding image file: {image_file_jpeg.name}")
 
     print("Invalid text files and their corresponding images files have been deleted.")
 
Files without changes: LICENSE, src/bplusplus/__init__.py, src/bplusplus/resnet/test.py, src/bplusplus/resnet/train.py, src/bplusplus/train_validate.py