megadetector-10.0.10-py3-none-any.whl → megadetector-10.0.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/data_management/animl_to_md.py +5 -2
- megadetector/data_management/cct_json_utils.py +4 -2
- megadetector/data_management/cct_to_md.py +5 -4
- megadetector/data_management/cct_to_wi.py +5 -1
- megadetector/data_management/coco_to_yolo.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/databases/subset_json_db.py +0 -3
- megadetector/data_management/generate_crops_from_cct.py +6 -4
- megadetector/data_management/get_image_sizes.py +5 -35
- megadetector/data_management/labelme_to_coco.py +10 -6
- megadetector/data_management/labelme_to_yolo.py +19 -28
- megadetector/data_management/lila/create_lila_test_set.py +22 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
- megadetector/data_management/lila/lila_common.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
- megadetector/data_management/ocr_tools.py +6 -10
- megadetector/data_management/read_exif.py +59 -16
- megadetector/data_management/remap_coco_categories.py +1 -1
- megadetector/data_management/remove_exif.py +10 -5
- megadetector/data_management/rename_images.py +20 -13
- megadetector/data_management/resize_coco_dataset.py +10 -4
- megadetector/data_management/speciesnet_to_md.py +3 -3
- megadetector/data_management/yolo_output_to_md_output.py +3 -1
- megadetector/data_management/yolo_to_coco.py +28 -19
- megadetector/detection/change_detection.py +26 -18
- megadetector/detection/process_video.py +1 -1
- megadetector/detection/pytorch_detector.py +5 -5
- megadetector/detection/run_detector.py +34 -10
- megadetector/detection/run_detector_batch.py +2 -1
- megadetector/detection/run_inference_with_yolov5_val.py +3 -1
- megadetector/detection/run_md_and_speciesnet.py +215 -101
- megadetector/detection/run_tiled_inference.py +7 -7
- megadetector/detection/tf_detector.py +1 -1
- megadetector/detection/video_utils.py +9 -6
- megadetector/postprocessing/add_max_conf.py +4 -4
- megadetector/postprocessing/categorize_detections_by_size.py +3 -2
- megadetector/postprocessing/classification_postprocessing.py +7 -8
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +49 -27
- megadetector/postprocessing/convert_output_format.py +8 -6
- megadetector/postprocessing/create_crop_folder.py +13 -4
- megadetector/postprocessing/generate_csv_report.py +22 -8
- megadetector/postprocessing/load_api_results.py +8 -4
- megadetector/postprocessing/md_to_coco.py +2 -3
- megadetector/postprocessing/md_to_labelme.py +12 -8
- megadetector/postprocessing/md_to_wi.py +2 -1
- megadetector/postprocessing/merge_detections.py +4 -6
- megadetector/postprocessing/postprocess_batch_results.py +4 -3
- megadetector/postprocessing/remap_detection_categories.py +6 -3
- megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
- megadetector/postprocessing/separate_detections_into_folders.py +10 -4
- megadetector/postprocessing/subset_json_detector_output.py +1 -1
- megadetector/postprocessing/top_folders_to_bottom.py +22 -7
- megadetector/postprocessing/validate_batch_results.py +1 -1
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
- megadetector/taxonomy_mapping/species_lookup.py +51 -2
- megadetector/utils/ct_utils.py +9 -4
- megadetector/utils/extract_frames_from_video.py +4 -0
- megadetector/utils/gpu_test.py +6 -6
- megadetector/utils/md_tests.py +21 -21
- megadetector/utils/path_utils.py +112 -44
- megadetector/utils/split_locations_into_train_val.py +0 -4
- megadetector/utils/url_utils.py +5 -3
- megadetector/utils/wi_taxonomy_utils.py +37 -8
- megadetector/utils/write_html_image_list.py +1 -2
- megadetector/visualization/plot_utils.py +31 -19
- megadetector/visualization/render_images_with_thumbnails.py +3 -0
- megadetector/visualization/visualization_utils.py +18 -7
- megadetector/visualization/visualize_db.py +9 -26
- megadetector/visualization/visualize_video_output.py +14 -2
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/RECORD +80 -80
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/utils/path_utils.py
CHANGED
@@ -152,7 +152,6 @@ def folder_list(base_dir,
     folders = []
 
     if recursive:
-        folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
                 folders.append(os.path.join(root, d))

@@ -370,7 +369,9 @@ def safe_create_link(link_exists,link_new):
             os.remove(link_new)
             os.symlink(link_exists,link_new)
     else:
-        os.
+        link_new_dir = os.path.dirname(link_new)
+        if len(link_new_dir) > 0:
+            os.makedirs(link_new_dir,exist_ok=True)
         os.symlink(link_exists,link_new)
 
 # ...def safe_create_link(...)

@@ -988,7 +989,9 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     if verbose:
         print('{} to {}'.format(action_string,target_fn))
 
-    os.
+    target_dir = os.path.dirname(target_fn)
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:

@@ -1038,10 +1041,11 @@ def parallel_copy_files(input_file_to_output_file,
                     input_output_tuples)):
                 pbar.update()
     finally:
-        pool
-
-
-
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print("Pool closed and joined parallel file copying")
 
 # ...def parallel_copy_files(...)
 

@@ -1100,15 +1104,24 @@ def parallel_delete_files(input_files,
 
     n_workers = min(max_workers, len(input_files))
 
-
-    pool = ThreadPool(n_workers)
-    else:
-        pool = Pool(n_workers)
+    pool = None
 
-
-
-
-
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i, _ in enumerate(pool.imap_unordered(partial(delete_file, verbose=verbose),
+                                                      input_files)):
+                pbar.update()
+    finally:
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and joined for file deletion')
 
 # ...def parallel_delete_files(...)
 

@@ -1185,8 +1198,6 @@ def parallel_get_file_sizes(filenames,
         dict: dictionary mapping filenames to file sizes in bytes
     """
 
-    n_workers = min(max_workers,len(filenames))
-
     folder_name = None
 
     if isinstance(filenames,str):

@@ -1204,23 +1215,37 @@ def parallel_get_file_sizes(filenames,
 
     assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
 
+    n_workers = min(max_workers,len(filenames))
+
     if verbose:
         print('Creating worker pool')
 
-
-    pool_string = 'thread'
-    pool = ThreadPool(n_workers)
-    else:
-        pool_string = 'process'
-        pool = Pool(n_workers)
+    pool = None
 
-
-    print('Created a {} pool of {} workers'.format(
-        pool_string,n_workers))
+    try:
 
-
-
-
+        if use_threads:
+            pool_string = 'thread'
+            pool = ThreadPool(n_workers)
+        else:
+            pool_string = 'process'
+            pool = Pool(n_workers)
+
+        if verbose:
+            print('Created a {} pool of {} workers'.format(
+                pool_string,n_workers))
+
+        # This returns (filename,size) tuples
+        get_size_results = list(tqdm(pool.imap(
+            partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
+
+    finally:
+
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and join for file size collection')
 
     to_return = {}
     for r in get_size_results:

@@ -1275,6 +1300,8 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compress_
 
     return output_fn
 
+# ...def zip_file(...)
+
 
 def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):

@@ -1315,6 +1342,8 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
 
     return output_fn
 
+# ...def add_files_to_single_tar_file(...)
+
 
 def zip_files_into_single_zipfile(input_files,
                                   output_fn,

@@ -1359,6 +1388,8 @@ def zip_files_into_single_zipfile(input_files,
 
     return output_fn
 
+# ...def zip_files_into_single_zipfile(...)
+
 
 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compress_level=9):
     """

@@ -1382,7 +1413,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
+            return output_fn
 
     if verbose:
         print('Zipping {} to {} (compression level {})'.format(

@@ -1400,6 +1431,8 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
 
     return output_fn
 
+# ...def zip_folder(...)
+
 
 def parallel_zip_files(input_files,
                        max_workers=16,

@@ -1428,11 +1461,22 @@ def parallel_zip_files(input_files,
     else:
         pool = Pool(n_workers)
 
-
-
-
-
-
+    try:
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
+                    output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
+                    input_files)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+# ...def parallel_zip_files(...)
 
 
 def parallel_zip_folders(input_folders,

@@ -1462,12 +1506,23 @@ def parallel_zip_folders(input_folders,
     else:
         pool = Pool(n_workers)
 
-
-
-
-
-
-
+    try:
+
+        with tqdm(total=len(input_folders)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(
+                    partial(zip_folder,overwrite=overwrite,
+                            compress_level=compress_level,verbose=verbose),
+                    input_folders)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel folder zipping')
+
+# ...def parallel_zip_folders(...)
 
 
 def zip_each_file_in_folder(folder_name,

@@ -1510,6 +1565,8 @@ def zip_each_file_in_folder(folder_name,
                        use_threads=use_threads,compress_level=compress_level,
                        overwrite=overwrite,verbose=verbose)
 
+# ...def zip_each_file_in_folder(...)
+
 
 def unzip_file(input_file, output_folder=None):
     """

@@ -1617,9 +1674,20 @@ def parallel_compute_file_hashes(filenames,
     else:
         pool = Pool(n_workers)
 
-
-
-
+    try:
+
+        results = list(tqdm(pool.imap(
+            partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
+            filenames), total=len(filenames)))
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+    # ...if we are/aren't parallelizing
 
     assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
 
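Note: the recurring change in path_utils.py is wrapping worker-pool usage in try/finally so the pool is always closed and joined, even when a worker raises. A minimal sketch of that pattern, assuming a hypothetical process_item worker that stands in for delete_file, zip_file, etc. (not a package function):

from functools import partial
from multiprocessing.pool import Pool, ThreadPool

from tqdm import tqdm

def process_item(item, verbose=False):
    # Hypothetical per-item worker, standing in for delete_file, zip_file, etc.
    if verbose:
        print('Processing {}'.format(item))
    return item

def parallel_process(items, max_workers=8, use_threads=True, verbose=False):
    n_workers = min(max_workers, len(items))
    pool = None
    try:
        pool = ThreadPool(n_workers) if use_threads else Pool(n_workers)
        with tqdm(total=len(items)) as pbar:
            for _ in pool.imap_unordered(partial(process_item, verbose=verbose), items):
                pbar.update()
    finally:
        # Runs even if a worker raised, so the pool is never leaked
        if pool is not None:
            pool.close()
            pool.join()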
megadetector/utils/split_locations_into_train_val.py
CHANGED

@@ -221,14 +221,10 @@ def split_locations_into_train_val(location_to_category_counts,
     weighted_average_error,weighted_category_errors,category_to_val_fraction = \
         compute_seed_errors(min_error_seed)
 
-    random_seed = min_error_seed
-
-    category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
                                                         sort_values=category_id_to_count,
                                                         reverse=True)
 
-
     print('Val fractions by category:\n')
 
     for category in category_to_val_fraction:
megadetector/utils/url_utils.py
CHANGED
@@ -132,7 +132,8 @@ def download_url(url,
         if verbose:
             print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
         target_dir = os.path.dirname(destination_filename)
-
+        if len(target_dir) > 0:
+            os.makedirs(target_dir,exist_ok=True)
         urllib.request.urlretrieve(url, destination_filename, progress_updater)
         assert(os.path.isfile(destination_filename))
         n_bytes = os.path.getsize(destination_filename)

@@ -800,8 +801,9 @@ class TestUrlUtils:
 def _test_url_utils():
     """
     Runs all tests in the TestUrlUtils class. I generally disable this during testing
-    because it creates irritating nondeterminism
-    a module that changes
+    because it creates irritating nondeterminism (because it depends on downloading
+    stuff from the Internet), and this is neither a core module nor a module that changes
+    often.
     """
 
     test_instance = TestUrlUtils()
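Note: the download_url fix (and the similar changes in path_utils.py above) only calls os.makedirs when the destination path actually has a directory component, because os.path.dirname returns an empty string for a bare filename and os.makedirs('') raises FileNotFoundError. A hedged sketch of that guard as a standalone helper (ensure_parent_dir is hypothetical, not a package function):

import os

def ensure_parent_dir(path):
    # os.path.dirname('file.txt') is '', and os.makedirs('') raises
    # FileNotFoundError, so only create the parent when there is one
    parent = os.path.dirname(path)
    if len(parent) > 0:
        os.makedirs(parent, exist_ok=True)

ensure_parent_dir('downloads/images/img_0001.jpg')  # creates downloads/images if needed
ensure_parent_dir('img_0001.jpg')                   # no directory component, no-op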
megadetector/utils/wi_taxonomy_utils.py
CHANGED

@@ -311,7 +311,8 @@ def taxonomy_info_to_taxonomy_string(taxonomy_info, include_taxon_id_and_common_
 def generate_whole_image_detections_for_classifications(classifications_json_file,
                                                          detections_json_file,
                                                          ensemble_json_file=None,
-                                                         ignore_blank_classifications=True
+                                                         ignore_blank_classifications=True,
+                                                         verbose=True):
     """
     Given a set of classification results in SpeciesNet format that were likely run on
     already-cropped images, generate a file of [fake] detections in SpeciesNet format in which each

@@ -324,6 +325,7 @@ def generate_whole_image_detections_for_classifications(classifications_json_fil
             and classfications
         ignore_blank_classifications (bool, optional): use non-top classifications when
             the top classification is "blank" or "no CV result"
+        verbose (bool, optional): enable additional debug output
 
     Returns:
         dict: the contents of [detections_json_file]

@@ -336,16 +338,37 @@ def generate_whole_image_detections_for_classifications(classifications_json_fil
     output_predictions = []
     ensemble_predictions = []
 
-    # prediction = predictions[
-    for prediction in predictions:
+    # i_prediction = 0; prediction = predictions[i_prediction]
+    for i_prediction,prediction in enumerate(predictions):
 
         output_prediction = {}
         output_prediction['filepath'] = prediction['filepath']
         i_score = 0
+
         if ignore_blank_classifications:
+
             while (prediction['classifications']['classes'][i_score] in \
                    (blank_prediction_string,no_cv_result_prediction_string)):
+
                 i_score += 1
+                if (i_score >= len(prediction['classifications']['classes'])):
+
+                    if verbose:
+
+                        print('Ignoring blank classifications, but ' + \
+                              'image {} has no non-blank values'.format(
+                              i_prediction))
+
+                    # Just use the first one
+                    i_score = 0
+                    break
+
+                # ...if we passed the last prediction
+
+            # ...iterate over classes within this prediction
+
+        # ...if we're supposed to ignore blank classifications
+
 
         top_classification = prediction['classifications']['classes'][i_score]
         top_classification_score = prediction['classifications']['scores'][i_score]
         if is_animal_classification(top_classification):

@@ -450,8 +473,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,
 
     # Round floating-point values (confidence scores, coordinates) to a
     # reasonable number of decimal places
-    if max_decimals is not None and max_decimals > 0:
-        round_floats_in_nested_dict(predictions)
+    if (max_decimals is not None) and (max_decimals > 0):
+        round_floats_in_nested_dict(predictions, decimal_places=max_decimals)
 
     predictions = predictions['predictions']
     assert isinstance(predictions,list)

@@ -714,7 +737,9 @@ def generate_predictions_json_from_md_results(md_results_file,
 
     # ...for each image
 
-    os.
+    output_dir = os.path.dirname(predictions_json_file)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir,exist_ok=True)
     with open(predictions_json_file,'w') as f:
         json.dump(output_dict,f,indent=1)
 

@@ -788,7 +813,9 @@ def generate_instances_json_from_folder(folder,
     to_return = {'instances':instances}
 
     if output_file is not None:
-        os.
+        output_dir = os.path.dirname(output_file)
+        if len(output_dir) > 0:
+            os.makedirs(output_dir,exist_ok=True)
         with open(output_file,'w') as f:
             json.dump(to_return,f,indent=1)
 

@@ -870,7 +897,9 @@ def merge_prediction_json_files(input_prediction_files,output_prediction_file):
 
     output_dict = {'predictions':predictions}
 
-    os.
+    output_dir = os.path.dirname(output_prediction_file)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir,exist_ok=True)
     with open(output_prediction_file,'w') as f:
         json.dump(output_dict,f,indent=1)
 
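Note: the largest change here hardens blank-classification handling in generate_whole_image_detections_for_classifications: when every class for an image is a blank/no-CV-result placeholder, the loop now falls back to the first class instead of indexing past the end of the list. A hedged standalone sketch of that selection logic (top_non_blank_index is hypothetical, and the literal placeholder strings stand in for blank_prediction_string and no_cv_result_prediction_string):

BLANK_VALUES = ('blank', 'no CV result')

def top_non_blank_index(classes, ignore_blank=True, verbose=False):
    # Index of the highest-ranked class that isn't a blank/no-CV placeholder,
    # falling back to index 0 when every class is a placeholder
    i_score = 0
    if ignore_blank:
        while classes[i_score] in BLANK_VALUES:
            i_score += 1
            if i_score >= len(classes):
                if verbose:
                    print('No non-blank classes available, using the first one')
                i_score = 0
                break
    return i_score

assert top_non_blank_index(['blank', 'deer', 'no CV result']) == 1
assert top_non_blank_index(['blank', 'no CV result']) == 0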
megadetector/utils/write_html_image_list.py
CHANGED

@@ -110,7 +110,6 @@ def write_html_image_list(filename=None,images=None,options=None):
         if 'linkTarget' not in image_info:
             image_info['linkTarget'] = ''
         if 'textStyle' not in image_info:
-            text_style = options['defaultTextStyle']
             image_info['textStyle'] = options['defaultTextStyle']
         images[i_image] = image_info
 

@@ -185,7 +184,7 @@ def write_html_image_list(filename=None,images=None,options=None):
     if len(options['pageTitle']) > 0:
         title_string = '<title>{}</title>'.format(options['pageTitle'])
 
-    f_html.write('<html>{}
+    f_html.write('<html><head>{}</head><body>\n'.format(title_string))
 
     f_html.write(options['headerHtml'])
 
megadetector/visualization/plot_utils.py
CHANGED

@@ -126,19 +126,16 @@ def plot_precision_recall_curve(precisions,
     ax.step(recalls, precisions, color='b', alpha=0.2, where='post')
     ax.fill_between(recalls, precisions, alpha=0.2, color='b', step='post')
 
-
-
-
-
-
-    ax.set_xlabel('Recall')
-    ax.set_ylabel('Precision')
-    ax.set_title(title)
-    ax.set_xlim(xlim[0],xlim[1])
-    ax.set_ylim(ylim[0],ylim[1])
+    ax.set_xlabel('Recall')
+    ax.set_ylabel('Precision')
+    ax.set_title(title)
+    ax.set_xlim(xlim[0],xlim[1])
+    ax.set_ylim(ylim[0],ylim[1])
 
     return fig
 
+# ...def plot_precision_recall_curve(...)
+
 
 def plot_stacked_bar_chart(data,
                            series_labels=None,

@@ -174,17 +171,21 @@ def plot_stacked_bar_chart(data,
 
     # stacked bar charts are made with each segment starting from a y position
     cumulative_size = np.zeros(num_columns)
-    for
-
-
+    for i_row, row_data in enumerate(data):
+        if series_labels is None:
+            label = 'series_{}'.format(str(i_row).zfill(2))
+        else:
+            label = series_labels[i_row]
+        ax.bar(ind, row_data, bottom=cumulative_size, label=label,
+               color=colors[i_row])
         cumulative_size += row_data
 
-    if col_labels and len(col_labels) < 25:
+    if (col_labels is not None) and (len(col_labels) < 25):
         ax.set_xticks(ind)
         ax.set_xticklabels(col_labels, rotation=90)
-    elif col_labels:
+    elif (col_labels is not None):
         ax.set_xticks(list(range(0, len(col_labels), 20)))
-        ax.set_xticklabels(col_labels, rotation=90)
+        ax.set_xticklabels(col_labels[::20], rotation=90)
 
     if x_label is not None:
         ax.set_xlabel(x_label)

@@ -202,6 +203,8 @@ def plot_stacked_bar_chart(data,
 
     return fig
 
+# ...def plot_stacked_bar_chart(...)
+
 
 def calibration_ece(true_scores, pred_scores, num_bins):
     r"""

@@ -245,10 +248,17 @@ def calibration_ece(true_scores, pred_scores, num_bins):
     ece = np.abs(accs - confs) @ weights
     return accs, confs, ece
 
+# ...def calibration_ece(...)
+
 
-def plot_calibration_curve(true_scores,
-
-
+def plot_calibration_curve(true_scores,
+                           pred_scores,
+                           num_bins,
+                           name='calibration',
+                           plot_perf=True,
+                           plot_hist=True,
+                           ax=None,
+                           **fig_kwargs):
     """
     Plots a calibration curve.
 

@@ -295,3 +305,5 @@ def plot_calibration_curve(true_scores, pred_scores, num_bins,
     fig.legend(loc='upper left', bbox_to_anchor=(0.15, 0.85))
 
     return ax.figure
+
+# ...def plot_calibration_curve(...)
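Note: plot_calibration_curve now spells its parameters out one per line. Based only on the signature and the return ax.figure statement visible in this diff, a hedged usage sketch might look like the following; it assumes true_scores are binary labels and pred_scores are confidences (consistent with calibration_ece, but not spelled out here):

import numpy as np
from megadetector.visualization.plot_utils import plot_calibration_curve

# Synthetic detector confidences and binary ground-truth labels
rng = np.random.default_rng(0)
pred_scores = rng.uniform(size=1000)
true_scores = (rng.uniform(size=1000) < pred_scores).astype(int)

fig = plot_calibration_curve(true_scores,
                             pred_scores,
                             num_bins=10,
                             name='MegaDetector calibration',
                             plot_perf=True,
                             plot_hist=True)
fig.savefig('calibration.png')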
megadetector/visualization/render_images_with_thumbnails.py
CHANGED

@@ -185,6 +185,9 @@ def render_images_with_thumbnails(
     # ...for each crop
 
     # Write output image to disk
+    parent_dir = os.path.dirname(output_image_filename)
+    if len(parent_dir) > 0:
+        os.makedirs(parent_dir,exist_ok=True)
     output_image.save(output_image_filename)
 
 # ...def render_images_with_thumbnails(...)
megadetector/visualization/visualization_utils.py
CHANGED

@@ -1272,7 +1272,7 @@ def gray_scale_fraction(image,crop_size=(0.1,0.1)):
     if crop_size[0] > 0 or crop_size[1] > 0:
 
         assert (crop_size[0] + crop_size[1]) < 1.0, \
-
+            'Illegal crop size: {}'.format(str(crop_size))
 
         top_crop_pixels = int(image.height * crop_size[0])
         bottom_crop_pixels = int(image.height * crop_size[1])

@@ -1391,7 +1391,9 @@ def _resize_absolute_image(input_output_files,
         status = 'error'
         error = str(e)
 
-    return {'input_fn':input_fn_abs,
+    return {'input_fn':input_fn_abs,
+            'output_fn':output_fn_abs,
+            'status':status,
             'error':error}
 
 # ..._resize_absolute_image(...)

@@ -1460,6 +1462,7 @@ def resize_images(input_file_to_output_file,
     pool = None
 
     try:
+
         if pool_type == 'thread':
             pool = ThreadPool(n_workers); poolstring = 'threads'
         else:

@@ -1477,10 +1480,13 @@ def resize_images(input_file_to_output_file,
                     quality=quality)
 
         results = list(tqdm(pool.imap(p, input_output_file_pairs),total=len(input_output_file_pairs)))
+
     finally:
-
-        pool
-
+
+        if pool is not None:
+            pool.close()
+            pool.join()
+            print('Pool closed and joined for image resizing')
 
     return results
 

@@ -1680,8 +1686,13 @@ def parallel_get_image_sizes(filenames,
     else:
         pool = Pool(n_workers)
 
-
-
+    try:
+        results = list(tqdm(pool.imap(
+            partial(get_image_size,verbose=verbose),filenames), total=len(filenames)))
+    finally:
+        pool.close()
+        pool.join()
+        print('Pool closed and joined for image size retrieval')
 
     assert len(filenames) == len(results), 'Internal error in parallel_get_image_sizes'
 
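Note: the _resize_absolute_image fix restores the 'output_fn' and 'status' keys that had been dropped from the returned dict. A hedged sketch of how a caller of resize_images might filter failures, assuming (as the single return statement suggests) that success-path results carry the same keys:

# [results] is the list returned by resize_images(); each entry is a dict with
# 'input_fn', 'output_fn', 'status', and 'error' keys
errors = [r for r in results if r['status'] == 'error']
for r in errors:
    print('Failed to resize {} to {}: {}'.format(r['input_fn'], r['output_fn'], r['error']))
print('{} of {} resize operations failed'.format(len(errors), len(results)))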
megadetector/visualization/visualize_db.py
CHANGED

@@ -102,10 +102,6 @@ class DbVizOptions:
         #: :meta private:
         self.multiple_categories_tag = '*multiple*'
 
-        #: We sometimes flatten image directories by replacing a path separator with
-        #: another character. Leave blank for the typical case where this isn't necessary.
-        self.pathsep_replacement = '' # '~'
-
         #: Parallelize rendering across multiple workers
         self.parallelize_rendering = False
 

@@ -141,24 +137,12 @@ class DbVizOptions:
         self.confidence_threshold = None
 
 
-#%% Helper functions
-
-def _image_filename_to_path(image_file_name, image_base_dir, pathsep_replacement=''):
-    """
-    Translates the file name in an image entry in the json database to a path, possibly doing
-    some manipulation of path separators.
-    """
-
-    if len(pathsep_replacement) > 0:
-        image_file_name = os.path.normpath(image_file_name).replace(os.pathsep,pathsep_replacement)
-    return os.path.join(image_base_dir, image_file_name)
-
-
 #%% Core functions
 
 def visualize_db(db_path, output_dir, image_base_dir, options=None):
     """
-    Writes images and html to output_dir to visualize the annotations in a
+    Writes images and html to output_dir to visualize the images and annotations in a
+    COCO-formatted .json file.
 
     Args:
         db_path (str or dict): the .json filename to load, or a previously-loaded database

@@ -176,9 +160,11 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):
 
     # Consistency checking for fields with specific format requirements
 
-    #
+    # These should be a lists, but if someone specifies a string, do a reasonable thing
     if isinstance(options.extra_image_fields_to_print,str):
         options.extra_image_fields_to_print = [options.extra_image_fields_to_print]
+    if isinstance(options.extra_annotation_fields_to_print,str):
+        options.extra_annotation_fields_to_print = [options.extra_annotation_fields_to_print]
 
     if not options.parallelize_rendering_with_threads:
         print('Warning: process-based parallelization is not yet supported by visualize_db')

@@ -196,7 +182,7 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):
         assert(os.path.isfile(db_path))
         print('Loading database from {}...'.format(db_path))
         image_db = json.load(open(db_path))
-        print('...done')
+        print('...done, loaded {} images'.format(len(image_db['images'])))
     elif isinstance(db_path,dict):
         print('Using previously-loaded DB')
         image_db = db_path

@@ -312,8 +298,7 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):
         if image_base_dir.startswith('http'):
             img_path = image_base_dir + img_relative_path
         else:
-            img_path = os.path.join(image_base_dir,
-                _image_filename_to_path(img_relative_path, image_base_dir))
+            img_path = os.path.join(image_base_dir,img_relative_path).replace('\\','/')
 
         annos_i = df_anno.loc[df_anno['image_id'] == img_id, :] # all annotations on this image
 

@@ -407,7 +392,8 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):
         img_id_string = str(img_id).lower()
         file_name = '{}_gt.jpg'.format(os.path.splitext(img_id_string)[0])
 
-        # Replace characters that muck up image links
+        # Replace characters that muck up image links, including flattening file
+        # separators.
         illegal_characters = ['/','\\',':','\t','#',' ','%']
         for c in illegal_characters:
             file_name = file_name.replace(c,'~')

@@ -625,9 +611,6 @@ def main():
                         help='Only include images with bounding boxes (defaults to false)')
     parser.add_argument('--random_seed', action='store', type=int, default=None,
                         help='Random seed for image selection')
-    parser.add_argument('--pathsep_replacement', action='store', type=str, default='',
-                        help='Replace path separators in relative filenames with another ' + \
-                             'character (frequently ~)')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()