megadetector 10.0.10__py3-none-any.whl → 10.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (80)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +59 -16
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +2 -1
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +215 -101
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +1 -1
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +7 -8
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/extract_frames_from_video.py +4 -0
  64. megadetector/utils/gpu_test.py +6 -6
  65. megadetector/utils/md_tests.py +21 -21
  66. megadetector/utils/path_utils.py +112 -44
  67. megadetector/utils/split_locations_into_train_val.py +0 -4
  68. megadetector/utils/url_utils.py +5 -3
  69. megadetector/utils/wi_taxonomy_utils.py +37 -8
  70. megadetector/utils/write_html_image_list.py +1 -2
  71. megadetector/visualization/plot_utils.py +31 -19
  72. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  73. megadetector/visualization/visualization_utils.py +18 -7
  74. megadetector/visualization/visualize_db.py +9 -26
  75. megadetector/visualization/visualize_video_output.py +14 -2
  76. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  77. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/RECORD +80 -80
  78. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  79. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  80. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
@@ -152,7 +152,6 @@ def folder_list(base_dir,
     folders = []

     if recursive:
-        folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
                 folders.append(os.path.join(root, d))
@@ -370,7 +369,9 @@ def safe_create_link(link_exists,link_new):
         os.remove(link_new)
         os.symlink(link_exists,link_new)
     else:
-        os.makedirs(os.path.dirname(link_new),exist_ok=True)
+        link_new_dir = os.path.dirname(link_new)
+        if len(link_new_dir) > 0:
+            os.makedirs(link_new_dir,exist_ok=True)
         os.symlink(link_exists,link_new)

 # ...def safe_create_link(...)
@@ -988,7 +989,9 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     if verbose:
         print('{} to {}'.format(action_string,target_fn))

-    os.makedirs(os.path.dirname(target_fn),exist_ok=True)
+    target_dir = os.path.dirname(target_fn)
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:
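
A number of hunks in this release (safe_create_link and _copy_file above, download_url and several SpeciesNet helpers further down) replace a bare os.makedirs(os.path.dirname(p), exist_ok=True) call with a guarded version. The reason is that os.path.dirname() returns an empty string for a bare filename, and os.makedirs('') raises FileNotFoundError. A minimal sketch of the guarded pattern, using hypothetical paths:

import os

def ensure_parent_dir(path):
    # os.path.dirname('results.json') == '', and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when one is present
    parent = os.path.dirname(path)
    if len(parent) > 0:
        os.makedirs(parent, exist_ok=True)

ensure_parent_dir('results.json')            # bare filename: no-op
ensure_parent_dir('out/run1/results.json')   # creates out/run1 if needed
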
@@ -1038,10 +1041,11 @@ def parallel_copy_files(input_file_to_output_file,
                 input_output_tuples)):
             pbar.update()
     finally:
-        pool.close()
-        pool.join()
-        if verbose:
-            print("Pool closed and joined parallel file copying")
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print("Pool closed and joined parallel file copying")

 # ...def parallel_copy_files(...)

@@ -1100,15 +1104,24 @@ def parallel_delete_files(input_files,

     n_workers = min(max_workers, len(input_files))

-    if use_threads:
-        pool = ThreadPool(n_workers)
-    else:
-        pool = Pool(n_workers)
+    pool = None

-    with tqdm(total=len(input_files)) as pbar:
-        for i, _ in enumerate(pool.imap_unordered(partial(delete_file, verbose=verbose),
-                                                  input_files)):
-            pbar.update()
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i, _ in enumerate(pool.imap_unordered(partial(delete_file, verbose=verbose),
+                                                      input_files)):
+                pbar.update()
+    finally:
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and joined for file deletion')

 # ...def parallel_delete_files(...)

@@ -1185,8 +1198,6 @@ def parallel_get_file_sizes(filenames,
         dict: dictionary mapping filenames to file sizes in bytes
     """

-    n_workers = min(max_workers,len(filenames))
-
     folder_name = None

     if isinstance(filenames,str):
@@ -1204,23 +1215,37 @@ def parallel_get_file_sizes(filenames,

     assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'

+    n_workers = min(max_workers,len(filenames))
+
     if verbose:
         print('Creating worker pool')

-    if use_threads:
-        pool_string = 'thread'
-        pool = ThreadPool(n_workers)
-    else:
-        pool_string = 'process'
-        pool = Pool(n_workers)
+    pool = None

-    if verbose:
-        print('Created a {} pool of {} workers'.format(
-            pool_string,n_workers))
+    try:

-    # This returns (filename,size) tuples
-    get_size_results = list(tqdm(pool.imap(
-        partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
+        if use_threads:
+            pool_string = 'thread'
+            pool = ThreadPool(n_workers)
+        else:
+            pool_string = 'process'
+            pool = Pool(n_workers)
+
+        if verbose:
+            print('Created a {} pool of {} workers'.format(
+                pool_string,n_workers))
+
+        # This returns (filename,size) tuples
+        get_size_results = list(tqdm(pool.imap(
+            partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
+
+    finally:
+
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and join for file size collection')

     to_return = {}
     for r in get_size_results:
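
The parallel_copy_files, parallel_delete_files, and parallel_get_file_sizes hunks above (and the zip/hash helpers below) all make the same change: pool work moves inside a try block, and the pool is closed and joined in a finally block so that workers are cleaned up even when an exception interrupts the loop. A condensed sketch of that pattern, with a hypothetical worker function:

from multiprocessing.pool import Pool, ThreadPool

def _work(item):
    # hypothetical per-item worker
    return item * 2

def run_parallel(items, n_workers=8, use_threads=True):
    pool = None
    try:
        pool = ThreadPool(n_workers) if use_threads else Pool(n_workers)
        return list(pool.imap_unordered(_work, items))
    finally:
        # pool stays None if construction itself failed; otherwise always clean up
        if pool is not None:
            pool.close()
            pool.join()
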
@@ -1275,6 +1300,8 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compress_

     return output_fn

+# ...def zip_file(...)
+

 def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):
@@ -1315,6 +1342,8 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,

     return output_fn

+# ...def add_files_to_single_tar_file(...)
+

 def zip_files_into_single_zipfile(input_files,
                                   output_fn,
@@ -1359,6 +1388,8 @@ def zip_files_into_single_zipfile(input_files,

     return output_fn

+# ...def zip_files_into_single_zipfile(...)
+

 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compress_level=9):
     """
@@ -1382,7 +1413,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
+            return output_fn

     if verbose:
         print('Zipping {} to {} (compression level {})'.format(
@@ -1400,6 +1431,8 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com

     return output_fn

+# ...def zip_folder(...)
+

 def parallel_zip_files(input_files,
                        max_workers=16,
@@ -1428,11 +1461,22 @@ def parallel_zip_files(input_files,
     else:
         pool = Pool(n_workers)

-    with tqdm(total=len(input_files)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
-            output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
-            input_files)):
-            pbar.update()
+    try:
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
+                output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
+                input_files)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+# ...def parallel_zip_files(...)


 def parallel_zip_folders(input_folders,
@@ -1462,12 +1506,23 @@ def parallel_zip_folders(input_folders,
     else:
         pool = Pool(n_workers)

-    with tqdm(total=len(input_folders)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(
-            partial(zip_folder,overwrite=overwrite,
-            compress_level=compress_level,verbose=verbose),
-            input_folders)):
-            pbar.update()
+    try:
+
+        with tqdm(total=len(input_folders)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(
+                partial(zip_folder,overwrite=overwrite,
+                compress_level=compress_level,verbose=verbose),
+                input_folders)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel folder zipping')
+
+# ...def parallel_zip_folders(...)


 def zip_each_file_in_folder(folder_name,
@@ -1510,6 +1565,8 @@ def zip_each_file_in_folder(folder_name,
                        use_threads=use_threads,compress_level=compress_level,
                        overwrite=overwrite,verbose=verbose)

+# ...def zip_each_file_in_folder(...)
+

 def unzip_file(input_file, output_folder=None):
     """
@@ -1617,9 +1674,20 @@ def parallel_compute_file_hashes(filenames,
     else:
         pool = Pool(n_workers)

-    results = list(tqdm(pool.imap(
-        partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
-        filenames), total=len(filenames)))
+    try:
+
+        results = list(tqdm(pool.imap(
+            partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
+            filenames), total=len(filenames)))
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+    # ...if we are/aren't parallelizing

     assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'

@@ -221,14 +221,10 @@ def split_locations_into_train_val(location_to_category_counts,
     weighted_average_error,weighted_category_errors,category_to_val_fraction = \
         compute_seed_errors(min_error_seed)

-    random_seed = min_error_seed
-
-    category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
                                                         sort_values=category_id_to_count,
                                                         reverse=True)

-
     print('Val fractions by category:\n')

     for category in category_to_val_fraction:
@@ -132,7 +132,8 @@ def download_url(url,
         if verbose:
             print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
         target_dir = os.path.dirname(destination_filename)
-        os.makedirs(target_dir,exist_ok=True)
+        if len(target_dir) > 0:
+            os.makedirs(target_dir,exist_ok=True)
         urllib.request.urlretrieve(url, destination_filename, progress_updater)
         assert(os.path.isfile(destination_filename))
         n_bytes = os.path.getsize(destination_filename)
@@ -800,8 +801,9 @@ class TestUrlUtils:
 def _test_url_utils():
     """
     Runs all tests in the TestUrlUtils class. I generally disable this during testing
-    because it creates irritating nondeterminism, and this is neither a core module nor
-    a module that changes often.
+    because it creates irritating nondeterminism (because it depends on downloading
+    stuff from the Internet), and this is neither a core module nor a module that changes
+    often.
     """

     test_instance = TestUrlUtils()
@@ -311,7 +311,8 @@ def taxonomy_info_to_taxonomy_string(taxonomy_info, include_taxon_id_and_common_
 def generate_whole_image_detections_for_classifications(classifications_json_file,
                                                         detections_json_file,
                                                         ensemble_json_file=None,
-                                                        ignore_blank_classifications=True):
+                                                        ignore_blank_classifications=True,
+                                                        verbose=True):
     """
     Given a set of classification results in SpeciesNet format that were likely run on
     already-cropped images, generate a file of [fake] detections in SpeciesNet format in which each
@@ -324,6 +325,7 @@ def generate_whole_image_detections_for_classifications(classifications_json_fil
             and classfications
         ignore_blank_classifications (bool, optional): use non-top classifications when
             the top classification is "blank" or "no CV result"
+        verbose (bool, optional): enable additional debug output

     Returns:
         dict: the contents of [detections_json_file]
@@ -336,16 +338,37 @@ def generate_whole_image_detections_for_classifications(classifications_json_fil
     output_predictions = []
     ensemble_predictions = []

-    # prediction = predictions[0]
-    for prediction in predictions:
+    # i_prediction = 0; prediction = predictions[i_prediction]
+    for i_prediction,prediction in enumerate(predictions):

         output_prediction = {}
         output_prediction['filepath'] = prediction['filepath']
         i_score = 0
+
         if ignore_blank_classifications:
+
             while (prediction['classifications']['classes'][i_score] in \
                    (blank_prediction_string,no_cv_result_prediction_string)):
+
                 i_score += 1
+                if (i_score >= len(prediction['classifications']['classes'])):
+
+                    if verbose:
+
+                        print('Ignoring blank classifications, but ' + \
+                              'image {} has no non-blank values'.format(
+                              i_prediction))
+
+                    # Just use the first one
+                    i_score = 0
+                    break
+
+                # ...if we passed the last prediction
+
+            # ...iterate over classes within this prediction
+
+        # ...if we're supposed to ignore blank classifications
+
         top_classification = prediction['classifications']['classes'][i_score]
         top_classification_score = prediction['classifications']['scores'][i_score]
         if is_animal_classification(top_classification):
@@ -450,8 +473,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,

     # Round floating-point values (confidence scores, coordinates) to a
     # reasonable number of decimal places
-    if max_decimals is not None and max_decimals > 0:
-        round_floats_in_nested_dict(predictions)
+    if (max_decimals is not None) and (max_decimals > 0):
+        round_floats_in_nested_dict(predictions, decimal_places=max_decimals)

     predictions = predictions['predictions']
     assert isinstance(predictions,list)
@@ -714,7 +737,9 @@ def generate_predictions_json_from_md_results(md_results_file,

     # ...for each image

-    os.makedirs(os.path.dirname(predictions_json_file),exist_ok=True)
+    output_dir = os.path.dirname(predictions_json_file)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir,exist_ok=True)
     with open(predictions_json_file,'w') as f:
         json.dump(output_dict,f,indent=1)

@@ -788,7 +813,9 @@ def generate_instances_json_from_folder(folder,
     to_return = {'instances':instances}

     if output_file is not None:
-        os.makedirs(os.path.dirname(output_file),exist_ok=True)
+        output_dir = os.path.dirname(output_file)
+        if len(output_dir) > 0:
+            os.makedirs(output_dir,exist_ok=True)
         with open(output_file,'w') as f:
             json.dump(to_return,f,indent=1)

@@ -870,7 +897,9 @@ def merge_prediction_json_files(input_prediction_files,output_prediction_file):

     output_dict = {'predictions':predictions}

-    os.makedirs(os.path.dirname(output_prediction_file),exist_ok=True)
+    output_dir = os.path.dirname(output_prediction_file)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir,exist_ok=True)
     with open(output_prediction_file,'w') as f:
         json.dump(output_dict,f,indent=1)

@@ -110,7 +110,6 @@ def write_html_image_list(filename=None,images=None,options=None):
         if 'linkTarget' not in image_info:
             image_info['linkTarget'] = ''
         if 'textStyle' not in image_info:
-            text_style = options['defaultTextStyle']
             image_info['textStyle'] = options['defaultTextStyle']
         images[i_image] = image_info

@@ -185,7 +184,7 @@ def write_html_image_list(filename=None,images=None,options=None):
     if len(options['pageTitle']) > 0:
         title_string = '<title>{}</title>'.format(options['pageTitle'])

-    f_html.write('<html>{}<body>\n'.format(title_string))
+    f_html.write('<html><head>{}</head><body>\n'.format(title_string))

     f_html.write(options['headerHtml'])

@@ -126,19 +126,16 @@ def plot_precision_recall_curve(precisions,
     ax.step(recalls, precisions, color='b', alpha=0.2, where='post')
     ax.fill_between(recalls, precisions, alpha=0.2, color='b', step='post')

-    try:
-        ax.set(x_label='Recall', y_label='Precision', title=title)
-        ax.set(x_lim=xlim, y_lim=ylim)
-        #
-    except Exception:
-        ax.set_xlabel('Recall')
-        ax.set_ylabel('Precision')
-        ax.set_title(title)
-        ax.set_xlim(xlim[0],xlim[1])
-        ax.set_ylim(ylim[0],ylim[1])
+    ax.set_xlabel('Recall')
+    ax.set_ylabel('Precision')
+    ax.set_title(title)
+    ax.set_xlim(xlim[0],xlim[1])
+    ax.set_ylim(ylim[0],ylim[1])

     return fig

+# ...def plot_precision_recall_curve(...)
+

 def plot_stacked_bar_chart(data,
                            series_labels=None,
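
For context on the plot_precision_recall_curve hunk above: the deleted try branch passed x_label/x_lim keywords to Axes.set(), which are not valid Matplotlib property names (the properties are xlabel and xlim), so the except branch always ran. The new code keeps only the explicit setters; if the kwargs form were wanted, the equivalent call would presumably look like this:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# property-style kwargs: note the names are xlabel/xlim, not x_label/x_lim
ax.set(xlabel='Recall', ylabel='Precision', title='Precision-recall curve',
       xlim=(0.0, 1.05), ylim=(0.0, 1.05))
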
@@ -174,17 +171,21 @@ def plot_stacked_bar_chart(data,

     # stacked bar charts are made with each segment starting from a y position
     cumulative_size = np.zeros(num_columns)
-    for i, row_data in enumerate(data):
-        ax.bar(ind, row_data, bottom=cumulative_size, label=series_labels[i],
-               color=colors[i])
+    for i_row, row_data in enumerate(data):
+        if series_labels is None:
+            label = 'series_{}'.format(str(i_row).zfill(2))
+        else:
+            label = series_labels[i_row]
+        ax.bar(ind, row_data, bottom=cumulative_size, label=label,
+               color=colors[i_row])
         cumulative_size += row_data

-    if col_labels and len(col_labels) < 25:
+    if (col_labels is not None) and (len(col_labels) < 25):
         ax.set_xticks(ind)
         ax.set_xticklabels(col_labels, rotation=90)
-    elif col_labels:
+    elif (col_labels is not None):
         ax.set_xticks(list(range(0, len(col_labels), 20)))
-        ax.set_xticklabels(col_labels, rotation=90)
+        ax.set_xticklabels(col_labels[::20], rotation=90)

     if x_label is not None:
         ax.set_xlabel(x_label)
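
The last change in the plot_stacked_bar_chart hunk above matters because set_xticklabels() must receive exactly as many labels as there are ticks: once the ticks are thinned to every 20th column, the label list has to be sliced the same way, otherwise recent Matplotlib versions raise a length-mismatch ValueError (and older ones silently misalign the labels). A small self-contained illustration with synthetic column names:

import matplotlib.pyplot as plt

col_labels = ['col_{}'.format(i) for i in range(100)]   # synthetic labels
fig, ax = plt.subplots()
ax.bar(range(len(col_labels)), range(len(col_labels)))

# keep one tick every 20 columns, and slice the labels to match
ax.set_xticks(list(range(0, len(col_labels), 20)))
ax.set_xticklabels(col_labels[::20], rotation=90)
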
@@ -202,6 +203,8 @@ def plot_stacked_bar_chart(data,

     return fig

+# ...def plot_stacked_bar_chart(...)
+

 def calibration_ece(true_scores, pred_scores, num_bins):
     r"""
@@ -245,10 +248,17 @@ def calibration_ece(true_scores, pred_scores, num_bins):
     ece = np.abs(accs - confs) @ weights
     return accs, confs, ece

+# ...def calibration_ece(...)
+

-def plot_calibration_curve(true_scores, pred_scores, num_bins,
-                           name='calibration', plot_perf=True, plot_hist=True,
-                           ax=None, **fig_kwargs):
+def plot_calibration_curve(true_scores,
+                           pred_scores,
+                           num_bins,
+                           name='calibration',
+                           plot_perf=True,
+                           plot_hist=True,
+                           ax=None,
+                           **fig_kwargs):
     """
     Plots a calibration curve.

@@ -295,3 +305,5 @@ def plot_calibration_curve(true_scores, pred_scores, num_bins,
     fig.legend(loc='upper left', bbox_to_anchor=(0.15, 0.85))

     return ax.figure
+
+# ...def plot_calibration_curve(...)
@@ -185,6 +185,9 @@ def render_images_with_thumbnails(
     # ...for each crop

     # Write output image to disk
+    parent_dir = os.path.dirname(output_image_filename)
+    if len(parent_dir) > 0:
+        os.makedirs(parent_dir,exist_ok=True)
     output_image.save(output_image_filename)

 # ...def render_images_with_thumbnails(...)
@@ -1272,7 +1272,7 @@ def gray_scale_fraction(image,crop_size=(0.1,0.1)):
     if crop_size[0] > 0 or crop_size[1] > 0:

         assert (crop_size[0] + crop_size[1]) < 1.0, \
-            print('Illegal crop size: {}'.format(str(crop_size)))
+            'Illegal crop size: {}'.format(str(crop_size))

         top_crop_pixels = int(image.height * crop_size[0])
         bottom_crop_pixels = int(image.height * crop_size[1])
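
The gray_scale_fraction change above fixes a subtle idiom: in "assert cond, print(msg)", the second expression is a call to print(), whose return value (None) becomes the AssertionError message, so the raised exception carries no text. Passing the string directly attaches it to the exception. A minimal runnable illustration:

crop_size = (0.6, 0.6)   # deliberately illegal, so the assertion fires

try:
    # the string itself becomes the AssertionError message
    assert (crop_size[0] + crop_size[1]) < 1.0, \
        'Illegal crop size: {}'.format(str(crop_size))
except AssertionError as e:
    print('Assertion failed with message: {}'.format(e))
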
@@ -1391,7 +1391,9 @@ def _resize_absolute_image(input_output_files,
         status = 'error'
         error = str(e)

-    return {'input_fn':input_fn_abs,'output_fn':output_fn_abs,status:'status',
+    return {'input_fn':input_fn_abs,
+            'output_fn':output_fn_abs,
+            'status':status,
             'error':error}

 # ..._resize_absolute_image(...)
@@ -1460,6 +1462,7 @@ def resize_images(input_file_to_output_file,
     pool = None

     try:
+
         if pool_type == 'thread':
             pool = ThreadPool(n_workers); poolstring = 'threads'
         else:
@@ -1477,10 +1480,13 @@ def resize_images(input_file_to_output_file,
                     quality=quality)

         results = list(tqdm(pool.imap(p, input_output_file_pairs),total=len(input_output_file_pairs)))
+
     finally:
-        pool.close()
-        pool.join()
-        print("Pool closed and joined for image resizing")
+
+        if pool is not None:
+            pool.close()
+            pool.join()
+            print('Pool closed and joined for image resizing')

     return results

@@ -1680,8 +1686,13 @@ def parallel_get_image_sizes(filenames,
     else:
         pool = Pool(n_workers)

-    results = list(tqdm(pool.imap(
-        partial(get_image_size,verbose=verbose),filenames), total=len(filenames)))
+    try:
+        results = list(tqdm(pool.imap(
+            partial(get_image_size,verbose=verbose),filenames), total=len(filenames)))
+    finally:
+        pool.close()
+        pool.join()
+        print('Pool closed and joined for image size retrieval')

     assert len(filenames) == len(results), 'Internal error in parallel_get_image_sizes'

@@ -102,10 +102,6 @@ class DbVizOptions:
         #: :meta private:
         self.multiple_categories_tag = '*multiple*'

-        #: We sometimes flatten image directories by replacing a path separator with
-        #: another character. Leave blank for the typical case where this isn't necessary.
-        self.pathsep_replacement = '' # '~'
-
         #: Parallelize rendering across multiple workers
         self.parallelize_rendering = False

@@ -141,24 +137,12 @@ class DbVizOptions:
         self.confidence_threshold = None


-#%% Helper functions
-
-def _image_filename_to_path(image_file_name, image_base_dir, pathsep_replacement=''):
-    """
-    Translates the file name in an image entry in the json database to a path, possibly doing
-    some manipulation of path separators.
-    """
-
-    if len(pathsep_replacement) > 0:
-        image_file_name = os.path.normpath(image_file_name).replace(os.pathsep,pathsep_replacement)
-    return os.path.join(image_base_dir, image_file_name)
-
-
 #%% Core functions

 def visualize_db(db_path, output_dir, image_base_dir, options=None):
     """
-    Writes images and html to output_dir to visualize the annotations in a .json file.
+    Writes images and html to output_dir to visualize the images and annotations in a
+    COCO-formatted .json file.

     Args:
         db_path (str or dict): the .json filename to load, or a previously-loaded database
@@ -176,9 +160,11 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):

     # Consistency checking for fields with specific format requirements

-    # This should be a list, but if someone specifies a string, do a reasonable thing
+    # These should be lists, but if someone specifies a string, do a reasonable thing
     if isinstance(options.extra_image_fields_to_print,str):
         options.extra_image_fields_to_print = [options.extra_image_fields_to_print]
+    if isinstance(options.extra_annotation_fields_to_print,str):
+        options.extra_annotation_fields_to_print = [options.extra_annotation_fields_to_print]

     if not options.parallelize_rendering_with_threads:
         print('Warning: process-based parallelization is not yet supported by visualize_db')
@@ -196,7 +182,7 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):
         assert(os.path.isfile(db_path))
         print('Loading database from {}...'.format(db_path))
         image_db = json.load(open(db_path))
-        print('...done')
+        print('...done, loaded {} images'.format(len(image_db['images'])))
     elif isinstance(db_path,dict):
         print('Using previously-loaded DB')
         image_db = db_path
@@ -312,8 +298,7 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):
         if image_base_dir.startswith('http'):
             img_path = image_base_dir + img_relative_path
         else:
-            img_path = os.path.join(image_base_dir,
-                                    _image_filename_to_path(img_relative_path, image_base_dir))
+            img_path = os.path.join(image_base_dir,img_relative_path).replace('\\','/')

         annos_i = df_anno.loc[df_anno['image_id'] == img_id, :] # all annotations on this image

@@ -407,7 +392,8 @@ def visualize_db(db_path, output_dir, image_base_dir, options=None):
         img_id_string = str(img_id).lower()
         file_name = '{}_gt.jpg'.format(os.path.splitext(img_id_string)[0])

-        # Replace characters that muck up image links
+        # Replace characters that muck up image links, including flattening file
+        # separators.
         illegal_characters = ['/','\\',':','\t','#',' ','%']
         for c in illegal_characters:
             file_name = file_name.replace(c,'~')
@@ -625,9 +611,6 @@ def main():
                         help='Only include images with bounding boxes (defaults to false)')
     parser.add_argument('--random_seed', action='store', type=int, default=None,
                         help='Random seed for image selection')
-    parser.add_argument('--pathsep_replacement', action='store', type=str, default='',
-                        help='Replace path separators in relative filenames with another ' + \
-                             'character (frequently ~)')

     if len(sys.argv[1:]) == 0:
         parser.print_help()