megadetector 10.0.9__py3-none-any.whl → 10.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector has been flagged as potentially problematic.

Files changed (84)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +69 -13
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +60 -42
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +282 -110
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +4 -6
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +19 -21
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/directory_listing.py +3 -0
  64. megadetector/utils/extract_frames_from_video.py +4 -0
  65. megadetector/utils/gpu_test.py +6 -6
  66. megadetector/utils/md_tests.py +21 -21
  67. megadetector/utils/path_utils.py +171 -36
  68. megadetector/utils/split_locations_into_train_val.py +0 -4
  69. megadetector/utils/string_utils.py +21 -0
  70. megadetector/utils/url_utils.py +5 -3
  71. megadetector/utils/wi_platform_utils.py +168 -24
  72. megadetector/utils/wi_taxonomy_utils.py +38 -8
  73. megadetector/utils/write_html_image_list.py +1 -2
  74. megadetector/visualization/plot_utils.py +31 -19
  75. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  76. megadetector/visualization/visualization_utils.py +18 -7
  77. megadetector/visualization/visualize_db.py +9 -26
  78. megadetector/visualization/visualize_detector_output.py +1 -0
  79. megadetector/visualization/visualize_video_output.py +14 -2
  80. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  81. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/RECORD +84 -84
  82. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  83. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  84. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/detection/video_utils.py

@@ -221,11 +221,13 @@ def frames_to_video(images, fs, output_file_name, codec_spec=default_fourcc):
         print('Warning: no frames to render')
         return
 
-    os.makedirs(os.path.dirname(output_file_name),exist_ok=True)
+    output_dir = os.path.dirname(output_file_name)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir, exist_ok=True)
 
     # Determine the width and height from the first image
     frame = cv2.imread(images[0])
-    cv2.imshow('video',frame)
+    # cv2.imshow('video',frame)
     height, width, channels = frame.shape
 
     # Define the codec and create VideoWriter object
@@ -297,7 +299,7 @@ def _filename_to_frame_number(filename):
     try:
         frame_number = int(frame_number)
     except Exception:
-        raise ValueError('Filename {} does contain a valid frame number'.format(filename))
+        raise ValueError('Filename {} does not contain a valid frame number'.format(filename))
 
     return frame_number
 
@@ -1059,9 +1061,10 @@ def video_folder_to_frames(input_folder,
 
     finally:
 
-        pool.close()
-        pool.join()
-        print('Pool closed and joined for video processing')
+        if pool is not None:
+            pool.close()
+            pool.join()
+            print('Pool closed and joined for video processing')
 
     # ...try/finally
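
The dirname guard above is a pattern that recurs throughout this release: os.path.dirname returns an empty string for a bare filename, and os.makedirs('', exist_ok=True) raises FileNotFoundError, so the directory is only created when one is actually specified. A minimal standalone sketch of the pattern (the helper name _ensure_parent_dir is illustrative, not part of the package):

    import os

    def _ensure_parent_dir(output_file_name):
        # os.path.dirname('video.mp4') == '', and os.makedirs('') raises
        # FileNotFoundError, so only create the parent when one is specified
        output_dir = os.path.dirname(output_file_name)
        if len(output_dir) > 0:
            os.makedirs(output_dir, exist_ok=True)

    _ensure_parent_dir('video.mp4')             # no-op: write to the working directory
    _ensure_parent_dir('out/frames/video.mp4')  # creates out/frames if needed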
 
megadetector/postprocessing/add_max_conf.py

@@ -18,7 +18,8 @@ import json
 import sys
 import argparse
 
-from megadetector.utils import ct_utils
+from megadetector.utils.ct_utils import get_max_conf
+from megadetector.utils.ct_utils import write_json
 
 
 #%% Main function
@@ -39,15 +40,14 @@ def add_max_conf(input_file,output_file):
 
     for im in d['images']:
 
-        max_conf = ct_utils.get_max_conf(im)
+        max_conf = get_max_conf(im)
 
         if 'max_detection_conf' in im:
             assert abs(max_conf - im['max_detection_conf']) < 0.00001
         else:
             im['max_detection_conf'] = max_conf
 
-    with open(output_file,'w') as f:
-        json.dump(d,f,indent=1)
+    write_json(output_file,d)
 
 
 #%% Driver
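
Several files in this release swap the open/json.dump idiom for ct_utils.write_json. The helper's body isn't shown in this diff; a plausible minimal sketch, assuming it just centralizes the package's indent=1 convention and output-directory handling (argument order matching the call sites above), would be:

    import json
    import os

    def write_json(output_file, data, indent=1):
        # Sketch only; the real ct_utils.write_json may differ
        output_dir = os.path.dirname(output_file)
        if len(output_dir) > 0:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_file, 'w') as f:
            json.dump(data, f, indent=indent)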
megadetector/postprocessing/categorize_detections_by_size.py

@@ -14,6 +14,8 @@ import json
 from collections import defaultdict
 from tqdm import tqdm
 
+from megadetector.utils.ct_utils import write_json
+
 
 #%% Support classes
 
@@ -157,8 +159,7 @@ def categorize_detections_by_size(input_file,output_file=None,options=None):
         print('Found {} detections in category {}'.format(category_count,category_name))
 
     if output_file is not None:
-        with open(output_file,'w') as f:
-            json.dump(data,f,indent=1)
+        write_json(output_file,data)
 
     return data
 
megadetector/postprocessing/classification_postprocessing.py

@@ -25,6 +25,7 @@ from megadetector.utils.ct_utils import is_empty
 from megadetector.utils.ct_utils import sort_dictionary_by_value
 from megadetector.utils.ct_utils import sort_dictionary_by_key
 from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.utils.ct_utils import write_json
 
 from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string
 from megadetector.utils.wi_taxonomy_utils import taxonomy_level_index
@@ -420,7 +421,7 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     if verbose_debug_enabled:
         _print_counts_with_names(category_to_count,classification_descriptions)
-        from IPython import embed; embed()
+        # from IPython import embed; embed()
 
 
     ## Possibly change "other" classifications to the most common category
@@ -448,7 +449,7 @@ def _smooth_classifications_for_list_of_detections(detections,
             if verbose_debug_enabled:
                 print('Replacing {} with {}'.format(
                     classification_descriptions[c[0]],
-                    classification_descriptions[c[1]]))
+                    most_common_category))
 
             n_other_classifications_changed_this_image += 1
             c[0] = most_common_category
@@ -918,8 +919,7 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
 
     if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
-        with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+        write_json(output_file,d)
 
     return d
 
@@ -1092,8 +1092,7 @@ def smooth_classification_results_sequence_level(input_file,
     if output_file is not None:
         print('Writing sequence-smoothed classification results to {}'.format(
             output_file))
-        with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+        write_json(output_file,d)
 
     return d
 
@@ -1168,7 +1167,7 @@ def restrict_to_taxa_list(taxa_list,
     # Convert all NaN values in the "common" column to empty strings
     taxa_list_df['common'] = taxa_list_df['common'].fillna('')
 
-    # Create a dictionary mapping latin names to common names
+    # Create a dictionary mapping source Latin names to target common names
    target_latin_to_common = {}
 
     for i_row,row in taxa_list_df.iterrows():
@@ -1332,7 +1331,7 @@ def restrict_to_taxa_list(taxa_list,
         _insert_taxonomy_string(new_taxon_string)
 
 
-    ##%% Make sure all species on the allow-list are in the taxonomy
+    ##%% Make sure all taxa on the allow-list are in the taxonomy
 
     n_failed_mappings = 0
 
@@ -1498,7 +1497,8 @@ def restrict_to_taxa_list(taxa_list,
         if (protected_common_names is not None) and \
            (common_name in protected_common_names):
             if verbose:
-                print('Not messing with protected category {}'.format(common_name))
+                print('Not messing with protected category {}:\n{}'.format(
+                    common_name,input_taxon_string))
             input_category_id_to_output_taxon_string[input_category_id] = \
                 input_taxon_string
             continue
@@ -1578,12 +1578,13 @@ def restrict_to_taxa_list(taxa_list,
             output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
             input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
 
-    # ...for each category
+    # ...for each category (mapping input category IDs to output taxon strings)
 
 
-    ##%% Build the new tables
+    ##%% Map input category IDs to output category IDs
 
-    speciesnet_taxon_string_to_latin_name = invert_dictionary(speciesnet_latin_name_to_taxon_string)
+    speciesnet_taxon_string_to_latin_name = \
+        invert_dictionary(speciesnet_latin_name_to_taxon_string)
 
     input_category_id_to_output_category_id = {}
     output_taxon_string_to_category_id = {}
@@ -1604,7 +1605,8 @@ def restrict_to_taxa_list(taxa_list,
         if speciesnet_latin_name in speciesnet_latin_name_to_output_common_name:
             custom_common_name = speciesnet_latin_name_to_output_common_name[speciesnet_latin_name]
             if custom_common_name != output_common_name:
-                print('Substituting common name {} for {}'.format(custom_common_name,output_common_name))
+                if verbose:
+                    print('Substituting common name {} for {}'.format(custom_common_name,output_common_name))
             output_common_name = custom_common_name
 
         # Do we need to create a new output category?
@@ -1625,20 +1627,16 @@ def restrict_to_taxa_list(taxa_list,
         if False:
             original_common_name = \
                 input_category_id_to_common_name[input_category_id]
-
             original_taxon_string = \
                 input_category_id_to_taxonomy_string[input_category_id]
-
             print('Mapping {} ({}) to:\n{} ({})\n'.format(
                 original_common_name,original_taxon_string,
                 output_common_name,output_taxon_string))
-            print('Mapping {} to {}'.format(
-                original_common_name,output_common_name,))
 
-    # ...for each category
+    # ...for each category (mapping input category IDs to output category IDs)
 
 
-    #%% Remap all category labels
+    ##%% Remap all category labels
 
     assert len(set(output_taxon_string_to_category_id.keys())) == \
         len(set(output_taxon_string_to_category_id.values())), \
@@ -1682,7 +1680,7 @@ def restrict_to_taxa_list(taxa_list,
 
     ##%% Write output
 
-    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=1)
+    write_json(output_file,output_data)
+
 
 # ...def restrict_to_taxa_list(...)
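
The invert_dictionary call above, and the assertion later in this hunk that output taxon strings map one-to-one to category IDs, both rely on dictionary values being unique. A minimal sketch of such an inversion helper, assuming unique hashable values (not necessarily the ct_utils implementation):

    def invert_dictionary(d):
        # Sketch: swap keys and values; with duplicate values, later keys
        # would silently win, hence the uniqueness check
        assert len(set(d.values())) == len(d), 'values must be unique to invert'
        return {v: k for k, v in d.items()}

    latin_to_taxon = {'puma concolor': 'mammalia;carnivora;felidae;puma;concolor'}
    taxon_to_latin = invert_dictionary(latin_to_taxon)
    assert taxon_to_latin['mammalia;carnivora;felidae;puma;concolor'] == 'puma concolor'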
megadetector/postprocessing/combine_batch_outputs.py

@@ -203,7 +203,8 @@ def combine_api_shard_files(input_files, output_file=None):
     input_lists = []
     print('Loading input files')
     for fn in input_files:
-        input_lists.append(json.load(open(fn)))
+        with open(fn,'r') as f:
+            input_lists.append(json.load(f))
 
     detections = []
     # detection_list = input_lists[0]
@@ -214,7 +215,7 @@ def combine_api_shard_files(input_files, output_file=None):
         assert 'file' in d
         assert 'max_detection_conf' in d
         assert 'detections' in d
-        detections.extend([d])
+        detections.append(d)
 
     print('Writing output')
     if output_file is not None:
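
Two small idiom cleanups in this file: json.load(open(fn)) leaves the file handle open until garbage collection, while the with form closes it deterministically; and detections.extend([d]) builds a throwaway one-element list where detections.append(d) expresses the same thing directly:

    detections = []
    d = {'file': 'img.jpg', 'max_detection_conf': 0.9, 'detections': []}

    detections.extend([d])  # wraps d in a temporary list, then unpacks it
    detections.append(d)    # same effect, no temporary list

    assert detections[0] is detections[1]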
megadetector/postprocessing/compare_batch_results.py

@@ -353,10 +353,11 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
     im_gt = image_pair['im_gt']
     annotations_gt = image_pair['annotations_gt']
     gt_boxes = []
+    gt_categories = []
     for ann in annotations_gt:
         if 'bbox' in ann:
             gt_boxes.append(ann['bbox'])
-    gt_categories = [ann['category_id'] for ann in annotations_gt]
+            gt_categories.append(ann['category_id'])
 
     if len(gt_boxes) > 0:
 
@@ -474,7 +475,7 @@ def _result_types_to_comparison_category(result_types_present_a,
        ('tp' not in result_types_present_b):
         return 'clean_tp_a_only'
     # Otherwise, TPs are cases where one model has only TPs, and the other model
-    # has any mistakse
+    # has any mistakes
     if ('fn' in result_types_present_b) or ('fp' in result_types_present_b):
         return 'tp_a_only'
 
@@ -486,7 +487,7 @@ def _result_types_to_comparison_category(result_types_present_a,
        ('tp' not in result_types_present_a):
         return 'clean_tp_b_only'
     # Otherwise, TPs are cases where one model has only TPs, and the other model
-    # has any mistakse
+    # has any mistakes
     if ('fn' in result_types_present_a) or ('fp' in result_types_present_a):
         return 'tp_b_only'
 
@@ -674,11 +675,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     category_ids_to_include_a = []
     category_ids_to_include_b = []
 
-    for category_name in options.category_names_to_include:
-        if category_name in category_name_to_id_a:
-            category_ids_to_include_a.append(category_name_to_id_a[category_name])
-        if category_name in category_name_to_id_b:
-            category_ids_to_include_b.append(category_name_to_id_b[category_name])
+    # If we're supposed to be including all categories, we don't actually need to
+    # populate category_ids_to_include_a/b, but we're doing this for future-proofing.
+    if options.category_names_to_include is None:
+        category_ids_to_include_a = sorted(list(category_name_to_id_a.values()))
+        category_ids_to_include_b = sorted(list(category_name_to_id_b.values()))
+    else:
+        for category_name in options.category_names_to_include:
+            if category_name in category_name_to_id_a:
+                category_ids_to_include_a.append(category_name_to_id_a[category_name])
+            if category_name in category_name_to_id_b:
+                category_ids_to_include_b.append(category_name_to_id_b[category_name])
 
     if pairwise_options.results_description_a is None:
         if 'detector' not in results_a['info']:
@@ -814,7 +821,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
         print('Warning: {} files are only available in the ground truth (not in MD results)'.format(
             len(filenames_only_in_gt)))
 
-    filenames_only_in_results = gt_filenames_set.difference(gt_filenames)
+    filenames_only_in_results = filenames_to_compare_set.difference(gt_filenames_set)
     if len(filenames_only_in_results) > 0:
         print('Warning: {} files are only available in the MD results (not in ground truth)'.format(
             len(filenames_only_in_results)))
@@ -1185,13 +1192,6 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     if ('tp' in result_types_present_b) or ('fn' in result_types_present_b):
         assert 'tp' in result_types_present_a or 'fn' in result_types_present_a
 
-    # If either model has a TP or FN, the other has to have a TP or FN, since
-    # there was something in the GT
-    if ('tp' in result_types_present_a) or ('fn' in result_types_present_a):
-        assert 'tp' in result_types_present_b or 'fn' in result_types_present_b
-    if ('tp' in result_types_present_b) or ('fn' in result_types_present_b):
-        assert 'tp' in result_types_present_a or 'fn' in result_types_present_a
-
 
     ## Choose a comparison category based on result types
 
@@ -1677,8 +1677,8 @@ def n_way_comparison(filenames,
         '[detection_thresholds] should be the same length as [filenames]'
 
     if rendering_thresholds is not None:
-        assert len(rendering_thresholds) == len(filenames)
-        '[rendering_thresholds] should be the same length as [filenames]'
+        assert len(rendering_thresholds) == len(filenames), \
+            '[rendering_thresholds] should be the same length as [filenames]'
     else:
         rendering_thresholds = [(x*0.6666) for x in detection_thresholds]
 
@@ -1932,32 +1932,54 @@ def find_equivalent_threshold(results_a,
 
 if False:
 
+    #%% Prepare test files
+
+    from megadetector.utils.path_utils import insert_before_extension
+
+    model_names = ['mdv5a','mdv5b']
+    image_folder = 'g:/temp/md-test-images'
+    output_filename_base = os.path.join(image_folder,'comparison_test.json')
+
+    output_filenames = []
+
+    commands = []
+
+    for model_name in model_names:
+        output_filename = insert_before_extension(output_filename_base,model_name)
+        output_filenames.append(output_filename)
+        cmd = 'python -m megadetector.detection.run_detector_batch'
+        cmd += ' {} {} {} --recursive --output_relative_filenames'.format(
+            model_name, image_folder,output_filename)
+        commands.append(cmd)
+
+    cmd = '\n\n'.join(commands)
+    print(cmd)
+    import clipboard
+    clipboard.copy(cmd)
+
+
    #%% Test two-way comparison
 
     options = BatchComparisonOptions()
 
     options.parallelize_rendering_with_threads = True
 
-    options.job_name = 'BCT'
+    options.job_name = 'md-test-images'
     options.output_folder = r'g:\temp\comparisons'
-    options.image_folder = r'g:\camera_traps\camera_trap_images'
+    options.image_folder = image_folder
     options.max_images_per_category = 100
     options.sort_by_confidence = True
 
     options.pairwise_options = []
 
     results_base = os.path.expanduser('~/postprocessing/bellevue-camera-traps')
-    filenames = [
-        os.path.join(results_base,r'bellevue-camera-traps-2023-12-05-v5a.0.0\combined_api_outputs\bellevue-camera-traps-2023-12-05-v5a.0.0_detections.json'),
-        os.path.join(results_base,r'bellevue-camera-traps-2023-12-05-aug-v5a.0.0\combined_api_outputs\bellevue-camera-traps-2023-12-05-aug-v5a.0.0_detections.json')
-    ]
 
     detection_thresholds = [0.15,0.15]
     rendering_thresholds = None
 
-    results = n_way_comparison(filenames,
-                               options,
-                               detection_thresholds,
+    results = n_way_comparison(filenames=output_filenames,
+                               options=options,
+                               detection_thresholds=detection_thresholds,
                                rendering_thresholds=rendering_thresholds)
 
 
    from megadetector.utils.path_utils import open_file
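
The filenames_only_in_results fix above is the substantive change in this file: the old expression subtracted the ground-truth set from itself, which is always empty, so the warning below it could never fire. a.difference(b) returns elements of a not in b, so operand order matters (the filenames here are made up):

    gt_filenames_set = {'a.jpg', 'b.jpg'}
    filenames_to_compare_set = {'b.jpg', 'c.jpg'}

    # Old expression: a set minus itself is always empty
    assert gt_filenames_set.difference(gt_filenames_set) == set()

    # Fixed expression: files in the MD results that have no ground truth
    assert filenames_to_compare_set.difference(gt_filenames_set) == {'c.jpg'}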
megadetector/postprocessing/convert_output_format.py

@@ -22,7 +22,8 @@ import pandas as pd
 from megadetector.postprocessing.load_api_results import load_api_results_csv
 from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
 from megadetector.data_management.annotations import annotation_constants
-from megadetector.utils import ct_utils
+from megadetector.utils.ct_utils import get_max_conf
+from megadetector.utils.ct_utils import write_json
 
 CONF_DIGITS = 3
 
@@ -138,7 +139,7 @@ def convert_json_to_csv(input_path,
             # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
             continue
 
-        max_conf = ct_utils.get_max_conf(im)
+        max_conf = get_max_conf(im)
         detection_category_id_to_max_conf = defaultdict(float)
         classification_category_id_to_max_conf = defaultdict(float)
         detections = []
@@ -177,7 +178,8 @@ def convert_json_to_csv(input_path,
                 classification_category_max = \
                     classification_category_id_to_max_conf[classification_category_id]
                 if classification_conf > classification_category_max:
-                    classification_category_id_to_max_conf[classification_category_id] = d['conf']
+                    classification_category_id_to_max_conf[classification_category_id] = \
+                        classification_conf
 
             # ...for each classification
 
@@ -210,7 +212,7 @@ def convert_json_to_csv(input_path,
 
     if omit_bounding_boxes:
         df = df.drop('detections',axis=1)
-    df.to_csv(output_path,index=False,header=True)
+    df.to_csv(output_path,index=False,header=True,encoding=output_encoding)
 
 # ...def convert_json_to_csv(...)
 
@@ -295,7 +297,7 @@ def convert_csv_to_json(input_path,output_path=None,overwrite=True):
     json_out['classification_categories'] = classification_categories
     json_out['images'] = images
 
-    json.dump(json_out,open(output_path,'w'),indent=1)
+    write_json(output_path,json_out)
 
 # ...def convert_csv_to_json(...)
 
@@ -372,7 +374,7 @@ def main():
         help='Output filename ending in .json or .csv (defaults to ' + \
             'input file, with .json/.csv replaced by .csv/.json)')
     parser.add_argument('--omit_bounding_boxes',action='store_true',
-        help='Output bounding box text from .csv output (large and usually not useful)')
+        help='Omit bounding box text from .csv output (large and usually not useful)')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()
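
The classification fix above deserves a closer look: the per-category running maximum was compared against classification_conf but then updated with d['conf'], the enclosing detection's confidence. A compact standalone version of the corrected pattern, with defaultdict(float) so unseen categories start at 0.0 (the sample data is invented, but the [category_id, confidence] pair layout matches the MD results format):

    from collections import defaultdict

    classification_category_id_to_max_conf = defaultdict(float)

    detections = [
        {'conf': 0.95, 'classifications': [['1', 0.4], ['2', 0.7]]},
        {'conf': 0.80, 'classifications': [['1', 0.6]]},
    ]

    for d in detections:
        for category_id, classification_conf in d['classifications']:
            # Track the classification's own confidence, not the detection's d['conf']
            if classification_conf > classification_category_id_to_max_conf[category_id]:
                classification_category_id_to_max_conf[category_id] = classification_conf

    assert dict(classification_category_id_to_max_conf) == {'1': 0.6, '2': 0.7}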
megadetector/postprocessing/create_crop_folder.py

@@ -169,7 +169,9 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
         'Could not find image-level input file {}'.format(image_results_file_with_crop_ids)
     assert os.path.isfile(crop_results_file), \
         'Could not find crop results file {}'.format(crop_results_file)
-    os.makedirs(os.path.dirname(output_file),exist_ok=True)
+    output_dir = os.path.dirname(output_file)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir,exist_ok=True)
 
 
     ##%% Read input files
@@ -259,7 +261,11 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
             detections_without_classification_handling
         ))
 
-        if not skip_detection:
+        if skip_detection:
+
+            n_skipped_detections += 1
+
+        else:
 
             crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
 
@@ -340,8 +346,11 @@ def create_crop_folder(input_file,
     assert os.path.isfile(input_file), 'Input file {} not found'.format(input_file)
     assert os.path.isdir(input_folder), 'Input folder {} not found'.format(input_folder)
     os.makedirs(output_folder,exist_ok=True)
+
     if output_file is not None:
-        os.makedirs(os.path.dirname(output_file),exist_ok=True)
+        output_dir = os.path.dirname(output_file)
+        if len(output_dir) > 0:
+            os.makedirs(output_dir,exist_ok=True)
 
 
     ##%% Read input
@@ -599,7 +608,7 @@ def main():
 
     print('Starting crop folder creation...')
     print('Input MD results: {}'.format(args.input_file))
-    print('Input image folder {}'.format(args.input_folder))
+    print('Input image folder: {}'.format(args.input_folder))
     print('Output crop folder: {}'.format(args.output_folder))
 
     if args.output_file:
megadetector/postprocessing/generate_csv_report.py

@@ -126,6 +126,7 @@ def generate_csv_report(md_results_file,
                                         recursive=True)
 
     else:
+
         assert os.path.isfile(datetime_source), \
             'datetime source {} is neither a folder nor a file'.format(datetime_source)
 
@@ -153,11 +154,14 @@ def generate_csv_report(md_results_file,
         print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
             'to contain datetime information.')
 
+    # ...if datetime_source is a folder/file
+
     assert all_exif_results is not None
 
     filename_to_datetime_string = {}
 
     for exif_result in all_exif_results:
+
         datetime_string = unknown_datetime_tag
         if ('exif_tags' in exif_result) and \
            (exif_result['exif_tags'] is not None) and \
@@ -169,6 +173,8 @@ def generate_csv_report(md_results_file,
         assert isinstance(datetime_string,str), 'Unrecognized datetime format'
         filename_to_datetime_string[exif_result['file_name']] = datetime_string
 
+    # ...for each exif result
+
     image_files = [im['file'] for im in results['images']]
     image_files_set = set(image_files)
 
@@ -250,11 +256,10 @@ def generate_csv_report(md_results_file,
         base_record['filename'] = im['file'].replace('\\','/')
 
         # Datetime (if necessary)
+        datetime_string = ''
         if filename_to_datetime_string is not None:
             if im['file'] in filename_to_datetime_string:
                 datetime_string = filename_to_datetime_string[im['file']]
-            else:
-                datetime_string = ''
         base_record['datetime'] = datetime_string
 
         for s in ['detection_category','max_detection_confidence',
@@ -383,13 +388,22 @@ def generate_csv_report(md_results_file,
     # ...for each image
 
     # Make sure every record has the same columns
-    column_names = output_records[0].keys()
-    for record in output_records:
-        assert record.keys() == column_names
 
-    # Write to .csv
-    df = pd.DataFrame(output_records)
-    df.to_csv(output_file,header=True,index=False)
+    if len(output_records) == 0:
+        print('Warning: no output records generated')
+    else:
+        column_names = output_records[0].keys()
+        for record in output_records:
+            assert record.keys() == column_names
+
+        # Create folder for output file if necessary
+        output_dir = os.path.dirname(output_file)
+        if len(output_dir) > 0:
+            os.makedirs(output_dir, exist_ok=True)
+
+        # Write to .csv
+        df = pd.DataFrame(output_records)
+        df.to_csv(output_file,header=True,index=False)
 
     # from megadetector.utils.path_utils import open_file; open_file(output_file)
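
The datetime change above fixes a latent bug: datetime_string = '' used to live in an else attached to the inner lookup, so when filename_to_datetime_string was None, the subsequent base_record['datetime'] = datetime_string read a variable that was either unbound (first iteration) or stale from a previous image. Hoisting the default above the conditional makes every path safe. A reduced illustration (the helper name is invented for the example):

    def datetime_for_file(filename_to_datetime_string, filename):
        datetime_string = ''
        if filename_to_datetime_string is not None:
            if filename in filename_to_datetime_string:
                datetime_string = filename_to_datetime_string[filename]
        return datetime_string  # defined on every path

    assert datetime_for_file(None, 'img.jpg') == ''
    assert datetime_for_file({'img.jpg': '2024:01:01 12:00:00'}, 'img.jpg') == '2024:01:01 12:00:00'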
 
megadetector/postprocessing/load_api_results.py

@@ -23,7 +23,8 @@ from collections.abc import Mapping
 
 import pandas as pd
 
-from megadetector.utils import ct_utils
+from megadetector.utils.ct_utils import get_max_conf
+from megadetector.utils.ct_utils import write_json
 from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
 
 
@@ -85,7 +86,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
     # add them, because our unofficial internal dataframe format includes this.
     for im in detection_results['images']:
         if 'max_detection_conf' not in im:
-            im['max_detection_conf'] = ct_utils.get_max_conf(im)
+            im['max_detection_conf'] = get_max_conf(im)
 
     # Pack the json output into a Pandas DataFrame
     detection_results = pd.DataFrame(detection_results['images'])
@@ -139,8 +140,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
         print('Warning: error removing max_detection_conf from output')
         pass
 
-    with open(out_path, 'w') as f:
-        json.dump(fields, f, indent=1)
+    write_json(out_path,fields)
 
     print('Finished writing detection results to {}'.format(out_path))
 
@@ -214,6 +214,10 @@ def write_api_results_csv(detection_results, filename):
 
     print('Writing detection results to {}'.format(filename))
 
+    output_dir = os.path.dirname(filename)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir, exist_ok=True)
+
     detection_results.to_csv(filename, index=False)
 
     print('Finished writing detection results to {}'.format(filename))
megadetector/postprocessing/md_to_coco.py

@@ -22,6 +22,7 @@ from tqdm import tqdm
 
 from megadetector.visualization import visualization_utils as vis_utils
 from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import write_json
 
 default_confidence_threshold = 0.15
 
@@ -296,9 +297,7 @@ def md_to_coco(md_results_file,
     if verbose:
         print('Writing COCO output file...')
 
-    if coco_output_file is not None:
-        with open(coco_output_file,'w') as f:
-            json.dump(output_dict,f,indent=1)
+    write_json(coco_output_file,output_dict)
 
     return output_dict