megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,164 @@
1
+ """
2
+
3
+ test_lila_metadata_urls.py
4
+
5
+ Test that all the metadata URLs for LILA camera trap datasets are valid, including MegaDetector
6
+ results files.
7
+
8
+ Also pick an arbitrary image from each dataset and make sure that URL is valid.
9
+
10
+ Also pick an arbitrary image from each dataset's MD results and make sure the corresponding URL is valid.
11
+
12
+ """
13
+
14
+ #%% Constants and imports
15
+
16
+ import json
17
+ import os
18
+
19
+ from megadetector.data_management.lila.lila_common import read_lila_metadata,\
20
+ read_metadata_file_for_dataset, read_lila_taxonomy_mapping
21
+ from megadetector.utils.url_utils import test_urls
22
+
23
# Root folder for images, metadata downloads, and temporary files
lila_local_base = os.path.expanduser('~/lila')

# Working folders underneath the root
output_dir = os.path.join(lila_local_base,'lila_metadata_tests')
metadata_dir = os.path.join(lila_local_base,'metadata')
md_results_dir = os.path.join(lila_local_base,'md_results')

# Create each working folder if it doesn't exist yet
for _working_folder in (output_dir,metadata_dir,md_results_dir):
    os.makedirs(_working_folder,exist_ok=True)
del _working_folder

# Per-dataset metadata fields that are read as MegaDetector results URLs
md_results_keys = [
    'mdv5a_results_raw',
    'mdv5b_results_raw',
    'md1000-redwood_results_raw',
    'md_results_with_rde'
]

# None means "test every cloud"; otherwise one of 'gcp', 'azure', 'aws'
preferred_cloud = None

# Passed as force_download to every metadata download call in this script
force_download = True
41
+
42
+
43
+ #%% Load category and taxonomy files
44
+
45
# Load the taxonomy mapping, using [metadata_dir] as the local folder
taxonomy_df = read_lila_taxonomy_mapping(
    metadata_dir,
    force_download=force_download)


#%% Download and parse the metadata file

# Indexed by dataset name in the rest of this script
metadata_table = read_lila_metadata(
    metadata_dir,
    force_download=force_download)

print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
53
+
54
+
55
+ #%% Download and extract metadata and MD results for each dataset
56
+
57
+ # Takes ~10 minutes if everything needs to be downloaded and unzipped
58
+
59
for ds_name in metadata_table:

    # Fetch the main metadata file for this dataset, and remember where it landed
    main_json_filename = read_metadata_file_for_dataset(
        ds_name=ds_name,
        metadata_dir=metadata_dir,
        metadata_table=metadata_table,
        force_download=force_download,
        preferred_cloud=preferred_cloud)

    ds_info = metadata_table[ds_name]
    ds_info['json_filename'] = main_json_filename

    # Fetch each MD results file that has a URL for this dataset; results files
    # without a URL get a None filename.
    for results_key in md_results_keys:

        results_url = ds_info[results_key]
        results_filename = None

        if results_url is not None:
            results_filename = read_metadata_file_for_dataset(
                ds_name=ds_name,
                metadata_dir=md_results_dir,
                json_url=results_url,
                force_download=force_download,
                preferred_cloud=preferred_cloud)

        ds_info[results_key + '_filename'] = results_filename

    # ...for each MD results file

# ...for each dataset
86
+
87
+
88
+ #%% Build up a list of URLs to test
89
+
90
+ # Takes ~15 mins, since it has to open all the giant .json files.
91
+
92
# Maps each URL we want to test to a human-readable description of where it came from
url_to_source = {}

# The first image in a dataset is disproportionately likely to be human (and thus 404),
# so we pick a semi-arbitrary image that isn't the first.  How about the 2000th?
image_index = 2000


def _pick_test_image(images, index):
    """
    Picks the test image from a list of image records.

    Args:
        images (list): image records from a metadata or MD results file
        index (int): preferred position in [images]

    Returns:
        dict: images[index], or the last record if the list has [index] or fewer
        entries; None if the list is empty
    """

    if len(images) == 0:
        return None
    return images[min(index, len(images) - 1)]


# TODO: parallelize this loop
#
# ds_name = list(metadata_table.keys())[0]
for ds_name in metadata_table.keys():

    if 'bbox' in ds_name:
        print('Skipping bbox dataset {}'.format(ds_name))
        continue

    print('Processing dataset {}'.format(ds_name))

    ds_info = metadata_table[ds_name]

    with open(ds_info['json_filename'], 'r') as f:
        data = json.load(f)

    # Keep just the one record we need, so the giant metadata dict can be released
    test_image = _pick_test_image(data['images'], image_index)
    del data

    if test_image is None:
        print('Warning: no images in the metadata file for dataset {}'.format(ds_name))

    if preferred_cloud is not None:
        clouds = [preferred_cloud]
    else:
        clouds = ['gcp','aws','azure']

    # After the loop below, this holds the base URL for the last cloud in [clouds];
    # that is the base URL used for the MD results images.
    image_base_url = None

    for cloud in clouds:

        image_base_url = ds_info['image_base_url_' + cloud]
        assert not image_base_url.endswith('/')

        if test_image is None:
            continue

        # Build the URL for the test image on this cloud
        test_image_url = image_base_url + '/' + test_image['file_name']
        url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)

    # ...for each cloud

    # Grab an image from the MegaDetector results

    # k = md_results_keys[0]
    for k in md_results_keys:

        md_results_filename = ds_info[k + '_filename']
        if md_results_filename is None:
            continue

        with open(md_results_filename,'r') as f:
            md_results = json.load(f)

        im = _pick_test_image(md_results['images'], image_index)
        del md_results

        if im is None:
            print('Warning: no images in {} for dataset {}'.format(k,ds_name))
            continue

        md_image_url = image_base_url + '/' + im['file']
        url_to_source[md_image_url] = ds_name + ' ' + k

    # ...for each MD results file

# ...for each dataset
146
+
147
+
148
+ #%% Test URLs
149
+
150
# Normalize path separators before testing.  The source lookup is re-keyed by the
# normalized URL, so that reporting a failure for a URL that originally contained
# a backslash doesn't raise a KeyError.
normalized_url_to_source = {}
for original_url,source in url_to_source.items():
    normalized_url_to_source[original_url.replace('\\','/')] = source

urls_to_test = sorted(normalized_url_to_source.keys())

status_codes = test_urls(urls_to_test,
                         error_on_failure=False,
                         pool_type='thread',
                         n_workers=10,
                         timeout=2.0)

# Report every URL that didn't come back with a 200
for i_url,url in enumerate(urls_to_test):
    if status_codes[i_url] != 200:
        print('Status {} for {} ({})'.format(
            status_codes[i_url],url,normalized_url_to_source[url]))

print('Tested {} URLs'.format(len(urls_to_test)))
@@ -0,0 +1,344 @@
1
+ """
2
+
3
+ mewc_to_md.py
4
+
5
+ Converts the output of the MEWC inference scripts to the MD output format.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+ import json
13
+ import pandas as pd
14
+ import sys
15
+ import argparse
16
+
17
+ from copy import deepcopy
18
+ from collections import defaultdict
19
+ from megadetector.utils.ct_utils import sort_list_of_dicts_by_key, invert_dictionary # noqa
20
+ from megadetector.utils.path_utils import recursive_file_list
21
+
22
+ from megadetector.postprocessing.validate_batch_results import \
23
+ ValidateBatchResultsOptions, validate_batch_results
24
+
25
+ default_mewc_mount_prefix = '/images/' # default for mewc_to_md(mount_prefix=...) and the --mount_prefix option
26
+ default_mewc_category_name_column = 'class_id' # default for mewc_to_md(category_name_column=...) and --category_name_column
27
+
28
+
29
+ #%% Functions
30
+
31
def _split_snip_filename(snip_file):
    """
    Splits a MEWC snip filename into the source image name and the snip ID.

    Args:
        snip_file (str): snip filename, e.g. "IMG0-0.jpg"

    Returns:
        tuple: (image filename without extension, snip ID as an int), e.g. ("IMG0", 0).
        If the filename does not end in "-[integer]", the snip ID is None and the
        first element is the whole filename without its extension.
    """

    # E.g. "IMG0-0"
    snip_file_no_ext = os.path.splitext(snip_file)[0]

    tokens = snip_file_no_ext.split('-')
    if len(tokens) == 1:
        return snip_file_no_ext, None

    # A hyphenated image name with no numeric suffix is not a snip ID
    try:
        snip_id = int(tokens[-1])
    except ValueError:
        return snip_file_no_ext, None

    return '-'.join(tokens[0:-1]), snip_id


def mewc_to_md(mewc_input_folder,
               output_file=None,
               mount_prefix=default_mewc_mount_prefix,
               category_name_column=default_mewc_category_name_column,
               mewc_out_filename='mewc_out.csv',
               md_out_filename='md_out.json'):
    """
    Converts the output of the MEWC inference scripts to the MD output format.

    Every folder under [mewc_input_folder] that contains a file whose name ends with
    [mewc_out_filename] must also contain [md_out_filename]; the classifications in
    the former are attached to the detections in the latter, and the results from all
    folders are merged into a single dict, with image paths relative to
    [mewc_input_folder].

    Args:
        mewc_input_folder (str): the folder we'll search for MEWC output files
        output_file (str, optional): .json file to write with class information; if
            this is None, nothing is written and no validation is run
        mount_prefix (str, optional): string to remove from all filenames in the MD
            .json file, typically the prefix used to mount the image folder.
        category_name_column (str, optional): column in the MEWC results .csv to use for
            category naming.
        mewc_out_filename (str, optional): MEWC-formatted .csv file that should be
            in [mewc_input_folder]
        md_out_filename (str, optional): MD-formatted .json file (without classification
            information) that should be in [mewc_input_folder]

    Returns:
        dict: an MD-formatted dict, the same as what's written to [output_file]

    Raises:
        AssertionError: if the input folder or a matching MD file is missing, if
            classification results refer to an image with no detection results, or
            if folders disagree about detection categories
        ValueError: if no MEWC results files are found in [mewc_input_folder]
    """

    ##%% Validate inputs

    assert os.path.isdir(mewc_input_folder), \
        'Could not find folder {}'.format(mewc_input_folder)


    ##%% Find MEWC output files

    relative_path_to_mewc_info = {}

    print('Listing files in folder {}'.format(mewc_input_folder))
    all_files_relative = set(recursive_file_list(mewc_input_folder,return_relative_paths=True))

    for fn_relative in all_files_relative:

        if not fn_relative.endswith(mewc_out_filename):
            continue

        folder_relative = '/'.join(fn_relative.split('/')[:-1])
        assert folder_relative not in relative_path_to_mewc_info
        md_output_file_relative = os.path.join(folder_relative,md_out_filename).replace('\\','/')
        assert md_output_file_relative in all_files_relative, \
            'Could not find MD output file {} to match to {}'.format(
                md_output_file_relative,fn_relative)
        relative_path_to_mewc_info[folder_relative] = \
            {'mewc_predict_file':fn_relative,'md_file':md_output_file_relative}

    print('Found {} MEWC results files'.format(len(relative_path_to_mewc_info)))

    # Without at least one results file, there is no 'info' block to copy and
    # nothing to convert, so fail with a clear message.
    if len(relative_path_to_mewc_info) == 0:
        raise ValueError('No MEWC results files ({}) found in folder {}'.format(
            mewc_out_filename,mewc_input_folder))


    ##%% Prepare to loop over results files

    md_results_all = {}
    md_results_all['images'] = []
    md_results_all['detection_categories'] = {}
    md_results_all['classification_categories'] = {}
    md_results_all['info'] = None

    # This is a *global* list of category mappings, across all mewc .csv files
    classification_category_name_to_id = {}


    ##%% Loop over results files

    # relative_folder = next(iter(relative_path_to_mewc_info.keys()))
    for relative_folder in relative_path_to_mewc_info:

        mewc_info = relative_path_to_mewc_info[relative_folder]
        mewc_csv_fn_abs = os.path.join(mewc_input_folder,mewc_info['mewc_predict_file'])
        mewc_md_fn_abs = os.path.join(mewc_input_folder,mewc_info['md_file'])

        # One dict per row of the MEWC .csv file
        mewc_classification_info = pd.read_csv(mewc_csv_fn_abs).to_dict('records')

        assert os.path.isfile(mewc_md_fn_abs), \
            'Could not find file {}'.format(mewc_md_fn_abs)
        with open(mewc_md_fn_abs,'r') as f:
            md_results = json.load(f)


        ##%% Remove the mount prefix from MD files if necessary

        if mount_prefix is not None and len(mount_prefix) > 0:

            n_files_without_mount_prefix = 0

            # im = md_results['images'][0]
            for im in md_results['images']:
                if not im['file'].startswith(mount_prefix):
                    n_files_without_mount_prefix += 1
                else:
                    im['file'] = im['file'].replace(mount_prefix,'',1)

            if n_files_without_mount_prefix > 0:
                print('Warning {} of {} files in the MD results did not include the mount prefix {}'.format(
                    n_files_without_mount_prefix,len(md_results['images']),mount_prefix))


        ##%% Convert MEWC snip IDs to image files

        # r = mewc_classification_info[0]
        for r in mewc_classification_info:

            # E.g. "IMG0-0.jpg" -> ("IMG0", 0)
            image_filename_without_extension, snip_id = _split_snip_filename(r['filename'])

            if snip_id is None:
                print('Warning: in folder {}, detection ID not found in snip filename {}, skipping'.format(
                    relative_folder,image_filename_without_extension))

            r['image_filename_without_extension'] = image_filename_without_extension
            r['snip_id'] = snip_id

        # ...for each MEWC result record


        ##%% Make sure MD results and MEWC results refer to the same files

        images_in_md_results_no_extension = \
            set([os.path.splitext(im['file'])[0] for im in md_results['images']])
        images_in_mewc_results_no_extension = set(r['image_filename_without_extension'] \
                                                  for r in mewc_classification_info)

        # All files with classification results should also have detection results
        for fn in images_in_mewc_results_no_extension:
            assert fn in images_in_md_results_no_extension, \
                'Error: file {} is present in mewc-predict results, but not in MD results'.format(fn)

        # Note: the reverse is not checked, because no classification results are
        # present for empty images.


        ##%% Validate images

        for im in md_results['images']:
            fn_abs = os.path.join(mewc_input_folder,relative_folder,im['file'])
            if not os.path.isfile(fn_abs):
                print('Warning: image file {} does not exist'.format(fn_abs))


        ##%% Map filenames to MEWC results

        image_id_to_mewc_records = defaultdict(list)
        for r in mewc_classification_info:
            image_id_to_mewc_records[r['image_filename_without_extension']].append(r)


        ##%% Add classification info to MD results

        # im = md_results['images'][0]
        for im in md_results['images']:

            if ('detections' not in im) or (im['detections'] is None) or (len(im['detections']) == 0):
                continue

            detections = im['detections']

            # *Don't* sort by confidence, it looks like snip IDs use the original sort order
            # detections = sort_list_of_dicts_by_key(detections,'conf',reverse=True)

            # Start every detection with an empty classification list; this also makes
            # this step safe to run more than once on the same results.
            for det in detections:
                det['classifications'] = []

            image_id = os.path.splitext(im['file'])[0]
            mewc_records_this_image = image_id_to_mewc_records[image_id]

            # r = mewc_records_this_image[0]
            for r in mewc_records_this_image:

                if r['snip_id'] is None:
                    continue

                category_name = r[category_name_column]

                # Assign category IDs in the order in which category names are first seen
                if category_name not in classification_category_name_to_id:
                    category_id = str(len(classification_category_name_to_id))
                    classification_category_name_to_id[category_name] = category_id
                else:
                    category_id = classification_category_name_to_id[category_name]

                snip_id = r['snip_id']
                if snip_id >= len(detections):
                    print('Warning: image {} has a classified snip ID of {}, but only {} detections are present'.format(
                        image_id,snip_id,len(detections)))
                    continue

                # The snip ID is used as an index into this image's detection list
                det = detections[snip_id]
                det['classifications'].append([category_id,r['prob']])

            # ...for each classification in this image

        # ...for each image


        ##%% Map MD results to the global level

        # The 'info' block is taken from the first results file we process
        if md_results_all['info'] is None:
            md_results_all['info'] = md_results['info']

        for category_id in md_results['detection_categories']:
            if category_id not in md_results_all['detection_categories']:
                md_results_all['detection_categories'][category_id] = \
                    md_results['detection_categories'][category_id]
            else:
                assert md_results_all['detection_categories'][category_id] == \
                    md_results['detection_categories'][category_id], \
                    'MD results present with incompatible detection categories'

        # Image paths in the merged results are relative to [mewc_input_folder]
        #
        # im = md_results['images'][0]
        for im in md_results['images']:
            im_copy = deepcopy(im)
            im_copy['file'] = os.path.join(relative_folder,im['file']).replace('\\','/')
            md_results_all['images'].append(im_copy)

    # ...for each folder that contains MEWC results


    ##%% Write output

    md_results_all['classification_categories'] = invert_dictionary(classification_category_name_to_id)

    if output_file is not None:

        # os.makedirs('') raises an error, so only create the output folder when
        # [output_file] has a directory component.
        output_dir = os.path.dirname(output_file)
        if len(output_dir) > 0:
            os.makedirs(output_dir,exist_ok=True)

        with open(output_file,'w') as f:
            json.dump(md_results_all,f,indent=1)

        # Validate the file we just wrote, including the existence of every image
        validation_options = ValidateBatchResultsOptions()
        validation_options.check_image_existence = True
        validation_options.relative_path_base = mewc_input_folder
        validation_options.raise_errors = True
        validation_results = validate_batch_results(output_file,validation_options) # noqa

    return md_results_all

# ...def mewc_to_md(...)
293
+
294
+
295
+ #%% Interactive driver
296
+
297
# This block never runs; the cell below is meant to be run by hand
if False:

    pass

    #%% Convert a local MEWC test folder

    mount_prefix = '/images/'
    mewc_input_folder = r'G:\temp\mewc-test'
    output_file = os.path.join(mewc_input_folder,'results_with_classes.json')

    _ = mewc_to_md(
        mewc_input_folder=mewc_input_folder,
        output_file=output_file,
        mount_prefix=mount_prefix,
        category_name_column='class_id')
311
+
312
+
313
+ #%% Command-line driver
314
+
315
def main(): # noqa
    """
    Command-line driver: parses arguments and runs mewc_to_md().

    Prints the usage message and exits if no arguments are supplied.
    """

    parser = argparse.ArgumentParser()

    # Required positional arguments
    parser.add_argument(
        'input_folder',
        type=str,
        help='Folder containing images and MEWC .json/.csv files')
    parser.add_argument(
        'output_file',
        type=str,
        help='.json file where output will be written')

    # Optional arguments, defaulting to the module-level defaults
    parser.add_argument(
        '--mount_prefix',
        type=str,
        default=default_mewc_mount_prefix,
        help='prefix to remove from each filename in MEWC results, typically the Docker mount point')
    parser.add_argument(
        '--category_name_column',
        type=str,
        default=default_mewc_category_name_column,
        help='column in the MEWC .csv file to use for category names')

    # With nothing on the command line, show the full help text and exit
    no_arguments_supplied = (len(sys.argv) <= 1)
    if no_arguments_supplied:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    conversion_kwargs = {
        'mewc_input_folder': args.input_folder,
        'output_file': args.output_file,
        'mount_prefix': args.mount_prefix,
        'category_name_column': args.category_name_column
    }
    _ = mewc_to_md(**conversion_kwargs)


if __name__ == '__main__':
    main()