megadetector 10.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/__init__.py +0 -0
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +626 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +853 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +608 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +696 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +509 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/animl_to_md.py +161 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +33 -0
- megadetector/data_management/camtrap_dp_to_coco.py +270 -0
- megadetector/data_management/cct_json_utils.py +566 -0
- megadetector/data_management/cct_to_md.py +184 -0
- megadetector/data_management/cct_to_wi.py +293 -0
- megadetector/data_management/coco_to_labelme.py +284 -0
- megadetector/data_management/coco_to_yolo.py +701 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
- megadetector/data_management/databases/integrity_check_json_db.py +563 -0
- megadetector/data_management/databases/subset_json_db.py +195 -0
- megadetector/data_management/generate_crops_from_cct.py +200 -0
- megadetector/data_management/get_image_sizes.py +164 -0
- megadetector/data_management/labelme_to_coco.py +559 -0
- megadetector/data_management/labelme_to_yolo.py +349 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/create_lila_blank_set.py +556 -0
- megadetector/data_management/lila/create_lila_test_set.py +192 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +182 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +319 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
- megadetector/data_management/mewc_to_md.py +344 -0
- megadetector/data_management/ocr_tools.py +873 -0
- megadetector/data_management/read_exif.py +964 -0
- megadetector/data_management/remap_coco_categories.py +195 -0
- megadetector/data_management/remove_exif.py +156 -0
- megadetector/data_management/rename_images.py +194 -0
- megadetector/data_management/resize_coco_dataset.py +665 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/wi_download_csv_to_coco.py +247 -0
- megadetector/data_management/yolo_output_to_md_output.py +594 -0
- megadetector/data_management/yolo_to_coco.py +984 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/change_detection.py +840 -0
- megadetector/detection/process_video.py +479 -0
- megadetector/detection/pytorch_detector.py +1451 -0
- megadetector/detection/run_detector.py +1267 -0
- megadetector/detection/run_detector_batch.py +2172 -0
- megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
- megadetector/detection/run_md_and_speciesnet.py +1604 -0
- megadetector/detection/run_tiled_inference.py +1044 -0
- megadetector/detection/tf_detector.py +209 -0
- megadetector/detection/video_utils.py +1379 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +72 -0
- megadetector/postprocessing/categorize_detections_by_size.py +166 -0
- megadetector/postprocessing/classification_postprocessing.py +1943 -0
- megadetector/postprocessing/combine_batch_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +2110 -0
- megadetector/postprocessing/convert_output_format.py +403 -0
- megadetector/postprocessing/create_crop_folder.py +629 -0
- megadetector/postprocessing/detector_calibration.py +570 -0
- megadetector/postprocessing/generate_csv_report.py +522 -0
- megadetector/postprocessing/load_api_results.py +223 -0
- megadetector/postprocessing/md_to_coco.py +428 -0
- megadetector/postprocessing/md_to_labelme.py +351 -0
- megadetector/postprocessing/md_to_wi.py +41 -0
- megadetector/postprocessing/merge_detections.py +392 -0
- megadetector/postprocessing/postprocess_batch_results.py +2140 -0
- megadetector/postprocessing/remap_detection_categories.py +226 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
- megadetector/postprocessing/separate_detections_into_folders.py +795 -0
- megadetector/postprocessing/subset_json_detector_output.py +964 -0
- megadetector/postprocessing/top_folders_to_bottom.py +238 -0
- megadetector/postprocessing/validate_batch_results.py +332 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +231 -0
- megadetector/taxonomy_mapping/species_lookup.py +1008 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/ct_utils.py +1857 -0
- megadetector/utils/directory_listing.py +199 -0
- megadetector/utils/extract_frames_from_video.py +307 -0
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +2072 -0
- megadetector/utils/path_utils.py +2872 -0
- megadetector/utils/process_utils.py +172 -0
- megadetector/utils/split_locations_into_train_val.py +237 -0
- megadetector/utils/string_utils.py +234 -0
- megadetector/utils/url_utils.py +825 -0
- megadetector/utils/wi_platform_utils.py +968 -0
- megadetector/utils/wi_taxonomy_utils.py +1766 -0
- megadetector/utils/write_html_image_list.py +239 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +309 -0
- megadetector/visualization/render_images_with_thumbnails.py +243 -0
- megadetector/visualization/visualization_utils.py +1973 -0
- megadetector/visualization/visualize_db.py +630 -0
- megadetector/visualization/visualize_detector_output.py +498 -0
- megadetector/visualization/visualize_video_output.py +705 -0
- megadetector-10.0.15.dist-info/METADATA +115 -0
- megadetector-10.0.15.dist-info/RECORD +147 -0
- megadetector-10.0.15.dist-info/WHEEL +5 -0
- megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
- megadetector-10.0.15.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
test_lila_metadata_urls.py
|
|
4
|
+
|
|
5
|
+
Tests that all the metadata URLs for LILA camera trap datasets are valid, including MegaDetector
|
|
6
|
+
results files.
|
|
7
|
+
|
|
8
|
+
Also picks an arbitrary image from each dataset and makes sure its URL is valid.
|
|
9
|
+
|
|
10
|
+
Also picks an arbitrary image from each dataset's MD results and makes sure the corresponding URL is valid.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
#%% Constants and imports
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
from megadetector.data_management.lila.lila_common import read_lila_metadata,\
|
|
20
|
+
read_metadata_file_for_dataset, read_lila_taxonomy_mapping
|
|
21
|
+
from megadetector.utils.url_utils import test_urls
|
|
22
|
+
|
|
23
|
+
# Root folder for everything this script writes locally: images, metadata
# downloads, and temporary files.
lila_local_base = os.path.expanduser('~/lila')

# Scratch folder for this test, folder for dataset metadata files, and folder
# for MegaDetector results files, respectively.
output_dir = os.path.join(lila_local_base,'lila_metadata_tests')
metadata_dir = os.path.join(lila_local_base,'metadata')
md_results_dir = os.path.join(lila_local_base,'md_results')

# Make sure all of the working folders exist before anything is downloaded
for _working_folder in (output_dir,metadata_dir,md_results_dir):
    os.makedirs(_working_folder,exist_ok=True)
del _working_folder

# Keys in each dataset's metadata record that are read as MD results URLs
md_results_keys = [
    'mdv5a_results_raw',
    'mdv5b_results_raw',
    'md1000-redwood_results_raw',
    'md_results_with_rde'
]

# None means "test every cloud"; otherwise one of 'gcp', 'azure', 'aws'
preferred_cloud = None

# Passed through to the metadata readers as their force_download argument
force_download = True
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
#%% Load category and taxonomy files

taxonomy_df = read_lila_taxonomy_mapping(metadata_dir, force_download=force_download)


#%% Download and parse the metadata file

metadata_table = read_lila_metadata(metadata_dir, force_download=force_download)

n_datasets = len(metadata_table)
print('Loaded metadata URLs for {} datasets'.format(n_datasets))


#%% Download and extract metadata and MD results for each dataset

# Takes ~10 minutes if everything needs to be downloaded and unzipped

for ds_name in metadata_table.keys():

    # Fetch the main metadata file for this dataset, and record the value
    # returned by the reader under 'json_filename'
    json_filename = read_metadata_file_for_dataset(
        ds_name=ds_name,
        metadata_dir=metadata_dir,
        metadata_table=metadata_table,
        force_download=force_download,
        preferred_cloud=preferred_cloud)
    metadata_table[ds_name]['json_filename'] = json_filename

    # Fetch every available MD results file for this dataset.  Entries with no
    # URL get an explicit None, so "<key>_filename" is set for every key.
    for k in md_results_keys:

        md_results_url = metadata_table[ds_name][k]
        md_results_filename = None

        if md_results_url is not None:
            md_results_filename = read_metadata_file_for_dataset(
                ds_name=ds_name,
                metadata_dir=md_results_dir,
                json_url=md_results_url,
                force_download=force_download,
                preferred_cloud=preferred_cloud)

        metadata_table[ds_name][k + '_filename'] = md_results_filename

    # ...for each MD results file

# ...for each dataset
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
#%% Build up a list of URLs to test

# Takes ~15 mins, since it has to open all the giant .json files.

# Maps each URL we want to test to a human-readable description of its source
url_to_source = {}

# The first image in a dataset is disproportionately likely to be human (and thus 404),
# so we pick a semi-arbitrary image that isn't the first.  How about the 2000th?
#
# Datasets (or MD results files) with fewer images than this fall back to their
# last image, rather than failing with an IndexError.
image_index = 2000

# TODO: parallelize this loop
#
# ds_name = list(metadata_table.keys())[0]
for ds_name in metadata_table.keys():

    if 'bbox' in ds_name:
        print('Skipping bbox dataset {}'.format(ds_name))
        continue

    print('Processing dataset {}'.format(ds_name))

    json_filename = metadata_table[ds_name]['json_filename']
    with open(json_filename, 'r') as f:
        data = json.load(f)

    # Choose the test image for this dataset, then release the (large) metadata
    if len(data['images']) == 0:
        print('Warning: no images in the metadata for dataset {}'.format(ds_name))
        test_image_relative_path = None
    else:
        i_test_image = min(image_index,len(data['images']) - 1)
        test_image_relative_path = data['images'][i_test_image]['file_name']
    del data

    if preferred_cloud is not None:
        clouds = [preferred_cloud]
    else:
        clouds = ['gcp','aws','azure']

    image_base_url = None

    for cloud in clouds:

        image_base_url = metadata_table[ds_name]['image_base_url_' + cloud]
        assert not image_base_url.endswith('/')

        if test_image_relative_path is None:
            continue

        # Build the URL for the test image on this cloud (nothing is downloaded here)
        test_image_url = image_base_url + '/' + test_image_relative_path

        url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)

    # ...for each cloud

    # MD results images are only tested against one cloud: the last one in [clouds]
    md_image_base_url = image_base_url

    # Grab an image from the MegaDetector results

    # k = md_results_keys[0]
    for k in md_results_keys:

        md_results_filename = metadata_table[ds_name][k + '_filename']
        if md_results_filename is None:
            continue

        with open(md_results_filename,'r') as f:
            md_results = json.load(f)

        md_images = md_results['images']
        if len(md_images) == 0:
            print('Warning: no images in {} for dataset {}'.format(k,ds_name))
        else:
            im = md_images[min(image_index,len(md_images) - 1)]
            md_image_url = md_image_base_url + '/' + im['file']
            url_to_source[md_image_url] = ds_name + ' ' + k

        del md_images
        del md_results

    # ...for each MD results file

# ...for each dataset
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
#%% Test URLs

# Keep the original URLs (the keys of url_to_source) alongside the normalized
# URLs we actually request.  Looking up a normalized URL in url_to_source would
# raise a KeyError for any URL that contained a backslash.
source_urls = sorted(url_to_source.keys())
urls_to_test = [url.replace('\\','/') for url in source_urls]

status_codes = test_urls(urls_to_test,
                         error_on_failure=False,
                         pool_type='thread',
                         n_workers=10,
                         timeout=2.0)

# Report every URL that did not come back with HTTP 200
for i_url,(source_url,url) in enumerate(zip(source_urls,urls_to_test)):
    if status_codes[i_url] != 200:
        print('Status {} for {} ({})'.format(
            status_codes[i_url],url,url_to_source[source_url]))

print('Tested {} URLs'.format(len(urls_to_test)))
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
mewc_to_md.py
|
|
4
|
+
|
|
5
|
+
Converts the output of the MEWC inference scripts to the MD output format.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
#%% Imports and constants
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import json
|
|
13
|
+
import pandas as pd
|
|
14
|
+
import sys
|
|
15
|
+
import argparse
|
|
16
|
+
|
|
17
|
+
from copy import deepcopy
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
from megadetector.utils.ct_utils import sort_list_of_dicts_by_key, invert_dictionary # noqa
|
|
20
|
+
from megadetector.utils.path_utils import recursive_file_list
|
|
21
|
+
|
|
22
|
+
from megadetector.postprocessing.validate_batch_results import \
|
|
23
|
+
ValidateBatchResultsOptions, validate_batch_results
|
|
24
|
+
|
|
25
|
+
# Default prefix stripped from filenames in the MD results; per the command-line
# help, this is typically the Docker mount point used when running MEWC.
default_mewc_mount_prefix = '/images/'

# Default column in the MEWC results .csv that supplies category names
default_mewc_category_name_column = 'class_id'
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
#%% Functions
|
|
30
|
+
|
|
31
|
+
def _split_snip_filename(snip_file):
    """
    Splits a MEWC snip filename into its image filename stem and its snip ID.

    Args:
        snip_file (str): snip filename, e.g. "IMG0-0.jpg"

    Returns:
        tuple: (image filename without extension, snip ID).  The snip ID is None
        if the filename does not end in "-<integer>" (before the extension), in
        which case the whole extension-less filename is returned as the first
        element.
    """

    # E.g. "IMG0-0"
    snip_file_no_ext = os.path.splitext(snip_file)[0]

    # Only the last hyphen-delimited token is the snip ID; image filenames
    # may themselves contain hyphens.
    image_stem, separator, snip_token = snip_file_no_ext.rpartition('-')
    if len(separator) == 0:
        return snip_file_no_ext, None

    try:
        return image_stem, int(snip_token)
    except ValueError:
        # The text after the last hyphen is not a detection index
        return snip_file_no_ext, None


def mewc_to_md(mewc_input_folder,
               output_file=None,
               mount_prefix=default_mewc_mount_prefix,
               category_name_column=default_mewc_category_name_column,
               mewc_out_filename='mewc_out.csv',
               md_out_filename='md_out.json'):
    """
    Converts the output of the MEWC inference scripts to the MD output format.

    Searches [mewc_input_folder] recursively for MEWC results files; each one must
    sit next to an MD results file.  Classifications are attached to the detections
    they refer to, and the per-folder results are merged into a single MD-formatted
    dict, with filenames relative to [mewc_input_folder].

    Args:
        mewc_input_folder (str): the folder we'll search for MEWC output files
        output_file (str, optional): .json file to write with class information;
            if this is None, nothing is written (or validated)
        mount_prefix (str, optional): string to remove from all filenames in the MD
            .json file, typically the prefix used to mount the image folder.
        category_name_column (str, optional): column in the MEWC results .csv to use for
            category naming.
        mewc_out_filename (str, optional): MEWC-formatted .csv file that should be
            in [mewc_input_folder]
        md_out_filename (str, optional): MD-formatted .json file (without classification
            information) that should be in [mewc_input_folder]

    Returns:
        dict: an MD-formatted dict, the same as what's written to [output_file]
    """

    ##%% Validate inputs

    assert os.path.isdir(mewc_input_folder), \
        'Could not find folder {}'.format(mewc_input_folder)


    ##%% Find MEWC output files

    # Maps each (relative) folder containing MEWC results to the (relative) paths
    # of its MEWC .csv file and its MD .json file
    relative_path_to_mewc_info = {}

    print('Listing files in folder {}'.format(mewc_input_folder))
    all_files_relative = set(recursive_file_list(mewc_input_folder,return_relative_paths=True))

    # Iterate in sorted order so that image order and classification category IDs
    # are reproducible from run to run (set iteration order is not).
    for fn_relative in sorted(all_files_relative):

        if not fn_relative.endswith(mewc_out_filename):
            continue

        folder_relative = '/'.join(fn_relative.split('/')[:-1])
        assert folder_relative not in relative_path_to_mewc_info, \
            'Found more than one MEWC results file in folder {}'.format(folder_relative)

        md_output_file_relative = os.path.join(folder_relative,md_out_filename).replace('\\','/')
        assert md_output_file_relative in all_files_relative, \
            'Could not find MD output file {} to match to {}'.format(
                md_output_file_relative,fn_relative)

        relative_path_to_mewc_info[folder_relative] = \
            {'mewc_predict_file':fn_relative,'md_file':md_output_file_relative}

    print('Found {} MEWC results files'.format(len(relative_path_to_mewc_info)))


    ##%% Prepare to loop over results files

    md_results_all = {}
    md_results_all['images'] = []
    md_results_all['detection_categories'] = {}
    md_results_all['classification_categories'] = {}
    md_results_all['info'] = None

    # This is a *global* mapping, shared across all MEWC .csv files
    classification_category_name_to_id = {}


    ##%% Loop over results files

    # relative_folder = next(iter(relative_path_to_mewc_info.keys()))
    for relative_folder in relative_path_to_mewc_info:

        ##%% Read the MEWC and MD results for this folder

        mewc_info = relative_path_to_mewc_info[relative_folder]
        mewc_csv_fn_abs = os.path.join(mewc_input_folder,mewc_info['mewc_predict_file'])
        mewc_md_fn_abs = os.path.join(mewc_input_folder,mewc_info['md_file'])

        mewc_classification_info = pd.read_csv(mewc_csv_fn_abs)
        mewc_classification_info = mewc_classification_info.to_dict('records')

        assert os.path.isfile(mewc_md_fn_abs), \
            'Could not find file {}'.format(mewc_md_fn_abs)
        with open(mewc_md_fn_abs,'r') as f:
            md_results = json.load(f)


        ##%% Remove the mount prefix from MD files if necessary

        if mount_prefix is not None and len(mount_prefix) > 0:

            n_files_without_mount_prefix = 0

            # im = md_results['images'][0]
            for im in md_results['images']:
                if im['file'].startswith(mount_prefix):
                    im['file'] = im['file'][len(mount_prefix):]
                else:
                    n_files_without_mount_prefix += 1

            if n_files_without_mount_prefix > 0:
                print('Warning {} of {} files in the MD results did not include the mount prefix {}'.format(
                    n_files_without_mount_prefix,len(md_results['images']),mount_prefix))


        ##%% Convert MEWC snip IDs to image files

        # r = mewc_classification_info[0]
        for r in mewc_classification_info:

            # E.g. "IMG0-0.jpg" --> ("IMG0", 0)
            image_filename_without_extension, snip_id = _split_snip_filename(r['filename'])

            if snip_id is None:
                print('Warning: in folder {}, detection ID not found in snip filename {}, skipping'.format(
                    relative_folder,image_filename_without_extension))

            r['image_filename_without_extension'] = image_filename_without_extension
            r['snip_id'] = snip_id

        # ...for each MEWC result record


        ##%% Make sure MD results and MEWC results refer to the same files

        images_in_md_results_no_extension = \
            set([os.path.splitext(im['file'])[0] for im in md_results['images']])
        images_in_mewc_results_no_extension = \
            set([r['image_filename_without_extension'] for r in mewc_classification_info])

        # All files with classification results should also have detection results.
        #
        # The reverse is not true: no classification results are present for
        # empty images.
        for fn in images_in_mewc_results_no_extension:
            assert fn in images_in_md_results_no_extension, \
                'Error: file {} is present in mewc-predict results, but not in MD results'.format(fn)


        ##%% Validate images

        for im in md_results['images']:
            fn_abs = os.path.join(mewc_input_folder,relative_folder,im['file'])
            if not os.path.isfile(fn_abs):
                print('Warning: image file {} does not exist'.format(fn_abs))


        ##%% Map filenames to MEWC results

        image_id_to_mewc_records = defaultdict(list)
        for r in mewc_classification_info:
            image_id_to_mewc_records[r['image_filename_without_extension']].append(r)


        ##%% Add classification info to MD results

        # im = md_results['images'][0]
        for im in md_results['images']:

            if ('detections' not in im) or (im['detections'] is None) or (len(im['detections']) == 0):
                continue

            # *Don't* sort by confidence, it looks like snip IDs use the original sort order
            detections = im['detections']

            # Start every detection with an empty classification list
            for det in detections:
                det['classifications'] = []

            image_id = os.path.splitext(im['file'])[0]

            # r = image_id_to_mewc_records[image_id][0]
            for r in image_id_to_mewc_records[image_id]:

                snip_id = r['snip_id']
                if snip_id is None:
                    continue

                category_name = r[category_name_column]

                # Assign the next available ID the first time we see a category name
                if category_name not in classification_category_name_to_id:
                    classification_category_name_to_id[category_name] = \
                        str(len(classification_category_name_to_id))
                category_id = classification_category_name_to_id[category_name]

                if snip_id >= len(detections):
                    print('Warning: image {} has a classified snip ID of {}, but only {} detections are present'.format(
                        image_id,snip_id,len(detections)))
                    continue

                # The snip ID is an index into this image's detection list
                detections[snip_id]['classifications'].append([category_id,r['prob']])

            # ...for each classification in this image

        # ...for each image


        ##%% Map MD results to the global level

        # The "info" field is taken from the first results file we process
        if md_results_all['info'] is None:
            md_results_all['info'] = md_results['info']

        for category_id in md_results['detection_categories']:
            if category_id not in md_results_all['detection_categories']:
                md_results_all['detection_categories'][category_id] = \
                    md_results['detection_categories'][category_id]
            else:
                assert md_results_all['detection_categories'][category_id] == \
                    md_results['detection_categories'][category_id], \
                    'MD results present with incompatible detection categories'

        # Filenames in the merged results are relative to [mewc_input_folder]
        #
        # im = md_results['images'][0]
        for im in md_results['images']:
            im_copy = deepcopy(im)
            im_copy['file'] = os.path.join(relative_folder,im['file']).replace('\\','/')
            md_results_all['images'].append(im_copy)

    # ...for each folder that contains MEWC results


    ##%% Write output

    md_results_all['classification_categories'] = invert_dictionary(classification_category_name_to_id)

    if output_file is not None:

        # os.makedirs('') raises, so only create the folder if the output path has one
        output_dir = os.path.dirname(output_file)
        if len(output_dir) > 0:
            os.makedirs(output_dir,exist_ok=True)

        with open(output_file,'w') as f:
            json.dump(md_results_all,f,indent=1)

        # Validate what we just wrote; raises if validation fails
        validation_options = ValidateBatchResultsOptions()
        validation_options.check_image_existence = True
        validation_options.relative_path_base = mewc_input_folder
        validation_options.raise_errors = True
        validate_batch_results(output_file,validation_options)

    return md_results_all

# ...def mewc_to_md(...)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
#%% Interactive driver

# Scratch cell for running the conversion by hand from an IDE; the "if False"
# guard keeps it from ever running on import or from the command line.
if False:

    pass

    #%%

    mewc_input_folder = r'G:\temp\mewc-test'
    mount_prefix = '/images/'
    output_file = os.path.join(mewc_input_folder,'results_with_classes.json')

    interactive_args = {
        'mewc_input_folder':mewc_input_folder,
        'output_file':output_file,
        'mount_prefix':mount_prefix,
        'category_name_column':'class_id'
    }
    _ = mewc_to_md(**interactive_args)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
#%% Command-line driver
|
|
314
|
+
|
|
315
|
+
def main(): # noqa
    """
    Command-line entry point: parses arguments and runs mewc_to_md().

    Prints usage and exits if no arguments are supplied.
    """

    parser = argparse.ArgumentParser()

    # (name, keyword arguments) for each argument, in the order they are registered
    argument_specs = [
        ('input_folder',
         {'type':str,
          'help':'Folder containing images and MEWC .json/.csv files'}),
        ('output_file',
         {'type':str,
          'help':'.json file where output will be written'}),
        ('--mount_prefix',
         {'type':str,
          'default':default_mewc_mount_prefix,
          'help':'prefix to remove from each filename in MEWC results, typically the Docker mount point'}),
        ('--category_name_column',
         {'type':str,
          'default':default_mewc_category_name_column,
          'help':'column in the MEWC .csv file to use for category names'})
    ]

    for argument_name,argument_options in argument_specs:
        parser.add_argument(argument_name,**argument_options)

    # With no arguments at all, show usage rather than an argparse error
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    _ = mewc_to_md(mewc_input_folder=args.input_folder,
                   output_file=args.output_file,
                   mount_prefix=args.mount_prefix,
                   category_name_column=args.category_name_column)


if __name__ == '__main__':
    main()
|