megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff shows the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Potentially problematic release: this version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
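The per-file "+added -removed" counts above are plain line-level diffs between the two wheels. Since a wheel is just a zip archive, similar numbers can be reproduced locally with the standard library alone. The sketch below is illustrative, not the tooling that generated this page; it assumes the two wheels have already been downloaded (for example with "pip download megadetector==5.0.7 --no-deps", and likewise for 5.0.9), and the output directory names are arbitrary.

import difflib
import zipfile
from pathlib import Path

def unpack(wheel_path, dest):
    # A .whl file is a zip archive; extract it so files can be compared on disk.
    with zipfile.ZipFile(wheel_path) as z:
        z.extractall(dest)
    return Path(dest)

def count_changes(old_root, new_root):
    # For every file present in either tree, count added and removed lines.
    old_files = {p.relative_to(old_root) for p in old_root.rglob('*') if p.is_file()}
    new_files = {p.relative_to(new_root) for p in new_root.rglob('*') if p.is_file()}
    for rel in sorted(old_files | new_files, key=str):
        old_lines = (old_root / rel).read_text(errors='replace').splitlines() if rel in old_files else []
        new_lines = (new_root / rel).read_text(errors='replace').splitlines() if rel in new_files else []
        diff = list(difflib.unified_diff(old_lines, new_lines, lineterm=''))
        added = sum(1 for line in diff if line.startswith('+') and not line.startswith('+++'))
        removed = sum(1 for line in diff if line.startswith('-') and not line.startswith('---'))
        if added or removed or (rel in old_files) != (rel in new_files):
            print('{} +{} -{}'.format(rel, added, removed))

old_root = unpack('megadetector-5.0.7-py3-none-any.whl', 'megadetector_5.0.7')
new_root = unpack('megadetector-5.0.9-py3-none-any.whl', 'megadetector_5.0.9')
count_changes(old_root, new_root)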
api/synchronous/api_core/tests/load_test.py

@@ -1,110 +1,110 @@
(Lines 1-109 are removed and re-added; the two sides are identical as rendered here, so the change is not visible at this level, most likely whitespace or line endings. Only the re-added lines are shown.)
+
+import os
+import json
+import io
+import random
+import requests
+
+from PIL import Image
+from multiprocessing import Pool
+from datetime import datetime
+from requests_toolbelt import MultipartEncoder
+from requests_toolbelt.multipart import decoder
+
+
+ip_address = '100.100.200.200'
+port = 5050
+
+base_url = 'http://{}:{}/v1/camera-trap/sync/'.format(ip_address, port)
+
+
+def call_api(args):
+    start = datetime.now()
+
+    index, url, params, data, headers = args['index'],args['url'], args['params'], args['data'], args['headers']
+    print('calling api: {} starttime: {}'.format(index, start))
+
+    response = requests.post(url, params=params, data=data, headers=headers)
+    elapsed_time = datetime.now() - start
+    print('\napi {} status code: {}, elapsed time in seconds {}'.format(index, response.status_code, elapsed_time.total_seconds()))
+
+    get_detections(response)
+    return response
+
+def get_detections(response):
+    results = decoder.MultipartDecoder.from_response(response)
+    text_results = {}
+    images = {}
+    for part in results.parts:
+        # part is a BodyPart object with b'Content-Type', and b'Content-Disposition', the later includes 'name' and 'filename' info
+        headers = {}
+        for k, v in part.headers.items():
+            headers[k.decode(part.encoding)] = v.decode(part.encoding)
+
+        if headers.get('Content-Type', None) == 'application/json':
+            text_result = json.loads(part.content.decode())
+
+            print(text_result)
+
+
+def test_load(num_requests, params, max_images=1):
+    requests = []
+
+    # read the images anew for each request
+    index = 0
+    for i in range(num_requests):
+        index += 1
+        files = {}
+        sample_input_dir = '../../../api/synchronous/sample_input/test_images'
+
+        image_files = os.listdir(sample_input_dir)
+        random.shuffle(image_files)
+
+        num_images = 0
+        for i, image_name in enumerate(image_files):
+            if not image_name.lower().endswith('.jpg'):
+                continue
+
+            if num_images >= max_images:
+                break
+            else:
+                num_images += 1
+
+            img_path = os.path.join(sample_input_dir, image_name)
+            with open(img_path, 'rb') as f:
+                content = f.read()
+            files[image_name] = (image_name, content, 'image/jpeg')
+
+        m = MultipartEncoder(fields=files)
+        args = {
+            'index': index,
+            'url': base_url + 'detect',
+            'params': params,
+            'data': m,
+            'headers': {'Content-Type': m.content_type}
+        }
+        requests.append(args)
+
+    print('starting', num_requests, 'threads...')
+    # images are read and in each request by the time we call the API in map()
+    with Pool(num_requests) as pool:
+        results = pool.map(call_api, requests)
+
+    return results
+
+
+if __name__ == "__main__":
+    params = {
+        'min_confidence': 0.05,
+        'min_rendering_confidence': 0.2,
+        'render': True
+    }
+
+    num_requests = 10
+    max_images = 1
+
+    start = datetime.now()
+    responses = test_load(num_requests, params, max_images=max_images)
+    end = datetime.now()
+    total_time = end - start
     print('Total time for {} requests: {}'.format(num_requests, total_time))
File without changes
classification/aggregate_classifier_probs.py

@@ -1,27 +1,16 @@
-
-#
-# aggregate_classifier_probs.py
-#
-# Aggregate probabilities from a classifier's outputs according to a mapping
-# from the desired (target) categories to the classifier's categories.
-#
-# Using the mapping, create a new version of the classifier output CSV with
-# probabilities summed within each target category. Also output a new
-# "index-to-name" JSON file which identifies the sequential order of the target
-# categories.
-#
-########
+"""
 
-
+aggregate_classifier_probs.py
 
-
-
-classifier_output.csv.gz \
---target-mapping target_to_classifier_labels.json \
---output-csv classifier_output_remapped.csv.gz \
---output-label-index label_index_remapped.json
-"""
+Aggregate probabilities from a classifier's outputs according to a mapping
+from the desired (target) categories to the classifier's categories.
 
+Using the mapping, create a new version of the classifier output CSV with
+probabilities summed within each target category. Also output a new
+"index-to-name" JSON file which identifies the sequential order of the target
+categories.
+
+"""
 
 #%% Imports
 
@@ -33,6 +22,15 @@ import json
 import pandas as pd
 from tqdm import tqdm
 
+#%% Example usage
+
+"""
+python aggregate_classifier_probs.py \
+classifier_output.csv.gz \
+--target-mapping target_to_classifier_labels.json \
+--output-csv classifier_output_remapped.csv.gz \
+--output-label-index label_index_remapped.json
+"""
 
 #%% Main function
 
@@ -46,6 +44,7 @@ def main(classifier_results_csv_path: str,
     Because the output CSV is often very large, we process it in chunks of 1000
     rows at a time.
     """
+
     chunked_df_iterator = pd.read_csv(
         classifier_results_csv_path, chunksize=1000, float_precision='high',
         index_col='path')
@@ -81,9 +80,7 @@ def main(classifier_results_csv_path: str,
 #%% Command-line driver
 
 def _parse_args() -> argparse.Namespace:
-
-    Parses arguments.
-    """
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Aggregate classifier probabilities to target classes.')
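The hunks above only move aggregate_classifier_probs.py's header comment into a module docstring and relocate the usage example, but the operation that docstring describes (summing classifier-category probabilities within each target category, then writing an index-to-name JSON) is easy to picture on a toy table. The sketch below is hypothetical; the category names, the mapping, and the variable names are invented for illustration and are not taken from the script.

import json
import pandas as pd

# Hypothetical classifier output: one row per crop, one probability column per
# classifier category.
probs = pd.DataFrame(
    {'cougar': [0.70, 0.10], 'mountain_lion': [0.20, 0.05], 'deer': [0.10, 0.85]},
    index=['crop_001.jpg', 'crop_002.jpg'])

# Hypothetical target mapping: target category -> classifier categories whose
# probabilities should be summed.
target_to_classifier = {'cat': ['cougar', 'mountain_lion'], 'deer': ['deer']}

# Sum probabilities within each target category, preserving row order.
aggregated = pd.DataFrame(
    {target: probs[cols].sum(axis=1) for target, cols in target_to_classifier.items()})

# The accompanying "index-to-name" JSON maps each column position to its name.
label_index = {str(i): name for i, name in enumerate(aggregated.columns)}

print(aggregated)
print(json.dumps(label_index, indent=1))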
classification/analyze_failed_images.py

@@ -1,20 +1,11 @@
-
-#
-# analyze_failed_images.py
-#
-########
+"""
 
-
+analyze_failed_images.py
 
-"""
-python analyze_failed_images.py failed.json \
--a ACCOUNT -c CONTAINER -s SAS_TOKEN
 """
 
 #%% Imports and constants
 
-from __future__ import annotations
-
 import argparse
 from collections.abc import Mapping, Sequence
 from concurrent import futures
@@ -31,6 +22,14 @@ from data_management.megadb.megadb_utils import MegadbUtils
 from md_utils import path_utils
 from md_utils import sas_blob_utils
 
+
+#%% Example usage
+
+"""
+python analyze_failed_images.py failed.json \
+-a ACCOUNT -c CONTAINER -s SAS_TOKEN
+"""
+
 ImageFile.LOAD_TRUNCATED_IMAGES = False
 
 
@@ -191,8 +190,7 @@ def analyze_images(url_or_path: str, json_keys: Optional[Sequence[str]] = None,
 
 #%% Command-line driver
 
-def _parse_args() -> argparse.Namespace:
-
+def _parse_args() -> argparse.Namespace:
 
     parser = argparse.ArgumentParser(
         description='Analyze a list of images that failed to download or crop.')
classification/cache_batchapi_outputs.py

@@ -1,54 +1,54 @@
-[lines 1-51 removed; their content is not preserved in this rendering]
+"""
+
+cache_batchapi_outputs.py
+
+Script to cache Batch Detection API outputs.
+
+This script can handle either the Batch Detection API JSON Response or the
+detections JSON.
+
+Batch Detection API Response format:
+
+{
+    "Status": {
+        "request_status": "completed",
+        "message": {
+            "num_failed_shards": 0,
+            "output_file_urls": {
+                "detections": "https://url/to/detections.json",
+                "failed_images": "https://url/to/failed_images.json",
+                "images": https://url/to/images.json",
+            }
+        },
+    },
+    "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
+    "TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
+}
+
+Detections JSON format:
+
+{
+    "info": {...}
+    "detection_categories": {...}
+    "classification_categories": {...}
+    "images": [
+        {
+            "file": "path/from/base/dir/image1.jpg",
+            "max_detection_conf": 0.926,
+            "detections": [{
+                "category": "1",
+                "conf": 0.061,
+                "bbox": [0.0451, 0.1849, 0.3642, 0.4636]
+            }]
+        }
+    ]
+}
+
+Batch Detection API Output Format:
+
+github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#api-outputs
+
+"""
 
 #%% Imports
 
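The "Detections JSON format" quoted in the new docstring is the standard MegaDetector batch output structure. As a quick illustration of consuming such a file (a sketch only, not code from this package; the file path is a placeholder):

import json

with open('detections.json') as f:
    results = json.load(f)

# e.g. {"1": "animal", "2": "person", "3": "vehicle"}
category_names = results['detection_categories']

for im in results['images']:
    detections = im.get('detections') or []
    # Compute the per-image maximum confidence rather than assuming the
    # max_detection_conf field is present; it is optional in some output versions.
    max_conf = max((d['conf'] for d in detections), default=0.0)
    labels = {category_names.get(d['category'], d['category']) for d in detections}
    print('{}: max_conf={:.3f}, categories={}'.format(im['file'], max_conf, sorted(labels)))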
classification/create_classification_dataset.py

@@ -1,73 +1,63 @@
-
-#
-# create_classification_dataset.py
-#
-# Creates a classification dataset CSV with a corresponding JSON file determining
-# the train/val/test split.
-#
-# This script takes as input a "queried images" JSON file whose keys are paths to
-# images and values are dictionaries containing information relevant for training
-# a classifier, including labels and (optionally) ground-truth bounding boxes.
-# The image paths are in the format `<dataset-name>/<blob-name>` where we assume
-# that the dataset name does not contain '/'.
-#
-# {
-#   "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#     "dataset": "caltech",
-#     "location": 13,
-#     "class": "mountain_lion", # class from dataset
-#     "bbox": [{"category": "animal",
-#       "bbox": [0, 0.347, 0.237, 0.257]}], # ground-truth bbox
-#     "label": ["monutain_lion"] # labels to use in classifier
-#   },
-#   "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#     "dataset": "caltech",
-#     "location": 13,
-#     "class": "mountain_lion", # class from dataset
-#     "label": ["monutain_lion"] # labels to use in classifier
-#   },
-#   ...
-# }
-#
-# We assume that the tuple (dataset, location) identifies a unique location. In
-# other words, we assume that no two datasets have overlapping locations. This
-# probably isn't 100% true, but it's pretty much the best we can do in terms of
-# avoiding overlapping locations between the train/val/test splits.
-#
-# This script outputs 3 files to <output_dir>:
-#
-# 1) classification_ds.csv, contains columns:
-#
-# - 'path': str, path to cropped images
-# - 'dataset': str, name of dataset
-# - 'location': str, location that image was taken, as saved in MegaDB
-# - 'dataset_class': str, original class assigned to image, as saved in MegaDB
-# - 'confidence': float, confidence that this crop is of an actual animal,
-# 1.0 if the crop is a "ground truth bounding box" (i.e., from MegaDB),
-# <= 1.0 if the bounding box was detected by MegaDetector
-# - 'label': str, comma-separated list of label(s) assigned to this crop for
-# the sake of classification
-#
-# 2) label_index.json: maps integer to label name
-#
-# - keys are string representations of Python integers (JSON requires keys to
-# be strings), numbered from 0 to num_labels-1
-# - values are strings, label names
-#
-# 3) splits.json: serialization of a Python dict that maps each split
-# ['train', 'val', 'test'] to a list of length-2 lists, where each inner list
-# is [<dataset>, <location>]
-#
-########
+"""
 
-
+create_classification_dataset.py
+
+Creates a classification dataset CSV with a corresponding JSON file determining
+the train/val/test split.
+
+This script takes as input a "queried images" JSON file whose keys are paths to
+images and values are dictionaries containing information relevant for training
+a classifier, including labels and (optionally) ground-truth bounding boxes.
+The image paths are in the format `<dataset-name>/<blob-name>` where we assume
+that the dataset name does not contain '/'.
+
+{
+  "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
+    "dataset": "caltech",
+    "location": 13,
+    "class": "mountain_lion", # class from dataset
+    "bbox": [{"category": "animal",
+      "bbox": [0, 0.347, 0.237, 0.257]}], # ground-truth bbox
+    "label": ["monutain_lion"] # labels to use in classifier
+  },
+  "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
+    "dataset": "caltech",
+    "location": 13,
+    "class": "mountain_lion", # class from dataset
+    "label": ["monutain_lion"] # labels to use in classifier
+  },
+  ...
+}
+
+We assume that the tuple (dataset, location) identifies a unique location. In
+other words, we assume that no two datasets have overlapping locations. This
+probably isn't 100% true, but it's pretty much the best we can do in terms of
+avoiding overlapping locations between the train/val/test splits.
+
+This script outputs 3 files to <output_dir>:
+
+1) classification_ds.csv, contains columns:
+
+- 'path': str, path to cropped images
+- 'dataset': str, name of dataset
+- 'location': str, location that image was taken, as saved in MegaDB
+- 'dataset_class': str, original class assigned to image, as saved in MegaDB
+- 'confidence': float, confidence that this crop is of an actual animal,
+1.0 if the crop is a "ground truth bounding box" (i.e., from MegaDB),
+<= 1.0 if the bounding box was detected by MegaDetector
+- 'label': str, comma-separated list of label(s) assigned to this crop for
+the sake of classification
+
+2) label_index.json: maps integer to label name
+
+- keys are string representations of Python integers (JSON requires keys to
+be strings), numbered from 0 to num_labels-1
+- values are strings, label names
+
+3) splits.json: serialization of a Python dict that maps each split
+['train', 'val', 'test'] to a list of length-2 lists, where each inner list
+is [<dataset>, <location>]
 
-"""
-python create_classification_dataset.py \
-run_idfg2 \
---queried-images-json run_idfg2/queried_images.json \
---cropped-images-dir /ssd/crops_sq \
--d $HOME/classifier-training/mdcache -v "4.1" -t 0.8
 """
 
 #%% Imports and constants
@@ -87,6 +77,17 @@ from tqdm import tqdm
 from classification import detect_and_crop
 
 
+#%% Example usage
+
+"""
+python create_classification_dataset.py \
+run_idfg2 \
+--queried-images-json run_idfg2/queried_images.json \
+--cropped-images-dir /ssd/crops_sq \
+-d $HOME/classifier-training/mdcache -v "4.1" -t 0.8
+"""
+
+
 DATASET_FILENAME = 'classification_ds.csv'
 LABEL_INDEX_FILENAME = 'label_index.json'
 SPLITS_FILENAME = 'splits.json'