megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- api/batch_processing/data_preparation/manage_local_batch.py +278 -197
- api/batch_processing/data_preparation/manage_video_batch.py +7 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/compare_batch_results.py +110 -60
- api/batch_processing/postprocessing/load_api_results.py +55 -69
- api/batch_processing/postprocessing/md_to_labelme.py +1 -0
- api/batch_processing/postprocessing/postprocess_batch_results.py +158 -50
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +222 -74
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- classification/prepare_classification_script.py +191 -191
- data_management/coco_to_yolo.py +65 -44
- data_management/databases/integrity_check_json_db.py +7 -5
- data_management/generate_crops_from_cct.py +1 -1
- data_management/importers/animl_results_to_md_results.py +2 -2
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/importers/zamba_results_to_md_results.py +2 -2
- data_management/labelme_to_coco.py +34 -6
- data_management/labelme_to_yolo.py +1 -1
- data_management/lila/create_lila_blank_set.py +474 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +1 -1
- data_management/lila/download_lila_subset.py +46 -21
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +16 -10
- data_management/lila/lila_common.py +14 -11
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/resize_coco_dataset.py +12 -10
- data_management/yolo_output_to_md_output.py +40 -13
- data_management/yolo_to_coco.py +34 -21
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +1 -1
- detection/run_detector.py +73 -18
- detection/run_detector_batch.py +104 -24
- detection/run_inference_with_yolov5_val.py +127 -26
- detection/run_tiled_inference.py +153 -43
- detection/video_utils.py +3 -1
- md_utils/ct_utils.py +79 -3
- md_utils/md_tests.py +253 -15
- md_utils/path_utils.py +129 -24
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +0 -2
- md_utils/write_html_image_list.py +1 -0
- md_visualization/visualization_utils.py +17 -2
- md_visualization/visualize_db.py +8 -0
- md_visualization/visualize_detector_output.py +185 -104
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/METADATA +2 -2
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/RECORD +62 -58
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
md_utils/process_utils.py
CHANGED
@@ -17,14 +17,28 @@ import subprocess
 
 os.environ["PYTHONUNBUFFERED"] = "1"
 
-def execute(cmd):
+def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
     """
     Run [cmd] (a single string) in a shell, yielding each line of output to the caller.
+
+    The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
+
+    "verbose" only impacts output about process management, it is not related to printing
+    output from the child process.
     """
-
+
+    if verbose:
+        if encoding is not None:
+            print('Launching child process with non-default encoding {}'.format(encoding))
+        if errors is not None:
+            print('Launching child process with non-default text error handling {}'.format(errors))
+        if env is not None:
+            print('Launching child process with non-default environment {}'.format(str(env)))
+
     # https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
     popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                             shell=True, universal_newlines=True)
+                             shell=True, universal_newlines=True, encoding=encoding,
+                             errors=errors, env=env)
     for stdout_line in iter(popen.stdout.readline, ""):
         yield stdout_line
     popen.stdout.close()
@@ -33,22 +47,27 @@ def execute(cmd)
         raise subprocess.CalledProcessError(return_code, cmd)
 
 
-def execute_and_print(cmd,print_output=True):
+def execute_and_print(cmd,print_output=True,encoding=None,errors=None,env=None,verbose=False):
     """
     Run [cmd] (a single string) in a shell, capturing and printing output. Returns
     a dictionary with fields "status" and "output".
+
+    The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
+
+    "verbose" only impacts output about process management, it is not related to printing
+    output from the child process.
     """
 
     to_return = {'status':'unknown','output':''}
-    output=[]
+    output = []
     try:
-        for s in execute(cmd):
+        for s in execute(cmd,encoding=encoding,errors=errors,env=env,verbose=verbose):
             output.append(s)
             if print_output:
                 print(s,end='',flush=True)
         to_return['status'] = 0
     except subprocess.CalledProcessError as cpe:
-        print('execute_and_print caught error: {}'.format(cpe.output))
+        print('execute_and_print caught error: {} ({})'.format(cpe.output,str(cpe)))
         to_return['status'] = cpe.returncode
         to_return['output'] = output
 
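For illustration, a minimal sketch of how the new parameters might be used; the command, the UTF-8 choices, and the environment tweak are hypothetical, not defaults:

import os
from md_utils.process_utils import execute_and_print

# Pass a modified environment to the child process, force UTF-8 decoding of
# its output, and substitute (rather than raise on) undecodable bytes
child_env = os.environ.copy()
child_env['PYTHONIOENCODING'] = 'utf-8'

result = execute_and_print('echo hello',
                           print_output=True,
                           encoding='utf-8',
                           errors='replace',
                           env=child_env,
                           verbose=True)

# "status" is 0 on success, else the child's return code
print('Exit status: {}'.format(result['status']))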
md_utils/split_locations_into_train_val.py
ADDED
@@ -0,0 +1,215 @@
+########
+#
+# split_locations_into_train_val.py
+#
+# Split a list of location IDs into training and validation, targeting a specific
+# train/val split for each category, but allowing some categories to be tighter or looser
+# than others. Does nothing particularly clever, just randomly splits locations into
+# train/val lots of times using the target val fraction, and picks the one that meets the
+# specified constraints and minimizes weighted error, where "error" is defined as the
+# sum of each class's absolute divergence from the target val fraction.
+#
+########
+
+#%% Imports/constants
+
+import random
+import numpy as np
+
+from collections import defaultdict
+from md_utils.ct_utils import sort_dictionary_by_value
+from tqdm import tqdm
+
+
+#%% Main function
+
+def split_locations_into_train_val(location_to_category_counts,
+                                   n_random_seeds=10000,
+                                   target_val_fraction=0.15,
+                                   category_to_max_allowable_error=None,
+                                   category_to_error_weight=None,
+                                   default_max_allowable_error=0.1):
+    """
+    Split a list of location IDs into training and validation, targeting a specific
+    train/val split for each category, but allowing some categories to be tighter or looser
+    than others. Does nothing particularly clever, just randomly splits locations into
+    train/val lots of times using the target val fraction, and picks the one that meets the
+    specified constraints and minimizes weighted error, where "error" is defined as the
+    sum of each class's absolute divergence from the target val fraction.
+
+    location_to_category_counts should be a dict mapping location IDs to dicts,
+    with each dict mapping a category name to a count. Any categories not present in a
+    particular dict are assumed to have a count of zero for that location.
+
+    If not None, category_to_max_allowable_error should be a dict mapping category names
+    to maximum allowable errors. These are hard constraints, but you can specify a subset
+    of categories. Categories not included here have a maximum error of Inf.
+
+    If not None, category_to_error_weight should be a dict mapping category names to
+    error weights. You can specify a subset of categories. Categories not included here
+    have a weight of 1.0.
+
+    default_max_allowable_error is the maximum allowable error for categories not present in
+    category_to_max_allowable_error. Set to None (or >= 1.0) to disable hard constraints for
+    categories not present in category_to_max_allowable_error.
+
+    returns val_locations,category_to_val_fraction
+
+    """
+
+    location_ids = list(location_to_category_counts.keys())
+
+    n_val_locations = int(target_val_fraction*len(location_ids))
+
+    if category_to_max_allowable_error is None:
+        category_to_max_allowable_error = {}
+
+    if category_to_error_weight is None:
+        category_to_error_weight = {}
+
+    # category ID to total count; the total count is used only for printouts
+    category_id_to_count = {}
+    for location_id in location_to_category_counts:
+        for category_id in location_to_category_counts[location_id].keys():
+            if category_id not in category_id_to_count:
+                category_id_to_count[category_id] = 0
+            category_id_to_count[category_id] += \
+                location_to_category_counts[location_id][category_id]
+
+    category_ids = set(category_id_to_count.keys())
+
+    print('Splitting {} categories over {} locations'.format(
+        len(category_ids),len(location_ids)))
+
+    # random_seed = 0
+    def compute_seed_errors(random_seed):
+        """
+        Compute the per-category error for a specific random seed.
+
+        returns weighted_average_error,weighted_category_errors,category_to_val_fraction
+        """
+
+        # Randomly split into train/val
+        random.seed(random_seed)
+        val_locations = random.sample(location_ids,k=n_val_locations)
+        val_locations_set = set(val_locations)
+
+        # For each category, measure the % of images that went into the val set
+        category_to_val_fraction = defaultdict(float)
+
+        for category_id in category_ids:
+            category_val_count = 0
+            category_train_count = 0
+            for location_id in location_to_category_counts:
+                if category_id not in location_to_category_counts[location_id]:
+                    location_category_count = 0
+                else:
+                    location_category_count = location_to_category_counts[location_id][category_id]
+                if location_id in val_locations_set:
+                    category_val_count += location_category_count
+                else:
+                    category_train_count += location_category_count
+            category_val_fraction = category_val_count / (category_val_count + category_train_count)
+            category_to_val_fraction[category_id] = category_val_fraction
+
+        # Absolute deviation from the target val fraction for each category
+        category_errors = {}
+        weighted_category_errors = {}
+
+        # category = next(iter(category_to_val_fraction))
+        for category in category_to_val_fraction:
+
+            category_val_fraction = category_to_val_fraction[category]
+
+            category_error = abs(category_val_fraction-target_val_fraction)
+            category_errors[category] = category_error
+
+            category_weight = 1.0
+            if category in category_to_error_weight:
+                category_weight = category_to_error_weight[category]
+            weighted_category_error = category_error * category_weight
+            weighted_category_errors[category] = weighted_category_error
+
+        weighted_average_error = np.mean(list(weighted_category_errors.values()))
+
+        return weighted_average_error,weighted_category_errors,category_to_val_fraction
+
+    # ... def compute_seed_errors(...)
+
+    # This will only include random seeds that satisfy the hard constraints
+    random_seed_to_weighted_average_error = {}
+
+    # random_seed = 0
+    for random_seed in tqdm(range(0,n_random_seeds)):
+
+        weighted_average_error,weighted_category_errors,category_to_val_fraction = \
+            compute_seed_errors(random_seed)
+
+        seed_satisfies_hard_constraints = True
+
+        for category in category_to_val_fraction:
+            if category in category_to_max_allowable_error:
+                max_allowable_error = category_to_max_allowable_error[category]
+            else:
+                if default_max_allowable_error is None:
+                    continue
+                max_allowable_error = default_max_allowable_error
+            val_fraction = category_to_val_fraction[category]
+            category_error = abs(val_fraction - target_val_fraction)
+            if category_error > max_allowable_error:
+                seed_satisfies_hard_constraints = False
+                break
+
+        if seed_satisfies_hard_constraints:
+            random_seed_to_weighted_average_error[random_seed] = weighted_average_error
+
+    # ...for each random seed
+
+    assert len(random_seed_to_weighted_average_error) > 0, \
+        'No random seed met all the hard constraints'
+
+    print('\n{} of {} random seeds satisfied hard constraints'.format(
+        len(random_seed_to_weighted_average_error),n_random_seeds))
+
+    min_error = None
+    min_error_seed = None
+
+    for random_seed in random_seed_to_weighted_average_error.keys():
+        error_metric = random_seed_to_weighted_average_error[random_seed]
+        if min_error is None or error_metric < min_error:
+            min_error = error_metric
+            min_error_seed = random_seed
+
+    random.seed(min_error_seed)
+    val_locations = random.sample(location_ids,k=n_val_locations)
+    train_locations = []
+    for location_id in location_ids:
+        if location_id not in val_locations:
+            train_locations.append(location_id)
+
+    print('\nVal locations:\n')
+    for loc in val_locations:
+        print('{}'.format(loc))
+    print('')
+
+    weighted_average_error,weighted_category_errors,category_to_val_fraction = \
+        compute_seed_errors(min_error_seed)
+
+    random_seed = min_error_seed
+
+    category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
+    category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
+                                                        sort_values=category_id_to_count,
+                                                        reverse=True)
+
+
+    print('Val fractions by category:\n')
+
+    for category in category_to_val_fraction:
+        print('{} ({}) {:.2f}'.format(
+            category,category_id_to_count[category],
+            category_to_val_fraction[category]))
+
+    return val_locations,category_to_val_fraction
+
+# ...def split_locations_into_train_val(...)
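For illustration, a small synthetic run of the new function; the location IDs, category names, counts, and constraint values below are all invented:

import random
from md_utils.split_locations_into_train_val import split_locations_into_train_val

# Synthetic input: 40 locations, each with random counts for two categories
random.seed(0)
location_to_category_counts = {
    'loc_{:03d}'.format(i): {'deer': random.randint(10, 200),
                             'bear': random.randint(0, 20)}
    for i in range(40)
}

val_locations, category_to_val_fraction = split_locations_into_train_val(
    location_to_category_counts,
    n_random_seeds=1000,
    target_val_fraction=0.15,
    # Hard constraint: "bear" must land within +/- 0.05 of the target val fraction
    category_to_max_allowable_error={'bear': 0.05},
    # Penalize "bear" deviations twice as heavily when scoring candidate splits
    category_to_error_weight={'bear': 2.0})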
md_utils/string_utils.py
CHANGED
@@ -57,3 +57,13 @@ def human_readable_to_bytes(size):
         bytes = 0
 
     return bytes
+
+
+def remove_ansi_codes(s):
+    """
+    Remove ANSI escape codes from a string.
+
+    https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789
+    """
+    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+    return ansi_escape.sub('', s)
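A quick illustration of the new helper; '\x1b[31m' and '\x1b[0m' are standard ANSI color-on/color-off sequences:

from md_utils.string_utils import remove_ansi_codes

colored = '\x1b[31merror:\x1b[0m something failed'
print(remove_ansi_codes(colored))  # prints: error: something failed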
md_utils/write_html_image_list.py
CHANGED
@@ -177,6 +177,7 @@ def write_html_image_list(filename=None,images=None,options=None):
         filename = filename.encode('ascii','ignore').decode('ascii')
 
     if options['urlEncodeFilenames']:
+        filename = filename.replace('\\','/')
         filename = urllib.parse.quote(filename)
 
     if len(title) > 0:
md_visualization/visualization_utils.py
CHANGED

@@ -172,12 +172,20 @@ def resize_image(image, target_width, target_height=-1, output_file=None):
     in place. If either width or height are -1, resizes with aspect ratio preservation.
     If both are -1, returns the original image (does not copy in this case).
 
+    None is equivalent to -1 for target_width and target_height.
+
     [image] can be a PIL image or a filename.
     """
 
     if isinstance(image,str):
         image = load_image(image)
 
+    if target_width is None:
+        target_width = -1
+
+    if target_height is None:
+        target_height = -1
+
     # Null operation
     if target_width == -1 and target_height == -1:
         return image
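With this change, passing None for either dimension behaves like -1 (resize with aspect ratio preservation); a minimal sketch, with a hypothetical file path:

from md_visualization.visualization_utils import resize_image

# Resize to an 800-pixel width; target_height=None preserves the aspect ratio
img = resize_image('camera_trap_image.jpg', target_width=800, target_height=None)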
@@ -371,7 +379,8 @@ def render_detection_bounding_boxes(detections, image,
     The type of the numerical label (default string) needs to be consistent with the keys in
     label_map; no casting is carried out. If this is None, no classification labels are shown.
 
-    confidence_threshold: optional, threshold above which
+    confidence_threshold: optional, threshold above which boxes are rendered. Can also be a dictionary
+    mapping category IDs to thresholds.
 
     thickness: line thickness in pixels. Default value is 4.
 
@@ -405,9 +414,15 @@ def render_detection_bounding_boxes(detections, image,
 
         score = detection['conf']
 
+        if isinstance(confidence_threshold,dict):
+            rendering_threshold = confidence_threshold[detection['category']]
+        else:
+            rendering_threshold = confidence_threshold
+
+
         # Always render objects with a confidence of "None", this is typically used
         # for ground truth data.
-        if score is None or score >= confidence_threshold:
+        if score is None or score >= rendering_threshold:
 
             x1, y1, w_box, h_box = detection['bbox']
             display_boxes.append([y1, x1, y1 + h_box, x1 + w_box])
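For illustration, a sketch of per-category rendering thresholds; the detection dict and image path are invented, and note that the dictionary form must cover every category ID present in the detections:

from md_visualization import visualization_utils as vis_utils

# MegaDetector category IDs: '1' = animal, '2' = person, '3' = vehicle
per_category_thresholds = {'1': 0.15, '2': 0.5, '3': 0.5}

detections = [{'category': '1', 'conf': 0.2, 'bbox': [0.1, 0.1, 0.2, 0.2]}]
image = vis_utils.load_image('camera_trap_image.jpg')

# Renders in place: the animal box (conf 0.2 >= 0.15) is drawn; a person box
# at the same confidence would be suppressed (0.2 < 0.5)
vis_utils.render_detection_bounding_boxes(detections, image,
                                          confidence_threshold=per_category_thresholds)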
md_visualization/visualize_db.py
CHANGED
@@ -41,7 +41,15 @@ class DbVizOptions:
     #
     # If viz_size is None or (-1,-1), the original image size is used.
     viz_size = (675, -1)
+
+    # The most relevant option one might want to set here is:
+    #
+    # htmlOptions['maxFiguresPerHtmlFile']
+    #
+    # ...which can be used to paginate previews to a number of images that will load well
+    # in a browser (5000 is a reasonable limit).
     htmlOptions = write_html_image_list()
+
     sort_by_filename = True
     trim_to_images_with_bboxes = False
 
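A minimal sketch of the pagination option described in the new comment; the 5000 value comes from that comment, and everything else is a plain options setup before passing the object to the module's database-preview entry point:

from md_visualization.visualize_db import DbVizOptions

options = DbVizOptions()

# Split the HTML preview into pages of at most 5000 images so each page
# loads reasonably in a browser
options.htmlOptions['maxFiguresPerHtmlFile'] = 5000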