megadetector 10.0.10__py3-none-any.whl → 10.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/data_management/animl_to_md.py +5 -2
- megadetector/data_management/cct_json_utils.py +4 -2
- megadetector/data_management/cct_to_md.py +5 -4
- megadetector/data_management/cct_to_wi.py +5 -1
- megadetector/data_management/coco_to_yolo.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/databases/subset_json_db.py +0 -3
- megadetector/data_management/generate_crops_from_cct.py +6 -4
- megadetector/data_management/get_image_sizes.py +5 -35
- megadetector/data_management/labelme_to_coco.py +10 -6
- megadetector/data_management/labelme_to_yolo.py +19 -28
- megadetector/data_management/lila/create_lila_test_set.py +22 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
- megadetector/data_management/lila/lila_common.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
- megadetector/data_management/ocr_tools.py +6 -10
- megadetector/data_management/read_exif.py +59 -16
- megadetector/data_management/remap_coco_categories.py +1 -1
- megadetector/data_management/remove_exif.py +10 -5
- megadetector/data_management/rename_images.py +20 -13
- megadetector/data_management/resize_coco_dataset.py +10 -4
- megadetector/data_management/speciesnet_to_md.py +3 -3
- megadetector/data_management/yolo_output_to_md_output.py +3 -1
- megadetector/data_management/yolo_to_coco.py +28 -19
- megadetector/detection/change_detection.py +26 -18
- megadetector/detection/process_video.py +1 -1
- megadetector/detection/pytorch_detector.py +5 -5
- megadetector/detection/run_detector.py +34 -10
- megadetector/detection/run_detector_batch.py +2 -1
- megadetector/detection/run_inference_with_yolov5_val.py +3 -1
- megadetector/detection/run_md_and_speciesnet.py +215 -101
- megadetector/detection/run_tiled_inference.py +7 -7
- megadetector/detection/tf_detector.py +1 -1
- megadetector/detection/video_utils.py +9 -6
- megadetector/postprocessing/add_max_conf.py +4 -4
- megadetector/postprocessing/categorize_detections_by_size.py +3 -2
- megadetector/postprocessing/classification_postprocessing.py +7 -8
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +49 -27
- megadetector/postprocessing/convert_output_format.py +8 -6
- megadetector/postprocessing/create_crop_folder.py +13 -4
- megadetector/postprocessing/generate_csv_report.py +22 -8
- megadetector/postprocessing/load_api_results.py +8 -4
- megadetector/postprocessing/md_to_coco.py +2 -3
- megadetector/postprocessing/md_to_labelme.py +12 -8
- megadetector/postprocessing/md_to_wi.py +2 -1
- megadetector/postprocessing/merge_detections.py +4 -6
- megadetector/postprocessing/postprocess_batch_results.py +4 -3
- megadetector/postprocessing/remap_detection_categories.py +6 -3
- megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
- megadetector/postprocessing/separate_detections_into_folders.py +10 -4
- megadetector/postprocessing/subset_json_detector_output.py +1 -1
- megadetector/postprocessing/top_folders_to_bottom.py +22 -7
- megadetector/postprocessing/validate_batch_results.py +1 -1
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
- megadetector/taxonomy_mapping/species_lookup.py +51 -2
- megadetector/utils/ct_utils.py +9 -4
- megadetector/utils/extract_frames_from_video.py +4 -0
- megadetector/utils/gpu_test.py +6 -6
- megadetector/utils/md_tests.py +21 -21
- megadetector/utils/path_utils.py +112 -44
- megadetector/utils/split_locations_into_train_val.py +0 -4
- megadetector/utils/url_utils.py +5 -3
- megadetector/utils/wi_taxonomy_utils.py +37 -8
- megadetector/utils/write_html_image_list.py +1 -2
- megadetector/visualization/plot_utils.py +31 -19
- megadetector/visualization/render_images_with_thumbnails.py +3 -0
- megadetector/visualization/visualization_utils.py +18 -7
- megadetector/visualization/visualize_db.py +9 -26
- megadetector/visualization/visualize_video_output.py +14 -2
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/RECORD +80 -80
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/render_detection_confusion_matrix.py
CHANGED

@@ -252,9 +252,10 @@ def render_detection_confusion_matrix(ground_truth_file,
                    md_formatted_results['images']),
                    total=len(md_formatted_results['images'])))
        finally:
-            pool
-
-
+            if pool is not None:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for confusion matrix rendering")

    else:

@@ -369,11 +370,15 @@ def render_detection_confusion_matrix(ground_truth_file,

        # If there were no detections at all, call this image empty
        if len(results_im['detections']) == 0:
+
            predicted_category_name = empty_category_name
+
        # Otherwise look for above-threshold detections
        else:
+
            results_category_name_to_confidence = defaultdict(int)
            for det in results_im['detections']:
+
                category_name = results_category_id_to_name[det['category']]
                detection_threshold = confidence_thresholds['default']
                if category_name in confidence_thresholds:
@@ -381,12 +386,15 @@ def render_detection_confusion_matrix(ground_truth_file,
                if det['conf'] > detection_threshold:
                    results_category_name_to_confidence[category_name] = max(
                        results_category_name_to_confidence[category_name],det['conf'])
-
-
-
-
-
-
+
+            # ...for each detection
+
+            # If there were no detections above threshold
+            if len(results_category_name_to_confidence) == 0:
+                predicted_category_name = empty_category_name
+            else:
+                predicted_category_name = max(results_category_name_to_confidence,
+                                              key=results_category_name_to_confidence.get)

        ground_truth_category_index = gt_category_name_to_category_index[ground_truth_category_name]
        predicted_category_index = gt_category_name_to_category_index[predicted_category_name]
@@ -396,7 +404,7 @@ def render_detection_confusion_matrix(ground_truth_file,

        confusion_matrix[ground_truth_category_index,predicted_category_index] += 1

-    # ...for each file
+    # ...for each ground truth file

    plt.ioff()

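The change above only tears the worker pool down if it was actually created, and does so in the existing finally block so failures during rendering still release the threads. A minimal, standalone sketch of that pattern (names here are illustrative, not taken from the module):

from multiprocessing.pool import ThreadPool

pool = None
try:
    pool = ThreadPool(8)
    # Do the parallel work; any exception still reaches the finally block
    results = pool.map(abs, [-1, -2, -3])
finally:
    # Only clean up if the pool was actually created
    if pool is not None:
        pool.close()
        pool.join()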
megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py
CHANGED

@@ -37,7 +37,7 @@ def remove_repeat_detections(input_file,output_file,filtering_dir):
    """

    assert os.path.isfile(input_file), "Can't find file {}".format(input_file)
-    assert os.path.
+    assert os.path.exists(filtering_dir), "Can't find input file/folder {}".format(filtering_dir)
    options = repeat_detections_core.RepeatDetectionOptions()
    if os.path.isfile(filtering_dir):
        options.filterFileToLoad = filtering_dir
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py
CHANGED

@@ -869,7 +869,7 @@ def _update_detection_table(repeat_detection_results, options, output_file_name=
        detection_to_modify = row_detections[instance.i_detection]

        # Make sure the bounding box matches
-        assert (instance_bbox[0:
+        assert (instance_bbox[0:4] == detection_to_modify['bbox'][0:4])

        # Make the probability negative, if it hasn't been switched by
        # another bounding box
@@ -1149,7 +1149,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):

        # Load the filtering file
        detection_index_file_name = options.filterFileToLoad
-
+        with open(detection_index_file_name, 'r') as f:
+            s_in = f.read()
        detection_info = jsonpickle.decode(s_in)
        filtering_base_dir = os.path.dirname(options.filterFileToLoad)
        suspicious_detections = detection_info['suspicious_detections']
@@ -1382,7 +1383,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):

        # candidate_detection_file = all_candidate_detection_files[0]
        for candidate_detection_file in all_candidate_detection_files:
-
+            with open(candidate_detection_file, 'r') as f:
+                s = f.read()
            candidate_detections_this_file = jsonpickle.decode(s)
            all_candidate_detections.append(candidate_detections_this_file)

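Both hunks above move the file read into a context manager before handing the text to jsonpickle. A minimal sketch of the same pattern, with a hypothetical filename; jsonpickle.decode() is the call used in the diff:

import jsonpickle

detection_index_file_name = 'detectionIndex.json'  # hypothetical path

with open(detection_index_file_name, 'r') as f:
    s_in = f.read()
detection_info = jsonpickle.decode(s_in)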
megadetector/postprocessing/separate_detections_into_folders.py
CHANGED

@@ -494,7 +494,8 @@ def separate_detections_into_folders(options):

    # Load detection results
    print('Loading detection results')
-
+    with open(options.results_file,'r') as f:
+        results = json.load(f)
    images = results['images']

    for im in images:
@@ -618,8 +619,13 @@ def separate_detections_into_folders(options):

        print('Starting a pool with {} threads'.format(options.n_threads))
        pool = ThreadPool(options.n_threads)
-
-
+        try:
+            process_detections_with_options = partial(_process_detections, options=options)
+            _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
+        finally:
+            pool.close()
+            pool.join()
+            print('Pool closed and joined for folder separation')

    if options.remove_empty_folders:
        print('Removing empty folders from {}'.format(options.base_output_folder))
@@ -736,7 +742,7 @@ def main(): # noqa
        help='Line thickness (in pixels) for rendering, only meaningful if ' + \
            'using render_boxes (defaults to {})'.format(
            default_line_thickness))
-    parser.add_argument('--box_expansion', type=int, default=
+    parser.add_argument('--box_expansion', type=int, default=default_box_expansion,
        help='Box expansion (in pixels) for rendering, only meaningful if ' + \
            'using render_boxes (defaults to {})'.format(
            default_box_expansion))
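The pool hunk above binds the options argument with functools.partial, maps over the images with a thread pool, and cleans up in a finally block. A self-contained sketch of that pattern using stand-in data (none of these names come from the module itself):

from functools import partial
from multiprocessing.pool import ThreadPool
from tqdm import tqdm

def process_item(item, options=None):
    # Stand-in for the per-image worker function
    return (item, options)

items = list(range(10))
pool = ThreadPool(4)
try:
    worker = partial(process_item, options={'threshold': 0.2})
    results = list(tqdm(pool.imap(worker, items), total=len(items)))
finally:
    pool.close()
    pool.join()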
megadetector/postprocessing/top_folders_to_bottom.py
CHANGED

@@ -45,7 +45,12 @@ class TopFoldersToBottomOptions:
    Options used to parameterize top_folders_to_bottom()
    """

-    def __init__(self,
+    def __init__(self,
+                 input_folder,
+                 output_folder,
+                 copy=True,
+                 n_threads=1,
+                 overwrite=False):

        #: Whether to copy (True) vs. move (False) false when re-organizing
        self.copy = copy
@@ -60,7 +65,7 @@ class TopFoldersToBottomOptions:
        self.output_folder = output_folder

        #: If this is False and an output file exists, throw an error
-        self.overwrite =
+        self.overwrite = overwrite


#%% Main functions
@@ -130,6 +135,7 @@ def top_folders_to_bottom(options):
        options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.

    """
+
    os.makedirs(options.output_folder,exist_ok=True)

    # Enumerate input folder
@@ -167,10 +173,15 @@ def top_folders_to_bottom(options):

    print('Starting a pool with {} threads'.format(options.n_threads))
    pool = ThreadPool(options.n_threads)
-
-
+    try:
+        process_file_with_options = partial(_process_file, options=options)
+        _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
+    finally:
+        pool.close()
+        pool.join()
+        print('Pool closed and join for folder inversion')

-# ...def top_folders_to_bottom()
+# ...def top_folders_to_bottom(...)


#%% Interactive driver
@@ -192,7 +203,7 @@ if False:

#%% Command-line driver

-# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads
+# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 10

def main(): # noqa

@@ -215,7 +226,11 @@ def main(): # noqa

    # Convert to an options object
    options = TopFoldersToBottomOptions(
-        args.input_folder,
+        args.input_folder,
+        args.output_folder,
+        copy=args.copy,
+        n_threads=args.n_threads,
+        overwrite=args.overwrite)

    top_folders_to_bottom(options)

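Based on the new constructor signature shown above, building the options object from code now looks roughly like this (the folder paths are hypothetical):

from megadetector.postprocessing.top_folders_to_bottom import \
    TopFoldersToBottomOptions, top_folders_to_bottom

options = TopFoldersToBottomOptions('/data/separated_images',
                                    '/data/separated_images_inverted',
                                    copy=True,
                                    n_threads=10,
                                    overwrite=False)
top_folders_to_bottom(options)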
megadetector/taxonomy_mapping/map_new_lila_datasets.py
CHANGED

@@ -15,10 +15,10 @@ import json
# Created by get_lila_category_list.py
input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')

-output_file = os.path.expanduser('~/lila/lila_additions_2025.
+output_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')

datasets_to_map = [
-    '
+    'California Small Animals'
]


@@ -128,6 +128,52 @@ output_df.to_csv(output_file, index=None, header=True)
# from megadetector.utils.path_utils import open_file; open_file(output_file)


+#%% Remap missing entries in the .csv file
+
+# ...typically because I made a change to the mapping code.
+
+from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import is_empty
+
+remapped_file = insert_before_extension(output_file,'remapped')
+
+df = pd.read_csv(output_file)
+
+for i_row,row in df.iterrows():
+
+    # Do we need to map this row?
+    if is_empty(row['source']):
+
+        query = row['query']
+        print('Mapping {}'.format(query))
+
+        taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
+
+        if (taxonomic_match.source == taxonomy_preference):
+
+            source = taxonomic_match.source
+            taxonomy_level = taxonomic_match.taxonomic_level
+            scientific_name = taxonomic_match.scientific_name
+            common_name = taxonomic_match.common_name
+            taxonomy_string = taxonomic_match.taxonomy_string
+
+            # Write source, taxonomy_level, scientific_name, common_name, and taxonomy_string
+            # to the corresponding columns in the current row in df
+            df.loc[i_row, 'source'] = source
+            df.loc[i_row, 'taxonomy_level'] = taxonomy_level
+            df.loc[i_row, 'scientific_name'] = scientific_name
+            df.loc[i_row, 'common_name'] = common_name
+            df.loc[i_row, 'taxonomy_string'] = taxonomy_string
+
+        # ...if we found a match
+
+    # ...do we need to map this row?
+
+# ...for each row
+
+df.to_csv(remapped_file, index=None, header=True)
+
+
#%% Manual lookup

if False:
@@ -140,11 +186,19 @@ if False:

    #%%

-
+    from megadetector.taxonomy_mapping.species_lookup import pop_levels
+
+    # Use this when an iNat match includes an empty subgenus with the same name as the genus
+    n_levels_to_pop = 0
+    q = 'sus scrofa'

    taxonomy_preference = 'inat'
    m = get_preferred_taxonomic_match(q,taxonomy_preference)
+    if n_levels_to_pop > 0:
+        m = pop_levels(m,n_levels_to_pop)
+
    # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
+    # common_name = eval(m.__dict__['taxonomy_string'])[0][-1][0]; print(common_name); clipboard.copy(common_name)

    if (m is None) or (len(m.taxonomy_string) == 0):
        print('No match')
@@ -155,3 +209,5 @@ if False:
    print(m.source)
    print(m.taxonomy_string)
    import clipboard; clipboard.copy(m.taxonomy_string)
+
+
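The new "remap missing entries" cell above re-queries only the rows whose source column is empty and writes the results back in place with df.loc. A minimal sketch of that fill-in-place pattern with a toy DataFrame and a stand-in lookup function (the real script uses get_preferred_taxonomic_match and the is_empty helper):

import pandas as pd

df = pd.DataFrame({'query': ['puma concolor', 'mystery animal'],
                   'source': ['inat', None],
                   'scientific_name': ['puma concolor', None]})

def lookup(query):
    # Stand-in for the real taxonomic lookup
    return 'inat', query

for i_row, row in df.iterrows():
    if pd.isna(row['source']):
        source, scientific_name = lookup(row['query'])
        df.loc[i_row, 'source'] = source
        df.loc[i_row, 'scientific_name'] = scientific_name

print(df)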
megadetector/taxonomy_mapping/preview_lila_taxonomy.py
CHANGED

@@ -16,7 +16,7 @@ import os
import pandas as pd

# lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')

preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
os.makedirs(preview_base,exist_ok=True)
@@ -56,11 +56,6 @@ def taxonomy_string_to_level(taxonomy_string):
    return level


-#%% Read the taxonomy mapping file
-
-df = pd.read_csv(lila_taxonomy_file)
-
-
#%% Prepare taxonomy lookup

from megadetector.taxonomy_mapping.species_lookup import \
@@ -95,20 +90,29 @@ taxonomy_preference = 'inat'
# i_row = 0; row = df.iloc[i_row]
for i_row,row in tqdm(df.iterrows(),total=len(df)):

-
-
-
+    try:
+
+        sn = row['scientific_name']
+        if not isinstance(sn,str):
+            continue

-
-
+        m = get_preferred_taxonomic_match(sn,taxonomy_preference)
+        assert m.scientific_name == sn

-
-
-
+        ts = row['taxonomy_string']
+        assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
+            row['dataset_name'],ts,m.taxonomy_string)
+
+        if ts != m.taxonomy_string:
+            n_taxonomy_changes += 1
+            df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
+
+    except Exception as e:

-
-
-
+        print('Error at row {}: {}'.format(i_row,str(e)))
+        raise
+
+# ...for each row

print('\nMade {} taxonomy changes'.format(n_taxonomy_changes))

@@ -325,6 +329,11 @@ for i_row,row in df.iterrows():

#%% Download sample images for all scientific names

+# You might have to do this:
+#
+# pip install python-magic
+# pip install python-magic-bin
+
# Takes ~1 minute per 10 rows

remapped_queries = {'papio':'papio+baboon',
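The validation loop above now wraps each row in try/except so a failure reports the offending row index before re-raising. A minimal sketch of that pattern with a hypothetical DataFrame:

import pandas as pd

df = pd.DataFrame({'scientific_name': ['sus scrofa', None]})

for i_row, row in df.iterrows():
    try:
        sn = row['scientific_name']
        if not isinstance(sn, str):
            continue
        # ...taxonomy lookup and consistency checks would go here...
    except Exception as e:
        print('Error at row {}: {}'.format(i_row, str(e)))
        raise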
megadetector/taxonomy_mapping/species_lookup.py
CHANGED

@@ -560,6 +560,7 @@ def get_taxonomic_info(query: str) -> List[Dict[str, Any]]:
    Main entry point: get taxonomic matches from both taxonomies for [query],
    which may be a scientific or common name.
    """
+
    query = query.strip().lower()
    # print("Finding taxonomy information for: {0}".format(query))

@@ -682,6 +683,35 @@ hyphenated_terms = ['crowned', 'backed', 'throated', 'tailed', 'headed', 'cheeke
                    'fronted', 'bellied', 'spotted', 'eared', 'collared', 'breasted',
                    'necked']

+def pop_levels(m, n_levels=1):
+    """
+    Remove [n_levels] levels from the bottom of the TaxonomicMatch object m, typically used to remove
+    silly subgenera.
+    """
+
+    v = eval(m.taxonomy_string)
+    assert v[0][1] == m.taxonomic_level
+    assert v[0][2] == m.scientific_name
+    popped_v = v[n_levels:]
+    taxonomic_level = popped_v[0][1]
+    scientific_name = popped_v[0][2]
+    common_name = popped_v[0][3]
+    if len(common_name) == 0:
+        common_name = ''
+    else:
+        common_name = common_name[0]
+    taxonomy_string = str(popped_v)
+    source = m.source
+    return TaxonomicMatch(scientific_name=scientific_name,
+                          common_name=common_name,
+                          taxonomic_level=taxonomic_level,
+                          source=source,
+                          taxonomy_string=taxonomy_string,
+                          match=None)
+
+# ...def pop_levels(...)
+
+
def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retry=True) -> TaxonomicMatch:
    """
    Wrapper for _get_preferred_taxonomic_match, but expressing a variety of heuristics
@@ -704,6 +734,17 @@ def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retr
    for s in hyphenated_terms:
        query = query.replace(' ' + s,'-' + s)
    m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
+    if (len(m.scientific_name) > 0) or (not retry):
+        return m
+
+    query = query.replace(' species','')
+    query = query.replace(' order','')
+    query = query.replace(' genus','')
+    query = query.replace(' family','')
+    query = query.replace(' subfamily','')
+    m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
    return m


@@ -887,8 +928,16 @@ def _get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat') ->

    taxonomy_string = str(match)

-
-
+    m = TaxonomicMatch(scientific_name, common_name, taxonomic_level, source,
+                       taxonomy_string, match)
+
+    if (m.taxonomic_level == 'subgenus' and \
+        match[1][1] == 'genus' and \
+        match[1][2] == m.scientific_name):
+        print('Removing redundant subgenus {}'.format(scientific_name))
+        m = pop_levels(m,1)
+
+    return m,query

# ...def _get_preferred_taxonomic_match()

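pop_levels() above assumes taxonomy_string is the string form of a list of taxonomy records, most specific level first, where index 1 is the level, index 2 the scientific name, and index 3 a list of common names. A minimal sketch of what dropping a redundant subgenus does to that structure, using a made-up record layout consistent with those assumptions (the sketch uses ast.literal_eval rather than eval):

import ast

taxonomy_string = str([
    (1, 'subgenus', 'sus', []),
    (2, 'genus', 'sus', ['pigs']),
    (3, 'family', 'suidae', ['hogs and pigs']),
])

records = ast.literal_eval(taxonomy_string)
popped = records[1:]                # drop the redundant subgenus entry
print(popped[0][1], popped[0][2])   # -> genus sus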
megadetector/utils/ct_utils.py
CHANGED

@@ -241,7 +241,10 @@ def write_json(path,
    elif force_str:
        default_handler = str

-
+    # Create the parent directory if necessary
+    parent_dir = os.path.dirname(path)
+    if len(parent_dir) > 0:
+        os.makedirs(parent_dir, exist_ok=True)

    with open(path, 'w', newline='\n', encoding=encoding) as f:
        json.dump(content, f, indent=indent, default=default_handler, ensure_ascii=ensure_ascii)
@@ -562,7 +565,7 @@ def sort_dictionary_by_value(d,sort_values=None,reverse=False):
        reverse (bool, optional): whether to sort in reverse (descending) order

    Returns:
-        dict: sorted copy of [d
+        dict: sorted copy of [d]
    """

    if sort_values is None:
@@ -1022,8 +1025,10 @@ def parse_bool_string(s, strict=False):
    s = str(s).lower().strip()

    if strict:
-
-
+        # Fun fact: ('false') (rather than ('false,')) creates a string,
+        # not a tuple.
+        false_strings = ('false',)
+        true_strings = ('true',)
    else:
        false_strings = ('no', 'false', 'f', 'n', '0')
        true_strings = ('yes', 'true', 't', 'y', '1')
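The comment added to parse_bool_string refers to a Python gotcha worth spelling out: parentheses alone don't create a tuple, the trailing comma does, and membership tests behave very differently for the two. A quick illustration:

not_a_tuple = ('false')     # just the string 'false'
a_tuple = ('false',)        # a one-element tuple

assert isinstance(not_a_tuple, str)
assert isinstance(a_tuple, tuple)

assert 'f' in not_a_tuple   # substring test on a string
assert 'f' not in a_tuple   # element test on a tuple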
megadetector/utils/gpu_test.py
CHANGED

@@ -34,7 +34,7 @@ def torch_test():
    except Exception as e: #noqa
        print('PyTorch unavailable, not running PyTorch tests. PyTorch import error was:\n{}'.format(
            str(e)))
-        return
+        return 0

    print('Torch version: {}'.format(str(torch.__version__)))
    print('CUDA available (according to PyTorch): {}'.format(torch.cuda.is_available()))
@@ -71,17 +71,17 @@ def tf_test():
    Print diagnostic information about TF/CUDA status.

    Returns:
-        int: The number of CUDA devices reported by
+        int: The number of CUDA devices reported by TensorFlow.
    """

    try:
-        import tensorflow as tf
+        import tensorflow as tf # type: ignore
    except Exception as e: #noqa
        print('TensorFlow unavailable, not running TF tests. TF import error was:\n{}'.format(
            str(e)))
-        return
+        return 0

-    from tensorflow.python.platform import build_info as build
+    from tensorflow.python.platform import build_info as build # type: ignore
    print(f"TF version: {tf.__version__}")

    if 'cuda_version' not in build.build_info:
@@ -94,7 +94,7 @@ def tf_test():
    print(f"CuDNN build version reported by TensorFlow: {build.build_info['cudnn_version']}")

    try:
-        from tensorflow.python.compiler.tensorrt import trt_convert as trt
+        from tensorflow.python.compiler.tensorrt import trt_convert as trt # type: ignore
        print("Linked TensorRT version: {}".format(trt.trt_utils._pywrap_py_utils.get_linked_tensorrt_version()))
    except Exception:
        print('Could not probe TensorRT version')
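With the change above, torch_test() and tf_test() return 0 instead of None when the corresponding framework can't be imported, so a caller that treats the return values as device counts no longer has to special-case None. A small usage sketch, assuming (as the tf_test docstring suggests) that the function returns an integer device count on success:

from megadetector.utils.gpu_test import tf_test

n_tf_devices = tf_test()
print('TensorFlow reports {} CUDA device(s)'.format(n_tf_devices))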
megadetector/utils/md_tests.py
CHANGED

@@ -386,7 +386,7 @@ def output_files_are_identical(fn1,fn2,verbose=False):
        fn2_results['images'] = \
            sorted(fn2_results['images'], key=lambda d: d['file'])

-    if len(fn1_results['images']) != len(
+    if len(fn1_results['images']) != len(fn2_results['images']):
        if verbose:
            print('{} images in {}, {} images in {}'.format(
                len(fn1_results['images']),fn1,
@@ -1249,8 +1249,8 @@ def run_cli_tests(options):
    cmd_results = execute_and_print(cmd)

    assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_queue,
+                                      verbose=True)


    ## Run again with the image queue and worker-side preprocessing enabled
@@ -1265,24 +1265,24 @@ def run_cli_tests(options):
    cmd_results = execute_and_print(cmd)

    assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_preprocess_queue,
+                                      verbose=True)


-    ## Run again with the image queue
+    ## Run again with the image queue but no worker-side preprocessing

-    print('\n** Running MD on a folder (with image queue
+    print('\n** Running MD on a folder (with image queue but no worker-side preprocessing) (CLI) **\n')

-    cmd = base_cmd + ' --use_image_queue
-
-        insert_before_extension(inference_output_file,'
-    cmd = cmd.replace(inference_output_file,
+    cmd = base_cmd + ' --use_image_queue'
+    inference_output_file_no_preprocess_queue = \
+        insert_before_extension(inference_output_file,'no_preprocess_queue')
+    cmd = cmd.replace(inference_output_file,inference_output_file_no_preprocess_queue)
    cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
    cmd_results = execute_and_print(cmd)

    assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_no_preprocess_queue,
+                                      verbose=True)


    ## Run again with the worker-side preprocessing and an alternative batch size
@@ -1316,8 +1316,8 @@ def run_cli_tests(options):
    cmd_results = execute_and_print(cmd)

    assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_checkpoint,
+                                      verbose=True)


    ## Run again with "modern" postprocessing, make sure the results are *not* the same as classic
@@ -1331,8 +1331,8 @@ def run_cli_tests(options):
    cmd_results = execute_and_print(cmd)

    assert not output_files_are_identical(fn1=inference_output_file,
-
-
+                                          fn2=inference_output_file_modern,
+                                          verbose=True)


    ## Run again with "modern" postprocessing and worker-side preprocessing,
@@ -1348,13 +1348,13 @@ def run_cli_tests(options):

    # This should not be the same as the "classic" results
    assert not output_files_are_identical(fn1=inference_output_file,
-
-
+                                          fn2=inference_output_file_modern_worker_preprocessing,
+                                          verbose=True)

    # ...but it should be the same as the single-threaded "modern" results
    assert output_files_are_identical(fn1=inference_output_file_modern,
-
-
+                                      fn2=inference_output_file_modern_worker_preprocessing,
+                                      verbose=True)


    if not options.skip_cpu_tests:
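The queue and checkpoint test variants above all derive their output filenames with insert_before_extension() and then swap that name into the base command before re-running it. A minimal sketch of that pattern with made-up paths and a placeholder command (the exact separator the helper inserts is up to the helper itself):

from megadetector.utils.path_utils import insert_before_extension

inference_output_file = '/tmp/md_results.json'                     # hypothetical path
base_cmd = 'detector-command ' + inference_output_file             # stand-in for the real test command

variant_output_file = insert_before_extension(inference_output_file, 'no_preprocess_queue')
variant_cmd = base_cmd.replace(inference_output_file, variant_output_file)
print(variant_cmd)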