megadetector 10.0.9-py3-none-any.whl → 10.0.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/data_management/animl_to_md.py +5 -2
- megadetector/data_management/cct_json_utils.py +4 -2
- megadetector/data_management/cct_to_md.py +5 -4
- megadetector/data_management/cct_to_wi.py +5 -1
- megadetector/data_management/coco_to_yolo.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/databases/subset_json_db.py +0 -3
- megadetector/data_management/generate_crops_from_cct.py +6 -4
- megadetector/data_management/get_image_sizes.py +5 -35
- megadetector/data_management/labelme_to_coco.py +10 -6
- megadetector/data_management/labelme_to_yolo.py +19 -28
- megadetector/data_management/lila/create_lila_test_set.py +22 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
- megadetector/data_management/lila/lila_common.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
- megadetector/data_management/ocr_tools.py +6 -10
- megadetector/data_management/read_exif.py +69 -13
- megadetector/data_management/remap_coco_categories.py +1 -1
- megadetector/data_management/remove_exif.py +10 -5
- megadetector/data_management/rename_images.py +20 -13
- megadetector/data_management/resize_coco_dataset.py +10 -4
- megadetector/data_management/speciesnet_to_md.py +3 -3
- megadetector/data_management/yolo_output_to_md_output.py +3 -1
- megadetector/data_management/yolo_to_coco.py +28 -19
- megadetector/detection/change_detection.py +26 -18
- megadetector/detection/process_video.py +1 -1
- megadetector/detection/pytorch_detector.py +5 -5
- megadetector/detection/run_detector.py +34 -10
- megadetector/detection/run_detector_batch.py +60 -42
- megadetector/detection/run_inference_with_yolov5_val.py +3 -1
- megadetector/detection/run_md_and_speciesnet.py +282 -110
- megadetector/detection/run_tiled_inference.py +7 -7
- megadetector/detection/tf_detector.py +4 -6
- megadetector/detection/video_utils.py +9 -6
- megadetector/postprocessing/add_max_conf.py +4 -4
- megadetector/postprocessing/categorize_detections_by_size.py +3 -2
- megadetector/postprocessing/classification_postprocessing.py +19 -21
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +49 -27
- megadetector/postprocessing/convert_output_format.py +8 -6
- megadetector/postprocessing/create_crop_folder.py +13 -4
- megadetector/postprocessing/generate_csv_report.py +22 -8
- megadetector/postprocessing/load_api_results.py +8 -4
- megadetector/postprocessing/md_to_coco.py +2 -3
- megadetector/postprocessing/md_to_labelme.py +12 -8
- megadetector/postprocessing/md_to_wi.py +2 -1
- megadetector/postprocessing/merge_detections.py +4 -6
- megadetector/postprocessing/postprocess_batch_results.py +4 -3
- megadetector/postprocessing/remap_detection_categories.py +6 -3
- megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
- megadetector/postprocessing/separate_detections_into_folders.py +10 -4
- megadetector/postprocessing/subset_json_detector_output.py +1 -1
- megadetector/postprocessing/top_folders_to_bottom.py +22 -7
- megadetector/postprocessing/validate_batch_results.py +1 -1
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
- megadetector/taxonomy_mapping/species_lookup.py +51 -2
- megadetector/utils/ct_utils.py +9 -4
- megadetector/utils/directory_listing.py +3 -0
- megadetector/utils/extract_frames_from_video.py +4 -0
- megadetector/utils/gpu_test.py +6 -6
- megadetector/utils/md_tests.py +21 -21
- megadetector/utils/path_utils.py +171 -36
- megadetector/utils/split_locations_into_train_val.py +0 -4
- megadetector/utils/string_utils.py +21 -0
- megadetector/utils/url_utils.py +5 -3
- megadetector/utils/wi_platform_utils.py +168 -24
- megadetector/utils/wi_taxonomy_utils.py +38 -8
- megadetector/utils/write_html_image_list.py +1 -2
- megadetector/visualization/plot_utils.py +31 -19
- megadetector/visualization/render_images_with_thumbnails.py +3 -0
- megadetector/visualization/visualization_utils.py +18 -7
- megadetector/visualization/visualize_db.py +9 -26
- megadetector/visualization/visualize_detector_output.py +1 -0
- megadetector/visualization/visualize_video_output.py +14 -2
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/RECORD +84 -84
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
@@ -28,6 +28,7 @@ from functools import partial

 from megadetector.visualization.visualization_utils import open_image
 from megadetector.utils.ct_utils import round_float
+from megadetector.utils.ct_utils import write_json
 from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP, FAILURE_IMAGE_OPEN

 output_precision = 3
@@ -36,8 +37,11 @@ default_confidence_threshold = 0.15

 #%% Functions

-def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
-                               info=None,confidence_threshold=None):
+def get_labelme_dict_for_image(im,
+                               image_base_name=None,
+                               category_id_to_name=None,
+                               info=None,
+                               confidence_threshold=None):
     """
     For the given image struct in MD results format, reformat the detections into
     labelme format.
@@ -60,7 +64,7 @@ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
     if image_base_name is None:
         image_base_name = os.path.basename(im['file'])

-    if category_id_to_name:
+    if category_id_to_name is None:
         category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP

     if confidence_threshold is None:
@@ -138,8 +142,7 @@ def _write_output_for_image(im,
                                             info=info,
                                             confidence_threshold=confidence_threshold)

-
-    json.dump(output_dict,f,indent=1)
+    write_json(json_path,output_dict)

 # ...def write_output_for_image(...)

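Several hunks in this release swap an inline open()/json.dump() pair for the new ct_utils.write_json helper. A minimal sketch of what such a helper presumably looks like, assuming a (path, data) signature; the real implementation lives in megadetector.utils.ct_utils and may differ:

import json

def write_json(path, data, indent=1):
    # Hypothetical stand-in for megadetector.utils.ct_utils.write_json;
    # it wraps the open()/json.dump() pattern the old code inlined.
    with open(path, 'w') as f:
        json.dump(data, f, indent=indent)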
@@ -256,9 +259,10 @@ def md_to_labelme(results_file,
                                       md_results['images']),
                         total=len(md_results['images'])))
         finally:
-            pool
-
-
+            if pool is not None:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for labelme file writes")

 # ...for each image

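The same pool-cleanup change recurs throughout this release: a no-op finally block becomes an explicit close()/join(). A self-contained sketch of the pattern, using an illustrative worker function rather than anything from this diff:

from functools import partial
from multiprocessing.pool import ThreadPool

from tqdm import tqdm

def _process_item(item, scale=1):
    # Illustrative worker; the real code maps per-image writers over MD results
    return item * scale

items = [1, 2, 3]
pool = None
try:
    pool = ThreadPool(2)
    results = list(tqdm(pool.imap(partial(_process_item, scale=10), items), total=len(items)))
finally:
    # The cleanup these hunks add: close and join the pool even if mapping raises
    if pool is not None:
        pool.close()
        pool.join()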
@@ -10,6 +10,7 @@ Converts the MD .json format to the WI predictions.json format.

 import sys
 import argparse
+
 from megadetector.utils.wi_taxonomy_utils import generate_predictions_json_from_md_results

@@ -34,7 +35,7 @@ def main(): # noqa

     generate_predictions_json_from_md_results(args.md_results_file,
                                               args.predictions_json_file,
-                                              base_folder=
+                                              base_folder=args.base_folder)

 if __name__ == '__main__':
     main()
@@ -23,6 +23,7 @@ import os
 from tqdm import tqdm

 from megadetector.utils.ct_utils import get_iou
+from megadetector.utils.ct_utils import write_json


 #%% Structs
@@ -121,8 +122,6 @@ def merge_detections(source_files,target_file,output_file,options=None):

     assert os.path.isfile(target_file)

-    os.makedirs(os.path.dirname(output_file),exist_ok=True)
-
     with open(target_file,'r') as f:
         output_data = json.load(f)

@@ -290,8 +289,7 @@ def merge_detections(source_files,target_file,output_file,options=None):

     # ...for each source file

-
-    json.dump(output_data,f,indent=1)
+    write_json(output_file,output_data)

     print('Saved merged results to {}'.format(output_file))

@@ -308,7 +306,7 @@ def main():
     default_options = MergeDetectionsOptions()

     parser = argparse.ArgumentParser(
-        description='Merge detections from one or more MegaDetector results files into an existing
+        description='Merge detections from one or more MegaDetector results files into an existing results file')
     parser.add_argument(
         'source_files',
         nargs='+',
@@ -359,7 +357,7 @@ def main():
         type=int,
         nargs='+',
         default=None,
-        help='List of numeric detection categories to
+        help='List of numeric detection categories to exclude')
     parser.add_argument(
         '--merge_empty_only',
         action='store_true',
@@ -1889,8 +1889,9 @@ def process_batch_results(options):
     if options.include_classification_category_report:

         # TODO: it's only for silly historical reasons that we re-read
-        # the input file in this case;
-        #
+        # the input file in this case; because this module has used Pandas
+        # forever, we're not currently carrying the json representation around,
+        # only the Pandas representation.

         print('Generating classification category report')

@@ -1905,7 +1906,7 @@ def process_batch_results(options):
                 if ('classifications' in det) and (len(det['classifications']) > 0):
                     class_id = det['classifications'][0][0]
                     if class_id not in classification_category_to_count:
-                        classification_category_to_count[class_id] =
+                        classification_category_to_count[class_id] = 1
                     else:
                         classification_category_to_count[class_id] = \
                             classification_category_to_count[class_id] + 1
@@ -18,6 +18,7 @@ import argparse
 from tqdm import tqdm

 from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.utils.ct_utils import write_json


 #%% Main function
@@ -132,14 +133,16 @@ def remap_detection_categories(input_file,
         for det in im['detections']:
             det['category'] = input_category_id_to_output_category_id[det['category']]

-
+    # ...for each image

-
-    json.dump(input_data,f,indent=1)
+    input_data['detection_categories'] = target_category_map

+    write_json(output_file,input_data)

     print('Saved remapped results to {}'.format(output_file))

+# ...def remap_detection_categories(...)
+

 #%% Interactive driver

@@ -252,9 +252,10 @@ def render_detection_confusion_matrix(ground_truth_file,
                                           md_formatted_results['images']),
                             total=len(md_formatted_results['images'])))
         finally:
-            pool
-
-
+            if pool is not None:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for confusion matrix rendering")

     else:

@@ -369,11 +370,15 @@ def render_detection_confusion_matrix(ground_truth_file,

         # If there were no detections at all, call this image empty
         if len(results_im['detections']) == 0:
+
             predicted_category_name = empty_category_name
+
         # Otherwise look for above-threshold detections
         else:
+
             results_category_name_to_confidence = defaultdict(int)
             for det in results_im['detections']:
+
                 category_name = results_category_id_to_name[det['category']]
                 detection_threshold = confidence_thresholds['default']
                 if category_name in confidence_thresholds:
@@ -381,12 +386,15 @@ def render_detection_confusion_matrix(ground_truth_file,
                 if det['conf'] > detection_threshold:
                     results_category_name_to_confidence[category_name] = max(
                         results_category_name_to_confidence[category_name],det['conf'])
-
-
-
-
-
-
+
+            # ...for each detection
+
+            # If there were no detections above threshold
+            if len(results_category_name_to_confidence) == 0:
+                predicted_category_name = empty_category_name
+            else:
+                predicted_category_name = max(results_category_name_to_confidence,
+                                              key=results_category_name_to_confidence.get)

         ground_truth_category_index = gt_category_name_to_category_index[ground_truth_category_name]
         predicted_category_index = gt_category_name_to_category_index[predicted_category_name]
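The new logic above picks, among above-threshold detections, the category with the highest confidence, falling back to the empty category when nothing survives the threshold. A tiny standalone illustration (category names and confidences are invented):

from collections import defaultdict

empty_category_name = 'empty'
results_category_name_to_confidence = defaultdict(int)
results_category_name_to_confidence['animal'] = 0.92
results_category_name_to_confidence['person'] = 0.31

if len(results_category_name_to_confidence) == 0:
    predicted_category_name = empty_category_name
else:
    predicted_category_name = max(results_category_name_to_confidence,
                                  key=results_category_name_to_confidence.get)
print(predicted_category_name)  # animal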
@@ -396,7 +404,7 @@ def render_detection_confusion_matrix(ground_truth_file,

         confusion_matrix[ground_truth_category_index,predicted_category_index] += 1

-    # ...for each file
+    # ...for each ground truth file

     plt.ioff()

@@ -37,7 +37,7 @@ def remove_repeat_detections(input_file,output_file,filtering_dir):
     """

     assert os.path.isfile(input_file), "Can't find file {}".format(input_file)
-    assert os.path.
+    assert os.path.exists(filtering_dir), "Can't find input file/folder {}".format(filtering_dir)
     options = repeat_detections_core.RepeatDetectionOptions()
     if os.path.isfile(filtering_dir):
         options.filterFileToLoad = filtering_dir
@@ -869,7 +869,7 @@ def _update_detection_table(repeat_detection_results, options, output_file_name=
             detection_to_modify = row_detections[instance.i_detection]

             # Make sure the bounding box matches
-            assert (instance_bbox[0:
+            assert (instance_bbox[0:4] == detection_to_modify['bbox'][0:4])

             # Make the probability negative, if it hasn't been switched by
             # another bounding box
@@ -1149,7 +1149,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):

         # Load the filtering file
         detection_index_file_name = options.filterFileToLoad
-
+        with open(detection_index_file_name, 'r') as f:
+            s_in = f.read()
         detection_info = jsonpickle.decode(s_in)
         filtering_base_dir = os.path.dirname(options.filterFileToLoad)
         suspicious_detections = detection_info['suspicious_detections']
@@ -1382,7 +1383,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):

         # candidate_detection_file = all_candidate_detection_files[0]
         for candidate_detection_file in all_candidate_detection_files:
-
+            with open(candidate_detection_file, 'r') as f:
+                s = f.read()
             candidate_detections_this_file = jsonpickle.decode(s)
             all_candidate_detections.append(candidate_detections_this_file)

@@ -494,7 +494,8 @@ def separate_detections_into_folders(options):

     # Load detection results
     print('Loading detection results')
-
+    with open(options.results_file,'r') as f:
+        results = json.load(f)
     images = results['images']

     for im in images:
@@ -618,8 +619,13 @@ def separate_detections_into_folders(options):

     print('Starting a pool with {} threads'.format(options.n_threads))
     pool = ThreadPool(options.n_threads)
-
-
+    try:
+        process_detections_with_options = partial(_process_detections, options=options)
+        _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
+    finally:
+        pool.close()
+        pool.join()
+        print('Pool closed and joined for folder separation')

     if options.remove_empty_folders:
         print('Removing empty folders from {}'.format(options.base_output_folder))
@@ -736,7 +742,7 @@ def main(): # noqa
                         help='Line thickness (in pixels) for rendering, only meaningful if ' + \
                              'using render_boxes (defaults to {})'.format(
                              default_line_thickness))
-    parser.add_argument('--box_expansion', type=int, default=
+    parser.add_argument('--box_expansion', type=int, default=default_box_expansion,
                         help='Box expansion (in pixels) for rendering, only meaningful if ' + \
                              'using render_boxes (defaults to {})'.format(
                              default_box_expansion))
@@ -45,7 +45,12 @@ class TopFoldersToBottomOptions:
     Options used to parameterize top_folders_to_bottom()
     """

-    def __init__(self,
+    def __init__(self,
+                 input_folder,
+                 output_folder,
+                 copy=True,
+                 n_threads=1,
+                 overwrite=False):

         #: Whether to copy (True) vs. move (False) false when re-organizing
         self.copy = copy
@@ -60,7 +65,7 @@ class TopFoldersToBottomOptions:
         self.output_folder = output_folder

         #: If this is False and an output file exists, throw an error
-        self.overwrite =
+        self.overwrite = overwrite


 #%% Main functions
@@ -130,6 +135,7 @@ def top_folders_to_bottom(options):
         options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.

     """
+
     os.makedirs(options.output_folder,exist_ok=True)

     # Enumerate input folder
@@ -167,10 +173,15 @@ def top_folders_to_bottom(options):

     print('Starting a pool with {} threads'.format(options.n_threads))
     pool = ThreadPool(options.n_threads)
-
-
+    try:
+        process_file_with_options = partial(_process_file, options=options)
+        _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
+    finally:
+        pool.close()
+        pool.join()
+        print('Pool closed and join for folder inversion')

-# ...def top_folders_to_bottom()
+# ...def top_folders_to_bottom(...)


 #%% Interactive driver
@@ -192,7 +203,7 @@ if False:

 #%% Command-line driver

-# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads
+# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 10

 def main(): # noqa

@@ -215,7 +226,11 @@ def main(): # noqa

     # Convert to an options object
     options = TopFoldersToBottomOptions(
-        args.input_folder,
+        args.input_folder,
+        args.output_folder,
+        copy=args.copy,
+        n_threads=args.n_threads,
+        overwrite=args.overwrite)

     top_folders_to_bottom(options)

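A short usage sketch of the new explicit constructor, based on the signature and the main() call shown above; the folder paths and thread count are illustrative:

from megadetector.postprocessing.top_folders_to_bottom import (
    TopFoldersToBottomOptions, top_folders_to_bottom)

options = TopFoldersToBottomOptions('/data/separated_images',
                                    '/data/separated_images_inverted',
                                    copy=True,
                                    n_threads=10,
                                    overwrite=False)
top_folders_to_bottom(options)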
@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')

-output_file = os.path.expanduser('~/lila/lila_additions_2025.
+output_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')

 datasets_to_map = [
-    '
+    'California Small Animals'
 ]

@@ -128,6 +128,52 @@ output_df.to_csv(output_file, index=None, header=True)
 # from megadetector.utils.path_utils import open_file; open_file(output_file)


+#%% Remap missing entries in the .csv file
+
+# ...typically because I made a change to the mapping code.
+
+from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import is_empty
+
+remapped_file = insert_before_extension(output_file,'remapped')
+
+df = pd.read_csv(output_file)
+
+for i_row,row in df.iterrows():
+
+    # Do we need to map this row?
+    if is_empty(row['source']):
+
+        query = row['query']
+        print('Mapping {}'.format(query))
+
+        taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
+
+        if (taxonomic_match.source == taxonomy_preference):
+
+            source = taxonomic_match.source
+            taxonomy_level = taxonomic_match.taxonomic_level
+            scientific_name = taxonomic_match.scientific_name
+            common_name = taxonomic_match.common_name
+            taxonomy_string = taxonomic_match.taxonomy_string
+
+            # Write source, taxonomy_level, scientific_name, common_name, and taxonomy_string
+            # to the corresponding columns in the current row in df
+            df.loc[i_row, 'source'] = source
+            df.loc[i_row, 'taxonomy_level'] = taxonomy_level
+            df.loc[i_row, 'scientific_name'] = scientific_name
+            df.loc[i_row, 'common_name'] = common_name
+            df.loc[i_row, 'taxonomy_string'] = taxonomy_string
+
+        # ...if we found a match
+
+    # ...do we need to map this row?
+
+# ...for each row
+
+df.to_csv(remapped_file, index=None, header=True)
+
+
 #%% Manual lookup

 if False:
@@ -140,11 +186,19 @@ if False:

     #%%

-
+    from megadetector.taxonomy_mapping.species_lookup import pop_levels
+
+    # Use this when an iNat match includes an empty subgenus with the same name as the genus
+    n_levels_to_pop = 0
+    q = 'sus scrofa'

     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
+    if n_levels_to_pop > 0:
+        m = pop_levels(m,n_levels_to_pop)
+
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
+    # common_name = eval(m.__dict__['taxonomy_string'])[0][-1][0]; print(common_name); clipboard.copy(common_name)

     if (m is None) or (len(m.taxonomy_string) == 0):
         print('No match')
@@ -155,3 +209,5 @@ if False:
     print(m.source)
     print(m.taxonomy_string)
     import clipboard; clipboard.copy(m.taxonomy_string)
+
+
@@ -16,7 +16,7 @@ import os
 import pandas as pd

 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')

 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
@@ -56,11 +56,6 @@ def taxonomy_string_to_level(taxonomy_string):
     return level


-#%% Read the taxonomy mapping file
-
-df = pd.read_csv(lila_taxonomy_file)
-
-
 #%% Prepare taxonomy lookup

 from megadetector.taxonomy_mapping.species_lookup import \
@@ -95,20 +90,29 @@ taxonomy_preference = 'inat'
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in tqdm(df.iterrows(),total=len(df)):

-
-
-
+    try:
+
+        sn = row['scientific_name']
+        if not isinstance(sn,str):
+            continue

-
-
+        m = get_preferred_taxonomic_match(sn,taxonomy_preference)
+        assert m.scientific_name == sn

-
-
-
+        ts = row['taxonomy_string']
+        assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
+            row['dataset_name'],ts,m.taxonomy_string)
+
+        if ts != m.taxonomy_string:
+            n_taxonomy_changes += 1
+            df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
+
+    except Exception as e:

-
-
-
+        print('Error at row {}: {}'.format(i_row,str(e)))
+        raise
+
+# ...for each row

 print('\nMade {} taxonomy changes'.format(n_taxonomy_changes))

@@ -325,6 +329,11 @@ for i_row,row in df.iterrows():

 #%% Download sample images for all scientific names

+# You might have to do this:
+#
+# pip install python-magic
+# pip install python-magic-bin
+
 # Takes ~1 minute per 10 rows

 remapped_queries = {'papio':'papio+baboon',
@@ -560,6 +560,7 @@ def get_taxonomic_info(query: str) -> List[Dict[str, Any]]:
     Main entry point: get taxonomic matches from both taxonomies for [query],
     which may be a scientific or common name.
     """
+
     query = query.strip().lower()
     # print("Finding taxonomy information for: {0}".format(query))

@@ -682,6 +683,35 @@ hyphenated_terms = ['crowned', 'backed', 'throated', 'tailed', 'headed', 'cheeke
                     'fronted', 'bellied', 'spotted', 'eared', 'collared', 'breasted',
                     'necked']

+def pop_levels(m, n_levels=1):
+    """
+    Remove [n_levels] levels from the bottom of the TaxonomicMatch object m, typically used to remove
+    silly subgenera.
+    """
+
+    v = eval(m.taxonomy_string)
+    assert v[0][1] == m.taxonomic_level
+    assert v[0][2] == m.scientific_name
+    popped_v = v[n_levels:]
+    taxonomic_level = popped_v[0][1]
+    scientific_name = popped_v[0][2]
+    common_name = popped_v[0][3]
+    if len(common_name) == 0:
+        common_name = ''
+    else:
+        common_name = common_name[0]
+    taxonomy_string = str(popped_v)
+    source = m.source
+    return TaxonomicMatch(scientific_name=scientific_name,
+                          common_name=common_name,
+                          taxonomic_level=taxonomic_level,
+                          source=source,
+                          taxonomy_string=taxonomy_string,
+                          match=None)
+
+# ...def pop_levels(...)
+
+
 def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retry=True) -> TaxonomicMatch:
     """
     Wrapper for _get_preferred_taxonomic_match, but expressing a variety of heuristics
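Judging from the assertions and indexing in pop_levels, the eval()'d taxonomy_string is a list of (taxon_id, level, scientific_name, [common_names]) tuples with the innermost level first. A hedged, self-contained illustration of what popping one level does; all values below are invented:

# Assumed shape of the eval()'d taxonomy string; data is made up for illustration
taxonomy = [
    (1001, 'subgenus', 'sus', []),                  # redundant subgenus
    (1002, 'genus', 'sus', ['pigs']),
    (1003, 'family', 'suidae', ['hogs and pigs']),
]

popped = taxonomy[1:]  # what pop_levels(m, 1) keeps
level, name, common = popped[0][1], popped[0][2], popped[0][3]
print(level, name, common[0] if common else '')  # genus sus pigs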
@@ -704,6 +734,17 @@ def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retry=True) -> TaxonomicMatch:
     for s in hyphenated_terms:
         query = query.replace(' ' + s,'-' + s)
     m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
+    if (len(m.scientific_name) > 0) or (not retry):
+        return m
+
+    query = query.replace(' species','')
+    query = query.replace(' order','')
+    query = query.replace(' genus','')
+    query = query.replace(' family','')
+    query = query.replace(' subfamily','')
+    m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
     return m

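The new retry path re-queries after stripping taxonomic rank words from queries that fail to match verbatim; an illustrative example of that string handling (the query is invented):

# Illustrative only: a failed query like 'rodent order' gets retried as 'rodent'
query = 'rodent order'
for rank_word in (' species', ' order', ' genus', ' family', ' subfamily'):
    query = query.replace(rank_word, '')
print(query)  # rodent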
@@ -887,8 +928,16 @@ def _get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat') ->

     taxonomy_string = str(match)

-
-
+    m = TaxonomicMatch(scientific_name, common_name, taxonomic_level, source,
+                       taxonomy_string, match)
+
+    if (m.taxonomic_level == 'subgenus' and \
+        match[1][1] == 'genus' and \
+        match[1][2] == m.scientific_name):
+        print('Removing redundant subgenus {}'.format(scientific_name))
+        m = pop_levels(m,1)
+
+    return m,query

 # ...def _get_preferred_taxonomic_match()
