megadetector 5.0.29__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/prepare_classification_script_mc.py +3 -3
- megadetector/data_management/annotations/annotation_constants.py +0 -1
- megadetector/data_management/camtrap_dp_to_coco.py +34 -1
- megadetector/data_management/cct_json_utils.py +2 -2
- megadetector/data_management/coco_to_yolo.py +22 -5
- megadetector/data_management/databases/add_width_and_height_to_db.py +85 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +2 -2
- megadetector/data_management/databases/integrity_check_json_db.py +29 -15
- megadetector/data_management/generate_crops_from_cct.py +50 -1
- megadetector/data_management/labelme_to_coco.py +4 -2
- megadetector/data_management/labelme_to_yolo.py +82 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +276 -18
- megadetector/data_management/lila/get_lila_annotation_counts.py +5 -3
- megadetector/data_management/lila/lila_common.py +3 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +15 -5
- megadetector/data_management/mewc_to_md.py +5 -0
- megadetector/data_management/ocr_tools.py +4 -3
- megadetector/data_management/read_exif.py +20 -5
- megadetector/data_management/remap_coco_categories.py +66 -4
- megadetector/data_management/remove_exif.py +50 -1
- megadetector/data_management/rename_images.py +3 -3
- megadetector/data_management/resize_coco_dataset.py +563 -95
- megadetector/data_management/yolo_output_to_md_output.py +131 -2
- megadetector/data_management/yolo_to_coco.py +140 -5
- megadetector/detection/change_detection.py +4 -3
- megadetector/detection/pytorch_detector.py +60 -22
- megadetector/detection/run_detector.py +225 -25
- megadetector/detection/run_detector_batch.py +42 -16
- megadetector/detection/run_inference_with_yolov5_val.py +12 -2
- megadetector/detection/run_tiled_inference.py +1 -0
- megadetector/detection/video_utils.py +53 -24
- megadetector/postprocessing/add_max_conf.py +4 -0
- megadetector/postprocessing/categorize_detections_by_size.py +1 -1
- megadetector/postprocessing/classification_postprocessing.py +55 -20
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +64 -10
- megadetector/postprocessing/convert_output_format.py +12 -8
- megadetector/postprocessing/create_crop_folder.py +137 -10
- megadetector/postprocessing/load_api_results.py +26 -8
- megadetector/postprocessing/md_to_coco.py +4 -4
- megadetector/postprocessing/md_to_labelme.py +18 -7
- megadetector/postprocessing/merge_detections.py +5 -0
- megadetector/postprocessing/postprocess_batch_results.py +6 -3
- megadetector/postprocessing/remap_detection_categories.py +55 -2
- megadetector/postprocessing/render_detection_confusion_matrix.py +9 -6
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -4
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +40 -19
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
- megadetector/taxonomy_mapping/species_lookup.py +123 -41
- megadetector/utils/ct_utils.py +133 -113
- megadetector/utils/md_tests.py +93 -13
- megadetector/utils/path_utils.py +137 -107
- megadetector/utils/split_locations_into_train_val.py +2 -2
- megadetector/utils/string_utils.py +7 -7
- megadetector/utils/url_utils.py +81 -58
- megadetector/utils/wi_utils.py +46 -17
- megadetector/visualization/plot_utils.py +13 -9
- megadetector/visualization/render_images_with_thumbnails.py +2 -1
- megadetector/visualization/visualization_utils.py +94 -46
- megadetector/visualization/visualize_db.py +36 -9
- megadetector/visualization/visualize_detector_output.py +4 -4
- {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/METADATA +135 -135
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -438
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -109
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -513
- megadetector-5.0.29.dist-info/RECORD +0 -163
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/WHEEL +0 -0
megadetector/postprocessing/create_crop_folder.py:

@@ -11,6 +11,8 @@ of images representing all above-threshold crops from the original folder.
 
 import os
 import json
+import argparse
+
 from tqdm import tqdm
 
 from multiprocessing.pool import Pool, ThreadPool
@@ -19,7 +21,7 @@ from functools import partial
 
 from megadetector.utils.path_utils import insert_before_extension
 from megadetector.utils.ct_utils import invert_dictionary
-from megadetector.utils.ct_utils import is_list_sorted
+from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.visualization.visualization_utils import crop_image
 from megadetector.visualization.visualization_utils import exif_preserving_save
 
@@ -63,6 +65,7 @@ def _get_crop_filename(image_fn,crop_id):
     """
     Generate crop filenames in a consistent way.
     """
+
     if isinstance(crop_id,int):
         crop_id = str(crop_id).zfill(3)
     assert isinstance(crop_id,str)
@@ -75,7 +78,15 @@ def _generate_crops_for_single_image(crops_this_image,
                                      options):
     """
     Generate all the crops required for a single image.
+
+    Args:
+        crops_this_image (list of dict): list of dicts with at least keys
+            'image_fn_relative', 'crop_id'
+        input_folder (str): input folder (whole images)
+        output_folder (crops): output folder (crops)
+        options (CreateCropFolderOptions): cropping options
     """
+
     if len(crops_this_image) == 0:
         return
 
@@ -147,9 +158,9 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
         crop_results_prefix (str, optional): if not None, removes this prefix from crop
             results filenames.  Intended to support the case where the crop results
             use absolute paths.
-        detections_without_classification_handling (str, optional): what to do when we
+        detections_without_classification_handling (str, optional): what to do when we
            encounter a crop that doesn't appear in classification results: 'error',
-            or 'include' ("include" means "leave the detection alone, without classifications"
+            or 'include' ("include" means "leave the detection alone, without classifications"
     """
 
     ##%% Validate inputs
@@ -190,7 +201,7 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
     crop_filename_to_results = {}
 
     # im = crop_results['images'][0]
-    for im in crop_results['images']:
+    for im in crop_results['images']:
         fn = im['file']
         # Possibly remove a prefix from each filename
         if (crop_results_prefix is not None) and (crop_results_prefix in fn):
@@ -260,14 +271,14 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
             assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1], \
                 'Invalid crop bounding box'
 
-            # This check was helpful for the case where crop-level results had already
+            # This check was helpful for the case where crop-level results had already
             # taken detection confidence values from detector output by construct, but this isn't
             # really meaningful for most cases.
            # assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
-
+
             if require_identical_detection_categories:
                 assert crop_results_this_detection['detections'][0]['category'] == det['category']
-
+
             # Copy the crop-level classifications
             det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
             confidence_values = [x[1] for x in det['classifications']]
@@ -381,8 +392,8 @@ def create_crop_folder(input_file,
             det['crop_id'] = i_detection
 
             crop_info = {'image_fn_relative':image_fn_relative,
-
-
+                         'crop_id':i_detection,
+                         'detection':det}
 
             crop_filename_relative = _get_crop_filename(image_fn_relative,
                                                         crop_info['crop_id'])
@@ -490,4 +501,120 @@ def create_crop_folder(input_file,
 
 #%% Command-line driver
 
-
+def main():
+    """
+    Command-line interface for creating a crop folder from MegaDetector results.
+    """
+
+    parser = argparse.ArgumentParser(
+        description='Create a folder of crops from MegaDetector results'
+    )
+    parser.add_argument(
+        'input_file',
+        type=str,
+        help='Path to the MegaDetector .json results file'
+    )
+    parser.add_argument(
+        'input_folder',
+        type=str,
+        help='Path to the folder containing the original images'
+    )
+    parser.add_argument(
+        'output_folder',
+        type=str,
+        help='Path to the folder where cropped images will be saved'
+    )
+    parser.add_argument(
+        '--output_file',
+        type=str,
+        default=None,
+        help='Path to save the modified MegaDetector .json file (with crop IDs and filenames)'
+    )
+    parser.add_argument(
+        '--crops_output_file',
+        type=str,
+        default=None,
+        help='Path to save a new .json file for the crops themselves (with full-image detections for each crop)'
+    )
+    parser.add_argument(
+        '--confidence_threshold',
+        type=float,
+        default=0.1,
+        help='Confidence threshold for detections to be cropped (default: 0.1)'
+    )
+    parser.add_argument(
+        '--expansion',
+        type=int,
+        default=0,
+        help='Number of pixels to expand each crop (default: 0)'
+    )
+    parser.add_argument(
+        '--quality',
+        type=int,
+        default=95,
+        help='JPEG quality for saving crops (default: 95)'
+    )
+    parser.add_argument(
+        '--overwrite',
+        type=str,
+        default='true',
+        choices=['true', 'false'],
+        help="Overwrite existing crop images (default: 'true')"
+    )
+    parser.add_argument(
+        '--n_workers',
+        type=int,
+        default=8,
+        help='Number of concurrent workers (default: 8)'
+    )
+    parser.add_argument(
+        '--pool_type',
+        type=str,
+        default='thread',
+        choices=['thread', 'process'],
+        help="Type of parallelism to use ('thread' or 'process', default: 'thread')"
+    )
+    parser.add_argument(
+        '--category_names',
+        type=str,
+        default=None,
+        help="Comma-separated list of category names to include " + \
+             "(e.g., 'animal,person'). If None (default), all categories are included."
+    )
+
+    args = parser.parse_args()
+
+    options = CreateCropFolderOptions()
+    options.confidence_threshold = args.confidence_threshold
+    options.expansion = args.expansion
+    options.quality = args.quality
+    options.overwrite = (args.overwrite.lower() == 'true')
+    options.n_workers = args.n_workers
+    options.pool_type = args.pool_type
+
+    if args.category_names:
+        options.category_names_to_include = [name.strip() for name in args.category_names.split(',')]
+    else:
+        options.category_names_to_include = None
+
+    print('Starting crop folder creation...')
+    print('Input MD results: {}'.format(args.input_file))
+    print('Input image folder {}'.format(args.input_folder))
+    print('Output crop folder: {}'.format(args.output_folder))
+
+    if args.output_file:
+        print('Modified MD results will be saved to {}'.format(args.output_file))
+    if args.crops_output_file:
+        print('Crops .json output will be saved to {}'.format(args.crops_output_file))
+
+    create_crop_folder(
+        input_file=args.input_file,
+        input_folder=args.input_folder,
+        output_folder=args.output_folder,
+        output_file=args.output_file,
+        crops_output_file=args.crops_output_file,
+        options=options
+    )
+
+if __name__ == '__main__':
+    main()
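The new `main()` above is a thin wrapper around `create_crop_folder`; for programmatic use, here is a minimal sketch mirroring the CLI wiring. The option and function names are taken from this diff; the file paths are hypothetical.

```python
# Minimal sketch mirroring the new CLI above; option and function names come
# from this diff, the file paths are hypothetical.
from megadetector.postprocessing.create_crop_folder import (
    create_crop_folder, CreateCropFolderOptions)

options = CreateCropFolderOptions()
options.confidence_threshold = 0.1           # same default as the CLI
options.expansion = 0                        # pixels of padding per crop
options.quality = 95                         # JPEG quality for saved crops
options.overwrite = True
options.n_workers = 8
options.pool_type = 'thread'                 # or 'process'
options.category_names_to_include = None     # None == include all categories

create_crop_folder(input_file='md_results.json',
                   input_folder='/data/camera-trap-images',
                   output_folder='/data/crops',
                   output_file='md_results_with_crop_ids.json',
                   crops_output_file='crop_results.json',
                   options=options)
```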
megadetector/postprocessing/load_api_results.py:

@@ -18,7 +18,8 @@ Includes functions to read/write the (very very old) .csv results format.
 import json
 import os
 
-from typing import
+from typing import Optional
+from collections.abc import Mapping
 
 import pandas as pd
 
@@ -36,13 +37,13 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
     Loads json-formatted MegaDetector results to a Pandas DataFrame.
 
     Args:
-        api_output_path: path to the output json file
-        normalize_paths: whether to apply os.path.normpath to the 'file'
-            in each image entry in the output file
-        filename_replacements: replace some path tokens to match local paths
-            the original
-        force_forward_slashes: whether to convert backslashes to forward
-            in filenames
+        api_output_path (str): path to the output json file
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        force_forward_slashes (bool, optional): whether to convert backslashes to forward
+            slashes in filenames
 
     Returns:
         detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
@@ -98,6 +99,11 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
 def write_api_results(detection_results_table, other_fields, out_path):
     """
     Writes a Pandas DataFrame to the MegaDetector .json format.
+
+    Args:
+        detection_results_table (DataFrame): data to write
+        other_fields (dict): additional fields to include in the output .json
+        out_path (str): output .json filename
     """
 
     print('Writing detection results to {}'.format(out_path))
@@ -144,6 +150,14 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements=N
     [DEPRECATED]
 
     Loads .csv-formatted MegaDetector results to a pandas table
+
+    Args:
+        filename (str): path to the csv file to read
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        nrows (int, optional): read only the first N rows of [filename]
     """
 
     if filename_replacements is None:
@@ -192,6 +206,10 @@ def write_api_results_csv(detection_results, filename):
     Writes a Pandas table to csv in a way that's compatible with the .csv output
     format.  Currently just a wrapper around to_csv that forces output writing
     to go through a common code path.
+
+    Args:
+        detection_results (DataFrame): dataframe to write to [filename]
+        filename (str): .csv filename to write
     """
 
     print('Writing detection results to {}'.format(filename))
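Given the docstrings above, a short usage sketch. Note that the Returns section shown in the hunk is truncated, so the tuple unpacking below (a DataFrame plus an 'other fields' dict, matching what `write_api_results` expects) is an assumption, and the paths are hypothetical.

```python
# Usage sketch based on the docstrings above.  The tuple unpacking is an
# assumption (the Returns section is truncated in this hunk); the paths are
# hypothetical.
from megadetector.postprocessing.load_api_results import (
    load_api_results, write_api_results)

detection_results, other_fields = load_api_results(
    'md_results.json',
    normalize_paths=True,
    force_forward_slashes=True)

# The table is documented to contain at least ['file', 'detections', 'failure']
failures = detection_results[detection_results['failure'].notnull()]
print('{} of {} images failed'.format(len(failures), len(detection_results)))

# Round-trip the table back to the MegaDetector .json format
write_api_results(detection_results, other_fields, 'md_results_roundtrip.json')
```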
megadetector/postprocessing/md_to_coco.py:

@@ -74,9 +74,9 @@ def md_to_coco(md_results_file,
             data (e.g. EXIF metadata) will be propagated to COCO output
         include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
             with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
-        include_annotations_without_bounding_boxes (bool, optional):
-
-
+        include_annotations_without_bounding_boxes (bool, optional): the only time we end up with
+            annotations without bounding boxes is when a detection has the category [empty_category_id];
+            this determines whether those annotations are included in the output.
         empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
             attached to any bounding boxes
         overwrite_behavior (str, optional): determines behavior if the output file exists ('skip' to skip conversion,
@@ -257,7 +257,7 @@ def md_to_coco(md_results_file,
         else:
 
             # In very esoteric cases, we use the empty category (0) in MD-formatted output files
-            print('Warning: empty category ({}) used for annotation
+            print('Warning: empty category ({}) used for annotation for image {}'.format(
                 empty_category_id,im['file']))
             pass
 
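A hypothetical call illustrating the options documented above. Only `md_to_coco` and the keyword arguments shown in these hunks come from the diff; the `coco_output_file` parameter name and the paths are assumptions.

```python
# Hypothetical sketch: only 'md_to_coco' and the keyword arguments documented
# in the hunks above come from this diff; 'coco_output_file' and the paths
# are assumptions.
from megadetector.postprocessing.md_to_coco import md_to_coco

md_to_coco('md_results.json',
           coco_output_file='md_results_coco.json',   # assumed parameter name
           include_failed_images=False,
           include_annotations_without_bounding_boxes=False,
           empty_category_id='0',
           overwrite_behavior='skip')                 # 'skip' == don't re-convert
```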
megadetector/postprocessing/md_to_labelme.py:

@@ -108,8 +108,13 @@ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
 # ...def get_labelme_dict_for_image()
 
 
-def _write_output_for_image(im,
-
+def _write_output_for_image(im,
+                            image_base,
+                            extension_prefix,
+                            info,
+                            confidence_threshold,
+                            category_id_to_name,
+                            overwrite,
                             verbose=False):
 
     if 'failure' in im and im['failure'] is not None:
@@ -140,9 +145,14 @@ def _write_output_for_image(im,image_base,extension_prefix,info,
 
 
 
-def md_to_labelme(results_file,
-
-
+def md_to_labelme(results_file,
+                  image_base,
+                  confidence_threshold=None,
+                  overwrite=False,
+                  extension_prefix='',
+                  n_workers=1,
+                  use_threads=False,
+                  bypass_image_size_read=False,
                   verbose=False):
     """
     For all the images in [results_file], write a .json file in labelme format alongside the
@@ -153,11 +163,12 @@ def md_to_labelme(results_file,image_base,confidence_threshold=None,
         image_base (str): folder of images; filenames in [results_file] should be relative to
             this folder
         confidence_threshold (float, optional): only detections at or above this confidence threshold
-            will be included in the output dict
+            will be included in the output dict.  If None, no threshold will be applied.
         overwrite (bool, optional): whether to overwrite existing output files; if this is False
             and the output file for an image exists, we'll skip that image
         extension_prefix (str, optional): if non-empty, "extension_prefix" will be inserted before the .json
-            extension
+            extension (typically used to generate multiple copies of labelme files representing different
+            MD thresholds)
         n_workers (int, optional): enables multiprocessing if > 1
         use_threads (bool, optional): if [n_workers] > 1, determines whether we parallelize via threads (True)
             or processes (False)
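The full `md_to_labelme` signature is now visible above; a minimal sketch follows. The paths and the threshold value are hypothetical.

```python
# Minimal sketch of the md_to_labelme signature shown above; paths and the
# threshold value are hypothetical.
from megadetector.postprocessing.md_to_labelme import md_to_labelme

md_to_labelme(results_file='md_results.json',
              image_base='/data/camera-trap-images',
              confidence_threshold=0.2,   # None == include all detections
              overwrite=False,            # skip images whose .json already exists
              extension_prefix='',        # e.g. '.thresh_0.2' to keep multiple versions
              n_workers=4,
              use_threads=True,           # threads rather than processes
              verbose=False)
```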
megadetector/postprocessing/merge_detections.py:

@@ -85,6 +85,11 @@ def merge_detections(source_files,target_file,output_file,options=None):
 
     The results are written to [output_file].
 
+    Args:
+        source_files (list of str): list of files to merge into the results in [target_file]
+        target_file (str): filename that is treated as the primary source of results
+        output_file (str): file to which we should write merged results
+        options (MergeDetectionsOptions, optional): see MergeDetectionsOptions
     """
 
     if isinstance(source_files,str):
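A sketch of the `merge_detections` call documented above; the filenames are hypothetical, and `MergeDetectionsOptions` is assumed to be importable from the same module.

```python
# Sketch of the merge_detections signature documented above; filenames are
# hypothetical, and MergeDetectionsOptions is assumed to live in this module.
from megadetector.postprocessing.merge_detections import (
    merge_detections, MergeDetectionsOptions)

options = MergeDetectionsOptions()

# Merge detections from two auxiliary results files into a primary results file
merge_detections(source_files=['md_run_a.json', 'md_run_b.json'],
                 target_file='md_run_primary.json',
                 output_file='md_merged.json',
                 options=options)
```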
megadetector/postprocessing/postprocess_batch_results.py:

@@ -1001,13 +1001,16 @@ def process_batch_results(options):
         options.separate_detections_by_category = False
 
     ground_truth_indexed_db = IndexedJsonDb(
-        options.ground_truth_json_file,
+        options.ground_truth_json_file,
+        b_normalize_paths=True,
         filename_replacements=options.ground_truth_filename_replacements)
 
     # Mark images in the ground truth as positive or negative
     n_negative, n_positive, n_unknown, n_ambiguous = _mark_detection_status(
-        ground_truth_indexed_db,
+        ground_truth_indexed_db,
+        negative_classes=options.negative_classes,
         unknown_classes=options.unlabeled_classes)
+
     print(f'Finished loading and indexing ground truth: {n_negative} '
           f'negative, {n_positive} positive, {n_unknown} unknown, '
           f'{n_ambiguous} ambiguous')
@@ -1348,7 +1351,7 @@ def process_batch_results(options):
     # Prepend class name on each line and add to the top
     cm_str_lines = [' ' * 16 + ' '.join(classname_headers)]
     cm_str_lines += ['{:>15}'.format(cn[:15]) + ' ' + cm_line for cn, cm_line in \
-                     zip(classname_list, cm_str.splitlines())]
+                     zip(classname_list, cm_str.splitlines(), strict=True)]
 
     # Print formatted confusion matrix
     if False:
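The `strict=True` change above is a small robustness fix: starting in Python 3.10, `zip(..., strict=True)` raises `ValueError` instead of silently truncating when its inputs have different lengths, so a mismatch between the class-name list and the confusion-matrix rows becomes a loud error. Illustrated standalone:

```python
# zip(strict=True) behavior (Python >= 3.10), illustrated standalone
rows = ['cat', 'dog']
lines = ['1 0']

print(list(zip(rows, lines)))       # [('cat', '1 0')] -- silent truncation

try:
    list(zip(rows, lines, strict=True))
except ValueError as e:
    print(e)                        # zip() argument 2 is shorter than argument 1
```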
megadetector/postprocessing/remap_detection_categories.py:

@@ -11,8 +11,9 @@ Currently only supports remapping detection categories, not classification categ
 
 #%% Constants and imports
 
-import json
 import os
+import json
+import argparse
 
 from tqdm import tqdm
 
@@ -167,4 +168,56 @@ if False:
 
 #%% Command-line driver
 
-
+def main():
+    """
+    Command-line interface for remapping detection categories in a MegaDetector results file
+    """
+
+    parser = argparse.ArgumentParser(
+        description='Remap detection categories in a MegaDetector .json results file'
+    )
+    parser.add_argument(
+        'input_file',
+        type=str,
+        help='Path to the MegaDetector .json results file to remap'
+    )
+    parser.add_argument(
+        'output_file',
+        type=str,
+        help='Path to save the remapped .json results file'
+    )
+    parser.add_argument(
+        'target_category_map_file',
+        type=str,
+        help="Path to a MegaDetector .json results file from which to take the target 'detection_categories' mapping"
+    )
+    parser.add_argument(
+        '--extra_category_handling',
+        type=str,
+        default='error',
+        choices=['error', 'drop_if_unused'],
+        help="How to handle source categories not in target map (default: 'error')"
+    )
+    parser.add_argument(
+        '--overwrite',
+        type=str,
+        default='false',
+        choices=['true', 'false'],
+        help="Overwrite output file if it exists (default: 'false')."
+    )
+
+    args = parser.parse_args()
+
+    overwrite_bool = (args.overwrite.lower() == 'true')
+
+    print('Starting category remapping...')
+
+    remap_detection_categories(
+        input_file=args.input_file,
+        output_file=args.output_file,
+        target_category_map=args.target_category_map_file,  # Pass filename directly
+        extra_category_handling=args.extra_category_handling,
+        overwrite=overwrite_bool)
+
+if __name__ == '__main__':
+    main()
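As with `create_crop_folder`, the new CLI wraps a single function call; a sketch mirroring it follows. Names come from this diff (including the fact that `target_category_map` may be a filename, per the "Pass filename directly" comment above); the filenames themselves are hypothetical.

```python
# Sketch mirroring the new CLI above; function and parameter names come from
# this diff, filenames are hypothetical.
from megadetector.postprocessing.remap_detection_categories import \
    remap_detection_categories

# Adopt the 'detection_categories' mapping from a reference results file,
# dropping unused source categories rather than erroring on them
remap_detection_categories(input_file='md_results.json',
                           output_file='md_results_remapped.json',
                           target_category_map='reference_results.json',
                           extra_category_handling='drop_if_unused',
                           overwrite=False)
```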
megadetector/postprocessing/render_detection_confusion_matrix.py:

@@ -118,25 +118,28 @@ def render_detection_confusion_matrix(ground_truth_file,
             [results_file] should be relative to this folder.
         preview_folder (str): the output folder, i.e. the folder in which we'll create our nifty
             HTML stuff.
-
+        force_render_images (bool, optional): if False, skips images that already exist
         confidence_thresholds (dict, optional): a dictionary mapping class names to thresholds;
             all classes not explicitly named here will use the threshold for the "default" category.
-
+        rendering_confidence_thresholds (dict, optional): a dictionary mapping class names to thresholds;
             all classes not explicitly named here will use the threshold for the "default" category.
         target_image_size (tuple, optional): output image size, as a pair of ints (width,height).  If one
             value is -1 and the other is not, aspect ratio is preserved.  If both are -1, the original image
             sizes are preserved.
         parallelize_rendering (bool, optional): enable (default) or disable parallelization when rendering
-
+        parallelize_rendering_n_cores (int, optional): number of threads or processes to use for rendering, only
             used if parallelize_rendering is True
-        parallelize_rendering_with_threads: whether to use threads (True) or processes (False)
-            only used if parallelize_rendering is True
+        parallelize_rendering_with_threads (bool, optional): whether to use threads (True) or processes (False)
+            when rendering, only used if parallelize_rendering is True
         job_name (str, optional): job name to include in big letters in the output file
-        model_file (str, optional) model filename to include in HTML output
+        model_file (str, optional): model filename to include in HTML output
         empty_category_name (str, optional): special category name that we should treat as empty, typically
             "empty"
         html_image_list_options (dict, optional): options listed passed along to write_html_image_list;
             see write_html_image_list for documentation.
+
+    Returns:
+        dict: confusion matrix information, containing at least the key "html_file"
     """
 
     ##%% Argument and path handling
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py:

@@ -1119,8 +1119,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):
         output_file_name (str, optional): the filename to which we should write results
             with repeat detections removed, typically set to None during the first
             part of the RDE process.
-        options (RepeatDetectionOptions): all the interesting options controlling
-            process; see RepeatDetectionOptions for details.
+        options (RepeatDetectionOptions, optional): all the interesting options controlling
+            this process; see RepeatDetectionOptions for details.
 
     Returns:
         RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
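A sketch of the `find_repeat_detections` entry point documented above; the input filename is hypothetical, and `RepeatDetectionOptions` is assumed to be importable from the same module.

```python
# Sketch of the find_repeat_detections signature documented above; the input
# filename is hypothetical, and RepeatDetectionOptions is assumed to live in
# this module.
from megadetector.postprocessing.repeat_detection_elimination.repeat_detections_core \
    import find_repeat_detections, RepeatDetectionOptions

options = RepeatDetectionOptions()

# First RDE pass: output_file_name is typically None at this stage
rde_results = find_repeat_detections('md_results.json',
                                     output_file_name=None,
                                     options=options)
# rde_results is a RepeatDetectionResults instance
```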
megadetector/taxonomy_mapping/map_new_lila_datasets.py:

@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2025.
+output_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
 
 datasets_to_map = [
-    '
+    'Nkhotakota Camera Traps'
 ]
 
 
megadetector/taxonomy_mapping/species_lookup.py:

@@ -140,7 +140,7 @@ if False:
 
     #%%
 
-    q = '
+    q = 'animalia'
 
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
@@ -154,5 +154,4 @@ if False:
     # raise ValueError('')
     print(m.source)
     print(m.taxonomy_string)
-    # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
     import clipboard; clipboard.copy(m.taxonomy_string)
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py:

@@ -52,6 +52,8 @@ if False:
 
     df['used'] = False
 
+    n_dropped = 0
+
     # i_row = 0; row = df.iloc[i_row]; row
     for i_row,row in df.iterrows():
         ds_name = row['dataset_name']
@@ -60,8 +62,11 @@ if False:
         if mapping_name in used_category_mappings:
             df.loc[i_row,'used'] = True
         else:
+            n_dropped += 1
             print('Dropping unused mapping {}'.format(mapping_name))
 
+    print('Dropping {} of {} mappings'.format(n_dropped,len(df)))
+
     df = df[df.used]
     df = df.drop('used',axis=1)
 
@@ -71,29 +76,41 @@ if False:
     assert not os.path.isfile(release_taxonomy_file), \
         'File {} exists, delete it manually before proceeding'.format(release_taxonomy_file)
 
-    known_levels = ['stateofmatter', #noqa
-                    'kingdom',
-                    'phylum','subphylum',
-                    'superclass','class','subclass','infraclass',
-                    'superorder','order','parvorder','suborder','infraorder',
-                    'zoosection',
-                    'superfamily','family','subfamily','tribe',
-                    'genus',
-                    'species','subspecies','variety']
-
     levels_to_include = ['kingdom',
-
-
-
-
-
-
-
-
+                         'phylum',
+                         'subphylum',
+                         'superclass',
+                         'class',
+                         'subclass',
+                         'infraclass',
+                         'superorder',
+                         'order',
+                         'suborder',
+                         'infraorder',
+                         'superfamily',
+                         'family',
+                         'subfamily',
+                         'tribe',
+                         'genus',
+                         'subgenus',
+                         'species',
+                         'subspecies',
+                         'variety']
+
+    levels_to_exclude = ['stateofmatter',
+                         'zoosection',
+                         'parvorder',
+                         'complex',
+                         'epifamily']
+
+    for x in [levels_to_include,levels_to_exclude]:
+        assert len(x) == len(set(x))
 
     for s in levels_to_exclude:
         assert s not in levels_to_include
 
+    known_levels = levels_to_include + levels_to_exclude
+
     levels_used = set()
 
     # i_row = 0; row = df.iloc[i_row]; row
@@ -103,17 +120,21 @@ if False:
             assert not isinstance(row['taxonomy_string'],str)
             continue
 
+        # This is a list of length-4 tuples that each look like:
+        #
+        # (41789, 'species', 'taxidea taxus', ['american badger'])
         taxonomic_match = eval(row['taxonomy_string'])
 
         # match_at_level = taxonomic_match[0]
         for match_at_level in taxonomic_match:
             assert len(match_at_level) == 4
+            # E.g. "species"
             levels_used.add(match_at_level[1])
 
     levels_used = [s for s in levels_used if isinstance(s,str)]
 
     for s in levels_used:
-        assert s in
+        assert s in known_levels, 'Unrecognized level {}'.format(s)
 
     for s in levels_to_include:
         assert s in levels_used
megadetector/taxonomy_mapping/preview_lila_taxonomy.py:

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)