megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
api/__init__.py
ADDED (empty file)

(Several more files with no content to display follow; the diff viewer shows each as "File without changes".)
api/batch_processing/data_preparation/manage_local_batch.py

@@ -1,64 +1,64 @@
-(old lines 1-61: the previous file header, not rendered in this view)
+"""
+
+manage_local_batch.py
+
+Semi-automated process for managing a local MegaDetector job, including
+standard postprocessing steps.
+
+This script is not intended to be run from top to bottom like a typical Python script;
+it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
+run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
+want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
+
+Some general notes on using this script, which I do in Spyder, though everything will be
+the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
+script):
+
+* Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
+  say I'm running a job for an organization called "bibblebop"; I have a big folder of
+  job-specific copies of this script, and I might save a new one called
+  "bibblebop-2023-07-26.py" (the filename doesn't matter, it just helps me keep these
+  organized).
+
+* There are three variables you need to set in this script before you start running
+  code: "input_path", "organization_name_short", and "job_date". You will get a sensible
+  error if you forget to set any of these. In this case I might set those to
+  "/data/bibblebobcamerastuff", "bibblebop", and "2023-07-26", respectively.
+
+* The defaults assume you want to split the job into two tasks (this is the default
+  because I have two GPUs). Nothing bad will happen if you do this on a zero-GPU or
+  single-GPU machine, but if you want everything to run in one logical task, change
+  "n_gpus" and "n_jobs" to 1 (instead of 2).
+
+* After setting the required variables, I run the first few cells - up to and including
+  the one called "Generate commands" - which collectively take basically zero seconds.
+  After you run the "Generate commands" cell, you will have a folder that looks
+  something like:
+
+  ~/postprocessing/bibblebop/bibblebop-2023-07-26-mdv5a/
+
+  (on Windows, "~" refers to your user folder)
+
+  Everything related to this job - scripts, outputs, intermediate stuff - will be in
+  this folder. Specifically, after the "Generate commands" cell, you'll have scripts in
+  that folder called something like:
+
+  run_chunk_000_gpu_00.sh (or .bat on Windows)
+
+  Personally, I like to run that script directly in a command prompt (I just leave
+  Spyder open, though it's OK if Spyder gets shut down while MD is running).
+
+  At this point, once you get the hang of it, you've invested about zero seconds of
+  human time, but possibly several days of unattended compute time, depending on the
+  size of your job.
+
+* Then when the jobs are done, back to the interactive environment! I run the next few
+  cells, which make sure the job finished OK, and the cell called "Post-processing
+  (pre-RDE)", which generates an HTML preview of the results. You are very plausibly
+  done at this point, and can ignore all the remaining cells. If you want to do things
+  like repeat detection elimination, running a classifier, or splitting your results
+  file up in specialized ways, there are cells for all of those things, but now you're
+  in power-user territory, so I'm going to leave this guide here. Email
+  cameratraps@lila.science with questions about the fancy stuff.
+
+"""
 
 #%% Imports and constants
 
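To make the setup step in the new header concrete, here is a minimal sketch of the top of a job-specific copy; the variable names and values are the hypothetical examples the docstring itself uses:

    # The three variables the header says you must set; values are the
    # docstring's hypothetical examples.
    input_path = '/data/bibblebobcamerastuff'
    organization_name_short = 'bibblebop'
    job_date = '2023-07-26'

    # Optional: collapse everything into one logical task on a machine
    # with zero or one GPUs.
    n_gpus = 1
    n_jobs = 1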
@@ -86,6 +86,8 @@ from api.batch_processing.postprocessing.postprocess_batch_results import (
 from detection.run_detector import get_detector_version_from_filename
 from md_utils.ct_utils import image_file_to_camera_folder
 
+## Inference options
+
 # To specify a non-default confidence threshold for including detections in the .json file
 json_threshold = None
 
@@ -109,6 +111,11 @@ quiet_mode = True
 # will use its default size, which is 1280 * 1.3, which is almost always what you want.
 image_size = None
 
+# Should we include image size, timestamp, and/or EXIF data in MD output?
+include_image_size = False
+include_image_timestamp = False
+include_exif_data = False
+
 # Only relevant when running on CPU
 ncores = 1
 
@@ -187,7 +194,7 @@ augment = False
 
 ## Constants related to tiled inference
 
-use_tiled_inference =
+use_tiled_inference = False
 
 # Should we delete tiles after each job? Only set this to False for debugging;
 # large jobs will take up a lot of space if you keep tiles around after each task.
@@ -234,7 +241,7 @@ checkpoint_frequency = 10000
 approx_images_per_second = estimate_md_images_per_second(model_file)
 
 # Rough estimate for the inference time cost of augmentation
-if augment:
+if augment and (approx_images_per_second is not None):
     approx_images_per_second = approx_images_per_second * 0.7
 
 base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
@@ -268,6 +275,10 @@ filename_base = os.path.join(base_output_folder_name, base_task_name)
 combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
 postprocessing_output_folder = os.path.join(filename_base, 'preview')
 
+combined_api_output_file = os.path.join(
+    combined_api_output_folder,
+    '{}_detections.json'.format(base_task_name))
+
 os.makedirs(filename_base, exist_ok=True)
 os.makedirs(combined_api_output_folder, exist_ok=True)
 os.makedirs(postprocessing_output_folder, exist_ok=True)
@@ -494,7 +505,14 @@ for i_task,task in enumerate(task_info):
 
     overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
     cmd = f'{cuda_string} python run_detector_batch.py "{model_file}" "{chunk_file}" "{output_fn}" {checkpoint_frequency_string} {checkpoint_path_string} {use_image_queue_string} {ncores_string} {quiet_string} {image_size_string} {confidence_threshold_string} {overwrite_handling_string}'
-
+
+    if include_image_size:
+        cmd += ' --include_image_size'
+    if include_image_timestamp:
+        cmd += ' --include_image_timestamp'
+    if include_exif_data:
+        cmd += ' --include_exif_data'
+
     cmd_file = os.path.join(filename_base,'run_chunk_{}_gpu_{}{}'.format(str(i_task).zfill(3),
                             str(gpu_number).zfill(2),script_extension))
 
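With all three of the new include_* options enabled, the command assembled above simply grows three flags. A hypothetical rendering of one generated command (the paths and CUDA prefix are invented for illustration; only the three --include_* flags come verbatim from this change):

    # Sketch of one line that might land in run_chunk_000_gpu_00.sh
    example_cmd = (
        'CUDA_VISIBLE_DEVICES=0 python run_detector_batch.py '
        '"/models/md_v5a.0.0.pt" "/postprocessing/chunk_000.json" '
        '"/postprocessing/chunk_000_results.json" '
        '--include_image_size --include_image_timestamp --include_exif_data')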
@@ -747,10 +765,6 @@ for im in combined_results['images']:
     else:
         im['file'] = im['file'].replace(input_path + '/','',1)
 
-combined_api_output_file = os.path.join(
-    combined_api_output_folder,
-    '{}_detections.json'.format(base_task_name))
-
 with open(combined_api_output_file,'w') as f:
     json.dump(combined_results,f,indent=1)
 
@@ -789,11 +803,11 @@ if render_animals_only:
 os.makedirs(output_base, exist_ok=True)
 print('Processing to {}'.format(output_base))
 
-options.
+options.md_results_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
@@ -823,7 +837,7 @@ options.otherDetectionsThreshold = options.confidenceMin
 
 options.bRenderDetectionTiles = True
 options.maxOutputImageWidth = 2000
-options.detectionTilesMaxCrops =
+options.detectionTilesMaxCrops = 250
 
 # options.lineThickness = 5
 # options.boxExpansion = 8
@@ -925,12 +939,12 @@ os.makedirs(output_base, exist_ok=True)
 
 print('Processing post-RDE to {}'.format(output_base))
 
-options.
+options.md_results_file = filtered_output_filename
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
 
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
@@ -2000,10 +2014,10 @@ output_base = os.path.join(postprocessing_output_folder, folder_token + \
 os.makedirs(output_base, exist_ok=True)
 print('Processing {} to {}'.format(base_task_name, output_base))
 
-options.
+options.md_results_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
-path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(ppresults.output_html_file)
 
 #% Zip .json files
@@ -2071,7 +2085,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
 results = compare_batch_results(options)
 
 from md_utils.path_utils import open_file
-open_file(results.html_output_file,attempt_to_open_in_wsl_host=True)
+open_file(results.html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% Merge in high-confidence detections from another results file
@@ -2120,12 +2134,12 @@ output_base_large_boxes = os.path.join(postprocessing_output_folder,
 os.makedirs(output_base_large_boxes, exist_ok=True)
 print('Processing post-RDE, post-size-separation to {}'.format(output_base_large_boxes))
 
-options.
+options.md_results_file = size_separated_file
 options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% .json splitting
|
|
|
2280
2294
|
if os.name == 'nt':
|
|
2281
2295
|
git_base = r'c:\git'
|
|
2282
2296
|
else:
|
|
2283
|
-
git_base = os.path.
|
|
2297
|
+
git_base = os.path.expanduser('~/git')
|
|
2284
2298
|
|
|
2285
2299
|
input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
|
|
2286
2300
|
assert os.path.isfile(input_py_file)
|
|
api/batch_processing/data_preparation/manage_video_batch.py

@@ -1,11 +1,11 @@
-(old lines 1-8: the previous file header, not rendered in this view)
+"""
+
+manage_video_batch.py
+
+Notebook-esque script to manage the process of running a local batch of videos
+through MD. Defers most of the heavy lifting to manage_local_batch.py.
+
+"""
 
 #%% Imports and constants
 
api/batch_processing/postprocessing/add_max_conf.py

@@ -1,15 +1,15 @@
-(old lines 1-12: the previous file header, not rendered in this view)
+"""
+
+add_max_conf.py
+
+The MD output format included a "max_detection_conf" field with each image
+up to and including version 1.2; it was removed as of version 1.3 (it's
+redundant with the individual detection confidence values).
+
+Just in case someone took a dependency on that field, this script allows you
+to add it back to an existing .json file.
+
+"""
 
 #%% Imports and constants
 
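For readers who don't remember the field: "max_detection_conf" is the maximum confidence across an image's detections. A minimal sketch of that computation (not necessarily this script's exact code; the input filename is hypothetical):

    import json

    with open('detections.json') as f:
        data = json.load(f)

    # Maximum confidence over each image's detections; 0 when an image
    # has no detections.
    for im in data['images']:
        detections = im.get('detections', [])
        im['max_detection_conf'] = max((d['conf'] for d in detections), default=0)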
api/batch_processing/postprocessing/categorize_detections_by_size.py

@@ -1,11 +1,11 @@
-(old lines 1-8: the previous file header, not rendered in this view)
+"""
+
+categorize_detections_by_size.py
+
+Given a MegaDetector .json file, creates a separate category for bounding boxes
+above one or more size thresholds.
+
+"""
 
 #%% Constants and imports
 
@@ -18,24 +18,42 @@ from tqdm import tqdm
 #%% Support classes
 
 class SizeCategorizationOptions:
-
-
+    """
+    Options used to parameterize categorize_detections_by_size().
+    """
+
+    #: Thresholds to use for separation, as a fraction of the image size.
+    #:
+    #: Should be sorted from smallest to largest.
     size_thresholds = [0.95]
 
-
+    #: List of category numbers to use in separation; uses all categories if None
     categories_to_separate = None
 
-
+    #: Dimension to use for thresholding; can be "size", "width", or "height"
     measurement = 'size'
 
-
+    #: Categories to assign to thresholded ranges; should have the same length as
+    #: "size_thresholds".
     size_category_names = ['large_detection']
 
 
 #%% Main functions
 
 def categorize_detections_by_size(input_file,output_file=None,options=None):
-
+    """
+    Given a MegaDetector .json file, creates a separate category for bounding boxes
+    above one or more size thresholds, optionally writing results to [output_file].
+
+    Args:
+        input_file (str): file to process
+        output_file (str, optional): optional output file
+        options (SizeCategorizationOptions): categorization parameters
+
+    Returns:
+        dict: data loaded from [input_file], with the new size-based categories.
+        Identical to what's written to [output_file], if [output_file] is not None.
+    """
     if options is None:
         options = SizeCategorizationOptions()
 
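A minimal usage sketch based on the options and signature documented above; the input and output filenames are hypothetical:

    from api.batch_processing.postprocessing.categorize_detections_by_size import (
        SizeCategorizationOptions, categorize_detections_by_size)

    # Boxes larger than 95% of the image area get a new 'large_detection'
    # category; these are the class's documented defaults, set explicitly.
    options = SizeCategorizationOptions()
    options.size_thresholds = [0.95]
    options.size_category_names = ['large_detection']
    options.measurement = 'size'

    results = categorize_detections_by_size('detections.json',
                                            output_file='detections_by_size.json',
                                            options=options)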
api/batch_processing/postprocessing/combine_api_outputs.py

@@ -1,54 +1,56 @@
-(old lines 1-26: the previous file header, not rendered in this view)
+"""
+
+combine_api_outputs.py
+
+Merges two or more .json files in batch API output format, optionally
+writing the results to another .json file.
+
+* Concatenates image lists, erroring if images are not unique.
+* Errors if class lists are conflicting; errors on unrecognized fields.
+* Checks compatibility in info structs, within reason.
+
+File format:
+
+https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
+
+Command-line use:
+
+combine_api_outputs input1.json input2.json ... inputN.json output.json
+
+Also see combine_api_shard_files() (not exposed via the command line yet) to
+combine the intermediate files created by the API.
+
+This does no checking for redundancy; if you are looking to ensemble
+the results of multiple model versions, see merge_detections.py.
+
+"""
 
 #%% Constants and imports
 
 import argparse
 import sys
 import json
-from typing import Any, Dict, Iterable, Mapping, List, Optional
 
 
 #%% Merge functions
 
-def combine_api_output_files(input_files: List[str],
-                             output_file
-                             require_uniqueness
-                             verbose
-                             ) -> Dict[str, Any]:
+def combine_api_output_files(input_files,
+                             output_file=None,
+                             require_uniqueness=True,
+                             verbose=True):
     """
-    Merges list of
-    dictionary, optionally writing the result to
+    Merges the list of MD results files [input_files] into a single
+    dictionary, optionally writing the result to [output_file].
 
     Args:
-        input_files
-        output_file
-        require_uniqueness
-            each
+        input_files (list of str): paths to JSON detection files
+        output_file (str, optional): path to write merged JSON
+        require_uniqueness (bool): whether to require that the images in
+            each list of images be unique
+
+    Returns:
+        dict: merged dictionaries loaded from [input_files], identical to what's
+        written to [output_file] if [output_file] is not None
     """
 
    def print_if_verbose(s):
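A minimal usage sketch of the simplified signature; the filenames are hypothetical:

    from api.batch_processing.postprocessing.combine_api_outputs import (
        combine_api_output_files)

    # Merge two MD results files into one, also writing the merged dict to
    # disk; with require_uniqueness=True, duplicate image paths raise an error.
    merged = combine_api_output_files(
        ['camera_a_detections.json', 'camera_b_detections.json'],
        output_file='all_cameras_detections.json',
        require_uniqueness=True)
    print('Merged results contain {} images'.format(len(merged['images'])))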
@@ -73,27 +75,27 @@ def combine_api_output_files(input_files: List[str],
     return merged_dict
 
 
-def combine_api_output_dictionaries(input_dicts
-                                    require_uniqueness: bool = True
-                                    ) -> Dict[str, Any]:
+def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
     """
-    Merges the list of
-    comment for details on merge rules.
+    Merges the list of MD results dictionaries [input_dicts] into a single dict.
+    See module header comment for details on merge rules.
 
     Args:
-        input_dicts: list of dicts
-
-        require_uniqueness
-            each
-
-
+        input_dicts (list of dicts): list of dicts in which each dict represents the
+            contents of a MD output file
+        require_uniqueness (bool): whether to require that the images in
+            each input dict be unique; if this is True and image filenames are
+            not unique, an error is raised.
+
+    Returns:
+        dict: merged MD results
     """
 
     # Map image filenames to detections, we'll convert to a list later
     images = {}
-    info
-    detection_categories
-    classification_categories
+    info = {}
+    detection_categories = {}
+    classification_categories = {}
     n_redundant_images = 0
     n_images = 0
 
@@ -182,8 +184,20 @@ def combine_api_output_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
 
 def combine_api_shard_files(input_files, output_file=None):
     """
-    Merges the list of .json-formatted API shard files
-    list of dictionaries, optionally writing the result to
+    Merges the list of .json-formatted API shard files [input_files] into a single
+    list of dictionaries, optionally writing the result to [output_file].
+
+    This operates on mostly-deprecated API shard files, not MegaDetector results files.
+    If you don't know what an API shard file is, you don't want this function.
+
+    Args:
+        input_files (list of str): files to merge
+        output_file (str, optional): file to which we should write merged results
+
+    Returns:
+        dict: merged results
+
+    :meta private:
     """
 
     input_lists = []
@@ -215,6 +229,7 @@ def combine_api_shard_files(input_files, output_file=None):
 #%% Command-line driver
 
 def main():
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'input_paths', nargs='+',

@@ -230,6 +245,5 @@ def main():
     args = parser.parse_args()
     combine_api_output_files(args.input_paths, args.output_path)
 
-
 if __name__ == '__main__':
     main()