megadetector 5.0.8-py3-none-any.whl → 5.0.10-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
classification/crop_detections.py

@@ -1,58 +1,43 @@
-########
-#
-# crop_detections.py
-#
-# Given a detections JSON file from MegaDetector, crops the bounding boxes above
-# a certain confidence threshold.
-#
-# This script takes as input a detections JSON file, usually the output of
-# detection/run_tf_detector_batch.py or the output of the Batch API in the
-# "Batch processing API output format".
-#
-# See https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing.
-#
-# The script can crop images that are either available locally or that need to be
-# downloaded from an Azure Blob Storage container.
-#
-# We assume that no image contains over 100 bounding boxes, and we always save
-# crops as RGB .jpg files for consistency. For each image, each bounding box is
-# cropped and saved to a file with a suffix "___cropXX_mdvY.Y.jpg" added to the
-# filename as the original image. "XX" ranges from "00" to "99" and "Y.Y"
-# ndicates the MegaDetector version. Based on the given confidence threshold, we
-# may skip saving certain bounding box crops, but we still increment the bounding
-# box number for skipped boxes.
-#
-# Example cropped image path (with MegaDetector bbox):
-#
-# "path/to/image.jpg___crop00_mdv4.1.jpg"
-#
-# By default, the images are cropped exactly per the given bounding box
-# coordinates. However, if square crops are desired, pass the --square-crops
-# flag. This will always generate a square crop whose size is the larger of the
-# bounding box width or height. In the case that the square crop boundaries exceed
-# the original image size, the crop is padded with 0s.
-#
-# This script outputs a log file to:
-#
-# <output_dir>/crop_detections_log_{timestamp}.json
-#
-# ...which contains images that failed to download and crop properly.
-#
-########
+"""
 
-#%% Example usage
+crop_detections.py
+
+Given a detections JSON file from MegaDetector, crops the bounding boxes above
+a certain confidence threshold.
+
+This script takes as input a detections JSON file, usually the output of
+detection/run_tf_detector_batch.py or the output of the Batch API in the
+"Batch processing API output format".
+
+See https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing.
+
+The script can crop images that are either available locally or that need to be
+downloaded from an Azure Blob Storage container.
+
+We assume that no image contains over 100 bounding boxes, and we always save
+crops as RGB .jpg files for consistency. For each image, each bounding box is
+cropped and saved to a file with a suffix "___cropXX_mdvY.Y.jpg" added to the
+filename as the original image. "XX" ranges from "00" to "99" and "Y.Y"
+ndicates the MegaDetector version. Based on the given confidence threshold, we
+may skip saving certain bounding box crops, but we still increment the bounding
+box number for skipped boxes.
+
+Example cropped image path (with MegaDetector bbox):
+
+"path/to/image.jpg___crop00_mdv4.1.jpg"
+
+By default, the images are cropped exactly per the given bounding box
+coordinates. However, if square crops are desired, pass the --square-crops
+flag. This will always generate a square crop whose size is the larger of the
+bounding box width or height. In the case that the square crop boundaries exceed
+the original image size, the crop is padded with 0s.
+
+This script outputs a log file to:
+
+<output_dir>/crop_detections_log_{timestamp}.json
+
+...which contains images that failed to download and crop properly.
 
-"""
-python crop_detections.py \
-    detections.json \
-    /path/to/crops \
-    --images-dir /path/to/images \
-    --container-url "https://account.blob.core.windows.net/container?sastoken" \
-    --detector-version "4.1" \
-    --threshold 0.8 \
-    --save-full-images --square-crops \
-    --threads 50 \
-    --logdir "."
 """
 
 #%% Imports
@@ -73,6 +58,22 @@ from PIL import Image, ImageOps
 from tqdm import tqdm
 
 
+#%% Example usage
+
+"""
+python crop_detections.py \
+    detections.json \
+    /path/to/crops \
+    --images-dir /path/to/images \
+    --container-url "https://account.blob.core.windows.net/container?sastoken" \
+    --detector-version "4.1" \
+    --threshold 0.8 \
+    --save-full-images --square-crops \
+    --threads 50 \
+    --logdir "."
+"""
+
+
 #%% Main function
 
 def main(detections_json_path: str,
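For context, the square-crop behavior documented in this file's docstring (crop to the larger of the box's width and height, zero-pad past the image edge, save as RGB .jpg with the "___cropXX_mdvY.Y.jpg" suffix) can be sketched in a few lines of Python. This is an illustration of the documented behavior, not the code shipped in the wheel; crop_square is a hypothetical helper, and the normalized [x_min, y_min, width, height] box layout is the standard MegaDetector output format:

    from PIL import Image

    def crop_square(img: Image.Image, bbox_norm) -> Image.Image:
        # MegaDetector boxes are normalized [x_min, y_min, width, height]
        img_w, img_h = img.size
        x = int(bbox_norm[0] * img_w)
        y = int(bbox_norm[1] * img_h)
        w = int(bbox_norm[2] * img_w)
        h = int(bbox_norm[3] * img_h)
        side = max(w, h)
        # center the square on the original box
        x -= (side - w) // 2
        y -= (side - h) // 2
        # PIL fills regions outside the image with zeros (black)
        return img.crop((x, y, x + side, y + side))

    crop = crop_square(Image.open('image.jpg'), [0.1, 0.2, 0.3, 0.4])
    crop.convert('RGB').save('image.jpg___crop00_mdv4.1.jpg')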
classification/csv_to_json.py

@@ -1,105 +1,101 @@
-########
-#
-# csv_to_json.py
-#
-# Converts CSV to JSON format for label specification.
-#
-# There are 3 possible values for the 'type' column in the CSV:
-#
-# - "row": this selects a specific rowfrom the master taxonomy CSV
-#       content syntax: <dataset_name>|<dataset_label>
-#
-# - "datasettaxon": this selects all animals in a taxon from a particular dataset
-#       content syntax: <dataset_name>|<taxon_level>|<taxon_name>
-#
-# - <taxon_level>: this selects all animals in a taxon across all datasets
-#       content syntax: <taxon_name>
-#
-# Example CSV input:
-#
-"""
-# comment lines starting with '#' are allowed
-output_label,type,content
-
-cervid,row,idfg|deer
-cervid,row,idfg|elk
-cervid,row,idfg|prong
-cervid,row,idfg_swwlf_2019|elk
-cervid,row,idfg_swwlf_2019|muledeer
-cervid,row,idfg_swwlf_2019|whitetaileddeer
-cervid,max_count,50000
-
-cervid,family,cervidae
-cervid,datasettaxon,idfg|family|cervidae
-cervid,datasettaxon,idfg_swwlf_2019|family|cervidae
-
-bird,row,idfg_swwlf_2019|bird
-bird,class,aves
-bird,max_count,50000
-bird,prioritize,"[['idfg_swwlf_2019'], ['idfg']]"
-
-!bird,row,idfg_swwlf_2019|turkey
-!bird,genus,meleagris
 """
-#
-# Example JSON output:
-#
-"""
-{
-    "cervid": {
-        "dataset_labels": {
-            "idfg": ["deer", "elk", "prong"],
-            "idfg_swwlf_2019": ["elk", "muledeer", "whitetaileddeer"]
-        },
-        "taxa": [
-            {
-                "level": "family",
-                "name": "cervidae"
-            },
-            {
-                "level": "family",
-                "name": "cervidae"
-                "datasets": ["idfg"]
-            },
-            {
-                "level": "family",
-                "name": "cervidae"
-                "datasets": ["idfg_swwlf_2019"]
-            }
-        ],
-        "max_count": 50000
-    },
-    "bird": {
-        "dataset_labels": {
-            "idfg_swwlf_2019": ["bird"]
-        },
-        "taxa": [
-            {
-                "level": "class",
-                "name": "aves"
-            }
-        ],
-        "exclude": {
-            "dataset_labels": {
-                "idfg_swwlf_2019": ["turkey"]
-            },
-            "taxa": [
-                {
-                    "level": "genus",
-                    "name": "meleagris"
-                }
-            ]
-        },
-        "max_count": "50000",
-        "prioritize": [
-            ["idfg_swwlf_2019"],
-            ["idfg"]
-        ],
-    }
-}
+
+csv_to_json.py
+
+Converts CSV to JSON format for label specification.
+
+There are 3 possible values for the 'type' column in the CSV:
+
+- "row": this selects a specific rowfrom the master taxonomy CSV
+      content syntax: <dataset_name>|<dataset_label>
+
+- "datasettaxon": this selects all animals in a taxon from a particular dataset
+      content syntax: <dataset_name>|<taxon_level>|<taxon_name>
+
+- <taxon_level>: this selects all animals in a taxon across all datasets
+      content syntax: <taxon_name>
+
+Example CSV input:
+
+"
+# comment lines starting with '#' are allowed
+output_label,type,content
+cervid,row,idfg|deer
+cervid,row,idfg|elk
+cervid,row,idfg|prong
+cervid,row,idfg_swwlf_2019|elk
+cervid,row,idfg_swwlf_2019|muledeer
+cervid,row,idfg_swwlf_2019|whitetaileddeer
+cervid,max_count,50000
+cervid,family,cervidae
+cervid,datasettaxon,idfg|family|cervidae
+cervid,datasettaxon,idfg_swwlf_2019|family|cervidae
+bird,row,idfg_swwlf_2019|bird
+bird,class,aves
+bird,max_count,50000
+bird,prioritize,"[['idfg_swwlf_2019'], ['idfg']]"
+!bird,row,idfg_swwlf_2019|turkey
+!bird,genus,meleagris
+"
+
+Example JSON output:
+
+"
+{
+    "cervid": {
+        "dataset_labels": {
+            "idfg": ["deer", "elk", "prong"],
+            "idfg_swwlf_2019": ["elk", "muledeer", "whitetaileddeer"]
+        },
+        "taxa": [
+            {
+                "level": "family",
+                "name": "cervidae"
+            },
+            {
+                "level": "family",
+                "name": "cervidae"
+                "datasets": ["idfg"]
+            },
+            {
+                "level": "family",
+                "name": "cervidae"
+                "datasets": ["idfg_swwlf_2019"]
+            }
+        ],
+        "max_count": 50000
+    },
+    "bird": {
+        "dataset_labels": {
+            "idfg_swwlf_2019": ["bird"]
+        },
+        "taxa": [
+            {
+                "level": "class",
+                "name": "aves"
+            }
+        ],
+        "exclude": {
+            "dataset_labels": {
+                "idfg_swwlf_2019": ["turkey"]
+            },
+            "taxa": [
+                {
+                    "level": "genus",
+                    "name": "meleagris"
+                }
+            ]
+        },
+        "max_count": "50000",
+        "prioritize": [
+            ["idfg_swwlf_2019"],
+            ["idfg"]
+        ],
+    }
+}
+"
+
 """
-#
-########
 
 #%% Imports
 
@@ -125,6 +121,7 @@ def main():
 
 
 #%% Support functions
+
 def parse_csv_row(obj: dict[str, Any], rowtype: str, content: str) -> None:
     """
     Parses a row in the CSV.
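For readers unfamiliar with the label-specification format documented above, a minimal sketch of how one CSV row folds into the output JSON may help. This is an illustration of the documented semantics only, not the module's parse_csv_row; add_spec_row is a hypothetical helper, and it omits the '!'-prefixed exclusion rows and the 'prioritize' type:

    def add_spec_row(spec: dict, output_label: str, rowtype: str, content: str) -> None:
        entry = spec.setdefault(output_label, {'dataset_labels': {}, 'taxa': []})
        if rowtype == 'row':
            # content syntax: <dataset_name>|<dataset_label>
            dataset, label = content.split('|')
            entry['dataset_labels'].setdefault(dataset, []).append(label)
        elif rowtype == 'datasettaxon':
            # content syntax: <dataset_name>|<taxon_level>|<taxon_name>
            dataset, level, name = content.split('|')
            entry['taxa'].append({'level': level, 'name': name, 'datasets': [dataset]})
        elif rowtype == 'max_count':
            entry['max_count'] = int(content)
        else:
            # any other rowtype is treated as a taxon level, e.g. 'family' or 'class'
            entry['taxa'].append({'level': rowtype, 'name': content})

    spec = {}
    add_spec_row(spec, 'cervid', 'row', 'idfg|deer')
    add_spec_row(spec, 'cervid', 'family', 'cervidae')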
classification/detect_and_crop.py

@@ -1,111 +1,97 @@
-########
-#
-# detect_and_crop.py
-#
-# Run MegaDetector on images via Batch API, then save crops of the detected
-# bounding boxes.
-#
-# The input to this script is a "queried images" JSON file, whose keys are paths
-# to images and values are dicts containing information relevant for training
-# a classifier, including labels and (optionally) ground-truth bounding boxes.
-# The image paths are in the format `<dataset-name>/<blob-name>` where we assume
-# that the dataset name does not contain '/'.
-#
-# {
-#     "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#         "dataset": "caltech",
-#         "location": 13,
-#         "class": "mountain_lion",  # class from dataset
-#         "bbox": [{"category": "animal",
-#                   "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
-#         "label": ["monutain_lion"]  # labels to use in classifier
-#     },
-#     "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#         "dataset": "caltech",
-#         "location": 13,
-#         "class": "mountain_lion",  # class from dataset
-#         "label": ["monutain_lion"]  # labels to use in classifier
-#     },
-#     ...
-# }
-#
-# We assume that no image contains over 100 bounding boxes, and we always save
-# crops as RGB .jpg files for consistency. For each image, each bounding box is
-# cropped and saved to a file with a suffix "___cropXX.jpg" (ground truth bbox) or
-# "___cropXX_mdvY.Y.jpg" (detected bbox) added to the filename of the original
-# image. "XX" ranges from "00" to "99" and "Y.Y" indicates the MegaDetector
-# version. If an image has ground truth bounding boxes, we assume that they are
-# exhaustive--i.e., there are no other objects of interest, so we don't need to
-# run MegaDetector on the image. If an image does not have ground truth bounding
-# boxes, we run MegaDetector on the image and label the detected boxes in order
-# from 00 up to 99. Based on the given confidence threshold, we may skip saving
-# certain bounding box crops, but we still increment the bounding box number for
-# skipped boxes.
-#
-# Example cropped image path (with ground truth bbox from MegaDB)
-#
-# "path/to/crops/image.jpg___crop00.jpg"
-#
-# Example cropped image path (with MegaDetector bbox)
-#
-# "path/to/crops/image.jpg___crop00_mdv4.1.jpg"
-#
-# By default, the images are cropped exactly per the given bounding box
-# coordinates. However, if square crops are desired, pass the --square-crops
-# flag. This will always generate a square crop whose size is the larger of the
-# bounding box width or height. In the case that the square crop boundaries exceed
-# the original image size, the crop is padded with 0s.
-#
-# This script currently only supports running MegaDetector via the Batch Detection
-# API. See the classification README for instructions on running MegaDetector
-# locally. If running the Batch Detection API, set the following environment
-# variables for the Azure Blob Storage container in which we save the intermediate
-# task lists:
-#
-#     BATCH_DETECTION_API_URL                  # API URL
-#     CLASSIFICATION_BLOB_STORAGE_ACCOUNT      # storage account name
-#     CLASSIFICATION_BLOB_CONTAINER            # container name
-#     CLASSIFICATION_BLOB_CONTAINER_WRITE_SAS  # SAS token, without leading '?'
-#     DETECTION_API_CALLER                     # allow-listed API caller
-#
-# This script allows specifying a directory where MegaDetector outputs are cached
-# via the --detector-output-cache-dir argument. This directory must be
-# organized as:
-#
-#     <cache-dir>/<MegaDetector-version>/<dataset-name>.json
-#
-# Example: If the `cameratrapssc/classifier-training` Azure blob storage
-# container is mounted to the local machine via blobfuse, it may be used as
-# a MegaDetector output cache directory by passing
-#     "cameratrapssc/classifier-training/mdcache/"
-# as the value for --detector-output-cache-dir.
-#
-# This script outputs either 1 or 3 files, depending on whether the Batch Detection API
-# is run:
-#
-# - <output_dir>/detect_and_crop_log_{timestamp}.json
-#     log of images missing detections and images that failed to properly
-#     download and crop
-# - <output_dir>/batchapi_tasklists/{task_id}.json
-#     (if --run-detector) task lists uploaded to the Batch Detection API
-# - <output_dir>/batchapi_response/{task_id}.json
-#     (if --run-detector) task status responses for completed tasks
-#
-########
+"""
 
-#%% Example usage
+detect_and_crop.py
+
+Run MegaDetector on images via Batch API, then save crops of the detected
+bounding boxes.
+
+The input to this script is a "queried images" JSON file, whose keys are paths
+to images and values are dicts containing information relevant for training
+a classifier, including labels and (optionally) ground-truth bounding boxes.
+The image paths are in the format `<dataset-name>/<blob-name>` where we assume
+that the dataset name does not contain '/'.
+
+{
+    "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
+        "dataset": "caltech",
+        "location": 13,
+        "class": "mountain_lion",  # class from dataset
+        "bbox": [{"category": "animal",
+                  "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
+        "label": ["monutain_lion"]  # labels to use in classifier
+    },
+    "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
+        "dataset": "caltech",
+        "location": 13,
+        "class": "mountain_lion",  # class from dataset
+        "label": ["monutain_lion"]  # labels to use in classifier
+    },
+    ...
+}
+
+We assume that no image contains over 100 bounding boxes, and we always save
+crops as RGB .jpg files for consistency. For each image, each bounding box is
+cropped and saved to a file with a suffix "___cropXX.jpg" (ground truth bbox) or
+"___cropXX_mdvY.Y.jpg" (detected bbox) added to the filename of the original
+image. "XX" ranges from "00" to "99" and "Y.Y" indicates the MegaDetector
+version. If an image has ground truth bounding boxes, we assume that they are
+exhaustive--i.e., there are no other objects of interest, so we don't need to
+run MegaDetector on the image. If an image does not have ground truth bounding
+boxes, we run MegaDetector on the image and label the detected boxes in order
+from 00 up to 99. Based on the given confidence threshold, we may skip saving
+certain bounding box crops, but we still increment the bounding box number for
+skipped boxes.
+
+Example cropped image path (with ground truth bbox from MegaDB)
+
+"path/to/crops/image.jpg___crop00.jpg"
+
+Example cropped image path (with MegaDetector bbox)
+
+"path/to/crops/image.jpg___crop00_mdv4.1.jpg"
+
+By default, the images are cropped exactly per the given bounding box
+coordinates. However, if square crops are desired, pass the --square-crops
+flag. This will always generate a square crop whose size is the larger of the
+bounding box width or height. In the case that the square crop boundaries exceed
+the original image size, the crop is padded with 0s.
+
+This script currently only supports running MegaDetector via the Batch Detection
+API. See the classification README for instructions on running MegaDetector
+locally. If running the Batch Detection API, set the following environment
+variables for the Azure Blob Storage container in which we save the intermediate
+task lists:
+
+    BATCH_DETECTION_API_URL                  # API URL
+    CLASSIFICATION_BLOB_STORAGE_ACCOUNT      # storage account name
+    CLASSIFICATION_BLOB_CONTAINER            # container name
+    CLASSIFICATION_BLOB_CONTAINER_WRITE_SAS  # SAS token, without leading '?'
+    DETECTION_API_CALLER                     # allow-listed API caller
+
+This script allows specifying a directory where MegaDetector outputs are cached
+via the --detector-output-cache-dir argument. This directory must be
+organized as:
+
+    <cache-dir>/<MegaDetector-version>/<dataset-name>.json
+
+Example: If the `cameratrapssc/classifier-training` Azure blob storage
+container is mounted to the local machine via blobfuse, it may be used as
+a MegaDetector output cache directory by passing
+    "cameratrapssc/classifier-training/mdcache/"
+as the value for --detector-output-cache-dir.
+
+This script outputs either 1 or 3 files, depending on whether the Batch Detection API
+is run:
+
+- <output_dir>/detect_and_crop_log_{timestamp}.json
+    log of images missing detections and images that failed to properly
+    download and crop
+- <output_dir>/batchapi_tasklists/{task_id}.json
+    (if --run-detector) task lists uploaded to the Batch Detection API
+- <output_dir>/batchapi_response/{task_id}.json
+    (if --run-detector) task status responses for completed tasks
 
 """
-python detect_and_crop.py \
-    base_logdir/queried_images.json \
-    base_logdir \
-    --detector-output-cache-dir /path/to/classifier-training/mdcache \
-    --detector-version 4.1 \
-    --run-detector --resume-file base_logdir/resume.json \
-    --cropped-images-dir /path/to/crops --square-crops --threshold 0.9 \
-    --save-full-images --images-dir /path/to/images --threads 50
-"""
-
 
 #%% Imports
 
@@ -134,6 +120,20 @@ from md_utils import path_utils
 from md_utils import sas_blob_utils
 
 
+#%% Example usage
+
+"""
+python detect_and_crop.py \
+    base_logdir/queried_images.json \
+    base_logdir \
+    --detector-output-cache-dir /path/to/classifier-training/mdcache \
+    --detector-version 4.1 \
+    --run-detector --resume-file base_logdir/resume.json \
+    --cropped-images-dir /path/to/crops --square-crops --threshold 0.9 \
+    --save-full-images --images-dir /path/to/images --threads 50
+"""
+
+
 #%% Main function
 
 def main(queried_images_json_path: str,
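The crop-numbering rule that both crop_detections.py and detect_and_crop.py document (boxes below the confidence threshold are skipped, but still consume a crop number) amounts to indexing crops by position in the detection list rather than by count of saved crops, so filenames stay stable across threshold choices. A minimal sketch, assuming the standard MegaDetector per-image detection list with 'conf' and 'bbox' fields; crops_to_save is a hypothetical helper:

    def crops_to_save(detections, threshold, detector_version='4.1'):
        # enumerate() keys the crop suffix to the box's position in the list,
        # so names stay stable even when low-confidence boxes are skipped
        for crop_idx, det in enumerate(detections):
            if det['conf'] < threshold:
                continue  # skipped, but crop_idx still advances
            suffix = f'___crop{crop_idx:02d}_mdv{detector_version}.jpg'
            yield suffix, det['bbox']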
classification/evaluate_model.py

@@ -1,47 +1,36 @@
-########
-#
-# evaluate_model.py
-#
-# Evaluate a species classifier.
-#
-# Currently the implementation of multi-label multi-class classification is
-# non-functional.
-#
-# Outputs the following files:
-#
-# 1) outputs_{split}.csv, one file per split, contains columns:
-#     - 'path': str, path to cropped image
-#     - 'label': str
-#     - 'weight': float
-#     - [label names]: float, confidence in each label
-#
-# 2) overall_metrics.csv, contains columns:
-#     - 'split': str
-#     - 'loss': float, mean per-example loss over entire epoch
-#     - 'acc_top{k}': float, accuracy@k over the entire epoch
-#     - 'loss_weighted' and 'acc_weighted_top{k}': float, weighted versions
-#
-# 3) confusion_matrices.npz
-#     - keys ['train', 'val', 'test']
-#     - values are np.ndarray, confusion matrices
-#
-# 4) label_stats.csv, per-label statistics, columns
-#     - 'split': str
-#     - 'label': str
-#     - 'precision': float
-#     - 'recall': float
-#
-########
+"""
 
-#%% Example usage
+evaluate_model.py
+
+Evaluate a species classifier.
+
+Currently the implementation of multi-label multi-class classification is
+non-functional.
+
+Outputs the following files:
+
+1) outputs_{split}.csv, one file per split, contains columns:
+    - 'path': str, path to cropped image
+    - 'label': str
+    - 'weight': float
+    - [label names]: float, confidence in each label
+
+2) overall_metrics.csv, contains columns:
+    - 'split': str
+    - 'loss': float, mean per-example loss over entire epoch
+    - 'acc_top{k}': float, accuracy@k over the entire epoch
+    - 'loss_weighted' and 'acc_weighted_top{k}': float, weighted versions
+
+3) confusion_matrices.npz
+    - keys ['train', 'val', 'test']
+    - values are np.ndarray, confusion matrices
+
+4) label_stats.csv, per-label statistics, columns
+    - 'split': str
+    - 'label': str
+    - 'precision': float
+    - 'recall': float
 
-"""
-python evaluate_model.py \
-    $BASE_LOGDIR/$LOGDIR/params.json \
-    $BASE_LOGDIR/$LOGDIR/ckpt_XX.pt \
-    --output-dir $BASE_LOGDIR/$LOGDIR \
-    --splits train val test \
-    --batch-size 256
 """
 
 #%% Imports and constants
@@ -64,6 +53,18 @@ import tqdm
 
 from classification import efficientnet, train_classifier
 
+
+#%% Example usage
+
+"""
+python evaluate_model.py \
+    $BASE_LOGDIR/$LOGDIR/params.json \
+    $BASE_LOGDIR/$LOGDIR/ckpt_XX.pt \
+    --output-dir $BASE_LOGDIR/$LOGDIR \
+    --splits train val test \
+    --batch-size 256
+"""
+
 SPLITS = ['train', 'val', 'test']
 
 
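The confusion_matrices.npz file that evaluate_model.py's docstring describes can be consumed directly with NumPy. A short sketch, assuming (as is conventional, though not stated in the docstring) that rows index true labels and columns index predictions:

    import numpy as np

    with np.load('confusion_matrices.npz') as data:
        for split in ['train', 'val', 'test']:
            cm = data[split]
            # per-class recall: correct predictions over true-label row sums
            recall = cm.diagonal() / cm.sum(axis=1)
            print(split, recall.mean())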