megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff shows the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Potentially problematic release: this version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
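The per-file "+added -removed" counts above are plain line-level diffs between the two wheels. Since a wheel is just a zip archive, similar numbers can be reproduced locally with the standard library alone. The sketch below is illustrative, not the tooling that generated this page; it assumes the two wheels have already been downloaded (for example with "pip download megadetector==5.0.7 --no-deps", and likewise for 5.0.9), and the output directory names are arbitrary.

import difflib
import zipfile
from pathlib import Path

def unpack(wheel_path, dest):
    # A .whl file is a zip archive; extract it so files can be compared on disk.
    with zipfile.ZipFile(wheel_path) as z:
        z.extractall(dest)
    return Path(dest)

def count_changes(old_root, new_root):
    # For every file present in either tree, count added and removed lines.
    old_files = {p.relative_to(old_root) for p in old_root.rglob('*') if p.is_file()}
    new_files = {p.relative_to(new_root) for p in new_root.rglob('*') if p.is_file()}
    for rel in sorted(old_files | new_files, key=str):
        old_lines = (old_root / rel).read_text(errors='replace').splitlines() if rel in old_files else []
        new_lines = (new_root / rel).read_text(errors='replace').splitlines() if rel in new_files else []
        diff = list(difflib.unified_diff(old_lines, new_lines, lineterm=''))
        added = sum(1 for line in diff if line.startswith('+') and not line.startswith('+++'))
        removed = sum(1 for line in diff if line.startswith('-') and not line.startswith('---'))
        if added or removed or (rel in old_files) != (rel in new_files):
            print('{} +{} -{}'.format(rel, added, removed))

old_root = unpack('megadetector-5.0.7-py3-none-any.whl', 'megadetector_5.0.7')
new_root = unpack('megadetector-5.0.9-py3-none-any.whl', 'megadetector_5.0.9')
count_changes(old_root, new_root)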
api/synchronous/api_core/tests/load_test.py

@@ -1,110 +1,110 @@
(Lines 1-109 are removed and re-added; the two sides are identical as rendered here, so the change is not visible at this level, most likely whitespace or line endings. Only the re-added lines are shown.)
+
+import os
+import json
+import io
+import random
+import requests
+
+from PIL import Image
+from multiprocessing import Pool
+from datetime import datetime
+from requests_toolbelt import MultipartEncoder
+from requests_toolbelt.multipart import decoder
+
+
+ip_address = '100.100.200.200'
+port = 5050
+
+base_url = 'http://{}:{}/v1/camera-trap/sync/'.format(ip_address, port)
+
+
+def call_api(args):
+    start = datetime.now()
+
+    index, url, params, data, headers = args['index'],args['url'], args['params'], args['data'], args['headers']
+    print('calling api: {} starttime: {}'.format(index, start))
+
+    response = requests.post(url, params=params, data=data, headers=headers)
+    elapsed_time = datetime.now() - start
+    print('\napi {} status code: {}, elapsed time in seconds {}'.format(index, response.status_code, elapsed_time.total_seconds()))
+
+    get_detections(response)
+    return response
+
+def get_detections(response):
+    results = decoder.MultipartDecoder.from_response(response)
+    text_results = {}
+    images = {}
+    for part in results.parts:
+        # part is a BodyPart object with b'Content-Type', and b'Content-Disposition', the later includes 'name' and 'filename' info
+        headers = {}
+        for k, v in part.headers.items():
+            headers[k.decode(part.encoding)] = v.decode(part.encoding)
+
+        if headers.get('Content-Type', None) == 'application/json':
+            text_result = json.loads(part.content.decode())
+
+            print(text_result)
+
+
+def test_load(num_requests, params, max_images=1):
+    requests = []
+
+    # read the images anew for each request
+    index = 0
+    for i in range(num_requests):
+        index += 1
+        files = {}
+        sample_input_dir = '../../../api/synchronous/sample_input/test_images'
+
+        image_files = os.listdir(sample_input_dir)
+        random.shuffle(image_files)
+
+        num_images = 0
+        for i, image_name in enumerate(image_files):
+            if not image_name.lower().endswith('.jpg'):
+                continue
+
+            if num_images >= max_images:
+                break
+            else:
+                num_images += 1
+
+            img_path = os.path.join(sample_input_dir, image_name)
+            with open(img_path, 'rb') as f:
+                content = f.read()
+            files[image_name] = (image_name, content, 'image/jpeg')
+
+        m = MultipartEncoder(fields=files)
+        args = {
+            'index': index,
+            'url': base_url + 'detect',
+            'params': params,
+            'data': m,
+            'headers': {'Content-Type': m.content_type}
+        }
+        requests.append(args)
+
+    print('starting', num_requests, 'threads...')
+    # images are read and in each request by the time we call the API in map()
+    with Pool(num_requests) as pool:
+        results = pool.map(call_api, requests)
+
+    return results
+
+
+if __name__ == "__main__":
+    params = {
+        'min_confidence': 0.05,
+        'min_rendering_confidence': 0.2,
+        'render': True
+    }
+
+    num_requests = 10
+    max_images = 1
+
+    start = datetime.now()
+    responses = test_load(num_requests, params, max_images=max_images)
+    end = datetime.now()
+    total_time = end - start
     print('Total time for {} requests: {}'.format(num_requests, total_time))
File without changes
classification/aggregate_classifier_probs.py

@@ -1,27 +1,16 @@
-
-#
-# aggregate_classifier_probs.py
-#
-# Aggregate probabilities from a classifier's outputs according to a mapping
-# from the desired (target) categories to the classifier's categories.
-#
-# Using the mapping, create a new version of the classifier output CSV with
-# probabilities summed within each target category. Also output a new
-# "index-to-name" JSON file which identifies the sequential order of the target
-# categories.
-#
-########
+"""
 
-
+aggregate_classifier_probs.py
 
-
-
-classifier_output.csv.gz \
---target-mapping target_to_classifier_labels.json \
---output-csv classifier_output_remapped.csv.gz \
---output-label-index label_index_remapped.json
-"""
+Aggregate probabilities from a classifier's outputs according to a mapping
+from the desired (target) categories to the classifier's categories.
 
+Using the mapping, create a new version of the classifier output CSV with
+probabilities summed within each target category. Also output a new
+"index-to-name" JSON file which identifies the sequential order of the target
+categories.
+
+"""
 
 #%% Imports
 
@@ -33,6 +22,15 @@ import json
 import pandas as pd
 from tqdm import tqdm
 
+#%% Example usage
+
+"""
+python aggregate_classifier_probs.py \
+classifier_output.csv.gz \
+--target-mapping target_to_classifier_labels.json \
+--output-csv classifier_output_remapped.csv.gz \
+--output-label-index label_index_remapped.json
+"""
 
 #%% Main function
 
@@ -46,6 +44,7 @@ def main(classifier_results_csv_path: str,
     Because the output CSV is often very large, we process it in chunks of 1000
     rows at a time.
     """
+
     chunked_df_iterator = pd.read_csv(
         classifier_results_csv_path, chunksize=1000, float_precision='high',
         index_col='path')
@@ -81,9 +80,7 @@ def main(classifier_results_csv_path: str,
 #%% Command-line driver
 
 def _parse_args() -> argparse.Namespace:
-
-    Parses arguments.
-    """
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Aggregate classifier probabilities to target classes.')
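The hunks above only move aggregate_classifier_probs.py's header comment into a module docstring and relocate the usage example, but the operation that docstring describes (summing classifier-category probabilities within each target category, then writing an index-to-name JSON) is easy to picture on a toy table. The sketch below is hypothetical; the category names, the mapping, and the variable names are invented for illustration and are not taken from the script.

import json
import pandas as pd

# Hypothetical classifier output: one row per crop, one probability column per
# classifier category.
probs = pd.DataFrame(
    {'cougar': [0.70, 0.10], 'mountain_lion': [0.20, 0.05], 'deer': [0.10, 0.85]},
    index=['crop_001.jpg', 'crop_002.jpg'])

# Hypothetical target mapping: target category -> classifier categories whose
# probabilities should be summed.
target_to_classifier = {'cat': ['cougar', 'mountain_lion'], 'deer': ['deer']}

# Sum probabilities within each target category, preserving row order.
aggregated = pd.DataFrame(
    {target: probs[cols].sum(axis=1) for target, cols in target_to_classifier.items()})

# The accompanying "index-to-name" JSON maps each column position to its name.
label_index = {str(i): name for i, name in enumerate(aggregated.columns)}

print(aggregated)
print(json.dumps(label_index, indent=1))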
classification/analyze_failed_images.py

@@ -1,20 +1,11 @@
-
-#
-# analyze_failed_images.py
-#
-########
+"""
 
-
+analyze_failed_images.py
 
-"""
-python analyze_failed_images.py failed.json \
--a ACCOUNT -c CONTAINER -s SAS_TOKEN
 """
 
 #%% Imports and constants
 
-from __future__ import annotations
-
 import argparse
 from collections.abc import Mapping, Sequence
 from concurrent import futures
@@ -31,6 +22,14 @@ from data_management.megadb.megadb_utils import MegadbUtils
 from md_utils import path_utils
 from md_utils import sas_blob_utils
 
+
+#%% Example usage
+
+"""
+python analyze_failed_images.py failed.json \
+-a ACCOUNT -c CONTAINER -s SAS_TOKEN
+"""
+
 ImageFile.LOAD_TRUNCATED_IMAGES = False
 
 
@@ -191,8 +190,7 @@ def analyze_images(url_or_path: str, json_keys: Optional[Sequence[str]] = None,
 
 #%% Command-line driver
 
-def _parse_args() -> argparse.Namespace:
-
+def _parse_args() -> argparse.Namespace:
 
     parser = argparse.ArgumentParser(
         description='Analyze a list of images that failed to download or crop.')
classification/cache_batchapi_outputs.py

@@ -1,54 +1,54 @@
-[lines 1-51 removed; their content is not preserved in this rendering]
+"""
+
+cache_batchapi_outputs.py
+
+Script to cache Batch Detection API outputs.
+
+This script can handle either the Batch Detection API JSON Response or the
+detections JSON.
+
+Batch Detection API Response format:
+
+{
+    "Status": {
+        "request_status": "completed",
+        "message": {
+            "num_failed_shards": 0,
+            "output_file_urls": {
+                "detections": "https://url/to/detections.json",
+                "failed_images": "https://url/to/failed_images.json",
+                "images": https://url/to/images.json",
+            }
+        },
+    },
+    "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
+    "TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
+}
+
+Detections JSON format:
+
+{
+    "info": {...}
+    "detection_categories": {...}
+    "classification_categories": {...}
+    "images": [
+        {
+            "file": "path/from/base/dir/image1.jpg",
+            "max_detection_conf": 0.926,
+            "detections": [{
+                "category": "1",
+                "conf": 0.061,
+                "bbox": [0.0451, 0.1849, 0.3642, 0.4636]
+            }]
+        }
+    ]
+}
+
+Batch Detection API Output Format:
+
+github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#api-outputs
+
+"""
 
 #%% Imports
 
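The "Detections JSON format" quoted in the new docstring is the standard MegaDetector batch output structure. As a quick illustration of consuming such a file (a sketch only, not code from this package; the file path is a placeholder):

import json

with open('detections.json') as f:
    results = json.load(f)

# e.g. {"1": "animal", "2": "person", "3": "vehicle"}
category_names = results['detection_categories']

for im in results['images']:
    detections = im.get('detections') or []
    # Compute the per-image maximum confidence rather than assuming the
    # max_detection_conf field is present; it is optional in some output versions.
    max_conf = max((d['conf'] for d in detections), default=0.0)
    labels = {category_names.get(d['category'], d['category']) for d in detections}
    print('{}: max_conf={:.3f}, categories={}'.format(im['file'], max_conf, sorted(labels)))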
classification/create_classification_dataset.py

@@ -1,73 +1,63 @@
-
-#
-# create_classification_dataset.py
-#
-# Creates a classification dataset CSV with a corresponding JSON file determining
-# the train/val/test split.
-#
-# This script takes as input a "queried images" JSON file whose keys are paths to
-# images and values are dictionaries containing information relevant for training
-# a classifier, including labels and (optionally) ground-truth bounding boxes.
-# The image paths are in the format `<dataset-name>/<blob-name>` where we assume
-# that the dataset name does not contain '/'.
-#
-# {
-#   "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#     "dataset": "caltech",
-#     "location": 13,
-#     "class": "mountain_lion", # class from dataset
-#     "bbox": [{"category": "animal",
-#       "bbox": [0, 0.347, 0.237, 0.257]}], # ground-truth bbox
-#     "label": ["monutain_lion"] # labels to use in classifier
-#   },
-#   "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#     "dataset": "caltech",
-#     "location": 13,
-#     "class": "mountain_lion", # class from dataset
-#     "label": ["monutain_lion"] # labels to use in classifier
-#   },
-#   ...
-# }
-#
-# We assume that the tuple (dataset, location) identifies a unique location. In
-# other words, we assume that no two datasets have overlapping locations. This
-# probably isn't 100% true, but it's pretty much the best we can do in terms of
-# avoiding overlapping locations between the train/val/test splits.
-#
-# This script outputs 3 files to <output_dir>:
-#
-# 1) classification_ds.csv, contains columns:
-#
-# - 'path': str, path to cropped images
-# - 'dataset': str, name of dataset
-# - 'location': str, location that image was taken, as saved in MegaDB
-# - 'dataset_class': str, original class assigned to image, as saved in MegaDB
-# - 'confidence': float, confidence that this crop is of an actual animal,
-# 1.0 if the crop is a "ground truth bounding box" (i.e., from MegaDB),
-# <= 1.0 if the bounding box was detected by MegaDetector
-# - 'label': str, comma-separated list of label(s) assigned to this crop for
-# the sake of classification
-#
-# 2) label_index.json: maps integer to label name
-#
-# - keys are string representations of Python integers (JSON requires keys to
-# be strings), numbered from 0 to num_labels-1
-# - values are strings, label names
-#
-# 3) splits.json: serialization of a Python dict that maps each split
-# ['train', 'val', 'test'] to a list of length-2 lists, where each inner list
-# is [<dataset>, <location>]
-#
-########
+"""
 
-
+create_classification_dataset.py
+
+Creates a classification dataset CSV with a corresponding JSON file determining
+the train/val/test split.
+
+This script takes as input a "queried images" JSON file whose keys are paths to
+images and values are dictionaries containing information relevant for training
+a classifier, including labels and (optionally) ground-truth bounding boxes.
+The image paths are in the format `<dataset-name>/<blob-name>` where we assume
+that the dataset name does not contain '/'.
+
+{
+  "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
+    "dataset": "caltech",
+    "location": 13,
+    "class": "mountain_lion", # class from dataset
+    "bbox": [{"category": "animal",
+      "bbox": [0, 0.347, 0.237, 0.257]}], # ground-truth bbox
+    "label": ["monutain_lion"] # labels to use in classifier
+  },
+  "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
+    "dataset": "caltech",
+    "location": 13,
+    "class": "mountain_lion", # class from dataset
+    "label": ["monutain_lion"] # labels to use in classifier
+  },
+  ...
+}
+
+We assume that the tuple (dataset, location) identifies a unique location. In
+other words, we assume that no two datasets have overlapping locations. This
+probably isn't 100% true, but it's pretty much the best we can do in terms of
+avoiding overlapping locations between the train/val/test splits.
+
+This script outputs 3 files to <output_dir>:
+
+1) classification_ds.csv, contains columns:
+
+- 'path': str, path to cropped images
+- 'dataset': str, name of dataset
+- 'location': str, location that image was taken, as saved in MegaDB
+- 'dataset_class': str, original class assigned to image, as saved in MegaDB
+- 'confidence': float, confidence that this crop is of an actual animal,
+1.0 if the crop is a "ground truth bounding box" (i.e., from MegaDB),
+<= 1.0 if the bounding box was detected by MegaDetector
+- 'label': str, comma-separated list of label(s) assigned to this crop for
+the sake of classification
+
+2) label_index.json: maps integer to label name
+
+- keys are string representations of Python integers (JSON requires keys to
+be strings), numbered from 0 to num_labels-1
+- values are strings, label names
+
+3) splits.json: serialization of a Python dict that maps each split
+['train', 'val', 'test'] to a list of length-2 lists, where each inner list
+is [<dataset>, <location>]
 
-"""
-python create_classification_dataset.py \
-run_idfg2 \
---queried-images-json run_idfg2/queried_images.json \
---cropped-images-dir /ssd/crops_sq \
--d $HOME/classifier-training/mdcache -v "4.1" -t 0.8
 """
 
 #%% Imports and constants
@@ -87,6 +77,17 @@ from tqdm import tqdm
 from classification import detect_and_crop
 
 
+#%% Example usage
+
+"""
+python create_classification_dataset.py \
+run_idfg2 \
+--queried-images-json run_idfg2/queried_images.json \
+--cropped-images-dir /ssd/crops_sq \
+-d $HOME/classifier-training/mdcache -v "4.1" -t 0.8
+"""
+
+
 DATASET_FILENAME = 'classification_ds.csv'
 LABEL_INDEX_FILENAME = 'label_index.json'
 SPLITS_FILENAME = 'splits.json'