PyPI - megadetector - Versions diffs - 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl - Mend

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show

api/__init__.py +0 -0
api/batch_processing/__init__.py +0 -0
api/batch_processing/api_core/__init__.py +0 -0
api/batch_processing/api_core/batch_service/__init__.py +0 -0
api/batch_processing/api_core/batch_service/score.py +0 -1
api/batch_processing/api_core/server_job_status_table.py +0 -1
api/batch_processing/api_core_support/__init__.py +0 -0
api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
api/batch_processing/api_support/__init__.py +0 -0
api/batch_processing/api_support/summarize_daily_activity.py +0 -1
api/batch_processing/data_preparation/__init__.py +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +93 -79
api/batch_processing/data_preparation/manage_video_batch.py +8 -8
api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
api/batch_processing/postprocessing/__init__.py +0 -0
api/batch_processing/postprocessing/add_max_conf.py +12 -12
api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
api/batch_processing/postprocessing/compare_batch_results.py +114 -44
api/batch_processing/postprocessing/convert_output_format.py +62 -19
api/batch_processing/postprocessing/load_api_results.py +17 -20
api/batch_processing/postprocessing/md_to_coco.py +31 -21
api/batch_processing/postprocessing/md_to_labelme.py +165 -68
api/batch_processing/postprocessing/merge_detections.py +40 -15
api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
api/synchronous/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
api/synchronous/api_core/animal_detection_api/config.py +35 -35
api/synchronous/api_core/tests/__init__.py +0 -0
api/synchronous/api_core/tests/load_test.py +109 -109
classification/__init__.py +0 -0
classification/aggregate_classifier_probs.py +21 -24
classification/analyze_failed_images.py +11 -13
classification/cache_batchapi_outputs.py +51 -51
classification/create_classification_dataset.py +69 -68
classification/crop_detections.py +54 -53
classification/csv_to_json.py +97 -100
classification/detect_and_crop.py +105 -105
classification/evaluate_model.py +43 -42
classification/identify_mislabeled_candidates.py +47 -46
classification/json_to_azcopy_list.py +10 -10
classification/json_validator.py +72 -71
classification/map_classification_categories.py +44 -43
classification/merge_classification_detection_output.py +68 -68
classification/prepare_classification_script.py +157 -154
classification/prepare_classification_script_mc.py +228 -228
classification/run_classifier.py +27 -26
classification/save_mislabeled.py +30 -30
classification/train_classifier.py +20 -20
classification/train_classifier_tf.py +21 -22
classification/train_utils.py +10 -10
data_management/__init__.py +0 -0
data_management/annotations/__init__.py +0 -0
data_management/annotations/annotation_constants.py +18 -31
data_management/camtrap_dp_to_coco.py +238 -0
data_management/cct_json_utils.py +107 -59
data_management/cct_to_md.py +176 -158
data_management/cct_to_wi.py +247 -219
data_management/coco_to_labelme.py +272 -0
data_management/coco_to_yolo.py +86 -62
data_management/databases/__init__.py +0 -0
data_management/databases/add_width_and_height_to_db.py +20 -16
data_management/databases/combine_coco_camera_traps_files.py +35 -31
data_management/databases/integrity_check_json_db.py +130 -83
data_management/databases/subset_json_db.py +25 -16
data_management/generate_crops_from_cct.py +27 -45
data_management/get_image_sizes.py +188 -144
data_management/importers/add_nacti_sizes.py +8 -8
data_management/importers/add_timestamps_to_icct.py +78 -78
data_management/importers/animl_results_to_md_results.py +158 -160
data_management/importers/auckland_doc_test_to_json.py +9 -9
data_management/importers/auckland_doc_to_json.py +8 -8
data_management/importers/awc_to_json.py +7 -7
data_management/importers/bellevue_to_json.py +15 -15
data_management/importers/cacophony-thermal-importer.py +13 -13
data_management/importers/carrizo_shrubfree_2018.py +8 -8
data_management/importers/carrizo_trail_cam_2017.py +8 -8
data_management/importers/cct_field_adjustments.py +9 -9
data_management/importers/channel_islands_to_cct.py +10 -10
data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
data_management/importers/ena24_to_json.py +7 -7
data_management/importers/filenames_to_json.py +8 -8
data_management/importers/helena_to_cct.py +7 -7
data_management/importers/idaho-camera-traps.py +7 -7
data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
data_management/importers/jb_csv_to_json.py +9 -9
data_management/importers/mcgill_to_json.py +8 -8
data_management/importers/missouri_to_json.py +18 -18
data_management/importers/nacti_fieldname_adjustments.py +10 -10
data_management/importers/noaa_seals_2019.py +8 -8
data_management/importers/pc_to_json.py +7 -7
data_management/importers/plot_wni_giraffes.py +7 -7
data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
data_management/importers/prepare_zsl_imerit.py +7 -7
data_management/importers/rspb_to_json.py +8 -8
data_management/importers/save_the_elephants_survey_A.py +8 -8
data_management/importers/save_the_elephants_survey_B.py +9 -9
data_management/importers/snapshot_safari_importer.py +26 -26
data_management/importers/snapshot_safari_importer_reprise.py +665 -665
data_management/importers/snapshot_serengeti_lila.py +14 -14
data_management/importers/sulross_get_exif.py +8 -9
data_management/importers/timelapse_csv_set_to_json.py +11 -11
data_management/importers/ubc_to_json.py +13 -13
data_management/importers/umn_to_json.py +7 -7
data_management/importers/wellington_to_json.py +8 -8
data_management/importers/wi_to_json.py +9 -9
data_management/importers/zamba_results_to_md_results.py +181 -181
data_management/labelme_to_coco.py +309 -159
data_management/labelme_to_yolo.py +103 -60
data_management/lila/__init__.py +0 -0
data_management/lila/add_locations_to_island_camera_traps.py +9 -9
data_management/lila/add_locations_to_nacti.py +147 -147
data_management/lila/create_lila_blank_set.py +114 -31
data_management/lila/create_lila_test_set.py +8 -8
data_management/lila/create_links_to_md_results_files.py +106 -106
data_management/lila/download_lila_subset.py +92 -90
data_management/lila/generate_lila_per_image_labels.py +56 -43
data_management/lila/get_lila_annotation_counts.py +18 -15
data_management/lila/get_lila_image_counts.py +11 -11
data_management/lila/lila_common.py +103 -70
data_management/lila/test_lila_metadata_urls.py +132 -116
data_management/ocr_tools.py +173 -128
data_management/read_exif.py +161 -99
data_management/remap_coco_categories.py +84 -0
data_management/remove_exif.py +58 -62
data_management/resize_coco_dataset.py +32 -44
data_management/wi_download_csv_to_coco.py +246 -0
data_management/yolo_output_to_md_output.py +86 -73
data_management/yolo_to_coco.py +535 -95
detection/__init__.py +0 -0
detection/detector_training/__init__.py +0 -0
detection/process_video.py +85 -33
detection/pytorch_detector.py +43 -25
detection/run_detector.py +157 -72
detection/run_detector_batch.py +189 -114
detection/run_inference_with_yolov5_val.py +118 -51
detection/run_tiled_inference.py +113 -42
detection/tf_detector.py +51 -28
detection/video_utils.py +606 -521
docs/source/conf.py +43 -0
md_utils/__init__.py +0 -0
md_utils/azure_utils.py +9 -9
md_utils/ct_utils.py +249 -70
md_utils/directory_listing.py +59 -64
md_utils/md_tests.py +968 -862
md_utils/path_utils.py +655 -155
md_utils/process_utils.py +157 -133
md_utils/sas_blob_utils.py +20 -20
md_utils/split_locations_into_train_val.py +45 -32
md_utils/string_utils.py +33 -10
md_utils/url_utils.py +208 -27
md_utils/write_html_image_list.py +51 -35
md_visualization/__init__.py +0 -0
md_visualization/plot_utils.py +102 -109
md_visualization/render_images_with_thumbnails.py +34 -34
md_visualization/visualization_utils.py +908 -311
md_visualization/visualize_db.py +109 -58
md_visualization/visualize_detector_output.py +61 -42
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
megadetector-5.0.9.dist-info/RECORD +224 -0
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
taxonomy_mapping/__init__.py +0 -0
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
taxonomy_mapping/map_new_lila_datasets.py +154 -154
taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
taxonomy_mapping/preview_lila_taxonomy.py +591 -591
taxonomy_mapping/retrieve_sample_image.py +12 -12
taxonomy_mapping/simple_image_download.py +11 -11
taxonomy_mapping/species_lookup.py +10 -10
taxonomy_mapping/taxonomy_csv_checker.py +18 -18
taxonomy_mapping/taxonomy_graph.py +47 -47
taxonomy_mapping/validate_lila_category_mappings.py +83 -76
data_management/cct_json_to_filename_json.py +0 -89
data_management/cct_to_csv.py +0 -140
data_management/databases/remove_corrupted_images_from_db.py +0 -191
detection/detector_training/copy_checkpoints.py +0 -43
md_visualization/visualize_megadb.py +0 -183
megadetector-5.0.7.dist-info/RECORD +0 -202
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0

classification/train_classifier_tf.py CHANGED Viewed

@@ -1,27 +1,16 @@
-########
-#
-# train_classifier_tf.py
-#
-# Train an EfficientNet classifier.
-#
-# Currently the implementation of multi-label multi-class classification is
-# non-functional.
-#
-# During training, start tensorboard from within the classification/ directory:
-#    tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
-#
-########
+"""
-#%% Example usage
+train_classifier_tf.py
-"""
-    python train_classifier_tf.py run_idfg /ssd/crops_sq \
-        -m "efficientnet-b0" --pretrained --finetune --label-weighted \
-        --epochs 50 --batch-size 512 --lr 1e-4 \
-        --seed 123 \
-        --logdir run_idfg
-"""
+Train an EfficientNet classifier.
+Currently the implementation of multi-label multi-class classification is
+non-functional.
+During training, start tensorboard from within the classification/ directory:
+   tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
+"""
 #%% Imports and constants
@@ -47,7 +36,6 @@ from classification.train_utils import (
     imgs_with_confidences, load_dataset_csv, prefix_all_keys)
 from md_visualization import plot_utils
 AUTOTUNE = tf.data.experimental.AUTOTUNE
 # match pytorch EfficientNet model names
@@ -63,6 +51,17 @@ EFFICIENTNET_MODELS: Mapping[str, Mapping[str, Any]] = {
 }
+#%% Example usage
+"""
+    python train_classifier_tf.py run_idfg /ssd/crops_sq \
+        -m "efficientnet-b0" --pretrained --finetune --label-weighted \
+        --epochs 50 --batch-size 512 --lr 1e-4 \
+        --seed 123 \
+        --logdir run_idfg
+"""
 #%% Support functions
 def create_dataset(

classification/train_utils.py CHANGED Viewed

@@ -1,13 +1,13 @@
-########
-#
-# train_utils.py
-#
-# Utility functions useful for training a classifier.
-#
-# This script should NOT depend on any other file within this repo. It should
-# especially be agnostic to PyTorch vs. TensorFlow.
-#
-########
+"""
+train_utils.py
+Utility functions useful for training a classifier.
+This script should NOT depend on any other file within this repo. It should
+especially be agnostic to PyTorch vs. TensorFlow.
+"""
 #%% Imports

data_management/__init__.py ADDED Viewed

File without changes

data_management/annotations/__init__.py ADDED Viewed

File without changes

data_management/annotations/annotation_constants.py CHANGED Viewed

@@ -1,37 +1,20 @@
-########
-#
-# annotation_constants.py
-#
-# Shared constants used to interpret annotation output
-#
-# Categories assigned to bounding boxes.  Used throughout our repo; do not change unless
-# you are Dan or Siyu.  In fact, do not change unless you are both Dan *and* Siyu.
-#
-# We use integer indices here; this is different than the API output .json file,
-# where indices are string integers.
-#
-########
-NUM_DETECTOR_CATEGORIES = 3  # this is for choosing colors, so ignoring the "empty" class
-# This is the label mapping used for our incoming iMerit annotations
-# Only used to parse the incoming annotations. In our database, the string name is used to avoid confusion
-annotation_bbox_categories = [
-    {'id': 0, 'name': 'empty'},
-    {'id': 1, 'name': 'animal'},
-    {'id': 2, 'name': 'person'},
-    {'id': 3, 'name': 'group'},  # group of animals
-    {'id': 4, 'name': 'vehicle'}
-]
+"""
+annotation_constants.py
-annotation_bbox_category_id_to_name = {}
-annotation_bbox_category_name_to_id = {}
+Defines default categories for MegaDetector output boxes.
-for cat in annotation_bbox_categories:
-    annotation_bbox_category_id_to_name[cat['id']] = cat['name']
-    annotation_bbox_category_name_to_id[cat['name']] = cat['id']
+Used throughout the repo; do not change unless you are Dan or Siyu.  In fact, do not change unless
+you are both Dan *and* Siyu.
-# MegaDetector outputs
+We use integer IDs here; this is different from the MD .json file format,
+where indices are string integers.
+"""
+#%% Constants
+# MegaDetector output categories (the "empty" category is implicit)
 detector_bbox_categories = [
     {'id': 0, 'name': 'empty'},
     {'id': 1, 'name': 'animal'},
@@ -39,9 +22,13 @@ detector_bbox_categories = [
     {'id': 3, 'name': 'vehicle'}
 ]
+# This is used for choosing colors, so it ignores the "empty" class.
+NUM_DETECTOR_CATEGORIES = len(detector_bbox_categories) - 1
 detector_bbox_category_id_to_name = {}
 detector_bbox_category_name_to_id = {}
 for cat in detector_bbox_categories:
     detector_bbox_category_id_to_name[cat['id']] = cat['name']
     detector_bbox_category_name_to_id[cat['name']] = cat['id']

data_management/camtrap_dp_to_coco.py ADDED Viewed

@@ -0,0 +1,238 @@
+"""
+camtrap_dp_to_coco.py
+Parse a very limited subset of the Camtrap DP data package format:
+https://camtrap-dp.tdwg.org/
+...and convert to COCO format.  Assumes that all required metadata files have been
+put in the same directory (which is standard).
+Does not currently parse bounding boxes, just attaches species labels to images.
+Currently supports only sequence-level labeling.
+"""
+#%% Imports and constants
+import os
+import json
+import pandas as pd
+from dateutil import parser as dateparser
+from collections import defaultdict
+#%% Functions
+def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
+    """
+    Convert the Camtrap DP package in [camtrap_dp_folder] to COCO.
+    Does not validate images, just converts.  Use integrity_check_json_db to validate
+    the resulting COCO file.
+    Optionally writes the results to [output_file]
+    """
+    required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')
+    for fn in required_files:
+        fn_abs = os.path.join(camtrap_dp_folder,fn)
+        assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)
+    with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r') as f:
+        datapackage = json.load(f)
+    assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
+        'I only know how to parse Camtrap DP 1.0 packages'
+    deployments_file = None
+    events_file = None
+    media_file = None
+    observations_file = None
+    resources = datapackage['resources']
+    for r in resources:
+        if r['name'] == 'deployments':
+            deployments_file = r['path']
+        elif r['name'] == 'media':
+            media_file = r['path']
+        elif r['name'] == 'events':
+            events_file = r['path']
+        elif r['name'] == 'observations':
+            observations_file = r['path']
+    assert deployments_file is not None, 'No deployment file specified'
+    assert events_file is not None, 'No events file specified'
+    assert media_file is not None, 'No media file specified'
+    assert observations_file is not None, 'No observation file specified'
+    deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
+    events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
+    media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
+    observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))
+    print('Read {} deployment lines'.format(len(deployments_df)))
+    print('Read {} events lines'.format(len(events_df)))
+    print('Read {} media lines'.format(len(media_df)))
+    print('Read {} observation lines'.format(len(observations_df)))
+    media_id_to_media_info = {}
+    # i_row = 0; row = media_df.iloc[i_row]
+    for i_row,row in media_df.iterrows():
+        media_info = {}
+        media_info['file_name'] = os.path.join(row['filePath'],row['fileName']).replace('\\','/')
+        media_info['location'] = row['deploymentID']
+        media_info['id'] = row['mediaID']
+        media_info['datetime'] = row['timestamp']
+        media_info['datetime'] = dateparser.parse(media_info['datetime'])
+        media_info['frame_num'] = -1
+        media_info['seq_num_frames'] = -1
+        media_id_to_media_info[row['mediaID']] = media_info
+    event_id_to_media_ids = defaultdict(list)
+    # i_row = 0; row = events_df.iloc[i_row]
+    for i_row,row in events_df.iterrows():
+        media_id = row['mediaID']
+        assert media_id in media_id_to_media_info
+        event_id_to_media_ids[row['eventID']].append(media_id)
+    event_id_to_category_names = defaultdict(set)
+    # i_row = 0; row = observations_df.iloc[i_row]
+    for i_row,row in observations_df.iterrows():
+        if row['observationLevel'] != 'event':
+            raise ValueError("I don't know how to parse image-level events yet")
+        if row['observationType'] == 'blank':
+            event_id_to_category_names[row['eventID']].add('empty')
+        elif row['observationType'] == 'unknown':
+            event_id_to_category_names[row['eventID']].add('unknown')
+        elif row['observationType'] == 'human':
+            assert row['scientificName'] == 'Homo sapiens'
+            event_id_to_category_names[row['eventID']].add(row['scientificName'])
+        else:
+            assert row['observationType'] == 'animal'
+            assert isinstance(row['scientificName'],str)
+            event_id_to_category_names[row['eventID']].add(row['scientificName'])
+    # Sort images within an event into frame numbers
+    #
+    # event_id = next(iter(event_id_to_media_ids))
+    for event_id in event_id_to_media_ids.keys():
+        media_ids_this_event = event_id_to_media_ids[event_id]
+        media_info_this_event = [media_id_to_media_info[media_id] for media_id in media_ids_this_event]
+        media_info_this_event = sorted(media_info_this_event, key=lambda x: x['datetime'])
+        for i_media,media_info in enumerate(media_info_this_event):
+            media_info['frame_num'] = i_media
+            media_info['seq_num_frames'] = len(media_info_this_event)
+            media_info['seq_id'] = event_id
+    # Create category names
+    category_name_to_category_id = {'empty':0}
+    for event_id in event_id_to_category_names:
+        category_names_this_event = event_id_to_category_names[event_id]
+        for name in category_names_this_event:
+            if name not in category_name_to_category_id:
+                category_name_to_category_id[name] = len(category_name_to_category_id)
+    # Move everything into COCO format
+    images = list(media_id_to_media_info.values())
+    categories = []
+    for name in category_name_to_category_id:
+        categories.append({'name':name,'id':category_name_to_category_id[name]})
+    info = {'version':1.0,'description':datapackage['name']}
+    # Create annotations
+    annotations = []
+    for event_id in event_id_to_media_ids.keys():
+        i_ann = 0
+        media_ids_this_event = event_id_to_media_ids[event_id]
+        media_info_this_event = [media_id_to_media_info[media_id] for media_id in media_ids_this_event]
+        categories_this_event = event_id_to_category_names[event_id]
+        for im in media_info_this_event:
+            for category_name in categories_this_event:
+                ann = {}
+                ann['id'] = event_id + '_' + str(i_ann)
+                i_ann += 1
+                ann['image_id'] = im['id']
+                ann['category_id'] = category_name_to_category_id[category_name]
+                ann['sequence_level_annotation'] = True
+                annotations.append(ann)
+    coco_data = {}
+    coco_data['images'] = images
+    coco_data['annotations'] = annotations
+    coco_data['categories'] = categories
+    coco_data['info'] = info
+    for im in coco_data['images']:
+        im['datetime'] = str(im['datetime'] )
+    if output_file is not None:
+        with open(output_file,'w') as f:
+            json.dump(coco_data,f,indent=1)
+    return coco_data
+#%% Interactive driver
+if False:
+    pass
+    #%%
+    camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
+    coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
+    coco_data = camtrap_dp_to_coco(camtrap_dp_folder,
+                                   output_file=coco_file)
+    #%% Validate
+    from data_management.databases.integrity_check_json_db import integrity_check_json_db, IntegrityCheckOptions
+    options = IntegrityCheckOptions()
+    options.baseDir = camtrap_dp_folder
+    options.bCheckImageSizes = False
+    options.bCheckImageExistence = True
+    options.bFindUnusedImages = True
+    options.bRequireLocation = True
+    options.iMaxNumImages = -1
+    options.nThreads = 1
+    options.verbose = True
+    sortedCategories, data, errorInfo = integrity_check_json_db(coco_file,options)
+    #%% Preview
+    from md_visualization.visualize_db import DbVizOptions, visualize_db
+    options = DbVizOptions()
+    options.parallelize_rendering = True
+    options.parallelize_rendering_with_threads = True
+    options.parallelize_rendering_n_cores = 10
+    preview_dir = r'c:\temp\camtrapdp-preview'
+    htmlOutputFile,image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)
+    from md_utils.path_utils import open_file
+    open_file(htmlOutputFile)
+#%% Command-line driver
+# TODO

data_management/cct_json_utils.py CHANGED Viewed

@@ -1,12 +1,12 @@
-########
-#
-# cct_json_utils.py
-#
-# Utilities for working with COCO Camera Traps .json databases
-#
-# https://github.com/agentmorris/MegaDetector/blob/master/data_management/README.md#coco-cameratraps-format
-#
-########
+"""
+cct_json_utils.py
+Utilities for working with COCO Camera Traps .json databases
+https://github.com/agentmorris/MegaDetector/blob/master/data_management/README.md#coco-cameratraps-format
+"""
 #%% Constants and imports
@@ -15,9 +15,6 @@ import os
 from tqdm import tqdm
 from collections import defaultdict, OrderedDict
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
-JSONObject = Mapping[str, Any]
 #%% Classes
@@ -31,18 +28,32 @@ class CameraTrapJsonUtils:
     def annotations_to_string(annotations, cat_id_to_name):
         """
         Given a list of annotations and a mapping from class IDs to names, produces
-        a concatenated class list, always sorting alphabetically.
+        a comma-delimited string containing a list of class names, sorted alphabetically.
+        Args:
+            annotations (list): a list of annotation dicts
+            cat_id_to_name (dict): a dict mapping category IDs to category names
+        Returns:
+            str: a comma-delimited list of class names
         """
-        class_names = CameraTrapJsonUtils.annotationsToClassnames(annotations, cat_id_to_name)
+        class_names = CameraTrapJsonUtils.annotations_to_class_names(annotations, cat_id_to_name)
         return ','.join(class_names)
     @staticmethod
-    def annotations_to_classnames(annotations, cat_id_to_name):
+    def annotations_to_class_names(annotations, cat_id_to_name):
         """
         Given a list of annotations and a mapping from class IDs to names, produces
-        a list of class names, always sorting alphabetically.
+        a list of class names, sorted alphabetically.
+        Args:
+            annotations (list): a list of annotation dicts
+            cat_id_to_name (dict): a dict mapping category IDs to category names
+        Returns:
+            list: a list of class names present in [annotations]
         """
         # Collect all names
@@ -53,18 +64,19 @@ class CameraTrapJsonUtils:
     @staticmethod
-    def order_db_keys(db: JSONObject) -> OrderedDict:
+    def order_db_keys(db):
         """
         Given a dict representing a JSON database in the COCO Camera Trap
-        format, return an OrderedDict with keys in the order of 'info',
+        format, returns an OrderedDict with keys in the order of 'info',
         'categories', 'annotations' and 'images'. When this OrderedDict is
         serialized with json.dump(), the order of the keys are preserved.
         Args:
-            db: dict representing a JSON database in the COCO Camera Trap format
+            db (dict): a JSON database in the COCO Camera Trap format
         Returns:
-            the same db but as an OrderedDict with keys ordered for readability
+            dict: the same content as [db] but as an OrderedDict with keys ordered for
+                readability
         """
         ordered = OrderedDict([
@@ -76,10 +88,20 @@ class CameraTrapJsonUtils:
     @staticmethod
-    def annotations_groupby_image_field(db_indexed, image_field='seq_id'):
+    def group_annotations_by_image_field(db_indexed, image_field='seq_id'):
         """
         Given an instance of IndexedJsonDb, group annotation entries by a field in the
-        image entry.
+        image entry.  Typically used to find all the annotations associated with a sequence.
+        Args:
+            db_indexed (IndexedJsonDb): an initialized IndexedJsonDb, typically loaded from a
+                COCO Camera Traps .json file
+            image_field (str, optional): a field by which to group annotations (defaults
+                to 'seq_id')
+        Returns:
+            dict: a dict mapping objects (typically strings, in fact typically sequence IDs) to
+                lists of annotations
         """
         image_id_to_image_field = {}
@@ -95,27 +117,24 @@ class CameraTrapJsonUtils:
     @staticmethod
-    def get_entries_from_locations(db: JSONObject, locations: Iterable[str]
-                                   ) -> Dict[str, Any]:
+    def get_entries_for_locations(db, locations):
         """
-        Given a dict representing a JSON database in the COCO Camera Trap format, return a dict
+        Given a dict representing a JSON database in the COCO Camera Trap format, returns a dict
         with the 'images' and 'annotations' fields in the CCT format, each is an array that only
-        includes entries in the original `db` that are in the `locations` set.
+        includes entries in the original [db] that are in the [locations] set.
         Args:
-            db: a dict representing a JSON database in the COCO Camera Trap format
-            locations: a set or list of locations to include; each item is a string
+            db (dict): a dict representing a JSON database in the COCO Camera Trap format
+            locations (set): a set or list of locations to include; each item is a string
         Returns:
-            a dict with the 'images' and 'annotations' fields in the CCT format
+            dict: a dict with the 'images' and 'annotations' fields in the CCT format
         """
         locations = set(locations)
         print('Original DB has {} image and {} annotation entries.'.format(
             len(db['images']), len(db['annotations'])))
-        new_db: Dict[str, Any] = {
-            'images': [],
-            'annotations': []
-        }
+        new_db = { 'images': [], 'annotations': [] }
         new_images = set()
         for i in db['images']:
             # cast location to string as the entries in locations are strings
@@ -139,12 +158,26 @@ class IndexedJsonDb:
     a .json database.
     """
-    def __init__(self, json_filename: Union[str, JSONObject],
-                 b_normalize_paths: bool = False,
-                 filename_replacements: Optional[Mapping[str, str]] = None,
-                 b_convert_classes_to_lower: bool = True):
+    def __init__(self,
+                 json_filename,
+                 b_normalize_paths=False,
+                 filename_replacements=None,
+                 b_convert_classes_to_lower=True,
+                 b_force_forward_slashes=True):
         """
-        json_filename can also be an existing json db
+        Constructor for IndexedJsonDb that loads from a .json file or CCT-formatted dict.
+        Args:
+            json_filename (str): filename to load, or an already-loaded dict
+            b_normalize_paths (bool, optional): whether to invoke os.path.normpath on
+                all filenames.  Not relevant if b_force_forward_slashes is True.
+            filename_replacements (dict, optional): a set of string --> string mappings
+                that will trigger replacements in all filenames, typically used to remove
+                leading folders
+            b_convert_classes_to_lower (bool, optional): whether to convert all class
+                names to lowercase
+            b_force_forward_slashes (bool, optional): whether to convert backslashes to
+                forward slashes in all path names
         """
         if isinstance(json_filename, str):
@@ -162,11 +195,15 @@ class IndexedJsonDb:
             for c in self.db['categories']:
                 c['name'] = c['name'].lower()
+        # Normalize paths to simplify comparisons later
         if b_normalize_paths:
-            # Normalize paths to simplify comparisons later
             for im in self.db['images']:
                 im['file_name'] = os.path.normpath(im['file_name'])
+        if b_force_forward_slashes:
+            for im in self.db['images']:
+                im['file_name'] = im['file_name'].replace('\\','/')
         if filename_replacements is not None:
             for s in filename_replacements:
                 # Make custom replacements in filenames, typically used to
@@ -192,7 +229,7 @@ class IndexedJsonDb:
         # Image ID --> annotations
         # Each image can potentially multiple annotations, hence using lists
-        self.image_id_to_annotations: Dict[str, List[Dict[str, Any]]]
+        self.image_id_to_annotations = {}
         self.image_id_to_annotations = defaultdict(list)
         for ann in self.db['annotations']:
             self.image_id_to_annotations[ann['image_id']].append(ann)
@@ -200,12 +237,17 @@ class IndexedJsonDb:
     # ...__init__
-    def get_annotations_for_image(self, image: JSONObject
-                                  ) -> Optional[List[Dict[str, Any]]]:
+    def get_annotations_for_image(self, image):
         """
-        Returns: list of annotations associated with an image,
-            None if the db has not been loaded,
-            [] if no annotations are available
+        Finds all the annnotations associated with the image dict [image].
+        Args:
+            image (dict): an image dict loaded from a CCT .json file.  Only the 'id' field
+                is used.
+        Returns:
+            list: list of annotations associated with this image.  Returns None if the db
+                has not been loaded, or [] if no annotations are available for this image.
         """
         if self.db is None:
@@ -218,11 +260,17 @@ class IndexedJsonDb:
         return image_annotations
-    def get_classes_for_image(self, image: JSONObject) -> Optional[List[str]]:
+    def get_classes_for_image(self, image):
         """
-        Returns a list of class names associated with [image]
+        Returns a list of class names associated with [image].
-        Returns None is the db has not been loaded, [] if no annotations are available
+        Args:
+            image (dict): an image dict loaded from a CCT .json file.  Only the 'id' field
+                is used.
+        Returns:
+            list: list of class names associated with this image.  Returns None if the db
+                has not been loaded, or [] if no annotations are available for this image.
         """
         if self.db is None:
@@ -242,27 +290,27 @@ class IndexedJsonDb:
 # ...class IndexedJsonDb
-#%% Functions
 class SequenceOptions:
+    """
+    Options parameterizing the grouping of images into sequences by time.
+    """
+    #: Images separated by <= this duration will be grouped into the same sequence.
     episode_interval_seconds = 60.0
+#%% Functions
 def create_sequences(image_info,options=None):
     """
-    Synthesize episodes/sequences/bursts for the images in [image_info].  [image_info]
-    should be a list of dicts in CCT format, i.e. with fields 'file_name','datetime','location'.
-    'filename' should be a string.
-    'datetime' should be a Python datetime object
-    'location' should be a string.
+    Synthesizes episodes/sequences/bursts for the images in [image_info].
     Modifies [image_info], populating the 'seq_id', 'seq_num_frames', and 'frame_num' fields
     for each image.
+    Args:
+        image_info (dict): a list of dicts in CCT format, i.e. with fields 'file_name' (str),
+            'datetime' (datetime), and 'location' (str).
     """
     if options is None:

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl