PyPI - megadetector - Versions diffs - 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl - Mend

megadetector 5.0.9-py3-none-any.whl → 5.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (226) hide show

{megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
{megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
megadetector-5.0.11.dist-info/RECORD +5 -0
megadetector-5.0.11.dist-info/top_level.txt +1 -0
api/__init__.py +0 -0
api/batch_processing/__init__.py +0 -0
api/batch_processing/api_core/__init__.py +0 -0
api/batch_processing/api_core/batch_service/__init__.py +0 -0
api/batch_processing/api_core/batch_service/score.py +0 -439
api/batch_processing/api_core/server.py +0 -294
api/batch_processing/api_core/server_api_config.py +0 -98
api/batch_processing/api_core/server_app_config.py +0 -55
api/batch_processing/api_core/server_batch_job_manager.py +0 -220
api/batch_processing/api_core/server_job_status_table.py +0 -152
api/batch_processing/api_core/server_orchestration.py +0 -360
api/batch_processing/api_core/server_utils.py +0 -92
api/batch_processing/api_core_support/__init__.py +0 -0
api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
api/batch_processing/api_support/__init__.py +0 -0
api/batch_processing/api_support/summarize_daily_activity.py +0 -152
api/batch_processing/data_preparation/__init__.py +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
api/batch_processing/data_preparation/manage_video_batch.py +0 -327
api/batch_processing/integration/digiKam/setup.py +0 -6
api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
api/batch_processing/postprocessing/__init__.py +0 -0
api/batch_processing/postprocessing/add_max_conf.py +0 -64
api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
api/batch_processing/postprocessing/compare_batch_results.py +0 -958
api/batch_processing/postprocessing/convert_output_format.py +0 -397
api/batch_processing/postprocessing/load_api_results.py +0 -195
api/batch_processing/postprocessing/md_to_coco.py +0 -310
api/batch_processing/postprocessing/md_to_labelme.py +0 -330
api/batch_processing/postprocessing/merge_detections.py +0 -401
api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
api/synchronous/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
api/synchronous/api_core/animal_detection_api/config.py +0 -35
api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
api/synchronous/api_core/tests/__init__.py +0 -0
api/synchronous/api_core/tests/load_test.py +0 -110
classification/__init__.py +0 -0
classification/aggregate_classifier_probs.py +0 -108
classification/analyze_failed_images.py +0 -227
classification/cache_batchapi_outputs.py +0 -198
classification/create_classification_dataset.py +0 -627
classification/crop_detections.py +0 -516
classification/csv_to_json.py +0 -226
classification/detect_and_crop.py +0 -855
classification/efficientnet/__init__.py +0 -9
classification/efficientnet/model.py +0 -415
classification/efficientnet/utils.py +0 -610
classification/evaluate_model.py +0 -520
classification/identify_mislabeled_candidates.py +0 -152
classification/json_to_azcopy_list.py +0 -63
classification/json_validator.py +0 -695
classification/map_classification_categories.py +0 -276
classification/merge_classification_detection_output.py +0 -506
classification/prepare_classification_script.py +0 -194
classification/prepare_classification_script_mc.py +0 -228
classification/run_classifier.py +0 -286
classification/save_mislabeled.py +0 -110
classification/train_classifier.py +0 -825
classification/train_classifier_tf.py +0 -724
classification/train_utils.py +0 -322
data_management/__init__.py +0 -0
data_management/annotations/__init__.py +0 -0
data_management/annotations/annotation_constants.py +0 -34
data_management/camtrap_dp_to_coco.py +0 -238
data_management/cct_json_utils.py +0 -395
data_management/cct_to_md.py +0 -176
data_management/cct_to_wi.py +0 -289
data_management/coco_to_labelme.py +0 -272
data_management/coco_to_yolo.py +0 -662
data_management/databases/__init__.py +0 -0
data_management/databases/add_width_and_height_to_db.py +0 -33
data_management/databases/combine_coco_camera_traps_files.py +0 -206
data_management/databases/integrity_check_json_db.py +0 -477
data_management/databases/subset_json_db.py +0 -115
data_management/generate_crops_from_cct.py +0 -149
data_management/get_image_sizes.py +0 -188
data_management/importers/add_nacti_sizes.py +0 -52
data_management/importers/add_timestamps_to_icct.py +0 -79
data_management/importers/animl_results_to_md_results.py +0 -158
data_management/importers/auckland_doc_test_to_json.py +0 -372
data_management/importers/auckland_doc_to_json.py +0 -200
data_management/importers/awc_to_json.py +0 -189
data_management/importers/bellevue_to_json.py +0 -273
data_management/importers/cacophony-thermal-importer.py +0 -796
data_management/importers/carrizo_shrubfree_2018.py +0 -268
data_management/importers/carrizo_trail_cam_2017.py +0 -287
data_management/importers/cct_field_adjustments.py +0 -57
data_management/importers/channel_islands_to_cct.py +0 -913
data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
data_management/importers/eMammal/eMammal_helpers.py +0 -249
data_management/importers/eMammal/make_eMammal_json.py +0 -223
data_management/importers/ena24_to_json.py +0 -275
data_management/importers/filenames_to_json.py +0 -385
data_management/importers/helena_to_cct.py +0 -282
data_management/importers/idaho-camera-traps.py +0 -1407
data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
data_management/importers/jb_csv_to_json.py +0 -150
data_management/importers/mcgill_to_json.py +0 -250
data_management/importers/missouri_to_json.py +0 -489
data_management/importers/nacti_fieldname_adjustments.py +0 -79
data_management/importers/noaa_seals_2019.py +0 -181
data_management/importers/pc_to_json.py +0 -365
data_management/importers/plot_wni_giraffes.py +0 -123
data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
data_management/importers/prepare_zsl_imerit.py +0 -131
data_management/importers/rspb_to_json.py +0 -356
data_management/importers/save_the_elephants_survey_A.py +0 -320
data_management/importers/save_the_elephants_survey_B.py +0 -332
data_management/importers/snapshot_safari_importer.py +0 -758
data_management/importers/snapshot_safari_importer_reprise.py +0 -665
data_management/importers/snapshot_serengeti_lila.py +0 -1067
data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
data_management/importers/sulross_get_exif.py +0 -65
data_management/importers/timelapse_csv_set_to_json.py +0 -490
data_management/importers/ubc_to_json.py +0 -399
data_management/importers/umn_to_json.py +0 -507
data_management/importers/wellington_to_json.py +0 -263
data_management/importers/wi_to_json.py +0 -441
data_management/importers/zamba_results_to_md_results.py +0 -181
data_management/labelme_to_coco.py +0 -548
data_management/labelme_to_yolo.py +0 -272
data_management/lila/__init__.py +0 -0
data_management/lila/add_locations_to_island_camera_traps.py +0 -97
data_management/lila/add_locations_to_nacti.py +0 -147
data_management/lila/create_lila_blank_set.py +0 -557
data_management/lila/create_lila_test_set.py +0 -151
data_management/lila/create_links_to_md_results_files.py +0 -106
data_management/lila/download_lila_subset.py +0 -177
data_management/lila/generate_lila_per_image_labels.py +0 -515
data_management/lila/get_lila_annotation_counts.py +0 -170
data_management/lila/get_lila_image_counts.py +0 -111
data_management/lila/lila_common.py +0 -300
data_management/lila/test_lila_metadata_urls.py +0 -132
data_management/ocr_tools.py +0 -874
data_management/read_exif.py +0 -681
data_management/remap_coco_categories.py +0 -84
data_management/remove_exif.py +0 -66
data_management/resize_coco_dataset.py +0 -189
data_management/wi_download_csv_to_coco.py +0 -246
data_management/yolo_output_to_md_output.py +0 -441
data_management/yolo_to_coco.py +0 -676
detection/__init__.py +0 -0
detection/detector_training/__init__.py +0 -0
detection/detector_training/model_main_tf2.py +0 -114
detection/process_video.py +0 -703
detection/pytorch_detector.py +0 -337
detection/run_detector.py +0 -779
detection/run_detector_batch.py +0 -1219
detection/run_inference_with_yolov5_val.py +0 -917
detection/run_tiled_inference.py +0 -935
detection/tf_detector.py +0 -188
detection/video_utils.py +0 -606
docs/source/conf.py +0 -43
md_utils/__init__.py +0 -0
md_utils/azure_utils.py +0 -174
md_utils/ct_utils.py +0 -612
md_utils/directory_listing.py +0 -246
md_utils/md_tests.py +0 -968
md_utils/path_utils.py +0 -1044
md_utils/process_utils.py +0 -157
md_utils/sas_blob_utils.py +0 -509
md_utils/split_locations_into_train_val.py +0 -228
md_utils/string_utils.py +0 -92
md_utils/url_utils.py +0 -323
md_utils/write_html_image_list.py +0 -225
md_visualization/__init__.py +0 -0
md_visualization/plot_utils.py +0 -293
md_visualization/render_images_with_thumbnails.py +0 -275
md_visualization/visualization_utils.py +0 -1537
md_visualization/visualize_db.py +0 -551
md_visualization/visualize_detector_output.py +0 -406
megadetector-5.0.9.dist-info/RECORD +0 -224
megadetector-5.0.9.dist-info/top_level.txt +0 -8
taxonomy_mapping/__init__.py +0 -0
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
taxonomy_mapping/map_new_lila_datasets.py +0 -154
taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
taxonomy_mapping/preview_lila_taxonomy.py +0 -591
taxonomy_mapping/retrieve_sample_image.py +0 -71
taxonomy_mapping/simple_image_download.py +0 -218
taxonomy_mapping/species_lookup.py +0 -834
taxonomy_mapping/taxonomy_csv_checker.py +0 -159
taxonomy_mapping/taxonomy_graph.py +0 -346
taxonomy_mapping/validate_lila_category_mappings.py +0 -83
{megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0

classification/merge_classification_detection_output.py DELETED Viewed

@@ -1,506 +0,0 @@
-"""
-merge_classification_detection_output.py
-Merges classification results with Batch Detection API outputs.
-This script takes 2 main files as input:
-1) Either a "dataset CSV" (output of create_classification_dataset.py) or a
-    "classification results CSV" (output of evaluate_model.py). The CSV is
-    expected to have columns listed below. The 'label' and [label names] columns
-    are optional, but at least one of them must be provided.
-    * 'path': str, path to cropped image
-        * if passing in a detections JSON, must match
-            <img_file>___cropXX_mdvY.Y.jpg
-        * if passing in a queried images JSON, must match
-            <dataset>/<img_file>___cropXX_mdvY.Y.jpg or
-            <dataset>/<img_file>___cropXX.jpg
-    * 'label': str, label assigned to this crop
-    * [label names]: float, confidence in each label
-2) Either a "detections JSON" (output of MegaDetector) or a "queried images
-    JSON" (output of json_validator.py).
-If the CSV contains [label names] columns (e.g., output of evaluate_model.py),
-then each crop's "classifications" output will have one value per category.
-Categories are sorted by decreasing confidence.
-    "classifications": [
-        ["3", 0.901],
-        ["1", 0.071],
-        ["4", 0.025],
-        ["2", 0.003],
-   ]
-If the CSV only contains the 'label' column (e.g., output of
-create_classification_dataset.py), then each crop's "classifications" output
-will have only one value, with a confidence of 1.0. The label's classification
-category ID is always greater than 1,000,000, to distinguish it from a predicted
-category ID.
-    "classifications": [
-        ["1000004", 1.0]
-    ]
-If the CSV contains both [label names] and 'label' columns, then both the
-predicted categories and the label category can be included. The label
-category is added only when the --label argument is given: "--label last"
-places it after the predictions, and "--label first" places it first.
-    "classifications": [
-        ["1000004", 1.0],  # label put first if "--label first" is given
-        ["3", 0.901],  # all other results are sorted by confidence
-        ["1", 0.071],
-        ["4", 0.025],
-        ["2", 0.003]
-    ]
-"""
-#%% Imports
-from __future__ import annotations
-import argparse
-import datetime
-import json
-import os
-from collections.abc import Mapping, Sequence
-from typing import Any
-import pandas as pd
-from tqdm import tqdm
-from md_utils.ct_utils import truncate_float
-#%% Example usage
-"""
-    python merge_classification_detection_output.py \
-        BASE_LOGDIR/LOGDIR/outputs_test.csv.gz \
-        BASE_LOGDIR/label_index.json \
-        BASE_LOGDIR/queried_images.json \
-        --classifier-name "efficientnet-b3-idfg-moredata" \
-        --detector-output-cache-dir $HOME/classifier-training/mdcache \
-        --detector-version "4.1" \
-        --output-json BASE_LOGDIR/LOGDIR/classifier_results.json \
-        --datasets idfg idfg_swwlf_2019
-"""
-#%% Support functions
-def row_to_classification_list(row: Mapping[str, Any],
-                               label_names: Sequence[str],
-                               contains_preds: bool,
-                               label_pos: str | None,
-                               threshold: float,
-                               relative_conf: bool = False
-                               ) -> list[tuple[str, float]]:
-    """
-    Given a mapping from label name to output probability, returns a list of
-    tuples, (str(label_id), prob), which can be serialized into the Batch API
-    output format.
-    The list of tuples is returned in sorted order by the predicted probability
-    for each label.
-    If 'label' is in row and label_pos is not None, then we add
-    (str(label_id + 1_000_000), 1.) to the list. If label_pos='first', we put
-    this at the front of the list. Otherwise, we put it at the end.
-    """
-    contains_label = ('label' in row)
-    assert contains_label or contains_preds
-    if relative_conf:
-        assert contains_label and contains_preds
-    result = []
-    if contains_preds:
-        result = [(str(i), row[label]) for i, label in enumerate(label_names)]
-        if relative_conf:
-            label_conf = row[row['label']]
-            result = [(k, max(v - label_conf, 0)) for k, v in result]
-        # filter out confidences below the threshold, and set precision to 4
-        result = [
-            (k, truncate_float(conf, precision=4))
-            for k, conf in result if conf >= threshold
-        ]
-        # sort from highest to lowest probability
-        result = sorted(result, key=lambda x: x[1], reverse=True)
-    if contains_label and label_pos is not None:
-        label = row['label']
-        label_id = label_names.index(label)
-        item = (str(label_id + 1_000_000), 1.)
-        if label_pos == 'first':
-            result = [item] + result
-        else:
-            result.append(item)
-    return result
-def process_queried_images(
-         df: pd.DataFrame,
-         queried_images_json_path: str,
-         detector_output_cache_base_dir: str,
-         detector_version: str,
-         datasets: Sequence[str] | None = None,
-         samples_per_label: int | None = None,
-         seed: int = 123
-         ) -> dict[str, Any]:
-    """
-    Creates a detection JSON object roughly in the Batch API detection
-    format.
-    Detections are either ground-truth (from the queried images JSON) or
-    retrieved from the detector output cache. Only images corresponding to crop
-    paths from the given pd.DataFrame are included in the detection JSON.
-    Args:
-        df: pd.DataFrame, either a "classification dataset CSV" or a
-            "classification results CSV",  column 'path' has format
-            <dataset>/<img_file>___cropXX[...].jpg
-        queried_images_json_path: str, path to queried images JSON
-        detector_output_cache_base_dir: str
-        detector_version: str
-        datasets: optional list of str, only crops from these datasets will be
-            included in the output, set to None to include all datasets
-        samples_per_label: optional int, if not None, then randomly sample this
-            many bounding boxes per label (each label must have at least this
-            many examples)
-        seed: int, used for random sampling if samples_per_label is not None
-    Returns: dict, detections JSON file, except that the 'images' field is a
-        dict (img_path => dict) instead of a list
-    """
-    # input validation
-    assert os.path.exists(queried_images_json_path)
-    detection_cache_dir = os.path.join(
-        detector_output_cache_base_dir, f'v{detector_version}')
-    assert os.path.isdir(detection_cache_dir)
-    # extract dataset name from crop path so we can process 1 dataset at a time
-    df['dataset'] = df.index.map(lambda x: x[:x.find('/')])
-    unique_datasets = df['dataset'].unique()
-    if datasets is not None:
-        for ds in datasets:
-            assert ds in unique_datasets
-        df = df[df['dataset'].isin(datasets)]  # filter by dataset
-    else:
-        datasets = unique_datasets
-    # randomly sample images for each class
-    if samples_per_label is not None:
-        print(f'Sampling {samples_per_label} bounding boxes per label')
-        df = df.groupby('label').sample(samples_per_label, random_state=seed)
-    # load queried images JSON, needed for ground-truth bbox info
-    with open(queried_images_json_path, 'r') as f:
-        queried_images_js = json.load(f)
-    merged_js: dict[str, Any] = {
-        'images': {},  # start as dict, will convert to list later
-        'info': {}
-    }
-    images = merged_js['images']
-    for ds in datasets:
-        print('processing dataset:', ds)
-        ds_df = df[df['dataset'] == ds]
-        with open(os.path.join(detection_cache_dir, f'{ds}.json'), 'r') as f:
-            detection_js = json.load(f)
-        img_file_to_index = {
-            im['file']: idx
-            for idx, im in enumerate(detection_js['images'])
-        }
-        # compare info dicts
-        class_info = merged_js['info']
-        detection_info = detection_js['info']
-        key = 'detector'
-        if key not in class_info:
-            class_info[key] = detection_info[key]
-        assert class_info[key] == detection_info[key]
-        # compare detection categories
-        key = 'detection_categories'
-        if key not in merged_js:
-            merged_js[key] = detection_js[key]
-        assert merged_js[key] == detection_js[key]
-        cat_to_catid = {v: k for k, v in detection_js[key].items()}
-        for crop_path in tqdm(ds_df.index):
-            # crop_path: <dataset>/<img_file>___cropXX_mdvY.Y.jpg
-            #            [----<img_path>----]       [-<suffix>--]
-            img_path, suffix = crop_path.split('___crop')
-            img_file = img_path[img_path.find('/') + 1:]
-            # file has detection entry
-            if '_mdv' in suffix and img_path not in images:
-                img_idx = img_file_to_index[img_file]
-                images[img_path] = detection_js['images'][img_idx]
-                images[img_path]['file'] = img_path
-            # bounding box is from ground truth
-            elif img_path not in images:
-                images[img_path] = {
-                    'file': img_path,
-                    'detections': [
-                        {
-                            'category': cat_to_catid[bbox_dict['category']],
-                            'conf': 1.0,
-                            'bbox': bbox_dict['bbox']
-                        }
-                        for bbox_dict in queried_images_js[img_path]['bbox']
-                    ]
-                }
-    return merged_js
-def combine_classification_with_detection(
-        detection_js: dict[str, Any],
-        df: pd.DataFrame,
-        idx_to_label: Mapping[str, str],
-        label_names: Sequence[str],
-        classifier_name: str,
-        classifier_timestamp: str,
-        threshold: float,
-        label_pos: str | None = None,
-        relative_conf: bool = False,
-        typical_confidence_threshold: float = None
-        ) -> dict[str, Any]:
-    """
-    Adds classification information to a detection JSON. Classification
-    information may include the true label and/or the predicted confidences
-    of each label.
-    Args:
-        detection_js: dict, detections JSON file, except that the 'images'
-            field is a dict (img_path => dict) instead of a list
-        df: pd.DataFrame, classification results, indexed by crop path
-        idx_to_label: dict, str(label_id) => label name, may also include
-            str(label_id + 1_000_000) => 'label: {label_name}'
-        label_names: list of str, label names
-        classifier_name: str, name of classifier to include in output JSON
-        classifier_timestamp: str, timestamp to include in output JSON
-        threshold: float, for each crop, omit classification results for
-            categories whose confidence is below this threshold
-        label_pos: one of [None, 'first', 'last']
-            None: do not include labels in the output JSON
-            'first' / 'last': position in classification list to put the label
-        relative_conf: bool, if True then for each class, outputs its relative
-            confidence over the confidence of the true label, requires 'label'
-            to be in CSV
-        typical_confidence_threshold: float, useful default confidence
-            threshold; not used directly, just passed along to the output file
-    Returns: dict, detections JSON file updated with classification results
-    """
-    classification_metadata = {
-        'classifier': classifier_name,
-        'classification_completion_time': classifier_timestamp
-    }
-    if typical_confidence_threshold is not None:
-        classification_metadata['classifier_metadata'] = \
-        {'typical_classification_threshold':typical_confidence_threshold}
-    detection_js['info'].update(classification_metadata)
-    detection_js['classification_categories'] = idx_to_label
-    contains_preds = (set(label_names) <= set(df.columns))
-    if not contains_preds:
-        print('CSV does not contain predictions. Outputting labels only.')
-    images = detection_js['images']
-    for crop_path in tqdm(df.index):
-        # crop_path: <dataset>/<img_file>___cropXX_mdvY.Y.jpg
-        #            [----<img_path>----]       [-<suffix>--]
-        img_path, suffix = crop_path.split('___crop')
-        crop_index = int(suffix[:2])
-        detection_dict = images[img_path]['detections'][crop_index]
-        detection_dict['classifications'] = row_to_classification_list(
-            row=df.loc[crop_path], label_names=label_names,
-            contains_preds=contains_preds, label_pos=label_pos,
-            threshold=threshold, relative_conf=relative_conf)
-    detection_js['images'] = list(images.values())
-    return detection_js
-#%% Main function
-def main(classification_csv_path: str,
-         label_names_json_path: str,
-         output_json_path: str,
-         classifier_name: str,
-         threshold: float,
-         datasets: Sequence[str] | None,
-         detection_json_path: str | None,
-         queried_images_json_path: str | None,
-         detector_output_cache_base_dir: str | None,
-         detector_version: str | None,
-         samples_per_label: int | None,
-         seed: int,
-         label_pos: str | None,
-         relative_conf: bool,
-         typical_confidence_threshold: float) -> None:
-    # input validation
-    assert os.path.exists(classification_csv_path)
-    assert os.path.exists(label_names_json_path)
-    assert 0 <= threshold <= 1
-    for x in [detection_json_path, queried_images_json_path]:
-        if x is not None:
-            assert os.path.exists(x)
-    assert label_pos in [None, 'first', 'last']
-    # load classification CSV
-    print('Loading classification CSV...')
-    df = pd.read_csv(classification_csv_path, float_precision='high',
-                     index_col='path')
-    if relative_conf or label_pos is not None:
-        assert 'label' in df.columns
-    # load label names
-    with open(label_names_json_path, 'r') as f:
-        idx_to_label = json.load(f)
-    label_names = [idx_to_label[str(i)] for i in range(len(idx_to_label))]
-    if 'label' in df.columns:
-        for i, label in enumerate(label_names):
-            idx_to_label[str(i + 1_000_000)] = f'label: {label}'
-    if queried_images_json_path is not None:
-        assert detector_output_cache_base_dir is not None
-        assert detector_version is not None
-        detection_js = process_queried_images(
-            df=df, queried_images_json_path=queried_images_json_path,
-            detector_output_cache_base_dir=detector_output_cache_base_dir,
-            detector_version=detector_version, datasets=datasets,
-            samples_per_label=samples_per_label, seed=seed)
-    elif detection_json_path is not None:
-        with open(detection_json_path, 'r') as f:
-            detection_js = json.load(f)
-        images = {}
-        for img in detection_js['images']:
-            path = img['file']
-            if datasets is None or path[:path.find('/')] in datasets:
-                images[path] = img
-        detection_js['images'] = images
-    classification_time = datetime.date.fromtimestamp(
-        os.path.getmtime(classification_csv_path))
-    classifier_timestamp = classification_time.strftime('%Y-%m-%d %H:%M:%S')
-    classification_js = combine_classification_with_detection(
-        detection_js=detection_js, df=df, idx_to_label=idx_to_label,
-        label_names=label_names, classifier_name=classifier_name,
-        classifier_timestamp=classifier_timestamp, threshold=threshold,
-        label_pos=label_pos, relative_conf=relative_conf,
-        typical_confidence_threshold=typical_confidence_threshold)
-    os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
-    with open(output_json_path, 'w') as f:
-        json.dump(classification_js, f, indent=1)
-    print('Wrote merged classification/detection results to {}'.format(output_json_path))
-#%% Command-line driver
-def _parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        description='Merges classification results with Batch Detection API '
-                    'outputs.')
-    parser.add_argument(
-        'classification_csv',
-        help='path to classification CSV')
-    parser.add_argument(
-        'label_names_json',
-        help='path to JSON file mapping label index to label name')
-    parser.add_argument(
-        '-o', '--output-json', required=True,
-        help='(required) path to save output JSON with both detection and '
-             'classification results')
-    parser.add_argument(
-        '-n', '--classifier-name', required=True,
-        help='(required) name of classifier')
-    parser.add_argument(
-        '-t', '--threshold', type=float, default=0.1,
-        help='Confidence threshold between 0 and 1. In the output file, omit '
-             'classifier results on classes whose confidence is below this '
-             'threshold.')
-    parser.add_argument(
-        '-d', '--datasets', nargs='*',
-        help='optionally limit output to images from certain datasets. Assumes '
-             'that image paths are given as <dataset>/<img_file>.')
-    parser.add_argument(
-        '--typical-confidence-threshold', type=float, default=None,
-        help='useful default confidence threshold; not used directly, just '
-             'passed along to the output file')
-    detection_json_group = parser.add_argument_group(
-        'arguments for passing in a detections JSON file')
-    detection_json_group.add_argument(
-        '-j', '--detection-json',
-        help='path to detections JSON file')
-    queried_images_group = parser.add_argument_group(
-        'arguments for passing in a queried images JSON file')
-    queried_images_group.add_argument(
-        '-q', '--queried-images-json',
-        help='path to queried images JSON file')
-    queried_images_group.add_argument(
-        '-c', '--detector-output-cache-dir',
-        help='(required) path to directory where detector outputs are cached')
-    queried_images_group.add_argument(
-        '-v', '--detector-version',
-        help='(required) detector version string, e.g., "4.1"')
-    queried_images_group.add_argument(
-        '-s', '--samples-per-label', type=int,
-        help='randomly sample this many bounding boxes per label (each label '
-             'must have at least this many examples)')
-    queried_images_group.add_argument(
-        '--seed', type=int, default=123,
-        help='random seed, only used if --samples-per-label is given')
-    queried_images_group.add_argument(
-        '--label', choices=['first', 'last'], default=None,
-        help='Whether to put the label first or last in the list of '
-             'classifications. If this argument is omitted, then no labels are '
-             'included in the output.')
-    queried_images_group.add_argument(
-        '--relative-conf', action='store_true',
-        help='for each class, outputs its relative confidence over the '
-             'confidence of the true label, requires "label" to be in CSV')
-    return parser.parse_args()
-if __name__ == '__main__':
-    args = _parse_args()
-    main(classification_csv_path=args.classification_csv,
-         label_names_json_path=args.label_names_json,
-         output_json_path=args.output_json,
-         classifier_name=args.classifier_name,
-         threshold=args.threshold,
-         datasets=args.datasets,
-         detection_json_path=args.detection_json,
-         queried_images_json_path=args.queried_images_json,
-         detector_output_cache_base_dir=args.detector_output_cache_dir,
-         detector_version=args.detector_version,
-         samples_per_label=args.samples_per_label,
-         seed=args.seed,
-         label_pos=args.label,
-         relative_conf=args.relative_conf,
-         typical_confidence_threshold=args.typical_confidence_threshold)

classification/prepare_classification_script.py DELETED Viewed

@@ -1,194 +0,0 @@
-"""
-prepare_classification_script.py
-Notebook-y script used to prepare a series of shell commands to run a classifier
-(other than MegaClassifier) on a MegaDetector result set.
-Differs from prepare_classification_script_mc.py only in the final class mapping step.
-"""
-#%% Job options
-import os
-def main():
-    """
-    Write a shell script that runs a classifier on one MegaDetector result set.
-    All job options are the hard-coded values below.  The generated script
-    contains (optionally, when include_cropping is True) crop_detections.py
-    commands, then run_classifier.py commands, then
-    merge_classification_detection_output.py commands, one of each per input
-    file.  The script is written to
-    <working_dir_base>/run_idfgclassifier_<job_name>.sh and the owner-execute
-    bit is added to it.
-    Raises:
-        AssertionError: if the working folder, the output folder, an input
-            file, the classifier folder, the checkpoint, or the category file
-            does not exist.
-    """
-    # Function-scope imports: shlex quotes positional paths for the shell,
-    # stat supplies the executable permission bit.
-    import shlex
-    import stat
-    organization_name = 'idfg'
-    job_name = 'idfg-2022-01-27-EOE2021S_Group6'
-    input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
-    image_base = '/datadrive/idfg/EOE2021S_Group6'
-    crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
-    device_id = 1
-    working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
-                                    organization_name,
-                                    job_name)
-    output_base = os.path.join(working_dir_base,'combined_api_outputs')
-    assert os.path.isdir(working_dir_base), \
-        'Working folder does not exist: {}'.format(working_dir_base)
-    assert os.path.isdir(output_base), \
-        'Output folder does not exist: {}'.format(output_base)
-    output_file = os.path.join(working_dir_base,'run_idfgclassifier_' + job_name +  '.sh')
-    # Input files live in output_base (same path the original spelled out in full)
-    input_files = [
-        os.path.join(output_base,input_filename)
-        ]
-    for fn in input_files:
-        assert os.path.isfile(fn), 'Input file does not exist: {}'.format(fn)
-    #%% Constants
-    include_cropping = False
-    classifier_base = os.path.expanduser('~/models/camera_traps/idfg_classifier/idfg_classifier_20200905_042558')
-    assert os.path.isdir(classifier_base), \
-        'Classifier folder does not exist: {}'.format(classifier_base)
-    checkpoint_path = os.path.join(classifier_base,'idfg_classifier_ckpt_14_compiled.pt')
-    assert os.path.isfile(checkpoint_path), \
-        'Checkpoint does not exist: {}'.format(checkpoint_path)
-    classifier_categories_path = os.path.join(classifier_base,'label_index.json')
-    assert os.path.isfile(classifier_categories_path), \
-        'Category file does not exist: {}'.format(classifier_categories_path)
-    classifier_output_suffix = '_idfg_classifier_output.csv.gz'
-    final_output_suffix = '_idfgclassifier.json'
-    threshold_str = '0.65'
-    n_threads_str = '50'
-    image_size_str = '300'
-    batch_size_str = '64'
-    num_workers_str = '8'
-    logdir = working_dir_base
-    classification_threshold_str = '0.05'
-    # This is just passed along to the metadata in the output file, it has no impact
-    # on how the classification scripts run.
-    typical_classification_threshold_str = '0.75'
-    classifier_name = 'idfg4'
-    # NOTE(review): this path is derived from crop_path only, not from the input
-    # file, so every entry in input_files shares the same classifier output (and
-    # the same final output below).  Harmless with a single input file.
-    classifier_output_path = crop_path + classifier_output_suffix
-    #%% Set up environment
-    commands = []
-    # commands.append('cd MegaDetector/classification\n')
-    # commands.append('conda activate cameratraps-classifier\n')
-    #%% Crop images
-    if include_cropping:
-        commands.append('\n### Cropping ###\n')
-        # fn = input_files[0]
-        for fn in input_files:
-            crop_cmd = '\n# Cropping {}\n'.format(fn)
-            # Positional paths are shell-quoted so spaces or metacharacters in a
-            # path cannot split or alter the generated command.
-            crop_cmd += "python crop_detections.py \\\n" + \
-                shlex.quote(fn) + ' \\\n' + \
-                shlex.quote(crop_path) + ' \\\n' + \
-                '--images-dir "' + image_base + '"' + ' \\\n' + \
-                '--threshold "' + threshold_str + '"' + ' \\\n' + \
-                '--square-crops ' + ' \\\n' + \
-                '--threads "' + n_threads_str + '"' + ' \\\n' + \
-                '--logdir "' + logdir + '"' + ' \\\n' + \
-                '\n'
-            commands.append(crop_cmd)
-    #%% Run classifier
-    commands.append('\n### Classifying ###\n')
-    # fn = input_files[0]
-    for fn in input_files:
-        classify_cmd = '\n# Classifying {}\n'.format(fn)
-        classify_cmd += "python run_classifier.py \\\n" + \
-            shlex.quote(checkpoint_path) + ' \\\n' + \
-            shlex.quote(crop_path) + ' \\\n' + \
-            shlex.quote(classifier_output_path) + ' \\\n' + \
-            '--detections-json "' + fn + '"' + ' \\\n' + \
-            '--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
-            '--image-size "' + image_size_str + '"' + ' \\\n' + \
-            '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
-            '--num-workers "' + num_workers_str + '"' + ' \\\n'
-        # The device flag is optional; without it the command ends on the
-        # continuation line followed by the blank lines appended below.
-        if device_id is not None:
-            classify_cmd += '--device {}'.format(device_id)
-        classify_cmd += '\n\n'
-        commands.append(classify_cmd)
-    #%% Merge classification and detection outputs
-    commands.append('\n### Merging ###\n')
-    # fn = input_files[0]
-    for fn in input_files:
-        # Final output goes in output_base, named after the classifier output
-        # with the suffix swapped and the '_detections' / '_crops' tokens removed.
-        final_output_path = os.path.join(output_base,
-                                         os.path.basename(classifier_output_path)).\
-                                         replace(classifier_output_suffix,
-                                         final_output_suffix)
-        final_output_path = final_output_path.replace('_detections','')
-        final_output_path = final_output_path.replace('_crops','')
-        merge_cmd = '\n# Merging {}\n'.format(fn)
-        merge_cmd += "python merge_classification_detection_output.py \\\n" + \
-            shlex.quote(classifier_output_path) + ' \\\n' + \
-            shlex.quote(classifier_categories_path) + ' \\\n' + \
-            '--output-json "' + final_output_path + '"' + ' \\\n' + \
-            '--detection-json "' + fn + '"' + ' \\\n' + \
-            '--classifier-name "' + classifier_name + '"' + ' \\\n' + \
-            '--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
-            '--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
-            '\n'
-        commands.append(merge_cmd)
-    #%% Write everything out
-    with open(output_file,'w') as f:
-        f.writelines(commands)
-    # Add the owner-execute bit so the generated script can be run directly
-    st = os.stat(output_file)
-    os.chmod(output_file, st.st_mode | stat.S_IEXEC)
-# Script entry point: generate the shell script only when this file is run directly.
-if __name__ == '__main__':
-    main()

megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl