megadetector 5.0.7-py3-none-any.whl → 5.0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,10 +1,15 @@
- ########
- #
- # repeat_detections_core.py
- #
- # Core utilities shared by find_repeat_detections and remove_repeat_detections.
- #
- ########
+ """
+
+ repeat_detections_core.py
+
+ Core utilities shared by find_repeat_detections and remove_repeat_detections.
+
+ Nothing in this file (in fact nothing in this subpackage) will make sense until you read
+ the RDE user's guide:
+
+ https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
+
+ """

  #%% Imports and environment

@@ -62,161 +67,214 @@ class RepeatDetectionOptions:
  Options that control the behavior of repeat detection elimination
  """

- # Relevant for rendering the folder of images for filtering
- #
- # imageBase can also be a SAS URL, in which case some error-checking is
- # disabled.
+ #: Folder where images live; filenames in the MD results .json file should
+ #: be relative to this folder.
+ #:
+ #: imageBase can also be a SAS URL, in which case some error-checking is
+ #: disabled.
  imageBase = ''
+
+ #: Folder where we should write temporary output.
  outputBase = ''

- # Don't consider detections with confidence lower than this as suspicious
+ #: Don't consider detections with confidence lower than this as suspicious
  confidenceMin = 0.1

- # Don't consider detections with confidence higher than this as suspicious
+ #: Don't consider detections with confidence higher than this as suspicious
  confidenceMax = 1.0

- # What's the IOU threshold for considering two boxes the same?
+ #: What's the IOU threshold for considering two boxes the same?
  iouThreshold = 0.9

- # How many occurrences of a single location (as defined by the IOU threshold)
- # are required before we declare it suspicious?
+ #: How many occurrences of a single location (as defined by the IOU threshold)
+ #: are required before we declare it suspicious?
  occurrenceThreshold = 20

- # Ignore "suspicious" detections smaller than some size
+ #: Ignore "suspicious" detections smaller than some size
  minSuspiciousDetectionSize = 0.0

- # Ignore "suspicious" detections larger than some size; these are often animals
- # taking up the whole image. This is expressed as a fraction of the image size.
+ #: Ignore "suspicious" detections larger than some size; these are often animals
+ #: taking up the whole image. This is expressed as a fraction of the image size.
  maxSuspiciousDetectionSize = 0.2

- # Ignore folders with more than this many images in them
+ #: Ignore folders with more than this many images in them
  maxImagesPerFolder = None

- # A list of classes we don't want to treat as suspicious. Each element is an int.
- excludeClasses = [] # [annotation_constants.detector_bbox_category_name_to_id['person']]
-
- # For very large sets of results, passing chunks of results to and from workers as
- # parameters ('memory') can be memory-intensive, so we can serialize to intermediate
- # files instead ('file').
- #
- # The use of 'file' here is still experimental.
+ #: A list of category IDs (ints) that we don't want consider as candidate repeat detections.
+ #:
+ #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
+ #: detections", which you could do by saying excludeClasses = [2,3].
+ excludeClasses = []
+
+ #: For very large sets of results, passing chunks of results to and from workers as
+ #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
+ #: files instead ('file').
+ #:
+ #: The use of 'file' here is still experimental.
  pass_detections_to_processes_method = 'memory'

+ #: Number of workers to use for parallel operations
  nWorkers = 10

- # Should we use threads or processes for parallelization?
+ #: Should we use threads (True) or processes (False) for parallelization?
+ #:
+ #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
+ #: bParallelizeRendering are both False.
  parallelizationUsesThreads = True

- # Load detections from a filter file rather than finding them from the detector output
-
- # .json file containing detections, generally this is the detectionIndex.json file in
- # the filtering_* folder produced in the first pass
+ #: If this is not empty, we'll load detections from a filter file rather than finding them
+ #: from the detector output. This should be a .json file containing detections, generally this
+ #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
  filterFileToLoad = ''

- # (optional) List of filenames remaining after deletion of identified
- # repeated detections that are actually animals. This should be a flat
- # text file, one relative filename per line. See enumerate_images().
- #
- # This is a pretty esoteric code path and a candidate for removal.
- #
- # The scenario where I see it being most useful is the very hypothetical one
- # where we use an external tool for image handling that allows us to do something
- # smarter and less destructive than deleting images to mark them as non-false-positives.
+ #: (optional) List of filenames remaining after deletion of identified
+ #: repeated detections that are actually animals. This should be a flat
+ #: text file, one relative filename per line.
+ #:
+ #: This is a pretty esoteric code path and a candidate for removal.
+ #:
+ #: The scenario where I see it being most useful is the very hypothetical one
+ #: where we use an external tool for image handling that allows us to do something
+ #: smarter and less destructive than deleting images to mark them as non-false-positives.
  filteredFileListToLoad = None

- # Turn on/off optional outputs
+ #: Should we write the folder of images used to manually review repeat detections?
  bWriteFilteringFolder = True

+ #: For debugging: limit comparisons to a specific number of folders
  debugMaxDir = -1
+
+ #: For debugging: limit rendering to a specific number of folders
  debugMaxRenderDir = -1
+
+ #: For debugging: limit comparisons to a specific number of detections
  debugMaxRenderDetection = -1
+
+ #: For debugging: limit comparisons to a specific number of instances
  debugMaxRenderInstance = -1
+
+ #: Should we parallelize (across cameras) comparisons to find repeat detections?
  bParallelizeComparisons = True
+
+ #: Should we parallelize image rendering?
  bParallelizeRendering = True

- # If this is False (default), a detection from class A is not considered to be "the same"
- # as a detection from class B, even if they're at the same location.
+ #: If this is False (default), a detection from class A is *not* considered to be "the same"
+ #: as a detection from class B, even if they're at the same location.
  categoryAgnosticComparisons = False

- # Determines whether bounding-box rendering errors (typically network errors) should
- # be treated as failures
+ #: Determines whether bounding-box rendering errors (typically network errors) should
+ #: be treated as failures
  bFailOnRenderError = False

+ #: Should we print a warning if images referred to in the MD results file are missing?
  bPrintMissingImageWarnings = True
+
+ #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
+ #: just once ('once') or every time ('all')?
  missingImageWarningType = 'once' # 'all'

- # This does *not* include the tile image grid
+ #: Image width for rendered images (it's called "max" because we don't resize smaller images).
+ #:
+ #: Original size is preserved if this is None.
+ #:
+ #: This does *not* include the tile image grid.
  maxOutputImageWidth = None

- # Box rendering options
+ #: Line thickness (in pixels) for box rendering
  lineThickness = 10
+
+ #: Box expansion (in pixels)
  boxExpansion = 2

- # State variables
+ #: Progress bar used during comparisons and rendering. Do not set externally.
+ #:
+ #: :meta private:
  pbar = None

- # Replace filename tokens after reading, useful when the directory structure
- # has changed relative to the structure the detector saw
+ #: Replace filename tokens after reading, useful when the directory structure
+ #: has changed relative to the structure the detector saw.
  filenameReplacements = {}

- # How many folders up from the leaf nodes should we be going to aggregate images?
+ #: How many folders up from the leaf nodes should we be going to aggregate images into
+ #: cameras?
+ #:
+ #: If this is zero, each leaf folder is treated as a camera.
  nDirLevelsFromLeaf = 0

- # An optional function that takes a string (an image file name) and returns
- # a string (the corresponding folder ID), typically used when multiple folders
- # actually correspond to the same camera in a manufacturer-specific way (e.g.
- # a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
+ #: An optional function that takes a string (an image file name) and returns
+ #: a string (the corresponding folder ID), typically used when multiple folders
+ #: actually correspond to the same camera in a manufacturer-specific way (e.g.
+ #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
+ #:
+ #: See ct_utils for a common replacement function that handles most common
+ #: manufacturer folder names.
  customDirNameFunction = None

- # Include/exclude specific folders... only one of these may be
- # specified; "including" folders includes *only* those folders.
+ #: Include only specific folders, mutually exclusive with [excludeFolders]
  includeFolders = None
+
+ #: Exclude specific folders, mutually exclusive with [includeFolders]
  excludeFolders = None

- # Optionally show *other* detections (i.e., detections other than the
- # one the user is evaluating) in a light gray
+ #: Optionally show *other* detections (i.e., detections other than the
+ #: one the user is evaluating), typically in a light gray.
  bRenderOtherDetections = False
+
+ #: Threshold to use for *other* detections
  otherDetectionsThreshold = 0.2
+
+ #: Line width (in pixels) for *other* detections
  otherDetectionsLineWidth = 1

- # Optionally show a grid that includes a sample image for the detection, plus
- # the top N additional detections
- bRenderDetectionTiles = False
+ #: Optionally show a grid that includes a sample image for the detection, plus
+ #: the top N additional detections
+ bRenderDetectionTiles = True

- # If this is None, we'll render at the width of the original image
+ #: Width of the original image (within the larger output image) when bRenderDetectionTiles
+ #: is True.
+ #:
+ #: If this is None, we'll render the original image in the detection tile image
+ #: at its original width.
  detectionTilesPrimaryImageWidth = None

- # Can be a width in pixels, or a number from 0 to 1 representing a fraction
- # of the primary image width.
- #
- # If you want to render the grid at exactly 1 pixel wide, I guess you're out
- # of luck.
+ #: Width to use for the grid of detection instances.
+ #:
+ #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
+ #: of the primary image width.
+ #:
+ #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
+ #: of luck.
  detectionTilesCroppedGridWidth = 0.6
- detectionTilesPrimaryImageLocation='right'
- detectionTilesMaxCrops = None

- # If bRenderOtherDetections is True, what color should we use to render the
- # (hopefully pretty subtle) non-target detections?
- #
- # In theory I'd like these "other detection" rectangles to be partially
- # transparent, but this is not straightforward, and the alpha is ignored
- # here. But maybe if I leave it here and wish hard enough, someday it
- # will work.
- #
- # otherDetectionsColors = ['dimgray']
+ #: Location of the primary image within the mosaic ('right' or 'left)
+ detectionTilesPrimaryImageLocation = 'right'
+
+ #: Maximum number of individual detection instances to include in the mosaic
+ detectionTilesMaxCrops = 250
+
+ #: If bRenderOtherDetections is True, what color should we use to render the
+ #: (hopefully pretty subtle) non-target detections?
+ #:
+ #: In theory I'd like these "other detection" rectangles to be partially
+ #: transparent, but this is not straightforward, and the alpha is ignored
+ #: here. But maybe if I leave it here and wish hard enough, someday it
+ #: will work.
+ #:
+ #: otherDetectionsColors = ['dimgray']
  otherDetectionsColors = [(105,105,105,100)]

- # Sort detections within a directory so nearby detections are adjacent
- # in the list, for faster review.
- #
- # Can be None, 'xsort', or 'clustersort'
- #
- # * None sorts detections chronologically by first occurrence
- # * 'xsort' sorts detections from left to right
- # * 'clustersort' clusters detections and sorts by cluster
+ #: Sort detections within a directory so nearby detections are adjacent
+ #: in the list, for faster review.
+ #:
+ #: Can be None, 'xsort', or 'clustersort'
+ #:
+ #: * None sorts detections chronologically by first occurrence
+ #: * 'xsort' sorts detections from left to right
+ #: * 'clustersort' clusters detections and sorts by cluster
  smartSort = 'xsort'

- # Only relevant if smartSort == 'clustersort'
+ #: Only relevant if smartSort == 'clustersort'
  smartSortDistanceThreshold = 0.1


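The fields documented above are plain class attributes, so a first RDE pass typically just instantiates the class and overrides a handful of values. A minimal sketch follows; the paths are placeholders, and the import path is assumed from the api package layout shown in the file list above, so adjust both to your installation:

    from api.batch_processing.postprocessing.repeat_detection_elimination import repeat_detections_core

    options = repeat_detections_core.RepeatDetectionOptions()
    options.imageBase = '/data/camera_traps'    # placeholder: folder the MD results filenames are relative to
    options.outputBase = '/data/rde_scratch'    # placeholder: where the filtering_* folder gets written
    options.confidenceMin = 0.1                 # ignore detections below this confidence
    options.iouThreshold = 0.9                  # IOU at which two boxes count as the same location
    options.occurrenceThreshold = 20            # repeats required before a location is "suspicious"
    options.excludeClasses = [2, 3]             # e.g. skip people/vehicles, per the excludeClasses comment above
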
@@ -225,26 +283,28 @@ class RepeatDetectionResults:
  The results of an entire repeat detection analysis
  """

- # The data table (Pandas DataFrame), as loaded from the input json file via
- # load_api_results(). Has columns ['file', 'detections','failure'].
+ #: The data table (Pandas DataFrame), as loaded from the input json file via
+ #: load_api_results(). Has columns ['file', 'detections','failure'].
  detectionResults = None

- # The other fields in the input json file, loaded via load_api_results()
+ #: The other fields in the input json file, loaded via load_api_results()
  otherFields = None

- # The data table after modification
+ #: The data table after modification
  detectionResultsFiltered = None

- # dict mapping folder names to whole rows from the data table
+ #: dict mapping folder names to whole rows from the data table
  rowsByDirectory = None

- # dict mapping filenames to rows in the master table
+ #: dict mapping filenames to rows in the master table
  filenameToRow = None

- # An array of length nDirs, where each element is a list of DetectionLocation
- # objects for that directory that have been flagged as suspicious
+ #: An array of length nDirs, where each element is a list of DetectionLocation
+ #: objects for that directory that have been flagged as suspicious
  suspiciousDetections = None

+ #: The location of the .json file written with information about the RDE
+ #: review images (typically detectionIndex.json)
  filterFile = None


@@ -254,21 +314,25 @@ class IndexedDetection:
  """

  def __init__(self, iDetection=-1, filename='', bbox=[], confidence=-1, category='unknown'):
- """
- Args:
- iDetection: order in API output file
- filename: path to the image of this detection
- bbox: [x_min, y_min, width_of_box, height_of_box]
- """
+
  assert isinstance(iDetection,int)
  assert isinstance(filename,str)
  assert isinstance(bbox,list)
  assert isinstance(category,str)

+ #: index of this detection within all detections for this filename
  self.iDetection = iDetection
+
+ #: path to the image corresponding to this detection
  self.filename = filename
+
+ #: [x_min, y_min, width_of_box, height_of_box]
  self.bbox = bbox
+
+ #: confidence value of this detection
  self.confidence = confidence
+
+ #: category ID (not name) of this detection
  self.category = category

  def __repr__(self):
@@ -280,7 +344,7 @@ class DetectionLocation:
  """
  A unique-ish detection location, meaningful in the context of one
  directory. All detections within an IoU threshold of self.bbox
- will be stored in "instances".
+ will be stored in IndexedDetection objects.
  """

  def __init__(self, instance, detection, relativeDir, category, id=None):
@@ -290,15 +354,28 @@ class DetectionLocation:
  assert isinstance(relativeDir,str)
  assert isinstance(category,str)

- self.instances = [instance] # list of IndexedDetections
+ #: list of IndexedDetections that match this detection
+ self.instances = [instance]
+
+ #: category ID (not name) for this detection
  self.category = category
+
+ #: bbox as x,y,w,h
  self.bbox = detection['bbox']
+
+ #: relative folder (i.e., camera name) in which this detectin was found
  self.relativeDir = relativeDir
+
+ #: relative path to the canonical image representing this detection
  self.sampleImageRelativeFileName = ''
+
+ #: list of detections on that canonical image that match this detection
  self.sampleImageDetections = None

- # This ID is only guaranteed to be unique within a directory
+ #: ID for this detection; this ID is only guaranteed to be unique within a directory
  self.id = id
+
+ #: only used when doing cluster-based sorting
  self.clusterLabel = None

  def __repr__(self):
@@ -307,8 +384,11 @@ class DetectionLocation:

  def to_api_detection(self):
  """
- Converts to a 'detection' dictionary, making the semi-arbitrary assumption that
- the first instance is representative of confidence.
+ Converts this detection to a 'detection' dictionary, making the semi-arbitrary
+ assumption that the first instance is representative of confidence.
+
+ Returns:
+ dict: dictionary in the format used to store detections in MD results
  """

  # This is a bit of a hack right now, but for future-proofing, I don't want to call this
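For reference, the 'detection' dictionary the new docstring refers to is the standard MD results format: a category ID stored as a string, a confidence value, and a bbox stored as [x_min, y_min, width_of_box, height_of_box] in normalized image coordinates. A hypothetical example with made-up values:

    detection = {
        'category': '1',                     # category ID (not name), as a string
        'conf': 0.92,                        # taken from the first (representative) instance
        'bbox': [0.31, 0.42, 0.18, 0.27]     # [x_min, y_min, width_of_box, height_of_box]
    }
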
@@ -328,30 +408,13 @@

  #%% Support functions

- def enumerate_images(dirName,outputFileName=None):
+ def _render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
+ expansion=0):
  """
- Non-recursively enumerates all image files in *dirName* to the text file
- *outputFileName*, as relative paths. This is used to produce a file list
- after removing true positives from the image directory.
-
- Not used directly in this module, but provides a consistent way to enumerate
- files in the format expected by this module.
+ Rendering the detection [detection] on the image [inputFileName], writing the result
+ to [outputFileName].
  """

- imageList = path_utils.find_images(dirName)
- imageList = [os.path.basename(fn) for fn in imageList]
-
- if outputFileName is not None:
- with open(outputFileName,'w') as f:
- for s in imageList:
- f.write(s + '\n')
-
- return imageList
-
-
- def render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
- expansion=0):
-
  im = open_image(inputFileName)
  d = detection.to_api_detection()
  render_detection_bounding_boxes([d],im,thickness=lineWidth,expansion=expansion,
@@ -359,8 +422,12 @@ def render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
  im.save(outputFileName)


- def detection_rect_to_rtree_rect(detection_rect):
- # We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t
+ def _detection_rect_to_rtree_rect(detection_rect):
+ """
+ We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
+ our representation to rtree's.
+ """
+
  l = detection_rect[0]
  b = detection_rect[1]
  r = detection_rect[0] + detection_rect[2]
@@ -368,8 +435,12 @@ def detection_rect_to_rtree_rect(detection_rect):
  return (l,b,r,t)


- def rtree_rect_to_detection_rect(rtree_rect):
- # We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t
+ def _rtree_rect_to_detection_rect(rtree_rect):
+ """
+ We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
+ rtree's representation to ours.
+ """
+
  x = rtree_rect[0]
  y = rtree_rect[1]
  w = rtree_rect[2] - rtree_rect[0]
@@ -377,7 +448,7 @@ def rtree_rect_to_detection_rect(rtree_rect):
  return (x,y,w,h)


- def sort_detections_for_directory(candidateDetections,options):
+ def _sort_detections_for_directory(candidateDetections,options):
  """
  candidateDetections is a list of DetectionLocation objects. Sorts them to
  put nearby detections next to each other, for easier visual review. Returns
@@ -474,14 +545,15 @@ def sort_detections_for_directory(candidateDetections,options):
  raise ValueError('Unrecognized sort method {}'.format(
  options.smartSort))

- # ...def sort_detections_for_directory(...)
+ # ...def _sort_detections_for_directory(...)


- def find_matches_in_directory(dirNameAndRows, options):
+ def _find_matches_in_directory(dirNameAndRows, options):
  """
  dirNameAndRows is a tuple of (name,rows).

- "name" is a location name, typically a folder name.
+ "name" is a location name, typically a folder name, though this may be an arbitrary
+ location identifier.
  "rows" is a Pandas dataframe with one row per image in this location, with columns:


@@ -548,7 +620,7 @@ def find_matches_in_directory(dirNameAndRows, options):

  i_iteration += 1
  filename = row['file']
- if not ct_utils.is_image_file(filename):
+ if not path_utils.is_image_file(filename):
  continue

  if 'max_detection_conf' not in row or 'detections' not in row or \
@@ -643,7 +715,7 @@ def find_matches_in_directory(dirNameAndRows, options):

  bFoundSimilarDetection = False

- rtree_rect = detection_rect_to_rtree_rect(bbox)
+ rtree_rect = _detection_rect_to_rtree_rect(bbox)

  # This will return candidates of all classes
  overlappingCandidateDetections =\
@@ -723,10 +795,10 @@ def find_matches_in_directory(dirNameAndRows, options):
  else:
  return candidateDetections

- # ...def find_matches_in_directory(...)
+ # ...def _find_matches_in_directory(...)


- def update_detection_table(repeatDetectionResults, options, outputFilename=None):
+ def _update_detection_table(repeatDetectionResults, options, outputFilename=None):
  """
  Changes confidence values in repeatDetectionResults.detectionResults so that detections
  deemed to be possible false positives are given negative confidence values.
@@ -870,10 +942,10 @@ def update_detection_table(repeatDetectionResults, options, outputFilename=None)

  return detectionResults

- # ...def update_detection_table(...)
+ # ...def _update_detection_table(...)


- def render_sample_image_for_detection(detection,filteringDir,options):
+ def _render_sample_image_for_detection(detection,filteringDir,options):
  """
  Render a sample image for one unique detection, possibly containing lightly-colored
  high-confidence detections from elsewhere in the sample image.
@@ -954,7 +1026,7 @@ def render_sample_image_for_detection(detection,filteringDir,options):

  else:

- render_bounding_box(detection, inputFullPath, outputFullPath,
+ _render_bounding_box(detection, inputFullPath, outputFullPath,
  lineWidth=options.lineThickness, expansion=options.boxExpansion)

  # ...if we are/aren't rendering other bounding boxes
@@ -1003,11 +1075,7 @@ def render_sample_image_for_detection(detection,filteringDir,options):
  cropped_grid_width=croppedGridWidth,
  output_image_filename=outputFullPath,
  primary_image_location=options.detectionTilesPrimaryImageLocation)
-
- # bDetectionTilesPrimaryImageWidth = None
- # bDetectionTilesCroppedGridWidth = 0.6
- # bDetectionTilesPrimaryImageLocation='right'
-
+
  # ...if we are/aren't rendering detection tiles

  except Exception as e:
@@ -1018,12 +1086,28 @@ def render_sample_image_for_detection(detection,filteringDir,options):
  if options.bFailOnRenderError:
  raise

- # ...def render_sample_image_for_detection(...)
+ # ...def _render_sample_image_for_detection(...)


  #%% Main entry point

  def find_repeat_detections(inputFilename, outputFilename=None, options=None):
+ """
+ Find detections in a MD results file that occur repeatedly and are likely to be
+ rocks/sticks.
+
+ Args:
+ inputFilename (str): the MD results .json file to analyze
+ outputFilename (str, optional): the filename to which we should write results
+ with repeat detections removed, typically set to None during the first
+ part of the RDE process.
+ options (RepeatDetectionOptions): all the interesting options controlling this
+ process; see RepeatDetectionOptions for details.
+
+ Returns:
+ RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
+ for details.
+ """

  ##%% Input handling

@@ -1203,7 +1287,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  assert dirNameAndRow[0] == dirName
  print('Processing dir {} of {}: {}'.format(iDir,len(dirsToSearch),dirName))
  allCandidateDetections[iDir] = \
- find_matches_in_directory(dirNameAndRow, options)
+ _find_matches_in_directory(dirNameAndRow, options)

  else:

@@ -1271,7 +1355,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

  options.pbar = None
  allCandidateDetectionFiles = list(pool.imap(
- partial(find_matches_in_directory,options=options), dirNameAndIntermediateFile))
+ partial(_find_matches_in_directory,options=options), dirNameAndIntermediateFile))


  ##%% Load into a combined list of candidate detections
@@ -1298,11 +1382,11 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  if options.parallelizationUsesThreads:
  options.pbar = tqdm(total=len(dirNameAndRows))
  allCandidateDetections = list(pool.imap(
- partial(find_matches_in_directory,options=options), dirNameAndRows))
+ partial(_find_matches_in_directory,options=options), dirNameAndRows))
  else:
  options.pbar = None
  allCandidateDetections = list(tqdm(pool.imap(
- partial(find_matches_in_directory,options=options), dirNameAndRows)))
+ partial(_find_matches_in_directory,options=options), dirNameAndRows)))

  print('\nFinished looking for similar detections')

@@ -1342,7 +1426,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

  # Sort the above-threshold detections for easier review
  if options.smartSort is not None:
- suspiciousDetections[iDir] = sort_detections_for_directory(
+ suspiciousDetections[iDir] = _sort_detections_for_directory(
  suspiciousDetections[iDir],options)

  print('Found {} suspicious detections in directory {} ({})'.format(
@@ -1427,7 +1511,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

  toReturn.suspiciousDetections = suspiciousDetections

- toReturn.allRowsFiltered = update_detection_table(toReturn, options, outputFilename)
+ toReturn.allRowsFiltered = _update_detection_table(toReturn, options, outputFilename)


  ##%% Create filtering directory
@@ -1501,19 +1585,19 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  if options.parallelizationUsesThreads:
  options.pbar = tqdm(total=len(allSuspiciousDetections))
  allCandidateDetections = list(pool.imap(
- partial(render_sample_image_for_detection,filteringDir=filteringDir,
+ partial(_render_sample_image_for_detection,filteringDir=filteringDir,
  options=options), allSuspiciousDetections))
  else:
  options.pbar = None
  allCandidateDetections = list(tqdm(pool.imap(
- partial(render_sample_image_for_detection,filteringDir=filteringDir,
+ partial(_render_sample_image_for_detection,filteringDir=filteringDir,
  options=options), allSuspiciousDetections)))

  else:

  # Serial loop over detections
  for detection in allSuspiciousDetections:
- render_sample_image_for_detection(detection,filteringDir,options)
+ _render_sample_image_for_detection(detection,filteringDir,options)

  # Delete (large) temporary data from the list of suspicious detections
  for detection in allSuspiciousDetections:
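
Tying the pieces together, the find_repeat_detections() docstring added above implies a first-pass invocation roughly like the following sketch, continuing the options example earlier in this diff (the filename is a placeholder; the companion remove_repeat_detections step runs only after the filtering folder has been manually reviewed):

    results = repeat_detections_core.find_repeat_detections(
        inputFilename='md_results.json',   # placeholder: MD results file to analyze
        outputFilename=None,               # typically None during the first RDE pass
        options=options)

    # results.filterFile points at the detectionIndex.json written alongside the
    # review images; after deleting the review images whose detections are actually
    # animals, remove_repeat_detections uses that file to produce the filtered results.
    print(results.filterFile)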