megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of megadetector has been flagged as potentially problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,38 +1,43 @@
-########
-#
-# run_detector_batch.py
-#
-# Module to run MegaDetector on lots of images, writing the results
-# to a file in the same format produced by our batch API:
-#
-# https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
-#
-# This enables the results to be used in our post-processing pipeline; see
-# api/batch_processing/postprocessing/postprocess_batch_results.py .
-#
-# This script can save results to checkpoints intermittently, in case disaster
-# strikes. To enable this, set --checkpoint_frequency to n > 0, and results
-# will be saved as a checkpoint every n images. Checkpoints will be written
-# to a file in the same directory as the output_file, and after all images
-# are processed and final results file written to output_file, the temporary
-# checkpoint file will be deleted. If you want to resume from a checkpoint, set
-# the checkpoint file's path using --resume_from_checkpoint.
-#
-# The `threshold` you can provide as an argument is the confidence threshold above
-# which detections will be included in the output file.
-#
-# Has preliminary multiprocessing support for CPUs only; if a GPU is available, it will
-# use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
-# is not supported when using a GPU.
-#
-# Does not have a command-line option to bind the process to a particular GPU, but you can
-# prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
-#
-# CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
-#
-# You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
-#
-########
+"""
+
+run_detector_batch.py
+
+Module to run MegaDetector on lots of images, writing the results
+to a file in the MegaDetector results format.
+
+https://github.com/agentmorris/MegaDetector/tree/main/api/batch_processing#megadetector-batch-output-format
+
+This enables the results to be used in our post-processing pipeline; see
+api/batch_processing/postprocessing/postprocess_batch_results.py .
+
+This script can save results to checkpoints intermittently, in case disaster
+strikes. To enable this, set --checkpoint_frequency to n > 0, and results
+will be saved as a checkpoint every n images. Checkpoints will be written
+to a file in the same directory as the output_file, and after all images
+are processed and final results file written to output_file, the temporary
+checkpoint file will be deleted. If you want to resume from a checkpoint, set
+the checkpoint file's path using --resume_from_checkpoint.
+
+The `threshold` you can provide as an argument is the confidence threshold above
+which detections will be included in the output file.
+
+Has multiprocessing support for CPUs only; if a GPU is available, it will
+use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
+is not supported when using a GPU.
+
+The lack of GPU multiprocessing support might sound annoying, but in practice we
+run a gazillion images through MegaDetector on multiple GPUs using this script; we
+just use one GPU *per invocation of this script*. Dividing a big batch of images
+into one chunk per GPU happens outside of this script.
+
+Does not have a command-line option to bind the process to a particular GPU, but you can
+prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
+
+CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
+
+You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
+
+"""
 
 #%% Constants, imports, environment
 
@@ -91,7 +96,7 @@ exif_options.byte_handling = 'convert_to_string'
 
 #%% Support functions for multiprocessing
 
-def producer_func(q,image_files):
+def _producer_func(q,image_files):
     """
     Producer function; only used when using the (optional) image queue.
 
@@ -120,7 +125,7 @@ def producer_func(q,image_files):
     print('Finished image loading'); sys.stdout.flush()
 
 
-def consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
+def _consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
     """
     Consumer function; only used when using the (optional) image queue.
 
@@ -177,15 +182,28 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
     when --use_image_queue is specified. Starts a reader process to read images from disk, but
     processes images in the process from which this function is called (i.e., does not currently
     spawn a separate consumer process).
+
+    Args:
+        image_files (list): list of absolute paths to images
+        model_file (str): filename or model identifier (e.g. "MDV5A")
+        confidence_threshold (float): minimum detection confidence to include in
+            the output
+        quiet (bool, optional): suppress per-image console printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+
+    Returns:
+        list: list of dicts in the format returned by process_image()
     """
 
     q = multiprocessing.JoinableQueue(max_queue_size)
     return_queue = multiprocessing.Queue(1)
 
     if use_threads_for_queue:
-        producer = Thread(target=producer_func,args=(q,image_files,))
+        producer = Thread(target=_producer_func,args=(q,image_files,))
     else:
-        producer = Process(target=producer_func,args=(q,image_files,))
+        producer = Process(target=_producer_func,args=(q,image_files,))
     producer.daemon = False
     producer.start()
 
@@ -199,15 +217,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
     if run_separate_consumer_process:
         if use_threads_for_queue:
-            consumer = Thread(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Thread(target=_consumer_func,args=(q,return_queue,model_file,
                                                          confidence_threshold,image_size,))
         else:
-            consumer = Process(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Process(target=_consumer_func,args=(q,return_queue,model_file,
                                                           confidence_threshold,image_size,))
         consumer.daemon = True
         consumer.start()
     else:
-        consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
+        _consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
 
     producer.join()
     print('Producer finished')
@@ -226,13 +244,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
 #%% Other support functions
 
-def chunks_by_number_of_chunks(ls, n):
+def _chunks_by_number_of_chunks(ls, n):
     """
     Splits a list into n even chunks.
+
+    External callers should use ct_utils.split_list_into_n_chunks().
 
-    Args
-    - ls: list
-    - n: int, # of chunks
+    Args:
+        ls (list): list to break up into chunks
+        n (int): number of chunks
     """
 
     for i in range(0, n):
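The body of _chunks_by_number_of_chunks() is elided by the diff beyond the loop header; a typical generator that yields n contiguous, roughly-equal slices looks like this (a sketch of the usual idiom, not necessarily the exact code):

def chunks_by_number_of_chunks(ls, n):
    # Yield n contiguous chunks whose lengths differ by at most one
    for i in range(0, n):
        yield ls[i * len(ls) // n:(i + 1) * len(ls) // n]

# Example: list(chunks_by_number_of_chunks(list(range(10)), 3))
# -> [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]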
@@ -242,18 +262,31 @@ def chunks_by_number_of_chunks(ls, n):
 #%% Image processing functions
 
 def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
-                   quiet=False, image_size=None, checkpoint_queue=None, include_image_size=False,
-                   include_image_timestamp=False, include_exif_data=False):
+                   quiet=False, image_size=None, checkpoint_queue=None,
+                   include_image_size=False, include_image_timestamp=False,
+                   include_exif_data=False):
     """
-    Runs MegaDetector over a list of image files.
-
-    Args
-    - im_files: list of str, paths to image files
-    - detector: loaded model or str (path to .pb/.pt model file)
-    - confidence_threshold: float, only detections above this threshold are returned
+    Runs a detector (typically MegaDetector) over a list of image files.
+    As of 3/2024, this entry point is used when the image queue is enabled, but not in the
+    standard inference path (which instead loops over process_image()).
+
+    Args:
+        im_files (list): paths to image files
+        detector (str or detector object): loaded model or str; if this is a string, it can be a
+            path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
+        confidence_threshold (float): only detections above this threshold are returned
+        use_image_queue (bool, optional): separate image loading onto a dedicated worker process
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        checkpoint_queue (Queue, optional): internal parameter used to pass image queues around
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
 
-    Returns
-    - results: list of dict, each dict represents detections on one image
+    Returns:
+        list: list of dicts, in which each dict represents detections on one image,
         see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
     """
 
@@ -269,7 +302,7 @@ def process_images(im_files, detector, confidence_threshold, use_image_queue=Fal
                                 include_image_size=include_image_size,
                                 include_image_timestamp=include_image_timestamp,
                                 include_exif_data=include_exif_data)
-    else:
+    else:
         results = []
         for im_file in im_files:
             result = process_image(im_file, detector, confidence_threshold,
@@ -292,17 +325,26 @@ def process_image(im_file, detector, confidence_threshold, image=None,
                   include_image_timestamp=False, include_exif_data=False,
                   skip_image_resizing=False):
     """
-    Runs MegaDetector on a single image file.
-
-    Args
-    - im_file: str, path to image file
-    - detector: loaded model
-    - confidence_threshold: float, only detections above this threshold are returned
-    - image: previously-loaded image, if available
-    - skip_image_resizing: whether to skip internal image resizing and rely on external resizing
+    Runs a detector (typically MegaDetector) on a single image file.
+
+    Args:
+        im_file (str): path to image file
+        detector (detector object): loaded model; this can no longer be a string by the time
+            you get this far down the pipeline
+        confidence_threshold (float): only detections above this threshold are returned
+        image (Image, optional): previously-loaded image, if available, used when a worker
+            thread is handling image loads
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+        skip_image_resizing (bool, optional): whether to skip internal image resizing and rely on external resizing
 
     Returns:
-    - result: dict representing detections on one image
+        dict: dict representing detections on one image,
         see the 'images' key in
         https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
     """
@@ -350,7 +392,7 @@ def process_image(im_file, detector, confidence_threshold, image=None,
 # ...def process_image(...)
 
 
-def load_custom_class_mapping(class_mapping_filename):
+def _load_custom_class_mapping(class_mapping_filename):
     """
     This is an experimental hack to allow the use of non-MD YOLOv5 models through
     the same infrastructure; it disables the code that enforces MDv5-like class lists.
@@ -388,34 +430,50 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
                                 class_mapping_filename=None, include_image_size=False,
                                 include_image_timestamp=False, include_exif_data=False):
     """
-    Args
-    - model_file: path to model file, or supported model string (e.g. "MDV5A")
-    - image_file_names: list of strings (image filenames), a single image filename,
-      a folder to recursively search for images in, or a .json or .txt file
-      containing a list of images.
-    - checkpoint_path: str, path to JSON checkpoint file
-    - confidence_threshold: float, only detections above this threshold are returned
-    - checkpoint_frequency: int, write results to JSON checkpoint file every N images
-    - results: list of dict, existing results loaded from checkpoint
-    - n_cores: int, # of CPU cores to use
-    - class_mapping_filename: str, use a non-default class mapping supplied in a .json file
-      or YOLOv5 dataset.yaml file.
-
-    Returns
-    - results: list of dicts; each dict represents detections on one image
+    Load a model file and run it on a list of images.
+
+    Args:
+
+        model_file (str): path to model file, or supported model string (e.g. "MDV5A")
+        image_file_names (list or str): list of strings (image filenames), a single image filename,
+            a folder to recursively search for images in, or a .json or .txt file containing a list
+            of images.
+        checkpoint_path (str, optional): path to use for checkpoints (if None, checkpointing
+            is disabled)
+        confidence_threshold (float, optional): only detections above this threshold are returned
+        checkpoint_frequency (int, optional): write results to JSON checkpoint file every N
+            images; -1 disables checkpointing
+        results (list, optional): list of dicts, existing results loaded from checkpoint; generally
+            not useful if you're using this function outside of the CLI
+        n_cores (int, optional): number of parallel workers to use, ignored if we're running on a GPU
+        use_image_queue (bool, optional): use a dedicated worker for image loading
+        quiet (bool, optional): disable per-image console output
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        class_mapping_filename (str, optional): use a non-default class mapping supplied in a .json
+            file or YOLOv5 dataset.yaml file
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+
+    Returns:
+        list: list of dicts; each dict represents detections on one image
     """
 
+    # Validate input arguments
     if n_cores is None:
         n_cores = 1
 
     if confidence_threshold is None:
         confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
-
-    if checkpoint_frequency is None:
+
+    # Disable checkpointing if checkpoint_path is None
+    if checkpoint_frequency is None or checkpoint_path is None:
         checkpoint_frequency = -1
 
     if class_mapping_filename is not None:
-        load_custom_class_mapping(class_mapping_filename)
+        _load_custom_class_mapping(class_mapping_filename)
 
     # Handle the case where image_file_names is not yet actually a list
     if isinstance(image_file_names,str):
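Together with write_results_to_file() below, this documented signature supports a programmatic equivalent of the CLI, along these lines (a sketch; the import path is inferred from the wheel's layout, and the paths, model string, and checkpoint frequency are illustrative):

from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file

# Write a checkpoint every 1000 images; per the docstring, passing
# checkpoint_path=None would disable checkpointing entirely
results = load_and_run_detector_batch(model_file='MDV5A',
                                      image_file_names='/data/camera-traps',
                                      checkpoint_path='/data/md-checkpoint.json',
                                      checkpoint_frequency=1000)

write_results_to_file(results,
                      '/data/md-results.json',
                      relative_path_base='/data/camera-traps',
                      detector_file='MDV5A')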
@@ -450,7 +508,8 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
                 list_file))
         else:
             raise ValueError(
-                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder')
+                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
+                    image_file_names))
 
     if results is None:
         results = []
@@ -514,12 +573,12 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
             results.append(result)
 
             # Write a checkpoint if necessary
-            if checkpoint_frequency != -1 and count % checkpoint_frequency == 0:
+            if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):
 
                 print('Writing a new checkpoint after having processed {} images since '
                       'last restart'.format(count))
 
-                write_checkpoint(checkpoint_path, results)
+                _write_checkpoint(checkpoint_path, results)
 
     else:
 
@@ -539,7 +598,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
               len(already_processed),n_images_all))
 
         # Divide images into chunks; we'll send one chunk to each worker process
-        image_batches = list(chunks_by_number_of_chunks(image_file_names, n_cores))
+        image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
 
         pool = workerpool(n_cores)
 
@@ -552,7 +611,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
             # Pass the "results" array (which may already contain images loaded from an existing
             # checkpoint) to the checkpoint queue handler function, which will append results to
             # the list as they become available.
-            checkpoint_thread = Thread(target=checkpoint_queue_handler,
+            checkpoint_thread = Thread(target=_checkpoint_queue_handler,
                                        args=(checkpoint_path, checkpoint_frequency,
                                              checkpoint_queue, results), daemon=True)
             checkpoint_thread.start()
@@ -596,7 +655,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
 # ...def load_and_run_detector_batch(...)
 
 
-def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
+def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
     """
     Thread function to accumulate results and write checkpoints when checkpointing and
     multiprocessing are both enabled.
@@ -616,15 +675,15 @@ def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_q
             print('Writing a new checkpoint after having processed {} images since '
                   'last restart'.format(result_count))
 
-            write_checkpoint(checkpoint_path, results)
+            _write_checkpoint(checkpoint_path, results)
 
 
-def write_checkpoint(checkpoint_path, results):
+def _write_checkpoint(checkpoint_path, results):
     """
     Writes the 'images' field in the dict 'results' to a json checkpoint file.
     """
 
-    assert checkpoint_path is not None
+    assert checkpoint_path is not None
 
     # Back up any previous checkpoints, to protect against crashes while we're writing
     # the checkpoint file.
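The backup mentioned in that comment is the usual crash-safety idiom: keep the previous checkpoint on disk until the new one has been completely written. A sketch of the idiom (the module's actual backup logic may differ in details):

import json
import os

def write_checkpoint_safely(checkpoint_path, results):
    # Keep the old checkpoint until the new one is safely on disk
    backup_path = None
    if os.path.isfile(checkpoint_path):
        backup_path = checkpoint_path + '.bak'
        os.replace(checkpoint_path, backup_path)
    with open(checkpoint_path, 'w') as f:
        json.dump({'images': results}, f, indent=1)
    if backup_path is not None:
        os.remove(backup_path)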
@@ -644,9 +703,14 @@ def write_checkpoint(checkpoint_path, results):
 
 def get_image_datetime(image):
     """
-    Returns the EXIF datetime from [image] (a PIL Image object), if available, as a string.
+    Reads EXIF datetime from a PIL Image object.
 
-    [im_file] is used only for error reporting.
+    Args:
+        image (Image): the PIL Image object from which we should read datetime information
+
+    Returns:
+        str: the EXIF datetime from [image], if available, as a string;
+            returns None if EXIF datetime is not available.
     """
 
     exif_tags = read_exif.read_pil_exif(image,exif_options)
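get_image_datetime() delegates the EXIF lookup to the read_exif module; the underlying operation is equivalent to pulling the DateTime tags out of PIL's EXIF data, roughly like this (a sketch using Pillow directly, assuming a reasonably recent Pillow; not this module's actual code path):

from PIL import Image

def get_exif_datetime(image_path):
    # Returns a string like '2023:06:01 12:34:56', or None if unavailable
    exif = Image.open(image_path).getexif()
    exif_ifd = exif.get_ifd(0x8769)             # Exif sub-IFD
    # 36867 = DateTimeOriginal (capture time), 306 = DateTime
    return exif_ifd.get(36867) or exif.get(306)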
@@ -662,26 +726,30 @@ def get_image_datetime(image):
 
 def write_results_to_file(results, output_file, relative_path_base=None,
                           detector_file=None, info=None, include_max_conf=False,
-                          custom_metadata=None):
+                          custom_metadata=None, force_forward_slashes=True):
     """
     Writes list of detection results to JSON output file. Format matches:
 
     https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
 
-    Args
-    - results: list of dict, each dict represents detections on one image
-    - output_file: str, path to JSON output file, should end in '.json'
-    - relative_path_base: str, path to a directory as the base for relative paths
-    - detector_file: filename of the detector used to generate these results, only
-      used to pull out a version number for the "info" field
-    - info: dictionary to use instead of the default "info" field
-    - include_max_conf: old files (version 1.2 and earlier) included a "max_conf" field
-      in each image; this was removed in version 1.3. Set this flag to force the inclusion
-      of this field.
-    - custom_metadata: additional data to include as info['custom_metadata']. Typically
-      a dictionary, but no format checks are performed.
-
-    Returns the complete output dictionary that was written to the output file.
+    Args:
+        results (list): list of dicts; each dict represents detections on one image
+        output_file (str): path to JSON output file, should end in '.json'
+        relative_path_base (str, optional): path to a directory as the base for relative paths, can
+            be None if the paths in [results] are absolute
+        detector_file (str, optional): filename of the detector used to generate these results, only
+            used to pull out a version number for the "info" field
+        info (dict, optional): dictionary to put in the results file instead of the default "info" field
+        include_max_conf (bool, optional): old files (version 1.2 and earlier) included a "max_conf" field
+            in each image; this was removed in version 1.3. Set this flag to force the inclusion
+            of this field.
+        custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
+            a dictionary, but no type/format checks are performed
+        force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
+            forward slashes
+
+    Returns:
+        dict: the MD-formatted dictionary that was written to [output_file]
     """
 
     if relative_path_base is not None:
@@ -692,6 +760,14 @@ def write_results_to_file(results, output_file, relative_path_base=None,
             results_relative.append(r_relative)
         results = results_relative
 
+    if force_forward_slashes:
+        results_converted = []
+        for r in results:
+            r_converted = copy.copy(r)
+            r_converted['file'] = r_converted['file'].replace('\\','/')
+            results_converted.append(r_converted)
+        results = results_converted
+
     # The typical case: we need to build the 'info' struct
     if info is None:
 
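The dict assembled here follows the MegaDetector batch output format linked in the docstring above; for reference, an abbreviated example of the structure (field values illustrative; bounding boxes are normalized [x_min, y_min, width, height]):

md_results = {
    'images': [
        {
            'file': 'camera1/IMG_0001.JPG',
            'detections': [
                {'category': '1', 'conf': 0.92, 'bbox': [0.41, 0.33, 0.25, 0.31]}
            ]
        }
    ],
    'detection_categories': {'1': 'animal', '2': 'person', '3': 'vehicle'},
    'info': {'format_version': '1.3', 'detector': 'md_v5a.0.0.pt'}
}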
@@ -988,7 +1064,7 @@ def main():
     assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
 
     if args.class_mapping_filename is not None:
-        load_custom_class_mapping(args.class_mapping_filename)
+        _load_custom_class_mapping(args.class_mapping_filename)
 
     # Load the checkpoint if available
     #
@@ -1137,8 +1213,7 @@ def main():
         os.remove(checkpoint_path)
         print('Deleted checkpoint file {}'.format(checkpoint_path))
 
-    print('Done!')
-
+    print('Done, thanks for MegaDetect\'ing!')
 
 if __name__ == '__main__':
     main()