megadetector-5.0.7-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,61 +1,55 @@
- ########
- #
- # subset_json_detector_output.py
- #
- # Creates one or more subsets of a detector API output file (.json), doing either
- # or both of the following (if both are requested, they happen in this order):
- #
- # 1) Retrieve all elements where filenames contain a specified query string,
- # optionally replacing that query with a replacement token. If the query is blank,
- # can also be used to prepend content to all filenames.
- #
- # Does not support regex's, but supports a special case of ^string to indicate "must start with
- # to match".
- #
- # 2) Create separate .jsons for each unique path, optionally making the filenames
- # in those .json's relative paths. In this case, you specify an output directory,
- # rather than an output path. All images in the folder blah\foo\bar will end up
- # in a .json file called blah_foo_bar.json.
- #
- # Can also apply a confidence threshold.
- #
- # Can also subset by categories above a threshold (programmatic invocation only, this is
- # not supported at the command line yet).
- #
- ###
- #
- # Sample invocations (splitting into multiple json's):
- #
- # Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
- # individual .jsons in 'd:\temp\idfg\output', making filenames relative to their individual
- # folders:
- #
- # python subset_json_detector_output.py "d:\temp\idfg\1800_idfg_statewide_wolf_detections_w_classifications.json" "d:\temp\idfg\output" --split_folders --make_folder_relative
- #
- # Now do the same thing, but instead of writing .json's to d:\temp\idfg\output, write them to *subfolders*
- # corresponding to the subfolders for each .json file.
- #
- # python subset_json_detector_output.py "d:\temp\idfg\1800_detections_S2.json" "d:\temp\idfg\output_to_folders" --split_folders --make_folder_relative --copy_jsons_to_folders
- #
- ###
- #
- # Sample invocations (creating a single subset matching a query):
- #
- # Read from "1800_detections.json", write to "1800_detections_2017.json"
- #
- # Include only images matching "2017", and change "2017" to "blah"
- #
- # python subset_json_detector_output.py "d:\temp\1800_detections.json" "d:\temp\1800_detections_2017_blah.json" --query 2017 --replacement blah
- #
- # Include all images, prepend with "prefix/"
- #
- # python subset_json_detector_output.py "d:\temp\1800_detections.json" "d:\temp\1800_detections_prefix.json" --replacement "prefix/"
- #
- ###
- #
- # To subset a COCO Camera Traps .json database, see subset_json_db.py
- #
- ########
+ r"""
+
+ subset_json_detector_output.py
+
+ Creates one or more subsets of a detector results file (.json), doing either
+ or both of the following (if both are requested, they happen in this order):
+
+ 1) Retrieve all elements where filenames contain a specified query string,
+ optionally replacing that query with a replacement token. If the query is blank,
+ can also be used to prepend content to all filenames.
+
+ Does not support regex's, but supports a special case of ^string to indicate "must start with
+ to match".
+
+ 2) Create separate .jsons for each unique path, optionally making the filenames
+ in those .json's relative paths. In this case, you specify an output directory,
+ rather than an output path. All images in the folder blah/foo/bar will end up
+ in a .json file called blah_foo_bar.json.
+
+ Can also apply a confidence threshold.
+
+ Can also subset by categories above a threshold (programmatic invocation only, this is
+ not supported at the command line yet).
+
+ To subset a COCO Camera Traps .json database, see subset_json_db.py
+
+ **Sample invocation (splitting into multiple json's)**
+
+ Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
+ individual .jsons in 'd:/temp/idfg/output', making filenames relative to their individual
+ folders:
+
+ python subset_json_detector_output.py "d:/temp/idfg/1800_idfg_statewide_wolf_detections_w_classifications.json" "d:/temp/idfg/output" --split_folders --make_folder_relative
+
+ Now do the same thing, but instead of writing .json's to d:/temp/idfg/output, write them to *subfolders*
+ corresponding to the subfolders for each .json file.
+
+ python subset_json_detector_output.py "d:/temp/idfg/1800_detections_S2.json" "d:/temp/idfg/output_to_folders" --split_folders --make_folder_relative --copy_jsons_to_folders
+
+ **Sample invocation (creating a single subset matching a query)**
+
+ Read from "1800_detections.json", write to "1800_detections_2017.json"
+
+ Include only images matching "2017", and change "2017" to "blah"
+
+ python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_2017_blah.json" --query 2017 --replacement blah
+
+ Include all images, prepend with "prefix/"
+
+ python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_prefix.json" --replacement "prefix/"
+
+ """

  #%% Constants and imports

@@ -68,81 +62,85 @@ import re

  from tqdm import tqdm

- from md_utils.ct_utils import args_to_object
- from md_utils.ct_utils import get_max_conf
- from md_utils.ct_utils import invert_dictionary
+ from md_utils.ct_utils import args_to_object, get_max_conf, invert_dictionary
+ from md_utils.path_utils import top_level_folder


  #%% Helper classes

  class SubsetJsonDetectorOutputOptions:
+ """
+ Options used to parameterize subset_json_detector_output()
+ """

- # Only process files containing the token 'query'
+ #: Only process files containing the token 'query'
  query = None

- # Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
- # prepend 'replacement'
+ #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
+ #: prepend 'replacement'
  replacement = None

- # Should we split output into individual .json files for each folder?
+ #: Should we split output into individual .json files for each folder?
  split_folders = False

- # Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
- #
- # 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
- # to a token.
+ #: Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
+ #:
+ #: 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
+ #: to a token.
  split_folder_mode = 'bottom' # 'top'

- # When using the 'n_from_bottom' parameter to define folder splitting, this
- # defines the number of directories from the bottom. 'n_from_bottom' with
- # a parameter of zero is the same as 'bottom'.
- #
- # Same story with 'n_from_top'.
- #
- # When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
- # to a token.
+ #: When using the 'n_from_bottom' parameter to define folder splitting, this
+ #: defines the number of directories from the bottom. 'n_from_bottom' with
+ #: a parameter of zero is the same as 'bottom'.
+ #:
+ #: Same story with 'n_from_top'.
+ #:
+ #: When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
+ #: to a token.
  split_folder_param = 0

- # Only meaningful if split_folders is True: should we convert pathnames to be relative
- # the folder for each .json file?
+ #: Only meaningful if split_folders is True: should we convert pathnames to be relative
+ #: the folder for each .json file?
  make_folder_relative = False

- # Only meaningful if split_folders and make_folder_relative are True: if not None,
- # will copy .json files to their corresponding output directories, relative to
- # output_filename
+ #: Only meaningful if split_folders and make_folder_relative are True: if not None,
+ #: will copy .json files to their corresponding output directories, relative to
+ #: output_filename
  copy_jsons_to_folders = False

- # Should we over-write .json files?
+ #: Should we over-write .json files?
  overwrite_json_files = False

- # If copy_jsons_to_folders is true, do we require that directories already exist?
+ #: If copy_jsons_to_folders is true, do we require that directories already exist?
  copy_jsons_to_folders_directories_must_exist = True

- # Threshold on confidence
+ #: Optional confidence threshold; if not None, detections below this confidence won't be
+ #: included in the output.
  confidence_threshold = None

- # Should we remove failed images?
+ #: Should we remove failed images?
  remove_failed_images = False

- # Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
- # (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
- # remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
- # scenario indeed where you would want to specify both.
+ #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
+ #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
+ #: remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
+ #: scenario indeed where you would want to specify both.
  categories_to_keep = None

- # Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
- # Removes non-matching detections, does not remove images. Not technically mutually exclusize with
- # category_ids_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
+ #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
+ #: Removes non-matching detections, does not remove images. Not technically mutually exclusize with
+ #: category_ids_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
  category_names_to_keep = None

+ #: Set to >0 during testing to limit the number of images that get processed.
  debug_max_images = -1


  #%% Main function

- def write_detection_results(data, output_filename, options):
+ def _write_detection_results(data, output_filename, options):
  """
- Write the detector-output-formatted dict *data* to *output_filename*.
+ Writes the detector-output-formatted dict *data* to *output_filename*.
  """

  if (not options.overwrite_json_files) and os.path.isfile(output_filename):
@@ -160,12 +158,19 @@ def write_detection_results(data, output_filename, options):
  with open(output_filename, 'w') as f:
  json.dump(data,f,indent=1)

- # ...write_detection_results()
+ # ..._write_detection_results()


  def subset_json_detector_output_by_confidence(data, options):
  """
- Remove all detections below options.confidence_threshold, update max confidences accordingly.
+ Removes all detections below options.confidence_threshold.
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  if options.confidence_threshold is None:
@@ -232,7 +237,14 @@ def subset_json_detector_output_by_confidence(data, options):

  def subset_json_detector_output_by_categories(data, options):
  """
- Remove all detections without detections above a threshold for specific categories.
+ Removes all detections without detections above a threshold for specific categories.
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  # If categories_to_keep is supplied as a list, convert to a dict
@@ -334,6 +346,13 @@ def subset_json_detector_output_by_categories(data, options):
  def remove_failed_images(data,options):
  """
  Removed failed images from [data]
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  images_in = data['images']
@@ -365,8 +384,15 @@ def remove_failed_images(data,options):

  def subset_json_detector_output_by_query(data, options):
  """
- Subset to images whose filename matches options.query; replace all instances of
- options.query with options.replacement.
+ Subsets to images whose filename matches options.query; replace all instances of
+ options.query with options.replacement. No-op if options.query_string is None or ''.
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  images_in = data['images']
@@ -415,74 +441,27 @@ def subset_json_detector_output_by_query(data, options):

  # ...subset_json_detector_output_by_query()

-
- def split_path(path, maxdepth=100):
- """
- Splits [path] into all its constituent tokens, e.g.:
-
- c:\blah\boo\goo.txt
-
- ...becomes:
-
- ['c:\\', 'blah', 'boo', 'goo.txt']
-
- http://nicks-liquid-soapbox.blogspot.com/2011/03/splitting-path-to-list-in-python.html
- """
-
- (head, tail) = os.path.split(path)
- return split_path(head, maxdepth - 1) + [tail] \
- if maxdepth and head and head != path \
- else [head or tail]
-
- # ...split_path()
-
-
- def top_level_folder(p):
- """
- Gets the top-level folder from the path *p*; on Windows, will use the top-level folder
- that isn't the drive. E.g., top_level_folder(r"c:\blah\foo") returns "c:\blah". Does not
- include the leaf node, i.e. top_level_folder('/blah/foo') returns '/blah'.
- """
-
- if p == '':
- return ''
-
- # Path('/blah').parts is ('/','blah')
- parts = split_path(p)
-
- if len(parts) == 1:
- return parts[0]
-
- # Handle paths like:
- #
- # /, \, /stuff, c:, c:\stuff
- drive = os.path.splitdrive(p)[0]
- if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
- return os.path.join(parts[0], parts[1])
- else:
- return parts[0]
-
- # ...top_level_folder()
-
-
- if False:
-
- p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
- p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
- p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
- p = ''; s = top_level_folder(p); print(s); assert s == ''
- p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
- p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
- p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
- p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
- p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-

  def subset_json_detector_output(input_filename, output_filename, options, data=None):
  """
- Main internal entry point
+ Main entry point; creates one or more subsets of a detector results file. See the
+ module header comment for more information about the available subsetting approaches.

  Makes a copy of [data] before modifying if a data dictionary is supplied.
+
+ Args:
+ input_filename (str): filename to load and subset; can be None if [data] is supplied
+ output_filename (str): file or folder name (depending on [options]) to which we should
+ write subset results.
+ options (SubsetJsonDetectorOutputOptions): parameters for .json splitting/subsetting;
+ see SubsetJsonDetectorOutputOptions for details.
+ data (dict, optional): data loaded from a .json file; if this is not None, [input_filename]
+ will be ignored. If supplied, this will be copied before it's modified.
+
+ Returns:
+ dict: Results that are either loaded from [input_filename] and processed, or copied
+ from [data] and processed.
+
  """

  if options is None:
@@ -528,7 +507,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N

  if not options.split_folders:

- write_detection_results(data, output_filename, options)
+ _write_detection_results(data, output_filename, options)
  return data

  else:
@@ -558,7 +537,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
  # Split string into folders, keeping delimiters

  # Don't use this, it removes delimiters
- # tokens = split_path(fn)
+ # tokens = _split_path(fn)
  tokens = re.split(r'([\\/])',fn)

  n_tokens_to_keep = ((options.split_folder_param + 1) * 2) - 1;
@@ -621,7 +600,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
  # forward-compatible in that I don't take dependencies on the other fields
  dir_data = data
  dir_data['images'] = folders_to_images[dirname]
- write_detection_results(dir_data, json_fn, options)
+ _write_detection_results(dir_data, json_fn, options)
  print('Wrote {} images to {}'.format(len(dir_data['images']), json_fn))

  # ...for each directory
@@ -713,7 +692,5 @@ def main():

  subset_json_detector_output(args.input_file, args.output_file, options)

-
- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()
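
Since category-based subsetting is flagged above as programmatic-only, a short usage sketch may be useful. The class, function, and option names below come from the diff itself; the file paths, category names, and thresholds are hypothetical, and the import path assumes the wheel's package layout.

    from api.batch_processing.postprocessing.subset_json_detector_output import (
        SubsetJsonDetectorOutputOptions, subset_json_detector_output)

    # Keep only detections at or above these per-category confidence thresholds
    # (category *names*, not IDs); drop all other detections.
    options = SubsetJsonDetectorOutputOptions()
    options.confidence_threshold = 0.2
    options.category_names_to_keep = {'animal': 0.2, 'person': 0.5}

    # Writes the subset to the output file and returns the (modified) dict
    data = subset_json_detector_output('md_results.json',
                                       'md_results_animals_people.json',
                                       options)
    print('Kept {} images'.format(len(data['images'])))

    # Alternatively, split results into one .json per top-level folder, with
    # filenames made relative to each folder; per the module header, the second
    # argument is then treated as an output *directory*.
    split_options = SubsetJsonDetectorOutputOptions()
    split_options.split_folders = True
    split_options.split_folder_mode = 'top'
    split_options.make_folder_relative = True
    subset_json_detector_output('md_results.json', 'output_folder', split_options)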
@@ -1,26 +1,26 @@
- ########
- #
- # top_folders_to_bottom.py
- #
- # Given a base folder with files like:
- #
- # A/1/2/a.jpg
- # B/3/4/b.jpg
- #
- # ...moves the top-level folders to the bottom in a new output folder, i.e., creates:
- #
- # 1/2/A/a.jpg
- # 3/4/B/b.jpg
- #
- # In practice, this is used to make this:
- #
- # animal/camera01/image01.jpg
- #
- # ...look like:
- #
- # camera01/animal/image01.jpg
- #
- ########
+ """
+
+ top_folders_to_bottom.py
+
+ Given a base folder with files like:
+
+ * A/1/2/a.jpg
+ * B/3/4/b.jpg
+
+ ...moves the top-level folders to the bottom in a new output folder, i.e., creates:
+
+ * 1/2/A/a.jpg
+ * 3/4/B/b.jpg
+
+ In practice, this is used to make this:
+
+ animal/camera01/image01.jpg
+
+ ...look like:
+
+ camera01/animal/image01.jpg
+
+ """

  #%% Constants and imports

@@ -35,31 +35,46 @@ from tqdm import tqdm
  from functools import partial
  from multiprocessing.pool import ThreadPool

+ from md_utils.path_utils import path_is_abs
+
+
+ #%% Classes
+
  class TopFoldersToBottomOptions:
+ """
+ Options used to parameterize top_folders_to_bottom()
+ """

  def __init__(self,input_folder,output_folder,copy=True,n_threads=1):
+
+ #: Whether to copy (True) vs. move (False) false when re-organizing
  self.copy = copy
+
+ #: Number of worker threads to use, or <1 to disable parallelization
  self.n_threads = n_threads
+
+ #: Input folder
  self.input_folder = input_folder
+
+ #: Output folder
  self.output_folder = output_folder
- self.overwrite = False

+ #: If this is False and an output file exists, throw an error
+ self.overwrite = False

- #%% Support functions
-
- def path_is_abs(p): return (len(p) > 1) and (p[0] == '/' or p[1] == ':')
-

  #%% Main functions

- def process_file(relative_filename,options,execute=True):
+ def _process_file(relative_filename,options,execute=True):

- assert ('/' in relative_filename) and ('\\' not in relative_filename) and (not path_is_abs(relative_filename))
+ assert ('/' in relative_filename) and \
+ ('\\' not in relative_filename) and \
+ (not path_is_abs(relative_filename))

  # Find top-level folder
  tokens = relative_filename.split('/')
- top_level_folder = tokens.pop(0)
- tokens.insert(len(tokens)-1,top_level_folder)
+ topmost_folder = tokens.pop(0)
+ tokens.insert(len(tokens)-1,topmost_folder)

  # Find file/folder names
  output_relative_path = '/'.join(tokens)
@@ -86,11 +101,35 @@ def process_file(relative_filename,options,execute=True):

  return output_absolute_path

- # ...def process_file()
+ # ...def _process_file()


  def top_folders_to_bottom(options):
-
+ """
+ top_folders_to_bottom.py
+
+ Given a base folder with files like:
+
+ * A/1/2/a.jpg
+ * B/3/4/b.jpg
+
+ ...moves the top-level folders to the bottom in a new output folder, i.e., creates:
+
+ * 1/2/A/a.jpg
+ * 3/4/B/b.jpg
+
+ In practice, this is used to make this:
+
+ animal/camera01/image01.jpg
+
+ ...look like:
+
+ camera01/animal/image01.jpg
+
+ Args:
+ options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.
+
+ """
  os.makedirs(options.output_folder,exist_ok=True)

  # Enumerate input folder
@@ -112,7 +151,7 @@ def top_folders_to_bottom(options):
  relative_files = [s for s in relative_files if '/' in s]

  # Make sure each input file maps to a unique output file
- absolute_output_files = [process_file(s, options, execute=False) for s in relative_files]
+ absolute_output_files = [_process_file(s, options, execute=False) for s in relative_files]
  assert len(absolute_output_files) == len(set(absolute_output_files)),\
  "Error: input filenames don't map to unique output filenames"

@@ -122,13 +161,13 @@ def top_folders_to_bottom(options):
  if options.n_threads <= 1:

  for relative_filename in tqdm(relative_files):
- process_file(relative_filename,options)
+ _process_file(relative_filename,options)

  else:

  print('Starting a pool with {} threads'.format(options.n_threads))
  pool = ThreadPool(options.n_threads)
- process_file_with_options = partial(process_file, options=options)
+ process_file_with_options = partial(_process_file, options=options)
  _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))

  # ...def top_folders_to_bottom()
@@ -180,10 +219,5 @@ def main():

  top_folders_to_bottom(options)

-
- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()
-
-
-
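
The re-organized top_folders_to_bottom module follows the same options-object pattern; a minimal sketch of programmatic use, with hypothetical folder names (the constructor signature is taken from the diff above, and the import path assumes the wheel's package layout):

    from api.batch_processing.postprocessing.top_folders_to_bottom import (
        TopFoldersToBottomOptions, top_folders_to_bottom)

    # Copy (rather than move) files, turning animal/camera01/image01.jpg
    # into camera01/animal/image01.jpg, using four worker threads
    options = TopFoldersToBottomOptions('labeled-images',
                                        'labeled-images-by-camera',
                                        copy=True,
                                        n_threads=4)
    top_folders_to_bottom(options)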
File without changes
@@ -150,5 +150,3 @@ if __name__ == '__main__':
  print('\n')

  detect_process()
-
-