PyPI - megadetector - Versions diffs - 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl - Mend

megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (62) hide show

api/batch_processing/data_preparation/manage_local_batch.py +278 -197
api/batch_processing/data_preparation/manage_video_batch.py +7 -2
api/batch_processing/postprocessing/add_max_conf.py +1 -0
api/batch_processing/postprocessing/compare_batch_results.py +110 -60
api/batch_processing/postprocessing/load_api_results.py +55 -69
api/batch_processing/postprocessing/md_to_labelme.py +1 -0
api/batch_processing/postprocessing/postprocess_batch_results.py +158 -50
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +222 -74
api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
classification/prepare_classification_script.py +191 -191
data_management/coco_to_yolo.py +65 -44
data_management/databases/integrity_check_json_db.py +7 -5
data_management/generate_crops_from_cct.py +1 -1
data_management/importers/animl_results_to_md_results.py +2 -2
data_management/importers/noaa_seals_2019.py +1 -1
data_management/importers/zamba_results_to_md_results.py +2 -2
data_management/labelme_to_coco.py +34 -6
data_management/labelme_to_yolo.py +1 -1
data_management/lila/create_lila_blank_set.py +474 -0
data_management/lila/create_lila_test_set.py +2 -1
data_management/lila/create_links_to_md_results_files.py +1 -1
data_management/lila/download_lila_subset.py +46 -21
data_management/lila/generate_lila_per_image_labels.py +23 -14
data_management/lila/get_lila_annotation_counts.py +16 -10
data_management/lila/lila_common.py +14 -11
data_management/lila/test_lila_metadata_urls.py +116 -0
data_management/resize_coco_dataset.py +12 -10
data_management/yolo_output_to_md_output.py +40 -13
data_management/yolo_to_coco.py +34 -21
detection/process_video.py +36 -14
detection/pytorch_detector.py +1 -1
detection/run_detector.py +73 -18
detection/run_detector_batch.py +104 -24
detection/run_inference_with_yolov5_val.py +127 -26
detection/run_tiled_inference.py +153 -43
detection/video_utils.py +3 -1
md_utils/ct_utils.py +79 -3
md_utils/md_tests.py +253 -15
md_utils/path_utils.py +129 -24
md_utils/process_utils.py +26 -7
md_utils/split_locations_into_train_val.py +215 -0
md_utils/string_utils.py +10 -0
md_utils/url_utils.py +0 -2
md_utils/write_html_image_list.py +1 -0
md_visualization/visualization_utils.py +17 -2
md_visualization/visualize_db.py +8 -0
md_visualization/visualize_detector_output.py +185 -104
{megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/METADATA +2 -2
{megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/RECORD +62 -58
{megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
taxonomy_mapping/map_new_lila_datasets.py +43 -39
taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
taxonomy_mapping/preview_lila_taxonomy.py +27 -27
taxonomy_mapping/species_lookup.py +33 -13
taxonomy_mapping/taxonomy_csv_checker.py +7 -5
{megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
{megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0

md_utils/ct_utils.py CHANGED Viewed

@@ -266,6 +266,7 @@ def point_dist(p1,p2):
     """
     Distance between two points, represented as length-two tuples.
     """
     return math.sqrt( ((p1[0]-p2[0])**2) + ((p1[1]-p2[1])**2) )
@@ -328,16 +329,82 @@ def split_list_into_fixed_size_chunks(L,n):
     return [L[i * n:(i + 1) * n] for i in range((len(L) + n - 1) // n )]
-def split_list_into_n_chunks(L, n):
+def split_list_into_n_chunks(L, n, chunk_strategy='greedy'):
     """
     Splits the list or tuple L into n equally-sized chunks (some chunks may be one
-    element smaller than others, i.e. len(L) does not have to be a multiple of n.
+    element smaller than others, i.e. len(L) does not have to be a multiple of n).
+
+    chunk_strategy can be "greedy" (default; each chunk is a contiguous slice of L, and
+    when len(L) is not a multiple of n, the first len(L) % n chunks get one extra item)
+    or "balanced" (items are dealt out to the chunks round-robin, so item i goes into
+    chunk i % n).
+
+    Returns a list of n chunks.  In "greedy" mode each chunk is a slice of L; in
+    "balanced" mode each chunk is a list.  Raises ValueError for any other
+    chunk_strategy.
+    """
+    if chunk_strategy == 'greedy':
+        # k is the base chunk size, m is the number of chunks that get one extra item
+        k, m = divmod(len(L), n)
+        return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
+    elif chunk_strategy == 'balanced':
+        chunks = [ [] for _ in range(n) ]
+        for i_item,item in enumerate(L):
+            i_chunk = i_item % n
+            chunks[i_chunk].append(item)
+        return chunks
+    else:
+        raise ValueError('Invalid chunk strategy: {}'.format(chunk_strategy))
+def sort_dictionary_by_value(d,sort_values=None,reverse=False):
+    """
+    Returns a new dictionary with the same items as [d], ordered by value.  If
+    [sort_values] is supplied, it is a dictionary mapping each key of [d] to the
+    value used to order that key; otherwise d's own values are used.  [reverse]
+    is passed through to sorted().
+    """
+    if sort_values is None:
+        def sort_key(pair):
+            return pair[1]
+    else:
+        def sort_key(pair):
+            return sort_values[pair[0]]
+    ordered_pairs = sorted(d.items(), key=sort_key, reverse=reverse)
+    return dict(ordered_pairs)
+def invert_dictionary(d):
+    """
+    Builds a new value -> key dictionary from [d].  Values are not checked for
+    uniqueness; if a value occurs more than once, the key seen last wins.
+    """
+    inverted = {}
+    for key, value in d.items():
+        inverted[value] = key
+    return inverted
+def image_file_to_camera_folder(image_fn):
-    """
+    r"""
+    Remove common overflow folders (e.g. 100RECNX, 101RECNX) from paths, i.e. turn:
+
+    a\b\c\100RECNX\image001.jpg
-    k, m = divmod(len(L), n)
-    return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
+
+    ...into:
+
+    a/b/c
+
+    Returns the same thing as os.path.dirname() (i.e., just the folder name) if no
+    overflow folders are present.
+
+    Always converts backslashes to slashes.
+    """
+    import re
+    # 100RECNX is the overflow folder style for Reconyx cameras
+    # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
+    # 100_BTCF is the overflow folder style for Browning cameras
+    # 100MEDIA is the overflow folder style used on a number of consumer-grade cameras
+    #
+    # Raw strings keep \d as a regex escape rather than an (invalid) string escape
+    patterns = [r'/\d+RECNX/',r'/\d+EK\d+/',r'/\d+_BTCF/',r'/\d+MEDIA/']
+    image_fn = image_fn.replace('\\','/')
+    for pat in patterns:
+        image_fn = re.sub(pat,'/',image_fn)
+    camera_folder = os.path.dirname(image_fn)
+    return camera_folder
 #%% Test drivers
@@ -345,6 +412,15 @@ if False:
     pass
+    #%% Test image_file_to_camera_folder()
+    relative_path = 'a/b/c/d/100EK113/blah.jpg'
+    print(image_file_to_camera_folder(relative_path))
+    relative_path = 'a/b/c/d/100RECNX/blah.jpg'
+    print(image_file_to_camera_folder(relative_path))
     #%% Test a few rectangle distances
     r1 = [0,0,1,1]; r2 = [0,0,1,1]; assert rect_distance(r1,r2)==0

md_utils/md_tests.py CHANGED Viewed

@@ -1,24 +1,17 @@
 ########
 #
-# md-tests.py
+# md_tests.py
 #
 # A series of tests to validate basic repo functionality and verify either "correct"
 # inference behavior, or - when operating in environments other than the training
 # environment - acceptable deviation from the correct results.
 #
-# This module should not depend on anything else in this repo outside of the,
-# tests themselves, even if it means some duplicated code (e.g. for downloading files), since
-# much of what it tries to test is, e.g., imports.
+# This module should not depend on anything else in this repo outside of the
+# tests themselves, even if it means some duplicated code (e.g. for downloading files),
+# since much of what it tries to test is, e.g., imports.
 #
 ########
-#%% TODO
-# Video tests
-# Augmented inference tests
-# Checkpoint tests
 #%% Imports and constants
 ### Only standard imports belong here, not MD-specific imports ###
@@ -54,6 +47,7 @@ class MDTestOptions:
     max_coord_error = 0.001
     max_conf_error = 0.005
     cli_working_dir = None
+    yolo_working_folder = None
 #%% Support functions
@@ -119,9 +113,9 @@ def download_test_data(options):
     if download_zipfile:
         print('Downloading test data zipfile')
         urllib.request.urlretrieve(options.test_data_url, local_zipfile)
-        print('Finished download')
+        print('Finished download to {}'.format(local_zipfile))
     else:
-        print('Bypassing test data zipfile download')
+        print('Bypassing test data zipfile download for {}'.format(local_zipfile))
     ## Unzip data
@@ -164,6 +158,7 @@ def download_test_data(options):
     options.all_test_files = test_files
     options.test_images = [fn for fn in test_files if os.path.splitext(fn.lower())[1] in ('.jpg','.jpeg','.png')]
     options.test_videos = [fn for fn in test_files if os.path.splitext(fn.lower())[1] in ('.mp4','.avi')]
+    options.test_videos = [fn for fn in options.test_videos if 'rendered' not in fn]
 # ...def download_test_data(...)
@@ -247,6 +242,8 @@ def execute_and_print(cmd,print_output=True):
 def run_python_tests(options):
+    print('\n*** Starting module tests ***\n')
     ## Prepare data
     download_test_data(options)
@@ -393,7 +390,84 @@ def run_python_tests(options):
     assert os.path.isfile(rde_results.filterFile),\
         'Could not find RDE output file {}'.format(rde_results.filterFile)
-    print('Finished running Python tests')
+    # TODO: add remove_repeat_detections test here
+    #
+    # It's already tested in the CLI tests, so this is not urgent.
+    ## Video test (single video)
+    from detection.process_video import ProcessVideoOptions, process_video
+    video_options = ProcessVideoOptions()
+    video_options.model_file = 'MDV5A'
+    video_options.input_video_file = os.path.join(options.scratch_dir,options.test_videos[0])
+    video_options.output_json_file = os.path.join(options.scratch_dir,'single_video_output.json')
+    video_options.output_video_file = os.path.join(options.scratch_dir,'video_scratch/rendered_video.mp4')
+    video_options.frame_folder = os.path.join(options.scratch_dir,'video_scratch/frame_folder')
+    video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
+    video_options.render_output_video = True
+    # video_options.keep_rendered_frames = False
+    # video_options.keep_rendered_frames = False
+    video_options.force_extracted_frame_folder_deletion = True
+    video_options.force_rendered_frame_folder_deletion = True
+    # video_options.reuse_results_if_available = False
+    # video_options.reuse_frames_if_available = False
+    video_options.recursive = True
+    video_options.verbose = False
+    video_options.fourcc = 'mp4v'
+    # video_options.rendering_confidence_threshold = None
+    # video_options.json_confidence_threshold = 0.005
+    video_options.frame_sample = 5
+    video_options.n_cores = 5
+    # video_options.debug_max_frames = -1
+    # video_options.class_mapping_filename = None
+    _ = process_video(video_options)
+    assert os.path.isfile(video_options.output_video_file), \
+        'Python video test failed to render output video file'
+    assert os.path.isfile(video_options.output_json_file), \
+        'Python video test failed to render output .json file'
+    ## Video test (folder)
+    from detection.process_video import ProcessVideoOptions, process_video_folder
+    video_options = ProcessVideoOptions()
+    video_options.model_file = 'MDV5A'
+    video_options.input_video_file = os.path.join(options.scratch_dir,
+                                                  os.path.dirname(options.test_videos[0]))
+    video_options.output_json_file = os.path.join(options.scratch_dir,'video_folder_output.json')
+    # video_options.output_video_file = None
+    video_options.frame_folder = os.path.join(options.scratch_dir,'video_scratch/frame_folder')
+    video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
+    video_options.render_output_video = False
+    # video_options.keep_rendered_frames = False
+    # video_options.keep_rendered_frames = False
+    video_options.force_extracted_frame_folder_deletion = True
+    video_options.force_rendered_frame_folder_deletion = True
+    # video_options.reuse_results_if_available = False
+    # video_options.reuse_frames_if_available = False
+    video_options.recursive = True
+    video_options.verbose = False
+    # video_options.fourcc = None
+    # video_options.rendering_confidence_threshold = None
+    # video_options.json_confidence_threshold = 0.005
+    video_options.frame_sample = 5
+    video_options.n_cores = 5
+    # video_options.debug_max_frames = -1
+    # video_options.class_mapping_filename = None
+    _ = process_video_folder(video_options)
+    assert os.path.isfile(video_options.output_json_file), \
+        'Python video test failed to render output .json file'
+    print('\n*** Finished module tests ***\n')
 # ...def run_python_tests(...)
@@ -402,6 +476,8 @@ def run_python_tests(options):
 def run_cli_tests(options):
+    print('\n*** Starting CLI tests ***\n')
     ## chdir if necessary
     if options.cli_working_dir is not None:
@@ -473,6 +549,158 @@ def run_cli_tests(options):
     print('Running: {}'.format(cmd))
     cmd_results = execute_and_print(cmd)
+    ## RDE
+    rde_output_dir = os.path.join(options.scratch_dir,'rde_output_cli')
+    if options.cli_working_dir is None:
+        cmd = 'python -m api.batch_processing.postprocessing.repeat_detection_elimination.find_repeat_detections'
+    else:
+        cmd = 'python  api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py'
+    cmd += ' {}'.format(inference_output_file)
+    cmd += ' --imageBase {}'.format(image_folder)
+    cmd += ' --outputBase {}'.format(rde_output_dir)
+    cmd += ' --occurrenceThreshold 1' # Use an absurd number here to make sure we get some suspicious detections
+    print('Running: {}'.format(cmd))
+    cmd_results = execute_and_print(cmd)
+    # Find the latest filtering folder
+    filtering_output_dir = os.listdir(rde_output_dir)
+    filtering_output_dir = [fn for fn in filtering_output_dir if fn.startswith('filtering_')]
+    filtering_output_dir = [os.path.join(rde_output_dir,fn) for fn in filtering_output_dir]
+    filtering_output_dir = [fn for fn in filtering_output_dir if os.path.isdir(fn)]
+    filtering_output_dir = sorted(filtering_output_dir)[-1]
+    print('Using RDE filtering folder {}'.format(filtering_output_dir))
+    filtered_output_file = inference_output_file.replace('.json','_filtered.json')
+    if options.cli_working_dir is None:
+        cmd = 'python -m api.batch_processing.postprocessing.repeat_detection_elimination.remove_repeat_detections'
+    else:
+        cmd = 'python  api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py'
+    cmd += ' {} {} {}'.format(inference_output_file,filtered_output_file,filtering_output_dir)
+    print('Running: {}'.format(cmd))
+    cmd_results = execute_and_print(cmd)
+    assert os.path.isfile(filtered_output_file), \
+        'Could not find RDE output file {}'.format(filtered_output_file)
+    ## Run inference on a folder (tiled)
+    image_folder = os.path.join(options.scratch_dir,'md-test-images')
+    tiling_folder = os.path.join(options.scratch_dir,'tiling-folder')
+    inference_output_file_tiled = os.path.join(options.scratch_dir,'folder_inference_output_tiled.json')
+    if options.cli_working_dir is None:
+        cmd = 'python -m detection.run_tiled_inference'
+    else:
+        cmd = 'python detection/run_tiled_inference.py'
+    cmd += ' {} {} {} {}'.format(
+        model_file,image_folder,tiling_folder,inference_output_file_tiled)
+    cmd += ' --overwrite_handling overwrite'
+    print('Running: {}'.format(cmd))
+    cmd_results = execute_and_print(cmd)
+    with open(inference_output_file_tiled,'r') as f:
+        results_from_file = json.load(f) # noqa
+    ## Run inference on a folder (augmented)
+    if options.yolo_working_folder is None:
+        print('Bypassing YOLOv5 val tests, no yolo folder supplied')
+    else:
+        image_folder = os.path.join(options.scratch_dir,'md-test-images')
+        yolo_results_folder = os.path.join(options.scratch_dir,'yolo-output-folder')
+        yolo_symlink_folder = os.path.join(options.scratch_dir,'yolo-symlink_folder')
+        inference_output_file_yolo_val = os.path.join(options.scratch_dir,'folder_inference_output_yolo_val.json')
+        if options.cli_working_dir is None:
+            cmd = 'python -m detection.run_inference_with_yolov5_val'
+        else:
+            cmd = 'python detection/run_inference_with_yolov5_val.py'
+        cmd += ' {} {} {}'.format(
+            model_file,image_folder,inference_output_file_yolo_val)
+        cmd += ' --yolo_working_folder {}'.format(options.yolo_working_folder)
+        cmd += ' --yolo_results_folder {}'.format(yolo_results_folder)
+        cmd += ' --symlink_folder {}'.format(yolo_symlink_folder)
+        cmd += ' --augment_enabled 1'
+        # cmd += ' --no_use_symlinks'
+        cmd += ' --overwrite_handling overwrite'
+        print('Running: {}'.format(cmd))
+        cmd_results = execute_and_print(cmd)
+        with open(inference_output_file_yolo_val,'r') as f:
+            results_from_file = json.load(f) # noqa
+    ## Video test
+    model_file = 'MDV5A'
+    video_inference_output_file = os.path.join(options.scratch_dir,'video_inference_output.json')
+    output_video_file = os.path.join(options.scratch_dir,'video_scratch/cli_rendered_video.mp4')
+    frame_folder = os.path.join(options.scratch_dir,'video_scratch/frame_folder_cli')
+    frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder_cli')
+    video_fn = os.path.join(options.scratch_dir,options.test_videos[-1])
+    output_dir = os.path.join(options.scratch_dir,'single_video_test_cli')
+    if options.cli_working_dir is None:
+        cmd = 'python -m detection.process_video'
+    else:
+        cmd = 'python detection/process_video.py'
+    cmd += ' {} {}'.format(model_file,video_fn)
+    cmd += ' --frame_folder {} --frame_rendering_folder {} --output_json_file {} --output_video_file {}'.format(
+        frame_folder,frame_rendering_folder,video_inference_output_file,output_video_file)
+    cmd += ' --render_output_video --fourcc mp4v'
+    cmd += ' --force_extracted_frame_folder_deletion --force_rendered_frame_folder_deletion --n_cores 5 --frame_sample 3'
+    print('Running: {}'.format(cmd))
+    cmd_results = execute_and_print(cmd)
+    ## Run inference on a folder (again, so we can do a comparison)
+    image_folder = os.path.join(options.scratch_dir,'md-test-images')
+    model_file = 'MDV5B'
+    inference_output_file_alt = os.path.join(options.scratch_dir,'folder_inference_output_alt.json')
+    if options.cli_working_dir is None:
+        cmd = 'python -m detection.run_detector_batch'
+    else:
+        cmd = 'python detection/run_detector_batch.py'
+    cmd += ' {} {} {} --recursive'.format(
+        model_file,image_folder,inference_output_file_alt)
+    cmd += ' --output_relative_filenames --quiet --include_image_size'
+    cmd += ' --include_image_timestamp --include_exif_data'
+    print('Running: {}'.format(cmd))
+    cmd_results = execute_and_print(cmd)
+    with open(inference_output_file_alt,'r') as f:
+        results_from_file = json.load(f) # noqa
+    ## Compare the two files
+    comparison_output_folder = os.path.join(options.scratch_dir,'results_comparison')
+    image_folder = os.path.join(options.scratch_dir,'md-test-images')
+    results_files_string = '"{}" "{}"'.format(
+        inference_output_file,inference_output_file_alt)
+    if options.cli_working_dir is None:
+        cmd = 'python -m api.batch_processing.postprocessing.compare_batch_results'
+    else:
+        cmd = 'python api/batch_processing/postprocessing/compare_batch_results.py'
+    cmd += ' {} {} {}'.format(comparison_output_folder,image_folder,results_files_string)
+    print('Running: {}'.format(cmd))
+    cmd_results = execute_and_print(cmd)
+    assert cmd_results['status'] == 0, 'Error generating comparison HTML'
+    assert os.path.isfile(os.path.join(comparison_output_folder,'index.html')), \
+        'Failed to generate comparison HTML'
+    print('\n*** Finished CLI tests ***\n')
 # ...def run_cli_tests(...)
@@ -518,9 +746,19 @@ if False:
     options.disable_gpu = False
     options.cpu_execution_is_error = False
-    options.disable_video_tests = False
+    options.skip_video_tests = False
+    options.skip_python_tests = False
+    options.skip_cli_tests = False
     options.scratch_dir = None
+    options.test_data_url = 'https://lila.science/public/md-test-package.zip'
+    options.force_data_download = False
+    options.force_data_unzip = False
+    options.warning_mode = True
+    options.test_image_subdir = 'md-test-images'
+    options.max_coord_error = 0.001
+    options.max_conf_error = 0.005
     options.cli_working_dir = r'c:\git\MegaDetector'
+    options.yolo_working_folder = r'c:\git\yolov5'
     #%%

md_utils/path_utils.py CHANGED Viewed

@@ -21,7 +21,8 @@ import zipfile
 from zipfile import ZipFile
 from datetime import datetime
 from typing import Container, Iterable, List, Optional, Tuple, Sequence
-from multiprocessing.pool import ThreadPool
+from multiprocessing.pool import Pool, ThreadPool
+from functools import partial
 from tqdm import tqdm
 IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
@@ -34,31 +35,51 @@ CHAR_LIMIT = 255
 #%% General path functions
-def recursive_file_list(base_dir, convert_slashes=True, return_relative_paths=False):
-    """
+def recursive_file_list(base_dir, convert_slashes=True,
+                        return_relative_paths=False, sort_files=True,
+                        recursive=True):
+    r"""
     Enumerate files (not directories) in [base_dir], optionally converting
     \ to /
+
+    If [recursive] is False, only files directly inside [base_dir] are listed.
+    If [return_relative_paths] is True, paths are returned relative to [base_dir].
+    If [sort_files] is True, the returned list is sorted.
     """
     all_files = []
-    for root, _, filenames in os.walk(base_dir):
-        for filename in filenames:
-            full_path = os.path.join(root, filename)
-            all_files.append(full_path)
+    if recursive:
+        for root, _, filenames in os.walk(base_dir):
+            for filename in filenames:
+                full_path = os.path.join(root, filename)
+                all_files.append(full_path)
+    else:
+        # Non-recursive: keep only the immediate children of base_dir that are files
+        all_files_relative = os.listdir(base_dir)
+        all_files = [os.path.join(base_dir,fn) for fn in all_files_relative]
+        all_files = [fn for fn in all_files if os.path.isfile(fn)]
     if return_relative_paths:
         all_files = [os.path.relpath(fn,base_dir) for fn in all_files]
     if convert_slashes:
         all_files = [fn.replace('\\', '/') for fn in all_files]
+    if sort_files:
+        all_files = sorted(all_files)
-    all_files = sorted(all_files)
     return all_files
-def split_path(path: str) -> List[str]:
+def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_files=True,
+              recursive=False):
     """
+    Thin wrapper around recursive_file_list() that defaults to a non-recursive listing.
+    It exists because "recursive_file_list" is an awkward name for a function that has
+    a "recursive" option; all arguments are passed through unchanged.
+    """
+    return recursive_file_list(base_dir,
+                               convert_slashes=convert_slashes,
+                               return_relative_paths=return_relative_paths,
+                               sort_files=sort_files,
+                               recursive=recursive)
+def split_path(path: str) -> List[str]:
+    r"""
     Splits [path] into all its constituent tokens.
     Non-recursive version of:
@@ -88,7 +109,7 @@ def split_path(path: str) -> List[str]:
 def fileparts(path: str) -> Tuple[str, str, str]:
-    """
+    r"""
     Breaks down a path into the directory path, filename, and extension.
     Note that the '.' lives with the extension, and separators are removed.
@@ -187,7 +208,8 @@ def safe_create_link(link_exists,link_new):
     it.
     Errors if link_new already exists but it's not a link.
-    """
+    """
     if os.path.exists(link_new) or os.path.islink(link_new):
         assert os.path.islink(link_new)
         if not os.readlink(link_new) == link_exists:
@@ -240,7 +262,8 @@ def find_image_strings(strings: Iterable[str]) -> List[str]:
 def find_images(dirname: str, recursive: bool = False,
-                return_relative_paths: bool = False, convert_slashes: bool = False) -> List[str]:
+                return_relative_paths: bool = False,
+                convert_slashes: bool = False) -> List[str]:
     """
     Finds all files in a directory that look like image file names. Returns
     absolute paths unless return_relative_paths is set.  Uses the OS-native
@@ -270,11 +293,11 @@ def find_images(dirname: str, recursive: bool = False,
 def clean_filename(filename: str, allow_list: str = VALID_FILENAME_CHARS,
                    char_limit: int = CHAR_LIMIT, force_lower: bool = False) -> str:
-    """
+    r"""
     Removes non-ASCII and other invalid filename characters (on any
     reasonable OS) from a filename, then trims to a maximum length.
-    Does not allow :\/, use clean_path if you want to preserve those.
+    Does not allow :\/ by default, use clean_path if you want to preserve those.
     Adapted from
     https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
@@ -319,15 +342,71 @@ def flatten_path(pathname: str, separator_chars: str = SEPARATOR_CHARS) -> str:
 #%% Platform-independent way to open files in their associated application
-import sys,subprocess
+import sys,subprocess,platform,re
+def environment_is_wsl():
+    """
+    Reports whether this process appears to be running under WSL: sys.platform
+    must be 'linux' or 'posix', and the platform.uname() fields must mention both
+    "microsoft" and "wsl" (case-insensitive).
+    """
+    running_on_linux = sys.platform in ('linux','posix')
+    if running_on_linux:
+        uname_text = ' '.join(platform.uname()).lower()
+        return all(token in uname_text for token in ('microsoft','wsl'))
+    return False
+def wsl_path_to_windows_path(filename):
+    r"""
+    Converts a WSL path to a Windows path by running "wslpath -w", or returns None
+    if that's not possible (wslpath can't be launched, or it reports an error).  E.g.
+    converts:
+
+    /mnt/e/a/b/c
+
+    ...to:
+
+    e:\a\b\c
+    """
+    try:
+        result = subprocess.run(['wslpath', '-w', filename], text=True, capture_output=True)
+    except OSError:
+        # wslpath could not be launched (e.g. it isn't installed); report this the
+        # same way as a failed conversion rather than raising.
+        result = None
+    if result is None or result.returncode != 0:
+        print('Could not convert path {} from WSL to Windows'.format(filename))
+        return None
+    return result.stdout.strip()
-def open_file(filename):
-    if sys.platform == "win32":
+def open_file(filename,attempt_to_open_in_wsl_host=False):
+    """
+    Opens [filename] in the native OS file handler.  If attempt_to_open_in_wsl_host
+    is True, and we're in WSL, attempts to open [filename] in Windows (folders via
+    explorer.exe, files via "cmd.exe /C start"); if the path can't be converted to a
+    Windows path, falls back to xdg-open.
+    """
+    if sys.platform == 'win32':
         os.startfile(filename)
+    elif sys.platform == 'darwin':
+        opener = 'open'
+        subprocess.call([opener, filename])
+    elif attempt_to_open_in_wsl_host and environment_is_wsl():
+        windows_path = wsl_path_to_windows_path(filename)
+        if windows_path is None:
+            # Fall back to xdg-open; there is no Windows path to hand to the host,
+            # so the Windows-side branches below must not run.
+            subprocess.call(['xdg-open', filename])
+        elif os.path.isdir(filename):
+            subprocess.run(["explorer.exe", windows_path])
+        else:
+            os.system("cmd.exe /C start %s" % (re.escape(windows_path)))
     else:
-        opener = "open" if sys.platform == "darwin" else "xdg-open"
+        opener = 'xdg-open'
         subprocess.call([opener, filename])
 #%% File list functions
@@ -403,7 +482,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     relative_filenames = recursive_file_list(input_folder,return_relative_paths=True)
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
-        for input_fn_relative in relative_filenames:
+        for input_fn_relative in tqdm(relative_filenames,disable=(not verbose)):
             input_fn_abs = os.path.join(input_folder,input_fn_relative)
             zipf.write(input_fn_abs,
                        arcname=input_fn_relative,
@@ -413,19 +492,45 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     return output_fn
-def parallel_zip_files(input_files,max_workers=16):
+def parallel_zip_files(input_files, max_workers=16, use_threads=True):
     """
     Zip one or more files to separate output files in parallel, leaving the
-    original files in place.
+    original files in place.  Each file is zipped to [filename].zip.
+
+    Uses a thread pool if [use_threads] is True, otherwise a process pool, with at
+    most [max_workers] workers.  Does nothing if [input_files] is empty.
     """
+    # A pool can't be created with zero workers, so bail out early on empty input
+    if len(input_files) == 0:
+        return
     n_workers = min(max_workers,len(input_files))
-    pool = ThreadPool(n_workers)
+    if use_threads:
+        pool = ThreadPool(n_workers)
+    else:
+        pool = Pool(n_workers)
-    with tqdm(total=len(input_files)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(zip_file,input_files)):
-            pbar.update()
+    try:
+        with tqdm(total=len(input_files)) as pbar:
+            for _ in pool.imap_unordered(zip_file,input_files):
+                pbar.update()
+    finally:
+        # Release the workers even if one of the zip operations fails
+        pool.close()
+        pool.join()
+def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
+                         compresslevel=9, overwrite=False):
+    """
+    Zip one or more folders to separate output files in parallel, leaving the
+    original folders in place.  Each folder is zipped to [folder_name].zip.
+
+    Uses a thread pool if [use_threads] is True, otherwise a process pool, with at
+    most [max_workers] workers.  [compresslevel] and [overwrite] are passed through
+    to zip_folder().  Does nothing if [input_folders] is empty.
+    """
+    # A pool can't be created with zero workers, so bail out early on empty input
+    if len(input_folders) == 0:
+        return
+    n_workers = min(max_workers,len(input_folders))
+    if use_threads:
+        pool = ThreadPool(n_workers)
+    else:
+        pool = Pool(n_workers)
+    zip_one_folder = partial(zip_folder,overwrite=overwrite,compresslevel=compresslevel)
+    try:
+        with tqdm(total=len(input_folders)) as pbar:
+            for _ in pool.imap_unordered(zip_one_folder,input_folders):
+                pbar.update()
+    finally:
+        # Release the workers even if one of the zip operations fails
+        pool.close()
+        pool.join()
 def unzip_file(input_file, output_folder=None):
     """
     Unzip a zipfile to the specified output folder, defaulting to the same location as

megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl