megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
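
The per-line diff below covers megadetector/utils/path_utils.py (+1457 −398); many of its hunks are trailing-whitespace cleanups, so some removed/added line pairs read identically.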
@@ -24,6 +24,7 @@ import tarfile
 import webbrowser
 import subprocess
 import re
+import tempfile

 from zipfile import ZipFile
 from datetime import datetime
@@ -34,6 +35,7 @@ from shutil import which
 from tqdm import tqdm

 from megadetector.utils.ct_utils import is_iterable
+from megadetector.utils.ct_utils import make_test_folder
 from megadetector.utils.ct_utils import sort_dictionary_by_value

 # Should all be lower-case
@@ -47,14 +49,14 @@ CHAR_LIMIT = 255

 #%% General path functions

-def recursive_file_list(base_dir,
-                        convert_slashes=True,
-                        return_relative_paths=False,
+def recursive_file_list(base_dir,
+                        convert_slashes=True,
+                        return_relative_paths=False,
                         sort_files=True,
                         recursive=True):
     r"""
     Enumerates files (not directories) in [base_dir].
-
+
     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -64,15 +66,15 @@ def recursive_file_list(base_dir,
         sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
             provided by os.walk()
         recursive (bool, optional): enumerate recursively
-
+
     Returns:
         list: list of filenames
     """
-
+
     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)
-
+
     all_files = []
-
+
     if recursive:
         for root, _, filenames in os.walk(base_dir):
             for filename in filenames:
@@ -82,29 +84,29 @@
         all_files_relative = os.listdir(base_dir)
         all_files = [os.path.join(base_dir,fn) for fn in all_files_relative]
         all_files = [fn for fn in all_files if os.path.isfile(fn)]
-
+
     if return_relative_paths:
         all_files = [os.path.relpath(fn,base_dir) for fn in all_files]

     if convert_slashes:
         all_files = [fn.replace('\\', '/') for fn in all_files]
-
+
     if sort_files:
         all_files = sorted(all_files)
-
+
     return all_files


-def file_list(base_dir,
+def file_list(base_dir,
               convert_slashes=True,
-              return_relative_paths=False,
-              sort_files=True,
+              return_relative_paths=False,
+              sort_files=True,
               recursive=False):
     """
-    Trivial wrapper for recursive_file_list, which was a poor function name choice
-    at the time, since I later wanted to add non-recursive lists, but it doesn't
+    Trivial wrapper for recursive_file_list, which was a poor function name choice
+    at the time, since I later wanted to add non-recursive lists, but it doesn't
     make sense to have a "recursive" option in a function called "recursive_file_list".
-
+
     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -114,11 +116,11 @@ def file_list(base_dir,
         sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
             provided by os.walk()
         recursive (bool, optional): enumerate recursively
-
+
     Returns:
-        list: list of filenames
+        list: list of filenames
     """
-
+
     return recursive_file_list(base_dir,convert_slashes,return_relative_paths,sort_files,
                                recursive=recursive)

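For orientation, a minimal usage sketch of the two enumeration functions above (the folder path is hypothetical):

    from megadetector.utils.path_utils import file_list, recursive_file_list

    # Recursive by default; returns sorted paths with forward slashes
    all_files = recursive_file_list('/data/camera-traps')

    # The wrapper defaults to non-recursive enumeration
    top_level_files = file_list('/data/camera-traps', return_relative_paths=True)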
@@ -128,10 +130,9 @@ def folder_list(base_dir,
                 return_relative_paths=False,
                 sort_folders=True,
                 recursive=False):
-
     """
     Enumerates folders (not files) in [base_dir].
-
+
     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -141,81 +142,81 @@
         sort_files (bool, optional): force folders to be sorted, otherwise uses the sorting
             provided by os.walk()
         recursive (bool, optional): enumerate recursively
-
+
     Returns:
         list: list of folder names
     """
-
+
     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)
-
+
     folders = []

-    if recursive:
+    if recursive:
         folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
-                folders.append(os.path.join(root, d))
+                folders.append(os.path.join(root, d))
     else:
         folders = os.listdir(base_dir)
         folders = [os.path.join(base_dir,fn) for fn in folders]
         folders = [fn for fn in folders if os.path.isdir(fn)]
-
+
     if return_relative_paths:
         folders = [os.path.relpath(fn,base_dir) for fn in folders]

     if convert_slashes:
         folders = [fn.replace('\\', '/') for fn in folders]
-
+
     if sort_folders:
-        folders = sorted(folders)
-
+        folders = sorted(folders)
+
     return folders


 def folder_summary(folder,print_summary=True):
     """
     Returns (and optionally prints) a summary of [folder], including:
-
+
     * The total number of files
     * The total number of folders
-    * The number of files for each extension
-
+    * The number of files for each extension
+
     Args:
         folder (str): folder to summarize
         print_summary (bool, optional): whether to print the summary
-
+
     Returns:
         dict: with fields "n_files", "n_folders", and "extension_to_count"
     """
-
+
     assert os.path.isdir(folder), '{} is not a folder'.format(folder)
-
+
     folders_relative = folder_list(folder,return_relative_paths=True,recursive=True)
     files_relative = file_list(folder,return_relative_paths=True,recursive=True)
-
+
     extension_to_count = defaultdict(int)
-
+
     for fn in files_relative:
         ext = os.path.splitext(fn)[1]
         extension_to_count[ext] += 1
-
+
     extension_to_count = sort_dictionary_by_value(extension_to_count,reverse=True)
-
+
     if print_summary:
         for extension in extension_to_count.keys():
             print('{}: {}'.format(extension,extension_to_count[extension]))
         print('')
         print('Total files: {}'.format(len(files_relative)))
         print('Total folders: {}'.format(len(folders_relative)))
-
+
     to_return = {}
     to_return['n_files'] = len(files_relative)
     to_return['n_folders'] = len(folders_relative)
-    to_return['extension_to_count'] = extension_to_count
-
+    to_return['extension_to_count'] = extension_to_count
+
     return to_return
-
-
+
+
 def fileparts(path):
     r"""
     Breaks down a path into the directory path, filename, and extension.
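A short sketch of folder_summary(), whose return fields appear in the docstring above (the path is hypothetical):

    from megadetector.utils.path_utils import folder_summary

    summary = folder_summary('/data/camera-traps', print_summary=False)
    print(summary['n_files'], summary['n_folders'])
    # extension_to_count maps extensions (e.g. '.jpg') to counts, sorted descending
    print(summary['extension_to_count'])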
@@ -223,25 +224,25 @@ def fileparts(path):
     Note that the '.' lives with the extension, and separators are removed.

     Examples:
-
+
     .. code-block:: none

-        >>> fileparts('file')
+        >>> fileparts('file')
         ('', 'file', '')
         >>> fileparts(r'c:/dir/file.jpg')
         ('c:/dir', 'file', '.jpg')
         >>> fileparts('/dir/subdir/file.jpg')
-        ('/dir/subdir', 'file', '.jpg')
+        ('/dir/subdir', 'file', '.jpg')

     Args:
         path (str): path name to separate into parts
     Returns:
-        tuple: tuple containing (p,n,e):
+        tuple: tuple containing (p,n,e):
             - p: str, directory path
             - n: str, filename without extension
             - e: str, extension including the '.'
     """
-
+
     # ntpath seems to do the right thing for both Windows and Unix paths
     p = ntpath.dirname(path)
     basename = ntpath.basename(path)
@@ -257,27 +258,27 @@ def insert_before_extension(filename, s=None, separator='.'):
     appends [s].

     Examples:
-
+
     .. code-block:: none
-
+
         >>> insert_before_extension('/dir/subdir/file.ext', 'insert')
         '/dir/subdir/file.insert.ext'
         >>> insert_before_extension('/dir/subdir/file', 'insert')
         '/dir/subdir/file.insert'
         >>> insert_before_extension('/dir/subdir/file')
         '/dir/subdir/file.2020.07.20.10.54.38'
-
+
     Args:
         filename (str): filename to manipulate
         s (str, optional): string to insert before the extension in [filename], or
             None to insert a datestamp
         separator (str, optional): separator to place between the filename base
             and the inserted string
-
+
     Returns:
         str: modified string
     """
-
+
     assert len(filename) > 0
     if s is None or len(s) == 0:
         s = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
@@ -290,9 +291,9 @@ def split_path(path):
     Splits [path] into all its constituent file/folder tokens.

     Examples:
-
+
     .. code-block:: none
-
+
         >>> split_path(r'c:\dir\subdir\file.txt')
         ['c:\\', 'dir', 'subdir', 'file.txt']
         >>> split_path('/dir/subdir/file.jpg')
@@ -301,13 +302,19 @@
         ['c:\\']
         >>> split_path('/')
         ['/']
-
+
     Args:
         path (str): path to split into tokens
-
+
     Returns:
         list: list of path tokens
     """
+
+    # Edge cases
+    if path == '':
+        return ''
+    if path is None:
+        return None

     parts = []
     while True:
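Note that the new guards in split_path() echo degenerate inputs back rather than returning a list: '' yields '' and None yields None. A sketch of the 5.0.29 behavior (expected values follow the diff above):

    from megadetector.utils.path_utils import split_path

    split_path('/dir/subdir/file.jpg')  # ['/', 'dir', 'subdir', 'file.jpg']
    split_path('')                      # '' (a string, not an empty list)
    split_path(None)                    # None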
@@ -325,32 +332,32 @@ def path_is_abs(p):
     """
     Determines whether [p] is an absolute path. An absolute path is defined as
     one that starts with slash, backslash, or a letter followed by a colon.
-
+
     Args:
         p (str): path to evaluate
-
+
     Returns:
         bool: True if [p] is an absolute path, else False
     """
-
+
     return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')


 def safe_create_link(link_exists,link_new):
     """
     Creates a symlink at [link_new] pointing to [link_exists].
-
+
     If [link_new] already exists, make sure it's a link (not a file),
     and if it has a different target than [link_exists], removes and re-creates
     it.
-
+
     Errors if [link_new] already exists but it's not a link.
-
+
     Args:
         link_exists (str): the source of the (possibly-new) symlink
         link_new (str): the target of the (possibly-new) symlink
     """
-
+
     if os.path.exists(link_new) or os.path.islink(link_new):
         assert os.path.islink(link_new)
         if not os.readlink(link_new) == link_exists:
@@ -358,35 +365,35 @@ def safe_create_link(link_exists,link_new):
             os.symlink(link_exists,link_new)
     else:
         os.symlink(link_exists,link_new)
-
+

 def remove_empty_folders(path, remove_root=False):
     """
     Recursively removes empty folders within the specified path.
-
+
     Args:
-        path (str): the folder from which we should recursively remove
+        path (str): the folder from which we should recursively remove
             empty folders.
-        remove_root (bool, optional): whether to remove the root directory if
+        remove_root (bool, optional): whether to remove the root directory if
             it's empty after removing all empty subdirectories. This will always
             be True during recursive calls.
-
+
     Returns:
         bool: True if the directory is empty after processing, False otherwise
     """
-
+
     # Verify that [path] is a directory
     if not os.path.isdir(path):
         return False
-
+
     # Track whether the current directory is empty
     is_empty = True
-
+
     # Iterate through all items in the directory
     for item in os.listdir(path):
-
+
         item_path = os.path.join(path, item)
-
+
         # If it's a directory, process it recursively
         if os.path.isdir(item_path):
             # If the subdirectory is empty after processing, it will be removed
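Usage is straightforward; a minimal sketch (the staging path is hypothetical):

    from megadetector.utils.path_utils import remove_empty_folders

    # Recursively prune empty subfolders; the root itself is kept unless remove_root=True
    remove_empty_folders('/data/staging', remove_root=False)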
@@ -396,118 +403,57 @@ def remove_empty_folders(path, remove_root=False):
         else:
             # If there's a file, the directory is not empty
             is_empty = False
-
+
     # If the directory is empty and we're supposed to remove it
     if is_empty and remove_root:
         try:
-            os.rmdir(path)
+            os.rmdir(path)
         except Exception as e:
             print('Error removing directory {}: {}'.format(path,str(e)))
             is_empty = False
-
+
     return is_empty

 # ...def remove_empty_folders(...)


-def top_level_folder(p):
-    r"""
-    Gets the top-level folder from the path *p*.
-
-    On UNIX, this is straightforward:
-
-        /blah/foo
-
-    ...returns '/blah'
-
-    On Windows, we define this as the top-level folder that isn't the drive, so:
-
-        c:\blah\foo
-
-    ...returns 'c:\blah'.
-
-    Args:
-        p (str): filename to evaluate
-
-    Returns:
-        str: the top-level folder in [p], see above for details on how this is defined
-    """
-
-    if p == '':
-        return ''
-
-    # Path('/blah').parts is ('/','blah')
-    parts = split_path(p)
-
-    if len(parts) == 1:
-        return parts[0]
-
-    # Handle paths like:
-    #
-    # /, \, /stuff, c:, c:\stuff
-    drive = os.path.splitdrive(p)[0]
-    if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
-        return os.path.join(parts[0], parts[1])
-    else:
-        return parts[0]
-
-# ...top_level_folder()
-
-
 def path_join(*paths, convert_slashes=True):
     r"""
     Wrapper for os.path.join that optionally converts backslashes to forward slashes.
-
+
     Args:
         *paths (variable-length set of strings): Path components to be joined.
         convert_slashes (bool, optional): whether to convert \\ to /
-
+
     Returns:
         A string with the joined path components.
     """
-
+
     joined_path = os.path.join(*paths)
     if convert_slashes:
         return joined_path.replace('\\', '/')
     else:
         return joined_path

-
-#%% Test driver for top_level_folder
-
-if False:
-
-    #%%

-    p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
-    p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
-    p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
-    p = ''; s = top_level_folder(p); print(s); assert s == ''
-    p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
-    p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
-    p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-    p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
-    p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-
-
 #%% Image-related path functions

 def is_image_file(s, img_extensions=IMG_EXTENSIONS):
     """
     Checks a file's extension against a hard-coded set of image file
     extensions. Uses case-insensitive comparison.
-
+
     Does not check whether the file exists, only determines whether the filename
     implies it's an image file.
-
+
     Args:
         s (str): filename to evaluate for image-ness
         img_extensions (list, optional): list of known image file extensions
-
+
     Returns:
         bool: True if [s] appears to be an image file, else False
     """
-
+
     ext = os.path.splitext(s)[1]
     return ext.lower() in img_extensions

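5.0.29 removes top_level_folder() and its inline test driver. Downstream code that still needs it can reproduce the deleted implementation; the following stand-in is copied from the removed lines above and is not part of 5.0.29:

    import os
    from megadetector.utils.path_utils import split_path

    def top_level_folder(p):
        """Stand-in for the function removed in 5.0.29 (see the deleted lines above)."""
        if p == '':
            return ''
        parts = split_path(p)
        if len(parts) == 1:
            return parts[0]
        # Handle paths like /, \, /stuff, c:, c:\stuff
        drive = os.path.splitdrive(p)[0]
        if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' \
                or parts[0] in ['\\', '/']:
            return os.path.join(parts[0], parts[1])
        else:
            return parts[0]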
@@ -516,27 +462,27 @@ def find_image_strings(strings):
     """
     Given a list of strings that are potentially image file names, looks for
     strings that actually look like image file names (based on extension).
-
+
     Args:
         strings (list): list of filenames to check for image-ness
-
+
     Returns:
         list: the subset of [strings] that appear to be image filenames
     """
-
+
     return [s for s in strings if is_image_file(s)]


-def find_images(dirname,
-                recursive=False,
-                return_relative_paths=False,
+def find_images(dirname,
+                recursive=False,
+                return_relative_paths=False,
                 convert_slashes=True):
     """
     Finds all files in a directory that look like image file names. Returns
     absolute paths unless return_relative_paths is set. Uses the OS-native
     path separator unless convert_slashes is set, in which case will always
     use '/'.
-
+
     Args:
         dirname (str): the folder to search for images
         recursive (bool, optional): whether to search recursively
@@ -547,30 +493,30 @@ def find_images(dirname,
     Returns:
         list: list of image filenames found in [dirname]
     """
-
+
     assert os.path.isdir(dirname), '{} is not a folder'.format(dirname)
-
+
     if recursive:
         strings = glob.glob(os.path.join(dirname, '**', '*.*'), recursive=True)
     else:
         strings = glob.glob(os.path.join(dirname, '*.*'))
-
+
     image_files = find_image_strings(strings)
-
+
     if return_relative_paths:
         image_files = [os.path.relpath(fn,dirname) for fn in image_files]
-
+
     image_files = sorted(image_files)
-
+
     if convert_slashes:
         image_files = [fn.replace('\\', '/') for fn in image_files]
-
+
     return image_files


 #%% Filename cleaning functions

-def clean_filename(filename,
+def clean_filename(filename,
                    allow_list=VALID_FILENAME_CHARS,
                    char_limit=CHAR_LIMIT,
                    force_lower= False):
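For reference, a minimal sketch of find_images() as documented above (the folder is hypothetical):

    from megadetector.utils.path_utils import find_images

    # Returns sorted image filenames, absolute unless return_relative_paths=True
    images = find_images('/data/camera-traps', recursive=True)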
@@ -582,18 +528,18 @@ def clean_filename(filename,

     Adapted from
     https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
-
+
     Args:
         filename (str): filename to clean
         allow_list (str, optional): string containing all allowable filename characters
         char_limit (int, optional): maximum allowable filename length, if None will skip this
             step
         force_lower (bool, optional): convert the resulting filename to lowercase
-
-    returns:
-        str: cleaned version of [filename]
+
+    Returns:
+        str: cleaned version of [filename]
     """
-
+
     # keep only valid ascii chars
     cleaned_filename = (unicodedata.normalize('NFKD', filename)
                         .encode('ASCII', 'ignore').decode())
@@ -607,26 +553,26 @@
     return cleaned_filename


-def clean_path(pathname,
+def clean_path(pathname,
                allow_list=VALID_PATH_CHARS,
                char_limit=CHAR_LIMIT,
                force_lower=False):
     """
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then optionally trims to a maximum length.
-
+
     Args:
         pathname (str): path name to clean
         allow_list (str, optional): string containing all allowable filename characters
         char_limit (int, optional): maximum allowable filename length, if None will skip this
             step
         force_lower (bool, optional): convert the resulting filename to lowercase
-
-    returns:
-        str: cleaned version of [filename]
+
+    Returns:
+        str: cleaned version of [filename]
     """
-
-    return clean_filename(pathname, allow_list=allow_list,
+
+    return clean_filename(pathname, allow_list=allow_list,
                           char_limit=char_limit, force_lower=force_lower)


@@ -635,34 +581,34 @@ def flatten_path(pathname,separator_chars=SEPARATOR_CHARS,separator_char_replace
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then trims to a maximum length. Replaces all valid
     separators with [separator_char_replacement.]
-
+
     Args:
         pathname (str): path name to flatten
         separator_chars (str, optional): string containing all known path separators
-        separator_char_replacement (str, optional): string to insert in place of
+        separator_char_replacement (str, optional): string to insert in place of
             path separators.
-
+
     Returns:
         str: flattened version of [pathname]
     """
-
+
     s = clean_path(pathname)
     for c in separator_chars:
         s = s.replace(c, separator_char_replacement)
     return s


-def is_executable(filename):
+def is_executable(filename):
     """
     Checks whether [filename] is on the system path and marked as executable.
-
+
     Args:
         filename (str): filename to check for executable status
-
+
     Returns:
         bool: True if [filename] is on the system path and marked as executable, otherwise False
     """
-
+
     # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script

     return which(filename) is not None
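A sketch of the filename-cleaning helpers above (the inputs and the '~' replacement character are illustrative):

    from megadetector.utils.path_utils import clean_filename, flatten_path, is_executable

    safe = clean_filename('rep@rt 2024?.csv')   # drops characters outside the allow list
    flat = flatten_path('2024/site-a/img001.jpg',
                        separator_char_replacement='~')  # path separators become '~'
    have_ffmpeg = is_executable('ffmpeg')       # True if ffmpeg is on the system path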
@@ -673,247 +619,247 @@ def is_executable(filename):
 def environment_is_wsl():
     """
     Determines whether we're running in WSL.
-
+
     Returns:
-        True if we're running in WSL.
+        True if we're running in WSL.
     """
-
+
     if sys.platform not in ('linux','posix'):
         return False
     platform_string = ' '.join(platform.uname()).lower()
     return 'microsoft' in platform_string and 'wsl' in platform_string
-
+

 def wsl_path_to_windows_path(filename, failure_behavior='none'):
     r"""
     Converts a WSL path to a Windows path. For example, converts:
-
+
         /mnt/e/a/b/c
-
+
     ...to:
-
+
         e:\a\b\c
-
+
     Args:
         filename (str): filename to convert
         failure_behavior (str): what to do if the path can't be processed as a WSL path.
             'none' to return None in this case, 'original' to return the original path.
-
+
     Returns:
         str: Windows equivalent to the WSL path [filename]
     """
-
+
     assert failure_behavior in ('none','original'), \
         'Unrecognized failure_behavior value {}'.format(failure_behavior)
-
+
     # Check whether the path follows the standard WSL mount pattern
     wsl_path_pattern = r'^/mnt/([a-zA-Z])(/.*)?$'
     match = re.match(wsl_path_pattern, filename)
-
+
     if match:

         # Extract the drive letter and the rest of the path
         drive_letter = match.group(1)
         path_remainder = match.group(2) if match.group(2) else ''
-
+
         # Convert forward slashes to backslashes for Windows
         path_remainder = path_remainder.replace('/', '\\')
-
+
         # Format the Windows path
         windows_path = f"{drive_letter}:{path_remainder}"
         return windows_path
-
+
     if failure_behavior == 'none':
         return None
     else:
         return filename

 # ...def wsl_path_to_windows_path(...)
-
-
+
+
 def windows_path_to_wsl_path(filename, failure_behavior='none'):
     r"""
     Converts a Windows path to a WSL path, or returns None if that's not possible. E.g.
     converts:
-
+
         e:\a\b\c
-
+
     ...to:
-
+
         /mnt/e/a/b/c
-
+
     Args:
         filename (str): filename to convert
         failure_behavior (str): what to do if the path can't be processed as a Windows path.
             'none' to return None in this case, 'original' to return the original path.
-
+
     Returns:
         str: WSL equivalent to the Windows path [filename]
     """
-
+
     assert failure_behavior in ('none','original'), \
         'Unrecognized failure_behavior value {}'.format(failure_behavior)
-
+
     filename = filename.replace('\\', '/')
-
+
     # Check whether the path follows a Windows drive letter pattern
     windows_path_pattern = r'^([a-zA-Z]):(/.*)?$'
     match = re.match(windows_path_pattern, filename)
-
+
     if match:
         # Extract the drive letter and the rest of the path
         drive_letter = match.group(1).lower() # Convert to lowercase for WSL
         path_remainder = match.group(2) if match.group(2) else ''
-
+
         # Format the WSL path
         wsl_path = f"/mnt/{drive_letter}{path_remainder}"
         return wsl_path
-
+
     if failure_behavior == 'none':
         return None
     else:
         return filename
-
+
 # ...def window_path_to_wsl_path(...)


 def open_file_in_chrome(filename):
     """
-    Open a file in chrome, regardless of file type. I typically use this to open
+    Open a file in chrome, regardless of file type. I typically use this to open
     .md files in Chrome.
-
+
     Args:
         filename (str): file to open
-
+
     Return:
         bool: whether the operation was successful
     """
-
+
     # Create URL
     abs_path = os.path.abspath(filename)
-
+
     system = platform.system()
     if system == 'Windows':
         url = f'file:///{abs_path.replace(os.sep, "/")}'
     else: # macOS and Linux
         url = f'file://{abs_path}'
-
+
     # Determine the Chrome path
     if system == 'Windows':
-
+
         # This is a native Python module, but it only exists on Windows
         import winreg
-
+
         chrome_paths = [
             os.path.expanduser("~") + r"\AppData\Local\Google\Chrome\Application\chrome.exe",
             r"C:\Program Files\Google\Chrome\Application\chrome.exe",
             r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
         ]
-
+
         # Default approach: run from a typical chrome location
         for path in chrome_paths:
             if os.path.exists(path):
                 subprocess.run([path, url])
                 return True
-
+
         # Method 2: Check registry for Chrome path
         try:
-            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
+            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                 r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe") as key:
                 chrome_path = winreg.QueryValue(key, None)
                 if chrome_path and os.path.exists(chrome_path):
                     subprocess.run([chrome_path, url])
                     return True
-        except:
+        except Exception:
             pass
-
+
         # Method 3: Try alternate registry location
         try:
-            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
+            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                 r"Software\Google\Chrome\BLBeacon") as key:
                 chrome_path = os.path.join(os.path.dirname(winreg.QueryValueEx(key, "version")[0]), "chrome.exe")
                 if os.path.exists(chrome_path):
                     subprocess.run([chrome_path, url])
                     return True
-        except:
+        except Exception:
             pass
-
+
         # Method 4: Try system path or command
         for chrome_cmd in ["chrome", "chrome.exe", "googlechrome", "google-chrome"]:
             try:
                 subprocess.run([chrome_cmd, url], shell=True)
                 return True
-            except:
+            except Exception:
                 continue
-
+
         # Method 5: Use Windows URL protocol handler
         try:
             os.startfile(url)
             return True
-        except:
+        except Exception:
             pass
-
-        # Method 6: Use rundll32
+
+        # Method 6: Use rundll32
         try:
             cmd = f'rundll32 url.dll,FileProtocolHandler {url}'
             subprocess.run(cmd, shell=True)
             return True
-        except:
+        except Exception:
             pass
-
+
     elif system == 'Darwin':
-
+
         chrome_paths = [
             '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
             os.path.expanduser('~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')
         ]
-
+
         for path in chrome_paths:
             if os.path.exists(path):
                 subprocess.run([path, url])
                 return True
-
+
         # Fallback to 'open' command with Chrome as the app
         try:
             subprocess.run(['open', '-a', 'Google Chrome', url])
             return True
-        except:
+        except Exception:
             pass
-
+
     elif system == 'Linux':
-
+
         chrome_commands = ['google-chrome', 'chrome', 'chromium', 'chromium-browser']
-
+
         for cmd in chrome_commands:
             try:
                 subprocess.run([cmd, url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                 return True
-            except:
+            except Exception:
                 continue
-
+
     print(f"Could not open {filename} in Chrome on {system}.")
     return False

-
+
 def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
     """
     Opens [filename] in the default OS file handler for this file type.
-
+
     If browser_name is not None, uses the webbrowser module to open the filename
     in the specified browser; see https://docs.python.org/3/library/webbrowser.html
     for supported browsers. Falls back to the default file handler if webbrowser.open()
     fails. In this case, attempt_to_open_in_wsl_host is ignored unless webbrowser.open() fails.
-
-    If browser_name is 'default', uses the system default. This is different from the
+
+    If browser_name is 'default', uses the system default. This is different from the
     parameter to webbrowser.get(), where None implies the system default.
-
+
     Args:
         filename (str): file to open
         attempt_to_open_in_wsl_host: if this is True, and we're in WSL, attempts to open
             [filename] in the Windows host environment
         browser_name: see above
     """
-
+
     if browser_name is not None:
         if browser_name == 'chrome':
             browser_name = 'google-chrome'
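Two things to note in this hunk: the bare except: clauses in open_file_in_chrome() become except Exception:, which no longer swallows KeyboardInterrupt or SystemExit, and the WSL/Windows path converters behave as sketched below (expected values follow the docstrings above):

    from megadetector.utils.path_utils import (
        wsl_path_to_windows_path, windows_path_to_wsl_path)

    wsl_path_to_windows_path('/mnt/e/a/b/c')    # 'e:\\a\\b\\c'
    windows_path_to_wsl_path(r'e:\a\b\c')       # '/mnt/e/a/b/c'

    # Paths outside /mnt/<drive> can't be converted
    wsl_path_to_windows_path('/home/user/x')                               # None
    wsl_path_to_windows_path('/home/user/x', failure_behavior='original')  # '/home/user/x'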
@@ -925,32 +871,32 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
         result = False
     if result:
         return
-
+
     if sys.platform == 'win32':
-
+
         os.startfile(filename)

     elif sys.platform == 'darwin':
-
+
         opener = 'open'
         subprocess.call([opener, filename])
-
+
     elif attempt_to_open_in_wsl_host and environment_is_wsl():
-
+
         windows_path = wsl_path_to_windows_path(filename)
-
+
         # Fall back to xdg-open
         if windows_path is None:
             subprocess.call(['xdg-open', filename])
-
-        if os.path.isdir(filename):
+
+        if os.path.isdir(filename):
             subprocess.run(["explorer.exe", windows_path])
         else:
-            os.system("cmd.exe /C start %s" % (re.escape(windows_path)))
-
+            os.system("cmd.exe /C start {}".format(re.escape(windows_path)))
+
     else:
-
-        opener = 'xdg-open'
+
+        opener = 'xdg-open'
         subprocess.call([opener, filename])

 # ...def open_file(...)
@@ -962,12 +908,12 @@ def write_list_to_file(output_file,strings):
     """
     Writes a list of strings to either a JSON file or text file,
     depending on extension of the given file name.
-
+
     Args:
         output_file (str): file to write
         strings (list): list of strings to write to [output_file]
     """
-
+
     with open(output_file, 'w') as f:
         if output_file.endswith('.json'):
             json.dump(strings, f, indent=1)
@@ -978,14 +924,14 @@ def read_list_from_file(filename):
 def read_list_from_file(filename):
     """
     Reads a json-formatted list of strings from a file.
-
+
     Args:
         filename (str): .json filename to read
-
+
     Returns:
         list: list of strings read from [filename]
     """
-
+
     assert filename.endswith('.json')
     with open(filename, 'r') as f:
         file_list = json.load(f)
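These two helpers round-trip a list of strings through JSON when the extension is .json; a minimal sketch (the filename is hypothetical):

    from megadetector.utils.path_utils import write_list_to_file, read_list_from_file

    filenames = ['a.jpg', 'b.jpg']
    write_list_to_file('filenames.json', filenames)  # JSON because of the extension
    assert read_list_from_file('filenames.json') == filenames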
@@ -1001,39 +947,39 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     """
     Internal function for copying files from within parallel_copy_files.
     """
-
+
     assert len(input_output_tuple) == 2
     source_fn = input_output_tuple[0]
     target_fn = input_output_tuple[1]
     if (not overwrite) and (os.path.isfile(target_fn)):
         if verbose:
             print('Skipping existing target file {}'.format(target_fn))
-        return
-
+        return
+
     if move:
         action_string = 'Moving'
     else:
         action_string = 'Copying'
-
+
     if verbose:
         print('{} to {}'.format(action_string,target_fn))
-
+
     os.makedirs(os.path.dirname(target_fn),exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:
         shutil.copyfile(source_fn,target_fn)
-

-def parallel_copy_files(input_file_to_output_file,
-                        max_workers=16,
-                        use_threads=True,
-                        overwrite=False,
+
+def parallel_copy_files(input_file_to_output_file,
+                        max_workers=16,
+                        use_threads=True,
+                        overwrite=False,
                         verbose=False,
                         move=False):
     """
     Copy (or move) files from source to target according to the dict input_file_to_output_file.
-
+
     Args:
         input_file_to_output_file (dict): dictionary mapping source files to the target files
             to which they should be copied
@@ -1046,24 +992,32 @@ def parallel_copy_files(input_file_to_output_file,
     """

     n_workers = min(max_workers,len(input_file_to_output_file))
-
+
     # Package the dictionary as a set of 2-tuples
     input_output_tuples = []
     for input_fn in input_file_to_output_file:
         input_output_tuples.append((input_fn,input_file_to_output_file[input_fn]))

-    if use_threads:
-        pool = ThreadPool(n_workers)
-    else:
-        pool = Pool(n_workers)
+    pool = None

-    with tqdm(total=len(input_output_tuples)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
-                                                         overwrite=overwrite,
-                                                         verbose=verbose,
-                                                         move=move),
-                                                 input_output_tuples)):
-            pbar.update()
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_output_tuples)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
+                                                             overwrite=overwrite,
+                                                             verbose=verbose,
+                                                             move=move),
+                                                     input_output_tuples)):
+                pbar.update()
+    finally:
+        pool.close()
+        pool.join()
+        if verbose:
+            print("Pool closed and joined parallel file copying")

 # ...def parallel_copy_files(...)

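The 5.0.29 change wraps the worker pool in try/finally so it is closed and joined even when a copy raises. Typical usage, as a sketch (paths are hypothetical):

    from megadetector.utils.path_utils import parallel_copy_files

    mapping = {'/data/src/a.jpg': '/data/dst/a.jpg',
               '/data/src/b.jpg': '/data/dst/b.jpg'}
    parallel_copy_files(mapping, max_workers=8, use_threads=True, overwrite=False)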
@@ -1074,36 +1028,36 @@ def get_file_sizes(base_dir, convert_slashes=True):
     """
     Gets sizes recursively for all files in base_dir, returning a dict mapping
     relative filenames to size.
-
+
     TODO: merge the functionality here with parallel_get_file_sizes, which uses slightly
     different semantics.
-
+
     Args:
         base_dir (str): folder within which we want all file sizes
         convert_slashes (bool, optional): force forward slashes in return strings,
             otherwise uses the native path separator
-
+
     Returns:
         dict: dictionary mapping filenames to file sizes in bytes
     """
-
-    relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
+
+    relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
                                              return_relative_paths=True)
-
+
     fn_to_size = {}
     for fn_relative in tqdm(relative_filenames):
         fn_abs = os.path.join(base_dir,fn_relative)
         fn_to_size[fn_relative] = os.path.getsize(fn_abs)
-
+
     return fn_to_size
-
+

 def _get_file_size(filename,verbose=False):
     """
     Internal function for safely getting the size of a file. Returns a (filename,size)
     tuple, where size is None if there is an error.
     """
-
+
     try:
         size = os.path.getsize(filename)
     except Exception as e:
@@ -1112,18 +1066,18 @@ def _get_file_size(filename,verbose=False):
         size = None
     return (filename,size)

-
-def parallel_get_file_sizes(filenames,
-                            max_workers=16,
-                            use_threads=True,
+
+def parallel_get_file_sizes(filenames,
+                            max_workers=16,
+                            use_threads=True,
                             verbose=False,
-                            recursive=True,
+                            recursive=True,
                             convert_slashes=True,
                             return_relative_paths=False):
     """
     Returns a dictionary mapping every file in [filenames] to the corresponding file size,
     or None for errors. If [filenames] is a folder, will enumerate the folder (optionally recursively).
-
+
     Args:
         filenames (list or str): list of filenames for which we should read sizes, or a folder
             within which we should read all file sizes recursively
@@ -1135,33 +1089,33 @@ def parallel_get_file_sizes(filenames,
         convert_slashes (bool, optional): convert backslashes to forward slashes
         return_relative_paths (bool, optional): return relative paths; only relevant if [filenames]
             is a folder.
-
+
     Returns:
         dict: dictionary mapping filenames to file sizes in bytes
     """

     n_workers = min(max_workers,len(filenames))
-
+
     folder_name = None
-
+
     if isinstance(filenames,str):
-
+
         folder_name = filenames
-        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
-
+        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
+
         if verbose:
             print('Enumerating files in {}'.format(folder_name))
-
+
         # Enumerate absolute paths here, we'll convert to relative later if requested
         filenames = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)

     else:
-
+
         assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
-
+
     if verbose:
         print('Creating worker pool')
-
+
     if use_threads:
         pool_string = 'thread'
         pool = ThreadPool(n_workers)
@@ -1172,11 +1126,11 @@ def parallel_get_file_sizes(filenames,
     if verbose:
         print('Created a {} pool of {} workers'.format(
             pool_string,n_workers))
-
+
     # This returns (filename,size) tuples
     get_size_results = list(tqdm(pool.imap(
         partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
-
+
     to_return = {}
     for r in get_size_results:
         fn = r[0]
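A sketch of parallel_get_file_sizes(), which accepts either a folder or a list of filenames (the folder is hypothetical):

    from megadetector.utils.path_utils import parallel_get_file_sizes

    # Values are sizes in bytes, or None for files that couldn't be read
    size_map = parallel_get_file_sizes('/data/camera-traps', return_relative_paths=True)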
@@ -1197,7 +1151,7 @@
 def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
     """
     Zips a single file.
-
+
     Args:
         input_fn (str): file to zip
         output_fn (str, optional): target zipfile; if this is None, we'll use
@@ -1205,23 +1159,23 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
         overwrite (bool, optional): whether to overwrite an existing target file
         verbose (bool, optional): enable existing debug console output
         compresslevel (int, optional): compression level to use, between 0 and 9
-
+
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
     """
-
+
     basename = os.path.basename(input_fn)
-
+
     if output_fn is None:
         output_fn = input_fn + '.zip'
-
+
     if (not overwrite) and (os.path.isfile(output_fn)):
         print('Skipping existing file {}'.format(output_fn))
         return output_fn
-
+
     if verbose:
         print('Zipping {} to {} with level {}'.format(input_fn,output_fn,compresslevel))
-
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         zipf.write(input_fn,arcname=basename,compresslevel=compresslevel,
                    compress_type=zipfile.ZIP_DEFLATED)
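A sketch of zip_file() (the filename is hypothetical); by default the output lands next to the input:

    from megadetector.utils.path_utils import zip_file

    zipped = zip_file('results.json', overwrite=True, compresslevel=9)
    # zipped == 'results.json.zip' unless output_fn was given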
@@ -1232,9 +1186,9 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):
     """
-    Adds all the files in [input_files] to the tar file [output_fn].
+    Adds all the files in [input_files] to the tar file [output_fn].
     Archive names are relative to arc_name_base.
-
+
     Args:
         input_files (list): list of absolute filenames to include in the .tar file
         output_fn (str): .tar file to create
@@ -1244,11 +1198,11 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
         mode (str, optional): compression type, can be 'x' (no compression), 'x:gz', or 'x:bz2'.
-
+
     Returns:
         str: the output tar file, whether we created it or determined that it already exists
     """
-
+
     if os.path.isfile(output_fn):
         if not overwrite:
             print('Tar file {} exists, skipping'.format(output_fn))
@@ -1256,11 +1210,11 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
1256
1210
  else:
1257
1211
  print('Tar file {} exists, deleting and re-creating'.format(output_fn))
1258
1212
  os.remove(output_fn)
1259
-
1213
+
1260
1214
  if verbose:
1261
1215
  print('Adding {} files to {} (mode {})'.format(
1262
1216
  len(input_files),output_fn,mode))
1263
-
1217
+
1264
1218
  with tarfile.open(output_fn,mode) as tarf:
1265
1219
  for input_fn_abs in tqdm(input_files,disable=(not verbose)):
1266
1220
  input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
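A minimal usage sketch for add_files_to_single_tar_file (all paths are hypothetical):

    from megadetector.utils.path_utils import add_files_to_single_tar_file  # assumed path

    # Bundle two files into a gzip-compressed tarball; inside the archive
    # they are stored relative to /data/project
    add_files_to_single_tar_file(['/data/project/images/a.jpg',
                                  '/data/project/images/b.jpg'],
                                 '/data/project-images.tar.gz',
                                 arc_name_base='/data/project',
                                 mode='x:gz')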
@@ -1272,9 +1226,9 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
1272
1226
  def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
1273
1227
  overwrite=False, verbose=False, compresslevel=9):
1274
1228
  """
1275
- Zip all the files in [input_files] into [output_fn]. Archive names are relative to
1229
+ Zip all the files in [input_files] into [output_fn]. Archive names are relative to
1276
1230
  arc_name_base.
1277
-
1231
+
1278
1232
  Args:
1279
1233
  input_files (list): list of absolute filenames to include in the .tar file
1280
1234
  output_fn (str): .tar file to create
@@ -1284,20 +1238,20 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
1284
1238
  overwrite (bool, optional): whether to overwrite an existing .tar file
1285
1239
  verbose (bool, optional): enable additional debug console output
1286
1240
  compresslevel (int, optional): compression level to use, between 0 and 9
1287
-
1241
+
1288
1242
  Returns:
1289
1243
  str: the output zipfile, whether we created it or determined that it already exists
1290
1244
  """
1291
-
1245
+
1292
1246
  if not overwrite:
1293
1247
  if os.path.isfile(output_fn):
1294
1248
  print('Zip file {} exists, skipping'.format(output_fn))
1295
1249
  return output_fn
1296
-
1250
+
1297
1251
  if verbose:
1298
1252
  print('Zipping {} files to {} (compression level {})'.format(
1299
1253
  len(input_files),output_fn,compresslevel))
1300
-
1254
+
1301
1255
  with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
1302
1256
  for input_fn_abs in tqdm(input_files,disable=(not verbose)):
1303
1257
  input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
@@ -1307,41 +1261,41 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
1307
1261
  compress_type=zipfile.ZIP_DEFLATED)
1308
1262
 
1309
1263
  return output_fn
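A minimal usage sketch for zip_files_into_single_zipfile (paths are hypothetical):

    from megadetector.utils.path_utils import zip_files_into_single_zipfile  # assumed path

    # Collect scattered .json files into one archive; in-archive paths are
    # relative to /data/run
    zip_files_into_single_zipfile(['/data/run/a/detections.json',
                                   '/data/run/b/detections.json'],
                                  '/data/run/detections.zip',
                                  arc_name_base='/data/run')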
-
-
+
+
 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
     """
-    Recursively zip everything in [input_folder] into a single zipfile, storing files as paths
+    Recursively zip everything in [input_folder] into a single zipfile, storing files as paths
     relative to [input_folder].
-
-    Args:
+
+    Args:
         input_folder (str): folder to zip
         output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
         overwrite (bool, optional): whether to overwrite an existing .zip file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
-
+        compresslevel (int, optional): compression level to use, between 0 and 9
+
     Returns:
-        str: the output zipfile, whether we created it or determined that it already exists
+        str: the output zipfile, whether we created it or determined that it already exists
     """
-
+
     if output_fn is None:
         output_fn = input_folder + '.zip'
-
+
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
-
+            return output_fn
+
     if verbose:
         print('Zipping {} to {} (compression level {})'.format(
             input_folder,output_fn,compresslevel))
-
+
     relative_filenames = recursive_file_list(input_folder,return_relative_paths=True)
-
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         for input_fn_relative in tqdm(relative_filenames,disable=(not verbose)):
-            input_fn_abs = os.path.join(input_folder,input_fn_relative)
+            input_fn_abs = os.path.join(input_folder,input_fn_relative)
             zipf.write(input_fn_abs,
                        arcname=input_fn_relative,
                        compresslevel=compresslevel,
@@ -1349,17 +1303,17 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
 
     return output_fn
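A minimal usage sketch for zip_folder (the folder name is hypothetical):

    from megadetector.utils.path_utils import zip_folder  # assumed import path

    # Recursively zip a folder to /data/snapshots.zip, with in-archive paths
    # relative to the folder itself
    zip_folder('/data/snapshots', overwrite=True, verbose=True)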
 
-
- def parallel_zip_files(input_files,
-                        max_workers=16,
-                        use_threads=True,
-                        compresslevel=9,
-                        overwrite=False,
+
+ def parallel_zip_files(input_files,
+                        max_workers=16,
+                        use_threads=True,
+                        compresslevel=9,
+                        overwrite=False,
                         verbose=False):
     """
-    Zips one or more files to separate output files in parallel, leaving the
+    Zips one or more files to separate output files in parallel, leaving the
     original files in place. Each file is zipped to [filename].zip.
-
+
     Args:
         input_files (list): list of files to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
@@ -1387,9 +1341,9 @@ def parallel_zip_files(input_files,
 def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
                          compresslevel=9, overwrite=False, verbose=False):
     """
-    Zips one or more folders to separate output files in parallel, leaving the
+    Zips one or more folders to separate output files in parallel, leaving the
     original folders in place. Each folder is zipped to [folder_name].zip.
-
+
     Args:
         input_folders (list): list of folders to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
@@ -1406,7 +1360,7 @@ def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
         pool = ThreadPool(n_workers)
     else:
         pool = Pool(n_workers)
-
+
     with tqdm(total=len(input_folders)) as pbar:
         for i,_ in enumerate(pool.imap_unordered(
             partial(zip_folder,overwrite=overwrite,
@@ -1419,9 +1373,9 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
                             compresslevel=9,overwrite=False,required_token=None,verbose=False,
                             exclude_zip=True):
     """
-    Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
+    Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
     zip a whole folder into a single zipfile, use zip_folder().
-
+
     Args:
         folder_name (str): the folder within which we should zip files
         recursive (bool, optional): whether to recurse within [folder_name]
@@ -1432,19 +1386,19 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
         overwrite (bool, optional): whether to overwrite existing .zip files
         required_token (str, optional): only zip files whose names contain this string
         verbose (bool, optional): enable additional debug console output
-        exclude_zip (bool, optional): skip files ending in .zip
+        exclude_zip (bool, optional): skip files ending in .zip
     """
-
+
     assert os.path.isdir(folder_name), '{} is not a folder'.format(folder_name)
-
+
     input_files = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
-
+
     if required_token is not None:
         input_files = [fn for fn in input_files if required_token in fn]
-
+
     if exclude_zip:
         input_files = [fn for fn in input_files if (not fn.endswith('.zip'))]
-
+
     parallel_zip_files(input_files=input_files,max_workers=max_workers,
                        use_threads=use_threads,compresslevel=compresslevel,
                        overwrite=overwrite,verbose=verbose)
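A minimal usage sketch for zip_each_file_in_folder (the folder name and token are hypothetical):

    from megadetector.utils.path_utils import zip_each_file_in_folder  # assumed path

    # Zip each .json file under /data/results to its own [filename].zip,
    # recursing into subfolders and skipping files that are already .zip
    zip_each_file_in_folder('/data/results',
                            recursive=True,
                            required_token='.json',
                            overwrite=False,
                            verbose=True)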
@@ -1454,16 +1408,16 @@ def unzip_file(input_file, output_folder=None):
     """
     Unzips a zipfile to the specified output folder, defaulting to the same location as
     the input file.
-
+
     Args:
         input_file (str): zipfile to unzip
         output_folder (str, optional): folder to which we should unzip [input_file], defaults
             to unzipping to the folder where [input_file] lives
     """
-
+
     if output_folder is None:
         output_folder = os.path.dirname(input_file)
-
+
     with zipfile.ZipFile(input_file, 'r') as zf:
         zf.extractall(output_folder)
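A minimal usage sketch for unzip_file (paths are hypothetical):

    from megadetector.utils.path_utils import unzip_file  # assumed import path

    # Extract next to the archive (the default), then again to an explicit folder
    unzip_file('/data/archive.zip')
    unzip_file('/data/archive.zip', output_folder='/tmp/extracted')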
 
@@ -1473,31 +1427,31 @@ def unzip_file(input_file, output_folder=None):
 def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
     """
     Compute the hash of a file.
-
+
     Adapted from:
-
+
     https://www.geeksforgeeks.org/python-program-to-find-hash-of-file/
-
+
     Args:
         file_path (str): the file to hash
         algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
         allow_failures (bool, optional): whether to return None (rather than raising) if hashing fails
-
+
     Returns:
         str: the hash value for this file
     """
-
+
     try:
-
+
         hash_func = hashlib.new(algorithm)
-
+
         with open(file_path, 'rb') as file:
             while chunk := file.read(8192): # Read the file in chunks of 8192 bytes
                 hash_func.update(chunk)
-
+
         return str(hash_func.hexdigest())
-
+
     except Exception:
-
+
         if allow_failures:
             return None
         else:
@@ -1507,14 +1461,14 @@ def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
 
 
 def parallel_compute_file_hashes(filenames,
-                                 max_workers=16,
-                                 use_threads=True,
+                                 max_workers=16,
+                                 use_threads=True,
                                  recursive=True,
                                  algorithm='sha256',
                                  verbose=False):
     """
     Compute file hashes for a list or folder of images.
-
+
     Args:
         filenames (list or str): a list of filenames or a folder
         max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
@@ -1524,8 +1478,8 @@ def parallel_compute_file_hashes(filenames,
         algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
         recursive (bool, optional): if [filenames] is a folder, whether to enumerate recursively.
            Ignored if [filenames] is a list.
-        verbose (bool, optional): enable additional debug output
-
+        verbose (bool, optional): enable additional debug output
+
     Returns:
         dict: a dict mapping filenames to hash values; values will be None for files that fail
            to load.
@@ -1535,35 +1489,1140 @@ def parallel_compute_file_hashes(filenames,
         if verbose:
             print('Enumerating files in {}'.format(filenames))
         filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
-
+
     n_workers = min(max_workers,len(filenames))
-
+
     if verbose:
         print('Computing hashes for {} files on {} workers'.format(len(filenames),n_workers))
-
+
     if n_workers <= 1:
-
+
         results = []
         for filename in filenames:
             results.append(compute_file_hash(filename,algorithm=algorithm,allow_failures=True))
-
+
     else:
-
+
         if use_threads:
             pool = ThreadPool(n_workers)
         else:
             pool = Pool(n_workers)
-
+
         results = list(tqdm(pool.imap(
             partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
             filenames), total=len(filenames)))
-
+
     assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
-
+
     to_return = {}
     for i_file,filename in enumerate(filenames):
         to_return[filename] = results[i_file]
-
+
     return to_return
 
 # ...def parallel_compute_file_hashes(...)
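A minimal usage sketch for the hashing helpers (paths and import path are assumptions):

    from megadetector.utils.path_utils import compute_file_hash, parallel_compute_file_hashes

    # Hash one file, then hash a whole folder in parallel; files that fail
    # to load map to None in the returned dict
    h = compute_file_hash('/data/images/IMG_0001.JPG', algorithm='md5')
    folder_hashes = parallel_compute_file_hashes('/data/images', max_workers=8, recursive=True)
    duplicates = [fn for fn,v in folder_hashes.items() if v == h]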
1524
+
1525
+
1526
+ #%% Tests
1527
+
1528
+ class TestPathUtils:
1529
+ """
1530
+ Tests for path_utils.py
1531
+ """
1532
+
1533
+ def set_up(self):
1534
+ """
1535
+ Create a temporary directory for testing.
1536
+ """
1537
+
1538
+ self.test_dir = make_test_folder(subfolder='megadetector/path_utils_tests')
1539
+ os.makedirs(self.test_dir, exist_ok=True)
1540
+
1541
+
1542
+ def tear_down(self):
1543
+ """
1544
+ Remove the temporary directory after tests.
1545
+ """
1546
+
1547
+ if os.path.exists(self.test_dir):
1548
+ shutil.rmtree(self.test_dir)
1549
+
1550
+
1551
+ def test_is_image_file(self):
1552
+ """
1553
+ Test the is_image_file function.
1554
+ """
1555
+
1556
+ assert is_image_file('test.jpg')
1557
+ assert is_image_file('test.jpeg')
1558
+ assert is_image_file('test.png')
1559
+ assert is_image_file('test.gif')
1560
+ assert is_image_file('test.bmp')
1561
+ assert is_image_file('test.tiff')
1562
+ assert is_image_file('test.TIF')
1563
+ assert not is_image_file('test.txt')
1564
+ assert not is_image_file('test.doc')
1565
+ assert is_image_file('path/to/image.JPG')
1566
+ assert not is_image_file('image')
1567
+ assert is_image_file('test.custom', img_extensions=['.custom'])
1568
+ assert not is_image_file('test.jpg', img_extensions=['.custom'])
1569
+
1570
+
1571
+ def test_find_image_strings(self):
1572
+ """
1573
+ Test the find_image_strings function.
1574
+ """
1575
+
1576
+ strings = ['a.jpg', 'b.txt', 'c.PNG', 'd.gif', 'e.jpeg', 'f.doc']
1577
+ expected = ['a.jpg', 'c.PNG', 'd.gif', 'e.jpeg']
1578
+ assert sorted(find_image_strings(strings)) == sorted(expected)
1579
+ assert find_image_strings([]) == []
1580
+ assert find_image_strings(['no_image.txt', 'another.doc']) == []
1581
+
1582
+
1583
+ def test_find_images(self):
1584
+ """
1585
+ Test the find_images function.
1586
+ """
1587
+
1588
+ # Create some dummy files
1589
+ img1_abs = os.path.join(self.test_dir, 'img1.jpg')
1590
+ img2_abs = os.path.join(self.test_dir, 'img2.PNG')
1591
+ txt1_abs = os.path.join(self.test_dir, 'text1.txt')
1592
+ open(img1_abs, 'w').close()
1593
+ open(img2_abs, 'w').close()
1594
+ open(txt1_abs, 'w').close()
1595
+
1596
+ subdir = os.path.join(self.test_dir, 'subdir')
1597
+ os.makedirs(subdir, exist_ok=True)
1598
+ img3_abs = os.path.join(subdir, 'img3.jpeg')
1599
+ txt2_abs = os.path.join(subdir, 'text2.txt')
1600
+ open(img3_abs, 'w').close()
1601
+ open(txt2_abs, 'w').close()
1602
+
1603
+ # Test non-recursive
1604
+ expected_non_recursive_abs = sorted([img1_abs.replace('\\', '/'), img2_abs.replace('\\', '/')])
1605
+ found_non_recursive_abs = find_images(self.test_dir, recursive=False, return_relative_paths=False)
1606
+ assert sorted(found_non_recursive_abs) == expected_non_recursive_abs
1607
+
1608
+ # Test non-recursive, relative paths
1609
+ expected_non_recursive_rel = sorted(['img1.jpg', 'img2.PNG'])
1610
+ found_non_recursive_rel = find_images(self.test_dir, recursive=False, return_relative_paths=True)
1611
+ assert sorted(found_non_recursive_rel) == expected_non_recursive_rel
1612
+
1613
+ # Test recursive
1614
+ expected_recursive_abs = sorted([
1615
+ img1_abs.replace('\\', '/'),
1616
+ img2_abs.replace('\\', '/'),
1617
+ img3_abs.replace('\\', '/')
1618
+ ])
1619
+ found_recursive_abs = find_images(self.test_dir, recursive=True, return_relative_paths=False)
1620
+ assert sorted(found_recursive_abs) == expected_recursive_abs
1621
+
1622
+ # Test recursive, relative paths
1623
+ expected_recursive_rel = sorted([
1624
+ 'img1.jpg',
1625
+ 'img2.PNG',
1626
+ os.path.join('subdir', 'img3.jpeg').replace('\\', '/')
1627
+ ])
1628
+ found_recursive_rel = find_images(self.test_dir, recursive=True, return_relative_paths=True)
1629
+ assert sorted(found_recursive_rel) == expected_recursive_rel
1630
+
1631
+ # Test with an empty directory
1632
+ empty_dir = os.path.join(self.test_dir, 'empty_dir')
1633
+ os.makedirs(empty_dir, exist_ok=True)
1634
+ assert find_images(empty_dir, recursive=True) == []
1635
+
1636
+ # Test with a directory that doesn't exist (should assert)
1637
+ try:
1638
+ find_images(os.path.join(self.test_dir, 'non_existent_dir'))
1639
+ raise AssertionError("AssertionError not raised for non_existent_dir")
1640
+ except AssertionError:
1641
+ pass
1642
+
1643
+
1644
+ def test_recursive_file_list_and_file_list(self):
1645
+ """
1646
+ Test the recursive_file_list and file_list functions.
1647
+ """
1648
+
1649
+ # Setup directory structure
1650
+ # test_dir/
1651
+ # file1.txt
1652
+ # file2.jpg
1653
+ # subdir1/
1654
+ # file3.txt
1655
+ # subsubdir/
1656
+ # file4.png
1657
+ # subdir2/
1658
+ # file5.doc
1659
+
1660
+ list_dir = os.path.join(self.test_dir,'recursive_list')
1661
+
1662
+ f1 = os.path.join(list_dir, 'file1.txt')
1663
+ f2 = os.path.join(list_dir, 'file2.jpg')
1664
+ subdir1 = os.path.join(list_dir, 'subdir1')
1665
+ os.makedirs(subdir1, exist_ok=True)
1666
+ f3 = os.path.join(subdir1, 'file3.txt')
1667
+ subsubdir = os.path.join(subdir1, 'subsubdir')
1668
+ os.makedirs(subsubdir, exist_ok=True)
1669
+ f4 = os.path.join(subsubdir, 'file4.png')
1670
+ subdir2 = os.path.join(list_dir, 'subdir2')
1671
+ os.makedirs(subdir2, exist_ok=True)
1672
+ f5 = os.path.join(subdir2, 'file5.doc')
1673
+
1674
+ for filepath in [f1, f2, f3, f4, f5]:
1675
+ with open(filepath, 'w') as f:
1676
+ f.write('test')
1677
+
1678
+ # Test recursive_file_list (recursive=True by default)
1679
+ expected_all_files_abs = sorted([
1680
+ f1.replace('\\', '/'), f2.replace('\\', '/'), f3.replace('\\', '/'),
1681
+ f4.replace('\\', '/'), f5.replace('\\', '/')
1682
+ ])
1683
+ all_files_abs = recursive_file_list(list_dir, convert_slashes=True,
1684
+ return_relative_paths=False)
1685
+ assert sorted(all_files_abs) == expected_all_files_abs
1686
+
1687
+ # Test recursive_file_list with relative paths
1688
+ expected_all_files_rel = sorted([
1689
+ 'file1.txt', 'file2.jpg',
1690
+ os.path.join('subdir1', 'file3.txt').replace('\\', '/'),
1691
+ os.path.join('subdir1', 'subsubdir', 'file4.png').replace('\\', '/'),
1692
+ os.path.join('subdir2', 'file5.doc').replace('\\', '/')
1693
+ ])
1694
+ all_files_rel = recursive_file_list(list_dir, convert_slashes=True,
1695
+ return_relative_paths=True)
1696
+ assert sorted(all_files_rel) == expected_all_files_rel
1697
+
1698
+ # Test file_list (non-recursive by default via wrapper)
1699
+ expected_top_level_files_abs = sorted([f1.replace('\\', '/'), f2.replace('\\', '/')])
1700
+ top_level_files_abs = file_list(list_dir, convert_slashes=True,
1701
+ return_relative_paths=False, recursive=False)
1702
+ assert sorted(top_level_files_abs) == expected_top_level_files_abs
1703
+
1704
+ # Test file_list (recursive explicitly) - should be same as recursive_file_list
1705
+ recursive_via_file_list = file_list(list_dir, convert_slashes=True,
1706
+ return_relative_paths=False, recursive=True)
1707
+ assert sorted(recursive_via_file_list) == expected_all_files_abs
1708
+
1709
+ # Test with convert_slashes=False (use os.sep)
1710
+ #
1711
+ # Note: This test might be tricky if os.sep is '/', as no replacement happens. We'll check
1712
+ # that backslashes remain on Windows.
1713
+ if os.sep == '\\':
1714
+ f1_raw = os.path.join(list_dir, 'file1.txt')
1715
+ # Only one file for simplicity
1716
+ files_no_slash_conversion = file_list(list_dir, convert_slashes=False, recursive=False)
1717
+ assert any(f1_raw in s for s in files_no_slash_conversion)
1718
+
1719
+ # Test with an empty directory
1720
+ empty_dir = os.path.join(list_dir, "empty_dir_for_files")
1721
+ os.makedirs(empty_dir, exist_ok=True)
1722
+ assert recursive_file_list(empty_dir) == []
1723
+ assert file_list(empty_dir, recursive=False) == []
1724
+
1725
+ # Test with a non-existent directory
1726
+ try:
1727
+ recursive_file_list(os.path.join(list_dir, "non_existent_dir"))
1728
+ raise AssertionError("AssertionError not raised for non_existent_dir in recursive_file_list")
1729
+ except AssertionError:
1730
+ pass
1731
+
1732
+
1733
+ def test_folder_list(self):
1734
+ """
1735
+ Test the folder_list function.
1736
+ """
1737
+
1738
+ # Setup directory structure
1739
+ # test_dir/
1740
+ # subdir1/
1741
+ # subsubdir1/
1742
+ # subdir2/
1743
+ # file1.txt (should be ignored)
1744
+
1745
+ folder_list_dir = os.path.join(self.test_dir,'folder_list')
1746
+
1747
+ subdir1 = os.path.join(folder_list_dir, 'subdir1')
1748
+ subsubdir1 = os.path.join(subdir1, 'subsubdir1')
1749
+ subdir2 = os.path.join(folder_list_dir, 'subdir2')
1750
+ os.makedirs(subdir1, exist_ok=True)
1751
+ os.makedirs(subsubdir1, exist_ok=True)
1752
+ os.makedirs(subdir2, exist_ok=True)
1753
+ with open(os.path.join(folder_list_dir, 'file1.txt'), 'w') as f:
1754
+ f.write('test')
1755
+
1756
+ # Test non-recursive
1757
+ expected_folders_non_recursive_abs = sorted([
1758
+ subdir1.replace('\\', '/'), subdir2.replace('\\', '/')
1759
+ ])
1760
+ folders_non_recursive_abs = folder_list(folder_list_dir, recursive=False,
1761
+ return_relative_paths=False)
1762
+ assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs
1763
+
1764
+ # Test non-recursive, relative paths
1765
+ expected_folders_non_recursive_rel = sorted(['subdir1', 'subdir2'])
1766
+ folders_non_recursive_rel = folder_list(folder_list_dir, recursive=False,
1767
+ return_relative_paths=True)
1768
+ assert sorted(folders_non_recursive_rel) == expected_folders_non_recursive_rel
1769
+
1770
+ # Test recursive
1771
+ expected_folders_recursive_abs = sorted([
1772
+ subdir1.replace('\\', '/'),
1773
+ subsubdir1.replace('\\', '/'),
1774
+ subdir2.replace('\\', '/')
1775
+ ])
1776
+ folders_recursive_abs = folder_list(folder_list_dir, recursive=True,
1777
+ return_relative_paths=False)
1778
+ assert sorted(folders_recursive_abs) == expected_folders_recursive_abs
1779
+
1780
+ # Test recursive, relative paths
1781
+ expected_folders_recursive_rel = sorted([
1782
+ 'subdir1',
1783
+ os.path.join('subdir1', 'subsubdir1').replace('\\', '/'),
1784
+ 'subdir2'
1785
+ ])
1786
+ folders_recursive_rel = folder_list(folder_list_dir, recursive=True,
1787
+ return_relative_paths=True)
1788
+ assert sorted(folders_recursive_rel) == expected_folders_recursive_rel
1789
+
1790
+ # Test with an empty directory (except for the file)
1791
+ empty_dir_for_folders = os.path.join(folder_list_dir, "empty_for_folders")
1792
+ os.makedirs(empty_dir_for_folders, exist_ok=True)
1793
+ with open(os.path.join(empty_dir_for_folders, 'temp.txt'), 'w') as f: f.write('t')
1794
+ assert folder_list(empty_dir_for_folders, recursive=True) == []
1795
+ assert folder_list(empty_dir_for_folders, recursive=False) == []
1796
+
1797
+ # Test with a non-existent directory
1798
+ try:
1799
+ folder_list(os.path.join(self.test_dir, "non_existent_dir"))
1800
+ raise AssertionError("AssertionError not raised for non_existent_dir in folder_list")
1801
+ except AssertionError:
1802
+ pass
1803
+
1804
+
+ def test_folder_summary(self):
+ """
+ Test the folder_summary function.
+ """
+
+ # test_dir/
+ # file1.txt
+ # img1.jpg
+ # subdir/
+ # file2.txt
+ # img2.png
+ # img3.png
+
+ folder_summary_dir = os.path.join(self.test_dir,'folder_summary')
+
+ f1 = os.path.join(folder_summary_dir, 'file1.txt')
+ img1 = os.path.join(folder_summary_dir, 'img1.jpg')
+ subdir = os.path.join(folder_summary_dir, 'subdir')
+ os.makedirs(subdir, exist_ok=True)
+ f2 = os.path.join(subdir, 'file2.txt')
+ img2 = os.path.join(subdir, 'img2.png')
+ img3 = os.path.join(subdir, 'img3.png')
+
+ for filepath in [f1, img1, f2, img2, img3]:
+ with open(filepath, 'w') as f:
+ f.write('test')
+
+ summary = folder_summary(folder_summary_dir, print_summary=False)
+
+ assert summary['n_files'] == 5
+ assert summary['n_folders'] == 1 # 'subdir'
+ assert summary['extension_to_count']['.txt'] == 2
+ assert summary['extension_to_count']['.jpg'] == 1
+ assert summary['extension_to_count']['.png'] == 2
+
+ # Check order (sorted by value, desc)
+ #
+ # The specific order of keys with the same counts can vary based on file system list
+ # order. We'll check that the counts are correct and the number of unique extensions is
+ # right.
+ assert len(summary['extension_to_count']) == 3
+
+
+ empty_dir = os.path.join(folder_summary_dir, "empty_summary_dir")
+ os.makedirs(empty_dir, exist_ok=True)
+ empty_summary = folder_summary(empty_dir, print_summary=False)
+ assert empty_summary['n_files'] == 0
+ assert empty_summary['n_folders'] == 0
+ assert empty_summary['extension_to_count'] == {}
1854
+
1855
+
1856
+ def test_fileparts(self):
1857
+ """
1858
+ Test the fileparts function.
1859
+ """
1860
+
1861
+ assert fileparts('file') == ('', 'file', '')
1862
+ assert fileparts('file.txt') == ('', 'file', '.txt')
1863
+ assert fileparts(r'c:/dir/file.jpg') == ('c:/dir', 'file', '.jpg')
1864
+ assert fileparts('/dir/subdir/file.jpg') == ('/dir/subdir', 'file', '.jpg')
1865
+ assert fileparts(r'c:\dir\file') == (r'c:\dir', 'file', '')
1866
+ assert fileparts(r'c:\dir\file.tar.gz') == (r'c:\dir', 'file.tar', '.gz')
1867
+ assert fileparts('.bashrc') == ('', '.bashrc', '') # Hidden file, no extension
1868
+ assert fileparts('nodir/.bashrc') == ('nodir', '.bashrc', '')
1869
+ assert fileparts('a/b/c.d.e') == ('a/b', 'c.d', '.e')
1870
+
1871
+
1872
+ def test_insert_before_extension(self):
1873
+ """
1874
+ Test the insert_before_extension function.
1875
+ """
1876
+
1877
+ assert insert_before_extension('file.ext', 'inserted') == 'file.inserted.ext'
1878
+ assert insert_before_extension('file', 'inserted') == 'file.inserted'
1879
+ assert insert_before_extension('path/to/file.ext', 'tag') == 'path/to/file.tag.ext'
1880
+ assert insert_before_extension('path/to/file', 'tag') == 'path/to/file.tag'
1881
+ assert insert_before_extension('file.tar.gz', 'new') == 'file.tar.new.gz'
1882
+
1883
+ # Test with custom separator
1884
+ assert insert_before_extension('file.ext', 'inserted', separator='_') == 'file_inserted.ext'
1885
+
1886
+ # Test with s=None (timestamp) - check format roughly
1887
+ fname_with_ts = insert_before_extension('file.ext', None)
1888
+ parts = fname_with_ts.split('.')
1889
+ # file.YYYY.MM.DD.HH.MM.SS.ext
1890
+ assert len(parts) >= 8 # file, Y, M, D, H, M, S, ext
1891
+ assert parts[0] == 'file'
1892
+ assert parts[-1] == 'ext'
1893
+ assert all(p.isdigit() for p in parts[1:-1])
1894
+
1895
+ fname_no_ext_ts = insert_before_extension('file', '') # s is empty string, should also use timestamp
1896
+ parts_no_ext = fname_no_ext_ts.split('.')
1897
+ assert len(parts_no_ext) >= 7 # file, Y, M, D, H, M, S
1898
+ assert parts_no_ext[0] == 'file'
1899
+ assert all(p.isdigit() for p in parts_no_ext[1:])
1900
+
1901
+
1902
+ def test_split_path(self):
1903
+ """
1904
+ Test the split_path function.
1905
+ """
1906
+
1907
+ if os.name == 'nt':
1908
+ assert split_path(r'c:\dir\subdir\file.txt') == ['c:\\', 'dir', 'subdir', 'file.txt']
1909
+ assert split_path('c:\\') == ['c:\\']
1910
+ # Test with mixed slashes, ntpath.split handles them
1911
+ assert split_path(r'c:/dir/subdir/file.txt') == ['c:/', 'dir', 'subdir', 'file.txt']
1912
+ else: # POSIX
1913
+ assert split_path('/dir/subdir/file.jpg') == ['/', 'dir', 'subdir', 'file.jpg']
1914
+ assert split_path('/') == ['/']
1915
+
1916
+ assert split_path('dir/file.txt') == ['dir', 'file.txt']
1917
+ assert split_path('file.txt') == ['file.txt']
1918
+ assert split_path('') == ''
1919
+ assert split_path('.') == ['.']
1920
+ assert split_path('..') == ['..']
1921
+ assert split_path('../a/b') == ['..', 'a', 'b']
1922
+
1923
+
1924
+ def test_path_is_abs(self):
1925
+ """
1926
+ Test the path_is_abs function.
1927
+ """
1928
+
1929
+ assert path_is_abs('/absolute/path')
1930
+ assert path_is_abs('c:/absolute/path')
1931
+ assert path_is_abs('C:\\absolute\\path')
1932
+ assert path_is_abs('\\\\server\\share\\path') # UNC path
1933
+ assert path_is_abs('c:file_without_slash_after_drive')
1934
+
1935
+ assert not path_is_abs('relative/path')
1936
+ assert not path_is_abs('file.txt')
1937
+ assert not path_is_abs('../relative')
1938
+ assert not path_is_abs('')
1939
+
1940
+
1941
+
1942
+ def test_safe_create_link_unix(self):
1943
+ """
1944
+ Test the safe_create_link function on Unix-like systems.
1945
+ """
1946
+
1947
+ if os.name == 'nt':
1948
+ # print("Skipping test_safe_create_link_unix on Windows.")
1949
+ return
1950
+
1951
+ source_file_path = os.path.join(self.test_dir, 'source.txt')
1952
+ link_path = os.path.join(self.test_dir, 'link.txt')
1953
+ other_source_path = os.path.join(self.test_dir, 'other_source.txt')
1954
+
1955
+ with open(source_file_path, 'w') as f:
1956
+ f.write('source data')
1957
+ with open(other_source_path, 'w') as f:
1958
+ f.write('other data')
1959
+
1960
+ # Create new link
1961
+ safe_create_link(source_file_path, link_path)
1962
+ assert os.path.islink(link_path)
1963
+ assert os.readlink(link_path) == source_file_path
1964
+
1965
+ # Link already exists and points to the correct source
1966
+ safe_create_link(source_file_path, link_path) # Should do nothing
1967
+ assert os.path.islink(link_path)
1968
+ assert os.readlink(link_path) == source_file_path
1969
+
1970
+ # Link already exists but points to a different source
1971
+ safe_create_link(other_source_path, link_path) # Should remove and re-create
1972
+ assert os.path.islink(link_path)
1973
+ assert os.readlink(link_path) == other_source_path
1974
+
1975
+ # Link_new path exists and is a file (not a link)
1976
+ file_path_conflict = os.path.join(self.test_dir, 'conflict_file.txt')
1977
+ with open(file_path_conflict, 'w') as f:
1978
+ f.write('actual file')
1979
+ try:
1980
+ safe_create_link(source_file_path, file_path_conflict)
1981
+ raise AssertionError("AssertionError not raised for file conflict")
1982
+ except AssertionError:
1983
+ pass
1984
+ os.remove(file_path_conflict)
1985
+
1986
+ # Link_new path exists and is a directory
1987
+ dir_path_conflict = os.path.join(self.test_dir, 'conflict_dir')
1988
+ os.makedirs(dir_path_conflict, exist_ok=True)
1989
+ try:
1990
+ safe_create_link(source_file_path, dir_path_conflict)
1991
+ raise AssertionError("AssertionError not raised for directory conflict")
1992
+ except AssertionError: # islink will be false
1993
+ pass
1994
+ shutil.rmtree(dir_path_conflict)
1995
+
1996
+
1997
+ def test_remove_empty_folders(self):
1998
+ """
1999
+ Test the remove_empty_folders function.
2000
+ """
2001
+
2002
+ # test_dir/
2003
+ # empty_top/
2004
+ # empty_mid/
2005
+ # empty_leaf/
2006
+ # mixed_top/
2007
+ # empty_mid_in_mixed/
2008
+ # empty_leaf_in_mixed/
2009
+ # non_empty_mid/
2010
+ # file.txt
2011
+ # non_empty_top/
2012
+ # file_in_top.txt
2013
+
2014
+ empty_top = os.path.join(self.test_dir, 'empty_top')
2015
+ empty_mid = os.path.join(empty_top, 'empty_mid')
2016
+ empty_leaf = os.path.join(empty_mid, 'empty_leaf')
2017
+ os.makedirs(empty_leaf, exist_ok=True)
2018
+
2019
+ mixed_top = os.path.join(self.test_dir, 'mixed_top')
2020
+ empty_mid_in_mixed = os.path.join(mixed_top, 'empty_mid_in_mixed')
2021
+ empty_leaf_in_mixed = os.path.join(empty_mid_in_mixed, 'empty_leaf_in_mixed')
2022
+ os.makedirs(empty_leaf_in_mixed, exist_ok=True)
2023
+ non_empty_mid = os.path.join(mixed_top, 'non_empty_mid')
2024
+ os.makedirs(non_empty_mid, exist_ok=True)
2025
+ with open(os.path.join(non_empty_mid, 'file.txt'), 'w') as f:
2026
+ f.write('data')
2027
+
2028
+ non_empty_top = os.path.join(self.test_dir, 'non_empty_top')
2029
+ os.makedirs(non_empty_top, exist_ok=True)
2030
+ with open(os.path.join(non_empty_top, 'file_in_top.txt'), 'w') as f:
2031
+ f.write('data')
2032
+
2033
+ # Process empty_top - should remove all three
2034
+ remove_empty_folders(empty_top, remove_root=True)
2035
+ assert not os.path.exists(empty_top)
2036
+ assert not os.path.exists(empty_mid)
2037
+ assert not os.path.exists(empty_leaf)
2038
+
2039
+ # Process mixed_top; should remove empty_leaf_in_mixed and empty_mid_in_mixed
2040
+ # but not mixed_top or non_empty_mid.
2041
+ remove_empty_folders(mixed_top, remove_root=True)
2042
+ assert os.path.exists(mixed_top) # mixed_top itself should remain
2043
+ assert not os.path.exists(empty_mid_in_mixed)
2044
+ assert not os.path.exists(empty_leaf_in_mixed)
2045
+ assert os.path.exists(non_empty_mid)
2046
+ assert os.path.exists(os.path.join(non_empty_mid, 'file.txt'))
2047
+
2048
+ # Process non_empty_top; should remove nothing.
2049
+ remove_empty_folders(non_empty_top, remove_root=True)
2050
+ assert os.path.exists(non_empty_top)
2051
+ assert os.path.exists(os.path.join(non_empty_top, 'file_in_top.txt'))
2052
+
2053
+ # Test with a file path (should do nothing and return False)
2054
+ file_path_for_removal = os.path.join(self.test_dir, 'a_file.txt')
2055
+ with open(file_path_for_removal, 'w') as f: f.write('t')
2056
+ assert not remove_empty_folders(file_path_for_removal, remove_root=True)
2057
+ assert os.path.exists(file_path_for_removal)
2058
+
2059
+ # Test with remove_root=False for the top level
2060
+ another_empty_top = os.path.join(self.test_dir, 'another_empty_top')
2061
+ another_empty_mid = os.path.join(another_empty_top, 'another_empty_mid')
2062
+ os.makedirs(another_empty_mid)
2063
+ remove_empty_folders(another_empty_top, remove_root=False)
2064
+ assert os.path.exists(another_empty_top) # Root not removed
2065
+ assert not os.path.exists(another_empty_mid) # Mid removed
2066
+
2067
+
2068
+ def test_path_join(self):
2069
+ """
2070
+ Test the path_join function.
2071
+ """
2072
+
2073
+ assert path_join('a', 'b', 'c') == 'a/b/c'
2074
+ assert path_join('a/b', 'c', 'd.txt') == 'a/b/c/d.txt'
2075
+ if os.name == 'nt':
2076
+ # On Windows, os.path.join uses '\', so convert_slashes=True should change it
2077
+ assert path_join('a', 'b', convert_slashes=True) == 'a/b'
2078
+ assert path_join('a', 'b', convert_slashes=False) == 'a\\b'
2079
+ assert path_join('c:\\', 'foo', 'bar', convert_slashes=True) == 'c:/foo/bar'
2080
+ assert path_join('c:\\', 'foo', 'bar', convert_slashes=False) == 'c:\\foo\\bar'
2081
+ else:
2082
+ # On POSIX, os.path.join uses '/', so convert_slashes=False should still be '/'
2083
+ assert path_join('a', 'b', convert_slashes=False) == 'a/b'
2084
+
2085
+ assert path_join('a', '', 'b') == 'a/b' # os.path.join behavior
2086
+ assert path_join('/a', 'b') == '/a/b'
2087
+ assert path_join('a', '/b') == '/b' # '/b' is absolute
2088
+
2089
+
2090
+ def test_filename_cleaning(self):
2091
+ """
2092
+ Test clean_filename, clean_path, and flatten_path functions.
2093
+ """
2094
+
2095
+ # clean_filename
2096
+ assert clean_filename("test file.txt") == "test file.txt"
2097
+ assert clean_filename("test*file?.txt", char_limit=10) == "testfile.t"
2098
+ assert clean_filename("TestFile.TXT", force_lower=True) == "testfile.txt"
2099
+ assert clean_filename("file:with<illegal>chars.txt") == "filewithillegalchars.txt"
2100
+ assert clean_filename(" accented_name_éà.txt") == " accented_name_ea.txt"
2101
+
2102
+ # Separators are not allowed by default in clean_filename
2103
+ assert clean_filename("path/to/file.txt") == "pathtofile.txt"
2104
+
2105
+ # clean_path
2106
+ assert clean_path("path/to/file.txt") == "path/to/file.txt" # slashes allowed
2107
+ assert clean_path("path\\to\\file.txt") == "path\\to\\file.txt" # backslashes allowed
2108
+ assert clean_path("path:to:file.txt") == "path:to:file.txt" # colons allowed
2109
+ assert clean_path("path/to<illegal>/file.txt") == "path/toillegal/file.txt"
2110
+
2111
+ # flatten_path
2112
+ assert flatten_path("path/to/file.txt") == "path~to~file.txt"
2113
+ assert flatten_path("path:to:file.txt", separator_char_replacement='_') == "path_to_file.txt"
2114
+ assert flatten_path("path\\to/file:name.txt") == "path~to~file~name.txt"
2115
+ assert flatten_path("path/to<illegal>/file.txt") == "path~toillegal~file.txt"
2116
+
2117
+
2118
+ def test_is_executable(self):
2119
+ """
2120
+ Test the is_executable function.
2121
+ This is a basic test; comprehensive testing is environment-dependent.
2122
+ """
2123
+
2124
+ # Hard to test reliably across all systems without knowing what's on PATH.
2125
+ if os.name == 'nt':
2126
+ assert is_executable('cmd.exe')
2127
+ assert not is_executable('non_existent_executable_blah_blah')
2128
+ else:
2129
+ assert is_executable('ls')
2130
+ assert is_executable('sh')
2131
+ assert not is_executable('non_existent_executable_blah_blah')
2132
+
2133
+
2134
+ def test_write_read_list_to_file(self):
2135
+ """
2136
+ Test write_list_to_file and read_list_from_file functions.
2137
+ """
2138
+
2139
+ test_list = ["item1", "item2 with space", "item3/with/slash"]
2140
+
2141
+ # Test with .json
2142
+ json_file_path = os.path.join(self.test_dir, "test_list.json")
2143
+ write_list_to_file(json_file_path, test_list)
2144
+ read_list_json = read_list_from_file(json_file_path)
2145
+ assert test_list == read_list_json
2146
+
2147
+ # Test with .txt
2148
+ txt_file_path = os.path.join(self.test_dir, "test_list.txt")
2149
+ write_list_to_file(txt_file_path, test_list)
2150
+ # read_list_from_file is specifically for JSON, so we read .txt manually
2151
+ with open(txt_file_path, 'r') as f:
2152
+ read_list_txt = [line.strip() for line in f.readlines()]
2153
+ assert test_list == read_list_txt
2154
+
2155
+ # Test reading non-existent json
2156
+ try:
2157
+ read_list_from_file(os.path.join(self.test_dir,"non_existent.json"))
2158
+ raise AssertionError("FileNotFoundError not raised")
2159
+ except FileNotFoundError:
2160
+ pass
2161
+
2162
+ # Test reading a non-json file with read_list_from_file (should fail parsing)
2163
+ non_json_path = os.path.join(self.test_dir, "not_a_list.json")
2164
+ with open(non_json_path, 'w') as f: f.write("this is not json")
2165
+ try:
2166
+ read_list_from_file(non_json_path)
2167
+ raise AssertionError("json.JSONDecodeError not raised")
2168
+ except json.JSONDecodeError:
2169
+ pass
2170
+
2171
+
2172
+ def test_parallel_copy_files(self):
2173
+ """
2174
+ Test the parallel_copy_files function (with max_workers=1 for test simplicity).
2175
+ """
2176
+
2177
+ source_dir = os.path.join(self.test_dir, "copy_source")
2178
+ target_dir = os.path.join(self.test_dir, "copy_target")
2179
+ os.makedirs(source_dir, exist_ok=True)
2180
+
2181
+ file_mappings = {}
2182
+ source_files_content = {}
2183
+
2184
+ for i in range(3):
2185
+ src_fn = f"file{i}.txt"
2186
+ src_path = os.path.join(source_dir, src_fn)
2187
+ if i == 0:
2188
+ tgt_fn = f"copied_file{i}.txt"
2189
+ tgt_path = os.path.join(target_dir, tgt_fn)
2190
+ else:
2191
+ tgt_fn = f"copied_file{i}_subdir.txt"
2192
+ tgt_path = os.path.join(target_dir, f"sub{i}", tgt_fn)
2193
+
2194
+ content = f"content of file {i}"
2195
+ with open(src_path, 'w') as f:
2196
+ f.write(content)
2197
+
2198
+ file_mappings[src_path] = tgt_path
2199
+ source_files_content[tgt_path] = content
2200
+
2201
+ # Test copy
2202
+ parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=False)
2203
+ for tgt_path, expected_content in source_files_content.items():
2204
+ assert os.path.exists(tgt_path)
2205
+ with open(tgt_path, 'r') as f:
2206
+ assert f.read() == expected_content
2207
+
2208
+ existing_target_path = list(source_files_content.keys())[0]
2209
+ with open(existing_target_path, 'w') as f:
2210
+ f.write("old content")
2211
+
2212
+ parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=False)
2213
+ with open(existing_target_path, 'r') as f:
2214
+ assert f.read() == "old content"
2215
+
2216
+ parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=True)
2217
+ with open(existing_target_path, 'r') as f:
2218
+ assert f.read() == source_files_content[existing_target_path]
2219
+
2220
+ for src_path_orig, tgt_path_orig in file_mappings.items(): # Re-create source for move
2221
+ with open(src_path_orig, 'w') as f:
2222
+ f.write(source_files_content[tgt_path_orig])
2223
+
2224
+ parallel_copy_files(file_mappings, max_workers=1, use_threads=True, move=True, overwrite=True)
2225
+ for src_path, tgt_path in file_mappings.items():
2226
+ assert not os.path.exists(src_path)
2227
+ assert os.path.exists(tgt_path)
2228
+ with open(tgt_path, 'r') as f:
2229
+ assert f.read() == source_files_content[tgt_path]
2230
+
2231
+
2232
+ def test_get_file_sizes(self):
2233
+ """
2234
+ Test get_file_sizes and parallel_get_file_sizes functions.
2235
+ """
2236
+
2237
+ file_sizes_test_dir = os.path.join(self.test_dir,'file_sizes')
2238
+ os.makedirs(file_sizes_test_dir,exist_ok=True)
2239
+
2240
+ f1_path = os.path.join(file_sizes_test_dir, 'file1.txt')
2241
+ content1 = "0123456789" # 10 bytes
2242
+ with open(f1_path, 'w') as f:
2243
+ f.write(content1)
2244
+
2245
+ subdir_path = os.path.join(file_sizes_test_dir, 'subdir')
2246
+ os.makedirs(subdir_path, exist_ok=True)
2247
+ f2_path = os.path.join(subdir_path, 'file2.txt')
2248
+ content2 = "01234567890123456789" # 20 bytes
2249
+ with open(f2_path, 'w') as f:
2250
+ f.write(content2)
2251
+
2252
+ sizes_relative = get_file_sizes(file_sizes_test_dir)
2253
+ expected_sizes_relative = {
2254
+ 'file1.txt': len(content1),
2255
+ os.path.join('subdir', 'file2.txt').replace('\\','/'): len(content2)
2256
+ }
2257
+ assert sizes_relative == expected_sizes_relative
2258
+
2259
+ file_list_abs = [f1_path, f2_path]
2260
+ sizes_parallel_abs = parallel_get_file_sizes(file_list_abs, max_workers=1)
2261
+ expected_sizes_parallel_abs = {
2262
+ f1_path.replace('\\','/'): len(content1),
2263
+ f2_path.replace('\\','/'): len(content2)
2264
+ }
2265
+ assert sizes_parallel_abs == expected_sizes_parallel_abs
2266
+
2267
+ sizes_parallel_folder_abs = parallel_get_file_sizes(file_sizes_test_dir, max_workers=1, return_relative_paths=False)
2268
+ assert sizes_parallel_folder_abs == expected_sizes_parallel_abs
2269
+
2270
+ sizes_parallel_folder_rel = parallel_get_file_sizes(file_sizes_test_dir, max_workers=1, return_relative_paths=True)
2271
+ assert sizes_parallel_folder_rel == expected_sizes_relative
2272
+
2273
+ non_existent_file = os.path.join(file_sizes_test_dir, "no_such_file.txt")
2274
+ sizes_with_error = parallel_get_file_sizes([f1_path, non_existent_file], max_workers=1)
2275
+ expected_with_error = {
2276
+ f1_path.replace('\\','/'): len(content1),
2277
+ non_existent_file.replace('\\','/'): None
2278
+ }
2279
+ assert sizes_with_error == expected_with_error
2280
+
2281
+
2282
+ def test_zip_file_and_unzip_file(self):
2283
+ """
2284
+ Test zip_file and unzip_file functions.
2285
+ """
2286
+
2287
+ file_to_zip_name = "test_zip_me.txt"
2288
+ file_to_zip_path = os.path.join(self.test_dir, file_to_zip_name)
2289
+ content = "This is the content to be zipped."
2290
+ with open(file_to_zip_path, 'w') as f:
2291
+ f.write(content)
2292
+
2293
+ default_zip_output_path = file_to_zip_path + ".zip"
2294
+ returned_zip_path = zip_file(file_to_zip_path)
2295
+ assert returned_zip_path == default_zip_output_path
2296
+ assert os.path.exists(default_zip_output_path)
2297
+
2298
+ unzip_dir_default = os.path.join(self.test_dir, "unzip_default")
2299
+ os.makedirs(unzip_dir_default, exist_ok=True)
2300
+ unzip_file(default_zip_output_path, unzip_dir_default)
2301
+ unzipped_file_path_default = os.path.join(unzip_dir_default, file_to_zip_name)
2302
+ assert os.path.exists(unzipped_file_path_default)
2303
+ with open(unzipped_file_path_default, 'r') as f:
2304
+ assert f.read() == content
2305
+
2306
+ custom_zip_output_name = "custom_archive.zip"
2307
+ custom_zip_output_path = os.path.join(self.test_dir, custom_zip_output_name)
2308
+ zip_file(file_to_zip_path, output_fn=custom_zip_output_path, overwrite=True)
2309
+ assert os.path.exists(custom_zip_output_path)
2310
+
2311
+ zip_in_subdir_path = os.path.join(self.test_dir, "subdir_zip", "my.zip")
2312
+ file_in_subdir_name = "file_for_subdir_zip.txt"
2313
+ file_in_subdir_path = os.path.join(self.test_dir,"subdir_zip", file_in_subdir_name)
2314
+ os.makedirs(os.path.dirname(zip_in_subdir_path), exist_ok=True)
2315
+ with open(file_in_subdir_path, "w") as f: f.write("sub dir content")
2316
+ zip_file(file_in_subdir_path, output_fn=zip_in_subdir_path)
2317
+
2318
+ unzip_file(zip_in_subdir_path, output_folder=None)
2319
+ unzipped_in_same_dir_path = os.path.join(os.path.dirname(zip_in_subdir_path), file_in_subdir_name)
2320
+ assert os.path.exists(unzipped_in_same_dir_path)
2321
+ with open(unzipped_in_same_dir_path, 'r') as f:
2322
+ assert f.read() == "sub dir content"
2323
+
2324
+
2325
+ def test_zip_folder(self):
2326
+ """
2327
+ Test the zip_folder function.
2328
+ """
2329
+
2330
+ folder_to_zip = os.path.join(self.test_dir, "folder_to_zip")
2331
+ os.makedirs(folder_to_zip, exist_ok=True)
2332
+
2333
+ file1_name = "file1.txt"; path1 = os.path.join(folder_to_zip, file1_name)
2334
+ file2_name = "file2.log"; path2 = os.path.join(folder_to_zip, file2_name)
2335
+ subdir_name = "sub"; subdir_path = os.path.join(folder_to_zip, subdir_name)
2336
+ os.makedirs(subdir_path, exist_ok=True)
2337
+ file3_name = "file3.dat"; path3 = os.path.join(subdir_path, file3_name)
2338
+
2339
+ content1 = "content1"; content2 = "content2"; content3 = "content3"
2340
+ with open(path1, 'w') as f: f.write(content1)
2341
+ with open(path2, 'w') as f: f.write(content2)
2342
+ with open(path3, 'w') as f: f.write(content3)
2343
+
2344
+ default_zip_path = folder_to_zip + ".zip"
2345
+ zip_folder(folder_to_zip, output_fn=None, overwrite=True)
2346
+ assert os.path.exists(default_zip_path)
2347
+
2348
+ unzip_output_dir = os.path.join(self.test_dir, "unzipped_folder_content")
2349
+ os.makedirs(unzip_output_dir, exist_ok=True)
2350
+ unzip_file(default_zip_path, unzip_output_dir)
2351
+
2352
+ assert os.path.exists(os.path.join(unzip_output_dir, file1_name))
2353
+ assert os.path.exists(os.path.join(unzip_output_dir, file2_name))
2354
+ assert os.path.exists(os.path.join(unzip_output_dir, subdir_name, file3_name))
+ with open(os.path.join(unzip_output_dir, file1_name), 'r') as f: assert f.read() == content1
+ with open(os.path.join(unzip_output_dir, file2_name), 'r') as f: assert f.read() == content2
+ with open(os.path.join(unzip_output_dir, subdir_name, file3_name), 'r') as f: assert f.read() == content3
2358
+
2359
+ mtime_before = os.path.getmtime(default_zip_path)
2360
+ zip_folder(folder_to_zip, output_fn=None, overwrite=False)
2361
+ mtime_after = os.path.getmtime(default_zip_path)
2362
+ assert mtime_before == mtime_after
2363
+
2364
+
2365
+ def test_zip_files_into_single_zipfile(self):
2366
+ """
2367
+ Test zip_files_into_single_zipfile.
2368
+ """
2369
+
2370
+ file1_path = os.path.join(self.test_dir, "zfs_file1.txt")
2371
+ content1 = "content for zfs1"
2372
+ with open(file1_path, 'w') as f: f.write(content1)
2373
+
2374
+ subdir_for_zfs = os.path.join(self.test_dir, "zfs_subdir")
2375
+ os.makedirs(subdir_for_zfs, exist_ok=True)
2376
+ file2_path = os.path.join(subdir_for_zfs, "zfs_file2.log")
2377
+ content2 = "content for zfs2"
2378
+ with open(file2_path, 'w') as f: f.write(content2)
2379
+
2380
+ input_files = [file1_path, file2_path]
2381
+ output_zip_path = os.path.join(self.test_dir, "multi_file_archive.zip")
2382
+ zip_files_into_single_zipfile(input_files, output_zip_path, arc_name_base=self.test_dir, overwrite=True)
2383
+ assert os.path.exists(output_zip_path)
2384
+
2385
+ unzip_dir = os.path.join(self.test_dir, "unzip_multi_file")
2386
+ os.makedirs(unzip_dir, exist_ok=True)
2387
+ unzip_file(output_zip_path, unzip_dir)
2388
+
2389
+ expected_unzipped_file1 = os.path.join(unzip_dir, os.path.relpath(file1_path, self.test_dir))
2390
+ expected_unzipped_file2 = os.path.join(unzip_dir, os.path.relpath(file2_path, self.test_dir))
2391
+
2392
+ assert os.path.exists(expected_unzipped_file1)
2393
+ with open(expected_unzipped_file1, 'r') as f: assert f.read() == content1
2394
+ assert os.path.exists(expected_unzipped_file2)
2395
+ assert os.path.basename(expected_unzipped_file2) == "zfs_file2.log"
2396
+ assert os.path.basename(os.path.dirname(expected_unzipped_file2)) == "zfs_subdir"
2397
+ with open(expected_unzipped_file2, 'r') as f: assert f.read() == content2
2398
+
2399
+
2400
+ def test_add_files_to_single_tar_file(self):
2401
+ """
2402
+ Test add_files_to_single_tar_file.
2403
+ """
2404
+
2405
+ file1_path = os.path.join(self.test_dir, "tar_file1.txt")
2406
+ content1 = "content for tar1"
2407
+ with open(file1_path, 'w') as f: f.write(content1)
2408
+
2409
+ subdir_for_tar = os.path.join(self.test_dir, "tar_subdir")
2410
+ os.makedirs(subdir_for_tar, exist_ok=True)
2411
+ file2_path = os.path.join(subdir_for_tar, "tar_file2.log")
2412
+ content2 = "content for tar2"
2413
+ with open(file2_path, 'w') as f: f.write(content2)
2414
+
2415
+ input_files = [file1_path, file2_path]
2416
+ output_tar_path = os.path.join(self.test_dir, "archive.tar.gz")
2417
+
2418
+ add_files_to_single_tar_file(input_files, output_tar_path, arc_name_base=self.test_dir,
2419
+ overwrite=True, mode='x:gz')
2420
+ assert os.path.exists(output_tar_path)
2421
+
2422
+ un_tar_dir = os.path.join(self.test_dir, "un_tar_contents")
2423
+ os.makedirs(un_tar_dir, exist_ok=True)
2424
+ with tarfile.open(output_tar_path, 'r:gz') as tf:
2425
+ tf.extractall(path=un_tar_dir)
2426
+
2427
+ expected_untarred_file1 = os.path.join(un_tar_dir, os.path.relpath(file1_path, self.test_dir))
2428
+ expected_untarred_file2 = os.path.join(un_tar_dir, os.path.relpath(file2_path, self.test_dir))
2429
+
2430
+ assert os.path.exists(expected_untarred_file1)
2431
+ with open(expected_untarred_file1, 'r') as f: assert f.read() == content1
2432
+ assert os.path.exists(expected_untarred_file2)
2433
+ with open(expected_untarred_file2, 'r') as f: assert f.read() == content2
2434
+
2435
+
2436
+ def test_parallel_zip_individual_files_and_folders(self):
2437
+ """
2438
+ Test parallel_zip_files, parallel_zip_folders, and zip_each_file_in_folder.
2439
+ """
2440
+
2441
+ file1_to_zip = os.path.join(self.test_dir, "pz_file1.txt")
2442
+ file2_to_zip = os.path.join(self.test_dir, "pz_file2.txt")
2443
+ with open(file1_to_zip, 'w') as f: f.write("pz_content1")
2444
+ with open(file2_to_zip, 'w') as f: f.write("pz_content2")
2445
+
2446
+ parallel_zip_files([file1_to_zip, file2_to_zip], max_workers=1, overwrite=True)
2447
+ assert os.path.exists(file1_to_zip + ".zip")
2448
+ assert os.path.exists(file2_to_zip + ".zip")
2449
+ unzip_dir_pz = os.path.join(self.test_dir, "unzip_pz")
2450
+ unzip_file(file1_to_zip + ".zip", unzip_dir_pz)
2451
+ assert os.path.exists(os.path.join(unzip_dir_pz, os.path.basename(file1_to_zip)))
2452
+
2453
+ folder1_to_zip = os.path.join(self.test_dir, "pz_folder1")
2454
+ os.makedirs(folder1_to_zip, exist_ok=True)
2455
+ with open(os.path.join(folder1_to_zip, "pf1.txt"), 'w') as f: f.write("pf1_content")
2456
+ folder2_to_zip = os.path.join(self.test_dir, "pz_folder2")
2457
+ os.makedirs(folder2_to_zip, exist_ok=True)
2458
+ with open(os.path.join(folder2_to_zip, "pf2.txt"), 'w') as f: f.write("pf2_content")
2459
+
2460
+ parallel_zip_folders([folder1_to_zip, folder2_to_zip], max_workers=1, overwrite=True)
2461
+ assert os.path.exists(folder1_to_zip + ".zip")
2462
+ assert os.path.exists(folder2_to_zip + ".zip")
2463
+ unzip_dir_pzf = os.path.join(self.test_dir, "unzip_pzf")
2464
+ unzip_file(folder1_to_zip + ".zip", unzip_dir_pzf)
2465
+ assert os.path.exists(os.path.join(unzip_dir_pzf, "pf1.txt"))
2466
+
2467
+ zef_folder = os.path.join(self.test_dir, "zef_test_folder")
2468
+ os.makedirs(zef_folder, exist_ok=True)
2469
+ zef_file1 = os.path.join(zef_folder, "zef1.txt")
2470
+ zef_file2_png = os.path.join(zef_folder, "zef2.png")
2471
+ zef_file3_zip = os.path.join(zef_folder, "zef3.zip")
2472
+ zef_subdir = os.path.join(zef_folder, "zef_sub")
2473
+ os.makedirs(zef_subdir, exist_ok=True)
2474
+ zef_file_in_sub = os.path.join(zef_subdir, "zef_subfile.txt")
2475
+
2476
+ for p_path in [zef_file1, zef_file2_png, zef_file3_zip, zef_file_in_sub]:
2477
+ with open(p_path, 'w') as f: f.write(f"content of {os.path.basename(p_path)}")
2478
+
2479
+ zip_each_file_in_folder(zef_folder, recursive=False, max_workers=1, overwrite=True)
2480
+ assert os.path.exists(zef_file1 + ".zip")
2481
+ assert os.path.exists(zef_file2_png + ".zip")
2482
+ assert not os.path.exists(zef_file3_zip + ".zip")
2483
+ assert not os.path.exists(zef_file_in_sub + ".zip")
2484
+
2485
+ if os.path.exists(zef_file1 + ".zip"): os.remove(zef_file1 + ".zip")
2486
+ if os.path.exists(zef_file2_png + ".zip"): os.remove(zef_file2_png + ".zip")
2487
+
2488
+ zip_each_file_in_folder(zef_folder, recursive=True, max_workers=1, overwrite=True)
2489
+ assert os.path.exists(zef_file1 + ".zip")
2490
+ assert os.path.exists(zef_file2_png + ".zip")
2491
+ assert not os.path.exists(zef_file3_zip + ".zip")
2492
+ assert os.path.exists(zef_file_in_sub + ".zip")
2493
+
2494
+ if os.path.exists(zef_file1 + ".zip"): os.remove(zef_file1 + ".zip")
2495
+ if os.path.exists(zef_file2_png + ".zip"): os.remove(zef_file2_png + ".zip")
2496
+ if os.path.exists(zef_file_in_sub + ".zip"): os.remove(zef_file_in_sub + ".zip")
2497
+ zip_each_file_in_folder(zef_folder, recursive=True, required_token="zef1", max_workers=1, overwrite=True)
2498
+ assert os.path.exists(zef_file1 + ".zip")
2499
+ assert not os.path.exists(zef_file2_png + ".zip")
2500
+ assert not os.path.exists(zef_file_in_sub + ".zip")
2501
+
2502
+ if os.path.exists(zef_file1 + ".zip"): os.remove(zef_file1 + ".zip")
2503
+ dummy_to_zip = os.path.join(zef_folder,"dummy.txt")
2504
+ with open(dummy_to_zip,'w') as f: f.write('d')
2505
+ zip_each_file_in_folder(zef_folder, recursive=False, exclude_zip=False, max_workers=1, overwrite=True)
2506
+ assert os.path.exists(dummy_to_zip + ".zip")
2507
+ assert os.path.exists(zef_file3_zip + ".zip")
2508
+ if os.path.exists(dummy_to_zip + ".zip"): os.remove(dummy_to_zip + ".zip")
2509
+ if os.path.exists(zef_file3_zip + ".zip"): os.remove(zef_file3_zip + ".zip")
2510
+
2511
+
2512
+ def test_compute_file_hash(self):
2513
+ """
2514
+ Test compute_file_hash and parallel_compute_file_hashes.
2515
+ """
2516
+
2517
+ file1_name = "hash_me1.txt"
2518
+ file1_path = os.path.join(self.test_dir, file1_name)
2519
+ content1 = "This is a test string for hashing."
2520
+ with open(file1_path, 'w') as f:
2521
+ f.write(content1)
2522
+
2523
+ file2_name = "hash_me2.txt"
2524
+ file2_path = os.path.join(self.test_dir, file2_name)
2525
+ with open(file2_path, 'w') as f:
2526
+ f.write(content1)
2527
+
2528
+ file3_name = "hash_me3.txt"
2529
+ file3_path = os.path.join(self.test_dir, file3_name)
2530
+ content3 = "This is a different test string for hashing."
2531
+ with open(file3_path, 'w') as f:
2532
+ f.write(content3)
2533
+
2534
+ expected_hash_content1_sha256 = \
2535
+ "c56f19d76df6a09e49fe0d9ce7b1bc7f1dbd582f668742bede65c54c47d5bcf4".lower()
2536
+ expected_hash_content3_sha256 = \
2537
+ "23013ff7e93264317f7b2fc0e9a217649f2dc0b11ca7e0bd49632424b70b6680".lower()
2538
+
2539
+ hash1 = compute_file_hash(file1_path)
2540
+ hash2 = compute_file_hash(file2_path)
2541
+ hash3 = compute_file_hash(file3_path)
2542
+ assert hash1 == expected_hash_content1_sha256
2543
+ assert hash2 == expected_hash_content1_sha256
2544
+ assert hash1 != hash3
2545
+ assert hash3 == expected_hash_content3_sha256
2546
+
2547
+ expected_hash_content1_md5 = "94b971f1f8cdb23c2af82af73160d4b0".lower()
2548
+ hash1_md5 = compute_file_hash(file1_path, algorithm='md5')
2549
+ assert hash1_md5 == expected_hash_content1_md5
2550
+
2551
+ non_existent_path = os.path.join(self.test_dir, "no_such_file.txt")
2552
+ assert compute_file_hash(non_existent_path, allow_failures=True) is None
2553
+ try:
2554
+ compute_file_hash(non_existent_path, allow_failures=False)
2555
+ raise AssertionError("FileNotFoundError not raised for compute_file_hash")
2556
+ except FileNotFoundError:
2557
+ pass
2558
+
2559
+ files_to_hash = [file1_path, file3_path, non_existent_path]
2560
+ hashes_parallel = parallel_compute_file_hashes(files_to_hash, max_workers=1)
2561
+
2562
+ norm_f1 = file1_path.replace('\\','/')
2563
+ norm_f3 = file3_path.replace('\\','/')
2564
+ norm_non = non_existent_path.replace('\\','/')
2565
+
2566
+ expected_parallel_hashes = {
2567
+ norm_f1: expected_hash_content1_sha256,
2568
+ norm_f3: expected_hash_content3_sha256,
2569
+ norm_non: None
2570
+ }
2571
+ hashes_parallel_norm = {k.replace('\\','/'): v for k,v in hashes_parallel.items()}
2572
+ assert hashes_parallel_norm == expected_parallel_hashes
2573
+
2574
+ hash_folder = os.path.join(self.test_dir, "hash_test_folder")
2575
+ os.makedirs(hash_folder, exist_ok=True)
2576
+ h_f1_name = "h_f1.txt"; h_f1_path = os.path.join(hash_folder, h_f1_name)
2577
+ h_f2_name = "h_f2.txt"; h_f2_path = os.path.join(hash_folder, h_f2_name)
2578
+ with open(h_f1_path, 'w') as f: f.write(content1)
2579
+ with open(h_f2_path, 'w') as f: f.write(content3)
2580
+
2581
+ hashes_folder_parallel = parallel_compute_file_hashes(hash_folder, recursive=False, max_workers=1)
2582
+ norm_hf1 = h_f1_path.replace('\\','/')
2583
+ norm_hf2 = h_f2_path.replace('\\','/')
2584
+ expected_folder_hashes = {
2585
+ norm_hf1: expected_hash_content1_sha256,
2586
+ norm_hf2: expected_hash_content3_sha256
2587
+ }
2588
+ hashes_folder_parallel_norm = {k.replace('\\','/'): v for k,v in hashes_folder_parallel.items()}
2589
+ assert hashes_folder_parallel_norm == expected_folder_hashes
2590
+
2591
+
2592
+ def test_path_utils():
2593
+ """
2594
+ Runs all tests in the TestPathUtils class.
2595
+ """
2596
+
2597
+ test_instance = TestPathUtils()
2598
+ test_instance.set_up()
2599
+ try:
2600
+ test_instance.test_is_image_file()
2601
+ test_instance.test_find_image_strings()
2602
+ test_instance.test_find_images()
2603
+ test_instance.test_recursive_file_list_and_file_list()
2604
+ test_instance.test_folder_list()
2605
+ test_instance.test_folder_summary()
2606
+ test_instance.test_fileparts()
2607
+ test_instance.test_insert_before_extension()
2608
+ test_instance.test_split_path()
2609
+ test_instance.test_path_is_abs()
2610
+ test_instance.test_safe_create_link_unix()
2611
+ test_instance.test_remove_empty_folders()
2612
+ test_instance.test_path_join()
2613
+ test_instance.test_filename_cleaning()
2614
+ test_instance.test_is_executable()
2615
+ test_instance.test_write_read_list_to_file()
2616
+ test_instance.test_parallel_copy_files()
2617
+ test_instance.test_get_file_sizes()
2618
+ test_instance.test_zip_file_and_unzip_file()
2619
+ test_instance.test_zip_folder()
2620
+ test_instance.test_zip_files_into_single_zipfile()
2621
+ test_instance.test_add_files_to_single_tar_file()
2622
+ test_instance.test_parallel_zip_individual_files_and_folders()
2623
+ test_instance.test_compute_file_hash()
2624
+ finally:
2625
+ test_instance.tear_down()
2626
+
2627
+ # from IPython import embed; embed()
2628
+ # test_path_utils()
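If this suite is run stand-alone, a minimal entry point might look like the sketch below; test_path_utils() calls set_up() and tear_down() explicitly, so no test framework is required (the methods deliberately don't follow pytest's setup_method/teardown_method naming):

    # Stand-alone invocation sketch; test_path_utils() manages setup/teardown itself
    if __name__ == '__main__':
        test_path_utils()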