megadetector 10.0.9__py3-none-any.whl → 10.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/data_management/animl_to_md.py +5 -2
- megadetector/data_management/cct_json_utils.py +4 -2
- megadetector/data_management/cct_to_md.py +5 -4
- megadetector/data_management/cct_to_wi.py +5 -1
- megadetector/data_management/coco_to_yolo.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/databases/subset_json_db.py +0 -3
- megadetector/data_management/generate_crops_from_cct.py +6 -4
- megadetector/data_management/get_image_sizes.py +5 -35
- megadetector/data_management/labelme_to_coco.py +10 -6
- megadetector/data_management/labelme_to_yolo.py +19 -28
- megadetector/data_management/lila/create_lila_test_set.py +22 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
- megadetector/data_management/lila/lila_common.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
- megadetector/data_management/ocr_tools.py +6 -10
- megadetector/data_management/read_exif.py +69 -13
- megadetector/data_management/remap_coco_categories.py +1 -1
- megadetector/data_management/remove_exif.py +10 -5
- megadetector/data_management/rename_images.py +20 -13
- megadetector/data_management/resize_coco_dataset.py +10 -4
- megadetector/data_management/speciesnet_to_md.py +3 -3
- megadetector/data_management/yolo_output_to_md_output.py +3 -1
- megadetector/data_management/yolo_to_coco.py +28 -19
- megadetector/detection/change_detection.py +26 -18
- megadetector/detection/process_video.py +1 -1
- megadetector/detection/pytorch_detector.py +5 -5
- megadetector/detection/run_detector.py +34 -10
- megadetector/detection/run_detector_batch.py +60 -42
- megadetector/detection/run_inference_with_yolov5_val.py +3 -1
- megadetector/detection/run_md_and_speciesnet.py +282 -110
- megadetector/detection/run_tiled_inference.py +7 -7
- megadetector/detection/tf_detector.py +4 -6
- megadetector/detection/video_utils.py +9 -6
- megadetector/postprocessing/add_max_conf.py +4 -4
- megadetector/postprocessing/categorize_detections_by_size.py +3 -2
- megadetector/postprocessing/classification_postprocessing.py +19 -21
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +49 -27
- megadetector/postprocessing/convert_output_format.py +8 -6
- megadetector/postprocessing/create_crop_folder.py +13 -4
- megadetector/postprocessing/generate_csv_report.py +22 -8
- megadetector/postprocessing/load_api_results.py +8 -4
- megadetector/postprocessing/md_to_coco.py +2 -3
- megadetector/postprocessing/md_to_labelme.py +12 -8
- megadetector/postprocessing/md_to_wi.py +2 -1
- megadetector/postprocessing/merge_detections.py +4 -6
- megadetector/postprocessing/postprocess_batch_results.py +4 -3
- megadetector/postprocessing/remap_detection_categories.py +6 -3
- megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
- megadetector/postprocessing/separate_detections_into_folders.py +10 -4
- megadetector/postprocessing/subset_json_detector_output.py +1 -1
- megadetector/postprocessing/top_folders_to_bottom.py +22 -7
- megadetector/postprocessing/validate_batch_results.py +1 -1
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
- megadetector/taxonomy_mapping/species_lookup.py +51 -2
- megadetector/utils/ct_utils.py +9 -4
- megadetector/utils/directory_listing.py +3 -0
- megadetector/utils/extract_frames_from_video.py +4 -0
- megadetector/utils/gpu_test.py +6 -6
- megadetector/utils/md_tests.py +21 -21
- megadetector/utils/path_utils.py +171 -36
- megadetector/utils/split_locations_into_train_val.py +0 -4
- megadetector/utils/string_utils.py +21 -0
- megadetector/utils/url_utils.py +5 -3
- megadetector/utils/wi_platform_utils.py +168 -24
- megadetector/utils/wi_taxonomy_utils.py +38 -8
- megadetector/utils/write_html_image_list.py +1 -2
- megadetector/visualization/plot_utils.py +31 -19
- megadetector/visualization/render_images_with_thumbnails.py +3 -0
- megadetector/visualization/visualization_utils.py +18 -7
- megadetector/visualization/visualize_db.py +9 -26
- megadetector/visualization/visualize_detector_output.py +1 -0
- megadetector/visualization/visualize_video_output.py +14 -2
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/RECORD +84 -84
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/utils/ct_utils.py
CHANGED

@@ -241,7 +241,10 @@ def write_json(path,
     elif force_str:
         default_handler = str

-
+    # Create the parent directory if necessary
+    parent_dir = os.path.dirname(path)
+    if len(parent_dir) > 0:
+        os.makedirs(parent_dir, exist_ok=True)

     with open(path, 'w', newline='\n', encoding=encoding) as f:
         json.dump(content, f, indent=indent, default=default_handler, ensure_ascii=ensure_ascii)

@@ -562,7 +565,7 @@ def sort_dictionary_by_value(d,sort_values=None,reverse=False):
         reverse (bool, optional): whether to sort in reverse (descending) order

     Returns:
-        dict: sorted copy of [d
+        dict: sorted copy of [d]
     """

     if sort_values is None:

@@ -1022,8 +1025,10 @@ def parse_bool_string(s, strict=False):
     s = str(s).lower().strip()

     if strict:
-
-
+        # Fun fact: ('false') (rather than ('false',)) creates a string,
+        # not a tuple.
+        false_strings = ('false',)
+        true_strings = ('true',)
     else:
         false_strings = ('no', 'false', 'f', 'n', '0')
         true_strings = ('yes', 'true', 't', 'y', '1')
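As a quick illustration of the write_json and parse_bool_string changes above, here is a minimal sketch; the output path and payload are hypothetical, and the exact return values of parse_bool_string are not shown in this diff.

from megadetector.utils.ct_utils import write_json, parse_bool_string

# The parent folder 'results/run_01' is now created automatically if it
# does not exist (hypothetical path and payload)
write_json('results/run_01/detections.json', {'images': []})

# Non-strict parsing accepts common variants ('y', '1', 'yes', ...); strict
# parsing only matches the literal strings 'true' and 'false'
print(parse_bool_string('y'))
print(parse_bool_string('true', strict=True))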
megadetector/utils/directory_listing.py
CHANGED

@@ -129,6 +129,9 @@ def create_html_index(dir,
         recursive (bool, optional): recurse into subfolders
     """

+    if template_fun is None:
+        template_fun = _create_plain_index
+
     print('Traversing {}'.format(dir))

     # Make sure we remove the trailing /
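A minimal usage sketch for the create_html_index change; the folder path is hypothetical, and the call assumes template_fun defaults to None so that the new fallback to _create_plain_index applies.

from megadetector.utils.directory_listing import create_html_index

# With no template function supplied, the plain built-in template is used
create_html_index('/data/camera-trap-report', recursive=True)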
megadetector/utils/gpu_test.py
CHANGED

@@ -34,7 +34,7 @@ def torch_test():
     except Exception as e: #noqa
         print('PyTorch unavailable, not running PyTorch tests. PyTorch import error was:\n{}'.format(
             str(e)))
-        return
+        return 0

     print('Torch version: {}'.format(str(torch.__version__)))
     print('CUDA available (according to PyTorch): {}'.format(torch.cuda.is_available()))

@@ -71,17 +71,17 @@ def tf_test():
     Print diagnostic information about TF/CUDA status.

     Returns:
-        int: The number of CUDA devices reported by
+        int: The number of CUDA devices reported by TensorFlow.
     """

     try:
-        import tensorflow as tf
+        import tensorflow as tf # type: ignore
     except Exception as e: #noqa
         print('TensorFlow unavailable, not running TF tests. TF import error was:\n{}'.format(
             str(e)))
-        return
+        return 0

-    from tensorflow.python.platform import build_info as build
+    from tensorflow.python.platform import build_info as build # type: ignore
     print(f"TF version: {tf.__version__}")

     if 'cuda_version' not in build.build_info:

@@ -94,7 +94,7 @@ def tf_test():
     print(f"CuDNN build version reported by TensorFlow: {build.build_info['cudnn_version']}")

     try:
-        from tensorflow.python.compiler.tensorrt import trt_convert as trt
+        from tensorflow.python.compiler.tensorrt import trt_convert as trt # type: ignore
         print("Linked TensorRT version: {}".format(trt.trt_utils._pywrap_py_utils.get_linked_tensorrt_version()))
     except Exception:
         print('Could not probe TensorRT version')
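Both probes now return 0 rather than None when the corresponding framework cannot be imported, so callers can treat the return value uniformly as a CUDA device count. A short sketch (assuming torch_test also returns a device count, by analogy with tf_test's docstring):

from megadetector.utils import gpu_test

n_torch_devices = gpu_test.torch_test()
n_tf_devices = gpu_test.tf_test()
print('CUDA devices: {} (PyTorch), {} (TensorFlow)'.format(n_torch_devices, n_tf_devices))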
megadetector/utils/md_tests.py
CHANGED

@@ -386,7 +386,7 @@ def output_files_are_identical(fn1,fn2,verbose=False):
     fn2_results['images'] = \
         sorted(fn2_results['images'], key=lambda d: d['file'])

-    if len(fn1_results['images']) != len(
+    if len(fn1_results['images']) != len(fn2_results['images']):
         if verbose:
             print('{} images in {}, {} images in {}'.format(
                 len(fn1_results['images']),fn1,

@@ -1249,8 +1249,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_queue,
+                                      verbose=True)


     ## Run again with the image queue and worker-side preprocessing enabled

@@ -1265,24 +1265,24 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_preprocess_queue,
+                                      verbose=True)


-    ## Run again with the image queue
+    ## Run again with the image queue but no worker-side preprocessing

-    print('\n** Running MD on a folder (with image queue
+    print('\n** Running MD on a folder (with image queue but no worker-side preprocessing) (CLI) **\n')

-    cmd = base_cmd + ' --use_image_queue
-
-        insert_before_extension(inference_output_file,'
-    cmd = cmd.replace(inference_output_file,
+    cmd = base_cmd + ' --use_image_queue'
+    inference_output_file_no_preprocess_queue = \
+        insert_before_extension(inference_output_file,'no_preprocess_queue')
+    cmd = cmd.replace(inference_output_file,inference_output_file_no_preprocess_queue)
     cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_no_preprocess_queue,
+                                      verbose=True)


     ## Run again with the worker-side preprocessing and an alternative batch size

@@ -1316,8 +1316,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_checkpoint,
+                                      verbose=True)


     ## Run again with "modern" postprocessing, make sure the results are *not* the same as classic

@@ -1331,8 +1331,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert not output_files_are_identical(fn1=inference_output_file,
-
-
+                                          fn2=inference_output_file_modern,
+                                          verbose=True)


     ## Run again with "modern" postprocessing and worker-side preprocessing,

@@ -1348,13 +1348,13 @@ def run_cli_tests(options):

     # This should not be the same as the "classic" results
     assert not output_files_are_identical(fn1=inference_output_file,
-
-
+                                          fn2=inference_output_file_modern_worker_preprocessing,
+                                          verbose=True)

     # ...but it should be the same as the single-threaded "modern" results
     assert output_files_are_identical(fn1=inference_output_file_modern,
-
-
+                                      fn2=inference_output_file_modern_worker_preprocessing,
+                                      verbose=True)


     if not options.skip_cpu_tests:
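The restored fn2 arguments match the output_files_are_identical signature shown in the first hunk; a standalone comparison might look like this (filenames are hypothetical):

from megadetector.utils.md_tests import output_files_are_identical

same = output_files_are_identical(fn1='md_results_single_thread.json',
                                  fn2='md_results_image_queue.json',
                                  verbose=True)
print('Outputs identical: {}'.format(same))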
megadetector/utils/path_utils.py
CHANGED

@@ -152,7 +152,6 @@ def folder_list(base_dir,
     folders = []

     if recursive:
-        folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
                 folders.append(os.path.join(root, d))

@@ -370,7 +369,9 @@ def safe_create_link(link_exists,link_new):
         os.remove(link_new)
         os.symlink(link_exists,link_new)
     else:
-        os.
+        link_new_dir = os.path.dirname(link_new)
+        if len(link_new_dir) > 0:
+            os.makedirs(link_new_dir,exist_ok=True)
         os.symlink(link_exists,link_new)

 # ...def safe_create_link(...)
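A hedged sketch of the updated safe_create_link behavior: the folder containing the new link is now created when necessary (both paths below are hypothetical).

from megadetector.utils.path_utils import safe_create_link

# '/data/links/site_a' no longer needs to exist before creating the symlink
safe_create_link('/data/images/IMG_0001.JPG', '/data/links/site_a/IMG_0001.JPG')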
@@ -988,7 +989,9 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     if verbose:
         print('{} to {}'.format(action_string,target_fn))

-    os.
+    target_dir = os.path.dirname(target_fn)
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:
@@ -1038,14 +1041,91 @@ def parallel_copy_files(input_file_to_output_file,
                                                   input_output_tuples)):
                 pbar.update()
     finally:
-        pool
-
-
-
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print("Pool closed and joined parallel file copying")

 # ...def parallel_copy_files(...)


+#%% File deletion functions
+
+def delete_file(input_file, verbose=False):
+    """
+    Deletes a single file.
+
+    Args:
+        input_file (str): file to delete
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        bool: True if file was deleted successfully, False otherwise
+    """
+
+    try:
+        if verbose:
+            print('Deleting file {}'.format(input_file))
+
+        if os.path.isfile(input_file):
+            os.remove(input_file)
+            return True
+        else:
+            if verbose:
+                print('File {} does not exist'.format(input_file))
+            return False
+
+    except Exception as e:
+        if verbose:
+            print('Error deleting file {}: {}'.format(input_file, str(e)))
+        return False
+
+# ...def delete_file(...)
+
+
+def parallel_delete_files(input_files,
+                          max_workers=16,
+                          use_threads=True,
+                          verbose=False):
+    """
+    Deletes one or more files in parallel.
+
+    Args:
+        input_files (list): list of files to delete
+        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
+            max_workers <= 1
+        verbose (bool, optional): enable additional debug console output
+    """
+
+    if len(input_files) == 0:
+        return
+
+    n_workers = min(max_workers, len(input_files))
+
+    pool = None
+
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i, _ in enumerate(pool.imap_unordered(partial(delete_file, verbose=verbose),
+                                                      input_files)):
+                pbar.update()
+    finally:
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and joined for file deletion')
+
+# ...def parallel_delete_files(...)
+
+
 #%% File size functions

 def get_file_sizes(base_dir, convert_slashes=True):
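The new deletion helpers can be exercised as below; this is only a sketch using hypothetical filenames, with the signatures taken from the added code.

from megadetector.utils.path_utils import delete_file, parallel_delete_files

# Single-file deletion returns True on success, False if the file is missing
# or cannot be removed
ok = delete_file('/tmp/scratch/old_results.json', verbose=True)

# Parallel deletion over a thread pool
files_to_remove = ['/tmp/scratch/crop_{:04d}.jpg'.format(i) for i in range(100)]
parallel_delete_files(files_to_remove, max_workers=16, use_threads=True)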
@@ -1118,8 +1198,6 @@ def parallel_get_file_sizes(filenames,
         dict: dictionary mapping filenames to file sizes in bytes
     """

-    n_workers = min(max_workers,len(filenames))
-
     folder_name = None

     if isinstance(filenames,str):
@@ -1137,23 +1215,37 @@ def parallel_get_file_sizes(filenames,

     assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'

+    n_workers = min(max_workers,len(filenames))
+
     if verbose:
         print('Creating worker pool')

-
-        pool_string = 'thread'
-        pool = ThreadPool(n_workers)
-    else:
-        pool_string = 'process'
-        pool = Pool(n_workers)
+    pool = None

-
-
-
+    try:
+
+        if use_threads:
+            pool_string = 'thread'
+            pool = ThreadPool(n_workers)
+        else:
+            pool_string = 'process'
+            pool = Pool(n_workers)
+
+        if verbose:
+            print('Created a {} pool of {} workers'.format(
+                pool_string,n_workers))

-
-
-
+        # This returns (filename,size) tuples
+        get_size_results = list(tqdm(pool.imap(
+            partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
+
+    finally:
+
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and join for file size collection')

     to_return = {}
     for r in get_size_results:
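A usage sketch for parallel_get_file_sizes after this change; the folder name is hypothetical, and per the assert above, [filenames] may be either a folder name or a list of files.

from megadetector.utils.path_utils import parallel_get_file_sizes

filename_to_size = parallel_get_file_sizes('/data/camera-trap-images',
                                           max_workers=8,
                                           use_threads=True,
                                           verbose=True)
print('Collected sizes for {} files'.format(len(filename_to_size)))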
@@ -1208,6 +1300,8 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compress_

     return output_fn

+# ...def zip_file(...)
+

 def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):

@@ -1248,6 +1342,8 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,

     return output_fn

+# ...def add_files_to_single_tar_file(...)
+

 def zip_files_into_single_zipfile(input_files,
                                   output_fn,

@@ -1292,6 +1388,8 @@ def zip_files_into_single_zipfile(input_files,

     return output_fn

+# ...def zip_files_into_single_zipfile(...)
+

 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compress_level=9):
     """

@@ -1315,7 +1413,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
+            return output_fn

     if verbose:
         print('Zipping {} to {} (compression level {})'.format(

@@ -1333,6 +1431,8 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com

     return output_fn

+# ...def zip_folder(...)
+

 def parallel_zip_files(input_files,
                        max_workers=16,
@@ -1361,11 +1461,22 @@ def parallel_zip_files(input_files,
     else:
         pool = Pool(n_workers)

-
-
-
-
-
+    try:
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
+                    output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
+                    input_files)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+# ...def parallel_zip_files(...)


 def parallel_zip_folders(input_folders,
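A sketch of parallel_zip_files usage under the new try/finally structure; the filenames are hypothetical, and output_fn=None (each file zipped next to itself) is assumed from the partial(...) call above.

from megadetector.utils.path_utils import parallel_zip_files

files_to_zip = ['/data/exports/results_{:02d}.json'.format(i) for i in range(10)]
parallel_zip_files(files_to_zip, max_workers=4, use_threads=True,
                   overwrite=False, verbose=True)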
@@ -1395,12 +1506,23 @@ def parallel_zip_folders(input_folders,
     else:
         pool = Pool(n_workers)

-
-
-
-
-
-
+    try:
+
+        with tqdm(total=len(input_folders)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(
+                    partial(zip_folder,overwrite=overwrite,
+                            compress_level=compress_level,verbose=verbose),
+                    input_folders)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel folder zipping')
+
+# ...def parallel_zip_folders(...)


 def zip_each_file_in_folder(folder_name,
@@ -1443,6 +1565,8 @@ def zip_each_file_in_folder(folder_name,
                             use_threads=use_threads,compress_level=compress_level,
                             overwrite=overwrite,verbose=verbose)

+# ...def zip_each_file_in_folder(...)
+

 def unzip_file(input_file, output_folder=None):
     """
@@ -1550,9 +1674,20 @@ def parallel_compute_file_hashes(filenames,
     else:
         pool = Pool(n_workers)

-
-
-
+    try:
+
+        results = list(tqdm(pool.imap(
+            partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
+            filenames), total=len(filenames)))
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+    # ...if we are/aren't parallelizing

     assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
megadetector/utils/split_locations_into_train_val.py
CHANGED

@@ -221,14 +221,10 @@ def split_locations_into_train_val(location_to_category_counts,
     weighted_average_error,weighted_category_errors,category_to_val_fraction = \
         compute_seed_errors(min_error_seed)

-    random_seed = min_error_seed
-
-    category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
                                                         sort_values=category_id_to_count,
                                                         reverse=True)

-
     print('Val fractions by category:\n')

     for category in category_to_val_fraction:
megadetector/utils/string_utils.py
CHANGED

@@ -34,6 +34,27 @@ def is_float(s):
     return True


+def is_int(s):
+    """
+    Checks whether [s] is an object (typically a string) that can be cast to a int
+
+    Args:
+        s (object): object to evaluate
+
+    Returns:
+        bool: True if s successfully casts to a int, otherwise False
+    """
+
+    if s is None:
+        return False
+
+    try:
+        _ = int(s)
+    except ValueError:
+        return False
+    return True
+
+
 def human_readable_to_bytes(size):
     """
     Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
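The new is_int helper mirrors the existing is_float check, for example:

from megadetector.utils.string_utils import is_int

print(is_int('42'))     # True
print(is_int('3.14'))   # False: int('3.14') raises ValueError
print(is_int(None))     # False: None is handled explicitly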
megadetector/utils/url_utils.py
CHANGED

@@ -132,7 +132,8 @@ def download_url(url,
     if verbose:
         print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
     target_dir = os.path.dirname(destination_filename)
-
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     urllib.request.urlretrieve(url, destination_filename, progress_updater)
     assert(os.path.isfile(destination_filename))
     n_bytes = os.path.getsize(destination_filename)

@@ -800,8 +801,9 @@ class TestUrlUtils:
 def _test_url_utils():
     """
     Runs all tests in the TestUrlUtils class. I generally disable this during testing
-    because it creates irritating nondeterminism
-    a module that changes
+    because it creates irritating nondeterminism (because it depends on downloading
+    stuff from the Internet), and this is neither a core module nor a module that changes
+    often.
     """

     test_instance = TestUrlUtils()