megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
The registry flags this version of megadetector as potentially problematic.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/utils/process_utils.py
(… marks old-side text that was truncated in this rendering; paired blank -/+ lines differ only in trailing whitespace.)

@@ -18,33 +18,33 @@ import subprocess
 def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
     """
     Run [cmd] (a single string) in a shell, yielding each line of output to the caller.
-
+
     The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
-
+
     "verbose" only impacts output about process management, it is not related to printing
     output from the child process.
-
+
     Args:
         cmd (str): command to run
         encoding (str, optional): stdout encoding, see Popen() documentation
         errors (str, optional): error handling, see Popen() documentation
         env (dict, optional): environment variables, see Popen() documentation
         verbose (bool, optional): enable additional debug console output
-
+
     Returns:
-        int: the command's return code, always zero, otherwise a CalledProcessError is raised
+        int: the command's return code, always zero, otherwise a CalledProcessError is raised
     """
-
+
     os.environ["PYTHONUNBUFFERED"] = "1"
-
-    if verbose:
+
+    if verbose:
         if encoding is not None:
             print('Launching child process with non-default encoding {}'.format(encoding))
         if errors is not None:
             print('Launching child process with non-default text error handling {}'.format(errors))
         if env is not None:
             print('Launching child process with non-default environment {}'.format(str(env)))
-
+
     # https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
     popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              shell=True, universal_newlines=True, encoding=encoding,

@@ -55,7 +55,7 @@ def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
     return_code = popen.wait()
     if return_code:
         raise subprocess.CalledProcessError(return_code, cmd)
-
+
     return return_code


@@ -70,15 +70,15 @@ def execute_and_print(cmd,
     """
     Run [cmd] (a single string) in a shell, capturing and printing output. Returns
     a dictionary with fields "status" and "output".
-
+
     The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
-
+
     "verbose" only impacts output about process management, it is not related to printing
     output from the child process.
-
+
     Args:
         cmd (str): command to run
-        print_output (bool, optional): whether to print output from [cmd] (stdout is
+        print_output (bool, optional): whether to print output from [cmd] (stdout is
             captured regardless of the value of print_output)
         encoding (str, optional): stdout encoding, see Popen() documentation
         errors (str, optional): error handling, see Popen() documentation

@@ -86,15 +86,15 @@ def execute_and_print(cmd,
         verbose (bool, optional): enable additional debug console output
         catch_exceptions (bool, optional): catch exceptions and include in the output, otherwise raise
         echo_command (bool, optional): print the command before executing
-
+
     Returns:
         dict: a dictionary with fields "status" (the process return code) and "output"
-            (the content of stdout)
+            (the content of stdout)
     """

     if echo_command:
         print('Running command:\n{}\n'.format(cmd))
-
+
     to_return = {'status':'unknown','output':''}
     output = []
     try:

@@ -109,64 +109,64 @@ def execute_and_print(cmd,
             print('execute_and_print caught error: {} ({})'.format(cpe.output,str(cpe)))
         to_return['status'] = cpe.returncode
         to_return['output'] = output
-
+
     return to_return


 #%% Single-threaded test driver for execute_and_print

 if False:
-
+
     pass

     #%%
-
+
     if os.name == 'nt':
-        execute_and_print('echo hello && ping -n 5 127.0.0.1 && echo goodbye')
+        execute_and_print('echo hello && ping -n 5 127.0.0.1 && echo goodbye')
     else:
-        execute_and_print('echo hello && sleep 1 && echo goodbye')
-
+        execute_and_print('echo hello && sleep 1 && echo goodbye')
+

 #%% Parallel test driver for execute_and_print

 if False:
-
+
     pass

     #%%
-
+
     from functools import partial
     from multiprocessing.pool import ThreadPool as ThreadPool
     from multiprocessing.pool import Pool as Pool
-
+
     n_workers = 10
-
+
     # Should we use threads (vs. processes) for parallelization?
     use_threads = True
-
+
     test_data = ['a','b','c','d']
-
-    def …
+
+    def _process_sample(s):
        return execute_and_print('echo ' + s,True)
-
-    if n_workers == 1:
-
+
+    if n_workers == 1:
+
        results = []
-        for i_sample,sample in enumerate(test_data):
-            results.append(…
-
+        for i_sample,sample in enumerate(test_data):
+            results.append(_process_sample(sample))
+
     else:
-
+
        n_threads = min(n_workers,len(test_data))
-
+
        if use_threads:
            print('Starting parallel thread pool with {} workers'.format(n_threads))
            pool = ThreadPool(n_threads)
        else:
            print('Starting parallel process pool with {} workers'.format(n_threads))
            pool = Pool(n_threads)
-
-        results = list(pool.map(partial(…
-
+
+        results = list(pool.map(partial(_process_sample),test_data))
+
     for r in results:
        print(r)
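For orientation, here is a minimal usage sketch of the two functions touched above. The generator behavior of execute() and the {'status', 'output'} return shape of execute_and_print() come from the docstrings in this diff; the example commands are illustrative only.

# Assumes megadetector >= 10.0.0 is installed
from megadetector.utils.process_utils import execute, execute_and_print

# execute() yields child-process output line by line, then returns the
# process's return code (raising CalledProcessError on a nonzero code)
for line in execute('echo hello'):
    print(line, end='')

# execute_and_print() captures stdout and returns a dict with fields
# "status" (the return code) and "output" (the captured lines)
result = execute_and_print('echo hello', print_output=True)
print(result['status'])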
megadetector/utils/split_locations_into_train_val.py

@@ -4,8 +4,8 @@ split_locations_into_train_val.py

 Splits a list of location IDs into training and validation, targeting a specific
 train/val split for each category, but allowing some categories to be tighter or looser
-than others. Does nothing particularly clever, just randomly splits locations into
-train/val lots of times using the target val fraction, and picks the one that meets the
+than others. Does nothing particularly clever, just randomly splits locations into
+train/val lots of times using the target val fraction, and picks the one that meets the
 specified constraints and minimizes weighted error, where "error" is defined as the
 sum of each class's absolute divergence from the target val fraction.

@@ -26,63 +26,63 @@ from tqdm import tqdm
 def split_locations_into_train_val(location_to_category_counts,
                                    n_random_seeds=10000,
                                    target_val_fraction=0.15,
-                                   category_to_max_allowable_error=None,
+                                   category_to_max_allowable_error=None,
                                    category_to_error_weight=None,
                                    default_max_allowable_error=0.1,
                                    require_complete_coverage=True):
     """
     Splits a list of location IDs into training and validation, targeting a specific
     train/val split for each category, but allowing some categories to be tighter or looser
-    than others. Does nothing particularly clever, just randomly splits locations into
-    train/val lots of times using the target val fraction, and picks the one that meets the
+    than others. Does nothing particularly clever, just randomly splits locations into
+    train/val lots of times using the target val fraction, and picks the one that meets the
     specified constraints and minimizes weighted error, where "error" is defined as the
-    sum of each class's absolute divergence from the target val fraction.
-
+    sum of each class's absolute divergence from the target val fraction.
+
     Args:
         location_to_category_counts (dict): a dict mapping location IDs to dicts,
-            with each dict mapping a category name to a count. Any categories not present
+            with each dict mapping a category name to a count. Any categories not present
             in a particular dict are assumed to have a count of zero for that location.
-
+
             For example:
-
+
             .. code-block:: none

                {'location-000': {'bear':4,'wolf':10},
                 'location-001': {'bear':12,'elk':20}}
-
+
         n_random_seeds (int, optional): number of random seeds to try, always starting from zero
         target_val_fraction (float, optional): fraction of images containing each species we'd
             like to put in the val split
         category_to_max_allowable_error (dict, optional): a dict mapping category names
             to maximum allowable errors. These are hard constraints (i.e., we will error
-            if we can't meet them). Does not need to include all categories; categories not
+            if we can't meet them). Does not need to include all categories; categories not
             included will be assigned a maximum error according to [default_max_allowable_error].
             If this is None, no hard constraints are applied.
         category_to_error_weight (dict, optional): a dict mapping category names to
             error weights. You can specify a subset of categories; categories not included here
             have a weight of 1.0. If None, all categories have the same weight.
-        default_max_allowable_error (float, optional): the maximum allowable error for categories not
-            present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
+        default_max_allowable_error (float, optional): the maximum allowable error for categories not
+            present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
             constraints for categories not present in [category_to_max_allowable_error]
-        require_complete_coverage (bool, optional): require that every category appear in both train
-            val
-
+        require_complete_coverage (bool, optional): require that every category appear in both train
+            and val
+
     Returns:
         tuple: A two-element tuple:
           - list of location IDs in the val split
-          - a dict mapping category names to the fraction of images in the val split
+          - a dict mapping category names to the fraction of images in the val split
     """
-
+
     location_ids = list(location_to_category_counts.keys())
-
+
     n_val_locations = int(target_val_fraction*len(location_ids))
-
+
     if category_to_max_allowable_error is None:
         category_to_max_allowable_error = {}
-
+
     if category_to_error_weight is None:
         category_to_error_weight = {}
-
+
     # category ID to total count; the total count is used only for printouts
     category_id_to_count = {}
     for location_id in location_to_category_counts:

@@ -91,28 +91,28 @@ def split_locations_into_train_val(location_to_category_counts,
             category_id_to_count[category_id] = 0
         category_id_to_count[category_id] += \
             location_to_category_counts[location_id][category_id]
-
+
     category_ids = set(category_id_to_count.keys())
-
+
     print('Splitting {} categories over {} locations'.format(
         len(category_ids),len(location_ids)))
-
+
     # random_seed = 0
     def compute_seed_errors(random_seed):
         """
         Computes the per-category error for a specific random seed.
-
+
         returns weighted_average_error,category_to_val_fraction
         """
-
+
         # Randomly split into train/val
         random.seed(random_seed)
         val_locations = random.sample(location_ids,k=n_val_locations)
         val_locations_set = set(val_locations)
-
+
         # For each category, measure the % of images that went into the val set
         category_to_val_fraction = defaultdict(float)
-
+
         for category_id in category_ids:
             category_val_count = 0
             category_train_count = 0

@@ -127,44 +127,44 @@ def split_locations_into_train_val(location_to_category_counts,
                     category_train_count += location_category_count
             category_val_fraction = category_val_count / (category_val_count + category_train_count)
             category_to_val_fraction[category_id] = category_val_fraction
-
+
         # Absolute deviation from the target val fraction for each category
         category_errors = {}
         weighted_category_errors = {}
-
+
         # category = next(iter(category_to_val_fraction))
         for category in category_to_val_fraction:
-
+
             category_val_fraction = category_to_val_fraction[category]
-
+
             category_error = abs(category_val_fraction-target_val_fraction)
             category_errors[category] = category_error
-
+
             category_weight = 1.0
             if category in category_to_error_weight:
                 category_weight = category_to_error_weight[category]
             weighted_category_error = category_error * category_weight
             weighted_category_errors[category] = weighted_category_error
-
+
         weighted_average_error = np.mean(list(weighted_category_errors.values()))
-
+
         return weighted_average_error,weighted_category_errors,category_to_val_fraction
-
+
     # ... def compute_seed_errors(...)
-
+
     # This will only include random seeds that satisfy the hard constraints
     random_seed_to_weighted_average_error = {}
-
+
     # random_seed = 0
     for random_seed in tqdm(range(0,n_random_seeds)):
-
+
         weighted_average_error,weighted_category_errors,category_to_val_fraction = \
             compute_seed_errors(random_seed)
-
+
         seed_satisfies_hard_constraints = True
-
+
         for category in category_to_val_fraction:
-            if category in category_to_max_allowable_error:
+            if category in category_to_max_allowable_error:
                 max_allowable_error = category_to_max_allowable_error[category]
             else:
                 if default_max_allowable_error is None:

@@ -183,59 +183,59 @@ def split_locations_into_train_val(location_to_category_counts,
             if category_error > max_allowable_error:
                 seed_satisfies_hard_constraints = False
                 break
-
+
         # ...for each category
-
-        if seed_satisfies_hard_constraints:
+
+        if seed_satisfies_hard_constraints:
             random_seed_to_weighted_average_error[random_seed] = weighted_average_error
-
+
     # ...for each random seed
-
+
     assert len(random_seed_to_weighted_average_error) > 0, \
         'No random seed met all the hard constraints'
-
+
     print('\n{} of {} random seeds satisfied hard constraints'.format(
         len(random_seed_to_weighted_average_error),n_random_seeds))
-
+
     min_error = None
     min_error_seed = None
-
+
     for random_seed in random_seed_to_weighted_average_error.keys():
         error_metric = random_seed_to_weighted_average_error[random_seed]
         if min_error is None or error_metric < min_error:
             min_error = error_metric
             min_error_seed = random_seed
-
+
     random.seed(min_error_seed)
     val_locations = random.sample(location_ids,k=n_val_locations)
     train_locations = []
     for location_id in location_ids:
         if location_id not in val_locations:
             train_locations.append(location_id)
-
-    print('\nVal locations:\n')
+
+    print('\nVal locations:\n')
     for loc in val_locations:
         print('{}'.format(loc))
     print('')
-
+
     weighted_average_error,weighted_category_errors,category_to_val_fraction = \
         compute_seed_errors(min_error_seed)
-
+
     random_seed = min_error_seed
-
+
     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
                                                         sort_values=category_id_to_count,
                                                         reverse=True)
-
-
+
+
     print('Val fractions by category:\n')
-
+
     for category in category_to_val_fraction:
         print('{} ({}) {:.2f}'.format(
             category,category_id_to_count[category],
             category_to_val_fraction[category]))
-
+
     return val_locations,category_to_val_fraction

 # ...def split_locations_into_train_val(...)
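To make the contract above concrete, here is an illustrative call. The input shape, parameter names, and return tuple come from the docstring in this diff; the synthetic data is made up, and with a real dataset you would pass your own per-location counts.

import random

from megadetector.utils.split_locations_into_train_val import \
    split_locations_into_train_val

# Synthetic example: 100 locations, each with random 'bear' and 'wolf' counts
location_to_category_counts = {}
for i_location in range(100):
    location_to_category_counts['location-{:03d}'.format(i_location)] = \
        {'bear': random.randint(0, 10), 'wolf': random.randint(0, 10)}

# Put ~15% of each category's images in val, tolerating up to 10% deviation
val_locations, category_to_val_fraction = split_locations_into_train_val(
    location_to_category_counts,
    n_random_seeds=1000,
    target_val_fraction=0.15,
    default_max_allowable_error=0.1)

print('Chose {} val locations'.format(len(val_locations)))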
megadetector/utils/string_utils.py

@@ -14,16 +14,19 @@ import re
 #%% Functions

 def is_float(s):
-    """
+    """
     Checks whether [s] is an object (typically a string) that can be cast to a float
-
+
     Args:
         s (object): object to evaluate
-
+
     Returns:
         bool: True if s successfully casts to a float, otherwise False
     """
-
+
+    if s is None:
+        return False
+
     try:
         _ = float(s)
     except ValueError:

@@ -36,57 +39,175 @@ def human_readable_to_bytes(size):
     Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
     returns the number of bytes. Will return 0 if the argument has
     unexpected form.
-
+
     https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0
-
+
     Args:
         size (str): string representing a size
-
+
     Returns:
         int: the corresponding size in bytes
     """
-
+
     size = re.sub(r'\s+', '', size)
-
+
+    if not size:  # Handle empty string case after stripping spaces
+        return 0
+
     if (size[-1] == 'B'):
         size = size[:-1]
-
+
+    if not size:  # Handle case where size was just "B"
+        return 0
+
     if (size.isdigit()):
-        …
+        bytes_val = int(size)  # Renamed to avoid conflict with built-in 'bytes'
     elif (is_float(size)):
-        …
+        bytes_val = float(size)  # Renamed
     else:
-        …
-        …
-        …
-        …
+        # Handle cases like "1KB" where size[:-1] might be "1K" before this block.
+        # The original code would try to float("1K") which fails.
+        # Need to separate numeric part from unit more carefully.
+        numeric_part = ''
+        unit_part = ''
+
+        # Iterate from the end to find the unit (K, M, G, T)
+        # This handles cases like "10KB" or "2.5GB"
+        for i in range(len(size) -1, -1, -1):
+            if size[i].isalpha():
+                unit_part = size[i] + unit_part
+            else:
+                numeric_part = size[:i+1]
+                break
+
+        # If no unit found, or numeric part is empty after stripping unit
+        if not unit_part or not numeric_part:
+            return 0
+
+        try:
+            bytes_val = float(numeric_part)
+            unit = unit_part
             if (unit == 'T'):
-                …
+                bytes_val *= 1024*1024*1024*1024
             elif (unit == 'G'):
-                …
+                bytes_val *= 1024*1024*1024
             elif (unit == 'M'):
-                …
+                bytes_val *= 1024*1024
             elif (unit == 'K'):
-                …
+                bytes_val *= 1024
             else:
-                …
+                # If it's a known unit (like 'B' already stripped) but not T/G/M/K,
+                # and it was floatable, it's just bytes.  If it's an unknown unit, it's
+                # an error.
+                if unit not in ['B', '']:  # 'B' was stripped, '' means just a number
+                    bytes_val = 0
         except ValueError:
-            …
-            …
-            return …
+            bytes_val = 0
+
+    return bytes_val


 def remove_ansi_codes(s):
     """
     Removes ANSI escape codes from a string.
-
+
     https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789
-
+
     Args:
         s (str): the string to de-ANSI-i-fy
-
+
     Returns:
         str: A copy of [s] without ANSI codes
     """
+
     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
     return ansi_escape.sub('', s)
+
+
+#%% Tests
+
+
+class TestStringUtils:
+    """
+    Tests for string_utils.py
+    """
+
+
+    def test_is_float(self):
+        """
+        Test the is_float function.
+        """
+
+        assert is_float("1.23")
+        assert is_float("-0.5")
+        assert is_float("0")
+        assert is_float(1.23)
+        assert is_float(0)
+        assert not is_float("abc")
+        assert not is_float("1.2.3")
+        assert not is_float("")
+        assert not is_float(None)
+        assert not is_float("1,23")
+
+
+    def test_human_readable_to_bytes(self):
+        """
+        Test the human_readable_to_bytes function.
+        """
+
+        assert human_readable_to_bytes("10B") == 10
+        assert human_readable_to_bytes("10") == 10
+        assert human_readable_to_bytes("1K") == 1024
+        assert human_readable_to_bytes("1KB") == 1024
+        assert human_readable_to_bytes("1M") == 1024*1024
+        assert human_readable_to_bytes("1MB") == 1024*1024
+        assert human_readable_to_bytes("1G") == 1024*1024*1024
+        assert human_readable_to_bytes("1GB") == 1024*1024*1024
+        assert human_readable_to_bytes("1T") == 1024*1024*1024*1024
+        assert human_readable_to_bytes("1TB") == 1024*1024*1024*1024
+
+        assert human_readable_to_bytes("2.5K") == 2.5 * 1024
+        assert human_readable_to_bytes("0.5MB") == 0.5 * 1024 * 1024
+
+        # Test with spaces
+        assert human_readable_to_bytes(" 2 G ") == 2 * 1024*1024*1024
+        assert human_readable_to_bytes("500 KB") == 500 * 1024
+
+        # Invalid inputs
+        assert human_readable_to_bytes("abc") == 0
+        assert human_readable_to_bytes("1X") == 0
+        assert human_readable_to_bytes("1KBB") == 0
+        assert human_readable_to_bytes("K1") == 0
+        assert human_readable_to_bytes("") == 0
+        assert human_readable_to_bytes("1.2.3K") == 0
+        assert human_readable_to_bytes("B") == 0
+
+
+    def test_remove_ansi_codes(self):
+        """
+        Test the remove_ansi_codes function.
+        """
+
+        assert remove_ansi_codes("text without codes") == "text without codes"
+        assert remove_ansi_codes("\x1b[31mRed text\x1b[0m") == "Red text"
+        assert remove_ansi_codes("\x1b[1m\x1b[4mBold and Underline\x1b[0m") == "Bold and Underline"
+        assert remove_ansi_codes("Mixed \x1b[32mgreen\x1b[0m and normal") == "Mixed green and normal"
+        assert remove_ansi_codes("") == ""
+
+        # More complex/varied ANSI codes
+        assert remove_ansi_codes("text\x1b[1Aup") == "textup"
+        assert remove_ansi_codes("\x1b[2Jclearscreen") == "clearscreen"
+
+
+def test_string_utils():
+    """
+    Runs all tests in the TestStringUtils class.
+    """
+
+    test_instance = TestStringUtils()
+    test_instance.test_is_float()
+    test_instance.test_human_readable_to_bytes()
+    test_instance.test_remove_ansi_codes()
+
+# from IPython import embed; embed()
+# test_string_utils()