megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -4,8 +4,8 @@ split_locations_into_train_val.py
4
4
 
5
5
  Splits a list of location IDs into training and validation, targeting a specific
6
6
  train/val split for each category, but allowing some categories to be tighter or looser
7
- than others. Does nothing particularly clever, just randomly splits locations into
8
- train/val lots of times using the target val fraction, and picks the one that meets the
7
+ than others. Does nothing particularly clever, just randomly splits locations into
8
+ train/val lots of times using the target val fraction, and picks the one that meets the
9
9
  specified constraints and minimizes weighted error, where "error" is defined as the
10
10
  sum of each class's absolute divergence from the target val fraction.
11
11
 
@@ -26,60 +26,63 @@ from tqdm import tqdm
26
26
  def split_locations_into_train_val(location_to_category_counts,
27
27
  n_random_seeds=10000,
28
28
  target_val_fraction=0.15,
29
- category_to_max_allowable_error=None,
29
+ category_to_max_allowable_error=None,
30
30
  category_to_error_weight=None,
31
- default_max_allowable_error=0.1):
31
+ default_max_allowable_error=0.1,
32
+ require_complete_coverage=True):
32
33
  """
33
34
  Splits a list of location IDs into training and validation, targeting a specific
34
35
  train/val split for each category, but allowing some categories to be tighter or looser
35
- than others. Does nothing particularly clever, just randomly splits locations into
36
- train/val lots of times using the target val fraction, and picks the one that meets the
36
+ than others. Does nothing particularly clever, just randomly splits locations into
37
+ train/val lots of times using the target val fraction, and picks the one that meets the
37
38
  specified constraints and minimizes weighted error, where "error" is defined as the
38
- sum of each class's absolute divergence from the target val fraction.
39
-
39
+ sum of each class's absolute divergence from the target val fraction.
40
+
40
41
  Args:
41
42
  location_to_category_counts (dict): a dict mapping location IDs to dicts,
42
- with each dict mapping a category name to a count. Any categories not present
43
+ with each dict mapping a category name to a count. Any categories not present
43
44
  in a particular dict are assumed to have a count of zero for that location.
44
-
45
+
45
46
  For example:
46
-
47
+
47
48
  .. code-block:: none
48
49
 
49
50
  {'location-000': {'bear':4,'wolf':10},
50
51
  'location-001': {'bear':12,'elk':20}}
51
-
52
+
52
53
  n_random_seeds (int, optional): number of random seeds to try, always starting from zero
53
54
  target_val_fraction (float, optional): fraction of images containing each species we'd
54
55
  like to put in the val split
55
56
  category_to_max_allowable_error (dict, optional): a dict mapping category names
56
57
  to maximum allowable errors. These are hard constraints (i.e., we will error
57
- if we can't meet them). Does not need to include all categories; categories not
58
+ if we can't meet them). Does not need to include all categories; categories not
58
59
  included will be assigned a maximum error according to [default_max_allowable_error].
59
60
  If this is None, no hard constraints are applied.
60
61
  category_to_error_weight (dict, optional): a dict mapping category names to
61
62
  error weights. You can specify a subset of categories; categories not included here
62
63
  have a weight of 1.0. If None, all categories have the same weight.
63
- default_max_allowable_error (float, optional): the maximum allowable error for categories not
64
- present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
64
+ default_max_allowable_error (float, optional): the maximum allowable error for categories not
65
+ present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
65
66
  constraints for categories not present in [category_to_max_allowable_error]
66
-
67
+ require_complete_coverage (bool, optional): require that every category appear in both train and
68
+ val
69
+
67
70
  Returns:
68
71
  tuple: A two-element tuple:
69
72
  - list of location IDs in the val split
70
- - a dict mapping category names to the fraction of images in the val split
73
+ - a dict mapping category names to the fraction of images in the val split
71
74
  """
72
-
75
+
73
76
  location_ids = list(location_to_category_counts.keys())
74
-
77
+
75
78
  n_val_locations = int(target_val_fraction*len(location_ids))
76
-
79
+
77
80
  if category_to_max_allowable_error is None:
78
81
  category_to_max_allowable_error = {}
79
-
82
+
80
83
  if category_to_error_weight is None:
81
84
  category_to_error_weight = {}
82
-
85
+
83
86
  # category ID to total count; the total count is used only for printouts
84
87
  category_id_to_count = {}
85
88
  for location_id in location_to_category_counts:
@@ -88,28 +91,28 @@ def split_locations_into_train_val(location_to_category_counts,
88
91
  category_id_to_count[category_id] = 0
89
92
  category_id_to_count[category_id] += \
90
93
  location_to_category_counts[location_id][category_id]
91
-
94
+
92
95
  category_ids = set(category_id_to_count.keys())
93
-
96
+
94
97
  print('Splitting {} categories over {} locations'.format(
95
98
  len(category_ids),len(location_ids)))
96
-
99
+
97
100
  # random_seed = 0
98
101
  def compute_seed_errors(random_seed):
99
102
  """
100
103
  Computes the per-category error for a specific random seed.
101
-
104
+
102
105
  returns weighted_average_error,category_to_val_fraction
103
106
  """
104
-
107
+
105
108
  # Randomly split into train/val
106
109
  random.seed(random_seed)
107
110
  val_locations = random.sample(location_ids,k=n_val_locations)
108
111
  val_locations_set = set(val_locations)
109
-
112
+
110
113
  # For each category, measure the % of images that went into the val set
111
114
  category_to_val_fraction = defaultdict(float)
112
-
115
+
113
116
  for category_id in category_ids:
114
117
  category_val_count = 0
115
118
  category_train_count = 0
@@ -124,42 +127,42 @@ def split_locations_into_train_val(location_to_category_counts,
124
127
  category_train_count += location_category_count
125
128
  category_val_fraction = category_val_count / (category_val_count + category_train_count)
126
129
  category_to_val_fraction[category_id] = category_val_fraction
127
-
128
- # Absolute deviation from the target val fraction for each categorys
130
+
131
+ # Absolute deviation from the target val fraction for each category
129
132
  category_errors = {}
130
133
  weighted_category_errors = {}
131
-
134
+
132
135
  # category = next(iter(category_to_val_fraction))
133
136
  for category in category_to_val_fraction:
134
-
137
+
135
138
  category_val_fraction = category_to_val_fraction[category]
136
-
139
+
137
140
  category_error = abs(category_val_fraction-target_val_fraction)
138
141
  category_errors[category] = category_error
139
-
142
+
140
143
  category_weight = 1.0
141
144
  if category in category_to_error_weight:
142
145
  category_weight = category_to_error_weight[category]
143
146
  weighted_category_error = category_error * category_weight
144
147
  weighted_category_errors[category] = weighted_category_error
145
-
148
+
146
149
  weighted_average_error = np.mean(list(weighted_category_errors.values()))
147
-
150
+
148
151
  return weighted_average_error,weighted_category_errors,category_to_val_fraction
149
-
152
+
150
153
  # ... def compute_seed_errors(...)
151
-
154
+
152
155
  # This will only include random seeds that satisfy the hard constraints
153
156
  random_seed_to_weighted_average_error = {}
154
-
157
+
155
158
  # random_seed = 0
156
159
  for random_seed in tqdm(range(0,n_random_seeds)):
157
-
160
+
158
161
  weighted_average_error,weighted_category_errors,category_to_val_fraction = \
159
162
  compute_seed_errors(random_seed)
160
-
163
+
161
164
  seed_satisfies_hard_constraints = True
162
-
165
+
163
166
  for category in category_to_val_fraction:
164
167
  if category in category_to_max_allowable_error:
165
168
  max_allowable_error = category_to_max_allowable_error[category]
@@ -168,61 +171,71 @@ def split_locations_into_train_val(location_to_category_counts,
168
171
  continue
169
172
  max_allowable_error = default_max_allowable_error
170
173
  val_fraction = category_to_val_fraction[category]
174
+
175
+ # If necessary, verify that this category doesn't *only* appear in train or val
176
+ if require_complete_coverage:
177
+ if (val_fraction == 0.0) or (val_fraction == 1.0):
178
+ seed_satisfies_hard_constraints = False
179
+ break
180
+
181
+ # Check whether this category exceeds the hard maximum deviation
171
182
  category_error = abs(val_fraction - target_val_fraction)
172
183
  if category_error > max_allowable_error:
173
184
  seed_satisfies_hard_constraints = False
174
185
  break
175
-
176
- if seed_satisfies_hard_constraints:
186
+
187
+ # ...for each category
188
+
189
+ if seed_satisfies_hard_constraints:
177
190
  random_seed_to_weighted_average_error[random_seed] = weighted_average_error
178
-
191
+
179
192
  # ...for each random seed
180
-
193
+
181
194
  assert len(random_seed_to_weighted_average_error) > 0, \
182
195
  'No random seed met all the hard constraints'
183
-
196
+
184
197
  print('\n{} of {} random seeds satisfied hard constraints'.format(
185
198
  len(random_seed_to_weighted_average_error),n_random_seeds))
186
-
199
+
187
200
  min_error = None
188
201
  min_error_seed = None
189
-
202
+
190
203
  for random_seed in random_seed_to_weighted_average_error.keys():
191
204
  error_metric = random_seed_to_weighted_average_error[random_seed]
192
205
  if min_error is None or error_metric < min_error:
193
206
  min_error = error_metric
194
207
  min_error_seed = random_seed
195
-
208
+
196
209
  random.seed(min_error_seed)
197
210
  val_locations = random.sample(location_ids,k=n_val_locations)
198
211
  train_locations = []
199
212
  for location_id in location_ids:
200
213
  if location_id not in val_locations:
201
214
  train_locations.append(location_id)
202
-
203
- print('\nVal locations:\n')
215
+
216
+ print('\nVal locations:\n')
204
217
  for loc in val_locations:
205
218
  print('{}'.format(loc))
206
219
  print('')
207
-
220
+
208
221
  weighted_average_error,weighted_category_errors,category_to_val_fraction = \
209
222
  compute_seed_errors(min_error_seed)
210
-
223
+
211
224
  random_seed = min_error_seed
212
-
225
+
213
226
  category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
214
227
  category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
215
228
  sort_values=category_id_to_count,
216
229
  reverse=True)
217
-
218
-
230
+
231
+
219
232
  print('Val fractions by category:\n')
220
-
233
+
221
234
  for category in category_to_val_fraction:
222
235
  print('{} ({}) {:.2f}'.format(
223
236
  category,category_id_to_count[category],
224
237
  category_to_val_fraction[category]))
225
-
238
+
226
239
  return val_locations,category_to_val_fraction
227
240
 
228
241
  # ...def split_locations_into_train_val(...)
@@ -14,15 +14,18 @@ import re
14
14
  #%% Functions
15
15
 
16
16
  def is_float(s):
17
- """
17
+ """
18
18
  Checks whether [s] is an object (typically a string) that can be cast to a float
19
-
19
+
20
20
  Args:
21
21
  s (object): object to evaluate
22
-
22
+
23
23
  Returns:
24
24
  bool: True if s successfully casts to a float, otherwise False
25
25
  """
26
+
27
+ if s is None:
28
+ return False
26
29
 
27
30
  try:
28
31
  _ = float(s)
@@ -36,57 +39,175 @@ def human_readable_to_bytes(size):
36
39
  Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
37
40
  returns the number of bytes. Will return 0 if the argument has
38
41
  unexpected form.
39
-
42
+
40
43
  https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0
41
-
44
+
42
45
  Args:
43
46
  size (str): string representing a size
44
-
47
+
45
48
  Returns:
46
49
  int: the corresponding size in bytes
47
50
  """
48
-
51
+
49
52
  size = re.sub(r'\s+', '', size)
50
-
53
+
54
+ if not size: # Handle empty string case after stripping spaces
55
+ return 0
56
+
51
57
  if (size[-1] == 'B'):
52
58
  size = size[:-1]
53
-
59
+
60
+ if not size: # Handle case where size was just "B"
61
+ return 0
62
+
54
63
  if (size.isdigit()):
55
- bytes = int(size)
64
+ bytes_val = int(size) # Renamed to avoid conflict with built-in 'bytes'
56
65
  elif (is_float(size)):
57
- bytes = float(size)
66
+ bytes_val = float(size) # Renamed
58
67
  else:
59
- bytes = size[:-1]
60
- unit = size[-1]
61
- try:
62
- bytes = float(bytes)
68
+ # Handle cases like "1KB" where size[:-1] might be "1K" before this block
69
+ # The original code would try to float("1K") which fails.
70
+ # Need to separate numeric part from unit more carefully.
71
+ numeric_part = ''
72
+ unit_part = ''
73
+
74
+ # Iterate from the end to find the unit (K, M, G, T)
75
+ # This handles cases like "10KB" or "2.5GB"
76
+ for i in range(len(size) -1, -1, -1):
77
+ if size[i].isalpha():
78
+ unit_part = size[i] + unit_part
79
+ else:
80
+ numeric_part = size[:i+1]
81
+ break
82
+
83
+ # If no unit found, or numeric part is empty after stripping unit
84
+ if not unit_part or not numeric_part:
85
+ return 0
86
+
87
+ try:
88
+ bytes_val = float(numeric_part)
89
+ unit = unit_part
63
90
  if (unit == 'T'):
64
- bytes *= 1024*1024*1024*1024
91
+ bytes_val *= 1024*1024*1024*1024
65
92
  elif (unit == 'G'):
66
- bytes *= 1024*1024*1024
93
+ bytes_val *= 1024*1024*1024
67
94
  elif (unit == 'M'):
68
- bytes *= 1024*1024
95
+ bytes_val *= 1024*1024
69
96
  elif (unit == 'K'):
70
- bytes *= 1024
97
+ bytes_val *= 1024
71
98
  else:
72
- bytes = 0
99
+ # If it's a known unit (like 'B' already stripped) but not T/G/M/K,
100
+ # and it was floatable, it's just bytes. If it's an unknown unit, it's
101
+ # an error.
102
+ if unit not in ['B', '']: # 'B' was stripped, '' means just a number
103
+ bytes_val = 0
73
104
  except ValueError:
74
- bytes = 0
75
-
76
- return bytes
105
+ bytes_val = 0
106
+
107
+ return bytes_val
77
108
 
78
109
 
79
110
  def remove_ansi_codes(s):
80
111
  """
81
112
  Removes ANSI escape codes from a string.
82
-
113
+
83
114
  https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789
84
-
115
+
85
116
  Args:
86
117
  s (str): the string to de-ANSI-i-fy
87
-
118
+
88
119
  Returns:
89
120
  str: A copy of [s] without ANSI codes
90
121
  """
122
+
91
123
  ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
92
124
  return ansi_escape.sub('', s)
125
+
126
+
127
+ #%% Tests
128
+
129
+
130
+ class TestStringUtils:
131
+ """
132
+ Tests for string_utils.py
133
+ """
134
+
135
+
136
+ def test_is_float(self):
137
+ """
138
+ Test the is_float function.
139
+ """
140
+
141
+ assert is_float("1.23")
142
+ assert is_float("-0.5")
143
+ assert is_float("0")
144
+ assert is_float(1.23)
145
+ assert is_float(0)
146
+ assert not is_float("abc")
147
+ assert not is_float("1.2.3")
148
+ assert not is_float("")
149
+ assert not is_float(None)
150
+ assert not is_float("1,23")
151
+
152
+
153
+ def test_human_readable_to_bytes(self):
154
+ """
155
+ Test the human_readable_to_bytes function.
156
+ """
157
+
158
+ assert human_readable_to_bytes("10B") == 10
159
+ assert human_readable_to_bytes("10") == 10
160
+ assert human_readable_to_bytes("1K") == 1024
161
+ assert human_readable_to_bytes("1KB") == 1024
162
+ assert human_readable_to_bytes("1M") == 1024*1024
163
+ assert human_readable_to_bytes("1MB") == 1024*1024
164
+ assert human_readable_to_bytes("1G") == 1024*1024*1024
165
+ assert human_readable_to_bytes("1GB") == 1024*1024*1024
166
+ assert human_readable_to_bytes("1T") == 1024*1024*1024*1024
167
+ assert human_readable_to_bytes("1TB") == 1024*1024*1024*1024
168
+
169
+ assert human_readable_to_bytes("2.5K") == 2.5 * 1024
170
+ assert human_readable_to_bytes("0.5MB") == 0.5 * 1024 * 1024
171
+
172
+ # Test with spaces
173
+ assert human_readable_to_bytes(" 2 G ") == 2 * 1024*1024*1024
174
+ assert human_readable_to_bytes("500 KB") == 500 * 1024
175
+
176
+ # Invalid inputs
177
+ assert human_readable_to_bytes("abc") == 0
178
+ assert human_readable_to_bytes("1X") == 0
179
+ assert human_readable_to_bytes("1KBB") == 0
180
+ assert human_readable_to_bytes("K1") == 0
181
+ assert human_readable_to_bytes("") == 0
182
+ assert human_readable_to_bytes("1.2.3K") == 0
183
+ assert human_readable_to_bytes("B") == 0
184
+
185
+
186
+ def test_remove_ansi_codes(self):
187
+ """
188
+ Test the remove_ansi_codes function.
189
+ """
190
+
191
+ assert remove_ansi_codes("text without codes") == "text without codes"
192
+ assert remove_ansi_codes("\x1b[31mRed text\x1b[0m") == "Red text"
193
+ assert remove_ansi_codes("\x1b[1m\x1b[4mBold and Underline\x1b[0m") == "Bold and Underline"
194
+ assert remove_ansi_codes("Mixed \x1b[32mgreen\x1b[0m and normal") == "Mixed green and normal"
195
+ assert remove_ansi_codes("") == ""
196
+
197
+ # More complex/varied ANSI codes
198
+ assert remove_ansi_codes("text\x1b[1Aup") == "textup"
199
+ assert remove_ansi_codes("\x1b[2Jclearscreen") == "clearscreen"
200
+
201
+
202
+ def test_string_utils():
203
+ """
204
+ Runs all tests in the TestStringUtils class.
205
+ """
206
+
207
+ test_instance = TestStringUtils()
208
+ test_instance.test_is_float()
209
+ test_instance.test_human_readable_to_bytes()
210
+ test_instance.test_remove_ansi_codes()
211
+
212
+ # from IPython import embed; embed()
213
+ # test_string_utils()