megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (176) hide show
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -4,8 +4,8 @@ split_locations_into_train_val.py
4
4
 
5
5
  Splits a list of location IDs into training and validation, targeting a specific
6
6
  train/val split for each category, but allowing some categories to be tighter or looser
7
- than others. Does nothing particularly clever, just randomly splits locations into
8
- train/val lots of times using the target val fraction, and picks the one that meets the
7
+ than others. Does nothing particularly clever, just randomly splits locations into
8
+ train/val lots of times using the target val fraction, and picks the one that meets the
9
9
  specified constraints and minimizes weighted error, where "error" is defined as the
10
10
  sum of each class's absolute divergence from the target val fraction.
11
11
 
@@ -26,63 +26,63 @@ from tqdm import tqdm
26
26
  def split_locations_into_train_val(location_to_category_counts,
27
27
  n_random_seeds=10000,
28
28
  target_val_fraction=0.15,
29
- category_to_max_allowable_error=None,
29
+ category_to_max_allowable_error=None,
30
30
  category_to_error_weight=None,
31
31
  default_max_allowable_error=0.1,
32
32
  require_complete_coverage=True):
33
33
  """
34
34
  Splits a list of location IDs into training and validation, targeting a specific
35
35
  train/val split for each category, but allowing some categories to be tighter or looser
36
- than others. Does nothing particularly clever, just randomly splits locations into
37
- train/val lots of times using the target val fraction, and picks the one that meets the
36
+ than others. Does nothing particularly clever, just randomly splits locations into
37
+ train/val lots of times using the target val fraction, and picks the one that meets the
38
38
  specified constraints and minimizes weighted error, where "error" is defined as the
39
- sum of each class's absolute divergence from the target val fraction.
40
-
39
+ sum of each class's absolute divergence from the target val fraction.
40
+
41
41
  Args:
42
42
  location_to_category_counts (dict): a dict mapping location IDs to dicts,
43
- with each dict mapping a category name to a count. Any categories not present
43
+ with each dict mapping a category name to a count. Any categories not present
44
44
  in a particular dict are assumed to have a count of zero for that location.
45
-
45
+
46
46
  For example:
47
-
47
+
48
48
  .. code-block:: none
49
49
 
50
50
  {'location-000': {'bear':4,'wolf':10},
51
51
  'location-001': {'bear':12,'elk':20}}
52
-
52
+
53
53
  n_random_seeds (int, optional): number of random seeds to try, always starting from zero
54
54
  target_val_fraction (float, optional): fraction of images containing each species we'd
55
55
  like to put in the val split
56
56
  category_to_max_allowable_error (dict, optional): a dict mapping category names
57
57
  to maximum allowable errors. These are hard constraints (i.e., we will error
58
- if we can't meet them). Does not need to include all categories; categories not
58
+ if we can't meet them). Does not need to include all categories; categories not
59
59
  included will be assigned a maximum error according to [default_max_allowable_error].
60
60
  If this is None, no hard constraints are applied.
61
61
  category_to_error_weight (dict, optional): a dict mapping category names to
62
62
  error weights. You can specify a subset of categories; categories not included here
63
63
  have a weight of 1.0. If None, all categories have the same weight.
64
- default_max_allowable_error (float, optional): the maximum allowable error for categories not
65
- present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
64
+ default_max_allowable_error (float, optional): the maximum allowable error for categories not
65
+ present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
66
66
  constraints for categories not present in [category_to_max_allowable_error]
67
- require_complete_coverage (bool, optional): require that every category appear in both train and
67
+ require_complete_coverage (bool, optional): require that every category appear in both train and
68
68
  val
69
-
69
+
70
70
  Returns:
71
71
  tuple: A two-element tuple:
72
72
  - list of location IDs in the val split
73
- - a dict mapping category names to the fraction of images in the val split
73
+ - a dict mapping category names to the fraction of images in the val split
74
74
  """
75
-
75
+
76
76
  location_ids = list(location_to_category_counts.keys())
77
-
77
+
78
78
  n_val_locations = int(target_val_fraction*len(location_ids))
79
-
79
+
80
80
  if category_to_max_allowable_error is None:
81
81
  category_to_max_allowable_error = {}
82
-
82
+
83
83
  if category_to_error_weight is None:
84
84
  category_to_error_weight = {}
85
-
85
+
86
86
  # category ID to total count; the total count is used only for printouts
87
87
  category_id_to_count = {}
88
88
  for location_id in location_to_category_counts:
@@ -91,28 +91,28 @@ def split_locations_into_train_val(location_to_category_counts,
91
91
  category_id_to_count[category_id] = 0
92
92
  category_id_to_count[category_id] += \
93
93
  location_to_category_counts[location_id][category_id]
94
-
94
+
95
95
  category_ids = set(category_id_to_count.keys())
96
-
96
+
97
97
  print('Splitting {} categories over {} locations'.format(
98
98
  len(category_ids),len(location_ids)))
99
-
99
+
100
100
  # random_seed = 0
101
101
  def compute_seed_errors(random_seed):
102
102
  """
103
103
  Computes the per-category error for a specific random seed.
104
-
104
+
105
105
  returns weighted_average_error,category_to_val_fraction
106
106
  """
107
-
107
+
108
108
  # Randomly split into train/val
109
109
  random.seed(random_seed)
110
110
  val_locations = random.sample(location_ids,k=n_val_locations)
111
111
  val_locations_set = set(val_locations)
112
-
112
+
113
113
  # For each category, measure the % of images that went into the val set
114
114
  category_to_val_fraction = defaultdict(float)
115
-
115
+
116
116
  for category_id in category_ids:
117
117
  category_val_count = 0
118
118
  category_train_count = 0
@@ -127,44 +127,44 @@ def split_locations_into_train_val(location_to_category_counts,
127
127
  category_train_count += location_category_count
128
128
  category_val_fraction = category_val_count / (category_val_count + category_train_count)
129
129
  category_to_val_fraction[category_id] = category_val_fraction
130
-
130
+
131
131
  # Absolute deviation from the target val fraction for each category
132
132
  category_errors = {}
133
133
  weighted_category_errors = {}
134
-
134
+
135
135
  # category = next(iter(category_to_val_fraction))
136
136
  for category in category_to_val_fraction:
137
-
137
+
138
138
  category_val_fraction = category_to_val_fraction[category]
139
-
139
+
140
140
  category_error = abs(category_val_fraction-target_val_fraction)
141
141
  category_errors[category] = category_error
142
-
142
+
143
143
  category_weight = 1.0
144
144
  if category in category_to_error_weight:
145
145
  category_weight = category_to_error_weight[category]
146
146
  weighted_category_error = category_error * category_weight
147
147
  weighted_category_errors[category] = weighted_category_error
148
-
148
+
149
149
  weighted_average_error = np.mean(list(weighted_category_errors.values()))
150
-
150
+
151
151
  return weighted_average_error,weighted_category_errors,category_to_val_fraction
152
-
152
+
153
153
  # ... def compute_seed_errors(...)
154
-
154
+
155
155
  # This will only include random seeds that satisfy the hard constraints
156
156
  random_seed_to_weighted_average_error = {}
157
-
157
+
158
158
  # random_seed = 0
159
159
  for random_seed in tqdm(range(0,n_random_seeds)):
160
-
160
+
161
161
  weighted_average_error,weighted_category_errors,category_to_val_fraction = \
162
162
  compute_seed_errors(random_seed)
163
-
163
+
164
164
  seed_satisfies_hard_constraints = True
165
-
165
+
166
166
  for category in category_to_val_fraction:
167
- if category in category_to_max_allowable_error:
167
+ if category in category_to_max_allowable_error:
168
168
  max_allowable_error = category_to_max_allowable_error[category]
169
169
  else:
170
170
  if default_max_allowable_error is None:
@@ -183,59 +183,59 @@ def split_locations_into_train_val(location_to_category_counts,
183
183
  if category_error > max_allowable_error:
184
184
  seed_satisfies_hard_constraints = False
185
185
  break
186
-
186
+
187
187
  # ...for each category
188
-
189
- if seed_satisfies_hard_constraints:
188
+
189
+ if seed_satisfies_hard_constraints:
190
190
  random_seed_to_weighted_average_error[random_seed] = weighted_average_error
191
-
191
+
192
192
  # ...for each random seed
193
-
193
+
194
194
  assert len(random_seed_to_weighted_average_error) > 0, \
195
195
  'No random seed met all the hard constraints'
196
-
196
+
197
197
  print('\n{} of {} random seeds satisfied hard constraints'.format(
198
198
  len(random_seed_to_weighted_average_error),n_random_seeds))
199
-
199
+
200
200
  min_error = None
201
201
  min_error_seed = None
202
-
202
+
203
203
  for random_seed in random_seed_to_weighted_average_error.keys():
204
204
  error_metric = random_seed_to_weighted_average_error[random_seed]
205
205
  if min_error is None or error_metric < min_error:
206
206
  min_error = error_metric
207
207
  min_error_seed = random_seed
208
-
208
+
209
209
  random.seed(min_error_seed)
210
210
  val_locations = random.sample(location_ids,k=n_val_locations)
211
211
  train_locations = []
212
212
  for location_id in location_ids:
213
213
  if location_id not in val_locations:
214
214
  train_locations.append(location_id)
215
-
216
- print('\nVal locations:\n')
215
+
216
+ print('\nVal locations:\n')
217
217
  for loc in val_locations:
218
218
  print('{}'.format(loc))
219
219
  print('')
220
-
220
+
221
221
  weighted_average_error,weighted_category_errors,category_to_val_fraction = \
222
222
  compute_seed_errors(min_error_seed)
223
-
223
+
224
224
  random_seed = min_error_seed
225
-
225
+
226
226
  category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
227
227
  category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
228
228
  sort_values=category_id_to_count,
229
229
  reverse=True)
230
-
231
-
230
+
231
+
232
232
  print('Val fractions by category:\n')
233
-
233
+
234
234
  for category in category_to_val_fraction:
235
235
  print('{} ({}) {:.2f}'.format(
236
236
  category,category_id_to_count[category],
237
237
  category_to_val_fraction[category]))
238
-
238
+
239
239
  return val_locations,category_to_val_fraction
240
240
 
241
241
  # ...def split_locations_into_train_val(...)
@@ -14,15 +14,18 @@ import re
14
14
  #%% Functions
15
15
 
16
16
  def is_float(s):
17
- """
17
+ """
18
18
  Checks whether [s] is an object (typically a string) that can be cast to a float
19
-
19
+
20
20
  Args:
21
21
  s (object): object to evaluate
22
-
22
+
23
23
  Returns:
24
24
  bool: True if s successfully casts to a float, otherwise False
25
25
  """
26
+
27
+ if s is None:
28
+ return False
26
29
 
27
30
  try:
28
31
  _ = float(s)
@@ -36,57 +39,175 @@ def human_readable_to_bytes(size):
36
39
  Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
37
40
  returns the number of bytes. Will return 0 if the argument has
38
41
  unexpected form.
39
-
42
+
40
43
  https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0
41
-
44
+
42
45
  Args:
43
46
  size (str): string representing a size
44
-
47
+
45
48
  Returns:
46
49
  int: the corresponding size in bytes
47
50
  """
48
-
51
+
49
52
  size = re.sub(r'\s+', '', size)
50
-
53
+
54
+ if not size: # Handle empty string case after stripping spaces
55
+ return 0
56
+
51
57
  if (size[-1] == 'B'):
52
58
  size = size[:-1]
53
-
59
+
60
+ if not size: # Handle case where size was just "B"
61
+ return 0
62
+
54
63
  if (size.isdigit()):
55
- bytes = int(size)
64
+ bytes_val = int(size) # Renamed to avoid conflict with built-in 'bytes'
56
65
  elif (is_float(size)):
57
- bytes = float(size)
66
+ bytes_val = float(size) # Renamed
58
67
  else:
59
- bytes = size[:-1]
60
- unit = size[-1]
61
- try:
62
- bytes = float(bytes)
68
+ # Handle cases like "1KB" where size[:-1] might be "1K" before this block
69
+ # The original code would try to float("1K") which fails.
70
+ # Need to separate numeric part from unit more carefully.
71
+ numeric_part = ''
72
+ unit_part = ''
73
+
74
+ # Iterate from the end to find the unit (K, M, G, T)
75
+ # This handles cases like "10KB" or "2.5GB"
76
+ for i in range(len(size) -1, -1, -1):
77
+ if size[i].isalpha():
78
+ unit_part = size[i] + unit_part
79
+ else:
80
+ numeric_part = size[:i+1]
81
+ break
82
+
83
+ # If no unit found, or numeric part is empty after stripping unit
84
+ if not unit_part or not numeric_part:
85
+ return 0
86
+
87
+ try:
88
+ bytes_val = float(numeric_part)
89
+ unit = unit_part
63
90
  if (unit == 'T'):
64
- bytes *= 1024*1024*1024*1024
91
+ bytes_val *= 1024*1024*1024*1024
65
92
  elif (unit == 'G'):
66
- bytes *= 1024*1024*1024
93
+ bytes_val *= 1024*1024*1024
67
94
  elif (unit == 'M'):
68
- bytes *= 1024*1024
95
+ bytes_val *= 1024*1024
69
96
  elif (unit == 'K'):
70
- bytes *= 1024
97
+ bytes_val *= 1024
71
98
  else:
72
- bytes = 0
99
+ # If it's a known unit (like 'B' already stripped) but not T/G/M/K,
100
+ # and it was floatable, it's just bytes. If it's an unknown unit, it's
101
+ # an error.
102
+ if unit not in ['B', '']: # 'B' was stripped, '' means just a number
103
+ bytes_val = 0
73
104
  except ValueError:
74
- bytes = 0
75
-
76
- return bytes
105
+ bytes_val = 0
106
+
107
+ return bytes_val
77
108
 
78
109
 
79
110
  def remove_ansi_codes(s):
80
111
  """
81
112
  Removes ANSI escape codes from a string.
82
-
113
+
83
114
  https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789
84
-
115
+
85
116
  Args:
86
117
  s (str): the string to de-ANSI-i-fy
87
-
118
+
88
119
  Returns:
89
120
  str: A copy of [s] without ANSI codes
90
121
  """
122
+
91
123
  ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
92
124
  return ansi_escape.sub('', s)
125
+
126
+
127
+ #%% Tests
128
+
129
+
130
+ class TestStringUtils:
131
+ """
132
+ Tests for string_utils.py
133
+ """
134
+
135
+
136
+ def test_is_float(self):
137
+ """
138
+ Test the is_float function.
139
+ """
140
+
141
+ assert is_float("1.23")
142
+ assert is_float("-0.5")
143
+ assert is_float("0")
144
+ assert is_float(1.23)
145
+ assert is_float(0)
146
+ assert not is_float("abc")
147
+ assert not is_float("1.2.3")
148
+ assert not is_float("")
149
+ assert not is_float(None)
150
+ assert not is_float("1,23")
151
+
152
+
153
+ def test_human_readable_to_bytes(self):
154
+ """
155
+ Test the human_readable_to_bytes function.
156
+ """
157
+
158
+ assert human_readable_to_bytes("10B") == 10
159
+ assert human_readable_to_bytes("10") == 10
160
+ assert human_readable_to_bytes("1K") == 1024
161
+ assert human_readable_to_bytes("1KB") == 1024
162
+ assert human_readable_to_bytes("1M") == 1024*1024
163
+ assert human_readable_to_bytes("1MB") == 1024*1024
164
+ assert human_readable_to_bytes("1G") == 1024*1024*1024
165
+ assert human_readable_to_bytes("1GB") == 1024*1024*1024
166
+ assert human_readable_to_bytes("1T") == 1024*1024*1024*1024
167
+ assert human_readable_to_bytes("1TB") == 1024*1024*1024*1024
168
+
169
+ assert human_readable_to_bytes("2.5K") == 2.5 * 1024
170
+ assert human_readable_to_bytes("0.5MB") == 0.5 * 1024 * 1024
171
+
172
+ # Test with spaces
173
+ assert human_readable_to_bytes(" 2 G ") == 2 * 1024*1024*1024
174
+ assert human_readable_to_bytes("500 KB") == 500 * 1024
175
+
176
+ # Invalid inputs
177
+ assert human_readable_to_bytes("abc") == 0
178
+ assert human_readable_to_bytes("1X") == 0
179
+ assert human_readable_to_bytes("1KBB") == 0
180
+ assert human_readable_to_bytes("K1") == 0
181
+ assert human_readable_to_bytes("") == 0
182
+ assert human_readable_to_bytes("1.2.3K") == 0
183
+ assert human_readable_to_bytes("B") == 0
184
+
185
+
186
+ def test_remove_ansi_codes(self):
187
+ """
188
+ Test the remove_ansi_codes function.
189
+ """
190
+
191
+ assert remove_ansi_codes("text without codes") == "text without codes"
192
+ assert remove_ansi_codes("\x1b[31mRed text\x1b[0m") == "Red text"
193
+ assert remove_ansi_codes("\x1b[1m\x1b[4mBold and Underline\x1b[0m") == "Bold and Underline"
194
+ assert remove_ansi_codes("Mixed \x1b[32mgreen\x1b[0m and normal") == "Mixed green and normal"
195
+ assert remove_ansi_codes("") == ""
196
+
197
+ # More complex/varied ANSI codes
198
+ assert remove_ansi_codes("text\x1b[1Aup") == "textup"
199
+ assert remove_ansi_codes("\x1b[2Jclearscreen") == "clearscreen"
200
+
201
+
202
+ def test_string_utils():
203
+ """
204
+ Runs all tests in the TestStringUtils class.
205
+ """
206
+
207
+ test_instance = TestStringUtils()
208
+ test_instance.test_is_float()
209
+ test_instance.test_human_readable_to_bytes()
210
+ test_instance.test_remove_ansi_codes()
211
+
212
+ # from IPython import embed; embed()
213
+ # test_string_utils()