megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
md_utils/path_utils.py CHANGED
@@ -1,11 +1,11 @@
1
- ########
2
- #
3
- # path_utils.py
4
- #
5
- # Miscellaneous useful utils for path manipulation, things that could *almost*
6
- # be in os.path, but aren't.
7
- #
8
- ########
1
+ """
2
+
3
+ path_utils.py
4
+
5
+ Miscellaneous useful utils for path manipulation, i.e. things that could *almost*
6
+ be in os.path, but aren't.
7
+
8
+ """
9
9
 
10
10
  #%% Imports and constants
11
11
 
@@ -14,23 +14,24 @@ import ntpath
14
14
  import os
15
15
  import sys
16
16
  import platform
17
- import posixpath
18
17
  import string
19
18
  import json
20
19
  import shutil
21
20
  import unicodedata
22
21
  import zipfile
22
+ import tarfile
23
23
  import webbrowser
24
24
  import subprocess
25
25
  import re
26
26
 
27
27
  from zipfile import ZipFile
28
28
  from datetime import datetime
29
- from typing import Container, Iterable, List, Optional, Tuple, Sequence
30
29
  from multiprocessing.pool import Pool, ThreadPool
31
30
  from functools import partial
31
+ from shutil import which
32
32
  from tqdm import tqdm
33
33
 
34
+ # Should all be lower-case
34
35
  IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
35
36
 
36
37
  VALID_FILENAME_CHARS = f"~-_.() {string.ascii_letters}{string.digits}"
@@ -41,12 +42,27 @@ CHAR_LIMIT = 255
41
42
 
42
43
  #%% General path functions
43
44
 
44
- def recursive_file_list(base_dir, convert_slashes=True,
45
- return_relative_paths=False, sort_files=True,
45
+ def recursive_file_list(base_dir,
46
+ convert_slashes=True,
47
+ return_relative_paths=False,
48
+ sort_files=True,
46
49
  recursive=True):
47
50
  r"""
48
- Enumerate files (not directories) in [base_dir], optionally converting
49
- \ to /
51
+ Enumerates files (not directories) in [base_dir], optionally converting
52
+ backslashes to slashes
53
+
54
+ Args:
55
+ base_dir (str): folder to enumerate
56
+ convert_slashes (bool, optional): force forward slashes; if this is False, will use
57
+ the native path separator
58
+ return_relative_paths (bool, optional): return paths that are relative to [base_dir],
59
+ rather than absolute paths
60
+ sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
61
+ provided by os.walk()
62
+ recursive (bool, optional): enumerate recursively
63
+
64
+ Returns:
65
+ list: list of filenames
50
66
  """
51
67
 
52
68
  assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)
@@ -79,61 +95,51 @@ def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_
79
95
  recursive=False):
80
96
  """
81
97
  Trivial wrapper for recursive_file_list, which was a poor function name choice at the time,
82
- it doesn't really make sense to have a "recursive" option in a function called "recursive_file_list".
98
+ since it doesn't really make sense to have a "recursive" option in a function called
99
+ "recursive_file_list".
100
+
101
+ Args:
102
+ base_dir (str): folder to enumerate
103
+ convert_slashes (bool, optional): force forward slashes; if this is False, will use
104
+ the native path separator
105
+ return_relative_paths (bool, optional): return paths that are relative to [base_dir],
106
+ rather than absolute paths
107
+ sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
108
+ provided by os.walk()
109
+ recursive (bool, optional): enumerate recursively
110
+
111
+ Returns:
112
+ list: list of filenames
83
113
  """
84
114
 
85
115
  return recursive_file_list(base_dir,convert_slashes,return_relative_paths,sort_files,
86
116
  recursive=recursive)
87
117
 
88
118
 
89
- def split_path(path: str) -> List[str]:
90
- r"""
91
- Splits [path] into all its constituent tokens.
92
-
93
- Non-recursive version of:
94
- http://nicks-liquid-soapbox.blogspot.com/2011/03/splitting-path-to-list-in-python.html
95
-
96
- Examples
97
- >>> split_path(r'c:\dir\subdir\file.txt')
98
- ['c:\\', 'dir', 'subdir', 'file.txt']
99
- >>> split_path('/dir/subdir/file.jpg')
100
- ['/', 'dir', 'subdir', 'file.jpg']
101
- >>> split_path('c:\\')
102
- ['c:\\']
103
- >>> split_path('/')
104
- ['/']
105
- """
106
-
107
- parts = []
108
- while True:
109
- # ntpath seems to do the right thing for both Windows and Unix paths
110
- head, tail = ntpath.split(path)
111
- if head == '' or head == path:
112
- break
113
- parts.append(tail)
114
- path = head
115
- parts.append(head or tail)
116
- return parts[::-1] # reverse
117
-
118
-
119
- def fileparts(path: str) -> Tuple[str, str, str]:
119
+ def fileparts(path):
120
120
  r"""
121
121
  Breaks down a path into the directory path, filename, and extension.
122
122
 
123
123
  Note that the '.' lives with the extension, and separators are removed.
124
124
 
125
- Examples
126
- >>> fileparts('file')
127
- ('', 'file', '')
128
- >>> fileparts(r'c:\dir\file.jpg')
129
- ('c:\\dir', 'file', '.jpg')
130
- >>> fileparts('/dir/subdir/file.jpg')
131
- ('/dir/subdir', 'file', '.jpg')
125
+ Examples:
126
+
127
+ .. code-block:: none
128
+
129
+ >>> fileparts('file')
130
+ ('', 'file', '')
131
+ >>> fileparts(r'c:/dir/file.jpg')
132
+ ('c:/dir', 'file', '.jpg')
133
+ >>> fileparts('/dir/subdir/file.jpg')
134
+ ('/dir/subdir', 'file', '.jpg')
132
135
 
136
+ Args:
137
+ path (str): path name to separate into parts
133
138
  Returns:
134
- p: str, directory path
135
- n: str, filename without extension
136
- e: str, extension including the '.'
139
+ tuple: tuple containing (p,n,e):
140
+ - p: str, directory path
141
+ - n: str, filename without extension
142
+ - e: str, extension including the '.'
137
143
  """
138
144
 
139
145
  # ntpath seems to do the right thing for both Windows and Unix paths
@@ -143,79 +149,168 @@ def fileparts(path: str) -> Tuple[str, str, str]:
143
149
  return p, n, e
144
150
 
145
151
 
146
- def insert_before_extension(filename: str, s: str = '', separator='.') -> str:
152
+ def insert_before_extension(filename, s=None, separator='.'):
147
153
  """
148
154
  Insert string [s] before the extension in [filename], separated with [separator].
149
155
 
150
156
  If [s] is empty, generates a date/timestamp. If [filename] has no extension,
151
157
  appends [s].
152
158
 
153
- Examples
154
- >>> insert_before_extension('/dir/subdir/file.ext', 'insert')
155
- '/dir/subdir/file.insert.ext'
156
- >>> insert_before_extension('/dir/subdir/file', 'insert')
157
- '/dir/subdir/file.insert'
158
- >>> insert_before_extension('/dir/subdir/file')
159
- '/dir/subdir/file.2020.07.20.10.54.38'
159
+ Examples:
160
+
161
+ .. code-block:: none
162
+
163
+ >>> insert_before_extension('/dir/subdir/file.ext', 'insert')
164
+ '/dir/subdir/file.insert.ext'
165
+ >>> insert_before_extension('/dir/subdir/file', 'insert')
166
+ '/dir/subdir/file.insert'
167
+ >>> insert_before_extension('/dir/subdir/file')
168
+ '/dir/subdir/file.2020.07.20.10.54.38'
169
+
170
+ Args:
171
+ filename (str): filename to manipulate
172
+ s (str, optional): string to insert before the extension in [filename], or
173
+ None to insert a datestamp
174
+ separator (str, optional): separator to place between the filename base
175
+ and the inserted string
176
+
177
+ Returns:
178
+ str: modified string
160
179
  """
161
180
 
162
181
  assert len(filename) > 0
163
- if len(s) == 0:
182
+ if s is None or len(s) == 0:
164
183
  s = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
165
184
  name, ext = os.path.splitext(filename)
166
185
  return f'{name}{separator}{s}{ext}'
167
186
 
168
187
 
169
- def top_level_folder(p: str, windows: Optional[bool] = None) -> str:
188
+ def split_path(path):
189
+ r"""
190
+ Splits [path] into all its constituent file/folder tokens.
191
+
192
+ Examples:
193
+
194
+ .. code-block:: none
195
+
196
+ >>> split_path(r'c:\dir\subdir\file.txt')
197
+ ['c:\\', 'dir', 'subdir', 'file.txt']
198
+ >>> split_path('/dir/subdir/file.jpg')
199
+ ['/', 'dir', 'subdir', 'file.jpg']
200
+ >>> split_path('c:\\')
201
+ ['c:\\']
202
+ >>> split_path('/')
203
+ ['/']
204
+
205
+ Args:
206
+ path (str): path to split into tokens
207
+
208
+ Returns:
209
+ list: list of path tokens
170
210
  """
171
- Gets the top-level folder from path [p].
211
+
212
+ parts = []
213
+ while True:
214
+ # ntpath seems to do the right thing for both Windows and Unix paths
215
+ head, tail = ntpath.split(path)
216
+ if head == '' or head == path:
217
+ break
218
+ parts.append(tail)
219
+ path = head
220
+ parts.append(head or tail)
221
+ return parts[::-1] # reverse
172
222
 
173
- This function behaves differently for Windows vs. Unix paths. Set
174
- windows=True if [p] is a Windows path. Set windows=None (default) to treat
175
- [p] as a native system path.
176
223
 
177
- On Windows, will use the top-level folder that isn't the drive.
178
- >>> top_level_folder(r'c:\blah\foo')
179
- 'c:\blah'
224
+ def path_is_abs(p):
225
+ """
226
+ Determines whether [p] is an absolute path. An absolute path is defined as
227
+ one that starts with slash, backslash, or a letter followed by a colon.
228
+
229
+ Args:
230
+ p (str): path to evaluate
231
+
232
+ Returns:
233
+ bool: True if [p] is an absolute path, else False
234
+ """
235
+
236
+ return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')
237
+
180
238
 
181
- On Unix, does not include the leaf node.
182
- >>> top_level_folder('/blah/foo')
183
- '/blah'
239
+ def top_level_folder(p):
240
+ r"""
241
+ Gets the top-level folder from the path *p*.
242
+
243
+ On UNIX, this is straightforward:
244
+
245
+ /blah/foo
246
+
247
+ ...returns '/blah'
248
+
249
+ On Windows, we define this as the top-level folder that isn't the drive, so:
250
+
251
+ c:\blah\foo
252
+
253
+ ...returns 'c:\blah'.
254
+
255
+ Args:
256
+ p (str): filename to evaluate
257
+
258
+ Returns:
259
+ str: the top-level folder in [p], see above for details on how this is defined
184
260
  """
185
261
 
186
262
  if p == '':
187
263
  return ''
188
-
189
- default_lib = os.path # save default os.path
190
- if windows is not None:
191
- os.path = ntpath if windows else posixpath
192
-
193
- # Path('/blah').parts is ('/', 'blah')
264
+
265
+ # Path('/blah').parts is ('/','blah')
194
266
  parts = split_path(p)
267
+
268
+ if len(parts) == 1:
269
+ return parts[0]
195
270
 
271
+ # Handle paths like:
272
+ #
273
+ # /, \, /stuff, c:, c:\stuff
196
274
  drive = os.path.splitdrive(p)[0]
197
- if len(parts) > 1 and (
198
- parts[0] == drive
199
- or parts[0] == drive + '/'
200
- or parts[0] == drive + '\\'
201
- or parts[0] in ['\\', '/']):
202
- result = os.path.join(parts[0], parts[1])
275
+ if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
276
+ return os.path.join(parts[0], parts[1])
203
277
  else:
204
- result = parts[0]
278
+ return parts[0]
279
+
280
+ # ...top_level_folder()
205
281
 
206
- os.path = default_lib # restore default os.path
207
- return result
208
282
 
283
+ #%% Test driver for top_level_folder
284
+
285
+ if False:
286
+
287
+ #%%
288
+
289
+ p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
290
+ p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
291
+ p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
292
+ p = ''; s = top_level_folder(p); print(s); assert s == ''
293
+ p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
294
+ p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
295
+ p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
296
+ p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
297
+ p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
298
+
299
+ #%%
209
300
 
210
301
  def safe_create_link(link_exists,link_new):
211
302
  """
212
- Create a symlink at link_new pointing to link_exists.
303
+ Creates a symlink at [link_new] pointing to [link_exists].
213
304
 
214
- If link_new already exists, make sure it's a link (not a file),
215
- and if it has a different target than link_exists, remove and re-create
305
+ If [link_new] already exists, make sure it's a link (not a file),
306
+ and if it has a different target than [link_exists], removes and re-creates
216
307
  it.
217
308
 
218
- Errors if link_new already exists but it's not a link.
309
+ Errors if [link_new] already exists but it's not a link.
310
+
311
+ Args:
312
+ link_exists (str): the source of the (possibly-new) symlink
313
+ link_new (str): the target of the (possibly-new) symlink
219
314
  """
220
315
 
221
316
  if os.path.exists(link_new) or os.path.islink(link_new):
@@ -229,37 +324,60 @@ def safe_create_link(link_exists,link_new):
229
324
 
230
325
  #%% Image-related path functions
231
326
 
232
- def is_image_file(s: str, img_extensions: Container[str] = IMG_EXTENSIONS
233
- ) -> bool:
327
+ def is_image_file(s, img_extensions=IMG_EXTENSIONS):
234
328
  """
235
329
  Checks a file's extension against a hard-coded set of image file
236
- extensions.
330
+ extensions. Uses case-insensitive comparison.
237
331
 
238
332
  Does not check whether the file exists, only determines whether the filename
239
333
  implies it's an image file.
334
+
335
+ Args:
336
+ s (str): filename to evaluate for image-ness
337
+ img_extensions (list, optional): list of known image file extensions
338
+
339
+ Returns:
340
+ bool: True if [s] appears to be an image file, else False
240
341
  """
241
342
 
242
343
  ext = os.path.splitext(s)[1]
243
344
  return ext.lower() in img_extensions
244
345
 
245
346
 
246
- def find_image_strings(strings: Iterable[str]) -> List[str]:
347
+ def find_image_strings(strings):
247
348
  """
248
349
  Given a list of strings that are potentially image file names, looks for
249
350
  strings that actually look like image file names (based on extension).
351
+
352
+ Args:
353
+ strings (list): list of filenames to check for image-ness
354
+
355
+ Returns:
356
+ list: the subset of [strings] that appear to be image filenames
250
357
  """
251
358
 
252
359
  return [s for s in strings if is_image_file(s)]
253
360
 
254
361
 
255
- def find_images(dirname: str, recursive: bool = False,
256
- return_relative_paths: bool = False,
257
- convert_slashes: bool = False) -> List[str]:
362
+ def find_images(dirname,
363
+ recursive=False,
364
+ return_relative_paths=False,
365
+ convert_slashes=True):
258
366
  """
259
367
  Finds all files in a directory that look like image file names. Returns
260
368
  absolute paths unless return_relative_paths is set. Uses the OS-native
261
369
  path separator unless convert_slashes is set, in which case will always
262
370
  use '/'.
371
+
372
+ Args:
373
+ dirname (str): the folder to search for images
374
+ recursive (bool, optional): whether to search recursively
375
+ return_relative_paths (bool, optional): return paths that are relative
376
+ to [dirname], rather than absolute paths
377
+ convert_slashes (bool, optional): force forward slashes in return values
378
+
379
+ Returns:
380
+ list: list of image filenames found in [dirname]
263
381
  """
264
382
 
265
383
  assert os.path.isdir(dirname), '{} is not a folder'.format(dirname)
@@ -284,16 +402,28 @@ def find_images(dirname: str, recursive: bool = False,
284
402
 
285
403
  #%% Filename cleaning functions
286
404
 
287
- def clean_filename(filename: str, allow_list: str = VALID_FILENAME_CHARS,
288
- char_limit: int = CHAR_LIMIT, force_lower: bool = False) -> str:
405
+ def clean_filename(filename,
406
+ allow_list=VALID_FILENAME_CHARS,
407
+ char_limit=CHAR_LIMIT,
408
+ force_lower= False):
289
409
  r"""
290
410
  Removes non-ASCII and other invalid filename characters (on any
291
- reasonable OS) from a filename, then trims to a maximum length.
411
+ reasonable OS) from a filename, then optionally trims to a maximum length.
292
412
 
293
413
  Does not allow :\/ by default, use clean_path if you want to preserve those.
294
414
 
295
415
  Adapted from
296
416
  https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
417
+
418
+ Args:
419
+ filename (str): filename to clean
420
+ allow_list (str, optional): string containing all allowable filename characters
421
+ char_limit (int, optional): maximum allowable filename length, if None will skip this
422
+ step
423
+ force_lower (bool, optional): convert the resulting filename to lowercase
424
+
425
+ Returns:
426
+ str: cleaned version of [filename]
297
427
  """
298
428
 
299
429
  # keep only valid ascii chars
@@ -309,35 +439,75 @@ def clean_filename(filename: str, allow_list: str = VALID_FILENAME_CHARS,
309
439
  return cleaned_filename
310
440
 
311
441
 
312
- def clean_path(pathname: str, allow_list: str = VALID_PATH_CHARS,
313
- char_limit: int = CHAR_LIMIT, force_lower: bool = False) -> str:
442
+ def clean_path(pathname,
443
+ allow_list=VALID_PATH_CHARS,
444
+ char_limit=CHAR_LIMIT,
445
+ force_lower=False):
314
446
  """
315
447
  Removes non-ASCII and other invalid path characters (on any reasonable
316
- OS) from a path, then trims to a maximum length.
448
+ OS) from a path, then optionally trims to a maximum length.
449
+
450
+ Args:
451
+ pathname (str): path name to clean
452
+ allow_list (str, optional): string containing all allowable filename characters
453
+ char_limit (int, optional): maximum allowable filename length, if None will skip this
454
+ step
455
+ force_lower (bool, optional): convert the resulting filename to lowercase
456
+
457
+ Returns:
458
+ str: cleaned version of [pathname]
317
459
  """
318
460
 
319
461
  return clean_filename(pathname, allow_list=allow_list,
320
462
  char_limit=char_limit, force_lower=force_lower)
321
463
 
322
464
 
323
- def flatten_path(pathname: str, separator_chars: str = SEPARATOR_CHARS) -> str:
324
- """
465
+ def flatten_path(pathname,separator_chars=SEPARATOR_CHARS,separator_char_replacement='~'):
466
+ r"""
325
467
  Removes non-ASCII and other invalid path characters (on any reasonable
326
468
  OS) from a path, then trims to a maximum length. Replaces all valid
327
- separators with '~'.
469
+ separators with [separator_char_replacement.]
470
+
471
+ Args:
472
+ pathname (str): path name to flatten
473
+ separator_chars (str, optional): string containing all known path separators
474
+ separator_char_replacement (str, optional): string to insert in place of
475
+ path separators.
476
+
477
+ Returns:
478
+ str: flattened version of [pathname]
328
479
  """
329
480
 
330
481
  s = clean_path(pathname)
331
482
  for c in separator_chars:
332
- s = s.replace(c, '~')
483
+ s = s.replace(c, separator_char_replacement)
333
484
  return s
334
485
 
335
486
 
487
+ def is_executable(filename):
488
+ """
489
+ Checks whether [filename] is on the system path and marked as executable.
490
+
491
+ Args:
492
+ filename (str): filename to check for executable status
493
+
494
+ Returns:
495
+ bool: True if [filename] is on the system path and marked as executable, otherwise False
496
+ """
497
+
498
+ # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
499
+
500
+ return which(filename) is not None
501
+
502
+
336
503
  #%% Platform-independent way to open files in their associated application
337
504
 
338
505
  def environment_is_wsl():
339
506
  """
340
- Returns True if we're running in WSL
507
+ Determines whether we're running in WSL.
508
+
509
+ Returns:
510
+ bool: True if we're running in WSL, else False
341
511
  """
342
512
 
343
513
  if sys.platform not in ('linux','posix'):
@@ -347,7 +517,7 @@ def environment_is_wsl():
347
517
 
348
518
 
349
519
  def wsl_path_to_windows_path(filename):
350
- """
520
+ r"""
351
521
  Converts a WSL path to a Windows path, or returns None if that's not possible. E.g.
352
522
  converts:
353
523
 
@@ -356,6 +526,12 @@ def wsl_path_to_windows_path(filename):
356
526
  ...to:
357
527
 
358
528
  e:\a\b\c
529
+
530
+ Args:
531
+ filename (str): filename to convert
532
+
533
+ Returns:
534
+ str: Windows equivalent to the WSL path [filename]
359
535
  """
360
536
 
361
537
  result = subprocess.run(['wslpath', '-w', filename], text=True, capture_output=True)
@@ -369,16 +545,19 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
369
545
  """
370
546
  Opens [filename] in the default OS file handler for this file type.
371
547
 
372
- If attempt_to_open_in_wsl_host is True, and we're in WSL, attempts to open
373
- [filename] in the Windows host environment.
374
-
375
548
  If browser_name is not None, uses the webbrowser module to open the filename
376
549
  in the specified browser; see https://docs.python.org/3/library/webbrowser.html
377
550
  for supported browsers. Falls back to the default file handler if webbrowser.open()
378
551
  fails. In this case, attempt_to_open_in_wsl_host is ignored unless webbrowser.open() fails.
379
552
 
380
- If browser_name is 'default', use the system default. This is different from the
553
+ If browser_name is 'default', uses the system default. This is different from the
381
554
  parameter to webbrowser.get(), where None implies the system default.
555
+
556
+ Args:
557
+ filename (str): file to open
558
+ attempt_to_open_in_wsl_host: if this is True, and we're in WSL, attempts to open
559
+ [filename] in the Windows host environment
560
+ browser_name: see above
382
561
  """
383
562
 
384
563
  if browser_name is not None:
@@ -423,10 +602,14 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
423
602
 
424
603
  #%% File list functions
425
604
 
426
- def write_list_to_file(output_file: str, strings: Sequence[str]) -> None:
605
+ def write_list_to_file(output_file,strings):
427
606
  """
428
607
  Writes a list of strings to either a JSON file or text file,
429
608
  depending on extension of the given file name.
609
+
610
+ Args:
611
+ output_file (str): file to write
612
+ strings (list): list of strings to write to [output_file]
430
613
  """
431
614
 
432
615
  with open(output_file, 'w') as f:
@@ -436,9 +619,15 @@ def write_list_to_file(output_file: str, strings: Sequence[str]) -> None:
436
619
  f.write('\n'.join(strings))
437
620
 
438
621
 
439
- def read_list_from_file(filename: str) -> List[str]:
622
+ def read_list_from_file(filename):
440
623
  """
441
624
  Reads a json-formatted list of strings from a file.
625
+
626
+ Args:
627
+ filename (str): .json filename to read
628
+
629
+ Returns:
630
+ list: list of strings read from [filename]
442
631
  """
443
632
 
444
633
  assert filename.endswith('.json')
@@ -451,6 +640,10 @@ def read_list_from_file(filename: str) -> List[str]:
451
640
 
452
641
 
453
642
  def _copy_file(input_output_tuple,overwrite=True,verbose=False):
643
+ """
644
+ Internal function for copying files from within parallel_copy_files.
645
+ """
646
+
454
647
  assert len(input_output_tuple) == 2
455
648
  source_fn = input_output_tuple[0]
456
649
  target_fn = input_output_tuple[1]
@@ -465,7 +658,16 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
465
658
  def parallel_copy_files(input_file_to_output_file, max_workers=16,
466
659
  use_threads=True, overwrite=False, verbose=False):
467
660
  """
468
- Copy files from source to target according to the dict input_file_to_output_file.
661
+ Copies files from source to target according to the dict input_file_to_output_file.
662
+
663
+ Args:
664
+ input_file_to_output_file (dict): dictionary mapping source files to the target files
665
+ to which they should be copied
666
+ max_workers (int, optional): number of concurrent workers; set to <=1 to disable parallelism
667
+ use_threads (bool, optional): whether to use threads (True) or processes (False) for
668
+ parallel copying; ignored if max_workers <= 1
669
+ overwrite (bool, optional): whether to overwrite existing destination files
670
+ verbose (bool, optional): enable additional debug output
469
671
  """
470
672
 
471
673
  n_workers = min(max_workers,len(input_file_to_output_file))
@@ -490,11 +692,19 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
490
692
 
491
693
  def get_file_sizes(base_dir, convert_slashes=True):
492
694
  """
493
- Get sizes recursively for all files in base_dir, returning a dict mapping
695
+ Gets sizes recursively for all files in base_dir, returning a dict mapping
494
696
  relative filenames to size.
495
697
 
496
698
  TODO: merge the functionality here with parallel_get_file_sizes, which uses slightly
497
699
  different semantics.
700
+
701
+ Args:
702
+ base_dir (str): folder within which we want all file sizes
703
+ convert_slashes (bool, optional): force forward slashes in return strings,
704
+ otherwise uses the native path separator
705
+
706
+ Returns:
707
+ dict: dictionary mapping filenames to file sizes in bytes
498
708
  """
499
709
 
500
710
  relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
@@ -527,8 +737,19 @@ def parallel_get_file_sizes(filenames, max_workers=16,
527
737
  use_threads=True, verbose=False,
528
738
  recursive=True):
529
739
  """
530
- Return a dictionary mapping every file in [filenames] to the corresponding file size,
740
+ Returns a dictionary mapping every file in [filenames] to the corresponding file size,
531
741
  or None for errors. If [filenames] is a folder, will enumerate the folder (optionally recursively).
742
+
743
+ Args:
744
+ filenames (list or str): list of filenames for which we should read sizes, or a folder
745
+ within which we should read all file sizes recursively
746
+ max_workers (int, optional): number of concurrent workers; set to <=1 to disable parallelism
747
+ use_threads (bool, optional): whether to use threads (True) or processes (False) for
748
+ parallel copying; ignored if max_workers <= 1
749
+ verbose (bool, optional): enable additional debug output
750
+
751
+ Returns:
752
+ dict: dictionary mapping filenames to file sizes in bytes
532
753
  """
533
754
 
534
755
  n_workers = min(max_workers,len(filenames))
@@ -555,7 +776,18 @@ def parallel_get_file_sizes(filenames, max_workers=16,
555
776
 
556
777
  def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
557
778
  """
558
- Zip a single file, by default writing to a new file called [input_fn].zip
779
+ Zips a single file.
780
+
781
+ Args:
782
+ input_fn (str): file to zip
783
+ output_fn (str, optional): target zipfile; if this is None, we'll use
784
+ [input_fn].zip
785
+ overwrite (bool, optional): whether to overwrite an existing target file
786
+ verbose (bool, optional): enable additional debug console output
787
+ compresslevel (int, optional): compression level to use, between 0 and 9
788
+
789
+ Returns:
790
+ str: the output zipfile, whether we created it or determined that it already exists
559
791
  """
560
792
 
561
793
  basename = os.path.basename(input_fn)
@@ -565,7 +797,7 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
565
797
 
566
798
  if (not overwrite) and (os.path.isfile(output_fn)):
567
799
  print('Skipping existing file {}'.format(output_fn))
568
- return
800
+ return output_fn
569
801
 
570
802
  if verbose:
571
803
  print('Zipping {} to {} with level {}'.format(input_fn,output_fn,compresslevel))
@@ -577,17 +809,70 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
577
809
  return output_fn
578
810
 
579
811
 
812
+ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
813
+ overwrite=False, verbose=False, mode='x'):
814
+ """
815
+ Adds all the files in [input_files] to the tar file [output_fn].
816
+ Archive names are relative to arc_name_base.
817
+
818
+ Args:
819
+ input_files (list): list of absolute filenames to include in the .tar file
820
+ output_fn (str): .tar file to create
821
+ arc_name_base (str): absolute folder from which relative paths should be determined;
822
+ behavior is undefined if there are files in [input_files] that don't live within
823
+ [arc_name_base]
824
+ overwrite (bool, optional): whether to overwrite an existing .tar file
825
+ verbose (bool, optional): enable additional debug console output
826
+ mode (str, optional): compression type, can be 'x' (no compression), 'x:gz', or 'x:bz2'.
827
+
828
+ Returns:
829
+ str: the output tar file, whether we created it or determined that it already exists
830
+ """
831
+
832
+ if os.path.isfile(output_fn):
833
+ if not overwrite:
834
+ print('Tar file {} exists, skipping'.format(output_fn))
835
+ return output_fn
836
+ else:
837
+ print('Tar file {} exists, deleting and re-creating'.format(output_fn))
838
+ os.remove(output_fn)
839
+
840
+ if verbose:
841
+ print('Adding {} files to {} (mode {})'.format(
842
+ len(input_files),output_fn,mode))
843
+
844
+ with tarfile.open(output_fn,mode) as tarf:
845
+ for input_fn_abs in tqdm(input_files,disable=(not verbose)):
846
+ input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
847
+ tarf.add(input_fn_abs,arcname=input_fn_relative)
848
+
849
+ return output_fn
850
+
851
+
580
852
  def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
581
853
  overwrite=False, verbose=False, compresslevel=9):
582
854
  """
583
855
  Zip all the files in [input_files] into [output_fn]. Archive names are relative to
584
856
  arc_name_base.
857
+
858
+ Args:
859
+ input_files (list): list of absolute filenames to include in the .zip file
860
+ output_fn (str): .zip file to create
861
+ arc_name_base (str): absolute folder from which relative paths should be determined;
862
+ behavior is undefined if there are files in [input_files] that don't live within
863
+ [arc_name_base]
864
+ overwrite (bool, optional): whether to overwrite an existing .zip file
865
+ verbose (bool, optional): enable additional debug console output
866
+ compresslevel (int, optional): compression level to use, between 0 and 9
867
+
868
+ Returns:
869
+ str: the output zipfile, whether we created it or determined that it already exists
585
870
  """
586
871
 
587
872
  if not overwrite:
588
873
  if os.path.isfile(output_fn):
589
874
  print('Zip file {} exists, skipping'.format(output_fn))
590
- return
875
+ return output_fn
591
876
 
592
877
  if verbose:
593
878
  print('Zipping {} files to {} (compression level {})'.format(
@@ -609,7 +894,15 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
609
894
  Recursively zip everything in [input_folder] into a single zipfile, storing outputs as relative
610
895
  paths.
611
896
 
612
- Defaults to writing to [input_folder].zip
897
+ Args:
898
+ input_folder (str): folder to zip
899
+ output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
900
+ overwrite (bool, optional): whether to overwrite an existing .zip file
901
+ verbose (bool, optional): enable additional debug console output
902
+ compresslevel (int, optional): compression level to use, between 0 and 9
903
+
904
+ Returns:
905
+ str: the output zipfile, whether we created it or determined that it already exists
613
906
  """
614
907
 
615
908
  if output_fn is None:
@@ -640,8 +933,17 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
640
933
  def parallel_zip_files(input_files, max_workers=16, use_threads=True, compresslevel=9,
641
934
  overwrite=False, verbose=False):
642
935
  """
643
- Zip one or more files to separate output files in parallel, leaving the
936
+ Zips one or more files to separate output files in parallel, leaving the
644
937
  original files in place. Each file is zipped to [filename].zip.
938
+
939
+ Args:
940
+ input_files (list): list of files to zip
941
+ max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
942
+ use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
943
+ max_workers <= 1
944
+ compresslevel (int, optional): zip compression level between 0 and 9
945
+ overwrite (bool, optional): whether to overwrite existing .zip files
946
+ verbose (bool, optional): enable additional debug console output
645
947
  """
646
948
 
647
949
  n_workers = min(max_workers,len(input_files))
@@ -661,8 +963,17 @@ def parallel_zip_files(input_files, max_workers=16, use_threads=True, compressle
661
963
  def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
662
964
  compresslevel=9, overwrite=False, verbose=False):
663
965
  """
664
- Zip one or more folders to separate output files in parallel, leaving the
966
+ Zips one or more folders to separate output files in parallel, leaving the
665
967
  original folders in place. Each folder is zipped to [folder_name].zip.
968
+
969
+ Args:
970
+ input_folders (list): list of folders to zip
971
+ max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
972
+ use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
973
+ max_workers <= 1
974
+ compresslevel (int, optional): zip compression level between 0 and 9
975
+ overwrite (bool, optional): whether to overwrite existing .zip files
976
+ verbose (bool, optional): enable additional debug console output
666
977
  """
667
978
 
668
979
  n_workers = min(max_workers,len(input_folders))
@@ -684,10 +995,20 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
684
995
  compresslevel=9,overwrite=False,required_token=None,verbose=False,
685
996
  exclude_zip=True):
686
997
  """
687
- Zip each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To zip a whole
688
- folder into a single zipfile, use zip_folder().
998
+ Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
999
+ zip a whole folder into a single zipfile, use zip_folder().
689
1000
 
690
- If required_token is not None, include only files that contain that token.
1001
+ Args:
1002
+ folder_name (str): the folder within which we should zip files
1003
+ recursive (bool, optional): whether to recurse within [folder_name]
1004
+ max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
1005
+ use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
1006
+ max_workers <= 1
1007
+ compresslevel (int, optional): zip compression level between 0 and 9
1008
+ overwrite (bool, optional): whether to overwrite existing .zip files
1009
+ required_token (str, optional): only zip files whose names contain this string
1010
+ verbose (bool, optional): enable additional debug console output
1011
+ exclude_zip (bool, optional): skip files ending in .zip
691
1012
  """
692
1013
 
693
1014
  assert os.path.isdir(folder_name), '{} is not a folder'.format(folder_name)
@@ -707,8 +1028,13 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
707
1028
 
708
1029
  def unzip_file(input_file, output_folder=None):
709
1030
  """
710
- Unzip a zipfile to the specified output folder, defaulting to the same location as
711
- the input file
1031
+ Unzips a zipfile to the specified output folder, defaulting to the same location as
1032
+ the input file.
1033
+
1034
+ Args:
1035
+ input_file (str): zipfile to unzip
1036
+ output_folder (str, optional): folder to which we should unzip [input_file], defaults
1037
+ to unzipping to the folder where [input_file] lives
712
1038
  """
713
1039
 
714
1040
  if output_folder is None: