megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of megadetector has been flagged as potentially problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
md_utils/url_utils.py
CHANGED
@@ -1,10 +1,10 @@
-
-
-
-
-
-
-
+"""
+
+url_utils.py
+
+Frequently-used functions for downloading or manipulating URLs
+
+"""
 
 #%% Imports and constants
 
@@ -14,8 +14,11 @@ import urllib
 import tempfile
 import requests
 
+from functools import partial
 from tqdm import tqdm
 from urllib.parse import urlparse
+from multiprocessing.pool import ThreadPool
+from multiprocessing.pool import Pool
 
 url_utils_temp_dir = None
 max_path_len = 255
@@ -25,6 +28,8 @@ max_path_len = 255
 
 class DownloadProgressBar():
     """
+    Progress updater based on the progressbar2 package.
+
     https://stackoverflow.com/questions/37748105/how-to-use-progressbar-module-with-urlretrieve
     """
 
@@ -48,7 +53,15 @@ class DownloadProgressBar():
 
 
 def get_temp_folder(preferred_name='url_utils'):
-
+    """
+    Gets a temporary folder for use within this module.
+
+    Args:
+        preferred_name (str, optional): subfolder to use within the system temp folder
+
+    Returns:
+        str: the full path to the temporary subfolder
+    """
     global url_utils_temp_dir
 
     if url_utils_temp_dir is None:
@@ -58,15 +71,31 @@ def get_temp_folder(preferred_name='url_utils'):
     return url_utils_temp_dir
 
 
-def download_url(url,
-
+def download_url(url,
+                 destination_filename=None,
+                 progress_updater=None,
+                 force_download=False,
+                 verbose=True):
     """
-
-
+    Downloads a URL to a file. If no file is specified, creates a temporary file,
+    making a best effort to avoid filename collisions.
 
     Prints some diagnostic information and makes sure to omit SAS tokens from printouts.
 
-
+    Args:
+        url (str): the URL to download
+        destination_filename (str, optional): the target filename; if None, will create
+            a file in system temp space
+        progress_updater (object or bool, optional): can be None, False, True, or a
+            specific callable object. If None or False, no progress updater will be
+            displayed. If True, a default progress bar will be created.
+        force_download (bool, optional): download this file even if [destination_filename]
+            exists.
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        str: the filename to which [url] was downloaded, the same as [destination_filename]
+            if [destination_filename] was not None
    """
 
     if progress_updater is not None and isinstance(progress_updater,bool):
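The expanded signature and docstring above describe the new force_download and verbose behavior. A hedged usage sketch (the md_utils.url_utils import path is inferred from the wheel's top-level layout in the file list above; the URL and paths are placeholders):

    from md_utils.url_utils import download_url

    # Returns the local path; skips the download if the file already
    # exists, unless force_download=True
    local_path = download_url('https://example.com/images/cat.jpg',
                              destination_filename='/tmp/cat.jpg',
                              force_download=False,
                              verbose=True)

    # With destination_filename=None, a collision-avoiding temp file is created
    temp_path = download_url('https://example.com/images/cat.jpg')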
@@ -109,34 +138,186 @@ def download_url(url, destination_filename=None, progress_updater=None,
 
 def download_relative_filename(url, output_base, verbose=False):
     """
-    Download a URL to output_base, preserving relative path
+    Download a URL to output_base, preserving relative path. Path is relative to
+    the site, so:
+
+    https://abc.com/xyz/123.txt
+
+    ...will get downloaded to:
+
+    output_base/xyz/123.txt
+
+    Args:
+        url (str): the URL to download
+        output_base (str): the base folder to which we should download this file
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        str: the local destination filename
     """
 
     p = urlparse(url)
     # remove the leading '/'
     assert p.path.startswith('/'); relative_filename = p.path[1:]
     destination_filename = os.path.join(output_base,relative_filename)
-    download_url(url, destination_filename, verbose=verbose)
+    return download_url(url, destination_filename, verbose=verbose)
 
 
-def 
+def _do_parallelized_download(download_info,overwrite=False,verbose=False):
     """
-
-
-
+    Internal function for download parallelization.
+    """
+
+    url = download_info['url']
+    target_file = download_info['target_file']
+    result = {'status':'unknown','url':url,'target_file':target_file}
+
+    if ((os.path.isfile(target_file)) and (not overwrite)):
+        if verbose:
+            print('Skipping existing file {}'.format(target_file))
+        result['status'] = 'skipped'
+        return result
+    try:
+        download_url(url=url,
+                     destination_filename=target_file,
+                     verbose=verbose,
+                     force_download=overwrite)
+    except Exception as e:
+        print('Warning: error downloading URL {}: {}'.format(
+            url,str(e)))
+        result['status'] = 'error: {}'.format(str(e))
+        return result
+
+    result['status'] = 'success'
+    return result
+
+
+def parallel_download_urls(url_to_target_file,verbose=False,overwrite=False,
+                           n_workers=20,pool_type='thread'):
+    """
+    Downloads a list of URLs to local files.
+
+    Catches exceptions and reports them in the returned "results" array.
 
-
+    Args:
+        url_to_target_file: a dict mapping URLs to local filenames.
+        verbose (bool, optional): enable additional debug console output
+        overwrite (bool, optional): whether to overwrite existing local files
+        n_workers (int, optional): number of concurrent workers, set to <=1 to disable
+            parallelization
+        pool_type (str, optional): worker type to use; should be 'thread' or 'process'
+
+    Returns:
+        list: list of dicts with keys:
+            - 'url': the url this item refers to
+            - 'status': 'skipped', 'success', or a string starting with 'error'
+            - 'target_file': the local filename to which we downloaded (or tried to
+              download) this URL
     """
 
-
+    all_download_info = []
 
-
+    print('Preparing download list')
+    for url in tqdm(url_to_target_file):
+        download_info = {}
+        download_info['url'] = url
+        download_info['target_file'] = url_to_target_file[url]
+        all_download_info.append(download_info)
 
-
+    print('Downloading {} images on {} workers'.format(
+        len(all_download_info),n_workers))
+
+    if n_workers <= 1:
+
+        results = []
 
-
-
-
+        for download_info in tqdm(all_download_info):
+            result = _do_parallelized_download(download_info,overwrite=overwrite,verbose=verbose)
+            results.append(result)
 
-
+    else:
 
+        if pool_type == 'thread':
+            pool = ThreadPool(n_workers)
+        else:
+            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+            pool = Pool(n_workers)
+
+        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
+
+        results = list(tqdm(pool.imap(
+            partial(_do_parallelized_download,overwrite=overwrite,verbose=verbose),
+            all_download_info), total=len(all_download_info)))
+
+    return results
+
+
+def test_url(url, error_on_failure=True, timeout=None):
+    """
+    Tests the availability of [url], returning an http status code.
+
+    Args:
+        url (str): URL to test
+        error_on_failure (bool, optional): whether to error (vs. just returning an
+            error code) if accessing this URL fails
+        timeout (int, optional): timeout in seconds to wait before considering this
+            access attempt to be a failure; see requests.head() for precise documentation
+
+    Returns:
+        int: http status code (200 for success)
+    """
+
+    # r = requests.get(url, stream=True, verify=True, timeout=timeout)
+    r = requests.head(url, stream=True, verify=True, timeout=timeout)
+
+    if error_on_failure and r.status_code != 200:
+        raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
+    return r.status_code
+
+
+def test_urls(urls, error_on_failure=True, n_workers=1, pool_type='thread', timeout=None):
+    """
+    Verify that URLs are available (i.e., returns status 200). By default,
+    errors if any URL is unavailable.
+
+    Args:
+        urls (list): list of URLs to test
+        error_on_failure (bool, optional): whether to error (vs. just returning an
+            error code) if accessing this URL fails
+        n_workers (int, optional): number of concurrent workers, set to <=1 to disable
+            parallelization
+        pool_type (str, optional): worker type to use; should be 'thread' or 'process'
+        timeout (int, optional): timeout in seconds to wait before considering this
+            access attempt to be a failure; see requests.head() for precise documentation
+
+    Returns:
+        list: a list of http status codes, the same length and order as [urls]
+    """
+
+    if n_workers <= 1:
+
+        status_codes = []
+
+        for url in tqdm(urls):
+
+            r = requests.get(url, timeout=timeout)
+
+            if error_on_failure and r.status_code != 200:
+                raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
+            status_codes.append(r.status_code)
+
+    else:
+
+        if pool_type == 'thread':
+            pool = ThreadPool(n_workers)
+        else:
+            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+            pool = Pool(n_workers)
+
+        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
+
+        status_codes = list(tqdm(pool.imap(
+            partial(test_url,error_on_failure=error_on_failure,timeout=timeout),
+            urls), total=len(urls)))
+
+    return status_codes
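The bulk of this hunk adds parallel download and URL-testing helpers built on ThreadPool/Pool and functools.partial. A hedged usage sketch (import path inferred from the file list above; URLs and paths are placeholders):

    from md_utils.url_utils import parallel_download_urls, test_urls

    url_to_target_file = {
        'https://example.com/a.jpg': '/tmp/dl/a.jpg',
        'https://example.com/b.jpg': '/tmp/dl/b.jpg'
    }

    # Per-URL errors are captured in the returned dicts rather than raised
    results = parallel_download_urls(url_to_target_file,
                                     n_workers=10,
                                     pool_type='thread',
                                     overwrite=False)
    errors = [r for r in results if r['status'].startswith('error')]

    # Check availability without keeping the content; one status code per URL
    status_codes = test_urls(list(url_to_target_file.keys()),
                             error_on_failure=False,
                             n_workers=4)

With the default pool_type='thread' this is safe to call from scripts and notebooks; pool_type='process' requires the usual if __name__ == '__main__': guard on platforms that spawn worker processes.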
md_utils/write_html_image_list.py
CHANGED

@@ -1,14 +1,14 @@
-
-
-
-
-
-
-
-
-
-
-
+"""
+
+write_html_image_list.py
+
+Given a list of image file names, writes an HTML file that
+shows all those images, with optional one-line headers above each.
+
+Each "filename" can also be a dict with elements 'filename','title',
+'imageStyle','textStyle', 'linkTarget'
+
+"""
 
 #%% Constants and imports
 
@@ -23,26 +23,34 @@ from md_utils import path_utils
 
 def write_html_image_list(filename=None,images=None,options=None):
     """
-
-
-
-
-    filename
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    Given a list of image file names, writes an HTML file that shows all those images,
+    with optional one-line headers above each.
+
+    Args:
+        filename (str, optional): the .html output file; if None, just returns a valid
+            options dict
+        images (list, optional): the images to write to the .html file; if None, just returns
+            a valid options dict. This can be a flat list of image filenames, or this can
+            be a list of dictionaries with one or more of the following fields:
+
+            - filename (image filename) (required, all other fields are optional)
+            - imageStyle (css style for this image)
+            - textStyle (css style for the title associated with this image)
+            - title (text label for this image)
+            - linkTarget (URL to which this image should link on click)
+
+        options (dict, optional): a dict with one or more of the following fields:
+
+            - fHtml (file pointer to write to, used for splitting write operations over multiple calls)
+            - headerHtml (html text to include before the image list)
+            - trailerHtml (html text to include after the image list)
+            - defaultImageStyle (default css style for images)
+            - defaultTextStyle (default css style for image titles)
+            - maxFiguresPerHtmlFile (max figures for a single HTML file; overflow will be handled by creating
+              multiple files and a TOC with links)
+            - urlEncodeFilenames (default True, e.g. '#' will be replaced by '%23')
+            - urlEncodeLinkTargets (default True, e.g. '#' will be replaced by '%23')
+
     """
 
     # returns an options struct
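A hedged usage sketch of the interface documented above (import path inferred from the file list; filenames are placeholders):

    from md_utils.write_html_image_list import write_html_image_list

    # Calling with no filename/images just returns a populated default options dict
    options = write_html_image_list()
    options['headerHtml'] = '<h1>Detections</h1>'
    options['maxFiguresPerHtmlFile'] = 2500

    images = [
        'images/img_0001.jpg',
        {'filename': 'images/img_0002.jpg',
         'title': 'camera 3, 2023-06-01',
         'linkTarget': 'images/img_0002.jpg'}
    ]

    write_html_image_list(filename='preview.html', images=images, options=options)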
@@ -68,13 +76,16 @@ def write_html_image_list(filename=None,images=None,options=None):
 
     if 'urlEncodeFilenames' not in options or options['urlEncodeFilenames'] is None:
         options['urlEncodeFilenames'] = True
-
+
+    if 'urlEncodeLinkTargets' not in options or options['urlEncodeLinkTargets'] is None:
+        options['urlEncodeLinkTargets'] = True
+
     # Possibly split the html output for figures into multiple files; Chrome gets sad with
     # thousands of images in a single tab.
     if 'maxFiguresPerHtmlFile' not in options or options['maxFiguresPerHtmlFile'] is None:
         options['maxFiguresPerHtmlFile'] = math.inf
 
-    if filename is None:
+    if filename is None or images is None:
         return options
 
     # images may be a list of images or a list of image/style/title dictionaries,
@@ -176,8 +187,8 @@ def write_html_image_list(filename=None,images=None,options=None):
         title = title.encode('ascii','ignore').decode('ascii')
         filename = filename.encode('ascii','ignore').decode('ascii')
 
-
-
+        filename = filename.replace('\\','/')
+        if options['urlEncodeFilenames']:
             filename = urllib.parse.quote(filename)
 
         if len(title) > 0:
@@ -185,6 +196,11 @@ def write_html_image_list(filename=None,images=None,options=None):
                 '<p style="{}">{}</p>\n'\
                 .format(textStyle,title))
 
+        linkTarget = linkTarget.replace('\\','/')
+        if options['urlEncodeLinkTargets']:
+            # These are typically absolute paths, so we only want to mess with certain characters
+            linkTarget = urllib.parse.quote(linkTarget,safe=':/')
+
         if len(linkTarget) > 0:
             fHtml.write('<a href="{}">'.format(linkTarget))
         # imageStyle.append(';border:0px;')
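For reference, this is what urllib.parse.quote with safe=':/' does to a typical absolute link target (example values are hypothetical):

    import urllib.parse

    # Spaces and '#' are escaped, but the scheme separator and slashes survive
    urllib.parse.quote('https://example.com/my images/cat #1.jpg', safe=':/')
    # -> 'https://example.com/my%20images/cat%20%231.jpg'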