megadetector 5.0.8-py3-none-any.whl → 5.0.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
md_utils/url_utils.py
CHANGED

@@ -1,10 +1,10 @@
-
-
-
-
-
-
-
+"""
+
+url_utils.py
+
+Frequently-used functions for downloading or manipulating URLs
+
+"""
 
 #%% Imports and constants
 
@@ -14,9 +14,11 @@ import urllib
 import tempfile
 import requests
 
+from functools import partial
 from tqdm import tqdm
 from urllib.parse import urlparse
 from multiprocessing.pool import ThreadPool
+from multiprocessing.pool import Pool
 
 url_utils_temp_dir = None
 max_path_len = 255
@@ -26,6 +28,8 @@ max_path_len = 255
 
 class DownloadProgressBar():
     """
+    Progress updater based on the progressbar2 package.
+
     https://stackoverflow.com/questions/37748105/how-to-use-progressbar-module-with-urlretrieve
     """
 
@@ -49,7 +53,15 @@ class DownloadProgressBar():
 
 
 def get_temp_folder(preferred_name='url_utils'):
-
+    """
+    Gets a temporary folder for use within this module.
+
+    Args:
+        preferred_name (str, optional): subfolder to use within the system temp folder
+
+    Returns:
+        str: the full path to the temporary subfolder
+    """
     global url_utils_temp_dir
 
     if url_utils_temp_dir is None:
@@ -59,15 +71,31 @@ def get_temp_folder(preferred_name='url_utils'):
     return url_utils_temp_dir
 
 
-def download_url(url,
-
+def download_url(url,
+                 destination_filename=None,
+                 progress_updater=None,
+                 force_download=False,
+                 verbose=True):
     """
-
-
+    Downloads a URL to a file. If no file is specified, creates a temporary file,
+    making a best effort to avoid filename collisions.
 
     Prints some diagnostic information and makes sure to omit SAS tokens from printouts.
 
-
+    Args:
+        url (str): the URL to download
+        destination_filename (str, optional): the target filename; if None, will create
+            a file in system temp space
+        progress_updater (object or bool, optional): can be "None", "False", "True", or a
+            specific callable object. If None or False, no progress updated will be
+            displayed. If True, a default progress bar will be created.
+        force_download (bool, optional): download this file even if [destination_filename]
+            exists.
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        str: the filename to which [url] was downloaded, the same as [destination_filename]
+            if [destination_filename] was not None
     """
 
     if progress_updater is not None and isinstance(progress_updater,bool):
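
The new docstring above pins down download_url's contract. A minimal usage sketch, assuming megadetector 5.0.9 is installed and the function is importable from md_utils.url_utils (per this file's path); the URL is hypothetical:

# Sketch based on the download_url docstring above; the URL is hypothetical.
from md_utils.url_utils import download_url

# With destination_filename=None, the file lands in system temp space and the
# function returns the path it chose; per the docstring, progress_updater=True
# requests a default progress bar, and force_download=True re-downloads over
# an existing file.
local_path = download_url('https://example.com/images/sample.jpg',
                          destination_filename=None,
                          progress_updater=None,
                          force_download=False,
                          verbose=True)
print('Downloaded to {}'.format(local_path))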
@@ -118,46 +146,79 @@ def download_relative_filename(url, output_base, verbose=False):
     ...will get downloaded to:
 
     output_base/xyz/123.txt
+
+    Args:
+        url (str): the URL to download
+        output_base (str): the base folder to which we should download this file
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        str: the local destination filename
     """
 
     p = urlparse(url)
     # remove the leading '/'
     assert p.path.startswith('/'); relative_filename = p.path[1:]
     destination_filename = os.path.join(output_base,relative_filename)
-    download_url(url, destination_filename, verbose=verbose)
+    return download_url(url, destination_filename, verbose=verbose)
 
 
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        result['status'] = 'error: {}'.format(str(e))
-        return result
-
-    result['status'] = 'success'
+def _do_parallelized_download(download_info,overwrite=False,verbose=False):
+    """
+    Internal function for download parallelization.
+    """
+
+    url = download_info['url']
+    target_file = download_info['target_file']
+    result = {'status':'unknown','url':url,'target_file':target_file}
+
+    if ((os.path.isfile(target_file)) and (not overwrite)):
+        if verbose:
+            print('Skipping existing file {}'.format(target_file))
+        result['status'] = 'skipped'
+        return result
+    try:
+        download_url(url=url,
+                     destination_filename=target_file,
+                     verbose=verbose,
+                     force_download=overwrite)
+    except Exception as e:
+        print('Warning: error downloading URL {}: {}'.format(
+            url,str(e)))
+        result['status'] = 'error: {}'.format(str(e))
         return result
+
+    result['status'] = 'success'
+    return result
+
 
+def parallel_download_urls(url_to_target_file,verbose=False,overwrite=False,
+                           n_workers=20,pool_type='thread'):
+    """
+    Downloads a list of URLs to local files.
+
+    Catches exceptions and reports them in the returned "results" array.
+
+    Args:
+        url_to_target_file: a dict mapping URLs to local filenames.
+        verbose (bool, optional): enable additional debug console output
+        overwrite (bool, optional): whether to overwrite existing local files
+        n_workers (int, optional): number of concurrent workers, set to <=1 to disable
+            parallelization
+        pool_type (str, optional): worker type to use; should be 'thread' or 'process'
+
+    Returns:
+        list: list of dicts with keys:
+            - 'url': the url this item refers to
+            - 'status': 'skipped', 'success', or a string starting with 'error'
+            - 'target_file': the local filename to which we downloaded (or tried to
+              download) this URL
+    """
+
     all_download_info = []
-
+
+    print('Preparing download list')
+    for url in tqdm(url_to_target_file):
         download_info = {}
         download_info['url'] = url
         download_info['target_file'] = url_to_target_file[url]
@@ -171,37 +232,92 @@ def parallel_download_urls(url_to_target_file,verbose=False,overwrite=False,
         results = []
 
         for download_info in tqdm(all_download_info):
-            result = _do_parallelized_download(download_info,overwrite=overwrite)
+            result = _do_parallelized_download(download_info,overwrite=overwrite,verbose=verbose)
             results.append(result)
 
     else:
 
-
-
-
-
-
+        if pool_type == 'thread':
+            pool = ThreadPool(n_workers)
+        else:
+            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+            pool = Pool(n_workers)
+
+        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
+
+        results = list(tqdm(pool.imap(
+            partial(_do_parallelized_download,overwrite=overwrite,verbose=verbose),
+            all_download_info), total=len(all_download_info)))
+
     return results
 
 
-def
+def test_url(url, error_on_failure=True, timeout=None):
+    """
+    Tests the availability of [url], returning an http status code.
+
+    Args:
+        url (str): URL to test
+        error_on_failure (bool, optional): whether to error (vs. just returning an
+            error code) if accessing this URL fails
+        timeout (int, optional): timeout in seconds to wait before considering this
+            access attempt to be a failure; see requests.head() for precise documentation
+
+    Returns:
+        int: http status code (200 for success)
     """
-    Verify that a list of URLs is available (returns status 200). By default,
-    errors if any URL is unavailable. If error_on_failure is False, returns
-    status codes for each URL.
 
-
+    # r = requests.get(url, stream=True, verify=True, timeout=timeout)
+    r = requests.head(url, stream=True, verify=True, timeout=timeout)
+
+    if error_on_failure and r.status_code != 200:
+        raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
+    return r.status_code
+
+
+def test_urls(urls, error_on_failure=True, n_workers=1, pool_type='thread', timeout=None):
     """
+    Verify that URLs are available (i.e., returns status 200). By default,
+    errors if any URL is unavailable.
 
-
+    Args:
+        urls (list): list of URLs to test
+        error_on_failure (bool, optional): whether to error (vs. just returning an
+            error code) if accessing this URL fails
+        n_workers (int, optional): number of concurrent workers, set to <=1 to disable
+            parallelization
+        pool_type (str, optional): worker type to use; should be 'thread' or 'process'
+        timeout (int, optional): timeout in seconds to wait before considering this
+            access attempt to be a failure; see requests.head() for precise documentation
 
-
+    Returns:
+        list: a list of http status codes, the same length and order as [urls]
+    """
+
+    if n_workers <= 1:
+
+        status_codes = []
 
-
+        for url in tqdm(urls):
+
+            r = requests.get(url, timeout=timeout)
+
+            if error_on_failure and r.status_code != 200:
+                raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
+            status_codes.append(r.status_code)
+
+    else:
+
+        if pool_type == 'thread':
+            pool = ThreadPool(n_workers)
+        else:
+            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+            pool = Pool(n_workers)
 
-
-        raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
-        status_codes.append(r.status_code)
+        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
 
+        status_codes = list(tqdm(pool.imap(
+            partial(test_url,error_on_failure=error_on_failure,timeout=timeout),
+            urls), total=len(urls)))
+
     return status_codes
-
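
Taken together, the additions above give this module a parallel download path and URL health checks. A sketch of how they might be combined, based only on the docstrings above; the URLs and target paths are hypothetical, and the import path follows this file's location in the wheel:

# Sketch based on the docstrings above; URLs and local paths are hypothetical.
from md_utils.url_utils import parallel_download_urls, test_urls

url_to_target_file = {
    'https://example.com/images/0001.jpg': '/tmp/images/0001.jpg',
    'https://example.com/images/0002.jpg': '/tmp/images/0002.jpg'
}

# With error_on_failure=False, this returns one http status code per URL
# instead of raising on the first failure.
status_codes = test_urls(list(url_to_target_file.keys()),
                         error_on_failure=False,
                         n_workers=2,
                         pool_type='thread',
                         timeout=10)

# Each result dict carries 'url', 'target_file', and a 'status' that is
# 'skipped', 'success', or a string starting with 'error'.
results = parallel_download_urls(url_to_target_file,
                                 verbose=False,
                                 overwrite=False,
                                 n_workers=2,
                                 pool_type='thread')
failures = [r for r in results if r['status'].startswith('error')]
print('{} of {} downloads failed'.format(len(failures), len(results)))

A thread pool is the natural default here since downloads are I/O-bound; the 'process' option would mainly matter if per-file work became CPU-bound.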

md_utils/write_html_image_list.py
CHANGED

@@ -1,14 +1,14 @@
-
-
-
-
-
-
-
-
-
-
-
+"""
+
+write_html_image_list.py
+
+Given a list of image file names, writes an HTML file that
+shows all those images, with optional one-line headers above each.
+
+Each "filename" can also be a dict with elements 'filename','title',
+'imageStyle','textStyle', 'linkTarget'
+
+"""
 
 #%% Constants and imports
 
@@ -23,27 +23,34 @@ from md_utils import path_utils
 
 def write_html_image_list(filename=None,images=None,options=None):
     """
-
-
-
-
-    filename
-
-
-
-
-
-
-
-
-
-
-
-
+    Given a list of image file names, writes an HTML file that shows all those images,
+    with optional one-line headers above each.
+
+    Args:
+        filename (str, optional): the .html output file; if None, just returns a valid
+            options dict
+        images (list, optional): the images to write to the .html file; if None, just returns
+            a valid options dict. This can be a flat list of image filenames, or this can
+            be a list of dictionaries with one or more of the following fields:
+
+            - filename (image filename) (required, all other fields are optional)
+            - imageStyle (css style for this image)
+            - textStyle (css style for the title associated with this image)
+            - title (text label for this image)
+            - linkTarget (URL to which this image should link on click)
+
+        options (dict, optional): a dict with one or more of the following fields:
+
+            - fHtml (file pointer to write to, used for splitting write operations over multiple calls)
+            - headerHtml (html text to include before the image list)
+            - trailerHtml (html text to include after the image list)
+            - defaultImageStyle (default css style for images)
+            - defaultTextStyle (default css style for image titles)
+            - maxFiguresPerHtmlFile (max figures for a single HTML file; overflow will be handled by creating
+              multiple files and a TOC with links)
+            - urlEncodeFilenames (default True, e.g. '#' will be replaced by '%23')
+            - urlEncodeLinkTargets (default True, e.g. '#' will be replaced by '%23')
+
     """
 
     # returns an options struct
@@ -78,7 +85,7 @@ def write_html_image_list(filename=None,images=None,options=None):
     if 'maxFiguresPerHtmlFile' not in options or options['maxFiguresPerHtmlFile'] is None:
         options['maxFiguresPerHtmlFile'] = math.inf
 
-    if filename is None:
+    if filename is None or images is None:
         return options
 
     # images may be a list of images or a list of image/style/title dictionaries,
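
A sketch of the calling pattern the new docstring describes, with hypothetical filenames and titles; note that, per the changed line above, calling the function with filename or images left as None just returns a populated options dict:

# Sketch based on the docstring above; filenames and titles are hypothetical.
from md_utils.write_html_image_list import write_html_image_list

# With no arguments, the function returns a valid default options dict
options = write_html_image_list()
options['headerHtml'] = '<h1>Detection samples</h1>'
options['maxFiguresPerHtmlFile'] = 1000

images = [
    {'filename': 'images/0001.jpg', 'title': 'camera A, day'},
    {'filename': 'images/0002.jpg', 'title': 'camera B, night',
     'imageStyle': 'width:600px;'}
]

write_html_image_list(filename='preview.html', images=images, options=options)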