megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -11,8 +11,11 @@ Frequently-used functions for downloading or manipulating URLs
 import os
 import re
 import urllib
-import tempfile
+import urllib.request
+import urllib.error
 import requests
+import shutil
+import pytest
 
 from functools import partial
 from tqdm import tqdm
@@ -20,111 +23,105 @@ from urllib.parse import urlparse
 from multiprocessing.pool import ThreadPool
 from multiprocessing.pool import Pool
 
-url_utils_temp_dir = None
+from megadetector.utils.ct_utils import make_test_folder
+from megadetector.utils.ct_utils import make_temp_folder
+
 max_path_len = 255
 
 
 #%% Download functions
 
-class DownloadProgressBar():
+class DownloadProgressBar:
     """
     Progress updater based on the progressbar2 package.
-
+
     https://stackoverflow.com/questions/37748105/how-to-use-progressbar-module-with-urlretrieve
     """
-
+
+
     def __init__(self):
+
         self.pbar = None
 
-    def __call__(self, block_num, block_size, total_size):
-        if not self.pbar:
-            # This is a pretty random import I'd rather not depend on outside of the
-            # rare case where it's used, so importing locally
-            # pip install progressbar2
-            import progressbar
-            self.pbar = progressbar.ProgressBar(max_value=total_size)
-            self.pbar.start()
-
-        downloaded = block_num * block_size
-        if downloaded < total_size:
-            self.pbar.update(downloaded)
-        else:
-            self.pbar.finish()
-
 
-def get_temp_folder(preferred_name='url_utils'):
-    """
-    Gets a temporary folder for use within this module.
-
-    Args:
-        preferred_name (str, optional): subfolder to use within the system temp folder
-
-    Returns:
-        str: the full path to the temporary subfolder
-    """
-    global url_utils_temp_dir
-
-    if url_utils_temp_dir is None:
-        url_utils_temp_dir = os.path.join(tempfile.gettempdir(),preferred_name)
-        os.makedirs(url_utils_temp_dir,exist_ok=True)
-    return url_utils_temp_dir
-
-
-
-def download_url(url,
-                 destination_filename=None,
-                 progress_updater=None,
-                 force_download=False,
+    def __call__(self, block_num, block_size, total_size): # noqa
+
+        if not self.pbar:
+            try:
+                import progressbar # type: ignore
+                self.pbar = progressbar.ProgressBar(max_value=total_size)
+                self.pbar.start()
+            except ImportError:
+                self.pbar = None
+                # print("ProgressBar not available, install 'progressbar2' for visual progress.")
+
+        if self.pbar:
+            downloaded = block_num * block_size
+            if downloaded < total_size:
+                self.pbar.update(downloaded)
+            else:
+                self.pbar.finish()
+
+
+def download_url(url,
+                 destination_filename=None,
+                 progress_updater=None,
+                 force_download=False,
                  verbose=True,
                  escape_spaces=True):
     """
-    Downloads a URL to a file. If no file is specified, creates a temporary file,
+    Downloads a URL to a file. If no file is specified, creates a temporary file,
     making a best effort to avoid filename collisions.
-
+
     Prints some diagnostic information and makes sure to omit SAS tokens from printouts.
-
+
     Args:
         url (str): the URL to download
         destination_filename (str, optional): the target filename; if None, will create
-            a file in system temp space
-        progress_updater (object or bool, optional): can be "None", "False", "True", or a
-            specific callable object. If None or False, no progress updated will be
+            a file in system temp space
+        progress_updater (object or bool, optional): can be "None", "False", "True", or a
+            specific callable object. If None or False, no progress updated will be
             displayed. If True, a default progress bar will be created.
         force_download (bool, optional): download this file even if [destination_filename]
            exists.
        verbose (bool, optional): enable additional debug console output
        escape_spaces (bool, optional): replace ' ' with '%20'
-
+
     Returns:
        str: the filename to which [url] was downloaded, the same as [destination_filename]
            if [destination_filename] was not None
    """
-
+
    if progress_updater is not None and isinstance(progress_updater,bool):
        if not progress_updater:
            progress_updater = None
        else:
            progress_updater = DownloadProgressBar()
-
+
    url_no_sas = url.split('?')[0]
-
+
    if destination_filename is None:
-
-        target_folder = get_temp_folder()
+
+        target_folder = make_temp_folder(subfolder='url_utils',append_guid=False)
        url_without_sas = url.split('?', 1)[0]
-
+
        # This does not guarantee uniqueness, hence "semi-best-effort"
        url_as_filename = re.sub(r'\W+', '', url_without_sas)
-        n_folder_chars = len(url_utils_temp_dir)
-        if len(url_as_filename) + n_folder_chars > max_path_len:
+
+        n_folder_chars = len(target_folder)
+
+        if (len(url_as_filename) + n_folder_chars) >= max_path_len:
            print('Warning: truncating filename target to {} characters'.format(max_path_len))
-            url_as_filename = url_as_filename[-1*(max_path_len-n_folder_chars):]
+            max_fn_len = max_path_len - (n_folder_chars + 1)
+            url_as_filename = url_as_filename[-1 * max_fn_len:]
        destination_filename = \
            os.path.join(target_folder,url_as_filename)
-
+
+    # ...if the destination filename wasn't specified
+
    if escape_spaces:
        url = url.replace(' ','%20')
-
+
    if (not force_download) and (os.path.isfile(destination_filename)):
        if verbose:
            print('Bypassing download of already-downloaded file {}'.format(os.path.basename(url_no_sas)))
@@ -133,12 +130,12 @@ def download_url(url,
             print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
         target_dir = os.path.dirname(destination_filename)
         os.makedirs(target_dir,exist_ok=True)
-        urllib.request.urlretrieve(url, destination_filename, progress_updater)
+        urllib.request.urlretrieve(url, destination_filename, progress_updater)
         assert(os.path.isfile(destination_filename))
-        nBytes = os.path.getsize(destination_filename)
+        n_bytes = os.path.getsize(destination_filename)
         if verbose:
-            print('...done, {} bytes.'.format(nBytes))
-
+            print('...done, {} bytes.'.format(n_bytes))
+
     return destination_filename
 
 # ...def download_url(...)
@@ -146,24 +143,24 @@ def download_url(url,
 
 def download_relative_filename(url, output_base, verbose=False):
     """
-    Download a URL to output_base, preserving relative path. Path is relative to
+    Download a URL to output_base, preserving relative path. Path is relative to
     the site, so:
-
+
     https://abc.com/xyz/123.txt
-
+
     ...will get downloaded to:
-
-    output_base/xyz/123.txt
-
+
+    output_base/xyz/123.txt
+
     Args:
         url (str): the URL to download
         output_base (str): the base folder to which we should download this file
         verbose (bool, optional): enable additional debug console output
-
+
     Returns:
         str: the local destination filename
     """
-
+
     p = urlparse(url)
     # remove the leading '/'
     assert p.path.startswith('/'); relative_filename = p.path[1:]
@@ -177,123 +174,139 @@ def _do_parallelized_download(download_info,overwrite=False,verbose=False):
     """
     Internal function for download parallelization.
     """
-
+
     url = download_info['url']
     target_file = download_info['target_file']
     result = {'status':'unknown','url':url,'target_file':target_file}
-
+
     if ((os.path.isfile(target_file)) and (not overwrite)):
         if verbose:
             print('Skipping existing file {}'.format(target_file))
         result['status'] = 'skipped'
         return result
     try:
-        download_url(url=url,
+        download_url(url=url,
                      destination_filename=target_file,
-                     verbose=verbose,
+                     verbose=verbose,
                      force_download=overwrite)
     except Exception as e:
         print('Warning: error downloading URL {}: {}'.format(
-            url,str(e)))
+            url,str(e)))
         result['status'] = 'error: {}'.format(str(e))
         return result
-
+
     result['status'] = 'success'
     return result
 
 # ...def _do_parallelized_download(...)
 
 
-def parallel_download_urls(url_to_target_file,verbose=False,overwrite=False,
-                           n_workers=20,pool_type='thread'):
+def parallel_download_urls(url_to_target_file,
+                           verbose=False,
+                           overwrite=False,
+                           n_workers=20,
+                           pool_type='thread'):
     """
     Downloads a list of URLs to local files.
-
-    Catches exceptions and reports them in the returned "results" array.
-
+
+    Catches exceptions and reports them in the returned "results" array.
+
     Args:
-        url_to_target_file: a dict mapping URLs to local filenames.
+        url_to_target_file (dict): a dict mapping URLs to local filenames.
         verbose (bool, optional): enable additional debug console output
         overwrite (bool, optional): whether to overwrite existing local files
         n_workers (int, optional): number of concurrent workers, set to <=1 to disable
             parallelization
         pool_type (str, optional): worker type to use; should be 'thread' or 'process'
-
+
     Returns:
         list: list of dicts with keys:
            - 'url': the url this item refers to
           - 'status': 'skipped', 'success', or a string starting with 'error'
-           - 'target_file': the local filename to which we downloaded (or tried to
-             download) this URL
+           - 'target_file': the local filename to which we downloaded (or tried to
+             download) this URL
    """
-
+
    all_download_info = []
-
-    print('Preparing download list')
-    for url in tqdm(url_to_target_file):
+
+    if verbose:
+        print('Preparing download list')
+    for url in tqdm(url_to_target_file, disable=(not verbose)):
        download_info = {}
        download_info['url'] = url
        download_info['target_file'] = url_to_target_file[url]
        all_download_info.append(download_info)
-
-    print('Downloading {} images on {} workers'.format(
-        len(all_download_info),n_workers))
+
+    if verbose:
+        print('Downloading {} images on {} workers'.format(
+            len(all_download_info),n_workers))
 
    if n_workers <= 1:
 
        results = []
-
-        for download_info in tqdm(all_download_info):
+
+        for download_info in tqdm(all_download_info, disable=(not verbose)):
            result = _do_parallelized_download(download_info,overwrite=overwrite,verbose=verbose)
            results.append(result)
-
+
    else:
 
-        if pool_type == 'thread':
-            pool = ThreadPool(n_workers)
-        else:
-            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
-            pool = Pool(n_workers)
-
-        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
-
-        results = list(tqdm(pool.imap(
-            partial(_do_parallelized_download,overwrite=overwrite,verbose=verbose),
-            all_download_info), total=len(all_download_info)))
-
+        pool = None
+
+        try:
+            if pool_type == 'thread':
+                pool = ThreadPool(n_workers)
+            else:
+                assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+                pool = Pool(n_workers)
+
+            if verbose:
+                print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
+
+            results = list(tqdm(pool.imap(
+                partial(_do_parallelized_download,overwrite=overwrite,verbose=verbose),
+                all_download_info), total=len(all_download_info), disable=(not verbose)))
+
+        finally:
+            if pool:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for parallel URL downloads")
+
    return results
 
 # ...def parallel_download_urls(...)
 
 
+@pytest.mark.skip(reason="This is not a test function")
 def test_url(url,error_on_failure=True,timeout=None):
     """
     Tests the availability of [url], returning an http status code.
-
+
     Args:
         url (str): URL to test
         error_on_failure (bool, optional): whether to error (vs. just returning an
             error code) if accessing this URL fails
-        timeout (int, optional): timeout in seconds to wait before considering this
+        timeout (int, optional): timeout in seconds to wait before considering this
            access attempt to be a failure; see requests.head() for precise documentation
-
+
    Returns:
        int: http status code (200 for success)
    """
-
-    # r = requests.get(url, stream=True, verify=True, timeout=timeout)
+
    r = requests.head(url, stream=True, verify=True, timeout=timeout)
-
-    if error_on_failure and r.status_code != 200:
+
+    if error_on_failure and r.status_code != 200:
        raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
    return r.status_code
-
 
-def test_urls(urls,error_on_failure=True,n_workers=1,pool_type='thread',timeout=None):
+
+@pytest.mark.skip(reason="This is not a test function")
+def test_urls(urls,error_on_failure=True,n_workers=1,pool_type='thread',timeout=None,verbose=False):
     """
     Verify that URLs are available (i.e., returns status 200). By default,
-    errors if any URL is unavailable.
-
+    errors if any URL is unavailable.
+
     Args:
        urls (list): list of URLs to test
        error_on_failure (bool, optional): whether to error (vs. just returning an
@@ -301,39 +314,48 @@ def test_urls(urls,error_on_failure=True,n_workers=1,pool_type='thread',timeout=
         n_workers (int, optional): number of concurrent workers, set to <=1 to disable
             parallelization
         pool_type (str, optional): worker type to use; should be 'thread' or 'process'
-        timeout (int, optional): timeout in seconds to wait before considering this
+        timeout (int, optional): timeout in seconds to wait before considering this
             access attempt to be a failure; see requests.head() for precise documentation
-
+        verbose (bool, optional): enable additional debug output
+
     Returns:
         list: a list of http status codes, the same length and order as [urls]
     """
-
+
     if n_workers <= 1:
 
         status_codes = []
-
-        for url in tqdm(urls):
-
+
+        for url in tqdm(urls,disable=(not verbose)):
+
             r = requests.get(url, timeout=timeout)
-
-            if error_on_failure and r.status_code != 200:
+
+            if error_on_failure and r.status_code != 200:
                 raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
             status_codes.append(r.status_code)
-
+
     else:
 
-        if pool_type == 'thread':
-            pool = ThreadPool(n_workers)
-        else:
-            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
-            pool = Pool(n_workers)
-
-        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
-
-        status_codes = list(tqdm(pool.imap(
-            partial(test_url,error_on_failure=error_on_failure,timeout=timeout),
-            urls), total=len(urls)))
-
+        pool = None
+        try:
+            if pool_type == 'thread':
+                pool = ThreadPool(n_workers)
+            else:
+                assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+                pool = Pool(n_workers)
+
+            if verbose:
+                print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
+
+            status_codes = list(tqdm(pool.imap(
+                partial(test_url,error_on_failure=error_on_failure,timeout=timeout),
+                urls), total=len(urls), disable=(not verbose)))
+        finally:
+            if pool:
+                pool.close()
+                pool.join()
+                print('Pool closed and joined for URL tests')
+
     return status_codes
 
 # ...def test_urls(...)
@@ -341,16 +363,16 @@ def test_urls(urls,error_on_failure=True,n_workers=1,pool_type='thread',timeout=
 
 def get_url_size(url,verbose=False,timeout=None):
     """
-    Get the size of the file pointed to by a URL, based on the Content-Length property. If the
-    URL is not available, or the Content-Length property is not available, or the content-Length
-    property is not an integer, returns None.
-
+    Get the size of the file pointed to by a URL, based on the Content-Length property. If the
+    URL is not available, or the Content-Length property is not available, or the content-Length
+    property is not an integer, returns None.
+
     Args:
         url (str): the url to test
         verbose (bool, optional): enable additional debug output
-        timeout (int, optional): timeout in seconds to wait before considering this
+        timeout (int, optional): timeout in seconds to wait before considering this
             access attempt to be a failure; see requests.head() for precise documentation
-
+
     Returns:
         int: the file size in bytes, or None if it can't be retrieved
     """
@@ -362,13 +384,18 @@ def get_url_size(url,verbose=False,timeout=None):
             if verbose:
                 print('Status {} retrieving file size for {}'.format(f.status,url))
             return None
-        size_bytes = int(f.headers['Content-Length'])
+        size_bytes_str = f.headers.get('Content-Length')
+        if size_bytes_str is None:
+            if verbose:
+                print('No Content-Length header for {}'.format(url))
+            return None
+        size_bytes = int(size_bytes_str)
         return size_bytes
     except Exception as e:
         if verbose:
             print('Error retrieving file size for {}:\n{}'.format(url,str(e)))
         return None
-
+
 # ...def get_url_size(...)
 
 
@@ -376,45 +403,331 @@ def get_url_sizes(urls,n_workers=1,pool_type='thread',timeout=None,verbose=False
     """
     Retrieve file sizes for the URLs specified by [urls]. Returns None for any URLs
     that we can't access, or URLs for which the Content-Length property is not set.
-
+
     Args:
         urls (list): list of URLs for which we should retrieve sizes
         n_workers (int, optional): number of concurrent workers, set to <=1 to disable
             parallelization
         pool_type (str, optional): worker type to use; should be 'thread' or 'process'
-        timeout (int, optional): timeout in seconds to wait before considering this
+        timeout (int, optional): timeout in seconds to wait before considering this
             access attempt to be a failure; see requests.head() for precise documentation
         verbose (bool, optional): print additional debug information
-
+
     Returns:
         dict: maps urls to file sizes, which will be None for URLs for which we were unable
-            to retrieve a valid size.
+            to retrieve a valid size.
     """
-
+
     url_to_size = {}
-
-    if n_workers <= 1:
-
-        for url in tqdm(urls):
+
+    if n_workers <= 1:
+
+        for url in tqdm(urls, disable=(not verbose)):
             url_to_size[url] = get_url_size(url,verbose=verbose,timeout=timeout)
-
+
     else:
 
-        if pool_type == 'thread':
-            pool = ThreadPool(n_workers)
-        else:
-            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
-            pool = Pool(n_workers)
-
-        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
-
-        file_sizes = list(tqdm(pool.imap(
-            partial(get_url_size,verbose=verbose,timeout=timeout),
-            urls), total=len(urls)))
-
-        for i_url,url in enumerate(urls):
-            url_to_size[url] = file_sizes[i_url]
-
+        pool = None
+        try:
+            if pool_type == 'thread':
+                pool = ThreadPool(n_workers)
+            else:
+                assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+                pool = Pool(n_workers)
+
+            if verbose:
+                print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
+
+            file_sizes = list(tqdm(pool.imap(
+                partial(get_url_size,verbose=verbose,timeout=timeout),
+                urls), total=len(urls), disable=(not verbose)))
+
+            for i_url,url in enumerate(urls):
+                url_to_size[url] = file_sizes[i_url]
+        finally:
+            if pool:
+                pool.close()
+                pool.join()
+                print('Pool closed and joined for URL size checks')
+
     return url_to_size
 
-# ...get_url_sizes(...)
+
+#%% Tests
+
+# Constants for tests
+
+SMALL_FILE_URL = "https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png"
+REDIRECT_SRC_URL = "http://google.com"
+REDIRECT_DEST_URL = "https://www.google.com/"
+NON_EXISTENT_URL = "https://example.com/non_existent_page_404.html"
+DEFINITELY_NON_EXISTENT_DOMAIN_URL = "https://thisshouldnotexist1234567890.com/file.txt"
+RELATIVE_DOWNLOAD_URL = "https://raw.githubusercontent.com/agentmorris/MegaDetector/main/README.md"
+RELATIVE_DOWNLOAD_CONTAIN_TOKEN = 'agentmorris'
+RELATIVE_DOWNLOAD_NOT_CONTAIN_TOKEN = 'github'
+
+
+class TestUrlUtils:
+    """
+    Tests for url_utils.py
+    """
+
+    def set_up(self):
+        """
+        Create a temporary directory for testing.
+        """
+
+        self.test_dir = make_test_folder(subfolder='url_utils_tests')
+        self.download_target_dir = os.path.join(self.test_dir, 'downloads')
+        os.makedirs(self.download_target_dir, exist_ok=True)
+
+
+    def tear_down(self):
+        """
+        Remove the temporary directory after tests and restore module temp_dir.
+        """
+
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+
+
+    def test_download_url_to_specified_file(self):
+        """
+        Test download_url with a specified destination filename.
+        """
+
+        dest_filename = os.path.join(self.download_target_dir, "downloaded_google_logo.png")
+        returned_filename = download_url(SMALL_FILE_URL,
+                                         destination_filename=dest_filename,
+                                         verbose=False)
+        assert returned_filename == dest_filename
+        assert os.path.exists(dest_filename)
+        assert os.path.getsize(dest_filename) > 1000
+
+
+    def test_download_url_to_temp_file(self):
+        """
+        Test download_url when destination_filename is None.
+        """
+
+        returned_filename = download_url(SMALL_FILE_URL,
+                                         destination_filename=None,
+                                         verbose=False)
+        assert os.path.exists(returned_filename)
+        assert os.path.getsize(returned_filename) > 1000
+
+
+    def test_download_url_non_existent(self):
+        """
+        Test download_url with a non-existent URL.
+        """
+
+        dest_filename = os.path.join(self.download_target_dir, "non_existent.html")
+        try:
+            download_url(NON_EXISTENT_URL, destination_filename=dest_filename, verbose=False)
+            raise AssertionError("urllib.error.HTTPError not raised for 404")
+        except urllib.error.HTTPError:
+            pass
+
+        try:
+            download_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
+                         destination_filename=dest_filename,
+                         verbose=False)
+            raise AssertionError(
+                "urllib.error.URLError or requests.exceptions.ConnectionError not raised for DNS failure")
+        except urllib.error.URLError:
+            pass
+        except requests.exceptions.ConnectionError:
+            pass
+
+
+    def test_download_url_force_download(self):
+        """
+        Test the force_download parameter of download_url.
+        """
+
+        dest_filename = os.path.join(self.download_target_dir, "force_test.png")
+
+        download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=False)
+        assert os.path.exists(dest_filename)
+        initial_mtime = os.path.getmtime(dest_filename)
+
+        download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=True)
+        assert os.path.getmtime(dest_filename) == initial_mtime
+
+        download_url(SMALL_FILE_URL,
+                     destination_filename=dest_filename,
+                     force_download=True,
+                     verbose=False)
+        assert os.path.exists(dest_filename)
+
+
+    def test_download_url_escape_spaces(self):
+        """
+        Test download_url with spaces in the URL.
+        """
+
+        dest_filename = os.path.join(self.download_target_dir, "escape_test.png")
+        download_url(SMALL_FILE_URL,
+                     destination_filename=dest_filename,
+                     escape_spaces=True,
+                     verbose=False)
+        assert os.path.exists(dest_filename)
+
+
+    def test_download_relative_filename(self):
+        """
+        Test download_relative_filename.
+        """
+
+        output_base = os.path.join(self.download_target_dir, "relative_dl")
+        returned_filename = download_relative_filename(RELATIVE_DOWNLOAD_URL, output_base, verbose=False)
+        assert RELATIVE_DOWNLOAD_CONTAIN_TOKEN in returned_filename
+        assert RELATIVE_DOWNLOAD_NOT_CONTAIN_TOKEN not in returned_filename
+        assert os.path.exists(returned_filename)
+        assert os.path.getsize(returned_filename) > 100
+
+
+    def test_parallel_download_urls(self):
+        """
+        Test parallel_download_urls (with n_workers=1 for simplicity).
+        """
+
+        url1_target = os.path.join(self.download_target_dir, "parallel_dl_1.png")
+        url2_target = os.path.join(self.download_target_dir, "parallel_dl_2_nonexistent.html")
+
+        url_to_target_file = {
+            SMALL_FILE_URL: url1_target,
+            NON_EXISTENT_URL: url2_target
+        }
+
+        results = parallel_download_urls(url_to_target_file, n_workers=1, verbose=False)
+
+        assert len(results) == 2
+
+        status_map = {res['url']: res for res in results}
+
+        assert status_map[SMALL_FILE_URL]['status'] == 'success'
+        assert status_map[SMALL_FILE_URL]['target_file'] == url1_target
+        assert os.path.exists(url1_target)
+
+        assert status_map[NON_EXISTENT_URL]['status'].startswith('error: HTTP Error 404')
+        assert status_map[NON_EXISTENT_URL]['target_file'] == url2_target
+        assert not os.path.exists(url2_target)
+
+        if not os.path.exists(url1_target):
+            download_url(SMALL_FILE_URL, url1_target, verbose=False)
+        results_skip = parallel_download_urls({SMALL_FILE_URL: url1_target},
+                                              n_workers=1,
+                                              overwrite=False,
+                                              verbose=True)
+        assert results_skip[0]['status'] == 'skipped'
+
+        results_overwrite = parallel_download_urls({SMALL_FILE_URL: url1_target},
+                                                   n_workers=1,
+                                                   overwrite=True,
+                                                   verbose=False)
+        assert results_overwrite[0]['status'] == 'success'
+
+
+    def test_test_url_and_test_urls(self):
+        """
+        Test test_url and test_urls functions.
+        """
+
+        assert test_url(SMALL_FILE_URL, error_on_failure=False, timeout=10) == 200
+        assert test_url(REDIRECT_SRC_URL, error_on_failure=False, timeout=10) in (200,301)
+
+        status_non_existent = test_url(NON_EXISTENT_URL, error_on_failure=False, timeout=5)
+        assert status_non_existent == 404
+
+        try:
+            test_url(NON_EXISTENT_URL, error_on_failure=True, timeout=5)
+            raise AssertionError("ValueError not raised for NON_EXISTENT_URL")
+        except ValueError:
+            pass
+
+        try:
+            test_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
+                     error_on_failure=True,
+                     timeout=5)
+            raise AssertionError("requests.exceptions.ConnectionError or urllib.error.URLError not raised")
+        except requests.exceptions.ConnectionError:
+            pass
+        except urllib.error.URLError:
+            pass
+
+
+        urls_to_test = [SMALL_FILE_URL, NON_EXISTENT_URL]
+        status_codes = test_urls(urls_to_test, error_on_failure=False, n_workers=1, timeout=10)
+        assert len(status_codes) == 2
+        assert status_codes[0] == 200
+        assert status_codes[1] == 404
+
+        try:
+            test_urls(urls_to_test, error_on_failure=True, n_workers=1, timeout=5)
+            raise AssertionError("ValueError not raised for urls_to_test")
+        except ValueError:
+            pass
+
+        good_urls = [SMALL_FILE_URL, REDIRECT_SRC_URL]
+        good_status_codes = test_urls(good_urls, error_on_failure=True, n_workers=1, timeout=10)
+        assert good_status_codes == [200, 200]
+
+
+    def test_get_url_size_and_sizes(self):
+        """
+        Test get_url_size and get_url_sizes functions.
+        """
+
+        size = get_url_size(SMALL_FILE_URL, timeout=10)
+        assert size is not None
+        assert size > 1000
+
+        size_dynamic = get_url_size(REDIRECT_DEST_URL, timeout=10, verbose=True)
+        if size_dynamic is not None:
+            assert isinstance(size_dynamic, int)
+
+        size_non_existent = get_url_size(NON_EXISTENT_URL, timeout=5)
+        assert size_non_existent is None
+
+        size_bad_domain = get_url_size(DEFINITELY_NON_EXISTENT_DOMAIN_URL, timeout=5)
+        assert size_bad_domain is None
+
+        urls_for_size = [SMALL_FILE_URL, NON_EXISTENT_URL, REDIRECT_DEST_URL]
+        sizes_map = get_url_sizes(urls_for_size, n_workers=1, timeout=10)
+
+        assert SMALL_FILE_URL in sizes_map
+        assert sizes_map[SMALL_FILE_URL] == size
+
+        assert NON_EXISTENT_URL in sizes_map
+        assert sizes_map[NON_EXISTENT_URL] is None
+
+        assert REDIRECT_DEST_URL in sizes_map
+        assert sizes_map[REDIRECT_DEST_URL] == size_dynamic
+
+
+def _test_url_utils():
+    """
+    Runs all tests in the TestUrlUtils class. I generally disable this during testing
+    because it creates irritating nondeterminism, and this is neither a core module nor
+    a module that changes often.
+    """
+
+    test_instance = TestUrlUtils()
+    test_instance.set_up()
+    try:
+        test_instance.test_download_url_to_specified_file()
+        test_instance.test_download_url_to_temp_file()
+        test_instance.test_download_url_non_existent()
+        test_instance.test_download_url_force_download()
+        test_instance.test_download_url_escape_spaces()
+        test_instance.test_download_relative_filename()
+        test_instance.test_parallel_download_urls()
+        test_instance.test_test_url_and_test_urls()
+        test_instance.test_get_url_size_and_sizes()
+    finally:
+        test_instance.tear_down()
+
+# from IPython import embed; embed()
+# test_url_utils()
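For orientation, the sketch below exercises the url_utils entry points as they stand after this diff: the new verbose flags, the dict-based parallel downloads, and the status/size helpers. This is a minimal usage sketch rather than code from the package; only the function names and signatures shown in the diff above are assumed, the import path follows the megadetector/utils/url_utils.py module listed in the file table, and the URLs and local paths are placeholders.

# Minimal usage sketch of the url_utils API after this release.
# Assumption: import path per megadetector/utils/url_utils.py above;
# URLs and target paths are placeholders, not endpoints used by the package.

from megadetector.utils.url_utils import (
    download_url, parallel_download_urls, test_urls, get_url_sizes)

# Single download; with destination_filename=None, the file lands in a
# temporary folder created via make_temp_folder().
local_path = download_url('https://example.com/images/a.jpg', verbose=True)

# Parallel downloads take a dict mapping URLs to local filenames; each
# result dict carries 'url', 'target_file', and a 'status' that is
# 'success', 'skipped', or a string starting with 'error'.
url_to_target = {
    'https://example.com/images/a.jpg': '/tmp/demo/a.jpg',
    'https://example.com/images/b.jpg': '/tmp/demo/b.jpg',
}
results = parallel_download_urls(url_to_target, n_workers=4, verbose=True)
failed = [r for r in results if r['status'].startswith('error')]

# Availability (HTTP status codes) and Content-Length-based sizes.
status_codes = test_urls(list(url_to_target.keys()), error_on_failure=False)
url_to_size = get_url_sizes(list(url_to_target.keys()), n_workers=4)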