eodag 4.0.0a3__py3-none-any.whl → 4.0.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/collection.py +8 -8
- eodag/api/core.py +216 -313
- eodag/api/product/_product.py +28 -6
- eodag/api/provider.py +990 -0
- eodag/cli.py +11 -4
- eodag/config.py +73 -444
- eodag/plugins/apis/ecmwf.py +3 -24
- eodag/plugins/apis/usgs.py +3 -24
- eodag/plugins/authentication/token.py +0 -1
- eodag/plugins/download/aws.py +83 -44
- eodag/plugins/download/base.py +117 -41
- eodag/plugins/download/http.py +84 -56
- eodag/plugins/manager.py +24 -34
- eodag/resources/ext_collections.json +1 -1
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/providers.yml +2 -0
- eodag/utils/s3.py +4 -4
- {eodag-4.0.0a3.dist-info → eodag-4.0.0a5.dist-info}/METADATA +1 -1
- {eodag-4.0.0a3.dist-info → eodag-4.0.0a5.dist-info}/RECORD +23 -22
- {eodag-4.0.0a3.dist-info → eodag-4.0.0a5.dist-info}/WHEEL +0 -0
- {eodag-4.0.0a3.dist-info → eodag-4.0.0a5.dist-info}/entry_points.txt +0 -0
- {eodag-4.0.0a3.dist-info → eodag-4.0.0a5.dist-info}/licenses/LICENSE +0 -0
- {eodag-4.0.0a3.dist-info → eodag-4.0.0a5.dist-info}/top_level.txt +0 -0
eodag/plugins/apis/ecmwf.py
CHANGED
|
@@ -48,6 +48,7 @@ from eodag.utils.logging import get_logging_verbose
|
|
|
48
48
|
if TYPE_CHECKING:
|
|
49
49
|
from typing import Any, Optional, Union
|
|
50
50
|
|
|
51
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
51
52
|
from mypy_boto3_s3 import S3ServiceResource
|
|
52
53
|
from requests.auth import AuthBase
|
|
53
54
|
|
|
@@ -55,7 +56,7 @@ if TYPE_CHECKING:
|
|
|
55
56
|
from eodag.api.search_result import SearchResult
|
|
56
57
|
from eodag.config import PluginConfig
|
|
57
58
|
from eodag.types.download_args import DownloadConf
|
|
58
|
-
from eodag.utils import
|
|
59
|
+
from eodag.utils import ProgressCallback, Unpack
|
|
59
60
|
|
|
60
61
|
|
|
61
62
|
logger = logging.getLogger("eodag.apis.ecmwf")
|
|
@@ -185,6 +186,7 @@ class EcmwfApi(Api, ECMWFSearch):
|
|
|
185
186
|
product: EOProduct,
|
|
186
187
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
187
188
|
progress_callback: Optional[ProgressCallback] = None,
|
|
189
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
188
190
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
189
191
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
190
192
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -269,29 +271,6 @@ class EcmwfApi(Api, ECMWFSearch):
|
|
|
269
271
|
product.location = path_to_uri(product_path)
|
|
270
272
|
return product_path
|
|
271
273
|
|
|
272
|
-
def download_all(
|
|
273
|
-
self,
|
|
274
|
-
products: SearchResult,
|
|
275
|
-
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
276
|
-
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
277
|
-
progress_callback: Optional[ProgressCallback] = None,
|
|
278
|
-
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
279
|
-
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
280
|
-
**kwargs: Unpack[DownloadConf],
|
|
281
|
-
) -> list[str]:
|
|
282
|
-
"""
|
|
283
|
-
Download all using parent (base plugin) method
|
|
284
|
-
"""
|
|
285
|
-
return super(EcmwfApi, self).download_all(
|
|
286
|
-
products,
|
|
287
|
-
auth=auth,
|
|
288
|
-
downloaded_callback=downloaded_callback,
|
|
289
|
-
progress_callback=progress_callback,
|
|
290
|
-
wait=wait,
|
|
291
|
-
timeout=timeout,
|
|
292
|
-
**kwargs,
|
|
293
|
-
)
|
|
294
|
-
|
|
295
274
|
def clear(self) -> None:
|
|
296
275
|
"""Clear search context"""
|
|
297
276
|
pass
|
eodag/plugins/apis/usgs.py
CHANGED
|
@@ -57,12 +57,13 @@ from eodag.utils.exceptions import (
|
|
|
57
57
|
)
|
|
58
58
|
|
|
59
59
|
if TYPE_CHECKING:
|
|
60
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
60
61
|
from mypy_boto3_s3 import S3ServiceResource
|
|
61
62
|
from requests.auth import AuthBase
|
|
62
63
|
|
|
63
64
|
from eodag.config import PluginConfig
|
|
64
65
|
from eodag.types.download_args import DownloadConf
|
|
65
|
-
from eodag.utils import
|
|
66
|
+
from eodag.utils import Unpack
|
|
66
67
|
|
|
67
68
|
logger = logging.getLogger("eodag.apis.usgs")
|
|
68
69
|
|
|
@@ -312,6 +313,7 @@ class UsgsApi(Api):
|
|
|
312
313
|
product: EOProduct,
|
|
313
314
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
314
315
|
progress_callback: Optional[ProgressCallback] = None,
|
|
316
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
315
317
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
316
318
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
317
319
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -477,26 +479,3 @@ class UsgsApi(Api):
|
|
|
477
479
|
shutil.move(fs_path, new_fs_path)
|
|
478
480
|
product.location = path_to_uri(new_fs_path)
|
|
479
481
|
return new_fs_path
|
|
480
|
-
|
|
481
|
-
def download_all(
|
|
482
|
-
self,
|
|
483
|
-
products: SearchResult,
|
|
484
|
-
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
485
|
-
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
486
|
-
progress_callback: Optional[ProgressCallback] = None,
|
|
487
|
-
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
488
|
-
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
489
|
-
**kwargs: Unpack[DownloadConf],
|
|
490
|
-
) -> list[str]:
|
|
491
|
-
"""
|
|
492
|
-
Download all using parent (base plugin) method
|
|
493
|
-
"""
|
|
494
|
-
return super(UsgsApi, self).download_all(
|
|
495
|
-
products,
|
|
496
|
-
auth=auth,
|
|
497
|
-
downloaded_callback=downloaded_callback,
|
|
498
|
-
progress_callback=progress_callback,
|
|
499
|
-
wait=wait,
|
|
500
|
-
timeout=timeout,
|
|
501
|
-
**kwargs,
|
|
502
|
-
)
|
|
@@ -226,7 +226,6 @@ class TokenAuth(Authentication):
|
|
|
226
226
|
self,
|
|
227
227
|
session: requests.Session,
|
|
228
228
|
) -> requests.Response:
|
|
229
|
-
|
|
230
229
|
retry_total = getattr(self.config, "retry_total", REQ_RETRY_TOTAL)
|
|
231
230
|
retry_backoff_factor = getattr(
|
|
232
231
|
self.config, "retry_backoff_factor", REQ_RETRY_BACKOFF_FACTOR
|
eodag/plugins/download/aws.py
CHANGED
|
@@ -25,7 +25,9 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
|
|
|
25
25
|
|
|
26
26
|
import boto3
|
|
27
27
|
import requests
|
|
28
|
+
from boto3.s3.transfer import TransferConfig
|
|
28
29
|
from botocore.exceptions import ClientError
|
|
30
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
29
31
|
from lxml import etree
|
|
30
32
|
from requests.auth import AuthBase
|
|
31
33
|
|
|
@@ -34,7 +36,7 @@ from eodag.api.product.metadata_mapping import (
|
|
|
34
36
|
properties_from_json,
|
|
35
37
|
properties_from_xml,
|
|
36
38
|
)
|
|
37
|
-
from eodag.plugins.authentication.aws_auth import raise_if_auth_error
|
|
39
|
+
from eodag.plugins.authentication.aws_auth import AwsAuth, raise_if_auth_error
|
|
38
40
|
from eodag.plugins.download.base import Download
|
|
39
41
|
from eodag.utils import (
|
|
40
42
|
DEFAULT_DOWNLOAD_TIMEOUT,
|
|
@@ -65,10 +67,9 @@ if TYPE_CHECKING:
|
|
|
65
67
|
from mypy_boto3_s3.client import S3Client
|
|
66
68
|
|
|
67
69
|
from eodag.api.product import EOProduct
|
|
68
|
-
from eodag.api.search_result import SearchResult
|
|
69
70
|
from eodag.config import PluginConfig
|
|
70
71
|
from eodag.types.download_args import DownloadConf
|
|
71
|
-
from eodag.utils import
|
|
72
|
+
from eodag.utils import Unpack
|
|
72
73
|
|
|
73
74
|
|
|
74
75
|
logger = logging.getLogger("eodag.download.aws")
|
|
@@ -227,6 +228,7 @@ class AwsDownload(Download):
|
|
|
227
228
|
product: EOProduct,
|
|
228
229
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
229
230
|
progress_callback: Optional[ProgressCallback] = None,
|
|
231
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
230
232
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
231
233
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
232
234
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -246,6 +248,7 @@ class AwsDownload(Download):
|
|
|
246
248
|
size as inputs and handle progress bar
|
|
247
249
|
creation and update to give the user a
|
|
248
250
|
feedback on the download progress
|
|
251
|
+
:param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
|
|
249
252
|
:param kwargs: `output_dir` (str), `extract` (bool), `delete_archive` (bool)
|
|
250
253
|
and `dl_url_params` (dict) can be provided as additional kwargs
|
|
251
254
|
and will override any other values defined in a configuration
|
|
@@ -293,7 +296,7 @@ class AwsDownload(Download):
|
|
|
293
296
|
)
|
|
294
297
|
|
|
295
298
|
# authenticate
|
|
296
|
-
if product.downloader_auth:
|
|
299
|
+
if product.downloader_auth and isinstance(product.downloader_auth, AwsAuth):
|
|
297
300
|
authenticated_objects = product.downloader_auth.authenticate_objects(
|
|
298
301
|
bucket_names_and_prefixes
|
|
299
302
|
)
|
|
@@ -302,9 +305,19 @@ class AwsDownload(Download):
|
|
|
302
305
|
"Authentication plugin (AwsAuth) has to be configured if AwsDownload is used"
|
|
303
306
|
)
|
|
304
307
|
|
|
308
|
+
# create an executor if not given and anticipate the possible need to shut it down
|
|
309
|
+
executor, shutdown_executor = (
|
|
310
|
+
(ThreadPoolExecutor(), True) if executor is None else (executor, False)
|
|
311
|
+
)
|
|
312
|
+
self._config_executor(executor)
|
|
313
|
+
|
|
305
314
|
# files in zip
|
|
306
315
|
updated_bucket_names_and_prefixes = self._download_file_in_zip(
|
|
307
|
-
product,
|
|
316
|
+
product.downloader_auth,
|
|
317
|
+
bucket_names_and_prefixes,
|
|
318
|
+
product_local_path,
|
|
319
|
+
progress_callback,
|
|
320
|
+
executor,
|
|
308
321
|
)
|
|
309
322
|
# prevent nothing-to-download errors if download was performed in zip
|
|
310
323
|
raise_error = (
|
|
@@ -329,7 +342,8 @@ class AwsDownload(Download):
|
|
|
329
342
|
if len(unique_product_chunks) > 0:
|
|
330
343
|
progress_callback.reset(total=total_size)
|
|
331
344
|
try:
|
|
332
|
-
|
|
345
|
+
|
|
346
|
+
def download_chunk(product_chunk: Any) -> None:
|
|
333
347
|
try:
|
|
334
348
|
chunk_rel_path = self.get_chunk_dest_path(
|
|
335
349
|
product,
|
|
@@ -339,11 +353,11 @@ class AwsDownload(Download):
|
|
|
339
353
|
except NotAvailableError as e:
|
|
340
354
|
# out of SAFE format chunk
|
|
341
355
|
logger.warning(e)
|
|
342
|
-
|
|
356
|
+
return
|
|
357
|
+
|
|
343
358
|
chunk_abs_path = os.path.join(product_local_path, chunk_rel_path)
|
|
344
359
|
chunk_abs_path_dir = os.path.dirname(chunk_abs_path)
|
|
345
|
-
|
|
346
|
-
os.makedirs(chunk_abs_path_dir)
|
|
360
|
+
os.makedirs(chunk_abs_path_dir, exist_ok=True)
|
|
347
361
|
|
|
348
362
|
bucket_objects = authenticated_objects.get(product_chunk.bucket_name)
|
|
349
363
|
extra_args = (
|
|
@@ -352,12 +366,31 @@ class AwsDownload(Download):
|
|
|
352
366
|
else {}
|
|
353
367
|
)
|
|
354
368
|
if not os.path.isfile(chunk_abs_path):
|
|
369
|
+
transfer_config = TransferConfig(use_threads=False)
|
|
355
370
|
product_chunk.Bucket().download_file(
|
|
356
371
|
product_chunk.key,
|
|
357
372
|
chunk_abs_path,
|
|
358
373
|
ExtraArgs=extra_args,
|
|
359
374
|
Callback=progress_callback,
|
|
375
|
+
Config=transfer_config,
|
|
360
376
|
)
|
|
377
|
+
return
|
|
378
|
+
|
|
379
|
+
# use parallelization if possible.
|
|
380
|
+
# when products are already downloaded in parallel but the executor has only one worker,
|
|
381
|
+
# we avoid submitting nested tasks to the executor to prevent deadlocks
|
|
382
|
+
if (
|
|
383
|
+
executor._thread_name_prefix == "eodag-download-all"
|
|
384
|
+
and executor._max_workers == 1
|
|
385
|
+
):
|
|
386
|
+
for product_chunk in unique_product_chunks:
|
|
387
|
+
download_chunk(product_chunk)
|
|
388
|
+
else:
|
|
389
|
+
futures = (
|
|
390
|
+
executor.submit(download_chunk, product_chunk)
|
|
391
|
+
for product_chunk in unique_product_chunks
|
|
392
|
+
)
|
|
393
|
+
[f.result() for f in as_completed(futures)]
|
|
361
394
|
|
|
362
395
|
except AuthenticationError as e:
|
|
363
396
|
logger.warning("Unexpected error: %s" % e)
|
|
@@ -365,6 +398,9 @@ class AwsDownload(Download):
|
|
|
365
398
|
raise_if_auth_error(e, self.provider)
|
|
366
399
|
logger.warning("Unexpected error: %s" % e)
|
|
367
400
|
|
|
401
|
+
if shutdown_executor:
|
|
402
|
+
executor.shutdown(wait=True)
|
|
403
|
+
|
|
368
404
|
# finalize safe product
|
|
369
405
|
if build_safe and product.collection and "S2_MSI" in product.collection:
|
|
370
406
|
self.finalize_s2_safe_product(product_local_path)
|
|
@@ -386,31 +422,33 @@ class AwsDownload(Download):
|
|
|
386
422
|
return product_local_path
|
|
387
423
|
|
|
388
424
|
def _download_file_in_zip(
|
|
389
|
-
self,
|
|
425
|
+
self,
|
|
426
|
+
downloader_auth: AwsAuth,
|
|
427
|
+
bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
|
|
428
|
+
product_local_path: str,
|
|
429
|
+
progress_callback: ProgressCallback,
|
|
430
|
+
executor: ThreadPoolExecutor,
|
|
390
431
|
):
|
|
391
432
|
"""
|
|
392
433
|
Download file in zip from a prefix like `foo/bar.zip!file.txt`
|
|
393
434
|
"""
|
|
394
|
-
if
|
|
395
|
-
not getattr(product, "downloader_auth", None)
|
|
396
|
-
or product.downloader_auth.s3_resource is None
|
|
397
|
-
):
|
|
435
|
+
if downloader_auth.s3_resource is None:
|
|
398
436
|
logger.debug("Cannot check files in s3 zip without s3 resource")
|
|
399
437
|
return bucket_names_and_prefixes
|
|
400
438
|
|
|
401
|
-
s3_client =
|
|
439
|
+
s3_client = downloader_auth.get_s3_client()
|
|
402
440
|
|
|
403
441
|
downloaded = []
|
|
404
|
-
|
|
442
|
+
|
|
443
|
+
def process_zip_file(i: int, pack: tuple[str, Optional[str]]) -> Optional[int]:
|
|
405
444
|
bucket_name, prefix = pack
|
|
406
|
-
if ".zip!" in prefix:
|
|
445
|
+
if prefix is not None and ".zip!" in prefix:
|
|
407
446
|
splitted_path = prefix.split(".zip!")
|
|
408
447
|
zip_prefix = f"{splitted_path[0]}.zip"
|
|
409
448
|
rel_path = splitted_path[-1]
|
|
410
449
|
dest_file = os.path.join(product_local_path, rel_path)
|
|
411
450
|
dest_abs_path_dir = os.path.dirname(dest_file)
|
|
412
|
-
|
|
413
|
-
os.makedirs(dest_abs_path_dir)
|
|
451
|
+
os.makedirs(dest_abs_path_dir, exist_ok=True)
|
|
414
452
|
|
|
415
453
|
zip_file, _ = open_s3_zipped_object(
|
|
416
454
|
bucket_name, zip_prefix, s3_client, partial=False
|
|
@@ -428,7 +466,30 @@ class AwsDownload(Download):
|
|
|
428
466
|
output_file.write(zchunk)
|
|
429
467
|
progress_callback(len(zchunk))
|
|
430
468
|
|
|
431
|
-
|
|
469
|
+
return i
|
|
470
|
+
return None
|
|
471
|
+
|
|
472
|
+
# use parallelization if possible
|
|
473
|
+
# when products are already downloaded in parallel but the executor has only one worker,
|
|
474
|
+
# we avoid submitting nested tasks to the executor to prevent deadlocks
|
|
475
|
+
if (
|
|
476
|
+
executor._thread_name_prefix == "eodag-download-all"
|
|
477
|
+
and executor._max_workers == 1
|
|
478
|
+
):
|
|
479
|
+
for i, pack in enumerate(bucket_names_and_prefixes):
|
|
480
|
+
result = process_zip_file(i, pack)
|
|
481
|
+
if result is not None:
|
|
482
|
+
downloaded.append(result)
|
|
483
|
+
else:
|
|
484
|
+
futures = (
|
|
485
|
+
executor.submit(process_zip_file, i, pack)
|
|
486
|
+
for i, pack in enumerate(bucket_names_and_prefixes)
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
for future in as_completed(futures):
|
|
490
|
+
result = future.result()
|
|
491
|
+
if result is not None:
|
|
492
|
+
downloaded.append(result)
|
|
432
493
|
|
|
433
494
|
return [
|
|
434
495
|
pack
|
|
@@ -710,7 +771,7 @@ class AwsDownload(Download):
|
|
|
710
771
|
ignore_assets,
|
|
711
772
|
product,
|
|
712
773
|
)
|
|
713
|
-
if auth and isinstance(auth, boto3.
|
|
774
|
+
if auth and isinstance(auth, boto3.resource("s3").__class__):
|
|
714
775
|
s3_resource = auth
|
|
715
776
|
else:
|
|
716
777
|
s3_resource = boto3.resource(
|
|
@@ -773,6 +834,7 @@ class AwsDownload(Download):
|
|
|
773
834
|
byte_range,
|
|
774
835
|
compress,
|
|
775
836
|
zip_filename,
|
|
837
|
+
provider_max_workers=getattr(self.config, "max_workers", None),
|
|
776
838
|
)
|
|
777
839
|
|
|
778
840
|
def _get_commonpath(
|
|
@@ -1112,26 +1174,3 @@ class AwsDownload(Download):
|
|
|
1112
1174
|
|
|
1113
1175
|
logger.debug(f"Downloading {chunk.key} to {product_path}")
|
|
1114
1176
|
return product_path
|
|
1115
|
-
|
|
1116
|
-
def download_all(
|
|
1117
|
-
self,
|
|
1118
|
-
products: SearchResult,
|
|
1119
|
-
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
1120
|
-
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
1121
|
-
progress_callback: Optional[ProgressCallback] = None,
|
|
1122
|
-
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
1123
|
-
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
1124
|
-
**kwargs: Unpack[DownloadConf],
|
|
1125
|
-
) -> list[str]:
|
|
1126
|
-
"""
|
|
1127
|
-
download_all using parent (base plugin) method
|
|
1128
|
-
"""
|
|
1129
|
-
return super(AwsDownload, self).download_all(
|
|
1130
|
-
products,
|
|
1131
|
-
auth=auth,
|
|
1132
|
-
downloaded_callback=downloaded_callback,
|
|
1133
|
-
progress_callback=progress_callback,
|
|
1134
|
-
wait=wait,
|
|
1135
|
-
timeout=timeout,
|
|
1136
|
-
**kwargs,
|
|
1137
|
-
)
|
eodag/plugins/download/base.py
CHANGED
|
@@ -29,6 +29,8 @@ from pathlib import Path
|
|
|
29
29
|
from time import sleep
|
|
30
30
|
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, Union
|
|
31
31
|
|
|
32
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
33
|
+
|
|
32
34
|
from eodag.api.product.metadata_mapping import ONLINE_STATUS
|
|
33
35
|
from eodag.plugins.base import PluginTopic
|
|
34
36
|
from eodag.utils import (
|
|
@@ -105,6 +107,7 @@ class Download(PluginTopic):
|
|
|
105
107
|
product: EOProduct,
|
|
106
108
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
107
109
|
progress_callback: Optional[ProgressCallback] = None,
|
|
110
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
108
111
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
109
112
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
110
113
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -115,6 +118,7 @@ class Download(PluginTopic):
|
|
|
115
118
|
:param product: The EO product to download
|
|
116
119
|
:param auth: (optional) authenticated object
|
|
117
120
|
:param progress_callback: (optional) A progress callback
|
|
121
|
+
:param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
|
|
118
122
|
:param wait: (optional) If download fails, wait time in minutes between two download tries
|
|
119
123
|
:param timeout: (optional) If download fails, maximum time in minutes before stop retrying
|
|
120
124
|
to download
|
|
@@ -447,6 +451,7 @@ class Download(PluginTopic):
|
|
|
447
451
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
448
452
|
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
449
453
|
progress_callback: Optional[ProgressCallback] = None,
|
|
454
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
450
455
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
451
456
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
452
457
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -454,7 +459,7 @@ class Download(PluginTopic):
|
|
|
454
459
|
"""
|
|
455
460
|
Base download_all method.
|
|
456
461
|
|
|
457
|
-
This specific implementation uses the :meth
|
|
462
|
+
This specific implementation uses the :meth:`~eodag.api.product._product.EOProduct.download` method
|
|
458
463
|
implemented by the plugin to attempt to download products **in parallel**, using a thread pool executor.
|
|
459
464
|
|
|
460
465
|
:param products: Products to download
|
|
@@ -465,6 +470,8 @@ class Download(PluginTopic):
|
|
|
465
470
|
its ``__call__`` method. Will be called each time a product
|
|
466
471
|
finishes downloading
|
|
467
472
|
:param progress_callback: (optional) A progress callback
|
|
473
|
+
:param executor: (optional) An executor to download products in parallel which may
|
|
474
|
+
be reused to also download assets of these products in parallel.
|
|
468
475
|
:param wait: (optional) If download fails, wait time in minutes between two download tries
|
|
469
476
|
:param timeout: (optional) If download fails, maximum time in minutes before stop retrying
|
|
470
477
|
to download
|
|
@@ -485,9 +492,15 @@ class Download(PluginTopic):
|
|
|
485
492
|
stop_time = start_time + timedelta(minutes=timeout)
|
|
486
493
|
nb_products = len(products)
|
|
487
494
|
retry_count = 0
|
|
488
|
-
# another output for
|
|
495
|
+
# another output for notebooks
|
|
489
496
|
nb_info = NotebookWidgets()
|
|
490
497
|
|
|
498
|
+
# create an executor if not given
|
|
499
|
+
executor = ThreadPoolExecutor() if executor is None else executor
|
|
500
|
+
# set thread name prefix so that the EOProduct download() method can identify
|
|
501
|
+
# whether the executor was created during parallel product downloads or not
|
|
502
|
+
self._config_executor(executor, "eodag-download-all")
|
|
503
|
+
|
|
491
504
|
for product in products:
|
|
492
505
|
product.next_try = start_time
|
|
493
506
|
|
|
@@ -508,53 +521,88 @@ class Download(PluginTopic):
|
|
|
508
521
|
progress_callback.unit_scale = False
|
|
509
522
|
progress_callback.refresh()
|
|
510
523
|
|
|
524
|
+
# anticipate nested tasks to download assets in parallel for at least one product
|
|
525
|
+
nested_asset_downloads = any(
|
|
526
|
+
product
|
|
527
|
+
for product in products
|
|
528
|
+
if (
|
|
529
|
+
product.downloader
|
|
530
|
+
and product.downloader.config.type == "AwsDownload"
|
|
531
|
+
or len(product.assets) > 0
|
|
532
|
+
and (
|
|
533
|
+
not getattr(self.config, "ignore_assets", False)
|
|
534
|
+
or kwargs.get("asset") is not None
|
|
535
|
+
)
|
|
536
|
+
)
|
|
537
|
+
)
|
|
538
|
+
|
|
511
539
|
with progress_callback as bar:
|
|
512
540
|
while "Loop until all products are download or timeout is reached":
|
|
513
|
-
# try downloading each product before retry
|
|
514
|
-
|
|
541
|
+
# try downloading each product in parallel before retry
|
|
542
|
+
|
|
543
|
+
# Download products in batches to handle nested tasks to download assets in parallel.
|
|
544
|
+
# We avoid having fewer workers in the executor than the number of products to download in parallel
|
|
545
|
+
# to prevent deadlocks. This could happen by submitting and waiting for a task within a task.
|
|
546
|
+
# We ensure at least one thread is available for these tasks and at least one product is downloaded
|
|
547
|
+
# at a time.
|
|
548
|
+
# If there is only one worker, a specific process at assets download level is used to avoid deadlocks.
|
|
549
|
+
batch_size = len(products)
|
|
550
|
+
if nested_asset_downloads and executor._max_workers <= batch_size:
|
|
551
|
+
batch_size = max(executor._max_workers - 1, 1)
|
|
552
|
+
|
|
553
|
+
products_batch = products[:batch_size]
|
|
554
|
+
futures = {}
|
|
555
|
+
|
|
556
|
+
for idx, product in enumerate(products_batch):
|
|
515
557
|
if datetime.now() >= product.next_try:
|
|
516
558
|
products[idx].next_try += timedelta(minutes=wait)
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
559
|
+
future = executor.submit(
|
|
560
|
+
product.download,
|
|
561
|
+
progress_callback=product_progress_callback,
|
|
562
|
+
executor=executor,
|
|
563
|
+
wait=wait,
|
|
564
|
+
timeout=-1,
|
|
565
|
+
**kwargs, # type: ignore
|
|
566
|
+
)
|
|
567
|
+
futures[future] = product
|
|
526
568
|
|
|
527
|
-
|
|
528
|
-
|
|
569
|
+
for future in as_completed(futures.keys()):
|
|
570
|
+
product = futures[future]
|
|
571
|
+
try:
|
|
572
|
+
result = future.result()
|
|
573
|
+
paths.append(result)
|
|
529
574
|
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
bar(1)
|
|
575
|
+
if downloaded_callback:
|
|
576
|
+
downloaded_callback(product)
|
|
533
577
|
|
|
534
|
-
|
|
535
|
-
|
|
578
|
+
# product downloaded, to not retry it
|
|
579
|
+
products.remove(product)
|
|
580
|
+
bar(1)
|
|
536
581
|
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
continue
|
|
582
|
+
# reset stop time for next product
|
|
583
|
+
stop_time = datetime.now() + timedelta(minutes=timeout)
|
|
540
584
|
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
)
|
|
545
|
-
raise
|
|
585
|
+
except NotAvailableError as e:
|
|
586
|
+
logger.info(e)
|
|
587
|
+
continue
|
|
546
588
|
|
|
547
|
-
|
|
548
|
-
|
|
589
|
+
except (AuthenticationError, MisconfiguredError):
|
|
590
|
+
logger.exception(
|
|
591
|
+
f"Stopped because of credentials problems with provider {self.provider}"
|
|
592
|
+
)
|
|
593
|
+
raise
|
|
549
594
|
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
595
|
+
except (RuntimeError, Exception):
|
|
596
|
+
import traceback as tb
|
|
597
|
+
|
|
598
|
+
logger.error(
|
|
599
|
+
f"A problem occurred during download of product: {product}. "
|
|
600
|
+
"Skipping it"
|
|
601
|
+
)
|
|
602
|
+
logger.debug(f"\n{tb.format_exc()}")
|
|
555
603
|
|
|
556
|
-
|
|
557
|
-
|
|
604
|
+
# product skipped, to not retry it
|
|
605
|
+
products.remove(product)
|
|
558
606
|
|
|
559
607
|
if (
|
|
560
608
|
len(products) > 0
|
|
@@ -567,6 +615,7 @@ class Download(PluginTopic):
|
|
|
567
615
|
f"[Retry #{retry_count}, {nb_products - len(products)}/{nb_products} D/L] "
|
|
568
616
|
f"Waiting {wait_seconds}s until next download try (retry every {wait}' for {timeout}')"
|
|
569
617
|
)
|
|
618
|
+
|
|
570
619
|
logger.info(info_message)
|
|
571
620
|
nb_info.display_html(info_message)
|
|
572
621
|
sleep(wait_seconds + 1)
|
|
@@ -579,6 +628,9 @@ class Download(PluginTopic):
|
|
|
579
628
|
elif len(products) == 0:
|
|
580
629
|
break
|
|
581
630
|
|
|
631
|
+
# Shutdown executor at the end
|
|
632
|
+
executor.shutdown(wait=True)
|
|
633
|
+
|
|
582
634
|
return paths
|
|
583
635
|
|
|
584
636
|
def _order_download_retry(
|
|
@@ -641,8 +693,8 @@ class Download(PluginTopic):
|
|
|
641
693
|
)
|
|
642
694
|
logger.info(not_available_info)
|
|
643
695
|
# Retry-After info from Response header
|
|
644
|
-
if hasattr(
|
|
645
|
-
retry_server_info =
|
|
696
|
+
if hasattr(product, "_stream"):
|
|
697
|
+
retry_server_info = product._stream.headers.get(
|
|
646
698
|
"Retry-After", ""
|
|
647
699
|
)
|
|
648
700
|
if retry_server_info:
|
|
@@ -663,8 +715,8 @@ class Download(PluginTopic):
|
|
|
663
715
|
)
|
|
664
716
|
logger.info(not_available_info)
|
|
665
717
|
# Retry-After info from Response header
|
|
666
|
-
if hasattr(
|
|
667
|
-
retry_server_info =
|
|
718
|
+
if hasattr(product, "_stream"):
|
|
719
|
+
retry_server_info = product._stream.headers.get(
|
|
668
720
|
"Retry-After", ""
|
|
669
721
|
)
|
|
670
722
|
if retry_server_info:
|
|
@@ -690,3 +742,27 @@ class Download(PluginTopic):
|
|
|
690
742
|
return download_and_retry
|
|
691
743
|
|
|
692
744
|
return decorator
|
|
745
|
+
|
|
746
|
+
def _config_executor(
|
|
747
|
+
self, executor: ThreadPoolExecutor, thread_name_prefix: Optional[str] = None
|
|
748
|
+
) -> None:
|
|
749
|
+
"""
|
|
750
|
+
Configure a ThreadPoolExecutor instance.
|
|
751
|
+
|
|
752
|
+
This method ensures that a ThreadPoolExecutor is correctly set for downloads by adjusting its
|
|
753
|
+
maximum number of workers if necessary. It also configures the thread name prefix to identify
|
|
754
|
+
threads created by the executor, which is useful for distinguishing between executors created
|
|
755
|
+
for parallel product downloads versus those created for other purposes.
|
|
756
|
+
|
|
757
|
+
:param executor: A ThreadPoolExecutor instance.
|
|
758
|
+
:param thread_name_prefix: (optional) A prefix for naming threads created by the executor.
|
|
759
|
+
When provided, threads will be named using this prefix to help
|
|
760
|
+
identify the executor's purpose (e.g., "eodag-download-all").
|
|
761
|
+
"""
|
|
762
|
+
if (
|
|
763
|
+
max_workers := getattr(self.config, "max_workers", executor._max_workers)
|
|
764
|
+
) < executor._max_workers:
|
|
765
|
+
executor._max_workers = max_workers
|
|
766
|
+
|
|
767
|
+
if thread_name_prefix:
|
|
768
|
+
executor._thread_name_prefix = "eodag-download-all"
|