eodag 4.0.0a4__py3-none-any.whl → 4.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eodag/api/core.py CHANGED
@@ -87,6 +87,7 @@ from eodag.utils.free_text_search import compile_free_text_query
87
87
  from eodag.utils.stac_reader import fetch_stac_items
88
88
 
89
89
  if TYPE_CHECKING:
90
+ from concurrent.futures import ThreadPoolExecutor
90
91
  from shapely.geometry.base import BaseGeometry
91
92
 
92
93
  from eodag.api.product import EOProduct
@@ -452,6 +453,9 @@ class EODataAccessGateway:
452
453
  if locations_conf_path is None:
453
454
  locations_conf_path = os.path.join(self.conf_dir, "locations.yml")
454
455
  if not os.path.isfile(locations_conf_path):
456
+ # Ensure the directory exists
457
+ os.makedirs(os.path.dirname(locations_conf_path), exist_ok=True)
458
+
455
459
  # copy locations conf file and replace path example
456
460
  locations_conf_template = str(
457
461
  res_files("eodag") / "resources" / "locations_conf_template.yml"
@@ -1919,6 +1923,7 @@ class EODataAccessGateway:
1919
1923
  search_result: SearchResult,
1920
1924
  downloaded_callback: Optional[DownloadedCallback] = None,
1921
1925
  progress_callback: Optional[ProgressCallback] = None,
1926
+ executor: Optional[ThreadPoolExecutor] = None,
1922
1927
  wait: float = DEFAULT_DOWNLOAD_WAIT,
1923
1928
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
1924
1929
  **kwargs: Unpack[DownloadConf],
@@ -1936,6 +1941,8 @@ class EODataAccessGateway:
1936
1941
  size as inputs and handle progress bar
1937
1942
  creation and update to give the user a
1938
1943
  feedback on the download progress
1944
+ :param executor: (optional) An executor to download EO products of ``search_result`` in parallel
1945
+ which will also be reused to download assets of these products in parallel.
1939
1946
  :param wait: (optional) If download fails, wait time in minutes between
1940
1947
  two download tries of the same product
1941
1948
  :param timeout: (optional) If download fails, maximum time in minutes
@@ -1956,8 +1963,7 @@ class EODataAccessGateway:
1956
1963
  paths = []
1957
1964
  if search_result:
1958
1965
  logger.info("Downloading %s products", len(search_result))
1959
- # Get download plugin using first product assuming product from several provider
1960
- # aren't mixed into a search result
1966
+ # Get download plugin using first product assuming all plugins use base.Download.download_all
1961
1967
  download_plugin = self._plugins_manager.get_download_plugin(
1962
1968
  search_result[0]
1963
1969
  )
@@ -1965,6 +1971,7 @@ class EODataAccessGateway:
1965
1971
  search_result,
1966
1972
  downloaded_callback=downloaded_callback,
1967
1973
  progress_callback=progress_callback,
1974
+ executor=executor,
1968
1975
  wait=wait,
1969
1976
  timeout=timeout,
1970
1977
  **kwargs,
@@ -2026,6 +2033,7 @@ class EODataAccessGateway:
2026
2033
  self,
2027
2034
  product: EOProduct,
2028
2035
  progress_callback: Optional[ProgressCallback] = None,
2036
+ executor: Optional[ThreadPoolExecutor] = None,
2029
2037
  wait: float = DEFAULT_DOWNLOAD_WAIT,
2030
2038
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
2031
2039
  **kwargs: Unpack[DownloadConf],
@@ -2056,6 +2064,8 @@ class EODataAccessGateway:
2056
2064
  size as inputs and handle progress bar
2057
2065
  creation and update to give the user a
2058
2066
  feedback on the download progress
2067
+ :param executor: (optional) An executor to download assets of ``product`` in parallel if it has any.
2068
+ If ``None``, a default executor will be created
2059
2069
  :param wait: (optional) If download fails, wait time in minutes between
2060
2070
  two download tries
2061
2071
  :param timeout: (optional) If download fails, maximum time in minutes
@@ -2080,7 +2090,11 @@ class EODataAccessGateway:
2080
2090
  return uri_to_path(product.location)
2081
2091
  self._setup_downloader(product)
2082
2092
  path = product.download(
2083
- progress_callback=progress_callback, wait=wait, timeout=timeout, **kwargs
2093
+ progress_callback=progress_callback,
2094
+ executor=executor,
2095
+ wait=wait,
2096
+ timeout=timeout,
2097
+ **kwargs,
2084
2098
  )
2085
2099
 
2086
2100
  return path
@@ -61,6 +61,7 @@ from eodag.utils.exceptions import DownloadError, MisconfiguredError, Validation
61
61
  from eodag.utils.repr import dict_to_html_table
62
62
 
63
63
  if TYPE_CHECKING:
64
+ from concurrent.futures import ThreadPoolExecutor
64
65
  from shapely.geometry.base import BaseGeometry
65
66
 
66
67
  from eodag.api.product.drivers.base import DatasetDriver
@@ -122,6 +123,8 @@ class EOProduct:
122
123
  search_kwargs: Any
123
124
  #: Datetime for download next try
124
125
  next_try: datetime
126
+ #: Stream for requests
127
+ _stream: requests.Response
125
128
 
126
129
  def __init__(
127
130
  self, provider: str, properties: dict[str, Any], **kwargs: Any
@@ -337,6 +340,7 @@ class EOProduct:
337
340
  def download(
338
341
  self,
339
342
  progress_callback: Optional[ProgressCallback] = None,
343
+ executor: Optional[ThreadPoolExecutor] = None,
340
344
  wait: float = DEFAULT_DOWNLOAD_WAIT,
341
345
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
342
346
  **kwargs: Unpack[DownloadConf],
@@ -353,6 +357,8 @@ class EOProduct:
353
357
  size as inputs and handle progress bar
354
358
  creation and update to give the user a
355
359
  feedback on the download progress
360
+ :param executor: (optional) An executor to download assets of the product in parallel if it has any.
361
+ If ``None``, a default executor will be created
356
362
  :param wait: (optional) If download fails, wait time in minutes between
357
363
  two download tries
358
364
  :param timeout: (optional) If download fails, maximum time in minutes
@@ -377,17 +383,26 @@ class EOProduct:
377
383
  )
378
384
 
379
385
  progress_callback, close_progress_callback = self._init_progress_bar(
380
- progress_callback
386
+ progress_callback, executor
381
387
  )
388
+
382
389
  fs_path = self.downloader.download(
383
390
  self,
384
391
  auth=auth,
385
392
  progress_callback=progress_callback,
393
+ executor=executor,
386
394
  wait=wait,
387
395
  timeout=timeout,
388
396
  **kwargs,
389
397
  )
390
398
 
399
+ # shutdown executor if it was not created during parallel product downloads
400
+ if (
401
+ executor is not None
402
+ and executor._thread_name_prefix != "eodag-download-all"
403
+ ):
404
+ executor.shutdown(wait=True)
405
+
391
406
  # close progress bar if needed
392
407
  if close_progress_callback:
393
408
  progress_callback.close()
@@ -408,15 +423,22 @@ class EOProduct:
408
423
  return fs_path
409
424
 
410
425
  def _init_progress_bar(
411
- self, progress_callback: Optional[ProgressCallback]
426
+ self,
427
+ progress_callback: Optional[ProgressCallback],
428
+ executor: Optional[ThreadPoolExecutor],
412
429
  ) -> tuple[ProgressCallback, bool]:
430
+ # determine position of the progress bar with a counter of executor passings
431
+ # to avoid bar overwriting in case of parallel downloads
432
+ count = executor._counter() if executor is not None else 1 # type: ignore
433
+
413
434
  # progress bar init
414
435
  if progress_callback is None:
415
- progress_callback = ProgressCallback(position=1)
436
+ progress_callback = ProgressCallback(position=count)
416
437
  # one shot progress callback to close after download
417
438
  close_progress_callback = True
418
439
  else:
419
440
  close_progress_callback = False
441
+ progress_callback.pos = count
420
442
  # update units as bar may have been previously used for extraction
421
443
  progress_callback.unit = "B"
422
444
  progress_callback.unit_scale = True
eodag/cli.py CHANGED
@@ -48,6 +48,7 @@ from typing import TYPE_CHECKING, Any, Callable, Mapping, Optional
48
48
  from urllib.parse import parse_qs
49
49
 
50
50
  import click
51
+ from concurrent.futures import ThreadPoolExecutor
51
52
 
52
53
  from eodag.api.collection import CollectionsList
53
54
  from eodag.api.core import EODataAccessGateway, SearchResult
@@ -556,6 +557,11 @@ Examples:
556
557
  type=click.Path(dir_okay=True, file_okay=False),
557
558
  help="Products or quicklooks download directory (Default: local temporary directory)",
558
559
  )
560
+ @click.option(
561
+ "--max-workers",
562
+ type=int,
563
+ help="The maximum number of workers to use for downloading products and assets in parallel",
564
+ )
559
565
  @click.pass_context
560
566
  def download(ctx: Context, **kwargs: Any) -> None:
561
567
  """Download a bunch of products from a serialized search result"""
@@ -601,7 +607,10 @@ def download(ctx: Context, **kwargs: Any) -> None:
601
607
 
602
608
  else:
603
609
  # Download products
604
- downloaded_files = satim_api.download_all(search_results, output_dir=output_dir)
610
+ executor = ThreadPoolExecutor(max_workers=kwargs.pop("max_workers"))
611
+ downloaded_files = satim_api.download_all(
612
+ search_results, output_dir=output_dir, executor=executor
613
+ )
605
614
  if downloaded_files and len(downloaded_files) > 0:
606
615
  for downloaded_file in downloaded_files:
607
616
  if downloaded_file is None:
@@ -48,6 +48,7 @@ from eodag.utils.logging import get_logging_verbose
48
48
  if TYPE_CHECKING:
49
49
  from typing import Any, Optional, Union
50
50
 
51
+ from concurrent.futures import ThreadPoolExecutor
51
52
  from mypy_boto3_s3 import S3ServiceResource
52
53
  from requests.auth import AuthBase
53
54
 
@@ -55,7 +56,7 @@ if TYPE_CHECKING:
55
56
  from eodag.api.search_result import SearchResult
56
57
  from eodag.config import PluginConfig
57
58
  from eodag.types.download_args import DownloadConf
58
- from eodag.utils import DownloadedCallback, ProgressCallback, Unpack
59
+ from eodag.utils import ProgressCallback, Unpack
59
60
 
60
61
 
61
62
  logger = logging.getLogger("eodag.apis.ecmwf")
@@ -185,6 +186,7 @@ class EcmwfApi(Api, ECMWFSearch):
185
186
  product: EOProduct,
186
187
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
187
188
  progress_callback: Optional[ProgressCallback] = None,
189
+ executor: Optional[ThreadPoolExecutor] = None,
188
190
  wait: float = DEFAULT_DOWNLOAD_WAIT,
189
191
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
190
192
  **kwargs: Unpack[DownloadConf],
@@ -269,29 +271,6 @@ class EcmwfApi(Api, ECMWFSearch):
269
271
  product.location = path_to_uri(product_path)
270
272
  return product_path
271
273
 
272
- def download_all(
273
- self,
274
- products: SearchResult,
275
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
276
- downloaded_callback: Optional[DownloadedCallback] = None,
277
- progress_callback: Optional[ProgressCallback] = None,
278
- wait: float = DEFAULT_DOWNLOAD_WAIT,
279
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
280
- **kwargs: Unpack[DownloadConf],
281
- ) -> list[str]:
282
- """
283
- Download all using parent (base plugin) method
284
- """
285
- return super(EcmwfApi, self).download_all(
286
- products,
287
- auth=auth,
288
- downloaded_callback=downloaded_callback,
289
- progress_callback=progress_callback,
290
- wait=wait,
291
- timeout=timeout,
292
- **kwargs,
293
- )
294
-
295
274
  def clear(self) -> None:
296
275
  """Clear search context"""
297
276
  pass
@@ -57,12 +57,13 @@ from eodag.utils.exceptions import (
57
57
  )
58
58
 
59
59
  if TYPE_CHECKING:
60
+ from concurrent.futures import ThreadPoolExecutor
60
61
  from mypy_boto3_s3 import S3ServiceResource
61
62
  from requests.auth import AuthBase
62
63
 
63
64
  from eodag.config import PluginConfig
64
65
  from eodag.types.download_args import DownloadConf
65
- from eodag.utils import DownloadedCallback, Unpack
66
+ from eodag.utils import Unpack
66
67
 
67
68
  logger = logging.getLogger("eodag.apis.usgs")
68
69
 
@@ -312,6 +313,7 @@ class UsgsApi(Api):
312
313
  product: EOProduct,
313
314
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
314
315
  progress_callback: Optional[ProgressCallback] = None,
316
+ executor: Optional[ThreadPoolExecutor] = None,
315
317
  wait: float = DEFAULT_DOWNLOAD_WAIT,
316
318
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
317
319
  **kwargs: Unpack[DownloadConf],
@@ -477,26 +479,3 @@ class UsgsApi(Api):
477
479
  shutil.move(fs_path, new_fs_path)
478
480
  product.location = path_to_uri(new_fs_path)
479
481
  return new_fs_path
480
-
481
- def download_all(
482
- self,
483
- products: SearchResult,
484
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
485
- downloaded_callback: Optional[DownloadedCallback] = None,
486
- progress_callback: Optional[ProgressCallback] = None,
487
- wait: float = DEFAULT_DOWNLOAD_WAIT,
488
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
489
- **kwargs: Unpack[DownloadConf],
490
- ) -> list[str]:
491
- """
492
- Download all using parent (base plugin) method
493
- """
494
- return super(UsgsApi, self).download_all(
495
- products,
496
- auth=auth,
497
- downloaded_callback=downloaded_callback,
498
- progress_callback=progress_callback,
499
- wait=wait,
500
- timeout=timeout,
501
- **kwargs,
502
- )
@@ -25,7 +25,9 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
25
25
 
26
26
  import boto3
27
27
  import requests
28
+ from boto3.s3.transfer import TransferConfig
28
29
  from botocore.exceptions import ClientError
30
+ from concurrent.futures import ThreadPoolExecutor, as_completed
29
31
  from lxml import etree
30
32
  from requests.auth import AuthBase
31
33
 
@@ -34,7 +36,7 @@ from eodag.api.product.metadata_mapping import (
34
36
  properties_from_json,
35
37
  properties_from_xml,
36
38
  )
37
- from eodag.plugins.authentication.aws_auth import raise_if_auth_error
39
+ from eodag.plugins.authentication.aws_auth import AwsAuth, raise_if_auth_error
38
40
  from eodag.plugins.download.base import Download
39
41
  from eodag.utils import (
40
42
  DEFAULT_DOWNLOAD_TIMEOUT,
@@ -65,10 +67,9 @@ if TYPE_CHECKING:
65
67
  from mypy_boto3_s3.client import S3Client
66
68
 
67
69
  from eodag.api.product import EOProduct
68
- from eodag.api.search_result import SearchResult
69
70
  from eodag.config import PluginConfig
70
71
  from eodag.types.download_args import DownloadConf
71
- from eodag.utils import DownloadedCallback, Unpack
72
+ from eodag.utils import Unpack
72
73
 
73
74
 
74
75
  logger = logging.getLogger("eodag.download.aws")
@@ -227,6 +228,7 @@ class AwsDownload(Download):
227
228
  product: EOProduct,
228
229
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
229
230
  progress_callback: Optional[ProgressCallback] = None,
231
+ executor: Optional[ThreadPoolExecutor] = None,
230
232
  wait: float = DEFAULT_DOWNLOAD_WAIT,
231
233
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
232
234
  **kwargs: Unpack[DownloadConf],
@@ -246,6 +248,7 @@ class AwsDownload(Download):
246
248
  size as inputs and handle progress bar
247
249
  creation and update to give the user a
248
250
  feedback on the download progress
251
+ :param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
249
252
  :param kwargs: `output_dir` (str), `extract` (bool), `delete_archive` (bool)
250
253
  and `dl_url_params` (dict) can be provided as additional kwargs
251
254
  and will override any other values defined in a configuration
@@ -293,7 +296,7 @@ class AwsDownload(Download):
293
296
  )
294
297
 
295
298
  # authenticate
296
- if product.downloader_auth:
299
+ if product.downloader_auth and isinstance(product.downloader_auth, AwsAuth):
297
300
  authenticated_objects = product.downloader_auth.authenticate_objects(
298
301
  bucket_names_and_prefixes
299
302
  )
@@ -302,9 +305,19 @@ class AwsDownload(Download):
302
305
  "Authentication plugin (AwsAuth) has to be configured if AwsDownload is used"
303
306
  )
304
307
 
308
+ # create an executor if not given and anticipate the possible need to shut it down
309
+ executor, shutdown_executor = (
310
+ (ThreadPoolExecutor(), True) if executor is None else (executor, False)
311
+ )
312
+ self._config_executor(executor)
313
+
305
314
  # files in zip
306
315
  updated_bucket_names_and_prefixes = self._download_file_in_zip(
307
- product, bucket_names_and_prefixes, product_local_path, progress_callback
316
+ product.downloader_auth,
317
+ bucket_names_and_prefixes,
318
+ product_local_path,
319
+ progress_callback,
320
+ executor,
308
321
  )
309
322
  # prevent nothing-to-download errors if download was performed in zip
310
323
  raise_error = (
@@ -329,7 +342,8 @@ class AwsDownload(Download):
329
342
  if len(unique_product_chunks) > 0:
330
343
  progress_callback.reset(total=total_size)
331
344
  try:
332
- for product_chunk in unique_product_chunks:
345
+
346
+ def download_chunk(product_chunk: Any) -> None:
333
347
  try:
334
348
  chunk_rel_path = self.get_chunk_dest_path(
335
349
  product,
@@ -339,11 +353,11 @@ class AwsDownload(Download):
339
353
  except NotAvailableError as e:
340
354
  # out of SAFE format chunk
341
355
  logger.warning(e)
342
- continue
356
+ return
357
+
343
358
  chunk_abs_path = os.path.join(product_local_path, chunk_rel_path)
344
359
  chunk_abs_path_dir = os.path.dirname(chunk_abs_path)
345
- if not os.path.isdir(chunk_abs_path_dir):
346
- os.makedirs(chunk_abs_path_dir)
360
+ os.makedirs(chunk_abs_path_dir, exist_ok=True)
347
361
 
348
362
  bucket_objects = authenticated_objects.get(product_chunk.bucket_name)
349
363
  extra_args = (
@@ -352,12 +366,31 @@ class AwsDownload(Download):
352
366
  else {}
353
367
  )
354
368
  if not os.path.isfile(chunk_abs_path):
369
+ transfer_config = TransferConfig(use_threads=False)
355
370
  product_chunk.Bucket().download_file(
356
371
  product_chunk.key,
357
372
  chunk_abs_path,
358
373
  ExtraArgs=extra_args,
359
374
  Callback=progress_callback,
375
+ Config=transfer_config,
360
376
  )
377
+ return
378
+
379
+ # use parallelization if possible.
380
+ # when products are already downloaded in parallel but the executor has only one worker,
381
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
382
+ if (
383
+ executor._thread_name_prefix == "eodag-download-all"
384
+ and executor._max_workers == 1
385
+ ):
386
+ for product_chunk in unique_product_chunks:
387
+ download_chunk(product_chunk)
388
+ else:
389
+ futures = (
390
+ executor.submit(download_chunk, product_chunk)
391
+ for product_chunk in unique_product_chunks
392
+ )
393
+ [f.result() for f in as_completed(futures)]
361
394
 
362
395
  except AuthenticationError as e:
363
396
  logger.warning("Unexpected error: %s" % e)
@@ -365,6 +398,9 @@ class AwsDownload(Download):
365
398
  raise_if_auth_error(e, self.provider)
366
399
  logger.warning("Unexpected error: %s" % e)
367
400
 
401
+ if shutdown_executor:
402
+ executor.shutdown(wait=True)
403
+
368
404
  # finalize safe product
369
405
  if build_safe and product.collection and "S2_MSI" in product.collection:
370
406
  self.finalize_s2_safe_product(product_local_path)
@@ -386,31 +422,33 @@ class AwsDownload(Download):
386
422
  return product_local_path
387
423
 
388
424
  def _download_file_in_zip(
389
- self, product, bucket_names_and_prefixes, product_local_path, progress_callback
425
+ self,
426
+ downloader_auth: AwsAuth,
427
+ bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
428
+ product_local_path: str,
429
+ progress_callback: ProgressCallback,
430
+ executor: ThreadPoolExecutor,
390
431
  ):
391
432
  """
392
433
  Download file in zip from a prefix like `foo/bar.zip!file.txt`
393
434
  """
394
- if (
395
- not getattr(product, "downloader_auth", None)
396
- or product.downloader_auth.s3_resource is None
397
- ):
435
+ if downloader_auth.s3_resource is None:
398
436
  logger.debug("Cannot check files in s3 zip without s3 resource")
399
437
  return bucket_names_and_prefixes
400
438
 
401
- s3_client = product.downloader_auth.get_s3_client()
439
+ s3_client = downloader_auth.get_s3_client()
402
440
 
403
441
  downloaded = []
404
- for i, pack in enumerate(bucket_names_and_prefixes):
442
+
443
+ def process_zip_file(i: int, pack: tuple[str, Optional[str]]) -> Optional[int]:
405
444
  bucket_name, prefix = pack
406
- if ".zip!" in prefix:
445
+ if prefix is not None and ".zip!" in prefix:
407
446
  splitted_path = prefix.split(".zip!")
408
447
  zip_prefix = f"{splitted_path[0]}.zip"
409
448
  rel_path = splitted_path[-1]
410
449
  dest_file = os.path.join(product_local_path, rel_path)
411
450
  dest_abs_path_dir = os.path.dirname(dest_file)
412
- if not os.path.isdir(dest_abs_path_dir):
413
- os.makedirs(dest_abs_path_dir)
451
+ os.makedirs(dest_abs_path_dir, exist_ok=True)
414
452
 
415
453
  zip_file, _ = open_s3_zipped_object(
416
454
  bucket_name, zip_prefix, s3_client, partial=False
@@ -428,7 +466,30 @@ class AwsDownload(Download):
428
466
  output_file.write(zchunk)
429
467
  progress_callback(len(zchunk))
430
468
 
431
- downloaded.append(i)
469
+ return i
470
+ return None
471
+
472
+ # use parallelization if possible
473
+ # when products are already downloaded in parallel but the executor has only one worker,
474
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
475
+ if (
476
+ executor._thread_name_prefix == "eodag-download-all"
477
+ and executor._max_workers == 1
478
+ ):
479
+ for i, pack in enumerate(bucket_names_and_prefixes):
480
+ result = process_zip_file(i, pack)
481
+ if result is not None:
482
+ downloaded.append(result)
483
+ else:
484
+ futures = (
485
+ executor.submit(process_zip_file, i, pack)
486
+ for i, pack in enumerate(bucket_names_and_prefixes)
487
+ )
488
+
489
+ for future in as_completed(futures):
490
+ result = future.result()
491
+ if result is not None:
492
+ downloaded.append(result)
432
493
 
433
494
  return [
434
495
  pack
@@ -710,7 +771,7 @@ class AwsDownload(Download):
710
771
  ignore_assets,
711
772
  product,
712
773
  )
713
- if auth and isinstance(auth, boto3.resources.base.ServiceResource):
774
+ if auth and isinstance(auth, boto3.resource("s3").__class__):
714
775
  s3_resource = auth
715
776
  else:
716
777
  s3_resource = boto3.resource(
@@ -773,6 +834,7 @@ class AwsDownload(Download):
773
834
  byte_range,
774
835
  compress,
775
836
  zip_filename,
837
+ provider_max_workers=getattr(self.config, "max_workers", None),
776
838
  )
777
839
 
778
840
  def _get_commonpath(
@@ -1112,26 +1174,3 @@ class AwsDownload(Download):
1112
1174
 
1113
1175
  logger.debug(f"Downloading {chunk.key} to {product_path}")
1114
1176
  return product_path
1115
-
1116
- def download_all(
1117
- self,
1118
- products: SearchResult,
1119
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
1120
- downloaded_callback: Optional[DownloadedCallback] = None,
1121
- progress_callback: Optional[ProgressCallback] = None,
1122
- wait: float = DEFAULT_DOWNLOAD_WAIT,
1123
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
1124
- **kwargs: Unpack[DownloadConf],
1125
- ) -> list[str]:
1126
- """
1127
- download_all using parent (base plugin) method
1128
- """
1129
- return super(AwsDownload, self).download_all(
1130
- products,
1131
- auth=auth,
1132
- downloaded_callback=downloaded_callback,
1133
- progress_callback=progress_callback,
1134
- wait=wait,
1135
- timeout=timeout,
1136
- **kwargs,
1137
- )
@@ -29,6 +29,8 @@ from pathlib import Path
29
29
  from time import sleep
30
30
  from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, Union
31
31
 
32
+ from concurrent.futures import ThreadPoolExecutor, as_completed
33
+
32
34
  from eodag.api.product.metadata_mapping import ONLINE_STATUS
33
35
  from eodag.plugins.base import PluginTopic
34
36
  from eodag.utils import (
@@ -105,6 +107,7 @@ class Download(PluginTopic):
105
107
  product: EOProduct,
106
108
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
107
109
  progress_callback: Optional[ProgressCallback] = None,
110
+ executor: Optional[ThreadPoolExecutor] = None,
108
111
  wait: float = DEFAULT_DOWNLOAD_WAIT,
109
112
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
110
113
  **kwargs: Unpack[DownloadConf],
@@ -115,6 +118,7 @@ class Download(PluginTopic):
115
118
  :param product: The EO product to download
116
119
  :param auth: (optional) authenticated object
117
120
  :param progress_callback: (optional) A progress callback
121
+ :param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
118
122
  :param wait: (optional) If download fails, wait time in minutes between two download tries
119
123
  :param timeout: (optional) If download fails, maximum time in minutes before stop retrying
120
124
  to download
@@ -447,6 +451,7 @@ class Download(PluginTopic):
447
451
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
448
452
  downloaded_callback: Optional[DownloadedCallback] = None,
449
453
  progress_callback: Optional[ProgressCallback] = None,
454
+ executor: Optional[ThreadPoolExecutor] = None,
450
455
  wait: float = DEFAULT_DOWNLOAD_WAIT,
451
456
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
452
457
  **kwargs: Unpack[DownloadConf],
@@ -454,7 +459,7 @@ class Download(PluginTopic):
454
459
  """
455
460
  Base download_all method.
456
461
 
457
- This specific implementation uses the :meth:`eodag.plugins.download.base.Download.download` method
462
+ This specific implementation uses the :meth:`~eodag.api.product._product.EOProduct.download` method
458
463
  implemented by the plugin to attempt to download products **in parallel**.
459
464
 
460
465
  :param products: Products to download
@@ -465,6 +470,8 @@ class Download(PluginTopic):
465
470
  its ``__call__`` method. Will be called each time a product
466
471
  finishes downloading
467
472
  :param progress_callback: (optional) A progress callback
473
+ :param executor: (optional) An executor to download products in parallel which may
474
+ be reused to also download assets of these products in parallel.
468
475
  :param wait: (optional) If download fails, wait time in minutes between two download tries
469
476
  :param timeout: (optional) If download fails, maximum time in minutes before stop retrying
470
477
  to download
@@ -485,9 +492,15 @@ class Download(PluginTopic):
485
492
  stop_time = start_time + timedelta(minutes=timeout)
486
493
  nb_products = len(products)
487
494
  retry_count = 0
488
- # another output for notbooks
495
+ # another output for notebooks
489
496
  nb_info = NotebookWidgets()
490
497
 
498
+ # create an executor if not given
499
+ executor = ThreadPoolExecutor() if executor is None else executor
500
+ # set thread name prefix so that the EOProduct download() method can identify
501
+ # whether the executor was created during parallel product downloads or not
502
+ self._config_executor(executor, "eodag-download-all")
503
+
491
504
  for product in products:
492
505
  product.next_try = start_time
493
506
 
@@ -508,53 +521,88 @@ class Download(PluginTopic):
508
521
  progress_callback.unit_scale = False
509
522
  progress_callback.refresh()
510
523
 
524
+ # anticipate nested tasks to download assets in parallel for at least one product
525
+ nested_asset_downloads = any(
526
+ product
527
+ for product in products
528
+ if (
529
+ product.downloader
530
+ and product.downloader.config.type == "AwsDownload"
531
+ or len(product.assets) > 0
532
+ and (
533
+ not getattr(self.config, "ignore_assets", False)
534
+ or kwargs.get("asset") is not None
535
+ )
536
+ )
537
+ )
538
+
511
539
  with progress_callback as bar:
512
540
  while "Loop until all products are download or timeout is reached":
513
- # try downloading each product before retry
514
- for idx, product in enumerate(products):
541
+ # try downloading each product in parallel before retry
542
+
543
+ # Download products in batches to handle nested tasks to download assets in parallel.
544
+ # We avoid having fewer workers in the executor than the number of products to download in parallel
545
+ # to prevent deadlocks. This could happen by submitting and waiting for a task within a task.
546
+ # We ensure at least one thread is available for these tasks and at least one product is downloaded
547
+ # at a time.
548
+ # If there is only one worker, a specific process at assets download level is used to avoid deadlocks.
549
+ batch_size = len(products)
550
+ if nested_asset_downloads and executor._max_workers <= batch_size:
551
+ batch_size = max(executor._max_workers - 1, 1)
552
+
553
+ products_batch = products[:batch_size]
554
+ futures = {}
555
+
556
+ for idx, product in enumerate(products_batch):
515
557
  if datetime.now() >= product.next_try:
516
558
  products[idx].next_try += timedelta(minutes=wait)
517
- try:
518
- paths.append(
519
- product.download(
520
- progress_callback=product_progress_callback,
521
- wait=wait,
522
- timeout=-1,
523
- **kwargs,
524
- )
525
- )
559
+ future = executor.submit(
560
+ product.download,
561
+ progress_callback=product_progress_callback,
562
+ executor=executor,
563
+ wait=wait,
564
+ timeout=-1,
565
+ **kwargs, # type: ignore
566
+ )
567
+ futures[future] = product
526
568
 
527
- if downloaded_callback:
528
- downloaded_callback(product)
569
+ for future in as_completed(futures.keys()):
570
+ product = futures[future]
571
+ try:
572
+ result = future.result()
573
+ paths.append(result)
529
574
 
530
- # product downloaded, to not retry it
531
- products.remove(product)
532
- bar(1)
575
+ if downloaded_callback:
576
+ downloaded_callback(product)
533
577
 
534
- # reset stop time for next product
535
- stop_time = datetime.now() + timedelta(minutes=timeout)
578
+ # product downloaded, to not retry it
579
+ products.remove(product)
580
+ bar(1)
536
581
 
537
- except NotAvailableError as e:
538
- logger.info(e)
539
- continue
582
+ # reset stop time for next product
583
+ stop_time = datetime.now() + timedelta(minutes=timeout)
540
584
 
541
- except (AuthenticationError, MisconfiguredError):
542
- logger.exception(
543
- f"Stopped because of credentials problems with provider {self.provider}"
544
- )
545
- raise
585
+ except NotAvailableError as e:
586
+ logger.info(e)
587
+ continue
546
588
 
547
- except (RuntimeError, Exception):
548
- import traceback as tb
589
+ except (AuthenticationError, MisconfiguredError):
590
+ logger.exception(
591
+ f"Stopped because of credentials problems with provider {self.provider}"
592
+ )
593
+ raise
549
594
 
550
- logger.error(
551
- f"A problem occurred during download of product: {product}. "
552
- "Skipping it"
553
- )
554
- logger.debug(f"\n{tb.format_exc()}")
595
+ except (RuntimeError, Exception):
596
+ import traceback as tb
597
+
598
+ logger.error(
599
+ f"A problem occurred during download of product: {product}. "
600
+ "Skipping it"
601
+ )
602
+ logger.debug(f"\n{tb.format_exc()}")
555
603
 
556
- # product skipped, to not retry it
557
- products.remove(product)
604
+ # product skipped, to not retry it
605
+ products.remove(product)
558
606
 
559
607
  if (
560
608
  len(products) > 0
@@ -567,6 +615,7 @@ class Download(PluginTopic):
567
615
  f"[Retry #{retry_count}, {nb_products - len(products)}/{nb_products} D/L] "
568
616
  f"Waiting {wait_seconds}s until next download try (retry every {wait}' for {timeout}')"
569
617
  )
618
+
570
619
  logger.info(info_message)
571
620
  nb_info.display_html(info_message)
572
621
  sleep(wait_seconds + 1)
@@ -579,6 +628,9 @@ class Download(PluginTopic):
579
628
  elif len(products) == 0:
580
629
  break
581
630
 
631
+ # Shutdown executor at the end
632
+ executor.shutdown(wait=True)
633
+
582
634
  return paths
583
635
 
584
636
  def _order_download_retry(
@@ -641,8 +693,8 @@ class Download(PluginTopic):
641
693
  )
642
694
  logger.info(not_available_info)
643
695
  # Retry-After info from Response header
644
- if hasattr(self, "stream"):
645
- retry_server_info = self.stream.headers.get(
696
+ if hasattr(product, "_stream"):
697
+ retry_server_info = product._stream.headers.get(
646
698
  "Retry-After", ""
647
699
  )
648
700
  if retry_server_info:
@@ -663,8 +715,8 @@ class Download(PluginTopic):
663
715
  )
664
716
  logger.info(not_available_info)
665
717
  # Retry-After info from Response header
666
- if hasattr(self, "stream"):
667
- retry_server_info = self.stream.headers.get(
718
+ if hasattr(product, "_stream"):
719
+ retry_server_info = product._stream.headers.get(
668
720
  "Retry-After", ""
669
721
  )
670
722
  if retry_server_info:
@@ -690,3 +742,27 @@ class Download(PluginTopic):
690
742
  return download_and_retry
691
743
 
692
744
  return decorator
745
+
746
+ def _config_executor(
747
+ self, executor: ThreadPoolExecutor, thread_name_prefix: Optional[str] = None
748
+ ) -> None:
749
+ """
750
+ Configure a ThreadPoolExecutor instance.
751
+
752
+ This method ensures that a ThreadPoolExecutor is correctly set for downloads by adjusting its
753
+ maximum number of workers if necessary. It also configures the thread name prefix to identify
754
+ threads created by the executor, which is useful for distinguishing between executors created
755
+ for parallel product downloads versus those created for other purposes.
756
+
757
+ :param executor: A ThreadPoolExecutor instance.
758
+ :param thread_name_prefix: (optional) A prefix for naming threads created by the executor.
759
+ When provided, threads will be named using this prefix to help
760
+ identify the executor's purpose (e.g., "eodag-download-all").
761
+ """
762
+ if (
763
+ max_workers := getattr(self.config, "max_workers", executor._max_workers)
764
+ ) < executor._max_workers:
765
+ executor._max_workers = max_workers
766
+
767
+ if thread_name_prefix:
768
+ executor._thread_name_prefix = "eodag-download-all"
@@ -41,6 +41,7 @@ from urllib.parse import parse_qs, urlparse
41
41
 
42
42
  import geojson
43
43
  import requests
44
+ from concurrent.futures import ThreadPoolExecutor, as_completed
44
45
  from lxml import etree
45
46
  from requests import RequestException
46
47
  from requests.auth import AuthBase
@@ -90,10 +91,9 @@ if TYPE_CHECKING:
90
91
  from requests import Response
91
92
 
92
93
  from eodag.api.product import Asset, EOProduct # type: ignore
93
- from eodag.api.search_result import SearchResult
94
94
  from eodag.config import PluginConfig
95
95
  from eodag.types.download_args import DownloadConf
96
- from eodag.utils import DownloadedCallback, Unpack
96
+ from eodag.utils import Unpack
97
97
 
98
98
  logger = logging.getLogger("eodag.download.http")
99
99
 
@@ -596,6 +596,7 @@ class HTTPDownload(Download):
596
596
  product: EOProduct,
597
597
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
598
598
  progress_callback: Optional[ProgressCallback] = None,
599
+ executor: Optional[ThreadPoolExecutor] = None,
599
600
  wait: float = DEFAULT_DOWNLOAD_WAIT,
600
601
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
601
602
  **kwargs: Unpack[DownloadConf],
@@ -637,6 +638,7 @@ class HTTPDownload(Download):
637
638
  record_filename,
638
639
  auth,
639
640
  progress_callback,
641
+ executor,
640
642
  **kwargs,
641
643
  )
642
644
  if kwargs.get("asset") is None:
@@ -674,7 +676,7 @@ class HTTPDownload(Download):
674
676
  is_empty = False
675
677
  progress_callback(len(chunk))
676
678
  fhandle.write(chunk)
677
- self.stream.close() # Closing response stream
679
+ product._stream.close() # Closing response stream
678
680
 
679
681
  if is_empty:
680
682
  raise DownloadError(f"product {product.properties['id']} is empty")
@@ -720,7 +722,7 @@ class HTTPDownload(Download):
720
722
  return product_path
721
723
 
722
724
  def _check_stream_size(self, product: EOProduct) -> int:
723
- stream_size = int(self.stream.headers.get("content-length", 0))
725
+ stream_size = int(product._stream.headers.get("content-length", 0))
724
726
  if (
725
727
  stream_size == 0
726
728
  and "order:status" in product.properties
@@ -731,14 +733,14 @@ class HTTPDownload(Download):
731
733
  % (
732
734
  product.properties["title"],
733
735
  product.properties["order:status"],
734
- self.stream.reason,
736
+ product._stream.reason,
735
737
  )
736
738
  )
737
739
  return stream_size
738
740
 
739
741
  def _check_product_filename(self, product: EOProduct) -> str:
740
742
  filename = None
741
- asset_content_disposition = self.stream.headers.get("content-disposition")
743
+ asset_content_disposition = product._stream.headers.get("content-disposition")
742
744
  if asset_content_disposition:
743
745
  filename = cast(
744
746
  Optional[str],
@@ -746,7 +748,7 @@ class HTTPDownload(Download):
746
748
  )
747
749
  if not filename:
748
750
  # default filename extracted from path
749
- filename = str(os.path.basename(self.stream.url))
751
+ filename = str(os.path.basename(product._stream.url))
750
752
  filename_extension = os.path.splitext(filename)[1]
751
753
  if not filename_extension:
752
754
  if content_type := getattr(product, "headers", {}).get("Content-Type"):
@@ -789,15 +791,20 @@ class HTTPDownload(Download):
789
791
  not getattr(self.config, "ignore_assets", False)
790
792
  or kwargs.get("asset") is not None
791
793
  ):
794
+ executor = ThreadPoolExecutor(
795
+ max_workers=getattr(self.config, "max_workers", None)
796
+ )
792
797
  try:
793
798
  assets_values = product.assets.get_values(kwargs.get("asset"))
794
- assets_stream_list = self._stream_download_assets(
795
- product,
796
- auth,
797
- None,
798
- assets_values=assets_values,
799
- **kwargs,
800
- )
799
+ with executor:
800
+ assets_stream_list = self._stream_download_assets(
801
+ product,
802
+ executor,
803
+ auth,
804
+ None,
805
+ assets_values,
806
+ **kwargs,
807
+ )
801
808
 
802
809
  # single asset
803
810
  if len(assets_stream_list) == 1:
@@ -1009,7 +1016,7 @@ class HTTPDownload(Download):
1009
1016
 
1010
1017
  s = requests.Session()
1011
1018
  try:
1012
- self.stream = s.request(
1019
+ product._stream = s.request(
1013
1020
  req_method,
1014
1021
  req_url,
1015
1022
  stream=True,
@@ -1024,7 +1031,7 @@ class HTTPDownload(Download):
1024
1031
  # location is not a valid url -> product is not available yet
1025
1032
  raise NotAvailableError("Product is not available yet")
1026
1033
  try:
1027
- self.stream.raise_for_status()
1034
+ product._stream.raise_for_status()
1028
1035
  except requests.exceptions.Timeout as exc:
1029
1036
  raise TimeOutError(exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT) from exc
1030
1037
  except RequestException as e:
@@ -1036,8 +1043,8 @@ class HTTPDownload(Download):
1036
1043
  # check if product was ordered
1037
1044
 
1038
1045
  if getattr(
1039
- self.stream, "status_code", None
1040
- ) is not None and self.stream.status_code == getattr(
1046
+ product._stream, "status_code", None
1047
+ ) is not None and product._stream.status_code == getattr(
1041
1048
  self.config, "order_status", {}
1042
1049
  ).get(
1043
1050
  "ordered", {}
@@ -1048,7 +1055,7 @@ class HTTPDownload(Download):
1048
1055
  self._process_exception(None, product, ordered_message)
1049
1056
  stream_size = self._check_stream_size(product) or None
1050
1057
 
1051
- product.headers = self.stream.headers
1058
+ product.headers = product._stream.headers
1052
1059
  filename = self._check_product_filename(product)
1053
1060
  content_type = product.headers.get("Content-Type")
1054
1061
  guessed_content_type = (
@@ -1061,11 +1068,12 @@ class HTTPDownload(Download):
1061
1068
  product.size = stream_size
1062
1069
 
1063
1070
  product.filename = filename
1064
- return self.stream.iter_content(chunk_size=64 * 1024)
1071
+ return product._stream.iter_content(chunk_size=64 * 1024)
1065
1072
 
1066
1073
  def _stream_download_assets(
1067
1074
  self,
1068
1075
  product: EOProduct,
1076
+ executor: ThreadPoolExecutor,
1069
1077
  auth: Optional[AuthBase] = None,
1070
1078
  progress_callback: Optional[ProgressCallback] = None,
1071
1079
  assets_values: list[Asset] = [],
@@ -1082,7 +1090,9 @@ class HTTPDownload(Download):
1082
1090
  self.config, "dl_url_params", {}
1083
1091
  )
1084
1092
 
1085
- total_size = self._get_asset_sizes(assets_values, auth, params) or None
1093
+ total_size = (
1094
+ self._get_asset_sizes(assets_values, executor, auth, params) or None
1095
+ )
1086
1096
 
1087
1097
  progress_callback.reset(total=total_size)
1088
1098
 
@@ -1188,11 +1198,6 @@ class HTTPDownload(Download):
1188
1198
 
1189
1199
  # Process each asset
1190
1200
  for asset in assets_values:
1191
- if not asset["href"] or asset["href"].startswith("file:"):
1192
- logger.info(
1193
- f"Local asset detected. Download skipped for {asset['href']}"
1194
- )
1195
- continue
1196
1201
  asset_chunks = get_chunks_generator(asset)
1197
1202
  try:
1198
1203
  # start reading chunks to set assets attributes
@@ -1220,6 +1225,7 @@ class HTTPDownload(Download):
1220
1225
  record_filename: str,
1221
1226
  auth: Optional[AuthBase] = None,
1222
1227
  progress_callback: Optional[ProgressCallback] = None,
1228
+ executor: Optional[ThreadPoolExecutor] = None,
1223
1229
  **kwargs: Unpack[DownloadConf],
1224
1230
  ) -> str:
1225
1231
  """Download product assets if they exist"""
@@ -1227,6 +1233,12 @@ class HTTPDownload(Download):
1227
1233
  logger.info("Progress bar unavailable, please call product.download()")
1228
1234
  progress_callback = ProgressCallback(disable=True)
1229
1235
 
1236
+ # create an executor if not given and anticipate the possible need to shut it down
1237
+ executor, shutdown_executor = (
1238
+ (ThreadPoolExecutor(), True) if executor is None else (executor, False)
1239
+ )
1240
+ self._config_executor(executor)
1241
+
1230
1242
  assets_urls = [
1231
1243
  a["href"] for a in getattr(product, "assets", {}).values() if "href" in a
1232
1244
  ]
@@ -1236,7 +1248,7 @@ class HTTPDownload(Download):
1236
1248
  assets_values = product.assets.get_values(kwargs.get("asset"))
1237
1249
 
1238
1250
  assets_stream_list = self._stream_download_assets(
1239
- product, auth, progress_callback, assets_values=assets_values, **kwargs
1251
+ product, executor, auth, progress_callback, assets_values, **kwargs
1240
1252
  )
1241
1253
 
1242
1254
  # remove existing incomplete file
@@ -1259,15 +1271,14 @@ class HTTPDownload(Download):
1259
1271
  local_assets_count += 1
1260
1272
  continue
1261
1273
 
1262
- for asset_stream in assets_stream_list:
1274
+ def download_asset(asset_stream: StreamResponse) -> None:
1263
1275
  asset_chunks = asset_stream.content
1264
1276
  asset_path = cast(str, asset_stream.arcname)
1265
1277
  asset_abs_path = os.path.join(fs_dir_path, asset_path)
1266
1278
  asset_abs_path_temp = asset_abs_path + "~"
1267
1279
  # create asset subdir if not exist
1268
1280
  asset_abs_path_dir = os.path.dirname(asset_abs_path)
1269
- if not os.path.isdir(asset_abs_path_dir):
1270
- os.makedirs(asset_abs_path_dir)
1281
+ os.makedirs(asset_abs_path_dir, exist_ok=True)
1271
1282
  # remove temporary file
1272
1283
  if os.path.isfile(asset_abs_path_temp):
1273
1284
  os.remove(asset_abs_path_temp)
@@ -1283,6 +1294,27 @@ class HTTPDownload(Download):
1283
1294
  os.path.basename(asset_abs_path),
1284
1295
  )
1285
1296
  os.rename(asset_abs_path_temp, asset_abs_path)
1297
+ return
1298
+
1299
+ # use parallelization if possible
1300
+ # when products are already downloaded in parallel but the executor has only one worker,
1301
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
1302
+ if (
1303
+ executor._thread_name_prefix == "eodag-download-all"
1304
+ and executor._max_workers == 1
1305
+ ):
1306
+ for asset_stream in assets_stream_list:
1307
+ download_asset(asset_stream)
1308
+ else:
1309
+ futures = (
1310
+ executor.submit(download_asset, asset_stream)
1311
+ for asset_stream in assets_stream_list
1312
+ )
1313
+ [f.result() for f in as_completed(futures)]
1314
+
1315
+ if shutdown_executor:
1316
+ executor.shutdown(wait=True)
1317
+
1286
1318
  # only one local asset
1287
1319
  if local_assets_count == len(assets_urls) and local_assets_count == 1:
1288
1320
  # remove empty {fs_dir_path}
@@ -1336,6 +1368,7 @@ class HTTPDownload(Download):
1336
1368
  def _get_asset_sizes(
1337
1369
  self,
1338
1370
  assets_values: list[Asset],
1371
+ executor: ThreadPoolExecutor,
1339
1372
  auth: Optional[AuthBase],
1340
1373
  params: Optional[dict[str, str]],
1341
1374
  zipped: bool = False,
@@ -1344,8 +1377,11 @@ class HTTPDownload(Download):
1344
1377
 
1345
1378
  timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1346
1379
  ssl_verify = getattr(self.config, "ssl_verify", True)
1347
- # loop for assets size & filename
1348
- for asset in assets_values:
1380
+
1381
+ # loop for assets size & filename in parallel
1382
+ def fetch_asset_size(asset: Asset) -> None:
1383
+ nonlocal total_size
1384
+
1349
1385
  if asset["href"] and not asset["href"].startswith("file:"):
1350
1386
  # HEAD request for size & filename
1351
1387
  try:
@@ -1407,27 +1443,20 @@ class HTTPDownload(Download):
1407
1443
  asset.size = int(size_str) if size_str.isdigit() else 0
1408
1444
 
1409
1445
  total_size += asset.size
1410
- return total_size
1411
1446
 
1412
- def download_all(
1413
- self,
1414
- products: SearchResult,
1415
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
1416
- downloaded_callback: Optional[DownloadedCallback] = None,
1417
- progress_callback: Optional[ProgressCallback] = None,
1418
- wait: float = DEFAULT_DOWNLOAD_WAIT,
1419
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
1420
- **kwargs: Unpack[DownloadConf],
1421
- ):
1422
- """
1423
- Download all using parent (base plugin) method
1424
- """
1425
- return super(HTTPDownload, self).download_all(
1426
- products,
1427
- auth=auth,
1428
- downloaded_callback=downloaded_callback,
1429
- progress_callback=progress_callback,
1430
- wait=wait,
1431
- timeout=timeout,
1432
- **kwargs,
1433
- )
1447
+ # use parallelization if possible
1448
+ # when products are already downloaded in parallel but the executor has only one worker,
1449
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
1450
+ if (
1451
+ executor._thread_name_prefix == "eodag-download-all"
1452
+ and executor._max_workers == 1
1453
+ ):
1454
+ for asset in assets_values:
1455
+ fetch_asset_size(asset)
1456
+ else:
1457
+ futures = (
1458
+ executor.submit(fetch_asset_size, asset) for asset in assets_values
1459
+ )
1460
+ [f.result() for f in as_completed(futures)]
1461
+
1462
+ return total_size
@@ -49,6 +49,7 @@
49
49
  usgs:productId: '$.id'
50
50
  extract: True
51
51
  order_enabled: true
52
+ max_workers: 2
52
53
  products:
53
54
  # datasets list http://kapadia.github.io/usgs/_sources/reference/catalog/ee.txt may be outdated
54
55
  # see also https://dds.cr.usgs.gov/ee-data/coveragemaps/shp/ee/
@@ -2558,6 +2559,7 @@
2558
2559
  extract: true
2559
2560
  order_enabled: false
2560
2561
  archive_depth: 2
2562
+ max_workers: 4
2561
2563
  ssl_verify: true
2562
2564
  auth: !plugin
2563
2565
  type: KeycloakOIDCPasswordAuth
eodag/utils/s3.py CHANGED
@@ -391,7 +391,7 @@ def stream_download_from_s3(
391
391
  compress: Literal["zip", "raw", "auto"] = "auto",
392
392
  zip_filename: str = "archive",
393
393
  range_size: int = 1024**2 * 8,
394
- max_workers: int = 8,
394
+ provider_max_workers: Optional[int] = None,
395
395
  ) -> StreamResponse:
396
396
  """
397
397
  Stream data from one or more S3 objects in chunks, with support for global byte ranges.
@@ -431,7 +431,7 @@ def stream_download_from_s3(
431
431
  Only used when creating ZIP archives.
432
432
  :param range_size: Size of each download chunk in bytes. Larger chunks reduce
433
433
  request overhead but use more memory. Default: 8MB.
434
- :param max_workers: Maximum number of concurrent download threads.
434
+ :param provider_max_workers: (optional) Maximum number of concurrent download threads of the provider used.
435
435
  Higher values improve throughput for multiple ranges.
436
436
  :return: StreamResponse object containing:
437
437
 
@@ -480,7 +480,7 @@ def stream_download_from_s3(
480
480
  pass
481
481
  """
482
482
 
483
- executor = ThreadPoolExecutor(max_workers=max_workers)
483
+ executor = ThreadPoolExecutor(max_workers=provider_max_workers)
484
484
 
485
485
  # Prepare all files
486
486
  offset = 0
@@ -596,7 +596,7 @@ def update_assets_from_s3(
596
596
  def open_s3_zipped_object(
597
597
  bucket_name: str,
598
598
  key_name: str,
599
- s3_client,
599
+ s3_client: S3Client,
600
600
  zip_size: Optional[int] = None,
601
601
  partial: bool = True,
602
602
  ) -> tuple[ZipFile, bytes]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eodag
3
- Version: 4.0.0a4
3
+ Version: 4.0.0a5
4
4
  Summary: Earth Observation Data Access Gateway
5
5
  Home-page: https://github.com/CS-SI/eodag
6
6
  Author: CS GROUP - France
@@ -1,16 +1,16 @@
1
1
  eodag/__init__.py,sha256=vS9nq3ZOcdlHxCakeRqCV4nm8q3r7ZKr-p7RQg0gatg,1627
2
- eodag/cli.py,sha256=zTb9oyq8BBJI1Iftfp_8iJF_EHaAPr_c_4SyLnyMcaA,22023
2
+ eodag/cli.py,sha256=QZqnhopBdPasNWdlti7n2VQ8sYylUoWIKT7_P1ev-pw,22340
3
3
  eodag/config.py,sha256=ZdWE4rZCcR8OX5jGv7zKEQ4CgH5bNk_PpNGe8HlhjR4,35054
4
4
  eodag/crunch.py,sha256=fLVAPGVPw31N_DrnFk4gkCpQZLMY8oBhK6NUSYmdr24,1099
5
5
  eodag/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  eodag/api/__init__.py,sha256=ytr30NUVmEtmJTsp3QCwkCIhS1nF6UlFCv0vmySHN7g,735
7
7
  eodag/api/collection.py,sha256=ZEKTB4yKJAIQfjAFkhHZ76Vw-KQ9qPAmpzkZqJ4KGUU,13804
8
- eodag/api/core.py,sha256=c0RWQJgVluTQCopVHZMmiqhusUdZcekDQZILiyyaMmc,102269
8
+ eodag/api/core.py,sha256=KJWj_HRmZpMfrjvzMgLVLO4SwXrirGJfPp9rRHIo6J4,103037
9
9
  eodag/api/provider.py,sha256=ZvkmJNYRCcYMP18X5KLSVhXZTvG1gkltfah1OA4LV-8,37279
10
10
  eodag/api/search_result.py,sha256=LGWIQgbhQF5Lz489vcvalb2VsYtplyzUXGWMqduP0uc,21107
11
11
  eodag/api/product/__init__.py,sha256=6-RXKAw-sZVNU-7KjpmSqLc35dfc0V42wI5GqIzH2QA,2501
12
12
  eodag/api/product/_assets.py,sha256=9bWIe_SYvsQO-q_lQFd7SxhUIWv7feze2-wnXOe8hhs,7570
13
- eodag/api/product/_product.py,sha256=Sxc1VnNWbtivpAdpwxmTjLCFQ8P7JedzFnIRZA25hO4,27475
13
+ eodag/api/product/_product.py,sha256=CdoQG7SuX-lNfd4Ob3M9ET7jdNSovEekS5_Jq0AmIao,28456
14
14
  eodag/api/product/metadata_mapping.py,sha256=OhB5JhyMLwCrXYJbdk2Nep0Kd-lFeiCWLtw1pa3XukU,76821
15
15
  eodag/api/product/drivers/__init__.py,sha256=v85lXHsfrkn59uyMgConb5ZY9acCCx95srdsh_wFIlM,1998
16
16
  eodag/api/product/drivers/base.py,sha256=MvaXicICn9DxXsaJuK34nPM3__TksstZXSVfABcINAc,2776
@@ -22,8 +22,8 @@ eodag/plugins/base.py,sha256=8Us6InkQu59Ya0Ne8se2Q6ZtU85IpCpcQqO9Z2ECDWI,2746
22
22
  eodag/plugins/manager.py,sha256=4annSLfT6N3o-japomvIUCTpDXfMgzfhBkvHVSKbVf0,19748
23
23
  eodag/plugins/apis/__init__.py,sha256=PyY4f7P2iu3MkLPnw5eOrVew2fuavbBL3Asci3Ulwoo,744
24
24
  eodag/plugins/apis/base.py,sha256=PZwAg0uINSEQcHSzTgU9xSAb-WaGMbmDL-jhrmybtLQ,2814
25
- eodag/plugins/apis/ecmwf.py,sha256=6_Zr3u2XHJl6XijNZOWazdtqi8v_b9D4WNET-Eub5qs,11767
26
- eodag/plugins/apis/usgs.py,sha256=V1R13tqPgVJ_kMoEdbbFT-lx5mZ7yYqRtna--wmTXAk,20029
25
+ eodag/plugins/apis/ecmwf.py,sha256=qjELC_rwSY3Q3aZbwmJ8-eBq9FVZDt4uWdjK77eB-34,11075
26
+ eodag/plugins/apis/usgs.py,sha256=e8QFXg3zqFyMx6R3-gChJzWjUNZ65VULLsrOTqKXwLM,19338
27
27
  eodag/plugins/authentication/__init__.py,sha256=_LVw42Bb1IhGAZH5xHRaS4b1iFoF9e27KDZOyoSoJHY,1039
28
28
  eodag/plugins/authentication/aws_auth.py,sha256=A0sr5mVmCkAwCldlGSGFcZqnthusbcB52THthja77cw,12651
29
29
  eodag/plugins/authentication/base.py,sha256=wp-OI0G4DbUFykuehvyh4IJLAJyicyKiIH9k5z3W7Mo,3506
@@ -43,9 +43,9 @@ eodag/plugins/crunch/filter_latest_tpl_name.py,sha256=Aau0RMBRCblVL2kapEaV0SMZXd
43
43
  eodag/plugins/crunch/filter_overlap.py,sha256=tU3h15jyfePOQPgGvowHFm2ZuckEbAo6lgLQNaPiJ2Q,7142
44
44
  eodag/plugins/crunch/filter_property.py,sha256=2BKb7wxw1Yi2NTtnPCBtdZ-caJXxlVUUS2ps4LHXOMI,3187
45
45
  eodag/plugins/download/__init__.py,sha256=zqszaeNgYP0YHlZDkLMf6odcwNw0KrAahGpcA-l0kAw,740
46
- eodag/plugins/download/aws.py,sha256=x0HWR9O5U3kFo7W8qHcZhkCqzWEkSw_XEVIdx1zX7Qg,46884
47
- eodag/plugins/download/base.py,sha256=TVUCDMuDkRo9EKYj5o3zUuOl76VJfQ6GlefD1EFwSTs,30807
48
- eodag/plugins/download/http.py,sha256=DDeHZkVi6E0Zi7YQxRB9z8AENKwBsUdri68mozVI5OI,58608
46
+ eodag/plugins/download/aws.py,sha256=n3C3XZl-wFdCzsSB-OuDem985iFPVCnfAy4SWUjOkj8,48653
47
+ eodag/plugins/download/base.py,sha256=--30euISg80V_STkwRUhg6ocvQnQuSWJwlnh5FMv8ts,34504
48
+ eodag/plugins/download/http.py,sha256=DB46ff8UvXXOykrGef3qXWar5NtXqWjdswdlYvf3Nwk,59792
49
49
  eodag/plugins/search/__init__.py,sha256=z_OD0bIloltQIJ9D0-pLC6o6nT0VmX2PRtMn_nLwWDQ,2174
50
50
  eodag/plugins/search/base.py,sha256=yBmfca1EcR8c8pDD0GLnyZkU1UQ_8RNh1X8xqV-gHRQ,22235
51
51
  eodag/plugins/search/build_search_result.py,sha256=4QZKXEBj5n1EE0BGciLfApSEYxxk-2srlVrq27lL1hg,62934
@@ -59,7 +59,7 @@ eodag/resources/collections.yml,sha256=0JZr5y8V8QlB1MGqGzs5-Pmrk1ZYkEBlS3VYrg22N
59
59
  eodag/resources/ext_collections.json,sha256=_HLFBMx6jSaFG2ZYAbTqSZUr9Rf87tQgLd8Ibad4khc,2587896
60
60
  eodag/resources/ext_product_types.json,sha256=uNhgmppM6ZrxPSWWfR1rANavy1f88stFORElk-WkCxA,2607329
61
61
  eodag/resources/locations_conf_template.yml,sha256=_eBv-QKHYMIKhY0b0kp4Ee33RsayxN8LWH3kDXxfFSk,986
62
- eodag/resources/providers.yml,sha256=2ryHvNIaBpETbAJUQzXAbY08crWxzbBUfTJ1yprVZr0,237882
62
+ eodag/resources/providers.yml,sha256=yBz5S-E5MR_zdp5yTGbkQ6I9Tk9Kax_Pdk0AstggUqM,237920
63
63
  eodag/resources/stac_provider.yml,sha256=RNhFNhv7593VkY2od-LQgUBdIO1WXL1Rko7UyjLaxFY,4648
64
64
  eodag/resources/user_conf_template.yml,sha256=aHSiscSQ3B4Dd18709iQAX7tFrkMufRC1a9akcNVVTs,7541
65
65
  eodag/resources/shp/ne_110m_admin_0_map_units.VERSION.txt,sha256=CHSo_jbv-4d4D0MYRbWn2FvmV_K9mYzo7qznF4YNO3g,7
@@ -84,11 +84,11 @@ eodag/utils/logging.py,sha256=KoMsyS1f6O1hr_SMDOIxvt842mOJgmu_yLUk0-0EKFs,3507
84
84
  eodag/utils/notebook.py,sha256=AUxtayvu26qYf3x3Eu3ujRl1XDgy24EfQaETbqmXSZw,2703
85
85
  eodag/utils/repr.py,sha256=72BIKFq07aU4YrQVJJX-AADdWXAhJqC4LXGmkbCo1kA,5537
86
86
  eodag/utils/requests.py,sha256=avNHKrOZ7Kp6lUA7u4kqupIth9MoirLzDsMrrmQDt4s,4560
87
- eodag/utils/s3.py,sha256=eESanPLVv-Luqo_o1WgUuO7YLqiXg_iEzHZ15fu-ugY,30063
87
+ eodag/utils/s3.py,sha256=Yu-yF0ebIm03XOm94p4SkDi2WzU_ZTv4kVJ3VGBo-Z4,30145
88
88
  eodag/utils/stac_reader.py,sha256=8r6amio5EtwGF9iu9zHaGDz4oUPKKeXRuyTzPNakrO4,9406
89
- eodag-4.0.0a4.dist-info/licenses/LICENSE,sha256=4MAecetnRTQw5DlHtiikDSzKWO1xVLwzM5_DsPMYlnE,10172
90
- eodag-4.0.0a4.dist-info/METADATA,sha256=deJg_rjOM8_P2iTCZWGswMBYbVZmrSZDkAGxRlgn8cA,12739
91
- eodag-4.0.0a4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
92
- eodag-4.0.0a4.dist-info/entry_points.txt,sha256=atMIh-Q4hRsOdw1_778mDIhWFHQJigEo3x-0fMqhqLE,2254
93
- eodag-4.0.0a4.dist-info/top_level.txt,sha256=025IMTmVe5eDjIPP4KEFQKespOPMQdne4U4jOy8nftM,6
94
- eodag-4.0.0a4.dist-info/RECORD,,
89
+ eodag-4.0.0a5.dist-info/licenses/LICENSE,sha256=4MAecetnRTQw5DlHtiikDSzKWO1xVLwzM5_DsPMYlnE,10172
90
+ eodag-4.0.0a5.dist-info/METADATA,sha256=NccWfNN7f57un7W4xCpEBhpt2UWQFYSjNImlVg9uvx8,12739
91
+ eodag-4.0.0a5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
92
+ eodag-4.0.0a5.dist-info/entry_points.txt,sha256=atMIh-Q4hRsOdw1_778mDIhWFHQJigEo3x-0fMqhqLE,2254
93
+ eodag-4.0.0a5.dist-info/top_level.txt,sha256=025IMTmVe5eDjIPP4KEFQKespOPMQdne4U4jOy8nftM,6
94
+ eodag-4.0.0a5.dist-info/RECORD,,