eodag 4.0.0a4__py3-none-any.whl → 4.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. eodag/api/collection.py +65 -1
  2. eodag/api/core.py +65 -19
  3. eodag/api/product/_assets.py +1 -1
  4. eodag/api/product/_product.py +133 -18
  5. eodag/api/product/drivers/__init__.py +3 -1
  6. eodag/api/product/drivers/base.py +3 -1
  7. eodag/api/product/drivers/generic.py +9 -5
  8. eodag/api/product/drivers/sentinel1.py +14 -9
  9. eodag/api/product/drivers/sentinel2.py +14 -7
  10. eodag/api/product/metadata_mapping.py +5 -2
  11. eodag/api/provider.py +1 -0
  12. eodag/api/search_result.py +4 -1
  13. eodag/cli.py +17 -8
  14. eodag/config.py +22 -4
  15. eodag/plugins/apis/ecmwf.py +3 -24
  16. eodag/plugins/apis/usgs.py +3 -24
  17. eodag/plugins/download/aws.py +85 -44
  18. eodag/plugins/download/base.py +117 -41
  19. eodag/plugins/download/http.py +88 -65
  20. eodag/plugins/search/base.py +8 -3
  21. eodag/plugins/search/build_search_result.py +108 -120
  22. eodag/plugins/search/cop_marine.py +3 -1
  23. eodag/plugins/search/qssearch.py +7 -6
  24. eodag/resources/collections.yml +255 -0
  25. eodag/resources/ext_collections.json +1 -1
  26. eodag/resources/ext_product_types.json +1 -1
  27. eodag/resources/providers.yml +62 -25
  28. eodag/resources/user_conf_template.yml +6 -0
  29. eodag/types/__init__.py +22 -16
  30. eodag/types/download_args.py +3 -1
  31. eodag/types/queryables.py +125 -55
  32. eodag/types/stac_extensions.py +408 -0
  33. eodag/types/stac_metadata.py +312 -0
  34. eodag/utils/__init__.py +42 -4
  35. eodag/utils/dates.py +202 -2
  36. eodag/utils/s3.py +4 -4
  37. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/METADATA +7 -13
  38. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/RECORD +42 -40
  39. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/WHEEL +1 -1
  40. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/entry_points.txt +1 -1
  41. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/licenses/LICENSE +0 -0
  42. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/top_level.txt +0 -0
@@ -29,6 +29,8 @@ from pathlib import Path
29
29
  from time import sleep
30
30
  from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, Union
31
31
 
32
+ from concurrent.futures import ThreadPoolExecutor, as_completed
33
+
32
34
  from eodag.api.product.metadata_mapping import ONLINE_STATUS
33
35
  from eodag.plugins.base import PluginTopic
34
36
  from eodag.utils import (
@@ -105,6 +107,7 @@ class Download(PluginTopic):
105
107
  product: EOProduct,
106
108
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
107
109
  progress_callback: Optional[ProgressCallback] = None,
110
+ executor: Optional[ThreadPoolExecutor] = None,
108
111
  wait: float = DEFAULT_DOWNLOAD_WAIT,
109
112
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
110
113
  **kwargs: Unpack[DownloadConf],
@@ -115,6 +118,7 @@ class Download(PluginTopic):
115
118
  :param product: The EO product to download
116
119
  :param auth: (optional) authenticated object
117
120
  :param progress_callback: (optional) A progress callback
121
+ :param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
118
122
  :param wait: (optional) If download fails, wait time in minutes between two download tries
119
123
  :param timeout: (optional) If download fails, maximum time in minutes before stop retrying
120
124
  to download
@@ -447,6 +451,7 @@ class Download(PluginTopic):
447
451
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
448
452
  downloaded_callback: Optional[DownloadedCallback] = None,
449
453
  progress_callback: Optional[ProgressCallback] = None,
454
+ executor: Optional[ThreadPoolExecutor] = None,
450
455
  wait: float = DEFAULT_DOWNLOAD_WAIT,
451
456
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
452
457
  **kwargs: Unpack[DownloadConf],
@@ -454,7 +459,7 @@ class Download(PluginTopic):
454
459
  """
455
460
  Base download_all method.
456
461
 
457
- This specific implementation uses the :meth:`eodag.plugins.download.base.Download.download` method
462
+ This specific implementation uses the :meth:`~eodag.api.product._product.EOProduct.download` method
458
463
  implemented by the plugin to attempt to download products **in parallel**, using an executor.
459
464
 
460
465
  :param products: Products to download
@@ -465,6 +470,8 @@ class Download(PluginTopic):
465
470
  its ``__call__`` method. Will be called each time a product
466
471
  finishes downloading
467
472
  :param progress_callback: (optional) A progress callback
473
+ :param executor: (optional) An executor to download products in parallel which may
474
+ be reused to also download assets of these products in parallel.
468
475
  :param wait: (optional) If download fails, wait time in minutes between two download tries
469
476
  :param timeout: (optional) If download fails, maximum time in minutes before stop retrying
470
477
  to download
@@ -485,9 +492,15 @@ class Download(PluginTopic):
485
492
  stop_time = start_time + timedelta(minutes=timeout)
486
493
  nb_products = len(products)
487
494
  retry_count = 0
488
- # another output for notbooks
495
+ # another output for notebooks
489
496
  nb_info = NotebookWidgets()
490
497
 
498
+ # create an executor if not given
499
+ executor = ThreadPoolExecutor() if executor is None else executor
500
+ # set thread name prefix so that the EOProduct download() method can identify
501
+ # whether the executor was created during parallel product downloads or not
502
+ self._config_executor(executor, "eodag-download-all")
503
+
491
504
  for product in products:
492
505
  product.next_try = start_time
493
506
 
@@ -508,53 +521,88 @@ class Download(PluginTopic):
508
521
  progress_callback.unit_scale = False
509
522
  progress_callback.refresh()
510
523
 
524
+ # anticipate nested tasks to download assets in parallel for at least one product
525
+ nested_asset_downloads = any(
526
+ product
527
+ for product in products
528
+ if (
529
+ product.downloader
530
+ and product.downloader.config.type == "AwsDownload"
531
+ or len(product.assets) > 0
532
+ and (
533
+ not getattr(self.config, "ignore_assets", False)
534
+ or kwargs.get("asset") is not None
535
+ )
536
+ )
537
+ )
538
+
511
539
  with progress_callback as bar:
512
540
  while "Loop until all products are download or timeout is reached":
513
- # try downloading each product before retry
514
- for idx, product in enumerate(products):
541
+ # try downloading each product in parallel before retry
542
+
543
+ # Download products in batches to handle nested tasks to download assets in parallel.
544
+ # We avoid having fewer workers in the executor than the number of products to download in parallel
545
+ # to prevent deadlocks. This could happen by submitting and waiting for a task within a task.
546
+ # We ensure at least one thread is available for these tasks and at least one product is downloaded
547
+ # at a time.
548
+ # If there is only one worker, a specific process at assets download level is used to avoid deadlocks.
549
+ batch_size = len(products)
550
+ if nested_asset_downloads and executor._max_workers <= batch_size:
551
+ batch_size = max(executor._max_workers - 1, 1)
552
+
553
+ products_batch = products[:batch_size]
554
+ futures = {}
555
+
556
+ for idx, product in enumerate(products_batch):
515
557
  if datetime.now() >= product.next_try:
516
558
  products[idx].next_try += timedelta(minutes=wait)
517
- try:
518
- paths.append(
519
- product.download(
520
- progress_callback=product_progress_callback,
521
- wait=wait,
522
- timeout=-1,
523
- **kwargs,
524
- )
525
- )
559
+ future = executor.submit(
560
+ product.download,
561
+ progress_callback=product_progress_callback,
562
+ executor=executor,
563
+ wait=wait,
564
+ timeout=-1,
565
+ **kwargs, # type: ignore
566
+ )
567
+ futures[future] = product
526
568
 
527
- if downloaded_callback:
528
- downloaded_callback(product)
569
+ for future in as_completed(futures.keys()):
570
+ product = futures[future]
571
+ try:
572
+ result = future.result()
573
+ paths.append(result)
529
574
 
530
- # product downloaded, to not retry it
531
- products.remove(product)
532
- bar(1)
575
+ if downloaded_callback:
576
+ downloaded_callback(product)
533
577
 
534
- # reset stop time for next product
535
- stop_time = datetime.now() + timedelta(minutes=timeout)
578
+ # product downloaded, to not retry it
579
+ products.remove(product)
580
+ bar(1)
536
581
 
537
- except NotAvailableError as e:
538
- logger.info(e)
539
- continue
582
+ # reset stop time for next product
583
+ stop_time = datetime.now() + timedelta(minutes=timeout)
540
584
 
541
- except (AuthenticationError, MisconfiguredError):
542
- logger.exception(
543
- f"Stopped because of credentials problems with provider {self.provider}"
544
- )
545
- raise
585
+ except NotAvailableError as e:
586
+ logger.info(e)
587
+ continue
546
588
 
547
- except (RuntimeError, Exception):
548
- import traceback as tb
589
+ except (AuthenticationError, MisconfiguredError):
590
+ logger.exception(
591
+ f"Stopped because of credentials problems with provider {self.provider}"
592
+ )
593
+ raise
549
594
 
550
- logger.error(
551
- f"A problem occurred during download of product: {product}. "
552
- "Skipping it"
553
- )
554
- logger.debug(f"\n{tb.format_exc()}")
595
+ except (RuntimeError, Exception):
596
+ import traceback as tb
597
+
598
+ logger.error(
599
+ f"A problem occurred during download of product: {product}. "
600
+ "Skipping it"
601
+ )
602
+ logger.debug(f"\n{tb.format_exc()}")
555
603
 
556
- # product skipped, to not retry it
557
- products.remove(product)
604
+ # product skipped, to not retry it
605
+ products.remove(product)
558
606
 
559
607
  if (
560
608
  len(products) > 0
@@ -567,6 +615,7 @@ class Download(PluginTopic):
567
615
  f"[Retry #{retry_count}, {nb_products - len(products)}/{nb_products} D/L] "
568
616
  f"Waiting {wait_seconds}s until next download try (retry every {wait}' for {timeout}')"
569
617
  )
618
+
570
619
  logger.info(info_message)
571
620
  nb_info.display_html(info_message)
572
621
  sleep(wait_seconds + 1)
@@ -579,6 +628,9 @@ class Download(PluginTopic):
579
628
  elif len(products) == 0:
580
629
  break
581
630
 
631
+ # Shutdown executor at the end
632
+ executor.shutdown(wait=True)
633
+
582
634
  return paths
583
635
 
584
636
  def _order_download_retry(
@@ -641,8 +693,8 @@ class Download(PluginTopic):
641
693
  )
642
694
  logger.info(not_available_info)
643
695
  # Retry-After info from Response header
644
- if hasattr(self, "stream"):
645
- retry_server_info = self.stream.headers.get(
696
+ if hasattr(product, "_stream"):
697
+ retry_server_info = product._stream.headers.get(
646
698
  "Retry-After", ""
647
699
  )
648
700
  if retry_server_info:
@@ -663,8 +715,8 @@ class Download(PluginTopic):
663
715
  )
664
716
  logger.info(not_available_info)
665
717
  # Retry-After info from Response header
666
- if hasattr(self, "stream"):
667
- retry_server_info = self.stream.headers.get(
718
+ if hasattr(product, "_stream"):
719
+ retry_server_info = product._stream.headers.get(
668
720
  "Retry-After", ""
669
721
  )
670
722
  if retry_server_info:
@@ -690,3 +742,27 @@ class Download(PluginTopic):
690
742
  return download_and_retry
691
743
 
692
744
  return decorator
745
+
746
def _config_executor(
    self, executor: ThreadPoolExecutor, thread_name_prefix: Optional[str] = None
) -> None:
    """
    Configure a ThreadPoolExecutor instance in place.

    Lowers the executor's maximum number of workers to the plugin's configured
    ``max_workers`` when that value is set and smaller than the executor's current
    limit (the limit is never raised). Optionally sets the thread name prefix, which
    other download code reads back from the executor to tell whether it was set up
    for parallel product downloads.

    :param executor: A ThreadPoolExecutor instance, modified in place.
    :param thread_name_prefix: (optional) A prefix for naming threads created by the executor.
                               When provided, it is stored on the executor to help identify
                               the executor's purpose (e.g., "eodag-download-all").
                               When ``None`` or empty, the executor's prefix is left untouched.
    """
    # ``max_workers`` may be absent from the plugin configuration; a ``None`` value is
    # treated the same way so the comparison below cannot raise a TypeError
    configured_max_workers = getattr(self.config, "max_workers", None)
    if (
        configured_max_workers is not None
        and configured_max_workers < executor._max_workers
    ):
        # NOTE: relies on ThreadPoolExecutor private attributes, as the executor may
        # have been created by the caller with its own settings
        executor._max_workers = configured_max_workers

    if thread_name_prefix:
        # use the prefix given by the caller instead of a hard-coded value
        executor._thread_name_prefix = thread_name_prefix
@@ -27,24 +27,17 @@ from email.message import Message
27
27
  from itertools import chain
28
28
  from json import JSONDecodeError
29
29
  from pathlib import Path
30
- from typing import (
31
- TYPE_CHECKING,
32
- Any,
33
- Iterator,
34
- Literal,
35
- Optional,
36
- TypedDict,
37
- Union,
38
- cast,
39
- )
30
+ from typing import TYPE_CHECKING, Any, Iterator, Literal, Optional, Union, cast
40
31
  from urllib.parse import parse_qs, urlparse
41
32
 
42
33
  import geojson
43
34
  import requests
35
+ from concurrent.futures import ThreadPoolExecutor, as_completed
44
36
  from lxml import etree
45
37
  from requests import RequestException
46
38
  from requests.auth import AuthBase
47
39
  from requests.structures import CaseInsensitiveDict
40
+ from typing_extensions import TypedDict
48
41
  from zipstream import ZipStream
49
42
 
50
43
  from eodag.api.product.metadata_mapping import (
@@ -90,10 +83,9 @@ if TYPE_CHECKING:
90
83
  from requests import Response
91
84
 
92
85
  from eodag.api.product import Asset, EOProduct # type: ignore
93
- from eodag.api.search_result import SearchResult
94
86
  from eodag.config import PluginConfig
95
87
  from eodag.types.download_args import DownloadConf
96
- from eodag.utils import DownloadedCallback, Unpack
88
+ from eodag.utils import Unpack
97
89
 
98
90
  logger = logging.getLogger("eodag.download.http")
99
91
 
@@ -476,6 +468,8 @@ class HTTPDownload(Download):
476
468
  if (
477
469
  success_status and success_status != status_dict.get("eodag:order_status")
478
470
  ) or (success_code and success_code != response.status_code):
471
+ # Remove the download link if the order has not been completed or was not successful
472
+ product.properties.pop("eodag:download_link", None)
479
473
  return None
480
474
 
481
475
  product.properties["order:status"] = ONLINE_STATUS
@@ -596,6 +590,7 @@ class HTTPDownload(Download):
596
590
  product: EOProduct,
597
591
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
598
592
  progress_callback: Optional[ProgressCallback] = None,
593
+ executor: Optional[ThreadPoolExecutor] = None,
599
594
  wait: float = DEFAULT_DOWNLOAD_WAIT,
600
595
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
601
596
  **kwargs: Unpack[DownloadConf],
@@ -637,6 +632,7 @@ class HTTPDownload(Download):
637
632
  record_filename,
638
633
  auth,
639
634
  progress_callback,
635
+ executor,
640
636
  **kwargs,
641
637
  )
642
638
  if kwargs.get("asset") is None:
@@ -674,7 +670,7 @@ class HTTPDownload(Download):
674
670
  is_empty = False
675
671
  progress_callback(len(chunk))
676
672
  fhandle.write(chunk)
677
- self.stream.close() # Closing response stream
673
+ product._stream.close() # Closing response stream
678
674
 
679
675
  if is_empty:
680
676
  raise DownloadError(f"product {product.properties['id']} is empty")
@@ -720,7 +716,7 @@ class HTTPDownload(Download):
720
716
  return product_path
721
717
 
722
718
  def _check_stream_size(self, product: EOProduct) -> int:
723
- stream_size = int(self.stream.headers.get("content-length", 0))
719
+ stream_size = int(product._stream.headers.get("content-length", 0))
724
720
  if (
725
721
  stream_size == 0
726
722
  and "order:status" in product.properties
@@ -731,14 +727,14 @@ class HTTPDownload(Download):
731
727
  % (
732
728
  product.properties["title"],
733
729
  product.properties["order:status"],
734
- self.stream.reason,
730
+ product._stream.reason,
735
731
  )
736
732
  )
737
733
  return stream_size
738
734
 
739
735
  def _check_product_filename(self, product: EOProduct) -> str:
740
736
  filename = None
741
- asset_content_disposition = self.stream.headers.get("content-disposition")
737
+ asset_content_disposition = product._stream.headers.get("content-disposition")
742
738
  if asset_content_disposition:
743
739
  filename = cast(
744
740
  Optional[str],
@@ -746,7 +742,7 @@ class HTTPDownload(Download):
746
742
  )
747
743
  if not filename:
748
744
  # default filename extracted from path
749
- filename = str(os.path.basename(self.stream.url))
745
+ filename = str(os.path.basename(product._stream.url))
750
746
  filename_extension = os.path.splitext(filename)[1]
751
747
  if not filename_extension:
752
748
  if content_type := getattr(product, "headers", {}).get("Content-Type"):
@@ -789,15 +785,20 @@ class HTTPDownload(Download):
789
785
  not getattr(self.config, "ignore_assets", False)
790
786
  or kwargs.get("asset") is not None
791
787
  ):
788
+ executor = ThreadPoolExecutor(
789
+ max_workers=getattr(self.config, "max_workers", None)
790
+ )
792
791
  try:
793
792
  assets_values = product.assets.get_values(kwargs.get("asset"))
794
- assets_stream_list = self._stream_download_assets(
795
- product,
796
- auth,
797
- None,
798
- assets_values=assets_values,
799
- **kwargs,
800
- )
793
+ with executor:
794
+ assets_stream_list = self._stream_download_assets(
795
+ product,
796
+ executor,
797
+ auth,
798
+ None,
799
+ assets_values,
800
+ **kwargs,
801
+ )
801
802
 
802
803
  # single asset
803
804
  if len(assets_stream_list) == 1:
@@ -1009,7 +1010,7 @@ class HTTPDownload(Download):
1009
1010
 
1010
1011
  s = requests.Session()
1011
1012
  try:
1012
- self.stream = s.request(
1013
+ product._stream = s.request(
1013
1014
  req_method,
1014
1015
  req_url,
1015
1016
  stream=True,
@@ -1024,7 +1025,7 @@ class HTTPDownload(Download):
1024
1025
  # location is not a valid url -> product is not available yet
1025
1026
  raise NotAvailableError("Product is not available yet")
1026
1027
  try:
1027
- self.stream.raise_for_status()
1028
+ product._stream.raise_for_status()
1028
1029
  except requests.exceptions.Timeout as exc:
1029
1030
  raise TimeOutError(exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT) from exc
1030
1031
  except RequestException as e:
@@ -1036,8 +1037,8 @@ class HTTPDownload(Download):
1036
1037
  # check if product was ordered
1037
1038
 
1038
1039
  if getattr(
1039
- self.stream, "status_code", None
1040
- ) is not None and self.stream.status_code == getattr(
1040
+ product._stream, "status_code", None
1041
+ ) is not None and product._stream.status_code == getattr(
1041
1042
  self.config, "order_status", {}
1042
1043
  ).get(
1043
1044
  "ordered", {}
@@ -1048,7 +1049,7 @@ class HTTPDownload(Download):
1048
1049
  self._process_exception(None, product, ordered_message)
1049
1050
  stream_size = self._check_stream_size(product) or None
1050
1051
 
1051
- product.headers = self.stream.headers
1052
+ product.headers = product._stream.headers
1052
1053
  filename = self._check_product_filename(product)
1053
1054
  content_type = product.headers.get("Content-Type")
1054
1055
  guessed_content_type = (
@@ -1061,11 +1062,12 @@ class HTTPDownload(Download):
1061
1062
  product.size = stream_size
1062
1063
 
1063
1064
  product.filename = filename
1064
- return self.stream.iter_content(chunk_size=64 * 1024)
1065
+ return product._stream.iter_content(chunk_size=64 * 1024)
1065
1066
 
1066
1067
  def _stream_download_assets(
1067
1068
  self,
1068
1069
  product: EOProduct,
1070
+ executor: ThreadPoolExecutor,
1069
1071
  auth: Optional[AuthBase] = None,
1070
1072
  progress_callback: Optional[ProgressCallback] = None,
1071
1073
  assets_values: list[Asset] = [],
@@ -1082,7 +1084,9 @@ class HTTPDownload(Download):
1082
1084
  self.config, "dl_url_params", {}
1083
1085
  )
1084
1086
 
1085
- total_size = self._get_asset_sizes(assets_values, auth, params) or None
1087
+ total_size = (
1088
+ self._get_asset_sizes(assets_values, executor, auth, params) or None
1089
+ )
1086
1090
 
1087
1091
  progress_callback.reset(total=total_size)
1088
1092
 
@@ -1188,11 +1192,6 @@ class HTTPDownload(Download):
1188
1192
 
1189
1193
  # Process each asset
1190
1194
  for asset in assets_values:
1191
- if not asset["href"] or asset["href"].startswith("file:"):
1192
- logger.info(
1193
- f"Local asset detected. Download skipped for {asset['href']}"
1194
- )
1195
- continue
1196
1195
  asset_chunks = get_chunks_generator(asset)
1197
1196
  try:
1198
1197
  # start reading chunks to set assets attributes
@@ -1220,6 +1219,7 @@ class HTTPDownload(Download):
1220
1219
  record_filename: str,
1221
1220
  auth: Optional[AuthBase] = None,
1222
1221
  progress_callback: Optional[ProgressCallback] = None,
1222
+ executor: Optional[ThreadPoolExecutor] = None,
1223
1223
  **kwargs: Unpack[DownloadConf],
1224
1224
  ) -> str:
1225
1225
  """Download product assets if they exist"""
@@ -1227,6 +1227,12 @@ class HTTPDownload(Download):
1227
1227
  logger.info("Progress bar unavailable, please call product.download()")
1228
1228
  progress_callback = ProgressCallback(disable=True)
1229
1229
 
1230
+ # create an executor if not given and anticipate the possible need to shut it down
1231
+ executor, shutdown_executor = (
1232
+ (ThreadPoolExecutor(), True) if executor is None else (executor, False)
1233
+ )
1234
+ self._config_executor(executor)
1235
+
1230
1236
  assets_urls = [
1231
1237
  a["href"] for a in getattr(product, "assets", {}).values() if "href" in a
1232
1238
  ]
@@ -1236,7 +1242,7 @@ class HTTPDownload(Download):
1236
1242
  assets_values = product.assets.get_values(kwargs.get("asset"))
1237
1243
 
1238
1244
  assets_stream_list = self._stream_download_assets(
1239
- product, auth, progress_callback, assets_values=assets_values, **kwargs
1245
+ product, executor, auth, progress_callback, assets_values, **kwargs
1240
1246
  )
1241
1247
 
1242
1248
  # remove existing incomplete file
@@ -1259,15 +1265,14 @@ class HTTPDownload(Download):
1259
1265
  local_assets_count += 1
1260
1266
  continue
1261
1267
 
1262
- for asset_stream in assets_stream_list:
1268
+ def download_asset(asset_stream: StreamResponse) -> None:
1263
1269
  asset_chunks = asset_stream.content
1264
1270
  asset_path = cast(str, asset_stream.arcname)
1265
1271
  asset_abs_path = os.path.join(fs_dir_path, asset_path)
1266
1272
  asset_abs_path_temp = asset_abs_path + "~"
1267
1273
  # create asset subdir if not exist
1268
1274
  asset_abs_path_dir = os.path.dirname(asset_abs_path)
1269
- if not os.path.isdir(asset_abs_path_dir):
1270
- os.makedirs(asset_abs_path_dir)
1275
+ os.makedirs(asset_abs_path_dir, exist_ok=True)
1271
1276
  # remove temporary file
1272
1277
  if os.path.isfile(asset_abs_path_temp):
1273
1278
  os.remove(asset_abs_path_temp)
@@ -1283,6 +1288,27 @@ class HTTPDownload(Download):
1283
1288
  os.path.basename(asset_abs_path),
1284
1289
  )
1285
1290
  os.rename(asset_abs_path_temp, asset_abs_path)
1291
+ return
1292
+
1293
+ # use parallelization if possible
1294
+ # when products are already downloaded in parallel but the executor has only one worker,
1295
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
1296
+ if (
1297
+ executor._thread_name_prefix == "eodag-download-all"
1298
+ and executor._max_workers == 1
1299
+ ):
1300
+ for asset_stream in assets_stream_list:
1301
+ download_asset(asset_stream)
1302
+ else:
1303
+ futures = (
1304
+ executor.submit(download_asset, asset_stream)
1305
+ for asset_stream in assets_stream_list
1306
+ )
1307
+ [f.result() for f in as_completed(futures)]
1308
+
1309
+ if shutdown_executor:
1310
+ executor.shutdown(wait=True)
1311
+
1286
1312
  # only one local asset
1287
1313
  if local_assets_count == len(assets_urls) and local_assets_count == 1:
1288
1314
  # remove empty {fs_dir_path}
@@ -1336,6 +1362,7 @@ class HTTPDownload(Download):
1336
1362
  def _get_asset_sizes(
1337
1363
  self,
1338
1364
  assets_values: list[Asset],
1365
+ executor: ThreadPoolExecutor,
1339
1366
  auth: Optional[AuthBase],
1340
1367
  params: Optional[dict[str, str]],
1341
1368
  zipped: bool = False,
@@ -1344,8 +1371,11 @@ class HTTPDownload(Download):
1344
1371
 
1345
1372
  timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1346
1373
  ssl_verify = getattr(self.config, "ssl_verify", True)
1347
- # loop for assets size & filename
1348
- for asset in assets_values:
1374
+
1375
+ # loop for assets size & filename in parallel
1376
+ def fetch_asset_size(asset: Asset) -> None:
1377
+ nonlocal total_size
1378
+
1349
1379
  if asset["href"] and not asset["href"].startswith("file:"):
1350
1380
  # HEAD request for size & filename
1351
1381
  try:
@@ -1407,27 +1437,20 @@ class HTTPDownload(Download):
1407
1437
  asset.size = int(size_str) if size_str.isdigit() else 0
1408
1438
 
1409
1439
  total_size += asset.size
1410
- return total_size
1411
1440
 
1412
- def download_all(
1413
- self,
1414
- products: SearchResult,
1415
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
1416
- downloaded_callback: Optional[DownloadedCallback] = None,
1417
- progress_callback: Optional[ProgressCallback] = None,
1418
- wait: float = DEFAULT_DOWNLOAD_WAIT,
1419
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
1420
- **kwargs: Unpack[DownloadConf],
1421
- ):
1422
- """
1423
- Download all using parent (base plugin) method
1424
- """
1425
- return super(HTTPDownload, self).download_all(
1426
- products,
1427
- auth=auth,
1428
- downloaded_callback=downloaded_callback,
1429
- progress_callback=progress_callback,
1430
- wait=wait,
1431
- timeout=timeout,
1432
- **kwargs,
1433
- )
1441
+ # use parallelization if possible
1442
+ # when products are already downloaded in parallel but the executor has only one worker,
1443
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
1444
+ if (
1445
+ executor._thread_name_prefix == "eodag-download-all"
1446
+ and executor._max_workers == 1
1447
+ ):
1448
+ for asset in assets_values:
1449
+ fetch_asset_size(asset)
1450
+ else:
1451
+ futures = (
1452
+ executor.submit(fetch_asset_size, asset) for asset in assets_values
1453
+ )
1454
+ [f.result() for f in as_completed(futures)]
1455
+
1456
+ return total_size
@@ -35,6 +35,7 @@ from eodag.plugins.search import PreparedSearch
35
35
  from eodag.types import model_fields_to_annotated
36
36
  from eodag.types.queryables import Queryables, QueryablesDict
37
37
  from eodag.types.search_args import SortByList
38
+ from eodag.types.stac_metadata import CommonStacMetadata, create_stac_metadata_model
38
39
  from eodag.utils import (
39
40
  GENERIC_COLLECTION,
40
41
  copy_deepcopy,
@@ -358,7 +359,7 @@ class Search(PluginTopic):
358
359
  queryables = self.discover_queryables(**{**default_values, **filters}) or {}
359
360
  except NotImplementedError as e:
360
361
  if str(e):
361
- logger.debug(str(e))
362
+ logger.debug("%s, configured metadata-mapping used", str(e))
362
363
  queryables = self.queryables_from_metadata_mapping(collection, alias)
363
364
 
364
365
  return QueryablesDict(**queryables)
@@ -408,9 +409,10 @@ class Search(PluginTopic):
408
409
  col_queryables = self._get_collection_queryables(col, None, filters)
409
410
  all_queryables.update(col_queryables)
410
411
  # reset defaults because they may vary between collections
412
+ queryables_fields = Queryables.from_stac_models().model_fields
411
413
  for k, v in all_queryables.items():
412
414
  v.__metadata__[0].default = getattr(
413
- Queryables.model_fields.get(k, Field(None)), "default", None
415
+ queryables_fields.get(k, Field(None)), "default", None
414
416
  )
415
417
  return QueryablesDict(
416
418
  additional_properties=auto_discovery,
@@ -468,8 +470,11 @@ class Search(PluginTopic):
468
470
  ):
469
471
  del metadata_mapping[param]
470
472
 
473
+ queryables_model = create_stac_metadata_model(
474
+ base_models=[Queryables, CommonStacMetadata]
475
+ )
471
476
  eodag_queryables = copy_deepcopy(
472
- model_fields_to_annotated(Queryables.model_fields)
477
+ model_fields_to_annotated(queryables_model.model_fields)
473
478
  )
474
479
  queryables["collection"] = eodag_queryables.pop("collection")
475
480
  # add default value for collection