eodag 4.0.0a4__py3-none-any.whl → 4.0.0b1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. eodag/api/collection.py +65 -1
  2. eodag/api/core.py +65 -19
  3. eodag/api/product/_assets.py +1 -1
  4. eodag/api/product/_product.py +133 -18
  5. eodag/api/product/drivers/__init__.py +3 -1
  6. eodag/api/product/drivers/base.py +3 -1
  7. eodag/api/product/drivers/generic.py +9 -5
  8. eodag/api/product/drivers/sentinel1.py +14 -9
  9. eodag/api/product/drivers/sentinel2.py +14 -7
  10. eodag/api/product/metadata_mapping.py +5 -2
  11. eodag/api/provider.py +1 -0
  12. eodag/api/search_result.py +4 -1
  13. eodag/cli.py +17 -8
  14. eodag/config.py +22 -4
  15. eodag/plugins/apis/ecmwf.py +3 -24
  16. eodag/plugins/apis/usgs.py +3 -24
  17. eodag/plugins/download/aws.py +85 -44
  18. eodag/plugins/download/base.py +117 -41
  19. eodag/plugins/download/http.py +88 -65
  20. eodag/plugins/search/base.py +8 -3
  21. eodag/plugins/search/build_search_result.py +108 -120
  22. eodag/plugins/search/cop_marine.py +3 -1
  23. eodag/plugins/search/qssearch.py +7 -6
  24. eodag/resources/collections.yml +255 -0
  25. eodag/resources/ext_collections.json +1 -1
  26. eodag/resources/ext_product_types.json +1 -1
  27. eodag/resources/providers.yml +62 -25
  28. eodag/resources/user_conf_template.yml +6 -0
  29. eodag/types/__init__.py +22 -16
  30. eodag/types/download_args.py +3 -1
  31. eodag/types/queryables.py +125 -55
  32. eodag/types/stac_extensions.py +408 -0
  33. eodag/types/stac_metadata.py +312 -0
  34. eodag/utils/__init__.py +42 -4
  35. eodag/utils/dates.py +202 -2
  36. eodag/utils/s3.py +4 -4
  37. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/METADATA +7 -13
  38. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/RECORD +42 -40
  39. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/WHEEL +1 -1
  40. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/entry_points.txt +1 -1
  41. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/licenses/LICENSE +0 -0
  42. {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/top_level.txt +0 -0
@@ -38,12 +38,13 @@ class Sentinel1Driver(DatasetDriver):
38
38
  (re.compile(r"grd", re.IGNORECASE), ""),
39
39
  (re.compile(r"slc", re.IGNORECASE), ""),
40
40
  (re.compile(r"ocn", re.IGNORECASE), ""),
41
- (re.compile(r"iw", re.IGNORECASE), ""),
42
- (re.compile(r"ew", re.IGNORECASE), ""),
41
+ (re.compile(r"(?<![A-Za-z])iw(?![A-Za-z])", re.IGNORECASE), ""),
42
+ (re.compile(r"(?<![A-Za-z])ew(?![A-Za-z])", re.IGNORECASE), ""),
43
43
  (re.compile(r"wv", re.IGNORECASE), ""),
44
- (re.compile(r"sm", re.IGNORECASE), ""),
45
- (re.compile(r"raw([-_]s)?", re.IGNORECASE), ""),
44
+ (re.compile(r"(?<![A-Za-z])sm(?![A-Za-z])", re.IGNORECASE), ""),
45
+ (re.compile(r"(?<![A-Za-z])raw([-_]s)?(?![A-Za-z])", re.IGNORECASE), ""),
46
46
  (re.compile(r"[t?0-9]{3,}", re.IGNORECASE), ""),
47
+ (re.compile(r"\b[0-9A-F]{3,}\b", re.IGNORECASE), ""),
47
48
  (re.compile(r"-+"), "-"),
48
49
  (re.compile(r"-+\."), "."),
49
50
  (re.compile(r"_+"), "_"),
@@ -55,34 +56,38 @@ class Sentinel1Driver(DatasetDriver):
55
56
  # data
56
57
  {
57
58
  "pattern": re.compile(
58
- r"^.*?([vh]{2}).*\.(?:jp2|tiff?|dat)$", re.IGNORECASE
59
+ r"^.*?([vh]{2}).*\.(?:jp2|tiff?|dat)(?:\?.*)?$", re.IGNORECASE
59
60
  ),
60
61
  "roles": ["data"],
61
62
  },
62
63
  # metadata
63
64
  {
64
65
  "pattern": re.compile(
65
- r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)$", re.IGNORECASE
66
+ r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)(?:\?.*)?$",
67
+ re.IGNORECASE,
66
68
  ),
67
69
  "roles": ["metadata"],
68
70
  },
69
71
  # thumbnail
70
72
  {
71
73
  "pattern": re.compile(
72
- r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)$", re.IGNORECASE
74
+ r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)(?:\?.*)?$", re.IGNORECASE
73
75
  ),
74
76
  "roles": ["thumbnail"],
75
77
  },
76
78
  # quicklook
77
79
  {
78
80
  "pattern": re.compile(
79
- r"^(?:.*[/\\])?([^/\\]+-ql|preview|quick-?look)(\.jpe?g|\.png)$",
81
+ r"^(?:.*[/\\])?([^/\\]+-ql|preview|quick-?look)(\.jpe?g|\.png)(?:\?.*)?$",
80
82
  re.IGNORECASE,
81
83
  ),
82
84
  "roles": ["overview"],
83
85
  },
84
86
  # default
85
- {"pattern": re.compile(r"^(?:.*[/\\])?([^/\\]+)$"), "roles": ["auxiliary"]},
87
+ {
88
+ "pattern": re.compile(r"^(?:.*[/\\])?([^/\\?]+)(?:\?.*)?$"),
89
+ "roles": ["auxiliary"],
90
+ },
86
91
  ]
87
92
 
88
93
  def _normalize_key(self, key: str, eo_product: EOProduct) -> str:
@@ -40,47 +40,54 @@ class Sentinel2Driver(DatasetDriver):
40
40
  ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = [
41
41
  # masks
42
42
  {
43
- "pattern": re.compile(r"^.*?(MSK_[^/\\]+)\.(?:jp2|tiff?)$", re.IGNORECASE),
43
+ "pattern": re.compile(
44
+ r"^.*?(MSK_[^/\\]+)\.(?:jp2|tiff?)(?:\?.*)?$", re.IGNORECASE
45
+ ),
44
46
  "roles": ["data-mask"],
45
47
  },
46
48
  # visual
47
49
  {
48
50
  "pattern": re.compile(
49
- r"^.*?(TCI)(_[0-9]+m)?\.(?:jp2|tiff?)$", re.IGNORECASE
51
+ r"^.*?(TCI)(_[0-9]+m)?\.(?:jp2|tiff?)(?:\?.*)?$", re.IGNORECASE
50
52
  ),
51
53
  "roles": ["visual"],
52
54
  },
53
55
  # bands
54
56
  {
55
57
  "pattern": re.compile(
56
- r"^.*?([A-Z]+[0-9]*[A-Z]?)(_[0-9]+m)?\.(?:jp2|tiff?)$", re.IGNORECASE
58
+ r"^.*?([A-Z]+[0-9]*[A-Z]?)(_[0-9]+m)?\.(?:jp2|tiff?)(?:\?.*)?$",
59
+ re.IGNORECASE,
57
60
  ),
58
61
  "roles": ["data"],
59
62
  },
60
63
  # metadata
61
64
  {
62
65
  "pattern": re.compile(
63
- r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)$", re.IGNORECASE
66
+ r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)(?:\?.*)?$",
67
+ re.IGNORECASE,
64
68
  ),
65
69
  "roles": ["metadata"],
66
70
  },
67
71
  # thumbnail
68
72
  {
69
73
  "pattern": re.compile(
70
- r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)$", re.IGNORECASE
74
+ r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)(?:\?.*)?$", re.IGNORECASE
71
75
  ),
72
76
  "roles": ["thumbnail"],
73
77
  },
74
78
  # quicklook
75
79
  {
76
80
  "pattern": re.compile(
77
- r"^(?:.*[/\\])?[^/\\]+(-ql|preview|quick-?look)(\.jpe?g|\.png)$",
81
+ r"^(?:.*[/\\])?[^/\\]+(-ql|preview|quick-?look)(\.jpe?g|\.png)(?:\?.*)?$",
78
82
  re.IGNORECASE,
79
83
  ),
80
84
  "roles": ["overview"],
81
85
  },
82
86
  # default
83
- {"pattern": re.compile(r"^(?:.*[/\\])?([^/\\]+)$"), "roles": ["auxiliary"]},
87
+ {
88
+ "pattern": re.compile(r"^(?:.*[/\\])?([^/\\?]+)(?:\?.*)?$"),
89
+ "roles": ["auxiliary"],
90
+ },
84
91
  ]
85
92
 
86
93
  def _normalize_key(self, key: str, eo_product: EOProduct) -> str:
@@ -1731,16 +1731,19 @@ def get_queryable_from_provider(
1731
1731
  mapping_values = [
1732
1732
  v[0] if isinstance(v, list) else "" for v in metadata_mapping.values()
1733
1733
  ]
1734
+ StacQueryables = Queryables.from_stac_models()
1734
1735
  if provider_queryable in mapping_values:
1735
1736
  ind = mapping_values.index(provider_queryable)
1736
- return Queryables.get_queryable_from_alias(list(metadata_mapping.keys())[ind])
1737
+ return StacQueryables.get_queryable_from_alias(
1738
+ list(metadata_mapping.keys())[ind]
1739
+ )
1737
1740
  for param, param_conf in metadata_mapping.items():
1738
1741
  if (
1739
1742
  isinstance(param_conf, list)
1740
1743
  and param_conf[0]
1741
1744
  and re.search(pattern, param_conf[0])
1742
1745
  ):
1743
- return Queryables.get_queryable_from_alias(param)
1746
+ return StacQueryables.get_queryable_from_alias(param)
1744
1747
  return None
1745
1748
 
1746
1749
 
eodag/api/provider.py CHANGED
@@ -451,6 +451,7 @@ class Provider:
451
451
  getattr(
452
452
  self.config, key
453
453
  ).credentials = conf_with_creds.credentials
454
+ return
454
455
 
455
456
  def delete_collection(self, name: str) -> None:
456
457
  """Remove a collection from this provider.
@@ -32,7 +32,7 @@ from eodag.plugins.crunch.filter_latest_intersect import FilterLatestIntersect
32
32
  from eodag.plugins.crunch.filter_latest_tpl_name import FilterLatestByName
33
33
  from eodag.plugins.crunch.filter_overlap import FilterOverlap
34
34
  from eodag.plugins.crunch.filter_property import FilterProperty
35
- from eodag.utils import GENERIC_STAC_PROVIDER, STAC_SEARCH_PLUGINS
35
+ from eodag.utils import GENERIC_STAC_PROVIDER, STAC_SEARCH_PLUGINS, STAC_VERSION
36
36
  from eodag.utils.exceptions import MisconfiguredError
37
37
 
38
38
  if TYPE_CHECKING:
@@ -206,6 +206,9 @@ class SearchResult(UserList[EOProduct]):
206
206
  "eodag:search_params": geojson_search_params or None,
207
207
  "eodag:raise_errors": self.raise_errors,
208
208
  },
209
+ "links": [],
210
+ "stac_extensions": [],
211
+ "stac_version": STAC_VERSION,
209
212
  }
210
213
 
211
214
  def as_shapely_geometry_object(self) -> GeometryCollection:
eodag/cli.py CHANGED
@@ -48,6 +48,7 @@ from typing import TYPE_CHECKING, Any, Callable, Mapping, Optional
48
48
  from urllib.parse import parse_qs
49
49
 
50
50
  import click
51
+ from concurrent.futures import ThreadPoolExecutor
51
52
 
52
53
  from eodag.api.collection import CollectionsList
53
54
  from eodag.api.core import EODataAccessGateway, SearchResult
@@ -122,14 +123,14 @@ def _deprecated_cli(message: str, version: Optional[str] = None) -> Callable[...
122
123
  help="Control the verbosity of the logs. For maximum verbosity, type -vvv",
123
124
  )
124
125
  @click.pass_context
125
- def eodag(ctx: Context, verbose: int) -> None:
126
+ def eodag_cli(ctx: Context, verbose: int) -> None:
126
127
  """Earth Observation Data Access Gateway: work on EO products from any provider"""
127
128
  if ctx.obj is None:
128
129
  ctx.obj = {}
129
130
  ctx.obj["verbosity"] = verbose
130
131
 
131
132
 
132
- @eodag.command(name="version", help="Print eodag version and exit")
133
+ @eodag_cli.command(name="version", help="Print eodag version and exit")
133
134
  def version() -> None:
134
135
  """Print eodag version and exit"""
135
136
  click.echo(
@@ -141,7 +142,7 @@ def version() -> None:
141
142
  )
142
143
 
143
144
 
144
- @eodag.command(
145
+ @eodag_cli.command(
145
146
  name="search",
146
147
  help="Search satellite images by their collections, instruments, constellation, "
147
148
  "platform, processing level or sensor type. It is mandatory to provide "
@@ -406,7 +407,7 @@ def search_crunch(ctx: Context, **kwargs: Any) -> None:
406
407
  ctx.obj["search_results"] = results
407
408
 
408
409
 
409
- @eodag.command(name="list", help="List supported collections")
410
+ @eodag_cli.command(name="list", help="List supported collections")
410
411
  @click.option("-p", "--provider", help="List collections supported by this provider")
411
412
  @click.option(
412
413
  "--instruments", help="List collections originating from these instruments"
@@ -488,7 +489,7 @@ def list_col(ctx: Context, **kwargs: Any) -> None:
488
489
  sys.exit(1)
489
490
 
490
491
 
491
- @eodag.command(name="discover", help="Fetch providers to discover collections")
492
+ @eodag_cli.command(name="discover", help="Fetch providers to discover collections")
492
493
  @click.option("-p", "--provider", help="Fetch only the given provider")
493
494
  @click.option(
494
495
  "--storage",
@@ -519,7 +520,7 @@ def discover_col(ctx: Context, **kwargs: Any) -> None:
519
520
  click.echo("Results stored at '{}'".format(storage_filepath))
520
521
 
521
522
 
522
- @eodag.command(
523
+ @eodag_cli.command(
523
524
  help="""Download a list of products from a serialized search result or STAC items URLs/paths
524
525
 
525
526
  Examples:
@@ -556,6 +557,11 @@ Examples:
556
557
  type=click.Path(dir_okay=True, file_okay=False),
557
558
  help="Products or quicklooks download directory (Default: local temporary directory)",
558
559
  )
560
+ @click.option(
561
+ "--max-workers",
562
+ type=int,
563
+ help="The maximum number of workers to use for downloading products and assets in parallel",
564
+ )
559
565
  @click.pass_context
560
566
  def download(ctx: Context, **kwargs: Any) -> None:
561
567
  """Download a bunch of products from a serialized search result"""
@@ -601,7 +607,10 @@ def download(ctx: Context, **kwargs: Any) -> None:
601
607
 
602
608
  else:
603
609
  # Download products
604
- downloaded_files = satim_api.download_all(search_results, output_dir=output_dir)
610
+ executor = ThreadPoolExecutor(max_workers=kwargs.pop("max_workers"))
611
+ downloaded_files = satim_api.download_all(
612
+ search_results, output_dir=output_dir, executor=executor
613
+ )
605
614
  if downloaded_files and len(downloaded_files) > 0:
606
615
  for downloaded_file in downloaded_files:
607
616
  if downloaded_file is None:
@@ -617,4 +626,4 @@ def download(ctx: Context, **kwargs: Any) -> None:
617
626
 
618
627
 
619
628
  if __name__ == "__main__":
620
- eodag(obj={})
629
+ eodag_cli(obj={})
eodag/config.py CHANGED
@@ -20,7 +20,7 @@ from __future__ import annotations
20
20
  import logging
21
21
  import os
22
22
  from importlib.resources import files as res_files
23
- from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional, TypedDict, Union
23
+ from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional, Union
24
24
 
25
25
  import orjson
26
26
  import requests
@@ -28,6 +28,7 @@ import yaml
28
28
  import yaml.parser
29
29
  from annotated_types import Gt
30
30
  from jsonpath_ng import JSONPath
31
+ from typing_extensions import TypedDict
31
32
 
32
33
  from eodag.utils import (
33
34
  HTTP_REQ_TIMEOUT,
@@ -635,12 +636,29 @@ class PluginConfig(yaml.YAMLObject):
635
636
  matching_conf = getattr(self, "matching_conf", {})
636
637
  matching_url = getattr(self, "matching_url", None)
637
638
 
638
- if target_matching_conf and sort_dict(target_matching_conf) == sort_dict(
639
- matching_conf
639
+ # both match
640
+ if (
641
+ target_matching_conf
642
+ and sort_dict(target_matching_conf) == sort_dict(matching_conf)
643
+ and target_matching_url
644
+ and target_matching_url == matching_url
640
645
  ):
641
646
  return True
642
647
 
643
- if target_matching_url and target_matching_url == matching_url:
648
+ # conf matches and no matching_url expected
649
+ if (
650
+ target_matching_conf
651
+ and sort_dict(target_matching_conf) == sort_dict(matching_conf)
652
+ and not target_matching_url
653
+ ):
654
+ return True
655
+
656
+ # url matches and no matching_conf expected
657
+ if (
658
+ target_matching_url
659
+ and target_matching_url == matching_url
660
+ and not target_matching_conf
661
+ ):
644
662
  return True
645
663
 
646
664
  return False
@@ -48,6 +48,7 @@ from eodag.utils.logging import get_logging_verbose
48
48
  if TYPE_CHECKING:
49
49
  from typing import Any, Optional, Union
50
50
 
51
+ from concurrent.futures import ThreadPoolExecutor
51
52
  from mypy_boto3_s3 import S3ServiceResource
52
53
  from requests.auth import AuthBase
53
54
 
@@ -55,7 +56,7 @@ if TYPE_CHECKING:
55
56
  from eodag.api.search_result import SearchResult
56
57
  from eodag.config import PluginConfig
57
58
  from eodag.types.download_args import DownloadConf
58
- from eodag.utils import DownloadedCallback, ProgressCallback, Unpack
59
+ from eodag.utils import ProgressCallback, Unpack
59
60
 
60
61
 
61
62
  logger = logging.getLogger("eodag.apis.ecmwf")
@@ -185,6 +186,7 @@ class EcmwfApi(Api, ECMWFSearch):
185
186
  product: EOProduct,
186
187
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
187
188
  progress_callback: Optional[ProgressCallback] = None,
189
+ executor: Optional[ThreadPoolExecutor] = None,
188
190
  wait: float = DEFAULT_DOWNLOAD_WAIT,
189
191
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
190
192
  **kwargs: Unpack[DownloadConf],
@@ -269,29 +271,6 @@ class EcmwfApi(Api, ECMWFSearch):
269
271
  product.location = path_to_uri(product_path)
270
272
  return product_path
271
273
 
272
- def download_all(
273
- self,
274
- products: SearchResult,
275
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
276
- downloaded_callback: Optional[DownloadedCallback] = None,
277
- progress_callback: Optional[ProgressCallback] = None,
278
- wait: float = DEFAULT_DOWNLOAD_WAIT,
279
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
280
- **kwargs: Unpack[DownloadConf],
281
- ) -> list[str]:
282
- """
283
- Download all using parent (base plugin) method
284
- """
285
- return super(EcmwfApi, self).download_all(
286
- products,
287
- auth=auth,
288
- downloaded_callback=downloaded_callback,
289
- progress_callback=progress_callback,
290
- wait=wait,
291
- timeout=timeout,
292
- **kwargs,
293
- )
294
-
295
274
  def clear(self) -> None:
296
275
  """Clear search context"""
297
276
  pass
@@ -57,12 +57,13 @@ from eodag.utils.exceptions import (
57
57
  )
58
58
 
59
59
  if TYPE_CHECKING:
60
+ from concurrent.futures import ThreadPoolExecutor
60
61
  from mypy_boto3_s3 import S3ServiceResource
61
62
  from requests.auth import AuthBase
62
63
 
63
64
  from eodag.config import PluginConfig
64
65
  from eodag.types.download_args import DownloadConf
65
- from eodag.utils import DownloadedCallback, Unpack
66
+ from eodag.utils import Unpack
66
67
 
67
68
  logger = logging.getLogger("eodag.apis.usgs")
68
69
 
@@ -312,6 +313,7 @@ class UsgsApi(Api):
312
313
  product: EOProduct,
313
314
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
314
315
  progress_callback: Optional[ProgressCallback] = None,
316
+ executor: Optional[ThreadPoolExecutor] = None,
315
317
  wait: float = DEFAULT_DOWNLOAD_WAIT,
316
318
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
317
319
  **kwargs: Unpack[DownloadConf],
@@ -477,26 +479,3 @@ class UsgsApi(Api):
477
479
  shutil.move(fs_path, new_fs_path)
478
480
  product.location = path_to_uri(new_fs_path)
479
481
  return new_fs_path
480
-
481
- def download_all(
482
- self,
483
- products: SearchResult,
484
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
485
- downloaded_callback: Optional[DownloadedCallback] = None,
486
- progress_callback: Optional[ProgressCallback] = None,
487
- wait: float = DEFAULT_DOWNLOAD_WAIT,
488
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
489
- **kwargs: Unpack[DownloadConf],
490
- ) -> list[str]:
491
- """
492
- Download all using parent (base plugin) method
493
- """
494
- return super(UsgsApi, self).download_all(
495
- products,
496
- auth=auth,
497
- downloaded_callback=downloaded_callback,
498
- progress_callback=progress_callback,
499
- wait=wait,
500
- timeout=timeout,
501
- **kwargs,
502
- )
@@ -25,7 +25,9 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
25
25
 
26
26
  import boto3
27
27
  import requests
28
+ from boto3.s3.transfer import TransferConfig
28
29
  from botocore.exceptions import ClientError
30
+ from concurrent.futures import ThreadPoolExecutor, as_completed
29
31
  from lxml import etree
30
32
  from requests.auth import AuthBase
31
33
 
@@ -34,7 +36,7 @@ from eodag.api.product.metadata_mapping import (
34
36
  properties_from_json,
35
37
  properties_from_xml,
36
38
  )
37
- from eodag.plugins.authentication.aws_auth import raise_if_auth_error
39
+ from eodag.plugins.authentication.aws_auth import AwsAuth, raise_if_auth_error
38
40
  from eodag.plugins.download.base import Download
39
41
  from eodag.utils import (
40
42
  DEFAULT_DOWNLOAD_TIMEOUT,
@@ -65,10 +67,9 @@ if TYPE_CHECKING:
65
67
  from mypy_boto3_s3.client import S3Client
66
68
 
67
69
  from eodag.api.product import EOProduct
68
- from eodag.api.search_result import SearchResult
69
70
  from eodag.config import PluginConfig
70
71
  from eodag.types.download_args import DownloadConf
71
- from eodag.utils import DownloadedCallback, Unpack
72
+ from eodag.utils import Unpack
72
73
 
73
74
 
74
75
  logger = logging.getLogger("eodag.download.aws")
@@ -227,6 +228,7 @@ class AwsDownload(Download):
227
228
  product: EOProduct,
228
229
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
229
230
  progress_callback: Optional[ProgressCallback] = None,
231
+ executor: Optional[ThreadPoolExecutor] = None,
230
232
  wait: float = DEFAULT_DOWNLOAD_WAIT,
231
233
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
232
234
  **kwargs: Unpack[DownloadConf],
@@ -246,6 +248,7 @@ class AwsDownload(Download):
246
248
  size as inputs and handle progress bar
247
249
  creation and update to give the user a
248
250
  feedback on the download progress
251
+ :param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
249
252
  :param kwargs: `output_dir` (str), `extract` (bool), `delete_archive` (bool)
250
253
  and `dl_url_params` (dict) can be provided as additional kwargs
251
254
  and will override any other values defined in a configuration
@@ -293,7 +296,7 @@ class AwsDownload(Download):
293
296
  )
294
297
 
295
298
  # authenticate
296
- if product.downloader_auth:
299
+ if product.downloader_auth and isinstance(product.downloader_auth, AwsAuth):
297
300
  authenticated_objects = product.downloader_auth.authenticate_objects(
298
301
  bucket_names_and_prefixes
299
302
  )
@@ -302,9 +305,19 @@ class AwsDownload(Download):
302
305
  "Authentication plugin (AwsAuth) has to be configured if AwsDownload is used"
303
306
  )
304
307
 
308
+ # create an executor if not given and anticipate the possible need to shut it down
309
+ executor, shutdown_executor = (
310
+ (ThreadPoolExecutor(), True) if executor is None else (executor, False)
311
+ )
312
+ self._config_executor(executor)
313
+
305
314
  # files in zip
306
315
  updated_bucket_names_and_prefixes = self._download_file_in_zip(
307
- product, bucket_names_and_prefixes, product_local_path, progress_callback
316
+ product.downloader_auth,
317
+ bucket_names_and_prefixes,
318
+ product_local_path,
319
+ progress_callback,
320
+ executor,
308
321
  )
309
322
  # prevent nothing-to-download errors if download was performed in zip
310
323
  raise_error = (
@@ -329,7 +342,8 @@ class AwsDownload(Download):
329
342
  if len(unique_product_chunks) > 0:
330
343
  progress_callback.reset(total=total_size)
331
344
  try:
332
- for product_chunk in unique_product_chunks:
345
+
346
+ def download_chunk(product_chunk: Any) -> None:
333
347
  try:
334
348
  chunk_rel_path = self.get_chunk_dest_path(
335
349
  product,
@@ -339,11 +353,11 @@ class AwsDownload(Download):
339
353
  except NotAvailableError as e:
340
354
  # out of SAFE format chunk
341
355
  logger.warning(e)
342
- continue
356
+ return
357
+
343
358
  chunk_abs_path = os.path.join(product_local_path, chunk_rel_path)
344
359
  chunk_abs_path_dir = os.path.dirname(chunk_abs_path)
345
- if not os.path.isdir(chunk_abs_path_dir):
346
- os.makedirs(chunk_abs_path_dir)
360
+ os.makedirs(chunk_abs_path_dir, exist_ok=True)
347
361
 
348
362
  bucket_objects = authenticated_objects.get(product_chunk.bucket_name)
349
363
  extra_args = (
@@ -352,12 +366,31 @@ class AwsDownload(Download):
352
366
  else {}
353
367
  )
354
368
  if not os.path.isfile(chunk_abs_path):
369
+ transfer_config = TransferConfig(use_threads=False)
355
370
  product_chunk.Bucket().download_file(
356
371
  product_chunk.key,
357
372
  chunk_abs_path,
358
373
  ExtraArgs=extra_args,
359
374
  Callback=progress_callback,
375
+ Config=transfer_config,
360
376
  )
377
+ return
378
+
379
+ # use parallelization if possible.
380
+ # when products are already downloaded in parallel but the executor has only one worker,
381
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
382
+ if (
383
+ executor._thread_name_prefix == "eodag-download-all"
384
+ and executor._max_workers == 1
385
+ ):
386
+ for product_chunk in unique_product_chunks:
387
+ download_chunk(product_chunk)
388
+ else:
389
+ futures = (
390
+ executor.submit(download_chunk, product_chunk)
391
+ for product_chunk in unique_product_chunks
392
+ )
393
+ [f.result() for f in as_completed(futures)]
361
394
 
362
395
  except AuthenticationError as e:
363
396
  logger.warning("Unexpected error: %s" % e)
@@ -365,6 +398,9 @@ class AwsDownload(Download):
365
398
  raise_if_auth_error(e, self.provider)
366
399
  logger.warning("Unexpected error: %s" % e)
367
400
 
401
+ if shutdown_executor:
402
+ executor.shutdown(wait=True)
403
+
368
404
  # finalize safe product
369
405
  if build_safe and product.collection and "S2_MSI" in product.collection:
370
406
  self.finalize_s2_safe_product(product_local_path)
@@ -386,31 +422,33 @@ class AwsDownload(Download):
386
422
  return product_local_path
387
423
 
388
424
  def _download_file_in_zip(
389
- self, product, bucket_names_and_prefixes, product_local_path, progress_callback
425
+ self,
426
+ downloader_auth: AwsAuth,
427
+ bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
428
+ product_local_path: str,
429
+ progress_callback: ProgressCallback,
430
+ executor: ThreadPoolExecutor,
390
431
  ):
391
432
  """
392
433
  Download file in zip from a prefix like `foo/bar.zip!file.txt`
393
434
  """
394
- if (
395
- not getattr(product, "downloader_auth", None)
396
- or product.downloader_auth.s3_resource is None
397
- ):
435
+ if downloader_auth.s3_resource is None:
398
436
  logger.debug("Cannot check files in s3 zip without s3 resource")
399
437
  return bucket_names_and_prefixes
400
438
 
401
- s3_client = product.downloader_auth.get_s3_client()
439
+ s3_client = downloader_auth.get_s3_client()
402
440
 
403
441
  downloaded = []
404
- for i, pack in enumerate(bucket_names_and_prefixes):
442
+
443
+ def process_zip_file(i: int, pack: tuple[str, Optional[str]]) -> Optional[int]:
405
444
  bucket_name, prefix = pack
406
- if ".zip!" in prefix:
445
+ if prefix is not None and ".zip!" in prefix:
407
446
  splitted_path = prefix.split(".zip!")
408
447
  zip_prefix = f"{splitted_path[0]}.zip"
409
448
  rel_path = splitted_path[-1]
410
449
  dest_file = os.path.join(product_local_path, rel_path)
411
450
  dest_abs_path_dir = os.path.dirname(dest_file)
412
- if not os.path.isdir(dest_abs_path_dir):
413
- os.makedirs(dest_abs_path_dir)
451
+ os.makedirs(dest_abs_path_dir, exist_ok=True)
414
452
 
415
453
  zip_file, _ = open_s3_zipped_object(
416
454
  bucket_name, zip_prefix, s3_client, partial=False
@@ -428,7 +466,30 @@ class AwsDownload(Download):
428
466
  output_file.write(zchunk)
429
467
  progress_callback(len(zchunk))
430
468
 
431
- downloaded.append(i)
469
+ return i
470
+ return None
471
+
472
+ # use parallelization if possible
473
+ # when products are already downloaded in parallel but the executor has only one worker,
474
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
475
+ if (
476
+ executor._thread_name_prefix == "eodag-download-all"
477
+ and executor._max_workers == 1
478
+ ):
479
+ for i, pack in enumerate(bucket_names_and_prefixes):
480
+ result = process_zip_file(i, pack)
481
+ if result is not None:
482
+ downloaded.append(result)
483
+ else:
484
+ futures = (
485
+ executor.submit(process_zip_file, i, pack)
486
+ for i, pack in enumerate(bucket_names_and_prefixes)
487
+ )
488
+
489
+ for future in as_completed(futures):
490
+ result = future.result()
491
+ if result is not None:
492
+ downloaded.append(result)
432
493
 
433
494
  return [
434
495
  pack
@@ -710,7 +771,9 @@ class AwsDownload(Download):
710
771
  ignore_assets,
711
772
  product,
712
773
  )
713
- if auth and isinstance(auth, boto3.resources.base.ServiceResource):
774
+
775
+ # check if auth is a S3 resource by verifying it has the meta.client attribute.
776
+ if auth and hasattr(auth, "meta") and hasattr(auth.meta, "client"):
714
777
  s3_resource = auth
715
778
  else:
716
779
  s3_resource = boto3.resource(
@@ -773,6 +836,7 @@ class AwsDownload(Download):
773
836
  byte_range,
774
837
  compress,
775
838
  zip_filename,
839
+ provider_max_workers=getattr(self.config, "max_workers", None),
776
840
  )
777
841
 
778
842
  def _get_commonpath(
@@ -1112,26 +1176,3 @@ class AwsDownload(Download):
1112
1176
 
1113
1177
  logger.debug(f"Downloading {chunk.key} to {product_path}")
1114
1178
  return product_path
1115
-
1116
- def download_all(
1117
- self,
1118
- products: SearchResult,
1119
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
1120
- downloaded_callback: Optional[DownloadedCallback] = None,
1121
- progress_callback: Optional[ProgressCallback] = None,
1122
- wait: float = DEFAULT_DOWNLOAD_WAIT,
1123
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
1124
- **kwargs: Unpack[DownloadConf],
1125
- ) -> list[str]:
1126
- """
1127
- download_all using parent (base plugin) method
1128
- """
1129
- return super(AwsDownload, self).download_all(
1130
- products,
1131
- auth=auth,
1132
- downloaded_callback=downloaded_callback,
1133
- progress_callback=progress_callback,
1134
- wait=wait,
1135
- timeout=timeout,
1136
- **kwargs,
1137
- )