eodag 3.8.1__py3-none-any.whl → 3.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. eodag/api/core.py +1 -1
  2. eodag/api/product/drivers/generic.py +5 -1
  3. eodag/api/product/metadata_mapping.py +109 -8
  4. eodag/cli.py +36 -4
  5. eodag/config.py +5 -2
  6. eodag/plugins/apis/ecmwf.py +3 -1
  7. eodag/plugins/apis/usgs.py +2 -1
  8. eodag/plugins/authentication/aws_auth.py +228 -37
  9. eodag/plugins/authentication/base.py +12 -2
  10. eodag/plugins/authentication/oauth.py +5 -0
  11. eodag/plugins/base.py +3 -2
  12. eodag/plugins/download/aws.py +44 -285
  13. eodag/plugins/download/base.py +3 -2
  14. eodag/plugins/download/creodias_s3.py +1 -38
  15. eodag/plugins/download/http.py +111 -103
  16. eodag/plugins/download/s3rest.py +3 -1
  17. eodag/plugins/manager.py +2 -1
  18. eodag/plugins/search/__init__.py +2 -1
  19. eodag/plugins/search/base.py +2 -1
  20. eodag/plugins/search/build_search_result.py +2 -2
  21. eodag/plugins/search/creodias_s3.py +9 -1
  22. eodag/plugins/search/qssearch.py +3 -1
  23. eodag/resources/ext_product_types.json +1 -1
  24. eodag/resources/product_types.yml +220 -30
  25. eodag/resources/providers.yml +633 -88
  26. eodag/resources/stac_provider.yml +5 -2
  27. eodag/resources/user_conf_template.yml +0 -5
  28. eodag/rest/core.py +8 -0
  29. eodag/rest/errors.py +9 -0
  30. eodag/rest/server.py +8 -0
  31. eodag/rest/stac.py +8 -0
  32. eodag/rest/utils/__init__.py +2 -4
  33. eodag/rest/utils/rfc3339.py +1 -1
  34. eodag/utils/__init__.py +69 -54
  35. eodag/utils/dates.py +204 -0
  36. eodag/utils/s3.py +187 -168
  37. {eodag-3.8.1.dist-info → eodag-3.9.0.dist-info}/METADATA +4 -3
  38. {eodag-3.8.1.dist-info → eodag-3.9.0.dist-info}/RECORD +42 -42
  39. {eodag-3.8.1.dist-info → eodag-3.9.0.dist-info}/entry_points.txt +1 -1
  40. eodag/utils/rest.py +0 -100
  41. {eodag-3.8.1.dist-info → eodag-3.9.0.dist-info}/WHEEL +0 -0
  42. {eodag-3.8.1.dist-info → eodag-3.9.0.dist-info}/licenses/LICENSE +0 -0
  43. {eodag-3.8.1.dist-info → eodag-3.9.0.dist-info}/top_level.txt +0 -0
eodag/api/core.py CHANGED
@@ -74,6 +74,7 @@ from eodag.utils import (
74
74
  string_to_jsonpath,
75
75
  uri_to_path,
76
76
  )
77
+ from eodag.utils.dates import rfc3339_str_to_datetime
77
78
  from eodag.utils.env import is_env_var_true
78
79
  from eodag.utils.exceptions import (
79
80
  AuthenticationError,
@@ -84,7 +85,6 @@ from eodag.utils.exceptions import (
84
85
  UnsupportedProvider,
85
86
  )
86
87
  from eodag.utils.free_text_search import compile_free_text_query
87
- from eodag.utils.rest import rfc3339_str_to_datetime
88
88
  from eodag.utils.stac_reader import fetch_stac_items
89
89
 
90
90
  if TYPE_CHECKING:
@@ -33,7 +33,11 @@ class GenericDriver(DatasetDriver):
33
33
  # data
34
34
  {
35
35
  "pattern": re.compile(
36
- r"^(?:.*[/\\])?([^/\\]+)(\.jp2|\.tiff?|\.dat|\.nc|\.grib2?)$",
36
+ (
37
+ r"^(?:.*[/\\])?([^/\\]+)"
38
+ r"(\.jp2|\.tiff?|\.dat|\.nc|\.grib2?|"
39
+ r"\.zarr|\.nat|\.covjson|\.parquet|\.zip|\.tar|\.gz)$"
40
+ ),
37
41
  re.IGNORECASE,
38
42
  ),
39
43
  "roles": ["data"],
@@ -47,7 +47,6 @@ from eodag.utils import (
47
47
  dict_items_recursive_apply,
48
48
  format_string,
49
49
  get_geometry_from_various,
50
- get_timestamp,
51
50
  items_recursive_apply,
52
51
  nested_pairs2dict,
53
52
  remove_str_array_quotes,
@@ -55,6 +54,7 @@ from eodag.utils import (
55
54
  string_to_jsonpath,
56
55
  update_nested_dict,
57
56
  )
57
+ from eodag.utils.dates import get_timestamp
58
58
  from eodag.utils.exceptions import ValidationError
59
59
 
60
60
  if TYPE_CHECKING:
@@ -174,6 +174,7 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
174
174
  - ``slice_str``: slice a string (equivalent to s[start, end, step])
175
175
  - ``to_lower``: Convert a string to lowercase
176
176
  - ``to_upper``: Convert a string to uppercase
177
+ - ``to_title``: Convert a string to title case
177
178
  - ``fake_l2a_title_from_l1c``: used to generate SAFE format metadata for data from AWS
178
179
  - ``s2msil2a_title_to_aws_productinfo``: used to generate SAFE format metadata for data from AWS
179
180
  - ``split_cop_dem_id``: get the bbox by splitting the product id
@@ -182,6 +183,8 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
182
183
  - ``get_ecmwf_time``: get the time of a datetime string in the ECMWF format
183
184
  - ``sanitize``: sanitize string
184
185
  - ``ceda_collection_name``: generate a CEDA collection name from a string
186
+ - ``convert_dict_filter_and_sub``: filter dict items using jsonpath and then apply recursive_sub_str
187
+ - ``convert_from_alternate``: update assets using given alternate
185
188
 
186
189
  :param search_param: The string to be formatted
187
190
  :param args: (optional) Additional arguments to use in the formatting process
@@ -529,6 +532,35 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
529
532
  old, new = ast.literal_eval(args)
530
533
  return re.sub(old, new, value)
531
534
 
535
+ @staticmethod
536
+ def convert_replace_str_tuple(value: Any, args: str) -> str:
537
+ """
538
+ Apply multiple replacements on a string.
539
+ args should be a string representing a list/tuple of (old, new) pairs.
540
+ Example: '(("old1", "new1"), ("old2", "new2"))'
541
+ """
542
+ if isinstance(value, dict):
543
+ value = MetadataFormatter.convert_to_geojson(value)
544
+ elif not isinstance(value, str):
545
+ raise TypeError(
546
+ f"convert_replace_str_tuple expects a string or a dict (apply to_geojson). "
547
+ f"Got {type(value)}: {value}"
548
+ )
549
+
550
+ # args sera une chaîne représentant une liste/tuple de tuples
551
+ replacements = ast.literal_eval(args)
552
+
553
+ if not isinstance(replacements, (list, tuple)):
554
+ raise TypeError(
555
+ f"convert_replace_str_tuple expects a list/tuple of (old,new) pairs. "
556
+ f"Got {type(replacements)}: {replacements}"
557
+ )
558
+
559
+ for old, new in replacements:
560
+ value = re.sub(old, new, value)
561
+
562
+ return value
563
+
532
564
  @staticmethod
533
565
  def convert_ceda_collection_name(value: str) -> str:
534
566
  data_regex = re.compile(r"/data/(?P<name>.+?)/?$")
@@ -580,6 +612,45 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
580
612
  result[key] = match.value
581
613
  return result
582
614
 
615
@staticmethod
def convert_dict_filter_and_sub(
    input_dict: dict[Any, Any], args: str
) -> Union[dict[Any, Any], list[Any]]:
    """Filter dict items using jsonpath, then apply ``recursive_sub_str``.

    :param input_dict: Dictionary to filter
    :param args: String representing a ``(jsonpath_filter, old, new)`` tuple
    :returns: The filtered items, with ``old`` substituted by ``new``
    """
    jsonpath_filter_str, old, new = ast.literal_eval(args)
    filtered = MetadataFormatter.convert_dict_filter(
        input_dict, jsonpath_filter_str
    )
    # Build the argument literal with repr() rather than by wrapping the values
    # in quotes by hand: a pattern containing a quote or a backslash (e.g. "\b")
    # would otherwise be altered, or fail to parse, when the literal is read
    # back. Presumably the sibling converter parses it with ast.literal_eval,
    # as the other converters of this class do.
    args_str = repr((str(old), str(new)))
    return MetadataFormatter.convert_recursive_sub_str(filtered, args_str)
626
+
627
+ @staticmethod
628
+ def convert_from_alternate(
629
+ input_obj: dict[str, Any], value: str
630
+ ) -> dict[str, Any]:
631
+ """
632
+ Update assets using given alternate.
633
+ """
634
+ result: dict[str, Any] = {}
635
+ for k, v in input_obj.items():
636
+ if not isinstance(v, dict):
637
+ continue
638
+
639
+ alt_dict = deepcopy(v).get("alternate")
640
+ if not isinstance(alt_dict, dict):
641
+ continue
642
+
643
+ value_entry = alt_dict.pop(value, None)
644
+ if not isinstance(value_entry, dict):
645
+ continue
646
+
647
+ result[k] = v | value_entry | {"alternate": alt_dict}
648
+
649
+ if len(result[k]["alternate"]) == 0:
650
+ del result[k]["alternate"]
651
+
652
+ return result
653
+
583
654
  @staticmethod
584
655
  def convert_slice_str(string: str, args: str) -> str:
585
656
  cmin, cmax, cstep = [
@@ -591,6 +662,8 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
591
662
@staticmethod
def convert_to_lower(string: str) -> str:
    """Convert a string to lowercase.

    The ``NOT_AVAILABLE`` placeholder is returned unchanged.
    """
    return string if string == NOT_AVAILABLE else string.lower()
595
668
 
596
669
  @staticmethod
@@ -598,6 +671,13 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
598
671
  """Convert a string to uppercase."""
599
672
  return string.upper()
600
673
 
674
@staticmethod
def convert_to_title(string: str) -> str:
    """Convert a string to title case.

    The ``NOT_AVAILABLE`` placeholder is returned unchanged.
    """
    return string if string == NOT_AVAILABLE else string.title()
680
+
601
681
  @staticmethod
602
682
  def convert_fake_l2a_title_from_l1c(string: str) -> str:
603
683
  id_regex = re.compile(
@@ -1362,17 +1442,30 @@ def format_query_params(
1362
1442
  error_context,
1363
1443
  )
1364
1444
 
1365
- for eodag_search_key, provider_search_key in queryables.items():
1445
+ for eodag_search_key, provider_search_param in queryables.items():
1366
1446
  user_input = query_dict[eodag_search_key]
1367
1447
 
1368
- if COMPLEX_QS_REGEX.match(provider_search_key):
1369
- parts = provider_search_key.split("=")
1448
+ if provider_search_param == user_input:
1449
+ # means the mapping is to be passed as is, in which case we
1450
+ # readily register it
1451
+ if (
1452
+ eodag_search_key in query_params
1453
+ and isinstance(query_params[eodag_search_key], dict)
1454
+ and isinstance(user_input, dict)
1455
+ ):
1456
+ query_params[eodag_search_key].update(user_input)
1457
+ else:
1458
+ query_params[eodag_search_key] = user_input
1459
+ continue
1460
+
1461
+ if COMPLEX_QS_REGEX.match(provider_search_param):
1462
+ parts = provider_search_param.split("=")
1370
1463
  if len(parts) == 1:
1371
1464
  formatted_query_param = format_metadata(
1372
- provider_search_key, product_type, **query_dict
1465
+ provider_search_param, product_type, **query_dict
1373
1466
  )
1374
1467
  formatted_query_param = formatted_query_param.replace("'", '"')
1375
- if "{{" in provider_search_key:
1468
+ if "{{" in provider_search_param:
1376
1469
  # retrieve values from hashes where keys are given in the param
1377
1470
  if "}[" in formatted_query_param:
1378
1471
  formatted_query_param = _resolve_hashes(formatted_query_param)
@@ -1396,7 +1489,7 @@ def format_query_params(
1396
1489
  provider_value, product_type, **query_dict
1397
1490
  )
1398
1491
  else:
1399
- query_params[provider_search_key] = user_input
1492
+ query_params[provider_search_param] = user_input
1400
1493
  # Now get all the literal search params (i.e params to be passed "as is"
1401
1494
  # in the search request)
1402
1495
  # ignore additional_params if it isn't a dictionary
@@ -1527,7 +1620,15 @@ def _get_queryables(
1527
1620
  config.discover_metadata.get("metadata_pattern", "")
1528
1621
  )
1529
1622
  search_param_cfg = config.discover_metadata.get("search_param", "")
1530
- if pattern.match(eodag_search_key) and isinstance(
1623
+ search_param_unparsed_cfg = config.discover_metadata.get(
1624
+ "search_param_unparsed", []
1625
+ )
1626
+ if (
1627
+ search_param_unparsed_cfg
1628
+ and eodag_search_key in search_param_unparsed_cfg
1629
+ ):
1630
+ queryables[eodag_search_key] = user_input
1631
+ elif pattern.match(eodag_search_key) and isinstance(
1531
1632
  search_param_cfg, str
1532
1633
  ):
1533
1634
  search_param = search_param_cfg.format(metadata=eodag_search_key)
eodag/cli.py CHANGED
@@ -42,13 +42,14 @@ Commands:
42
42
 
43
43
  from __future__ import annotations
44
44
 
45
+ import functools
45
46
  import json
46
47
  import os
47
48
  import shutil
48
49
  import sys
49
50
  import textwrap
50
51
  from importlib.metadata import metadata
51
- from typing import TYPE_CHECKING, Any, Mapping
52
+ from typing import TYPE_CHECKING, Any, Callable, Mapping, Optional
52
53
  from urllib.parse import parse_qs
53
54
 
54
55
  import click
@@ -118,6 +119,22 @@ class MutuallyExclusiveOption(click.Option):
118
119
  return super(MutuallyExclusiveOption, self).handle_parse_result(ctx, opts, args)
119
120
 
120
121
 
122
+ def _deprecated_cli(message: str, version: Optional[str] = None) -> Callable[..., Any]:
123
+ """Decorator to mark a CLI command as deprecated and print a bold yellow warning."""
124
+ version_msg = f" -- Deprecated since v{version}" if version else ""
125
+
126
+ def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
127
+ @functools.wraps(func)
128
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
129
+ full_message = f"DEPRECATED: {message}{version_msg}"
130
+ click.echo(click.style(full_message, fg="yellow", bold=True), err=True)
131
+ return func(*args, **kwargs)
132
+
133
+ return wrapper
134
+
135
+ return decorator
136
+
137
+
121
138
  @click.group(chain=True)
122
139
  @click.option(
123
140
  "-v",
@@ -631,9 +648,17 @@ def download(ctx: Context, **kwargs: Any) -> None:
631
648
 
632
649
 
633
650
  @eodag.command(
634
- help="Start eodag HTTP server\n\n"
635
- "Set EODAG_CORS_ALLOWED_ORIGINS environment variable to configure Cross-Origin Resource Sharing allowed origins as "
636
- "comma-separated URLs (e.g. 'http://somewhere,htttp://somewhere.else')."
651
+ help="(deprecated) Start eodag HTTP server\n\n"
652
+ + (
653
+ click.style(
654
+ "Running a web server from the CLI is deprecated and will be removed in a future version.\n"
655
+ "This feature has been moved to its own repository: https://github.com/CS-SI/stac-fastapi-eodag\n\n",
656
+ fg="yellow",
657
+ bold=True,
658
+ )
659
+ + "Set EODAG_CORS_ALLOWED_ORIGINS environment variable to configure Cross-Origin Resource Sharing allowed "
660
+ "origins as comma-separated URLs (e.g. 'http://somewhere,http://somewhere.else')."
661
+ )
637
662
  )
638
663
  @click.option(
639
664
  "-f",
@@ -676,6 +701,13 @@ def download(ctx: Context, **kwargs: Any) -> None:
676
701
  help="Run in debug mode (for development purpose)",
677
702
  )
678
703
  @click.pass_context
704
+ @_deprecated_cli(
705
+ message=(
706
+ "Running a web server from the CLI is deprecated and will be removed in a future version. "
707
+ "This feature has been moved to its own repository: https://github.com/CS-SI/stac-fastapi-eodag"
708
+ ),
709
+ version="3.9.0",
710
+ )
679
711
  def serve_rest(
680
712
  ctx: Context,
681
713
  daemon: bool,
eodag/config.py CHANGED
@@ -272,6 +272,8 @@ class PluginConfig(yaml.YAMLObject):
272
272
  search_param: str | dict[str, Any]
273
273
  #: Path to the metadata in search result
274
274
  metadata_path: str
275
+ #: list search parameters to send as is to the provider
276
+ search_param_unparsed: list[str]
275
277
  #: Whether an error must be raised when using a search parameter which is not queryable or not
276
278
  raise_mtd_discovery_error: bool
277
279
 
@@ -543,8 +545,6 @@ class PluginConfig(yaml.YAMLObject):
543
545
  #: :class:`~eodag.plugins.download.s3rest.S3RestDownload`
544
546
  #: At which level of the path part of the url the bucket can be found
545
547
  bucket_path_level: int
546
- #: :class:`~eodag.plugins.download.aws.AwsDownload` Whether download is done from a requester-pays bucket or not
547
- requester_pays: bool
548
548
  #: :class:`~eodag.plugins.download.aws.AwsDownload` S3 endpoint
549
549
  s3_endpoint: str
550
550
 
@@ -571,6 +571,9 @@ class PluginConfig(yaml.YAMLObject):
571
571
  #: :class:`~eodag.plugins.authentication.base.Authentication` Part of the search or download plugin configuration
572
572
  #: that needs authentication
573
573
  matching_conf: dict[str, Any]
574
+ #: :class:`~eodag.plugins.authentication.aws_auth.AwsAuth`
575
+ #: Whether download is done from a requester-pays bucket or not
576
+ requester_pays: bool
574
577
  #: :class:`~eodag.plugins.authentication.openid_connect.OIDCRefreshTokenBase`
575
578
  #: How the token should be used in the request
576
579
  token_provision: str
@@ -46,6 +46,7 @@ from eodag.utils.logging import get_logging_verbose
46
46
  if TYPE_CHECKING:
47
47
  from typing import Any, Optional, Union
48
48
 
49
+ from mypy_boto3_s3 import S3ServiceResource
49
50
  from requests.auth import AuthBase
50
51
 
51
52
  from eodag.api.product import EOProduct
@@ -55,6 +56,7 @@ if TYPE_CHECKING:
55
56
  from eodag.types.download_args import DownloadConf
56
57
  from eodag.utils import DownloadedCallback, ProgressCallback, Unpack
57
58
 
59
+
58
60
  logger = logging.getLogger("eodag.apis.ecmwf")
59
61
 
60
62
  ECMWF_MARS_KNOWN_FORMATS = {"grib": "grib", "netcdf": "nc"}
@@ -171,7 +173,7 @@ class EcmwfApi(Api, ECMWFSearch):
171
173
  def download(
172
174
  self,
173
175
  product: EOProduct,
174
- auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
176
+ auth: Optional[Union[AuthBase, S3SessionKwargs, S3ServiceResource]] = None,
175
177
  progress_callback: Optional[ProgressCallback] = None,
176
178
  wait: float = DEFAULT_DOWNLOAD_WAIT,
177
179
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
@@ -57,6 +57,7 @@ from eodag.utils.exceptions import (
57
57
  )
58
58
 
59
59
  if TYPE_CHECKING:
60
+ from mypy_boto3_s3 import S3ServiceResource
60
61
  from requests.auth import AuthBase
61
62
 
62
63
  from eodag.api.search_result import SearchResult
@@ -296,7 +297,7 @@ class UsgsApi(Api):
296
297
  def download(
297
298
  self,
298
299
  product: EOProduct,
299
- auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
300
+ auth: Optional[Union[AuthBase, S3SessionKwargs, S3ServiceResource]] = None,
300
301
  progress_callback: Optional[ProgressCallback] = None,
301
302
  wait: float = DEFAULT_DOWNLOAD_WAIT,
302
303
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
@@ -17,29 +17,72 @@
17
17
  # limitations under the License.
18
18
  from __future__ import annotations
19
19
 
20
- from typing import TYPE_CHECKING, Optional, cast
20
+ import logging
21
+ from typing import TYPE_CHECKING, Any, Optional, cast
22
+
23
+ import boto3
24
+ from botocore.exceptions import ClientError, ProfileNotFound
25
+ from botocore.handlers import disable_signing
21
26
 
22
27
  from eodag.plugins.authentication.base import Authentication
23
28
  from eodag.types import S3SessionKwargs
29
+ from eodag.utils.exceptions import AuthenticationError
24
30
 
25
31
  if TYPE_CHECKING:
26
- from mypy_boto3_s3.client import S3Client
32
+ from mypy_boto3_s3 import S3Client, S3ServiceResource
33
+ from mypy_boto3_s3.service_resource import BucketObjectsCollection
27
34
 
28
35
  from eodag.config import PluginConfig
29
36
 
30
37
 
38
+ logger = logging.getLogger("eodag.download.aws_auth")
39
+
40
+ AWS_AUTH_ERROR_MESSAGES = [
41
+ "AccessDenied",
42
+ "InvalidAccessKeyId",
43
+ "SignatureDoesNotMatch",
44
+ "InvalidRequest",
45
+ ]
46
+
47
+
48
def raise_if_auth_error(exception: ClientError, provider: str) -> None:
    """Raise an authentication error if the given exception is one.

    The exception is treated as an authentication error when its error code is
    listed in ``AWS_AUTH_ERROR_MESSAGES`` and its message mentions a key.

    :param exception: Client error to inspect
    :param provider: Provider name, used in the error message
    :raises AuthenticationError: If the exception is an authentication error
    """
    error = cast(dict[str, str], exception.response["Error"])
    code = error["Code"]
    if code not in AWS_AUTH_ERROR_MESSAGES:
        return
    message = error["Message"]
    if "key" not in message.lower():
        return
    status_code = exception.response["ResponseMetadata"]["HTTPStatusCode"]
    raise AuthenticationError(
        f"Please check your credentials for {provider}.",
        f"HTTP Error {status_code} returned.",
        code + ": " + message,
    )
57
+
58
+
59
def create_s3_session(**kwargs: Any) -> boto3.Session:
    """Create an S3 session based on the available credentials.

    :param kwargs: keyword arguments containing credentials
    :returns: boto3 Session
    :raises AuthenticationError: If the AWS profile to use cannot be found
    """
    try:
        return boto3.Session(**kwargs)
    except ProfileNotFound as err:
        # The profile may not have been passed as ``profile_name`` (presumably
        # it can also be picked up from the environment), so do not index
        # kwargs blindly: fall back on the message of the original error.
        profile_name = kwargs.get("profile_name")
        reason = f"AWS profile {profile_name} not found" if profile_name else str(err)
        raise AuthenticationError(
            f"{reason}, please check your credentials configuration"
        ) from err
72
+
73
+
31
74
  class AwsAuth(Authentication):
32
75
  """AWS authentication plugin
33
76
 
34
- Authentication will use the first valid method within the following ones depending on which
35
- parameters are available in the configuration:
77
+ The authentication method will be chosen depending on which parameters are available in the configuration:
36
78
 
37
- * auth anonymously using no-sign-request
38
- * auth using ``aws_profile``
39
- * auth using ``aws_access_key_id`` and ``aws_secret_access_key``
40
- (optionally ``aws_session_token``)
41
- * auth using current environment (AWS environment variables and/or ``~/aws/*``),
42
- will be skipped if AWS credentials are filled in eodag conf
79
+ * auth using ``profile_name`` (if credentials are given and contain ``aws_profile``)
80
+ * auth using ``aws_access_key_id``, ``aws_secret_access_key`` and optionally ``aws_session_token``
81
+ (if credentials are given but no ``aws_profile``)
82
+ * auth using current environment - AWS environment variables and/or ``~/.aws/*``
83
+ (if no credentials are given in config)
84
+ * auth anonymously using no-sign-request if no credentials are given in config and
85
+ auth using current environment failed
43
86
 
44
87
  :param provider: provider name
45
88
  :param config: Authentication plugin configuration:
@@ -47,41 +90,189 @@ class AwsAuth(Authentication):
47
90
  * :attr:`~eodag.config.PluginConfig.type` (``str``) (**mandatory**): AwsAuth
48
91
  * :attr:`~eodag.config.PluginConfig.auth_error_code` (``int``) (mandatory for ``creodias_s3``):
49
92
  which error code is returned in case of an authentication error
93
+ * :attr:`~eodag.config.PluginConfig.s3_endpoint` (``str``): s3 endpoint url
94
+ * :attr:`~eodag.config.PluginConfig.requester_pays` (``bool``): whether download is done
95
+ from a requester-pays bucket or not; default: ``False``
50
96
 
51
97
  """
52
98
 
53
- s3_client: S3Client
54
-
55
99
def __init__(self, provider: str, config: PluginConfig) -> None:
    """Initialize the plugin; session and resource are created on first use.

    :param provider: provider name
    :param config: Authentication plugin configuration
    """
    super(AwsAuth, self).__init__(provider, config)
    # created lazily by the methods that need them
    self.s3_session: Optional[boto3.Session] = None
    self.s3_resource: Optional[S3ServiceResource] = None
    # requester_pays defaults to False when not given in the configuration
    vars(self.config).setdefault("requester_pays", False)
105
+
106
def _create_s3_session_from_credentials(self) -> boto3.Session:
    """Create an S3 session from the credentials found in the configuration.

    Without credentials, the session is created with no argument; with an
    ``aws_profile``, using this profile; otherwise using the AWS keys.

    :returns: boto3 Session
    """
    credentials = getattr(self.config, "credentials", {}) or {}
    if not credentials:
        # auth using env variables or ~/.aws
        return create_s3_session()

    if "aws_profile" in credentials:
        return create_s3_session(profile_name=credentials["aws_profile"])

    # auth using aws keys
    session_kwargs: S3SessionKwargs = {
        "aws_access_key_id": credentials["aws_access_key_id"],
        "aws_secret_access_key": credentials["aws_secret_access_key"],
    }
    if session_token := credentials.get("aws_session_token"):
        session_kwargs["aws_session_token"] = session_token
    return create_s3_session(**session_kwargs)
124
+
125
+ def _create_s3_resource(self) -> S3ServiceResource:
126
+ """create s3 resource based on s3 session"""
127
+ if not self.s3_session:
128
+ self.s3_session = self._create_s3_session_from_credentials()
129
+ endpoint_url = getattr(self.config, "s3_endpoint", None)
130
+ if self.s3_session.get_credentials():
131
+ return self.s3_session.resource(
132
+ service_name="s3",
133
+ endpoint_url=endpoint_url,
134
+ )
135
+ # could not auth using credentials: use no-sign-request strategy
136
+ s3_resource = boto3.resource(service_name="s3", endpoint_url=endpoint_url)
137
+ s3_resource.meta.client.meta.events.register(
138
+ "choose-signer.s3.*", disable_signing
139
+ )
140
+ return s3_resource
141
+
142
def get_s3_client(self) -> S3Client:
    """Get the S3 client of the S3 resource, creating the resource if needed.

    :returns: boto3 client
    """
    resource = self.s3_resource
    if not resource:
        resource = self._create_s3_resource()
        self.s3_resource = resource
    return resource.meta.client
150
+
151
def authenticate(self) -> S3ServiceResource:
    """Authenticate

    A new S3 resource is created on each call and kept on the plugin.

    :returns: S3 Resource created based on an S3 session
    """
    resource = self._create_s3_resource()
    self.s3_resource = resource
    return resource
158
+
159
def _get_authenticated_objects(
    self, bucket_name: str, prefix: str
) -> BucketObjectsCollection:
    """Get boto3 authenticated objects for the given bucket

    Access is checked by listing at most one object under ``prefix``; the
    returned collection itself is not restricted to this prefix.

    :param bucket_name: Bucket containing objects
    :param prefix: Prefix used to check the access to the bucket
    :returns: The boto3 authenticated objects
    :raises AuthenticationError: If the access check fails with one of the
                                 ``AWS_AUTH_ERROR_MESSAGES`` error codes
    :raises ClientError: If the access check fails for any other reason
    """
    if not self.s3_resource:
        self.s3_resource = self._create_s3_resource()

    auth_error: Optional[ClientError] = None
    try:
        objects = self.s3_resource.Bucket(bucket_name).objects
        if self.config.requester_pays:
            objects = objects.filter(RequestPayer="requester")
        # listing is lazy: request a single object to trigger the access check
        list(objects.filter(Prefix=prefix).limit(1))
    except ClientError as err:
        if err.response.get("Error", {}).get("Code") not in AWS_AUTH_ERROR_MESSAGES:
            # not an authentication issue: let the caller handle it
            raise
        auth_error = err
    else:
        if objects:
            logger.debug(
                "Authentication for bucket %s succeeded; returning available objects",
                bucket_name,
            )
            return objects

    logger.debug(
        "Authentication for bucket %s failed, please check the credentials",
        bucket_name,
    )
    # keep the client error, if any, as the cause of the authentication error
    raise AuthenticationError(
        "Unable to authenticate on s3://%s using credentials configuration"
        % bucket_name
    ) from auth_error
87
- return auth_dict
198
+
199
def authenticate_objects(
    self,
    bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
) -> dict[str, BucketObjectsCollection]:
    """
    Authenticates with s3 and retrieves the available objects

    Each bucket is authenticated once, using the longest base path shared by
    all the prefixes given for this bucket. Entries without prefix are skipped.
    Buckets on which authentication fails are skipped with a warning.

    :param bucket_names_and_prefixes: list of bucket names and corresponding path prefixes
    :raises AuthenticationError: authentication is not possible
    :return: authenticated objects per bucket
    """
    authenticated_objects: dict[str, Any] = {}
    auth_error_messages: set[str] = set()

    for bucket_name, prefix in bucket_names_and_prefixes:
        if not prefix or bucket_name in authenticated_objects:
            continue

        # get the longest base path common to all the prefixes of the bucket;
        # the last component of the prefix is never part of it
        bucket_prefixes = [
            p for b, p in bucket_names_and_prefixes if b == bucket_name
        ]
        prefix_split = prefix.split("/")
        common_prefix = ""
        for i in range(1, len(prefix_split)):
            candidate = "/".join(prefix_split[:i])
            # compare whole path components: "data" is not a base path of
            # "metadata/..." although it is a substring of it
            if not all(
                p and (p == candidate or p.startswith(candidate + "/"))
                for p in bucket_prefixes
            ):
                break
            common_prefix = candidate

        try:
            # connect to aws s3 and get bucket authenticated objects
            authenticated_objects[bucket_name] = self._get_authenticated_objects(
                bucket_name, common_prefix
            )
        except AuthenticationError as e:
            logger.warning("Unexpected error: %s", e)
            logger.warning("Skipping %s/%s", bucket_name, prefix)
            auth_error_messages.add(str(e))
        except ClientError as e:
            raise_if_auth_error(e, self.provider)
            logger.warning("Unexpected error: %s", e)
            logger.warning("Skipping %s/%s", bucket_name, prefix)
            auth_error_messages.add(str(e))

    # could not auth on any bucket
    if not authenticated_objects:
        raise AuthenticationError(
            ", ".join(sorted(auth_error_messages))
            or "No bucket with a path prefix to authenticate on"
        )
    return authenticated_objects
259
+
260
def get_rio_env(self) -> dict[str, Any]:
    """Get rasterio environment variables needed for data access authentication.

    Authenticates first if no S3 session is available yet.

    :returns: The rasterio environment variables
    """
    extra_kwargs: dict[str, Any] = {}
    endpoint_url = getattr(self.config, "s3_endpoint", None)
    if endpoint_url:
        # keep only what follows the scheme
        extra_kwargs["endpoint_url"] = endpoint_url.rsplit("://", 1)[-1]

    if self.s3_session is None:
        self.authenticate()

    if self.config.requester_pays:
        extra_kwargs["requester_pays"] = True

    rio_env: dict[str, Any] = {"session": self.s3_session}
    rio_env.update(extra_kwargs)
    return rio_env