PyPI - eodag - Versions diffs - 3.0.1__py3-none-any.whl → 3.1.0__py3-none-any.whl - Mend

eodag 3.0.1py3-none-any.whl → 3.1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

eodag/api/core.py +174 -138
eodag/api/product/_assets.py +44 -15
eodag/api/product/_product.py +58 -47
eodag/api/product/drivers/__init__.py +81 -4
eodag/api/product/drivers/base.py +65 -4
eodag/api/product/drivers/generic.py +65 -0
eodag/api/product/drivers/sentinel1.py +97 -0
eodag/api/product/drivers/sentinel2.py +95 -0
eodag/api/product/metadata_mapping.py +117 -90
eodag/api/search_result.py +13 -23
eodag/cli.py +26 -5
eodag/config.py +86 -92
eodag/plugins/apis/base.py +1 -1
eodag/plugins/apis/ecmwf.py +42 -22
eodag/plugins/apis/usgs.py +17 -16
eodag/plugins/authentication/aws_auth.py +16 -13
eodag/plugins/authentication/base.py +5 -3
eodag/plugins/authentication/header.py +3 -3
eodag/plugins/authentication/keycloak.py +4 -4
eodag/plugins/authentication/oauth.py +7 -3
eodag/plugins/authentication/openid_connect.py +22 -16
eodag/plugins/authentication/sas_auth.py +4 -4
eodag/plugins/authentication/token.py +41 -10
eodag/plugins/authentication/token_exchange.py +1 -1
eodag/plugins/base.py +4 -4
eodag/plugins/crunch/base.py +4 -4
eodag/plugins/crunch/filter_date.py +4 -4
eodag/plugins/crunch/filter_latest_intersect.py +6 -6
eodag/plugins/crunch/filter_latest_tpl_name.py +7 -7
eodag/plugins/crunch/filter_overlap.py +4 -4
eodag/plugins/crunch/filter_property.py +6 -7
eodag/plugins/download/aws.py +146 -87
eodag/plugins/download/base.py +38 -56
eodag/plugins/download/creodias_s3.py +29 -0
eodag/plugins/download/http.py +173 -183
eodag/plugins/download/s3rest.py +10 -11
eodag/plugins/manager.py +10 -20
eodag/plugins/search/__init__.py +6 -5
eodag/plugins/search/base.py +90 -46
eodag/plugins/search/build_search_result.py +1048 -361
eodag/plugins/search/cop_marine.py +22 -12
eodag/plugins/search/creodias_s3.py +9 -73
eodag/plugins/search/csw.py +11 -11
eodag/plugins/search/data_request_search.py +19 -18
eodag/plugins/search/qssearch.py +99 -258
eodag/plugins/search/stac_list_assets.py +85 -0
eodag/plugins/search/static_stac_search.py +4 -4
eodag/resources/ext_product_types.json +1 -1
eodag/resources/product_types.yml +1134 -325
eodag/resources/providers.yml +906 -2006
eodag/resources/stac_api.yml +2 -2
eodag/resources/user_conf_template.yml +10 -9
eodag/rest/cache.py +2 -2
eodag/rest/config.py +3 -3
eodag/rest/core.py +112 -82
eodag/rest/errors.py +5 -5
eodag/rest/server.py +33 -14
eodag/rest/stac.py +41 -38
eodag/rest/types/collections_search.py +3 -3
eodag/rest/types/eodag_search.py +29 -23
eodag/rest/types/queryables.py +42 -31
eodag/rest/types/stac_search.py +15 -25
eodag/rest/utils/__init__.py +14 -21
eodag/rest/utils/cql_evaluate.py +6 -6
eodag/rest/utils/rfc3339.py +2 -2
eodag/types/__init__.py +141 -32
eodag/types/bbox.py +2 -2
eodag/types/download_args.py +3 -3
eodag/types/queryables.py +183 -72
eodag/types/search_args.py +4 -4
eodag/types/whoosh.py +127 -3
eodag/utils/__init__.py +153 -51
eodag/utils/exceptions.py +28 -21
eodag/utils/import_system.py +2 -2
eodag/utils/repr.py +65 -6
eodag/utils/requests.py +13 -13
eodag/utils/rest.py +2 -2
eodag/utils/s3.py +231 -0
eodag/utils/stac_reader.py +10 -10
{eodag-3.0.1.dist-info → eodag-3.1.0.dist-info}/METADATA +77 -76
eodag-3.1.0.dist-info/RECORD +113 -0
{eodag-3.0.1.dist-info → eodag-3.1.0.dist-info}/WHEEL +1 -1
{eodag-3.0.1.dist-info → eodag-3.1.0.dist-info}/entry_points.txt +4 -2
eodag/utils/constraints.py +0 -244
eodag-3.0.1.dist-info/RECORD +0 -109
{eodag-3.0.1.dist-info → eodag-3.1.0.dist-info}/LICENSE +0 -0
{eodag-3.0.1.dist-info → eodag-3.1.0.dist-info}/top_level.txt +0 -0

eodag/utils/repr.py CHANGED Viewed

@@ -18,12 +18,21 @@
 from __future__ import annotations
 import collections.abc
+import re
 from typing import Any, Optional
 from urllib.parse import urlparse
 def str_as_href(link: str) -> str:
-    """URL to html link"""
+    """URL to html link
+    :param link: URL to format
+    :returns: HTML formatted link
+    >>> str_as_href("http://foo.bar")
+    "<a href='http://foo.bar' target='_blank'>http://foo.bar</a>"
+    """
     if urlparse(link).scheme in ("file", "http", "https", "s3"):
         return f"<a href='{link}' target='_blank'>{link}</a>"
     else:
@@ -31,7 +40,13 @@ def str_as_href(link: str) -> str:
 def html_table(input: Any, depth: Optional[int] = None) -> str:
-    """Transform input to HTML table"""
+    """Transform input object to HTML table
+    :param input: input object to represent
+    :param depth: maximum depth level until which nested objects should be represented
+                  in new tables (unlimited by default)
+    :returns: HTML table
+    """
     if isinstance(input, collections.abc.Mapping):
         return dict_to_html_table(input, depth=depth)
     elif isinstance(input, collections.abc.Sequence) and not isinstance(input, str):
@@ -47,7 +62,14 @@ def dict_to_html_table(
     depth: Optional[int] = None,
     brackets: bool = True,
 ) -> str:
-    """Transform input dict to HTML table"""
+    """Transform input dict to HTML table
+    :param input_dict: input dict to represent
+    :param depth: maximum depth level until which nested objects should be represented
+                  in new tables (unlimited by default)
+    :param brackets: whether surrounding brackets should be displayed or not
+    :returns: HTML table
+    """
     opening_bracket = "<span style='color: grey;'>{</span>" if brackets else ""
     closing_bracket = "<span style='color: grey;'>}</span>" if brackets else ""
     indent = "10px" if brackets else "0"
@@ -90,7 +112,13 @@ def dict_to_html_table(
 def list_to_html_table(
     input_list: collections.abc.Sequence, depth: Optional[int] = None
 ) -> str:
-    """Transform input list to HTML table"""
+    """Transform input list to HTML table
+    :param input_list: input list to represent
+    :param depth: maximum depth level until which nested objects should be represented
+                  in new tables (unlimited by default)
+    :returns: HTML table
+    """
     if depth is not None:
         depth -= 1
     separator = (
@@ -103,11 +131,42 @@ def list_to_html_table(
         + separator.join(
             [
                 f"""<span style='text-align: left;'>{
-                html_table(v, depth=depth)
-            }</span>
+                    html_table(v, depth=depth)
+                }</span>
             """
                 for v in input_list
             ]
         )
         + "<span style='color: grey;'>]</span>"
     )
def remove_class_repr(type_repr: str) -> str:
    """Removes class tag from type representation

    Every ``<class 'name'>`` occurrence is replaced by ``name``. Module-qualified
    names (e.g. ``datetime.datetime``) are unwrapped too and keep their prefix.

    :param type_repr: input type representation
    :returns: type without class tag

    >>> remove_class_repr(str(type("foo")))
    'str'
    >>> remove_class_repr("<class 'datetime.datetime'>")
    'datetime.datetime'
    """
    # "[\w.]" instead of "\w" so that dotted (module-qualified) names match as well
    return re.sub(r"<class '([\w.]+)'>", r"\1", type_repr)
def shorter_type_repr(long_type: str) -> str:
    """Shorten long type representation

    Bracketed item lists are cut after their first item, ``parent.`` prefixes are
    dropped and ``<class '...'>`` tags are unwrapped.

    :param long_type: long type representation
    :returns: type representation shortened

    >>> import typing
    >>> shorter_type_repr(str(typing.Literal["foo", "bar"]))
    "Literal['foo', ...]"
    """
    # shorten lists
    shorter = re.sub(r",[^\[^\]]+\]", ", ...]", str(long_type))
    # remove parent objects first: a dotted class repr such as
    # "<class 'datetime.datetime'>" then becomes "<class 'datetime'>", which the
    # class tag removal below is able to unwrap
    shorter = re.sub(r"\w+\.", "", shorter)
    # remove class prefix
    return remove_class_repr(shorter)

eodag/utils/requests.py CHANGED Viewed

@@ -19,7 +19,7 @@ from __future__ import annotations
 import logging
 import os
-from typing import Any, Optional, Tuple
+from typing import Any, Optional
 import requests
@@ -30,7 +30,7 @@ logger = logging.getLogger("eodag.utils.requests")
 def fetch_json(
-    file_url: str,
+    url: str,
     req_session: Optional[requests.Session] = None,
     auth: Optional[requests.auth.AuthBase] = None,
     timeout: float = HTTP_REQ_TIMEOUT,
@@ -38,32 +38,32 @@ def fetch_json(
     """
     Fetches http/distant or local json file
-    :param file_url: url from which the file can be fetched
+    :param url: url from which the file can be fetched
     :param req_session: (optional) requests session
     :param auth: (optional) authenticated object if request needs authentication
     :param timeout: (optional) timeout passed to the request
     :returns: json file content
     """
     if req_session is None:
-        req_session = requests.Session()
+        req_session = requests.sessions.Session()
     try:
-        if not file_url.lower().startswith("http"):
-            file_url = path_to_uri(os.path.abspath(file_url))
+        if not url.lower().startswith("http"):
+            url = path_to_uri(os.path.abspath(url))
             req_session.mount("file://", LocalFileAdapter())
         headers = USER_AGENT
-        logger.debug(f"fetching {file_url}")
+        logger.debug(f"fetching {url}")
         res = req_session.get(
-            file_url,
+            url,
             headers=headers,
             auth=auth,
             timeout=timeout,
         )
         res.raise_for_status()
     except requests.exceptions.Timeout as exc:
-        raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
+        raise TimeOutError(exc, timeout=timeout) from exc
     except requests.exceptions.RequestException as exc:
-        raise RequestError.from_error(exc, f"Unable to fetch {file_url}") from exc
+        raise RequestError.from_error(exc, f"Unable to fetch {url}") from exc
     else:
         return res.json()
@@ -75,7 +75,7 @@ class LocalFileAdapter(requests.adapters.BaseAdapter):
     """
     @staticmethod
-    def _chkpath(method: str, path: str) -> Tuple[int, str]:
+    def _chkpath(method: str, path: str) -> tuple[int, str]:
         """Return an HTTP status for the given filesystem path.
         :param method: method of the request
@@ -100,8 +100,8 @@ class LocalFileAdapter(requests.adapters.BaseAdapter):
     ) -> requests.Response:
         """Wraps a file, described in request, in a Response object.
-        :param req: The PreparedRequest being "sent".
-        :param kwargs: (not used) additionnal arguments of the request
+        :param request: The PreparedRequest being "sent".
+        :param kwargs: (not used) additional arguments of the request
         :returns: a Response object containing the file
         """
         response = requests.Response()

eodag/utils/rest.py CHANGED Viewed

@@ -21,7 +21,7 @@ from __future__ import annotations
 import datetime
 import re
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional
 import dateutil.parser
 from dateutil import tz
@@ -35,7 +35,7 @@ RFC3339_PATTERN = (
 )
-def get_datetime(arguments: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
+def get_datetime(arguments: dict[str, Any]) -> tuple[Optional[str], Optional[str]]:
     """Get start and end dates from a dict containing `/` separated dates in `datetime` item
     :param arguments: dict containing a single date or `/` separated dates in `datetime` item

eodag/utils/s3.py ADDED Viewed

@@ -0,0 +1,231 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024, CS GROUP - France, https://www.csgroup.eu/
+#
+# This file is part of EODAG project
+#     https://www.github.com/CS-SI/EODAG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+import io
+import logging
+import os
+import zipfile
+from typing import TYPE_CHECKING, List, Optional
+from urllib.parse import urlparse
+import boto3
+import botocore
+from eodag.plugins.authentication.aws_auth import AwsAuth
+from eodag.utils import get_bucket_name_and_prefix, guess_file_type
+from eodag.utils.exceptions import (
+    AuthenticationError,
+    MisconfiguredError,
+    NotAvailableError,
+)
+if TYPE_CHECKING:
+    from zipfile import ZipFile, ZipInfo
+    from mypy_boto3_s3.client import S3Client
+    from eodag.api.product import EOProduct  # type: ignore
+logger = logging.getLogger("eodag.utils.s3")
def fetch(
    bucket_name: str, key_name: str, start: int, len: int, client_s3: S3Client
) -> bytes:
    """
    Range-fetches a S3 key.

    :param bucket_name: Bucket name of the object to fetch
    :param key_name: Key name of the object to fetch
    :param start: Offset of the first byte to fetch
    :param len: Number of bytes to fetch
    :param client_s3: s3 client used to fetch the object
    :returns: Object bytes (empty if ``len`` is not positive)
    """
    if len <= 0:
        # nothing to read, and "bytes=<start>-<start - 1>" would not be a valid
        # byte range: skip the request
        return b""
    # the Range header is inclusive on both ends
    end = start + len - 1
    s3_object = client_s3.get_object(
        Bucket=bucket_name, Key=key_name, Range="bytes=%d-%d" % (start, end)
    )
    return s3_object["Body"].read()
def parse_int(bytes: bytes) -> int:
    """
    Parses 2 or 4 little-endian bytes into their corresponding integer value.

    The first 4 bytes are used when at least 4 are given, the first 2 otherwise.

    :param bytes: bytes to parse
    :returns: parsed int
    :raises IndexError: if less than 2 bytes are given
    """
    width = 4 if len(bytes) > 3 else 2
    chunk = bytes[:width]
    if len(chunk) < width:
        raise IndexError(f"at least 2 bytes are needed, got {len(chunk)}")
    return int.from_bytes(chunk, "little")
def open_s3_zipped_object(
    bucket_name: str, key_name: str, client_s3: S3Client, partial: bool = True
) -> ZipFile:
    """
    Open s3 zipped object, without downloading it.

    See https://stackoverflow.com/questions/41789176/how-to-count-files-inside-zip-in-aws-s3-without-downloading-it;
    Based on https://stackoverflow.com/questions/51351000/read-zip-files-from-s3-without-downloading-the-entire-file

    :param bucket_name: Bucket name of the object to fetch
    :param key_name: Key name of the object to fetch
    :param client_s3: s3 client used to fetch the object
    :param partial: fetch partial data if only content info is needed
    :returns: Opened zip archive. If ``partial`` is ``True``, it is only built from
              the central directory and the end of central directory record (no
              file data is fetched).
    """
    response = client_s3.head_object(Bucket=bucket_name, Key=key_name)
    size = response["ContentLength"]

    if not partial:
        # the whole archive is wanted: fetching its central directory separately
        # beforehand would only add requests whose result is thrown away
        return zipfile.ZipFile(
            io.BytesIO(fetch(bucket_name, key_name, 0, size, client_s3))
        )

    # End Of Central Directory bytes
    # NOTE(review): reads the record as the last 22 bytes of the object, i.e. its
    # minimal size; an archive ending with a comment would not fit -- confirm that
    # such archives are not expected here
    eocd = fetch(bucket_name, key_name, size - 22, 22, client_s3)

    # start offset and size of the central directory
    cd_start = parse_int(eocd[16:20])
    cd_size = parse_int(eocd[12:16])

    # fetch central directory, append EOCD, and open as zipfile
    cd = fetch(bucket_name, key_name, cd_start, cd_size, client_s3)
    return zipfile.ZipFile(io.BytesIO(cd + eocd))
def list_files_in_s3_zipped_object(
    bucket_name: str, key_name: str, client_s3: S3Client
) -> List[ZipInfo]:
    """
    Return the members of a zip archive stored on s3, without downloading it.

    See https://stackoverflow.com/questions/41789176/how-to-count-files-inside-zip-in-aws-s3-without-downloading-it;
    Based on https://stackoverflow.com/questions/51351000/read-zip-files-from-s3-without-downloading-the-entire-file

    :param bucket_name: Bucket name of the object to fetch
    :param key_name: Key name of the object to fetch
    :param client_s3: s3 client used to fetch the object
    :returns: List of files in zip
    """
    with open_s3_zipped_object(bucket_name, key_name, client_s3) as archive:
        members = archive.filelist
        logger.debug("Found %s files in %s" % (len(members), key_name))
    return members
def update_assets_from_s3(
    product: EOProduct,
    auth: AwsAuth,
    s3_endpoint: Optional[str] = None,
    content_url: Optional[str] = None,
) -> None:
    """Fill ``EOProduct.assets`` with the content found under the product
    ``remote_location``, or under ``content_url`` when given.

    When the url points to a zipped archive, the archive members are listed too.
    Asset keys already present in the product are left untouched.

    :param product: product to update
    :param auth: Authentication plugin
    :param s3_endpoint: s3 endpoint if not hosted on AWS
    :param content_url: s3 URL pointing to the content that must be listed (defaults to
                        ``product.remote_location`` if ``None``)
    :raises MisconfiguredError: if the authentication plugin does not provide both
                                the access key id and the secret access key
    :raises AuthenticationError: if the s3 client error contains the
                                 ``auth_error_code`` of the plugin configuration
    :raises NotAvailableError: on any other s3 client error
    """
    required_creds = ["aws_access_key_id", "aws_secret_access_key"]

    url = product.remote_location if content_url is None else content_url

    bucket, prefix = get_bucket_name_and_prefix(url)
    if bucket is None or prefix is None:
        logger.debug(f"No s3 prefix could guessed from {url}")
        return

    try:
        credentials = auth.authenticate()
        missing_creds = [name for name in required_creds if name not in credentials]
        if missing_creds:
            raise MisconfiguredError(
                f"Incomplete credentials for {product.provider}, missing {missing_creds}"
            )

        if not getattr(auth, "s3_client", None):
            # no client available on the auth plugin yet: create one and keep it there
            auth.s3_client = boto3.client(
                service_name="s3",
                endpoint_url=s3_endpoint,
                aws_access_key_id=credentials.get("aws_access_key_id"),
                aws_secret_access_key=credentials.get("aws_secret_access_key"),
                aws_session_token=credentials.get("aws_session_token"),
            )
        s3_client = auth.s3_client

        logger.debug("Listing assets in %s", prefix)

        if prefix.endswith(".zip"):
            # List prefix zip content
            zipped_files = list_files_in_s3_zipped_object(bucket, prefix, s3_client)
            assets_urls = [
                f"zip+s3://{bucket}/{prefix}!{member.filename}"
                for member in zipped_files
            ]
        else:
            # List files in prefix (a single request, limited to 300 keys)
            listing = s3_client.list_objects(Bucket=bucket, Prefix=prefix, MaxKeys=300)
            assets_urls = [
                f"s3://{bucket}/{obj['Key']}" for obj in listing.get("Contents", [])
            ]

        for asset_url in assets_urls:
            # for zipped content, only keep the path inside of the archive
            out_of_zip_url = asset_url.rsplit("!", 1)[-1]
            key, roles = product.driver.guess_asset_key_and_roles(
                out_of_zip_url, product
            )
            title = os.path.basename(urlparse(out_of_zip_url).path)

            if not key or key in product.assets:
                continue

            product.assets[key] = {
                "title": title,
                "roles": roles,
                "href": asset_url,
            }
            mime_type = guess_file_type(asset_url)
            if mime_type:
                product.assets[key]["type"] = mime_type

        # sort assets
        product.assets.data = dict(sorted(product.assets.data.items()))
        # update driver
        product.driver = product.get_driver()

    except botocore.exceptions.ClientError as err:
        if hasattr(auth.config, "auth_error_code") and str(
            auth.config.auth_error_code
        ) in str(err):
            raise AuthenticationError(
                f"Authentication failed on {s3_endpoint} s3"
            ) from err
        raise NotAvailableError(
            f"assets for product {prefix} could not be found"
        ) from err

eodag/utils/stac_reader.py CHANGED Viewed

@@ -20,7 +20,7 @@ from __future__ import annotations
 import logging
 import re
 import socket
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Optional, Union
 from urllib.error import URLError
 from urllib.request import urlopen
@@ -108,7 +108,7 @@ def fetch_stac_items(
     max_connections: int = 100,
     timeout: int = HTTP_REQ_TIMEOUT,
     ssl_verify: bool = True,
-) -> List[Dict[str, Any]]:
+) -> list[dict[str, Any]]:
     """Fetch STAC item from a single item file or items from a catalog.
     :param stac_path: A STAC object filepath
@@ -142,13 +142,13 @@ def _fetch_stac_items_from_catalog(
     recursive: bool,
     max_connections: int,
     _text_opener: Callable[[str, bool], Any],
-) -> List[Any]:
+) -> list[Any]:
     """Fetch items from a STAC catalog"""
-    items: List[Dict[Any, Any]] = []
+    items: list[dict[Any, Any]] = []
     # pystac cannot yet return links from a single file catalog, see:
     # https://github.com/stac-utils/pystac/issues/256
-    extensions: Optional[Union[List[str], str]] = getattr(cat, "stac_extensions", None)
+    extensions: Optional[Union[list[str], str]] = getattr(cat, "stac_extensions", None)
     if extensions:
         extensions = extensions if isinstance(extensions, list) else [extensions]
         if "single-file-stac" in extensions:
@@ -157,7 +157,7 @@ def _fetch_stac_items_from_catalog(
     # Making the links absolutes allow for both relative and absolute links to be handled.
     if not recursive:
-        hrefs: List[Optional[str]] = [
+        hrefs: list[Optional[str]] = [
             link.get_absolute_href() for link in cat.get_item_links()
         ]
     else:
@@ -188,7 +188,7 @@ def fetch_stac_collections(
     max_connections: int = 100,
     timeout: int = HTTP_REQ_TIMEOUT,
     ssl_verify: bool = True,
-) -> List[Dict[str, Any]]:
+) -> list[dict[str, Any]]:
     """Fetch STAC collection(s) from a catalog.
     :param stac_path: A STAC object filepath
@@ -217,12 +217,12 @@ def _fetch_stac_collections_from_catalog(
     collection: Optional[str],
     max_connections: int,
     _text_opener: Callable[[str, bool], Any],
-) -> List[Any]:
+) -> list[Any]:
     """Fetch collections from a STAC catalog"""
-    collections: List[Dict[Any, Any]] = []
+    collections: list[dict[Any, Any]] = []
     # Making the links absolutes allow for both relative and absolute links to be handled.
-    hrefs: List[Optional[str]] = [
+    hrefs: list[Optional[str]] = [
         link.get_absolute_href()
         for link in cat.get_child_links()
         if collection is not None and link.title == collection

eodag 3.0.1__py3-none-any.whl → 3.1.0__py3-none-any.whl

eodag 3.0.1py3-none-any.whl → 3.1.0py3-none-any.whl