eodag 3.6.0__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +110 -189
- eodag/api/product/metadata_mapping.py +42 -3
- eodag/cli.py +6 -3
- eodag/config.py +7 -1
- eodag/plugins/authentication/openid_connect.py +1 -2
- eodag/plugins/download/aws.py +145 -178
- eodag/plugins/download/base.py +3 -2
- eodag/plugins/download/creodias_s3.py +10 -5
- eodag/plugins/download/http.py +14 -6
- eodag/plugins/download/s3rest.py +7 -3
- eodag/plugins/manager.py +1 -1
- eodag/plugins/search/base.py +34 -4
- eodag/plugins/search/build_search_result.py +3 -0
- eodag/plugins/search/cop_marine.py +2 -0
- eodag/plugins/search/data_request_search.py +6 -1
- eodag/plugins/search/qssearch.py +64 -25
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +30 -171
- eodag/resources/providers.yml +87 -328
- eodag/resources/stac.yml +1 -2
- eodag/resources/stac_provider.yml +1 -1
- eodag/resources/user_conf_template.yml +0 -11
- eodag/rest/core.py +5 -16
- eodag/rest/stac.py +0 -4
- eodag/utils/__init__.py +41 -27
- eodag/utils/exceptions.py +4 -0
- eodag/utils/free_text_search.py +229 -0
- eodag/utils/s3.py +605 -65
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/METADATA +7 -9
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/RECORD +34 -34
- eodag/types/whoosh.py +0 -203
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/WHEEL +0 -0
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/entry_points.txt +0 -0
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/licenses/LICENSE +0 -0
- {eodag-3.6.0.dist-info → eodag-3.8.0.dist-info}/top_level.txt +0 -0
eodag/plugins/download/aws.py
CHANGED
|
@@ -20,10 +20,8 @@ from __future__ import annotations
|
|
|
20
20
|
import logging
|
|
21
21
|
import os
|
|
22
22
|
import re
|
|
23
|
-
from datetime import datetime
|
|
24
|
-
from itertools import chain
|
|
25
23
|
from pathlib import Path
|
|
26
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
24
|
+
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, cast
|
|
27
25
|
|
|
28
26
|
import boto3
|
|
29
27
|
import requests
|
|
@@ -31,7 +29,6 @@ from botocore.exceptions import ClientError, ProfileNotFound
|
|
|
31
29
|
from botocore.handlers import disable_signing
|
|
32
30
|
from lxml import etree
|
|
33
31
|
from requests.auth import AuthBase
|
|
34
|
-
from stream_zip import ZIP_AUTO, stream_zip
|
|
35
32
|
|
|
36
33
|
from eodag.api.product.metadata_mapping import (
|
|
37
34
|
mtd_cfg_as_conversion_and_querypath,
|
|
@@ -55,15 +52,17 @@ from eodag.utils import (
|
|
|
55
52
|
from eodag.utils.exceptions import (
|
|
56
53
|
AuthenticationError,
|
|
57
54
|
DownloadError,
|
|
55
|
+
EodagError,
|
|
58
56
|
MisconfiguredError,
|
|
59
57
|
NoMatchingProductType,
|
|
60
58
|
NotAvailableError,
|
|
61
59
|
TimeOutError,
|
|
62
60
|
)
|
|
63
|
-
from eodag.utils.s3 import open_s3_zipped_object
|
|
61
|
+
from eodag.utils.s3 import S3FileInfo, open_s3_zipped_object, stream_download_from_s3
|
|
64
62
|
|
|
65
63
|
if TYPE_CHECKING:
|
|
66
|
-
from
|
|
64
|
+
from mypy_boto3_s3.client import S3Client
|
|
65
|
+
from mypy_boto3_s3.service_resource import BucketObjectsCollection
|
|
67
66
|
|
|
68
67
|
from eodag.api.product import EOProduct
|
|
69
68
|
from eodag.api.search_result import SearchResult
|
|
@@ -306,23 +305,12 @@ class AwsDownload(Download):
|
|
|
306
305
|
self._configure_safe_build(build_safe, product)
|
|
307
306
|
# bucket names and prefixes
|
|
308
307
|
bucket_names_and_prefixes = self._get_bucket_names_and_prefixes(
|
|
309
|
-
product,
|
|
308
|
+
product,
|
|
309
|
+
asset_filter,
|
|
310
|
+
ignore_assets,
|
|
311
|
+
product_conf.get("complementary_url_key", []),
|
|
310
312
|
)
|
|
311
313
|
|
|
312
|
-
# add complementary urls
|
|
313
|
-
try:
|
|
314
|
-
for complementary_url_key in product_conf.get("complementary_url_key", []):
|
|
315
|
-
bucket_names_and_prefixes.append(
|
|
316
|
-
self.get_product_bucket_name_and_prefix(
|
|
317
|
-
product, product.properties[complementary_url_key]
|
|
318
|
-
)
|
|
319
|
-
)
|
|
320
|
-
except KeyError:
|
|
321
|
-
logger.warning(
|
|
322
|
-
"complementary_url_key %s is missing in %s properties"
|
|
323
|
-
% (complementary_url_key, product.properties["id"])
|
|
324
|
-
)
|
|
325
|
-
|
|
326
314
|
# authenticate
|
|
327
315
|
authenticated_objects, s3_objects = self._do_authentication(
|
|
328
316
|
bucket_names_and_prefixes, auth
|
|
@@ -429,15 +417,17 @@ class AwsDownload(Download):
|
|
|
429
417
|
if not os.path.isdir(dest_abs_path_dir):
|
|
430
418
|
os.makedirs(dest_abs_path_dir)
|
|
431
419
|
|
|
432
|
-
|
|
420
|
+
zip_file, _ = open_s3_zipped_object(
|
|
433
421
|
bucket_name, zip_prefix, s3_client, partial=False
|
|
434
|
-
)
|
|
422
|
+
)
|
|
423
|
+
with zip_file:
|
|
435
424
|
# file size
|
|
436
425
|
file_info = zip_file.getinfo(rel_path)
|
|
437
426
|
progress_callback.reset(total=file_info.file_size)
|
|
438
|
-
with
|
|
439
|
-
|
|
440
|
-
|
|
427
|
+
with (
|
|
428
|
+
zip_file.open(rel_path) as extracted,
|
|
429
|
+
open(dest_file, "wb") as output_file,
|
|
430
|
+
):
|
|
441
431
|
# Read in 1MB chunks
|
|
442
432
|
for zchunk in iter(lambda: extracted.read(1024 * 1024), b""):
|
|
443
433
|
output_file.write(zchunk)
|
|
@@ -530,8 +520,9 @@ class AwsDownload(Download):
|
|
|
530
520
|
def _get_bucket_names_and_prefixes(
|
|
531
521
|
self,
|
|
532
522
|
product: EOProduct,
|
|
533
|
-
asset_filter: Optional[str]
|
|
534
|
-
ignore_assets:
|
|
523
|
+
asset_filter: Optional[str],
|
|
524
|
+
ignore_assets: bool,
|
|
525
|
+
complementary_url_keys: list[str],
|
|
535
526
|
) -> list[tuple[str, Optional[str]]]:
|
|
536
527
|
"""
|
|
537
528
|
Retrieves the bucket names and path prefixes for the assets
|
|
@@ -570,13 +561,27 @@ class AwsDownload(Download):
|
|
|
570
561
|
bucket_names_and_prefixes = [
|
|
571
562
|
self.get_product_bucket_name_and_prefix(product)
|
|
572
563
|
]
|
|
564
|
+
|
|
565
|
+
# add complementary urls
|
|
566
|
+
try:
|
|
567
|
+
for complementary_url_key in complementary_url_keys or []:
|
|
568
|
+
bucket_names_and_prefixes.append(
|
|
569
|
+
self.get_product_bucket_name_and_prefix(
|
|
570
|
+
product, product.properties[complementary_url_key]
|
|
571
|
+
)
|
|
572
|
+
)
|
|
573
|
+
except KeyError:
|
|
574
|
+
logger.warning(
|
|
575
|
+
"complementary_url_key %s is missing in %s properties"
|
|
576
|
+
% (complementary_url_key, product.properties["id"])
|
|
577
|
+
)
|
|
573
578
|
return bucket_names_and_prefixes
|
|
574
579
|
|
|
575
580
|
def _do_authentication(
|
|
576
581
|
self,
|
|
577
582
|
bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
|
|
578
583
|
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
579
|
-
) -> tuple[dict[str, Any],
|
|
584
|
+
) -> tuple[dict[str, Any], BucketObjectsCollection]:
|
|
580
585
|
"""
|
|
581
586
|
Authenticates with s3 and retrieves the available objects
|
|
582
587
|
|
|
@@ -705,152 +710,93 @@ class AwsDownload(Download):
|
|
|
705
710
|
self,
|
|
706
711
|
product: EOProduct,
|
|
707
712
|
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
708
|
-
|
|
713
|
+
byte_range: tuple[Optional[int], Optional[int]] = (None, None),
|
|
714
|
+
compress: Literal["zip", "raw", "auto"] = "auto",
|
|
709
715
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
710
716
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
711
717
|
**kwargs: Unpack[DownloadConf],
|
|
712
718
|
) -> StreamResponse:
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
719
|
+
"""
|
|
720
|
+
Stream EO product data as a FastAPI-compatible `StreamResponse`, with support for partial downloads,
|
|
721
|
+
asset filtering, and on-the-fly compression.
|
|
716
722
|
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
723
|
+
This method streams data from one or more S3 objects that belong to a given EO product.
|
|
724
|
+
It supports:
|
|
725
|
+
|
|
726
|
+
- **Regex-based asset filtering** via `asset`, allowing partial product downloads.
|
|
727
|
+
- **Byte-range requests** through the `byte_range` parameter, enabling partial download of data.
|
|
728
|
+
- **Selective file extraction from ZIP archives**, for uncompressed entries (ZIP method: STORE only).
|
|
729
|
+
This enables lazy access to individual files inside ZIPs without downloading the entire archive.
|
|
730
|
+
|
|
731
|
+
Data is downloaded from S3 in parallel using HTTP range requests, which improves speed by downloading
|
|
732
|
+
chunks concurrently using multiple concurrent **range requests**.
|
|
733
|
+
|
|
734
|
+
#### Compression Behavior (`compress` parameter):
|
|
735
|
+
|
|
736
|
+
- `"raw"`:
|
|
737
|
+
- If there is only one file: returns a raw stream of that file.
|
|
738
|
+
- For multiple files, streams them sequentially using an HTTP multipart/mixed response with proper MIME
|
|
739
|
+
boundaries and per-file headers, allowing clients to parse each file independently.
|
|
740
|
+
|
|
741
|
+
- `"auto"` (default):
|
|
742
|
+
- Streams a single file as raw.
|
|
743
|
+
- Streams multiple files as a ZIP archive.
|
|
744
|
+
|
|
745
|
+
- `"zip"`:
|
|
746
|
+
- Always returns a ZIP archive, whether one or many files are included.
|
|
747
|
+
|
|
748
|
+
#### SAFE Archive Support:
|
|
749
|
+
|
|
750
|
+
If the product type supports SAFE structure and no `asset_regex` is specified (i.e., full product download),
|
|
751
|
+
the method attempts to reconstruct a valid SAFE archive layout in the streamed output.
|
|
752
|
+
|
|
753
|
+
:param product: The EO product to download.
|
|
754
|
+
:param asset: (optional) Regex pattern to filter which assets/files to include.
|
|
755
|
+
:param auth: (optional) Authentication configuration (e.g., AWS credentials).
|
|
756
|
+
:param byte_range: Tuple of (start, end) for a global byte range request. Either can be None for open-ended
|
|
757
|
+
ranges.
|
|
758
|
+
:param compress: One of "zip", "raw", or "auto". Controls how output is compressed:
|
|
759
|
+
- "raw": single file is streamed directly; multiple files use a custom separator.
|
|
760
|
+
- "auto": raw for single file, zipped for multiple.
|
|
761
|
+
- "zip": always returns a ZIP archive.
|
|
762
|
+
:returns: A `StreamResponse` object containing the streamed download and appropriate headers.
|
|
728
763
|
"""
|
|
729
|
-
|
|
730
|
-
logger.info(
|
|
731
|
-
"Progress bar unavailable, please call product.download() instead of plugin.download()"
|
|
732
|
-
)
|
|
733
|
-
progress_callback = ProgressCallback(disable=True)
|
|
764
|
+
asset_regex = kwargs.get("asset")
|
|
734
765
|
|
|
735
766
|
product_conf = getattr(self.config, "products", {}).get(
|
|
736
767
|
product.product_type, {}
|
|
737
768
|
)
|
|
738
|
-
# do not try to build SAFE if asset filter is used
|
|
739
|
-
asset_filter = kwargs.get("asset")
|
|
740
|
-
if asset_filter:
|
|
741
|
-
build_safe = False
|
|
742
|
-
else:
|
|
743
|
-
build_safe = product_conf.get("build_safe", False)
|
|
744
769
|
|
|
770
|
+
build_safe = (
|
|
771
|
+
False if asset_regex is not None else product_conf.get("build_safe", False)
|
|
772
|
+
)
|
|
745
773
|
ignore_assets = getattr(self.config, "ignore_assets", False)
|
|
746
774
|
|
|
747
|
-
# xtra metadata needed for SAFE product
|
|
748
775
|
self._configure_safe_build(build_safe, product)
|
|
749
|
-
|
|
776
|
+
|
|
750
777
|
bucket_names_and_prefixes = self._get_bucket_names_and_prefixes(
|
|
751
|
-
product,
|
|
778
|
+
product,
|
|
779
|
+
asset_regex,
|
|
780
|
+
ignore_assets,
|
|
781
|
+
product_conf.get("complementary_url_key", []),
|
|
752
782
|
)
|
|
753
783
|
|
|
754
|
-
# add complementary urls
|
|
755
|
-
try:
|
|
756
|
-
for complementary_url_key in product_conf.get("complementary_url_key", []):
|
|
757
|
-
bucket_names_and_prefixes.append(
|
|
758
|
-
self.get_product_bucket_name_and_prefix(
|
|
759
|
-
product, product.properties[complementary_url_key]
|
|
760
|
-
)
|
|
761
|
-
)
|
|
762
|
-
except KeyError:
|
|
763
|
-
logger.warning(
|
|
764
|
-
"complementary_url_key %s is missing in %s properties"
|
|
765
|
-
% (complementary_url_key, product.properties["id"])
|
|
766
|
-
)
|
|
767
|
-
|
|
768
784
|
# authenticate
|
|
769
|
-
authenticated_objects,
|
|
785
|
+
authenticated_objects, _ = self._do_authentication(
|
|
770
786
|
bucket_names_and_prefixes, auth
|
|
771
787
|
)
|
|
772
788
|
|
|
773
|
-
# stream not implemented for prefixes like `foo/bar.zip!file.txt`
|
|
774
|
-
for _, prefix in bucket_names_and_prefixes:
|
|
775
|
-
if prefix and ".zip!" in prefix:
|
|
776
|
-
raise NotImplementedError(
|
|
777
|
-
"Download streaming is not implemented for files in zip on S3"
|
|
778
|
-
)
|
|
779
|
-
|
|
780
789
|
# downloadable files
|
|
781
|
-
|
|
790
|
+
product_objects = self._get_unique_products(
|
|
782
791
|
bucket_names_and_prefixes,
|
|
783
792
|
authenticated_objects,
|
|
784
|
-
|
|
793
|
+
asset_regex,
|
|
785
794
|
ignore_assets,
|
|
786
795
|
product,
|
|
787
796
|
)
|
|
788
|
-
assets_values = product.assets.get_values(asset_filter)
|
|
789
|
-
chunks_tuples = self._stream_download(
|
|
790
|
-
unique_product_chunks, product, build_safe, progress_callback, assets_values
|
|
791
|
-
)
|
|
792
|
-
outputs_filename = (
|
|
793
|
-
sanitize(product.properties["title"])
|
|
794
|
-
if "title" in product.properties
|
|
795
|
-
else sanitize(product.properties.get("id", "download"))
|
|
796
|
-
)
|
|
797
797
|
|
|
798
|
-
if
|
|
799
|
-
|
|
800
|
-
# update headers
|
|
801
|
-
filename = os.path.basename(list(unique_product_chunks)[0].key)
|
|
802
|
-
headers = {"content-disposition": f"attachment; filename={filename}"}
|
|
803
|
-
if assets_values and assets_values[0].get("type"):
|
|
804
|
-
headers["content-type"] = assets_values[0]["type"]
|
|
805
|
-
|
|
806
|
-
return StreamResponse(
|
|
807
|
-
content=chain(iter([first_chunks_tuple]), chunks_tuples),
|
|
808
|
-
headers=headers,
|
|
809
|
-
)
|
|
810
|
-
return StreamResponse(
|
|
811
|
-
content=stream_zip(chunks_tuples),
|
|
812
|
-
media_type="application/zip",
|
|
813
|
-
headers={
|
|
814
|
-
"content-disposition": f"attachment; filename={outputs_filename}.zip",
|
|
815
|
-
},
|
|
816
|
-
)
|
|
817
|
-
|
|
818
|
-
def _stream_download(
|
|
819
|
-
self,
|
|
820
|
-
unique_product_chunks: set[Any],
|
|
821
|
-
product: EOProduct,
|
|
822
|
-
build_safe: bool,
|
|
823
|
-
progress_callback: ProgressCallback,
|
|
824
|
-
assets_values: list[dict[str, Any]],
|
|
825
|
-
) -> Iterator[Any]:
|
|
826
|
-
"""Yield product data chunks"""
|
|
827
|
-
|
|
828
|
-
chunk_size = 4096 * 1024
|
|
829
|
-
modified_at = datetime.now()
|
|
830
|
-
perms = 0o600
|
|
831
|
-
|
|
832
|
-
def get_chunk_parts(
|
|
833
|
-
product_chunk: Any, progress_callback: ProgressCallback
|
|
834
|
-
) -> Any:
|
|
835
|
-
try:
|
|
836
|
-
chunk_start = 0
|
|
837
|
-
chunk_end = chunk_start + chunk_size - 1
|
|
838
|
-
|
|
839
|
-
while chunk_start <= product_chunk.size:
|
|
840
|
-
get_kwargs = (
|
|
841
|
-
dict(RequestPayer="requester") if self.requester_pays else {}
|
|
842
|
-
)
|
|
843
|
-
chunk_part = product_chunk.get(
|
|
844
|
-
Range=f"bytes={chunk_start}-{chunk_end}", **get_kwargs
|
|
845
|
-
)["Body"].read()
|
|
846
|
-
progress_callback(len(chunk_part))
|
|
847
|
-
chunk_start += chunk_size
|
|
848
|
-
chunk_end += chunk_size
|
|
849
|
-
yield chunk_part
|
|
850
|
-
|
|
851
|
-
except ClientError as e:
|
|
852
|
-
self._raise_if_auth_error(e)
|
|
853
|
-
raise DownloadError("Unexpected error: %s" % e) from e
|
|
798
|
+
if self.s3_resource is None:
|
|
799
|
+
raise EodagError("Cannot check files in s3 zip without s3 resource")
|
|
854
800
|
|
|
855
801
|
product_conf = getattr(self.config, "products", {}).get(
|
|
856
802
|
product.product_type, {}
|
|
@@ -858,38 +804,58 @@ class AwsDownload(Download):
|
|
|
858
804
|
flatten_top_dirs = product_conf.get(
|
|
859
805
|
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
|
|
860
806
|
)
|
|
861
|
-
common_path =
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
807
|
+
common_path = (
|
|
808
|
+
self._get_commonpath(product, product_objects, build_safe)
|
|
809
|
+
if flatten_top_dirs
|
|
810
|
+
else ""
|
|
811
|
+
)
|
|
812
|
+
if len(product_objects) == 1:
|
|
813
|
+
common_path = os.path.dirname(common_path)
|
|
814
|
+
|
|
815
|
+
assets_by_path = {
|
|
816
|
+
a.get("href", "").split("s3://")[-1]: a
|
|
817
|
+
for a in product.assets.get_values(asset_filter=asset_regex or "")
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
files_info = []
|
|
821
|
+
for obj in product_objects:
|
|
867
822
|
try:
|
|
868
|
-
|
|
869
|
-
product,
|
|
870
|
-
product_chunk,
|
|
871
|
-
build_safe=build_safe,
|
|
872
|
-
)
|
|
823
|
+
rel_path = self.get_chunk_dest_path(product, obj, build_safe=build_safe)
|
|
873
824
|
if flatten_top_dirs:
|
|
874
|
-
|
|
825
|
+
rel_path = os.path.join(
|
|
875
826
|
product.properties["title"],
|
|
876
|
-
re.sub(rf"^{common_path}/?", "",
|
|
827
|
+
re.sub(rf"^{common_path}/?", "", rel_path),
|
|
877
828
|
)
|
|
878
829
|
|
|
830
|
+
data_type = assets_by_path.get(f"{obj.bucket_name}/{obj.key}", {}).get(
|
|
831
|
+
"type"
|
|
832
|
+
)
|
|
833
|
+
|
|
834
|
+
file_info = S3FileInfo(
|
|
835
|
+
key=obj.key,
|
|
836
|
+
size=obj.size,
|
|
837
|
+
bucket_name=obj.bucket_name,
|
|
838
|
+
rel_path=rel_path,
|
|
839
|
+
)
|
|
840
|
+
if data_type:
|
|
841
|
+
file_info.data_type = data_type
|
|
842
|
+
|
|
843
|
+
files_info.append(file_info)
|
|
879
844
|
except NotAvailableError as e:
|
|
880
|
-
# out of SAFE format chunk
|
|
881
845
|
logger.warning(e)
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
846
|
+
|
|
847
|
+
title = product.properties.get("title") or product.properties.get(
|
|
848
|
+
"id", "download"
|
|
849
|
+
)
|
|
850
|
+
zip_filename = sanitize(title)
|
|
851
|
+
|
|
852
|
+
return stream_download_from_s3(
|
|
853
|
+
cast("S3Client", self.s3_resource.meta.client),
|
|
854
|
+
files_info,
|
|
855
|
+
byte_range,
|
|
856
|
+
compress,
|
|
857
|
+
zip_filename,
|
|
858
|
+
)
|
|
893
859
|
|
|
894
860
|
def _get_commonpath(
|
|
895
861
|
self, product: EOProduct, product_chunks: set[Any], build_safe: bool
|
|
@@ -931,7 +897,7 @@ class AwsDownload(Download):
|
|
|
931
897
|
|
|
932
898
|
def get_authenticated_objects(
|
|
933
899
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
934
|
-
) ->
|
|
900
|
+
) -> BucketObjectsCollection:
|
|
935
901
|
"""Get boto3 authenticated objects for the given bucket using
|
|
936
902
|
the most adapted auth strategy.
|
|
937
903
|
Also expose ``s3_session`` as class variable if available.
|
|
@@ -943,7 +909,7 @@ class AwsDownload(Download):
|
|
|
943
909
|
:returns: The boto3 authenticated objects
|
|
944
910
|
"""
|
|
945
911
|
auth_methods: list[
|
|
946
|
-
Callable[[str, str, S3SessionKwargs], Optional[
|
|
912
|
+
Callable[[str, str, S3SessionKwargs], Optional[BucketObjectsCollection]]
|
|
947
913
|
] = [
|
|
948
914
|
self._get_authenticated_objects_unsigned,
|
|
949
915
|
self._get_authenticated_objects_from_auth_profile,
|
|
@@ -979,7 +945,7 @@ class AwsDownload(Download):
|
|
|
979
945
|
|
|
980
946
|
def _get_authenticated_objects_unsigned(
|
|
981
947
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
982
|
-
) -> Optional[
|
|
948
|
+
) -> Optional[BucketObjectsCollection]:
|
|
983
949
|
"""Auth strategy using no-sign-request"""
|
|
984
950
|
|
|
985
951
|
s3_resource = boto3.resource(
|
|
@@ -990,11 +956,12 @@ class AwsDownload(Download):
|
|
|
990
956
|
)
|
|
991
957
|
objects = s3_resource.Bucket(bucket_name).objects
|
|
992
958
|
list(objects.filter(Prefix=prefix).limit(1))
|
|
959
|
+
self.s3_resource = s3_resource
|
|
993
960
|
return objects
|
|
994
961
|
|
|
995
962
|
def _get_authenticated_objects_from_auth_profile(
|
|
996
963
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
997
|
-
) -> Optional[
|
|
964
|
+
) -> Optional[BucketObjectsCollection]:
|
|
998
965
|
"""Auth strategy using RequestPayer=requester and ``aws_profile`` from provided credentials"""
|
|
999
966
|
|
|
1000
967
|
if "profile_name" in auth_dict.keys():
|
|
@@ -1018,7 +985,7 @@ class AwsDownload(Download):
|
|
|
1018
985
|
|
|
1019
986
|
def _get_authenticated_objects_from_auth_keys(
|
|
1020
987
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
1021
|
-
) -> Optional[
|
|
988
|
+
) -> Optional[BucketObjectsCollection]:
|
|
1022
989
|
"""Auth strategy using RequestPayer=requester and ``aws_access_key_id``/``aws_secret_access_key``
|
|
1023
990
|
from provided credentials"""
|
|
1024
991
|
|
|
@@ -1049,7 +1016,7 @@ class AwsDownload(Download):
|
|
|
1049
1016
|
|
|
1050
1017
|
def _get_authenticated_objects_from_env(
|
|
1051
1018
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
1052
|
-
) -> Optional[
|
|
1019
|
+
) -> Optional[BucketObjectsCollection]:
|
|
1053
1020
|
"""Auth strategy using RequestPayer=requester and current environment"""
|
|
1054
1021
|
|
|
1055
1022
|
s3_session = boto3.session.Session()
|
eodag/plugins/download/base.py
CHANGED
|
@@ -27,7 +27,7 @@ import zipfile
|
|
|
27
27
|
from datetime import datetime, timedelta
|
|
28
28
|
from pathlib import Path
|
|
29
29
|
from time import sleep
|
|
30
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
|
|
30
|
+
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, Union
|
|
31
31
|
|
|
32
32
|
from eodag.api.product.metadata_mapping import ONLINE_STATUS
|
|
33
33
|
from eodag.plugins.base import PluginTopic
|
|
@@ -134,7 +134,8 @@ class Download(PluginTopic):
|
|
|
134
134
|
self,
|
|
135
135
|
product: EOProduct,
|
|
136
136
|
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
137
|
-
|
|
137
|
+
byte_range: tuple[Optional[int], Optional[int]] = (None, None),
|
|
138
|
+
compress: Literal["zip", "raw", "auto"] = "auto",
|
|
138
139
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
139
140
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
140
141
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
16
|
# See the License for the specific language governing permissions and
|
|
17
17
|
# limitations under the License.
|
|
18
|
-
from typing import Optional
|
|
18
|
+
from typing import TYPE_CHECKING, Optional
|
|
19
19
|
|
|
20
20
|
import boto3
|
|
21
21
|
from botocore.exceptions import ClientError
|
|
@@ -24,6 +24,9 @@ from eodag import EOProduct
|
|
|
24
24
|
from eodag.plugins.download.aws import AwsDownload
|
|
25
25
|
from eodag.utils.exceptions import MisconfiguredError
|
|
26
26
|
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from mypy_boto3_s3.service_resource import S3ServiceResource
|
|
29
|
+
|
|
27
30
|
|
|
28
31
|
class CreodiasS3Download(AwsDownload):
|
|
29
32
|
"""
|
|
@@ -60,19 +63,21 @@ class CreodiasS3Download(AwsDownload):
|
|
|
60
63
|
)
|
|
61
64
|
|
|
62
65
|
s3_session = boto3.session.Session(**auth_dict)
|
|
63
|
-
s3_resource = s3_session.resource(
|
|
66
|
+
s3_resource: S3ServiceResource = s3_session.resource(
|
|
64
67
|
"s3", endpoint_url=getattr(self.config, "s3_endpoint", None)
|
|
65
68
|
)
|
|
66
69
|
objects = s3_resource.Bucket(bucket_name).objects.filter()
|
|
67
70
|
list(objects.filter(Prefix=prefix).limit(1))
|
|
68
71
|
self.s3_session = s3_session
|
|
72
|
+
self.s3_resource = s3_resource
|
|
69
73
|
return objects
|
|
70
74
|
|
|
71
75
|
def _get_bucket_names_and_prefixes(
|
|
72
76
|
self,
|
|
73
77
|
product: EOProduct,
|
|
74
|
-
asset_filter: Optional[str]
|
|
75
|
-
ignore_assets:
|
|
78
|
+
asset_filter: Optional[str],
|
|
79
|
+
ignore_assets: bool,
|
|
80
|
+
complementary_url_keys: list[str],
|
|
76
81
|
) -> list[tuple[str, Optional[str]]]:
|
|
77
82
|
"""
|
|
78
83
|
Retrieves the bucket names and path prefixes for the assets
|
|
@@ -85,7 +90,7 @@ class CreodiasS3Download(AwsDownload):
|
|
|
85
90
|
# if assets are defined, use them instead of scanning product.location
|
|
86
91
|
if len(product.assets) > 0 and not ignore_assets:
|
|
87
92
|
bucket_names_and_prefixes = super()._get_bucket_names_and_prefixes(
|
|
88
|
-
product, asset_filter, ignore_assets
|
|
93
|
+
product, asset_filter, ignore_assets, complementary_url_keys
|
|
89
94
|
)
|
|
90
95
|
else:
|
|
91
96
|
# if no assets are given, use productIdentifier to get S3 path for download
|
eodag/plugins/download/http.py
CHANGED
|
@@ -28,7 +28,16 @@ from email.message import Message
|
|
|
28
28
|
from itertools import chain
|
|
29
29
|
from json import JSONDecodeError
|
|
30
30
|
from pathlib import Path
|
|
31
|
-
from typing import
|
|
31
|
+
from typing import (
|
|
32
|
+
TYPE_CHECKING,
|
|
33
|
+
Any,
|
|
34
|
+
Iterator,
|
|
35
|
+
Literal,
|
|
36
|
+
Optional,
|
|
37
|
+
TypedDict,
|
|
38
|
+
Union,
|
|
39
|
+
cast,
|
|
40
|
+
)
|
|
32
41
|
from urllib.parse import parse_qs, urlparse
|
|
33
42
|
|
|
34
43
|
import geojson
|
|
@@ -745,7 +754,8 @@ class HTTPDownload(Download):
|
|
|
745
754
|
self,
|
|
746
755
|
product: EOProduct,
|
|
747
756
|
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
748
|
-
|
|
757
|
+
byte_range: tuple[Optional[int], Optional[int]] = (None, None),
|
|
758
|
+
compress: Literal["zip", "raw", "auto"] = "auto",
|
|
749
759
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
750
760
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
751
761
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -779,7 +789,7 @@ class HTTPDownload(Download):
|
|
|
779
789
|
chunks_tuples = self._stream_download_assets(
|
|
780
790
|
product,
|
|
781
791
|
auth,
|
|
782
|
-
|
|
792
|
+
None,
|
|
783
793
|
assets_values=assets_values,
|
|
784
794
|
**kwargs,
|
|
785
795
|
)
|
|
@@ -825,9 +835,7 @@ class HTTPDownload(Download):
|
|
|
825
835
|
else:
|
|
826
836
|
pass
|
|
827
837
|
|
|
828
|
-
chunk_iterator = self._stream_download(
|
|
829
|
-
product, auth, progress_callback, **kwargs
|
|
830
|
-
)
|
|
838
|
+
chunk_iterator = self._stream_download(product, auth, None, **kwargs)
|
|
831
839
|
|
|
832
840
|
# start reading chunks to set product.headers
|
|
833
841
|
try:
|
eodag/plugins/download/s3rest.py
CHANGED
|
@@ -21,6 +21,7 @@ import logging
|
|
|
21
21
|
import os
|
|
22
22
|
import os.path
|
|
23
23
|
from typing import TYPE_CHECKING, Optional, Union
|
|
24
|
+
from urllib.parse import unquote, urljoin
|
|
24
25
|
from xml.dom import minidom
|
|
25
26
|
from xml.parsers.expat import ExpatError
|
|
26
27
|
|
|
@@ -38,10 +39,9 @@ from eodag.utils import (
|
|
|
38
39
|
HTTP_REQ_TIMEOUT,
|
|
39
40
|
USER_AGENT,
|
|
40
41
|
ProgressCallback,
|
|
42
|
+
_deprecated,
|
|
41
43
|
get_bucket_name_and_prefix,
|
|
42
44
|
path_to_uri,
|
|
43
|
-
unquote,
|
|
44
|
-
urljoin,
|
|
45
45
|
)
|
|
46
46
|
from eodag.utils.exceptions import (
|
|
47
47
|
AuthenticationError,
|
|
@@ -61,6 +61,10 @@ if TYPE_CHECKING:
|
|
|
61
61
|
logger = logging.getLogger("eodag.download.s3rest")
|
|
62
62
|
|
|
63
63
|
|
|
64
|
+
@_deprecated(
|
|
65
|
+
reason="Plugin that was used in previous mundi provider configuration, but not used anymore",
|
|
66
|
+
version="3.7.1",
|
|
67
|
+
)
|
|
64
68
|
class S3RestDownload(Download):
|
|
65
69
|
"""Http download on S3-like object storage location
|
|
66
70
|
|
|
@@ -88,7 +92,7 @@ class S3RestDownload(Download):
|
|
|
88
92
|
"""
|
|
89
93
|
|
|
90
94
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
91
|
-
super(
|
|
95
|
+
super().__init__(provider, config)
|
|
92
96
|
self.http_download_plugin = HTTPDownload(self.provider, self.config)
|
|
93
97
|
|
|
94
98
|
def download(
|
eodag/plugins/manager.py
CHANGED
|
@@ -109,7 +109,7 @@ class PluginManager:
|
|
|
109
109
|
logger.warning("Reason:\n%s", tb.format_exc())
|
|
110
110
|
logger.warning(
|
|
111
111
|
"Check that the plugin module (%s) is importable",
|
|
112
|
-
entry_point.
|
|
112
|
+
entry_point.name,
|
|
113
113
|
)
|
|
114
114
|
if entry_point.dist and entry_point.dist.name != "eodag":
|
|
115
115
|
# use plugin providers if any
|