eodag 3.6.0__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +0 -14
- eodag/api/product/metadata_mapping.py +20 -3
- eodag/cli.py +6 -3
- eodag/config.py +5 -0
- eodag/plugins/authentication/openid_connect.py +1 -2
- eodag/plugins/download/aws.py +145 -178
- eodag/plugins/download/base.py +3 -2
- eodag/plugins/download/creodias_s3.py +10 -5
- eodag/plugins/download/http.py +14 -6
- eodag/plugins/download/s3rest.py +1 -2
- eodag/plugins/manager.py +1 -1
- eodag/plugins/search/base.py +34 -4
- eodag/plugins/search/build_search_result.py +3 -0
- eodag/plugins/search/cop_marine.py +2 -0
- eodag/plugins/search/qssearch.py +44 -25
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +30 -153
- eodag/resources/providers.yml +48 -325
- eodag/resources/stac.yml +1 -2
- eodag/resources/user_conf_template.yml +0 -11
- eodag/rest/core.py +5 -9
- eodag/utils/__init__.py +41 -27
- eodag/utils/exceptions.py +4 -0
- eodag/utils/s3.py +605 -65
- {eodag-3.6.0.dist-info → eodag-3.7.0.dist-info}/METADATA +7 -8
- {eodag-3.6.0.dist-info → eodag-3.7.0.dist-info}/RECORD +30 -30
- {eodag-3.6.0.dist-info → eodag-3.7.0.dist-info}/WHEEL +0 -0
- {eodag-3.6.0.dist-info → eodag-3.7.0.dist-info}/entry_points.txt +0 -0
- {eodag-3.6.0.dist-info → eodag-3.7.0.dist-info}/licenses/LICENSE +0 -0
- {eodag-3.6.0.dist-info → eodag-3.7.0.dist-info}/top_level.txt +0 -0
eodag/api/core.py
CHANGED
|
@@ -2007,20 +2007,6 @@ class EODataAccessGateway:
|
|
|
2007
2007
|
nb_res,
|
|
2008
2008
|
search_plugin.provider,
|
|
2009
2009
|
)
|
|
2010
|
-
# Hitting for instance
|
|
2011
|
-
# https://theia.cnes.fr/atdistrib/resto2/api/collections/SENTINEL2/
|
|
2012
|
-
# search.json?startDate=2019-03-01&completionDate=2019-06-15
|
|
2013
|
-
# &processingLevel=LEVEL2A&maxRecords=1&page=1
|
|
2014
|
-
# returns a number (properties.totalResults) that is the number of
|
|
2015
|
-
# products in the collection (here SENTINEL2) instead of the estimated
|
|
2016
|
-
# total number of products matching the search criteria (start/end date).
|
|
2017
|
-
# Remove this warning when this is fixed upstream by THEIA.
|
|
2018
|
-
if search_plugin.provider == "theia":
|
|
2019
|
-
logger.warning(
|
|
2020
|
-
"Results found on provider 'theia' is the total number of products "
|
|
2021
|
-
"available in the searched collection (e.g. SENTINEL2) instead of "
|
|
2022
|
-
"the total number of products matching the search criteria"
|
|
2023
|
-
)
|
|
2024
2010
|
except Exception as e:
|
|
2025
2011
|
if raise_errors:
|
|
2026
2012
|
# Raise the error, letting the application wrapping eodag know that
|
eodag/api/product/metadata_mapping.py
CHANGED
|
@@ -219,7 +219,7 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
|
|
|
219
219
|
elif value is not None:
|
|
220
220
|
converted = self.custom_converter(value)
|
|
221
221
|
else:
|
|
222
|
-
converted =
|
|
222
|
+
converted = None
|
|
223
223
|
# Clear this state variable in case the same converter is used to
|
|
224
224
|
# resolve other named arguments
|
|
225
225
|
self.custom_converter = None
|
|
@@ -374,6 +374,18 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
|
|
|
374
374
|
def convert_to_geojson(value: Any) -> str:
|
|
375
375
|
return geojson.dumps(value)
|
|
376
376
|
|
|
377
|
+
@staticmethod
|
|
378
|
+
def convert_to_geojson_polytope(
|
|
379
|
+
value: BaseGeometry,
|
|
380
|
+
) -> Union[dict[Any, Any], str]:
|
|
381
|
+
# ECMWF Polytope uses non-geojson structure for features
|
|
382
|
+
if isinstance(value, Polygon):
|
|
383
|
+
return {
|
|
384
|
+
"type": "polygon",
|
|
385
|
+
"shape": [[y, x] for x, y in value.exterior.coords],
|
|
386
|
+
}
|
|
387
|
+
raise ValidationError("to_geojson_polytope only accepts shapely Polygon")
|
|
388
|
+
|
|
377
389
|
@staticmethod
|
|
378
390
|
def convert_from_ewkt(ewkt_string: str) -> Union[BaseGeometry, str]:
|
|
379
391
|
"""Convert EWKT (Extended Well-Known text) to shapely geometry"""
|
|
@@ -488,10 +500,14 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
|
|
|
488
500
|
|
|
489
501
|
@staticmethod
|
|
490
502
|
def convert_get_group_name(string: str, pattern: str) -> str:
|
|
503
|
+
sanitized_pattern = pattern.replace(" ", "_SPACE_")
|
|
491
504
|
try:
|
|
492
|
-
match = re.search(
|
|
505
|
+
match = re.search(sanitized_pattern, str(string))
|
|
493
506
|
if match:
|
|
494
|
-
|
|
507
|
+
if result := match.lastgroup:
|
|
508
|
+
return result.replace("_SPACE_", " ")
|
|
509
|
+
else:
|
|
510
|
+
return NOT_AVAILABLE
|
|
495
511
|
except AttributeError:
|
|
496
512
|
pass
|
|
497
513
|
logger.warning(
|
|
@@ -1342,6 +1358,7 @@ def format_query_params(
|
|
|
1342
1358
|
formatted_query_param = remove_str_array_quotes(
|
|
1343
1359
|
formatted_query_param
|
|
1344
1360
|
)
|
|
1361
|
+
|
|
1345
1362
|
# json query string (for POST request)
|
|
1346
1363
|
update_nested_dict(
|
|
1347
1364
|
query_params,
|
eodag/cli.py
CHANGED
|
@@ -49,11 +49,12 @@ import sys
|
|
|
49
49
|
import textwrap
|
|
50
50
|
from importlib.metadata import metadata
|
|
51
51
|
from typing import TYPE_CHECKING, Any, Mapping
|
|
52
|
+
from urllib.parse import parse_qs
|
|
52
53
|
|
|
53
54
|
import click
|
|
54
55
|
|
|
55
56
|
from eodag.api.core import EODataAccessGateway, SearchResult
|
|
56
|
-
from eodag.utils import DEFAULT_ITEMS_PER_PAGE, DEFAULT_PAGE
|
|
57
|
+
from eodag.utils import DEFAULT_ITEMS_PER_PAGE, DEFAULT_PAGE
|
|
57
58
|
from eodag.utils.exceptions import NoMatchingProductType, UnsupportedProvider
|
|
58
59
|
from eodag.utils.logging import setup_logging
|
|
59
60
|
|
|
@@ -109,8 +110,9 @@ class MutuallyExclusiveOption(click.Option):
|
|
|
109
110
|
"""Raise error or use parent handle_parse_result()"""
|
|
110
111
|
if self.mutually_exclusive.intersection(opts) and self.name in opts:
|
|
111
112
|
raise click.UsageError(
|
|
112
|
-
"Illegal usage: `{}` is mutually exclusive with "
|
|
113
|
-
|
|
113
|
+
"Illegal usage: `{}` is mutually exclusive with arguments `{}`.".format(
|
|
114
|
+
self.name, ", ".join(self.mutually_exclusive)
|
|
115
|
+
)
|
|
114
116
|
)
|
|
115
117
|
|
|
116
118
|
return super(MutuallyExclusiveOption, self).handle_parse_result(ctx, opts, args)
|
|
@@ -687,6 +689,7 @@ def serve_rest(
|
|
|
687
689
|
setup_logging(verbose=ctx.obj["verbosity"])
|
|
688
690
|
try:
|
|
689
691
|
import uvicorn
|
|
692
|
+
import uvicorn.config
|
|
690
693
|
except ImportError:
|
|
691
694
|
raise ImportError(
|
|
692
695
|
"Feature not available, please install eodag[server] or eodag[all]"
|
eodag/config.py
CHANGED
|
@@ -452,6 +452,11 @@ class PluginConfig(yaml.YAMLObject):
|
|
|
452
452
|
discover_queryables: PluginConfig.DiscoverQueryables
|
|
453
453
|
#: :class:`~eodag.plugins.search.base.Search` The mapping between eodag metadata and the plugin specific metadata
|
|
454
454
|
metadata_mapping: dict[str, Union[str, list[str]]]
|
|
455
|
+
#: :class:`~eodag.plugins.search.base.Search` :attr:`~eodag.config.PluginConfig.metadata_mapping` got from the given
|
|
456
|
+
#: product type
|
|
457
|
+
metadata_mapping_from_product: str
|
|
458
|
+
#: :class:`~eodag.plugins.search.base.Search` A mapping for the metadata of individual assets
|
|
459
|
+
assets_mapping: dict[str, dict[str, Any]]
|
|
455
460
|
#: :class:`~eodag.plugins.search.base.Search` Parameters to remove from queryables
|
|
456
461
|
remove_from_queryables: list[str]
|
|
457
462
|
#: :class:`~eodag.plugins.search.base.Search` Parameters to be passed as is in the search url query string
|
eodag/plugins/authentication/openid_connect.py
CHANGED
|
@@ -23,6 +23,7 @@ import string
|
|
|
23
23
|
from datetime import datetime, timedelta, timezone
|
|
24
24
|
from random import SystemRandom
|
|
25
25
|
from typing import TYPE_CHECKING, Any, Optional
|
|
26
|
+
from urllib.parse import parse_qs, urlparse
|
|
26
27
|
|
|
27
28
|
import jwt
|
|
28
29
|
import requests
|
|
@@ -34,9 +35,7 @@ from eodag.utils import (
|
|
|
34
35
|
DEFAULT_TOKEN_EXPIRATION_MARGIN,
|
|
35
36
|
HTTP_REQ_TIMEOUT,
|
|
36
37
|
USER_AGENT,
|
|
37
|
-
parse_qs,
|
|
38
38
|
repeatfunc,
|
|
39
|
-
urlparse,
|
|
40
39
|
)
|
|
41
40
|
from eodag.utils.exceptions import (
|
|
42
41
|
AuthenticationError,
|
eodag/plugins/download/aws.py
CHANGED
|
@@ -20,10 +20,8 @@ from __future__ import annotations
|
|
|
20
20
|
import logging
|
|
21
21
|
import os
|
|
22
22
|
import re
|
|
23
|
-
from datetime import datetime
|
|
24
|
-
from itertools import chain
|
|
25
23
|
from pathlib import Path
|
|
26
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
24
|
+
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, cast
|
|
27
25
|
|
|
28
26
|
import boto3
|
|
29
27
|
import requests
|
|
@@ -31,7 +29,6 @@ from botocore.exceptions import ClientError, ProfileNotFound
|
|
|
31
29
|
from botocore.handlers import disable_signing
|
|
32
30
|
from lxml import etree
|
|
33
31
|
from requests.auth import AuthBase
|
|
34
|
-
from stream_zip import ZIP_AUTO, stream_zip
|
|
35
32
|
|
|
36
33
|
from eodag.api.product.metadata_mapping import (
|
|
37
34
|
mtd_cfg_as_conversion_and_querypath,
|
|
@@ -55,15 +52,17 @@ from eodag.utils import (
|
|
|
55
52
|
from eodag.utils.exceptions import (
|
|
56
53
|
AuthenticationError,
|
|
57
54
|
DownloadError,
|
|
55
|
+
EodagError,
|
|
58
56
|
MisconfiguredError,
|
|
59
57
|
NoMatchingProductType,
|
|
60
58
|
NotAvailableError,
|
|
61
59
|
TimeOutError,
|
|
62
60
|
)
|
|
63
|
-
from eodag.utils.s3 import open_s3_zipped_object
|
|
61
|
+
from eodag.utils.s3 import S3FileInfo, open_s3_zipped_object, stream_download_from_s3
|
|
64
62
|
|
|
65
63
|
if TYPE_CHECKING:
|
|
66
|
-
from
|
|
64
|
+
from mypy_boto3_s3.client import S3Client
|
|
65
|
+
from mypy_boto3_s3.service_resource import BucketObjectsCollection
|
|
67
66
|
|
|
68
67
|
from eodag.api.product import EOProduct
|
|
69
68
|
from eodag.api.search_result import SearchResult
|
|
@@ -306,23 +305,12 @@ class AwsDownload(Download):
|
|
|
306
305
|
self._configure_safe_build(build_safe, product)
|
|
307
306
|
# bucket names and prefixes
|
|
308
307
|
bucket_names_and_prefixes = self._get_bucket_names_and_prefixes(
|
|
309
|
-
product,
|
|
308
|
+
product,
|
|
309
|
+
asset_filter,
|
|
310
|
+
ignore_assets,
|
|
311
|
+
product_conf.get("complementary_url_key", []),
|
|
310
312
|
)
|
|
311
313
|
|
|
312
|
-
# add complementary urls
|
|
313
|
-
try:
|
|
314
|
-
for complementary_url_key in product_conf.get("complementary_url_key", []):
|
|
315
|
-
bucket_names_and_prefixes.append(
|
|
316
|
-
self.get_product_bucket_name_and_prefix(
|
|
317
|
-
product, product.properties[complementary_url_key]
|
|
318
|
-
)
|
|
319
|
-
)
|
|
320
|
-
except KeyError:
|
|
321
|
-
logger.warning(
|
|
322
|
-
"complementary_url_key %s is missing in %s properties"
|
|
323
|
-
% (complementary_url_key, product.properties["id"])
|
|
324
|
-
)
|
|
325
|
-
|
|
326
314
|
# authenticate
|
|
327
315
|
authenticated_objects, s3_objects = self._do_authentication(
|
|
328
316
|
bucket_names_and_prefixes, auth
|
|
@@ -429,15 +417,17 @@ class AwsDownload(Download):
|
|
|
429
417
|
if not os.path.isdir(dest_abs_path_dir):
|
|
430
418
|
os.makedirs(dest_abs_path_dir)
|
|
431
419
|
|
|
432
|
-
|
|
420
|
+
zip_file, _ = open_s3_zipped_object(
|
|
433
421
|
bucket_name, zip_prefix, s3_client, partial=False
|
|
434
|
-
)
|
|
422
|
+
)
|
|
423
|
+
with zip_file:
|
|
435
424
|
# file size
|
|
436
425
|
file_info = zip_file.getinfo(rel_path)
|
|
437
426
|
progress_callback.reset(total=file_info.file_size)
|
|
438
|
-
with
|
|
439
|
-
|
|
440
|
-
|
|
427
|
+
with (
|
|
428
|
+
zip_file.open(rel_path) as extracted,
|
|
429
|
+
open(dest_file, "wb") as output_file,
|
|
430
|
+
):
|
|
441
431
|
# Read in 1MB chunks
|
|
442
432
|
for zchunk in iter(lambda: extracted.read(1024 * 1024), b""):
|
|
443
433
|
output_file.write(zchunk)
|
|
@@ -530,8 +520,9 @@ class AwsDownload(Download):
|
|
|
530
520
|
def _get_bucket_names_and_prefixes(
|
|
531
521
|
self,
|
|
532
522
|
product: EOProduct,
|
|
533
|
-
asset_filter: Optional[str]
|
|
534
|
-
ignore_assets:
|
|
523
|
+
asset_filter: Optional[str],
|
|
524
|
+
ignore_assets: bool,
|
|
525
|
+
complementary_url_keys: list[str],
|
|
535
526
|
) -> list[tuple[str, Optional[str]]]:
|
|
536
527
|
"""
|
|
537
528
|
Retrieves the bucket names and path prefixes for the assets
|
|
@@ -570,13 +561,27 @@ class AwsDownload(Download):
|
|
|
570
561
|
bucket_names_and_prefixes = [
|
|
571
562
|
self.get_product_bucket_name_and_prefix(product)
|
|
572
563
|
]
|
|
564
|
+
|
|
565
|
+
# add complementary urls
|
|
566
|
+
try:
|
|
567
|
+
for complementary_url_key in complementary_url_keys or []:
|
|
568
|
+
bucket_names_and_prefixes.append(
|
|
569
|
+
self.get_product_bucket_name_and_prefix(
|
|
570
|
+
product, product.properties[complementary_url_key]
|
|
571
|
+
)
|
|
572
|
+
)
|
|
573
|
+
except KeyError:
|
|
574
|
+
logger.warning(
|
|
575
|
+
"complementary_url_key %s is missing in %s properties"
|
|
576
|
+
% (complementary_url_key, product.properties["id"])
|
|
577
|
+
)
|
|
573
578
|
return bucket_names_and_prefixes
|
|
574
579
|
|
|
575
580
|
def _do_authentication(
|
|
576
581
|
self,
|
|
577
582
|
bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
|
|
578
583
|
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
579
|
-
) -> tuple[dict[str, Any],
|
|
584
|
+
) -> tuple[dict[str, Any], BucketObjectsCollection]:
|
|
580
585
|
"""
|
|
581
586
|
Authenticates with s3 and retrieves the available objects
|
|
582
587
|
|
|
@@ -705,152 +710,93 @@ class AwsDownload(Download):
|
|
|
705
710
|
self,
|
|
706
711
|
product: EOProduct,
|
|
707
712
|
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
708
|
-
|
|
713
|
+
byte_range: tuple[Optional[int], Optional[int]] = (None, None),
|
|
714
|
+
compress: Literal["zip", "raw", "auto"] = "auto",
|
|
709
715
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
710
716
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
711
717
|
**kwargs: Unpack[DownloadConf],
|
|
712
718
|
) -> StreamResponse:
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
719
|
+
"""
|
|
720
|
+
Stream EO product data as a FastAPI-compatible `StreamResponse`, with support for partial downloads,
|
|
721
|
+
asset filtering, and on-the-fly compression.
|
|
716
722
|
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
723
|
+
This method streams data from one or more S3 objects that belong to a given EO product.
|
|
724
|
+
It supports:
|
|
725
|
+
|
|
726
|
+
- **Regex-based asset filtering** via `asset`, allowing partial product downloads.
|
|
727
|
+
- **Byte-range requests** through the `byte_range` parameter, enabling partial download of data.
|
|
728
|
+
- **Selective file extraction from ZIP archives**, for uncompressed entries (ZIP method: STORE only).
|
|
729
|
+
This enables lazy access to individual files inside ZIPs without downloading the entire archive.
|
|
730
|
+
|
|
731
|
+
Data is downloaded from S3 in parallel using HTTP range requests, which improves speed by downloading
|
|
732
|
+
chunks concurrently using multiple concurrent **range requests**.
|
|
733
|
+
|
|
734
|
+
#### Compression Behavior (`compress` parameter):
|
|
735
|
+
|
|
736
|
+
- `"raw"`:
|
|
737
|
+
- If there is only one file: returns a raw stream of that file.
|
|
738
|
+
- For multiple files, streams them sequentially using an HTTP multipart/mixed response with proper MIME
|
|
739
|
+
boundaries and per-file headers, allowing clients to parse each file independently.
|
|
740
|
+
|
|
741
|
+
- `"auto"` (default):
|
|
742
|
+
- Streams a single file as raw.
|
|
743
|
+
- Streams multiple files as a ZIP archive.
|
|
744
|
+
|
|
745
|
+
- `"zip"`:
|
|
746
|
+
- Always returns a ZIP archive, whether one or many files are included.
|
|
747
|
+
|
|
748
|
+
#### SAFE Archive Support:
|
|
749
|
+
|
|
750
|
+
If the product type supports SAFE structure and no `asset_regex` is specified (i.e., full product download),
|
|
751
|
+
the method attempts to reconstruct a valid SAFE archive layout in the streamed output.
|
|
752
|
+
|
|
753
|
+
:param product: The EO product to download.
|
|
754
|
+
:param asset: (optional) Regex pattern to filter which assets/files to include.
|
|
755
|
+
:param auth: (optional) Authentication configuration (e.g., AWS credentials).
|
|
756
|
+
:param byte_range: Tuple of (start, end) for a global byte range request. Either can be None for open-ended
|
|
757
|
+
ranges.
|
|
758
|
+
:param compress: One of "zip", "raw", or "auto". Controls how output is compressed:
|
|
759
|
+
- "raw": single file is streamed directly; multiple files use a custom separator.
|
|
760
|
+
- "auto": raw for single file, zipped for multiple.
|
|
761
|
+
- "zip": always returns a ZIP archive.
|
|
762
|
+
:returns: A `StreamResponse` object containing the streamed download and appropriate headers.
|
|
728
763
|
"""
|
|
729
|
-
|
|
730
|
-
logger.info(
|
|
731
|
-
"Progress bar unavailable, please call product.download() instead of plugin.download()"
|
|
732
|
-
)
|
|
733
|
-
progress_callback = ProgressCallback(disable=True)
|
|
764
|
+
asset_regex = kwargs.get("asset")
|
|
734
765
|
|
|
735
766
|
product_conf = getattr(self.config, "products", {}).get(
|
|
736
767
|
product.product_type, {}
|
|
737
768
|
)
|
|
738
|
-
# do not try to build SAFE if asset filter is used
|
|
739
|
-
asset_filter = kwargs.get("asset")
|
|
740
|
-
if asset_filter:
|
|
741
|
-
build_safe = False
|
|
742
|
-
else:
|
|
743
|
-
build_safe = product_conf.get("build_safe", False)
|
|
744
769
|
|
|
770
|
+
build_safe = (
|
|
771
|
+
False if asset_regex is not None else product_conf.get("build_safe", False)
|
|
772
|
+
)
|
|
745
773
|
ignore_assets = getattr(self.config, "ignore_assets", False)
|
|
746
774
|
|
|
747
|
-
# xtra metadata needed for SAFE product
|
|
748
775
|
self._configure_safe_build(build_safe, product)
|
|
749
|
-
|
|
776
|
+
|
|
750
777
|
bucket_names_and_prefixes = self._get_bucket_names_and_prefixes(
|
|
751
|
-
product,
|
|
778
|
+
product,
|
|
779
|
+
asset_regex,
|
|
780
|
+
ignore_assets,
|
|
781
|
+
product_conf.get("complementary_url_key", []),
|
|
752
782
|
)
|
|
753
783
|
|
|
754
|
-
# add complementary urls
|
|
755
|
-
try:
|
|
756
|
-
for complementary_url_key in product_conf.get("complementary_url_key", []):
|
|
757
|
-
bucket_names_and_prefixes.append(
|
|
758
|
-
self.get_product_bucket_name_and_prefix(
|
|
759
|
-
product, product.properties[complementary_url_key]
|
|
760
|
-
)
|
|
761
|
-
)
|
|
762
|
-
except KeyError:
|
|
763
|
-
logger.warning(
|
|
764
|
-
"complementary_url_key %s is missing in %s properties"
|
|
765
|
-
% (complementary_url_key, product.properties["id"])
|
|
766
|
-
)
|
|
767
|
-
|
|
768
784
|
# authenticate
|
|
769
|
-
authenticated_objects,
|
|
785
|
+
authenticated_objects, _ = self._do_authentication(
|
|
770
786
|
bucket_names_and_prefixes, auth
|
|
771
787
|
)
|
|
772
788
|
|
|
773
|
-
# stream not implemented for prefixes like `foo/bar.zip!file.txt`
|
|
774
|
-
for _, prefix in bucket_names_and_prefixes:
|
|
775
|
-
if prefix and ".zip!" in prefix:
|
|
776
|
-
raise NotImplementedError(
|
|
777
|
-
"Download streaming is not implemented for files in zip on S3"
|
|
778
|
-
)
|
|
779
|
-
|
|
780
789
|
# downloadable files
|
|
781
|
-
|
|
790
|
+
product_objects = self._get_unique_products(
|
|
782
791
|
bucket_names_and_prefixes,
|
|
783
792
|
authenticated_objects,
|
|
784
|
-
|
|
793
|
+
asset_regex,
|
|
785
794
|
ignore_assets,
|
|
786
795
|
product,
|
|
787
796
|
)
|
|
788
|
-
assets_values = product.assets.get_values(asset_filter)
|
|
789
|
-
chunks_tuples = self._stream_download(
|
|
790
|
-
unique_product_chunks, product, build_safe, progress_callback, assets_values
|
|
791
|
-
)
|
|
792
|
-
outputs_filename = (
|
|
793
|
-
sanitize(product.properties["title"])
|
|
794
|
-
if "title" in product.properties
|
|
795
|
-
else sanitize(product.properties.get("id", "download"))
|
|
796
|
-
)
|
|
797
797
|
|
|
798
|
-
if
|
|
799
|
-
|
|
800
|
-
# update headers
|
|
801
|
-
filename = os.path.basename(list(unique_product_chunks)[0].key)
|
|
802
|
-
headers = {"content-disposition": f"attachment; filename={filename}"}
|
|
803
|
-
if assets_values and assets_values[0].get("type"):
|
|
804
|
-
headers["content-type"] = assets_values[0]["type"]
|
|
805
|
-
|
|
806
|
-
return StreamResponse(
|
|
807
|
-
content=chain(iter([first_chunks_tuple]), chunks_tuples),
|
|
808
|
-
headers=headers,
|
|
809
|
-
)
|
|
810
|
-
return StreamResponse(
|
|
811
|
-
content=stream_zip(chunks_tuples),
|
|
812
|
-
media_type="application/zip",
|
|
813
|
-
headers={
|
|
814
|
-
"content-disposition": f"attachment; filename={outputs_filename}.zip",
|
|
815
|
-
},
|
|
816
|
-
)
|
|
817
|
-
|
|
818
|
-
def _stream_download(
|
|
819
|
-
self,
|
|
820
|
-
unique_product_chunks: set[Any],
|
|
821
|
-
product: EOProduct,
|
|
822
|
-
build_safe: bool,
|
|
823
|
-
progress_callback: ProgressCallback,
|
|
824
|
-
assets_values: list[dict[str, Any]],
|
|
825
|
-
) -> Iterator[Any]:
|
|
826
|
-
"""Yield product data chunks"""
|
|
827
|
-
|
|
828
|
-
chunk_size = 4096 * 1024
|
|
829
|
-
modified_at = datetime.now()
|
|
830
|
-
perms = 0o600
|
|
831
|
-
|
|
832
|
-
def get_chunk_parts(
|
|
833
|
-
product_chunk: Any, progress_callback: ProgressCallback
|
|
834
|
-
) -> Any:
|
|
835
|
-
try:
|
|
836
|
-
chunk_start = 0
|
|
837
|
-
chunk_end = chunk_start + chunk_size - 1
|
|
838
|
-
|
|
839
|
-
while chunk_start <= product_chunk.size:
|
|
840
|
-
get_kwargs = (
|
|
841
|
-
dict(RequestPayer="requester") if self.requester_pays else {}
|
|
842
|
-
)
|
|
843
|
-
chunk_part = product_chunk.get(
|
|
844
|
-
Range=f"bytes={chunk_start}-{chunk_end}", **get_kwargs
|
|
845
|
-
)["Body"].read()
|
|
846
|
-
progress_callback(len(chunk_part))
|
|
847
|
-
chunk_start += chunk_size
|
|
848
|
-
chunk_end += chunk_size
|
|
849
|
-
yield chunk_part
|
|
850
|
-
|
|
851
|
-
except ClientError as e:
|
|
852
|
-
self._raise_if_auth_error(e)
|
|
853
|
-
raise DownloadError("Unexpected error: %s" % e) from e
|
|
798
|
+
if self.s3_resource is None:
|
|
799
|
+
raise EodagError("Cannot check files in s3 zip without s3 resource")
|
|
854
800
|
|
|
855
801
|
product_conf = getattr(self.config, "products", {}).get(
|
|
856
802
|
product.product_type, {}
|
|
@@ -858,38 +804,58 @@ class AwsDownload(Download):
|
|
|
858
804
|
flatten_top_dirs = product_conf.get(
|
|
859
805
|
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
|
|
860
806
|
)
|
|
861
|
-
common_path =
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
807
|
+
common_path = (
|
|
808
|
+
self._get_commonpath(product, product_objects, build_safe)
|
|
809
|
+
if flatten_top_dirs
|
|
810
|
+
else ""
|
|
811
|
+
)
|
|
812
|
+
if len(product_objects) == 1:
|
|
813
|
+
common_path = os.path.dirname(common_path)
|
|
814
|
+
|
|
815
|
+
assets_by_path = {
|
|
816
|
+
a.get("href", "").split("s3://")[-1]: a
|
|
817
|
+
for a in product.assets.get_values(asset_filter=asset_regex or "")
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
files_info = []
|
|
821
|
+
for obj in product_objects:
|
|
867
822
|
try:
|
|
868
|
-
|
|
869
|
-
product,
|
|
870
|
-
product_chunk,
|
|
871
|
-
build_safe=build_safe,
|
|
872
|
-
)
|
|
823
|
+
rel_path = self.get_chunk_dest_path(product, obj, build_safe=build_safe)
|
|
873
824
|
if flatten_top_dirs:
|
|
874
|
-
|
|
825
|
+
rel_path = os.path.join(
|
|
875
826
|
product.properties["title"],
|
|
876
|
-
re.sub(rf"^{common_path}/?", "",
|
|
827
|
+
re.sub(rf"^{common_path}/?", "", rel_path),
|
|
877
828
|
)
|
|
878
829
|
|
|
830
|
+
data_type = assets_by_path.get(f"{obj.bucket_name}/{obj.key}", {}).get(
|
|
831
|
+
"type"
|
|
832
|
+
)
|
|
833
|
+
|
|
834
|
+
file_info = S3FileInfo(
|
|
835
|
+
key=obj.key,
|
|
836
|
+
size=obj.size,
|
|
837
|
+
bucket_name=obj.bucket_name,
|
|
838
|
+
rel_path=rel_path,
|
|
839
|
+
)
|
|
840
|
+
if data_type:
|
|
841
|
+
file_info.data_type = data_type
|
|
842
|
+
|
|
843
|
+
files_info.append(file_info)
|
|
879
844
|
except NotAvailableError as e:
|
|
880
|
-
# out of SAFE format chunk
|
|
881
845
|
logger.warning(e)
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
846
|
+
|
|
847
|
+
title = product.properties.get("title") or product.properties.get(
|
|
848
|
+
"id", "download"
|
|
849
|
+
)
|
|
850
|
+
zip_filename = sanitize(title)
|
|
851
|
+
|
|
852
|
+
return stream_download_from_s3(
|
|
853
|
+
cast("S3Client", self.s3_resource.meta.client),
|
|
854
|
+
files_info,
|
|
855
|
+
byte_range,
|
|
856
|
+
compress,
|
|
857
|
+
zip_filename,
|
|
858
|
+
)
|
|
893
859
|
|
|
894
860
|
def _get_commonpath(
|
|
895
861
|
self, product: EOProduct, product_chunks: set[Any], build_safe: bool
|
|
@@ -931,7 +897,7 @@ class AwsDownload(Download):
|
|
|
931
897
|
|
|
932
898
|
def get_authenticated_objects(
|
|
933
899
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
934
|
-
) ->
|
|
900
|
+
) -> BucketObjectsCollection:
|
|
935
901
|
"""Get boto3 authenticated objects for the given bucket using
|
|
936
902
|
the most adapted auth strategy.
|
|
937
903
|
Also expose ``s3_session`` as class variable if available.
|
|
@@ -943,7 +909,7 @@ class AwsDownload(Download):
|
|
|
943
909
|
:returns: The boto3 authenticated objects
|
|
944
910
|
"""
|
|
945
911
|
auth_methods: list[
|
|
946
|
-
Callable[[str, str, S3SessionKwargs], Optional[
|
|
912
|
+
Callable[[str, str, S3SessionKwargs], Optional[BucketObjectsCollection]]
|
|
947
913
|
] = [
|
|
948
914
|
self._get_authenticated_objects_unsigned,
|
|
949
915
|
self._get_authenticated_objects_from_auth_profile,
|
|
@@ -979,7 +945,7 @@ class AwsDownload(Download):
|
|
|
979
945
|
|
|
980
946
|
def _get_authenticated_objects_unsigned(
|
|
981
947
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
982
|
-
) -> Optional[
|
|
948
|
+
) -> Optional[BucketObjectsCollection]:
|
|
983
949
|
"""Auth strategy using no-sign-request"""
|
|
984
950
|
|
|
985
951
|
s3_resource = boto3.resource(
|
|
@@ -990,11 +956,12 @@ class AwsDownload(Download):
|
|
|
990
956
|
)
|
|
991
957
|
objects = s3_resource.Bucket(bucket_name).objects
|
|
992
958
|
list(objects.filter(Prefix=prefix).limit(1))
|
|
959
|
+
self.s3_resource = s3_resource
|
|
993
960
|
return objects
|
|
994
961
|
|
|
995
962
|
def _get_authenticated_objects_from_auth_profile(
|
|
996
963
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
997
|
-
) -> Optional[
|
|
964
|
+
) -> Optional[BucketObjectsCollection]:
|
|
998
965
|
"""Auth strategy using RequestPayer=requester and ``aws_profile`` from provided credentials"""
|
|
999
966
|
|
|
1000
967
|
if "profile_name" in auth_dict.keys():
|
|
@@ -1018,7 +985,7 @@ class AwsDownload(Download):
|
|
|
1018
985
|
|
|
1019
986
|
def _get_authenticated_objects_from_auth_keys(
|
|
1020
987
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
1021
|
-
) -> Optional[
|
|
988
|
+
) -> Optional[BucketObjectsCollection]:
|
|
1022
989
|
"""Auth strategy using RequestPayer=requester and ``aws_access_key_id``/``aws_secret_access_key``
|
|
1023
990
|
from provided credentials"""
|
|
1024
991
|
|
|
@@ -1049,7 +1016,7 @@ class AwsDownload(Download):
|
|
|
1049
1016
|
|
|
1050
1017
|
def _get_authenticated_objects_from_env(
|
|
1051
1018
|
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
1052
|
-
) -> Optional[
|
|
1019
|
+
) -> Optional[BucketObjectsCollection]:
|
|
1053
1020
|
"""Auth strategy using RequestPayer=requester and current environment"""
|
|
1054
1021
|
|
|
1055
1022
|
s3_session = boto3.session.Session()
|
eodag/plugins/download/base.py
CHANGED
|
@@ -27,7 +27,7 @@ import zipfile
|
|
|
27
27
|
from datetime import datetime, timedelta
|
|
28
28
|
from pathlib import Path
|
|
29
29
|
from time import sleep
|
|
30
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
|
|
30
|
+
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, Union
|
|
31
31
|
|
|
32
32
|
from eodag.api.product.metadata_mapping import ONLINE_STATUS
|
|
33
33
|
from eodag.plugins.base import PluginTopic
|
|
@@ -134,7 +134,8 @@ class Download(PluginTopic):
|
|
|
134
134
|
self,
|
|
135
135
|
product: EOProduct,
|
|
136
136
|
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
137
|
-
|
|
137
|
+
byte_range: tuple[Optional[int], Optional[int]] = (None, None),
|
|
138
|
+
compress: Literal["zip", "raw", "auto"] = "auto",
|
|
138
139
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
139
140
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
140
141
|
**kwargs: Unpack[DownloadConf],
|