eodag 3.1.0b1__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +69 -63
- eodag/api/product/_assets.py +49 -13
- eodag/api/product/_product.py +41 -30
- eodag/api/product/drivers/__init__.py +81 -4
- eodag/api/product/drivers/base.py +65 -4
- eodag/api/product/drivers/generic.py +65 -0
- eodag/api/product/drivers/sentinel1.py +97 -0
- eodag/api/product/drivers/sentinel2.py +95 -0
- eodag/api/product/metadata_mapping.py +85 -79
- eodag/api/search_result.py +13 -23
- eodag/cli.py +4 -4
- eodag/config.py +77 -80
- eodag/plugins/apis/base.py +1 -1
- eodag/plugins/apis/ecmwf.py +12 -15
- eodag/plugins/apis/usgs.py +12 -11
- eodag/plugins/authentication/aws_auth.py +16 -13
- eodag/plugins/authentication/base.py +5 -3
- eodag/plugins/authentication/header.py +3 -3
- eodag/plugins/authentication/keycloak.py +4 -4
- eodag/plugins/authentication/oauth.py +7 -3
- eodag/plugins/authentication/openid_connect.py +20 -14
- eodag/plugins/authentication/sas_auth.py +4 -4
- eodag/plugins/authentication/token.py +7 -7
- eodag/plugins/authentication/token_exchange.py +1 -1
- eodag/plugins/base.py +4 -4
- eodag/plugins/crunch/base.py +4 -4
- eodag/plugins/crunch/filter_date.py +4 -4
- eodag/plugins/crunch/filter_latest_intersect.py +6 -6
- eodag/plugins/crunch/filter_latest_tpl_name.py +7 -7
- eodag/plugins/crunch/filter_overlap.py +4 -4
- eodag/plugins/crunch/filter_property.py +4 -4
- eodag/plugins/download/aws.py +137 -77
- eodag/plugins/download/base.py +8 -17
- eodag/plugins/download/creodias_s3.py +2 -2
- eodag/plugins/download/http.py +30 -32
- eodag/plugins/download/s3rest.py +5 -4
- eodag/plugins/manager.py +10 -20
- eodag/plugins/search/__init__.py +6 -5
- eodag/plugins/search/base.py +38 -42
- eodag/plugins/search/build_search_result.py +286 -336
- eodag/plugins/search/cop_marine.py +22 -12
- eodag/plugins/search/creodias_s3.py +8 -78
- eodag/plugins/search/csw.py +11 -11
- eodag/plugins/search/data_request_search.py +19 -18
- eodag/plugins/search/qssearch.py +84 -151
- eodag/plugins/search/stac_list_assets.py +85 -0
- eodag/plugins/search/static_stac_search.py +4 -4
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +848 -398
- eodag/resources/providers.yml +1038 -1115
- eodag/resources/stac_api.yml +2 -2
- eodag/resources/user_conf_template.yml +10 -9
- eodag/rest/cache.py +2 -2
- eodag/rest/config.py +3 -3
- eodag/rest/core.py +24 -24
- eodag/rest/errors.py +5 -5
- eodag/rest/server.py +3 -11
- eodag/rest/stac.py +41 -38
- eodag/rest/types/collections_search.py +3 -3
- eodag/rest/types/eodag_search.py +23 -23
- eodag/rest/types/queryables.py +40 -28
- eodag/rest/types/stac_search.py +15 -25
- eodag/rest/utils/__init__.py +11 -21
- eodag/rest/utils/cql_evaluate.py +6 -6
- eodag/rest/utils/rfc3339.py +2 -2
- eodag/types/__init__.py +97 -29
- eodag/types/bbox.py +2 -2
- eodag/types/download_args.py +2 -2
- eodag/types/queryables.py +5 -2
- eodag/types/search_args.py +4 -4
- eodag/types/whoosh.py +1 -3
- eodag/utils/__init__.py +82 -41
- eodag/utils/exceptions.py +2 -2
- eodag/utils/import_system.py +2 -2
- eodag/utils/requests.py +2 -2
- eodag/utils/rest.py +2 -2
- eodag/utils/s3.py +231 -0
- eodag/utils/stac_reader.py +10 -10
- {eodag-3.1.0b1.dist-info → eodag-3.2.0.dist-info}/METADATA +12 -10
- eodag-3.2.0.dist-info/RECORD +113 -0
- {eodag-3.1.0b1.dist-info → eodag-3.2.0.dist-info}/WHEEL +1 -1
- {eodag-3.1.0b1.dist-info → eodag-3.2.0.dist-info}/entry_points.txt +1 -0
- eodag-3.1.0b1.dist-info/RECORD +0 -108
- {eodag-3.1.0b1.dist-info → eodag-3.2.0.dist-info/licenses}/LICENSE +0 -0
- {eodag-3.1.0b1.dist-info → eodag-3.2.0.dist-info}/top_level.txt +0 -0
|
@@ -19,7 +19,7 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import logging
|
|
21
21
|
import re
|
|
22
|
-
from typing import TYPE_CHECKING, Any,
|
|
22
|
+
from typing import TYPE_CHECKING, Any, Optional, cast
|
|
23
23
|
|
|
24
24
|
from eodag.plugins.crunch.base import Crunch
|
|
25
25
|
from eodag.utils.exceptions import ValidationError
|
|
@@ -42,7 +42,7 @@ class FilterLatestByName(Crunch):
|
|
|
42
42
|
|
|
43
43
|
NAME_PATTERN_CONSTRAINT = re.compile(r"\(\?P<tileid>\\d\{6\}\)")
|
|
44
44
|
|
|
45
|
-
def __init__(self, config:
|
|
45
|
+
def __init__(self, config: dict[str, Any]) -> None:
|
|
46
46
|
super(FilterLatestByName, self).__init__(config)
|
|
47
47
|
name_pattern = config.pop("name_pattern")
|
|
48
48
|
if not self.NAME_PATTERN_CONSTRAINT.search(name_pattern):
|
|
@@ -54,19 +54,19 @@ class FilterLatestByName(Crunch):
|
|
|
54
54
|
self.name_pattern = re.compile(name_pattern)
|
|
55
55
|
|
|
56
56
|
def proceed(
|
|
57
|
-
self, products:
|
|
58
|
-
) ->
|
|
57
|
+
self, products: list[EOProduct], **search_params: Any
|
|
58
|
+
) -> list[EOProduct]:
|
|
59
59
|
"""Execute crunch: Filter Search results to get only the latest product, based on the name of the product
|
|
60
60
|
|
|
61
61
|
:param products: A list of products resulting from a search
|
|
62
62
|
:returns: The filtered products
|
|
63
63
|
"""
|
|
64
64
|
logger.debug("Starting products filtering")
|
|
65
|
-
processed:
|
|
66
|
-
filtered:
|
|
65
|
+
processed: list[str] = []
|
|
66
|
+
filtered: list[EOProduct] = []
|
|
67
67
|
for product in products:
|
|
68
68
|
match = cast(
|
|
69
|
-
Optional[Match[Any]],
|
|
69
|
+
Optional[re.Match[Any]],
|
|
70
70
|
self.name_pattern.match(product.properties["title"]),
|
|
71
71
|
)
|
|
72
72
|
if match:
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
20
|
import logging
|
|
21
|
-
from typing import TYPE_CHECKING, Any
|
|
21
|
+
from typing import TYPE_CHECKING, Any
|
|
22
22
|
|
|
23
23
|
from eodag.plugins.crunch.base import Crunch
|
|
24
24
|
from eodag.utils import get_geometry_from_various
|
|
@@ -49,8 +49,8 @@ class FilterOverlap(Crunch):
|
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
51
|
def proceed(
|
|
52
|
-
self, products:
|
|
53
|
-
) ->
|
|
52
|
+
self, products: list[EOProduct], **search_params: Any
|
|
53
|
+
) -> list[EOProduct]:
|
|
54
54
|
"""Execute crunch: Filter products, retaining only those that are overlapping with the search_extent
|
|
55
55
|
|
|
56
56
|
:param products: A list of products resulting from a search
|
|
@@ -58,7 +58,7 @@ class FilterOverlap(Crunch):
|
|
|
58
58
|
:returns: The filtered products
|
|
59
59
|
"""
|
|
60
60
|
logger.debug("Start filtering for overlapping products")
|
|
61
|
-
filtered:
|
|
61
|
+
filtered: list[EOProduct] = []
|
|
62
62
|
add_to_filtered = filtered.append
|
|
63
63
|
|
|
64
64
|
search_geom = get_geometry_from_various(**search_params)
|
|
@@ -19,7 +19,7 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import logging
|
|
21
21
|
import operator
|
|
22
|
-
from typing import TYPE_CHECKING, Any
|
|
22
|
+
from typing import TYPE_CHECKING, Any
|
|
23
23
|
|
|
24
24
|
from eodag.plugins.crunch.base import Crunch
|
|
25
25
|
|
|
@@ -42,8 +42,8 @@ class FilterProperty(Crunch):
|
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
44
|
def proceed(
|
|
45
|
-
self, products:
|
|
46
|
-
) ->
|
|
45
|
+
self, products: list[EOProduct], **search_params: Any
|
|
46
|
+
) -> list[EOProduct]:
|
|
47
47
|
"""Execute crunch: Filter products, retaining only those that match property filtering
|
|
48
48
|
|
|
49
49
|
:param products: A list of products resulting from a search
|
|
@@ -72,7 +72,7 @@ class FilterProperty(Crunch):
|
|
|
72
72
|
property_key,
|
|
73
73
|
property_value,
|
|
74
74
|
)
|
|
75
|
-
filtered:
|
|
75
|
+
filtered: list[EOProduct] = []
|
|
76
76
|
add_to_filtered = filtered.append
|
|
77
77
|
|
|
78
78
|
for product in products:
|
eodag/plugins/download/aws.py
CHANGED
|
@@ -23,21 +23,7 @@ import re
|
|
|
23
23
|
from datetime import datetime
|
|
24
24
|
from itertools import chain
|
|
25
25
|
from pathlib import Path
|
|
26
|
-
from typing import
|
|
27
|
-
TYPE_CHECKING,
|
|
28
|
-
Any,
|
|
29
|
-
Callable,
|
|
30
|
-
Dict,
|
|
31
|
-
Iterator,
|
|
32
|
-
List,
|
|
33
|
-
Match,
|
|
34
|
-
Optional,
|
|
35
|
-
Set,
|
|
36
|
-
Tuple,
|
|
37
|
-
TypedDict,
|
|
38
|
-
Union,
|
|
39
|
-
cast,
|
|
40
|
-
)
|
|
26
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, Union, cast
|
|
41
27
|
|
|
42
28
|
import boto3
|
|
43
29
|
import requests
|
|
@@ -74,6 +60,7 @@ from eodag.utils.exceptions import (
|
|
|
74
60
|
NotAvailableError,
|
|
75
61
|
TimeOutError,
|
|
76
62
|
)
|
|
63
|
+
from eodag.utils.s3 import open_s3_zipped_object
|
|
77
64
|
|
|
78
65
|
if TYPE_CHECKING:
|
|
79
66
|
from boto3.resources.collection import ResourceCollection
|
|
@@ -81,6 +68,7 @@ if TYPE_CHECKING:
|
|
|
81
68
|
from eodag.api.product import EOProduct
|
|
82
69
|
from eodag.api.search_result import SearchResult
|
|
83
70
|
from eodag.config import PluginConfig
|
|
71
|
+
from eodag.types import S3SessionKwargs
|
|
84
72
|
from eodag.types.download_args import DownloadConf
|
|
85
73
|
from eodag.utils import DownloadedCallback, Unpack
|
|
86
74
|
|
|
@@ -208,6 +196,7 @@ AWS_AUTH_ERROR_MESSAGES = [
|
|
|
208
196
|
"AccessDenied",
|
|
209
197
|
"InvalidAccessKeyId",
|
|
210
198
|
"SignatureDoesNotMatch",
|
|
199
|
+
"InvalidRequest",
|
|
211
200
|
]
|
|
212
201
|
|
|
213
202
|
|
|
@@ -230,14 +219,14 @@ class AwsDownload(Download):
|
|
|
230
219
|
* :attr:`~eodag.config.PluginConfig.bucket_path_level` (``int``): at which level of the
|
|
231
220
|
path part of the url the bucket can be found; If no bucket_path_level is given, the bucket
|
|
232
221
|
is taken from the first element of the netloc part.
|
|
233
|
-
* :attr:`~eodag.config.PluginConfig.products` (``
|
|
222
|
+
* :attr:`~eodag.config.PluginConfig.products` (``dict[str, dict[str, Any]]``): product type
|
|
234
223
|
specific config; the keys are the product types, the values are dictionaries which can contain the keys:
|
|
235
224
|
|
|
236
225
|
* **default_bucket** (``str``): bucket where the product type can be found
|
|
237
226
|
* **complementary_url_key** (``str``): keys to add additional urls
|
|
238
227
|
* **build_safe** (``bool``): if a SAFE (Standard Archive Format for Europe) product should
|
|
239
228
|
be created; used for Sentinel products; default: False
|
|
240
|
-
* **fetch_metadata** (``
|
|
229
|
+
* **fetch_metadata** (``dict[str, Any]``): config for metadata to be fetched for the SAFE product
|
|
241
230
|
|
|
242
231
|
"""
|
|
243
232
|
|
|
@@ -245,11 +234,12 @@ class AwsDownload(Download):
|
|
|
245
234
|
super(AwsDownload, self).__init__(provider, config)
|
|
246
235
|
self.requester_pays = getattr(self.config, "requester_pays", False)
|
|
247
236
|
self.s3_session: Optional[boto3.session.Session] = None
|
|
237
|
+
self.s3_resource: Optional[boto3.resources.base.ServiceResource] = None
|
|
248
238
|
|
|
249
239
|
def download(
|
|
250
240
|
self,
|
|
251
241
|
product: EOProduct,
|
|
252
|
-
auth: Optional[Union[AuthBase,
|
|
242
|
+
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
253
243
|
progress_callback: Optional[ProgressCallback] = None,
|
|
254
244
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
255
245
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
@@ -302,10 +292,10 @@ class AwsDownload(Download):
|
|
|
302
292
|
asset_filter = kwargs.get("asset", None)
|
|
303
293
|
if asset_filter:
|
|
304
294
|
build_safe = False
|
|
295
|
+
ignore_assets = False
|
|
305
296
|
else:
|
|
306
297
|
build_safe = product_conf.get("build_safe", False)
|
|
307
|
-
|
|
308
|
-
ignore_assets = getattr(self.config, "ignore_assets", False)
|
|
298
|
+
ignore_assets = getattr(self.config, "ignore_assets", False)
|
|
309
299
|
|
|
310
300
|
# product conf overrides provider conf for "flatten_top_dirs"
|
|
311
301
|
flatten_top_dirs = product_conf.get(
|
|
@@ -338,19 +328,32 @@ class AwsDownload(Download):
|
|
|
338
328
|
bucket_names_and_prefixes, auth
|
|
339
329
|
)
|
|
340
330
|
|
|
331
|
+
# files in zip
|
|
332
|
+
updated_bucket_names_and_prefixes = self._download_file_in_zip(
|
|
333
|
+
product, bucket_names_and_prefixes, product_local_path, progress_callback
|
|
334
|
+
)
|
|
335
|
+
# prevent nothing-to-download errors if download was performed in zip
|
|
336
|
+
raise_error = (
|
|
337
|
+
False
|
|
338
|
+
if len(updated_bucket_names_and_prefixes) != len(bucket_names_and_prefixes)
|
|
339
|
+
else True
|
|
340
|
+
)
|
|
341
|
+
|
|
341
342
|
# downloadable files
|
|
342
343
|
unique_product_chunks = self._get_unique_products(
|
|
343
|
-
|
|
344
|
+
updated_bucket_names_and_prefixes,
|
|
344
345
|
authenticated_objects,
|
|
345
346
|
asset_filter,
|
|
346
347
|
ignore_assets,
|
|
347
348
|
product,
|
|
349
|
+
raise_error=raise_error,
|
|
348
350
|
)
|
|
349
351
|
|
|
350
352
|
total_size = sum([p.size for p in unique_product_chunks]) or None
|
|
351
353
|
|
|
352
354
|
# download
|
|
353
|
-
|
|
355
|
+
if len(unique_product_chunks) > 0:
|
|
356
|
+
progress_callback.reset(total=total_size)
|
|
354
357
|
try:
|
|
355
358
|
for product_chunk in unique_product_chunks:
|
|
356
359
|
try:
|
|
@@ -402,17 +405,65 @@ class AwsDownload(Download):
|
|
|
402
405
|
|
|
403
406
|
return product_local_path
|
|
404
407
|
|
|
408
|
+
def _download_file_in_zip(
|
|
409
|
+
self, product, bucket_names_and_prefixes, product_local_path, progress_callback
|
|
410
|
+
):
|
|
411
|
+
"""
|
|
412
|
+
Download file in zip from a prefix like `foo/bar.zip!file.txt`
|
|
413
|
+
"""
|
|
414
|
+
if self.s3_resource is None:
|
|
415
|
+
logger.debug("Cannot check files in s3 zip without s3 resource")
|
|
416
|
+
return bucket_names_and_prefixes
|
|
417
|
+
|
|
418
|
+
s3_client = self.s3_resource.meta.client
|
|
419
|
+
|
|
420
|
+
downloaded = []
|
|
421
|
+
for i, pack in enumerate(bucket_names_and_prefixes):
|
|
422
|
+
bucket_name, prefix = pack
|
|
423
|
+
if ".zip!" in prefix:
|
|
424
|
+
splitted_path = prefix.split(".zip!")
|
|
425
|
+
zip_prefix = f"{splitted_path[0]}.zip"
|
|
426
|
+
rel_path = splitted_path[-1]
|
|
427
|
+
dest_file = os.path.join(product_local_path, rel_path)
|
|
428
|
+
dest_abs_path_dir = os.path.dirname(dest_file)
|
|
429
|
+
if not os.path.isdir(dest_abs_path_dir):
|
|
430
|
+
os.makedirs(dest_abs_path_dir)
|
|
431
|
+
|
|
432
|
+
with open_s3_zipped_object(
|
|
433
|
+
bucket_name, zip_prefix, s3_client, partial=False
|
|
434
|
+
) as zip_file:
|
|
435
|
+
# file size
|
|
436
|
+
file_info = zip_file.getinfo(rel_path)
|
|
437
|
+
progress_callback.reset(total=file_info.file_size)
|
|
438
|
+
with zip_file.open(rel_path) as extracted, open(
|
|
439
|
+
dest_file, "wb"
|
|
440
|
+
) as output_file:
|
|
441
|
+
# Read in 1MB chunks
|
|
442
|
+
for zchunk in iter(lambda: extracted.read(1024 * 1024), b""):
|
|
443
|
+
output_file.write(zchunk)
|
|
444
|
+
progress_callback(len(zchunk))
|
|
445
|
+
|
|
446
|
+
downloaded.append(i)
|
|
447
|
+
|
|
448
|
+
return [
|
|
449
|
+
pack
|
|
450
|
+
for i, pack in enumerate(bucket_names_and_prefixes)
|
|
451
|
+
if i not in downloaded
|
|
452
|
+
]
|
|
453
|
+
|
|
405
454
|
def _download_preparation(
|
|
406
455
|
self,
|
|
407
456
|
product: EOProduct,
|
|
408
457
|
progress_callback: ProgressCallback,
|
|
409
458
|
**kwargs: Unpack[DownloadConf],
|
|
410
|
-
) ->
|
|
459
|
+
) -> tuple[Optional[str], Optional[str]]:
|
|
411
460
|
"""
|
|
412
|
-
|
|
461
|
+
Preparation for the download:
|
|
462
|
+
|
|
413
463
|
- check if file was already downloaded
|
|
414
464
|
- get file path
|
|
415
465
|
- create directories
|
|
466
|
+
|
|
416
467
|
:param product: product to be downloaded
|
|
417
468
|
:param progress_callback: progress callback to be used
|
|
418
469
|
:param kwargs: additional arguments
|
|
@@ -436,7 +487,8 @@ class AwsDownload(Download):
|
|
|
436
487
|
|
|
437
488
|
def _configure_safe_build(self, build_safe: bool, product: EOProduct):
|
|
438
489
|
"""
|
|
439
|
-
|
|
490
|
+
Updates the product properties with fetch metadata if safe build is enabled
|
|
491
|
+
|
|
440
492
|
:param build_safe: if safe build is enabled
|
|
441
493
|
:param product: product to be updated
|
|
442
494
|
"""
|
|
@@ -480,7 +532,7 @@ class AwsDownload(Download):
|
|
|
480
532
|
product: EOProduct,
|
|
481
533
|
asset_filter: Optional[str] = None,
|
|
482
534
|
ignore_assets: Optional[bool] = False,
|
|
483
|
-
) ->
|
|
535
|
+
) -> list[tuple[str, Optional[str]]]:
|
|
484
536
|
"""
|
|
485
537
|
Retrieves the bucket names and path prefixes for the assets
|
|
486
538
|
|
|
@@ -522,14 +574,15 @@ class AwsDownload(Download):
|
|
|
522
574
|
|
|
523
575
|
def _do_authentication(
|
|
524
576
|
self,
|
|
525
|
-
bucket_names_and_prefixes:
|
|
526
|
-
auth: Optional[Union[AuthBase,
|
|
527
|
-
) ->
|
|
577
|
+
bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
|
|
578
|
+
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
579
|
+
) -> tuple[dict[str, Any], ResourceCollection]:
|
|
528
580
|
"""
|
|
529
|
-
|
|
530
|
-
|
|
581
|
+
Authenticates with s3 and retrieves the available objects
|
|
582
|
+
|
|
531
583
|
:param bucket_names_and_prefixes: list of bucket names and corresponding path prefixes
|
|
532
584
|
:param auth: authentication information
|
|
585
|
+
:raises AuthenticationError: authentication is not possible
|
|
533
586
|
:return: authenticated objects per bucket, list of available objects
|
|
534
587
|
"""
|
|
535
588
|
if not isinstance(auth, (dict, type(None))):
|
|
@@ -538,8 +591,8 @@ class AwsDownload(Download):
|
|
|
538
591
|
)
|
|
539
592
|
if auth is None:
|
|
540
593
|
auth = {}
|
|
541
|
-
authenticated_objects:
|
|
542
|
-
auth_error_messages:
|
|
594
|
+
authenticated_objects: dict[str, Any] = {}
|
|
595
|
+
auth_error_messages: set[str] = set()
|
|
543
596
|
for _, pack in enumerate(bucket_names_and_prefixes):
|
|
544
597
|
try:
|
|
545
598
|
bucket_name, prefix = pack
|
|
@@ -591,22 +644,25 @@ class AwsDownload(Download):
|
|
|
591
644
|
|
|
592
645
|
def _get_unique_products(
|
|
593
646
|
self,
|
|
594
|
-
bucket_names_and_prefixes:
|
|
595
|
-
authenticated_objects:
|
|
647
|
+
bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
|
|
648
|
+
authenticated_objects: dict[str, Any],
|
|
596
649
|
asset_filter: Optional[str],
|
|
597
650
|
ignore_assets: bool,
|
|
598
651
|
product: EOProduct,
|
|
599
|
-
|
|
652
|
+
raise_error: bool = True,
|
|
653
|
+
) -> set[Any]:
|
|
600
654
|
"""
|
|
601
|
-
|
|
655
|
+
Retrieve unique product chunks based on authenticated objects and asset filters
|
|
656
|
+
|
|
602
657
|
:param bucket_names_and_prefixes: list of bucket names and corresponding path prefixes
|
|
603
658
|
:param authenticated_objects: available objects per bucket
|
|
604
659
|
:param asset_filter: text for which assets should be filtered
|
|
605
660
|
:param ignore_assets: if product instead of individual assets should be used
|
|
606
661
|
:param product: product that shall be downloaded
|
|
662
|
+
:param raise_error: raise error if there is nothing to download
|
|
607
663
|
:return: set of product chunks that can be downloaded
|
|
608
664
|
"""
|
|
609
|
-
product_chunks:
|
|
665
|
+
product_chunks: list[Any] = []
|
|
610
666
|
for bucket_name, prefix in bucket_names_and_prefixes:
|
|
611
667
|
# unauthenticated items filtered out
|
|
612
668
|
if bucket_name in authenticated_objects.keys():
|
|
@@ -625,19 +681,19 @@ class AwsDownload(Download):
|
|
|
625
681
|
unique_product_chunks,
|
|
626
682
|
)
|
|
627
683
|
)
|
|
628
|
-
if not unique_product_chunks:
|
|
684
|
+
if not unique_product_chunks and raise_error:
|
|
629
685
|
raise NotAvailableError(
|
|
630
686
|
rf"No file basename matching re.fullmatch(r'{asset_filter}') was found in {product.remote_location}"
|
|
631
687
|
)
|
|
632
688
|
|
|
633
|
-
if not unique_product_chunks:
|
|
689
|
+
if not unique_product_chunks and raise_error:
|
|
634
690
|
raise NoMatchingProductType("No product found to download.")
|
|
635
691
|
|
|
636
692
|
return unique_product_chunks
|
|
637
693
|
|
|
638
694
|
def _raise_if_auth_error(self, exception: ClientError) -> None:
|
|
639
695
|
"""Raises an error if given exception is an authentication error"""
|
|
640
|
-
err = cast(
|
|
696
|
+
err = cast(dict[str, str], exception.response["Error"])
|
|
641
697
|
if err["Code"] in AWS_AUTH_ERROR_MESSAGES and "key" in err["Message"].lower():
|
|
642
698
|
raise AuthenticationError(
|
|
643
699
|
f"Please check your credentials for {self.provider}.",
|
|
@@ -648,7 +704,7 @@ class AwsDownload(Download):
|
|
|
648
704
|
def _stream_download_dict(
|
|
649
705
|
self,
|
|
650
706
|
product: EOProduct,
|
|
651
|
-
auth: Optional[Union[AuthBase,
|
|
707
|
+
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
652
708
|
progress_callback: Optional[ProgressCallback] = None,
|
|
653
709
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
654
710
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
@@ -714,6 +770,13 @@ class AwsDownload(Download):
|
|
|
714
770
|
bucket_names_and_prefixes, auth
|
|
715
771
|
)
|
|
716
772
|
|
|
773
|
+
# stream not implemented for prefixes like `foo/bar.zip!file.txt`
|
|
774
|
+
for _, prefix in bucket_names_and_prefixes:
|
|
775
|
+
if prefix and ".zip!" in prefix:
|
|
776
|
+
raise NotImplementedError(
|
|
777
|
+
"Download streaming is not implemented for files in zip on S3"
|
|
778
|
+
)
|
|
779
|
+
|
|
717
780
|
# downloadable files
|
|
718
781
|
unique_product_chunks = self._get_unique_products(
|
|
719
782
|
bucket_names_and_prefixes,
|
|
@@ -754,11 +817,11 @@ class AwsDownload(Download):
|
|
|
754
817
|
|
|
755
818
|
def _stream_download(
|
|
756
819
|
self,
|
|
757
|
-
unique_product_chunks:
|
|
820
|
+
unique_product_chunks: set[Any],
|
|
758
821
|
product: EOProduct,
|
|
759
822
|
build_safe: bool,
|
|
760
823
|
progress_callback: ProgressCallback,
|
|
761
|
-
assets_values:
|
|
824
|
+
assets_values: list[dict[str, Any]],
|
|
762
825
|
) -> Iterator[Any]:
|
|
763
826
|
"""Yield product data chunks"""
|
|
764
827
|
|
|
@@ -829,7 +892,7 @@ class AwsDownload(Download):
|
|
|
829
892
|
)
|
|
830
893
|
|
|
831
894
|
def _get_commonpath(
|
|
832
|
-
self, product: EOProduct, product_chunks:
|
|
895
|
+
self, product: EOProduct, product_chunks: set[Any], build_safe: bool
|
|
833
896
|
) -> str:
|
|
834
897
|
chunk_paths = []
|
|
835
898
|
for product_chunk in product_chunks:
|
|
@@ -839,8 +902,8 @@ class AwsDownload(Download):
|
|
|
839
902
|
return os.path.commonpath(chunk_paths)
|
|
840
903
|
|
|
841
904
|
def get_rio_env(
|
|
842
|
-
self, bucket_name: str, prefix: str, auth_dict:
|
|
843
|
-
) ->
|
|
905
|
+
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
906
|
+
) -> dict[str, Any]:
|
|
844
907
|
"""Get rasterio environment variables needed for data access authentication.
|
|
845
908
|
|
|
846
909
|
:param bucket_name: Bucket containing objects
|
|
@@ -848,23 +911,26 @@ class AwsDownload(Download):
|
|
|
848
911
|
:param auth_dict: Dictionary containing authentication keys
|
|
849
912
|
:returns: The rasterio environment variables
|
|
850
913
|
"""
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
914
|
+
rio_env_kwargs = {}
|
|
915
|
+
if endpoint_url := getattr(self.config, "s3_endpoint", None):
|
|
916
|
+
rio_env_kwargs["endpoint_url"] = endpoint_url.split("://")[-1]
|
|
917
|
+
rio_env_kwargs |= auth_dict
|
|
918
|
+
|
|
919
|
+
if self.s3_session is None:
|
|
920
|
+
_ = self.get_authenticated_objects(bucket_name, prefix, auth_dict)
|
|
856
921
|
|
|
857
|
-
_ = self.get_authenticated_objects(bucket_name, prefix, auth_dict)
|
|
858
922
|
if self.s3_session is not None:
|
|
859
923
|
if self.requester_pays:
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
924
|
+
rio_env_kwargs["requester_pays"] = True
|
|
925
|
+
return {
|
|
926
|
+
"session": self.s3_session,
|
|
927
|
+
**rio_env_kwargs,
|
|
928
|
+
}
|
|
863
929
|
else:
|
|
864
|
-
return {"aws_unsigned": True}
|
|
930
|
+
return {"aws_unsigned": True, **rio_env_kwargs}
|
|
865
931
|
|
|
866
932
|
def get_authenticated_objects(
|
|
867
|
-
self, bucket_name: str, prefix: str, auth_dict:
|
|
933
|
+
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
868
934
|
) -> ResourceCollection:
|
|
869
935
|
"""Get boto3 authenticated objects for the given bucket using
|
|
870
936
|
the most adapted auth strategy.
|
|
@@ -876,8 +942,8 @@ class AwsDownload(Download):
|
|
|
876
942
|
:param auth_dict: Dictionary containing authentication keys
|
|
877
943
|
:returns: The boto3 authenticated objects
|
|
878
944
|
"""
|
|
879
|
-
auth_methods:
|
|
880
|
-
Callable[[str, str,
|
|
945
|
+
auth_methods: list[
|
|
946
|
+
Callable[[str, str, S3SessionKwargs], Optional[ResourceCollection]]
|
|
881
947
|
] = [
|
|
882
948
|
self._get_authenticated_objects_unsigned,
|
|
883
949
|
self._get_authenticated_objects_from_auth_profile,
|
|
@@ -912,7 +978,7 @@ class AwsDownload(Download):
|
|
|
912
978
|
)
|
|
913
979
|
|
|
914
980
|
def _get_authenticated_objects_unsigned(
|
|
915
|
-
self, bucket_name: str, prefix: str, auth_dict:
|
|
981
|
+
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
916
982
|
) -> Optional[ResourceCollection]:
|
|
917
983
|
"""Auth strategy using no-sign-request"""
|
|
918
984
|
|
|
@@ -927,7 +993,7 @@ class AwsDownload(Download):
|
|
|
927
993
|
return objects
|
|
928
994
|
|
|
929
995
|
def _get_authenticated_objects_from_auth_profile(
|
|
930
|
-
self, bucket_name: str, prefix: str, auth_dict:
|
|
996
|
+
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
931
997
|
) -> Optional[ResourceCollection]:
|
|
932
998
|
"""Auth strategy using RequestPayer=requester and ``aws_profile`` from provided credentials"""
|
|
933
999
|
|
|
@@ -945,26 +1011,18 @@ class AwsDownload(Download):
|
|
|
945
1011
|
objects = s3_resource.Bucket(bucket_name).objects
|
|
946
1012
|
list(objects.filter(Prefix=prefix).limit(1))
|
|
947
1013
|
self.s3_session = s3_session
|
|
1014
|
+
self.s3_resource = s3_resource
|
|
948
1015
|
return objects
|
|
949
1016
|
else:
|
|
950
1017
|
return None
|
|
951
1018
|
|
|
952
1019
|
def _get_authenticated_objects_from_auth_keys(
|
|
953
|
-
self, bucket_name: str, prefix: str, auth_dict:
|
|
1020
|
+
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
954
1021
|
) -> Optional[ResourceCollection]:
|
|
955
1022
|
"""Auth strategy using RequestPayer=requester and ``aws_access_key_id``/``aws_secret_access_key``
|
|
956
1023
|
from provided credentials"""
|
|
957
1024
|
|
|
958
1025
|
if all(k in auth_dict for k in ("aws_access_key_id", "aws_secret_access_key")):
|
|
959
|
-
S3SessionKwargs = TypedDict(
|
|
960
|
-
"S3SessionKwargs",
|
|
961
|
-
{
|
|
962
|
-
"aws_access_key_id": str,
|
|
963
|
-
"aws_secret_access_key": str,
|
|
964
|
-
"aws_session_token": str,
|
|
965
|
-
},
|
|
966
|
-
total=False,
|
|
967
|
-
)
|
|
968
1026
|
s3_session_kwargs: S3SessionKwargs = {
|
|
969
1027
|
"aws_access_key_id": auth_dict["aws_access_key_id"],
|
|
970
1028
|
"aws_secret_access_key": auth_dict["aws_secret_access_key"],
|
|
@@ -984,12 +1042,13 @@ class AwsDownload(Download):
|
|
|
984
1042
|
objects = s3_resource.Bucket(bucket_name).objects
|
|
985
1043
|
list(objects.filter(Prefix=prefix).limit(1))
|
|
986
1044
|
self.s3_session = s3_session
|
|
1045
|
+
self.s3_resource = s3_resource
|
|
987
1046
|
return objects
|
|
988
1047
|
else:
|
|
989
1048
|
return None
|
|
990
1049
|
|
|
991
1050
|
def _get_authenticated_objects_from_env(
|
|
992
|
-
self, bucket_name: str, prefix: str, auth_dict:
|
|
1051
|
+
self, bucket_name: str, prefix: str, auth_dict: S3SessionKwargs
|
|
993
1052
|
) -> Optional[ResourceCollection]:
|
|
994
1053
|
"""Auth strategy using RequestPayer=requester and current environment"""
|
|
995
1054
|
|
|
@@ -1005,11 +1064,12 @@ class AwsDownload(Download):
|
|
|
1005
1064
|
objects = s3_resource.Bucket(bucket_name).objects
|
|
1006
1065
|
list(objects.filter(Prefix=prefix).limit(1))
|
|
1007
1066
|
self.s3_session = s3_session
|
|
1067
|
+
self.s3_resource = s3_resource
|
|
1008
1068
|
return objects
|
|
1009
1069
|
|
|
1010
1070
|
def get_product_bucket_name_and_prefix(
|
|
1011
1071
|
self, product: EOProduct, url: Optional[str] = None
|
|
1012
|
-
) ->
|
|
1072
|
+
) -> tuple[str, Optional[str]]:
|
|
1013
1073
|
"""Extract bucket name and prefix from product URL
|
|
1014
1074
|
|
|
1015
1075
|
:param product: The EO product to download
|
|
@@ -1140,7 +1200,7 @@ class AwsDownload(Download):
|
|
|
1140
1200
|
s1_title_suffix: Optional[str] = None
|
|
1141
1201
|
# S2 common
|
|
1142
1202
|
if product.product_type and "S2_MSI" in product.product_type:
|
|
1143
|
-
title_search: Optional[Match[str]] = re.search(
|
|
1203
|
+
title_search: Optional[re.Match[str]] = re.search(
|
|
1144
1204
|
r"^\w+_\w+_(\w+)_(\w+)_(\w+)_(\w+)_(\w+)$",
|
|
1145
1205
|
product.properties["title"],
|
|
1146
1206
|
)
|
|
@@ -1326,13 +1386,13 @@ class AwsDownload(Download):
|
|
|
1326
1386
|
def download_all(
|
|
1327
1387
|
self,
|
|
1328
1388
|
products: SearchResult,
|
|
1329
|
-
auth: Optional[Union[AuthBase,
|
|
1389
|
+
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
1330
1390
|
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
1331
1391
|
progress_callback: Optional[ProgressCallback] = None,
|
|
1332
1392
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
1333
1393
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
1334
1394
|
**kwargs: Unpack[DownloadConf],
|
|
1335
|
-
) ->
|
|
1395
|
+
) -> list[str]:
|
|
1336
1396
|
"""
|
|
1337
1397
|
download_all using parent (base plugin) method
|
|
1338
1398
|
"""
|
eodag/plugins/download/base.py
CHANGED
|
@@ -26,17 +26,7 @@ import tempfile
|
|
|
26
26
|
import zipfile
|
|
27
27
|
from datetime import datetime, timedelta
|
|
28
28
|
from time import sleep
|
|
29
|
-
from typing import
|
|
30
|
-
TYPE_CHECKING,
|
|
31
|
-
Any,
|
|
32
|
-
Callable,
|
|
33
|
-
Dict,
|
|
34
|
-
List,
|
|
35
|
-
Optional,
|
|
36
|
-
Tuple,
|
|
37
|
-
TypeVar,
|
|
38
|
-
Union,
|
|
39
|
-
)
|
|
29
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
|
|
40
30
|
|
|
41
31
|
from eodag.plugins.base import PluginTopic
|
|
42
32
|
from eodag.utils import (
|
|
@@ -60,6 +50,7 @@ if TYPE_CHECKING:
|
|
|
60
50
|
from eodag.api.product import EOProduct
|
|
61
51
|
from eodag.api.search_result import SearchResult
|
|
62
52
|
from eodag.config import PluginConfig
|
|
53
|
+
from eodag.types import S3SessionKwargs
|
|
63
54
|
from eodag.types.download_args import DownloadConf
|
|
64
55
|
from eodag.utils import DownloadedCallback, Unpack
|
|
65
56
|
|
|
@@ -110,7 +101,7 @@ class Download(PluginTopic):
|
|
|
110
101
|
def download(
|
|
111
102
|
self,
|
|
112
103
|
product: EOProduct,
|
|
113
|
-
auth: Optional[Union[AuthBase,
|
|
104
|
+
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
114
105
|
progress_callback: Optional[ProgressCallback] = None,
|
|
115
106
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
116
107
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
@@ -140,7 +131,7 @@ class Download(PluginTopic):
|
|
|
140
131
|
def _stream_download_dict(
|
|
141
132
|
self,
|
|
142
133
|
product: EOProduct,
|
|
143
|
-
auth: Optional[Union[AuthBase,
|
|
134
|
+
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
144
135
|
progress_callback: Optional[ProgressCallback] = None,
|
|
145
136
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
146
137
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
@@ -170,7 +161,7 @@ class Download(PluginTopic):
|
|
|
170
161
|
product: EOProduct,
|
|
171
162
|
progress_callback: Optional[ProgressCallback] = None,
|
|
172
163
|
**kwargs: Unpack[DownloadConf],
|
|
173
|
-
) ->
|
|
164
|
+
) -> tuple[Optional[str], Optional[str]]:
|
|
174
165
|
"""Check if file has already been downloaded, and prepare product download
|
|
175
166
|
|
|
176
167
|
:param product: The EO product to download
|
|
@@ -439,13 +430,13 @@ class Download(PluginTopic):
|
|
|
439
430
|
def download_all(
|
|
440
431
|
self,
|
|
441
432
|
products: SearchResult,
|
|
442
|
-
auth: Optional[Union[AuthBase,
|
|
433
|
+
auth: Optional[Union[AuthBase, S3SessionKwargs]] = None,
|
|
443
434
|
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
444
435
|
progress_callback: Optional[ProgressCallback] = None,
|
|
445
436
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
446
437
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
447
438
|
**kwargs: Unpack[DownloadConf],
|
|
448
|
-
) ->
|
|
439
|
+
) -> list[str]:
|
|
449
440
|
"""
|
|
450
441
|
Base download_all method.
|
|
451
442
|
|
|
@@ -474,7 +465,7 @@ class Download(PluginTopic):
|
|
|
474
465
|
# Products are going to be removed one by one from this sequence once
|
|
475
466
|
# downloaded.
|
|
476
467
|
products = products[:]
|
|
477
|
-
paths:
|
|
468
|
+
paths: list[str] = []
|
|
478
469
|
# initiate retry loop
|
|
479
470
|
start_time = datetime.now()
|
|
480
471
|
stop_time = start_time + timedelta(minutes=timeout)
|