eodag 4.0.0a4__py3-none-any.whl → 4.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/collection.py +65 -1
- eodag/api/core.py +65 -19
- eodag/api/product/_assets.py +1 -1
- eodag/api/product/_product.py +133 -18
- eodag/api/product/drivers/__init__.py +3 -1
- eodag/api/product/drivers/base.py +3 -1
- eodag/api/product/drivers/generic.py +9 -5
- eodag/api/product/drivers/sentinel1.py +14 -9
- eodag/api/product/drivers/sentinel2.py +14 -7
- eodag/api/product/metadata_mapping.py +5 -2
- eodag/api/provider.py +1 -0
- eodag/api/search_result.py +4 -1
- eodag/cli.py +17 -8
- eodag/config.py +22 -4
- eodag/plugins/apis/ecmwf.py +3 -24
- eodag/plugins/apis/usgs.py +3 -24
- eodag/plugins/download/aws.py +85 -44
- eodag/plugins/download/base.py +117 -41
- eodag/plugins/download/http.py +88 -65
- eodag/plugins/search/base.py +8 -3
- eodag/plugins/search/build_search_result.py +108 -120
- eodag/plugins/search/cop_marine.py +3 -1
- eodag/plugins/search/qssearch.py +7 -6
- eodag/resources/collections.yml +255 -0
- eodag/resources/ext_collections.json +1 -1
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/providers.yml +62 -25
- eodag/resources/user_conf_template.yml +6 -0
- eodag/types/__init__.py +22 -16
- eodag/types/download_args.py +3 -1
- eodag/types/queryables.py +125 -55
- eodag/types/stac_extensions.py +408 -0
- eodag/types/stac_metadata.py +312 -0
- eodag/utils/__init__.py +42 -4
- eodag/utils/dates.py +202 -2
- eodag/utils/s3.py +4 -4
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/METADATA +7 -13
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/RECORD +42 -40
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/WHEEL +1 -1
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/entry_points.txt +1 -1
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/licenses/LICENSE +0 -0
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/top_level.txt +0 -0
|
@@ -38,12 +38,13 @@ class Sentinel1Driver(DatasetDriver):
|
|
|
38
38
|
(re.compile(r"grd", re.IGNORECASE), ""),
|
|
39
39
|
(re.compile(r"slc", re.IGNORECASE), ""),
|
|
40
40
|
(re.compile(r"ocn", re.IGNORECASE), ""),
|
|
41
|
-
(re.compile(r"iw", re.IGNORECASE), ""),
|
|
42
|
-
(re.compile(r"ew", re.IGNORECASE), ""),
|
|
41
|
+
(re.compile(r"(?<![A-Za-z])iw(?![A-Za-z])", re.IGNORECASE), ""),
|
|
42
|
+
(re.compile(r"(?<![A-Za-z])ew(?![A-Za-z])", re.IGNORECASE), ""),
|
|
43
43
|
(re.compile(r"wv", re.IGNORECASE), ""),
|
|
44
|
-
(re.compile(r"sm", re.IGNORECASE), ""),
|
|
45
|
-
(re.compile(r"raw([-_]s)?", re.IGNORECASE), ""),
|
|
44
|
+
(re.compile(r"(?<![A-Za-z])sm(?![A-Za-z])", re.IGNORECASE), ""),
|
|
45
|
+
(re.compile(r"(?<![A-Za-z])raw([-_]s)?(?![A-Za-z])", re.IGNORECASE), ""),
|
|
46
46
|
(re.compile(r"[t?0-9]{3,}", re.IGNORECASE), ""),
|
|
47
|
+
(re.compile(r"\b[0-9A-F]{3,}\b", re.IGNORECASE), ""),
|
|
47
48
|
(re.compile(r"-+"), "-"),
|
|
48
49
|
(re.compile(r"-+\."), "."),
|
|
49
50
|
(re.compile(r"_+"), "_"),
|
|
@@ -55,34 +56,38 @@ class Sentinel1Driver(DatasetDriver):
|
|
|
55
56
|
# data
|
|
56
57
|
{
|
|
57
58
|
"pattern": re.compile(
|
|
58
|
-
r"^.*?([vh]{2}).*\.(?:jp2|tiff?|dat)
|
|
59
|
+
r"^.*?([vh]{2}).*\.(?:jp2|tiff?|dat)(?:\?.*)?$", re.IGNORECASE
|
|
59
60
|
),
|
|
60
61
|
"roles": ["data"],
|
|
61
62
|
},
|
|
62
63
|
# metadata
|
|
63
64
|
{
|
|
64
65
|
"pattern": re.compile(
|
|
65
|
-
r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)
|
|
66
|
+
r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)(?:\?.*)?$",
|
|
67
|
+
re.IGNORECASE,
|
|
66
68
|
),
|
|
67
69
|
"roles": ["metadata"],
|
|
68
70
|
},
|
|
69
71
|
# thumbnail
|
|
70
72
|
{
|
|
71
73
|
"pattern": re.compile(
|
|
72
|
-
r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)
|
|
74
|
+
r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)(?:\?.*)?$", re.IGNORECASE
|
|
73
75
|
),
|
|
74
76
|
"roles": ["thumbnail"],
|
|
75
77
|
},
|
|
76
78
|
# quicklook
|
|
77
79
|
{
|
|
78
80
|
"pattern": re.compile(
|
|
79
|
-
r"^(?:.*[/\\])?([^/\\]+-ql|preview|quick-?look)(\.jpe?g|\.png)
|
|
81
|
+
r"^(?:.*[/\\])?([^/\\]+-ql|preview|quick-?look)(\.jpe?g|\.png)(?:\?.*)?$",
|
|
80
82
|
re.IGNORECASE,
|
|
81
83
|
),
|
|
82
84
|
"roles": ["overview"],
|
|
83
85
|
},
|
|
84
86
|
# default
|
|
85
|
-
{
|
|
87
|
+
{
|
|
88
|
+
"pattern": re.compile(r"^(?:.*[/\\])?([^/\\?]+)(?:\?.*)?$"),
|
|
89
|
+
"roles": ["auxiliary"],
|
|
90
|
+
},
|
|
86
91
|
]
|
|
87
92
|
|
|
88
93
|
def _normalize_key(self, key: str, eo_product: EOProduct) -> str:
|
|
@@ -40,47 +40,54 @@ class Sentinel2Driver(DatasetDriver):
|
|
|
40
40
|
ASSET_KEYS_PATTERNS_ROLES: list[AssetPatterns] = [
|
|
41
41
|
# masks
|
|
42
42
|
{
|
|
43
|
-
"pattern": re.compile(
|
|
43
|
+
"pattern": re.compile(
|
|
44
|
+
r"^.*?(MSK_[^/\\]+)\.(?:jp2|tiff?)(?:\?.*)?$", re.IGNORECASE
|
|
45
|
+
),
|
|
44
46
|
"roles": ["data-mask"],
|
|
45
47
|
},
|
|
46
48
|
# visual
|
|
47
49
|
{
|
|
48
50
|
"pattern": re.compile(
|
|
49
|
-
r"^.*?(TCI)(_[0-9]+m)?\.(?:jp2|tiff?)
|
|
51
|
+
r"^.*?(TCI)(_[0-9]+m)?\.(?:jp2|tiff?)(?:\?.*)?$", re.IGNORECASE
|
|
50
52
|
),
|
|
51
53
|
"roles": ["visual"],
|
|
52
54
|
},
|
|
53
55
|
# bands
|
|
54
56
|
{
|
|
55
57
|
"pattern": re.compile(
|
|
56
|
-
r"^.*?([A-Z]+[0-9]*[A-Z]?)(_[0-9]+m)?\.(?:jp2|tiff?)
|
|
58
|
+
r"^.*?([A-Z]+[0-9]*[A-Z]?)(_[0-9]+m)?\.(?:jp2|tiff?)(?:\?.*)?$",
|
|
59
|
+
re.IGNORECASE,
|
|
57
60
|
),
|
|
58
61
|
"roles": ["data"],
|
|
59
62
|
},
|
|
60
63
|
# metadata
|
|
61
64
|
{
|
|
62
65
|
"pattern": re.compile(
|
|
63
|
-
r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)
|
|
66
|
+
r"^(?:.*[/\\])?([^/\\]+)(\.xml|\.xsd|\.safe|\.json)(?:\?.*)?$",
|
|
67
|
+
re.IGNORECASE,
|
|
64
68
|
),
|
|
65
69
|
"roles": ["metadata"],
|
|
66
70
|
},
|
|
67
71
|
# thumbnail
|
|
68
72
|
{
|
|
69
73
|
"pattern": re.compile(
|
|
70
|
-
r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)
|
|
74
|
+
r"^(?:.*[/\\])?(thumbnail)(\.jpe?g|\.png)(?:\?.*)?$", re.IGNORECASE
|
|
71
75
|
),
|
|
72
76
|
"roles": ["thumbnail"],
|
|
73
77
|
},
|
|
74
78
|
# quicklook
|
|
75
79
|
{
|
|
76
80
|
"pattern": re.compile(
|
|
77
|
-
r"^(?:.*[/\\])?[^/\\]+(-ql|preview|quick-?look)(\.jpe?g|\.png)
|
|
81
|
+
r"^(?:.*[/\\])?[^/\\]+(-ql|preview|quick-?look)(\.jpe?g|\.png)(?:\?.*)?$",
|
|
78
82
|
re.IGNORECASE,
|
|
79
83
|
),
|
|
80
84
|
"roles": ["overview"],
|
|
81
85
|
},
|
|
82
86
|
# default
|
|
83
|
-
{
|
|
87
|
+
{
|
|
88
|
+
"pattern": re.compile(r"^(?:.*[/\\])?([^/\\?]+)(?:\?.*)?$"),
|
|
89
|
+
"roles": ["auxiliary"],
|
|
90
|
+
},
|
|
84
91
|
]
|
|
85
92
|
|
|
86
93
|
def _normalize_key(self, key: str, eo_product: EOProduct) -> str:
|
|
@@ -1731,16 +1731,19 @@ def get_queryable_from_provider(
|
|
|
1731
1731
|
mapping_values = [
|
|
1732
1732
|
v[0] if isinstance(v, list) else "" for v in metadata_mapping.values()
|
|
1733
1733
|
]
|
|
1734
|
+
StacQueryables = Queryables.from_stac_models()
|
|
1734
1735
|
if provider_queryable in mapping_values:
|
|
1735
1736
|
ind = mapping_values.index(provider_queryable)
|
|
1736
|
-
return
|
|
1737
|
+
return StacQueryables.get_queryable_from_alias(
|
|
1738
|
+
list(metadata_mapping.keys())[ind]
|
|
1739
|
+
)
|
|
1737
1740
|
for param, param_conf in metadata_mapping.items():
|
|
1738
1741
|
if (
|
|
1739
1742
|
isinstance(param_conf, list)
|
|
1740
1743
|
and param_conf[0]
|
|
1741
1744
|
and re.search(pattern, param_conf[0])
|
|
1742
1745
|
):
|
|
1743
|
-
return
|
|
1746
|
+
return StacQueryables.get_queryable_from_alias(param)
|
|
1744
1747
|
return None
|
|
1745
1748
|
|
|
1746
1749
|
|
eodag/api/provider.py
CHANGED
eodag/api/search_result.py
CHANGED
|
@@ -32,7 +32,7 @@ from eodag.plugins.crunch.filter_latest_intersect import FilterLatestIntersect
|
|
|
32
32
|
from eodag.plugins.crunch.filter_latest_tpl_name import FilterLatestByName
|
|
33
33
|
from eodag.plugins.crunch.filter_overlap import FilterOverlap
|
|
34
34
|
from eodag.plugins.crunch.filter_property import FilterProperty
|
|
35
|
-
from eodag.utils import GENERIC_STAC_PROVIDER, STAC_SEARCH_PLUGINS
|
|
35
|
+
from eodag.utils import GENERIC_STAC_PROVIDER, STAC_SEARCH_PLUGINS, STAC_VERSION
|
|
36
36
|
from eodag.utils.exceptions import MisconfiguredError
|
|
37
37
|
|
|
38
38
|
if TYPE_CHECKING:
|
|
@@ -206,6 +206,9 @@ class SearchResult(UserList[EOProduct]):
|
|
|
206
206
|
"eodag:search_params": geojson_search_params or None,
|
|
207
207
|
"eodag:raise_errors": self.raise_errors,
|
|
208
208
|
},
|
|
209
|
+
"links": [],
|
|
210
|
+
"stac_extensions": [],
|
|
211
|
+
"stac_version": STAC_VERSION,
|
|
209
212
|
}
|
|
210
213
|
|
|
211
214
|
def as_shapely_geometry_object(self) -> GeometryCollection:
|
eodag/cli.py
CHANGED
|
@@ -48,6 +48,7 @@ from typing import TYPE_CHECKING, Any, Callable, Mapping, Optional
|
|
|
48
48
|
from urllib.parse import parse_qs
|
|
49
49
|
|
|
50
50
|
import click
|
|
51
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
51
52
|
|
|
52
53
|
from eodag.api.collection import CollectionsList
|
|
53
54
|
from eodag.api.core import EODataAccessGateway, SearchResult
|
|
@@ -122,14 +123,14 @@ def _deprecated_cli(message: str, version: Optional[str] = None) -> Callable[...
|
|
|
122
123
|
help="Control the verbosity of the logs. For maximum verbosity, type -vvv",
|
|
123
124
|
)
|
|
124
125
|
@click.pass_context
|
|
125
|
-
def
|
|
126
|
+
def eodag_cli(ctx: Context, verbose: int) -> None:
|
|
126
127
|
"""Earth Observation Data Access Gateway: work on EO products from any provider"""
|
|
127
128
|
if ctx.obj is None:
|
|
128
129
|
ctx.obj = {}
|
|
129
130
|
ctx.obj["verbosity"] = verbose
|
|
130
131
|
|
|
131
132
|
|
|
132
|
-
@
|
|
133
|
+
@eodag_cli.command(name="version", help="Print eodag version and exit")
|
|
133
134
|
def version() -> None:
|
|
134
135
|
"""Print eodag version and exit"""
|
|
135
136
|
click.echo(
|
|
@@ -141,7 +142,7 @@ def version() -> None:
|
|
|
141
142
|
)
|
|
142
143
|
|
|
143
144
|
|
|
144
|
-
@
|
|
145
|
+
@eodag_cli.command(
|
|
145
146
|
name="search",
|
|
146
147
|
help="Search satellite images by their collections, instruments, constellation, "
|
|
147
148
|
"platform, processing level or sensor type. It is mandatory to provide "
|
|
@@ -406,7 +407,7 @@ def search_crunch(ctx: Context, **kwargs: Any) -> None:
|
|
|
406
407
|
ctx.obj["search_results"] = results
|
|
407
408
|
|
|
408
409
|
|
|
409
|
-
@
|
|
410
|
+
@eodag_cli.command(name="list", help="List supported collections")
|
|
410
411
|
@click.option("-p", "--provider", help="List collections supported by this provider")
|
|
411
412
|
@click.option(
|
|
412
413
|
"--instruments", help="List collections originating from these instruments"
|
|
@@ -488,7 +489,7 @@ def list_col(ctx: Context, **kwargs: Any) -> None:
|
|
|
488
489
|
sys.exit(1)
|
|
489
490
|
|
|
490
491
|
|
|
491
|
-
@
|
|
492
|
+
@eodag_cli.command(name="discover", help="Fetch providers to discover collections")
|
|
492
493
|
@click.option("-p", "--provider", help="Fetch only the given provider")
|
|
493
494
|
@click.option(
|
|
494
495
|
"--storage",
|
|
@@ -519,7 +520,7 @@ def discover_col(ctx: Context, **kwargs: Any) -> None:
|
|
|
519
520
|
click.echo("Results stored at '{}'".format(storage_filepath))
|
|
520
521
|
|
|
521
522
|
|
|
522
|
-
@
|
|
523
|
+
@eodag_cli.command(
|
|
523
524
|
help="""Download a list of products from a serialized search result or STAC items URLs/paths
|
|
524
525
|
|
|
525
526
|
Examples:
|
|
@@ -556,6 +557,11 @@ Examples:
|
|
|
556
557
|
type=click.Path(dir_okay=True, file_okay=False),
|
|
557
558
|
help="Products or quicklooks download directory (Default: local temporary directory)",
|
|
558
559
|
)
|
|
560
|
+
@click.option(
|
|
561
|
+
"--max-workers",
|
|
562
|
+
type=int,
|
|
563
|
+
help="The maximum number of workers to use for downloading products and assets in parallel",
|
|
564
|
+
)
|
|
559
565
|
@click.pass_context
|
|
560
566
|
def download(ctx: Context, **kwargs: Any) -> None:
|
|
561
567
|
"""Download a bunch of products from a serialized search result"""
|
|
@@ -601,7 +607,10 @@ def download(ctx: Context, **kwargs: Any) -> None:
|
|
|
601
607
|
|
|
602
608
|
else:
|
|
603
609
|
# Download products
|
|
604
|
-
|
|
610
|
+
executor = ThreadPoolExecutor(max_workers=kwargs.pop("max_workers"))
|
|
611
|
+
downloaded_files = satim_api.download_all(
|
|
612
|
+
search_results, output_dir=output_dir, executor=executor
|
|
613
|
+
)
|
|
605
614
|
if downloaded_files and len(downloaded_files) > 0:
|
|
606
615
|
for downloaded_file in downloaded_files:
|
|
607
616
|
if downloaded_file is None:
|
|
@@ -617,4 +626,4 @@ def download(ctx: Context, **kwargs: Any) -> None:
|
|
|
617
626
|
|
|
618
627
|
|
|
619
628
|
if __name__ == "__main__":
|
|
620
|
-
|
|
629
|
+
eodag_cli(obj={})
|
eodag/config.py
CHANGED
|
@@ -20,7 +20,7 @@ from __future__ import annotations
|
|
|
20
20
|
import logging
|
|
21
21
|
import os
|
|
22
22
|
from importlib.resources import files as res_files
|
|
23
|
-
from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional,
|
|
23
|
+
from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional, Union
|
|
24
24
|
|
|
25
25
|
import orjson
|
|
26
26
|
import requests
|
|
@@ -28,6 +28,7 @@ import yaml
|
|
|
28
28
|
import yaml.parser
|
|
29
29
|
from annotated_types import Gt
|
|
30
30
|
from jsonpath_ng import JSONPath
|
|
31
|
+
from typing_extensions import TypedDict
|
|
31
32
|
|
|
32
33
|
from eodag.utils import (
|
|
33
34
|
HTTP_REQ_TIMEOUT,
|
|
@@ -635,12 +636,29 @@ class PluginConfig(yaml.YAMLObject):
|
|
|
635
636
|
matching_conf = getattr(self, "matching_conf", {})
|
|
636
637
|
matching_url = getattr(self, "matching_url", None)
|
|
637
638
|
|
|
638
|
-
|
|
639
|
-
|
|
639
|
+
# both match
|
|
640
|
+
if (
|
|
641
|
+
target_matching_conf
|
|
642
|
+
and sort_dict(target_matching_conf) == sort_dict(matching_conf)
|
|
643
|
+
and target_matching_url
|
|
644
|
+
and target_matching_url == matching_url
|
|
640
645
|
):
|
|
641
646
|
return True
|
|
642
647
|
|
|
643
|
-
|
|
648
|
+
# conf matches and no matching_url expected
|
|
649
|
+
if (
|
|
650
|
+
target_matching_conf
|
|
651
|
+
and sort_dict(target_matching_conf) == sort_dict(matching_conf)
|
|
652
|
+
and not target_matching_url
|
|
653
|
+
):
|
|
654
|
+
return True
|
|
655
|
+
|
|
656
|
+
# url matches and no matching_conf expected
|
|
657
|
+
if (
|
|
658
|
+
target_matching_url
|
|
659
|
+
and target_matching_url == matching_url
|
|
660
|
+
and not target_matching_conf
|
|
661
|
+
):
|
|
644
662
|
return True
|
|
645
663
|
|
|
646
664
|
return False
|
eodag/plugins/apis/ecmwf.py
CHANGED
|
@@ -48,6 +48,7 @@ from eodag.utils.logging import get_logging_verbose
|
|
|
48
48
|
if TYPE_CHECKING:
|
|
49
49
|
from typing import Any, Optional, Union
|
|
50
50
|
|
|
51
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
51
52
|
from mypy_boto3_s3 import S3ServiceResource
|
|
52
53
|
from requests.auth import AuthBase
|
|
53
54
|
|
|
@@ -55,7 +56,7 @@ if TYPE_CHECKING:
|
|
|
55
56
|
from eodag.api.search_result import SearchResult
|
|
56
57
|
from eodag.config import PluginConfig
|
|
57
58
|
from eodag.types.download_args import DownloadConf
|
|
58
|
-
from eodag.utils import
|
|
59
|
+
from eodag.utils import ProgressCallback, Unpack
|
|
59
60
|
|
|
60
61
|
|
|
61
62
|
logger = logging.getLogger("eodag.apis.ecmwf")
|
|
@@ -185,6 +186,7 @@ class EcmwfApi(Api, ECMWFSearch):
|
|
|
185
186
|
product: EOProduct,
|
|
186
187
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
187
188
|
progress_callback: Optional[ProgressCallback] = None,
|
|
189
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
188
190
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
189
191
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
190
192
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -269,29 +271,6 @@ class EcmwfApi(Api, ECMWFSearch):
|
|
|
269
271
|
product.location = path_to_uri(product_path)
|
|
270
272
|
return product_path
|
|
271
273
|
|
|
272
|
-
def download_all(
|
|
273
|
-
self,
|
|
274
|
-
products: SearchResult,
|
|
275
|
-
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
276
|
-
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
277
|
-
progress_callback: Optional[ProgressCallback] = None,
|
|
278
|
-
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
279
|
-
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
280
|
-
**kwargs: Unpack[DownloadConf],
|
|
281
|
-
) -> list[str]:
|
|
282
|
-
"""
|
|
283
|
-
Download all using parent (base plugin) method
|
|
284
|
-
"""
|
|
285
|
-
return super(EcmwfApi, self).download_all(
|
|
286
|
-
products,
|
|
287
|
-
auth=auth,
|
|
288
|
-
downloaded_callback=downloaded_callback,
|
|
289
|
-
progress_callback=progress_callback,
|
|
290
|
-
wait=wait,
|
|
291
|
-
timeout=timeout,
|
|
292
|
-
**kwargs,
|
|
293
|
-
)
|
|
294
|
-
|
|
295
274
|
def clear(self) -> None:
|
|
296
275
|
"""Clear search context"""
|
|
297
276
|
pass
|
eodag/plugins/apis/usgs.py
CHANGED
|
@@ -57,12 +57,13 @@ from eodag.utils.exceptions import (
|
|
|
57
57
|
)
|
|
58
58
|
|
|
59
59
|
if TYPE_CHECKING:
|
|
60
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
60
61
|
from mypy_boto3_s3 import S3ServiceResource
|
|
61
62
|
from requests.auth import AuthBase
|
|
62
63
|
|
|
63
64
|
from eodag.config import PluginConfig
|
|
64
65
|
from eodag.types.download_args import DownloadConf
|
|
65
|
-
from eodag.utils import
|
|
66
|
+
from eodag.utils import Unpack
|
|
66
67
|
|
|
67
68
|
logger = logging.getLogger("eodag.apis.usgs")
|
|
68
69
|
|
|
@@ -312,6 +313,7 @@ class UsgsApi(Api):
|
|
|
312
313
|
product: EOProduct,
|
|
313
314
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
314
315
|
progress_callback: Optional[ProgressCallback] = None,
|
|
316
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
315
317
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
316
318
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
317
319
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -477,26 +479,3 @@ class UsgsApi(Api):
|
|
|
477
479
|
shutil.move(fs_path, new_fs_path)
|
|
478
480
|
product.location = path_to_uri(new_fs_path)
|
|
479
481
|
return new_fs_path
|
|
480
|
-
|
|
481
|
-
def download_all(
|
|
482
|
-
self,
|
|
483
|
-
products: SearchResult,
|
|
484
|
-
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
485
|
-
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
486
|
-
progress_callback: Optional[ProgressCallback] = None,
|
|
487
|
-
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
488
|
-
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
489
|
-
**kwargs: Unpack[DownloadConf],
|
|
490
|
-
) -> list[str]:
|
|
491
|
-
"""
|
|
492
|
-
Download all using parent (base plugin) method
|
|
493
|
-
"""
|
|
494
|
-
return super(UsgsApi, self).download_all(
|
|
495
|
-
products,
|
|
496
|
-
auth=auth,
|
|
497
|
-
downloaded_callback=downloaded_callback,
|
|
498
|
-
progress_callback=progress_callback,
|
|
499
|
-
wait=wait,
|
|
500
|
-
timeout=timeout,
|
|
501
|
-
**kwargs,
|
|
502
|
-
)
|
eodag/plugins/download/aws.py
CHANGED
|
@@ -25,7 +25,9 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
|
|
|
25
25
|
|
|
26
26
|
import boto3
|
|
27
27
|
import requests
|
|
28
|
+
from boto3.s3.transfer import TransferConfig
|
|
28
29
|
from botocore.exceptions import ClientError
|
|
30
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
29
31
|
from lxml import etree
|
|
30
32
|
from requests.auth import AuthBase
|
|
31
33
|
|
|
@@ -34,7 +36,7 @@ from eodag.api.product.metadata_mapping import (
|
|
|
34
36
|
properties_from_json,
|
|
35
37
|
properties_from_xml,
|
|
36
38
|
)
|
|
37
|
-
from eodag.plugins.authentication.aws_auth import raise_if_auth_error
|
|
39
|
+
from eodag.plugins.authentication.aws_auth import AwsAuth, raise_if_auth_error
|
|
38
40
|
from eodag.plugins.download.base import Download
|
|
39
41
|
from eodag.utils import (
|
|
40
42
|
DEFAULT_DOWNLOAD_TIMEOUT,
|
|
@@ -65,10 +67,9 @@ if TYPE_CHECKING:
|
|
|
65
67
|
from mypy_boto3_s3.client import S3Client
|
|
66
68
|
|
|
67
69
|
from eodag.api.product import EOProduct
|
|
68
|
-
from eodag.api.search_result import SearchResult
|
|
69
70
|
from eodag.config import PluginConfig
|
|
70
71
|
from eodag.types.download_args import DownloadConf
|
|
71
|
-
from eodag.utils import
|
|
72
|
+
from eodag.utils import Unpack
|
|
72
73
|
|
|
73
74
|
|
|
74
75
|
logger = logging.getLogger("eodag.download.aws")
|
|
@@ -227,6 +228,7 @@ class AwsDownload(Download):
|
|
|
227
228
|
product: EOProduct,
|
|
228
229
|
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
229
230
|
progress_callback: Optional[ProgressCallback] = None,
|
|
231
|
+
executor: Optional[ThreadPoolExecutor] = None,
|
|
230
232
|
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
231
233
|
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
232
234
|
**kwargs: Unpack[DownloadConf],
|
|
@@ -246,6 +248,7 @@ class AwsDownload(Download):
|
|
|
246
248
|
size as inputs and handle progress bar
|
|
247
249
|
creation and update to give the user a
|
|
248
250
|
feedback on the download progress
|
|
251
|
+
:param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
|
|
249
252
|
:param kwargs: `output_dir` (str), `extract` (bool), `delete_archive` (bool)
|
|
250
253
|
and `dl_url_params` (dict) can be provided as additional kwargs
|
|
251
254
|
and will override any other values defined in a configuration
|
|
@@ -293,7 +296,7 @@ class AwsDownload(Download):
|
|
|
293
296
|
)
|
|
294
297
|
|
|
295
298
|
# authenticate
|
|
296
|
-
if product.downloader_auth:
|
|
299
|
+
if product.downloader_auth and isinstance(product.downloader_auth, AwsAuth):
|
|
297
300
|
authenticated_objects = product.downloader_auth.authenticate_objects(
|
|
298
301
|
bucket_names_and_prefixes
|
|
299
302
|
)
|
|
@@ -302,9 +305,19 @@ class AwsDownload(Download):
|
|
|
302
305
|
"Authentication plugin (AwsAuth) has to be configured if AwsDownload is used"
|
|
303
306
|
)
|
|
304
307
|
|
|
308
|
+
# create an executor if not given and anticipate the possible need to shut it down
|
|
309
|
+
executor, shutdown_executor = (
|
|
310
|
+
(ThreadPoolExecutor(), True) if executor is None else (executor, False)
|
|
311
|
+
)
|
|
312
|
+
self._config_executor(executor)
|
|
313
|
+
|
|
305
314
|
# files in zip
|
|
306
315
|
updated_bucket_names_and_prefixes = self._download_file_in_zip(
|
|
307
|
-
product,
|
|
316
|
+
product.downloader_auth,
|
|
317
|
+
bucket_names_and_prefixes,
|
|
318
|
+
product_local_path,
|
|
319
|
+
progress_callback,
|
|
320
|
+
executor,
|
|
308
321
|
)
|
|
309
322
|
# prevent nothing-to-download errors if download was performed in zip
|
|
310
323
|
raise_error = (
|
|
@@ -329,7 +342,8 @@ class AwsDownload(Download):
|
|
|
329
342
|
if len(unique_product_chunks) > 0:
|
|
330
343
|
progress_callback.reset(total=total_size)
|
|
331
344
|
try:
|
|
332
|
-
|
|
345
|
+
|
|
346
|
+
def download_chunk(product_chunk: Any) -> None:
|
|
333
347
|
try:
|
|
334
348
|
chunk_rel_path = self.get_chunk_dest_path(
|
|
335
349
|
product,
|
|
@@ -339,11 +353,11 @@ class AwsDownload(Download):
|
|
|
339
353
|
except NotAvailableError as e:
|
|
340
354
|
# out of SAFE format chunk
|
|
341
355
|
logger.warning(e)
|
|
342
|
-
|
|
356
|
+
return
|
|
357
|
+
|
|
343
358
|
chunk_abs_path = os.path.join(product_local_path, chunk_rel_path)
|
|
344
359
|
chunk_abs_path_dir = os.path.dirname(chunk_abs_path)
|
|
345
|
-
|
|
346
|
-
os.makedirs(chunk_abs_path_dir)
|
|
360
|
+
os.makedirs(chunk_abs_path_dir, exist_ok=True)
|
|
347
361
|
|
|
348
362
|
bucket_objects = authenticated_objects.get(product_chunk.bucket_name)
|
|
349
363
|
extra_args = (
|
|
@@ -352,12 +366,31 @@ class AwsDownload(Download):
|
|
|
352
366
|
else {}
|
|
353
367
|
)
|
|
354
368
|
if not os.path.isfile(chunk_abs_path):
|
|
369
|
+
transfer_config = TransferConfig(use_threads=False)
|
|
355
370
|
product_chunk.Bucket().download_file(
|
|
356
371
|
product_chunk.key,
|
|
357
372
|
chunk_abs_path,
|
|
358
373
|
ExtraArgs=extra_args,
|
|
359
374
|
Callback=progress_callback,
|
|
375
|
+
Config=transfer_config,
|
|
360
376
|
)
|
|
377
|
+
return
|
|
378
|
+
|
|
379
|
+
# use parallelization if possible.
|
|
380
|
+
# when products are already downloaded in parallel but the executor has only one worker,
|
|
381
|
+
# we avoid submitting nested tasks to the executor to prevent deadlocks
|
|
382
|
+
if (
|
|
383
|
+
executor._thread_name_prefix == "eodag-download-all"
|
|
384
|
+
and executor._max_workers == 1
|
|
385
|
+
):
|
|
386
|
+
for product_chunk in unique_product_chunks:
|
|
387
|
+
download_chunk(product_chunk)
|
|
388
|
+
else:
|
|
389
|
+
futures = (
|
|
390
|
+
executor.submit(download_chunk, product_chunk)
|
|
391
|
+
for product_chunk in unique_product_chunks
|
|
392
|
+
)
|
|
393
|
+
[f.result() for f in as_completed(futures)]
|
|
361
394
|
|
|
362
395
|
except AuthenticationError as e:
|
|
363
396
|
logger.warning("Unexpected error: %s" % e)
|
|
@@ -365,6 +398,9 @@ class AwsDownload(Download):
|
|
|
365
398
|
raise_if_auth_error(e, self.provider)
|
|
366
399
|
logger.warning("Unexpected error: %s" % e)
|
|
367
400
|
|
|
401
|
+
if shutdown_executor:
|
|
402
|
+
executor.shutdown(wait=True)
|
|
403
|
+
|
|
368
404
|
# finalize safe product
|
|
369
405
|
if build_safe and product.collection and "S2_MSI" in product.collection:
|
|
370
406
|
self.finalize_s2_safe_product(product_local_path)
|
|
@@ -386,31 +422,33 @@ class AwsDownload(Download):
|
|
|
386
422
|
return product_local_path
|
|
387
423
|
|
|
388
424
|
def _download_file_in_zip(
|
|
389
|
-
self,
|
|
425
|
+
self,
|
|
426
|
+
downloader_auth: AwsAuth,
|
|
427
|
+
bucket_names_and_prefixes: list[tuple[str, Optional[str]]],
|
|
428
|
+
product_local_path: str,
|
|
429
|
+
progress_callback: ProgressCallback,
|
|
430
|
+
executor: ThreadPoolExecutor,
|
|
390
431
|
):
|
|
391
432
|
"""
|
|
392
433
|
Download file in zip from a prefix like `foo/bar.zip!file.txt`
|
|
393
434
|
"""
|
|
394
|
-
if
|
|
395
|
-
not getattr(product, "downloader_auth", None)
|
|
396
|
-
or product.downloader_auth.s3_resource is None
|
|
397
|
-
):
|
|
435
|
+
if downloader_auth.s3_resource is None:
|
|
398
436
|
logger.debug("Cannot check files in s3 zip without s3 resource")
|
|
399
437
|
return bucket_names_and_prefixes
|
|
400
438
|
|
|
401
|
-
s3_client =
|
|
439
|
+
s3_client = downloader_auth.get_s3_client()
|
|
402
440
|
|
|
403
441
|
downloaded = []
|
|
404
|
-
|
|
442
|
+
|
|
443
|
+
def process_zip_file(i: int, pack: tuple[str, Optional[str]]) -> Optional[int]:
|
|
405
444
|
bucket_name, prefix = pack
|
|
406
|
-
if ".zip!" in prefix:
|
|
445
|
+
if prefix is not None and ".zip!" in prefix:
|
|
407
446
|
splitted_path = prefix.split(".zip!")
|
|
408
447
|
zip_prefix = f"{splitted_path[0]}.zip"
|
|
409
448
|
rel_path = splitted_path[-1]
|
|
410
449
|
dest_file = os.path.join(product_local_path, rel_path)
|
|
411
450
|
dest_abs_path_dir = os.path.dirname(dest_file)
|
|
412
|
-
|
|
413
|
-
os.makedirs(dest_abs_path_dir)
|
|
451
|
+
os.makedirs(dest_abs_path_dir, exist_ok=True)
|
|
414
452
|
|
|
415
453
|
zip_file, _ = open_s3_zipped_object(
|
|
416
454
|
bucket_name, zip_prefix, s3_client, partial=False
|
|
@@ -428,7 +466,30 @@ class AwsDownload(Download):
|
|
|
428
466
|
output_file.write(zchunk)
|
|
429
467
|
progress_callback(len(zchunk))
|
|
430
468
|
|
|
431
|
-
|
|
469
|
+
return i
|
|
470
|
+
return None
|
|
471
|
+
|
|
472
|
+
# use parallelization if possible
|
|
473
|
+
# when products are already downloaded in parallel but the executor has only one worker,
|
|
474
|
+
# we avoid submitting nested tasks to the executor to prevent deadlocks
|
|
475
|
+
if (
|
|
476
|
+
executor._thread_name_prefix == "eodag-download-all"
|
|
477
|
+
and executor._max_workers == 1
|
|
478
|
+
):
|
|
479
|
+
for i, pack in enumerate(bucket_names_and_prefixes):
|
|
480
|
+
result = process_zip_file(i, pack)
|
|
481
|
+
if result is not None:
|
|
482
|
+
downloaded.append(result)
|
|
483
|
+
else:
|
|
484
|
+
futures = (
|
|
485
|
+
executor.submit(process_zip_file, i, pack)
|
|
486
|
+
for i, pack in enumerate(bucket_names_and_prefixes)
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
for future in as_completed(futures):
|
|
490
|
+
result = future.result()
|
|
491
|
+
if result is not None:
|
|
492
|
+
downloaded.append(result)
|
|
432
493
|
|
|
433
494
|
return [
|
|
434
495
|
pack
|
|
@@ -710,7 +771,9 @@ class AwsDownload(Download):
|
|
|
710
771
|
ignore_assets,
|
|
711
772
|
product,
|
|
712
773
|
)
|
|
713
|
-
|
|
774
|
+
|
|
775
|
+
# check if auth is a S3 resource by verifying it has the meta.client attribute.
|
|
776
|
+
if auth and hasattr(auth, "meta") and hasattr(auth.meta, "client"):
|
|
714
777
|
s3_resource = auth
|
|
715
778
|
else:
|
|
716
779
|
s3_resource = boto3.resource(
|
|
@@ -773,6 +836,7 @@ class AwsDownload(Download):
|
|
|
773
836
|
byte_range,
|
|
774
837
|
compress,
|
|
775
838
|
zip_filename,
|
|
839
|
+
provider_max_workers=getattr(self.config, "max_workers", None),
|
|
776
840
|
)
|
|
777
841
|
|
|
778
842
|
def _get_commonpath(
|
|
@@ -1112,26 +1176,3 @@ class AwsDownload(Download):
|
|
|
1112
1176
|
|
|
1113
1177
|
logger.debug(f"Downloading {chunk.key} to {product_path}")
|
|
1114
1178
|
return product_path
|
|
1115
|
-
|
|
1116
|
-
def download_all(
|
|
1117
|
-
self,
|
|
1118
|
-
products: SearchResult,
|
|
1119
|
-
auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
|
|
1120
|
-
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
1121
|
-
progress_callback: Optional[ProgressCallback] = None,
|
|
1122
|
-
wait: float = DEFAULT_DOWNLOAD_WAIT,
|
|
1123
|
-
timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
1124
|
-
**kwargs: Unpack[DownloadConf],
|
|
1125
|
-
) -> list[str]:
|
|
1126
|
-
"""
|
|
1127
|
-
download_all using parent (base plugin) method
|
|
1128
|
-
"""
|
|
1129
|
-
return super(AwsDownload, self).download_all(
|
|
1130
|
-
products,
|
|
1131
|
-
auth=auth,
|
|
1132
|
-
downloaded_callback=downloaded_callback,
|
|
1133
|
-
progress_callback=progress_callback,
|
|
1134
|
-
wait=wait,
|
|
1135
|
-
timeout=timeout,
|
|
1136
|
-
**kwargs,
|
|
1137
|
-
)
|