eodag 2.12.0__py3-none-any.whl → 3.0.0b1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
- eodag/api/core.py +434 -319
- eodag/api/product/__init__.py +5 -1
- eodag/api/product/_assets.py +7 -2
- eodag/api/product/_product.py +46 -68
- eodag/api/product/metadata_mapping.py +181 -66
- eodag/api/search_result.py +21 -1
- eodag/cli.py +20 -6
- eodag/config.py +95 -6
- eodag/plugins/apis/base.py +8 -162
- eodag/plugins/apis/ecmwf.py +36 -24
- eodag/plugins/apis/usgs.py +40 -24
- eodag/plugins/authentication/aws_auth.py +2 -2
- eodag/plugins/authentication/header.py +31 -6
- eodag/plugins/authentication/keycloak.py +13 -84
- eodag/plugins/authentication/oauth.py +3 -3
- eodag/plugins/authentication/openid_connect.py +256 -46
- eodag/plugins/authentication/qsauth.py +3 -0
- eodag/plugins/authentication/sas_auth.py +8 -1
- eodag/plugins/authentication/token.py +92 -46
- eodag/plugins/authentication/token_exchange.py +120 -0
- eodag/plugins/download/aws.py +86 -91
- eodag/plugins/download/base.py +72 -40
- eodag/plugins/download/http.py +607 -264
- eodag/plugins/download/s3rest.py +28 -15
- eodag/plugins/manager.py +73 -57
- eodag/plugins/search/__init__.py +36 -0
- eodag/plugins/search/base.py +225 -18
- eodag/plugins/search/build_search_result.py +389 -32
- eodag/plugins/search/cop_marine.py +378 -0
- eodag/plugins/search/creodias_s3.py +15 -14
- eodag/plugins/search/csw.py +5 -7
- eodag/plugins/search/data_request_search.py +44 -20
- eodag/plugins/search/qssearch.py +508 -203
- eodag/plugins/search/static_stac_search.py +99 -36
- eodag/resources/constraints/climate-dt.json +13 -0
- eodag/resources/constraints/extremes-dt.json +8 -0
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +1897 -34
- eodag/resources/providers.yml +3539 -3277
- eodag/resources/stac.yml +48 -54
- eodag/resources/stac_api.yml +71 -25
- eodag/resources/stac_provider.yml +5 -0
- eodag/resources/user_conf_template.yml +51 -3
- eodag/rest/__init__.py +6 -0
- eodag/rest/cache.py +70 -0
- eodag/rest/config.py +68 -0
- eodag/rest/constants.py +27 -0
- eodag/rest/core.py +757 -0
- eodag/rest/server.py +397 -258
- eodag/rest/stac.py +438 -307
- eodag/rest/types/collections_search.py +44 -0
- eodag/rest/types/eodag_search.py +232 -43
- eodag/rest/types/{stac_queryables.py → queryables.py} +81 -43
- eodag/rest/types/stac_search.py +277 -0
- eodag/rest/utils/__init__.py +216 -0
- eodag/rest/utils/cql_evaluate.py +119 -0
- eodag/rest/utils/rfc3339.py +65 -0
- eodag/types/__init__.py +99 -9
- eodag/types/bbox.py +15 -14
- eodag/types/download_args.py +31 -0
- eodag/types/search_args.py +58 -7
- eodag/types/whoosh.py +81 -0
- eodag/utils/__init__.py +72 -9
- eodag/utils/constraints.py +37 -37
- eodag/utils/exceptions.py +23 -17
- eodag/utils/requests.py +138 -0
- eodag/utils/rest.py +104 -0
- eodag/utils/stac_reader.py +100 -16
- {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/METADATA +64 -44
- eodag-3.0.0b1.dist-info/RECORD +109 -0
- {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/WHEEL +1 -1
- {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/entry_points.txt +6 -5
- eodag/plugins/apis/cds.py +0 -540
- eodag/rest/utils.py +0 -1133
- eodag-2.12.0.dist-info/RECORD +0 -94
- {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/LICENSE +0 -0
- {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/top_level.txt +0 -0
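Of the changes listed above, the new `eodag/plugins/search/cop_marine.py` plugin (reproduced first below) is the most self-contained addition. As a quick orientation, here is a minimal, hedged sketch of searching through it from the user-facing API; the provider name is inferred from the plugin's logger name (`eodag.search.cop_marine`) and the product type is a hypothetical placeholder, not a verified identifier:

```python
# Hedged sketch, not a verified recipe: the provider name is inferred from
# the plugin's logger ("eodag.search.cop_marine"); the productType value is
# a placeholder, not a real collection identifier.
from eodag import EODataAccessGateway

dag = EODataAccessGateway()
dag.set_preferred_provider("cop_marine")
# Pagination and date filtering are handled by the plugin's query() below.
results = dag.search(
    productType="SOME_COPERNICUS_MARINE_COLLECTION",  # placeholder
    start="2024-01-01",
    end="2024-01-31",
)
```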
eodag/plugins/search/cop_marine.py
ADDED
```diff
@@ -0,0 +1,378 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024, CS GROUP - France, https://www.csgroup.eu/
+#
+# This file is part of EODAG project
+# https://www.github.com/CS-SI/EODAG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import copy
+import logging
+import re
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast
+from urllib.parse import urlsplit
+
+import boto3
+import botocore
+import requests
+from dateutil.parser import isoparse
+from dateutil.tz import tzutc
+from dateutil.utils import today
+
+from eodag import EOProduct
+from eodag.api.product import AssetsDict
+from eodag.config import PluginConfig
+from eodag.plugins.search import PreparedSearch
+from eodag.plugins.search.static_stac_search import StaticStacSearch
+from eodag.utils import get_bucket_name_and_prefix
+from eodag.utils.exceptions import UnsupportedProductType, ValidationError
+
+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3Client
+    from mypy_boto3_s3.type_defs import ListObjectsOutputTypeDef
+
+logger = logging.getLogger("eodag.search.cop_marine")
+
+
+def _get_date_from_yyyymmdd(date_str: str, item_key: str) -> Optional[datetime]:
+    year = date_str[:4]
+    month = date_str[4:6]
+    if len(date_str) > 6:
+        day = date_str[6:]
+    else:
+        day = "1"
+    try:
+        date = datetime(
+            int(year),
+            int(month),
+            int(day),
+            tzinfo=tzutc(),
+        )
+    except ValueError:
+        logger.error(f"{item_key}: {date_str} is not a valid date")
+        return None
+    else:
+        return date
+
+
+def _get_s3_client(endpoint_url: str) -> S3Client:
+    s3_session = boto3.Session()
+    return s3_session.client(
+        "s3",
+        config=botocore.config.Config(
+            # Configures to use subdomain/virtual calling format.
+            s3={"addressing_style": "virtual"},
+            signature_version=botocore.UNSIGNED,
+        ),
+        endpoint_url=endpoint_url,
+    )
+
+
+def _check_int_values_properties(properties: Dict[str, Any]):
+    # remove int values with a bit length of more than 64 from the properties
+    invalid = []
+    for prop, prop_value in properties.items():
+        if isinstance(prop_value, int) and prop_value.bit_length() > 64:
+            invalid.append(prop)
+        if isinstance(prop_value, dict):
+            _check_int_values_properties(prop_value)
+
+    for inv_key in invalid:
+        properties.pop(inv_key)
+
+
+class CopMarineSearch(StaticStacSearch):
+    """class that implements search for the Copernicus Marine provider"""
+
+    def __init__(self, provider: str, config: PluginConfig):
+        original_metadata_mapping = copy.deepcopy(config.metadata_mapping)
+        super().__init__(provider, config)
+        # reset to original metadata mapping from config (changed in super class init)
+        self.config.metadata_mapping = original_metadata_mapping
+
+    def _get_product_type_info(
+        self, product_type: str
+    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
+        """Fetch product type and associated datasets info"""
+
+        fetch_url = cast(
+            str,
+            self.config.discover_product_types["fetch_url"].format(
+                **self.config.__dict__
+            ),
+        )
+        logger.debug("fetch data for collection %s", product_type)
+        provider_product_type = self.config.products.get(product_type, {}).get(
+            "productType", None
+        )
+        if not provider_product_type:
+            provider_product_type = product_type
+        collection_url = (
+            fetch_url.replace("catalog.stac.json", provider_product_type)
+            + "/product.stac.json"
+        )
+        try:
+            collection_data = requests.get(collection_url).json()
+        except requests.RequestException:
+            logger.error("data for product %s could not be fetched", product_type)
+            raise UnsupportedProductType(product_type)
+
+        datasets = []
+        for link in [li for li in collection_data["links"] if li["rel"] == "item"]:
+            dataset_url = (
+                fetch_url.replace("catalog.stac.json", provider_product_type)
+                + "/"
+                + link["href"]
+            )
+            try:
+                dataset_item = requests.get(dataset_url).json()
+                datasets.append(dataset_item)
+            except requests.RequestException:
+                logger.error("data for dataset %s could not be fetched", link["title"])
+
+        return collection_data, datasets
+
+    def _get_product_by_id(
+        self,
+        collection_objects: ListObjectsOutputTypeDef,
+        product_id: str,
+        s3_url: str,
+        product_type: str,
+        dataset_item: Dict[str, Any],
+        collection_dict: Dict[str, Any],
+    ):
+        for obj in collection_objects["Contents"]:
+            if product_id in obj["Key"]:
+                return self._create_product(
+                    product_type, obj["Key"], s3_url, dataset_item, collection_dict
+                )
+        return None
+
+    def _create_product(
+        self,
+        product_type: str,
+        item_key: str,
+        s3_url: str,
+        dataset_item: Dict[str, Any],
+        collection_dict: Dict[str, Any],
+        use_dataset_dates: bool = False,
+    ) -> Optional[EOProduct]:
+
+        item_id = item_key.split("/")[-1].split(".")[0]
+        download_url = s3_url + "/" + item_key
+        properties = {
+            "id": item_id,
+            "title": item_id,
+            "geometry": self.config.metadata_mapping["defaultGeometry"],
+            "downloadLink": download_url,
+            "dataset": dataset_item["id"],
+        }
+        if use_dataset_dates:
+            if "start_datetime" in dataset_item:
+                properties["startTimeFromAscendingNode"] = dataset_item[
+                    "start_datetime"
+                ]
+                properties["completionTimeFromAscendingNode"] = dataset_item[
+                    "end_datetime"
+                ]
+            elif "datetime" in dataset_item:
+                properties["startTimeFromAscendingNode"] = dataset_item["datetime"]
+                properties["completionTimeFromAscendingNode"] = dataset_item["datetime"]
+        else:
+            item_dates = re.findall(r"\d{8}", item_key)
+            if not item_dates:
+                item_dates = re.findall(r"\d{6}", item_key)
+            item_start = _get_date_from_yyyymmdd(item_dates[0], item_key)
+            if not item_start:  # identified pattern was not a valid datetime
+                return None
+            if len(item_dates) > 2:  # start, end and created_at timestamps
+                item_end = _get_date_from_yyyymmdd(item_dates[1], item_key)
+            else:  # only date and created_at timestamps
+                item_end = item_start
+            properties["startTimeFromAscendingNode"] = item_start.strftime(
+                "%Y-%m-%dT%H:%M:%SZ"
+            )
+            properties["completionTimeFromAscendingNode"] = (
+                item_end or item_start
+            ).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+        for key, value in collection_dict["properties"].items():
+            if key not in ["id", "title", "start_datetime", "end_datetime"]:
+                properties[key] = value
+        for key, value in dataset_item["properties"].items():
+            if key not in ["id", "title", "start_datetime", "end_datetime"]:
+                properties[key] = value
+        _check_int_values_properties(properties)
+
+        properties["thumbnail"] = collection_dict["assets"]["thumbnail"]["href"]
+        if "omiFigure" in collection_dict["assets"]:
+            properties["quicklook"] = collection_dict["assets"]["omiFigure"]["href"]
+        assets = {
+            "native": {
+                "title": "native",
+                "href": download_url,
+                "type": "application/x-netcdf",
+            }
+        }
+        product = EOProduct(self.provider, properties, productType=product_type)
+        product.assets = AssetsDict(product, assets)
+        return product
+
+    def query(
+        self,
+        prep: PreparedSearch = PreparedSearch(),
+        **kwargs: Any,
+    ) -> Tuple[List[EOProduct], Optional[int]]:
+        """
+        Implementation of search for the Copernicus Marine provider
+        :param prep: object containing search parameterds
+        :type prep: PreparedSearch
+        :param kwargs: additional search arguments
+        :returns: list of products and total number of products
+        :rtype: Tuple[List[EOProduct], Optional[int]]
+        """
+        page = prep.page
+        items_per_page = prep.items_per_page
+
+        # only return 1 page if pagination is disabled
+        if page > 1 and items_per_page <= 0:
+            return ([], 0) if prep.count else ([], None)
+
+        product_type = kwargs.get("productType", prep.product_type)
+        if not product_type:
+            raise ValidationError(
+                "parameter product type is required for search with cop_marine provider"
+            )
+        collection_dict, datasets_items_list = self._get_product_type_info(product_type)
+        products: List[EOProduct] = []
+        start_index = items_per_page * (page - 1) + 1
+        num_total = 0
+        for i, dataset_item in enumerate(datasets_items_list):
+            try:
+                logger.debug("searching data for dataset %s", dataset_item["id"])
+
+                # date bounds
+                if "startTimeFromAscendingNode" in kwargs:
+                    start_date = isoparse(kwargs["startTimeFromAscendingNode"])
+                elif "start_datetime" in dataset_item["properties"]:
+                    start_date = isoparse(dataset_item["properties"]["start_datetime"])
+                else:
+                    start_date = isoparse(dataset_item["properties"]["datetime"])
+                if not start_date.tzinfo:
+                    start_date = start_date.replace(tzinfo=tzutc())
+                if "completionTimeFromAscendingNode" in kwargs:
+                    end_date = isoparse(kwargs["completionTimeFromAscendingNode"])
+                elif "end_datetime" in dataset_item["properties"]:
+                    end_date = isoparse(dataset_item["properties"]["end_datetime"])
+                else:
+                    end_date = today(tzinfo=tzutc())
+                if not end_date.tzinfo:
+                    end_date = end_date.replace(tzinfo=tzutc())
+
+                # retrieve information about s3 from collection data
+                s3_url = dataset_item["assets"]["native"]["href"]
+            except KeyError as e:
+                logger.warning(
+                    f"Unable to extract info from {product_type} item #{i}: {str(e)}"
+                )
+                continue
+
+            url_parts = urlsplit(s3_url)
+            endpoint_url = url_parts.scheme + "://" + url_parts.hostname
+            bucket, collection_path = get_bucket_name_and_prefix(s3_url, 0)
+            if bucket is None or collection_path is None:
+                logger.warning(
+                    f"Unable to get bucket and prefix from {s3_url}, got {(bucket, collection_path)}"
+                )
+                continue
+
+            if ".nc" in collection_path:
+                num_total += 1
+                if num_total < start_index:
+                    continue
+                if len(products) < items_per_page or items_per_page < 0:
+                    product = self._create_product(
+                        product_type,
+                        collection_path,
+                        endpoint_url + "/" + bucket,
+                        dataset_item,
+                        collection_dict,
+                        True,
+                    )
+                    if product:
+                        products.append(product)
+                continue
+
+            s3_client = _get_s3_client(endpoint_url)
+            stop_search = False
+            current_object = None
+            while not stop_search:
+                # list_objects returns max 1000 objects -> use marker to get next objects
+                if current_object:
+                    s3_objects = s3_client.list_objects(
+                        Bucket=bucket, Prefix=collection_path, Marker=current_object
+                    )
+                else:
+                    s3_objects = s3_client.list_objects(
+                        Bucket=bucket, Prefix=collection_path
+                    )
+                if "Contents" not in s3_objects:
+                    if len(products) == 0 and i == len(datasets_items_list) - 1:
+                        return ([], 0) if prep.count else ([], None)
+                    else:
+                        break
+
+                if "id" in kwargs:
+                    product = self._get_product_by_id(
+                        s3_objects,
+                        kwargs["id"],
+                        endpoint_url + "/" + bucket,
+                        product_type,
+                        dataset_item,
+                        collection_dict,
+                    )
+                    if product:
+                        return [product], 1
+                    current_object = s3_objects["Contents"][-1]["Key"]
+                    continue
+
+                for obj in s3_objects["Contents"]:
+                    item_key = obj["Key"]
+                    # filter according to date(s) in item id
+                    item_dates = re.findall(r"\d{8}", item_key)
+                    if not item_dates:
+                        item_dates = re.findall(r"\d{6}", item_key)
+                    item_start = _get_date_from_yyyymmdd(item_dates[0], item_key)
+                    if not item_start:  # identified pattern was not a valid datetime
+                        continue
+                    if item_start > end_date:
+                        stop_search = True
+                    if not item_dates or (start_date <= item_start <= end_date):
+                        num_total += 1
+                        if num_total < start_index:
+                            continue
+                        if len(products) < items_per_page or items_per_page < 0:
+                            product = self._create_product(
+                                product_type,
+                                item_key,
+                                endpoint_url + "/" + bucket,
+                                dataset_item,
+                                collection_dict,
+                            )
+                            if product:
+                                products.append(product)
+                    current_object = item_key
+
+        return products, num_total
```
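The core of `query()` above is plain S3 listing: since `list_objects` returns at most 1000 keys per call, the last key of each page is fed back as the `Marker` of the next call. Reduced to a standalone sketch (endpoint, bucket, and prefix are placeholders; the client is unsigned, as in `_get_s3_client`):

```python
# Standalone sketch of the Marker-based listing loop used in query() above.
# Endpoint, bucket, and prefix are placeholders, not real locations.
import boto3
from botocore import UNSIGNED
from botocore.config import Config

client = boto3.Session().client(
    "s3",
    endpoint_url="https://s3.example.com",  # placeholder endpoint
    config=Config(signature_version=UNSIGNED),
)
marker = None
while True:
    kwargs = {"Bucket": "example-bucket", "Prefix": "example/prefix/"}
    if marker:
        kwargs["Marker"] = marker  # resume listing after the last key seen
    page = client.list_objects(**kwargs)
    contents = page.get("Contents", [])
    if not contents:
        break  # no more objects
    for obj in contents:
        print(obj["Key"])
    marker = contents[-1]["Key"]
```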
eodag/plugins/search/creodias_s3.py
CHANGED
```diff
@@ -17,17 +17,18 @@
 # limitations under the License.
 import logging
 from types import MethodType
-from typing import Any,
+from typing import Any, List
 
 import boto3
 import botocore
 from botocore.exceptions import BotoCoreError
 
-from eodag import EOProduct
-from eodag.api.
+from eodag.api.product import AssetsDict, EOProduct  # type: ignore
+from eodag.api.search_result import RawSearchResult
 from eodag.config import PluginConfig
 from eodag.plugins.authentication.aws_auth import AwsAuth
-from eodag.plugins.search.qssearch import
+from eodag.plugins.search.qssearch import ODataV4Search
+from eodag.utils import guess_file_type
 from eodag.utils.exceptions import AuthenticationError, MisconfiguredError, RequestError
 
 DATA_EXTENSIONS = ["jp2", "tiff", "nc", "grib"]
@@ -64,18 +65,19 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
     try:
         auth_dict = auth.authenticate()
         required_creds = ["aws_access_key_id", "aws_secret_access_key"]
-        if not all(
+        if not all(x in auth_dict for x in required_creds):
             raise MisconfiguredError(
                 f"Incomplete credentials for {product.provider}, missing "
-                f"{[x for x in required_creds if not
+                f"{[x for x in required_creds if x not in auth_dict]}"
             )
         if not getattr(auth, "s3_client", None):
             auth.s3_client = boto3.client(
                 "s3",
                 endpoint_url=config.base_uri,
-
+                aws_access_key_id=auth_dict["aws_access_key_id"],
+                aws_secret_access_key=auth_dict["aws_secret_access_key"],
             )
-        logger.debug(
+        logger.debug("Listing assets in %s", prefix)
         product.assets = AssetsDict(product)
         for asset in auth.s3_client.list_objects(
             Bucket=config.s3_bucket, Prefix=prefix, MaxKeys=300
@@ -96,6 +98,8 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
                 "roles": [role],
                 "href": f"s3://{config.s3_bucket}/{asset['Key']}",
             }
+            if mime_type := guess_file_type(asset["Key"]):
+                product.assets[asset_basename]["type"] = mime_type
     # update driver
     product.driver = product.get_driver()
 
@@ -104,13 +108,10 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
         raise AuthenticationError(
             f"Authentication failed on {config.base_uri} s3"
         ) from e
-
-        raise RequestError(
-            "assets for product %s could not be found", prefix
-        ) from e
+        raise RequestError(f"assets for product {prefix} could not be found") from e
 
 
-class CreodiasS3Search(
+class CreodiasS3Search(ODataV4Search):
     """
     Search on creodias and adapt results to s3
     """
@@ -119,7 +120,7 @@ class CreodiasS3Search(QueryStringSearch):
         super(CreodiasS3Search, self).__init__(provider, config)
 
     def normalize_results(
-        self, results:
+        self, results: RawSearchResult, **kwargs: Any
     ) -> List[EOProduct]:
         """Build EOProducts from provider results"""
 
```
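Beyond the base-class switch to `ODataV4Search`, the behavioral change in this file is that each listed asset now gets a MIME `type` guessed from its key via `guess_file_type`. A hedged sketch of that helper in isolation; the printed values are illustrative, not guaranteed:

```python
# Hedged sketch: guess_file_type maps a file name or S3 key to a MIME type
# and, as used above, is expected to return None for unknown extensions.
from eodag.utils import guess_file_type

for key in ("tile/B04.jp2", "measurement.nc", "notes.xyz"):
    print(key, "->", guess_file_type(key))  # e.g. image/jp2, a netCDF type, None
```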
eodag/plugins/search/csw.py
CHANGED
```diff
@@ -35,8 +35,9 @@ from shapely import geometry, wkt
 
 from eodag.api.product import EOProduct
 from eodag.api.product.metadata_mapping import properties_from_xml
+from eodag.plugins.search import PreparedSearch
 from eodag.plugins.search.base import Search
-from eodag.utils import
+from eodag.utils import DEFAULT_PROJ
 from eodag.utils.import_system import patch_owslib_requests
 
 if TYPE_CHECKING:
@@ -64,16 +65,13 @@ class CSWSearch(Search):
 
     def query(
         self,
-
-        items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
-        page: int = DEFAULT_PAGE,
-        count: bool = True,
+        prep: PreparedSearch = PreparedSearch(),
         **kwargs: Any,
     ) -> Tuple[List[EOProduct], Optional[int]]:
         """Perform a search on a OGC/CSW-like interface"""
         product_type = kwargs.get("productType")
         if product_type is None:
-            return [], 0
+            return ([], 0) if prep.count else ([], None)
         auth = kwargs.get("auth")
         if auth:
             self.__init_catalog(**getattr(auth.config, "credentials", {}))
@@ -118,7 +116,7 @@ class CSWSearch(Search):
         )
         results.extend(partial_results)
         logger.info("Found %s overall results", len(results))
-        total_results = len(results) if count else None
+        total_results = len(results) if prep.count else None
         return results, total_results
 
     def __init_catalog(
```
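The signature change in `CSWSearch.query` recurs across the search plugins in this release (see `cop_marine.py` earlier and `data_request_search.py` below): the `items_per_page`, `page`, and `count` keyword arguments move into a single `PreparedSearch` object. A hedged sketch of the new calling convention; the constructor keywords are assumptions based on the attributes the plugins read:

```python
# Hedged sketch of the 3.0.0b1 plugin calling convention. The keyword
# arguments below are assumed from the attributes read in these diffs
# (prep.page, prep.items_per_page, prep.count, prep.product_type).
from eodag.plugins.search import PreparedSearch

prep = PreparedSearch(page=1, items_per_page=20, count=True)

# A plugin then consumes pagination state from the object, e.g.:
#     products, total = plugin.query(prep, productType="S2_MSI_L1C")
print(prep.page, prep.items_per_page, prep.count)
```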
eodag/plugins/search/data_request_search.py
CHANGED
```diff
@@ -19,8 +19,8 @@ from __future__ import annotations
 
 import logging
 import time
-from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+from datetime import datetime, timedelta, timezone
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast
 
 import requests
 
@@ -30,10 +30,11 @@ from eodag.api.product.metadata_mapping import (
     mtd_cfg_as_conversion_and_querypath,
     properties_from_json,
 )
+from eodag.plugins.search import PreparedSearch
 from eodag.plugins.search.base import Search
-from eodag.rest.stac import DEFAULT_MISSION_START_DATE
 from eodag.utils import (
     DEFAULT_ITEMS_PER_PAGE,
+    DEFAULT_MISSION_START_DATE,
     DEFAULT_PAGE,
     GENERIC_PRODUCT_TYPE,
     HTTP_REQ_TIMEOUT,
@@ -41,7 +42,12 @@ from eodag.utils import (
     deepcopy,
     string_to_jsonpath,
 )
-from eodag.utils.exceptions import
+from eodag.utils.exceptions import (
+    NotAvailableError,
+    RequestError,
+    TimeOutError,
+    ValidationError,
+)
 
 if TYPE_CHECKING:
     from eodag.config import PluginConfig
@@ -57,6 +63,8 @@ class DataRequestSearch(Search):
     - if finished - fetch the result of the job
     """
 
+    data_request_id: Optional[str]
+
     def __init__(self, provider: str, config: PluginConfig) -> None:
         super(DataRequestSearch, self).__init__(provider, config)
         self.config.__dict__.setdefault("result_type", "json")
@@ -101,10 +109,10 @@
         self.config.pagination["next_page_url_key_path"] = string_to_jsonpath(
             self.config.pagination.get("next_page_url_key_path", None)
         )
-        self.download_info = {}
+        self.download_info: Dict[str, Any] = {}
         self.data_request_id = None
 
-    def discover_product_types(self) -> Optional[Dict[str, Any]]:
+    def discover_product_types(self, **kwargs: Any) -> Optional[Dict[str, Any]]:
         """Fetch product types is disabled for `DataRequestSearch`
 
         :returns: empty dict
@@ -119,26 +127,30 @@
 
     def query(
         self,
-
-        items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
-        page: int = DEFAULT_PAGE,
-        count: bool = True,
+        prep: PreparedSearch = PreparedSearch(),
         **kwargs: Any,
     ) -> Tuple[List[EOProduct], Optional[int]]:
         """
         performs the search for a provider where several steps are required to fetch the data
         """
+        if kwargs.get("sortBy"):
+            raise ValidationError(f"{self.provider} does not support sorting feature")
+
         product_type = kwargs.get("productType", None)
+
+        if product_type is None:
+            raise ValidationError("Required productType is missing")
+
         # replace "product_type" to "providerProductType" in search args if exists
         # for compatibility with DataRequestSearch method
         if kwargs.get("product_type"):
             kwargs["providerProductType"] = kwargs.pop("product_type", None)
-        provider_product_type = self._map_product_type(product_type or "")
+        provider_product_type = cast(str, self._map_product_type(product_type or ""))
         keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}
 
         if provider_product_type and provider_product_type != GENERIC_PRODUCT_TYPE:
             keywords["productType"] = provider_product_type
-
+        else:
             keywords["productType"] = product_type
 
         # provider product type specific conf
@@ -185,7 +197,7 @@
         if not keywords.get("completionTimeFromAscendingNode", None):
             keywords["completionTimeFromAscendingNode"] = getattr(
                 self.config, "product_type_config", {}
-            ).get("missionEndDate", datetime.
+            ).get("missionEndDate", datetime.now(timezone.utc).isoformat())
 
         # ask for data_request_id if not set (it must exist when iterating over pages)
         if not self.data_request_id:
@@ -246,16 +258,19 @@
         self, product_type: str, eodag_product_type: str, **kwargs: Any
     ) -> str:
         headers = getattr(self.auth, "headers", USER_AGENT)
+        ssl_verify = getattr(self.config, "ssl_verify", True)
         try:
             url = self.config.data_request_url
-            request_body = format_query_params(
-                eodag_product_type, self.config, **kwargs
-            )
+            request_body = format_query_params(eodag_product_type, self.config, kwargs)
             logger.debug(
                 f"Sending search job request to {url} with {str(request_body)}"
             )
             request_job = requests.post(
-                url,
+                url,
+                json=request_body,
+                headers=headers,
+                timeout=HTTP_REQ_TIMEOUT,
+                verify=ssl_verify,
             )
             request_job.raise_for_status()
         except requests.exceptions.Timeout as exc:
@@ -271,9 +286,10 @@
     def _cancel_request(self, data_request_id: str) -> None:
         logger.info("deleting request job %s", data_request_id)
         delete_url = f"{self.config.data_request_url}/{data_request_id}"
+        headers = getattr(self.auth, "headers", USER_AGENT)
         try:
             delete_resp = requests.delete(
-                delete_url, headers=
+                delete_url, headers=headers, timeout=HTTP_REQ_TIMEOUT
             )
             delete_resp.raise_for_status()
         except requests.exceptions.Timeout as exc:
@@ -284,9 +300,15 @@
     def _check_request_status(self, data_request_id: str) -> bool:
         logger.debug("checking status of request job %s", data_request_id)
         status_url = self.config.status_url + data_request_id
+        headers = getattr(self.auth, "headers", USER_AGENT)
+        ssl_verify = getattr(self.config, "ssl_verify", True)
+
        try:
             status_resp = requests.get(
-                status_url,
+                status_url,
+                headers=headers,
+                timeout=HTTP_REQ_TIMEOUT,
+                verify=ssl_verify,
             )
             status_resp.raise_for_status()
         except requests.exceptions.Timeout as exc:
@@ -315,9 +337,11 @@
         url = self.config.result_url.format(
             jobId=data_request_id, items_per_page=items_per_page, page=page
         )
+        ssl_verify = getattr(self.config, "ssl_verify", True)
+        headers = getattr(self.auth, "headers", USER_AGENT)
         try:
             return requests.get(
-                url, headers=
+                url, headers=headers, timeout=HTTP_REQ_TIMEOUT, verify=ssl_verify
             ).json()
         except requests.exceptions.Timeout as exc:
             raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
```
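The common thread of the `DataRequestSearch` hunks is request hardening: every outbound `requests` call now passes explicit `timeout` and `verify` arguments taken from the plugin configuration. The pattern in isolation, with placeholder values standing in for `HTTP_REQ_TIMEOUT` and the config lookup:

```python
# Sketch of the hardening pattern applied above; the URL and values are
# placeholders for the plugin's config-driven settings.
import requests

HTTP_REQ_TIMEOUT = 5  # seconds; stands in for eodag.utils.HTTP_REQ_TIMEOUT
ssl_verify = True     # stands in for getattr(self.config, "ssl_verify", True)

try:
    resp = requests.get(
        "https://example.com/status", timeout=HTTP_REQ_TIMEOUT, verify=ssl_verify
    )
    resp.raise_for_status()
except requests.exceptions.Timeout as exc:
    raise RuntimeError(f"request timed out after {HTTP_REQ_TIMEOUT}s") from exc
```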