eodag 2.12.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/__init__.py +6 -8
- eodag/api/core.py +654 -538
- eodag/api/product/__init__.py +12 -2
- eodag/api/product/_assets.py +59 -16
- eodag/api/product/_product.py +100 -93
- eodag/api/product/drivers/__init__.py +7 -2
- eodag/api/product/drivers/base.py +0 -3
- eodag/api/product/metadata_mapping.py +192 -96
- eodag/api/search_result.py +69 -10
- eodag/cli.py +55 -25
- eodag/config.py +391 -116
- eodag/plugins/apis/base.py +11 -165
- eodag/plugins/apis/ecmwf.py +36 -25
- eodag/plugins/apis/usgs.py +80 -35
- eodag/plugins/authentication/aws_auth.py +13 -4
- eodag/plugins/authentication/base.py +10 -1
- eodag/plugins/authentication/generic.py +2 -2
- eodag/plugins/authentication/header.py +31 -6
- eodag/plugins/authentication/keycloak.py +17 -84
- eodag/plugins/authentication/oauth.py +3 -3
- eodag/plugins/authentication/openid_connect.py +268 -49
- eodag/plugins/authentication/qsauth.py +4 -1
- eodag/plugins/authentication/sas_auth.py +9 -2
- eodag/plugins/authentication/token.py +98 -47
- eodag/plugins/authentication/token_exchange.py +122 -0
- eodag/plugins/crunch/base.py +3 -1
- eodag/plugins/crunch/filter_date.py +3 -9
- eodag/plugins/crunch/filter_latest_intersect.py +0 -3
- eodag/plugins/crunch/filter_latest_tpl_name.py +1 -4
- eodag/plugins/crunch/filter_overlap.py +4 -8
- eodag/plugins/crunch/filter_property.py +5 -11
- eodag/plugins/download/aws.py +149 -185
- eodag/plugins/download/base.py +88 -97
- eodag/plugins/download/creodias_s3.py +1 -1
- eodag/plugins/download/http.py +638 -310
- eodag/plugins/download/s3rest.py +47 -45
- eodag/plugins/manager.py +228 -88
- eodag/plugins/search/__init__.py +36 -0
- eodag/plugins/search/base.py +239 -30
- eodag/plugins/search/build_search_result.py +382 -37
- eodag/plugins/search/cop_marine.py +441 -0
- eodag/plugins/search/creodias_s3.py +25 -20
- eodag/plugins/search/csw.py +5 -7
- eodag/plugins/search/data_request_search.py +61 -30
- eodag/plugins/search/qssearch.py +713 -255
- eodag/plugins/search/static_stac_search.py +106 -40
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +1921 -34
- eodag/resources/providers.yml +4091 -3655
- eodag/resources/stac.yml +50 -216
- eodag/resources/stac_api.yml +71 -25
- eodag/resources/stac_provider.yml +5 -0
- eodag/resources/user_conf_template.yml +89 -32
- eodag/rest/__init__.py +6 -0
- eodag/rest/cache.py +70 -0
- eodag/rest/config.py +68 -0
- eodag/rest/constants.py +26 -0
- eodag/rest/core.py +735 -0
- eodag/rest/errors.py +178 -0
- eodag/rest/server.py +264 -431
- eodag/rest/stac.py +442 -836
- eodag/rest/types/collections_search.py +44 -0
- eodag/rest/types/eodag_search.py +238 -47
- eodag/rest/types/queryables.py +164 -0
- eodag/rest/types/stac_search.py +273 -0
- eodag/rest/utils/__init__.py +216 -0
- eodag/rest/utils/cql_evaluate.py +119 -0
- eodag/rest/utils/rfc3339.py +64 -0
- eodag/types/__init__.py +106 -10
- eodag/types/bbox.py +15 -14
- eodag/types/download_args.py +40 -0
- eodag/types/search_args.py +57 -7
- eodag/types/whoosh.py +79 -0
- eodag/utils/__init__.py +110 -91
- eodag/utils/constraints.py +37 -45
- eodag/utils/exceptions.py +39 -22
- eodag/utils/import_system.py +0 -4
- eodag/utils/logging.py +37 -80
- eodag/utils/notebook.py +4 -4
- eodag/utils/repr.py +113 -0
- eodag/utils/requests.py +128 -0
- eodag/utils/rest.py +100 -0
- eodag/utils/stac_reader.py +93 -21
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/METADATA +88 -53
- eodag-3.0.0.dist-info/RECORD +109 -0
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/WHEEL +1 -1
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/entry_points.txt +7 -5
- eodag/plugins/apis/cds.py +0 -540
- eodag/rest/types/stac_queryables.py +0 -134
- eodag/rest/utils.py +0 -1133
- eodag-2.12.0.dist-info/RECORD +0 -94
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/LICENSE +0 -0
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/top_level.txt +0 -0
eodag/plugins/search/cop_marine.py
ADDED
@@ -0,0 +1,441 @@
+# -*- coding: utf-8 -*-
+# Copyright 2024, CS GROUP - France, https://www.csgroup.eu/
+#
+# This file is part of EODAG project
+# https://www.github.com/CS-SI/EODAG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import copy
+import logging
+import re
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast
+from urllib.parse import urlsplit
+
+import boto3
+import botocore
+import requests
+from dateutil.parser import isoparse
+from dateutil.tz import tzutc
+from dateutil.utils import today
+
+from eodag import EOProduct
+from eodag.api.product import AssetsDict
+from eodag.config import PluginConfig
+from eodag.plugins.search import PreparedSearch
+from eodag.plugins.search.static_stac_search import StaticStacSearch
+from eodag.utils import get_bucket_name_and_prefix
+from eodag.utils.exceptions import UnsupportedProductType, ValidationError
+
+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3Client
+    from mypy_boto3_s3.type_defs import ListObjectsOutputTypeDef
+
+logger = logging.getLogger("eodag.search.cop_marine")
+
+
+def _get_date_from_yyyymmdd(date_str: str, item_key: str) -> Optional[datetime]:
+    year = date_str[:4]
+    month = date_str[4:6]
+    if len(date_str) > 6:
+        day = date_str[6:]
+    else:
+        day = "1"
+    try:
+        date = datetime(
+            int(year),
+            int(month),
+            int(day),
+            tzinfo=tzutc(),
+        )
+    except ValueError:
+        logger.error(f"{item_key}: {date_str} is not a valid date")
+        return None
+    else:
+        return date
+
+
+def _get_dates_from_dataset_data(
+    dataset_item: Dict[str, Any]
+) -> Optional[Dict[str, str]]:
+    dates = {}
+    if "start_datetime" in dataset_item["properties"]:
+        dates["start"] = dataset_item["properties"]["start_datetime"]
+        dates["end"] = dataset_item["properties"]["end_datetime"]
+    elif "datetime" in dataset_item["properties"]:
+        dates["start"] = dataset_item["properties"]["datetime"]
+        dates["end"] = dataset_item["properties"]["datetime"]
+    else:
+        return None
+    return dates
+
+
+def _get_s3_client(endpoint_url: str) -> S3Client:
+    s3_session = boto3.Session()
+    return s3_session.client(
+        "s3",
+        config=botocore.config.Config(
+            # Configures to use subdomain/virtual calling format.
+            s3={"addressing_style": "virtual"},
+            signature_version=botocore.UNSIGNED,
+        ),
+        endpoint_url=endpoint_url,
+    )
+
+
+def _check_int_values_properties(properties: Dict[str, Any]):
+    # remove int values with a bit length of more than 64 from the properties
+    invalid = []
+    for prop, prop_value in properties.items():
+        if isinstance(prop_value, int) and prop_value.bit_length() > 64:
+            invalid.append(prop)
+        if isinstance(prop_value, dict):
+            _check_int_values_properties(prop_value)
+
+    for inv_key in invalid:
+        properties.pop(inv_key)
+
+
+class CopMarineSearch(StaticStacSearch):
+    """class that implements search for the Copernicus Marine provider"""
+
+    def __init__(self, provider: str, config: PluginConfig):
+        original_metadata_mapping = copy.deepcopy(config.metadata_mapping)
+        super().__init__(provider, config)
+        # reset to original metadata mapping from config (changed in super class init)
+        self.config.metadata_mapping = original_metadata_mapping
+
+    def _get_product_type_info(
+        self, product_type: str
+    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
+        """Fetch product type and associated datasets info"""
+
+        fetch_url = cast(
+            str,
+            self.config.discover_product_types["fetch_url"].format(
+                **self.config.__dict__
+            ),
+        )
+        logger.debug("fetch data for collection %s", product_type)
+        provider_product_type = self.config.products.get(product_type, {}).get(
+            "productType", None
+        )
+        if not provider_product_type:
+            provider_product_type = product_type
+        collection_url = (
+            fetch_url.replace("catalog.stac.json", provider_product_type)
+            + "/product.stac.json"
+        )
+        try:
+            collection_data = requests.get(collection_url).json()
+        except requests.RequestException:
+            logger.error("data for product %s could not be fetched", product_type)
+            raise UnsupportedProductType(product_type)
+
+        datasets = []
+        for link in [li for li in collection_data["links"] if li["rel"] == "item"]:
+            dataset_url = (
+                fetch_url.replace("catalog.stac.json", provider_product_type)
+                + "/"
+                + link["href"]
+            )
+            try:
+                dataset_item = requests.get(dataset_url).json()
+                datasets.append(dataset_item)
+            except requests.RequestException:
+                logger.error("data for dataset %s could not be fetched", link["title"])
+
+        return collection_data, datasets
+
+    def _get_product_by_id(
+        self,
+        collection_objects: ListObjectsOutputTypeDef,
+        product_id: str,
+        s3_url: str,
+        product_type: str,
+        dataset_item: Dict[str, Any],
+        collection_dict: Dict[str, Any],
+    ):
+        for obj in collection_objects["Contents"]:
+            if product_id in obj["Key"]:
+                return self._create_product(
+                    product_type, obj["Key"], s3_url, dataset_item, collection_dict
+                )
+        return None
+
+    def _create_product(
+        self,
+        product_type: str,
+        item_key: str,
+        s3_url: str,
+        dataset_item: Dict[str, Any],
+        collection_dict: Dict[str, Any],
+        use_dataset_dates: bool = False,
+    ) -> Optional[EOProduct]:
+
+        item_id = item_key.split("/")[-1].split(".")[0]
+        download_url = s3_url + "/" + item_key
+        properties = {
+            "id": item_id,
+            "title": item_id,
+            "geometry": self.config.metadata_mapping["defaultGeometry"],
+            "downloadLink": download_url,
+            "dataset": dataset_item["id"],
+        }
+        if use_dataset_dates:
+            dates = _get_dates_from_dataset_data(dataset_item)
+            if not dates:
+                return None
+            properties["startTimeFromAscendingNode"] = dates["start"]
+            properties["completionTimeFromAscendingNode"] = dates["end"]
+        else:
+            item_dates = re.findall(r"(\d{4})(0[1-9]|1[0-2])([0-3]\d)", item_id)
+            if not item_dates:
+                item_dates = re.findall(r"_(\d{4})(0[1-9]|1[0-2])", item_id)
+            item_dates = ["".join(row) for row in item_dates]
+            item_start = _get_date_from_yyyymmdd(item_dates[0], item_key)
+            if not item_start:  # identified pattern was not a valid datetime
+                return None
+            if len(item_dates) > 2:  # start, end and created_at timestamps
+                item_end = _get_date_from_yyyymmdd(item_dates[1], item_key)
+            else:  # only date and created_at timestamps
+                item_end = item_start
+            properties["startTimeFromAscendingNode"] = item_start.strftime(
+                "%Y-%m-%dT%H:%M:%SZ"
+            )
+            properties["completionTimeFromAscendingNode"] = (
+                item_end or item_start
+            ).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+        for key, value in collection_dict["properties"].items():
+            if key not in ["id", "title", "start_datetime", "end_datetime", "datetime"]:
+                properties[key] = value
+        for key, value in dataset_item["properties"].items():
+            if key not in ["id", "title", "start_datetime", "end_datetime", "datetime"]:
+                properties[key] = value
+
+        code_mapping = self.config.products.get(product_type, {}).get(
+            "code_mapping", None
+        )
+        if code_mapping:
+            id_parts = item_id.split("_")
+            if len(id_parts) > code_mapping["index"]:
+                code = id_parts[code_mapping["index"]]
+                if "pattern" not in code_mapping:
+                    properties[code_mapping["param"]] = code
+                elif re.findall(code_mapping["pattern"], code):
+                    properties[code_mapping["param"]] = re.findall(
+                        code_mapping["pattern"], code
+                    )[0]
+
+        _check_int_values_properties(properties)
+
+        properties["thumbnail"] = collection_dict["assets"]["thumbnail"]["href"]
+        if "omiFigure" in collection_dict["assets"]:
+            properties["quicklook"] = collection_dict["assets"]["omiFigure"]["href"]
+        assets = {
+            "native": {
+                "title": "native",
+                "href": download_url,
+                "type": "application/x-netcdf",
+            }
+        }
+        product = EOProduct(self.provider, properties, productType=product_type)
+        product.assets = AssetsDict(product, assets)
+        return product
+
+    def query(
+        self,
+        prep: PreparedSearch = PreparedSearch(),
+        **kwargs: Any,
+    ) -> Tuple[List[EOProduct], Optional[int]]:
+        """
+        Implementation of search for the Copernicus Marine provider
+        :param prep: object containing search parameters
+        :param kwargs: additional search arguments
+        :returns: list of products and total number of products
+        """
+        page = prep.page
+        items_per_page = prep.items_per_page
+
+        # only return 1 page if pagination is disabled
+        if page is None or items_per_page is None or page > 1 and items_per_page <= 0:
+            return ([], 0) if prep.count else ([], None)
+
+        product_type = kwargs.get("productType", prep.product_type)
+        if not product_type:
+            raise ValidationError(
+                "parameter product type is required for search with cop_marine provider"
+            )
+        collection_dict, datasets_items_list = self._get_product_type_info(product_type)
+        products: List[EOProduct] = []
+        start_index = items_per_page * (page - 1) + 1
+        num_total = 0
+        for i, dataset_item in enumerate(datasets_items_list):
+            try:
+                logger.debug("searching data for dataset %s", dataset_item["id"])
+
+                # date bounds
+                if "startTimeFromAscendingNode" in kwargs:
+                    start_date = isoparse(kwargs["startTimeFromAscendingNode"])
+                elif "start_datetime" in dataset_item["properties"]:
+                    start_date = isoparse(dataset_item["properties"]["start_datetime"])
+                else:
+                    start_date = isoparse(dataset_item["properties"]["datetime"])
+                if not start_date.tzinfo:
+                    start_date = start_date.replace(tzinfo=tzutc())
+                if "completionTimeFromAscendingNode" in kwargs:
+                    end_date = isoparse(kwargs["completionTimeFromAscendingNode"])
+                elif "end_datetime" in dataset_item["properties"]:
+                    end_date = isoparse(dataset_item["properties"]["end_datetime"])
+                else:
+                    end_date = today(tzinfo=tzutc())
+                if not end_date.tzinfo:
+                    end_date = end_date.replace(tzinfo=tzutc())
+
+                # retrieve information about s3 from collection data
+                s3_url = dataset_item["assets"]["native"]["href"]
+            except KeyError as e:
+                logger.warning(
+                    f"Unable to extract info from {product_type} item #{i}: {str(e)}"
+                )
+                continue
+
+            url_parts = urlsplit(s3_url)
+            endpoint_url = url_parts.scheme + "://" + url_parts.hostname
+            bucket, collection_path = get_bucket_name_and_prefix(s3_url, 0)
+            if bucket is None or collection_path is None:
+                logger.warning(
+                    f"Unable to get bucket and prefix from {s3_url}, got {(bucket, collection_path)}"
+                )
+                continue
+
+            if ".nc" in collection_path:
+                num_total += 1
+                if num_total < start_index:
+                    continue
+                if len(products) < items_per_page or items_per_page < 0:
+                    product = self._create_product(
+                        product_type,
+                        collection_path,
+                        endpoint_url + "/" + bucket,
+                        dataset_item,
+                        collection_dict,
+                        True,
+                    )
+                    if product:
+                        products.append(product)
+                continue
+
+            s3_client = _get_s3_client(endpoint_url)
+            stop_search = False
+            current_object = None
+            while not stop_search:
+                # list_objects returns max 1000 objects -> use marker to get next objects
+                if current_object:
+                    s3_objects = s3_client.list_objects(
+                        Bucket=bucket, Prefix=collection_path, Marker=current_object
+                    )
+                else:
+                    s3_objects = s3_client.list_objects(
+                        Bucket=bucket, Prefix=collection_path
+                    )
+                if "Contents" not in s3_objects:
+                    if len(products) == 0 and i == len(datasets_items_list) - 1:
+                        return ([], 0) if prep.count else ([], None)
+                    else:
+                        break
+
+                if "id" in kwargs:
+                    product = self._get_product_by_id(
+                        s3_objects,
+                        kwargs["id"],
+                        endpoint_url + "/" + bucket,
+                        product_type,
+                        dataset_item,
+                        collection_dict,
+                    )
+                    if product:
+                        return [product], 1
+                    current_object = s3_objects["Contents"][-1]["Key"]
+                    continue
+
+                for obj in s3_objects["Contents"]:
+                    item_key = obj["Key"]
+                    item_id = item_key.split("/")[-1].split(".")[0]
+                    # filter according to date(s) in item id
+                    item_dates = re.findall(r"(\d{4})(0[1-9]|1[0-2])([0-3]\d)", item_id)
+                    if not item_dates:
+                        item_dates = re.findall(r"_(\d{4})(0[1-9]|1[0-2])", item_id)
+                    item_dates = [
+                        "".join(row) for row in item_dates
+                    ]  # join tuples returned by findall
+                    item_start = None
+                    item_end = None
+                    use_dataset_dates = False
+                    if item_dates:
+                        item_start = _get_date_from_yyyymmdd(item_dates[0], item_key)
+                        if len(item_dates) > 2:  # start, end and created_at timestamps
+                            item_end = _get_date_from_yyyymmdd(item_dates[1], item_key)
+                    if not item_start:
+                        # no valid datetime given in id
+                        use_dataset_dates = True
+                        dates = _get_dates_from_dataset_data(dataset_item)
+                        if dates:
+                            item_start_str = dates["start"].replace("Z", "+0000")
+                            item_end_str = dates["end"].replace("Z", "+0000")
+                            try:
+                                item_start = datetime.strptime(
+                                    item_start_str, "%Y-%m-%dT%H:%M:%S.%f%z"
+                                )
+                                item_end = datetime.strptime(
+                                    item_end_str, "%Y-%m-%dT%H:%M:%S.%f%z"
+                                )
+                            except ValueError:
+                                item_start = datetime.strptime(
+                                    item_start_str, "%Y-%m-%dT%H:%M:%S%z"
+                                )
+                                item_end = datetime.strptime(
+                                    item_end_str, "%Y-%m-%dT%H:%M:%S%z"
+                                )
+                    if not item_start:
+                        # no valid datetime in id and dataset data
+                        continue
+                    if item_start > end_date:
+                        stop_search = True
+                    if (
+                        (start_date <= item_start <= end_date)
+                        or (item_end and start_date <= item_end <= end_date)
+                        or (
+                            item_end and item_start < start_date and item_end > end_date
+                        )
+                    ):
+                        num_total += 1
+                        if num_total < start_index:
+                            continue
+                        if len(products) < items_per_page or items_per_page < 0:
+                            product = self._create_product(
+                                product_type,
+                                item_key,
+                                endpoint_url + "/" + bucket,
+                                dataset_item,
+                                collection_dict,
+                                use_dataset_dates,
+                            )
+                            if product:
+                                products.append(product)
+                    current_object = item_key
+
+        return products, num_total
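The new plugin is reached through the standard eodag workflow. A minimal usage sketch, assuming the provider is registered under the cop_marine name used in the ValidationError above; the Copernicus Marine collection ID is illustrative:

    from eodag import EODataAccessGateway

    dag = EODataAccessGateway()
    dag.set_preferred_provider("cop_marine")
    # illustrative collection ID; any product type exposed by the
    # provider's STAC catalog can be used
    results = dag.search(
        productType="GLOBAL_ANALYSISFORECAST_PHY_001_024",
        start="2024-01-01",
        end="2024-01-31",
    )

Note that pagination happens inside query() itself: list_objects returns at most 1000 keys, so the plugin feeds the last key seen back as Marker until the requested date window is exhausted.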
eodag/plugins/search/creodias_s3.py
CHANGED
@@ -17,18 +17,24 @@
 # limitations under the License.
 import logging
 from types import MethodType
-from typing import Any,
+from typing import Any, List
 
 import boto3
 import botocore
 from botocore.exceptions import BotoCoreError
 
-from eodag import EOProduct
-from eodag.api.
+from eodag.api.product import AssetsDict, EOProduct  # type: ignore
+from eodag.api.search_result import RawSearchResult
 from eodag.config import PluginConfig
 from eodag.plugins.authentication.aws_auth import AwsAuth
-from eodag.plugins.search.qssearch import
-from eodag.utils
+from eodag.plugins.search.qssearch import ODataV4Search
+from eodag.utils import guess_file_type
+from eodag.utils.exceptions import (
+    AuthenticationError,
+    MisconfiguredError,
+    NotAvailableError,
+    RequestError,
+)
 
 DATA_EXTENSIONS = ["jp2", "tiff", "nc", "grib"]
 logger = logging.getLogger("eodag.search.creodiass3")
@@ -37,13 +43,10 @@ logger = logging.getLogger("eodag.search.creodiass3")
 def patched_register_downloader(self, downloader, authenticator):
     """Add the download information to the product.
     :param self: product to which information should be added
-    :type self: EoProduct
     :param downloader: The download method that it can use
-    :type downloader: Concrete subclass of
         :class:`~eodag.plugins.download.base.Download` or
         :class:`~eodag.plugins.api.base.Api`
     :param authenticator: The authentication method needed to perform the download
-    :type authenticator: Concrete subclass of
         :class:`~eodag.plugins.authentication.base.Authentication`
     """
     # register downloader
@@ -52,7 +55,7 @@ def patched_register_downloader(self, downloader, authenticator):
     try:
         _update_assets(self, downloader.config, authenticator)
     except BotoCoreError as e:
-        raise RequestError(
+        raise RequestError.from_error(e, "could not update assets") from e
 
 
 def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
@@ -64,18 +67,19 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
     try:
         auth_dict = auth.authenticate()
         required_creds = ["aws_access_key_id", "aws_secret_access_key"]
-        if not all(
+        if not all(x in auth_dict for x in required_creds):
             raise MisconfiguredError(
                 f"Incomplete credentials for {product.provider}, missing "
-                f"{[x for x in required_creds if not
+                f"{[x for x in required_creds if x not in auth_dict]}"
             )
         if not getattr(auth, "s3_client", None):
             auth.s3_client = boto3.client(
                 "s3",
-                endpoint_url=config.
-
+                endpoint_url=config.s3_endpoint,
+                aws_access_key_id=auth_dict["aws_access_key_id"],
+                aws_secret_access_key=auth_dict["aws_secret_access_key"],
             )
-        logger.debug(
+        logger.debug("Listing assets in %s", prefix)
         product.assets = AssetsDict(product)
         for asset in auth.s3_client.list_objects(
             Bucket=config.s3_bucket, Prefix=prefix, MaxKeys=300
@@ -96,6 +100,8 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
                 "roles": [role],
                 "href": f"s3://{config.s3_bucket}/{asset['Key']}",
             }
+            if mime_type := guess_file_type(asset["Key"]):
+                product.assets[asset_basename]["type"] = mime_type
         # update driver
         product.driver = product.get_driver()
 
@@ -104,13 +110,12 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
             raise AuthenticationError(
                 f"Authentication failed on {config.base_uri} s3"
             ) from e
-
-
-
-        ) from e
+        raise NotAvailableError(
+            f"assets for product {prefix} could not be found"
+        ) from e
 
 
-class CreodiasS3Search(
+class CreodiasS3Search(ODataV4Search):
     """
     Search on creodias and adapt results to s3
     """
@@ -119,7 +124,7 @@ class CreodiasS3Search(QueryStringSearch):
         super(CreodiasS3Search, self).__init__(provider, config)
 
     def normalize_results(
-        self, results:
+        self, results: RawSearchResult, **kwargs: Any
     ) -> List[EOProduct]:
         """Build EOProducts from provider results"""
 
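The new walrus-operator branch above fills in asset media types from file extensions. A rough stand-in sketch, assuming eodag.utils.guess_file_type resolves a MIME type from the extension and returns None when it is unknown (the standard-library mimetypes module behaves this way for common extensions):

    import mimetypes
    from typing import Optional

    def guess_file_type_sketch(href: str) -> Optional[str]:
        # assumption: extension-based lookup, standing in for
        # eodag.utils.guess_file_type
        mime, _ = mimetypes.guess_type(href)
        return mime

    asset = {"href": "s3://eodata/Sentinel-2/measurement/band_01.tiff"}
    if mime_type := guess_file_type_sketch(asset["href"]):
        asset["type"] = mime_type  # "image/tiff"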
eodag/plugins/search/csw.py
CHANGED
@@ -35,8 +35,9 @@ from shapely import geometry, wkt
 
 from eodag.api.product import EOProduct
 from eodag.api.product.metadata_mapping import properties_from_xml
+from eodag.plugins.search import PreparedSearch
 from eodag.plugins.search.base import Search
-from eodag.utils import
+from eodag.utils import DEFAULT_PROJ
 from eodag.utils.import_system import patch_owslib_requests
 
 if TYPE_CHECKING:
@@ -64,16 +65,13 @@ class CSWSearch(Search):
 
     def query(
         self,
-
-        items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
-        page: int = DEFAULT_PAGE,
-        count: bool = True,
+        prep: PreparedSearch = PreparedSearch(),
         **kwargs: Any,
     ) -> Tuple[List[EOProduct], Optional[int]]:
         """Perform a search on an OGC/CSW-like interface"""
         product_type = kwargs.get("productType")
         if product_type is None:
-            return [], 0
+            return ([], 0) if prep.count else ([], None)
         auth = kwargs.get("auth")
         if auth:
             self.__init_catalog(**getattr(auth.config, "credentials", {}))
@@ -118,7 +116,7 @@ class CSWSearch(Search):
             )
             results.extend(partial_results)
             logger.info("Found %s overall results", len(results))
-        total_results = len(results) if count else None
+        total_results = len(results) if prep.count else None
         return results, total_results
 
     def __init_catalog(