eodag 3.0.0b3__py3-none-any.whl → 3.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +292 -198
- eodag/api/product/_assets.py +6 -6
- eodag/api/product/_product.py +18 -18
- eodag/api/product/metadata_mapping.py +51 -14
- eodag/api/search_result.py +29 -3
- eodag/cli.py +57 -20
- eodag/config.py +413 -117
- eodag/plugins/apis/base.py +10 -4
- eodag/plugins/apis/ecmwf.py +49 -16
- eodag/plugins/apis/usgs.py +30 -7
- eodag/plugins/authentication/aws_auth.py +14 -5
- eodag/plugins/authentication/base.py +10 -1
- eodag/plugins/authentication/generic.py +14 -3
- eodag/plugins/authentication/header.py +12 -4
- eodag/plugins/authentication/keycloak.py +41 -22
- eodag/plugins/authentication/oauth.py +11 -1
- eodag/plugins/authentication/openid_connect.py +178 -163
- eodag/plugins/authentication/qsauth.py +12 -4
- eodag/plugins/authentication/sas_auth.py +19 -2
- eodag/plugins/authentication/token.py +93 -15
- eodag/plugins/authentication/token_exchange.py +19 -19
- eodag/plugins/crunch/base.py +4 -1
- eodag/plugins/crunch/filter_date.py +5 -2
- eodag/plugins/crunch/filter_latest_intersect.py +5 -4
- eodag/plugins/crunch/filter_latest_tpl_name.py +1 -1
- eodag/plugins/crunch/filter_overlap.py +5 -7
- eodag/plugins/crunch/filter_property.py +6 -6
- eodag/plugins/download/aws.py +50 -34
- eodag/plugins/download/base.py +41 -50
- eodag/plugins/download/creodias_s3.py +40 -2
- eodag/plugins/download/http.py +221 -195
- eodag/plugins/download/s3rest.py +25 -25
- eodag/plugins/manager.py +168 -23
- eodag/plugins/search/base.py +106 -39
- eodag/plugins/search/build_search_result.py +1065 -324
- eodag/plugins/search/cop_marine.py +112 -29
- eodag/plugins/search/creodias_s3.py +45 -24
- eodag/plugins/search/csw.py +41 -1
- eodag/plugins/search/data_request_search.py +109 -9
- eodag/plugins/search/qssearch.py +549 -257
- eodag/plugins/search/static_stac_search.py +20 -21
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +577 -87
- eodag/resources/providers.yml +1619 -2776
- eodag/resources/stac.yml +3 -163
- eodag/resources/user_conf_template.yml +112 -97
- eodag/rest/config.py +1 -2
- eodag/rest/constants.py +0 -1
- eodag/rest/core.py +138 -98
- eodag/rest/errors.py +181 -0
- eodag/rest/server.py +55 -329
- eodag/rest/stac.py +93 -544
- eodag/rest/types/eodag_search.py +19 -8
- eodag/rest/types/queryables.py +6 -8
- eodag/rest/types/stac_search.py +11 -2
- eodag/rest/utils/__init__.py +3 -0
- eodag/types/__init__.py +71 -18
- eodag/types/download_args.py +3 -3
- eodag/types/queryables.py +180 -73
- eodag/types/search_args.py +3 -3
- eodag/types/whoosh.py +126 -0
- eodag/utils/__init__.py +147 -66
- eodag/utils/exceptions.py +47 -26
- eodag/utils/logging.py +37 -77
- eodag/utils/repr.py +65 -6
- eodag/utils/requests.py +11 -13
- eodag/utils/stac_reader.py +1 -1
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/METADATA +80 -81
- eodag-3.1.0b1.dist-info/RECORD +108 -0
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/WHEEL +1 -1
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/entry_points.txt +4 -2
- eodag/resources/constraints/climate-dt.json +0 -13
- eodag/resources/constraints/extremes-dt.json +0 -8
- eodag/utils/constraints.py +0 -244
- eodag-3.0.0b3.dist-info/RECORD +0 -110
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/LICENSE +0 -0
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/top_level.txt +0 -0
|
@@ -19,6 +19,7 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import copy
|
|
21
21
|
import logging
|
|
22
|
+
import os
|
|
22
23
|
import re
|
|
23
24
|
from datetime import datetime
|
|
24
25
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast
|
|
@@ -37,7 +38,7 @@ from eodag.config import PluginConfig
|
|
|
37
38
|
from eodag.plugins.search import PreparedSearch
|
|
38
39
|
from eodag.plugins.search.static_stac_search import StaticStacSearch
|
|
39
40
|
from eodag.utils import get_bucket_name_and_prefix
|
|
40
|
-
from eodag.utils.exceptions import UnsupportedProductType, ValidationError
|
|
41
|
+
from eodag.utils.exceptions import RequestError, UnsupportedProductType, ValidationError
|
|
41
42
|
|
|
42
43
|
if TYPE_CHECKING:
|
|
43
44
|
from mypy_boto3_s3 import S3Client
|
|
@@ -67,6 +68,21 @@ def _get_date_from_yyyymmdd(date_str: str, item_key: str) -> Optional[datetime]:
|
|
|
67
68
|
return date
|
|
68
69
|
|
|
69
70
|
|
|
71
|
+
def _get_dates_from_dataset_data(
|
|
72
|
+
dataset_item: Dict[str, Any]
|
|
73
|
+
) -> Optional[Dict[str, str]]:
|
|
74
|
+
dates = {}
|
|
75
|
+
if "start_datetime" in dataset_item["properties"]:
|
|
76
|
+
dates["start"] = dataset_item["properties"]["start_datetime"]
|
|
77
|
+
dates["end"] = dataset_item["properties"]["end_datetime"]
|
|
78
|
+
elif "datetime" in dataset_item["properties"]:
|
|
79
|
+
dates["start"] = dataset_item["properties"]["datetime"]
|
|
80
|
+
dates["end"] = dataset_item["properties"]["datetime"]
|
|
81
|
+
else:
|
|
82
|
+
return None
|
|
83
|
+
return dates
|
|
84
|
+
|
|
85
|
+
|
|
70
86
|
def _get_s3_client(endpoint_url: str) -> S3Client:
|
|
71
87
|
s3_session = boto3.Session()
|
|
72
88
|
return s3_session.client(
|
|
@@ -94,7 +110,21 @@ def _check_int_values_properties(properties: Dict[str, Any]):
|
|
|
94
110
|
|
|
95
111
|
|
|
96
112
|
class CopMarineSearch(StaticStacSearch):
|
|
97
|
-
"""class that implements search for the Copernicus Marine provider
|
|
113
|
+
"""class that implements search for the Copernicus Marine provider
|
|
114
|
+
|
|
115
|
+
It calls :meth:`~eodag.plugins.search.static_stac_search.StaticStacSearch.discover_product_types`
|
|
116
|
+
inherited from :class:`~eodag.plugins.search.static_stac_search.StaticStacSearch`
|
|
117
|
+
but for the actual search a special method which fetches the urls of the available products from an S3 storage and
|
|
118
|
+
filters them has been written.
|
|
119
|
+
|
|
120
|
+
The configuration parameters are inherited from the parent and grand-parent classes. The
|
|
121
|
+
:attr:`~eodag.config.PluginConfig.DiscoverMetadata.auto_discovery` parameter in the
|
|
122
|
+
:attr:`~eodag.config.PluginConfig.discover_metadata` section has to be set to ``false`` and the
|
|
123
|
+
:attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` in the
|
|
124
|
+
:attr:`~eodag.config.PluginConfig.discover_queryables` queryables section has to be set to ``null`` to
|
|
125
|
+
overwrite the default config from the stac provider configuration because those functionalities
|
|
126
|
+
are not available.
|
|
127
|
+
"""
|
|
98
128
|
|
|
99
129
|
def __init__(self, provider: str, config: PluginConfig):
|
|
100
130
|
original_metadata_mapping = copy.deepcopy(config.metadata_mapping)
|
|
@@ -107,12 +137,10 @@ class CopMarineSearch(StaticStacSearch):
|
|
|
107
137
|
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
|
|
108
138
|
"""Fetch product type and associated datasets info"""
|
|
109
139
|
|
|
110
|
-
fetch_url = cast(
|
|
111
|
-
|
|
112
|
-
self.config.discover_product_types["fetch_url"].format(
|
|
113
|
-
**self.config.__dict__
|
|
114
|
-
),
|
|
140
|
+
fetch_url = cast(str, self.config.discover_product_types["fetch_url"]).format(
|
|
141
|
+
**self.config.__dict__
|
|
115
142
|
)
|
|
143
|
+
|
|
116
144
|
logger.debug("fetch data for collection %s", product_type)
|
|
117
145
|
provider_product_type = self.config.products.get(product_type, {}).get(
|
|
118
146
|
"productType", None
|
|
@@ -125,9 +153,14 @@ class CopMarineSearch(StaticStacSearch):
|
|
|
125
153
|
)
|
|
126
154
|
try:
|
|
127
155
|
collection_data = requests.get(collection_url).json()
|
|
128
|
-
except requests.RequestException:
|
|
156
|
+
except requests.RequestException as exc:
|
|
157
|
+
if exc.errno == 404:
|
|
158
|
+
logger.error("product %s not found", product_type)
|
|
159
|
+
raise UnsupportedProductType(product_type)
|
|
129
160
|
logger.error("data for product %s could not be fetched", product_type)
|
|
130
|
-
raise
|
|
161
|
+
raise RequestError.from_error(
|
|
162
|
+
exc, f"data for product {product_type} could not be fetched"
|
|
163
|
+
) from exc
|
|
131
164
|
|
|
132
165
|
datasets = []
|
|
133
166
|
for link in [li for li in collection_data["links"] if li["rel"] == "item"]:
|
|
@@ -170,7 +203,7 @@ class CopMarineSearch(StaticStacSearch):
|
|
|
170
203
|
use_dataset_dates: bool = False,
|
|
171
204
|
) -> Optional[EOProduct]:
|
|
172
205
|
|
|
173
|
-
item_id = item_key.split("/")[-1]
|
|
206
|
+
item_id = os.path.splitext(item_key.split("/")[-1])[0]
|
|
174
207
|
download_url = s3_url + "/" + item_key
|
|
175
208
|
properties = {
|
|
176
209
|
"id": item_id,
|
|
@@ -180,20 +213,16 @@ class CopMarineSearch(StaticStacSearch):
|
|
|
180
213
|
"dataset": dataset_item["id"],
|
|
181
214
|
}
|
|
182
215
|
if use_dataset_dates:
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
"end_datetime"
|
|
189
|
-
]
|
|
190
|
-
elif "datetime" in dataset_item:
|
|
191
|
-
properties["startTimeFromAscendingNode"] = dataset_item["datetime"]
|
|
192
|
-
properties["completionTimeFromAscendingNode"] = dataset_item["datetime"]
|
|
216
|
+
dates = _get_dates_from_dataset_data(dataset_item)
|
|
217
|
+
if not dates:
|
|
218
|
+
return None
|
|
219
|
+
properties["startTimeFromAscendingNode"] = dates["start"]
|
|
220
|
+
properties["completionTimeFromAscendingNode"] = dates["end"]
|
|
193
221
|
else:
|
|
194
|
-
item_dates = re.findall(r"\d{
|
|
222
|
+
item_dates = re.findall(r"(\d{4})(0[1-9]|1[0-2])([0-3]\d)", item_id)
|
|
195
223
|
if not item_dates:
|
|
196
|
-
item_dates = re.findall(r"\d{
|
|
224
|
+
item_dates = re.findall(r"_(\d{4})(0[1-9]|1[0-2])", item_id)
|
|
225
|
+
item_dates = ["".join(row) for row in item_dates]
|
|
197
226
|
item_start = _get_date_from_yyyymmdd(item_dates[0], item_key)
|
|
198
227
|
if not item_start: # identified pattern was not a valid datetime
|
|
199
228
|
return None
|
|
@@ -209,11 +238,26 @@ class CopMarineSearch(StaticStacSearch):
|
|
|
209
238
|
).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
210
239
|
|
|
211
240
|
for key, value in collection_dict["properties"].items():
|
|
212
|
-
if key not in ["id", "title", "start_datetime", "end_datetime"]:
|
|
241
|
+
if key not in ["id", "title", "start_datetime", "end_datetime", "datetime"]:
|
|
213
242
|
properties[key] = value
|
|
214
243
|
for key, value in dataset_item["properties"].items():
|
|
215
|
-
if key not in ["id", "title", "start_datetime", "end_datetime"]:
|
|
244
|
+
if key not in ["id", "title", "start_datetime", "end_datetime", "datetime"]:
|
|
216
245
|
properties[key] = value
|
|
246
|
+
|
|
247
|
+
code_mapping = self.config.products.get(product_type, {}).get(
|
|
248
|
+
"code_mapping", None
|
|
249
|
+
)
|
|
250
|
+
if code_mapping:
|
|
251
|
+
id_parts = item_id.split("_")
|
|
252
|
+
if len(id_parts) > code_mapping["index"]:
|
|
253
|
+
code = id_parts[code_mapping["index"]]
|
|
254
|
+
if "pattern" not in code_mapping:
|
|
255
|
+
properties[code_mapping["param"]] = code
|
|
256
|
+
elif re.findall(code_mapping["pattern"], code):
|
|
257
|
+
properties[code_mapping["param"]] = re.findall(
|
|
258
|
+
code_mapping["pattern"], code
|
|
259
|
+
)[0]
|
|
260
|
+
|
|
217
261
|
_check_int_values_properties(properties)
|
|
218
262
|
|
|
219
263
|
properties["thumbnail"] = collection_dict["assets"]["thumbnail"]["href"]
|
|
@@ -348,16 +392,54 @@ class CopMarineSearch(StaticStacSearch):
|
|
|
348
392
|
|
|
349
393
|
for obj in s3_objects["Contents"]:
|
|
350
394
|
item_key = obj["Key"]
|
|
395
|
+
item_id = os.path.splitext(item_key.split("/")[-1])[0]
|
|
351
396
|
# filter according to date(s) in item id
|
|
352
|
-
item_dates = re.findall(r"\d{
|
|
397
|
+
item_dates = re.findall(r"(\d{4})(0[1-9]|1[0-2])([0-3]\d)", item_id)
|
|
353
398
|
if not item_dates:
|
|
354
|
-
item_dates = re.findall(r"\d{
|
|
355
|
-
|
|
356
|
-
|
|
399
|
+
item_dates = re.findall(r"_(\d{4})(0[1-9]|1[0-2])", item_id)
|
|
400
|
+
item_dates = [
|
|
401
|
+
"".join(row) for row in item_dates
|
|
402
|
+
] # join tuples returned by findall
|
|
403
|
+
item_start = None
|
|
404
|
+
item_end = None
|
|
405
|
+
use_dataset_dates = False
|
|
406
|
+
if item_dates:
|
|
407
|
+
item_start = _get_date_from_yyyymmdd(item_dates[0], item_key)
|
|
408
|
+
if len(item_dates) > 2: # start, end and created_at timestamps
|
|
409
|
+
item_end = _get_date_from_yyyymmdd(item_dates[1], item_key)
|
|
410
|
+
if not item_start:
|
|
411
|
+
# no valid datetime given in id
|
|
412
|
+
use_dataset_dates = True
|
|
413
|
+
dates = _get_dates_from_dataset_data(dataset_item)
|
|
414
|
+
if dates:
|
|
415
|
+
item_start_str = dates["start"].replace("Z", "+0000")
|
|
416
|
+
item_end_str = dates["end"].replace("Z", "+0000")
|
|
417
|
+
try:
|
|
418
|
+
item_start = datetime.strptime(
|
|
419
|
+
item_start_str, "%Y-%m-%dT%H:%M:%S.%f%z"
|
|
420
|
+
)
|
|
421
|
+
item_end = datetime.strptime(
|
|
422
|
+
item_end_str, "%Y-%m-%dT%H:%M:%S.%f%z"
|
|
423
|
+
)
|
|
424
|
+
except ValueError:
|
|
425
|
+
item_start = datetime.strptime(
|
|
426
|
+
item_start_str, "%Y-%m-%dT%H:%M:%S%z"
|
|
427
|
+
)
|
|
428
|
+
item_end = datetime.strptime(
|
|
429
|
+
item_end_str, "%Y-%m-%dT%H:%M:%S%z"
|
|
430
|
+
)
|
|
431
|
+
if not item_start:
|
|
432
|
+
# no valid datetime in id and dataset data
|
|
357
433
|
continue
|
|
358
434
|
if item_start > end_date:
|
|
359
435
|
stop_search = True
|
|
360
|
-
if
|
|
436
|
+
if (
|
|
437
|
+
(start_date <= item_start <= end_date)
|
|
438
|
+
or (item_end and start_date <= item_end <= end_date)
|
|
439
|
+
or (
|
|
440
|
+
item_end and item_start < start_date and item_end > end_date
|
|
441
|
+
)
|
|
442
|
+
):
|
|
361
443
|
num_total += 1
|
|
362
444
|
if num_total < start_index:
|
|
363
445
|
continue
|
|
@@ -368,6 +450,7 @@ class CopMarineSearch(StaticStacSearch):
|
|
|
368
450
|
endpoint_url + "/" + bucket,
|
|
369
451
|
dataset_item,
|
|
370
452
|
collection_dict,
|
|
453
|
+
use_dataset_dates,
|
|
371
454
|
)
|
|
372
455
|
if product:
|
|
373
456
|
products.append(product)
|
|
@@ -29,7 +29,12 @@ from eodag.config import PluginConfig
|
|
|
29
29
|
from eodag.plugins.authentication.aws_auth import AwsAuth
|
|
30
30
|
from eodag.plugins.search.qssearch import ODataV4Search
|
|
31
31
|
from eodag.utils import guess_file_type
|
|
32
|
-
from eodag.utils.exceptions import
|
|
32
|
+
from eodag.utils.exceptions import (
|
|
33
|
+
AuthenticationError,
|
|
34
|
+
MisconfiguredError,
|
|
35
|
+
NotAvailableError,
|
|
36
|
+
RequestError,
|
|
37
|
+
)
|
|
33
38
|
|
|
34
39
|
DATA_EXTENSIONS = ["jp2", "tiff", "nc", "grib"]
|
|
35
40
|
logger = logging.getLogger("eodag.search.creodiass3")
|
|
@@ -37,6 +42,7 @@ logger = logging.getLogger("eodag.search.creodiass3")
|
|
|
37
42
|
|
|
38
43
|
def patched_register_downloader(self, downloader, authenticator):
|
|
39
44
|
"""Add the download information to the product.
|
|
45
|
+
|
|
40
46
|
:param self: product to which information should be added
|
|
41
47
|
:param downloader: The download method that it can use
|
|
42
48
|
:class:`~eodag.plugins.download.base.Download` or
|
|
@@ -50,11 +56,11 @@ def patched_register_downloader(self, downloader, authenticator):
|
|
|
50
56
|
try:
|
|
51
57
|
_update_assets(self, downloader.config, authenticator)
|
|
52
58
|
except BotoCoreError as e:
|
|
53
|
-
raise RequestError(
|
|
59
|
+
raise RequestError.from_error(e, "could not update assets") from e
|
|
54
60
|
|
|
55
61
|
|
|
56
62
|
def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
|
|
57
|
-
product.assets =
|
|
63
|
+
product.assets = AssetsDict(product)
|
|
58
64
|
prefix = (
|
|
59
65
|
product.properties.get("productIdentifier", None).replace("/eodata/", "") + "/"
|
|
60
66
|
)
|
|
@@ -70,33 +76,38 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
|
|
|
70
76
|
if not getattr(auth, "s3_client", None):
|
|
71
77
|
auth.s3_client = boto3.client(
|
|
72
78
|
"s3",
|
|
73
|
-
endpoint_url=config.
|
|
79
|
+
endpoint_url=config.s3_endpoint,
|
|
74
80
|
aws_access_key_id=auth_dict["aws_access_key_id"],
|
|
75
81
|
aws_secret_access_key=auth_dict["aws_secret_access_key"],
|
|
76
82
|
)
|
|
77
83
|
logger.debug("Listing assets in %s", prefix)
|
|
78
84
|
product.assets = AssetsDict(product)
|
|
79
|
-
|
|
85
|
+
s3_res = auth.s3_client.list_objects(
|
|
80
86
|
Bucket=config.s3_bucket, Prefix=prefix, MaxKeys=300
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
"
|
|
89
|
-
if asset_basename.split(".")[-1] in DATA_EXTENSIONS
|
|
90
|
-
else "metadata"
|
|
87
|
+
)
|
|
88
|
+
# check if product path has assets or is already a file
|
|
89
|
+
if "Contents" in s3_res:
|
|
90
|
+
for asset in s3_res["Contents"]:
|
|
91
|
+
asset_basename = (
|
|
92
|
+
asset["Key"].split("/")[-1]
|
|
93
|
+
if "/" in asset["Key"]
|
|
94
|
+
else asset["Key"]
|
|
91
95
|
)
|
|
92
96
|
|
|
93
|
-
product.assets
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
if len(asset_basename) > 0 and asset_basename not in product.assets:
|
|
98
|
+
role = (
|
|
99
|
+
"data"
|
|
100
|
+
if asset_basename.split(".")[-1] in DATA_EXTENSIONS
|
|
101
|
+
else "metadata"
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
product.assets[asset_basename] = {
|
|
105
|
+
"title": asset_basename,
|
|
106
|
+
"roles": [role],
|
|
107
|
+
"href": f"s3://{config.s3_bucket}/{asset['Key']}",
|
|
108
|
+
}
|
|
109
|
+
if mime_type := guess_file_type(asset["Key"]):
|
|
110
|
+
product.assets[asset_basename]["type"] = mime_type
|
|
100
111
|
# update driver
|
|
101
112
|
product.driver = product.get_driver()
|
|
102
113
|
|
|
@@ -105,12 +116,22 @@ def _update_assets(product: EOProduct, config: PluginConfig, auth: AwsAuth):
|
|
|
105
116
|
raise AuthenticationError(
|
|
106
117
|
f"Authentication failed on {config.base_uri} s3"
|
|
107
118
|
) from e
|
|
108
|
-
raise
|
|
119
|
+
raise NotAvailableError(
|
|
120
|
+
f"assets for product {prefix} could not be found"
|
|
121
|
+
) from e
|
|
109
122
|
|
|
110
123
|
|
|
111
124
|
class CreodiasS3Search(ODataV4Search):
|
|
112
125
|
"""
|
|
113
|
-
|
|
126
|
+
``CreodiasS3Search`` is an extension of :class:`~eodag.plugins.search.qssearch.ODataV4Search`,
|
|
127
|
+
it executes a Search on creodias and adapts results so that the assets contain links to s3.
|
|
128
|
+
It has the same configuration parameters as :class:`~eodag.plugins.search.qssearch.ODataV4Search` and
|
|
129
|
+
one additional parameter:
|
|
130
|
+
|
|
131
|
+
:param provider: provider name
|
|
132
|
+
:param config: Search plugin configuration:
|
|
133
|
+
|
|
134
|
+
* :attr:`~eodag.config.PluginConfig.s3_endpoint` (``str``) (**mandatory**): base url of the s3
|
|
114
135
|
"""
|
|
115
136
|
|
|
116
137
|
def __init__(self, provider, config):
|
eodag/plugins/search/csw.py
CHANGED
|
@@ -52,7 +52,47 @@ SUPPORTED_REFERENCE_SCHEMES = ["WWW:DOWNLOAD-1.0-http--download"]
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
class CSWSearch(Search):
|
|
55
|
-
"""A plugin for implementing search based on OGC CSW
|
|
55
|
+
"""A plugin for implementing search based on OGC CSW
|
|
56
|
+
|
|
57
|
+
:param provider: provider name
|
|
58
|
+
:param config: Search plugin configuration:
|
|
59
|
+
|
|
60
|
+
* :attr:`~eodag.config.PluginConfig.api_endpoint` (``str``) (**mandatory**): The endpoint of the
|
|
61
|
+
provider's search interface
|
|
62
|
+
* :attr:`~eodag.config.PluginConfig.version` (``str``): OGC Catalogue Service version; default: ``2.0.2``
|
|
63
|
+
* :attr:`~eodag.config.PluginConfig.search_definition` (``Dict[str, Any]``) (**mandatory**):
|
|
64
|
+
|
|
65
|
+
* **product_type_tags** (``List[Dict[str, Any]]``): dict of product type tags
|
|
66
|
+
* **resource_location_filter** (``str``): regex string
|
|
67
|
+
* **date_tags** (``Dict[str, Any]``): tags for start and end
|
|
68
|
+
|
|
69
|
+
* :attr:`~eodag.config.PluginConfig.metadata_mapping` (``Dict[str, Any]``): The search plugins of this kind can
|
|
70
|
+
detect when a metadata mapping is "query-able", and get the semantics of how to format the query string
|
|
71
|
+
parameter that enables to make a query on the corresponding metadata. To make a metadata query-able,
|
|
72
|
+
just configure it in the metadata mapping to be a list of 2 items, the first one being the
|
|
73
|
+
specification of the query string search formatting. The latter is a string following the
|
|
74
|
+
specification of Python string formatting, with a special behaviour added to it. For example,
|
|
75
|
+
an entry in the metadata mapping of this kind::
|
|
76
|
+
|
|
77
|
+
completionTimeFromAscendingNode:
|
|
78
|
+
- 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
|
|
79
|
+
- '$.properties.acquisition.endViewingDate'
|
|
80
|
+
|
|
81
|
+
means that the search url will have a query string parameter named ``f`` with a value of
|
|
82
|
+
``acquisition.endViewingDate:lte:1543922280.0`` if the search was done with the value
|
|
83
|
+
of ``completionTimeFromAscendingNode`` being ``2018-12-04T12:18:00``. What happened is that
|
|
84
|
+
``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp of the value
|
|
85
|
+
of ``completionTimeFromAscendingNode``. This example shows all there is to know about the
|
|
86
|
+
semantics of the query string formatting introduced by this plugin: any eodag search parameter
|
|
87
|
+
can be referenced in the query string with an additional optional conversion function that
|
|
88
|
+
is separated from it by a ``#`` (see :func:`~eodag.api.product.metadata_mapping.format_metadata` for further
|
|
89
|
+
details on the available converters). Note that the values in the
|
|
90
|
+
:attr:`~eodag.config.PluginConfig.free_text_search_operations` configuration parameter follow the same rule.
|
|
91
|
+
If the metadata_mapping is not a list but only a string, this means that the parameter is not queryable but
|
|
92
|
+
it is included in the result obtained from the provider. The string indicates how the provider result should
|
|
93
|
+
be mapped to the eodag parameter.
|
|
94
|
+
|
|
95
|
+
"""
|
|
56
96
|
|
|
57
97
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
58
98
|
super(CSWSearch, self).__init__(provider, config)
|
|
@@ -58,9 +58,107 @@ logger = logging.getLogger("eodag.search.data_request_search")
|
|
|
58
58
|
class DataRequestSearch(Search):
|
|
59
59
|
"""
|
|
60
60
|
Plugin to execute search requests composed of several steps:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
61
|
+
|
|
62
|
+
#. do a data request which defines which data shall be searched
|
|
63
|
+
#. check the status of the request job
|
|
64
|
+
#. if finished - fetch the result of the job
|
|
65
|
+
|
|
66
|
+
:param provider: provider name
|
|
67
|
+
:param config: Search plugin configuration:
|
|
68
|
+
|
|
69
|
+
* :attr:`~eodag.config.PluginConfig.api_endpoint` (``str``) (**mandatory**): The endpoint of the
|
|
70
|
+
provider's search interface
|
|
71
|
+
* :attr:`~eodag.config.PluginConfig.results_entry` (``str``) (**mandatory**): The name of
|
|
72
|
+
the key in the provider search result that gives access to the result entries
|
|
73
|
+
* :attr:`~eodag.config.PluginConfig.data_request_url` (``str``) (**mandatory**): url
|
|
74
|
+
to which the data request shall be sent
|
|
75
|
+
* :attr:`~eodag.config.PluginConfig.status_url` (``str``) (**mandatory**): url to fetch
|
|
76
|
+
the status of the data request
|
|
77
|
+
* :attr:`~eodag.config.PluginConfig.result_url` (``str``) (**mandatory**): url to fetch
|
|
78
|
+
the search result when the data request is done
|
|
79
|
+
* :attr:`~eodag.config.PluginConfig.need_auth` (``bool``): if authentication is needed for
|
|
80
|
+
the search request; default: ``False``
|
|
81
|
+
* :attr:`~eodag.config.PluginConfig.auth_error_code` (``int``): which error code is returned in case of an
|
|
82
|
+
authentication error; only used if ``need_auth=true``
|
|
83
|
+
* :attr:`~eodag.config.PluginConfig.ssl_verify` (``bool``): if the ssl certificates should be
|
|
84
|
+
verified in requests; default: ``True``
|
|
85
|
+
* :attr:`~eodag.config.PluginConfig.timeout` (``int``): time to wait until request timeout in seconds;
|
|
86
|
+
default: ``5``
|
|
87
|
+
* :attr:`~eodag.config.PluginConfig.dates_required` (``bool``): if date parameters are mandatory
|
|
88
|
+
in the request; default: ``True``
|
|
89
|
+
* :attr:`~eodag.config.PluginConfig.pagination` (:class:`~eodag.config.PluginConfig.Pagination`)
|
|
90
|
+
(**mandatory**): The configuration of how the pagination is done on the provider. It is a tree with the
|
|
91
|
+
following nodes:
|
|
92
|
+
|
|
93
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
|
|
94
|
+
leading to the total number of results satisfying a request. This is used for providers which provide the
|
|
95
|
+
total results metadata along with the result of the query and don't have an endpoint for querying
|
|
96
|
+
the number of items satisfying a request, or for providers for which the count endpoint
|
|
97
|
+
returns a json or xml document
|
|
98
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.max_items_per_page` (``int``): The maximum
|
|
99
|
+
number of items per page that the provider can handle; default: ``50``
|
|
100
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.start_page` (``int``): number of the
|
|
101
|
+
first page; default: ``1``
|
|
102
|
+
|
|
103
|
+
* :attr:`~eodag.config.PluginConfig.discover_product_types`
|
|
104
|
+
(:class:`~eodag.config.PluginConfig.DiscoverProductTypes`): configuration for product type discovery based on
|
|
105
|
+
information from the provider; It contains the keys:
|
|
106
|
+
|
|
107
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` (``str``) (**mandatory**): url from which
|
|
108
|
+
the product types can be fetched
|
|
109
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.result_type` (``str``): type of the provider result;
|
|
110
|
+
currently only ``json`` is supported (other types could be used in an extension of this plugin)
|
|
111
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.results_entry` (``str``) (**mandatory**): json path
|
|
112
|
+
to the list of product types
|
|
113
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_id` (``str``): mapping for the
|
|
114
|
+
product type id
|
|
115
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_metadata`
|
|
116
|
+
(``Dict[str, str]``): mapping for product type metadata (e.g. ``abstract``, ``licence``) which can be parsed
|
|
117
|
+
from the provider result
|
|
118
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_properties`
|
|
119
|
+
(``Dict[str, str]``): mapping for product type properties which can be parsed from the result and are not
|
|
120
|
+
product type metadata
|
|
121
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url` (``str``): url to fetch
|
|
122
|
+
data for a single collection; used if product type metadata is not available from the endpoint given in
|
|
123
|
+
:attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url`
|
|
124
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_qs` (``str``): query string
|
|
125
|
+
to be added to the :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` to filter for a
|
|
126
|
+
collection
|
|
127
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_product_type_parsable_metadata`
|
|
128
|
+
(``Dict[str, str]``): mapping for product type metadata returned by the endpoint given in
|
|
129
|
+
:attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url`.
|
|
130
|
+
|
|
131
|
+
* :attr:`~eodag.config.PluginConfig.constraints_file_url` (``str``): url to fetch the constraints for a specific
|
|
132
|
+
product type, can be an http url or a path to a file; the constraints are used to build queryables
|
|
133
|
+
* :attr:`~eodag.config.PluginConfig.constraints_entry` (``str``): key in the json result where the constraints
|
|
134
|
+
can be found; if not given, it is assumed that the constraints are on top level of the result, i.e.
|
|
135
|
+
the result is an array of constraints
|
|
136
|
+
* :attr:`~eodag.config.PluginConfig.metadata_mapping` (``Dict[str, Any]``): The search plugins of this kind can
|
|
137
|
+
detect when a metadata mapping is "query-able", and get the semantics of how to format the query string
|
|
138
|
+
parameter that enables to make a query on the corresponding metadata. To make a metadata query-able,
|
|
139
|
+
just configure it in the metadata mapping to be a list of 2 items, the first one being the
|
|
140
|
+
specification of the query string search formatting. The latter is a string following the
|
|
141
|
+
specification of Python string formatting, with a special behaviour added to it. For example,
|
|
142
|
+
an entry in the metadata mapping of this kind::
|
|
143
|
+
|
|
144
|
+
completionTimeFromAscendingNode:
|
|
145
|
+
- 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
|
|
146
|
+
- '$.properties.acquisition.endViewingDate'
|
|
147
|
+
|
|
148
|
+
means that the search url will have a query string parameter named ``f`` with a value of
|
|
149
|
+
``acquisition.endViewingDate:lte:1543922280.0`` if the search was done with the value
|
|
150
|
+
of ``completionTimeFromAscendingNode`` being ``2018-12-04T12:18:00``. What happened is that
|
|
151
|
+
``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp of the value
|
|
152
|
+
of ``completionTimeFromAscendingNode``. This example shows all there is to know about the
|
|
153
|
+
semantics of the query string formatting introduced by this plugin: any eodag search parameter
|
|
154
|
+
can be referenced in the query string with an additional optional conversion function that
|
|
155
|
+
is separated from it by a ``#`` (see :func:`~eodag.api.product.metadata_mapping.format_metadata` for further
|
|
156
|
+
details on the available converters). Note that the values in the
|
|
157
|
+
:attr:`~eodag.config.PluginConfig.free_text_search_operations` configuration parameter follow the same rule.
|
|
158
|
+
If the metadata_mapping is not a list but only a string, this means that the parameter is not queryable but
|
|
159
|
+
it is included in the result obtained from the provider. The string indicates how the provider result should
|
|
160
|
+
be mapped to the eodag parameter.
|
|
161
|
+
|
|
64
162
|
"""
|
|
65
163
|
|
|
66
164
|
data_request_id: Optional[str]
|
|
@@ -275,9 +373,9 @@ class DataRequestSearch(Search):
|
|
|
275
373
|
except requests.exceptions.Timeout as exc:
|
|
276
374
|
raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
|
|
277
375
|
except requests.RequestException as e:
|
|
278
|
-
raise RequestError(
|
|
279
|
-
f"search job for product_type {product_type} could not be created
|
|
280
|
-
)
|
|
376
|
+
raise RequestError.from_error(
|
|
377
|
+
e, f"search job for product_type {product_type} could not be created"
|
|
378
|
+
) from e
|
|
281
379
|
else:
|
|
282
380
|
logger.info("search job for product_type %s created", product_type)
|
|
283
381
|
return request_job.json()["jobId"]
|
|
@@ -294,7 +392,7 @@ class DataRequestSearch(Search):
|
|
|
294
392
|
except requests.exceptions.Timeout as exc:
|
|
295
393
|
raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
|
|
296
394
|
except requests.RequestException as e:
|
|
297
|
-
raise RequestError(
|
|
395
|
+
raise RequestError.from_error(e, "_cancel_request failed") from e
|
|
298
396
|
|
|
299
397
|
def _check_request_status(self, data_request_id: str) -> bool:
|
|
300
398
|
logger.debug("checking status of request job %s", data_request_id)
|
|
@@ -313,7 +411,7 @@ class DataRequestSearch(Search):
|
|
|
313
411
|
except requests.exceptions.Timeout as exc:
|
|
314
412
|
raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
|
|
315
413
|
except requests.RequestException as e:
|
|
316
|
-
raise RequestError(
|
|
414
|
+
raise RequestError.from_error(e, "_check_request_status failed") from e
|
|
317
415
|
else:
|
|
318
416
|
status_data = status_resp.json()
|
|
319
417
|
if "status_code" in status_data and status_data["status_code"] in [
|
|
@@ -321,7 +419,9 @@ class DataRequestSearch(Search):
|
|
|
321
419
|
404,
|
|
322
420
|
]:
|
|
323
421
|
logger.error(f"_check_request_status failed: {status_data}")
|
|
324
|
-
|
|
422
|
+
error = RequestError("authentication token expired during request")
|
|
423
|
+
error.status_code = status_data["status_code"]
|
|
424
|
+
raise error
|
|
325
425
|
if status_data["status"] == "failed":
|
|
326
426
|
logger.error(f"_check_request_status failed: {status_data}")
|
|
327
427
|
raise RequestError(
|