eodag 3.0.0b3__py3-none-any.whl → 3.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +292 -198
- eodag/api/product/_assets.py +6 -6
- eodag/api/product/_product.py +18 -18
- eodag/api/product/metadata_mapping.py +51 -14
- eodag/api/search_result.py +29 -3
- eodag/cli.py +57 -20
- eodag/config.py +413 -117
- eodag/plugins/apis/base.py +10 -4
- eodag/plugins/apis/ecmwf.py +49 -16
- eodag/plugins/apis/usgs.py +30 -7
- eodag/plugins/authentication/aws_auth.py +14 -5
- eodag/plugins/authentication/base.py +10 -1
- eodag/plugins/authentication/generic.py +14 -3
- eodag/plugins/authentication/header.py +12 -4
- eodag/plugins/authentication/keycloak.py +41 -22
- eodag/plugins/authentication/oauth.py +11 -1
- eodag/plugins/authentication/openid_connect.py +178 -163
- eodag/plugins/authentication/qsauth.py +12 -4
- eodag/plugins/authentication/sas_auth.py +19 -2
- eodag/plugins/authentication/token.py +93 -15
- eodag/plugins/authentication/token_exchange.py +19 -19
- eodag/plugins/crunch/base.py +4 -1
- eodag/plugins/crunch/filter_date.py +5 -2
- eodag/plugins/crunch/filter_latest_intersect.py +5 -4
- eodag/plugins/crunch/filter_latest_tpl_name.py +1 -1
- eodag/plugins/crunch/filter_overlap.py +5 -7
- eodag/plugins/crunch/filter_property.py +6 -6
- eodag/plugins/download/aws.py +50 -34
- eodag/plugins/download/base.py +41 -50
- eodag/plugins/download/creodias_s3.py +40 -2
- eodag/plugins/download/http.py +221 -195
- eodag/plugins/download/s3rest.py +25 -25
- eodag/plugins/manager.py +168 -23
- eodag/plugins/search/base.py +106 -39
- eodag/plugins/search/build_search_result.py +1065 -324
- eodag/plugins/search/cop_marine.py +112 -29
- eodag/plugins/search/creodias_s3.py +45 -24
- eodag/plugins/search/csw.py +41 -1
- eodag/plugins/search/data_request_search.py +109 -9
- eodag/plugins/search/qssearch.py +549 -257
- eodag/plugins/search/static_stac_search.py +20 -21
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +577 -87
- eodag/resources/providers.yml +1619 -2776
- eodag/resources/stac.yml +3 -163
- eodag/resources/user_conf_template.yml +112 -97
- eodag/rest/config.py +1 -2
- eodag/rest/constants.py +0 -1
- eodag/rest/core.py +138 -98
- eodag/rest/errors.py +181 -0
- eodag/rest/server.py +55 -329
- eodag/rest/stac.py +93 -544
- eodag/rest/types/eodag_search.py +19 -8
- eodag/rest/types/queryables.py +6 -8
- eodag/rest/types/stac_search.py +11 -2
- eodag/rest/utils/__init__.py +3 -0
- eodag/types/__init__.py +71 -18
- eodag/types/download_args.py +3 -3
- eodag/types/queryables.py +180 -73
- eodag/types/search_args.py +3 -3
- eodag/types/whoosh.py +126 -0
- eodag/utils/__init__.py +147 -66
- eodag/utils/exceptions.py +47 -26
- eodag/utils/logging.py +37 -77
- eodag/utils/repr.py +65 -6
- eodag/utils/requests.py +11 -13
- eodag/utils/stac_reader.py +1 -1
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/METADATA +80 -81
- eodag-3.1.0b1.dist-info/RECORD +108 -0
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/WHEEL +1 -1
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/entry_points.txt +4 -2
- eodag/resources/constraints/climate-dt.json +0 -13
- eodag/resources/constraints/extremes-dt.json +0 -8
- eodag/utils/constraints.py +0 -244
- eodag-3.0.0b3.dist-info/RECORD +0 -110
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/LICENSE +0 -0
- {eodag-3.0.0b3.dist-info → eodag-3.1.0b1.dist-info}/top_level.txt +0 -0
eodag/plugins/search/qssearch.py
CHANGED
|
@@ -20,8 +20,10 @@ from __future__ import annotations
|
|
|
20
20
|
import logging
|
|
21
21
|
import re
|
|
22
22
|
from copy import copy as copy_copy
|
|
23
|
+
from datetime import datetime, timedelta
|
|
23
24
|
from typing import (
|
|
24
25
|
TYPE_CHECKING,
|
|
26
|
+
Annotated,
|
|
25
27
|
Any,
|
|
26
28
|
Callable,
|
|
27
29
|
Dict,
|
|
@@ -32,6 +34,7 @@ from typing import (
|
|
|
32
34
|
Tuple,
|
|
33
35
|
TypedDict,
|
|
34
36
|
cast,
|
|
37
|
+
get_args,
|
|
35
38
|
)
|
|
36
39
|
from urllib.error import URLError
|
|
37
40
|
from urllib.parse import (
|
|
@@ -44,10 +47,12 @@ from urllib.parse import (
|
|
|
44
47
|
)
|
|
45
48
|
from urllib.request import Request, urlopen
|
|
46
49
|
|
|
50
|
+
import concurrent.futures
|
|
47
51
|
import geojson
|
|
48
52
|
import orjson
|
|
49
53
|
import requests
|
|
50
54
|
import yaml
|
|
55
|
+
from dateutil.utils import today
|
|
51
56
|
from jsonpath_ng import JSONPath
|
|
52
57
|
from lxml import etree
|
|
53
58
|
from pydantic import create_model
|
|
@@ -55,6 +60,7 @@ from pydantic.fields import FieldInfo
|
|
|
55
60
|
from requests import Response
|
|
56
61
|
from requests.adapters import HTTPAdapter
|
|
57
62
|
from requests.auth import AuthBase
|
|
63
|
+
from urllib3 import Retry
|
|
58
64
|
|
|
59
65
|
from eodag.api.product import EOProduct
|
|
60
66
|
from eodag.api.product.metadata_mapping import (
|
|
@@ -69,28 +75,25 @@ from eodag.api.search_result import RawSearchResult
|
|
|
69
75
|
from eodag.plugins.search import PreparedSearch
|
|
70
76
|
from eodag.plugins.search.base import Search
|
|
71
77
|
from eodag.types import json_field_definition_to_python, model_fields_to_annotated
|
|
72
|
-
from eodag.types.queryables import CommonQueryables
|
|
73
78
|
from eodag.types.search_args import SortByList
|
|
74
79
|
from eodag.utils import (
|
|
80
|
+
DEFAULT_MISSION_START_DATE,
|
|
75
81
|
GENERIC_PRODUCT_TYPE,
|
|
76
82
|
HTTP_REQ_TIMEOUT,
|
|
83
|
+
REQ_RETRY_BACKOFF_FACTOR,
|
|
84
|
+
REQ_RETRY_STATUS_FORCELIST,
|
|
85
|
+
REQ_RETRY_TOTAL,
|
|
77
86
|
USER_AGENT,
|
|
78
|
-
Annotated,
|
|
79
87
|
_deprecated,
|
|
80
88
|
deepcopy,
|
|
81
89
|
dict_items_recursive_apply,
|
|
82
90
|
format_dict_items,
|
|
83
|
-
get_args,
|
|
84
91
|
get_ssl_context,
|
|
85
92
|
quote,
|
|
86
93
|
string_to_jsonpath,
|
|
87
94
|
update_nested_dict,
|
|
88
95
|
urlencode,
|
|
89
96
|
)
|
|
90
|
-
from eodag.utils.constraints import (
|
|
91
|
-
fetch_constraints,
|
|
92
|
-
get_constraint_queryables_with_additional_params,
|
|
93
|
-
)
|
|
94
97
|
from eodag.utils.exceptions import (
|
|
95
98
|
AuthenticationError,
|
|
96
99
|
MisconfiguredError,
|
|
@@ -108,98 +111,179 @@ logger = logging.getLogger("eodag.search.qssearch")
|
|
|
108
111
|
|
|
109
112
|
class QueryStringSearch(Search):
|
|
110
113
|
"""A plugin that helps implementing any kind of search protocol that relies on
|
|
111
|
-
query strings (e.g: opensearch).
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
result that gives access to the result entries
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
the
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
114
|
+
query strings (e.g: opensearch). Most of the other search plugins inherit from this plugin.
|
|
115
|
+
|
|
116
|
+
:param provider: provider name
|
|
117
|
+
:param config: Search plugin configuration:
|
|
118
|
+
|
|
119
|
+
* :attr:`~eodag.config.PluginConfig.result_type` (``str``): One of ``json`` or ``xml``, depending on the
|
|
120
|
+
representation of the provider's search results. The default is ``json``.
|
|
121
|
+
* :attr:`~eodag.config.PluginConfig.results_entry` (``str``) (**mandatory**): The name of the key in the
|
|
122
|
+
provider search result that gives access to the result entries
|
|
123
|
+
* :attr:`~eodag.config.PluginConfig.api_endpoint` (``str``) (**mandatory**): The endpoint of the provider's
|
|
124
|
+
search interface
|
|
125
|
+
* :attr:`~eodag.config.PluginConfig.need_auth` (``bool``): if authentication is needed for the search request;
|
|
126
|
+
default: ``False``
|
|
127
|
+
* :attr:`~eodag.config.PluginConfig.auth_error_code` (``int``): which error code is returned in case of an
|
|
128
|
+
authentication error; only used if ``need_auth=true``
|
|
129
|
+
* :attr:`~eodag.config.PluginConfig.ssl_verify` (``bool``): if the ssl certificates should be verified in
|
|
130
|
+
requests; default: ``True``
|
|
131
|
+
* :attr:`~eodag.config.PluginConfig.dont_quote` (``List[str]``): characters that should not be quoted in the
|
|
132
|
+
url params
|
|
133
|
+
* :attr:`~eodag.config.PluginConfig.timeout` (``int``): time to wait until request timeout in seconds;
|
|
134
|
+
default: ``5``
|
|
135
|
+
* :attr:`~eodag.config.PluginConfig.retry_total` (``int``): :class:`urllib3.util.Retry` ``total`` parameter,
|
|
136
|
+
total number of retries to allow; default: ``3``
|
|
137
|
+
* :attr:`~eodag.config.PluginConfig.retry_backoff_factor` (``int``): :class:`urllib3.util.Retry`
|
|
138
|
+
``backoff_factor`` parameter, backoff factor to apply between attempts after the second try; default: ``2``
|
|
139
|
+
* :attr:`~eodag.config.PluginConfig.retry_status_forcelist` (``List[int]``): :class:`urllib3.util.Retry`
|
|
140
|
+
``status_forcelist`` parameter, list of integer HTTP status codes that we should force a retry on; default:
|
|
141
|
+
``[401, 429, 500, 502, 503, 504]``
|
|
142
|
+
* :attr:`~eodag.config.PluginConfig.literal_search_params` (``Dict[str, str]``): A mapping of (search_param =>
|
|
143
|
+
search_value) pairs giving search parameters to be passed as is in the search url query string. This is useful
|
|
144
|
+
for example in situations where the user wants to add a fixed search query parameter exactly
|
|
145
|
+
as it is done on the provider interface.
|
|
146
|
+
* :attr:`~eodag.config.PluginConfig.pagination` (:class:`~eodag.config.PluginConfig.Pagination`)
|
|
147
|
+
(**mandatory**): The configuration of how the pagination is done on the provider. It is a tree with the
|
|
148
|
+
following nodes:
|
|
149
|
+
|
|
150
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.next_page_url_tpl` (``str``) (**mandatory**): The template for
|
|
151
|
+
pagination requests. This is a simple Python format string which will be resolved using the following
|
|
152
|
+
keywords: ``url`` (the base url of the search endpoint), ``search`` (the query string corresponding
|
|
153
|
+
to the search request), ``items_per_page`` (the number of items to return per page),
|
|
154
|
+
``skip`` (the number of items to skip) or ``skip_base_1`` (the number of items to skip,
|
|
155
|
+
starting from 1) and ``page`` (which page to return).
|
|
156
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
|
|
157
|
+
leading to the total number of results satisfying a request. This is used for providers which provide the
|
|
158
|
+
total results metadata along with the result of the query and don't have an endpoint for querying
|
|
159
|
+
the number of items satisfying a request, or for providers for which the count endpoint
|
|
160
|
+
returns a json or xml document
|
|
161
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.count_endpoint` (``str``): The endpoint for counting the number
|
|
162
|
+
of items satisfying a request
|
|
163
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.count_tpl` (``str``): template for the count parameter that
|
|
164
|
+
should be added to the search request
|
|
165
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.next_page_url_key_path` (``str``): A JsonPath expression used
|
|
166
|
+
to retrieve the URL of the next page in the response of the current page.
|
|
167
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.max_items_per_page` (``int``): The maximum number of items per
|
|
168
|
+
page that the provider can handle; default: ``50``
|
|
169
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.start_page` (``int``): number of the first page; default: ``1``
|
|
170
|
+
|
|
171
|
+
* :attr:`~eodag.config.PluginConfig.discover_product_types`
|
|
172
|
+
(:class:`~eodag.config.PluginConfig.DiscoverProductTypes`): configuration for product type discovery based on
|
|
173
|
+
information from the provider; It contains the keys:
|
|
174
|
+
|
|
175
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` (``str``) (**mandatory**): url from which
|
|
176
|
+
the product types can be fetched
|
|
177
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.max_connections` (``int``): Maximum number of
|
|
178
|
+
connections for concurrent HTTP requests
|
|
179
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.result_type` (``str``): type of the provider result;
|
|
180
|
+
currently only ``json`` is supported (other types could be used in an extension of this plugin)
|
|
181
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.results_entry` (``str``) (**mandatory**): json path
|
|
182
|
+
to the list of product types
|
|
183
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_id` (``str``): mapping for the
|
|
184
|
+
product type id
|
|
185
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_metadata`
|
|
186
|
+
(``Dict[str, str]``): mapping for product type metadata (e.g. ``abstract``, ``licence``) which can be parsed
|
|
187
|
+
from the provider result
|
|
188
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_properties`
|
|
189
|
+
(``Dict[str, str]``): mapping for product type properties which can be parsed from the result and are not
|
|
190
|
+
product type metadata
|
|
191
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_unparsable_properties`
|
|
192
|
+
(``Dict[str, str]``): mapping for product type properties which cannot be parsed from the result and are not
|
|
193
|
+
product type metadata
|
|
194
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url` (``str``): url to fetch
|
|
195
|
+
data for a single collection; used if product type metadata is not available from the endpoint given in
|
|
196
|
+
:attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url`
|
|
197
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_qs` (``str``): query string
|
|
198
|
+
to be added to the :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` to filter for a
|
|
199
|
+
collection
|
|
200
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_product_type_parsable_metadata`
|
|
201
|
+
(``Dict[str, str]``): mapping for product type metadata returned by the endpoint given in
|
|
202
|
+
:attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url`.
|
|
203
|
+
|
|
204
|
+
* :attr:`~eodag.config.PluginConfig.sort` (:class:`~eodag.config.PluginConfig.Sort`): configuration for sorting
|
|
205
|
+
the results. It contains the keys:
|
|
206
|
+
|
|
207
|
+
* :attr:`~eodag.config.PluginConfig.Sort.sort_by_default` (``List[Tuple[str, Literal["ASC", "DESC"]]]``):
|
|
208
|
+
parameter and sort order by which the result will be sorted by default (if the user does not enter a
|
|
209
|
+
``sort_by`` parameter); if not given the result will use the default sorting of the provider; Attention:
|
|
210
|
+
for some providers sorting might cause a timeout if no filters are used. In that case no default
|
|
211
|
+
sort parameters should be given. The format is::
|
|
212
|
+
|
|
213
|
+
sort_by_default:
|
|
214
|
+
- !!python/tuple [<param>, <sort order> (ASC or DESC)]
|
|
215
|
+
|
|
216
|
+
* :attr:`~eodag.config.PluginConfig.Sort.sort_by_tpl` (``str``): template for the sort parameter that is added
|
|
217
|
+
to the request; It contains the parameters `sort_param` and `sort_order` which will be replaced by user
|
|
218
|
+
input or default value. If the parameters are added as query params to a GET request, the string
|
|
219
|
+
should start with ``&``, otherwise it should be a valid json string surrounded by ``{{ }}``.
|
|
220
|
+
* :attr:`~eodag.config.PluginConfig.Sort.sort_param_mapping` (``Dict [str, str]``): mapping for the parameters
|
|
221
|
+
available for sorting
|
|
222
|
+
* :attr:`~eodag.config.PluginConfig.Sort.sort_order_mapping`
|
|
223
|
+
(``Dict[Literal["ascending", "descending"], str]``): mapping for the sort order
|
|
224
|
+
* :attr:`~eodag.config.PluginConfig.Sort.max_sort_params` (``int``): maximum number of sort parameters
|
|
225
|
+
supported by the provider; used to validate the user input to avoid failed requests or unexpected behaviour
|
|
226
|
+
(not all parameters are used in the request)
|
|
227
|
+
|
|
228
|
+
* :attr:`~eodag.config.PluginConfig.metadata_mapping` (``Dict[str, Any]``): The search plugins of this kind can
|
|
229
|
+
detect when a metadata mapping is "query-able", and get the semantics of how to format the query string
|
|
230
|
+
parameter that enables to make a query on the corresponding metadata. To make a metadata query-able,
|
|
231
|
+
just configure it in the metadata mapping to be a list of 2 items, the first one being the
|
|
232
|
+
specification of the query string search formatting. The latter is a string following the
|
|
233
|
+
specification of Python string formatting, with a special behaviour added to it. For example,
|
|
234
|
+
an entry in the metadata mapping of this kind::
|
|
235
|
+
|
|
236
|
+
completionTimeFromAscendingNode:
|
|
237
|
+
- 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
|
|
238
|
+
- '$.properties.acquisition.endViewingDate'
|
|
239
|
+
|
|
240
|
+
means that the search url will have a query string parameter named ``f`` with a value of
|
|
241
|
+
``acquisition.endViewingDate:lte:1543922280.0`` if the search was done with the value
|
|
242
|
+
of ``completionTimeFromAscendingNode`` being ``2018-12-04T12:18:00``. What happened is that
|
|
243
|
+
``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp of the value
|
|
244
|
+
of ``completionTimeFromAscendingNode``. This example shows all there is to know about the
|
|
245
|
+
semantics of the query string formatting introduced by this plugin: any eodag search parameter
|
|
246
|
+
can be referenced in the query string with an additional optional conversion function that
|
|
247
|
+
is separated from it by a ``#`` (see :func:`~eodag.api.product.metadata_mapping.format_metadata` for further
|
|
248
|
+
details on the available converters). Note that the values in the
|
|
249
|
+
:attr:`~eodag.config.PluginConfig.free_text_search_operations` configuration parameter follow the same rule.
|
|
250
|
+
If the metadata_mapping is not a list but only a string, this means that the parameter is not queryable but
|
|
251
|
+
it is included in the result obtained from the provider. The string indicates how the provider result should
|
|
252
|
+
be mapped to the eodag parameter.
|
|
253
|
+
* :attr:`~eodag.config.PluginConfig.discover_metadata` (:class:`~eodag.config.PluginConfig.DiscoverMetadata`):
|
|
254
|
+
configuration for the auto-discovery of queryable parameters as well as parameters returned by the provider
|
|
255
|
+
which are not in the metadata mapping. It has the attributes:
|
|
256
|
+
|
|
257
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverMetadata.auto_discovery` (``bool``): if the automatic discovery of
|
|
258
|
+
metadata is activated; default: ``False``; if false, the other parameters are not used;
|
|
259
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverMetadata.metadata_pattern` (``str``): regex string a parameter in
|
|
260
|
+
the result should match so that it is used
|
|
261
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverMetadata.search_param` (``Union [str, Dict[str, Any]]``): format
|
|
262
|
+
to add a query param given by the user and not in the metadata mapping to the requests, 'metadata' will be
|
|
263
|
+
replaced by the search param; can be a string or a dict containing
|
|
264
|
+
:attr:`~eodag.config.PluginConfig.free_text_search_operations`
|
|
265
|
+
(see :class:`~eodag.plugins.search.qssearch.ODataV4Search`)
|
|
266
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverMetadata.metadata_path` (``str``): path where the queryable
|
|
267
|
+
properties can be found in the provider result
|
|
268
|
+
|
|
269
|
+
* :attr:`~eodag.config.PluginConfig.discover_queryables`
|
|
270
|
+
(:class:`~eodag.config.PluginConfig.DiscoverQueryables`): configuration to fetch the queryables from a
|
|
271
|
+
provider queryables endpoint; It has the following keys:
|
|
272
|
+
|
|
273
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` (``str``): url to fetch the queryables valid
|
|
274
|
+
for all product types
|
|
275
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` (``str``): url to fetch the
|
|
276
|
+
queryables for a specific product type
|
|
277
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverQueryables.result_type` (``str``): type of the result (currently
|
|
278
|
+
only ``json`` is used)
|
|
279
|
+
* :attr:`~eodag.config.PluginConfig.DiscoverQueryables.results_entry` (``str``): json path to retrieve the
|
|
280
|
+
queryables from the provider result
|
|
281
|
+
|
|
282
|
+
* :attr:`~eodag.config.PluginConfig.constraints_file_url` (``str``): url to fetch the constraints for a specific
|
|
283
|
+
product type, can be an http url or a path to a file; the constraints are used to build queryables
|
|
284
|
+
* :attr:`~eodag.config.PluginConfig.constraints_entry` (``str``): key in the json result where the constraints
|
|
285
|
+
can be found; if not given, it is assumed that the constraints are on top level of the result, i.e.
|
|
286
|
+
the result is an array of constraints
|
|
203
287
|
"""
|
|
204
288
|
|
|
205
289
|
extract_properties: Dict[str, Callable[..., Dict[str, Any]]] = {
|
|
@@ -362,17 +446,70 @@ class QueryStringSearch(Search):
|
|
|
362
446
|
def discover_product_types(self, **kwargs: Any) -> Optional[Dict[str, Any]]:
|
|
363
447
|
"""Fetch product types list from provider using `discover_product_types` conf
|
|
364
448
|
|
|
449
|
+
:returns: configuration dict containing fetched product types information
|
|
450
|
+
"""
|
|
451
|
+
unpaginated_fetch_url = self.config.discover_product_types.get("fetch_url")
|
|
452
|
+
if not unpaginated_fetch_url:
|
|
453
|
+
return None
|
|
454
|
+
|
|
455
|
+
# product types pagination
|
|
456
|
+
next_page_url_tpl = self.config.discover_product_types.get("next_page_url_tpl")
|
|
457
|
+
page = self.config.discover_product_types.get("start_page", 1)
|
|
458
|
+
|
|
459
|
+
if not next_page_url_tpl:
|
|
460
|
+
# no pagination
|
|
461
|
+
return self.discover_product_types_per_page(**kwargs)
|
|
462
|
+
|
|
463
|
+
conf_update_dict: Dict[str, Any] = {
|
|
464
|
+
"providers_config": {},
|
|
465
|
+
"product_types_config": {},
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
while True:
|
|
469
|
+
fetch_url = next_page_url_tpl.format(url=unpaginated_fetch_url, page=page)
|
|
470
|
+
|
|
471
|
+
conf_update_dict_per_page = self.discover_product_types_per_page(
|
|
472
|
+
fetch_url=fetch_url, **kwargs
|
|
473
|
+
)
|
|
474
|
+
|
|
475
|
+
if (
|
|
476
|
+
not conf_update_dict_per_page
|
|
477
|
+
or not conf_update_dict_per_page.get("providers_config")
|
|
478
|
+
or conf_update_dict_per_page.items() <= conf_update_dict.items()
|
|
479
|
+
):
|
|
480
|
+
# conf_update_dict_per_page is empty or a subset of existing conf
|
|
481
|
+
break
|
|
482
|
+
else:
|
|
483
|
+
conf_update_dict["providers_config"].update(
|
|
484
|
+
conf_update_dict_per_page["providers_config"]
|
|
485
|
+
)
|
|
486
|
+
conf_update_dict["product_types_config"].update(
|
|
487
|
+
conf_update_dict_per_page["product_types_config"]
|
|
488
|
+
)
|
|
489
|
+
|
|
490
|
+
page += 1
|
|
491
|
+
|
|
492
|
+
return conf_update_dict
|
|
493
|
+
|
|
494
|
+
def discover_product_types_per_page(
|
|
495
|
+
self, **kwargs: Any
|
|
496
|
+
) -> Optional[Dict[str, Any]]:
|
|
497
|
+
"""Fetch product types list from provider using `discover_product_types` conf
|
|
498
|
+
using paginated ``kwargs["fetch_url"]``
|
|
499
|
+
|
|
365
500
|
:returns: configuration dict containing fetched product types information
|
|
366
501
|
"""
|
|
367
502
|
try:
|
|
368
503
|
prep = PreparedSearch()
|
|
369
504
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
505
|
+
# url from discover_product_types() or conf
|
|
506
|
+
fetch_url: Optional[str] = kwargs.get("fetch_url")
|
|
507
|
+
if fetch_url is None:
|
|
508
|
+
if fetch_url := self.config.discover_product_types.get("fetch_url"):
|
|
509
|
+
fetch_url = fetch_url.format(**self.config.__dict__)
|
|
510
|
+
else:
|
|
511
|
+
return None
|
|
512
|
+
prep.url = fetch_url
|
|
376
513
|
|
|
377
514
|
# get auth if available
|
|
378
515
|
if "auth" in kwargs:
|
|
@@ -402,7 +539,14 @@ class QueryStringSearch(Search):
|
|
|
402
539
|
"Skipping error while fetching product types for " "{} {} instance:"
|
|
403
540
|
).format(self.provider, self.__class__.__name__)
|
|
404
541
|
|
|
405
|
-
|
|
542
|
+
# Query using appropriate method
|
|
543
|
+
fetch_method = self.config.discover_product_types.get("fetch_method", "GET")
|
|
544
|
+
fetch_body = self.config.discover_product_types.get("fetch_body", {})
|
|
545
|
+
if fetch_method == "POST" and isinstance(self, PostJsonSearch):
|
|
546
|
+
prep.query_params = fetch_body
|
|
547
|
+
response = self._request(prep)
|
|
548
|
+
else:
|
|
549
|
+
response = QueryStringSearch._request(self, prep)
|
|
406
550
|
except (RequestError, KeyError, AttributeError):
|
|
407
551
|
return None
|
|
408
552
|
else:
|
|
@@ -414,16 +558,21 @@ class QueryStringSearch(Search):
|
|
|
414
558
|
if self.config.discover_product_types["result_type"] == "json":
|
|
415
559
|
resp_as_json = response.json()
|
|
416
560
|
# extract results from response json
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
"results_entry"
|
|
421
|
-
|
|
422
|
-
|
|
561
|
+
results_entry = self.config.discover_product_types["results_entry"]
|
|
562
|
+
if not isinstance(results_entry, JSONPath):
|
|
563
|
+
logger.warning(
|
|
564
|
+
f"Could not parse {self.provider} discover_product_types.results_entry"
|
|
565
|
+
f" as JSONPath: {results_entry}"
|
|
566
|
+
)
|
|
567
|
+
return None
|
|
568
|
+
result = [match.value for match in results_entry.find(resp_as_json)]
|
|
423
569
|
if result and isinstance(result[0], list):
|
|
424
570
|
result = result[0]
|
|
425
571
|
|
|
426
|
-
|
|
572
|
+
def conf_update_from_product_type_result(
|
|
573
|
+
product_type_result: Dict[str, Any]
|
|
574
|
+
) -> None:
|
|
575
|
+
"""Update ``conf_update_dict`` using given product type json response"""
|
|
427
576
|
# providers_config extraction
|
|
428
577
|
extracted_mapping = properties_from_json(
|
|
429
578
|
product_type_result,
|
|
@@ -487,7 +636,11 @@ class QueryStringSearch(Search):
|
|
|
487
636
|
][kf]
|
|
488
637
|
)
|
|
489
638
|
for kf in keywords_fields
|
|
490
|
-
if
|
|
639
|
+
if kf
|
|
640
|
+
in conf_update_dict["product_types_config"][
|
|
641
|
+
generic_product_type_id
|
|
642
|
+
]
|
|
643
|
+
and conf_update_dict["product_types_config"][
|
|
491
644
|
generic_product_type_id
|
|
492
645
|
][kf]
|
|
493
646
|
!= NOT_AVAILABLE
|
|
@@ -510,6 +663,20 @@ class QueryStringSearch(Search):
|
|
|
510
663
|
conf_update_dict["product_types_config"][
|
|
511
664
|
generic_product_type_id
|
|
512
665
|
]["keywords"] = keywords_values_str
|
|
666
|
+
|
|
667
|
+
# run concurrent requests and aggregate results in conf_update_dict
|
|
668
|
+
max_connections = self.config.discover_product_types.get(
|
|
669
|
+
"max_connections"
|
|
670
|
+
)
|
|
671
|
+
with concurrent.futures.ThreadPoolExecutor(
|
|
672
|
+
max_workers=max_connections
|
|
673
|
+
) as executor:
|
|
674
|
+
futures = (
|
|
675
|
+
executor.submit(conf_update_from_product_type_result, r)
|
|
676
|
+
for r in result
|
|
677
|
+
)
|
|
678
|
+
[f.result() for f in concurrent.futures.as_completed(futures)]
|
|
679
|
+
|
|
513
680
|
except KeyError as e:
|
|
514
681
|
logger.warning(
|
|
515
682
|
"Incomplete %s discover_product_types configuration: %s",
|
|
@@ -517,6 +684,12 @@ class QueryStringSearch(Search):
|
|
|
517
684
|
e,
|
|
518
685
|
)
|
|
519
686
|
return None
|
|
687
|
+
except requests.RequestException as e:
|
|
688
|
+
logger.debug(
|
|
689
|
+
"Could not parse discovered product types response from "
|
|
690
|
+
f"{self.provider}, {type(e).__name__}: {e.args}"
|
|
691
|
+
)
|
|
692
|
+
return None
|
|
520
693
|
conf_update_dict["product_types_config"] = dict_items_recursive_apply(
|
|
521
694
|
conf_update_dict["product_types_config"],
|
|
522
695
|
lambda k, v: v if v != NOT_AVAILABLE else None,
|
|
@@ -538,9 +711,7 @@ class QueryStringSearch(Search):
|
|
|
538
711
|
self,
|
|
539
712
|
PreparedSearch(
|
|
540
713
|
url=single_collection_url,
|
|
541
|
-
info_message="Fetching data for product type
|
|
542
|
-
product_type
|
|
543
|
-
),
|
|
714
|
+
info_message=f"Fetching data for product type: {product_type}",
|
|
544
715
|
exception_message="Skipping error while fetching product types for "
|
|
545
716
|
"{} {} instance:".format(self.provider, self.__class__.__name__),
|
|
546
717
|
),
|
|
@@ -551,102 +722,6 @@ class QueryStringSearch(Search):
|
|
|
551
722
|
self.config.discover_product_types["single_product_type_parsable_metadata"],
|
|
552
723
|
)
|
|
553
724
|
|
|
554
|
-
def discover_queryables(
|
|
555
|
-
self, **kwargs: Any
|
|
556
|
-
) -> Optional[Dict[str, Annotated[Any, FieldInfo]]]:
|
|
557
|
-
"""Fetch queryables list from provider using its constraints file
|
|
558
|
-
|
|
559
|
-
:param kwargs: additional filters for queryables (`productType` and other search
|
|
560
|
-
arguments)
|
|
561
|
-
:returns: fetched queryable parameters dict
|
|
562
|
-
"""
|
|
563
|
-
product_type = kwargs.pop("productType", None)
|
|
564
|
-
if not product_type:
|
|
565
|
-
return {}
|
|
566
|
-
constraints_file_url = getattr(self.config, "constraints_file_url", "")
|
|
567
|
-
if not constraints_file_url:
|
|
568
|
-
return {}
|
|
569
|
-
|
|
570
|
-
constraints_file_dataset_key = getattr(
|
|
571
|
-
self.config, "constraints_file_dataset_key", "dataset"
|
|
572
|
-
)
|
|
573
|
-
provider_product_type = self.config.products.get(product_type, {}).get(
|
|
574
|
-
constraints_file_dataset_key, None
|
|
575
|
-
)
|
|
576
|
-
|
|
577
|
-
# defaults
|
|
578
|
-
default_queryables = self._get_defaults_as_queryables(product_type)
|
|
579
|
-
# remove unwanted queryables
|
|
580
|
-
for param in getattr(self.config, "remove_from_queryables", []):
|
|
581
|
-
default_queryables.pop(param, None)
|
|
582
|
-
|
|
583
|
-
non_empty_kwargs = {k: v for k, v in kwargs.items() if v}
|
|
584
|
-
|
|
585
|
-
if "{" in constraints_file_url:
|
|
586
|
-
constraints_file_url = constraints_file_url.format(
|
|
587
|
-
dataset=provider_product_type
|
|
588
|
-
)
|
|
589
|
-
constraints = fetch_constraints(constraints_file_url, self)
|
|
590
|
-
if not constraints:
|
|
591
|
-
return default_queryables
|
|
592
|
-
|
|
593
|
-
constraint_params: Dict[str, Dict[str, Set[Any]]] = {}
|
|
594
|
-
if len(kwargs) == 0:
|
|
595
|
-
# get values from constraints without additional filters
|
|
596
|
-
for constraint in constraints:
|
|
597
|
-
for key in constraint.keys():
|
|
598
|
-
if key in constraint_params:
|
|
599
|
-
constraint_params[key]["enum"].update(constraint[key])
|
|
600
|
-
else:
|
|
601
|
-
constraint_params[key] = {"enum": set(constraint[key])}
|
|
602
|
-
else:
|
|
603
|
-
# get values from constraints with additional filters
|
|
604
|
-
constraints_input_params = {k: v for k, v in non_empty_kwargs.items()}
|
|
605
|
-
constraint_params = get_constraint_queryables_with_additional_params(
|
|
606
|
-
constraints, constraints_input_params, self, product_type
|
|
607
|
-
)
|
|
608
|
-
# query params that are not in constraints but might be default queryables
|
|
609
|
-
if len(constraint_params) == 1 and "not_available" in constraint_params:
|
|
610
|
-
not_queryables = set()
|
|
611
|
-
for constraint_param in constraint_params["not_available"]["enum"]:
|
|
612
|
-
param = CommonQueryables.get_queryable_from_alias(constraint_param)
|
|
613
|
-
if param in dict(
|
|
614
|
-
CommonQueryables.model_fields, **default_queryables
|
|
615
|
-
):
|
|
616
|
-
non_empty_kwargs.pop(constraint_param)
|
|
617
|
-
else:
|
|
618
|
-
not_queryables.add(constraint_param)
|
|
619
|
-
if not_queryables:
|
|
620
|
-
raise ValidationError(
|
|
621
|
-
f"parameter(s) {str(not_queryables)} not queryable"
|
|
622
|
-
)
|
|
623
|
-
else:
|
|
624
|
-
# get constraints again without common queryables
|
|
625
|
-
constraint_params = (
|
|
626
|
-
get_constraint_queryables_with_additional_params(
|
|
627
|
-
constraints, non_empty_kwargs, self, product_type
|
|
628
|
-
)
|
|
629
|
-
)
|
|
630
|
-
|
|
631
|
-
field_definitions: Dict[str, Any] = dict()
|
|
632
|
-
for json_param, json_mtd in constraint_params.items():
|
|
633
|
-
param = (
|
|
634
|
-
get_queryable_from_provider(
|
|
635
|
-
json_param, self.get_metadata_mapping(product_type)
|
|
636
|
-
)
|
|
637
|
-
or json_param
|
|
638
|
-
)
|
|
639
|
-
default = kwargs.get(param, None) or self.config.products.get(
|
|
640
|
-
product_type, {}
|
|
641
|
-
).get(param, None)
|
|
642
|
-
annotated_def = json_field_definition_to_python(
|
|
643
|
-
json_mtd, default_value=default, required=True
|
|
644
|
-
)
|
|
645
|
-
field_definitions[param] = get_args(annotated_def)
|
|
646
|
-
|
|
647
|
-
python_queryables = create_model("m", **field_definitions).model_fields
|
|
648
|
-
return dict(default_queryables, **model_fields_to_annotated(python_queryables))
|
|
649
|
-
|
|
650
725
|
def query(
|
|
651
726
|
self,
|
|
652
727
|
prep: PreparedSearch = PreparedSearch(),
|
|
@@ -703,9 +778,6 @@ class QueryStringSearch(Search):
|
|
|
703
778
|
}
|
|
704
779
|
)
|
|
705
780
|
|
|
706
|
-
if product_type is None:
|
|
707
|
-
raise ValidationError("Required productType is missing")
|
|
708
|
-
|
|
709
781
|
qp, qs = self.build_query_string(product_type, **keywords)
|
|
710
782
|
|
|
711
783
|
prep.query_params = qp
|
|
@@ -825,7 +897,7 @@ class QueryStringSearch(Search):
|
|
|
825
897
|
else:
|
|
826
898
|
next_url = "{}?{}".format(search_endpoint, qs_with_sort)
|
|
827
899
|
urls.append(next_url)
|
|
828
|
-
return urls, total_results
|
|
900
|
+
return list(dict.fromkeys(urls)), total_results
|
|
829
901
|
|
|
830
902
|
def do_search(
|
|
831
903
|
self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
|
|
@@ -854,8 +926,8 @@ class QueryStringSearch(Search):
|
|
|
854
926
|
search_url
|
|
855
927
|
)
|
|
856
928
|
single_search_prep.exception_message = (
|
|
857
|
-
"Skipping error while searching for {}
|
|
858
|
-
"
|
|
929
|
+
f"Skipping error while searching for {self.provider}"
|
|
930
|
+
f" {self.__class__.__name__} instance"
|
|
859
931
|
)
|
|
860
932
|
response = self._request(single_search_prep)
|
|
861
933
|
next_page_url_key_path = self.config.pagination.get(
|
|
@@ -969,9 +1041,13 @@ class QueryStringSearch(Search):
|
|
|
969
1041
|
logger.debug(
|
|
970
1042
|
"Could not extract total_items_nb from search results"
|
|
971
1043
|
)
|
|
972
|
-
if
|
|
1044
|
+
if (
|
|
1045
|
+
getattr(self.config, "merge_responses", False)
|
|
1046
|
+
and self.config.result_type == "json"
|
|
1047
|
+
):
|
|
1048
|
+
json_result = cast(list[dict[str, Any]], result)
|
|
973
1049
|
results = (
|
|
974
|
-
[dict(r, **
|
|
1050
|
+
[dict(r, **json_result[i]) for i, r in enumerate(results)]
|
|
975
1051
|
if results
|
|
976
1052
|
else result
|
|
977
1053
|
)
|
|
@@ -1114,6 +1190,14 @@ class QueryStringSearch(Search):
|
|
|
1114
1190
|
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
1115
1191
|
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
1116
1192
|
|
|
1193
|
+
retry_total = getattr(self.config, "retry_total", REQ_RETRY_TOTAL)
|
|
1194
|
+
retry_backoff_factor = getattr(
|
|
1195
|
+
self.config, "retry_backoff_factor", REQ_RETRY_BACKOFF_FACTOR
|
|
1196
|
+
)
|
|
1197
|
+
retry_status_forcelist = getattr(
|
|
1198
|
+
self.config, "retry_status_forcelist", REQ_RETRY_STATUS_FORCELIST
|
|
1199
|
+
)
|
|
1200
|
+
|
|
1117
1201
|
ssl_ctx = get_ssl_context(ssl_verify)
|
|
1118
1202
|
# auth if needed
|
|
1119
1203
|
kwargs: Dict[str, Any] = {}
|
|
@@ -1152,7 +1236,16 @@ class QueryStringSearch(Search):
|
|
|
1152
1236
|
else:
|
|
1153
1237
|
if info_message:
|
|
1154
1238
|
logger.info(info_message)
|
|
1155
|
-
|
|
1239
|
+
|
|
1240
|
+
session = requests.Session()
|
|
1241
|
+
retries = Retry(
|
|
1242
|
+
total=retry_total,
|
|
1243
|
+
backoff_factor=retry_backoff_factor,
|
|
1244
|
+
status_forcelist=retry_status_forcelist,
|
|
1245
|
+
)
|
|
1246
|
+
session.mount(url, HTTPAdapter(max_retries=retries))
|
|
1247
|
+
|
|
1248
|
+
response = session.get(
|
|
1156
1249
|
url,
|
|
1157
1250
|
timeout=timeout,
|
|
1158
1251
|
headers=USER_AGENT,
|
|
@@ -1174,13 +1267,54 @@ class QueryStringSearch(Search):
|
|
|
1174
1267
|
self.__class__.__name__,
|
|
1175
1268
|
err_msg,
|
|
1176
1269
|
)
|
|
1177
|
-
raise RequestError(
|
|
1270
|
+
raise RequestError.from_error(err, exception_message) from err
|
|
1178
1271
|
return response
|
|
1179
1272
|
|
|
1180
1273
|
|
|
1181
1274
|
class ODataV4Search(QueryStringSearch):
|
|
1182
|
-
"""A specialisation of a QueryStringSearch that does a two step search to
|
|
1183
|
-
all products metadata
|
|
1275
|
+
"""A specialisation of a :class:`~eodag.plugins.search.qssearch.QueryStringSearch` that does a two step search to
|
|
1276
|
+
retrieve all products metadata. All configuration parameters of
|
|
1277
|
+
:class:`~eodag.plugins.search.qssearch.QueryStringSearch` are also available for this plugin. In addition, the
|
|
1278
|
+
following parameters can be configured:
|
|
1279
|
+
|
|
1280
|
+
:param provider: provider name
|
|
1281
|
+
:param config: Search plugin configuration:
|
|
1282
|
+
|
|
1283
|
+
* :attr:`~eodag.config.PluginConfig.per_product_metadata_query` (``bool``): should be set to true if the metadata
|
|
1284
|
+
is not given in the search result and a two step search has to be performed; default: false
|
|
1285
|
+
* :attr:`~eodag.config.PluginConfig.metadata_pre_mapping` (:class:`~eodag.config.PluginConfig.MetadataPreMapping`)
|
|
1286
|
+
: a dictionary which can be used to simplify further metadata extraction. For example, going from
|
|
1287
|
+
``$.Metadata[?(@.id="foo")].value`` to ``$.Metadata.foo.value``. It has the keys:
|
|
1288
|
+
|
|
1289
|
+
* :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path` (``str``): json path of the metadata entry
|
|
1290
|
+
* :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path_id` (``str``): key to get the metadata id
|
|
1291
|
+
* :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path_value` (``str``): key to get the metadata
|
|
1292
|
+
value
|
|
1293
|
+
|
|
1294
|
+
* :attr:`~eodag.config.PluginConfig.free_text_search_operations`: (optional) A tree structure of the form::
|
|
1295
|
+
|
|
1296
|
+
# noqa: E800
|
|
1297
|
+
<search-param>: # e.g: $search
|
|
1298
|
+
union: # how to join the operations below (e.g: ' AND ' -->
|
|
1299
|
+
# '(op1 AND op2) AND (op3 OR op4)')
|
|
1300
|
+
wrapper: # a pattern for how each operation will be wrapped
|
|
1301
|
+
# (e.g: '({})' --> '(op1 AND op2)')
|
|
1302
|
+
operations: # The operations to build
|
|
1303
|
+
<opname>: # e.g: AND
|
|
1304
|
+
- <op1> # e.g:
|
|
1305
|
+
# 'sensingStartDate:[{startTimeFromAscendingNode}Z TO *]'
|
|
1306
|
+
- <op2> # e.g:
|
|
1307
|
+
# 'sensingStopDate:[* TO {completionTimeFromAscendingNode}Z]'
|
|
1308
|
+
...
|
|
1309
|
+
...
|
|
1310
|
+
...
|
|
1311
|
+
|
|
1312
|
+
With the structure above, each operation will become a string of the form:
|
|
1313
|
+
``(<op1> <opname> <op2>)``, then the operations will be joined together using
|
|
1314
|
+
the union string and finally if the number of operations is greater than 1,
|
|
1315
|
+
they will be wrapped as specified by the wrapper config key.
|
|
1316
|
+
|
|
1317
|
+
"""
|
|
1184
1318
|
|
|
1185
1319
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
1186
1320
|
super(ODataV4Search, self).__init__(provider, config)
|
|
@@ -1219,7 +1353,7 @@ class ODataV4Search(QueryStringSearch):
|
|
|
1219
1353
|
raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
|
|
1220
1354
|
except requests.RequestException:
|
|
1221
1355
|
logger.exception(
|
|
1222
|
-
"Skipping error while searching for %s %s instance
|
|
1356
|
+
"Skipping error while searching for %s %s instance",
|
|
1223
1357
|
self.provider,
|
|
1224
1358
|
self.__class__.__name__,
|
|
1225
1359
|
)
|
|
@@ -1271,7 +1405,106 @@ class ODataV4Search(QueryStringSearch):
|
|
|
1271
1405
|
|
|
1272
1406
|
|
|
1273
1407
|
class PostJsonSearch(QueryStringSearch):
|
|
1274
|
-
"""A specialisation of a QueryStringSearch that uses POST method
|
|
1408
|
+
"""A specialisation of a :class:`~eodag.plugins.search.qssearch.QueryStringSearch` that uses POST method
|
|
1409
|
+
|
|
1410
|
+
All configuration parameters available for :class:`~eodag.plugins.search.qssearch.QueryStringSearch`
|
|
1411
|
+
are also available for PostJsonSearch. The mappings given in metadata_mapping are used to construct
|
|
1412
|
+
a (json) body for the POST request that is sent to the provider. Because we send a POST request and
|
|
1413
|
+
not a get request, the pagination configuration will look slightly different. It has the
|
|
1414
|
+
following parameters:
|
|
1415
|
+
|
|
1416
|
+
:param provider: provider name
|
|
1417
|
+
:param config: Search plugin configuration:
|
|
1418
|
+
|
|
1419
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.next_page_query_obj` (``str``): The additional parameters
|
|
1420
|
+
needed to add pagination information to the search request. These parameters won't be
|
|
1421
|
+
included in result. This must be a json dict formatted like ``{{"foo":"bar"}}`` because
|
|
1422
|
+
it will be passed to a :meth:`str.format` method before being loaded as json.
|
|
1423
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
|
|
1424
|
+
leading to the total number of results satisfying a request. This is used for providers
|
|
1425
|
+
which provide the total results metadata along with the result of the query and don't
|
|
1426
|
+
have an endpoint for querying the number of items satisfying a request, or for providers
|
|
1427
|
+
for which the count endpoint returns a json or xml document
|
|
1428
|
+
* :attr:`~eodag.config.PluginConfig.Pagination.max_items_per_page` (``int``): The maximum number of items
|
|
1429
|
+
per page that the provider can handle; default: ``50``
|
|
1430
|
+
|
|
1431
|
+
"""
|
|
1432
|
+
|
|
1433
|
+
def _get_default_end_date_from_start_date(
    self, start_datetime: str, product_type_conf: Dict[str, Any]
) -> str:
    """Compute a default end date matching the given start date.

    :param start_datetime: start date, either parsable by
        :meth:`datetime.datetime.fromisoformat` or formatted as ``%Y-%m-%dT%H:%M:%SZ``
    :param product_type_conf: metadata mapping in which the
        ``completionTimeFromAscendingNode`` entry is looked up
    :returns: the default end date; an ISO formatted string when it is derived
        from the start date
    """
    try:
        parsed_start = datetime.fromisoformat(start_datetime)
    except ValueError:
        # fall back to the explicit "Z"-suffixed format
        parsed_start = datetime.strptime(start_datetime, "%Y-%m-%dT%H:%M:%SZ")

    if "completionTimeFromAscendingNode" not in product_type_conf:
        # no end date mapping: use the configured mission end date, passing today as fallback
        return self.get_product_type_cfg_value("missionEndDate", today().isoformat())

    end_mapping = product_type_conf["completionTimeFromAscendingNode"]
    # when the date is mapped to year/month/(day), the end date equals the start date,
    # otherwise it is the start date + 1 day
    # (default dates are only needed for ecmwf products where selected timespans should not be too large)
    mapped_to_year = isinstance(end_mapping, list) and "year" in end_mapping[0]
    offset = timedelta(0) if mapped_to_year else timedelta(days=1)
    return (parsed_start + offset).isoformat()
|
|
1450
|
+
|
|
1451
|
+
def _check_date_params(
    self, keywords: Dict[str, Any], product_type: Optional[str]
) -> None:
    """Check that start and end dates are present in the keywords and add them if not.

    ``keywords`` is updated in place:

    * both dates given: nothing is changed
    * only the start date given: a default end date is computed from it
    * start date missing: if the date mapping is a list and at least one of the
      time parameters it declares (e.g. year/month/day) is missing from the
      keywords, the start date is set from the ``missionStartDate`` configuration
      value. A missing end date is then derived from that start date; an end date
      given by the caller is kept.

    :param keywords: search keywords, updated in place
    :param product_type: product type whose own ``metadata_mapping`` is used
        instead of the plugin-level one, when it defines one
    """
    start_key = "startTimeFromAscendingNode"
    end_key = "completionTimeFromAscendingNode"

    # both keys must be tested explicitly: `"a" and "b" in keywords` only tests "b"
    if start_key in keywords and end_key in keywords:
        return

    product_type_conf = getattr(self.config, "metadata_mapping", {})
    if (
        product_type
        and product_type in self.config.products
        and "metadata_mapping" in self.config.products[product_type]
    ):
        product_type_conf = self.config.products[product_type]["metadata_mapping"]

    # start time given, end time missing
    if start_key in keywords:
        keywords[end_key] = self._get_default_end_date_from_start_date(
            keywords[start_key], product_type_conf
        )
        return

    if end_key not in product_type_conf:
        return

    # the start date mapping may be absent even if the end date one exists
    mapping = product_type_conf.get(start_key)
    if not isinstance(mapping, list):
        mapping = product_type_conf[end_key]
    if not isinstance(mapping, list):
        return

    # get time parameters (date, year, month, ...) from metadata mapping
    input_mapping = mapping[0].replace("{{", "").replace("}}", "")
    time_params = [
        values.split(":")[0].strip().replace('"', "").replace("'", "")
        for values in input_mapping.split(",")
    ]
    # if startTime is not given but other time params (e.g. year/month/(day)) are given,
    # no default date is required
    if all(tp in keywords for tp in time_params):
        return

    keywords[start_key] = self.get_product_type_cfg_value(
        "missionStartDate", DEFAULT_MISSION_START_DATE
    )
    # do not overwrite an end date given by the caller
    if end_key not in keywords:
        keywords[end_key] = self._get_default_end_date_from_start_date(
            keywords[start_key], product_type_conf
        )
|
|
1275
1508
|
|
|
1276
1509
|
def query(
|
|
1277
1510
|
self,
|
|
@@ -1279,7 +1512,7 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1279
1512
|
**kwargs: Any,
|
|
1280
1513
|
) -> Tuple[List[EOProduct], Optional[int]]:
|
|
1281
1514
|
"""Perform a search on an OpenSearch-like interface"""
|
|
1282
|
-
product_type = kwargs.get("productType",
|
|
1515
|
+
product_type = kwargs.get("productType", "")
|
|
1283
1516
|
count = prep.count
|
|
1284
1517
|
# remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
|
|
1285
1518
|
kwargs.pop("product_type", None)
|
|
@@ -1322,6 +1555,8 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1322
1555
|
and isinstance(self.config.metadata_mapping[k], list)
|
|
1323
1556
|
}
|
|
1324
1557
|
)
|
|
1558
|
+
if getattr(self.config, "dates_required", False):
|
|
1559
|
+
self._check_date_params(keywords, product_type)
|
|
1325
1560
|
|
|
1326
1561
|
qp, _ = self.build_query_string(product_type, **keywords)
|
|
1327
1562
|
|
|
@@ -1392,6 +1627,7 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1392
1627
|
# do not try to extract total_items from search results if count is False
|
|
1393
1628
|
del prep.total_items_nb
|
|
1394
1629
|
del prep.need_count
|
|
1630
|
+
|
|
1395
1631
|
provider_results = self.do_search(prep, **kwargs)
|
|
1396
1632
|
if count and total_items is None and hasattr(prep, "total_items_nb"):
|
|
1397
1633
|
total_items = prep.total_items_nb
|
|
@@ -1503,7 +1739,7 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1503
1739
|
)
|
|
1504
1740
|
|
|
1505
1741
|
urls.append(search_endpoint)
|
|
1506
|
-
return urls, total_results
|
|
1742
|
+
return list(dict.fromkeys(urls)), total_results
|
|
1507
1743
|
|
|
1508
1744
|
def _request(
|
|
1509
1745
|
self,
|
|
@@ -1537,8 +1773,14 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1537
1773
|
prep.query_params = self.next_page_query_obj
|
|
1538
1774
|
if info_message:
|
|
1539
1775
|
logger.info(info_message)
|
|
1540
|
-
|
|
1541
|
-
|
|
1776
|
+
try:
|
|
1777
|
+
logger.debug("Query parameters: %s" % geojson.dumps(prep.query_params))
|
|
1778
|
+
except TypeError:
|
|
1779
|
+
logger.debug("Query parameters: %s" % prep.query_params)
|
|
1780
|
+
try:
|
|
1781
|
+
logger.debug("Query kwargs: %s" % geojson.dumps(kwargs))
|
|
1782
|
+
except TypeError:
|
|
1783
|
+
logger.debug("Query kwargs: %s" % kwargs)
|
|
1542
1784
|
response = requests.post(
|
|
1543
1785
|
url,
|
|
1544
1786
|
json=prep.query_params,
|
|
@@ -1551,22 +1793,16 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1551
1793
|
except requests.exceptions.Timeout as exc:
|
|
1552
1794
|
raise TimeOutError(exc, timeout=timeout) from exc
|
|
1553
1795
|
except (requests.RequestException, URLError) as err:
|
|
1796
|
+
response = locals().get("response", Response())
|
|
1554
1797
|
# check if error is identified as auth_error in provider conf
|
|
1555
1798
|
auth_errors = getattr(self.config, "auth_error_code", [None])
|
|
1556
1799
|
if not isinstance(auth_errors, list):
|
|
1557
1800
|
auth_errors = [auth_errors]
|
|
1558
|
-
if
|
|
1559
|
-
hasattr(err, "response")
|
|
1560
|
-
and err.response is not None
|
|
1561
|
-
and getattr(err.response, "status_code", None)
|
|
1562
|
-
and err.response.status_code in auth_errors
|
|
1563
|
-
):
|
|
1801
|
+
if response.status_code and response.status_code in auth_errors:
|
|
1564
1802
|
raise AuthenticationError(
|
|
1565
|
-
"
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
self.provider,
|
|
1569
|
-
)
|
|
1803
|
+
f"Please check your credentials for {self.provider}.",
|
|
1804
|
+
f"HTTP Error {response.status_code} returned.",
|
|
1805
|
+
response.text.strip(),
|
|
1570
1806
|
)
|
|
1571
1807
|
if exception_message:
|
|
1572
1808
|
logger.exception(exception_message)
|
|
@@ -1577,21 +1813,23 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1577
1813
|
self.provider,
|
|
1578
1814
|
self.__class__.__name__,
|
|
1579
1815
|
)
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
error_text = str(err)
|
|
1583
|
-
if (
|
|
1584
|
-
hasattr(err, "response")
|
|
1585
|
-
and err.response is not None
|
|
1586
|
-
and getattr(err.response, "text", None)
|
|
1587
|
-
):
|
|
1588
|
-
error_text = err.response.text
|
|
1589
|
-
raise RequestError(error_text) from err
|
|
1816
|
+
logger.debug(response.content or str(err))
|
|
1817
|
+
raise RequestError.from_error(err, exception_message) from err
|
|
1590
1818
|
return response
|
|
1591
1819
|
|
|
1592
1820
|
|
|
1593
1821
|
class StacSearch(PostJsonSearch):
|
|
1594
|
-
"""A specialisation of
|
|
1822
|
+
"""A specialisation of :class:`~eodag.plugins.search.qssearch.PostJsonSearch` that uses generic
|
|
1823
|
+
STAC configuration, it therefore has the same configuration parameters (those inherited
|
|
1824
|
+
from :class:`~eodag.plugins.search.qssearch.QueryStringSearch`).
|
|
1825
|
+
For providers using ``StacSearch`` default values are defined for most of the parameters
|
|
1826
|
+
(see ``stac_provider.yml``). If some parameters are different for a specific provider, they
|
|
1827
|
+
have to be overwritten. If certain functionalities are not available, their configuration
|
|
1828
|
+
parameters have to be overwritten with ``null``. E.g. if there is no queryables endpoint,
|
|
1829
|
+
the :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` and
|
|
1830
|
+
:attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` in the
|
|
1831
|
+
:attr:`~eodag.config.PluginConfig.discover_queryables` config have to be set to ``null``.
|
|
1832
|
+
"""
|
|
1595
1833
|
|
|
1596
1834
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
1597
1835
|
# backup results_entry overwritten by init
|
|
@@ -1637,12 +1875,35 @@ class StacSearch(PostJsonSearch):
|
|
|
1637
1875
|
arguments)
|
|
1638
1876
|
:returns: fetched queryable parameters dict
|
|
1639
1877
|
"""
|
|
1878
|
+
if (
|
|
1879
|
+
not self.config.discover_queryables["fetch_url"]
|
|
1880
|
+
and not self.config.discover_queryables["product_type_fetch_url"]
|
|
1881
|
+
):
|
|
1882
|
+
logger.info(f"Cannot fetch queryables with {self.provider}")
|
|
1883
|
+
return None
|
|
1884
|
+
|
|
1640
1885
|
product_type = kwargs.get("productType", None)
|
|
1641
1886
|
provider_product_type = (
|
|
1642
1887
|
self.config.products.get(product_type, {}).get("productType", product_type)
|
|
1643
1888
|
if product_type
|
|
1644
1889
|
else None
|
|
1645
1890
|
)
|
|
1891
|
+
if (
|
|
1892
|
+
provider_product_type
|
|
1893
|
+
and not self.config.discover_queryables["product_type_fetch_url"]
|
|
1894
|
+
):
|
|
1895
|
+
logger.info(
|
|
1896
|
+
f"Cannot fetch queryables for a specific product type with {self.provider}"
|
|
1897
|
+
)
|
|
1898
|
+
return None
|
|
1899
|
+
if (
|
|
1900
|
+
not provider_product_type
|
|
1901
|
+
and not self.config.discover_queryables["fetch_url"]
|
|
1902
|
+
):
|
|
1903
|
+
logger.info(
|
|
1904
|
+
f"Cannot fetch global queryables with {self.provider}. A product type must be specified"
|
|
1905
|
+
)
|
|
1906
|
+
return None
|
|
1646
1907
|
|
|
1647
1908
|
try:
|
|
1648
1909
|
unparsed_fetch_url = (
|
|
@@ -1650,14 +1911,22 @@ class StacSearch(PostJsonSearch):
|
|
|
1650
1911
|
if provider_product_type
|
|
1651
1912
|
else self.config.discover_queryables["fetch_url"]
|
|
1652
1913
|
)
|
|
1914
|
+
if unparsed_fetch_url is None:
|
|
1915
|
+
return None
|
|
1653
1916
|
|
|
1654
1917
|
fetch_url = unparsed_fetch_url.format(
|
|
1655
1918
|
provider_product_type=provider_product_type, **self.config.__dict__
|
|
1656
1919
|
)
|
|
1920
|
+
auth = (
|
|
1921
|
+
self.auth
|
|
1922
|
+
if hasattr(self, "auth") and isinstance(self.auth, AuthBase)
|
|
1923
|
+
else None
|
|
1924
|
+
)
|
|
1657
1925
|
response = QueryStringSearch._request(
|
|
1658
1926
|
self,
|
|
1659
1927
|
PreparedSearch(
|
|
1660
1928
|
url=fetch_url,
|
|
1929
|
+
auth=auth,
|
|
1661
1930
|
info_message="Fetching queryables: {}".format(fetch_url),
|
|
1662
1931
|
exception_message="Skipping error while fetching queryables for "
|
|
1663
1932
|
"{} {} instance:".format(self.provider, self.__class__.__name__),
|
|
@@ -1671,11 +1940,15 @@ class StacSearch(PostJsonSearch):
|
|
|
1671
1940
|
resp_as_json = response.json()
|
|
1672
1941
|
|
|
1673
1942
|
# extract results from response json
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1943
|
+
results_entry = self.config.discover_queryables["results_entry"]
|
|
1944
|
+
if not isinstance(results_entry, JSONPath):
|
|
1945
|
+
logger.warning(
|
|
1946
|
+
f"Could not parse {self.provider} discover_queryables.results_entry"
|
|
1947
|
+
f" as JSONPath: {results_entry}"
|
|
1678
1948
|
)
|
|
1949
|
+
return None
|
|
1950
|
+
json_queryables = [
|
|
1951
|
+
match.value for match in results_entry.find(resp_as_json)
|
|
1679
1952
|
][0]
|
|
1680
1953
|
|
|
1681
1954
|
except KeyError as e:
|
|
@@ -1707,5 +1980,24 @@ class StacSearch(PostJsonSearch):
|
|
|
1707
1980
|
field_definitions[param] = get_args(annotated_def)
|
|
1708
1981
|
|
|
1709
1982
|
python_queryables = create_model("m", **field_definitions).model_fields
|
|
1983
|
+
# replace geometry by geom
|
|
1984
|
+
geom_queryable = python_queryables.pop("geometry", None)
|
|
1985
|
+
if geom_queryable:
|
|
1986
|
+
python_queryables["geom"] = geom_queryable
|
|
1710
1987
|
|
|
1711
1988
|
return model_fields_to_annotated(python_queryables)
|
|
1989
|
+
|
|
1990
|
+
|
|
1991
|
+
class PostJsonSearchWithStacQueryables(StacSearch, PostJsonSearch):
    """A specialisation of a :class:`~eodag.plugins.search.qssearch.PostJsonSearch` that uses
    generic STAC configuration for queryables (inherited from :class:`~eodag.plugins.search.qssearch.StacSearch`).

    ``StacSearch`` is listed first in the bases and therefore takes precedence in method
    resolution; the methods defined below call the ``PostJsonSearch`` implementations explicitly.

    :param provider: provider name
    :param config: Search plugin configuration
    """

    def __init__(self, provider: str, config: PluginConfig) -> None:
        # explicit base call instead of super(): ``StacSearch.__init__``, which comes
        # before ``PostJsonSearch`` in the MRO, is not run
        PostJsonSearch.__init__(self, provider, config)

    def build_query_string(
        self, product_type: str, **kwargs: Any
    ) -> Tuple[Dict[str, Any], str]:
        """Build the query string using the search parameters

        Calls the ``PostJsonSearch`` implementation explicitly, whatever ``StacSearch`` provides.

        :param product_type: product type to search for
        :param kwargs: search parameters
        :returns: the query parameters and the query string
        """
        return PostJsonSearch.build_query_string(self, product_type, **kwargs)
|