eodag 3.0.0b2__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. eodag/__init__.py +6 -8
  2. eodag/api/core.py +295 -287
  3. eodag/api/product/__init__.py +10 -4
  4. eodag/api/product/_assets.py +2 -14
  5. eodag/api/product/_product.py +16 -30
  6. eodag/api/product/drivers/__init__.py +7 -2
  7. eodag/api/product/drivers/base.py +0 -3
  8. eodag/api/product/metadata_mapping.py +12 -31
  9. eodag/api/search_result.py +33 -12
  10. eodag/cli.py +35 -19
  11. eodag/config.py +455 -155
  12. eodag/plugins/apis/base.py +13 -7
  13. eodag/plugins/apis/ecmwf.py +16 -7
  14. eodag/plugins/apis/usgs.py +68 -16
  15. eodag/plugins/authentication/aws_auth.py +25 -7
  16. eodag/plugins/authentication/base.py +10 -1
  17. eodag/plugins/authentication/generic.py +14 -3
  18. eodag/plugins/authentication/header.py +12 -4
  19. eodag/plugins/authentication/keycloak.py +41 -22
  20. eodag/plugins/authentication/oauth.py +11 -1
  21. eodag/plugins/authentication/openid_connect.py +183 -167
  22. eodag/plugins/authentication/qsauth.py +12 -4
  23. eodag/plugins/authentication/sas_auth.py +19 -2
  24. eodag/plugins/authentication/token.py +59 -11
  25. eodag/plugins/authentication/token_exchange.py +19 -19
  26. eodag/plugins/crunch/base.py +7 -2
  27. eodag/plugins/crunch/filter_date.py +8 -11
  28. eodag/plugins/crunch/filter_latest_intersect.py +5 -7
  29. eodag/plugins/crunch/filter_latest_tpl_name.py +2 -5
  30. eodag/plugins/crunch/filter_overlap.py +9 -15
  31. eodag/plugins/crunch/filter_property.py +9 -14
  32. eodag/plugins/download/aws.py +84 -99
  33. eodag/plugins/download/base.py +36 -77
  34. eodag/plugins/download/creodias_s3.py +11 -2
  35. eodag/plugins/download/http.py +134 -109
  36. eodag/plugins/download/s3rest.py +37 -43
  37. eodag/plugins/manager.py +173 -41
  38. eodag/plugins/search/__init__.py +9 -9
  39. eodag/plugins/search/base.py +35 -35
  40. eodag/plugins/search/build_search_result.py +55 -64
  41. eodag/plugins/search/cop_marine.py +113 -32
  42. eodag/plugins/search/creodias_s3.py +20 -8
  43. eodag/plugins/search/csw.py +41 -1
  44. eodag/plugins/search/data_request_search.py +119 -14
  45. eodag/plugins/search/qssearch.py +619 -197
  46. eodag/plugins/search/static_stac_search.py +25 -23
  47. eodag/resources/ext_product_types.json +1 -1
  48. eodag/resources/product_types.yml +211 -56
  49. eodag/resources/providers.yml +1762 -1809
  50. eodag/resources/stac.yml +3 -163
  51. eodag/resources/user_conf_template.yml +134 -119
  52. eodag/rest/config.py +1 -2
  53. eodag/rest/constants.py +0 -1
  54. eodag/rest/core.py +70 -92
  55. eodag/rest/errors.py +181 -0
  56. eodag/rest/server.py +24 -330
  57. eodag/rest/stac.py +105 -630
  58. eodag/rest/types/eodag_search.py +17 -15
  59. eodag/rest/types/queryables.py +5 -14
  60. eodag/rest/types/stac_search.py +18 -13
  61. eodag/rest/utils/rfc3339.py +0 -1
  62. eodag/types/__init__.py +24 -6
  63. eodag/types/download_args.py +14 -5
  64. eodag/types/queryables.py +1 -2
  65. eodag/types/search_args.py +10 -11
  66. eodag/types/whoosh.py +0 -2
  67. eodag/utils/__init__.py +97 -136
  68. eodag/utils/constraints.py +0 -8
  69. eodag/utils/exceptions.py +23 -9
  70. eodag/utils/import_system.py +0 -4
  71. eodag/utils/logging.py +37 -80
  72. eodag/utils/notebook.py +4 -4
  73. eodag/utils/requests.py +13 -23
  74. eodag/utils/rest.py +0 -4
  75. eodag/utils/stac_reader.py +3 -15
  76. {eodag-3.0.0b2.dist-info → eodag-3.0.1.dist-info}/METADATA +41 -24
  77. eodag-3.0.1.dist-info/RECORD +109 -0
  78. {eodag-3.0.0b2.dist-info → eodag-3.0.1.dist-info}/WHEEL +1 -1
  79. {eodag-3.0.0b2.dist-info → eodag-3.0.1.dist-info}/entry_points.txt +1 -0
  80. eodag/resources/constraints/climate-dt.json +0 -13
  81. eodag/resources/constraints/extremes-dt.json +0 -8
  82. eodag-3.0.0b2.dist-info/RECORD +0 -110
  83. {eodag-3.0.0b2.dist-info → eodag-3.0.1.dist-info}/LICENSE +0 -0
  84. {eodag-3.0.0b2.dist-info → eodag-3.0.1.dist-info}/top_level.txt +0 -0
@@ -19,19 +19,22 @@ from __future__ import annotations
19
19
 
20
20
  import logging
21
21
  import re
22
- from collections.abc import Iterable
23
22
  from copy import copy as copy_copy
23
+ from datetime import datetime
24
24
  from typing import (
25
25
  TYPE_CHECKING,
26
+ Annotated,
26
27
  Any,
27
28
  Callable,
28
29
  Dict,
29
30
  List,
30
31
  Optional,
32
+ Sequence,
31
33
  Set,
32
34
  Tuple,
33
35
  TypedDict,
34
36
  cast,
37
+ get_args,
35
38
  )
36
39
  from urllib.error import URLError
37
40
  from urllib.parse import (
@@ -44,16 +47,20 @@ from urllib.parse import (
44
47
  )
45
48
  from urllib.request import Request, urlopen
46
49
 
50
+ import concurrent.futures
47
51
  import geojson
48
52
  import orjson
49
53
  import requests
50
54
  import yaml
55
+ from dateutil.utils import today
56
+ from jsonpath_ng import JSONPath
51
57
  from lxml import etree
52
58
  from pydantic import create_model
53
59
  from pydantic.fields import FieldInfo
54
60
  from requests import Response
55
61
  from requests.adapters import HTTPAdapter
56
62
  from requests.auth import AuthBase
63
+ from urllib3 import Retry
57
64
 
58
65
  from eodag.api.product import EOProduct
59
66
  from eodag.api.product.metadata_mapping import (
@@ -73,13 +80,14 @@ from eodag.types.search_args import SortByList
73
80
  from eodag.utils import (
74
81
  GENERIC_PRODUCT_TYPE,
75
82
  HTTP_REQ_TIMEOUT,
83
+ REQ_RETRY_BACKOFF_FACTOR,
84
+ REQ_RETRY_STATUS_FORCELIST,
85
+ REQ_RETRY_TOTAL,
76
86
  USER_AGENT,
77
- Annotated,
78
87
  _deprecated,
79
88
  deepcopy,
80
89
  dict_items_recursive_apply,
81
90
  format_dict_items,
82
- get_args,
83
91
  get_ssl_context,
84
92
  quote,
85
93
  string_to_jsonpath,
@@ -93,6 +101,7 @@ from eodag.utils.constraints import (
93
101
  from eodag.utils.exceptions import (
94
102
  AuthenticationError,
95
103
  MisconfiguredError,
104
+ PluginImplementationError,
96
105
  RequestError,
97
106
  TimeOutError,
98
107
  ValidationError,
@@ -106,103 +115,186 @@ logger = logging.getLogger("eodag.search.qssearch")
106
115
 
107
116
  class QueryStringSearch(Search):
108
117
  """A plugin that helps implementing any kind of search protocol that relies on
109
- query strings (e.g: opensearch).
110
-
111
- The available configuration parameters for this kind of plugin are:
112
-
113
- - **result_type**: (optional) One of "json" or "xml", depending on the
114
- representation of the provider's search results. The default is "json"
115
-
116
- - **results_entry**: (mandatory) The name of the key in the provider search
117
- result that gives access to the result entries
118
-
119
- - **api_endpoint**: (mandatory) The endpoint of the provider's search interface
120
-
121
- - **literal_search_params**: (optional) A mapping of (search_param =>
122
- search_value) pairs giving search parameters to be passed as is in the search
123
- url query string. This is useful for example in situations where the user wants
124
- to pass-in a search query as it is done on the provider interface. In such a case,
125
- the user can put in his configuration file the query he needs to pass to the provider.
126
-
127
- - **pagination**: (mandatory) The configuration of how the pagination is done
128
- on the provider. It is a tree with the following nodes:
129
-
130
- - *next_page_url_tpl*: The template for pagination requests. This is a simple
131
- Python format string which will be resolved using the following keywords:
132
- ``url`` (the base url of the search endpoint), ``search`` (the query string
133
- corresponding to the search request), ``items_per_page`` (the number of
134
- items to return per page), ``skip`` (the number of items to skip) or
135
- ``skip_base_1`` (the number of items to skip, starting from 1) and
136
- ``page`` (which page to return).
137
-
138
- - *total_items_nb_key_path*: (optional) An XPath or JsonPath leading to the
139
- total number of results satisfying a request. This is used for providers
140
- which provides the total results metadata along with the result of the
141
- query and don't have an endpoint for querying the number of items
142
- satisfying a request, or for providers for which the count endpoint returns
143
- a json or xml document
144
-
145
- - *count_endpoint*: (optional) The endpoint for counting the number of items
146
- satisfying a request
147
-
148
- - *next_page_url_key_path*: (optional) A JSONPATH expression used to retrieve
149
- the URL of the next page in the response of the current page.
150
-
151
- - **free_text_search_operations**: (optional) A tree structure of the form::
152
-
153
- # noqa: E800
154
- <search-param>: # e.g: $search
155
- union: # how to join the operations below (e.g: ' AND ' -->
156
- # '(op1 AND op2) AND (op3 OR op4)')
157
- wrapper: # a pattern for how each operation will be wrapped
158
- # (e.g: '({})' --> '(op1 AND op2)')
159
- operations: # The operations to build
160
- <opname>: # e.g: AND
161
- - <op1> # e.g:
162
- # 'sensingStartDate:[{startTimeFromAscendingNode}Z TO *]'
163
- - <op2> # e.g:
164
- # 'sensingStopDate:[* TO {completionTimeFromAscendingNode}Z]'
165
- ...
166
- ...
167
- ...
168
-
169
- With the structure above, each operation will become a string of the form:
170
- '(<op1> <opname> <op2>)', then the operations will be joined together using
171
- the union string and finally if the number of operations is greater than 1,
172
- they will be wrapped as specified by the wrapper config key.
173
-
174
- The search plugins of this kind can detect when a metadata mapping is "query-able",
175
- and get the semantics of how to format the query string parameter that enables to
176
- make a query on the corresponding metadata. To make a metadata query-able, just
177
- configure it in the metadata mapping to be a list of 2 items, the first one being
178
- the specification of the query string search formatting. The later is a string
179
- following the specification of Python string formatting, with a special behaviour
180
- added to it. For example, an entry in the metadata mapping of this kind::
181
-
182
- completionTimeFromAscendingNode:
183
- - 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
184
- - '$.properties.acquisition.endViewingDate'
185
-
186
- means that the search url will have a query string parameter named *"f"* with a
187
- value of *"acquisition.endViewingDate:lte:1543922280.0"* if the search was done
188
- with the value of ``completionTimeFromAscendingNode`` being
189
- ``2018-12-04T12:18:00``. What happened is that
190
- ``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp
191
- of the value of ``completionTimeFromAscendingNode``. This example shows all there
192
- is to know about the semantics of the query string formatting introduced by this
193
- plugin: any eodag search parameter can be referenced in the query string
194
- with an additional optional conversion function that is separated from it by a
195
- ``#`` (see :func:`~eodag.utils.format_metadata` for further details on the
196
- available converters). Note that for the values in the
197
- ``free_text_search_operations`` configuration parameter follow the same rule.
198
-
199
- :param provider: An eodag providers configuration dictionary
200
- :type provider: dict
201
- :param config: Path to the user configuration file
202
- :type config: str
118
+ query strings (e.g: opensearch). Most of the other search plugins inherit from this plugin.
119
+
120
+ :param provider: provider name
121
+ :param config: Search plugin configuration:
122
+
123
+ * :attr:`~eodag.config.PluginConfig.result_type` (``str``): One of ``json`` or ``xml``, depending on the
124
+ representation of the provider's search results. The default is ``json``.
125
+ * :attr:`~eodag.config.PluginConfig.results_entry` (``str``) (**mandatory**): The name of the key in the
126
+ provider search result that gives access to the result entries
127
+ * :attr:`~eodag.config.PluginConfig.api_endpoint` (``str``) (**mandatory**): The endpoint of the provider's
128
+ search interface
129
+ * :attr:`~eodag.config.PluginConfig.need_auth` (``bool``): if authentication is needed for the search request;
130
+ default: ``False``
131
+ * :attr:`~eodag.config.PluginConfig.auth_error_code` (``int``): which error code is returned in case of an
132
+ authentication error; only used if ``need_auth=true``
133
+ * :attr:`~eodag.config.PluginConfig.ssl_verify` (``bool``): if the ssl certificates should be verified in
134
+ requests; default: ``True``
135
+ * :attr:`~eodag.config.PluginConfig.dont_quote` (``List[str]``): characters that should not be quoted in the
136
+ url params
137
+ * :attr:`~eodag.config.PluginConfig.timeout` (``int``): time to wait until request timeout in seconds;
138
+ default: ``5``
139
+ * :attr:`~eodag.config.PluginConfig.retry_total` (``int``): :class:`urllib3.util.Retry` ``total`` parameter,
140
+ total number of retries to allow; default: ``3``
141
+ * :attr:`~eodag.config.PluginConfig.retry_backoff_factor` (``int``): :class:`urllib3.util.Retry`
142
+ ``backoff_factor`` parameter, backoff factor to apply between attempts after the second try; default: ``2``
143
+ * :attr:`~eodag.config.PluginConfig.retry_status_forcelist` (``List[int]``): :class:`urllib3.util.Retry`
144
+ ``status_forcelist`` parameter, list of integer HTTP status codes that we should force a retry on; default:
145
+ ``[401, 429, 500, 502, 503, 504]``
146
+ * :attr:`~eodag.config.PluginConfig.literal_search_params` (``Dict[str, str]``): A mapping of (search_param =>
147
+ search_value) pairs giving search parameters to be passed as is in the search url query string. This is useful
148
+ for example in situations where the user wants to add a fixed search query parameter exactly
149
+ as it is done on the provider interface.
150
+ * :attr:`~eodag.config.PluginConfig.pagination` (:class:`~eodag.config.PluginConfig.Pagination`)
151
+ (**mandatory**): The configuration of how the pagination is done on the provider. It is a tree with the
152
+ following nodes:
153
+
154
+ * :attr:`~eodag.config.PluginConfig.Pagination.next_page_url_tpl` (``str``) (**mandatory**): The template for
155
+ pagination requests. This is a simple Python format string which will be resolved using the following
156
+ keywords: ``url`` (the base url of the search endpoint), ``search`` (the query string corresponding
157
+ to the search request), ``items_per_page`` (the number of items to return per page),
158
+ ``skip`` (the number of items to skip) or ``skip_base_1`` (the number of items to skip,
159
+ starting from 1) and ``page`` (which page to return).
160
+ * :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
161
+ leading to the total number of results satisfying a request. This is used for providers which provide the
162
+ total results metadata along with the result of the query and don't have an endpoint for querying
163
+ the number of items satisfying a request, or for providers for which the count endpoint
164
+ returns a json or xml document
165
+ * :attr:`~eodag.config.PluginConfig.Pagination.count_endpoint` (``str``): The endpoint for counting the number
166
+ of items satisfying a request
167
+ * :attr:`~eodag.config.PluginConfig.Pagination.count_tpl` (``str``): template for the count parameter that
168
+ should be added to the search request
169
+ * :attr:`~eodag.config.PluginConfig.Pagination.next_page_url_key_path` (``str``): A JsonPath expression used
170
+ to retrieve the URL of the next page in the response of the current page.
171
+ * :attr:`~eodag.config.PluginConfig.Pagination.max_items_per_page` (``int``): The maximum number of items per
172
+ page that the provider can handle; default: ``50``
173
+ * :attr:`~eodag.config.PluginConfig.Pagination.start_page` (``int``): number of the first page; default: ``1``
174
+
175
+ * :attr:`~eodag.config.PluginConfig.discover_product_types`
176
+ (:class:`~eodag.config.PluginConfig.DiscoverProductTypes`): configuration for product type discovery based on
177
+ information from the provider; It contains the keys:
178
+
179
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` (``str``) (**mandatory**): url from which
180
+ the product types can be fetched
181
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.max_connections` (``int``): Maximum number of
182
+ connections for concurrent HTTP requests
183
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.result_type` (``str``): type of the provider result;
184
+ currently only ``json`` is supported (other types could be used in an extension of this plugin)
185
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.results_entry` (``str``) (**mandatory**): json path
186
+ to the list of product types
187
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_id` (``str``): mapping for the
188
+ product type id
189
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_metadata`
190
+ (``Dict[str, str]``): mapping for product type metadata (e.g. ``abstract``, ``licence``) which can be parsed
191
+ from the provider result
192
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_properties`
193
+ (``Dict[str, str]``): mapping for product type properties which can be parsed from the result that are not
194
+ product type metadata
195
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url` (``str``): url to fetch
196
+ data for a single collection; used if product type metadata is not available from the endpoint given in
197
+ :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url`
198
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_qs` (``str``): query string
199
+ to be added to the :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` to filter for a
200
+ collection
201
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_product_type_parsable_metadata`
202
+ (``Dict[str, str]``): mapping for product type metadata returned by the endpoint given in
203
+ :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url`.
204
+
205
+ * :attr:`~eodag.config.PluginConfig.sort` (:class:`~eodag.config.PluginConfig.Sort`): configuration for sorting
206
+ the results. It contains the keys:
207
+
208
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_by_default` (``List[Tuple(str, Literal["ASC", "DESC"])]``):
209
+ parameter and sort order by which the result will be sorted by default (if the user does not enter a
210
+ ``sort_by`` parameter); if not given the result will use the default sorting of the provider; Attention:
211
+ for some providers sorting might cause a timeout if no filters are used. In that case no default
212
+ sort parameters should be given. The format is::
213
+
214
+ sort_by_default:
215
+ - !!python/tuple [<param>, <sort order> (ASC or DESC)]
216
+
217
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_by_tpl` (``str``): template for the sort parameter that is added
218
+ to the request; It contains the parameters `sort_param` and `sort_order` which will be replaced by user
219
+ input or default value. If the parameters are added as query params to a GET request, the string
220
+ should start with ``&``, otherwise it should be a valid json string surrounded by ``{{ }}``.
221
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_param_mapping` (``Dict [str, str]``): mapping for the parameters
222
+ available for sorting
223
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_order_mapping`
224
+ (``Dict[Literal["ascending", "descending"], str]``): mapping for the sort order
225
+ * :attr:`~eodag.config.PluginConfig.Sort.max_sort_params` (``int``): maximum number of sort parameters
226
+ supported by the provider; used to validate the user input to avoid failed requests or unexpected behaviour
227
+ (not all parameters are used in the request)
228
+
229
+ * :attr:`~eodag.config.PluginConfig.metadata_mapping` (``Dict[str, Any]``): The search plugins of this kind can
230
+ detect when a metadata mapping is "query-able", and get the semantics of how to format the query string
231
+ parameter that enables to make a query on the corresponding metadata. To make a metadata query-able,
232
+ just configure it in the metadata mapping to be a list of 2 items, the first one being the
233
+ specification of the query string search formatting. The latter is a string following the
234
+ specification of Python string formatting, with a special behaviour added to it. For example,
235
+ an entry in the metadata mapping of this kind::
236
+
237
+ completionTimeFromAscendingNode:
238
+ - 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
239
+ - '$.properties.acquisition.endViewingDate'
240
+
241
+ means that the search url will have a query string parameter named ``f`` with a value of
242
+ ``acquisition.endViewingDate:lte:1543922280.0`` if the search was done with the value
243
+ of ``completionTimeFromAscendingNode`` being ``2018-12-04T12:18:00``. What happened is that
244
+ ``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp of the value
245
+ of ``completionTimeFromAscendingNode``. This example shows all there is to know about the
246
+ semantics of the query string formatting introduced by this plugin: any eodag search parameter
247
+ can be referenced in the query string with an additional optional conversion function that
248
+ is separated from it by a ``#`` (see :func:`~eodag.api.product.metadata_mapping.format_metadata` for further
249
+ details on the available converters). Note that the values in the
250
+ :attr:`~eodag.config.PluginConfig.free_text_search_operations` configuration parameter follow the same rule.
251
+ If the metadata_mapping is not a list but only a string, this means that the parameter is not queryable but
252
+ it is included in the result obtained from the provider. The string indicates how the provider result should
253
+ be mapped to the eodag parameter.
254
+ * :attr:`~eodag.config.PluginConfig.discover_metadata` (:class:`~eodag.config.PluginConfig.DiscoverMetadata`):
255
+ configuration for the auto-discovery of queryable parameters as well as parameters returned by the provider
256
+ which are not in the metadata mapping. It has the attributes:
257
+
258
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.auto_discovery` (``bool``): if the automatic discovery of
259
+ metadata is activated; default: ``False``; if false, the other parameters are not used;
260
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.metadata_pattern` (``str``): regex string a parameter in
261
+ the result should match so that it is used
262
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.search_param` (``Union [str, Dict[str, Any]]``): format
263
+ to add a query param given by the user and not in the metadata mapping to the requests, 'metadata' will be
264
+ replaced by the search param; can be a string or a dict containing
265
+ :attr:`~eodag.config.PluginConfig.free_text_search_operations`
266
+ (see :class:`~eodag.plugins.search.qssearch.ODataV4Search`)
267
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.metadata_path` (``str``): path where the queryable
268
+ properties can be found in the provider result
269
+
270
+ * :attr:`~eodag.config.PluginConfig.discover_queryables`
271
+ (:class:`~eodag.config.PluginConfig.DiscoverQueryables`): configuration to fetch the queryables from a
272
+ provider queryables endpoint; It has the following keys:
273
+
274
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` (``str``): url to fetch the queryables valid
275
+ for all product types
276
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` (``str``): url to fetch the
277
+ queryables for a specific product type
278
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.result_type` (``str``): type of the result (currently
279
+ only ``json`` is used)
280
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.results_entry` (``str``): json path to retrieve the
281
+ queryables from the provider result
282
+
283
+ * :attr:`~eodag.config.PluginConfig.constraints_file_url` (``str``): url to fetch the constraints for a specific
284
+ product type, can be an http url or a path to a file; the constraints are used to build queryables
285
+ * :attr:`~eodag.config.PluginConfig.constraints_file_dataset_key` (``str``): key which is used in the eodag
286
+ configuration to map the eodag product type to the provider product type; default: ``dataset``
287
+ * :attr:`~eodag.config.PluginConfig.constraints_entry` (``str``): key in the json result where the constraints
288
+ can be found; if not given, it is assumed that the constraints are on top level of the result, i.e.
289
+ the result is an array of constraints
290
+ * :attr:`~eodag.config.PluginConfig.stop_without_constraints_entry_key` (``bool``): if true only a provider
291
+ result containing `constraints_entry` is accepted as valid and used to create constraints; default: ``False``
203
292
  """
204
293
 
205
- extract_properties = {"xml": properties_from_xml, "json": properties_from_json}
294
+ extract_properties: Dict[str, Callable[..., Dict[str, Any]]] = {
295
+ "xml": properties_from_xml,
296
+ "json": properties_from_json,
297
+ }
206
298
 
207
299
  def __init__(self, provider: str, config: PluginConfig) -> None:
208
300
  super(QueryStringSearch, self).__init__(provider, config)
@@ -360,17 +452,69 @@ class QueryStringSearch(Search):
360
452
  """Fetch product types list from provider using `discover_product_types` conf
361
453
 
362
454
  :returns: configuration dict containing fetched product types information
363
- :rtype: (optional) dict
455
+ """
456
+ unpaginated_fetch_url = self.config.discover_product_types.get("fetch_url")
457
+ if not unpaginated_fetch_url:
458
+ return None
459
+
460
+ # product types pagination
461
+ next_page_url_tpl = self.config.discover_product_types.get("next_page_url_tpl")
462
+ page = self.config.discover_product_types.get("start_page", 1)
463
+
464
+ if not next_page_url_tpl:
465
+ # no pagination
466
+ return self.discover_product_types_per_page(**kwargs)
467
+
468
+ conf_update_dict: Dict[str, Any] = {
469
+ "providers_config": {},
470
+ "product_types_config": {},
471
+ }
472
+
473
+ while True:
474
+ fetch_url = next_page_url_tpl.format(url=unpaginated_fetch_url, page=page)
475
+
476
+ conf_update_dict_per_page = self.discover_product_types_per_page(
477
+ fetch_url=fetch_url, **kwargs
478
+ )
479
+
480
+ if (
481
+ not conf_update_dict_per_page
482
+ or not conf_update_dict_per_page.get("providers_config")
483
+ or conf_update_dict_per_page.items() <= conf_update_dict.items()
484
+ ):
485
+ # conf_update_dict_per_page is empty or a subset on existing conf
486
+ break
487
+ else:
488
+ conf_update_dict["providers_config"].update(
489
+ conf_update_dict_per_page["providers_config"]
490
+ )
491
+ conf_update_dict["product_types_config"].update(
492
+ conf_update_dict_per_page["product_types_config"]
493
+ )
494
+
495
+ page += 1
496
+
497
+ return conf_update_dict
498
+
499
+ def discover_product_types_per_page(
500
+ self, **kwargs: Any
501
+ ) -> Optional[Dict[str, Any]]:
502
+ """Fetch product types list from provider using `discover_product_types` conf
503
+ using paginated ``kwargs["fetch_url"]``
504
+
505
+ :returns: configuration dict containing fetched product types information
364
506
  """
365
507
  try:
366
508
  prep = PreparedSearch()
367
509
 
368
- prep.url = cast(
369
- str,
370
- self.config.discover_product_types["fetch_url"].format(
371
- **self.config.__dict__
372
- ),
373
- )
510
+ # url from discover_product_types() or conf
511
+ fetch_url: Optional[str] = kwargs.get("fetch_url")
512
+ if fetch_url is None:
513
+ if fetch_url := self.config.discover_product_types.get("fetch_url"):
514
+ fetch_url = fetch_url.format(**self.config.__dict__)
515
+ else:
516
+ return None
517
+ prep.url = fetch_url
374
518
 
375
519
  # get auth if available
376
520
  if "auth" in kwargs:
@@ -400,7 +544,14 @@ class QueryStringSearch(Search):
400
544
  "Skipping error while fetching product types for " "{} {} instance:"
401
545
  ).format(self.provider, self.__class__.__name__)
402
546
 
403
- response = QueryStringSearch._request(self, prep)
547
+ # Query using appropriate method
548
+ fetch_method = self.config.discover_product_types.get("fetch_method", "GET")
549
+ fetch_body = self.config.discover_product_types.get("fetch_body", {})
550
+ if fetch_method == "POST" and isinstance(self, PostJsonSearch):
551
+ prep.query_params = fetch_body
552
+ response = self._request(prep)
553
+ else:
554
+ response = QueryStringSearch._request(self, prep)
404
555
  except (RequestError, KeyError, AttributeError):
405
556
  return None
406
557
  else:
@@ -412,16 +563,21 @@ class QueryStringSearch(Search):
412
563
  if self.config.discover_product_types["result_type"] == "json":
413
564
  resp_as_json = response.json()
414
565
  # extract results from response json
415
- result = [
416
- match.value
417
- for match in self.config.discover_product_types[
418
- "results_entry"
419
- ].find(resp_as_json)
420
- ]
566
+ results_entry = self.config.discover_product_types["results_entry"]
567
+ if not isinstance(results_entry, JSONPath):
568
+ logger.warning(
569
+ f"Could not parse {self.provider} discover_product_types.results_entry"
570
+ f" as JSONPath: {results_entry}"
571
+ )
572
+ return None
573
+ result = [match.value for match in results_entry.find(resp_as_json)]
421
574
  if result and isinstance(result[0], list):
422
575
  result = result[0]
423
576
 
424
- for product_type_result in result:
577
+ def conf_update_from_product_type_result(
578
+ product_type_result: Dict[str, Any]
579
+ ) -> None:
580
+ """Update ``conf_update_dict`` using given product type json response"""
425
581
  # providers_config extraction
426
582
  extracted_mapping = properties_from_json(
427
583
  product_type_result,
@@ -508,6 +664,20 @@ class QueryStringSearch(Search):
508
664
  conf_update_dict["product_types_config"][
509
665
  generic_product_type_id
510
666
  ]["keywords"] = keywords_values_str
667
+
668
+ # runs concurrent requests and aggregate results in conf_update_dict
669
+ max_connections = self.config.discover_product_types.get(
670
+ "max_connections"
671
+ )
672
+ with concurrent.futures.ThreadPoolExecutor(
673
+ max_workers=max_connections
674
+ ) as executor:
675
+ futures = (
676
+ executor.submit(conf_update_from_product_type_result, r)
677
+ for r in result
678
+ )
679
+ [f.result() for f in concurrent.futures.as_completed(futures)]
680
+
511
681
  except KeyError as e:
512
682
  logger.warning(
513
683
  "Incomplete %s discover_product_types configuration: %s",
@@ -515,6 +685,12 @@ class QueryStringSearch(Search):
515
685
  e,
516
686
  )
517
687
  return None
688
+ except requests.RequestException as e:
689
+ logger.debug(
690
+ "Could not parse discovered product types response from "
691
+ f"{self.provider}, {type(e).__name__}: {e.args}"
692
+ )
693
+ return None
518
694
  conf_update_dict["product_types_config"] = dict_items_recursive_apply(
519
695
  conf_update_dict["product_types_config"],
520
696
  lambda k, v: v if v != NOT_AVAILABLE else None,
@@ -527,9 +703,7 @@ class QueryStringSearch(Search):
527
703
  """
528
704
  retrieves additional product type information from an endpoint returning data for a single collection
529
705
  :param product_type: product type
530
- :type product_type: str
531
706
  :return: product types and their metadata
532
- :rtype: Dict[str, Any]
533
707
  """
534
708
  single_collection_url = self.config.discover_product_types[
535
709
  "single_collection_fetch_url"
@@ -538,9 +712,7 @@ class QueryStringSearch(Search):
538
712
  self,
539
713
  PreparedSearch(
540
714
  url=single_collection_url,
541
- info_message="Fetching data for product type product type: {}".format(
542
- product_type
543
- ),
715
+ info_message=f"Fetching data for product type: {product_type}",
544
716
  exception_message="Skipping error while fetching product types for "
545
717
  "{} {} instance:".format(self.provider, self.__class__.__name__),
546
718
  ),
@@ -558,9 +730,7 @@ class QueryStringSearch(Search):
558
730
 
559
731
  :param kwargs: additional filters for queryables (`productType` and other search
560
732
  arguments)
561
- :type kwargs: Any
562
733
  :returns: fetched queryable parameters dict
563
- :rtype: Optional[Dict[str, Annotated[Any, FieldInfo]]]
564
734
  """
565
735
  product_type = kwargs.pop("productType", None)
566
736
  if not product_type:
@@ -630,7 +800,7 @@ class QueryStringSearch(Search):
630
800
  )
631
801
  )
632
802
 
633
- field_definitions = dict()
803
+ field_definitions: Dict[str, Any] = dict()
634
804
  for json_param, json_mtd in constraint_params.items():
635
805
  param = (
636
806
  get_queryable_from_provider(
@@ -657,7 +827,6 @@ class QueryStringSearch(Search):
657
827
  """Perform a search on an OpenSearch-like interface
658
828
 
659
829
  :param prep: Object collecting needed information for search.
660
- :type prep: :class:`~eodag.plugins.search.PreparedSearch`
661
830
  """
662
831
  count = prep.count
663
832
  product_type = kwargs.get("productType", prep.product_type)
@@ -706,9 +875,6 @@ class QueryStringSearch(Search):
706
875
  }
707
876
  )
708
877
 
709
- if product_type is None:
710
- raise ValidationError("Required productType is missing")
711
-
712
878
  qp, qs = self.build_query_string(product_type, **keywords)
713
879
 
714
880
  prep.query_params = qp
@@ -751,7 +917,9 @@ class QueryStringSearch(Search):
751
917
 
752
918
  # Build the final query string, in one go without quoting it
753
919
  # (some providers do not operate well with urlencoded and quoted query strings)
754
- quote_via: Callable[[Any, str, str, str], str] = lambda x, *_args, **_kwargs: x
920
+ def quote_via(x: Any, *_args, **_kwargs) -> str:
921
+ return x
922
+
755
923
  return (
756
924
  query_params,
757
925
  urlencode(query_params, doseq=True, quote_via=quote_via),
@@ -783,7 +951,7 @@ class QueryStringSearch(Search):
783
951
  prep.need_count = True
784
952
  prep.total_items_nb = None
785
953
 
786
- for collection in self.get_collections(prep, **kwargs):
954
+ for collection in self.get_collections(prep, **kwargs) or (None,):
787
955
  # skip empty collection if one is required in api_endpoint
788
956
  if "{collection}" in self.config.api_endpoint and not collection:
789
957
  continue
@@ -811,6 +979,10 @@ class QueryStringSearch(Search):
811
979
  0 if total_results is None else total_results
812
980
  )
813
981
  total_results += _total_results or 0
982
+ if "next_page_url_tpl" not in self.config.pagination:
983
+ raise MisconfiguredError(
984
+ f"next_page_url_tpl is missing in {self.provider} search.pagination configuration"
985
+ )
814
986
  next_url = self.config.pagination["next_page_url_tpl"].format(
815
987
  url=search_endpoint,
816
988
  search=qs_with_sort,
@@ -822,7 +994,7 @@ class QueryStringSearch(Search):
822
994
  else:
823
995
  next_url = "{}?{}".format(search_endpoint, qs_with_sort)
824
996
  urls.append(next_url)
825
- return urls, total_results
997
+ return list(dict.fromkeys(urls)), total_results
826
998
 
827
999
  def do_search(
828
1000
  self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
@@ -833,7 +1005,6 @@ class QueryStringSearch(Search):
833
1005
  as this number is reached
834
1006
 
835
1007
  :param prep: Object collecting needed information for search.
836
- :type prep: :class:`~eodag.plugins.search.PreparedSearch`
837
1008
  """
838
1009
  items_per_page = prep.items_per_page
839
1010
  total_items_nb = 0
@@ -852,8 +1023,8 @@ class QueryStringSearch(Search):
852
1023
  search_url
853
1024
  )
854
1025
  single_search_prep.exception_message = (
855
- "Skipping error while searching for {} {} "
856
- "instance:".format(self.provider, self.__class__.__name__)
1026
+ f"Skipping error while searching for {self.provider}"
1027
+ f" {self.__class__.__name__} instance"
857
1028
  )
858
1029
  response = self._request(single_search_prep)
859
1030
  next_page_url_key_path = self.config.pagination.get(
@@ -873,7 +1044,7 @@ class QueryStringSearch(Search):
873
1044
  )
874
1045
  result = (
875
1046
  [etree.tostring(element_or_tree=entry) for entry in results_xpath]
876
- if isinstance(results_xpath, Iterable)
1047
+ if isinstance(results_xpath, Sequence)
877
1048
  else []
878
1049
  )
879
1050
 
@@ -893,7 +1064,7 @@ class QueryStringSearch(Search):
893
1064
  )
894
1065
  total_nb_results = (
895
1066
  total_nb_results_xpath
896
- if isinstance(total_nb_results_xpath, Iterable)
1067
+ if isinstance(total_nb_results_xpath, Sequence)
897
1068
  else []
898
1069
  )[0]
899
1070
  _total_items_nb = int(total_nb_results)
@@ -910,55 +1081,60 @@ class QueryStringSearch(Search):
910
1081
  resp_as_json = response.json()
911
1082
  if next_page_url_key_path:
912
1083
  path_parsed = next_page_url_key_path
913
- try:
914
- self.next_page_url = path_parsed.find(resp_as_json)[0].value
1084
+ found_paths = path_parsed.find(resp_as_json)
1085
+ if found_paths and not isinstance(found_paths, int):
1086
+ self.next_page_url = found_paths[0].value
915
1087
  logger.debug(
916
1088
  "Next page URL collected and set for the next search",
917
1089
  )
918
- except IndexError:
1090
+ else:
919
1091
  logger.debug("Next page URL could not be collected")
920
1092
  if next_page_query_obj_key_path:
921
1093
  path_parsed = next_page_query_obj_key_path
922
- try:
923
- self.next_page_query_obj = path_parsed.find(resp_as_json)[
924
- 0
925
- ].value
1094
+ found_paths = path_parsed.find(resp_as_json)
1095
+ if found_paths and not isinstance(found_paths, int):
1096
+ self.next_page_query_obj = found_paths[0].value
926
1097
  logger.debug(
927
1098
  "Next page Query-object collected and set for the next search",
928
1099
  )
929
- except IndexError:
1100
+ else:
930
1101
  logger.debug("Next page Query-object could not be collected")
931
1102
  if next_page_merge_key_path:
932
1103
  path_parsed = next_page_merge_key_path
933
- try:
934
- self.next_page_merge = path_parsed.find(resp_as_json)[0].value
1104
+ found_paths = path_parsed.find(resp_as_json)
1105
+ if found_paths and not isinstance(found_paths, int):
1106
+ self.next_page_merge = found_paths[0].value
935
1107
  logger.debug(
936
1108
  "Next page merge collected and set for the next search",
937
1109
  )
938
- except IndexError:
1110
+ else:
939
1111
  logger.debug("Next page merge could not be collected")
940
1112
 
941
1113
  results_entry = string_to_jsonpath(
942
1114
  self.config.results_entry, force=True
943
1115
  )
944
- try:
945
- result = results_entry.find(resp_as_json)[0].value
946
- except Exception:
1116
+ found_entry_paths = results_entry.find(resp_as_json)
1117
+ if found_entry_paths and not isinstance(found_entry_paths, int):
1118
+ result = found_entry_paths[0].value
1119
+ else:
947
1120
  result = []
948
1121
  if not isinstance(result, list):
949
1122
  result = [result]
950
1123
 
951
1124
  if getattr(prep, "need_count", False):
952
1125
  # extract total_items_nb from search results
953
- try:
954
- _total_items_nb = total_items_nb_key_path_parsed.find(
955
- resp_as_json
956
- )[0].value
1126
+ found_total_items_nb_paths = total_items_nb_key_path_parsed.find(
1127
+ resp_as_json
1128
+ )
1129
+ if found_total_items_nb_paths and not isinstance(
1130
+ found_total_items_nb_paths, int
1131
+ ):
1132
+ _total_items_nb = found_total_items_nb_paths[0].value
957
1133
  if getattr(self.config, "merge_responses", False):
958
1134
  total_items_nb = _total_items_nb or 0
959
1135
  else:
960
1136
  total_items_nb += _total_items_nb or 0
961
- except IndexError:
1137
+ else:
962
1138
  logger.debug(
963
1139
  "Could not extract total_items_nb from search results"
964
1140
  )
@@ -1036,25 +1212,34 @@ class QueryStringSearch(Search):
1036
1212
  count_results = response.json()
1037
1213
  if isinstance(count_results, dict):
1038
1214
  path_parsed = self.config.pagination["total_items_nb_key_path"]
1039
- total_results = path_parsed.find(count_results)[0].value
1215
+ if not isinstance(path_parsed, JSONPath):
1216
+ raise PluginImplementationError(
1217
+ "total_items_nb_key_path must be parsed to JSONPath on plugin init"
1218
+ )
1219
+ found_paths = path_parsed.find(count_results)
1220
+ if found_paths and not isinstance(found_paths, int):
1221
+ total_results = found_paths[0].value
1222
+ else:
1223
+ raise MisconfiguredError(
1224
+ "Could not get results count from response using total_items_nb_key_path"
1225
+ )
1040
1226
  else: # interpret the result as a raw int
1041
1227
  total_results = int(count_results)
1042
1228
  return total_results
1043
1229
 
1044
- def get_collections(
1045
- self, prep: PreparedSearch, **kwargs: Any
1046
- ) -> Tuple[Set[Dict[str, Any]], ...]:
1230
+ def get_collections(self, prep: PreparedSearch, **kwargs: Any) -> Tuple[str, ...]:
1047
1231
  """Get the collection to which the product belongs"""
1048
1232
  # See https://earth.esa.int/web/sentinel/missions/sentinel-2/news/-
1049
1233
  # /asset_publisher/Ac0d/content/change-of
1050
1234
  # -format-for-new-sentinel-2-level-1c-products-starting-on-6-december
1051
1235
  product_type: Optional[str] = kwargs.get("productType")
1236
+ collection: Optional[str] = None
1052
1237
  if product_type is None and (
1053
1238
  not hasattr(prep, "product_type_def_params")
1054
1239
  or not prep.product_type_def_params
1055
1240
  ):
1056
- collections: Set[Dict[str, Any]] = set()
1057
- collection: Optional[str] = getattr(self.config, "collection", None)
1241
+ collections: Set[str] = set()
1242
+ collection = getattr(self.config, "collection", None)
1058
1243
  if collection is None:
1059
1244
  try:
1060
1245
  for product_type, product_config in self.config.products.items():
@@ -1072,24 +1257,40 @@ class QueryStringSearch(Search):
1072
1257
  collections.add(collection)
1073
1258
  return tuple(collections)
1074
1259
 
1075
- collection: Optional[str] = getattr(self.config, "collection", None)
1260
+ collection = getattr(self.config, "collection", None)
1076
1261
  if collection is None:
1077
1262
  collection = (
1078
1263
  prep.product_type_def_params.get("collection", None) or product_type
1079
1264
  )
1080
- return (collection,) if not isinstance(collection, list) else tuple(collection)
1265
+
1266
+ if collection is None:
1267
+ return ()
1268
+ elif not isinstance(collection, list):
1269
+ return (collection,)
1270
+ else:
1271
+ return tuple(collection)
1081
1272
 
1082
1273
  def _request(
1083
1274
  self,
1084
1275
  prep: PreparedSearch,
1085
1276
  ) -> Response:
1086
1277
  url = prep.url
1278
+ if url is None:
1279
+ raise ValidationError("Cannot request empty URL")
1087
1280
  info_message = prep.info_message
1088
1281
  exception_message = prep.exception_message
1089
1282
  try:
1090
1283
  timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1091
1284
  ssl_verify = getattr(self.config, "ssl_verify", True)
1092
1285
 
1286
+ retry_total = getattr(self.config, "retry_total", REQ_RETRY_TOTAL)
1287
+ retry_backoff_factor = getattr(
1288
+ self.config, "retry_backoff_factor", REQ_RETRY_BACKOFF_FACTOR
1289
+ )
1290
+ retry_status_forcelist = getattr(
1291
+ self.config, "retry_status_forcelist", REQ_RETRY_STATUS_FORCELIST
1292
+ )
1293
+
1093
1294
  ssl_ctx = get_ssl_context(ssl_verify)
1094
1295
  # auth if needed
1095
1296
  kwargs: Dict[str, Any] = {}
@@ -1128,7 +1329,16 @@ class QueryStringSearch(Search):
1128
1329
  else:
1129
1330
  if info_message:
1130
1331
  logger.info(info_message)
1131
- response = requests.get(
1332
+
1333
+ session = requests.Session()
1334
+ retries = Retry(
1335
+ total=retry_total,
1336
+ backoff_factor=retry_backoff_factor,
1337
+ status_forcelist=retry_status_forcelist,
1338
+ )
1339
+ session.mount(url, HTTPAdapter(max_retries=retries))
1340
+
1341
+ response = session.get(
1132
1342
  url,
1133
1343
  timeout=timeout,
1134
1344
  headers=USER_AGENT,
@@ -1150,13 +1360,54 @@ class QueryStringSearch(Search):
1150
1360
  self.__class__.__name__,
1151
1361
  err_msg,
1152
1362
  )
1153
- raise RequestError(str(err))
1363
+ raise RequestError.from_error(err, exception_message) from err
1154
1364
  return response
1155
1365
 
1156
1366
 
1157
1367
  class ODataV4Search(QueryStringSearch):
1158
- """A specialisation of a QueryStringSearch that does a two step search to retrieve
1159
- all products metadata"""
1368
+ """A specialisation of a :class:`~eodag.plugins.search.qssearch.QueryStringSearch` that does a two step search to
1369
+ retrieve all products metadata. All configuration parameters of
1370
+ :class:`~eodag.plugins.search.qssearch.QueryStringSearch` are also available for this plugin. In addition, the
1371
+ following parameters can be configured:
1372
+
1373
+ :param provider: provider name
1374
+ :param config: Search plugin configuration:
1375
+
1376
+ * :attr:`~eodag.config.PluginConfig.per_product_metadata_query` (``bool``): should be set to true if the metadata
1377
+ is not given in the search result and a two step search has to be performed; default: false
1378
+ * :attr:`~eodag.config.PluginConfig.metadata_pre_mapping` (:class:`~eodag.config.PluginConfig.MetadataPreMapping`)
1379
+ : a dictionary which can be used to simplify further metadata extraction. For example, going from
1380
+ ``$.Metadata[?(@.id="foo")].value`` to ``$.Metadata.foo.value``. It has the keys:
1381
+
1382
+ * :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path` (``str``): json path of the metadata entry
1383
+ * :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path_id` (``str``): key to get the metadata id
1384
+ * :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path_value` (``str``): key to get the metadata
1385
+ value
1386
+
1387
+ * :attr:`~eodag.config.PluginConfig.free_text_search_operations`: (optional) A tree structure of the form::
1388
+
1389
+ # noqa: E800
1390
+ <search-param>: # e.g: $search
1391
+ union: # how to join the operations below (e.g: ' AND ' -->
1392
+ # '(op1 AND op2) AND (op3 OR op4)')
1393
+ wrapper: # a pattern for how each operation will be wrapped
1394
+ # (e.g: '({})' --> '(op1 AND op2)')
1395
+ operations: # The operations to build
1396
+ <opname>: # e.g: AND
1397
+ - <op1> # e.g:
1398
+ # 'sensingStartDate:[{startTimeFromAscendingNode}Z TO *]'
1399
+ - <op2> # e.g:
1400
+ # 'sensingStopDate:[* TO {completionTimeFromAscendingNode}Z]'
1401
+ ...
1402
+ ...
1403
+ ...
1404
+
1405
+ With the structure above, each operation will become a string of the form:
1406
+ ``(<op1> <opname> <op2>)``, then the operations will be joined together using
1407
+ the union string and finally if the number of operations is greater than 1,
1408
+ they will be wrapped as specified by the wrapper config key.
1409
+
1410
+ """
1160
1411
 
1161
1412
  def __init__(self, provider: str, config: PluginConfig) -> None:
1162
1413
  super(ODataV4Search, self).__init__(provider, config)
@@ -1195,7 +1446,7 @@ class ODataV4Search(QueryStringSearch):
1195
1446
  raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
1196
1447
  except requests.RequestException:
1197
1448
  logger.exception(
1198
- "Skipping error while searching for %s %s instance:",
1449
+ "Skipping error while searching for %s %s instance",
1199
1450
  self.provider,
1200
1451
  self.__class__.__name__,
1201
1452
  )
@@ -1247,7 +1498,106 @@ class ODataV4Search(QueryStringSearch):
1247
1498
 
1248
1499
 
1249
1500
  class PostJsonSearch(QueryStringSearch):
1250
- """A specialisation of a QueryStringSearch that uses POST method"""
1501
+ """A specialisation of a :class:`~eodag.plugins.search.qssearch.QueryStringSearch` that uses POST method
1502
+
1503
+ All configuration parameters available for :class:`~eodag.plugins.search.qssearch.QueryStringSearch`
1504
+ are also available for PostJsonSearch. The mappings given in metadata_mapping are used to construct
1505
+ a (json) body for the POST request that is sent to the provider. Due to the fact that we send a POST request and
1506
+ not a GET request, the pagination configuration will look slightly different. It has the
1507
+ following parameters:
1508
+
1509
+ :param provider: provider name
1510
+ :param config: Search plugin configuration:
1511
+
1512
+ * :attr:`~eodag.config.PluginConfig.Pagination.next_page_query_obj` (``str``): The additional parameters
1513
+ needed to add pagination information to the search request. These parameters won't be
1514
+ included in result. This must be a json dict formatted like ``{{"foo":"bar"}}`` because
1515
+ it will be passed to a :meth:`str.format` method before being loaded as json.
1516
+ * :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
1517
+ leading to the total number of results satisfying a request. This is used for providers
1518
+ which provide the total results metadata along with the result of the query and don't
1519
+ have an endpoint for querying the number of items satisfying a request, or for providers
1520
+ for which the count endpoint returns a json or xml document
1521
+ * :attr:`~eodag.config.PluginConfig.Pagination.max_items_per_page` (``int``): The maximum number of items
1522
+ per page that the provider can handle; default: ``50``
1523
+
1524
+ """
1525
+
1526
def _get_default_end_date_from_start_date(
    self, start_datetime: str, product_type: str
) -> str:
    """Compute the end date to use when a search only provides a start date.

    The product type ``_default_end_date`` setting wins when it is set. Otherwise,
    if ``startTimeFromAscendingNode`` is mapped to a list whose first element
    mentions ``year``, the start date itself is returned. In every other case the
    value of ``get_product_type_cfg_value("missionEndDate", ...)`` is returned,
    called with today's date (ISO formatted) as second argument.

    :param start_datetime: start date, either ISO 8601 formatted or matching
                           ``%Y-%m-%dT%H:%M:%SZ``
    :param product_type: product type used to look up the provider configuration
    :returns: the end date as a string
    :raises ValueError: if ``start_datetime`` matches none of the supported formats
    """
    # single tolerant lookup: an unknown product type behaves like an empty
    # configuration instead of raising KeyError further down
    product_type_conf = self.config.products.get(product_type, {})

    default_end_date = product_type_conf.get("_default_end_date", None)
    if default_end_date:
        return default_end_date

    try:
        start_date = datetime.fromisoformat(start_datetime)
    except ValueError:
        # fallback for the "Z" suffixed form that fromisoformat may reject
        start_date = datetime.strptime(start_datetime, "%Y-%m-%dT%H:%M:%SZ")

    if (
        "metadata_mapping" in product_type_conf
        and "startTimeFromAscendingNode" in product_type_conf["metadata_mapping"]
    ):
        mapping = product_type_conf["metadata_mapping"][
            "startTimeFromAscendingNode"
        ]
        # an empty list has no query template to inspect
        if isinstance(mapping, list) and mapping and "year" in mapping[0]:
            # if date is mapped to year/month/(day), use end_date = start_date
            # to avoid large requests
            return start_date.isoformat()

    return self.get_product_type_cfg_value("missionEndDate", today().isoformat())
1551
+
1552
def _check_date_params(self, keywords: Dict[str, Any], product_type: str) -> None:
    """Check that start and end dates are present in ``keywords``, adding them if not.

    ``keywords`` is updated in place:

    * both dates present: nothing is changed
    * only the start date present: the end date is computed with
      ``_get_default_end_date_from_start_date``
    * start date missing: when ``startTimeFromAscendingNode`` is mapped to a list
      and at least one of the time parameters named in its first element is
      absent from ``keywords``, the start date is set from
      ``get_product_type_cfg_value("missionStartDate", ...)`` and the end date is
      computed from it, unless an end date was already given

    :param keywords: search keywords, modified in place
    :param product_type: product type used to look up the provider configuration
    """
    has_start = "startTimeFromAscendingNode" in keywords
    has_end = "completionTimeFromAscendingNode" in keywords
    # both keys must be tested explicitly: `"a" and "b" in keywords` only tests "b"
    if has_start and has_end:
        return

    # start time given, end time missing
    if has_start:
        keywords[
            "completionTimeFromAscendingNode"
        ] = self._get_default_end_date_from_start_date(
            keywords["startTimeFromAscendingNode"], product_type
        )
        return

    # start time missing: tolerate product types without a dedicated configuration
    product_type_conf = self.config.products.get(product_type, {})
    if (
        "metadata_mapping" not in product_type_conf
        or "startTimeFromAscendingNode" not in product_type_conf["metadata_mapping"]
    ):
        return
    mapping = product_type_conf["metadata_mapping"]["startTimeFromAscendingNode"]
    if not isinstance(mapping, list) or not mapping:
        return

    # get time parameters (date, year, month, ...) from metadata mapping
    input_mapping = mapping[0].replace("{{", "").replace("}}", "")
    time_params = [
        values.split(":")[0].strip().replace('"', "").replace("'", "")
        for values in input_mapping.split(",")
    ]
    # if startTime is not given but other time params (e.g. year/month/(day))
    # are given, no default date is required
    if all(tp in keywords for tp in time_params):
        return

    start_date = self.get_product_type_cfg_value(
        "missionStartDate", today().isoformat()
    )
    keywords["startTimeFromAscendingNode"] = start_date
    # never overwrite an end date explicitly requested by the caller
    if not has_end:
        keywords[
            "completionTimeFromAscendingNode"
        ] = self._get_default_end_date_from_start_date(start_date, product_type)
1251
1601
 
1252
1602
  def query(
1253
1603
  self,
@@ -1298,6 +1648,8 @@ class PostJsonSearch(QueryStringSearch):
1298
1648
  and isinstance(self.config.metadata_mapping[k], list)
1299
1649
  }
1300
1650
  )
1651
+ if getattr(self.config, "dates_required", False):
1652
+ self._check_date_params(keywords, product_type)
1301
1653
 
1302
1654
  qp, _ = self.build_query_string(product_type, **keywords)
1303
1655
 
@@ -1329,8 +1681,11 @@ class PostJsonSearch(QueryStringSearch):
1329
1681
  "specific_qssearch"
1330
1682
  ].get("merge_responses", None)
1331
1683
 
1332
- self.count_hits = lambda *x, **y: 1
1333
- self._request = super(PostJsonSearch, self)._request
1684
+ def count_hits(self, *x, **y):
1685
+ return 1
1686
+
1687
+ def _request(self, *x, **y):
1688
+ return super(PostJsonSearch, self)._request(*x, **y)
1334
1689
 
1335
1690
  try:
1336
1691
  eo_products, total_items = super(PostJsonSearch, self).query(
@@ -1431,7 +1786,7 @@ class PostJsonSearch(QueryStringSearch):
1431
1786
  auth_conf_dict = getattr(prep.auth_plugin.config, "credentials", {})
1432
1787
  else:
1433
1788
  auth_conf_dict = {}
1434
- for collection in self.get_collections(prep, **kwargs):
1789
+ for collection in self.get_collections(prep, **kwargs) or (None,):
1435
1790
  try:
1436
1791
  search_endpoint: str = self.config.api_endpoint.rstrip("/").format(
1437
1792
  **dict(collection=collection, **auth_conf_dict)
@@ -1454,7 +1809,11 @@ class PostJsonSearch(QueryStringSearch):
1454
1809
  if getattr(self.config, "merge_responses", False):
1455
1810
  total_results = _total_results or 0
1456
1811
  else:
1457
- total_results += _total_results or 0
1812
+ total_results = (
1813
+ (_total_results or 0)
1814
+ if total_results is None
1815
+ else total_results + (_total_results or 0)
1816
+ )
1458
1817
  if "next_page_query_obj" in self.config.pagination and isinstance(
1459
1818
  self.config.pagination["next_page_query_obj"], str
1460
1819
  ):
@@ -1472,13 +1831,15 @@ class PostJsonSearch(QueryStringSearch):
1472
1831
  )
1473
1832
 
1474
1833
  urls.append(search_endpoint)
1475
- return urls, total_results
1834
+ return list(dict.fromkeys(urls)), total_results
1476
1835
 
1477
1836
  def _request(
1478
1837
  self,
1479
1838
  prep: PreparedSearch,
1480
1839
  ) -> Response:
1481
1840
  url = prep.url
1841
+ if url is None:
1842
+ raise ValidationError("Cannot request empty URL")
1482
1843
  info_message = prep.info_message
1483
1844
  exception_message = prep.exception_message
1484
1845
  timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
@@ -1497,12 +1858,21 @@ class PostJsonSearch(QueryStringSearch):
1497
1858
  kwargs["auth"] = prep.auth
1498
1859
 
1499
1860
  # perform the request using the next page arguments if they are defined
1500
- if getattr(self, "next_page_query_obj", None):
1861
+ if (
1862
+ hasattr(self, "next_page_query_obj")
1863
+ and self.next_page_query_obj is not None
1864
+ ):
1501
1865
  prep.query_params = self.next_page_query_obj
1502
1866
  if info_message:
1503
1867
  logger.info(info_message)
1504
- logger.debug("Query parameters: %s" % prep.query_params)
1505
- logger.debug("Query kwargs: %s" % kwargs)
1868
+ try:
1869
+ logger.debug("Query parameters: %s" % geojson.dumps(prep.query_params))
1870
+ except TypeError:
1871
+ logger.debug("Query parameters: %s" % prep.query_params)
1872
+ try:
1873
+ logger.debug("Query kwargs: %s" % geojson.dumps(kwargs))
1874
+ except TypeError:
1875
+ logger.debug("Query kwargs: %s" % kwargs)
1506
1876
  response = requests.post(
1507
1877
  url,
1508
1878
  json=prep.query_params,
@@ -1515,20 +1885,16 @@ class PostJsonSearch(QueryStringSearch):
1515
1885
  except requests.exceptions.Timeout as exc:
1516
1886
  raise TimeOutError(exc, timeout=timeout) from exc
1517
1887
  except (requests.RequestException, URLError) as err:
1888
+ response = locals().get("response", Response())
1518
1889
  # check if error is identified as auth_error in provider conf
1519
1890
  auth_errors = getattr(self.config, "auth_error_code", [None])
1520
1891
  if not isinstance(auth_errors, list):
1521
1892
  auth_errors = [auth_errors]
1522
- if (
1523
- hasattr(err.response, "status_code")
1524
- and err.response.status_code in auth_errors
1525
- ):
1893
+ if response.status_code and response.status_code in auth_errors:
1526
1894
  raise AuthenticationError(
1527
- "HTTP Error {} returned:\n{}\nPlease check your credentials for {}".format(
1528
- err.response.status_code,
1529
- err.response.text.strip(),
1530
- self.provider,
1531
- )
1895
+ f"Please check your credentials for {self.provider}.",
1896
+ f"HTTP Error {response.status_code} returned.",
1897
+ response.text.strip(),
1532
1898
  )
1533
1899
  if exception_message:
1534
1900
  logger.exception(exception_message)
@@ -1539,17 +1905,23 @@ class PostJsonSearch(QueryStringSearch):
1539
1905
  self.provider,
1540
1906
  self.__class__.__name__,
1541
1907
  )
1542
- if "response" in locals():
1543
- logger.debug(response.content)
1544
- error_text = str(err)
1545
- if getattr(err, "response", None) is not None:
1546
- error_text = err.response.text
1547
- raise RequestError(error_text) from err
1908
+ logger.debug(response.content or str(err))
1909
+ raise RequestError.from_error(err, exception_message) from err
1548
1910
  return response
1549
1911
 
1550
1912
 
1551
1913
  class StacSearch(PostJsonSearch):
1552
- """A specialisation of a QueryStringSearch that uses generic STAC configuration"""
1914
+ """A specialisation of :class:`~eodag.plugins.search.qssearch.PostJsonSearch` that uses generic
1915
+ STAC configuration, it therefore has the same configuration parameters (those inherited
1916
+ from :class:`~eodag.plugins.search.qssearch.QueryStringSearch`).
1917
+ For providers using ``StacSearch`` default values are defined for most of the parameters
1918
+ (see ``stac_provider.yml``). If some parameters are different for a specific provider, they
1919
+ have to be overwritten. If certain functionalities are not available, their configuration
1920
+ parameters have to be overwritten with ``null``. E.g. if there is no queryables endpoint,
1921
+ the :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` and
1922
+ :attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` in the
1923
+ :attr:`~eodag.config.PluginConfig.discover_queryables` config have to be set to ``null``.
1924
+ """
1553
1925
 
1554
1926
  def __init__(self, provider: str, config: PluginConfig) -> None:
1555
1927
  # backup results_entry overwritten by init
@@ -1578,7 +1950,9 @@ class StacSearch(PostJsonSearch):
1578
1950
 
1579
1951
  # Build the final query string, in one go without quoting it
1580
1952
  # (some providers do not operate well with urlencoded and quoted query strings)
1581
- quote_via: Callable[[Any, str, str, str], str] = lambda x, *_args, **_kwargs: x
1953
+ def quote_via(x: Any, *_args, **_kwargs) -> str:
1954
+ return x
1955
+
1582
1956
  return (
1583
1957
  query_params,
1584
1958
  urlencode(query_params, doseq=True, quote_via=quote_via),
@@ -1591,16 +1965,37 @@ class StacSearch(PostJsonSearch):
1591
1965
 
1592
1966
  :param kwargs: additional filters for queryables (`productType` and other search
1593
1967
  arguments)
1594
- :type kwargs: Any
1595
1968
  :returns: fetched queryable parameters dict
1596
- :rtype: Optional[Dict[str, Annotated[Any, FieldInfo]]]
1597
1969
  """
1970
+ if (
1971
+ not self.config.discover_queryables["fetch_url"]
1972
+ and not self.config.discover_queryables["product_type_fetch_url"]
1973
+ ):
1974
+ logger.info(f"Cannot fetch queryables with {self.provider}")
1975
+ return None
1976
+
1598
1977
  product_type = kwargs.get("productType", None)
1599
1978
  provider_product_type = (
1600
1979
  self.config.products.get(product_type, {}).get("productType", product_type)
1601
1980
  if product_type
1602
1981
  else None
1603
1982
  )
1983
+ if (
1984
+ provider_product_type
1985
+ and not self.config.discover_queryables["product_type_fetch_url"]
1986
+ ):
1987
+ logger.info(
1988
+ f"Cannot fetch queryables for a specific product type with {self.provider}"
1989
+ )
1990
+ return None
1991
+ if (
1992
+ not provider_product_type
1993
+ and not self.config.discover_queryables["fetch_url"]
1994
+ ):
1995
+ logger.info(
1996
+ f"Cannot fetch global queryables with {self.provider}. A product type must be specified"
1997
+ )
1998
+ return None
1604
1999
 
1605
2000
  try:
1606
2001
  unparsed_fetch_url = (
@@ -1608,14 +2003,22 @@ class StacSearch(PostJsonSearch):
1608
2003
  if provider_product_type
1609
2004
  else self.config.discover_queryables["fetch_url"]
1610
2005
  )
2006
+ if unparsed_fetch_url is None:
2007
+ return None
1611
2008
 
1612
2009
  fetch_url = unparsed_fetch_url.format(
1613
2010
  provider_product_type=provider_product_type, **self.config.__dict__
1614
2011
  )
2012
+ auth = (
2013
+ self.auth
2014
+ if hasattr(self, "auth") and isinstance(self.auth, AuthBase)
2015
+ else None
2016
+ )
1615
2017
  response = QueryStringSearch._request(
1616
2018
  self,
1617
2019
  PreparedSearch(
1618
2020
  url=fetch_url,
2021
+ auth=auth,
1619
2022
  info_message="Fetching queryables: {}".format(fetch_url),
1620
2023
  exception_message="Skipping error while fetching queryables for "
1621
2024
  "{} {} instance:".format(self.provider, self.__class__.__name__),
@@ -1629,11 +2032,15 @@ class StacSearch(PostJsonSearch):
1629
2032
  resp_as_json = response.json()
1630
2033
 
1631
2034
  # extract results from response json
1632
- json_queryables = [
1633
- match.value
1634
- for match in self.config.discover_queryables["results_entry"].find(
1635
- resp_as_json
2035
+ results_entry = self.config.discover_queryables["results_entry"]
2036
+ if not isinstance(results_entry, JSONPath):
2037
+ logger.warning(
2038
+ f"Could not parse {self.provider} discover_queryables.results_entry"
2039
+ f" as JSONPath: {results_entry}"
1636
2040
  )
2041
+ return None
2042
+ json_queryables = [
2043
+ match.value for match in results_entry.find(resp_as_json)
1637
2044
  ][0]
1638
2045
 
1639
2046
  except KeyError as e:
@@ -1667,3 +2074,18 @@ class StacSearch(PostJsonSearch):
1667
2074
  python_queryables = create_model("m", **field_definitions).model_fields
1668
2075
 
1669
2076
  return model_fields_to_annotated(python_queryables)
2077
+
2078
+
2079
class PostJsonSearchWithStacQueryables(StacSearch, PostJsonSearch):
    """:class:`~eodag.plugins.search.qssearch.PostJsonSearch` flavour whose queryables
    come from the generic STAC configuration inherited from
    :class:`~eodag.plugins.search.qssearch.StacSearch`.

    Initialization and query string building deliberately bypass the
    ``StacSearch`` overrides and use the ones found next in the method resolution
    order.

    :param provider: provider name
    :param config: Search plugin configuration
    """

    def __init__(self, provider: str, config: PluginConfig) -> None:
        # start the lookup right after StacSearch so that its __init__ is skipped
        super(StacSearch, self).__init__(provider, config)

    def build_query_string(
        self, product_type: str, **kwargs: Any
    ) -> Tuple[Dict[str, Any], str]:
        """Build the query string using the search parameters

        :param product_type: product type to search for
        :param kwargs: search parameters
        :returns: the query parameters and the matching query string
        """
        # same bypass as in __init__: skip the StacSearch implementation
        parent = super(StacSearch, self)
        return parent.build_query_string(product_type, **kwargs)