eodag 3.0.0b3__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. eodag/api/core.py +347 -247
  2. eodag/api/product/_assets.py +44 -15
  3. eodag/api/product/_product.py +58 -47
  4. eodag/api/product/drivers/__init__.py +81 -4
  5. eodag/api/product/drivers/base.py +65 -4
  6. eodag/api/product/drivers/generic.py +65 -0
  7. eodag/api/product/drivers/sentinel1.py +97 -0
  8. eodag/api/product/drivers/sentinel2.py +95 -0
  9. eodag/api/product/metadata_mapping.py +129 -93
  10. eodag/api/search_result.py +28 -12
  11. eodag/cli.py +61 -24
  12. eodag/config.py +457 -167
  13. eodag/plugins/apis/base.py +10 -4
  14. eodag/plugins/apis/ecmwf.py +53 -23
  15. eodag/plugins/apis/usgs.py +41 -17
  16. eodag/plugins/authentication/aws_auth.py +30 -18
  17. eodag/plugins/authentication/base.py +14 -3
  18. eodag/plugins/authentication/generic.py +14 -3
  19. eodag/plugins/authentication/header.py +14 -6
  20. eodag/plugins/authentication/keycloak.py +44 -25
  21. eodag/plugins/authentication/oauth.py +18 -4
  22. eodag/plugins/authentication/openid_connect.py +192 -171
  23. eodag/plugins/authentication/qsauth.py +12 -4
  24. eodag/plugins/authentication/sas_auth.py +22 -5
  25. eodag/plugins/authentication/token.py +95 -17
  26. eodag/plugins/authentication/token_exchange.py +19 -19
  27. eodag/plugins/base.py +4 -4
  28. eodag/plugins/crunch/base.py +8 -5
  29. eodag/plugins/crunch/filter_date.py +9 -6
  30. eodag/plugins/crunch/filter_latest_intersect.py +9 -8
  31. eodag/plugins/crunch/filter_latest_tpl_name.py +8 -8
  32. eodag/plugins/crunch/filter_overlap.py +9 -11
  33. eodag/plugins/crunch/filter_property.py +10 -10
  34. eodag/plugins/download/aws.py +181 -105
  35. eodag/plugins/download/base.py +49 -67
  36. eodag/plugins/download/creodias_s3.py +40 -2
  37. eodag/plugins/download/http.py +247 -223
  38. eodag/plugins/download/s3rest.py +29 -28
  39. eodag/plugins/manager.py +176 -41
  40. eodag/plugins/search/__init__.py +6 -5
  41. eodag/plugins/search/base.py +123 -60
  42. eodag/plugins/search/build_search_result.py +1046 -355
  43. eodag/plugins/search/cop_marine.py +132 -39
  44. eodag/plugins/search/creodias_s3.py +19 -68
  45. eodag/plugins/search/csw.py +48 -8
  46. eodag/plugins/search/data_request_search.py +124 -23
  47. eodag/plugins/search/qssearch.py +531 -310
  48. eodag/plugins/search/stac_list_assets.py +85 -0
  49. eodag/plugins/search/static_stac_search.py +23 -24
  50. eodag/resources/ext_product_types.json +1 -1
  51. eodag/resources/product_types.yml +1295 -355
  52. eodag/resources/providers.yml +1819 -3010
  53. eodag/resources/stac.yml +3 -163
  54. eodag/resources/stac_api.yml +2 -2
  55. eodag/resources/user_conf_template.yml +115 -99
  56. eodag/rest/cache.py +2 -2
  57. eodag/rest/config.py +3 -4
  58. eodag/rest/constants.py +0 -1
  59. eodag/rest/core.py +157 -117
  60. eodag/rest/errors.py +181 -0
  61. eodag/rest/server.py +57 -339
  62. eodag/rest/stac.py +133 -581
  63. eodag/rest/types/collections_search.py +3 -3
  64. eodag/rest/types/eodag_search.py +41 -30
  65. eodag/rest/types/queryables.py +42 -32
  66. eodag/rest/types/stac_search.py +15 -16
  67. eodag/rest/utils/__init__.py +14 -21
  68. eodag/rest/utils/cql_evaluate.py +6 -6
  69. eodag/rest/utils/rfc3339.py +2 -2
  70. eodag/types/__init__.py +153 -32
  71. eodag/types/bbox.py +2 -2
  72. eodag/types/download_args.py +4 -4
  73. eodag/types/queryables.py +183 -73
  74. eodag/types/search_args.py +6 -6
  75. eodag/types/whoosh.py +127 -3
  76. eodag/utils/__init__.py +228 -106
  77. eodag/utils/exceptions.py +47 -26
  78. eodag/utils/import_system.py +2 -2
  79. eodag/utils/logging.py +37 -77
  80. eodag/utils/repr.py +65 -6
  81. eodag/utils/requests.py +13 -15
  82. eodag/utils/rest.py +2 -2
  83. eodag/utils/s3.py +231 -0
  84. eodag/utils/stac_reader.py +11 -11
  85. {eodag-3.0.0b3.dist-info → eodag-3.1.0.dist-info}/METADATA +81 -81
  86. eodag-3.1.0.dist-info/RECORD +113 -0
  87. {eodag-3.0.0b3.dist-info → eodag-3.1.0.dist-info}/WHEEL +1 -1
  88. {eodag-3.0.0b3.dist-info → eodag-3.1.0.dist-info}/entry_points.txt +5 -2
  89. eodag/resources/constraints/climate-dt.json +0 -13
  90. eodag/resources/constraints/extremes-dt.json +0 -8
  91. eodag/utils/constraints.py +0 -244
  92. eodag-3.0.0b3.dist-info/RECORD +0 -110
  93. {eodag-3.0.0b3.dist-info → eodag-3.1.0.dist-info}/LICENSE +0 -0
  94. {eodag-3.0.0b3.dist-info → eodag-3.1.0.dist-info}/top_level.txt +0 -0
@@ -19,19 +19,18 @@ from __future__ import annotations
19
19
 
20
20
  import logging
21
21
  import re
22
+ import socket
22
23
  from copy import copy as copy_copy
23
24
  from typing import (
24
25
  TYPE_CHECKING,
26
+ Annotated,
25
27
  Any,
26
28
  Callable,
27
- Dict,
28
- List,
29
29
  Optional,
30
30
  Sequence,
31
- Set,
32
- Tuple,
33
31
  TypedDict,
34
32
  cast,
33
+ get_args,
35
34
  )
36
35
  from urllib.error import URLError
37
36
  from urllib.parse import (
@@ -44,6 +43,7 @@ from urllib.parse import (
44
43
  )
45
44
  from urllib.request import Request, urlopen
46
45
 
46
+ import concurrent.futures
47
47
  import geojson
48
48
  import orjson
49
49
  import requests
@@ -55,6 +55,7 @@ from pydantic.fields import FieldInfo
55
55
  from requests import Response
56
56
  from requests.adapters import HTTPAdapter
57
57
  from requests.auth import AuthBase
58
+ from urllib3 import Retry
58
59
 
59
60
  from eodag.api.product import EOProduct
60
61
  from eodag.api.product.metadata_mapping import (
@@ -69,28 +70,25 @@ from eodag.api.search_result import RawSearchResult
69
70
  from eodag.plugins.search import PreparedSearch
70
71
  from eodag.plugins.search.base import Search
71
72
  from eodag.types import json_field_definition_to_python, model_fields_to_annotated
72
- from eodag.types.queryables import CommonQueryables
73
73
  from eodag.types.search_args import SortByList
74
74
  from eodag.utils import (
75
+ DEFAULT_SEARCH_TIMEOUT,
75
76
  GENERIC_PRODUCT_TYPE,
76
77
  HTTP_REQ_TIMEOUT,
78
+ REQ_RETRY_BACKOFF_FACTOR,
79
+ REQ_RETRY_STATUS_FORCELIST,
80
+ REQ_RETRY_TOTAL,
77
81
  USER_AGENT,
78
- Annotated,
79
82
  _deprecated,
80
83
  deepcopy,
81
84
  dict_items_recursive_apply,
82
85
  format_dict_items,
83
- get_args,
84
86
  get_ssl_context,
85
87
  quote,
86
88
  string_to_jsonpath,
87
89
  update_nested_dict,
88
90
  urlencode,
89
91
  )
90
- from eodag.utils.constraints import (
91
- fetch_constraints,
92
- get_constraint_queryables_with_additional_params,
93
- )
94
92
  from eodag.utils.exceptions import (
95
93
  AuthenticationError,
96
94
  MisconfiguredError,
@@ -108,101 +106,182 @@ logger = logging.getLogger("eodag.search.qssearch")
108
106
 
109
107
  class QueryStringSearch(Search):
110
108
  """A plugin that helps implementing any kind of search protocol that relies on
111
- query strings (e.g: opensearch).
112
-
113
- The available configuration parameters for this kind of plugin are:
114
-
115
- - **result_type**: (optional) One of "json" or "xml", depending on the
116
- representation of the provider's search results. The default is "json"
117
-
118
- - **results_entry**: (mandatory) The name of the key in the provider search
119
- result that gives access to the result entries
120
-
121
- - **api_endpoint**: (mandatory) The endpoint of the provider's search interface
122
-
123
- - **literal_search_params**: (optional) A mapping of (search_param =>
124
- search_value) pairs giving search parameters to be passed as is in the search
125
- url query string. This is useful for example in situations where the user wants
126
- to pass-in a search query as it is done on the provider interface. In such a case,
127
- the user can put in his configuration file the query he needs to pass to the provider.
128
-
129
- - **pagination**: (mandatory) The configuration of how the pagination is done
130
- on the provider. It is a tree with the following nodes:
131
-
132
- - *next_page_url_tpl*: The template for pagination requests. This is a simple
133
- Python format string which will be resolved using the following keywords:
134
- ``url`` (the base url of the search endpoint), ``search`` (the query string
135
- corresponding to the search request), ``items_per_page`` (the number of
136
- items to return per page), ``skip`` (the number of items to skip) or
137
- ``skip_base_1`` (the number of items to skip, starting from 1) and
138
- ``page`` (which page to return).
139
-
140
- - *total_items_nb_key_path*: (optional) An XPath or JsonPath leading to the
141
- total number of results satisfying a request. This is used for providers
142
- which provides the total results metadata along with the result of the
143
- query and don't have an endpoint for querying the number of items
144
- satisfying a request, or for providers for which the count endpoint returns
145
- a json or xml document
146
-
147
- - *count_endpoint*: (optional) The endpoint for counting the number of items
148
- satisfying a request
149
-
150
- - *next_page_url_key_path*: (optional) A JSONPATH expression used to retrieve
151
- the URL of the next page in the response of the current page.
152
-
153
- - **free_text_search_operations**: (optional) A tree structure of the form::
154
-
155
- # noqa: E800
156
- <search-param>: # e.g: $search
157
- union: # how to join the operations below (e.g: ' AND ' -->
158
- # '(op1 AND op2) AND (op3 OR op4)')
159
- wrapper: # a pattern for how each operation will be wrapped
160
- # (e.g: '({})' --> '(op1 AND op2)')
161
- operations: # The operations to build
162
- <opname>: # e.g: AND
163
- - <op1> # e.g:
164
- # 'sensingStartDate:[{startTimeFromAscendingNode}Z TO *]'
165
- - <op2> # e.g:
166
- # 'sensingStopDate:[* TO {completionTimeFromAscendingNode}Z]'
167
- ...
168
- ...
169
- ...
170
-
171
- With the structure above, each operation will become a string of the form:
172
- '(<op1> <opname> <op2>)', then the operations will be joined together using
173
- the union string and finally if the number of operations is greater than 1,
174
- they will be wrapped as specified by the wrapper config key.
175
-
176
- The search plugins of this kind can detect when a metadata mapping is "query-able",
177
- and get the semantics of how to format the query string parameter that enables to
178
- make a query on the corresponding metadata. To make a metadata query-able, just
179
- configure it in the metadata mapping to be a list of 2 items, the first one being
180
- the specification of the query string search formatting. The later is a string
181
- following the specification of Python string formatting, with a special behaviour
182
- added to it. For example, an entry in the metadata mapping of this kind::
183
-
184
- completionTimeFromAscendingNode:
185
- - 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
186
- - '$.properties.acquisition.endViewingDate'
187
-
188
- means that the search url will have a query string parameter named *"f"* with a
189
- value of *"acquisition.endViewingDate:lte:1543922280.0"* if the search was done
190
- with the value of ``completionTimeFromAscendingNode`` being
191
- ``2018-12-04T12:18:00``. What happened is that
192
- ``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp
193
- of the value of ``completionTimeFromAscendingNode``. This example shows all there
194
- is to know about the semantics of the query string formatting introduced by this
195
- plugin: any eodag search parameter can be referenced in the query string
196
- with an additional optional conversion function that is separated from it by a
197
- ``#`` (see :func:`~eodag.utils.format_metadata` for further details on the
198
- available converters). Note that for the values in the
199
- ``free_text_search_operations`` configuration parameter follow the same rule.
200
-
201
- :param provider: An eodag providers configuration dictionary
202
- :param config: Path to the user configuration file
109
+ query strings (e.g: opensearch). Most of the other search plugins inherit from this plugin.
110
+
111
+ :param provider: provider name
112
+ :param config: Search plugin configuration:
113
+
114
+ * :attr:`~eodag.config.PluginConfig.result_type` (``str``): One of ``json`` or ``xml``, depending on the
115
+ representation of the provider's search results. The default is ``json``.
116
+ * :attr:`~eodag.config.PluginConfig.results_entry` (``str``) (**mandatory**): The name of the key in the
117
+ provider search result that gives access to the result entries
118
+ * :attr:`~eodag.config.PluginConfig.api_endpoint` (``str``) (**mandatory**): The endpoint of the provider's
119
+ search interface
120
+ * :attr:`~eodag.config.PluginConfig.need_auth` (``bool``): if authentication is needed for the search request;
121
+ default: ``False``
122
+ * :attr:`~eodag.config.PluginConfig.auth_error_code` (``int``): which error code is returned in case of an
123
+ authentication error; only used if ``need_auth=true``
124
+ * :attr:`~eodag.config.PluginConfig.ssl_verify` (``bool``): if the ssl certificates should be verified in
125
+ requests; default: ``True``
126
+ * :attr:`~eodag.config.PluginConfig.dont_quote` (``list[str]``): characters that should not be quoted in the
127
+ url params
128
+ * :attr:`~eodag.config.PluginConfig.timeout` (``int``): time to wait until request timeout in seconds;
129
+ default: ``5``
130
+ * :attr:`~eodag.config.PluginConfig.retry_total` (``int``): :class:`urllib3.util.Retry` ``total`` parameter,
131
+ total number of retries to allow; default: ``3``
132
+ * :attr:`~eodag.config.PluginConfig.retry_backoff_factor` (``int``): :class:`urllib3.util.Retry`
133
+ ``backoff_factor`` parameter, backoff factor to apply between attempts after the second try; default: ``2``
134
+ * :attr:`~eodag.config.PluginConfig.retry_status_forcelist` (``list[int]``): :class:`urllib3.util.Retry`
135
+ ``status_forcelist`` parameter, list of integer HTTP status codes that we should force a retry on; default:
136
+ ``[401, 429, 500, 502, 503, 504]``
137
+ * :attr:`~eodag.config.PluginConfig.literal_search_params` (``dict[str, str]``): A mapping of (search_param =>
138
+ search_value) pairs giving search parameters to be passed as is in the search url query string. This is useful
139
+ for example in situations where the user wants to add a fixed search query parameter exactly
140
+ as it is done on the provider interface.
141
+ * :attr:`~eodag.config.PluginConfig.pagination` (:class:`~eodag.config.PluginConfig.Pagination`)
142
+ (**mandatory**): The configuration of how the pagination is done on the provider. It is a tree with the
143
+ following nodes:
144
+
145
+ * :attr:`~eodag.config.PluginConfig.Pagination.next_page_url_tpl` (``str``) (**mandatory**): The template for
146
+ pagination requests. This is a simple Python format string which will be resolved using the following
147
+ keywords: ``url`` (the base url of the search endpoint), ``search`` (the query string corresponding
148
+ to the search request), ``items_per_page`` (the number of items to return per page),
149
+ ``skip`` (the number of items to skip) or ``skip_base_1`` (the number of items to skip,
150
+ starting from 1) and ``page`` (which page to return).
151
+ * :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
152
+ leading to the total number of results satisfying a request. This is used for providers which provide the
153
+ total results metadata along with the result of the query and don't have an endpoint for querying
154
+ the number of items satisfying a request, or for providers for which the count endpoint
155
+ returns a json or xml document
156
+ * :attr:`~eodag.config.PluginConfig.Pagination.count_endpoint` (``str``): The endpoint for counting the number
157
+ of items satisfying a request
158
+ * :attr:`~eodag.config.PluginConfig.Pagination.count_tpl` (``str``): template for the count parameter that
159
+ should be added to the search request
160
+ * :attr:`~eodag.config.PluginConfig.Pagination.next_page_url_key_path` (``str``): A JsonPath expression used
161
+ to retrieve the URL of the next page in the response of the current page.
162
+ * :attr:`~eodag.config.PluginConfig.Pagination.max_items_per_page` (``int``): The maximum number of items per
163
+ page that the provider can handle; default: ``50``
164
+ * :attr:`~eodag.config.PluginConfig.Pagination.start_page` (``int``): number of the first page; default: ``1``
165
+
166
+ * :attr:`~eodag.config.PluginConfig.discover_product_types`
167
+ (:class:`~eodag.config.PluginConfig.DiscoverProductTypes`): configuration for product type discovery based on
168
+ information from the provider; It contains the keys:
169
+
170
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` (``str``) (**mandatory**): url from which
171
+ the product types can be fetched
172
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.max_connections` (``int``): Maximum number of
173
+ connections for concurrent HTTP requests
174
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.result_type` (``str``): type of the provider result;
175
+ currently only ``json`` is supported (other types could be used in an extension of this plugin)
176
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.results_entry` (``str``) (**mandatory**): json path
177
+ to the list of product types
178
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_id` (``str``): mapping for the
179
+ product type id
180
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_metadata`
181
+ (``dict[str, str]``): mapping for product type metadata (e.g. ``abstract``, ``licence``) which can be parsed
182
+ from the provider result
183
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_properties`
184
+ (``dict[str, str]``): mapping for product type properties which can be parsed from the result and are not
185
+ product type metadata
186
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_unparsable_properties`
187
+ (``dict[str, str]``): mapping for product type properties which cannot be parsed from the result and are not
188
+ product type metadata
189
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url` (``str``): url to fetch
190
+ data for a single collection; used if product type metadata is not available from the endpoint given in
191
+ :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url`
192
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_qs` (``str``): query string
193
+ to be added to the :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` to filter for a
194
+ collection
195
+ * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_product_type_parsable_metadata`
196
+ (``dict[str, str]``): mapping for product type metadata returned by the endpoint given in
197
+ :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url`.
198
+
199
+ * :attr:`~eodag.config.PluginConfig.sort` (:class:`~eodag.config.PluginConfig.Sort`): configuration for sorting
200
+ the results. It contains the keys:
201
+
202
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_by_default` (``list[tuple[str, Literal["ASC", "DESC"]]]``):
203
+ parameter and sort order by which the result will be sorted by default (if the user does not enter a
204
+ ``sort_by`` parameter); if not given the result will use the default sorting of the provider; Attention:
205
+ for some providers sorting might cause a timeout if no filters are used. In that case no default
206
+ sort parameters should be given. The format is::
207
+
208
+ sort_by_default:
209
+ - !!python/tuple [<param>, <sort order> (ASC or DESC)]
210
+
211
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_by_tpl` (``str``): template for the sort parameter that is added
212
+ to the request; It contains the parameters `sort_param` and `sort_order` which will be replaced by user
213
+ input or default value. If the parameters are added as query params to a GET request, the string
214
+ should start with ``&``, otherwise it should be a valid json string surrounded by ``{{ }}``.
215
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_param_mapping` (``dict[str, str]``): mapping for the parameters
216
+ available for sorting
217
+ * :attr:`~eodag.config.PluginConfig.Sort.sort_order_mapping`
218
+ (``dict[Literal["ascending", "descending"], str]``): mapping for the sort order
219
+ * :attr:`~eodag.config.PluginConfig.Sort.max_sort_params` (``int``): maximum number of sort parameters
220
+ supported by the provider; used to validate the user input to avoid failed requests or unexpected behaviour
221
+ (not all parameters are used in the request)
222
+
223
+ * :attr:`~eodag.config.PluginConfig.metadata_mapping` (``dict[str, Any]``): The search plugins of this kind can
224
+ detect when a metadata mapping is "query-able", and get the semantics of how to format the query string
225
+ parameter that enables to make a query on the corresponding metadata. To make a metadata query-able,
226
+ just configure it in the metadata mapping to be a list of 2 items, the first one being the
227
+ specification of the query string search formatting. The latter is a string following the
228
+ specification of Python string formatting, with a special behaviour added to it. For example,
229
+ an entry in the metadata mapping of this kind::
230
+
231
+ completionTimeFromAscendingNode:
232
+ - 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
233
+ - '$.properties.acquisition.endViewingDate'
234
+
235
+ means that the search url will have a query string parameter named ``f`` with a value of
236
+ ``acquisition.endViewingDate:lte:1543922280.0`` if the search was done with the value
237
+ of ``completionTimeFromAscendingNode`` being ``2018-12-04T12:18:00``. What happened is that
238
+ ``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp of the value
239
+ of ``completionTimeFromAscendingNode``. This example shows all there is to know about the
240
+ semantics of the query string formatting introduced by this plugin: any eodag search parameter
241
+ can be referenced in the query string with an additional optional conversion function that
242
+ is separated from it by a ``#`` (see :func:`~eodag.api.product.metadata_mapping.format_metadata` for further
243
+ details on the available converters). Note that the values in the
244
+ :attr:`~eodag.config.PluginConfig.free_text_search_operations` configuration parameter follow the same rule.
245
+ If the metadata_mapping is not a list but only a string, this means that the parameter is not queryable but
246
+ it is included in the result obtained from the provider. The string indicates how the provider result should
247
+ be mapped to the eodag parameter.
248
+ * :attr:`~eodag.config.PluginConfig.discover_metadata` (:class:`~eodag.config.PluginConfig.DiscoverMetadata`):
249
+ configuration for the auto-discovery of queryable parameters as well as parameters returned by the provider
250
+ which are not in the metadata mapping. It has the attributes:
251
+
252
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.auto_discovery` (``bool``): if the automatic discovery of
253
+ metadata is activated; default: ``False``; if false, the other parameters are not used;
254
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.metadata_pattern` (``str``): regex string a parameter in
255
+ the result should match so that it is used
256
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.search_param` (``Union[str, dict[str, Any]]``): format
257
+ to add a query param given by the user and not in the metadata mapping to the requests, 'metadata' will be
258
+ replaced by the search param; can be a string or a dict containing
259
+ :attr:`~eodag.config.PluginConfig.free_text_search_operations`
260
+ (see :class:`~eodag.plugins.search.qssearch.ODataV4Search`)
261
+ * :attr:`~eodag.config.PluginConfig.DiscoverMetadata.metadata_path` (``str``): path where the queryable
262
+ properties can be found in the provider result
263
+
264
+ * :attr:`~eodag.config.PluginConfig.discover_queryables`
265
+ (:class:`~eodag.config.PluginConfig.DiscoverQueryables`): configuration to fetch the queryables from a
266
+ provider queryables endpoint; It has the following keys:
267
+
268
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` (``str``): url to fetch the queryables valid
269
+ for all product types
270
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` (``str``): url to fetch the
271
+ queryables for a specific product type
272
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.result_type` (``str``): type of the result (currently
273
+ only ``json`` is used)
274
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.results_entry` (``str``): json path to retrieve the
275
+ queryables from the provider result
276
+
277
+ * :attr:`~eodag.config.PluginConfig.constraints_file_url` (``str``): url to fetch the constraints for a specific
278
+ product type, can be an http url or a path to a file; the constraints are used to build queryables
279
+ * :attr:`~eodag.config.PluginConfig.constraints_entry` (``str``): key in the json result where the constraints
280
+ can be found; if not given, it is assumed that the constraints are on top level of the result, i.e.
281
+ the result is an array of constraints
203
282
  """
204
283
 
205
- extract_properties: Dict[str, Callable[..., Dict[str, Any]]] = {
284
+ extract_properties: dict[str, Callable[..., dict[str, Any]]] = {
206
285
  "xml": properties_from_xml,
207
286
  "json": properties_from_json,
208
287
  }
@@ -213,8 +292,8 @@ class QueryStringSearch(Search):
213
292
  self.config.__dict__.setdefault("results_entry", "features")
214
293
  self.config.__dict__.setdefault("pagination", {})
215
294
  self.config.__dict__.setdefault("free_text_search_operations", {})
216
- self.search_urls: List[str] = []
217
- self.query_params: Dict[str, str] = dict()
295
+ self.search_urls: list[str] = []
296
+ self.query_params: dict[str, str] = dict()
218
297
  self.query_string = ""
219
298
  self.next_page_url = None
220
299
  self.next_page_query_obj = None
@@ -359,20 +438,73 @@ class QueryStringSearch(Search):
359
438
  self.next_page_query_obj = None
360
439
  self.next_page_merge = None
361
440
 
362
- def discover_product_types(self, **kwargs: Any) -> Optional[Dict[str, Any]]:
441
+ def discover_product_types(self, **kwargs: Any) -> Optional[dict[str, Any]]:
363
442
  """Fetch product types list from provider using `discover_product_types` conf
364
443
 
444
+ :returns: configuration dict containing fetched product types information
445
+ """
446
+ unpaginated_fetch_url = self.config.discover_product_types.get("fetch_url")
447
+ if not unpaginated_fetch_url:
448
+ return None
449
+
450
+ # product types pagination
451
+ next_page_url_tpl = self.config.discover_product_types.get("next_page_url_tpl")
452
+ page = self.config.discover_product_types.get("start_page", 1)
453
+
454
+ if not next_page_url_tpl:
455
+ # no pagination
456
+ return self.discover_product_types_per_page(**kwargs)
457
+
458
+ conf_update_dict: dict[str, Any] = {
459
+ "providers_config": {},
460
+ "product_types_config": {},
461
+ }
462
+
463
+ while True:
464
+ fetch_url = next_page_url_tpl.format(url=unpaginated_fetch_url, page=page)
465
+
466
+ conf_update_dict_per_page = self.discover_product_types_per_page(
467
+ fetch_url=fetch_url, **kwargs
468
+ )
469
+
470
+ if (
471
+ not conf_update_dict_per_page
472
+ or not conf_update_dict_per_page.get("providers_config")
473
+ or conf_update_dict_per_page.items() <= conf_update_dict.items()
474
+ ):
475
+ # conf_update_dict_per_page is empty or a subset on existing conf
476
+ break
477
+ else:
478
+ conf_update_dict["providers_config"].update(
479
+ conf_update_dict_per_page["providers_config"]
480
+ )
481
+ conf_update_dict["product_types_config"].update(
482
+ conf_update_dict_per_page["product_types_config"]
483
+ )
484
+
485
+ page += 1
486
+
487
+ return conf_update_dict
488
+
489
+ def discover_product_types_per_page(
490
+ self, **kwargs: Any
491
+ ) -> Optional[dict[str, Any]]:
492
+ """Fetch product types list from provider using `discover_product_types` conf
493
+ using paginated ``kwargs["fetch_url"]``
494
+
365
495
  :returns: configuration dict containing fetched product types information
366
496
  """
367
497
  try:
368
498
  prep = PreparedSearch()
369
499
 
370
- prep.url = cast(
371
- str,
372
- self.config.discover_product_types["fetch_url"].format(
373
- **self.config.__dict__
374
- ),
375
- )
500
+ # url from discover_product_types() or conf
501
+ fetch_url: Optional[str] = kwargs.get("fetch_url")
502
+ if fetch_url is None:
503
+ if fetch_url := self.config.discover_product_types.get("fetch_url"):
504
+ fetch_url = fetch_url.format(**self.config.__dict__)
505
+ else:
506
+ return None
507
+ prep.url = fetch_url
376
508
 
377
509
  # get auth if available
378
510
  if "auth" in kwargs:
@@ -399,31 +531,43 @@ class QueryStringSearch(Search):
399
531
 
400
532
  prep.info_message = "Fetching product types: {}".format(prep.url)
401
533
  prep.exception_message = (
402
- "Skipping error while fetching product types for " "{} {} instance:"
534
+ "Skipping error while fetching product types for {} {} instance:"
403
535
  ).format(self.provider, self.__class__.__name__)
404
536
 
405
- response = QueryStringSearch._request(self, prep)
537
+ # Query using appropriate method
538
+ fetch_method = self.config.discover_product_types.get("fetch_method", "GET")
539
+ fetch_body = self.config.discover_product_types.get("fetch_body", {})
540
+ if fetch_method == "POST" and isinstance(self, PostJsonSearch):
541
+ prep.query_params = fetch_body
542
+ response = self._request(prep)
543
+ else:
544
+ response = QueryStringSearch._request(self, prep)
406
545
  except (RequestError, KeyError, AttributeError):
407
546
  return None
408
547
  else:
409
548
  try:
410
- conf_update_dict: Dict[str, Any] = {
549
+ conf_update_dict: dict[str, Any] = {
411
550
  "providers_config": {},
412
551
  "product_types_config": {},
413
552
  }
414
553
  if self.config.discover_product_types["result_type"] == "json":
415
554
  resp_as_json = response.json()
416
555
  # extract results from response json
417
- result = [
418
- match.value
419
- for match in self.config.discover_product_types[
420
- "results_entry"
421
- ].find(resp_as_json)
422
- ]
556
+ results_entry = self.config.discover_product_types["results_entry"]
557
+ if not isinstance(results_entry, JSONPath):
558
+ logger.warning(
559
+ f"Could not parse {self.provider} discover_product_types.results_entry"
560
+ f" as JSONPath: {results_entry}"
561
+ )
562
+ return None
563
+ result = [match.value for match in results_entry.find(resp_as_json)]
423
564
  if result and isinstance(result[0], list):
424
565
  result = result[0]
425
566
 
426
- for product_type_result in result:
567
+ def conf_update_from_product_type_result(
568
+ product_type_result: dict[str, Any],
569
+ ) -> None:
570
+ """Update ``conf_update_dict`` using given product type json response"""
427
571
  # providers_config extraction
428
572
  extracted_mapping = properties_from_json(
429
573
  product_type_result,
@@ -487,7 +631,11 @@ class QueryStringSearch(Search):
487
631
  ][kf]
488
632
  )
489
633
  for kf in keywords_fields
490
- if conf_update_dict["product_types_config"][
634
+ if kf
635
+ in conf_update_dict["product_types_config"][
636
+ generic_product_type_id
637
+ ]
638
+ and conf_update_dict["product_types_config"][
491
639
  generic_product_type_id
492
640
  ][kf]
493
641
  != NOT_AVAILABLE
@@ -510,6 +658,20 @@ class QueryStringSearch(Search):
510
658
  conf_update_dict["product_types_config"][
511
659
  generic_product_type_id
512
660
  ]["keywords"] = keywords_values_str
661
+
662
+ # runs concurrent requests and aggregate results in conf_update_dict
663
+ max_connections = self.config.discover_product_types.get(
664
+ "max_connections"
665
+ )
666
+ with concurrent.futures.ThreadPoolExecutor(
667
+ max_workers=max_connections
668
+ ) as executor:
669
+ futures = (
670
+ executor.submit(conf_update_from_product_type_result, r)
671
+ for r in result
672
+ )
673
+ [f.result() for f in concurrent.futures.as_completed(futures)]
674
+
513
675
  except KeyError as e:
514
676
  logger.warning(
515
677
  "Incomplete %s discover_product_types configuration: %s",
@@ -517,6 +679,12 @@ class QueryStringSearch(Search):
517
679
  e,
518
680
  )
519
681
  return None
682
+ except requests.RequestException as e:
683
+ logger.debug(
684
+ "Could not parse discovered product types response from "
685
+ f"{self.provider}, {type(e).__name__}: {e.args}"
686
+ )
687
+ return None
520
688
  conf_update_dict["product_types_config"] = dict_items_recursive_apply(
521
689
  conf_update_dict["product_types_config"],
522
690
  lambda k, v: v if v != NOT_AVAILABLE else None,
@@ -525,7 +693,7 @@ class QueryStringSearch(Search):
525
693
 
526
694
  def _get_product_type_metadata_from_single_collection_endpoint(
527
695
  self, product_type: str
528
- ) -> Dict[str, Any]:
696
+ ) -> dict[str, Any]:
529
697
  """
530
698
  retrieves additional product type information from an endpoint returning data for a single collection
531
699
  :param product_type: product type
@@ -538,9 +706,7 @@ class QueryStringSearch(Search):
538
706
  self,
539
707
  PreparedSearch(
540
708
  url=single_collection_url,
541
- info_message="Fetching data for product type product type: {}".format(
542
- product_type
543
- ),
709
+ info_message=f"Fetching data for product type: {product_type}",
544
710
  exception_message="Skipping error while fetching product types for "
545
711
  "{} {} instance:".format(self.provider, self.__class__.__name__),
546
712
  ),
@@ -551,107 +717,11 @@ class QueryStringSearch(Search):
551
717
  self.config.discover_product_types["single_product_type_parsable_metadata"],
552
718
  )
553
719
 
554
- def discover_queryables(
555
- self, **kwargs: Any
556
- ) -> Optional[Dict[str, Annotated[Any, FieldInfo]]]:
557
- """Fetch queryables list from provider using its constraints file
558
-
559
- :param kwargs: additional filters for queryables (`productType` and other search
560
- arguments)
561
- :returns: fetched queryable parameters dict
562
- """
563
- product_type = kwargs.pop("productType", None)
564
- if not product_type:
565
- return {}
566
- constraints_file_url = getattr(self.config, "constraints_file_url", "")
567
- if not constraints_file_url:
568
- return {}
569
-
570
- constraints_file_dataset_key = getattr(
571
- self.config, "constraints_file_dataset_key", "dataset"
572
- )
573
- provider_product_type = self.config.products.get(product_type, {}).get(
574
- constraints_file_dataset_key, None
575
- )
576
-
577
- # defaults
578
- default_queryables = self._get_defaults_as_queryables(product_type)
579
- # remove unwanted queryables
580
- for param in getattr(self.config, "remove_from_queryables", []):
581
- default_queryables.pop(param, None)
582
-
583
- non_empty_kwargs = {k: v for k, v in kwargs.items() if v}
584
-
585
- if "{" in constraints_file_url:
586
- constraints_file_url = constraints_file_url.format(
587
- dataset=provider_product_type
588
- )
589
- constraints = fetch_constraints(constraints_file_url, self)
590
- if not constraints:
591
- return default_queryables
592
-
593
- constraint_params: Dict[str, Dict[str, Set[Any]]] = {}
594
- if len(kwargs) == 0:
595
- # get values from constraints without additional filters
596
- for constraint in constraints:
597
- for key in constraint.keys():
598
- if key in constraint_params:
599
- constraint_params[key]["enum"].update(constraint[key])
600
- else:
601
- constraint_params[key] = {"enum": set(constraint[key])}
602
- else:
603
- # get values from constraints with additional filters
604
- constraints_input_params = {k: v for k, v in non_empty_kwargs.items()}
605
- constraint_params = get_constraint_queryables_with_additional_params(
606
- constraints, constraints_input_params, self, product_type
607
- )
608
- # query params that are not in constraints but might be default queryables
609
- if len(constraint_params) == 1 and "not_available" in constraint_params:
610
- not_queryables = set()
611
- for constraint_param in constraint_params["not_available"]["enum"]:
612
- param = CommonQueryables.get_queryable_from_alias(constraint_param)
613
- if param in dict(
614
- CommonQueryables.model_fields, **default_queryables
615
- ):
616
- non_empty_kwargs.pop(constraint_param)
617
- else:
618
- not_queryables.add(constraint_param)
619
- if not_queryables:
620
- raise ValidationError(
621
- f"parameter(s) {str(not_queryables)} not queryable"
622
- )
623
- else:
624
- # get constraints again without common queryables
625
- constraint_params = (
626
- get_constraint_queryables_with_additional_params(
627
- constraints, non_empty_kwargs, self, product_type
628
- )
629
- )
630
-
631
- field_definitions: Dict[str, Any] = dict()
632
- for json_param, json_mtd in constraint_params.items():
633
- param = (
634
- get_queryable_from_provider(
635
- json_param, self.get_metadata_mapping(product_type)
636
- )
637
- or json_param
638
- )
639
- default = kwargs.get(param, None) or self.config.products.get(
640
- product_type, {}
641
- ).get(param, None)
642
- annotated_def = json_field_definition_to_python(
643
- json_mtd, default_value=default, required=True
644
- )
645
- field_definitions[param] = get_args(annotated_def)
646
-
647
- python_queryables = create_model("m", **field_definitions).model_fields
648
- return dict(default_queryables, **model_fields_to_annotated(python_queryables))
649
-
650
720
  def query(
651
721
  self,
652
722
  prep: PreparedSearch = PreparedSearch(),
653
723
  **kwargs: Any,
654
- ) -> Tuple[List[EOProduct], Optional[int]]:
724
+ ) -> tuple[list[EOProduct], Optional[int]]:
655
725
  """Perform a search on an OpenSearch-like interface
656
726
 
657
727
  :param prep: Object collecting needed information for search.
@@ -679,7 +749,7 @@ class QueryStringSearch(Search):
679
749
 
680
750
  # provider product type specific conf
681
751
  prep.product_type_def_params = (
682
- self.get_product_type_def_params(product_type, **kwargs)
752
+ self.get_product_type_def_params(product_type, format_variables=kwargs)
683
753
  if product_type is not None
684
754
  else {}
685
755
  )
@@ -703,10 +773,7 @@ class QueryStringSearch(Search):
703
773
  }
704
774
  )
705
775
 
706
- if product_type is None:
707
- raise ValidationError("Required productType is missing")
708
-
709
- qp, qs = self.build_query_string(product_type, **keywords)
776
+ qp, qs = self.build_query_string(product_type, keywords)
710
777
 
711
778
  prep.query_params = qp
712
779
  prep.query_string = qs
@@ -734,21 +801,21 @@ class QueryStringSearch(Search):
734
801
  reason="Simply run `self.config.metadata_mapping.update(metadata_mapping)` instead",
735
802
  version="2.10.0",
736
803
  )
737
- def update_metadata_mapping(self, metadata_mapping: Dict[str, Any]) -> None:
804
+ def update_metadata_mapping(self, metadata_mapping: dict[str, Any]) -> None:
738
805
  """Update plugin metadata_mapping with input metadata_mapping configuration"""
739
806
  if self.config.metadata_mapping:
740
807
  self.config.metadata_mapping.update(metadata_mapping)
741
808
 
742
809
  def build_query_string(
743
- self, product_type: str, **kwargs: Any
744
- ) -> Tuple[Dict[str, Any], str]:
810
+ self, product_type: str, query_dict: dict[str, Any]
811
+ ) -> tuple[dict[str, Any], str]:
745
812
  """Build The query string using the search parameters"""
746
813
  logger.debug("Building the query string that will be used for search")
747
- query_params = format_query_params(product_type, self.config, kwargs)
814
+ query_params = format_query_params(product_type, self.config, query_dict)
748
815
 
749
816
  # Build the final query string, in one go without quoting it
750
817
  # (some providers do not operate well with urlencoded and quoted query strings)
751
- def quote_via(x: Any, *_args, **_kwargs) -> str:
818
+ def quote_via(x: Any, *_args: Any, **_kwargs: Any) -> str:
752
819
  return x
753
820
 
754
821
  return (
@@ -760,7 +827,7 @@ class QueryStringSearch(Search):
760
827
  self,
761
828
  prep: PreparedSearch = PreparedSearch(page=None, items_per_page=None),
762
829
  **kwargs: Any,
763
- ) -> Tuple[List[str], Optional[int]]:
830
+ ) -> tuple[list[str], Optional[int]]:
764
831
  """Build paginated urls"""
765
832
  page = prep.page
766
833
  items_per_page = prep.items_per_page
@@ -825,11 +892,11 @@ class QueryStringSearch(Search):
825
892
  else:
826
893
  next_url = "{}?{}".format(search_endpoint, qs_with_sort)
827
894
  urls.append(next_url)
828
- return urls, total_results
895
+ return list(dict.fromkeys(urls)), total_results
829
896
 
830
897
  def do_search(
831
898
  self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
832
- ) -> List[Any]:
899
+ ) -> list[Any]:
833
900
  """Perform the actual search request.
834
901
 
835
902
  If there is a specified number of items per page, return the results as soon
@@ -846,7 +913,7 @@ class QueryStringSearch(Search):
846
913
  "total_items_nb_key_path"
847
914
  ]
848
915
 
849
- results: List[Any] = []
916
+ results: list[Any] = []
850
917
  for search_url in prep.search_urls:
851
918
  single_search_prep = copy_copy(prep)
852
919
  single_search_prep.url = search_url
@@ -854,8 +921,8 @@ class QueryStringSearch(Search):
854
921
  search_url
855
922
  )
856
923
  single_search_prep.exception_message = (
857
- "Skipping error while searching for {} {} "
858
- "instance:".format(self.provider, self.__class__.__name__)
924
+ f"Skipping error while searching for {self.provider}"
925
+ f" {self.__class__.__name__} instance"
859
926
  )
860
927
  response = self._request(single_search_prep)
861
928
  next_page_url_key_path = self.config.pagination.get(
@@ -969,9 +1036,13 @@ class QueryStringSearch(Search):
969
1036
  logger.debug(
970
1037
  "Could not extract total_items_nb from search results"
971
1038
  )
972
- if getattr(self.config, "merge_responses", False):
1039
+ if (
1040
+ getattr(self.config, "merge_responses", False)
1041
+ and self.config.result_type == "json"
1042
+ ):
1043
+ json_result = cast(list[dict[str, Any]], result)
973
1044
  results = (
974
- [dict(r, **result[i]) for i, r in enumerate(results)]
1045
+ [dict(r, **json_result[i]) for i, r in enumerate(results)]
975
1046
  if results
976
1047
  else result
977
1048
  )
@@ -993,14 +1064,14 @@ class QueryStringSearch(Search):
993
1064
 
994
1065
  def normalize_results(
995
1066
  self, results: RawSearchResult, **kwargs: Any
996
- ) -> List[EOProduct]:
1067
+ ) -> list[EOProduct]:
997
1068
  """Build EOProducts from provider results"""
998
1069
  normalize_remaining_count = len(results)
999
1070
  logger.debug(
1000
1071
  "Adapting %s plugin results to eodag product representation"
1001
1072
  % normalize_remaining_count
1002
1073
  )
1003
- products: List[EOProduct] = []
1074
+ products: list[EOProduct] = []
1004
1075
  for result in results:
1005
1076
  product = EOProduct(
1006
1077
  self.provider,
@@ -1015,8 +1086,15 @@ class QueryStringSearch(Search):
1015
1086
  product.properties = dict(
1016
1087
  getattr(self.config, "product_type_config", {}), **product.properties
1017
1088
  )
1018
- # move assets from properties to product's attr
1019
- product.assets.update(product.properties.pop("assets", {}))
1089
+ # move assets from properties to product's attr, normalize keys & roles
1090
+ for key, asset in product.properties.pop("assets", {}).items():
1091
+ norm_key, asset["roles"] = product.driver.guess_asset_key_and_roles(
1092
+ asset.get("href", ""), product
1093
+ )
1094
+ if norm_key:
1095
+ product.assets[norm_key] = asset
1096
+ # sort assets
1097
+ product.assets.data = dict(sorted(product.assets.data.items()))
1020
1098
  products.append(product)
1021
1099
  return products
1022
1100
 
@@ -1058,7 +1136,7 @@ class QueryStringSearch(Search):
1058
1136
  total_results = int(count_results)
1059
1137
  return total_results
1060
1138
 
1061
- def get_collections(self, prep: PreparedSearch, **kwargs: Any) -> Tuple[str, ...]:
1139
+ def get_collections(self, prep: PreparedSearch, **kwargs: Any) -> tuple[str, ...]:
1062
1140
  """Get the collection to which the product belongs"""
1063
1141
  # See https://earth.esa.int/web/sentinel/missions/sentinel-2/news/-
1064
1142
  # /asset_publisher/Ac0d/content/change-of
@@ -1069,7 +1147,7 @@ class QueryStringSearch(Search):
1069
1147
  not hasattr(prep, "product_type_def_params")
1070
1148
  or not prep.product_type_def_params
1071
1149
  ):
1072
- collections: Set[str] = set()
1150
+ collections: set[str] = set()
1073
1151
  collection = getattr(self.config, "collection", None)
1074
1152
  if collection is None:
1075
1153
  try:
@@ -1111,12 +1189,20 @@ class QueryStringSearch(Search):
1111
1189
  info_message = prep.info_message
1112
1190
  exception_message = prep.exception_message
1113
1191
  try:
1114
- timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1192
+ timeout = getattr(self.config, "timeout", DEFAULT_SEARCH_TIMEOUT)
1115
1193
  ssl_verify = getattr(self.config, "ssl_verify", True)
1116
1194
 
1195
+ retry_total = getattr(self.config, "retry_total", REQ_RETRY_TOTAL)
1196
+ retry_backoff_factor = getattr(
1197
+ self.config, "retry_backoff_factor", REQ_RETRY_BACKOFF_FACTOR
1198
+ )
1199
+ retry_status_forcelist = getattr(
1200
+ self.config, "retry_status_forcelist", REQ_RETRY_STATUS_FORCELIST
1201
+ )
1202
+
1117
1203
  ssl_ctx = get_ssl_context(ssl_verify)
1118
1204
  # auth if needed
1119
- kwargs: Dict[str, Any] = {}
1205
+ kwargs: dict[str, Any] = {}
1120
1206
  if (
1121
1207
  getattr(self.config, "need_auth", False)
1122
1208
  and hasattr(prep, "auth")
@@ -1152,7 +1238,16 @@ class QueryStringSearch(Search):
1152
1238
  else:
1153
1239
  if info_message:
1154
1240
  logger.info(info_message)
1155
- response = requests.get(
1241
+
1242
+ session = requests.Session()
1243
+ retries = Retry(
1244
+ total=retry_total,
1245
+ backoff_factor=retry_backoff_factor,
1246
+ status_forcelist=retry_status_forcelist,
1247
+ )
1248
+ session.mount(url, HTTPAdapter(max_retries=retries))
1249
+
1250
+ response = session.get(
1156
1251
  url,
1157
1252
  timeout=timeout,
1158
1253
  headers=USER_AGENT,
@@ -1162,6 +1257,9 @@ class QueryStringSearch(Search):
1162
1257
  response.raise_for_status()
1163
1258
  except requests.exceptions.Timeout as exc:
1164
1259
  raise TimeOutError(exc, timeout=timeout) from exc
1260
+ except socket.timeout:
1261
+ err = requests.exceptions.Timeout(request=requests.Request(url=url))
1262
+ raise TimeOutError(err, timeout=timeout)
1165
1263
  except (requests.RequestException, URLError) as err:
1166
1264
  err_msg = err.readlines() if hasattr(err, "readlines") else ""
1167
1265
  if exception_message:
@@ -1174,13 +1272,54 @@ class QueryStringSearch(Search):
1174
1272
  self.__class__.__name__,
1175
1273
  err_msg,
1176
1274
  )
1177
- raise RequestError(str(err))
1275
+ raise RequestError.from_error(err, exception_message) from err
1178
1276
  return response
1179
1277
 
1180
1278
 
1181
1279
  class ODataV4Search(QueryStringSearch):
1182
- """A specialisation of a QueryStringSearch that does a two step search to retrieve
1183
- all products metadata"""
1280
+ """A specialisation of a :class:`~eodag.plugins.search.qssearch.QueryStringSearch` that does a two step search to
1281
+ retrieve all products metadata. All configuration parameters of
1282
+ :class:`~eodag.plugins.search.qssearch.QueryStringSearch` are also available for this plugin. In addition, the
1283
+ following parameters can be configured:
1284
+
1285
+ :param provider: provider name
1286
+ :param config: Search plugin configuration:
1287
+
1288
+ * :attr:`~eodag.config.PluginConfig.per_product_metadata_query` (``bool``): should be set to true if the metadata
1289
+ is not given in the search result and a two step search has to be performed; default: false
1290
+ * :attr:`~eodag.config.PluginConfig.metadata_pre_mapping` (:class:`~eodag.config.PluginConfig.MetadataPreMapping`)
1291
+ : a dictionary which can be used to simplify further metadata extraction. For example, going from
1292
+ ``$.Metadata[?(@.id="foo")].value`` to ``$.Metadata.foo.value``. It has the keys:
1293
+
1294
+ * :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path` (``str``): json path of the metadata entry
1295
+ * :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path_id` (``str``): key to get the metadata id
1296
+ * :attr:`~eodag.config.PluginConfig.MetadataPreMapping.metadata_path_value` (``str``): key to get the metadata
1297
+ value
1298
+
1299
+ * :attr:`~eodag.config.PluginConfig.free_text_search_operations`: (optional) A tree structure of the form::
1300
+
1301
+ # noqa: E800
1302
+ <search-param>: # e.g: $search
1303
+ union: # how to join the operations below (e.g: ' AND ' -->
1304
+ # '(op1 AND op2) AND (op3 OR op4)')
1305
+ wrapper: # a pattern for how each operation will be wrapped
1306
+ # (e.g: '({})' --> '(op1 AND op2)')
1307
+ operations: # The operations to build
1308
+ <opname>: # e.g: AND
1309
+ - <op1> # e.g:
1310
+ # 'sensingStartDate:[{startTimeFromAscendingNode}Z TO *]'
1311
+ - <op2> # e.g:
1312
+ # 'sensingStopDate:[* TO {completionTimeFromAscendingNode}Z]'
1313
+ ...
1314
+ ...
1315
+ ...
1316
+
1317
+ With the structure above, each operation will become a string of the form:
1318
+ ``(<op1> <opname> <op2>)``, then the operations will be joined together using
1319
+ the union string and finally if the number of operations is greater than 1,
1320
+ they will be wrapped as specified by the wrapper config key.
1321
+
1322
+ """
1184
1323
 
1185
1324
  def __init__(self, provider: str, config: PluginConfig) -> None:
1186
1325
  super(ODataV4Search, self).__init__(provider, config)
@@ -1197,7 +1336,7 @@ class ODataV4Search(QueryStringSearch):
1197
1336
 
1198
1337
  def do_search(
1199
1338
  self, prep: PreparedSearch = PreparedSearch(), **kwargs: Any
1200
- ) -> List[Any]:
1339
+ ) -> list[Any]:
1201
1340
  """A two step search can be performed if the metadata are not given into the search result"""
1202
1341
 
1203
1342
  if getattr(self.config, "per_product_metadata_query", False):
@@ -1219,7 +1358,7 @@ class ODataV4Search(QueryStringSearch):
1219
1358
  raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
1220
1359
  except requests.RequestException:
1221
1360
  logger.exception(
1222
- "Skipping error while searching for %s %s instance:",
1361
+ "Skipping error while searching for %s %s instance",
1223
1362
  self.provider,
1224
1363
  self.__class__.__name__,
1225
1364
  )
@@ -1232,7 +1371,7 @@ class ODataV4Search(QueryStringSearch):
1232
1371
  else:
1233
1372
  return super(ODataV4Search, self).do_search(prep, **kwargs)
1234
1373
 
1235
- def get_metadata_search_url(self, entity: Dict[str, Any]) -> str:
1374
+ def get_metadata_search_url(self, entity: dict[str, Any]) -> str:
1236
1375
  """Build the metadata link for the given entity"""
1237
1376
  return "{}({})/Metadata".format(
1238
1377
  self.config.api_endpoint.rstrip("/"), entity["id"]
@@ -1240,7 +1379,7 @@ class ODataV4Search(QueryStringSearch):
1240
1379
 
1241
1380
  def normalize_results(
1242
1381
  self, results: RawSearchResult, **kwargs: Any
1243
- ) -> List[EOProduct]:
1382
+ ) -> list[EOProduct]:
1244
1383
  """Build EOProducts from provider results
1245
1384
 
1246
1385
  If configured, a metadata pre-mapping can be applied to simplify further metadata extraction.
@@ -1271,15 +1410,38 @@ class ODataV4Search(QueryStringSearch):
1271
1410
 
1272
1411
 
1273
1412
  class PostJsonSearch(QueryStringSearch):
1274
- """A specialisation of a QueryStringSearch that uses POST method"""
1413
+ """A specialisation of a :class:`~eodag.plugins.search.qssearch.QueryStringSearch` that uses POST method
1414
+
1415
+ All configuration parameters available for :class:`~eodag.plugins.search.qssearch.QueryStringSearch`
1416
+ are also available for PostJsonSearch. The mappings given in metadata_mapping are used to construct
1417
+ a (json) body for the POST request that is sent to the provider. Due to the fact that we sent a POST request and
1418
+ not a get request, the pagination configuration will look slightly different. It has the
1419
+ following parameters:
1420
+
1421
+ :param provider: provider name
1422
+ :param config: Search plugin configuration:
1423
+
1424
+ * :attr:`~eodag.config.PluginConfig.Pagination.next_page_query_obj` (``str``): The additional parameters
1425
+ needed to add pagination information to the search request. These parameters won't be
1426
+ included in result. This must be a json dict formatted like ``{{"foo":"bar"}}`` because
1427
+ it will be passed to a :meth:`str.format` method before being loaded as json.
1428
+ * :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
1429
+ leading to the total number of results satisfying a request. This is used for providers
1430
+ which provides the total results metadata along with the result of the query and don't
1431
+ have an endpoint for querying the number of items satisfying a request, or for providers
1432
+ for which the count endpoint returns a json or xml document
1433
+ * :attr:`~eodag.config.PluginConfig.Pagination.max_items_per_page` (``int``): The maximum number of items
1434
+ per page that the provider can handle; default: ``50``
1435
+
1436
+ """
1275
1437
 
1276
1438
  def query(
1277
1439
  self,
1278
1440
  prep: PreparedSearch = PreparedSearch(),
1279
1441
  **kwargs: Any,
1280
- ) -> Tuple[List[EOProduct], Optional[int]]:
1442
+ ) -> tuple[list[EOProduct], Optional[int]]:
1281
1443
  """Perform a search on an OpenSearch-like interface"""
1282
- product_type = kwargs.get("productType", None)
1444
+ product_type = kwargs.get("productType", "")
1283
1445
  count = prep.count
1284
1446
  # remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
1285
1447
  kwargs.pop("product_type", None)
@@ -1295,7 +1457,7 @@ class PostJsonSearch(QueryStringSearch):
1295
1457
 
1296
1458
  # provider product type specific conf
1297
1459
  prep.product_type_def_params = self.get_product_type_def_params(
1298
- product_type, **kwargs
1460
+ product_type, format_variables=kwargs
1299
1461
  )
1300
1462
  else:
1301
1463
  keywords = {
@@ -1309,7 +1471,7 @@ class PostJsonSearch(QueryStringSearch):
1309
1471
 
1310
1472
  # provider product type specific conf
1311
1473
  prep.product_type_def_params = self.get_product_type_def_params(
1312
- product_type, **kwargs
1474
+ product_type, format_variables=kwargs
1313
1475
  )
1314
1476
 
1315
1477
  # Add to the query, the queryable parameters set in the provider product type definition
@@ -1323,7 +1485,7 @@ class PostJsonSearch(QueryStringSearch):
1323
1485
  }
1324
1486
  )
1325
1487
 
1326
- qp, _ = self.build_query_string(product_type, **keywords)
1488
+ qp, _ = self.build_query_string(product_type, keywords)
1327
1489
 
1328
1490
  for query_param, query_value in qp.items():
1329
1491
  if (
@@ -1392,6 +1554,7 @@ class PostJsonSearch(QueryStringSearch):
1392
1554
  # do not try to extract total_items from search results if count is False
1393
1555
  del prep.total_items_nb
1394
1556
  del prep.need_count
1557
+
1395
1558
  provider_results = self.do_search(prep, **kwargs)
1396
1559
  if count and total_items is None and hasattr(prep, "total_items_nb"):
1397
1560
  total_items = prep.total_items_nb
@@ -1405,7 +1568,7 @@ class PostJsonSearch(QueryStringSearch):
1405
1568
 
1406
1569
  def normalize_results(
1407
1570
  self, results: RawSearchResult, **kwargs: Any
1408
- ) -> List[EOProduct]:
1571
+ ) -> list[EOProduct]:
1409
1572
  """Build EOProducts from provider results"""
1410
1573
  normalized = super().normalize_results(results, **kwargs)
1411
1574
  for product in normalized:
@@ -1440,12 +1603,12 @@ class PostJsonSearch(QueryStringSearch):
1440
1603
  self,
1441
1604
  prep: PreparedSearch = PreparedSearch(),
1442
1605
  **kwargs: Any,
1443
- ) -> Tuple[List[str], Optional[int]]:
1606
+ ) -> tuple[list[str], Optional[int]]:
1444
1607
  """Adds pagination to query parameters, and auth to url"""
1445
1608
  page = prep.page
1446
1609
  items_per_page = prep.items_per_page
1447
1610
  count = prep.count
1448
- urls: List[str] = []
1611
+ urls: list[str] = []
1449
1612
  total_results = 0 if count else None
1450
1613
 
1451
1614
  if "count_endpoint" not in self.config.pagination:
@@ -1503,7 +1666,7 @@ class PostJsonSearch(QueryStringSearch):
1503
1666
  )
1504
1667
 
1505
1668
  urls.append(search_endpoint)
1506
- return urls, total_results
1669
+ return list(dict.fromkeys(urls)), total_results
1507
1670
 
1508
1671
  def _request(
1509
1672
  self,
@@ -1514,7 +1677,7 @@ class PostJsonSearch(QueryStringSearch):
1514
1677
  raise ValidationError("Cannot request empty URL")
1515
1678
  info_message = prep.info_message
1516
1679
  exception_message = prep.exception_message
1517
- timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1680
+ timeout = getattr(self.config, "timeout", DEFAULT_SEARCH_TIMEOUT)
1518
1681
  ssl_verify = getattr(self.config, "ssl_verify", True)
1519
1682
  try:
1520
1683
  # auth if needed
@@ -1537,8 +1700,14 @@ class PostJsonSearch(QueryStringSearch):
1537
1700
  prep.query_params = self.next_page_query_obj
1538
1701
  if info_message:
1539
1702
  logger.info(info_message)
1540
- logger.debug("Query parameters: %s" % prep.query_params)
1541
- logger.debug("Query kwargs: %s" % kwargs)
1703
+ try:
1704
+ logger.debug("Query parameters: %s" % geojson.dumps(prep.query_params))
1705
+ except TypeError:
1706
+ logger.debug("Query parameters: %s" % prep.query_params)
1707
+ try:
1708
+ logger.debug("Query kwargs: %s" % geojson.dumps(kwargs))
1709
+ except TypeError:
1710
+ logger.debug("Query kwargs: %s" % kwargs)
1542
1711
  response = requests.post(
1543
1712
  url,
1544
1713
  json=prep.query_params,
@@ -1551,22 +1720,16 @@ class PostJsonSearch(QueryStringSearch):
1551
1720
  except requests.exceptions.Timeout as exc:
1552
1721
  raise TimeOutError(exc, timeout=timeout) from exc
1553
1722
  except (requests.RequestException, URLError) as err:
1723
+ response = locals().get("response", Response())
1554
1724
  # check if error is identified as auth_error in provider conf
1555
1725
  auth_errors = getattr(self.config, "auth_error_code", [None])
1556
1726
  if not isinstance(auth_errors, list):
1557
1727
  auth_errors = [auth_errors]
1558
- if (
1559
- hasattr(err, "response")
1560
- and err.response is not None
1561
- and getattr(err.response, "status_code", None)
1562
- and err.response.status_code in auth_errors
1563
- ):
1728
+ if response.status_code and response.status_code in auth_errors:
1564
1729
  raise AuthenticationError(
1565
- "HTTP Error {} returned:\n{}\nPlease check your credentials for {}".format(
1566
- err.response.status_code,
1567
- err.response.text.strip(),
1568
- self.provider,
1569
- )
1730
+ f"Please check your credentials for {self.provider}.",
1731
+ f"HTTP Error {response.status_code} returned.",
1732
+ response.text.strip(),
1570
1733
  )
1571
1734
  if exception_message:
1572
1735
  logger.exception(exception_message)
@@ -1577,21 +1740,23 @@ class PostJsonSearch(QueryStringSearch):
1577
1740
  self.provider,
1578
1741
  self.__class__.__name__,
1579
1742
  )
1580
- if "response" in locals():
1581
- logger.debug(response.content)
1582
- error_text = str(err)
1583
- if (
1584
- hasattr(err, "response")
1585
- and err.response is not None
1586
- and getattr(err.response, "text", None)
1587
- ):
1588
- error_text = err.response.text
1589
- raise RequestError(error_text) from err
1743
+ logger.debug(response.content or str(err))
1744
+ raise RequestError.from_error(err, exception_message) from err
1590
1745
  return response
1591
1746
 
1592
1747
 
1593
1748
  class StacSearch(PostJsonSearch):
1594
- """A specialisation of a QueryStringSearch that uses generic STAC configuration"""
1749
+ """A specialisation of :class:`~eodag.plugins.search.qssearch.PostJsonSearch` that uses generic
1750
+ STAC configuration, it therefore has the same configuration parameters (those inherited
1751
+ from :class:`~eodag.plugins.search.qssearch.QueryStringSearch`).
1752
+ For providers using ``StacSearch`` default values are defined for most of the parameters
1753
+ (see ``stac_provider.yml``). If some parameters are different for a specific provider, they
1754
+ have to be overwritten. If certain functionalities are not available, their configuration
1755
+ parameters have to be overwritten with ``null``. E.g. if there is no queryables endpoint,
1756
+ the :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` and
1757
+ :attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` in the
1758
+ :attr:`~eodag.config.PluginConfig.discover_queryables` config have to be set to ``null``.
1759
+ """
1595
1760
 
1596
1761
  def __init__(self, provider: str, config: PluginConfig) -> None:
1597
1762
  # backup results_entry overwritten by init
@@ -1603,24 +1768,24 @@ class StacSearch(PostJsonSearch):
1603
1768
  self.config.results_entry = results_entry
1604
1769
 
1605
1770
  def build_query_string(
1606
- self, product_type: str, **kwargs: Any
1607
- ) -> Tuple[Dict[str, Any], str]:
1771
+ self, product_type: str, query_dict: dict[str, Any]
1772
+ ) -> tuple[dict[str, Any], str]:
1608
1773
  """Build The query string using the search parameters"""
1609
1774
  logger.debug("Building the query string that will be used for search")
1610
1775
 
1611
1776
  # handle opened time intervals
1612
1777
  if any(
1613
- k in kwargs
1614
- for k in ("startTimeFromAscendingNode", "completionTimeFromAscendingNode")
1778
+ q in query_dict
1779
+ for q in ("startTimeFromAscendingNode", "completionTimeFromAscendingNode")
1615
1780
  ):
1616
- kwargs.setdefault("startTimeFromAscendingNode", "..")
1617
- kwargs.setdefault("completionTimeFromAscendingNode", "..")
1781
+ query_dict.setdefault("startTimeFromAscendingNode", "..")
1782
+ query_dict.setdefault("completionTimeFromAscendingNode", "..")
1618
1783
 
1619
- query_params = format_query_params(product_type, self.config, kwargs)
1784
+ query_params = format_query_params(product_type, self.config, query_dict)
1620
1785
 
1621
1786
  # Build the final query string, in one go without quoting it
1622
1787
  # (some providers do not operate well with urlencoded and quoted query strings)
1623
- def quote_via(x: Any, *_args, **_kwargs) -> str:
1788
+ def quote_via(x: Any, *_args: Any, **_kwargs: Any) -> str:
1624
1789
  return x
1625
1790
 
1626
1791
  return (
@@ -1630,19 +1795,42 @@ class StacSearch(PostJsonSearch):
1630
1795
 
1631
1796
  def discover_queryables(
1632
1797
  self, **kwargs: Any
1633
- ) -> Optional[Dict[str, Annotated[Any, FieldInfo]]]:
1798
+ ) -> Optional[dict[str, Annotated[Any, FieldInfo]]]:
1634
1799
  """Fetch queryables list from provider using `discover_queryables` conf
1635
1800
 
1636
1801
  :param kwargs: additional filters for queryables (`productType` and other search
1637
1802
  arguments)
1638
1803
  :returns: fetched queryable parameters dict
1639
1804
  """
1805
+ if (
1806
+ not self.config.discover_queryables["fetch_url"]
1807
+ and not self.config.discover_queryables["product_type_fetch_url"]
1808
+ ):
1809
+ logger.info(f"Cannot fetch queryables with {self.provider}")
1810
+ return None
1811
+
1640
1812
  product_type = kwargs.get("productType", None)
1641
1813
  provider_product_type = (
1642
1814
  self.config.products.get(product_type, {}).get("productType", product_type)
1643
1815
  if product_type
1644
1816
  else None
1645
1817
  )
1818
+ if (
1819
+ provider_product_type
1820
+ and not self.config.discover_queryables["product_type_fetch_url"]
1821
+ ):
1822
+ logger.info(
1823
+ f"Cannot fetch queryables for a specific product type with {self.provider}"
1824
+ )
1825
+ return None
1826
+ if (
1827
+ not provider_product_type
1828
+ and not self.config.discover_queryables["fetch_url"]
1829
+ ):
1830
+ logger.info(
1831
+ f"Cannot fetch global queryables with {self.provider}. A product type must be specified"
1832
+ )
1833
+ return None
1646
1834
 
1647
1835
  try:
1648
1836
  unparsed_fetch_url = (
@@ -1650,20 +1838,30 @@ class StacSearch(PostJsonSearch):
1650
1838
  if provider_product_type
1651
1839
  else self.config.discover_queryables["fetch_url"]
1652
1840
  )
1841
+ if unparsed_fetch_url is None:
1842
+ return None
1653
1843
 
1654
1844
  fetch_url = unparsed_fetch_url.format(
1655
- provider_product_type=provider_product_type, **self.config.__dict__
1845
+ provider_product_type=provider_product_type,
1846
+ **self.config.__dict__,
1847
+ )
1848
+ auth = (
1849
+ self.auth
1850
+ if hasattr(self, "auth") and isinstance(self.auth, AuthBase)
1851
+ else None
1656
1852
  )
1657
1853
  response = QueryStringSearch._request(
1658
1854
  self,
1659
1855
  PreparedSearch(
1660
1856
  url=fetch_url,
1857
+ auth=auth,
1661
1858
  info_message="Fetching queryables: {}".format(fetch_url),
1662
1859
  exception_message="Skipping error while fetching queryables for "
1663
1860
  "{} {} instance:".format(self.provider, self.__class__.__name__),
1664
1861
  ),
1665
1862
  )
1666
- except (RequestError, KeyError, AttributeError):
1863
+ except (RequestError, KeyError, AttributeError) as e:
1864
+ logger.warning("failure in queryables discovery: %s", e)
1667
1865
  return None
1668
1866
  else:
1669
1867
  json_queryables = dict()
@@ -1671,11 +1869,15 @@ class StacSearch(PostJsonSearch):
1671
1869
  resp_as_json = response.json()
1672
1870
 
1673
1871
  # extract results from response json
1674
- json_queryables = [
1675
- match.value
1676
- for match in self.config.discover_queryables["results_entry"].find(
1677
- resp_as_json
1872
+ results_entry = self.config.discover_queryables["results_entry"]
1873
+ if not isinstance(results_entry, JSONPath):
1874
+ logger.warning(
1875
+ f"Could not parse {self.provider} discover_queryables.results_entry"
1876
+ f" as JSONPath: {results_entry}"
1678
1877
  )
1878
+ return None
1879
+ json_queryables = [
1880
+ match.value for match in results_entry.find(resp_as_json)
1679
1881
  ][0]
1680
1882
 
1681
1883
  except KeyError as e:
@@ -1691,7 +1893,7 @@ class StacSearch(PostJsonSearch):
1691
1893
  return None
1692
1894
 
1693
1895
  # convert json results to pydantic model fields
1694
- field_definitions: Dict[str, Any] = dict()
1896
+ field_definitions: dict[str, Any] = dict()
1695
1897
  for json_param, json_mtd in json_queryables.items():
1696
1898
  param = (
1697
1899
  get_queryable_from_provider(
@@ -1707,5 +1909,24 @@ class StacSearch(PostJsonSearch):
1707
1909
  field_definitions[param] = get_args(annotated_def)
1708
1910
 
1709
1911
  python_queryables = create_model("m", **field_definitions).model_fields
1912
+ # replace geometry by geom
1913
+ geom_queryable = python_queryables.pop("geometry", None)
1914
+ if geom_queryable:
1915
+ python_queryables["geom"] = geom_queryable
1710
1916
 
1711
1917
  return model_fields_to_annotated(python_queryables)
1918
+
1919
+
1920
+ class PostJsonSearchWithStacQueryables(StacSearch, PostJsonSearch):
1921
+ """A specialisation of a :class:`~eodag.plugins.search.qssearch.PostJsonSearch` that uses
1922
+ generic STAC configuration for queryables (inherited from :class:`~eodag.plugins.search.qssearch.StacSearch`).
1923
+ """
1924
+
1925
+ def __init__(self, provider: str, config: PluginConfig) -> None:
1926
+ PostJsonSearch.__init__(self, provider, config)
1927
+
1928
+ def build_query_string(
1929
+ self, product_type: str, query_dict: dict[str, Any]
1930
+ ) -> tuple[dict[str, Any], str]:
1931
+ """Build The query string using the search parameters"""
1932
+ return PostJsonSearch.build_query_string(self, product_type, query_dict)