eodag 3.10.1__py3-none-any.whl → 4.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. eodag/__init__.py +6 -1
  2. eodag/api/collection.py +353 -0
  3. eodag/api/core.py +606 -641
  4. eodag/api/product/__init__.py +3 -3
  5. eodag/api/product/_product.py +74 -56
  6. eodag/api/product/drivers/__init__.py +4 -46
  7. eodag/api/product/drivers/base.py +0 -28
  8. eodag/api/product/metadata_mapping.py +178 -216
  9. eodag/api/search_result.py +156 -15
  10. eodag/cli.py +83 -403
  11. eodag/config.py +81 -51
  12. eodag/plugins/apis/base.py +2 -2
  13. eodag/plugins/apis/ecmwf.py +36 -25
  14. eodag/plugins/apis/usgs.py +55 -40
  15. eodag/plugins/authentication/base.py +1 -3
  16. eodag/plugins/crunch/filter_date.py +3 -3
  17. eodag/plugins/crunch/filter_latest_intersect.py +2 -2
  18. eodag/plugins/crunch/filter_latest_tpl_name.py +1 -1
  19. eodag/plugins/download/aws.py +46 -42
  20. eodag/plugins/download/base.py +13 -14
  21. eodag/plugins/download/http.py +65 -65
  22. eodag/plugins/manager.py +28 -29
  23. eodag/plugins/search/__init__.py +6 -4
  24. eodag/plugins/search/base.py +131 -80
  25. eodag/plugins/search/build_search_result.py +245 -173
  26. eodag/plugins/search/cop_marine.py +87 -56
  27. eodag/plugins/search/csw.py +47 -37
  28. eodag/plugins/search/qssearch.py +653 -429
  29. eodag/plugins/search/stac_list_assets.py +1 -1
  30. eodag/plugins/search/static_stac_search.py +43 -44
  31. eodag/resources/{product_types.yml → collections.yml} +2594 -2453
  32. eodag/resources/ext_collections.json +1 -1
  33. eodag/resources/ext_product_types.json +1 -1
  34. eodag/resources/providers.yml +2706 -2733
  35. eodag/resources/stac_provider.yml +50 -92
  36. eodag/resources/user_conf_template.yml +9 -0
  37. eodag/types/__init__.py +2 -0
  38. eodag/types/queryables.py +70 -91
  39. eodag/types/search_args.py +1 -1
  40. eodag/utils/__init__.py +97 -21
  41. eodag/utils/dates.py +0 -12
  42. eodag/utils/exceptions.py +6 -6
  43. eodag/utils/free_text_search.py +3 -3
  44. eodag/utils/repr.py +2 -0
  45. {eodag-3.10.1.dist-info → eodag-4.0.0a2.dist-info}/METADATA +13 -99
  46. eodag-4.0.0a2.dist-info/RECORD +93 -0
  47. {eodag-3.10.1.dist-info → eodag-4.0.0a2.dist-info}/entry_points.txt +0 -4
  48. eodag/plugins/authentication/oauth.py +0 -60
  49. eodag/plugins/download/creodias_s3.py +0 -71
  50. eodag/plugins/download/s3rest.py +0 -351
  51. eodag/plugins/search/data_request_search.py +0 -565
  52. eodag/resources/stac.yml +0 -294
  53. eodag/resources/stac_api.yml +0 -2105
  54. eodag/rest/__init__.py +0 -24
  55. eodag/rest/cache.py +0 -70
  56. eodag/rest/config.py +0 -67
  57. eodag/rest/constants.py +0 -26
  58. eodag/rest/core.py +0 -764
  59. eodag/rest/errors.py +0 -210
  60. eodag/rest/server.py +0 -604
  61. eodag/rest/server.wsgi +0 -6
  62. eodag/rest/stac.py +0 -1032
  63. eodag/rest/templates/README +0 -1
  64. eodag/rest/types/__init__.py +0 -18
  65. eodag/rest/types/collections_search.py +0 -44
  66. eodag/rest/types/eodag_search.py +0 -386
  67. eodag/rest/types/queryables.py +0 -174
  68. eodag/rest/types/stac_search.py +0 -272
  69. eodag/rest/utils/__init__.py +0 -207
  70. eodag/rest/utils/cql_evaluate.py +0 -119
  71. eodag/rest/utils/rfc3339.py +0 -64
  72. eodag-3.10.1.dist-info/RECORD +0 -116
  73. {eodag-3.10.1.dist-info → eodag-4.0.0a2.dist-info}/WHEEL +0 -0
  74. {eodag-3.10.1.dist-info → eodag-4.0.0a2.dist-info}/licenses/LICENSE +0 -0
  75. {eodag-3.10.1.dist-info → eodag-4.0.0a2.dist-info}/top_level.txt +0 -0
eodag/plugins/search/qssearch.py
@@ -34,6 +34,7 @@ from typing import (
  )
  from urllib.error import URLError
  from urllib.parse import (
+ parse_qs,
  parse_qsl,
  quote,
  quote_plus,
@@ -68,25 +69,28 @@ from eodag.api.product.metadata_mapping import (
  properties_from_json,
  properties_from_xml,
  )
- from eodag.api.search_result import RawSearchResult
+ from eodag.api.search_result import RawSearchResult, SearchResult
  from eodag.plugins.search import PreparedSearch
  from eodag.plugins.search.base import Search
  from eodag.types import json_field_definition_to_python, model_fields_to_annotated
  from eodag.types.queryables import Queryables
  from eodag.types.search_args import SortByList
  from eodag.utils import (
+ DEFAULT_ITEMS_PER_PAGE,
+ DEFAULT_PAGE,
  DEFAULT_SEARCH_TIMEOUT,
- GENERIC_PRODUCT_TYPE,
+ GENERIC_COLLECTION,
  HTTP_REQ_TIMEOUT,
+ KNOWN_NEXT_PAGE_TOKEN_KEYS,
  REQ_RETRY_BACKOFF_FACTOR,
  REQ_RETRY_STATUS_FORCELIST,
  REQ_RETRY_TOTAL,
  USER_AGENT,
- _deprecated,
  copy_deepcopy,
  deepcopy,
  dict_items_recursive_apply,
  format_dict_items,
+ format_string,
  get_ssl_context,
  string_to_jsonpath,
  update_nested_dict,
@@ -150,8 +154,7 @@ class QueryStringSearch(Search):
  pagination requests. This is a simple Python format string which will be resolved using the following
  keywords: ``url`` (the base url of the search endpoint), ``search`` (the query string corresponding
  to the search request), ``items_per_page`` (the number of items to return per page),
- ``skip`` (the number of items to skip) or ``skip_base_1`` (the number of items to skip,
- starting from 1) and ``page`` (which page to return).
+ ``skip`` (the number of items to skip).
  * :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
  leading to the total number of results satisfying a request. This is used for providers which provides the
  total results metadata along with the result of the query and don't have an endpoint for querying
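For orientation, a rough sketch of how such a pagination template could resolve; the endpoint, query string and values below are invented for the example and are not taken from any provider configuration:

    # hypothetical template using the keywords documented above
    next_page_url_tpl = "{url}?{search}&maxRecords={items_per_page}&startIndex={skip}"
    resolved = next_page_url_tpl.format(
        url="https://example.com/search",  # base url of the search endpoint (made up)
        search="platform=S2A",             # query string built from the search criteria
        items_per_page=20,
        skip=40,                           # number of items already returned
    )
    # resolved == "https://example.com/search?platform=S2A&maxRecords=20&startIndex=40"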
@@ -167,38 +170,38 @@ class QueryStringSearch(Search):
  page that the provider can handle; default: ``50``
  * :attr:`~eodag.config.PluginConfig.Pagination.start_page` (``int``): number of the first page; default: ``1``

- * :attr:`~eodag.config.PluginConfig.discover_product_types`
- (:class:`~eodag.config.PluginConfig.DiscoverProductTypes`): configuration for product type discovery based on
+ * :attr:`~eodag.config.PluginConfig.discover_collections`
+ (:class:`~eodag.config.PluginConfig.DiscoverCollections`): configuration for collection discovery based on
  information from the provider; It contains the keys:

- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` (``str``) (**mandatory**): url from which
- the product types can be fetched
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.max_connections` (``int``): Maximum number of
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.fetch_url` (``str``) (**mandatory**): url from which
+ the collections can be fetched
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.max_connections` (``int``): Maximum number of
  connections for concurrent HTTP requests
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.result_type` (``str``): type of the provider result;
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.result_type` (``str``): type of the provider result;
  currently only ``json`` is supported (other types could be used in an extension of this plugin)
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.results_entry` (``str``) (**mandatory**): json path
- to the list of product types
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_id` (``str``): mapping for the
- product type id
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_metadata`
- (``dict[str, str]``): mapping for product type metadata (e.g. ``abstract``, ``licence``) which can be parsed
- from the provider result
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_parsable_properties`
- (``dict[str, str]``): mapping for product type properties which can be parsed from the result and are not
- product type metadata
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.generic_product_type_unparsable_properties`
- (``dict[str, str]``): mapping for product type properties which cannot be parsed from the result and are not
- product type metadata
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url` (``str``): url to fetch
- data for a single collection; used if product type metadata is not available from the endpoint given in
- :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url`
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_qs` (``str``): query string
- to be added to the :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.fetch_url` to filter for a
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.results_entry` (``str``) (**mandatory**): json path
+ to the list of collections
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_id` (``str``): mapping for the
+ collection id
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_parsable_metadata`
+ (``dict[str, str]``): mapping for collection metadata (e.g. ``description``, ``license``) which can be
+ parsed from the provider result
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_parsable_properties`
+ (``dict[str, str]``): mapping for collection properties which can be parsed from the result and are not
+ collection metadata
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_unparsable_properties`
+ (``dict[str, str]``): mapping for collection properties which cannot be parsed from the result and are not
+ collection metadata
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.single_collection_fetch_url` (``str``): url to fetch
+ data for a single collection; used if collection metadata is not available from the endpoint given in
+ :attr:`~eodag.config.PluginConfig.DiscoverCollections.fetch_url`
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.single_collection_fetch_qs` (``str``): query string
+ to be added to the :attr:`~eodag.config.PluginConfig.DiscoverCollections.fetch_url` to filter for a
  collection
- * :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_product_type_parsable_metadata`
- (``dict[str, str]``): mapping for product type metadata returned by the endpoint given in
- :attr:`~eodag.config.PluginConfig.DiscoverProductTypes.single_collection_fetch_url`.
+ * :attr:`~eodag.config.PluginConfig.DiscoverCollections.single_collection_parsable_metadata`
+ (``dict[str, str]``): mapping for collection metadata returned by the endpoint given in
+ :attr:`~eodag.config.PluginConfig.DiscoverCollections.single_collection_fetch_url`.

  * :attr:`~eodag.config.PluginConfig.sort` (:class:`~eodag.config.PluginConfig.Sort`): configuration for sorting
  the results. It contains the keys:
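As a rough illustration of how the keys listed above fit together, a hypothetical discover_collections configuration could look like this; the URL and json paths are invented and do not come from providers.yml:

    discover_collections_conf = {
        "fetch_url": "https://example.com/collections",     # mandatory
        "result_type": "json",
        "results_entry": "$.collections[*]",                 # json path to the list of collections
        "generic_collection_id": "{id}",
        "generic_collection_parsable_metadata": {
            "description": "$.description",
            "license": "$.license",
        },
        "max_connections": 5,
    }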
@@ -232,15 +235,15 @@ class QueryStringSearch(Search):
  specification of Python string formatting, with a special behaviour added to it. For example,
  an entry in the metadata mapping of this kind::

- completionTimeFromAscendingNode:
- - 'f=acquisition.endViewingDate:lte:{completionTimeFromAscendingNode#timestamp}'
+ end_datetime:
+ - 'f=acquisition.endViewingDate:lte:{end_datetime#timestamp}'
  - '$.properties.acquisition.endViewingDate'

  means that the search url will have a query string parameter named ``f`` with a value of
  ``acquisition.endViewingDate:lte:1543922280.0`` if the search was done with the value
- of ``completionTimeFromAscendingNode`` being ``2018-12-04T12:18:00``. What happened is that
- ``{completionTimeFromAscendingNode#timestamp}`` was replaced with the timestamp of the value
- of ``completionTimeFromAscendingNode``. This example shows all there is to know about the
+ of ``end_datetime`` being ``2018-12-04T12:18:00``. What happened is that
+ ``{end_datetime#timestamp}`` was replaced with the timestamp of the value
+ of ``end_datetime``. This example shows all there is to know about the
  semantics of the query string formatting introduced by this plugin: any eodag search parameter
  can be referenced in the query string with an additional optional conversion function that
  is separated from it by a ``#`` (see :func:`~eodag.api.product.metadata_mapping.format_metadata` for further
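A standalone approximation of the substitution described above; eodag's own format_metadata conversions may differ in detail (timezone handling in particular), so this only illustrates the idea:

    from datetime import datetime, timezone

    end_datetime = "2018-12-04T12:18:00"
    # rough equivalent of the "#timestamp" conversion: ISO date -> POSIX timestamp
    ts = datetime.fromisoformat(end_datetime).replace(tzinfo=timezone.utc).timestamp()
    query_param = f"f=acquisition.endViewingDate:lte:{ts}"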
@@ -270,16 +273,16 @@ class QueryStringSearch(Search):
  provider queryables endpoint; It has the following keys:

  * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` (``str``): url to fetch the queryables valid
- for all product types
- * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` (``str``): url to fetch the
- queryables for a specific product type
+ for all collections
+ * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.collection_fetch_url` (``str``): url to fetch the
+ queryables for a specific collection
  * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.result_type` (``str``): type of the result (currently
  only ``json`` is used)
  * :attr:`~eodag.config.PluginConfig.DiscoverQueryables.results_entry` (``str``): json path to retrieve the
  queryables from the provider result

  * :attr:`~eodag.config.PluginConfig.constraints_file_url` (``str``): url to fetch the constraints for a specific
- product type, can be an http url or a path to a file; the constraints are used to build queryables
+ collection, can be an http url or a path to a file; the constraints are used to build queryables
  * :attr:`~eodag.config.PluginConfig.constraints_entry` (``str``): key in the json result where the constraints
  can be found; if not given, it is assumed that the constraints are on top level of the result, i.e.
  the result is an array of constraints
@@ -332,45 +335,43 @@ class QueryStringSearch(Search):
  self.config.pagination.get("next_page_merge_key_path")
  )

- # parse jsonpath on init: product types discovery
+ # parse jsonpath on init: collections discovery
  if (
- getattr(self.config, "discover_product_types", {}).get("results_entry")
- and getattr(self.config, "discover_product_types", {}).get("result_type")
+ getattr(self.config, "discover_collections", {}).get("results_entry")
+ and getattr(self.config, "discover_collections", {}).get("result_type")
  == "json"
  ):
- self.config.discover_product_types["results_entry"] = string_to_jsonpath(
- self.config.discover_product_types["results_entry"], force=True
+ self.config.discover_collections["results_entry"] = string_to_jsonpath(
+ self.config.discover_collections["results_entry"], force=True
  )
- self.config.discover_product_types[
- "generic_product_type_id"
+ self.config.discover_collections[
+ "generic_collection_id"
  ] = mtd_cfg_as_conversion_and_querypath(
- {"foo": self.config.discover_product_types["generic_product_type_id"]}
+ {"foo": self.config.discover_collections["generic_collection_id"]}
  )[
  "foo"
  ]
- self.config.discover_product_types[
- "generic_product_type_parsable_properties"
+ self.config.discover_collections[
+ "generic_collection_parsable_properties"
  ] = mtd_cfg_as_conversion_and_querypath(
- self.config.discover_product_types[
- "generic_product_type_parsable_properties"
+ self.config.discover_collections[
+ "generic_collection_parsable_properties"
  ]
  )
- self.config.discover_product_types[
- "generic_product_type_parsable_metadata"
+ self.config.discover_collections[
+ "generic_collection_parsable_metadata"
  ] = mtd_cfg_as_conversion_and_querypath(
- self.config.discover_product_types[
- "generic_product_type_parsable_metadata"
- ]
+ self.config.discover_collections["generic_collection_parsable_metadata"]
  )
  if (
- "single_product_type_parsable_metadata"
- in self.config.discover_product_types
+ "single_collection_parsable_metadata"
+ in self.config.discover_collections
  ):
- self.config.discover_product_types[
- "single_product_type_parsable_metadata"
+ self.config.discover_collections[
+ "single_collection_parsable_metadata"
  ] = mtd_cfg_as_conversion_and_querypath(
- self.config.discover_product_types[
- "single_product_type_parsable_metadata"
+ self.config.discover_collections[
+ "single_collection_parsable_metadata"
  ]
  )

@@ -384,62 +385,60 @@ class QueryStringSearch(Search):
  self.config.discover_queryables["results_entry"], force=True
  )

- # parse jsonpath on init: product type specific metadata-mapping
- for product_type in self.config.products.keys():
+ # parse jsonpath on init: collection specific metadata-mapping
+ for collection in self.config.products.keys():

- product_type_metadata_mapping = {}
- # product-type specific metadata-mapping
+ collection_metadata_mapping = {}
+ # collection specific metadata-mapping
  if any(
- mm in self.config.products[product_type].keys()
+ mm in self.config.products[collection].keys()
  for mm in ("metadata_mapping", "metadata_mapping_from_product")
  ):
- # Complete and ready to use product type specific metadata-mapping
- product_type_metadata_mapping = deepcopy(self.config.metadata_mapping)
+ # Complete and ready to use collection specific metadata-mapping
+ collection_metadata_mapping = deepcopy(self.config.metadata_mapping)

  # metadata_mapping from another product
- if other_product_for_mapping := self.config.products[product_type].get(
+ if other_product_for_mapping := self.config.products[collection].get(
  "metadata_mapping_from_product"
  ):
- other_product_type_def_params = self.get_product_type_def_params(
+ other_collection_def_params = self.get_collection_def_params(
  other_product_for_mapping,
  )
  # parse mapping to apply
- if other_product_type_mtd_mapping := other_product_type_def_params.get(
+ if other_collection_mtd_mapping := other_collection_def_params.get(
  "metadata_mapping", {}
  ):
- other_product_type_mtd_mapping = (
- mtd_cfg_as_conversion_and_querypath(
- other_product_type_def_params.get("metadata_mapping", {})
- )
+ other_collection_mtd_mapping = mtd_cfg_as_conversion_and_querypath(
+ other_collection_def_params.get("metadata_mapping", {})
  )
  else:
- msg = f"Cannot reuse empty metadata_mapping from {other_product_for_mapping} for {product_type}"
+ msg = f"Cannot reuse empty metadata_mapping from {other_product_for_mapping} for {collection}"
  raise MisconfiguredError(msg)
  # update mapping
- for metadata, mapping in other_product_type_mtd_mapping.items():
- product_type_metadata_mapping.pop(metadata, None)
- product_type_metadata_mapping[metadata] = mapping
+ for metadata, mapping in other_collection_mtd_mapping.items():
+ collection_metadata_mapping.pop(metadata, None)
+ collection_metadata_mapping[metadata] = mapping

  # metadata_mapping from current product
- if "metadata_mapping" in self.config.products[product_type].keys():
+ if "metadata_mapping" in self.config.products[collection].keys():
  # parse mapping to apply
- self.config.products[product_type][
+ self.config.products[collection][
  "metadata_mapping"
  ] = mtd_cfg_as_conversion_and_querypath(
- self.config.products[product_type]["metadata_mapping"]
+ self.config.products[collection]["metadata_mapping"]
  )

  # from current product, updated mapping at the end
- for metadata, mapping in self.config.products[product_type][
+ for metadata, mapping in self.config.products[collection][
  "metadata_mapping"
  ].items():
- product_type_metadata_mapping.pop(metadata, None)
- product_type_metadata_mapping[metadata] = mapping
+ collection_metadata_mapping.pop(metadata, None)
+ collection_metadata_mapping[metadata] = mapping

- if product_type_metadata_mapping:
- self.config.products[product_type][
+ if collection_metadata_mapping:
+ self.config.products[collection][
  "metadata_mapping"
- ] = product_type_metadata_mapping
+ ] = collection_metadata_mapping

  def clear(self) -> None:
  """Clear search context"""
@@ -451,32 +450,32 @@ class QueryStringSearch(Search):
  self.next_page_query_obj = None
  self.next_page_merge = None

- def discover_product_types(self, **kwargs: Any) -> Optional[dict[str, Any]]:
- """Fetch product types list from provider using `discover_product_types` conf
+ def discover_collections(self, **kwargs: Any) -> Optional[dict[str, Any]]:
+ """Fetch collections list from provider using `discover_collections` conf

- :returns: configuration dict containing fetched product types information
+ :returns: configuration dict containing fetched collections information
  """
- unpaginated_fetch_url = self.config.discover_product_types.get("fetch_url")
+ unpaginated_fetch_url = self.config.discover_collections.get("fetch_url")
  if not unpaginated_fetch_url:
  return None

- # product types pagination
- next_page_url_tpl = self.config.discover_product_types.get("next_page_url_tpl")
- page = self.config.discover_product_types.get("start_page", 1)
+ # collections pagination
+ next_page_url_tpl = self.config.discover_collections.get("next_page_url_tpl")
+ page = self.config.discover_collections.get("start_page", 1)

  if not next_page_url_tpl:
  # no pagination
- return self.discover_product_types_per_page(**kwargs)
+ return self.discover_collections_per_page(**kwargs)

  conf_update_dict: dict[str, Any] = {
  "providers_config": {},
- "product_types_config": {},
+ "collections_config": {},
  }

  while True:
  fetch_url = next_page_url_tpl.format(url=unpaginated_fetch_url, page=page)

- conf_update_dict_per_page = self.discover_product_types_per_page(
+ conf_update_dict_per_page = self.discover_collections_per_page(
  fetch_url=fetch_url, **kwargs
  )

@@ -491,29 +490,27 @@ class QueryStringSearch(Search):
  conf_update_dict["providers_config"].update(
  conf_update_dict_per_page["providers_config"]
  )
- conf_update_dict["product_types_config"].update(
- conf_update_dict_per_page["product_types_config"]
+ conf_update_dict["collections_config"].update(
+ conf_update_dict_per_page["collections_config"]
  )

  page += 1

  return conf_update_dict

- def discover_product_types_per_page(
- self, **kwargs: Any
- ) -> Optional[dict[str, Any]]:
- """Fetch product types list from provider using `discover_product_types` conf
+ def discover_collections_per_page(self, **kwargs: Any) -> Optional[dict[str, Any]]:
+ """Fetch collections list from provider using `discover_collections` conf
  using paginated ``kwargs["fetch_url"]``

- :returns: configuration dict containing fetched product types information
+ :returns: configuration dict containing fetched collections information
  """
  try:
  prep = PreparedSearch()

- # url from discover_product_types() or conf
+ # url from discover_collections() or conf
  fetch_url: Optional[str] = kwargs.get("fetch_url")
  if fetch_url is None:
- if fetch_url := self.config.discover_product_types.get("fetch_url"):
+ if fetch_url := self.config.discover_collections.get("fetch_url"):
  fetch_url = fetch_url.format(**self.config.__dict__)
  else:
  return None
@@ -523,13 +520,15 @@ class QueryStringSearch(Search):
  if "auth" in kwargs:
  prep.auth = kwargs.pop("auth")

- # try updating fetch_url qs using productType
+ # try updating fetch_url qs using collection
  fetch_qs_dict = {}
- if "single_collection_fetch_qs" in self.config.discover_product_types:
+ if "single_collection_fetch_qs" in self.config.discover_collections:
  try:
- fetch_qs = self.config.discover_product_types[
- "single_collection_fetch_qs"
- ].format(**kwargs)
+ fetch_qs = format_string(
+ None,
+ self.config.discover_collections["single_collection_fetch_qs"],
+ **kwargs,
+ )
  fetch_qs_dict = dict(parse_qsl(fetch_qs))
  except KeyError:
  pass
@@ -542,14 +541,14 @@ class QueryStringSearch(Search):
  url_parse = url_parse._replace(query=url_new_query)
  prep.url = urlunparse(url_parse)

- prep.info_message = "Fetching product types: {}".format(prep.url)
+ prep.info_message = "Fetching collections: {}".format(prep.url)
  prep.exception_message = (
- "Skipping error while fetching product types for {} {} instance:"
+ "Skipping error while fetching collections for {} {} instance:"
  ).format(self.provider, self.__class__.__name__)

  # Query using appropriate method
- fetch_method = self.config.discover_product_types.get("fetch_method", "GET")
- fetch_body = self.config.discover_product_types.get("fetch_body", {})
+ fetch_method = self.config.discover_collections.get("fetch_method", "GET")
+ fetch_body = self.config.discover_collections.get("fetch_body", {})
  if fetch_method == "POST" and isinstance(self, PostJsonSearch):
  prep.query_params = fetch_body
  response = self._request(prep)
@@ -561,15 +560,15 @@ class QueryStringSearch(Search):
  try:
  conf_update_dict: dict[str, Any] = {
  "providers_config": {},
- "product_types_config": {},
+ "collections_config": {},
  }
- if self.config.discover_product_types["result_type"] == "json":
+ if self.config.discover_collections["result_type"] == "json":
  resp_as_json = response.json()
  # extract results from response json
- results_entry = self.config.discover_product_types["results_entry"]
+ results_entry = self.config.discover_collections["results_entry"]
  if not isinstance(results_entry, JSONPath):
  logger.warning(
- f"Could not parse {self.provider} discover_product_types.results_entry"
+ f"Could not parse {self.provider} discover_collections.results_entry"
  f" as JSONPath: {results_entry}"
  )
  return None
@@ -577,99 +576,124 @@ class QueryStringSearch(Search):
  if result and isinstance(result[0], list):
  result = result[0]

- def conf_update_from_product_type_result(
- product_type_result: dict[str, Any],
+ def conf_update_from_collection_result(
+ collection_result: dict[str, Any],
  ) -> None:
- """Update ``conf_update_dict`` using given product type json response"""
+ """Update ``conf_update_dict`` using given collection json response"""
  # providers_config extraction
  extracted_mapping = properties_from_json(
- product_type_result,
+ collection_result,
  dict(
- self.config.discover_product_types[
- "generic_product_type_parsable_properties"
+ self.config.discover_collections[
+ "generic_collection_parsable_properties"
  ],
  **{
- "generic_product_type_id": self.config.discover_product_types[
- "generic_product_type_id"
+ "generic_collection_id": self.config.discover_collections[
+ "generic_collection_id"
  ]
  },
  ),
  )
- generic_product_type_id = extracted_mapping.pop(
- "generic_product_type_id"
+ generic_collection_id = extracted_mapping.pop(
+ "generic_collection_id"
  )
  conf_update_dict["providers_config"][
- generic_product_type_id
+ generic_collection_id
  ] = dict(
  extracted_mapping,
- **self.config.discover_product_types.get(
- "generic_product_type_unparsable_properties", {}
+ **self.config.discover_collections.get(
+ "generic_collection_unparsable_properties", {}
  ),
  )
- # product_types_config extraction
- conf_update_dict["product_types_config"][
- generic_product_type_id
- ] = properties_from_json(
- product_type_result,
- self.config.discover_product_types[
- "generic_product_type_parsable_metadata"
+ # collections_config extraction
+ collection_properties = properties_from_json(
+ collection_result,
+ self.config.discover_collections[
+ "generic_collection_parsable_metadata"
  ],
  )
-
+ conf_update_dict["collections_config"][
+ generic_collection_id
+ ] = {
+ k: v
+ for k, v in collection_properties.items()
+ if v != NOT_AVAILABLE
+ }
  if (
- "single_product_type_parsable_metadata"
- in self.config.discover_product_types
+ "single_collection_parsable_metadata"
+ in self.config.discover_collections
  ):
- collection_data = self._get_product_type_metadata_from_single_collection_endpoint(
- generic_product_type_id
+ collection_data = self._get_collection_metadata_from_single_collection_endpoint(
+ generic_collection_id
  )
- conf_update_dict["product_types_config"][
- generic_product_type_id
- ].update(collection_data)
-
- # update product type id if needed
- if collection_data_id := collection_data.get("ID"):
- if generic_product_type_id != collection_data_id:
- logger.debug(
- "Rename %s product type to %s",
- generic_product_type_id,
- collection_data_id,
- )
- conf_update_dict["providers_config"][
- collection_data_id
- ] = conf_update_dict["providers_config"].pop(
- generic_product_type_id
- )
- conf_update_dict["product_types_config"][
- collection_data_id
- ] = conf_update_dict["product_types_config"].pop(
- generic_product_type_id
- )
- generic_product_type_id = collection_data_id
+ collection_data_id = collection_data.pop("id", None)
+
+ # remove collection if it must have be renamed but renaming failed
+ if (
+ collection_data_id
+ and collection_data_id == NOT_AVAILABLE
+ ):
+ del conf_update_dict["collections_config"][
+ generic_collection_id
+ ]
+ del conf_update_dict["providers_config"][
+ generic_collection_id
+ ]
+ return
+
+ conf_update_dict["collections_config"][
+ generic_collection_id
+ ] |= {
+ k: v
+ for k, v in collection_data.items()
+ if v != NOT_AVAILABLE
+ }
+
+ # update collection id if needed
+ if (
+ collection_data_id
+ and collection_data_id != generic_collection_id
+ ):
+ logger.debug(
+ "Rename %s collection to %s",
+ generic_collection_id,
+ collection_data_id,
+ )
+ conf_update_dict["providers_config"][
+ collection_data_id
+ ] = conf_update_dict["providers_config"].pop(
+ generic_collection_id
+ )
+ conf_update_dict["collections_config"][
+ collection_data_id
+ ] = conf_update_dict["collections_config"].pop(
+ generic_collection_id
+ )
+ generic_collection_id = collection_data_id

  # update keywords
  keywords_fields = [
- "instrument",
+ "instruments",
+ "constellation",
  "platform",
- "platformSerialIdentifier",
- "processingLevel",
+ "processing:level",
  "keywords",
  ]
  keywords_values_str = ",".join(
- [generic_product_type_id]
+ [generic_collection_id]
  + [
  str(
- conf_update_dict["product_types_config"][
- generic_product_type_id
+ conf_update_dict["collections_config"][
+ generic_collection_id
  ][kf]
  )
  for kf in keywords_fields
  if kf
- in conf_update_dict["product_types_config"][
- generic_product_type_id
+ in conf_update_dict["collections_config"][
+ generic_collection_id
  ]
- and conf_update_dict["product_types_config"][
- generic_product_type_id
+ and conf_update_dict["collections_config"][
+ generic_collection_id
  ][kf]
  != NOT_AVAILABLE
  ]
@@ -685,128 +709,131 @@ class QueryStringSearch(Search):
  r"[\[\]'\"]", "", keywords_values_str
  )
  # sorted list of unique lowercase keywords
- keywords_values_str = ",".join(
- sorted(set(keywords_values_str.split(",")))
- )
- conf_update_dict["product_types_config"][
- generic_product_type_id
- ]["keywords"] = keywords_values_str
+ keywords_values = sorted(set(keywords_values_str.split(",")))
+
+ conf_update_dict["collections_config"][generic_collection_id][
+ "keywords"
+ ] = keywords_values

  # runs concurrent requests and aggregate results in conf_update_dict
- max_connections = self.config.discover_product_types.get(
+ max_connections = self.config.discover_collections.get(
  "max_connections"
  )
  with concurrent.futures.ThreadPoolExecutor(
  max_workers=max_connections
  ) as executor:
  futures = (
- executor.submit(conf_update_from_product_type_result, r)
+ executor.submit(conf_update_from_collection_result, r)
  for r in result
  )
  [f.result() for f in concurrent.futures.as_completed(futures)]

  except KeyError as e:
  logger.warning(
- "Incomplete %s discover_product_types configuration: %s",
+ "Incomplete %s discover_collections configuration: %s",
  self.provider,
  e,
  )
  return None
  except requests.RequestException as e:
  logger.debug(
- "Could not parse discovered product types response from "
+ "Could not parse discovered collections response from "
  f"{self.provider}, {type(e).__name__}: {e.args}"
  )
  return None
- conf_update_dict["product_types_config"] = dict_items_recursive_apply(
- conf_update_dict["product_types_config"],
+ conf_update_dict["collections_config"] = dict_items_recursive_apply(
+ conf_update_dict["collections_config"],
  lambda k, v: v if v != NOT_AVAILABLE else None,
  )
  return conf_update_dict

- def _get_product_type_metadata_from_single_collection_endpoint(
- self, product_type: str
+ def _get_collection_metadata_from_single_collection_endpoint(
+ self, collection: str
  ) -> dict[str, Any]:
  """
- retrieves additional product type information from an endpoint returning data for a single collection
- :param product_type: product type
- :return: product types and their metadata
+ retrieves additional collection information from an endpoint returning data for a single collection
+ :param collection: collection
+ :return: collections and their metadata
  """
- single_collection_url = self.config.discover_product_types[
+ single_collection_url = self.config.discover_collections[
  "single_collection_fetch_url"
- ].format(productType=product_type)
+ ].format(_collection=collection)
  resp = QueryStringSearch._request(
  self,
  PreparedSearch(
  url=single_collection_url,
- info_message=f"Fetching data for product type: {product_type}",
- exception_message="Skipping error while fetching product types for "
+ info_message=f"Fetching data for collection: {collection}",
+ exception_message="Skipping error while fetching collections for "
  "{} {} instance:".format(self.provider, self.__class__.__name__),
  ),
  )
  product_data = resp.json()
  return properties_from_json(
  product_data,
- self.config.discover_product_types["single_product_type_parsable_metadata"],
+ self.config.discover_collections["single_collection_parsable_metadata"],
  )

  def query(
  self,
  prep: PreparedSearch = PreparedSearch(),
  **kwargs: Any,
- ) -> tuple[list[EOProduct], Optional[int]]:
+ ) -> SearchResult:
  """Perform a search on an OpenSearch-like interface

  :param prep: Object collecting needed information for search.
  """
  count = prep.count
- product_type = cast(str, kwargs.get("productType", prep.product_type))
- if product_type == GENERIC_PRODUCT_TYPE:
+ raise_errors = getattr(prep, "raise_errors", False)
+ collection = cast(str, kwargs.get("collection", prep.collection))
+ if collection == GENERIC_COLLECTION:
  logger.warning(
- "GENERIC_PRODUCT_TYPE is not a real product_type and should only be used internally as a template"
+ "GENERIC_COLLECTION is not a real collection and should only be used internally as a template"
  )
- return ([], 0) if prep.count else ([], None)
+ result = SearchResult([])
+ if prep.count and not result.number_matched:
+ result.number_matched = 0
+ return result

  sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
  prep.sort_by_qs, _ = (
  ("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
  )

- provider_product_type = self.map_product_type(product_type)
+ provider_collection = self.map_collection(collection)
  keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}
- keywords["productType"] = (
- provider_product_type
- if (provider_product_type and provider_product_type != GENERIC_PRODUCT_TYPE)
- else product_type
+ keywords["collection"] = (
+ provider_collection
+ if (provider_collection and provider_collection != GENERIC_COLLECTION)
+ else collection
  )

- # provider product type specific conf
- prep.product_type_def_params = (
- self.get_product_type_def_params(product_type, format_variables=kwargs)
- if product_type is not None
+ # provider collection specific conf
+ prep.collection_def_params = (
+ self.get_collection_def_params(collection, format_variables=kwargs)
+ if collection is not None
  else {}
  )

- # if product_type_def_params is set, remove product_type as it may conflict with this conf
- if prep.product_type_def_params:
- keywords.pop("productType", None)
+ # if collection_def_params is set, remove collection as it may conflict with this conf
+ if prep.collection_def_params:
+ keywords.pop("collection", None)

  if self.config.metadata_mapping:
- product_type_metadata_mapping = dict(
+ collection_metadata_mapping = dict(
  self.config.metadata_mapping,
- **prep.product_type_def_params.get("metadata_mapping", {}),
+ **prep.collection_def_params.get("metadata_mapping", {}),
  )
  keywords.update(
  {
  k: v
- for k, v in prep.product_type_def_params.items()
+ for k, v in prep.collection_def_params.items()
  if k not in keywords.keys()
- and k in product_type_metadata_mapping.keys()
- and isinstance(product_type_metadata_mapping[k], list)
+ and k in collection_metadata_mapping.keys()
+ and isinstance(collection_metadata_mapping[k], list)
  }
  )

- qp, qs = self.build_query_string(product_type, keywords)
+ qp, qs = self.build_query_string(collection, keywords)

  prep.query_params = qp
  prep.query_string = qs
@@ -822,31 +849,27 @@ class QueryStringSearch(Search):
  provider_results = self.do_search(prep, **kwargs)
  if count and total_items is None and hasattr(prep, "total_items_nb"):
  total_items = prep.total_items_nb
-
- raw_search_result = RawSearchResult(provider_results)
- raw_search_result.query_params = prep.query_params
- raw_search_result.product_type_def_params = prep.product_type_def_params
-
- eo_products = self.normalize_results(raw_search_result, **kwargs)
- return eo_products, total_items
-
- @_deprecated(
- reason="Simply run `self.config.metadata_mapping.update(metadata_mapping)` instead",
- version="2.10.0",
- )
- def update_metadata_mapping(self, metadata_mapping: dict[str, Any]) -> None:
- """Update plugin metadata_mapping with input metadata_mapping configuration"""
- if self.config.metadata_mapping:
- self.config.metadata_mapping.update(metadata_mapping)
+ if not count and "number_matched" in kwargs:
+ total_items = kwargs["number_matched"]
+
+ eo_products = self.normalize_results(provider_results, **kwargs)
+ formated_result = SearchResult(
+ eo_products,
+ total_items,
+ search_params=provider_results.search_params,
+ next_page_token=getattr(provider_results, "next_page_token", None),
+ raise_errors=raise_errors,
+ )
+ return formated_result

  def build_query_string(
- self, product_type: str, query_dict: dict[str, Any]
+ self, collection: str, query_dict: dict[str, Any]
  ) -> tuple[dict[str, Any], str]:
  """Build The query string using the search parameters"""
  logger.debug("Building the query string that will be used for search")
- error_context = f"Product type: {product_type} / provider : {self.provider}"
+ error_context = f"Collection: {collection} / provider : {self.provider}"
  query_params = format_query_params(
- product_type, self.config, query_dict, error_context
+ collection, self.config, query_dict, error_context
  )

  # Build the final query string, in one go without quoting it
@@ -865,9 +888,12 @@ class QueryStringSearch(Search):
  **kwargs: Any,
  ) -> tuple[list[str], Optional[int]]:
  """Build paginated urls"""
- page = prep.page
+ token = getattr(prep, "next_page_token", None)
  items_per_page = prep.items_per_page
  count = prep.count
+ next_page_token_key = str(
+ self.config.pagination.get("next_page_token_key", "page")
+ )

  urls = []
  total_results = 0 if count else None
@@ -885,19 +911,32 @@ class QueryStringSearch(Search):
  prep.need_count = True
  prep.total_items_nb = None

- for collection in self.get_collections(prep, **kwargs) or (None,):
+ for provider_collection in self.get_provider_collections(prep, **kwargs) or (
+ None,
+ ):
  # skip empty collection if one is required in api_endpoint
- if "{collection}" in self.config.api_endpoint and not collection:
+ if "{_collection}" in self.config.api_endpoint and not provider_collection:
  continue
  search_endpoint = self.config.api_endpoint.rstrip("/").format(
- collection=collection
+ _collection=provider_collection
  )
- if page is not None and items_per_page is not None:
- page = page - 1 + self.config.pagination.get("start_page", 1)
+ # numeric page token
+ if (
+ next_page_token_key == "page" or next_page_token_key == "skip"
+ ) and items_per_page is not None:
+ if token is None and next_page_token_key == "skip":
+ # first page & next_page_token_key == skip
+ token = 0
+ elif token is None:
+ # first page & next_page_token_key == page
+ token = self.config.pagination.get("start_page", DEFAULT_PAGE)
+ else:
+ # next pages
+ token = int(token)
  if count:
  count_endpoint = self.config.pagination.get(
  "count_endpoint", ""
- ).format(collection=collection)
+ ).format(_collection=provider_collection)
  if count_endpoint:
  count_url = "{}?{}".format(count_endpoint, prep.query_string)
  _total_results = (
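A simplified restatement of the numeric-token initialisation added above, with illustrative values (not the actual plugin code):

    def first_token(next_page_token_key: str, start_page: int = 1) -> int:
        # "skip" starts at 0, "page" starts at the configured start_page
        return 0 if next_page_token_key == "skip" else start_page

    assert first_token("skip") == 0
    assert first_token("page") == 1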
@@ -917,22 +956,23 @@ class QueryStringSearch(Search):
  raise MisconfiguredError(
  f"next_page_url_tpl is missing in {self.provider} search.pagination configuration"
  )
- next_url = self.config.pagination["next_page_url_tpl"].format(
+ next_page_url = self.config.pagination["next_page_url_tpl"].format(
  url=search_endpoint,
  search=qs_with_sort,
  items_per_page=items_per_page,
- page=page,
- skip=(page - 1) * items_per_page,
- skip_base_1=(page - 1) * items_per_page + 1,
+ next_page_token=token,
+ skip=token,
  )
- else:
- next_url = "{}?{}".format(search_endpoint, qs_with_sort)
- urls.append(next_url)
+
+ if token is not None:
+ prep.next_page_token = token
+ urls.append(next_page_url)
+
  return list(dict.fromkeys(urls)), total_results

  def do_search(
  self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
- ) -> list[Any]:
+ ) -> RawSearchResult:
  """Perform the actual search request.

  If there is a specified number of items per page, return the results as soon
@@ -973,6 +1013,7 @@ class QueryStringSearch(Search):
  if self.config.result_type == "xml":
  root_node = etree.fromstring(response.content)
  namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
+ resp_as_json = {}
  results_xpath = root_node.xpath(
  self.config.results_entry or "//ns:entry", namespaces=namespaces
  )
@@ -1017,7 +1058,6 @@ class QueryStringSearch(Search):
  path_parsed = next_page_url_key_path
  found_paths = path_parsed.find(resp_as_json)
  if found_paths and not isinstance(found_paths, int):
- self.next_page_url = found_paths[0].value
  logger.debug(
  "Next page URL collected and set for the next search",
  )
@@ -1027,7 +1067,6 @@ class QueryStringSearch(Search):
  path_parsed = next_page_query_obj_key_path
  found_paths = path_parsed.find(resp_as_json)
  if found_paths and not isinstance(found_paths, int):
- self.next_page_query_obj = found_paths[0].value
  logger.debug(
  "Next page Query-object collected and set for the next search",
  )
@@ -1095,8 +1134,135 @@ class QueryStringSearch(Search):
  ):
  del prep.total_items_nb
  if items_per_page is not None and len(results) == items_per_page:
- return results
- return results
+
+ raw_search_results = self._build_raw_search_results(
+ results, resp_as_json, kwargs, items_per_page, prep
+ )
+ return raw_search_results
+
+ raw_search_results = self._build_raw_search_results(
+ results, resp_as_json, kwargs, items_per_page, prep
+ )
+ return raw_search_results
+
+ def _build_raw_search_results(
+ self,
+ results: list[dict[str, Any]],
+ resp_as_json: dict[str, Any],
+ search_kwargs: dict[str, Any],
+ items_per_page: Optional[int],
+ prep: PreparedSearch,
+ ):
+ """
+ Build a `RawSearchResult` object from raw search results.
+
+ This method initializes a `RawSearchResult` instance with the provided results,
+ sets the search parameters, and determines the token or identifier for the next page
+ based on the pagination configuration.
+
+ :param results: Raw results returned by the search.
+ :param resp_as_json: The search response parsed as JSON.
+ :param search_kwargs: Search parameters used for the query.
+ :param items_per_page: Number of items per page.
+ :param prep: Request preparation object containing query parameters.
+ :returns: An object containing the raw results, search parameters, and the next page token if available.
+ """
+ # Create the RawSearchResult object and populate basic fields
+ raw_search_results = RawSearchResult(results)
+ raw_search_results.search_params = search_kwargs | {
+ "items_per_page": items_per_page
+ }
+ raw_search_results.query_params = prep.query_params
+ raw_search_results.collection_def_params = prep.collection_def_params
+ raw_search_results.next_page_token_key = prep.next_page_token_key
+
+ # If no JSON response is available, return the result as is
+ if resp_as_json is None:
+ return raw_search_results
+
+ # Handle pagination
+ if self.config.pagination.get("next_page_query_obj_key_path") is not None:
+ # Use next_page_query_obj_key_path to find the next page token in the response
+ jsonpath_expr = string_to_jsonpath(
+ self.config.pagination["next_page_query_obj_key_path"]
+ )
+ if isinstance(jsonpath_expr, str):
+ raise PluginImplementationError(
+ "next_page_query_obj_key_path must be parsed to JSONPath on plugin init"
+ )
+ jsonpath_match = jsonpath_expr.find(resp_as_json)
+ if jsonpath_match:
+ next_page_query_obj = jsonpath_match[0].value
+ next_page_token_key = raw_search_results.next_page_token_key
+ if next_page_token_key and next_page_token_key in next_page_query_obj:
+ raw_search_results.next_page_token = next_page_query_obj[
+ next_page_token_key
+ ]
+ else:
+ for token_key in KNOWN_NEXT_PAGE_TOKEN_KEYS:
+ if token_key in next_page_query_obj:
+ raw_search_results.next_page_token = next_page_query_obj[
+ token_key
+ ]
+ raw_search_results.next_page_token_key = token_key
+ logger.debug(
+ "Using '%s' as next_page_token_key for the next search",
+ token_key,
+ )
+ break
+ else:
+ raw_search_results.next_page_token = None
+ elif self.config.pagination.get("next_page_url_key_path") is not None:
+ jsonpath_expr = string_to_jsonpath(
+ self.config.pagination["next_page_url_key_path"]
+ )
+ # Use next_page_url_key_path to find the next page token in the response
+ if isinstance(jsonpath_expr, str):
+ raise PluginImplementationError(
+ "next_page_url_key_path must be parsed to JSONPath on plugin init"
+ )
+ href = jsonpath_expr.find(resp_as_json)
+ if href:
+ # Determine the key to extract the token from the URL or object
+ href_value = href[0].value
+ next_page_token_key = (
+ unquote(self.config.pagination["parse_url_key"])
+ if "parse_url_key" in self.config.pagination
+ else raw_search_results.next_page_token_key
+ )
+ raw_search_results.next_page_token_key = next_page_token_key
+ # Try to extract the token from the found value
+ if next_page_token_key in href_value:
+ raw_search_results.next_page_token = href_value[next_page_token_key]
+ elif next_page_token_key in unquote(href_value):
+ # If the token is in the URL query string
+ query = urlparse(href_value).query
+ page_param = parse_qs(query).get(next_page_token_key)
+ if page_param:
+ raw_search_results.next_page_token = page_param[0]
+ else:
+ # Use the whole value as the token
+ raw_search_results.next_page_token = href_value
+ else:
+ # No token found: set to empty string
+ raw_search_results.next_page_token = None
+ else:
+ # pagination using next_page_token_key
+ next_page_token_key = raw_search_results.next_page_token_key
+ next_page_token = prep.next_page_token
+ # page number as next_page_token_key
+ if next_page_token is not None and next_page_token_key == "page":
+ raw_search_results.next_page_token = str(int(next_page_token) + 1)
+ # skip as next_page_token_key
+ elif next_page_token is not None and next_page_token_key == "skip":
+ raw_search_results.next_page_token = str(
+ int(next_page_token)
+ + int(prep.items_per_page or DEFAULT_ITEMS_PER_PAGE)
+ )
+ else:
+ raw_search_results.next_page_token = None
+
+ return raw_search_results

  def normalize_results(
  self, results: RawSearchResult, **kwargs: Any
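A minimal sketch of the URL branch added above, i.e. pulling a next-page token out of a provider "next" link with parse_qs; the URL and parameter name are invented for the example:

    from urllib.parse import parse_qs, urlparse

    href_value = "https://example.com/search?q=S2&page=3"   # hypothetical "next" link
    next_page_token_key = "page"
    page_param = parse_qs(urlparse(href_value).query).get(next_page_token_key)
    next_page_token = page_param[0] if page_param else href_value  # fall back to the whole value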
@@ -1110,19 +1276,16 @@ class QueryStringSearch(Search):
  products: list[EOProduct] = []
  asset_key_from_href = getattr(self.config, "asset_key_from_href", True)
  for result in results:
- product = EOProduct(
- self.provider,
- QueryStringSearch.extract_properties[self.config.result_type](
- result,
- self.get_metadata_mapping(kwargs.get("productType")),
- discovery_config=getattr(self.config, "discover_metadata", {}),
- ),
- **kwargs,
- )
- # use product_type_config as default properties
- product.properties = dict(
- getattr(self.config, "product_type_config", {}), **product.properties
+ properties = QueryStringSearch.extract_properties[self.config.result_type](
+ result,
+ self.get_metadata_mapping(kwargs.get("collection")),
+ discovery_config=getattr(self.config, "discover_metadata", {}),
  )
+ # collection alias (required by opentelemetry-instrumentation-eodag)
+ if alias := getattr(self.config, "collection_config", {}).get("alias"):
+ properties["eodag:alias"] = alias
+ product = EOProduct(self.provider, properties, **kwargs)
+
  additional_assets = self.get_assets_from_mapping(result)
  product.assets.update(additional_assets)
  # move assets from properties to product's attr, normalize keys & roles
@@ -1178,49 +1341,47 @@ class QueryStringSearch(Search):
         total_results = int(count_results)
         return total_results
 
-    def get_collections(self, prep: PreparedSearch, **kwargs: Any) -> tuple[str, ...]:
-        """Get the collection to which the product belongs"""
-        # See https://earth.esa.int/web/sentinel/missions/sentinel-2/news/-
-        # /asset_publisher/Ac0d/content/change-of
-        # -format-for-new-sentinel-2-level-1c-products-starting-on-6-december
-        product_type: Optional[str] = kwargs.get("productType")
-        collection: Optional[str] = None
-        if product_type is None and (
-            not hasattr(prep, "product_type_def_params")
-            or not prep.product_type_def_params
+    def get_provider_collections(
+        self, prep: PreparedSearch, **kwargs: Any
+    ) -> tuple[str, ...]:
+        """Get the _collection(s) / provider collection(s) to which the product belongs"""
+        collection: Optional[str] = kwargs.get("collection")
+        provider_collection: Optional[str] = None
+        if collection is None and (
+            not hasattr(prep, "collection_def_params") or not prep.collection_def_params
         ):
             collections: set[str] = set()
-            collection = getattr(self.config, "collection", None)
-            if collection is None:
+            provider_collection = getattr(self.config, "_collection", None)
+            if provider_collection is None:
                 try:
-                    for product_type, product_config in self.config.products.items():
-                        if product_type != GENERIC_PRODUCT_TYPE:
-                            collections.add(product_config["collection"])
+                    for collection, product_config in self.config.products.items():
+                        if collection != GENERIC_COLLECTION:
+                            collections.add(product_config["_collection"])
                         else:
                             collections.add(
                                 format_dict_items(product_config, **kwargs).get(
-                                    "collection", ""
+                                    "_collection", ""
                                 )
                             )
                 except KeyError:
                     collections.add("")
             else:
-                collections.add(collection)
+                collections.add(provider_collection)
             return tuple(collections)
 
-        collection = getattr(self.config, "collection", None)
-        if collection is None:
-            collection = (
-                getattr(prep, "product_type_def_params", {}).get("collection")
-                or product_type
+        provider_collection = getattr(self.config, "_collection", None)
+        if provider_collection is None:
+            provider_collection = (
+                getattr(prep, "collection_def_params", {}).get("_collection")
+                or collection
             )
 
-        if collection is None:
+        if provider_collection is None:
             return ()
-        elif not isinstance(collection, list):
-            return (collection,)
+        elif not isinstance(provider_collection, list):
+            return (provider_collection,)
         else:
-            return tuple(collection)
+            return tuple(provider_collection)
 
     def _request(
         self,
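Note: get_provider_collections() now distinguishes the eodag collection (the user-facing name) from the provider-side "_collection". A rough sketch of the resolution order it implements, using plain dicts instead of the plugin config (assumed structure, illustration only):

    from typing import Any, Optional

    GENERIC_COLLECTION = "GENERIC_COLLECTION"

    def resolve_provider_collections(
        products: dict[str, dict[str, Any]],
        plugin_level_collection: Optional[str],
        requested: Optional[str],
    ) -> tuple[str, ...]:
        if requested is None:
            # nothing requested: use the plugin-level value if set,
            # otherwise gather every per-collection "_collection" entry
            if plugin_level_collection is not None:
                return (plugin_level_collection,)
            return tuple(
                cfg.get("_collection", "")
                for name, cfg in products.items()
                if name != GENERIC_COLLECTION
            )
        # a collection was requested: plugin-level value wins, then the
        # collection's own "_collection", then the requested name itself
        resolved = (
            plugin_level_collection
            or products.get(requested, {}).get("_collection")
            or requested
        )
        return (resolved,)

    print(resolve_provider_collections({"S2_MSI_L1C": {"_collection": "SENTINEL-2"}}, None, "S2_MSI_L1C"))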
@@ -1350,9 +1511,9 @@ class ODataV4Search(QueryStringSearch):
                 operations: # The operations to build
                   <opname>: # e.g: AND
                     - <op1> # e.g:
-                      # 'sensingStartDate:[{startTimeFromAscendingNode}Z TO *]'
+                      # 'sensingStartDate:[{start_datetime}Z TO *]'
                     - <op2> # e.g:
-                      # 'sensingStopDate:[* TO {completionTimeFromAscendingNode}Z]'
+                      # 'sensingStopDate:[* TO {end_datetime}Z]'
                     ...
                   ...
             ...
@@ -1379,7 +1540,7 @@ class ODataV4Search(QueryStringSearch):
 
     def do_search(
         self, prep: PreparedSearch = PreparedSearch(), **kwargs: Any
-    ) -> list[Any]:
+    ) -> RawSearchResult:
         """A two step search can be performed if the metadata are not given into the search result"""
 
         if getattr(self.config, "per_product_metadata_query", False):
@@ -1410,7 +1571,17 @@ class ODataV4Search(QueryStringSearch):
                         {item["id"]: item["value"] for item in response.json()["value"]}
                     )
                 final_result.append(entity)
-            return final_result
+            raw_search_results = RawSearchResult(final_result)
+            raw_search_results.search_params = kwargs
+            raw_search_results.query_params = (
+                prep.query_params if hasattr(prep, "query_params") else {}
+            )
+            raw_search_results.collection_def_params = (
+                prep.collection_def_params
+                if hasattr(prep, "collection_def_params")
+                else {}
+            )
+            return raw_search_results
         else:
             return super(ODataV4Search, self).do_search(prep, **kwargs)
 
@@ -1482,62 +1653,64 @@ class PostJsonSearch(QueryStringSearch):
         self,
         prep: PreparedSearch = PreparedSearch(),
         **kwargs: Any,
-    ) -> tuple[list[EOProduct], Optional[int]]:
+    ) -> SearchResult:
         """Perform a search on an OpenSearch-like interface"""
-        product_type = kwargs.get("productType", "")
+        collection = kwargs.get("collection", "")
         count = prep.count
-        # remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
-        kwargs.pop("product_type", None)
+        raise_errors = getattr(prep, "raise_errors", False)
+        number_matched = kwargs.pop("number_matched", None)
         sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
         _, sort_by_qp = (
             ("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
         )
-        provider_product_type = self.map_product_type(product_type)
+        provider_collection = self.map_collection(collection)
         _dc_qs = kwargs.pop("_dc_qs", None)
         if _dc_qs is not None:
             qs = unquote_plus(unquote_plus(_dc_qs))
             qp = geojson.loads(qs)
 
-            # provider product type specific conf
-            prep.product_type_def_params = self.get_product_type_def_params(
-                product_type, format_variables=kwargs
+            # provider collection specific conf
+            prep.collection_def_params = self.get_collection_def_params(
+                collection, format_variables=kwargs
             )
         else:
             keywords = {
-                k: v for k, v in kwargs.items() if k != "auth" and v is not None
+                k: v
+                for k, v in kwargs.items()
+                if k not in ("auth", "collection") and v is not None
             }
 
-            if provider_product_type and provider_product_type != GENERIC_PRODUCT_TYPE:
-                keywords["productType"] = provider_product_type
-            elif product_type:
-                keywords["productType"] = product_type
+            if provider_collection and provider_collection != GENERIC_COLLECTION:
+                keywords["_collection"] = provider_collection
+            elif collection:
+                keywords["_collection"] = collection
 
-            # provider product type specific conf
-            prep.product_type_def_params = self.get_product_type_def_params(
-                product_type, format_variables=kwargs
+            # provider collection specific conf
+            prep.collection_def_params = self.get_collection_def_params(
+                collection, format_variables=kwargs
             )
 
-            # Add to the query, the queryable parameters set in the provider product type definition
-            product_type_metadata_mapping = {
+            # Add to the query, the queryable parameters set in the provider collection definition
+            collection_metadata_mapping = {
                 **getattr(self.config, "metadata_mapping", {}),
-                **prep.product_type_def_params.get("metadata_mapping", {}),
+                **prep.collection_def_params.get("metadata_mapping", {}),
             }
             keywords.update(
                 {
                     k: v
-                    for k, v in prep.product_type_def_params.items()
+                    for k, v in prep.collection_def_params.items()
                     if k not in keywords.keys()
-                    and k in product_type_metadata_mapping.keys()
-                    and isinstance(product_type_metadata_mapping[k], list)
+                    and k in collection_metadata_mapping.keys()
+                    and isinstance(collection_metadata_mapping[k], list)
                 }
             )
 
-            qp, _ = self.build_query_string(product_type, keywords)
+            qp, _ = self.build_query_string(collection, keywords)
 
         for query_param, query_value in qp.items():
             if (
                 query_param
-                in self.config.products.get(product_type, {}).get(
+                in self.config.products.get(collection, {}).get(
                     "specific_qssearch", {"parameters": []}
                 )["parameters"]
             ):
@@ -1545,20 +1718,20 @@ class PostJsonSearch(QueryStringSearch):
                 plugin_config_backup = yaml.dump(self.config)
 
                 self.config.api_endpoint = query_value
-                self.config.products[product_type][
+                self.config.products[collection][
                     "metadata_mapping"
                 ] = mtd_cfg_as_conversion_and_querypath(
-                    self.config.products[product_type]["specific_qssearch"][
+                    self.config.products[collection]["specific_qssearch"][
                         "metadata_mapping"
                     ]
                 )
-                self.config.results_entry = self.config.products[product_type][
+                self.config.results_entry = self.config.products[collection][
                     "specific_qssearch"
                 ]["results_entry"]
-                self.config.collection = self.config.products[product_type][
+                self.config._collection = self.config.products[collection][
                     "specific_qssearch"
-                ].get("collection")
-                self.config.merge_responses = self.config.products[product_type][
+                ].get("_collection")
+                self.config.merge_responses = self.config.products[collection][
                     "specific_qssearch"
                 ].get("merge_responses")
 
@@ -1569,9 +1742,7 @@ class PostJsonSearch(QueryStringSearch):
                     return super(PostJsonSearch, self)._request(*x, **y)
 
                 try:
-                    eo_products, total_items = super(PostJsonSearch, self).query(
-                        prep, **kwargs
-                    )
+                    eo_products = super(PostJsonSearch, self).query(prep, **kwargs)
                 except Exception:
                     raise
                 finally:
@@ -1580,21 +1751,24 @@ class PostJsonSearch(QueryStringSearch):
                         plugin_config_backup, self.config.yaml_loader
                     )
 
-                return eo_products, total_items
+                return eo_products
 
         # If we were not able to build query params but have queryable search criteria,
         # this means the provider does not support the search criteria given. If so,
         # stop searching right away
-        product_type_metadata_mapping = dict(
+        collection_metadata_mapping = dict(
             self.config.metadata_mapping,
-            **prep.product_type_def_params.get("metadata_mapping", {}),
+            **prep.collection_def_params.get("metadata_mapping", {}),
         )
         if not qp and any(
             k
             for k in keywords.keys()
-            if isinstance(product_type_metadata_mapping.get(k, []), list)
+            if isinstance(collection_metadata_mapping.get(k), list)
         ):
-            return ([], 0) if prep.count else ([], None)
+            result = SearchResult([])
+            if prep.count:
+                result.number_matched = 0
+            return result
         prep.query_params = dict(qp, **sort_by_qp)
         prep.search_urls, total_items = self.collect_search_urls(prep, **kwargs)
         if not count and getattr(prep, "need_count", False):
@@ -1605,13 +1779,19 @@ class PostJsonSearch(QueryStringSearch):
         provider_results = self.do_search(prep, **kwargs)
         if count and total_items is None and hasattr(prep, "total_items_nb"):
             total_items = prep.total_items_nb
-
-        raw_search_result = RawSearchResult(provider_results)
-        raw_search_result.query_params = prep.query_params
-        raw_search_result.product_type_def_params = prep.product_type_def_params
-
-        eo_products = self.normalize_results(raw_search_result, **kwargs)
-        return eo_products, total_items
+        if not count and "number_matched" in kwargs and number_matched:
+            total_items = number_matched
+
+        eo_products_normalize = self.normalize_results(provider_results, **kwargs)
+        formated_result = SearchResult(
+            eo_products_normalize,
+            total_items,
+            search_params=provider_results.search_params,
+            next_page_token=getattr(provider_results, "next_page_token", None),
+            next_page_token_key=getattr(provider_results, "next_page_token_key", None),
+            raise_errors=raise_errors,
+        )
+        return formated_result
 
     def normalize_results(
         self, results: RawSearchResult, **kwargs: Any
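Note: PostJsonSearch.query() no longer returns a (products, total) tuple; it returns a SearchResult that also carries the next-page token, so callers page with an opaque token instead of a page number. A hedged sketch of what token-driven paging looks like from the caller side (SearchResultSketch and fetch_page are stand-ins, not the eodag API):

    from dataclasses import dataclass
    from typing import Any, Callable, Iterator, Optional

    @dataclass
    class SearchResultSketch:
        products: list[Any]
        number_matched: Optional[int] = None
        next_page_token: Optional[str] = None

    def iter_pages(
        fetch_page: Callable[[Optional[str]], SearchResultSketch]
    ) -> Iterator[list[Any]]:
        """Yield product batches until the provider stops returning a next-page token."""
        token: Optional[str] = None
        while True:
            page = fetch_page(token)  # one provider request
            yield page.products
            if page.next_page_token is None:
                return
            token = page.next_page_token

    # tiny stub: two pages of fake products
    def fake_fetch(token: Optional[str]) -> SearchResultSketch:
        if token is None:
            return SearchResultSketch(["p1", "p2"], next_page_token="2")
        return SearchResultSketch(["p3"], next_page_token=None)

    print([batch for batch in iter_pages(fake_fetch)])  # [['p1', 'p2'], ['p3']]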
@@ -1619,31 +1799,31 @@ class PostJsonSearch(QueryStringSearch):
         """Build EOProducts from provider results"""
         normalized = super().normalize_results(results, **kwargs)
         for product in normalized:
-            if "downloadLink" in product.properties:
-                decoded_link = unquote(product.properties["downloadLink"])
+            if "eodag:download_link" in product.properties:
+                decoded_link = unquote(product.properties["eodag:download_link"])
                 if decoded_link[0] == "{":  # not a url but a dict
                     default_values = deepcopy(
-                        self.config.products.get(product.product_type, {})
+                        self.config.products.get(product.collection, {})
                     )
                     default_values.pop("metadata_mapping", None)
                     searched_values = orjson.loads(decoded_link)
                     _dc_qs = orjson.dumps(
                         format_query_params(
-                            product.product_type,
+                            product.collection,
                             self.config,
                             {**default_values, **searched_values},
                         )
                     )
                     product.properties["_dc_qs"] = quote_plus(_dc_qs)
 
-            # workaround to add product type to wekeo cmems order links
+            # workaround to add collection to wekeo cmems order links
             if (
-                "orderLink" in product.properties
-                and "productType" in product.properties["orderLink"]
+                "eodag:order_link" in product.properties
+                and "collection" in product.properties["eodag:order_link"]
             ):
-                product.properties["orderLink"] = product.properties[
-                    "orderLink"
-                ].replace("productType", product.product_type)
+                product.properties["eodag:order_link"] = product.properties[
+                    "eodag:order_link"
+                ].replace("collection", product.collection)
         return normalized
 
     def collect_search_urls(
@@ -1652,11 +1832,14 @@ class PostJsonSearch(QueryStringSearch):
         **kwargs: Any,
     ) -> tuple[list[str], Optional[int]]:
         """Adds pagination to query parameters, and auth to url"""
-        page = prep.page
+        token = getattr(prep, "next_page_token", None)
         items_per_page = prep.items_per_page
         count = prep.count
         urls: list[str] = []
         total_results = 0 if count else None
+        next_page_token_key = prep.next_page_token_key or self.config.pagination.get(
+            "next_page_token_key"
+        )
 
         if "count_endpoint" not in self.config.pagination:
             # if count_endpoint is not set, total_results should be extracted from search result
@@ -1668,22 +1851,35 @@ class PostJsonSearch(QueryStringSearch):
             auth_conf_dict = getattr(prep.auth_plugin.config, "credentials", {})
         else:
             auth_conf_dict = {}
-        for collection in self.get_collections(prep, **kwargs) or (None,):
+        for _collection in self.get_provider_collections(prep, **kwargs) or (None,):
             try:
                 search_endpoint: str = self.config.api_endpoint.rstrip("/").format(
-                    **dict(collection=collection, **auth_conf_dict)
+                    **dict(_collection=_collection, **auth_conf_dict)
                 )
             except KeyError as e:
                 provider = prep.auth_plugin.provider if prep.auth_plugin else ""
                 raise MisconfiguredError(
                     "Missing %s in %s configuration" % (",".join(e.args), provider)
                 )
-            if page is not None and items_per_page is not None:
-                page = page - 1 + self.config.pagination.get("start_page", 1)
+            # numeric page token
+            if (
+                next_page_token_key == "page" or next_page_token_key == "skip"
+            ) and items_per_page is not None:
+                if token is None and next_page_token_key == "skip":
+                    # first page & next_page_token_key == skip
+                    token = max(
+                        0, self.config.pagination.get("start_page", DEFAULT_PAGE) - 1
+                    )
+                elif token is None:
+                    # first page & next_page_token_key == page
+                    token = self.config.pagination.get("start_page", DEFAULT_PAGE)
+                else:
+                    # next pages
+                    token = int(token)
             if count:
                 count_endpoint = self.config.pagination.get(
                     "count_endpoint", ""
-                ).format(**dict(collection=collection, **auth_conf_dict))
+                ).format(**dict(_collection=_collection, **auth_conf_dict))
                 if count_endpoint:
                     _total_results = self.count_hits(
                         count_endpoint, result_type=self.config.result_type
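Note: with the page number replaced by a generic next_page_token, providers whose token is numeric ("page" or "skip" keys) need an explicit first-page value. A small sketch of the convention the hunk above implements (DEFAULT_PAGE assumed to be 1, as in the eodag defaults; the increment helper is an illustration only):

    DEFAULT_PAGE = 1

    def first_numeric_token(next_page_token_key: str, start_page: int = DEFAULT_PAGE) -> int:
        # "page" counts requests and starts at start_page;
        # "skip" counts items already returned, so the first request skips none
        if next_page_token_key == "skip":
            return max(0, start_page - 1)
        return start_page

    def next_numeric_token(token: int, next_page_token_key: str, items_per_page: int) -> int:
        # advance by one request for "page", by one page worth of items for "skip"
        return token + (1 if next_page_token_key == "page" else items_per_page)

    assert first_numeric_token("page") == 1
    assert first_numeric_token("skip") == 0
    assert next_numeric_token(0, "skip", 20) == 20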
@@ -1696,21 +1892,52 @@ class PostJsonSearch(QueryStringSearch):
                         if total_results is None
                         else total_results + (_total_results or 0)
                     )
-            if "next_page_query_obj" in self.config.pagination and isinstance(
-                self.config.pagination["next_page_query_obj"], str
+            # parse next page url if needed
+            if "next_page_url_tpl" in self.config.pagination:
+                search_endpoint = self.config.pagination["next_page_url_tpl"].format(
+                    url=search_endpoint,
+                    items_per_page=items_per_page,
+                    next_page_token=token,
+                )
+
+            # parse next page body / query-obj if needed
+            if "next_page_query_obj" in self.config.pagination and isinstance(
+                self.config.pagination["next_page_query_obj"], str
+            ):
+                if next_page_token_key is None or token is None:
+                    next_page_token_kwargs = {
+                        "next_page_token": -1,
+                        "next_page_token_key": NOT_AVAILABLE,
+                    }
+                else:
+                    next_page_token_kwargs = {
+                        "next_page_token": token,
+                        "next_page_token_key": next_page_token_key,
+                    }
+                next_page_token_kwargs["next_page_token_key"] = (
+                    next_page_token_key or NOT_AVAILABLE
+                )
+                next_page_token_kwargs["next_page_token"] = (
+                    token if token is not None else -1
+                )
+
+                # next_page_query_obj needs to be parsed
+                next_page_query_obj_str = self.config.pagination[
+                    "next_page_query_obj"
+                ].format(items_per_page=items_per_page, **next_page_token_kwargs)
+                next_page_query_obj = orjson.loads(next_page_query_obj_str)
+                # remove NOT_AVAILABLE entries
+                next_page_query_obj.pop(NOT_AVAILABLE, None)
+                if (
+                    next_page_token_key
+                    and next_page_query_obj.get(next_page_token_key) == "-1"
             ):
-                # next_page_query_obj needs to be parsed
-                next_page_query_obj = self.config.pagination[
-                    "next_page_query_obj"
-                ].format(
-                    items_per_page=items_per_page,
-                    page=page,
-                    skip=(page - 1) * items_per_page,
-                    skip_base_1=(page - 1) * items_per_page + 1,
-                )
-                update_nested_dict(
-                    prep.query_params, orjson.loads(next_page_query_obj)
-                )
+                    next_page_query_obj.pop(next_page_token_key, None)
+                # update prep query_params with pagination info
+                update_nested_dict(prep.query_params, next_page_query_obj)
+
+            if token is not None:
+                prep.next_page_token = token
 
             urls.append(search_endpoint)
         return list(dict.fromkeys(urls)), total_results
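Note: the next_page_query_obj template is now rendered with the token key/value pair and then cleaned of placeholders that could not be resolved. A self-contained sketch of that render-then-prune step, using the standard json module and a made-up template (NOT_AVAILABLE mirrors eodag's sentinel, assumed here to be the string "Not Available"):

    import json
    from typing import Any, Optional

    NOT_AVAILABLE = "Not Available"

    def build_pagination_body(
        template: str,
        items_per_page: int,
        token: Optional[int],
        token_key: Optional[str],
    ) -> dict[str, Any]:
        """Render a JSON body template, then drop entries left unresolved."""
        body = json.loads(
            template.format(
                items_per_page=items_per_page,
                next_page_token=token if token is not None else -1,
                next_page_token_key=token_key or NOT_AVAILABLE,
            )
        )
        body.pop(NOT_AVAILABLE, None)  # provider exposes no token parameter
        if token_key and body.get(token_key) == "-1":
            body.pop(token_key, None)  # first request: no token yet
        return body

    tpl = '{{"limit": {items_per_page}, "{next_page_token_key}": "{next_page_token}"}}'
    print(build_pagination_body(tpl, 20, None, "page"))  # {'limit': 20}
    print(build_pagination_body(tpl, 20, 3, "page"))     # {'limit': 20, 'page': '3'}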
@@ -1801,7 +2028,7 @@ class StacSearch(PostJsonSearch):
     have to be overwritten. If certain functionalities are not available, their configuration
     parameters have to be overwritten with ``null``. E.g. if there is no queryables endpoint,
     the :attr:`~eodag.config.PluginConfig.DiscoverQueryables.fetch_url` and
-    :attr:`~eodag.config.PluginConfig.DiscoverQueryables.product_type_fetch_url` in the
+    :attr:`~eodag.config.PluginConfig.DiscoverQueryables.collection_fetch_url` in the
     :attr:`~eodag.config.PluginConfig.discover_queryables` config have to be set to ``null``.
     """
 
@@ -1815,22 +2042,19 @@ class StacSearch(PostJsonSearch):
         self.config.results_entry = results_entry
 
     def build_query_string(
-        self, product_type: str, query_dict: dict[str, Any]
+        self, collection: str, query_dict: dict[str, Any]
     ) -> tuple[dict[str, Any], str]:
         """Build The query string using the search parameters"""
         logger.debug("Building the query string that will be used for search")
 
         # handle opened time intervals
-        if any(
-            q in query_dict
-            for q in ("startTimeFromAscendingNode", "completionTimeFromAscendingNode")
-        ):
-            query_dict.setdefault("startTimeFromAscendingNode", "..")
-            query_dict.setdefault("completionTimeFromAscendingNode", "..")
+        if any(q in query_dict for q in ("start_datetime", "end_datetime")):
+            query_dict.setdefault("start_datetime", "..")
+            query_dict.setdefault("end_datetime", "..")
 
-        error_context = f"Product type: {product_type} / provider : {self.provider}"
+        error_context = f"Collection: {collection} / provider : {self.provider}"
         query_params = format_query_params(
-            product_type, self.config, query_dict, error_context
+            collection, self.config, query_dict, error_context
        )
 
         # Build the final query string, in one go without quoting it
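Note: the open-interval handling above now works on the STAC-named start_datetime / end_datetime parameters; when only one bound is supplied, the other is defaulted to "..", the STAC marker for an unbounded range. Illustrated in isolation:

    from typing import Any

    def complete_time_interval(query: dict[str, Any]) -> dict[str, Any]:
        # if only one bound of the interval is given, open the other one with ".."
        if "start_datetime" in query or "end_datetime" in query:
            query.setdefault("start_datetime", "..")
            query.setdefault("end_datetime", "..")
        return query

    print(complete_time_interval({"start_datetime": "2024-01-01"}))
    # {'start_datetime': '2024-01-01', 'end_datetime': '..'}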
@@ -1848,41 +2072,38 @@ class StacSearch(PostJsonSearch):
     ) -> Optional[dict[str, Annotated[Any, FieldInfo]]]:
         """Fetch queryables list from provider using `discover_queryables` conf
 
-        :param kwargs: additional filters for queryables (`productType` and other search
+        :param kwargs: additional filters for queryables (`collection` and other search
             arguments)
         :returns: fetched queryable parameters dict
         """
         if (
             not self.config.discover_queryables["fetch_url"]
-            and not self.config.discover_queryables["product_type_fetch_url"]
+            and not self.config.discover_queryables["collection_fetch_url"]
         ):
             raise NotImplementedError()
 
-        product_type = kwargs.get("productType")
-        provider_product_type = (
-            self.config.products.get(product_type, {}).get("productType", product_type)
-            if product_type
+        collection = kwargs.get("collection")
+        provider_collection = (
+            self.config.products.get(collection, {}).get("_collection", collection)
+            if collection
             else None
         )
         if (
-            provider_product_type
-            and not self.config.discover_queryables["product_type_fetch_url"]
+            provider_collection
+            and not self.config.discover_queryables["collection_fetch_url"]
         ):
             raise NotImplementedError(
-                f"Cannot fetch queryables for a specific product type with {self.provider}"
+                f"Cannot fetch queryables for a specific collection with {self.provider}"
             )
-        if (
-            not provider_product_type
-            and not self.config.discover_queryables["fetch_url"]
-        ):
+        if not provider_collection and not self.config.discover_queryables["fetch_url"]:
             raise ValidationError(
-                f"Cannot fetch global queryables for {self.provider}. A product type must be specified"
+                f"Cannot fetch global queryables for {self.provider}. A collection must be specified"
             )
 
         try:
             unparsed_fetch_url = (
-                self.config.discover_queryables["product_type_fetch_url"]
-                if provider_product_type
+                self.config.discover_queryables["collection_fetch_url"]
+                if provider_collection
                 else self.config.discover_queryables["fetch_url"]
             )
             if unparsed_fetch_url is None:
@@ -1891,7 +2112,7 @@ class StacSearch(PostJsonSearch):
                 )
             )
             fetch_url = unparsed_fetch_url.format(
-                provider_product_type=provider_product_type,
+                provider_collection=provider_collection,
                 **self.config.__dict__,
             )
             auth = (
@@ -1939,40 +2160,43 @@ class StacSearch(PostJsonSearch):
             )
         except IndexError:
             logger.info(
-                "No queryable found for %s on %s", product_type, self.provider
+                "No queryable found for %s on %s", collection, self.provider
             )
             return None
         # convert json results to pydantic model fields
         field_definitions: dict[str, Any] = dict()
-        STAC_TO_EODAG_QUERYABLES = {
-            "start_datetime": "start",
-            "end_datetime": "end",
-            "datetime": None,
-            "bbox": "geom",
-        }
+        eodag_queryables_and_defaults: list[tuple[str, Any]] = []
         for json_param, json_mtd in json_queryables.items():
-            param = STAC_TO_EODAG_QUERYABLES.get(
-                json_param,
-                get_queryable_from_provider(
-                    json_param, self.get_metadata_mapping(product_type)
-                )
-                or json_param,
-            )
-            if param is None:
+            param = get_queryable_from_provider(
+                json_param, self.get_metadata_mapping(collection)
+            ) or Queryables.get_queryable_from_alias(json_param)
+            # do not expose internal parameters, neither datetime
+            if param == "datetime" or param.startswith("_"):
                 continue
 
             default = kwargs.get(param, json_mtd.get("default"))
+
+            if param in Queryables.model_fields:
+                # use eodag queryable as default
+                eodag_queryables_and_defaults += [(param, default)]
+                continue
+
+            # convert provider json field definition to python
+            default = kwargs.get(param, json_mtd.get("default"))
             annotated_def = json_field_definition_to_python(
                 json_mtd, default_value=default
             )
-            field_definitions[param] = get_args(annotated_def)
+            field_definition = get_args(annotated_def)
+            field_definitions[param] = field_definition
 
         python_queryables = create_model("m", **field_definitions).model_fields
-        geom_queryable = python_queryables.pop("geometry", None)
-        if geom_queryable:
-            python_queryables["geom"] = Queryables.model_fields["geom"]
 
         queryables_dict = model_fields_to_annotated(python_queryables)
+
+        # append eodag queryables
+        for param, default in eodag_queryables_and_defaults:
+            queryables_dict[param] = Queryables.get_with_default(param, default)
+
         # append "datetime" as "start" & "end" if needed
         if "datetime" in json_queryables:
             eodag_queryables = copy_deepcopy(
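Note: discover_queryables() now maps every provider parameter to its eodag name and routes it either to the shared eodag Queryables model (when the name is already known) or to a provider-specific pydantic field. A simplified sketch of that split, with hypothetical mapping tables standing in for the metadata mapping and the Queryables model:

    from typing import Any

    # hypothetical stand-ins for the provider metadata mapping and the eodag Queryables model
    PROVIDER_TO_EODAG = {"productType": "product:type", "cloudCover": "eo:cloud_cover"}
    EODAG_QUERYABLE_NAMES = {"collection", "geometry", "start_datetime", "end_datetime", "eo:cloud_cover"}

    def split_queryables(provider_params: dict[str, dict[str, Any]]):
        eodag_side: list[tuple[str, Any]] = []         # reuse the eodag field definition
        provider_side: dict[str, dict[str, Any]] = {}  # build a pydantic field from provider JSON
        for name, definition in provider_params.items():
            param = PROVIDER_TO_EODAG.get(name, name)
            if param == "datetime" or param.startswith("_"):
                continue  # internal parameters and datetime are handled separately
            if param in EODAG_QUERYABLE_NAMES:
                eodag_side.append((param, definition.get("default")))
            else:
                provider_side[param] = definition
        return eodag_side, provider_side

    print(split_queryables({"cloudCover": {"type": "integer", "default": 10}, "orbit": {"type": "integer"}}))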
@@ -1993,7 +2217,7 @@ class WekeoSearch(StacSearch, PostJsonSearch):
         PostJsonSearch.__init__(self, provider, config)
 
     def build_query_string(
-        self, product_type: str, query_dict: dict[str, Any]
+        self, collection: str, query_dict: dict[str, Any]
     ) -> tuple[dict[str, Any], str]:
         """Build The query string using the search parameters"""
-        return PostJsonSearch.build_query_string(self, product_type, query_dict)
+        return PostJsonSearch.build_query_string(self, collection, query_dict)