eodag 4.0.0a1__py3-none-any.whl → 4.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. eodag/__init__.py +6 -1
  2. eodag/api/collection.py +353 -0
  3. eodag/api/core.py +308 -296
  4. eodag/api/product/_product.py +15 -29
  5. eodag/api/product/drivers/__init__.py +2 -42
  6. eodag/api/product/drivers/base.py +0 -11
  7. eodag/api/product/metadata_mapping.py +34 -5
  8. eodag/api/search_result.py +144 -9
  9. eodag/cli.py +18 -15
  10. eodag/config.py +37 -3
  11. eodag/plugins/apis/ecmwf.py +16 -4
  12. eodag/plugins/apis/usgs.py +18 -7
  13. eodag/plugins/crunch/filter_latest_intersect.py +1 -0
  14. eodag/plugins/crunch/filter_overlap.py +3 -7
  15. eodag/plugins/search/__init__.py +3 -0
  16. eodag/plugins/search/base.py +6 -6
  17. eodag/plugins/search/build_search_result.py +157 -56
  18. eodag/plugins/search/cop_marine.py +48 -8
  19. eodag/plugins/search/csw.py +18 -8
  20. eodag/plugins/search/qssearch.py +331 -88
  21. eodag/plugins/search/static_stac_search.py +11 -12
  22. eodag/resources/collections.yml +610 -348
  23. eodag/resources/ext_collections.json +1 -1
  24. eodag/resources/ext_product_types.json +1 -1
  25. eodag/resources/providers.yml +330 -58
  26. eodag/resources/stac_provider.yml +4 -2
  27. eodag/resources/user_conf_template.yml +9 -0
  28. eodag/types/__init__.py +2 -0
  29. eodag/types/queryables.py +16 -0
  30. eodag/utils/__init__.py +47 -2
  31. eodag/utils/repr.py +2 -0
  32. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/METADATA +4 -2
  33. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/RECORD +37 -36
  34. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/WHEEL +0 -0
  35. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/entry_points.txt +0 -0
  36. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/licenses/LICENSE +0 -0
  37. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/top_level.txt +0 -0
@@ -34,6 +34,7 @@ from typing import (
 )
 from urllib.error import URLError
 from urllib.parse import (
+    parse_qs,
     parse_qsl,
     quote,
     quote_plus,
@@ -68,16 +69,19 @@ from eodag.api.product.metadata_mapping import (
     properties_from_json,
     properties_from_xml,
 )
-from eodag.api.search_result import RawSearchResult
+from eodag.api.search_result import RawSearchResult, SearchResult
 from eodag.plugins.search import PreparedSearch
 from eodag.plugins.search.base import Search
 from eodag.types import json_field_definition_to_python, model_fields_to_annotated
 from eodag.types.queryables import Queryables
 from eodag.types.search_args import SortByList
 from eodag.utils import (
+    DEFAULT_ITEMS_PER_PAGE,
+    DEFAULT_PAGE,
     DEFAULT_SEARCH_TIMEOUT,
     GENERIC_COLLECTION,
     HTTP_REQ_TIMEOUT,
+    KNOWN_NEXT_PAGE_TOKEN_KEYS,
     REQ_RETRY_BACKOFF_FACTOR,
     REQ_RETRY_STATUS_FORCELIST,
     REQ_RETRY_TOTAL,
@@ -150,8 +154,7 @@ class QueryStringSearch(Search):
       pagination requests. This is a simple Python format string which will be resolved using the following
       keywords: ``url`` (the base url of the search endpoint), ``search`` (the query string corresponding
       to the search request), ``items_per_page`` (the number of items to return per page),
-      ``skip`` (the number of items to skip) or ``skip_base_1`` (the number of items to skip,
-      starting from 1) and ``page`` (which page to return).
+      ``skip`` (the number of items to skip).
     * :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
       leading to the total number of results satisfying a request. This is used for providers which provides the
       total results metadata along with the result of the query and don't have an endpoint for querying
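For orientation, here is a minimal, runnable sketch of how such a template is resolved. The endpoint, query string and pagination values are invented for the example; only the documented ``url``, ``search``, ``items_per_page`` and ``skip`` keywords are used:

```python
# Sketch only: a hypothetical pagination config and how the plugin resolves
# next_page_url_tpl with the keywords documented above.
pagination = {
    "next_page_url_tpl": "{url}?{search}&maxRecords={items_per_page}&startIndex={skip}",
    "total_items_nb_key_path": "$.properties.totalResults",  # JsonPath to the total count
}

next_url = pagination["next_page_url_tpl"].format(
    url="https://provider.example/search",          # base url of the search endpoint
    search="collection=S2_MSI_L1C&bbox=0,40,1,41",  # query string of the search request
    items_per_page=20,                              # items to return per page
    skip=40,                                        # items to skip (third page here)
)
print(next_url)
```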
@@ -182,8 +185,8 @@ class QueryStringSearch(Search):
     * :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_id` (``str``): mapping for the
       collection id
     * :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_parsable_metadata`
-      (``dict[str, str]``): mapping for collection metadata (e.g. ``abstract``, ``licence``) which can be parsed
-      from the provider result
+      (``dict[str, str]``): mapping for collection metadata (e.g. ``description``, ``license``) which can be
+      parsed from the provider result
     * :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_parsable_properties`
       (``dict[str, str]``): mapping for collection properties which can be parsed from the result and are not
       collection metadata
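As an illustration, a ``discover_collections`` section using these mappings could look like the following sketch, written as the dict the plugin reads; the JsonPath values on the right-hand side are hypothetical:

```python
# Illustrative sketch only: discover_collections mappings as described above.
discover_collections = {
    # where to find the collection id in the provider's collection listing
    "generic_collection_id": "$.id",
    # collection metadata parsed from the same listing
    "generic_collection_parsable_metadata": {
        "description": "$.abstract",
        "license": "$.rights",
    },
}
```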
@@ -603,15 +606,19 @@ class QueryStringSearch(Search):
                 ),
             )
             # collections_config extraction
-            conf_update_dict["collections_config"][
-                generic_collection_id
-            ] = properties_from_json(
+            collection_properties = properties_from_json(
                 collection_result,
                 self.config.discover_collections[
                     "generic_collection_parsable_metadata"
                 ],
             )
-
+            conf_update_dict["collections_config"][
+                generic_collection_id
+            ] = {
+                k: v
+                for k, v in collection_properties.items()
+                if v != NOT_AVAILABLE
+            }
             if (
                 "single_collection_parsable_metadata"
                 in self.config.discover_collections
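The new dict comprehension above only keeps parsed metadata that actually resolved to a value; a tiny standalone illustration (``NOT_AVAILABLE`` stands in for eodag's sentinel):

```python
# Only metadata that was actually parsed is kept.
NOT_AVAILABLE = "Not Available"  # stand-in for eodag's sentinel value

collection_properties = {"description": "Demo collection", "license": NOT_AVAILABLE}
kept = {k: v for k, v in collection_properties.items() if v != NOT_AVAILABLE}
print(kept)  # {'description': 'Demo collection'}
```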
@@ -619,29 +626,50 @@ class QueryStringSearch(Search):
                 collection_data = self._get_collection_metadata_from_single_collection_endpoint(
                     generic_collection_id
                 )
+                collection_data_id = collection_data.pop("id", None)
+
+                # remove collection if it must have be renamed but renaming failed
+                if (
+                    collection_data_id
+                    and collection_data_id == NOT_AVAILABLE
+                ):
+                    del conf_update_dict["collections_config"][
+                        generic_collection_id
+                    ]
+                    del conf_update_dict["providers_config"][
+                        generic_collection_id
+                    ]
+                    return
+
                 conf_update_dict["collections_config"][
                     generic_collection_id
-                ].update(collection_data)
+                ] |= {
+                    k: v
+                    for k, v in collection_data.items()
+                    if v != NOT_AVAILABLE
+                }
 
                 # update collection id if needed
-                if collection_data_id := collection_data.get("ID"):
-                    if generic_collection_id != collection_data_id:
-                        logger.debug(
-                            "Rename %s collection to %s",
-                            generic_collection_id,
-                            collection_data_id,
-                        )
-                        conf_update_dict["providers_config"][
-                            collection_data_id
-                        ] = conf_update_dict["providers_config"].pop(
-                            generic_collection_id
-                        )
-                        conf_update_dict["collections_config"][
-                            collection_data_id
-                        ] = conf_update_dict["collections_config"].pop(
-                            generic_collection_id
-                        )
-                        generic_collection_id = collection_data_id
+                if (
+                    collection_data_id
+                    and collection_data_id != generic_collection_id
+                ):
+                    logger.debug(
+                        "Rename %s collection to %s",
+                        generic_collection_id,
+                        collection_data_id,
+                    )
+                    conf_update_dict["providers_config"][
+                        collection_data_id
+                    ] = conf_update_dict["providers_config"].pop(
+                        generic_collection_id
+                    )
+                    conf_update_dict["collections_config"][
+                        collection_data_id
+                    ] = conf_update_dict["collections_config"].pop(
+                        generic_collection_id
+                    )
+                    generic_collection_id = collection_data_id
 
             # update keywords
             keywords_fields = [
@@ -681,12 +709,11 @@ class QueryStringSearch(Search):
                 r"[\[\]'\"]", "", keywords_values_str
             )
             # sorted list of unique lowercase keywords
-            keywords_values_str = ",".join(
-                sorted(set(keywords_values_str.split(",")))
-            )
+            keywords_values = sorted(set(keywords_values_str.split(",")))
+
             conf_update_dict["collections_config"][generic_collection_id][
                 "keywords"
-            ] = keywords_values_str
+            ] = keywords_values
 
         # runs concurrent requests and aggregate results in conf_update_dict
         max_connections = self.config.discover_collections.get(
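In other words, collection keywords are now stored as a sorted list of unique entries instead of a comma-joined string, e.g.:

```python
# Keywords normalization above, in isolation (values are made up):
keywords_values_str = "msi,optical,sentinel-2,optical"
keywords_values = sorted(set(keywords_values_str.split(",")))
print(keywords_values)  # ['msi', 'optical', 'sentinel-2']
```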
@@ -750,18 +777,22 @@ class QueryStringSearch(Search):
         self,
         prep: PreparedSearch = PreparedSearch(),
         **kwargs: Any,
-    ) -> tuple[list[EOProduct], Optional[int]]:
+    ) -> SearchResult:
         """Perform a search on an OpenSearch-like interface
 
         :param prep: Object collecting needed information for search.
         """
         count = prep.count
+        raise_errors = getattr(prep, "raise_errors", False)
         collection = cast(str, kwargs.get("collection", prep.collection))
         if collection == GENERIC_COLLECTION:
             logger.warning(
                 "GENERIC_COLLECTION is not a real collection and should only be used internally as a template"
             )
-            return ([], 0) if prep.count else ([], None)
+            result = SearchResult([])
+            if prep.count and not result.number_matched:
+                result.number_matched = 0
+            return result
 
         sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
         prep.sort_by_qs, _ = (
@@ -818,13 +849,18 @@ class QueryStringSearch(Search):
         provider_results = self.do_search(prep, **kwargs)
         if count and total_items is None and hasattr(prep, "total_items_nb"):
             total_items = prep.total_items_nb
-
-        raw_search_result = RawSearchResult(provider_results)
-        raw_search_result.query_params = prep.query_params
-        raw_search_result.collection_def_params = prep.collection_def_params
-
-        eo_products = self.normalize_results(raw_search_result, **kwargs)
-        return eo_products, total_items
+        if not count and "number_matched" in kwargs:
+            total_items = kwargs["number_matched"]
+
+        eo_products = self.normalize_results(provider_results, **kwargs)
+        formated_result = SearchResult(
+            eo_products,
+            total_items,
+            search_params=provider_results.search_params,
+            next_page_token=getattr(provider_results, "next_page_token", None),
+            raise_errors=raise_errors,
+        )
+        return formated_result
 
     def build_query_string(
         self, collection: str, query_dict: dict[str, Any]
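With this change, ``query()`` no longer returns a ``(products, total_items)`` tuple but a ``SearchResult`` carrying the count and the pagination state. A rough sketch of the returned object, using placeholder values and only the constructor arguments visible in the diff:

```python
from eodag.api.search_result import SearchResult

# Rough sketch of the object query() now returns (placeholder values).
result = SearchResult([], 0, next_page_token=None)
print(len(result))             # products on this page
print(result.number_matched)   # total matched count, when counting was requested
print(result.next_page_token)  # token to fetch the following page, if any
```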
@@ -852,9 +888,12 @@ class QueryStringSearch(Search):
         **kwargs: Any,
     ) -> tuple[list[str], Optional[int]]:
         """Build paginated urls"""
-        page = prep.page
+        token = getattr(prep, "next_page_token", None)
         items_per_page = prep.items_per_page
         count = prep.count
+        next_page_token_key = str(
+            self.config.pagination.get("next_page_token_key", "page")
+        )
 
         urls = []
         total_results = 0 if count else None
@@ -881,8 +920,19 @@ class QueryStringSearch(Search):
             search_endpoint = self.config.api_endpoint.rstrip("/").format(
                 _collection=provider_collection
             )
-            if page is not None and items_per_page is not None:
-                page = page - 1 + self.config.pagination.get("start_page", 1)
+            # numeric page token
+            if (
+                next_page_token_key == "page" or next_page_token_key == "skip"
+            ) and items_per_page is not None:
+                if token is None and next_page_token_key == "skip":
+                    # first page & next_page_token_key == skip
+                    token = 0
+                elif token is None:
+                    # first page & next_page_token_key == page
+                    token = self.config.pagination.get("start_page", DEFAULT_PAGE)
+                else:
+                    # next pages
+                    token = int(token)
                 if count:
                     count_endpoint = self.config.pagination.get(
                         "count_endpoint", ""
@@ -906,22 +956,23 @@ class QueryStringSearch(Search):
                     raise MisconfiguredError(
                         f"next_page_url_tpl is missing in {self.provider} search.pagination configuration"
                     )
-                next_url = self.config.pagination["next_page_url_tpl"].format(
+                next_page_url = self.config.pagination["next_page_url_tpl"].format(
                     url=search_endpoint,
                     search=qs_with_sort,
                     items_per_page=items_per_page,
-                    page=page,
-                    skip=(page - 1) * items_per_page,
-                    skip_base_1=(page - 1) * items_per_page + 1,
+                    next_page_token=token,
+                    skip=token,
                 )
-            else:
-                next_url = "{}?{}".format(search_endpoint, qs_with_sort)
-            urls.append(next_url)
+
+                if token is not None:
+                    prep.next_page_token = token
+                urls.append(next_page_url)
+
         return list(dict.fromkeys(urls)), total_results
 
     def do_search(
         self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
-    ) -> list[Any]:
+    ) -> RawSearchResult:
         """Perform the actual search request.
 
         If there is a specified number of items per page, return the results as soon
@@ -962,6 +1013,7 @@ class QueryStringSearch(Search):
             if self.config.result_type == "xml":
                 root_node = etree.fromstring(response.content)
                 namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
+                resp_as_json = {}
                 results_xpath = root_node.xpath(
                     self.config.results_entry or "//ns:entry", namespaces=namespaces
                 )
@@ -1006,7 +1058,6 @@ class QueryStringSearch(Search):
                 path_parsed = next_page_url_key_path
                 found_paths = path_parsed.find(resp_as_json)
                 if found_paths and not isinstance(found_paths, int):
-                    self.next_page_url = found_paths[0].value
                     logger.debug(
                         "Next page URL collected and set for the next search",
                     )
@@ -1016,7 +1067,6 @@ class QueryStringSearch(Search):
                 path_parsed = next_page_query_obj_key_path
                 found_paths = path_parsed.find(resp_as_json)
                 if found_paths and not isinstance(found_paths, int):
-                    self.next_page_query_obj = found_paths[0].value
                     logger.debug(
                         "Next page Query-object collected and set for the next search",
                     )
@@ -1084,8 +1134,135 @@ class QueryStringSearch(Search):
             ):
                 del prep.total_items_nb
             if items_per_page is not None and len(results) == items_per_page:
-                return results
-        return results
+
+                raw_search_results = self._build_raw_search_results(
+                    results, resp_as_json, kwargs, items_per_page, prep
+                )
+                return raw_search_results
+
+        raw_search_results = self._build_raw_search_results(
+            results, resp_as_json, kwargs, items_per_page, prep
+        )
+        return raw_search_results
+
+    def _build_raw_search_results(
+        self,
+        results: list[dict[str, Any]],
+        resp_as_json: dict[str, Any],
+        search_kwargs: dict[str, Any],
+        items_per_page: Optional[int],
+        prep: PreparedSearch,
+    ):
+        """
+        Build a `RawSearchResult` object from raw search results.
+
+        This method initializes a `RawSearchResult` instance with the provided results,
+        sets the search parameters, and determines the token or identifier for the next page
+        based on the pagination configuration.
+
+        :param results: Raw results returned by the search.
+        :param resp_as_json: The search response parsed as JSON.
+        :param search_kwargs: Search parameters used for the query.
+        :param items_per_page: Number of items per page.
+        :param prep: Request preparation object containing query parameters.
+        :returns: An object containing the raw results, search parameters, and the next page token if available.
+        """
+        # Create the RawSearchResult object and populate basic fields
+        raw_search_results = RawSearchResult(results)
+        raw_search_results.search_params = search_kwargs | {
+            "items_per_page": items_per_page
+        }
+        raw_search_results.query_params = prep.query_params
+        raw_search_results.collection_def_params = prep.collection_def_params
+        raw_search_results.next_page_token_key = prep.next_page_token_key
+
+        # If no JSON response is available, return the result as is
+        if resp_as_json is None:
+            return raw_search_results
+
+        # Handle pagination
+        if self.config.pagination.get("next_page_query_obj_key_path") is not None:
+            # Use next_page_query_obj_key_path to find the next page token in the response
+            jsonpath_expr = string_to_jsonpath(
+                self.config.pagination["next_page_query_obj_key_path"]
+            )
+            if isinstance(jsonpath_expr, str):
+                raise PluginImplementationError(
+                    "next_page_query_obj_key_path must be parsed to JSONPath on plugin init"
+                )
+            jsonpath_match = jsonpath_expr.find(resp_as_json)
+            if jsonpath_match:
+                next_page_query_obj = jsonpath_match[0].value
+                next_page_token_key = raw_search_results.next_page_token_key
+                if next_page_token_key and next_page_token_key in next_page_query_obj:
+                    raw_search_results.next_page_token = next_page_query_obj[
+                        next_page_token_key
+                    ]
+                else:
+                    for token_key in KNOWN_NEXT_PAGE_TOKEN_KEYS:
+                        if token_key in next_page_query_obj:
+                            raw_search_results.next_page_token = next_page_query_obj[
+                                token_key
+                            ]
+                            raw_search_results.next_page_token_key = token_key
+                            logger.debug(
+                                "Using '%s' as next_page_token_key for the next search",
+                                token_key,
+                            )
+                            break
+            else:
+                raw_search_results.next_page_token = None
+        elif self.config.pagination.get("next_page_url_key_path") is not None:
+            jsonpath_expr = string_to_jsonpath(
+                self.config.pagination["next_page_url_key_path"]
+            )
+            # Use next_page_url_key_path to find the next page token in the response
+            if isinstance(jsonpath_expr, str):
+                raise PluginImplementationError(
+                    "next_page_url_key_path must be parsed to JSONPath on plugin init"
+                )
+            href = jsonpath_expr.find(resp_as_json)
+            if href:
+                # Determine the key to extract the token from the URL or object
+                href_value = href[0].value
+                next_page_token_key = (
+                    unquote(self.config.pagination["parse_url_key"])
+                    if "parse_url_key" in self.config.pagination
+                    else raw_search_results.next_page_token_key
+                )
+                raw_search_results.next_page_token_key = next_page_token_key
+                # Try to extract the token from the found value
+                if next_page_token_key in href_value:
+                    raw_search_results.next_page_token = href_value[next_page_token_key]
+                elif next_page_token_key in unquote(href_value):
+                    # If the token is in the URL query string
+                    query = urlparse(href_value).query
+                    page_param = parse_qs(query).get(next_page_token_key)
+                    if page_param:
+                        raw_search_results.next_page_token = page_param[0]
+                else:
+                    # Use the whole value as the token
+                    raw_search_results.next_page_token = href_value
+            else:
+                # No token found: set to empty string
+                raw_search_results.next_page_token = None
+        else:
+            # pagination using next_page_token_key
+            next_page_token_key = raw_search_results.next_page_token_key
+            next_page_token = prep.next_page_token
+            # page number as next_page_token_key
+            if next_page_token is not None and next_page_token_key == "page":
+                raw_search_results.next_page_token = str(int(next_page_token) + 1)
+            # skip as next_page_token_key
+            elif next_page_token is not None and next_page_token_key == "skip":
+                raw_search_results.next_page_token = str(
+                    int(next_page_token)
+                    + int(prep.items_per_page or DEFAULT_ITEMS_PER_PAGE)
+                )
+            else:
+                raw_search_results.next_page_token = None
+
+        return raw_search_results
 
     def normalize_results(
         self, results: RawSearchResult, **kwargs: Any
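The URL branch of ``_build_raw_search_results`` essentially pulls the pagination token out of the provider's next-page link; a standalone sketch with a made-up link and key:

```python
from urllib.parse import parse_qs, urlparse

# Standalone sketch of the URL branch above: extract the pagination token from
# a provider's "next" link (link and parameter name are made up).
next_link = "https://provider.example/search?collection=demo&page=3"
next_page_token_key = "page"

query = urlparse(next_link).query
page_param = parse_qs(query).get(next_page_token_key)
next_page_token = page_param[0] if page_param else next_link
print(next_page_token)  # '3'
```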
@@ -1363,7 +1540,7 @@ class ODataV4Search(QueryStringSearch):
 
     def do_search(
         self, prep: PreparedSearch = PreparedSearch(), **kwargs: Any
-    ) -> list[Any]:
+    ) -> RawSearchResult:
         """A two step search can be performed if the metadata are not given into the search result"""
 
         if getattr(self.config, "per_product_metadata_query", False):
@@ -1394,7 +1571,17 @@ class ODataV4Search(QueryStringSearch):
                     {item["id"]: item["value"] for item in response.json()["value"]}
                 )
                 final_result.append(entity)
-            return final_result
+            raw_search_results = RawSearchResult(final_result)
+            raw_search_results.search_params = kwargs
+            raw_search_results.query_params = (
+                prep.query_params if hasattr(prep, "query_params") else {}
+            )
+            raw_search_results.collection_def_params = (
+                prep.collection_def_params
+                if hasattr(prep, "collection_def_params")
+                else {}
+            )
+            return raw_search_results
         else:
             return super(ODataV4Search, self).do_search(prep, **kwargs)
 
@@ -1466,10 +1653,12 @@ class PostJsonSearch(QueryStringSearch):
         self,
         prep: PreparedSearch = PreparedSearch(),
         **kwargs: Any,
-    ) -> tuple[list[EOProduct], Optional[int]]:
+    ) -> SearchResult:
         """Perform a search on an OpenSearch-like interface"""
         collection = kwargs.get("collection", "")
         count = prep.count
+        raise_errors = getattr(prep, "raise_errors", False)
+        number_matched = kwargs.pop("number_matched", None)
         sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
         _, sort_by_qp = (
             ("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
1553
1742
  return super(PostJsonSearch, self)._request(*x, **y)
1554
1743
 
1555
1744
  try:
1556
- eo_products, total_items = super(PostJsonSearch, self).query(
1557
- prep, **kwargs
1558
- )
1745
+ eo_products = super(PostJsonSearch, self).query(prep, **kwargs)
1559
1746
  except Exception:
1560
1747
  raise
1561
1748
  finally:
@@ -1564,7 +1751,7 @@ class PostJsonSearch(QueryStringSearch):
1564
1751
  plugin_config_backup, self.config.yaml_loader
1565
1752
  )
1566
1753
 
1567
- return eo_products, total_items
1754
+ return eo_products
1568
1755
 
1569
1756
  # If we were not able to build query params but have queryable search criteria,
1570
1757
  # this means the provider does not support the search criteria given. If so,
@@ -1578,7 +1765,10 @@ class PostJsonSearch(QueryStringSearch):
1578
1765
  for k in keywords.keys()
1579
1766
  if isinstance(collection_metadata_mapping.get(k), list)
1580
1767
  ):
1581
- return ([], 0) if prep.count else ([], None)
1768
+ result = SearchResult([])
1769
+ if prep.count:
1770
+ result.number_matched = 0
1771
+ return result
1582
1772
  prep.query_params = dict(qp, **sort_by_qp)
1583
1773
  prep.search_urls, total_items = self.collect_search_urls(prep, **kwargs)
1584
1774
  if not count and getattr(prep, "need_count", False):
@@ -1589,13 +1779,19 @@ class PostJsonSearch(QueryStringSearch):
1589
1779
  provider_results = self.do_search(prep, **kwargs)
1590
1780
  if count and total_items is None and hasattr(prep, "total_items_nb"):
1591
1781
  total_items = prep.total_items_nb
1592
-
1593
- raw_search_result = RawSearchResult(provider_results)
1594
- raw_search_result.query_params = prep.query_params
1595
- raw_search_result.collection_def_params = prep.collection_def_params
1596
-
1597
- eo_products = self.normalize_results(raw_search_result, **kwargs)
1598
- return eo_products, total_items
1782
+ if not count and "number_matched" in kwargs and number_matched:
1783
+ total_items = number_matched
1784
+
1785
+ eo_products_normalize = self.normalize_results(provider_results, **kwargs)
1786
+ formated_result = SearchResult(
1787
+ eo_products_normalize,
1788
+ total_items,
1789
+ search_params=provider_results.search_params,
1790
+ next_page_token=getattr(provider_results, "next_page_token", None),
1791
+ next_page_token_key=getattr(provider_results, "next_page_token_key", None),
1792
+ raise_errors=raise_errors,
1793
+ )
1794
+ return formated_result
1599
1795
 
1600
1796
  def normalize_results(
1601
1797
  self, results: RawSearchResult, **kwargs: Any
@@ -1636,11 +1832,14 @@ class PostJsonSearch(QueryStringSearch):
1636
1832
  **kwargs: Any,
1637
1833
  ) -> tuple[list[str], Optional[int]]:
1638
1834
  """Adds pagination to query parameters, and auth to url"""
1639
- page = prep.page
1835
+ token = getattr(prep, "next_page_token", None)
1640
1836
  items_per_page = prep.items_per_page
1641
1837
  count = prep.count
1642
1838
  urls: list[str] = []
1643
1839
  total_results = 0 if count else None
1840
+ next_page_token_key = prep.next_page_token_key or self.config.pagination.get(
1841
+ "next_page_token_key"
1842
+ )
1644
1843
 
1645
1844
  if "count_endpoint" not in self.config.pagination:
1646
1845
  # if count_endpoint is not set, total_results should be extracted from search result
@@ -1662,8 +1861,21 @@ class PostJsonSearch(QueryStringSearch):
1662
1861
  raise MisconfiguredError(
1663
1862
  "Missing %s in %s configuration" % (",".join(e.args), provider)
1664
1863
  )
1665
- if page is not None and items_per_page is not None:
1666
- page = page - 1 + self.config.pagination.get("start_page", 1)
1864
+ # numeric page token
1865
+ if (
1866
+ next_page_token_key == "page" or next_page_token_key == "skip"
1867
+ ) and items_per_page is not None:
1868
+ if token is None and next_page_token_key == "skip":
1869
+ # first page & next_page_token_key == skip
1870
+ token = max(
1871
+ 0, self.config.pagination.get("start_page", DEFAULT_PAGE) - 1
1872
+ )
1873
+ elif token is None:
1874
+ # first page & next_page_token_key == page
1875
+ token = self.config.pagination.get("start_page", DEFAULT_PAGE)
1876
+ else:
1877
+ # next pages
1878
+ token = int(token)
1667
1879
  if count:
1668
1880
  count_endpoint = self.config.pagination.get(
1669
1881
  "count_endpoint", ""
@@ -1680,21 +1892,52 @@ class PostJsonSearch(QueryStringSearch):
1680
1892
  if total_results is None
1681
1893
  else total_results + (_total_results or 0)
1682
1894
  )
1683
- if "next_page_query_obj" in self.config.pagination and isinstance(
1684
- self.config.pagination["next_page_query_obj"], str
1895
+ # parse next page url if needed
1896
+ if "next_page_url_tpl" in self.config.pagination:
1897
+ search_endpoint = self.config.pagination["next_page_url_tpl"].format(
1898
+ url=search_endpoint,
1899
+ items_per_page=items_per_page,
1900
+ next_page_token=token,
1901
+ )
1902
+
1903
+ # parse next page body / query-obj if needed
1904
+ if "next_page_query_obj" in self.config.pagination and isinstance(
1905
+ self.config.pagination["next_page_query_obj"], str
1906
+ ):
1907
+ if next_page_token_key is None or token is None:
1908
+ next_page_token_kwargs = {
1909
+ "next_page_token": -1,
1910
+ "next_page_token_key": NOT_AVAILABLE,
1911
+ }
1912
+ else:
1913
+ next_page_token_kwargs = {
1914
+ "next_page_token": token,
1915
+ "next_page_token_key": next_page_token_key,
1916
+ }
1917
+ next_page_token_kwargs["next_page_token_key"] = (
1918
+ next_page_token_key or NOT_AVAILABLE
1919
+ )
1920
+ next_page_token_kwargs["next_page_token"] = (
1921
+ token if token is not None else -1
1922
+ )
1923
+
1924
+ # next_page_query_obj needs to be parsed
1925
+ next_page_query_obj_str = self.config.pagination[
1926
+ "next_page_query_obj"
1927
+ ].format(items_per_page=items_per_page, **next_page_token_kwargs)
1928
+ next_page_query_obj = orjson.loads(next_page_query_obj_str)
1929
+ # remove NOT_AVAILABLE entries
1930
+ next_page_query_obj.pop(NOT_AVAILABLE, None)
1931
+ if (
1932
+ next_page_token_key
1933
+ and next_page_query_obj.get(next_page_token_key) == "-1"
1685
1934
  ):
1686
- # next_page_query_obj needs to be parsed
1687
- next_page_query_obj = self.config.pagination[
1688
- "next_page_query_obj"
1689
- ].format(
1690
- items_per_page=items_per_page,
1691
- page=page,
1692
- skip=(page - 1) * items_per_page,
1693
- skip_base_1=(page - 1) * items_per_page + 1,
1694
- )
1695
- update_nested_dict(
1696
- prep.query_params, orjson.loads(next_page_query_obj)
1697
- )
1935
+ next_page_query_obj.pop(next_page_token_key, None)
1936
+ # update prep query_params with pagination info
1937
+ update_nested_dict(prep.query_params, next_page_query_obj)
1938
+
1939
+ if token is not None:
1940
+ prep.next_page_token = token
1698
1941
 
1699
1942
  urls.append(search_endpoint)
1700
1943
  return list(dict.fromkeys(urls)), total_results
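To make the template handling concrete, here is a standalone sketch of how a string ``next_page_query_obj`` is resolved and cleaned; the template and values are hypothetical, and ``NOT_AVAILABLE`` stands in for eodag's sentinel:

```python
import orjson

# Standalone sketch of the string next_page_query_obj handling above.
NOT_AVAILABLE = "Not Available"  # stand-in for eodag's sentinel value
next_page_query_obj_tpl = (
    '{{"limit": {items_per_page}, "{next_page_token_key}": "{next_page_token}"}}'
)

next_page_query_obj = orjson.loads(
    next_page_query_obj_tpl.format(
        items_per_page=20, next_page_token_key="page", next_page_token=2
    )
)
next_page_query_obj.pop(NOT_AVAILABLE, None)  # drop the placeholder key if present
print(next_page_query_obj)  # {'limit': 20, 'page': '2'}
```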