eodag 4.0.0a1__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. eodag/__init__.py +6 -1
  2. eodag/api/collection.py +354 -0
  3. eodag/api/core.py +324 -303
  4. eodag/api/product/_product.py +15 -29
  5. eodag/api/product/drivers/__init__.py +2 -42
  6. eodag/api/product/drivers/base.py +0 -11
  7. eodag/api/product/metadata_mapping.py +34 -5
  8. eodag/api/search_result.py +144 -9
  9. eodag/cli.py +18 -15
  10. eodag/config.py +37 -3
  11. eodag/plugins/apis/ecmwf.py +16 -4
  12. eodag/plugins/apis/usgs.py +18 -7
  13. eodag/plugins/crunch/filter_latest_intersect.py +1 -0
  14. eodag/plugins/crunch/filter_overlap.py +3 -7
  15. eodag/plugins/search/__init__.py +3 -0
  16. eodag/plugins/search/base.py +6 -6
  17. eodag/plugins/search/build_search_result.py +157 -56
  18. eodag/plugins/search/cop_marine.py +48 -8
  19. eodag/plugins/search/csw.py +18 -8
  20. eodag/plugins/search/qssearch.py +331 -88
  21. eodag/plugins/search/static_stac_search.py +11 -12
  22. eodag/resources/collections.yml +610 -348
  23. eodag/resources/ext_collections.json +1 -1
  24. eodag/resources/ext_product_types.json +1 -1
  25. eodag/resources/providers.yml +334 -62
  26. eodag/resources/stac_provider.yml +4 -2
  27. eodag/resources/user_conf_template.yml +9 -0
  28. eodag/types/__init__.py +2 -0
  29. eodag/types/queryables.py +16 -0
  30. eodag/utils/__init__.py +47 -2
  31. eodag/utils/repr.py +2 -0
  32. {eodag-4.0.0a1.dist-info → eodag-4.0.0a3.dist-info}/METADATA +4 -2
  33. {eodag-4.0.0a1.dist-info → eodag-4.0.0a3.dist-info}/RECORD +37 -36
  34. {eodag-4.0.0a1.dist-info → eodag-4.0.0a3.dist-info}/WHEEL +0 -0
  35. {eodag-4.0.0a1.dist-info → eodag-4.0.0a3.dist-info}/entry_points.txt +0 -0
  36. {eodag-4.0.0a1.dist-info → eodag-4.0.0a3.dist-info}/licenses/LICENSE +0 -0
  37. {eodag-4.0.0a1.dist-info → eodag-4.0.0a3.dist-info}/top_level.txt +0 -0
eodag/api/core.py CHANGED
@@ -23,14 +23,17 @@ import os
23
23
  import re
24
24
  import shutil
25
25
  import tempfile
26
+ import warnings
27
+ from collections import deque
26
28
  from importlib.metadata import version
27
29
  from importlib.resources import files as res_files
28
- from operator import itemgetter
30
+ from operator import attrgetter, itemgetter
29
31
  from typing import TYPE_CHECKING, Any, Iterator, Optional, Union
30
32
 
31
33
  import geojson
32
- import yaml.parser
34
+ import yaml
33
35
 
36
+ from eodag.api.collection import Collection, CollectionsDict, CollectionsList
34
37
  from eodag.api.product.metadata_mapping import (
35
38
  NOT_AVAILABLE,
36
39
  mtd_cfg_as_conversion_and_querypath,
@@ -53,7 +56,10 @@ from eodag.config import (
53
56
  )
54
57
  from eodag.plugins.manager import PluginManager
55
58
  from eodag.plugins.search import PreparedSearch
56
- from eodag.plugins.search.build_search_result import MeteoblueSearch
59
+ from eodag.plugins.search.build_search_result import (
60
+ ALLOWED_KEYWORDS as ECMWF_ALLOWED_KEYWORDS,
61
+ )
62
+ from eodag.plugins.search.build_search_result import ECMWF_PREFIX, MeteoblueSearch
57
63
  from eodag.plugins.search.qssearch import PostJsonSearch
58
64
  from eodag.types import model_fields_to_annotated
59
65
  from eodag.types.queryables import CommonQueryables, Queryables, QueryablesDict
@@ -65,7 +71,7 @@ from eodag.utils import (
65
71
  DEFAULT_PAGE,
66
72
  GENERIC_COLLECTION,
67
73
  GENERIC_STAC_PROVIDER,
68
- get_collection_dates,
74
+ _deprecated,
69
75
  get_geometry_from_various,
70
76
  makedirs,
71
77
  sort_dict,
@@ -81,6 +87,7 @@ from eodag.utils.exceptions import (
81
87
  RequestError,
82
88
  UnsupportedCollection,
83
89
  UnsupportedProvider,
90
+ ValidationError,
84
91
  )
85
92
  from eodag.utils.free_text_search import compile_free_text_query
86
93
  from eodag.utils.stac_reader import fetch_stac_items
@@ -115,7 +122,8 @@ class EODataAccessGateway:
115
122
  collections_config_path = os.getenv("EODAG_COLLECTIONS_CFG_FILE") or str(
116
123
  res_files("eodag") / "resources" / "collections.yml"
117
124
  )
118
- self.collections_config = SimpleYamlProxyConfig(collections_config_path)
125
+ collections_config_dict = SimpleYamlProxyConfig(collections_config_path).source
126
+ self.collections_config = self._collections_config_init(collections_config_dict)
119
127
  self.providers_config = load_default_config()
120
128
 
121
129
  env_var_cfg_dir = "EODAG_CFG_DIR"
@@ -168,7 +176,7 @@ class EODataAccessGateway:
168
176
 
169
177
  # init updated providers conf
170
178
  strict_mode = is_env_var_true("EODAG_STRICT_COLLECTIONS")
171
- available_collections = set(self.collections_config.source.keys())
179
+ available_collections = set(self.collections_config.keys())
172
180
 
173
181
  for provider in self.providers_config.keys():
174
182
  provider_config_init(
@@ -179,8 +187,6 @@ class EODataAccessGateway:
179
187
  self._sync_provider_collections(
180
188
  provider, available_collections, strict_mode
181
189
  )
182
- # init collections configuration
183
- self._collections_config_init()
184
190
 
185
191
  # re-build _plugins_manager using up-to-date providers_config
186
192
  self._plugins_manager.rebuild(self.providers_config)
@@ -223,10 +229,19 @@ class EODataAccessGateway:
223
229
  )
224
230
  self.set_locations_conf(locations_conf_path)
225
231
 
226
- def _collections_config_init(self) -> None:
227
- """Initialize collections configuration."""
228
- for pt_id, pd_dict in self.collections_config.source.items():
229
- self.collections_config.source[pt_id].setdefault("_id", pt_id)
232
+ def _collections_config_init(
233
+ self, collections_config_dict: dict[str, Any]
234
+ ) -> CollectionsDict:
235
+ """Initialize collections configuration.
236
+
237
+ :param collections_config_dict: The collections config as a dictionary
238
+ """
239
+ # Turn the collections config from a dict into a CollectionsDict() object
240
+ collections = [
241
+ Collection.create_with_dag(self, id=col, **col_f)
242
+ for col, col_f in collections_config_dict.items()
243
+ ]
244
+ return CollectionsDict(collections)
230
245
 
231
246
  def _sync_provider_collections(
232
247
  self,
@@ -258,13 +273,10 @@ class EODataAccessGateway:
258
273
  products_to_remove.append(product_id)
259
274
  continue
260
275
 
261
- empty_product = {
262
- "title": product_id,
263
- "description": NOT_AVAILABLE,
264
- }
265
- self.collections_config.source[
266
- product_id
267
- ] = empty_product # will update available_collections
276
+ empty_product = Collection.create_with_dag(
277
+ self, id=product_id, title=product_id, description=NOT_AVAILABLE
278
+ )
279
+ self.collections_config[product_id] = empty_product
268
280
  products_to_add.append(product_id)
269
281
 
270
282
  if products_to_add:
@@ -541,7 +553,7 @@ class EODataAccessGateway:
541
553
 
542
554
  def list_collections(
543
555
  self, provider: Optional[str] = None, fetch_providers: bool = True
544
- ) -> list[dict[str, Any]]:
556
+ ) -> CollectionsList:
545
557
  """Lists supported collections.
546
558
 
547
559
  :param provider: (optional) The name of a provider that must support the product
@@ -555,8 +567,6 @@ class EODataAccessGateway:
555
567
  # First, update collections list if possible
556
568
  self.fetch_collections_list(provider=provider)
557
569
 
558
- collections: list[dict[str, Any]] = []
559
-
560
570
  providers_configs = (
561
571
  list(self.providers_config.values())
562
572
  if not provider
@@ -572,21 +582,21 @@ class EODataAccessGateway:
572
582
  f"The requested provider is not (yet) supported: {provider}"
573
583
  )
574
584
 
575
- for p in providers_configs:
576
- for collection_id in p.products: # type: ignore
577
- if collection_id == GENERIC_COLLECTION:
578
- continue
579
-
580
- config = self.collections_config[collection_id]
581
- if "alias" in config:
582
- collection_id = config["alias"]
583
- collection = {"ID": collection_id, **config}
585
+ # unique collection ids from providers configs
586
+ collection_ids = {
587
+ collection_id
588
+ for p in providers_configs
589
+ for collection_id in p.products
590
+ if collection_id != GENERIC_COLLECTION
591
+ }
584
592
 
585
- if collection not in collections:
586
- collections.append(collection)
593
+ collections = CollectionsList(
594
+ [self.collections_config[collection_id] for collection_id in collection_ids]
595
+ )
587
596
 
588
- # Return the collections sorted in lexicographic order of their ID
589
- return sorted(collections, key=itemgetter("ID"))
597
+ # Return the collections sorted in lexicographic order of their id
598
+ collections.sort(key=attrgetter("id"))
599
+ return collections
590
600
 
591
601
  def fetch_collections_list(self, provider: Optional[str] = None) -> None:
592
602
  """Fetch collections list and update if needed.
@@ -843,6 +853,7 @@ class EODataAccessGateway:
843
853
  )
844
854
  continue
845
855
  new_collections: list[str] = []
856
+ bad_formatted_col_count = 0
846
857
  for (
847
858
  new_collection,
848
859
  new_collection_conf,
@@ -867,26 +878,57 @@ class EODataAccessGateway:
867
878
  # new_collections_conf is a subset on an existing conf
868
879
  break
869
880
  else:
870
- # new_collection_conf does not already exist, append it
871
- # to provider_products_config
872
- provider_products_config[
873
- new_collection
874
- ] = new_collection_conf
875
- # to self.collections_config
876
- self.collections_config.source.update(
877
- {
878
- new_collection: {"_id": new_collection}
879
- | new_collections_conf["collections_config"][
881
+ try:
882
+ # new_collection_conf does not already exist, append it
883
+ # to self.collections_config
884
+ new_coll_obj = Collection.create_with_dag(
885
+ self,
886
+ id=new_collection,
887
+ **new_collections_conf["collections_config"][
880
888
  new_collection
881
- ]
889
+ ],
890
+ )
891
+ self.collections_config[new_coll_obj._id] = new_coll_obj
892
+ except ValidationError:
893
+ # skip collection if there is a problem with its id (missing or not a string)
894
+ logger.debug(
895
+ (
896
+ "Collection %s has been pruned on provider %s "
897
+ "because its id was incorrectly parsed for eodag"
898
+ ),
899
+ new_collection,
900
+ provider,
901
+ )
902
+ else:
903
+ # to provider_products_config
904
+ provider_products_config[
905
+ new_collection
906
+ ] = new_collection_conf
907
+ ext_collections_conf[provider] = new_collections_conf
908
+ new_collections.append(new_collection)
909
+ # increase the increment if the new collection had
910
+ # bad formatted attributes in the external config
911
+ dumped_collection = self.collections_config[
912
+ new_coll_obj._id
913
+ ].model_dump()
914
+ dumped_ext_conf_col = {
915
+ **dumped_collection,
916
+ **new_collections_conf["collections_config"][
917
+ new_collection
918
+ ],
882
919
  }
883
- )
884
- ext_collections_conf[provider] = new_collections_conf
885
- new_collections.append(new_collection)
920
+ if dumped_ext_conf_col != dumped_collection:
921
+ bad_formatted_col_count += 1
886
922
  if new_collections:
887
923
  logger.debug(
888
- f"Added {len(new_collections)} collections for {provider}"
924
+ "Added %s collections for %s", len(new_collections), provider
889
925
  )
926
+ if bad_formatted_col_count > 0:
927
+ logger.debug(
928
+ "bad formatted attributes skipped for %s collection(s) on %s",
929
+ bad_formatted_col_count,
930
+ provider,
931
+ )
890
932
 
891
933
  elif provider not in self.providers_config:
892
934
  # unknown provider
@@ -938,16 +980,14 @@ class EODataAccessGateway:
938
980
  return [name for name, _ in providers]
939
981
 
940
982
  def get_collection_from_alias(self, alias_or_id: str) -> str:
941
- """Return the ID of a collection by either its ID or alias
983
+ """Return the id of a collection by either its id or alias
942
984
 
943
- :param alias_or_id: Alias of the collection. If an existing ID is given, this
985
+ :param alias_or_id: Alias of the collection. If an existing id is given, this
944
986
  method will directly return the given value.
945
987
  :returns: Internal name of the collection.
946
988
  """
947
989
  collections = [
948
- k
949
- for k, v in self.collections_config.items()
950
- if v.get("alias") == alias_or_id
990
+ v for k, v in self.collections_config.items() if v.id == alias_or_id
951
991
  ]
952
992
 
953
993
  if len(collections) > 1:
@@ -960,22 +1000,24 @@ class EODataAccessGateway:
960
1000
  return alias_or_id
961
1001
  else:
962
1002
  raise NoMatchingCollection(
963
- f"Could not find collection from alias or ID {alias_or_id}"
1003
+ f"Could not find collection from alias or id {alias_or_id}"
964
1004
  )
965
1005
 
966
- return collections[0]
1006
+ return collections[0]._id or collections[0].id
967
1007
 
968
1008
  def get_alias_from_collection(self, collection: str) -> str:
969
- """Return the alias of a collection by its ID. If no alias was defined for the
970
- given collection, its ID is returned instead.
1009
+ """Return the alias of a collection by its id. If no alias was defined for the
1010
+ given collection, its id is returned instead.
971
1011
 
972
- :param collection: collection ID
973
- :returns: Alias of the collection or its ID if no alias has been defined for it.
1012
+ :param collection: collection id
1013
+ :returns: Alias of the collection or its id if no alias has been defined for it.
974
1014
  """
975
1015
  if collection not in self.collections_config:
976
1016
  raise NoMatchingCollection(collection)
977
1017
 
978
- return self.collections_config[collection].get("alias", collection)
1018
+ if alias := self.collections_config[collection].alias:
1019
+ return alias
1020
+ return collection
979
1021
 
980
1022
  def guess_collection(
981
1023
  self,
@@ -992,7 +1034,7 @@ class EODataAccessGateway:
992
1034
  start_date: Optional[str] = None,
993
1035
  end_date: Optional[str] = None,
994
1036
  **kwargs: Any,
995
- ) -> list[str]:
1037
+ ) -> CollectionsList:
996
1038
  """
997
1039
  Find EODAG collection IDs that best match a set of search parameters.
998
1040
 
@@ -1002,7 +1044,7 @@ class EODataAccessGateway:
1002
1044
  operators with parenthesis (``AND``/``OR``/``NOT``), quoted phrases (``"exact phrase"``),
1003
1045
  ``*`` and ``?`` wildcards.
1004
1046
  :param intersect: Join results for each parameter using INTERSECT instead of UNION.
1005
- :param instrument: Instrument parameter.
1047
+ :param instruments: Instruments parameter.
1006
1048
  :param platform: Platform parameter.
1007
1049
  :param constellation: Constellation parameter.
1008
1050
  :param processing_level: Processing level parameter.
@@ -1016,7 +1058,16 @@ class EODataAccessGateway:
1016
1058
  :raises: :class:`~eodag.utils.exceptions.NoMatchingCollection`
1017
1059
  """
1018
1060
  if collection := kwargs.get("collection"):
1019
- return [collection]
1061
+ if collection in self.collections_config:
1062
+ return CollectionsList([self.collections_config[collection]])
1063
+ else:
1064
+ try:
1065
+ collection = self.get_collection_from_alias(collection)
1066
+ return CollectionsList([self.collections_config[collection]])
1067
+ except NoMatchingCollection:
1068
+ return CollectionsList(
1069
+ [Collection.create_with_dag(self, id=collection)]
1070
+ )
1020
1071
 
1021
1072
  filters: dict[str, str] = {
1022
1073
  k: v
@@ -1045,18 +1096,17 @@ class EODataAccessGateway:
1045
1096
 
1046
1097
  guesses_with_score: list[tuple[str, int]] = []
1047
1098
 
1048
- for pt_id, pt_dict in self.collections_config.source.items():
1099
+ for col, col_f in self.collections_config.items():
1049
1100
  if (
1050
- pt_id == GENERIC_COLLECTION
1051
- or pt_id not in self._plugins_manager.collection_to_provider_config_map
1101
+ col == GENERIC_COLLECTION
1102
+ or col not in self._plugins_manager.collection_to_provider_config_map
1052
1103
  ):
1053
1104
  continue
1054
-
1055
1105
  score = 0 # how many filters matched
1056
1106
 
1057
1107
  # free text search
1058
1108
  if free_text:
1059
- match = free_text_evaluator(pt_dict)
1109
+ match = free_text_evaluator(col_f.model_dump())
1060
1110
  if match:
1061
1111
  score += 1
1062
1112
  elif intersect:
@@ -1072,9 +1122,16 @@ class EODataAccessGateway:
1072
1122
  }
1073
1123
 
1074
1124
  filter_matches = [
1075
- filters_evaluators[filter_name]({filter_name: pt_dict[filter_name]})
1125
+ filters_evaluators[filter_name](
1126
+ {
1127
+ filter_name: col_f.__dict__[
1128
+ Collection.get_collection_mtd_from_alias(filter_name)
1129
+ ]
1130
+ }
1131
+ )
1076
1132
  for filter_name, value in filters.items()
1077
- if filter_name in pt_dict
1133
+ if Collection.get_collection_mtd_from_alias(filter_name)
1134
+ in col_f.__dict__
1078
1135
  ]
1079
1136
 
1080
1137
  if filters_matching_method(filter_matches):
@@ -1091,33 +1148,43 @@ class EODataAccessGateway:
1091
1148
  min_aware = datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
1092
1149
  max_aware = datetime.datetime.max.replace(tzinfo=datetime.timezone.utc)
1093
1150
 
1094
- col_start, col_end = get_collection_dates(pt_dict)
1151
+ col_start_str = col_f.extent.temporal.interval[0][0]
1152
+ if col_start_str and isinstance(col_start_str, str):
1153
+ col_start = rfc3339_str_to_datetime(col_start_str)
1154
+ else:
1155
+ col_start = col_start_str or min_aware
1156
+ col_end_str = col_f.extent.temporal.interval[0][1]
1157
+ if col_end_str and isinstance(col_end_str, str):
1158
+ col_end = rfc3339_str_to_datetime(col_end_str)
1159
+ else:
1160
+ col_end = col_end_str or max_aware
1095
1161
 
1096
1162
  max_start = max(
1097
1163
  rfc3339_str_to_datetime(start_date) if start_date else min_aware,
1098
- rfc3339_str_to_datetime(col_start) if col_start else min_aware,
1164
+ col_start,
1099
1165
  )
1100
1166
  min_end = min(
1101
1167
  rfc3339_str_to_datetime(end_date) if end_date else max_aware,
1102
- rfc3339_str_to_datetime(col_end) if col_end else max_aware,
1168
+ col_end,
1103
1169
  )
1104
1170
  if not (max_start <= min_end):
1105
1171
  continue
1106
1172
 
1107
- pt_alias = pt_dict.get("alias", pt_id)
1108
- guesses_with_score.append((pt_alias, score))
1173
+ guesses_with_score.append((col_f._id, score))
1109
1174
 
1110
1175
  if guesses_with_score:
1111
- # sort by score descending, then pt_id for stability
1176
+ # sort by score descending, then col for stability
1112
1177
  guesses_with_score.sort(key=lambda x: (-x[1], x[0]))
1113
- return [pt_id for pt_id, _ in guesses_with_score]
1178
+ return CollectionsList(
1179
+ [self.collections_config[col] for col, _ in guesses_with_score]
1180
+ )
1114
1181
 
1115
1182
  raise NoMatchingCollection()
1116
1183
 
1117
1184
  def search(
1118
1185
  self,
1119
1186
  page: int = DEFAULT_PAGE,
1120
- items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
1187
+ items_per_page: Optional[int] = DEFAULT_ITEMS_PER_PAGE,
1121
1188
  raise_errors: bool = False,
1122
1189
  start: Optional[str] = None,
1123
1190
  end: Optional[str] = None,
@@ -1137,9 +1204,10 @@ class EODataAccessGateway:
1137
1204
  will be request from the provider with the next highest priority.
1138
1205
  Only if the request fails for all available providers, an error will be thrown.
1139
1206
 
1140
- :param page: (optional) The page number to return
1207
+ :param page: (optional) The page number to return (**deprecated**, use
1208
+ :meth:`eodag.api.search_result.SearchResult.next_page` instead)
1141
1209
  :param items_per_page: (optional) The number of results that must appear in one single
1142
- page
1210
+ page. If ``None``, the maximum number possible will be used.
1143
1211
  :param raise_errors: (optional) When an error occurs when searching, if this is set to
1144
1212
  True, the error is raised
1145
1213
  :param start: (optional) Start sensing time in ISO 8601 format (e.g. "1990-11-26",
@@ -1181,6 +1249,15 @@ class EODataAccessGateway:
1181
1249
  return a list as a result of their processing. This requirement is
1182
1250
  enforced here.
1183
1251
  """
1252
+ if page != DEFAULT_PAGE:
1253
+ warnings.warn(
1254
+ "Usage of deprecated search parameter 'page' "
1255
+ "(Please use 'SearchResult.next_page()' instead)"
1256
+ " -- Deprecated since v3.9.0",
1257
+ DeprecationWarning,
1258
+ stacklevel=2,
1259
+ )
1260
+
1184
1261
  search_plugins, search_kwargs = self._prepare_search(
1185
1262
  start=start,
1186
1263
  end=end,
@@ -1200,16 +1277,23 @@ class EODataAccessGateway:
1200
1277
  )
1201
1278
  # remove datacube query string from kwargs which was only needed for search-by-id
1202
1279
  search_kwargs.pop("_dc_qs", None)
1203
-
1204
- search_kwargs.update(
1205
- page=page,
1206
- items_per_page=items_per_page,
1207
- )
1280
+ # add page parameter
1281
+ search_kwargs["page"] = page
1208
1282
 
1209
1283
  errors: list[tuple[str, Exception]] = []
1210
1284
  # Loop over available providers and return the first non-empty results
1211
1285
  for i, search_plugin in enumerate(search_plugins):
1212
1286
  search_plugin.clear()
1287
+
1288
+ # add appropriate items_per_page value
1289
+ search_kwargs["items_per_page"] = (
1290
+ items_per_page
1291
+ if items_per_page is not None
1292
+ else getattr(search_plugin.config, "pagination", {}).get(
1293
+ "max_items_per_page", DEFAULT_MAX_ITEMS_PER_PAGE
1294
+ )
1295
+ )
1296
+
1213
1297
  search_results = self._do_search(
1214
1298
  search_plugin,
1215
1299
  count=count,
@@ -1225,12 +1309,22 @@ class EODataAccessGateway:
1225
1309
  )
1226
1310
  elif len(search_results) > 0:
1227
1311
  search_results.errors = errors
1312
+ if count and search_results.number_matched:
1313
+ logger.info(
1314
+ "Found %s result(s) on provider '%s'",
1315
+ search_results.number_matched,
1316
+ search_results[0].provider,
1317
+ )
1228
1318
  return search_results
1229
1319
 
1230
1320
  if i > 1:
1231
1321
  logger.error("No result could be obtained from any available provider")
1232
1322
  return SearchResult([], 0, errors) if count else SearchResult([], errors=errors)
1233
1323
 
1324
+ @_deprecated(
1325
+ reason="Please use 'SearchResult.next_page()' instead",
1326
+ version="v3.9.0",
1327
+ )
1234
1328
  def search_iter_page(
1235
1329
  self,
1236
1330
  items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
@@ -1242,6 +1336,9 @@ class EODataAccessGateway:
1242
1336
  ) -> Iterator[SearchResult]:
1243
1337
  """Iterate over the pages of a products search.
1244
1338
 
1339
+ .. deprecated:: v3.9.0
1340
+ Please use :meth:`eodag.api.search_result.SearchResult.next_page` instead.
1341
+
1245
1342
  :param items_per_page: (optional) The number of results requested per page
1246
1343
  :param start: (optional) Start sensing time in ISO 8601 format (e.g. "1990-11-26",
1247
1344
  "1990-11-26T14:30:10.153Z", "1990-11-26T14:30:10+02:00", ...).
@@ -1292,6 +1389,10 @@ class EODataAccessGateway:
1292
1389
  raise
1293
1390
  raise RequestError("No result could be obtained from any available provider")
1294
1391
 
1392
+ @_deprecated(
1393
+ reason="Please use 'SearchResult.next_page()' instead",
1394
+ version="v3.9.0",
1395
+ )
1295
1396
  def search_iter_page_plugin(
1296
1397
  self,
1297
1398
  search_plugin: Union[Search, Api],
@@ -1300,6 +1401,9 @@ class EODataAccessGateway:
1300
1401
  ) -> Iterator[SearchResult]:
1301
1402
  """Iterate over the pages of a products search using a given search plugin.
1302
1403
 
1404
+ .. deprecated:: v3.9.0
1405
+ Please use :meth:`eodag.api.search_result.SearchResult.next_page` instead.
1406
+
1303
1407
  :param items_per_page: (optional) The number of results requested per page
1304
1408
  :param kwargs: Some other criteria that will be used to do the search,
1305
1409
  using parameters compatibles with the provider
@@ -1307,114 +1411,40 @@ class EODataAccessGateway:
1307
1411
  :returns: An iterator that yields page per page a set of EO products
1308
1412
  matching the criteria
1309
1413
  """
1310
-
1311
- iteration = 1
1312
- # Store the search plugin config pagination.next_page_url_tpl to reset it later
1313
- # since it might be modified if the next_page_url mechanism is used by the
1314
- # plugin. (same thing for next_page_query_obj, next_page_query_obj with POST reqs)
1315
- pagination_config = getattr(search_plugin.config, "pagination", {})
1316
- prev_next_page_url_tpl = pagination_config.get("next_page_url_tpl")
1317
- prev_next_page_query_obj = pagination_config.get("next_page_query_obj")
1318
- # Page has to be set to a value even if use_next is True, this is required
1319
- # internally by the search plugin (see collect_search_urls)
1320
1414
  kwargs.update(
1321
1415
  page=1,
1322
1416
  items_per_page=items_per_page,
1323
1417
  )
1324
- prev_product = None
1325
- next_page_url = None
1326
- next_page_query_obj = None
1327
- number_matched = None
1328
- while True:
1329
- # if count is enabled, it will only be performed on 1st iteration
1330
- if iteration == 2:
1331
- kwargs["count"] = False
1332
- if iteration > 1 and next_page_url:
1333
- pagination_config["next_page_url_tpl"] = next_page_url
1334
- if iteration > 1 and next_page_query_obj:
1335
- pagination_config["next_page_query_obj"] = next_page_query_obj
1336
- logger.info("Iterate search over multiple pages: page #%s", iteration)
1337
- try:
1338
- # remove unwanted kwargs for _do_search
1339
- kwargs.pop("raise_errors", None)
1340
- search_result = self._do_search(
1341
- search_plugin, raise_errors=True, **kwargs
1342
- )
1343
- # if count is enabled, it will only be performed on 1st iteration
1344
- if iteration == 1:
1345
- number_matched = search_result.number_matched
1346
- except Exception:
1347
- logger.warning(
1348
- "error at retrieval of data from %s, for params: %s",
1349
- search_plugin.provider,
1350
- str(kwargs),
1351
- )
1352
- raise
1353
- finally:
1354
- # we don't want that next(search_iter_page(...)) modifies the plugin
1355
- # indefinitely. So we reset after each request, but before the generator
1356
- # yields, the attr next_page_url (to None) and
1357
- # config.pagination["next_page_url_tpl"] (to its original value).
1358
- next_page_url = getattr(search_plugin, "next_page_url", None)
1359
- next_page_query_obj = getattr(search_plugin, "next_page_query_obj", {})
1360
- next_page_merge = getattr(search_plugin, "next_page_merge", None)
1361
-
1362
- if next_page_url:
1363
- search_plugin.next_page_url = None
1364
- if prev_next_page_url_tpl:
1365
- search_plugin.config.pagination[
1366
- "next_page_url_tpl"
1367
- ] = prev_next_page_url_tpl
1368
- if next_page_query_obj:
1369
- if prev_next_page_query_obj:
1370
- search_plugin.config.pagination[
1371
- "next_page_query_obj"
1372
- ] = prev_next_page_query_obj
1373
- # Update next_page_query_obj for next page req
1374
- if next_page_merge:
1375
- search_plugin.next_page_query_obj = dict(
1376
- getattr(search_plugin, "query_params", {}),
1377
- **next_page_query_obj,
1378
- )
1379
- else:
1380
- search_plugin.next_page_query_obj = next_page_query_obj
1381
-
1382
- if len(search_result) > 0:
1383
- # The first products between two iterations are compared. If they
1384
- # are actually the same product, it means the iteration failed at
1385
- # progressing for some reason. This is implemented as a workaround
1386
- # to some search plugins/providers not handling pagination.
1387
- product = search_result[0]
1388
- if (
1389
- prev_product
1390
- and product.properties["id"] == prev_product.properties["id"]
1391
- and product.provider == prev_product.provider
1392
- ):
1393
- logger.warning(
1394
- "Iterate over pages: stop iterating since the next page "
1395
- "appears to have the same products as in the previous one. "
1396
- "This provider may not implement pagination.",
1397
- )
1398
- last_page_with_products = iteration - 1
1399
- break
1400
- # use count got from 1st iteration
1401
- search_result.number_matched = number_matched
1402
- yield search_result
1403
- prev_product = product
1404
- # Prevent a last search if the current one returned less than the
1405
- # maximum number of items asked for.
1406
- if len(search_result) < items_per_page:
1407
- last_page_with_products = iteration
1408
- break
1409
- else:
1410
- last_page_with_products = iteration - 1
1418
+ try:
1419
+ # remove unwanted kwargs for _do_search
1420
+ kwargs.pop("raise_errors", None)
1421
+ search_result = self._do_search(search_plugin, raise_errors=True, **kwargs)
1422
+ search_result.raise_errors = True
1423
+
1424
+ except Exception:
1425
+ logger.warning(
1426
+ "error at retrieval of data from %s, for params: %s",
1427
+ search_plugin.provider,
1428
+ str(kwargs),
1429
+ )
1430
+ raise
1431
+
1432
+ if len(search_result) == 0:
1433
+ return
1434
+ # remove unwanted kwargs for next_page
1435
+ if kwargs.get("count") is True:
1436
+ kwargs["count"] = False
1437
+ kwargs.pop("page", None)
1438
+ search_result.search_params = kwargs
1439
+ if search_result._dag is None:
1440
+ search_result._dag = self
1441
+
1442
+ yield search_result
1443
+
1444
+ for next_result in search_result.next_page():
1445
+ if len(next_result) == 0:
1411
1446
  break
1412
- iteration += 1
1413
- kwargs["page"] = iteration
1414
- logger.debug(
1415
- "Iterate over pages: last products found on page %s",
1416
- last_page_with_products,
1417
- )
1447
+ yield next_result
1418
1448
 
1419
1449
  def search_all(
1420
1450
  self,
@@ -1467,81 +1497,42 @@ class EODataAccessGateway:
1467
1497
  :returns: An iterator that yields page per page a set of EO products
1468
1498
  matching the criteria
1469
1499
  """
1470
- # Get the search plugin and the maximized value
1471
- # of items_per_page if defined for the provider used.
1472
- try:
1473
- collection = self.get_collection_from_alias(
1474
- self.guess_collection(**kwargs)[0]
1475
- )
1476
- except NoMatchingCollection:
1477
- collection = GENERIC_COLLECTION
1478
- else:
1479
- # fetch collections list if collection is unknown
1480
- if (
1481
- collection
1482
- not in self._plugins_manager.collection_to_provider_config_map.keys()
1483
- ):
1484
- logger.debug(
1485
- f"Fetching external collections sources to find {collection} collection"
1486
- )
1487
- self.fetch_collections_list()
1488
-
1489
1500
  # remove unwanted count
1490
1501
  kwargs.pop("count", None)
1491
1502
 
1492
- search_plugins, search_kwargs = self._prepare_search(
1493
- start=start, end=end, geom=geom, locations=locations, **kwargs
1503
+ # First search
1504
+ search_results = self.search(
1505
+ items_per_page=items_per_page,
1506
+ start=start,
1507
+ end=end,
1508
+ geom=geom,
1509
+ locations=locations,
1510
+ **kwargs,
1494
1511
  )
1495
- for i, search_plugin in enumerate(search_plugins):
1496
- itp = (
1497
- items_per_page
1498
- or getattr(search_plugin.config, "pagination", {}).get(
1499
- "max_items_per_page"
1500
- )
1501
- or DEFAULT_MAX_ITEMS_PER_PAGE
1502
- )
1512
+ if len(search_results) == 0:
1513
+ return search_results
1514
+
1515
+ try:
1516
+ search_results.raise_errors = True
1517
+
1518
+ # consume iterator
1519
+ deque(search_results.next_page(update=True))
1520
+
1503
1521
  logger.info(
1504
- "Searching for all the products with provider %s and a maximum of %s "
1505
- "items per page.",
1506
- search_plugin.provider,
1507
- itp,
1522
+ "Found %s result(s) on provider '%s'",
1523
+ len(search_results),
1524
+ search_results[0].provider,
1508
1525
  )
1509
- all_results = SearchResult([])
1510
- try:
1511
- for page_results in self.search_iter_page_plugin(
1512
- items_per_page=itp,
1513
- search_plugin=search_plugin,
1514
- count=False,
1515
- **search_kwargs,
1516
- ):
1517
- all_results.data.extend(page_results.data)
1518
- logger.info(
1519
- "Found %s result(s) on provider '%s'",
1520
- len(all_results),
1521
- search_plugin.provider,
1522
- )
1523
- return all_results
1524
- except RequestError:
1525
- if len(all_results) == 0 and i < len(search_plugins) - 1:
1526
- logger.warning(
1527
- "No result could be obtained from provider %s, "
1528
- "we will try to get the data from another provider",
1529
- search_plugin.provider,
1530
- )
1531
- elif len(all_results) == 0:
1532
- logger.error(
1533
- "No result could be obtained from any available provider"
1534
- )
1535
- raise
1536
- elif len(all_results) > 0:
1537
- logger.warning(
1538
- "Found %s result(s) on provider '%s', but it may be incomplete "
1539
- "as it ended with an error",
1540
- len(all_results),
1541
- search_plugin.provider,
1542
- )
1543
- return all_results
1544
- raise RequestError("No result could be obtained from any available provider")
1526
+ search_results.number_matched = len(search_results)
1527
+ except RequestError:
1528
+ logger.warning(
1529
+ "Found %s result(s) on provider '%s', but it may be incomplete "
1530
+ "as it ended with an error",
1531
+ len(search_results),
1532
+ search_results[0].provider,
1533
+ )
1534
+
1535
+ return search_results
1545
1536
 
1546
1537
  def _search_by_id(
1547
1538
  self, uid: str, provider: Optional[str] = None, **kwargs: Any
@@ -1628,7 +1619,7 @@ class EODataAccessGateway:
1628
1619
  if not results[0].collection:
1629
1620
  # guess collection from properties
1630
1621
  guesses = self.guess_collection(**results[0].properties)
1631
- results[0].collection = guesses[0]
1622
+ results[0].collection = guesses[0].id
1632
1623
  # reset driver
1633
1624
  results[0].driver = results[0].get_driver()
1634
1625
  results.number_matched = 1
@@ -1719,7 +1710,7 @@ class EODataAccessGateway:
1719
1710
  kwargs.pop(param, None)
1720
1711
 
1721
1712
  # By now, only use the best bet
1722
- collection = guesses[0]
1713
+ collection = guesses[0].id
1723
1714
  except NoMatchingCollection:
1724
1715
  queried_id = kwargs.get("id")
1725
1716
  if queried_id is None:
@@ -1776,7 +1767,9 @@ class EODataAccessGateway:
1776
1767
  not in self._plugins_manager.collection_to_provider_config_map.keys()
1777
1768
  ):
1778
1769
  # Try to get specific collection from external provider
1779
- logger.debug(f"Fetching {provider} to find {collection} collection")
1770
+ logger.debug(
1771
+ "Fetching %s to find %s collection", provider, collection
1772
+ )
1780
1773
  self._fetch_external_collection(provider, collection)
1781
1774
  if not provider:
1782
1775
  # no provider or still not found -> fetch all external collections
@@ -1861,13 +1854,11 @@ class EODataAccessGateway:
1861
1854
  max_items_per_page,
1862
1855
  )
1863
1856
 
1864
- results: list[EOProduct] = []
1865
- total_results: Optional[int] = 0 if count else None
1866
-
1867
1857
  errors: list[tuple[str, Exception]] = []
1868
1858
 
1869
1859
  try:
1870
1860
  prep = PreparedSearch(count=count)
1861
+ prep.raise_errors = raise_errors
1871
1862
 
1872
1863
  # append auth if needed
1873
1864
  if getattr(search_plugin.config, "need_auth", False):
@@ -1878,17 +1869,41 @@ class EODataAccessGateway:
1878
1869
  ):
1879
1870
  prep.auth = auth
1880
1871
 
1881
- prep.page = kwargs.pop("page", None)
1882
1872
  prep.items_per_page = kwargs.pop("items_per_page", None)
1873
+ prep.next_page_token = kwargs.pop("next_page_token", None)
1874
+ prep.next_page_token_key = kwargs.pop(
1875
+ "next_page_token_key", None
1876
+ ) or search_plugin.config.pagination.get("next_page_token_key", "page")
1877
+ prep.page = kwargs.pop("page", None)
1878
+
1879
+ if (
1880
+ prep.next_page_token_key == "page"
1881
+ and prep.items_per_page is not None
1882
+ and prep.next_page_token is None
1883
+ and prep.page is not None
1884
+ ):
1885
+ prep.next_page_token = str(
1886
+ prep.page
1887
+ - 1
1888
+ + search_plugin.config.pagination.get("start_page", DEFAULT_PAGE)
1889
+ )
1883
1890
 
1884
1891
  # remove None values and convert param names to their pydantic alias if any
1885
1892
  search_params = {}
1893
+ ecmwf_queryables = [
1894
+ f"{ECMWF_PREFIX[:-1]}_{k}" for k in ECMWF_ALLOWED_KEYWORDS
1895
+ ]
1886
1896
  for param, value in kwargs.items():
1887
1897
  if value is None:
1888
1898
  continue
1889
1899
  if param in Queryables.model_fields:
1890
1900
  param_alias = Queryables.model_fields[param].alias or param
1891
1901
  search_params[param_alias] = value
1902
+ elif param in ecmwf_queryables:
1903
+ # alias equivalent for ECMWF queryables
1904
+ search_params[
1905
+ re.sub(rf"^{ECMWF_PREFIX[:-1]}_", f"{ECMWF_PREFIX}", param)
1906
+ ] = value
1892
1907
  else:
1893
1908
  # remove `provider:` or `provider_` prefix if any
1894
1909
  search_params[
@@ -1898,14 +1913,13 @@ class EODataAccessGateway:
1898
1913
  if validate:
1899
1914
  search_plugin.validate(search_params, prep.auth)
1900
1915
 
1901
- res, nb_res = search_plugin.query(prep, **search_params)
1916
+ search_result = search_plugin.query(prep, **search_params)
1902
1917
 
1903
- if not isinstance(res, list):
1918
+ if not isinstance(search_result.data, list):
1904
1919
  raise PluginImplementationError(
1905
1920
  "The query function of a Search plugin must return a list of "
1906
- "results, got {} instead".format(type(res))
1921
+ "results, got {} instead".format(type(search_result.data))
1907
1922
  )
1908
-
1909
1923
  # Filter and attach to each eoproduct in the result the plugin capable of
1910
1924
  # downloading it (this is done to enable the eo_product to download itself
1911
1925
  # doing: eo_product.download()). The filtering is done by keeping only
@@ -1915,7 +1929,7 @@ class EODataAccessGateway:
1915
1929
  # WARNING: this means an eo_product that has an invalid geometry can still
1916
1930
  # be returned as a search result if there was no search extent (because we
1917
1931
  # will not try to do an intersection)
1918
- for eo_product in res:
1932
+ for eo_product in search_result.data:
1919
1933
  # if collection is not defined, try to guess using properties
1920
1934
  if eo_product.collection is None:
1921
1935
  pattern = re.compile(r"[^\w,]+")
@@ -1940,7 +1954,7 @@ class EODataAccessGateway:
1940
1954
  except NoMatchingCollection:
1941
1955
  pass
1942
1956
  else:
1943
- eo_product.collection = guesses[0]
1957
+ eo_product.collection = guesses[0].id
1944
1958
 
1945
1959
  try:
1946
1960
  if eo_product.collection is not None:
@@ -1953,18 +1967,13 @@ class EODataAccessGateway:
1953
1967
  if eo_product.search_intersection is not None:
1954
1968
  eo_product._register_downloader_from_manager(self._plugins_manager)
1955
1969
 
1956
- results.extend(res)
1957
- total_results = (
1958
- None
1959
- if (nb_res is None or total_results is None)
1960
- else total_results + nb_res
1961
- )
1962
- if count and nb_res is not None:
1963
- logger.info(
1964
- "Found %s result(s) on provider '%s'",
1965
- nb_res,
1966
- search_plugin.provider,
1967
- )
1970
+ # Make next_page not available if the current one returned less than the maximum number of items asked for.
1971
+ if not prep.items_per_page or len(search_result) < prep.items_per_page:
1972
+ search_result.next_page_token = None
1973
+
1974
+ search_result._dag = self
1975
+ return search_result
1976
+
1968
1977
  except Exception as e:
1969
1978
  if raise_errors:
1970
1979
  # Raise the error, letting the application wrapping eodag know that
@@ -1976,7 +1985,7 @@ class EODataAccessGateway:
1976
1985
  search_plugin.provider,
1977
1986
  )
1978
1987
  errors.append((search_plugin.provider, e))
1979
- return SearchResult(results, total_results, errors)
1988
+ return SearchResult([], 0, errors)
1980
1989
 
1981
1990
  def crunch(self, results: SearchResult, **kwargs: Any) -> SearchResult:
1982
1991
  """Apply the filters given through the keyword arguments to the results
@@ -2080,13 +2089,16 @@ class EODataAccessGateway:
2080
2089
  search_result: SearchResult, filename: str = "search_results.geojson"
2081
2090
  ) -> str:
2082
2091
  """Registers results of a search into a geojson file.
2092
+ The output is a FeatureCollection containing the EO products as features,
2093
+ with additional metadata such as ``number_matched``, ``next_page_token``,
2094
+ and ``search_params`` stored in the properties.
2083
2095
 
2084
2096
  :param search_result: A set of EO products resulting from a search
2085
2097
  :param filename: (optional) The name of the file to generate
2086
2098
  :returns: The name of the created file
2087
2099
  """
2088
2100
  with open(filename, "w") as fh:
2089
- geojson.dump(search_result, fh)
2101
+ geojson.dump(search_result.as_geojson_object(), fh)
2090
2102
  return filename
2091
2103
 
2092
2104
  @staticmethod
@@ -2101,12 +2113,16 @@ class EODataAccessGateway:
2101
2113
 
2102
2114
  def deserialize_and_register(self, filename: str) -> SearchResult:
2103
2115
  """Loads results of a search from a geojson file and register
2104
- products with the information needed to download itself
2116
+ products with the information needed to download itself.
2117
+
2118
+ This method also sets the internal EODataAccessGateway instance on the products,
2119
+ enabling pagination (e.g. access to next pages) if available.
2105
2120
 
2106
2121
  :param filename: A filename containing a search result encoded as a geojson
2107
- :returns: The search results encoded in `filename`
2122
+ :returns: The search results encoded in `filename`, ready for download and pagination
2108
2123
  """
2109
2124
  products = self.deserialize(filename)
2125
+ products._dag = self
2110
2126
  for i, product in enumerate(products):
2111
2127
  if product.downloader is None:
2112
2128
  downloader = self._plugins_manager.get_download_plugin(product)
@@ -2220,8 +2236,8 @@ class EODataAccessGateway:
2220
2236
  """
2221
2237
  # only fetch providers if collection is not found
2222
2238
  available_collections: list[str] = [
2223
- pt["ID"]
2224
- for pt in self.list_collections(provider=provider, fetch_providers=False)
2239
+ col.id
2240
+ for col in self.list_collections(provider=provider, fetch_providers=False)
2225
2241
  ]
2226
2242
  collection: Optional[str] = kwargs.get("collection")
2227
2243
  coll_alias: Optional[str] = collection
@@ -2231,8 +2247,8 @@ class EODataAccessGateway:
2231
2247
  if fetch_providers:
2232
2248
  # fetch providers and try again
2233
2249
  available_collections = [
2234
- pt["ID"]
2235
- for pt in self.list_collections(
2250
+ col.id
2251
+ for col in self.list_collections(
2236
2252
  provider=provider, fetch_providers=True
2237
2253
  )
2238
2254
  ]
@@ -2261,9 +2277,9 @@ class EODataAccessGateway:
2261
2277
  self._attach_collection_config(plugin, collection)
2262
2278
  collection_configs[collection] = plugin.config.collection_config
2263
2279
  else:
2264
- for pt in available_collections:
2265
- self._attach_collection_config(plugin, pt)
2266
- collection_configs[pt] = plugin.config.collection_config
2280
+ for col in available_collections:
2281
+ self._attach_collection_config(plugin, col)
2282
+ collection_configs[col] = plugin.config.collection_config
2267
2283
 
2268
2284
  # authenticate if required
2269
2285
  if getattr(plugin.config, "need_auth", False) and (
@@ -2277,8 +2293,14 @@ class EODataAccessGateway:
2277
2293
  plugin.provider,
2278
2294
  )
2279
2295
 
2296
+ # use queryables aliases
2297
+ kwargs_alias = {**kwargs}
2298
+ for search_param, field_info in Queryables.model_fields.items():
2299
+ if search_param in kwargs and field_info.alias:
2300
+ kwargs_alias[field_info.alias] = kwargs_alias.pop(search_param)
2301
+
2280
2302
  plugin_queryables = plugin.list_queryables(
2281
- kwargs,
2303
+ kwargs_alias,
2282
2304
  available_collections,
2283
2305
  collection_configs,
2284
2306
  collection,
@@ -2340,11 +2362,11 @@ class EODataAccessGateway:
2340
2362
  try:
2341
2363
  plugin.config.collection_config = dict(
2342
2364
  [
2343
- p
2344
- for p in self.list_collections(
2365
+ c.model_dump(mode="json", exclude={"id"})
2366
+ for c in self.list_collections(
2345
2367
  plugin.provider, fetch_providers=False
2346
2368
  )
2347
- if p["_id"] == collection
2369
+ if c._id == collection
2348
2370
  ][0],
2349
2371
  **{"collection": collection},
2350
2372
  )
@@ -2352,12 +2374,11 @@ class EODataAccessGateway:
2352
2374
  except IndexError:
2353
2375
  # Construct the GENERIC_COLLECTION metadata
2354
2376
  plugin.config.collection_config = dict(
2355
- ID=GENERIC_COLLECTION,
2356
- **self.collections_config[GENERIC_COLLECTION],
2377
+ **self.collections_config[GENERIC_COLLECTION].model_dump(
2378
+ mode="json", exclude={"id"}
2379
+ ),
2357
2380
  collection=collection,
2358
2381
  )
2359
- # Remove the ID since this is equal to collection.
2360
- plugin.config.collection_config.pop("ID", None)
2361
2382
 
2362
2383
  def import_stac_items(self, items_urls: list[str]) -> SearchResult:
2363
2384
  """Import STAC items from a list of URLs and convert them to SearchResult.