eodag 4.0.0a1__py3-none-any.whl → 4.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. eodag/__init__.py +6 -1
  2. eodag/api/collection.py +353 -0
  3. eodag/api/core.py +308 -296
  4. eodag/api/product/_product.py +15 -29
  5. eodag/api/product/drivers/__init__.py +2 -42
  6. eodag/api/product/drivers/base.py +0 -11
  7. eodag/api/product/metadata_mapping.py +34 -5
  8. eodag/api/search_result.py +144 -9
  9. eodag/cli.py +18 -15
  10. eodag/config.py +37 -3
  11. eodag/plugins/apis/ecmwf.py +16 -4
  12. eodag/plugins/apis/usgs.py +18 -7
  13. eodag/plugins/crunch/filter_latest_intersect.py +1 -0
  14. eodag/plugins/crunch/filter_overlap.py +3 -7
  15. eodag/plugins/search/__init__.py +3 -0
  16. eodag/plugins/search/base.py +6 -6
  17. eodag/plugins/search/build_search_result.py +157 -56
  18. eodag/plugins/search/cop_marine.py +48 -8
  19. eodag/plugins/search/csw.py +18 -8
  20. eodag/plugins/search/qssearch.py +331 -88
  21. eodag/plugins/search/static_stac_search.py +11 -12
  22. eodag/resources/collections.yml +610 -348
  23. eodag/resources/ext_collections.json +1 -1
  24. eodag/resources/ext_product_types.json +1 -1
  25. eodag/resources/providers.yml +330 -58
  26. eodag/resources/stac_provider.yml +4 -2
  27. eodag/resources/user_conf_template.yml +9 -0
  28. eodag/types/__init__.py +2 -0
  29. eodag/types/queryables.py +16 -0
  30. eodag/utils/__init__.py +47 -2
  31. eodag/utils/repr.py +2 -0
  32. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/METADATA +4 -2
  33. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/RECORD +37 -36
  34. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/WHEEL +0 -0
  35. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/entry_points.txt +0 -0
  36. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/licenses/LICENSE +0 -0
  37. {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/top_level.txt +0 -0
eodag/api/core.py CHANGED
@@ -23,14 +23,17 @@ import os
23
23
  import re
24
24
  import shutil
25
25
  import tempfile
26
+ import warnings
27
+ from collections import deque
26
28
  from importlib.metadata import version
27
29
  from importlib.resources import files as res_files
28
- from operator import itemgetter
30
+ from operator import attrgetter, itemgetter
29
31
  from typing import TYPE_CHECKING, Any, Iterator, Optional, Union
30
32
 
31
33
  import geojson
32
- import yaml.parser
34
+ import yaml
33
35
 
36
+ from eodag.api.collection import Collection, CollectionsDict, CollectionsList
34
37
  from eodag.api.product.metadata_mapping import (
35
38
  NOT_AVAILABLE,
36
39
  mtd_cfg_as_conversion_and_querypath,
@@ -53,7 +56,10 @@ from eodag.config import (
53
56
  )
54
57
  from eodag.plugins.manager import PluginManager
55
58
  from eodag.plugins.search import PreparedSearch
56
- from eodag.plugins.search.build_search_result import MeteoblueSearch
59
+ from eodag.plugins.search.build_search_result import (
60
+ ALLOWED_KEYWORDS as ECMWF_ALLOWED_KEYWORDS,
61
+ )
62
+ from eodag.plugins.search.build_search_result import ECMWF_PREFIX, MeteoblueSearch
57
63
  from eodag.plugins.search.qssearch import PostJsonSearch
58
64
  from eodag.types import model_fields_to_annotated
59
65
  from eodag.types.queryables import CommonQueryables, Queryables, QueryablesDict
@@ -65,7 +71,7 @@ from eodag.utils import (
65
71
  DEFAULT_PAGE,
66
72
  GENERIC_COLLECTION,
67
73
  GENERIC_STAC_PROVIDER,
68
- get_collection_dates,
74
+ _deprecated,
69
75
  get_geometry_from_various,
70
76
  makedirs,
71
77
  sort_dict,
@@ -81,6 +87,7 @@ from eodag.utils.exceptions import (
81
87
  RequestError,
82
88
  UnsupportedCollection,
83
89
  UnsupportedProvider,
90
+ ValidationError,
84
91
  )
85
92
  from eodag.utils.free_text_search import compile_free_text_query
86
93
  from eodag.utils.stac_reader import fetch_stac_items
@@ -115,7 +122,8 @@ class EODataAccessGateway:
115
122
  collections_config_path = os.getenv("EODAG_COLLECTIONS_CFG_FILE") or str(
116
123
  res_files("eodag") / "resources" / "collections.yml"
117
124
  )
118
- self.collections_config = SimpleYamlProxyConfig(collections_config_path)
125
+ collections_config_dict = SimpleYamlProxyConfig(collections_config_path).source
126
+ self.collections_config = self._collections_config_init(collections_config_dict)
119
127
  self.providers_config = load_default_config()
120
128
 
121
129
  env_var_cfg_dir = "EODAG_CFG_DIR"
@@ -168,7 +176,7 @@ class EODataAccessGateway:
168
176
 
169
177
  # init updated providers conf
170
178
  strict_mode = is_env_var_true("EODAG_STRICT_COLLECTIONS")
171
- available_collections = set(self.collections_config.source.keys())
179
+ available_collections = set(self.collections_config.keys())
172
180
 
173
181
  for provider in self.providers_config.keys():
174
182
  provider_config_init(
@@ -179,8 +187,6 @@ class EODataAccessGateway:
179
187
  self._sync_provider_collections(
180
188
  provider, available_collections, strict_mode
181
189
  )
182
- # init collections configuration
183
- self._collections_config_init()
184
190
 
185
191
  # re-build _plugins_manager using up-to-date providers_config
186
192
  self._plugins_manager.rebuild(self.providers_config)
@@ -223,10 +229,19 @@ class EODataAccessGateway:
223
229
  )
224
230
  self.set_locations_conf(locations_conf_path)
225
231
 
226
- def _collections_config_init(self) -> None:
227
- """Initialize collections configuration."""
228
- for pt_id, pd_dict in self.collections_config.source.items():
229
- self.collections_config.source[pt_id].setdefault("_id", pt_id)
232
+ def _collections_config_init(
233
+ self, collections_config_dict: dict[str, Any]
234
+ ) -> CollectionsDict:
235
+ """Initialize collections configuration.
236
+
237
+ :param collections_config_dict: The collections config as a dictionary
238
+ """
239
+ # Turn the collections config from a dict into a CollectionsDict() object
240
+ collections = [
241
+ Collection.create_with_dag(self, id=col, **col_f)
242
+ for col, col_f in collections_config_dict.items()
243
+ ]
244
+ return CollectionsDict(collections)
230
245
 
231
246
  def _sync_provider_collections(
232
247
  self,
@@ -258,13 +273,10 @@ class EODataAccessGateway:
258
273
  products_to_remove.append(product_id)
259
274
  continue
260
275
 
261
- empty_product = {
262
- "title": product_id,
263
- "description": NOT_AVAILABLE,
264
- }
265
- self.collections_config.source[
266
- product_id
267
- ] = empty_product # will update available_collections
276
+ empty_product = Collection.create_with_dag(
277
+ self, id=product_id, title=product_id, description=NOT_AVAILABLE
278
+ )
279
+ self.collections_config[product_id] = empty_product
268
280
  products_to_add.append(product_id)
269
281
 
270
282
  if products_to_add:
@@ -541,7 +553,7 @@ class EODataAccessGateway:
541
553
 
542
554
  def list_collections(
543
555
  self, provider: Optional[str] = None, fetch_providers: bool = True
544
- ) -> list[dict[str, Any]]:
556
+ ) -> CollectionsList:
545
557
  """Lists supported collections.
546
558
 
547
559
  :param provider: (optional) The name of a provider that must support the product
@@ -555,7 +567,7 @@ class EODataAccessGateway:
555
567
  # First, update collections list if possible
556
568
  self.fetch_collections_list(provider=provider)
557
569
 
558
- collections: list[dict[str, Any]] = []
570
+ collections: CollectionsList = CollectionsList([])
559
571
 
560
572
  providers_configs = (
561
573
  list(self.providers_config.values())
@@ -573,20 +585,18 @@ class EODataAccessGateway:
573
585
  )
574
586
 
575
587
  for p in providers_configs:
576
- for collection_id in p.products: # type: ignore
588
+ for collection_id in p.products:
577
589
  if collection_id == GENERIC_COLLECTION:
578
590
  continue
579
591
 
580
- config = self.collections_config[collection_id]
581
- if "alias" in config:
582
- collection_id = config["alias"]
583
- collection = {"ID": collection_id, **config}
584
-
585
- if collection not in collections:
592
+ if (
593
+ collection := self.collections_config[collection_id]
594
+ ) not in collections:
586
595
  collections.append(collection)
587
596
 
588
- # Return the collections sorted in lexicographic order of their ID
589
- return sorted(collections, key=itemgetter("ID"))
597
+ # Return the collections sorted in lexicographic order of their id
598
+ collections.sort(key=attrgetter("id"))
599
+ return collections
590
600
 
591
601
  def fetch_collections_list(self, provider: Optional[str] = None) -> None:
592
602
  """Fetch collections list and update if needed.
@@ -843,6 +853,7 @@ class EODataAccessGateway:
843
853
  )
844
854
  continue
845
855
  new_collections: list[str] = []
856
+ bad_formatted_col_count = 0
846
857
  for (
847
858
  new_collection,
848
859
  new_collection_conf,
@@ -867,26 +878,58 @@ class EODataAccessGateway:
867
878
  # new_collections_conf is a subset on an existing conf
868
879
  break
869
880
  else:
870
- # new_collection_conf does not already exist, append it
871
- # to provider_products_config
872
- provider_products_config[
873
- new_collection
874
- ] = new_collection_conf
875
- # to self.collections_config
876
- self.collections_config.source.update(
877
- {
878
- new_collection: {"_id": new_collection}
879
- | new_collections_conf["collections_config"][
881
+ try:
882
+ # new_collection_conf does not already exist, append it
883
+ # to self.collections_config
884
+ self.collections_config[
885
+ new_collection
886
+ ] = Collection.create_with_dag(
887
+ self,
888
+ id=new_collection,
889
+ **new_collections_conf["collections_config"][
880
890
  new_collection
881
- ]
891
+ ],
892
+ )
893
+ except ValidationError:
894
+ # skip collection if there is a problem with its id (missing or not a string)
895
+ logger.debug(
896
+ (
897
+ "Collection %s has been pruned on provider %s "
898
+ "because its id was incorrectly parsed for eodag"
899
+ ),
900
+ new_collection,
901
+ provider,
902
+ )
903
+ else:
904
+ # to provider_products_config
905
+ provider_products_config[
906
+ new_collection
907
+ ] = new_collection_conf
908
+ ext_collections_conf[provider] = new_collections_conf
909
+ new_collections.append(new_collection)
910
+ # increase the increment if the new collection had
911
+ # bad formatted attributes in the external config
912
+ dumped_collection = self.collections_config[
913
+ new_collection
914
+ ].model_dump()
915
+ dumped_ext_conf_col = {
916
+ **dumped_collection,
917
+ **new_collections_conf["collections_config"][
918
+ new_collection
919
+ ],
882
920
  }
883
- )
884
- ext_collections_conf[provider] = new_collections_conf
885
- new_collections.append(new_collection)
921
+ if dumped_ext_conf_col != dumped_collection:
922
+ bad_formatted_col_count += 1
886
923
  if new_collections:
887
924
  logger.debug(
888
- f"Added {len(new_collections)} collections for {provider}"
925
+ "Added %s collections for %s", len(new_collections), provider
889
926
  )
927
+ if bad_formatted_col_count > 0:
928
+ logger.debug(
929
+ "bad formatted attributes skipped for %s collection(s) on %s",
930
+ bad_formatted_col_count,
931
+ provider,
932
+ )
890
933
 
891
934
  elif provider not in self.providers_config:
892
935
  # unknown provider
@@ -938,16 +981,14 @@ class EODataAccessGateway:
938
981
  return [name for name, _ in providers]
939
982
 
940
983
  def get_collection_from_alias(self, alias_or_id: str) -> str:
941
- """Return the ID of a collection by either its ID or alias
984
+ """Return the id of a collection by either its id or alias
942
985
 
943
- :param alias_or_id: Alias of the collection. If an existing ID is given, this
986
+ :param alias_or_id: Alias of the collection. If an existing id is given, this
944
987
  method will directly return the given value.
945
988
  :returns: Internal name of the collection.
946
989
  """
947
990
  collections = [
948
- k
949
- for k, v in self.collections_config.items()
950
- if v.get("alias") == alias_or_id
991
+ k for k, v in self.collections_config.items() if v.alias == alias_or_id
951
992
  ]
952
993
 
953
994
  if len(collections) > 1:
@@ -960,22 +1001,24 @@ class EODataAccessGateway:
960
1001
  return alias_or_id
961
1002
  else:
962
1003
  raise NoMatchingCollection(
963
- f"Could not find collection from alias or ID {alias_or_id}"
1004
+ f"Could not find collection from alias or id {alias_or_id}"
964
1005
  )
965
1006
 
966
1007
  return collections[0]
967
1008
 
968
1009
  def get_alias_from_collection(self, collection: str) -> str:
969
- """Return the alias of a collection by its ID. If no alias was defined for the
970
- given collection, its ID is returned instead.
1010
+ """Return the alias of a collection by its id. If no alias was defined for the
1011
+ given collection, its id is returned instead.
971
1012
 
972
- :param collection: collection ID
973
- :returns: Alias of the collection or its ID if no alias has been defined for it.
1013
+ :param collection: collection id
1014
+ :returns: Alias of the collection or its id if no alias has been defined for it.
974
1015
  """
975
1016
  if collection not in self.collections_config:
976
1017
  raise NoMatchingCollection(collection)
977
1018
 
978
- return self.collections_config[collection].get("alias", collection)
1019
+ if alias := self.collections_config[collection].alias:
1020
+ return alias
1021
+ return collection
979
1022
 
980
1023
  def guess_collection(
981
1024
  self,
@@ -992,7 +1035,7 @@ class EODataAccessGateway:
992
1035
  start_date: Optional[str] = None,
993
1036
  end_date: Optional[str] = None,
994
1037
  **kwargs: Any,
995
- ) -> list[str]:
1038
+ ) -> CollectionsList:
996
1039
  """
997
1040
  Find EODAG collection IDs that best match a set of search parameters.
998
1041
 
@@ -1002,7 +1045,7 @@ class EODataAccessGateway:
1002
1045
  operators with parenthesis (``AND``/``OR``/``NOT``), quoted phrases (``"exact phrase"``),
1003
1046
  ``*`` and ``?`` wildcards.
1004
1047
  :param intersect: Join results for each parameter using INTERSECT instead of UNION.
1005
- :param instrument: Instrument parameter.
1048
+ :param instruments: Instruments parameter.
1006
1049
  :param platform: Platform parameter.
1007
1050
  :param constellation: Constellation parameter.
1008
1051
  :param processing_level: Processing level parameter.
@@ -1016,7 +1059,13 @@ class EODataAccessGateway:
1016
1059
  :raises: :class:`~eodag.utils.exceptions.NoMatchingCollection`
1017
1060
  """
1018
1061
  if collection := kwargs.get("collection"):
1019
- return [collection]
1062
+ try:
1063
+ collection = self.get_collection_from_alias(collection)
1064
+ return CollectionsList([self.collections_config[collection]])
1065
+ except NoMatchingCollection:
1066
+ return CollectionsList(
1067
+ [Collection.create_with_dag(self, id=collection)]
1068
+ )
1020
1069
 
1021
1070
  filters: dict[str, str] = {
1022
1071
  k: v
@@ -1045,10 +1094,10 @@ class EODataAccessGateway:
1045
1094
 
1046
1095
  guesses_with_score: list[tuple[str, int]] = []
1047
1096
 
1048
- for pt_id, pt_dict in self.collections_config.source.items():
1097
+ for col, col_f in self.collections_config.items():
1049
1098
  if (
1050
- pt_id == GENERIC_COLLECTION
1051
- or pt_id not in self._plugins_manager.collection_to_provider_config_map
1099
+ col == GENERIC_COLLECTION
1100
+ or col not in self._plugins_manager.collection_to_provider_config_map
1052
1101
  ):
1053
1102
  continue
1054
1103
 
@@ -1056,7 +1105,7 @@ class EODataAccessGateway:
1056
1105
 
1057
1106
  # free text search
1058
1107
  if free_text:
1059
- match = free_text_evaluator(pt_dict)
1108
+ match = free_text_evaluator(col_f.model_dump())
1060
1109
  if match:
1061
1110
  score += 1
1062
1111
  elif intersect:
@@ -1072,9 +1121,16 @@ class EODataAccessGateway:
1072
1121
  }
1073
1122
 
1074
1123
  filter_matches = [
1075
- filters_evaluators[filter_name]({filter_name: pt_dict[filter_name]})
1124
+ filters_evaluators[filter_name](
1125
+ {
1126
+ filter_name: col_f.__dict__[
1127
+ Collection.get_collection_mtd_from_alias(filter_name)
1128
+ ]
1129
+ }
1130
+ )
1076
1131
  for filter_name, value in filters.items()
1077
- if filter_name in pt_dict
1132
+ if Collection.get_collection_mtd_from_alias(filter_name)
1133
+ in col_f.__dict__
1078
1134
  ]
1079
1135
 
1080
1136
  if filters_matching_method(filter_matches):
@@ -1091,33 +1147,35 @@ class EODataAccessGateway:
1091
1147
  min_aware = datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
1092
1148
  max_aware = datetime.datetime.max.replace(tzinfo=datetime.timezone.utc)
1093
1149
 
1094
- col_start, col_end = get_collection_dates(pt_dict)
1150
+ col_start = col_f.extent.temporal.interval[0][0]
1151
+ col_end = col_f.extent.temporal.interval[0][1]
1095
1152
 
1096
1153
  max_start = max(
1097
1154
  rfc3339_str_to_datetime(start_date) if start_date else min_aware,
1098
- rfc3339_str_to_datetime(col_start) if col_start else min_aware,
1155
+ col_start or min_aware,
1099
1156
  )
1100
1157
  min_end = min(
1101
1158
  rfc3339_str_to_datetime(end_date) if end_date else max_aware,
1102
- rfc3339_str_to_datetime(col_end) if col_end else max_aware,
1159
+ col_end or max_aware,
1103
1160
  )
1104
1161
  if not (max_start <= min_end):
1105
1162
  continue
1106
1163
 
1107
- pt_alias = pt_dict.get("alias", pt_id)
1108
- guesses_with_score.append((pt_alias, score))
1164
+ guesses_with_score.append((col_f._id, score))
1109
1165
 
1110
1166
  if guesses_with_score:
1111
- # sort by score descending, then pt_id for stability
1167
+ # sort by score descending, then col for stability
1112
1168
  guesses_with_score.sort(key=lambda x: (-x[1], x[0]))
1113
- return [pt_id for pt_id, _ in guesses_with_score]
1169
+ return CollectionsList(
1170
+ [self.collections_config[col] for col, _ in guesses_with_score]
1171
+ )
1114
1172
 
1115
1173
  raise NoMatchingCollection()
1116
1174
 
1117
1175
  def search(
1118
1176
  self,
1119
1177
  page: int = DEFAULT_PAGE,
1120
- items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
1178
+ items_per_page: Optional[int] = DEFAULT_ITEMS_PER_PAGE,
1121
1179
  raise_errors: bool = False,
1122
1180
  start: Optional[str] = None,
1123
1181
  end: Optional[str] = None,
@@ -1137,9 +1195,10 @@ class EODataAccessGateway:
1137
1195
  will be request from the provider with the next highest priority.
1138
1196
  Only if the request fails for all available providers, an error will be thrown.
1139
1197
 
1140
- :param page: (optional) The page number to return
1198
+ :param page: (optional) The page number to return (**deprecated**, use
1199
+ :meth:`eodag.api.search_result.SearchResult.next_page` instead)
1141
1200
  :param items_per_page: (optional) The number of results that must appear in one single
1142
- page
1201
+ page. If ``None``, the maximum number possible will be used.
1143
1202
  :param raise_errors: (optional) When an error occurs when searching, if this is set to
1144
1203
  True, the error is raised
1145
1204
  :param start: (optional) Start sensing time in ISO 8601 format (e.g. "1990-11-26",
@@ -1181,6 +1240,15 @@ class EODataAccessGateway:
1181
1240
  return a list as a result of their processing. This requirement is
1182
1241
  enforced here.
1183
1242
  """
1243
+ if page != DEFAULT_PAGE:
1244
+ warnings.warn(
1245
+ "Usage of deprecated search parameter 'page' "
1246
+ "(Please use 'SearchResult.next_page()' instead)"
1247
+ " -- Deprecated since v3.9.0",
1248
+ DeprecationWarning,
1249
+ stacklevel=2,
1250
+ )
1251
+
1184
1252
  search_plugins, search_kwargs = self._prepare_search(
1185
1253
  start=start,
1186
1254
  end=end,
@@ -1200,16 +1268,23 @@ class EODataAccessGateway:
1200
1268
  )
1201
1269
  # remove datacube query string from kwargs which was only needed for search-by-id
1202
1270
  search_kwargs.pop("_dc_qs", None)
1203
-
1204
- search_kwargs.update(
1205
- page=page,
1206
- items_per_page=items_per_page,
1207
- )
1271
+ # add page parameter
1272
+ search_kwargs["page"] = page
1208
1273
 
1209
1274
  errors: list[tuple[str, Exception]] = []
1210
1275
  # Loop over available providers and return the first non-empty results
1211
1276
  for i, search_plugin in enumerate(search_plugins):
1212
1277
  search_plugin.clear()
1278
+
1279
+ # add appropriate items_per_page value
1280
+ search_kwargs["items_per_page"] = (
1281
+ items_per_page
1282
+ if items_per_page is not None
1283
+ else getattr(search_plugin.config, "pagination", {}).get(
1284
+ "max_items_per_page", DEFAULT_MAX_ITEMS_PER_PAGE
1285
+ )
1286
+ )
1287
+
1213
1288
  search_results = self._do_search(
1214
1289
  search_plugin,
1215
1290
  count=count,
@@ -1225,12 +1300,22 @@ class EODataAccessGateway:
1225
1300
  )
1226
1301
  elif len(search_results) > 0:
1227
1302
  search_results.errors = errors
1303
+ if count and search_results.number_matched:
1304
+ logger.info(
1305
+ "Found %s result(s) on provider '%s'",
1306
+ search_results.number_matched,
1307
+ search_results[0].provider,
1308
+ )
1228
1309
  return search_results
1229
1310
 
1230
1311
  if i > 1:
1231
1312
  logger.error("No result could be obtained from any available provider")
1232
1313
  return SearchResult([], 0, errors) if count else SearchResult([], errors=errors)
1233
1314
 
1315
+ @_deprecated(
1316
+ reason="Please use 'SearchResult.next_page()' instead",
1317
+ version="v3.9.0",
1318
+ )
1234
1319
  def search_iter_page(
1235
1320
  self,
1236
1321
  items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
@@ -1242,6 +1327,9 @@ class EODataAccessGateway:
1242
1327
  ) -> Iterator[SearchResult]:
1243
1328
  """Iterate over the pages of a products search.
1244
1329
 
1330
+ .. deprecated:: v3.9.0
1331
+ Please use :meth:`eodag.api.search_result.SearchResult.next_page` instead.
1332
+
1245
1333
  :param items_per_page: (optional) The number of results requested per page
1246
1334
  :param start: (optional) Start sensing time in ISO 8601 format (e.g. "1990-11-26",
1247
1335
  "1990-11-26T14:30:10.153Z", "1990-11-26T14:30:10+02:00", ...).
@@ -1292,6 +1380,10 @@ class EODataAccessGateway:
1292
1380
  raise
1293
1381
  raise RequestError("No result could be obtained from any available provider")
1294
1382
 
1383
+ @_deprecated(
1384
+ reason="Please use 'SearchResult.next_page()' instead",
1385
+ version="v3.9.0",
1386
+ )
1295
1387
  def search_iter_page_plugin(
1296
1388
  self,
1297
1389
  search_plugin: Union[Search, Api],
@@ -1300,6 +1392,9 @@ class EODataAccessGateway:
1300
1392
  ) -> Iterator[SearchResult]:
1301
1393
  """Iterate over the pages of a products search using a given search plugin.
1302
1394
 
1395
+ .. deprecated:: v3.9.0
1396
+ Please use :meth:`eodag.api.search_result.SearchResult.next_page` instead.
1397
+
1303
1398
  :param items_per_page: (optional) The number of results requested per page
1304
1399
  :param kwargs: Some other criteria that will be used to do the search,
1305
1400
  using parameters compatibles with the provider
@@ -1307,114 +1402,40 @@ class EODataAccessGateway:
1307
1402
  :returns: An iterator that yields page per page a set of EO products
1308
1403
  matching the criteria
1309
1404
  """
1310
-
1311
- iteration = 1
1312
- # Store the search plugin config pagination.next_page_url_tpl to reset it later
1313
- # since it might be modified if the next_page_url mechanism is used by the
1314
- # plugin. (same thing for next_page_query_obj, next_page_query_obj with POST reqs)
1315
- pagination_config = getattr(search_plugin.config, "pagination", {})
1316
- prev_next_page_url_tpl = pagination_config.get("next_page_url_tpl")
1317
- prev_next_page_query_obj = pagination_config.get("next_page_query_obj")
1318
- # Page has to be set to a value even if use_next is True, this is required
1319
- # internally by the search plugin (see collect_search_urls)
1320
1405
  kwargs.update(
1321
1406
  page=1,
1322
1407
  items_per_page=items_per_page,
1323
1408
  )
1324
- prev_product = None
1325
- next_page_url = None
1326
- next_page_query_obj = None
1327
- number_matched = None
1328
- while True:
1329
- # if count is enabled, it will only be performed on 1st iteration
1330
- if iteration == 2:
1331
- kwargs["count"] = False
1332
- if iteration > 1 and next_page_url:
1333
- pagination_config["next_page_url_tpl"] = next_page_url
1334
- if iteration > 1 and next_page_query_obj:
1335
- pagination_config["next_page_query_obj"] = next_page_query_obj
1336
- logger.info("Iterate search over multiple pages: page #%s", iteration)
1337
- try:
1338
- # remove unwanted kwargs for _do_search
1339
- kwargs.pop("raise_errors", None)
1340
- search_result = self._do_search(
1341
- search_plugin, raise_errors=True, **kwargs
1342
- )
1343
- # if count is enabled, it will only be performed on 1st iteration
1344
- if iteration == 1:
1345
- number_matched = search_result.number_matched
1346
- except Exception:
1347
- logger.warning(
1348
- "error at retrieval of data from %s, for params: %s",
1349
- search_plugin.provider,
1350
- str(kwargs),
1351
- )
1352
- raise
1353
- finally:
1354
- # we don't want that next(search_iter_page(...)) modifies the plugin
1355
- # indefinitely. So we reset after each request, but before the generator
1356
- # yields, the attr next_page_url (to None) and
1357
- # config.pagination["next_page_url_tpl"] (to its original value).
1358
- next_page_url = getattr(search_plugin, "next_page_url", None)
1359
- next_page_query_obj = getattr(search_plugin, "next_page_query_obj", {})
1360
- next_page_merge = getattr(search_plugin, "next_page_merge", None)
1361
-
1362
- if next_page_url:
1363
- search_plugin.next_page_url = None
1364
- if prev_next_page_url_tpl:
1365
- search_plugin.config.pagination[
1366
- "next_page_url_tpl"
1367
- ] = prev_next_page_url_tpl
1368
- if next_page_query_obj:
1369
- if prev_next_page_query_obj:
1370
- search_plugin.config.pagination[
1371
- "next_page_query_obj"
1372
- ] = prev_next_page_query_obj
1373
- # Update next_page_query_obj for next page req
1374
- if next_page_merge:
1375
- search_plugin.next_page_query_obj = dict(
1376
- getattr(search_plugin, "query_params", {}),
1377
- **next_page_query_obj,
1378
- )
1379
- else:
1380
- search_plugin.next_page_query_obj = next_page_query_obj
1381
-
1382
- if len(search_result) > 0:
1383
- # The first products between two iterations are compared. If they
1384
- # are actually the same product, it means the iteration failed at
1385
- # progressing for some reason. This is implemented as a workaround
1386
- # to some search plugins/providers not handling pagination.
1387
- product = search_result[0]
1388
- if (
1389
- prev_product
1390
- and product.properties["id"] == prev_product.properties["id"]
1391
- and product.provider == prev_product.provider
1392
- ):
1393
- logger.warning(
1394
- "Iterate over pages: stop iterating since the next page "
1395
- "appears to have the same products as in the previous one. "
1396
- "This provider may not implement pagination.",
1397
- )
1398
- last_page_with_products = iteration - 1
1399
- break
1400
- # use count got from 1st iteration
1401
- search_result.number_matched = number_matched
1402
- yield search_result
1403
- prev_product = product
1404
- # Prevent a last search if the current one returned less than the
1405
- # maximum number of items asked for.
1406
- if len(search_result) < items_per_page:
1407
- last_page_with_products = iteration
1408
- break
1409
- else:
1410
- last_page_with_products = iteration - 1
1409
+ try:
1410
+ # remove unwanted kwargs for _do_search
1411
+ kwargs.pop("raise_errors", None)
1412
+ search_result = self._do_search(search_plugin, raise_errors=True, **kwargs)
1413
+ search_result.raise_errors = True
1414
+
1415
+ except Exception:
1416
+ logger.warning(
1417
+ "error at retrieval of data from %s, for params: %s",
1418
+ search_plugin.provider,
1419
+ str(kwargs),
1420
+ )
1421
+ raise
1422
+
1423
+ if len(search_result) == 0:
1424
+ return
1425
+ # remove unwanted kwargs for next_page
1426
+ if kwargs.get("count") is True:
1427
+ kwargs["count"] = False
1428
+ kwargs.pop("page", None)
1429
+ search_result.search_params = kwargs
1430
+ if search_result._dag is None:
1431
+ search_result._dag = self
1432
+
1433
+ yield search_result
1434
+
1435
+ for next_result in search_result.next_page():
1436
+ if len(next_result) == 0:
1411
1437
  break
1412
- iteration += 1
1413
- kwargs["page"] = iteration
1414
- logger.debug(
1415
- "Iterate over pages: last products found on page %s",
1416
- last_page_with_products,
1417
- )
1438
+ yield next_result
1418
1439
 
1419
1440
  def search_all(
1420
1441
  self,
@@ -1467,81 +1488,42 @@ class EODataAccessGateway:
1467
1488
  :returns: An iterator that yields page per page a set of EO products
1468
1489
  matching the criteria
1469
1490
  """
1470
- # Get the search plugin and the maximized value
1471
- # of items_per_page if defined for the provider used.
1472
- try:
1473
- collection = self.get_collection_from_alias(
1474
- self.guess_collection(**kwargs)[0]
1475
- )
1476
- except NoMatchingCollection:
1477
- collection = GENERIC_COLLECTION
1478
- else:
1479
- # fetch collections list if collection is unknown
1480
- if (
1481
- collection
1482
- not in self._plugins_manager.collection_to_provider_config_map.keys()
1483
- ):
1484
- logger.debug(
1485
- f"Fetching external collections sources to find {collection} collection"
1486
- )
1487
- self.fetch_collections_list()
1488
-
1489
1491
  # remove unwanted count
1490
1492
  kwargs.pop("count", None)
1491
1493
 
1492
- search_plugins, search_kwargs = self._prepare_search(
1493
- start=start, end=end, geom=geom, locations=locations, **kwargs
1494
+ # First search
1495
+ search_results = self.search(
1496
+ items_per_page=items_per_page,
1497
+ start=start,
1498
+ end=end,
1499
+ geom=geom,
1500
+ locations=locations,
1501
+ **kwargs,
1494
1502
  )
1495
- for i, search_plugin in enumerate(search_plugins):
1496
- itp = (
1497
- items_per_page
1498
- or getattr(search_plugin.config, "pagination", {}).get(
1499
- "max_items_per_page"
1500
- )
1501
- or DEFAULT_MAX_ITEMS_PER_PAGE
1502
- )
1503
+ if len(search_results) == 0:
1504
+ return search_results
1505
+
1506
+ try:
1507
+ search_results.raise_errors = True
1508
+
1509
+ # consume iterator
1510
+ deque(search_results.next_page(update=True))
1511
+
1503
1512
  logger.info(
1504
- "Searching for all the products with provider %s and a maximum of %s "
1505
- "items per page.",
1506
- search_plugin.provider,
1507
- itp,
1513
+ "Found %s result(s) on provider '%s'",
1514
+ len(search_results),
1515
+ search_results[0].provider,
1516
+ )
1517
+ search_results.number_matched = len(search_results)
1518
+ except RequestError:
1519
+ logger.warning(
1520
+ "Found %s result(s) on provider '%s', but it may be incomplete "
1521
+ "as it ended with an error",
1522
+ len(search_results),
1523
+ search_results[0].provider,
1508
1524
  )
1509
- all_results = SearchResult([])
1510
- try:
1511
- for page_results in self.search_iter_page_plugin(
1512
- items_per_page=itp,
1513
- search_plugin=search_plugin,
1514
- count=False,
1515
- **search_kwargs,
1516
- ):
1517
- all_results.data.extend(page_results.data)
1518
- logger.info(
1519
- "Found %s result(s) on provider '%s'",
1520
- len(all_results),
1521
- search_plugin.provider,
1522
- )
1523
- return all_results
1524
- except RequestError:
1525
- if len(all_results) == 0 and i < len(search_plugins) - 1:
1526
- logger.warning(
1527
- "No result could be obtained from provider %s, "
1528
- "we will try to get the data from another provider",
1529
- search_plugin.provider,
1530
- )
1531
- elif len(all_results) == 0:
1532
- logger.error(
1533
- "No result could be obtained from any available provider"
1534
- )
1535
- raise
1536
- elif len(all_results) > 0:
1537
- logger.warning(
1538
- "Found %s result(s) on provider '%s', but it may be incomplete "
1539
- "as it ended with an error",
1540
- len(all_results),
1541
- search_plugin.provider,
1542
- )
1543
- return all_results
1544
- raise RequestError("No result could be obtained from any available provider")
1525
+
1526
+ return search_results
1545
1527
 
1546
1528
  def _search_by_id(
1547
1529
  self, uid: str, provider: Optional[str] = None, **kwargs: Any
@@ -1628,7 +1610,7 @@ class EODataAccessGateway:
1628
1610
  if not results[0].collection:
1629
1611
  # guess collection from properties
1630
1612
  guesses = self.guess_collection(**results[0].properties)
1631
- results[0].collection = guesses[0]
1613
+ results[0].collection = guesses[0].id
1632
1614
  # reset driver
1633
1615
  results[0].driver = results[0].get_driver()
1634
1616
  results.number_matched = 1
@@ -1719,7 +1701,7 @@ class EODataAccessGateway:
1719
1701
  kwargs.pop(param, None)
1720
1702
 
1721
1703
  # By now, only use the best bet
1722
- collection = guesses[0]
1704
+ collection = guesses[0].id
1723
1705
  except NoMatchingCollection:
1724
1706
  queried_id = kwargs.get("id")
1725
1707
  if queried_id is None:
@@ -1776,7 +1758,9 @@ class EODataAccessGateway:
1776
1758
  not in self._plugins_manager.collection_to_provider_config_map.keys()
1777
1759
  ):
1778
1760
  # Try to get specific collection from external provider
1779
- logger.debug(f"Fetching {provider} to find {collection} collection")
1761
+ logger.debug(
1762
+ "Fetching %s to find %s collection", provider, collection
1763
+ )
1780
1764
  self._fetch_external_collection(provider, collection)
1781
1765
  if not provider:
1782
1766
  # no provider or still not found -> fetch all external collections
@@ -1861,13 +1845,11 @@ class EODataAccessGateway:
1861
1845
  max_items_per_page,
1862
1846
  )
1863
1847
 
1864
- results: list[EOProduct] = []
1865
- total_results: Optional[int] = 0 if count else None
1866
-
1867
1848
  errors: list[tuple[str, Exception]] = []
1868
1849
 
1869
1850
  try:
1870
1851
  prep = PreparedSearch(count=count)
1852
+ prep.raise_errors = raise_errors
1871
1853
 
1872
1854
  # append auth if needed
1873
1855
  if getattr(search_plugin.config, "need_auth", False):
@@ -1878,17 +1860,41 @@ class EODataAccessGateway:
1878
1860
  ):
1879
1861
  prep.auth = auth
1880
1862
 
1881
- prep.page = kwargs.pop("page", None)
1882
1863
  prep.items_per_page = kwargs.pop("items_per_page", None)
1864
+ prep.next_page_token = kwargs.pop("next_page_token", None)
1865
+ prep.next_page_token_key = kwargs.pop(
1866
+ "next_page_token_key", None
1867
+ ) or search_plugin.config.pagination.get("next_page_token_key", "page")
1868
+ prep.page = kwargs.pop("page", None)
1869
+
1870
+ if (
1871
+ prep.next_page_token_key == "page"
1872
+ and prep.items_per_page is not None
1873
+ and prep.next_page_token is None
1874
+ and prep.page is not None
1875
+ ):
1876
+ prep.next_page_token = str(
1877
+ prep.page
1878
+ - 1
1879
+ + search_plugin.config.pagination.get("start_page", DEFAULT_PAGE)
1880
+ )
1883
1881
 
1884
1882
  # remove None values and convert param names to their pydantic alias if any
1885
1883
  search_params = {}
1884
+ ecmwf_queryables = [
1885
+ f"{ECMWF_PREFIX[:-1]}_{k}" for k in ECMWF_ALLOWED_KEYWORDS
1886
+ ]
1886
1887
  for param, value in kwargs.items():
1887
1888
  if value is None:
1888
1889
  continue
1889
1890
  if param in Queryables.model_fields:
1890
1891
  param_alias = Queryables.model_fields[param].alias or param
1891
1892
  search_params[param_alias] = value
1893
+ elif param in ecmwf_queryables:
1894
+ # alias equivalent for ECMWF queryables
1895
+ search_params[
1896
+ re.sub(rf"^{ECMWF_PREFIX[:-1]}_", f"{ECMWF_PREFIX}", param)
1897
+ ] = value
1892
1898
  else:
1893
1899
  # remove `provider:` or `provider_` prefix if any
1894
1900
  search_params[
@@ -1898,14 +1904,13 @@ class EODataAccessGateway:
1898
1904
  if validate:
1899
1905
  search_plugin.validate(search_params, prep.auth)
1900
1906
 
1901
- res, nb_res = search_plugin.query(prep, **search_params)
1907
+ search_result = search_plugin.query(prep, **search_params)
1902
1908
 
1903
- if not isinstance(res, list):
1909
+ if not isinstance(search_result.data, list):
1904
1910
  raise PluginImplementationError(
1905
1911
  "The query function of a Search plugin must return a list of "
1906
- "results, got {} instead".format(type(res))
1912
+ "results, got {} instead".format(type(search_result.data))
1907
1913
  )
1908
-
1909
1914
  # Filter and attach to each eoproduct in the result the plugin capable of
1910
1915
  # downloading it (this is done to enable the eo_product to download itself
1911
1916
  # doing: eo_product.download()). The filtering is done by keeping only
@@ -1915,7 +1920,7 @@ class EODataAccessGateway:
1915
1920
  # WARNING: this means an eo_product that has an invalid geometry can still
1916
1921
  # be returned as a search result if there was no search extent (because we
1917
1922
  # will not try to do an intersection)
1918
- for eo_product in res:
1923
+ for eo_product in search_result.data:
1919
1924
  # if collection is not defined, try to guess using properties
1920
1925
  if eo_product.collection is None:
1921
1926
  pattern = re.compile(r"[^\w,]+")
@@ -1940,7 +1945,7 @@ class EODataAccessGateway:
1940
1945
  except NoMatchingCollection:
1941
1946
  pass
1942
1947
  else:
1943
- eo_product.collection = guesses[0]
1948
+ eo_product.collection = guesses[0].id
1944
1949
 
1945
1950
  try:
1946
1951
  if eo_product.collection is not None:
@@ -1953,18 +1958,13 @@ class EODataAccessGateway:
1953
1958
  if eo_product.search_intersection is not None:
1954
1959
  eo_product._register_downloader_from_manager(self._plugins_manager)
1955
1960
 
1956
- results.extend(res)
1957
- total_results = (
1958
- None
1959
- if (nb_res is None or total_results is None)
1960
- else total_results + nb_res
1961
- )
1962
- if count and nb_res is not None:
1963
- logger.info(
1964
- "Found %s result(s) on provider '%s'",
1965
- nb_res,
1966
- search_plugin.provider,
1967
- )
1961
+ # Make next_page not available if the current one returned less than the maximum number of items asked for.
1962
+ if not prep.items_per_page or len(search_result) < prep.items_per_page:
1963
+ search_result.next_page_token = None
1964
+
1965
+ search_result._dag = self
1966
+ return search_result
1967
+
1968
1968
  except Exception as e:
1969
1969
  if raise_errors:
1970
1970
  # Raise the error, letting the application wrapping eodag know that
@@ -1976,7 +1976,7 @@ class EODataAccessGateway:
1976
1976
  search_plugin.provider,
1977
1977
  )
1978
1978
  errors.append((search_plugin.provider, e))
1979
- return SearchResult(results, total_results, errors)
1979
+ return SearchResult([], 0, errors)
1980
1980
 
1981
1981
  def crunch(self, results: SearchResult, **kwargs: Any) -> SearchResult:
1982
1982
  """Apply the filters given through the keyword arguments to the results
@@ -2080,13 +2080,16 @@ class EODataAccessGateway:
2080
2080
  search_result: SearchResult, filename: str = "search_results.geojson"
2081
2081
  ) -> str:
2082
2082
  """Registers results of a search into a geojson file.
2083
+ The output is a FeatureCollection containing the EO products as features,
2084
+ with additional metadata such as ``number_matched``, ``next_page_token``,
2085
+ and ``search_params`` stored in the properties.
2083
2086
 
2084
2087
  :param search_result: A set of EO products resulting from a search
2085
2088
  :param filename: (optional) The name of the file to generate
2086
2089
  :returns: The name of the created file
2087
2090
  """
2088
2091
  with open(filename, "w") as fh:
2089
- geojson.dump(search_result, fh)
2092
+ geojson.dump(search_result.as_geojson_object(), fh)
2090
2093
  return filename
2091
2094
 
2092
2095
  @staticmethod
@@ -2101,12 +2104,16 @@ class EODataAccessGateway:
2101
2104
 
2102
2105
  def deserialize_and_register(self, filename: str) -> SearchResult:
2103
2106
  """Loads results of a search from a geojson file and register
2104
- products with the information needed to download itself
2107
+ products with the information needed to download itself.
2108
+
2109
+ This method also sets the internal EODataAccessGateway instance on the products,
2110
+ enabling pagination (e.g. access to next pages) if available.
2105
2111
 
2106
2112
  :param filename: A filename containing a search result encoded as a geojson
2107
- :returns: The search results encoded in `filename`
2113
+ :returns: The search results encoded in `filename`, ready for download and pagination
2108
2114
  """
2109
2115
  products = self.deserialize(filename)
2116
+ products._dag = self
2110
2117
  for i, product in enumerate(products):
2111
2118
  if product.downloader is None:
2112
2119
  downloader = self._plugins_manager.get_download_plugin(product)
@@ -2220,8 +2227,8 @@ class EODataAccessGateway:
2220
2227
  """
2221
2228
  # only fetch providers if collection is not found
2222
2229
  available_collections: list[str] = [
2223
- pt["ID"]
2224
- for pt in self.list_collections(provider=provider, fetch_providers=False)
2230
+ col.id
2231
+ for col in self.list_collections(provider=provider, fetch_providers=False)
2225
2232
  ]
2226
2233
  collection: Optional[str] = kwargs.get("collection")
2227
2234
  coll_alias: Optional[str] = collection
@@ -2231,8 +2238,8 @@ class EODataAccessGateway:
2231
2238
  if fetch_providers:
2232
2239
  # fetch providers and try again
2233
2240
  available_collections = [
2234
- pt["ID"]
2235
- for pt in self.list_collections(
2241
+ col.id
2242
+ for col in self.list_collections(
2236
2243
  provider=provider, fetch_providers=True
2237
2244
  )
2238
2245
  ]
@@ -2261,9 +2268,9 @@ class EODataAccessGateway:
2261
2268
  self._attach_collection_config(plugin, collection)
2262
2269
  collection_configs[collection] = plugin.config.collection_config
2263
2270
  else:
2264
- for pt in available_collections:
2265
- self._attach_collection_config(plugin, pt)
2266
- collection_configs[pt] = plugin.config.collection_config
2271
+ for col in available_collections:
2272
+ self._attach_collection_config(plugin, col)
2273
+ collection_configs[col] = plugin.config.collection_config
2267
2274
 
2268
2275
  # authenticate if required
2269
2276
  if getattr(plugin.config, "need_auth", False) and (
@@ -2277,8 +2284,14 @@ class EODataAccessGateway:
2277
2284
  plugin.provider,
2278
2285
  )
2279
2286
 
2287
+ # use queryables aliases
2288
+ kwargs_alias = {**kwargs}
2289
+ for search_param, field_info in Queryables.model_fields.items():
2290
+ if search_param in kwargs and field_info.alias:
2291
+ kwargs_alias[field_info.alias] = kwargs_alias.pop(search_param)
2292
+
2280
2293
  plugin_queryables = plugin.list_queryables(
2281
- kwargs,
2294
+ kwargs_alias,
2282
2295
  available_collections,
2283
2296
  collection_configs,
2284
2297
  collection,
@@ -2340,11 +2353,11 @@ class EODataAccessGateway:
2340
2353
  try:
2341
2354
  plugin.config.collection_config = dict(
2342
2355
  [
2343
- p
2344
- for p in self.list_collections(
2356
+ c.model_dump(mode="json", exclude={"id"})
2357
+ for c in self.list_collections(
2345
2358
  plugin.provider, fetch_providers=False
2346
2359
  )
2347
- if p["_id"] == collection
2360
+ if c._id == collection
2348
2361
  ][0],
2349
2362
  **{"collection": collection},
2350
2363
  )
@@ -2352,12 +2365,11 @@ class EODataAccessGateway:
2352
2365
  except IndexError:
2353
2366
  # Construct the GENERIC_COLLECTION metadata
2354
2367
  plugin.config.collection_config = dict(
2355
- ID=GENERIC_COLLECTION,
2356
- **self.collections_config[GENERIC_COLLECTION],
2368
+ **self.collections_config[GENERIC_COLLECTION].model_dump(
2369
+ mode="json", exclude={"id"}
2370
+ ),
2357
2371
  collection=collection,
2358
2372
  )
2359
- # Remove the ID since this is equal to collection.
2360
- plugin.config.collection_config.pop("ID", None)
2361
2373
 
2362
2374
  def import_stac_items(self, items_urls: list[str]) -> SearchResult:
2363
2375
  """Import STAC items from a list of URLs and convert them to SearchResult.