eodag 3.7.0__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eodag/api/core.py CHANGED
@@ -30,10 +30,6 @@ from typing import TYPE_CHECKING, Any, Iterator, Optional, Union
30
30
 
31
31
  import geojson
32
32
  import yaml.parser
33
- from whoosh import analysis, fields
34
- from whoosh.fields import Schema
35
- from whoosh.index import exists_in, open_dir
36
- from whoosh.qparser import QueryParser
37
33
 
38
34
  from eodag.api.product.metadata_mapping import (
39
35
  NOT_AVAILABLE,
@@ -61,7 +57,6 @@ from eodag.plugins.search.build_search_result import MeteoblueSearch
61
57
  from eodag.plugins.search.qssearch import PostJsonSearch
62
58
  from eodag.types import model_fields_to_annotated
63
59
  from eodag.types.queryables import CommonQueryables, QueryablesDict
64
- from eodag.types.whoosh import EODAGQueryParser, create_in
65
60
  from eodag.utils import (
66
61
  DEFAULT_DOWNLOAD_TIMEOUT,
67
62
  DEFAULT_DOWNLOAD_WAIT,
@@ -75,7 +70,6 @@ from eodag.utils import (
75
70
  _deprecated,
76
71
  get_geometry_from_various,
77
72
  makedirs,
78
- obj_md5sum,
79
73
  sort_dict,
80
74
  string_to_jsonpath,
81
75
  uri_to_path,
@@ -83,19 +77,18 @@ from eodag.utils import (
83
77
  from eodag.utils.env import is_env_var_true
84
78
  from eodag.utils.exceptions import (
85
79
  AuthenticationError,
86
- EodagError,
87
80
  NoMatchingProductType,
88
81
  PluginImplementationError,
89
82
  RequestError,
90
83
  UnsupportedProductType,
91
84
  UnsupportedProvider,
92
85
  )
86
+ from eodag.utils.free_text_search import compile_free_text_query
93
87
  from eodag.utils.rest import rfc3339_str_to_datetime
94
88
  from eodag.utils.stac_reader import fetch_stac_items
95
89
 
96
90
  if TYPE_CHECKING:
97
91
  from shapely.geometry.base import BaseGeometry
98
- from whoosh.index import Index
99
92
 
100
93
  from eodag.api.product import EOProduct
101
94
  from eodag.plugins.apis.base import Api
@@ -125,7 +118,6 @@ class EODataAccessGateway:
125
118
  res_files("eodag") / "resources" / "product_types.yml"
126
119
  )
127
120
  self.product_types_config = SimpleYamlProxyConfig(product_types_config_path)
128
- self.product_types_config_md5 = obj_md5sum(self.product_types_config.source)
129
121
  self.providers_config = load_default_config()
130
122
 
131
123
  env_var_cfg_dir = "EODAG_CFG_DIR"
@@ -189,6 +181,8 @@ class EODataAccessGateway:
189
181
  self._sync_provider_product_types(
190
182
  provider, available_product_types, strict_mode
191
183
  )
184
+ # init product types configuration
185
+ self._product_types_config_init()
192
186
 
193
187
  # re-build _plugins_manager using up-to-date providers_config
194
188
  self._plugins_manager.rebuild(self.providers_config)
@@ -201,10 +195,6 @@ class EODataAccessGateway:
201
195
  # Sort providers taking into account of possible new priority orders
202
196
  self._plugins_manager.sort_providers()
203
197
 
204
- # Build a search index for product types
205
- self._product_types_index: Optional[Index] = None
206
- self.build_index()
207
-
208
198
  # set locations configuration
209
199
  if locations_conf_path is None:
210
200
  locations_conf_path = os.getenv("EODAG_LOCS_CFG_FILE")
@@ -235,6 +225,11 @@ class EODataAccessGateway:
235
225
  )
236
226
  self.set_locations_conf(locations_conf_path)
237
227
 
228
+ def _product_types_config_init(self) -> None:
229
+ """Initialize product types configuration."""
230
+ for pt_id, pd_dict in self.product_types_config.source.items():
231
+ self.product_types_config.source[pt_id].setdefault("_id", pt_id)
232
+
238
233
  def _sync_provider_product_types(
239
234
  self,
240
235
  provider: str,
@@ -294,95 +289,6 @@ class EODataAccessGateway:
294
289
  """Get eodag package version"""
295
290
  return version("eodag")
296
291
 
297
- def build_index(self) -> None:
298
- """Build a `Whoosh <https://whoosh.readthedocs.io/en/latest/index.html>`_
299
- index for product types searches.
300
- """
301
- index_dir = os.path.join(self.conf_dir, ".index")
302
-
303
- try:
304
- create_index = not exists_in(index_dir)
305
- except ValueError as ve:
306
- # Whoosh uses pickle internally. New versions of Python sometimes introduce
307
- # a new pickle protocol (e.g. 3.4 -> 4, 3.8 -> 5), the new version not
308
- # being supported by previous versions of Python (e.g. Python 3.7 doesn't
309
- # support Protocol 5). In that case, we need to recreate the .index.
310
- if "unsupported pickle protocol" in str(ve):
311
- logger.debug("Need to recreate whoosh .index: '%s'", ve)
312
- create_index = True
313
- # Unexpected error
314
- else:
315
- logger.error(
316
- "Error while opening .index using whoosh, "
317
- "please report this issue and try to delete '%s' manually",
318
- index_dir,
319
- )
320
- raise
321
- # check index version
322
- if not create_index:
323
- if self._product_types_index is None:
324
- logger.debug("Opening product types index in %s", index_dir)
325
- self._product_types_index = open_dir(index_dir)
326
-
327
- with self._product_types_index.searcher() as searcher:
328
- p = QueryParser("md5", self._product_types_index.schema, plugins=[])
329
- query = p.parse(self.product_types_config_md5)
330
- results = searcher.search(query, limit=1)
331
-
332
- if not results:
333
- create_index = True
334
- logger.debug(
335
- "Out-of-date product types index removed from %s", index_dir
336
- )
337
-
338
- if create_index:
339
- logger.debug("Creating product types index in %s", index_dir)
340
- makedirs(index_dir)
341
-
342
- kw_analyzer = (
343
- analysis.CommaSeparatedTokenizer()
344
- | analysis.LowercaseFilter()
345
- | analysis.SubstitutionFilter("-", "")
346
- | analysis.SubstitutionFilter("_", "")
347
- )
348
-
349
- product_types_schema = Schema(
350
- ID=fields.ID(stored=True),
351
- abstract=fields.TEXT,
352
- instrument=fields.IDLIST,
353
- platform=fields.ID,
354
- platformSerialIdentifier=fields.IDLIST,
355
- processingLevel=fields.ID,
356
- sensorType=fields.ID,
357
- md5=fields.ID,
358
- license=fields.ID,
359
- title=fields.TEXT,
360
- missionStartDate=fields.STORED,
361
- missionEndDate=fields.STORED,
362
- keywords=fields.KEYWORD(analyzer=kw_analyzer),
363
- stacCollection=fields.STORED,
364
- )
365
- self._product_types_index = create_in(index_dir, product_types_schema)
366
- ix_writer = self._product_types_index.writer()
367
- for product_type in self.list_product_types(fetch_providers=False):
368
- versioned_product_type = dict(
369
- product_type, **{"md5": self.product_types_config_md5}
370
- )
371
- # add to index
372
- try:
373
- ix_writer.add_document(
374
- **{
375
- k: v
376
- for k, v in versioned_product_type.items()
377
- if k in product_types_schema.names()
378
- }
379
- )
380
- except TypeError as e:
381
- logger.error(
382
- f"Cannot write product type {product_type['ID']} into index. e={e} product_type={product_type}"
383
- )
384
- ix_writer.commit()
385
-
386
292
  def set_preferred_provider(self, provider: str) -> None:
387
293
  """Set max priority for the given provider.
388
294
 
@@ -674,8 +580,6 @@ class EODataAccessGateway:
674
580
  continue
675
581
 
676
582
  config = self.product_types_config[product_type_id]
677
- config["_id"] = product_type_id
678
-
679
583
  if "alias" in config:
680
584
  product_type_id = config["alias"]
681
585
  product_type = {"ID": product_type_id, **config}
@@ -977,14 +881,12 @@ class EODataAccessGateway:
977
881
  # to self.product_types_config
978
882
  self.product_types_config.source.update(
979
883
  {
980
- new_product_type: new_product_types_conf[
981
- "product_types_config"
982
- ][new_product_type]
884
+ new_product_type: {"_id": new_product_type}
885
+ | new_product_types_conf["product_types_config"][
886
+ new_product_type
887
+ ]
983
888
  }
984
889
  )
985
- self.product_types_config_md5 = obj_md5sum(
986
- self.product_types_config.source
987
- )
988
890
  ext_product_types_conf[provider] = new_product_types_conf
989
891
  new_product_types.append(new_product_type)
990
892
  if new_product_types:
@@ -1000,9 +902,6 @@ class EODataAccessGateway:
1000
902
  # re-create _plugins_manager using up-to-date providers_config
1001
903
  self._plugins_manager.build_product_type_to_provider_config_map()
1002
904
 
1003
- # rebuild index after product types list update
1004
- self.build_index()
1005
-
1006
905
  def available_providers(
1007
906
  self, product_type: Optional[str] = None, by_group: bool = False
1008
907
  ) -> list[str]:
@@ -1103,11 +1002,11 @@ class EODataAccessGateway:
1103
1002
  """
1104
1003
  Find EODAG product type IDs that best match a set of search parameters.
1105
1004
 
1106
- See https://whoosh.readthedocs.io/en/latest/querylang.html#the-default-query-language
1107
- for syntax.
1005
+ When using several filters, product types that match most of them are returned first.
1108
1006
 
1109
- :param free_text: Whoosh-compatible free text search filter used to search
1110
- accross all the following parameters
1007
+ :param free_text: Free text search filter used to search across all the following parameters. Handles logical
1008
+ operators with parentheses (``AND``/``OR``/``NOT``), quoted phrases (``"exact phrase"``),
1009
+ ``*`` and ``?`` wildcards.
1111
1010
  :param intersect: Join results for each parameter using INTERSECT instead of UNION.
1112
1011
  :param instrument: Instrument parameter.
1113
1012
  :param platform: Platform parameter.
@@ -1125,69 +1024,105 @@ class EODataAccessGateway:
1125
1024
  if productType := kwargs.get("productType"):
1126
1025
  return [productType]
1127
1026
 
1128
- if not self._product_types_index:
1129
- raise EodagError("Missing product types index")
1130
-
1131
- filters = {
1132
- "instrument": instrument,
1133
- "platform": platform,
1134
- "platformSerialIdentifier": platformSerialIdentifier,
1135
- "processingLevel": processingLevel,
1136
- "sensorType": sensorType,
1137
- "keywords": keywords,
1138
- "abstract": abstract,
1139
- "title": title,
1027
+ filters: dict[str, str] = {
1028
+ k: v
1029
+ for k, v in {
1030
+ "instrument": instrument,
1031
+ "platform": platform,
1032
+ "platformSerialIdentifier": platformSerialIdentifier,
1033
+ "processingLevel": processingLevel,
1034
+ "sensorType": sensorType,
1035
+ "keywords": keywords,
1036
+ "abstract": abstract,
1037
+ "title": title,
1038
+ }.items()
1039
+ if v is not None
1140
1040
  }
1141
- joint = " AND " if intersect else " OR "
1142
- filters_text = joint.join(
1143
- [f"{k}:({v})" for k, v in filters.items() if v is not None]
1041
+
1042
+ only_dates = (
1043
+ True
1044
+ if (not free_text and not filters and (missionStartDate or missionEndDate))
1045
+ else False
1144
1046
  )
1145
1047
 
1146
- text = f"({free_text})" if free_text else ""
1147
- if free_text and filters_text:
1148
- text += joint
1149
- if filters_text:
1150
- text += f"({filters_text})"
1151
-
1152
- if not text and (missionStartDate or missionEndDate):
1153
- text = "*"
1154
-
1155
- with self._product_types_index.searcher() as searcher:
1156
- p = EODAGQueryParser(list(filters.keys()), self._product_types_index.schema)
1157
- query = p.parse(text)
1158
- results = searcher.search(query, limit=None)
1159
-
1160
- guesses: list[dict[str, str]] = [dict(r) for r in results or []]
1161
-
1162
- # datetime filtering
1163
- if missionStartDate or missionEndDate:
1164
- min_aware = datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
1165
- max_aware = datetime.datetime.max.replace(tzinfo=datetime.timezone.utc)
1166
- guesses = [
1167
- g
1168
- for g in guesses
1169
- if (
1170
- max(
1171
- rfc3339_str_to_datetime(missionStartDate)
1172
- if missionStartDate
1173
- else min_aware,
1174
- rfc3339_str_to_datetime(g["missionStartDate"])
1175
- if g.get("missionStartDate")
1176
- else min_aware,
1177
- )
1178
- <= min(
1179
- rfc3339_str_to_datetime(missionEndDate)
1180
- if missionEndDate
1181
- else max_aware,
1182
- rfc3339_str_to_datetime(g["missionEndDate"])
1183
- if g.get("missionEndDate")
1184
- else max_aware,
1185
- )
1048
+ free_text_evaluator = (
1049
+ compile_free_text_query(free_text) if free_text else lambda _: True
1050
+ )
1051
+
1052
+ guesses_with_score: list[tuple[str, int]] = []
1053
+
1054
+ for pt_id, pt_dict in self.product_types_config.source.items():
1055
+ if (
1056
+ pt_id == GENERIC_PRODUCT_TYPE
1057
+ or pt_id
1058
+ not in self._plugins_manager.product_type_to_provider_config_map
1059
+ ):
1060
+ continue
1061
+
1062
+ score = 0 # how many filters matched
1063
+
1064
+ # free text search
1065
+ if free_text:
1066
+ match = free_text_evaluator(pt_dict)
1067
+ if match:
1068
+ score += 1
1069
+ elif intersect:
1070
+ continue # must match all filters
1071
+
1072
+ # individual filters
1073
+ if filters:
1074
+ filters_matching_method = all if intersect else any
1075
+ filters_evaluators = {
1076
+ filter_name: compile_free_text_query(value)
1077
+ for filter_name, value in filters.items()
1078
+ if value is not None
1079
+ }
1080
+
1081
+ filter_matches = [
1082
+ filters_evaluators[filter_name]({filter_name: pt_dict[filter_name]})
1083
+ for filter_name, value in filters.items()
1084
+ if filter_name in pt_dict
1085
+ ]
1086
+
1087
+ if filters_matching_method(filter_matches):
1088
+ # add number of True matches to score
1089
+ score += sum(filter_matches)
1090
+ elif intersect:
1091
+ continue # must match all filters
1092
+
1093
+ if score == 0 and not only_dates:
1094
+ continue
1095
+
1096
+ # datetime filtering
1097
+ if missionStartDate or missionEndDate:
1098
+ min_aware = datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
1099
+ max_aware = datetime.datetime.max.replace(tzinfo=datetime.timezone.utc)
1100
+
1101
+ max_start = max(
1102
+ rfc3339_str_to_datetime(missionStartDate)
1103
+ if missionStartDate
1104
+ else min_aware,
1105
+ rfc3339_str_to_datetime(pt_dict["missionStartDate"])
1106
+ if pt_dict.get("missionStartDate")
1107
+ else min_aware,
1186
1108
  )
1187
- ]
1109
+ min_end = min(
1110
+ rfc3339_str_to_datetime(missionEndDate)
1111
+ if missionEndDate
1112
+ else max_aware,
1113
+ rfc3339_str_to_datetime(pt_dict["missionEndDate"])
1114
+ if pt_dict.get("missionEndDate")
1115
+ else max_aware,
1116
+ )
1117
+ if not (max_start <= min_end):
1118
+ continue
1119
+
1120
+ guesses_with_score.append((pt_id, score))
1188
1121
 
1189
- if guesses:
1190
- return [g["ID"] for g in guesses or []]
1122
+ if guesses_with_score:
1123
+ # sort by score descending, then pt_id for stability
1124
+ guesses_with_score.sort(key=lambda x: (-x[1], x[0]))
1125
+ return [pt_id for pt_id, _ in guesses_with_score]
1191
1126
 
1192
1127
  raise NoMatchingProductType()
1193
1128
 
@@ -42,6 +42,7 @@ from shapely.ops import transform
42
42
  from eodag.types.queryables import Queryables
43
43
  from eodag.utils import (
44
44
  DEFAULT_PROJ,
45
+ _deprecated,
45
46
  deepcopy,
46
47
  dict_items_recursive_apply,
47
48
  format_string,
@@ -180,6 +181,7 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
180
181
  - ``to_datetime_dict``: convert a datetime string to a dictionary where values are either a string or a list
181
182
  - ``get_ecmwf_time``: get the time of a datetime string in the ECMWF format
182
183
  - ``sanitize``: sanitize string
184
+ - ``ceda_collection_name``: generate a CEDA collection name from a string
183
185
 
184
186
  :param search_param: The string to be formatted
185
187
  :param args: (optional) Additional arguments to use in the formatting process
@@ -527,6 +529,14 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
527
529
  old, new = ast.literal_eval(args)
528
530
  return re.sub(old, new, value)
529
531
 
532
+ @staticmethod
533
+ def convert_ceda_collection_name(value: str) -> str:
534
+ data_regex = re.compile(r"/data/(?P<name>.+?)/?$")
535
+ match = data_regex.search(value)
536
+ if match:
537
+ return match.group("name").replace("/", "_").upper()
538
+ return "NOT_AVAILABLE"
539
+
530
540
  @staticmethod
531
541
  def convert_recursive_sub_str(
532
542
  input_obj: Union[dict[Any, Any], list[Any]], args: str
@@ -631,6 +641,10 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
631
641
  return NOT_AVAILABLE
632
642
 
633
643
  @staticmethod
644
+ @_deprecated(
645
+ reason="Method that was used in previous wekeo provider configuration, but not used anymore",
646
+ version="3.7.1",
647
+ )
634
648
  def convert_split_id_into_s1_params(product_id: str) -> dict[str, str]:
635
649
  parts: list[str] = re.split(r"_(?!_)", product_id)
636
650
  if len(parts) < 9:
@@ -683,6 +697,10 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
683
697
  return params
684
698
 
685
699
  @staticmethod
700
+ @_deprecated(
701
+ reason="Method that was used in previous wekeo provider configuration, but not used anymore",
702
+ version="3.7.1",
703
+ )
686
704
  def convert_split_id_into_s5p_params(product_id: str) -> dict[str, str]:
687
705
  parts: list[str] = re.split(r"_(?!_)", product_id)
688
706
  params = {
@@ -701,6 +719,10 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str:
701
719
  return params
702
720
 
703
721
  @staticmethod
722
+ @_deprecated(
723
+ reason="Method that was used in previous wekeo provider configuration, but not used anymore",
724
+ version="3.7.1",
725
+ )
704
726
  def convert_split_cop_dem_id(product_id: str) -> list[int]:
705
727
  parts = product_id.split("_")
706
728
  lattitude = parts[3]
eodag/config.py CHANGED
@@ -307,7 +307,8 @@ class PluginConfig(yaml.YAMLObject):
307
307
  single_collection_fetch_url: str
308
308
  #: Query string to be added to the fetch_url to filter for a collection
309
309
  single_collection_fetch_qs: str
310
- #: Mapping for product type metadata returned by the endpoint given in single_collection_fetch_url
310
+ #: Mapping for product type metadata returned by the endpoint given in single_collection_fetch_url. If ``ID``
311
+ #: is redefined in this mapping, it will replace the ``generic_product_type_id`` value
311
312
  single_product_type_parsable_metadata: dict[str, str]
312
313
 
313
314
  class DiscoverQueryables(TypedDict, total=False):
@@ -39,6 +39,7 @@ from eodag.utils import (
39
39
  HTTP_REQ_TIMEOUT,
40
40
  USER_AGENT,
41
41
  ProgressCallback,
42
+ _deprecated,
42
43
  get_bucket_name_and_prefix,
43
44
  path_to_uri,
44
45
  )
@@ -60,6 +61,10 @@ if TYPE_CHECKING:
60
61
  logger = logging.getLogger("eodag.download.s3rest")
61
62
 
62
63
 
64
+ @_deprecated(
65
+ reason="Plugin that was used in previous mundi provider configuration, but not used anymore",
66
+ version="3.7.1",
67
+ )
63
68
  class S3RestDownload(Download):
64
69
  """Http download on S3-like object storage location
65
70
 
@@ -87,7 +92,7 @@ class S3RestDownload(Download):
87
92
  """
88
93
 
89
94
  def __init__(self, provider: str, config: PluginConfig) -> None:
90
- super(S3RestDownload, self).__init__(provider, config)
95
+ super().__init__(provider, config)
91
96
  self.http_download_plugin = HTTPDownload(self.provider, self.config)
92
97
 
93
98
  def download(
@@ -41,6 +41,7 @@ from eodag.utils import (
41
41
  GENERIC_PRODUCT_TYPE,
42
42
  HTTP_REQ_TIMEOUT,
43
43
  USER_AGENT,
44
+ _deprecated,
44
45
  deepcopy,
45
46
  string_to_jsonpath,
46
47
  )
@@ -57,6 +58,10 @@ if TYPE_CHECKING:
57
58
  logger = logging.getLogger("eodag.search.data_request_search")
58
59
 
59
60
 
61
+ @_deprecated(
62
+ reason="Plugin that was used in previous wekeo provider configuration, but not used anymore",
63
+ version="3.7.1",
64
+ )
60
65
  class DataRequestSearch(Search):
61
66
  """
62
67
  Plugin to execute search requests composed of several steps:
@@ -166,7 +171,7 @@ class DataRequestSearch(Search):
166
171
  data_request_id: Optional[str]
167
172
 
168
173
  def __init__(self, provider: str, config: PluginConfig) -> None:
169
- super(DataRequestSearch, self).__init__(provider, config)
174
+ super().__init__(provider, config)
170
175
  self.config.__dict__.setdefault("result_type", "json")
171
176
  self.config.__dict__.setdefault("results_entry", "content")
172
177
  self.config.__dict__.setdefault("pagination", {})
@@ -627,6 +627,26 @@ class QueryStringSearch(Search):
627
627
  generic_product_type_id
628
628
  ].update(collection_data)
629
629
 
630
+ # update product type id if needed
631
+ if collection_data_id := collection_data.get("ID"):
632
+ if generic_product_type_id != collection_data_id:
633
+ logger.debug(
634
+ "Rename %s product type to %s",
635
+ generic_product_type_id,
636
+ collection_data_id,
637
+ )
638
+ conf_update_dict["providers_config"][
639
+ collection_data_id
640
+ ] = conf_update_dict["providers_config"].pop(
641
+ generic_product_type_id
642
+ )
643
+ conf_update_dict["product_types_config"][
644
+ collection_data_id
645
+ ] = conf_update_dict["product_types_config"].pop(
646
+ generic_product_type_id
647
+ )
648
+ generic_product_type_id = collection_data_id
649
+
630
650
  # update keywords
631
651
  keywords_fields = [
632
652
  "instrument",