eodag 2.12.1__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (93)
  1. eodag/__init__.py +6 -8
  2. eodag/api/core.py +654 -538
  3. eodag/api/product/__init__.py +12 -2
  4. eodag/api/product/_assets.py +59 -16
  5. eodag/api/product/_product.py +100 -93
  6. eodag/api/product/drivers/__init__.py +7 -2
  7. eodag/api/product/drivers/base.py +0 -3
  8. eodag/api/product/metadata_mapping.py +192 -96
  9. eodag/api/search_result.py +69 -10
  10. eodag/cli.py +55 -25
  11. eodag/config.py +391 -116
  12. eodag/plugins/apis/base.py +11 -168
  13. eodag/plugins/apis/ecmwf.py +36 -25
  14. eodag/plugins/apis/usgs.py +80 -35
  15. eodag/plugins/authentication/aws_auth.py +13 -4
  16. eodag/plugins/authentication/base.py +10 -1
  17. eodag/plugins/authentication/generic.py +2 -2
  18. eodag/plugins/authentication/header.py +31 -6
  19. eodag/plugins/authentication/keycloak.py +17 -84
  20. eodag/plugins/authentication/oauth.py +3 -3
  21. eodag/plugins/authentication/openid_connect.py +268 -49
  22. eodag/plugins/authentication/qsauth.py +4 -1
  23. eodag/plugins/authentication/sas_auth.py +9 -2
  24. eodag/plugins/authentication/token.py +98 -47
  25. eodag/plugins/authentication/token_exchange.py +122 -0
  26. eodag/plugins/crunch/base.py +3 -1
  27. eodag/plugins/crunch/filter_date.py +3 -9
  28. eodag/plugins/crunch/filter_latest_intersect.py +0 -3
  29. eodag/plugins/crunch/filter_latest_tpl_name.py +1 -4
  30. eodag/plugins/crunch/filter_overlap.py +4 -8
  31. eodag/plugins/crunch/filter_property.py +5 -11
  32. eodag/plugins/download/aws.py +149 -185
  33. eodag/plugins/download/base.py +88 -97
  34. eodag/plugins/download/creodias_s3.py +1 -1
  35. eodag/plugins/download/http.py +638 -310
  36. eodag/plugins/download/s3rest.py +47 -45
  37. eodag/plugins/manager.py +228 -88
  38. eodag/plugins/search/__init__.py +36 -0
  39. eodag/plugins/search/base.py +239 -30
  40. eodag/plugins/search/build_search_result.py +382 -37
  41. eodag/plugins/search/cop_marine.py +441 -0
  42. eodag/plugins/search/creodias_s3.py +25 -20
  43. eodag/plugins/search/csw.py +5 -7
  44. eodag/plugins/search/data_request_search.py +61 -30
  45. eodag/plugins/search/qssearch.py +713 -255
  46. eodag/plugins/search/static_stac_search.py +106 -40
  47. eodag/resources/ext_product_types.json +1 -1
  48. eodag/resources/product_types.yml +1921 -34
  49. eodag/resources/providers.yml +4091 -3655
  50. eodag/resources/stac.yml +50 -216
  51. eodag/resources/stac_api.yml +71 -25
  52. eodag/resources/stac_provider.yml +5 -0
  53. eodag/resources/user_conf_template.yml +89 -32
  54. eodag/rest/__init__.py +6 -0
  55. eodag/rest/cache.py +70 -0
  56. eodag/rest/config.py +68 -0
  57. eodag/rest/constants.py +26 -0
  58. eodag/rest/core.py +735 -0
  59. eodag/rest/errors.py +178 -0
  60. eodag/rest/server.py +264 -431
  61. eodag/rest/stac.py +442 -836
  62. eodag/rest/types/collections_search.py +44 -0
  63. eodag/rest/types/eodag_search.py +238 -47
  64. eodag/rest/types/queryables.py +164 -0
  65. eodag/rest/types/stac_search.py +273 -0
  66. eodag/rest/utils/__init__.py +216 -0
  67. eodag/rest/utils/cql_evaluate.py +119 -0
  68. eodag/rest/utils/rfc3339.py +64 -0
  69. eodag/types/__init__.py +106 -10
  70. eodag/types/bbox.py +15 -14
  71. eodag/types/download_args.py +40 -0
  72. eodag/types/search_args.py +57 -7
  73. eodag/types/whoosh.py +79 -0
  74. eodag/utils/__init__.py +110 -91
  75. eodag/utils/constraints.py +37 -45
  76. eodag/utils/exceptions.py +39 -22
  77. eodag/utils/import_system.py +0 -4
  78. eodag/utils/logging.py +37 -80
  79. eodag/utils/notebook.py +4 -4
  80. eodag/utils/repr.py +113 -0
  81. eodag/utils/requests.py +128 -0
  82. eodag/utils/rest.py +100 -0
  83. eodag/utils/stac_reader.py +93 -21
  84. {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/METADATA +88 -53
  85. eodag-3.0.0.dist-info/RECORD +109 -0
  86. {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/WHEEL +1 -1
  87. {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/entry_points.txt +7 -5
  88. eodag/plugins/apis/cds.py +0 -540
  89. eodag/rest/types/stac_queryables.py +0 -134
  90. eodag/rest/utils.py +0 -1133
  91. eodag-2.12.1.dist-info/RECORD +0 -94
  92. {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/LICENSE +0 -0
  93. {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/top_level.txt +0 -0
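The hunks that follow are from eodag/plugins/search/qssearch.py (item 45 in the list above). The most visible change is that QueryStringSearch.query(), collect_search_urls(), do_search() and _request() no longer receive page, items_per_page, count or the request URL as separate arguments: they now share a PreparedSearch object that carries pagination settings, the built query string, per-product-type parameters and auth. Below is a minimal sketch of driving the new signature directly, for illustration only; the provider name, product type, count keyword and the way the plugin instance is obtained are assumptions not taken from this diff, and regular users keep going through EODataAccessGateway.search().

    from eodag import EODataAccessGateway
    from eodag.plugins.search import PreparedSearch

    dag = EODataAccessGateway()
    # Assumption: a QueryStringSearch-based plugin instance is fetched through
    # eodag's internal plugin manager; the provider name is illustrative.
    search_plugin = next(dag._plugins_manager.get_search_plugins(provider="peps"))

    # Pagination and counting now travel in a PreparedSearch object instead of
    # page / items_per_page / count keyword arguments (fields assumed to be
    # settable at construction time).
    prep = PreparedSearch(page=1, items_per_page=20, count=True)
    products, total = search_plugin.query(prep, productType="S2_MSI_L1C")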
@@ -19,19 +19,44 @@ from __future__ import annotations
19
19
 
20
20
  import logging
21
21
  import re
22
- from collections.abc import Iterable
23
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, cast
22
+ from copy import copy as copy_copy
23
+ from datetime import datetime
24
+ from typing import (
25
+ TYPE_CHECKING,
26
+ Any,
27
+ Callable,
28
+ Dict,
29
+ List,
30
+ Optional,
31
+ Sequence,
32
+ Set,
33
+ Tuple,
34
+ TypedDict,
35
+ cast,
36
+ )
24
37
  from urllib.error import URLError
38
+ from urllib.parse import (
39
+ parse_qsl,
40
+ quote_plus,
41
+ unquote,
42
+ unquote_plus,
43
+ urlparse,
44
+ urlunparse,
45
+ )
25
46
  from urllib.request import Request, urlopen
26
47
 
48
+ import geojson
27
49
  import orjson
28
50
  import requests
29
51
  import yaml
52
+ from dateutil.utils import today
53
+ from jsonpath_ng import JSONPath
30
54
  from lxml import etree
31
55
  from pydantic import create_model
32
56
  from pydantic.fields import FieldInfo
33
57
  from requests import Response
34
58
  from requests.adapters import HTTPAdapter
59
+ from requests.auth import AuthBase
35
60
 
36
61
  from eodag.api.product import EOProduct
37
62
  from eodag.api.product.metadata_mapping import (
@@ -42,11 +67,13 @@ from eodag.api.product.metadata_mapping import (
42
67
  properties_from_json,
43
68
  properties_from_xml,
44
69
  )
70
+ from eodag.api.search_result import RawSearchResult
71
+ from eodag.plugins.search import PreparedSearch
45
72
  from eodag.plugins.search.base import Search
46
73
  from eodag.types import json_field_definition_to_python, model_fields_to_annotated
74
+ from eodag.types.queryables import CommonQueryables
75
+ from eodag.types.search_args import SortByList
47
76
  from eodag.utils import (
48
- DEFAULT_ITEMS_PER_PAGE,
49
- DEFAULT_PAGE,
50
77
  GENERIC_PRODUCT_TYPE,
51
78
  HTTP_REQ_TIMEOUT,
52
79
  USER_AGENT,
@@ -56,16 +83,23 @@ from eodag.utils import (
56
83
  dict_items_recursive_apply,
57
84
  format_dict_items,
58
85
  get_args,
86
+ get_ssl_context,
59
87
  quote,
60
88
  string_to_jsonpath,
61
89
  update_nested_dict,
62
90
  urlencode,
63
91
  )
92
+ from eodag.utils.constraints import (
93
+ fetch_constraints,
94
+ get_constraint_queryables_with_additional_params,
95
+ )
64
96
  from eodag.utils.exceptions import (
65
97
  AuthenticationError,
66
98
  MisconfiguredError,
99
+ PluginImplementationError,
67
100
  RequestError,
68
101
  TimeOutError,
102
+ ValidationError,
69
103
  )
70
104
 
71
105
  if TYPE_CHECKING:
@@ -167,13 +201,13 @@ class QueryStringSearch(Search):
167
201
  ``free_text_search_operations`` configuration parameter follow the same rule.
168
202
 
169
203
  :param provider: An eodag providers configuration dictionary
170
- :type provider: dict
171
204
  :param config: Path to the user configuration file
172
- :type config: str
173
205
  """
174
206
 
175
- DEFAULT_ITEMS_PER_PAGE = 10
176
- extract_properties = {"xml": properties_from_xml, "json": properties_from_json}
207
+ extract_properties: Dict[str, Callable[..., Dict[str, Any]]] = {
208
+ "xml": properties_from_xml,
209
+ "json": properties_from_json,
210
+ }
177
211
 
178
212
  def __init__(self, provider: str, config: PluginConfig) -> None:
179
213
  super(QueryStringSearch, self).__init__(provider, config)
@@ -251,6 +285,17 @@ class QueryStringSearch(Search):
251
285
  "generic_product_type_parsable_metadata"
252
286
  ]
253
287
  )
288
+ if (
289
+ "single_product_type_parsable_metadata"
290
+ in self.config.discover_product_types
291
+ ):
292
+ self.config.discover_product_types[
293
+ "single_product_type_parsable_metadata"
294
+ ] = mtd_cfg_as_conversion_and_querypath(
295
+ self.config.discover_product_types[
296
+ "single_product_type_parsable_metadata"
297
+ ]
298
+ )
254
299
 
255
300
  # parse jsonpath on init: queryables discovery
256
301
  if (
@@ -316,35 +361,58 @@ class QueryStringSearch(Search):
316
361
  self.next_page_query_obj = None
317
362
  self.next_page_merge = None
318
363
 
319
- def discover_product_types(self) -> Optional[Dict[str, Any]]:
364
+ def discover_product_types(self, **kwargs: Any) -> Optional[Dict[str, Any]]:
320
365
  """Fetch product types list from provider using `discover_product_types` conf
321
366
 
322
367
  :returns: configuration dict containing fetched product types information
323
- :rtype: (optional) dict
324
368
  """
325
369
  try:
326
- fetch_url = cast(
370
+ prep = PreparedSearch()
371
+
372
+ prep.url = cast(
327
373
  str,
328
374
  self.config.discover_product_types["fetch_url"].format(
329
375
  **self.config.__dict__
330
376
  ),
331
377
  )
332
- response = QueryStringSearch._request(
333
- self,
334
- fetch_url,
335
- info_message="Fetching product types: {}".format(fetch_url),
336
- exception_message="Skipping error while fetching product types for "
337
- "{} {} instance:".format(self.provider, self.__class__.__name__),
338
- )
378
+
379
+ # get auth if available
380
+ if "auth" in kwargs:
381
+ prep.auth = kwargs.pop("auth")
382
+
383
+ # try updating fetch_url qs using productType
384
+ fetch_qs_dict = {}
385
+ if "single_collection_fetch_qs" in self.config.discover_product_types:
386
+ try:
387
+ fetch_qs = self.config.discover_product_types[
388
+ "single_collection_fetch_qs"
389
+ ].format(**kwargs)
390
+ fetch_qs_dict = dict(parse_qsl(fetch_qs))
391
+ except KeyError:
392
+ pass
393
+ if fetch_qs_dict:
394
+ url_parse = urlparse(prep.url)
395
+ query = url_parse.query
396
+ url_dict = dict(parse_qsl(query))
397
+ url_dict.update(fetch_qs_dict)
398
+ url_new_query = urlencode(url_dict)
399
+ url_parse = url_parse._replace(query=url_new_query)
400
+ prep.url = urlunparse(url_parse)
401
+
402
+ prep.info_message = "Fetching product types: {}".format(prep.url)
403
+ prep.exception_message = (
404
+ "Skipping error while fetching product types for " "{} {} instance:"
405
+ ).format(self.provider, self.__class__.__name__)
406
+
407
+ response = QueryStringSearch._request(self, prep)
339
408
  except (RequestError, KeyError, AttributeError):
340
409
  return None
341
410
  else:
342
411
  try:
343
- conf_update_dict = {
412
+ conf_update_dict: Dict[str, Any] = {
344
413
  "providers_config": {},
345
414
  "product_types_config": {},
346
415
  }
347
-
348
416
  if self.config.discover_product_types["result_type"] == "json":
349
417
  resp_as_json = response.json()
350
418
  # extract results from response json
@@ -354,6 +422,8 @@ class QueryStringSearch(Search):
354
422
  "results_entry"
355
423
  ].find(resp_as_json)
356
424
  ]
425
+ if result and isinstance(result[0], list):
426
+ result = result[0]
357
427
 
358
428
  for product_type_result in result:
359
429
  # providers_config extraction
@@ -391,6 +461,17 @@ class QueryStringSearch(Search):
391
461
  ],
392
462
  )
393
463
 
464
+ if (
465
+ "single_product_type_parsable_metadata"
466
+ in self.config.discover_product_types
467
+ ):
468
+ collection_data = self._get_product_type_metadata_from_single_collection_endpoint(
469
+ generic_product_type_id
470
+ )
471
+ conf_update_dict["product_types_config"][
472
+ generic_product_type_id
473
+ ].update(collection_data)
474
+
394
475
  # update keywords
395
476
  keywords_fields = [
396
477
  "instrument",
@@ -438,38 +519,161 @@ class QueryStringSearch(Search):
438
519
  e,
439
520
  )
440
521
  return None
522
+ except requests.RequestException as e:
523
+ logger.debug(
524
+ "Could not parse discovered product types response from "
525
+ f"{self.provider}, {type(e).__name__}: {e.args}"
526
+ )
527
+ return None
441
528
  conf_update_dict["product_types_config"] = dict_items_recursive_apply(
442
529
  conf_update_dict["product_types_config"],
443
530
  lambda k, v: v if v != NOT_AVAILABLE else None,
444
531
  )
445
532
  return conf_update_dict
446
533
 
534
+ def _get_product_type_metadata_from_single_collection_endpoint(
535
+ self, product_type: str
536
+ ) -> Dict[str, Any]:
537
+ """
538
+ retrieves additional product type information from an endpoint returning data for a single collection
539
+ :param product_type: product type
540
+ :return: product types and their metadata
541
+ """
542
+ single_collection_url = self.config.discover_product_types[
543
+ "single_collection_fetch_url"
544
+ ].format(productType=product_type)
545
+ resp = QueryStringSearch._request(
546
+ self,
547
+ PreparedSearch(
548
+ url=single_collection_url,
549
+ info_message=f"Fetching data for product type: {product_type}",
550
+ exception_message="Skipping error while fetching product types for "
551
+ "{} {} instance:".format(self.provider, self.__class__.__name__),
552
+ ),
553
+ )
554
+ product_data = resp.json()
555
+ return properties_from_json(
556
+ product_data,
557
+ self.config.discover_product_types["single_product_type_parsable_metadata"],
558
+ )
559
+
560
+ def discover_queryables(
561
+ self, **kwargs: Any
562
+ ) -> Optional[Dict[str, Annotated[Any, FieldInfo]]]:
563
+ """Fetch queryables list from provider using its constraints file
564
+
565
+ :param kwargs: additional filters for queryables (`productType` and other search
566
+ arguments)
567
+ :returns: fetched queryable parameters dict
568
+ """
569
+ product_type = kwargs.pop("productType", None)
570
+ if not product_type:
571
+ return {}
572
+ constraints_file_url = getattr(self.config, "constraints_file_url", "")
573
+ if not constraints_file_url:
574
+ return {}
575
+
576
+ constraints_file_dataset_key = getattr(
577
+ self.config, "constraints_file_dataset_key", "dataset"
578
+ )
579
+ provider_product_type = self.config.products.get(product_type, {}).get(
580
+ constraints_file_dataset_key, None
581
+ )
582
+
583
+ # defaults
584
+ default_queryables = self._get_defaults_as_queryables(product_type)
585
+ # remove unwanted queryables
586
+ for param in getattr(self.config, "remove_from_queryables", []):
587
+ default_queryables.pop(param, None)
588
+
589
+ non_empty_kwargs = {k: v for k, v in kwargs.items() if v}
590
+
591
+ if "{" in constraints_file_url:
592
+ constraints_file_url = constraints_file_url.format(
593
+ dataset=provider_product_type
594
+ )
595
+ constraints = fetch_constraints(constraints_file_url, self)
596
+ if not constraints:
597
+ return default_queryables
598
+
599
+ constraint_params: Dict[str, Dict[str, Set[Any]]] = {}
600
+ if len(kwargs) == 0:
601
+ # get values from constraints without additional filters
602
+ for constraint in constraints:
603
+ for key in constraint.keys():
604
+ if key in constraint_params:
605
+ constraint_params[key]["enum"].update(constraint[key])
606
+ else:
607
+ constraint_params[key] = {"enum": set(constraint[key])}
608
+ else:
609
+ # get values from constraints with additional filters
610
+ constraints_input_params = {k: v for k, v in non_empty_kwargs.items()}
611
+ constraint_params = get_constraint_queryables_with_additional_params(
612
+ constraints, constraints_input_params, self, product_type
613
+ )
614
+ # query params that are not in constraints but might be default queryables
615
+ if len(constraint_params) == 1 and "not_available" in constraint_params:
616
+ not_queryables = set()
617
+ for constraint_param in constraint_params["not_available"]["enum"]:
618
+ param = CommonQueryables.get_queryable_from_alias(constraint_param)
619
+ if param in dict(
620
+ CommonQueryables.model_fields, **default_queryables
621
+ ):
622
+ non_empty_kwargs.pop(constraint_param)
623
+ else:
624
+ not_queryables.add(constraint_param)
625
+ if not_queryables:
626
+ raise ValidationError(
627
+ f"parameter(s) {str(not_queryables)} not queryable"
628
+ )
629
+ else:
630
+ # get constraints again without common queryables
631
+ constraint_params = (
632
+ get_constraint_queryables_with_additional_params(
633
+ constraints, non_empty_kwargs, self, product_type
634
+ )
635
+ )
636
+
637
+ field_definitions: Dict[str, Any] = dict()
638
+ for json_param, json_mtd in constraint_params.items():
639
+ param = (
640
+ get_queryable_from_provider(
641
+ json_param, self.get_metadata_mapping(product_type)
642
+ )
643
+ or json_param
644
+ )
645
+ default = kwargs.get(param, None) or self.config.products.get(
646
+ product_type, {}
647
+ ).get(param, None)
648
+ annotated_def = json_field_definition_to_python(
649
+ json_mtd, default_value=default, required=True
650
+ )
651
+ field_definitions[param] = get_args(annotated_def)
652
+
653
+ python_queryables = create_model("m", **field_definitions).model_fields
654
+ return dict(default_queryables, **model_fields_to_annotated(python_queryables))
655
+
447
656
  def query(
448
657
  self,
449
- product_type: Optional[str] = None,
450
- items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
451
- page: int = DEFAULT_PAGE,
452
- count: bool = True,
658
+ prep: PreparedSearch = PreparedSearch(),
453
659
  **kwargs: Any,
454
660
  ) -> Tuple[List[EOProduct], Optional[int]]:
455
661
  """Perform a search on an OpenSearch-like interface
456
662
 
457
- :param items_per_page: (optional) The number of results that must appear in one
458
- single page
459
- :type items_per_page: int
460
- :param page: (optional) The page number to return
461
- :type page: int
462
- :param count: (optional) To trigger a count request
463
- :type count: bool
663
+ :param prep: Object collecting needed information for search.
464
664
  """
465
- product_type = kwargs.get("productType", None)
665
+ count = prep.count
666
+ product_type = kwargs.get("productType", prep.product_type)
466
667
  if product_type == GENERIC_PRODUCT_TYPE:
467
668
  logger.warning(
468
669
  "GENERIC_PRODUCT_TYPE is not a real product_type and should only be used internally as a template"
469
670
  )
470
- return [], 0
471
- # remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
472
- kwargs.pop("product_type", None)
671
+ return ([], 0) if prep.count else ([], None)
672
+
673
+ sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
674
+ prep.sort_by_qs, _ = (
675
+ ("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
676
+ )
473
677
 
474
678
  provider_product_type = self.map_product_type(product_type)
475
679
  keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}
@@ -480,25 +684,25 @@ class QueryStringSearch(Search):
480
684
  )
481
685
 
482
686
  # provider product type specific conf
483
- self.product_type_def_params = (
687
+ prep.product_type_def_params = (
484
688
  self.get_product_type_def_params(product_type, **kwargs)
485
689
  if product_type is not None
486
690
  else {}
487
691
  )
488
692
 
489
693
  # if product_type_def_params is set, remove product_type as it may conflict with this conf
490
- if self.product_type_def_params:
694
+ if prep.product_type_def_params:
491
695
  keywords.pop("productType", None)
492
696
 
493
697
  if self.config.metadata_mapping:
494
698
  product_type_metadata_mapping = dict(
495
699
  self.config.metadata_mapping,
496
- **self.product_type_def_params.get("metadata_mapping", {}),
700
+ **prep.product_type_def_params.get("metadata_mapping", {}),
497
701
  )
498
702
  keywords.update(
499
703
  {
500
704
  k: v
501
- for k, v in self.product_type_def_params.items()
705
+ for k, v in prep.product_type_def_params.items()
502
706
  if k not in keywords.keys()
503
707
  and k in product_type_metadata_mapping.keys()
504
708
  and isinstance(product_type_metadata_mapping[k], list)
@@ -507,21 +711,26 @@ class QueryStringSearch(Search):
507
711
 
508
712
  qp, qs = self.build_query_string(product_type, **keywords)
509
713
 
510
- self.query_params = qp
511
- self.query_string = qs
512
- self.search_urls, total_items = self.collect_search_urls(
513
- page=page, items_per_page=items_per_page, count=count, **kwargs
714
+ prep.query_params = qp
715
+ prep.query_string = qs
716
+ prep.search_urls, total_items = self.collect_search_urls(
717
+ prep,
718
+ **kwargs,
514
719
  )
515
- if not count and hasattr(self, "total_items_nb"):
720
+ if not count and hasattr(prep, "total_items_nb"):
516
721
  # do not try to extract total_items from search results if count is False
517
- del self.total_items_nb
518
- del self.need_count
519
-
520
- provider_results = self.do_search(items_per_page=items_per_page, **kwargs)
521
- if count and total_items is None and hasattr(self, "total_items_nb"):
522
- total_items = self.total_items_nb
523
- eo_products = self.normalize_results(provider_results, **kwargs)
524
- total_items = len(eo_products) if total_items == 0 else total_items
722
+ del prep.total_items_nb
723
+ del prep.need_count
724
+
725
+ provider_results = self.do_search(prep, **kwargs)
726
+ if count and total_items is None and hasattr(prep, "total_items_nb"):
727
+ total_items = prep.total_items_nb
728
+
729
+ raw_search_result = RawSearchResult(provider_results)
730
+ raw_search_result.query_params = prep.query_params
731
+ raw_search_result.product_type_def_params = prep.product_type_def_params
732
+
733
+ eo_products = self.normalize_results(raw_search_result, **kwargs)
525
734
  return eo_products, total_items
526
735
 
527
736
  @_deprecated(
@@ -538,11 +747,13 @@ class QueryStringSearch(Search):
538
747
  ) -> Tuple[Dict[str, Any], str]:
539
748
  """Build The query string using the search parameters"""
540
749
  logger.debug("Building the query string that will be used for search")
541
- query_params = format_query_params(product_type, self.config, **kwargs)
750
+ query_params = format_query_params(product_type, self.config, kwargs)
542
751
 
543
752
  # Build the final query string, in one go without quoting it
544
753
  # (some providers do not operate well with urlencoded and quoted query strings)
545
- quote_via: Callable[[Any], str] = lambda x, *_args, **_kwargs: x
754
+ def quote_via(x: Any, *_args, **_kwargs) -> str:
755
+ return x
756
+
546
757
  return (
547
758
  query_params,
548
759
  urlencode(query_params, doseq=True, quote_via=quote_via),
@@ -550,22 +761,31 @@ class QueryStringSearch(Search):
550
761
 
551
762
  def collect_search_urls(
552
763
  self,
553
- page: Optional[int] = None,
554
- items_per_page: Optional[int] = None,
555
- count: bool = True,
764
+ prep: PreparedSearch = PreparedSearch(page=None, items_per_page=None),
556
765
  **kwargs: Any,
557
766
  ) -> Tuple[List[str], Optional[int]]:
558
767
  """Build paginated urls"""
768
+ page = prep.page
769
+ items_per_page = prep.items_per_page
770
+ count = prep.count
771
+
559
772
  urls = []
560
773
  total_results = 0 if count else None
561
774
 
775
+ # use only sort_by parameters for search, not for count
776
+ # and remove potential leading '&'
777
+ qs_with_sort = (prep.query_string + getattr(prep, "sort_by_qs", "")).strip("&")
778
+ # append count template if needed
779
+ if count:
780
+ qs_with_sort += self.config.pagination.get("count_tpl", "")
781
+
562
782
  if "count_endpoint" not in self.config.pagination:
563
783
  # if count_endpoint is not set, total_results should be extracted from search result
564
784
  total_results = None
565
- self.need_count = True
566
- self.total_items_nb = None
785
+ prep.need_count = True
786
+ prep.total_items_nb = None
567
787
 
568
- for collection in self.get_collections(**kwargs):
788
+ for collection in self.get_collections(prep, **kwargs) or (None,):
569
789
  # skip empty collection if one is required in api_endpoint
570
790
  if "{collection}" in self.config.api_endpoint and not collection:
571
791
  continue
@@ -573,12 +793,13 @@ class QueryStringSearch(Search):
573
793
  collection=collection
574
794
  )
575
795
  if page is not None and items_per_page is not None:
796
+ page = page - 1 + self.config.pagination.get("start_page", 1)
576
797
  if count:
577
798
  count_endpoint = self.config.pagination.get(
578
799
  "count_endpoint", ""
579
800
  ).format(collection=collection)
580
801
  if count_endpoint:
581
- count_url = "{}?{}".format(count_endpoint, self.query_string)
802
+ count_url = "{}?{}".format(count_endpoint, prep.query_string)
582
803
  _total_results = (
583
804
  self.count_hits(
584
805
  count_url, result_type=self.config.result_type
@@ -592,32 +813,36 @@ class QueryStringSearch(Search):
592
813
  0 if total_results is None else total_results
593
814
  )
594
815
  total_results += _total_results or 0
816
+ if "next_page_url_tpl" not in self.config.pagination:
817
+ raise MisconfiguredError(
818
+ f"next_page_url_tpl is missing in {self.provider} search.pagination configuration"
819
+ )
595
820
  next_url = self.config.pagination["next_page_url_tpl"].format(
596
821
  url=search_endpoint,
597
- search=self.query_string,
822
+ search=qs_with_sort,
598
823
  items_per_page=items_per_page,
599
824
  page=page,
600
825
  skip=(page - 1) * items_per_page,
601
826
  skip_base_1=(page - 1) * items_per_page + 1,
602
827
  )
603
828
  else:
604
- next_url = "{}?{}".format(search_endpoint, self.query_string)
829
+ next_url = "{}?{}".format(search_endpoint, qs_with_sort)
605
830
  urls.append(next_url)
606
- return urls, total_results
831
+ return list(dict.fromkeys(urls)), total_results
607
832
 
608
833
  def do_search(
609
- self, items_per_page: Optional[int] = None, **kwargs: Any
834
+ self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
610
835
  ) -> List[Any]:
611
836
  """Perform the actual search request.
612
837
 
613
838
  If there is a specified number of items per page, return the results as soon
614
839
  as this number is reached
615
840
 
616
- :param items_per_page: (optional) The number of items to return for one page
617
- :type items_per_page: int
841
+ :param prep: Object collecting needed information for search.
618
842
  """
843
+ items_per_page = prep.items_per_page
619
844
  total_items_nb = 0
620
- if getattr(self, "need_count", False):
845
+ if getattr(prep, "need_count", False):
621
846
  # extract total_items_nb from search results
622
847
  if self.config.result_type == "json":
623
848
  total_items_nb_key_path_parsed = self.config.pagination[
@@ -625,13 +850,17 @@ class QueryStringSearch(Search):
625
850
  ]
626
851
 
627
852
  results: List[Any] = []
628
- for search_url in self.search_urls:
629
- response = self._request(
630
- search_url,
631
- info_message="Sending search request: {}".format(search_url),
632
- exception_message="Skipping error while searching for {} {} "
633
- "instance:".format(self.provider, self.__class__.__name__),
853
+ for search_url in prep.search_urls:
854
+ single_search_prep = copy_copy(prep)
855
+ single_search_prep.url = search_url
856
+ single_search_prep.info_message = "Sending search request: {}".format(
857
+ search_url
634
858
  )
859
+ single_search_prep.exception_message = (
860
+ f"Skipping error while searching for {self.provider}"
861
+ f" {self.__class__.__name__} instance"
862
+ )
863
+ response = self._request(single_search_prep)
635
864
  next_page_url_key_path = self.config.pagination.get(
636
865
  "next_page_url_key_path", None
637
866
  )
@@ -649,7 +878,7 @@ class QueryStringSearch(Search):
649
878
  )
650
879
  result = (
651
880
  [etree.tostring(element_or_tree=entry) for entry in results_xpath]
652
- if isinstance(results_xpath, Iterable)
881
+ if isinstance(results_xpath, Sequence)
653
882
  else []
654
883
  )
655
884
 
@@ -658,7 +887,7 @@ class QueryStringSearch(Search):
658
887
  "Setting the next page url from an XML response has not "
659
888
  "been implemented yet"
660
889
  )
661
- if getattr(self, "need_count", False):
890
+ if getattr(prep, "need_count", False):
662
891
  # extract total_items_nb from search results
663
892
  try:
664
893
  total_nb_results_xpath = root_node.xpath(
@@ -669,7 +898,7 @@ class QueryStringSearch(Search):
669
898
  )
670
899
  total_nb_results = (
671
900
  total_nb_results_xpath
672
- if isinstance(total_nb_results_xpath, Iterable)
901
+ if isinstance(total_nb_results_xpath, Sequence)
673
902
  else []
674
903
  )[0]
675
904
  _total_items_nb = int(total_nb_results)
@@ -686,55 +915,60 @@ class QueryStringSearch(Search):
686
915
  resp_as_json = response.json()
687
916
  if next_page_url_key_path:
688
917
  path_parsed = next_page_url_key_path
689
- try:
690
- self.next_page_url = path_parsed.find(resp_as_json)[0].value
918
+ found_paths = path_parsed.find(resp_as_json)
919
+ if found_paths and not isinstance(found_paths, int):
920
+ self.next_page_url = found_paths[0].value
691
921
  logger.debug(
692
922
  "Next page URL collected and set for the next search",
693
923
  )
694
- except IndexError:
924
+ else:
695
925
  logger.debug("Next page URL could not be collected")
696
926
  if next_page_query_obj_key_path:
697
927
  path_parsed = next_page_query_obj_key_path
698
- try:
699
- self.next_page_query_obj = path_parsed.find(resp_as_json)[
700
- 0
701
- ].value
928
+ found_paths = path_parsed.find(resp_as_json)
929
+ if found_paths and not isinstance(found_paths, int):
930
+ self.next_page_query_obj = found_paths[0].value
702
931
  logger.debug(
703
932
  "Next page Query-object collected and set for the next search",
704
933
  )
705
- except IndexError:
934
+ else:
706
935
  logger.debug("Next page Query-object could not be collected")
707
936
  if next_page_merge_key_path:
708
937
  path_parsed = next_page_merge_key_path
709
- try:
710
- self.next_page_merge = path_parsed.find(resp_as_json)[0].value
938
+ found_paths = path_parsed.find(resp_as_json)
939
+ if found_paths and not isinstance(found_paths, int):
940
+ self.next_page_merge = found_paths[0].value
711
941
  logger.debug(
712
942
  "Next page merge collected and set for the next search",
713
943
  )
714
- except IndexError:
944
+ else:
715
945
  logger.debug("Next page merge could not be collected")
716
946
 
717
947
  results_entry = string_to_jsonpath(
718
948
  self.config.results_entry, force=True
719
949
  )
720
- try:
721
- result = results_entry.find(resp_as_json)[0].value
722
- except Exception:
950
+ found_entry_paths = results_entry.find(resp_as_json)
951
+ if found_entry_paths and not isinstance(found_entry_paths, int):
952
+ result = found_entry_paths[0].value
953
+ else:
723
954
  result = []
724
955
  if not isinstance(result, list):
725
956
  result = [result]
726
957
 
727
- if getattr(self, "need_count", False):
958
+ if getattr(prep, "need_count", False):
728
959
  # extract total_items_nb from search results
729
- try:
730
- _total_items_nb = total_items_nb_key_path_parsed.find(
731
- resp_as_json
732
- )[0].value
960
+ found_total_items_nb_paths = total_items_nb_key_path_parsed.find(
961
+ resp_as_json
962
+ )
963
+ if found_total_items_nb_paths and not isinstance(
964
+ found_total_items_nb_paths, int
965
+ ):
966
+ _total_items_nb = found_total_items_nb_paths[0].value
733
967
  if getattr(self.config, "merge_responses", False):
734
968
  total_items_nb = _total_items_nb or 0
735
969
  else:
736
970
  total_items_nb += _total_items_nb or 0
737
- except IndexError:
971
+ else:
738
972
  logger.debug(
739
973
  "Could not extract total_items_nb from search results"
740
974
  )
@@ -746,15 +980,22 @@ class QueryStringSearch(Search):
746
980
  )
747
981
  else:
748
982
  results.extend(result)
749
- if getattr(self, "need_count", False):
750
- self.total_items_nb = total_items_nb
751
- del self.need_count
983
+ if getattr(prep, "need_count", False):
984
+ prep.total_items_nb = total_items_nb
985
+ del prep.need_count
986
+ # remove prep.total_items_nb if value could not be extracted from response
987
+ if (
988
+ hasattr(prep, "total_items_nb")
989
+ and not prep.total_items_nb
990
+ and len(results) > 0
991
+ ):
992
+ del prep.total_items_nb
752
993
  if items_per_page is not None and len(results) == items_per_page:
753
994
  return results
754
995
  return results
755
996
 
756
997
  def normalize_results(
757
- self, results: List[Dict[str, Any]], **kwargs: Any
998
+ self, results: RawSearchResult, **kwargs: Any
758
999
  ) -> List[EOProduct]:
759
1000
  """Build EOProducts from provider results"""
760
1001
  normalize_remaining_count = len(results)
@@ -777,6 +1018,8 @@ class QueryStringSearch(Search):
777
1018
  product.properties = dict(
778
1019
  getattr(self.config, "product_type_config", {}), **product.properties
779
1020
  )
1021
+ # move assets from properties to product's attr
1022
+ product.assets.update(product.properties.pop("assets", {}))
780
1023
  products.append(product)
781
1024
  return products
782
1025
 
@@ -785,10 +1028,12 @@ class QueryStringSearch(Search):
785
1028
  # Handle a very annoying special case :'(
786
1029
  url = count_url.replace("$format=json&", "")
787
1030
  response = self._request(
788
- url,
789
- info_message="Sending count request: {}".format(url),
790
- exception_message="Skipping error while counting results for {} {} "
791
- "instance:".format(self.provider, self.__class__.__name__),
1031
+ PreparedSearch(
1032
+ url=url,
1033
+ info_message="Sending count request: {}".format(url),
1034
+ exception_message="Skipping error while counting results for {} {} "
1035
+ "instance:".format(self.provider, self.__class__.__name__),
1036
+ )
792
1037
  )
793
1038
  if result_type == "xml":
794
1039
  root_node = etree.fromstring(response.content)
@@ -801,20 +1046,34 @@ class QueryStringSearch(Search):
801
1046
  count_results = response.json()
802
1047
  if isinstance(count_results, dict):
803
1048
  path_parsed = self.config.pagination["total_items_nb_key_path"]
804
- total_results = path_parsed.find(count_results)[0].value
1049
+ if not isinstance(path_parsed, JSONPath):
1050
+ raise PluginImplementationError(
1051
+ "total_items_nb_key_path must be parsed to JSONPath on plugin init"
1052
+ )
1053
+ found_paths = path_parsed.find(count_results)
1054
+ if found_paths and not isinstance(found_paths, int):
1055
+ total_results = found_paths[0].value
1056
+ else:
1057
+ raise MisconfiguredError(
1058
+ "Could not get results count from response using total_items_nb_key_path"
1059
+ )
805
1060
  else: # interpret the result as a raw int
806
1061
  total_results = int(count_results)
807
1062
  return total_results
808
1063
 
809
- def get_collections(self, **kwargs: Any) -> Tuple[Set[Dict[str, Any]], ...]:
1064
+ def get_collections(self, prep: PreparedSearch, **kwargs: Any) -> Tuple[str, ...]:
810
1065
  """Get the collection to which the product belongs"""
811
1066
  # See https://earth.esa.int/web/sentinel/missions/sentinel-2/news/-
812
1067
  # /asset_publisher/Ac0d/content/change-of
813
1068
  # -format-for-new-sentinel-2-level-1c-products-starting-on-6-december
814
1069
  product_type: Optional[str] = kwargs.get("productType")
815
- if product_type is None and not self.product_type_def_params:
816
- collections: Set[Dict[str, Any]] = set()
817
- collection: Optional[str] = getattr(self.config, "collection", None)
1070
+ collection: Optional[str] = None
1071
+ if product_type is None and (
1072
+ not hasattr(prep, "product_type_def_params")
1073
+ or not prep.product_type_def_params
1074
+ ):
1075
+ collections: Set[str] = set()
1076
+ collection = getattr(self.config, "collection", None)
818
1077
  if collection is None:
819
1078
  try:
820
1079
  for product_type, product_config in self.config.products.items():
@@ -832,31 +1091,44 @@ class QueryStringSearch(Search):
832
1091
  collections.add(collection)
833
1092
  return tuple(collections)
834
1093
 
835
- collection: Optional[str] = getattr(self.config, "collection", None)
1094
+ collection = getattr(self.config, "collection", None)
836
1095
  if collection is None:
837
1096
  collection = (
838
- self.product_type_def_params.get("collection", None) or product_type
1097
+ prep.product_type_def_params.get("collection", None) or product_type
839
1098
  )
840
- return (collection,) if not isinstance(collection, list) else tuple(collection)
1099
+
1100
+ if collection is None:
1101
+ return ()
1102
+ elif not isinstance(collection, list):
1103
+ return (collection,)
1104
+ else:
1105
+ return tuple(collection)
841
1106
 
842
1107
  def _request(
843
1108
  self,
844
- url: str,
845
- info_message: Optional[str] = None,
846
- exception_message: Optional[str] = None,
1109
+ prep: PreparedSearch,
847
1110
  ) -> Response:
1111
+ url = prep.url
1112
+ if url is None:
1113
+ raise ValidationError("Cannot request empty URL")
1114
+ info_message = prep.info_message
1115
+ exception_message = prep.exception_message
848
1116
  try:
849
1117
  timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1118
+ ssl_verify = getattr(self.config, "ssl_verify", True)
1119
+
1120
+ ssl_ctx = get_ssl_context(ssl_verify)
850
1121
  # auth if needed
851
1122
  kwargs: Dict[str, Any] = {}
852
1123
  if (
853
1124
  getattr(self.config, "need_auth", False)
854
- and hasattr(self, "auth")
855
- and callable(self.auth)
1125
+ and hasattr(prep, "auth")
1126
+ and callable(prep.auth)
856
1127
  ):
857
- kwargs["auth"] = self.auth
1128
+ kwargs["auth"] = prep.auth
858
1129
  # requests auto quote url params, without any option to prevent it
859
1130
  # use urllib instead of requests if req must be sent unquoted
1131
+
860
1132
  if hasattr(self.config, "dont_quote"):
861
1133
  # keep unquoted desired params
862
1134
  base_url, params = url.split("?") if "?" in url else (url, "")
@@ -868,21 +1140,27 @@ class QueryStringSearch(Search):
868
1140
  req = requests.Request(
869
1141
  method="GET", url=base_url, headers=USER_AGENT, **kwargs
870
1142
  )
871
- prep = req.prepare()
872
- prep.url = base_url + "?" + qry
1143
+ req_prep = req.prepare()
1144
+ req_prep.url = base_url + "?" + qry
873
1145
  # send urllib req
874
1146
  if info_message:
875
- logger.info(info_message.replace(url, prep.url))
876
- urllib_req = Request(prep.url, headers=USER_AGENT)
877
- urllib_response = urlopen(urllib_req, timeout=timeout)
1147
+ logger.info(info_message.replace(url, req_prep.url))
1148
+ urllib_req = Request(req_prep.url, headers=USER_AGENT)
1149
+ urllib_response = urlopen(urllib_req, timeout=timeout, context=ssl_ctx)
878
1150
  # build Response
879
1151
  adapter = HTTPAdapter()
880
- response = cast(Response, adapter.build_response(prep, urllib_response))
1152
+ response = cast(
1153
+ Response, adapter.build_response(req_prep, urllib_response)
1154
+ )
881
1155
  else:
882
1156
  if info_message:
883
1157
  logger.info(info_message)
884
1158
  response = requests.get(
885
- url, timeout=timeout, headers=USER_AGENT, **kwargs
1159
+ url,
1160
+ timeout=timeout,
1161
+ headers=USER_AGENT,
1162
+ verify=ssl_verify,
1163
+ **kwargs,
886
1164
  )
887
1165
  response.raise_for_status()
888
1166
  except requests.exceptions.Timeout as exc:
@@ -899,38 +1177,10 @@ class QueryStringSearch(Search):
899
1177
  self.__class__.__name__,
900
1178
  err_msg,
901
1179
  )
902
- raise RequestError(str(err))
1180
+ raise RequestError.from_error(err, exception_message) from err
903
1181
  return response
904
1182
 
905
1183
 
906
- class AwsSearch(QueryStringSearch):
907
- """A specialisation of RestoSearch that modifies the way the EOProducts are built
908
- from the search results"""
909
-
910
- def normalize_results(
911
- self, results: List[Dict[str, Any]], **kwargs: Any
912
- ) -> List[EOProduct]:
913
- """Transform metadata from provider representation to eodag representation"""
914
- normalized: List[EOProduct] = []
915
- logger.debug("Adapting plugin results to eodag product representation")
916
- for result in results:
917
- ref = result["properties"]["title"].split("_")[5]
918
- year = result["properties"]["completionDate"][0:4]
919
- month = str(int(result["properties"]["completionDate"][5:7]))
920
- day = str(int(result["properties"]["completionDate"][8:10]))
921
-
922
- properties = QueryStringSearch.extract_properties[self.config.result_type](
923
- result, self.get_metadata_mapping(kwargs.get("productType"))
924
- )
925
-
926
- properties["downloadLink"] = (
927
- "s3://tiles/{ref[1]}{ref[2]}/{ref[3]}/{ref[4]}{ref[5]}/{year}/"
928
- "{month}/{day}/0/"
929
- ).format(**locals())
930
- normalized.append(EOProduct(self.provider, properties, **kwargs))
931
- return normalized
932
-
933
-
934
1184
  class ODataV4Search(QueryStringSearch):
935
1185
  """A specialisation of a QueryStringSearch that does a two step search to retrieve
936
1186
  all products metadata"""
@@ -948,25 +1198,31 @@ class ODataV4Search(QueryStringSearch):
948
1198
  metadata_path
949
1199
  )
950
1200
 
951
- def do_search(self, *args: Any, **kwargs: Any) -> List[Any]:
1201
+ def do_search(
1202
+ self, prep: PreparedSearch = PreparedSearch(), **kwargs: Any
1203
+ ) -> List[Any]:
952
1204
  """A two step search can be performed if the metadata are not given into the search result"""
953
1205
 
954
1206
  if getattr(self.config, "per_product_metadata_query", False):
955
1207
  final_result = []
1208
+ ssl_verify = getattr(self.config, "ssl_verify", True)
956
1209
  # Query the products entity set for basic metadata about the product
957
- for entity in super(ODataV4Search, self).do_search(*args, **kwargs):
1210
+ for entity in super(ODataV4Search, self).do_search(prep, **kwargs):
958
1211
  metadata_url = self.get_metadata_search_url(entity)
959
1212
  try:
960
1213
  logger.debug("Sending metadata request: %s", metadata_url)
961
1214
  response = requests.get(
962
- metadata_url, headers=USER_AGENT, timeout=HTTP_REQ_TIMEOUT
1215
+ metadata_url,
1216
+ headers=USER_AGENT,
1217
+ timeout=HTTP_REQ_TIMEOUT,
1218
+ verify=ssl_verify,
963
1219
  )
964
1220
  response.raise_for_status()
965
1221
  except requests.exceptions.Timeout as exc:
966
1222
  raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
967
1223
  except requests.RequestException:
968
1224
  logger.exception(
969
- "Skipping error while searching for %s %s instance:",
1225
+ "Skipping error while searching for %s %s instance",
970
1226
  self.provider,
971
1227
  self.__class__.__name__,
972
1228
  )
@@ -977,7 +1233,7 @@ class ODataV4Search(QueryStringSearch):
977
1233
  final_result.append(entity)
978
1234
  return final_result
979
1235
  else:
980
- return super(ODataV4Search, self).do_search(*args, **kwargs)
1236
+ return super(ODataV4Search, self).do_search(prep, **kwargs)
981
1237
 
982
1238
  def get_metadata_search_url(self, entity: Dict[str, Any]) -> str:
983
1239
  """Build the metadata link for the given entity"""
@@ -986,7 +1242,7 @@ class ODataV4Search(QueryStringSearch):
986
1242
  )
987
1243
 
988
1244
  def normalize_results(
989
- self, results: List[Dict[str, Any]], **kwargs: Any
1245
+ self, results: RawSearchResult, **kwargs: Any
990
1246
  ) -> List[EOProduct]:
991
1247
  """Build EOProducts from provider results
992
1248
 
@@ -1020,43 +1276,135 @@ class ODataV4Search(QueryStringSearch):
1020
1276
  class PostJsonSearch(QueryStringSearch):
1021
1277
  """A specialisation of a QueryStringSearch that uses POST method"""
1022
1278
 
1279
+ def _get_default_end_date_from_start_date(
1280
+ self, start_datetime: str, product_type: str
1281
+ ) -> str:
1282
+ default_end_date = self.config.products.get(product_type, {}).get(
1283
+ "_default_end_date", None
1284
+ )
1285
+ if default_end_date:
1286
+ return default_end_date
1287
+ try:
1288
+ start_date = datetime.fromisoformat(start_datetime)
1289
+ except ValueError:
1290
+ start_date = datetime.strptime(start_datetime, "%Y-%m-%dT%H:%M:%SZ")
1291
+ product_type_conf = self.config.products[product_type]
1292
+ if (
1293
+ "metadata_mapping" in product_type_conf
1294
+ and "startTimeFromAscendingNode" in product_type_conf["metadata_mapping"]
1295
+ ):
1296
+ mapping = product_type_conf["metadata_mapping"][
1297
+ "startTimeFromAscendingNode"
1298
+ ]
1299
+ if isinstance(mapping, list) and "year" in mapping[0]:
1300
+ # if date is mapped to year/month/(day), use end_date = start_date to avoid large requests
1301
+ end_date = start_date
1302
+ return end_date.isoformat()
1303
+ return self.get_product_type_cfg_value("missionEndDate", today().isoformat())
1304
+
1305
+ def _check_date_params(self, keywords: Dict[str, Any], product_type: str) -> None:
1306
+ """checks if start and end date are present in the keywords and adds them if not"""
1307
+ if (
1308
+ "startTimeFromAscendingNode"
1309
+ and "completionTimeFromAscendingNode" in keywords
1310
+ ):
1311
+ return
1312
+ # start time given, end time missing
1313
+ if "startTimeFromAscendingNode" in keywords:
1314
+ keywords[
1315
+ "completionTimeFromAscendingNode"
1316
+ ] = self._get_default_end_date_from_start_date(
1317
+ keywords["startTimeFromAscendingNode"], product_type
1318
+ )
1319
+ return
1320
+ product_type_conf = self.config.products[product_type]
1321
+ if (
1322
+ "metadata_mapping" in product_type_conf
1323
+ and "startTimeFromAscendingNode" in product_type_conf["metadata_mapping"]
1324
+ ):
1325
+ mapping = product_type_conf["metadata_mapping"][
1326
+ "startTimeFromAscendingNode"
1327
+ ]
1328
+ if isinstance(mapping, list):
1329
+ # get time parameters (date, year, month, ...) from metadata mapping
1330
+ input_mapping = mapping[0].replace("{{", "").replace("}}", "")
1331
+ time_params = [
1332
+ values.split(":")[0].strip() for values in input_mapping.split(",")
1333
+ ]
1334
+ time_params = [
1335
+ tp.replace('"', "").replace("'", "") for tp in time_params
1336
+ ]
1337
+ # if startTime is not given but other time params (e.g. year/month/(day)) are given,
1338
+ # no default date is required
1339
+ in_keywords = True
1340
+ for tp in time_params:
1341
+ if tp not in keywords:
1342
+ in_keywords = False
1343
+ if not in_keywords:
1344
+ keywords[
1345
+ "startTimeFromAscendingNode"
1346
+ ] = self.get_product_type_cfg_value(
1347
+ "missionStartDate", today().isoformat()
1348
+ )
1349
+ keywords[
1350
+ "completionTimeFromAscendingNode"
1351
+ ] = self._get_default_end_date_from_start_date(
1352
+ keywords["startTimeFromAscendingNode"], product_type
1353
+ )
1354
+
1023
1355
  def query(
1024
1356
  self,
1025
- product_type: Optional[str] = None,
1026
- items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
1027
- page: int = DEFAULT_PAGE,
1028
- count: bool = True,
1357
+ prep: PreparedSearch = PreparedSearch(),
1029
1358
  **kwargs: Any,
1030
1359
  ) -> Tuple[List[EOProduct], Optional[int]]:
1031
1360
  """Perform a search on an OpenSearch-like interface"""
1032
1361
  product_type = kwargs.get("productType", None)
1362
+ count = prep.count
1033
1363
  # remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
1034
1364
  kwargs.pop("product_type", None)
1365
+ sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
1366
+ _, sort_by_qp = (
1367
+ ("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
1368
+ )
1035
1369
  provider_product_type = self.map_product_type(product_type)
1036
- keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}
1370
+ _dc_qs = kwargs.pop("_dc_qs", None)
1371
+ if _dc_qs is not None:
1372
+ qs = unquote_plus(unquote_plus(_dc_qs))
1373
+ qp = geojson.loads(qs)
1374
+
1375
+ # provider product type specific conf
1376
+ prep.product_type_def_params = self.get_product_type_def_params(
1377
+ product_type, **kwargs
1378
+ )
1379
+ else:
1380
+ keywords = {
1381
+ k: v for k, v in kwargs.items() if k != "auth" and v is not None
1382
+ }
1037
1383
 
1038
- if provider_product_type and provider_product_type != GENERIC_PRODUCT_TYPE:
1039
- keywords["productType"] = provider_product_type
1040
- elif product_type:
1041
- keywords["productType"] = product_type
1384
+ if provider_product_type and provider_product_type != GENERIC_PRODUCT_TYPE:
1385
+ keywords["productType"] = provider_product_type
1386
+ elif product_type:
1387
+ keywords["productType"] = product_type
1042
1388
 
1043
- # provider product type specific conf
1044
- self.product_type_def_params = self.get_product_type_def_params(
1045
- product_type, **kwargs
1046
- )
1389
+ # provider product type specific conf
1390
+ prep.product_type_def_params = self.get_product_type_def_params(
1391
+ product_type, **kwargs
1392
+ )
1047
1393
 
1048
- # Add to the query, the queryable parameters set in the provider product type definition
1049
- keywords.update(
1050
- {
1051
- k: v
1052
- for k, v in self.product_type_def_params.items()
1053
- if k not in keywords.keys()
1054
- and k in self.config.metadata_mapping.keys()
1055
- and isinstance(self.config.metadata_mapping[k], list)
1056
- }
1057
- )
1394
+ # Add to the query, the queryable parameters set in the provider product type definition
1395
+ keywords.update(
1396
+ {
1397
+ k: v
1398
+ for k, v in prep.product_type_def_params.items()
1399
+ if k not in keywords.keys()
1400
+ and k in self.config.metadata_mapping.keys()
1401
+ and isinstance(self.config.metadata_mapping[k], list)
1402
+ }
1403
+ )
1404
+ if getattr(self.config, "dates_required", False):
1405
+ self._check_date_params(keywords, product_type)
1058
1406
 
1059
- qp, _ = self.build_query_string(product_type, **keywords)
1407
+ qp, _ = self.build_query_string(product_type, **keywords)
1060
1408
 
1061
1409
  for query_param, query_value in qp.items():
1062
1410
  if (
@@ -1086,12 +1434,15 @@ class PostJsonSearch(QueryStringSearch):
1086
1434
  "specific_qssearch"
1087
1435
  ].get("merge_responses", None)
1088
1436
 
1089
- self.count_hits = lambda *x, **y: 1
1090
- self._request = super(PostJsonSearch, self)._request
1437
+ def count_hits(self, *x, **y):
1438
+ return 1
1439
+
1440
+ def _request(self, *x, **y):
1441
+ return super(PostJsonSearch, self)._request(*x, **y)
1091
1442
 
1092
1443
  try:
1093
1444
  eo_products, total_items = super(PostJsonSearch, self).query(
1094
- items_per_page=items_per_page, page=page, **kwargs
1445
+ prep, **kwargs
1095
1446
  )
1096
1447
  except Exception:
1097
1448
  raise
@@ -1108,61 +1459,98 @@ class PostJsonSearch(QueryStringSearch):
1108
1459
  # stop searching right away
1109
1460
  product_type_metadata_mapping = dict(
1110
1461
  self.config.metadata_mapping,
1111
- **self.product_type_def_params.get("metadata_mapping", {}),
1462
+ **prep.product_type_def_params.get("metadata_mapping", {}),
1112
1463
  )
1113
1464
  if not qp and any(
1114
1465
  k
1115
1466
  for k in keywords.keys()
1116
1467
  if isinstance(product_type_metadata_mapping.get(k, []), list)
1117
1468
  ):
1118
- return [], 0
1119
- self.query_params = qp
1120
- self.search_urls, total_items = self.collect_search_urls(
1121
- page=page, items_per_page=items_per_page, count=count, **kwargs
1122
- )
1123
- if not count and getattr(self, "need_count", False):
1469
+ return ([], 0) if prep.count else ([], None)
1470
+ prep.query_params = dict(qp, **sort_by_qp)
1471
+ prep.search_urls, total_items = self.collect_search_urls(prep, **kwargs)
1472
+ if not count and getattr(prep, "need_count", False):
1124
1473
  # do not try to extract total_items from search results if count is False
1125
- del self.total_items_nb
1126
- del self.need_count
1127
- provider_results = self.do_search(items_per_page=items_per_page, **kwargs)
1128
- if count and total_items is None and hasattr(self, "total_items_nb"):
1129
- total_items = self.total_items_nb
1130
- eo_products = self.normalize_results(provider_results, **kwargs)
1131
- total_items = len(eo_products) if total_items == 0 else total_items
1474
+ del prep.total_items_nb
1475
+ del prep.need_count
1476
+ provider_results = self.do_search(prep, **kwargs)
1477
+ if count and total_items is None and hasattr(prep, "total_items_nb"):
1478
+ total_items = prep.total_items_nb
1479
+
1480
+ raw_search_result = RawSearchResult(provider_results)
1481
+ raw_search_result.query_params = prep.query_params
1482
+ raw_search_result.product_type_def_params = prep.product_type_def_params
1483
+
1484
+ eo_products = self.normalize_results(raw_search_result, **kwargs)
1132
1485
  return eo_products, total_items
1133
1486
 
1487
+ def normalize_results(
1488
+ self, results: RawSearchResult, **kwargs: Any
1489
+ ) -> List[EOProduct]:
1490
+ """Build EOProducts from provider results"""
1491
+ normalized = super().normalize_results(results, **kwargs)
1492
+ for product in normalized:
1493
+ if "downloadLink" in product.properties:
1494
+ decoded_link = unquote(product.properties["downloadLink"])
1495
+ if decoded_link[0] == "{": # not a url but a dict
1496
+ default_values = deepcopy(
1497
+ self.config.products.get(product.product_type, {})
1498
+ )
1499
+ default_values.pop("metadata_mapping", None)
1500
+ searched_values = orjson.loads(decoded_link)
1501
+ _dc_qs = orjson.dumps(
1502
+ format_query_params(
1503
+ product.product_type,
1504
+ self.config,
1505
+ {**default_values, **searched_values},
1506
+ )
1507
+ )
1508
+ product.properties["_dc_qs"] = quote_plus(_dc_qs)
1509
+
1510
+ # workaround to add product type to wekeo cmems order links
1511
+ if (
1512
+ "orderLink" in product.properties
1513
+ and "productType" in product.properties["orderLink"]
1514
+ ):
1515
+ product.properties["orderLink"] = product.properties[
1516
+ "orderLink"
1517
+ ].replace("productType", product.product_type)
1518
+ return normalized
1519
+
1134
1520
  def collect_search_urls(
1135
1521
  self,
1136
- page: Optional[int] = None,
1137
- items_per_page: Optional[int] = None,
1138
- count: bool = True,
1522
+ prep: PreparedSearch = PreparedSearch(),
1139
1523
  **kwargs: Any,
1140
1524
  ) -> Tuple[List[str], Optional[int]]:
1141
1525
  """Adds pagination to query parameters, and auth to url"""
1526
+ page = prep.page
1527
+ items_per_page = prep.items_per_page
1528
+ count = prep.count
1142
1529
  urls: List[str] = []
1143
1530
  total_results = 0 if count else None
1144
1531
 
1145
1532
  if "count_endpoint" not in self.config.pagination:
1146
1533
  # if count_endpoint is not set, total_results should be extracted from search result
1147
1534
  total_results = None
1148
- self.need_count = True
1149
- self.total_items_nb = None
1535
+ prep.need_count = True
1536
+ prep.total_items_nb = None
1150
1537
 
1151
- if "auth" in kwargs and hasattr(kwargs["auth"], "config"):
1152
- auth_conf_dict = getattr(kwargs["auth"].config, "credentials", {})
1538
+ if prep.auth_plugin is not None and hasattr(prep.auth_plugin, "config"):
1539
+ auth_conf_dict = getattr(prep.auth_plugin.config, "credentials", {})
1153
1540
  else:
1154
1541
  auth_conf_dict = {}
1155
- for collection in self.get_collections(**kwargs):
1542
+ for collection in self.get_collections(prep, **kwargs) or (None,):
1156
1543
  try:
1157
1544
  search_endpoint: str = self.config.api_endpoint.rstrip("/").format(
1158
1545
  **dict(collection=collection, **auth_conf_dict)
1159
1546
  )
1160
1547
  except KeyError as e:
1548
+ provider = prep.auth_plugin.provider if prep.auth_plugin else ""
1161
1549
  raise MisconfiguredError(
1162
- "Missing %s in %s configuration"
1163
- % (",".join(e.args), kwargs["auth"].provider)
1550
+ "Missing %s in %s configuration" % (",".join(e.args), provider)
1164
1551
  )
1165
1552
  if page is not None and items_per_page is not None:
1553
+ page = page - 1 + self.config.pagination.get("start_page", 1)
1166
1554
  if count:
1167
1555
  count_endpoint = self.config.pagination.get(
1168
1556
  "count_endpoint", ""
@@ -1174,8 +1562,14 @@ class PostJsonSearch(QueryStringSearch):
1174
1562
  if getattr(self.config, "merge_responses", False):
1175
1563
  total_results = _total_results or 0
1176
1564
  else:
1177
- total_results += _total_results or 0
1178
- if isinstance(self.config.pagination["next_page_query_obj"], str):
1565
+ total_results = (
1566
+ (_total_results or 0)
1567
+ if total_results is None
1568
+ else total_results + (_total_results or 0)
1569
+ )
1570
+ if "next_page_query_obj" in self.config.pagination and isinstance(
1571
+ self.config.pagination["next_page_query_obj"], str
1572
+ ):
1179
1573
  # next_page_query_obj needs to be parsed
1180
1574
  next_page_query_obj = self.config.pagination[
1181
1575
  "next_page_query_obj"
@@ -1186,60 +1580,68 @@ class PostJsonSearch(QueryStringSearch):
1186
1580
  skip_base_1=(page - 1) * items_per_page + 1,
1187
1581
  )
1188
1582
  update_nested_dict(
1189
- self.query_params, orjson.loads(next_page_query_obj)
1583
+ prep.query_params, orjson.loads(next_page_query_obj)
1190
1584
  )
1191
1585
 
1192
1586
  urls.append(search_endpoint)
1193
- return urls, total_results
1587
+ return list(dict.fromkeys(urls)), total_results
 
     def _request(
         self,
-        url: str,
-        info_message: Optional[str] = None,
-        exception_message: Optional[str] = None,
+        prep: PreparedSearch,
     ) -> Response:
+        url = prep.url
+        if url is None:
+            raise ValidationError("Cannot request empty URL")
+        info_message = prep.info_message
+        exception_message = prep.exception_message
         timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
+        ssl_verify = getattr(self.config, "ssl_verify", True)
         try:
             # auth if needed
-            kwargs = {}
+            RequestsKwargs = TypedDict(
+                "RequestsKwargs", {"auth": AuthBase}, total=False
+            )
+            kwargs: RequestsKwargs = {}
             if (
                 getattr(self.config, "need_auth", False)
-                and hasattr(self, "auth")
-                and callable(self.auth)
+                and hasattr(prep, "auth")
+                and callable(prep.auth)
             ):
-                kwargs["auth"] = self.auth
+                kwargs["auth"] = prep.auth
 
             # perform the request using the next page arguments if they are defined
-            if getattr(self, "next_page_query_obj", None):
-                self.query_params = self.next_page_query_obj
+            if (
+                hasattr(self, "next_page_query_obj")
+                and self.next_page_query_obj is not None
+            ):
+                prep.query_params = self.next_page_query_obj
             if info_message:
                 logger.info(info_message)
-            logger.debug("Query parameters: %s" % self.query_params)
+            logger.debug("Query parameters: %s" % prep.query_params)
+            logger.debug("Query kwargs: %s" % kwargs)
             response = requests.post(
                 url,
-                json=self.query_params,
+                json=prep.query_params,
                 headers=USER_AGENT,
                 timeout=timeout,
+                verify=ssl_verify,
                 **kwargs,
             )
             response.raise_for_status()
         except requests.exceptions.Timeout as exc:
             raise TimeOutError(exc, timeout=timeout) from exc
         except (requests.RequestException, URLError) as err:
+            response = locals().get("response", Response())
             # check if error is identified as auth_error in provider conf
             auth_errors = getattr(self.config, "auth_error_code", [None])
             if not isinstance(auth_errors, list):
                 auth_errors = [auth_errors]
-            if (
-                hasattr(err.response, "status_code")
-                and err.response.status_code in auth_errors
-            ):
+            if response.status_code and response.status_code in auth_errors:
                 raise AuthenticationError(
-                    "HTTP Error {} returned:\n{}\nPlease check your credentials for {}".format(
-                        err.response.status_code,
-                        err.response.text.strip(),
-                        self.provider,
-                    )
+                    f"Please check your credentials for {self.provider}.",
+                    f"HTTP Error {response.status_code} returned.",
+                    response.text.strip(),
                 )
             if exception_message:
                 logger.exception(exception_message)
@@ -1250,9 +1652,8 @@ class PostJsonSearch(QueryStringSearch):
                     self.provider,
                     self.__class__.__name__,
                 )
-            if "response" in locals():
-                logger.debug(response.content)
-            raise RequestError(str(err))
+            logger.debug(response.content or str(err))
+            raise RequestError.from_error(err, exception_message) from err
         return response
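Note: on failure, locals().get("response", Response()) substitutes an empty requests Response whose status_code is None, so the auth-error check simply evaluates to False when the request never produced a response. The typed-kwargs pattern used above can be reproduced in isolation as follows (names and URL are illustrative only, not eodag code):

    from typing import TypedDict
    import requests
    from requests.auth import AuthBase, HTTPBasicAuth

    # total=False makes "auth" optional, so an empty dict is still a valid value
    RequestsKwargs = TypedDict("RequestsKwargs", {"auth": AuthBase}, total=False)

    need_auth = True  # e.g. read from the provider configuration
    kwargs: RequestsKwargs = {}
    if need_auth:
        kwargs["auth"] = HTTPBasicAuth("user", "password")
    response = requests.post("https://example.org/search", json={}, timeout=5, **kwargs)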
 
 
@@ -1268,18 +1669,31 @@ class StacSearch(PostJsonSearch):
         # restore results_entry overwritten by init
         self.config.results_entry = results_entry
 
-    def normalize_results(
-        self, results: List[Dict[str, Any]], **kwargs: Any
-    ) -> List[EOProduct]:
-        """Build EOProducts from provider results"""
+    def build_query_string(
+        self, product_type: str, **kwargs: Any
+    ) -> Tuple[Dict[str, Any], str]:
+        """Build The query string using the search parameters"""
+        logger.debug("Building the query string that will be used for search")
 
-        products = super(StacSearch, self).normalize_results(results, **kwargs)
+        # handle opened time intervals
+        if any(
+            k in kwargs
+            for k in ("startTimeFromAscendingNode", "completionTimeFromAscendingNode")
+        ):
+            kwargs.setdefault("startTimeFromAscendingNode", "..")
+            kwargs.setdefault("completionTimeFromAscendingNode", "..")
 
-        # move assets from properties to product's attr
-        for product in products:
-            product.assets.update(product.properties.pop("assets", {}))
+        query_params = format_query_params(product_type, self.config, kwargs)
 
-        return products
+        # Build the final query string, in one go without quoting it
+        # (some providers do not operate well with urlencoded and quoted query strings)
+        def quote_via(x: Any, *_args, **_kwargs) -> str:
+            return x
+
+        return (
+            query_params,
+            urlencode(query_params, doseq=True, quote_via=quote_via),
+        )
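Note: passing an identity function as quote_via disables percent-encoding while keeping urlencode's key=value&... assembly. A standalone illustration (parameter values hypothetical):

    from urllib.parse import urlencode, quote_plus

    params = {"datetime": "2021-01-01T00:00:00Z/..", "bbox": "0,43,1,44"}

    # default behaviour percent-encodes reserved characters
    urlencode(params, doseq=True, quote_via=quote_plus)
    # -> 'datetime=2021-01-01T00%3A00%3A00Z%2F..&bbox=0%2C43%2C1%2C44'

    # pass-through quoting, as in build_query_string above, keeps values verbatim
    urlencode(params, doseq=True, quote_via=lambda x, *_a, **_kw: x)
    # -> 'datetime=2021-01-01T00:00:00Z/..&bbox=0,43,1,44'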
 
     def discover_queryables(
         self, **kwargs: Any
@@ -1288,16 +1702,37 @@ class StacSearch(PostJsonSearch):
 
         :param kwargs: additional filters for queryables (`productType` and other search
                        arguments)
-        :type kwargs: Any
         :returns: fetched queryable parameters dict
-        :rtype: Optional[Dict[str, Annotated[Any, FieldInfo]]]
         """
+        if (
+            not self.config.discover_queryables["fetch_url"]
+            and not self.config.discover_queryables["product_type_fetch_url"]
+        ):
+            logger.info(f"Cannot fetch queryables with {self.provider}")
+            return None
+
         product_type = kwargs.get("productType", None)
         provider_product_type = (
             self.config.products.get(product_type, {}).get("productType", product_type)
             if product_type
             else None
         )
+        if (
+            provider_product_type
+            and not self.config.discover_queryables["product_type_fetch_url"]
+        ):
+            logger.info(
+                f"Cannot fetch queryables for a specific product type with {self.provider}"
+            )
+            return None
+        if (
+            not provider_product_type
+            and not self.config.discover_queryables["fetch_url"]
+        ):
+            logger.info(
+                f"Cannot fetch global queryables with {self.provider}. A product type must be specified"
+            )
+            return None
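Note: the new guard clauses rely on two entries of the provider's discover_queryables configuration; a minimal sketch of the shape implied by the lookups above (keys taken from the code, URLs hypothetical):

    discover_queryables = {
        # endpoint for provider-wide queryables
        "fetch_url": "https://example.org/stac/queryables",
        # endpoint for per-collection queryables
        "product_type_fetch_url": "https://example.org/stac/collections/{provider_product_type}/queryables",
    }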
 
         try:
             unparsed_fetch_url = (
@@ -1309,12 +1744,20 @@ class StacSearch(PostJsonSearch):
             fetch_url = unparsed_fetch_url.format(
                 provider_product_type=provider_product_type, **self.config.__dict__
             )
+            auth = (
+                self.auth
+                if hasattr(self, "auth") and isinstance(self.auth, AuthBase)
+                else None
+            )
             response = QueryStringSearch._request(
                 self,
-                fetch_url,
-                info_message="Fetching queryables: {}".format(fetch_url),
-                exception_message="Skipping error while fetching queryables for "
-                "{} {} instance:".format(self.provider, self.__class__.__name__),
+                PreparedSearch(
+                    url=fetch_url,
+                    auth=auth,
+                    info_message="Fetching queryables: {}".format(fetch_url),
+                    exception_message="Skipping error while fetching queryables for "
+                    "{} {} instance:".format(self.provider, self.__class__.__name__),
+                ),
             )
         except (RequestError, KeyError, AttributeError):
             return None
@@ -1348,7 +1791,7 @@ class StacSearch(PostJsonSearch):
             for json_param, json_mtd in json_queryables.items():
                 param = (
                     get_queryable_from_provider(
-                        json_param, self.config.metadata_mapping
+                        json_param, self.get_metadata_mapping(product_type)
                    )
                    or json_param
                )
@@ -1362,3 +1805,18 @@ class StacSearch(PostJsonSearch):
         python_queryables = create_model("m", **field_definitions).model_fields
 
         return model_fields_to_annotated(python_queryables)
+
+
+class PostJsonSearchWithStacQueryables(StacSearch, PostJsonSearch):
+    """A specialisation of a :class:`~eodag.plugins.search.qssearch.PostJsonSearch` that
+    uses generic STAC configuration for queryables.
+    """
+
+    def __init__(self, provider: str, config: PluginConfig) -> None:
+        PostJsonSearch.__init__(self, provider, config)
+
+    def build_query_string(
+        self, product_type: str, **kwargs: Any
+    ) -> Tuple[Dict[str, Any], str]:
+        """Build The query string using the search parameters"""
+        return PostJsonSearch.build_query_string(self, product_type, **kwargs)
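Note: with StacSearch listed first, the method resolution order serves discover_queryables from the STAC side, while build_query_string is explicitly delegated back to PostJsonSearch, and __init__ calls PostJsonSearch.__init__ directly, bypassing the StacSearch initializer shown above. A stand-in illustration of that resolution pattern (toy classes, not eodag code):

    class PostJson:
        def build_query_string(self): return "postjson"
        def discover_queryables(self): return "postjson"

    class Stac(PostJson):
        def build_query_string(self): return "stac"
        def discover_queryables(self): return "stac"

    class Combined(Stac, PostJson):
        def build_query_string(self):
            return PostJson.build_query_string(self)  # explicit delegation

    c = Combined()
    assert c.discover_queryables() == "stac"      # inherited via the MRO
    assert c.build_query_string() == "postjson"   # forced back to PostJson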