eodag 2.12.1__py3-none-any.whl → 3.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +440 -321
- eodag/api/product/__init__.py +5 -1
- eodag/api/product/_assets.py +57 -2
- eodag/api/product/_product.py +89 -68
- eodag/api/product/metadata_mapping.py +181 -66
- eodag/api/search_result.py +48 -1
- eodag/cli.py +20 -6
- eodag/config.py +95 -6
- eodag/plugins/apis/base.py +8 -165
- eodag/plugins/apis/ecmwf.py +36 -24
- eodag/plugins/apis/usgs.py +40 -24
- eodag/plugins/authentication/aws_auth.py +2 -2
- eodag/plugins/authentication/header.py +31 -6
- eodag/plugins/authentication/keycloak.py +13 -84
- eodag/plugins/authentication/oauth.py +3 -3
- eodag/plugins/authentication/openid_connect.py +256 -46
- eodag/plugins/authentication/qsauth.py +3 -0
- eodag/plugins/authentication/sas_auth.py +8 -1
- eodag/plugins/authentication/token.py +92 -46
- eodag/plugins/authentication/token_exchange.py +120 -0
- eodag/plugins/download/aws.py +86 -91
- eodag/plugins/download/base.py +72 -40
- eodag/plugins/download/http.py +607 -264
- eodag/plugins/download/s3rest.py +28 -15
- eodag/plugins/manager.py +74 -57
- eodag/plugins/search/__init__.py +36 -0
- eodag/plugins/search/base.py +225 -18
- eodag/plugins/search/build_search_result.py +389 -32
- eodag/plugins/search/cop_marine.py +378 -0
- eodag/plugins/search/creodias_s3.py +15 -14
- eodag/plugins/search/csw.py +5 -7
- eodag/plugins/search/data_request_search.py +44 -20
- eodag/plugins/search/qssearch.py +508 -203
- eodag/plugins/search/static_stac_search.py +99 -36
- eodag/resources/constraints/climate-dt.json +13 -0
- eodag/resources/constraints/extremes-dt.json +8 -0
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +1897 -34
- eodag/resources/providers.yml +3539 -3277
- eodag/resources/stac.yml +48 -54
- eodag/resources/stac_api.yml +71 -25
- eodag/resources/stac_provider.yml +5 -0
- eodag/resources/user_conf_template.yml +51 -3
- eodag/rest/__init__.py +6 -0
- eodag/rest/cache.py +70 -0
- eodag/rest/config.py +68 -0
- eodag/rest/constants.py +27 -0
- eodag/rest/core.py +757 -0
- eodag/rest/server.py +397 -258
- eodag/rest/stac.py +438 -307
- eodag/rest/types/collections_search.py +44 -0
- eodag/rest/types/eodag_search.py +232 -43
- eodag/rest/types/{stac_queryables.py → queryables.py} +81 -43
- eodag/rest/types/stac_search.py +277 -0
- eodag/rest/utils/__init__.py +216 -0
- eodag/rest/utils/cql_evaluate.py +119 -0
- eodag/rest/utils/rfc3339.py +65 -0
- eodag/types/__init__.py +99 -9
- eodag/types/bbox.py +15 -14
- eodag/types/download_args.py +31 -0
- eodag/types/search_args.py +58 -7
- eodag/types/whoosh.py +81 -0
- eodag/utils/__init__.py +72 -9
- eodag/utils/constraints.py +37 -37
- eodag/utils/exceptions.py +23 -17
- eodag/utils/repr.py +113 -0
- eodag/utils/requests.py +138 -0
- eodag/utils/rest.py +104 -0
- eodag/utils/stac_reader.py +100 -16
- {eodag-2.12.1.dist-info → eodag-3.0.0b2.dist-info}/METADATA +65 -44
- eodag-3.0.0b2.dist-info/RECORD +110 -0
- {eodag-2.12.1.dist-info → eodag-3.0.0b2.dist-info}/WHEEL +1 -1
- {eodag-2.12.1.dist-info → eodag-3.0.0b2.dist-info}/entry_points.txt +6 -5
- eodag/plugins/apis/cds.py +0 -540
- eodag/rest/utils.py +0 -1133
- eodag-2.12.1.dist-info/RECORD +0 -94
- {eodag-2.12.1.dist-info → eodag-3.0.0b2.dist-info}/LICENSE +0 -0
- {eodag-2.12.1.dist-info → eodag-3.0.0b2.dist-info}/top_level.txt +0 -0
eodag/plugins/search/qssearch.py
CHANGED
|
@@ -20,10 +20,31 @@ from __future__ import annotations
|
|
|
20
20
|
import logging
|
|
21
21
|
import re
|
|
22
22
|
from collections.abc import Iterable
|
|
23
|
-
from
|
|
23
|
+
from copy import copy as copy_copy
|
|
24
|
+
from typing import (
|
|
25
|
+
TYPE_CHECKING,
|
|
26
|
+
Any,
|
|
27
|
+
Callable,
|
|
28
|
+
Dict,
|
|
29
|
+
List,
|
|
30
|
+
Optional,
|
|
31
|
+
Set,
|
|
32
|
+
Tuple,
|
|
33
|
+
TypedDict,
|
|
34
|
+
cast,
|
|
35
|
+
)
|
|
24
36
|
from urllib.error import URLError
|
|
37
|
+
from urllib.parse import (
|
|
38
|
+
parse_qsl,
|
|
39
|
+
quote_plus,
|
|
40
|
+
unquote,
|
|
41
|
+
unquote_plus,
|
|
42
|
+
urlparse,
|
|
43
|
+
urlunparse,
|
|
44
|
+
)
|
|
25
45
|
from urllib.request import Request, urlopen
|
|
26
46
|
|
|
47
|
+
import geojson
|
|
27
48
|
import orjson
|
|
28
49
|
import requests
|
|
29
50
|
import yaml
|
|
@@ -32,6 +53,7 @@ from pydantic import create_model
|
|
|
32
53
|
from pydantic.fields import FieldInfo
|
|
33
54
|
from requests import Response
|
|
34
55
|
from requests.adapters import HTTPAdapter
|
|
56
|
+
from requests.auth import AuthBase
|
|
35
57
|
|
|
36
58
|
from eodag.api.product import EOProduct
|
|
37
59
|
from eodag.api.product.metadata_mapping import (
|
|
@@ -42,11 +64,13 @@ from eodag.api.product.metadata_mapping import (
|
|
|
42
64
|
properties_from_json,
|
|
43
65
|
properties_from_xml,
|
|
44
66
|
)
|
|
67
|
+
from eodag.api.search_result import RawSearchResult
|
|
68
|
+
from eodag.plugins.search import PreparedSearch
|
|
45
69
|
from eodag.plugins.search.base import Search
|
|
46
70
|
from eodag.types import json_field_definition_to_python, model_fields_to_annotated
|
|
71
|
+
from eodag.types.queryables import CommonQueryables
|
|
72
|
+
from eodag.types.search_args import SortByList
|
|
47
73
|
from eodag.utils import (
|
|
48
|
-
DEFAULT_ITEMS_PER_PAGE,
|
|
49
|
-
DEFAULT_PAGE,
|
|
50
74
|
GENERIC_PRODUCT_TYPE,
|
|
51
75
|
HTTP_REQ_TIMEOUT,
|
|
52
76
|
USER_AGENT,
|
|
@@ -56,16 +80,22 @@ from eodag.utils import (
|
|
|
56
80
|
dict_items_recursive_apply,
|
|
57
81
|
format_dict_items,
|
|
58
82
|
get_args,
|
|
83
|
+
get_ssl_context,
|
|
59
84
|
quote,
|
|
60
85
|
string_to_jsonpath,
|
|
61
86
|
update_nested_dict,
|
|
62
87
|
urlencode,
|
|
63
88
|
)
|
|
89
|
+
from eodag.utils.constraints import (
|
|
90
|
+
fetch_constraints,
|
|
91
|
+
get_constraint_queryables_with_additional_params,
|
|
92
|
+
)
|
|
64
93
|
from eodag.utils.exceptions import (
|
|
65
94
|
AuthenticationError,
|
|
66
95
|
MisconfiguredError,
|
|
67
96
|
RequestError,
|
|
68
97
|
TimeOutError,
|
|
98
|
+
ValidationError,
|
|
69
99
|
)
|
|
70
100
|
|
|
71
101
|
if TYPE_CHECKING:
|
|
@@ -172,7 +202,6 @@ class QueryStringSearch(Search):
|
|
|
172
202
|
:type config: str
|
|
173
203
|
"""
|
|
174
204
|
|
|
175
|
-
DEFAULT_ITEMS_PER_PAGE = 10
|
|
176
205
|
extract_properties = {"xml": properties_from_xml, "json": properties_from_json}
|
|
177
206
|
|
|
178
207
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
@@ -251,6 +280,17 @@ class QueryStringSearch(Search):
|
|
|
251
280
|
"generic_product_type_parsable_metadata"
|
|
252
281
|
]
|
|
253
282
|
)
|
|
283
|
+
if (
|
|
284
|
+
"single_product_type_parsable_metadata"
|
|
285
|
+
in self.config.discover_product_types
|
|
286
|
+
):
|
|
287
|
+
self.config.discover_product_types[
|
|
288
|
+
"single_product_type_parsable_metadata"
|
|
289
|
+
] = mtd_cfg_as_conversion_and_querypath(
|
|
290
|
+
self.config.discover_product_types[
|
|
291
|
+
"single_product_type_parsable_metadata"
|
|
292
|
+
]
|
|
293
|
+
)
|
|
254
294
|
|
|
255
295
|
# parse jsonpath on init: queryables discovery
|
|
256
296
|
if (
|
|
@@ -316,35 +356,59 @@ class QueryStringSearch(Search):
|
|
|
316
356
|
self.next_page_query_obj = None
|
|
317
357
|
self.next_page_merge = None
|
|
318
358
|
|
|
319
|
-
def discover_product_types(self) -> Optional[Dict[str, Any]]:
|
|
359
|
+
def discover_product_types(self, **kwargs: Any) -> Optional[Dict[str, Any]]:
|
|
320
360
|
"""Fetch product types list from provider using `discover_product_types` conf
|
|
321
361
|
|
|
322
362
|
:returns: configuration dict containing fetched product types information
|
|
323
363
|
:rtype: (optional) dict
|
|
324
364
|
"""
|
|
325
365
|
try:
|
|
326
|
-
|
|
366
|
+
prep = PreparedSearch()
|
|
367
|
+
|
|
368
|
+
prep.url = cast(
|
|
327
369
|
str,
|
|
328
370
|
self.config.discover_product_types["fetch_url"].format(
|
|
329
371
|
**self.config.__dict__
|
|
330
372
|
),
|
|
331
373
|
)
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
374
|
+
|
|
375
|
+
# get auth if available
|
|
376
|
+
if "auth" in kwargs:
|
|
377
|
+
prep.auth = kwargs.pop("auth")
|
|
378
|
+
|
|
379
|
+
# try updating fetch_url qs using productType
|
|
380
|
+
fetch_qs_dict = {}
|
|
381
|
+
if "single_collection_fetch_qs" in self.config.discover_product_types:
|
|
382
|
+
try:
|
|
383
|
+
fetch_qs = self.config.discover_product_types[
|
|
384
|
+
"single_collection_fetch_qs"
|
|
385
|
+
].format(**kwargs)
|
|
386
|
+
fetch_qs_dict = dict(parse_qsl(fetch_qs))
|
|
387
|
+
except KeyError:
|
|
388
|
+
pass
|
|
389
|
+
if fetch_qs_dict:
|
|
390
|
+
url_parse = urlparse(prep.url)
|
|
391
|
+
query = url_parse.query
|
|
392
|
+
url_dict = dict(parse_qsl(query))
|
|
393
|
+
url_dict.update(fetch_qs_dict)
|
|
394
|
+
url_new_query = urlencode(url_dict)
|
|
395
|
+
url_parse = url_parse._replace(query=url_new_query)
|
|
396
|
+
prep.url = urlunparse(url_parse)
|
|
397
|
+
|
|
398
|
+
prep.info_message = "Fetching product types: {}".format(prep.url)
|
|
399
|
+
prep.exception_message = (
|
|
400
|
+
"Skipping error while fetching product types for " "{} {} instance:"
|
|
401
|
+
).format(self.provider, self.__class__.__name__)
|
|
402
|
+
|
|
403
|
+
response = QueryStringSearch._request(self, prep)
|
|
339
404
|
except (RequestError, KeyError, AttributeError):
|
|
340
405
|
return None
|
|
341
406
|
else:
|
|
342
407
|
try:
|
|
343
|
-
conf_update_dict = {
|
|
408
|
+
conf_update_dict: Dict[str, Any] = {
|
|
344
409
|
"providers_config": {},
|
|
345
410
|
"product_types_config": {},
|
|
346
411
|
}
|
|
347
|
-
|
|
348
412
|
if self.config.discover_product_types["result_type"] == "json":
|
|
349
413
|
resp_as_json = response.json()
|
|
350
414
|
# extract results from response json
|
|
@@ -354,6 +418,8 @@ class QueryStringSearch(Search):
|
|
|
354
418
|
"results_entry"
|
|
355
419
|
].find(resp_as_json)
|
|
356
420
|
]
|
|
421
|
+
if result and isinstance(result[0], list):
|
|
422
|
+
result = result[0]
|
|
357
423
|
|
|
358
424
|
for product_type_result in result:
|
|
359
425
|
# providers_config extraction
|
|
@@ -391,6 +457,17 @@ class QueryStringSearch(Search):
|
|
|
391
457
|
],
|
|
392
458
|
)
|
|
393
459
|
|
|
460
|
+
if (
|
|
461
|
+
"single_product_type_parsable_metadata"
|
|
462
|
+
in self.config.discover_product_types
|
|
463
|
+
):
|
|
464
|
+
collection_data = self._get_product_type_metadata_from_single_collection_endpoint(
|
|
465
|
+
generic_product_type_id
|
|
466
|
+
)
|
|
467
|
+
conf_update_dict["product_types_config"][
|
|
468
|
+
generic_product_type_id
|
|
469
|
+
].update(collection_data)
|
|
470
|
+
|
|
394
471
|
# update keywords
|
|
395
472
|
keywords_fields = [
|
|
396
473
|
"instrument",
|
|
@@ -444,32 +521,156 @@ class QueryStringSearch(Search):
|
|
|
444
521
|
)
|
|
445
522
|
return conf_update_dict
|
|
446
523
|
|
|
524
|
+
def _get_product_type_metadata_from_single_collection_endpoint(
|
|
525
|
+
self, product_type: str
|
|
526
|
+
) -> Dict[str, Any]:
|
|
527
|
+
"""
|
|
528
|
+
retrieves additional product type information from an endpoint returning data for a single collection
|
|
529
|
+
:param product_type: product type
|
|
530
|
+
:type product_type: str
|
|
531
|
+
:return: product types and their metadata
|
|
532
|
+
:rtype: Dict[str, Any]
|
|
533
|
+
"""
|
|
534
|
+
single_collection_url = self.config.discover_product_types[
|
|
535
|
+
"single_collection_fetch_url"
|
|
536
|
+
].format(productType=product_type)
|
|
537
|
+
resp = QueryStringSearch._request(
|
|
538
|
+
self,
|
|
539
|
+
PreparedSearch(
|
|
540
|
+
url=single_collection_url,
|
|
541
|
+
info_message="Fetching data for product type product type: {}".format(
|
|
542
|
+
product_type
|
|
543
|
+
),
|
|
544
|
+
exception_message="Skipping error while fetching product types for "
|
|
545
|
+
"{} {} instance:".format(self.provider, self.__class__.__name__),
|
|
546
|
+
),
|
|
547
|
+
)
|
|
548
|
+
product_data = resp.json()
|
|
549
|
+
return properties_from_json(
|
|
550
|
+
product_data,
|
|
551
|
+
self.config.discover_product_types["single_product_type_parsable_metadata"],
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
def discover_queryables(
|
|
555
|
+
self, **kwargs: Any
|
|
556
|
+
) -> Optional[Dict[str, Annotated[Any, FieldInfo]]]:
|
|
557
|
+
"""Fetch queryables list from provider using its constraints file
|
|
558
|
+
|
|
559
|
+
:param kwargs: additional filters for queryables (`productType` and other search
|
|
560
|
+
arguments)
|
|
561
|
+
:type kwargs: Any
|
|
562
|
+
:returns: fetched queryable parameters dict
|
|
563
|
+
:rtype: Optional[Dict[str, Annotated[Any, FieldInfo]]]
|
|
564
|
+
"""
|
|
565
|
+
product_type = kwargs.pop("productType", None)
|
|
566
|
+
if not product_type:
|
|
567
|
+
return {}
|
|
568
|
+
constraints_file_url = getattr(self.config, "constraints_file_url", "")
|
|
569
|
+
if not constraints_file_url:
|
|
570
|
+
return {}
|
|
571
|
+
|
|
572
|
+
constraints_file_dataset_key = getattr(
|
|
573
|
+
self.config, "constraints_file_dataset_key", "dataset"
|
|
574
|
+
)
|
|
575
|
+
provider_product_type = self.config.products.get(product_type, {}).get(
|
|
576
|
+
constraints_file_dataset_key, None
|
|
577
|
+
)
|
|
578
|
+
|
|
579
|
+
# defaults
|
|
580
|
+
default_queryables = self._get_defaults_as_queryables(product_type)
|
|
581
|
+
# remove unwanted queryables
|
|
582
|
+
for param in getattr(self.config, "remove_from_queryables", []):
|
|
583
|
+
default_queryables.pop(param, None)
|
|
584
|
+
|
|
585
|
+
non_empty_kwargs = {k: v for k, v in kwargs.items() if v}
|
|
586
|
+
|
|
587
|
+
if "{" in constraints_file_url:
|
|
588
|
+
constraints_file_url = constraints_file_url.format(
|
|
589
|
+
dataset=provider_product_type
|
|
590
|
+
)
|
|
591
|
+
constraints = fetch_constraints(constraints_file_url, self)
|
|
592
|
+
if not constraints:
|
|
593
|
+
return default_queryables
|
|
594
|
+
|
|
595
|
+
constraint_params: Dict[str, Dict[str, Set[Any]]] = {}
|
|
596
|
+
if len(kwargs) == 0:
|
|
597
|
+
# get values from constraints without additional filters
|
|
598
|
+
for constraint in constraints:
|
|
599
|
+
for key in constraint.keys():
|
|
600
|
+
if key in constraint_params:
|
|
601
|
+
constraint_params[key]["enum"].update(constraint[key])
|
|
602
|
+
else:
|
|
603
|
+
constraint_params[key] = {"enum": set(constraint[key])}
|
|
604
|
+
else:
|
|
605
|
+
# get values from constraints with additional filters
|
|
606
|
+
constraints_input_params = {k: v for k, v in non_empty_kwargs.items()}
|
|
607
|
+
constraint_params = get_constraint_queryables_with_additional_params(
|
|
608
|
+
constraints, constraints_input_params, self, product_type
|
|
609
|
+
)
|
|
610
|
+
# query params that are not in constraints but might be default queryables
|
|
611
|
+
if len(constraint_params) == 1 and "not_available" in constraint_params:
|
|
612
|
+
not_queryables = set()
|
|
613
|
+
for constraint_param in constraint_params["not_available"]["enum"]:
|
|
614
|
+
param = CommonQueryables.get_queryable_from_alias(constraint_param)
|
|
615
|
+
if param in dict(
|
|
616
|
+
CommonQueryables.model_fields, **default_queryables
|
|
617
|
+
):
|
|
618
|
+
non_empty_kwargs.pop(constraint_param)
|
|
619
|
+
else:
|
|
620
|
+
not_queryables.add(constraint_param)
|
|
621
|
+
if not_queryables:
|
|
622
|
+
raise ValidationError(
|
|
623
|
+
f"parameter(s) {str(not_queryables)} not queryable"
|
|
624
|
+
)
|
|
625
|
+
else:
|
|
626
|
+
# get constraints again without common queryables
|
|
627
|
+
constraint_params = (
|
|
628
|
+
get_constraint_queryables_with_additional_params(
|
|
629
|
+
constraints, non_empty_kwargs, self, product_type
|
|
630
|
+
)
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
field_definitions = dict()
|
|
634
|
+
for json_param, json_mtd in constraint_params.items():
|
|
635
|
+
param = (
|
|
636
|
+
get_queryable_from_provider(
|
|
637
|
+
json_param, self.get_metadata_mapping(product_type)
|
|
638
|
+
)
|
|
639
|
+
or json_param
|
|
640
|
+
)
|
|
641
|
+
default = kwargs.get(param, None) or self.config.products.get(
|
|
642
|
+
product_type, {}
|
|
643
|
+
).get(param, None)
|
|
644
|
+
annotated_def = json_field_definition_to_python(
|
|
645
|
+
json_mtd, default_value=default, required=True
|
|
646
|
+
)
|
|
647
|
+
field_definitions[param] = get_args(annotated_def)
|
|
648
|
+
|
|
649
|
+
python_queryables = create_model("m", **field_definitions).model_fields
|
|
650
|
+
return dict(default_queryables, **model_fields_to_annotated(python_queryables))
|
|
651
|
+
|
|
447
652
|
def query(
|
|
448
653
|
self,
|
|
449
|
-
|
|
450
|
-
items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
|
|
451
|
-
page: int = DEFAULT_PAGE,
|
|
452
|
-
count: bool = True,
|
|
654
|
+
prep: PreparedSearch = PreparedSearch(),
|
|
453
655
|
**kwargs: Any,
|
|
454
656
|
) -> Tuple[List[EOProduct], Optional[int]]:
|
|
455
657
|
"""Perform a search on an OpenSearch-like interface
|
|
456
658
|
|
|
457
|
-
:param
|
|
458
|
-
|
|
459
|
-
:type items_per_page: int
|
|
460
|
-
:param page: (optional) The page number to return
|
|
461
|
-
:type page: int
|
|
462
|
-
:param count: (optional) To trigger a count request
|
|
463
|
-
:type count: bool
|
|
659
|
+
:param prep: Object collecting needed information for search.
|
|
660
|
+
:type prep: :class:`~eodag.plugins.search.PreparedSearch`
|
|
464
661
|
"""
|
|
465
|
-
|
|
662
|
+
count = prep.count
|
|
663
|
+
product_type = kwargs.get("productType", prep.product_type)
|
|
466
664
|
if product_type == GENERIC_PRODUCT_TYPE:
|
|
467
665
|
logger.warning(
|
|
468
666
|
"GENERIC_PRODUCT_TYPE is not a real product_type and should only be used internally as a template"
|
|
469
667
|
)
|
|
470
|
-
return [], 0
|
|
471
|
-
|
|
472
|
-
|
|
668
|
+
return ([], 0) if prep.count else ([], None)
|
|
669
|
+
|
|
670
|
+
sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
|
|
671
|
+
prep.sort_by_qs, _ = (
|
|
672
|
+
("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
|
|
673
|
+
)
|
|
473
674
|
|
|
474
675
|
provider_product_type = self.map_product_type(product_type)
|
|
475
676
|
keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}
|
|
@@ -480,48 +681,56 @@ class QueryStringSearch(Search):
|
|
|
480
681
|
)
|
|
481
682
|
|
|
482
683
|
# provider product type specific conf
|
|
483
|
-
|
|
684
|
+
prep.product_type_def_params = (
|
|
484
685
|
self.get_product_type_def_params(product_type, **kwargs)
|
|
485
686
|
if product_type is not None
|
|
486
687
|
else {}
|
|
487
688
|
)
|
|
488
689
|
|
|
489
690
|
# if product_type_def_params is set, remove product_type as it may conflict with this conf
|
|
490
|
-
if
|
|
691
|
+
if prep.product_type_def_params:
|
|
491
692
|
keywords.pop("productType", None)
|
|
492
693
|
|
|
493
694
|
if self.config.metadata_mapping:
|
|
494
695
|
product_type_metadata_mapping = dict(
|
|
495
696
|
self.config.metadata_mapping,
|
|
496
|
-
**
|
|
697
|
+
**prep.product_type_def_params.get("metadata_mapping", {}),
|
|
497
698
|
)
|
|
498
699
|
keywords.update(
|
|
499
700
|
{
|
|
500
701
|
k: v
|
|
501
|
-
for k, v in
|
|
702
|
+
for k, v in prep.product_type_def_params.items()
|
|
502
703
|
if k not in keywords.keys()
|
|
503
704
|
and k in product_type_metadata_mapping.keys()
|
|
504
705
|
and isinstance(product_type_metadata_mapping[k], list)
|
|
505
706
|
}
|
|
506
707
|
)
|
|
507
708
|
|
|
709
|
+
if product_type is None:
|
|
710
|
+
raise ValidationError("Required productType is missing")
|
|
711
|
+
|
|
508
712
|
qp, qs = self.build_query_string(product_type, **keywords)
|
|
509
713
|
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
714
|
+
prep.query_params = qp
|
|
715
|
+
prep.query_string = qs
|
|
716
|
+
prep.search_urls, total_items = self.collect_search_urls(
|
|
717
|
+
prep,
|
|
718
|
+
**kwargs,
|
|
514
719
|
)
|
|
515
|
-
if not count and hasattr(
|
|
720
|
+
if not count and hasattr(prep, "total_items_nb"):
|
|
516
721
|
# do not try to extract total_items from search results if count is False
|
|
517
|
-
del
|
|
518
|
-
del
|
|
519
|
-
|
|
520
|
-
provider_results = self.do_search(
|
|
521
|
-
if count and total_items is None and hasattr(
|
|
522
|
-
total_items =
|
|
523
|
-
|
|
524
|
-
|
|
722
|
+
del prep.total_items_nb
|
|
723
|
+
del prep.need_count
|
|
724
|
+
|
|
725
|
+
provider_results = self.do_search(prep, **kwargs)
|
|
726
|
+
if count and total_items is None and hasattr(prep, "total_items_nb"):
|
|
727
|
+
total_items = prep.total_items_nb
|
|
728
|
+
|
|
729
|
+
raw_search_result = RawSearchResult(provider_results)
|
|
730
|
+
raw_search_result.query_params = prep.query_params
|
|
731
|
+
raw_search_result.product_type_def_params = prep.product_type_def_params
|
|
732
|
+
|
|
733
|
+
eo_products = self.normalize_results(raw_search_result, **kwargs)
|
|
525
734
|
return eo_products, total_items
|
|
526
735
|
|
|
527
736
|
@_deprecated(
|
|
@@ -538,11 +747,11 @@ class QueryStringSearch(Search):
|
|
|
538
747
|
) -> Tuple[Dict[str, Any], str]:
|
|
539
748
|
"""Build The query string using the search parameters"""
|
|
540
749
|
logger.debug("Building the query string that will be used for search")
|
|
541
|
-
query_params = format_query_params(product_type, self.config,
|
|
750
|
+
query_params = format_query_params(product_type, self.config, kwargs)
|
|
542
751
|
|
|
543
752
|
# Build the final query string, in one go without quoting it
|
|
544
753
|
# (some providers do not operate well with urlencoded and quoted query strings)
|
|
545
|
-
quote_via: Callable[[Any], str] = lambda x, *_args, **_kwargs: x
|
|
754
|
+
quote_via: Callable[[Any, str, str, str], str] = lambda x, *_args, **_kwargs: x
|
|
546
755
|
return (
|
|
547
756
|
query_params,
|
|
548
757
|
urlencode(query_params, doseq=True, quote_via=quote_via),
|
|
@@ -550,22 +759,31 @@ class QueryStringSearch(Search):
|
|
|
550
759
|
|
|
551
760
|
def collect_search_urls(
|
|
552
761
|
self,
|
|
553
|
-
|
|
554
|
-
items_per_page: Optional[int] = None,
|
|
555
|
-
count: bool = True,
|
|
762
|
+
prep: PreparedSearch = PreparedSearch(page=None, items_per_page=None),
|
|
556
763
|
**kwargs: Any,
|
|
557
764
|
) -> Tuple[List[str], Optional[int]]:
|
|
558
765
|
"""Build paginated urls"""
|
|
766
|
+
page = prep.page
|
|
767
|
+
items_per_page = prep.items_per_page
|
|
768
|
+
count = prep.count
|
|
769
|
+
|
|
559
770
|
urls = []
|
|
560
771
|
total_results = 0 if count else None
|
|
561
772
|
|
|
773
|
+
# use only sort_by parameters for search, not for count
|
|
774
|
+
# and remove potential leading '&'
|
|
775
|
+
qs_with_sort = (prep.query_string + getattr(prep, "sort_by_qs", "")).strip("&")
|
|
776
|
+
# append count template if needed
|
|
777
|
+
if count:
|
|
778
|
+
qs_with_sort += self.config.pagination.get("count_tpl", "")
|
|
779
|
+
|
|
562
780
|
if "count_endpoint" not in self.config.pagination:
|
|
563
781
|
# if count_endpoint is not set, total_results should be extracted from search result
|
|
564
782
|
total_results = None
|
|
565
|
-
|
|
566
|
-
|
|
783
|
+
prep.need_count = True
|
|
784
|
+
prep.total_items_nb = None
|
|
567
785
|
|
|
568
|
-
for collection in self.get_collections(**kwargs):
|
|
786
|
+
for collection in self.get_collections(prep, **kwargs):
|
|
569
787
|
# skip empty collection if one is required in api_endpoint
|
|
570
788
|
if "{collection}" in self.config.api_endpoint and not collection:
|
|
571
789
|
continue
|
|
@@ -573,12 +791,13 @@ class QueryStringSearch(Search):
|
|
|
573
791
|
collection=collection
|
|
574
792
|
)
|
|
575
793
|
if page is not None and items_per_page is not None:
|
|
794
|
+
page = page - 1 + self.config.pagination.get("start_page", 1)
|
|
576
795
|
if count:
|
|
577
796
|
count_endpoint = self.config.pagination.get(
|
|
578
797
|
"count_endpoint", ""
|
|
579
798
|
).format(collection=collection)
|
|
580
799
|
if count_endpoint:
|
|
581
|
-
count_url = "{}?{}".format(count_endpoint,
|
|
800
|
+
count_url = "{}?{}".format(count_endpoint, prep.query_string)
|
|
582
801
|
_total_results = (
|
|
583
802
|
self.count_hits(
|
|
584
803
|
count_url, result_type=self.config.result_type
|
|
@@ -594,30 +813,31 @@ class QueryStringSearch(Search):
|
|
|
594
813
|
total_results += _total_results or 0
|
|
595
814
|
next_url = self.config.pagination["next_page_url_tpl"].format(
|
|
596
815
|
url=search_endpoint,
|
|
597
|
-
search=
|
|
816
|
+
search=qs_with_sort,
|
|
598
817
|
items_per_page=items_per_page,
|
|
599
818
|
page=page,
|
|
600
819
|
skip=(page - 1) * items_per_page,
|
|
601
820
|
skip_base_1=(page - 1) * items_per_page + 1,
|
|
602
821
|
)
|
|
603
822
|
else:
|
|
604
|
-
next_url = "{}?{}".format(search_endpoint,
|
|
823
|
+
next_url = "{}?{}".format(search_endpoint, qs_with_sort)
|
|
605
824
|
urls.append(next_url)
|
|
606
825
|
return urls, total_results
|
|
607
826
|
|
|
608
827
|
def do_search(
|
|
609
|
-
self,
|
|
828
|
+
self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
|
|
610
829
|
) -> List[Any]:
|
|
611
830
|
"""Perform the actual search request.
|
|
612
831
|
|
|
613
832
|
If there is a specified number of items per page, return the results as soon
|
|
614
833
|
as this number is reached
|
|
615
834
|
|
|
616
|
-
:param
|
|
617
|
-
:type
|
|
835
|
+
:param prep: Object collecting needed information for search.
|
|
836
|
+
:type prep: :class:`~eodag.plugins.search.PreparedSearch`
|
|
618
837
|
"""
|
|
838
|
+
items_per_page = prep.items_per_page
|
|
619
839
|
total_items_nb = 0
|
|
620
|
-
if getattr(
|
|
840
|
+
if getattr(prep, "need_count", False):
|
|
621
841
|
# extract total_items_nb from search results
|
|
622
842
|
if self.config.result_type == "json":
|
|
623
843
|
total_items_nb_key_path_parsed = self.config.pagination[
|
|
@@ -625,13 +845,17 @@ class QueryStringSearch(Search):
|
|
|
625
845
|
]
|
|
626
846
|
|
|
627
847
|
results: List[Any] = []
|
|
628
|
-
for search_url in
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
"instance:".format(self.provider, self.__class__.__name__),
|
|
848
|
+
for search_url in prep.search_urls:
|
|
849
|
+
single_search_prep = copy_copy(prep)
|
|
850
|
+
single_search_prep.url = search_url
|
|
851
|
+
single_search_prep.info_message = "Sending search request: {}".format(
|
|
852
|
+
search_url
|
|
634
853
|
)
|
|
854
|
+
single_search_prep.exception_message = (
|
|
855
|
+
"Skipping error while searching for {} {} "
|
|
856
|
+
"instance:".format(self.provider, self.__class__.__name__)
|
|
857
|
+
)
|
|
858
|
+
response = self._request(single_search_prep)
|
|
635
859
|
next_page_url_key_path = self.config.pagination.get(
|
|
636
860
|
"next_page_url_key_path", None
|
|
637
861
|
)
|
|
@@ -658,7 +882,7 @@ class QueryStringSearch(Search):
|
|
|
658
882
|
"Setting the next page url from an XML response has not "
|
|
659
883
|
"been implemented yet"
|
|
660
884
|
)
|
|
661
|
-
if getattr(
|
|
885
|
+
if getattr(prep, "need_count", False):
|
|
662
886
|
# extract total_items_nb from search results
|
|
663
887
|
try:
|
|
664
888
|
total_nb_results_xpath = root_node.xpath(
|
|
@@ -724,7 +948,7 @@ class QueryStringSearch(Search):
|
|
|
724
948
|
if not isinstance(result, list):
|
|
725
949
|
result = [result]
|
|
726
950
|
|
|
727
|
-
if getattr(
|
|
951
|
+
if getattr(prep, "need_count", False):
|
|
728
952
|
# extract total_items_nb from search results
|
|
729
953
|
try:
|
|
730
954
|
_total_items_nb = total_items_nb_key_path_parsed.find(
|
|
@@ -746,15 +970,22 @@ class QueryStringSearch(Search):
|
|
|
746
970
|
)
|
|
747
971
|
else:
|
|
748
972
|
results.extend(result)
|
|
749
|
-
if getattr(
|
|
750
|
-
|
|
751
|
-
del
|
|
973
|
+
if getattr(prep, "need_count", False):
|
|
974
|
+
prep.total_items_nb = total_items_nb
|
|
975
|
+
del prep.need_count
|
|
976
|
+
# remove prep.total_items_nb if value could not be extracted from response
|
|
977
|
+
if (
|
|
978
|
+
hasattr(prep, "total_items_nb")
|
|
979
|
+
and not prep.total_items_nb
|
|
980
|
+
and len(results) > 0
|
|
981
|
+
):
|
|
982
|
+
del prep.total_items_nb
|
|
752
983
|
if items_per_page is not None and len(results) == items_per_page:
|
|
753
984
|
return results
|
|
754
985
|
return results
|
|
755
986
|
|
|
756
987
|
def normalize_results(
|
|
757
|
-
self, results:
|
|
988
|
+
self, results: RawSearchResult, **kwargs: Any
|
|
758
989
|
) -> List[EOProduct]:
|
|
759
990
|
"""Build EOProducts from provider results"""
|
|
760
991
|
normalize_remaining_count = len(results)
|
|
@@ -777,6 +1008,8 @@ class QueryStringSearch(Search):
|
|
|
777
1008
|
product.properties = dict(
|
|
778
1009
|
getattr(self.config, "product_type_config", {}), **product.properties
|
|
779
1010
|
)
|
|
1011
|
+
# move assets from properties to product's attr
|
|
1012
|
+
product.assets.update(product.properties.pop("assets", {}))
|
|
780
1013
|
products.append(product)
|
|
781
1014
|
return products
|
|
782
1015
|
|
|
@@ -785,10 +1018,12 @@ class QueryStringSearch(Search):
|
|
|
785
1018
|
# Handle a very annoying special case :'(
|
|
786
1019
|
url = count_url.replace("$format=json&", "")
|
|
787
1020
|
response = self._request(
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
1021
|
+
PreparedSearch(
|
|
1022
|
+
url=url,
|
|
1023
|
+
info_message="Sending count request: {}".format(url),
|
|
1024
|
+
exception_message="Skipping error while counting results for {} {} "
|
|
1025
|
+
"instance:".format(self.provider, self.__class__.__name__),
|
|
1026
|
+
)
|
|
792
1027
|
)
|
|
793
1028
|
if result_type == "xml":
|
|
794
1029
|
root_node = etree.fromstring(response.content)
|
|
@@ -806,13 +1041,18 @@ class QueryStringSearch(Search):
|
|
|
806
1041
|
total_results = int(count_results)
|
|
807
1042
|
return total_results
|
|
808
1043
|
|
|
809
|
-
def get_collections(
|
|
1044
|
+
def get_collections(
|
|
1045
|
+
self, prep: PreparedSearch, **kwargs: Any
|
|
1046
|
+
) -> Tuple[Set[Dict[str, Any]], ...]:
|
|
810
1047
|
"""Get the collection to which the product belongs"""
|
|
811
1048
|
# See https://earth.esa.int/web/sentinel/missions/sentinel-2/news/-
|
|
812
1049
|
# /asset_publisher/Ac0d/content/change-of
|
|
813
1050
|
# -format-for-new-sentinel-2-level-1c-products-starting-on-6-december
|
|
814
1051
|
product_type: Optional[str] = kwargs.get("productType")
|
|
815
|
-
if product_type is None and
|
|
1052
|
+
if product_type is None and (
|
|
1053
|
+
not hasattr(prep, "product_type_def_params")
|
|
1054
|
+
or not prep.product_type_def_params
|
|
1055
|
+
):
|
|
816
1056
|
collections: Set[Dict[str, Any]] = set()
|
|
817
1057
|
collection: Optional[str] = getattr(self.config, "collection", None)
|
|
818
1058
|
if collection is None:
|
|
@@ -835,28 +1075,33 @@ class QueryStringSearch(Search):
|
|
|
835
1075
|
collection: Optional[str] = getattr(self.config, "collection", None)
|
|
836
1076
|
if collection is None:
|
|
837
1077
|
collection = (
|
|
838
|
-
|
|
1078
|
+
prep.product_type_def_params.get("collection", None) or product_type
|
|
839
1079
|
)
|
|
840
1080
|
return (collection,) if not isinstance(collection, list) else tuple(collection)
|
|
841
1081
|
|
|
842
1082
|
def _request(
|
|
843
1083
|
self,
|
|
844
|
-
|
|
845
|
-
info_message: Optional[str] = None,
|
|
846
|
-
exception_message: Optional[str] = None,
|
|
1084
|
+
prep: PreparedSearch,
|
|
847
1085
|
) -> Response:
|
|
1086
|
+
url = prep.url
|
|
1087
|
+
info_message = prep.info_message
|
|
1088
|
+
exception_message = prep.exception_message
|
|
848
1089
|
try:
|
|
849
1090
|
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
1091
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
1092
|
+
|
|
1093
|
+
ssl_ctx = get_ssl_context(ssl_verify)
|
|
850
1094
|
# auth if needed
|
|
851
1095
|
kwargs: Dict[str, Any] = {}
|
|
852
1096
|
if (
|
|
853
1097
|
getattr(self.config, "need_auth", False)
|
|
854
|
-
and hasattr(
|
|
855
|
-
and callable(
|
|
1098
|
+
and hasattr(prep, "auth")
|
|
1099
|
+
and callable(prep.auth)
|
|
856
1100
|
):
|
|
857
|
-
kwargs["auth"] =
|
|
1101
|
+
kwargs["auth"] = prep.auth
|
|
858
1102
|
# requests auto quote url params, without any option to prevent it
|
|
859
1103
|
# use urllib instead of requests if req must be sent unquoted
|
|
1104
|
+
|
|
860
1105
|
if hasattr(self.config, "dont_quote"):
|
|
861
1106
|
# keep unquoted desired params
|
|
862
1107
|
base_url, params = url.split("?") if "?" in url else (url, "")
|
|
@@ -868,21 +1113,27 @@ class QueryStringSearch(Search):
|
|
|
868
1113
|
req = requests.Request(
|
|
869
1114
|
method="GET", url=base_url, headers=USER_AGENT, **kwargs
|
|
870
1115
|
)
|
|
871
|
-
|
|
872
|
-
|
|
1116
|
+
req_prep = req.prepare()
|
|
1117
|
+
req_prep.url = base_url + "?" + qry
|
|
873
1118
|
# send urllib req
|
|
874
1119
|
if info_message:
|
|
875
|
-
logger.info(info_message.replace(url,
|
|
876
|
-
urllib_req = Request(
|
|
877
|
-
urllib_response = urlopen(urllib_req, timeout=timeout)
|
|
1120
|
+
logger.info(info_message.replace(url, req_prep.url))
|
|
1121
|
+
urllib_req = Request(req_prep.url, headers=USER_AGENT)
|
|
1122
|
+
urllib_response = urlopen(urllib_req, timeout=timeout, context=ssl_ctx)
|
|
878
1123
|
# build Response
|
|
879
1124
|
adapter = HTTPAdapter()
|
|
880
|
-
response = cast(
|
|
1125
|
+
response = cast(
|
|
1126
|
+
Response, adapter.build_response(req_prep, urllib_response)
|
|
1127
|
+
)
|
|
881
1128
|
else:
|
|
882
1129
|
if info_message:
|
|
883
1130
|
logger.info(info_message)
|
|
884
1131
|
response = requests.get(
|
|
885
|
-
url,
|
|
1132
|
+
url,
|
|
1133
|
+
timeout=timeout,
|
|
1134
|
+
headers=USER_AGENT,
|
|
1135
|
+
verify=ssl_verify,
|
|
1136
|
+
**kwargs,
|
|
886
1137
|
)
|
|
887
1138
|
response.raise_for_status()
|
|
888
1139
|
except requests.exceptions.Timeout as exc:
|
|
@@ -903,34 +1154,6 @@ class QueryStringSearch(Search):
|
|
|
903
1154
|
return response
|
|
904
1155
|
|
|
905
1156
|
|
|
906
|
-
class AwsSearch(QueryStringSearch):
|
|
907
|
-
"""A specialisation of RestoSearch that modifies the way the EOProducts are built
|
|
908
|
-
from the search results"""
|
|
909
|
-
|
|
910
|
-
def normalize_results(
|
|
911
|
-
self, results: List[Dict[str, Any]], **kwargs: Any
|
|
912
|
-
) -> List[EOProduct]:
|
|
913
|
-
"""Transform metadata from provider representation to eodag representation"""
|
|
914
|
-
normalized: List[EOProduct] = []
|
|
915
|
-
logger.debug("Adapting plugin results to eodag product representation")
|
|
916
|
-
for result in results:
|
|
917
|
-
ref = result["properties"]["title"].split("_")[5]
|
|
918
|
-
year = result["properties"]["completionDate"][0:4]
|
|
919
|
-
month = str(int(result["properties"]["completionDate"][5:7]))
|
|
920
|
-
day = str(int(result["properties"]["completionDate"][8:10]))
|
|
921
|
-
|
|
922
|
-
properties = QueryStringSearch.extract_properties[self.config.result_type](
|
|
923
|
-
result, self.get_metadata_mapping(kwargs.get("productType"))
|
|
924
|
-
)
|
|
925
|
-
|
|
926
|
-
properties["downloadLink"] = (
|
|
927
|
-
"s3://tiles/{ref[1]}{ref[2]}/{ref[3]}/{ref[4]}{ref[5]}/{year}/"
|
|
928
|
-
"{month}/{day}/0/"
|
|
929
|
-
).format(**locals())
|
|
930
|
-
normalized.append(EOProduct(self.provider, properties, **kwargs))
|
|
931
|
-
return normalized
|
|
932
|
-
|
|
933
|
-
|
|
934
1157
|
class ODataV4Search(QueryStringSearch):
|
|
935
1158
|
"""A specialisation of a QueryStringSearch that does a two step search to retrieve
|
|
936
1159
|
all products metadata"""
|
|
@@ -948,18 +1171,24 @@ class ODataV4Search(QueryStringSearch):
|
|
|
948
1171
|
metadata_path
|
|
949
1172
|
)
|
|
950
1173
|
|
|
951
|
-
def do_search(
|
|
1174
|
+
def do_search(
|
|
1175
|
+
self, prep: PreparedSearch = PreparedSearch(), **kwargs: Any
|
|
1176
|
+
) -> List[Any]:
|
|
952
1177
|
"""A two step search can be performed if the metadata are not given into the search result"""
|
|
953
1178
|
|
|
954
1179
|
if getattr(self.config, "per_product_metadata_query", False):
|
|
955
1180
|
final_result = []
|
|
1181
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
956
1182
|
# Query the products entity set for basic metadata about the product
|
|
957
|
-
for entity in super(ODataV4Search, self).do_search(
|
|
1183
|
+
for entity in super(ODataV4Search, self).do_search(prep, **kwargs):
|
|
958
1184
|
metadata_url = self.get_metadata_search_url(entity)
|
|
959
1185
|
try:
|
|
960
1186
|
logger.debug("Sending metadata request: %s", metadata_url)
|
|
961
1187
|
response = requests.get(
|
|
962
|
-
metadata_url,
|
|
1188
|
+
metadata_url,
|
|
1189
|
+
headers=USER_AGENT,
|
|
1190
|
+
timeout=HTTP_REQ_TIMEOUT,
|
|
1191
|
+
verify=ssl_verify,
|
|
963
1192
|
)
|
|
964
1193
|
response.raise_for_status()
|
|
965
1194
|
except requests.exceptions.Timeout as exc:
|
|
@@ -977,7 +1206,7 @@ class ODataV4Search(QueryStringSearch):
|
|
|
977
1206
|
final_result.append(entity)
|
|
978
1207
|
return final_result
|
|
979
1208
|
else:
|
|
980
|
-
return super(ODataV4Search, self).do_search(
|
|
1209
|
+
return super(ODataV4Search, self).do_search(prep, **kwargs)
|
|
981
1210
|
|
|
982
1211
|
def get_metadata_search_url(self, entity: Dict[str, Any]) -> str:
|
|
983
1212
|
"""Build the metadata link for the given entity"""
|
|
@@ -986,7 +1215,7 @@ class ODataV4Search(QueryStringSearch):
|
|
|
986
1215
|
)
|
|
987
1216
|
|
|
988
1217
|
def normalize_results(
|
|
989
|
-
self, results:
|
|
1218
|
+
self, results: RawSearchResult, **kwargs: Any
|
|
990
1219
|
) -> List[EOProduct]:
|
|
991
1220
|
"""Build EOProducts from provider results
|
|
992
1221
|
|
|
@@ -1022,41 +1251,55 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1022
1251
|
|
|
1023
1252
|
def query(
|
|
1024
1253
|
self,
|
|
1025
|
-
|
|
1026
|
-
items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
|
|
1027
|
-
page: int = DEFAULT_PAGE,
|
|
1028
|
-
count: bool = True,
|
|
1254
|
+
prep: PreparedSearch = PreparedSearch(),
|
|
1029
1255
|
**kwargs: Any,
|
|
1030
1256
|
) -> Tuple[List[EOProduct], Optional[int]]:
|
|
1031
1257
|
"""Perform a search on an OpenSearch-like interface"""
|
|
1032
1258
|
product_type = kwargs.get("productType", None)
|
|
1259
|
+
count = prep.count
|
|
1033
1260
|
# remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
|
|
1034
1261
|
kwargs.pop("product_type", None)
|
|
1262
|
+
sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
|
|
1263
|
+
_, sort_by_qp = (
|
|
1264
|
+
("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
|
|
1265
|
+
)
|
|
1035
1266
|
provider_product_type = self.map_product_type(product_type)
|
|
1036
|
-
|
|
1267
|
+
_dc_qs = kwargs.pop("_dc_qs", None)
|
|
1268
|
+
if _dc_qs is not None:
|
|
1269
|
+
qs = unquote_plus(unquote_plus(_dc_qs))
|
|
1270
|
+
qp = geojson.loads(qs)
|
|
1271
|
+
|
|
1272
|
+
# provider product type specific conf
|
|
1273
|
+
prep.product_type_def_params = self.get_product_type_def_params(
|
|
1274
|
+
product_type, **kwargs
|
|
1275
|
+
)
|
|
1276
|
+
else:
|
|
1277
|
+
keywords = {
|
|
1278
|
+
k: v for k, v in kwargs.items() if k != "auth" and v is not None
|
|
1279
|
+
}
|
|
1037
1280
|
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1281
|
+
if provider_product_type and provider_product_type != GENERIC_PRODUCT_TYPE:
|
|
1282
|
+
keywords["productType"] = provider_product_type
|
|
1283
|
+
elif product_type:
|
|
1284
|
+
keywords["productType"] = product_type
|
|
1042
1285
|
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1286
|
+
# provider product type specific conf
|
|
1287
|
+
prep.product_type_def_params = self.get_product_type_def_params(
|
|
1288
|
+
product_type, **kwargs
|
|
1289
|
+
)
|
|
1047
1290
|
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1291
|
+
# Add to the query, the queryable parameters set in the provider product type definition
|
|
1292
|
+
keywords.update(
|
|
1293
|
+
{
|
|
1294
|
+
k: v
|
|
1295
|
+
for k, v in prep.product_type_def_params.items()
|
|
1296
|
+
if k not in keywords.keys()
|
|
1297
|
+
and k in self.config.metadata_mapping.keys()
|
|
1298
|
+
and isinstance(self.config.metadata_mapping[k], list)
|
|
1299
|
+
}
|
|
1300
|
+
)
|
|
1058
1301
|
|
|
1059
|
-
|
|
1302
|
+
qp, _ = self.build_query_string(product_type, **keywords)
|
|
1060
1303
|
|
|
1061
1304
|
for query_param, query_value in qp.items():
|
|
1062
1305
|
if (
|
|
@@ -1091,7 +1334,7 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1091
1334
|
|
|
1092
1335
|
try:
|
|
1093
1336
|
eo_products, total_items = super(PostJsonSearch, self).query(
|
|
1094
|
-
|
|
1337
|
+
prep, **kwargs
|
|
1095
1338
|
)
|
|
1096
1339
|
except Exception:
|
|
1097
1340
|
raise
|
|
@@ -1108,61 +1351,98 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1108
1351
|
# stop searching right away
|
|
1109
1352
|
product_type_metadata_mapping = dict(
|
|
1110
1353
|
self.config.metadata_mapping,
|
|
1111
|
-
**
|
|
1354
|
+
**prep.product_type_def_params.get("metadata_mapping", {}),
|
|
1112
1355
|
)
|
|
1113
1356
|
if not qp and any(
|
|
1114
1357
|
k
|
|
1115
1358
|
for k in keywords.keys()
|
|
1116
1359
|
if isinstance(product_type_metadata_mapping.get(k, []), list)
|
|
1117
1360
|
):
|
|
1118
|
-
return [], 0
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
)
|
|
1123
|
-
if not count and getattr(self, "need_count", False):
|
|
1361
|
+
return ([], 0) if prep.count else ([], None)
|
|
1362
|
+
prep.query_params = dict(qp, **sort_by_qp)
|
|
1363
|
+
prep.search_urls, total_items = self.collect_search_urls(prep, **kwargs)
|
|
1364
|
+
if not count and getattr(prep, "need_count", False):
|
|
1124
1365
|
# do not try to extract total_items from search results if count is False
|
|
1125
|
-
del
|
|
1126
|
-
del
|
|
1127
|
-
provider_results = self.do_search(
|
|
1128
|
-
if count and total_items is None and hasattr(
|
|
1129
|
-
total_items =
|
|
1130
|
-
|
|
1131
|
-
|
|
1366
|
+
del prep.total_items_nb
|
|
1367
|
+
del prep.need_count
|
|
1368
|
+
provider_results = self.do_search(prep, **kwargs)
|
|
1369
|
+
if count and total_items is None and hasattr(prep, "total_items_nb"):
|
|
1370
|
+
total_items = prep.total_items_nb
|
|
1371
|
+
|
|
1372
|
+
raw_search_result = RawSearchResult(provider_results)
|
|
1373
|
+
raw_search_result.query_params = prep.query_params
|
|
1374
|
+
raw_search_result.product_type_def_params = prep.product_type_def_params
|
|
1375
|
+
|
|
1376
|
+
eo_products = self.normalize_results(raw_search_result, **kwargs)
|
|
1132
1377
|
return eo_products, total_items
|
|
1133
1378
|
|
|
1379
|
+
def normalize_results(
|
|
1380
|
+
self, results: RawSearchResult, **kwargs: Any
|
|
1381
|
+
) -> List[EOProduct]:
|
|
1382
|
+
"""Build EOProducts from provider results"""
|
|
1383
|
+
normalized = super().normalize_results(results, **kwargs)
|
|
1384
|
+
for product in normalized:
|
|
1385
|
+
if "downloadLink" in product.properties:
|
|
1386
|
+
decoded_link = unquote(product.properties["downloadLink"])
|
|
1387
|
+
if decoded_link[0] == "{": # not a url but a dict
|
|
1388
|
+
default_values = deepcopy(
|
|
1389
|
+
self.config.products.get(product.product_type, {})
|
|
1390
|
+
)
|
|
1391
|
+
default_values.pop("metadata_mapping", None)
|
|
1392
|
+
searched_values = orjson.loads(decoded_link)
|
|
1393
|
+
_dc_qs = orjson.dumps(
|
|
1394
|
+
format_query_params(
|
|
1395
|
+
product.product_type,
|
|
1396
|
+
self.config,
|
|
1397
|
+
{**default_values, **searched_values},
|
|
1398
|
+
)
|
|
1399
|
+
)
|
|
1400
|
+
product.properties["_dc_qs"] = quote_plus(_dc_qs)
|
|
1401
|
+
|
|
1402
|
+
# workaround to add product type to wekeo cmems order links
|
|
1403
|
+
if (
|
|
1404
|
+
"orderLink" in product.properties
|
|
1405
|
+
and "productType" in product.properties["orderLink"]
|
|
1406
|
+
):
|
|
1407
|
+
product.properties["orderLink"] = product.properties[
|
|
1408
|
+
"orderLink"
|
|
1409
|
+
].replace("productType", product.product_type)
|
|
1410
|
+
return normalized
|
|
1411
|
+
|
|
1134
1412
|
def collect_search_urls(
|
|
1135
1413
|
self,
|
|
1136
|
-
|
|
1137
|
-
items_per_page: Optional[int] = None,
|
|
1138
|
-
count: bool = True,
|
|
1414
|
+
prep: PreparedSearch = PreparedSearch(),
|
|
1139
1415
|
**kwargs: Any,
|
|
1140
1416
|
) -> Tuple[List[str], Optional[int]]:
|
|
1141
1417
|
"""Adds pagination to query parameters, and auth to url"""
|
|
1418
|
+
page = prep.page
|
|
1419
|
+
items_per_page = prep.items_per_page
|
|
1420
|
+
count = prep.count
|
|
1142
1421
|
urls: List[str] = []
|
|
1143
1422
|
total_results = 0 if count else None
|
|
1144
1423
|
|
|
1145
1424
|
if "count_endpoint" not in self.config.pagination:
|
|
1146
1425
|
# if count_endpoint is not set, total_results should be extracted from search result
|
|
1147
1426
|
total_results = None
|
|
1148
|
-
|
|
1149
|
-
|
|
1427
|
+
prep.need_count = True
|
|
1428
|
+
prep.total_items_nb = None
|
|
1150
1429
|
|
|
1151
|
-
if
|
|
1152
|
-
auth_conf_dict = getattr(
|
|
1430
|
+
if prep.auth_plugin is not None and hasattr(prep.auth_plugin, "config"):
|
|
1431
|
+
auth_conf_dict = getattr(prep.auth_plugin.config, "credentials", {})
|
|
1153
1432
|
else:
|
|
1154
1433
|
auth_conf_dict = {}
|
|
1155
|
-
for collection in self.get_collections(**kwargs):
|
|
1434
|
+
for collection in self.get_collections(prep, **kwargs):
|
|
1156
1435
|
try:
|
|
1157
1436
|
search_endpoint: str = self.config.api_endpoint.rstrip("/").format(
|
|
1158
1437
|
**dict(collection=collection, **auth_conf_dict)
|
|
1159
1438
|
)
|
|
1160
1439
|
except KeyError as e:
|
|
1440
|
+
provider = prep.auth_plugin.provider if prep.auth_plugin else ""
|
|
1161
1441
|
raise MisconfiguredError(
|
|
1162
|
-
"Missing %s in %s configuration"
|
|
1163
|
-
% (",".join(e.args), kwargs["auth"].provider)
|
|
1442
|
+
"Missing %s in %s configuration" % (",".join(e.args), provider)
|
|
1164
1443
|
)
|
|
1165
1444
|
if page is not None and items_per_page is not None:
|
|
1445
|
+
page = page - 1 + self.config.pagination.get("start_page", 1)
|
|
1166
1446
|
if count:
|
|
1167
1447
|
count_endpoint = self.config.pagination.get(
|
|
1168
1448
|
"count_endpoint", ""
|
|
@@ -1175,7 +1455,9 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1175
1455
|
total_results = _total_results or 0
|
|
1176
1456
|
else:
|
|
1177
1457
|
total_results += _total_results or 0
|
|
1178
|
-
if
|
|
1458
|
+
if "next_page_query_obj" in self.config.pagination and isinstance(
|
|
1459
|
+
self.config.pagination["next_page_query_obj"], str
|
|
1460
|
+
):
|
|
1179
1461
|
# next_page_query_obj needs to be parsed
|
|
1180
1462
|
next_page_query_obj = self.config.pagination[
|
|
1181
1463
|
"next_page_query_obj"
|
|
@@ -1186,7 +1468,7 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1186
1468
|
skip_base_1=(page - 1) * items_per_page + 1,
|
|
1187
1469
|
)
|
|
1188
1470
|
update_nested_dict(
|
|
1189
|
-
|
|
1471
|
+
prep.query_params, orjson.loads(next_page_query_obj)
|
|
1190
1472
|
)
|
|
1191
1473
|
|
|
1192
1474
|
urls.append(search_endpoint)
|
|
@@ -1194,32 +1476,39 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1194
1476
|
|
|
1195
1477
|
def _request(
|
|
1196
1478
|
self,
|
|
1197
|
-
|
|
1198
|
-
info_message: Optional[str] = None,
|
|
1199
|
-
exception_message: Optional[str] = None,
|
|
1479
|
+
prep: PreparedSearch,
|
|
1200
1480
|
) -> Response:
|
|
1481
|
+
url = prep.url
|
|
1482
|
+
info_message = prep.info_message
|
|
1483
|
+
exception_message = prep.exception_message
|
|
1201
1484
|
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
1485
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
1202
1486
|
try:
|
|
1203
1487
|
# auth if needed
|
|
1204
|
-
|
|
1488
|
+
RequestsKwargs = TypedDict(
|
|
1489
|
+
"RequestsKwargs", {"auth": AuthBase}, total=False
|
|
1490
|
+
)
|
|
1491
|
+
kwargs: RequestsKwargs = {}
|
|
1205
1492
|
if (
|
|
1206
1493
|
getattr(self.config, "need_auth", False)
|
|
1207
|
-
and hasattr(
|
|
1208
|
-
and callable(
|
|
1494
|
+
and hasattr(prep, "auth")
|
|
1495
|
+
and callable(prep.auth)
|
|
1209
1496
|
):
|
|
1210
|
-
kwargs["auth"] =
|
|
1497
|
+
kwargs["auth"] = prep.auth
|
|
1211
1498
|
|
|
1212
1499
|
# perform the request using the next page arguments if they are defined
|
|
1213
1500
|
if getattr(self, "next_page_query_obj", None):
|
|
1214
|
-
|
|
1501
|
+
prep.query_params = self.next_page_query_obj
|
|
1215
1502
|
if info_message:
|
|
1216
1503
|
logger.info(info_message)
|
|
1217
|
-
logger.debug("Query parameters: %s" %
|
|
1504
|
+
logger.debug("Query parameters: %s" % prep.query_params)
|
|
1505
|
+
logger.debug("Query kwargs: %s" % kwargs)
|
|
1218
1506
|
response = requests.post(
|
|
1219
1507
|
url,
|
|
1220
|
-
json=
|
|
1508
|
+
json=prep.query_params,
|
|
1221
1509
|
headers=USER_AGENT,
|
|
1222
1510
|
timeout=timeout,
|
|
1511
|
+
verify=ssl_verify,
|
|
1223
1512
|
**kwargs,
|
|
1224
1513
|
)
|
|
1225
1514
|
response.raise_for_status()
|
|
@@ -1252,7 +1541,10 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1252
1541
|
)
|
|
1253
1542
|
if "response" in locals():
|
|
1254
1543
|
logger.debug(response.content)
|
|
1255
|
-
|
|
1544
|
+
error_text = str(err)
|
|
1545
|
+
if getattr(err, "response", None) is not None:
|
|
1546
|
+
error_text = err.response.text
|
|
1547
|
+
raise RequestError(error_text) from err
|
|
1256
1548
|
return response
|
|
1257
1549
|
|
|
1258
1550
|
|
|
@@ -1268,18 +1560,29 @@ class StacSearch(PostJsonSearch):
|
|
|
1268
1560
|
# restore results_entry overwritten by init
|
|
1269
1561
|
self.config.results_entry = results_entry
|
|
1270
1562
|
|
|
1271
|
-
def
|
|
1272
|
-
self,
|
|
1273
|
-
) ->
|
|
1274
|
-
"""Build
|
|
1563
|
+
def build_query_string(
|
|
1564
|
+
self, product_type: str, **kwargs: Any
|
|
1565
|
+
) -> Tuple[Dict[str, Any], str]:
|
|
1566
|
+
"""Build The query string using the search parameters"""
|
|
1567
|
+
logger.debug("Building the query string that will be used for search")
|
|
1275
1568
|
|
|
1276
|
-
|
|
1569
|
+
# handle opened time intervals
|
|
1570
|
+
if any(
|
|
1571
|
+
k in kwargs
|
|
1572
|
+
for k in ("startTimeFromAscendingNode", "completionTimeFromAscendingNode")
|
|
1573
|
+
):
|
|
1574
|
+
kwargs.setdefault("startTimeFromAscendingNode", "..")
|
|
1575
|
+
kwargs.setdefault("completionTimeFromAscendingNode", "..")
|
|
1277
1576
|
|
|
1278
|
-
|
|
1279
|
-
for product in products:
|
|
1280
|
-
product.assets.update(product.properties.pop("assets", {}))
|
|
1577
|
+
query_params = format_query_params(product_type, self.config, kwargs)
|
|
1281
1578
|
|
|
1282
|
-
|
|
1579
|
+
# Build the final query string, in one go without quoting it
|
|
1580
|
+
# (some providers do not operate well with urlencoded and quoted query strings)
|
|
1581
|
+
quote_via: Callable[[Any, str, str, str], str] = lambda x, *_args, **_kwargs: x
|
|
1582
|
+
return (
|
|
1583
|
+
query_params,
|
|
1584
|
+
urlencode(query_params, doseq=True, quote_via=quote_via),
|
|
1585
|
+
)
|
|
1283
1586
|
|
|
1284
1587
|
def discover_queryables(
|
|
1285
1588
|
self, **kwargs: Any
|
|
@@ -1311,10 +1614,12 @@ class StacSearch(PostJsonSearch):
|
|
|
1311
1614
|
)
|
|
1312
1615
|
response = QueryStringSearch._request(
|
|
1313
1616
|
self,
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1617
|
+
PreparedSearch(
|
|
1618
|
+
url=fetch_url,
|
|
1619
|
+
info_message="Fetching queryables: {}".format(fetch_url),
|
|
1620
|
+
exception_message="Skipping error while fetching queryables for "
|
|
1621
|
+
"{} {} instance:".format(self.provider, self.__class__.__name__),
|
|
1622
|
+
),
|
|
1318
1623
|
)
|
|
1319
1624
|
except (RequestError, KeyError, AttributeError):
|
|
1320
1625
|
return None
|
|
@@ -1348,7 +1653,7 @@ class StacSearch(PostJsonSearch):
|
|
|
1348
1653
|
for json_param, json_mtd in json_queryables.items():
|
|
1349
1654
|
param = (
|
|
1350
1655
|
get_queryable_from_provider(
|
|
1351
|
-
json_param, self.
|
|
1656
|
+
json_param, self.get_metadata_mapping(product_type)
|
|
1352
1657
|
)
|
|
1353
1658
|
or json_param
|
|
1354
1659
|
)
|