eodag 2.12.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- eodag/__init__.py +6 -8
- eodag/api/core.py +654 -538
- eodag/api/product/__init__.py +12 -2
- eodag/api/product/_assets.py +59 -16
- eodag/api/product/_product.py +100 -93
- eodag/api/product/drivers/__init__.py +7 -2
- eodag/api/product/drivers/base.py +0 -3
- eodag/api/product/metadata_mapping.py +192 -96
- eodag/api/search_result.py +69 -10
- eodag/cli.py +55 -25
- eodag/config.py +391 -116
- eodag/plugins/apis/base.py +11 -165
- eodag/plugins/apis/ecmwf.py +36 -25
- eodag/plugins/apis/usgs.py +80 -35
- eodag/plugins/authentication/aws_auth.py +13 -4
- eodag/plugins/authentication/base.py +10 -1
- eodag/plugins/authentication/generic.py +2 -2
- eodag/plugins/authentication/header.py +31 -6
- eodag/plugins/authentication/keycloak.py +17 -84
- eodag/plugins/authentication/oauth.py +3 -3
- eodag/plugins/authentication/openid_connect.py +268 -49
- eodag/plugins/authentication/qsauth.py +4 -1
- eodag/plugins/authentication/sas_auth.py +9 -2
- eodag/plugins/authentication/token.py +98 -47
- eodag/plugins/authentication/token_exchange.py +122 -0
- eodag/plugins/crunch/base.py +3 -1
- eodag/plugins/crunch/filter_date.py +3 -9
- eodag/plugins/crunch/filter_latest_intersect.py +0 -3
- eodag/plugins/crunch/filter_latest_tpl_name.py +1 -4
- eodag/plugins/crunch/filter_overlap.py +4 -8
- eodag/plugins/crunch/filter_property.py +5 -11
- eodag/plugins/download/aws.py +149 -185
- eodag/plugins/download/base.py +88 -97
- eodag/plugins/download/creodias_s3.py +1 -1
- eodag/plugins/download/http.py +638 -310
- eodag/plugins/download/s3rest.py +47 -45
- eodag/plugins/manager.py +228 -88
- eodag/plugins/search/__init__.py +36 -0
- eodag/plugins/search/base.py +239 -30
- eodag/plugins/search/build_search_result.py +382 -37
- eodag/plugins/search/cop_marine.py +441 -0
- eodag/plugins/search/creodias_s3.py +25 -20
- eodag/plugins/search/csw.py +5 -7
- eodag/plugins/search/data_request_search.py +61 -30
- eodag/plugins/search/qssearch.py +713 -255
- eodag/plugins/search/static_stac_search.py +106 -40
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +1921 -34
- eodag/resources/providers.yml +4091 -3655
- eodag/resources/stac.yml +50 -216
- eodag/resources/stac_api.yml +71 -25
- eodag/resources/stac_provider.yml +5 -0
- eodag/resources/user_conf_template.yml +89 -32
- eodag/rest/__init__.py +6 -0
- eodag/rest/cache.py +70 -0
- eodag/rest/config.py +68 -0
- eodag/rest/constants.py +26 -0
- eodag/rest/core.py +735 -0
- eodag/rest/errors.py +178 -0
- eodag/rest/server.py +264 -431
- eodag/rest/stac.py +442 -836
- eodag/rest/types/collections_search.py +44 -0
- eodag/rest/types/eodag_search.py +238 -47
- eodag/rest/types/queryables.py +164 -0
- eodag/rest/types/stac_search.py +273 -0
- eodag/rest/utils/__init__.py +216 -0
- eodag/rest/utils/cql_evaluate.py +119 -0
- eodag/rest/utils/rfc3339.py +64 -0
- eodag/types/__init__.py +106 -10
- eodag/types/bbox.py +15 -14
- eodag/types/download_args.py +40 -0
- eodag/types/search_args.py +57 -7
- eodag/types/whoosh.py +79 -0
- eodag/utils/__init__.py +110 -91
- eodag/utils/constraints.py +37 -45
- eodag/utils/exceptions.py +39 -22
- eodag/utils/import_system.py +0 -4
- eodag/utils/logging.py +37 -80
- eodag/utils/notebook.py +4 -4
- eodag/utils/repr.py +113 -0
- eodag/utils/requests.py +128 -0
- eodag/utils/rest.py +100 -0
- eodag/utils/stac_reader.py +93 -21
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/METADATA +88 -53
- eodag-3.0.0.dist-info/RECORD +109 -0
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/WHEEL +1 -1
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/entry_points.txt +7 -5
- eodag/plugins/apis/cds.py +0 -540
- eodag/rest/types/stac_queryables.py +0 -134
- eodag/rest/utils.py +0 -1133
- eodag-2.12.0.dist-info/RECORD +0 -94
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/LICENSE +0 -0
- {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/top_level.txt +0 -0
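The headline API change in this release is visible in the `eodag/plugins/search/qssearch.py` diff below: the search plugin entry points (`query`, `collect_search_urls`, `do_search`, `_request`) no longer take loose `items_per_page`, `page` and `count` keyword arguments, and instead receive a single `PreparedSearch` object imported from `eodag.plugins.search`. A minimal sketch of the new plugin-level calling convention follows; the provider name (`peps`) and product type (`S2_MSI_L1C`) are assumptions that must exist in your configuration, and most applications keep calling `EODataAccessGateway.search()`, which builds the `PreparedSearch` internally:

```python
# Minimal sketch of the 3.0.0 plugin-level search call. Assumes the "peps"
# provider and "S2_MSI_L1C" product type are available in your configuration.
from eodag import EODataAccessGateway
from eodag.plugins.search import PreparedSearch

dag = EODataAccessGateway()
# get_search_plugins yields the search plugins configured for a provider
search_plugin = next(dag._plugins_manager.get_search_plugins(provider="peps"))

# 2.12.0 style (removed): search_plugin.query(page=1, items_per_page=20, count=True, ...)
# 3.0.0 style: pagination and the count flag travel inside a PreparedSearch
prep = PreparedSearch(page=1, items_per_page=20, count=True)
products, total_count = search_plugin.query(prep, productType="S2_MSI_L1C")
```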
eodag/plugins/search/qssearch.py
CHANGED
|
@@ -19,19 +19,44 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import logging
|
|
21
21
|
import re
|
|
22
|
-
from
|
|
23
|
-
from
|
|
22
|
+
from copy import copy as copy_copy
|
|
23
|
+
from datetime import datetime
|
|
24
|
+
from typing import (
|
|
25
|
+
TYPE_CHECKING,
|
|
26
|
+
Any,
|
|
27
|
+
Callable,
|
|
28
|
+
Dict,
|
|
29
|
+
List,
|
|
30
|
+
Optional,
|
|
31
|
+
Sequence,
|
|
32
|
+
Set,
|
|
33
|
+
Tuple,
|
|
34
|
+
TypedDict,
|
|
35
|
+
cast,
|
|
36
|
+
)
|
|
24
37
|
from urllib.error import URLError
|
|
38
|
+
from urllib.parse import (
|
|
39
|
+
parse_qsl,
|
|
40
|
+
quote_plus,
|
|
41
|
+
unquote,
|
|
42
|
+
unquote_plus,
|
|
43
|
+
urlparse,
|
|
44
|
+
urlunparse,
|
|
45
|
+
)
|
|
25
46
|
from urllib.request import Request, urlopen
|
|
26
47
|
|
|
48
|
+
import geojson
|
|
27
49
|
import orjson
|
|
28
50
|
import requests
|
|
29
51
|
import yaml
|
|
52
|
+
from dateutil.utils import today
|
|
53
|
+
from jsonpath_ng import JSONPath
|
|
30
54
|
from lxml import etree
|
|
31
55
|
from pydantic import create_model
|
|
32
56
|
from pydantic.fields import FieldInfo
|
|
33
57
|
from requests import Response
|
|
34
58
|
from requests.adapters import HTTPAdapter
|
|
59
|
+
from requests.auth import AuthBase
|
|
35
60
|
|
|
36
61
|
from eodag.api.product import EOProduct
|
|
37
62
|
from eodag.api.product.metadata_mapping import (
|
|
@@ -42,11 +67,13 @@ from eodag.api.product.metadata_mapping import (
|
|
|
42
67
|
properties_from_json,
|
|
43
68
|
properties_from_xml,
|
|
44
69
|
)
|
|
70
|
+
from eodag.api.search_result import RawSearchResult
|
|
71
|
+
from eodag.plugins.search import PreparedSearch
|
|
45
72
|
from eodag.plugins.search.base import Search
|
|
46
73
|
from eodag.types import json_field_definition_to_python, model_fields_to_annotated
|
|
74
|
+
from eodag.types.queryables import CommonQueryables
|
|
75
|
+
from eodag.types.search_args import SortByList
|
|
47
76
|
from eodag.utils import (
|
|
48
|
-
DEFAULT_ITEMS_PER_PAGE,
|
|
49
|
-
DEFAULT_PAGE,
|
|
50
77
|
GENERIC_PRODUCT_TYPE,
|
|
51
78
|
HTTP_REQ_TIMEOUT,
|
|
52
79
|
USER_AGENT,
|
|
@@ -56,16 +83,23 @@ from eodag.utils import (
|
|
|
56
83
|
dict_items_recursive_apply,
|
|
57
84
|
format_dict_items,
|
|
58
85
|
get_args,
|
|
86
|
+
get_ssl_context,
|
|
59
87
|
quote,
|
|
60
88
|
string_to_jsonpath,
|
|
61
89
|
update_nested_dict,
|
|
62
90
|
urlencode,
|
|
63
91
|
)
|
|
92
|
+
from eodag.utils.constraints import (
|
|
93
|
+
fetch_constraints,
|
|
94
|
+
get_constraint_queryables_with_additional_params,
|
|
95
|
+
)
|
|
64
96
|
from eodag.utils.exceptions import (
|
|
65
97
|
AuthenticationError,
|
|
66
98
|
MisconfiguredError,
|
|
99
|
+
PluginImplementationError,
|
|
67
100
|
RequestError,
|
|
68
101
|
TimeOutError,
|
|
102
|
+
ValidationError,
|
|
69
103
|
)
|
|
70
104
|
|
|
71
105
|
if TYPE_CHECKING:
|
|
@@ -167,13 +201,13 @@ class QueryStringSearch(Search):
|
|
|
167
201
|
``free_text_search_operations`` configuration parameter follow the same rule.
|
|
168
202
|
|
|
169
203
|
:param provider: An eodag providers configuration dictionary
|
|
170
|
-
:type provider: dict
|
|
171
204
|
:param config: Path to the user configuration file
|
|
172
|
-
:type config: str
|
|
173
205
|
"""
|
|
174
206
|
|
|
175
|
-
|
|
176
|
-
|
|
207
|
+
extract_properties: Dict[str, Callable[..., Dict[str, Any]]] = {
|
|
208
|
+
"xml": properties_from_xml,
|
|
209
|
+
"json": properties_from_json,
|
|
210
|
+
}
|
|
177
211
|
|
|
178
212
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
179
213
|
super(QueryStringSearch, self).__init__(provider, config)
|
|
@@ -251,6 +285,17 @@ class QueryStringSearch(Search):
|
|
|
251
285
|
"generic_product_type_parsable_metadata"
|
|
252
286
|
]
|
|
253
287
|
)
|
|
288
|
+
if (
|
|
289
|
+
"single_product_type_parsable_metadata"
|
|
290
|
+
in self.config.discover_product_types
|
|
291
|
+
):
|
|
292
|
+
self.config.discover_product_types[
|
|
293
|
+
"single_product_type_parsable_metadata"
|
|
294
|
+
] = mtd_cfg_as_conversion_and_querypath(
|
|
295
|
+
self.config.discover_product_types[
|
|
296
|
+
"single_product_type_parsable_metadata"
|
|
297
|
+
]
|
|
298
|
+
)
|
|
254
299
|
|
|
255
300
|
# parse jsonpath on init: queryables discovery
|
|
256
301
|
if (
|
|
@@ -316,35 +361,58 @@ class QueryStringSearch(Search):
|
|
|
316
361
|
self.next_page_query_obj = None
|
|
317
362
|
self.next_page_merge = None
|
|
318
363
|
|
|
319
|
-
def discover_product_types(self) -> Optional[Dict[str, Any]]:
|
|
364
|
+
def discover_product_types(self, **kwargs: Any) -> Optional[Dict[str, Any]]:
|
|
320
365
|
"""Fetch product types list from provider using `discover_product_types` conf
|
|
321
366
|
|
|
322
367
|
:returns: configuration dict containing fetched product types information
|
|
323
|
-
:rtype: (optional) dict
|
|
324
368
|
"""
|
|
325
369
|
try:
|
|
326
|
-
|
|
370
|
+
prep = PreparedSearch()
|
|
371
|
+
|
|
372
|
+
prep.url = cast(
|
|
327
373
|
str,
|
|
328
374
|
self.config.discover_product_types["fetch_url"].format(
|
|
329
375
|
**self.config.__dict__
|
|
330
376
|
),
|
|
331
377
|
)
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
378
|
+
|
|
379
|
+
# get auth if available
|
|
380
|
+
if "auth" in kwargs:
|
|
381
|
+
prep.auth = kwargs.pop("auth")
|
|
382
|
+
|
|
383
|
+
# try updating fetch_url qs using productType
|
|
384
|
+
fetch_qs_dict = {}
|
|
385
|
+
if "single_collection_fetch_qs" in self.config.discover_product_types:
|
|
386
|
+
try:
|
|
387
|
+
fetch_qs = self.config.discover_product_types[
|
|
388
|
+
"single_collection_fetch_qs"
|
|
389
|
+
].format(**kwargs)
|
|
390
|
+
fetch_qs_dict = dict(parse_qsl(fetch_qs))
|
|
391
|
+
except KeyError:
|
|
392
|
+
pass
|
|
393
|
+
if fetch_qs_dict:
|
|
394
|
+
url_parse = urlparse(prep.url)
|
|
395
|
+
query = url_parse.query
|
|
396
|
+
url_dict = dict(parse_qsl(query))
|
|
397
|
+
url_dict.update(fetch_qs_dict)
|
|
398
|
+
url_new_query = urlencode(url_dict)
|
|
399
|
+
url_parse = url_parse._replace(query=url_new_query)
|
|
400
|
+
prep.url = urlunparse(url_parse)
|
|
401
|
+
|
|
402
|
+
prep.info_message = "Fetching product types: {}".format(prep.url)
|
|
403
|
+
prep.exception_message = (
|
|
404
|
+
"Skipping error while fetching product types for " "{} {} instance:"
|
|
405
|
+
).format(self.provider, self.__class__.__name__)
|
|
406
|
+
|
|
407
|
+
response = QueryStringSearch._request(self, prep)
|
|
339
408
|
except (RequestError, KeyError, AttributeError):
|
|
340
409
|
return None
|
|
341
410
|
else:
|
|
342
411
|
try:
|
|
343
|
-
conf_update_dict = {
|
|
412
|
+
conf_update_dict: Dict[str, Any] = {
|
|
344
413
|
"providers_config": {},
|
|
345
414
|
"product_types_config": {},
|
|
346
415
|
}
|
|
347
|
-
|
|
348
416
|
if self.config.discover_product_types["result_type"] == "json":
|
|
349
417
|
resp_as_json = response.json()
|
|
350
418
|
# extract results from response json
|
|
@@ -354,6 +422,8 @@ class QueryStringSearch(Search):
|
|
|
354
422
|
"results_entry"
|
|
355
423
|
].find(resp_as_json)
|
|
356
424
|
]
|
|
425
|
+
if result and isinstance(result[0], list):
|
|
426
|
+
result = result[0]
|
|
357
427
|
|
|
358
428
|
for product_type_result in result:
|
|
359
429
|
# providers_config extraction
|
|
@@ -391,6 +461,17 @@ class QueryStringSearch(Search):
|
|
|
391
461
|
],
|
|
392
462
|
)
|
|
393
463
|
|
|
464
|
+
if (
|
|
465
|
+
"single_product_type_parsable_metadata"
|
|
466
|
+
in self.config.discover_product_types
|
|
467
|
+
):
|
|
468
|
+
collection_data = self._get_product_type_metadata_from_single_collection_endpoint(
|
|
469
|
+
generic_product_type_id
|
|
470
|
+
)
|
|
471
|
+
conf_update_dict["product_types_config"][
|
|
472
|
+
generic_product_type_id
|
|
473
|
+
].update(collection_data)
|
|
474
|
+
|
|
394
475
|
# update keywords
|
|
395
476
|
keywords_fields = [
|
|
396
477
|
"instrument",
|
|
@@ -438,38 +519,161 @@ class QueryStringSearch(Search):
|
|
|
438
519
|
e,
|
|
439
520
|
)
|
|
440
521
|
return None
|
|
522
|
+
except requests.RequestException as e:
|
|
523
|
+
logger.debug(
|
|
524
|
+
"Could not parse discovered product types response from "
|
|
525
|
+
f"{self.provider}, {type(e).__name__}: {e.args}"
|
|
526
|
+
)
|
|
527
|
+
return None
|
|
441
528
|
conf_update_dict["product_types_config"] = dict_items_recursive_apply(
|
|
442
529
|
conf_update_dict["product_types_config"],
|
|
443
530
|
lambda k, v: v if v != NOT_AVAILABLE else None,
|
|
444
531
|
)
|
|
445
532
|
return conf_update_dict
|
|
446
533
|
|
|
534
|
+
def _get_product_type_metadata_from_single_collection_endpoint(
|
|
535
|
+
self, product_type: str
|
|
536
|
+
) -> Dict[str, Any]:
|
|
537
|
+
"""
|
|
538
|
+
retrieves additional product type information from an endpoint returning data for a single collection
|
|
539
|
+
:param product_type: product type
|
|
540
|
+
:return: product types and their metadata
|
|
541
|
+
"""
|
|
542
|
+
single_collection_url = self.config.discover_product_types[
|
|
543
|
+
"single_collection_fetch_url"
|
|
544
|
+
].format(productType=product_type)
|
|
545
|
+
resp = QueryStringSearch._request(
|
|
546
|
+
self,
|
|
547
|
+
PreparedSearch(
|
|
548
|
+
url=single_collection_url,
|
|
549
|
+
info_message=f"Fetching data for product type: {product_type}",
|
|
550
|
+
exception_message="Skipping error while fetching product types for "
|
|
551
|
+
"{} {} instance:".format(self.provider, self.__class__.__name__),
|
|
552
|
+
),
|
|
553
|
+
)
|
|
554
|
+
product_data = resp.json()
|
|
555
|
+
return properties_from_json(
|
|
556
|
+
product_data,
|
|
557
|
+
self.config.discover_product_types["single_product_type_parsable_metadata"],
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
def discover_queryables(
|
|
561
|
+
self, **kwargs: Any
|
|
562
|
+
) -> Optional[Dict[str, Annotated[Any, FieldInfo]]]:
|
|
563
|
+
"""Fetch queryables list from provider using its constraints file
|
|
564
|
+
|
|
565
|
+
:param kwargs: additional filters for queryables (`productType` and other search
|
|
566
|
+
arguments)
|
|
567
|
+
:returns: fetched queryable parameters dict
|
|
568
|
+
"""
|
|
569
|
+
product_type = kwargs.pop("productType", None)
|
|
570
|
+
if not product_type:
|
|
571
|
+
return {}
|
|
572
|
+
constraints_file_url = getattr(self.config, "constraints_file_url", "")
|
|
573
|
+
if not constraints_file_url:
|
|
574
|
+
return {}
|
|
575
|
+
|
|
576
|
+
constraints_file_dataset_key = getattr(
|
|
577
|
+
self.config, "constraints_file_dataset_key", "dataset"
|
|
578
|
+
)
|
|
579
|
+
provider_product_type = self.config.products.get(product_type, {}).get(
|
|
580
|
+
constraints_file_dataset_key, None
|
|
581
|
+
)
|
|
582
|
+
|
|
583
|
+
# defaults
|
|
584
|
+
default_queryables = self._get_defaults_as_queryables(product_type)
|
|
585
|
+
# remove unwanted queryables
|
|
586
|
+
for param in getattr(self.config, "remove_from_queryables", []):
|
|
587
|
+
default_queryables.pop(param, None)
|
|
588
|
+
|
|
589
|
+
non_empty_kwargs = {k: v for k, v in kwargs.items() if v}
|
|
590
|
+
|
|
591
|
+
if "{" in constraints_file_url:
|
|
592
|
+
constraints_file_url = constraints_file_url.format(
|
|
593
|
+
dataset=provider_product_type
|
|
594
|
+
)
|
|
595
|
+
constraints = fetch_constraints(constraints_file_url, self)
|
|
596
|
+
if not constraints:
|
|
597
|
+
return default_queryables
|
|
598
|
+
|
|
599
|
+
constraint_params: Dict[str, Dict[str, Set[Any]]] = {}
|
|
600
|
+
if len(kwargs) == 0:
|
|
601
|
+
# get values from constraints without additional filters
|
|
602
|
+
for constraint in constraints:
|
|
603
|
+
for key in constraint.keys():
|
|
604
|
+
if key in constraint_params:
|
|
605
|
+
constraint_params[key]["enum"].update(constraint[key])
|
|
606
|
+
else:
|
|
607
|
+
constraint_params[key] = {"enum": set(constraint[key])}
|
|
608
|
+
else:
|
|
609
|
+
# get values from constraints with additional filters
|
|
610
|
+
constraints_input_params = {k: v for k, v in non_empty_kwargs.items()}
|
|
611
|
+
constraint_params = get_constraint_queryables_with_additional_params(
|
|
612
|
+
constraints, constraints_input_params, self, product_type
|
|
613
|
+
)
|
|
614
|
+
# query params that are not in constraints but might be default queryables
|
|
615
|
+
if len(constraint_params) == 1 and "not_available" in constraint_params:
|
|
616
|
+
not_queryables = set()
|
|
617
|
+
for constraint_param in constraint_params["not_available"]["enum"]:
|
|
618
|
+
param = CommonQueryables.get_queryable_from_alias(constraint_param)
|
|
619
|
+
if param in dict(
|
|
620
|
+
CommonQueryables.model_fields, **default_queryables
|
|
621
|
+
):
|
|
622
|
+
non_empty_kwargs.pop(constraint_param)
|
|
623
|
+
else:
|
|
624
|
+
not_queryables.add(constraint_param)
|
|
625
|
+
if not_queryables:
|
|
626
|
+
raise ValidationError(
|
|
627
|
+
f"parameter(s) {str(not_queryables)} not queryable"
|
|
628
|
+
)
|
|
629
|
+
else:
|
|
630
|
+
# get constraints again without common queryables
|
|
631
|
+
constraint_params = (
|
|
632
|
+
get_constraint_queryables_with_additional_params(
|
|
633
|
+
constraints, non_empty_kwargs, self, product_type
|
|
634
|
+
)
|
|
635
|
+
)
|
|
636
|
+
|
|
637
|
+
field_definitions: Dict[str, Any] = dict()
|
|
638
|
+
for json_param, json_mtd in constraint_params.items():
|
|
639
|
+
param = (
|
|
640
|
+
get_queryable_from_provider(
|
|
641
|
+
json_param, self.get_metadata_mapping(product_type)
|
|
642
|
+
)
|
|
643
|
+
or json_param
|
|
644
|
+
)
|
|
645
|
+
default = kwargs.get(param, None) or self.config.products.get(
|
|
646
|
+
product_type, {}
|
|
647
|
+
).get(param, None)
|
|
648
|
+
annotated_def = json_field_definition_to_python(
|
|
649
|
+
json_mtd, default_value=default, required=True
|
|
650
|
+
)
|
|
651
|
+
field_definitions[param] = get_args(annotated_def)
|
|
652
|
+
|
|
653
|
+
python_queryables = create_model("m", **field_definitions).model_fields
|
|
654
|
+
return dict(default_queryables, **model_fields_to_annotated(python_queryables))
|
|
655
|
+
|
|
447
656
|
def query(
|
|
448
657
|
self,
|
|
449
|
-
|
|
450
|
-
items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
|
|
451
|
-
page: int = DEFAULT_PAGE,
|
|
452
|
-
count: bool = True,
|
|
658
|
+
prep: PreparedSearch = PreparedSearch(),
|
|
453
659
|
**kwargs: Any,
|
|
454
660
|
) -> Tuple[List[EOProduct], Optional[int]]:
|
|
455
661
|
"""Perform a search on an OpenSearch-like interface
|
|
456
662
|
|
|
457
|
-
:param
|
|
458
|
-
single page
|
|
459
|
-
:type items_per_page: int
|
|
460
|
-
:param page: (optional) The page number to return
|
|
461
|
-
:type page: int
|
|
462
|
-
:param count: (optional) To trigger a count request
|
|
463
|
-
:type count: bool
|
|
663
|
+
:param prep: Object collecting needed information for search.
|
|
464
664
|
"""
|
|
465
|
-
|
|
665
|
+
count = prep.count
|
|
666
|
+
product_type = kwargs.get("productType", prep.product_type)
|
|
466
667
|
if product_type == GENERIC_PRODUCT_TYPE:
|
|
467
668
|
logger.warning(
|
|
468
669
|
"GENERIC_PRODUCT_TYPE is not a real product_type and should only be used internally as a template"
|
|
469
670
|
)
|
|
470
|
-
return [], 0
|
|
471
|
-
|
|
472
|
-
|
|
671
|
+
return ([], 0) if prep.count else ([], None)
|
|
672
|
+
|
|
673
|
+
sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
|
|
674
|
+
prep.sort_by_qs, _ = (
|
|
675
|
+
("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
|
|
676
|
+
)
|
|
473
677
|
|
|
474
678
|
provider_product_type = self.map_product_type(product_type)
|
|
475
679
|
keywords = {k: v for k, v in kwargs.items() if k != "auth" and v is not None}
|
|
@@ -480,25 +684,25 @@ class QueryStringSearch(Search):
|
|
|
480
684
|
)
|
|
481
685
|
|
|
482
686
|
# provider product type specific conf
|
|
483
|
-
|
|
687
|
+
prep.product_type_def_params = (
|
|
484
688
|
self.get_product_type_def_params(product_type, **kwargs)
|
|
485
689
|
if product_type is not None
|
|
486
690
|
else {}
|
|
487
691
|
)
|
|
488
692
|
|
|
489
693
|
# if product_type_def_params is set, remove product_type as it may conflict with this conf
|
|
490
|
-
if
|
|
694
|
+
if prep.product_type_def_params:
|
|
491
695
|
keywords.pop("productType", None)
|
|
492
696
|
|
|
493
697
|
if self.config.metadata_mapping:
|
|
494
698
|
product_type_metadata_mapping = dict(
|
|
495
699
|
self.config.metadata_mapping,
|
|
496
|
-
**
|
|
700
|
+
**prep.product_type_def_params.get("metadata_mapping", {}),
|
|
497
701
|
)
|
|
498
702
|
keywords.update(
|
|
499
703
|
{
|
|
500
704
|
k: v
|
|
501
|
-
for k, v in
|
|
705
|
+
for k, v in prep.product_type_def_params.items()
|
|
502
706
|
if k not in keywords.keys()
|
|
503
707
|
and k in product_type_metadata_mapping.keys()
|
|
504
708
|
and isinstance(product_type_metadata_mapping[k], list)
|
|
@@ -507,21 +711,26 @@ class QueryStringSearch(Search):
|
|
|
507
711
|
|
|
508
712
|
qp, qs = self.build_query_string(product_type, **keywords)
|
|
509
713
|
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
714
|
+
prep.query_params = qp
|
|
715
|
+
prep.query_string = qs
|
|
716
|
+
prep.search_urls, total_items = self.collect_search_urls(
|
|
717
|
+
prep,
|
|
718
|
+
**kwargs,
|
|
514
719
|
)
|
|
515
|
-
if not count and hasattr(
|
|
720
|
+
if not count and hasattr(prep, "total_items_nb"):
|
|
516
721
|
# do not try to extract total_items from search results if count is False
|
|
517
|
-
del
|
|
518
|
-
del
|
|
519
|
-
|
|
520
|
-
provider_results = self.do_search(
|
|
521
|
-
if count and total_items is None and hasattr(
|
|
522
|
-
total_items =
|
|
523
|
-
|
|
524
|
-
|
|
722
|
+
del prep.total_items_nb
|
|
723
|
+
del prep.need_count
|
|
724
|
+
|
|
725
|
+
provider_results = self.do_search(prep, **kwargs)
|
|
726
|
+
if count and total_items is None and hasattr(prep, "total_items_nb"):
|
|
727
|
+
total_items = prep.total_items_nb
|
|
728
|
+
|
|
729
|
+
raw_search_result = RawSearchResult(provider_results)
|
|
730
|
+
raw_search_result.query_params = prep.query_params
|
|
731
|
+
raw_search_result.product_type_def_params = prep.product_type_def_params
|
|
732
|
+
|
|
733
|
+
eo_products = self.normalize_results(raw_search_result, **kwargs)
|
|
525
734
|
return eo_products, total_items
|
|
526
735
|
|
|
527
736
|
@_deprecated(
|
|
@@ -538,11 +747,13 @@ class QueryStringSearch(Search):
|
|
|
538
747
|
) -> Tuple[Dict[str, Any], str]:
|
|
539
748
|
"""Build The query string using the search parameters"""
|
|
540
749
|
logger.debug("Building the query string that will be used for search")
|
|
541
|
-
query_params = format_query_params(product_type, self.config,
|
|
750
|
+
query_params = format_query_params(product_type, self.config, kwargs)
|
|
542
751
|
|
|
543
752
|
# Build the final query string, in one go without quoting it
|
|
544
753
|
# (some providers do not operate well with urlencoded and quoted query strings)
|
|
545
|
-
quote_via:
|
|
754
|
+
def quote_via(x: Any, *_args, **_kwargs) -> str:
|
|
755
|
+
return x
|
|
756
|
+
|
|
546
757
|
return (
|
|
547
758
|
query_params,
|
|
548
759
|
urlencode(query_params, doseq=True, quote_via=quote_via),
|
|
@@ -550,22 +761,31 @@ class QueryStringSearch(Search):
|
|
|
550
761
|
|
|
551
762
|
def collect_search_urls(
|
|
552
763
|
self,
|
|
553
|
-
|
|
554
|
-
items_per_page: Optional[int] = None,
|
|
555
|
-
count: bool = True,
|
|
764
|
+
prep: PreparedSearch = PreparedSearch(page=None, items_per_page=None),
|
|
556
765
|
**kwargs: Any,
|
|
557
766
|
) -> Tuple[List[str], Optional[int]]:
|
|
558
767
|
"""Build paginated urls"""
|
|
768
|
+
page = prep.page
|
|
769
|
+
items_per_page = prep.items_per_page
|
|
770
|
+
count = prep.count
|
|
771
|
+
|
|
559
772
|
urls = []
|
|
560
773
|
total_results = 0 if count else None
|
|
561
774
|
|
|
775
|
+
# use only sort_by parameters for search, not for count
|
|
776
|
+
# and remove potential leading '&'
|
|
777
|
+
qs_with_sort = (prep.query_string + getattr(prep, "sort_by_qs", "")).strip("&")
|
|
778
|
+
# append count template if needed
|
|
779
|
+
if count:
|
|
780
|
+
qs_with_sort += self.config.pagination.get("count_tpl", "")
|
|
781
|
+
|
|
562
782
|
if "count_endpoint" not in self.config.pagination:
|
|
563
783
|
# if count_endpoint is not set, total_results should be extracted from search result
|
|
564
784
|
total_results = None
|
|
565
|
-
|
|
566
|
-
|
|
785
|
+
prep.need_count = True
|
|
786
|
+
prep.total_items_nb = None
|
|
567
787
|
|
|
568
|
-
for collection in self.get_collections(**kwargs):
|
|
788
|
+
for collection in self.get_collections(prep, **kwargs) or (None,):
|
|
569
789
|
# skip empty collection if one is required in api_endpoint
|
|
570
790
|
if "{collection}" in self.config.api_endpoint and not collection:
|
|
571
791
|
continue
|
|
@@ -573,12 +793,13 @@ class QueryStringSearch(Search):
|
|
|
573
793
|
collection=collection
|
|
574
794
|
)
|
|
575
795
|
if page is not None and items_per_page is not None:
|
|
796
|
+
page = page - 1 + self.config.pagination.get("start_page", 1)
|
|
576
797
|
if count:
|
|
577
798
|
count_endpoint = self.config.pagination.get(
|
|
578
799
|
"count_endpoint", ""
|
|
579
800
|
).format(collection=collection)
|
|
580
801
|
if count_endpoint:
|
|
581
|
-
count_url = "{}?{}".format(count_endpoint,
|
|
802
|
+
count_url = "{}?{}".format(count_endpoint, prep.query_string)
|
|
582
803
|
_total_results = (
|
|
583
804
|
self.count_hits(
|
|
584
805
|
count_url, result_type=self.config.result_type
|
|
@@ -592,32 +813,36 @@ class QueryStringSearch(Search):
|
|
|
592
813
|
0 if total_results is None else total_results
|
|
593
814
|
)
|
|
594
815
|
total_results += _total_results or 0
|
|
816
|
+
if "next_page_url_tpl" not in self.config.pagination:
|
|
817
|
+
raise MisconfiguredError(
|
|
818
|
+
f"next_page_url_tpl is missing in {self.provider} search.pagination configuration"
|
|
819
|
+
)
|
|
595
820
|
next_url = self.config.pagination["next_page_url_tpl"].format(
|
|
596
821
|
url=search_endpoint,
|
|
597
|
-
search=
|
|
822
|
+
search=qs_with_sort,
|
|
598
823
|
items_per_page=items_per_page,
|
|
599
824
|
page=page,
|
|
600
825
|
skip=(page - 1) * items_per_page,
|
|
601
826
|
skip_base_1=(page - 1) * items_per_page + 1,
|
|
602
827
|
)
|
|
603
828
|
else:
|
|
604
|
-
next_url = "{}?{}".format(search_endpoint,
|
|
829
|
+
next_url = "{}?{}".format(search_endpoint, qs_with_sort)
|
|
605
830
|
urls.append(next_url)
|
|
606
|
-
return urls, total_results
|
|
831
|
+
return list(dict.fromkeys(urls)), total_results
|
|
607
832
|
|
|
608
833
|
def do_search(
|
|
609
|
-
self,
|
|
834
|
+
self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
|
|
610
835
|
) -> List[Any]:
|
|
611
836
|
"""Perform the actual search request.
|
|
612
837
|
|
|
613
838
|
If there is a specified number of items per page, return the results as soon
|
|
614
839
|
as this number is reached
|
|
615
840
|
|
|
616
|
-
:param
|
|
617
|
-
:type items_per_page: int
|
|
841
|
+
:param prep: Object collecting needed information for search.
|
|
618
842
|
"""
|
|
843
|
+
items_per_page = prep.items_per_page
|
|
619
844
|
total_items_nb = 0
|
|
620
|
-
if getattr(
|
|
845
|
+
if getattr(prep, "need_count", False):
|
|
621
846
|
# extract total_items_nb from search results
|
|
622
847
|
if self.config.result_type == "json":
|
|
623
848
|
total_items_nb_key_path_parsed = self.config.pagination[
|
|
@@ -625,13 +850,17 @@ class QueryStringSearch(Search):
|
|
|
625
850
|
]
|
|
626
851
|
|
|
627
852
|
results: List[Any] = []
|
|
628
|
-
for search_url in
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
"instance:".format(self.provider, self.__class__.__name__),
|
|
853
|
+
for search_url in prep.search_urls:
|
|
854
|
+
single_search_prep = copy_copy(prep)
|
|
855
|
+
single_search_prep.url = search_url
|
|
856
|
+
single_search_prep.info_message = "Sending search request: {}".format(
|
|
857
|
+
search_url
|
|
634
858
|
)
|
|
859
|
+
single_search_prep.exception_message = (
|
|
860
|
+
f"Skipping error while searching for {self.provider}"
|
|
861
|
+
f" {self.__class__.__name__} instance"
|
|
862
|
+
)
|
|
863
|
+
response = self._request(single_search_prep)
|
|
635
864
|
next_page_url_key_path = self.config.pagination.get(
|
|
636
865
|
"next_page_url_key_path", None
|
|
637
866
|
)
|
|
@@ -649,7 +878,7 @@ class QueryStringSearch(Search):
|
|
|
649
878
|
)
|
|
650
879
|
result = (
|
|
651
880
|
[etree.tostring(element_or_tree=entry) for entry in results_xpath]
|
|
652
|
-
if isinstance(results_xpath,
|
|
881
|
+
if isinstance(results_xpath, Sequence)
|
|
653
882
|
else []
|
|
654
883
|
)
|
|
655
884
|
|
|
@@ -658,7 +887,7 @@ class QueryStringSearch(Search):
|
|
|
658
887
|
"Setting the next page url from an XML response has not "
|
|
659
888
|
"been implemented yet"
|
|
660
889
|
)
|
|
661
|
-
if getattr(
|
|
890
|
+
if getattr(prep, "need_count", False):
|
|
662
891
|
# extract total_items_nb from search results
|
|
663
892
|
try:
|
|
664
893
|
total_nb_results_xpath = root_node.xpath(
|
|
@@ -669,7 +898,7 @@ class QueryStringSearch(Search):
|
|
|
669
898
|
)
|
|
670
899
|
total_nb_results = (
|
|
671
900
|
total_nb_results_xpath
|
|
672
|
-
if isinstance(total_nb_results_xpath,
|
|
901
|
+
if isinstance(total_nb_results_xpath, Sequence)
|
|
673
902
|
else []
|
|
674
903
|
)[0]
|
|
675
904
|
_total_items_nb = int(total_nb_results)
|
|
@@ -686,55 +915,60 @@ class QueryStringSearch(Search):
|
|
|
686
915
|
resp_as_json = response.json()
|
|
687
916
|
if next_page_url_key_path:
|
|
688
917
|
path_parsed = next_page_url_key_path
|
|
689
|
-
|
|
690
|
-
|
|
918
|
+
found_paths = path_parsed.find(resp_as_json)
|
|
919
|
+
if found_paths and not isinstance(found_paths, int):
|
|
920
|
+
self.next_page_url = found_paths[0].value
|
|
691
921
|
logger.debug(
|
|
692
922
|
"Next page URL collected and set for the next search",
|
|
693
923
|
)
|
|
694
|
-
|
|
924
|
+
else:
|
|
695
925
|
logger.debug("Next page URL could not be collected")
|
|
696
926
|
if next_page_query_obj_key_path:
|
|
697
927
|
path_parsed = next_page_query_obj_key_path
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
].value
|
|
928
|
+
found_paths = path_parsed.find(resp_as_json)
|
|
929
|
+
if found_paths and not isinstance(found_paths, int):
|
|
930
|
+
self.next_page_query_obj = found_paths[0].value
|
|
702
931
|
logger.debug(
|
|
703
932
|
"Next page Query-object collected and set for the next search",
|
|
704
933
|
)
|
|
705
|
-
|
|
934
|
+
else:
|
|
706
935
|
logger.debug("Next page Query-object could not be collected")
|
|
707
936
|
if next_page_merge_key_path:
|
|
708
937
|
path_parsed = next_page_merge_key_path
|
|
709
|
-
|
|
710
|
-
|
|
938
|
+
found_paths = path_parsed.find(resp_as_json)
|
|
939
|
+
if found_paths and not isinstance(found_paths, int):
|
|
940
|
+
self.next_page_merge = found_paths[0].value
|
|
711
941
|
logger.debug(
|
|
712
942
|
"Next page merge collected and set for the next search",
|
|
713
943
|
)
|
|
714
|
-
|
|
944
|
+
else:
|
|
715
945
|
logger.debug("Next page merge could not be collected")
|
|
716
946
|
|
|
717
947
|
results_entry = string_to_jsonpath(
|
|
718
948
|
self.config.results_entry, force=True
|
|
719
949
|
)
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
950
|
+
found_entry_paths = results_entry.find(resp_as_json)
|
|
951
|
+
if found_entry_paths and not isinstance(found_entry_paths, int):
|
|
952
|
+
result = found_entry_paths[0].value
|
|
953
|
+
else:
|
|
723
954
|
result = []
|
|
724
955
|
if not isinstance(result, list):
|
|
725
956
|
result = [result]
|
|
726
957
|
|
|
727
|
-
if getattr(
|
|
958
|
+
if getattr(prep, "need_count", False):
|
|
728
959
|
# extract total_items_nb from search results
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
960
|
+
found_total_items_nb_paths = total_items_nb_key_path_parsed.find(
|
|
961
|
+
resp_as_json
|
|
962
|
+
)
|
|
963
|
+
if found_total_items_nb_paths and not isinstance(
|
|
964
|
+
found_total_items_nb_paths, int
|
|
965
|
+
):
|
|
966
|
+
_total_items_nb = found_total_items_nb_paths[0].value
|
|
733
967
|
if getattr(self.config, "merge_responses", False):
|
|
734
968
|
total_items_nb = _total_items_nb or 0
|
|
735
969
|
else:
|
|
736
970
|
total_items_nb += _total_items_nb or 0
|
|
737
|
-
|
|
971
|
+
else:
|
|
738
972
|
logger.debug(
|
|
739
973
|
"Could not extract total_items_nb from search results"
|
|
740
974
|
)
|
|
@@ -746,15 +980,22 @@ class QueryStringSearch(Search):
|
|
|
746
980
|
)
|
|
747
981
|
else:
|
|
748
982
|
results.extend(result)
|
|
749
|
-
if getattr(
|
|
750
|
-
|
|
751
|
-
del
|
|
983
|
+
if getattr(prep, "need_count", False):
|
|
984
|
+
prep.total_items_nb = total_items_nb
|
|
985
|
+
del prep.need_count
|
|
986
|
+
# remove prep.total_items_nb if value could not be extracted from response
|
|
987
|
+
if (
|
|
988
|
+
hasattr(prep, "total_items_nb")
|
|
989
|
+
and not prep.total_items_nb
|
|
990
|
+
and len(results) > 0
|
|
991
|
+
):
|
|
992
|
+
del prep.total_items_nb
|
|
752
993
|
if items_per_page is not None and len(results) == items_per_page:
|
|
753
994
|
return results
|
|
754
995
|
return results
|
|
755
996
|
|
|
756
997
|
def normalize_results(
|
|
757
|
-
self, results:
|
|
998
|
+
self, results: RawSearchResult, **kwargs: Any
|
|
758
999
|
) -> List[EOProduct]:
|
|
759
1000
|
"""Build EOProducts from provider results"""
|
|
760
1001
|
normalize_remaining_count = len(results)
|
|
@@ -777,6 +1018,8 @@ class QueryStringSearch(Search):
|
|
|
777
1018
|
product.properties = dict(
|
|
778
1019
|
getattr(self.config, "product_type_config", {}), **product.properties
|
|
779
1020
|
)
|
|
1021
|
+
# move assets from properties to product's attr
|
|
1022
|
+
product.assets.update(product.properties.pop("assets", {}))
|
|
780
1023
|
products.append(product)
|
|
781
1024
|
return products
|
|
782
1025
|
|
|
@@ -785,10 +1028,12 @@ class QueryStringSearch(Search):
|
|
|
785
1028
|
# Handle a very annoying special case :'(
|
|
786
1029
|
url = count_url.replace("$format=json&", "")
|
|
787
1030
|
response = self._request(
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
1031
|
+
PreparedSearch(
|
|
1032
|
+
url=url,
|
|
1033
|
+
info_message="Sending count request: {}".format(url),
|
|
1034
|
+
exception_message="Skipping error while counting results for {} {} "
|
|
1035
|
+
"instance:".format(self.provider, self.__class__.__name__),
|
|
1036
|
+
)
|
|
792
1037
|
)
|
|
793
1038
|
if result_type == "xml":
|
|
794
1039
|
root_node = etree.fromstring(response.content)
|
|
@@ -801,20 +1046,34 @@ class QueryStringSearch(Search):
|
|
|
801
1046
|
count_results = response.json()
|
|
802
1047
|
if isinstance(count_results, dict):
|
|
803
1048
|
path_parsed = self.config.pagination["total_items_nb_key_path"]
|
|
804
|
-
|
|
1049
|
+
if not isinstance(path_parsed, JSONPath):
|
|
1050
|
+
raise PluginImplementationError(
|
|
1051
|
+
"total_items_nb_key_path must be parsed to JSONPath on plugin init"
|
|
1052
|
+
)
|
|
1053
|
+
found_paths = path_parsed.find(count_results)
|
|
1054
|
+
if found_paths and not isinstance(found_paths, int):
|
|
1055
|
+
total_results = found_paths[0].value
|
|
1056
|
+
else:
|
|
1057
|
+
raise MisconfiguredError(
|
|
1058
|
+
"Could not get results count from response using total_items_nb_key_path"
|
|
1059
|
+
)
|
|
805
1060
|
else: # interpret the result as a raw int
|
|
806
1061
|
total_results = int(count_results)
|
|
807
1062
|
return total_results
|
|
808
1063
|
|
|
809
|
-
def get_collections(self, **kwargs: Any) -> Tuple[
|
|
1064
|
+
def get_collections(self, prep: PreparedSearch, **kwargs: Any) -> Tuple[str, ...]:
|
|
810
1065
|
"""Get the collection to which the product belongs"""
|
|
811
1066
|
# See https://earth.esa.int/web/sentinel/missions/sentinel-2/news/-
|
|
812
1067
|
# /asset_publisher/Ac0d/content/change-of
|
|
813
1068
|
# -format-for-new-sentinel-2-level-1c-products-starting-on-6-december
|
|
814
1069
|
product_type: Optional[str] = kwargs.get("productType")
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
1070
|
+
collection: Optional[str] = None
|
|
1071
|
+
if product_type is None and (
|
|
1072
|
+
not hasattr(prep, "product_type_def_params")
|
|
1073
|
+
or not prep.product_type_def_params
|
|
1074
|
+
):
|
|
1075
|
+
collections: Set[str] = set()
|
|
1076
|
+
collection = getattr(self.config, "collection", None)
|
|
818
1077
|
if collection is None:
|
|
819
1078
|
try:
|
|
820
1079
|
for product_type, product_config in self.config.products.items():
|
|
@@ -832,31 +1091,44 @@ class QueryStringSearch(Search):
|
|
|
832
1091
|
collections.add(collection)
|
|
833
1092
|
return tuple(collections)
|
|
834
1093
|
|
|
835
|
-
collection
|
|
1094
|
+
collection = getattr(self.config, "collection", None)
|
|
836
1095
|
if collection is None:
|
|
837
1096
|
collection = (
|
|
838
|
-
|
|
1097
|
+
prep.product_type_def_params.get("collection", None) or product_type
|
|
839
1098
|
)
|
|
840
|
-
|
|
1099
|
+
|
|
1100
|
+
if collection is None:
|
|
1101
|
+
return ()
|
|
1102
|
+
elif not isinstance(collection, list):
|
|
1103
|
+
return (collection,)
|
|
1104
|
+
else:
|
|
1105
|
+
return tuple(collection)
|
|
841
1106
|
|
|
842
1107
|
def _request(
|
|
843
1108
|
self,
|
|
844
|
-
|
|
845
|
-
info_message: Optional[str] = None,
|
|
846
|
-
exception_message: Optional[str] = None,
|
|
1109
|
+
prep: PreparedSearch,
|
|
847
1110
|
) -> Response:
|
|
1111
|
+
url = prep.url
|
|
1112
|
+
if url is None:
|
|
1113
|
+
raise ValidationError("Cannot request empty URL")
|
|
1114
|
+
info_message = prep.info_message
|
|
1115
|
+
exception_message = prep.exception_message
|
|
848
1116
|
try:
|
|
849
1117
|
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
1118
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
1119
|
+
|
|
1120
|
+
ssl_ctx = get_ssl_context(ssl_verify)
|
|
850
1121
|
# auth if needed
|
|
851
1122
|
kwargs: Dict[str, Any] = {}
|
|
852
1123
|
if (
|
|
853
1124
|
getattr(self.config, "need_auth", False)
|
|
854
|
-
and hasattr(
|
|
855
|
-
and callable(
|
|
1125
|
+
and hasattr(prep, "auth")
|
|
1126
|
+
and callable(prep.auth)
|
|
856
1127
|
):
|
|
857
|
-
kwargs["auth"] =
|
|
1128
|
+
kwargs["auth"] = prep.auth
|
|
858
1129
|
# requests auto quote url params, without any option to prevent it
|
|
859
1130
|
# use urllib instead of requests if req must be sent unquoted
|
|
1131
|
+
|
|
860
1132
|
if hasattr(self.config, "dont_quote"):
|
|
861
1133
|
# keep unquoted desired params
|
|
862
1134
|
base_url, params = url.split("?") if "?" in url else (url, "")
|
|
@@ -868,21 +1140,27 @@ class QueryStringSearch(Search):
|
|
|
868
1140
|
req = requests.Request(
|
|
869
1141
|
method="GET", url=base_url, headers=USER_AGENT, **kwargs
|
|
870
1142
|
)
|
|
871
|
-
|
|
872
|
-
|
|
1143
|
+
req_prep = req.prepare()
|
|
1144
|
+
req_prep.url = base_url + "?" + qry
|
|
873
1145
|
# send urllib req
|
|
874
1146
|
if info_message:
|
|
875
|
-
logger.info(info_message.replace(url,
|
|
876
|
-
urllib_req = Request(
|
|
877
|
-
urllib_response = urlopen(urllib_req, timeout=timeout)
|
|
1147
|
+
logger.info(info_message.replace(url, req_prep.url))
|
|
1148
|
+
urllib_req = Request(req_prep.url, headers=USER_AGENT)
|
|
1149
|
+
urllib_response = urlopen(urllib_req, timeout=timeout, context=ssl_ctx)
|
|
878
1150
|
# build Response
|
|
879
1151
|
adapter = HTTPAdapter()
|
|
880
|
-
response = cast(
|
|
1152
|
+
response = cast(
|
|
1153
|
+
Response, adapter.build_response(req_prep, urllib_response)
|
|
1154
|
+
)
|
|
881
1155
|
else:
|
|
882
1156
|
if info_message:
|
|
883
1157
|
logger.info(info_message)
|
|
884
1158
|
response = requests.get(
|
|
885
|
-
url,
|
|
1159
|
+
url,
|
|
1160
|
+
timeout=timeout,
|
|
1161
|
+
headers=USER_AGENT,
|
|
1162
|
+
verify=ssl_verify,
|
|
1163
|
+
**kwargs,
|
|
886
1164
|
)
|
|
887
1165
|
response.raise_for_status()
|
|
888
1166
|
except requests.exceptions.Timeout as exc:
|
|
@@ -899,38 +1177,10 @@ class QueryStringSearch(Search):
|
|
|
899
1177
|
self.__class__.__name__,
|
|
900
1178
|
err_msg,
|
|
901
1179
|
)
|
|
902
|
-
raise RequestError(
|
|
1180
|
+
raise RequestError.from_error(err, exception_message) from err
|
|
903
1181
|
return response
|
|
904
1182
|
|
|
905
1183
|
|
|
906
|
-
class AwsSearch(QueryStringSearch):
|
|
907
|
-
"""A specialisation of RestoSearch that modifies the way the EOProducts are built
|
|
908
|
-
from the search results"""
|
|
909
|
-
|
|
910
|
-
def normalize_results(
|
|
911
|
-
self, results: List[Dict[str, Any]], **kwargs: Any
|
|
912
|
-
) -> List[EOProduct]:
|
|
913
|
-
"""Transform metadata from provider representation to eodag representation"""
|
|
914
|
-
normalized: List[EOProduct] = []
|
|
915
|
-
logger.debug("Adapting plugin results to eodag product representation")
|
|
916
|
-
for result in results:
|
|
917
|
-
ref = result["properties"]["title"].split("_")[5]
|
|
918
|
-
year = result["properties"]["completionDate"][0:4]
|
|
919
|
-
month = str(int(result["properties"]["completionDate"][5:7]))
|
|
920
|
-
day = str(int(result["properties"]["completionDate"][8:10]))
|
|
921
|
-
|
|
922
|
-
properties = QueryStringSearch.extract_properties[self.config.result_type](
|
|
923
|
-
result, self.get_metadata_mapping(kwargs.get("productType"))
|
|
924
|
-
)
|
|
925
|
-
|
|
926
|
-
properties["downloadLink"] = (
|
|
927
|
-
"s3://tiles/{ref[1]}{ref[2]}/{ref[3]}/{ref[4]}{ref[5]}/{year}/"
|
|
928
|
-
"{month}/{day}/0/"
|
|
929
|
-
).format(**locals())
|
|
930
|
-
normalized.append(EOProduct(self.provider, properties, **kwargs))
|
|
931
|
-
return normalized
|
|
932
|
-
|
|
933
|
-
|
|
934
1184
|
class ODataV4Search(QueryStringSearch):
|
|
935
1185
|
"""A specialisation of a QueryStringSearch that does a two step search to retrieve
|
|
936
1186
|
all products metadata"""
|
|
@@ -948,25 +1198,31 @@ class ODataV4Search(QueryStringSearch):
|
|
|
948
1198
|
metadata_path
|
|
949
1199
|
)
|
|
950
1200
|
|
|
951
|
-
def do_search(
|
|
1201
|
+
def do_search(
|
|
1202
|
+
self, prep: PreparedSearch = PreparedSearch(), **kwargs: Any
|
|
1203
|
+
) -> List[Any]:
|
|
952
1204
|
"""A two step search can be performed if the metadata are not given into the search result"""
|
|
953
1205
|
|
|
954
1206
|
if getattr(self.config, "per_product_metadata_query", False):
|
|
955
1207
|
final_result = []
|
|
1208
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
956
1209
|
# Query the products entity set for basic metadata about the product
|
|
957
|
-
for entity in super(ODataV4Search, self).do_search(
|
|
1210
|
+
for entity in super(ODataV4Search, self).do_search(prep, **kwargs):
|
|
958
1211
|
metadata_url = self.get_metadata_search_url(entity)
|
|
959
1212
|
try:
|
|
960
1213
|
logger.debug("Sending metadata request: %s", metadata_url)
|
|
961
1214
|
response = requests.get(
|
|
962
|
-
metadata_url,
|
|
1215
|
+
metadata_url,
|
|
1216
|
+
headers=USER_AGENT,
|
|
1217
|
+
timeout=HTTP_REQ_TIMEOUT,
|
|
1218
|
+
verify=ssl_verify,
|
|
963
1219
|
)
|
|
964
1220
|
response.raise_for_status()
|
|
965
1221
|
except requests.exceptions.Timeout as exc:
|
|
966
1222
|
raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
|
|
967
1223
|
except requests.RequestException:
|
|
968
1224
|
logger.exception(
|
|
969
|
-
"Skipping error while searching for %s %s instance
|
|
1225
|
+
"Skipping error while searching for %s %s instance",
|
|
970
1226
|
self.provider,
|
|
971
1227
|
self.__class__.__name__,
|
|
972
1228
|
)
|
|
@@ -977,7 +1233,7 @@ class ODataV4Search(QueryStringSearch):
|
|
|
977
1233
|
final_result.append(entity)
|
|
978
1234
|
return final_result
|
|
979
1235
|
else:
|
|
980
|
-
return super(ODataV4Search, self).do_search(
|
|
1236
|
+
return super(ODataV4Search, self).do_search(prep, **kwargs)
|
|
981
1237
|
|
|
982
1238
|
def get_metadata_search_url(self, entity: Dict[str, Any]) -> str:
|
|
983
1239
|
"""Build the metadata link for the given entity"""
|
|
@@ -986,7 +1242,7 @@ class ODataV4Search(QueryStringSearch):
|
|
|
986
1242
|
)
|
|
987
1243
|
|
|
988
1244
|
def normalize_results(
|
|
989
|
-
self, results:
|
|
1245
|
+
self, results: RawSearchResult, **kwargs: Any
|
|
990
1246
|
) -> List[EOProduct]:
|
|
991
1247
|
"""Build EOProducts from provider results
|
|
992
1248
|
|
|
@@ -1020,43 +1276,135 @@ class ODataV4Search(QueryStringSearch):
|
|
|
1020
1276
|
class PostJsonSearch(QueryStringSearch):
|
|
1021
1277
|
"""A specialisation of a QueryStringSearch that uses POST method"""
|
|
1022
1278
|
|
|
1279
|
+
def _get_default_end_date_from_start_date(
|
|
1280
|
+
self, start_datetime: str, product_type: str
|
|
1281
|
+
) -> str:
|
|
1282
|
+
default_end_date = self.config.products.get(product_type, {}).get(
|
|
1283
|
+
"_default_end_date", None
|
|
1284
|
+
)
|
|
1285
|
+
if default_end_date:
|
|
1286
|
+
return default_end_date
|
|
1287
|
+
try:
|
|
1288
|
+
start_date = datetime.fromisoformat(start_datetime)
|
|
1289
|
+
except ValueError:
|
|
1290
|
+
start_date = datetime.strptime(start_datetime, "%Y-%m-%dT%H:%M:%SZ")
|
|
1291
|
+
product_type_conf = self.config.products[product_type]
|
|
1292
|
+
if (
|
|
1293
|
+
"metadata_mapping" in product_type_conf
|
|
1294
|
+
and "startTimeFromAscendingNode" in product_type_conf["metadata_mapping"]
|
|
1295
|
+
):
|
|
1296
|
+
mapping = product_type_conf["metadata_mapping"][
|
|
1297
|
+
"startTimeFromAscendingNode"
|
|
1298
|
+
]
|
|
1299
|
+
if isinstance(mapping, list) and "year" in mapping[0]:
|
|
1300
|
+
# if date is mapped to year/month/(day), use end_date = start_date to avoid large requests
|
|
1301
|
+
end_date = start_date
|
|
1302
|
+
return end_date.isoformat()
|
|
1303
|
+
return self.get_product_type_cfg_value("missionEndDate", today().isoformat())
|
|
1304
|
+
|
|
1305
|
+
def _check_date_params(self, keywords: Dict[str, Any], product_type: str) -> None:
|
|
1306
|
+
"""checks if start and end date are present in the keywords and adds them if not"""
|
|
1307
|
+
if (
|
|
1308
|
+
"startTimeFromAscendingNode"
|
|
1309
|
+
and "completionTimeFromAscendingNode" in keywords
|
|
1310
|
+
):
|
|
1311
|
+
return
|
|
1312
|
+
# start time given, end time missing
|
|
1313
|
+
if "startTimeFromAscendingNode" in keywords:
|
|
1314
|
+
keywords[
|
|
1315
|
+
"completionTimeFromAscendingNode"
|
|
1316
|
+
] = self._get_default_end_date_from_start_date(
|
|
1317
|
+
keywords["startTimeFromAscendingNode"], product_type
|
|
1318
|
+
)
|
|
1319
|
+
return
|
|
1320
|
+
product_type_conf = self.config.products[product_type]
|
|
1321
|
+
if (
|
|
1322
|
+
"metadata_mapping" in product_type_conf
|
|
1323
|
+
and "startTimeFromAscendingNode" in product_type_conf["metadata_mapping"]
|
|
1324
|
+
):
|
|
1325
|
+
mapping = product_type_conf["metadata_mapping"][
|
|
1326
|
+
"startTimeFromAscendingNode"
|
|
1327
|
+
]
|
|
1328
|
+
if isinstance(mapping, list):
|
|
1329
|
+
# get time parameters (date, year, month, ...) from metadata mapping
|
|
1330
|
+
input_mapping = mapping[0].replace("{{", "").replace("}}", "")
|
|
1331
|
+
time_params = [
|
|
1332
|
+
values.split(":")[0].strip() for values in input_mapping.split(",")
|
|
1333
|
+
]
|
|
1334
|
+
time_params = [
|
|
1335
|
+
tp.replace('"', "").replace("'", "") for tp in time_params
|
|
1336
|
+
]
|
|
1337
|
+
# if startTime is not given but other time params (e.g. year/month/(day)) are given,
|
|
1338
|
+
# no default date is required
|
|
1339
|
+
in_keywords = True
|
|
1340
|
+
for tp in time_params:
|
|
1341
|
+
if tp not in keywords:
|
|
1342
|
+
in_keywords = False
|
|
1343
|
+
if not in_keywords:
|
|
1344
|
+
keywords[
|
|
1345
|
+
"startTimeFromAscendingNode"
|
|
1346
|
+
] = self.get_product_type_cfg_value(
|
|
1347
|
+
"missionStartDate", today().isoformat()
|
|
1348
|
+
)
|
|
1349
|
+
keywords[
|
|
1350
|
+
"completionTimeFromAscendingNode"
|
|
1351
|
+
] = self._get_default_end_date_from_start_date(
|
|
1352
|
+
keywords["startTimeFromAscendingNode"], product_type
|
|
1353
|
+
)
|
|
1354
|
+
|
|
1023
1355
|
def query(
|
|
1024
1356
|
self,
|
|
1025
|
-
|
|
1026
|
-
items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
|
|
1027
|
-
page: int = DEFAULT_PAGE,
|
|
1028
|
-
count: bool = True,
|
|
1357
|
+
prep: PreparedSearch = PreparedSearch(),
|
|
1029
1358
|
**kwargs: Any,
|
|
1030
1359
|
) -> Tuple[List[EOProduct], Optional[int]]:
|
|
1031
1360
|
"""Perform a search on an OpenSearch-like interface"""
|
|
1032
1361
|
product_type = kwargs.get("productType", None)
|
|
1362
|
+
count = prep.count
|
|
1033
1363
|
# remove "product_type" from search args if exists for compatibility with QueryStringSearch methods
|
|
1034
1364
|
kwargs.pop("product_type", None)
|
|
1365
|
+
sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
|
|
1366
|
+
_, sort_by_qp = (
|
|
1367
|
+
("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
|
|
1368
|
+
)
|
|
1035
1369
|
provider_product_type = self.map_product_type(product_type)
|
|
1036
|
-
|
|
1370
|
+
_dc_qs = kwargs.pop("_dc_qs", None)
|
|
1371
|
+
if _dc_qs is not None:
|
|
1372
|
+
qs = unquote_plus(unquote_plus(_dc_qs))
|
|
1373
|
+
qp = geojson.loads(qs)
|
|
1374
|
+
|
|
1375
|
+
# provider product type specific conf
|
|
1376
|
+
prep.product_type_def_params = self.get_product_type_def_params(
|
|
1377
|
+
product_type, **kwargs
|
|
1378
|
+
)
|
|
1379
|
+
else:
|
|
1380
|
+
keywords = {
|
|
1381
|
+
k: v for k, v in kwargs.items() if k != "auth" and v is not None
|
|
1382
|
+
}
|
|
1037
1383
|
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1384
|
+
if provider_product_type and provider_product_type != GENERIC_PRODUCT_TYPE:
|
|
1385
|
+
keywords["productType"] = provider_product_type
|
|
1386
|
+
elif product_type:
|
|
1387
|
+
keywords["productType"] = product_type
|
|
1042
1388
|
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1389
|
+
# provider product type specific conf
|
|
1390
|
+
prep.product_type_def_params = self.get_product_type_def_params(
|
|
1391
|
+
product_type, **kwargs
|
|
1392
|
+
)
|
|
1047
1393
|
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1394
|
+
# Add to the query, the queryable parameters set in the provider product type definition
|
|
1395
|
+
keywords.update(
|
|
1396
|
+
{
|
|
1397
|
+
k: v
|
|
1398
|
+
for k, v in prep.product_type_def_params.items()
|
|
1399
|
+
if k not in keywords.keys()
|
|
1400
|
+
and k in self.config.metadata_mapping.keys()
|
|
1401
|
+
and isinstance(self.config.metadata_mapping[k], list)
|
|
1402
|
+
}
|
|
1403
|
+
)
|
|
1404
|
+
if getattr(self.config, "dates_required", False):
|
|
1405
|
+
self._check_date_params(keywords, product_type)
|
|
1058
1406
|
|
|
1059
|
-
|
|
1407
|
+
qp, _ = self.build_query_string(product_type, **keywords)
|
|
1060
1408
|
|
|
1061
1409
|
for query_param, query_value in qp.items():
|
|
1062
1410
|
if (
|
|
@@ -1086,12 +1434,15 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1086
1434
|
"specific_qssearch"
|
|
1087
1435
|
].get("merge_responses", None)
|
|
1088
1436
|
|
|
1089
|
-
self
|
|
1090
|
-
|
|
1437
|
+
def count_hits(self, *x, **y):
|
|
1438
|
+
return 1
|
|
1439
|
+
|
|
1440
|
+
def _request(self, *x, **y):
|
|
1441
|
+
return super(PostJsonSearch, self)._request(*x, **y)
|
|
1091
1442
|
|
|
1092
1443
|
try:
|
|
1093
1444
|
eo_products, total_items = super(PostJsonSearch, self).query(
|
|
1094
|
-
|
|
1445
|
+
prep, **kwargs
|
|
1095
1446
|
)
|
|
1096
1447
|
except Exception:
|
|
1097
1448
|
raise
|
|
@@ -1108,61 +1459,98 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1108
1459
|
# stop searching right away
|
|
1109
1460
|
product_type_metadata_mapping = dict(
|
|
1110
1461
|
self.config.metadata_mapping,
|
|
1111
|
-
**
|
|
1462
|
+
**prep.product_type_def_params.get("metadata_mapping", {}),
|
|
1112
1463
|
)
|
|
1113
1464
|
if not qp and any(
|
|
1114
1465
|
k
|
|
1115
1466
|
for k in keywords.keys()
|
|
1116
1467
|
if isinstance(product_type_metadata_mapping.get(k, []), list)
|
|
1117
1468
|
):
|
|
1118
|
-
return [], 0
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
)
|
|
1123
|
-
if not count and getattr(self, "need_count", False):
|
|
1469
|
+
return ([], 0) if prep.count else ([], None)
|
|
1470
|
+
prep.query_params = dict(qp, **sort_by_qp)
|
|
1471
|
+
prep.search_urls, total_items = self.collect_search_urls(prep, **kwargs)
|
|
1472
|
+
if not count and getattr(prep, "need_count", False):
|
|
1124
1473
|
# do not try to extract total_items from search results if count is False
|
|
1125
|
-
del
|
|
1126
|
-
del
|
|
1127
|
-
provider_results = self.do_search(
|
|
1128
|
-
if count and total_items is None and hasattr(
|
|
1129
|
-
total_items =
|
|
1130
|
-
|
|
1131
|
-
|
|
1474
|
+
del prep.total_items_nb
|
|
1475
|
+
del prep.need_count
|
|
1476
|
+
provider_results = self.do_search(prep, **kwargs)
|
|
1477
|
+
if count and total_items is None and hasattr(prep, "total_items_nb"):
|
|
1478
|
+
total_items = prep.total_items_nb
|
|
1479
|
+
|
|
1480
|
+
raw_search_result = RawSearchResult(provider_results)
|
|
1481
|
+
raw_search_result.query_params = prep.query_params
|
|
1482
|
+
raw_search_result.product_type_def_params = prep.product_type_def_params
|
|
1483
|
+
|
|
1484
|
+
eo_products = self.normalize_results(raw_search_result, **kwargs)
|
|
1132
1485
|
return eo_products, total_items
|
|
1133
1486
|
|
|
1487
|
+
def normalize_results(
|
|
1488
|
+
self, results: RawSearchResult, **kwargs: Any
|
|
1489
|
+
) -> List[EOProduct]:
|
|
1490
|
+
"""Build EOProducts from provider results"""
|
|
1491
|
+
normalized = super().normalize_results(results, **kwargs)
|
|
1492
|
+
for product in normalized:
|
|
1493
|
+
if "downloadLink" in product.properties:
|
|
1494
|
+
decoded_link = unquote(product.properties["downloadLink"])
|
|
1495
|
+
if decoded_link[0] == "{": # not a url but a dict
|
|
1496
|
+
default_values = deepcopy(
|
|
1497
|
+
self.config.products.get(product.product_type, {})
|
|
1498
|
+
)
|
|
1499
|
+
default_values.pop("metadata_mapping", None)
|
|
1500
|
+
searched_values = orjson.loads(decoded_link)
|
|
1501
|
+
_dc_qs = orjson.dumps(
|
|
1502
|
+
format_query_params(
|
|
1503
|
+
product.product_type,
|
|
1504
|
+
self.config,
|
|
1505
|
+
{**default_values, **searched_values},
|
|
1506
|
+
)
|
|
1507
|
+
)
|
|
1508
|
+
product.properties["_dc_qs"] = quote_plus(_dc_qs)
|
|
1509
|
+
|
|
1510
|
+
# workaround to add product type to wekeo cmems order links
|
|
1511
|
+
if (
|
|
1512
|
+
"orderLink" in product.properties
|
|
1513
|
+
and "productType" in product.properties["orderLink"]
|
|
1514
|
+
):
|
|
1515
|
+
product.properties["orderLink"] = product.properties[
|
|
1516
|
+
"orderLink"
|
|
1517
|
+
].replace("productType", product.product_type)
|
|
1518
|
+
return normalized
|
|
1519
|
+
|
|
     def collect_search_urls(
         self,
-        page: Optional[int] = None,
-        items_per_page: Optional[int] = None,
-        count: bool = True,
+        prep: PreparedSearch = PreparedSearch(),
         **kwargs: Any,
     ) -> Tuple[List[str], Optional[int]]:
         """Adds pagination to query parameters, and auth to url"""
+        page = prep.page
+        items_per_page = prep.items_per_page
+        count = prep.count
         urls: List[str] = []
         total_results = 0 if count else None

         if "count_endpoint" not in self.config.pagination:
             # if count_endpoint is not set, total_results should be extracted from search result
             total_results = None
-            self.need_count = True
-            self.total_items_nb = None
+            prep.need_count = True
+            prep.total_items_nb = None

-        if "auth" in kwargs and hasattr(kwargs["auth"], "config"):
-            auth_conf_dict = getattr(kwargs["auth"].config, "credentials", {})
+        if prep.auth_plugin is not None and hasattr(prep.auth_plugin, "config"):
+            auth_conf_dict = getattr(prep.auth_plugin.config, "credentials", {})
         else:
             auth_conf_dict = {}
-        for collection in self.get_collections(**kwargs):
+        for collection in self.get_collections(prep, **kwargs) or (None,):
             try:
                 search_endpoint: str = self.config.api_endpoint.rstrip("/").format(
                     **dict(collection=collection, **auth_conf_dict)
                 )
             except KeyError as e:
+                provider = prep.auth_plugin.provider if prep.auth_plugin else ""
                 raise MisconfiguredError(
-                    "Missing %s in %s configuration"
-                    % (",".join(e.args), kwargs["auth"].provider)
+                    "Missing %s in %s configuration" % (",".join(e.args), provider)
                 )
             if page is not None and items_per_page is not None:
+                page = page - 1 + self.config.pagination.get("start_page", 1)
                 if count:
                     count_endpoint = self.config.pagination.get(
                         "count_endpoint", ""
@@ -1174,8 +1562,14 @@ class PostJsonSearch(QueryStringSearch):
                     if getattr(self.config, "merge_responses", False):
                         total_results = _total_results or 0
                     else:
-                        total_results += _total_results or 0
-                if "next_page_query_obj" in self.config.pagination:
+                        total_results = (
+                            (_total_results or 0)
+                            if total_results is None
+                            else total_results + (_total_results or 0)
+                        )
+                if "next_page_query_obj" in self.config.pagination and isinstance(
+                    self.config.pagination["next_page_query_obj"], str
+                ):
                     # next_page_query_obj needs to be parsed
                     next_page_query_obj = self.config.pagination[
                         "next_page_query_obj"
@@ -1186,60 +1580,68 @@ class PostJsonSearch(QueryStringSearch):
                         skip_base_1=(page - 1) * items_per_page + 1,
                     )
                     update_nested_dict(
-                        self.query_params, orjson.loads(next_page_query_obj)
+                        prep.query_params, orjson.loads(next_page_query_obj)
                     )

             urls.append(search_endpoint)
-        return urls, total_results
+        return list(dict.fromkeys(urls)), total_results

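Two smaller behaviours introduced above are easy to miss: the 1-based page requested by eodag is now shifted by the provider's `start_page` pagination setting, and the collected URLs are de-duplicated in order at return time. Both in isolation, with an invented pagination config and example URLs:

```python
pagination = {"start_page": 0}  # provider that counts pages from 0

page = 1  # eodag pages are 1-based
provider_page = page - 1 + pagination.get("start_page", 1)
assert provider_page == 0

# dict.fromkeys keeps the first occurrence of each URL and preserves order
urls = ["https://example.org/search?c=a", "https://example.org/search?c=a"]
assert list(dict.fromkeys(urls)) == ["https://example.org/search?c=a"]
```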
     def _request(
         self,
-        url: str,
-        info_message: Optional[str] = None,
-        exception_message: Optional[str] = None,
+        prep: PreparedSearch,
     ) -> Response:
+        url = prep.url
+        if url is None:
+            raise ValidationError("Cannot request empty URL")
+        info_message = prep.info_message
+        exception_message = prep.exception_message
         timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
+        ssl_verify = getattr(self.config, "ssl_verify", True)
         try:
             # auth if needed
-            kwargs: Dict[str, Any] = {}
+            RequestsKwargs = TypedDict(
+                "RequestsKwargs", {"auth": AuthBase}, total=False
+            )
+            kwargs: RequestsKwargs = {}
             if (
                 getattr(self.config, "need_auth", False)
-                and hasattr(self, "auth")
-                and callable(self.auth)
+                and hasattr(prep, "auth")
+                and callable(prep.auth)
             ):
-                kwargs["auth"] = self.auth
+                kwargs["auth"] = prep.auth

             # perform the request using the next page arguments if they are defined
-            if self.next_page_query_obj:
-                self.query_params = self.next_page_query_obj
+            if (
+                hasattr(self, "next_page_query_obj")
+                and self.next_page_query_obj is not None
+            ):
+                prep.query_params = self.next_page_query_obj
             if info_message:
                 logger.info(info_message)
-            logger.debug("Query parameters: %s" % self.query_params)
+            logger.debug("Query parameters: %s" % prep.query_params)
+            logger.debug("Query kwargs: %s" % kwargs)
             response = requests.post(
                 url,
-                json=self.query_params,
+                json=prep.query_params,
                 headers=USER_AGENT,
                 timeout=timeout,
+                verify=ssl_verify,
                 **kwargs,
             )
             response.raise_for_status()
         except requests.exceptions.Timeout as exc:
             raise TimeOutError(exc, timeout=timeout) from exc
         except (requests.RequestException, URLError) as err:
+            response = locals().get("response", Response())
             # check if error is identified as auth_error in provider conf
             auth_errors = getattr(self.config, "auth_error_code", [None])
             if not isinstance(auth_errors, list):
                 auth_errors = [auth_errors]
-            if (
-                hasattr(err.response, "status_code")
-                and err.response.status_code in auth_errors
-            ):
+            if response.status_code and response.status_code in auth_errors:
                 raise AuthenticationError(
-                    "HTTP Error {} returned, {}\nPlease check your credentials for {}".format(
-                        err.response.status_code,
-                        err.response.text.strip(),
-                        self.provider,
-                    )
+                    f"Please check your credentials for {self.provider}.",
+                    f"HTTP Error {response.status_code} returned.",
+                    response.text.strip(),
                 )
             if exception_message:
                 logger.exception(exception_message)
@@ -1250,9 +1652,8 @@ class PostJsonSearch(QueryStringSearch):
                     self.provider,
                     self.__class__.__name__,
                 )
-            if "response" in locals():
-                logger.debug(response.content)
-            raise RequestError(str(err))
+            logger.debug(response.content or str(err))
+            raise RequestError.from_error(err, exception_message) from err
         return response


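The refactor above moves per-request state (URL, JSON body, optional auth, log messages) from plugin attributes and keyword arguments into a `PreparedSearch` object. A reduced sketch of the POST that `_request` now performs, with the error handling trimmed and a hypothetical dataclass standing in for `PreparedSearch`:

```python
from dataclasses import dataclass, field
from typing import Any, Dict, Optional

import requests
from requests.auth import AuthBase


@dataclass
class Prep:  # hypothetical stand-in for eodag's PreparedSearch
    url: Optional[str] = None
    query_params: Dict[str, Any] = field(default_factory=dict)
    auth: Optional[AuthBase] = None


def post_search(prep: Prep, timeout: float = 5.0, ssl_verify: bool = True) -> requests.Response:
    if prep.url is None:
        raise ValueError("Cannot request empty URL")
    kwargs: Dict[str, Any] = {}
    if callable(prep.auth):  # requests AuthBase instances are callable
        kwargs["auth"] = prep.auth
    response = requests.post(
        prep.url,
        json=prep.query_params,  # the query is sent as a JSON body, not a query string
        timeout=timeout,
        verify=ssl_verify,  # mirrors the new ssl_verify config option
        **kwargs,
    )
    response.raise_for_status()
    return response
```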
@@ -1268,18 +1669,31 @@ class StacSearch(PostJsonSearch):
         # restore results_entry overwritten by init
         self.config.results_entry = results_entry

-    def normalize_results(
-        self, results: RawSearchResult, **kwargs: Any
-    ) -> List[EOProduct]:
-        """Build EOProducts from provider results"""
+    def build_query_string(
+        self, product_type: str, **kwargs: Any
+    ) -> Tuple[Dict[str, Any], str]:
+        """Build The query string using the search parameters"""
+        logger.debug("Building the query string that will be used for search")

-        products = super(StacSearch, self).normalize_results(results, **kwargs)
+        # handle opened time intervals
+        if any(
+            k in kwargs
+            for k in ("startTimeFromAscendingNode", "completionTimeFromAscendingNode")
+        ):
+            kwargs.setdefault("startTimeFromAscendingNode", "..")
+            kwargs.setdefault("completionTimeFromAscendingNode", "..")

-        # move assets from properties to product's attr
-        for product in products:
-            product.assets.update(product.properties.pop("assets", {}))
+        query_params = format_query_params(product_type, self.config, kwargs)

-        return products
+        # Build the final query string, in one go without quoting it
+        # (some providers do not operate well with urlencoded and quoted query strings)
+        def quote_via(x: Any, *_args, **_kwargs) -> str:
+            return x
+
+        return (
+            query_params,
+            urlencode(query_params, doseq=True, quote_via=quote_via),
+        )

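The identity `quote_via` above is the interesting part: `urlencode` percent-encodes values by default, which some providers reject, so values are passed through untouched; `".."` marks the open end of a half-bounded time interval. Both behaviours in a standalone snippet:

```python
from typing import Any
from urllib.parse import urlencode

params = {"startTimeFromAscendingNode": "2021-01-01"}
params.setdefault("completionTimeFromAscendingNode", "..")  # open-ended interval


def quote_via(x: Any, *_args: Any, **_kwargs: Any) -> str:
    return x  # skip percent-encoding entirely


print(urlencode(params, doseq=True, quote_via=quote_via))
# startTimeFromAscendingNode=2021-01-01&completionTimeFromAscendingNode=..
```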
     def discover_queryables(
         self, **kwargs: Any
@@ -1288,16 +1702,37 @@ class StacSearch(PostJsonSearch):

         :param kwargs: additional filters for queryables (`productType` and other search
             arguments)
-        :type kwargs: Any
         :returns: fetched queryable parameters dict
-        :rtype: Optional[Dict[str, Annotated[Any, FieldInfo]]]
         """
+        if (
+            not self.config.discover_queryables["fetch_url"]
+            and not self.config.discover_queryables["product_type_fetch_url"]
+        ):
+            logger.info(f"Cannot fetch queryables with {self.provider}")
+            return None
+
         product_type = kwargs.get("productType", None)
         provider_product_type = (
             self.config.products.get(product_type, {}).get("productType", product_type)
             if product_type
             else None
         )
+        if (
+            provider_product_type
+            and not self.config.discover_queryables["product_type_fetch_url"]
+        ):
+            logger.info(
+                f"Cannot fetch queryables for a specific product type with {self.provider}"
+            )
+            return None
+        if (
+            not provider_product_type
+            and not self.config.discover_queryables["fetch_url"]
+        ):
+            logger.info(
+                f"Cannot fetch global queryables with {self.provider}. A product type must be specified"
+            )
+            return None

         try:
             unparsed_fetch_url = (
@@ -1309,12 +1744,20 @@ class StacSearch(PostJsonSearch):
             fetch_url = unparsed_fetch_url.format(
                 provider_product_type=provider_product_type, **self.config.__dict__
             )
+            auth = (
+                self.auth
+                if hasattr(self, "auth") and isinstance(self.auth, AuthBase)
+                else None
+            )
             response = QueryStringSearch._request(
                 self,
-                fetch_url,
-                info_message="Fetching queryables: {}".format(fetch_url),
-                exception_message="Skipping error while fetching queryables for "
-                "{} {} instance:".format(self.provider, self.__class__.__name__),
+                PreparedSearch(
+                    url=fetch_url,
+                    auth=auth,
+                    info_message="Fetching queryables: {}".format(fetch_url),
+                    exception_message="Skipping error while fetching queryables for "
+                    "{} {} instance:".format(self.provider, self.__class__.__name__),
+                ),
             )
         except (RequestError, KeyError, AttributeError):
             return None
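The three early returns added to `discover_queryables` encode one decision: whether the provider's `discover_queryables` config offers a URL suited to the request (a global `fetch_url`, a per-product-type `product_type_fetch_url`, or neither). The same decision as a small function, with illustrative config values:

```python
from typing import Optional


def pick_fetch_url(
    discover_queryables: dict, provider_product_type: Optional[str]
) -> Optional[str]:
    fetch_url = discover_queryables.get("fetch_url")
    pt_fetch_url = discover_queryables.get("product_type_fetch_url")
    if not fetch_url and not pt_fetch_url:
        return None  # provider exposes no queryables at all
    if provider_product_type:
        return pt_fetch_url  # None: per-product-type fetch unsupported
    return fetch_url  # None: a product type must be specified


assert pick_fetch_url({"fetch_url": None, "product_type_fetch_url": None}, "S2_MSI_L1C") is None
```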
@@ -1348,7 +1791,7 @@ class StacSearch(PostJsonSearch):
         for json_param, json_mtd in json_queryables.items():
             param = (
                 get_queryable_from_provider(
-                    json_param, self.config.metadata_mapping
+                    json_param, self.get_metadata_mapping(product_type)
                 )
                 or json_param
             )
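`get_queryable_from_provider` now receives the product-type-specific metadata mapping rather than the provider-wide one, and the provider's parameter name is kept as a fallback when no mapping entry matches. Sketched as a plain reverse lookup (the real helper lives in eodag and handles more mapping formats):

```python
from typing import Dict, List, Optional


def get_queryable_from_provider_sketch(
    provider_queryable: str, metadata_mapping: Dict[str, List[str]]
) -> Optional[str]:
    # reverse lookup: which eodag parameter maps to this provider queryable?
    for eodag_param, (_, parse_expr) in metadata_mapping.items():
        if provider_queryable in parse_expr:
            return eodag_param
    return None


mapping = {"cloudCover": ["cloudCover", "{eo:cloud_cover}"]}
param = get_queryable_from_provider_sketch("eo:cloud_cover", mapping) or "eo:cloud_cover"
assert param == "cloudCover"
```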
@@ -1362,3 +1805,18 @@ class StacSearch(PostJsonSearch):
         python_queryables = create_model("m", **field_definitions).model_fields

         return model_fields_to_annotated(python_queryables)
+
+
+class PostJsonSearchWithStacQueryables(StacSearch, PostJsonSearch):
+    """A specialisation of a :class:`~eodag.plugins.search.qssearch.PostJsonSearch` that
+    uses generic STAC configuration for queryables.
+    """
+
+    def __init__(self, provider: str, config: PluginConfig) -> None:
+        PostJsonSearch.__init__(self, provider, config)
+
+    def build_query_string(
+        self, product_type: str, **kwargs: Any
+    ) -> Tuple[Dict[str, Any], str]:
+        """Build The query string using the search parameters"""
+        return PostJsonSearch.build_query_string(self, product_type, **kwargs)