eodag 4.0.0a1__py3-none-any.whl → 4.0.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/__init__.py +6 -1
- eodag/api/collection.py +353 -0
- eodag/api/core.py +308 -296
- eodag/api/product/_product.py +15 -29
- eodag/api/product/drivers/__init__.py +2 -42
- eodag/api/product/drivers/base.py +0 -11
- eodag/api/product/metadata_mapping.py +34 -5
- eodag/api/search_result.py +144 -9
- eodag/cli.py +18 -15
- eodag/config.py +37 -3
- eodag/plugins/apis/ecmwf.py +16 -4
- eodag/plugins/apis/usgs.py +18 -7
- eodag/plugins/crunch/filter_latest_intersect.py +1 -0
- eodag/plugins/crunch/filter_overlap.py +3 -7
- eodag/plugins/search/__init__.py +3 -0
- eodag/plugins/search/base.py +6 -6
- eodag/plugins/search/build_search_result.py +157 -56
- eodag/plugins/search/cop_marine.py +48 -8
- eodag/plugins/search/csw.py +18 -8
- eodag/plugins/search/qssearch.py +331 -88
- eodag/plugins/search/static_stac_search.py +11 -12
- eodag/resources/collections.yml +610 -348
- eodag/resources/ext_collections.json +1 -1
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/providers.yml +330 -58
- eodag/resources/stac_provider.yml +4 -2
- eodag/resources/user_conf_template.yml +9 -0
- eodag/types/__init__.py +2 -0
- eodag/types/queryables.py +16 -0
- eodag/utils/__init__.py +47 -2
- eodag/utils/repr.py +2 -0
- {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/METADATA +4 -2
- {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/RECORD +37 -36
- {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/WHEEL +0 -0
- {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/entry_points.txt +0 -0
- {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/licenses/LICENSE +0 -0
- {eodag-4.0.0a1.dist-info → eodag-4.0.0a2.dist-info}/top_level.txt +0 -0
eodag/plugins/search/qssearch.py
CHANGED
|
@@ -34,6 +34,7 @@ from typing import (
|
|
|
34
34
|
)
|
|
35
35
|
from urllib.error import URLError
|
|
36
36
|
from urllib.parse import (
|
|
37
|
+
parse_qs,
|
|
37
38
|
parse_qsl,
|
|
38
39
|
quote,
|
|
39
40
|
quote_plus,
|
|
@@ -68,16 +69,19 @@ from eodag.api.product.metadata_mapping import (
|
|
|
68
69
|
properties_from_json,
|
|
69
70
|
properties_from_xml,
|
|
70
71
|
)
|
|
71
|
-
from eodag.api.search_result import RawSearchResult
|
|
72
|
+
from eodag.api.search_result import RawSearchResult, SearchResult
|
|
72
73
|
from eodag.plugins.search import PreparedSearch
|
|
73
74
|
from eodag.plugins.search.base import Search
|
|
74
75
|
from eodag.types import json_field_definition_to_python, model_fields_to_annotated
|
|
75
76
|
from eodag.types.queryables import Queryables
|
|
76
77
|
from eodag.types.search_args import SortByList
|
|
77
78
|
from eodag.utils import (
|
|
79
|
+
DEFAULT_ITEMS_PER_PAGE,
|
|
80
|
+
DEFAULT_PAGE,
|
|
78
81
|
DEFAULT_SEARCH_TIMEOUT,
|
|
79
82
|
GENERIC_COLLECTION,
|
|
80
83
|
HTTP_REQ_TIMEOUT,
|
|
84
|
+
KNOWN_NEXT_PAGE_TOKEN_KEYS,
|
|
81
85
|
REQ_RETRY_BACKOFF_FACTOR,
|
|
82
86
|
REQ_RETRY_STATUS_FORCELIST,
|
|
83
87
|
REQ_RETRY_TOTAL,
|
|
@@ -150,8 +154,7 @@ class QueryStringSearch(Search):
|
|
|
150
154
|
pagination requests. This is a simple Python format string which will be resolved using the following
|
|
151
155
|
keywords: ``url`` (the base url of the search endpoint), ``search`` (the query string corresponding
|
|
152
156
|
to the search request), ``items_per_page`` (the number of items to return per page),
|
|
153
|
-
``skip`` (the number of items to skip)
|
|
154
|
-
starting from 1) and ``page`` (which page to return).
|
|
157
|
+
``skip`` (the number of items to skip).
|
|
155
158
|
* :attr:`~eodag.config.PluginConfig.Pagination.total_items_nb_key_path` (``str``): An XPath or JsonPath
|
|
156
159
|
leading to the total number of results satisfying a request. This is used for providers which provides the
|
|
157
160
|
total results metadata along with the result of the query and don't have an endpoint for querying
|
|
@@ -182,8 +185,8 @@ class QueryStringSearch(Search):
|
|
|
182
185
|
* :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_id` (``str``): mapping for the
|
|
183
186
|
collection id
|
|
184
187
|
* :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_parsable_metadata`
|
|
185
|
-
(``dict[str, str]``): mapping for collection metadata (e.g. ``
|
|
186
|
-
from the provider result
|
|
188
|
+
(``dict[str, str]``): mapping for collection metadata (e.g. ``description``, ``license``) which can be
|
|
189
|
+
parsed from the provider result
|
|
187
190
|
* :attr:`~eodag.config.PluginConfig.DiscoverCollections.generic_collection_parsable_properties`
|
|
188
191
|
(``dict[str, str]``): mapping for collection properties which can be parsed from the result and are not
|
|
189
192
|
collection metadata
|
|
@@ -603,15 +606,19 @@ class QueryStringSearch(Search):
|
|
|
603
606
|
),
|
|
604
607
|
)
|
|
605
608
|
# collections_config extraction
|
|
606
|
-
|
|
607
|
-
generic_collection_id
|
|
608
|
-
] = properties_from_json(
|
|
609
|
+
collection_properties = properties_from_json(
|
|
609
610
|
collection_result,
|
|
610
611
|
self.config.discover_collections[
|
|
611
612
|
"generic_collection_parsable_metadata"
|
|
612
613
|
],
|
|
613
614
|
)
|
|
614
|
-
|
|
615
|
+
conf_update_dict["collections_config"][
|
|
616
|
+
generic_collection_id
|
|
617
|
+
] = {
|
|
618
|
+
k: v
|
|
619
|
+
for k, v in collection_properties.items()
|
|
620
|
+
if v != NOT_AVAILABLE
|
|
621
|
+
}
|
|
615
622
|
if (
|
|
616
623
|
"single_collection_parsable_metadata"
|
|
617
624
|
in self.config.discover_collections
|
|
@@ -619,29 +626,50 @@ class QueryStringSearch(Search):
|
|
|
619
626
|
collection_data = self._get_collection_metadata_from_single_collection_endpoint(
|
|
620
627
|
generic_collection_id
|
|
621
628
|
)
|
|
629
|
+
collection_data_id = collection_data.pop("id", None)
|
|
630
|
+
|
|
631
|
+
# remove the collection if it should have been renamed but renaming failed
|
|
632
|
+
if (
|
|
633
|
+
collection_data_id
|
|
634
|
+
and collection_data_id == NOT_AVAILABLE
|
|
635
|
+
):
|
|
636
|
+
del conf_update_dict["collections_config"][
|
|
637
|
+
generic_collection_id
|
|
638
|
+
]
|
|
639
|
+
del conf_update_dict["providers_config"][
|
|
640
|
+
generic_collection_id
|
|
641
|
+
]
|
|
642
|
+
return
|
|
643
|
+
|
|
622
644
|
conf_update_dict["collections_config"][
|
|
623
645
|
generic_collection_id
|
|
624
|
-
]
|
|
646
|
+
] |= {
|
|
647
|
+
k: v
|
|
648
|
+
for k, v in collection_data.items()
|
|
649
|
+
if v != NOT_AVAILABLE
|
|
650
|
+
}
|
|
625
651
|
|
|
626
652
|
# update collection id if needed
|
|
627
|
-
if
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
generic_collection_id
|
|
653
|
+
if (
|
|
654
|
+
collection_data_id
|
|
655
|
+
and collection_data_id != generic_collection_id
|
|
656
|
+
):
|
|
657
|
+
logger.debug(
|
|
658
|
+
"Rename %s collection to %s",
|
|
659
|
+
generic_collection_id,
|
|
660
|
+
collection_data_id,
|
|
661
|
+
)
|
|
662
|
+
conf_update_dict["providers_config"][
|
|
663
|
+
collection_data_id
|
|
664
|
+
] = conf_update_dict["providers_config"].pop(
|
|
665
|
+
generic_collection_id
|
|
666
|
+
)
|
|
667
|
+
conf_update_dict["collections_config"][
|
|
668
|
+
collection_data_id
|
|
669
|
+
] = conf_update_dict["collections_config"].pop(
|
|
670
|
+
generic_collection_id
|
|
671
|
+
)
|
|
672
|
+
generic_collection_id = collection_data_id
|
|
645
673
|
|
|
646
674
|
# update keywords
|
|
647
675
|
keywords_fields = [
|
|
@@ -681,12 +709,11 @@ class QueryStringSearch(Search):
|
|
|
681
709
|
r"[\[\]'\"]", "", keywords_values_str
|
|
682
710
|
)
|
|
683
711
|
# sorted list of unique lowercase keywords
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
)
|
|
712
|
+
keywords_values = sorted(set(keywords_values_str.split(",")))
|
|
713
|
+
|
|
687
714
|
conf_update_dict["collections_config"][generic_collection_id][
|
|
688
715
|
"keywords"
|
|
689
|
-
] =
|
|
716
|
+
] = keywords_values
|
|
690
717
|
|
|
691
718
|
# run concurrent requests and aggregate the results in conf_update_dict
|
|
692
719
|
max_connections = self.config.discover_collections.get(
|
|
@@ -750,18 +777,22 @@ class QueryStringSearch(Search):
|
|
|
750
777
|
self,
|
|
751
778
|
prep: PreparedSearch = PreparedSearch(),
|
|
752
779
|
**kwargs: Any,
|
|
753
|
-
) ->
|
|
780
|
+
) -> SearchResult:
|
|
754
781
|
"""Perform a search on an OpenSearch-like interface
|
|
755
782
|
|
|
756
783
|
:param prep: Object collecting needed information for search.
|
|
757
784
|
"""
|
|
758
785
|
count = prep.count
|
|
786
|
+
raise_errors = getattr(prep, "raise_errors", False)
|
|
759
787
|
collection = cast(str, kwargs.get("collection", prep.collection))
|
|
760
788
|
if collection == GENERIC_COLLECTION:
|
|
761
789
|
logger.warning(
|
|
762
790
|
"GENERIC_COLLECTION is not a real collection and should only be used internally as a template"
|
|
763
791
|
)
|
|
764
|
-
|
|
792
|
+
result = SearchResult([])
|
|
793
|
+
if prep.count and not result.number_matched:
|
|
794
|
+
result.number_matched = 0
|
|
795
|
+
return result
|
|
765
796
|
|
|
766
797
|
sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
|
|
767
798
|
prep.sort_by_qs, _ = (
|
|
@@ -818,13 +849,18 @@ class QueryStringSearch(Search):
|
|
|
818
849
|
provider_results = self.do_search(prep, **kwargs)
|
|
819
850
|
if count and total_items is None and hasattr(prep, "total_items_nb"):
|
|
820
851
|
total_items = prep.total_items_nb
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
852
|
+
if not count and "number_matched" in kwargs:
|
|
853
|
+
total_items = kwargs["number_matched"]
|
|
854
|
+
|
|
855
|
+
eo_products = self.normalize_results(provider_results, **kwargs)
|
|
856
|
+
formated_result = SearchResult(
|
|
857
|
+
eo_products,
|
|
858
|
+
total_items,
|
|
859
|
+
search_params=provider_results.search_params,
|
|
860
|
+
next_page_token=getattr(provider_results, "next_page_token", None),
|
|
861
|
+
raise_errors=raise_errors,
|
|
862
|
+
)
|
|
863
|
+
return formated_result
|
|
828
864
|
|
|
829
865
|
def build_query_string(
|
|
830
866
|
self, collection: str, query_dict: dict[str, Any]
|
|
@@ -852,9 +888,12 @@ class QueryStringSearch(Search):
|
|
|
852
888
|
**kwargs: Any,
|
|
853
889
|
) -> tuple[list[str], Optional[int]]:
|
|
854
890
|
"""Build paginated urls"""
|
|
855
|
-
|
|
891
|
+
token = getattr(prep, "next_page_token", None)
|
|
856
892
|
items_per_page = prep.items_per_page
|
|
857
893
|
count = prep.count
|
|
894
|
+
next_page_token_key = str(
|
|
895
|
+
self.config.pagination.get("next_page_token_key", "page")
|
|
896
|
+
)
|
|
858
897
|
|
|
859
898
|
urls = []
|
|
860
899
|
total_results = 0 if count else None
|
|
@@ -881,8 +920,19 @@ class QueryStringSearch(Search):
|
|
|
881
920
|
search_endpoint = self.config.api_endpoint.rstrip("/").format(
|
|
882
921
|
_collection=provider_collection
|
|
883
922
|
)
|
|
884
|
-
|
|
885
|
-
|
|
923
|
+
# numeric page token
|
|
924
|
+
if (
|
|
925
|
+
next_page_token_key == "page" or next_page_token_key == "skip"
|
|
926
|
+
) and items_per_page is not None:
|
|
927
|
+
if token is None and next_page_token_key == "skip":
|
|
928
|
+
# first page & next_page_token_key == skip
|
|
929
|
+
token = 0
|
|
930
|
+
elif token is None:
|
|
931
|
+
# first page & next_page_token_key == page
|
|
932
|
+
token = self.config.pagination.get("start_page", DEFAULT_PAGE)
|
|
933
|
+
else:
|
|
934
|
+
# next pages
|
|
935
|
+
token = int(token)
|
|
886
936
|
if count:
|
|
887
937
|
count_endpoint = self.config.pagination.get(
|
|
888
938
|
"count_endpoint", ""
|
|
@@ -906,22 +956,23 @@ class QueryStringSearch(Search):
|
|
|
906
956
|
raise MisconfiguredError(
|
|
907
957
|
f"next_page_url_tpl is missing in {self.provider} search.pagination configuration"
|
|
908
958
|
)
|
|
909
|
-
|
|
959
|
+
next_page_url = self.config.pagination["next_page_url_tpl"].format(
|
|
910
960
|
url=search_endpoint,
|
|
911
961
|
search=qs_with_sort,
|
|
912
962
|
items_per_page=items_per_page,
|
|
913
|
-
|
|
914
|
-
skip=
|
|
915
|
-
skip_base_1=(page - 1) * items_per_page + 1,
|
|
963
|
+
next_page_token=token,
|
|
964
|
+
skip=token,
|
|
916
965
|
)
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
966
|
+
|
|
967
|
+
if token is not None:
|
|
968
|
+
prep.next_page_token = token
|
|
969
|
+
urls.append(next_page_url)
|
|
970
|
+
|
|
920
971
|
return list(dict.fromkeys(urls)), total_results
|
|
921
972
|
|
|
922
973
|
def do_search(
|
|
923
974
|
self, prep: PreparedSearch = PreparedSearch(items_per_page=None), **kwargs: Any
|
|
924
|
-
) ->
|
|
975
|
+
) -> RawSearchResult:
|
|
925
976
|
"""Perform the actual search request.
|
|
926
977
|
|
|
927
978
|
If there is a specified number of items per page, return the results as soon
|
|
@@ -962,6 +1013,7 @@ class QueryStringSearch(Search):
|
|
|
962
1013
|
if self.config.result_type == "xml":
|
|
963
1014
|
root_node = etree.fromstring(response.content)
|
|
964
1015
|
namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
|
|
1016
|
+
resp_as_json = {}
|
|
965
1017
|
results_xpath = root_node.xpath(
|
|
966
1018
|
self.config.results_entry or "//ns:entry", namespaces=namespaces
|
|
967
1019
|
)
|
|
@@ -1006,7 +1058,6 @@ class QueryStringSearch(Search):
|
|
|
1006
1058
|
path_parsed = next_page_url_key_path
|
|
1007
1059
|
found_paths = path_parsed.find(resp_as_json)
|
|
1008
1060
|
if found_paths and not isinstance(found_paths, int):
|
|
1009
|
-
self.next_page_url = found_paths[0].value
|
|
1010
1061
|
logger.debug(
|
|
1011
1062
|
"Next page URL collected and set for the next search",
|
|
1012
1063
|
)
|
|
@@ -1016,7 +1067,6 @@ class QueryStringSearch(Search):
|
|
|
1016
1067
|
path_parsed = next_page_query_obj_key_path
|
|
1017
1068
|
found_paths = path_parsed.find(resp_as_json)
|
|
1018
1069
|
if found_paths and not isinstance(found_paths, int):
|
|
1019
|
-
self.next_page_query_obj = found_paths[0].value
|
|
1020
1070
|
logger.debug(
|
|
1021
1071
|
"Next page Query-object collected and set for the next search",
|
|
1022
1072
|
)
|
|
@@ -1084,8 +1134,135 @@ class QueryStringSearch(Search):
|
|
|
1084
1134
|
):
|
|
1085
1135
|
del prep.total_items_nb
|
|
1086
1136
|
if items_per_page is not None and len(results) == items_per_page:
|
|
1087
|
-
|
|
1088
|
-
|
|
1137
|
+
|
|
1138
|
+
raw_search_results = self._build_raw_search_results(
|
|
1139
|
+
results, resp_as_json, kwargs, items_per_page, prep
|
|
1140
|
+
)
|
|
1141
|
+
return raw_search_results
|
|
1142
|
+
|
|
1143
|
+
raw_search_results = self._build_raw_search_results(
|
|
1144
|
+
results, resp_as_json, kwargs, items_per_page, prep
|
|
1145
|
+
)
|
|
1146
|
+
return raw_search_results
|
|
1147
|
+
|
|
1148
|
+
def _build_raw_search_results(
|
|
1149
|
+
self,
|
|
1150
|
+
results: list[dict[str, Any]],
|
|
1151
|
+
resp_as_json: dict[str, Any],
|
|
1152
|
+
search_kwargs: dict[str, Any],
|
|
1153
|
+
items_per_page: Optional[int],
|
|
1154
|
+
prep: PreparedSearch,
|
|
1155
|
+
):
|
|
1156
|
+
"""
|
|
1157
|
+
Build a `RawSearchResult` object from raw search results.
|
|
1158
|
+
|
|
1159
|
+
This method initializes a `RawSearchResult` instance with the provided results,
|
|
1160
|
+
sets the search parameters, and determines the token or identifier for the next page
|
|
1161
|
+
based on the pagination configuration.
|
|
1162
|
+
|
|
1163
|
+
:param results: Raw results returned by the search.
|
|
1164
|
+
:param resp_as_json: The search response parsed as JSON.
|
|
1165
|
+
:param search_kwargs: Search parameters used for the query.
|
|
1166
|
+
:param items_per_page: Number of items per page.
|
|
1167
|
+
:param prep: Request preparation object containing query parameters.
|
|
1168
|
+
:returns: An object containing the raw results, search parameters, and the next page token if available.
|
|
1169
|
+
"""
|
|
1170
|
+
# Create the RawSearchResult object and populate basic fields
|
|
1171
|
+
raw_search_results = RawSearchResult(results)
|
|
1172
|
+
raw_search_results.search_params = search_kwargs | {
|
|
1173
|
+
"items_per_page": items_per_page
|
|
1174
|
+
}
|
|
1175
|
+
raw_search_results.query_params = prep.query_params
|
|
1176
|
+
raw_search_results.collection_def_params = prep.collection_def_params
|
|
1177
|
+
raw_search_results.next_page_token_key = prep.next_page_token_key
|
|
1178
|
+
|
|
1179
|
+
# If no JSON response is available, return the result as is
|
|
1180
|
+
if resp_as_json is None:
|
|
1181
|
+
return raw_search_results
|
|
1182
|
+
|
|
1183
|
+
# Handle pagination
|
|
1184
|
+
if self.config.pagination.get("next_page_query_obj_key_path") is not None:
|
|
1185
|
+
# Use next_page_query_obj_key_path to find the next page token in the response
|
|
1186
|
+
jsonpath_expr = string_to_jsonpath(
|
|
1187
|
+
self.config.pagination["next_page_query_obj_key_path"]
|
|
1188
|
+
)
|
|
1189
|
+
if isinstance(jsonpath_expr, str):
|
|
1190
|
+
raise PluginImplementationError(
|
|
1191
|
+
"next_page_query_obj_key_path must be parsed to JSONPath on plugin init"
|
|
1192
|
+
)
|
|
1193
|
+
jsonpath_match = jsonpath_expr.find(resp_as_json)
|
|
1194
|
+
if jsonpath_match:
|
|
1195
|
+
next_page_query_obj = jsonpath_match[0].value
|
|
1196
|
+
next_page_token_key = raw_search_results.next_page_token_key
|
|
1197
|
+
if next_page_token_key and next_page_token_key in next_page_query_obj:
|
|
1198
|
+
raw_search_results.next_page_token = next_page_query_obj[
|
|
1199
|
+
next_page_token_key
|
|
1200
|
+
]
|
|
1201
|
+
else:
|
|
1202
|
+
for token_key in KNOWN_NEXT_PAGE_TOKEN_KEYS:
|
|
1203
|
+
if token_key in next_page_query_obj:
|
|
1204
|
+
raw_search_results.next_page_token = next_page_query_obj[
|
|
1205
|
+
token_key
|
|
1206
|
+
]
|
|
1207
|
+
raw_search_results.next_page_token_key = token_key
|
|
1208
|
+
logger.debug(
|
|
1209
|
+
"Using '%s' as next_page_token_key for the next search",
|
|
1210
|
+
token_key,
|
|
1211
|
+
)
|
|
1212
|
+
break
|
|
1213
|
+
else:
|
|
1214
|
+
raw_search_results.next_page_token = None
|
|
1215
|
+
elif self.config.pagination.get("next_page_url_key_path") is not None:
|
|
1216
|
+
jsonpath_expr = string_to_jsonpath(
|
|
1217
|
+
self.config.pagination["next_page_url_key_path"]
|
|
1218
|
+
)
|
|
1219
|
+
# Use next_page_url_key_path to find the next page token in the response
|
|
1220
|
+
if isinstance(jsonpath_expr, str):
|
|
1221
|
+
raise PluginImplementationError(
|
|
1222
|
+
"next_page_url_key_path must be parsed to JSONPath on plugin init"
|
|
1223
|
+
)
|
|
1224
|
+
href = jsonpath_expr.find(resp_as_json)
|
|
1225
|
+
if href:
|
|
1226
|
+
# Determine the key to extract the token from the URL or object
|
|
1227
|
+
href_value = href[0].value
|
|
1228
|
+
next_page_token_key = (
|
|
1229
|
+
unquote(self.config.pagination["parse_url_key"])
|
|
1230
|
+
if "parse_url_key" in self.config.pagination
|
|
1231
|
+
else raw_search_results.next_page_token_key
|
|
1232
|
+
)
|
|
1233
|
+
raw_search_results.next_page_token_key = next_page_token_key
|
|
1234
|
+
# Try to extract the token from the found value
|
|
1235
|
+
if next_page_token_key in href_value:
|
|
1236
|
+
raw_search_results.next_page_token = href_value[next_page_token_key]
|
|
1237
|
+
elif next_page_token_key in unquote(href_value):
|
|
1238
|
+
# If the token is in the URL query string
|
|
1239
|
+
query = urlparse(href_value).query
|
|
1240
|
+
page_param = parse_qs(query).get(next_page_token_key)
|
|
1241
|
+
if page_param:
|
|
1242
|
+
raw_search_results.next_page_token = page_param[0]
|
|
1243
|
+
else:
|
|
1244
|
+
# Use the whole value as the token
|
|
1245
|
+
raw_search_results.next_page_token = href_value
|
|
1246
|
+
else:
|
|
1247
|
+
# No token found: leave the next page token unset (None)
|
|
1248
|
+
raw_search_results.next_page_token = None
|
|
1249
|
+
else:
|
|
1250
|
+
# pagination using next_page_token_key
|
|
1251
|
+
next_page_token_key = raw_search_results.next_page_token_key
|
|
1252
|
+
next_page_token = prep.next_page_token
|
|
1253
|
+
# page number as next_page_token_key
|
|
1254
|
+
if next_page_token is not None and next_page_token_key == "page":
|
|
1255
|
+
raw_search_results.next_page_token = str(int(next_page_token) + 1)
|
|
1256
|
+
# skip as next_page_token_key
|
|
1257
|
+
elif next_page_token is not None and next_page_token_key == "skip":
|
|
1258
|
+
raw_search_results.next_page_token = str(
|
|
1259
|
+
int(next_page_token)
|
|
1260
|
+
+ int(prep.items_per_page or DEFAULT_ITEMS_PER_PAGE)
|
|
1261
|
+
)
|
|
1262
|
+
else:
|
|
1263
|
+
raw_search_results.next_page_token = None
|
|
1264
|
+
|
|
1265
|
+
return raw_search_results
|
|
1089
1266
|
|
|
1090
1267
|
def normalize_results(
|
|
1091
1268
|
self, results: RawSearchResult, **kwargs: Any
|
|
@@ -1363,7 +1540,7 @@ class ODataV4Search(QueryStringSearch):
|
|
|
1363
1540
|
|
|
1364
1541
|
def do_search(
|
|
1365
1542
|
self, prep: PreparedSearch = PreparedSearch(), **kwargs: Any
|
|
1366
|
-
) ->
|
|
1543
|
+
) -> RawSearchResult:
|
|
1367
1544
|
"""A two step search can be performed if the metadata are not given into the search result"""
|
|
1368
1545
|
|
|
1369
1546
|
if getattr(self.config, "per_product_metadata_query", False):
|
|
@@ -1394,7 +1571,17 @@ class ODataV4Search(QueryStringSearch):
|
|
|
1394
1571
|
{item["id"]: item["value"] for item in response.json()["value"]}
|
|
1395
1572
|
)
|
|
1396
1573
|
final_result.append(entity)
|
|
1397
|
-
|
|
1574
|
+
raw_search_results = RawSearchResult(final_result)
|
|
1575
|
+
raw_search_results.search_params = kwargs
|
|
1576
|
+
raw_search_results.query_params = (
|
|
1577
|
+
prep.query_params if hasattr(prep, "query_params") else {}
|
|
1578
|
+
)
|
|
1579
|
+
raw_search_results.collection_def_params = (
|
|
1580
|
+
prep.collection_def_params
|
|
1581
|
+
if hasattr(prep, "collection_def_params")
|
|
1582
|
+
else {}
|
|
1583
|
+
)
|
|
1584
|
+
return raw_search_results
|
|
1398
1585
|
else:
|
|
1399
1586
|
return super(ODataV4Search, self).do_search(prep, **kwargs)
|
|
1400
1587
|
|
|
@@ -1466,10 +1653,12 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1466
1653
|
self,
|
|
1467
1654
|
prep: PreparedSearch = PreparedSearch(),
|
|
1468
1655
|
**kwargs: Any,
|
|
1469
|
-
) ->
|
|
1656
|
+
) -> SearchResult:
|
|
1470
1657
|
"""Perform a search on an OpenSearch-like interface"""
|
|
1471
1658
|
collection = kwargs.get("collection", "")
|
|
1472
1659
|
count = prep.count
|
|
1660
|
+
raise_errors = getattr(prep, "raise_errors", False)
|
|
1661
|
+
number_matched = kwargs.pop("number_matched", None)
|
|
1473
1662
|
sort_by_arg: Optional[SortByList] = self.get_sort_by_arg(kwargs)
|
|
1474
1663
|
_, sort_by_qp = (
|
|
1475
1664
|
("", {}) if sort_by_arg is None else self.build_sort_by(sort_by_arg)
|
|
@@ -1553,9 +1742,7 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1553
1742
|
return super(PostJsonSearch, self)._request(*x, **y)
|
|
1554
1743
|
|
|
1555
1744
|
try:
|
|
1556
|
-
eo_products
|
|
1557
|
-
prep, **kwargs
|
|
1558
|
-
)
|
|
1745
|
+
eo_products = super(PostJsonSearch, self).query(prep, **kwargs)
|
|
1559
1746
|
except Exception:
|
|
1560
1747
|
raise
|
|
1561
1748
|
finally:
|
|
@@ -1564,7 +1751,7 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1564
1751
|
plugin_config_backup, self.config.yaml_loader
|
|
1565
1752
|
)
|
|
1566
1753
|
|
|
1567
|
-
return eo_products
|
|
1754
|
+
return eo_products
|
|
1568
1755
|
|
|
1569
1756
|
# If we were not able to build query params but have queryable search criteria,
|
|
1570
1757
|
# this means the provider does not support the search criteria given. If so,
|
|
@@ -1578,7 +1765,10 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1578
1765
|
for k in keywords.keys()
|
|
1579
1766
|
if isinstance(collection_metadata_mapping.get(k), list)
|
|
1580
1767
|
):
|
|
1581
|
-
|
|
1768
|
+
result = SearchResult([])
|
|
1769
|
+
if prep.count:
|
|
1770
|
+
result.number_matched = 0
|
|
1771
|
+
return result
|
|
1582
1772
|
prep.query_params = dict(qp, **sort_by_qp)
|
|
1583
1773
|
prep.search_urls, total_items = self.collect_search_urls(prep, **kwargs)
|
|
1584
1774
|
if not count and getattr(prep, "need_count", False):
|
|
@@ -1589,13 +1779,19 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1589
1779
|
provider_results = self.do_search(prep, **kwargs)
|
|
1590
1780
|
if count and total_items is None and hasattr(prep, "total_items_nb"):
|
|
1591
1781
|
total_items = prep.total_items_nb
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1782
|
+
if not count and "number_matched" in kwargs and number_matched:
|
|
1783
|
+
total_items = number_matched
|
|
1784
|
+
|
|
1785
|
+
eo_products_normalize = self.normalize_results(provider_results, **kwargs)
|
|
1786
|
+
formated_result = SearchResult(
|
|
1787
|
+
eo_products_normalize,
|
|
1788
|
+
total_items,
|
|
1789
|
+
search_params=provider_results.search_params,
|
|
1790
|
+
next_page_token=getattr(provider_results, "next_page_token", None),
|
|
1791
|
+
next_page_token_key=getattr(provider_results, "next_page_token_key", None),
|
|
1792
|
+
raise_errors=raise_errors,
|
|
1793
|
+
)
|
|
1794
|
+
return formated_result
|
|
1599
1795
|
|
|
1600
1796
|
def normalize_results(
|
|
1601
1797
|
self, results: RawSearchResult, **kwargs: Any
|
|
@@ -1636,11 +1832,14 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1636
1832
|
**kwargs: Any,
|
|
1637
1833
|
) -> tuple[list[str], Optional[int]]:
|
|
1638
1834
|
"""Adds pagination to query parameters, and auth to url"""
|
|
1639
|
-
|
|
1835
|
+
token = getattr(prep, "next_page_token", None)
|
|
1640
1836
|
items_per_page = prep.items_per_page
|
|
1641
1837
|
count = prep.count
|
|
1642
1838
|
urls: list[str] = []
|
|
1643
1839
|
total_results = 0 if count else None
|
|
1840
|
+
next_page_token_key = prep.next_page_token_key or self.config.pagination.get(
|
|
1841
|
+
"next_page_token_key"
|
|
1842
|
+
)
|
|
1644
1843
|
|
|
1645
1844
|
if "count_endpoint" not in self.config.pagination:
|
|
1646
1845
|
# if count_endpoint is not set, total_results should be extracted from search result
|
|
@@ -1662,8 +1861,21 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1662
1861
|
raise MisconfiguredError(
|
|
1663
1862
|
"Missing %s in %s configuration" % (",".join(e.args), provider)
|
|
1664
1863
|
)
|
|
1665
|
-
|
|
1666
|
-
|
|
1864
|
+
# numeric page token
|
|
1865
|
+
if (
|
|
1866
|
+
next_page_token_key == "page" or next_page_token_key == "skip"
|
|
1867
|
+
) and items_per_page is not None:
|
|
1868
|
+
if token is None and next_page_token_key == "skip":
|
|
1869
|
+
# first page & next_page_token_key == skip
|
|
1870
|
+
token = max(
|
|
1871
|
+
0, self.config.pagination.get("start_page", DEFAULT_PAGE) - 1
|
|
1872
|
+
)
|
|
1873
|
+
elif token is None:
|
|
1874
|
+
# first page & next_page_token_key == page
|
|
1875
|
+
token = self.config.pagination.get("start_page", DEFAULT_PAGE)
|
|
1876
|
+
else:
|
|
1877
|
+
# next pages
|
|
1878
|
+
token = int(token)
|
|
1667
1879
|
if count:
|
|
1668
1880
|
count_endpoint = self.config.pagination.get(
|
|
1669
1881
|
"count_endpoint", ""
|
|
@@ -1680,21 +1892,52 @@ class PostJsonSearch(QueryStringSearch):
|
|
|
1680
1892
|
if total_results is None
|
|
1681
1893
|
else total_results + (_total_results or 0)
|
|
1682
1894
|
)
|
|
1683
|
-
|
|
1684
|
-
|
|
1895
|
+
# parse next page url if needed
|
|
1896
|
+
if "next_page_url_tpl" in self.config.pagination:
|
|
1897
|
+
search_endpoint = self.config.pagination["next_page_url_tpl"].format(
|
|
1898
|
+
url=search_endpoint,
|
|
1899
|
+
items_per_page=items_per_page,
|
|
1900
|
+
next_page_token=token,
|
|
1901
|
+
)
|
|
1902
|
+
|
|
1903
|
+
# parse next page body / query-obj if needed
|
|
1904
|
+
if "next_page_query_obj" in self.config.pagination and isinstance(
|
|
1905
|
+
self.config.pagination["next_page_query_obj"], str
|
|
1906
|
+
):
|
|
1907
|
+
if next_page_token_key is None or token is None:
|
|
1908
|
+
next_page_token_kwargs = {
|
|
1909
|
+
"next_page_token": -1,
|
|
1910
|
+
"next_page_token_key": NOT_AVAILABLE,
|
|
1911
|
+
}
|
|
1912
|
+
else:
|
|
1913
|
+
next_page_token_kwargs = {
|
|
1914
|
+
"next_page_token": token,
|
|
1915
|
+
"next_page_token_key": next_page_token_key,
|
|
1916
|
+
}
|
|
1917
|
+
next_page_token_kwargs["next_page_token_key"] = (
|
|
1918
|
+
next_page_token_key or NOT_AVAILABLE
|
|
1919
|
+
)
|
|
1920
|
+
next_page_token_kwargs["next_page_token"] = (
|
|
1921
|
+
token if token is not None else -1
|
|
1922
|
+
)
|
|
1923
|
+
|
|
1924
|
+
# next_page_query_obj needs to be parsed
|
|
1925
|
+
next_page_query_obj_str = self.config.pagination[
|
|
1926
|
+
"next_page_query_obj"
|
|
1927
|
+
].format(items_per_page=items_per_page, **next_page_token_kwargs)
|
|
1928
|
+
next_page_query_obj = orjson.loads(next_page_query_obj_str)
|
|
1929
|
+
# remove NOT_AVAILABLE entries
|
|
1930
|
+
next_page_query_obj.pop(NOT_AVAILABLE, None)
|
|
1931
|
+
if (
|
|
1932
|
+
next_page_token_key
|
|
1933
|
+
and next_page_query_obj.get(next_page_token_key) == "-1"
|
|
1685
1934
|
):
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
skip=(page - 1) * items_per_page,
|
|
1693
|
-
skip_base_1=(page - 1) * items_per_page + 1,
|
|
1694
|
-
)
|
|
1695
|
-
update_nested_dict(
|
|
1696
|
-
prep.query_params, orjson.loads(next_page_query_obj)
|
|
1697
|
-
)
|
|
1935
|
+
next_page_query_obj.pop(next_page_token_key, None)
|
|
1936
|
+
# update prep query_params with pagination info
|
|
1937
|
+
update_nested_dict(prep.query_params, next_page_query_obj)
|
|
1938
|
+
|
|
1939
|
+
if token is not None:
|
|
1940
|
+
prep.next_page_token = token
|
|
1698
1941
|
|
|
1699
1942
|
urls.append(search_endpoint)
|
|
1700
1943
|
return list(dict.fromkeys(urls)), total_results
|