eodag 2.12.1__py3-none-any.whl → 3.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/core.py +434 -319
- eodag/api/product/__init__.py +5 -1
- eodag/api/product/_assets.py +7 -2
- eodag/api/product/_product.py +46 -68
- eodag/api/product/metadata_mapping.py +181 -66
- eodag/api/search_result.py +21 -1
- eodag/cli.py +20 -6
- eodag/config.py +95 -6
- eodag/plugins/apis/base.py +8 -165
- eodag/plugins/apis/ecmwf.py +36 -24
- eodag/plugins/apis/usgs.py +40 -24
- eodag/plugins/authentication/aws_auth.py +2 -2
- eodag/plugins/authentication/header.py +31 -6
- eodag/plugins/authentication/keycloak.py +13 -84
- eodag/plugins/authentication/oauth.py +3 -3
- eodag/plugins/authentication/openid_connect.py +256 -46
- eodag/plugins/authentication/qsauth.py +3 -0
- eodag/plugins/authentication/sas_auth.py +8 -1
- eodag/plugins/authentication/token.py +92 -46
- eodag/plugins/authentication/token_exchange.py +120 -0
- eodag/plugins/download/aws.py +86 -91
- eodag/plugins/download/base.py +72 -40
- eodag/plugins/download/http.py +607 -264
- eodag/plugins/download/s3rest.py +28 -15
- eodag/plugins/manager.py +73 -57
- eodag/plugins/search/__init__.py +36 -0
- eodag/plugins/search/base.py +225 -18
- eodag/plugins/search/build_search_result.py +389 -32
- eodag/plugins/search/cop_marine.py +378 -0
- eodag/plugins/search/creodias_s3.py +15 -14
- eodag/plugins/search/csw.py +5 -7
- eodag/plugins/search/data_request_search.py +44 -20
- eodag/plugins/search/qssearch.py +508 -203
- eodag/plugins/search/static_stac_search.py +99 -36
- eodag/resources/constraints/climate-dt.json +13 -0
- eodag/resources/constraints/extremes-dt.json +8 -0
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +1897 -34
- eodag/resources/providers.yml +3539 -3277
- eodag/resources/stac.yml +48 -54
- eodag/resources/stac_api.yml +71 -25
- eodag/resources/stac_provider.yml +5 -0
- eodag/resources/user_conf_template.yml +51 -3
- eodag/rest/__init__.py +6 -0
- eodag/rest/cache.py +70 -0
- eodag/rest/config.py +68 -0
- eodag/rest/constants.py +27 -0
- eodag/rest/core.py +757 -0
- eodag/rest/server.py +397 -258
- eodag/rest/stac.py +438 -307
- eodag/rest/types/collections_search.py +44 -0
- eodag/rest/types/eodag_search.py +232 -43
- eodag/rest/types/{stac_queryables.py → queryables.py} +81 -43
- eodag/rest/types/stac_search.py +277 -0
- eodag/rest/utils/__init__.py +216 -0
- eodag/rest/utils/cql_evaluate.py +119 -0
- eodag/rest/utils/rfc3339.py +65 -0
- eodag/types/__init__.py +99 -9
- eodag/types/bbox.py +15 -14
- eodag/types/download_args.py +31 -0
- eodag/types/search_args.py +58 -7
- eodag/types/whoosh.py +81 -0
- eodag/utils/__init__.py +72 -9
- eodag/utils/constraints.py +37 -37
- eodag/utils/exceptions.py +23 -17
- eodag/utils/requests.py +138 -0
- eodag/utils/rest.py +104 -0
- eodag/utils/stac_reader.py +100 -16
- {eodag-2.12.1.dist-info → eodag-3.0.0b1.dist-info}/METADATA +64 -44
- eodag-3.0.0b1.dist-info/RECORD +109 -0
- {eodag-2.12.1.dist-info → eodag-3.0.0b1.dist-info}/WHEEL +1 -1
- {eodag-2.12.1.dist-info → eodag-3.0.0b1.dist-info}/entry_points.txt +6 -5
- eodag/plugins/apis/cds.py +0 -540
- eodag/rest/utils.py +0 -1133
- eodag-2.12.1.dist-info/RECORD +0 -94
- {eodag-2.12.1.dist-info → eodag-3.0.0b1.dist-info}/LICENSE +0 -0
- {eodag-2.12.1.dist-info → eodag-3.0.0b1.dist-info}/top_level.txt +0 -0
eodag/plugins/download/http.py
CHANGED
|
@@ -20,23 +20,37 @@ from __future__ import annotations
|
|
|
20
20
|
import logging
|
|
21
21
|
import os
|
|
22
22
|
import shutil
|
|
23
|
+
import tarfile
|
|
23
24
|
import zipfile
|
|
24
25
|
from datetime import datetime
|
|
25
26
|
from email.message import Message
|
|
26
27
|
from itertools import chain
|
|
27
|
-
from typing import
|
|
28
|
+
from typing import (
|
|
29
|
+
TYPE_CHECKING,
|
|
30
|
+
Any,
|
|
31
|
+
Dict,
|
|
32
|
+
Iterator,
|
|
33
|
+
List,
|
|
34
|
+
Optional,
|
|
35
|
+
Tuple,
|
|
36
|
+
TypedDict,
|
|
37
|
+
Union,
|
|
38
|
+
cast,
|
|
39
|
+
)
|
|
28
40
|
from urllib.parse import parse_qs, urlparse
|
|
29
41
|
|
|
30
42
|
import geojson
|
|
31
43
|
import requests
|
|
32
|
-
import requests_ftp
|
|
33
44
|
from lxml import etree
|
|
34
45
|
from requests import RequestException
|
|
46
|
+
from requests.auth import AuthBase
|
|
35
47
|
from stream_zip import ZIP_AUTO, stream_zip
|
|
36
48
|
|
|
37
49
|
from eodag.api.product.metadata_mapping import (
|
|
50
|
+
NOT_AVAILABLE,
|
|
38
51
|
OFFLINE_STATUS,
|
|
39
52
|
ONLINE_STATUS,
|
|
53
|
+
STAGING_STATUS,
|
|
40
54
|
mtd_cfg_as_conversion_and_querypath,
|
|
41
55
|
properties_from_json,
|
|
42
56
|
properties_from_xml,
|
|
@@ -49,10 +63,14 @@ from eodag.utils import (
|
|
|
49
63
|
HTTP_REQ_TIMEOUT,
|
|
50
64
|
USER_AGENT,
|
|
51
65
|
ProgressCallback,
|
|
66
|
+
StreamResponse,
|
|
52
67
|
flatten_top_directories,
|
|
68
|
+
guess_extension,
|
|
69
|
+
guess_file_type,
|
|
53
70
|
parse_header,
|
|
54
71
|
path_to_uri,
|
|
55
72
|
sanitize,
|
|
73
|
+
string_to_jsonpath,
|
|
56
74
|
uri_to_path,
|
|
57
75
|
)
|
|
58
76
|
from eodag.utils.exceptions import (
|
|
@@ -66,10 +84,11 @@ from eodag.utils.exceptions import (
|
|
|
66
84
|
if TYPE_CHECKING:
|
|
67
85
|
from requests import Response
|
|
68
86
|
|
|
69
|
-
from eodag.api.product import EOProduct
|
|
87
|
+
from eodag.api.product import Asset, EOProduct # type: ignore
|
|
70
88
|
from eodag.api.search_result import SearchResult
|
|
71
89
|
from eodag.config import PluginConfig
|
|
72
|
-
from eodag.
|
|
90
|
+
from eodag.types.download_args import DownloadConf
|
|
91
|
+
from eodag.utils import DownloadedCallback, Unpack
|
|
73
92
|
|
|
74
93
|
logger = logging.getLogger("eodag.download.http")
|
|
75
94
|
|
|
@@ -81,7 +100,7 @@ class HTTPDownload(Download):
|
|
|
81
100
|
:type provider: str
|
|
82
101
|
:param config: Download plugin configuration:
|
|
83
102
|
|
|
84
|
-
* ``config.base_uri`` (str) - default endpoint url
|
|
103
|
+
* ``config.base_uri`` (str) - (optional) default endpoint url
|
|
85
104
|
* ``config.extract`` (bool) - (optional) extract downloaded archive or not
|
|
86
105
|
* ``config.auth_error_code`` (int) - (optional) authentication error code
|
|
87
106
|
* ``config.dl_url_params`` (dict) - (optional) attitional parameters to send in the request
|
|
@@ -92,9 +111,8 @@ class HTTPDownload(Download):
|
|
|
92
111
|
* ``config.order_method`` (str) - (optional) HTTP request method, GET (default) or POST
|
|
93
112
|
* ``config.order_headers`` (dict) - (optional) order request headers
|
|
94
113
|
* ``config.order_on_response`` (dict) - (optional) edit or add new product properties
|
|
95
|
-
* ``config.
|
|
96
|
-
|
|
97
|
-
* ``config.order_status_error`` (dict) - (optional) key/value identifying an error status
|
|
114
|
+
* ``config.order_status`` (:class:`~eodag.config.PluginConfig.OrderStatus`) - (optional) Order status handling
|
|
115
|
+
|
|
98
116
|
|
|
99
117
|
:type config: :class:`~eodag.config.PluginConfig`
|
|
100
118
|
|
|
@@ -102,19 +120,13 @@ class HTTPDownload(Download):
|
|
|
102
120
|
|
|
103
121
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
104
122
|
super(HTTPDownload, self).__init__(provider, config)
|
|
105
|
-
if not hasattr(self.config, "base_uri"):
|
|
106
|
-
raise MisconfiguredError(
|
|
107
|
-
"{} plugin require a base_uri configuration key".format(
|
|
108
|
-
type(self).__name__
|
|
109
|
-
)
|
|
110
|
-
)
|
|
111
123
|
|
|
112
124
|
def orderDownload(
|
|
113
125
|
self,
|
|
114
126
|
product: EOProduct,
|
|
115
|
-
auth: Optional[
|
|
116
|
-
**kwargs:
|
|
117
|
-
) ->
|
|
127
|
+
auth: Optional[AuthBase] = None,
|
|
128
|
+
**kwargs: Unpack[DownloadConf],
|
|
129
|
+
) -> Optional[Dict[str, Any]]:
|
|
118
130
|
"""Send product order request.
|
|
119
131
|
|
|
120
132
|
It will be executed once before the download retry loop, if the product is OFFLINE
|
|
@@ -136,85 +148,120 @@ class HTTPDownload(Download):
|
|
|
136
148
|
|
|
137
149
|
:param product: The EO product to order
|
|
138
150
|
:type product: :class:`~eodag.api.product._product.EOProduct`
|
|
139
|
-
:param auth: (optional)
|
|
140
|
-
:type auth:
|
|
151
|
+
:param auth: (optional) authenticated object
|
|
152
|
+
:type auth: Optional[AuthBase]
|
|
141
153
|
:param kwargs: download additional kwargs
|
|
142
154
|
:type kwargs: Union[str, bool, dict]
|
|
155
|
+
:returns: the returned json status response
|
|
156
|
+
:rtype: dict
|
|
143
157
|
"""
|
|
144
|
-
|
|
145
|
-
|
|
158
|
+
product.properties["storageStatus"] = STAGING_STATUS
|
|
159
|
+
|
|
160
|
+
order_method = getattr(self.config, "order_method", "GET").upper()
|
|
161
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
162
|
+
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
163
|
+
OrderKwargs = TypedDict(
|
|
164
|
+
"OrderKwargs", {"json": Dict[str, Union[Any, List[str]]]}, total=False
|
|
165
|
+
)
|
|
166
|
+
order_kwargs: OrderKwargs = {}
|
|
167
|
+
if order_method == "POST":
|
|
146
168
|
# separate url & parameters
|
|
147
169
|
parts = urlparse(str(product.properties["orderLink"]))
|
|
148
170
|
query_dict = parse_qs(parts.query)
|
|
149
171
|
if not query_dict and parts.query:
|
|
150
172
|
query_dict = geojson.loads(parts.query)
|
|
151
173
|
order_url = parts._replace(query=None).geturl()
|
|
152
|
-
|
|
174
|
+
if query_dict:
|
|
175
|
+
order_kwargs["json"] = query_dict
|
|
153
176
|
else:
|
|
154
177
|
order_url = product.properties["orderLink"]
|
|
155
178
|
order_kwargs = {}
|
|
156
179
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
logger.debug(
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
180
|
+
headers = {**getattr(self.config, "order_headers", {}), **USER_AGENT}
|
|
181
|
+
try:
|
|
182
|
+
with requests.request(
|
|
183
|
+
method=order_method,
|
|
184
|
+
url=order_url,
|
|
185
|
+
auth=auth,
|
|
186
|
+
timeout=timeout,
|
|
187
|
+
headers=headers,
|
|
188
|
+
verify=ssl_verify,
|
|
189
|
+
**order_kwargs,
|
|
190
|
+
) as response:
|
|
191
|
+
logger.debug(f"{order_method} {order_url} {headers} {order_kwargs}")
|
|
192
|
+
try:
|
|
193
|
+
response.raise_for_status()
|
|
194
|
+
ordered_message = response.text
|
|
195
|
+
logger.debug(ordered_message)
|
|
196
|
+
product.properties["storageStatus"] = STAGING_STATUS
|
|
197
|
+
except RequestException as e:
|
|
198
|
+
if hasattr(e, "response") and (
|
|
199
|
+
content := getattr(e.response, "content", None)
|
|
200
|
+
):
|
|
201
|
+
error_message = f"{content.decode('utf-8')} - {e}"
|
|
202
|
+
else:
|
|
203
|
+
error_message = str(e)
|
|
204
|
+
logger.warning(
|
|
205
|
+
"%s could not be ordered, request returned %s",
|
|
206
|
+
product.properties["title"],
|
|
207
|
+
error_message,
|
|
208
|
+
)
|
|
209
|
+
self._check_auth_exception(e)
|
|
210
|
+
return self.order_response_process(response, product)
|
|
211
|
+
except requests.exceptions.Timeout as exc:
|
|
212
|
+
raise TimeOutError(exc, timeout=timeout) from exc
|
|
213
|
+
|
|
214
|
+
def order_response_process(
|
|
215
|
+
self, response: Response, product: EOProduct
|
|
216
|
+
) -> Optional[Dict[str, Any]]:
|
|
217
|
+
"""Process order response
|
|
218
|
+
|
|
219
|
+
:param response: The order response
|
|
220
|
+
:type response: :class:`~requests.Response`
|
|
221
|
+
:param product: The orderd EO product
|
|
222
|
+
:type product: :class:`~eodag.api.product._product.EOProduct`
|
|
223
|
+
:returns: the returned json status response
|
|
224
|
+
:rtype: dict
|
|
225
|
+
"""
|
|
226
|
+
on_response_mm = getattr(self.config, "order_on_response", {}).get(
|
|
184
227
|
"metadata_mapping", {}
|
|
185
228
|
)
|
|
186
|
-
if
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
229
|
+
if not on_response_mm:
|
|
230
|
+
return None
|
|
231
|
+
|
|
232
|
+
logger.debug("Parsing order response to update product metada-mapping")
|
|
233
|
+
on_response_mm_jsonpath = mtd_cfg_as_conversion_and_querypath(
|
|
234
|
+
on_response_mm,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
json_response = response.json()
|
|
238
|
+
|
|
239
|
+
properties_update = properties_from_json(
|
|
240
|
+
{"json": json_response, "headers": {**response.headers}},
|
|
241
|
+
on_response_mm_jsonpath,
|
|
242
|
+
)
|
|
243
|
+
product.properties.update(
|
|
244
|
+
{k: v for k, v in properties_update.items() if v != NOT_AVAILABLE}
|
|
245
|
+
)
|
|
246
|
+
if "downloadLink" in product.properties:
|
|
247
|
+
product.remote_location = product.location = product.properties[
|
|
248
|
+
"downloadLink"
|
|
249
|
+
]
|
|
250
|
+
logger.debug(f"Product location updated to {product.location}")
|
|
251
|
+
|
|
252
|
+
return json_response
|
|
201
253
|
|
|
202
254
|
def orderDownloadStatus(
|
|
203
255
|
self,
|
|
204
256
|
product: EOProduct,
|
|
205
|
-
auth: Optional[
|
|
206
|
-
**kwargs: Union[str, bool, Dict[str, Any]],
|
|
257
|
+
auth: Optional[AuthBase] = None,
|
|
207
258
|
) -> None:
|
|
208
259
|
"""Send product order status request.
|
|
209
260
|
|
|
210
261
|
It will be executed before each download retry.
|
|
211
262
|
Product order status request can be configured using the following download plugin parameters:
|
|
212
263
|
|
|
213
|
-
- **
|
|
214
|
-
|
|
215
|
-
- **order_status_percent**: (optional) progress percentage key in obtained response
|
|
216
|
-
|
|
217
|
-
- **order_status_error**: (optional) key/value identifying an error status
|
|
264
|
+
- **order_status**: :class:`~eodag.config.PluginConfig.OrderStatus`
|
|
218
265
|
|
|
219
266
|
Product properties used for order status:
|
|
220
267
|
|
|
@@ -222,162 +269,289 @@ class HTTPDownload(Download):
|
|
|
222
269
|
|
|
223
270
|
:param product: The ordered EO product
|
|
224
271
|
:type product: :class:`~eodag.api.product._product.EOProduct`
|
|
225
|
-
:param auth: (optional)
|
|
226
|
-
:type auth:
|
|
272
|
+
:param auth: (optional) authenticated object
|
|
273
|
+
:type auth: Optional[AuthBase]
|
|
227
274
|
:param kwargs: download additional kwargs
|
|
228
275
|
:type kwargs: Union[str, bool, dict]
|
|
229
276
|
"""
|
|
230
|
-
|
|
231
|
-
|
|
277
|
+
|
|
278
|
+
status_config = getattr(self.config, "order_status", {})
|
|
279
|
+
success_code: Optional[int] = status_config.get("success", {}).get("http_code")
|
|
280
|
+
|
|
281
|
+
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
282
|
+
|
|
283
|
+
def _request(
|
|
284
|
+
url: str,
|
|
285
|
+
method: str = "GET",
|
|
286
|
+
headers: Optional[Dict[str, Any]] = None,
|
|
287
|
+
json: Optional[Any] = None,
|
|
288
|
+
timeout: int = HTTP_REQ_TIMEOUT,
|
|
289
|
+
) -> Response:
|
|
290
|
+
"""Send request and handle allow redirects"""
|
|
291
|
+
|
|
292
|
+
logger.debug(f"{method} {url} {headers} {json}")
|
|
293
|
+
try:
|
|
294
|
+
response = requests.request(
|
|
295
|
+
method=method,
|
|
296
|
+
url=url,
|
|
297
|
+
auth=auth,
|
|
298
|
+
timeout=timeout,
|
|
299
|
+
headers={**(headers or {}), **USER_AGENT},
|
|
300
|
+
allow_redirects=False, # Redirection is manually handled
|
|
301
|
+
json=json,
|
|
302
|
+
)
|
|
303
|
+
logger.debug(
|
|
304
|
+
f"Order download status request responded with {response.status_code}"
|
|
305
|
+
)
|
|
306
|
+
response.raise_for_status() # Raise an exception if status code indicates an error
|
|
307
|
+
|
|
308
|
+
# Handle redirection (if needed)
|
|
309
|
+
if (
|
|
310
|
+
300 <= response.status_code < 400
|
|
311
|
+
and response.status_code != success_code
|
|
312
|
+
):
|
|
313
|
+
# cf: https://www.rfc-editor.org/rfc/rfc9110.html#name-303-see-other
|
|
314
|
+
if response.status_code == 303:
|
|
315
|
+
method = "GET"
|
|
316
|
+
if new_url := response.headers.get("Location"):
|
|
317
|
+
return _request(new_url, method, headers, json, timeout)
|
|
318
|
+
return response
|
|
319
|
+
except requests.exceptions.Timeout as exc:
|
|
320
|
+
raise TimeOutError(exc, timeout=timeout) from exc
|
|
321
|
+
|
|
322
|
+
status_request: Dict[str, Any] = status_config.get("request", {})
|
|
323
|
+
status_request_method = str(status_request.get("method", "GET")).upper()
|
|
324
|
+
|
|
325
|
+
if status_request_method == "POST":
|
|
232
326
|
# separate url & parameters
|
|
233
327
|
parts = urlparse(str(product.properties["orderStatusLink"]))
|
|
328
|
+
status_url = parts._replace(query=None).geturl()
|
|
234
329
|
query_dict = parse_qs(parts.query)
|
|
235
330
|
if not query_dict and parts.query:
|
|
236
331
|
query_dict = geojson.loads(parts.query)
|
|
237
|
-
|
|
238
|
-
status_kwargs = {"json": query_dict} if query_dict else {}
|
|
332
|
+
json_data = query_dict if query_dict else None
|
|
239
333
|
else:
|
|
240
334
|
status_url = product.properties["orderStatusLink"]
|
|
241
|
-
|
|
335
|
+
json_data = None
|
|
336
|
+
|
|
337
|
+
# check header for success before full status request
|
|
338
|
+
skip_parsing_status_response = False
|
|
339
|
+
status_dict: Dict[str, Any] = {}
|
|
340
|
+
config_on_success: Dict[str, Any] = status_config.get("on_success", {})
|
|
341
|
+
on_success_mm = config_on_success.get("metadata_mapping", {})
|
|
342
|
+
|
|
343
|
+
status_response_content_needed = (
|
|
344
|
+
False
|
|
345
|
+
if not any([v.startswith("$.json.") for v in on_success_mm.values()])
|
|
346
|
+
else True
|
|
347
|
+
)
|
|
242
348
|
|
|
243
|
-
|
|
244
|
-
method=status_method,
|
|
245
|
-
url=status_url,
|
|
246
|
-
auth=auth,
|
|
247
|
-
timeout=HTTP_REQ_TIMEOUT,
|
|
248
|
-
headers=dict(
|
|
249
|
-
getattr(self.config, "order_status_headers", {}), **USER_AGENT
|
|
250
|
-
),
|
|
251
|
-
**status_kwargs,
|
|
252
|
-
) as response:
|
|
349
|
+
if success_code:
|
|
253
350
|
try:
|
|
254
|
-
response
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
351
|
+
response = _request(
|
|
352
|
+
status_url,
|
|
353
|
+
"HEAD",
|
|
354
|
+
status_request.get("headers"),
|
|
355
|
+
json_data,
|
|
356
|
+
timeout,
|
|
260
357
|
)
|
|
261
|
-
if order_status_percent_key and order_status_percent_key in status_dict:
|
|
262
|
-
order_status_value = str(status_dict[order_status_percent_key])
|
|
263
|
-
if order_status_value.isdigit():
|
|
264
|
-
order_status_value += "%"
|
|
265
|
-
logger.info(
|
|
266
|
-
f"{product.properties['title']} order status: {order_status_value}"
|
|
267
|
-
)
|
|
268
|
-
# display error if any
|
|
269
|
-
order_status_error_dict = getattr(self.config, "order_status_error", {})
|
|
270
358
|
if (
|
|
271
|
-
|
|
272
|
-
and
|
|
359
|
+
response.status_code == success_code
|
|
360
|
+
and not status_response_content_needed
|
|
273
361
|
):
|
|
274
|
-
#
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
362
|
+
# success and no need to get status response content
|
|
363
|
+
skip_parsing_status_response = True
|
|
364
|
+
except RequestException as e:
|
|
365
|
+
logger.debug(e)
|
|
366
|
+
|
|
367
|
+
if not skip_parsing_status_response:
|
|
368
|
+
# status request
|
|
369
|
+
try:
|
|
370
|
+
response = _request(
|
|
371
|
+
status_url,
|
|
372
|
+
status_request_method,
|
|
373
|
+
status_request.get("headers"),
|
|
374
|
+
json_data,
|
|
375
|
+
timeout,
|
|
281
376
|
)
|
|
282
377
|
if (
|
|
283
|
-
|
|
284
|
-
and
|
|
285
|
-
and "message" in status_dict
|
|
286
|
-
and status_dict["message"] == order_status_success_dict["message"]
|
|
287
|
-
):
|
|
288
|
-
product.properties["storageStatus"] = ONLINE_STATUS
|
|
289
|
-
if (
|
|
290
|
-
order_status_success_dict
|
|
291
|
-
and order_status_success_dict.items() <= status_dict.items()
|
|
292
|
-
and getattr(self.config, "order_status_on_success", {}).get(
|
|
293
|
-
"need_search"
|
|
294
|
-
)
|
|
378
|
+
response.status_code == success_code
|
|
379
|
+
and not status_response_content_needed
|
|
295
380
|
):
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
381
|
+
# success and no need to get status response content
|
|
382
|
+
skip_parsing_status_response = True
|
|
383
|
+
except RequestException as e:
|
|
384
|
+
raise DownloadError(
|
|
385
|
+
"%s order status could not be checked, request returned %s"
|
|
386
|
+
% (
|
|
387
|
+
product.properties["title"],
|
|
388
|
+
e,
|
|
304
389
|
)
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
assert isinstance(
|
|
328
|
-
results, list
|
|
329
|
-
), "results must be in a list"
|
|
330
|
-
# single result
|
|
331
|
-
result = results[0]
|
|
332
|
-
# parse result
|
|
333
|
-
new_search_metadata_mapping = (
|
|
334
|
-
self.config.order_status_on_success["metadata_mapping"]
|
|
335
|
-
)
|
|
336
|
-
order_metadata_mapping_jsonpath = {}
|
|
337
|
-
order_metadata_mapping_jsonpath = (
|
|
338
|
-
mtd_cfg_as_conversion_and_querypath(
|
|
339
|
-
new_search_metadata_mapping,
|
|
340
|
-
order_metadata_mapping_jsonpath,
|
|
341
|
-
)
|
|
342
|
-
)
|
|
343
|
-
properties_update = properties_from_xml(
|
|
344
|
-
result,
|
|
345
|
-
order_metadata_mapping_jsonpath,
|
|
346
|
-
)
|
|
347
|
-
except Exception as e:
|
|
348
|
-
logger.debug(e)
|
|
349
|
-
raise DownloadError(
|
|
350
|
-
f"Could not parse result after order success for {product.properties['searchLink']} "
|
|
351
|
-
f"request. Please search and download {product} again"
|
|
352
|
-
)
|
|
353
|
-
# update product
|
|
354
|
-
product.properties.update(properties_update)
|
|
355
|
-
product.location = product.remote_location = product.properties[
|
|
356
|
-
"downloadLink"
|
|
357
|
-
]
|
|
358
|
-
else:
|
|
359
|
-
logger.warning(
|
|
360
|
-
"JSON response parsing is not implemented yet for new searches "
|
|
361
|
-
f"after order success. Please search and download {product} again"
|
|
362
|
-
)
|
|
390
|
+
) from e
|
|
391
|
+
|
|
392
|
+
if not skip_parsing_status_response:
|
|
393
|
+
# status request
|
|
394
|
+
json_response = response.json()
|
|
395
|
+
if not isinstance(json_response, dict):
|
|
396
|
+
raise RequestException("response content is not a dict")
|
|
397
|
+
status_dict = json_response
|
|
398
|
+
|
|
399
|
+
status_mm = status_config.get("metadata_mapping", {})
|
|
400
|
+
status_mm_jsonpath = (
|
|
401
|
+
mtd_cfg_as_conversion_and_querypath(
|
|
402
|
+
status_mm,
|
|
403
|
+
)
|
|
404
|
+
if status_mm
|
|
405
|
+
else {}
|
|
406
|
+
)
|
|
407
|
+
logger.debug("Parsing order status response")
|
|
408
|
+
status_dict = properties_from_json(
|
|
409
|
+
{"json": response.json(), "headers": {**response.headers}},
|
|
410
|
+
status_mm_jsonpath,
|
|
411
|
+
)
|
|
363
412
|
|
|
364
|
-
|
|
365
|
-
|
|
413
|
+
# display progress percentage
|
|
414
|
+
if "percent" in status_dict:
|
|
415
|
+
status_percent = str(status_dict["percent"])
|
|
416
|
+
if status_percent.isdigit():
|
|
417
|
+
status_percent += "%"
|
|
418
|
+
logger.info(
|
|
419
|
+
f"{product.properties['title']} order status: {status_percent}"
|
|
420
|
+
)
|
|
421
|
+
|
|
422
|
+
status_message = status_dict.get("message")
|
|
423
|
+
product.properties["orderStatus"] = status_dict.get("status")
|
|
424
|
+
|
|
425
|
+
# handle status error
|
|
426
|
+
errors: Dict[str, Any] = status_config.get("error", {})
|
|
427
|
+
if errors and errors.items() <= status_dict.items():
|
|
428
|
+
raise DownloadError(
|
|
429
|
+
f"Provider {product.provider} returned: {status_dict.get('error_message', status_message)}"
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
success_status: Dict[str, Any] = status_config.get("success", {}).get("status")
|
|
433
|
+
# if not success
|
|
434
|
+
if (success_status and success_status != status_dict.get("status")) or (
|
|
435
|
+
success_code and success_code != response.status_code
|
|
436
|
+
):
|
|
437
|
+
error = NotAvailableError(status_message)
|
|
438
|
+
raise error
|
|
439
|
+
|
|
440
|
+
product.properties["storageStatus"] = ONLINE_STATUS
|
|
441
|
+
|
|
442
|
+
if not config_on_success:
|
|
443
|
+
# Nothing left to do
|
|
444
|
+
return None
|
|
445
|
+
|
|
446
|
+
# need search on success ?
|
|
447
|
+
if config_on_success.get("need_search"):
|
|
448
|
+
logger.debug(f"Search for new location: {product.properties['searchLink']}")
|
|
449
|
+
try:
|
|
450
|
+
response = _request(product.properties["searchLink"], timeout=timeout)
|
|
366
451
|
except RequestException as e:
|
|
367
452
|
logger.warning(
|
|
368
453
|
"%s order status could not be checked, request returned %s",
|
|
369
454
|
product.properties["title"],
|
|
370
455
|
e,
|
|
371
456
|
)
|
|
457
|
+
return None
|
|
458
|
+
|
|
459
|
+
result_type = config_on_success.get("result_type", "json")
|
|
460
|
+
result_entry = config_on_success.get("results_entry")
|
|
461
|
+
|
|
462
|
+
on_success_mm_querypath = (
|
|
463
|
+
# append product.properties as input for on success response parsing
|
|
464
|
+
mtd_cfg_as_conversion_and_querypath(
|
|
465
|
+
dict(
|
|
466
|
+
{k: str(v) for k, v in product.properties.items()}, **on_success_mm
|
|
467
|
+
),
|
|
468
|
+
)
|
|
469
|
+
if on_success_mm
|
|
470
|
+
else {}
|
|
471
|
+
)
|
|
472
|
+
try:
|
|
473
|
+
if result_type == "xml":
|
|
474
|
+
if not result_entry:
|
|
475
|
+
raise MisconfiguredError(
|
|
476
|
+
'"result_entry" is required with "result_type" "xml"'
|
|
477
|
+
'in "order_status.on_success"'
|
|
478
|
+
)
|
|
479
|
+
root_node = etree.fromstring(response.content)
|
|
480
|
+
namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
|
|
481
|
+
results = [
|
|
482
|
+
etree.tostring(entry)
|
|
483
|
+
for entry in root_node.xpath(
|
|
484
|
+
result_entry,
|
|
485
|
+
namespaces=namespaces,
|
|
486
|
+
)
|
|
487
|
+
]
|
|
488
|
+
if len(results) != 1:
|
|
489
|
+
raise DownloadError(
|
|
490
|
+
"Could not get a single result after order success for "
|
|
491
|
+
f"{product.properties['searchLink']} request. "
|
|
492
|
+
f"Please search and download {product} again"
|
|
493
|
+
)
|
|
494
|
+
assert isinstance(results, list), "results must be in a list"
|
|
495
|
+
# single result
|
|
496
|
+
result = results[0]
|
|
497
|
+
if on_success_mm_querypath:
|
|
498
|
+
properties_update = properties_from_xml(
|
|
499
|
+
result,
|
|
500
|
+
on_success_mm_querypath,
|
|
501
|
+
)
|
|
502
|
+
else:
|
|
503
|
+
properties_update = {}
|
|
504
|
+
else:
|
|
505
|
+
json_response = (
|
|
506
|
+
response.json()
|
|
507
|
+
if "application/json" in response.headers.get("Content-Type", "")
|
|
508
|
+
else {}
|
|
509
|
+
)
|
|
510
|
+
if result_entry:
|
|
511
|
+
entry_jsonpath = string_to_jsonpath(result_entry, force=True)
|
|
512
|
+
json_response = entry_jsonpath.find(json_response)
|
|
513
|
+
raise NotImplementedError(
|
|
514
|
+
'result_entry in config.on_success is not yet supported for result_type "json"'
|
|
515
|
+
)
|
|
516
|
+
if on_success_mm_querypath:
|
|
517
|
+
logger.debug(
|
|
518
|
+
"Parsing on-success metadata-mapping using order status response"
|
|
519
|
+
)
|
|
520
|
+
properties_update = properties_from_json(
|
|
521
|
+
{"json": json_response, "headers": {**response.headers}},
|
|
522
|
+
on_success_mm_querypath,
|
|
523
|
+
)
|
|
524
|
+
# only keep properties to update (remove product.properties added for parsing)
|
|
525
|
+
properties_update = {
|
|
526
|
+
k: v for k, v in properties_update.items() if k in on_success_mm
|
|
527
|
+
}
|
|
528
|
+
else:
|
|
529
|
+
properties_update = {}
|
|
530
|
+
except Exception as e:
|
|
531
|
+
if isinstance(e, DownloadError):
|
|
532
|
+
raise
|
|
533
|
+
logger.debug(e)
|
|
534
|
+
raise DownloadError(
|
|
535
|
+
f"Could not parse result after order success. Please search and download {product} again"
|
|
536
|
+
) from e
|
|
537
|
+
|
|
538
|
+
# update product
|
|
539
|
+
product.properties.update(properties_update)
|
|
540
|
+
if "downloadLink" in properties_update:
|
|
541
|
+
product.location = product.remote_location = product.properties[
|
|
542
|
+
"downloadLink"
|
|
543
|
+
]
|
|
544
|
+
else:
|
|
545
|
+
self.order_response_process(response, product)
|
|
372
546
|
|
|
373
547
|
def download(
|
|
374
548
|
self,
|
|
375
549
|
product: EOProduct,
|
|
376
|
-
auth: Optional[
|
|
550
|
+
auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
|
|
377
551
|
progress_callback: Optional[ProgressCallback] = None,
|
|
378
552
|
wait: int = DEFAULT_DOWNLOAD_WAIT,
|
|
379
553
|
timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
380
|
-
**kwargs:
|
|
554
|
+
**kwargs: Unpack[DownloadConf],
|
|
381
555
|
) -> Optional[str]:
|
|
382
556
|
"""Download a product using HTTP protocol.
|
|
383
557
|
|
|
@@ -385,14 +559,26 @@ class HTTPDownload(Download):
|
|
|
385
559
|
the user is warned, it is renamed to remove the zip extension and
|
|
386
560
|
no further treatment is done (no extraction)
|
|
387
561
|
"""
|
|
562
|
+
if auth is not None and not isinstance(auth, AuthBase):
|
|
563
|
+
raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
|
|
564
|
+
|
|
388
565
|
if progress_callback is None:
|
|
389
566
|
logger.info(
|
|
390
567
|
"Progress bar unavailable, please call product.download() instead of plugin.download()"
|
|
391
568
|
)
|
|
392
569
|
progress_callback = ProgressCallback(disable=True)
|
|
393
570
|
|
|
571
|
+
outputs_extension = getattr(self.config, "products", {}).get(
|
|
572
|
+
product.product_type, {}
|
|
573
|
+
).get("outputs_extension", None) or getattr(
|
|
574
|
+
self.config, "outputs_extension", ".zip"
|
|
575
|
+
)
|
|
576
|
+
kwargs["outputs_extension"] = kwargs.get("outputs_extension", outputs_extension)
|
|
577
|
+
|
|
394
578
|
fs_path, record_filename = self._prepare_download(
|
|
395
|
-
product,
|
|
579
|
+
product,
|
|
580
|
+
progress_callback=progress_callback,
|
|
581
|
+
**kwargs,
|
|
396
582
|
)
|
|
397
583
|
if not fs_path or not record_filename:
|
|
398
584
|
if fs_path:
|
|
@@ -400,7 +586,10 @@ class HTTPDownload(Download):
|
|
|
400
586
|
return fs_path
|
|
401
587
|
|
|
402
588
|
# download assets if exist instead of remote_location
|
|
403
|
-
if len(product.assets) > 0 and
|
|
589
|
+
if len(product.assets) > 0 and (
|
|
590
|
+
not getattr(self.config, "ignore_assets", False)
|
|
591
|
+
or kwargs.get("asset", None) is not None
|
|
592
|
+
):
|
|
404
593
|
try:
|
|
405
594
|
fs_path = self._download_assets(
|
|
406
595
|
product,
|
|
@@ -424,18 +613,23 @@ class HTTPDownload(Download):
|
|
|
424
613
|
@self._download_retry(product, wait, timeout)
|
|
425
614
|
def download_request(
|
|
426
615
|
product: EOProduct,
|
|
427
|
-
auth:
|
|
616
|
+
auth: AuthBase,
|
|
428
617
|
progress_callback: ProgressCallback,
|
|
429
618
|
wait: int,
|
|
430
619
|
timeout: int,
|
|
431
|
-
**kwargs:
|
|
620
|
+
**kwargs: Unpack[DownloadConf],
|
|
432
621
|
) -> None:
|
|
433
622
|
chunks = self._stream_download(product, auth, progress_callback, **kwargs)
|
|
623
|
+
is_empty = True
|
|
434
624
|
|
|
435
625
|
with open(fs_path, "wb") as fhandle:
|
|
436
626
|
for chunk in chunks:
|
|
627
|
+
is_empty = False
|
|
437
628
|
fhandle.write(chunk)
|
|
438
629
|
|
|
630
|
+
if is_empty:
|
|
631
|
+
raise DownloadError(f"product {product.properties['id']} is empty")
|
|
632
|
+
|
|
439
633
|
download_request(product, auth, progress_callback, wait, timeout, **kwargs)
|
|
440
634
|
|
|
441
635
|
with open(record_filename, "w") as fh:
|
|
@@ -443,19 +637,57 @@ class HTTPDownload(Download):
|
|
|
443
637
|
logger.debug("Download recorded in %s", record_filename)
|
|
444
638
|
|
|
445
639
|
# Check that the downloaded file is really a zip file
|
|
446
|
-
outputs_extension = kwargs.get("outputs_extension", None) or getattr(
|
|
447
|
-
self.config, "outputs_extension", ".zip"
|
|
448
|
-
)
|
|
449
640
|
if not zipfile.is_zipfile(fs_path) and outputs_extension == ".zip":
|
|
450
641
|
logger.warning(
|
|
451
642
|
"Downloaded product is not a Zip File. Please check its file type before using it"
|
|
452
643
|
)
|
|
453
|
-
new_fs_path =
|
|
644
|
+
new_fs_path = os.path.join(
|
|
645
|
+
os.path.dirname(fs_path),
|
|
646
|
+
sanitize(product.properties["title"]),
|
|
647
|
+
)
|
|
648
|
+
if os.path.isfile(fs_path) and not tarfile.is_tarfile(fs_path):
|
|
649
|
+
if not os.path.isdir(new_fs_path):
|
|
650
|
+
os.makedirs(new_fs_path)
|
|
651
|
+
shutil.move(fs_path, new_fs_path)
|
|
652
|
+
file_path = os.path.join(new_fs_path, os.path.basename(fs_path))
|
|
653
|
+
new_file_path = file_path[: file_path.index(".zip")]
|
|
654
|
+
shutil.move(file_path, new_file_path)
|
|
655
|
+
# in the case where the outputs extension has not been set
|
|
656
|
+
# to ".tar" in the product type nor provider configuration
|
|
657
|
+
elif tarfile.is_tarfile(fs_path):
|
|
658
|
+
if not new_fs_path.endswith(".tar"):
|
|
659
|
+
new_fs_path += ".tar"
|
|
660
|
+
shutil.move(fs_path, new_fs_path)
|
|
661
|
+
kwargs["outputs_extension"] = ".tar"
|
|
662
|
+
product_path = self._finalize(
|
|
663
|
+
new_fs_path,
|
|
664
|
+
progress_callback=progress_callback,
|
|
665
|
+
**kwargs,
|
|
666
|
+
)
|
|
667
|
+
product.location = path_to_uri(product_path)
|
|
668
|
+
return product_path
|
|
669
|
+
else:
|
|
670
|
+
# not a file (dir with zip extension)
|
|
671
|
+
shutil.move(fs_path, new_fs_path)
|
|
672
|
+
product.location = path_to_uri(new_fs_path)
|
|
673
|
+
return new_fs_path
|
|
674
|
+
|
|
675
|
+
if os.path.isfile(fs_path) and not (
|
|
676
|
+
zipfile.is_zipfile(fs_path) or tarfile.is_tarfile(fs_path)
|
|
677
|
+
):
|
|
678
|
+
new_fs_path = os.path.join(
|
|
679
|
+
os.path.dirname(fs_path),
|
|
680
|
+
sanitize(product.properties["title"]),
|
|
681
|
+
)
|
|
682
|
+
if not os.path.isdir(new_fs_path):
|
|
683
|
+
os.makedirs(new_fs_path)
|
|
454
684
|
shutil.move(fs_path, new_fs_path)
|
|
455
685
|
product.location = path_to_uri(new_fs_path)
|
|
456
686
|
return new_fs_path
|
|
457
687
|
product_path = self._finalize(
|
|
458
|
-
fs_path,
|
|
688
|
+
fs_path,
|
|
689
|
+
progress_callback=progress_callback,
|
|
690
|
+
**kwargs,
|
|
459
691
|
)
|
|
460
692
|
product.location = path_to_uri(product_path)
|
|
461
693
|
return product_path
|
|
@@ -477,23 +709,51 @@ class HTTPDownload(Download):
|
|
|
477
709
|
)
|
|
478
710
|
return stream_size
|
|
479
711
|
|
|
712
|
+
def _check_product_filename(self, product: EOProduct) -> str:
|
|
713
|
+
filename = None
|
|
714
|
+
asset_content_disposition = self.stream.headers.get("content-disposition", None)
|
|
715
|
+
if asset_content_disposition:
|
|
716
|
+
filename = cast(
|
|
717
|
+
Optional[str],
|
|
718
|
+
parse_header(asset_content_disposition).get_param("filename", None),
|
|
719
|
+
)
|
|
720
|
+
if not filename:
|
|
721
|
+
# default filename extracted from path
|
|
722
|
+
filename = str(os.path.basename(self.stream.url))
|
|
723
|
+
filename_extension = os.path.splitext(filename)[1]
|
|
724
|
+
if not filename_extension:
|
|
725
|
+
if content_type := getattr(product, "headers", {}).get("Content-Type"):
|
|
726
|
+
ext = guess_extension(content_type)
|
|
727
|
+
if ext:
|
|
728
|
+
filename += ext
|
|
729
|
+
else:
|
|
730
|
+
outputs_extension: Optional[str] = (
|
|
731
|
+
getattr(self.config, "products", {})
|
|
732
|
+
.get(product.product_type, {})
|
|
733
|
+
.get("outputs_extension")
|
|
734
|
+
)
|
|
735
|
+
if outputs_extension:
|
|
736
|
+
filename += outputs_extension
|
|
737
|
+
|
|
738
|
+
return filename
|
|
739
|
+
|
|
480
740
|
def _stream_download_dict(
|
|
481
741
|
self,
|
|
482
742
|
product: EOProduct,
|
|
483
|
-
auth: Optional[
|
|
743
|
+
auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
|
|
484
744
|
progress_callback: Optional[ProgressCallback] = None,
|
|
485
745
|
wait: int = DEFAULT_DOWNLOAD_WAIT,
|
|
486
746
|
timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
487
|
-
**kwargs:
|
|
488
|
-
) ->
|
|
747
|
+
**kwargs: Unpack[DownloadConf],
|
|
748
|
+
) -> StreamResponse:
|
|
489
749
|
r"""
|
|
490
750
|
Returns dictionnary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments.
|
|
491
751
|
It contains a generator to streamed download chunks and the response headers.
|
|
492
752
|
|
|
493
753
|
:param product: The EO product to download
|
|
494
754
|
:type product: :class:`~eodag.api.product._product.EOProduct`
|
|
495
|
-
:param auth: (optional)
|
|
496
|
-
:type auth:
|
|
755
|
+
:param auth: (optional) authenticated object
|
|
756
|
+
:type auth: Optional[Union[AuthBase, Dict[str, str]]]
|
|
497
757
|
:param progress_callback: (optional) A progress callback
|
|
498
758
|
:type progress_callback: :class:`~eodag.utils.ProgressCallback`
|
|
499
759
|
:param wait: (optional) If download fails, wait time in minutes between two download tries
|
|
@@ -509,8 +769,14 @@ class HTTPDownload(Download):
|
|
|
509
769
|
:returns: Dictionnary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments
|
|
510
770
|
:rtype: dict
|
|
511
771
|
"""
|
|
772
|
+
if auth is not None and not isinstance(auth, AuthBase):
|
|
773
|
+
raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
|
|
774
|
+
|
|
512
775
|
# download assets if exist instead of remote_location
|
|
513
|
-
if len(product.assets) > 0 and
|
|
776
|
+
if len(product.assets) > 0 and (
|
|
777
|
+
not getattr(self.config, "ignore_assets", False)
|
|
778
|
+
or kwargs.get("asset") is not None
|
|
779
|
+
):
|
|
514
780
|
try:
|
|
515
781
|
assets_values = product.assets.get_values(kwargs.get("asset", None))
|
|
516
782
|
chunks_tuples = self._stream_download_assets(
|
|
@@ -534,7 +800,7 @@ class HTTPDownload(Download):
|
|
|
534
800
|
"type"
|
|
535
801
|
]
|
|
536
802
|
|
|
537
|
-
return
|
|
803
|
+
return StreamResponse(
|
|
538
804
|
content=chain(iter([first_chunks_tuple]), chunks_tuples),
|
|
539
805
|
headers=assets_values[0].headers,
|
|
540
806
|
)
|
|
@@ -545,7 +811,7 @@ class HTTPDownload(Download):
|
|
|
545
811
|
if "title" in product.properties
|
|
546
812
|
else sanitize(product.properties.get("id", "download"))
|
|
547
813
|
)
|
|
548
|
-
return
|
|
814
|
+
return StreamResponse(
|
|
549
815
|
content=stream_zip(chunks_tuples),
|
|
550
816
|
media_type="application/zip",
|
|
551
817
|
headers={
|
|
@@ -560,36 +826,55 @@ class HTTPDownload(Download):
|
|
|
560
826
|
|
|
561
827
|
chunks = self._stream_download(product, auth, progress_callback, **kwargs)
|
|
562
828
|
# start reading chunks to set product.headers
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
829
|
+
try:
|
|
830
|
+
first_chunk = next(chunks)
|
|
831
|
+
except StopIteration:
|
|
832
|
+
# product is empty file
|
|
833
|
+
logger.error("product %s is empty", product.properties["id"])
|
|
834
|
+
raise NotAvailableError(f"product {product.properties['id']} is empty")
|
|
835
|
+
|
|
836
|
+
return StreamResponse(
|
|
566
837
|
content=chain(iter([first_chunk]), chunks),
|
|
567
838
|
headers=product.headers,
|
|
568
839
|
)
|
|
569
840
|
|
|
570
|
-
def
|
|
571
|
-
self, e: RequestException, product: EOProduct, ordered_message: str
|
|
572
|
-
) -> None:
|
|
841
|
+
def _check_auth_exception(self, e: Optional[RequestException]) -> None:
|
|
573
842
|
# check if error is identified as auth_error in provider conf
|
|
574
843
|
auth_errors = getattr(self.config, "auth_error_code", [None])
|
|
575
844
|
if not isinstance(auth_errors, list):
|
|
576
845
|
auth_errors = [auth_errors]
|
|
577
|
-
|
|
846
|
+
response_text = (
|
|
847
|
+
e.response.text.strip() if e is not None and e.response is not None else ""
|
|
848
|
+
)
|
|
849
|
+
if (
|
|
850
|
+
e is not None
|
|
851
|
+
and e.response is not None
|
|
852
|
+
and e.response.status_code in auth_errors
|
|
853
|
+
):
|
|
578
854
|
raise AuthenticationError(
|
|
579
855
|
"HTTP Error %s returned, %s\nPlease check your credentials for %s"
|
|
580
856
|
% (
|
|
581
857
|
e.response.status_code,
|
|
582
|
-
|
|
858
|
+
response_text,
|
|
583
859
|
self.provider,
|
|
584
860
|
)
|
|
585
861
|
)
|
|
862
|
+
|
|
863
|
+
def _process_exception(
|
|
864
|
+
self, e: Optional[RequestException], product: EOProduct, ordered_message: str
|
|
865
|
+
) -> None:
|
|
866
|
+
self._check_auth_exception(e)
|
|
867
|
+
response_text = (
|
|
868
|
+
e.response.text.strip() if e is not None and e.response is not None else ""
|
|
869
|
+
)
|
|
586
870
|
# product not available
|
|
587
|
-
|
|
871
|
+
if product.properties.get("storageStatus", ONLINE_STATUS) != ONLINE_STATUS:
|
|
588
872
|
msg = (
|
|
589
873
|
ordered_message
|
|
590
|
-
if ordered_message and not
|
|
591
|
-
else
|
|
874
|
+
if ordered_message and not response_text
|
|
875
|
+
else response_text
|
|
592
876
|
)
|
|
877
|
+
|
|
593
878
|
raise NotAvailableError(
|
|
594
879
|
"%s(initially %s) requested, returned: %s"
|
|
595
880
|
% (
|
|
@@ -601,18 +886,21 @@ class HTTPDownload(Download):
|
|
|
601
886
|
else:
|
|
602
887
|
import traceback as tb
|
|
603
888
|
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
889
|
+
if e:
|
|
890
|
+
logger.error(
|
|
891
|
+
"Error while getting resource :\n%s\n%s",
|
|
892
|
+
tb.format_exc(),
|
|
893
|
+
response_text,
|
|
894
|
+
)
|
|
895
|
+
else:
|
|
896
|
+
logger.error("Error while getting resource :\n%s", tb.format_exc())
|
|
609
897
|
|
|
610
898
|
def _stream_download(
|
|
611
899
|
self,
|
|
612
900
|
product: EOProduct,
|
|
613
|
-
auth: Optional[
|
|
901
|
+
auth: Optional[AuthBase] = None,
|
|
614
902
|
progress_callback: Optional[ProgressCallback] = None,
|
|
615
|
-
**kwargs:
|
|
903
|
+
**kwargs: Unpack[DownloadConf],
|
|
616
904
|
) -> Iterator[Any]:
|
|
617
905
|
"""
|
|
618
906
|
fetches a zip file containing the assets of a given product as a stream
|
|
@@ -620,7 +908,7 @@ class HTTPDownload(Download):
|
|
|
620
908
|
:param product: product for which the assets should be downloaded
|
|
621
909
|
:type product: :class:`~eodag.api.product._product.EOProduct`
|
|
622
910
|
:param auth: The configuration of a plugin of type Authentication
|
|
623
|
-
:type auth:
|
|
911
|
+
:type auth: Optional[Union[AuthBase, Dict[str, str]]]
|
|
624
912
|
:param progress_callback: A method or a callable object
|
|
625
913
|
which takes a current size and a maximum
|
|
626
914
|
size as inputs and handle progress bar
|
|
@@ -637,12 +925,15 @@ class HTTPDownload(Download):
|
|
|
637
925
|
ordered_message = ""
|
|
638
926
|
if (
|
|
639
927
|
"orderLink" in product.properties
|
|
640
|
-
and "storageStatus"
|
|
641
|
-
and product.properties
|
|
928
|
+
and product.properties.get("storageStatus") == OFFLINE_STATUS
|
|
929
|
+
and not product.properties.get("orderStatus")
|
|
642
930
|
):
|
|
643
931
|
self.orderDownload(product=product, auth=auth)
|
|
644
932
|
|
|
645
|
-
if
|
|
933
|
+
if (
|
|
934
|
+
product.properties.get("orderStatusLink", None)
|
|
935
|
+
and product.properties.get("storageStatus") != ONLINE_STATUS
|
|
936
|
+
):
|
|
646
937
|
self.orderDownloadStatus(product=product, auth=auth)
|
|
647
938
|
|
|
648
939
|
params = kwargs.pop("dl_url_params", None) or getattr(
|
|
@@ -666,8 +957,12 @@ class HTTPDownload(Download):
|
|
|
666
957
|
req_url = url
|
|
667
958
|
req_kwargs = {}
|
|
668
959
|
|
|
669
|
-
|
|
670
|
-
|
|
960
|
+
if req_url.startswith(NOT_AVAILABLE):
|
|
961
|
+
raise NotAvailableError("Download link is not available")
|
|
962
|
+
|
|
963
|
+
if getattr(self.config, "no_auth_download", False):
|
|
964
|
+
auth = None
|
|
965
|
+
|
|
671
966
|
s = requests.Session()
|
|
672
967
|
with s.request(
|
|
673
968
|
req_method,
|
|
@@ -681,7 +976,6 @@ class HTTPDownload(Download):
|
|
|
681
976
|
) as self.stream:
|
|
682
977
|
try:
|
|
683
978
|
self.stream.raise_for_status()
|
|
684
|
-
|
|
685
979
|
except requests.exceptions.Timeout as exc:
|
|
686
980
|
raise TimeOutError(
|
|
687
981
|
exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT
|
|
@@ -689,8 +983,30 @@ class HTTPDownload(Download):
|
|
|
689
983
|
except RequestException as e:
|
|
690
984
|
self._process_exception(e, product, ordered_message)
|
|
691
985
|
else:
|
|
692
|
-
|
|
986
|
+
# check if product was ordered
|
|
987
|
+
|
|
988
|
+
if getattr(
|
|
989
|
+
self.stream, "status_code", None
|
|
990
|
+
) is not None and self.stream.status_code == getattr(
|
|
991
|
+
self.config, "order_status", {}
|
|
992
|
+
).get(
|
|
993
|
+
"ordered", {}
|
|
994
|
+
).get(
|
|
995
|
+
"http_code"
|
|
996
|
+
):
|
|
997
|
+
product.properties["storageStatus"] = "ORDERED"
|
|
998
|
+
self._process_exception(None, product, ordered_message)
|
|
999
|
+
stream_size = self._check_stream_size(product) or None
|
|
1000
|
+
|
|
693
1001
|
product.headers = self.stream.headers
|
|
1002
|
+
filename = self._check_product_filename(product) or None
|
|
1003
|
+
product.headers[
|
|
1004
|
+
"content-disposition"
|
|
1005
|
+
] = f"attachment; filename={filename}"
|
|
1006
|
+
content_type = product.headers.get("Content-Type")
|
|
1007
|
+
if filename and not content_type:
|
|
1008
|
+
product.headers["Content-Type"] = guess_file_type(filename)
|
|
1009
|
+
|
|
694
1010
|
progress_callback.reset(total=stream_size)
|
|
695
1011
|
for chunk in self.stream.iter_content(chunk_size=64 * 1024):
|
|
696
1012
|
if chunk:
|
|
@@ -700,9 +1016,10 @@ class HTTPDownload(Download):
|
|
|
700
1016
|
def _stream_download_assets(
|
|
701
1017
|
self,
|
|
702
1018
|
product: EOProduct,
|
|
703
|
-
auth: Optional[
|
|
1019
|
+
auth: Optional[AuthBase] = None,
|
|
704
1020
|
progress_callback: Optional[ProgressCallback] = None,
|
|
705
|
-
|
|
1021
|
+
assets_values: List[Asset] = [],
|
|
1022
|
+
**kwargs: Unpack[DownloadConf],
|
|
706
1023
|
) -> Iterator[Tuple[str, datetime, int, Any, Iterator[Any]]]:
|
|
707
1024
|
if progress_callback is None:
|
|
708
1025
|
logger.info("Progress bar unavailable, please call product.download()")
|
|
@@ -715,14 +1032,12 @@ class HTTPDownload(Download):
|
|
|
715
1032
|
if not assets_urls:
|
|
716
1033
|
raise NotAvailableError("No assets available for %s" % product)
|
|
717
1034
|
|
|
718
|
-
assets_values = kwargs.get("assets_values", [])
|
|
719
|
-
|
|
720
1035
|
# get extra parameters to pass to the query
|
|
721
1036
|
params = kwargs.pop("dl_url_params", None) or getattr(
|
|
722
1037
|
self.config, "dl_url_params", {}
|
|
723
1038
|
)
|
|
724
1039
|
|
|
725
|
-
total_size = self._get_asset_sizes(assets_values, auth, params)
|
|
1040
|
+
total_size = self._get_asset_sizes(assets_values, auth, params) or None
|
|
726
1041
|
|
|
727
1042
|
progress_callback.reset(total=total_size)
|
|
728
1043
|
|
|
@@ -753,12 +1068,14 @@ class HTTPDownload(Download):
|
|
|
753
1068
|
product.product_type, {}
|
|
754
1069
|
)
|
|
755
1070
|
flatten_top_dirs = product_conf.get(
|
|
756
|
-
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs",
|
|
1071
|
+
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
|
|
757
1072
|
)
|
|
1073
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
758
1074
|
|
|
759
1075
|
# loop for assets download
|
|
760
1076
|
for asset in assets_values:
|
|
761
|
-
|
|
1077
|
+
|
|
1078
|
+
if not asset["href"] or asset["href"].startswith("file:"):
|
|
762
1079
|
logger.info(
|
|
763
1080
|
f"Local asset detected. Download skipped for {asset['href']}"
|
|
764
1081
|
)
|
|
@@ -771,6 +1088,7 @@ class HTTPDownload(Download):
|
|
|
771
1088
|
params=params,
|
|
772
1089
|
headers=USER_AGENT,
|
|
773
1090
|
timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
|
|
1091
|
+
verify=ssl_verify,
|
|
774
1092
|
) as stream:
|
|
775
1093
|
try:
|
|
776
1094
|
stream.raise_for_status()
|
|
@@ -795,15 +1113,20 @@ class HTTPDownload(Download):
|
|
|
795
1113
|
"content-disposition", None
|
|
796
1114
|
)
|
|
797
1115
|
if asset_content_disposition:
|
|
798
|
-
asset.filename =
|
|
799
|
-
|
|
800
|
-
|
|
1116
|
+
asset.filename = cast(
|
|
1117
|
+
Optional[str],
|
|
1118
|
+
parse_header(asset_content_disposition).get_param(
|
|
1119
|
+
"filename", None
|
|
1120
|
+
),
|
|
1121
|
+
)
|
|
801
1122
|
|
|
802
1123
|
if not getattr(asset, "filename", None):
|
|
803
1124
|
# default filename extracted from path
|
|
804
1125
|
asset.filename = os.path.basename(asset.rel_path)
|
|
805
1126
|
|
|
806
|
-
asset.rel_path = os.path.join(
|
|
1127
|
+
asset.rel_path = os.path.join(
|
|
1128
|
+
asset_rel_dir, cast(str, asset.filename)
|
|
1129
|
+
)
|
|
807
1130
|
|
|
808
1131
|
if len(assets_values) == 1:
|
|
809
1132
|
# apply headers to asset
|
|
@@ -824,9 +1147,9 @@ class HTTPDownload(Download):
|
|
|
824
1147
|
product: EOProduct,
|
|
825
1148
|
fs_dir_path: str,
|
|
826
1149
|
record_filename: str,
|
|
827
|
-
auth: Optional[
|
|
1150
|
+
auth: Optional[AuthBase] = None,
|
|
828
1151
|
progress_callback: Optional[ProgressCallback] = None,
|
|
829
|
-
**kwargs:
|
|
1152
|
+
**kwargs: Unpack[DownloadConf],
|
|
830
1153
|
) -> str:
|
|
831
1154
|
"""Download product assets if they exist"""
|
|
832
1155
|
if progress_callback is None:
|
|
@@ -857,7 +1180,7 @@ class HTTPDownload(Download):
|
|
|
857
1180
|
product.product_type, {}
|
|
858
1181
|
)
|
|
859
1182
|
flatten_top_dirs = product_conf.get(
|
|
860
|
-
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs",
|
|
1183
|
+
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
|
|
861
1184
|
)
|
|
862
1185
|
|
|
863
1186
|
# count local assets
|
|
@@ -877,23 +1200,35 @@ class HTTPDownload(Download):
|
|
|
877
1200
|
asset_path = chunk_tuple[0]
|
|
878
1201
|
asset_chunks = chunk_tuple[4]
|
|
879
1202
|
asset_abs_path = os.path.join(fs_dir_path, asset_path)
|
|
1203
|
+
asset_abs_path_temp = asset_abs_path + "~"
|
|
880
1204
|
# create asset subdir if not exist
|
|
881
1205
|
asset_abs_path_dir = os.path.dirname(asset_abs_path)
|
|
882
1206
|
if not os.path.isdir(asset_abs_path_dir):
|
|
883
1207
|
os.makedirs(asset_abs_path_dir)
|
|
1208
|
+
# remove temporary file
|
|
1209
|
+
if os.path.isfile(asset_abs_path_temp):
|
|
1210
|
+
os.remove(asset_abs_path_temp)
|
|
884
1211
|
if not os.path.isfile(asset_abs_path):
|
|
885
|
-
|
|
1212
|
+
logger.debug("Downloading to temporary file '%s'", asset_abs_path_temp)
|
|
1213
|
+
with open(asset_abs_path_temp, "wb") as fhandle:
|
|
886
1214
|
for chunk in asset_chunks:
|
|
887
1215
|
if chunk:
|
|
888
1216
|
fhandle.write(chunk)
|
|
889
1217
|
progress_callback(len(chunk))
|
|
890
|
-
|
|
1218
|
+
logger.debug(
|
|
1219
|
+
"Download completed. Renaming temporary file '%s' to '%s'",
|
|
1220
|
+
os.path.basename(asset_abs_path_temp),
|
|
1221
|
+
os.path.basename(asset_abs_path),
|
|
1222
|
+
)
|
|
1223
|
+
os.rename(asset_abs_path_temp, asset_abs_path)
|
|
891
1224
|
# only one local asset
|
|
892
1225
|
if local_assets_count == len(assets_urls) and local_assets_count == 1:
|
|
893
1226
|
# remove empty {fs_dir_path}
|
|
894
1227
|
shutil.rmtree(fs_dir_path)
|
|
895
1228
|
# and return assets_urls[0] path
|
|
896
1229
|
fs_dir_path = uri_to_path(assets_urls[0])
|
|
1230
|
+
# do not flatten dir
|
|
1231
|
+
flatten_top_dirs = False
|
|
897
1232
|
# several local assets
|
|
898
1233
|
elif local_assets_count == len(assets_urls) and local_assets_count > 0:
|
|
899
1234
|
common_path = os.path.commonpath([uri_to_path(uri) for uri in assets_urls])
|
|
@@ -901,6 +1236,8 @@ class HTTPDownload(Download):
|
|
|
901
1236
|
shutil.rmtree(fs_dir_path)
|
|
902
1237
|
# and return assets_urls common path
|
|
903
1238
|
fs_dir_path = common_path
|
|
1239
|
+
# do not flatten dir
|
|
1240
|
+
flatten_top_dirs = False
|
|
904
1241
|
# no assets downloaded but some should have been
|
|
905
1242
|
elif len(os.listdir(fs_dir_path)) == 0:
|
|
906
1243
|
raise NotAvailableError("No assets could be downloaded")
|
|
@@ -918,13 +1255,13 @@ class HTTPDownload(Download):
|
|
|
918
1255
|
return fs_dir_path
|
|
919
1256
|
|
|
920
1257
|
def _handle_asset_exception(
|
|
921
|
-
self, e: RequestException, asset:
|
|
1258
|
+
self, e: RequestException, asset: Asset, raise_errors: bool = False
|
|
922
1259
|
) -> None:
|
|
923
1260
|
# check if error is identified as auth_error in provider conf
|
|
924
1261
|
auth_errors = getattr(self.config, "auth_error_code", [None])
|
|
925
1262
|
if not isinstance(auth_errors, list):
|
|
926
1263
|
auth_errors = [auth_errors]
|
|
927
|
-
if e.response and e.response.status_code in auth_errors:
|
|
1264
|
+
if e.response is not None and e.response.status_code in auth_errors:
|
|
928
1265
|
raise AuthenticationError(
|
|
929
1266
|
"HTTP Error %s returned, %s\nPlease check your credentials for %s"
|
|
930
1267
|
% (
|
|
@@ -941,22 +1278,24 @@ class HTTPDownload(Download):
|
|
|
941
1278
|
|
|
942
1279
|
def _get_asset_sizes(
|
|
943
1280
|
self,
|
|
944
|
-
assets_values: List[
|
|
945
|
-
auth: Optional[
|
|
1281
|
+
assets_values: List[Asset],
|
|
1282
|
+
auth: Optional[AuthBase],
|
|
946
1283
|
params: Optional[Dict[str, str]],
|
|
947
1284
|
zipped: bool = False,
|
|
948
1285
|
) -> int:
|
|
949
1286
|
total_size = 0
|
|
950
1287
|
|
|
1288
|
+
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
1289
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
951
1290
|
# loop for assets size & filename
|
|
952
1291
|
for asset in assets_values:
|
|
953
|
-
if not asset["href"].startswith("file:"):
|
|
1292
|
+
if asset["href"] and not asset["href"].startswith("file:"):
|
|
954
1293
|
# HEAD request for size & filename
|
|
955
1294
|
asset_headers = requests.head(
|
|
956
1295
|
asset["href"],
|
|
957
1296
|
auth=auth,
|
|
958
1297
|
headers=USER_AGENT,
|
|
959
|
-
timeout=
|
|
1298
|
+
timeout=timeout,
|
|
960
1299
|
).headers
|
|
961
1300
|
|
|
962
1301
|
if not getattr(asset, "size", 0):
|
|
@@ -971,12 +1310,14 @@ class HTTPDownload(Download):
|
|
|
971
1310
|
)
|
|
972
1311
|
if not getattr(asset, "size", 0):
|
|
973
1312
|
# size from HEAD header / content-disposition / size
|
|
974
|
-
|
|
1313
|
+
size_str = str(header_content_disposition.get_param("size", 0))
|
|
1314
|
+
asset.size = int(size_str) if size_str.isdigit() else 0
|
|
975
1315
|
if not getattr(asset, "filename", 0):
|
|
976
1316
|
# filename from HEAD header / content-disposition / size
|
|
977
|
-
|
|
1317
|
+
asset_filename = header_content_disposition.get_param(
|
|
978
1318
|
"filename", None
|
|
979
1319
|
)
|
|
1320
|
+
asset.filename = str(asset_filename) if asset_filename else None
|
|
980
1321
|
|
|
981
1322
|
if not getattr(asset, "size", 0):
|
|
982
1323
|
# GET request for size
|
|
@@ -987,16 +1328,18 @@ class HTTPDownload(Download):
|
|
|
987
1328
|
params=params,
|
|
988
1329
|
headers=USER_AGENT,
|
|
989
1330
|
timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
|
|
1331
|
+
verify=ssl_verify,
|
|
990
1332
|
) as stream:
|
|
991
1333
|
# size from GET header / Content-length
|
|
992
1334
|
asset.size = int(stream.headers.get("Content-length", 0))
|
|
993
1335
|
if not getattr(asset, "size", 0):
|
|
994
1336
|
# size from GET header / content-disposition / size
|
|
995
|
-
|
|
1337
|
+
size_str = str(
|
|
996
1338
|
parse_header(
|
|
997
1339
|
stream.headers.get("content-disposition", "")
|
|
998
1340
|
).get_param("size", 0)
|
|
999
1341
|
)
|
|
1342
|
+
asset.size = int(size_str) if size_str.isdigit() else 0
|
|
1000
1343
|
|
|
1001
1344
|
total_size += asset.size
|
|
1002
1345
|
return total_size
|
|
@@ -1004,12 +1347,12 @@ class HTTPDownload(Download):
|
|
|
1004
1347
|
def download_all(
|
|
1005
1348
|
self,
|
|
1006
1349
|
products: SearchResult,
|
|
1007
|
-
auth: Optional[
|
|
1350
|
+
auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
|
|
1008
1351
|
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
1009
1352
|
progress_callback: Optional[ProgressCallback] = None,
|
|
1010
1353
|
wait: int = DEFAULT_DOWNLOAD_WAIT,
|
|
1011
1354
|
timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
1012
|
-
**kwargs:
|
|
1355
|
+
**kwargs: Unpack[DownloadConf],
|
|
1013
1356
|
):
|
|
1014
1357
|
"""
|
|
1015
1358
|
Download all using parent (base plugin) method
|