eodag 2.12.1__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/__init__.py +6 -8
- eodag/api/core.py +654 -538
- eodag/api/product/__init__.py +12 -2
- eodag/api/product/_assets.py +59 -16
- eodag/api/product/_product.py +100 -93
- eodag/api/product/drivers/__init__.py +7 -2
- eodag/api/product/drivers/base.py +0 -3
- eodag/api/product/metadata_mapping.py +192 -96
- eodag/api/search_result.py +69 -10
- eodag/cli.py +55 -25
- eodag/config.py +391 -116
- eodag/plugins/apis/base.py +11 -168
- eodag/plugins/apis/ecmwf.py +36 -25
- eodag/plugins/apis/usgs.py +80 -35
- eodag/plugins/authentication/aws_auth.py +13 -4
- eodag/plugins/authentication/base.py +10 -1
- eodag/plugins/authentication/generic.py +2 -2
- eodag/plugins/authentication/header.py +31 -6
- eodag/plugins/authentication/keycloak.py +17 -84
- eodag/plugins/authentication/oauth.py +3 -3
- eodag/plugins/authentication/openid_connect.py +268 -49
- eodag/plugins/authentication/qsauth.py +4 -1
- eodag/plugins/authentication/sas_auth.py +9 -2
- eodag/plugins/authentication/token.py +98 -47
- eodag/plugins/authentication/token_exchange.py +122 -0
- eodag/plugins/crunch/base.py +3 -1
- eodag/plugins/crunch/filter_date.py +3 -9
- eodag/plugins/crunch/filter_latest_intersect.py +0 -3
- eodag/plugins/crunch/filter_latest_tpl_name.py +1 -4
- eodag/plugins/crunch/filter_overlap.py +4 -8
- eodag/plugins/crunch/filter_property.py +5 -11
- eodag/plugins/download/aws.py +149 -185
- eodag/plugins/download/base.py +88 -97
- eodag/plugins/download/creodias_s3.py +1 -1
- eodag/plugins/download/http.py +638 -310
- eodag/plugins/download/s3rest.py +47 -45
- eodag/plugins/manager.py +228 -88
- eodag/plugins/search/__init__.py +36 -0
- eodag/plugins/search/base.py +239 -30
- eodag/plugins/search/build_search_result.py +382 -37
- eodag/plugins/search/cop_marine.py +441 -0
- eodag/plugins/search/creodias_s3.py +25 -20
- eodag/plugins/search/csw.py +5 -7
- eodag/plugins/search/data_request_search.py +61 -30
- eodag/plugins/search/qssearch.py +713 -255
- eodag/plugins/search/static_stac_search.py +106 -40
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/product_types.yml +1921 -34
- eodag/resources/providers.yml +4091 -3655
- eodag/resources/stac.yml +50 -216
- eodag/resources/stac_api.yml +71 -25
- eodag/resources/stac_provider.yml +5 -0
- eodag/resources/user_conf_template.yml +89 -32
- eodag/rest/__init__.py +6 -0
- eodag/rest/cache.py +70 -0
- eodag/rest/config.py +68 -0
- eodag/rest/constants.py +26 -0
- eodag/rest/core.py +735 -0
- eodag/rest/errors.py +178 -0
- eodag/rest/server.py +264 -431
- eodag/rest/stac.py +442 -836
- eodag/rest/types/collections_search.py +44 -0
- eodag/rest/types/eodag_search.py +238 -47
- eodag/rest/types/queryables.py +164 -0
- eodag/rest/types/stac_search.py +273 -0
- eodag/rest/utils/__init__.py +216 -0
- eodag/rest/utils/cql_evaluate.py +119 -0
- eodag/rest/utils/rfc3339.py +64 -0
- eodag/types/__init__.py +106 -10
- eodag/types/bbox.py +15 -14
- eodag/types/download_args.py +40 -0
- eodag/types/search_args.py +57 -7
- eodag/types/whoosh.py +79 -0
- eodag/utils/__init__.py +110 -91
- eodag/utils/constraints.py +37 -45
- eodag/utils/exceptions.py +39 -22
- eodag/utils/import_system.py +0 -4
- eodag/utils/logging.py +37 -80
- eodag/utils/notebook.py +4 -4
- eodag/utils/repr.py +113 -0
- eodag/utils/requests.py +128 -0
- eodag/utils/rest.py +100 -0
- eodag/utils/stac_reader.py +93 -21
- {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/METADATA +88 -53
- eodag-3.0.0.dist-info/RECORD +109 -0
- {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/WHEEL +1 -1
- {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/entry_points.txt +7 -5
- eodag/plugins/apis/cds.py +0 -540
- eodag/rest/types/stac_queryables.py +0 -134
- eodag/rest/utils.py +0 -1133
- eodag-2.12.1.dist-info/RECORD +0 -94
- {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/LICENSE +0 -0
- {eodag-2.12.1.dist-info → eodag-3.0.0.dist-info}/top_level.txt +0 -0
eodag/plugins/download/http.py
CHANGED
|
@@ -20,23 +20,38 @@ from __future__ import annotations
|
|
|
20
20
|
import logging
|
|
21
21
|
import os
|
|
22
22
|
import shutil
|
|
23
|
+
import tarfile
|
|
23
24
|
import zipfile
|
|
24
25
|
from datetime import datetime
|
|
25
26
|
from email.message import Message
|
|
26
27
|
from itertools import chain
|
|
27
|
-
from
|
|
28
|
+
from json import JSONDecodeError
|
|
29
|
+
from typing import (
|
|
30
|
+
TYPE_CHECKING,
|
|
31
|
+
Any,
|
|
32
|
+
Dict,
|
|
33
|
+
Iterator,
|
|
34
|
+
List,
|
|
35
|
+
Optional,
|
|
36
|
+
TypedDict,
|
|
37
|
+
Union,
|
|
38
|
+
cast,
|
|
39
|
+
)
|
|
28
40
|
from urllib.parse import parse_qs, urlparse
|
|
29
41
|
|
|
30
42
|
import geojson
|
|
31
43
|
import requests
|
|
32
|
-
import requests_ftp
|
|
33
44
|
from lxml import etree
|
|
34
45
|
from requests import RequestException
|
|
46
|
+
from requests.auth import AuthBase
|
|
47
|
+
from requests.structures import CaseInsensitiveDict
|
|
35
48
|
from stream_zip import ZIP_AUTO, stream_zip
|
|
36
49
|
|
|
37
50
|
from eodag.api.product.metadata_mapping import (
|
|
51
|
+
NOT_AVAILABLE,
|
|
38
52
|
OFFLINE_STATUS,
|
|
39
53
|
ONLINE_STATUS,
|
|
54
|
+
STAGING_STATUS,
|
|
40
55
|
mtd_cfg_as_conversion_and_querypath,
|
|
41
56
|
properties_from_json,
|
|
42
57
|
properties_from_xml,
|
|
@@ -49,10 +64,14 @@ from eodag.utils import (
|
|
|
49
64
|
HTTP_REQ_TIMEOUT,
|
|
50
65
|
USER_AGENT,
|
|
51
66
|
ProgressCallback,
|
|
67
|
+
StreamResponse,
|
|
52
68
|
flatten_top_directories,
|
|
69
|
+
guess_extension,
|
|
70
|
+
guess_file_type,
|
|
53
71
|
parse_header,
|
|
54
72
|
path_to_uri,
|
|
55
73
|
sanitize,
|
|
74
|
+
string_to_jsonpath,
|
|
56
75
|
uri_to_path,
|
|
57
76
|
)
|
|
58
77
|
from eodag.utils.exceptions import (
|
|
@@ -66,10 +85,11 @@ from eodag.utils.exceptions import (
|
|
|
66
85
|
if TYPE_CHECKING:
|
|
67
86
|
from requests import Response
|
|
68
87
|
|
|
69
|
-
from eodag.api.product import EOProduct
|
|
88
|
+
from eodag.api.product import Asset, EOProduct # type: ignore
|
|
70
89
|
from eodag.api.search_result import SearchResult
|
|
71
90
|
from eodag.config import PluginConfig
|
|
72
|
-
from eodag.
|
|
91
|
+
from eodag.types.download_args import DownloadConf
|
|
92
|
+
from eodag.utils import DownloadedCallback, Unpack
|
|
73
93
|
|
|
74
94
|
logger = logging.getLogger("eodag.download.http")
|
|
75
95
|
|
|
@@ -78,10 +98,9 @@ class HTTPDownload(Download):
|
|
|
78
98
|
"""HTTPDownload plugin. Handles product download over HTTP protocol
|
|
79
99
|
|
|
80
100
|
:param provider: provider name
|
|
81
|
-
:type provider: str
|
|
82
101
|
:param config: Download plugin configuration:
|
|
83
102
|
|
|
84
|
-
* ``config.base_uri`` (str) - default endpoint url
|
|
103
|
+
* ``config.base_uri`` (str) - (optional) default endpoint url
|
|
85
104
|
* ``config.extract`` (bool) - (optional) extract downloaded archive or not
|
|
86
105
|
* ``config.auth_error_code`` (int) - (optional) authentication error code
|
|
87
106
|
* ``config.dl_url_params`` (dict) - (optional) attitional parameters to send in the request
|
|
@@ -92,29 +111,19 @@ class HTTPDownload(Download):
|
|
|
92
111
|
* ``config.order_method`` (str) - (optional) HTTP request method, GET (default) or POST
|
|
93
112
|
* ``config.order_headers`` (dict) - (optional) order request headers
|
|
94
113
|
* ``config.order_on_response`` (dict) - (optional) edit or add new product properties
|
|
95
|
-
* ``config.
|
|
96
|
-
* ``config.order_status_percent`` (str) - (optional) progress percentage key in obtained status response
|
|
97
|
-
* ``config.order_status_error`` (dict) - (optional) key/value identifying an error status
|
|
98
|
-
|
|
99
|
-
:type config: :class:`~eodag.config.PluginConfig`
|
|
114
|
+
* ``config.order_status`` (:class:`~eodag.config.PluginConfig.OrderStatus`) - (optional) Order status handling
|
|
100
115
|
|
|
101
116
|
"""
|
|
102
117
|
|
|
103
118
|
def __init__(self, provider: str, config: PluginConfig) -> None:
|
|
104
119
|
super(HTTPDownload, self).__init__(provider, config)
|
|
105
|
-
if not hasattr(self.config, "base_uri"):
|
|
106
|
-
raise MisconfiguredError(
|
|
107
|
-
"{} plugin require a base_uri configuration key".format(
|
|
108
|
-
type(self).__name__
|
|
109
|
-
)
|
|
110
|
-
)
|
|
111
120
|
|
|
112
|
-
def
|
|
121
|
+
def order_download(
|
|
113
122
|
self,
|
|
114
123
|
product: EOProduct,
|
|
115
|
-
auth: Optional[
|
|
116
|
-
**kwargs:
|
|
117
|
-
) ->
|
|
124
|
+
auth: Optional[AuthBase] = None,
|
|
125
|
+
**kwargs: Unpack[DownloadConf],
|
|
126
|
+
) -> Optional[Dict[str, Any]]:
|
|
118
127
|
"""Send product order request.
|
|
119
128
|
|
|
120
129
|
It will be executed once before the download retry loop, if the product is OFFLINE
|
|
@@ -135,249 +144,406 @@ class HTTPDownload(Download):
|
|
|
135
144
|
- **orderLink**: order request URL
|
|
136
145
|
|
|
137
146
|
:param product: The EO product to order
|
|
138
|
-
:
|
|
139
|
-
:param auth: (optional) The configuration of a plugin of type Authentication
|
|
140
|
-
:type auth: :class:`~eodag.config.PluginConfig`
|
|
147
|
+
:param auth: (optional) authenticated object
|
|
141
148
|
:param kwargs: download additional kwargs
|
|
142
|
-
:
|
|
149
|
+
:returns: the returned json status response
|
|
143
150
|
"""
|
|
144
|
-
|
|
145
|
-
|
|
151
|
+
product.properties["storageStatus"] = STAGING_STATUS
|
|
152
|
+
|
|
153
|
+
order_method = getattr(self.config, "order_method", "GET").upper()
|
|
154
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
155
|
+
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
156
|
+
OrderKwargs = TypedDict(
|
|
157
|
+
"OrderKwargs", {"json": Dict[str, Union[Any, List[str]]]}, total=False
|
|
158
|
+
)
|
|
159
|
+
order_kwargs: OrderKwargs = {}
|
|
160
|
+
if order_method == "POST":
|
|
146
161
|
# separate url & parameters
|
|
147
162
|
parts = urlparse(str(product.properties["orderLink"]))
|
|
148
|
-
query_dict =
|
|
149
|
-
|
|
163
|
+
query_dict = {}
|
|
164
|
+
# `parts.query` may be a JSON with query strings as one of values. If `parse_qs` is executed as first step,
|
|
165
|
+
# the resulting `query_dict` would be erroneous.
|
|
166
|
+
try:
|
|
150
167
|
query_dict = geojson.loads(parts.query)
|
|
151
|
-
|
|
152
|
-
|
|
168
|
+
except JSONDecodeError:
|
|
169
|
+
if parts.query:
|
|
170
|
+
query_dict = parse_qs(parts.query)
|
|
171
|
+
order_url = parts._replace(query="").geturl()
|
|
172
|
+
if query_dict:
|
|
173
|
+
order_kwargs["json"] = query_dict
|
|
153
174
|
else:
|
|
154
175
|
order_url = product.properties["orderLink"]
|
|
155
176
|
order_kwargs = {}
|
|
156
177
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
logger.debug(
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
178
|
+
headers = {**getattr(self.config, "order_headers", {}), **USER_AGENT}
|
|
179
|
+
try:
|
|
180
|
+
with requests.request(
|
|
181
|
+
method=order_method,
|
|
182
|
+
url=order_url,
|
|
183
|
+
auth=auth,
|
|
184
|
+
timeout=timeout,
|
|
185
|
+
headers=headers,
|
|
186
|
+
verify=ssl_verify,
|
|
187
|
+
**order_kwargs,
|
|
188
|
+
) as response:
|
|
189
|
+
logger.debug(f"{order_method} {order_url} {headers} {order_kwargs}")
|
|
190
|
+
try:
|
|
191
|
+
response.raise_for_status()
|
|
192
|
+
ordered_message = response.text
|
|
193
|
+
logger.debug(ordered_message)
|
|
194
|
+
product.properties["storageStatus"] = STAGING_STATUS
|
|
195
|
+
except RequestException as e:
|
|
196
|
+
if hasattr(e, "response") and (
|
|
197
|
+
content := getattr(e.response, "content", None)
|
|
198
|
+
):
|
|
199
|
+
error_message = f"{content.decode('utf-8')} - {e}"
|
|
200
|
+
else:
|
|
201
|
+
error_message = str(e)
|
|
202
|
+
logger.warning(
|
|
203
|
+
"%s could not be ordered, request returned %s",
|
|
204
|
+
product.properties["title"],
|
|
205
|
+
error_message,
|
|
206
|
+
)
|
|
207
|
+
self._check_auth_exception(e)
|
|
208
|
+
return self.order_response_process(response, product)
|
|
209
|
+
except requests.exceptions.Timeout as exc:
|
|
210
|
+
raise TimeOutError(exc, timeout=timeout) from exc
|
|
211
|
+
|
|
212
|
+
def order_response_process(
|
|
213
|
+
self, response: Response, product: EOProduct
|
|
214
|
+
) -> Optional[Dict[str, Any]]:
|
|
215
|
+
"""Process order response
|
|
216
|
+
|
|
217
|
+
:param response: The order response
|
|
218
|
+
:param product: The orderd EO product
|
|
219
|
+
:returns: the returned json status response
|
|
220
|
+
"""
|
|
221
|
+
on_response_mm = getattr(self.config, "order_on_response", {}).get(
|
|
184
222
|
"metadata_mapping", {}
|
|
185
223
|
)
|
|
186
|
-
if
|
|
187
|
-
|
|
188
|
-
order_metadata_mapping_jsonpath = mtd_cfg_as_conversion_and_querypath(
|
|
189
|
-
order_metadata_mapping,
|
|
190
|
-
)
|
|
191
|
-
properties_update = properties_from_json(
|
|
192
|
-
response.json(),
|
|
193
|
-
order_metadata_mapping_jsonpath,
|
|
194
|
-
)
|
|
195
|
-
product.properties.update(properties_update)
|
|
196
|
-
if "downloadLink" in properties_update:
|
|
197
|
-
product.remote_location = product.location = product.properties[
|
|
198
|
-
"downloadLink"
|
|
199
|
-
]
|
|
200
|
-
logger.debug(f"Product location updated to {product.location}")
|
|
224
|
+
if not on_response_mm:
|
|
225
|
+
return None
|
|
201
226
|
|
|
202
|
-
|
|
227
|
+
logger.debug("Parsing order response to update product metada-mapping")
|
|
228
|
+
on_response_mm_jsonpath = mtd_cfg_as_conversion_and_querypath(
|
|
229
|
+
on_response_mm,
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
json_response = response.json()
|
|
233
|
+
|
|
234
|
+
properties_update = properties_from_json(
|
|
235
|
+
{"json": json_response, "headers": {**response.headers}},
|
|
236
|
+
on_response_mm_jsonpath,
|
|
237
|
+
)
|
|
238
|
+
product.properties.update(
|
|
239
|
+
{k: v for k, v in properties_update.items() if v != NOT_AVAILABLE}
|
|
240
|
+
)
|
|
241
|
+
if "downloadLink" in product.properties:
|
|
242
|
+
product.remote_location = product.location = product.properties[
|
|
243
|
+
"downloadLink"
|
|
244
|
+
]
|
|
245
|
+
logger.debug(f"Product location updated to {product.location}")
|
|
246
|
+
|
|
247
|
+
return json_response
|
|
248
|
+
|
|
249
|
+
def order_download_status(
|
|
203
250
|
self,
|
|
204
251
|
product: EOProduct,
|
|
205
|
-
auth: Optional[
|
|
206
|
-
**kwargs: Union[str, bool, Dict[str, Any]],
|
|
252
|
+
auth: Optional[AuthBase] = None,
|
|
207
253
|
) -> None:
|
|
208
254
|
"""Send product order status request.
|
|
209
255
|
|
|
210
256
|
It will be executed before each download retry.
|
|
211
257
|
Product order status request can be configured using the following download plugin parameters:
|
|
212
258
|
|
|
213
|
-
- **
|
|
214
|
-
|
|
215
|
-
- **order_status_percent**: (optional) progress percentage key in obtained response
|
|
216
|
-
|
|
217
|
-
- **order_status_error**: (optional) key/value identifying an error status
|
|
259
|
+
- **order_status**: :class:`~eodag.config.PluginConfig.OrderStatus`
|
|
218
260
|
|
|
219
261
|
Product properties used for order status:
|
|
220
262
|
|
|
221
263
|
- **orderStatusLink**: order status request URL
|
|
222
264
|
|
|
223
265
|
:param product: The ordered EO product
|
|
224
|
-
:
|
|
225
|
-
:param auth: (optional) The configuration of a plugin of type Authentication
|
|
226
|
-
:type auth: :class:`~eodag.config.PluginConfig`
|
|
266
|
+
:param auth: (optional) authenticated object
|
|
227
267
|
:param kwargs: download additional kwargs
|
|
228
|
-
:type kwargs: Union[str, bool, dict]
|
|
229
268
|
"""
|
|
230
|
-
|
|
231
|
-
|
|
269
|
+
|
|
270
|
+
status_config = getattr(self.config, "order_status", {})
|
|
271
|
+
success_code: Optional[int] = status_config.get("success", {}).get("http_code")
|
|
272
|
+
|
|
273
|
+
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
274
|
+
|
|
275
|
+
def _request(
|
|
276
|
+
url: str,
|
|
277
|
+
method: str = "GET",
|
|
278
|
+
headers: Optional[Dict[str, Any]] = None,
|
|
279
|
+
json: Optional[Any] = None,
|
|
280
|
+
timeout: int = HTTP_REQ_TIMEOUT,
|
|
281
|
+
) -> Response:
|
|
282
|
+
"""Send request and handle allow redirects"""
|
|
283
|
+
|
|
284
|
+
logger.debug(f"{method} {url} {headers} {json}")
|
|
285
|
+
try:
|
|
286
|
+
response = requests.request(
|
|
287
|
+
method=method,
|
|
288
|
+
url=url,
|
|
289
|
+
auth=auth,
|
|
290
|
+
timeout=timeout,
|
|
291
|
+
headers={**(headers or {}), **USER_AGENT},
|
|
292
|
+
allow_redirects=False, # Redirection is manually handled
|
|
293
|
+
json=json,
|
|
294
|
+
)
|
|
295
|
+
logger.debug(
|
|
296
|
+
f"Order download status request responded with {response.status_code}"
|
|
297
|
+
)
|
|
298
|
+
response.raise_for_status() # Raise an exception if status code indicates an error
|
|
299
|
+
|
|
300
|
+
# Handle redirection (if needed)
|
|
301
|
+
if (
|
|
302
|
+
300 <= response.status_code < 400
|
|
303
|
+
and response.status_code != success_code
|
|
304
|
+
):
|
|
305
|
+
# cf: https://www.rfc-editor.org/rfc/rfc9110.html#name-303-see-other
|
|
306
|
+
if response.status_code == 303:
|
|
307
|
+
method = "GET"
|
|
308
|
+
if new_url := response.headers.get("Location"):
|
|
309
|
+
return _request(new_url, method, headers, json, timeout)
|
|
310
|
+
return response
|
|
311
|
+
except requests.exceptions.Timeout as exc:
|
|
312
|
+
raise TimeOutError(exc, timeout=timeout) from exc
|
|
313
|
+
|
|
314
|
+
status_request: Dict[str, Any] = status_config.get("request", {})
|
|
315
|
+
status_request_method = str(status_request.get("method", "GET")).upper()
|
|
316
|
+
|
|
317
|
+
if status_request_method == "POST":
|
|
232
318
|
# separate url & parameters
|
|
233
319
|
parts = urlparse(str(product.properties["orderStatusLink"]))
|
|
320
|
+
status_url = parts._replace(query="").geturl()
|
|
234
321
|
query_dict = parse_qs(parts.query)
|
|
235
322
|
if not query_dict and parts.query:
|
|
236
323
|
query_dict = geojson.loads(parts.query)
|
|
237
|
-
|
|
238
|
-
status_kwargs = {"json": query_dict} if query_dict else {}
|
|
324
|
+
json_data = query_dict if query_dict else None
|
|
239
325
|
else:
|
|
240
326
|
status_url = product.properties["orderStatusLink"]
|
|
241
|
-
|
|
327
|
+
json_data = None
|
|
328
|
+
|
|
329
|
+
# check header for success before full status request
|
|
330
|
+
skip_parsing_status_response = False
|
|
331
|
+
status_dict: Dict[str, Any] = {}
|
|
332
|
+
config_on_success: Dict[str, Any] = status_config.get("on_success", {})
|
|
333
|
+
on_success_mm = config_on_success.get("metadata_mapping", {})
|
|
334
|
+
|
|
335
|
+
status_response_content_needed = (
|
|
336
|
+
False
|
|
337
|
+
if not any([v.startswith("$.json.") for v in on_success_mm.values()])
|
|
338
|
+
else True
|
|
339
|
+
)
|
|
242
340
|
|
|
243
|
-
|
|
244
|
-
method=status_method,
|
|
245
|
-
url=status_url,
|
|
246
|
-
auth=auth,
|
|
247
|
-
timeout=HTTP_REQ_TIMEOUT,
|
|
248
|
-
headers=dict(
|
|
249
|
-
getattr(self.config, "order_status_headers", {}), **USER_AGENT
|
|
250
|
-
),
|
|
251
|
-
**status_kwargs,
|
|
252
|
-
) as response:
|
|
341
|
+
if success_code:
|
|
253
342
|
try:
|
|
254
|
-
response
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
343
|
+
response = _request(
|
|
344
|
+
status_url,
|
|
345
|
+
"HEAD",
|
|
346
|
+
status_request.get("headers"),
|
|
347
|
+
json_data,
|
|
348
|
+
timeout,
|
|
260
349
|
)
|
|
261
|
-
if order_status_percent_key and order_status_percent_key in status_dict:
|
|
262
|
-
order_status_value = str(status_dict[order_status_percent_key])
|
|
263
|
-
if order_status_value.isdigit():
|
|
264
|
-
order_status_value += "%"
|
|
265
|
-
logger.info(
|
|
266
|
-
f"{product.properties['title']} order status: {order_status_value}"
|
|
267
|
-
)
|
|
268
|
-
# display error if any
|
|
269
|
-
order_status_error_dict = getattr(self.config, "order_status_error", {})
|
|
270
350
|
if (
|
|
271
|
-
|
|
272
|
-
and
|
|
351
|
+
response.status_code == success_code
|
|
352
|
+
and not status_response_content_needed
|
|
273
353
|
):
|
|
274
|
-
#
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
354
|
+
# success and no need to get status response content
|
|
355
|
+
skip_parsing_status_response = True
|
|
356
|
+
except RequestException as e:
|
|
357
|
+
logger.debug(e)
|
|
358
|
+
|
|
359
|
+
if not skip_parsing_status_response:
|
|
360
|
+
# status request
|
|
361
|
+
try:
|
|
362
|
+
response = _request(
|
|
363
|
+
status_url,
|
|
364
|
+
status_request_method,
|
|
365
|
+
status_request.get("headers"),
|
|
366
|
+
json_data,
|
|
367
|
+
timeout,
|
|
281
368
|
)
|
|
282
369
|
if (
|
|
283
|
-
|
|
284
|
-
and
|
|
285
|
-
and "message" in status_dict
|
|
286
|
-
and status_dict["message"] == order_status_success_dict["message"]
|
|
287
|
-
):
|
|
288
|
-
product.properties["storageStatus"] = ONLINE_STATUS
|
|
289
|
-
if (
|
|
290
|
-
order_status_success_dict
|
|
291
|
-
and order_status_success_dict.items() <= status_dict.items()
|
|
292
|
-
and getattr(self.config, "order_status_on_success", {}).get(
|
|
293
|
-
"need_search"
|
|
294
|
-
)
|
|
370
|
+
response.status_code == success_code
|
|
371
|
+
and not status_response_content_needed
|
|
295
372
|
):
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
373
|
+
# success and no need to get status response content
|
|
374
|
+
skip_parsing_status_response = True
|
|
375
|
+
except RequestException as e:
|
|
376
|
+
raise DownloadError(
|
|
377
|
+
"%s order status could not be checked, request returned %s"
|
|
378
|
+
% (
|
|
379
|
+
product.properties["title"],
|
|
380
|
+
e,
|
|
304
381
|
)
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
assert isinstance(
|
|
328
|
-
results, list
|
|
329
|
-
), "results must be in a list"
|
|
330
|
-
# single result
|
|
331
|
-
result = results[0]
|
|
332
|
-
# parse result
|
|
333
|
-
new_search_metadata_mapping = (
|
|
334
|
-
self.config.order_status_on_success["metadata_mapping"]
|
|
335
|
-
)
|
|
336
|
-
order_metadata_mapping_jsonpath = {}
|
|
337
|
-
order_metadata_mapping_jsonpath = (
|
|
338
|
-
mtd_cfg_as_conversion_and_querypath(
|
|
339
|
-
new_search_metadata_mapping,
|
|
340
|
-
order_metadata_mapping_jsonpath,
|
|
341
|
-
)
|
|
342
|
-
)
|
|
343
|
-
properties_update = properties_from_xml(
|
|
344
|
-
result,
|
|
345
|
-
order_metadata_mapping_jsonpath,
|
|
346
|
-
)
|
|
347
|
-
except Exception as e:
|
|
348
|
-
logger.debug(e)
|
|
349
|
-
raise DownloadError(
|
|
350
|
-
f"Could not parse result after order success for {product.properties['searchLink']} "
|
|
351
|
-
f"request. Please search and download {product} again"
|
|
352
|
-
)
|
|
353
|
-
# update product
|
|
354
|
-
product.properties.update(properties_update)
|
|
355
|
-
product.location = product.remote_location = product.properties[
|
|
356
|
-
"downloadLink"
|
|
357
|
-
]
|
|
358
|
-
else:
|
|
359
|
-
logger.warning(
|
|
360
|
-
"JSON response parsing is not implemented yet for new searches "
|
|
361
|
-
f"after order success. Please search and download {product} again"
|
|
362
|
-
)
|
|
382
|
+
) from e
|
|
383
|
+
|
|
384
|
+
if not skip_parsing_status_response:
|
|
385
|
+
# status request
|
|
386
|
+
json_response = response.json()
|
|
387
|
+
if not isinstance(json_response, dict):
|
|
388
|
+
raise RequestException("response content is not a dict")
|
|
389
|
+
status_dict = json_response
|
|
390
|
+
|
|
391
|
+
status_mm = status_config.get("metadata_mapping", {})
|
|
392
|
+
status_mm_jsonpath = (
|
|
393
|
+
mtd_cfg_as_conversion_and_querypath(
|
|
394
|
+
status_mm,
|
|
395
|
+
)
|
|
396
|
+
if status_mm
|
|
397
|
+
else {}
|
|
398
|
+
)
|
|
399
|
+
logger.debug("Parsing order status response")
|
|
400
|
+
status_dict = properties_from_json(
|
|
401
|
+
{"json": response.json(), "headers": {**response.headers}},
|
|
402
|
+
status_mm_jsonpath,
|
|
403
|
+
)
|
|
363
404
|
|
|
364
|
-
|
|
365
|
-
|
|
405
|
+
# display progress percentage
|
|
406
|
+
if "percent" in status_dict:
|
|
407
|
+
status_percent = str(status_dict["percent"])
|
|
408
|
+
if status_percent.isdigit():
|
|
409
|
+
status_percent += "%"
|
|
410
|
+
logger.info(
|
|
411
|
+
f"{product.properties['title']} order status: {status_percent}"
|
|
412
|
+
)
|
|
413
|
+
|
|
414
|
+
status_message = status_dict.get("message")
|
|
415
|
+
product.properties["orderStatus"] = status_dict.get("status")
|
|
416
|
+
|
|
417
|
+
# handle status error
|
|
418
|
+
errors: Dict[str, Any] = status_config.get("error", {})
|
|
419
|
+
if errors and errors.items() <= status_dict.items():
|
|
420
|
+
raise DownloadError(
|
|
421
|
+
f"Provider {product.provider} returned: {status_dict.get('error_message', status_message)}"
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
success_status: Dict[str, Any] = status_config.get("success", {}).get("status")
|
|
425
|
+
# if not success
|
|
426
|
+
if (success_status and success_status != status_dict.get("status")) or (
|
|
427
|
+
success_code and success_code != response.status_code
|
|
428
|
+
):
|
|
429
|
+
error = NotAvailableError(status_message)
|
|
430
|
+
raise error
|
|
431
|
+
|
|
432
|
+
product.properties["storageStatus"] = ONLINE_STATUS
|
|
433
|
+
|
|
434
|
+
if not config_on_success:
|
|
435
|
+
# Nothing left to do
|
|
436
|
+
return None
|
|
437
|
+
|
|
438
|
+
# need search on success ?
|
|
439
|
+
if config_on_success.get("need_search"):
|
|
440
|
+
logger.debug(f"Search for new location: {product.properties['searchLink']}")
|
|
441
|
+
try:
|
|
442
|
+
response = _request(product.properties["searchLink"], timeout=timeout)
|
|
366
443
|
except RequestException as e:
|
|
367
444
|
logger.warning(
|
|
368
445
|
"%s order status could not be checked, request returned %s",
|
|
369
446
|
product.properties["title"],
|
|
370
447
|
e,
|
|
371
448
|
)
|
|
449
|
+
return None
|
|
450
|
+
|
|
451
|
+
result_type = config_on_success.get("result_type", "json")
|
|
452
|
+
result_entry = config_on_success.get("results_entry")
|
|
453
|
+
|
|
454
|
+
on_success_mm_querypath = (
|
|
455
|
+
# append product.properties as input for on success response parsing
|
|
456
|
+
mtd_cfg_as_conversion_and_querypath(
|
|
457
|
+
dict(
|
|
458
|
+
{k: str(v) for k, v in product.properties.items()}, **on_success_mm
|
|
459
|
+
),
|
|
460
|
+
)
|
|
461
|
+
if on_success_mm
|
|
462
|
+
else {}
|
|
463
|
+
)
|
|
464
|
+
try:
|
|
465
|
+
if result_type == "xml":
|
|
466
|
+
if not result_entry:
|
|
467
|
+
raise MisconfiguredError(
|
|
468
|
+
'"result_entry" is required with "result_type" "xml"'
|
|
469
|
+
'in "order_status.on_success"'
|
|
470
|
+
)
|
|
471
|
+
root_node = etree.fromstring(response.content)
|
|
472
|
+
namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
|
|
473
|
+
results = [
|
|
474
|
+
etree.tostring(entry)
|
|
475
|
+
for entry in root_node.xpath(
|
|
476
|
+
result_entry,
|
|
477
|
+
namespaces=namespaces,
|
|
478
|
+
)
|
|
479
|
+
]
|
|
480
|
+
if len(results) != 1:
|
|
481
|
+
raise DownloadError(
|
|
482
|
+
"Could not get a single result after order success for "
|
|
483
|
+
f"{product.properties['searchLink']} request. "
|
|
484
|
+
f"Please search and download {product} again"
|
|
485
|
+
)
|
|
486
|
+
assert isinstance(results, list), "results must be in a list"
|
|
487
|
+
# single result
|
|
488
|
+
result = results[0]
|
|
489
|
+
if on_success_mm_querypath:
|
|
490
|
+
properties_update = properties_from_xml(
|
|
491
|
+
result,
|
|
492
|
+
on_success_mm_querypath,
|
|
493
|
+
)
|
|
494
|
+
else:
|
|
495
|
+
properties_update = {}
|
|
496
|
+
else:
|
|
497
|
+
json_response = (
|
|
498
|
+
response.json()
|
|
499
|
+
if "application/json" in response.headers.get("Content-Type", "")
|
|
500
|
+
else {}
|
|
501
|
+
)
|
|
502
|
+
if result_entry:
|
|
503
|
+
entry_jsonpath = string_to_jsonpath(result_entry, force=True)
|
|
504
|
+
json_response = entry_jsonpath.find(json_response)
|
|
505
|
+
raise NotImplementedError(
|
|
506
|
+
'result_entry in config.on_success is not yet supported for result_type "json"'
|
|
507
|
+
)
|
|
508
|
+
if on_success_mm_querypath:
|
|
509
|
+
logger.debug(
|
|
510
|
+
"Parsing on-success metadata-mapping using order status response"
|
|
511
|
+
)
|
|
512
|
+
properties_update = properties_from_json(
|
|
513
|
+
{"json": json_response, "headers": {**response.headers}},
|
|
514
|
+
on_success_mm_querypath,
|
|
515
|
+
)
|
|
516
|
+
# only keep properties to update (remove product.properties added for parsing)
|
|
517
|
+
properties_update = {
|
|
518
|
+
k: v for k, v in properties_update.items() if k in on_success_mm
|
|
519
|
+
}
|
|
520
|
+
else:
|
|
521
|
+
properties_update = {}
|
|
522
|
+
except Exception as e:
|
|
523
|
+
if isinstance(e, DownloadError):
|
|
524
|
+
raise
|
|
525
|
+
logger.debug(e)
|
|
526
|
+
raise DownloadError(
|
|
527
|
+
f"Could not parse result after order success. Please search and download {product} again"
|
|
528
|
+
) from e
|
|
529
|
+
|
|
530
|
+
# update product
|
|
531
|
+
product.properties.update(properties_update)
|
|
532
|
+
if "downloadLink" in properties_update:
|
|
533
|
+
product.location = product.remote_location = product.properties[
|
|
534
|
+
"downloadLink"
|
|
535
|
+
]
|
|
536
|
+
else:
|
|
537
|
+
self.order_response_process(response, product)
|
|
372
538
|
|
|
373
539
|
def download(
|
|
374
540
|
self,
|
|
375
541
|
product: EOProduct,
|
|
376
|
-
auth: Optional[
|
|
542
|
+
auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
|
|
377
543
|
progress_callback: Optional[ProgressCallback] = None,
|
|
378
544
|
wait: int = DEFAULT_DOWNLOAD_WAIT,
|
|
379
545
|
timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
380
|
-
**kwargs:
|
|
546
|
+
**kwargs: Unpack[DownloadConf],
|
|
381
547
|
) -> Optional[str]:
|
|
382
548
|
"""Download a product using HTTP protocol.
|
|
383
549
|
|
|
@@ -385,14 +551,26 @@ class HTTPDownload(Download):
|
|
|
385
551
|
the user is warned, it is renamed to remove the zip extension and
|
|
386
552
|
no further treatment is done (no extraction)
|
|
387
553
|
"""
|
|
554
|
+
if auth is not None and not isinstance(auth, AuthBase):
|
|
555
|
+
raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
|
|
556
|
+
|
|
388
557
|
if progress_callback is None:
|
|
389
558
|
logger.info(
|
|
390
559
|
"Progress bar unavailable, please call product.download() instead of plugin.download()"
|
|
391
560
|
)
|
|
392
561
|
progress_callback = ProgressCallback(disable=True)
|
|
393
562
|
|
|
563
|
+
output_extension = getattr(self.config, "products", {}).get(
|
|
564
|
+
product.product_type, {}
|
|
565
|
+
).get("output_extension", None) or getattr(
|
|
566
|
+
self.config, "output_extension", ".zip"
|
|
567
|
+
)
|
|
568
|
+
kwargs["output_extension"] = kwargs.get("output_extension", output_extension)
|
|
569
|
+
|
|
394
570
|
fs_path, record_filename = self._prepare_download(
|
|
395
|
-
product,
|
|
571
|
+
product,
|
|
572
|
+
progress_callback=progress_callback,
|
|
573
|
+
**kwargs,
|
|
396
574
|
)
|
|
397
575
|
if not fs_path or not record_filename:
|
|
398
576
|
if fs_path:
|
|
@@ -400,7 +578,10 @@ class HTTPDownload(Download):
|
|
|
400
578
|
return fs_path
|
|
401
579
|
|
|
402
580
|
# download assets if exist instead of remote_location
|
|
403
|
-
if len(product.assets) > 0 and
|
|
581
|
+
if len(product.assets) > 0 and (
|
|
582
|
+
not getattr(self.config, "ignore_assets", False)
|
|
583
|
+
or kwargs.get("asset", None) is not None
|
|
584
|
+
):
|
|
404
585
|
try:
|
|
405
586
|
fs_path = self._download_assets(
|
|
406
587
|
product,
|
|
@@ -424,18 +605,23 @@ class HTTPDownload(Download):
|
|
|
424
605
|
@self._download_retry(product, wait, timeout)
|
|
425
606
|
def download_request(
|
|
426
607
|
product: EOProduct,
|
|
427
|
-
auth:
|
|
608
|
+
auth: AuthBase,
|
|
428
609
|
progress_callback: ProgressCallback,
|
|
429
610
|
wait: int,
|
|
430
611
|
timeout: int,
|
|
431
|
-
**kwargs:
|
|
612
|
+
**kwargs: Unpack[DownloadConf],
|
|
432
613
|
) -> None:
|
|
433
614
|
chunks = self._stream_download(product, auth, progress_callback, **kwargs)
|
|
615
|
+
is_empty = True
|
|
434
616
|
|
|
435
617
|
with open(fs_path, "wb") as fhandle:
|
|
436
618
|
for chunk in chunks:
|
|
619
|
+
is_empty = False
|
|
437
620
|
fhandle.write(chunk)
|
|
438
621
|
|
|
622
|
+
if is_empty:
|
|
623
|
+
raise DownloadError(f"product {product.properties['id']} is empty")
|
|
624
|
+
|
|
439
625
|
download_request(product, auth, progress_callback, wait, timeout, **kwargs)
|
|
440
626
|
|
|
441
627
|
with open(record_filename, "w") as fh:
|
|
@@ -443,19 +629,57 @@ class HTTPDownload(Download):
|
|
|
443
629
|
logger.debug("Download recorded in %s", record_filename)
|
|
444
630
|
|
|
445
631
|
# Check that the downloaded file is really a zip file
|
|
446
|
-
|
|
447
|
-
self.config, "outputs_extension", ".zip"
|
|
448
|
-
)
|
|
449
|
-
if not zipfile.is_zipfile(fs_path) and outputs_extension == ".zip":
|
|
632
|
+
if not zipfile.is_zipfile(fs_path) and output_extension == ".zip":
|
|
450
633
|
logger.warning(
|
|
451
634
|
"Downloaded product is not a Zip File. Please check its file type before using it"
|
|
452
635
|
)
|
|
453
|
-
new_fs_path =
|
|
636
|
+
new_fs_path = os.path.join(
|
|
637
|
+
os.path.dirname(fs_path),
|
|
638
|
+
sanitize(product.properties["title"]),
|
|
639
|
+
)
|
|
640
|
+
if os.path.isfile(fs_path) and not tarfile.is_tarfile(fs_path):
|
|
641
|
+
if not os.path.isdir(new_fs_path):
|
|
642
|
+
os.makedirs(new_fs_path)
|
|
643
|
+
shutil.move(fs_path, new_fs_path)
|
|
644
|
+
file_path = os.path.join(new_fs_path, os.path.basename(fs_path))
|
|
645
|
+
new_file_path = file_path[: file_path.index(".zip")]
|
|
646
|
+
shutil.move(file_path, new_file_path)
|
|
647
|
+
# in the case where the outputs extension has not been set
|
|
648
|
+
# to ".tar" in the product type nor provider configuration
|
|
649
|
+
elif tarfile.is_tarfile(fs_path):
|
|
650
|
+
if not new_fs_path.endswith(".tar"):
|
|
651
|
+
new_fs_path += ".tar"
|
|
652
|
+
shutil.move(fs_path, new_fs_path)
|
|
653
|
+
kwargs["output_extension"] = ".tar"
|
|
654
|
+
product_path = self._finalize(
|
|
655
|
+
new_fs_path,
|
|
656
|
+
progress_callback=progress_callback,
|
|
657
|
+
**kwargs,
|
|
658
|
+
)
|
|
659
|
+
product.location = path_to_uri(product_path)
|
|
660
|
+
return product_path
|
|
661
|
+
else:
|
|
662
|
+
# not a file (dir with zip extension)
|
|
663
|
+
shutil.move(fs_path, new_fs_path)
|
|
664
|
+
product.location = path_to_uri(new_fs_path)
|
|
665
|
+
return new_fs_path
|
|
666
|
+
|
|
667
|
+
if os.path.isfile(fs_path) and not (
|
|
668
|
+
zipfile.is_zipfile(fs_path) or tarfile.is_tarfile(fs_path)
|
|
669
|
+
):
|
|
670
|
+
new_fs_path = os.path.join(
|
|
671
|
+
os.path.dirname(fs_path),
|
|
672
|
+
sanitize(product.properties["title"]),
|
|
673
|
+
)
|
|
674
|
+
if not os.path.isdir(new_fs_path):
|
|
675
|
+
os.makedirs(new_fs_path)
|
|
454
676
|
shutil.move(fs_path, new_fs_path)
|
|
455
677
|
product.location = path_to_uri(new_fs_path)
|
|
456
678
|
return new_fs_path
|
|
457
679
|
product_path = self._finalize(
|
|
458
|
-
fs_path,
|
|
680
|
+
fs_path,
|
|
681
|
+
progress_callback=progress_callback,
|
|
682
|
+
**kwargs,
|
|
459
683
|
)
|
|
460
684
|
product.location = path_to_uri(product_path)
|
|
461
685
|
return product_path
|
|
@@ -477,40 +701,67 @@ class HTTPDownload(Download):
|
|
|
477
701
|
)
|
|
478
702
|
return stream_size
|
|
479
703
|
|
|
704
|
+
def _check_product_filename(self, product: EOProduct) -> str:
|
|
705
|
+
filename = None
|
|
706
|
+
asset_content_disposition = self.stream.headers.get("content-disposition", None)
|
|
707
|
+
if asset_content_disposition:
|
|
708
|
+
filename = cast(
|
|
709
|
+
Optional[str],
|
|
710
|
+
parse_header(asset_content_disposition).get_param("filename", None),
|
|
711
|
+
)
|
|
712
|
+
if not filename:
|
|
713
|
+
# default filename extracted from path
|
|
714
|
+
filename = str(os.path.basename(self.stream.url))
|
|
715
|
+
filename_extension = os.path.splitext(filename)[1]
|
|
716
|
+
if not filename_extension:
|
|
717
|
+
if content_type := getattr(product, "headers", {}).get("Content-Type"):
|
|
718
|
+
ext = guess_extension(content_type)
|
|
719
|
+
if ext:
|
|
720
|
+
filename += ext
|
|
721
|
+
else:
|
|
722
|
+
output_extension: Optional[str] = (
|
|
723
|
+
getattr(self.config, "products", {})
|
|
724
|
+
.get(product.product_type, {})
|
|
725
|
+
.get("output_extension")
|
|
726
|
+
)
|
|
727
|
+
if output_extension:
|
|
728
|
+
filename += output_extension
|
|
729
|
+
|
|
730
|
+
return filename
|
|
731
|
+
|
|
480
732
|
def _stream_download_dict(
|
|
481
733
|
self,
|
|
482
734
|
product: EOProduct,
|
|
483
|
-
auth: Optional[
|
|
735
|
+
auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
|
|
484
736
|
progress_callback: Optional[ProgressCallback] = None,
|
|
485
737
|
wait: int = DEFAULT_DOWNLOAD_WAIT,
|
|
486
738
|
timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
487
|
-
**kwargs:
|
|
488
|
-
) ->
|
|
739
|
+
**kwargs: Unpack[DownloadConf],
|
|
740
|
+
) -> StreamResponse:
|
|
489
741
|
r"""
|
|
490
|
-
Returns
|
|
742
|
+
Returns dictionary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments.
|
|
491
743
|
It contains a generator to streamed download chunks and the response headers.
|
|
492
744
|
|
|
493
745
|
:param product: The EO product to download
|
|
494
|
-
:
|
|
495
|
-
:param auth: (optional) The configuration of a plugin of type Authentication
|
|
496
|
-
:type auth: :class:`~eodag.config.PluginConfig`
|
|
746
|
+
:param auth: (optional) authenticated object
|
|
497
747
|
:param progress_callback: (optional) A progress callback
|
|
498
|
-
:type progress_callback: :class:`~eodag.utils.ProgressCallback`
|
|
499
748
|
:param wait: (optional) If download fails, wait time in minutes between two download tries
|
|
500
|
-
:type wait: int
|
|
501
749
|
:param timeout: (optional) If download fails, maximum time in minutes before stop retrying
|
|
502
750
|
to download
|
|
503
|
-
:
|
|
504
|
-
:param kwargs: `outputs_prefix` (str), `extract` (bool), `delete_archive` (bool)
|
|
751
|
+
:param kwargs: `output_dir` (str), `extract` (bool), `delete_archive` (bool)
|
|
505
752
|
and `dl_url_params` (dict) can be provided as additional kwargs
|
|
506
753
|
and will override any other values defined in a configuration
|
|
507
754
|
file or with environment variables.
|
|
508
|
-
:
|
|
509
|
-
:returns: Dictionnary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments
|
|
510
|
-
:rtype: dict
|
|
755
|
+
:returns: Dictionary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments
|
|
511
756
|
"""
|
|
757
|
+
if auth is not None and not isinstance(auth, AuthBase):
|
|
758
|
+
raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
|
|
759
|
+
|
|
512
760
|
# download assets if exist instead of remote_location
|
|
513
|
-
if len(product.assets) > 0 and
|
|
761
|
+
if len(product.assets) > 0 and (
|
|
762
|
+
not getattr(self.config, "ignore_assets", False)
|
|
763
|
+
or kwargs.get("asset") is not None
|
|
764
|
+
):
|
|
514
765
|
try:
|
|
515
766
|
assets_values = product.assets.get_values(kwargs.get("asset", None))
|
|
516
767
|
chunks_tuples = self._stream_download_assets(
|
|
@@ -534,7 +785,7 @@ class HTTPDownload(Download):
|
|
|
534
785
|
"type"
|
|
535
786
|
]
|
|
536
787
|
|
|
537
|
-
return
|
|
788
|
+
return StreamResponse(
|
|
538
789
|
content=chain(iter([first_chunks_tuple]), chunks_tuples),
|
|
539
790
|
headers=assets_values[0].headers,
|
|
540
791
|
)
|
|
@@ -545,7 +796,7 @@ class HTTPDownload(Download):
|
|
|
545
796
|
if "title" in product.properties
|
|
546
797
|
else sanitize(product.properties.get("id", "download"))
|
|
547
798
|
)
|
|
548
|
-
return
|
|
799
|
+
return StreamResponse(
|
|
549
800
|
content=stream_zip(chunks_tuples),
|
|
550
801
|
media_type="application/zip",
|
|
551
802
|
headers={
|
|
@@ -560,36 +811,52 @@ class HTTPDownload(Download):
|
|
|
560
811
|
|
|
561
812
|
chunks = self._stream_download(product, auth, progress_callback, **kwargs)
|
|
562
813
|
# start reading chunks to set product.headers
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
814
|
+
try:
|
|
815
|
+
first_chunk = next(chunks)
|
|
816
|
+
except StopIteration:
|
|
817
|
+
# product is empty file
|
|
818
|
+
logger.error("product %s is empty", product.properties["id"])
|
|
819
|
+
raise NotAvailableError(f"product {product.properties['id']} is empty")
|
|
820
|
+
|
|
821
|
+
return StreamResponse(
|
|
566
822
|
content=chain(iter([first_chunk]), chunks),
|
|
567
823
|
headers=product.headers,
|
|
568
824
|
)
|
|
569
825
|
|
|
570
|
-
def
|
|
571
|
-
self, e: RequestException, product: EOProduct, ordered_message: str
|
|
572
|
-
) -> None:
|
|
826
|
+
def _check_auth_exception(self, e: Optional[RequestException]) -> None:
|
|
573
827
|
# check if error is identified as auth_error in provider conf
|
|
574
828
|
auth_errors = getattr(self.config, "auth_error_code", [None])
|
|
575
829
|
if not isinstance(auth_errors, list):
|
|
576
830
|
auth_errors = [auth_errors]
|
|
577
|
-
|
|
831
|
+
response_text = (
|
|
832
|
+
e.response.text.strip() if e is not None and e.response is not None else ""
|
|
833
|
+
)
|
|
834
|
+
if (
|
|
835
|
+
e is not None
|
|
836
|
+
and e.response is not None
|
|
837
|
+
and e.response.status_code in auth_errors
|
|
838
|
+
):
|
|
578
839
|
raise AuthenticationError(
|
|
579
|
-
"
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
e.response.text.strip(),
|
|
583
|
-
self.provider,
|
|
584
|
-
)
|
|
840
|
+
f"Please check your credentials for {self.provider}.",
|
|
841
|
+
f"HTTP Error {e.response.status_code} returned.",
|
|
842
|
+
response_text,
|
|
585
843
|
)
|
|
844
|
+
|
|
845
|
+
def _process_exception(
|
|
846
|
+
self, e: Optional[RequestException], product: EOProduct, ordered_message: str
|
|
847
|
+
) -> None:
|
|
848
|
+
self._check_auth_exception(e)
|
|
849
|
+
response_text = (
|
|
850
|
+
e.response.text.strip() if e is not None and e.response is not None else ""
|
|
851
|
+
)
|
|
586
852
|
# product not available
|
|
587
|
-
|
|
853
|
+
if product.properties.get("storageStatus", ONLINE_STATUS) != ONLINE_STATUS:
|
|
588
854
|
msg = (
|
|
589
855
|
ordered_message
|
|
590
|
-
if ordered_message and not
|
|
591
|
-
else
|
|
856
|
+
if ordered_message and not response_text
|
|
857
|
+
else response_text
|
|
592
858
|
)
|
|
859
|
+
|
|
593
860
|
raise NotAvailableError(
|
|
594
861
|
"%s(initially %s) requested, returned: %s"
|
|
595
862
|
% (
|
|
@@ -601,34 +868,33 @@ class HTTPDownload(Download):
|
|
|
601
868
|
else:
|
|
602
869
|
import traceback as tb
|
|
603
870
|
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
871
|
+
if e:
|
|
872
|
+
logger.error(
|
|
873
|
+
"Error while getting resource :\n%s\n%s",
|
|
874
|
+
tb.format_exc(),
|
|
875
|
+
response_text,
|
|
876
|
+
)
|
|
877
|
+
else:
|
|
878
|
+
logger.error("Error while getting resource :\n%s", tb.format_exc())
|
|
609
879
|
|
|
610
880
|
def _stream_download(
|
|
611
881
|
self,
|
|
612
882
|
product: EOProduct,
|
|
613
|
-
auth: Optional[
|
|
883
|
+
auth: Optional[AuthBase] = None,
|
|
614
884
|
progress_callback: Optional[ProgressCallback] = None,
|
|
615
|
-
**kwargs:
|
|
885
|
+
**kwargs: Unpack[DownloadConf],
|
|
616
886
|
) -> Iterator[Any]:
|
|
617
887
|
"""
|
|
618
888
|
fetches a zip file containing the assets of a given product as a stream
|
|
619
889
|
and returns a generator yielding the chunks of the file
|
|
620
890
|
:param product: product for which the assets should be downloaded
|
|
621
|
-
:type product: :class:`~eodag.api.product._product.EOProduct`
|
|
622
891
|
:param auth: The configuration of a plugin of type Authentication
|
|
623
|
-
:type auth: :class:`~eodag.config.PluginConfig`
|
|
624
892
|
:param progress_callback: A method or a callable object
|
|
625
893
|
which takes a current size and a maximum
|
|
626
894
|
size as inputs and handle progress bar
|
|
627
895
|
creation and update to give the user a
|
|
628
896
|
feedback on the download progress
|
|
629
|
-
:type progress_callback: :class:`~eodag.utils.ProgressCallback`
|
|
630
897
|
:param kwargs: additional arguments
|
|
631
|
-
:type kwargs: dict
|
|
632
898
|
"""
|
|
633
899
|
if progress_callback is None:
|
|
634
900
|
logger.info("Progress bar unavailable, please call product.download()")
|
|
@@ -637,13 +903,16 @@ class HTTPDownload(Download):
|
|
|
637
903
|
ordered_message = ""
|
|
638
904
|
if (
|
|
639
905
|
"orderLink" in product.properties
|
|
640
|
-
and "storageStatus"
|
|
641
|
-
and product.properties
|
|
906
|
+
and product.properties.get("storageStatus") == OFFLINE_STATUS
|
|
907
|
+
and not product.properties.get("orderStatus")
|
|
642
908
|
):
|
|
643
|
-
self.
|
|
909
|
+
self.order_download(product=product, auth=auth)
|
|
644
910
|
|
|
645
|
-
if
|
|
646
|
-
|
|
911
|
+
if (
|
|
912
|
+
product.properties.get("orderStatusLink", None)
|
|
913
|
+
and product.properties.get("storageStatus") != ONLINE_STATUS
|
|
914
|
+
):
|
|
915
|
+
self.order_download_status(product=product, auth=auth)
|
|
647
916
|
|
|
648
917
|
params = kwargs.pop("dl_url_params", None) or getattr(
|
|
649
918
|
self.config, "dl_url_params", {}
|
|
@@ -660,14 +929,18 @@ class HTTPDownload(Download):
|
|
|
660
929
|
query_dict = parse_qs(parts.query)
|
|
661
930
|
if not query_dict and parts.query:
|
|
662
931
|
query_dict = geojson.loads(parts.query)
|
|
663
|
-
req_url = parts._replace(query=
|
|
932
|
+
req_url = parts._replace(query="").geturl()
|
|
664
933
|
req_kwargs: Dict[str, Any] = {"json": query_dict} if query_dict else {}
|
|
665
934
|
else:
|
|
666
935
|
req_url = url
|
|
667
936
|
req_kwargs = {}
|
|
668
937
|
|
|
669
|
-
|
|
670
|
-
|
|
938
|
+
if req_url.startswith(NOT_AVAILABLE):
|
|
939
|
+
raise NotAvailableError("Download link is not available")
|
|
940
|
+
|
|
941
|
+
if getattr(self.config, "no_auth_download", False):
|
|
942
|
+
auth = None
|
|
943
|
+
|
|
671
944
|
s = requests.Session()
|
|
672
945
|
with s.request(
|
|
673
946
|
req_method,
|
|
@@ -681,7 +954,6 @@ class HTTPDownload(Download):
|
|
|
681
954
|
) as self.stream:
|
|
682
955
|
try:
|
|
683
956
|
self.stream.raise_for_status()
|
|
684
|
-
|
|
685
957
|
except requests.exceptions.Timeout as exc:
|
|
686
958
|
raise TimeOutError(
|
|
687
959
|
exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT
|
|
@@ -689,8 +961,33 @@ class HTTPDownload(Download):
|
|
|
689
961
|
except RequestException as e:
|
|
690
962
|
self._process_exception(e, product, ordered_message)
|
|
691
963
|
else:
|
|
692
|
-
|
|
964
|
+
# check if product was ordered
|
|
965
|
+
|
|
966
|
+
if getattr(
|
|
967
|
+
self.stream, "status_code", None
|
|
968
|
+
) is not None and self.stream.status_code == getattr(
|
|
969
|
+
self.config, "order_status", {}
|
|
970
|
+
).get(
|
|
971
|
+
"ordered", {}
|
|
972
|
+
).get(
|
|
973
|
+
"http_code"
|
|
974
|
+
):
|
|
975
|
+
product.properties["storageStatus"] = "ORDERED"
|
|
976
|
+
self._process_exception(None, product, ordered_message)
|
|
977
|
+
stream_size = self._check_stream_size(product) or None
|
|
978
|
+
|
|
693
979
|
product.headers = self.stream.headers
|
|
980
|
+
filename = self._check_product_filename(product) or None
|
|
981
|
+
product.headers[
|
|
982
|
+
"content-disposition"
|
|
983
|
+
] = f"attachment; filename={filename}"
|
|
984
|
+
content_type = product.headers.get("Content-Type")
|
|
985
|
+
guessed_content_type = (
|
|
986
|
+
guess_file_type(filename) if filename and not content_type else None
|
|
987
|
+
)
|
|
988
|
+
if guessed_content_type is not None:
|
|
989
|
+
product.headers["Content-Type"] = guessed_content_type
|
|
990
|
+
|
|
694
991
|
progress_callback.reset(total=stream_size)
|
|
695
992
|
for chunk in self.stream.iter_content(chunk_size=64 * 1024):
|
|
696
993
|
if chunk:
|
|
@@ -700,10 +997,11 @@ class HTTPDownload(Download):
|
|
|
700
997
|
def _stream_download_assets(
|
|
701
998
|
self,
|
|
702
999
|
product: EOProduct,
|
|
703
|
-
auth: Optional[
|
|
1000
|
+
auth: Optional[AuthBase] = None,
|
|
704
1001
|
progress_callback: Optional[ProgressCallback] = None,
|
|
705
|
-
|
|
706
|
-
|
|
1002
|
+
assets_values: List[Asset] = [],
|
|
1003
|
+
**kwargs: Unpack[DownloadConf],
|
|
1004
|
+
) -> Iterator[Any]:
|
|
707
1005
|
if progress_callback is None:
|
|
708
1006
|
logger.info("Progress bar unavailable, please call product.download()")
|
|
709
1007
|
progress_callback = ProgressCallback(disable=True)
|
|
@@ -715,14 +1013,12 @@ class HTTPDownload(Download):
|
|
|
715
1013
|
if not assets_urls:
|
|
716
1014
|
raise NotAvailableError("No assets available for %s" % product)
|
|
717
1015
|
|
|
718
|
-
assets_values = kwargs.get("assets_values", [])
|
|
719
|
-
|
|
720
1016
|
# get extra parameters to pass to the query
|
|
721
1017
|
params = kwargs.pop("dl_url_params", None) or getattr(
|
|
722
1018
|
self.config, "dl_url_params", {}
|
|
723
1019
|
)
|
|
724
1020
|
|
|
725
|
-
total_size = self._get_asset_sizes(assets_values, auth, params)
|
|
1021
|
+
total_size = self._get_asset_sizes(assets_values, auth, params) or None
|
|
726
1022
|
|
|
727
1023
|
progress_callback.reset(total=total_size)
|
|
728
1024
|
|
|
@@ -753,12 +1049,13 @@ class HTTPDownload(Download):
|
|
|
753
1049
|
product.product_type, {}
|
|
754
1050
|
)
|
|
755
1051
|
flatten_top_dirs = product_conf.get(
|
|
756
|
-
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs",
|
|
1052
|
+
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
|
|
757
1053
|
)
|
|
1054
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
758
1055
|
|
|
759
1056
|
# loop for assets download
|
|
760
1057
|
for asset in assets_values:
|
|
761
|
-
if asset["href"].startswith("file:"):
|
|
1058
|
+
if not asset["href"] or asset["href"].startswith("file:"):
|
|
762
1059
|
logger.info(
|
|
763
1060
|
f"Local asset detected. Download skipped for {asset['href']}"
|
|
764
1061
|
)
|
|
@@ -771,6 +1068,7 @@ class HTTPDownload(Download):
|
|
|
771
1068
|
params=params,
|
|
772
1069
|
headers=USER_AGENT,
|
|
773
1070
|
timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
|
|
1071
|
+
verify=ssl_verify,
|
|
774
1072
|
) as stream:
|
|
775
1073
|
try:
|
|
776
1074
|
stream.raise_for_status()
|
|
@@ -795,15 +1093,20 @@ class HTTPDownload(Download):
|
|
|
795
1093
|
"content-disposition", None
|
|
796
1094
|
)
|
|
797
1095
|
if asset_content_disposition:
|
|
798
|
-
asset.filename =
|
|
799
|
-
|
|
800
|
-
|
|
1096
|
+
asset.filename = cast(
|
|
1097
|
+
Optional[str],
|
|
1098
|
+
parse_header(asset_content_disposition).get_param(
|
|
1099
|
+
"filename", None
|
|
1100
|
+
),
|
|
1101
|
+
)
|
|
801
1102
|
|
|
802
1103
|
if not getattr(asset, "filename", None):
|
|
803
1104
|
# default filename extracted from path
|
|
804
1105
|
asset.filename = os.path.basename(asset.rel_path)
|
|
805
1106
|
|
|
806
|
-
asset.rel_path = os.path.join(
|
|
1107
|
+
asset.rel_path = os.path.join(
|
|
1108
|
+
asset_rel_dir, cast(str, asset.filename)
|
|
1109
|
+
)
|
|
807
1110
|
|
|
808
1111
|
if len(assets_values) == 1:
|
|
809
1112
|
# apply headers to asset
|
|
@@ -824,9 +1127,9 @@ class HTTPDownload(Download):
|
|
|
824
1127
|
product: EOProduct,
|
|
825
1128
|
fs_dir_path: str,
|
|
826
1129
|
record_filename: str,
|
|
827
|
-
auth: Optional[
|
|
1130
|
+
auth: Optional[AuthBase] = None,
|
|
828
1131
|
progress_callback: Optional[ProgressCallback] = None,
|
|
829
|
-
**kwargs:
|
|
1132
|
+
**kwargs: Unpack[DownloadConf],
|
|
830
1133
|
) -> str:
|
|
831
1134
|
"""Download product assets if they exist"""
|
|
832
1135
|
if progress_callback is None:
|
|
@@ -857,7 +1160,7 @@ class HTTPDownload(Download):
|
|
|
857
1160
|
product.product_type, {}
|
|
858
1161
|
)
|
|
859
1162
|
flatten_top_dirs = product_conf.get(
|
|
860
|
-
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs",
|
|
1163
|
+
"flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
|
|
861
1164
|
)
|
|
862
1165
|
|
|
863
1166
|
# count local assets
|
|
@@ -871,29 +1174,43 @@ class HTTPDownload(Download):
|
|
|
871
1174
|
# start reading chunks to set asset.rel_path
|
|
872
1175
|
first_chunks_tuple = next(chunks_tuples)
|
|
873
1176
|
chunks = chain(iter([first_chunks_tuple]), chunks_tuples)
|
|
874
|
-
chunks_tuples =
|
|
1177
|
+
chunks_tuples = iter(
|
|
1178
|
+
[(assets_values[0].rel_path, None, None, None, chunks)]
|
|
1179
|
+
)
|
|
875
1180
|
|
|
876
1181
|
for chunk_tuple in chunks_tuples:
|
|
877
1182
|
asset_path = chunk_tuple[0]
|
|
878
1183
|
asset_chunks = chunk_tuple[4]
|
|
879
1184
|
asset_abs_path = os.path.join(fs_dir_path, asset_path)
|
|
1185
|
+
asset_abs_path_temp = asset_abs_path + "~"
|
|
880
1186
|
# create asset subdir if not exist
|
|
881
1187
|
asset_abs_path_dir = os.path.dirname(asset_abs_path)
|
|
882
1188
|
if not os.path.isdir(asset_abs_path_dir):
|
|
883
1189
|
os.makedirs(asset_abs_path_dir)
|
|
1190
|
+
# remove temporary file
|
|
1191
|
+
if os.path.isfile(asset_abs_path_temp):
|
|
1192
|
+
os.remove(asset_abs_path_temp)
|
|
884
1193
|
if not os.path.isfile(asset_abs_path):
|
|
885
|
-
|
|
1194
|
+
logger.debug("Downloading to temporary file '%s'", asset_abs_path_temp)
|
|
1195
|
+
with open(asset_abs_path_temp, "wb") as fhandle:
|
|
886
1196
|
for chunk in asset_chunks:
|
|
887
1197
|
if chunk:
|
|
888
1198
|
fhandle.write(chunk)
|
|
889
1199
|
progress_callback(len(chunk))
|
|
890
|
-
|
|
1200
|
+
logger.debug(
|
|
1201
|
+
"Download completed. Renaming temporary file '%s' to '%s'",
|
|
1202
|
+
os.path.basename(asset_abs_path_temp),
|
|
1203
|
+
os.path.basename(asset_abs_path),
|
|
1204
|
+
)
|
|
1205
|
+
os.rename(asset_abs_path_temp, asset_abs_path)
|
|
891
1206
|
# only one local asset
|
|
892
1207
|
if local_assets_count == len(assets_urls) and local_assets_count == 1:
|
|
893
1208
|
# remove empty {fs_dir_path}
|
|
894
1209
|
shutil.rmtree(fs_dir_path)
|
|
895
1210
|
# and return assets_urls[0] path
|
|
896
1211
|
fs_dir_path = uri_to_path(assets_urls[0])
|
|
1212
|
+
# do not flatten dir
|
|
1213
|
+
flatten_top_dirs = False
|
|
897
1214
|
# several local assets
|
|
898
1215
|
elif local_assets_count == len(assets_urls) and local_assets_count > 0:
|
|
899
1216
|
common_path = os.path.commonpath([uri_to_path(uri) for uri in assets_urls])
|
|
@@ -901,6 +1218,8 @@ class HTTPDownload(Download):
|
|
|
901
1218
|
shutil.rmtree(fs_dir_path)
|
|
902
1219
|
# and return assets_urls common path
|
|
903
1220
|
fs_dir_path = common_path
|
|
1221
|
+
# do not flatten dir
|
|
1222
|
+
flatten_top_dirs = False
|
|
904
1223
|
# no assets downloaded but some should have been
|
|
905
1224
|
elif len(os.listdir(fs_dir_path)) == 0:
|
|
906
1225
|
raise NotAvailableError("No assets could be downloaded")
|
|
@@ -918,20 +1237,17 @@ class HTTPDownload(Download):
|
|
|
918
1237
|
return fs_dir_path
|
|
919
1238
|
|
|
920
1239
|
def _handle_asset_exception(
|
|
921
|
-
self, e: RequestException, asset:
|
|
1240
|
+
self, e: RequestException, asset: Asset, raise_errors: bool = False
|
|
922
1241
|
) -> None:
|
|
923
1242
|
# check if error is identified as auth_error in provider conf
|
|
924
1243
|
auth_errors = getattr(self.config, "auth_error_code", [None])
|
|
925
1244
|
if not isinstance(auth_errors, list):
|
|
926
1245
|
auth_errors = [auth_errors]
|
|
927
|
-
if e.response and e.response.status_code in auth_errors:
|
|
1246
|
+
if e.response is not None and e.response.status_code in auth_errors:
|
|
928
1247
|
raise AuthenticationError(
|
|
929
|
-
"
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
e.response.text.strip(),
|
|
933
|
-
self.provider,
|
|
934
|
-
)
|
|
1248
|
+
f"Please check your credentials for {self.provider}.",
|
|
1249
|
+
f"HTTP Error {e.response.status_code} returned.",
|
|
1250
|
+
e.response.text.strip(),
|
|
935
1251
|
)
|
|
936
1252
|
elif raise_errors:
|
|
937
1253
|
raise DownloadError(e)
|
|
@@ -941,23 +1257,31 @@ class HTTPDownload(Download):
|
|
|
941
1257
|
|
|
942
1258
|
def _get_asset_sizes(
|
|
943
1259
|
self,
|
|
944
|
-
assets_values: List[
|
|
945
|
-
auth: Optional[
|
|
1260
|
+
assets_values: List[Asset],
|
|
1261
|
+
auth: Optional[AuthBase],
|
|
946
1262
|
params: Optional[Dict[str, str]],
|
|
947
1263
|
zipped: bool = False,
|
|
948
1264
|
) -> int:
|
|
949
1265
|
total_size = 0
|
|
950
1266
|
|
|
1267
|
+
timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
|
|
1268
|
+
ssl_verify = getattr(self.config, "ssl_verify", True)
|
|
951
1269
|
# loop for assets size & filename
|
|
952
1270
|
for asset in assets_values:
|
|
953
|
-
if not asset["href"].startswith("file:"):
|
|
1271
|
+
if asset["href"] and not asset["href"].startswith("file:"):
|
|
954
1272
|
# HEAD request for size & filename
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
1273
|
+
try:
|
|
1274
|
+
asset_headers = requests.head(
|
|
1275
|
+
asset["href"],
|
|
1276
|
+
auth=auth,
|
|
1277
|
+
params=params,
|
|
1278
|
+
headers=USER_AGENT,
|
|
1279
|
+
timeout=timeout,
|
|
1280
|
+
verify=ssl_verify,
|
|
1281
|
+
).headers
|
|
1282
|
+
except RequestException as e:
|
|
1283
|
+
logger.debug(f"HEAD request failed: {str(e)}")
|
|
1284
|
+
asset_headers = CaseInsensitiveDict()
|
|
961
1285
|
|
|
962
1286
|
if not getattr(asset, "size", 0):
|
|
963
1287
|
# size from HEAD header / Content-length
|
|
@@ -971,12 +1295,14 @@ class HTTPDownload(Download):
|
|
|
971
1295
|
)
|
|
972
1296
|
if not getattr(asset, "size", 0):
|
|
973
1297
|
# size from HEAD header / content-disposition / size
|
|
974
|
-
|
|
1298
|
+
size_str = str(header_content_disposition.get_param("size", 0))
|
|
1299
|
+
asset.size = int(size_str) if size_str.isdigit() else 0
|
|
975
1300
|
if not getattr(asset, "filename", 0):
|
|
976
1301
|
# filename from HEAD header / content-disposition / size
|
|
977
|
-
|
|
1302
|
+
asset_filename = header_content_disposition.get_param(
|
|
978
1303
|
"filename", None
|
|
979
1304
|
)
|
|
1305
|
+
asset.filename = str(asset_filename) if asset_filename else None
|
|
980
1306
|
|
|
981
1307
|
if not getattr(asset, "size", 0):
|
|
982
1308
|
# GET request for size
|
|
@@ -987,16 +1313,18 @@ class HTTPDownload(Download):
|
|
|
987
1313
|
params=params,
|
|
988
1314
|
headers=USER_AGENT,
|
|
989
1315
|
timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
|
|
1316
|
+
verify=ssl_verify,
|
|
990
1317
|
) as stream:
|
|
991
1318
|
# size from GET header / Content-length
|
|
992
1319
|
asset.size = int(stream.headers.get("Content-length", 0))
|
|
993
1320
|
if not getattr(asset, "size", 0):
|
|
994
1321
|
# size from GET header / content-disposition / size
|
|
995
|
-
|
|
1322
|
+
size_str = str(
|
|
996
1323
|
parse_header(
|
|
997
1324
|
stream.headers.get("content-disposition", "")
|
|
998
1325
|
).get_param("size", 0)
|
|
999
1326
|
)
|
|
1327
|
+
asset.size = int(size_str) if size_str.isdigit() else 0
|
|
1000
1328
|
|
|
1001
1329
|
total_size += asset.size
|
|
1002
1330
|
return total_size
|
|
@@ -1004,12 +1332,12 @@ class HTTPDownload(Download):
|
|
|
1004
1332
|
def download_all(
|
|
1005
1333
|
self,
|
|
1006
1334
|
products: SearchResult,
|
|
1007
|
-
auth: Optional[
|
|
1335
|
+
auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
|
|
1008
1336
|
downloaded_callback: Optional[DownloadedCallback] = None,
|
|
1009
1337
|
progress_callback: Optional[ProgressCallback] = None,
|
|
1010
1338
|
wait: int = DEFAULT_DOWNLOAD_WAIT,
|
|
1011
1339
|
timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
|
|
1012
|
-
**kwargs:
|
|
1340
|
+
**kwargs: Unpack[DownloadConf],
|
|
1013
1341
|
):
|
|
1014
1342
|
"""
|
|
1015
1343
|
Download all using parent (base plugin) method
|