eodag 2.12.0__py3-none-any.whl → 3.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. eodag/api/core.py +434 -319
  2. eodag/api/product/__init__.py +5 -1
  3. eodag/api/product/_assets.py +7 -2
  4. eodag/api/product/_product.py +46 -68
  5. eodag/api/product/metadata_mapping.py +181 -66
  6. eodag/api/search_result.py +21 -1
  7. eodag/cli.py +20 -6
  8. eodag/config.py +95 -6
  9. eodag/plugins/apis/base.py +8 -162
  10. eodag/plugins/apis/ecmwf.py +36 -24
  11. eodag/plugins/apis/usgs.py +40 -24
  12. eodag/plugins/authentication/aws_auth.py +2 -2
  13. eodag/plugins/authentication/header.py +31 -6
  14. eodag/plugins/authentication/keycloak.py +13 -84
  15. eodag/plugins/authentication/oauth.py +3 -3
  16. eodag/plugins/authentication/openid_connect.py +256 -46
  17. eodag/plugins/authentication/qsauth.py +3 -0
  18. eodag/plugins/authentication/sas_auth.py +8 -1
  19. eodag/plugins/authentication/token.py +92 -46
  20. eodag/plugins/authentication/token_exchange.py +120 -0
  21. eodag/plugins/download/aws.py +86 -91
  22. eodag/plugins/download/base.py +72 -40
  23. eodag/plugins/download/http.py +607 -264
  24. eodag/plugins/download/s3rest.py +28 -15
  25. eodag/plugins/manager.py +73 -57
  26. eodag/plugins/search/__init__.py +36 -0
  27. eodag/plugins/search/base.py +225 -18
  28. eodag/plugins/search/build_search_result.py +389 -32
  29. eodag/plugins/search/cop_marine.py +378 -0
  30. eodag/plugins/search/creodias_s3.py +15 -14
  31. eodag/plugins/search/csw.py +5 -7
  32. eodag/plugins/search/data_request_search.py +44 -20
  33. eodag/plugins/search/qssearch.py +508 -203
  34. eodag/plugins/search/static_stac_search.py +99 -36
  35. eodag/resources/constraints/climate-dt.json +13 -0
  36. eodag/resources/constraints/extremes-dt.json +8 -0
  37. eodag/resources/ext_product_types.json +1 -1
  38. eodag/resources/product_types.yml +1897 -34
  39. eodag/resources/providers.yml +3539 -3277
  40. eodag/resources/stac.yml +48 -54
  41. eodag/resources/stac_api.yml +71 -25
  42. eodag/resources/stac_provider.yml +5 -0
  43. eodag/resources/user_conf_template.yml +51 -3
  44. eodag/rest/__init__.py +6 -0
  45. eodag/rest/cache.py +70 -0
  46. eodag/rest/config.py +68 -0
  47. eodag/rest/constants.py +27 -0
  48. eodag/rest/core.py +757 -0
  49. eodag/rest/server.py +397 -258
  50. eodag/rest/stac.py +438 -307
  51. eodag/rest/types/collections_search.py +44 -0
  52. eodag/rest/types/eodag_search.py +232 -43
  53. eodag/rest/types/{stac_queryables.py → queryables.py} +81 -43
  54. eodag/rest/types/stac_search.py +277 -0
  55. eodag/rest/utils/__init__.py +216 -0
  56. eodag/rest/utils/cql_evaluate.py +119 -0
  57. eodag/rest/utils/rfc3339.py +65 -0
  58. eodag/types/__init__.py +99 -9
  59. eodag/types/bbox.py +15 -14
  60. eodag/types/download_args.py +31 -0
  61. eodag/types/search_args.py +58 -7
  62. eodag/types/whoosh.py +81 -0
  63. eodag/utils/__init__.py +72 -9
  64. eodag/utils/constraints.py +37 -37
  65. eodag/utils/exceptions.py +23 -17
  66. eodag/utils/requests.py +138 -0
  67. eodag/utils/rest.py +104 -0
  68. eodag/utils/stac_reader.py +100 -16
  69. {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/METADATA +64 -44
  70. eodag-3.0.0b1.dist-info/RECORD +109 -0
  71. {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/WHEEL +1 -1
  72. {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/entry_points.txt +6 -5
  73. eodag/plugins/apis/cds.py +0 -540
  74. eodag/rest/utils.py +0 -1133
  75. eodag-2.12.0.dist-info/RECORD +0 -94
  76. {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/LICENSE +0 -0
  77. {eodag-2.12.0.dist-info → eodag-3.0.0b1.dist-info}/top_level.txt +0 -0
@@ -20,23 +20,37 @@ from __future__ import annotations
20
20
  import logging
21
21
  import os
22
22
  import shutil
23
+ import tarfile
23
24
  import zipfile
24
25
  from datetime import datetime
25
26
  from email.message import Message
26
27
  from itertools import chain
27
- from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union
28
+ from typing import (
29
+ TYPE_CHECKING,
30
+ Any,
31
+ Dict,
32
+ Iterator,
33
+ List,
34
+ Optional,
35
+ Tuple,
36
+ TypedDict,
37
+ Union,
38
+ cast,
39
+ )
28
40
  from urllib.parse import parse_qs, urlparse
29
41
 
30
42
  import geojson
31
43
  import requests
32
- import requests_ftp
33
44
  from lxml import etree
34
45
  from requests import RequestException
46
+ from requests.auth import AuthBase
35
47
  from stream_zip import ZIP_AUTO, stream_zip
36
48
 
37
49
  from eodag.api.product.metadata_mapping import (
50
+ NOT_AVAILABLE,
38
51
  OFFLINE_STATUS,
39
52
  ONLINE_STATUS,
53
+ STAGING_STATUS,
40
54
  mtd_cfg_as_conversion_and_querypath,
41
55
  properties_from_json,
42
56
  properties_from_xml,
@@ -49,10 +63,14 @@ from eodag.utils import (
49
63
  HTTP_REQ_TIMEOUT,
50
64
  USER_AGENT,
51
65
  ProgressCallback,
66
+ StreamResponse,
52
67
  flatten_top_directories,
68
+ guess_extension,
69
+ guess_file_type,
53
70
  parse_header,
54
71
  path_to_uri,
55
72
  sanitize,
73
+ string_to_jsonpath,
56
74
  uri_to_path,
57
75
  )
58
76
  from eodag.utils.exceptions import (
@@ -66,10 +84,11 @@ from eodag.utils.exceptions import (
66
84
  if TYPE_CHECKING:
67
85
  from requests import Response
68
86
 
69
- from eodag.api.product import EOProduct
87
+ from eodag.api.product import Asset, EOProduct # type: ignore
70
88
  from eodag.api.search_result import SearchResult
71
89
  from eodag.config import PluginConfig
72
- from eodag.utils import DownloadedCallback
90
+ from eodag.types.download_args import DownloadConf
91
+ from eodag.utils import DownloadedCallback, Unpack
73
92
 
74
93
  logger = logging.getLogger("eodag.download.http")
75
94
 
@@ -81,7 +100,7 @@ class HTTPDownload(Download):
81
100
  :type provider: str
82
101
  :param config: Download plugin configuration:
83
102
 
84
- * ``config.base_uri`` (str) - default endpoint url
103
+ * ``config.base_uri`` (str) - (optional) default endpoint url
85
104
  * ``config.extract`` (bool) - (optional) extract downloaded archive or not
86
105
  * ``config.auth_error_code`` (int) - (optional) authentication error code
87
106
  * ``config.dl_url_params`` (dict) - (optional) attitional parameters to send in the request
@@ -92,9 +111,8 @@ class HTTPDownload(Download):
92
111
  * ``config.order_method`` (str) - (optional) HTTP request method, GET (default) or POST
93
112
  * ``config.order_headers`` (dict) - (optional) order request headers
94
113
  * ``config.order_on_response`` (dict) - (optional) edit or add new product properties
95
- * ``config.order_status_method`` (str) - (optional) status HTTP request method, GET (default) or POST
96
- * ``config.order_status_percent`` (str) - (optional) progress percentage key in obtained status response
97
- * ``config.order_status_error`` (dict) - (optional) key/value identifying an error status
114
+ * ``config.order_status`` (:class:`~eodag.config.PluginConfig.OrderStatus`) - (optional) Order status handling
115
+
98
116
 
99
117
  :type config: :class:`~eodag.config.PluginConfig`
100
118
 
@@ -102,19 +120,13 @@ class HTTPDownload(Download):
102
120
 
103
121
  def __init__(self, provider: str, config: PluginConfig) -> None:
104
122
  super(HTTPDownload, self).__init__(provider, config)
105
- if not hasattr(self.config, "base_uri"):
106
- raise MisconfiguredError(
107
- "{} plugin require a base_uri configuration key".format(
108
- type(self).__name__
109
- )
110
- )
111
123
 
112
124
  def orderDownload(
113
125
  self,
114
126
  product: EOProduct,
115
- auth: Optional[PluginConfig] = None,
116
- **kwargs: Union[str, bool, Dict[str, Any]],
117
- ) -> None:
127
+ auth: Optional[AuthBase] = None,
128
+ **kwargs: Unpack[DownloadConf],
129
+ ) -> Optional[Dict[str, Any]]:
118
130
  """Send product order request.
119
131
 
120
132
  It will be executed once before the download retry loop, if the product is OFFLINE
@@ -136,85 +148,120 @@ class HTTPDownload(Download):
136
148
 
137
149
  :param product: The EO product to order
138
150
  :type product: :class:`~eodag.api.product._product.EOProduct`
139
- :param auth: (optional) The configuration of a plugin of type Authentication
140
- :type auth: :class:`~eodag.config.PluginConfig`
151
+ :param auth: (optional) authenticated object
152
+ :type auth: Optional[AuthBase]
141
153
  :param kwargs: download additional kwargs
142
154
  :type kwargs: Union[str, bool, dict]
155
+ :returns: the returned json status response
156
+ :rtype: dict
143
157
  """
144
- order_method = getattr(self.config, "order_method", "GET").lower()
145
- if order_method == "post":
158
+ product.properties["storageStatus"] = STAGING_STATUS
159
+
160
+ order_method = getattr(self.config, "order_method", "GET").upper()
161
+ ssl_verify = getattr(self.config, "ssl_verify", True)
162
+ timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
163
+ OrderKwargs = TypedDict(
164
+ "OrderKwargs", {"json": Dict[str, Union[Any, List[str]]]}, total=False
165
+ )
166
+ order_kwargs: OrderKwargs = {}
167
+ if order_method == "POST":
146
168
  # separate url & parameters
147
169
  parts = urlparse(str(product.properties["orderLink"]))
148
170
  query_dict = parse_qs(parts.query)
149
171
  if not query_dict and parts.query:
150
172
  query_dict = geojson.loads(parts.query)
151
173
  order_url = parts._replace(query=None).geturl()
152
- order_kwargs = {"json": query_dict} if query_dict else {}
174
+ if query_dict:
175
+ order_kwargs["json"] = query_dict
153
176
  else:
154
177
  order_url = product.properties["orderLink"]
155
178
  order_kwargs = {}
156
179
 
157
- with requests.request(
158
- method=order_method,
159
- url=order_url,
160
- auth=auth,
161
- timeout=HTTP_REQ_TIMEOUT,
162
- headers=dict(getattr(self.config, "order_headers", {}), **USER_AGENT),
163
- **order_kwargs,
164
- ) as response:
165
- try:
166
- response.raise_for_status()
167
- ordered_message = response.text
168
- logger.debug(ordered_message)
169
- logger.info("%s was ordered", product.properties["title"])
170
- except requests.exceptions.Timeout as exc:
171
- raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
172
- except RequestException as e:
173
- if e.response and hasattr(e.response, "content"):
174
- error_message = f"{e.response.content.decode('utf-8')} - {e}"
175
- else:
176
- error_message = str(e)
177
- logger.warning(
178
- "%s could not be ordered, request returned %s",
179
- product.properties["title"],
180
- error_message,
181
- )
182
-
183
- order_metadata_mapping = getattr(self.config, "order_on_response", {}).get(
180
+ headers = {**getattr(self.config, "order_headers", {}), **USER_AGENT}
181
+ try:
182
+ with requests.request(
183
+ method=order_method,
184
+ url=order_url,
185
+ auth=auth,
186
+ timeout=timeout,
187
+ headers=headers,
188
+ verify=ssl_verify,
189
+ **order_kwargs,
190
+ ) as response:
191
+ logger.debug(f"{order_method} {order_url} {headers} {order_kwargs}")
192
+ try:
193
+ response.raise_for_status()
194
+ ordered_message = response.text
195
+ logger.debug(ordered_message)
196
+ product.properties["storageStatus"] = STAGING_STATUS
197
+ except RequestException as e:
198
+ if hasattr(e, "response") and (
199
+ content := getattr(e.response, "content", None)
200
+ ):
201
+ error_message = f"{content.decode('utf-8')} - {e}"
202
+ else:
203
+ error_message = str(e)
204
+ logger.warning(
205
+ "%s could not be ordered, request returned %s",
206
+ product.properties["title"],
207
+ error_message,
208
+ )
209
+ self._check_auth_exception(e)
210
+ return self.order_response_process(response, product)
211
+ except requests.exceptions.Timeout as exc:
212
+ raise TimeOutError(exc, timeout=timeout) from exc
213
+
214
+ def order_response_process(
215
+ self, response: Response, product: EOProduct
216
+ ) -> Optional[Dict[str, Any]]:
217
+ """Process order response
218
+
219
+ :param response: The order response
220
+ :type response: :class:`~requests.Response`
221
+ :param product: The orderd EO product
222
+ :type product: :class:`~eodag.api.product._product.EOProduct`
223
+ :returns: the returned json status response
224
+ :rtype: dict
225
+ """
226
+ on_response_mm = getattr(self.config, "order_on_response", {}).get(
184
227
  "metadata_mapping", {}
185
228
  )
186
- if order_metadata_mapping:
187
- logger.debug("Parsing order response to update product metada-mapping")
188
- order_metadata_mapping_jsonpath = mtd_cfg_as_conversion_and_querypath(
189
- order_metadata_mapping,
190
- )
191
- properties_update = properties_from_json(
192
- response.json(),
193
- order_metadata_mapping_jsonpath,
194
- )
195
- product.properties.update(properties_update)
196
- if "downloadLink" in properties_update:
197
- product.remote_location = product.location = product.properties[
198
- "downloadLink"
199
- ]
200
- logger.debug(f"Product location updated to {product.location}")
229
+ if not on_response_mm:
230
+ return None
231
+
232
+ logger.debug("Parsing order response to update product metada-mapping")
233
+ on_response_mm_jsonpath = mtd_cfg_as_conversion_and_querypath(
234
+ on_response_mm,
235
+ )
236
+
237
+ json_response = response.json()
238
+
239
+ properties_update = properties_from_json(
240
+ {"json": json_response, "headers": {**response.headers}},
241
+ on_response_mm_jsonpath,
242
+ )
243
+ product.properties.update(
244
+ {k: v for k, v in properties_update.items() if v != NOT_AVAILABLE}
245
+ )
246
+ if "downloadLink" in product.properties:
247
+ product.remote_location = product.location = product.properties[
248
+ "downloadLink"
249
+ ]
250
+ logger.debug(f"Product location updated to {product.location}")
251
+
252
+ return json_response
201
253
 
202
254
  def orderDownloadStatus(
203
255
  self,
204
256
  product: EOProduct,
205
- auth: Optional[PluginConfig] = None,
206
- **kwargs: Union[str, bool, Dict[str, Any]],
257
+ auth: Optional[AuthBase] = None,
207
258
  ) -> None:
208
259
  """Send product order status request.
209
260
 
210
261
  It will be executed before each download retry.
211
262
  Product order status request can be configured using the following download plugin parameters:
212
263
 
213
- - **order_status_method**: (optional) HTTP request method, GET (default) or POST
214
-
215
- - **order_status_percent**: (optional) progress percentage key in obtained response
216
-
217
- - **order_status_error**: (optional) key/value identifying an error status
264
+ - **order_status**: :class:`~eodag.config.PluginConfig.OrderStatus`
218
265
 
219
266
  Product properties used for order status:
220
267
 
@@ -222,162 +269,289 @@ class HTTPDownload(Download):
222
269
 
223
270
  :param product: The ordered EO product
224
271
  :type product: :class:`~eodag.api.product._product.EOProduct`
225
- :param auth: (optional) The configuration of a plugin of type Authentication
226
- :type auth: :class:`~eodag.config.PluginConfig`
272
+ :param auth: (optional) authenticated object
273
+ :type auth: Optional[AuthBase]
227
274
  :param kwargs: download additional kwargs
228
275
  :type kwargs: Union[str, bool, dict]
229
276
  """
230
- status_method = getattr(self.config, "order_status_method", "GET").lower()
231
- if status_method == "post":
277
+
278
+ status_config = getattr(self.config, "order_status", {})
279
+ success_code: Optional[int] = status_config.get("success", {}).get("http_code")
280
+
281
+ timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
282
+
283
+ def _request(
284
+ url: str,
285
+ method: str = "GET",
286
+ headers: Optional[Dict[str, Any]] = None,
287
+ json: Optional[Any] = None,
288
+ timeout: int = HTTP_REQ_TIMEOUT,
289
+ ) -> Response:
290
+ """Send request and handle allow redirects"""
291
+
292
+ logger.debug(f"{method} {url} {headers} {json}")
293
+ try:
294
+ response = requests.request(
295
+ method=method,
296
+ url=url,
297
+ auth=auth,
298
+ timeout=timeout,
299
+ headers={**(headers or {}), **USER_AGENT},
300
+ allow_redirects=False, # Redirection is manually handled
301
+ json=json,
302
+ )
303
+ logger.debug(
304
+ f"Order download status request responded with {response.status_code}"
305
+ )
306
+ response.raise_for_status() # Raise an exception if status code indicates an error
307
+
308
+ # Handle redirection (if needed)
309
+ if (
310
+ 300 <= response.status_code < 400
311
+ and response.status_code != success_code
312
+ ):
313
+ # cf: https://www.rfc-editor.org/rfc/rfc9110.html#name-303-see-other
314
+ if response.status_code == 303:
315
+ method = "GET"
316
+ if new_url := response.headers.get("Location"):
317
+ return _request(new_url, method, headers, json, timeout)
318
+ return response
319
+ except requests.exceptions.Timeout as exc:
320
+ raise TimeOutError(exc, timeout=timeout) from exc
321
+
322
+ status_request: Dict[str, Any] = status_config.get("request", {})
323
+ status_request_method = str(status_request.get("method", "GET")).upper()
324
+
325
+ if status_request_method == "POST":
232
326
  # separate url & parameters
233
327
  parts = urlparse(str(product.properties["orderStatusLink"]))
328
+ status_url = parts._replace(query=None).geturl()
234
329
  query_dict = parse_qs(parts.query)
235
330
  if not query_dict and parts.query:
236
331
  query_dict = geojson.loads(parts.query)
237
- status_url = parts._replace(query=None).geturl()
238
- status_kwargs = {"json": query_dict} if query_dict else {}
332
+ json_data = query_dict if query_dict else None
239
333
  else:
240
334
  status_url = product.properties["orderStatusLink"]
241
- status_kwargs = {}
335
+ json_data = None
336
+
337
+ # check header for success before full status request
338
+ skip_parsing_status_response = False
339
+ status_dict: Dict[str, Any] = {}
340
+ config_on_success: Dict[str, Any] = status_config.get("on_success", {})
341
+ on_success_mm = config_on_success.get("metadata_mapping", {})
342
+
343
+ status_response_content_needed = (
344
+ False
345
+ if not any([v.startswith("$.json.") for v in on_success_mm.values()])
346
+ else True
347
+ )
242
348
 
243
- with requests.request(
244
- method=status_method,
245
- url=status_url,
246
- auth=auth,
247
- timeout=HTTP_REQ_TIMEOUT,
248
- headers=dict(
249
- getattr(self.config, "order_status_headers", {}), **USER_AGENT
250
- ),
251
- **status_kwargs,
252
- ) as response:
349
+ if success_code:
253
350
  try:
254
- response.raise_for_status()
255
- status_message = response.text
256
- status_dict = response.json()
257
- # display progress percentage
258
- order_status_percent_key = getattr(
259
- self.config, "order_status_percent", None
351
+ response = _request(
352
+ status_url,
353
+ "HEAD",
354
+ status_request.get("headers"),
355
+ json_data,
356
+ timeout,
260
357
  )
261
- if order_status_percent_key and order_status_percent_key in status_dict:
262
- order_status_value = str(status_dict[order_status_percent_key])
263
- if order_status_value.isdigit():
264
- order_status_value += "%"
265
- logger.info(
266
- f"{product.properties['title']} order status: {order_status_value}"
267
- )
268
- # display error if any
269
- order_status_error_dict = getattr(self.config, "order_status_error", {})
270
358
  if (
271
- order_status_error_dict
272
- and order_status_error_dict.items() <= status_dict.items()
359
+ response.status_code == success_code
360
+ and not status_response_content_needed
273
361
  ):
274
- # order_status_error_dict is a subset of status_dict : error
275
- logger.warning(status_message)
276
- else:
277
- logger.debug(status_message)
278
- # check if succeeds and need search again
279
- order_status_success_dict = getattr(
280
- self.config, "order_status_success", {}
362
+ # success and no need to get status response content
363
+ skip_parsing_status_response = True
364
+ except RequestException as e:
365
+ logger.debug(e)
366
+
367
+ if not skip_parsing_status_response:
368
+ # status request
369
+ try:
370
+ response = _request(
371
+ status_url,
372
+ status_request_method,
373
+ status_request.get("headers"),
374
+ json_data,
375
+ timeout,
281
376
  )
282
377
  if (
283
- "status" in status_dict
284
- and status_dict["status"] == order_status_success_dict["status"]
285
- and "message" in status_dict
286
- and status_dict["message"] == order_status_success_dict["message"]
287
- ):
288
- product.properties["storageStatus"] = ONLINE_STATUS
289
- if (
290
- order_status_success_dict
291
- and order_status_success_dict.items() <= status_dict.items()
292
- and getattr(self.config, "order_status_on_success", {}).get(
293
- "need_search"
294
- )
378
+ response.status_code == success_code
379
+ and not status_response_content_needed
295
380
  ):
296
- logger.debug(
297
- f"Search for new location: {product.properties['searchLink']}"
298
- )
299
- # search again
300
- response = requests.get(
301
- product.properties["searchLink"],
302
- timeout=HTTP_REQ_TIMEOUT,
303
- headers=USER_AGENT,
381
+ # success and no need to get status response content
382
+ skip_parsing_status_response = True
383
+ except RequestException as e:
384
+ raise DownloadError(
385
+ "%s order status could not be checked, request returned %s"
386
+ % (
387
+ product.properties["title"],
388
+ e,
304
389
  )
305
- response.raise_for_status()
306
- if (
307
- self.config.order_status_on_success.get("result_type", "json")
308
- == "xml"
309
- ):
310
- root_node = etree.fromstring(response.content)
311
- namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
312
- results = [
313
- etree.tostring(entry)
314
- for entry in root_node.xpath(
315
- self.config.order_status_on_success["results_entry"],
316
- namespaces=namespaces,
317
- )
318
- ]
319
- if isinstance(results, list) and len(results) != 1:
320
- raise DownloadError(
321
- "Could not get a single result after order success for "
322
- f"{product.properties['searchLink']} request. "
323
- f"Please search and download {product} again"
324
- )
325
- return
326
- try:
327
- assert isinstance(
328
- results, list
329
- ), "results must be in a list"
330
- # single result
331
- result = results[0]
332
- # parse result
333
- new_search_metadata_mapping = (
334
- self.config.order_status_on_success["metadata_mapping"]
335
- )
336
- order_metadata_mapping_jsonpath = {}
337
- order_metadata_mapping_jsonpath = (
338
- mtd_cfg_as_conversion_and_querypath(
339
- new_search_metadata_mapping,
340
- order_metadata_mapping_jsonpath,
341
- )
342
- )
343
- properties_update = properties_from_xml(
344
- result,
345
- order_metadata_mapping_jsonpath,
346
- )
347
- except Exception as e:
348
- logger.debug(e)
349
- raise DownloadError(
350
- f"Could not parse result after order success for {product.properties['searchLink']} "
351
- f"request. Please search and download {product} again"
352
- )
353
- # update product
354
- product.properties.update(properties_update)
355
- product.location = product.remote_location = product.properties[
356
- "downloadLink"
357
- ]
358
- else:
359
- logger.warning(
360
- "JSON response parsing is not implemented yet for new searches "
361
- f"after order success. Please search and download {product} again"
362
- )
390
+ ) from e
391
+
392
+ if not skip_parsing_status_response:
393
+ # status request
394
+ json_response = response.json()
395
+ if not isinstance(json_response, dict):
396
+ raise RequestException("response content is not a dict")
397
+ status_dict = json_response
398
+
399
+ status_mm = status_config.get("metadata_mapping", {})
400
+ status_mm_jsonpath = (
401
+ mtd_cfg_as_conversion_and_querypath(
402
+ status_mm,
403
+ )
404
+ if status_mm
405
+ else {}
406
+ )
407
+ logger.debug("Parsing order status response")
408
+ status_dict = properties_from_json(
409
+ {"json": response.json(), "headers": {**response.headers}},
410
+ status_mm_jsonpath,
411
+ )
363
412
 
364
- except requests.exceptions.Timeout as exc:
365
- raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
413
+ # display progress percentage
414
+ if "percent" in status_dict:
415
+ status_percent = str(status_dict["percent"])
416
+ if status_percent.isdigit():
417
+ status_percent += "%"
418
+ logger.info(
419
+ f"{product.properties['title']} order status: {status_percent}"
420
+ )
421
+
422
+ status_message = status_dict.get("message")
423
+ product.properties["orderStatus"] = status_dict.get("status")
424
+
425
+ # handle status error
426
+ errors: Dict[str, Any] = status_config.get("error", {})
427
+ if errors and errors.items() <= status_dict.items():
428
+ raise DownloadError(
429
+ f"Provider {product.provider} returned: {status_dict.get('error_message', status_message)}"
430
+ )
431
+
432
+ success_status: Dict[str, Any] = status_config.get("success", {}).get("status")
433
+ # if not success
434
+ if (success_status and success_status != status_dict.get("status")) or (
435
+ success_code and success_code != response.status_code
436
+ ):
437
+ error = NotAvailableError(status_message)
438
+ raise error
439
+
440
+ product.properties["storageStatus"] = ONLINE_STATUS
441
+
442
+ if not config_on_success:
443
+ # Nothing left to do
444
+ return None
445
+
446
+ # need search on success ?
447
+ if config_on_success.get("need_search"):
448
+ logger.debug(f"Search for new location: {product.properties['searchLink']}")
449
+ try:
450
+ response = _request(product.properties["searchLink"], timeout=timeout)
366
451
  except RequestException as e:
367
452
  logger.warning(
368
453
  "%s order status could not be checked, request returned %s",
369
454
  product.properties["title"],
370
455
  e,
371
456
  )
457
+ return None
458
+
459
+ result_type = config_on_success.get("result_type", "json")
460
+ result_entry = config_on_success.get("results_entry")
461
+
462
+ on_success_mm_querypath = (
463
+ # append product.properties as input for on success response parsing
464
+ mtd_cfg_as_conversion_and_querypath(
465
+ dict(
466
+ {k: str(v) for k, v in product.properties.items()}, **on_success_mm
467
+ ),
468
+ )
469
+ if on_success_mm
470
+ else {}
471
+ )
472
+ try:
473
+ if result_type == "xml":
474
+ if not result_entry:
475
+ raise MisconfiguredError(
476
+ '"result_entry" is required with "result_type" "xml"'
477
+ 'in "order_status.on_success"'
478
+ )
479
+ root_node = etree.fromstring(response.content)
480
+ namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
481
+ results = [
482
+ etree.tostring(entry)
483
+ for entry in root_node.xpath(
484
+ result_entry,
485
+ namespaces=namespaces,
486
+ )
487
+ ]
488
+ if len(results) != 1:
489
+ raise DownloadError(
490
+ "Could not get a single result after order success for "
491
+ f"{product.properties['searchLink']} request. "
492
+ f"Please search and download {product} again"
493
+ )
494
+ assert isinstance(results, list), "results must be in a list"
495
+ # single result
496
+ result = results[0]
497
+ if on_success_mm_querypath:
498
+ properties_update = properties_from_xml(
499
+ result,
500
+ on_success_mm_querypath,
501
+ )
502
+ else:
503
+ properties_update = {}
504
+ else:
505
+ json_response = (
506
+ response.json()
507
+ if "application/json" in response.headers.get("Content-Type", "")
508
+ else {}
509
+ )
510
+ if result_entry:
511
+ entry_jsonpath = string_to_jsonpath(result_entry, force=True)
512
+ json_response = entry_jsonpath.find(json_response)
513
+ raise NotImplementedError(
514
+ 'result_entry in config.on_success is not yet supported for result_type "json"'
515
+ )
516
+ if on_success_mm_querypath:
517
+ logger.debug(
518
+ "Parsing on-success metadata-mapping using order status response"
519
+ )
520
+ properties_update = properties_from_json(
521
+ {"json": json_response, "headers": {**response.headers}},
522
+ on_success_mm_querypath,
523
+ )
524
+ # only keep properties to update (remove product.properties added for parsing)
525
+ properties_update = {
526
+ k: v for k, v in properties_update.items() if k in on_success_mm
527
+ }
528
+ else:
529
+ properties_update = {}
530
+ except Exception as e:
531
+ if isinstance(e, DownloadError):
532
+ raise
533
+ logger.debug(e)
534
+ raise DownloadError(
535
+ f"Could not parse result after order success. Please search and download {product} again"
536
+ ) from e
537
+
538
+ # update product
539
+ product.properties.update(properties_update)
540
+ if "downloadLink" in properties_update:
541
+ product.location = product.remote_location = product.properties[
542
+ "downloadLink"
543
+ ]
544
+ else:
545
+ self.order_response_process(response, product)
372
546
 
373
547
  def download(
374
548
  self,
375
549
  product: EOProduct,
376
- auth: Optional[PluginConfig] = None,
550
+ auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
377
551
  progress_callback: Optional[ProgressCallback] = None,
378
552
  wait: int = DEFAULT_DOWNLOAD_WAIT,
379
553
  timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
380
- **kwargs: Union[str, bool, Dict[str, Any]],
554
+ **kwargs: Unpack[DownloadConf],
381
555
  ) -> Optional[str]:
382
556
  """Download a product using HTTP protocol.
383
557
 
@@ -385,14 +559,26 @@ class HTTPDownload(Download):
385
559
  the user is warned, it is renamed to remove the zip extension and
386
560
  no further treatment is done (no extraction)
387
561
  """
562
+ if auth is not None and not isinstance(auth, AuthBase):
563
+ raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
564
+
388
565
  if progress_callback is None:
389
566
  logger.info(
390
567
  "Progress bar unavailable, please call product.download() instead of plugin.download()"
391
568
  )
392
569
  progress_callback = ProgressCallback(disable=True)
393
570
 
571
+ outputs_extension = getattr(self.config, "products", {}).get(
572
+ product.product_type, {}
573
+ ).get("outputs_extension", None) or getattr(
574
+ self.config, "outputs_extension", ".zip"
575
+ )
576
+ kwargs["outputs_extension"] = kwargs.get("outputs_extension", outputs_extension)
577
+
394
578
  fs_path, record_filename = self._prepare_download(
395
- product, progress_callback=progress_callback, **kwargs
579
+ product,
580
+ progress_callback=progress_callback,
581
+ **kwargs,
396
582
  )
397
583
  if not fs_path or not record_filename:
398
584
  if fs_path:
@@ -400,7 +586,10 @@ class HTTPDownload(Download):
400
586
  return fs_path
401
587
 
402
588
  # download assets if exist instead of remote_location
403
- if len(product.assets) > 0 and not getattr(self.config, "ignore_assets", False):
589
+ if len(product.assets) > 0 and (
590
+ not getattr(self.config, "ignore_assets", False)
591
+ or kwargs.get("asset", None) is not None
592
+ ):
404
593
  try:
405
594
  fs_path = self._download_assets(
406
595
  product,
@@ -424,18 +613,23 @@ class HTTPDownload(Download):
424
613
  @self._download_retry(product, wait, timeout)
425
614
  def download_request(
426
615
  product: EOProduct,
427
- auth: PluginConfig,
616
+ auth: AuthBase,
428
617
  progress_callback: ProgressCallback,
429
618
  wait: int,
430
619
  timeout: int,
431
- **kwargs: Dict[str, Any],
620
+ **kwargs: Unpack[DownloadConf],
432
621
  ) -> None:
433
622
  chunks = self._stream_download(product, auth, progress_callback, **kwargs)
623
+ is_empty = True
434
624
 
435
625
  with open(fs_path, "wb") as fhandle:
436
626
  for chunk in chunks:
627
+ is_empty = False
437
628
  fhandle.write(chunk)
438
629
 
630
+ if is_empty:
631
+ raise DownloadError(f"product {product.properties['id']} is empty")
632
+
439
633
  download_request(product, auth, progress_callback, wait, timeout, **kwargs)
440
634
 
441
635
  with open(record_filename, "w") as fh:
@@ -443,19 +637,57 @@ class HTTPDownload(Download):
443
637
  logger.debug("Download recorded in %s", record_filename)
444
638
 
445
639
  # Check that the downloaded file is really a zip file
446
- outputs_extension = kwargs.get("outputs_extension", None) or getattr(
447
- self.config, "outputs_extension", ".zip"
448
- )
449
640
  if not zipfile.is_zipfile(fs_path) and outputs_extension == ".zip":
450
641
  logger.warning(
451
642
  "Downloaded product is not a Zip File. Please check its file type before using it"
452
643
  )
453
- new_fs_path = fs_path[: fs_path.index(".zip")]
644
+ new_fs_path = os.path.join(
645
+ os.path.dirname(fs_path),
646
+ sanitize(product.properties["title"]),
647
+ )
648
+ if os.path.isfile(fs_path) and not tarfile.is_tarfile(fs_path):
649
+ if not os.path.isdir(new_fs_path):
650
+ os.makedirs(new_fs_path)
651
+ shutil.move(fs_path, new_fs_path)
652
+ file_path = os.path.join(new_fs_path, os.path.basename(fs_path))
653
+ new_file_path = file_path[: file_path.index(".zip")]
654
+ shutil.move(file_path, new_file_path)
655
+ # in the case where the outputs extension has not been set
656
+ # to ".tar" in the product type nor provider configuration
657
+ elif tarfile.is_tarfile(fs_path):
658
+ if not new_fs_path.endswith(".tar"):
659
+ new_fs_path += ".tar"
660
+ shutil.move(fs_path, new_fs_path)
661
+ kwargs["outputs_extension"] = ".tar"
662
+ product_path = self._finalize(
663
+ new_fs_path,
664
+ progress_callback=progress_callback,
665
+ **kwargs,
666
+ )
667
+ product.location = path_to_uri(product_path)
668
+ return product_path
669
+ else:
670
+ # not a file (dir with zip extension)
671
+ shutil.move(fs_path, new_fs_path)
672
+ product.location = path_to_uri(new_fs_path)
673
+ return new_fs_path
674
+
675
+ if os.path.isfile(fs_path) and not (
676
+ zipfile.is_zipfile(fs_path) or tarfile.is_tarfile(fs_path)
677
+ ):
678
+ new_fs_path = os.path.join(
679
+ os.path.dirname(fs_path),
680
+ sanitize(product.properties["title"]),
681
+ )
682
+ if not os.path.isdir(new_fs_path):
683
+ os.makedirs(new_fs_path)
454
684
  shutil.move(fs_path, new_fs_path)
455
685
  product.location = path_to_uri(new_fs_path)
456
686
  return new_fs_path
457
687
  product_path = self._finalize(
458
- fs_path, progress_callback=progress_callback, **kwargs
688
+ fs_path,
689
+ progress_callback=progress_callback,
690
+ **kwargs,
459
691
  )
460
692
  product.location = path_to_uri(product_path)
461
693
  return product_path
@@ -477,23 +709,51 @@ class HTTPDownload(Download):
477
709
  )
478
710
  return stream_size
479
711
 
712
+ def _check_product_filename(self, product: EOProduct) -> str:
713
+ filename = None
714
+ asset_content_disposition = self.stream.headers.get("content-disposition", None)
715
+ if asset_content_disposition:
716
+ filename = cast(
717
+ Optional[str],
718
+ parse_header(asset_content_disposition).get_param("filename", None),
719
+ )
720
+ if not filename:
721
+ # default filename extracted from path
722
+ filename = str(os.path.basename(self.stream.url))
723
+ filename_extension = os.path.splitext(filename)[1]
724
+ if not filename_extension:
725
+ if content_type := getattr(product, "headers", {}).get("Content-Type"):
726
+ ext = guess_extension(content_type)
727
+ if ext:
728
+ filename += ext
729
+ else:
730
+ outputs_extension: Optional[str] = (
731
+ getattr(self.config, "products", {})
732
+ .get(product.product_type, {})
733
+ .get("outputs_extension")
734
+ )
735
+ if outputs_extension:
736
+ filename += outputs_extension
737
+
738
+ return filename
739
+
480
740
  def _stream_download_dict(
481
741
  self,
482
742
  product: EOProduct,
483
- auth: Optional[PluginConfig] = None,
743
+ auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
484
744
  progress_callback: Optional[ProgressCallback] = None,
485
745
  wait: int = DEFAULT_DOWNLOAD_WAIT,
486
746
  timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
487
- **kwargs: Union[str, bool, Dict[str, Any]],
488
- ) -> Dict[str, Any]:
747
+ **kwargs: Unpack[DownloadConf],
748
+ ) -> StreamResponse:
489
749
  r"""
490
750
  Returns dictionnary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments.
491
751
  It contains a generator to streamed download chunks and the response headers.
492
752
 
493
753
  :param product: The EO product to download
494
754
  :type product: :class:`~eodag.api.product._product.EOProduct`
495
- :param auth: (optional) The configuration of a plugin of type Authentication
496
- :type auth: :class:`~eodag.config.PluginConfig`
755
+ :param auth: (optional) authenticated object
756
+ :type auth: Optional[Union[AuthBase, Dict[str, str]]]
497
757
  :param progress_callback: (optional) A progress callback
498
758
  :type progress_callback: :class:`~eodag.utils.ProgressCallback`
499
759
  :param wait: (optional) If download fails, wait time in minutes between two download tries
@@ -509,8 +769,14 @@ class HTTPDownload(Download):
509
769
  :returns: Dictionnary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments
510
770
  :rtype: dict
511
771
  """
772
+ if auth is not None and not isinstance(auth, AuthBase):
773
+ raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
774
+
512
775
  # download assets if exist instead of remote_location
513
- if len(product.assets) > 0 and not getattr(self.config, "ignore_assets", False):
776
+ if len(product.assets) > 0 and (
777
+ not getattr(self.config, "ignore_assets", False)
778
+ or kwargs.get("asset") is not None
779
+ ):
514
780
  try:
515
781
  assets_values = product.assets.get_values(kwargs.get("asset", None))
516
782
  chunks_tuples = self._stream_download_assets(
@@ -534,7 +800,7 @@ class HTTPDownload(Download):
534
800
  "type"
535
801
  ]
536
802
 
537
- return dict(
803
+ return StreamResponse(
538
804
  content=chain(iter([first_chunks_tuple]), chunks_tuples),
539
805
  headers=assets_values[0].headers,
540
806
  )
@@ -545,7 +811,7 @@ class HTTPDownload(Download):
545
811
  if "title" in product.properties
546
812
  else sanitize(product.properties.get("id", "download"))
547
813
  )
548
- return dict(
814
+ return StreamResponse(
549
815
  content=stream_zip(chunks_tuples),
550
816
  media_type="application/zip",
551
817
  headers={
@@ -560,36 +826,55 @@ class HTTPDownload(Download):
560
826
 
561
827
  chunks = self._stream_download(product, auth, progress_callback, **kwargs)
562
828
  # start reading chunks to set product.headers
563
- first_chunk = next(chunks)
564
-
565
- return dict(
829
+ try:
830
+ first_chunk = next(chunks)
831
+ except StopIteration:
832
+ # product is empty file
833
+ logger.error("product %s is empty", product.properties["id"])
834
+ raise NotAvailableError(f"product {product.properties['id']} is empty")
835
+
836
+ return StreamResponse(
566
837
  content=chain(iter([first_chunk]), chunks),
567
838
  headers=product.headers,
568
839
  )
569
840
 
570
- def _process_exception(
571
- self, e: RequestException, product: EOProduct, ordered_message: str
572
- ) -> None:
841
+ def _check_auth_exception(self, e: Optional[RequestException]) -> None:
573
842
  # check if error is identified as auth_error in provider conf
574
843
  auth_errors = getattr(self.config, "auth_error_code", [None])
575
844
  if not isinstance(auth_errors, list):
576
845
  auth_errors = [auth_errors]
577
- if e.response and e.response.status_code in auth_errors:
846
+ response_text = (
847
+ e.response.text.strip() if e is not None and e.response is not None else ""
848
+ )
849
+ if (
850
+ e is not None
851
+ and e.response is not None
852
+ and e.response.status_code in auth_errors
853
+ ):
578
854
  raise AuthenticationError(
579
855
  "HTTP Error %s returned, %s\nPlease check your credentials for %s"
580
856
  % (
581
857
  e.response.status_code,
582
- e.response.text.strip(),
858
+ response_text,
583
859
  self.provider,
584
860
  )
585
861
  )
862
+
863
+ def _process_exception(
864
+ self, e: Optional[RequestException], product: EOProduct, ordered_message: str
865
+ ) -> None:
866
+ self._check_auth_exception(e)
867
+ response_text = (
868
+ e.response.text.strip() if e is not None and e.response is not None else ""
869
+ )
586
870
  # product not available
587
- elif product.properties.get("storageStatus", ONLINE_STATUS) != ONLINE_STATUS:
871
+ if product.properties.get("storageStatus", ONLINE_STATUS) != ONLINE_STATUS:
588
872
  msg = (
589
873
  ordered_message
590
- if ordered_message and not e.response.text.strip()
591
- else e.response.text.strip()
874
+ if ordered_message and not response_text
875
+ else response_text
592
876
  )
877
+
593
878
  raise NotAvailableError(
594
879
  "%s(initially %s) requested, returned: %s"
595
880
  % (
@@ -601,18 +886,21 @@ class HTTPDownload(Download):
601
886
  else:
602
887
  import traceback as tb
603
888
 
604
- logger.error(
605
- "Error while getting resource :\n%s\n%s",
606
- tb.format_exc(),
607
- e.response.text.strip(),
608
- )
889
+ if e:
890
+ logger.error(
891
+ "Error while getting resource :\n%s\n%s",
892
+ tb.format_exc(),
893
+ response_text,
894
+ )
895
+ else:
896
+ logger.error("Error while getting resource :\n%s", tb.format_exc())
609
897
 
610
898
  def _stream_download(
611
899
  self,
612
900
  product: EOProduct,
613
- auth: Optional[PluginConfig] = None,
901
+ auth: Optional[AuthBase] = None,
614
902
  progress_callback: Optional[ProgressCallback] = None,
615
- **kwargs: Dict[str, Any],
903
+ **kwargs: Unpack[DownloadConf],
616
904
  ) -> Iterator[Any]:
617
905
  """
618
906
  fetches a zip file containing the assets of a given product as a stream
@@ -620,7 +908,7 @@ class HTTPDownload(Download):
620
908
  :param product: product for which the assets should be downloaded
621
909
  :type product: :class:`~eodag.api.product._product.EOProduct`
622
910
  :param auth: The configuration of a plugin of type Authentication
623
- :type auth: :class:`~eodag.config.PluginConfig`
911
+ :type auth: Optional[Union[AuthBase, Dict[str, str]]]
624
912
  :param progress_callback: A method or a callable object
625
913
  which takes a current size and a maximum
626
914
  size as inputs and handle progress bar
@@ -637,12 +925,15 @@ class HTTPDownload(Download):
637
925
  ordered_message = ""
638
926
  if (
639
927
  "orderLink" in product.properties
640
- and "storageStatus" in product.properties
641
- and product.properties["storageStatus"] == OFFLINE_STATUS
928
+ and product.properties.get("storageStatus") == OFFLINE_STATUS
929
+ and not product.properties.get("orderStatus")
642
930
  ):
643
931
  self.orderDownload(product=product, auth=auth)
644
932
 
645
- if product.properties.get("orderStatusLink", None):
933
+ if (
934
+ product.properties.get("orderStatusLink", None)
935
+ and product.properties.get("storageStatus") != ONLINE_STATUS
936
+ ):
646
937
  self.orderDownloadStatus(product=product, auth=auth)
647
938
 
648
939
  params = kwargs.pop("dl_url_params", None) or getattr(
@@ -666,8 +957,12 @@ class HTTPDownload(Download):
666
957
  req_url = url
667
958
  req_kwargs = {}
668
959
 
669
- # url where data is downloaded from can be ftp -> add ftp adapter
670
- requests_ftp.monkeypatch_session()
960
+ if req_url.startswith(NOT_AVAILABLE):
961
+ raise NotAvailableError("Download link is not available")
962
+
963
+ if getattr(self.config, "no_auth_download", False):
964
+ auth = None
965
+
671
966
  s = requests.Session()
672
967
  with s.request(
673
968
  req_method,
@@ -681,7 +976,6 @@ class HTTPDownload(Download):
681
976
  ) as self.stream:
682
977
  try:
683
978
  self.stream.raise_for_status()
684
-
685
979
  except requests.exceptions.Timeout as exc:
686
980
  raise TimeOutError(
687
981
  exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT
@@ -689,8 +983,30 @@ class HTTPDownload(Download):
689
983
  except RequestException as e:
690
984
  self._process_exception(e, product, ordered_message)
691
985
  else:
692
- stream_size = self._check_stream_size(product)
986
+ # check if product was ordered
987
+
988
+ if getattr(
989
+ self.stream, "status_code", None
990
+ ) is not None and self.stream.status_code == getattr(
991
+ self.config, "order_status", {}
992
+ ).get(
993
+ "ordered", {}
994
+ ).get(
995
+ "http_code"
996
+ ):
997
+ product.properties["storageStatus"] = "ORDERED"
998
+ self._process_exception(None, product, ordered_message)
999
+ stream_size = self._check_stream_size(product) or None
1000
+
693
1001
  product.headers = self.stream.headers
1002
+ filename = self._check_product_filename(product) or None
1003
+ product.headers[
1004
+ "content-disposition"
1005
+ ] = f"attachment; filename={filename}"
1006
+ content_type = product.headers.get("Content-Type")
1007
+ if filename and not content_type:
1008
+ product.headers["Content-Type"] = guess_file_type(filename)
1009
+
694
1010
  progress_callback.reset(total=stream_size)
695
1011
  for chunk in self.stream.iter_content(chunk_size=64 * 1024):
696
1012
  if chunk:
@@ -700,9 +1016,10 @@ class HTTPDownload(Download):
700
1016
  def _stream_download_assets(
701
1017
  self,
702
1018
  product: EOProduct,
703
- auth: Optional[PluginConfig] = None,
1019
+ auth: Optional[AuthBase] = None,
704
1020
  progress_callback: Optional[ProgressCallback] = None,
705
- **kwargs: Union[str, bool, Dict[str, Any]],
1021
+ assets_values: List[Asset] = [],
1022
+ **kwargs: Unpack[DownloadConf],
706
1023
  ) -> Iterator[Tuple[str, datetime, int, Any, Iterator[Any]]]:
707
1024
  if progress_callback is None:
708
1025
  logger.info("Progress bar unavailable, please call product.download()")
@@ -715,14 +1032,12 @@ class HTTPDownload(Download):
715
1032
  if not assets_urls:
716
1033
  raise NotAvailableError("No assets available for %s" % product)
717
1034
 
718
- assets_values = kwargs.get("assets_values", [])
719
-
720
1035
  # get extra parameters to pass to the query
721
1036
  params = kwargs.pop("dl_url_params", None) or getattr(
722
1037
  self.config, "dl_url_params", {}
723
1038
  )
724
1039
 
725
- total_size = self._get_asset_sizes(assets_values, auth, params)
1040
+ total_size = self._get_asset_sizes(assets_values, auth, params) or None
726
1041
 
727
1042
  progress_callback.reset(total=total_size)
728
1043
 
@@ -753,12 +1068,14 @@ class HTTPDownload(Download):
753
1068
  product.product_type, {}
754
1069
  )
755
1070
  flatten_top_dirs = product_conf.get(
756
- "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", False)
1071
+ "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
757
1072
  )
1073
+ ssl_verify = getattr(self.config, "ssl_verify", True)
758
1074
 
759
1075
  # loop for assets download
760
1076
  for asset in assets_values:
761
- if asset["href"].startswith("file:"):
1077
+
1078
+ if not asset["href"] or asset["href"].startswith("file:"):
762
1079
  logger.info(
763
1080
  f"Local asset detected. Download skipped for {asset['href']}"
764
1081
  )
@@ -771,6 +1088,7 @@ class HTTPDownload(Download):
771
1088
  params=params,
772
1089
  headers=USER_AGENT,
773
1090
  timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
1091
+ verify=ssl_verify,
774
1092
  ) as stream:
775
1093
  try:
776
1094
  stream.raise_for_status()
@@ -795,15 +1113,20 @@ class HTTPDownload(Download):
795
1113
  "content-disposition", None
796
1114
  )
797
1115
  if asset_content_disposition:
798
- asset.filename = parse_header(
799
- asset_content_disposition
800
- ).get_param("filename", None)
1116
+ asset.filename = cast(
1117
+ Optional[str],
1118
+ parse_header(asset_content_disposition).get_param(
1119
+ "filename", None
1120
+ ),
1121
+ )
801
1122
 
802
1123
  if not getattr(asset, "filename", None):
803
1124
  # default filename extracted from path
804
1125
  asset.filename = os.path.basename(asset.rel_path)
805
1126
 
806
- asset.rel_path = os.path.join(asset_rel_dir, asset.filename)
1127
+ asset.rel_path = os.path.join(
1128
+ asset_rel_dir, cast(str, asset.filename)
1129
+ )
807
1130
 
808
1131
  if len(assets_values) == 1:
809
1132
  # apply headers to asset
@@ -824,9 +1147,9 @@ class HTTPDownload(Download):
824
1147
  product: EOProduct,
825
1148
  fs_dir_path: str,
826
1149
  record_filename: str,
827
- auth: Optional[PluginConfig] = None,
1150
+ auth: Optional[AuthBase] = None,
828
1151
  progress_callback: Optional[ProgressCallback] = None,
829
- **kwargs: Union[str, bool, Dict[str, Any]],
1152
+ **kwargs: Unpack[DownloadConf],
830
1153
  ) -> str:
831
1154
  """Download product assets if they exist"""
832
1155
  if progress_callback is None:
@@ -857,7 +1180,7 @@ class HTTPDownload(Download):
857
1180
  product.product_type, {}
858
1181
  )
859
1182
  flatten_top_dirs = product_conf.get(
860
- "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", False)
1183
+ "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
861
1184
  )
862
1185
 
863
1186
  # count local assets
@@ -877,23 +1200,35 @@ class HTTPDownload(Download):
877
1200
  asset_path = chunk_tuple[0]
878
1201
  asset_chunks = chunk_tuple[4]
879
1202
  asset_abs_path = os.path.join(fs_dir_path, asset_path)
1203
+ asset_abs_path_temp = asset_abs_path + "~"
880
1204
  # create asset subdir if not exist
881
1205
  asset_abs_path_dir = os.path.dirname(asset_abs_path)
882
1206
  if not os.path.isdir(asset_abs_path_dir):
883
1207
  os.makedirs(asset_abs_path_dir)
1208
+ # remove temporary file
1209
+ if os.path.isfile(asset_abs_path_temp):
1210
+ os.remove(asset_abs_path_temp)
884
1211
  if not os.path.isfile(asset_abs_path):
885
- with open(asset_abs_path, "wb") as fhandle:
1212
+ logger.debug("Downloading to temporary file '%s'", asset_abs_path_temp)
1213
+ with open(asset_abs_path_temp, "wb") as fhandle:
886
1214
  for chunk in asset_chunks:
887
1215
  if chunk:
888
1216
  fhandle.write(chunk)
889
1217
  progress_callback(len(chunk))
890
-
1218
+ logger.debug(
1219
+ "Download completed. Renaming temporary file '%s' to '%s'",
1220
+ os.path.basename(asset_abs_path_temp),
1221
+ os.path.basename(asset_abs_path),
1222
+ )
1223
+ os.rename(asset_abs_path_temp, asset_abs_path)
891
1224
  # only one local asset
892
1225
  if local_assets_count == len(assets_urls) and local_assets_count == 1:
893
1226
  # remove empty {fs_dir_path}
894
1227
  shutil.rmtree(fs_dir_path)
895
1228
  # and return assets_urls[0] path
896
1229
  fs_dir_path = uri_to_path(assets_urls[0])
1230
+ # do not flatten dir
1231
+ flatten_top_dirs = False
897
1232
  # several local assets
898
1233
  elif local_assets_count == len(assets_urls) and local_assets_count > 0:
899
1234
  common_path = os.path.commonpath([uri_to_path(uri) for uri in assets_urls])
@@ -901,6 +1236,8 @@ class HTTPDownload(Download):
901
1236
  shutil.rmtree(fs_dir_path)
902
1237
  # and return assets_urls common path
903
1238
  fs_dir_path = common_path
1239
+ # do not flatten dir
1240
+ flatten_top_dirs = False
904
1241
  # no assets downloaded but some should have been
905
1242
  elif len(os.listdir(fs_dir_path)) == 0:
906
1243
  raise NotAvailableError("No assets could be downloaded")
@@ -918,13 +1255,13 @@ class HTTPDownload(Download):
918
1255
  return fs_dir_path
919
1256
 
920
1257
  def _handle_asset_exception(
921
- self, e: RequestException, asset: Dict[str, Any], raise_errors: bool = False
1258
+ self, e: RequestException, asset: Asset, raise_errors: bool = False
922
1259
  ) -> None:
923
1260
  # check if error is identified as auth_error in provider conf
924
1261
  auth_errors = getattr(self.config, "auth_error_code", [None])
925
1262
  if not isinstance(auth_errors, list):
926
1263
  auth_errors = [auth_errors]
927
- if e.response and e.response.status_code in auth_errors:
1264
+ if e.response is not None and e.response.status_code in auth_errors:
928
1265
  raise AuthenticationError(
929
1266
  "HTTP Error %s returned, %s\nPlease check your credentials for %s"
930
1267
  % (
@@ -941,22 +1278,24 @@ class HTTPDownload(Download):
941
1278
 
942
1279
  def _get_asset_sizes(
943
1280
  self,
944
- assets_values: List[Dict[str, Any]],
945
- auth: Optional[PluginConfig],
1281
+ assets_values: List[Asset],
1282
+ auth: Optional[AuthBase],
946
1283
  params: Optional[Dict[str, str]],
947
1284
  zipped: bool = False,
948
1285
  ) -> int:
949
1286
  total_size = 0
950
1287
 
1288
+ timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1289
+ ssl_verify = getattr(self.config, "ssl_verify", True)
951
1290
  # loop for assets size & filename
952
1291
  for asset in assets_values:
953
- if not asset["href"].startswith("file:"):
1292
+ if asset["href"] and not asset["href"].startswith("file:"):
954
1293
  # HEAD request for size & filename
955
1294
  asset_headers = requests.head(
956
1295
  asset["href"],
957
1296
  auth=auth,
958
1297
  headers=USER_AGENT,
959
- timeout=HTTP_REQ_TIMEOUT,
1298
+ timeout=timeout,
960
1299
  ).headers
961
1300
 
962
1301
  if not getattr(asset, "size", 0):
@@ -971,12 +1310,14 @@ class HTTPDownload(Download):
971
1310
  )
972
1311
  if not getattr(asset, "size", 0):
973
1312
  # size from HEAD header / content-disposition / size
974
- asset.size = int(header_content_disposition.get_param("size", 0))
1313
+ size_str = str(header_content_disposition.get_param("size", 0))
1314
+ asset.size = int(size_str) if size_str.isdigit() else 0
975
1315
  if not getattr(asset, "filename", 0):
976
1316
  # filename from HEAD header / content-disposition / size
977
- asset.filename = header_content_disposition.get_param(
1317
+ asset_filename = header_content_disposition.get_param(
978
1318
  "filename", None
979
1319
  )
1320
+ asset.filename = str(asset_filename) if asset_filename else None
980
1321
 
981
1322
  if not getattr(asset, "size", 0):
982
1323
  # GET request for size
@@ -987,16 +1328,18 @@ class HTTPDownload(Download):
987
1328
  params=params,
988
1329
  headers=USER_AGENT,
989
1330
  timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
1331
+ verify=ssl_verify,
990
1332
  ) as stream:
991
1333
  # size from GET header / Content-length
992
1334
  asset.size = int(stream.headers.get("Content-length", 0))
993
1335
  if not getattr(asset, "size", 0):
994
1336
  # size from GET header / content-disposition / size
995
- asset.size = int(
1337
+ size_str = str(
996
1338
  parse_header(
997
1339
  stream.headers.get("content-disposition", "")
998
1340
  ).get_param("size", 0)
999
1341
  )
1342
+ asset.size = int(size_str) if size_str.isdigit() else 0
1000
1343
 
1001
1344
  total_size += asset.size
1002
1345
  return total_size
@@ -1004,12 +1347,12 @@ class HTTPDownload(Download):
1004
1347
  def download_all(
1005
1348
  self,
1006
1349
  products: SearchResult,
1007
- auth: Optional[PluginConfig] = None,
1350
+ auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
1008
1351
  downloaded_callback: Optional[DownloadedCallback] = None,
1009
1352
  progress_callback: Optional[ProgressCallback] = None,
1010
1353
  wait: int = DEFAULT_DOWNLOAD_WAIT,
1011
1354
  timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
1012
- **kwargs: Union[str, bool, Dict[str, Any]],
1355
+ **kwargs: Unpack[DownloadConf],
1013
1356
  ):
1014
1357
  """
1015
1358
  Download all using parent (base plugin) method