eodag 2.12.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Files changed (93)
  1. eodag/__init__.py +6 -8
  2. eodag/api/core.py +654 -538
  3. eodag/api/product/__init__.py +12 -2
  4. eodag/api/product/_assets.py +59 -16
  5. eodag/api/product/_product.py +100 -93
  6. eodag/api/product/drivers/__init__.py +7 -2
  7. eodag/api/product/drivers/base.py +0 -3
  8. eodag/api/product/metadata_mapping.py +192 -96
  9. eodag/api/search_result.py +69 -10
  10. eodag/cli.py +55 -25
  11. eodag/config.py +391 -116
  12. eodag/plugins/apis/base.py +11 -165
  13. eodag/plugins/apis/ecmwf.py +36 -25
  14. eodag/plugins/apis/usgs.py +80 -35
  15. eodag/plugins/authentication/aws_auth.py +13 -4
  16. eodag/plugins/authentication/base.py +10 -1
  17. eodag/plugins/authentication/generic.py +2 -2
  18. eodag/plugins/authentication/header.py +31 -6
  19. eodag/plugins/authentication/keycloak.py +17 -84
  20. eodag/plugins/authentication/oauth.py +3 -3
  21. eodag/plugins/authentication/openid_connect.py +268 -49
  22. eodag/plugins/authentication/qsauth.py +4 -1
  23. eodag/plugins/authentication/sas_auth.py +9 -2
  24. eodag/plugins/authentication/token.py +98 -47
  25. eodag/plugins/authentication/token_exchange.py +122 -0
  26. eodag/plugins/crunch/base.py +3 -1
  27. eodag/plugins/crunch/filter_date.py +3 -9
  28. eodag/plugins/crunch/filter_latest_intersect.py +0 -3
  29. eodag/plugins/crunch/filter_latest_tpl_name.py +1 -4
  30. eodag/plugins/crunch/filter_overlap.py +4 -8
  31. eodag/plugins/crunch/filter_property.py +5 -11
  32. eodag/plugins/download/aws.py +149 -185
  33. eodag/plugins/download/base.py +88 -97
  34. eodag/plugins/download/creodias_s3.py +1 -1
  35. eodag/plugins/download/http.py +638 -310
  36. eodag/plugins/download/s3rest.py +47 -45
  37. eodag/plugins/manager.py +228 -88
  38. eodag/plugins/search/__init__.py +36 -0
  39. eodag/plugins/search/base.py +239 -30
  40. eodag/plugins/search/build_search_result.py +382 -37
  41. eodag/plugins/search/cop_marine.py +441 -0
  42. eodag/plugins/search/creodias_s3.py +25 -20
  43. eodag/plugins/search/csw.py +5 -7
  44. eodag/plugins/search/data_request_search.py +61 -30
  45. eodag/plugins/search/qssearch.py +713 -255
  46. eodag/plugins/search/static_stac_search.py +106 -40
  47. eodag/resources/ext_product_types.json +1 -1
  48. eodag/resources/product_types.yml +1921 -34
  49. eodag/resources/providers.yml +4091 -3655
  50. eodag/resources/stac.yml +50 -216
  51. eodag/resources/stac_api.yml +71 -25
  52. eodag/resources/stac_provider.yml +5 -0
  53. eodag/resources/user_conf_template.yml +89 -32
  54. eodag/rest/__init__.py +6 -0
  55. eodag/rest/cache.py +70 -0
  56. eodag/rest/config.py +68 -0
  57. eodag/rest/constants.py +26 -0
  58. eodag/rest/core.py +735 -0
  59. eodag/rest/errors.py +178 -0
  60. eodag/rest/server.py +264 -431
  61. eodag/rest/stac.py +442 -836
  62. eodag/rest/types/collections_search.py +44 -0
  63. eodag/rest/types/eodag_search.py +238 -47
  64. eodag/rest/types/queryables.py +164 -0
  65. eodag/rest/types/stac_search.py +273 -0
  66. eodag/rest/utils/__init__.py +216 -0
  67. eodag/rest/utils/cql_evaluate.py +119 -0
  68. eodag/rest/utils/rfc3339.py +64 -0
  69. eodag/types/__init__.py +106 -10
  70. eodag/types/bbox.py +15 -14
  71. eodag/types/download_args.py +40 -0
  72. eodag/types/search_args.py +57 -7
  73. eodag/types/whoosh.py +79 -0
  74. eodag/utils/__init__.py +110 -91
  75. eodag/utils/constraints.py +37 -45
  76. eodag/utils/exceptions.py +39 -22
  77. eodag/utils/import_system.py +0 -4
  78. eodag/utils/logging.py +37 -80
  79. eodag/utils/notebook.py +4 -4
  80. eodag/utils/repr.py +113 -0
  81. eodag/utils/requests.py +128 -0
  82. eodag/utils/rest.py +100 -0
  83. eodag/utils/stac_reader.py +93 -21
  84. {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/METADATA +88 -53
  85. eodag-3.0.0.dist-info/RECORD +109 -0
  86. {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/WHEEL +1 -1
  87. {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/entry_points.txt +7 -5
  88. eodag/plugins/apis/cds.py +0 -540
  89. eodag/rest/types/stac_queryables.py +0 -134
  90. eodag/rest/utils.py +0 -1133
  91. eodag-2.12.0.dist-info/RECORD +0 -94
  92. {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/LICENSE +0 -0
  93. {eodag-2.12.0.dist-info → eodag-3.0.0.dist-info}/top_level.txt +0 -0
@@ -20,23 +20,38 @@ from __future__ import annotations
20
20
  import logging
21
21
  import os
22
22
  import shutil
23
+ import tarfile
23
24
  import zipfile
24
25
  from datetime import datetime
25
26
  from email.message import Message
26
27
  from itertools import chain
27
- from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union
28
+ from json import JSONDecodeError
29
+ from typing import (
30
+ TYPE_CHECKING,
31
+ Any,
32
+ Dict,
33
+ Iterator,
34
+ List,
35
+ Optional,
36
+ TypedDict,
37
+ Union,
38
+ cast,
39
+ )
28
40
  from urllib.parse import parse_qs, urlparse
29
41
 
30
42
  import geojson
31
43
  import requests
32
- import requests_ftp
33
44
  from lxml import etree
34
45
  from requests import RequestException
46
+ from requests.auth import AuthBase
47
+ from requests.structures import CaseInsensitiveDict
35
48
  from stream_zip import ZIP_AUTO, stream_zip
36
49
 
37
50
  from eodag.api.product.metadata_mapping import (
51
+ NOT_AVAILABLE,
38
52
  OFFLINE_STATUS,
39
53
  ONLINE_STATUS,
54
+ STAGING_STATUS,
40
55
  mtd_cfg_as_conversion_and_querypath,
41
56
  properties_from_json,
42
57
  properties_from_xml,
@@ -49,10 +64,14 @@ from eodag.utils import (
49
64
  HTTP_REQ_TIMEOUT,
50
65
  USER_AGENT,
51
66
  ProgressCallback,
67
+ StreamResponse,
52
68
  flatten_top_directories,
69
+ guess_extension,
70
+ guess_file_type,
53
71
  parse_header,
54
72
  path_to_uri,
55
73
  sanitize,
74
+ string_to_jsonpath,
56
75
  uri_to_path,
57
76
  )
58
77
  from eodag.utils.exceptions import (
@@ -66,10 +85,11 @@ from eodag.utils.exceptions import (
66
85
  if TYPE_CHECKING:
67
86
  from requests import Response
68
87
 
69
- from eodag.api.product import EOProduct
88
+ from eodag.api.product import Asset, EOProduct # type: ignore
70
89
  from eodag.api.search_result import SearchResult
71
90
  from eodag.config import PluginConfig
72
- from eodag.utils import DownloadedCallback
91
+ from eodag.types.download_args import DownloadConf
92
+ from eodag.utils import DownloadedCallback, Unpack
73
93
 
74
94
  logger = logging.getLogger("eodag.download.http")
75
95
 
@@ -78,10 +98,9 @@ class HTTPDownload(Download):
78
98
  """HTTPDownload plugin. Handles product download over HTTP protocol
79
99
 
80
100
  :param provider: provider name
81
- :type provider: str
82
101
  :param config: Download plugin configuration:
83
102
 
84
- * ``config.base_uri`` (str) - default endpoint url
103
+ * ``config.base_uri`` (str) - (optional) default endpoint url
85
104
  * ``config.extract`` (bool) - (optional) extract downloaded archive or not
86
105
  * ``config.auth_error_code`` (int) - (optional) authentication error code
87
106
  * ``config.dl_url_params`` (dict) - (optional) additional parameters to send in the request
@@ -92,29 +111,19 @@ class HTTPDownload(Download):
92
111
  * ``config.order_method`` (str) - (optional) HTTP request method, GET (default) or POST
93
112
  * ``config.order_headers`` (dict) - (optional) order request headers
94
113
  * ``config.order_on_response`` (dict) - (optional) edit or add new product properties
95
- * ``config.order_status_method`` (str) - (optional) status HTTP request method, GET (default) or POST
96
- * ``config.order_status_percent`` (str) - (optional) progress percentage key in obtained status response
97
- * ``config.order_status_error`` (dict) - (optional) key/value identifying an error status
98
-
99
- :type config: :class:`~eodag.config.PluginConfig`
114
+ * ``config.order_status`` (:class:`~eodag.config.PluginConfig.OrderStatus`) - (optional) Order status handling
100
115
 
101
116
  """
102
117
 
103
118
  def __init__(self, provider: str, config: PluginConfig) -> None:
104
119
  super(HTTPDownload, self).__init__(provider, config)
105
- if not hasattr(self.config, "base_uri"):
106
- raise MisconfiguredError(
107
- "{} plugin require a base_uri configuration key".format(
108
- type(self).__name__
109
- )
110
- )
111
120
 
112
- def orderDownload(
121
+ def order_download(
113
122
  self,
114
123
  product: EOProduct,
115
- auth: Optional[PluginConfig] = None,
116
- **kwargs: Union[str, bool, Dict[str, Any]],
117
- ) -> None:
124
+ auth: Optional[AuthBase] = None,
125
+ **kwargs: Unpack[DownloadConf],
126
+ ) -> Optional[Dict[str, Any]]:
118
127
  """Send product order request.
119
128
 
120
129
  It will be executed once before the download retry loop, if the product is OFFLINE
@@ -135,249 +144,406 @@ class HTTPDownload(Download):
135
144
  - **orderLink**: order request URL
136
145
 
137
146
  :param product: The EO product to order
138
- :type product: :class:`~eodag.api.product._product.EOProduct`
139
- :param auth: (optional) The configuration of a plugin of type Authentication
140
- :type auth: :class:`~eodag.config.PluginConfig`
147
+ :param auth: (optional) authenticated object
141
148
  :param kwargs: download additional kwargs
142
- :type kwargs: Union[str, bool, dict]
149
+ :returns: the returned json status response
143
150
  """
144
- order_method = getattr(self.config, "order_method", "GET").lower()
145
- if order_method == "post":
151
+ product.properties["storageStatus"] = STAGING_STATUS
152
+
153
+ order_method = getattr(self.config, "order_method", "GET").upper()
154
+ ssl_verify = getattr(self.config, "ssl_verify", True)
155
+ timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
156
+ OrderKwargs = TypedDict(
157
+ "OrderKwargs", {"json": Dict[str, Union[Any, List[str]]]}, total=False
158
+ )
159
+ order_kwargs: OrderKwargs = {}
160
+ if order_method == "POST":
146
161
  # separate url & parameters
147
162
  parts = urlparse(str(product.properties["orderLink"]))
148
- query_dict = parse_qs(parts.query)
149
- if not query_dict and parts.query:
163
+ query_dict = {}
164
+ # `parts.query` may be a JSON with query strings as one of values. If `parse_qs` is executed as first step,
165
+ # the resulting `query_dict` would be erroneous.
166
+ try:
150
167
  query_dict = geojson.loads(parts.query)
151
- order_url = parts._replace(query=None).geturl()
152
- order_kwargs = {"json": query_dict} if query_dict else {}
168
+ except JSONDecodeError:
169
+ if parts.query:
170
+ query_dict = parse_qs(parts.query)
171
+ order_url = parts._replace(query="").geturl()
172
+ if query_dict:
173
+ order_kwargs["json"] = query_dict
153
174
  else:
154
175
  order_url = product.properties["orderLink"]
155
176
  order_kwargs = {}
156
177
 
157
- with requests.request(
158
- method=order_method,
159
- url=order_url,
160
- auth=auth,
161
- timeout=HTTP_REQ_TIMEOUT,
162
- headers=dict(getattr(self.config, "order_headers", {}), **USER_AGENT),
163
- **order_kwargs,
164
- ) as response:
165
- try:
166
- response.raise_for_status()
167
- ordered_message = response.text
168
- logger.debug(ordered_message)
169
- logger.info("%s was ordered", product.properties["title"])
170
- except requests.exceptions.Timeout as exc:
171
- raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
172
- except RequestException as e:
173
- if e.response and hasattr(e.response, "content"):
174
- error_message = f"{e.response.content.decode('utf-8')} - {e}"
175
- else:
176
- error_message = str(e)
177
- logger.warning(
178
- "%s could not be ordered, request returned %s",
179
- product.properties["title"],
180
- error_message,
181
- )
182
-
183
- order_metadata_mapping = getattr(self.config, "order_on_response", {}).get(
178
+ headers = {**getattr(self.config, "order_headers", {}), **USER_AGENT}
179
+ try:
180
+ with requests.request(
181
+ method=order_method,
182
+ url=order_url,
183
+ auth=auth,
184
+ timeout=timeout,
185
+ headers=headers,
186
+ verify=ssl_verify,
187
+ **order_kwargs,
188
+ ) as response:
189
+ logger.debug(f"{order_method} {order_url} {headers} {order_kwargs}")
190
+ try:
191
+ response.raise_for_status()
192
+ ordered_message = response.text
193
+ logger.debug(ordered_message)
194
+ product.properties["storageStatus"] = STAGING_STATUS
195
+ except RequestException as e:
196
+ if hasattr(e, "response") and (
197
+ content := getattr(e.response, "content", None)
198
+ ):
199
+ error_message = f"{content.decode('utf-8')} - {e}"
200
+ else:
201
+ error_message = str(e)
202
+ logger.warning(
203
+ "%s could not be ordered, request returned %s",
204
+ product.properties["title"],
205
+ error_message,
206
+ )
207
+ self._check_auth_exception(e)
208
+ return self.order_response_process(response, product)
209
+ except requests.exceptions.Timeout as exc:
210
+ raise TimeOutError(exc, timeout=timeout) from exc
211
+
212
+ def order_response_process(
213
+ self, response: Response, product: EOProduct
214
+ ) -> Optional[Dict[str, Any]]:
215
+ """Process order response
216
+
217
+ :param response: The order response
218
+ :param product: The ordered EO product
219
+ :returns: the returned json status response
220
+ """
221
+ on_response_mm = getattr(self.config, "order_on_response", {}).get(
184
222
  "metadata_mapping", {}
185
223
  )
186
- if order_metadata_mapping:
187
- logger.debug("Parsing order response to update product metada-mapping")
188
- order_metadata_mapping_jsonpath = mtd_cfg_as_conversion_and_querypath(
189
- order_metadata_mapping,
190
- )
191
- properties_update = properties_from_json(
192
- response.json(),
193
- order_metadata_mapping_jsonpath,
194
- )
195
- product.properties.update(properties_update)
196
- if "downloadLink" in properties_update:
197
- product.remote_location = product.location = product.properties[
198
- "downloadLink"
199
- ]
200
- logger.debug(f"Product location updated to {product.location}")
224
+ if not on_response_mm:
225
+ return None
201
226
 
202
- def orderDownloadStatus(
227
+ logger.debug("Parsing order response to update product metada-mapping")
228
+ on_response_mm_jsonpath = mtd_cfg_as_conversion_and_querypath(
229
+ on_response_mm,
230
+ )
231
+
232
+ json_response = response.json()
233
+
234
+ properties_update = properties_from_json(
235
+ {"json": json_response, "headers": {**response.headers}},
236
+ on_response_mm_jsonpath,
237
+ )
238
+ product.properties.update(
239
+ {k: v for k, v in properties_update.items() if v != NOT_AVAILABLE}
240
+ )
241
+ if "downloadLink" in product.properties:
242
+ product.remote_location = product.location = product.properties[
243
+ "downloadLink"
244
+ ]
245
+ logger.debug(f"Product location updated to {product.location}")
246
+
247
+ return json_response
248
+
249
+ def order_download_status(
203
250
  self,
204
251
  product: EOProduct,
205
- auth: Optional[PluginConfig] = None,
206
- **kwargs: Union[str, bool, Dict[str, Any]],
252
+ auth: Optional[AuthBase] = None,
207
253
  ) -> None:
208
254
  """Send product order status request.
209
255
 
210
256
  It will be executed before each download retry.
211
257
  Product order status request can be configured using the following download plugin parameters:
212
258
 
213
- - **order_status_method**: (optional) HTTP request method, GET (default) or POST
214
-
215
- - **order_status_percent**: (optional) progress percentage key in obtained response
216
-
217
- - **order_status_error**: (optional) key/value identifying an error status
259
+ - **order_status**: :class:`~eodag.config.PluginConfig.OrderStatus`
218
260
 
219
261
  Product properties used for order status:
220
262
 
221
263
  - **orderStatusLink**: order status request URL
222
264
 
223
265
  :param product: The ordered EO product
224
- :type product: :class:`~eodag.api.product._product.EOProduct`
225
- :param auth: (optional) The configuration of a plugin of type Authentication
226
- :type auth: :class:`~eodag.config.PluginConfig`
266
+ :param auth: (optional) authenticated object
227
267
  :param kwargs: download additional kwargs
228
- :type kwargs: Union[str, bool, dict]
229
268
  """
230
- status_method = getattr(self.config, "order_status_method", "GET").lower()
231
- if status_method == "post":
269
+
270
+ status_config = getattr(self.config, "order_status", {})
271
+ success_code: Optional[int] = status_config.get("success", {}).get("http_code")
272
+
273
+ timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
274
+
275
+ def _request(
276
+ url: str,
277
+ method: str = "GET",
278
+ headers: Optional[Dict[str, Any]] = None,
279
+ json: Optional[Any] = None,
280
+ timeout: int = HTTP_REQ_TIMEOUT,
281
+ ) -> Response:
282
+ """Send request and handle allow redirects"""
283
+
284
+ logger.debug(f"{method} {url} {headers} {json}")
285
+ try:
286
+ response = requests.request(
287
+ method=method,
288
+ url=url,
289
+ auth=auth,
290
+ timeout=timeout,
291
+ headers={**(headers or {}), **USER_AGENT},
292
+ allow_redirects=False, # Redirection is manually handled
293
+ json=json,
294
+ )
295
+ logger.debug(
296
+ f"Order download status request responded with {response.status_code}"
297
+ )
298
+ response.raise_for_status() # Raise an exception if status code indicates an error
299
+
300
+ # Handle redirection (if needed)
301
+ if (
302
+ 300 <= response.status_code < 400
303
+ and response.status_code != success_code
304
+ ):
305
+ # cf: https://www.rfc-editor.org/rfc/rfc9110.html#name-303-see-other
306
+ if response.status_code == 303:
307
+ method = "GET"
308
+ if new_url := response.headers.get("Location"):
309
+ return _request(new_url, method, headers, json, timeout)
310
+ return response
311
+ except requests.exceptions.Timeout as exc:
312
+ raise TimeOutError(exc, timeout=timeout) from exc
313
+
314
+ status_request: Dict[str, Any] = status_config.get("request", {})
315
+ status_request_method = str(status_request.get("method", "GET")).upper()
316
+
317
+ if status_request_method == "POST":
232
318
  # separate url & parameters
233
319
  parts = urlparse(str(product.properties["orderStatusLink"]))
320
+ status_url = parts._replace(query="").geturl()
234
321
  query_dict = parse_qs(parts.query)
235
322
  if not query_dict and parts.query:
236
323
  query_dict = geojson.loads(parts.query)
237
- status_url = parts._replace(query=None).geturl()
238
- status_kwargs = {"json": query_dict} if query_dict else {}
324
+ json_data = query_dict if query_dict else None
239
325
  else:
240
326
  status_url = product.properties["orderStatusLink"]
241
- status_kwargs = {}
327
+ json_data = None
328
+
329
+ # check header for success before full status request
330
+ skip_parsing_status_response = False
331
+ status_dict: Dict[str, Any] = {}
332
+ config_on_success: Dict[str, Any] = status_config.get("on_success", {})
333
+ on_success_mm = config_on_success.get("metadata_mapping", {})
334
+
335
+ status_response_content_needed = (
336
+ False
337
+ if not any([v.startswith("$.json.") for v in on_success_mm.values()])
338
+ else True
339
+ )
242
340
 
243
- with requests.request(
244
- method=status_method,
245
- url=status_url,
246
- auth=auth,
247
- timeout=HTTP_REQ_TIMEOUT,
248
- headers=dict(
249
- getattr(self.config, "order_status_headers", {}), **USER_AGENT
250
- ),
251
- **status_kwargs,
252
- ) as response:
341
+ if success_code:
253
342
  try:
254
- response.raise_for_status()
255
- status_message = response.text
256
- status_dict = response.json()
257
- # display progress percentage
258
- order_status_percent_key = getattr(
259
- self.config, "order_status_percent", None
343
+ response = _request(
344
+ status_url,
345
+ "HEAD",
346
+ status_request.get("headers"),
347
+ json_data,
348
+ timeout,
260
349
  )
261
- if order_status_percent_key and order_status_percent_key in status_dict:
262
- order_status_value = str(status_dict[order_status_percent_key])
263
- if order_status_value.isdigit():
264
- order_status_value += "%"
265
- logger.info(
266
- f"{product.properties['title']} order status: {order_status_value}"
267
- )
268
- # display error if any
269
- order_status_error_dict = getattr(self.config, "order_status_error", {})
270
350
  if (
271
- order_status_error_dict
272
- and order_status_error_dict.items() <= status_dict.items()
351
+ response.status_code == success_code
352
+ and not status_response_content_needed
273
353
  ):
274
- # order_status_error_dict is a subset of status_dict : error
275
- logger.warning(status_message)
276
- else:
277
- logger.debug(status_message)
278
- # check if succeeds and need search again
279
- order_status_success_dict = getattr(
280
- self.config, "order_status_success", {}
354
+ # success and no need to get status response content
355
+ skip_parsing_status_response = True
356
+ except RequestException as e:
357
+ logger.debug(e)
358
+
359
+ if not skip_parsing_status_response:
360
+ # status request
361
+ try:
362
+ response = _request(
363
+ status_url,
364
+ status_request_method,
365
+ status_request.get("headers"),
366
+ json_data,
367
+ timeout,
281
368
  )
282
369
  if (
283
- "status" in status_dict
284
- and status_dict["status"] == order_status_success_dict["status"]
285
- and "message" in status_dict
286
- and status_dict["message"] == order_status_success_dict["message"]
287
- ):
288
- product.properties["storageStatus"] = ONLINE_STATUS
289
- if (
290
- order_status_success_dict
291
- and order_status_success_dict.items() <= status_dict.items()
292
- and getattr(self.config, "order_status_on_success", {}).get(
293
- "need_search"
294
- )
370
+ response.status_code == success_code
371
+ and not status_response_content_needed
295
372
  ):
296
- logger.debug(
297
- f"Search for new location: {product.properties['searchLink']}"
298
- )
299
- # search again
300
- response = requests.get(
301
- product.properties["searchLink"],
302
- timeout=HTTP_REQ_TIMEOUT,
303
- headers=USER_AGENT,
373
+ # success and no need to get status response content
374
+ skip_parsing_status_response = True
375
+ except RequestException as e:
376
+ raise DownloadError(
377
+ "%s order status could not be checked, request returned %s"
378
+ % (
379
+ product.properties["title"],
380
+ e,
304
381
  )
305
- response.raise_for_status()
306
- if (
307
- self.config.order_status_on_success.get("result_type", "json")
308
- == "xml"
309
- ):
310
- root_node = etree.fromstring(response.content)
311
- namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
312
- results = [
313
- etree.tostring(entry)
314
- for entry in root_node.xpath(
315
- self.config.order_status_on_success["results_entry"],
316
- namespaces=namespaces,
317
- )
318
- ]
319
- if isinstance(results, list) and len(results) != 1:
320
- raise DownloadError(
321
- "Could not get a single result after order success for "
322
- f"{product.properties['searchLink']} request. "
323
- f"Please search and download {product} again"
324
- )
325
- return
326
- try:
327
- assert isinstance(
328
- results, list
329
- ), "results must be in a list"
330
- # single result
331
- result = results[0]
332
- # parse result
333
- new_search_metadata_mapping = (
334
- self.config.order_status_on_success["metadata_mapping"]
335
- )
336
- order_metadata_mapping_jsonpath = {}
337
- order_metadata_mapping_jsonpath = (
338
- mtd_cfg_as_conversion_and_querypath(
339
- new_search_metadata_mapping,
340
- order_metadata_mapping_jsonpath,
341
- )
342
- )
343
- properties_update = properties_from_xml(
344
- result,
345
- order_metadata_mapping_jsonpath,
346
- )
347
- except Exception as e:
348
- logger.debug(e)
349
- raise DownloadError(
350
- f"Could not parse result after order success for {product.properties['searchLink']} "
351
- f"request. Please search and download {product} again"
352
- )
353
- # update product
354
- product.properties.update(properties_update)
355
- product.location = product.remote_location = product.properties[
356
- "downloadLink"
357
- ]
358
- else:
359
- logger.warning(
360
- "JSON response parsing is not implemented yet for new searches "
361
- f"after order success. Please search and download {product} again"
362
- )
382
+ ) from e
383
+
384
+ if not skip_parsing_status_response:
385
+ # status request
386
+ json_response = response.json()
387
+ if not isinstance(json_response, dict):
388
+ raise RequestException("response content is not a dict")
389
+ status_dict = json_response
390
+
391
+ status_mm = status_config.get("metadata_mapping", {})
392
+ status_mm_jsonpath = (
393
+ mtd_cfg_as_conversion_and_querypath(
394
+ status_mm,
395
+ )
396
+ if status_mm
397
+ else {}
398
+ )
399
+ logger.debug("Parsing order status response")
400
+ status_dict = properties_from_json(
401
+ {"json": response.json(), "headers": {**response.headers}},
402
+ status_mm_jsonpath,
403
+ )
363
404
 
364
- except requests.exceptions.Timeout as exc:
365
- raise TimeOutError(exc, timeout=HTTP_REQ_TIMEOUT) from exc
405
+ # display progress percentage
406
+ if "percent" in status_dict:
407
+ status_percent = str(status_dict["percent"])
408
+ if status_percent.isdigit():
409
+ status_percent += "%"
410
+ logger.info(
411
+ f"{product.properties['title']} order status: {status_percent}"
412
+ )
413
+
414
+ status_message = status_dict.get("message")
415
+ product.properties["orderStatus"] = status_dict.get("status")
416
+
417
+ # handle status error
418
+ errors: Dict[str, Any] = status_config.get("error", {})
419
+ if errors and errors.items() <= status_dict.items():
420
+ raise DownloadError(
421
+ f"Provider {product.provider} returned: {status_dict.get('error_message', status_message)}"
422
+ )
423
+
424
+ success_status: Dict[str, Any] = status_config.get("success", {}).get("status")
425
+ # if not success
426
+ if (success_status and success_status != status_dict.get("status")) or (
427
+ success_code and success_code != response.status_code
428
+ ):
429
+ error = NotAvailableError(status_message)
430
+ raise error
431
+
432
+ product.properties["storageStatus"] = ONLINE_STATUS
433
+
434
+ if not config_on_success:
435
+ # Nothing left to do
436
+ return None
437
+
438
+ # need search on success ?
439
+ if config_on_success.get("need_search"):
440
+ logger.debug(f"Search for new location: {product.properties['searchLink']}")
441
+ try:
442
+ response = _request(product.properties["searchLink"], timeout=timeout)
366
443
  except RequestException as e:
367
444
  logger.warning(
368
445
  "%s order status could not be checked, request returned %s",
369
446
  product.properties["title"],
370
447
  e,
371
448
  )
449
+ return None
450
+
451
+ result_type = config_on_success.get("result_type", "json")
452
+ result_entry = config_on_success.get("results_entry")
453
+
454
+ on_success_mm_querypath = (
455
+ # append product.properties as input for on success response parsing
456
+ mtd_cfg_as_conversion_and_querypath(
457
+ dict(
458
+ {k: str(v) for k, v in product.properties.items()}, **on_success_mm
459
+ ),
460
+ )
461
+ if on_success_mm
462
+ else {}
463
+ )
464
+ try:
465
+ if result_type == "xml":
466
+ if not result_entry:
467
+ raise MisconfiguredError(
468
+ '"result_entry" is required with "result_type" "xml"'
469
+ 'in "order_status.on_success"'
470
+ )
471
+ root_node = etree.fromstring(response.content)
472
+ namespaces = {k or "ns": v for k, v in root_node.nsmap.items()}
473
+ results = [
474
+ etree.tostring(entry)
475
+ for entry in root_node.xpath(
476
+ result_entry,
477
+ namespaces=namespaces,
478
+ )
479
+ ]
480
+ if len(results) != 1:
481
+ raise DownloadError(
482
+ "Could not get a single result after order success for "
483
+ f"{product.properties['searchLink']} request. "
484
+ f"Please search and download {product} again"
485
+ )
486
+ assert isinstance(results, list), "results must be in a list"
487
+ # single result
488
+ result = results[0]
489
+ if on_success_mm_querypath:
490
+ properties_update = properties_from_xml(
491
+ result,
492
+ on_success_mm_querypath,
493
+ )
494
+ else:
495
+ properties_update = {}
496
+ else:
497
+ json_response = (
498
+ response.json()
499
+ if "application/json" in response.headers.get("Content-Type", "")
500
+ else {}
501
+ )
502
+ if result_entry:
503
+ entry_jsonpath = string_to_jsonpath(result_entry, force=True)
504
+ json_response = entry_jsonpath.find(json_response)
505
+ raise NotImplementedError(
506
+ 'result_entry in config.on_success is not yet supported for result_type "json"'
507
+ )
508
+ if on_success_mm_querypath:
509
+ logger.debug(
510
+ "Parsing on-success metadata-mapping using order status response"
511
+ )
512
+ properties_update = properties_from_json(
513
+ {"json": json_response, "headers": {**response.headers}},
514
+ on_success_mm_querypath,
515
+ )
516
+ # only keep properties to update (remove product.properties added for parsing)
517
+ properties_update = {
518
+ k: v for k, v in properties_update.items() if k in on_success_mm
519
+ }
520
+ else:
521
+ properties_update = {}
522
+ except Exception as e:
523
+ if isinstance(e, DownloadError):
524
+ raise
525
+ logger.debug(e)
526
+ raise DownloadError(
527
+ f"Could not parse result after order success. Please search and download {product} again"
528
+ ) from e
529
+
530
+ # update product
531
+ product.properties.update(properties_update)
532
+ if "downloadLink" in properties_update:
533
+ product.location = product.remote_location = product.properties[
534
+ "downloadLink"
535
+ ]
536
+ else:
537
+ self.order_response_process(response, product)
372
538
 
373
539
  def download(
374
540
  self,
375
541
  product: EOProduct,
376
- auth: Optional[PluginConfig] = None,
542
+ auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
377
543
  progress_callback: Optional[ProgressCallback] = None,
378
544
  wait: int = DEFAULT_DOWNLOAD_WAIT,
379
545
  timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
380
- **kwargs: Union[str, bool, Dict[str, Any]],
546
+ **kwargs: Unpack[DownloadConf],
381
547
  ) -> Optional[str]:
382
548
  """Download a product using HTTP protocol.
383
549
 
@@ -385,14 +551,26 @@ class HTTPDownload(Download):
385
551
  the user is warned, it is renamed to remove the zip extension and
386
552
  no further treatment is done (no extraction)
387
553
  """
554
+ if auth is not None and not isinstance(auth, AuthBase):
555
+ raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
556
+
388
557
  if progress_callback is None:
389
558
  logger.info(
390
559
  "Progress bar unavailable, please call product.download() instead of plugin.download()"
391
560
  )
392
561
  progress_callback = ProgressCallback(disable=True)
393
562
 
563
+ output_extension = getattr(self.config, "products", {}).get(
564
+ product.product_type, {}
565
+ ).get("output_extension", None) or getattr(
566
+ self.config, "output_extension", ".zip"
567
+ )
568
+ kwargs["output_extension"] = kwargs.get("output_extension", output_extension)
569
+
394
570
  fs_path, record_filename = self._prepare_download(
395
- product, progress_callback=progress_callback, **kwargs
571
+ product,
572
+ progress_callback=progress_callback,
573
+ **kwargs,
396
574
  )
397
575
  if not fs_path or not record_filename:
398
576
  if fs_path:
@@ -400,7 +578,10 @@ class HTTPDownload(Download):
400
578
  return fs_path
401
579
 
402
580
  # download assets if exist instead of remote_location
403
- if len(product.assets) > 0 and not getattr(self.config, "ignore_assets", False):
581
+ if len(product.assets) > 0 and (
582
+ not getattr(self.config, "ignore_assets", False)
583
+ or kwargs.get("asset", None) is not None
584
+ ):
404
585
  try:
405
586
  fs_path = self._download_assets(
406
587
  product,
@@ -424,18 +605,23 @@ class HTTPDownload(Download):
424
605
  @self._download_retry(product, wait, timeout)
425
606
  def download_request(
426
607
  product: EOProduct,
427
- auth: PluginConfig,
608
+ auth: AuthBase,
428
609
  progress_callback: ProgressCallback,
429
610
  wait: int,
430
611
  timeout: int,
431
- **kwargs: Dict[str, Any],
612
+ **kwargs: Unpack[DownloadConf],
432
613
  ) -> None:
433
614
  chunks = self._stream_download(product, auth, progress_callback, **kwargs)
615
+ is_empty = True
434
616
 
435
617
  with open(fs_path, "wb") as fhandle:
436
618
  for chunk in chunks:
619
+ is_empty = False
437
620
  fhandle.write(chunk)
438
621
 
622
+ if is_empty:
623
+ raise DownloadError(f"product {product.properties['id']} is empty")
624
+
439
625
  download_request(product, auth, progress_callback, wait, timeout, **kwargs)
440
626
 
441
627
  with open(record_filename, "w") as fh:
@@ -443,19 +629,57 @@ class HTTPDownload(Download):
443
629
  logger.debug("Download recorded in %s", record_filename)
444
630
 
445
631
  # Check that the downloaded file is really a zip file
446
- outputs_extension = kwargs.get("outputs_extension", None) or getattr(
447
- self.config, "outputs_extension", ".zip"
448
- )
449
- if not zipfile.is_zipfile(fs_path) and outputs_extension == ".zip":
632
+ if not zipfile.is_zipfile(fs_path) and output_extension == ".zip":
450
633
  logger.warning(
451
634
  "Downloaded product is not a Zip File. Please check its file type before using it"
452
635
  )
453
- new_fs_path = fs_path[: fs_path.index(".zip")]
636
+ new_fs_path = os.path.join(
637
+ os.path.dirname(fs_path),
638
+ sanitize(product.properties["title"]),
639
+ )
640
+ if os.path.isfile(fs_path) and not tarfile.is_tarfile(fs_path):
641
+ if not os.path.isdir(new_fs_path):
642
+ os.makedirs(new_fs_path)
643
+ shutil.move(fs_path, new_fs_path)
644
+ file_path = os.path.join(new_fs_path, os.path.basename(fs_path))
645
+ new_file_path = file_path[: file_path.index(".zip")]
646
+ shutil.move(file_path, new_file_path)
647
+ # in the case where the outputs extension has not been set
648
+ # to ".tar" in the product type nor provider configuration
649
+ elif tarfile.is_tarfile(fs_path):
650
+ if not new_fs_path.endswith(".tar"):
651
+ new_fs_path += ".tar"
652
+ shutil.move(fs_path, new_fs_path)
653
+ kwargs["output_extension"] = ".tar"
654
+ product_path = self._finalize(
655
+ new_fs_path,
656
+ progress_callback=progress_callback,
657
+ **kwargs,
658
+ )
659
+ product.location = path_to_uri(product_path)
660
+ return product_path
661
+ else:
662
+ # not a file (dir with zip extension)
663
+ shutil.move(fs_path, new_fs_path)
664
+ product.location = path_to_uri(new_fs_path)
665
+ return new_fs_path
666
+
667
+ if os.path.isfile(fs_path) and not (
668
+ zipfile.is_zipfile(fs_path) or tarfile.is_tarfile(fs_path)
669
+ ):
670
+ new_fs_path = os.path.join(
671
+ os.path.dirname(fs_path),
672
+ sanitize(product.properties["title"]),
673
+ )
674
+ if not os.path.isdir(new_fs_path):
675
+ os.makedirs(new_fs_path)
454
676
  shutil.move(fs_path, new_fs_path)
455
677
  product.location = path_to_uri(new_fs_path)
456
678
  return new_fs_path
457
679
  product_path = self._finalize(
458
- fs_path, progress_callback=progress_callback, **kwargs
680
+ fs_path,
681
+ progress_callback=progress_callback,
682
+ **kwargs,
459
683
  )
460
684
  product.location = path_to_uri(product_path)
461
685
  return product_path
@@ -477,40 +701,67 @@ class HTTPDownload(Download):
477
701
  )
478
702
  return stream_size
479
703
 
704
+ def _check_product_filename(self, product: EOProduct) -> str:
705
+ filename = None
706
+ asset_content_disposition = self.stream.headers.get("content-disposition", None)
707
+ if asset_content_disposition:
708
+ filename = cast(
709
+ Optional[str],
710
+ parse_header(asset_content_disposition).get_param("filename", None),
711
+ )
712
+ if not filename:
713
+ # default filename extracted from path
714
+ filename = str(os.path.basename(self.stream.url))
715
+ filename_extension = os.path.splitext(filename)[1]
716
+ if not filename_extension:
717
+ if content_type := getattr(product, "headers", {}).get("Content-Type"):
718
+ ext = guess_extension(content_type)
719
+ if ext:
720
+ filename += ext
721
+ else:
722
+ output_extension: Optional[str] = (
723
+ getattr(self.config, "products", {})
724
+ .get(product.product_type, {})
725
+ .get("output_extension")
726
+ )
727
+ if output_extension:
728
+ filename += output_extension
729
+
730
+ return filename
731
+
480
732
  def _stream_download_dict(
481
733
  self,
482
734
  product: EOProduct,
483
- auth: Optional[PluginConfig] = None,
735
+ auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
484
736
  progress_callback: Optional[ProgressCallback] = None,
485
737
  wait: int = DEFAULT_DOWNLOAD_WAIT,
486
738
  timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
487
- **kwargs: Union[str, bool, Dict[str, Any]],
488
- ) -> Dict[str, Any]:
739
+ **kwargs: Unpack[DownloadConf],
740
+ ) -> StreamResponse:
489
741
  r"""
490
- Returns dictionnary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments.
742
+ Returns dictionary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments.
491
743
  It contains a generator to streamed download chunks and the response headers.
492
744
 
493
745
  :param product: The EO product to download
494
- :type product: :class:`~eodag.api.product._product.EOProduct`
495
- :param auth: (optional) The configuration of a plugin of type Authentication
496
- :type auth: :class:`~eodag.config.PluginConfig`
746
+ :param auth: (optional) authenticated object
497
747
  :param progress_callback: (optional) A progress callback
498
- :type progress_callback: :class:`~eodag.utils.ProgressCallback`
499
748
  :param wait: (optional) If download fails, wait time in minutes between two download tries
500
- :type wait: int
501
749
  :param timeout: (optional) If download fails, maximum time in minutes before stop retrying
502
750
  to download
503
- :type timeout: int
504
- :param kwargs: `outputs_prefix` (str), `extract` (bool), `delete_archive` (bool)
751
+ :param kwargs: `output_dir` (str), `extract` (bool), `delete_archive` (bool)
505
752
  and `dl_url_params` (dict) can be provided as additional kwargs
506
753
  and will override any other values defined in a configuration
507
754
  file or with environment variables.
508
- :type kwargs: Union[str, bool, dict]
509
- :returns: Dictionnary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments
510
- :rtype: dict
755
+ :returns: Dictionary of :class:`~fastapi.responses.StreamingResponse` keyword-arguments
511
756
  """
757
+ if auth is not None and not isinstance(auth, AuthBase):
758
+ raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}")
759
+
512
760
  # download assets if exist instead of remote_location
513
- if len(product.assets) > 0 and not getattr(self.config, "ignore_assets", False):
761
+ if len(product.assets) > 0 and (
762
+ not getattr(self.config, "ignore_assets", False)
763
+ or kwargs.get("asset") is not None
764
+ ):
514
765
  try:
515
766
  assets_values = product.assets.get_values(kwargs.get("asset", None))
516
767
  chunks_tuples = self._stream_download_assets(
@@ -534,7 +785,7 @@ class HTTPDownload(Download):
534
785
  "type"
535
786
  ]
536
787
 
537
- return dict(
788
+ return StreamResponse(
538
789
  content=chain(iter([first_chunks_tuple]), chunks_tuples),
539
790
  headers=assets_values[0].headers,
540
791
  )
@@ -545,7 +796,7 @@ class HTTPDownload(Download):
545
796
  if "title" in product.properties
546
797
  else sanitize(product.properties.get("id", "download"))
547
798
  )
548
- return dict(
799
+ return StreamResponse(
549
800
  content=stream_zip(chunks_tuples),
550
801
  media_type="application/zip",
551
802
  headers={
@@ -560,36 +811,52 @@ class HTTPDownload(Download):
560
811
 
561
812
  chunks = self._stream_download(product, auth, progress_callback, **kwargs)
562
813
  # start reading chunks to set product.headers
563
- first_chunk = next(chunks)
564
-
565
- return dict(
814
+ try:
815
+ first_chunk = next(chunks)
816
+ except StopIteration:
817
+ # product is empty file
818
+ logger.error("product %s is empty", product.properties["id"])
819
+ raise NotAvailableError(f"product {product.properties['id']} is empty")
820
+
821
+ return StreamResponse(
566
822
  content=chain(iter([first_chunk]), chunks),
567
823
  headers=product.headers,
568
824
  )
569
825
 
570
- def _process_exception(
571
- self, e: RequestException, product: EOProduct, ordered_message: str
572
- ) -> None:
826
+ def _check_auth_exception(self, e: Optional[RequestException]) -> None:
573
827
  # check if error is identified as auth_error in provider conf
574
828
  auth_errors = getattr(self.config, "auth_error_code", [None])
575
829
  if not isinstance(auth_errors, list):
576
830
  auth_errors = [auth_errors]
577
- if e.response and e.response.status_code in auth_errors:
831
+ response_text = (
832
+ e.response.text.strip() if e is not None and e.response is not None else ""
833
+ )
834
+ if (
835
+ e is not None
836
+ and e.response is not None
837
+ and e.response.status_code in auth_errors
838
+ ):
578
839
  raise AuthenticationError(
579
- "HTTP Error %s returned, %s\nPlease check your credentials for %s"
580
- % (
581
- e.response.status_code,
582
- e.response.text.strip(),
583
- self.provider,
584
- )
840
+ f"Please check your credentials for {self.provider}.",
841
+ f"HTTP Error {e.response.status_code} returned.",
842
+ response_text,
585
843
  )
844
+
845
+ def _process_exception(
846
+ self, e: Optional[RequestException], product: EOProduct, ordered_message: str
847
+ ) -> None:
848
+ self._check_auth_exception(e)
849
+ response_text = (
850
+ e.response.text.strip() if e is not None and e.response is not None else ""
851
+ )
586
852
  # product not available
587
- elif product.properties.get("storageStatus", ONLINE_STATUS) != ONLINE_STATUS:
853
+ if product.properties.get("storageStatus", ONLINE_STATUS) != ONLINE_STATUS:
588
854
  msg = (
589
855
  ordered_message
590
- if ordered_message and not e.response.text.strip()
591
- else e.response.text.strip()
856
+ if ordered_message and not response_text
857
+ else response_text
592
858
  )
859
+
593
860
  raise NotAvailableError(
594
861
  "%s(initially %s) requested, returned: %s"
595
862
  % (
@@ -601,34 +868,33 @@ class HTTPDownload(Download):
601
868
  else:
602
869
  import traceback as tb
603
870
 
604
- logger.error(
605
- "Error while getting resource :\n%s\n%s",
606
- tb.format_exc(),
607
- e.response.text.strip(),
608
- )
871
+ if e:
872
+ logger.error(
873
+ "Error while getting resource :\n%s\n%s",
874
+ tb.format_exc(),
875
+ response_text,
876
+ )
877
+ else:
878
+ logger.error("Error while getting resource :\n%s", tb.format_exc())
609
879
 
610
880
  def _stream_download(
611
881
  self,
612
882
  product: EOProduct,
613
- auth: Optional[PluginConfig] = None,
883
+ auth: Optional[AuthBase] = None,
614
884
  progress_callback: Optional[ProgressCallback] = None,
615
- **kwargs: Dict[str, Any],
885
+ **kwargs: Unpack[DownloadConf],
616
886
  ) -> Iterator[Any]:
617
887
  """
618
888
  fetches a zip file containing the assets of a given product as a stream
619
889
  and returns a generator yielding the chunks of the file
620
890
  :param product: product for which the assets should be downloaded
621
- :type product: :class:`~eodag.api.product._product.EOProduct`
622
891
  :param auth: The configuration of a plugin of type Authentication
623
- :type auth: :class:`~eodag.config.PluginConfig`
624
892
  :param progress_callback: A method or a callable object
625
893
  which takes a current size and a maximum
626
894
  size as inputs and handle progress bar
627
895
  creation and update to give the user a
628
896
  feedback on the download progress
629
- :type progress_callback: :class:`~eodag.utils.ProgressCallback`
630
897
  :param kwargs: additional arguments
631
- :type kwargs: dict
632
898
  """
633
899
  if progress_callback is None:
634
900
  logger.info("Progress bar unavailable, please call product.download()")
@@ -637,13 +903,16 @@ class HTTPDownload(Download):
637
903
  ordered_message = ""
638
904
  if (
639
905
  "orderLink" in product.properties
640
- and "storageStatus" in product.properties
641
- and product.properties["storageStatus"] == OFFLINE_STATUS
906
+ and product.properties.get("storageStatus") == OFFLINE_STATUS
907
+ and not product.properties.get("orderStatus")
642
908
  ):
643
- self.orderDownload(product=product, auth=auth)
909
+ self.order_download(product=product, auth=auth)
644
910
 
645
- if product.properties.get("orderStatusLink", None):
646
- self.orderDownloadStatus(product=product, auth=auth)
911
+ if (
912
+ product.properties.get("orderStatusLink", None)
913
+ and product.properties.get("storageStatus") != ONLINE_STATUS
914
+ ):
915
+ self.order_download_status(product=product, auth=auth)
647
916
 
648
917
  params = kwargs.pop("dl_url_params", None) or getattr(
649
918
  self.config, "dl_url_params", {}
@@ -660,14 +929,18 @@ class HTTPDownload(Download):
660
929
  query_dict = parse_qs(parts.query)
661
930
  if not query_dict and parts.query:
662
931
  query_dict = geojson.loads(parts.query)
663
- req_url = parts._replace(query=None).geturl()
932
+ req_url = parts._replace(query="").geturl()
664
933
  req_kwargs: Dict[str, Any] = {"json": query_dict} if query_dict else {}
665
934
  else:
666
935
  req_url = url
667
936
  req_kwargs = {}
668
937
 
669
- # url where data is downloaded from can be ftp -> add ftp adapter
670
- requests_ftp.monkeypatch_session()
938
+ if req_url.startswith(NOT_AVAILABLE):
939
+ raise NotAvailableError("Download link is not available")
940
+
941
+ if getattr(self.config, "no_auth_download", False):
942
+ auth = None
943
+
671
944
  s = requests.Session()
672
945
  with s.request(
673
946
  req_method,
@@ -681,7 +954,6 @@ class HTTPDownload(Download):
681
954
  ) as self.stream:
682
955
  try:
683
956
  self.stream.raise_for_status()
684
-
685
957
  except requests.exceptions.Timeout as exc:
686
958
  raise TimeOutError(
687
959
  exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT
@@ -689,8 +961,33 @@ class HTTPDownload(Download):
689
961
  except RequestException as e:
690
962
  self._process_exception(e, product, ordered_message)
691
963
  else:
692
- stream_size = self._check_stream_size(product)
964
+ # check if product was ordered
965
+
966
+ if getattr(
967
+ self.stream, "status_code", None
968
+ ) is not None and self.stream.status_code == getattr(
969
+ self.config, "order_status", {}
970
+ ).get(
971
+ "ordered", {}
972
+ ).get(
973
+ "http_code"
974
+ ):
975
+ product.properties["storageStatus"] = "ORDERED"
976
+ self._process_exception(None, product, ordered_message)
977
+ stream_size = self._check_stream_size(product) or None
978
+
693
979
  product.headers = self.stream.headers
980
+ filename = self._check_product_filename(product) or None
981
+ product.headers[
982
+ "content-disposition"
983
+ ] = f"attachment; filename={filename}"
984
+ content_type = product.headers.get("Content-Type")
985
+ guessed_content_type = (
986
+ guess_file_type(filename) if filename and not content_type else None
987
+ )
988
+ if guessed_content_type is not None:
989
+ product.headers["Content-Type"] = guessed_content_type
990
+
694
991
  progress_callback.reset(total=stream_size)
695
992
  for chunk in self.stream.iter_content(chunk_size=64 * 1024):
696
993
  if chunk:
@@ -700,10 +997,11 @@ class HTTPDownload(Download):
700
997
  def _stream_download_assets(
701
998
  self,
702
999
  product: EOProduct,
703
- auth: Optional[PluginConfig] = None,
1000
+ auth: Optional[AuthBase] = None,
704
1001
  progress_callback: Optional[ProgressCallback] = None,
705
- **kwargs: Union[str, bool, Dict[str, Any]],
706
- ) -> Iterator[Tuple[str, datetime, int, Any, Iterator[Any]]]:
1002
+ assets_values: List[Asset] = [],
1003
+ **kwargs: Unpack[DownloadConf],
1004
+ ) -> Iterator[Any]:
707
1005
  if progress_callback is None:
708
1006
  logger.info("Progress bar unavailable, please call product.download()")
709
1007
  progress_callback = ProgressCallback(disable=True)
@@ -715,14 +1013,12 @@ class HTTPDownload(Download):
715
1013
  if not assets_urls:
716
1014
  raise NotAvailableError("No assets available for %s" % product)
717
1015
 
718
- assets_values = kwargs.get("assets_values", [])
719
-
720
1016
  # get extra parameters to pass to the query
721
1017
  params = kwargs.pop("dl_url_params", None) or getattr(
722
1018
  self.config, "dl_url_params", {}
723
1019
  )
724
1020
 
725
- total_size = self._get_asset_sizes(assets_values, auth, params)
1021
+ total_size = self._get_asset_sizes(assets_values, auth, params) or None
726
1022
 
727
1023
  progress_callback.reset(total=total_size)
728
1024
 
@@ -753,12 +1049,13 @@ class HTTPDownload(Download):
753
1049
  product.product_type, {}
754
1050
  )
755
1051
  flatten_top_dirs = product_conf.get(
756
- "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", False)
1052
+ "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
757
1053
  )
1054
+ ssl_verify = getattr(self.config, "ssl_verify", True)
758
1055
 
759
1056
  # loop for assets download
760
1057
  for asset in assets_values:
761
- if asset["href"].startswith("file:"):
1058
+ if not asset["href"] or asset["href"].startswith("file:"):
762
1059
  logger.info(
763
1060
  f"Local asset detected. Download skipped for {asset['href']}"
764
1061
  )
@@ -771,6 +1068,7 @@ class HTTPDownload(Download):
771
1068
  params=params,
772
1069
  headers=USER_AGENT,
773
1070
  timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
1071
+ verify=ssl_verify,
774
1072
  ) as stream:
775
1073
  try:
776
1074
  stream.raise_for_status()
@@ -795,15 +1093,20 @@ class HTTPDownload(Download):
795
1093
  "content-disposition", None
796
1094
  )
797
1095
  if asset_content_disposition:
798
- asset.filename = parse_header(
799
- asset_content_disposition
800
- ).get_param("filename", None)
1096
+ asset.filename = cast(
1097
+ Optional[str],
1098
+ parse_header(asset_content_disposition).get_param(
1099
+ "filename", None
1100
+ ),
1101
+ )
801
1102
 
802
1103
  if not getattr(asset, "filename", None):
803
1104
  # default filename extracted from path
804
1105
  asset.filename = os.path.basename(asset.rel_path)
805
1106
 
806
- asset.rel_path = os.path.join(asset_rel_dir, asset.filename)
1107
+ asset.rel_path = os.path.join(
1108
+ asset_rel_dir, cast(str, asset.filename)
1109
+ )
807
1110
 
808
1111
  if len(assets_values) == 1:
809
1112
  # apply headers to asset
@@ -824,9 +1127,9 @@ class HTTPDownload(Download):
824
1127
  product: EOProduct,
825
1128
  fs_dir_path: str,
826
1129
  record_filename: str,
827
- auth: Optional[PluginConfig] = None,
1130
+ auth: Optional[AuthBase] = None,
828
1131
  progress_callback: Optional[ProgressCallback] = None,
829
- **kwargs: Union[str, bool, Dict[str, Any]],
1132
+ **kwargs: Unpack[DownloadConf],
830
1133
  ) -> str:
831
1134
  """Download product assets if they exist"""
832
1135
  if progress_callback is None:
@@ -857,7 +1160,7 @@ class HTTPDownload(Download):
857
1160
  product.product_type, {}
858
1161
  )
859
1162
  flatten_top_dirs = product_conf.get(
860
- "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", False)
1163
+ "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", True)
861
1164
  )
862
1165
 
863
1166
  # count local assets
@@ -871,29 +1174,43 @@ class HTTPDownload(Download):
871
1174
  # start reading chunks to set asset.rel_path
872
1175
  first_chunks_tuple = next(chunks_tuples)
873
1176
  chunks = chain(iter([first_chunks_tuple]), chunks_tuples)
874
- chunks_tuples = [(assets_values[0].rel_path, None, None, None, chunks)]
1177
+ chunks_tuples = iter(
1178
+ [(assets_values[0].rel_path, None, None, None, chunks)]
1179
+ )
875
1180
 
876
1181
  for chunk_tuple in chunks_tuples:
877
1182
  asset_path = chunk_tuple[0]
878
1183
  asset_chunks = chunk_tuple[4]
879
1184
  asset_abs_path = os.path.join(fs_dir_path, asset_path)
1185
+ asset_abs_path_temp = asset_abs_path + "~"
880
1186
  # create asset subdir if not exist
881
1187
  asset_abs_path_dir = os.path.dirname(asset_abs_path)
882
1188
  if not os.path.isdir(asset_abs_path_dir):
883
1189
  os.makedirs(asset_abs_path_dir)
1190
+ # remove temporary file
1191
+ if os.path.isfile(asset_abs_path_temp):
1192
+ os.remove(asset_abs_path_temp)
884
1193
  if not os.path.isfile(asset_abs_path):
885
- with open(asset_abs_path, "wb") as fhandle:
1194
+ logger.debug("Downloading to temporary file '%s'", asset_abs_path_temp)
1195
+ with open(asset_abs_path_temp, "wb") as fhandle:
886
1196
  for chunk in asset_chunks:
887
1197
  if chunk:
888
1198
  fhandle.write(chunk)
889
1199
  progress_callback(len(chunk))
890
-
1200
+ logger.debug(
1201
+ "Download completed. Renaming temporary file '%s' to '%s'",
1202
+ os.path.basename(asset_abs_path_temp),
1203
+ os.path.basename(asset_abs_path),
1204
+ )
1205
+ os.rename(asset_abs_path_temp, asset_abs_path)
891
1206
  # only one local asset
892
1207
  if local_assets_count == len(assets_urls) and local_assets_count == 1:
893
1208
  # remove empty {fs_dir_path}
894
1209
  shutil.rmtree(fs_dir_path)
895
1210
  # and return assets_urls[0] path
896
1211
  fs_dir_path = uri_to_path(assets_urls[0])
1212
+ # do not flatten dir
1213
+ flatten_top_dirs = False
897
1214
  # several local assets
898
1215
  elif local_assets_count == len(assets_urls) and local_assets_count > 0:
899
1216
  common_path = os.path.commonpath([uri_to_path(uri) for uri in assets_urls])
@@ -901,6 +1218,8 @@ class HTTPDownload(Download):
901
1218
  shutil.rmtree(fs_dir_path)
902
1219
  # and return assets_urls common path
903
1220
  fs_dir_path = common_path
1221
+ # do not flatten dir
1222
+ flatten_top_dirs = False
904
1223
  # no assets downloaded but some should have been
905
1224
  elif len(os.listdir(fs_dir_path)) == 0:
906
1225
  raise NotAvailableError("No assets could be downloaded")
@@ -918,20 +1237,17 @@ class HTTPDownload(Download):
918
1237
  return fs_dir_path
919
1238
 
920
1239
  def _handle_asset_exception(
921
- self, e: RequestException, asset: Dict[str, Any], raise_errors: bool = False
1240
+ self, e: RequestException, asset: Asset, raise_errors: bool = False
922
1241
  ) -> None:
923
1242
  # check if error is identified as auth_error in provider conf
924
1243
  auth_errors = getattr(self.config, "auth_error_code", [None])
925
1244
  if not isinstance(auth_errors, list):
926
1245
  auth_errors = [auth_errors]
927
- if e.response and e.response.status_code in auth_errors:
1246
+ if e.response is not None and e.response.status_code in auth_errors:
928
1247
  raise AuthenticationError(
929
- "HTTP Error %s returned, %s\nPlease check your credentials for %s"
930
- % (
931
- e.response.status_code,
932
- e.response.text.strip(),
933
- self.provider,
934
- )
1248
+ f"Please check your credentials for {self.provider}.",
1249
+ f"HTTP Error {e.response.status_code} returned.",
1250
+ e.response.text.strip(),
935
1251
  )
936
1252
  elif raise_errors:
937
1253
  raise DownloadError(e)
@@ -941,23 +1257,31 @@ class HTTPDownload(Download):
941
1257
 
942
1258
  def _get_asset_sizes(
943
1259
  self,
944
- assets_values: List[Dict[str, Any]],
945
- auth: Optional[PluginConfig],
1260
+ assets_values: List[Asset],
1261
+ auth: Optional[AuthBase],
946
1262
  params: Optional[Dict[str, str]],
947
1263
  zipped: bool = False,
948
1264
  ) -> int:
949
1265
  total_size = 0
950
1266
 
1267
+ timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1268
+ ssl_verify = getattr(self.config, "ssl_verify", True)
951
1269
  # loop for assets size & filename
952
1270
  for asset in assets_values:
953
- if not asset["href"].startswith("file:"):
1271
+ if asset["href"] and not asset["href"].startswith("file:"):
954
1272
  # HEAD request for size & filename
955
- asset_headers = requests.head(
956
- asset["href"],
957
- auth=auth,
958
- headers=USER_AGENT,
959
- timeout=HTTP_REQ_TIMEOUT,
960
- ).headers
1273
+ try:
1274
+ asset_headers = requests.head(
1275
+ asset["href"],
1276
+ auth=auth,
1277
+ params=params,
1278
+ headers=USER_AGENT,
1279
+ timeout=timeout,
1280
+ verify=ssl_verify,
1281
+ ).headers
1282
+ except RequestException as e:
1283
+ logger.debug(f"HEAD request failed: {str(e)}")
1284
+ asset_headers = CaseInsensitiveDict()
961
1285
 
962
1286
  if not getattr(asset, "size", 0):
963
1287
  # size from HEAD header / Content-length
@@ -971,12 +1295,14 @@ class HTTPDownload(Download):
971
1295
  )
972
1296
  if not getattr(asset, "size", 0):
973
1297
  # size from HEAD header / content-disposition / size
974
- asset.size = int(header_content_disposition.get_param("size", 0))
1298
+ size_str = str(header_content_disposition.get_param("size", 0))
1299
+ asset.size = int(size_str) if size_str.isdigit() else 0
975
1300
  if not getattr(asset, "filename", 0):
976
1301
  # filename from HEAD header / content-disposition / size
977
- asset.filename = header_content_disposition.get_param(
1302
+ asset_filename = header_content_disposition.get_param(
978
1303
  "filename", None
979
1304
  )
1305
+ asset.filename = str(asset_filename) if asset_filename else None
980
1306
 
981
1307
  if not getattr(asset, "size", 0):
982
1308
  # GET request for size
@@ -987,16 +1313,18 @@ class HTTPDownload(Download):
987
1313
  params=params,
988
1314
  headers=USER_AGENT,
989
1315
  timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
1316
+ verify=ssl_verify,
990
1317
  ) as stream:
991
1318
  # size from GET header / Content-length
992
1319
  asset.size = int(stream.headers.get("Content-length", 0))
993
1320
  if not getattr(asset, "size", 0):
994
1321
  # size from GET header / content-disposition / size
995
- asset.size = int(
1322
+ size_str = str(
996
1323
  parse_header(
997
1324
  stream.headers.get("content-disposition", "")
998
1325
  ).get_param("size", 0)
999
1326
  )
1327
+ asset.size = int(size_str) if size_str.isdigit() else 0
1000
1328
 
1001
1329
  total_size += asset.size
1002
1330
  return total_size
@@ -1004,12 +1332,12 @@ class HTTPDownload(Download):
1004
1332
  def download_all(
1005
1333
  self,
1006
1334
  products: SearchResult,
1007
- auth: Optional[PluginConfig] = None,
1335
+ auth: Optional[Union[AuthBase, Dict[str, str]]] = None,
1008
1336
  downloaded_callback: Optional[DownloadedCallback] = None,
1009
1337
  progress_callback: Optional[ProgressCallback] = None,
1010
1338
  wait: int = DEFAULT_DOWNLOAD_WAIT,
1011
1339
  timeout: int = DEFAULT_DOWNLOAD_TIMEOUT,
1012
- **kwargs: Union[str, bool, Dict[str, Any]],
1340
+ **kwargs: Unpack[DownloadConf],
1013
1341
  ):
1014
1342
  """
1015
1343
  Download all using parent (base plugin) method