eodag 4.0.0a3__py3-none-any.whl → 4.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,6 +41,7 @@ from urllib.parse import parse_qs, urlparse
41
41
 
42
42
  import geojson
43
43
  import requests
44
+ from concurrent.futures import ThreadPoolExecutor, as_completed
44
45
  from lxml import etree
45
46
  from requests import RequestException
46
47
  from requests.auth import AuthBase
@@ -90,10 +91,9 @@ if TYPE_CHECKING:
90
91
  from requests import Response
91
92
 
92
93
  from eodag.api.product import Asset, EOProduct # type: ignore
93
- from eodag.api.search_result import SearchResult
94
94
  from eodag.config import PluginConfig
95
95
  from eodag.types.download_args import DownloadConf
96
- from eodag.utils import DownloadedCallback, Unpack
96
+ from eodag.utils import Unpack
97
97
 
98
98
  logger = logging.getLogger("eodag.download.http")
99
99
 
@@ -155,7 +155,6 @@ class HTTPDownload(Download):
155
155
  auth: Optional[AuthBase] = None,
156
156
  **kwargs: Unpack[DownloadConf],
157
157
  ) -> Optional[dict[str, Any]]:
158
-
159
158
  """Send product order request.
160
159
 
161
160
  It will be executed once before the download retry loop, if the product is orderable
@@ -597,6 +596,7 @@ class HTTPDownload(Download):
597
596
  product: EOProduct,
598
597
  auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
599
598
  progress_callback: Optional[ProgressCallback] = None,
599
+ executor: Optional[ThreadPoolExecutor] = None,
600
600
  wait: float = DEFAULT_DOWNLOAD_WAIT,
601
601
  timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
602
602
  **kwargs: Unpack[DownloadConf],
@@ -638,6 +638,7 @@ class HTTPDownload(Download):
638
638
  record_filename,
639
639
  auth,
640
640
  progress_callback,
641
+ executor,
641
642
  **kwargs,
642
643
  )
643
644
  if kwargs.get("asset") is None:
@@ -675,7 +676,7 @@ class HTTPDownload(Download):
675
676
  is_empty = False
676
677
  progress_callback(len(chunk))
677
678
  fhandle.write(chunk)
678
- self.stream.close() # Closing response stream
679
+ product._stream.close() # Closing response stream
679
680
 
680
681
  if is_empty:
681
682
  raise DownloadError(f"product {product.properties['id']} is empty")
@@ -721,7 +722,7 @@ class HTTPDownload(Download):
721
722
  return product_path
722
723
 
723
724
  def _check_stream_size(self, product: EOProduct) -> int:
724
- stream_size = int(self.stream.headers.get("content-length", 0))
725
+ stream_size = int(product._stream.headers.get("content-length", 0))
725
726
  if (
726
727
  stream_size == 0
727
728
  and "order:status" in product.properties
@@ -732,14 +733,14 @@ class HTTPDownload(Download):
732
733
  % (
733
734
  product.properties["title"],
734
735
  product.properties["order:status"],
735
- self.stream.reason,
736
+ product._stream.reason,
736
737
  )
737
738
  )
738
739
  return stream_size
739
740
 
740
741
  def _check_product_filename(self, product: EOProduct) -> str:
741
742
  filename = None
742
- asset_content_disposition = self.stream.headers.get("content-disposition")
743
+ asset_content_disposition = product._stream.headers.get("content-disposition")
743
744
  if asset_content_disposition:
744
745
  filename = cast(
745
746
  Optional[str],
@@ -747,7 +748,7 @@ class HTTPDownload(Download):
747
748
  )
748
749
  if not filename:
749
750
  # default filename extracted from path
750
- filename = str(os.path.basename(self.stream.url))
751
+ filename = str(os.path.basename(product._stream.url))
751
752
  filename_extension = os.path.splitext(filename)[1]
752
753
  if not filename_extension:
753
754
  if content_type := getattr(product, "headers", {}).get("Content-Type"):
@@ -790,15 +791,20 @@ class HTTPDownload(Download):
790
791
  not getattr(self.config, "ignore_assets", False)
791
792
  or kwargs.get("asset") is not None
792
793
  ):
794
+ executor = ThreadPoolExecutor(
795
+ max_workers=getattr(self.config, "max_workers", None)
796
+ )
793
797
  try:
794
798
  assets_values = product.assets.get_values(kwargs.get("asset"))
795
- assets_stream_list = self._stream_download_assets(
796
- product,
797
- auth,
798
- None,
799
- assets_values=assets_values,
800
- **kwargs,
801
- )
799
+ with executor:
800
+ assets_stream_list = self._stream_download_assets(
801
+ product,
802
+ executor,
803
+ auth,
804
+ None,
805
+ assets_values,
806
+ **kwargs,
807
+ )
802
808
 
803
809
  # single asset
804
810
  if len(assets_stream_list) == 1:
@@ -1010,7 +1016,7 @@ class HTTPDownload(Download):
1010
1016
 
1011
1017
  s = requests.Session()
1012
1018
  try:
1013
- self.stream = s.request(
1019
+ product._stream = s.request(
1014
1020
  req_method,
1015
1021
  req_url,
1016
1022
  stream=True,
@@ -1025,7 +1031,7 @@ class HTTPDownload(Download):
1025
1031
  # location is not a valid url -> product is not available yet
1026
1032
  raise NotAvailableError("Product is not available yet")
1027
1033
  try:
1028
- self.stream.raise_for_status()
1034
+ product._stream.raise_for_status()
1029
1035
  except requests.exceptions.Timeout as exc:
1030
1036
  raise TimeOutError(exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT) from exc
1031
1037
  except RequestException as e:
@@ -1037,8 +1043,8 @@ class HTTPDownload(Download):
1037
1043
  # check if product was ordered
1038
1044
 
1039
1045
  if getattr(
1040
- self.stream, "status_code", None
1041
- ) is not None and self.stream.status_code == getattr(
1046
+ product._stream, "status_code", None
1047
+ ) is not None and product._stream.status_code == getattr(
1042
1048
  self.config, "order_status", {}
1043
1049
  ).get(
1044
1050
  "ordered", {}
@@ -1049,7 +1055,7 @@ class HTTPDownload(Download):
1049
1055
  self._process_exception(None, product, ordered_message)
1050
1056
  stream_size = self._check_stream_size(product) or None
1051
1057
 
1052
- product.headers = self.stream.headers
1058
+ product.headers = product._stream.headers
1053
1059
  filename = self._check_product_filename(product)
1054
1060
  content_type = product.headers.get("Content-Type")
1055
1061
  guessed_content_type = (
@@ -1062,11 +1068,12 @@ class HTTPDownload(Download):
1062
1068
  product.size = stream_size
1063
1069
 
1064
1070
  product.filename = filename
1065
- return self.stream.iter_content(chunk_size=64 * 1024)
1071
+ return product._stream.iter_content(chunk_size=64 * 1024)
1066
1072
 
1067
1073
  def _stream_download_assets(
1068
1074
  self,
1069
1075
  product: EOProduct,
1076
+ executor: ThreadPoolExecutor,
1070
1077
  auth: Optional[AuthBase] = None,
1071
1078
  progress_callback: Optional[ProgressCallback] = None,
1072
1079
  assets_values: list[Asset] = [],
@@ -1083,7 +1090,9 @@ class HTTPDownload(Download):
1083
1090
  self.config, "dl_url_params", {}
1084
1091
  )
1085
1092
 
1086
- total_size = self._get_asset_sizes(assets_values, auth, params) or None
1093
+ total_size = (
1094
+ self._get_asset_sizes(assets_values, executor, auth, params) or None
1095
+ )
1087
1096
 
1088
1097
  progress_callback.reset(total=total_size)
1089
1098
 
@@ -1189,11 +1198,6 @@ class HTTPDownload(Download):
1189
1198
 
1190
1199
  # Process each asset
1191
1200
  for asset in assets_values:
1192
- if not asset["href"] or asset["href"].startswith("file:"):
1193
- logger.info(
1194
- f"Local asset detected. Download skipped for {asset['href']}"
1195
- )
1196
- continue
1197
1201
  asset_chunks = get_chunks_generator(asset)
1198
1202
  try:
1199
1203
  # start reading chunks to set assets attributes
@@ -1221,6 +1225,7 @@ class HTTPDownload(Download):
1221
1225
  record_filename: str,
1222
1226
  auth: Optional[AuthBase] = None,
1223
1227
  progress_callback: Optional[ProgressCallback] = None,
1228
+ executor: Optional[ThreadPoolExecutor] = None,
1224
1229
  **kwargs: Unpack[DownloadConf],
1225
1230
  ) -> str:
1226
1231
  """Download product assets if they exist"""
@@ -1228,6 +1233,12 @@ class HTTPDownload(Download):
1228
1233
  logger.info("Progress bar unavailable, please call product.download()")
1229
1234
  progress_callback = ProgressCallback(disable=True)
1230
1235
 
1236
+ # create an executor if not given and anticipate the possible need to shut it down
1237
+ executor, shutdown_executor = (
1238
+ (ThreadPoolExecutor(), True) if executor is None else (executor, False)
1239
+ )
1240
+ self._config_executor(executor)
1241
+
1231
1242
  assets_urls = [
1232
1243
  a["href"] for a in getattr(product, "assets", {}).values() if "href" in a
1233
1244
  ]
@@ -1237,7 +1248,7 @@ class HTTPDownload(Download):
1237
1248
  assets_values = product.assets.get_values(kwargs.get("asset"))
1238
1249
 
1239
1250
  assets_stream_list = self._stream_download_assets(
1240
- product, auth, progress_callback, assets_values=assets_values, **kwargs
1251
+ product, executor, auth, progress_callback, assets_values, **kwargs
1241
1252
  )
1242
1253
 
1243
1254
  # remove existing incomplete file
@@ -1260,15 +1271,14 @@ class HTTPDownload(Download):
1260
1271
  local_assets_count += 1
1261
1272
  continue
1262
1273
 
1263
- for asset_stream in assets_stream_list:
1274
+ def download_asset(asset_stream: StreamResponse) -> None:
1264
1275
  asset_chunks = asset_stream.content
1265
1276
  asset_path = cast(str, asset_stream.arcname)
1266
1277
  asset_abs_path = os.path.join(fs_dir_path, asset_path)
1267
1278
  asset_abs_path_temp = asset_abs_path + "~"
1268
1279
  # create asset subdir if not exist
1269
1280
  asset_abs_path_dir = os.path.dirname(asset_abs_path)
1270
- if not os.path.isdir(asset_abs_path_dir):
1271
- os.makedirs(asset_abs_path_dir)
1281
+ os.makedirs(asset_abs_path_dir, exist_ok=True)
1272
1282
  # remove temporary file
1273
1283
  if os.path.isfile(asset_abs_path_temp):
1274
1284
  os.remove(asset_abs_path_temp)
@@ -1284,6 +1294,27 @@ class HTTPDownload(Download):
1284
1294
  os.path.basename(asset_abs_path),
1285
1295
  )
1286
1296
  os.rename(asset_abs_path_temp, asset_abs_path)
1297
+ return
1298
+
1299
+ # use parallelization if possible
1300
+ # when products are already downloaded in parallel but the executor has only one worker,
1301
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
1302
+ if (
1303
+ executor._thread_name_prefix == "eodag-download-all"
1304
+ and executor._max_workers == 1
1305
+ ):
1306
+ for asset_stream in assets_stream_list:
1307
+ download_asset(asset_stream)
1308
+ else:
1309
+ futures = (
1310
+ executor.submit(download_asset, asset_stream)
1311
+ for asset_stream in assets_stream_list
1312
+ )
1313
+ [f.result() for f in as_completed(futures)]
1314
+
1315
+ if shutdown_executor:
1316
+ executor.shutdown(wait=True)
1317
+
1287
1318
  # only one local asset
1288
1319
  if local_assets_count == len(assets_urls) and local_assets_count == 1:
1289
1320
  # remove empty {fs_dir_path}
@@ -1337,6 +1368,7 @@ class HTTPDownload(Download):
1337
1368
  def _get_asset_sizes(
1338
1369
  self,
1339
1370
  assets_values: list[Asset],
1371
+ executor: ThreadPoolExecutor,
1340
1372
  auth: Optional[AuthBase],
1341
1373
  params: Optional[dict[str, str]],
1342
1374
  zipped: bool = False,
@@ -1345,8 +1377,11 @@ class HTTPDownload(Download):
1345
1377
 
1346
1378
  timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
1347
1379
  ssl_verify = getattr(self.config, "ssl_verify", True)
1348
- # loop for assets size & filename
1349
- for asset in assets_values:
1380
+
1381
+ # loop for assets size & filename in parallel
1382
+ def fetch_asset_size(asset: Asset) -> None:
1383
+ nonlocal total_size
1384
+
1350
1385
  if asset["href"] and not asset["href"].startswith("file:"):
1351
1386
  # HEAD request for size & filename
1352
1387
  try:
@@ -1408,27 +1443,20 @@ class HTTPDownload(Download):
1408
1443
  asset.size = int(size_str) if size_str.isdigit() else 0
1409
1444
 
1410
1445
  total_size += asset.size
1411
- return total_size
1412
1446
 
1413
- def download_all(
1414
- self,
1415
- products: SearchResult,
1416
- auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
1417
- downloaded_callback: Optional[DownloadedCallback] = None,
1418
- progress_callback: Optional[ProgressCallback] = None,
1419
- wait: float = DEFAULT_DOWNLOAD_WAIT,
1420
- timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
1421
- **kwargs: Unpack[DownloadConf],
1422
- ):
1423
- """
1424
- Download all using parent (base plugin) method
1425
- """
1426
- return super(HTTPDownload, self).download_all(
1427
- products,
1428
- auth=auth,
1429
- downloaded_callback=downloaded_callback,
1430
- progress_callback=progress_callback,
1431
- wait=wait,
1432
- timeout=timeout,
1433
- **kwargs,
1434
- )
1447
+ # use parallelization if possible
1448
+ # when products are already downloaded in parallel but the executor has only one worker,
1449
+ # we avoid submitting nested tasks to the executor to prevent deadlocks
1450
+ if (
1451
+ executor._thread_name_prefix == "eodag-download-all"
1452
+ and executor._max_workers == 1
1453
+ ):
1454
+ for asset in assets_values:
1455
+ fetch_asset_size(asset)
1456
+ else:
1457
+ futures = (
1458
+ executor.submit(fetch_asset_size, asset) for asset in assets_values
1459
+ )
1460
+ [f.result() for f in as_completed(futures)]
1461
+
1462
+ return total_size
eodag/plugins/manager.py CHANGED
@@ -24,12 +24,8 @@ from typing import TYPE_CHECKING, Any, Iterator, Optional, Union, cast
24
24
 
25
25
  import importlib_metadata
26
26
 
27
- from eodag.config import (
28
- AUTH_TOPIC_KEYS,
29
- PLUGINS_TOPICS_KEYS,
30
- load_config,
31
- merge_configs,
32
- )
27
+ from eodag.api.provider import ProvidersDict
28
+ from eodag.config import AUTH_TOPIC_KEYS, PLUGINS_TOPICS_KEYS, load_config
33
29
  from eodag.plugins.apis.base import Api
34
30
  from eodag.plugins.authentication.base import Authentication
35
31
  from eodag.plugins.base import EODAGPluginMount
@@ -48,7 +44,8 @@ if TYPE_CHECKING:
48
44
  from requests.auth import AuthBase
49
45
 
50
46
  from eodag.api.product import EOProduct
51
- from eodag.config import PluginConfig, ProviderConfig
47
+ from eodag.api.provider import ProviderConfig
48
+ from eodag.config import PluginConfig
52
49
  from eodag.plugins.base import PluginTopic
53
50
 
54
51
 
@@ -67,8 +64,8 @@ class PluginManager:
67
64
  it, and the plugins to use to perform defined actions (search, download,
68
65
  authenticate, crunch).
69
66
 
70
- :param providers_config: The configuration with all information about the providers
71
- supported by ``eodag``
67
+ :param providers: The ProvidersDict instance with all information about the providers
68
+ supported by ``eodag``
72
69
  """
73
70
 
74
71
  supported_topics = set(PLUGINS_TOPICS_KEYS)
@@ -77,9 +74,9 @@ class PluginManager:
77
74
 
78
75
  skipped_plugins: list[str]
79
76
 
80
- def __init__(self, providers_config: dict[str, ProviderConfig]) -> None:
77
+ def __init__(self, providers: ProvidersDict) -> None:
81
78
  self.skipped_plugins = []
82
- self.providers_config = providers_config
79
+ self.providers = providers
83
80
  # Load all the plugins. This will make all plugin classes of a particular
84
81
  # type to be available in the base plugin class's 'plugins' attribute.
85
82
  # For example, by importing module 'eodag.plugins.search.resto', the plugin
@@ -119,19 +116,14 @@ class PluginManager:
119
116
  str(x) for x in dist.locate_file(name).rglob("providers.yml")
120
117
  ]
121
118
  if plugin_providers_config_path:
122
- plugin_providers_config = load_config(
123
- plugin_providers_config_path[0]
124
- )
125
- merge_configs(plugin_providers_config, self.providers_config)
126
- self.providers_config = plugin_providers_config
119
+ plugin_configs = load_config(plugin_providers_config_path[0])
120
+ self.providers.update_from_configs(plugin_configs)
127
121
  self.rebuild()
128
122
 
129
- def rebuild(
130
- self, providers_config: Optional[dict[str, ProviderConfig]] = None
131
- ) -> None:
123
+ def rebuild(self, providers: Optional[ProvidersDict] = None) -> None:
132
124
  """(Re)Build plugin manager mapping and cache"""
133
- if providers_config is not None:
134
- self.providers_config = providers_config
125
+ if providers is not None:
126
+ self.providers = providers
135
127
 
136
128
  self.build_collection_to_provider_config_map()
137
129
  self._built_plugins_cache: dict[tuple[str, str, str], Any] = {}
@@ -139,27 +131,22 @@ class PluginManager:
139
131
  def build_collection_to_provider_config_map(self) -> None:
140
132
  """Build mapping conf between collections and providers"""
141
133
  self.collection_to_provider_config_map = {}
142
- for provider in list(self.providers_config):
143
- provider_config = self.providers_config[provider]
144
- if not hasattr(provider_config, "products") or not provider_config.products:
134
+ for provider in list(self.providers.values()):
135
+ if not provider.collections_config:
145
136
  logger.info(
146
137
  "%s: provider has no product configured and will be skipped",
147
138
  provider,
148
139
  )
149
- self.providers_config.pop(provider)
140
+ del self.providers[provider.name]
150
141
  continue
151
142
 
152
- # provider priority set to lowest if not set
153
- if getattr(provider_config, "priority", None) is None:
154
- self.providers_config[provider].priority = provider_config.priority = 0
155
-
156
- for collection in provider_config.products:
143
+ for collection in provider.collections_config:
157
144
  collection_providers = (
158
145
  self.collection_to_provider_config_map.setdefault( # noqa
159
146
  collection, []
160
147
  )
161
148
  )
162
- collection_providers.append(provider_config)
149
+ collection_providers.append(provider.config)
163
150
  collection_providers.sort(key=attrgetter("priority"), reverse=True)
164
151
 
165
152
  def get_search_plugins(
@@ -203,7 +190,7 @@ class PluginManager:
203
190
  )
204
191
  configs = self.collection_to_provider_config_map[GENERIC_COLLECTION]
205
192
  else:
206
- configs = list(self.providers_config.values())
193
+ configs = list(p.config for p in self.providers.values())
207
194
 
208
195
  if provider:
209
196
  configs = [
@@ -227,7 +214,10 @@ class PluginManager:
227
214
  :param product: The product to get a download plugin for
228
215
  :returns: The download plugin capable of downloading the product
229
216
  """
230
- plugin_conf = self.providers_config[product.provider]
217
+ plugin_conf = self.providers.get_config(product.provider)
218
+ if plugin_conf is None:
219
+ msg = f"Provider {product.provider} not found"
220
+ raise UnsupportedProvider(msg)
231
221
  if download := getattr(plugin_conf, "download", None):
232
222
  plugin_conf.download.priority = plugin_conf.priority
233
223
  plugin = cast(
@@ -322,7 +312,7 @@ class PluginManager:
322
312
  return False
323
313
 
324
314
  # providers configs with given provider at first
325
- sorted_providers_config = deepcopy(self.providers_config)
315
+ sorted_providers_config = deepcopy(self.providers.configs)
326
316
  sorted_providers_config = {
327
317
  provider: sorted_providers_config.pop(provider),
328
318
  **sorted_providers_config,