eodag 4.0.0a4__py3-none-any.whl → 4.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eodag/api/collection.py +65 -1
- eodag/api/core.py +65 -19
- eodag/api/product/_assets.py +1 -1
- eodag/api/product/_product.py +133 -18
- eodag/api/product/drivers/__init__.py +3 -1
- eodag/api/product/drivers/base.py +3 -1
- eodag/api/product/drivers/generic.py +9 -5
- eodag/api/product/drivers/sentinel1.py +14 -9
- eodag/api/product/drivers/sentinel2.py +14 -7
- eodag/api/product/metadata_mapping.py +5 -2
- eodag/api/provider.py +1 -0
- eodag/api/search_result.py +4 -1
- eodag/cli.py +17 -8
- eodag/config.py +22 -4
- eodag/plugins/apis/ecmwf.py +3 -24
- eodag/plugins/apis/usgs.py +3 -24
- eodag/plugins/download/aws.py +85 -44
- eodag/plugins/download/base.py +117 -41
- eodag/plugins/download/http.py +88 -65
- eodag/plugins/search/base.py +8 -3
- eodag/plugins/search/build_search_result.py +108 -120
- eodag/plugins/search/cop_marine.py +3 -1
- eodag/plugins/search/qssearch.py +7 -6
- eodag/resources/collections.yml +255 -0
- eodag/resources/ext_collections.json +1 -1
- eodag/resources/ext_product_types.json +1 -1
- eodag/resources/providers.yml +62 -25
- eodag/resources/user_conf_template.yml +6 -0
- eodag/types/__init__.py +22 -16
- eodag/types/download_args.py +3 -1
- eodag/types/queryables.py +125 -55
- eodag/types/stac_extensions.py +408 -0
- eodag/types/stac_metadata.py +312 -0
- eodag/utils/__init__.py +42 -4
- eodag/utils/dates.py +202 -2
- eodag/utils/s3.py +4 -4
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/METADATA +7 -13
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/RECORD +42 -40
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/WHEEL +1 -1
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/entry_points.txt +1 -1
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/licenses/LICENSE +0 -0
- {eodag-4.0.0a4.dist-info → eodag-4.0.0b1.dist-info}/top_level.txt +0 -0
eodag/plugins/download/base.py
CHANGED
@@ -29,6 +29,8 @@ from pathlib import Path
 from time import sleep
 from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, Union
 
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
 from eodag.api.product.metadata_mapping import ONLINE_STATUS
 from eodag.plugins.base import PluginTopic
 from eodag.utils import (
@@ -105,6 +107,7 @@ class Download(PluginTopic):
         product: EOProduct,
         auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
         progress_callback: Optional[ProgressCallback] = None,
+        executor: Optional[ThreadPoolExecutor] = None,
         wait: float = DEFAULT_DOWNLOAD_WAIT,
         timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
         **kwargs: Unpack[DownloadConf],
@@ -115,6 +118,7 @@ class Download(PluginTopic):
         :param product: The EO product to download
         :param auth: (optional) authenticated object
         :param progress_callback: (optional) A progress callback
+        :param executor: (optional) An executor to download assets of ``product`` in parallel if it has any
         :param wait: (optional) If download fails, wait time in minutes between two download tries
         :param timeout: (optional) If download fails, maximum time in minutes before stop retrying
                         to download
@@ -447,6 +451,7 @@ class Download(PluginTopic):
         auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
         downloaded_callback: Optional[DownloadedCallback] = None,
         progress_callback: Optional[ProgressCallback] = None,
+        executor: Optional[ThreadPoolExecutor] = None,
         wait: float = DEFAULT_DOWNLOAD_WAIT,
         timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
         **kwargs: Unpack[DownloadConf],
@@ -454,7 +459,7 @@ class Download(PluginTopic):
         """
         Base download_all method.
 
-        This specific implementation uses the :meth
+        This specific implementation uses the :meth:`~eodag.api.product._product.EOProduct.download` method
        implemented by the plugin to **sequentially** attempt to download products.
 
         :param products: Products to download
@@ -465,6 +470,8 @@ class Download(PluginTopic):
                                     its ``__call__`` method. Will be called each time a product
                                     finishes downloading
         :param progress_callback: (optional) A progress callback
+        :param executor: (optional) An executor to download products in parallel which may
+                         be reused to also download assets of these products in parallel.
         :param wait: (optional) If download fails, wait time in minutes between two download tries
         :param timeout: (optional) If download fails, maximum time in minutes before stop retrying
                         to download
@@ -485,9 +492,15 @@ class Download(PluginTopic):
         stop_time = start_time + timedelta(minutes=timeout)
         nb_products = len(products)
         retry_count = 0
-        # another output for
+        # another output for notebooks
         nb_info = NotebookWidgets()
 
+        # create an executor if not given
+        executor = ThreadPoolExecutor() if executor is None else executor
+        # set thread name prefix so that the EOProduct download() method can identify
+        # whether the executor was created during parallel product downloads or not
+        self._config_executor(executor, "eodag-download-all")
+
         for product in products:
             product.next_try = start_time
 
@@ -508,53 +521,88 @@ class Download(PluginTopic):
             progress_callback.unit_scale = False
             progress_callback.refresh()
 
+        # anticipate nested tasks to download assets in parallel for at least one product
+        nested_asset_downloads = any(
+            product
+            for product in products
+            if (
+                product.downloader
+                and product.downloader.config.type == "AwsDownload"
+                or len(product.assets) > 0
+                and (
+                    not getattr(self.config, "ignore_assets", False)
+                    or kwargs.get("asset") is not None
+                )
+            )
+        )
+
         with progress_callback as bar:
             while "Loop until all products are download or timeout is reached":
-                # try downloading each product before retry
-
+                # try downloading each product in parallel before retry
+
+                # Download products in batches to handle nested tasks to download assets in parallel.
+                # We avoid having less workers in the executor than the number of products to download in parallel
+                # to prevent deadlocks. This could happen by submiting and waiting for a task within a task.
+                # We ensure at least one thread is available for these tasks and at least one product is downloaded
+                # at a time.
+                # If there is only one worker, a specific process at assets download level is used to avoid deadlocks.
+                batch_size = len(products)
+                if nested_asset_downloads and executor._max_workers <= batch_size:
+                    batch_size = max(executor._max_workers - 1, 1)
+
+                products_batch = products[:batch_size]
+                futures = {}
+
+                for idx, product in enumerate(products_batch):
                    if datetime.now() >= product.next_try:
                        products[idx].next_try += timedelta(minutes=wait)
-
-
-
-
-
-
-
-
-
+                        future = executor.submit(
+                            product.download,
+                            progress_callback=product_progress_callback,
+                            executor=executor,
+                            wait=wait,
+                            timeout=-1,
+                            **kwargs,  # type: ignore
+                        )
+                        futures[future] = product
 
-
-
+                for future in as_completed(futures.keys()):
+                    product = futures[future]
+                    try:
+                        result = future.result()
+                        paths.append(result)
 
-
-
-                        bar(1)
+                        if downloaded_callback:
+                            downloaded_callback(product)
 
-
-
+                        # product downloaded, to not retry it
+                        products.remove(product)
+                        bar(1)
 
-
-
-                        continue
+                        # reset stop time for next product
+                        stop_time = datetime.now() + timedelta(minutes=timeout)
 
-
-
-
-                        )
-                        raise
+                    except NotAvailableError as e:
+                        logger.info(e)
+                        continue
 
-
-
+                    except (AuthenticationError, MisconfiguredError):
+                        logger.exception(
+                            f"Stopped because of credentials problems with provider {self.provider}"
+                        )
+                        raise
 
-
-
-
-
-
+                    except (RuntimeError, Exception):
+                        import traceback as tb
+
+                        logger.error(
+                            f"A problem occurred during download of product: {product}. "
+                            "Skipping it"
+                        )
+                        logger.debug(f"\n{tb.format_exc()}")
 
-
-
+                        # product skipped, to not retry it
+                        products.remove(product)
 
                if (
                    len(products) > 0
@@ -567,6 +615,7 @@ class Download(PluginTopic):
                        f"[Retry #{retry_count}, {nb_products - len(products)}/{nb_products} D/L] "
                        f"Waiting {wait_seconds}s until next download try (retry every {wait}' for {timeout}')"
                    )
+
                    logger.info(info_message)
                    nb_info.display_html(info_message)
                    sleep(wait_seconds + 1)
@@ -579,6 +628,9 @@ class Download(PluginTopic):
                elif len(products) == 0:
                    break
 
+        # Shutdown executor at the end
+        executor.shutdown(wait=True)
+
        return paths
 
    def _order_download_retry(
@@ -641,8 +693,8 @@ class Download(PluginTopic):
                    )
                    logger.info(not_available_info)
                    # Retry-After info from Response header
-                    if hasattr(
-                        retry_server_info =
+                    if hasattr(product, "_stream"):
+                        retry_server_info = product._stream.headers.get(
                            "Retry-After", ""
                        )
                        if retry_server_info:
@@ -663,8 +715,8 @@ class Download(PluginTopic):
                    )
                    logger.info(not_available_info)
                    # Retry-After info from Response header
-                    if hasattr(
-                        retry_server_info =
+                    if hasattr(product, "_stream"):
+                        retry_server_info = product._stream.headers.get(
                            "Retry-After", ""
                        )
                        if retry_server_info:
@@ -690,3 +742,27 @@ class Download(PluginTopic):
            return download_and_retry
 
        return decorator
+
+    def _config_executor(
+        self, executor: ThreadPoolExecutor, thread_name_prefix: Optional[str] = None
+    ) -> None:
+        """
+        Configure a ThreadPoolExecutor instance.
+
+        This method ensures that a ThreadPoolExecutor is correctly set for downloads by adjusting its
+        maximum number of workers if necessary. It also configures the thread name prefix to identify
+        threads created by the executor, which is useful for distinguishing between executors created
+        for parallel product downloads versus those created for other purposes.
+
+        :param executor: A ThreadPoolExecutor instance.
+        :param thread_name_prefix: (optional) A prefix for naming threads created by the executor.
+                                   When provided, threads will be named using this prefix to help
+                                   identify the executor's purpose (e.g., "eodag-download-all").
+        """
+        if (
+            max_workers := getattr(self.config, "max_workers", executor._max_workers)
+        ) < executor._max_workers:
+            executor._max_workers = max_workers
+
+        if thread_name_prefix:
+            executor._thread_name_prefix = "eodag-download-all"
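In short, this change threads an optional executor argument from download_all down to per-product and per-asset downloads, so a single ThreadPoolExecutor is reused at both levels. A minimal usage sketch of the new keyword, assuming plugin is an already configured eodag download plugin and products the search results to fetch (only the executor keyword and the shutdown behaviour are taken from the hunks above, the rest is illustrative):

from concurrent.futures import ThreadPoolExecutor

# Assumption: `plugin` and `products` were obtained through the usual eodag search flow.
executor = ThreadPoolExecutor(max_workers=4)  # >1 worker leaves room for nested asset tasks

paths = plugin.download_all(
    products,
    executor=executor,  # reused for per-asset downloads of each product
)
# Per the diff, download_all() calls executor.shutdown(wait=True) when it finishes,
# so this executor should not be reused afterwards.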
eodag/plugins/download/http.py
CHANGED
@@ -27,24 +27,17 @@ from email.message import Message
 from itertools import chain
 from json import JSONDecodeError
 from pathlib import Path
-from typing import
-    TYPE_CHECKING,
-    Any,
-    Iterator,
-    Literal,
-    Optional,
-    TypedDict,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Iterator, Literal, Optional, Union, cast
 from urllib.parse import parse_qs, urlparse
 
 import geojson
 import requests
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from lxml import etree
 from requests import RequestException
 from requests.auth import AuthBase
 from requests.structures import CaseInsensitiveDict
+from typing_extensions import TypedDict
 from zipstream import ZipStream
 
 from eodag.api.product.metadata_mapping import (
@@ -90,10 +83,9 @@ if TYPE_CHECKING:
    from requests import Response
 
    from eodag.api.product import Asset, EOProduct  # type: ignore
-    from eodag.api.search_result import SearchResult
    from eodag.config import PluginConfig
    from eodag.types.download_args import DownloadConf
-    from eodag.utils import
+    from eodag.utils import Unpack
 
 logger = logging.getLogger("eodag.download.http")
 
@@ -476,6 +468,8 @@ class HTTPDownload(Download):
        if (
            success_status and success_status != status_dict.get("eodag:order_status")
        ) or (success_code and success_code != response.status_code):
+            # Remove the download link if the order has not been completed or was not successful
+            product.properties.pop("eodag:download_link", None)
            return None
 
        product.properties["order:status"] = ONLINE_STATUS
@@ -596,6 +590,7 @@ class HTTPDownload(Download):
        product: EOProduct,
        auth: Optional[Union[AuthBase, S3ServiceResource]] = None,
        progress_callback: Optional[ProgressCallback] = None,
+        executor: Optional[ThreadPoolExecutor] = None,
        wait: float = DEFAULT_DOWNLOAD_WAIT,
        timeout: float = DEFAULT_DOWNLOAD_TIMEOUT,
        **kwargs: Unpack[DownloadConf],
@@ -637,6 +632,7 @@ class HTTPDownload(Download):
                record_filename,
                auth,
                progress_callback,
+                executor,
                **kwargs,
            )
            if kwargs.get("asset") is None:
@@ -674,7 +670,7 @@ class HTTPDownload(Download):
                    is_empty = False
                    progress_callback(len(chunk))
                    fhandle.write(chunk)
-
+            product._stream.close()  # Closing response stream
 
        if is_empty:
            raise DownloadError(f"product {product.properties['id']} is empty")
@@ -720,7 +716,7 @@ class HTTPDownload(Download):
        return product_path
 
    def _check_stream_size(self, product: EOProduct) -> int:
-        stream_size = int(
+        stream_size = int(product._stream.headers.get("content-length", 0))
        if (
            stream_size == 0
            and "order:status" in product.properties
@@ -731,14 +727,14 @@ class HTTPDownload(Download):
                % (
                    product.properties["title"],
                    product.properties["order:status"],
-
+                    product._stream.reason,
                )
            )
        return stream_size
 
    def _check_product_filename(self, product: EOProduct) -> str:
        filename = None
-        asset_content_disposition =
+        asset_content_disposition = product._stream.headers.get("content-disposition")
        if asset_content_disposition:
            filename = cast(
                Optional[str],
@@ -746,7 +742,7 @@ class HTTPDownload(Download):
            )
        if not filename:
            # default filename extracted from path
-            filename = str(os.path.basename(
+            filename = str(os.path.basename(product._stream.url))
        filename_extension = os.path.splitext(filename)[1]
        if not filename_extension:
            if content_type := getattr(product, "headers", {}).get("Content-Type"):
@@ -789,15 +785,20 @@ class HTTPDownload(Download):
            not getattr(self.config, "ignore_assets", False)
            or kwargs.get("asset") is not None
        ):
+            executor = ThreadPoolExecutor(
+                max_workers=getattr(self.config, "max_workers", None)
+            )
            try:
                assets_values = product.assets.get_values(kwargs.get("asset"))
-
-
-
-
-
-
-
+                with executor:
+                    assets_stream_list = self._stream_download_assets(
+                        product,
+                        executor,
+                        auth,
+                        None,
+                        assets_values,
+                        **kwargs,
+                    )
 
                # single asset
                if len(assets_stream_list) == 1:
@@ -1009,7 +1010,7 @@ class HTTPDownload(Download):
 
        s = requests.Session()
        try:
-
+            product._stream = s.request(
                req_method,
                req_url,
                stream=True,
@@ -1024,7 +1025,7 @@ class HTTPDownload(Download):
            # location is not a valid url -> product is not available yet
            raise NotAvailableError("Product is not available yet")
        try:
-
+            product._stream.raise_for_status()
        except requests.exceptions.Timeout as exc:
            raise TimeOutError(exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT) from exc
        except RequestException as e:
@@ -1036,8 +1037,8 @@ class HTTPDownload(Download):
            # check if product was ordered
 
            if getattr(
-
-            ) is not None and
+                product._stream, "status_code", None
+            ) is not None and product._stream.status_code == getattr(
                self.config, "order_status", {}
            ).get(
                "ordered", {}
@@ -1048,7 +1049,7 @@ class HTTPDownload(Download):
            self._process_exception(None, product, ordered_message)
        stream_size = self._check_stream_size(product) or None
 
-        product.headers =
+        product.headers = product._stream.headers
        filename = self._check_product_filename(product)
        content_type = product.headers.get("Content-Type")
        guessed_content_type = (
@@ -1061,11 +1062,12 @@ class HTTPDownload(Download):
            product.size = stream_size
 
        product.filename = filename
-        return
+        return product._stream.iter_content(chunk_size=64 * 1024)
 
    def _stream_download_assets(
        self,
        product: EOProduct,
+        executor: ThreadPoolExecutor,
        auth: Optional[AuthBase] = None,
        progress_callback: Optional[ProgressCallback] = None,
        assets_values: list[Asset] = [],
@@ -1082,7 +1084,9 @@ class HTTPDownload(Download):
            self.config, "dl_url_params", {}
        )
 
-        total_size =
+        total_size = (
+            self._get_asset_sizes(assets_values, executor, auth, params) or None
+        )
 
        progress_callback.reset(total=total_size)
 
@@ -1188,11 +1192,6 @@ class HTTPDownload(Download):
 
        # Process each asset
        for asset in assets_values:
-            if not asset["href"] or asset["href"].startswith("file:"):
-                logger.info(
-                    f"Local asset detected. Download skipped for {asset['href']}"
-                )
-                continue
            asset_chunks = get_chunks_generator(asset)
            try:
                # start reading chunks to set assets attributes
@@ -1220,6 +1219,7 @@ class HTTPDownload(Download):
        record_filename: str,
        auth: Optional[AuthBase] = None,
        progress_callback: Optional[ProgressCallback] = None,
+        executor: Optional[ThreadPoolExecutor] = None,
        **kwargs: Unpack[DownloadConf],
    ) -> str:
        """Download product assets if they exist"""
@@ -1227,6 +1227,12 @@ class HTTPDownload(Download):
            logger.info("Progress bar unavailable, please call product.download()")
            progress_callback = ProgressCallback(disable=True)
 
+        # create an executor if not given and anticipate the possible need to shut it down
+        executor, shutdown_executor = (
+            (ThreadPoolExecutor(), True) if executor is None else (executor, False)
+        )
+        self._config_executor(executor)
+
        assets_urls = [
            a["href"] for a in getattr(product, "assets", {}).values() if "href" in a
        ]
@@ -1236,7 +1242,7 @@ class HTTPDownload(Download):
        assets_values = product.assets.get_values(kwargs.get("asset"))
 
        assets_stream_list = self._stream_download_assets(
-            product, auth, progress_callback, assets_values
+            product, executor, auth, progress_callback, assets_values, **kwargs
        )
 
        # remove existing incomplete file
@@ -1259,15 +1265,14 @@ class HTTPDownload(Download):
                local_assets_count += 1
                continue
 
-
+        def download_asset(asset_stream: StreamResponse) -> None:
            asset_chunks = asset_stream.content
            asset_path = cast(str, asset_stream.arcname)
            asset_abs_path = os.path.join(fs_dir_path, asset_path)
            asset_abs_path_temp = asset_abs_path + "~"
            # create asset subdir if not exist
            asset_abs_path_dir = os.path.dirname(asset_abs_path)
-
-                os.makedirs(asset_abs_path_dir)
+            os.makedirs(asset_abs_path_dir, exist_ok=True)
            # remove temporary file
            if os.path.isfile(asset_abs_path_temp):
                os.remove(asset_abs_path_temp)
@@ -1283,6 +1288,27 @@ class HTTPDownload(Download):
                    os.path.basename(asset_abs_path),
                )
                os.rename(asset_abs_path_temp, asset_abs_path)
+            return
+
+        # use parallelization if possible
+        # when products are already downloaded in parallel but the executor has only one worker,
+        # we avoid submitting nested tasks to the executor to prevent deadlocks
+        if (
+            executor._thread_name_prefix == "eodag-download-all"
+            and executor._max_workers == 1
+        ):
+            for asset_stream in assets_stream_list:
+                download_asset(asset_stream)
+        else:
+            futures = (
+                executor.submit(download_asset, asset_stream)
+                for asset_stream in assets_stream_list
+            )
+            [f.result() for f in as_completed(futures)]
+
+        if shutdown_executor:
+            executor.shutdown(wait=True)
+
        # only one local asset
        if local_assets_count == len(assets_urls) and local_assets_count == 1:
            # remove empty {fs_dir_path}
@@ -1336,6 +1362,7 @@ class HTTPDownload(Download):
    def _get_asset_sizes(
        self,
        assets_values: list[Asset],
+        executor: ThreadPoolExecutor,
        auth: Optional[AuthBase],
        params: Optional[dict[str, str]],
        zipped: bool = False,
@@ -1344,8 +1371,11 @@ class HTTPDownload(Download):
 
        timeout = getattr(self.config, "timeout", HTTP_REQ_TIMEOUT)
        ssl_verify = getattr(self.config, "ssl_verify", True)
-
-        for
+
+        # loop for assets size & filename in parallel
+        def fetch_asset_size(asset: Asset) -> None:
+            nonlocal total_size
+
            if asset["href"] and not asset["href"].startswith("file:"):
                # HEAD request for size & filename
                try:
@@ -1407,27 +1437,20 @@ class HTTPDownload(Download):
                        asset.size = int(size_str) if size_str.isdigit() else 0
 
            total_size += asset.size
-        return total_size
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            downloaded_callback=downloaded_callback,
-            progress_callback=progress_callback,
-            wait=wait,
-            timeout=timeout,
-            **kwargs,
-        )
+        # use parallelization if possible
+        # when products are already downloaded in parallel but the executor has only one worker,
+        # we avoid submitting nested tasks to the executor to prevent deadlocks
+        if (
+            executor._thread_name_prefix == "eodag-download-all"
+            and executor._max_workers == 1
+        ):
+            for asset in assets_values:
+                fetch_asset_size(asset)
+        else:
+            futures = (
+                executor.submit(fetch_asset_size, asset) for asset in assets_values
+            )
+            [f.result() for f in as_completed(futures)]
+
+        return total_size
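The guard on executor._thread_name_prefix == "eodag-download-all" and executor._max_workers == 1 that appears twice above avoids a classic nested-submission deadlock: a task that submits sub-tasks to the same single-worker pool and then blocks on their results waits forever, because the only worker is the one doing the waiting. A standalone sketch of that pattern and of the inline fallback, using only the standard library (names are illustrative, not eodag APIs):

from concurrent.futures import ThreadPoolExecutor, as_completed


def fetch_asset(name: str) -> str:
    # stand-in for a single asset download
    return f"{name} downloaded"


def download_product(executor: ThreadPoolExecutor, assets: list[str]) -> list[str]:
    # Same idea as the guard in the diff: this function already runs as a task of
    # `executor`; submitting asset tasks back to a single-worker executor and then
    # waiting on them would deadlock, so run them inline in that case.
    if executor._max_workers == 1:  # private attribute, as used by the diff itself
        return [fetch_asset(a) for a in assets]
    futures = [executor.submit(fetch_asset, a) for a in assets]
    return [f.result() for f in as_completed(futures)]


with ThreadPoolExecutor(max_workers=2) as pool:
    # with 2+ workers, at least one worker stays free to run the nested asset tasks
    product_future = pool.submit(download_product, pool, ["band_04", "band_08"])
    print(product_future.result())

This is also why download_all in base.py caps the product batch size at executor._max_workers - 1: one worker is always kept free for nested asset tasks.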
eodag/plugins/search/base.py
CHANGED
@@ -35,6 +35,7 @@ from eodag.plugins.search import PreparedSearch
 from eodag.types import model_fields_to_annotated
 from eodag.types.queryables import Queryables, QueryablesDict
 from eodag.types.search_args import SortByList
+from eodag.types.stac_metadata import CommonStacMetadata, create_stac_metadata_model
 from eodag.utils import (
     GENERIC_COLLECTION,
     copy_deepcopy,
@@ -358,7 +359,7 @@ class Search(PluginTopic):
            queryables = self.discover_queryables(**{**default_values, **filters}) or {}
        except NotImplementedError as e:
            if str(e):
-                logger.debug(str(e))
+                logger.debug("%s, configured metadata-mapping used", str(e))
            queryables = self.queryables_from_metadata_mapping(collection, alias)
 
        return QueryablesDict(**queryables)
@@ -408,9 +409,10 @@ class Search(PluginTopic):
            col_queryables = self._get_collection_queryables(col, None, filters)
            all_queryables.update(col_queryables)
        # reset defaults because they may vary between collections
+        queryables_fields = Queryables.from_stac_models().model_fields
        for k, v in all_queryables.items():
            v.__metadata__[0].default = getattr(
-
+                queryables_fields.get(k, Field(None)), "default", None
            )
        return QueryablesDict(
            additional_properties=auto_discovery,
@@ -468,8 +470,11 @@ class Search(PluginTopic):
        ):
            del metadata_mapping[param]
 
+        queryables_model = create_stac_metadata_model(
+            base_models=[Queryables, CommonStacMetadata]
+        )
        eodag_queryables = copy_deepcopy(
-            model_fields_to_annotated(
+            model_fields_to_annotated(queryables_model.model_fields)
        )
        queryables["collection"] = eodag_queryables.pop("collection")
        # add default value for collection