lsst-resources 29.2025.1700__py3-none-any.whl → 29.2025.4600__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/resources/_resourceHandles/_davResourceHandle.py +197 -0
- lsst/resources/_resourceHandles/_fileResourceHandle.py +1 -1
- lsst/resources/_resourceHandles/_httpResourceHandle.py +7 -4
- lsst/resources/_resourceHandles/_s3ResourceHandle.py +3 -17
- lsst/resources/_resourcePath.py +311 -79
- lsst/resources/dav.py +912 -0
- lsst/resources/davutils.py +2659 -0
- lsst/resources/file.py +41 -16
- lsst/resources/gs.py +6 -3
- lsst/resources/http.py +194 -65
- lsst/resources/mem.py +7 -1
- lsst/resources/s3.py +141 -15
- lsst/resources/s3utils.py +8 -1
- lsst/resources/schemeless.py +6 -3
- lsst/resources/tests.py +66 -12
- lsst/resources/utils.py +43 -0
- lsst/resources/version.py +1 -1
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/METADATA +3 -3
- lsst_resources-29.2025.4600.dist-info/RECORD +31 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/WHEEL +1 -1
- lsst_resources-29.2025.1700.dist-info/RECORD +0 -28
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/resources/_resourcePath.py
CHANGED
|
@@ -19,14 +19,15 @@ import copy
|
|
|
19
19
|
import io
|
|
20
20
|
import locale
|
|
21
21
|
import logging
|
|
22
|
-
import multiprocessing
|
|
23
22
|
import os
|
|
24
23
|
import posixpath
|
|
25
24
|
import re
|
|
25
|
+
import sys
|
|
26
26
|
import urllib.parse
|
|
27
|
-
from
|
|
27
|
+
from collections import defaultdict
|
|
28
28
|
from pathlib import Path, PurePath, PurePosixPath
|
|
29
29
|
from random import Random
|
|
30
|
+
from typing import TypeAlias
|
|
30
31
|
|
|
31
32
|
try:
|
|
32
33
|
import fsspec
|
|
@@ -39,7 +40,7 @@ from collections.abc import Iterable, Iterator
|
|
|
39
40
|
from typing import TYPE_CHECKING, Any, Literal, NamedTuple, overload
|
|
40
41
|
|
|
41
42
|
from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
|
|
42
|
-
from .utils import get_tempdir
|
|
43
|
+
from .utils import _get_num_workers, get_tempdir
|
|
43
44
|
|
|
44
45
|
if TYPE_CHECKING:
|
|
45
46
|
from .utils import TransactionProtocol
|
|
@@ -53,49 +54,81 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
|
|
|
53
54
|
# Precomputed escaped hash
|
|
54
55
|
ESCAPED_HASH = urllib.parse.quote("#")
|
|
55
56
|
|
|
56
|
-
# Maximum number of worker threads for parallelized operations.
|
|
57
|
-
# If greater than 10, be aware that this number has to be consistent
|
|
58
|
-
# with connection pool sizing (for example in urllib3).
|
|
59
|
-
MAX_WORKERS = 10
|
|
60
57
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
"""Report on a bulk transfer."""
|
|
58
|
+
class MBulkResult(NamedTuple):
|
|
59
|
+
"""Report on a bulk operation."""
|
|
64
60
|
|
|
65
61
|
success: bool
|
|
66
62
|
exception: Exception | None
|
|
67
63
|
|
|
68
64
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
65
|
+
_EXECUTOR_TYPE: TypeAlias = type[
|
|
66
|
+
concurrent.futures.ThreadPoolExecutor | concurrent.futures.ProcessPoolExecutor
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
# Cache value for executor class so as not to issue warning multiple
|
|
70
|
+
# times but still allow tests to override the value.
|
|
71
|
+
_POOL_EXECUTOR_CLASS: _EXECUTOR_TYPE | None = None
|
|
76
72
|
|
|
77
73
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
f"""Calculate the number of workers to use.
|
|
74
|
+
def _get_executor_class() -> _EXECUTOR_TYPE:
|
|
75
|
+
"""Return the executor class used for parallelized execution.
|
|
81
76
|
|
|
82
77
|
Returns
|
|
83
78
|
-------
|
|
84
|
-
|
|
85
|
-
The
|
|
86
|
-
|
|
87
|
-
|
|
79
|
+
cls : `concurrent.futures.Executor`
|
|
80
|
+
The ``Executor`` class. Default is
|
|
81
|
+
`concurrent.futures.ThreadPoolExecutor`. Can be set explicitly by
|
|
82
|
+
setting the ``$LSST_RESOURCES_EXECUTOR`` environment variable to
|
|
83
|
+
"threads" or "process". Returns the thread pool if the value of the
|
|
84
|
+
variable is not recognized.
|
|
85
|
+
"""
|
|
86
|
+
global _POOL_EXECUTOR_CLASS
|
|
87
|
+
|
|
88
|
+
if _POOL_EXECUTOR_CLASS is not None:
|
|
89
|
+
return _POOL_EXECUTOR_CLASS
|
|
90
|
+
|
|
91
|
+
pool_executor_classes = {
|
|
92
|
+
"threads": concurrent.futures.ThreadPoolExecutor,
|
|
93
|
+
"process": concurrent.futures.ProcessPoolExecutor,
|
|
94
|
+
}
|
|
95
|
+
default_executor = "threads"
|
|
96
|
+
external = os.getenv("LSST_RESOURCES_EXECUTOR", default_executor)
|
|
97
|
+
if not external:
|
|
98
|
+
external = default_executor
|
|
99
|
+
if external not in pool_executor_classes:
|
|
100
|
+
log.warning(
|
|
101
|
+
"Unrecognized value of '%s' for LSST_RESOURCES_EXECUTOR env var. Using '%s'",
|
|
102
|
+
external,
|
|
103
|
+
default_executor,
|
|
104
|
+
)
|
|
105
|
+
external = default_executor
|
|
106
|
+
_POOL_EXECUTOR_CLASS = pool_executor_classes[external]
|
|
107
|
+
return _POOL_EXECUTOR_CLASS
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@contextlib.contextmanager
|
|
111
|
+
def _patch_environ(new_values: dict[str, str]) -> Iterator[None]:
|
|
112
|
+
"""Patch os.environ temporarily using the supplied values.
|
|
113
|
+
|
|
114
|
+
Parameters
|
|
115
|
+
----------
|
|
116
|
+
new_values : `dict` [ `str`, `str` ]
|
|
117
|
+
New values to be stored in the environment.
|
|
88
118
|
"""
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
if cpu_limit is not None:
|
|
95
|
-
num_workers = cpu_limit + 2
|
|
119
|
+
old_values: dict[str, str] = {}
|
|
120
|
+
for k, v in new_values.items():
|
|
121
|
+
if k in os.environ:
|
|
122
|
+
old_values[k] = os.environ[k]
|
|
123
|
+
os.environ[k] = v
|
|
96
124
|
|
|
97
|
-
|
|
98
|
-
|
|
125
|
+
try:
|
|
126
|
+
yield
|
|
127
|
+
finally:
|
|
128
|
+
for k in new_values:
|
|
129
|
+
del os.environ[k]
|
|
130
|
+
if k in old_values:
|
|
131
|
+
os.environ[k] = old_values[k]
|
|
99
132
|
|
|
100
133
|
|
|
101
134
|
class ResourcePath: # numpydoc ignore=PR02
|
|
@@ -336,9 +369,9 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
336
369
|
|
|
337
370
|
subclass = HttpResourcePath
|
|
338
371
|
elif parsed.scheme in {"dav", "davs"}:
|
|
339
|
-
from .
|
|
372
|
+
from .dav import DavResourcePath
|
|
340
373
|
|
|
341
|
-
subclass =
|
|
374
|
+
subclass = DavResourcePath
|
|
342
375
|
elif parsed.scheme == "gs":
|
|
343
376
|
from .gs import GSResourcePath
|
|
344
377
|
|
|
@@ -517,7 +550,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
517
550
|
return self, ""
|
|
518
551
|
|
|
519
552
|
head, tail = self._pathModule.split(self.path)
|
|
520
|
-
headuri = self._uri._replace(path=head)
|
|
553
|
+
headuri = self._uri._replace(path=head, fragment="", query="", params="")
|
|
521
554
|
|
|
522
555
|
# The file part should never include quoted metacharacters
|
|
523
556
|
tail = urllib.parse.unquote(tail)
|
|
@@ -587,7 +620,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
587
620
|
# regardless of the presence of a trailing separator
|
|
588
621
|
originalPath = self._pathLib(self.path)
|
|
589
622
|
parentPath = originalPath.parent
|
|
590
|
-
return self.replace(path=str(parentPath), forceDirectory=True)
|
|
623
|
+
return self.replace(path=str(parentPath), forceDirectory=True, fragment="", query="", params="")
|
|
591
624
|
|
|
592
625
|
def replace(
|
|
593
626
|
self, forceDirectory: bool | None = None, isTemporary: bool = False, **kwargs: Any
|
|
@@ -617,9 +650,11 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
617
650
|
# Disallow a change in scheme
|
|
618
651
|
if "scheme" in kwargs:
|
|
619
652
|
raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
|
|
620
|
-
|
|
653
|
+
result = self.__class__(
|
|
621
654
|
self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
|
|
622
655
|
)
|
|
656
|
+
result._copy_extra_attributes(self)
|
|
657
|
+
return result
|
|
623
658
|
|
|
624
659
|
def updatedFile(self, newfile: str) -> ResourcePath:
|
|
625
660
|
"""Return new URI with an updated final component of the path.
|
|
@@ -832,9 +867,11 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
832
867
|
forceDirectory=forceDirectory,
|
|
833
868
|
isTemporary=isTemporary,
|
|
834
869
|
fragment=path_uri.fragment,
|
|
870
|
+
query=path_uri.query,
|
|
871
|
+
params=path_uri.params,
|
|
835
872
|
)
|
|
836
873
|
|
|
837
|
-
def relative_to(self, other: ResourcePath) -> str | None:
|
|
874
|
+
def relative_to(self, other: ResourcePath, walk_up: bool = False) -> str | None:
|
|
838
875
|
"""Return the relative path from this URI to the other URI.
|
|
839
876
|
|
|
840
877
|
Parameters
|
|
@@ -842,6 +879,9 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
842
879
|
other : `ResourcePath`
|
|
843
880
|
URI to use to calculate the relative path. Must be a parent
|
|
844
881
|
of this URI.
|
|
882
|
+
walk_up : `bool`, optional
|
|
883
|
+
Control whether "``..``" can be used to resolve a relative path.
|
|
884
|
+
Default is `False`. Can not be `True` on Python 3.11 and older.
|
|
845
885
|
|
|
846
886
|
Returns
|
|
847
887
|
-------
|
|
@@ -860,11 +900,22 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
860
900
|
if not {self.netloc, other.netloc}.issubset(local_netlocs):
|
|
861
901
|
return None
|
|
862
902
|
|
|
903
|
+
# Rather than trying to guess a failure reason from the TypeError
|
|
904
|
+
# explicitly check for python 3.11. Doing this will simplify the
|
|
905
|
+
# rediscovery of a useless python version check when we set a new
|
|
906
|
+
# minimum version.
|
|
907
|
+
kwargs = {}
|
|
908
|
+
if walk_up:
|
|
909
|
+
if sys.version_info < (3, 12, 0):
|
|
910
|
+
raise TypeError("walk_up parameter can not be true in python 3.11 and older")
|
|
911
|
+
|
|
912
|
+
kwargs["walk_up"] = True
|
|
913
|
+
|
|
863
914
|
enclosed_path = self._pathLib(self.relativeToPathRoot)
|
|
864
915
|
parent_path = other.relativeToPathRoot
|
|
865
916
|
subpath: str | None
|
|
866
917
|
try:
|
|
867
|
-
subpath = str(enclosed_path.relative_to(parent_path))
|
|
918
|
+
subpath = str(enclosed_path.relative_to(parent_path, **kwargs))
|
|
868
919
|
except ValueError:
|
|
869
920
|
subpath = None
|
|
870
921
|
else:
|
|
@@ -882,51 +933,101 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
882
933
|
raise NotImplementedError()
|
|
883
934
|
|
|
884
935
|
@classmethod
|
|
885
|
-
def
|
|
936
|
+
def _group_uris(cls, uris: Iterable[ResourcePath]) -> dict[type[ResourcePath], list[ResourcePath]]:
|
|
937
|
+
"""Group URIs by class/scheme."""
|
|
938
|
+
grouped: dict[type, list[ResourcePath]] = defaultdict(list)
|
|
939
|
+
for uri in uris:
|
|
940
|
+
grouped[uri.__class__].append(uri)
|
|
941
|
+
return grouped
|
|
942
|
+
|
|
943
|
+
@classmethod
|
|
944
|
+
def mexists(
|
|
945
|
+
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
946
|
+
) -> dict[ResourcePath, bool]:
|
|
886
947
|
"""Check for existence of multiple URIs at once.
|
|
887
948
|
|
|
888
949
|
Parameters
|
|
889
950
|
----------
|
|
890
951
|
uris : iterable of `ResourcePath`
|
|
891
952
|
The URIs to test.
|
|
953
|
+
num_workers : `int` or `None`, optional
|
|
954
|
+
The number of parallel workers to use when checking for existence.
|
|
955
|
+
If `None`, the default value will be taken from the environment.
|
|
956
|
+
If this number is higher than the default and a thread pool is
|
|
957
|
+
used, there may not be enough cached connections available.
|
|
892
958
|
|
|
893
959
|
Returns
|
|
894
960
|
-------
|
|
895
961
|
existence : `dict` of [`ResourcePath`, `bool`]
|
|
896
962
|
Mapping of original URI to boolean indicating existence.
|
|
897
963
|
"""
|
|
898
|
-
# Group by scheme to allow a subclass to be able to use
|
|
899
|
-
# specialized implementations.
|
|
900
|
-
grouped: dict[type, list[ResourcePath]] = {}
|
|
901
|
-
for uri in uris:
|
|
902
|
-
uri_class = uri.__class__
|
|
903
|
-
if uri_class not in grouped:
|
|
904
|
-
grouped[uri_class] = []
|
|
905
|
-
grouped[uri_class].append(uri)
|
|
906
|
-
|
|
907
964
|
existence: dict[ResourcePath, bool] = {}
|
|
908
|
-
for uri_class in
|
|
909
|
-
existence.update(uri_class._mexists(
|
|
965
|
+
for uri_class, group in cls._group_uris(uris).items():
|
|
966
|
+
existence.update(uri_class._mexists(group, num_workers=num_workers))
|
|
910
967
|
|
|
911
968
|
return existence
|
|
912
969
|
|
|
913
970
|
@classmethod
|
|
914
|
-
def _mexists(
|
|
971
|
+
def _mexists(
|
|
972
|
+
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
973
|
+
) -> dict[ResourcePath, bool]:
|
|
915
974
|
"""Check for existence of multiple URIs at once.
|
|
916
975
|
|
|
917
976
|
Implementation helper method for `mexists`.
|
|
918
977
|
|
|
978
|
+
|
|
919
979
|
Parameters
|
|
920
980
|
----------
|
|
921
981
|
uris : iterable of `ResourcePath`
|
|
922
982
|
The URIs to test.
|
|
983
|
+
num_workers : `int` or `None`, optional
|
|
984
|
+
The number of parallel workers to use when checking for existence.
|
|
985
|
+
If `None`, the default value will be taken from the environment.
|
|
923
986
|
|
|
924
987
|
Returns
|
|
925
988
|
-------
|
|
926
989
|
existence : `dict` of [`ResourcePath`, `bool`]
|
|
927
990
|
Mapping of original URI to boolean indicating existence.
|
|
928
991
|
"""
|
|
929
|
-
|
|
992
|
+
pool_executor_class = _get_executor_class()
|
|
993
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
994
|
+
# Patch the environment to make it think there is only one worker
|
|
995
|
+
# for each subprocess.
|
|
996
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
997
|
+
return cls._mexists_pool(pool_executor_class, uris)
|
|
998
|
+
else:
|
|
999
|
+
return cls._mexists_pool(pool_executor_class, uris, num_workers=num_workers)
|
|
1000
|
+
|
|
1001
|
+
@classmethod
|
|
1002
|
+
def _mexists_pool(
|
|
1003
|
+
cls,
|
|
1004
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
1005
|
+
uris: Iterable[ResourcePath],
|
|
1006
|
+
*,
|
|
1007
|
+
num_workers: int | None = None,
|
|
1008
|
+
) -> dict[ResourcePath, bool]:
|
|
1009
|
+
"""Check for existence of multiple URIs at once using specified pool
|
|
1010
|
+
executor.
|
|
1011
|
+
|
|
1012
|
+
Implementation helper method for `_mexists`.
|
|
1013
|
+
|
|
1014
|
+
Parameters
|
|
1015
|
+
----------
|
|
1016
|
+
pool_executor_class : `type` [ `concurrent.futures.Executor` ]
|
|
1017
|
+
Type of executor pool to use.
|
|
1018
|
+
uris : iterable of `ResourcePath`
|
|
1019
|
+
The URIs to test.
|
|
1020
|
+
num_workers : `int` or `None`, optional
|
|
1021
|
+
The number of parallel workers to use when checking for existence.
|
|
1022
|
+
If `None`, the default value will be taken from the environment.
|
|
1023
|
+
|
|
1024
|
+
Returns
|
|
1025
|
+
-------
|
|
1026
|
+
existence : `dict` of [`ResourcePath`, `bool`]
|
|
1027
|
+
Mapping of original URI to boolean indicating existence.
|
|
1028
|
+
"""
|
|
1029
|
+
max_workers = num_workers if num_workers is not None else _get_num_workers()
|
|
1030
|
+
with pool_executor_class(max_workers=max_workers) as exists_executor:
|
|
930
1031
|
future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
|
|
931
1032
|
|
|
932
1033
|
results: dict[ResourcePath, bool] = {}
|
|
@@ -947,7 +1048,68 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
947
1048
|
overwrite: bool = False,
|
|
948
1049
|
transaction: TransactionProtocol | None = None,
|
|
949
1050
|
do_raise: bool = True,
|
|
950
|
-
) -> dict[ResourcePath,
|
|
1051
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1052
|
+
"""Transfer many files in bulk.
|
|
1053
|
+
|
|
1054
|
+
Parameters
|
|
1055
|
+
----------
|
|
1056
|
+
transfer : `str`
|
|
1057
|
+
Mode to use for transferring the resource. Generically there are
|
|
1058
|
+
many standard options: copy, link, symlink, hardlink, relsymlink.
|
|
1059
|
+
Not all URIs support all modes.
|
|
1060
|
+
from_to : `list` [ `tuple` [ `ResourcePath`, `ResourcePath` ] ]
|
|
1061
|
+
A sequence of the source URIs and the target URIs.
|
|
1062
|
+
overwrite : `bool`, optional
|
|
1063
|
+
Allow an existing file to be overwritten. Defaults to `False`.
|
|
1064
|
+
transaction : `~lsst.resources.utils.TransactionProtocol`, optional
|
|
1065
|
+
A transaction object that can (depending on implementation)
|
|
1066
|
+
rollback transfers on error. Not guaranteed to be implemented.
|
|
1067
|
+
The transaction object must be thread safe.
|
|
1068
|
+
do_raise : `bool`, optional
|
|
1069
|
+
If `True` an `ExceptionGroup` will be raised containing any
|
|
1070
|
+
exceptions raised by the individual transfers. If `False`, or if
|
|
1071
|
+
there were no exceptions, a dict reporting the status of each
|
|
1072
|
+
`ResourcePath` will be returned.
|
|
1073
|
+
|
|
1074
|
+
Returns
|
|
1075
|
+
-------
|
|
1076
|
+
copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
1077
|
+
A dict of all the transfer attempts with a value indicating
|
|
1078
|
+
whether the transfer succeeded for the target URI. If ``do_raise``
|
|
1079
|
+
is `True`, this will only be returned if there are no errors.
|
|
1080
|
+
"""
|
|
1081
|
+
pool_executor_class = _get_executor_class()
|
|
1082
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
1083
|
+
# Patch the environment to make it think there is only one worker
|
|
1084
|
+
# for each subprocess.
|
|
1085
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
1086
|
+
return cls._mtransfer(
|
|
1087
|
+
pool_executor_class,
|
|
1088
|
+
transfer,
|
|
1089
|
+
from_to,
|
|
1090
|
+
overwrite=overwrite,
|
|
1091
|
+
transaction=transaction,
|
|
1092
|
+
do_raise=do_raise,
|
|
1093
|
+
)
|
|
1094
|
+
return cls._mtransfer(
|
|
1095
|
+
pool_executor_class,
|
|
1096
|
+
transfer,
|
|
1097
|
+
from_to,
|
|
1098
|
+
overwrite=overwrite,
|
|
1099
|
+
transaction=transaction,
|
|
1100
|
+
do_raise=do_raise,
|
|
1101
|
+
)
|
|
1102
|
+
|
|
1103
|
+
@classmethod
|
|
1104
|
+
def _mtransfer(
|
|
1105
|
+
cls,
|
|
1106
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
1107
|
+
transfer: str,
|
|
1108
|
+
from_to: Iterable[tuple[ResourcePath, ResourcePath]],
|
|
1109
|
+
overwrite: bool = False,
|
|
1110
|
+
transaction: TransactionProtocol | None = None,
|
|
1111
|
+
do_raise: bool = True,
|
|
1112
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
951
1113
|
"""Transfer many files in bulk.
|
|
952
1114
|
|
|
953
1115
|
Parameters
|
|
@@ -971,11 +1133,11 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
971
1133
|
|
|
972
1134
|
Returns
|
|
973
1135
|
-------
|
|
974
|
-
copy_status : `dict` [ `ResourcePath`, `
|
|
1136
|
+
copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
975
1137
|
A dict of all the transfer attempts with a value indicating
|
|
976
1138
|
whether the transfer succeeded for the target URI.
|
|
977
1139
|
"""
|
|
978
|
-
with
|
|
1140
|
+
with pool_executor_class(max_workers=_get_num_workers()) as transfer_executor:
|
|
979
1141
|
future_transfers = {
|
|
980
1142
|
transfer_executor.submit(
|
|
981
1143
|
to_uri.transfer_from,
|
|
@@ -987,17 +1149,17 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
987
1149
|
): to_uri
|
|
988
1150
|
for from_uri, to_uri in from_to
|
|
989
1151
|
}
|
|
990
|
-
results: dict[ResourcePath,
|
|
1152
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
991
1153
|
failed = False
|
|
992
1154
|
for future in concurrent.futures.as_completed(future_transfers):
|
|
993
1155
|
to_uri = future_transfers[future]
|
|
994
1156
|
try:
|
|
995
1157
|
future.result()
|
|
996
1158
|
except Exception as e:
|
|
997
|
-
transferred =
|
|
1159
|
+
transferred = MBulkResult(False, e)
|
|
998
1160
|
failed = True
|
|
999
1161
|
else:
|
|
1000
|
-
transferred =
|
|
1162
|
+
transferred = MBulkResult(True, None)
|
|
1001
1163
|
results[to_uri] = transferred
|
|
1002
1164
|
|
|
1003
1165
|
if do_raise and failed:
|
|
@@ -1012,6 +1174,81 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1012
1174
|
"""Remove the resource."""
|
|
1013
1175
|
raise NotImplementedError()
|
|
1014
1176
|
|
|
1177
|
+
@classmethod
|
|
1178
|
+
def mremove(
|
|
1179
|
+
cls, uris: Iterable[ResourcePath], *, do_raise: bool = True
|
|
1180
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1181
|
+
"""Remove multiple URIs at once.
|
|
1182
|
+
|
|
1183
|
+
Parameters
|
|
1184
|
+
----------
|
|
1185
|
+
uris : iterable of `ResourcePath`
|
|
1186
|
+
URIs to remove.
|
|
1187
|
+
do_raise : `bool`, optional
|
|
1188
|
+
If `True` an `ExceptionGroup` will be raised containing any
|
|
1189
|
+
exceptions raised by the individual removals. If `False`, or if
|
|
1190
|
+
there were no exceptions, a dict reporting the status of each
|
|
1191
|
+
`ResourcePath` will be returned.
|
|
1192
|
+
|
|
1193
|
+
Returns
|
|
1194
|
+
-------
|
|
1195
|
+
results : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
1196
|
+
Dictionary mapping each URI to a result object indicating whether
|
|
1197
|
+
the removal succeeded or resulted in an exception. If ``do_raise``
|
|
1198
|
+
is `True` this will only be returned if everything succeeded.
|
|
1199
|
+
"""
|
|
1200
|
+
# Group URIs by scheme since some URI schemes support native bulk
|
|
1201
|
+
# APIs.
|
|
1202
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
1203
|
+
for uri_class, group in cls._group_uris(uris).items():
|
|
1204
|
+
results.update(uri_class._mremove(group))
|
|
1205
|
+
if do_raise:
|
|
1206
|
+
failed = any(not r.success for r in results.values())
|
|
1207
|
+
if failed:
|
|
1208
|
+
s = "s" if len(results) != 1 else ""
|
|
1209
|
+
raise ExceptionGroup(
|
|
1210
|
+
f"Error{s} removing {len(results)} artifact{s}",
|
|
1211
|
+
tuple(res.exception for res in results.values() if res.exception is not None),
|
|
1212
|
+
)
|
|
1213
|
+
|
|
1214
|
+
return results
|
|
1215
|
+
|
|
1216
|
+
@classmethod
|
|
1217
|
+
def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
|
|
1218
|
+
"""Remove multiple URIs using futures."""
|
|
1219
|
+
pool_executor_class = _get_executor_class()
|
|
1220
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
1221
|
+
# Patch the environment to make it think there is only one worker
|
|
1222
|
+
# for each subprocess.
|
|
1223
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
1224
|
+
return cls._mremove_pool(pool_executor_class, uris)
|
|
1225
|
+
else:
|
|
1226
|
+
return cls._mremove_pool(pool_executor_class, uris)
|
|
1227
|
+
|
|
1228
|
+
@classmethod
|
|
1229
|
+
def _mremove_pool(
|
|
1230
|
+
cls,
|
|
1231
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
1232
|
+
uris: Iterable[ResourcePath],
|
|
1233
|
+
*,
|
|
1234
|
+
num_workers: int | None = None,
|
|
1235
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1236
|
+
"""Remove URIs using a futures pool."""
|
|
1237
|
+
max_workers = num_workers if num_workers is not None else _get_num_workers()
|
|
1238
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
1239
|
+
with pool_executor_class(max_workers=max_workers) as remove_executor:
|
|
1240
|
+
future_remove = {remove_executor.submit(uri.remove): uri for uri in uris}
|
|
1241
|
+
for future in concurrent.futures.as_completed(future_remove):
|
|
1242
|
+
try:
|
|
1243
|
+
future.result()
|
|
1244
|
+
except Exception as e:
|
|
1245
|
+
removed = MBulkResult(False, e)
|
|
1246
|
+
else:
|
|
1247
|
+
removed = MBulkResult(True, None)
|
|
1248
|
+
uri = future_remove[future]
|
|
1249
|
+
results[uri] = removed
|
|
1250
|
+
return results
|
|
1251
|
+
|
|
1015
1252
|
def isabs(self) -> bool:
|
|
1016
1253
|
"""Indicate that the resource is fully specified.
|
|
1017
1254
|
|
|
@@ -1035,7 +1272,10 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1035
1272
|
"""
|
|
1036
1273
|
return self
|
|
1037
1274
|
|
|
1038
|
-
|
|
1275
|
+
@contextlib.contextmanager
|
|
1276
|
+
def _as_local(
|
|
1277
|
+
self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
|
|
1278
|
+
) -> Iterator[ResourcePath]:
|
|
1039
1279
|
"""Return the location of the (possibly remote) resource as local file.
|
|
1040
1280
|
|
|
1041
1281
|
This is a helper function for `as_local` context manager.
|
|
@@ -1054,13 +1294,9 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1054
1294
|
|
|
1055
1295
|
Returns
|
|
1056
1296
|
-------
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
For a local resource this should be the actual path to the
|
|
1061
|
-
resource.
|
|
1062
|
-
is_temporary : `bool`
|
|
1063
|
-
Indicates if the local path is a temporary file or not.
|
|
1297
|
+
local_uri : `ResourcePath`
|
|
1298
|
+
A URI to a local POSIX file. This can either be the same resource
|
|
1299
|
+
or a local downloaded copy of the resource.
|
|
1064
1300
|
"""
|
|
1065
1301
|
raise NotImplementedError()
|
|
1066
1302
|
|
|
@@ -1110,18 +1346,8 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1110
1346
|
temp_dir = ResourcePath(tmpdir, forceDirectory=True) if tmpdir is not None else None
|
|
1111
1347
|
if temp_dir is not None and not temp_dir.isLocal:
|
|
1112
1348
|
raise ValueError(f"Temporary directory for as_local must be local resource not {temp_dir}")
|
|
1113
|
-
|
|
1114
|
-
local_uri = ResourcePath(local_src, isTemporary=is_temporary)
|
|
1115
|
-
|
|
1116
|
-
try:
|
|
1349
|
+
with self._as_local(multithreaded=multithreaded, tmpdir=temp_dir) as local_uri:
|
|
1117
1350
|
yield local_uri
|
|
1118
|
-
finally:
|
|
1119
|
-
# The caller might have relocated the temporary file.
|
|
1120
|
-
# Do not ever delete if the temporary matches self
|
|
1121
|
-
# (since it may have been that a temporary file was made local
|
|
1122
|
-
# but already was local).
|
|
1123
|
-
if self != local_uri and is_temporary and local_uri.exists():
|
|
1124
|
-
local_uri.remove()
|
|
1125
1351
|
|
|
1126
1352
|
@classmethod
|
|
1127
1353
|
@contextlib.contextmanager
|
|
@@ -1685,6 +1911,12 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1685
1911
|
"""
|
|
1686
1912
|
raise NotImplementedError(f"URL signing is not supported for '{self.scheme}'")
|
|
1687
1913
|
|
|
1914
|
+
def _copy_extra_attributes(self, original_uri: ResourcePath) -> None:
|
|
1915
|
+
# May be overridden by subclasses to transfer attributes when a
|
|
1916
|
+
# ResourcePath is constructed using the "clone" version of the
|
|
1917
|
+
# ResourcePath constructor by passing in a ResourcePath object.
|
|
1918
|
+
pass
|
|
1919
|
+
|
|
1688
1920
|
|
|
1689
1921
|
ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
|
|
1690
1922
|
"""Type-annotation alias for objects that can be coerced to ResourcePath.
|