lsst-resources 29.0.0rc7__py3-none-any.whl → 29.2025.4600__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/resources/_resourceHandles/_davResourceHandle.py +197 -0
- lsst/resources/_resourceHandles/_fileResourceHandle.py +1 -1
- lsst/resources/_resourceHandles/_httpResourceHandle.py +16 -2
- lsst/resources/_resourceHandles/_s3ResourceHandle.py +3 -17
- lsst/resources/_resourcePath.py +448 -81
- lsst/resources/dav.py +912 -0
- lsst/resources/davutils.py +2659 -0
- lsst/resources/file.py +97 -57
- lsst/resources/gs.py +11 -4
- lsst/resources/http.py +229 -62
- lsst/resources/mem.py +7 -1
- lsst/resources/packageresource.py +13 -2
- lsst/resources/s3.py +174 -17
- lsst/resources/s3utils.py +8 -1
- lsst/resources/schemeless.py +6 -3
- lsst/resources/tests.py +140 -12
- lsst/resources/utils.py +74 -1
- lsst/resources/version.py +1 -1
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/METADATA +3 -3
- lsst_resources-29.2025.4600.dist-info/RECORD +31 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/WHEEL +1 -1
- lsst_resources-29.0.0rc7.dist-info/RECORD +0 -28
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/resources/_resourcePath.py
CHANGED
|
@@ -22,11 +22,12 @@ import logging
|
|
|
22
22
|
import os
|
|
23
23
|
import posixpath
|
|
24
24
|
import re
|
|
25
|
-
import
|
|
26
|
-
import tempfile
|
|
25
|
+
import sys
|
|
27
26
|
import urllib.parse
|
|
27
|
+
from collections import defaultdict
|
|
28
28
|
from pathlib import Path, PurePath, PurePosixPath
|
|
29
29
|
from random import Random
|
|
30
|
+
from typing import TypeAlias
|
|
30
31
|
|
|
31
32
|
try:
|
|
32
33
|
import fsspec
|
|
@@ -36,10 +37,10 @@ except ImportError:
|
|
|
36
37
|
AbstractFileSystem = type
|
|
37
38
|
|
|
38
39
|
from collections.abc import Iterable, Iterator
|
|
39
|
-
from typing import TYPE_CHECKING, Any, Literal, overload
|
|
40
|
+
from typing import TYPE_CHECKING, Any, Literal, NamedTuple, overload
|
|
40
41
|
|
|
41
42
|
from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
|
|
42
|
-
from .utils import
|
|
43
|
+
from .utils import _get_num_workers, get_tempdir
|
|
43
44
|
|
|
44
45
|
if TYPE_CHECKING:
|
|
45
46
|
from .utils import TransactionProtocol
|
|
@@ -53,10 +54,81 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
|
|
|
53
54
|
# Precomputed escaped hash
|
|
54
55
|
ESCAPED_HASH = urllib.parse.quote("#")
|
|
55
56
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
57
|
+
|
|
58
|
+
class MBulkResult(NamedTuple):
|
|
59
|
+
"""Report on a bulk operation."""
|
|
60
|
+
|
|
61
|
+
success: bool
|
|
62
|
+
exception: Exception | None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
_EXECUTOR_TYPE: TypeAlias = type[
|
|
66
|
+
concurrent.futures.ThreadPoolExecutor | concurrent.futures.ProcessPoolExecutor
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
# Cache value for executor class so as not to issue warning multiple
|
|
70
|
+
# times but still allow tests to override the value.
|
|
71
|
+
_POOL_EXECUTOR_CLASS: _EXECUTOR_TYPE | None = None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _get_executor_class() -> _EXECUTOR_TYPE:
|
|
75
|
+
"""Return the executor class used for parallelized execution.
|
|
76
|
+
|
|
77
|
+
Returns
|
|
78
|
+
-------
|
|
79
|
+
cls : `concurrent.futures.Executor`
|
|
80
|
+
The ``Executor`` class. Default is
|
|
81
|
+
`concurrent.futures.ThreadPoolExecutor`. Can be set explicitly by
|
|
82
|
+
setting the ``$LSST_RESOURCES_EXECUTOR`` environment variable to
|
|
83
|
+
"threads" or "process". Falls back to "threads" if the value of the
|
|
84
|
+
variable is not recognized.
|
|
85
|
+
"""
|
|
86
|
+
global _POOL_EXECUTOR_CLASS
|
|
87
|
+
|
|
88
|
+
if _POOL_EXECUTOR_CLASS is not None:
|
|
89
|
+
return _POOL_EXECUTOR_CLASS
|
|
90
|
+
|
|
91
|
+
pool_executor_classes = {
|
|
92
|
+
"threads": concurrent.futures.ThreadPoolExecutor,
|
|
93
|
+
"process": concurrent.futures.ProcessPoolExecutor,
|
|
94
|
+
}
|
|
95
|
+
default_executor = "threads"
|
|
96
|
+
external = os.getenv("LSST_RESOURCES_EXECUTOR", default_executor)
|
|
97
|
+
if not external:
|
|
98
|
+
external = default_executor
|
|
99
|
+
if external not in pool_executor_classes:
|
|
100
|
+
log.warning(
|
|
101
|
+
"Unrecognized value of '%s' for LSST_RESOURCES_EXECUTOR env var. Using '%s'",
|
|
102
|
+
external,
|
|
103
|
+
default_executor,
|
|
104
|
+
)
|
|
105
|
+
external = default_executor
|
|
106
|
+
_POOL_EXECUTOR_CLASS = pool_executor_classes[external]
|
|
107
|
+
return _POOL_EXECUTOR_CLASS
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@contextlib.contextmanager
|
|
111
|
+
def _patch_environ(new_values: dict[str, str]) -> Iterator[None]:
|
|
112
|
+
"""Patch os.environ temporarily using the supplied values.
|
|
113
|
+
|
|
114
|
+
Parameters
|
|
115
|
+
----------
|
|
116
|
+
new_values : `dict` [ `str`, `str` ]
|
|
117
|
+
New values to be stored in the environment.
|
|
118
|
+
"""
|
|
119
|
+
old_values: dict[str, str] = {}
|
|
120
|
+
for k, v in new_values.items():
|
|
121
|
+
if k in os.environ:
|
|
122
|
+
old_values[k] = os.environ[k]
|
|
123
|
+
os.environ[k] = v
|
|
124
|
+
|
|
125
|
+
try:
|
|
126
|
+
yield
|
|
127
|
+
finally:
|
|
128
|
+
for k in new_values:
|
|
129
|
+
del os.environ[k]
|
|
130
|
+
if k in old_values:
|
|
131
|
+
os.environ[k] = old_values[k]
|
|
60
132
|
|
|
61
133
|
|
|
62
134
|
class ResourcePath: # numpydoc ignore=PR02
|
|
@@ -296,6 +368,10 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
296
368
|
from .http import HttpResourcePath
|
|
297
369
|
|
|
298
370
|
subclass = HttpResourcePath
|
|
371
|
+
elif parsed.scheme in {"dav", "davs"}:
|
|
372
|
+
from .dav import DavResourcePath
|
|
373
|
+
|
|
374
|
+
subclass = DavResourcePath
|
|
299
375
|
elif parsed.scheme == "gs":
|
|
300
376
|
from .gs import GSResourcePath
|
|
301
377
|
|
|
@@ -474,7 +550,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
474
550
|
return self, ""
|
|
475
551
|
|
|
476
552
|
head, tail = self._pathModule.split(self.path)
|
|
477
|
-
headuri = self._uri._replace(path=head)
|
|
553
|
+
headuri = self._uri._replace(path=head, fragment="", query="", params="")
|
|
478
554
|
|
|
479
555
|
# The file part should never include quoted metacharacters
|
|
480
556
|
tail = urllib.parse.unquote(tail)
|
|
@@ -544,7 +620,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
544
620
|
# regardless of the presence of a trailing separator
|
|
545
621
|
originalPath = self._pathLib(self.path)
|
|
546
622
|
parentPath = originalPath.parent
|
|
547
|
-
return self.replace(path=str(parentPath), forceDirectory=True)
|
|
623
|
+
return self.replace(path=str(parentPath), forceDirectory=True, fragment="", query="", params="")
|
|
548
624
|
|
|
549
625
|
def replace(
|
|
550
626
|
self, forceDirectory: bool | None = None, isTemporary: bool = False, **kwargs: Any
|
|
@@ -574,9 +650,11 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
574
650
|
# Disallow a change in scheme
|
|
575
651
|
if "scheme" in kwargs:
|
|
576
652
|
raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
|
|
577
|
-
|
|
653
|
+
result = self.__class__(
|
|
578
654
|
self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
|
|
579
655
|
)
|
|
656
|
+
result._copy_extra_attributes(self)
|
|
657
|
+
return result
|
|
580
658
|
|
|
581
659
|
def updatedFile(self, newfile: str) -> ResourcePath:
|
|
582
660
|
"""Return new URI with an updated final component of the path.
|
|
@@ -789,9 +867,11 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
789
867
|
forceDirectory=forceDirectory,
|
|
790
868
|
isTemporary=isTemporary,
|
|
791
869
|
fragment=path_uri.fragment,
|
|
870
|
+
query=path_uri.query,
|
|
871
|
+
params=path_uri.params,
|
|
792
872
|
)
|
|
793
873
|
|
|
794
|
-
def relative_to(self, other: ResourcePath) -> str | None:
|
|
874
|
+
def relative_to(self, other: ResourcePath, walk_up: bool = False) -> str | None:
|
|
795
875
|
"""Return the relative path from this URI to the other URI.
|
|
796
876
|
|
|
797
877
|
Parameters
|
|
@@ -799,6 +879,9 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
799
879
|
other : `ResourcePath`
|
|
800
880
|
URI to use to calculate the relative path. Must be a parent
|
|
801
881
|
of this URI.
|
|
882
|
+
walk_up : `bool`, optional
|
|
883
|
+
Control whether "``..``" can be used to resolve a relative path.
|
|
884
|
+
Default is `False`. Can not be `True` on Python 3.11 or older.
|
|
802
885
|
|
|
803
886
|
Returns
|
|
804
887
|
-------
|
|
@@ -817,11 +900,22 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
817
900
|
if not {self.netloc, other.netloc}.issubset(local_netlocs):
|
|
818
901
|
return None
|
|
819
902
|
|
|
903
|
+
# Rather than trying to guess a failure reason from the TypeError
|
|
904
|
+
# explicitly check for python 3.11. Doing this will simplify the
|
|
905
|
+
# rediscovery of a useless python version check when we set a new
|
|
906
|
+
# minimum version.
|
|
907
|
+
kwargs = {}
|
|
908
|
+
if walk_up:
|
|
909
|
+
if sys.version_info < (3, 12, 0):
|
|
910
|
+
raise TypeError("walk_up parameter can not be true in python 3.11 and older")
|
|
911
|
+
|
|
912
|
+
kwargs["walk_up"] = True
|
|
913
|
+
|
|
820
914
|
enclosed_path = self._pathLib(self.relativeToPathRoot)
|
|
821
915
|
parent_path = other.relativeToPathRoot
|
|
822
916
|
subpath: str | None
|
|
823
917
|
try:
|
|
824
|
-
subpath = str(enclosed_path.relative_to(parent_path))
|
|
918
|
+
subpath = str(enclosed_path.relative_to(parent_path, **kwargs))
|
|
825
919
|
except ValueError:
|
|
826
920
|
subpath = None
|
|
827
921
|
else:
|
|
@@ -839,67 +933,322 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
839
933
|
raise NotImplementedError()
|
|
840
934
|
|
|
841
935
|
@classmethod
|
|
842
|
-
def
|
|
936
|
+
def _group_uris(cls, uris: Iterable[ResourcePath]) -> dict[type[ResourcePath], list[ResourcePath]]:
|
|
937
|
+
"""Group URIs by class/scheme."""
|
|
938
|
+
grouped: dict[type, list[ResourcePath]] = defaultdict(list)
|
|
939
|
+
for uri in uris:
|
|
940
|
+
grouped[uri.__class__].append(uri)
|
|
941
|
+
return grouped
|
|
942
|
+
|
|
943
|
+
@classmethod
|
|
944
|
+
def mexists(
|
|
945
|
+
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
946
|
+
) -> dict[ResourcePath, bool]:
|
|
843
947
|
"""Check for existence of multiple URIs at once.
|
|
844
948
|
|
|
845
949
|
Parameters
|
|
846
950
|
----------
|
|
847
951
|
uris : iterable of `ResourcePath`
|
|
848
952
|
The URIs to test.
|
|
953
|
+
num_workers : `int` or `None`, optional
|
|
954
|
+
The number of parallel workers to use when checking for existence.
|
|
955
|
+
If `None`, the default value will be taken from the environment.
|
|
956
|
+
If this number is higher than the default and a thread pool is
|
|
957
|
+
used, there may not be enough cached connections available.
|
|
849
958
|
|
|
850
959
|
Returns
|
|
851
960
|
-------
|
|
852
961
|
existence : `dict` of [`ResourcePath`, `bool`]
|
|
853
962
|
Mapping of original URI to boolean indicating existence.
|
|
854
963
|
"""
|
|
855
|
-
# Group by scheme to allow a subclass to be able to use
|
|
856
|
-
# specialized implementations.
|
|
857
|
-
grouped: dict[type, list[ResourcePath]] = {}
|
|
858
|
-
for uri in uris:
|
|
859
|
-
uri_class = uri.__class__
|
|
860
|
-
if uri_class not in grouped:
|
|
861
|
-
grouped[uri_class] = []
|
|
862
|
-
grouped[uri_class].append(uri)
|
|
863
|
-
|
|
864
964
|
existence: dict[ResourcePath, bool] = {}
|
|
865
|
-
for uri_class in
|
|
866
|
-
existence.update(uri_class._mexists(
|
|
965
|
+
for uri_class, group in cls._group_uris(uris).items():
|
|
966
|
+
existence.update(uri_class._mexists(group, num_workers=num_workers))
|
|
867
967
|
|
|
868
968
|
return existence
|
|
869
969
|
|
|
870
970
|
@classmethod
|
|
871
|
-
def _mexists(
|
|
971
|
+
def _mexists(
|
|
972
|
+
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
973
|
+
) -> dict[ResourcePath, bool]:
|
|
872
974
|
"""Check for existence of multiple URIs at once.
|
|
873
975
|
|
|
874
976
|
Implementation helper method for `mexists`.
|
|
875
977
|
|
|
978
|
+
|
|
979
|
+
Parameters
|
|
980
|
+
----------
|
|
981
|
+
uris : iterable of `ResourcePath`
|
|
982
|
+
The URIs to test.
|
|
983
|
+
num_workers : `int` or `None`, optional
|
|
984
|
+
The number of parallel workers to use when checking for existence.
|
|
985
|
+
If `None`, the default value will be taken from the environment.
|
|
986
|
+
|
|
987
|
+
Returns
|
|
988
|
+
-------
|
|
989
|
+
existence : `dict` of [`ResourcePath`, `bool`]
|
|
990
|
+
Mapping of original URI to boolean indicating existence.
|
|
991
|
+
"""
|
|
992
|
+
pool_executor_class = _get_executor_class()
|
|
993
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
994
|
+
# Patch the environment to make it think there is only one worker
|
|
995
|
+
# for each subprocess.
|
|
996
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
997
|
+
return cls._mexists_pool(pool_executor_class, uris)
|
|
998
|
+
else:
|
|
999
|
+
return cls._mexists_pool(pool_executor_class, uris, num_workers=num_workers)
|
|
1000
|
+
|
|
1001
|
+
@classmethod
|
|
1002
|
+
def _mexists_pool(
|
|
1003
|
+
cls,
|
|
1004
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
1005
|
+
uris: Iterable[ResourcePath],
|
|
1006
|
+
*,
|
|
1007
|
+
num_workers: int | None = None,
|
|
1008
|
+
) -> dict[ResourcePath, bool]:
|
|
1009
|
+
"""Check for existence of multiple URIs at once using specified pool
|
|
1010
|
+
executor.
|
|
1011
|
+
|
|
1012
|
+
Implementation helper method for `_mexists`.
|
|
1013
|
+
|
|
876
1014
|
Parameters
|
|
877
1015
|
----------
|
|
1016
|
+
pool_executor_class : `type` [ `concurrent.futures.Executor` ]
|
|
1017
|
+
Type of executor pool to use.
|
|
878
1018
|
uris : iterable of `ResourcePath`
|
|
879
1019
|
The URIs to test.
|
|
1020
|
+
num_workers : `int` or `None`, optional
|
|
1021
|
+
The number of parallel workers to use when checking for existence.
|
|
1022
|
+
If `None`, the default value will be taken from the environment.
|
|
880
1023
|
|
|
881
1024
|
Returns
|
|
882
1025
|
-------
|
|
883
1026
|
existence : `dict` of [`ResourcePath`, `bool`]
|
|
884
1027
|
Mapping of original URI to boolean indicating existence.
|
|
885
1028
|
"""
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
1029
|
+
max_workers = num_workers if num_workers is not None else _get_num_workers()
|
|
1030
|
+
with pool_executor_class(max_workers=max_workers) as exists_executor:
|
|
1031
|
+
future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
|
|
1032
|
+
|
|
1033
|
+
results: dict[ResourcePath, bool] = {}
|
|
1034
|
+
for future in concurrent.futures.as_completed(future_exists):
|
|
1035
|
+
uri = future_exists[future]
|
|
1036
|
+
try:
|
|
1037
|
+
exists = future.result()
|
|
1038
|
+
except Exception:
|
|
1039
|
+
exists = False
|
|
1040
|
+
results[uri] = exists
|
|
1041
|
+
return results
|
|
1042
|
+
|
|
1043
|
+
@classmethod
|
|
1044
|
+
def mtransfer(
|
|
1045
|
+
cls,
|
|
1046
|
+
transfer: str,
|
|
1047
|
+
from_to: Iterable[tuple[ResourcePath, ResourcePath]],
|
|
1048
|
+
overwrite: bool = False,
|
|
1049
|
+
transaction: TransactionProtocol | None = None,
|
|
1050
|
+
do_raise: bool = True,
|
|
1051
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1052
|
+
"""Transfer many files in bulk.
|
|
1053
|
+
|
|
1054
|
+
Parameters
|
|
1055
|
+
----------
|
|
1056
|
+
transfer : `str`
|
|
1057
|
+
Mode to use for transferring the resource. Generically there are
|
|
1058
|
+
many standard options: copy, link, symlink, hardlink, relsymlink.
|
|
1059
|
+
Not all URIs support all modes.
|
|
1060
|
+
from_to : `list` [ `tuple` [ `ResourcePath`, `ResourcePath` ] ]
|
|
1061
|
+
A sequence of the source URIs and the target URIs.
|
|
1062
|
+
overwrite : `bool`, optional
|
|
1063
|
+
Allow an existing file to be overwritten. Defaults to `False`.
|
|
1064
|
+
transaction : `~lsst.resources.utils.TransactionProtocol`, optional
|
|
1065
|
+
A transaction object that can (depending on implementation)
|
|
1066
|
+
rollback transfers on error. Not guaranteed to be implemented.
|
|
1067
|
+
The transaction object must be thread safe.
|
|
1068
|
+
do_raise : `bool`, optional
|
|
1069
|
+
If `True` an `ExceptionGroup` will be raised containing any
|
|
1070
|
+
exceptions raised by the individual transfers. If `False`, or if
|
|
1071
|
+
there were no exceptions, a dict reporting the status of each
|
|
1072
|
+
`ResourcePath` will be returned.
|
|
1073
|
+
|
|
1074
|
+
Returns
|
|
1075
|
+
-------
|
|
1076
|
+
copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
1077
|
+
A dict of all the transfer attempts with a value indicating
|
|
1078
|
+
whether the transfer succeeded for the target URI. If ``do_raise``
|
|
1079
|
+
is `True`, this will only be returned if there are no errors.
|
|
1080
|
+
"""
|
|
1081
|
+
pool_executor_class = _get_executor_class()
|
|
1082
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
1083
|
+
# Patch the environment to make it think there is only one worker
|
|
1084
|
+
# for each subprocess.
|
|
1085
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
1086
|
+
return cls._mtransfer(
|
|
1087
|
+
pool_executor_class,
|
|
1088
|
+
transfer,
|
|
1089
|
+
from_to,
|
|
1090
|
+
overwrite=overwrite,
|
|
1091
|
+
transaction=transaction,
|
|
1092
|
+
do_raise=do_raise,
|
|
1093
|
+
)
|
|
1094
|
+
return cls._mtransfer(
|
|
1095
|
+
pool_executor_class,
|
|
1096
|
+
transfer,
|
|
1097
|
+
from_to,
|
|
1098
|
+
overwrite=overwrite,
|
|
1099
|
+
transaction=transaction,
|
|
1100
|
+
do_raise=do_raise,
|
|
1101
|
+
)
|
|
1102
|
+
|
|
1103
|
+
@classmethod
|
|
1104
|
+
def _mtransfer(
|
|
1105
|
+
cls,
|
|
1106
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
1107
|
+
transfer: str,
|
|
1108
|
+
from_to: Iterable[tuple[ResourcePath, ResourcePath]],
|
|
1109
|
+
overwrite: bool = False,
|
|
1110
|
+
transaction: TransactionProtocol | None = None,
|
|
1111
|
+
do_raise: bool = True,
|
|
1112
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1113
|
+
"""Transfer many files in bulk.
|
|
1114
|
+
|
|
1115
|
+
Parameters
|
|
1116
|
+
----------
|
|
1117
|
+
transfer : `str`
|
|
1118
|
+
Mode to use for transferring the resource. Generically there are
|
|
1119
|
+
many standard options: copy, link, symlink, hardlink, relsymlink.
|
|
1120
|
+
Not all URIs support all modes.
|
|
1121
|
+
from_to : `list` [ `tuple` [ `ResourcePath`, `ResourcePath` ] ]
|
|
1122
|
+
A sequence of the source URIs and the target URIs.
|
|
1123
|
+
overwrite : `bool`, optional
|
|
1124
|
+
Allow an existing file to be overwritten. Defaults to `False`.
|
|
1125
|
+
transaction : `~lsst.resources.utils.TransactionProtocol`, optional
|
|
1126
|
+
A transaction object that can (depending on implementation)
|
|
1127
|
+
rollback transfers on error. Not guaranteed to be implemented.
|
|
1128
|
+
The transaction object must be thread safe.
|
|
1129
|
+
do_raise : `bool`, optional
|
|
1130
|
+
If `True` an `ExceptionGroup` will be raised containing any
|
|
1131
|
+
exceptions raised by the individual transfers. Else a dict
|
|
1132
|
+
reporting the status of each `ResourcePath` will be returned.
|
|
1133
|
+
|
|
1134
|
+
Returns
|
|
1135
|
+
-------
|
|
1136
|
+
copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
1137
|
+
A dict of all the transfer attempts with a value indicating
|
|
1138
|
+
whether the transfer succeeded for the target URI.
|
|
1139
|
+
"""
|
|
1140
|
+
with pool_executor_class(max_workers=_get_num_workers()) as transfer_executor:
|
|
1141
|
+
future_transfers = {
|
|
1142
|
+
transfer_executor.submit(
|
|
1143
|
+
to_uri.transfer_from,
|
|
1144
|
+
from_uri,
|
|
1145
|
+
transfer=transfer,
|
|
1146
|
+
overwrite=overwrite,
|
|
1147
|
+
transaction=transaction,
|
|
1148
|
+
multithreaded=False,
|
|
1149
|
+
): to_uri
|
|
1150
|
+
for from_uri, to_uri in from_to
|
|
1151
|
+
}
|
|
1152
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
1153
|
+
failed = False
|
|
1154
|
+
for future in concurrent.futures.as_completed(future_transfers):
|
|
1155
|
+
to_uri = future_transfers[future]
|
|
1156
|
+
try:
|
|
1157
|
+
future.result()
|
|
1158
|
+
except Exception as e:
|
|
1159
|
+
transferred = MBulkResult(False, e)
|
|
1160
|
+
failed = True
|
|
1161
|
+
else:
|
|
1162
|
+
transferred = MBulkResult(True, None)
|
|
1163
|
+
results[to_uri] = transferred
|
|
1164
|
+
|
|
1165
|
+
if do_raise and failed:
|
|
1166
|
+
raise ExceptionGroup(
|
|
1167
|
+
f"Errors transferring {len(results)} artifacts",
|
|
1168
|
+
tuple(res.exception for res in results.values() if res.exception is not None),
|
|
1169
|
+
)
|
|
1170
|
+
|
|
897
1171
|
return results
|
|
898
1172
|
|
|
899
1173
|
def remove(self) -> None:
|
|
900
1174
|
"""Remove the resource."""
|
|
901
1175
|
raise NotImplementedError()
|
|
902
1176
|
|
|
1177
|
+
@classmethod
|
|
1178
|
+
def mremove(
|
|
1179
|
+
cls, uris: Iterable[ResourcePath], *, do_raise: bool = True
|
|
1180
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1181
|
+
"""Remove multiple URIs at once.
|
|
1182
|
+
|
|
1183
|
+
Parameters
|
|
1184
|
+
----------
|
|
1185
|
+
uris : iterable of `ResourcePath`
|
|
1186
|
+
URIs to remove.
|
|
1187
|
+
do_raise : `bool`, optional
|
|
1188
|
+
If `True` an `ExceptionGroup` will be raised containing any
|
|
1189
|
+
exceptions raised by the individual removals. If `False`, or if
|
|
1190
|
+
there were no exceptions, a dict reporting the status of each
|
|
1191
|
+
`ResourcePath` will be returned.
|
|
1192
|
+
|
|
1193
|
+
Returns
|
|
1194
|
+
-------
|
|
1195
|
+
results : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
1196
|
+
Dictionary mapping each URI to a result object indicating whether
|
|
1197
|
+
the removal succeeded or resulted in an exception. If ``do_raise``
|
|
1198
|
+
is `True` this will only be returned if everything succeeded.
|
|
1199
|
+
"""
|
|
1200
|
+
# Group URIs by scheme since some URI schemes support native bulk
|
|
1201
|
+
# APIs.
|
|
1202
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
1203
|
+
for uri_class, group in cls._group_uris(uris).items():
|
|
1204
|
+
results.update(uri_class._mremove(group))
|
|
1205
|
+
if do_raise:
|
|
1206
|
+
failed = any(not r.success for r in results.values())
|
|
1207
|
+
if failed:
|
|
1208
|
+
s = "s" if len(results) != 1 else ""
|
|
1209
|
+
raise ExceptionGroup(
|
|
1210
|
+
f"Error{s} removing {len(results)} artifact{s}",
|
|
1211
|
+
tuple(res.exception for res in results.values() if res.exception is not None),
|
|
1212
|
+
)
|
|
1213
|
+
|
|
1214
|
+
return results
|
|
1215
|
+
|
|
1216
|
+
@classmethod
|
|
1217
|
+
def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
|
|
1218
|
+
"""Remove multiple URIs using futures."""
|
|
1219
|
+
pool_executor_class = _get_executor_class()
|
|
1220
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
1221
|
+
# Patch the environment to make it think there is only one worker
|
|
1222
|
+
# for each subprocess.
|
|
1223
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
1224
|
+
return cls._mremove_pool(pool_executor_class, uris)
|
|
1225
|
+
else:
|
|
1226
|
+
return cls._mremove_pool(pool_executor_class, uris)
|
|
1227
|
+
|
|
1228
|
+
@classmethod
|
|
1229
|
+
def _mremove_pool(
|
|
1230
|
+
cls,
|
|
1231
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
1232
|
+
uris: Iterable[ResourcePath],
|
|
1233
|
+
*,
|
|
1234
|
+
num_workers: int | None = None,
|
|
1235
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1236
|
+
"""Remove URIs using a futures pool."""
|
|
1237
|
+
max_workers = num_workers if num_workers is not None else _get_num_workers()
|
|
1238
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
1239
|
+
with pool_executor_class(max_workers=max_workers) as remove_executor:
|
|
1240
|
+
future_remove = {remove_executor.submit(uri.remove): uri for uri in uris}
|
|
1241
|
+
for future in concurrent.futures.as_completed(future_remove):
|
|
1242
|
+
try:
|
|
1243
|
+
future.result()
|
|
1244
|
+
except Exception as e:
|
|
1245
|
+
removed = MBulkResult(False, e)
|
|
1246
|
+
else:
|
|
1247
|
+
removed = MBulkResult(True, None)
|
|
1248
|
+
uri = future_remove[future]
|
|
1249
|
+
results[uri] = removed
|
|
1250
|
+
return results
|
|
1251
|
+
|
|
903
1252
|
def isabs(self) -> bool:
|
|
904
1253
|
"""Indicate that the resource is fully specified.
|
|
905
1254
|
|
|
@@ -923,27 +1272,53 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
923
1272
|
"""
|
|
924
1273
|
return self
|
|
925
1274
|
|
|
926
|
-
|
|
1275
|
+
@contextlib.contextmanager
|
|
1276
|
+
def _as_local(
|
|
1277
|
+
self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
|
|
1278
|
+
) -> Iterator[ResourcePath]:
|
|
927
1279
|
"""Return the location of the (possibly remote) resource as local file.
|
|
928
1280
|
|
|
929
1281
|
This is a helper function for `as_local` context manager.
|
|
930
1282
|
|
|
1283
|
+
Parameters
|
|
1284
|
+
----------
|
|
1285
|
+
multithreaded : `bool`, optional
|
|
1286
|
+
If `True` the transfer will be allowed to attempt to improve
|
|
1287
|
+
throughput by using parallel download streams. This may be of no
|
|
1288
|
+
effect if the URI scheme does not support parallel streams or
|
|
1289
|
+
if a global override has been applied. If `False` parallel
|
|
1290
|
+
streams will be disabled.
|
|
1291
|
+
tmpdir : `ResourcePath` or `None`, optional
|
|
1292
|
+
Explicit override of the temporary directory to use for remote
|
|
1293
|
+
downloads.
|
|
1294
|
+
|
|
931
1295
|
Returns
|
|
932
1296
|
-------
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
For a local resource this should be the actual path to the
|
|
937
|
-
resource.
|
|
938
|
-
is_temporary : `bool`
|
|
939
|
-
Indicates if the local path is a temporary file or not.
|
|
1297
|
+
local_uri : `ResourcePath`
|
|
1298
|
+
A URI to a local POSIX file. This can either be the same resource
|
|
1299
|
+
or a local downloaded copy of the resource.
|
|
940
1300
|
"""
|
|
941
1301
|
raise NotImplementedError()
|
|
942
1302
|
|
|
943
1303
|
@contextlib.contextmanager
|
|
944
|
-
def as_local(
|
|
1304
|
+
def as_local(
|
|
1305
|
+
self, multithreaded: bool = True, tmpdir: ResourcePathExpression | None = None
|
|
1306
|
+
) -> Iterator[ResourcePath]:
|
|
945
1307
|
"""Return the location of the (possibly remote) resource as local file.
|
|
946
1308
|
|
|
1309
|
+
Parameters
|
|
1310
|
+
----------
|
|
1311
|
+
multithreaded : `bool`, optional
|
|
1312
|
+
If `True` the transfer will be allowed to attempt to improve
|
|
1313
|
+
throughput by using parallel download streams. This may be of no
|
|
1314
|
+
effect if the URI scheme does not support parallel streams or
|
|
1315
|
+
if a global override has been applied. If `False` parallel
|
|
1316
|
+
streams will be disabled.
|
|
1317
|
+
tmpdir : `lsst.resources.ResourcePathExpression` or `None`, optional
|
|
1318
|
+
Explicit override of the temporary directory to use for remote
|
|
1319
|
+
downloads. This directory must be a local POSIX directory and
|
|
1320
|
+
must exist.
|
|
1321
|
+
|
|
947
1322
|
Yields
|
|
948
1323
|
------
|
|
949
1324
|
local : `ResourcePath`
|
|
@@ -968,18 +1343,11 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
968
1343
|
"""
|
|
969
1344
|
if self.isdir():
|
|
970
1345
|
raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
1346
|
+
temp_dir = ResourcePath(tmpdir, forceDirectory=True) if tmpdir is not None else None
|
|
1347
|
+
if temp_dir is not None and not temp_dir.isLocal:
|
|
1348
|
+
raise ValueError(f"Temporary directory for as_local must be local resource not {temp_dir}")
|
|
1349
|
+
with self._as_local(multithreaded=multithreaded, tmpdir=temp_dir) as local_uri:
|
|
975
1350
|
yield local_uri
|
|
976
|
-
finally:
|
|
977
|
-
# The caller might have relocated the temporary file.
|
|
978
|
-
# Do not ever delete if the temporary matches self
|
|
979
|
-
# (since it may have been that a temporary file was made local
|
|
980
|
-
# but already was local).
|
|
981
|
-
if self != local_uri and is_temporary and local_uri.exists():
|
|
982
|
-
local_uri.remove()
|
|
983
1351
|
|
|
984
1352
|
@classmethod
|
|
985
1353
|
@contextlib.contextmanager
|
|
@@ -994,36 +1362,25 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
994
1362
|
Parameters
|
|
995
1363
|
----------
|
|
996
1364
|
prefix : `ResourcePath`, optional
|
|
997
|
-
|
|
998
|
-
file URI in a temporary directory
|
|
999
|
-
|
|
1365
|
+
Temporary directory to use (can be any scheme). Without this the
|
|
1366
|
+
path will be formed as a local file URI in a temporary directory
|
|
1367
|
+
obtained from `lsst.resources.utils.get_tempdir`. Ensuring that the
|
|
1368
|
+
prefix location exists is the responsibility of the caller.
|
|
1000
1369
|
suffix : `str`, optional
|
|
1001
1370
|
A file suffix to be used. The ``.`` should be included in this
|
|
1002
1371
|
suffix.
|
|
1003
1372
|
delete : `bool`, optional
|
|
1004
1373
|
By default the resource will be deleted when the context manager
|
|
1005
1374
|
is exited. Setting this flag to `False` will leave the resource
|
|
1006
|
-
alone.
|
|
1007
|
-
been created.
|
|
1375
|
+
alone.
|
|
1008
1376
|
|
|
1009
1377
|
Yields
|
|
1010
1378
|
------
|
|
1011
1379
|
uri : `ResourcePath`
|
|
1012
1380
|
The temporary URI. Will be removed when the context is completed.
|
|
1013
1381
|
"""
|
|
1014
|
-
use_tempdir = False
|
|
1015
1382
|
if prefix is None:
|
|
1016
|
-
|
|
1017
|
-
# If the user has set a umask that restricts the owner-write bit,
|
|
1018
|
-
# the directory returned from mkdtemp may not initially be
|
|
1019
|
-
# writeable by us
|
|
1020
|
-
ensure_directory_is_writeable(directory)
|
|
1021
|
-
|
|
1022
|
-
prefix = ResourcePath(directory, forceDirectory=True, isTemporary=True)
|
|
1023
|
-
# Record that we need to delete this directory. Can not rely
|
|
1024
|
-
# on isTemporary flag since an external prefix may have that
|
|
1025
|
-
# set as well.
|
|
1026
|
-
use_tempdir = True
|
|
1383
|
+
prefix = ResourcePath(get_tempdir(), forceDirectory=True)
|
|
1027
1384
|
|
|
1028
1385
|
# Need to create a randomized file name. For consistency do not
|
|
1029
1386
|
# use mkstemp for local and something else for remote. Additionally
|
|
@@ -1042,13 +1399,10 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1042
1399
|
yield temporary_uri
|
|
1043
1400
|
finally:
|
|
1044
1401
|
if delete:
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
# It's okay if this does not work because the user
|
|
1050
|
-
# removed the file.
|
|
1051
|
-
temporary_uri.remove()
|
|
1402
|
+
with contextlib.suppress(FileNotFoundError):
|
|
1403
|
+
# It's okay if this does not work because the user
|
|
1404
|
+
# removed the file.
|
|
1405
|
+
temporary_uri.remove()
|
|
1052
1406
|
|
|
1053
1407
|
def read(self, size: int = -1) -> bytes:
|
|
1054
1408
|
"""Open the resource and return the contents in bytes.
|
|
@@ -1247,6 +1601,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1247
1601
|
transfer: str,
|
|
1248
1602
|
overwrite: bool = False,
|
|
1249
1603
|
transaction: TransactionProtocol | None = None,
|
|
1604
|
+
multithreaded: bool = True,
|
|
1250
1605
|
) -> None:
|
|
1251
1606
|
"""Transfer to this URI from another.
|
|
1252
1607
|
|
|
@@ -1263,6 +1618,12 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1263
1618
|
transaction : `~lsst.resources.utils.TransactionProtocol`, optional
|
|
1264
1619
|
A transaction object that can (depending on implementation)
|
|
1265
1620
|
rollback transfers on error. Not guaranteed to be implemented.
|
|
1621
|
+
multithreaded : `bool`, optional
|
|
1622
|
+
If `True` the transfer will be allowed to attempt to improve
|
|
1623
|
+
throughput by using parallel download streams. This may be of no
|
|
1624
|
+
effect if the URI scheme does not support parallel streams or
|
|
1625
|
+
if a global override has been applied. If `False` parallel
|
|
1626
|
+
streams will be disabled.
|
|
1266
1627
|
|
|
1267
1628
|
Notes
|
|
1268
1629
|
-----
|
|
@@ -1550,6 +1911,12 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1550
1911
|
"""
|
|
1551
1912
|
raise NotImplementedError(f"URL signing is not supported for '{self.scheme}'")
|
|
1552
1913
|
|
|
1914
|
+
def _copy_extra_attributes(self, original_uri: ResourcePath) -> None:
|
|
1915
|
+
# May be overridden by subclasses to transfer attributes when a
|
|
1916
|
+
# ResourcePath is constructed using the "clone" version of the
|
|
1917
|
+
# ResourcePath constructor by passing in a ResourcePath object.
|
|
1918
|
+
pass
|
|
1919
|
+
|
|
1553
1920
|
|
|
1554
1921
|
ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
|
|
1555
1922
|
"""Type-annotation alias for objects that can be coerced to ResourcePath.
|