lsst-resources 29.2025.1800__py3-none-any.whl → 29.2025.2000__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/resources/_resourceHandles/_davResourceHandle.py +190 -0
- lsst/resources/_resourcePath.py +102 -25
- lsst/resources/dav.py +896 -0
- lsst/resources/davutils.py +2584 -0
- lsst/resources/http.py +10 -18
- lsst/resources/s3.py +59 -1
- lsst/resources/s3utils.py +3 -1
- lsst/resources/tests.py +9 -0
- lsst/resources/version.py +1 -1
- {lsst_resources-29.2025.1800.dist-info → lsst_resources-29.2025.2000.dist-info}/METADATA +1 -1
- {lsst_resources-29.2025.1800.dist-info → lsst_resources-29.2025.2000.dist-info}/RECORD +16 -13
- {lsst_resources-29.2025.1800.dist-info → lsst_resources-29.2025.2000.dist-info}/WHEEL +1 -1
- {lsst_resources-29.2025.1800.dist-info → lsst_resources-29.2025.2000.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_resources-29.2025.1800.dist-info → lsst_resources-29.2025.2000.dist-info}/licenses/LICENSE +0 -0
- {lsst_resources-29.2025.1800.dist-info → lsst_resources-29.2025.2000.dist-info}/top_level.txt +0 -0
- {lsst_resources-29.2025.1800.dist-info → lsst_resources-29.2025.2000.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# This file is part of lsst-resources.
|
|
2
|
+
#
|
|
3
|
+
# Developed for the LSST Data Management System.
|
|
4
|
+
# This product includes software developed by the LSST Project
|
|
5
|
+
# (https://www.lsst.org).
|
|
6
|
+
# See the COPYRIGHT file at the top-level directory of this distribution
|
|
7
|
+
# for details of code ownership.
|
|
8
|
+
#
|
|
9
|
+
# Use of this source code is governed by a 3-clause BSD-style
|
|
10
|
+
# license that can be found in the LICENSE file.
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
__all__ = ("DavReadResourceHandle",)
|
|
15
|
+
|
|
16
|
+
import io
|
|
17
|
+
import logging
|
|
18
|
+
from collections.abc import Callable, Iterable
|
|
19
|
+
from typing import TYPE_CHECKING, AnyStr
|
|
20
|
+
|
|
21
|
+
from ..davutils import DavFileMetadata
|
|
22
|
+
from ._baseResourceHandle import BaseResourceHandle, CloseStatus
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from ..dav import DavResourcePath
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DavReadResourceHandle(BaseResourceHandle[bytes]):
|
|
29
|
+
"""WebDAV-based specialization of `.BaseResourceHandle`.
|
|
30
|
+
|
|
31
|
+
Parameters
|
|
32
|
+
----------
|
|
33
|
+
mode : `str`
|
|
34
|
+
Handle modes as described in the python `io` module.
|
|
35
|
+
log : `~logging.Logger`
|
|
36
|
+
Logger to used when writing messages.
|
|
37
|
+
uri : `lsst.resources.dav.DavResourcePath`
|
|
38
|
+
URI of remote resource.
|
|
39
|
+
newline : `str` or `None`, optional
|
|
40
|
+
When doing multiline operations, break the stream on given character.
|
|
41
|
+
Defaults to newline. If a file is opened in binary mode, this argument
|
|
42
|
+
is not used, as binary files will only split lines on the binary
|
|
43
|
+
newline representation.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(
|
|
47
|
+
self,
|
|
48
|
+
mode: str,
|
|
49
|
+
log: logging.Logger,
|
|
50
|
+
uri: DavResourcePath,
|
|
51
|
+
stat: DavFileMetadata,
|
|
52
|
+
*,
|
|
53
|
+
newline: AnyStr | None = None,
|
|
54
|
+
) -> None:
|
|
55
|
+
super().__init__(mode, log, uri, newline=newline)
|
|
56
|
+
self._uri: DavResourcePath = uri
|
|
57
|
+
self._stat: DavFileMetadata = stat
|
|
58
|
+
self._current_position = 0
|
|
59
|
+
self._cache: io.BytesIO | None = None
|
|
60
|
+
self._buffer: io.BytesIO | None = None
|
|
61
|
+
self._closed = CloseStatus.OPEN
|
|
62
|
+
|
|
63
|
+
def close(self) -> None:
|
|
64
|
+
self._closed = CloseStatus.CLOSED
|
|
65
|
+
self._cache = None
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def closed(self) -> bool:
|
|
69
|
+
return self._closed == CloseStatus.CLOSED
|
|
70
|
+
|
|
71
|
+
def fileno(self) -> int:
|
|
72
|
+
raise io.UnsupportedOperation("DavReadResourceHandle does not have a file number")
|
|
73
|
+
|
|
74
|
+
def flush(self) -> None:
|
|
75
|
+
modes = set(self._mode)
|
|
76
|
+
if {"w", "x", "a", "+"} & modes:
|
|
77
|
+
raise io.UnsupportedOperation("DavReadResourceHandles are read only")
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def isatty(self) -> bool | Callable[[], bool]:
|
|
81
|
+
return False
|
|
82
|
+
|
|
83
|
+
def readable(self) -> bool:
|
|
84
|
+
return True
|
|
85
|
+
|
|
86
|
+
def readline(self, size: int = -1) -> bytes:
|
|
87
|
+
raise io.UnsupportedOperation("DavReadResourceHandles Do not support line by line reading")
|
|
88
|
+
|
|
89
|
+
def readlines(self, size: int = -1) -> Iterable[bytes]:
|
|
90
|
+
raise io.UnsupportedOperation("DavReadResourceHandles Do not support line by line reading")
|
|
91
|
+
|
|
92
|
+
def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
|
|
93
|
+
match whence:
|
|
94
|
+
case io.SEEK_SET:
|
|
95
|
+
if offset < 0:
|
|
96
|
+
raise ValueError(f"negative seek value {offset}")
|
|
97
|
+
self._current_position = offset
|
|
98
|
+
case io.SEEK_CUR:
|
|
99
|
+
self._current_position += offset
|
|
100
|
+
case io.SEEK_END:
|
|
101
|
+
self._current_position = self._stat.size + offset
|
|
102
|
+
case _:
|
|
103
|
+
raise ValueError(f"unexpected value {whence} for whence in seek()")
|
|
104
|
+
|
|
105
|
+
if self._current_position < 0:
|
|
106
|
+
self._current_position = 0
|
|
107
|
+
|
|
108
|
+
return self._current_position
|
|
109
|
+
|
|
110
|
+
def seekable(self) -> bool:
|
|
111
|
+
return True
|
|
112
|
+
|
|
113
|
+
def tell(self) -> int:
|
|
114
|
+
return self._current_position
|
|
115
|
+
|
|
116
|
+
def truncate(self, size: int | None = None) -> int:
|
|
117
|
+
raise io.UnsupportedOperation("DavReadResourceHandles Do not support truncation")
|
|
118
|
+
|
|
119
|
+
def writable(self) -> bool:
|
|
120
|
+
return False
|
|
121
|
+
|
|
122
|
+
def write(self, b: bytes, /) -> int:
|
|
123
|
+
raise io.UnsupportedOperation("DavReadResourceHandles are read only")
|
|
124
|
+
|
|
125
|
+
def writelines(self, b: Iterable[bytes], /) -> None:
|
|
126
|
+
raise io.UnsupportedOperation("DavReadResourceHandles are read only")
|
|
127
|
+
|
|
128
|
+
@property
|
|
129
|
+
def _eof(self) -> bool:
|
|
130
|
+
return self._current_position >= self._stat.size
|
|
131
|
+
|
|
132
|
+
def _download_to_cache(self) -> io.BytesIO:
|
|
133
|
+
"""Download the entire content of the remote resource to an internal
|
|
134
|
+
memory buffer.
|
|
135
|
+
"""
|
|
136
|
+
if self._cache is None:
|
|
137
|
+
self._cache = io.BytesIO()
|
|
138
|
+
self._cache.write(self._uri.read())
|
|
139
|
+
|
|
140
|
+
return self._cache
|
|
141
|
+
|
|
142
|
+
def read(self, size: int = -1) -> bytes:
|
|
143
|
+
if self._eof or size == 0:
|
|
144
|
+
return b""
|
|
145
|
+
|
|
146
|
+
# If this file's size is small than the buffer size configured for
|
|
147
|
+
# this URI's client, download the entire file in one request and cache
|
|
148
|
+
# its content. This avoids multiple roundtrips to the server
|
|
149
|
+
# for retrieving small chunks.
|
|
150
|
+
if self._stat.size <= self._uri._client._config.buffer_size:
|
|
151
|
+
self._download_to_cache()
|
|
152
|
+
|
|
153
|
+
# If we are asked to read the whole file content, cache the entire
|
|
154
|
+
# file content and return a copy-on-write memory view of our internal
|
|
155
|
+
# cache.
|
|
156
|
+
if self._current_position == 0 and size == -1:
|
|
157
|
+
cache = self._download_to_cache()
|
|
158
|
+
self._current_position = self._stat.size
|
|
159
|
+
return cache.getvalue()
|
|
160
|
+
|
|
161
|
+
# This is a partial read. If we have already cached the whole file
|
|
162
|
+
# content use the cache to build the return value.
|
|
163
|
+
if self._cache is not None:
|
|
164
|
+
start = self._current_position
|
|
165
|
+
end = self._current_position = self._stat.size if size < 0 else start + size
|
|
166
|
+
return self._cache.getvalue()[start:end]
|
|
167
|
+
|
|
168
|
+
# We need to make a partial read from the server. Reuse our internal
|
|
169
|
+
# I/O buffer to reduce memory allocations.
|
|
170
|
+
if self._buffer is None:
|
|
171
|
+
self._buffer = io.BytesIO()
|
|
172
|
+
|
|
173
|
+
start = self._current_position
|
|
174
|
+
end = self._stat.size if size < 0 else min(start + size, self._stat.size)
|
|
175
|
+
self._buffer.seek(0)
|
|
176
|
+
self._buffer.write(self._uri.read_range(start=start, end=end - 1))
|
|
177
|
+
count = self._buffer.tell()
|
|
178
|
+
self._current_position += count
|
|
179
|
+
return self._buffer.getvalue()[0:count]
|
|
180
|
+
|
|
181
|
+
def readinto(self, output: bytearray) -> int:
|
|
182
|
+
"""Read up to `len(output)` bytes into `output` and return the number
|
|
183
|
+
of bytes read.
|
|
184
|
+
"""
|
|
185
|
+
if self._eof or len(output) == 0:
|
|
186
|
+
return 0
|
|
187
|
+
|
|
188
|
+
data = self.read(len(output))
|
|
189
|
+
output[:] = data
|
|
190
|
+
return len(data)
|
lsst/resources/_resourcePath.py
CHANGED
|
@@ -23,6 +23,7 @@ import os
|
|
|
23
23
|
import posixpath
|
|
24
24
|
import re
|
|
25
25
|
import urllib.parse
|
|
26
|
+
from collections import defaultdict
|
|
26
27
|
from pathlib import Path, PurePath, PurePosixPath
|
|
27
28
|
from random import Random
|
|
28
29
|
from typing import TypeAlias
|
|
@@ -53,8 +54,8 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
|
|
|
53
54
|
ESCAPED_HASH = urllib.parse.quote("#")
|
|
54
55
|
|
|
55
56
|
|
|
56
|
-
class
|
|
57
|
-
"""Report on a bulk
|
|
57
|
+
class MBulkResult(NamedTuple):
|
|
58
|
+
"""Report on a bulk operation."""
|
|
58
59
|
|
|
59
60
|
success: bool
|
|
60
61
|
exception: Exception | None
|
|
@@ -367,9 +368,9 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
367
368
|
|
|
368
369
|
subclass = HttpResourcePath
|
|
369
370
|
elif parsed.scheme in {"dav", "davs"}:
|
|
370
|
-
from .
|
|
371
|
+
from .dav import DavResourcePath
|
|
371
372
|
|
|
372
|
-
subclass =
|
|
373
|
+
subclass = DavResourcePath
|
|
373
374
|
elif parsed.scheme == "gs":
|
|
374
375
|
from .gs import GSResourcePath
|
|
375
376
|
|
|
@@ -912,6 +913,14 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
912
913
|
"""
|
|
913
914
|
raise NotImplementedError()
|
|
914
915
|
|
|
916
|
+
@classmethod
def _group_uris(cls, uris: Iterable[ResourcePath]) -> dict[type[ResourcePath], list[ResourcePath]]:
    """Partition URIs according to their concrete class.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to partition.

    Returns
    -------
    grouped : `dict` [ `type`, `list` [ `ResourcePath` ] ]
        Mapping of each class encountered to the URIs of that class, in the
        order in which they were supplied.
    """
    by_class: dict[type, list[ResourcePath]] = defaultdict(list)
    for candidate in uris:
        bucket = by_class[candidate.__class__]
        bucket.append(candidate)
    return by_class
|
|
923
|
+
|
|
915
924
|
@classmethod
|
|
916
925
|
def mexists(
|
|
917
926
|
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
@@ -933,18 +942,9 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
933
942
|
existence : `dict` of [`ResourcePath`, `bool`]
|
|
934
943
|
Mapping of original URI to boolean indicating existence.
|
|
935
944
|
"""
|
|
936
|
-
# Group by scheme to allow a subclass to be able to use
|
|
937
|
-
# specialized implementations.
|
|
938
|
-
grouped: dict[type, list[ResourcePath]] = {}
|
|
939
|
-
for uri in uris:
|
|
940
|
-
uri_class = uri.__class__
|
|
941
|
-
if uri_class not in grouped:
|
|
942
|
-
grouped[uri_class] = []
|
|
943
|
-
grouped[uri_class].append(uri)
|
|
944
|
-
|
|
945
945
|
existence: dict[ResourcePath, bool] = {}
|
|
946
|
-
for uri_class in
|
|
947
|
-
existence.update(uri_class._mexists(
|
|
946
|
+
for uri_class, group in cls._group_uris(uris).items():
|
|
947
|
+
existence.update(uri_class._mexists(group, num_workers=num_workers))
|
|
948
948
|
|
|
949
949
|
return existence
|
|
950
950
|
|
|
@@ -1029,7 +1029,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1029
1029
|
overwrite: bool = False,
|
|
1030
1030
|
transaction: TransactionProtocol | None = None,
|
|
1031
1031
|
do_raise: bool = True,
|
|
1032
|
-
) -> dict[ResourcePath,
|
|
1032
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1033
1033
|
"""Transfer many files in bulk.
|
|
1034
1034
|
|
|
1035
1035
|
Parameters
|
|
@@ -1048,14 +1048,16 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1048
1048
|
The transaction object must be thread safe.
|
|
1049
1049
|
do_raise : `bool`, optional
|
|
1050
1050
|
If `True` an `ExceptionGroup` will be raised containing any
|
|
1051
|
-
exceptions raised by the individual transfers.
|
|
1052
|
-
reporting the status of each
|
|
1051
|
+
exceptions raised by the individual transfers. If `False`, or if
|
|
1052
|
+
there were no exceptions, a dict reporting the status of each
|
|
1053
|
+
`ResourcePath` will be returned.
|
|
1053
1054
|
|
|
1054
1055
|
Returns
|
|
1055
1056
|
-------
|
|
1056
|
-
copy_status : `dict` [ `ResourcePath`, `
|
|
1057
|
+
copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
1057
1058
|
A dict of all the transfer attempts with a value indicating
|
|
1058
|
-
whether the transfer succeeded for the target URI.
|
|
1059
|
+
whether the transfer succeeded for the target URI. If ``do_raise``
|
|
1060
|
+
is `True`, this will only be returned if there are no errors.
|
|
1059
1061
|
"""
|
|
1060
1062
|
pool_executor_class = _get_executor_class()
|
|
1061
1063
|
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
@@ -1088,7 +1090,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1088
1090
|
overwrite: bool = False,
|
|
1089
1091
|
transaction: TransactionProtocol | None = None,
|
|
1090
1092
|
do_raise: bool = True,
|
|
1091
|
-
) -> dict[ResourcePath,
|
|
1093
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
1092
1094
|
"""Transfer many files in bulk.
|
|
1093
1095
|
|
|
1094
1096
|
Parameters
|
|
@@ -1112,7 +1114,7 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1112
1114
|
|
|
1113
1115
|
Returns
|
|
1114
1116
|
-------
|
|
1115
|
-
copy_status : `dict` [ `ResourcePath`, `
|
|
1117
|
+
copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
|
|
1116
1118
|
A dict of all the transfer attempts with a value indicating
|
|
1117
1119
|
whether the transfer succeeded for the target URI.
|
|
1118
1120
|
"""
|
|
@@ -1128,17 +1130,17 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1128
1130
|
): to_uri
|
|
1129
1131
|
for from_uri, to_uri in from_to
|
|
1130
1132
|
}
|
|
1131
|
-
results: dict[ResourcePath,
|
|
1133
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
1132
1134
|
failed = False
|
|
1133
1135
|
for future in concurrent.futures.as_completed(future_transfers):
|
|
1134
1136
|
to_uri = future_transfers[future]
|
|
1135
1137
|
try:
|
|
1136
1138
|
future.result()
|
|
1137
1139
|
except Exception as e:
|
|
1138
|
-
transferred =
|
|
1140
|
+
transferred = MBulkResult(False, e)
|
|
1139
1141
|
failed = True
|
|
1140
1142
|
else:
|
|
1141
|
-
transferred =
|
|
1143
|
+
transferred = MBulkResult(True, None)
|
|
1142
1144
|
results[to_uri] = transferred
|
|
1143
1145
|
|
|
1144
1146
|
if do_raise and failed:
|
|
@@ -1153,6 +1155,81 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
1153
1155
|
"""Remove the resource."""
|
|
1154
1156
|
raise NotImplementedError()
|
|
1155
1157
|
|
|
1158
|
+
@classmethod
def mremove(
    cls, uris: Iterable[ResourcePath], *, do_raise: bool = True
) -> dict[ResourcePath, MBulkResult]:
    """Delete many URIs in a single call.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The resources to delete.
    do_raise : `bool`, optional
        If `True` and at least one removal failed, an `ExceptionGroup`
        holding the exceptions recorded for the individual removals is
        raised. If `False`, or if nothing failed, the per-URI report is
        returned instead.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Report, keyed by URI, stating whether each removal succeeded and
        the exception recorded if it did not. With ``do_raise`` set to
        `True` this is only returned when every removal succeeded.
    """
    # Delegate to each concrete class in turn since some URI schemes
    # support native bulk APIs.
    outcomes: dict[ResourcePath, MBulkResult] = {}
    for uri_class, members in cls._group_uris(uris).items():
        outcomes.update(uri_class._mremove(members))

    if not do_raise:
        return outcomes
    if all(outcome.success for outcome in outcomes.values()):
        return outcomes

    s = "" if len(outcomes) == 1 else "s"
    errors = tuple(
        outcome.exception for outcome in outcomes.values() if outcome.exception is not None
    )
    raise ExceptionGroup(f"Error{s} removing {len(outcomes)} artifact{s}", errors)
|
|
1196
|
+
|
|
1197
|
+
@classmethod
def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
    """Remove several URIs through an executor pool.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to remove.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Outcome of the removal of each URI.
    """
    executor_class = _get_executor_class()
    if not issubclass(executor_class, concurrent.futures.ProcessPoolExecutor):
        return cls._mremove_pool(executor_class, uris)

    # With a process pool, patch the environment to make it think there is
    # only one worker for each subprocess.
    with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
        return cls._mremove_pool(executor_class, uris)
|
|
1208
|
+
|
|
1209
|
+
@classmethod
def _mremove_pool(
    cls,
    pool_executor_class: _EXECUTOR_TYPE,
    uris: Iterable[ResourcePath],
    *,
    num_workers: int | None = None,
) -> dict[ResourcePath, MBulkResult]:
    """Submit one removal per URI to an executor pool and gather outcomes.

    Parameters
    ----------
    pool_executor_class : `type`
        Executor class to instantiate for running the removals.
    uris : iterable of `ResourcePath`
        URIs to remove.
    num_workers : `int` or `None`, optional
        Number of workers for the pool. If `None` the value returned by
        ``_get_num_workers()`` is used.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Outcome of the removal of each URI. An `Exception` raised by a
        removal is captured in the corresponding result rather than
        propagated.
    """
    worker_count = _get_num_workers() if num_workers is None else num_workers
    outcomes: dict[ResourcePath, MBulkResult] = {}
    with pool_executor_class(max_workers=worker_count) as pool:
        # Remember which URI each future belongs to.
        pending = {}
        for uri in uris:
            pending[pool.submit(uri.remove)] = uri

        for finished in concurrent.futures.as_completed(pending):
            target = pending[finished]
            try:
                finished.result()
            except Exception as exc:
                outcomes[target] = MBulkResult(False, exc)
            else:
                outcomes[target] = MBulkResult(True, None)
    return outcomes
|
|
1232
|
+
|
|
1156
1233
|
def isabs(self) -> bool:
|
|
1157
1234
|
"""Indicate that the resource is fully specified.
|
|
1158
1235
|
|