lsst-resources 29.2025.1800__py3-none-any.whl → 29.2025.2000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,190 @@
1
+ # This file is part of lsst-resources.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (https://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # Use of this source code is governed by a 3-clause BSD-style
10
+ # license that can be found in the LICENSE file.
11
+
12
+ from __future__ import annotations
13
+
14
+ __all__ = ("DavReadResourceHandle",)
15
+
16
+ import io
17
+ import logging
18
+ from collections.abc import Callable, Iterable
19
+ from typing import TYPE_CHECKING, AnyStr
20
+
21
+ from ..davutils import DavFileMetadata
22
+ from ._baseResourceHandle import BaseResourceHandle, CloseStatus
23
+
24
+ if TYPE_CHECKING:
25
+ from ..dav import DavResourcePath
26
+
27
+
28
+ class DavReadResourceHandle(BaseResourceHandle[bytes]):
29
+ """WebDAV-based specialization of `.BaseResourceHandle`.
30
+
31
+ Parameters
32
+ ----------
33
+ mode : `str`
34
+ Handle modes as described in the python `io` module.
35
+ log : `~logging.Logger`
36
+ Logger to used when writing messages.
37
+ uri : `lsst.resources.dav.DavResourcePath`
38
+ URI of remote resource.
39
+ newline : `str` or `None`, optional
40
+ When doing multiline operations, break the stream on given character.
41
+ Defaults to newline. If a file is opened in binary mode, this argument
42
+ is not used, as binary files will only split lines on the binary
43
+ newline representation.
44
+ """
45
+
46
+ def __init__(
47
+ self,
48
+ mode: str,
49
+ log: logging.Logger,
50
+ uri: DavResourcePath,
51
+ stat: DavFileMetadata,
52
+ *,
53
+ newline: AnyStr | None = None,
54
+ ) -> None:
55
+ super().__init__(mode, log, uri, newline=newline)
56
+ self._uri: DavResourcePath = uri
57
+ self._stat: DavFileMetadata = stat
58
+ self._current_position = 0
59
+ self._cache: io.BytesIO | None = None
60
+ self._buffer: io.BytesIO | None = None
61
+ self._closed = CloseStatus.OPEN
62
+
63
+ def close(self) -> None:
64
+ self._closed = CloseStatus.CLOSED
65
+ self._cache = None
66
+
67
+ @property
68
+ def closed(self) -> bool:
69
+ return self._closed == CloseStatus.CLOSED
70
+
71
+ def fileno(self) -> int:
72
+ raise io.UnsupportedOperation("DavReadResourceHandle does not have a file number")
73
+
74
+ def flush(self) -> None:
75
+ modes = set(self._mode)
76
+ if {"w", "x", "a", "+"} & modes:
77
+ raise io.UnsupportedOperation("DavReadResourceHandles are read only")
78
+
79
+ @property
80
+ def isatty(self) -> bool | Callable[[], bool]:
81
+ return False
82
+
83
+ def readable(self) -> bool:
84
+ return True
85
+
86
+ def readline(self, size: int = -1) -> bytes:
87
+ raise io.UnsupportedOperation("DavReadResourceHandles Do not support line by line reading")
88
+
89
+ def readlines(self, size: int = -1) -> Iterable[bytes]:
90
+ raise io.UnsupportedOperation("DavReadResourceHandles Do not support line by line reading")
91
+
92
+ def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
93
+ match whence:
94
+ case io.SEEK_SET:
95
+ if offset < 0:
96
+ raise ValueError(f"negative seek value {offset}")
97
+ self._current_position = offset
98
+ case io.SEEK_CUR:
99
+ self._current_position += offset
100
+ case io.SEEK_END:
101
+ self._current_position = self._stat.size + offset
102
+ case _:
103
+ raise ValueError(f"unexpected value {whence} for whence in seek()")
104
+
105
+ if self._current_position < 0:
106
+ self._current_position = 0
107
+
108
+ return self._current_position
109
+
110
+ def seekable(self) -> bool:
111
+ return True
112
+
113
+ def tell(self) -> int:
114
+ return self._current_position
115
+
116
+ def truncate(self, size: int | None = None) -> int:
117
+ raise io.UnsupportedOperation("DavReadResourceHandles Do not support truncation")
118
+
119
+ def writable(self) -> bool:
120
+ return False
121
+
122
+ def write(self, b: bytes, /) -> int:
123
+ raise io.UnsupportedOperation("DavReadResourceHandles are read only")
124
+
125
+ def writelines(self, b: Iterable[bytes], /) -> None:
126
+ raise io.UnsupportedOperation("DavReadResourceHandles are read only")
127
+
128
+ @property
129
+ def _eof(self) -> bool:
130
+ return self._current_position >= self._stat.size
131
+
132
+ def _download_to_cache(self) -> io.BytesIO:
133
+ """Download the entire content of the remote resource to an internal
134
+ memory buffer.
135
+ """
136
+ if self._cache is None:
137
+ self._cache = io.BytesIO()
138
+ self._cache.write(self._uri.read())
139
+
140
+ return self._cache
141
+
142
+ def read(self, size: int = -1) -> bytes:
143
+ if self._eof or size == 0:
144
+ return b""
145
+
146
+ # If this file's size is small than the buffer size configured for
147
+ # this URI's client, download the entire file in one request and cache
148
+ # its content. This avoids multiple roundtrips to the server
149
+ # for retrieving small chunks.
150
+ if self._stat.size <= self._uri._client._config.buffer_size:
151
+ self._download_to_cache()
152
+
153
+ # If we are asked to read the whole file content, cache the entire
154
+ # file content and return a copy-on-write memory view of our internal
155
+ # cache.
156
+ if self._current_position == 0 and size == -1:
157
+ cache = self._download_to_cache()
158
+ self._current_position = self._stat.size
159
+ return cache.getvalue()
160
+
161
+ # This is a partial read. If we have already cached the whole file
162
+ # content use the cache to build the return value.
163
+ if self._cache is not None:
164
+ start = self._current_position
165
+ end = self._current_position = self._stat.size if size < 0 else start + size
166
+ return self._cache.getvalue()[start:end]
167
+
168
+ # We need to make a partial read from the server. Reuse our internal
169
+ # I/O buffer to reduce memory allocations.
170
+ if self._buffer is None:
171
+ self._buffer = io.BytesIO()
172
+
173
+ start = self._current_position
174
+ end = self._stat.size if size < 0 else min(start + size, self._stat.size)
175
+ self._buffer.seek(0)
176
+ self._buffer.write(self._uri.read_range(start=start, end=end - 1))
177
+ count = self._buffer.tell()
178
+ self._current_position += count
179
+ return self._buffer.getvalue()[0:count]
180
+
181
+ def readinto(self, output: bytearray) -> int:
182
+ """Read up to `len(output)` bytes into `output` and return the number
183
+ of bytes read.
184
+ """
185
+ if self._eof or len(output) == 0:
186
+ return 0
187
+
188
+ data = self.read(len(output))
189
+ output[:] = data
190
+ return len(data)
@@ -23,6 +23,7 @@ import os
23
23
  import posixpath
24
24
  import re
25
25
  import urllib.parse
26
+ from collections import defaultdict
26
27
  from pathlib import Path, PurePath, PurePosixPath
27
28
  from random import Random
28
29
  from typing import TypeAlias
@@ -53,8 +54,8 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
53
54
  ESCAPED_HASH = urllib.parse.quote("#")
54
55
 
55
56
 
56
- class MTransferResult(NamedTuple):
57
- """Report on a bulk transfer."""
57
+ class MBulkResult(NamedTuple):
58
+ """Report on a bulk operation."""
58
59
 
59
60
  success: bool
60
61
  exception: Exception | None
@@ -367,9 +368,9 @@ class ResourcePath: # numpydoc ignore=PR02
367
368
 
368
369
  subclass = HttpResourcePath
369
370
  elif parsed.scheme in {"dav", "davs"}:
370
- from .http import HttpResourcePath
371
+ from .dav import DavResourcePath
371
372
 
372
- subclass = HttpResourcePath
373
+ subclass = DavResourcePath
373
374
  elif parsed.scheme == "gs":
374
375
  from .gs import GSResourcePath
375
376
 
@@ -912,6 +913,14 @@ class ResourcePath: # numpydoc ignore=PR02
912
913
  """
913
914
  raise NotImplementedError()
914
915
 
916
@classmethod
def _group_uris(cls, uris: Iterable[ResourcePath]) -> dict[type[ResourcePath], list[ResourcePath]]:
    """Group URIs by class/scheme.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to group.

    Returns
    -------
    grouped : `dict` [ `type` [ `ResourcePath` ], `list` [ `ResourcePath` ] ]
        Mapping of each URI class to the URIs of that class, in the order
        in which they were encountered.
    """
    grouped: defaultdict[type[ResourcePath], list[ResourcePath]] = defaultdict(list)
    for uri in uris:
        grouped[uri.__class__].append(uri)
    # Return a plain dict so that looking up a class with no URIs raises
    # KeyError instead of silently inserting an empty group.
    return dict(grouped)
923
+
915
924
  @classmethod
916
925
  def mexists(
917
926
  cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
@@ -933,18 +942,9 @@ class ResourcePath: # numpydoc ignore=PR02
933
942
  existence : `dict` of [`ResourcePath`, `bool`]
934
943
  Mapping of original URI to boolean indicating existence.
935
944
  """
936
- # Group by scheme to allow a subclass to be able to use
937
- # specialized implementations.
938
- grouped: dict[type, list[ResourcePath]] = {}
939
- for uri in uris:
940
- uri_class = uri.__class__
941
- if uri_class not in grouped:
942
- grouped[uri_class] = []
943
- grouped[uri_class].append(uri)
944
-
945
945
  existence: dict[ResourcePath, bool] = {}
946
- for uri_class in grouped:
947
- existence.update(uri_class._mexists(grouped[uri_class], num_workers=num_workers))
946
+ for uri_class, group in cls._group_uris(uris).items():
947
+ existence.update(uri_class._mexists(group, num_workers=num_workers))
948
948
 
949
949
  return existence
950
950
 
@@ -1029,7 +1029,7 @@ class ResourcePath: # numpydoc ignore=PR02
1029
1029
  overwrite: bool = False,
1030
1030
  transaction: TransactionProtocol | None = None,
1031
1031
  do_raise: bool = True,
1032
- ) -> dict[ResourcePath, MTransferResult]:
1032
+ ) -> dict[ResourcePath, MBulkResult]:
1033
1033
  """Transfer many files in bulk.
1034
1034
 
1035
1035
  Parameters
@@ -1048,14 +1048,16 @@ class ResourcePath: # numpydoc ignore=PR02
1048
1048
  The transaction object must be thread safe.
1049
1049
  do_raise : `bool`, optional
1050
1050
  If `True` an `ExceptionGroup` will be raised containing any
1051
- exceptions raised by the individual transfers. Else a dict
1052
- reporting the status of each `ResourcePath` will be returned.
1051
+ exceptions raised by the individual transfers. If `False`, or if
1052
+ there were no exceptions, a dict reporting the status of each
1053
+ `ResourcePath` will be returned.
1053
1054
 
1054
1055
  Returns
1055
1056
  -------
1056
- copy_status : `dict` [ `ResourcePath`, `MTransferResult` ]
1057
+ copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
1057
1058
  A dict of all the transfer attempts with a value indicating
1058
- whether the transfer succeeded for the target URI.
1059
+ whether the transfer succeeded for the target URI. If ``do_raise``
1060
+ is `True`, this will only be returned if there are no errors.
1059
1061
  """
1060
1062
  pool_executor_class = _get_executor_class()
1061
1063
  if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
@@ -1088,7 +1090,7 @@ class ResourcePath: # numpydoc ignore=PR02
1088
1090
  overwrite: bool = False,
1089
1091
  transaction: TransactionProtocol | None = None,
1090
1092
  do_raise: bool = True,
1091
- ) -> dict[ResourcePath, MTransferResult]:
1093
+ ) -> dict[ResourcePath, MBulkResult]:
1092
1094
  """Transfer many files in bulk.
1093
1095
 
1094
1096
  Parameters
@@ -1112,7 +1114,7 @@ class ResourcePath: # numpydoc ignore=PR02
1112
1114
 
1113
1115
  Returns
1114
1116
  -------
1115
- copy_status : `dict` [ `ResourcePath`, `MTransferResult` ]
1117
+ copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
1116
1118
  A dict of all the transfer attempts with a value indicating
1117
1119
  whether the transfer succeeded for the target URI.
1118
1120
  """
@@ -1128,17 +1130,17 @@ class ResourcePath: # numpydoc ignore=PR02
1128
1130
  ): to_uri
1129
1131
  for from_uri, to_uri in from_to
1130
1132
  }
1131
- results: dict[ResourcePath, MTransferResult] = {}
1133
+ results: dict[ResourcePath, MBulkResult] = {}
1132
1134
  failed = False
1133
1135
  for future in concurrent.futures.as_completed(future_transfers):
1134
1136
  to_uri = future_transfers[future]
1135
1137
  try:
1136
1138
  future.result()
1137
1139
  except Exception as e:
1138
- transferred = MTransferResult(False, e)
1140
+ transferred = MBulkResult(False, e)
1139
1141
  failed = True
1140
1142
  else:
1141
- transferred = MTransferResult(True, None)
1143
+ transferred = MBulkResult(True, None)
1142
1144
  results[to_uri] = transferred
1143
1145
 
1144
1146
  if do_raise and failed:
@@ -1153,6 +1155,81 @@ class ResourcePath: # numpydoc ignore=PR02
1153
1155
  """Remove the resource."""
1154
1156
  raise NotImplementedError()
1155
1157
 
1158
@classmethod
def mremove(
    cls, uris: Iterable[ResourcePath], *, do_raise: bool = True
) -> dict[ResourcePath, MBulkResult]:
    """Remove many resources in bulk.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to remove.
    do_raise : `bool`, optional
        If `True` an `ExceptionGroup` will be raised containing any
        exceptions raised by the individual removals. If `False`, or if
        there were no exceptions, a dict reporting the status of each
        `ResourcePath` will be returned.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Dictionary mapping each URI to a result object indicating whether
        the removal succeeded or resulted in an exception. If ``do_raise``
        is `True` this will only be returned if everything succeeded.

    Raises
    ------
    ExceptionGroup
        Raised if ``do_raise`` is `True` and at least one removal did not
        succeed.
    """
    # Every URI class removes its own members, since some URI schemes
    # support native bulk APIs.
    outcomes: dict[ResourcePath, MBulkResult] = {}
    for uri_class, members in cls._group_uris(uris).items():
        outcomes.update(uri_class._mremove(members))

    if not do_raise or all(outcome.success for outcome in outcomes.values()):
        return outcomes

    plural = "s" if len(outcomes) != 1 else ""
    message = f"Error{plural} removing {len(outcomes)} artifact{plural}"
    errors = tuple(
        outcome.exception for outcome in outcomes.values() if outcome.exception is not None
    )
    raise ExceptionGroup(message, errors)
1196
+
1197
@classmethod
def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
    """Remove multiple URIs using futures.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to remove.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Outcome of each removal, as reported by `_mremove_pool`.
    """
    executor_class = _get_executor_class()
    if not issubclass(executor_class, concurrent.futures.ProcessPoolExecutor):
        return cls._mremove_pool(executor_class, uris)

    # With a process pool, patch the environment so that each subprocess
    # thinks there is only one worker.
    with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
        return cls._mremove_pool(executor_class, uris)
1208
+
1209
@classmethod
def _mremove_pool(
    cls,
    pool_executor_class: _EXECUTOR_TYPE,
    uris: Iterable[ResourcePath],
    *,
    num_workers: int | None = None,
) -> dict[ResourcePath, MBulkResult]:
    """Remove URIs using a futures pool.

    Parameters
    ----------
    pool_executor_class : `_EXECUTOR_TYPE`
        Executor class to instantiate for running the removals.
    uris : iterable of `ResourcePath`
        URIs to remove. ``remove()`` is submitted to the pool for each one.
    num_workers : `int` or `None`, optional
        Number of workers for the pool. If `None` the value returned by
        ``_get_num_workers()`` is used.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Mapping of each URI to ``MBulkResult(True, None)`` on success or
        ``MBulkResult(False, exc)`` if its removal raised an `Exception`.
    """
    worker_count = _get_num_workers() if num_workers is None else num_workers
    outcomes: dict[ResourcePath, MBulkResult] = {}
    with pool_executor_class(max_workers=worker_count) as executor:
        pending = {executor.submit(uri.remove): uri for uri in uris}
        for finished in concurrent.futures.as_completed(pending):
            target = pending[finished]
            try:
                finished.result()
            except Exception as exc:
                outcomes[target] = MBulkResult(False, exc)
            else:
                outcomes[target] = MBulkResult(True, None)
    return outcomes
1232
+
1156
1233
  def isabs(self) -> bool:
1157
1234
  """Indicate that the resource is fully specified.
1158
1235