PyPI - lsst-resources - Versions diffs - 29.2025.1800__py3-none-any.whl → 29.2025.2000__py3-none-any.whl - Mend

lsst-resources 29.2025.1800__py3-none-any.whl → 29.2025.2000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

lsst/resources/_resourceHandles/_davResourceHandle.py ADDED Viewed

@@ -0,0 +1,190 @@
+# This file is part of lsst-resources.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# Use of this source code is governed by a 3-clause BSD-style
+# license that can be found in the LICENSE file.
+from __future__ import annotations
+__all__ = ("DavReadResourceHandle",)
+import io
+import logging
+from collections.abc import Callable, Iterable
+from typing import TYPE_CHECKING, AnyStr
+from ..davutils import DavFileMetadata
+from ._baseResourceHandle import BaseResourceHandle, CloseStatus
+if TYPE_CHECKING:
+    from ..dav import DavResourcePath
+class DavReadResourceHandle(BaseResourceHandle[bytes]):
+    """WebDAV-based specialization of `.BaseResourceHandle`.
+    The handle is read-only and seekable. Resources no larger than the
+    client's configured buffer size are downloaded once and served from an
+    in-memory cache; larger resources are read with ranged requests.
+    Parameters
+    ----------
+    mode : `str`
+        Handle modes as described in the python `io` module.
+    log : `~logging.Logger`
+        Logger to use when writing messages.
+    uri : `lsst.resources.dav.DavResourcePath`
+        URI of remote resource.
+    stat : `lsst.resources.davutils.DavFileMetadata`
+        Metadata of the remote resource. Its ``size`` attribute is used as
+        the length of the resource for seeking and end-of-file detection.
+    newline : `str` or `None`, optional
+        When doing multiline operations, break the stream on given character.
+        Defaults to newline. If a file is opened in binary mode, this argument
+        is not used, as binary files will only split lines on the binary
+        newline representation.
+    """
+    def __init__(
+        self,
+        mode: str,
+        log: logging.Logger,
+        uri: DavResourcePath,
+        stat: DavFileMetadata,
+        *,
+        newline: AnyStr | None = None,
+    ) -> None:
+        super().__init__(mode, log, uri, newline=newline)
+        self._uri: DavResourcePath = uri
+        self._stat: DavFileMetadata = stat
+        # Offset of the next byte to be returned by read().
+        self._current_position = 0
+        # Whole-file content, populated lazily by _download_to_cache().
+        self._cache: io.BytesIO | None = None
+        # Scratch buffer reused across ranged reads from the server.
+        self._buffer: io.BytesIO | None = None
+        self._closed = CloseStatus.OPEN
+    def close(self) -> None:
+        """Mark the handle as closed and release the in-memory buffers."""
+        self._closed = CloseStatus.CLOSED
+        self._cache = None
+        # The scratch buffer is recreated on demand by read(), so dropping
+        # it here only frees memory.
+        self._buffer = None
+    @property
+    def closed(self) -> bool:
+        """Whether `close` has been called on this handle."""
+        return self._closed == CloseStatus.CLOSED
+    def fileno(self) -> int:
+        """Always raise: there is no underlying file descriptor."""
+        raise io.UnsupportedOperation("DavReadResourceHandle does not have a file number")
+    def flush(self) -> None:
+        """Do nothing for read modes; raise if the mode has a write flag."""
+        modes = set(self._mode)
+        if {"w", "x", "a", "+"} & modes:
+            raise io.UnsupportedOperation("DavReadResourceHandles are read only")
+    @property
+    def isatty(self) -> bool | Callable[[], bool]:
+        """Always `False`."""
+        return False
+    def readable(self) -> bool:
+        """Return `True`: this handle supports reading."""
+        return True
+    def readline(self, size: int = -1) -> bytes:
+        """Always raise: line-oriented reading is not supported."""
+        raise io.UnsupportedOperation("DavReadResourceHandles Do not support line by line reading")
+    def readlines(self, size: int = -1) -> Iterable[bytes]:
+        """Always raise: line-oriented reading is not supported."""
+        raise io.UnsupportedOperation("DavReadResourceHandles Do not support line by line reading")
+    def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
+        """Move the read position and return the new absolute position.
+        Parameters
+        ----------
+        offset : `int`
+            Offset relative to the position selected by ``whence``.
+        whence : `int`, optional
+            One of `io.SEEK_SET`, `io.SEEK_CUR` or `io.SEEK_END`.
+        Returns
+        -------
+        position : `int`
+            New absolute position. A resulting negative position is clamped
+            to 0.
+        Raises
+        ------
+        ValueError
+            Raised if ``whence`` is not recognized, or if ``offset`` is
+            negative with `io.SEEK_SET`.
+        """
+        match whence:
+            case io.SEEK_SET:
+                if offset < 0:
+                    raise ValueError(f"negative seek value {offset}")
+                self._current_position = offset
+            case io.SEEK_CUR:
+                self._current_position += offset
+            case io.SEEK_END:
+                self._current_position = self._stat.size + offset
+            case _:
+                raise ValueError(f"unexpected value {whence} for whence in seek()")
+        if self._current_position < 0:
+            self._current_position = 0
+        return self._current_position
+    def seekable(self) -> bool:
+        """Return `True`: this handle supports seeking."""
+        return True
+    def tell(self) -> int:
+        """Return the current read position."""
+        return self._current_position
+    def truncate(self, size: int | None = None) -> int:
+        """Always raise: the handle is read-only."""
+        raise io.UnsupportedOperation("DavReadResourceHandles Do not support truncation")
+    def writable(self) -> bool:
+        """Return `False`: this handle does not support writing."""
+        return False
+    def write(self, b: bytes, /) -> int:
+        """Always raise: the handle is read-only."""
+        raise io.UnsupportedOperation("DavReadResourceHandles are read only")
+    def writelines(self, b: Iterable[bytes], /) -> None:
+        """Always raise: the handle is read-only."""
+        raise io.UnsupportedOperation("DavReadResourceHandles are read only")
+    @property
+    def _eof(self) -> bool:
+        """Whether the read position is at or past the resource size."""
+        return self._current_position >= self._stat.size
+    def _download_to_cache(self) -> io.BytesIO:
+        """Download the entire content of the remote resource to an internal
+        memory buffer.
+        The download happens at most once per cache lifetime; later calls
+        return the existing buffer.
+        """
+        if self._cache is None:
+            self._cache = io.BytesIO()
+            self._cache.write(self._uri.read())
+        return self._cache
+    def read(self, size: int = -1) -> bytes:
+        """Read up to ``size`` bytes from the current position.
+        Parameters
+        ----------
+        size : `int`, optional
+            Maximum number of bytes to return. A negative value or `None`
+            means read everything up to the end of the resource.
+        Returns
+        -------
+        data : `bytes`
+            The bytes read. Empty when positioned at the end of the resource
+            or when ``size`` is 0.
+        """
+        # Follow the `io` convention that `None` means "read everything".
+        if size is None:
+            size = -1
+        if self._eof or size == 0:
+            return b""
+        # If this file's size is smaller than the buffer size configured for
+        # this URI's client, download the entire file in one request and cache
+        # its content. This avoids multiple roundtrips to the server
+        # for retrieving small chunks.
+        if self._stat.size <= self._uri._client._config.buffer_size:
+            self._download_to_cache()
+        # If we are asked to read the whole file content, cache the entire
+        # file content and return the content of our internal cache.
+        if self._current_position == 0 and size < 0:
+            cache = self._download_to_cache()
+            self._current_position = self._stat.size
+            return cache.getvalue()
+        # This is a partial read. If we have already cached the whole file
+        # content use the cache to build the return value.
+        if self._cache is not None:
+            start = self._current_position
+            # Clamp to the resource size so that tell() never reports a
+            # position beyond the end after a short read.
+            end = self._stat.size if size < 0 else min(start + size, self._stat.size)
+            self._current_position = end
+            return self._cache.getvalue()[start:end]
+        # We need to make a partial read from the server. Reuse our internal
+        # I/O buffer to reduce memory allocations.
+        if self._buffer is None:
+            self._buffer = io.BytesIO()
+        start = self._current_position
+        end = self._stat.size if size < 0 else min(start + size, self._stat.size)
+        self._buffer.seek(0)
+        # The range passed to read_range() has an inclusive end offset.
+        self._buffer.write(self._uri.read_range(start=start, end=end - 1))
+        # The buffer may still hold stale bytes from a previous, longer read,
+        # so only the first `count` bytes belong to this request.
+        count = self._buffer.tell()
+        self._current_position += count
+        return self._buffer.getvalue()[0:count]
+    def readinto(self, output: bytearray) -> int:
+        """Read up to `len(output)` bytes into `output` and return the number
+        of bytes read.
+        Only the leading bytes of ``output`` are overwritten; its length is
+        never changed, even when fewer bytes than requested are available.
+        """
+        if self._eof or len(output) == 0:
+            return 0
+        data = self.read(len(output))
+        count = len(data)
+        # Assign to a slice of matching length: `output[:] = data` would
+        # shrink a bytearray on a short read and fail for fixed-size buffers.
+        output[:count] = data
+        return count

lsst/resources/_resourcePath.py CHANGED Viewed

@@ -23,6 +23,7 @@ import os
 import posixpath
 import re
 import urllib.parse
+from collections import defaultdict
 from pathlib import Path, PurePath, PurePosixPath
 from random import Random
 from typing import TypeAlias
@@ -53,8 +54,8 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
 ESCAPED_HASH = urllib.parse.quote("#")
-class MTransferResult(NamedTuple):
-    """Report on a bulk transfer."""
+class MBulkResult(NamedTuple):
+    """Report on a bulk operation."""
     success: bool
     exception: Exception | None
@@ -367,9 +368,9 @@ class ResourcePath:  # numpydoc ignore=PR02
                 subclass = HttpResourcePath
             elif parsed.scheme in {"dav", "davs"}:
-                from .http import HttpResourcePath
+                from .dav import DavResourcePath
-                subclass = HttpResourcePath
+                subclass = DavResourcePath
             elif parsed.scheme == "gs":
                 from .gs import GSResourcePath
@@ -912,6 +913,14 @@ class ResourcePath:  # numpydoc ignore=PR02
         """
         raise NotImplementedError()
+    @classmethod
+    def _group_uris(cls, uris: Iterable[ResourcePath]) -> dict[type[ResourcePath], list[ResourcePath]]:
+        """Partition URIs by their concrete class.
+        Each key is the class of one or more of the given URIs and each
+        value lists the URIs of that class in the order they were supplied.
+        """
+        by_class: dict[type, list[ResourcePath]] = defaultdict(list)
+        for resource in uris:
+            members = by_class[resource.__class__]
+            members.append(resource)
+        return by_class
     @classmethod
     def mexists(
         cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
@@ -933,18 +942,9 @@ class ResourcePath:  # numpydoc ignore=PR02
         existence : `dict` of [`ResourcePath`, `bool`]
             Mapping of original URI to boolean indicating existence.
         """
-        # Group by scheme to allow a subclass to be able to use
-        # specialized implementations.
-        grouped: dict[type, list[ResourcePath]] = {}
-        for uri in uris:
-            uri_class = uri.__class__
-            if uri_class not in grouped:
-                grouped[uri_class] = []
-            grouped[uri_class].append(uri)
         existence: dict[ResourcePath, bool] = {}
-        for uri_class in grouped:
-            existence.update(uri_class._mexists(grouped[uri_class], num_workers=num_workers))
+        for uri_class, group in cls._group_uris(uris).items():
+            existence.update(uri_class._mexists(group, num_workers=num_workers))
         return existence
@@ -1029,7 +1029,7 @@ class ResourcePath:  # numpydoc ignore=PR02
         overwrite: bool = False,
         transaction: TransactionProtocol | None = None,
         do_raise: bool = True,
-    ) -> dict[ResourcePath, MTransferResult]:
+    ) -> dict[ResourcePath, MBulkResult]:
         """Transfer many files in bulk.
         Parameters
@@ -1048,14 +1048,16 @@ class ResourcePath:  # numpydoc ignore=PR02
             The transaction object must be thread safe.
         do_raise : `bool`, optional
             If `True` an `ExceptionGroup` will be raised containing any
-            exceptions raised by the individual transfers. Else a dict
-            reporting the status of each `ResourcePath` will be returned.
+            exceptions raised by the individual transfers. If `False`, or if
+            there were no exceptions, a dict reporting the status of each
+            `ResourcePath` will be returned.
         Returns
         -------
-        copy_status : `dict` [ `ResourcePath`, `MTransferResult` ]
+        copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
             A dict of all the transfer attempts with a value indicating
-            whether the transfer succeeded for the target URI.
+            whether the transfer succeeded for the target URI. If ``do_raise``
+            is `True`, this will only be returned if there are no errors.
         """
         pool_executor_class = _get_executor_class()
         if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
@@ -1088,7 +1090,7 @@ class ResourcePath:  # numpydoc ignore=PR02
         overwrite: bool = False,
         transaction: TransactionProtocol | None = None,
         do_raise: bool = True,
-    ) -> dict[ResourcePath, MTransferResult]:
+    ) -> dict[ResourcePath, MBulkResult]:
         """Transfer many files in bulk.
         Parameters
@@ -1112,7 +1114,7 @@ class ResourcePath:  # numpydoc ignore=PR02
         Returns
         -------
-        copy_status : `dict` [ `ResourcePath`, `MTransferResult` ]
+        copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
             A dict of all the transfer attempts with a value indicating
             whether the transfer succeeded for the target URI.
         """
@@ -1128,17 +1130,17 @@ class ResourcePath:  # numpydoc ignore=PR02
                 ): to_uri
                 for from_uri, to_uri in from_to
             }
-            results: dict[ResourcePath, MTransferResult] = {}
+            results: dict[ResourcePath, MBulkResult] = {}
             failed = False
             for future in concurrent.futures.as_completed(future_transfers):
                 to_uri = future_transfers[future]
                 try:
                     future.result()
                 except Exception as e:
-                    transferred = MTransferResult(False, e)
+                    transferred = MBulkResult(False, e)
                     failed = True
                 else:
-                    transferred = MTransferResult(True, None)
+                    transferred = MBulkResult(True, None)
                 results[to_uri] = transferred
         if do_raise and failed:
@@ -1153,6 +1155,81 @@ class ResourcePath:  # numpydoc ignore=PR02
         """Remove the resource."""
         raise NotImplementedError()
+    @classmethod
+    def mremove(
+        cls, uris: Iterable[ResourcePath], *, do_raise: bool = True
+    ) -> dict[ResourcePath, MBulkResult]:
+        """Remove multiple URIs at once.
+        Parameters
+        ----------
+        uris : iterable of `ResourcePath`
+            URIs to remove.
+        do_raise : `bool`, optional
+            If `True` an `ExceptionGroup` will be raised containing any
+            exceptions raised by the individual removals. If `False`, or if
+            there were no exceptions, a dict reporting the status of each
+            `ResourcePath` will be returned.
+        Returns
+        -------
+        results : `dict` [ `ResourcePath`, `MBulkResult` ]
+            Dictionary mapping each URI to a result object indicating whether
+            the removal succeeded or resulted in an exception. If ``do_raise``
+            is `True` this will only be returned if everything succeeded.
+        Raises
+        ------
+        ExceptionGroup
+            Raised if ``do_raise`` is `True` and at least one removal failed.
+        """
+        # Group URIs by scheme since some URI schemes support native bulk
+        # APIs.
+        results: dict[ResourcePath, MBulkResult] = {}
+        for uri_class, group in cls._group_uris(uris).items():
+            results.update(uri_class._mremove(group))
+        if not do_raise:
+            return results
+        if any(not res.success for res in results.values()):
+            errors: list[Exception] = []
+            for uri, res in results.items():
+                if res.exception is not None:
+                    errors.append(res.exception)
+                elif not res.success:
+                    # ExceptionGroup rejects an empty sequence, so a failure
+                    # reported without an exception still needs an entry.
+                    errors.append(RuntimeError(f"Removal of {uri} failed but no exception was recorded"))
+            # The count in the message is the number of URIs attempted, not
+            # the number of failures.
+            s = "s" if len(results) != 1 else ""
+            raise ExceptionGroup(f"Error{s} removing {len(results)} artifact{s}", tuple(errors))
+        return results
+    @classmethod
+    def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
+        """Remove multiple URIs using futures.
+        The executor class comes from ``_get_executor_class()`` and the work
+        is delegated to `_mremove_pool`.
+        """
+        executor_class = _get_executor_class()
+        if not issubclass(executor_class, concurrent.futures.ProcessPoolExecutor):
+            return cls._mremove_pool(executor_class, uris)
+        # Patch the environment to make it think there is only one worker
+        # for each subprocess.
+        with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
+            return cls._mremove_pool(executor_class, uris)
+    @classmethod
+    def _mremove_pool(
+        cls,
+        pool_executor_class: _EXECUTOR_TYPE,
+        uris: Iterable[ResourcePath],
+        *,
+        num_workers: int | None = None,
+    ) -> dict[ResourcePath, MBulkResult]:
+        """Remove URIs using a futures pool.
+        One ``remove`` call is submitted per URI. An exception raised by a
+        removal is captured in the returned mapping rather than propagated.
+        """
+        if num_workers is not None:
+            max_workers = num_workers
+        else:
+            max_workers = _get_num_workers()
+        outcome: dict[ResourcePath, MBulkResult] = {}
+        with pool_executor_class(max_workers=max_workers) as pool:
+            pending = {pool.submit(uri.remove): uri for uri in uris}
+            for finished in concurrent.futures.as_completed(pending):
+                target = pending[finished]
+                try:
+                    finished.result()
+                except Exception as exc:
+                    outcome[target] = MBulkResult(False, exc)
+                else:
+                    outcome[target] = MBulkResult(True, None)
+        return outcome
     def isabs(self) -> bool:
         """Indicate that the resource is fully specified.

lsst-resources 29.2025.1800__py3-none-any.whl → 29.2025.2000__py3-none-any.whl

lsst-resources 29.2025.1800__py3-none-any.whl → 29.2025.2000__py3-none-any.whl