lsst-resources 29.2025.2100.tar.gz → 29.2025.2500.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {lsst_resources-29.2025.2100/python/lsst_resources.egg-info → lsst_resources-29.2025.2500}/PKG-INFO +1 -1
  2. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/doc/lsst.resources/CHANGES.rst +31 -0
  3. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/_resourceHandles/_fileResourceHandle.py +1 -1
  4. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/_resourceHandles/_s3ResourceHandle.py +3 -17
  5. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/_resourcePath.py +17 -20
  6. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/dav.py +25 -9
  7. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/file.py +7 -6
  8. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/gs.py +6 -3
  9. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/http.py +173 -30
  10. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/mem.py +7 -1
  11. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/s3.py +14 -12
  12. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/tests.py +13 -3
  13. lsst_resources-29.2025.2500/python/lsst/resources/version.py +2 -0
  14. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500/python/lsst_resources.egg-info}/PKG-INFO +1 -1
  15. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_dav.py +6 -6
  16. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_http.py +41 -6
  17. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_location.py +5 -5
  18. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_s3.py +11 -0
  19. lsst_resources-29.2025.2100/python/lsst/resources/version.py +0 -2
  20. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/COPYRIGHT +0 -0
  21. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/LICENSE +0 -0
  22. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/MANIFEST.in +0 -0
  23. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/README.md +0 -0
  24. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/doc/lsst.resources/dav.rst +0 -0
  25. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/doc/lsst.resources/index.rst +0 -0
  26. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/doc/lsst.resources/internal-api.rst +0 -0
  27. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/doc/lsst.resources/s3.rst +0 -0
  28. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/pyproject.toml +0 -0
  29. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/__init__.py +0 -0
  30. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/__init__.py +0 -0
  31. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/_resourceHandles/__init__.py +0 -0
  32. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/_resourceHandles/_baseResourceHandle.py +0 -0
  33. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/_resourceHandles/_davResourceHandle.py +0 -0
  34. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/_resourceHandles/_httpResourceHandle.py +0 -0
  35. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/davutils.py +0 -0
  36. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/location.py +0 -0
  37. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/packageresource.py +0 -0
  38. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/py.typed +0 -0
  39. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/s3utils.py +0 -0
  40. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/schemeless.py +0 -0
  41. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst/resources/utils.py +0 -0
  42. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst_resources.egg-info/SOURCES.txt +0 -0
  43. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst_resources.egg-info/dependency_links.txt +0 -0
  44. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst_resources.egg-info/requires.txt +0 -0
  45. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst_resources.egg-info/top_level.txt +0 -0
  46. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/python/lsst_resources.egg-info/zip-safe +0 -0
  47. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/setup.cfg +0 -0
  48. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_file.py +0 -0
  49. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_gs.py +0 -0
  50. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_mem.py +0 -0
  51. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_resource.py +0 -0
  52. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_s3utils.py +0 -0
  53. {lsst_resources-29.2025.2100 → lsst_resources-29.2025.2500}/tests/test_schemeless.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lsst-resources
3
- Version: 29.2025.2100
3
+ Version: 29.2025.2500
4
4
  Summary: An abstraction layer for reading and writing from URI file resources.
5
5
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
6
6
  License: BSD 3-Clause License
@@ -1,3 +1,34 @@
1
+ Resources v29.1.0 (2025-06-13)
2
+ ==============================
3
+
4
+ Miscellaneous Changes of Minor Interest
5
+ ---------------------------------------
6
+
7
+ New Features
8
+ ------------
9
+
10
+ - * Added ``ResourcePath.mtransfer()`` for doing multiple transfers in parallel.
11
+ The number of workers can be controlled using the ``$LSST_RESOURCES_NUM_WORKERS`` environment variable.
12
+ * ``transfer_from`` and ``as_local`` now have an additional parameter that can control whether implicit multithreading should be used for a single download.
13
+ * ``as_local`` has a new parameter that can be used to explicitly specify the local download location.
14
+ This can be used for ``transfer_from`` to allow the file to be downloaded to the local destination directory immediately. (`DM-31824 <https://rubinobs.atlassian.net/browse/DM-31824>`_)
15
+ - Added specialized support for schemes ``davs://`` and ``dav://`` hosted by storage endpoints implementing WebDAV protocol as described in `RFC-4918 HTTP Extensions for Web Distributed Authoring and Versioning (WebDAV) <http://www.webdav.org/specs/rfc4918.html>`_. (`DM-49784 <https://rubinobs.atlassian.net/browse/DM-49784>`_)
16
+ - Added new bulk removal API: ``ResourcePath.mremove()``.
17
+ This can be 10 times faster than calling ``remove()`` in a loop. (`DM-50724 <https://rubinobs.atlassian.net/browse/DM-50724>`_)
18
+
19
+
20
+ Miscellaneous Changes of Minor Interest
21
+ ---------------------------------------
22
+
23
+ - It is now possible to control how bulk APIs such as ``mexists()`` and ``mtransfer()`` work.
24
+ Added ``$LSST_RESOURCES_NUM_WORKERS`` environment variable to specify how many workers should be used.
25
+ The default is derived from the number of CPUs but capped at 10.
26
+ Also the ``mexists()`` method has an explicit parameter to allow the number of workers to be specified.
27
+ Added ``$LSST_RESOURCES_EXECUTOR`` to specify how the jobs should be executed.
28
+ The default is ``threads`` (which is the same as used previously) but on Linux more performance may be achievable by setting this environment variable to ``process``. (`DM-50074 <https://rubinobs.atlassian.net/browse/DM-50074>`_)
29
+ - * Fixed problem with multiple ``flush()`` calls with S3 resource handle for small chunks.
30
+ * Fixed bug in File resource handle where ``flush()`` was mistakenly calling ``close()``. (`DM-51087 <https://rubinobs.atlassian.net/browse/DM-51087>`_)
31
+
1
32
  Resources v29.0.0 (2025-03-25)
2
33
  ==============================
3
34
 
@@ -79,7 +79,7 @@ class FileResourceHandle(BaseResourceHandle[U]):
79
79
  return self._fileHandle.fileno()
80
80
 
81
81
  def flush(self) -> None:
82
- self._fileHandle.close()
82
+ self._fileHandle.flush()
83
83
 
84
84
  @property
85
85
  def isatty(self) -> bool:
@@ -14,14 +14,12 @@ from __future__ import annotations
14
14
  __all__ = ("S3ResourceHandle",)
15
15
 
16
16
  import logging
17
- import warnings
18
17
  from collections.abc import Iterable, Mapping
19
18
  from io import SEEK_CUR, SEEK_END, SEEK_SET, BytesIO, UnsupportedOperation
20
19
  from typing import TYPE_CHECKING
21
20
 
22
21
  from botocore.exceptions import ClientError
23
22
 
24
- from lsst.utils.introspection import find_outside_stacklevel
25
23
  from lsst.utils.timer import time_this
26
24
 
27
25
  from ..s3utils import all_retryable_errors, backoff, max_retry_time, translate_client_error
@@ -168,21 +166,9 @@ class S3ResourceHandle(BaseResourceHandle[bytes]):
168
166
  # written to.
169
167
  s3_min_bits = 5 * 1024 * 1024 # S3 flush threshold is 5 Mib.
170
168
  if (
171
- (self.tell() - (self._last_flush_position or 0)) < s3_min_bits
172
- and self._closed != CloseStatus.CLOSING
173
- and not self._warned
174
- ):
175
- amount = s3_min_bits / (1024 * 1024)
176
- warnings.warn(
177
- f"S3 does not support flushing objects less than {amount} Mib, skipping",
178
- stacklevel=find_outside_stacklevel(
179
- "lsst.resources",
180
- "backoff",
181
- "contextlib",
182
- allow_modules={"lsst.resources.tests"},
183
- ),
184
- )
185
- self._warned = True
169
+ self.tell() - (self._last_flush_position or 0)
170
+ ) < s3_min_bits and self._closed != CloseStatus.CLOSING:
171
+ # Return until the buffer is big enough.
186
172
  return
187
173
  # nothing to write, don't create an empty upload
188
174
  if self.tell() == 0:
@@ -649,9 +649,11 @@ class ResourcePath: # numpydoc ignore=PR02
649
649
  # Disallow a change in scheme
650
650
  if "scheme" in kwargs:
651
651
  raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
652
- return self.__class__(
652
+ result = self.__class__(
653
653
  self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
654
654
  )
655
+ result._copy_extra_attributes(self)
656
+ return result
655
657
 
656
658
  def updatedFile(self, newfile: str) -> ResourcePath:
657
659
  """Return new URI with an updated final component of the path.
@@ -1253,7 +1255,10 @@ class ResourcePath: # numpydoc ignore=PR02
1253
1255
  """
1254
1256
  return self
1255
1257
 
1256
- def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
1258
+ @contextlib.contextmanager
1259
+ def _as_local(
1260
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
1261
+ ) -> Iterator[ResourcePath]:
1257
1262
  """Return the location of the (possibly remote) resource as local file.
1258
1263
 
1259
1264
  This is a helper function for `as_local` context manager.
@@ -1272,13 +1277,9 @@ class ResourcePath: # numpydoc ignore=PR02
1272
1277
 
1273
1278
  Returns
1274
1279
  -------
1275
- path : `str`
1276
- If this is a remote resource, it will be a copy of the resource
1277
- on the local file system, probably in a temporary directory.
1278
- For a local resource this should be the actual path to the
1279
- resource.
1280
- is_temporary : `bool`
1281
- Indicates if the local path is a temporary file or not.
1280
+ local_uri : `ResourcePath`
1281
+ A URI to a local POSIX file. This can either be the same resource
1282
+ or a local downloaded copy of the resource.
1282
1283
  """
1283
1284
  raise NotImplementedError()
1284
1285
 
@@ -1328,18 +1329,8 @@ class ResourcePath: # numpydoc ignore=PR02
1328
1329
  temp_dir = ResourcePath(tmpdir, forceDirectory=True) if tmpdir is not None else None
1329
1330
  if temp_dir is not None and not temp_dir.isLocal:
1330
1331
  raise ValueError(f"Temporary directory for as_local must be local resource not {temp_dir}")
1331
- local_src, is_temporary = self._as_local(multithreaded=multithreaded, tmpdir=temp_dir)
1332
- local_uri = ResourcePath(local_src, isTemporary=is_temporary)
1333
-
1334
- try:
1332
+ with self._as_local(multithreaded=multithreaded, tmpdir=temp_dir) as local_uri:
1335
1333
  yield local_uri
1336
- finally:
1337
- # The caller might have relocated the temporary file.
1338
- # Do not ever delete if the temporary matches self
1339
- # (since it may have been that a temporary file was made local
1340
- # but already was local).
1341
- if self != local_uri and is_temporary and local_uri.exists():
1342
- local_uri.remove()
1343
1334
 
1344
1335
  @classmethod
1345
1336
  @contextlib.contextmanager
@@ -1903,6 +1894,12 @@ class ResourcePath: # numpydoc ignore=PR02
1903
1894
  """
1904
1895
  raise NotImplementedError(f"URL signing is not supported for '{self.scheme}'")
1905
1896
 
1897
+ def _copy_extra_attributes(self, original_uri: ResourcePath) -> None:
1898
+ # May be overridden by subclasses to transfer attributes when a
1899
+ # ResourcePath is constructed using the "clone" version of the
1900
+ # ResourcePath constructor by passing in a ResourcePath object.
1901
+ pass
1902
+
1906
1903
 
1907
1904
  ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
1908
1905
  """Type-annotation alias for objects that can be coerced to ResourcePath.
@@ -172,7 +172,21 @@ dav_globals: DavGlobals = DavGlobals()
172
172
 
173
173
 
174
174
  class DavResourcePath(ResourcePath):
175
- """WebDAV resource."""
175
+ """WebDAV resource.
176
+
177
+ Parameters
178
+ ----------
179
+ uri : `ResourcePathExpression`
180
+ URI to store in object.
181
+ root : `str` or `ResourcePath` or `None`, optional
182
+ Root for relative URIs. Not used in this constructor.
183
+ forceAbsolute : `bool`
184
+ Whether to force absolute URI. A WebDAV URI is always absolute.
185
+ forceDirectory : `bool` or `None`, optional
186
+ Whether this URI represents a directory.
187
+ isTemporary : `bool` or `None`, optional
188
+ Whether this URI represents a temporary resource.
189
+ """
176
190
 
177
191
  def __init__(
178
192
  self,
@@ -382,7 +396,10 @@ class DavResourcePath(ResourcePath):
382
396
  headers.update({"Accept-Encoding": "identity"})
383
397
  return self._client.read_range(self._internal_url, start=start, end=end, headers=headers)
384
398
 
385
- def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
399
+ @contextlib.contextmanager
400
+ def _as_local(
401
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
402
+ ) -> Iterator[ResourcePath]:
386
403
  """Download object and place in temporary directory.
387
404
 
388
405
  Parameters
@@ -399,10 +416,9 @@ class DavResourcePath(ResourcePath):
399
416
 
400
417
  Returns
401
418
  -------
402
- path : `str`
403
- Path to local temporary file.
404
- temporary : `bool`
405
- Always returns `True`. This is always a temporary file.
419
+ local_uri : `ResourcePath`
420
+ A URI to a local POSIX file corresponding to a local temporary
421
+ downloaded copy of the resource.
406
422
  """
407
423
  # We need to ensure that this resource is actually a file. dCache
408
424
  # responds with a HTML-formatted content to a HTTP GET request to a
@@ -417,9 +433,9 @@ class DavResourcePath(ResourcePath):
417
433
  else:
418
434
  buffer_size = _calc_tmpdir_buffer_size(tmpdir.ospath)
419
435
 
420
- with ResourcePath.temporary_uri(suffix=self.getExtension(), prefix=tmpdir, delete=False) as tmp_uri:
436
+ with ResourcePath.temporary_uri(suffix=self.getExtension(), prefix=tmpdir, delete=True) as tmp_uri:
421
437
  self._client.download(self._internal_url, tmp_uri.ospath, buffer_size)
422
- return tmp_uri.ospath, True
438
+ yield tmp_uri
423
439
 
424
440
  def write(self, data: BinaryIO | bytes, overwrite: bool = True) -> None:
425
441
  """Write the supplied bytes to the new resource.
@@ -470,7 +486,7 @@ class DavResourcePath(ResourcePath):
470
486
 
471
487
  Parameters
472
488
  ----------
473
- recursive: `bool`
489
+ recursive : `bool`
474
490
  If `True` recursively remove all files and directories under this
475
491
  directory.
476
492
 
@@ -79,7 +79,10 @@ class FileResourcePath(ResourcePath):
79
79
  """Remove the resource."""
80
80
  os.remove(self.ospath)
81
81
 
82
- def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
82
+ @contextlib.contextmanager
83
+ def _as_local(
84
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
85
+ ) -> Iterator[ResourcePath]:
83
86
  """Return the local path of the file.
84
87
 
85
88
  This is an internal helper for ``as_local()``.
@@ -93,12 +96,10 @@ class FileResourcePath(ResourcePath):
93
96
 
94
97
  Returns
95
98
  -------
96
- path : `str`
97
- The local path to this file.
98
- temporary : `bool`
99
- Always returns the temporary nature of the input file resource.
99
+ local_uri : `ResourcePath`
100
+ A local URI. In this case it will be itself.
100
101
  """
101
- return self.ospath, self.isTemporary
102
+ yield self
102
103
 
103
104
  def read(self, size: int = -1) -> bytes:
104
105
  with open(self.ospath, "rb") as fh:
@@ -202,17 +202,20 @@ class GSResourcePath(ResourcePath):
202
202
  # Should this method do anything at all?
203
203
  self.blob.upload_from_string(b"", retry=_RETRY_POLICY)
204
204
 
205
- def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
205
+ @contextlib.contextmanager
206
+ def _as_local(
207
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
208
+ ) -> Iterator[ResourcePath]:
206
209
  with (
207
- ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=False) as tmp_uri,
210
+ ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=True) as tmp_uri,
208
211
  time_this(log, msg="Downloading %s to local file", args=(self,)),
209
212
  ):
210
213
  try:
211
214
  with tmp_uri.open("wb") as tmpFile:
212
215
  self.blob.download_to_file(tmpFile, retry=_RETRY_POLICY)
216
+ yield tmp_uri
213
217
  except NotFound as e:
214
218
  raise FileNotFoundError(f"No such resource: {self}") from e
215
- return tmp_uri.ospath, True
216
219
 
217
220
  def transfer_from(
218
221
  self,
@@ -759,6 +759,42 @@ class HttpResourcePath(ResourcePath):
759
759
  a HTTP URL. The value of the variable is not inspected.
760
760
  """
761
761
 
762
+ @staticmethod
763
+ def create_http_resource_path(
764
+ path: str, *, extra_headers: dict[str, str] | None = None
765
+ ) -> HttpResourcePath:
766
+ """Create an instance of `HttpResourcePath` with additional
767
+ HTTP-specific configuration.
768
+
769
+ Parameters
770
+ ----------
771
+ path : `str`
772
+ HTTP URL to be wrapped in a `ResourcePath` instance.
773
+ extra_headers : `dict` [ `str`, `str` ], optional
774
+ Additional headers that will be sent with every HTTP request made
775
+ by this `ResourcePath`. These override any headers that may be
776
+ generated internally by `HttpResourcePath` (e.g. authentication
777
+ headers).
778
+
779
+ Return
780
+ ------
781
+ instance : `ResourcePath`
782
+ Newly-created `HttpResourcePath` instance.
783
+
784
+ Notes
785
+ -----
786
+ Most users should use the `ResourcePath` constructor, instead.
787
+ """
788
+ # Make sure we instantiate ResourcePath using a string to guarantee we
789
+ # get a new ResourcePath. If we accidentally provided a ResourcePath
790
+ # instance instead, the ResourcePath constructor sometimes returns
791
+ # the original object and we would be modifying an object that is
792
+ # supposed to be immutable.
793
+ instance = ResourcePath(str(path))
794
+ assert isinstance(instance, HttpResourcePath)
795
+ instance._extra_headers = extra_headers
796
+ return instance
797
+
762
798
  # WebDAV servers known to be able to sign URLs. The values are lowercased
763
799
  # server identifiers retrieved from the 'Server' header included in
764
800
  # the response to a HTTP OPTIONS request.
@@ -805,39 +841,48 @@ class HttpResourcePath(ResourcePath):
805
841
  # and is shared by all instances of this class.
806
842
  _tcp_connector: TCPConnector | None = None
807
843
 
844
+ # Additional headers added to every request.
845
+ _extra_headers: dict[str, str] | None = None
846
+
808
847
  @property
809
- def metadata_session(self) -> requests.Session:
848
+ def metadata_session(self) -> _SessionWrapper:
810
849
  """Client session to send requests which do not require upload or
811
850
  download of data, i.e. mostly metadata requests.
812
851
  """
852
+ session = None
813
853
  if hasattr(self, "_metadata_session"):
814
854
  if HttpResourcePath._pid == os.getpid():
815
- return self._metadata_session
855
+ session = self._metadata_session
816
856
  else:
817
857
  # The metadata session we have in cache was likely created by
818
858
  # a parent process. Discard all the sessions in that store.
819
859
  self._metadata_session_store.clear()
820
860
 
821
861
  # Retrieve a new metadata session.
822
- HttpResourcePath._pid = os.getpid()
823
- self._metadata_session: requests.Session = self._metadata_session_store.get(self)
824
- return self._metadata_session
862
+ if session is None:
863
+ HttpResourcePath._pid = os.getpid()
864
+ session = self._metadata_session_store.get(self)
865
+ self._metadata_session: requests.Session = session
866
+ return _SessionWrapper(session, extra_headers=self._extra_headers)
825
867
 
826
868
  @property
827
- def data_session(self) -> requests.Session:
869
+ def data_session(self) -> _SessionWrapper:
828
870
  """Client session for uploading and downloading data."""
871
+ session = None
829
872
  if hasattr(self, "_data_session"):
830
873
  if HttpResourcePath._pid == os.getpid():
831
- return self._data_session
874
+ session = self._data_session
832
875
  else:
833
876
  # The data session we have in cache was likely created by
834
877
  # a parent process. Discard all the sessions in that store.
835
878
  self._data_session_store.clear()
836
879
 
837
880
  # Retrieve a new data session.
838
- HttpResourcePath._pid = os.getpid()
839
- self._data_session: requests.Session = self._data_session_store.get(self)
840
- return self._data_session
881
+ if session is None:
882
+ HttpResourcePath._pid = os.getpid()
883
+ session = self._data_session_store.get(self)
884
+ self._data_session: requests.Session = session
885
+ return _SessionWrapper(session, extra_headers=self._extra_headers)
841
886
 
842
887
  def _clear_sessions(self) -> None:
843
888
  """Close the socket connections that are still open.
@@ -1486,7 +1531,10 @@ class HttpResourcePath(ResourcePath):
1486
1531
  except json.JSONDecodeError:
1487
1532
  raise ValueError(f"could not deserialize response to POST request for URL {self}")
1488
1533
 
1489
- def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
1534
+ @contextlib.contextmanager
1535
+ def _as_local(
1536
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
1537
+ ) -> Iterator[ResourcePath]:
1490
1538
  """Download object over HTTP and place in temporary directory.
1491
1539
 
1492
1540
  Parameters
@@ -1503,10 +1551,9 @@ class HttpResourcePath(ResourcePath):
1503
1551
 
1504
1552
  Returns
1505
1553
  -------
1506
- path : `str`
1507
- Path to local temporary file.
1508
- temporary : `bool`
1509
- Always returns `True`. This is always a temporary file.
1554
+ local_uri : `ResourcePath`
1555
+ A URI to a local POSIX file corresponding to a local temporary
1556
+ downloaded copy of the resource.
1510
1557
  """
1511
1558
  # Use the session as a context manager to ensure that connections
1512
1559
  # to both the front end and back end servers are closed after the
@@ -1525,7 +1572,7 @@ class HttpResourcePath(ResourcePath):
1525
1572
  buffer_size = _calc_tmpdir_buffer_size(tmpdir.ospath)
1526
1573
 
1527
1574
  with ResourcePath.temporary_uri(
1528
- suffix=self.getExtension(), prefix=tmpdir, delete=False
1575
+ suffix=self.getExtension(), prefix=tmpdir, delete=True
1529
1576
  ) as tmp_uri:
1530
1577
  expected_length = int(resp.headers.get("Content-Length", "-1"))
1531
1578
  with time_this(
@@ -1541,20 +1588,20 @@ class HttpResourcePath(ResourcePath):
1541
1588
  tmpFile.write(chunk)
1542
1589
  content_length += len(chunk)
1543
1590
 
1544
- # Check that the expected and actual content lengths match. Perform
1545
- # this check only when the contents of the file was not encoded by
1546
- # the server.
1547
- if (
1548
- "Content-Encoding" not in resp.headers
1549
- and expected_length >= 0
1550
- and expected_length != content_length
1551
- ):
1552
- raise ValueError(
1553
- f"Size of downloaded file does not match value in Content-Length header for {self}: "
1554
- f"expecting {expected_length} and got {content_length} bytes"
1555
- )
1591
+ # Check that the expected and actual content lengths match.
1592
+ # Perform this check only when the contents of the file was not
1593
+ # encoded by the server.
1594
+ if (
1595
+ "Content-Encoding" not in resp.headers
1596
+ and expected_length >= 0
1597
+ and expected_length != content_length
1598
+ ):
1599
+ raise ValueError(
1600
+ f"Size of downloaded file does not match value in Content-Length header for {self}: "
1601
+ f"expecting {expected_length} and got {content_length} bytes"
1602
+ )
1556
1603
 
1557
- return tmpFile.name, True
1604
+ yield tmp_uri
1558
1605
 
1559
1606
  def _send_webdav_request(
1560
1607
  self,
@@ -1562,7 +1609,7 @@ class HttpResourcePath(ResourcePath):
1562
1609
  url: str | None = None,
1563
1610
  headers: dict[str, str] | None = None,
1564
1611
  body: str | None = None,
1565
- session: requests.Session | None = None,
1612
+ session: _SessionWrapper | None = None,
1566
1613
  timeout: tuple[float, float] | None = None,
1567
1614
  ) -> requests.Response:
1568
1615
  """Send a webDAV request and correctly handle redirects.
@@ -1983,6 +2030,10 @@ class HttpResourcePath(ResourcePath):
1983
2030
  with super()._openImpl(mode, encoding=encoding) as http_handle:
1984
2031
  yield http_handle
1985
2032
 
2033
+ def _copy_extra_attributes(self, original_uri: ResourcePath) -> None:
2034
+ assert isinstance(original_uri, HttpResourcePath)
2035
+ self._extra_headers = original_uri._extra_headers
2036
+
1986
2037
 
1987
2038
  def _dump_response(resp: requests.Response) -> None:
1988
2039
  """Log the contents of a HTTP or webDAV request and its response.
@@ -2193,3 +2244,95 @@ class DavProperty:
2193
2244
  @property
2194
2245
  def href(self) -> str:
2195
2246
  return self._href
2247
+
2248
+
2249
+ class _SessionWrapper(contextlib.AbstractContextManager):
2250
+ """Wraps a `requests.Session` to allow header values to be injected with
2251
+ all requests.
2252
+
2253
+ Notes
2254
+ -----
2255
+ `requests.Session` already has a feature for setting headers globally, but
2256
+ our session objects are global and authorization headers can vary for each
2257
+ HttpResourcePath instance.
2258
+ """
2259
+
2260
+ def __init__(self, session: requests.Session, *, extra_headers: dict[str, str] | None) -> None:
2261
+ self._session = session
2262
+ self._extra_headers = extra_headers
2263
+
2264
+ def __enter__(self) -> _SessionWrapper:
2265
+ self._session.__enter__()
2266
+ return self
2267
+
2268
+ def __exit__(
2269
+ self,
2270
+ exc_type: Any,
2271
+ exc_value: Any,
2272
+ traceback: Any,
2273
+ ) -> None:
2274
+ return self._session.__exit__(exc_type, exc_value, traceback)
2275
+
2276
+ def get(
2277
+ self,
2278
+ url: str,
2279
+ *,
2280
+ timeout: tuple[float, float],
2281
+ allow_redirects: bool = True,
2282
+ stream: bool,
2283
+ headers: dict[str, str] | None = None,
2284
+ ) -> requests.Response:
2285
+ return self._session.get(
2286
+ url,
2287
+ timeout=timeout,
2288
+ allow_redirects=allow_redirects,
2289
+ stream=stream,
2290
+ headers=self._augment_headers(headers),
2291
+ )
2292
+
2293
+ def head(
2294
+ self,
2295
+ url: str,
2296
+ *,
2297
+ timeout: tuple[float, float],
2298
+ allow_redirects: bool,
2299
+ stream: bool,
2300
+ headers: dict[str, str] | None = None,
2301
+ ) -> requests.Response:
2302
+ return self._session.head(
2303
+ url,
2304
+ timeout=timeout,
2305
+ allow_redirects=allow_redirects,
2306
+ stream=stream,
2307
+ headers=self._augment_headers(headers),
2308
+ )
2309
+
2310
+ def request(
2311
+ self,
2312
+ method: str,
2313
+ url: str,
2314
+ *,
2315
+ data: str | bytes | BinaryIO | None,
2316
+ timeout: tuple[float, float],
2317
+ allow_redirects: bool,
2318
+ stream: bool,
2319
+ headers: dict[str, str] | None = None,
2320
+ ) -> requests.Response:
2321
+ return self._session.request(
2322
+ method,
2323
+ url,
2324
+ data=data,
2325
+ timeout=timeout,
2326
+ allow_redirects=allow_redirects,
2327
+ stream=stream,
2328
+ headers=self._augment_headers(headers),
2329
+ )
2330
+
2331
+ def _augment_headers(self, headers: dict[str, str] | None) -> dict[str, str]:
2332
+ if headers is None:
2333
+ headers = {}
2334
+
2335
+ if self._extra_headers is not None:
2336
+ headers = headers | self._extra_headers
2337
+
2338
+ return headers
@@ -13,6 +13,9 @@ from __future__ import annotations
13
13
 
14
14
  __all__ = ("InMemoryResourcePath",)
15
15
 
16
+ import contextlib
17
+ from collections.abc import Iterator
18
+
16
19
  from ._resourcePath import ResourcePath
17
20
 
18
21
 
@@ -27,5 +30,8 @@ class InMemoryResourcePath(ResourcePath):
27
30
  """Test for existence and always return False."""
28
31
  return True
29
32
 
30
- def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
33
+ @contextlib.contextmanager
34
+ def _as_local(
35
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
36
+ ) -> Iterator[ResourcePath]:
31
37
  raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'")
@@ -477,7 +477,10 @@ class S3ResourcePath(ResourcePath):
477
477
 
478
478
  return s3, f"{self._bucket}/{self.relativeToPathRoot}"
479
479
 
480
- def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
480
+ @contextlib.contextmanager
481
+ def _as_local(
482
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
483
+ ) -> Iterator[ResourcePath]:
481
484
  """Download object from S3 and place in temporary directory.
482
485
 
483
486
  Parameters
@@ -494,13 +497,12 @@ class S3ResourcePath(ResourcePath):
494
497
 
495
498
  Returns
496
499
  -------
497
- path : `str`
498
- Path to local temporary file.
499
- temporary : `bool`
500
- Always returns `True`. This is always a temporary file.
500
+ local_uri : `ResourcePath`
501
+ A URI to a local POSIX file corresponding to a local temporary
502
+ downloaded copy of the resource.
501
503
  """
502
504
  with (
503
- ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=False) as tmp_uri,
505
+ ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=True) as tmp_uri,
504
506
  self._use_threads_temp_override(multithreaded),
505
507
  time_this(log, msg="Downloading %s to local file", args=(self,)),
506
508
  ):
@@ -511,7 +513,7 @@ class S3ResourcePath(ResourcePath):
511
513
  )
512
514
  with tmp_uri.open("wb") as tmpFile:
513
515
  self._download_file(tmpFile, progress)
514
- return tmp_uri.ospath, True
516
+ yield tmp_uri
515
517
 
516
518
  @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
517
519
  def _upload_file(self, local_file: ResourcePath, progress: ProgressPercentage | None) -> None:
@@ -542,7 +544,7 @@ class S3ResourcePath(ResourcePath):
542
544
  try:
543
545
  self.client.copy_object(CopySource=copy_source, Bucket=self._bucket, Key=self.relativeToPathRoot)
544
546
  except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
545
- raise FileNotFoundError("No such resource to transfer: {self}") from err
547
+ raise FileNotFoundError(f"No such resource to transfer: {src} -> {self}") from err
546
548
  except ClientError as err:
547
549
  translate_client_error(err, self)
548
550
  raise
@@ -609,10 +611,10 @@ class S3ResourcePath(ResourcePath):
609
611
  timer_msg = "Transfer from %s to %s"
610
612
  timer_args = (src, self)
611
613
 
612
- if isinstance(src, type(self)):
613
- # Looks like an S3 remote uri so we can use direct copy
614
- # note that boto3.resource.meta.copy is cleverer than the low
615
- # level copy_object
614
+ if isinstance(src, type(self)) and self.client == src.client:
615
+ # Looks like an S3 remote uri so we can use direct copy.
616
+ # This only works if the source and destination are using the same
617
+ # S3 endpoint and profile.
616
618
  with time_this(log, msg=timer_msg, args=timer_args):
617
619
  self._copy_from(src)
618
620
 
@@ -61,18 +61,18 @@ def _check_open(
61
61
  """
62
62
  text_content = "abcdefghijklmnopqrstuvwxyz🙂"
63
63
  bytes_content = uuid.uuid4().bytes
64
- content_by_mode_suffix = {
64
+ content_by_mode_suffix: dict[str, str | bytes] = {
65
65
  "": text_content,
66
66
  "t": text_content,
67
67
  "b": bytes_content,
68
68
  }
69
- empty_content_by_mode_suffix = {
69
+ empty_content_by_mode_suffix: dict[str, str | bytes] = {
70
70
  "": "",
71
71
  "t": "",
72
72
  "b": b"",
73
73
  }
74
74
  # To appease mypy
75
- double_content_by_mode_suffix = {
75
+ double_content_by_mode_suffix: dict[str, str | bytes] = {
76
76
  "": text_content + text_content,
77
77
  "t": text_content + text_content,
78
78
  "b": bytes_content + bytes_content,
@@ -143,6 +143,16 @@ def _check_open(
143
143
  content_read = read_buffer.read()
144
144
  test_case.assertEqual(len(content_read), 0, f"Read: {content_read!r}, expected empty.")
145
145
 
146
+ # Write multiple chunks with flushing to ensure that any handles that
147
+ # cache without flushing work properly.
148
+ n = 3
149
+ with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
150
+ for _ in range(n):
151
+ write_buffer.write(content)
152
+ write_buffer.flush()
153
+ with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
154
+ test_case.assertEqual(read_buffer.read(), content * n)
155
+
146
156
  # Write two copies of the content, overwriting the single copy there.
147
157
  with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
148
158
  write_buffer.write(double_content)
@@ -0,0 +1,2 @@
1
+ __all__ = ["__version__"]
2
+ __version__ = "29.2025.2500"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lsst-resources
3
- Version: 29.2025.2100
3
+ Version: 29.2025.2500
4
4
  Summary: An abstraction layer for reading and writing from URI file resources.
5
5
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
6
6
  License: BSD 3-Clause License
@@ -284,12 +284,12 @@ class DavReadWriteTestCase(GenericReadWriteTestCase, unittest.TestCase):
284
284
  self.assertTrue(remote_file.exists())
285
285
  self.assertEqual(remote_file.size(), len(contents))
286
286
 
287
- local_path, is_temp = remote_file._as_local()
288
- self.assertTrue(is_temp)
289
- self.assertTrue(os.path.exists(local_path))
290
- self.assertTrue(os.stat(local_path).st_size, len(contents))
291
- self.assertEqual(ResourcePath(local_path).read(), contents)
292
- os.remove(local_path)
287
+ with remote_file._as_local() as local_uri:
288
+ self.assertTrue(local_uri.isTemporary)
289
+ self.assertTrue(os.path.exists(local_uri.ospath))
290
+ self.assertTrue(os.stat(local_uri.ospath).st_size, len(contents))
291
+ self.assertEqual(local_uri.read(), contents)
292
+ self.assertFalse(local_uri.exists())
293
293
 
294
294
  def test_dav_size(self):
295
295
  # Retrieving the size of a non-existent file must raise.
@@ -12,6 +12,7 @@
12
12
  import hashlib
13
13
  import io
14
14
  import os.path
15
+ import pickle
15
16
  import random
16
17
  import shutil
17
18
  import socket
@@ -20,6 +21,7 @@ import string
20
21
  import tempfile
21
22
  import time
22
23
  import unittest
24
+ import unittest.mock
23
25
  import warnings
24
26
  from collections.abc import Callable
25
27
  from threading import Thread
@@ -33,6 +35,7 @@ except ImportError:
33
35
 
34
36
  import requests
35
37
  import responses
38
+ import responses.matchers
36
39
 
37
40
  import lsst.resources
38
41
  from lsst.resources import ResourcePath
@@ -82,6 +85,38 @@ class GenericHttpTestCase(GenericTestCase, unittest.TestCase):
82
85
  ResourcePath("http://user:password@server.com:3000/"),
83
86
  )
84
87
 
88
+ @responses.activate
89
+ def test_extra_headers(self):
90
+ url = "http://test.example/something.txt"
91
+ path = HttpResourcePath.create_http_resource_path(
92
+ url, extra_headers={"Authorization": "Bearer my-token"}
93
+ )
94
+
95
+ self.assertEqual(str(path), "http://test.example/something.txt")
96
+ self.assertEqual(path._extra_headers, {"Authorization": "Bearer my-token"})
97
+
98
+ # Make sure that headers are added to requests.
99
+ responses.add(
100
+ responses.GET,
101
+ url,
102
+ b"test",
103
+ match=[responses.matchers.header_matcher({"Authorization": "Bearer my-token"})],
104
+ )
105
+ self.assertEqual(path.read(), b"test")
106
+
107
+ # Extra headers should be preserved through pickle, to ensure that
108
+ # `mtransfer` and similar methods work in multi-process mode.
109
+ dump = pickle.dumps(path)
110
+ restored = pickle.loads(dump)
111
+ self.assertEqual(restored._extra_headers, {"Authorization": "Bearer my-token"})
112
+
113
+ # Extra headers should be preserved when making a modified copy of the
114
+ # ResourcePath using replace() or the ResourcePath constructor.
115
+ replacement = path.replace(forceDirectory=True)
116
+ self.assertEqual(replacement._extra_headers, {"Authorization": "Bearer my-token"})
117
+ copy = ResourcePath(path, forceDirectory=True)
118
+ self.assertEqual(copy._extra_headers, {"Authorization": "Bearer my-token"})
119
+
85
120
 
86
121
  class HttpReadWriteWebdavTestCase(GenericReadWriteTestCase, unittest.TestCase):
87
122
  """Test with a real webDAV server, as opposed to mocking responses."""
@@ -280,12 +315,12 @@ class HttpReadWriteWebdavTestCase(GenericReadWriteTestCase, unittest.TestCase):
280
315
  remote_file = self.tmpdir.join(self._get_file_name())
281
316
  self.assertIsNone(remote_file.write(data=contents, overwrite=True))
282
317
 
283
- local_path, is_temp = remote_file._as_local()
284
- self.assertTrue(is_temp)
285
- self.assertTrue(os.path.exists(local_path))
286
- self.assertTrue(os.stat(local_path).st_size, len(contents))
287
- self.assertEqual(ResourcePath(local_path).read(), contents)
288
- os.remove(local_path)
318
+ with remote_file._as_local() as local_uri:
319
+ self.assertTrue(local_uri.isTemporary)
320
+ self.assertTrue(os.path.exists(local_uri.ospath))
321
+ self.assertTrue(os.stat(local_uri.ospath).st_size, len(contents))
322
+ self.assertEqual(local_uri.read(), contents)
323
+ self.assertFalse(local_uri.exists())
289
324
 
290
325
  def test_dav_size(self):
291
326
  # Size of a non-existent file must raise.
@@ -98,7 +98,7 @@ class LocationTestCase(unittest.TestCase):
98
98
 
99
99
  for uriInfo in uriStrings:
100
100
  uri = ResourcePath(uriInfo[0], root=testRoot, forceAbsolute=uriInfo[1], forceDirectory=uriInfo[2])
101
- with self.subTest(in_uri=uriInfo[0], out_uri=uri):
101
+ with self.subTest(in_uri=repr(uriInfo[0]), out_uri=repr(uri)):
102
102
  self.assertEqual(uri.scheme, uriInfo[3], "test scheme")
103
103
  self.assertEqual(uri.netloc, uriInfo[4], "test netloc")
104
104
  self.assertEqual(uri.path, uriInfo[5], "test path")
@@ -115,7 +115,7 @@ class LocationTestCase(unittest.TestCase):
115
115
 
116
116
  for uriInfo in uriStrings:
117
117
  uri = ResourcePath(uriInfo[0], forceAbsolute=uriInfo[1], forceDirectory=uriInfo[2])
118
- with self.subTest(in_uri=uriInfo[0], out_uri=uri):
118
+ with self.subTest(in_uri=repr(uriInfo[0]), out_uri=repr(uri)):
119
119
  self.assertEqual(uri.scheme, uriInfo[3], "test scheme")
120
120
  self.assertEqual(uri.netloc, uriInfo[4], "test netloc")
121
121
  # Use ospath here to ensure that we have unquoted any
@@ -133,7 +133,7 @@ class LocationTestCase(unittest.TestCase):
133
133
 
134
134
  for uriInfo in uriStrings:
135
135
  uri = ResourcePath(uriInfo[0], forceAbsolute=False).updatedFile(uriInfo[1])
136
- with self.subTest(in_uri=uriInfo[0], out_uri=uri):
136
+ with self.subTest(in_uri=repr(uriInfo[0]), out_uri=repr(uri)):
137
137
  self.assertEqual(uri.path, uriInfo[2])
138
138
 
139
139
  # Check that schemeless can become file scheme.
@@ -333,7 +333,7 @@ class LocationTestCase(unittest.TestCase):
333
333
  """Test round tripping of the posix to os.path conversion helpers."""
334
334
  testPaths = ("/a/b/c.e", "a/b", "a/b/", "/a/b", "/a/b/", "a/b/c.e")
335
335
  for p in testPaths:
336
- with self.subTest(path=p):
336
+ with self.subTest(path=repr(p)):
337
337
  self.assertEqual(os2posix(posix2os(p)), p)
338
338
 
339
339
  def testSplit(self):
@@ -368,7 +368,7 @@ class LocationTestCase(unittest.TestCase):
368
368
  )
369
369
 
370
370
  for p, e in zip(testPaths, expected, strict=True):
371
- with self.subTest(path=p):
371
+ with self.subTest(path=repr(p)):
372
372
  uri = ResourcePath(p, testRoot)
373
373
  head, tail = uri.split()
374
374
  self.assertEqual((head.geturl(), tail), e)
@@ -351,6 +351,17 @@ class S3WithProfileReadWriteTestCase(S3ReadWriteTestCaseBase, unittest.TestCase)
351
351
  self.assertEqual(path2._bucket, "ceph:bucket2")
352
352
  self.assertIsNone(path2._profile)
353
353
 
354
+ def test_transfer_from_different_endpoints(self):
355
+ # Create a bucket using a different endpoint (the default endpoint).
356
+ boto3.resource("s3").create_bucket(Bucket="source-bucket")
357
+ source_path = ResourcePath("s3://source-bucket/file.txt")
358
+ source_path.write(b"123")
359
+ target_path = ResourcePath(f"s3://{self.netloc}/target.txt")
360
+ # Transfer from default endpoint to custom endpoint with custom
361
+ # profile.
362
+ target_path.transfer_from(source_path)
363
+ self.assertEqual(target_path.read(), b"123")
364
+
354
365
 
355
366
  if __name__ == "__main__":
356
367
  unittest.main()
@@ -1,2 +0,0 @@
1
- __all__ = ["__version__"]
2
- __version__ = "29.2025.2100"