lsst-resources 29.0.0rc7__py3-none-any.whl → 29.2025.4600__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lsst/resources/http.py CHANGED
@@ -26,7 +26,6 @@ import random
26
26
  import re
27
27
  import ssl
28
28
  import stat
29
- import tempfile
30
29
  from collections.abc import Iterator
31
30
  from typing import TYPE_CHECKING, Any, BinaryIO, cast
32
31
 
@@ -60,6 +59,7 @@ from lsst.utils.timer import time_this
60
59
  from ._resourceHandles import ResourceHandleProtocol
61
60
  from ._resourceHandles._httpResourceHandle import HttpReadResourceHandle, parse_content_range_header
62
61
  from ._resourcePath import ResourcePath
62
+ from .utils import _get_num_workers, get_tempdir
63
63
 
64
64
  if TYPE_CHECKING:
65
65
  from .utils import TransactionProtocol
@@ -98,6 +98,20 @@ def _timeout_from_environment(env_var: str, default_value: float) -> float:
98
98
  return timeout
99
99
 
100
100
 
101
@functools.lru_cache
def _calc_tmpdir_buffer_size(tmpdir: str) -> int:
    """Return the buffer size, in bytes, for temporary files in ``tmpdir``.

    The size is the larger of 256 blocks of typical size (i.e. 4096 bytes)
    and 10 times the block size of the file system hosting ``tmpdir``.
    This is a reasonable compromise between using memory for buffering
    and the number of system calls issued to read from or write to
    temporary files.

    Parameters
    ----------
    tmpdir : `str`
        Path of the directory whose file system is inspected.

    Returns
    -------
    size : `int`
        Buffer size in bytes.
    """
    # Lower bound: 256 blocks of the typical 4096-byte block size.
    floor = 256 * 4096
    scaled = 10 * os.statvfs(tmpdir).f_bsize
    return scaled if scaled > floor else floor
113
+
114
+
101
115
  class HttpResourcePathConfig:
102
116
  """Configuration class to encapsulate the configurable items used by class
103
117
  HttpResourcePath.
@@ -144,14 +158,14 @@ class HttpResourcePathConfig:
144
158
  if self._front_end_connections is not None:
145
159
  return self._front_end_connections
146
160
 
161
+ default_pool_size = max(_get_num_workers(), self.DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS)
162
+
147
163
  try:
148
164
  self._front_end_connections = int(
149
- os.environ.get(
150
- "LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS", self.DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS
151
- )
165
+ os.environ.get("LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS", default_pool_size)
152
166
  )
153
167
  except ValueError:
154
- self._front_end_connections = self.DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS
168
+ self._front_end_connections = default_pool_size
155
169
 
156
170
  return self._front_end_connections
157
171
 
@@ -161,14 +175,14 @@ class HttpResourcePathConfig:
161
175
  if self._back_end_connections is not None:
162
176
  return self._back_end_connections
163
177
 
178
+ default_pool_size = max(_get_num_workers(), self.DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS)
179
+
164
180
  try:
165
181
  self._back_end_connections = int(
166
- os.environ.get(
167
- "LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS", self.DEFAULT_BACKEND_PERSISTENT_CONNECTIONS
168
- )
182
+ os.environ.get("LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS", default_pool_size)
169
183
  )
170
184
  except ValueError:
171
- self._back_end_connections = self.DEFAULT_BACKEND_PERSISTENT_CONNECTIONS
185
+ self._back_end_connections = default_pool_size
172
186
 
173
187
  return self._back_end_connections
174
188
 
@@ -363,26 +377,15 @@ class HttpResourcePathConfig:
363
377
  if self._tmpdir_buffersize is not None:
364
378
  return self._tmpdir_buffersize
365
379
 
366
- # Use the value of environment variables 'LSST_RESOURCES_TMPDIR' or
367
- # 'TMPDIR', if defined. Otherwise use the system temporary directory,
368
- # with a last-resort fallback to the current working directory if
369
- # nothing else is available.
370
- tmpdir = None
371
- for dir in (os.getenv(v) for v in ("LSST_RESOURCES_TMPDIR", "TMPDIR")):
372
- if dir and os.path.isdir(dir):
373
- tmpdir = dir
374
- break
375
-
376
- if tmpdir is None:
377
- tmpdir = tempfile.gettempdir()
380
+ tmpdir = get_tempdir()
378
381
 
379
382
  # Compute the block size as 256 blocks of typical size
380
383
  # (i.e. 4096 bytes) or 10 times the file system block size,
381
384
  # whichever is higher. This is a reasonable compromise between
382
385
  # using memory for buffering and the number of system calls
383
386
  # issued to read from or write to temporary files.
384
- fsstats = os.statvfs(tmpdir)
385
- self._tmpdir_buffersize = (tmpdir, max(10 * fsstats.f_bsize, 256 * 4096))
387
+ bufsize = _calc_tmpdir_buffer_size(tmpdir)
388
+ self._tmpdir_buffersize = (tmpdir, bufsize)
386
389
 
387
390
  return self._tmpdir_buffersize
388
391
 
@@ -427,9 +430,7 @@ def _get_dav_and_server_headers(path: ResourcePath | str) -> tuple[str | None, s
427
430
  config = HttpResourcePathConfig()
428
431
  with SessionStore(config=config).get(path) as session:
429
432
  resp = session.options(
430
- str(path),
431
- stream=False,
432
- timeout=config.timeout,
433
+ str(path), stream=False, timeout=config.timeout, headers=path._extra_headers
433
434
  )
434
435
 
435
436
  dav_header = server_header = None
@@ -756,6 +757,42 @@ class HttpResourcePath(ResourcePath):
756
757
  a HTTP URL. The value of the variable is not inspected.
757
758
  """
758
759
 
760
@staticmethod
def create_http_resource_path(
    path: str, *, extra_headers: dict[str, str] | None = None
) -> HttpResourcePath:
    """Create an instance of `HttpResourcePath` with additional
    HTTP-specific configuration.

    Parameters
    ----------
    path : `str`
        HTTP URL to be wrapped in a `ResourcePath` instance.
    extra_headers : `dict` [ `str`, `str` ], optional
        Additional headers that will be sent with every HTTP request made
        by this `ResourcePath`. These override any headers that may be
        generated internally by `HttpResourcePath` (e.g. authentication
        headers).

    Returns
    -------
    instance : `HttpResourcePath`
        Newly-created `HttpResourcePath` instance.

    Raises
    ------
    ValueError
        Raised if ``path`` does not resolve to an `HttpResourcePath`.

    Notes
    -----
    Most users should use the `ResourcePath` constructor, instead.
    """
    # Make sure we instantiate ResourcePath using a string to guarantee we
    # get a new ResourcePath. If we accidentally provided a ResourcePath
    # instance instead, the ResourcePath constructor sometimes returns
    # the original object and we would be modifying an object that is
    # supposed to be immutable.
    instance = ResourcePath(str(path))

    # Validate explicitly rather than with ``assert``: assertions are
    # removed when Python runs with -O, which would let the headers be
    # attached to a resource path of an unrelated scheme.
    if not isinstance(instance, HttpResourcePath):
        raise ValueError(
            f"Expected a HTTP URL but {path!r} resolved to an instance of {type(instance).__name__}"
        )

    instance._extra_headers = extra_headers
    return instance
795
+
759
796
  # WebDAV servers known to be able to sign URLs. The values are lowercased
760
797
  # server identifiers retrieved from the 'Server' header included in
761
798
  # the response to a HTTP OPTIONS request.
@@ -802,39 +839,48 @@ class HttpResourcePath(ResourcePath):
802
839
  # and is shared by all instances of this class.
803
840
  _tcp_connector: TCPConnector | None = None
804
841
 
842
+ # Additional headers added to every request.
843
+ _extra_headers: dict[str, str] | None = None
844
+
805
845
  @property
806
- def metadata_session(self) -> requests.Session:
846
+ def metadata_session(self) -> _SessionWrapper:
807
847
  """Client session to send requests which do not require upload or
808
848
  download of data, i.e. mostly metadata requests.
809
849
  """
850
+ session = None
810
851
  if hasattr(self, "_metadata_session"):
811
852
  if HttpResourcePath._pid == os.getpid():
812
- return self._metadata_session
853
+ session = self._metadata_session
813
854
  else:
814
855
  # The metadata session we have in cache was likely created by
815
856
  # a parent process. Discard all the sessions in that store.
816
857
  self._metadata_session_store.clear()
817
858
 
818
859
  # Retrieve a new metadata session.
819
- HttpResourcePath._pid = os.getpid()
820
- self._metadata_session: requests.Session = self._metadata_session_store.get(self)
821
- return self._metadata_session
860
+ if session is None:
861
+ HttpResourcePath._pid = os.getpid()
862
+ session = self._metadata_session_store.get(self)
863
+ self._metadata_session: requests.Session = session
864
+ return _SessionWrapper(session, extra_headers=self._extra_headers)
822
865
 
823
866
  @property
824
- def data_session(self) -> requests.Session:
867
+ def data_session(self) -> _SessionWrapper:
825
868
  """Client session for uploading and downloading data."""
869
+ session = None
826
870
  if hasattr(self, "_data_session"):
827
871
  if HttpResourcePath._pid == os.getpid():
828
- return self._data_session
872
+ session = self._data_session
829
873
  else:
830
874
  # The data session we have in cache was likely created by
831
875
  # a parent process. Discard all the sessions in that store.
832
876
  self._data_session_store.clear()
833
877
 
834
878
  # Retrieve a new data session.
835
- HttpResourcePath._pid = os.getpid()
836
- self._data_session: requests.Session = self._data_session_store.get(self)
837
- return self._data_session
879
+ if session is None:
880
+ HttpResourcePath._pid = os.getpid()
881
+ session = self._data_session_store.get(self)
882
+ self._data_session: requests.Session = session
883
+ return _SessionWrapper(session, extra_headers=self._extra_headers)
838
884
 
839
885
  def _clear_sessions(self) -> None:
840
886
  """Close the socket connections that are still open.
@@ -1160,6 +1206,7 @@ class HttpResourcePath(ResourcePath):
1160
1206
  transfer: str = "copy",
1161
1207
  overwrite: bool = False,
1162
1208
  transaction: TransactionProtocol | None = None,
1209
+ multithreaded: bool = True,
1163
1210
  ) -> None:
1164
1211
  """Transfer the current resource to a Webdav repository.
1165
1212
 
@@ -1174,6 +1221,12 @@ class HttpResourcePath(ResourcePath):
1174
1221
  Whether overwriting the remote resource is allowed or not.
1175
1222
  transaction : `~lsst.resources.utils.TransactionProtocol`, optional
1176
1223
  Currently unused.
1224
+ multithreaded : `bool`, optional
1225
+ If `True` the transfer will be allowed to attempt to improve
1226
+ throughput by using parallel download streams. This may have no
1227
+ effect if the URI scheme does not support parallel streams or
1228
+ if a global override has been applied. If `False` parallel
1229
+ streams will be disabled.
1177
1230
  """
1178
1231
  # Fail early to prevent delays if remote resources are requested.
1179
1232
  if transfer not in self.transferModes:
@@ -1325,13 +1378,12 @@ class HttpResourcePath(ResourcePath):
1325
1378
  path : `str`
1326
1379
  A path that can be opened by the file system object.
1327
1380
  """
1328
- if (
1329
- fsspec is None
1330
- or not self.is_webdav_endpoint
1331
- or self.server not in HttpResourcePath.SUPPORTED_URL_SIGNERS
1332
- ):
1381
+ if fsspec is None:
1333
1382
  return super().to_fsspec()
1334
1383
 
1384
+ if not self.is_webdav_endpoint or self.server not in HttpResourcePath.SUPPORTED_URL_SIGNERS:
1385
+ return fsspec.url_to_fs(self.geturl(), client_kwargs={"headers": self._extra_headers})
1386
+
1335
1387
  if self.isdir():
1336
1388
  raise NotImplementedError(
1337
1389
  f"method HttpResourcePath.to_fsspec() not implemented for directory {self}"
@@ -1473,15 +1525,29 @@ class HttpResourcePath(ResourcePath):
1473
1525
  except json.JSONDecodeError:
1474
1526
  raise ValueError(f"could not deserialize response to POST request for URL {self}")
1475
1527
 
1476
- def _as_local(self) -> tuple[str, bool]:
1528
+ @contextlib.contextmanager
1529
+ def _as_local(
1530
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
1531
+ ) -> Iterator[ResourcePath]:
1477
1532
  """Download object over HTTP and place in temporary directory.
1478
1533
 
1534
+ Parameters
1535
+ ----------
1536
+ multithreaded : `bool`, optional
1537
+ If `True` the transfer will be allowed to attempt to improve
1538
+ throughput by using parallel download streams. This may have no
1539
+ effect if the URI scheme does not support parallel streams or
1540
+ if a global override has been applied. If `False` parallel
1541
+ streams will be disabled.
1542
+ tmpdir : `ResourcePath` or `None`, optional
1543
+ Explicit override of the temporary directory to use for remote
1544
+ downloads.
1545
+
1479
1546
  Returns
1480
1547
  -------
1481
- path : `str`
1482
- Path to local temporary file.
1483
- temporary : `bool`
1484
- Always returns `True`. This is always a temporary file.
1548
+ local_uri : `ResourcePath`
1549
+ A URI to a local POSIX file corresponding to a local temporary
1550
+ downloaded copy of the resource.
1485
1551
  """
1486
1552
  # Use the session as a context manager to ensure that connections
1487
1553
  # to both the front end and back end servers are closed after the
@@ -1493,9 +1559,14 @@ class HttpResourcePath(ResourcePath):
1493
1559
  f"Unable to download resource {self}; status: {resp.status_code} {resp.reason}"
1494
1560
  )
1495
1561
 
1496
- tmpdir, buffer_size = self._config.tmpdir_buffersize
1562
+ if tmpdir is None:
1563
+ temp_dir, buffer_size = self._config.tmpdir_buffersize
1564
+ tmpdir = ResourcePath(temp_dir, forceDirectory=True)
1565
+ else:
1566
+ buffer_size = _calc_tmpdir_buffer_size(tmpdir.ospath)
1567
+
1497
1568
  with ResourcePath.temporary_uri(
1498
- suffix=self.getExtension(), prefix=ResourcePath(tmpdir, forceDirectory=True), delete=False
1569
+ suffix=self.getExtension(), prefix=tmpdir, delete=True
1499
1570
  ) as tmp_uri:
1500
1571
  expected_length = int(resp.headers.get("Content-Length", "-1"))
1501
1572
  with time_this(
@@ -1511,20 +1582,20 @@ class HttpResourcePath(ResourcePath):
1511
1582
  tmpFile.write(chunk)
1512
1583
  content_length += len(chunk)
1513
1584
 
1514
- # Check that the expected and actual content lengths match. Perform
1515
- # this check only when the contents of the file was not encoded by
1516
- # the server.
1517
- if (
1518
- "Content-Encoding" not in resp.headers
1519
- and expected_length >= 0
1520
- and expected_length != content_length
1521
- ):
1522
- raise ValueError(
1523
- f"Size of downloaded file does not match value in Content-Length header for {self}: "
1524
- f"expecting {expected_length} and got {content_length} bytes"
1525
- )
1585
+ # Check that the expected and actual content lengths match.
1586
+ # Perform this check only when the contents of the file was not
1587
+ # encoded by the server.
1588
+ if (
1589
+ "Content-Encoding" not in resp.headers
1590
+ and expected_length >= 0
1591
+ and expected_length != content_length
1592
+ ):
1593
+ raise ValueError(
1594
+ f"Size of downloaded file does not match value in Content-Length header for {self}: "
1595
+ f"expecting {expected_length} and got {content_length} bytes"
1596
+ )
1526
1597
 
1527
- return tmpFile.name, True
1598
+ yield tmp_uri
1528
1599
 
1529
1600
  def _send_webdav_request(
1530
1601
  self,
@@ -1532,7 +1603,7 @@ class HttpResourcePath(ResourcePath):
1532
1603
  url: str | None = None,
1533
1604
  headers: dict[str, str] | None = None,
1534
1605
  body: str | None = None,
1535
- session: requests.Session | None = None,
1606
+ session: _SessionWrapper | None = None,
1536
1607
  timeout: tuple[float, float] | None = None,
1537
1608
  ) -> requests.Response:
1538
1609
  """Send a webDAV request and correctly handle redirects.
@@ -1953,6 +2024,10 @@ class HttpResourcePath(ResourcePath):
1953
2024
  with super()._openImpl(mode, encoding=encoding) as http_handle:
1954
2025
  yield http_handle
1955
2026
 
2027
def _copy_extra_attributes(self, original_uri: ResourcePath) -> None:
    """Adopt the HTTP-specific attributes of another instance.

    Parameters
    ----------
    original_uri : `ResourcePath`
        Instance whose extra request headers are taken over by ``self``.
        It must be an `HttpResourcePath`.
    """
    assert isinstance(original_uri, HttpResourcePath)
    # The header mapping is shared with the original, not copied.
    inherited_headers = original_uri._extra_headers
    self._extra_headers = inherited_headers
2030
+
1956
2031
 
1957
2032
  def _dump_response(resp: requests.Response) -> None:
1958
2033
  """Log the contents of a HTTP or webDAV request and its response.
@@ -2163,3 +2238,95 @@ class DavProperty:
2163
2238
  @property
2164
2239
  def href(self) -> str:
2165
2240
  return self._href
2241
+
2242
+
2243
class _SessionWrapper(contextlib.AbstractContextManager):
    """Wraps a `requests.Session` to allow header values to be injected with
    all requests.

    Parameters
    ----------
    session : `requests.Session`
        Session that every request is delegated to.
    extra_headers : `dict` [ `str`, `str` ] or `None`
        Headers added to every request sent through this wrapper. They take
        precedence over headers of the same name supplied for an individual
        request. `None` means no header is added.

    Notes
    -----
    `requests.Session` already has a feature for setting headers globally, but
    our session objects are global and authorization headers can vary for each
    HttpResourcePath instance.

    Entering and exiting the wrapper enters and exits the wrapped session.
    """

    def __init__(self, session: requests.Session, *, extra_headers: dict[str, str] | None) -> None:
        self._session = session
        self._extra_headers = extra_headers

    def __enter__(self) -> _SessionWrapper:
        self._session.__enter__()
        return self

    def __exit__(
        self,
        exc_type: Any,
        exc_value: Any,
        traceback: Any,
    ) -> None:
        return self._session.__exit__(exc_type, exc_value, traceback)

    def get(
        self,
        url: str,
        *,
        timeout: tuple[float, float],
        allow_redirects: bool = True,
        stream: bool,
        headers: dict[str, str] | None = None,
    ) -> requests.Response:
        """Send a GET request through the wrapped session, adding the extra
        headers to ``headers``.
        """
        return self._session.get(
            url,
            timeout=timeout,
            allow_redirects=allow_redirects,
            stream=stream,
            headers=self._augment_headers(headers),
        )

    def head(
        self,
        url: str,
        *,
        timeout: tuple[float, float],
        allow_redirects: bool,
        stream: bool,
        headers: dict[str, str] | None = None,
    ) -> requests.Response:
        """Send a HEAD request through the wrapped session, adding the extra
        headers to ``headers``.
        """
        return self._session.head(
            url,
            timeout=timeout,
            allow_redirects=allow_redirects,
            stream=stream,
            headers=self._augment_headers(headers),
        )

    def request(
        self,
        method: str,
        url: str,
        *,
        data: str | bytes | BinaryIO | None,
        timeout: tuple[float, float],
        allow_redirects: bool,
        stream: bool,
        headers: dict[str, str] | None = None,
    ) -> requests.Response:
        """Send a request with an arbitrary method through the wrapped
        session, adding the extra headers to ``headers``.
        """
        return self._session.request(
            method,
            url,
            data=data,
            timeout=timeout,
            allow_redirects=allow_redirects,
            stream=stream,
            headers=self._augment_headers(headers),
        )

    def _augment_headers(self, headers: dict[str, str] | None) -> dict[str, str]:
        """Merge the extra headers into the headers of a single request.

        Parameters
        ----------
        headers : `dict` [ `str`, `str` ] or `None`
            Headers specific to the request. This mapping is not modified.

        Returns
        -------
        merged : `dict` [ `str`, `str` ]
            Headers to send. Extra headers replace request headers of the
            same name, compared case-insensitively.
        """
        if headers is None:
            headers = {}

        if self._extra_headers is None:
            return headers

        # HTTP field names are case-insensitive, so drop any request header
        # that an extra header overrides even when the two names differ only
        # by case; a plain dict union would forward both spellings.
        overridden = {name.lower() for name in self._extra_headers}
        kept = {name: value for name, value in headers.items() if name.lower() not in overridden}
        return kept | self._extra_headers
lsst/resources/mem.py CHANGED
@@ -13,6 +13,9 @@ from __future__ import annotations
13
13
 
14
14
  __all__ = ("InMemoryResourcePath",)
15
15
 
16
+ import contextlib
17
+ from collections.abc import Iterator
18
+
16
19
  from ._resourcePath import ResourcePath
17
20
 
18
21
 
@@ -27,5 +30,8 @@ class InMemoryResourcePath(ResourcePath):
27
30
  """Test for existence and always return True."""
28
31
  return True
29
32
 
30
- def _as_local(self) -> tuple[str, bool]:
33
+ @contextlib.contextmanager
34
+ def _as_local(
35
+ self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
36
+ ) -> Iterator[ResourcePath]:
31
37
  raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'")
@@ -29,7 +29,7 @@ if TYPE_CHECKING:
29
29
  AbstractFileSystem = type
30
30
 
31
31
  from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
32
- from ._resourcePath import ResourcePath
32
+ from ._resourcePath import ResourcePath, ResourcePathExpression
33
33
 
34
34
  log = logging.getLogger(__name__)
35
35
 
@@ -85,14 +85,25 @@ class PackageResourcePath(ResourcePath):
85
85
  return fh.read(size)
86
86
 
87
87
  @contextlib.contextmanager
88
- def as_local(self) -> Iterator[ResourcePath]:
88
+ def as_local(
89
+ self, multithreaded: bool = True, tmpdir: ResourcePathExpression | None = None
90
+ ) -> Iterator[ResourcePath]:
89
91
  """Return the location of the Python resource as local file.
90
92
 
93
+ Parameters
94
+ ----------
95
+ multithreaded : `bool`, optional
96
+ Unused.
97
+ tmpdir : `ResourcePathExpression` or `None`, optional
98
+ Unused.
99
+
91
100
  Yields
92
101
  ------
93
102
  local : `ResourcePath`
94
103
  This might be the original resource or a copy on the local file
95
104
  system.
105
+ multithreaded : `bool`, optional
106
+ Unused.
96
107
 
97
108
  Notes
98
109
  -----