lsst-resources 29.2025.1600__py3-none-any.whl → 29.2025.1800__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/resources/_resourceHandles/_httpResourceHandle.py +7 -4
- lsst/resources/_resourcePath.py +177 -36
- lsst/resources/http.py +10 -10
- lsst/resources/s3.py +4 -2
- lsst/resources/s3utils.py +6 -1
- lsst/resources/tests.py +4 -1
- lsst/resources/utils.py +43 -0
- lsst/resources/version.py +1 -1
- {lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/METADATA +1 -1
- {lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/RECORD +15 -15
- {lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/WHEEL +1 -1
- {lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/licenses/LICENSE +0 -0
- {lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/top_level.txt +0 -0
- {lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/zip-safe +0 -0
|
@@ -168,7 +168,9 @@ class HttpReadResourceHandle(BaseResourceHandle[bytes]):
|
|
|
168
168
|
# return the result
|
|
169
169
|
self._completeBuffer = io.BytesIO()
|
|
170
170
|
with time_this(self._log, msg="Read from remote resource %s", args=(self._url,)):
|
|
171
|
-
|
|
171
|
+
with self._session as session:
|
|
172
|
+
resp = session.get(_dav_to_http(self._url), stream=False, timeout=self._timeout)
|
|
173
|
+
|
|
172
174
|
if (code := resp.status_code) not in (requests.codes.ok, requests.codes.partial):
|
|
173
175
|
raise FileNotFoundError(f"Unable to read resource {self._url}; status code: {code}")
|
|
174
176
|
self._completeBuffer.write(resp.content)
|
|
@@ -190,9 +192,10 @@ class HttpReadResourceHandle(BaseResourceHandle[bytes]):
|
|
|
190
192
|
with time_this(
|
|
191
193
|
self._log, msg="Read from remote resource %s using headers %s", args=(self._url, headers)
|
|
192
194
|
):
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
195
|
+
with self._session as session:
|
|
196
|
+
resp = session.get(
|
|
197
|
+
_dav_to_http(self._url), stream=False, timeout=self._timeout, headers=headers
|
|
198
|
+
)
|
|
196
199
|
|
|
197
200
|
if resp.status_code == requests.codes.range_not_satisfiable:
|
|
198
201
|
# Must have run off the end of the file. A standard file handle
|
lsst/resources/_resourcePath.py
CHANGED
|
@@ -19,14 +19,13 @@ import copy
|
|
|
19
19
|
import io
|
|
20
20
|
import locale
|
|
21
21
|
import logging
|
|
22
|
-
import multiprocessing
|
|
23
22
|
import os
|
|
24
23
|
import posixpath
|
|
25
24
|
import re
|
|
26
25
|
import urllib.parse
|
|
27
|
-
from functools import cache
|
|
28
26
|
from pathlib import Path, PurePath, PurePosixPath
|
|
29
27
|
from random import Random
|
|
28
|
+
from typing import TypeAlias
|
|
30
29
|
|
|
31
30
|
try:
|
|
32
31
|
import fsspec
|
|
@@ -39,7 +38,7 @@ from collections.abc import Iterable, Iterator
|
|
|
39
38
|
from typing import TYPE_CHECKING, Any, Literal, NamedTuple, overload
|
|
40
39
|
|
|
41
40
|
from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
|
|
42
|
-
from .utils import get_tempdir
|
|
41
|
+
from .utils import _get_num_workers, get_tempdir
|
|
43
42
|
|
|
44
43
|
if TYPE_CHECKING:
|
|
45
44
|
from .utils import TransactionProtocol
|
|
@@ -53,11 +52,6 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
|
|
|
53
52
|
# Precomputed escaped hash
|
|
54
53
|
ESCAPED_HASH = urllib.parse.quote("#")
|
|
55
54
|
|
|
56
|
-
# Maximum number of worker threads for parallelized operations.
|
|
57
|
-
# If greater than 10, be aware that this number has to be consistent
|
|
58
|
-
# with connection pool sizing (for example in urllib3).
|
|
59
|
-
MAX_WORKERS = 10
|
|
60
|
-
|
|
61
55
|
|
|
62
56
|
class MTransferResult(NamedTuple):
|
|
63
57
|
"""Report on a bulk transfer."""
|
|
@@ -66,36 +60,73 @@ class MTransferResult(NamedTuple):
|
|
|
66
60
|
exception: Exception | None
|
|
67
61
|
|
|
68
62
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
63
|
+
_EXECUTOR_TYPE: TypeAlias = type[
|
|
64
|
+
concurrent.futures.ThreadPoolExecutor | concurrent.futures.ProcessPoolExecutor
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
# Cache value for executor class so as not to issue warning multiple
|
|
68
|
+
# times but still allow tests to override the value.
|
|
69
|
+
_POOL_EXECUTOR_CLASS: _EXECUTOR_TYPE | None = None
|
|
76
70
|
|
|
77
71
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
f"""Calculate the number of workers to use.
|
|
72
|
+
def _get_executor_class() -> _EXECUTOR_TYPE:
    """Return the executor class used for parallelized execution.

    Returns
    -------
    cls : `concurrent.futures.Executor`
        The ``Executor`` class. Default is
        `concurrent.futures.ThreadPoolExecutor`. Can be set explicitly by
        setting the ``$LSST_RESOURCES_EXECUTOR`` environment variable to
        "threads" (or "thread") or "process". Returns the thread pool class,
        after logging a warning, if the value of the variable is not
        recognized.

    Notes
    -----
    The chosen class is stored in the module-level ``_POOL_EXECUTOR_CLASS``
    variable so that the environment is examined, and any warning issued,
    only once. Resetting that variable to `None` forces the environment to
    be consulted again on the next call.
    """
    global _POOL_EXECUTOR_CLASS

    if _POOL_EXECUTOR_CLASS is not None:
        return _POOL_EXECUTOR_CLASS

    pool_executor_classes = {
        "threads": concurrent.futures.ThreadPoolExecutor,
        # "thread" is the spelling given in the documentation, so accept it
        # as well rather than warning about a documented value.
        "thread": concurrent.futures.ThreadPoolExecutor,
        "process": concurrent.futures.ProcessPoolExecutor,
    }
    default_executor = "threads"

    # An unset variable and an empty string both mean "use the default".
    external = os.getenv("LSST_RESOURCES_EXECUTOR") or default_executor
    if external not in pool_executor_classes:
        log.warning(
            "Unrecognized value of '%s' for LSST_RESOURCES_EXECUTOR env var. Using '%s'",
            external,
            default_executor,
        )
        external = default_executor

    _POOL_EXECUTOR_CLASS = pool_executor_classes[external]
    return _POOL_EXECUTOR_CLASS
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@contextlib.contextmanager
def _patch_environ(new_values: dict[str, str]) -> Iterator[None]:
    """Patch os.environ temporarily using the supplied values.

    Parameters
    ----------
    new_values : `dict` [ `str`, `str` ]
        New values to be stored in the environment.

    Notes
    -----
    On exit every key in ``new_values`` is put back to the value it had
    before the context was entered, or removed if it was not previously
    set. ``os.environ`` is shared by the whole process, so the patched
    values are visible to all code running while the context is active.
    """
    # Remember the values being replaced. A key missing from this mapping
    # did not exist before the patch and must be removed again on exit.
    old_values: dict[str, str] = {k: os.environ[k] for k in new_values if k in os.environ}
    os.environ.update(new_values)

    try:
        yield
    finally:
        for k in new_values:
            if k in old_values:
                os.environ[k] = old_values[k]
            else:
                # Use pop() rather than del: the wrapped code may already
                # have removed the variable, and a KeyError raised here
                # would mask any exception propagating out of the context.
                os.environ.pop(k, None)
|
|
99
130
|
|
|
100
131
|
|
|
101
132
|
class ResourcePath: # numpydoc ignore=PR02
|
|
@@ -882,13 +913,20 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
882
913
|
raise NotImplementedError()
|
|
883
914
|
|
|
884
915
|
@classmethod
|
|
885
|
-
def mexists(
|
|
916
|
+
def mexists(
|
|
917
|
+
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
918
|
+
) -> dict[ResourcePath, bool]:
|
|
886
919
|
"""Check for existence of multiple URIs at once.
|
|
887
920
|
|
|
888
921
|
Parameters
|
|
889
922
|
----------
|
|
890
923
|
uris : iterable of `ResourcePath`
|
|
891
924
|
The URIs to test.
|
|
925
|
+
num_workers : `int` or `None`, optional
|
|
926
|
+
The number of parallel workers to use when checking for existence
|
|
927
|
+
If `None`, the default value will be taken from the environment.
|
|
928
|
+
If this number is higher than the default and a thread pool is
|
|
929
|
+
used, there may not be enough cached connections available.
|
|
892
930
|
|
|
893
931
|
Returns
|
|
894
932
|
-------
|
|
@@ -906,27 +944,71 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
906
944
|
|
|
907
945
|
existence: dict[ResourcePath, bool] = {}
|
|
908
946
|
for uri_class in grouped:
|
|
909
|
-
existence.update(uri_class._mexists(grouped[uri_class]))
|
|
947
|
+
existence.update(uri_class._mexists(grouped[uri_class], num_workers=num_workers))
|
|
910
948
|
|
|
911
949
|
return existence
|
|
912
950
|
|
|
913
951
|
@classmethod
def _mexists(
    cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
) -> dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Implementation helper method for `mexists`.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.
    num_workers : `int` or `None`, optional
        The number of parallel workers to use when checking for existence.
        If `None`, the default value will be taken from the environment.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence.
    """
    pool_executor_class = _get_executor_class()
    if not issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
        return cls._mexists_pool(pool_executor_class, uris, num_workers=num_workers)

    # Resolve the size of the process pool before the environment is
    # patched. Otherwise the default would be computed while
    # LSST_RESOURCES_NUM_WORKERS is forced to "1", and an explicit
    # request from the caller would be ignored.
    if num_workers is None:
        num_workers = _get_num_workers()

    # Patch the environment to make it think there is only one worker
    # for each subprocess.
    with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
        return cls._mexists_pool(pool_executor_class, uris, num_workers=num_workers)
|
|
981
|
+
|
|
982
|
+
@classmethod
|
|
983
|
+
def _mexists_pool(
|
|
984
|
+
cls,
|
|
985
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
986
|
+
uris: Iterable[ResourcePath],
|
|
987
|
+
*,
|
|
988
|
+
num_workers: int | None = None,
|
|
989
|
+
) -> dict[ResourcePath, bool]:
|
|
990
|
+
"""Check for existence of multiple URIs at once using specified pool
|
|
991
|
+
executor.
|
|
992
|
+
|
|
993
|
+
Implementation helper method for `_mexists`.
|
|
994
|
+
|
|
919
995
|
Parameters
|
|
920
996
|
----------
|
|
997
|
+
pool_executor_class : `type` [ `concurrent.futures.Executor` ]
|
|
998
|
+
Type of executor pool to use.
|
|
921
999
|
uris : iterable of `ResourcePath`
|
|
922
1000
|
The URIs to test.
|
|
1001
|
+
num_workers : `int` or `None`, optional
|
|
1002
|
+
The number of parallel workers to use when checking for existence
|
|
1003
|
+
If `None`, the default value will be taken from the environment.
|
|
923
1004
|
|
|
924
1005
|
Returns
|
|
925
1006
|
-------
|
|
926
1007
|
existence : `dict` of [`ResourcePath`, `bool`]
|
|
927
1008
|
Mapping of original URI to boolean indicating existence.
|
|
928
1009
|
"""
|
|
929
|
-
|
|
1010
|
+
max_workers = num_workers if num_workers is not None else _get_num_workers()
|
|
1011
|
+
with pool_executor_class(max_workers=max_workers) as exists_executor:
|
|
930
1012
|
future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
|
|
931
1013
|
|
|
932
1014
|
results: dict[ResourcePath, bool] = {}
|
|
@@ -975,7 +1057,66 @@ class ResourcePath: # numpydoc ignore=PR02
|
|
|
975
1057
|
A dict of all the transfer attempts with a value indicating
|
|
976
1058
|
whether the transfer succeeded for the target URI.
|
|
977
1059
|
"""
|
|
978
|
-
|
|
1060
|
+
pool_executor_class = _get_executor_class()
|
|
1061
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
1062
|
+
# Patch the environment to make it think there is only one worker
|
|
1063
|
+
# for each subprocess.
|
|
1064
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
1065
|
+
return cls._mtransfer(
|
|
1066
|
+
pool_executor_class,
|
|
1067
|
+
transfer,
|
|
1068
|
+
from_to,
|
|
1069
|
+
overwrite=overwrite,
|
|
1070
|
+
transaction=transaction,
|
|
1071
|
+
do_raise=do_raise,
|
|
1072
|
+
)
|
|
1073
|
+
return cls._mtransfer(
|
|
1074
|
+
pool_executor_class,
|
|
1075
|
+
transfer,
|
|
1076
|
+
from_to,
|
|
1077
|
+
overwrite=overwrite,
|
|
1078
|
+
transaction=transaction,
|
|
1079
|
+
do_raise=do_raise,
|
|
1080
|
+
)
|
|
1081
|
+
|
|
1082
|
+
@classmethod
|
|
1083
|
+
def _mtransfer(
|
|
1084
|
+
cls,
|
|
1085
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
1086
|
+
transfer: str,
|
|
1087
|
+
from_to: Iterable[tuple[ResourcePath, ResourcePath]],
|
|
1088
|
+
overwrite: bool = False,
|
|
1089
|
+
transaction: TransactionProtocol | None = None,
|
|
1090
|
+
do_raise: bool = True,
|
|
1091
|
+
) -> dict[ResourcePath, MTransferResult]:
|
|
1092
|
+
"""Transfer many files in bulk.
|
|
1093
|
+
|
|
1094
|
+
Parameters
|
|
1095
|
+
----------
|
|
1096
|
+
transfer : `str`
|
|
1097
|
+
Mode to use for transferring the resource. Generically there are
|
|
1098
|
+
many standard options: copy, link, symlink, hardlink, relsymlink.
|
|
1099
|
+
Not all URIs support all modes.
|
|
1100
|
+
from_to : `list` [ `tuple` [ `ResourcePath`, `ResourcePath` ] ]
|
|
1101
|
+
A sequence of the source URIs and the target URIs.
|
|
1102
|
+
overwrite : `bool`, optional
|
|
1103
|
+
Allow an existing file to be overwritten. Defaults to `False`.
|
|
1104
|
+
transaction : `~lsst.resources.utils.TransactionProtocol`, optional
|
|
1105
|
+
A transaction object that can (depending on implementation)
|
|
1106
|
+
rollback transfers on error. Not guaranteed to be implemented.
|
|
1107
|
+
The transaction object must be thread safe.
|
|
1108
|
+
do_raise : `bool`, optional
|
|
1109
|
+
If `True` an `ExceptionGroup` will be raised containing any
|
|
1110
|
+
exceptions raised by the individual transfers. Else a dict
|
|
1111
|
+
reporting the status of each `ResourcePath` will be returned.
|
|
1112
|
+
|
|
1113
|
+
Returns
|
|
1114
|
+
-------
|
|
1115
|
+
copy_status : `dict` [ `ResourcePath`, `MTransferResult` ]
|
|
1116
|
+
A dict of all the transfer attempts with a value indicating
|
|
1117
|
+
whether the transfer succeeded for the target URI.
|
|
1118
|
+
"""
|
|
1119
|
+
with pool_executor_class(max_workers=_get_num_workers()) as transfer_executor:
|
|
979
1120
|
future_transfers = {
|
|
980
1121
|
transfer_executor.submit(
|
|
981
1122
|
to_uri.transfer_from,
|
lsst/resources/http.py
CHANGED
|
@@ -59,7 +59,7 @@ from lsst.utils.timer import time_this
|
|
|
59
59
|
from ._resourceHandles import ResourceHandleProtocol
|
|
60
60
|
from ._resourceHandles._httpResourceHandle import HttpReadResourceHandle, parse_content_range_header
|
|
61
61
|
from ._resourcePath import ResourcePath
|
|
62
|
-
from .utils import get_tempdir
|
|
62
|
+
from .utils import _get_num_workers, get_tempdir
|
|
63
63
|
|
|
64
64
|
if TYPE_CHECKING:
|
|
65
65
|
from .utils import TransactionProtocol
|
|
@@ -165,14 +165,14 @@ class HttpResourcePathConfig:
|
|
|
165
165
|
if self._front_end_connections is not None:
|
|
166
166
|
return self._front_end_connections
|
|
167
167
|
|
|
168
|
+
default_pool_size = max(_get_num_workers(), self.DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS)
|
|
169
|
+
|
|
168
170
|
try:
|
|
169
171
|
self._front_end_connections = int(
|
|
170
|
-
os.environ.get(
|
|
171
|
-
"LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS", self.DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS
|
|
172
|
-
)
|
|
172
|
+
os.environ.get("LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS", default_pool_size)
|
|
173
173
|
)
|
|
174
174
|
except ValueError:
|
|
175
|
-
self._front_end_connections =
|
|
175
|
+
self._front_end_connections = default_pool_size
|
|
176
176
|
|
|
177
177
|
return self._front_end_connections
|
|
178
178
|
|
|
@@ -182,14 +182,14 @@ class HttpResourcePathConfig:
|
|
|
182
182
|
if self._back_end_connections is not None:
|
|
183
183
|
return self._back_end_connections
|
|
184
184
|
|
|
185
|
+
default_pool_size = max(_get_num_workers(), self.DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS)
|
|
186
|
+
|
|
185
187
|
try:
|
|
186
188
|
self._back_end_connections = int(
|
|
187
|
-
os.environ.get(
|
|
188
|
-
"LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS", self.DEFAULT_BACKEND_PERSISTENT_CONNECTIONS
|
|
189
|
-
)
|
|
189
|
+
os.environ.get("LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS", default_pool_size)
|
|
190
190
|
)
|
|
191
191
|
except ValueError:
|
|
192
|
-
self._back_end_connections =
|
|
192
|
+
self._back_end_connections = default_pool_size
|
|
193
193
|
|
|
194
194
|
return self._back_end_connections
|
|
195
195
|
|
|
@@ -587,7 +587,7 @@ class SessionStore:
|
|
|
587
587
|
Note that "https://www.example.org" and "https://www.example.org:12345"
|
|
588
588
|
will have different sessions since the port number is not identical.
|
|
589
589
|
"""
|
|
590
|
-
root_uri = str(rpath.root_uri())
|
|
590
|
+
root_uri = _dav_to_http(str(rpath.root_uri()))
|
|
591
591
|
if root_uri not in self._sessions:
|
|
592
592
|
# We don't have yet a session for this endpoint: create a new one.
|
|
593
593
|
self._sessions[root_uri] = self._make_session(rpath)
|
lsst/resources/s3.py
CHANGED
|
@@ -220,7 +220,9 @@ class S3ResourcePath(ResourcePath):
|
|
|
220
220
|
return bucket
|
|
221
221
|
|
|
222
222
|
@classmethod
|
|
223
|
-
def _mexists(
|
|
223
|
+
def _mexists(
|
|
224
|
+
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
225
|
+
) -> dict[ResourcePath, bool]:
|
|
224
226
|
# Force client to be created for each profile before creating threads.
|
|
225
227
|
profiles = set[str | None]()
|
|
226
228
|
for path in uris:
|
|
@@ -230,7 +232,7 @@ class S3ResourcePath(ResourcePath):
|
|
|
230
232
|
for profile in profiles:
|
|
231
233
|
getS3Client(profile)
|
|
232
234
|
|
|
233
|
-
return super()._mexists(uris)
|
|
235
|
+
return super()._mexists(uris, num_workers=num_workers)
|
|
234
236
|
|
|
235
237
|
@backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
|
|
236
238
|
def exists(self) -> bool:
|
lsst/resources/s3utils.py
CHANGED
|
@@ -53,6 +53,7 @@ except ImportError:
|
|
|
53
53
|
|
|
54
54
|
from ._resourcePath import ResourcePath
|
|
55
55
|
from .location import Location
|
|
56
|
+
from .utils import _get_num_workers
|
|
56
57
|
|
|
57
58
|
# https://pypi.org/project/backoff/
|
|
58
59
|
try:
|
|
@@ -246,7 +247,11 @@ def _s3_disable_bucket_validation(client: boto3.client) -> None:
|
|
|
246
247
|
@functools.lru_cache
|
|
247
248
|
def _get_s3_client(endpoint_config: _EndpointConfig, skip_validation: bool) -> boto3.client:
|
|
248
249
|
# Helper function to cache the client for this endpoint
|
|
249
|
-
config = botocore.config.Config(
|
|
250
|
+
config = botocore.config.Config(
|
|
251
|
+
read_timeout=180,
|
|
252
|
+
max_pool_connections=_get_num_workers(),
|
|
253
|
+
retries={"mode": "adaptive", "max_attempts": 10},
|
|
254
|
+
)
|
|
250
255
|
|
|
251
256
|
session = boto3.Session(profile_name=endpoint_config.profile)
|
|
252
257
|
|
lsst/resources/tests.py
CHANGED
|
@@ -1027,6 +1027,8 @@ class GenericReadWriteTestCase(_GenericTestCase):
|
|
|
1027
1027
|
expected_files = {
|
|
1028
1028
|
"dir1/a.yaml",
|
|
1029
1029
|
"dir1/b.yaml",
|
|
1030
|
+
"dir1/c.yaml",
|
|
1031
|
+
"dir1/d.yaml",
|
|
1030
1032
|
"dir2/e.yaml",
|
|
1031
1033
|
}
|
|
1032
1034
|
expected_uris = {root.join(f) for f in expected_files}
|
|
@@ -1035,7 +1037,8 @@ class GenericReadWriteTestCase(_GenericTestCase):
|
|
|
1035
1037
|
self.assertTrue(uri.exists())
|
|
1036
1038
|
expected_uris.add(file)
|
|
1037
1039
|
|
|
1038
|
-
|
|
1040
|
+
# Force to run with fewer workers than there are files.
|
|
1041
|
+
multi = ResourcePath.mexists(expected_uris, num_workers=3)
|
|
1039
1042
|
|
|
1040
1043
|
for uri, is_there in multi.items():
|
|
1041
1044
|
if uri == file:
|
lsst/resources/utils.py
CHANGED
|
@@ -15,6 +15,7 @@ __all__ = ("NoTransaction", "TransactionProtocol", "get_tempdir", "os2posix", "p
|
|
|
15
15
|
|
|
16
16
|
import contextlib
|
|
17
17
|
import logging
|
|
18
|
+
import multiprocessing
|
|
18
19
|
import os
|
|
19
20
|
import posixpath
|
|
20
21
|
import shutil
|
|
@@ -33,6 +34,11 @@ IS_POSIX = os.sep == posixpath.sep
|
|
|
33
34
|
# posix means posix and only determine explicitly in the non-posix case.
|
|
34
35
|
OS_ROOT_PATH = posixpath.sep if IS_POSIX else Path().resolve().root
|
|
35
36
|
|
|
37
|
+
# Maximum number of worker threads for parallelized operations.
|
|
38
|
+
# If greater than 10, be aware that this number has to be consistent
|
|
39
|
+
# with connection pool sizing (for example in urllib3).
|
|
40
|
+
MAX_WORKERS = 10
|
|
41
|
+
|
|
36
42
|
log = logging.getLogger(__name__)
|
|
37
43
|
|
|
38
44
|
|
|
@@ -226,3 +232,40 @@ def ensure_directory_is_writeable(directory_path: str | bytes) -> None:
|
|
|
226
232
|
desired_mode = current_mode | stat.S_IWUSR | stat.S_IXUSR
|
|
227
233
|
if current_mode != desired_mode:
|
|
228
234
|
os.chmod(directory_path, desired_mode)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _get_int_env_var(env_var: str) -> int | None:
    """Read an environment variable and interpret it as an integer.

    Parameters
    ----------
    env_var : `str`
        Name of the environment variable to read.

    Returns
    -------
    int_value : `int` or `None`
        The integer value of the variable, or `None` if the variable is
        not set or its value cannot be parsed as an integer.
    """
    env_value = os.getenv(env_var)
    if env_value is None:
        return None
    # int() signals a malformed string (for example "abc" or "") with
    # ValueError, not TypeError, so that is the exception to handle here.
    try:
        return int(env_value)
    except ValueError:
        return None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@cache
def _get_num_workers() -> int:
    """Calculate the number of workers to use.

    Returns
    -------
    num : `int`
        The number of workers to use. Will use the value of the
        ``LSST_RESOURCES_NUM_WORKERS`` environment variable if set. Will fall
        back to using the CPU count (plus 2) but capped at ``MAX_WORKERS``.

    Notes
    -----
    The function is wrapped by the ``cache`` decorator, so the value
    computed on the first call is reused by later calls.
    """
    # NOTE: this must be a plain string literal. An f-string in this
    # position is an ordinary expression and leaves ``__doc__`` unset.

    # If someone is explicitly specifying a number, let them use that number.
    num_workers = _get_int_env_var("LSST_RESOURCES_NUM_WORKERS")
    if num_workers is not None:
        return num_workers

    # CPU_LIMIT is used on nublado. Fall back to the CPU count of the
    # machine when it is unset or zero.
    cpu_limit = _get_int_env_var("CPU_LIMIT") or multiprocessing.cpu_count()

    # But don't ever return more than the maximum allowed.
    return min(cpu_limit + 2, MAX_WORKERS)
|
lsst/resources/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
__all__ = ["__version__"]
|
|
2
|
-
__version__ = "29.2025.
|
|
2
|
+
__version__ = "29.2025.1800"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lsst-resources
|
|
3
|
-
Version: 29.2025.
|
|
3
|
+
Version: 29.2025.1800
|
|
4
4
|
Summary: An abstraction layer for reading and writing from URI file resources.
|
|
5
5
|
Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
lsst/__init__.py,sha256=9I6UQ9gj-ZcPlvsa0OPBo76UujxXVehVzw9yMAOQvyM,466
|
|
2
2
|
lsst/resources/__init__.py,sha256=BDj6uokvd0ZQNGl-Xgz5gZd83Z0L2gFqGSk0KJpylP8,778
|
|
3
|
-
lsst/resources/_resourcePath.py,sha256=
|
|
3
|
+
lsst/resources/_resourcePath.py,sha256=DTsZTrCaXu9GLT1QwkhJaQcb-b8VuaRH1TU-J58PJ0M,70548
|
|
4
4
|
lsst/resources/file.py,sha256=-jPuoHvTEtx5tnDyNkfwhWAyX0cTwkuMd-JvJn9EGdE,23226
|
|
5
5
|
lsst/resources/gs.py,sha256=Lpo5GAzH7R7HG8E5RMGOdP4j4hjWJn-k6M3OXj0nHQM,12783
|
|
6
|
-
lsst/resources/http.py,sha256=
|
|
6
|
+
lsst/resources/http.py,sha256=9a_VadSabznPC0FTQtDtfV041zH25wZgXFtJ7HkvHp0,88275
|
|
7
7
|
lsst/resources/location.py,sha256=x3Tq0x5o1OXYmZDxYBenUG1N71wtDhnjVAr3s2ZEiu8,7937
|
|
8
8
|
lsst/resources/mem.py,sha256=VOWh7XxJPfqKcFdLZSjKEAfORQ2AHZHpxmjT8LniV60,1008
|
|
9
9
|
lsst/resources/packageresource.py,sha256=vnfeRlpVwpC5cDQZE6Lnh8EH6oZy1sH2vLz9ONYjJ4k,6817
|
|
10
10
|
lsst/resources/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
lsst/resources/s3.py,sha256=
|
|
12
|
-
lsst/resources/s3utils.py,sha256=
|
|
11
|
+
lsst/resources/s3.py,sha256=X5ujR-w4LOJpOWAIBShn6hS0oeXQwvl3leeuj6c9LJ4,24198
|
|
12
|
+
lsst/resources/s3utils.py,sha256=rhMvZjbHjymObDWsEUODoLaw70DrrJHn_cUi3G78VvI,14463
|
|
13
13
|
lsst/resources/schemeless.py,sha256=GfJcKzZ0XIeepfQdW4HPZWiZlSp_ej0SEtSiJTrDUQs,10666
|
|
14
|
-
lsst/resources/tests.py,sha256=
|
|
15
|
-
lsst/resources/utils.py,sha256=
|
|
16
|
-
lsst/resources/version.py,sha256=
|
|
14
|
+
lsst/resources/tests.py,sha256=LZOxPfZhSS76z7s5kWuULaMlTo65_m8XROdHrQqubQQ,43646
|
|
15
|
+
lsst/resources/utils.py,sha256=6O3Mq7JbPEtqyD2lM77pRpwcPMfV5SxiNMknw-F2vNs,8097
|
|
16
|
+
lsst/resources/version.py,sha256=uRdffTNnwH4GQy77H1Wk3Md64craDCIuRxmCtF7ON0g,55
|
|
17
17
|
lsst/resources/_resourceHandles/__init__.py,sha256=zOcZ8gVEBdAWcHJaZabA8Vdq-wAVcxjbmA_1b1IWM6M,76
|
|
18
18
|
lsst/resources/_resourceHandles/_baseResourceHandle.py,sha256=lQwxDOmFUNJndTxsjpz-HxrQBL0L-z4aXQocHdOEI7c,4676
|
|
19
19
|
lsst/resources/_resourceHandles/_fileResourceHandle.py,sha256=A7_WQPzD0ZlOzNmaI_TPdZybrNxrXPkNHWVla3UFxfs,3676
|
|
20
|
-
lsst/resources/_resourceHandles/_httpResourceHandle.py,sha256=
|
|
20
|
+
lsst/resources/_resourceHandles/_httpResourceHandle.py,sha256=Yami8IVGeru4bLQCag-OvGG0ltz1qyEg57FY4IEB87Y,10995
|
|
21
21
|
lsst/resources/_resourceHandles/_s3ResourceHandle.py,sha256=NkDmPb9bm_zMvr6mMnb-tBmqJDt0yUJrt2gZXR8l7ok,12923
|
|
22
|
-
lsst_resources-29.2025.
|
|
23
|
-
lsst_resources-29.2025.
|
|
24
|
-
lsst_resources-29.2025.
|
|
25
|
-
lsst_resources-29.2025.
|
|
26
|
-
lsst_resources-29.2025.
|
|
27
|
-
lsst_resources-29.2025.
|
|
28
|
-
lsst_resources-29.2025.
|
|
22
|
+
lsst_resources-29.2025.1800.dist-info/licenses/COPYRIGHT,sha256=yazVsoMmFwhiw5itGrdT4YPmXbpsQyUFjlpOyZIa77M,148
|
|
23
|
+
lsst_resources-29.2025.1800.dist-info/licenses/LICENSE,sha256=7wrtgl8meQ0_RIuv2TjIKpAnNrl-ODH-QLwyHe9citI,1516
|
|
24
|
+
lsst_resources-29.2025.1800.dist-info/METADATA,sha256=QUD23JcY-DYeGrrGfeFiZRUJ6dDpWuVeSu0YwW5HliI,2237
|
|
25
|
+
lsst_resources-29.2025.1800.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
|
|
26
|
+
lsst_resources-29.2025.1800.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
|
|
27
|
+
lsst_resources-29.2025.1800.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
28
|
+
lsst_resources-29.2025.1800.dist-info/RECORD,,
|
{lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/licenses/COPYRIGHT
RENAMED
|
File without changes
|
{lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{lsst_resources-29.2025.1600.dist-info → lsst_resources-29.2025.1800.dist-info}/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|