lsst-resources 29.2025.1700__py3-none-any.whl → 29.2025.4600__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/resources/_resourceHandles/_davResourceHandle.py +197 -0
- lsst/resources/_resourceHandles/_fileResourceHandle.py +1 -1
- lsst/resources/_resourceHandles/_httpResourceHandle.py +7 -4
- lsst/resources/_resourceHandles/_s3ResourceHandle.py +3 -17
- lsst/resources/_resourcePath.py +311 -79
- lsst/resources/dav.py +912 -0
- lsst/resources/davutils.py +2659 -0
- lsst/resources/file.py +41 -16
- lsst/resources/gs.py +6 -3
- lsst/resources/http.py +194 -65
- lsst/resources/mem.py +7 -1
- lsst/resources/s3.py +141 -15
- lsst/resources/s3utils.py +8 -1
- lsst/resources/schemeless.py +6 -3
- lsst/resources/tests.py +66 -12
- lsst/resources/utils.py +43 -0
- lsst/resources/version.py +1 -1
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/METADATA +3 -3
- lsst_resources-29.2025.4600.dist-info/RECORD +31 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/WHEEL +1 -1
- lsst_resources-29.2025.1700.dist-info/RECORD +0 -28
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/resources/s3.py
CHANGED
|
@@ -13,6 +13,7 @@ from __future__ import annotations
|
|
|
13
13
|
|
|
14
14
|
__all__ = ("S3ResourcePath",)
|
|
15
15
|
|
|
16
|
+
import concurrent.futures
|
|
16
17
|
import contextlib
|
|
17
18
|
import io
|
|
18
19
|
import logging
|
|
@@ -20,17 +21,19 @@ import os
|
|
|
20
21
|
import re
|
|
21
22
|
import sys
|
|
22
23
|
import threading
|
|
24
|
+
from collections import defaultdict
|
|
23
25
|
from collections.abc import Iterable, Iterator
|
|
24
26
|
from functools import cache, cached_property
|
|
25
27
|
from typing import IO, TYPE_CHECKING, cast
|
|
26
28
|
|
|
27
29
|
from botocore.exceptions import ClientError
|
|
28
30
|
|
|
31
|
+
from lsst.utils.iteration import chunk_iterable
|
|
29
32
|
from lsst.utils.timer import time_this
|
|
30
33
|
|
|
31
34
|
from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
|
|
32
35
|
from ._resourceHandles._s3ResourceHandle import S3ResourceHandle
|
|
33
|
-
from ._resourcePath import ResourcePath
|
|
36
|
+
from ._resourcePath import _EXECUTOR_TYPE, MBulkResult, ResourcePath, _get_executor_class, _patch_environ
|
|
34
37
|
from .s3utils import (
|
|
35
38
|
_get_s3_connection_parameters,
|
|
36
39
|
_s3_disable_bucket_validation,
|
|
@@ -44,6 +47,7 @@ from .s3utils import (
|
|
|
44
47
|
s3CheckFileExists,
|
|
45
48
|
translate_client_error,
|
|
46
49
|
)
|
|
50
|
+
from .utils import _get_num_workers
|
|
47
51
|
|
|
48
52
|
try:
|
|
49
53
|
from boto3.s3.transfer import TransferConfig # type: ignore
|
|
@@ -220,7 +224,9 @@ class S3ResourcePath(ResourcePath):
|
|
|
220
224
|
return bucket
|
|
221
225
|
|
|
222
226
|
@classmethod
|
|
223
|
-
def _mexists(
|
|
227
|
+
def _mexists(
|
|
228
|
+
cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
|
|
229
|
+
) -> dict[ResourcePath, bool]:
|
|
224
230
|
# Force client to be created for each profile before creating threads.
|
|
225
231
|
profiles = set[str | None]()
|
|
226
232
|
for path in uris:
|
|
@@ -230,7 +236,125 @@ class S3ResourcePath(ResourcePath):
|
|
|
230
236
|
for profile in profiles:
|
|
231
237
|
getS3Client(profile)
|
|
232
238
|
|
|
233
|
-
return super()._mexists(uris)
|
|
239
|
+
return super()._mexists(uris, num_workers=num_workers)
|
|
240
|
+
|
|
241
|
+
@classmethod
|
|
242
|
+
def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
|
|
243
|
+
# Delete multiple objects in one API call.
|
|
244
|
+
# Must group by profile and bucket.
|
|
245
|
+
grouped_uris: dict[tuple[str | None, str], list[S3ResourcePath]] = defaultdict(list)
|
|
246
|
+
for uri in uris:
|
|
247
|
+
uri = cast(S3ResourcePath, uri)
|
|
248
|
+
grouped_uris[uri._profile, uri._bucket].append(uri)
|
|
249
|
+
|
|
250
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
251
|
+
for related_uris in grouped_uris.values():
|
|
252
|
+
# API requires no more than 1000 per call.
|
|
253
|
+
chunk_num = 0
|
|
254
|
+
chunks: list[tuple[ResourcePath, ...]] = []
|
|
255
|
+
key_to_uri: dict[str, ResourcePath] = {}
|
|
256
|
+
for chunk in chunk_iterable(related_uris, chunk_size=1_000):
|
|
257
|
+
for uri in chunk:
|
|
258
|
+
key = uri.relativeToPathRoot
|
|
259
|
+
key_to_uri[key] = uri
|
|
260
|
+
# Default to assuming everything worked.
|
|
261
|
+
results[uri] = MBulkResult(True, None)
|
|
262
|
+
chunk_num += 1
|
|
263
|
+
chunks.append(chunk)
|
|
264
|
+
|
|
265
|
+
# Bulk remove.
|
|
266
|
+
with time_this(
|
|
267
|
+
log,
|
|
268
|
+
msg="Bulk delete; %d chunk%s; totalling %d dataset%s",
|
|
269
|
+
args=(
|
|
270
|
+
len(chunks),
|
|
271
|
+
"s" if len(chunks) != 1 else "",
|
|
272
|
+
len(related_uris),
|
|
273
|
+
"s" if len(related_uris) != 1 else "",
|
|
274
|
+
),
|
|
275
|
+
):
|
|
276
|
+
errored = cls._mremove_select(chunks)
|
|
277
|
+
|
|
278
|
+
# Update with error information.
|
|
279
|
+
results.update(errored)
|
|
280
|
+
|
|
281
|
+
return results
|
|
282
|
+
|
|
283
|
+
@classmethod
|
|
284
|
+
def _mremove_select(cls, chunks: list[tuple[ResourcePath, ...]]) -> dict[ResourcePath, MBulkResult]:
|
|
285
|
+
if len(chunks) == 1:
|
|
286
|
+
# Do the removal directly without futures.
|
|
287
|
+
return cls._delete_objects_wrapper(chunks[0])
|
|
288
|
+
pool_executor_class = _get_executor_class()
|
|
289
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
290
|
+
# Patch the environment to make it think there is only one worker
|
|
291
|
+
# for each subprocess.
|
|
292
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
293
|
+
return cls._mremove_with_pool(pool_executor_class, chunks)
|
|
294
|
+
else:
|
|
295
|
+
return cls._mremove_with_pool(pool_executor_class, chunks)
|
|
296
|
+
|
|
297
|
+
@classmethod
|
|
298
|
+
def _mremove_with_pool(
|
|
299
|
+
cls,
|
|
300
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
301
|
+
chunks: list[tuple[ResourcePath, ...]],
|
|
302
|
+
*,
|
|
303
|
+
num_workers: int | None = None,
|
|
304
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
305
|
+
# Different name because different API to base class.
|
|
306
|
+
# No need to make more workers than we have chunks.
|
|
307
|
+
max_workers = num_workers if num_workers is not None else min(len(chunks), _get_num_workers())
|
|
308
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
309
|
+
with pool_executor_class(max_workers=max_workers) as remove_executor:
|
|
310
|
+
future_remove = {
|
|
311
|
+
remove_executor.submit(cls._delete_objects_wrapper, chunk): i
|
|
312
|
+
for i, chunk in enumerate(chunks)
|
|
313
|
+
}
|
|
314
|
+
for future in concurrent.futures.as_completed(future_remove):
|
|
315
|
+
try:
|
|
316
|
+
results.update(future.result())
|
|
317
|
+
except Exception as e:
|
|
318
|
+
# The chunk utterly failed.
|
|
319
|
+
chunk = chunks[future_remove[future]]
|
|
320
|
+
for uri in chunk:
|
|
321
|
+
results[uri] = MBulkResult(False, e)
|
|
322
|
+
return results
|
|
323
|
+
|
|
324
|
+
@classmethod
|
|
325
|
+
def _delete_objects_wrapper(cls, uris: tuple[ResourcePath, ...]) -> dict[ResourcePath, MBulkResult]:
|
|
326
|
+
"""Convert URIs to keys and call low-level API."""
|
|
327
|
+
if not uris:
|
|
328
|
+
return {}
|
|
329
|
+
keys: list[dict[str, str]] = []
|
|
330
|
+
key_to_uri: dict[str, ResourcePath] = {}
|
|
331
|
+
for uri in uris:
|
|
332
|
+
key = uri.relativeToPathRoot
|
|
333
|
+
key_to_uri[key] = uri
|
|
334
|
+
keys.append({"Key": key})
|
|
335
|
+
|
|
336
|
+
first_uri = cast(S3ResourcePath, uris[0])
|
|
337
|
+
results = cls._delete_related_objects(first_uri.client, first_uri._bucket, keys)
|
|
338
|
+
|
|
339
|
+
# Remap error object keys to uris.
|
|
340
|
+
return {key_to_uri[key]: result for key, result in results.items()}
|
|
341
|
+
|
|
342
|
+
@classmethod
|
|
343
|
+
@backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
|
|
344
|
+
def _delete_related_objects(
|
|
345
|
+
cls, client: boto3.client, bucket: str, keys: list[dict[str, str]]
|
|
346
|
+
) -> dict[str, MBulkResult]:
|
|
347
|
+
# Delete multiple objects from the same bucket, allowing for backoff
|
|
348
|
+
# retry.
|
|
349
|
+
response = client.delete_objects(Bucket=bucket, Delete={"Objects": keys, "Quiet": True})
|
|
350
|
+
# Use Quiet mode so we assume everything worked unless told otherwise.
|
|
351
|
+
# Only returning errors -- indexed by Key name.
|
|
352
|
+
errors: dict[str, MBulkResult] = {}
|
|
353
|
+
for errored_key in response.get("Errors", []):
|
|
354
|
+
errors[errored_key["Key"]] = MBulkResult(
|
|
355
|
+
False, ClientError({"Error": errored_key}, f"delete_objects: {errored_key['Key']}")
|
|
356
|
+
)
|
|
357
|
+
return errors
|
|
234
358
|
|
|
235
359
|
@backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
|
|
236
360
|
def exists(self) -> bool:
|
|
@@ -353,7 +477,10 @@ class S3ResourcePath(ResourcePath):
|
|
|
353
477
|
|
|
354
478
|
return s3, f"{self._bucket}/{self.relativeToPathRoot}"
|
|
355
479
|
|
|
356
|
-
|
|
480
|
+
@contextlib.contextmanager
|
|
481
|
+
def _as_local(
|
|
482
|
+
self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
|
|
483
|
+
) -> Iterator[ResourcePath]:
|
|
357
484
|
"""Download object from S3 and place in temporary directory.
|
|
358
485
|
|
|
359
486
|
Parameters
|
|
@@ -370,13 +497,12 @@ class S3ResourcePath(ResourcePath):
|
|
|
370
497
|
|
|
371
498
|
Returns
|
|
372
499
|
-------
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
Always returns `True`. This is always a temporary file.
|
|
500
|
+
local_uri : `ResourcePath`
|
|
501
|
+
A URI to a local POSIX file corresponding to a local temporary
|
|
502
|
+
downloaded copy of the resource.
|
|
377
503
|
"""
|
|
378
504
|
with (
|
|
379
|
-
ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=False) as tmp_uri,
|
|
505
|
+
ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=True) as tmp_uri,
|
|
380
506
|
self._use_threads_temp_override(multithreaded),
|
|
381
507
|
time_this(log, msg="Downloading %s to local file", args=(self,)),
|
|
382
508
|
):
|
|
@@ -387,7 +513,7 @@ class S3ResourcePath(ResourcePath):
|
|
|
387
513
|
)
|
|
388
514
|
with tmp_uri.open("wb") as tmpFile:
|
|
389
515
|
self._download_file(tmpFile, progress)
|
|
390
|
-
|
|
516
|
+
yield tmp_uri
|
|
391
517
|
|
|
392
518
|
@backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
|
|
393
519
|
def _upload_file(self, local_file: ResourcePath, progress: ProgressPercentage | None) -> None:
|
|
@@ -418,7 +544,7 @@ class S3ResourcePath(ResourcePath):
|
|
|
418
544
|
try:
|
|
419
545
|
self.client.copy_object(CopySource=copy_source, Bucket=self._bucket, Key=self.relativeToPathRoot)
|
|
420
546
|
except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
|
|
421
|
-
raise FileNotFoundError("No such resource to transfer: {self}") from err
|
|
547
|
+
raise FileNotFoundError(f"No such resource to transfer: {src} -> {self}") from err
|
|
422
548
|
except ClientError as err:
|
|
423
549
|
translate_client_error(err, self)
|
|
424
550
|
raise
|
|
@@ -485,10 +611,10 @@ class S3ResourcePath(ResourcePath):
|
|
|
485
611
|
timer_msg = "Transfer from %s to %s"
|
|
486
612
|
timer_args = (src, self)
|
|
487
613
|
|
|
488
|
-
if isinstance(src, type(self)):
|
|
489
|
-
# Looks like an S3 remote uri so we can use direct copy
|
|
490
|
-
#
|
|
491
|
-
#
|
|
614
|
+
if isinstance(src, type(self)) and self.client == src.client:
|
|
615
|
+
# Looks like an S3 remote uri so we can use direct copy.
|
|
616
|
+
# This only works if the source and destination are using the same
|
|
617
|
+
# S3 endpoint and profile.
|
|
492
618
|
with time_this(log, msg=timer_msg, args=timer_args):
|
|
493
619
|
self._copy_from(src)
|
|
494
620
|
|
lsst/resources/s3utils.py
CHANGED
|
@@ -53,6 +53,7 @@ except ImportError:
|
|
|
53
53
|
|
|
54
54
|
from ._resourcePath import ResourcePath
|
|
55
55
|
from .location import Location
|
|
56
|
+
from .utils import _get_num_workers
|
|
56
57
|
|
|
57
58
|
# https://pypi.org/project/backoff/
|
|
58
59
|
try:
|
|
@@ -246,7 +247,13 @@ def _s3_disable_bucket_validation(client: boto3.client) -> None:
|
|
|
246
247
|
@functools.lru_cache
|
|
247
248
|
def _get_s3_client(endpoint_config: _EndpointConfig, skip_validation: bool) -> boto3.client:
|
|
248
249
|
# Helper function to cache the client for this endpoint
|
|
249
|
-
|
|
250
|
+
# boto seems to assume it will always have at least 10 available.
|
|
251
|
+
max_pool_size = max(_get_num_workers(), 10)
|
|
252
|
+
config = botocore.config.Config(
|
|
253
|
+
read_timeout=180,
|
|
254
|
+
max_pool_connections=max_pool_size,
|
|
255
|
+
retries={"mode": "adaptive", "max_attempts": 10},
|
|
256
|
+
)
|
|
250
257
|
|
|
251
258
|
session = boto3.Session(profile_name=endpoint_config.profile)
|
|
252
259
|
|
lsst/resources/schemeless.py
CHANGED
|
@@ -105,13 +105,16 @@ class SchemelessResourcePath(FileResourcePath):
|
|
|
105
105
|
return stat.S_ISDIR(status.st_mode)
|
|
106
106
|
return self.dirLike
|
|
107
107
|
|
|
108
|
-
def relative_to(self, other: ResourcePath) -> str | None:
|
|
108
|
+
def relative_to(self, other: ResourcePath, walk_up: bool = False) -> str | None:
|
|
109
109
|
"""Return the relative path from this URI to the other URI.
|
|
110
110
|
|
|
111
111
|
Parameters
|
|
112
112
|
----------
|
|
113
113
|
other : `ResourcePath`
|
|
114
114
|
URI to use to calculate the relative path.
|
|
115
|
+
walk_up : `bool`, optional
|
|
116
|
+
Control whether "``..``" can be used to resolve a relative path.
|
|
117
|
+
Default is `False`. Can not be `True` on Python version 3.11.
|
|
115
118
|
|
|
116
119
|
Returns
|
|
117
120
|
-------
|
|
@@ -146,8 +149,8 @@ class SchemelessResourcePath(FileResourcePath):
|
|
|
146
149
|
raise RuntimeError(f"Unexpected combination of {child}.relative_to({other}).")
|
|
147
150
|
|
|
148
151
|
if child is None:
|
|
149
|
-
return super().relative_to(other)
|
|
150
|
-
return child.relative_to(other)
|
|
152
|
+
return super().relative_to(other, walk_up=walk_up)
|
|
153
|
+
return child.relative_to(other, walk_up=walk_up)
|
|
151
154
|
|
|
152
155
|
@classmethod
|
|
153
156
|
def _fixupPathUri(
|
lsst/resources/tests.py
CHANGED
|
@@ -17,6 +17,7 @@ import os
|
|
|
17
17
|
import pathlib
|
|
18
18
|
import random
|
|
19
19
|
import string
|
|
20
|
+
import sys
|
|
20
21
|
import tempfile
|
|
21
22
|
import unittest
|
|
22
23
|
import urllib.parse
|
|
@@ -61,18 +62,18 @@ def _check_open(
|
|
|
61
62
|
"""
|
|
62
63
|
text_content = "abcdefghijklmnopqrstuvwxyz🙂"
|
|
63
64
|
bytes_content = uuid.uuid4().bytes
|
|
64
|
-
content_by_mode_suffix = {
|
|
65
|
+
content_by_mode_suffix: dict[str, str | bytes] = {
|
|
65
66
|
"": text_content,
|
|
66
67
|
"t": text_content,
|
|
67
68
|
"b": bytes_content,
|
|
68
69
|
}
|
|
69
|
-
empty_content_by_mode_suffix = {
|
|
70
|
+
empty_content_by_mode_suffix: dict[str, str | bytes] = {
|
|
70
71
|
"": "",
|
|
71
72
|
"t": "",
|
|
72
73
|
"b": b"",
|
|
73
74
|
}
|
|
74
75
|
# To appease mypy
|
|
75
|
-
double_content_by_mode_suffix = {
|
|
76
|
+
double_content_by_mode_suffix: dict[str, str | bytes] = {
|
|
76
77
|
"": text_content + text_content,
|
|
77
78
|
"t": text_content + text_content,
|
|
78
79
|
"b": bytes_content + bytes_content,
|
|
@@ -143,6 +144,16 @@ def _check_open(
|
|
|
143
144
|
content_read = read_buffer.read()
|
|
144
145
|
test_case.assertEqual(len(content_read), 0, f"Read: {content_read!r}, expected empty.")
|
|
145
146
|
|
|
147
|
+
# Write multiple chunks with flushing to ensure that any handles that
|
|
148
|
+
# cache without flushing work properly.
|
|
149
|
+
n = 3
|
|
150
|
+
with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
|
|
151
|
+
for _ in range(n):
|
|
152
|
+
write_buffer.write(content)
|
|
153
|
+
write_buffer.flush()
|
|
154
|
+
with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
|
|
155
|
+
test_case.assertEqual(read_buffer.read(), content * n)
|
|
156
|
+
|
|
146
157
|
# Write two copies of the content, overwriting the single copy there.
|
|
147
158
|
with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
|
|
148
159
|
write_buffer.write(double_content)
|
|
@@ -365,6 +376,19 @@ class GenericTestCase(_GenericTestCase):
|
|
|
365
376
|
parent = ResourcePath("d/e.txt", forceAbsolute=False)
|
|
366
377
|
self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
|
|
367
378
|
|
|
379
|
+
# Allow .. in response.
|
|
380
|
+
child = ResourcePath(self._make_uri("a/b/c/d.txt"), forceAbsolute=False)
|
|
381
|
+
parent = ResourcePath(self._make_uri("a/b/d/e/"), forceAbsolute=False)
|
|
382
|
+
self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
|
|
383
|
+
|
|
384
|
+
if sys.version_info >= (3, 12, 0):
|
|
385
|
+
# Fails on python 3.11.
|
|
386
|
+
self.assertEqual(
|
|
387
|
+
child.relative_to(parent, walk_up=True),
|
|
388
|
+
"../../c/d.txt",
|
|
389
|
+
f"{child}.relative_to({parent}, walk_up=True)",
|
|
390
|
+
)
|
|
391
|
+
|
|
368
392
|
def test_parents(self) -> None:
|
|
369
393
|
"""Test of splitting and parent walking."""
|
|
370
394
|
parent = ResourcePath(self._make_uri("somedir"), forceDirectory=True)
|
|
@@ -382,6 +406,14 @@ class GenericTestCase(_GenericTestCase):
|
|
|
382
406
|
self.assertEqual(child_file.parent().parent(), parent)
|
|
383
407
|
self.assertEqual(child_subdir.dirname(), child_subdir)
|
|
384
408
|
|
|
409
|
+
# Make sure that the parent doesn't retain any fragment from the
|
|
410
|
+
# child.
|
|
411
|
+
child_fragment = child_subdir.join("a.txt#fragment")
|
|
412
|
+
self.assertEqual(child_fragment.fragment, "fragment")
|
|
413
|
+
fragment_parent = child_fragment.parent()
|
|
414
|
+
self.assertEqual(fragment_parent.fragment, "")
|
|
415
|
+
self.assertTrue(str(fragment_parent).endswith("/"))
|
|
416
|
+
|
|
385
417
|
def test_escapes(self) -> None:
|
|
386
418
|
"""Special characters in file paths."""
|
|
387
419
|
src = self.root_uri.join("bbb/???/test.txt")
|
|
@@ -486,6 +518,13 @@ class GenericTestCase(_GenericTestCase):
|
|
|
486
518
|
self.assertEqual(fnew3.fragment, "fragment")
|
|
487
519
|
self.assertEqual(fnew3.basename(), "b.txt", msg=f"Got: {fnew3._uri}")
|
|
488
520
|
|
|
521
|
+
# Check that fragment on the directory is dropped on join.
|
|
522
|
+
frag_dir = add_dir.join("subdir/#dir_fragment")
|
|
523
|
+
self.assertEqual(frag_dir.fragment, "dir_fragment")
|
|
524
|
+
fnew4 = frag_dir.join("a.txt")
|
|
525
|
+
self.assertEqual(fnew4.fragment, "")
|
|
526
|
+
self.assertTrue(str(fnew4).endswith("/a.txt"))
|
|
527
|
+
|
|
489
528
|
# Join a resource path.
|
|
490
529
|
subpath = ResourcePath("a/b.txt#fragment2", forceAbsolute=False, forceDirectory=False)
|
|
491
530
|
fnew3 = root.join(subpath)
|
|
@@ -557,6 +596,10 @@ class GenericReadWriteTestCase(_GenericTestCase):
|
|
|
557
596
|
|
|
558
597
|
transfer_modes: tuple[str, ...] = ("copy", "move")
|
|
559
598
|
testdir: str | None = None
|
|
599
|
+
# Number of files to use for mremove() testing to ensure difference code
|
|
600
|
+
# paths are hit. Do not want to generically use many files for schemes
|
|
601
|
+
# where it makes no difference.
|
|
602
|
+
n_mremove_files: int = 15
|
|
560
603
|
|
|
561
604
|
def setUp(self) -> None:
|
|
562
605
|
if self.scheme is None:
|
|
@@ -1023,22 +1066,33 @@ class GenericReadWriteTestCase(_GenericTestCase):
|
|
|
1023
1066
|
# A file that is not there.
|
|
1024
1067
|
file = root.join("config/basic/butler.yaml")
|
|
1025
1068
|
|
|
1026
|
-
# Create some files.
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
"dir2/e.yaml",
|
|
1031
|
-
}
|
|
1032
|
-
expected_uris = {root.join(f) for f in expected_files}
|
|
1069
|
+
# Create some files. Most schemes the code paths do not change for 10
|
|
1070
|
+
# vs 1000 files but in some schemes it does.
|
|
1071
|
+
expected_files = [f"dir1/f{n}.yaml" for n in range(self.n_mremove_files)]
|
|
1072
|
+
expected_uris = [root.join(f) for f in expected_files]
|
|
1033
1073
|
for uri in expected_uris:
|
|
1034
1074
|
uri.write(b"")
|
|
1035
1075
|
self.assertTrue(uri.exists())
|
|
1036
|
-
expected_uris.add(file)
|
|
1076
|
+
expected_uris.append(file)
|
|
1037
1077
|
|
|
1038
|
-
|
|
1078
|
+
# Force to run with fewer workers than there are files.
|
|
1079
|
+
multi = ResourcePath.mexists(expected_uris, num_workers=3)
|
|
1039
1080
|
|
|
1040
1081
|
for uri, is_there in multi.items():
|
|
1041
1082
|
if uri == file:
|
|
1042
1083
|
self.assertFalse(is_there)
|
|
1043
1084
|
else:
|
|
1044
1085
|
self.assertTrue(is_there)
|
|
1086
|
+
|
|
1087
|
+
# Clean up. Unfortunately POSIX raises a FileNotFoundError but
|
|
1088
|
+
# S3 boto does not complain if there is no key.
|
|
1089
|
+
ResourcePath.mremove(expected_uris, do_raise=False)
|
|
1090
|
+
|
|
1091
|
+
# Check they were really removed.
|
|
1092
|
+
multi = ResourcePath.mexists(expected_uris, num_workers=3)
|
|
1093
|
+
for uri, is_there in multi.items():
|
|
1094
|
+
self.assertFalse(is_there)
|
|
1095
|
+
|
|
1096
|
+
# Clean up a subset of files that are already gone, but this can
|
|
1097
|
+
# trigger a different code path.
|
|
1098
|
+
ResourcePath.mremove(expected_uris[:5], do_raise=False)
|
lsst/resources/utils.py
CHANGED
|
@@ -15,6 +15,7 @@ __all__ = ("NoTransaction", "TransactionProtocol", "get_tempdir", "os2posix", "p
|
|
|
15
15
|
|
|
16
16
|
import contextlib
|
|
17
17
|
import logging
|
|
18
|
+
import multiprocessing
|
|
18
19
|
import os
|
|
19
20
|
import posixpath
|
|
20
21
|
import shutil
|
|
@@ -33,6 +34,11 @@ IS_POSIX = os.sep == posixpath.sep
|
|
|
33
34
|
# posix means posix and only determine explicitly in the non-posix case.
|
|
34
35
|
OS_ROOT_PATH = posixpath.sep if IS_POSIX else Path().resolve().root
|
|
35
36
|
|
|
37
|
+
# Maximum number of worker threads for parallelized operations.
|
|
38
|
+
# If greater than 10, be aware that this number has to be consistent
|
|
39
|
+
# with connection pool sizing (for example in urllib3).
|
|
40
|
+
MAX_WORKERS = 10
|
|
41
|
+
|
|
36
42
|
log = logging.getLogger(__name__)
|
|
37
43
|
|
|
38
44
|
|
|
@@ -226,3 +232,40 @@ def ensure_directory_is_writeable(directory_path: str | bytes) -> None:
|
|
|
226
232
|
desired_mode = current_mode | stat.S_IWUSR | stat.S_IXUSR
|
|
227
233
|
if current_mode != desired_mode:
|
|
228
234
|
os.chmod(directory_path, desired_mode)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _get_int_env_var(env_var: str) -> int | None:
|
|
238
|
+
int_value = None
|
|
239
|
+
env_value = os.getenv(env_var)
|
|
240
|
+
if env_value is not None:
|
|
241
|
+
with contextlib.suppress(TypeError):
|
|
242
|
+
int_value = int(env_value)
|
|
243
|
+
return int_value
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@cache
|
|
247
|
+
def _get_num_workers() -> int:
|
|
248
|
+
f"""Calculate the number of workers to use.
|
|
249
|
+
|
|
250
|
+
Returns
|
|
251
|
+
-------
|
|
252
|
+
num : `int`
|
|
253
|
+
The number of workers to use. Will use the value of the
|
|
254
|
+
``LSST_RESOURCES_NUM_WORKERS`` environment variable if set. Will fall
|
|
255
|
+
back to using the CPU count (plus 2) but capped at {MAX_WORKERS}.
|
|
256
|
+
"""
|
|
257
|
+
num_workers: int | None = None
|
|
258
|
+
num_workers = _get_int_env_var("LSST_RESOURCES_NUM_WORKERS")
|
|
259
|
+
|
|
260
|
+
# If someone is explicitly specifying a number, let them use that number.
|
|
261
|
+
if num_workers is not None:
|
|
262
|
+
return num_workers
|
|
263
|
+
|
|
264
|
+
if num_workers is None:
|
|
265
|
+
# CPU_LIMIT is used on nublado.
|
|
266
|
+
cpu_limit = _get_int_env_var("CPU_LIMIT") or multiprocessing.cpu_count()
|
|
267
|
+
if cpu_limit is not None:
|
|
268
|
+
num_workers = cpu_limit + 2
|
|
269
|
+
|
|
270
|
+
# But don't ever return more than the maximum allowed.
|
|
271
|
+
return min([num_workers, MAX_WORKERS])
|
lsst/resources/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
__all__ = ["__version__"]
|
|
2
|
-
__version__ = "29.2025.1700"
|
|
2
|
+
__version__ = "29.2025.4600"
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lsst-resources
|
|
3
|
-
Version: 29.2025.1700
|
|
3
|
+
Version: 29.2025.4600
|
|
4
4
|
Summary: An abstraction layer for reading and writing from URI file resources.
|
|
5
5
|
Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
|
|
6
|
-
License: BSD
|
|
6
|
+
License-Expression: BSD-3-Clause
|
|
7
7
|
Project-URL: Homepage, https://github.com/lsst/resources
|
|
8
8
|
Keywords: lsst
|
|
9
9
|
Classifier: Intended Audience :: Developers
|
|
10
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
11
10
|
Classifier: Operating System :: OS Independent
|
|
12
11
|
Classifier: Programming Language :: Python :: 3
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
15
|
Requires-Python: >=3.11.0
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: COPYRIGHT
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
lsst/__init__.py,sha256=9I6UQ9gj-ZcPlvsa0OPBo76UujxXVehVzw9yMAOQvyM,466
|
|
2
|
+
lsst/resources/__init__.py,sha256=BDj6uokvd0ZQNGl-Xgz5gZd83Z0L2gFqGSk0KJpylP8,778
|
|
3
|
+
lsst/resources/_resourcePath.py,sha256=fnB8XNUfk25lV378U0kg1O6c1QnSJL7F9iqaEt08IOA,74508
|
|
4
|
+
lsst/resources/dav.py,sha256=ZYP7PnQzS7epm5woxnn1_t1XhsPQZm6_q1kv8baUfn4,32100
|
|
5
|
+
lsst/resources/davutils.py,sha256=5geEl_44lrWX-Si1VDfYJ6WP1rg22PBqlyD_v1HE4yI,100300
|
|
6
|
+
lsst/resources/file.py,sha256=v2XLOzflfhI6kjUGB1mE8p-1e1B2eE58PW-qsQSCqdA,24360
|
|
7
|
+
lsst/resources/gs.py,sha256=3qMEqO1wIK05BJmuUHtsEunuYWgR4-eB5Z3ffxEtb0o,12827
|
|
8
|
+
lsst/resources/http.py,sha256=WSx2VXKFd6486TytV2NMfdgLntioL6FvliZWpn9LtDE,92426
|
|
9
|
+
lsst/resources/location.py,sha256=x3Tq0x5o1OXYmZDxYBenUG1N71wtDhnjVAr3s2ZEiu8,7937
|
|
10
|
+
lsst/resources/mem.py,sha256=xCpGgvxF2gmO5gLkOikKvIet2RPvaPCiARenR9pUWCk,1115
|
|
11
|
+
lsst/resources/packageresource.py,sha256=vnfeRlpVwpC5cDQZE6Lnh8EH6oZy1sH2vLz9ONYjJ4k,6817
|
|
12
|
+
lsst/resources/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
lsst/resources/s3.py,sha256=NGJPM4BjtqFIPvg9vbp_blrIRt009NbOm06cr65Wqmw,29662
|
|
14
|
+
lsst/resources/s3utils.py,sha256=ojWf9BPrK9mhGQ8jvs4_8Nsqf9360e79U5FnPTxe24A,14576
|
|
15
|
+
lsst/resources/schemeless.py,sha256=9tgqf0eQI3ErGpGSscTRFk_8amF6GwpykPBaTa-KqLI,10909
|
|
16
|
+
lsst/resources/tests.py,sha256=UD2Pql8olpW9oCDlsA_jtl23SZtknp7ReuJHLcMPSa0,46237
|
|
17
|
+
lsst/resources/utils.py,sha256=6O3Mq7JbPEtqyD2lM77pRpwcPMfV5SxiNMknw-F2vNs,8097
|
|
18
|
+
lsst/resources/version.py,sha256=vB8XwkFrtPN40DIouI1mFlxm5jpAZvcGwleFGT1yZhI,55
|
|
19
|
+
lsst/resources/_resourceHandles/__init__.py,sha256=zOcZ8gVEBdAWcHJaZabA8Vdq-wAVcxjbmA_1b1IWM6M,76
|
|
20
|
+
lsst/resources/_resourceHandles/_baseResourceHandle.py,sha256=lQwxDOmFUNJndTxsjpz-HxrQBL0L-z4aXQocHdOEI7c,4676
|
|
21
|
+
lsst/resources/_resourceHandles/_davResourceHandle.py,sha256=xcJNFUj8VzlPOKlHdXXoFFyiLNiSFiT-RFNqJRzKniQ,6799
|
|
22
|
+
lsst/resources/_resourceHandles/_fileResourceHandle.py,sha256=2nC8tfP_ynAfjpzrtkw_1ahx1CuMEFpZ5mLmofSShUk,3676
|
|
23
|
+
lsst/resources/_resourceHandles/_httpResourceHandle.py,sha256=Yami8IVGeru4bLQCag-OvGG0ltz1qyEg57FY4IEB87Y,10995
|
|
24
|
+
lsst/resources/_resourceHandles/_s3ResourceHandle.py,sha256=Cp-eBtptskbmthy3DwLKPpYPLvU_lrqtK10X37inHt0,12406
|
|
25
|
+
lsst_resources-29.2025.4600.dist-info/licenses/COPYRIGHT,sha256=yazVsoMmFwhiw5itGrdT4YPmXbpsQyUFjlpOyZIa77M,148
|
|
26
|
+
lsst_resources-29.2025.4600.dist-info/licenses/LICENSE,sha256=7wrtgl8meQ0_RIuv2TjIKpAnNrl-ODH-QLwyHe9citI,1516
|
|
27
|
+
lsst_resources-29.2025.4600.dist-info/METADATA,sha256=AHUIXOjx8MQBorWKENL6Q360iyEcLVrF7IOzOEDMGIM,2240
|
|
28
|
+
lsst_resources-29.2025.4600.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
29
|
+
lsst_resources-29.2025.4600.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
|
|
30
|
+
lsst_resources-29.2025.4600.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
31
|
+
lsst_resources-29.2025.4600.dist-info/RECORD,,
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
lsst/__init__.py,sha256=9I6UQ9gj-ZcPlvsa0OPBo76UujxXVehVzw9yMAOQvyM,466
|
|
2
|
-
lsst/resources/__init__.py,sha256=BDj6uokvd0ZQNGl-Xgz5gZd83Z0L2gFqGSk0KJpylP8,778
|
|
3
|
-
lsst/resources/_resourcePath.py,sha256=xTVyDHD-UHlF5FeDvSXXnsmOuoSFnORZD_wMksxiFfA,64926
|
|
4
|
-
lsst/resources/file.py,sha256=-jPuoHvTEtx5tnDyNkfwhWAyX0cTwkuMd-JvJn9EGdE,23226
|
|
5
|
-
lsst/resources/gs.py,sha256=Lpo5GAzH7R7HG8E5RMGOdP4j4hjWJn-k6M3OXj0nHQM,12783
|
|
6
|
-
lsst/resources/http.py,sha256=JW3cBe4MERyjopFKkELui1BRr4b4Mkgp0Iqt9YFIxuc,88227
|
|
7
|
-
lsst/resources/location.py,sha256=x3Tq0x5o1OXYmZDxYBenUG1N71wtDhnjVAr3s2ZEiu8,7937
|
|
8
|
-
lsst/resources/mem.py,sha256=VOWh7XxJPfqKcFdLZSjKEAfORQ2AHZHpxmjT8LniV60,1008
|
|
9
|
-
lsst/resources/packageresource.py,sha256=vnfeRlpVwpC5cDQZE6Lnh8EH6oZy1sH2vLz9ONYjJ4k,6817
|
|
10
|
-
lsst/resources/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
lsst/resources/s3.py,sha256=wrOMdFWltxpGEWeL--kPCbk5Le_viCIsEn4lOPZbXhM,24124
|
|
12
|
-
lsst/resources/s3utils.py,sha256=cKJ9GWHETHhn1djezyikWwAaw4k1B3hFvfif96THHDQ,14355
|
|
13
|
-
lsst/resources/schemeless.py,sha256=GfJcKzZ0XIeepfQdW4HPZWiZlSp_ej0SEtSiJTrDUQs,10666
|
|
14
|
-
lsst/resources/tests.py,sha256=MLB8hERKuNredzzg3Qq9M_U7IesV3xrbcjFwKuMp3Ok,43513
|
|
15
|
-
lsst/resources/utils.py,sha256=IHVrOdj0szNWxiXk-jbZu1RhTR8WXks1vI9JCpBxeBA,6706
|
|
16
|
-
lsst/resources/version.py,sha256=k6PcnewHb2vETZCsCS6q8mJ_uyMg-otfwuQ8B0oDDwQ,55
|
|
17
|
-
lsst/resources/_resourceHandles/__init__.py,sha256=zOcZ8gVEBdAWcHJaZabA8Vdq-wAVcxjbmA_1b1IWM6M,76
|
|
18
|
-
lsst/resources/_resourceHandles/_baseResourceHandle.py,sha256=lQwxDOmFUNJndTxsjpz-HxrQBL0L-z4aXQocHdOEI7c,4676
|
|
19
|
-
lsst/resources/_resourceHandles/_fileResourceHandle.py,sha256=A7_WQPzD0ZlOzNmaI_TPdZybrNxrXPkNHWVla3UFxfs,3676
|
|
20
|
-
lsst/resources/_resourceHandles/_httpResourceHandle.py,sha256=JRjpE-ZQfgKX5OyVLulIbzW38FdhovcoOd1D4rhb5vk,10900
|
|
21
|
-
lsst/resources/_resourceHandles/_s3ResourceHandle.py,sha256=NkDmPb9bm_zMvr6mMnb-tBmqJDt0yUJrt2gZXR8l7ok,12923
|
|
22
|
-
lsst_resources-29.2025.1700.dist-info/licenses/COPYRIGHT,sha256=yazVsoMmFwhiw5itGrdT4YPmXbpsQyUFjlpOyZIa77M,148
|
|
23
|
-
lsst_resources-29.2025.1700.dist-info/licenses/LICENSE,sha256=7wrtgl8meQ0_RIuv2TjIKpAnNrl-ODH-QLwyHe9citI,1516
|
|
24
|
-
lsst_resources-29.2025.1700.dist-info/METADATA,sha256=gybCKJmJyKdf0oqRgCbF6IKA0SzNalaw1nhptr1Y51I,2237
|
|
25
|
-
lsst_resources-29.2025.1700.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
26
|
-
lsst_resources-29.2025.1700.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
|
|
27
|
-
lsst_resources-29.2025.1700.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
28
|
-
lsst_resources-29.2025.1700.dist-info/RECORD,,
|
{lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/COPYRIGHT
RENAMED
|
File without changes
|
{lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|