lsst-resources 29.0.0rc7__py3-none-any.whl → 29.2025.4600__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registries. It is provided for informational purposes only.
- lsst/resources/_resourceHandles/_davResourceHandle.py +197 -0
- lsst/resources/_resourceHandles/_fileResourceHandle.py +1 -1
- lsst/resources/_resourceHandles/_httpResourceHandle.py +16 -2
- lsst/resources/_resourceHandles/_s3ResourceHandle.py +3 -17
- lsst/resources/_resourcePath.py +448 -81
- lsst/resources/dav.py +912 -0
- lsst/resources/davutils.py +2659 -0
- lsst/resources/file.py +97 -57
- lsst/resources/gs.py +11 -4
- lsst/resources/http.py +229 -62
- lsst/resources/mem.py +7 -1
- lsst/resources/packageresource.py +13 -2
- lsst/resources/s3.py +174 -17
- lsst/resources/s3utils.py +8 -1
- lsst/resources/schemeless.py +6 -3
- lsst/resources/tests.py +140 -12
- lsst/resources/utils.py +74 -1
- lsst/resources/version.py +1 -1
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/METADATA +3 -3
- lsst_resources-29.2025.4600.dist-info/RECORD +31 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/WHEEL +1 -1
- lsst_resources-29.0.0rc7.dist-info/RECORD +0 -28
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_resources-29.0.0rc7.dist-info → lsst_resources-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/resources/s3.py
CHANGED
@@ -13,6 +13,7 @@ from __future__ import annotations
 
 __all__ = ("S3ResourcePath",)
 
+import concurrent.futures
 import contextlib
 import io
 import logging
@@ -20,17 +21,19 @@ import os
 import re
 import sys
 import threading
+from collections import defaultdict
 from collections.abc import Iterable, Iterator
 from functools import cache, cached_property
 from typing import IO, TYPE_CHECKING, cast
 
 from botocore.exceptions import ClientError
 
+from lsst.utils.iteration import chunk_iterable
 from lsst.utils.timer import time_this
 
 from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
 from ._resourceHandles._s3ResourceHandle import S3ResourceHandle
-from ._resourcePath import ResourcePath
+from ._resourcePath import _EXECUTOR_TYPE, MBulkResult, ResourcePath, _get_executor_class, _patch_environ
 from .s3utils import (
     _get_s3_connection_parameters,
     _s3_disable_bucket_validation,
@@ -44,6 +47,7 @@ from .s3utils import (
     s3CheckFileExists,
     translate_client_error,
 )
+from .utils import _get_num_workers
 
 try:
     from boto3.s3.transfer import TransferConfig  # type: ignore
@@ -164,6 +168,14 @@ class S3ResourcePath(ResourcePath):
 
         return use_threads
 
+    @contextlib.contextmanager
+    def _use_threads_temp_override(self, multithreaded: bool) -> Iterator:
+        """Temporarily override the value of use_threads."""
+        original = self.use_threads
+        self.use_threads = multithreaded
+        yield
+        self.use_threads = original
+
     @property
     def _transfer_config(self) -> TransferConfig:
         if self.use_threads is None:
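Editorial note: `_use_threads_temp_override` above restores `use_threads` only on a clean exit; if the guarded transfer raises, the override sticks. A minimal sketch of an exception-safe variant of the same pattern, with `Transfer` as a hypothetical stand-in class rather than anything in lsst.resources:

import contextlib
from collections.abc import Iterator


class Transfer:  # hypothetical stand-in, not part of lsst.resources
    use_threads: bool | None = None

    @contextlib.contextmanager
    def _use_threads_temp_override(self, multithreaded: bool) -> Iterator[None]:
        original = self.use_threads
        self.use_threads = multithreaded
        try:
            yield
        finally:
            # Restore the original setting even if the body raised.
            self.use_threads = original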
@@ -212,7 +224,9 @@ class S3ResourcePath(ResourcePath):
         return bucket
 
     @classmethod
-    def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
+    def _mexists(
+        cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
+    ) -> dict[ResourcePath, bool]:
         # Force client to be created for each profile before creating threads.
         profiles = set[str | None]()
         for path in uris:
@@ -222,7 +236,125 @@ class S3ResourcePath(ResourcePath):
         for profile in profiles:
             getS3Client(profile)
 
-        return super()._mexists(uris)
+        return super()._mexists(uris, num_workers=num_workers)
+
+    @classmethod
+    def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
+        # Delete multiple objects in one API call.
+        # Must group by profile and bucket.
+        grouped_uris: dict[tuple[str | None, str], list[S3ResourcePath]] = defaultdict(list)
+        for uri in uris:
+            uri = cast(S3ResourcePath, uri)
+            grouped_uris[uri._profile, uri._bucket].append(uri)
+
+        results: dict[ResourcePath, MBulkResult] = {}
+        for related_uris in grouped_uris.values():
+            # API requires no more than 1000 per call.
+            chunk_num = 0
+            chunks: list[tuple[ResourcePath, ...]] = []
+            key_to_uri: dict[str, ResourcePath] = {}
+            for chunk in chunk_iterable(related_uris, chunk_size=1_000):
+                for uri in chunk:
+                    key = uri.relativeToPathRoot
+                    key_to_uri[key] = uri
+                    # Default to assuming everything worked.
+                    results[uri] = MBulkResult(True, None)
+                chunk_num += 1
+                chunks.append(chunk)
+
+            # Bulk remove.
+            with time_this(
+                log,
+                msg="Bulk delete; %d chunk%s; totalling %d dataset%s",
+                args=(
+                    len(chunks),
+                    "s" if len(chunks) != 1 else "",
+                    len(related_uris),
+                    "s" if len(related_uris) != 1 else "",
+                ),
+            ):
+                errored = cls._mremove_select(chunks)
+
+            # Update with error information.
+            results.update(errored)
+
+        return results
+
+    @classmethod
+    def _mremove_select(cls, chunks: list[tuple[ResourcePath, ...]]) -> dict[ResourcePath, MBulkResult]:
+        if len(chunks) == 1:
+            # Do the removal directly without futures.
+            return cls._delete_objects_wrapper(chunks[0])
+        pool_executor_class = _get_executor_class()
+        if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
+            # Patch the environment to make it think there is only one worker
+            # for each subprocess.
+            with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
+                return cls._mremove_with_pool(pool_executor_class, chunks)
+        else:
+            return cls._mremove_with_pool(pool_executor_class, chunks)
+
+    @classmethod
+    def _mremove_with_pool(
+        cls,
+        pool_executor_class: _EXECUTOR_TYPE,
+        chunks: list[tuple[ResourcePath, ...]],
+        *,
+        num_workers: int | None = None,
+    ) -> dict[ResourcePath, MBulkResult]:
+        # Different name because different API to base class.
+        # No need to make more workers than we have chunks.
+        max_workers = num_workers if num_workers is not None else min(len(chunks), _get_num_workers())
+        results: dict[ResourcePath, MBulkResult] = {}
+        with pool_executor_class(max_workers=max_workers) as remove_executor:
+            future_remove = {
+                remove_executor.submit(cls._delete_objects_wrapper, chunk): i
+                for i, chunk in enumerate(chunks)
+            }
+            for future in concurrent.futures.as_completed(future_remove):
+                try:
+                    results.update(future.result())
+                except Exception as e:
+                    # The chunk utterly failed.
+                    chunk = chunks[future_remove[future]]
+                    for uri in chunk:
+                        results[uri] = MBulkResult(False, e)
+        return results
+
+    @classmethod
+    def _delete_objects_wrapper(cls, uris: tuple[ResourcePath, ...]) -> dict[ResourcePath, MBulkResult]:
+        """Convert URIs to keys and call low-level API."""
+        if not uris:
+            return {}
+        keys: list[dict[str, str]] = []
+        key_to_uri: dict[str, ResourcePath] = {}
+        for uri in uris:
+            key = uri.relativeToPathRoot
+            key_to_uri[key] = uri
+            keys.append({"Key": key})
+
+        first_uri = cast(S3ResourcePath, uris[0])
+        results = cls._delete_related_objects(first_uri.client, first_uri._bucket, keys)
+
+        # Remap error object keys to uris.
+        return {key_to_uri[key]: result for key, result in results.items()}
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
+    def _delete_related_objects(
+        cls, client: boto3.client, bucket: str, keys: list[dict[str, str]]
+    ) -> dict[str, MBulkResult]:
+        # Delete multiple objects from the same bucket, allowing for backoff
+        # retry.
+        response = client.delete_objects(Bucket=bucket, Delete={"Objects": keys, "Quiet": True})
+        # Use Quiet mode so we assume everything worked unless told otherwise.
+        # Only returning errors -- indexed by Key name.
+        errors: dict[str, MBulkResult] = {}
+        for errored_key in response.get("Errors", []):
+            errors[errored_key["Key"]] = MBulkResult(
+                False, ClientError({"Error": errored_key}, f"delete_objects: {errored_key['Key']}")
+            )
+        return errors
 
     @backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
     def exists(self) -> bool:
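The bulk-removal path above leans on two S3 facts: `delete_objects` accepts at most 1000 keys per request, and in `Quiet` mode the response lists only the keys that failed. A self-contained sketch of that batching pattern using plain boto3 (bucket and key names are illustrative):

import boto3


def bulk_delete(client: boto3.client, bucket: str, keys: list[str]) -> dict[str, str]:
    """Delete keys in batches of 1000 (the S3 API limit), returning an
    error message per key for any failures."""
    errors: dict[str, str] = {}
    for start in range(0, len(keys), 1000):
        batch = [{"Key": key} for key in keys[start : start + 1000]]
        # Quiet mode: only failed deletions appear in the response.
        response = client.delete_objects(Bucket=bucket, Delete={"Objects": batch, "Quiet": True})
        for err in response.get("Errors", []):
            errors[err["Key"]] = err.get("Message", "unknown error")
    return errors

Treating an empty `Errors` list as total success is what lets `_mremove` default every URI to `MBulkResult(True, None)` and patch in failures afterwards.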
@@ -345,18 +477,33 @@ class S3ResourcePath(ResourcePath):
 
         return s3, f"{self._bucket}/{self.relativeToPathRoot}"
 
-    def _as_local(self) -> tuple[str, bool]:
+    @contextlib.contextmanager
+    def _as_local(
+        self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
+    ) -> Iterator[ResourcePath]:
         """Download object from S3 and place in temporary directory.
 
+        Parameters
+        ----------
+        multithreaded : `bool`, optional
+            If `True` the transfer will be allowed to attempt to improve
+            throughput by using parallel download streams. This may have no
+            effect if the URI scheme does not support parallel streams or
+            if a global override has been applied. If `False` parallel
+            streams will be disabled.
+        tmpdir : `ResourcePath` or `None`, optional
+            Explicit override of the temporary directory to use for remote
+            downloads.
+
         Returns
         -------
-        path : `str`
-            Path to local temporary file.
-        temporary : `bool`
-            Always returns `True`. This is always a temporary file.
+        local_uri : `ResourcePath`
+            A URI to a local POSIX file corresponding to a local temporary
+            downloaded copy of the resource.
         """
         with (
-            ResourcePath.temporary_uri(suffix=self.getExtension(), delete=False) as tmp_uri,
+            ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=True) as tmp_uri,
+            self._use_threads_temp_override(multithreaded),
             time_this(log, msg="Downloading %s to local file", args=(self,)),
         ):
             progress = (
@@ -366,7 +513,7 @@ class S3ResourcePath(ResourcePath):
             )
             with tmp_uri.open("wb") as tmpFile:
                 self._download_file(tmpFile, progress)
-        return tmp_uri.ospath, True
+            yield tmp_uri
 
     @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
     def _upload_file(self, local_file: ResourcePath, progress: ProgressPercentage | None) -> None:
@@ -397,7 +544,7 @@ class S3ResourcePath(ResourcePath):
         try:
             self.client.copy_object(CopySource=copy_source, Bucket=self._bucket, Key=self.relativeToPathRoot)
         except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
-            raise FileNotFoundError("No such resource to transfer: {self}") from err
+            raise FileNotFoundError(f"No such resource to transfer: {src} -> {self}") from err
         except ClientError as err:
             translate_client_error(err, self)
             raise
@@ -408,6 +555,7 @@ class S3ResourcePath(ResourcePath):
         transfer: str = "copy",
         overwrite: bool = False,
         transaction: TransactionProtocol | None = None,
+        multithreaded: bool = True,
     ) -> None:
         """Transfer the current resource to an S3 bucket.
 
@@ -422,6 +570,12 @@ class S3ResourcePath(ResourcePath):
             Allow an existing file to be overwritten. Defaults to `False`.
         transaction : `~lsst.resources.utils.TransactionProtocol`, optional
             Currently unused.
+        multithreaded : `bool`, optional
+            If `True` the transfer will be allowed to attempt to improve
+            throughput by using parallel download streams. This may have no
+            effect if the URI scheme does not support parallel streams or
+            if a global override has been applied. If `False` parallel
+            streams will be disabled.
         """
         # Fail early to prevent delays if remote resources are requested
         if transfer not in self.transferModes:
@@ -457,22 +611,25 @@ class S3ResourcePath(ResourcePath):
         timer_msg = "Transfer from %s to %s"
         timer_args = (src, self)
 
-        if isinstance(src, type(self)):
-            # Looks like an S3 remote uri so we can use direct copy
-            #
-            #
+        if isinstance(src, type(self)) and self.client == src.client:
+            # Looks like an S3 remote uri so we can use direct copy.
+            # This only works if the source and destination are using the same
+            # S3 endpoint and profile.
             with time_this(log, msg=timer_msg, args=timer_args):
                 self._copy_from(src)
 
         else:
             # Use local file and upload it
-            with src.as_local() as local_uri:
+            with src.as_local(multithreaded=multithreaded) as local_uri:
                 progress = (
                     ProgressPercentage(local_uri, file_for_msg=src, msg="Uploading:")
                     if log.isEnabledFor(ProgressPercentage.log_level)
                     else None
                 )
-                with time_this(log, msg=timer_msg, args=timer_args):
+                with (
+                    time_this(log, msg=timer_msg, args=timer_args),
+                    self._use_threads_temp_override(multithreaded),
+                ):
                     self._upload_file(local_uri, progress)
 
         # This was an explicit move requested from a remote resource
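The new `multithreaded` flag is threaded from `transfer_from` down into `as_local` and the boto transfer configuration, so callers that already parallelise at the file level can switch off boto's per-transfer thread pool. A hedged usage sketch (the bucket URI is illustrative):

from lsst.resources import ResourcePath

src = ResourcePath("s3://my-bucket/data/file.fits")
dest = ResourcePath("s3://my-bucket/copies/file.fits")

# One large transfer: leave parallel streams enabled (the default).
dest.transfer_from(src, transfer="copy", overwrite=True)

# Many transfers driven from our own worker pool: disable per-transfer
# parallelism so the pools do not multiply.
dest.transfer_from(src, transfer="copy", overwrite=True, multithreaded=False)

# Downloads honour the same flag and can be redirected to a specific
# local temporary directory.
with src.as_local(multithreaded=False) as local_uri:
    print(local_uri.ospath)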
lsst/resources/s3utils.py
CHANGED
@@ -53,6 +53,7 @@ except ImportError:
 
 from ._resourcePath import ResourcePath
 from .location import Location
+from .utils import _get_num_workers
 
 # https://pypi.org/project/backoff/
 try:
@@ -246,7 +247,13 @@ def _s3_disable_bucket_validation(client: boto3.client) -> None:
 @functools.lru_cache
 def _get_s3_client(endpoint_config: _EndpointConfig, skip_validation: bool) -> boto3.client:
     # Helper function to cache the client for this endpoint
-    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})
+    # boto seems to assume it will always have at least 10 available.
+    max_pool_size = max(_get_num_workers(), 10)
+    config = botocore.config.Config(
+        read_timeout=180,
+        max_pool_connections=max_pool_size,
+        retries={"mode": "adaptive", "max_attempts": 10},
+    )
 
     session = boto3.Session(profile_name=endpoint_config.profile)
 
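The client construction now scales the HTTP connection pool with the configured worker count instead of botocore's default of 10 pooled connections, avoiding pool exhaustion when many transfer threads share one cached client. A minimal sketch of the same configuration, with a literal worker count standing in for the package's internal `_get_num_workers()` helper:

import boto3
import botocore.config

num_workers = 12  # stand-in for the value derived from the environment
config = botocore.config.Config(
    read_timeout=180,
    # botocore assumes at least 10 pooled connections, so never go lower.
    max_pool_connections=max(num_workers, 10),
    retries={"mode": "adaptive", "max_attempts": 10},
)
client = boto3.Session().client("s3", config=config)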
lsst/resources/schemeless.py
CHANGED
@@ -105,13 +105,16 @@ class SchemelessResourcePath(FileResourcePath):
             return stat.S_ISDIR(status.st_mode)
         return self.dirLike
 
-    def relative_to(self, other: ResourcePath) -> str | None:
+    def relative_to(self, other: ResourcePath, walk_up: bool = False) -> str | None:
         """Return the relative path from this URI to the other URI.
 
         Parameters
         ----------
         other : `ResourcePath`
             URI to use to calculate the relative path.
+        walk_up : `bool`, optional
+            Control whether "``..``" can be used to resolve a relative path.
+            Default is `False`. Can not be `True` on Python version 3.11.
 
         Returns
         -------
@@ -146,8 +149,8 @@
             raise RuntimeError(f"Unexpected combination of {child}.relative_to({other}).")
 
         if child is None:
-            return super().relative_to(other)
-        return child.relative_to(other)
+            return super().relative_to(other, walk_up=walk_up)
+        return child.relative_to(other, walk_up=walk_up)
 
     @classmethod
     def _fixupPathUri(
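The `walk_up` parameter mirrors the keyword of the same name added to `pathlib.PurePath.relative_to` in Python 3.12, which is why the docstring warns it cannot be `True` on Python 3.11. Its semantics in plain `pathlib`, matching the test case added to tests.py below:

from pathlib import PurePosixPath

child = PurePosixPath("/a/b/c/d.txt")
parent = PurePosixPath("/a/b/d/e")

# Without walk_up there is no relative path: child is not inside parent.
try:
    child.relative_to(parent)
except ValueError:
    print("not a subpath")

# With walk_up (Python 3.12+), ".." components may be used.
print(child.relative_to(parent, walk_up=True))  # ../../c/d.txt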
lsst/resources/tests.py
CHANGED
@@ -17,6 +17,8 @@ import os
 import pathlib
 import random
 import string
+import sys
+import tempfile
 import unittest
 import urllib.parse
 import uuid
@@ -60,18 +62,18 @@ def _check_open(
     """
     text_content = "abcdefghijklmnopqrstuvwxyz🙂"
     bytes_content = uuid.uuid4().bytes
-    content_by_mode_suffix = {
+    content_by_mode_suffix: dict[str, str | bytes] = {
         "": text_content,
         "t": text_content,
         "b": bytes_content,
     }
-    empty_content_by_mode_suffix = {
+    empty_content_by_mode_suffix: dict[str, str | bytes] = {
         "": "",
         "t": "",
         "b": b"",
     }
     # To appease mypy
-    double_content_by_mode_suffix = {
+    double_content_by_mode_suffix: dict[str, str | bytes] = {
         "": text_content + text_content,
         "t": text_content + text_content,
         "b": bytes_content + bytes_content,
@@ -142,6 +144,16 @@
         content_read = read_buffer.read()
         test_case.assertEqual(len(content_read), 0, f"Read: {content_read!r}, expected empty.")
 
+    # Write multiple chunks with flushing to ensure that any handles that
+    # cache without flushing work properly.
+    n = 3
+    with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
+        for _ in range(n):
+            write_buffer.write(content)
+            write_buffer.flush()
+    with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
+        test_case.assertEqual(read_buffer.read(), content * n)
+
     # Write two copies of the content, overwriting the single copy there.
     with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
         write_buffer.write(double_content)
@@ -364,6 +376,19 @@ class GenericTestCase(_GenericTestCase):
         parent = ResourcePath("d/e.txt", forceAbsolute=False)
         self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
 
+        # Allow .. in response.
+        child = ResourcePath(self._make_uri("a/b/c/d.txt"), forceAbsolute=False)
+        parent = ResourcePath(self._make_uri("a/b/d/e/"), forceAbsolute=False)
+        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
+
+        if sys.version_info >= (3, 12, 0):
+            # Fails on python 3.11.
+            self.assertEqual(
+                child.relative_to(parent, walk_up=True),
+                "../../c/d.txt",
+                f"{child}.relative_to({parent}, walk_up=True)",
+            )
+
     def test_parents(self) -> None:
         """Test of splitting and parent walking."""
         parent = ResourcePath(self._make_uri("somedir"), forceDirectory=True)
@@ -381,6 +406,14 @@
         self.assertEqual(child_file.parent().parent(), parent)
         self.assertEqual(child_subdir.dirname(), child_subdir)
 
+        # Make sure that the parent doesn't retain any fragment from the
+        # child.
+        child_fragment = child_subdir.join("a.txt#fragment")
+        self.assertEqual(child_fragment.fragment, "fragment")
+        fragment_parent = child_fragment.parent()
+        self.assertEqual(fragment_parent.fragment, "")
+        self.assertTrue(str(fragment_parent).endswith("/"))
+
     def test_escapes(self) -> None:
         """Special characters in file paths."""
         src = self.root_uri.join("bbb/???/test.txt")
@@ -485,6 +518,13 @@
         self.assertEqual(fnew3.fragment, "fragment")
         self.assertEqual(fnew3.basename(), "b.txt", msg=f"Got: {fnew3._uri}")
 
+        # Check that fragment on the directory is dropped on join.
+        frag_dir = add_dir.join("subdir/#dir_fragment")
+        self.assertEqual(frag_dir.fragment, "dir_fragment")
+        fnew4 = frag_dir.join("a.txt")
+        self.assertEqual(fnew4.fragment, "")
+        self.assertTrue(str(fnew4).endswith("/a.txt"))
+
         # Join a resource path.
         subpath = ResourcePath("a/b.txt#fragment2", forceAbsolute=False, forceDirectory=False)
         fnew3 = root.join(subpath)
@@ -556,6 +596,10 @@ class GenericReadWriteTestCase(_GenericTestCase):
 
     transfer_modes: tuple[str, ...] = ("copy", "move")
     testdir: str | None = None
+    # Number of files to use for mremove() testing to ensure different code
+    # paths are hit. Do not want to generically use many files for schemes
+    # where it makes no difference.
+    n_mremove_files: int = 15
 
     def setUp(self) -> None:
         if self.scheme is None:
@@ -702,6 +746,37 @@
         with self.assertRaises(FileNotFoundError):
             dest.transfer_from(src, "auto")
 
+    def test_mtransfer(self) -> None:
+        n_files = 10
+        sources = [self.tmpdir.join(f"test{n}.txt") for n in range(n_files)]
+        destinations = [self.tmpdir.join(f"dest_test{n}.txt") for n in range(n_files)]
+
+        for i, src in enumerate(sources):
+            content = f"{i}\nContent is some content\nwith something to say\n\n"
+            src.write(content.encode())
+
+        results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True))
+        self.assertTrue(all(res.success for res in results.values()))
+        self.assertTrue(all(dest.exists() for dest in results))
+
+        for i, dest in enumerate(destinations):
+            new_content = dest.read().decode()
+            self.assertTrue(new_content.startswith(f"{i}\n"))
+
+        # Overwrite should work.
+        results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True), overwrite=True)
+
+        # Overwrite failure.
+        results = ResourcePath.mtransfer(
+            "copy", zip(sources, destinations, strict=True), overwrite=False, do_raise=False
+        )
+        self.assertFalse(all(res.success for res in results.values()))
+
+        with self.assertRaises(ExceptionGroup):
+            results = ResourcePath.mtransfer(
+                "copy", zip(sources, destinations, strict=True), overwrite=False, do_raise=True
+            )
+
     def test_local_transfer(self) -> None:
         """Test we can transfer to and from local file."""
         remote_src = self.tmpdir.join("src.json")
@@ -762,6 +837,48 @@
         with self.root_uri.as_local() as local_uri:
             pass
 
+        if not src.isLocal:
+            # as_local tmpdir can not be a remote resource.
+            with self.assertRaises(ValueError):
+                with src.as_local(tmpdir=self.root_uri) as local_uri:
+                    pass
+
+        # tmpdir is ignored for local file.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            temp_dir = ResourcePath(tmpdir, forceDirectory=True)
+            with src.as_local(tmpdir=temp_dir) as local_uri:
+                self.assertEqual(local_uri.dirname(), temp_dir)
+                self.assertTrue(local_uri.exists())
+
+    def test_local_mtransfer(self) -> None:
+        """Check that bulk transfer to/from local works."""
+        # Create remote resources
+        n_files = 10
+        sources = [self.tmpdir.join(f"test{n}.txt") for n in range(n_files)]
+
+        for i, src in enumerate(sources):
+            content = f"{i}\nContent is some content\nwith something to say\n\n"
+            src.write(content.encode())
+
+        # Potentially remote to local.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            temp_dir = ResourcePath(tmpdir, forceDirectory=True)
+            destinations = [temp_dir.join(f"dest_test{n}.txt") for n in range(n_files)]
+
+            results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True))
+            self.assertTrue(all(res.success for res in results.values()))
+            self.assertTrue(all(dest.exists() for dest in results))
+
+            # Overwrite should work.
+            results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True), overwrite=True)
+
+            # Now reverse so local to potentially remote.
+            for src in sources:
+                src.remove()
+            results = ResourcePath.mtransfer("copy", zip(destinations, sources, strict=True), overwrite=False)
+            self.assertTrue(all(res.success for res in results.values()))
+            self.assertTrue(all(dest.exists() for dest in results))
+
     def test_walk(self) -> None:
         """Walk a directory hierarchy."""
         root = self.tmpdir.join("walk/")
@@ -949,22 +1066,33 @@
         # A file that is not there.
         file = root.join("config/basic/butler.yaml")
 
-        # Create some files.
-        expected_files = {
-            "dir1/a.yaml",
-            "dir1/b.yaml",
-            "dir2/e.yaml",
-        }
-        expected_uris = {root.join(f) for f in expected_files}
+        # Create some files. For most schemes the code paths do not change
+        # for 10 vs 1000 files but in some schemes they do.
+        expected_files = [f"dir1/f{n}.yaml" for n in range(self.n_mremove_files)]
+        expected_uris = [root.join(f) for f in expected_files]
         for uri in expected_uris:
             uri.write(b"")
             self.assertTrue(uri.exists())
-        expected_uris.add(file)
+        expected_uris.append(file)
 
-        multi = ResourcePath.mexists(expected_uris)
+        # Force to run with fewer workers than there are files.
+        multi = ResourcePath.mexists(expected_uris, num_workers=3)
 
         for uri, is_there in multi.items():
             if uri == file:
                 self.assertFalse(is_there)
             else:
                 self.assertTrue(is_there)
+
+        # Clean up. Unfortunately POSIX raises a FileNotFoundError but
+        # S3 boto does not complain if there is no key.
+        ResourcePath.mremove(expected_uris, do_raise=False)
+
+        # Check they were really removed.
+        multi = ResourcePath.mexists(expected_uris, num_workers=3)
+        for uri, is_there in multi.items():
+            self.assertFalse(is_there)
+
+        # Clean up a subset of files that are already gone, but this can
+        # trigger a different code path.
+        ResourcePath.mremove(expected_uris[:5], do_raise=False)