lsst-resources 29.0.0rc7__py3-none-any.whl → 29.2025.4600__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lsst/resources/s3.py CHANGED
@@ -13,6 +13,7 @@ from __future__ import annotations
 
 __all__ = ("S3ResourcePath",)
 
+import concurrent.futures
 import contextlib
 import io
 import logging
@@ -20,17 +21,19 @@ import os
 import re
 import sys
 import threading
+from collections import defaultdict
 from collections.abc import Iterable, Iterator
 from functools import cache, cached_property
 from typing import IO, TYPE_CHECKING, cast
 
 from botocore.exceptions import ClientError
 
+from lsst.utils.iteration import chunk_iterable
 from lsst.utils.timer import time_this
 
 from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
 from ._resourceHandles._s3ResourceHandle import S3ResourceHandle
-from ._resourcePath import ResourcePath
+from ._resourcePath import _EXECUTOR_TYPE, MBulkResult, ResourcePath, _get_executor_class, _patch_environ
 from .s3utils import (
     _get_s3_connection_parameters,
     _s3_disable_bucket_validation,
@@ -44,6 +47,7 @@ from .s3utils import (
     s3CheckFileExists,
     translate_client_error,
 )
+from .utils import _get_num_workers
 
 try:
     from boto3.s3.transfer import TransferConfig  # type: ignore
@@ -164,6 +168,14 @@ class S3ResourcePath(ResourcePath):
 
         return use_threads
 
+    @contextlib.contextmanager
+    def _use_threads_temp_override(self, multithreaded: bool) -> Iterator:
+        """Temporarily override the value of use_threads."""
+        original = self.use_threads
+        self.use_threads = multithreaded
+        yield
+        self.use_threads = original
+
     @property
     def _transfer_config(self) -> TransferConfig:
        if self.use_threads is None:
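As written, `_use_threads_temp_override` restores the original value only when the body completes normally; an exception raised inside the context would leave `use_threads` overridden. A minimal exception-safe variant (a sketch, not the released code — the mixin name is hypothetical):

```python
import contextlib
from collections.abc import Iterator


class _ThreadOverrideMixin:
    """Sketch of an exception-safe use_threads override (hypothetical)."""

    use_threads: bool | None = None

    @contextlib.contextmanager
    def _use_threads_temp_override(self, multithreaded: bool) -> Iterator[None]:
        original = self.use_threads
        self.use_threads = multithreaded
        try:
            yield
        finally:
            # Restore even if the body raised, unlike a bare yield.
            self.use_threads = original
```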
@@ -212,7 +224,9 @@ class S3ResourcePath(ResourcePath):
         return bucket
 
     @classmethod
-    def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
+    def _mexists(
+        cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
+    ) -> dict[ResourcePath, bool]:
         # Force client to be created for each profile before creating threads.
         profiles = set[str | None]()
         for path in uris:
@@ -222,7 +236,125 @@ class S3ResourcePath(ResourcePath):
         for profile in profiles:
             getS3Client(profile)
 
-        return super()._mexists(uris)
+        return super()._mexists(uris, num_workers=num_workers)
+
+    @classmethod
+    def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
+        # Delete multiple objects in one API call.
+        # Must group by profile and bucket.
+        grouped_uris: dict[tuple[str | None, str], list[S3ResourcePath]] = defaultdict(list)
+        for uri in uris:
+            uri = cast(S3ResourcePath, uri)
+            grouped_uris[uri._profile, uri._bucket].append(uri)
+
+        results: dict[ResourcePath, MBulkResult] = {}
+        for related_uris in grouped_uris.values():
+            # API requires no more than 1000 per call.
+            chunk_num = 0
+            chunks: list[tuple[ResourcePath, ...]] = []
+            key_to_uri: dict[str, ResourcePath] = {}
+            for chunk in chunk_iterable(related_uris, chunk_size=1_000):
+                for uri in chunk:
+                    key = uri.relativeToPathRoot
+                    key_to_uri[key] = uri
+                    # Default to assuming everything worked.
+                    results[uri] = MBulkResult(True, None)
+                chunk_num += 1
+                chunks.append(chunk)
+
+            # Bulk remove.
+            with time_this(
+                log,
+                msg="Bulk delete; %d chunk%s; totalling %d dataset%s",
+                args=(
+                    len(chunks),
+                    "s" if len(chunks) != 1 else "",
+                    len(related_uris),
+                    "s" if len(related_uris) != 1 else "",
+                ),
+            ):
+                errored = cls._mremove_select(chunks)
+
+            # Update with error information.
+            results.update(errored)
+
+        return results
+
+    @classmethod
+    def _mremove_select(cls, chunks: list[tuple[ResourcePath, ...]]) -> dict[ResourcePath, MBulkResult]:
+        if len(chunks) == 1:
+            # Do the removal directly without futures.
+            return cls._delete_objects_wrapper(chunks[0])
+        pool_executor_class = _get_executor_class()
+        if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
+            # Patch the environment to make it think there is only one worker
+            # for each subprocess.
+            with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
+                return cls._mremove_with_pool(pool_executor_class, chunks)
+        else:
+            return cls._mremove_with_pool(pool_executor_class, chunks)
+
+    @classmethod
+    def _mremove_with_pool(
+        cls,
+        pool_executor_class: _EXECUTOR_TYPE,
+        chunks: list[tuple[ResourcePath, ...]],
+        *,
+        num_workers: int | None = None,
+    ) -> dict[ResourcePath, MBulkResult]:
+        # Different name because different API to base class.
+        # No need to make more workers than we have chunks.
+        max_workers = num_workers if num_workers is not None else min(len(chunks), _get_num_workers())
+        results: dict[ResourcePath, MBulkResult] = {}
+        with pool_executor_class(max_workers=max_workers) as remove_executor:
+            future_remove = {
+                remove_executor.submit(cls._delete_objects_wrapper, chunk): i
+                for i, chunk in enumerate(chunks)
+            }
+            for future in concurrent.futures.as_completed(future_remove):
+                try:
+                    results.update(future.result())
+                except Exception as e:
+                    # The chunk utterly failed.
+                    chunk = chunks[future_remove[future]]
+                    for uri in chunk:
+                        results[uri] = MBulkResult(False, e)
+        return results
+
+    @classmethod
+    def _delete_objects_wrapper(cls, uris: tuple[ResourcePath, ...]) -> dict[ResourcePath, MBulkResult]:
+        """Convert URIs to keys and call low-level API."""
+        if not uris:
+            return {}
+        keys: list[dict[str, str]] = []
+        key_to_uri: dict[str, ResourcePath] = {}
+        for uri in uris:
+            key = uri.relativeToPathRoot
+            key_to_uri[key] = uri
+            keys.append({"Key": key})
+
+        first_uri = cast(S3ResourcePath, uris[0])
+        results = cls._delete_related_objects(first_uri.client, first_uri._bucket, keys)
+
+        # Remap error object keys to uris.
+        return {key_to_uri[key]: result for key, result in results.items()}
+
+    @classmethod
+    @backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
+    def _delete_related_objects(
+        cls, client: boto3.client, bucket: str, keys: list[dict[str, str]]
+    ) -> dict[str, MBulkResult]:
+        # Delete multiple objects from the same bucket, allowing for backoff
+        # retry.
+        response = client.delete_objects(Bucket=bucket, Delete={"Objects": keys, "Quiet": True})
+        # Use Quiet mode so we assume everything worked unless told otherwise.
+        # Only returning errors -- indexed by Key name.
+        errors: dict[str, MBulkResult] = {}
+        for errored_key in response.get("Errors", []):
+            errors[errored_key["Key"]] = MBulkResult(
+                False, ClientError({"Error": errored_key}, f"delete_objects: {errored_key['Key']}")
+            )
+        return errors
 
     @backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
     def exists(self) -> bool:
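Taken together these methods give S3 a true bulk-delete path: URIs are grouped by (profile, bucket), split into chunks of at most 1,000 keys (the `delete_objects` API limit), deleted in Quiet mode so only failures are reported, and fanned out over an executor when there is more than one chunk. A usage sketch of the public entry point, inferred from the tests later in this diff (`do_raise` and the `success` attribute of `MBulkResult` appear there; the bucket name is hypothetical):

```python
from lsst.resources import ResourcePath

uris = [ResourcePath(f"s3://some-bucket/dir1/f{n}.yaml") for n in range(2500)]

# With do_raise=False a mapping of URI -> MBulkResult is returned
# instead of raising on failure; 2500 keys means three chunks here.
results = ResourcePath.mremove(uris, do_raise=False)
failed = [uri for uri, res in results.items() if not res.success]
```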
@@ -345,18 +477,33 @@ class S3ResourcePath(ResourcePath):
 
         return s3, f"{self._bucket}/{self.relativeToPathRoot}"
 
-    def _as_local(self) -> tuple[str, bool]:
+    @contextlib.contextmanager
+    def _as_local(
+        self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
+    ) -> Iterator[ResourcePath]:
         """Download object from S3 and place in temporary directory.
 
+        Parameters
+        ----------
+        multithreaded : `bool`, optional
+            If `True` the transfer will be allowed to attempt to improve
+            throughput by using parallel download streams. This may have no
+            effect if the URI scheme does not support parallel streams or
+            if a global override has been applied. If `False` parallel
+            streams will be disabled.
+        tmpdir : `ResourcePath` or `None`, optional
+            Explicit override of the temporary directory to use for remote
+            downloads.
+
         Returns
         -------
-        path : `str`
-            Path to local temporary file.
-        temporary : `bool`
-            Always returns `True`. This is always a temporary file.
+        local_uri : `ResourcePath`
+            A URI to a local POSIX file corresponding to a local temporary
+            downloaded copy of the resource.
         """
         with (
-            ResourcePath.temporary_uri(suffix=self.getExtension(), delete=False) as tmp_uri,
+            ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=True) as tmp_uri,
+            self._use_threads_temp_override(multithreaded),
             time_this(log, msg="Downloading %s to local file", args=(self,)),
         ):
             progress = (
@@ -366,7 +513,7 @@ class S3ResourcePath(ResourcePath):
             )
             with tmp_uri.open("wb") as tmpFile:
                 self._download_file(tmpFile, progress)
-        return tmp_uri.ospath, True
+            yield tmp_uri
 
     @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
     def _upload_file(self, local_file: ResourcePath, progress: ProgressPercentage | None) -> None:
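`_as_local` is now a context manager that yields a `ResourcePath`, and because the temporary is created with `delete=True` the downloaded copy is cleaned up when the block exits. Callers go through the public `as_local`, as the tests later in this diff do; a sketch (the S3 URI and consumer function are hypothetical):

```python
from lsst.resources import ResourcePath

remote = ResourcePath("s3://some-bucket/data/catalog.fits")
tmp = ResourcePath("/scratch/tmp/", forceDirectory=True)

# Download into an explicit temporary directory with parallel
# streams disabled; the local copy is removed on exit.
with remote.as_local(multithreaded=False, tmpdir=tmp) as local_uri:
    process(local_uri.ospath)  # hypothetical consumer
```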
@@ -397,7 +544,7 @@ class S3ResourcePath(ResourcePath):
         try:
             self.client.copy_object(CopySource=copy_source, Bucket=self._bucket, Key=self.relativeToPathRoot)
         except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
-            raise FileNotFoundError("No such resource to transfer: {self}") from err
+            raise FileNotFoundError(f"No such resource to transfer: {src} -> {self}") from err
         except ClientError as err:
             translate_client_error(err, self)
             raise
@@ -408,6 +555,7 @@ class S3ResourcePath(ResourcePath):
         transfer: str = "copy",
         overwrite: bool = False,
         transaction: TransactionProtocol | None = None,
+        multithreaded: bool = True,
     ) -> None:
         """Transfer the current resource to an S3 bucket.
 
@@ -422,6 +570,12 @@ class S3ResourcePath(ResourcePath):
             Allow an existing file to be overwritten. Defaults to `False`.
         transaction : `~lsst.resources.utils.TransactionProtocol`, optional
             Currently unused.
+        multithreaded : `bool`, optional
+            If `True` the transfer will be allowed to attempt to improve
+            throughput by using parallel download streams. This may have no
+            effect if the URI scheme does not support parallel streams or
+            if a global override has been applied. If `False` parallel
+            streams will be disabled.
         """
         # Fail early to prevent delays if remote resources are requested
         if transfer not in self.transferModes:
@@ -457,22 +611,25 @@ class S3ResourcePath(ResourcePath):
         timer_msg = "Transfer from %s to %s"
         timer_args = (src, self)
 
-        if isinstance(src, type(self)):
-            # Looks like an S3 remote uri so we can use direct copy
-            # note that boto3.resource.meta.copy is cleverer than the low
-            # level copy_object
+        if isinstance(src, type(self)) and self.client == src.client:
+            # Looks like an S3 remote uri so we can use direct copy.
+            # This only works if the source and destination are using the same
+            # S3 endpoint and profile.
             with time_this(log, msg=timer_msg, args=timer_args):
                 self._copy_from(src)
 
         else:
             # Use local file and upload it
-            with src.as_local() as local_uri:
+            with src.as_local(multithreaded=multithreaded) as local_uri:
                 progress = (
                     ProgressPercentage(local_uri, file_for_msg=src, msg="Uploading:")
                     if log.isEnabledFor(ProgressPercentage.log_level)
                     else None
                 )
-                with time_this(log, msg=timer_msg, args=timer_args):
+                with (
+                    time_this(log, msg=timer_msg, args=timer_args),
+                    self._use_threads_temp_override(multithreaded),
+                ):
                     self._upload_file(local_uri, progress)
 
         # This was an explicit move requested from a remote resource
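At the top level, `transfer_from` now attempts the server-side `copy_object` path only when source and destination share a client, i.e. the same endpoint and profile; anything else falls back to download-then-upload, with the new `multithreaded` flag applied to both legs. A usage sketch (bucket names hypothetical):

```python
from lsst.resources import ResourcePath

src = ResourcePath("s3://bucket-a/input/file.dat")
dest = ResourcePath("s3://bucket-b/output/file.dat")

# Same client: one server-side copy_object call. Different endpoint
# or profile: the source is downloaded locally and re-uploaded.
# multithreaded=False disables parallel streams in both directions.
dest.transfer_from(src, transfer="copy", multithreaded=False)
```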
lsst/resources/s3utils.py CHANGED
@@ -53,6 +53,7 @@ except ImportError:
 
 from ._resourcePath import ResourcePath
 from .location import Location
+from .utils import _get_num_workers
 
 # https://pypi.org/project/backoff/
 try:
@@ -246,7 +247,13 @@ def _s3_disable_bucket_validation(client: boto3.client) -> None:
 @functools.lru_cache
 def _get_s3_client(endpoint_config: _EndpointConfig, skip_validation: bool) -> boto3.client:
     # Helper function to cache the client for this endpoint
-    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})
+    # boto seems to assume it will always have at least 10 available.
+    max_pool_size = max(_get_num_workers(), 10)
+    config = botocore.config.Config(
+        read_timeout=180,
+        max_pool_connections=max_pool_size,
+        retries={"mode": "adaptive", "max_attempts": 10},
+    )
 
     session = boto3.Session(profile_name=endpoint_config.profile)
 
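Sizing the connection pool to at least the worker count avoids botocore's "Connection pool is full, discarding connection" warnings when many threads share one cached client. The same configuration in isolation (a sketch using only public botocore API; the worker count stands in for whatever `_get_num_workers()` reports):

```python
import botocore.config

num_workers = 16  # stand-in for _get_num_workers()

config = botocore.config.Config(
    read_timeout=180,
    # botocore defaults to 10 pooled connections; never go below that.
    max_pool_connections=max(num_workers, 10),
    retries={"mode": "adaptive", "max_attempts": 10},
)
```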
lsst/resources/schemeless.py CHANGED
@@ -105,13 +105,16 @@ class SchemelessResourcePath(FileResourcePath):
             return stat.S_ISDIR(status.st_mode)
         return self.dirLike
 
-    def relative_to(self, other: ResourcePath) -> str | None:
+    def relative_to(self, other: ResourcePath, walk_up: bool = False) -> str | None:
         """Return the relative path from this URI to the other URI.
 
         Parameters
         ----------
         other : `ResourcePath`
             URI to use to calculate the relative path.
+        walk_up : `bool`, optional
+            Control whether "``..``" can be used to resolve a relative path.
+            Default is `False`. Cannot be `True` on Python 3.11.
 
         Returns
         -------
@@ -146,8 +149,8 @@ class SchemelessResourcePath(FileResourcePath):
             raise RuntimeError(f"Unexpected combination of {child}.relative_to({other}).")
 
         if child is None:
-            return super().relative_to(other)
-        return child.relative_to(other)
+            return super().relative_to(other, walk_up=walk_up)
+        return child.relative_to(other, walk_up=walk_up)
 
     @classmethod
     def _fixupPathUri(
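The `walk_up` argument mirrors `pathlib.PurePath.relative_to(..., walk_up=True)`, added in Python 3.12, which is why the docstring forbids it on 3.11. The underlying semantics shown with plain `pathlib` (3.12 or newer):

```python
from pathlib import PurePosixPath

child = PurePosixPath("a/b/c/d.txt")
other = PurePosixPath("a/b/d/e")

# Without walk_up the call raises ValueError because "a/b/d/e" is
# not a prefix of "a/b/c/d.txt". With walk_up=True, ".." segments
# climb from "d/e" back to the shared ancestor "a/b", matching the
# expectation in the tests later in this diff.
print(child.relative_to(other, walk_up=True))  # ../../c/d.txt
```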
lsst/resources/tests.py CHANGED
@@ -17,6 +17,8 @@ import os
 import pathlib
 import random
 import string
+import sys
+import tempfile
 import unittest
 import urllib.parse
 import uuid
@@ -60,18 +62,18 @@ def _check_open(
     """
     text_content = "abcdefghijklmnopqrstuvwxyz🙂"
     bytes_content = uuid.uuid4().bytes
-    content_by_mode_suffix = {
+    content_by_mode_suffix: dict[str, str | bytes] = {
         "": text_content,
         "t": text_content,
         "b": bytes_content,
     }
-    empty_content_by_mode_suffix = {
+    empty_content_by_mode_suffix: dict[str, str | bytes] = {
         "": "",
         "t": "",
         "b": b"",
     }
     # To appease mypy
-    double_content_by_mode_suffix = {
+    double_content_by_mode_suffix: dict[str, str | bytes] = {
         "": text_content + text_content,
         "t": text_content + text_content,
         "b": bytes_content + bytes_content,
@@ -142,6 +144,16 @@ def _check_open(
         content_read = read_buffer.read()
         test_case.assertEqual(len(content_read), 0, f"Read: {content_read!r}, expected empty.")
 
+    # Write multiple chunks with flushing to ensure that any handles that
+    # cache without flushing work properly.
+    n = 3
+    with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
+        for _ in range(n):
+            write_buffer.write(content)
+            write_buffer.flush()
+    with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
+        test_case.assertEqual(read_buffer.read(), content * n)
+
     # Write two copies of the content, overwriting the single copy there.
     with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
         write_buffer.write(double_content)
@@ -364,6 +376,19 @@ class GenericTestCase(_GenericTestCase):
         parent = ResourcePath("d/e.txt", forceAbsolute=False)
         self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
 
+        # Allow .. in response.
+        child = ResourcePath(self._make_uri("a/b/c/d.txt"), forceAbsolute=False)
+        parent = ResourcePath(self._make_uri("a/b/d/e/"), forceAbsolute=False)
+        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
+
+        if sys.version_info >= (3, 12, 0):
+            # Fails on python 3.11.
+            self.assertEqual(
+                child.relative_to(parent, walk_up=True),
+                "../../c/d.txt",
+                f"{child}.relative_to({parent}, walk_up=True)",
+            )
+
     def test_parents(self) -> None:
         """Test of splitting and parent walking."""
         parent = ResourcePath(self._make_uri("somedir"), forceDirectory=True)
@@ -381,6 +406,14 @@ class GenericTestCase(_GenericTestCase):
         self.assertEqual(child_file.parent().parent(), parent)
         self.assertEqual(child_subdir.dirname(), child_subdir)
 
+        # Make sure that the parent doesn't retain any fragment from the
+        # child.
+        child_fragment = child_subdir.join("a.txt#fragment")
+        self.assertEqual(child_fragment.fragment, "fragment")
+        fragment_parent = child_fragment.parent()
+        self.assertEqual(fragment_parent.fragment, "")
+        self.assertTrue(str(fragment_parent).endswith("/"))
+
     def test_escapes(self) -> None:
         """Special characters in file paths."""
         src = self.root_uri.join("bbb/???/test.txt")
@@ -485,6 +518,13 @@ class GenericTestCase(_GenericTestCase):
         self.assertEqual(fnew3.fragment, "fragment")
         self.assertEqual(fnew3.basename(), "b.txt", msg=f"Got: {fnew3._uri}")
 
+        # Check that fragment on the directory is dropped on join.
+        frag_dir = add_dir.join("subdir/#dir_fragment")
+        self.assertEqual(frag_dir.fragment, "dir_fragment")
+        fnew4 = frag_dir.join("a.txt")
+        self.assertEqual(fnew4.fragment, "")
+        self.assertTrue(str(fnew4).endswith("/a.txt"))
+
         # Join a resource path.
         subpath = ResourcePath("a/b.txt#fragment2", forceAbsolute=False, forceDirectory=False)
         fnew3 = root.join(subpath)
@@ -556,6 +596,10 @@ class GenericReadWriteTestCase(_GenericTestCase):
 
     transfer_modes: tuple[str, ...] = ("copy", "move")
     testdir: str | None = None
+    # Number of files to use for mremove() testing to ensure different code
+    # paths are hit. Do not want to generically use many files for schemes
+    # where it makes no difference.
+    n_mremove_files: int = 15
 
     def setUp(self) -> None:
         if self.scheme is None:
@@ -702,6 +746,37 @@ class GenericReadWriteTestCase(_GenericTestCase):
         with self.assertRaises(FileNotFoundError):
             dest.transfer_from(src, "auto")
 
+    def test_mtransfer(self) -> None:
+        n_files = 10
+        sources = [self.tmpdir.join(f"test{n}.txt") for n in range(n_files)]
+        destinations = [self.tmpdir.join(f"dest_test{n}.txt") for n in range(n_files)]
+
+        for i, src in enumerate(sources):
+            content = f"{i}\nContent is some content\nwith something to say\n\n"
+            src.write(content.encode())
+
+        results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True))
+        self.assertTrue(all(res.success for res in results.values()))
+        self.assertTrue(all(dest.exists() for dest in results))
+
+        for i, dest in enumerate(destinations):
+            new_content = dest.read().decode()
+            self.assertTrue(new_content.startswith(f"{i}\n"))
+
+        # Overwrite should work.
+        results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True), overwrite=True)
+
+        # Overwrite failure.
+        results = ResourcePath.mtransfer(
+            "copy", zip(sources, destinations, strict=True), overwrite=False, do_raise=False
+        )
+        self.assertFalse(all(res.success for res in results.values()))
+
+        with self.assertRaises(ExceptionGroup):
+            results = ResourcePath.mtransfer(
+                "copy", zip(sources, destinations, strict=True), overwrite=False, do_raise=True
+            )
+
     def test_local_transfer(self) -> None:
         """Test we can transfer to and from local file."""
         remote_src = self.tmpdir.join("src.json")
@@ -762,6 +837,48 @@ class GenericReadWriteTestCase(_GenericTestCase):
             with self.root_uri.as_local() as local_uri:
                 pass
 
+        if not src.isLocal:
+            # as_local tmpdir can not be a remote resource.
+            with self.assertRaises(ValueError):
+                with src.as_local(tmpdir=self.root_uri) as local_uri:
+                    pass
+
+        # tmpdir is ignored for local file.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            temp_dir = ResourcePath(tmpdir, forceDirectory=True)
+            with src.as_local(tmpdir=temp_dir) as local_uri:
+                self.assertEqual(local_uri.dirname(), temp_dir)
+                self.assertTrue(local_uri.exists())
+
+    def test_local_mtransfer(self) -> None:
+        """Check that bulk transfer to/from local works."""
+        # Create remote resources
+        n_files = 10
+        sources = [self.tmpdir.join(f"test{n}.txt") for n in range(n_files)]
+
+        for i, src in enumerate(sources):
+            content = f"{i}\nContent is some content\nwith something to say\n\n"
+            src.write(content.encode())
+
+        # Potentially remote to local.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            temp_dir = ResourcePath(tmpdir, forceDirectory=True)
+            destinations = [temp_dir.join(f"dest_test{n}.txt") for n in range(n_files)]
+
+            results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True))
+            self.assertTrue(all(res.success for res in results.values()))
+            self.assertTrue(all(dest.exists() for dest in results))
+
+            # Overwrite should work.
+            results = ResourcePath.mtransfer("copy", zip(sources, destinations, strict=True), overwrite=True)
+
+            # Now reverse so local to potentially remote.
+            for src in sources:
+                src.remove()
+            results = ResourcePath.mtransfer("copy", zip(destinations, sources, strict=True), overwrite=False)
+            self.assertTrue(all(res.success for res in results.values()))
+            self.assertTrue(all(dest.exists() for dest in results))
+
     def test_walk(self) -> None:
         """Walk a directory hierarchy."""
         root = self.tmpdir.join("walk/")
@@ -949,22 +1066,33 @@ class GenericReadWriteTestCase(_GenericTestCase):
         # A file that is not there.
         file = root.join("config/basic/butler.yaml")
 
-        # Create some files.
-        expected_files = {
-            "dir1/a.yaml",
-            "dir1/b.yaml",
-            "dir2/e.yaml",
-        }
-        expected_uris = {root.join(f) for f in expected_files}
+        # Create some files. For most schemes the code paths do not change
+        # for 10 vs 1000 files but for some schemes they do.
+        expected_files = [f"dir1/f{n}.yaml" for n in range(self.n_mremove_files)]
+        expected_uris = [root.join(f) for f in expected_files]
         for uri in expected_uris:
             uri.write(b"")
             self.assertTrue(uri.exists())
-        expected_uris.add(file)
+        expected_uris.append(file)
 
-        multi = ResourcePath.mexists(expected_uris)
+        # Force to run with fewer workers than there are files.
+        multi = ResourcePath.mexists(expected_uris, num_workers=3)
 
         for uri, is_there in multi.items():
             if uri == file:
                 self.assertFalse(is_there)
             else:
                 self.assertTrue(is_there)
+
+        # Clean up. Unfortunately POSIX raises a FileNotFoundError but
+        # S3 boto does not complain if there is no key.
+        ResourcePath.mremove(expected_uris, do_raise=False)
+
+        # Check they were really removed.
+        multi = ResourcePath.mexists(expected_uris, num_workers=3)
+        for uri, is_there in multi.items():
+            self.assertFalse(is_there)
+
+        # Clean up a subset of files that are already gone, but this can
+        # trigger a different code path.
+        ResourcePath.mremove(expected_uris[:5], do_raise=False)
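The bulk pattern this test exercises applies directly in user code (paths hypothetical; the keyword arguments are the ones used in the calls above):

```python
from lsst.resources import ResourcePath

uris = [ResourcePath(f"s3://some-bucket/dir1/f{n}.yaml") for n in range(15)]

# Cap the worker pool below the file count to exercise batching.
existence = ResourcePath.mexists(uris, num_workers=3)
present = [uri for uri, ok in existence.items() if ok]

# Remove what exists; with do_raise=False missing keys are tolerated
# (S3 ignores absent keys, POSIX would raise FileNotFoundError).
ResourcePath.mremove(present, do_raise=False)
```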