lsst-resources 29.2025.1800__py3-none-any.whl → 29.2025.1900__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,7 @@ import os
23
23
  import posixpath
24
24
  import re
25
25
  import urllib.parse
26
+ from collections import defaultdict
26
27
  from pathlib import Path, PurePath, PurePosixPath
27
28
  from random import Random
28
29
  from typing import TypeAlias
@@ -53,8 +54,8 @@ ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
53
54
  ESCAPED_HASH = urllib.parse.quote("#")
54
55
 
55
56
 
56
- class MTransferResult(NamedTuple):
57
- """Report on a bulk transfer."""
57
+ class MBulkResult(NamedTuple):
58
+ """Report on a bulk operation."""
58
59
 
59
60
  success: bool
60
61
  exception: Exception | None
@@ -912,6 +913,14 @@ class ResourcePath: # numpydoc ignore=PR02
912
913
  """
913
914
  raise NotImplementedError()
914
915
 
916
@classmethod
def _group_uris(cls, uris: Iterable[ResourcePath]) -> dict[type[ResourcePath], list[ResourcePath]]:
    """Group URIs by their concrete class.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to group. The iterable is consumed exactly once.

    Returns
    -------
    grouped : `dict` [ `type` [ `ResourcePath` ], `list` [ `ResourcePath` ] ]
        Mapping from each class found in ``uris`` to the URIs of that
        class, in the order in which they were encountered.
    """
    grouped: defaultdict[type[ResourcePath], list[ResourcePath]] = defaultdict(list)
    for uri in uris:
        grouped[uri.__class__].append(uri)
    # Hand back a plain dict so that a later lookup of a class that was not
    # present raises KeyError instead of silently inserting an empty group.
    return dict(grouped)
923
+
915
924
  @classmethod
916
925
  def mexists(
917
926
  cls, uris: Iterable[ResourcePath], *, num_workers: int | None = None
@@ -933,18 +942,9 @@ class ResourcePath: # numpydoc ignore=PR02
933
942
  existence : `dict` of [`ResourcePath`, `bool`]
934
943
  Mapping of original URI to boolean indicating existence.
935
944
  """
936
- # Group by scheme to allow a subclass to be able to use
937
- # specialized implementations.
938
- grouped: dict[type, list[ResourcePath]] = {}
939
- for uri in uris:
940
- uri_class = uri.__class__
941
- if uri_class not in grouped:
942
- grouped[uri_class] = []
943
- grouped[uri_class].append(uri)
944
-
945
945
  existence: dict[ResourcePath, bool] = {}
946
- for uri_class in grouped:
947
- existence.update(uri_class._mexists(grouped[uri_class], num_workers=num_workers))
946
+ for uri_class, group in cls._group_uris(uris).items():
947
+ existence.update(uri_class._mexists(group, num_workers=num_workers))
948
948
 
949
949
  return existence
950
950
 
@@ -1029,7 +1029,7 @@ class ResourcePath: # numpydoc ignore=PR02
1029
1029
  overwrite: bool = False,
1030
1030
  transaction: TransactionProtocol | None = None,
1031
1031
  do_raise: bool = True,
1032
- ) -> dict[ResourcePath, MTransferResult]:
1032
+ ) -> dict[ResourcePath, MBulkResult]:
1033
1033
  """Transfer many files in bulk.
1034
1034
 
1035
1035
  Parameters
@@ -1048,14 +1048,16 @@ class ResourcePath: # numpydoc ignore=PR02
1048
1048
  The transaction object must be thread safe.
1049
1049
  do_raise : `bool`, optional
1050
1050
  If `True` an `ExceptionGroup` will be raised containing any
1051
- exceptions raised by the individual transfers. Else a dict
1052
- reporting the status of each `ResourcePath` will be returned.
1051
+ exceptions raised by the individual transfers. If `False`, or if
1052
+ there were no exceptions, a dict reporting the status of each
1053
+ `ResourcePath` will be returned.
1053
1054
 
1054
1055
  Returns
1055
1056
  -------
1056
- copy_status : `dict` [ `ResourcePath`, `MTransferResult` ]
1057
+ copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
1057
1058
  A dict of all the transfer attempts with a value indicating
1058
- whether the transfer succeeded for the target URI.
1059
+ whether the transfer succeeded for the target URI. If ``do_raise``
1060
+ is `True`, this will only be returned if there are no errors.
1059
1061
  """
1060
1062
  pool_executor_class = _get_executor_class()
1061
1063
  if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
@@ -1088,7 +1090,7 @@ class ResourcePath: # numpydoc ignore=PR02
1088
1090
  overwrite: bool = False,
1089
1091
  transaction: TransactionProtocol | None = None,
1090
1092
  do_raise: bool = True,
1091
- ) -> dict[ResourcePath, MTransferResult]:
1093
+ ) -> dict[ResourcePath, MBulkResult]:
1092
1094
  """Transfer many files in bulk.
1093
1095
 
1094
1096
  Parameters
@@ -1112,7 +1114,7 @@ class ResourcePath: # numpydoc ignore=PR02
1112
1114
 
1113
1115
  Returns
1114
1116
  -------
1115
- copy_status : `dict` [ `ResourcePath`, `MTransferResult` ]
1117
+ copy_status : `dict` [ `ResourcePath`, `MBulkResult` ]
1116
1118
  A dict of all the transfer attempts with a value indicating
1117
1119
  whether the transfer succeeded for the target URI.
1118
1120
  """
@@ -1128,17 +1130,17 @@ class ResourcePath: # numpydoc ignore=PR02
1128
1130
  ): to_uri
1129
1131
  for from_uri, to_uri in from_to
1130
1132
  }
1131
- results: dict[ResourcePath, MTransferResult] = {}
1133
+ results: dict[ResourcePath, MBulkResult] = {}
1132
1134
  failed = False
1133
1135
  for future in concurrent.futures.as_completed(future_transfers):
1134
1136
  to_uri = future_transfers[future]
1135
1137
  try:
1136
1138
  future.result()
1137
1139
  except Exception as e:
1138
- transferred = MTransferResult(False, e)
1140
+ transferred = MBulkResult(False, e)
1139
1141
  failed = True
1140
1142
  else:
1141
- transferred = MTransferResult(True, None)
1143
+ transferred = MBulkResult(True, None)
1142
1144
  results[to_uri] = transferred
1143
1145
 
1144
1146
  if do_raise and failed:
@@ -1153,6 +1155,81 @@ class ResourcePath: # numpydoc ignore=PR02
1153
1155
  """Remove the resource."""
1154
1156
  raise NotImplementedError()
1155
1157
 
1158
@classmethod
def mremove(
    cls, uris: Iterable[ResourcePath], *, do_raise: bool = True
) -> dict[ResourcePath, MBulkResult]:
    """Remove multiple URIs at once.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to remove.
    do_raise : `bool`, optional
        If `True` an `ExceptionGroup` will be raised containing any
        exceptions raised by the individual removals. If `False`, or if
        there were no exceptions, a dict reporting the status of each
        `ResourcePath` will be returned.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Dictionary mapping each URI to a result object indicating whether
        the removal succeeded or resulted in an exception. If ``do_raise``
        is `True` this will only be returned if everything succeeded.

    Raises
    ------
    ExceptionGroup
        Raised if ``do_raise`` is `True` and at least one removal failed.
    """
    # Group URIs by class since some URI schemes support native bulk
    # APIs.
    results: dict[ResourcePath, MBulkResult] = {}
    for uri_class, group in cls._group_uris(uris).items():
        results.update(uri_class._mremove(group))

    if do_raise:
        # ExceptionGroup refuses an empty sequence, so a failure that did
        # not record an exception is given a placeholder rather than being
        # dropped from the report.
        failures = [
            res.exception
            if res.exception is not None
            else RuntimeError(f"Removal of {uri} failed without reporting an exception")
            for uri, res in results.items()
            if not res.success
        ]
        if failures:
            s = "s" if len(results) != 1 else ""
            raise ExceptionGroup(f"Error{s} removing {len(results)} artifact{s}", failures)

    return results
1196
+
1197
@classmethod
def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
    """Remove the given URIs through an executor pool.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to remove.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Outcome of the removal attempt for each URI.
    """
    executor_class = _get_executor_class()
    if not issubclass(executor_class, concurrent.futures.ProcessPoolExecutor):
        return cls._mremove_pool(executor_class, uris)

    # With a process pool, patch the environment so that each subprocess
    # thinks there is only one worker available to it.
    with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
        return cls._mremove_pool(executor_class, uris)
1208
+
1209
@classmethod
def _mremove_pool(
    cls,
    pool_executor_class: _EXECUTOR_TYPE,
    uris: Iterable[ResourcePath],
    *,
    num_workers: int | None = None,
) -> dict[ResourcePath, MBulkResult]:
    """Remove URIs by submitting one ``remove`` call per URI to a pool.

    Parameters
    ----------
    pool_executor_class : executor type
        Executor class used to construct the pool.
    uris : iterable of `ResourcePath`
        URIs to remove.
    num_workers : `int` or `None`, optional
        Number of workers for the pool. If `None` the value returned by
        ``_get_num_workers()`` is used.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Outcome for each URI; an exception raised by ``remove`` is
        captured in the result rather than propagated.
    """
    worker_count = _get_num_workers() if num_workers is None else num_workers
    outcome: dict[ResourcePath, MBulkResult] = {}
    with pool_executor_class(max_workers=worker_count) as pool:
        # Remember which URI each future belongs to.
        pending = {}
        for target in uris:
            pending[pool.submit(target.remove)] = target

        for finished in concurrent.futures.as_completed(pending):
            target = pending[finished]
            try:
                finished.result()
            except Exception as err:
                outcome[target] = MBulkResult(False, err)
            else:
                outcome[target] = MBulkResult(True, None)
    return outcome
1232
+
1156
1233
  def isabs(self) -> bool:
1157
1234
  """Indicate that the resource is fully specified.
1158
1235
 
lsst/resources/s3.py CHANGED
@@ -20,17 +20,19 @@ import os
20
20
  import re
21
21
  import sys
22
22
  import threading
23
+ from collections import defaultdict
23
24
  from collections.abc import Iterable, Iterator
24
25
  from functools import cache, cached_property
25
26
  from typing import IO, TYPE_CHECKING, cast
26
27
 
27
28
  from botocore.exceptions import ClientError
28
29
 
30
+ from lsst.utils.iteration import chunk_iterable
29
31
  from lsst.utils.timer import time_this
30
32
 
31
33
  from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
32
34
  from ._resourceHandles._s3ResourceHandle import S3ResourceHandle
33
- from ._resourcePath import ResourcePath
35
+ from ._resourcePath import MBulkResult, ResourcePath
34
36
  from .s3utils import (
35
37
  _get_s3_connection_parameters,
36
38
  _s3_disable_bucket_validation,
@@ -234,6 +236,55 @@ class S3ResourcePath(ResourcePath):
234
236
 
235
237
  return super()._mexists(uris, num_workers=num_workers)
236
238
 
239
@classmethod
def _mremove(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, MBulkResult]:
    """Remove many S3 objects using the bulk delete API.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        URIs to remove. Each is treated as an `S3ResourcePath`.

    Returns
    -------
    results : `dict` [ `ResourcePath`, `MBulkResult` ]
        Outcome for each URI. If a bulk request itself raises, every URI
        in that request is reported as failed with that exception.
    """
    # Delete multiple objects in one API call.
    # Must group by profile and bucket.
    grouped_uris: dict[tuple[str | None, str], list[S3ResourcePath]] = defaultdict(list)
    for uri in uris:
        uri = cast(S3ResourcePath, uri)
        grouped_uris[uri._profile, uri._bucket].append(uri)

    results: dict[ResourcePath, MBulkResult] = {}
    for related_uris in grouped_uris.values():
        # The client and bucket are the same for each of the remaining
        # URIs.
        first_uri = related_uris[0]
        # API requires no more than 1000 per call.
        for chunk in chunk_iterable(related_uris, chunk_size=1_000):
            key_to_uri: dict[str, ResourcePath] = {}
            keys: list[dict[str, str]] = []
            for uri in chunk:
                key = uri.relativeToPathRoot
                key_to_uri[key] = uri
                keys.append({"Key": key})

            try:
                errored = cls._delete_related_objects(first_uri.client, first_uri._bucket, keys)
            except Exception as e:
                # The request as a whole failed (for example after the
                # retries were exhausted). Report it against every URI in
                # the chunk, as the per-URI pool implementation would,
                # instead of aborting the entire bulk operation.
                for uri in key_to_uri.values():
                    results[uri] = MBulkResult(False, e)
                continue

            # Quiet mode only reports failures, so assume everything
            # worked and then overwrite with the reported errors.
            for uri in key_to_uri.values():
                results[uri] = MBulkResult(True, None)
            for key, bulk_result in errored.items():
                results[key_to_uri[key]] = bulk_result

    return results
270
+
271
@classmethod
@backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
def _delete_related_objects(
    cls, client: boto3.client, bucket: str, keys: list[dict[str, str]]
) -> dict[str, MBulkResult]:
    """Delete several objects from one bucket in a single request.

    The call is wrapped in a backoff retry.

    Parameters
    ----------
    client : `boto3.client`
        Client used to issue the request.
    bucket : `str`
        Bucket holding all the objects.
    keys : `list` [ `dict` [ `str`, `str` ] ]
        Objects to delete, each given as ``{"Key": name}``.

    Returns
    -------
    errors : `dict` [ `str`, `MBulkResult` ]
        Failure results indexed by key name. Keys that are absent from the
        response's ``Errors`` list do not appear.
    """
    # Quiet mode is requested, so everything is assumed to have worked
    # unless the response says otherwise.
    reply = client.delete_objects(Bucket=bucket, Delete={"Objects": keys, "Quiet": True})
    return {
        entry["Key"]: MBulkResult(False, ClientError({"Error": entry}, f"delete_objects: {entry['Key']}"))
        for entry in reply.get("Errors", [])
    }
287
+
237
288
  @backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
238
289
  def exists(self) -> bool:
239
290
  """Check that the S3 resource exists."""
lsst/resources/s3utils.py CHANGED
@@ -247,9 +247,11 @@ def _s3_disable_bucket_validation(client: boto3.client) -> None:
247
247
  @functools.lru_cache
248
248
  def _get_s3_client(endpoint_config: _EndpointConfig, skip_validation: bool) -> boto3.client:
249
249
  # Helper function to cache the client for this endpoint
250
+ # boto seems to assume it will always have at least 10 available.
251
+ max_pool_size = max(_get_num_workers(), 10)
250
252
  config = botocore.config.Config(
251
253
  read_timeout=180,
252
- max_pool_connections=_get_num_workers(),
254
+ max_pool_connections=max_pool_size,
253
255
  retries={"mode": "adaptive", "max_attempts": 10},
254
256
  )
255
257
 
lsst/resources/tests.py CHANGED
@@ -1045,3 +1045,12 @@ class GenericReadWriteTestCase(_GenericTestCase):
1045
1045
  self.assertFalse(is_there)
1046
1046
  else:
1047
1047
  self.assertTrue(is_there)
1048
+
1049
+ # Clean up. Unfortunately POSIX raises a FileNotFoundError but
1050
+ # S3 boto does not complain if there is no key.
1051
+ ResourcePath.mremove(expected_uris, do_raise=False)
1052
+
1053
+ # Check they were really removed.
1054
+ multi = ResourcePath.mexists(expected_uris, num_workers=3)
1055
+ for uri, is_there in multi.items():
1056
+ self.assertFalse(is_there)
lsst/resources/version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  __all__ = ["__version__"]
2
- __version__ = "29.2025.1800"
2
+ __version__ = "29.2025.1900"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lsst-resources
3
- Version: 29.2025.1800
3
+ Version: 29.2025.1900
4
4
  Summary: An abstraction layer for reading and writing from URI file resources.
5
5
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
6
6
  License: BSD 3-Clause License
@@ -1,6 +1,6 @@
1
1
  lsst/__init__.py,sha256=9I6UQ9gj-ZcPlvsa0OPBo76UujxXVehVzw9yMAOQvyM,466
2
2
  lsst/resources/__init__.py,sha256=BDj6uokvd0ZQNGl-Xgz5gZd83Z0L2gFqGSk0KJpylP8,778
3
- lsst/resources/_resourcePath.py,sha256=DTsZTrCaXu9GLT1QwkhJaQcb-b8VuaRH1TU-J58PJ0M,70548
3
+ lsst/resources/_resourcePath.py,sha256=AnyqWz6TVgDjuZO4DjFj6q4J0-zbIduCFuxwopSS6Nk,73905
4
4
  lsst/resources/file.py,sha256=-jPuoHvTEtx5tnDyNkfwhWAyX0cTwkuMd-JvJn9EGdE,23226
5
5
  lsst/resources/gs.py,sha256=Lpo5GAzH7R7HG8E5RMGOdP4j4hjWJn-k6M3OXj0nHQM,12783
6
6
  lsst/resources/http.py,sha256=9a_VadSabznPC0FTQtDtfV041zH25wZgXFtJ7HkvHp0,88275
@@ -8,21 +8,21 @@ lsst/resources/location.py,sha256=x3Tq0x5o1OXYmZDxYBenUG1N71wtDhnjVAr3s2ZEiu8,79
8
8
  lsst/resources/mem.py,sha256=VOWh7XxJPfqKcFdLZSjKEAfORQ2AHZHpxmjT8LniV60,1008
9
9
  lsst/resources/packageresource.py,sha256=vnfeRlpVwpC5cDQZE6Lnh8EH6oZy1sH2vLz9ONYjJ4k,6817
10
10
  lsst/resources/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- lsst/resources/s3.py,sha256=X5ujR-w4LOJpOWAIBShn6hS0oeXQwvl3leeuj6c9LJ4,24198
12
- lsst/resources/s3utils.py,sha256=rhMvZjbHjymObDWsEUODoLaw70DrrJHn_cUi3G78VvI,14463
11
+ lsst/resources/s3.py,sha256=KH9oPThUMG6bvkE_gEgJU3KXY8RgIVbKsLu0orgv1Mo,26634
12
+ lsst/resources/s3utils.py,sha256=ojWf9BPrK9mhGQ8jvs4_8Nsqf9360e79U5FnPTxe24A,14576
13
13
  lsst/resources/schemeless.py,sha256=GfJcKzZ0XIeepfQdW4HPZWiZlSp_ej0SEtSiJTrDUQs,10666
14
- lsst/resources/tests.py,sha256=LZOxPfZhSS76z7s5kWuULaMlTo65_m8XROdHrQqubQQ,43646
14
+ lsst/resources/tests.py,sha256=SqYLbDG6QkZTB-0UvrsiPtfmdL1TcglGeqBTPQxu9GE,44027
15
15
  lsst/resources/utils.py,sha256=6O3Mq7JbPEtqyD2lM77pRpwcPMfV5SxiNMknw-F2vNs,8097
16
- lsst/resources/version.py,sha256=uRdffTNnwH4GQy77H1Wk3Md64craDCIuRxmCtF7ON0g,55
16
+ lsst/resources/version.py,sha256=duakYcQBT5t4KBaOX14WCtq_sSvpoXvNcxi3sUb8GvI,55
17
17
  lsst/resources/_resourceHandles/__init__.py,sha256=zOcZ8gVEBdAWcHJaZabA8Vdq-wAVcxjbmA_1b1IWM6M,76
18
18
  lsst/resources/_resourceHandles/_baseResourceHandle.py,sha256=lQwxDOmFUNJndTxsjpz-HxrQBL0L-z4aXQocHdOEI7c,4676
19
19
  lsst/resources/_resourceHandles/_fileResourceHandle.py,sha256=A7_WQPzD0ZlOzNmaI_TPdZybrNxrXPkNHWVla3UFxfs,3676
20
20
  lsst/resources/_resourceHandles/_httpResourceHandle.py,sha256=Yami8IVGeru4bLQCag-OvGG0ltz1qyEg57FY4IEB87Y,10995
21
21
  lsst/resources/_resourceHandles/_s3ResourceHandle.py,sha256=NkDmPb9bm_zMvr6mMnb-tBmqJDt0yUJrt2gZXR8l7ok,12923
22
- lsst_resources-29.2025.1800.dist-info/licenses/COPYRIGHT,sha256=yazVsoMmFwhiw5itGrdT4YPmXbpsQyUFjlpOyZIa77M,148
23
- lsst_resources-29.2025.1800.dist-info/licenses/LICENSE,sha256=7wrtgl8meQ0_RIuv2TjIKpAnNrl-ODH-QLwyHe9citI,1516
24
- lsst_resources-29.2025.1800.dist-info/METADATA,sha256=QUD23JcY-DYeGrrGfeFiZRUJ6dDpWuVeSu0YwW5HliI,2237
25
- lsst_resources-29.2025.1800.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
26
- lsst_resources-29.2025.1800.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
27
- lsst_resources-29.2025.1800.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
28
- lsst_resources-29.2025.1800.dist-info/RECORD,,
22
+ lsst_resources-29.2025.1900.dist-info/licenses/COPYRIGHT,sha256=yazVsoMmFwhiw5itGrdT4YPmXbpsQyUFjlpOyZIa77M,148
23
+ lsst_resources-29.2025.1900.dist-info/licenses/LICENSE,sha256=7wrtgl8meQ0_RIuv2TjIKpAnNrl-ODH-QLwyHe9citI,1516
24
+ lsst_resources-29.2025.1900.dist-info/METADATA,sha256=AsHgImBvDFy_E373VzU-E3sARIx2y1E7-gfLP-991is,2237
25
+ lsst_resources-29.2025.1900.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
26
+ lsst_resources-29.2025.1900.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
27
+ lsst_resources-29.2025.1900.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
28
+ lsst_resources-29.2025.1900.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.1.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5