lsst-resources 29.2025.2000__tar.gz → 29.2025.2100__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lsst_resources-29.2025.2000/python/lsst_resources.egg-info → lsst_resources-29.2025.2100}/PKG-INFO +1 -1
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/s3.py +80 -16
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/tests.py +13 -10
- lsst_resources-29.2025.2100/python/lsst/resources/version.py +2 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100/python/lsst_resources.egg-info}/PKG-INFO +1 -1
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_s3.py +2 -0
- lsst_resources-29.2025.2000/python/lsst/resources/version.py +0 -2
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/COPYRIGHT +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/LICENSE +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/MANIFEST.in +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/README.md +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/doc/lsst.resources/CHANGES.rst +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/doc/lsst.resources/dav.rst +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/doc/lsst.resources/index.rst +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/doc/lsst.resources/internal-api.rst +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/doc/lsst.resources/s3.rst +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/pyproject.toml +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/__init__.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/__init__.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourceHandles/__init__.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourceHandles/_baseResourceHandle.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourceHandles/_davResourceHandle.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourceHandles/_fileResourceHandle.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourceHandles/_httpResourceHandle.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourceHandles/_s3ResourceHandle.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourcePath.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/dav.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/davutils.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/file.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/gs.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/http.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/location.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/mem.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/packageresource.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/py.typed +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/s3utils.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/schemeless.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/utils.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst_resources.egg-info/SOURCES.txt +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst_resources.egg-info/dependency_links.txt +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst_resources.egg-info/requires.txt +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst_resources.egg-info/top_level.txt +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst_resources.egg-info/zip-safe +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/setup.cfg +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_dav.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_file.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_gs.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_http.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_location.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_mem.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_resource.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_s3utils.py +0 -0
- {lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/tests/test_schemeless.py +0 -0
{lsst_resources-29.2025.2000/python/lsst_resources.egg-info → lsst_resources-29.2025.2100}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lsst-resources
|
|
3
|
-
Version: 29.2025.2000
|
|
3
|
+
Version: 29.2025.2100
|
|
4
4
|
Summary: An abstraction layer for reading and writing from URI file resources.
|
|
5
5
|
Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -13,6 +13,7 @@ from __future__ import annotations
|
|
|
13
13
|
|
|
14
14
|
__all__ = ("S3ResourcePath",)
|
|
15
15
|
|
|
16
|
+
import concurrent.futures
|
|
16
17
|
import contextlib
|
|
17
18
|
import io
|
|
18
19
|
import logging
|
|
@@ -32,7 +33,7 @@ from lsst.utils.timer import time_this
|
|
|
32
33
|
|
|
33
34
|
from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
|
|
34
35
|
from ._resourceHandles._s3ResourceHandle import S3ResourceHandle
|
|
35
|
-
from ._resourcePath import MBulkResult, ResourcePath
|
|
36
|
+
from ._resourcePath import _EXECUTOR_TYPE, MBulkResult, ResourcePath, _get_executor_class, _patch_environ
|
|
36
37
|
from .s3utils import (
|
|
37
38
|
_get_s3_connection_parameters,
|
|
38
39
|
_s3_disable_bucket_validation,
|
|
@@ -46,6 +47,7 @@ from .s3utils import (
|
|
|
46
47
|
s3CheckFileExists,
|
|
47
48
|
translate_client_error,
|
|
48
49
|
)
|
|
50
|
+
from .utils import _get_num_workers
|
|
49
51
|
|
|
50
52
|
try:
|
|
51
53
|
from boto3.s3.transfer import TransferConfig # type: ignore
|
|
@@ -247,34 +249,96 @@ class S3ResourcePath(ResourcePath):
|
|
|
247
249
|
|
|
248
250
|
results: dict[ResourcePath, MBulkResult] = {}
|
|
249
251
|
for related_uris in grouped_uris.values():
|
|
250
|
-
# The client and bucket are the same for each of the remaining
|
|
251
|
-
# URIs.
|
|
252
|
-
first_uri = related_uris[0]
|
|
253
252
|
# API requires no more than 1000 per call.
|
|
254
253
|
chunk_num = 0
|
|
254
|
+
chunks: list[tuple[ResourcePath, ...]] = []
|
|
255
|
+
key_to_uri: dict[str, ResourcePath] = {}
|
|
255
256
|
for chunk in chunk_iterable(related_uris, chunk_size=1_000):
|
|
256
|
-
key_to_uri: dict[str, ResourcePath] = {}
|
|
257
|
-
keys: list[dict[str, str]] = []
|
|
258
257
|
for uri in chunk:
|
|
259
258
|
key = uri.relativeToPathRoot
|
|
260
259
|
key_to_uri[key] = uri
|
|
261
|
-
keys.append({"Key": key})
|
|
262
260
|
# Default to assuming everything worked.
|
|
263
261
|
results[uri] = MBulkResult(True, None)
|
|
264
262
|
chunk_num += 1
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
263
|
+
chunks.append(chunk)
|
|
264
|
+
|
|
265
|
+
# Bulk remove.
|
|
266
|
+
with time_this(
|
|
267
|
+
log,
|
|
268
|
+
msg="Bulk delete; %d chunk%s; totalling %d dataset%s",
|
|
269
|
+
args=(
|
|
270
|
+
len(chunks),
|
|
271
|
+
"s" if len(chunks) != 1 else "",
|
|
272
|
+
len(related_uris),
|
|
273
|
+
"s" if len(related_uris) != 1 else "",
|
|
274
|
+
),
|
|
275
|
+
):
|
|
276
|
+
errored = cls._mremove_select(chunks)
|
|
277
|
+
|
|
278
|
+
# Update with error information.
|
|
279
|
+
results.update(errored)
|
|
271
280
|
|
|
272
|
-
|
|
273
|
-
for key, bulk_result in errored.items():
|
|
274
|
-
results[key_to_uri[key]] = bulk_result
|
|
281
|
+
return results
|
|
275
282
|
|
|
283
|
+
@classmethod
|
|
284
|
+
def _mremove_select(cls, chunks: list[tuple[ResourcePath, ...]]) -> dict[ResourcePath, MBulkResult]:
|
|
285
|
+
if len(chunks) == 1:
|
|
286
|
+
# Do the removal directly without futures.
|
|
287
|
+
return cls._delete_objects_wrapper(chunks[0])
|
|
288
|
+
pool_executor_class = _get_executor_class()
|
|
289
|
+
if issubclass(pool_executor_class, concurrent.futures.ProcessPoolExecutor):
|
|
290
|
+
# Patch the environment to make it think there is only one worker
|
|
291
|
+
# for each subprocess.
|
|
292
|
+
with _patch_environ({"LSST_RESOURCES_NUM_WORKERS": "1"}):
|
|
293
|
+
return cls._mremove_with_pool(pool_executor_class, chunks)
|
|
294
|
+
else:
|
|
295
|
+
return cls._mremove_with_pool(pool_executor_class, chunks)
|
|
296
|
+
|
|
297
|
+
@classmethod
|
|
298
|
+
def _mremove_with_pool(
|
|
299
|
+
cls,
|
|
300
|
+
pool_executor_class: _EXECUTOR_TYPE,
|
|
301
|
+
chunks: list[tuple[ResourcePath, ...]],
|
|
302
|
+
*,
|
|
303
|
+
num_workers: int | None = None,
|
|
304
|
+
) -> dict[ResourcePath, MBulkResult]:
|
|
305
|
+
# Different name because different API to base class.
|
|
306
|
+
# No need to make more workers than we have chunks.
|
|
307
|
+
max_workers = num_workers if num_workers is not None else min(len(chunks), _get_num_workers())
|
|
308
|
+
results: dict[ResourcePath, MBulkResult] = {}
|
|
309
|
+
with pool_executor_class(max_workers=max_workers) as remove_executor:
|
|
310
|
+
future_remove = {
|
|
311
|
+
remove_executor.submit(cls._delete_objects_wrapper, chunk): i
|
|
312
|
+
for i, chunk in enumerate(chunks)
|
|
313
|
+
}
|
|
314
|
+
for future in concurrent.futures.as_completed(future_remove):
|
|
315
|
+
try:
|
|
316
|
+
results.update(future.result())
|
|
317
|
+
except Exception as e:
|
|
318
|
+
# The chunk utterly failed.
|
|
319
|
+
chunk = chunks[future_remove[future]]
|
|
320
|
+
for uri in chunk:
|
|
321
|
+
results[uri] = MBulkResult(False, e)
|
|
276
322
|
return results
|
|
277
323
|
|
|
324
|
+
@classmethod
|
|
325
|
+
def _delete_objects_wrapper(cls, uris: tuple[ResourcePath, ...]) -> dict[ResourcePath, MBulkResult]:
|
|
326
|
+
"""Convert URIs to keys and call low-level API."""
|
|
327
|
+
if not uris:
|
|
328
|
+
return {}
|
|
329
|
+
keys: list[dict[str, str]] = []
|
|
330
|
+
key_to_uri: dict[str, ResourcePath] = {}
|
|
331
|
+
for uri in uris:
|
|
332
|
+
key = uri.relativeToPathRoot
|
|
333
|
+
key_to_uri[key] = uri
|
|
334
|
+
keys.append({"Key": key})
|
|
335
|
+
|
|
336
|
+
first_uri = cast(S3ResourcePath, uris[0])
|
|
337
|
+
results = cls._delete_related_objects(first_uri.client, first_uri._bucket, keys)
|
|
338
|
+
|
|
339
|
+
# Remap error object keys to uris.
|
|
340
|
+
return {key_to_uri[key]: result for key, result in results.items()}
|
|
341
|
+
|
|
278
342
|
@classmethod
|
|
279
343
|
@backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
|
|
280
344
|
def _delete_related_objects(
|
|
@@ -557,6 +557,10 @@ class GenericReadWriteTestCase(_GenericTestCase):
|
|
|
557
557
|
|
|
558
558
|
transfer_modes: tuple[str, ...] = ("copy", "move")
|
|
559
559
|
testdir: str | None = None
|
|
560
|
+
# Number of files to use for mremove() testing to ensure difference code
|
|
561
|
+
# paths are hit. Do not want to generically use many files for schemes
|
|
562
|
+
# where it makes no difference.
|
|
563
|
+
n_mremove_files: int = 15
|
|
560
564
|
|
|
561
565
|
def setUp(self) -> None:
|
|
562
566
|
if self.scheme is None:
|
|
@@ -1023,19 +1027,14 @@ class GenericReadWriteTestCase(_GenericTestCase):
|
|
|
1023
1027
|
# A file that is not there.
|
|
1024
1028
|
file = root.join("config/basic/butler.yaml")
|
|
1025
1029
|
|
|
1026
|
-
# Create some files.
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
"dir1/c.yaml",
|
|
1031
|
-
"dir1/d.yaml",
|
|
1032
|
-
"dir2/e.yaml",
|
|
1033
|
-
}
|
|
1034
|
-
expected_uris = {root.join(f) for f in expected_files}
|
|
1030
|
+
# Create some files. Most schemes the code paths do not change for 10
|
|
1031
|
+
# vs 1000 files but in some schemes it does.
|
|
1032
|
+
expected_files = [f"dir1/f{n}.yaml" for n in range(self.n_mremove_files)]
|
|
1033
|
+
expected_uris = [root.join(f) for f in expected_files]
|
|
1035
1034
|
for uri in expected_uris:
|
|
1036
1035
|
uri.write(b"")
|
|
1037
1036
|
self.assertTrue(uri.exists())
|
|
1038
|
-
expected_uris.add(file)
|
|
1037
|
+
expected_uris.append(file)
|
|
1039
1038
|
|
|
1040
1039
|
# Force to run with fewer workers than there are files.
|
|
1041
1040
|
multi = ResourcePath.mexists(expected_uris, num_workers=3)
|
|
@@ -1054,3 +1053,7 @@ class GenericReadWriteTestCase(_GenericTestCase):
|
|
|
1054
1053
|
multi = ResourcePath.mexists(expected_uris, num_workers=3)
|
|
1055
1054
|
for uri, is_there in multi.items():
|
|
1056
1055
|
self.assertFalse(is_there)
|
|
1056
|
+
|
|
1057
|
+
# Clean up a subset of files that are already gone, but this can
|
|
1058
|
+
# trigger a different code path.
|
|
1059
|
+
ResourcePath.mremove(expected_uris[:5], do_raise=False)
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100/python/lsst_resources.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lsst-resources
|
|
3
|
-
Version: 29.2025.2000
|
|
3
|
+
Version: 29.2025.2100
|
|
4
4
|
Summary: An abstraction layer for reading and writing from URI file resources.
|
|
5
5
|
Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -55,6 +55,8 @@ class S3ReadWriteTestCaseBase(GenericReadWriteTestCase):
|
|
|
55
55
|
|
|
56
56
|
scheme = "s3"
|
|
57
57
|
s3_endpoint_url: str | None = None
|
|
58
|
+
# S3 batches in 1000 files so need more than that.
|
|
59
|
+
n_mremove_files: int = 1015
|
|
58
60
|
|
|
59
61
|
def setUp(self):
|
|
60
62
|
self.enterContext(clean_test_environment_for_s3())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/doc/lsst.resources/internal-api.rst
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/_resourcePath.py
RENAMED
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/davutils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/location.py
RENAMED
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/packageresource.py
RENAMED
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/s3utils.py
RENAMED
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst/resources/schemeless.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_resources-29.2025.2000 → lsst_resources-29.2025.2100}/python/lsst_resources.egg-info/zip-safe
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|