lsst-resources 29.2025.1700__py3-none-any.whl → 29.2025.4600__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/resources/_resourceHandles/_davResourceHandle.py +197 -0
- lsst/resources/_resourceHandles/_fileResourceHandle.py +1 -1
- lsst/resources/_resourceHandles/_httpResourceHandle.py +7 -4
- lsst/resources/_resourceHandles/_s3ResourceHandle.py +3 -17
- lsst/resources/_resourcePath.py +311 -79
- lsst/resources/dav.py +912 -0
- lsst/resources/davutils.py +2659 -0
- lsst/resources/file.py +41 -16
- lsst/resources/gs.py +6 -3
- lsst/resources/http.py +194 -65
- lsst/resources/mem.py +7 -1
- lsst/resources/s3.py +141 -15
- lsst/resources/s3utils.py +8 -1
- lsst/resources/schemeless.py +6 -3
- lsst/resources/tests.py +66 -12
- lsst/resources/utils.py +43 -0
- lsst/resources/version.py +1 -1
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/METADATA +3 -3
- lsst_resources-29.2025.4600.dist-info/RECORD +31 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/WHEEL +1 -1
- lsst_resources-29.2025.1700.dist-info/RECORD +0 -28
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_resources-29.2025.1700.dist-info → lsst_resources-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/resources/dav.py
ADDED
|
@@ -0,0 +1,912 @@
|
|
|
1
|
+
# This file is part of lsst-resources.
|
|
2
|
+
#
|
|
3
|
+
# Developed for the LSST Data Management System.
|
|
4
|
+
# This product includes software developed by the LSST Project
|
|
5
|
+
# (https://www.lsst.org).
|
|
6
|
+
# See the COPYRIGHT file at the top-level directory of this distribution
|
|
7
|
+
# for details of code ownership.
|
|
8
|
+
#
|
|
9
|
+
# Use of this source code is governed by a 3-clause BSD-style
|
|
10
|
+
# license that can be found in the LICENSE file.
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
__all__ = ("DavResourcePath",)
|
|
15
|
+
|
|
16
|
+
import contextlib
|
|
17
|
+
import datetime
|
|
18
|
+
import functools
|
|
19
|
+
import io
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import re
|
|
23
|
+
import threading
|
|
24
|
+
import urllib
|
|
25
|
+
from collections.abc import Iterator
|
|
26
|
+
from typing import TYPE_CHECKING, Any, BinaryIO, cast
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
import fsspec
|
|
30
|
+
from fsspec.spec import AbstractFileSystem
|
|
31
|
+
except ImportError:
|
|
32
|
+
fsspec = None
|
|
33
|
+
AbstractFileSystem = type
|
|
34
|
+
|
|
35
|
+
from ._resourceHandles import ResourceHandleProtocol
|
|
36
|
+
from ._resourceHandles._davResourceHandle import DavReadResourceHandle
|
|
37
|
+
from ._resourcePath import ResourcePath, ResourcePathExpression
|
|
38
|
+
from .davutils import (
|
|
39
|
+
DavClient,
|
|
40
|
+
DavClientPool,
|
|
41
|
+
DavConfigPool,
|
|
42
|
+
DavFileMetadata,
|
|
43
|
+
normalize_path,
|
|
44
|
+
normalize_url,
|
|
45
|
+
)
|
|
46
|
+
from .utils import get_tempdir
|
|
47
|
+
|
|
48
|
+
if TYPE_CHECKING:
|
|
49
|
+
from .utils import TransactionProtocol
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Module-level logger; messages are emitted under this module's dotted name.
log = logging.getLogger(__name__)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@functools.lru_cache
|
|
56
|
+
def _calc_tmpdir_buffer_size(tmpdir: str) -> int:
|
|
57
|
+
"""Compute the block size to use for writing files in `tmpdir` as
|
|
58
|
+
256 blocks of typical size (i.e. 4096 bytes) or 10 times the file system
|
|
59
|
+
block size, whichever is higher.
|
|
60
|
+
|
|
61
|
+
This is a reasonable compromise between using memory for buffering and
|
|
62
|
+
the number of system calls issued to read from or write to temporary
|
|
63
|
+
files.
|
|
64
|
+
"""
|
|
65
|
+
fsstats = os.statvfs(tmpdir)
|
|
66
|
+
return max(10 * fsstats.f_bsize, 256 * 4096)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class DavResourcePathConfig:
    """Configuration class to encapsulate the configurable items used by
    all instances of class `DavResourcePath`.

    Instantiating this class returns a process-wide, thread-safe singleton.
    """

    # Shared singleton instance and the lock protecting its creation.
    _instance = None
    _lock = threading.Lock()

    # Path to the local temporary directory all instances of
    # `DavResourcePath` must use and its associated buffer size (in bytes).
    # Populated lazily by the `tmpdir_buffersize` property.
    _tmpdir_buffersize: tuple[str, int] | None

    def __new__(cls) -> DavResourcePathConfig:
        if cls._instance is None:
            # Double-checked locking: only take the lock when the singleton
            # may still need to be created.
            with cls._lock:
                if cls._instance is None:
                    instance = super().__new__(cls)
                    # Initialize instance state here, exactly once, rather
                    # than in __init__: Python re-runs __init__ on every
                    # `DavResourcePathConfig()` call even when __new__
                    # returns the existing singleton, which would wipe the
                    # cached temporary-directory settings.
                    instance._tmpdir_buffersize = None
                    cls._instance = instance

        return cls._instance

    @property
    def tmpdir_buffersize(self) -> tuple[str, int]:
        """Return the path to a temporary directory and the preferred buffer
        size to use when reading/writing files from/to that directory.
        """
        if self._tmpdir_buffersize is not None:
            return self._tmpdir_buffersize

        # Retrieve and cache the path and the blocksize for the temporary
        # directory if no other thread has done that in the meantime.
        with DavResourcePathConfig._lock:
            if self._tmpdir_buffersize is None:
                tmpdir = get_tempdir()
                bufsize = _calc_tmpdir_buffer_size(tmpdir)
                self._tmpdir_buffersize = (tmpdir, bufsize)

        return self._tmpdir_buffersize

    def _destroy(self) -> None:
        """Destroy this class singleton instance.

        Helper method to be used in tests to reset global configuration.
        """
        with DavResourcePathConfig._lock:
            DavResourcePathConfig._instance = None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class DavGlobals:
    """Helper container to encapsulate all the global objects needed by
    this module.
    """

    def __init__(self) -> None:
        # Pool of webDAV clients shared by every DavResourcePath instance.
        # Typed as Any to keep mypy happy.
        self._client_pool: Any = None

        # Configuration shared by every DavResourcePath instance.
        self._config: Any = None

        # Populate (or repopulate) both attributes above.
        self._reset()

    def _reset(self) -> None:
        """
        Initialize all the globals.

        This method is a helper for reinitializing globals in tests.
        """
        # Singleton pool of webdav endpoint configurations, driven by the
        # named environment variable.
        config_pool: DavConfigPool = DavConfigPool("LSST_RESOURCES_WEBDAV_CONFIG")

        # Tear down any existing client pool before building a fresh one.
        # The pool is a thread-safe singleton shared by all instances of
        # DavResourcePath.
        if self._client_pool is not None:
            self._client_pool._destroy()

        self._client_pool = DavClientPool(config_pool)

        # Same for the configuration singleton shared by all
        # DavResourcePath objects.
        if self._config is not None:
            self._config._destroy()

        self._config = DavResourcePathConfig()

    def client_pool(self) -> DavClientPool:
        """Return the pool of reusable webDAV clients."""
        return self._client_pool

    def config(self) -> DavResourcePathConfig:
        """Return the configuration settings for all `DavResourcePath`
        objects.
        """
        return self._config
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# Convenience singleton holding the client pool and the shared configuration
# used by every DavResourcePath in this process; created at import time.
dav_globals: DavGlobals = DavGlobals()
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class DavResourcePath(ResourcePath):
    """WebDAV resource.

    Parameters
    ----------
    uri : `ResourcePathExpression`
        URI to store in object.
    root : `str` or `ResourcePath` or `None`, optional
        Root for relative URIs. Not used in this constructor.
    forceAbsolute : `bool`
        Whether to force absolute URI. A WebDAV URI is always absolute.
    forceDirectory : `bool` or `None`, optional
        Whether this URI represents a directory.
    isTemporary : `bool` or `None`, optional
        Whether this URI represents a temporary resource.

    Notes
    -----
    Each instance lazily acquires a webDAV client from the module-level
    pool and caches the resource's server-side metadata; mutating methods
    invalidate that cache explicitly.
    """
|
|
190
|
+
|
|
191
|
+
    def __init__(
        self,
        uri: ResourcePathExpression,
        root: str | ResourcePath | None = None,
        forceAbsolute: bool = True,
        forceDirectory: bool | None = None,
        isTemporary: bool | None = None,
    ) -> None:
        # Build the internal URL we use to talk to the server, which
        # uses "http" or "https" as scheme instead of "dav" or "davs".
        # NOTE(review): geturl() is usable here because ResourcePath
        # performs URI parsing before __init__ runs — confirm against
        # ResourcePath's constructor contract.
        self._internal_url: str = normalize_url(self.geturl())

        # WebDAV client this path must use to interact with the server.
        # Acquired lazily by the `_client` property.
        self._dav_client: DavClient | None = None

        # Retrieve the configuration shared by all instances of this class.
        self._config: DavResourcePathConfig = dav_globals.config()

        # Cached attributes of this file; see _stat() for the caching
        # policy and _invalidate_metatada_cache() for invalidation.
        self._cached_metadata: DavFileMetadata | None = None
|
|
211
|
+
|
|
212
|
+
    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: ResourcePath | None = None,
        forceAbsolute: bool = False,
        forceDirectory: bool | None = None,
    ) -> tuple[urllib.parse.ParseResult, bool | None]:
        """Correct any issues with the supplied URI.

        This function ensures that the path of the URI is normalized.

        Parameters
        ----------
        parsed : `urllib.parse.ParseResult`
            Parsed URI to fix up.
        root : `ResourcePath` or `None`, optional
            Accepted for signature compatibility with the superclass;
            not used by this implementation.
        forceAbsolute : `bool`, optional
            Accepted for signature compatibility with the superclass;
            not used by this implementation.
        forceDirectory : `bool` or `None`, optional
            Whether the URI is known to refer to a directory.

        Returns
        -------
        parsed : `urllib.parse.ParseResult`
            The URI with a normalized path.
        dirLike : `bool` or `None`
            Whether the URI is directory-like, as determined by the
            superclass.
        """
        # Call the superclass' _fixupPathUri.
        parsed, dirLike = super()._fixupPathUri(parsed, forceDirectory=forceDirectory)

        # Clean the URL's path and ensure dir-like paths end by "/".
        path = normalize_path(parsed.path)
        if dirLike and path != "/":
            path += "/"

        return parsed._replace(path=path), dirLike
|
|
233
|
+
|
|
234
|
+
@property
|
|
235
|
+
def _client(self) -> DavClient:
|
|
236
|
+
"""Return the webDAV client for this resource."""
|
|
237
|
+
# If we already have a client, use it.
|
|
238
|
+
if self._dav_client is not None:
|
|
239
|
+
return self._dav_client
|
|
240
|
+
|
|
241
|
+
# Retrieve the client this resource must use to interact with the
|
|
242
|
+
# server from the global client pool.
|
|
243
|
+
self._dav_client = dav_globals.client_pool().get_client_for_url(self._internal_url)
|
|
244
|
+
return self._dav_client
|
|
245
|
+
|
|
246
|
+
def _stat(self, refresh: bool = False) -> DavFileMetadata:
|
|
247
|
+
"""Retrieve metadata about this resource.
|
|
248
|
+
|
|
249
|
+
We cache this resource's metadata to avoid expensive roundtrips to
|
|
250
|
+
the server for each call.
|
|
251
|
+
|
|
252
|
+
Parameters
|
|
253
|
+
----------
|
|
254
|
+
refresh : `bool`, optional
|
|
255
|
+
If True, metadata is retrieved again from the server even if it
|
|
256
|
+
is already cached.
|
|
257
|
+
|
|
258
|
+
Notes
|
|
259
|
+
-----
|
|
260
|
+
Cached metadata is explicitly invalidated when this resource is
|
|
261
|
+
modified, for instance as a result of calling write(), transfer_from()
|
|
262
|
+
remove(), etc.
|
|
263
|
+
"""
|
|
264
|
+
# Caching metadata is a compromise because each roundtrip is
|
|
265
|
+
# relatively expensive and is fragile if this same resource is
|
|
266
|
+
# modified by a different thread or by a different process.
|
|
267
|
+
if refresh or self._cached_metadata is None:
|
|
268
|
+
self._cached_metadata = self._client.stat(self._internal_url)
|
|
269
|
+
|
|
270
|
+
return self._cached_metadata
|
|
271
|
+
|
|
272
|
+
    def _invalidate_metatada_cache(self) -> None:
        """Invalidate cached metadata for this resource.

        This method is intended to be explicitly invoked when a method
        modifies the content of this resource (e.g. write, remove,
        transfer_from).

        Notes
        -----
        NOTE(review): the method name contains a typo ("metatada"); it is
        kept as-is because every call site in this class uses this
        spelling, and each edit must stand alone.
        """
        self._cached_metadata = None
|
|
280
|
+
|
|
281
|
+
    def mkdir(self) -> None:
        """Create the directory resource if it does not already exist.

        Missing ancestor directories are created as well.

        Raises
        ------
        NotADirectoryError
            Raised if this URI is not directory-like, or if a file already
            exists at this path on the server.
        """
        # Only directory-like URIs (i.e. ending in "/") can be created.
        if not self.isdir():
            raise NotADirectoryError(f"Can not create a directory for file-like URI {self}")

        stat = self._stat()
        if stat.is_dir:
            # Nothing to do: the directory already exists on the server.
            return

        if stat.is_file:
            # A file exists at this path.
            raise NotADirectoryError(
                f"Can not create a directory for {self} because a file already exists at that URL"
            )

        # Target directory does not exist. Create it and its ancestors as
        # needed. We need to test if parent URL is different from self URL,
        # otherwise we could be stuck in a recursive loop
        # where self == parent.
        if self.geturl() != self.parent().geturl():
            self.parent().mkdir()

        self._client.mkcol(self._internal_url)
        self._invalidate_metatada_cache()
|
|
305
|
+
|
|
306
|
+
def exists(self) -> bool:
|
|
307
|
+
"""Check that this resource exists."""
|
|
308
|
+
# Force checking for existence against the server for all the
|
|
309
|
+
# external calls to this method.
|
|
310
|
+
return self._stat(refresh=True).exists
|
|
311
|
+
|
|
312
|
+
def size(self) -> int:
|
|
313
|
+
"""Return the size of the remote resource in bytes."""
|
|
314
|
+
if self.isdir():
|
|
315
|
+
return 0
|
|
316
|
+
|
|
317
|
+
stat = self._stat()
|
|
318
|
+
if not stat.exists:
|
|
319
|
+
raise FileNotFoundError(f"No file or directory found at {self}")
|
|
320
|
+
|
|
321
|
+
return stat.size
|
|
322
|
+
|
|
323
|
+
def info(self) -> dict[str, Any]:
|
|
324
|
+
"""Return metadata details about this resource."""
|
|
325
|
+
return self._client.info(self._internal_url, name=str(self))
|
|
326
|
+
|
|
327
|
+
    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates that
            all data should be read.

        Returns
        -------
        data : `bytes`
            The bytes read from the resource.

        Raises
        ------
        ValueError
            Raised if this resource is a directory.
        FileNotFoundError
            Raised if no file exists at this path.
        """
        # A GET request on a dCache directory returns the contents of the
        # directory in HTML, to be visualized with a browser. This means
        # that we need to check first that this resource is not a directory.
        #
        # Since isdir() only checks that the URL of the resource ends in "/"
        # without actually asking the server, this check is not robust.
        # However, it is a reasonable compromise since it prevents doing
        # an additional roundtrip to the server to retrieve this resource's
        # metadata.
        if self.isdir():
            raise ValueError(f"method read() is not implemented for directory {self}")

        # Now double-check against the server's own metadata, which is
        # authoritative about whether this path really is a directory.
        stat = self._stat()
        if stat.is_dir:
            raise ValueError(f"method read() is not implemented for directory {self}")
        elif not stat.exists:
            raise FileNotFoundError(f"no file found at {self}")
        elif stat.size == 0:
            # This is an empty file.
            return b""

        if size > 0:
            # Partial read: clamp the requested range to the file size.
            end_range = min(stat.size, size) - 1
            return self._client.read_range(self._internal_url, start=0, end=end_range)
        else:
            return self._client.read(self._internal_url)
|
|
362
|
+
|
|
363
|
+
def read_range(
|
|
364
|
+
self,
|
|
365
|
+
start: int,
|
|
366
|
+
end: int | None = None,
|
|
367
|
+
check_exists: bool = False,
|
|
368
|
+
headers: dict[str, str] | None = None,
|
|
369
|
+
) -> bytes:
|
|
370
|
+
"""Read the specified range of the resource and return the bytes read.
|
|
371
|
+
|
|
372
|
+
Parameters
|
|
373
|
+
----------
|
|
374
|
+
start : `int`
|
|
375
|
+
Position of the first byte to read.
|
|
376
|
+
end : `int`, optional
|
|
377
|
+
Position of the last byte to read.
|
|
378
|
+
check_exists : `bool`, optional
|
|
379
|
+
Check the file exists before sending the GET request, which may
|
|
380
|
+
fail if the file does not exist. This is useful when the caller
|
|
381
|
+
has already checked the file exists before doing several
|
|
382
|
+
partial reads, so we want to avoid checking for every call.
|
|
383
|
+
headers : `dict[str, str]`, optional
|
|
384
|
+
Headers to include in the partial GET request.
|
|
385
|
+
"""
|
|
386
|
+
if check_exists:
|
|
387
|
+
stat = self._stat()
|
|
388
|
+
if not stat.is_file:
|
|
389
|
+
raise FileNotFoundError(f"No file found at {self}")
|
|
390
|
+
|
|
391
|
+
if stat.size == 0:
|
|
392
|
+
# This is an empty file.
|
|
393
|
+
return b""
|
|
394
|
+
|
|
395
|
+
headers = {} if headers is None else dict(headers)
|
|
396
|
+
headers.update({"Accept-Encoding": "identity"})
|
|
397
|
+
return self._client.read_range(self._internal_url, start=start, end=end, headers=headers)
|
|
398
|
+
|
|
399
|
+
    @contextlib.contextmanager
    def _as_local(
        self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
    ) -> Iterator[ResourcePath]:
        """Download object and place in temporary directory.

        Parameters
        ----------
        multithreaded : `bool`, optional
            If `True` the transfer will be allowed to attempt to improve
            throughput by using parallel download streams. This may of no
            effect if the URI scheme does not support parallel streams or
            if a global override has been applied. If `False` parallel
            streams will be disabled.
            NOTE(review): this parameter is currently unused by this
            implementation.
        tmpdir : `ResourcePath` or `None`, optional
            Explicit override of the temporary directory to use for remote
            downloads.

        Returns
        -------
        local_uri : `ResourcePath`
            A URI to a local POSIX file corresponding to a local temporary
            downloaded copy of the resource.
        """
        # We need to ensure that this resource is actually a file. dCache
        # responds with a HTML-formatted content to a HTTP GET request to a
        # directory, which is not what we want.
        stat = self._stat()
        if not stat.is_file:
            raise FileNotFoundError(f"No file found at {self}")

        if tmpdir is None:
            # Use the shared temporary directory and its precomputed
            # preferred buffer size.
            local_dir, buffer_size = self._config.tmpdir_buffersize
            tmpdir = ResourcePath(local_dir, forceDirectory=True)
        else:
            # Caller-supplied directory: compute a buffer size suited to
            # that file system.
            buffer_size = _calc_tmpdir_buffer_size(tmpdir.ospath)

        # Download into a uniquely-named temporary file which is deleted
        # when the caller leaves the context.
        with ResourcePath.temporary_uri(suffix=self.getExtension(), prefix=tmpdir, delete=True) as tmp_uri:
            self._client.download(self._internal_url, tmp_uri.ospath, buffer_size)
            yield tmp_uri
|
|
439
|
+
|
|
440
|
+
def write(self, data: BinaryIO | bytes, overwrite: bool = True) -> None:
|
|
441
|
+
"""Write the supplied bytes to the new resource.
|
|
442
|
+
|
|
443
|
+
Parameters
|
|
444
|
+
----------
|
|
445
|
+
data : `bytes`
|
|
446
|
+
The bytes to write to the resource. The entire contents of the
|
|
447
|
+
resource will be replaced.
|
|
448
|
+
overwrite : `bool`, optional
|
|
449
|
+
If `True` the resource will be overwritten if it exists. Otherwise
|
|
450
|
+
the write will fail.
|
|
451
|
+
"""
|
|
452
|
+
if self.isdir():
|
|
453
|
+
raise ValueError(f"Method write() is not implemented for directory {self}")
|
|
454
|
+
|
|
455
|
+
stat = self._stat()
|
|
456
|
+
if stat.is_file and not overwrite:
|
|
457
|
+
raise FileExistsError(f"File {self} exists and overwrite has been disabled")
|
|
458
|
+
|
|
459
|
+
# Create parent directory and upload the data.
|
|
460
|
+
self.parent().mkdir()
|
|
461
|
+
self._client.write(self._internal_url, data)
|
|
462
|
+
self._invalidate_metatada_cache()
|
|
463
|
+
|
|
464
|
+
def remove(self) -> None:
|
|
465
|
+
"""Remove the resource.
|
|
466
|
+
|
|
467
|
+
If the resource is a directory, it must be empty otherwise this
|
|
468
|
+
method raises. Removing a non-existent file or directory is not
|
|
469
|
+
considered an error.
|
|
470
|
+
"""
|
|
471
|
+
if not self.exists():
|
|
472
|
+
return
|
|
473
|
+
|
|
474
|
+
if self.isdir():
|
|
475
|
+
entries = self._client.read_dir(self._internal_url)
|
|
476
|
+
if len(entries) > 0:
|
|
477
|
+
raise IsADirectoryError(f"directory {self} is not empty")
|
|
478
|
+
|
|
479
|
+
# This resource is a either file or an empty directory, we can remove
|
|
480
|
+
# it.
|
|
481
|
+
self._client.delete(self._internal_url)
|
|
482
|
+
self._invalidate_metatada_cache()
|
|
483
|
+
|
|
484
|
+
    def remove_dir(self, recursive: bool = False) -> None:
        """Remove a directory if empty.

        Parameters
        ----------
        recursive : `bool`
            If `True` recursively remove all files and directories under this
            directory.

        Raises
        ------
        NotADirectoryError
            Raised if this URI is not directory-like.
        IsADirectoryError
            Raised if the directory is not empty and ``recursive`` is
            `False`.

        Notes
        -----
        This method is not present in the superclass.

        NOTE(review): walk() already descends into subdirectories, and the
        loop below also recurses into them explicitly; the later walk()
        steps see the already-removed directories as non-existent and
        yield nothing, so the result is correct but some redundant server
        roundtrips occur.
        """
        if not self.isdir():
            raise NotADirectoryError(f"{self} is not a directory")

        for root, subdirs, files in self.walk():
            # Fail fast on a non-empty directory when recursion was not
            # requested.
            if not recursive and (len(subdirs) > 0 or len(files) > 0):
                raise IsADirectoryError(f"directory {self} is not empty and recursive argument is False")

            for file in files:
                root.join(file).remove()

            for subdir in subdirs:
                DavResourcePath(root.join(subdir, forceDirectory=True)).remove_dir(recursive=recursive)

        # Remove empty top directory
        self.remove()
|
|
512
|
+
|
|
513
|
+
    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str = "copy",
        overwrite: bool = False,
        transaction: TransactionProtocol | None = None,
        multithreaded: bool = True,
    ) -> None:
        """Transfer to this URI from another.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.
        multithreaded : `bool`, optional
            If `True` the transfer will be allowed to attempt to improve
            throughput by using parallel download streams. This may of no
            effect if the URI scheme does not support parallel streams or
            if a global override has been applied. If `False` parallel
            streams will be disabled.
        """
        # Fail early to prevent delays if remote resources are requested.
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # The destination existence check is always needed for the
        # overwrite test below. The *source* existence check costs an
        # extra roundtrip, so it is only performed when debug logging is
        # enabled.
        destination_exists = self.exists()
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                destination_exists,
                transfer,
            )

        # Short circuit immediately if the URIs are identical.
        if self == src:
            log.debug(
                "Target and destination URIs are identical: %s, returning immediately."
                " No further action required.",
                self,
            )
            return

        if not overwrite and destination_exists:
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        # We can use webDAV 'COPY' or 'MOVE' if both the current and source
        # resources are located in the same server.
        if isinstance(src, type(self)) and self.root_uri() == src.root_uri():
            log.debug("Transfer from %s to %s directly", src, self)
            return (
                self._move_from(src, overwrite=overwrite)
                if transfer == "move"
                else self._copy_from(src, overwrite=overwrite)
            )

        # For resources of different classes we can perform the copy or move
        # operation by downloading to a local file and uploading to the
        # destination.
        self._copy_via_local(src)

        # This was an explicit move, try to remove the source.
        if transfer == "move":
            src.remove()
|
|
593
|
+
|
|
594
|
+
def _copy_via_local(self, source: ResourcePath) -> None:
|
|
595
|
+
"""Replace the contents of this resource with the contents of a remote
|
|
596
|
+
resource by using a local temporary file.
|
|
597
|
+
|
|
598
|
+
Parameters
|
|
599
|
+
----------
|
|
600
|
+
source : `ResourcePath`
|
|
601
|
+
The source of the contents to copy to `self`.
|
|
602
|
+
"""
|
|
603
|
+
with source.as_local() as local_uri:
|
|
604
|
+
log.debug("Transfer from %s to %s via local file %s", source, self, local_uri)
|
|
605
|
+
with open(local_uri.ospath, "rb") as f:
|
|
606
|
+
self.write(data=f)
|
|
607
|
+
|
|
608
|
+
self._invalidate_metatada_cache()
|
|
609
|
+
|
|
610
|
+
def _copy_from(self, source: DavResourcePath, overwrite: bool = False) -> None:
|
|
611
|
+
"""Copy the contents of `source` to this resource. `source` must
|
|
612
|
+
be a file.
|
|
613
|
+
"""
|
|
614
|
+
# Copy is only supported for files, not directories.
|
|
615
|
+
if source.isdir():
|
|
616
|
+
raise ValueError(f"Copy is not supported for directory {source}")
|
|
617
|
+
|
|
618
|
+
src_stat = source._stat()
|
|
619
|
+
if not src_stat.is_file:
|
|
620
|
+
raise FileNotFoundError(f"No such file {source}")
|
|
621
|
+
|
|
622
|
+
dst_stat = self._stat()
|
|
623
|
+
if dst_stat.is_dir:
|
|
624
|
+
raise ValueError(f"Copy is not supported because destination {self} is a directory")
|
|
625
|
+
|
|
626
|
+
self.parent().mkdir()
|
|
627
|
+
self._client.copy(source._internal_url, self._internal_url, overwrite)
|
|
628
|
+
self._invalidate_metatada_cache()
|
|
629
|
+
|
|
630
|
+
def _move_from(self, source: DavResourcePath, overwrite: bool = False) -> None:
|
|
631
|
+
"""Send a MOVE webDAV request to replace the contents of this resource
|
|
632
|
+
with the contents of another resource located in the same server.
|
|
633
|
+
|
|
634
|
+
Parameters
|
|
635
|
+
----------
|
|
636
|
+
source : `DavResourcePath`
|
|
637
|
+
The source of the contents to move to `self`.
|
|
638
|
+
"""
|
|
639
|
+
# Move is only supported for files, not directories.
|
|
640
|
+
if source.isdir():
|
|
641
|
+
raise ValueError(f"Move is not supported for directory {source}")
|
|
642
|
+
|
|
643
|
+
src_stat = source._stat()
|
|
644
|
+
if not src_stat.is_file:
|
|
645
|
+
raise FileNotFoundError(f"No such file {source}")
|
|
646
|
+
|
|
647
|
+
dst_stat = self._stat()
|
|
648
|
+
if dst_stat.is_dir:
|
|
649
|
+
raise ValueError(f"Move is not supported for destination directory {self}")
|
|
650
|
+
|
|
651
|
+
# Create the destination's parent directory, move the source to
|
|
652
|
+
# this resource and invalidate caches for both.
|
|
653
|
+
self.parent().mkdir()
|
|
654
|
+
self._client.move(source._internal_url, self._internal_url, overwrite)
|
|
655
|
+
self._invalidate_metatada_cache()
|
|
656
|
+
source._invalidate_metatada_cache()
|
|
657
|
+
|
|
658
|
+
    def walk(
        self, file_filter: str | re.Pattern | None = None
    ) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not directory-like.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        # We must return no entries for non-existent directories.
        if not self._stat().exists:
            return

        # Retrieve the entries in this directory and split them into
        # files and subdirectories.
        entries = self._client.read_dir(self._internal_url)
        files = [e.name for e in entries if e.is_file]
        subdirs = [e.name for e in entries if e.is_dir]

        # Compile a string filter once. The filter applies to file names
        # only, never to directory names.
        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        if file_filter is not None:
            files = [f for f in files if file_filter.search(f)]

        # An empty (but existing) directory yields nothing at all;
        # otherwise yield this directory first, then recurse depth-first
        # into each subdirectory.
        if not subdirs and not files:
            return
        else:
            yield type(self)(self, forceAbsolute=False, forceDirectory=True), subdirs, files

        for subdir in subdirs:
            new_uri = self.join(subdir, forceDirectory=True)
            yield from new_uri.walk(file_filter)
|
|
704
|
+
|
|
705
|
+
def generate_presigned_get_url(self, *, expiration_time_seconds: int) -> str:
|
|
706
|
+
"""Return a pre-signed URL that can be used to retrieve this resource
|
|
707
|
+
using an HTTP GET without supplying any access credentials.
|
|
708
|
+
|
|
709
|
+
Parameters
|
|
710
|
+
----------
|
|
711
|
+
expiration_time_seconds : `int`
|
|
712
|
+
Number of seconds until the generated URL is no longer valid.
|
|
713
|
+
|
|
714
|
+
Returns
|
|
715
|
+
-------
|
|
716
|
+
url : `str`
|
|
717
|
+
HTTP URL signed for GET.
|
|
718
|
+
"""
|
|
719
|
+
return self._client.generate_presigned_get_url(self._internal_url, expiration_time_seconds)
|
|
720
|
+
|
|
721
|
+
def generate_presigned_put_url(self, *, expiration_time_seconds: int) -> str:
    """Create a time-limited URL allowing a file to be uploaded to this
    path via an anonymous HTTP PUT, with no credentials required.

    Parameters
    ----------
    expiration_time_seconds : `int`
        Lifetime of the signed URL, in seconds.

    Returns
    -------
    url : `str`
        Signed URL usable with an unauthenticated HTTP PUT request.
    """
    # Signing is entirely delegated to the WebDAV client.
    client = self._client
    return client.generate_presigned_put_url(self._internal_url, expiration_time_seconds)
|
|
736
|
+
|
|
737
|
+
def to_fsspec(self) -> tuple[DavFileSystem, str]:
    """Return an abstract file system and path that can be used by fsspec.

    Returns
    -------
    fs : `fsspec.spec.AbstractFileSystem`
        A file system object suitable for use with the returned path.
    path : `str`
        A path that can be opened by the file system object.

    Raises
    ------
    ImportError
        Raised if the fsspec package cannot be imported, or if fsspec
        support is disabled in this client's configuration.
    """
    # Distinguish "package missing" from "support disabled by config" so
    # the error message does not mislead; keep ImportError in both cases
    # for backward compatibility with callers that catch it.
    if fsspec is None:
        raise ImportError("fsspec is not available")
    if not self._client._config.enable_fsspec:
        raise ImportError("fsspec support is disabled by configuration")

    path: str = self.path
    return DavFileSystem(self, path), path
|
|
752
|
+
|
|
753
|
+
@contextlib.contextmanager
def _openImpl(
    self,
    mode: str = "r",
    *,
    encoding: str | None = None,
) -> Iterator[ResourceHandleProtocol]:
    """Yield a handle for reading or writing this resource.

    Read modes use a ranged-read handle when the server advertises
    support for HTTP range requests; every other case is delegated to
    the superclass implementation.
    """
    if self.isdir():
        raise OSError(f"open is not implemented for directory {self}")

    ranged_read = mode in ("rb", "r") and self._client.accepts_ranges(self._internal_url)
    if not ranged_read:
        # Writes, and reads from servers without range support, fall
        # back to the generic implementation.
        with super()._openImpl(mode, encoding=encoding) as fallback:
            yield fallback
        return

    metadata: DavFileMetadata = self._stat(refresh=True)
    if not metadata.exists:
        raise FileNotFoundError(f"No such file {self}")
    if not metadata.is_file:
        raise OSError(f"open is not implemented for directory {self}")

    raw: ResourceHandleProtocol = DavReadResourceHandle(mode, log, self, metadata)
    if mode == "r":
        # cast because the handle satisfies the protocol TextIOWrapper
        # requires, but BytesIO is not in its inheritance tree.
        yield io.TextIOWrapper(cast(Any, raw), encoding=encoding)
    else:
        yield raw
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
class DavFileSystem(AbstractFileSystem):
    """Minimal fsspec-compatible read-only file system which contains a single
    file.

    Only the one path supplied at construction time exists; every query
    about any other path raises `FileNotFoundError`, and all mutating or
    tree-walking operations raise `NotImplementedError`.

    Parameters
    ----------
    uri : `DavResourcePath`
        URI of the single resource contained in the file system.
    path : `str`
        Path within the file system of the file.
    """

    def __init__(self, uri: DavResourcePath, path: str):
        # The single resource this file system exposes, and the path
        # under which it is exposed.
        self._uri: DavResourcePath = uri
        self._path: str = path

    def info(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Return the fsspec info dictionary for ``path``."""
        if path != self._path:
            raise FileNotFoundError(path)

        return {
            "name": path,
            "size": self._uri.size(),
            "type": "file",
        }

    def ls(self, path: str, detail: bool = True, **kwargs: Any) -> list[str] | list[dict[str, str]]:
        """List the single entry at ``path``.

        Per the fsspec ``AbstractFileSystem.ls`` contract this returns a
        one-element list of info dictionaries when ``detail`` is true,
        and a one-element list of path names otherwise.
        """
        if path != self._path:
            raise FileNotFoundError(path)

        # Bug fix: the previous implementation returned list(info_dict)
        # (the dict's keys) and list(path) (the individual characters of
        # the path string) instead of one-element lists.
        return [self.info(path)] if detail else [path]

    def modified(self, path: str) -> datetime.datetime:
        """Return the last-modification time of ``path``."""
        if path != self._path:
            raise FileNotFoundError(path)

        return self._uri._stat().last_modified

    def size(self, path: str) -> int:
        """Return the size in bytes of ``path``."""
        if path != self._path:
            raise FileNotFoundError(path)

        return self._uri.size()

    def isfile(self, path: str) -> bool:
        # The only path that exists is a file.
        return path == self._path

    def isdir(self, path: str) -> bool:
        # This file system contains no directories.
        return False

    def exists(self, path: str, **kwargs: Any) -> bool:
        return path == self._path

    def open(
        self,
        path: str,
        mode: str = "rb",
        encoding: str | None = None,
        block_size: int | None = None,
        cache_options: dict[Any, Any] | None = None,
        compression: str | None = None,
        **kwargs: Any,
    ) -> ResourceHandleProtocol[Any]:
        """Open the single file and return its handle.

        Only ``path``, ``mode`` and ``encoding`` are honored; the
        remaining fsspec parameters are accepted for signature
        compatibility and ignored.
        """
        if path != self._path:
            raise FileNotFoundError(path)

        # NOTE(review): the handle is returned after the ``with`` block
        # exits, i.e. after the context manager's cleanup has run.
        # Presumably the underlying DAV read handle remains usable after
        # the context exits — TODO confirm, otherwise callers receive a
        # closed handle.
        with self._uri.open(mode=mode, encoding=encoding) as handle:
            return handle

    @property
    def fsid(self) -> Any:
        raise NotImplementedError

    def mkdir(self, path: str, create_parents: bool = True, **kwargs: Any) -> None:
        raise NotImplementedError

    def makedirs(self, path: str, exist_ok: bool = False) -> None:
        raise NotImplementedError

    def rmdir(self, path: str) -> None:
        raise NotImplementedError

    def walk(
        self,
        path: str,
        maxdepth: int | None = None,
        topdown: bool = True,
        on_error: str = "omit",
        **kwargs: Any,
    ) -> None:
        raise NotImplementedError

    def find(
        self,
        path: str,
        maxdepth: int | None = None,
        withdirs: bool = False,
        detail: bool = False,
        **kwargs: Any,
    ) -> None:
        raise NotImplementedError

    def du(
        self,
        path: str,
        total: bool = True,
        maxdepth: int | None = None,
        withdirs: bool = False,
        **kwargs: Any,
    ) -> None:
        raise NotImplementedError

    def glob(self, path: str, maxdepth: int | None = None, **kwargs: Any) -> None:
        raise NotImplementedError

    def rm_file(self, path: str) -> None:
        raise NotImplementedError

    def rm(self, path: str, recursive: bool = False, maxdepth: int | None = None) -> None:
        raise NotImplementedError

    def touch(self, path: str, truncate: bool = True, **kwargs: Any) -> None:
        raise NotImplementedError

    def ukey(self, path: str) -> None:
        raise NotImplementedError

    def created(self, path: str) -> None:
        raise NotImplementedError
|