lsst-resources 29.0.0rc7__py3-none-any.whl → 29.2025.4600__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lsst/resources/dav.py ADDED
@@ -0,0 +1,912 @@
+ # This file is part of lsst-resources.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (https://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # Use of this source code is governed by a 3-clause BSD-style
+ # license that can be found in the LICENSE file.
+
+ from __future__ import annotations
+
+ __all__ = ("DavResourcePath",)
+
+ import contextlib
+ import datetime
+ import functools
+ import io
+ import logging
+ import os
+ import re
+ import threading
+ import urllib
+ from collections.abc import Iterator
+ from typing import TYPE_CHECKING, Any, BinaryIO, cast
+
+ try:
+     import fsspec
+     from fsspec.spec import AbstractFileSystem
+ except ImportError:
+     fsspec = None
+     AbstractFileSystem = type
+
+ from ._resourceHandles import ResourceHandleProtocol
+ from ._resourceHandles._davResourceHandle import DavReadResourceHandle
+ from ._resourcePath import ResourcePath, ResourcePathExpression
+ from .davutils import (
+     DavClient,
+     DavClientPool,
+     DavConfigPool,
+     DavFileMetadata,
+     normalize_path,
+     normalize_url,
+ )
+ from .utils import get_tempdir
+
+ if TYPE_CHECKING:
+     from .utils import TransactionProtocol
+
+
+ log = logging.getLogger(__name__)
+
+
+ @functools.lru_cache
+ def _calc_tmpdir_buffer_size(tmpdir: str) -> int:
+     """Compute the block size to use for writing files in `tmpdir` as
+     256 blocks of typical size (i.e. 4096 bytes) or 10 times the file system
+     block size, whichever is higher.
+
+     This is a reasonable compromise between using memory for buffering and
+     the number of system calls issued to read from or write to temporary
+     files.
+     """
+     fsstats = os.statvfs(tmpdir)
+     return max(10 * fsstats.f_bsize, 256 * 4096)
+
+
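As a rough illustration of the formula above (an editorial sketch, not part of the module): for a filesystem reporting a typical 4096-byte block size the 256-block floor wins, while a filesystem with a very large block size is buffered at 10 blocks' worth.

    # Illustrative arithmetic only; the f_bsize values are hypothetical.
    max(10 * 4096, 256 * 4096)       # == 1_048_576 bytes (1 MiB) for a 4 KiB block size
    max(10 * 1_048_576, 256 * 4096)  # == 10_485_760 bytes for a 1 MiB block size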
+ class DavResourcePathConfig:
+     """Configuration class to encapsulate the configurable items used by
+     all instances of class `DavResourcePath`.
+
+     Instantiating this class creates a thread-safe singleton.
+     """
+
+     _instance = None
+     _lock = threading.Lock()
+
+     def __new__(cls) -> DavResourcePathConfig:
+         if cls._instance is None:
+             with cls._lock:
+                 if cls._instance is None:
+                     cls._instance = super().__new__(cls)
+
+         return cls._instance
+
+     def __init__(self) -> None:
+         # Path to the local temporary directory all instances of
+         # `DavResourcePath` must use and its associated buffer size (in bytes).
+         self._tmpdir_buffersize: tuple[str, int] | None = None
+
+     @property
+     def tmpdir_buffersize(self) -> tuple[str, int]:
+         """Return the path to a temporary directory and the preferred buffer
+         size to use when reading/writing files from/to that directory.
+         """
+         if self._tmpdir_buffersize is not None:
+             return self._tmpdir_buffersize
+
+         # Retrieve and cache the path and the blocksize for the temporary
+         # directory if no other thread has done that in the meantime.
+         with DavResourcePathConfig._lock:
+             if self._tmpdir_buffersize is None:
+                 tmpdir = get_tempdir()
+                 bufsize = _calc_tmpdir_buffer_size(tmpdir)
+                 self._tmpdir_buffersize = (tmpdir, bufsize)
+
+         return self._tmpdir_buffersize
+
+     def _destroy(self) -> None:
+         """Destroy this class singleton instance.
+
+         Helper method to be used in tests to reset global configuration.
+         """
+         with DavResourcePathConfig._lock:
+             DavResourcePathConfig._instance = None
+
+
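The double-checked locking in __new__ above makes every instantiation return the same object. A minimal sketch of the behaviour this guarantees, assuming the module is importable in the current environment:

    from lsst.resources.dav import DavResourcePathConfig

    a = DavResourcePathConfig()
    b = DavResourcePathConfig()
    assert a is b                                      # one shared configuration instance
    assert a.tmpdir_buffersize == b.tmpdir_buffersize  # same cached (tmpdir, bufsize) pair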
+ class DavGlobals:
+     """Helper container to encapsulate all the global objects needed by this
+     module.
+     """
+
+     def __init__(self) -> None:
+         # Client pool used by all DavResourcePath instances.
+         # Use Any as type annotation to keep mypy happy.
+         self._client_pool: Any = None
+
+         # Configuration used by all DavResourcePath instances.
+         self._config: Any = None
+
+         # (Re)Initialize the objects above.
+         self._reset()
+
+     def _reset(self) -> None:
+         """
+         Initialize all the globals.
+
+         This method is a helper for reinitializing globals in tests.
+         """
+         # Initialize the singleton instance of the webdav endpoint
+         # configuration pool.
+         config_pool: DavConfigPool = DavConfigPool("LSST_RESOURCES_WEBDAV_CONFIG")
+
+         # Initialize the singleton instance of the webdav client pool. This is
+         # a thread-safe singleton shared by all instances of DavResourcePath.
+         if self._client_pool is not None:
+             self._client_pool._destroy()
+
+         self._client_pool = DavClientPool(config_pool)
+
+         # Initialize the singleton instance of the configuration shared by
+         # all DavResourcePath objects.
+         if self._config is not None:
+             self._config._destroy()
+
+         self._config = DavResourcePathConfig()
+
+     def client_pool(self) -> DavClientPool:
+         """Return the pool of reusable webDAV clients."""
+         return self._client_pool
+
+     def config(self) -> DavResourcePathConfig:
+         """Return the configuration settings for all `DavResourcePath`
+         objects.
+         """
+         return self._config
+
+
+ # Convenience object to encapsulate all global objects needed by this module.
+ dav_globals: DavGlobals = DavGlobals()
+
+
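A sketch of how this module-level object is used: every DavResourcePath pointing at the same endpoint shares one client from the pool, keyed by its normalized internal URL (the URL below is hypothetical):

    from lsst.resources.dav import dav_globals

    client = dav_globals.client_pool().get_client_for_url("https://webdav.example.org/data/file.bin")
    tmpdir, bufsize = dav_globals.config().tmpdir_buffersize  # shared temporary-directory settings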
+ class DavResourcePath(ResourcePath):
+     """WebDAV resource.
+
+     Parameters
+     ----------
+     uri : `ResourcePathExpression`
+         URI to store in object.
+     root : `str` or `ResourcePath` or `None`, optional
+         Root for relative URIs. Not used in this constructor.
+     forceAbsolute : `bool`
+         Whether to force absolute URI. A WebDAV URI is always absolute.
+     forceDirectory : `bool` or `None`, optional
+         Whether this URI represents a directory.
+     isTemporary : `bool` or `None`, optional
+         Whether this URI represents a temporary resource.
+     """
+
+     def __init__(
+         self,
+         uri: ResourcePathExpression,
+         root: str | ResourcePath | None = None,
+         forceAbsolute: bool = True,
+         forceDirectory: bool | None = None,
+         isTemporary: bool | None = None,
+     ) -> None:
+         # Build the internal URL we use to talk to the server, which
+         # uses "http" or "https" as scheme instead of "dav" or "davs".
+         self._internal_url: str = normalize_url(self.geturl())
+
+         # WebDAV client this path must use to interact with the server.
+         self._dav_client: DavClient | None = None
+
+         # Retrieve the configuration shared by all instances of this class.
+         self._config: DavResourcePathConfig = dav_globals.config()
+
+         # Cached attributes of this file
+         self._cached_metadata: DavFileMetadata | None = None
+
+     @classmethod
+     def _fixupPathUri(
+         cls,
+         parsed: urllib.parse.ParseResult,
+         root: ResourcePath | None = None,
+         forceAbsolute: bool = False,
+         forceDirectory: bool | None = None,
+     ) -> tuple[urllib.parse.ParseResult, bool | None]:
+         """Correct any issues with the supplied URI.
+
+         This function ensures that the path of the URI is normalized.
+         """
+         # Call the superclass' _fixupPathUri.
+         parsed, dirLike = super()._fixupPathUri(parsed, forceDirectory=forceDirectory)
+
+         # Clean the URL's path and ensure dir-like paths end by "/".
+         path = normalize_path(parsed.path)
+         if dirLike and path != "/":
+             path += "/"
+
+         return parsed._replace(path=path), dirLike
+
+     @property
+     def _client(self) -> DavClient:
+         """Return the webDAV client for this resource."""
+         # If we already have a client, use it.
+         if self._dav_client is not None:
+             return self._dav_client
+
+         # Retrieve the client this resource must use to interact with the
+         # server from the global client pool.
+         self._dav_client = dav_globals.client_pool().get_client_for_url(self._internal_url)
+         return self._dav_client
+
+     def _stat(self, refresh: bool = False) -> DavFileMetadata:
+         """Retrieve metadata about this resource.
+
+         We cache this resource's metadata to avoid expensive roundtrips to
+         the server for each call.
+
+         Parameters
+         ----------
+         refresh : `bool`, optional
+             If True, metadata is retrieved again from the server even if it
+             is already cached.
+
+         Notes
+         -----
+         Cached metadata is explicitly invalidated when this resource is
+         modified, for instance as a result of calling write(), transfer_from(),
+         remove(), etc.
+         """
+         # Caching metadata is a compromise because each roundtrip is
+         # relatively expensive and is fragile if this same resource is
+         # modified by a different thread or by a different process.
+         if refresh or self._cached_metadata is None:
+             self._cached_metadata = self._client.stat(self._internal_url)
+
+         return self._cached_metadata
+
+     def _invalidate_metatada_cache(self) -> None:
+         """Invalidate cached metadata for this resource.
+
+         This method is intended to be explicitly invoked when a method
+         modifies the content of this resource (e.g. write, remove,
+         transfer_from).
+         """
+         self._cached_metadata = None
+
+     def mkdir(self) -> None:
+         """Create the directory resource if it does not already exist."""
+         if not self.isdir():
+             raise NotADirectoryError(f"Can not create a directory for file-like URI {self}")
+
+         stat = self._stat()
+         if stat.is_dir:
+             return
+
+         if stat.is_file:
+             # A file exists at this path.
+             raise NotADirectoryError(
+                 f"Can not create a directory for {self} because a file already exists at that URL"
+             )
+
+         # Target directory does not exist. Create it and its ancestors as
+         # needed. We need to test if parent URL is different from self URL,
+         # otherwise we could be stuck in a recursive loop
+         # where self == parent.
+         if self.geturl() != self.parent().geturl():
+             self.parent().mkdir()
+
+         self._client.mkcol(self._internal_url)
+         self._invalidate_metatada_cache()
+
+     def exists(self) -> bool:
+         """Check that this resource exists."""
+         # Force checking for existence against the server for all the
+         # external calls to this method.
+         return self._stat(refresh=True).exists
+
+     def size(self) -> int:
+         """Return the size of the remote resource in bytes."""
+         if self.isdir():
+             return 0
+
+         stat = self._stat()
+         if not stat.exists:
+             raise FileNotFoundError(f"No file or directory found at {self}")
+
+         return stat.size
+
+     def info(self) -> dict[str, Any]:
+         """Return metadata details about this resource."""
+         return self._client.info(self._internal_url, name=str(self))
+
+     def read(self, size: int = -1) -> bytes:
+         """Open the resource and return the contents in bytes.
+
+         Parameters
+         ----------
+         size : `int`, optional
+             The number of bytes to read. Negative or omitted indicates that
+             all data should be read.
+         """
+         # A GET request on a dCache directory returns the contents of the
+         # directory in HTML, to be visualized with a browser. This means
+         # that we need to check first that this resource is not a directory.
+         #
+         # Since isdir() only checks that the URL of the resource ends in "/"
+         # without actually asking the server, this check is not robust.
+         # However, it is a reasonable compromise since it prevents doing
+         # an additional roundtrip to the server to retrieve this resource's
+         # metadata.
+         if self.isdir():
+             raise ValueError(f"method read() is not implemented for directory {self}")
+
+         stat = self._stat()
+         if stat.is_dir:
+             raise ValueError(f"method read() is not implemented for directory {self}")
+         elif not stat.exists:
+             raise FileNotFoundError(f"no file found at {self}")
+         elif stat.size == 0:
+             # This is an empty file.
+             return b""
+
+         if size > 0:
+             end_range = min(stat.size, size) - 1
+             return self._client.read_range(self._internal_url, start=0, end=end_range)
+         else:
+             return self._client.read(self._internal_url)
+
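For example, a bounded read is turned into a ranged GET: asking for the first size bytes requests bytes 0 through min(size, file size) - 1. A sketch, assuming the davs scheme resolves to this class; the URL is purely hypothetical:

    from lsst.resources import ResourcePath

    rp = ResourcePath("davs://webdav.example.org/data/events.json")
    head = rp.read(size=100)  # the server is asked for bytes 0-99
    whole = rp.read()         # plain GET of the entire resource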
+     def read_range(
+         self,
+         start: int,
+         end: int | None = None,
+         check_exists: bool = False,
+         headers: dict[str, str] | None = None,
+     ) -> bytes:
+         """Read the specified range of the resource and return the bytes read.
+
+         Parameters
+         ----------
+         start : `int`
+             Position of the first byte to read.
+         end : `int`, optional
+             Position of the last byte to read.
+         check_exists : `bool`, optional
+             If `True`, check that the file exists before sending the GET
+             request, which may fail if the file does not exist. Pass `False`
+             when the caller has already checked that the file exists before
+             doing several partial reads, to avoid one check per call.
+         headers : `dict[str, str]`, optional
+             Headers to include in the partial GET request.
+         """
+         if check_exists:
+             stat = self._stat()
+             if not stat.is_file:
+                 raise FileNotFoundError(f"No file found at {self}")
+
+             if stat.size == 0:
+                 # This is an empty file.
+                 return b""
+
+         headers = {} if headers is None else dict(headers)
+         headers.update({"Accept-Encoding": "identity"})
+         return self._client.read_range(self._internal_url, start=start, end=end, headers=headers)
+
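A sketch of several partial reads that pay for the existence check only once, as the check_exists parameter is meant to allow (hypothetical URL):

    from lsst.resources import ResourcePath

    rp = ResourcePath("davs://webdav.example.org/data/catalog.bin")
    if rp.exists():
        # Existence already verified, so skip the per-call check.
        first = rp.read_range(start=0, end=1023, check_exists=False)      # bytes 0-1023
        second = rp.read_range(start=1024, end=2047, check_exists=False)  # bytes 1024-2047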
+     @contextlib.contextmanager
+     def _as_local(
+         self, multithreaded: bool = True, tmpdir: ResourcePath | None = None
+     ) -> Iterator[ResourcePath]:
+         """Download object and place in temporary directory.
+
+         Parameters
+         ----------
+         multithreaded : `bool`, optional
+             If `True` the transfer will be allowed to attempt to improve
+             throughput by using parallel download streams. This may have no
+             effect if the URI scheme does not support parallel streams or
+             if a global override has been applied. If `False` parallel
+             streams will be disabled.
+         tmpdir : `ResourcePath` or `None`, optional
+             Explicit override of the temporary directory to use for remote
+             downloads.
+
+         Returns
+         -------
+         local_uri : `ResourcePath`
+             A URI to a local POSIX file corresponding to a local temporary
+             downloaded copy of the resource.
+         """
+         # We need to ensure that this resource is actually a file. dCache
+         # responds with HTML-formatted content to an HTTP GET request to a
+         # directory, which is not what we want.
+         stat = self._stat()
+         if not stat.is_file:
+             raise FileNotFoundError(f"No file found at {self}")
+
+         if tmpdir is None:
+             local_dir, buffer_size = self._config.tmpdir_buffersize
+             tmpdir = ResourcePath(local_dir, forceDirectory=True)
+         else:
+             buffer_size = _calc_tmpdir_buffer_size(tmpdir.ospath)
+
+         with ResourcePath.temporary_uri(suffix=self.getExtension(), prefix=tmpdir, delete=True) as tmp_uri:
+             self._client.download(self._internal_url, tmp_uri.ospath, buffer_size)
+             yield tmp_uri
+
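This generator backs the public as_local() context manager inherited from ResourcePath; a usage sketch (hypothetical URL):

    from lsst.resources import ResourcePath

    rp = ResourcePath("davs://webdav.example.org/data/image.fits")
    # The remote file is downloaded into the shared temporary directory and
    # the local copy is deleted when the block exits.
    with rp.as_local() as local_uri:
        with open(local_uri.ospath, "rb") as f:
            payload = f.read()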
+     def write(self, data: BinaryIO | bytes, overwrite: bool = True) -> None:
+         """Write the supplied bytes to the new resource.
+
+         Parameters
+         ----------
+         data : `BinaryIO` or `bytes`
+             The bytes to write to the resource. The entire contents of the
+             resource will be replaced.
+         overwrite : `bool`, optional
+             If `True` the resource will be overwritten if it exists. Otherwise
+             the write will fail.
+         """
+         if self.isdir():
+             raise ValueError(f"Method write() is not implemented for directory {self}")
+
+         stat = self._stat()
+         if stat.is_file and not overwrite:
+             raise FileExistsError(f"File {self} exists and overwrite has been disabled")
+
+         # Create parent directory and upload the data.
+         self.parent().mkdir()
+         self._client.write(self._internal_url, data)
+         self._invalidate_metatada_cache()
+
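A sketch of writing bytes and of the overwrite guard (hypothetical URL):

    from lsst.resources import ResourcePath

    rp = ResourcePath("davs://webdav.example.org/scratch/report.txt")
    rp.write(b"first version")  # parent collections are created as needed
    try:
        rp.write(b"second version", overwrite=False)
    except FileExistsError:
        pass  # the file already exists and overwriting was explicitly disabled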
+     def remove(self) -> None:
+         """Remove the resource.
+
+         If the resource is a directory, it must be empty, otherwise this
+         method raises. Removing a non-existent file or directory is not
+         considered an error.
+         """
+         if not self.exists():
+             return
+
+         if self.isdir():
+             entries = self._client.read_dir(self._internal_url)
+             if len(entries) > 0:
+                 raise IsADirectoryError(f"directory {self} is not empty")
+
+         # This resource is either a file or an empty directory, so we can
+         # remove it.
+         self._client.delete(self._internal_url)
+         self._invalidate_metatada_cache()
+
+     def remove_dir(self, recursive: bool = False) -> None:
+         """Remove a directory if empty.
+
+         Parameters
+         ----------
+         recursive : `bool`
+             If `True` recursively remove all files and directories under this
+             directory.
+
+         Notes
+         -----
+         This method is not present in the superclass.
+         """
+         if not self.isdir():
+             raise NotADirectoryError(f"{self} is not a directory")
+
+         for root, subdirs, files in self.walk():
+             if not recursive and (len(subdirs) > 0 or len(files) > 0):
+                 raise IsADirectoryError(f"directory {self} is not empty and recursive argument is False")
+
+             for file in files:
+                 root.join(file).remove()
+
+             for subdir in subdirs:
+                 DavResourcePath(root.join(subdir, forceDirectory=True)).remove_dir(recursive=recursive)
+
+         # Remove empty top directory
+         self.remove()
+
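A sketch contrasting remove() and remove_dir() on collections (hypothetical URLs):

    from lsst.resources import ResourcePath

    empty = ResourcePath("davs://webdav.example.org/scratch/empty-dir/", forceDirectory=True)
    empty.remove()  # remove() only succeeds on a directory if it is already empty

    tree = ResourcePath("davs://webdav.example.org/scratch/run-42/", forceDirectory=True)
    tree.remove_dir(recursive=True)  # deletes files first, then the emptied subdirectories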
+     def transfer_from(
+         self,
+         src: ResourcePath,
+         transfer: str = "copy",
+         overwrite: bool = False,
+         transaction: TransactionProtocol | None = None,
+         multithreaded: bool = True,
+     ) -> None:
+         """Transfer to this URI from another.
+
+         Parameters
+         ----------
+         src : `ResourcePath`
+             Source URI.
+         transfer : `str`
+             Mode to use for transferring the resource. Generically there are
+             many standard options: copy, link, symlink, hardlink, relsymlink.
+             Not all URIs support all modes.
+         overwrite : `bool`, optional
+             Allow an existing file to be overwritten. Defaults to `False`.
+         transaction : `~lsst.resources.utils.TransactionProtocol`, optional
+             A transaction object that can (depending on implementation)
+             rollback transfers on error. Not guaranteed to be implemented.
+         multithreaded : `bool`, optional
+             If `True` the transfer will be allowed to attempt to improve
+             throughput by using parallel download streams. This may have no
+             effect if the URI scheme does not support parallel streams or
+             if a global override has been applied. If `False` parallel
+             streams will be disabled.
+         """
+         # Fail early to prevent delays if remote resources are requested.
+         if transfer not in self.transferModes:
+             raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")
+
+         # Existence checks cost time, so only check the source below when we
+         # know that debug logging is enabled.
+         destination_exists = self.exists()
+         if log.isEnabledFor(logging.DEBUG):
+             log.debug(
+                 "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
+                 src,
+                 src.exists(),
+                 self,
+                 destination_exists,
+                 transfer,
+             )
+
+         # Short circuit immediately if the URIs are identical.
+         if self == src:
+             log.debug(
+                 "Target and destination URIs are identical: %s, returning immediately."
+                 " No further action required.",
+                 self,
+             )
+             return
+
+         if not overwrite and destination_exists:
+             raise FileExistsError(f"Destination path {self} already exists.")
+
+         if transfer == "auto":
+             transfer = self.transferDefault
+
+         # We can use webDAV 'COPY' or 'MOVE' if both the current and source
+         # resources are located on the same server.
+         if isinstance(src, type(self)) and self.root_uri() == src.root_uri():
+             log.debug("Transfer from %s to %s directly", src, self)
+             return (
+                 self._move_from(src, overwrite=overwrite)
+                 if transfer == "move"
+                 else self._copy_from(src, overwrite=overwrite)
+             )
+
+         # For resources of different classes we can perform the copy or move
+         # operation by downloading to a local file and uploading to the
+         # destination.
+         self._copy_via_local(src)
+
+         # This was an explicit move, try to remove the source.
+         if transfer == "move":
+             src.remove()
+
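A sketch of the two transfer paths described above: a server-side COPY when source and destination share a root, and a staged transfer through a local copy otherwise (hypothetical URLs):

    from lsst.resources import ResourcePath

    src = ResourcePath("davs://webdav.example.org/data/raw/image.fits")
    dest = ResourcePath("davs://webdav.example.org/data/calibrated/image.fits")
    dest.transfer_from(src, transfer="copy")  # same server: a single webDAV COPY request

    local_src = ResourcePath("file:///tmp/image.fits")
    dest.transfer_from(local_src, transfer="copy", overwrite=True)  # uploaded from the local copy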
+     def _copy_via_local(self, source: ResourcePath) -> None:
+         """Replace the contents of this resource with the contents of a remote
+         resource by using a local temporary file.
+
+         Parameters
+         ----------
+         source : `ResourcePath`
+             The source of the contents to copy to `self`.
+         """
+         with source.as_local() as local_uri:
+             log.debug("Transfer from %s to %s via local file %s", source, self, local_uri)
+             with open(local_uri.ospath, "rb") as f:
+                 self.write(data=f)
+
+         self._invalidate_metatada_cache()
+
+     def _copy_from(self, source: DavResourcePath, overwrite: bool = False) -> None:
+         """Copy the contents of `source` to this resource. `source` must
+         be a file.
+         """
+         # Copy is only supported for files, not directories.
+         if source.isdir():
+             raise ValueError(f"Copy is not supported for directory {source}")
+
+         src_stat = source._stat()
+         if not src_stat.is_file:
+             raise FileNotFoundError(f"No such file {source}")
+
+         dst_stat = self._stat()
+         if dst_stat.is_dir:
+             raise ValueError(f"Copy is not supported because destination {self} is a directory")
+
+         self.parent().mkdir()
+         self._client.copy(source._internal_url, self._internal_url, overwrite)
+         self._invalidate_metatada_cache()
+
+     def _move_from(self, source: DavResourcePath, overwrite: bool = False) -> None:
+         """Send a MOVE webDAV request to replace the contents of this resource
+         with the contents of another resource located on the same server.
+
+         Parameters
+         ----------
+         source : `DavResourcePath`
+             The source of the contents to move to `self`.
+         """
+         # Move is only supported for files, not directories.
+         if source.isdir():
+             raise ValueError(f"Move is not supported for directory {source}")
+
+         src_stat = source._stat()
+         if not src_stat.is_file:
+             raise FileNotFoundError(f"No such file {source}")
+
+         dst_stat = self._stat()
+         if dst_stat.is_dir:
+             raise ValueError(f"Move is not supported for destination directory {self}")
+
+         # Create the destination's parent directory, move the source to
+         # this resource and invalidate caches for both.
+         self.parent().mkdir()
+         self._client.move(source._internal_url, self._internal_url, overwrite)
+         self._invalidate_metatada_cache()
+         source._invalidate_metatada_cache()
+
+     def walk(
+         self, file_filter: str | re.Pattern | None = None
+     ) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
+         """Walk the directory tree returning matching files and directories.
+
+         Parameters
+         ----------
+         file_filter : `str` or `re.Pattern`, optional
+             Regex to filter out files from the list before it is returned.
+
+         Yields
+         ------
+         dirpath : `ResourcePath`
+             Current directory being examined.
+         dirnames : `list` of `str`
+             Names of subdirectories within dirpath.
+         filenames : `list` of `str`
+             Names of all the files within dirpath.
+         """
+         if not self.isdir():
+             raise ValueError("Can not walk a non-directory URI")
+
+         # We must return no entries for non-existent directories.
+         if not self._stat().exists:
+             return
+
+         # Retrieve the entries in this directory
+         entries = self._client.read_dir(self._internal_url)
+         files = [e.name for e in entries if e.is_file]
+         subdirs = [e.name for e in entries if e.is_dir]
+
+         # Filter files
+         if isinstance(file_filter, str):
+             file_filter = re.compile(file_filter)
+
+         if file_filter is not None:
+             files = [f for f in files if file_filter.search(f)]
+
+         if not subdirs and not files:
+             return
+         else:
+             yield type(self)(self, forceAbsolute=False, forceDirectory=True), subdirs, files
+
+         for subdir in subdirs:
+             new_uri = self.join(subdir, forceDirectory=True)
+             yield from new_uri.walk(file_filter)
+
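A sketch of walking a collection with a filename filter (hypothetical URL; the extension in the regular expression is arbitrary):

    from lsst.resources import ResourcePath

    top = ResourcePath("davs://webdav.example.org/data/", forceDirectory=True)
    # Each iteration yields (directory, subdirectory names, file names),
    # with file names already filtered by the regular expression.
    for dirpath, dirnames, filenames in top.walk(file_filter=r"\.fits$"):
        for name in filenames:
            print(dirpath.join(name))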
+     def generate_presigned_get_url(self, *, expiration_time_seconds: int) -> str:
+         """Return a pre-signed URL that can be used to retrieve this resource
+         using an HTTP GET without supplying any access credentials.
+
+         Parameters
+         ----------
+         expiration_time_seconds : `int`
+             Number of seconds until the generated URL is no longer valid.
+
+         Returns
+         -------
+         url : `str`
+             HTTP URL signed for GET.
+         """
+         return self._client.generate_presigned_get_url(self._internal_url, expiration_time_seconds)
+
+     def generate_presigned_put_url(self, *, expiration_time_seconds: int) -> str:
+         """Return a pre-signed URL that can be used to upload a file to this
+         path using an HTTP PUT without supplying any access credentials.
+
+         Parameters
+         ----------
+         expiration_time_seconds : `int`
+             Number of seconds until the generated URL is no longer valid.
+
+         Returns
+         -------
+         url : `str`
+             HTTP URL signed for PUT.
+         """
+         return self._client.generate_presigned_put_url(self._internal_url, expiration_time_seconds)
+
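A sketch of handing out a time-limited download link; once generated, the plain GET needs no credentials (hypothetical URL, standard-library HTTP client):

    import urllib.request

    from lsst.resources import ResourcePath

    rp = ResourcePath("davs://webdav.example.org/data/catalog.parquet")
    url = rp.generate_presigned_get_url(expiration_time_seconds=3600)
    with urllib.request.urlopen(url) as response:  # anyone holding the URL can fetch until it expires
        payload = response.read()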
+     def to_fsspec(self) -> tuple[DavFileSystem, str]:
+         """Return an abstract file system and path that can be used by fsspec.
+
+         Returns
+         -------
+         fs : `fsspec.spec.AbstractFileSystem`
+             A file system object suitable for use with the returned path.
+         path : `str`
+             A path that can be opened by the file system object.
+         """
+         if fsspec is None or not self._client._config.enable_fsspec:
+             raise ImportError("fsspec is not available")
+
+         path: str = self.path
+         return DavFileSystem(self, path), path
+
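A sketch of the fsspec hand-off; it only works when fsspec is installed and the endpoint configuration enables it (hypothetical URL):

    from lsst.resources import ResourcePath

    rp = ResourcePath("davs://webdav.example.org/data/table.parquet")
    fs, path = rp.to_fsspec()
    assert fs.exists(path)
    print(fs.info(path))  # {"name": ..., "size": ..., "type": "file"}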
+     @contextlib.contextmanager
+     def _openImpl(
+         self,
+         mode: str = "r",
+         *,
+         encoding: str | None = None,
+     ) -> Iterator[ResourceHandleProtocol]:
+         if self.isdir():
+             raise OSError(f"open is not implemented for directory {self}")
+
+         if mode in ("rb", "r") and self._client.accepts_ranges(self._internal_url):
+             stat: DavFileMetadata = self._stat(refresh=True)
+             if not stat.exists:
+                 raise FileNotFoundError(f"No such file {self}")
+
+             if not stat.is_file:
+                 raise OSError(f"open is not implemented for directory {self}")
+
+             handle: ResourceHandleProtocol = DavReadResourceHandle(mode, log, self, stat)
+             if mode == "r":
+                 # cast because the protocol is compatible, but does not have
+                 # BytesIO in the inheritance tree
+                 yield io.TextIOWrapper(cast(Any, handle), encoding=encoding)
+             else:
+                 yield handle
+         else:
+             with super()._openImpl(mode, encoding=encoding) as handle:
+                 yield handle
+
+
+ class DavFileSystem(AbstractFileSystem):
+     """Minimal fsspec-compatible read-only file system which contains a single
+     file.
+
+     Parameters
+     ----------
+     uri : `DavResourcePath`
+         URI of the single resource contained in the file system.
+
+     path : `str`
+         Path within the file system of the file.
+     """
+
+     def __init__(self, uri: DavResourcePath, path: str):
+         self._uri: DavResourcePath = uri
+         self._path: str = path
+
+     def info(self, path: str, **kwargs: Any) -> dict[str, Any]:
+         if path != self._path:
+             raise FileNotFoundError(path)
+
+         return {
+             "name": path,
+             "size": self._uri.size(),
+             "type": "file",
+         }
+
+     def ls(self, path: str, detail: bool = True, **kwargs: Any) -> list[str] | list[dict[str, str]]:
+         if path != self._path:
+             raise FileNotFoundError(path)
+
+         return [self.info(path)] if detail else [path]
+
+     def modified(self, path: str) -> datetime.datetime:
+         if path != self._path:
+             raise FileNotFoundError(path)
+
+         return self._uri._stat().last_modified
+
+     def size(self, path: str) -> int:
+         if path != self._path:
+             raise FileNotFoundError(path)
+
+         return self._uri.size()
+
+     def isfile(self, path: str) -> bool:
+         return path == self._path
+
+     def isdir(self, path: str) -> bool:
+         return False
+
+     def exists(self, path: str, **kwargs: Any) -> bool:
+         return path == self._path
+
+     def open(
+         self,
+         path: str,
+         mode: str = "rb",
+         encoding: str | None = None,
+         block_size: int | None = None,
+         cache_options: dict[Any, Any] | None = None,
+         compression: str | None = None,
+         **kwargs: Any,
+     ) -> ResourceHandleProtocol[Any]:
+         if path != self._path:
+             raise FileNotFoundError(path)
+
+         with self._uri.open(mode=mode, encoding=encoding) as handle:
+             return handle
+
+     @property
+     def fsid(self) -> Any:
+         raise NotImplementedError
+
+     def mkdir(self, path: str, create_parents: bool = True, **kwargs: Any) -> None:
+         raise NotImplementedError
+
+     def makedirs(self, path: str, exist_ok: bool = False) -> None:
+         raise NotImplementedError
+
+     def rmdir(self, path: str) -> None:
+         raise NotImplementedError
+
+     def walk(
+         self,
+         path: str,
+         maxdepth: int | None = None,
+         topdown: bool = True,
+         on_error: str = "omit",
+         **kwargs: Any,
+     ) -> None:
+         raise NotImplementedError
+
+     def find(
+         self,
+         path: str,
+         maxdepth: int | None = None,
+         withdirs: bool = False,
+         detail: bool = False,
+         **kwargs: Any,
+     ) -> None:
+         raise NotImplementedError
+
+     def du(
+         self,
+         path: str,
+         total: bool = True,
+         maxdepth: int | None = None,
+         withdirs: bool = False,
+         **kwargs: Any,
+     ) -> None:
+         raise NotImplementedError
+
+     def glob(self, path: str, maxdepth: int | None = None, **kwargs: Any) -> None:
+         raise NotImplementedError
+
+     def rm_file(self, path: str) -> None:
+         raise NotImplementedError
+
+     def rm(self, path: str, recursive: bool = False, maxdepth: int | None = None) -> None:
+         raise NotImplementedError
+
+     def touch(self, path: str, truncate: bool = True, **kwargs: Any) -> None:
+         raise NotImplementedError
+
+     def ukey(self, path: str) -> None:
+         raise NotImplementedError
+
+     def created(self, path: str) -> None:
+         raise NotImplementedError
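Putting the pieces together, a minimal end-to-end sketch, assuming a reachable webDAV endpoint and that the dav/davs schemes resolve to this class (the requests themselves go out over http/https, as noted in the constructor):

    from lsst.resources import ResourcePath

    rp = ResourcePath("davs://webdav.example.org/home/user/notes.txt")
    rp.write(b"hello webDAV")
    print(rp.exists(), rp.size())  # True 12
    print(rp.read().decode())      # hello webDAV
    rp.remove()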