lsst-resources 29.0.0rc7__py3-none-any.whl → 29.2025.4600__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2659 @@
1
+ # This file is part of lsst-resources.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (https://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # Use of this source code is governed by a 3-clause BSD-style
10
+ # license that can be found in the LICENSE file.
11
+
12
+ from __future__ import annotations
13
+
14
+ import base64
15
+ import enum
16
+ import io
17
+ import json
18
+ import logging
19
+ import os
20
+ import posixpath
21
+ import random
22
+ import re
23
+ import stat
24
+ import threading
25
+ import time
26
+ import xml.etree.ElementTree as eTree
27
+ from datetime import datetime
28
+ from http import HTTPStatus
29
+ from typing import Any, BinaryIO
30
+
31
+ try:
32
+ import fsspec
33
+ from fsspec.spec import AbstractFileSystem
34
+ except ImportError:
35
+ fsspec = None
36
+ AbstractFileSystem = type
37
+
38
+ import yaml
39
+ from astropy import units as u
40
+ from urllib3 import PoolManager
41
+ from urllib3.response import HTTPResponse
42
+ from urllib3.util import Retry, Timeout, Url, parse_url
43
+
44
+ from lsst.utils.timer import time_this
45
+
46
# Share the logger used by `dav.py` so output from both modules is grouped
# under the same logger name.
log = logging.getLogger(__name__.replace(".davutils", ".dav"))
48
+
49
+
50
+ def normalize_path(path: str | None) -> str:
51
+ """Normalize a path intended to be part of a URL.
52
+
53
+ A path of the form "///a/b/c///../d/e/" would be normalized as "/a/b/d/e".
54
+ The returned path is always absolute, i.e. starts by "/" and never
55
+ ends by "/" except when the path is exactly "/" and does not contain
56
+ "." nor "..". It does not contain consecutive "/" either.
57
+
58
+ Parameters
59
+ ----------
60
+ path : `str`, optional
61
+ Path to normalize (e.g., '/path/to/..///normalize/').
62
+
63
+ Returns
64
+ -------
65
+ url : `str`
66
+ Normalized URL (e.g., '/path/normalize').
67
+ """
68
+ return "/" if not path else "/" + posixpath.normpath(path).lstrip("/")
69
+
70
+
71
def normalize_url(url: str, preserve_scheme: bool = False, preserve_path: bool = True) -> str:
    """Normalize a URL so that scheme be 'http' or 'https' and the URL path
    is normalized.

    Parameters
    ----------
    url : `str`
        URL to normalize (e.g., 'davs://example.org:1234///path/to//../dir/').
    preserve_scheme : `bool`
        If True the scheme of `url` will be preserved. Otherwise the scheme
        of the returned normalized URL will be 'http' or 'https'.
    preserve_path : `bool`
        If True, the path of `url` will be preserved in the returned
        normalized URL, otherwise, the returned URL will have '/' as path.

    Returns
    -------
    url : `str`
        Normalized URL (e.g. 'https://example.org:1234/path/dir').
    """
    parts = parse_url(url)

    # Map webDAV schemes onto HTTP ones unless asked to keep them:
    # "dav" becomes "http" and "davs" becomes "https".
    if parts.scheme is None:
        scheme = "http"
    elif preserve_scheme:
        scheme = parts.scheme
    else:
        scheme = parts.scheme.replace("dav", "http")

    path = normalize_path(parts.path) if preserve_path else "/"
    return Url(scheme=scheme, host=parts.host, port=parts.port, path=path).url
98
+
99
+
100
class DavConfig:
    """Configurable settings a webDAV client must use when interacting with a
    particular storage endpoint.

    Parameters
    ----------
    config : `dict[str, str]`
        Dictionary of configurable settings for the webdav endpoint which
        base URL is `config["base_url"]`.

        For instance, if `config["base_url"]` is

            "davs://webdav.example.org:1234/"

        any object of class `DavResourcePath` like

            "davs://webdav.example.org:1234/path/to/any/file"

        will use the settings in this configuration to configure its client.
    """

    # Timeout in seconds to establish a network connection with the remote
    # server.
    DEFAULT_TIMEOUT_CONNECT: float = 10.0

    # Timeout in seconds to read the response to a request sent to a server.
    # This is total time for reading both the headers and the response body.
    # It must be large enough to allow for upload and download of files
    # of typical size the webdav client supports.
    DEFAULT_TIMEOUT_READ: float = 300.0

    # Maximum number of network connections to persist against each one of
    # the hosts in the frontend and backend server pools.
    # Servers in the frontend pool typically respond to requests such as
    # OPTIONS, PROPFIND, MKCOL, etc.
    #
    # Frontend servers redirect to backend servers to respond to GET and PUT
    # requests (e.g. dCache) but sometimes also for metadata requests such as
    # PROPFIND or HEAD (e.g. XRootD).
    DEFAULT_PERSISTENT_CONNECTIONS_FRONTEND: int = 50
    DEFAULT_PERSISTENT_CONNECTIONS_BACKEND: int = 100

    # Size of the buffer (in mebibytes, i.e. 1024*1024 bytes) the webdav
    # client of this endpoint will use when sending requests and receiving
    # responses.
    DEFAULT_BUFFER_SIZE: int = 5

    # Number of times to retry requests before failing. Retry happens only
    # under certain conditions.
    DEFAULT_RETRIES: int = 3

    # Minimal and maximal retry backoff (in seconds) for the client to compute
    # the wait time before retrying a request.
    # A value in this interval is randomly selected as the backoff factor
    # every time a request is retried.
    DEFAULT_RETRY_BACKOFF_MIN: float = 1.0
    DEFAULT_RETRY_BACKOFF_MAX: float = 3.0

    # Path to a directory or certificate bundle file where the certificates
    # of the trusted certificate authorities can be found.
    # Those certificates will be used by the client of the webdav endpoint
    # to verify the server's host certificate.
    # If None, the certificates trusted by the system are used.
    DEFAULT_TRUSTED_AUTHORITIES: str | None = None

    # Path to the client certificate and associated private key the webdav
    # client must present to the server for authentication purposes.
    # If None, no client certificate is presented.
    DEFAULT_USER_CERT: str | None = None
    DEFAULT_USER_KEY: str | None = None

    # Token the webdav client must send to the server for authentication
    # purposes. The token may be the value of the token itself or the path
    # to a file where the token can be found.
    DEFAULT_TOKEN: str | None = None

    # Default checksum algorithm to request the server to compute on every
    # file upload. Not all servers support this.
    # See RFC 3230 for details.
    DEFAULT_REQUEST_CHECKSUM: str | None = None

    # If this option is set to True, the webdav client can return objects
    # compliant to the fsspec specification.
    # See: https://filesystem-spec.readthedocs.io
    DEFAULT_ENABLE_FSSPEC: bool = True

    # If this option is set to True, memory usage is computed and reported
    # when executing in debug mode. Computing memory usage is costly, so only
    # set this when debugging.
    DEFAULT_COLLECT_MEMORY_USAGE: bool = False

    # Accepted checksum algorithms. Must be lowercase.
    ACCEPTED_CHECKSUMS: list[str] = ["adler32", "md5", "sha-256", "sha-512"]

    def __init__(self, config: dict | None = None) -> None:
        # Initialize every setting from `config`, falling back to the
        # class-level defaults above for any missing key.
        if config is None:
            config = {}

        # "_default_" is a sentinel base URL used for the catch-all
        # configuration applied to endpoints with no explicit entry.
        if (base_url := config.get("base_url")) is None:
            self._base_url = "_default_"
        else:
            self._base_url = normalize_url(base_url, preserve_path=False)

        self._timeout_connect: float = float(config.get("timeout_connect", DavConfig.DEFAULT_TIMEOUT_CONNECT))
        self._timeout_read: float = float(config.get("timeout_read", DavConfig.DEFAULT_TIMEOUT_READ))
        self._persistent_connections_frontend: int = int(
            config.get(
                "persistent_connections_frontend",
                DavConfig.DEFAULT_PERSISTENT_CONNECTIONS_FRONTEND,
            )
        )
        self._persistent_connections_backend: int = int(
            config.get(
                "persistent_connections_backend",
                DavConfig.DEFAULT_PERSISTENT_CONNECTIONS_BACKEND,
            )
        )
        # Configured in mebibytes; stored in bytes.
        self._buffer_size: int = 1_048_576 * int(config.get("buffer_size", DavConfig.DEFAULT_BUFFER_SIZE))
        self._retries: int = int(config.get("retries", DavConfig.DEFAULT_RETRIES))
        self._retry_backoff_min: float = float(
            config.get("retry_backoff_min", DavConfig.DEFAULT_RETRY_BACKOFF_MIN)
        )
        self._retry_backoff_max: float = float(
            config.get("retry_backoff_max", DavConfig.DEFAULT_RETRY_BACKOFF_MAX)
        )
        # Paths may contain environment variables (e.g. "${X509_USER_PROXY}")
        # which are expanded here by the module helper `expand_vars`.
        self._trusted_authorities: str | None = expand_vars(
            config.get("trusted_authorities", DavConfig.DEFAULT_TRUSTED_AUTHORITIES)
        )
        self._user_cert: str | None = expand_vars(config.get("user_cert", DavConfig.DEFAULT_USER_CERT))
        self._user_key: str | None = expand_vars(config.get("user_key", DavConfig.DEFAULT_USER_KEY))
        self._token: str | None = expand_vars(config.get("token", DavConfig.DEFAULT_TOKEN))
        self._enable_fsspec: bool = config.get("enable_fsspec", DavConfig.DEFAULT_ENABLE_FSSPEC)
        self._collect_memory_usage: bool = config.get(
            "collect_memory_usage", DavConfig.DEFAULT_COLLECT_MEMORY_USAGE
        )
        self._request_checksum: str | None = config.get(
            "request_checksum", DavConfig.DEFAULT_REQUEST_CHECKSUM
        )
        # Normalize to lowercase and validate against the accepted list;
        # an unknown algorithm is a configuration error.
        if self._request_checksum is not None:
            self._request_checksum = self._request_checksum.lower()
            if self._request_checksum not in DavConfig.ACCEPTED_CHECKSUMS:
                raise ValueError(
                    f"""Value for checksum algorithm {self._request_checksum} for storage endpoint """
                    f"""{self._base_url} is not among the accepted values: {DavConfig.ACCEPTED_CHECKSUMS}"""
                )

    @property
    def base_url(self) -> str:
        # Normalized base URL of the endpoint, or "_default_" for the
        # fallback configuration.
        return self._base_url

    @property
    def timeout_connect(self) -> float:
        return self._timeout_connect

    @property
    def timeout_read(self) -> float:
        return self._timeout_read

    @property
    def persistent_connections_frontend(self) -> int:
        return self._persistent_connections_frontend

    @property
    def persistent_connections_backend(self) -> int:
        return self._persistent_connections_backend

    @property
    def buffer_size(self) -> int:
        # Buffer size in bytes (the configuration value was in mebibytes).
        return self._buffer_size

    @property
    def retries(self) -> int:
        return self._retries

    @property
    def retry_backoff_min(self) -> float:
        return self._retry_backoff_min

    @property
    def retry_backoff_max(self) -> float:
        return self._retry_backoff_max

    @property
    def trusted_authorities(self) -> str | None:
        return self._trusted_authorities

    @property
    def token(self) -> str | None:
        return self._token

    @property
    def request_checksum(self) -> str | None:
        return self._request_checksum

    @property
    def user_cert(self) -> str | None:
        return self._user_cert

    @property
    def user_key(self) -> str | None:
        # If no user certificate was specified in the configuration,
        # ignore the private key, even if it was provided.
        if self._user_cert is None:
            return None

        # If we have a user certificate but not a private key, assume the
        # private key is included in the same file as the user certificate.
        # That is typically the case when using a X.509 grid proxy as
        # client certificate.
        return self._user_cert if self._user_key is None else self._user_key

    @property
    def enable_fsspec(self) -> bool:
        return self._enable_fsspec

    @property
    def collect_memory_usage(self) -> bool:
        return self._collect_memory_usage
318
+
319
+
320
class DavConfigPool:
    """Registry of configurable settings for all known webDAV endpoints.

    Parameters
    ----------
    filename : `str`, optional
        Name of an environment variable whose value is the path of the
        configuration file to load. The path stored in that variable may
        itself include environment variables (e.g.
        '$HOME/path/to/config.yaml') or '~', which are expanded before the
        file is opened.

        The configuration file is a YAML file with the structure below:

        - base_url: "davs://webdav1.example.org:1234/"
          persistent_connections_frontend: 10
          persistent_connections_backend: 100
          timeout_connect: 20.0
          timeout_read: 120.0
          retries: 3
          retry_backoff_min: 1.0
          retry_backoff_max: 3.0
          user_cert: "${X509_USER_PROXY}"
          user_key: "${X509_USER_PROXY}"
          token: "/path/to/bearer/token/file"
          trusted_authorities: "/etc/grid-security/certificates"
          buffer_size: 5
          enable_fsspec: false
          request_checksum: "md5"
          collect_memory_usage: false

        - base_url: "davs://webdav2.example.org:1234/"
          persistent_connections_frontend: 5
          ...

        All settings are optional. If no settings are found in the
        configuration file for a particular webDAV endpoint, sensible
        defaults will be used.

        There is only a single instance of this class. This thread-safe
        singleton is intended to be initialized when the module is imported
        the first time.
    """

    # Singleton instance and the lock guarding its creation/destruction.
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, filename: str | None = None) -> DavConfigPool:
        # Double-checked locking: the unlocked fast path avoids acquiring
        # the lock once the singleton exists.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)

        return cls._instance

    def __init__(self, filename: str | None = None) -> None:
        # NOTE(review): __init__ runs on *every* DavConfigPool(...) call,
        # even when __new__ returns the already-created singleton, so a
        # later instantiation resets the registry — confirm this is the
        # intended behavior.
        #
        # Create a default configuration. This configuration is
        # used when a URL does not match any of the endpoints in the
        # configuration.
        self._default_config: DavConfig = DavConfig()

        # The key of this dictionary is the URL of the webDAV endpoint,
        # e.g. "davs://host.example.org:1234/"
        self._configs: dict[str, DavConfig] = {}

        # Load the configuration from the file we have been provided with,
        # if any.
        if filename is None:
            return

        # NOTE(review): `filename` is only honored as the *name of an
        # environment variable*: if os.getenv(filename) is None nothing is
        # loaded, so passing a literal file path silently does nothing —
        # confirm against callers.
        if (filename := os.getenv(filename)) is not None:
            # Expand environment variables and '~' in the file name, if any.
            filename = os.path.expandvars(filename)
            filename = os.path.expanduser(filename)
            with open(filename) as file:
                for config_item in yaml.safe_load(file):
                    config = DavConfig(config_item)
                    if config.base_url not in self._configs:
                        self._configs[config.base_url] = config
                    else:
                        # We already have a configuration for the same
                        # endpoint. That is likely a human error in
                        # the configuration file.
                        # NOTE(review): the message below says "(unknown)"
                        # instead of naming the offending file — it looks
                        # like a placeholder for `filename`; verify.
                        raise ValueError(
                            f"""configuration file (unknown) contains two configurations for """
                            f"""endpoint {config.base_url}"""
                        )

    def get_config_for_url(self, url: str) -> DavConfig:
        """Return the configuration to use a webDAV client when interacting
        with the server which hosts the resource at `url`.

        Parameters
        ----------
        url : `str`
            URL for which to obtain a configuration.

        Returns
        -------
        config : `DavConfig`
            The configuration registered for the endpoint of `url`, or the
            default configuration if that endpoint is unknown.
        """
        # Select the configuration for the endpoint of the provided URL.
        # Only scheme, host and port participate in the lookup key.
        normalized_url: str = normalize_url(url, preserve_path=False)
        if (config := self._configs.get(normalized_url)) is not None:
            return config

        # No config was found for the specified URL. Use the default.
        return self._default_config

    def _destroy(self) -> None:
        """Destroy this class singleton instance.

        Helper method to be used in tests to reset global configuration.
        """
        with DavConfigPool._lock:
            DavConfigPool._instance = None
439
+
440
+
441
def make_retry(config: DavConfig) -> Retry:
    """Create a ``urllib3.util.Retry`` object from settings in `config`.

    Parameters
    ----------
    config : `DavConfig`
        Configurable settings for a webDAV storage endpoint.

    Returns
    -------
    retry : `urllib3.util.Retry`
        Retry object to be used when creating a ``urllib3.PoolManager``.
    """
    # Backoff factor to apply between attempts after the second try
    # (seconds). Compute a random jitter to prevent all the clients which
    # started at the same time (even on different hosts) to overwhelm the
    # server by sending requests at the same time.
    lo = config.retry_backoff_min
    hi = config.retry_backoff_max
    backoff = lo + (hi - lo) * random.random()

    # Set of uppercased HTTP method verbs that we should retry on.
    # We only automatically retry idempotent requests.
    idempotent_methods = frozenset(
        [
            "COPY",
            "DELETE",
            "GET",
            "HEAD",
            "MKCOL",
            "OPTIONS",
            "PROPFIND",
            "PUT",
        ]
    )

    # HTTP status codes that we should force a retry on.
    retryable_statuses = frozenset(
        [
            HTTPStatus.TOO_MANY_REQUESTS,  # 429
            HTTPStatus.INTERNAL_SERVER_ERROR,  # 500
            HTTPStatus.BAD_GATEWAY,  # 502
            HTTPStatus.SERVICE_UNAVAILABLE,  # 503
            HTTPStatus.GATEWAY_TIMEOUT,  # 504
        ]
    )

    return Retry(
        # Total number of retries to allow. Takes precedence over other
        # counts.
        total=2 * config.retries,
        # How many connection-related errors to retry on.
        connect=config.retries,
        # How many times to retry on read errors.
        read=config.retries,
        backoff_factor=backoff,
        # How many times to retry on bad status codes.
        status=config.retries,
        allowed_methods=idempotent_methods,
        status_forcelist=retryable_statuses,
        # Whether to respect "Retry-After" header on status codes defined
        # above.
        respect_retry_after_header=True,
    )
500
+
501
+
502
class DavClientPool:
    """Container of reusable webDAV clients, each one specifically configured
    to talk to a single storage endpoint.

    Parameters
    ----------
    config_pool : `DavConfigPool`
        Pool of all known webDAV client configurations.

    Notes
    -----
    There is a single instance of this class. This thread-safe singleton is
    intended to be initialized when the module is imported the first time.
    """

    # Singleton instance and the lock guarding its creation and the
    # population of the client registry.
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, config_pool: DavConfigPool) -> DavClientPool:
        # Double-checked locking: the unlocked fast path avoids acquiring
        # the lock once the singleton exists.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)

        return cls._instance

    def __init__(self, config_pool: DavConfigPool) -> None:
        # NOTE(review): like DavConfigPool, __init__ re-runs on every
        # instantiation and resets the cached clients — confirm intended.
        self._config_pool: DavConfigPool = config_pool

        # The key of this dictionary is a path-stripped URL of the form
        # "davs://host.example.org:1234/". The value is a reusable
        # DavClient to interact with that endpoint.
        self._clients: dict[str, DavClient] = {}

    def get_client_for_url(self, url: str) -> DavClient:
        """Return a client for interacting with the endpoint where `url`
        is hosted.

        Parameters
        ----------
        url : `str`
            URL for which to obtain a client.

        Notes
        -----
        The returned client is thread-safe. If a client for that endpoint
        already exists it is reused, otherwise a new client is created
        with the appropriate configuration for interacting with the storage
        endpoint.
        """
        # If we already have a client for this endpoint reuse it.
        # Only scheme, host and port participate in the lookup key.
        url = normalize_url(url, preserve_path=False)
        if (client := self._clients.get(url)) is not None:
            return client

        # No client for this endpoint was found. Create a new one and save it
        # for serving subsequent requests.
        with DavClientPool._lock:
            # If another client was created in the meantime by another thread
            # reuse it.
            if (client := self._clients.get(url)) is not None:
                return client

            config: DavConfig = self._config_pool.get_config_for_url(url)
            self._clients[url] = self._make_client(url, config)

        return self._clients[url]

    def _make_client(self, url: str, config: DavConfig) -> DavClient:
        """Make a webDAV client for interacting with the server at `url`."""
        # Check the server implements webDAV protocol and retrieve its
        # identity so that we can build a client for that specific
        # server implementation. This probe client is discarded; the
        # client actually cached is created below.
        client = DavClient(url, config)
        server_details = client.get_server_details(url)
        server_id = server_details.get("Server", None)
        # Convert the "Accept-Ranges" header value to a boolean when
        # present; only the token "bytes" means ranges are supported.
        accepts_ranges: bool | str | None = server_details.get("Accept-Ranges", None)
        if accepts_ranges is not None:
            accepts_ranges = accepts_ranges == "bytes"

        if server_id is None:
            # Create a generic webDAV client
            return DavClient(url, config, accepts_ranges)

        if server_id.startswith("dCache/"):
            # Create a client for a dCache webDAV server
            return DavClientDCache(url, config, accepts_ranges)
        elif server_id.startswith("XrootD/"):
            # Create a client for a XrootD webDAV server
            return DavClientXrootD(url, config, accepts_ranges)
        else:
            # Return a generic webDAV client
            return DavClient(url, config, accepts_ranges)

    def _destroy(self) -> None:
        """Destroy this class singleton instance.

        Helper method to be used in tests to reset global configuration.
        """
        with DavClientPool._lock:
            DavClientPool._instance = None
603
+
604
+
605
+ class DavClient:
606
+ """WebDAV client, configured to talk to a single storage endpoint.
607
+
608
+ Instances of this class are thread-safe.
609
+
610
+ Parameters
611
+ ----------
612
+ url : `str`
613
+ Root URL of the storage endpoint (e.g.
614
+ "https://host.example.org:1234/").
615
+ config : `DavConfig`
616
+ Configuration to initialize this client.
617
+ accepts_ranges : `bool` | `None`
618
+ Indicate whether the remote server accepts the ``Range`` header in GET
619
+ requests.
620
+ """
621
+
622
    def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
        # Lock to protect this client's fields from concurrent modification.
        self._lock = threading.Lock()

        # Configuration for the storage endpoint.
        self._config: DavConfig = config

        # Prepare the trusted authorities certificates: a directory is
        # passed to urllib3 as `ca_cert_dir`, a bundle file as `ca_certs`.
        ca_certs, ca_cert_dir = None, None
        if self._config.trusted_authorities is not None:
            if os.path.isdir(self._config.trusted_authorities):
                ca_cert_dir = self._config.trusted_authorities
            elif os.path.isfile(self._config.trusted_authorities):
                ca_certs = self._config.trusted_authorities
            else:
                raise FileNotFoundError(
                    f"Trusted authorities file or directory {self._config.trusted_authorities} does not exist"
                )

        # If a token was specified for this endpoint, prefer it as the
        # authentication method, instead of a <user certificate, private key>
        # pair, even if they were also specified.
        self._authorizer: TokenAuthorizer | None = None
        if self._config.token is not None:
            self._authorizer = TokenAuthorizer(self._config.token)
            user_cert, user_key = None, None
        else:
            user_cert = self._config.user_cert
            user_key = self._config.user_key

        # We use this pool manager for sending requests that the front
        # server typically responds to directly without redirecting (e.g.
        # OPTIONS, HEAD, etc.)
        #
        # Connections in this pool are generally left open by the client but
        # the front-end server may choose to close them in some specific
        # situations (e.g. PUT request with "Expect: 100-continue" header).
        self._frontend = PoolManager(
            # Number of connection pools to cache before discarding the least
            # recently used pool. Each connection pool manages network
            # connections to a single host, so this is basically the number
            # of "host:port" we persist network connections to.
            num_pools=10,
            # Number of connections to the same "host:port" to persist for
            # later reuse. More than 1 is useful in multithreaded situations.
            # If more than this number of network connections are needed at
            # a particular moment, they will be created and used but not
            # persisted.
            maxsize=self._config.persistent_connections_frontend,
            # Retry configuration to use by default with requests sent to
            # host in the front end.
            retries=make_retry(self._config),
            # Socket timeout in seconds for each individual connection.
            timeout=Timeout(
                connect=self._config.timeout_connect,
                read=self._config.timeout_read,
            ),
            # Size in bytes of the buffer for reading/writing data from/to
            # the underlying socket.
            blocksize=self._config.buffer_size,
            # Client certificate and private key for establishing TLS
            # connections. If None, no client certificate is sent to the
            # server. Only relevant for endpoints using secure HTTP protocol.
            cert_file=user_cert,
            key_file=user_key,
            # We require verification of the server certificate.
            cert_reqs="CERT_REQUIRED",
            # Directory where the certificates of the trusted certificate
            # authorities can be found. The contents of that directory
            # must be as expected by OpenSSL.
            ca_cert_dir=ca_cert_dir,
            # Path to a file of concatenated CA certificates in PEM format.
            ca_certs=ca_certs,
        )

        # We use this pool manager to send requests to the backend hosts.
        # Those requests are typically 'GET' and 'PUT'. The backend servers
        # typically leave the connection open after serving the request,
        # but we want the client to have the possibility to close them
        # when there is no benefit of persisting those connections.
        #
        # That is the case, for instance, when the backend servers use a
        # range of ports for listening for new connections. In that case
        # it is likely that a connection to the same pair
        # <backend server, port number>
        # is not going to be reused in a short interval of time.
        self._backend = PoolManager(
            num_pools=100,
            maxsize=self._config.persistent_connections_backend,
            retries=make_retry(self._config),
            timeout=Timeout(
                connect=self._config.timeout_connect,
                read=self._config.timeout_read,
            ),
            blocksize=self._config.buffer_size,
            cert_file=user_cert,
            key_file=user_key,
            cert_reqs="CERT_REQUIRED",
            ca_cert_dir=ca_cert_dir,
            ca_certs=ca_certs,
        )

        # Parser of PROPFIND responses.
        self._propfind_parser: DavPropfindParser = DavPropfindParser()

        # Does the remote server accept "Range" header in GET requests?
        # This field is lazy initialized.
        self._accepts_ranges: bool | None = accepts_ranges

        # Base URL of the server this is a client for. It is of the form:
        # "davs://host.example.org:1234/"
        self._base_url: str = url
734
+
735
    def get_server_details(self, url: str) -> dict[str, str]:
        """
        Retrieve the details of the server and check it advertises compliance
        to class 1 of webDAV protocol.

        Parameters
        ----------
        url : `str`
            URL to check.

        Returns
        -------
        details : `dict[str, str]`
            The keys of the returned dictionary can be "Server" and
            "Accept-Ranges". Any of those keys may not exist in the returned
            dictionary if the server did not include it in its response.

            The values are the values of the corresponding
            headers found in the response to the OPTIONS request.
            Examples of values for the "Server" header are 'dCache/9.2.4' or
            'XrootD/v5.7.1'.

        Raises
        ------
        ValueError
            If the server does not advertise webDAV class 1 compliance.
        """
        # Check that the value "1" is part of the value of the "DAV" header in
        # the response to an 'OPTIONS' request.
        #
        # We don't rely on webDAV locks, so a server complying to class 1 is
        # enough for our purposes. All webDAV servers must advertise at least
        # compliance class "1".
        #
        # Compliance classes are documented in
        # http://www.webdav.org/specs/rfc4918.html#dav.compliance.classes
        #
        # Examples of values for header DAV are:
        #    DAV: 1, 2
        #    DAV: 1, <http://apache.org/dav/propset/fs/1>
        resp = self._options(url)
        if "DAV" not in resp.headers:
            raise ValueError(f"Server of {resp.geturl()} does not implement webDAV protocol")

        if "1" not in resp.headers.get("DAV").replace(" ", "").split(","):
            raise ValueError(
                f"Server of {resp.geturl()} does not advertise required compliance to webDAV protocol class 1"
            )

        # Collect the 'Server' and 'Accept-Ranges' headers when present.
        # Not all servers include them in their response to an OPTIONS
        # request; absent headers are simply left out of the result.
        details: dict[str, str] = {}
        for header in ("Server", "Accept-Ranges"):
            value = resp.headers.get(header, None)
            if value is not None:
                details[header] = value

        return details
790
+
791
+ def _options(self, url: str) -> HTTPResponse:
792
+ """Send a HTTP OPTIONS request and return the response.
793
+
794
+ Parameters
795
+ ----------
796
+ url : `str`
797
+ Target URL.
798
+ """
799
+ resp = self._request("OPTIONS", url)
800
+ if resp.status in (HTTPStatus.OK, HTTPStatus.CREATED):
801
+ return resp
802
+ else:
803
+ raise ValueError(
804
+ f"""Unexpected response to OPTIONS request to {resp.geturl()}: status {resp.status} """
805
+ f"""{resp.reason}"""
806
+ )
807
+
808
    def _request(
        self,
        method: str,
        url: str,
        headers: dict[str, str] | None = None,
        body: BinaryIO | bytes | str | None = None,
        pool_manager: PoolManager | None = None,
        preload_content: bool = True,
        redirect: bool = True,
    ) -> HTTPResponse:
        """Send a generic HTTP request and return the response.

        Parameters
        ----------
        method : `str`
            Request method, e.g. 'GET', 'PUT', 'PROPFIND'.
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        body : `BinaryIO` or `bytes` or `str` or `None`, optional
            Request body.
        pool_manager : `PoolManager`, optional
            Pool manager to use to send the request. By default, the requests
            are sent to the frontend servers.
        preload_content : `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call `.read()` on the returned response object to
            download the body, either entirely in one call or by chunks.
        redirect : `bool`, optional
            If True, automatically handle redirects. If False, the returned
            response may contain a redirection to another location.

        Returns
        -------
        resp : `HTTPResponse`
            Response to the request as received from the server.
        """
        # If this client is configured to use a bearer token for
        # authentication, ensure we only set the token to requests over secure
        # HTTP to avoid leaking the token.
        # A copy of `headers` is made so the caller's dict is never mutated.
        headers = {} if headers is None else dict(headers)
        if self._authorizer is not None and url.startswith("https://"):
            self._authorizer.set_authorization(headers)

        # By default, send the request to a frontend server.
        if pool_manager is None:
            pool_manager = self._frontend

        log.debug("sending request %s %s", method, url)

        # Time (and optionally memory-profile) the round trip; memory
        # collection is costly and only enabled via configuration.
        with time_this(
            log,
            msg="%s %s",
            args=(
                method,
                url,
            ),
            mem_usage=self._config.collect_memory_usage,
            mem_unit=u.mebibyte,
        ):
            resp = pool_manager.request(
                method,
                url,
                body=body,
                headers=headers,
                preload_content=preload_content,
                redirect=redirect,
            )

        return resp
880
+
881
    def _get(
        self, url: str, headers: dict[str, str] | None = None, preload_content: bool = True
    ) -> HTTPResponse:
        """Send a HTTP GET request.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        preload_content : `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call the `.read()` on the returned response
            object to download the body.

        Returns
        -------
        resp: `HTTPResponse`
            Response to the GET request as received from the server.

        Raises
        ------
        FileNotFoundError
            Raised if the frontend server responds with 404 (Not Found).
        ValueError
            Raised for any other unexpected response status.
        """
        # Send the GET request to the frontend servers. We handle redirections
        # ourselves.
        headers = {} if headers is None else dict(headers)
        resp = self._request("GET", url, headers=headers, preload_content=preload_content, redirect=False)
        if resp.status in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            return resp

        if resp.status == HTTPStatus.NOT_FOUND:
            raise FileNotFoundError(f"No file found at {resp.geturl()}")

        # `get_redirect_location()` returns None when there is no "Location"
        # header and False when the status is not a redirect status.
        redirect_location = resp.get_redirect_location()
        if redirect_location is None or redirect_location is False:
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # We were redirected to a backend server so follow the redirection.
        # The response body will be automatically downloaded when
        # `preload_content` is true and the underlying network connection
        # may be kept open for future reuse if the maximum number of
        # connections for the backend pool is not reached.
        url = redirect_location
        resp = self._request(
            "GET",
            url,
            headers=headers,
            pool_manager=self._backend,
            preload_content=preload_content,
        )
        if resp.status not in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # The caller will access the `resp.data` property or use
        # the `resp.read()` method to read the contents of the
        # response body. If `preload_content` argument is True, the
        # response body is already downloaded, otherwise `resp.read()`
        # will download it.
        return resp
943
+
944
    def _put(
        self,
        url: str,
        data: BinaryIO | bytes,
    ) -> None:
        """Send a HTTP PUT request.

        Parameters
        ----------
        url : `str`
            Target URL.
        data : `BinaryIO` or `bytes`
            Request body.

        Raises
        ------
        ValueError
            Raised if the server responds with an unexpected status to
            either the redirect-probe request or the actual upload.
        """
        # Send a PUT request with empty body and handle redirection. This
        # is useful if the server redirects us; since we cannot rewind the
        # data we are uploading, we don't start uploading data until we
        # connect to the server that will actually serve our request.
        headers = {"Content-Length": "0"}
        resp = self._request("PUT", url, headers=headers, redirect=False)
        if redirect_location := resp.get_redirect_location():
            url = redirect_location
        elif resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )

        # We may have been redirected. Upload the file contents to
        # its final destination.

        # Ask the server to compute and record a checksum of the uploaded
        # file contents, for later integrity checks. Since we don't compute
        # the digest ourselves while uploading the data, we cannot control
        # after the request is complete that the data we uploaded is
        # identical to the data recorded by the server, but at least the
        # server has recorded a digest of the data it stored.
        #
        # See RFC-3230 for details and
        # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
        # for the list of supported digest algorithms.
        #
        # In addition, note that not all servers implement this RFC so
        # the checksum request may be ignored by the server.
        headers = {}
        if (checksum := self._config.request_checksum) is not None:
            headers = {"Want-Digest": checksum}

        resp = self._request(
            "PUT",
            url,
            body=data,
            headers=headers,
            pool_manager=self._backend,
        )

        if resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )
1013
+
1014
    def _head(self, url: str, headers: dict[str, str] | None = None) -> HTTPResponse:
        """Send a HTTP HEAD request and return the response.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.

        Returns
        -------
        resp : `HTTPResponse`
            Response to the HEAD request, when the server answered with
            status 200 (OK).

        Raises
        ------
        FileNotFoundError
            Raised if the server responded with status 404 (Not Found).
        ValueError
            Raised for any other response status.
        """
        headers = {} if headers is None else dict(headers)
        resp = self._request("HEAD", url, headers=headers)
        match resp.status:
            case HTTPStatus.OK:
                return resp
            case HTTPStatus.NOT_FOUND:
                raise FileNotFoundError(f"No file found at {resp.geturl()}")
            case _:
                raise ValueError(
                    f"""Unexpected response to HEAD request to {resp.geturl()}: status {resp.status} """
                    f"""{resp.reason}"""
                )
1037
+
1038
+ def _propfind(self, url: str, body: str | None = None, depth: str = "0") -> HTTPResponse:
1039
+ """Send a HTTP PROPFIND request and return the response.
1040
+
1041
+ Parameters
1042
+ ----------
1043
+ url : `str`
1044
+ Target URL.
1045
+ body : `str`, optional
1046
+ Request body.
1047
+ """
1048
+ if body is None:
1049
+ # Request only the DAV live properties we are explicitly interested
1050
+ # in namely 'resourcetype', 'getcontentlength', 'getlastmodified'
1051
+ # and 'displayname'.
1052
+ body = (
1053
+ """<?xml version="1.0" encoding="utf-8"?>"""
1054
+ """<D:propfind xmlns:D="DAV:"><D:prop>"""
1055
+ """<D:resourcetype/><D:getcontentlength/><D:getlastmodified/><D:displayname/>"""
1056
+ """</D:prop></D:propfind>"""
1057
+ )
1058
+
1059
+ headers = {
1060
+ "Depth": depth,
1061
+ "Content-Type": 'application/xml; charset="utf-8"',
1062
+ "Content-Length": str(len(body)),
1063
+ }
1064
+ resp = self._request("PROPFIND", url=url, headers=headers, body=body)
1065
+ if resp.status in (HTTPStatus.MULTI_STATUS, HTTPStatus.NOT_FOUND):
1066
+ return resp
1067
+ else:
1068
+ raise ValueError(
1069
+ f"Unexpected response to PROPFIND {resp.geturl()}: status {resp.status} {resp.reason}"
1070
+ )
1071
+
1072
+ def stat(self, url: str) -> DavFileMetadata:
1073
+ """Return the properties of file or directory located at `url`.
1074
+
1075
+ Parameters
1076
+ ----------
1077
+ url : `str`
1078
+ Target URL.
1079
+
1080
+ Returns
1081
+ -------
1082
+ result: `DavResourceMetadata``
1083
+ Details of the resources at `url`. If no resource was found at
1084
+ that URL no exception is raised. Instead the returned details allow
1085
+ for detecting that the resource does not exist.
1086
+ """
1087
+ resp = self._propfind(url)
1088
+ match resp.status:
1089
+ case HTTPStatus.NOT_FOUND:
1090
+ href = url.replace(self._base_url, "", 1)
1091
+ return DavFileMetadata(base_url=self._base_url, href=href)
1092
+ case HTTPStatus.MULTI_STATUS:
1093
+ property = self._propfind_parser.parse(resp.data)[0]
1094
+ return DavFileMetadata.from_property(base_url=self._base_url, property=property)
1095
+ case _:
1096
+ raise ValueError(
1097
+ f"""Unexpected response to HTTP PROPFIND request to {resp.geturl()}: status """
1098
+ f"""{resp.status} {resp.reason}"""
1099
+ )
1100
+
1101
+ def info(self, url: str, name: str | None = None) -> dict[str, Any]:
1102
+ """Return the details about the file or directory at `url`.
1103
+
1104
+ Parameters
1105
+ ----------
1106
+ url : `str`
1107
+ Target URL.
1108
+ name : `str`
1109
+ Name of the object to be included in the returned value. If None,
1110
+ the `url` is used as name.
1111
+
1112
+ Returns
1113
+ -------
1114
+ result: `dict``
1115
+ For an existing file, the returned value has the form:
1116
+
1117
+ .. code-block:: json
1118
+
1119
+ {
1120
+ "name": name,
1121
+ "size": 1234,
1122
+ "type": "file",
1123
+ "last_modified":
1124
+ datetime.datetime(2025, 4, 10, 15, 12, 51, 227854),
1125
+ "checksums": {
1126
+ "adler32": "0fc5f83f",
1127
+ "md5": "1f57339acdec099c6c0a41f8e3d5fcd0",
1128
+ }
1129
+ }
1130
+
1131
+ For an existing directory, the returned value has the form:
1132
+
1133
+ .. code-block:: json
1134
+
1135
+ {
1136
+ "name": name,
1137
+ "size": 0,
1138
+ "type": "directory",
1139
+ "last_modified":
1140
+ datetime.datetime(2025, 4, 10, 15, 12, 51, 227854),
1141
+ "checksums": {},
1142
+ }
1143
+
1144
+ For a non-existing file or directory, the returned value has the
1145
+ form:
1146
+
1147
+ .. code-block:: json
1148
+
1149
+ {
1150
+ "name": name,
1151
+ "size": None,
1152
+ "type": None,
1153
+ "last_modified":
1154
+ datetime.datetime(1, 1, 1, 0, 0),
1155
+ "checksums": {},
1156
+ }
1157
+
1158
+ Notes
1159
+ -----
1160
+ The format of the returned directory is inspired and compatible with
1161
+ `fsspec`.
1162
+
1163
+ The size of existing directories is always zero. The `checksums``
1164
+ dictionary may be empty if the storage endpoint does not compute
1165
+ and store the checksum of the files it stores.
1166
+ """
1167
+ result: dict[str, Any] = {
1168
+ "name": name if name is not None else url,
1169
+ "type": None,
1170
+ "size": None,
1171
+ "last_modified": datetime.min,
1172
+ "checksums": {},
1173
+ }
1174
+ metadata = self.stat(url)
1175
+ if not metadata.exists:
1176
+ return result
1177
+
1178
+ if metadata.is_dir:
1179
+ result.update({"type": "directory", "size": 0})
1180
+ else:
1181
+ result.update({"type": "file", "size": metadata.size, "checksums": metadata.checksums})
1182
+
1183
+ result.update({"last_modified": metadata.last_modified})
1184
+ return result
1185
+
1186
+ def read_dir(self, url: str) -> list[DavFileMetadata]:
1187
+ """Return the properties of the files or directories contained in
1188
+ directory located at `url`.
1189
+
1190
+ If `url` designates a file, only the details of itself are returned.
1191
+
1192
+ Parameters
1193
+ ----------
1194
+ url : `str`
1195
+ Target URL.
1196
+
1197
+ Returns
1198
+ -------
1199
+ result: `list[DavResourceMetadata]`
1200
+ List of details of each file or directory within `url`.
1201
+ """
1202
+ resp = self._propfind(url, depth="1")
1203
+ if resp.status == HTTPStatus.NOT_FOUND:
1204
+ raise FileNotFoundError(f"No directory found at {resp.geturl()}")
1205
+ elif resp.status != HTTPStatus.MULTI_STATUS:
1206
+ raise ValueError(
1207
+ f"""Unexpected response to HTTP PROPFIND request to {resp.geturl()}: status {resp.status} """
1208
+ f"""{resp.reason}"""
1209
+ )
1210
+
1211
+ if (path := parse_url(url).path) is not None:
1212
+ this_dir_href = path.rstrip("/") + "/"
1213
+ else:
1214
+ this_dir_href = "/"
1215
+
1216
+ result = []
1217
+ for property in self._propfind_parser.parse(resp.data):
1218
+ # Don't include in the results the metadata of the directory we
1219
+ # traversing.
1220
+ # Some webDAV servers do not append a "/" to the href of a
1221
+ # directory in their response to PROPFIND, so we must take into
1222
+ # account that.
1223
+ if property.is_file:
1224
+ result.append(DavFileMetadata.from_property(base_url=self._base_url, property=property))
1225
+ elif property.is_dir and property.href != this_dir_href:
1226
+ result.append(DavFileMetadata.from_property(base_url=self._base_url, property=property))
1227
+
1228
+ return result
1229
+
1230
+ def read(self, url: str) -> bytes:
1231
+ """Download the contents of file located at `url`.
1232
+
1233
+ Parameters
1234
+ ----------
1235
+ url : `str`
1236
+ Target URL.
1237
+
1238
+ Returns
1239
+ -------
1240
+ read: `bytes`
1241
+ Contents of the file.
1242
+
1243
+ Notes
1244
+ -----
1245
+ The caller must ensure that the resource at `url` is a file, not
1246
+ a directory.
1247
+ """
1248
+ return self._get(url).data
1249
+
1250
+ def read_range(
1251
+ self, url: str, start: int, end: int | None, headers: dict[str, str] | None = None
1252
+ ) -> bytes:
1253
+ """Download partial content of file located at `url`.
1254
+
1255
+ Parameters
1256
+ ----------
1257
+ url : `str`
1258
+ Target URL.
1259
+ start : `int`
1260
+ Starting byte offset of the range to download.
1261
+ end : `int`
1262
+ Ending byte offset of the range to download.
1263
+ headers : `dict[str,str]`, optional
1264
+ Specific headers to sent with the GET request.
1265
+
1266
+ Returns
1267
+ -------
1268
+ read: `bytes`
1269
+ Partial contents of the file.
1270
+
1271
+ Notes
1272
+ -----
1273
+ The caller must ensure that the resource at `url` is a file, not
1274
+ a directory. This is important because some webDAV servers respond
1275
+ with an HTML document when asked for reading a directory.
1276
+ """
1277
+ headers = {} if headers is None else dict(headers)
1278
+ if end is None:
1279
+ headers.update({"Range": f"bytes={start}-"})
1280
+ else:
1281
+ headers.update({"Range": f"bytes={start}-{end}"})
1282
+
1283
+ return self._get(url, headers=headers).data
1284
+
1285
+ def download(self, url: str, filename: str, chunk_size: int, close_connection: bool = False) -> int:
1286
+ """Download the content of a file and write it to local file.
1287
+
1288
+ Parameters
1289
+ ----------
1290
+ url : `str`
1291
+ Target URL.
1292
+ filename : `str`
1293
+ Local file to write the content to. If the file already exists,
1294
+ it will be rewritten.
1295
+ chunk_size : `int`
1296
+ Size of the chunks to write to `filename`.
1297
+ close_connection : `bool`
1298
+ Whether to close the connection after download.
1299
+
1300
+ Returns
1301
+ -------
1302
+ count: `int`
1303
+ Number of bytes written to `filename`.
1304
+
1305
+ Notes
1306
+ -----
1307
+ The caller must ensure that the resource at `url` is a file, not
1308
+ a directory.
1309
+ """
1310
+ try:
1311
+ resp = self._get(url, preload_content=False)
1312
+
1313
+ # If we were asked to close the connection to the server, disable
1314
+ # auto close so that we can explicitly close the connection.
1315
+ # By default, urrlib3 releases the connection and keeps it open
1316
+ # for later reuse when it consumes the response body.
1317
+ if close_connection:
1318
+ resp.auto_close = False
1319
+
1320
+ content_length = 0
1321
+ with open(filename, "wb", buffering=chunk_size) as file:
1322
+ for chunk in resp.stream(chunk_size):
1323
+ file.write(chunk)
1324
+ content_length += len(chunk)
1325
+
1326
+ # Check that the expected and actual content lengths match. Perform
1327
+ # this check only when the content of the file was not encoded by
1328
+ # the server.
1329
+ expected_length: int = int(resp.headers.get("Content-Length", -1))
1330
+ if (
1331
+ "Content-Encoding" not in resp.headers
1332
+ and expected_length != -1
1333
+ and expected_length != content_length
1334
+ ):
1335
+ raise ValueError(
1336
+ f"Size of downloaded file does not match value in Content-Length header for {self}: "
1337
+ f"expecting {expected_length} and got {content_length} bytes"
1338
+ )
1339
+
1340
+ return content_length
1341
+ finally:
1342
+ # Close this connection
1343
+ if close_connection:
1344
+ resp.close()
1345
+
1346
+ def write(self, url: str, data: BinaryIO | bytes) -> None:
1347
+ """Create or rewrite a remote file at `url` with `data` as its
1348
+ contents.
1349
+
1350
+ Parameters
1351
+ ----------
1352
+ url : `str`
1353
+ Target URL.
1354
+ data : `bytes`
1355
+ Sequence of bytes to upload.
1356
+
1357
+ Notes
1358
+ -----
1359
+ If a file already exists at `url` it will be rewritten.
1360
+ """
1361
+ self._put(url, data)
1362
+
1363
+ def checksums(self, url: str) -> dict[str, str]:
1364
+ """Return the checksums of the contents of file located at `url`.
1365
+
1366
+ The checksums are retrieved from the storage endpoint. There may be
1367
+ none if the storage endpoint does not automatically expose the
1368
+ checksums it computes.
1369
+
1370
+ Parameters
1371
+ ----------
1372
+ url : `str`
1373
+ Target URL.
1374
+
1375
+ Returns
1376
+ -------
1377
+ checksums: `dict[str, str]`
1378
+ A file exists at `url`.
1379
+ The key of the dictionary is the lowercased name of the checksum
1380
+ algorithm (e.g. "md5", "adler32"). The value is the lowercased
1381
+ checksum itself (e.g. "78441cec2479ec8b545c4d6699f542da").
1382
+ """
1383
+ stat = self.stat(url)
1384
+ if not stat.exists:
1385
+ raise FileNotFoundError(f"No file found at {url}")
1386
+
1387
+ return stat.checksums if stat.is_file else {}
1388
+
1389
+ def mkcol(self, url: str) -> None:
1390
+ """Create a directory at `url`.
1391
+
1392
+ If a directory already exists at `url` no error is returned nor
1393
+ exception is raised. An exception is raised if a file exists at `url`.
1394
+
1395
+ Parameters
1396
+ ----------
1397
+ url : `str`
1398
+ Target URL.
1399
+ """
1400
+ resp = self._request("MKCOL", url)
1401
+ if resp.status not in (HTTPStatus.CREATED, HTTPStatus.METHOD_NOT_ALLOWED):
1402
+ raise ValueError(f"Can not create directory {resp.geturl()}: status {resp.status} {resp.reason}")
1403
+
1404
+ def delete(self, url: str) -> None:
1405
+ """Delete the file or directory at `url`.
1406
+
1407
+ If there is no file or directory at `url` is not considered an error.
1408
+
1409
+ Parameters
1410
+ ----------
1411
+ url : `str`
1412
+ Target URL.
1413
+
1414
+ Notes
1415
+ -----
1416
+ If `url` designates a directory, some webDAV servers recursively
1417
+ remove the directory and its contents. Others, only remove the
1418
+ directory if it is empty.
1419
+
1420
+ For a consisten behavior, the caller must check what kind of object
1421
+ the target URL is and walk the hierarchy removing all objects.
1422
+ """
1423
+ resp = self._request("DELETE", url)
1424
+ if resp.status not in (
1425
+ HTTPStatus.OK,
1426
+ HTTPStatus.ACCEPTED,
1427
+ HTTPStatus.NO_CONTENT,
1428
+ HTTPStatus.NOT_FOUND,
1429
+ ):
1430
+ raise ValueError(f"Unable to delete resource {resp.geturl()}: status {resp.status} {resp.reason}")
1431
+
1432
    def accepts_ranges(self, url: str) -> bool:
        """Return `True` if the server supports a 'Range' header in
        GET requests against `url`.

        Parameters
        ----------
        url : `str`
            Target URL.

        Returns
        -------
        accepts_ranges : `bool`
            `True` when the server advertised "Accept-Ranges: bytes" in
            its response to a HEAD request.
        """
        # If we have already determined that the server accepts "Range" for
        # another URL, we assume that it implements that feature for any
        # file it serves, so reuse that information.
        if self._accepts_ranges is not None:
            return self._accepts_ranges

        # Double-checked locking: re-test under the lock so that only one
        # thread sends the HEAD request, even if several threads raced
        # past the unlocked check above.
        with self._lock:
            if self._accepts_ranges is None:
                self._accepts_ranges = self._head(url).headers.get("Accept-Ranges", "") == "bytes"

            return self._accepts_ranges
1452
+
1453
+ def copy(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
1454
+ """Copy the file at `source_url` to `destination_url` in the same
1455
+ storage endpoint.
1456
+
1457
+ Parameters
1458
+ ----------
1459
+ source_url : `str`
1460
+ URL of the source file.
1461
+ destination_url : `str`
1462
+ URL of the destination file. Its parent directory must exist.
1463
+ overwrite : `bool`
1464
+ If True and a file exists at `destination_url` it will be
1465
+ overwritten. Otherwise an exception is raised.
1466
+ """
1467
+ # Check the source is a file
1468
+ if self.stat(source_url).is_dir:
1469
+ raise NotImplementedError(f"copy is not implemented for directory {source_url}")
1470
+
1471
+ # Send a COPY request for this file.
1472
+ headers = {
1473
+ "Destination": destination_url,
1474
+ "Overwrite": "T" if overwrite else "F",
1475
+ }
1476
+ resp = self._request("COPY", source_url, headers=headers)
1477
+ if resp.status not in (HTTPStatus.CREATED, HTTPStatus.NO_CONTENT):
1478
+ raise ValueError(
1479
+ f"Could not copy {resp.geturl()} to {destination_url}: status {resp.status} {resp.reason}"
1480
+ )
1481
+ return
1482
+
1483
+ def move(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
1484
+ """Move the file at `source_url` to `destination_url` in the same
1485
+ storage endpoint.
1486
+
1487
+ Parameters
1488
+ ----------
1489
+ source_url : `str`
1490
+ URL of the source file.
1491
+ destination_url : `str`
1492
+ URL of the destination file. Its parent directory must exist.
1493
+ overwrite : `bool`
1494
+ If True and a file exists at `destination_url` it will be
1495
+ overwritten. Otherwise an exception is raised.
1496
+ """
1497
+ headers = {
1498
+ "Destination": destination_url,
1499
+ "Overwrite": "T" if overwrite else "F",
1500
+ }
1501
+ resp = self._request("MOVE", source_url, headers=headers)
1502
+ if resp.status not in (HTTPStatus.CREATED, HTTPStatus.NO_CONTENT):
1503
+ raise ValueError(
1504
+ f"""Could not move file {resp.geturl()} to {destination_url}: status {resp.status} """
1505
+ f"""{resp.reason}"""
1506
+ )
1507
+
1508
+ def generate_presigned_get_url(self, url: str, expiration_time_seconds: int) -> str:
1509
+ """Return a pre-signed URL that can be used to retrieve this resource
1510
+ using an HTTP GET without supplying any access credentials.
1511
+
1512
+ Parameters
1513
+ ----------
1514
+ url : `str`
1515
+ Target URL.
1516
+ expiration_time_seconds : `int`
1517
+ Number of seconds until the generated URL is no longer valid.
1518
+
1519
+ Returns
1520
+ -------
1521
+ url : `str`
1522
+ HTTP URL signed for GET.
1523
+ """
1524
+ raise NotImplementedError(f"URL signing is not supported by server for {self}")
1525
+
1526
+ def generate_presigned_put_url(self, url: str, expiration_time_seconds: int) -> str:
1527
+ """Return a pre-signed URL that can be used to upload a file to this
1528
+ path using an HTTP PUT without supplying any access credentials.
1529
+
1530
+ Parameters
1531
+ ----------
1532
+ url : `str`
1533
+ Target URL.
1534
+ expiration_time_seconds : `int`
1535
+ Number of seconds until the generated URL is no longer valid.
1536
+
1537
+ Returns
1538
+ -------
1539
+ url : `str`
1540
+ HTTP URL signed for PUT.
1541
+ """
1542
+ raise NotImplementedError(f"URL signing is not supported by server for {self}")
1543
+
1544
+
1545
class ActivityCaveat(enum.Enum):
    """Enumeration of the activity caveats accepted when requesting
    macaroons from dCache or XRootD webDAV servers.
    """

    # auto() assigns 1 and 2, preserving the original member values.
    DOWNLOAD = enum.auto()
    UPLOAD = enum.auto()
1552
+
1553
+
1554
class DavClientURLSigner(DavClient):
    """WebDAV client which supports signing of URL for upload and download.

    Instances of this class are thread-safe.

    Parameters
    ----------
    url : `str`
        Root URL of the storage endpoint
        (e.g. "https://host.example.org:1234/").
    config : `DavConfig`
        Configuration to initialize this client.
    accepts_ranges : `bool` | `None`
        Indicate whether the remote server accepts the ``Range`` header in GET
        requests.
    """

    def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
        super().__init__(url=url, config=config, accepts_ranges=accepts_ranges)

    def generate_presigned_get_url(self, url: str, expiration_time_seconds: int) -> str:
        """Return a pre-signed URL that can be used to retrieve the resource
        at `url` using an HTTP GET without supplying any access credentials.

        Parameters
        ----------
        url : `str`
            URL of an existing file.
        expiration_time_seconds : `int`
            Number of seconds until the generated URL is no longer valid.

        Returns
        -------
        url : `str`
            HTTP URL signed for GET.

        Notes
        -----
        Although the returned URL allows for downloading the file at `url`
        without supplying credentials, the HTTP client must be configured
        to accept the certificate the server will present if the client
        wants to validate it. The server's certificate may be issued by a
        certificate authority unknown to the client.
        """
        # The macaroon is carried in the "authz" query parameter, which
        # dCache and XRootD both understand.
        macaroon: str = self._get_macaroon(url, ActivityCaveat.DOWNLOAD, expiration_time_seconds)
        return f"{url}?authz={macaroon}"

    def generate_presigned_put_url(self, url: str, expiration_time_seconds: int) -> str:
        """Return a pre-signed URL that can be used to upload a file to `url`
        using an HTTP PUT without supplying any access credentials.

        Parameters
        ----------
        url : `str`
            URL of an existing file.
        expiration_time_seconds : `int`
            Number of seconds until the generated URL is no longer valid.

        Returns
        -------
        url : `str`
            HTTP URL signed for PUT.

        Notes
        -----
        Although the returned URL allows for uploading a file to `url`
        without supplying credentials, the HTTP client must be configured
        to accept the certificate the server will present if the client
        wants to validate it. The server's certificate may be issued by a
        certificate authority unknown to the client.
        """
        macaroon: str = self._get_macaroon(url, ActivityCaveat.UPLOAD, expiration_time_seconds)
        return f"{url}?authz={macaroon}"

    def _get_macaroon(self, url: str, activity: ActivityCaveat, expiration_time_seconds: int) -> str:
        """Return a macaroon for uploading or downloading the file at `url`.

        Parameters
        ----------
        url : `str`
            URL of an existing file.
        activity : `ActivityCaveat`
            The activity the macaroon is requested for.
        expiration_time_seconds : `int`
            Requested duration of the macaroon, in seconds.

        Returns
        -------
        macaroon : `str`
            Macaroon to be used with `url` in a GET or PUT request.

        Raises
        ------
        ValueError
            Raised when the server refuses to deliver a macaroon or when
            its response cannot be parsed.
        """
        # dCache and XRootD webDAV servers support delivery of macaroons.
        #
        # For details about dCache macaroons see:
        # https://www.dcache.org/manuals/UserGuide-9.2/macaroons.shtml
        match activity:
            case ActivityCaveat.DOWNLOAD:
                activity_caveat = "DOWNLOAD,LIST"
            case ActivityCaveat.UPLOAD:
                activity_caveat = "UPLOAD,LIST,DELETE,MANAGE"

        # Retrieve a macaroon for the requested activities and duration.
        # The validity is expressed as an ISO-8601 duration ("PT<n>S").
        headers = {"Content-Type": "application/macaroon-request"}
        body = {
            "caveats": [
                f"activity:{activity_caveat}",
            ],
            "validity": f"PT{expiration_time_seconds}S",
        }
        resp = self._request("POST", url, headers=headers, body=json.dumps(body))
        if resp.status != HTTPStatus.OK:
            raise ValueError(
                f"Could not retrieve a macaroon for URL {resp.geturl()}, status: {resp.status} {resp.reason}"
            )

        # We are expecting the body of the response to be formatted in JSON.
        # dCache sets the 'Content-Type' of the response to 'application/json'
        # but XRootD does not set any 'Content-Type' header, so we do not
        # check that header and attempt to parse the body unconditionally.
        #
        # An example of a response body returned by dCache is shown below:
        # {
        #     "macaroon": "MDA[...]Qo",
        #     "uri": {
        #         "targetWithMacaroon": "https://dcache.example.org/?authz=MD...",
        #         "baseWithMacaroon": "https://dcache.example.org/?authz=MD...",
        #         "target": "https://dcache.example.org/",
        #         "base": "https://dcache.example.org/"
        #     }
        # }
        #
        # An example of a response body returned by XRootD is shown below:
        # {
        #     "macaroon": "MDA[...]Qo",
        #     "expires_in": 86400
        # }
        try:
            response_body = json.loads(resp.data.decode("utf-8"))
        except json.JSONDecodeError:
            raise ValueError(f"Could not deserialize response to POST request for URL {resp.geturl()}")

        if "macaroon" in response_body:
            return response_body["macaroon"]

        raise ValueError(f"Could not retrieve macaroon for URL {resp.geturl()}")

    def copy(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
        """Copy the file at `source_url` to `destination_url` in the same
        storage endpoint.

        Parameters
        ----------
        source_url : `str`
            URL of the source file.
        destination_url : `str`
            URL of the destination file. Its parent directory must exist.
        overwrite : `bool`
            If True and a file exists at `destination_url` it will be
            overwritten. Otherwise an exception is raised.
        """
        # Check the source is a file
        if self.stat(source_url).is_dir:
            raise NotImplementedError(f"copy is not implemented for directory {source_url}")

        # Neither dCache nor XrootD currently implement the COPY
        # webDAV method as documented in
        #
        # http://www.webdav.org/specs/rfc4918.html#METHOD_COPY
        #
        # (See issues DM-37603 and DM-37651 for details)
        # With those servers use third-party copy instead.
        return self._copy_via_third_party(source_url, destination_url, overwrite)

    def _copy_via_third_party(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
        """Copy the file at `source_url` to `destination_url` in the same
        storage endpoint using the third-party copy functionality
        implemented by dCache and XRootD servers.

        Parameters
        ----------
        source_url : `str`
            URL of the source file.
        destination_url : `str`
            URL of the destination file. Its parent directory must exist.
        overwrite : `bool`
            If True and a file exists at `destination_url` it will be
            overwritten. Otherwise an exception is raised.

        Raises
        ------
        ValueError
            Raised when the server rejects the transfer or reports a
            failure through the performance-marker stream.
        """
        # To implement COPY we use dCache's third-party copy mechanism
        # documented at:
        #
        # https://www.dcache.org/manuals/UserGuide-10.2/webdav.shtml#third-party-transfers
        #
        # The reason is that dCache does not correctly implement webDAV's COPY
        # method. See https://github.com/dCache/dcache/issues/6950

        # Retrieve a short-lived (300 s) macaroon for downloading the source;
        # the destination server uses it to pull the data.
        download_macaroon = self._get_macaroon(source_url, ActivityCaveat.DOWNLOAD, 300)

        # Prepare and send the COPY request
        try:
            headers = {
                "Source": source_url,
                "TransferHeaderAuthorization": f"Bearer {download_macaroon}",
                "Credential": "none",
                "Depth": "0",
                "Overwrite": "T" if overwrite else "F",
                "RequireChecksumVerification": "false",
            }
            resp = self._request("COPY", destination_url, headers=headers, preload_content=False)
            # 201 (Created) means the copy completed synchronously.
            if resp.status == HTTPStatus.CREATED:
                return

            # 202 (Accepted) means the transfer is asynchronous and its
            # outcome is reported in the response body.
            if resp.status != HTTPStatus.ACCEPTED:
                raise ValueError(
                    f"Unable to copy resource {resp.geturl()}; status: {resp.status} {resp.reason}"
                )

            content_type = resp.headers.get("Content-Type")
            if content_type != "text/perf-marker-stream":
                raise ValueError(
                    f"""Unexpected Content-Type {content_type} in response to COPY request from """
                    f"""{source_url} to {destination_url}"""
                )

            # Read the performance markers in the response body.
            # Documentation:
            # https://dcache.org/manuals/UserGuide-10.2/webdav.shtml#third-party-transfers
            for marker in io.TextIOWrapper(resp):  # type: ignore
                marker = marker.rstrip("\n")
                if marker == "":  # EOF
                    raise ValueError(
                        f"""Copying file from {source_url} to {destination_url} failed: """
                        """could not get response from server"""
                    )
                elif marker.startswith("failure:"):
                    raise ValueError(
                        f"""Copying file from {source_url} to {destination_url} failed with error: """
                        f"""{marker}"""
                    )
                elif marker.startswith("success:"):
                    return
        finally:
            # Discard any unread response data so the connection can be
            # returned to the pool.
            resp.drain_conn()
1797
+
1798
+
1799
class DavClientDCache(DavClientURLSigner):
    """Client for interacting with a dCache webDAV server.

    Instances of this class are thread-safe.

    Parameters
    ----------
    url : `str`
        Root URL of the storage endpoint
        (e.g. "https://host.example.org:1234/").
    config : `DavConfig`
        Configuration to initialize this client.
    accepts_ranges : `bool` | `None`
        Indicate whether the remote server accepts the ``Range`` header in GET
        requests.
    """

    def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
        super().__init__(url=url, config=config, accepts_ranges=accepts_ranges)

    def _propfind(self, url: str, body: str | None = None, depth: str = "0") -> HTTPResponse:
        """Send a HTTP PROPFIND request and return the response.

        Parameters
        ----------
        url : `str`
            Target URL.
        body : `str`, optional
            Request body. When `None`, a default body is used which requests
            the DAV live properties of interest plus dCache-specific
            checksums.
        depth : `str`, optional
            Value for the "Depth" request header ("0" limits the request to
            the target resource itself).
        """
        if body is None:
            # Request only the DAV live properties we are explicitly interested
            # in namely 'resourcetype', 'getcontentlength', 'getlastmodified'
            # and 'displayname'. In addition, request dCache-specific
            # checksums.
            body = (
                """<?xml version="1.0" encoding="utf-8"?>"""
                """<D:propfind xmlns:D="DAV:" xmlns:dcache="http://www.dcache.org/2013/webdav"><D:prop>"""
                """<D:resourcetype/><D:getcontentlength/><D:getlastmodified/><D:displayname/>"""
                """<dcache:Checksums/>"""
                """</D:prop></D:propfind>"""
            )

        return super()._propfind(url=url, body=body, depth=depth)

    def _get(
        self, url: str, headers: dict[str, str] | None = None, preload_content: bool = True
    ) -> HTTPResponse:
        """Send a HTTP GET request to a dCache webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        preload_content : `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call the `.read()` on the returned response
            object to download the body.

        Returns
        -------
        resp: `HTTPResponse`
            Response to the GET request as received from the server.

        Raises
        ------
        FileNotFoundError
            If the server responds with status 404.
        ValueError
            If the server responds with any other unexpected status.
        """
        # Send the GET request to the frontend servers. We handle
        # redirections ourselves.
        headers = {} if headers is None else dict(headers)
        resp = self._request("GET", url, headers=headers, preload_content=preload_content, redirect=False)
        if resp.status in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            return resp

        if resp.status == HTTPStatus.NOT_FOUND:
            raise FileNotFoundError(f"No file found at {resp.geturl()}")

        redirect_location = resp.get_redirect_location()
        if redirect_location is None or redirect_location is False:
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # We were redirected to a backend server so follow the redirection.
        # The response body will be automatically downloaded when
        # `preload_content` is true and the underlying network connection
        # may be kept open for future reuse if the maximum number of
        # connections for the backend pool is not reached.
        try:
            # Explicitly ask the backend server to close the connection after
            # serving this request.
            if preload_content:
                headers.update({"Connection": "close"})

            url = redirect_location
            resp = self._request(
                "GET",
                url,
                headers=headers,
                pool_manager=self._backend,
                preload_content=preload_content,
            )

            # Mark this connection so that it won't be be automatically
            # returned to the reusable connection pool. We will close it
            # ourselves if appropriate.
            if preload_content:
                resp.auto_close = False

            if resp.status not in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
                raise ValueError(
                    f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
                )

            # The caller will access the `resp.data` property or use
            # the `resp.read()` method to read the contents of the
            # response body. If `preload_content` argument is True, the
            # response body is already downloaded, otherwise `resp.read()`
            # will download it.
            return resp
        finally:
            # Don't keep this connection to the backend server open. Given
            # that dCache pools may be configured to serve requests over a
            # range of ports, it is unlikely we will reuse this particular
            # connection again in the short term.
            if preload_content:
                resp.close()

    def _put(
        self,
        url: str,
        data: BinaryIO | bytes,
    ) -> None:
        """Send a HTTP PUT request to a dCache webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        data : `BinaryIO` or `bytes`
            Request body.

        Raises
        ------
        ValueError
            If the server responds with an unexpected status to either the
            initial redirect probe or the actual upload.
        """
        # Send a PUT request with empty body to the dCache frontend server to
        # get redirected to the backend.
        #
        # Details:
        # https://www.dcache.org/manuals/UserGuide-10.2/webdav.shtml#redirection
        #
        # Note that we use the backend pool manager for PUT requests, since
        # the dCache webDAV door closes the connection when redirecting a
        # PUT request to the backend.
        #
        # We want to reuse the connections to the door as much as possible so
        # that metadata operations are faster; all metadata operations use the
        # frontend pool manager.
        headers = {"Content-Length": "0", "Expect": "100-continue"}
        resp = self._request("PUT", url, headers=headers, redirect=False, pool_manager=self._backend)
        if redirect_location := resp.get_redirect_location():
            url = redirect_location
        elif resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )

        # We were redirected to a backend server. Upload the file contents to
        # its final destination. Explicitly ask the server to close this
        # network connection after serving this PUT request to release
        # the associated dCache mover.

        # Ask dCache to compute and record a checksum of the uploaded
        # file contents, for later integrity checks. Since we don't compute
        # the digest ourselves while uploading the data, we cannot control
        # after the request is complete that the data we uploaded is
        # identical to the data recorded by the server, but at least the
        # server has recorded a digest of the data it stored.
        #
        # See RFC-3230 for details and
        # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
        # for the list of supported digest algorithms.
        headers = {"Connection": "close"}
        if (checksum := self._config.request_checksum) is not None:
            headers.update({"Want-Digest": checksum})

        try:
            resp = self._request(
                "PUT",
                url,
                body=data,
                headers=headers,
                pool_manager=self._backend,
                # Don't consume the response body, so that we can explicitly
                # close the connection.
                preload_content=False,
            )

            # Disable automatically returning the connection to the pool
            # to be reused later on, since we want that connection to be
            # closed. By default, when preload_content is True, the network
            # connection is returned to the connection pool once the response
            # body is completely consumed. Once this happens, we don't have a
            # mechanism to force closing the connection.
            resp.auto_close = False

            if resp.status not in (
                HTTPStatus.OK,
                HTTPStatus.CREATED,
                HTTPStatus.NO_CONTENT,
            ):
                raise ValueError(
                    f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                    f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
                )

        finally:
            # Explicitly close this connection to the dCache backend server.
            resp.close()

    def download(self, url: str, filename: str, chunk_size: int, close_connection: bool = True) -> int:
        """Download the file at `url` into the local file `filename`.

        Delegates to the superclass implementation; `close_connection`
        defaults to True so the connection to the backend server is closed
        after the entire file content is downloaded.
        """
        # Close the connection to the backend servers after downloading
        # the entire file content.
        return super().download(
            url=url, filename=filename, chunk_size=chunk_size, close_connection=close_connection
        )
2027
+
2028
+
2029
class DavClientXrootD(DavClientURLSigner):
    """Client for interacting with a XrootD webDAV server.

    Instances of this class are thread-safe.

    Parameters
    ----------
    url : `str`
        Root URL of the storage endpoint
        (e.g. "https://host.example.org:1234/").
    config : `DavConfig`
        Configuration to initialize this client.
    accepts_ranges : `bool` | `None`
        Indicate whether the remote server accepts the ``Range`` header in GET
        requests.
    """

    def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
        super().__init__(url=url, config=config, accepts_ranges=accepts_ranges)

    def _get(
        self, url: str, headers: dict[str, str] | None = None, preload_content: bool = True
    ) -> HTTPResponse:
        """Send a HTTP GET request to a XrootD webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        preload_content : `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call the `.read()` on the returned response
            object to download the body.

        Returns
        -------
        resp: `HTTPResponse`
            Response to the GET request as received from the server.

        Raises
        ------
        FileNotFoundError
            If the server responds with status 404.
        ValueError
            If the server responds with any other unexpected status.
        """
        # Send the GET request to the frontend servers and follow redirection.
        headers = {} if headers is None else dict(headers)
        resp = self._request("GET", url, headers=headers, preload_content=preload_content, redirect=False)
        if resp.status in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            return resp

        if resp.status == HTTPStatus.NOT_FOUND:
            raise FileNotFoundError(f"No file found at {resp.geturl()}")

        redirect_location = resp.get_redirect_location()
        if redirect_location is None or redirect_location is False:
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # We were redirected to a backend server so follow the redirection.
        # The response body will be automatically downloaded when
        # `preload_content` is true and the underlying network connection
        # may be kept open for future reuse if the maximum number of
        # connections for the backend pool is not reached.
        #
        # For XRootD endpoints, we always use the same pool manager, namely
        # the frontend pool manager, to increase the chance of reusing
        # network connections.
        url = redirect_location
        resp = self._request(
            "GET",
            url,
            headers=headers,
            pool_manager=self._frontend,
            preload_content=preload_content,
        )

        if resp.status not in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            resp.close()
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # The caller will access the `resp.data` property or use
        # the `resp.read()` method to read the contents of the
        # response body. If `preload_content` argument is True, the
        # response body is already downloaded, otherwise `resp.read()`
        # will download it.
        return resp

    def _put(
        self,
        url: str,
        data: BinaryIO | bytes,
    ) -> None:
        """Send a HTTP PUT request to a XrootD webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        data : `BinaryIO` or `bytes`
            Request body.

        Raises
        ------
        ValueError
            If the server responds with an unexpected status, or remains
            locked after the maximum number of attempts.
        """
        # Send a PUT request with empty body to the XRootD frontend server to
        # get redirected to the backend.
        headers = {"Content-Length": "0", "Expect": "100-continue"}
        for attempt in range(max_attempts := 3):
            resp = self._request("PUT", url, headers=headers, redirect=False)
            if redirect_location := resp.get_redirect_location():
                # Redirected to a backend server: upload the data there.
                url = redirect_location
                break
            elif resp.status == HTTPStatus.LOCKED:
                # Sometimes XRootD servers respond with status code LOCKED and
                # response body of the form:
                #
                #    "Output file /path/to/file is already opened by 1 writer;
                #     open denied."
                #
                # If we get such a response, try again, unless we reached
                # the maximum number of attempts.
                if attempt == max_attempts - 1:
                    raise ValueError(
                        f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                        f"""{resp.reason} [{resp.data.decode("utf-8")}] after {max_attempts} attempts"""
                    )

                # Wait a bit and try again
                log.warning(
                    f"""got unexpected response status {HTTPStatus.LOCKED} Locked for {url} """
                    f"""(attempt {attempt}/{max_attempts}), retrying..."""
                )
                time.sleep((attempt + 1) * 0.100)
            elif resp.status in (
                HTTPStatus.OK,
                HTTPStatus.CREATED,
                HTTPStatus.NO_CONTENT,
            ):
                # The frontend accepted the empty PUT without redirecting:
                # stop probing and upload directly to the original URL.
                # Without this break we would needlessly re-send the empty
                # probe PUT on every remaining attempt.
                break
            else:
                raise ValueError(
                    f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                    f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
                )

        # We were redirected to a backend server. Upload the file contents to
        # its final destination.

        # XRootD backend servers typically use a single port number for
        # accepting connections from clients. It is therefore beneficial
        # to keep those connections open, if the server allows.

        # Ask the server to compute and record a checksum of the uploaded
        # file contents, for later integrity checks. Since we don't compute
        # the digest ourselves while uploading the data, we cannot control
        # after the request is complete that the data we uploaded is
        # identical to the data recorded by the server, but at least the
        # server has recorded a digest of the data it stored.
        #
        # See RFC-3230 for details and
        # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
        # for the list of supported digest algorithms.
        #
        # In addition, note that not all servers implement this RFC so
        # the checksum request may be ignored by the server.
        headers = {}
        if (checksum := self._config.request_checksum) is not None:
            headers = {"Want-Digest": checksum}

        # For XRootD endpoints, we always use the same pool manager, namely
        # the frontend pool manager, to increase the chance of reusing
        # network connections.
        resp = self._request(
            "PUT",
            url,
            body=data,
            headers=headers,
            pool_manager=self._frontend,
        )

        if resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )

    def info(self, url: str, name: str | None = None) -> dict[str, Any]:
        """Return metadata about the resource at `url`, augmenting the
        base-class result with the ADLER32 checksum for files.
        """
        # XRootD does not include checksums in the response to PROPFIND
        # request. We need to send a specific HEAD request to retrieve
        # the ADLER32 checksum.
        #
        # If found, the checksum is included in the response header "Digest",
        # which is of the form:
        #
        #    Digest: adler32=0e4709f2
        result = super().info(url, name)
        if result["type"] == "file":
            headers: dict[str, str] = {"Want-Digest": "adler32"}
            resp = self._head(url=url, headers=headers)
            if (digest := resp.headers.get("Digest")) is not None:
                value = digest.split("=")[1]
                result["checksums"].update({"adler32": value})

        return result
2234
+
2235
+
2236
class DavFileMetadata:
    """Immutable view of the attributes of a single webDAV file or
    directory.

    Parameters
    ----------
    base_url : `str`
        Base URL.
    href : `str`, optional
        Path component appended to the base URL to form the resource URL.
    name : `str`, optional
        Display name of the resource.
    exists : `bool`, optional
        Whether the file or directory exists.
    size : `int`, optional
        Size of the file, in bytes.
    is_dir : `bool`, optional
        True when the URL points to a directory, False for a file.
    last_modified : `datetime`, optional
        Date of last modification.
    checksums : `dict` [ `str`, `str` ] | `None`, optional
        Known checksums of the file contents, keyed by algorithm name.
    """

    def __init__(
        self,
        base_url: str,
        href: str = "",
        name: str = "",
        exists: bool = False,
        size: int = -1,
        is_dir: bool = False,
        last_modified: datetime = datetime.min,
        checksums: dict[str, str] | None = None,
    ):
        # Build the full resource URL, avoiding a duplicate "/" at the
        # junction when a href was provided.
        if href:
            self._url: str = base_url.rstrip("/") + href
        else:
            self._url = base_url
        self._href: str = href
        self._name: str = name
        self._exists: bool = exists
        self._size: int = size
        self._is_dir: bool = is_dir
        self._last_modified: datetime = last_modified
        self._checksums: dict[str, str] = dict(checksums) if checksums is not None else {}

    @staticmethod
    def from_property(base_url: str, property: DavProperty) -> DavFileMetadata:
        """Build an instance from a parsed PROPFIND property.

        Parameters
        ----------
        base_url : `str`
            Base URL.
        property : `DavProperty`
            Properties to associate with the URL.
        """
        return DavFileMetadata(
            base_url=base_url,
            href=property.href,
            name=property.name,
            exists=property.exists,
            size=property.size,
            is_dir=property.is_dir,
            last_modified=property.last_modified,
            checksums=dict(property.checksums),
        )

    def __str__(self) -> str:
        fields = (
            self._url,
            self._href,
            self._name,
            self._exists,
            self._size,
            self._is_dir,
            self._checksums,
        )
        return " ".join(str(field) for field in fields)

    @property
    def url(self) -> str:
        """Full URL of the resource."""
        return self._url

    @property
    def href(self) -> str:
        """Path component of the resource relative to the base URL."""
        return self._href

    @property
    def name(self) -> str:
        """Display name of the resource."""
        return self._name

    @property
    def exists(self) -> bool:
        """True when the resource exists on the server."""
        return self._exists

    @property
    def size(self) -> int:
        """Size in bytes: -1 for a non-existent resource, 0 for a
        directory, the recorded file size otherwise.
        """
        if not self._exists:
            return -1
        if self._is_dir:
            return 0
        return self._size

    @property
    def is_dir(self) -> bool:
        """True when the resource exists and is a directory."""
        return self._is_dir if self._exists else False

    @property
    def is_file(self) -> bool:
        """True when the resource exists and is a file."""
        return (not self._is_dir) if self._exists else False

    @property
    def last_modified(self) -> datetime:
        """Date of last modification."""
        return self._last_modified

    @property
    def checksums(self) -> dict[str, str]:
        """Known checksums keyed by algorithm name."""
        return self._checksums
2345
+
2346
+
2347
class DavProperty:
    """Selected live DAV properties of a single resource, extracted from
    one 'response' element of a PROPFIND reply.

    Parameters
    ----------
    response : `eTree.Element` or `None`
        The XML 'response' element to parse, or `None` for an empty
        instance.
    """

    # Matches the 'status' line of a 'propstat' element when it reports
    # success (e.g. "HTTP/1.1 200 OK").
    _status_ok_rex = re.compile(r"^HTTP/.* 200 .*$", re.IGNORECASE)

    def __init__(self, response: eTree.Element | None):
        self._href: str = ""
        self._displayname: str = ""
        self._collection: bool = False
        self._getlastmodified: str = ""
        self._getcontentlength: int = -1
        self._checksums: dict[str, str] = {}

        if response is not None:
            self._parse(response)

    def _parse(self, response: eTree.Element) -> None:
        # The 'href' element is mandatory: refuse to build an instance
        # without it.
        href_node = response.find("./{DAV:}href")
        if href_node is None:
            raise ValueError(
                "Property 'href' expected but not found in PROPFIND response: "
                f"{eTree.tostring(response, encoding='unicode')}"
            )
        # str() keeps mypy happy about the Optional element text.
        self._href = str(href_node.text).strip()

        for propstat in response.findall("./{DAV:}propstat"):
            # Only honor property groups whose status reports success.
            status = propstat.find("./{DAV:}status")
            if status is None or not self._status_ok_rex.match(str(status.text)):
                continue

            for prop in propstat.findall("./{DAV:}prop"):
                # A 'collection' child of 'resourcetype' marks a directory.
                if prop.find("./{DAV:}resourcetype/{DAV:}collection") is not None:
                    self._collection = True

                node = prop.find("./{DAV:}getlastmodified")
                if node is not None:
                    self._getlastmodified = str(node.text)

                node = prop.find("./{DAV:}getcontentlength")
                if node is not None:
                    self._getcontentlength = int(str(node.text))

                node = prop.find("./{DAV:}displayname")
                if node is not None:
                    self._displayname = str(node.text)

                # dCache-specific checksum list.
                node = prop.find("./{http://www.dcache.org/2013/webdav}Checksums")
                if node is not None:
                    self._checksums = self._parse_checksums(node.text)

        # Some webDAV servers don't include the 'displayname' property in the
        # response; fall back to the last component of 'href' (which may or
        # may not end with '/' depending on the server).
        if not self._displayname:
            self._displayname = os.path.basename(self._href.rstrip("/"))

        # Normalize collections: exactly one trailing "/" on the href and a
        # size of zero.
        if self._collection:
            self._href = self._href.rstrip("/") + "/"
            self._getcontentlength = 0

    def _parse_checksums(self, checksums: str | None) -> dict[str, str]:
        # The argument is a comma-separated list such as
        #    md5=MyS/wljSzI9WYiyrsuyoxw==,adler32=23b104f2
        result: dict[str, str] = {}
        if checksums is None:
            return result

        for item in checksums.split(","):
            algorithm, sep, value = item.partition("=")
            if not sep:
                continue
            algorithm = algorithm.lower()
            if algorithm == "md5":
                # dCache base64-encodes the MD5 digest; store it as hex.
                # See:
                # https://www.dcache.org/manuals/UserGuide-10.2/webdav.shtml#checksums
                result[algorithm] = bytes.hex(base64.standard_b64decode(value))
            else:
                result[algorithm] = value

        return result

    @property
    def exists(self) -> bool:
        """True when the resource is a collection or a file with a
        non-negative content length.
        """
        if self._collection:
            return True
        return self._getcontentlength >= 0

    @property
    def is_dir(self) -> bool:
        """True when the resource is a collection."""
        return self._collection

    @property
    def is_file(self) -> bool:
        """True when the resource is not a collection."""
        return not self._collection

    @property
    def last_modified(self) -> datetime:
        """Last modification date, or `datetime.min` when unknown."""
        if not self._getlastmodified:
            return datetime.min

        # Timestamps look like 'Wed, 12 Mar 2025 10:11:13 GMT'.
        return datetime.strptime(self._getlastmodified, "%a, %d %b %Y %H:%M:%S %Z")

    @property
    def size(self) -> int:
        """Content length, or -1 when unknown."""
        return self._getcontentlength

    @property
    def name(self) -> str:
        """Display name of the resource."""
        return self._displayname

    @property
    def href(self) -> str:
        """Path of the resource as reported by the server."""
        return self._href

    @property
    def checksums(self) -> dict[str, str]:
        """Known checksums keyed by algorithm name."""
        return self._checksums
2482
+
2483
+
2484
class DavPropfindParser:
    """Helper class to parse the response body of a PROPFIND request."""

    def parse(self, body: bytes) -> list[DavProperty]:
        """Parse the XML-encoded contents of the response body to a webDAV
        PROPFIND request.

        Parameters
        ----------
        body : `bytes`
            XML-encoded response body to a PROPFIND request.

        Returns
        -------
        responses : `list` [ `DavProperty` ]
            Parsed content of the response.

        Raises
        ------
        ValueError
            If `body` contains no 'response' element: at least one response
            is expected.

        Notes
        -----
        A response body to a PROPFIND request is of the form (indented for
        readability)::

            <?xml version="1.0" encoding="UTF-8"?>
            <D:multistatus xmlns:D="DAV:">
              <D:response>
                <D:href>path/to/resource</D:href>
                <D:propstat>
                  <D:prop>
                    <D:resourcetype>
                      <D:collection xmlns:D="DAV:"/>
                    </D:resourcetype>
                    <D:getlastmodified>Fri, 27 Jan 2023 13:59:01 GMT</D:getlastmodified>
                    <D:getcontentlength>12345</D:getcontentlength>
                  </D:prop>
                  <D:status>HTTP/1.1 200 OK</D:status>
                </D:propstat>
              </D:response>
              <D:response>
                ...
              </D:response>
            </D:multistatus>
        """
        # Scan all the 'response' elements and extract the relevant
        # properties.
        decoded_body: str = body.decode("utf-8").strip()
        multistatus = eTree.fromstring(decoded_body)
        responses = [DavProperty(response) for response in multistatus.findall("./{DAV:}response")]
        if not responses:
            # Could not parse the body.
            raise ValueError(f"Unable to parse response for PROPFIND request: {decoded_body}")

        return responses
2553
+
2554
+
2555
+ class TokenAuthorizer:
2556
+ """Attach a bearer token 'Authorization' header to each request.
2557
+
2558
+ Parameters
2559
+ ----------
2560
+ token : `str`
2561
+ Can be either the path to a local file which contains the
2562
+ value of the token or the token itself. If `token` is a file
2563
+ it must be protected so that only the owner can read and write it.
2564
+ """
2565
+
2566
+ def __init__(self, token: str | None = None) -> None:
2567
+ self._token = self._path = None
2568
+ self._mtime: float = -1.0
2569
+ if token is None:
2570
+ return
2571
+
2572
+ self._token = token
2573
+ if os.path.isfile(token):
2574
+ self._path = os.path.abspath(token)
2575
+ if not self._is_protected(self._path):
2576
+ raise PermissionError(
2577
+ f"""Authorization token file at {self._path} must be protected for access only """
2578
+ """by its owner"""
2579
+ )
2580
+ self._refresh()
2581
+
2582
+ def _refresh(self) -> None:
2583
+ """Read the token file (if any) if its modification time is more recent
2584
+ than the last time we read it.
2585
+ """
2586
+ if self._path is None:
2587
+ return
2588
+
2589
+ if (mtime := os.stat(self._path).st_mtime) > self._mtime:
2590
+ log.debug("Reading authorization token from file %s", self._path)
2591
+ self._mtime = mtime
2592
+ with open(self._path) as f:
2593
+ self._token = f.read().rstrip("\n")
2594
+
2595
+ def _is_protected(self, filepath: str) -> bool:
2596
+ """Return true if the permissions of file at filepath only allow for
2597
+ access by its owner.
2598
+
2599
+ Parameters
2600
+ ----------
2601
+ filepath : `str`
2602
+ Path of a local file.
2603
+ """
2604
+ if not os.path.isfile(filepath):
2605
+ return False
2606
+
2607
+ mode = stat.S_IMODE(os.stat(filepath).st_mode)
2608
+ owner_accessible = bool(mode & stat.S_IRWXU)
2609
+ group_accessible = bool(mode & stat.S_IRWXG)
2610
+ other_accessible = bool(mode & stat.S_IRWXO)
2611
+ return owner_accessible and not group_accessible and not other_accessible
2612
+
2613
+ def set_authorization(self, headers: dict[str, str]) -> None:
2614
+ """Add the 'Authorization' header to `headers`.
2615
+
2616
+ Parameters
2617
+ ----------
2618
+ headers : `dict` [ `str`, `str` ]
2619
+ Dict to augment with authorization information.
2620
+ """
2621
+ if self._token is None:
2622
+ return
2623
+
2624
+ self._refresh()
2625
+ headers["Authorization"] = f"Bearer {self._token}"
2626
+
2627
+
2628
+ def expand_vars(path: str | None) -> str | None:
2629
+ """Expand the environment variables in `path` and return the path with
2630
+ the value of the variable expanded.
2631
+
2632
+ Parameters
2633
+ ----------
2634
+ path : `str` or `None`
2635
+ Abolute or relative path which may include an environment variable
2636
+ (e.g. '$HOME/path/to/my/file').
2637
+
2638
+ Returns
2639
+ -------
2640
+ path: `str`
2641
+ The path with the values of the environment variables expanded.
2642
+ """
2643
+ return None if path is None else os.path.expandvars(path)
2644
+
2645
+
2646
def dump_response(method: str, resp: HTTPResponse) -> None:
    """Dump a response's URL, headers and body length for debugging purposes.

    Parameters
    ----------
    method : `str`
        Method name to include in log output.
    resp : `HTTPResponse`
        Response to dump.
    """
    log.debug("%s %s", method, resp.geturl())
    for header, value in resp.headers.items():
        log.debug(" %s: %s", header, value)
    # Log the body length in bytes: decoding the payload as UTF-8 just to
    # measure it would raise UnicodeDecodeError for binary response bodies.
    log.debug(" response body length: %d", len(resp.data))