lsst-resources 29.2025.1900__py3-none-any.whl → 29.2025.2000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2584 @@
1
+ # This file is part of lsst-resources.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (https://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # Use of this source code is governed by a 3-clause BSD-style
10
+ # license that can be found in the LICENSE file.
11
+
12
+ from __future__ import annotations
13
+
14
+ import base64
15
+ import enum
16
+ import io
17
+ import json
18
+ import logging
19
+ import os
20
+ import posixpath
21
+ import random
22
+ import re
23
+ import stat
24
+ import threading
25
+ import time
26
+ import xml.etree.ElementTree as eTree
27
+ from datetime import datetime
28
+ from http import HTTPStatus
29
+ from typing import Any, BinaryIO
30
+
31
+ try:
32
+ import fsspec
33
+ from fsspec.spec import AbstractFileSystem
34
+ except ImportError:
35
+ fsspec = None
36
+ AbstractFileSystem = type
37
+
38
+ import yaml
39
+ from astropy import units as u
40
+ from urllib3 import PoolManager
41
+ from urllib3.response import HTTPResponse
42
+ from urllib3.util import Retry, Timeout, Url, parse_url
43
+
44
+ from lsst.utils.timer import time_this
45
+
46
+ # Use the same logger as `dav.py`.
47
+ log = logging.getLogger(f"""{__name__.replace(".davutils", ".dav")}""")
48
+
49
+
50
+ def normalize_path(path: str | None) -> str:
51
+ """Normalize a path intended to be part of a URL.
52
+
53
+ A path of the form "///a/b/c///../d/e/" would be normalized as "/a/b/d/e".
54
+ The returned path is always absolute, i.e. starts by "/" and never
55
+ ends by "/" except when the path is exactly "/" and does not contain
56
+ "." nor "..". It does not contain consecutive "/" either.
57
+
58
+ Parameters
59
+ ----------
60
+ path : `str`, optional
61
+ Path to normalize, e.g. '/path/to/..///normalize/'
62
+
63
+ Returns
64
+ -------
65
+ url : `str`
66
+ Normalized URL, e.g. '/path/normalize'
67
+ """
68
+ return "/" if not path else "/" + posixpath.normpath(path).lstrip("/")
69
+
70
+
71
def normalize_url(url: str, preserve_scheme: bool = False, preserve_path: bool = True) -> str:
    """Normalize a URL so that its scheme is 'http' or 'https' and its path
    is normalized.

    Parameters
    ----------
    url : `str`
        URL to normalize, e.g. 'davs://example.org:1234///path/to//../dir/'

    preserve_scheme : `bool`
        If True the scheme of `url` is preserved. Otherwise the scheme of
        the returned normalized URL is 'http' or 'https'.

    preserve_path : `bool`
        If True, the path of `url` is preserved (normalized) in the
        returned URL; otherwise the returned URL has '/' as its path.

    Returns
    -------
    url : `str`
        Normalized URL, e.g. 'https://example.org:1234/path/dir'
    """
    parsed = parse_url(url)

    # Map webDAV schemes onto HTTP ones unless asked to keep them:
    # "dav" becomes "http" and "davs" becomes "https".
    scheme = parsed.scheme
    if scheme is None:
        scheme = "http"
    elif not preserve_scheme:
        scheme = scheme.replace("dav", "http")

    path = normalize_path(parsed.path) if preserve_path else "/"
    return Url(scheme=scheme, host=parsed.host, port=parsed.port, path=path).url
100
+
101
+
102
class DavConfig:
    """Configurable settings a webDAV client must use when interacting with a
    particular storage endpoint.

    Parameters
    ----------
    config : `dict[str, str]`
        Dictionary of configurable settings for the webdav endpoint which
        base URL is ``config["base_url"]``.

        For instance, if ``config["base_url"]`` is

            "davs://webdav.example.org:1234/"

        any object of class `DavResourcePath` like

            "davs://webdav.example.org:1234/path/to/any/file"

        will use the settings in this configuration to configure its client.
    """

    # Timeout (seconds) to establish a network connection with the remote
    # server.
    DEFAULT_TIMEOUT_CONNECT: float = 10.0

    # Timeout (seconds) to read the response to a request sent to a server,
    # headers and body included. It must be large enough to allow for upload
    # and download of files of the typical size this client supports.
    DEFAULT_TIMEOUT_READ: float = 300.0

    # Maximum number of network connections to persist against each one of
    # the hosts in the frontend and backend server pools.
    #
    # Frontend servers typically respond to requests such as OPTIONS,
    # PROPFIND, MKCOL, etc. and redirect to backend servers for GET and PUT
    # requests (e.g. dCache) — sometimes also for metadata requests such as
    # PROPFIND or HEAD (e.g. XRootD).
    DEFAULT_PERSISTENT_CONNECTIONS_FRONTEND: int = 50
    DEFAULT_PERSISTENT_CONNECTIONS_BACKEND: int = 100

    # Size of the buffer (in mebibytes, i.e. 1024*1024 bytes) used when
    # sending requests and receiving responses.
    DEFAULT_BUFFER_SIZE: int = 5

    # Number of times to retry requests before failing. Retry happens only
    # under certain conditions.
    DEFAULT_RETRIES: int = 3

    # Minimal and maximal retry backoff (seconds). A value in this interval
    # is randomly selected as the backoff factor every time a request is
    # retried.
    DEFAULT_RETRY_BACKOFF_MIN: float = 1.0
    DEFAULT_RETRY_BACKOFF_MAX: float = 3.0

    # Path to a directory or certificate bundle file containing the
    # certificates of the trusted certificate authorities, used to verify
    # the server's host certificate. If None, the certificates trusted by
    # the system are used.
    DEFAULT_TRUSTED_AUTHORITIES: str | None = None

    # Paths to the client certificate and associated private key presented
    # to the server for authentication purposes. If None, no client
    # certificate is presented.
    DEFAULT_USER_CERT: str | None = None
    DEFAULT_USER_KEY: str | None = None

    # Token sent to the server for authentication purposes. The token may
    # be the value of the token itself or the path to a file where the
    # token can be found.
    DEFAULT_TOKEN: str | None = None

    # Default checksum algorithm to request the server to compute on every
    # file upload (see RFC 3230). Not all servers support this.
    DEFAULT_REQUEST_CHECKSUM: str | None = None

    # If True, the webdav client can return objects compliant with the
    # fsspec specification. See: https://filesystem-spec.readthedocs.io
    DEFAULT_ENABLE_FSSPEC: bool = True

    # If True, memory usage is computed and reported when executing in
    # debug mode. Computing memory usage is costly, so only set this when
    # debugging.
    DEFAULT_COLLECT_MEMORY_USAGE: bool = False

    # Accepted checksum algorithms. Must be lowercase.
    ACCEPTED_CHECKSUMS: list[str] = ["adler32", "md5", "sha-256", "sha-512"]

    def __init__(self, config: dict | None = None) -> None:
        settings = {} if config is None else config

        # "_default_" marks the configuration used when no endpoint matches.
        base_url = settings.get("base_url")
        if base_url is None:
            self._base_url = "_default_"
        else:
            self._base_url = normalize_url(base_url, preserve_path=False)

        cls = DavConfig
        self._timeout_connect: float = float(settings.get("timeout_connect", cls.DEFAULT_TIMEOUT_CONNECT))
        self._timeout_read: float = float(settings.get("timeout_read", cls.DEFAULT_TIMEOUT_READ))
        self._persistent_connections_frontend: int = int(
            settings.get(
                "persistent_connections_frontend",
                cls.DEFAULT_PERSISTENT_CONNECTIONS_FRONTEND,
            )
        )
        self._persistent_connections_backend: int = int(
            settings.get(
                "persistent_connections_backend",
                cls.DEFAULT_PERSISTENT_CONNECTIONS_BACKEND,
            )
        )
        # Configured in mebibytes, stored in bytes.
        self._buffer_size: int = 1_048_576 * int(settings.get("buffer_size", cls.DEFAULT_BUFFER_SIZE))
        self._retries: int = int(settings.get("retries", cls.DEFAULT_RETRIES))
        self._retry_backoff_min: float = float(
            settings.get("retry_backoff_min", cls.DEFAULT_RETRY_BACKOFF_MIN)
        )
        self._retry_backoff_max: float = float(
            settings.get("retry_backoff_max", cls.DEFAULT_RETRY_BACKOFF_MAX)
        )
        # Paths may contain environment variables, expanded by expand_vars().
        self._trusted_authorities: str | None = expand_vars(
            settings.get("trusted_authorities", cls.DEFAULT_TRUSTED_AUTHORITIES)
        )
        self._user_cert: str | None = expand_vars(settings.get("user_cert", cls.DEFAULT_USER_CERT))
        self._user_key: str | None = expand_vars(settings.get("user_key", cls.DEFAULT_USER_KEY))
        self._token: str | None = expand_vars(settings.get("token", cls.DEFAULT_TOKEN))
        self._enable_fsspec: bool = settings.get("enable_fsspec", cls.DEFAULT_ENABLE_FSSPEC)
        self._collect_memory_usage: bool = settings.get(
            "collect_memory_usage", cls.DEFAULT_COLLECT_MEMORY_USAGE
        )

        # Checksum algorithm names are matched case-insensitively and
        # validated against the accepted set.
        self._request_checksum: str | None = settings.get(
            "request_checksum", cls.DEFAULT_REQUEST_CHECKSUM
        )
        if self._request_checksum is not None:
            self._request_checksum = self._request_checksum.lower()
            if self._request_checksum not in cls.ACCEPTED_CHECKSUMS:
                raise ValueError(
                    f"Value for checksum algorithm {self._request_checksum} for storage endpoint "
                    f"{self._base_url} is not among the accepted values: {cls.ACCEPTED_CHECKSUMS}"
                )

    @property
    def base_url(self) -> str:
        """Base URL of the endpoint, or "_default_" when unset."""
        return self._base_url

    @property
    def timeout_connect(self) -> float:
        """Connection timeout, in seconds."""
        return self._timeout_connect

    @property
    def timeout_read(self) -> float:
        """Read timeout, in seconds."""
        return self._timeout_read

    @property
    def persistent_connections_frontend(self) -> int:
        """Persistent connections kept per frontend host."""
        return self._persistent_connections_frontend

    @property
    def persistent_connections_backend(self) -> int:
        """Persistent connections kept per backend host."""
        return self._persistent_connections_backend

    @property
    def buffer_size(self) -> int:
        """I/O buffer size, in bytes."""
        return self._buffer_size

    @property
    def retries(self) -> int:
        """Number of retries before failing."""
        return self._retries

    @property
    def retry_backoff_min(self) -> float:
        """Lower bound (seconds) for the retry backoff factor."""
        return self._retry_backoff_min

    @property
    def retry_backoff_max(self) -> float:
        """Upper bound (seconds) for the retry backoff factor."""
        return self._retry_backoff_max

    @property
    def trusted_authorities(self) -> str | None:
        """Path to trusted CA certificates (file or directory), if any."""
        return self._trusted_authorities

    @property
    def token(self) -> str | None:
        """Bearer token value or path to a token file, if any."""
        return self._token

    @property
    def request_checksum(self) -> str | None:
        """Lowercased checksum algorithm to request on upload, if any."""
        return self._request_checksum

    @property
    def user_cert(self) -> str | None:
        """Path to the client certificate, if any."""
        return self._user_cert

    @property
    def user_key(self) -> str | None:
        """Path to the client private key, if any.

        If no user certificate was specified in the configuration, the
        private key is ignored even if it was provided. If a certificate
        was specified but no key, the private key is assumed to be included
        in the same file as the certificate — typically the case when a
        X.509 grid proxy is used as client certificate.
        """
        if self._user_cert is None:
            return None
        return self._user_key if self._user_key is not None else self._user_cert

    @property
    def enable_fsspec(self) -> bool:
        """Whether fsspec-compliant objects may be returned."""
        return self._enable_fsspec

    @property
    def collect_memory_usage(self) -> bool:
        """Whether memory usage is reported when debugging."""
        return self._collect_memory_usage
320
+
321
+
322
class DavConfigPool:
    """Registry of configurable settings for all known webDAV endpoints.

    Parameters
    ----------
    filename : `str`, optional
        Name of an environment variable, or path of a file, to load the
        configuration from. If `filename` is the name of an environment
        variable which is set, the value of that variable is used as the
        path of the configuration file; otherwise `filename` itself is
        used as the path. Paths may include environment variables
        (e.g. "$HOME/path/to/config.yaml") or "~"
        (e.g. "~/path/to/config.yaml").

        The configuration file is a YAML file with the structure below:

        - base_url: "davs://webdav1.example.org:1234/"
          persistent_connections_frontend: 10
          persistent_connections_backend: 100
          timeout_connect: 20.0
          timeout_read: 120.0
          retries: 3
          retry_backoff_min: 1.0
          retry_backoff_max: 3.0
          user_cert: "${X509_USER_PROXY}"
          user_key: "${X509_USER_PROXY}"
          token: "/path/to/bearer/token/file"
          trusted_authorities: "/etc/grid-security/certificates"
          buffer_size: 5
          enable_fsspec: false
          request_checksum: "md5"
          collect_memory_usage: false

        - base_url: "davs://webdav2.example.org:1234/"
          persistent_connections_frontend: 5
          ...

        All settings are optional. If no settings are found in the
        configuration file for a particular webDAV endpoint, sensible
        defaults will be used.

    Notes
    -----
    There is only a single instance of this class. This thread-safe
    singleton is intended to be initialized when the module is imported
    the first time.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls, filename: str | None = None) -> DavConfigPool:
        # Double-checked locking: only take the lock when the singleton
        # does not exist yet.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)

        return cls._instance

    def __init__(self, filename: str | None = None) -> None:
        # Default configuration, used when a URL does not match any of the
        # endpoints found in the configuration file.
        self._default_config: DavConfig = DavConfig()

        # Configuration per endpoint, keyed by the endpoint URL, e.g.
        # "davs://host.example.org:1234/".
        self._configs: dict[str, DavConfig] = {}

        # Load the configuration from the file we have been provided with,
        # if any.
        if filename is None:
            return

        # `filename` may be the name of an environment variable holding
        # the path of the configuration file, or directly a path. Expand
        # environment variables and '~' in the resulting path, if any.
        env_value = os.getenv(filename)
        path = os.path.expanduser(os.path.expandvars(filename if env_value is None else env_value))
        if env_value is None and not os.path.isfile(path):
            # `filename` is neither a set environment variable nor an
            # existing configuration file: keep only the defaults.
            return

        with open(path) as file:
            for config_item in yaml.safe_load(file):
                config = DavConfig(config_item)
                if config.base_url in self._configs:
                    # We already have a configuration for the same
                    # endpoint. That is likely a human error in the
                    # configuration file.
                    raise ValueError(
                        f"configuration file {path} contains two configurations for "
                        f"endpoint {config.base_url}"
                    )
                self._configs[config.base_url] = config

    def get_config_for_url(self, url: str) -> DavConfig:
        """Return the configuration a webDAV client must use when
        interacting with the server which hosts the resource at `url`.
        """
        # Select the configuration for the endpoint of the provided URL,
        # falling back to the default configuration when none matches.
        endpoint: str = normalize_url(url, preserve_path=False)
        config = self._configs.get(endpoint)
        return self._default_config if config is None else config

    def _destroy(self) -> None:
        """Destroy this class singleton instance.

        Helper method to be used in tests to reset global configuration.
        """
        with DavConfigPool._lock:
            DavConfigPool._instance = None
436
+
437
+
438
def make_retry(config: DavConfig) -> Retry:
    """Create a ``urllib3.util.Retry`` object from settings in `config`.

    Parameters
    ----------
    config : `DavConfig`
        Configurable settings for a webDAV storage endpoint.

    Returns
    -------
    retry : `urllib3.util.Retry`
        Retry object to be used when creating a ``urllib3.PoolManager``.
    """
    # Backoff factor (seconds) applied between attempts after the second
    # try. A random jitter within the configured bounds prevents clients
    # started at the same time (even on different hosts) from overwhelming
    # the server by retrying in lockstep.
    low, high = config.retry_backoff_min, config.retry_backoff_max
    jittered_backoff = low + (high - low) * random.random()

    # Set of uppercased HTTP method verbs we should retry on: only
    # idempotent requests are retried automatically.
    idempotent_methods = frozenset(
        ("COPY", "DELETE", "GET", "HEAD", "MKCOL", "OPTIONS", "PROPFIND", "PUT")
    )

    # HTTP status codes that force a retry.
    retryable_statuses = frozenset(
        (
            HTTPStatus.TOO_MANY_REQUESTS,  # 429
            HTTPStatus.INTERNAL_SERVER_ERROR,  # 500
            HTTPStatus.BAD_GATEWAY,  # 502
            HTTPStatus.SERVICE_UNAVAILABLE,  # 503
            HTTPStatus.GATEWAY_TIMEOUT,  # 504
        )
    )

    return Retry(
        # Total number of retries to allow. Takes precedence over the other
        # counts below.
        total=2 * config.retries,
        # How many connection-related errors to retry on.
        connect=config.retries,
        # How many times to retry on read errors.
        read=config.retries,
        # How many times to retry on bad status codes.
        status=config.retries,
        backoff_factor=jittered_backoff,
        allowed_methods=idempotent_methods,
        status_forcelist=retryable_statuses,
        # Respect the "Retry-After" header on the status codes above.
        respect_retry_after_header=True,
    )
497
+
498
+
499
class DavClientPool:
    """Container of reusable webDAV clients, each one specifically configured
    to talk to a single storage endpoint.

    Parameters
    ----------
    config_pool : `DavConfigPool`
        Pool of all known webDAV client configurations.

    Notes
    -----
    There is a single instance of this class. This thread-safe singleton is
    intended to be initialized when the module is imported the first time.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls, config_pool: DavConfigPool) -> DavClientPool:
        # Double-checked locking: only take the lock when the singleton
        # does not exist yet.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)

        return cls._instance

    def __init__(self, config_pool: DavConfigPool) -> None:
        self._config_pool: DavConfigPool = config_pool

        # Reusable clients keyed by path-stripped endpoint URL of the form
        # "davs://host.example.org:1234/".
        self._clients: dict[str, DavClient] = {}

    def get_client_for_url(self, url: str) -> DavClient:
        """Return a client for interacting with the endpoint where `url`
        is hosted.

        The returned client is thread-safe. If a client for that endpoint
        already exists it is reused, otherwise a new client is created
        with the appropriate configuration for interacting with the
        storage endpoint.
        """
        endpoint = normalize_url(url, preserve_path=False)

        # Fast path: reuse an existing client for this endpoint.
        client = self._clients.get(endpoint)
        if client is not None:
            return client

        # No client for this endpoint was found. Create a new one and save
        # it for serving subsequent requests.
        with DavClientPool._lock:
            # Another thread may have created a client for this endpoint
            # while we were waiting for the lock: reuse it.
            client = self._clients.get(endpoint)
            if client is not None:
                return client

            config: DavConfig = self._config_pool.get_config_for_url(endpoint)
            self._clients[endpoint] = self._make_client(endpoint, config)

        return self._clients[endpoint]

    def _make_client(self, url: str, config: DavConfig) -> DavClient:
        """Make a webDAV client for interacting with the server at `url`."""
        # Probe the server: check it implements the webDAV protocol and
        # retrieve its identity so that a client specific to that server
        # implementation can be built.
        probe = DavClient(url, config)
        details = probe.get_server_details(url)
        server_id = details.get("Server", None)
        accepts_ranges: bool | str | None = details.get("Accept-Ranges", None)
        if accepts_ranges is not None:
            accepts_ranges = accepts_ranges == "bytes"

        if server_id is not None:
            if server_id.startswith("dCache/"):
                # Client specialized for dCache webDAV servers.
                return DavClientDCache(url, config, accepts_ranges)
            if server_id.startswith("XrootD/"):
                # Client specialized for XRootD webDAV servers.
                return DavClientXrootD(url, config, accepts_ranges)

        # Generic webDAV client.
        return DavClient(url, config, accepts_ranges)

    def _destroy(self) -> None:
        """Destroy this class singleton instance.

        Helper method to be used in tests to reset global configuration.
        """
        with DavClientPool._lock:
            DavClientPool._instance = None
593
+
594
+
595
+ class DavClient:
596
+ """WebDAV client, configured to talk to a single storage endpoint.
597
+
598
+ Instances of this class are thread-safe.
599
+
600
+ Parameters
601
+ ----------
602
+ url : `str`
603
+ Root URL of the storage endpoint (e.g. "https://host.example.org:1234/")
604
+
605
+ config : `DavConfig`
606
+ Configuration to initialize this client.
607
+ """
608
+
609
+ def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
610
+ # Lock to protect this client fields from concurrent modification.
611
+ self._lock = threading.Lock()
612
+
613
+ # Configuration for the storage endpoint.
614
+ self._config: DavConfig = config
615
+
616
+ # Prepare the trusted authorities certificates
617
+ ca_certs, ca_cert_dir = None, None
618
+ if self._config.trusted_authorities is not None:
619
+ if os.path.isdir(self._config.trusted_authorities):
620
+ ca_cert_dir = self._config.trusted_authorities
621
+ elif os.path.isfile(self._config.trusted_authorities):
622
+ ca_certs = self._config.trusted_authorities
623
+ else:
624
+ raise FileNotFoundError(
625
+ f"Trusted authorities file or directory {self._config.trusted_authorities} does not exist"
626
+ )
627
+
628
+ # If a token was specified for this endpoint, prefer it as the
629
+ # authentication method, instead of a <user certificate, private key>
630
+ # pair, even if they were also specified.
631
+ self._authorizer: TokenAuthorizer | None = None
632
+ if self._config.token is not None:
633
+ self._authorizer = TokenAuthorizer(self._config.token)
634
+ user_cert, user_key = None, None
635
+ else:
636
+ user_cert = self._config.user_cert
637
+ user_key = self._config.user_key
638
+
639
+ # We use this pool manager for sending requests that the front
640
+ # server typically responds to directly without redirecting (e.g.
641
+ # OPTIONS, HEAD, etc.)
642
+ #
643
+ # Connections in this pool are generally left open by the client but
644
+ # the front-end server may choose to close them in some specific
645
+ # situations (e.g. PUT request with "Expect: 100-continue" header).
646
+ self._frontend = PoolManager(
647
+ # Number of connection pools to cache before discarding the least
648
+ # recently used pool. Each connection pool manages network
649
+ # connections to a single host, so this is basically the number
650
+ # of "host:port" we persist network connections to.
651
+ num_pools=10,
652
+ # Number of connections to the same "host:port" to persist for
653
+ # later reuse. More than 1 is useful in multithreaded situations.
654
+ # If more than this number of network connections are needed at
655
+ # a particular moment, they will be created and used but not
656
+ # perrsisted.
657
+ maxsize=self._config.persistent_connections_frontend,
658
+ # Retry configuration to use by default with requests sent to
659
+ # host in the front end.
660
+ retries=make_retry(self._config),
661
+ # Socket timeout in seconds for each individual connection.
662
+ timeout=Timeout(
663
+ connect=self._config.timeout_connect,
664
+ read=self._config.timeout_read,
665
+ ),
666
+ # Size in bytes of the buffer for reading/writing data from/to
667
+ # the underlying socket.
668
+ blocksize=self._config.buffer_size,
669
+ # Client certificate and private key for esablishing TLS
670
+ # connections. If None, no client certificate is sent to the
671
+ # server. Only relevant for endpoints using secure HTTP protocol.
672
+ cert_file=user_cert,
673
+ key_file=user_key,
674
+ # We require verification of the server certificate.
675
+ cert_reqs="CERT_REQUIRED",
676
+ # Directory where the certificates of the trusted certificate
677
+ # authorities can be found. The contents of that directory
678
+ # must be as expected by OpenSSL.
679
+ ca_cert_dir=ca_cert_dir,
680
+ # Path to a file of concatenated CA certificates in PEM format.
681
+ ca_certs=ca_certs,
682
+ )
683
+
684
+ # We use this pool manager to send requests to the backend hosts.
685
+ # Those requests are typically 'GET' and 'PUT'. The backend servers
686
+ # typically leave the connection open after serving the request,
687
+ # but we want the client to have the possibility to close them
688
+ # when there is no benefit of persist those connections.
689
+ #
690
+ # That is the case, for instance, when the backend servers use a
691
+ # range of ports for listening for new connections. In that case
692
+ # it is likely that a connection to the same pair
693
+ # <backend server, port number>
694
+ # is not going to be reused in a short interval of time
695
+ self._backend = PoolManager(
696
+ num_pools=100,
697
+ maxsize=self._config.persistent_connections_backend,
698
+ retries=make_retry(self._config),
699
+ timeout=Timeout(
700
+ connect=self._config.timeout_connect,
701
+ read=self._config.timeout_read,
702
+ ),
703
+ blocksize=self._config.buffer_size,
704
+ cert_file=user_cert,
705
+ key_file=user_key,
706
+ cert_reqs="CERT_REQUIRED",
707
+ ca_cert_dir=ca_cert_dir,
708
+ ca_certs=ca_certs,
709
+ )
710
+
711
+ # Parser of PROPFIND responses.
712
+ self._propfind_parser: DavPropfindParser = DavPropfindParser()
713
+
714
+ # Does the remote server accept "Range" header in GET requests?
715
+ # This field is lazy initialized.
716
+ self._accepts_ranges: bool | None = accepts_ranges
717
+
718
+ # Base URL of the server this is a client for. It is of the form:
719
+ # "davs://host.example.org:1234./"
720
+ self._base_url: str = url
721
+
722
+ def get_server_details(self, url: str) -> dict[str, str]:
723
+ """
724
+ Retrieve the details of the server and check it advertises compliance
725
+ to class 1 of webDAV protocol.
726
+
727
+ Returns
728
+ -------
729
+ details: `dic[str, str]`
730
+ The keys of the returned dictionary can be "Server" and
731
+ "Accept-Ranges". Any of those keys may not exist in the returned
732
+ dictionary if the server did not include it in its response.
733
+
734
+ The values are the values of the corresponding
735
+ headers found in the response to the OPTIONS request.
736
+ Examples of values for the "Server" header are 'dCache/9.2.4' or
737
+ 'XrootD/v5.7.1'.
738
+ """
739
+ # Check that the value "1" is part of the value of the "DAV" header in
740
+ # the response to an 'OPTIONS' request.
741
+ #
742
+ # We don't rely on webDAV locks, so a server complying to class 1 is
743
+ # enough for our purposes. All webDAV servers must advertise at least
744
+ # compliance class "1".
745
+ #
746
+ # Compliance classes are documented in
747
+ # http://www.webdav.org/specs/rfc4918.html#dav.compliance.classes
748
+ #
749
+ # Examples of values for header DAV are:
750
+ # DAV: 1, 2
751
+ # DAV: 1, <http://apache.org/dav/propset/fs/1>
752
+ resp = self._options(url)
753
+ if "DAV" not in resp.headers:
754
+ raise ValueError(f"Server of {resp.geturl()} does not implement webDAV protocol")
755
+
756
+ if "1" not in resp.headers.get("DAV").replace(" ", "").split(","):
757
+ raise ValueError(
758
+ f"Server of {resp.geturl()} does not advertise required compliance to webDAV protocol class 1"
759
+ )
760
+
761
+ # The value of 'Server' header is expected to be of the form
762
+ # 'dCache/9.2.4' or 'XrootD/v5.7.1'. Not all servers include such a
763
+ # header in their response to an OPTIONS request. If no such a
764
+ # header is found in the response, use "_unknown_".
765
+ details: dict[str, str] = {}
766
+ for header in ("Server", "Accept-Ranges"):
767
+ value = resp.headers.get(header, None)
768
+ if value is not None:
769
+ details[header] = value
770
+
771
+ return details
772
+
773
+ def _options(self, url: str) -> HTTPResponse:
774
+ """Send a HTTP OPTIONS request and return the response.
775
+
776
+ Parameters
777
+ ----------
778
+ url : `str`
779
+ Target URL.
780
+ """
781
+ resp = self._request("OPTIONS", url)
782
+ if resp.status in (HTTPStatus.OK, HTTPStatus.CREATED):
783
+ return resp
784
+ else:
785
+ raise ValueError(
786
+ f"""Unexpected response to OPTIONS request to {resp.geturl()}: status {resp.status} """
787
+ f"""{resp.reason}"""
788
+ )
789
+
790
    def _request(
        self,
        method: str,
        url: str,
        headers: dict[str, str] | None = None,
        body: BinaryIO | bytes | str | None = None,
        pool_manager: PoolManager | None = None,
        preload_content: bool = True,
        redirect: bool = True,
    ) -> HTTPResponse:
        """Send a generic HTTP request and return the response.

        Parameters
        ----------
        method : `str`
            Request method, e.g. 'GET', 'PUT', 'PROPFIND'.
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        body : `bytes` or `str` or `None`, optional
            Request body.
        pool_manager : `PoolManager`, optional
            Pool manager to use to send the request. By default, the requests
            are sent to the frontend servers.
        preload_content : `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call `.read()` on the returned response object to
            download the body, either entirely in one call or by chunks.
        redirect : `bool`, optional
            If True, automatically handle redirects. If False, the returned
            response may contain a redirection to another location.

        Returns
        -------
        resp : `HTTPResponse`
            Response to the request as received from the server.
        """
        # If this client is configured to use a bearer token for
        # authentication, ensure we only set the token to requests over secure
        # HTTP to avoid leaking the token.
        #
        # Copy the caller's headers so that adding the Authorization header
        # does not mutate the caller's dictionary.
        headers = {} if headers is None else dict(headers)
        if self._authorizer is not None and url.startswith("https://"):
            self._authorizer.set_authorization(headers)

        # By default, send the request to a frontend server.
        if pool_manager is None:
            pool_manager = self._frontend

        log.debug("sending request %s %s", method, url)

        # Time (and optionally measure memory of) the whole round trip for
        # diagnostic logging.
        with time_this(
            log,
            msg="%s %s",
            args=(
                method,
                url,
            ),
            mem_usage=self._config.collect_memory_usage,
            mem_unit=u.mebibyte,
        ):
            resp = pool_manager.request(
                method,
                url,
                body=body,
                headers=headers,
                preload_content=preload_content,
                redirect=redirect,
            )

        return resp
862
+
863
    def _get(
        self, url: str, headers: dict[str, str] | None = None, preload_content: bool = True
    ) -> HTTPResponse:
        """Send a HTTP GET request, following at most one redirection to a
        backend server.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        preload_content : `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call the `.read()` on the returned response
            object to download the body.

        Returns
        -------
        resp : `HTTPResponse`
            Response to the GET request as received from the server.

        Raises
        ------
        FileNotFoundError
            Raised if the server responds with status 404 Not Found.
        ValueError
            Raised on any other unexpected status, from either the frontend
            or the backend server.
        """
        # Send the GET request to the frontend servers. We handle redirections
        # ourselves.
        headers = {} if headers is None else dict(headers)
        resp = self._request("GET", url, headers=headers, preload_content=preload_content, redirect=False)
        if resp.status in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            return resp

        if resp.status == HTTPStatus.NOT_FOUND:
            raise FileNotFoundError(f"No file found at {resp.geturl()}")

        # `get_redirect_location()` returns None when there is no Location
        # header and False when the status is not a redirect status.
        redirect_location = resp.get_redirect_location()
        if redirect_location is None or redirect_location is False:
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # We were redirected to a backend server so follow the redirection.
        # The response body will be automatically downloaded when
        # `preload_content` is true and the underlying network connection
        # may be kept open for future reuse if the maximum number of
        # connections for the backend pool is not reached.
        url = redirect_location
        resp = self._request(
            "GET",
            url,
            headers=headers,
            pool_manager=self._backend,
            preload_content=preload_content,
        )
        if resp.status not in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # The caller will access the `resp.data` property or use
        # the `resp.read()` method to read the contents of the
        # response body. If `preload_content` argument is True, the
        # response body is already downloaded, otherwise `resp.read()`
        # will download it.
        return resp
925
+
926
    def _put(
        self,
        url: str,
        data: BinaryIO | bytes,
    ) -> None:
        """Send a HTTP PUT request.

        Parameters
        ----------
        url : `str`
            Target URL.
        data : `BinaryIO` or `bytes`
            Request body.

        Raises
        ------
        ValueError
            Raised if the server responds with an unexpected status to either
            the initial zero-length probe request or the actual upload.
        """
        # Send a PUT request with empty body and handle redirection. This
        # is useful if the server redirects us; since we cannot rewind the
        # data we are uploading, we don't start uploading data until we
        # connect to the server that will actually serve our request.
        headers = {"Content-Length": "0"}
        resp = self._request("PUT", url, headers=headers, redirect=False)
        if redirect_location := resp.get_redirect_location():
            url = redirect_location
        elif resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )

        # We may have been redirected. Upload the file contents to
        # its final destination.

        # Ask the server to compute and record a checksum of the uploaded
        # file contents, for later integrity checks. Since we don't compute
        # the digest ourselves while uploading the data, we cannot control
        # after the request is complete that the data we uploaded is
        # identical to the data recorded by the server, but at least the
        # server has recorded a digest of the data it stored.
        #
        # See RFC-3230 for details and
        # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
        # for the list of supported digest algorithms.
        #
        # In addition, note that not all servers implement this RFC so
        # the checksum request may be ignored by the server.
        headers = {}
        if (checksum := self._config.request_checksum) is not None:
            headers = {"Want-Digest": checksum}

        # The actual upload always goes through the backend pool.
        resp = self._request(
            "PUT",
            url,
            body=data,
            headers=headers,
            pool_manager=self._backend,
        )

        if resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )
995
+
996
    def _head(self, url: str, headers: dict[str, str] | None = None) -> HTTPResponse:
        """Send a HTTP HEAD request and return the response.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.

        Returns
        -------
        resp : `HTTPResponse`
            Response to the HEAD request as received from the server.

        Raises
        ------
        FileNotFoundError
            Raised if the server responds with status 404 Not Found.
        ValueError
            Raised for any status other than 200 OK or 404 Not Found.
        """
        headers = {} if headers is None else dict(headers)
        resp = self._request("HEAD", url, headers=headers)
        match resp.status:
            case HTTPStatus.OK:
                return resp
            case HTTPStatus.NOT_FOUND:
                raise FileNotFoundError(f"No file found at {resp.geturl()}")
            case _:
                raise ValueError(
                    f"""Unexpected response to HEAD request to {resp.geturl()}: status {resp.status} """
                    f"""{resp.reason}"""
                )
1020
+
1021
+ def _propfind(self, url: str, body: str | None = None, depth: str = "0") -> HTTPResponse:
1022
+ """Send a HTTP PROPFIND request and return the response.
1023
+
1024
+ Parameters
1025
+ ----------
1026
+ url : `str`
1027
+ Target URL.
1028
+ body : `str`, optional
1029
+ Request body.
1030
+ """
1031
+ if body is None:
1032
+ # Request only the DAV live properties we are explicitly interested
1033
+ # in namely 'resourcetype', 'getcontentlength', 'getlastmodified'
1034
+ # and 'displayname'.
1035
+ body = (
1036
+ """<?xml version="1.0" encoding="utf-8"?>"""
1037
+ """<D:propfind xmlns:D="DAV:"><D:prop>"""
1038
+ """<D:resourcetype/><D:getcontentlength/><D:getlastmodified/><D:displayname/>"""
1039
+ """</D:prop></D:propfind>"""
1040
+ )
1041
+
1042
+ headers = {
1043
+ "Depth": depth,
1044
+ "Content-Type": 'application/xml; charset="utf-8"',
1045
+ "Content-Length": str(len(body)),
1046
+ }
1047
+ resp = self._request("PROPFIND", url=url, headers=headers, body=body)
1048
+ if resp.status in (HTTPStatus.MULTI_STATUS, HTTPStatus.NOT_FOUND):
1049
+ return resp
1050
+ else:
1051
+ raise ValueError(
1052
+ f"Unexpected response to PROPFIND {resp.geturl()}: status {resp.status} {resp.reason}"
1053
+ )
1054
+
1055
    def stat(self, url: str) -> DavFileMetadata:
        """Return the properties of file or directory located at `url`.

        Parameters
        ----------
        url : `str`
            Target URL.

        Returns
        -------
        result : `DavFileMetadata`
            Details of the resource at `url`. If no resource was found at
            that URL no exception is raised. Instead the returned details
            allow for detecting that the resource does not exist.
        """
        resp = self._propfind(url)
        match resp.status:
            case HTTPStatus.NOT_FOUND:
                # Build a metadata object for a non-existent resource: its
                # href is the URL path relative to the endpoint's base URL.
                href = url.replace(self._base_url, "", 1)
                return DavFileMetadata(base_url=self._base_url, href=href)
            case HTTPStatus.MULTI_STATUS:
                # A Depth "0" PROPFIND returns a single property element.
                property = self._propfind_parser.parse(resp.data)[0]
                return DavFileMetadata.from_property(base_url=self._base_url, property=property)
            case _:
                raise ValueError(
                    f"""Unexpected response to HTTP PROPFIND request to {resp.geturl()}: status """
                    f"""{resp.status} {resp.reason}"""
                )
1083
+
1084
+ def info(self, url: str, name: str | None = None) -> dict[str, Any]:
1085
+ """Return the details about the file or directory at `url`.
1086
+
1087
+ Parameters
1088
+ ----------
1089
+ url : `str`
1090
+ Target URL.
1091
+ name : `str`
1092
+ Name of the object to be included in the returned value. If None,
1093
+ the `url` is used as name.
1094
+
1095
+ Returns
1096
+ -------
1097
+ result: `dict``
1098
+
1099
+ For an existing file, the returned value has the form:
1100
+
1101
+ {
1102
+ "name": name,
1103
+ "size": 1234,
1104
+ "type": "file",
1105
+ "last_modified":
1106
+ datetime.datetime(2025, 4, 10, 15, 12, 51, 227854),
1107
+ "checksums": {
1108
+ "adler32": "0fc5f83f",
1109
+ "md5": "1f57339acdec099c6c0a41f8e3d5fcd0",
1110
+ }
1111
+ }
1112
+
1113
+ For an existing directory, the returned value has the form:
1114
+
1115
+ {
1116
+ "name": name,
1117
+ "size": 0,
1118
+ "type": "directory",
1119
+ "last_modified":
1120
+ datetime.datetime(2025, 4, 10, 15, 12, 51, 227854),
1121
+ "checksums": {},
1122
+ }
1123
+
1124
+ For an inexisting file or directory, the returned value has the
1125
+ form:
1126
+
1127
+ {
1128
+ "name": name,
1129
+ "size": None,
1130
+ "type": None,
1131
+ "last_modified":
1132
+ datetime.datetime(1, 1, 1, 0, 0),
1133
+ "checksums": {},
1134
+ }
1135
+
1136
+ Notes
1137
+ -----
1138
+ The format of the returned directory is inspired and compatible with
1139
+ `fsspec`.
1140
+
1141
+ The size of existing directories is always zero. The `checksums``
1142
+ dictionary may be empty if the storage endpoint does not compute
1143
+ and store the checksum of the files it stores.
1144
+ """
1145
+ result: dict[str, Any] = {
1146
+ "name": name if name is not None else url,
1147
+ "type": None,
1148
+ "size": None,
1149
+ "last_modified": datetime.min,
1150
+ "checksums": {},
1151
+ }
1152
+ metadata = self.stat(url)
1153
+ if not metadata.exists:
1154
+ return result
1155
+
1156
+ if metadata.is_dir:
1157
+ result.update({"type": "directory", "size": 0})
1158
+ else:
1159
+ result.update({"type": "file", "size": metadata.size, "checksums": metadata.checksums})
1160
+
1161
+ result.update({"last_modified": metadata.last_modified})
1162
+ return result
1163
+
1164
    def read_dir(self, url: str) -> list[DavFileMetadata]:
        """Return the properties of the files or directories contained in
        directory located at `url`.

        If `url` designates a file, only the details of itself are returned.

        Parameters
        ----------
        url : `str`
            Target URL.

        Returns
        -------
        result : `list[DavFileMetadata]`
            List of details of each file or directory within `url`.

        Raises
        ------
        FileNotFoundError
            Raised if no resource exists at `url`.
        ValueError
            Raised if the server responds with an unexpected status.
        """
        # Depth "1" asks the server for the properties of the target and of
        # its immediate children.
        resp = self._propfind(url, depth="1")
        if resp.status == HTTPStatus.NOT_FOUND:
            raise FileNotFoundError(f"No directory found at {resp.geturl()}")
        elif resp.status != HTTPStatus.MULTI_STATUS:
            raise ValueError(
                f"""Unexpected response to HTTP PROPFIND request to {resp.geturl()}: status {resp.status} """
                f"""{resp.reason}"""
            )

        # Compute the href of the directory itself, normalized to end with
        # a single "/", so it can be excluded from the results below.
        if (path := parse_url(url).path) is not None:
            this_dir_href = path.rstrip("/") + "/"
        else:
            this_dir_href = "/"

        result = []
        for property in self._propfind_parser.parse(resp.data):
            # Don't include in the results the metadata of the directory we
            # are traversing.
            # Some webDAV servers do not append a "/" to the href of a
            # directory in their response to PROPFIND, so we must take that
            # into account.
            if property.is_file:
                result.append(DavFileMetadata.from_property(base_url=self._base_url, property=property))
            elif property.is_dir and property.href != this_dir_href:
                result.append(DavFileMetadata.from_property(base_url=self._base_url, property=property))

        return result
1207
+
1208
+ def read(self, url: str) -> bytes:
1209
+ """Download the contents of file located at `url`.
1210
+
1211
+ Parameters
1212
+ ----------
1213
+ url : `str`
1214
+ Target URL.
1215
+
1216
+ Returns
1217
+ -------
1218
+ read: `bytes`
1219
+ Contents of the file.
1220
+
1221
+ Notes
1222
+ -----
1223
+ The caller must ensure that the resource at `url` is a file, not
1224
+ a directory.
1225
+ """
1226
+ return self._get(url).data
1227
+
1228
+ def read_range(
1229
+ self, url: str, start: int, end: int | None, headers: dict[str, str] | None = None
1230
+ ) -> bytes:
1231
+ """Download partial content of file located at `url`.
1232
+
1233
+ Parameters
1234
+ ----------
1235
+ url : `str`
1236
+ Target URL.
1237
+
1238
+ start: `int`
1239
+ Starting byte offset of the range to download.
1240
+ end: `int`
1241
+ Ending byte offset of the range to download.
1242
+ headers: `dict[str,str]`, optional
1243
+ Specific headers to sent with the GET request.
1244
+
1245
+ Returns
1246
+ -------
1247
+ read: `bytes`
1248
+ Partial contents of the file.
1249
+
1250
+ Notes
1251
+ -----
1252
+ The caller must ensure that the resource at `url` is a file, not
1253
+ a directory. This is important because some webDAV servers respond
1254
+ with an HTML document when asked for reading a directory.
1255
+ """
1256
+ headers = {} if headers is None else dict(headers)
1257
+ if end is None:
1258
+ headers.update({"Range": f"bytes={start}-"})
1259
+ else:
1260
+ headers.update({"Range": f"bytes={start}-{end}"})
1261
+
1262
+ return self._get(url, headers=headers).data
1263
+
1264
+ def download(self, url: str, filename: str, chunk_size: int, close_connection: bool = False) -> int:
1265
+ """Download the content of a file and write it to local file.
1266
+
1267
+ Parameters
1268
+ ----------
1269
+ url : `str`
1270
+ Target URL.
1271
+
1272
+ filename: `str`
1273
+ Local file to write the content to. If the file already exists,
1274
+ it will be rewritten.
1275
+
1276
+ chunk_size: `int`
1277
+ Size of the chunks to write to `filename`.
1278
+
1279
+ Returns
1280
+ -------
1281
+ count: `int`
1282
+ Number of bytes written to `filename`.
1283
+
1284
+ Notes
1285
+ -----
1286
+ The caller must ensure that the resource at `url` is a file, not
1287
+ a directory.
1288
+ """
1289
+ try:
1290
+ resp = self._get(url, preload_content=False)
1291
+
1292
+ # If we were asked to close the connection to the server, disable
1293
+ # auto close so that we can explicitly close the connection.
1294
+ # By default, urrlib3 releases the connection and keeps it open
1295
+ # for later reuse when it consumes the response body.
1296
+ if close_connection:
1297
+ resp.auto_close = False
1298
+
1299
+ content_length = 0
1300
+ with open(filename, "wb", buffering=chunk_size) as file:
1301
+ for chunk in resp.stream(chunk_size):
1302
+ file.write(chunk)
1303
+ content_length += len(chunk)
1304
+
1305
+ # Check that the expected and actual content lengths match. Perform
1306
+ # this check only when the content of the file was not encoded by
1307
+ # the server.
1308
+ expected_length: int = int(resp.headers.get("Content-Length", -1))
1309
+ if (
1310
+ "Content-Encoding" not in resp.headers
1311
+ and expected_length != -1
1312
+ and expected_length != content_length
1313
+ ):
1314
+ raise ValueError(
1315
+ f"Size of downloaded file does not match value in Content-Length header for {self}: "
1316
+ f"expecting {expected_length} and got {content_length} bytes"
1317
+ )
1318
+
1319
+ return content_length
1320
+ finally:
1321
+ # Close this connection
1322
+ if close_connection:
1323
+ resp.close()
1324
+
1325
+ def write(self, url: str, data: BinaryIO | bytes) -> None:
1326
+ """Create or rewrite a remote file at `url` with `data` as its
1327
+ contents.
1328
+
1329
+ Parameters
1330
+ ----------
1331
+ url : `str`
1332
+ Target URL.
1333
+
1334
+ data: `bytes`
1335
+ Sequence of bytes to upload.
1336
+
1337
+ Notes
1338
+ -----
1339
+ If a file already exists at `url` it will be rewritten.
1340
+ """
1341
+ self._put(url, data)
1342
+
1343
+ def checksums(self, url: str) -> dict[str, str]:
1344
+ """Return the checksums of the contents of file located at `url`.
1345
+
1346
+ The checksums are retrieved from the storage endpoint. There may be
1347
+ none if the storage endpoint does not automatically expose the
1348
+ checksums it computes.
1349
+
1350
+ Parameters
1351
+ ----------
1352
+ url : `str`
1353
+ Target URL
1354
+
1355
+ Returns
1356
+ -------
1357
+ checksums: `dict[str, str]`
1358
+ A file exists at `url`.
1359
+ The key of the dictionary is the lowercased name of the checksum
1360
+ algorithm (e.g. "md5", "adler32"). The value is the lowercased
1361
+ checksum itself (e.g. "78441cec2479ec8b545c4d6699f542da").
1362
+ """
1363
+ stat = self.stat(url)
1364
+ if not stat.exists:
1365
+ raise FileNotFoundError(f"No file found at {url}")
1366
+
1367
+ return stat.checksums if stat.is_file else {}
1368
+
1369
+ def mkcol(self, url: str) -> None:
1370
+ """Create a directory at `url`.
1371
+
1372
+ If a directory already exists at `url` no error is returned nor
1373
+ exception is raised. An exception is raised if a file exists at `url`.
1374
+
1375
+ Parameters
1376
+ ----------
1377
+ url : `str`
1378
+ Target URL.
1379
+ """
1380
+ resp = self._request("MKCOL", url)
1381
+ if resp.status not in (HTTPStatus.CREATED, HTTPStatus.METHOD_NOT_ALLOWED):
1382
+ raise ValueError(f"Can not create directory {resp.geturl()}: status {resp.status} {resp.reason}")
1383
+
1384
+ def delete(self, url: str) -> None:
1385
+ """Delete the file or directory at `url`.
1386
+
1387
+ If there is no file or directory at `url` is not considered an error.
1388
+
1389
+ Parameters
1390
+ ----------
1391
+ url : `str`
1392
+ Target URL.
1393
+
1394
+ Notes
1395
+ -----
1396
+ If `url` designates a directory, some webDAV servers recursively
1397
+ remove the directory and its contents. Others, only remove the
1398
+ directory if it is empty.
1399
+
1400
+ For a consisten behavior, the caller must check what kind of object
1401
+ the target URL is and walk the hierarchy removing all objects.
1402
+ """
1403
+ resp = self._request("DELETE", url)
1404
+ if resp.status not in (
1405
+ HTTPStatus.OK,
1406
+ HTTPStatus.ACCEPTED,
1407
+ HTTPStatus.NO_CONTENT,
1408
+ HTTPStatus.NOT_FOUND,
1409
+ ):
1410
+ raise ValueError(f"Unable to delete resource {resp.geturl()}: status {resp.status} {resp.reason}")
1411
+
1412
    def accepts_ranges(self, url: str) -> bool:
        """Return `True` if the server supports a 'Range' header in
        GET requests against `url`.

        The result is computed once, via a HEAD request, and cached for the
        lifetime of this client.
        """
        # If we have already determined that the server accepts "Range" for
        # another URL, we assume that it implements that feature for any
        # file it serves, so reuse that information.
        #
        # Double-checked locking: the first check avoids taking the lock on
        # the common (already-cached) path; the second check, under the lock,
        # ensures only one thread issues the HEAD request.
        if self._accepts_ranges is not None:
            return self._accepts_ranges

        with self._lock:
            if self._accepts_ranges is None:
                self._accepts_ranges = self._head(url).headers.get("Accept-Ranges", "") == "bytes"

        return self._accepts_ranges
1427
+
1428
+ def copy(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
1429
+ """Copy the file at `source_url` to `destination_url` in the same
1430
+ storage endpoint.
1431
+
1432
+ Parameters
1433
+ ----------
1434
+ source_url : `str`
1435
+ URL of the source file.
1436
+ destination_url : `str`
1437
+ URL of the destination file. Its parent directory must exist.
1438
+ overwrite : `bool`
1439
+ If True and a file exists at `destination_url` it will be
1440
+ overwritten. Otherwise an exception is raised.
1441
+ """
1442
+ # Check the source is a file
1443
+ if self.stat(source_url).is_dir:
1444
+ raise NotImplementedError(f"copy is not implemented for directory {source_url}")
1445
+
1446
+ # Send a COPY request for this file.
1447
+ headers = {
1448
+ "Destination": destination_url,
1449
+ "Overwrite": "T" if overwrite else "F",
1450
+ }
1451
+ resp = self._request("COPY", source_url, headers=headers)
1452
+ if resp.status not in (HTTPStatus.CREATED, HTTPStatus.NO_CONTENT):
1453
+ raise ValueError(
1454
+ f"Could not copy {resp.geturl()} to {destination_url}: status {resp.status} {resp.reason}"
1455
+ )
1456
+ return
1457
+
1458
+ def move(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
1459
+ """Move the file at `source_url` to `destination_url` in the same
1460
+ storage endpoint.
1461
+
1462
+ Parameters
1463
+ ----------
1464
+ source_url : `str`
1465
+ URL of the source file.
1466
+ destination_url : `str`
1467
+ URL of the destination file. Its parent directory must exist.
1468
+ overwrite : `bool`
1469
+ If True and a file exists at `destination_url` it will be
1470
+ overwritten. Otherwise an exception is raised.
1471
+ """
1472
+ headers = {
1473
+ "Destination": destination_url,
1474
+ "Overwrite": "T" if overwrite else "F",
1475
+ }
1476
+ resp = self._request("MOVE", source_url, headers=headers)
1477
+ if resp.status not in (HTTPStatus.CREATED, HTTPStatus.NO_CONTENT):
1478
+ raise ValueError(
1479
+ f"""Could not move file {resp.geturl()} to {destination_url}: status {resp.status} """
1480
+ f"""{resp.reason}"""
1481
+ )
1482
+
1483
+ def generate_presigned_get_url(self, url: str, expiration_time_seconds: int) -> str:
1484
+ """Return a pre-signed URL that can be used to retrieve this resource
1485
+ using an HTTP GET without supplying any access credentials.
1486
+
1487
+ Parameters
1488
+ ----------
1489
+ expiration_time_seconds : `int`
1490
+ Number of seconds until the generated URL is no longer valid.
1491
+
1492
+ Returns
1493
+ -------
1494
+ url : `str`
1495
+ HTTP URL signed for GET.
1496
+ """
1497
+ raise NotImplementedError(f"URL signing is not supported by server for {self}")
1498
+
1499
+ def generate_presigned_put_url(self, url: str, expiration_time_seconds: int) -> str:
1500
+ """Return a pre-signed URL that can be used to upload a file to this
1501
+ path using an HTTP PUT without supplying any access credentials.
1502
+
1503
+ Parameters
1504
+ ----------
1505
+ expiration_time_seconds : `int`
1506
+ Number of seconds until the generated URL is no longer valid.
1507
+
1508
+ Returns
1509
+ -------
1510
+ url : `str`
1511
+ HTTP URL signed for PUT.
1512
+ """
1513
+ raise NotImplementedError(f"URL signing is not supported by server for {self}")
1514
+
1515
+
1516
class ActivityCaveat(enum.Enum):
    """Activities for which a macaroon can be requested from a dCache or
    XRootD webDAV server.
    """

    # Read-style access to a file's contents.
    DOWNLOAD = 1
    # Write-style access to create or replace a file's contents.
    UPLOAD = 2
1523
+
1524
+
1525
class DavClientURLSigner(DavClient):
    """WebDAV client which supports signing of URLs for upload and download.

    Instances of this class are thread-safe.

    Parameters
    ----------
    url : `str`
        Root URL of the storage endpoint (e.g. "https://host.example.org:1234/")

    config : `DavConfig`
        Configuration to initialize this client.
    """

    def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
        super().__init__(url=url, config=config, accepts_ranges=accepts_ranges)

    def generate_presigned_get_url(self, url: str, expiration_time_seconds: int) -> str:
        """Return a pre-signed URL that can be used to retrieve the resource
        at `url` using an HTTP GET without supplying any access credentials.

        Parameters
        ----------
        url : `str`
            URL of an existing file.
        expiration_time_seconds : `int`
            Number of seconds until the generated URL is no longer valid.

        Returns
        -------
        url : `str`
            HTTP URL signed for GET.

        Notes
        -----
        Although the returned URL allows for downloading the file at `url`
        without supplying credentials, the HTTP client must be configured
        to accept the certificate the server will present if the client
        wants to validate it. The server's certificate may be issued by a
        certificate authority unknown to the client.
        """
        macaroon: str = self._get_macaroon(url, ActivityCaveat.DOWNLOAD, expiration_time_seconds)
        return f"{url}?authz={macaroon}"

    def generate_presigned_put_url(self, url: str, expiration_time_seconds: int) -> str:
        """Return a pre-signed URL that can be used to upload a file to `url`
        using an HTTP PUT without supplying any access credentials.

        Parameters
        ----------
        url : `str`
            URL of an existing file.
        expiration_time_seconds : `int`
            Number of seconds until the generated URL is no longer valid.

        Returns
        -------
        url : `str`
            HTTP URL signed for PUT.

        Notes
        -----
        Although the returned URL allows for uploading a file to `url`
        without supplying credentials, the HTTP client must be configured
        to accept the certificate the server will present if the client
        wants to validate it. The server's certificate may be issued by a
        certificate authority unknown to the client.
        """
        macaroon: str = self._get_macaroon(url, ActivityCaveat.UPLOAD, expiration_time_seconds)
        return f"{url}?authz={macaroon}"

    def _get_macaroon(self, url: str, activity: ActivityCaveat, expiration_time_seconds: int) -> str:
        """Return a macaroon for uploading or downloading the file at `url`.

        Parameters
        ----------
        url : `str`
            URL of an existing file.
        activity : `ActivityCaveat`
            The activity the macaroon is requested for.
        expiration_time_seconds : `int`
            Requested duration of the macaroon, in seconds.

        Returns
        -------
        macaroon : `str`
            Macaroon to be used with `url` in a GET or PUT request.

        Raises
        ------
        ValueError
            Raised if the server does not respond with status 200 OK, if
            the response body is not valid JSON or if it contains no
            "macaroon" key.
        """
        # dCache and XRootD webDAV servers support delivery of macaroons.
        #
        # For details about dCache macaroons see:
        # https://www.dcache.org/manuals/UserGuide-9.2/macaroons.shtml
        match activity:
            case ActivityCaveat.DOWNLOAD:
                activity_caveat = "DOWNLOAD,LIST"
            case ActivityCaveat.UPLOAD:
                activity_caveat = "UPLOAD,LIST,DELETE,MANAGE"

        # Retrieve a macaroon for the requested activities and duration.
        # The validity is expressed as an ISO-8601 duration (e.g. "PT300S").
        headers = {"Content-Type": "application/macaroon-request"}
        body = {
            "caveats": [
                f"activity:{activity_caveat}",
            ],
            "validity": f"PT{expiration_time_seconds}S",
        }
        resp = self._request("POST", url, headers=headers, body=json.dumps(body))
        if resp.status != HTTPStatus.OK:
            raise ValueError(
                f"Could not retrieve a macaroon for URL {resp.geturl()}, status: {resp.status} {resp.reason}"
            )

        # We are expecting the body of the response to be formatted in JSON.
        # dCache sets the 'Content-Type' of the response to 'application/json'
        # but XRootD does not set any 'Content-Type' header 8-[
        #
        # An example of a response body returned by dCache is shown below:
        # {
        #     "macaroon": "MDA[...]Qo",
        #     "uri": {
        #         "targetWithMacaroon": "https://dcache.example.org/?authz=MD...",
        #         "baseWithMacaroon": "https://dcache.example.org/?authz=MD...",
        #         "target": "https://dcache.example.org/",
        #         "base": "https://dcache.example.org/"
        #     }
        # }
        #
        # An example of a response body returned by XRootD is shown below:
        # {
        #     "macaroon": "MDA[...]Qo",
        #     "expires_in": 86400
        # }
        try:
            response_body = json.loads(resp.data.decode("utf-8"))
        except json.JSONDecodeError:
            raise ValueError(f"Could not deserialize response to POST request for URL {resp.geturl()}")

        if "macaroon" in response_body:
            return response_body["macaroon"]

        raise ValueError(f"Could not retrieve macaroon for URL {resp.geturl()}")

    def copy(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
        """Copy the file at `source_url` to `destination_url` in the same
        storage endpoint.

        Parameters
        ----------
        source_url : `str`
            URL of the source file.
        destination_url : `str`
            URL of the destination file. Its parent directory must exist.
        overwrite : `bool`
            If True and a file exists at `destination_url` it will be
            overwritten. Otherwise an exception is raised.
        """
        # Check the source is a file
        if self.stat(source_url).is_dir:
            raise NotImplementedError(f"copy is not implemented for directory {source_url}")

        # Neither dCache nor XrootD currently implement the COPY
        # webDAV method as documented in
        #
        #   http://www.webdav.org/specs/rfc4918.html#METHOD_COPY
        #
        # (See issues DM-37603 and DM-37651 for details)
        # With those servers use third-party copy instead.
        return self._copy_via_third_party(source_url, destination_url, overwrite)

    def _copy_via_third_party(self, source_url: str, destination_url: str, overwrite: bool = False) -> None:
        """Copy the file at `source_url` to `destination_url` in the same
        storage endpoint using the third-party copy functionality
        implemented by dCache and XRootD servers.

        Parameters
        ----------
        source_url : `str`
            URL of the source file.
        destination_url : `str`
            URL of the destination file. Its parent directory must exist.
        overwrite : `bool`
            If True and a file exists at `destination_url` it will be
            overwritten. Otherwise an exception is raised.
        """
        # To implement COPY we use dCache's third-party copy mechanism
        # documented at:
        #
        # https://www.dcache.org/manuals/UserGuide-10.2/webdav.shtml#third-party-transfers
        #
        # The reason is that dCache does not correctly implement webDAV's COPY
        # method. See https://github.com/dCache/dcache/issues/6950

        # Retrieve a macaroon for downloading the source; 300 seconds is the
        # validity window granted for the transfer to start.
        download_macaroon = self._get_macaroon(source_url, ActivityCaveat.DOWNLOAD, 300)

        # Prepare and send the COPY request
        # NOTE(review): if self._request below raises, `resp` is unbound
        # when the finally clause runs, which would raise NameError and mask
        # the original error — consider acquiring `resp` before the try.
        try:
            headers = {
                "Source": source_url,
                "TransferHeaderAuthorization": f"Bearer {download_macaroon}",
                "Credential": "none",
                "Depth": "0",
                "Overwrite": "T" if overwrite else "F",
                "RequireChecksumVerification": "false",
            }
            resp = self._request("COPY", destination_url, headers=headers, preload_content=False)
            if resp.status == HTTPStatus.CREATED:
                return

            if resp.status != HTTPStatus.ACCEPTED:
                raise ValueError(
                    f"Unable to copy resource {resp.geturl()}; status: {resp.status} {resp.reason}"
                )

            content_type = resp.headers.get("Content-Type")
            if content_type != "text/perf-marker-stream":
                raise ValueError(
                    f"""Unexpected Content-Type {content_type} in response to COPY request from """
                    f"""{source_url} to {destination_url}"""
                )

            # Read the performance markers in the response body.
            # Documentation:
            # https://dcache.org/manuals/UserGuide-10.2/webdav.shtml#third-party-transfers
            for marker in io.TextIOWrapper(resp):  # type: ignore
                marker = marker.rstrip("\n")
                if marker == "":  # EOF
                    raise ValueError(
                        f"""Copying file from {source_url} to {destination_url} failed: """
                        """could not get response from server"""
                    )
                elif marker.startswith("failure:"):
                    raise ValueError(
                        f"""Copying file from {source_url} to {destination_url} failed with error: """
                        f"""{marker}"""
                    )
                elif marker.startswith("success:"):
                    return
        finally:
            # Ensure the streamed response body is fully consumed so the
            # connection can be returned to the pool.
            resp.drain_conn()
1765
+
1766
+
1767
class DavClientDCache(DavClientURLSigner):
    """Client for interacting with a dCache webDAV server.

    Instances of this class are thread-safe.

    dCache redirects GET and PUT requests from its frontend server (the
    webDAV "door") to backend pool servers; the overridden methods below
    handle that redirection and the associated connection management.

    Parameters
    ----------
    url : `str`
        Root URL of the storage endpoint (e.g. "https://host.example.org:1234/")

    config : `DavConfig`
        Configuration to initialize this client.

    accepts_ranges : `bool`, optional
        Whether the server is known to accept HTTP range requests.
        Forwarded as-is to the parent class constructor.
    """

    def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
        super().__init__(url=url, config=config, accepts_ranges=accepts_ranges)

    def _propfind(self, url: str, body: str | None = None, depth: str = "0") -> HTTPResponse:
        """Send a HTTP PROPFIND request and return the response.

        Parameters
        ----------
        url : `str`
            Target URL.
        body : `str`, optional
            Request body. If `None`, a default body is used which requests
            the DAV live properties of interest plus the dCache-specific
            checksums property.
        depth : `str`, optional
            Value forwarded to the parent implementation for the request's
            'Depth' header.

        Returns
        -------
        resp : `HTTPResponse`
            Response to the PROPFIND request as received from the server.
        """
        if body is None:
            # Request only the DAV live properties we are explicitly
            # interested in, namely 'resourcetype', 'getcontentlength',
            # 'getlastmodified' and 'displayname'. In addition, request
            # dCache-specific checksums.
            body = (
                """<?xml version="1.0" encoding="utf-8"?>"""
                """<D:propfind xmlns:D="DAV:" xmlns:dcache="http://www.dcache.org/2013/webdav"><D:prop>"""
                """<D:resourcetype/><D:getcontentlength/><D:getlastmodified/><D:displayname/>"""
                """<dcache:Checksums/>"""
                """</D:prop></D:propfind>"""
            )

        return super()._propfind(url=url, body=body, depth=depth)

    def _get(
        self, url: str, headers: dict[str, str] | None = None, preload_content: bool = True
    ) -> HTTPResponse:
        """Send a HTTP GET request to a dCache webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        preload_content: `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call the `.read()` on the returned response
            object to download the body.

        Returns
        -------
        resp: `HTTPResponse`
            Response to the GET request as received from the server.

        Raises
        ------
        FileNotFoundError
            Raised if the server responds with status 404 Not Found.
        ValueError
            Raised for any other unexpected response status.
        """
        # Send the GET request to the frontend servers. We handle
        # redirections ourselves.
        headers = {} if headers is None else dict(headers)
        resp = self._request("GET", url, headers=headers, preload_content=preload_content, redirect=False)
        if resp.status in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            return resp

        if resp.status == HTTPStatus.NOT_FOUND:
            raise FileNotFoundError(f"No file found at {resp.geturl()}")

        redirect_location = resp.get_redirect_location()
        if redirect_location is None or redirect_location is False:
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # We were redirected to a backend server so follow the redirection.
        # The response body will be automatically downloaded when
        # `preload_content` is true and the underlying network connection
        # may be kept open for future reuse if the maximum number of
        # connections for the backend pool is not reached.
        try:
            # Explicitly ask the backend server to close the connection after
            # serving this request.
            if preload_content:
                headers.update({"Connection": "close"})

            url = redirect_location
            resp = self._request(
                "GET",
                url,
                headers=headers,
                pool_manager=self._backend,
                preload_content=preload_content,
            )

            # Mark this connection so that it won't be automatically
            # returned to the reusable connection pool. We will close it
            # ourselves if appropriate.
            if preload_content:
                resp.auto_close = False

            if resp.status not in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
                raise ValueError(
                    f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
                )

            # The caller will access the `resp.data` property or use
            # the `resp.read()` method to read the contents of the
            # response body. If `preload_content` argument is True, the
            # response body is already downloaded, otherwise `resp.read()`
            # will download it.
            return resp
        finally:
            # Don't keep this connection to the backend server open. Given
            # that dCache pools may be configured to serve requests over a
            # range of ports, it is unlikely we will reuse this particular
            # connection again in the short term.
            if preload_content:
                resp.close()

    def _put(
        self,
        url: str,
        data: BinaryIO | bytes,
    ) -> None:
        """Send a HTTP PUT request to a dCache webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        data: `BinaryIO` or `bytes`
            Request body.

        Raises
        ------
        ValueError
            Raised when either the redirection request or the upload itself
            receives an unexpected response status.
        """
        # Send a PUT request with empty body to the dCache frontend server to
        # get redirected to the backend.
        #
        # Details:
        # https://www.dcache.org/manuals/UserGuide-10.2/webdav.shtml#redirection
        #
        # Note that we use the backend pool manager for PUT requests, since
        # the dCache webDAV door closes the connection when redirecting a
        # PUT request to the backend.
        #
        # We want to reuse the connections to the door as much as possible so
        # that metadata operations are faster; all metadata operations use the
        # frontend pool manager.
        headers = {"Content-Length": "0", "Expect": "100-continue"}
        resp = self._request("PUT", url, headers=headers, redirect=False, pool_manager=self._backend)
        if redirect_location := resp.get_redirect_location():
            url = redirect_location
        elif resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )

        # We were redirected to a backend server. Upload the file contents to
        # its final destination. Explicitly ask the server to close this
        # network connection after serving this PUT request to release
        # the associated dCache mover.

        # Ask dCache to compute and record a checksum of the uploaded
        # file contents, for later integrity checks. Since we don't compute
        # the digest ourselves while uploading the data, we cannot control
        # after the request is complete that the data we uploaded is
        # identical to the data recorded by the server, but at least the
        # server has recorded a digest of the data it stored.
        #
        # See RFC-3230 for details and
        # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
        # for the list of supported digest algorithms.
        headers = {"Connection": "close"}
        if (checksum := self._config.request_checksum) is not None:
            headers.update({"Want-Digest": checksum})

        try:
            resp = self._request(
                "PUT",
                url,
                body=data,
                headers=headers,
                pool_manager=self._backend,
                # Don't consume the response body, so that we can explicitly
                # close the connection.
                preload_content=False,
            )

            # Disable automatically returning the connection to the pool
            # to be reused later on, since we want that connection to be
            # closed. By default, when preload_content is True, the network
            # connection is returned to the connection pool once the response
            # body is completely consumed. Once this happens, we don't have a
            # mechanism to force closing the connection.
            resp.auto_close = False

            if resp.status not in (
                HTTPStatus.OK,
                HTTPStatus.CREATED,
                HTTPStatus.NO_CONTENT,
            ):
                raise ValueError(
                    f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                    f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
                )

        finally:
            # Explicitly close this connection to the dCache backend server.
            resp.close()

    def download(self, url: str, filename: str, chunk_size: int, close_connection: bool = True) -> int:
        # Close the connection to the backend servers after downloading
        # the entire file content (see the rationale in `_get` above:
        # backend connections are unlikely to be reused).
        return super().download(
            url=url, filename=filename, chunk_size=chunk_size, close_connection=close_connection
        )
1992
+
1993
+
1994
class DavClientXrootD(DavClientURLSigner):
    """Client for interacting with a XrootD webDAV server.

    Instances of this class are thread-safe.

    Parameters
    ----------
    url : `str`
        Root URL of the storage endpoint (e.g. "https://host.example.org:1234/")

    config : `DavConfig`
        Configuration to initialize this client.

    accepts_ranges : `bool`, optional
        Whether the server is known to accept HTTP range requests.
        Forwarded as-is to the parent class constructor.
    """

    def __init__(self, url: str, config: DavConfig, accepts_ranges: bool | None = None) -> None:
        super().__init__(url=url, config=config, accepts_ranges=accepts_ranges)

    def _get(
        self, url: str, headers: dict[str, str] | None = None, preload_content: bool = True
    ) -> HTTPResponse:
        """Send a HTTP GET request to a XrootD webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        headers : `dict[str, str]`, optional
            Headers to send with the request.
        preload_content: `bool`, optional
            If True, the response body is downloaded and can be retrieved
            via the returned response `.data` property. If False, the
            caller needs to call the `.read()` on the returned response
            object to download the body.

        Returns
        -------
        resp: `HTTPResponse`
            Response to the GET request as received from the server.

        Raises
        ------
        FileNotFoundError
            Raised if the server responds with status 404 Not Found.
        ValueError
            Raised for any other unexpected response status.
        """
        # Send the GET request to the frontend servers and follow redirection.
        headers = {} if headers is None else dict(headers)
        resp = self._request("GET", url, headers=headers, preload_content=preload_content, redirect=False)
        if resp.status in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            return resp

        if resp.status == HTTPStatus.NOT_FOUND:
            raise FileNotFoundError(f"No file found at {resp.geturl()}")

        redirect_location = resp.get_redirect_location()
        if redirect_location is None or redirect_location is False:
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # We were redirected to a backend server so follow the redirection.
        # The response body will be automatically downloaded when
        # `preload_content` is true and the underlying network connection
        # may be kept open for future reuse if the maximum number of
        # connections for the backend pool is not reached.
        #
        # For XRootD endpoints, we always use the same pool manager, namely
        # the frontend pool manager, to increase the chance of reusing
        # network connections.
        url = redirect_location
        resp = self._request(
            "GET",
            url,
            headers=headers,
            pool_manager=self._frontend,
            preload_content=preload_content,
        )

        if resp.status not in (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT):
            resp.close()
            raise ValueError(
                f"Unexpected error in HTTP GET {resp.geturl()}: status {resp.status} {resp.reason}"
            )

        # The caller will access the `resp.data` property or use
        # the `resp.read()` method to read the contents of the
        # response body. If `preload_content` argument is True, the
        # response body is already downloaded, otherwise `resp.read()`
        # will download it.
        return resp

    def _put(
        self,
        url: str,
        data: BinaryIO | bytes,
    ) -> None:
        """Send a HTTP PUT request to a XrootD webDAV server.

        Parameters
        ----------
        url : `str`
            Target URL.
        data: `BinaryIO` or `bytes`
            Request body.

        Raises
        ------
        ValueError
            Raised when either the redirection request or the upload itself
            receives an unexpected response status, including when the
            server persistently responds 423 Locked.
        """
        # Send a PUT request with empty body to the XRootD frontend server to
        # get redirected to the backend.
        headers = {"Content-Length": "0", "Expect": "100-continue"}
        for attempt in range(max_attempts := 3):
            resp = self._request("PUT", url, headers=headers, redirect=False)
            if redirect_location := resp.get_redirect_location():
                url = redirect_location
                break
            elif resp.status == HTTPStatus.LOCKED:
                # Sometimes XRootD servers respond with status code LOCKED and
                # response body of the form:
                #
                # "Output file /path/to/file is already opened by 1 writer;
                # open denied."
                #
                # If we get such a response, try again, unless we reached
                # the maximum number of attempts.
                if attempt == max_attempts - 1:
                    raise ValueError(
                        f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                        f"""{resp.reason} [{resp.data.decode("utf-8")}] after {max_attempts} attempts"""
                    )

                # Wait a bit and try again, backing off linearly with the
                # attempt number.
                log.warning(
                    f"""got unexpected response status {HTTPStatus.LOCKED} Locked for {url} """
                    f"""(attempt {attempt}/{max_attempts}), retrying..."""
                )
                time.sleep((attempt + 1) * 0.100)
                continue
            elif resp.status not in (
                HTTPStatus.OK,
                HTTPStatus.CREATED,
                HTTPStatus.NO_CONTENT,
            ):
                raise ValueError(
                    f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                    f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
                )

        # We were redirected to a backend server. Upload the file contents to
        # its final destination.

        # XRootD backend servers typically use a single port number for
        # accepting connections from clients. It is therefore beneficial
        # to keep those connections open, if the server allows.

        # Ask the server to compute and record a checksum of the uploaded
        # file contents, for later integrity checks. Since we don't compute
        # the digest ourselves while uploading the data, we cannot control
        # after the request is complete that the data we uploaded is
        # identical to the data recorded by the server, but at least the
        # server has recorded a digest of the data it stored.
        #
        # See RFC-3230 for details and
        # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
        # for the list of supported digest algorithms.
        #
        # In addition, note that not all servers implement this RFC so
        # the checksum request may be ignored by the server.
        headers = {}
        if (checksum := self._config.request_checksum) is not None:
            headers = {"Want-Digest": checksum}

        # For XRootD endpoints, we always use the same pool manager, namely
        # the frontend pool manager, to increase the chance of reusing
        # network connections.
        resp = self._request(
            "PUT",
            url,
            body=data,
            headers=headers,
            pool_manager=self._frontend,
        )

        if resp.status not in (
            HTTPStatus.OK,
            HTTPStatus.CREATED,
            HTTPStatus.NO_CONTENT,
        ):
            raise ValueError(
                f"""Unexpected response to HTTP request PUT {resp.geturl()}: status {resp.status} """
                f"""{resp.reason} [{resp.data.decode("utf-8")}]"""
            )

    def info(self, url: str, name: str | None = None) -> dict[str, Any]:
        # XRootD does not include checksums in the response to PROPFIND
        # request. We need to send a specific HEAD request to retrieve
        # the ADLER32 checksum.
        #
        # If found, the checksum is included in the response header "Digest",
        # which is of the form:
        #
        # Digest: adler32=0e4709f2
        result = super().info(url, name)
        if result["type"] == "file":
            headers: dict[str, str] = {"Want-Digest": "adler32"}
            resp = self._head(url=url, headers=headers)
            if (digest := resp.headers.get("Digest")) is not None:
                # Splitting at "=" is safe here because adler32 values are
                # hexadecimal and contain no "=" characters.
                value = digest.split("=")[1]
                result["checksums"].update({"adler32": value})

        return result
2196
+
2197
+
2198
class DavFileMetadata:
    """Container for attributes of interest of a webDAV file or directory."""

    def __init__(
        self,
        base_url: str,
        href: str = "",
        name: str = "",
        exists: bool = False,
        size: int = -1,
        is_dir: bool = False,
        last_modified: datetime = datetime.min,
        checksums: dict[str, str] | None = None,
    ):
        # Absolute URL of this resource: the bare endpoint URL when no href
        # is provided, otherwise the endpoint joined with the href.
        if href:
            self._url: str = base_url.rstrip("/") + href
        else:
            self._url = base_url
        self._href: str = href
        self._name: str = name
        self._exists: bool = exists
        self._size: int = size
        self._is_dir: bool = is_dir
        self._last_modified: datetime = last_modified
        # Copy the caller's mapping so later external mutations don't leak in.
        self._checksums: dict[str, str] = dict(checksums) if checksums is not None else {}

    @staticmethod
    def from_property(base_url: str, property: DavProperty) -> DavFileMetadata:
        """Create an instance from the values in `property`."""
        return DavFileMetadata(
            base_url=base_url,
            href=property.href,
            name=property.name,
            exists=property.exists,
            size=property.size,
            is_dir=property.is_dir,
            last_modified=property.last_modified,
            checksums=dict(property.checksums),
        )

    def __str__(self) -> str:
        # Space-separated dump of the main attributes, for debugging.
        fields = (
            self._url,
            self._href,
            self._name,
            self._exists,
            self._size,
            self._is_dir,
            self._checksums,
        )
        return " ".join(str(field) for field in fields)

    @property
    def url(self) -> str:
        return self._url

    @property
    def href(self) -> str:
        return self._href

    @property
    def name(self) -> str:
        return self._name

    @property
    def exists(self) -> bool:
        return self._exists

    @property
    def size(self) -> int:
        # Size is only meaningful for existing resources; directories always
        # report a size of zero.
        if not self._exists:
            return -1
        if self._is_dir:
            return 0
        return self._size

    @property
    def is_dir(self) -> bool:
        if not self._exists:
            return False
        return self._is_dir

    @property
    def is_file(self) -> bool:
        if not self._exists:
            return False
        return not self._is_dir

    @property
    def last_modified(self) -> datetime:
        return self._last_modified

    @property
    def checksums(self) -> dict[str, str]:
        return self._checksums
2279
+
2280
+
2281
class DavProperty:
    """Helper class to encapsulate select live DAV properties of a single
    resource, as retrieved via a PROPFIND request.

    Parameters
    ----------
    response : `eTree.Element` or `None`
        The XML response defining the DAV property.
    """

    # Matches the 'status' element of a 'propstat' element in a PROPFIND
    # response when the enclosed properties were successfully retrieved.
    _status_ok_rex = re.compile(r"^HTTP/.* 200 .*$", re.IGNORECASE)

    def __init__(self, response: eTree.Element | None):
        self._href: str = ""
        self._displayname: str = ""
        self._collection: bool = False
        self._getlastmodified: str = ""
        self._getcontentlength: int = -1
        self._checksums: dict[str, str] = {}

        if response is not None:
            self._parse(response)

    def _parse(self, response: eTree.Element) -> None:
        """Populate this object from a single PROPFIND 'response' element."""
        # 'href' is mandatory: without it the resource cannot be identified.
        # Note: "str(...)" keeps mypy happy about the element's Optional text.
        href_element = response.find("./{DAV:}href")
        if href_element is None:
            raise ValueError(
                "Property 'href' expected but not found in PROPFIND response: "
                f"{eTree.tostring(response, encoding='unicode')}"
            )
        self._href = str(href_element.text).strip()

        for propstat in response.findall("./{DAV:}propstat"):
            # Skip property groups whose retrieval status is not 200 OK.
            status = propstat.find("./{DAV:}status")
            if status is None or not self._status_ok_rex.match(str(status.text)):
                continue

            for prop in propstat.findall("./{DAV:}prop"):
                # A 'collection' child of 'resourcetype' marks a directory.
                if prop.find("./{DAV:}resourcetype/{DAV:}collection") is not None:
                    self._collection = True

                if (modified := prop.find("./{DAV:}getlastmodified")) is not None:
                    self._getlastmodified = str(modified.text)

                if (length := prop.find("./{DAV:}getcontentlength")) is not None:
                    self._getcontentlength = int(str(length.text))

                if (display := prop.find("./{DAV:}displayname")) is not None:
                    self._displayname = str(display.text)

                # dCache-specific checksums property.
                if (cksums := prop.find("./{http://www.dcache.org/2013/webdav}Checksums")) is not None:
                    self._checksums = self._parse_checksums(cksums.text)

        # Some webDAV servers don't include the 'displayname' property in the
        # response, so fall back to the last path component of 'href'.
        # Depending on the server the href value may end with '/'.
        if not self._displayname:
            self._displayname = os.path.basename(self._href.rstrip("/"))

        # Normalize directories: exactly one trailing "/" in the href
        # (some servers omit it) and a forced size of zero.
        if self._collection:
            self._href = self._href.rstrip("/") + "/"
            self._getcontentlength = 0

    def _parse_checksums(self, checksums: str | None) -> dict[str, str]:
        """Parse a dCache checksums string of the form
        "md5=MyS/wljSzI9WYiyrsuyoxw==,adler32=23b104f2" into a dict keyed
        by lower-case algorithm name.
        """
        result: dict[str, str] = {}
        if checksums is None:
            return result

        for entry in checksums.split(","):
            # Split at the *first* '=' only: base64 values may end with '='.
            algorithm, sep, value = entry.partition("=")
            if not sep:
                continue
            algorithm = algorithm.lower()
            if algorithm == "md5":
                # dCache base64-encodes the MD5 digest; record it as hex.
                # See:
                # https://www.dcache.org/manuals/UserGuide-10.2/webdav.shtml#checksums
                result[algorithm] = base64.standard_b64decode(value).hex()
            else:
                result[algorithm] = value

        return result

    @property
    def exists(self) -> bool:
        # A resource exists if it is either a directory or a file whose
        # content length was reported (i.e. is non-negative).
        return self._collection or self._getcontentlength >= 0

    @property
    def is_dir(self) -> bool:
        return self._collection

    @property
    def is_file(self) -> bool:
        return not self._collection

    @property
    def last_modified(self) -> datetime:
        if not self._getlastmodified:
            return datetime.min

        # Timestamps look like 'Wed, 12 Mar 2025 10:11:13 GMT'.
        return datetime.strptime(self._getlastmodified, "%a, %d %b %Y %H:%M:%S %Z")

    @property
    def size(self) -> int:
        return self._getcontentlength

    @property
    def name(self) -> str:
        return self._displayname

    @property
    def href(self) -> str:
        return self._href

    @property
    def checksums(self) -> dict[str, str]:
        return self._checksums
2416
+
2417
+
2418
class DavPropfindParser:
    """Helper class to parse the response body of a PROPFIND request.

    This class is stateless; the body to parse is passed to `parse`.
    """

    # NOTE: the previous version documented a constructor parameter `body`
    # that never existed and defined a no-op `__init__`; both have been
    # removed. The default constructor keeps the interface unchanged.

    def parse(self, body: bytes) -> list[DavProperty]:
        """Parse the XML-encoded contents of the response body to a webDAV
        PROPFIND request.

        Parameters
        ----------
        body : `bytes`
            XML-encoded response body to a PROPFIND request.

        Returns
        -------
        responses : `list[DavProperty]`
            One entry per 'response' element found in `body`.

        Raises
        ------
        ValueError
            Raised if `body` contains no 'response' element: it is expected
            that there is at least one response in `body`.
        """
        # A response body to a PROPFIND request is of the form (indented for
        # readability):
        #
        # <?xml version="1.0" encoding="UTF-8"?>
        # <D:multistatus xmlns:D="DAV:">
        #   <D:response>
        #     <D:href>path/to/resource</D:href>
        #     <D:propstat>
        #       <D:prop>
        #         <D:resourcetype>
        #           <D:collection xmlns:D="DAV:"/>
        #         </D:resourcetype>
        #         <D:getlastmodified>
        #           Fri, 27 Jan 2023 13:59:01 GMT
        #         </D:getlastmodified>
        #         <D:getcontentlength>
        #           12345
        #         </D:getcontentlength>
        #       </D:prop>
        #       <D:status>
        #         HTTP/1.1 200 OK
        #       </D:status>
        #     </D:propstat>
        #   </D:response>
        #   <D:response>
        #     ...
        #   </D:response>
        # </D:multistatus>

        # Scan all the 'response' elements and extract the relevant
        # properties.
        decoded_body: str = body.decode("utf-8").strip()
        multistatus = eTree.fromstring(decoded_body)
        responses = [DavProperty(response) for response in multistatus.findall("./{DAV:}response")]

        if not responses:
            # Could not parse the body.
            raise ValueError(f"Unable to parse response for PROPFIND request: {decoded_body}")

        return responses
2492
+
2493
+
2494
class TokenAuthorizer:
    """Attach a bearer token 'Authorization' header to each request.

    Parameters
    ----------
    token : `str`
        Can be either the path to a local file which contains the
        value of the token or the token itself. If `token` is a file
        it must be protected so that only the owner can read and write it.
    """

    def __init__(self, token: str | None = None) -> None:
        self._token: str | None = None
        self._path: str | None = None
        self._mtime: float = -1.0
        if token is None:
            return

        self._token = token
        if not os.path.isfile(token):
            # The argument is a literal token value, not a file path.
            return

        # The token lives in a local file: remember its absolute path and
        # require owner-only permissions before reading its contents.
        self._path = os.path.abspath(token)
        if not self._is_protected(self._path):
            raise PermissionError(
                f"""Authorization token file at {self._path} must be protected for access only """
                """by its owner"""
            )
        self._refresh()

    def _refresh(self) -> None:
        """Read the token file (if any) if its modification time is more recent
        than the last time we read it.
        """
        if self._path is None:
            return

        mtime = os.stat(self._path).st_mtime
        if mtime <= self._mtime:
            return

        log.debug("Reading authorization token from file %s", self._path)
        self._mtime = mtime
        with open(self._path) as f:
            self._token = f.read().rstrip("\n")

    def _is_protected(self, filepath: str) -> bool:
        """Return true if the permissions of file at filepath only allow for
        access by its owner.

        Parameters
        ----------
        filepath : `str`
            Path of a local file.
        """
        if not os.path.isfile(filepath):
            return False

        mode = stat.S_IMODE(os.stat(filepath).st_mode)
        # The owner must have some access; group and others must have none.
        if not mode & stat.S_IRWXU:
            return False
        return not mode & (stat.S_IRWXG | stat.S_IRWXO)

    def set_authorization(self, headers: dict[str, str]) -> None:
        """Add the 'Authorization' header to `headers`."""
        if self._token is None:
            return

        # Pick up any token file update before exposing the value.
        self._refresh()
        headers["Authorization"] = f"Bearer {self._token}"
2559
+
2560
+
2561
+ def expand_vars(path: str | None) -> str | None:
2562
+ """Expand the environment variables in `path` and return the path with
2563
+ the value of the variable expanded.
2564
+
2565
+ Parameters
2566
+ ----------
2567
+ path : `str` or `None`
2568
+ Abolute or relative path which may include an environment variable
2569
+ e.g. '$HOME/path/to/my/file'
2570
+
2571
+ Returns
2572
+ -------
2573
+ path: `str`
2574
+ The path with the values of the environment variables expanded.
2575
+ """
2576
+ return None if path is None else os.path.expandvars(path)
2577
+
2578
+
2579
def dump_response(method: str, resp: HTTPResponse) -> None:
    """Dump a HTTP response to the debug log, for debugging purposes.

    Parameters
    ----------
    method : `str`
        HTTP method of the request that produced `resp`.
    resp : `HTTPResponse`
        Response to dump. Its body is expected to have been downloaded
        already (i.e. `resp.data` is available).
    """
    log.debug("%s %s", method, resp.geturl())
    for header, value in resp.headers.items():
        log.debug("  %s: %s", header, value)
    # Log the body size in bytes. The previous implementation decoded the
    # body as UTF-8 just to measure it, which raised UnicodeDecodeError for
    # binary payloads and reported a character count rather than a length.
    log.debug("  response body length: %d", len(resp.data))