rucio-clients 35.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of rucio-clients might be problematic.

Files changed (88)
  1. rucio/__init__.py +17 -0
  2. rucio/alembicrevision.py +15 -0
  3. rucio/client/__init__.py +15 -0
  4. rucio/client/accountclient.py +433 -0
  5. rucio/client/accountlimitclient.py +183 -0
  6. rucio/client/baseclient.py +974 -0
  7. rucio/client/client.py +76 -0
  8. rucio/client/configclient.py +126 -0
  9. rucio/client/credentialclient.py +59 -0
  10. rucio/client/didclient.py +866 -0
  11. rucio/client/diracclient.py +56 -0
  12. rucio/client/downloadclient.py +1785 -0
  13. rucio/client/exportclient.py +44 -0
  14. rucio/client/fileclient.py +50 -0
  15. rucio/client/importclient.py +42 -0
  16. rucio/client/lifetimeclient.py +90 -0
  17. rucio/client/lockclient.py +109 -0
  18. rucio/client/metaconventionsclient.py +140 -0
  19. rucio/client/pingclient.py +44 -0
  20. rucio/client/replicaclient.py +454 -0
  21. rucio/client/requestclient.py +125 -0
  22. rucio/client/rseclient.py +746 -0
  23. rucio/client/ruleclient.py +294 -0
  24. rucio/client/scopeclient.py +90 -0
  25. rucio/client/subscriptionclient.py +173 -0
  26. rucio/client/touchclient.py +82 -0
  27. rucio/client/uploadclient.py +955 -0
  28. rucio/common/__init__.py +13 -0
  29. rucio/common/cache.py +74 -0
  30. rucio/common/config.py +801 -0
  31. rucio/common/constants.py +159 -0
  32. rucio/common/constraints.py +17 -0
  33. rucio/common/didtype.py +189 -0
  34. rucio/common/exception.py +1151 -0
  35. rucio/common/extra.py +36 -0
  36. rucio/common/logging.py +420 -0
  37. rucio/common/pcache.py +1408 -0
  38. rucio/common/plugins.py +153 -0
  39. rucio/common/policy.py +84 -0
  40. rucio/common/schema/__init__.py +150 -0
  41. rucio/common/schema/atlas.py +413 -0
  42. rucio/common/schema/belleii.py +408 -0
  43. rucio/common/schema/domatpc.py +401 -0
  44. rucio/common/schema/escape.py +426 -0
  45. rucio/common/schema/generic.py +433 -0
  46. rucio/common/schema/generic_multi_vo.py +412 -0
  47. rucio/common/schema/icecube.py +406 -0
  48. rucio/common/stomp_utils.py +159 -0
  49. rucio/common/stopwatch.py +55 -0
  50. rucio/common/test_rucio_server.py +148 -0
  51. rucio/common/types.py +403 -0
  52. rucio/common/utils.py +2238 -0
  53. rucio/rse/__init__.py +96 -0
  54. rucio/rse/protocols/__init__.py +13 -0
  55. rucio/rse/protocols/bittorrent.py +184 -0
  56. rucio/rse/protocols/cache.py +122 -0
  57. rucio/rse/protocols/dummy.py +111 -0
  58. rucio/rse/protocols/gfal.py +703 -0
  59. rucio/rse/protocols/globus.py +243 -0
  60. rucio/rse/protocols/gsiftp.py +92 -0
  61. rucio/rse/protocols/http_cache.py +82 -0
  62. rucio/rse/protocols/mock.py +123 -0
  63. rucio/rse/protocols/ngarc.py +209 -0
  64. rucio/rse/protocols/posix.py +250 -0
  65. rucio/rse/protocols/protocol.py +594 -0
  66. rucio/rse/protocols/rclone.py +364 -0
  67. rucio/rse/protocols/rfio.py +136 -0
  68. rucio/rse/protocols/srm.py +338 -0
  69. rucio/rse/protocols/ssh.py +413 -0
  70. rucio/rse/protocols/storm.py +206 -0
  71. rucio/rse/protocols/webdav.py +550 -0
  72. rucio/rse/protocols/xrootd.py +301 -0
  73. rucio/rse/rsemanager.py +764 -0
  74. rucio/vcsversion.py +11 -0
  75. rucio/version.py +38 -0
  76. rucio_clients-35.7.0.data/data/etc/rse-accounts.cfg.template +25 -0
  77. rucio_clients-35.7.0.data/data/etc/rucio.cfg.atlas.client.template +42 -0
  78. rucio_clients-35.7.0.data/data/etc/rucio.cfg.template +257 -0
  79. rucio_clients-35.7.0.data/data/requirements.client.txt +15 -0
  80. rucio_clients-35.7.0.data/data/rucio_client/merge_rucio_configs.py +144 -0
  81. rucio_clients-35.7.0.data/scripts/rucio +2542 -0
  82. rucio_clients-35.7.0.data/scripts/rucio-admin +2447 -0
  83. rucio_clients-35.7.0.dist-info/METADATA +50 -0
  84. rucio_clients-35.7.0.dist-info/RECORD +88 -0
  85. rucio_clients-35.7.0.dist-info/WHEEL +5 -0
  86. rucio_clients-35.7.0.dist-info/licenses/AUTHORS.rst +97 -0
  87. rucio_clients-35.7.0.dist-info/licenses/LICENSE +201 -0
  88. rucio_clients-35.7.0.dist-info/top_level.txt +1 -0
rucio/common/utils.py ADDED
@@ -0,0 +1,2238 @@
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import argparse
+ import base64
+ import copy
+ import datetime
+ import errno
+ import getpass
+ import hashlib
+ import io
+ import ipaddress
+ import itertools
+ import json
+ import logging
+ import math
+ import mmap
+ import os
+ import os.path
+ import re
+ import signal
+ import socket
+ import subprocess
+ import tempfile
+ import threading
+ import time
+ import zlib
+ from collections import OrderedDict
+ from collections.abc import Callable, Iterable, Iterator, Sequence
+ from enum import Enum
+ from functools import partial, wraps
+ from io import StringIO
+ from itertools import zip_longest
+ from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
+ from urllib.parse import parse_qsl, quote, urlencode, urlparse, urlunparse
+ from uuid import uuid4 as uuid
+ from xml.etree import ElementTree
+
+ import requests
+
+ from rucio.common.config import config_get, config_has_section
+ from rucio.common.exception import ConfigNotFound, DIDFilterSyntaxError, DuplicateCriteriaInDIDFilter, InputValidationError, InvalidType, MetalinkJsonParsingError, MissingModuleException, PolicyPackageVersionError, RucioException
+ from rucio.common.extra import import_extras
+ from rucio.common.plugins import PolicyPackageAlgorithms
+ from rucio.common.types import InternalAccount, InternalScope, TraceDict
+
+ EXTRA_MODULES = import_extras(['paramiko'])
+
+ if EXTRA_MODULES['paramiko']:
+     try:
+         from paramiko import RSAKey
+     except Exception:
+         EXTRA_MODULES['paramiko'] = False
+
+ if TYPE_CHECKING:
+     T = TypeVar('T')
+     from _typeshed import FileDescriptorOrPath
+     from sqlalchemy.orm import Session
+
+     from rucio.common.types import IPDict, LoggerFunction
+
+
+ # HTTP code dictionary. Not complete. Can be extended if needed.
+ codes = {
+     # Informational.
+     200: '200 OK',
+     201: '201 Created',
+     202: '202 Accepted',
+
+     # Client Error.
+     400: '400 Bad Request',
+     401: '401 Unauthorized',
+     403: '403 Forbidden',
+     404: '404 Not Found',
+     405: '405 Method Not Allowed',
+     406: '406 Not Acceptable',
+     408: '408 Request Timeout',
+     409: '409 Conflict',
+     410: '410 Gone',
+
+     # Server Error.
+     500: '500 Internal Server Error',
+     501: '501 Not Implemented',
+     502: '502 Bad Gateway',
+     503: '503 Service Unavailable',
+     504: '504 Gateway Timeout'
+ }
+
+ # RFC 1123 (ex RFC 822)
+ DATE_FORMAT = '%a, %d %b %Y %H:%M:%S UTC'
+
+
+ def invert_dict(d: dict[Any, Any]) -> dict[Any, Any]:
+     """
+     Invert the dictionary.
+     CAUTION: this function is not deterministic unless the input dictionary is a one-to-one mapping.
+
+     :param d: source dictionary
+     :returns: dictionary {value: key for key, value in d.items()}
+     """
+     return {value: key for key, value in d.items()}
+
+
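+ # Illustrative example (editor's sketch, not part of the released file):
+ # with a non-injective input the later key wins, hence the CAUTION above.
+ #
+ #     >>> invert_dict({'a': 1, 'b': 1})
+ #     {1: 'b'}
+
+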
+ def dids_as_dicts(did_list: Iterable[Union[str, dict[str, str]]]) -> list[dict[str, str]]:
+     """
+     Converts a list of DIDs to a list of dictionaries.
+     :param did_list: list of DIDs as either "scope:name" or {"scope": "scope", "name": "name"}
+     :returns: list of dictionaries {"scope": "scope", "name": "name"}
+     """
+     out = []
+     for did in did_list:
+         if isinstance(did, str):
+             scope, name = did.split(":", 1)
+             did = dict(scope=scope, name=name)
+         if isinstance(did, dict):
+             if not ("name" in did and "scope" in did):
+                 raise ValueError("Scope or name missing in: %s" % (did,))
+         else:
+             raise ValueError("Can not convert item %s (%s) to a DID" % (did, type(did)))
+         out.append(did)
+     return out
+
+
+ def build_url(
+         url: str,
+         path: Optional[str] = None,
+         params: Optional[Union[str, dict[Any, Any], list[tuple[Any, Any]]]] = None,
+         doseq: bool = False
+ ) -> str:
+     """
+     Utility function to build a URL for requests to the Rucio system.
+
+     If the optional parameter doseq evaluates to True, individual key=value pairs
+     separated by '&' are generated for each element of the value sequence for the key.
+     """
+     complete_url = url
+     if path is not None:
+         complete_url += "/" + path
+     if params is not None:
+         complete_url += "?"
+         if isinstance(params, str):
+             complete_url += quote(params)
+         else:
+             complete_url += urlencode(params, doseq=doseq)
+     return complete_url
+
+
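+ # Illustrative usage (editor's sketch, not part of the released file;
+ # the host name is hypothetical):
+ #
+ #     >>> build_url('https://rucio.example.org', path='dids', params={'name': ['a', 'b']}, doseq=True)
+ #     'https://rucio.example.org/dids?name=a&name=b'
+
+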
+ def all_oidc_req_claims_present(
+         scope: Optional[Union[str, list[str]]],
+         audience: Optional[Union[str, list[str]]],
+         required_scope: Optional[Union[str, list[str]]],
+         required_audience: Optional[Union[str, list[str]]],
+         separator: str = " "
+ ) -> bool:
+     """
+     Checks if both of the following statements are true:
+     - all items in required_scope are present in scope
+     - all items in required_audience are present in audience
+     audience and scope must both be strings or both be lists;
+     similarly for the required_* variables.
+     If both conditions are satisfied, True is returned; otherwise False.
+     :params scope: list of strings or one string where items are separated by the separator input variable
+     :params audience: list of strings or one string where items are separated by the separator input variable
+     :params required_scope: list of strings or one string where items are separated by the separator input variable
+     :params required_audience: list of strings or one string where items are separated by the separator input variable
+     :params separator: separator string, space by default
+     :returns: True or False
+     """
+     if not scope:
+         scope = ""
+     if not audience:
+         audience = ""
+     if not required_scope:
+         required_scope = ""
+     if not required_audience:
+         required_audience = ""
+     if (isinstance(scope, list) and isinstance(audience, list) and isinstance(required_scope, list) and isinstance(required_audience, list)):
+         scope = [str(it) for it in scope]
+         audience = [str(it) for it in audience]
+         required_scope = [str(it) for it in required_scope]
+         required_audience = [str(it) for it in required_audience]
+         req_scope_present = all(elem in scope for elem in required_scope)
+         req_audience_present = all(elem in audience for elem in required_audience)
+         return req_scope_present and req_audience_present
+     elif (isinstance(scope, str) and isinstance(audience, str) and isinstance(required_scope, str) and isinstance(required_audience, str)):
+         scope = str(scope)
+         audience = str(audience)
+         required_scope = str(required_scope)
+         required_audience = str(required_audience)
+         req_scope_present = all(elem in scope.split(separator) for elem in required_scope.split(separator))
+         req_audience_present = all(elem in audience.split(separator) for elem in required_audience.split(separator))
+         return req_scope_present and req_audience_present
+     elif (isinstance(scope, list) and isinstance(audience, list) and isinstance(required_scope, str) and isinstance(required_audience, str)):
+         scope = [str(it) for it in scope]
+         audience = [str(it) for it in audience]
+         required_scope = str(required_scope)
+         required_audience = str(required_audience)
+         req_scope_present = all(elem in scope for elem in required_scope.split(separator))
+         req_audience_present = all(elem in audience for elem in required_audience.split(separator))
+         return req_scope_present and req_audience_present
+     elif (isinstance(scope, str) and isinstance(audience, str) and isinstance(required_scope, list) and isinstance(required_audience, list)):
+         scope = str(scope)
+         audience = str(audience)
+         required_scope = [str(it) for it in required_scope]
+         required_audience = [str(it) for it in required_audience]
+         req_scope_present = all(elem in scope.split(separator) for elem in required_scope)
+         req_audience_present = all(elem in audience.split(separator) for elem in required_audience)
+         return req_scope_present and req_audience_present
+     else:
+         return False
+
+
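+ # Illustrative usage (editor's sketch, not part of the released file):
+ #
+ #     >>> all_oidc_req_claims_present('openid profile', 'rucio', 'openid', 'rucio')
+ #     True
+
+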
+ def generate_uuid() -> str:
+     return str(uuid()).replace('-', '').lower()
+
+
+ def generate_uuid_bytes() -> bytes:
+     return uuid().bytes
+
+
+ # GLOBALLY_SUPPORTED_CHECKSUMS = ['adler32', 'md5', 'sha256', 'crc32']
+ GLOBALLY_SUPPORTED_CHECKSUMS = ['adler32', 'md5']
+ CHECKSUM_ALGO_DICT = {}
+ PREFERRED_CHECKSUM = GLOBALLY_SUPPORTED_CHECKSUMS[0]
+ CHECKSUM_KEY = 'supported_checksums'
+
+
+ def is_checksum_valid(checksum_name: str) -> bool:
+     """
+     A simple function to check whether a checksum algorithm is supported.
+     Relies on GLOBALLY_SUPPORTED_CHECKSUMS to allow for expandability.
+
+     :param checksum_name: The name of the checksum to be verified.
+     :returns: True if checksum_name is in the GLOBALLY_SUPPORTED_CHECKSUMS list, False otherwise.
+     """
+
+     return checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS
+
+
+ def set_preferred_checksum(checksum_name: str) -> None:
+     """
+     If the input checksum name is valid,
+     set it as PREFERRED_CHECKSUM.
+
+     :param checksum_name: The name of the checksum to be verified.
+     """
+     if is_checksum_valid(checksum_name):
+         global PREFERRED_CHECKSUM
+         PREFERRED_CHECKSUM = checksum_name
+
+
+ def adler32(file: "FileDescriptorOrPath") -> str:
+     """
+     An Adler-32 checksum is obtained by calculating two 16-bit checksums A and B
+     and concatenating their bits into a 32-bit integer. A is the sum of all bytes in the
+     stream plus one, and B is the sum of the individual values of A from each step.
+
+     :param file: file name
+     :returns: Hexified string, padded to 8 values.
+     """
+
+     # adler starting value is _not_ 0
+     adler = 1
+
+     can_mmap = False
+     # try:
+     #     with open(file, 'r+b') as f:
+     #         can_mmap = True
+     # except:
+     #     pass
+
+     try:
+         # use mmap if possible
+         if can_mmap:
+             with open(file, 'r+b') as f:
+                 m = mmap.mmap(f.fileno(), 0)
+                 # partial block reads at slightly increased buffer sizes
+                 for block in iter(partial(m.read, io.DEFAULT_BUFFER_SIZE * 8), b''):
+                     adler = zlib.adler32(block, adler)
+         else:
+             with open(file, 'rb') as f:
+                 # partial block reads at slightly increased buffer sizes
+                 for block in iter(partial(f.read, io.DEFAULT_BUFFER_SIZE * 8), b''):
+                     adler = zlib.adler32(block, adler)
+
+     except Exception as e:
+         raise Exception('FATAL - could not get Adler-32 checksum of file %s: %s' % (file, e))
+
+     # backflip on 32bit -- can be removed once everything is fully migrated to 64bit
+     if adler < 0:
+         adler = adler + 2 ** 32
+
+     return str('%08x' % adler)
+
+
+ CHECKSUM_ALGO_DICT['adler32'] = adler32
+
+
+ def md5(file: "FileDescriptorOrPath") -> str:
+     """
+     Runs the MD5 algorithm (RFC-1321) on the binary content of the file named file and returns the hexadecimal digest
+
+     :param file: file name
+     :returns: string of 32 hexadecimal digits
+     """
+     hash_md5 = hashlib.md5()
+     try:
+         with open(file, "rb") as f:
+             list(map(hash_md5.update, iter(lambda: f.read(4096), b"")))
+     except Exception as e:
+         raise Exception('FATAL - could not get MD5 checksum of file %s - %s' % (file, e))
+
+     return hash_md5.hexdigest()
+
+
+ CHECKSUM_ALGO_DICT['md5'] = md5
+
+
+ def sha256(file: "FileDescriptorOrPath") -> str:
+     """
+     Runs the SHA256 algorithm on the binary content of the file named file and returns the hexadecimal digest
+
+     :param file: file name
+     :returns: string of 64 hexadecimal digits
+     """
+     with open(file, "rb") as f:
+         bytes_ = f.read()  # read entire file as bytes
+         readable_hash = hashlib.sha256(bytes_).hexdigest()
+         return readable_hash
+
+
+ CHECKSUM_ALGO_DICT['sha256'] = sha256
+
+
+ def crc32(file: "FileDescriptorOrPath") -> str:
+     """
+     Runs the CRC32 algorithm on the binary content of the file named file and returns the hexadecimal digest
+
+     :param file: file name
+     :returns: string of up to 8 hexadecimal digits
+     """
+     prev = 0
+     for each_line in open(file, "rb"):
+         prev = zlib.crc32(each_line, prev)
+     return "%X" % (prev & 0xFFFFFFFF)
+
+
+ CHECKSUM_ALGO_DICT['crc32'] = crc32
+
+
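+ # Editor's note (illustrative, not part of the released file): the registry
+ # above lets callers dispatch by algorithm name, e.g.
+ #
+ #     checksum_fn = CHECKSUM_ALGO_DICT[PREFERRED_CHECKSUM]
+ #     digest = checksum_fn('/path/to/local/file')  # hypothetical path
+
+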
+ def _next_pow2(num: int) -> int:
+     if not num:
+         return 0
+     return math.ceil(math.log2(num))
+
+
+ def _bittorrent_v2_piece_length_pow2(file_size: int) -> int:
+     """
+     Automatically chooses the `piece size` so that the `piece layers`
+     stay smaller than usual. This is a balancing act:
+     having a big piece_length requires more work on the bittorrent client
+     side to validate hashes, but having it small requires more
+     space to store the `piece layers` in the database.
+
+     Returns the result as the exponent 'x' for a power of 2.
+     To get the actual length in bytes, the caller should compute 2^x.
+     """
+
+     # by the bittorrent v2 specification, the minimum piece size is equal to the block size = 16KiB
+     min_piece_len_pow2 = 14  # 2 ** 14 == 16 KiB
+     if not file_size:
+         return min_piece_len_pow2
+     # Limit the maximum size of the pieces_layers hash chain for bittorrent v2,
+     # because we'll have to store it in the database
+     max_pieces_layers_size_pow2 = 20  # 2 ** 20 == 1 MiB
+     # sha256 requires 2 ** 5 == 32 Bytes == 256 bits
+     hash_size_pow2 = 5
+
+     # The closest power of two bigger than the file size
+     file_size_pow2 = _next_pow2(file_size)
+
+     # Compute the target size for the 'pieces layers' in the torrent
+     # (as a power of two: the closest power-of-two smaller than the number).
+     # Will cap at max_pieces_layers_size for files larger than 1TB.
+     target_pieces_layers_size = math.sqrt(file_size)
+     target_pieces_layers_size_pow2 = min(math.floor(math.log2(target_pieces_layers_size)), max_pieces_layers_size_pow2)
+     target_piece_num_pow2 = max(target_pieces_layers_size_pow2 - hash_size_pow2, 0)
+
+     piece_length_pow2 = max(file_size_pow2 - target_piece_num_pow2, min_piece_len_pow2)
+     return piece_length_pow2
+
+
+ def bittorrent_v2_piece_length(file_size: int) -> int:
+     return 2 ** _bittorrent_v2_piece_length_pow2(file_size)
+
+
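+ # Worked example (editor's sketch, not part of the released file): for a
+ # 4 GiB file (2**32 bytes), file_size_pow2 = 32 and the target pieces-layers
+ # size is sqrt(2**32) = 2**16, so target_piece_num_pow2 = 16 - 5 = 11 and the
+ # piece length is 2**(32 - 11) = 2 MiB, i.e. 2**11 pieces whose 32-byte
+ # sha256 hashes give 64 KiB of piece layers.
+ #
+ #     >>> bittorrent_v2_piece_length(2 ** 32)
+ #     2097152
+
+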
+ def bittorrent_v2_merkle_sha256(file: "FileDescriptorOrPath") -> tuple[bytes, bytes, int]:
+     """
+     Compute the .torrent v2 hash tree for the given file.
+     (http://www.bittorrent.org/beps/bep_0052.html)
+     In particular, it will return the root of the merkle hash
+     tree of the file, the 'piece layers' as described in the
+     previous BEP, and the chosen `piece size`.
+
+     This function will read the file in chunks of 16KiB
+     (which is the block size imposed by bittorrent v2) and compute
+     the sha256 hash of each block. When enough blocks are read
+     to form a `piece`, it will compute the merkle hash root of the
+     piece from the hashes of its blocks. At the end, the hashes
+     of pieces are combined to create the global pieces_root.
+     """
+
+     # by the bittorrent v2 specification, the block size and the
+     # minimum piece size are both fixed to 16KiB
+     block_size = 16384
+     block_size_pow2 = 14  # 2 ** 14 == 16 KiB
+     # sha256 requires 2 ** 5 == 32 Bytes == 256 bits
+     hash_size = 32
+
+     def _merkle_root(leafs: list[bytes], nb_levels: int, padding: bytes) -> bytes:
+         """
+         Build the root of the merkle hash tree from the (possibly incomplete) leafs layer.
+         If len(leafs) < 2 ** nb_levels, it will be padded with the padding repeated as many times
+         as needed to have 2 ** nb_levels leafs in total.
+         """
+         nodes = copy.copy(leafs)
+         level = nb_levels
+
+         while level > 0:
+             for i in range(2 ** (level - 1)):
+                 node1 = nodes[2 * i] if 2 * i < len(nodes) else padding
+                 node2 = nodes[2 * i + 1] if 2 * i + 1 < len(nodes) else padding
+                 h = hashlib.sha256(node1)
+                 h.update(node2)
+                 if i < len(nodes):
+                     nodes[i] = h.digest()
+                 else:
+                     nodes.append(h.digest())
+             level -= 1
+         return nodes[0] if nodes else padding
+
+     file_size = os.stat(file).st_size
+     piece_length_pow2 = _bittorrent_v2_piece_length_pow2(file_size)
+
+     block_per_piece_pow2 = piece_length_pow2 - block_size_pow2
+     piece_length = 2 ** piece_length_pow2
+     block_per_piece = 2 ** block_per_piece_pow2
+     piece_num = math.ceil(file_size / piece_length)
+
+     remaining = file_size
+     remaining_in_block = min(file_size, block_size)
+     block_hashes = []
+     piece_hashes = []
+     current_hash = hashlib.sha256()
+     block_padding = bytes(hash_size)
+     with open(file, 'rb') as f:
+         while True:
+             data = f.read(remaining_in_block)
+             if not data:
+                 break
+
+             current_hash.update(data)
+
+             remaining_in_block -= len(data)
+             remaining -= len(data)
+
+             if not remaining_in_block:
+                 block_hashes.append(current_hash.digest())
+                 if len(block_hashes) == block_per_piece or not remaining:
+                     piece_hashes.append(_merkle_root(block_hashes, nb_levels=block_per_piece_pow2, padding=block_padding))
+                     block_hashes = []
+                 current_hash = hashlib.sha256()
+                 remaining_in_block = min(block_size, remaining)
+
+             if not remaining:
+                 break
+
+     if remaining or remaining_in_block or len(piece_hashes) != piece_num:
+         raise RucioException(f'Error while computing merkle sha256 of {file}')
+
+     piece_padding = _merkle_root([], nb_levels=block_per_piece_pow2, padding=block_padding)
+     pieces_root = _merkle_root(piece_hashes, nb_levels=_next_pow2(piece_num), padding=piece_padding)
+     pieces_layers = b''.join(piece_hashes) if len(piece_hashes) > 1 else b''
+
+     return pieces_root, pieces_layers, piece_length
+
+
+ def merkle_sha256(file: "FileDescriptorOrPath") -> str:
+     """
+     The root of the sha256 merkle hash tree with leaf size of 16 KiB.
+     """
+     pieces_root, _, _ = bittorrent_v2_merkle_sha256(file)
+     return pieces_root.hex()
+
+
+ CHECKSUM_ALGO_DICT['merkle_sha256'] = merkle_sha256
+
+
+ def bencode(obj: Union[int, bytes, str, list, dict[bytes, Any]]) -> bytes:
+     """
+     Copied from the reference implementation of v2 bittorrent:
+     http://bittorrent.org/beps/bep_0052_torrent_creator.py
+     """
+
+     if isinstance(obj, int):
+         return b"i" + str(obj).encode() + b"e"
+     elif isinstance(obj, bytes):
+         return str(len(obj)).encode() + b":" + obj
+     elif isinstance(obj, str):
+         return bencode(obj.encode("utf-8"))
+     elif isinstance(obj, list):
+         return b"l" + b"".join(map(bencode, obj)) + b"e"
+     elif isinstance(obj, dict):
+         if all(isinstance(i, bytes) for i in obj.keys()):
+             items = list(obj.items())
+             items.sort()
+             return b"d" + b"".join(map(bencode, itertools.chain(*items))) + b"e"
+         else:
+             raise ValueError("dict keys should be bytes " + str(obj.keys()))
+     raise ValueError("Allowed types: int, bytes, str, list, dict; not %s" % type(obj))
+
+
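+ # Illustrative encoding (editor's sketch, not part of the released file):
+ #
+ #     >>> bencode({b'spam': [b'a', 42]})
+ #     b'd4:spaml1:ai42eee'
+
+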
+ def construct_torrent(
+         scope: str,
+         name: str,
+         length: int,
+         piece_length: int,
+         pieces_root: bytes,
+         pieces_layers: "Optional[bytes]" = None,
+         trackers: "Optional[list[str]]" = None,
+ ) -> "tuple[str, bytes]":
+
+     torrent_dict = {
+         b'creation date': int(time.time()),
+         b'info': {
+             b'meta version': 2,
+             b'private': 1,
+             b'name': f'{scope}:{name}'.encode(),
+             b'piece length': piece_length,
+             b'file tree': {
+                 name.encode(): {
+                     b'': {
+                         b'length': length,
+                         b'pieces root': pieces_root,
+                     }
+                 }
+             }
+         },
+         b'piece layers': {},
+     }
+     if trackers:
+         torrent_dict[b'announce'] = trackers[0].encode()
+         if len(trackers) > 1:
+             torrent_dict[b'announce-list'] = [t.encode() for t in trackers]
+     if pieces_layers:
+         torrent_dict[b'piece layers'][pieces_root] = pieces_layers
+
+     torrent_id = hashlib.sha256(bencode(torrent_dict[b'info'])).hexdigest()[:40]
+     torrent = bencode(torrent_dict)
+     return torrent_id, torrent
+
+
+ def str_to_date(string: str) -> Optional[datetime.datetime]:
+     """ Converts an RFC-1123 string to the corresponding datetime value.
+
+     :param string: the RFC-1123 string to convert to a datetime value.
+     """
+     return datetime.datetime.strptime(string, DATE_FORMAT) if string else None
+
+
+ def val_to_space_sep_str(vallist: list[str]) -> str:
+     """ Converts a list of values into a string of space-separated values
+
+     :param vallist: the list of values to convert into a string
+     :return: the string of space-separated values or the value initially passed as parameter
+     """
+     try:
+         if isinstance(vallist, list):
+             return str(" ".join(vallist))
+         else:
+             return str(vallist)
+     except:
+         return ''
+
+
+ def date_to_str(date: datetime.datetime) -> Optional[str]:
+     """ Converts a datetime value to the corresponding RFC-1123 string.
+
+     :param date: the datetime value to convert.
+     """
+     return datetime.datetime.strftime(date, DATE_FORMAT) if date else None
+
+
+ class APIEncoder(json.JSONEncoder):
+     """ Proprietary JSONEncoder subclass used by the json render function.
+     This is needed to address the encoding of special values.
+     """
+
+     def default(self, obj):  # pylint: disable=E0202
+         if isinstance(obj, datetime.datetime):
+             # convert any datetime to RFC 1123 format
+             return date_to_str(obj)
+         elif isinstance(obj, (datetime.time, datetime.date)):
+             # should not happen since the only date-like format
+             # supported at domain schema level is 'datetime'.
+             return obj.isoformat()
+         elif isinstance(obj, datetime.timedelta):
+             return obj.days * 24 * 60 * 60 + obj.seconds
+         elif isinstance(obj, Enum):
+             return obj.name
+         elif isinstance(obj, (InternalAccount, InternalScope)):
+             return obj.external
+         return json.JSONEncoder.default(self, obj)
+
+
+ def render_json(*args, **kwargs) -> str:
+     """ Render a list or a dict as a JSON-formatted string. """
+     if args and isinstance(args[0], list):
+         data = args[0]
+     elif isinstance(kwargs, dict):
+         data = kwargs
+     else:
+         raise ValueError("Error while serializing object to JSON-formatted string: supported input types are list or dict.")
+     return json.dumps(data, cls=APIEncoder)
+
+
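+ # Illustrative usage (editor's sketch, not part of the released file):
+ # datetimes are serialised through APIEncoder in RFC 1123 form.
+ #
+ #     >>> render_json(status='OK', when=datetime.datetime(2024, 1, 2, 3, 4, 5))
+ #     '{"status": "OK", "when": "Tue, 02 Jan 2024 03:04:05 UTC"}'
+
+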
+ def datetime_parser(dct: dict[Any, Any]) -> dict[Any, Any]:
+     """ datetime parser
+     """
+     for k, v in list(dct.items()):
+         if isinstance(v, str) and re.search(" UTC", v):
+             try:
+                 dct[k] = datetime.datetime.strptime(v, DATE_FORMAT)
+             except Exception:
+                 pass
+     return dct
+
+
+ def parse_response(data: Union[str, bytes, bytearray]) -> Any:
+     """
+     JSON render function
+     """
+     if isinstance(data, (bytes, bytearray)):
+         data = data.decode('utf-8')
+
+     return json.loads(data, object_hook=datetime_parser)
+
+
+ def execute(cmd: str) -> tuple[int, str, str]:
+     """
+     Executes a command in a subprocess. Returns a tuple
+     of (exitcode, out, err), where out is the string output
+     from stdout and err is the string output from stderr when
+     executing the command.
+
+     :param cmd: Command string to execute
+     """
+
+     process = subprocess.Popen(cmd,
+                                shell=True,
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+
+     result = process.communicate()
+     (out, err) = result
+     exitcode = process.returncode
+     return exitcode, out.decode(encoding='utf-8'), err.decode(encoding='utf-8')
+
+
+ def rse_supported_protocol_domains() -> list[str]:
+     """ Returns a list with all supported RSE protocol domains."""
+     return ['lan', 'wan']
+
+
+ def grouper(iterable: Iterable[Any], n: int, fillvalue: Optional[object] = None) -> zip_longest:
+     """ Collect data into fixed-length chunks or blocks """
+     # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
+     args = [iter(iterable)] * n
+     return zip_longest(*args, fillvalue=fillvalue)
+
+
+ def chunks(iterable, n):
+     """
+     Yield successive n-sized chunks from iterable.
+     """
+     if isinstance(iterable, list):
+         for i in range(0, len(iterable), n):
+             yield iterable[i:i + n]
+     else:
+         it = iter(iterable)
+         while True:
+             chunk = list(itertools.islice(it, n))
+             if not chunk:
+                 return
+             yield chunk
+
+
+ def dict_chunks(dict_: dict[Any, Any], n: int) -> Iterator[dict[Any, Any]]:
+     """
+     Iterate over the dictionary in groups of the requested size
+     """
+     it = iter(dict_)
+     for _ in range(0, len(dict_), n):
+         yield {k: dict_[k] for k in itertools.islice(it, n)}
+
+
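+ # Illustrative usage (editor's sketch, not part of the released file):
+ #
+ #     >>> list(chunks(range(5), 2))
+ #     [[0, 1], [2, 3], [4]]
+ #     >>> list(dict_chunks({'a': 1, 'b': 2, 'c': 3}, 2))
+ #     [{'a': 1, 'b': 2}, {'c': 3}]
+
+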
+ def my_key_generator(namespace: str, fn: Callable, **kw) -> Callable[..., str]:
+     """
+     Customized key generator for dogpile
+     """
+     fname = fn.__name__
+
+     def generate_key(*arg, **kw) -> str:
+         return namespace + "_" + fname + "_".join(str(s) for s in filter(None, arg))
+
+     return generate_key
+
+
+ NonDeterministicPFNAlgorithmsT = TypeVar('NonDeterministicPFNAlgorithmsT', bound='NonDeterministicPFNAlgorithms')
+
+
+ class NonDeterministicPFNAlgorithms(PolicyPackageAlgorithms):
+     """
+     Handle PFN construction for non-deterministic RSEs, including registration of algorithms
+     from policy packages
+     """
+
+     _algorithm_type = 'non_deterministic_pfn'
+
+     def __init__(self) -> None:
+         """
+         Initialises a non-deterministic PFN construction object
+         """
+         super().__init__()
+
+     def construct_non_deterministic_pfn(self, dsn: str, scope: Optional[str], filename: str, naming_convention: str) -> str:
+         """
+         Calls the correct algorithm to generate a non-deterministic PFN
+         """
+         return self.get_algorithm(naming_convention)(dsn, scope, filename)
+
+     @classmethod
+     def supports(cls: type[NonDeterministicPFNAlgorithmsT], naming_convention: str) -> bool:
+         """
+         Checks whether a non-deterministic PFN algorithm is supported
+         """
+         return super()._supports(cls._algorithm_type, naming_convention)
+
+     @classmethod
+     def _module_init_(cls: type[NonDeterministicPFNAlgorithmsT]) -> None:
+         """
+         Registers the included non-deterministic PFN algorithms
+         """
+         cls.register('T0', cls.construct_non_deterministic_pfn_T0)
+         cls.register('DQ2', cls.construct_non_deterministic_pfn_DQ2)
+         cls.register('BelleII', cls.construct_non_deterministic_pfn_BelleII)
+
+     @classmethod
+     def get_algorithm(cls: type[NonDeterministicPFNAlgorithmsT], naming_convention: str) -> Callable[[str, Optional[str], str], str]:
+         """
+         Looks up a non-deterministic PFN algorithm by name
+         """
+         return super()._get_one_algorithm(cls._algorithm_type, naming_convention)
+
+     @classmethod
+     def register(cls: type[NonDeterministicPFNAlgorithmsT], name: str, fn_construct_non_deterministic_pfn: Callable[[str, Optional[str], str], Optional[str]]) -> None:
+         """
+         Register a new non-deterministic PFN algorithm
+         """
+         algorithm_dict = {name: fn_construct_non_deterministic_pfn}
+         super()._register(cls._algorithm_type, algorithm_dict)
+
+     @staticmethod
+     def __strip_dsn(dsn: str) -> str:
+         """
+         Drop the _sub and _dis suffixes for panda datasets from the lfc path
+         they will be registered in.
+         Method imported from DQ2.
+         """
+
+         suffixes_to_drop = ['_dis', '_sub', '_frag']
+         fields = dsn.split('.')
+         last_field = fields[-1]
+         try:
+             for suffix in suffixes_to_drop:
+                 last_field = re.sub('%s.*$' % suffix, '', last_field)
+         except IndexError:
+             return dsn
+         fields[-1] = last_field
+         stripped_dsn = '.'.join(fields)
+         return stripped_dsn
+
+     @staticmethod
+     def __strip_tag(tag: str) -> str:
+         """
+         Drop the _sub and _dis suffixes for panda datasets from the lfc path
+         they will be registered in.
+         Method imported from DQ2.
+         """
+         suffixes_to_drop = ['_dis', '_sub', '_tid']
+         stripped_tag = tag
+         try:
+             for suffix in suffixes_to_drop:
+                 stripped_tag = re.sub('%s.*$' % suffix, '', stripped_tag)
+         except IndexError:
+             return stripped_tag
+         return stripped_tag
+
+     @staticmethod
+     def construct_non_deterministic_pfn_DQ2(dsn: str, scope: Optional[str], filename: str) -> str:
+         """
+         Defines the relative PFN for new replicas. This method
+         contains the DQ2 convention. To be used for non-deterministic sites.
+         Method imported from DQ2.
+
+         @return: relative PFN for the new replica.
+         @rtype: str
+         """
+         # check how many dots are in the dsn
+         fields = dsn.split('.')
+         nfields = len(fields)
+
+         if nfields == 0:
+             return '/other/other/%s' % (filename)
+         elif nfields == 1:
+             stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
+             return '/other/%s/%s' % (stripped_dsn, filename)
+         elif nfields == 2:
+             project = fields[0]
+             stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
+             return '/%s/%s/%s' % (project, stripped_dsn, filename)
+         elif nfields < 5 or re.match('user*|group*', fields[0]):
+             project = fields[0]
+             f2 = fields[1]
+             f3 = fields[2]
+             stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
+             return '/%s/%s/%s/%s/%s' % (project, f2, f3, stripped_dsn, filename)
+         else:
+             project = fields[0]
+             dataset_type = fields[4]
+             if nfields == 5:
+                 tag = 'other'
+             else:
+                 tag = NonDeterministicPFNAlgorithms.__strip_tag(fields[-1])
+             stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
+             return '/%s/%s/%s/%s/%s' % (project, dataset_type, tag, stripped_dsn, filename)
+
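+     # Illustrative mapping (editor's sketch, not part of the released file):
+     #
+     #     >>> NonDeterministicPFNAlgorithms.construct_non_deterministic_pfn_DQ2(
+     #     ...     'mc15.12345678.gen.evgen.EVNT.e1234_tid00001_00', None, 'file.root')
+     #     '/mc15/EVNT/e1234/mc15.12345678.gen.evgen.EVNT.e1234_tid00001_00/file.root'
+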
+     @staticmethod
+     def construct_non_deterministic_pfn_T0(dsn: str, scope: Optional[str], filename: str) -> Optional[str]:
+         """
+         Defines the relative PFN for new replicas. This method
+         contains the Tier0 convention. To be used for non-deterministic sites.
+
+         @return: relative PFN for the new replica.
+         @rtype: str
+         """
+         fields = dsn.split('.')
+         nfields = len(fields)
+         if nfields >= 3:
+             return '/%s/%s/%s/%s/%s' % (fields[0], fields[2], fields[1], dsn, filename)
+         elif nfields == 1:
+             return '/%s/%s/%s/%s/%s' % (fields[0], 'other', 'other', dsn, filename)
+         elif nfields == 2:
+             # only two fields are available here, so fall back to 'other' for
+             # the third path element (fields[2] would raise an IndexError)
+             return '/%s/%s/%s/%s/%s' % (fields[0], fields[1], 'other', dsn, filename)
+         elif nfields == 0:
+             return '/other/other/other/other/%s' % (filename)
+
+     @staticmethod
+     def construct_non_deterministic_pfn_BelleII(dsn: str, scope: Optional[str], filename: str) -> str:
+         """
+         Defines the relative PFN for Belle II specific replicas.
+         This method contains the Belle II convention.
+         To be used for non-deterministic Belle II sites.
+         The DSN (or datablock in the Belle II naming) contains '/'.
+         """
+
+         fields = dsn.split("/")
+         nfields = len(fields)
+         if nfields == 0:
+             return '/other/%s' % (filename)
+         else:
+             return '%s/%s' % (dsn, filename)
+
+
+ _DEFAULT_NON_DETERMINISTIC_PFN = 'DQ2'
+ NonDeterministicPFNAlgorithms._module_init_()
+
+
+ def construct_non_deterministic_pfn(dsn: str, scope: Optional[str], filename: str, naming_convention: Optional[str] = None) -> str:
+     """
+     Applies the non-deterministic PFN convention to the given replica,
+     using naming_convention to select the actual function which will do the job.
+     Rucio administrators can potentially register additional PFN generation algorithms,
+     which are not implemented inside this main rucio repository, so changing the
+     argument list must be done with caution.
+     """
+     pfn_algorithms = NonDeterministicPFNAlgorithms()
+     if naming_convention is None or not NonDeterministicPFNAlgorithms.supports(naming_convention):
+         naming_convention = _DEFAULT_NON_DETERMINISTIC_PFN
+     return pfn_algorithms.construct_non_deterministic_pfn(dsn, scope, filename, naming_convention)
+
+
+ def clean_pfns(pfns: Iterable[str]) -> list[str]:
+     res = []
+     for pfn in pfns:
+         if pfn.startswith('srm'):
+             pfn = re.sub(':[0-9]+/', '/', pfn)
+             pfn = re.sub(r'/srm/managerv1\?SFN=', '', pfn)
+             pfn = re.sub(r'/srm/v2/server\?SFN=', '', pfn)
+             pfn = re.sub(r'/srm/managerv2\?SFN=', '', pfn)
+         if '?GoogleAccessId' in pfn:
+             pfn = pfn.split('?GoogleAccessId')[0]
+         if '?X-Amz' in pfn:
+             pfn = pfn.split('?X-Amz')[0]
+         res.append(pfn)
+     res.sort()
+     return res
+
+
+ ScopeExtractionAlgorithmsT = TypeVar('ScopeExtractionAlgorithmsT', bound='ScopeExtractionAlgorithms')
+
+
+ class ScopeExtractionAlgorithms(PolicyPackageAlgorithms):
+     """
+     Handle scope extraction algorithms
+     """
+
+     _algorithm_type = 'scope'
+
+     def __init__(self) -> None:
+         """
+         Initialises scope extraction algorithms object
+         """
+         super().__init__()
+
+     def extract_scope(self, did: str, scopes: Optional[Sequence[str]], extract_scope_convention: str) -> Sequence[str]:
+         """
+         Calls the correct algorithm for scope extraction
+         """
+         return self.get_algorithm(extract_scope_convention)(did, scopes)
+
+     @classmethod
+     def supports(cls: type[ScopeExtractionAlgorithmsT], extract_scope_convention: str) -> bool:
+         """
+         Checks whether the specified scope extraction algorithm is supported
+         """
+         return super()._supports(cls._algorithm_type, extract_scope_convention)
+
+     @classmethod
+     def _module_init_(cls: type[ScopeExtractionAlgorithmsT]) -> None:
+         """
+         Registers the included scope extraction algorithms
+         """
+         cls.register('atlas', cls.extract_scope_atlas)
+         cls.register('belleii', cls.extract_scope_belleii)
+         cls.register('dirac', cls.extract_scope_dirac)
+
+     @classmethod
+     def get_algorithm(cls: type[ScopeExtractionAlgorithmsT], extract_scope_convention: str) -> Callable[[str, Optional[Sequence[str]]], Sequence[str]]:
+         """
+         Looks up a scope extraction algorithm by name
+         """
+         return super()._get_one_algorithm(cls._algorithm_type, extract_scope_convention)
+
+     @classmethod
+     def register(cls: type[ScopeExtractionAlgorithmsT], name: str, fn_extract_scope: Callable[[str, Optional[Sequence[str]]], Sequence[str]]) -> None:
+         """
+         Registers a new scope extraction algorithm
+         """
+         algorithm_dict = {name: fn_extract_scope}
+         super()._register(cls._algorithm_type, algorithm_dict)
+
+     @staticmethod
+     def extract_scope_atlas(did: str, scopes: Optional[Sequence[str]]) -> Sequence[str]:
+         # Try to extract the scope from the DSN
+         if did.find(':') > -1:
+             if len(did.split(':')) > 2:
+                 raise RucioException('Too many colons. Cannot extract scope and name')
+             scope, name = did.split(':')[0], did.split(':')[1]
+             if name.endswith('/'):
+                 name = name[:-1]
+             return scope, name
+         else:
+             scope = did.split('.')[0]
+             if did.startswith('user') or did.startswith('group'):
+                 scope = ".".join(did.split('.')[0:2])
+             if did.endswith('/'):
+                 did = did[:-1]
+             return scope, did
+
+     @staticmethod
+     def extract_scope_dirac(did: str, scopes: Optional[Sequence[str]]) -> Sequence[str]:
+         # Default dirac scope extraction algorithm. The scope is the second element in the LFN,
+         # or the first one (the VO name) if the split yields only one element.
+         elem = did.rstrip('/').split('/')
+         if len(elem) > 2:
+             scope = elem[2]
+         else:
+             scope = elem[1]
+         return scope, did
+
+     @staticmethod
+     def extract_scope_belleii(did: str, scopes: Optional[Sequence[str]]) -> Sequence[str]:
+         split_did = did.split('/')
+         if did.startswith('/belle/mock/'):
+             return 'mock', did
+         if did.startswith('/belle/MC/'):
+             if did.startswith('/belle/MC/BG') or \
+                     did.startswith('/belle/MC/build') or \
+                     did.startswith('/belle/MC/generic') or \
+                     did.startswith('/belle/MC/log') or \
+                     did.startswith('/belle/MC/mcprod') or \
+                     did.startswith('/belle/MC/prerelease') or \
+                     did.startswith('/belle/MC/release'):
+                 return 'mc', did
+             if did.startswith('/belle/MC/cert') or \
+                     did.startswith('/belle/MC/dirac') or \
+                     did.startswith('/belle/MC/dr3') or \
+                     did.startswith('/belle/MC/fab') or \
+                     did.startswith('/belle/MC/hideki') or \
+                     did.startswith('/belle/MC/merge') or \
+                     did.startswith('/belle/MC/migration') or \
+                     did.startswith('/belle/MC/skim') or \
+                     did.startswith('/belle/MC/test'):
+                 return 'mc_tmp', did
+             if len(split_did) > 4:
+                 if split_did[3].find('fab') > -1 or split_did[3].find('merge') > -1 or split_did[3].find('skim') > -1:
+                     return 'mc_tmp', did
+                 if split_did[3].find('release') > -1:
+                     return 'mc', did
+             return 'mc_tmp', did
+         if did.startswith('/belle/Raw/'):
+             return 'raw', did
+         if did.startswith('/belle/hRaw'):
+             return 'hraw', did
+         if did.startswith('/belle/user/'):
+             if len(split_did) > 4:
+                 if len(split_did[3]) == 1 and scopes is not None and 'user.%s' % (split_did[4]) in scopes:
+                     return 'user.%s' % split_did[4], did
+             if len(split_did) > 3:
+                 if scopes is not None and 'user.%s' % (split_did[3]) in scopes:
+                     return 'user.%s' % split_did[3], did
+             return 'user', did
+         if did.startswith('/belle/group/'):
+             if len(split_did) > 4:
+                 if scopes is not None and 'group.%s' % (split_did[4]) in scopes:
+                     return 'group.%s' % split_did[4], did
+             return 'group', did
+         if did.startswith('/belle/data/') or did.startswith('/belle/Data/'):
+             if len(split_did) > 4:
+                 if split_did[3] in ['fab', 'skim']:  # /belle/Data/fab --> data_tmp
+                     return 'data_tmp', did
+                 if split_did[3].find('release') > -1:  # /belle/Data/release --> data
+                     return 'data', did
+             if len(split_did) > 5:
+                 if split_did[3] in ['proc']:  # /belle/Data/proc
+                     if split_did[4].find('release') > -1:  # /belle/Data/proc/release*
+                         if len(split_did) > 7 and split_did[6] in ['GCR2c', 'prod00000007', 'prod6b', 'proc7b',
+                                                                    'proc8b', 'Bucket4', 'Bucket6test', 'bucket6',
+                                                                    'proc9', 'bucket7', 'SKIMDATAx1', 'proc10Valid',
+                                                                    'proc10', 'SkimP10x1', 'SkimP11x1', 'SkimB9x1',
+                                                                    'SkimB10x1', 'SkimB11x1']:  # /belle/Data/proc/release*/*/proc10/* --> data_tmp (Old convention)
+                             return 'data_tmp', did
+                         else:  # /belle/Data/proc/release*/*/proc11/* --> data (New convention)
+                             return 'data', did
+                     if split_did[4].find('fab') > -1:  # /belle/Data/proc/fab* --> data_tmp
+                         return 'data_tmp', did
+             return 'data_tmp', did
+         if did.startswith('/belle/ddm/functional_tests/') or did.startswith('/belle/ddm/tests/') or did.startswith('/belle/test/ddm_test'):
+             return 'test', did
+         if did.startswith('/belle/BG/'):
+             return 'data', did
+         if did.startswith('/belle/collection'):
+             return 'collection', did
+         return 'other', did
+
+
+ _DEFAULT_EXTRACT = 'atlas'
+ ScopeExtractionAlgorithms._module_init_()
+
+
+ def extract_scope(
+         did: str,
+         scopes: Optional[Sequence[str]] = None,
+         default_extract: str = _DEFAULT_EXTRACT
+ ) -> Sequence[str]:
+     scope_extraction_algorithms = ScopeExtractionAlgorithms()
+     extract_scope_convention = config_get('common', 'extract_scope', False, None) or config_get('policy', 'extract_scope', False, None)
+     if extract_scope_convention is None or not ScopeExtractionAlgorithms.supports(extract_scope_convention):
+         extract_scope_convention = default_extract
+     return scope_extraction_algorithms.extract_scope(did, scopes, extract_scope_convention)
+
+
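+ # Illustrative usage (editor's sketch, not part of the released file),
+ # with the default 'atlas' convention:
+ #
+ #     >>> extract_scope('user.jdoe:user.jdoe.test.file')
+ #     ('user.jdoe', 'user.jdoe.test.file')
+
+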
+ def pid_exists(pid: int) -> bool:
+     """
+     Check whether pid exists in the current process table.
+     UNIX only.
+     """
+     if pid < 0:
+         return False
+     if pid == 0:
+         # According to "man 2 kill" PID 0 refers to every process
+         # in the process group of the calling process.
+         # On certain systems 0 is a valid PID but we have no way
+         # to know that in a portable fashion.
+         raise ValueError('invalid PID 0')
+     try:
+         os.kill(pid, 0)
+     except OSError as err:
+         if err.errno == errno.ESRCH:
+             # ESRCH == No such process
+             return False
+         elif err.errno == errno.EPERM:
+             # EPERM clearly means there's a process to deny access to
+             return True
+         else:
+             # According to "man 2 kill" possible error values are
+             # (EINVAL, EPERM, ESRCH)
+             raise
+     else:
+         return True
+
+
+ def sizefmt(num: Union[int, float, None], human: bool = True) -> str:
+     """
+     Print human-readable file sizes
+     """
+     if num is None:
+         return '0.0 B'
+     try:
+         num = int(num)
+         if human:
+             for unit in ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z']:
+                 if abs(num) < 1000.0:
+                     return "%3.3f %sB" % (num, unit)
+                 num /= 1000.0
+             return "%.1f %sB" % (num, 'Y')
+         else:
+             return str(num)
+     except OverflowError:
+         return 'Inf'
+
+
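+ # Illustrative usage (editor's sketch, not part of the released file);
+ # note the decimal (power-of-1000) unit steps:
+ #
+ #     >>> sizefmt(1234567)
+ #     '1.235 MB'
+
+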
+ def get_tmp_dir() -> str:
+     """
+     Get a path where to store temporary files.
+
+     Rucio searches a standard list of temporary directories. The list is:
+
+       The directory named by the TMPDIR environment variable.
+       The directory named by the TEMP environment variable.
+       The directory named by the TMP environment variable.
+
+     As a last resort, the /tmp/ directory.
+
+     :return: A path.
+     """
+     base_dir = os.path.abspath(tempfile.gettempdir())
+     try:
+         return os.path.join(base_dir, getpass.getuser())
+     except Exception:
+         pass
+
+     try:
+         return os.path.join(base_dir, str(os.getuid()))
+     except Exception:
+         pass
+
+     return base_dir
+
+
+ def is_archive(name: str) -> bool:
+     '''
+     Check if a file name is an archive file or not.
+
+     :return: A boolean.
+     '''
+     regexp = r'^.*\.(zip|zipx|tar.gz|tgz|tar.Z|tar.bz2|tbz2)(\.\d+)*$'
+     if re.match(regexp, name, re.I):
+         return True
+     return False
+
+
+ class Color:
+     PURPLE = '\033[95m'
+     CYAN = '\033[96m'
+     DARKCYAN = '\033[36m'
+     BLUE = '\033[94m'
+     GREEN = '\033[92m'
+     YELLOW = '\033[93m'
+     RED = '\033[91m'
+     BOLD = '\033[1m'
+     UNDERLINE = '\033[4m'
+     END = '\033[0m'
+
+
+ def resolve_ips(hostname: str) -> list[str]:
+     try:
+         ipaddress.ip_address(hostname)
+         return [hostname]
+     except ValueError:
+         pass
+     try:
+         addrinfo = socket.getaddrinfo(hostname, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
+         return [ai[4][0] for ai in addrinfo]
+     except socket.gaierror:
+         pass
+     return []
+
+
+ def resolve_ip(hostname: str) -> str:
+     ips = resolve_ips(hostname)
+     if ips:
+         return ips[0]
+     return hostname
+
+
+ def detect_client_location() -> "IPDict":
+     """
+     Normally the client IP is set on the server side (request.remote_addr).
+     Here the IP is set to the one seen by the host itself; connecting the UDP
+     socket sends no actual traffic to the Google DNS servers.
+     Tries to determine the site name automatically from common environment variables,
+     in this order: SITE_NAME, ATLAS_SITE_NAME, OSG_SITE_NAME. If none of these exist,
+     the fixed string 'ROAMING' is used.
+
+     If environment variables set the location, they are used.
+     """
+
+     ip = None
+
+     try:
+         with socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) as s:
+             s.connect(("2001:4860:4860:0:0:0:0:8888", 80))
+             ip = s.getsockname()[0]
+     except Exception:
+         pass
+
+     if not ip:
+         try:
+             with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
+                 s.connect(("8.8.8.8", 80))
+                 ip = s.getsockname()[0]
+         except Exception:
+             pass
+
+     if not ip:
+         ip = '0.0.0.0'  # noqa: S104
+
+     site = os.environ.get('SITE_NAME',
+                           os.environ.get('ATLAS_SITE_NAME',
+                                          os.environ.get('OSG_SITE_NAME',
+                                                         'ROAMING')))
+
+     latitude = os.environ.get('RUCIO_LATITUDE')
+     longitude = os.environ.get('RUCIO_LONGITUDE')
+     if latitude and longitude:
+         try:
+             latitude = float(latitude)
+             longitude = float(longitude)
+         except ValueError:
+             latitude = longitude = 0
+             print('Client set latitude and longitude are not valid.')
+     else:
+         latitude = longitude = None
+
+     return {'ip': ip,
+             'fqdn': socket.getfqdn(),
+             'site': site,
+             'latitude': latitude,
+             'longitude': longitude}
+
+
+ def ssh_sign(private_key: str, message: str) -> str:
+     """
+     Sign a string message using the private key.
+
+     :param private_key: The SSH RSA private key as a string.
+     :param message: The message to sign as a string.
+     :return: Base64 encoded signature as a string.
+     """
+     encoded_message = message.encode()
+     if not EXTRA_MODULES['paramiko']:
+         raise MissingModuleException('The paramiko module is not installed or faulty.')
+     sio_private_key = StringIO(private_key)
+     priv_k = RSAKey.from_private_key(sio_private_key)
+     sio_private_key.close()
+     signature_stream = priv_k.sign_ssh_data(encoded_message)
+     signature_stream.rewind()
+     base64_encoded = base64.b64encode(signature_stream.get_remainder())
+     base64_encoded = base64_encoded.decode()
+     return base64_encoded
+
+
+ def make_valid_did(lfn_dict: dict[str, Any]) -> dict[str, Any]:
+     """
+     When managing information about an LFN (such as in `rucio upload` or
+     the RSE manager's upload), we add the `filename` attribute to record
+     the name of the file on the local disk in addition to the remainder
+     of the DID information.
+
+     This function will take that python dictionary and strip out the
+     additional `filename` key. If this is not done, then the dictionary
+     will not pass the DID JSON schema validation.
+     """
+     if 'filename' not in lfn_dict:
+         return lfn_dict
+
+     lfn_copy = dict(lfn_dict)
+     lfn_copy['name'] = lfn_copy.get('name', lfn_copy['filename'])
+     del lfn_copy['filename']
+     return lfn_copy
+
+
+ def send_trace(trace: TraceDict, trace_endpoint: str, user_agent: str, retries: int = 5) -> int:
+     """
+     Send the given trace to the trace endpoint
+
+     :param trace: the trace dictionary to send
+     :param trace_endpoint: the endpoint where the trace should be sent
+     :param user_agent: the user agent sending the trace
+     :param retries: the number of retries if sending fails
+     :return: 0 on success, 1 on failure
+     """
+     if user_agent.startswith('pilot'):
+         return 0
+     for dummy in range(retries):
+         try:
+             requests.post(trace_endpoint + '/traces/', verify=False, data=json.dumps(trace))
+             return 0
+         except Exception:
+             pass
+     return 1
+
+
+ def add_url_query(url: str, query: dict[str, str]) -> str:
+     """
+     Add a new dictionary to the URL parameters
+
+     :param url: The existing URL
+     :param query: A dictionary containing key/value pairs to be added to the URL
+     :return: The expanded URL with the new query parameters
+     """
+
+     url_parts = list(urlparse(url))
+     mod_query = dict(parse_qsl(url_parts[4]))
+     mod_query.update(query)
+     url_parts[4] = urlencode(mod_query)
+     return urlunparse(url_parts)
+
+
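+ # Illustrative usage (editor's sketch, not part of the released file;
+ # the URL is hypothetical):
+ #
+ #     >>> add_url_query('https://rucio.example.org/dids?name=a', {'rse': 'SITE_DISK'})
+ #     'https://rucio.example.org/dids?name=a&rse=SITE_DISK'
+
+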
+ def get_bytes_value_from_string(input_string: str) -> Union[bool, int]:
+     """
+     Get bytes from a string that represents a storage value and unit
+
+     :param input_string: String containing a value and a unit
+     :return: Integer value representing the value in bytes, or False if parsing fails
+     """
+     result = re.findall('^([0-9]+)([A-Za-z]+)$', input_string)
+     if result:
+         value = int(result[0][0])
+         unit = result[0][1].lower()
+         if unit == 'b':
+             value = value
+         elif unit == 'kb':
+             value = value * 1000
+         elif unit == 'mb':
+             value = value * 1000000
+         elif unit == 'gb':
+             value = value * 1000000000
+         elif unit == 'tb':
+             value = value * 1000000000000
+         elif unit == 'pb':
+             value = value * 1000000000000000
+         else:
+             return False
+         return value
+     else:
+         return False
+
+
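+ # Illustrative usage (editor's sketch, not part of the released file):
+ #
+ #     >>> get_bytes_value_from_string('120GB')
+ #     120000000000
+ #     >>> get_bytes_value_from_string('10 TB')  # whitespace does not match
+ #     False
+
+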
+ def parse_did_filter_from_string(input_string: str) -> tuple[dict[str, Any], str]:
+     """
+     Parse DID filter options in format 'length<3,type=all' from string.
+
+     :param input_string: String containing the filter options.
+     :return: filter dictionary and type as string.
+     """
+     filters = {}
+     type_ = 'collection'
+     if input_string:
+         filter_options = input_string.replace(' ', '').split(',')
+         for option in filter_options:
+             value = None
+             key = None
+
+             if '>=' in option:
+                 key, value = option.split('>=')
+                 if key == 'length':
+                     key = 'length.gte'
+             elif '>' in option:
+                 key, value = option.split('>')
+                 if key == 'length':
+                     key = 'length.gt'
+             elif '<=' in option:
+                 key, value = option.split('<=')
+                 if key == 'length':
+                     key = 'length.lte'
+             elif '<' in option:
+                 key, value = option.split('<')
+                 if key == 'length':
+                     key = 'length.lt'
+             elif '=' in option:
+                 key, value = option.split('=')
+                 if key == 'created_after' or key == 'created_before':
+                     value = datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ')
+
+             if key == 'type':
+                 if value.upper() in ['ALL', 'COLLECTION', 'CONTAINER', 'DATASET', 'FILE']:  # type: ignore
+                     type_ = value.lower()  # type: ignore
+                 else:
+                     raise InvalidType('{0} is not a valid type. Valid types are {1}'.format(value, ['ALL', 'COLLECTION', 'CONTAINER', 'DATASET', 'FILE']))
+             elif key in ('length.gt', 'length.lt', 'length.gte', 'length.lte', 'length'):
+                 try:
+                     value = int(value)  # type: ignore
+                     filters[key] = value
+                 except ValueError:
+                     raise ValueError('Length has to be an integer value.')
+                 filters[key] = value
+             elif isinstance(value, str):
+                 if value.lower() == 'true':
+                     value = '1'
+                 elif value.lower() == 'false':
+                     value = '0'
+                 filters[key] = value
+             else:
+                 filters[key] = value
+
+     return filters, type_
+
+
1455
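+ # Illustrative usage sketch (editor's example, not part of the module):
+ # 'type' selects the DID type and everything else becomes a filter entry.
+ #
+ #     parse_did_filter_from_string('type=dataset,length>9')
+ #     # -> ({'length.gt': 9}, 'dataset')
+
+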
+ def parse_did_filter_from_string_fe(
+         input_string: str,
+         name: str = '*',
+         type: str = 'collection',
+         omit_name: bool = False
+ ) -> tuple[list[dict[str, Any]], str]:
+     """
+     Parse DID filter string for the filter engine (fe).
+
+     Should adhere to the following conventions:
+     - ';' represents the logical OR operator
+     - ',' represents the logical AND operator
+     - all operators belong to the set (<=, >=, ==, !=, >, <, =)
+     - there should be no duplicate key+operator criteria.
+
+     One-sided and compound inequalities are supported.
+
+     Sanity checking of input is left to the filter engine.
+
+     :param input_string: String containing the filter options.
+     :param name: DID name.
+     :param type: The type of the DID: all(container, dataset, file), collection(dataset or container), dataset, container.
+     :param omit_name: omit addition of name to filters.
+     :return: list of dictionaries with each dictionary as a separate OR expression, and the DID type.
+     """
+     # lookup table unifying all comprehended operators to a nominal suffix.
+     # note that the order matters as the regex engine is eager, e.g. we don't want to evaluate '<=' as '<' and '='.
+     operators_suffix_LUT = OrderedDict({
+         '<=': 'lte',
+         '>=': 'gte',
+         '==': '',
+         '!=': 'ne',
+         '>': 'gt',
+         '<': 'lt',
+         '=': ''
+     })
+
+     # lookup table mapping operator opposites, used to reverse compound inequalities.
+     operator_opposites_LUT = {
+         'lt': 'gt',
+         'lte': 'gte'
+     }
+     operator_opposites_LUT.update({op2: op1 for op1, op2 in operator_opposites_LUT.items()})
+
+     filters = []
+     if input_string:
+         or_groups = list(filter(None, input_string.split(';')))  # split <input_string> into OR clauses
+         for or_group in or_groups:
+             or_group = or_group.strip()
+             and_groups = list(filter(None, or_group.split(',')))  # split <or_group> into AND clauses
+             and_group_filters = {}
+             for and_group in and_groups:
+                 and_group = and_group.strip()
+                 # tokenise this AND clause using operators as delimiters.
+                 tokenisation_regex = "({})".format('|'.join(operators_suffix_LUT.keys()))
+                 and_group_split_by_operator = list(filter(None, re.split(tokenisation_regex, and_group)))
+                 if len(and_group_split_by_operator) == 3:  # this is a one-sided inequality or expression
+                     key, operator, value = [token.strip() for token in and_group_split_by_operator]
+
+                     # substitute input operator with the nominal operator defined by the LUT, <operators_suffix_LUT>.
+                     operator_mapped = operators_suffix_LUT.get(operator)
+
+                     filter_key_full = key
+                     if operator_mapped is not None:
+                         if operator_mapped:
+                             filter_key_full = "{}.{}".format(key, operator_mapped)
+                     else:
+                         raise DIDFilterSyntaxError("{} operator not understood.".format(operator))
+
+                     if filter_key_full in and_group_filters:
+                         raise DuplicateCriteriaInDIDFilter(filter_key_full)
+                     else:
+                         and_group_filters[filter_key_full] = value
+                 elif len(and_group_split_by_operator) == 5:  # this is a compound inequality
+                     value1, operator1, key, operator2, value2 = [token.strip() for token in and_group_split_by_operator]
+
+                     # substitute input operators with the nominal operators defined by the LUT, <operators_suffix_LUT>.
+                     operator1_mapped = operator_opposites_LUT.get(operators_suffix_LUT.get(operator1))
+                     operator2_mapped = operators_suffix_LUT.get(operator2)
+
+                     filter_key1_full = filter_key2_full = key
+                     if operator1_mapped is not None and operator2_mapped is not None:
+                         if operator1_mapped:  # ignore '' operator (maps from equals)
+                             filter_key1_full = "{}.{}".format(key, operator1_mapped)
+                         if operator2_mapped:  # ignore '' operator (maps from equals)
+                             filter_key2_full = "{}.{}".format(key, operator2_mapped)
+                     else:
+                         raise DIDFilterSyntaxError("{} or {} operator not understood.".format(operator1, operator2))
+
+                     if filter_key1_full in and_group_filters:
+                         raise DuplicateCriteriaInDIDFilter(filter_key1_full)
+                     else:
+                         and_group_filters[filter_key1_full] = value1
+                     if filter_key2_full in and_group_filters:
+                         raise DuplicateCriteriaInDIDFilter(filter_key2_full)
+                     else:
+                         and_group_filters[filter_key2_full] = value2
+                 else:
+                     raise DIDFilterSyntaxError(and_group)
+
+             # add name key to each AND clause if it hasn't already been populated from the filter and <omit_name> not set.
+             if not omit_name and 'name' not in and_group_filters:
+                 and_group_filters['name'] = name
+
+             filters.append(and_group_filters)
+     else:
+         if not omit_name:
+             filters.append({
+                 'name': name
+             })
+     return filters, type
+
+
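+ # Illustrative usage sketch (editor's example, not part of the module):
+ # ';' separates OR groups, ',' separates AND criteria, and compound
+ # inequalities are split into two one-sided criteria.
+ #
+ #     parse_did_filter_from_string_fe('length >= 10, name=data*; 1 < length < 5')
+ #     # -> ([{'length.gte': '10', 'name': 'data*'},
+ #     #      {'length.gt': '1', 'length.lt': '5', 'name': '*'}], 'collection')
+
+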
+ def parse_replicas_from_file(path: "FileDescriptorOrPath") -> Any:
+     """
+     Parses the output of list_replicas from a json or metalink file
+     into a list of dictionaries. Metalink parsing is tried first and
+     if it fails it tries to parse json.
+
+     :param path: the path to the input file
+
+     :returns: a list with a dictionary for each file
+     """
+     with open(path) as fp:
+         try:
+             root = ElementTree.parse(fp).getroot()  # noqa: S314
+             return parse_replicas_metalink(root)
+         except ElementTree.ParseError as xml_err:
+             # rewind: the XML parser may already have consumed part of the file
+             fp.seek(0)
+             try:
+                 return json.load(fp)
+             except ValueError as json_err:
+                 raise MetalinkJsonParsingError(path, xml_err, json_err)
+
+
+ def parse_replicas_from_string(string: str) -> Any:
+     """
+     Parses the output of list_replicas from a json or metalink string
+     into a list of dictionaries. Metalink parsing is tried first and
+     if it fails it tries to parse json.
+
+     :param string: the string to parse
+
+     :returns: a list with a dictionary for each file
+     """
+     try:
+         root = ElementTree.fromstring(string)  # noqa: S314
+         return parse_replicas_metalink(root)
+     except ElementTree.ParseError as xml_err:
+         try:
+             return json.loads(string)
+         except ValueError as json_err:
+             raise MetalinkJsonParsingError(string, xml_err, json_err)
+
+
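+ # Illustrative usage sketch (editor's example, not part of the module):
+ # a string that is not well-formed XML falls through to the JSON parser.
+ #
+ #     parse_replicas_from_string('[{"scope": "user.jdoe", "name": "file1"}]')
+ #     # -> [{'scope': 'user.jdoe', 'name': 'file1'}]
+
+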
+ def parse_replicas_metalink(root: ElementTree.Element) -> list[dict[str, Any]]:
+     """
+     Transforms the metalink tree into a list of dictionaries where
+     each dictionary describes a file with its replicas.
+     Will be called by parse_replicas_from_file and parse_replicas_from_string.
+
+     :param root: root node of the metalink tree
+
+     :returns: a list with a dictionary for each file
+     """
+     files = []
+
+     # metalink namespace
+     ns = '{urn:ietf:params:xml:ns:metalink}'
+     str_to_bool = {'true': True, 'True': True, 'false': False, 'False': False}
+
+     # loop over all <file> tags of the metalink string
+     for file_tag_obj in root.findall(ns + 'file'):
+         # search for identity-tag
+         identity_tag_obj = file_tag_obj.find(ns + 'identity')
+         if not ElementTree.iselement(identity_tag_obj):
+             raise InputValidationError('Failed to locate identity-tag inside %s' % ElementTree.tostring(file_tag_obj))
+
+         cur_file = {'did': identity_tag_obj.text,
+                     'adler32': None,
+                     'md5': None,
+                     'sources': []}
+
+         parent_dids = set()
+         parent_dids_tag_obj = file_tag_obj.find(ns + 'parents')
+         if ElementTree.iselement(parent_dids_tag_obj):
+             for did_tag_obj in parent_dids_tag_obj.findall(ns + 'did'):
+                 parent_dids.add(did_tag_obj.text)
+         cur_file['parent_dids'] = parent_dids
+
+         size_tag_obj = file_tag_obj.find(ns + 'size')
+         cur_file['bytes'] = int(size_tag_obj.text) if ElementTree.iselement(size_tag_obj) else None
+
+         for hash_tag_obj in file_tag_obj.findall(ns + 'hash'):
+             hash_type = hash_tag_obj.get('type')
+             if hash_type:
+                 cur_file[hash_type] = hash_tag_obj.text
+
+         for url_tag_obj in file_tag_obj.findall(ns + 'url'):
+             key_rename_map = {'location': 'rse'}
+             src = {}
+             for k, v in url_tag_obj.items():
+                 k = key_rename_map.get(k, k)
+                 src[k] = str_to_bool.get(v, v)
+             src['pfn'] = url_tag_obj.text
+             cur_file['sources'].append(src)
+
+         files.append(cur_file)
+
+     return files
+
+
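+ # Illustrative sketch of a minimal metalink document accepted by
+ # parse_replicas_metalink (editor's example; all field values are made up):
+ #
+ #     <?xml version="1.0" encoding="UTF-8"?>
+ #     <metalink xmlns="urn:ietf:params:xml:ns:metalink">
+ #       <file name="file1">
+ #         <identity>user.jdoe:file1</identity>
+ #         <size>1048576</size>
+ #         <hash type="adler32">0a1b2c3d</hash>
+ #         <url location="SOME_RSE">root://host//path/file1</url>
+ #       </file>
+ #     </metalink>
+ #
+ # Parsing it yields one dictionary with 'did', 'bytes', 'adler32' and a
+ # single entry in 'sources' whose 'location' attribute is renamed to 'rse'.
+
+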
+ def get_thread_with_periodic_running_function(
+         interval: Union[int, float],
+         action: Callable[..., Any],
+         graceful_stop: threading.Event
+ ) -> threading.Thread:
+     """
+     Get a thread where a function runs periodically.
+
+     :param interval: Interval in seconds when the action function should run.
+     :param action: Function that should run periodically.
+     :param graceful_stop: Threading event used to check for graceful stop.
+     """
+     def start():
+         while not graceful_stop.is_set():
+             starttime = time.time()
+             action()
+             # never pass a negative value to sleep(); the action may take longer than the interval
+             time.sleep(max(0, interval - (time.time() - starttime)))
+     t = threading.Thread(target=start)
+     return t
+
+
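+ # Illustrative usage sketch (editor's example; `heartbeat` is a hypothetical
+ # callable): run an action every 10 seconds until the event is set.
+ #
+ #     stop_event = threading.Event()
+ #     worker = get_thread_with_periodic_running_function(10, heartbeat, stop_event)
+ #     worker.start()
+ #     ...
+ #     stop_event.set()
+ #     worker.join()
+
+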
+ def run_cmd_process(cmd: str, timeout: int = 3600) -> tuple[int, str]:
+     """
+     Run a shell command with a timeout.
+
+     :param cmd: shell command as a string
+     :param timeout: in seconds
+
+     :return: the error code and stdout (stderr, if any, is appended to stdout)
+     """
+
+     process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, preexec_fn=os.setsid, universal_newlines=True)
+
+     try:
+         stdout, stderr = process.communicate(timeout=timeout)
+     except subprocess.TimeoutExpired:
+         try:
+             # Kill the whole process group since we're using shell=True.
+             os.killpg(os.getpgid(process.pid), signal.SIGTERM)
+             stdout, stderr = process.communicate(timeout=3)
+         except subprocess.TimeoutExpired:
+             os.killpg(os.getpgid(process.pid), signal.SIGKILL)
+             stdout, stderr = process.communicate()
+
+     stdout = stdout or ''
+     stderr = stderr or ''
+     if stderr:
+         stdout += " Error: " + stderr
+     returncode = process.returncode
+     if returncode != 1 and 'Command time-out' in stdout:
+         returncode = 1
+     if returncode is None:
+         returncode = 0
+
+     return returncode, stdout
+
+
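+ # Illustrative usage sketch (editor's example, not part of the module):
+ #
+ #     returncode, output = run_cmd_process('echo hello', timeout=10)
+ #     # returncode == 0, output == 'hello\n'
+
+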
+ def gateway_update_return_dict(
+         dictionary: dict[str, Any],
+         session: Optional["Session"] = None
+ ) -> dict[str, Any]:
+     """
+     Ensure that rse is in a dictionary returned from core
+
+     :param dictionary: The dictionary to edit
+     :param session: The DB session to use
+     :returns dictionary: The edited dictionary
+     """
+     if not isinstance(dictionary, dict):
+         return dictionary
+
+     copied = False  # Avoid side effects from pass by object
+
+     for rse_str in ['rse', 'src_rse', 'source_rse', 'dest_rse', 'destination_rse']:
+         rse_id_str = '%s_id' % rse_str
+         if rse_id_str in dictionary.keys() and dictionary[rse_id_str] is not None:
+             if rse_str not in dictionary.keys():
+                 if not copied:
+                     dictionary = dictionary.copy()
+                     copied = True
+                 import rucio.core.rse
+                 dictionary[rse_str] = rucio.core.rse.get_rse_name(rse_id=dictionary[rse_id_str], session=session)
+
+     if 'account' in dictionary.keys() and dictionary['account'] is not None:
+         if not copied:
+             dictionary = dictionary.copy()
+             copied = True
+         dictionary['account'] = dictionary['account'].external
+
+     if 'scope' in dictionary.keys() and dictionary['scope'] is not None:
+         if not copied:
+             dictionary = dictionary.copy()
+             copied = True
+         dictionary['scope'] = dictionary['scope'].external
+
+     return dictionary
+
+
+ def setup_logger(
+         module_name: Optional[str] = None,
+         logger_name: Optional[str] = None,
+         logger_level: Optional[int] = None,
+         verbose: bool = False
+ ) -> logging.Logger:
+     '''
+     Factory method to set logger with handlers.
+     :param module_name: __name__ of the module that is calling this method
+     :param logger_name: name of the logger, typically name of the module.
+     :param logger_level: if not given, fetched from config.
+     :param verbose: verbose option set in bin/rucio
+     '''
+     # helper method for cfg check
+     def _force_cfg_log_level(cfg_option: str) -> bool:
+         cfg_forced_modules = config_get('logging', cfg_option, raise_exception=False, default=None, clean_cached=True,
+                                         check_config_table=False)
+         if cfg_forced_modules and module_name is not None:
+             if re.match(str(cfg_forced_modules), module_name):
+                 return True
+         return False
+
+     # creating log
+     if not logger_name:
+         if not module_name:
+             logger_name = 'usr'
+         else:
+             logger_name = module_name.split('.')[-1]
+     logger = logging.getLogger(logger_name)
+
+     # extracting the log level
+     if not logger_level:
+         logger_level = logging.INFO
+         if verbose:
+             logger_level = logging.DEBUG
+
+     # overriding by the config
+     cfg_levels = (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR)
+     for level in cfg_levels:
+         cfg_opt = 'forceloglevel' + logging.getLevelName(level)
+         if _force_cfg_log_level(cfg_opt):
+             logger_level = level
+
+     # setting the log level
+     logger.setLevel(logger_level)
+
+     # preferred logger handling
+     def add_handler(logger: logging.Logger) -> None:
+         hdlr = logging.StreamHandler()
+
+         def emit_decorator(fnc: Callable[..., Any]) -> Callable[..., Any]:
+             def func(*args) -> Any:
+                 if 'RUCIO_LOGGING_FORMAT' not in os.environ:
+                     levelno = args[0].levelno
+                     format_str = '%(asctime)s\t%(levelname)s\t%(message)s\033[0m'
+                     # CRITICAL and ERROR share the same red color
+                     if levelno >= logging.ERROR:
+                         color = '\033[31;1m'
+                     elif levelno >= logging.WARNING:
+                         color = '\033[33;1m'
+                     elif levelno >= logging.INFO:
+                         color = '\033[32;1m'
+                     elif levelno >= logging.DEBUG:
+                         color = '\033[36;1m'
+                         format_str = '%(asctime)s\t%(levelname)s\t%(filename)s\t%(message)s\033[0m'
+                     else:
+                         color = '\033[0m'
+                     formatter = logging.Formatter('{0}{1}'.format(color, format_str))
+                 else:
+                     formatter = logging.Formatter(os.environ['RUCIO_LOGGING_FORMAT'])
+                 hdlr.setFormatter(formatter)
+                 return fnc(*args)
+             return func
+         hdlr.emit = emit_decorator(hdlr.emit)
+         logger.addHandler(hdlr)
+
+     # setting handler and formatter
+     if not logger.handlers:
+         add_handler(logger)
+
+     return logger
+
+
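+ # Illustrative usage sketch (editor's example, not part of the module):
+ #
+ #     logger = setup_logger(module_name=__name__, verbose=True)
+ #     logger.debug('colored, timestamped output unless RUCIO_LOGGING_FORMAT is set')
+
+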
+ def daemon_sleep(
+         start_time: float,
+         sleep_time: float,
+         graceful_stop: threading.Event,
+         logger: "LoggerFunction" = logging.log
+ ) -> None:
+     """Sleeps a daemon for the time provided by sleep_time, minus the time already spent"""
+     end_time = time.time()
+     time_diff = end_time - start_time
+     if time_diff < sleep_time:
+         logger(logging.INFO, 'Sleeping for %s seconds', (sleep_time - time_diff))
+         graceful_stop.wait(sleep_time - time_diff)
+
+
+ def is_client() -> bool:
+     """
+     Checks if the function is called from a client or from a server/daemon
+
+     :returns client_mode: True if it is called from a client, False if it is called from a server/daemon
+     """
+     if 'RUCIO_CLIENT_MODE' not in os.environ:
+         try:
+             if config_has_section('database'):
+                 client_mode = False
+             elif config_has_section('client'):
+                 client_mode = True
+             else:
+                 client_mode = False
+         except (RuntimeError, ConfigNotFound):
+             # If no configuration file is found the default value should be True
+             client_mode = True
+     else:
+         # any non-empty value of RUCIO_CLIENT_MODE enables client mode
+         if os.environ['RUCIO_CLIENT_MODE']:
+             client_mode = True
+         else:
+             client_mode = False
+
+     return client_mode
+
+
+ class retry:
+     """Retry callable object with configurable number of attempts"""
+
+     def __init__(self, func: Callable[..., Any], *args, **kwargs):
+         '''
+         :param func: a method that should be executed with retries
+         :param args: parameters of the func
+         :param kwargs: keyword arguments of the func
+         '''
+         self.func, self.args, self.kwargs = func, args, kwargs
+
+     def __call__(self, mtries: int = 3, logger: "LoggerFunction" = logging.log) -> Any:
+         '''
+         :param mtries: maximum number of attempts to execute the function
+         :param logger: preferred logger
+         '''
+         attempt = mtries
+         while attempt > 1:
+             try:
+                 if logger:
+                     logger(logging.DEBUG, '{}: Attempt {}'.format(self.func.__name__, mtries - attempt + 1))
+                 return self.func(*self.args, **self.kwargs)
+             except Exception as e:
+                 if logger:
+                     logger(logging.DEBUG, '{}: Attempt {} failed'.format(self.func.__name__, mtries - attempt + 1))
+                     logger(logging.DEBUG, str(e))
+                 attempt -= 1
+         # the last attempt is made outside the loop and propagates any exception
+         return self.func(*self.args, **self.kwargs)
+
+
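+ # Illustrative usage sketch (editor's example; `fetch` is a hypothetical
+ # function): call fetch('x') up to 5 times, re-raising the last failure.
+ #
+ #     result = retry(fetch, 'x')(mtries=5)
+
+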
+ class StoreAndDeprecateWarningAction(argparse.Action):
+     '''
+     StoreAndDeprecateWarningAction is a descendant of :class:`argparse.Action`
+     and represents a store action with a deprecated argument name.
+     '''
+
+     def __init__(self,
+                  option_strings: Sequence[str],
+                  new_option_string: str,
+                  dest: str,
+                  **kwargs):
+         """
+         :param option_strings: all possible argument name strings
+         :param new_option_string: the new option string which replaces the old
+         :param dest: name of variable to store the value in
+         :param kwargs: everything else
+         """
+         super(StoreAndDeprecateWarningAction, self).__init__(
+             option_strings=option_strings,
+             dest=dest,
+             **kwargs)
+         if new_option_string not in option_strings:
+             raise ValueError("%s not supported as a string option." % new_option_string)
+         self.new_option_string = new_option_string
+
+     def __call__(self, parser, namespace, values, option_string: Optional[str] = None):
+         if option_string and option_string != self.new_option_string:
+             # The logger typically gets initialized after the argument parser
+             # to set the verbosity of the logger. Thus using simple print to console.
+             print("Warning: The commandline argument {} is deprecated! Please use {} in the future.".format(option_string, self.new_option_string))
+
+         setattr(namespace, self.dest, values)
+
+
+ class StoreTrueAndDeprecateWarningAction(argparse._StoreConstAction):
+     '''
+     StoreTrueAndDeprecateWarningAction is a descendant of :class:`argparse.Action`
+     and represents a store-true action with a deprecated argument name.
+     '''
+
+     def __init__(self,
+                  option_strings: Sequence[str],
+                  new_option_string: str,
+                  dest: str,
+                  default: bool = False,
+                  required: bool = False,
+                  help: Optional[str] = None):
+         """
+         :param option_strings: all possible argument name strings
+         :param new_option_string: the new option string which replaces the old
+         :param dest: name of variable to store the value in
+         :param default: default value of the argument
+         :param required: whether the argument is required
+         :param help: help text for the argument
+         """
+         super(StoreTrueAndDeprecateWarningAction, self).__init__(
+             option_strings=option_strings,
+             dest=dest,
+             const=True,
+             default=default,
+             required=required,
+             help=help)
+         if new_option_string not in option_strings:
+             raise ValueError("%s not supported as a string option." % new_option_string)
+         self.new_option_string = new_option_string
+
+     def __call__(self, parser, namespace, values, option_string: Optional[str] = None):
+         super(StoreTrueAndDeprecateWarningAction, self).__call__(parser, namespace, values, option_string=option_string)
+         if option_string and option_string != self.new_option_string:
+             # The logger typically gets initialized after the argument parser
+             # to set the verbosity of the logger. Thus using simple print to console.
+             print("Warning: The commandline argument {} is deprecated! Please use {} in the future.".format(option_string, self.new_option_string))
+
+
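+ # Illustrative usage sketch (editor's example, not part of the module):
+ # both the old and new option strings are registered, and using the old one
+ # prints a deprecation warning while still storing the value.
+ #
+ #     parser = argparse.ArgumentParser()
+ #     parser.add_argument('--old-name', '--new-name', dest='name',
+ #                         action=StoreAndDeprecateWarningAction,
+ #                         new_option_string='--new-name')
+
+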
+ class PriorityQueue:
+     """
+     Heap-based [1] priority queue which supports priority update operations
+
+     It is used as a dictionary: pq['element'] = priority
+     The element with the highest priority (i.e. the smallest priority value)
+     can be accessed with pq.top() or pq.pop(), depending on the desire to
+     keep it in the heap or not.
+
+     [1] https://en.wikipedia.org/wiki/Heap_(data_structure)
+     """
+     class ContainerSlot:
+         def __init__(self, position: int, priority: int):
+             self.pos = position
+             self.prio = priority
+
+     def __init__(self):
+         self.heap = []
+         self.container = {}
+
+     def __len__(self):
+         return len(self.heap)
+
+     def __getitem__(self, item):
+         return self.container[item].prio
+
+     def __setitem__(self, key, value):
+         if key in self.container:
+             existing_prio = self.container[key].prio
+             self.container[key].prio = value
+             if value < existing_prio:
+                 self._priority_decreased(key)
+             elif existing_prio < value:
+                 self._priority_increased(key)
+         else:
+             self.heap.append(key)
+             self.container[key] = self.ContainerSlot(position=len(self.heap) - 1, priority=value)
+             self._priority_decreased(key)
+
+     def __contains__(self, item):
+         return item in self.container
+
+     def top(self):
+         return self.heap[0]
+
+     def pop(self):
+         item = self.heap[0]
+         self.container.pop(item)
+
+         # move the last element to the root and sift it down
+         tmp_item = self.heap.pop()
+         if self.heap:
+             self.heap[0] = tmp_item
+             self.container[tmp_item].pos = 0
+             self._priority_increased(tmp_item)
+         return item
+
+     def _priority_decreased(self, item):
+         # sift up: swap with the parent while the item's priority is smaller
+         heap_changed = False
+
+         pos = self.container[item].pos
+         pos_parent = (pos - 1) // 2
+         while pos > 0 and self.container[self.heap[pos]].prio < self.container[self.heap[pos_parent]].prio:
+             tmp_item, parent = self.heap[pos], self.heap[pos_parent] = self.heap[pos_parent], self.heap[pos]
+             self.container[tmp_item].pos, self.container[parent].pos = self.container[parent].pos, self.container[tmp_item].pos
+
+             pos = pos_parent
+             pos_parent = (pos - 1) // 2
+
+             heap_changed = True
+         return heap_changed
+
+     def _priority_increased(self, item):
+         # sift down: swap with the smaller child until the heap property is restored
+         heap_changed = False
+         heap_len = len(self.heap)
+         pos = self.container[item].pos
+         pos_child1 = 2 * pos + 1
+         pos_child2 = 2 * pos + 2
+
+         heap_restored = False
+         while not heap_restored:
+             # find minimum between item, child1, and child2
+             if pos_child1 < heap_len and self.container[self.heap[pos_child1]].prio < self.container[self.heap[pos]].prio:
+                 pos_min = pos_child1
+             else:
+                 pos_min = pos
+             if pos_child2 < heap_len and self.container[self.heap[pos_child2]].prio < self.container[self.heap[pos_min]].prio:
+                 pos_min = pos_child2
+
+             if pos_min != pos:
+                 _, tmp_item = self.heap[pos_min], self.heap[pos] = self.heap[pos], self.heap[pos_min]
+                 self.container[tmp_item].pos = pos
+
+                 pos = pos_min
+                 pos_child1 = 2 * pos + 1
+                 pos_child2 = 2 * pos + 2
+
+                 heap_changed = True
+             else:
+                 heap_restored = True
+
+         self.container[self.heap[pos]].pos = pos
+         return heap_changed
+
+
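+ # Illustrative usage sketch (editor's example, not part of the module):
+ # smaller values mean higher priority, and priorities can be updated in place.
+ #
+ #     pq = PriorityQueue()
+ #     pq['a'] = 3
+ #     pq['b'] = 1
+ #     pq.top()     # -> 'b'
+ #     pq['a'] = 0  # priority update resifts the heap
+ #     pq.pop()     # -> 'a'
+
+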
+ def check_policy_package_version(package: str) -> None:
+     '''
+     Checks that the Rucio version supported by the policy package is compatible
+     with this version. Raises an exception if not.
+     :param package: the fully qualified name of the policy package
+     '''
+     import importlib
+
+     from rucio.version import version_string
+     try:
+         module = importlib.import_module(package)
+     except ImportError:
+         # package not found. Will be picked up elsewhere
+         return
+     if not hasattr(module, 'SUPPORTED_VERSION'):
+         # package is not versioned
+         return
+     supported_version = module.SUPPORTED_VERSION if isinstance(module.SUPPORTED_VERSION, list) else [module.SUPPORTED_VERSION]
+     # Rucio 1.x is matched on major.minor, later releases on the major version only
+     components = 2 if version_string().startswith("1.") else 1
+     current_version = ".".join(version_string().split(".")[:components])
+     if current_version not in supported_version:
+         raise PolicyPackageVersionError(package)
+
+
+ class Availability:
+     """
+     This util class acts as a translator between the availability stored as
+     integer and as boolean values.
+
+     `None` represents a missing value. This lets a user update a specific value
+     without altering the other ones. If it needs to be evaluated, it will
+     correspond to `True`.
+     """
+
+     read = None
+     write = None
+     delete = None
+
+     def __init__(
+             self,
+             read: Optional[bool] = None,
+             write: Optional[bool] = None,
+             delete: Optional[bool] = None
+     ):
+         self.read = read
+         self.write = write
+         self.delete = delete
+
+     def __iter__(self):
+         """
+         The iterator provides the feature to unpack the values of this class.
+
+         e.g. `read, write, delete = Availability(True, False, True)`
+
+         :returns: An iterator over the values `read`, `write`, `delete`.
+         """
+         return iter((self.read, self.write, self.delete))
+
+     def __repr__(self):
+         return "Availability({}, {}, {})".format(self.read, self.write, self.delete)
+
+     def __eq__(self, other):
+         return self.read == other.read and self.write == other.write and self.delete == other.delete
+
+     def __hash__(self):
+         return hash(self.integer)
+
+     @classmethod
+     def from_integer(cls, n):
+         """
+         Returns a new Availability instance where the values are set to the
+         corresponding bit values in the integer.
+
+         :param n: The integer value to get the availabilities from.
+         :returns: The corresponding Availability instance.
+         """
+         if n is None:
+             return cls(None, None, None)
+
+         return cls(
+             (n >> 2) % 2 == 1,
+             (n >> 1) % 2 == 1,
+             (n >> 0) % 2 == 1
+         )
+
+     @property
+     def integer(self):
+         """
+         Returns the corresponding integer for the instance values. The three
+         least-significant bits correspond to the availability values.
+
+         :returns: An integer corresponding to the availability values. `None`
+                   gets treated as `True`.
+         """
+         read_value = (self.read or self.read is None) * 4
+         write_value = (self.write or self.write is None) * 2
+         delete_value = (self.delete or self.delete is None) * 1
+
+         return read_value + write_value + delete_value
+
+
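+ # Illustrative usage sketch (editor's example, not part of the module):
+ # bit 2 is read, bit 1 is write, bit 0 is delete.
+ #
+ #     Availability.from_integer(5)             # -> Availability(True, False, True)
+ #     Availability(True, False, True).integer  # -> 5
+
+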
+ def retrying(
+         retry_on_exception: "Callable[[Exception], bool]",
+         wait_fixed: int,
+         stop_max_attempt_number: int
+ ) -> "Callable[[Callable[..., T]], Callable[..., T]]":
+     """
+     Decorator which retries a function multiple times on certain types of exceptions.
+
+     :param retry_on_exception: Function which takes an exception as argument and returns True if we must retry on this exception
+     :param wait_fixed: the time to wait between two attempts, in milliseconds
+     :param stop_max_attempt_number: maximum number of allowed attempts
+     """
+     def _decorator(fn):
+         @wraps(fn)
+         def _wrapper(*args, **kwargs):
+             attempt = 0
+             while True:
+                 attempt += 1
+                 try:
+                     return fn(*args, **kwargs)
+                 except Exception as e:
+                     if attempt >= stop_max_attempt_number:
+                         raise
+                     if not retry_on_exception(e):
+                         raise
+                     time.sleep(wait_fixed / 1000.0)
+         return _wrapper
+     return _decorator
+
+
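+ # Illustrative usage sketch (editor's example; `flaky_io` is a hypothetical
+ # function): retry only on OSError, up to 3 attempts, waiting 200 ms between them.
+ #
+ #     @retrying(retry_on_exception=lambda e: isinstance(e, OSError),
+ #               wait_fixed=200, stop_max_attempt_number=3)
+ #     def flaky_io():
+ #         ...
+
+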
+ def deep_merge_dict(source: dict, destination: dict) -> dict:
+     """Merge two dictionaries together recursively; destination is modified in place and returned"""
+     for key, value in source.items():
+         if isinstance(value, dict):
+             # get node or create one
+             node = destination.setdefault(key, {})
+             deep_merge_dict(value, node)
+         else:
+             destination[key] = value
+
+     return destination
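+
+
+ # Illustrative usage sketch (editor's example, not part of the module):
+ # nested dictionaries are merged key by key; scalar values from `source` win.
+ #
+ #     deep_merge_dict({'a': {'x': 1}}, {'a': {'y': 2}, 'b': 3})
+ #     # -> {'a': {'y': 2, 'x': 1}, 'b': 3}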