rucio-clients 35.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rucio-clients might be problematic.
- rucio/__init__.py +17 -0
- rucio/alembicrevision.py +15 -0
- rucio/client/__init__.py +15 -0
- rucio/client/accountclient.py +433 -0
- rucio/client/accountlimitclient.py +183 -0
- rucio/client/baseclient.py +974 -0
- rucio/client/client.py +76 -0
- rucio/client/configclient.py +126 -0
- rucio/client/credentialclient.py +59 -0
- rucio/client/didclient.py +866 -0
- rucio/client/diracclient.py +56 -0
- rucio/client/downloadclient.py +1785 -0
- rucio/client/exportclient.py +44 -0
- rucio/client/fileclient.py +50 -0
- rucio/client/importclient.py +42 -0
- rucio/client/lifetimeclient.py +90 -0
- rucio/client/lockclient.py +109 -0
- rucio/client/metaconventionsclient.py +140 -0
- rucio/client/pingclient.py +44 -0
- rucio/client/replicaclient.py +454 -0
- rucio/client/requestclient.py +125 -0
- rucio/client/rseclient.py +746 -0
- rucio/client/ruleclient.py +294 -0
- rucio/client/scopeclient.py +90 -0
- rucio/client/subscriptionclient.py +173 -0
- rucio/client/touchclient.py +82 -0
- rucio/client/uploadclient.py +955 -0
- rucio/common/__init__.py +13 -0
- rucio/common/cache.py +74 -0
- rucio/common/config.py +801 -0
- rucio/common/constants.py +159 -0
- rucio/common/constraints.py +17 -0
- rucio/common/didtype.py +189 -0
- rucio/common/exception.py +1151 -0
- rucio/common/extra.py +36 -0
- rucio/common/logging.py +420 -0
- rucio/common/pcache.py +1408 -0
- rucio/common/plugins.py +153 -0
- rucio/common/policy.py +84 -0
- rucio/common/schema/__init__.py +150 -0
- rucio/common/schema/atlas.py +413 -0
- rucio/common/schema/belleii.py +408 -0
- rucio/common/schema/domatpc.py +401 -0
- rucio/common/schema/escape.py +426 -0
- rucio/common/schema/generic.py +433 -0
- rucio/common/schema/generic_multi_vo.py +412 -0
- rucio/common/schema/icecube.py +406 -0
- rucio/common/stomp_utils.py +159 -0
- rucio/common/stopwatch.py +55 -0
- rucio/common/test_rucio_server.py +148 -0
- rucio/common/types.py +403 -0
- rucio/common/utils.py +2238 -0
- rucio/rse/__init__.py +96 -0
- rucio/rse/protocols/__init__.py +13 -0
- rucio/rse/protocols/bittorrent.py +184 -0
- rucio/rse/protocols/cache.py +122 -0
- rucio/rse/protocols/dummy.py +111 -0
- rucio/rse/protocols/gfal.py +703 -0
- rucio/rse/protocols/globus.py +243 -0
- rucio/rse/protocols/gsiftp.py +92 -0
- rucio/rse/protocols/http_cache.py +82 -0
- rucio/rse/protocols/mock.py +123 -0
- rucio/rse/protocols/ngarc.py +209 -0
- rucio/rse/protocols/posix.py +250 -0
- rucio/rse/protocols/protocol.py +594 -0
- rucio/rse/protocols/rclone.py +364 -0
- rucio/rse/protocols/rfio.py +136 -0
- rucio/rse/protocols/srm.py +338 -0
- rucio/rse/protocols/ssh.py +413 -0
- rucio/rse/protocols/storm.py +206 -0
- rucio/rse/protocols/webdav.py +550 -0
- rucio/rse/protocols/xrootd.py +301 -0
- rucio/rse/rsemanager.py +764 -0
- rucio/vcsversion.py +11 -0
- rucio/version.py +38 -0
- rucio_clients-35.7.0.data/data/etc/rse-accounts.cfg.template +25 -0
- rucio_clients-35.7.0.data/data/etc/rucio.cfg.atlas.client.template +42 -0
- rucio_clients-35.7.0.data/data/etc/rucio.cfg.template +257 -0
- rucio_clients-35.7.0.data/data/requirements.client.txt +15 -0
- rucio_clients-35.7.0.data/data/rucio_client/merge_rucio_configs.py +144 -0
- rucio_clients-35.7.0.data/scripts/rucio +2542 -0
- rucio_clients-35.7.0.data/scripts/rucio-admin +2447 -0
- rucio_clients-35.7.0.dist-info/METADATA +50 -0
- rucio_clients-35.7.0.dist-info/RECORD +88 -0
- rucio_clients-35.7.0.dist-info/WHEEL +5 -0
- rucio_clients-35.7.0.dist-info/licenses/AUTHORS.rst +97 -0
- rucio_clients-35.7.0.dist-info/licenses/LICENSE +201 -0
- rucio_clients-35.7.0.dist-info/top_level.txt +1 -0
rucio/common/utils.py
ADDED
@@ -0,0 +1,2238 @@
# Copyright European Organization for Nuclear Research (CERN) since 2012
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import base64
import copy
import datetime
import errno
import getpass
import hashlib
import io
import ipaddress
import itertools
import json
import logging
import math
import mmap
import os
import os.path
import re
import signal
import socket
import subprocess
import tempfile
import threading
import time
import zlib
from collections import OrderedDict
from collections.abc import Callable, Iterable, Iterator, Sequence
from enum import Enum
from functools import partial, wraps
from io import StringIO
from itertools import zip_longest
from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
from urllib.parse import parse_qsl, quote, urlencode, urlparse, urlunparse
from uuid import uuid4 as uuid
from xml.etree import ElementTree

import requests

from rucio.common.config import config_get, config_has_section
from rucio.common.exception import ConfigNotFound, DIDFilterSyntaxError, DuplicateCriteriaInDIDFilter, InputValidationError, InvalidType, MetalinkJsonParsingError, MissingModuleException, PolicyPackageVersionError, RucioException
from rucio.common.extra import import_extras
from rucio.common.plugins import PolicyPackageAlgorithms
from rucio.common.types import InternalAccount, InternalScope, TraceDict

EXTRA_MODULES = import_extras(['paramiko'])

if EXTRA_MODULES['paramiko']:
    try:
        from paramiko import RSAKey
    except Exception:
        EXTRA_MODULES['paramiko'] = False

if TYPE_CHECKING:
    T = TypeVar('T')
    from _typeshed import FileDescriptorOrPath
    from sqlalchemy.orm import Session

    from rucio.common.types import IPDict, LoggerFunction


# HTTP code dictionary. Not complete. Can be extended if needed.
codes = {
    # Informational.
    200: '200 OK',
    201: '201 Created',
    202: '202 Accepted',

    # Client Error.
    400: '400 Bad Request',
    401: '401 Unauthorized',
    403: '403 Forbidden',
    404: '404 Not Found',
    405: '405 Method Not Allowed',
    406: '406 Not Acceptable',
    408: '408 Request Timeout',
    409: '409 Conflict',
    410: '410 Gone',

    # Server Error.
    500: '500 Internal Server Error',
    501: '501 Not Implemented',
    502: '502 Bad Gateway',
    503: '503 Service Unavailable',
    504: '504 Gateway Timeout'
}

# RFC 1123 (ex RFC 822)
DATE_FORMAT = '%a, %d %b %Y %H:%M:%S UTC'


def invert_dict(d: dict[Any, Any]) -> dict[Any, Any]:
    """
    Invert the dictionary.
    CAUTION: this function is not deterministic unless the input dictionary is a one-to-one mapping.

    :param d: source dictionary
    :returns: dictionary {value: key for key, value in d.items()}
    """
    return {value: key for key, value in d.items()}


def dids_as_dicts(did_list: Iterable[Union[str, dict[str, str]]]) -> list[dict[str, str]]:
    """
    Converts a list of DIDs to a list of dictionaries
    :param did_list: list of DIDs as either "scope:name" or {"scope":"scope", "name":"name"}
    :returns: list of dictionaries {"scope":"scope", "name":"name"}
    """
    out = []
    for did in did_list:
        if isinstance(did, str):
            scope, name = did.split(":", 1)
            did = dict(scope=scope, name=name)
        if isinstance(did, dict):
            if not ("name" in did and "scope" in did):
                raise ValueError("Scope or name missing in: %s" % (did,))
        else:
            raise ValueError("Can not convert item %s (%s) to a DID" % (did, type(did)))
        out.append(did)
    return out


def build_url(
        url: str,
        path: Optional[str] = None,
        params: Optional[Union[str, dict[Any, Any], list[tuple[Any, Any]]]] = None,
        doseq: bool = False
) -> str:
    """
    Utility function to build a URL for requests to the Rucio system.

    If the optional parameter doseq evaluates to True, individual key=value pairs
    separated by '&' are generated for each element of the value sequence for the key.
    """
    complete_url = url
    if path is not None:
        complete_url += "/" + path
    if params is not None:
        complete_url += "?"
        if isinstance(params, str):
            complete_url += quote(params)
        else:
            complete_url += urlencode(params, doseq=doseq)
    return complete_url
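
# Usage sketch (illustrative only; the host name below is an assumed example, not a real endpoint):
#     >>> build_url('https://rucio.example.org', path='dids', params={'name': ['f1', 'f2']}, doseq=True)
#     'https://rucio.example.org/dids?name=f1&name=f2'
# With doseq=False the list would be encoded as a single quoted literal instead of repeated key=value pairs.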


def all_oidc_req_claims_present(
        scope: Optional[Union[str, list[str]]],
        audience: Optional[Union[str, list[str]]],
        required_scope: Optional[Union[str, list[str]]],
        required_audience: Optional[Union[str, list[str]]],
        separator: str = " "
) -> bool:
    """
    Checks if both of the following statements are true:
    - all items in required_scope are present in scope
    - all items in required_audience are present in audience
    If these conditions are not satisfied, False is returned.
    audience and scope must be both strings or both lists;
    similarly for the required_* variables.

    :params scope: list of strings or one string where items are separated by a separator input variable
    :params audience: list of strings or one string where items are separated by a separator input variable
    :params required_scope: list of strings or one string where items are separated by a separator input variable
    :params required_audience: list of strings or one string where items are separated by a separator input variable
    :params separator: separator string, space by default
    :returns: True or False
    """
    if not scope:
        scope = ""
    if not audience:
        audience = ""
    if not required_scope:
        required_scope = ""
    if not required_audience:
        required_audience = ""
    if (isinstance(scope, list) and isinstance(audience, list) and isinstance(required_scope, list) and isinstance(required_audience, list)):
        scope = [str(it) for it in scope]
        audience = [str(it) for it in audience]
        required_scope = [str(it) for it in required_scope]
        required_audience = [str(it) for it in required_audience]
        req_scope_present = all(elem in scope for elem in required_scope)
        req_audience_present = all(elem in audience for elem in required_audience)
        return req_scope_present and req_audience_present
    elif (isinstance(scope, str) and isinstance(audience, str) and isinstance(required_scope, str) and isinstance(required_audience, str)):
        scope = str(scope)
        audience = str(audience)
        required_scope = str(required_scope)
        required_audience = str(required_audience)
        req_scope_present = all(elem in scope.split(separator) for elem in required_scope.split(separator))
        req_audience_present = all(elem in audience.split(separator) for elem in required_audience.split(separator))
        return req_scope_present and req_audience_present
    elif (isinstance(scope, list) and isinstance(audience, list) and isinstance(required_scope, str) and isinstance(required_audience, str)):
        scope = [str(it) for it in scope]
        audience = [str(it) for it in audience]
        required_scope = str(required_scope)
        required_audience = str(required_audience)
        req_scope_present = all(elem in scope for elem in required_scope.split(separator))
        req_audience_present = all(elem in audience for elem in required_audience.split(separator))
        return req_scope_present and req_audience_present
    elif (isinstance(scope, str) and isinstance(audience, str) and isinstance(required_scope, list) and isinstance(required_audience, list)):
        scope = str(scope)
        audience = str(audience)
        required_scope = [str(it) for it in required_scope]
        required_audience = [str(it) for it in required_audience]
        req_scope_present = all(elem in scope.split(separator) for elem in required_scope)
        req_audience_present = all(elem in audience.split(separator) for elem in required_audience)
        return req_scope_present and req_audience_present
    else:
        return False
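
# Usage sketch (illustrative claim values) with space-separated claim strings:
#     >>> all_oidc_req_claims_present('openid profile wlcg', 'rucio', 'openid profile', 'rucio')
#     True
#     >>> all_oidc_req_claims_present('openid', 'rucio', 'openid profile', 'rucio')
#     False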


def generate_uuid() -> str:
    return str(uuid()).replace('-', '').lower()


def generate_uuid_bytes() -> bytes:
    return uuid().bytes


# GLOBALLY_SUPPORTED_CHECKSUMS = ['adler32', 'md5', 'sha256', 'crc32']
GLOBALLY_SUPPORTED_CHECKSUMS = ['adler32', 'md5']
CHECKSUM_ALGO_DICT = {}
PREFERRED_CHECKSUM = GLOBALLY_SUPPORTED_CHECKSUMS[0]
CHECKSUM_KEY = 'supported_checksums'


def is_checksum_valid(checksum_name: str) -> bool:
    """
    A simple function to check whether a checksum algorithm is supported.
    Relies on GLOBALLY_SUPPORTED_CHECKSUMS to allow for expandability.

    :param checksum_name: The name of the checksum to be verified.
    :returns: True if checksum_name is in the GLOBALLY_SUPPORTED_CHECKSUMS list, False otherwise.
    """

    return checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS


def set_preferred_checksum(checksum_name: str) -> None:
    """
    If the input checksum name is valid,
    set it as PREFERRED_CHECKSUM.

    :param checksum_name: The name of the checksum to be verified.
    """
    if is_checksum_valid(checksum_name):
        global PREFERRED_CHECKSUM
        PREFERRED_CHECKSUM = checksum_name


def adler32(file: "FileDescriptorOrPath") -> str:
    """
    An Adler-32 checksum is obtained by calculating two 16-bit checksums A and B
    and concatenating their bits into a 32-bit integer. A is the sum of all bytes in the
    stream plus one, and B is the sum of the individual values of A from each step.

    :param file: file name
    :returns: Hexified string, padded to 8 values.
    """

    # adler starting value is _not_ 0
    adler = 1

    can_mmap = False
    # try:
    #     with open(file, 'r+b') as f:
    #         can_mmap = True
    # except:
    #     pass

    try:
        # use mmap if possible
        if can_mmap:
            with open(file, 'r+b') as f:
                m = mmap.mmap(f.fileno(), 0)
                # partial block reads at slightly increased buffer sizes
                for block in iter(partial(m.read, io.DEFAULT_BUFFER_SIZE * 8), b''):
                    adler = zlib.adler32(block, adler)
        else:
            with open(file, 'rb') as f:
                # partial block reads at slightly increased buffer sizes
                for block in iter(partial(f.read, io.DEFAULT_BUFFER_SIZE * 8), b''):
                    adler = zlib.adler32(block, adler)

    except Exception as e:
        raise Exception('FATAL - could not get Adler-32 checksum of file %s: %s' % (file, e))

    # backflip on 32bit -- can be removed once everything is fully migrated to 64bit
    if adler < 0:
        adler = adler + 2 ** 32

    return str('%08x' % adler)


CHECKSUM_ALGO_DICT['adler32'] = adler32


def md5(file: "FileDescriptorOrPath") -> str:
    """
    Runs the MD5 algorithm (RFC-1321) on the binary content of the file named file and returns the hexadecimal digest

    :param file: file name
    :returns: string of 32 hexadecimal digits
    """
    hash_md5 = hashlib.md5()
    try:
        with open(file, "rb") as f:
            list(map(hash_md5.update, iter(lambda: f.read(4096), b"")))
    except Exception as e:
        raise Exception('FATAL - could not get MD5 checksum of file %s - %s' % (file, e))

    return hash_md5.hexdigest()


CHECKSUM_ALGO_DICT['md5'] = md5


def sha256(file: "FileDescriptorOrPath") -> str:
    """
    Runs the SHA256 algorithm on the binary content of the file named file and returns the hexadecimal digest

    :param file: file name
    :returns: string of 64 hexadecimal digits
    """
    with open(file, "rb") as f:
        bytes_ = f.read()  # read entire file as bytes
        readable_hash = hashlib.sha256(bytes_).hexdigest()
        print(readable_hash)
        return readable_hash


CHECKSUM_ALGO_DICT['sha256'] = sha256


def crc32(file: "FileDescriptorOrPath") -> str:
    """
    Runs the CRC32 algorithm on the binary content of the file named file and returns the hexadecimal digest

    :param file: file name
    :returns: string of up to 8 hexadecimal digits
    """
    prev = 0
    for eachLine in open(file, "rb"):
        prev = zlib.crc32(eachLine, prev)
    return "%X" % (prev & 0xFFFFFFFF)


CHECKSUM_ALGO_DICT['crc32'] = crc32
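
# Usage sketch (illustrative): the registry above maps algorithm names to the
# file-hashing callables defined in this module.
#     >>> is_checksum_valid('adler32')
#     True
#     >>> is_checksum_valid('sha3_256')
#     False
#     >>> CHECKSUM_ALGO_DICT['md5']('/path/to/some/file')   # hypothetical path
#     '<32-character hexadecimal digest of that file>'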


def _next_pow2(num: int) -> int:
    if not num:
        return 0
    return math.ceil(math.log2(num))


def _bittorrent_v2_piece_length_pow2(file_size: int) -> int:
    """
    Automatically chooses the `piece size` so that the `piece layers`
    are kept small(er) than usual. This is a balancing act:
    having a big piece_length requires more work on the bittorrent client
    side to validate hashes, but having it small requires more
    space to store the `piece layers` in the database.

    Returns the result as the exponent 'x' for power of 2.
    To get the actual length in bytes, the caller should compute 2^x.
    """

    # by the bittorrent v2 specification, the piece size is equal to block size = 16KiB
    min_piece_len_pow2 = 14  # 2 ** 14 == 16 KiB
    if not file_size:
        return min_piece_len_pow2
    # Limit the maximum size of the pieces_layers hash chain for bittorrent v2,
    # because we'll have to store it in the database
    max_pieces_layers_size_pow2 = 20  # 2 ** 20 == 1 MiB
    # sha256 requires 2 ** 5 == 32 Bytes == 256 bits
    hash_size_pow2 = 5

    # The closest power of two bigger than the file size
    file_size_pow2 = _next_pow2(file_size)

    # Compute the target size for the 'pieces layers' in the torrent
    # (as power of two: the closest power-of-two smaller than the number)
    # Will cap at max_pieces_layers_size for files larger than 1TB.
    target_pieces_layers_size = math.sqrt(file_size)
    target_pieces_layers_size_pow2 = min(math.floor(math.log2(target_pieces_layers_size)), max_pieces_layers_size_pow2)
    target_piece_num_pow2 = max(target_pieces_layers_size_pow2 - hash_size_pow2, 0)

    piece_length_pow2 = max(file_size_pow2 - target_piece_num_pow2, min_piece_len_pow2)
    return piece_length_pow2


def bittorrent_v2_piece_length(file_size: int) -> int:
    return 2 ** _bittorrent_v2_piece_length_pow2(file_size)
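
# Worked example (illustrative): for a 1 GiB file (2 ** 30 bytes),
#     file_size_pow2                 = 30
#     target_pieces_layers_size      = sqrt(2 ** 30) = 2 ** 15 bytes
#     target_pieces_layers_size_pow2 = min(15, 20) = 15
#     target_piece_num_pow2          = 15 - 5 = 10   (about 1024 pieces)
#     piece_length_pow2              = max(30 - 10, 14) = 20
# so bittorrent_v2_piece_length(2 ** 30) == 2 ** 20 (1 MiB pieces), giving a
# 'piece layers' blob of roughly 1024 * 32 B = 32 KiB.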


def bittorrent_v2_merkle_sha256(file: "FileDescriptorOrPath") -> tuple[bytes, bytes, int]:
    """
    Compute the .torrent v2 hash tree for the given file.
    (http://www.bittorrent.org/beps/bep_0052.html)
    In particular, it will return the root of the merkle hash
    tree of the file, the 'piece layers' as described in the
    previous BEP, and the chosen `piece size`

    This function will read the file in chunks of 16KiB
    (which is the imposed block size by bittorrent v2) and compute
    the sha256 hash of each block. When enough blocks are read
    to form a `piece`, will compute the merkle hash root of the
    piece from the hashes of its blocks. At the end, the hashes
    of pieces are combined to create the global pieces_root.
    """

    # by the bittorrent v2 specification, the block size and the
    # minimum piece size are both fixed to 16KiB
    block_size = 16384
    block_size_pow2 = 14  # 2 ** 14 == 16 KiB
    # sha256 requires 2 ** 5 == 32 Bytes == 256 bits
    hash_size = 32

    def _merkle_root(leafs: list[bytes], nb_levels: int, padding: bytes) -> bytes:
        """
        Build the root of the merkle hash tree from the (possibly incomplete) leafs layer.
        If len(leafs) < 2 ** nb_levels, it will be padded with the padding repeated as many times
        as needed to have 2 ** nb_levels leafs in total.
        """
        nodes = copy.copy(leafs)
        level = nb_levels

        while level > 0:
            for i in range(2 ** (level - 1)):
                node1 = nodes[2 * i] if 2 * i < len(nodes) else padding
                node2 = nodes[2 * i + 1] if 2 * i + 1 < len(nodes) else padding
                h = hashlib.sha256(node1)
                h.update(node2)
                if i < len(nodes):
                    nodes[i] = h.digest()
                else:
                    nodes.append(h.digest())
            level -= 1
        return nodes[0] if nodes else padding

    file_size = os.stat(file).st_size
    piece_length_pow2 = _bittorrent_v2_piece_length_pow2(file_size)

    block_per_piece_pow2 = piece_length_pow2 - block_size_pow2
    piece_length = 2 ** piece_length_pow2
    block_per_piece = 2 ** block_per_piece_pow2
    piece_num = math.ceil(file_size / piece_length)

    remaining = file_size
    remaining_in_block = min(file_size, block_size)
    block_hashes = []
    piece_hashes = []
    current_hash = hashlib.sha256()
    block_padding = bytes(hash_size)
    with open(file, 'rb') as f:
        while True:
            data = f.read(remaining_in_block)
            if not data:
                break

            current_hash.update(data)

            remaining_in_block -= len(data)
            remaining -= len(data)

            if not remaining_in_block:
                block_hashes.append(current_hash.digest())
                if len(block_hashes) == block_per_piece or not remaining:
                    piece_hashes.append(_merkle_root(block_hashes, nb_levels=block_per_piece_pow2, padding=block_padding))
                    block_hashes = []
                current_hash = hashlib.sha256()
                remaining_in_block = min(block_size, remaining)

            if not remaining:
                break

    if remaining or remaining_in_block or len(piece_hashes) != piece_num:
        raise RucioException(f'Error while computing merkle sha256 of {file}')

    piece_padding = _merkle_root([], nb_levels=block_per_piece_pow2, padding=block_padding)
    pieces_root = _merkle_root(piece_hashes, nb_levels=_next_pow2(piece_num), padding=piece_padding)
    pieces_layers = b''.join(piece_hashes) if len(piece_hashes) > 1 else b''

    return pieces_root, pieces_layers, piece_length


def merkle_sha256(file: "FileDescriptorOrPath") -> str:
    """
    The root of the sha256 merkle hash tree with leaf size of 16 KiB.
    """
    pieces_root, _, _ = bittorrent_v2_merkle_sha256(file)
    return pieces_root.hex()


CHECKSUM_ALGO_DICT['merkle_sha256'] = merkle_sha256


def bencode(obj: Union[int, bytes, str, list, dict[bytes, Any]]) -> bytes:
    """
    Copied from the reference implementation of v2 bittorrent:
    http://bittorrent.org/beps/bep_0052_torrent_creator.py
    """

    if isinstance(obj, int):
        return b"i" + str(obj).encode() + b"e"
    elif isinstance(obj, bytes):
        return str(len(obj)).encode() + b":" + obj
    elif isinstance(obj, str):
        return bencode(obj.encode("utf-8"))
    elif isinstance(obj, list):
        return b"l" + b"".join(map(bencode, obj)) + b"e"
    elif isinstance(obj, dict):
        if all(isinstance(i, bytes) for i in obj.keys()):
            items = list(obj.items())
            items.sort()
            return b"d" + b"".join(map(bencode, itertools.chain(*items))) + b"e"
        else:
            raise ValueError("dict keys should be bytes " + str(obj.keys()))
    raise ValueError("Allowed types: int, bytes, str, list, dict; not %s", type(obj))
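
# Usage sketch (illustrative values):
#     >>> bencode({b'length': 42, b'name': b'scope:file'})
#     b'd6:lengthi42e4:name10:scope:filee'
# Dictionary keys are sorted before encoding, as required by the bencoding format.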


def construct_torrent(
        scope: str,
        name: str,
        length: int,
        piece_length: int,
        pieces_root: bytes,
        pieces_layers: "Optional[bytes]" = None,
        trackers: "Optional[list[str]]" = None,
) -> "tuple[str, bytes]":

    torrent_dict = {
        b'creation date': int(time.time()),
        b'info': {
            b'meta version': 2,
            b'private': 1,
            b'name': f'{scope}:{name}'.encode(),
            b'piece length': piece_length,
            b'file tree': {
                name.encode(): {
                    b'': {
                        b'length': length,
                        b'pieces root': pieces_root,
                    }
                }
            }
        },
        b'piece layers': {},
    }
    if trackers:
        torrent_dict[b'announce'] = trackers[0].encode()
        if len(trackers) > 1:
            torrent_dict[b'announce-list'] = [t.encode() for t in trackers]
    if pieces_layers:
        torrent_dict[b'piece layers'][pieces_root] = pieces_layers

    torrent_id = hashlib.sha256(bencode(torrent_dict[b'info'])).hexdigest()[:40]
    torrent = bencode(torrent_dict)
    return torrent_id, torrent


def str_to_date(string: str) -> Optional[datetime.datetime]:
    """ Converts an RFC-1123 string to the corresponding datetime value.

    :param string: the RFC-1123 string to convert to datetime value.
    """
    return datetime.datetime.strptime(string, DATE_FORMAT) if string else None


def val_to_space_sep_str(vallist: list[str]) -> str:
    """ Converts a list of values into a string of space separated values

    :param vallist: the list of values to convert into a string
    :return: the string of space separated values or the value initially passed as parameter
    """
    try:
        if isinstance(vallist, list):
            return str(" ".join(vallist))
        else:
            return str(vallist)
    except:
        return ''


def date_to_str(date: datetime.datetime) -> Optional[str]:
    """ Converts a datetime value to the corresponding RFC-1123 string.

    :param date: the datetime value to convert.
    """
    return datetime.datetime.strftime(date, DATE_FORMAT) if date else None


class APIEncoder(json.JSONEncoder):
    """ Proprietary JSONEncoder subclass used by the json render function.
    This is needed to address the encoding of special values.
    """

    def default(self, obj):  # pylint: disable=E0202
        if isinstance(obj, datetime.datetime):
            # convert any datetime to RFC 1123 format
            return date_to_str(obj)
        elif isinstance(obj, (datetime.time, datetime.date)):
            # should not happen since the only date-like format
            # supported at domain schema level is 'datetime'.
            return obj.isoformat()
        elif isinstance(obj, datetime.timedelta):
            return obj.days * 24 * 60 * 60 + obj.seconds
        elif isinstance(obj, Enum):
            return obj.name
        elif isinstance(obj, (InternalAccount, InternalScope)):
            return obj.external
        return json.JSONEncoder.default(self, obj)


def render_json(*args, **kwargs) -> str:
    """ Render a list or a dict as a JSON-formatted string. """
    if args and isinstance(args[0], list):
        data = args[0]
    elif isinstance(kwargs, dict):
        data = kwargs
    else:
        raise ValueError("Error while serializing object to JSON-formatted string: supported input types are list or dict.")
    return json.dumps(data, cls=APIEncoder)
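
# Usage sketch (illustrative): datetime values are serialized with the RFC 1123
# DATE_FORMAT defined above.
#     >>> render_json(created_at=datetime.datetime(2024, 1, 2, 3, 4, 5))
#     '{"created_at": "Tue, 02 Jan 2024 03:04:05 UTC"}'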


def datetime_parser(dct: dict[Any, Any]) -> dict[Any, Any]:
    """ datetime parser
    """
    for k, v in list(dct.items()):
        if isinstance(v, str) and re.search(" UTC", v):
            try:
                dct[k] = datetime.datetime.strptime(v, DATE_FORMAT)
            except Exception:
                pass
    return dct


def parse_response(data: Union[str, bytes, bytearray]) -> Any:
    """
    JSON render function
    """
    if isinstance(data, (bytes, bytearray)):
        data = data.decode('utf-8')

    return json.loads(data, object_hook=datetime_parser)


def execute(cmd: str) -> tuple[int, str, str]:
    """
    Executes a command in a subprocess. Returns a tuple
    of (exitcode, out, err), where out is the string output
    from stdout and err is the string output from stderr when
    executing the command.

    :param cmd: Command string to execute
    """

    process = subprocess.Popen(cmd,
                               shell=True,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    result = process.communicate()
    (out, err) = result
    exitcode = process.returncode
    return exitcode, out.decode(encoding='utf-8'), err.decode(encoding='utf-8')


def rse_supported_protocol_domains() -> list[str]:
    """ Returns a list with all supported RSE protocol domains."""
    return ['lan', 'wan']


def grouper(iterable: Iterable[Any], n: int, fillvalue: Optional[object] = None) -> zip_longest:
    """ Collect data into fixed-length chunks or blocks """
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)


def chunks(iterable, n):
    """
    Yield successive n-sized chunks from the iterable.
    """
    if isinstance(iterable, list):
        for i in range(0, len(iterable), n):
            yield iterable[i:i + n]
    else:
        it = iter(iterable)
        while True:
            chunk = list(itertools.islice(it, n))
            if not chunk:
                return
            yield chunk


def dict_chunks(dict_: dict[Any, Any], n: int) -> Iterator[dict[Any, Any]]:
    """
    Iterate over the dictionary in groups of the requested size
    """
    it = iter(dict_)
    for _ in range(0, len(dict_), n):
        yield {k: dict_[k] for k in itertools.islice(it, n)}
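
# Usage sketch (illustrative values):
#     >>> list(chunks([1, 2, 3, 4, 5], 2))
#     [[1, 2], [3, 4], [5]]
#     >>> list(dict_chunks({'a': 1, 'b': 2, 'c': 3}, 2))
#     [{'a': 1, 'b': 2}, {'c': 3}]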


def my_key_generator(namespace: str, fn: Callable, **kw) -> Callable[..., str]:
    """
    Customized key generator for dogpile
    """
    fname = fn.__name__

    def generate_key(*arg, **kw) -> str:
        return namespace + "_" + fname + "_".join(str(s) for s in filter(None, arg))

    return generate_key


NonDeterministicPFNAlgorithmsT = TypeVar('NonDeterministicPFNAlgorithmsT', bound='NonDeterministicPFNAlgorithms')


class NonDeterministicPFNAlgorithms(PolicyPackageAlgorithms):
    """
    Handle PFN construction for non-deterministic RSEs, including registration of algorithms
    from policy packages
    """

    _algorithm_type = 'non_deterministic_pfn'

    def __init__(self) -> None:
        """
        Initialises a non-deterministic PFN construction object
        """
        super().__init__()

    def construct_non_deterministic_pfn(self, dsn: str, scope: Optional[str], filename: str, naming_convention: str) -> str:
        """
        Calls the correct algorithm to generate a non-deterministic PFN
        """
        return self.get_algorithm(naming_convention)(dsn, scope, filename)

    @classmethod
    def supports(cls: type[NonDeterministicPFNAlgorithmsT], naming_convention: str) -> bool:
        """
        Checks whether a non-deterministic PFN algorithm is supported
        """
        return super()._supports(cls._algorithm_type, naming_convention)

    @classmethod
    def _module_init_(cls: type[NonDeterministicPFNAlgorithmsT]) -> None:
        """
        Registers the included non-deterministic PFN algorithms
        """
        cls.register('T0', cls.construct_non_deterministic_pfn_T0)
        cls.register('DQ2', cls.construct_non_deterministic_pfn_DQ2)
        cls.register('BelleII', cls.construct_non_deterministic_pfn_BelleII)

    @classmethod
    def get_algorithm(cls: type[NonDeterministicPFNAlgorithmsT], naming_convention: str) -> Callable[[str, Optional[str], str], str]:
        """
        Looks up a non-deterministic PFN algorithm by name
        """
        return super()._get_one_algorithm(cls._algorithm_type, naming_convention)

    @classmethod
    def register(cls: type[NonDeterministicPFNAlgorithmsT], name: str, fn_construct_non_deterministic_pfn: Callable[[str, Optional[str], str], Optional[str]]) -> None:
        """
        Register a new non-deterministic PFN algorithm
        """
        algorithm_dict = {name: fn_construct_non_deterministic_pfn}
        super()._register(cls._algorithm_type, algorithm_dict)

    @staticmethod
    def __strip_dsn(dsn: str) -> str:
        """
        Drop the _sub and _dis suffixes for panda datasets from the lfc path
        they will be registered in.
        Method imported from DQ2.
        """

        suffixes_to_drop = ['_dis', '_sub', '_frag']
        fields = dsn.split('.')
        last_field = fields[-1]
        try:
            for suffix in suffixes_to_drop:
                last_field = re.sub('%s.*$' % suffix, '', last_field)
        except IndexError:
            return dsn
        fields[-1] = last_field
        stripped_dsn = '.'.join(fields)
        return stripped_dsn

    @staticmethod
    def __strip_tag(tag: str) -> str:
        """
        Drop the _sub and _dis suffixes for panda datasets from the lfc path
        they will be registered in
        Method imported from DQ2.
        """
        suffixes_to_drop = ['_dis', '_sub', '_tid']
        stripped_tag = tag
        try:
            for suffix in suffixes_to_drop:
                stripped_tag = re.sub('%s.*$' % suffix, '', stripped_tag)
        except IndexError:
            return stripped_tag
        return stripped_tag

    @staticmethod
    def construct_non_deterministic_pfn_DQ2(dsn: str, scope: Optional[str], filename: str) -> str:
        """
        Defines relative PFN for new replicas. This method
        contains DQ2 convention. To be used for non-deterministic sites.
        Method imported from DQ2.

        @return: relative PFN for new replica.
        @rtype: str
        """
        # check how many dots in dsn
        fields = dsn.split('.')
        nfields = len(fields)

        if nfields == 0:
            return '/other/other/%s' % (filename)
        elif nfields == 1:
            stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
            return '/other/%s/%s' % (stripped_dsn, filename)
        elif nfields == 2:
            project = fields[0]
            stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
            return '/%s/%s/%s' % (project, stripped_dsn, filename)
        elif nfields < 5 or re.match('user*|group*', fields[0]):
            project = fields[0]
            f2 = fields[1]
            f3 = fields[2]
            stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
            return '/%s/%s/%s/%s/%s' % (project, f2, f3, stripped_dsn, filename)
        else:
            project = fields[0]
            dataset_type = fields[4]
            if nfields == 5:
                tag = 'other'
            else:
                tag = NonDeterministicPFNAlgorithms.__strip_tag(fields[-1])
            stripped_dsn = NonDeterministicPFNAlgorithms.__strip_dsn(dsn)
            return '/%s/%s/%s/%s/%s' % (project, dataset_type, tag, stripped_dsn, filename)
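
    # Usage sketch (illustrative DSN and file name):
    #     >>> NonDeterministicPFNAlgorithms.construct_non_deterministic_pfn_DQ2('mc.simul', None, 'f.root')
    #     '/mc/mc.simul/f.root'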

    @staticmethod
    def construct_non_deterministic_pfn_T0(dsn: str, scope: Optional[str], filename: str) -> Optional[str]:
        """
        Defines relative PFN for new replicas. This method
        contains Tier0 convention. To be used for non-deterministic sites.

        @return: relative PFN for new replica.
        @rtype: str
        """
        fields = dsn.split('.')
        nfields = len(fields)
        if nfields >= 3:
            return '/%s/%s/%s/%s/%s' % (fields[0], fields[2], fields[1], dsn, filename)
        elif nfields == 1:
            return '/%s/%s/%s/%s/%s' % (fields[0], 'other', 'other', dsn, filename)
        elif nfields == 2:
            return '/%s/%s/%s/%s/%s' % (fields[0], fields[2], 'other', dsn, filename)
        elif nfields == 0:
            return '/other/other/other/other/%s' % (filename)

    @staticmethod
    def construct_non_deterministic_pfn_BelleII(dsn: str, scope: Optional[str], filename: str) -> str:
        """
        Defines relative PFN for Belle II specific replicas.
        This method contains the Belle II convention.
        To be used for non-deterministic Belle II sites.
        DSN (or datablock in the Belle II naming) contains /
        """

        fields = dsn.split("/")
        nfields = len(fields)
        if nfields == 0:
            return '/other/%s' % (filename)
        else:
            return '%s/%s' % (dsn, filename)


_DEFAULT_NON_DETERMINISTIC_PFN = 'DQ2'
NonDeterministicPFNAlgorithms._module_init_()


def construct_non_deterministic_pfn(dsn: str, scope: Optional[str], filename: str, naming_convention: Optional[str] = None) -> str:
    """
    Applies non-deterministic PFN convention to the given replica.
    use the naming_convention to call the actual function which will do the job.
    Rucio administrators can potentially register additional PFN generation algorithms,
    which are not implemented inside this main rucio repository, so changing the
    argument list must be done with caution.
    """
    pfn_algorithms = NonDeterministicPFNAlgorithms()
    if naming_convention is None or not NonDeterministicPFNAlgorithms.supports(naming_convention):
        naming_convention = _DEFAULT_NON_DETERMINISTIC_PFN
    return pfn_algorithms.construct_non_deterministic_pfn(dsn, scope, filename, naming_convention)


def clean_pfns(pfns: Iterable[str]) -> list[str]:
    res = []
    for pfn in pfns:
        if pfn.startswith('srm'):
            pfn = re.sub(':[0-9]+/', '/', pfn)
            pfn = re.sub(r'/srm/managerv1\?SFN=', '', pfn)
            pfn = re.sub(r'/srm/v2/server\?SFN=', '', pfn)
            pfn = re.sub(r'/srm/managerv2\?SFN=', '', pfn)
        if '?GoogleAccessId' in pfn:
            pfn = pfn.split('?GoogleAccessId')[0]
        if '?X-Amz' in pfn:
            pfn = pfn.split('?X-Amz')[0]
        res.append(pfn)
    res.sort()
    return res
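
# Usage sketch (illustrative SRM PFN; the host name is an assumed example):
#     >>> clean_pfns(['srm://se.example.org:8446/srm/managerv2?SFN=/pnfs/example/file1'])
#     ['srm://se.example.org/pnfs/example/file1']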


ScopeExtractionAlgorithmsT = TypeVar('ScopeExtractionAlgorithmsT', bound='ScopeExtractionAlgorithms')


class ScopeExtractionAlgorithms(PolicyPackageAlgorithms):
    """
    Handle scope extraction algorithms
    """

    _algorithm_type = 'scope'

    def __init__(self) -> None:
        """
        Initialises scope extraction algorithms object
        """
        super().__init__()

    def extract_scope(self, did: str, scopes: Optional[Sequence[str]], extract_scope_convention: str) -> Sequence[str]:
        """
        Calls the correct algorithm for scope extraction
        """
        return self.get_algorithm(extract_scope_convention)(did, scopes)

    @classmethod
    def supports(cls: type[ScopeExtractionAlgorithmsT], extract_scope_convention: str) -> bool:
        """
        Checks whether the specified scope extraction algorithm is supported
        """
        return super()._supports(cls._algorithm_type, extract_scope_convention)

    @classmethod
    def _module_init_(cls: type[ScopeExtractionAlgorithmsT]) -> None:
        """
        Registers the included scope extraction algorithms
        """
        cls.register('atlas', cls.extract_scope_atlas)
        cls.register('belleii', cls.extract_scope_belleii)
        cls.register('dirac', cls.extract_scope_dirac)

    @classmethod
    def get_algorithm(cls: type[ScopeExtractionAlgorithmsT], extract_scope_convention: str) -> Callable[[str, Optional[Sequence[str]]], Sequence[str]]:
        """
        Looks up a scope extraction algorithm by name
        """
        return super()._get_one_algorithm(cls._algorithm_type, extract_scope_convention)

    @classmethod
    def register(cls: type[ScopeExtractionAlgorithmsT], name: str, fn_extract_scope: Callable[[str, Optional[Sequence[str]]], Sequence[str]]) -> None:
        """
        Registers a new scope extraction algorithm
        """
        algorithm_dict = {name: fn_extract_scope}
        super()._register(cls._algorithm_type, algorithm_dict)

    @staticmethod
    def extract_scope_atlas(did: str, scopes: Optional[Sequence[str]]) -> Sequence[str]:
        # Try to extract the scope from the DSN
        if did.find(':') > -1:
            if len(did.split(':')) > 2:
                raise RucioException('Too many colons. Cannot extract scope and name')
            scope, name = did.split(':')[0], did.split(':')[1]
            if name.endswith('/'):
                name = name[:-1]
            return scope, name
        else:
            scope = did.split('.')[0]
            if did.startswith('user') or did.startswith('group'):
                scope = ".".join(did.split('.')[0:2])
            if did.endswith('/'):
                did = did[:-1]
            return scope, did

    @staticmethod
    def extract_scope_dirac(did: str, scopes: Optional[Sequence[str]]) -> Sequence[str]:
        # Default dirac scope extract algorithm. Scope is the second element in the LFN or the first one (VO name)
        # if only one element is the result of a split.
        elem = did.rstrip('/').split('/')
        if len(elem) > 2:
            scope = elem[2]
        else:
            scope = elem[1]
        return scope, did

    @staticmethod
    def extract_scope_belleii(did: str, scopes: Optional[Sequence[str]]) -> Sequence[str]:
        split_did = did.split('/')
        if did.startswith('/belle/mock/'):
            return 'mock', did
        if did.startswith('/belle/MC/'):
            if did.startswith('/belle/MC/BG') or \
                    did.startswith('/belle/MC/build') or \
                    did.startswith('/belle/MC/generic') or \
                    did.startswith('/belle/MC/log') or \
                    did.startswith('/belle/MC/mcprod') or \
                    did.startswith('/belle/MC/prerelease') or \
                    did.startswith('/belle/MC/release'):
                return 'mc', did
            if did.startswith('/belle/MC/cert') or \
                    did.startswith('/belle/MC/dirac') or \
                    did.startswith('/belle/MC/dr3') or \
                    did.startswith('/belle/MC/fab') or \
                    did.startswith('/belle/MC/hideki') or \
                    did.startswith('/belle/MC/merge') or \
                    did.startswith('/belle/MC/migration') or \
                    did.startswith('/belle/MC/skim') or \
                    did.startswith('/belle/MC/test'):
                return 'mc_tmp', did
            if len(split_did) > 4:
                if split_did[3].find('fab') > -1 or split_did[3].find('merge') > -1 or split_did[3].find('skim') > -1:
                    return 'mc_tmp', did
                if split_did[3].find('release') > -1:
                    return 'mc', did
            return 'mc_tmp', did
        if did.startswith('/belle/Raw/'):
            return 'raw', did
        if did.startswith('/belle/hRaw'):
            return 'hraw', did
        if did.startswith('/belle/user/'):
            if len(split_did) > 4:
                if len(split_did[3]) == 1 and scopes is not None and 'user.%s' % (split_did[4]) in scopes:
                    return 'user.%s' % split_did[4], did
            if len(split_did) > 3:
                if scopes is not None and 'user.%s' % (split_did[3]) in scopes:
                    return 'user.%s' % split_did[3], did
            return 'user', did
        if did.startswith('/belle/group/'):
            if len(split_did) > 4:
                if scopes is not None and 'group.%s' % (split_did[4]) in scopes:
                    return 'group.%s' % split_did[4], did
            return 'group', did
        if did.startswith('/belle/data/') or did.startswith('/belle/Data/'):
            if len(split_did) > 4:
                if split_did[3] in ['fab', 'skim']:  # /belle/Data/fab --> data_tmp
                    return 'data_tmp', did
                if split_did[3].find('release') > -1:  # /belle/Data/release --> data
                    return 'data', did
            if len(split_did) > 5:
                if split_did[3] in ['proc']:  # /belle/Data/proc
                    if split_did[4].find('release') > -1:  # /belle/Data/proc/release*
                        if len(split_did) > 7 and split_did[6] in ['GCR2c', 'prod00000007', 'prod6b', 'proc7b',
                                                                   'proc8b', 'Bucket4', 'Bucket6test', 'bucket6',
                                                                   'proc9', 'bucket7', 'SKIMDATAx1', 'proc10Valid',
                                                                   'proc10', 'SkimP10x1', 'SkimP11x1', 'SkimB9x1',
                                                                   'SkimB10x1', 'SkimB11x1']:  # /belle/Data/proc/release*/*/proc10/* --> data_tmp (Old convention)
                            return 'data_tmp', did
                        else:  # /belle/Data/proc/release*/*/proc11/* --> data (New convention)
                            return 'data', did
                    if split_did[4].find('fab') > -1:  # /belle/Data/proc/fab* --> data_tmp
                        return 'data_tmp', did
            return 'data_tmp', did
        if did.startswith('/belle/ddm/functional_tests/') or did.startswith('/belle/ddm/tests/') or did.startswith('/belle/test/ddm_test'):
            return 'test', did
        if did.startswith('/belle/BG/'):
            return 'data', did
        if did.startswith('/belle/collection'):
            return 'collection', did
        return 'other', did


_DEFAULT_EXTRACT = 'atlas'
ScopeExtractionAlgorithms._module_init_()


def extract_scope(
        did: str,
        scopes: Optional[Sequence[str]] = None,
        default_extract: str = _DEFAULT_EXTRACT
) -> Sequence[str]:
    scope_extraction_algorithms = ScopeExtractionAlgorithms()
    extract_scope_convention = config_get('common', 'extract_scope', False, None) or config_get('policy', 'extract_scope', False, None)
    if extract_scope_convention is None or not ScopeExtractionAlgorithms.supports(extract_scope_convention):
        extract_scope_convention = default_extract
    return scope_extraction_algorithms.extract_scope(did, scopes, extract_scope_convention)
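
# Usage sketch (illustrative DIDs, with the default 'atlas' convention and no config override):
#     >>> extract_scope('user.jdoe:user.jdoe.test.file')
#     ('user.jdoe', 'user.jdoe.test.file')
#     >>> extract_scope('mc16_13TeV.12345.evgen.EVNT')
#     ('mc16_13TeV', 'mc16_13TeV.12345.evgen.EVNT')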


def pid_exists(pid: int) -> bool:
    """
    Check whether pid exists in the current process table.
    UNIX only.
    """
    if pid < 0:
        return False
    if pid == 0:
        # According to "man 2 kill" PID 0 refers to every process
        # in the process group of the calling process.
        # On certain systems 0 is a valid PID but we have no way
        # to know that in a portable fashion.
        raise ValueError('invalid PID 0')
    try:
        os.kill(pid, 0)
    except OSError as err:
        if err.errno == errno.ESRCH:
            # ESRCH == No such process
            return False
        elif err.errno == errno.EPERM:
            # EPERM clearly means there's a process to deny access to
            return True
        else:
            # According to "man 2 kill" possible error values are
            # (EINVAL, EPERM, ESRCH)
            raise
    else:
        return True


def sizefmt(num: Union[int, float, None], human: bool = True) -> str:
    """
    Print human readable file sizes
    """
    if num is None:
        return '0.0 B'
    try:
        num = int(num)
        if human:
            for unit in ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z']:
                if abs(num) < 1000.0:
                    return "%3.3f %sB" % (num, unit)
                num /= 1000.0
            return "%.1f %sB" % (num, 'Y')
        else:
            return str(num)
    except OverflowError:
        return 'Inf'
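
# Usage sketch (illustrative values; note the decimal, not binary, unit prefixes):
#     >>> sizefmt(1048576)
#     '1.049 MB'
#     >>> sizefmt(1048576, human=False)
#     '1048576'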
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
def get_tmp_dir() -> str:
|
|
1158
|
+
"""
|
|
1159
|
+
Get a path where to store temporary files.
|
|
1160
|
+
|
|
1161
|
+
Rucio searches a standard list of temporary directories. The list is:
|
|
1162
|
+
|
|
1163
|
+
The directory named by the TMP environment variable.
|
|
1164
|
+
The directory named by the TMPDIR environment variable.
|
|
1165
|
+
The directory named by the TEMP environment variable.
|
|
1166
|
+
|
|
1167
|
+
As a last resort, the /tmp/ directory.
|
|
1168
|
+
|
|
1169
|
+
:return: A path.
|
|
1170
|
+
"""
|
|
1171
|
+
base_dir = os.path.abspath(tempfile.gettempdir())
|
|
1172
|
+
try:
|
|
1173
|
+
return os.path.join(base_dir, getpass.getuser())
|
|
1174
|
+
except Exception:
|
|
1175
|
+
pass
|
|
1176
|
+
|
|
1177
|
+
try:
|
|
1178
|
+
return os.path.join(base_dir, str(os.getuid()))
|
|
1179
|
+
except Exception:
|
|
1180
|
+
pass
|
|
1181
|
+
|
|
1182
|
+
return base_dir
|
|
1183
|
+
|
|
1184
|
+
|
|
1185
|
+
def is_archive(name: str) -> bool:
|
|
1186
|
+
'''
|
|
1187
|
+
Check if a file name is an archive file or not.
|
|
1188
|
+
|
|
1189
|
+
:return: A boolean.
|
|
1190
|
+
'''
|
|
1191
|
+
regexp = r'^.*\.(zip|zipx|tar.gz|tgz|tar.Z|tar.bz2|tbz2)(\.\d+)*$'
|
|
1192
|
+
if re.match(regexp, name, re.I):
|
|
1193
|
+
return True
|
|
1194
|
+
return False
|
|
1195
|
+
|
|
1196
|
+
|
|
1197
|
+
class Color:
|
|
1198
|
+
PURPLE = '\033[95m'
|
|
1199
|
+
CYAN = '\033[96m'
|
|
1200
|
+
DARKCYAN = '\033[36m'
|
|
1201
|
+
BLUE = '\033[94m'
|
|
1202
|
+
GREEN = '\033[92m'
|
|
1203
|
+
YELLOW = '\033[93m'
|
|
1204
|
+
RED = '\033[91m'
|
|
1205
|
+
BOLD = '\033[1m'
|
|
1206
|
+
UNDERLINE = '\033[4m'
|
|
1207
|
+
END = '\033[0m'
|
|
1208
|
+
|
|
1209
|
+
|
|
1210
|
+
def resolve_ips(hostname: str) -> list[str]:
|
|
1211
|
+
try:
|
|
1212
|
+
ipaddress.ip_address(hostname)
|
|
1213
|
+
return [hostname]
|
|
1214
|
+
except ValueError:
|
|
1215
|
+
pass
|
|
1216
|
+
try:
|
|
1217
|
+
addrinfo = socket.getaddrinfo(hostname, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
|
|
1218
|
+
return [ai[4][0] for ai in addrinfo]
|
|
1219
|
+
except socket.gaierror:
|
|
1220
|
+
pass
|
|
1221
|
+
return []
|
|
1222
|
+
|
|
1223
|
+
|
|
1224
|
+
def resolve_ip(hostname: str) -> str:
|
|
1225
|
+
ips = resolve_ips(hostname)
|
|
1226
|
+
if ips:
|
|
1227
|
+
return ips[0]
|
|
1228
|
+
return hostname
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
def detect_client_location() -> "IPDict":
    """
    Normally the client IP is set on the server side (request.remote_addr).
    Here the IP is set to the one seen by the host itself; no traffic is actually
    exchanged with the Google DNS servers used as connect targets.
    Try to determine the sitename automatically from common environment variables,
    in this order: SITE_NAME, ATLAS_SITE_NAME, OSG_SITE_NAME. If none of these exist
    use the fixed string 'ROAMING'.

    If environment variables set the location, they are used.
    """

    ip = None

    try:
        with socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) as s:
            s.connect(("2001:4860:4860:0:0:0:0:8888", 80))
            ip = s.getsockname()[0]
    except Exception:
        pass

    if not ip:
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
                s.connect(("8.8.8.8", 80))
                ip = s.getsockname()[0]
        except Exception:
            pass

    if not ip:
        ip = '0.0.0.0'  # noqa: S104

    site = os.environ.get('SITE_NAME',
                          os.environ.get('ATLAS_SITE_NAME',
                                         os.environ.get('OSG_SITE_NAME',
                                                        'ROAMING')))

    latitude = os.environ.get('RUCIO_LATITUDE')
    longitude = os.environ.get('RUCIO_LONGITUDE')
    if latitude and longitude:
        try:
            latitude = float(latitude)
            longitude = float(longitude)
        except ValueError:
            latitude = longitude = 0
            print('Client set latitude and longitude are not valid.')
    else:
        latitude = longitude = None

    return {'ip': ip,
            'fqdn': socket.getfqdn(),
            'site': site,
            'latitude': latitude,
            'longitude': longitude}
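# Annotation (illustrative, not part of the upstream module): the UDP connect() calls above
# never send a packet; they only ask the kernel which local address would be used to reach
# the target, so detect_client_location() degrades to ip='0.0.0.0' and site='ROAMING'
# when no route or site environment variable is available.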
def ssh_sign(private_key: str, message: str) -> str:
    """
    Sign a string message using the private key.

    :param private_key: The SSH RSA private key as a string.
    :param message: The message to sign as a string.
    :return: Base64 encoded signature as a string.
    """
    encoded_message = message.encode()
    if not EXTRA_MODULES['paramiko']:
        raise MissingModuleException('The paramiko module is not installed or faulty.')
    sio_private_key = StringIO(private_key)
    priv_k = RSAKey.from_private_key(sio_private_key)
    sio_private_key.close()
    signature_stream = priv_k.sign_ssh_data(encoded_message)
    signature_stream.rewind()
    base64_encoded = base64.b64encode(signature_stream.get_remainder())
    base64_encoded = base64_encoded.decode()
    return base64_encoded


def make_valid_did(lfn_dict: dict[str, Any]) -> dict[str, Any]:
    """
    When managing information about a LFN (such as in `rucio upload` or
    the RSE manager's upload), we add the `filename` attribute to record
    the name of the file on the local disk in addition to the remainder
    of the DID information.

    This function will take that python dictionary, and strip out the
    additional `filename` key. If this is not done, then the dictionary
    will not pass the DID JSON schema validation.
    """
    if 'filename' not in lfn_dict:
        return lfn_dict

    lfn_copy = dict(lfn_dict)
    lfn_copy['name'] = lfn_copy.get('name', lfn_copy['filename'])
    del lfn_copy['filename']
    return lfn_copy
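# Annotation (illustrative, not part of the upstream module): make_valid_did() never mutates
# its argument, e.g. make_valid_did({'scope': 'user.jdoe', 'filename': 'a.root', 'bytes': 1})
# returns {'scope': 'user.jdoe', 'name': 'a.root', 'bytes': 1} and leaves the input intact.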
def send_trace(trace: TraceDict, trace_endpoint: str, user_agent: str, retries: int = 5) -> int:
    """
    Send the given trace to the trace endpoint

    :param trace: the trace dictionary to send
    :param trace_endpoint: the endpoint where the trace should be sent
    :param user_agent: the user agent sending the trace
    :param retries: the number of retries if sending fails
    :return: 0 on success, 1 on failure
    """
    if user_agent.startswith('pilot'):
        return 0
    for dummy in range(retries):
        try:
            requests.post(trace_endpoint + '/traces/', verify=False, data=json.dumps(trace))
            return 0
        except Exception:
            pass
    return 1


def add_url_query(url: str, query: dict[str, str]) -> str:
    """
    Add a new dictionary to URL parameters

    :param url: The existing URL
    :param query: A dictionary containing key/value pairs to be added to the URL
    :return: The expanded URL with the new query parameters
    """

    url_parts = list(urlparse(url))
    mod_query = dict(parse_qsl(url_parts[4]))
    mod_query.update(query)
    url_parts[4] = urlencode(mod_query)
    return urlunparse(url_parts)
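# Annotation (illustrative, not part of the upstream module): add_url_query() merges into
# the existing query string, e.g. add_url_query('https://host/path?a=1', {'b': '2'}) yields
# 'https://host/path?a=1&b=2'; keys already present are overwritten by the new values.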
def get_bytes_value_from_string(input_string: str) -> Union[bool, int]:
    """
    Get bytes from a string that represents a storage value and unit

    :param input_string: String containing a value and a unit
    :return: Integer value representing the value in bytes
    """
    result = re.findall('^([0-9]+)([A-Za-z]+)$', input_string)
    if result:
        value = int(result[0][0])
        unit = result[0][1].lower()
        if unit == 'b':
            value = value
        elif unit == 'kb':
            value = value * 1000
        elif unit == 'mb':
            value = value * 1000000
        elif unit == 'gb':
            value = value * 1000000000
        elif unit == 'tb':
            value = value * 1000000000000
        elif unit == 'pb':
            value = value * 1000000000000000
        else:
            return False
        return value
    else:
        return False
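# Annotation (illustrative, not part of the upstream module): units are decimal and the
# whole string must match, e.g. get_bytes_value_from_string('10GB') == 10000000000, while
# '10 GB' (embedded space) or '10GiB' (unknown unit) both return False.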
def parse_did_filter_from_string(input_string: str) -> tuple[dict[str, Any], str]:
    """
    Parse DID filter options in format 'length<3,type=all' from string.

    :param input_string: String containing the filter options.
    :return: filter dictionary and type as string.
    """
    filters = {}
    type_ = 'collection'
    if input_string:
        filter_options = input_string.replace(' ', '').split(',')
        for option in filter_options:
            value = None
            key = None

            if '>=' in option:
                key, value = option.split('>=')
                if key == 'length':
                    key = 'length.gte'
            elif '>' in option:
                key, value = option.split('>')
                if key == 'length':
                    key = 'length.gt'
            elif '<=' in option:
                key, value = option.split('<=')
                if key == 'length':
                    key = 'length.lte'
            elif '<' in option:
                key, value = option.split('<')
                if key == 'length':
                    key = 'length.lt'
            elif '=' in option:
                key, value = option.split('=')
                if key == 'created_after' or key == 'created_before':
                    value = datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ')

            if key == 'type':
                if value.upper() in ['ALL', 'COLLECTION', 'CONTAINER', 'DATASET', 'FILE']:  # type: ignore
                    type_ = value.lower()  # type: ignore
                else:
                    raise InvalidType('{0} is not a valid type. Valid types are {1}'.format(value, ['ALL', 'COLLECTION', 'CONTAINER', 'DATASET', 'FILE']))
            elif key in ('length.gt', 'length.lt', 'length.gte', 'length.lte', 'length'):
                try:
                    value = int(value)  # type: ignore
                    filters[key] = value
                except ValueError:
                    raise ValueError('Length has to be an integer value.')
                filters[key] = value
            elif isinstance(value, str):
                if value.lower() == 'true':
                    value = '1'
                elif value.lower() == 'false':
                    value = '0'
                filters[key] = value
            else:
                filters[key] = value

    return filters, type_
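# Annotation (illustrative, not part of the upstream module): for example
# parse_did_filter_from_string('type=dataset,length>3') returns ({'length.gt': 3}, 'dataset');
# string values that look boolean are normalised to '1' or '0'.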
def parse_did_filter_from_string_fe(
        input_string: str,
        name: str = '*',
        type: str = 'collection',
        omit_name: bool = False
) -> tuple[list[dict[str, Any]], str]:
    """
    Parse DID filter string for the filter engine (fe).

    Should adhere to the following conventions:
    - ';' represents the logical OR operator
    - ',' represents the logical AND operator
    - all operators belong to set of (<=, >=, ==, !=, >, <, =)
    - there should be no duplicate key+operator criteria.

    One sided and compound inequalities are supported.

    Sanity checking of input is left to the filter engine.

    :param input_string: String containing the filter options.
    :param name: DID name.
    :param type: The type of the did: all(container, dataset, file), collection(dataset or container), dataset, container.
    :param omit_name: omit addition of name to filters.
    :return: list of dictionaries with each dictionary as a separate OR expression.
    """
    # lookup table unifying all comprehended operators to a nominal suffix.
    # note that the order matters as the regex engine is eager, e.g. don't want to evaluate '<=' as '<' and '='.
    operators_suffix_LUT = OrderedDict({
        '<=': 'lte',
        '>=': 'gte',
        '==': '',
        '!=': 'ne',
        '>': 'gt',
        '<': 'lt',
        '=': ''
    })

    # lookup table mapping operator opposites, used to reverse compound inequalities.
    operator_opposites_LUT = {
        'lt': 'gt',
        'lte': 'gte'
    }
    operator_opposites_LUT.update({op2: op1 for op1, op2 in operator_opposites_LUT.items()})

    filters = []
    if input_string:
        or_groups = list(filter(None, input_string.split(';')))  # split <input_string> into OR clauses
        for or_group in or_groups:
            or_group = or_group.strip()
            and_groups = list(filter(None, or_group.split(',')))  # split <or_group> into AND clauses
            and_group_filters = {}
            for and_group in and_groups:
                and_group = and_group.strip()
                # tokenise this AND clause using operators as delimiters.
                tokenisation_regex = "({})".format('|'.join(operators_suffix_LUT.keys()))
                and_group_split_by_operator = list(filter(None, re.split(tokenisation_regex, and_group)))
                if len(and_group_split_by_operator) == 3:  # this is a one-sided inequality or expression
                    key, operator, value = [token.strip() for token in and_group_split_by_operator]

                    # substitute input operator with the nominal operator defined by the LUT, <operators_suffix_LUT>.
                    operator_mapped = operators_suffix_LUT.get(operator)

                    filter_key_full = key
                    if operator_mapped is not None:
                        if operator_mapped:
                            filter_key_full = "{}.{}".format(key, operator_mapped)
                    else:
                        raise DIDFilterSyntaxError("{} operator not understood.".format(operator))

                    if filter_key_full in and_group_filters:
                        raise DuplicateCriteriaInDIDFilter(filter_key_full)
                    else:
                        and_group_filters[filter_key_full] = value
                elif len(and_group_split_by_operator) == 5:  # this is a compound inequality
                    value1, operator1, key, operator2, value2 = [token.strip() for token in and_group_split_by_operator]

                    # substitute input operator with the nominal operator defined by the LUT, <operators_suffix_LUT>.
                    operator1_mapped = operator_opposites_LUT.get(operators_suffix_LUT.get(operator1))
                    operator2_mapped = operators_suffix_LUT.get(operator2)

                    filter_key1_full = filter_key2_full = key
                    if operator1_mapped is not None and operator2_mapped is not None:
                        if operator1_mapped:  # ignore '' operator (maps from equals)
                            filter_key1_full = "{}.{}".format(key, operator1_mapped)
                        if operator2_mapped:  # ignore '' operator (maps from equals)
                            filter_key2_full = "{}.{}".format(key, operator2_mapped)
                    else:
                        raise DIDFilterSyntaxError("{} or {} operator not understood.".format(operator1, operator2))

                    if filter_key1_full in and_group_filters:
                        raise DuplicateCriteriaInDIDFilter(filter_key1_full)
                    else:
                        and_group_filters[filter_key1_full] = value1
                    if filter_key2_full in and_group_filters:
                        raise DuplicateCriteriaInDIDFilter(filter_key2_full)
                    else:
                        and_group_filters[filter_key2_full] = value2
                else:
                    raise DIDFilterSyntaxError(and_group)

            # add name key to each AND clause if it hasn't already been populated from the filter and <omit_name> not set.
            if not omit_name and 'name' not in and_group_filters:
                and_group_filters['name'] = name

            filters.append(and_group_filters)
    else:
        if not omit_name:
            filters.append({
                'name': name
            })
    return filters, type
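# Annotation (illustrative, not part of the upstream module): a compound clause such as
# '1 < length <= 5' becomes the criteria {'length.gt': '1', 'length.lte': '5'} (plus the
# default 'name' key unless omit_name is set); ';' separates independent OR groups.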
def parse_replicas_from_file(path: "FileDescriptorOrPath") -> Any:
    """
    Parses the output of list_replicas from a json or metalink file
    into a dictionary. Metalink parsing is tried first and if it fails
    it tries to parse json.

    :param path: the path to the input file

    :returns: a list with a dictionary for each file
    """
    with open(path) as fp:
        try:
            root = ElementTree.parse(fp).getroot()  # noqa: S314
            return parse_replicas_metalink(root)
        except ElementTree.ParseError as xml_err:
            try:
                return json.load(fp)
            except ValueError as json_err:
                raise MetalinkJsonParsingError(path, xml_err, json_err)


def parse_replicas_from_string(string: str) -> Any:
    """
    Parses the output of list_replicas from a json or metalink string
    into a dictionary. Metalink parsing is tried first and if it fails
    it tries to parse json.

    :param string: the string to parse

    :returns: a list with a dictionary for each file
    """
    try:
        root = ElementTree.fromstring(string)  # noqa: S314
        return parse_replicas_metalink(root)
    except ElementTree.ParseError as xml_err:
        try:
            return json.loads(string)
        except ValueError as json_err:
            raise MetalinkJsonParsingError(string, xml_err, json_err)


def parse_replicas_metalink(root: ElementTree.Element) -> list[dict[str, Any]]:
    """
    Transforms the metalink tree into a list of dictionaries where
    each dictionary describes a file with its replicas.
    Will be called by parse_replicas_from_file and parse_replicas_from_string.

    :param root: root node of the metalink tree

    :returns: a list with a dictionary for each file
    """
    files = []

    # metalink namespace
    ns = '{urn:ietf:params:xml:ns:metalink}'
    str_to_bool = {'true': True, 'True': True, 'false': False, 'False': False}

    # loop over all <file> tags of the metalink string
    for file_tag_obj in root.findall(ns + 'file'):
        # search for identity-tag
        identity_tag_obj = file_tag_obj.find(ns + 'identity')
        if not ElementTree.iselement(identity_tag_obj):
            raise InputValidationError('Failed to locate identity-tag inside %s' % ElementTree.tostring(file_tag_obj))

        cur_file = {'did': identity_tag_obj.text,
                    'adler32': None,
                    'md5': None,
                    'sources': []}

        parent_dids = set()
        parent_dids_tag_obj = file_tag_obj.find(ns + 'parents')
        if ElementTree.iselement(parent_dids_tag_obj):
            for did_tag_obj in parent_dids_tag_obj.findall(ns + 'did'):
                parent_dids.add(did_tag_obj.text)
        cur_file['parent_dids'] = parent_dids

        size_tag_obj = file_tag_obj.find(ns + 'size')
        cur_file['bytes'] = int(size_tag_obj.text) if ElementTree.iselement(size_tag_obj) else None

        for hash_tag_obj in file_tag_obj.findall(ns + 'hash'):
            hash_type = hash_tag_obj.get('type')
            if hash_type:
                cur_file[hash_type] = hash_tag_obj.text

        for url_tag_obj in file_tag_obj.findall(ns + 'url'):
            key_rename_map = {'location': 'rse'}
            src = {}
            for k, v in url_tag_obj.items():
                k = key_rename_map.get(k, k)
                src[k] = str_to_bool.get(v, v)
            src['pfn'] = url_tag_obj.text
            cur_file['sources'].append(src)

        files.append(cur_file)

    return files


def get_thread_with_periodic_running_function(
        interval: Union[int, float],
        action: Callable[..., Any],
        graceful_stop: threading.Event
) -> threading.Thread:
    """
    Get a thread where a function runs periodically.

    :param interval: Interval in seconds when the action function should run.
    :param action: Function, that should run periodically.
    :param graceful_stop: Threading event used to check for graceful stop.
    """
    def start():
        while not graceful_stop.is_set():
            starttime = time.time()
            action()
            time.sleep(interval - (time.time() - starttime))
    t = threading.Thread(target=start)
    return t
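# Annotation (illustrative, not part of the upstream module): the returned thread is not
# started; a caller is expected to do something like
#   stop = threading.Event()
#   t = get_thread_with_periodic_running_function(60, heartbeat_fn, stop)
#   t.start(); ...; stop.set()
# where heartbeat_fn is a hypothetical zero-argument callable.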
def run_cmd_process(cmd: str, timeout: int = 3600) -> tuple[int, str]:
    """
    shell command parser with timeout

    :param cmd: shell command as a string
    :param timeout: in seconds

    :return: the return code and stdout (with stderr appended on error)
    """

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, preexec_fn=os.setsid, universal_newlines=True)

    try:
        stdout, stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        try:
            # Kill the whole process group since we're using shell=True.
            os.killpg(os.getpgid(process.pid), signal.SIGTERM)
            stdout, stderr = process.communicate(timeout=3)
        except subprocess.TimeoutExpired:
            os.killpg(os.getpgid(process.pid), signal.SIGKILL)
            stdout, stderr = process.communicate()

    if not stderr:
        stderr = ''
    if not stdout:
        stdout = ''
    if stderr and stderr != '':
        stdout += " Error: " + stderr
    if process:
        returncode = process.returncode
    else:
        returncode = 1
    if returncode != 1 and 'Command time-out' in stdout:
        returncode = 1
    if returncode is None:
        returncode = 0

    return returncode, stdout
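# Annotation (illustrative, not part of the upstream module): because the command runs in
# its own session (preexec_fn=os.setsid), the timeout path can signal the whole process
# group, e.g. run_cmd_process('sleep 600', timeout=1) terminates the child and returns a
# non-zero code together with whatever output was captured.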
def gateway_update_return_dict(
        dictionary: dict[str, Any],
        session: Optional["Session"] = None
) -> dict[str, Any]:
    """
    Ensure that rse is in a dictionary returned from core

    :param dictionary: The dictionary to edit
    :param session: The DB session to use
    :returns dictionary: The edited dictionary
    """
    if not isinstance(dictionary, dict):
        return dictionary

    copied = False  # Avoid side effects from pass by object

    for rse_str in ['rse', 'src_rse', 'source_rse', 'dest_rse', 'destination_rse']:
        rse_id_str = '%s_id' % rse_str
        if rse_id_str in dictionary.keys() and dictionary[rse_id_str] is not None:
            if rse_str not in dictionary.keys():
                if not copied:
                    dictionary = dictionary.copy()
                    copied = True
                import rucio.core.rse
                dictionary[rse_str] = rucio.core.rse.get_rse_name(rse_id=dictionary[rse_id_str], session=session)

    if 'account' in dictionary.keys() and dictionary['account'] is not None:
        if not copied:
            dictionary = dictionary.copy()
            copied = True
        dictionary['account'] = dictionary['account'].external

    if 'scope' in dictionary.keys() and dictionary['scope'] is not None:
        if not copied:
            dictionary = dictionary.copy()
            copied = True
        dictionary['scope'] = dictionary['scope'].external

    return dictionary


def setup_logger(
        module_name: Optional[str] = None,
        logger_name: Optional[str] = None,
        logger_level: Optional[int] = None,
        verbose: bool = False
) -> logging.Logger:
    '''
    Factory method to set logger with handlers.
    :param module_name: __name__ of the module that is calling this method
    :param logger_name: name of the logger, typically name of the module.
    :param logger_level: if not given, fetched from config.
    :param verbose: verbose option set in bin/rucio
    '''
    # helper method for cfg check
    def _force_cfg_log_level(cfg_option: str) -> bool:
        cfg_forced_modules = config_get('logging', cfg_option, raise_exception=False, default=None, clean_cached=True,
                                        check_config_table=False)
        if cfg_forced_modules and module_name is not None:
            if re.match(str(cfg_forced_modules), module_name):
                return True
        return False

    # creating log
    if not logger_name:
        if not module_name:
            logger_name = 'usr'
        else:
            logger_name = module_name.split('.')[-1]
    logger = logging.getLogger(logger_name)

    # extracting the log level
    if not logger_level:
        logger_level = logging.INFO
        if verbose:
            logger_level = logging.DEBUG

    # overriding by the config
    cfg_levels = (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR)
    for level in cfg_levels:
        cfg_opt = 'forceloglevel' + logging.getLevelName(level)
        if _force_cfg_log_level(cfg_opt):
            logger_level = level

    # setting the log level
    logger.setLevel(logger_level)

    # preferred logger handling
    def add_handler(logger: logging.Logger) -> None:
        hdlr = logging.StreamHandler()

        def emit_decorator(fnc: Callable[..., Any]) -> Callable[..., Any]:
            def func(*args) -> Callable[..., Any]:
                if 'RUCIO_LOGGING_FORMAT' not in os.environ:
                    levelno = args[0].levelno
                    format_str = '%(asctime)s\t%(levelname)s\t%(message)s\033[0m'
                    if levelno >= logging.CRITICAL:
                        color = '\033[31;1m'
                    elif levelno >= logging.ERROR:
                        color = '\033[31;1m'
                    elif levelno >= logging.WARNING:
                        color = '\033[33;1m'
                    elif levelno >= logging.INFO:
                        color = '\033[32;1m'
                    elif levelno >= logging.DEBUG:
                        color = '\033[36;1m'
                        format_str = '%(asctime)s\t%(levelname)s\t%(filename)s\t%(message)s\033[0m'
                    else:
                        color = '\033[0m'
                    formatter = logging.Formatter('{0}{1}'.format(color, format_str))
                else:
                    formatter = logging.Formatter(os.environ['RUCIO_LOGGING_FORMAT'])
                hdlr.setFormatter(formatter)
                return fnc(*args)
            return func
        hdlr.emit = emit_decorator(hdlr.emit)
        logger.addHandler(hdlr)

    # setting handler and formatter
    if not logger.handlers:
        add_handler(logger)

    return logger
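# Annotation (illustrative, not part of the upstream module): typical use is
#   logger = setup_logger(module_name=__name__, verbose=True)
# which yields a DEBUG-level logger with a colourised stream handler, unless the
# RUCIO_LOGGING_FORMAT environment variable overrides the format string.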
def daemon_sleep(
        start_time: float,
        sleep_time: float,
        graceful_stop: threading.Event,
        logger: "LoggerFunction" = logging.log
) -> None:
    """Sleeps a daemon the time provided by sleep_time"""
    end_time = time.time()
    time_diff = end_time - start_time
    if time_diff < sleep_time:
        logger(logging.INFO, 'Sleeping for a while : %s seconds', (sleep_time - time_diff))
        graceful_stop.wait(sleep_time - time_diff)


def is_client() -> bool:
    """
    Checks if the function is called from a client or from a server/daemon

    :returns client_mode: True if called from a client, False if called from a server/daemon
    """
    if 'RUCIO_CLIENT_MODE' not in os.environ:
        try:
            if config_has_section('database'):
                client_mode = False
            elif config_has_section('client'):
                client_mode = True
            else:
                client_mode = False
        except (RuntimeError, ConfigNotFound):
            # If no configuration file is found the default value should be True
            client_mode = True
    else:
        if os.environ['RUCIO_CLIENT_MODE']:
            client_mode = True
        else:
            client_mode = False

    return client_mode


class retry:
    """Retry callable object with configurable number of attempts"""

    def __init__(self, func: Callable[..., Any], *args, **kwargs):
        '''
        :param func: a method that should be executed with retries
        :param args: parameters of the func
        :param kwargs: key word arguments of the func
        '''
        self.func, self.args, self.kwargs = func, args, kwargs

    def __call__(self, mtries: int = 3, logger: "LoggerFunction" = logging.log) -> Callable[..., Any]:
        '''
        :param mtries: maximum number of attempts to execute the function
        :param logger: preferred logger
        '''
        attempt = mtries
        while attempt > 1:
            try:
                if logger:
                    logger(logging.DEBUG, '{}: Attempt {}'.format(self.func.__name__, mtries - attempt + 1))
                return self.func(*self.args, **self.kwargs)
            except Exception as e:
                if logger:
                    logger(logging.DEBUG, '{}: Attempt failed {}'.format(self.func.__name__, mtries - attempt + 1))
                    logger(logging.DEBUG, str(e))
                attempt -= 1
        return self.func(*self.args, **self.kwargs)
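# Annotation (illustrative, not part of the upstream module): retry wraps the call rather
# than decorating it, e.g. result = retry(download, url, timeout=5)(mtries=3) retries the
# hypothetical download(url, timeout=5) up to three times before the last error propagates.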
class StoreAndDeprecateWarningAction(argparse.Action):
    '''
    StoreAndDeprecateWarningAction is a descendant of :class:`argparse.Action`
    and represents a store action with a deprecated argument name.
    '''

    def __init__(self,
                 option_strings: Sequence[str],
                 new_option_string: str,
                 dest: str,
                 **kwargs):
        """
        :param option_strings: all possible argument name strings
        :param new_option_string: the new option string which replaces the old
        :param dest: name of variable to store the value in
        :param kwargs: everything else
        """
        super(StoreAndDeprecateWarningAction, self).__init__(
            option_strings=option_strings,
            dest=dest,
            **kwargs)
        if new_option_string not in option_strings:
            raise ValueError("%s not supported as a string option." % new_option_string)
        self.new_option_string = new_option_string

    def __call__(self, parser, namespace, values, option_string: Optional[str] = None):
        if option_string and option_string != self.new_option_string:
            # The logger gets typically initialized after the argument parser
            # to set the verbosity of the logger. Thus using simple print to console.
            print("Warning: The commandline argument {} is deprecated! Please use {} in the future.".format(option_string, self.new_option_string))

        setattr(namespace, self.dest, values)


class StoreTrueAndDeprecateWarningAction(argparse._StoreConstAction):
    '''
    StoreTrueAndDeprecateWarningAction is a descendant of :class:`argparse.Action`
    and represents a store-true action with a deprecated argument name.
    '''

    def __init__(self,
                 option_strings: Sequence[str],
                 new_option_string: str,
                 dest: str,
                 default: bool = False,
                 required: bool = False,
                 help: Optional[str] = None):
        """
        :param option_strings: all possible argument name strings
        :param new_option_string: the new option string which replaces the old
        :param dest: name of variable to store the value in
        :param default: default value if the option is not given
        :param required: whether the option is mandatory
        :param help: help text for the option
        """
        super(StoreTrueAndDeprecateWarningAction, self).__init__(
            option_strings=option_strings,
            dest=dest,
            const=True,
            default=default,
            required=required,
            help=help)
        if new_option_string not in option_strings:
            raise ValueError("%s not supported as a string option." % new_option_string)
        self.new_option_string = new_option_string

    def __call__(self, parser, namespace, values, option_string: Optional[str] = None):
        super(StoreTrueAndDeprecateWarningAction, self).__call__(parser, namespace, values, option_string=option_string)
        if option_string and option_string != self.new_option_string:
            # The logger gets typically initialized after the argument parser
            # to set the verbosity of the logger. Thus using simple print to console.
            print("Warning: The commandline argument {} is deprecated! Please use {} in the future.".format(option_string, self.new_option_string))


class PriorityQueue:
    """
    Heap-based [1] priority queue which supports priority update operations

    It is used as a dictionary: pq['element'] = priority
    The element with the highest priority can be accessed with pq.top() or pq.pop(),
    depending on the desire to keep it in the heap or not.

    [1] https://en.wikipedia.org/wiki/Heap_(data_structure)
    """
    class ContainerSlot:
        def __init__(self, position: int, priority: int):
            self.pos = position
            self.prio = priority

    def __init__(self):
        self.heap = []
        self.container = {}

    def __len__(self):
        return len(self.heap)

    def __getitem__(self, item):
        return self.container[item].prio

    def __setitem__(self, key, value):
        if key in self.container:
            existing_prio = self.container[key].prio
            self.container[key].prio = value
            if value < existing_prio:
                self._priority_decreased(key)
            elif existing_prio < value:
                self._priority_increased(key)
        else:
            self.heap.append(key)
            self.container[key] = self.ContainerSlot(position=len(self.heap) - 1, priority=value)
            self._priority_decreased(key)

    def __contains__(self, item):
        return item in self.container

    def top(self):
        return self.heap[0]

    def pop(self):
        item = self.heap[0]
        self.container.pop(item)

        tmp_item = self.heap.pop()
        if self.heap:
            self.heap[0] = tmp_item
            self.container[tmp_item].pos = 0
            self._priority_increased(tmp_item)
        return item

    def _priority_decreased(self, item):
        heap_changed = False

        pos = self.container[item].pos
        pos_parent = (pos - 1) // 2
        while pos > 0 and self.container[self.heap[pos]].prio < self.container[self.heap[pos_parent]].prio:
            tmp_item, parent = self.heap[pos], self.heap[pos_parent] = self.heap[pos_parent], self.heap[pos]
            self.container[tmp_item].pos, self.container[parent].pos = self.container[parent].pos, self.container[tmp_item].pos

            pos = pos_parent
            pos_parent = (pos - 1) // 2

            heap_changed = True
        return heap_changed

    def _priority_increased(self, item):
        heap_changed = False
        heap_len = len(self.heap)
        pos = self.container[item].pos
        pos_child1 = 2 * pos + 1
        pos_child2 = 2 * pos + 2

        heap_restored = False
        while not heap_restored:
            # find minimum between item, child1, and child2
            if pos_child1 < heap_len and self.container[self.heap[pos_child1]].prio < self.container[self.heap[pos]].prio:
                pos_min = pos_child1
            else:
                pos_min = pos
            if pos_child2 < heap_len and self.container[self.heap[pos_child2]].prio < self.container[self.heap[pos_min]].prio:
                pos_min = pos_child2

            if pos_min != pos:
                _, tmp_item = self.heap[pos_min], self.heap[pos] = self.heap[pos], self.heap[pos_min]
                self.container[tmp_item].pos = pos

                pos = pos_min
                pos_child1 = 2 * pos + 1
                pos_child2 = 2 * pos + 2

                heap_changed = True
            else:
                heap_restored = True

        self.container[self.heap[pos]].pos = pos
        return heap_changed
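# Annotation (illustrative, not part of the upstream module): PriorityQueue is a min-heap
# keyed by the assigned number, so after pq = PriorityQueue(); pq['a'] = 5; pq['b'] = 2;
# pq['a'] = 1 the call pq.pop() returns 'a' (priority updates re-sift the existing entry).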
def check_policy_package_version(package: str) -> None:
    import importlib

    from rucio.version import version_string
    '''
    Checks that the Rucio version supported by the policy package is compatible
    with this version. Raises an exception if not.
    :param package: the fully qualified name of the policy package
    '''
    try:
        module = importlib.import_module(package)
    except ImportError:
        # package not found. Will be picked up elsewhere
        return
    if not hasattr(module, 'SUPPORTED_VERSION'):
        # package is not versioned
        return
    supported_version = module.SUPPORTED_VERSION if isinstance(module.SUPPORTED_VERSION, list) else [module.SUPPORTED_VERSION]
    components = 2 if version_string().startswith("1.") else 1
    current_version = ".".join(version_string().split(".")[:components])
    if current_version not in supported_version:
        raise PolicyPackageVersionError(package)


class Availability:
    """
    This util class acts as a translator between the availability stored as
    integer and as boolean values.

    `None` represents a missing value. This lets a user update a specific value
    without altering the other ones. If it needs to be evaluated, it will
    correspond to `True`.
    """

    read = None
    write = None
    delete = None

    def __init__(
            self,
            read: Optional[bool] = None,
            write: Optional[bool] = None,
            delete: Optional[bool] = None
    ):
        self.read = read
        self.write = write
        self.delete = delete

    def __iter__(self):
        """
        The iterator provides the feature to unpack the values of this class.

        e.g. `read, write, delete = Availability(True, False, True)`

        :returns: An iterator over the values `read`, `write`, `delete`.
        """
        return iter((self.read, self.write, self.delete))

    def __repr__(self):
        return "Availability({}, {}, {})".format(self.read, self.write, self.delete)

    def __eq__(self, other):
        return self.read == other.read and self.write == other.write and self.delete == other.delete

    def __hash__(self):
        return hash(self.integer)

    @classmethod
    def from_integer(cls, n):
        """
        Returns a new Availability instance where the values are set to the
        corresponding bit values in the integer.

        :param n: The integer value to get the availabilities from.
        :returns: The corresponding Availability instance.
        """
        if n is None:
            return cls(None, None, None)

        return cls(
            (n >> 2) % 2 == 1,
            (n >> 1) % 2 == 1,
            (n >> 0) % 2 == 1
        )

    @property
    def integer(self):
        """
        Returns the corresponding integer for the instance values. The three
        least-significant bits correspond to the availability values.

        :returns: An integer corresponding to the availability values. `None`
                  gets treated as `True`.
        """
        read_value = (self.read or self.read is None) * 4
        write_value = (self.write or self.write is None) * 2
        delete_value = (self.delete or self.delete is None) * 1

        return read_value + write_value + delete_value
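# Annotation (illustrative, not part of the upstream module): the integer encoding packs
# read/write/delete into bits 4/2/1, so Availability(True, False, True).integer == 5 and
# Availability.from_integer(7) == Availability(True, True, True); None counts as True.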
def retrying(
        retry_on_exception: "Callable[[Exception], bool]",
        wait_fixed: int,
        stop_max_attempt_number: int
) -> "Callable[[Callable[..., T]], Callable[..., T]]":
    """
    Decorator which retries a function multiple times on certain types of exceptions.
    :param retry_on_exception: Function which takes an exception as argument and returns True if we must retry on this exception
    :param wait_fixed: the amount of time to wait in-between two tries
    :param stop_max_attempt_number: maximum number of allowed attempts
    """
    def _decorator(fn):
        @wraps(fn)
        def _wrapper(*args, **kwargs):
            attempt = 0
            while True:
                attempt += 1
                try:
                    return fn(*args, **kwargs)
                except Exception as e:
                    if attempt >= stop_max_attempt_number:
                        raise
                    if not retry_on_exception(e):
                        raise
                    time.sleep(wait_fixed / 1000.0)
        return _wrapper
    return _decorator
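# Annotation (illustrative, not part of the upstream module): wait_fixed is in milliseconds,
# e.g.
#   @retrying(retry_on_exception=lambda e: isinstance(e, ConnectionError),
#             wait_fixed=500, stop_max_attempt_number=3)
#   def fetch(): ...
# retries fetch() twice more (500 ms apart) before re-raising the ConnectionError.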
def deep_merge_dict(source: dict, destination: dict) -> dict:
    """Merge two dictionaries together recursively"""
    for key, value in source.items():
        if isinstance(value, dict):
            # get node or create one
            node = destination.setdefault(key, {})
            deep_merge_dict(value, node)
        else:
            destination[key] = value

    return destination
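# Annotation (illustrative, not part of the upstream module): the merge is in place and the
# source wins on conflicts, e.g. deep_merge_dict({'a': {'x': 1}}, {'a': {'y': 2}, 'b': 3})
# returns {'a': {'y': 2, 'x': 1}, 'b': 3} and also mutates the destination dictionary.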