rucio-clients 32.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rucio-clients might be problematic. Click here for more details.

Files changed (88) hide show
  1. rucio/__init__.py +18 -0
  2. rucio/alembicrevision.py +16 -0
  3. rucio/client/__init__.py +16 -0
  4. rucio/client/accountclient.py +413 -0
  5. rucio/client/accountlimitclient.py +155 -0
  6. rucio/client/baseclient.py +929 -0
  7. rucio/client/client.py +77 -0
  8. rucio/client/configclient.py +113 -0
  9. rucio/client/credentialclient.py +54 -0
  10. rucio/client/didclient.py +691 -0
  11. rucio/client/diracclient.py +48 -0
  12. rucio/client/downloadclient.py +1674 -0
  13. rucio/client/exportclient.py +44 -0
  14. rucio/client/fileclient.py +51 -0
  15. rucio/client/importclient.py +42 -0
  16. rucio/client/lifetimeclient.py +74 -0
  17. rucio/client/lockclient.py +99 -0
  18. rucio/client/metaclient.py +137 -0
  19. rucio/client/pingclient.py +45 -0
  20. rucio/client/replicaclient.py +444 -0
  21. rucio/client/requestclient.py +109 -0
  22. rucio/client/rseclient.py +664 -0
  23. rucio/client/ruleclient.py +287 -0
  24. rucio/client/scopeclient.py +88 -0
  25. rucio/client/subscriptionclient.py +161 -0
  26. rucio/client/touchclient.py +78 -0
  27. rucio/client/uploadclient.py +871 -0
  28. rucio/common/__init__.py +14 -0
  29. rucio/common/cache.py +74 -0
  30. rucio/common/config.py +796 -0
  31. rucio/common/constants.py +92 -0
  32. rucio/common/constraints.py +18 -0
  33. rucio/common/didtype.py +187 -0
  34. rucio/common/exception.py +1092 -0
  35. rucio/common/extra.py +37 -0
  36. rucio/common/logging.py +404 -0
  37. rucio/common/pcache.py +1387 -0
  38. rucio/common/policy.py +84 -0
  39. rucio/common/schema/__init__.py +143 -0
  40. rucio/common/schema/atlas.py +411 -0
  41. rucio/common/schema/belleii.py +406 -0
  42. rucio/common/schema/cms.py +478 -0
  43. rucio/common/schema/domatpc.py +399 -0
  44. rucio/common/schema/escape.py +424 -0
  45. rucio/common/schema/generic.py +431 -0
  46. rucio/common/schema/generic_multi_vo.py +410 -0
  47. rucio/common/schema/icecube.py +404 -0
  48. rucio/common/schema/lsst.py +423 -0
  49. rucio/common/stomp_utils.py +160 -0
  50. rucio/common/stopwatch.py +56 -0
  51. rucio/common/test_rucio_server.py +148 -0
  52. rucio/common/types.py +158 -0
  53. rucio/common/utils.py +1946 -0
  54. rucio/rse/__init__.py +97 -0
  55. rucio/rse/protocols/__init__.py +14 -0
  56. rucio/rse/protocols/cache.py +123 -0
  57. rucio/rse/protocols/dummy.py +112 -0
  58. rucio/rse/protocols/gfal.py +701 -0
  59. rucio/rse/protocols/globus.py +243 -0
  60. rucio/rse/protocols/gsiftp.py +93 -0
  61. rucio/rse/protocols/http_cache.py +83 -0
  62. rucio/rse/protocols/mock.py +124 -0
  63. rucio/rse/protocols/ngarc.py +210 -0
  64. rucio/rse/protocols/posix.py +251 -0
  65. rucio/rse/protocols/protocol.py +530 -0
  66. rucio/rse/protocols/rclone.py +365 -0
  67. rucio/rse/protocols/rfio.py +137 -0
  68. rucio/rse/protocols/srm.py +339 -0
  69. rucio/rse/protocols/ssh.py +414 -0
  70. rucio/rse/protocols/storm.py +207 -0
  71. rucio/rse/protocols/webdav.py +547 -0
  72. rucio/rse/protocols/xrootd.py +295 -0
  73. rucio/rse/rsemanager.py +752 -0
  74. rucio/vcsversion.py +11 -0
  75. rucio/version.py +46 -0
  76. rucio_clients-32.8.6.data/data/etc/rse-accounts.cfg.template +25 -0
  77. rucio_clients-32.8.6.data/data/etc/rucio.cfg.atlas.client.template +42 -0
  78. rucio_clients-32.8.6.data/data/etc/rucio.cfg.template +257 -0
  79. rucio_clients-32.8.6.data/data/requirements.txt +55 -0
  80. rucio_clients-32.8.6.data/data/rucio_client/merge_rucio_configs.py +147 -0
  81. rucio_clients-32.8.6.data/scripts/rucio +2540 -0
  82. rucio_clients-32.8.6.data/scripts/rucio-admin +2434 -0
  83. rucio_clients-32.8.6.dist-info/METADATA +50 -0
  84. rucio_clients-32.8.6.dist-info/RECORD +88 -0
  85. rucio_clients-32.8.6.dist-info/WHEEL +5 -0
  86. rucio_clients-32.8.6.dist-info/licenses/AUTHORS.rst +94 -0
  87. rucio_clients-32.8.6.dist-info/licenses/LICENSE +201 -0
  88. rucio_clients-32.8.6.dist-info/top_level.txt +1 -0
rucio/common/utils.py ADDED
@@ -0,0 +1,1946 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import argparse
17
+ import base64
18
+ import datetime
19
+ import errno
20
+ import getpass
21
+ import hashlib
22
+ import io
23
+ import itertools
24
+ import json
25
+ import logging
26
+ import os
27
+ import os.path
28
+ import re
29
+ import signal
30
+ import socket
31
+ import subprocess
32
+ import tempfile
33
+ import threading
34
+ import time
35
+ from collections import OrderedDict
36
+ from configparser import NoOptionError, NoSectionError
37
+ from enum import Enum
38
+ from functools import partial, wraps
39
+ from io import StringIO
40
+ from itertools import zip_longest
41
+ from typing import TYPE_CHECKING
42
+ from urllib.parse import urlparse, urlencode, quote, parse_qsl, urlunparse
43
+ from uuid import uuid4 as uuid
44
+ from xml.etree import ElementTree
45
+
46
+ import mmap
47
+ import requests
48
+ import zlib
49
+
50
+ from rucio.common.config import config_get, config_has_section
51
+ from rucio.common.exception import MissingModuleException, InvalidType, InputValidationError, MetalinkJsonParsingError, RucioException, \
52
+ DuplicateCriteriaInDIDFilter, DIDFilterSyntaxError, InvalidAlgorithmName, PolicyPackageVersionError
53
+ from rucio.common.extra import import_extras
54
+ from rucio.common.types import InternalAccount, InternalScope
55
+
56
+ EXTRA_MODULES = import_extras(['paramiko'])
57
+
58
+ if EXTRA_MODULES['paramiko']:
59
+ try:
60
+ from paramiko import RSAKey
61
+ except Exception:
62
+ EXTRA_MODULES['paramiko'] = False
63
+
64
+ if TYPE_CHECKING:
65
+ from collections.abc import Callable
66
+ from typing import TypeVar
67
+
68
+ T = TypeVar('T')
69
+
70
+
71
+ # HTTP code dictionary. Not complete. Can be extended if needed.
72
+ codes = {
73
+ # Informational.
74
+ 200: '200 OK',
75
+ 201: '201 Created',
76
+ 202: '202 Accepted',
77
+
78
+ # Client Error.
79
+ 400: '400 Bad Request',
80
+ 401: '401 Unauthorized',
81
+ 403: '403 Forbidden',
82
+ 404: '404 Not Found',
83
+ 405: '405 Method Not Allowed',
84
+ 406: '406 Not Acceptable',
85
+ 408: '408 Request Timeout',
86
+ 409: '409 Conflict',
87
+ 410: '410 Gone',
88
+
89
+ # Server Error.
90
+ 500: '500 Internal Server Error',
91
+ 501: '501 Not Implemented',
92
+ 502: '502 Bad Gateway',
93
+ 503: '503 Service Unavailable',
94
+ 504: '504 Gateway Timeout'
95
+ }
96
+
97
+ # RFC 1123 (ex RFC 822)
98
+ DATE_FORMAT = '%a, %d %b %Y %H:%M:%S UTC'
99
+
100
+
101
def invert_dict(d):
    """
    Build the reverse mapping of ``d``.

    CAUTION: if several keys share a value, only one of them survives in the
    result, so this is only deterministic for one-to-one mappings.

    :param d: source dictionary
    :returns: dictionary mapping each value of ``d`` back to its key
    """
    inverted = {}
    for key, value in d.items():
        inverted[value] = key
    return inverted
110
+
111
+
112
def dids_as_dicts(did_list):
    """
    Normalise a list of DIDs into dictionary form.

    :param did_list: list of DIDs given either as "scope:name" strings or as
                     {"scope": ..., "name": ...} dictionaries
    :returns: list of {"scope": ..., "name": ...} dictionaries
    :raises ValueError: for entries of any other type, or dictionaries
                        missing the "scope" or "name" key
    """
    result = []
    for entry in did_list:
        if isinstance(entry, str):
            # split only on the first colon so names may contain colons
            scope, name = entry.split(":", 1)
            entry = dict(scope=scope, name=name)
        if not isinstance(entry, dict):
            raise ValueError("Can not convert item %s (%s) to a DID" % (entry, type(entry)))
        if "scope" not in entry or "name" not in entry:
            raise ValueError("Scope or name missing in: %s" % (entry,))
        result.append(entry)
    return result
130
+
131
+
132
def build_url(url, path=None, params=None, doseq=False):
    """
    Utility function to build a URL for requests to the Rucio system.

    If ``doseq`` evaluates to True, individual key=value pairs separated by
    '&' are generated for each element of a sequence value in ``params``.
    """
    pieces = [url]
    if path is not None:
        pieces.append("/" + path)
    if params is not None:
        pieces.append("?")
        if isinstance(params, str):
            pieces.append(quote(params))
        else:
            pieces.append(urlencode(params, doseq=doseq))
    return "".join(pieces)
149
+
150
+
151
def all_oidc_req_claims_present(scope, audience, required_scope, required_audience, sepatator=" "):
    """
    Check that every required scope is present in ``scope`` and every
    required audience is present in ``audience``.

    ``scope``/``audience`` must both be lists or both be strings, and the
    same holds for ``required_scope``/``required_audience``; strings are
    split on ``sepatator`` (space by default, parameter name kept for
    backward compatibility). Any other type combination yields False.
    Falsy inputs are treated as empty strings.

    :params scope: list of strings, or one separator-joined string
    :params audience: list of strings, or one separator-joined string
    :params required_scope: list of strings, or one separator-joined string
    :params required_audience: list of strings, or one separator-joined string
    :params sepatator: separator string, space by default
    :returns: True or False
    """
    scope = scope or ""
    audience = audience or ""
    required_scope = required_scope or ""
    required_audience = required_audience or ""

    def _claims(value):
        # Lists are taken element-wise (stringified); strings are split.
        if isinstance(value, list):
            return [str(item) for item in value]
        return str(value).split(sepatator)

    given_pair_ok = (isinstance(scope, list) and isinstance(audience, list)) or \
                    (isinstance(scope, str) and isinstance(audience, str))
    required_pair_ok = (isinstance(required_scope, list) and isinstance(required_audience, list)) or \
                       (isinstance(required_scope, str) and isinstance(required_audience, str))
    if not (given_pair_ok and required_pair_ok):
        # mismatched or unsupported type combinations are rejected outright
        return False

    available_scopes = _claims(scope)
    available_audiences = _claims(audience)
    return all(item in available_scopes for item in _claims(required_scope)) and \
        all(item in available_audiences for item in _claims(required_audience))
208
+
209
+
210
def generate_uuid():
    """Return a random UUID as a 32-character lowercase hex string (no dashes)."""
    return uuid().hex
212
+
213
+
214
def generate_uuid_bytes():
    """Return a random UUID as its raw 16-byte representation."""
    fresh = uuid()
    return fresh.bytes
216
+
217
+
218
# GLOBALLY_SUPPORTED_CHECKSUMS = ['adler32', 'md5', 'sha256', 'crc32']
# Checksum algorithms accepted everywhere; first entry is the default preference.
GLOBALLY_SUPPORTED_CHECKSUMS = ['adler32', 'md5']
# Maps algorithm name -> callable(file_path) -> hex digest; populated below.
CHECKSUM_ALGO_DICT = {}
# Preferred algorithm; may be changed at runtime via set_preferred_checksum().
PREFERRED_CHECKSUM = GLOBALLY_SUPPORTED_CHECKSUMS[0]
# Key under which the supported checksum list is stored/looked up.
CHECKSUM_KEY = 'supported_checksums'
223
+
224
+
225
def is_checksum_valid(checksum_name):
    """
    Tell whether a checksum algorithm is supported.

    Relies on GLOBALLY_SUPPORTED_CHECKSUMS to allow for expandability.

    :param checksum_name: the name of the checksum algorithm to check.
    :returns: True if supported, False otherwise.
    """
    supported = GLOBALLY_SUPPORTED_CHECKSUMS
    return checksum_name in supported
235
+
236
+
237
def set_preferred_checksum(checksum_name):
    """
    Make ``checksum_name`` the preferred checksum algorithm.

    Silently does nothing if the algorithm is not supported
    (see GLOBALLY_SUPPORTED_CHECKSUMS).

    :param checksum_name: the name of the checksum algorithm to prefer.
    """
    global PREFERRED_CHECKSUM
    if not is_checksum_valid(checksum_name):
        return
    PREFERRED_CHECKSUM = checksum_name
248
+
249
+
250
def adler32(file):
    """
    Compute the Adler-32 checksum of a file.

    An Adler-32 checksum is obtained by calculating two 16-bit checksums A and B
    and concatenating their bits into a 32-bit integer. A is the sum of all bytes
    in the stream plus one, and B is the sum of the individual values of A from
    each step.

    :param file: file name
    :returns: Hexified string, padded to 8 values.
    :raises Exception: if the file cannot be opened or read.
    """
    # NOTE: the previous implementation carried a dead mmap branch guarded by a
    # hard-coded `can_mmap = False`; only the buffered-read path is kept.

    # adler starting value is _not_ 0
    adler = 1

    try:
        with open(file, 'rb') as f:
            # partial block reads at slightly increased buffer sizes
            for block in iter(partial(f.read, io.DEFAULT_BUFFER_SIZE * 8), b''):
                adler = zlib.adler32(block, adler)
    except Exception as e:
        raise Exception('FATAL - could not get Adler-32 checksum of file %s: %s' % (file, e))

    # backflip on 32bit -- can be removed once everything is fully migrated to 64bit
    if adler < 0:
        adler = adler + 2 ** 32

    return str('%08x' % adler)
292
+
293
+
294
CHECKSUM_ALGO_DICT['adler32'] = adler32  # register adler32 in the name -> implementation table
295
+
296
+
297
def md5(file):
    """
    Runs the MD5 algorithm (RFC-1321) on the binary content of the file named file and returns the hexadecimal digest

    :param file: file name
    :returns: string of 32 hexadecimal digits
    :raises Exception: if the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    try:
        with open(file, "rb") as f:
            # plain loop instead of abusing list(map(...)) for its side effect
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
    except Exception as e:
        raise Exception('FATAL - could not get MD5 checksum of file %s - %s' % (file, e))

    return hash_md5.hexdigest()
312
+
313
+
314
CHECKSUM_ALGO_DICT['md5'] = md5  # register md5 in the name -> implementation table
315
+
316
+
317
def sha256(file):
    """
    Runs the SHA256 algorithm on the binary content of the file named file and returns the hexadecimal digest

    :param file: file name
    :returns: string of 64 hexadecimal digits
    """
    # Fixes: removed a stray debug print() of the digest and stream the file
    # in chunks instead of loading it into memory in one read.
    hash_sha256 = hashlib.sha256()
    with open(file, "rb") as f:
        for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()
329
+
330
+
331
CHECKSUM_ALGO_DICT['sha256'] = sha256  # register sha256 in the name -> implementation table
332
+
333
+
334
def crc32(file):
    """
    Runs the CRC32 algorithm (as implemented by zlib) on the binary content of the file named file.

    :param file: file name
    :returns: uppercase hexadecimal digest (no zero padding), e.g. 'CBF43926'
    """
    # Fixes: the file handle used to be opened without `with` (leaked), and
    # the binary content was iterated "line by line"; use fixed-size reads.
    checksum = 0
    with open(file, "rb") as f:
        for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b""):
            checksum = zlib.crc32(chunk, checksum)
    return "%X" % (checksum & 0xFFFFFFFF)
345
+
346
+
347
CHECKSUM_ALGO_DICT['crc32'] = crc32  # register crc32 in the name -> implementation table
348
+
349
+
350
def str_to_date(string):
    """ Converts an RFC-1123 string to the corresponding datetime value.

    :param string: the RFC-1123 string to convert; falsy input yields None.
    """
    if not string:
        return None
    return datetime.datetime.strptime(string, DATE_FORMAT)
356
+
357
+
358
def val_to_space_sep_str(vallist):
    """ Converts a list of values into a string of space separated values

    :param vallist: the list of values to convert into a string
    :return: the string of space separated values, str() of a non-list value,
             or '' if the conversion fails (e.g. non-string list elements)
    """
    try:
        if isinstance(vallist, list):
            return str(" ".join(vallist))
        return str(vallist)
    except Exception:
        # was a bare `except:`; keep the historical best-effort contract of
        # returning an empty string, but stop swallowing SystemExit & co.
        return str('')
371
+
372
+
373
def date_to_str(date):
    """ Converts a datetime value to the corresponding RFC-1123 string.

    :param date: the datetime value to convert; falsy input yields None.
    """
    return None if not date else datetime.datetime.strftime(date, DATE_FORMAT)
379
+
380
+
381
class APIEncoder(json.JSONEncoder):
    """ JSON encoder used by the render functions.

    Adds encodings for the special value types used throughout Rucio.
    """

    def default(self, obj):  # pylint: disable=E0202
        if isinstance(obj, datetime.datetime):
            # any datetime becomes an RFC 1123 string
            return date_to_str(obj)
        if isinstance(obj, (datetime.time, datetime.date)):
            # should not happen since the only supported date-like format
            # at the schema level is 'datetime'
            return obj.isoformat()
        if isinstance(obj, datetime.timedelta):
            # whole seconds only (sub-second precision is dropped)
            return obj.days * 24 * 60 * 60 + obj.seconds
        if isinstance(obj, Enum):
            return obj.name
        if isinstance(obj, (InternalAccount, InternalScope)):
            return obj.external
        return super().default(obj)
401
+
402
+
403
def render_json(**data):
    """ Render the given keyword arguments as a JSON object string. """
    encoded = json.dumps(data, cls=APIEncoder)
    return encoded
407
+
408
+
409
def render_json_list(list_):
    """ Render the given list as a JSON array string. """
    encoded = json.dumps(list_, cls=APIEncoder)
    return encoded
413
+
414
+
415
def datetime_parser(dct):
    """ json object_hook: convert RFC-1123 date strings (containing ' UTC')
    back into datetime values, in place.
    """
    for key, value in list(dct.items()):
        if not isinstance(value, str) or " UTC" not in value:
            continue
        try:
            dct[key] = datetime.datetime.strptime(value, DATE_FORMAT)
        except Exception:
            # values that merely contain ' UTC' are left untouched
            pass
    return dct
425
+
426
+
427
def parse_response(data):
    """
    Parse a JSON response body (str or bytes), reviving RFC-1123 date
    strings into datetime values.
    """
    text = data.decode('utf-8') if hasattr(data, 'decode') else data
    return json.loads(text, object_hook=datetime_parser)
435
+
436
+
437
def execute(cmd) -> tuple[int, str, str]:
    """
    Run ``cmd`` in a shell subprocess.

    :param cmd: command string to execute
    :returns: tuple (exitcode, out, err) where out/err are the UTF-8 decoded
              stdout and stderr of the command
    """
    process = subprocess.Popen(cmd,
                               shell=True,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, err = process.communicate()
    return process.returncode, out.decode(encoding='utf-8'), err.decode(encoding='utf-8')
457
+
458
+
459
def rse_supported_protocol_operations():
    """ Returns the list of operations supported by all RSE protocols. """
    return [
        'read',
        'write',
        'delete',
        'third_party_copy_read',
        'third_party_copy_write',
    ]
462
+
463
+
464
def rse_supported_protocol_domains():
    """ Returns the list of all supported RSE protocol domains. """
    return ['lan', 'wan']
467
+
468
+
469
def grouper(iterable, n, fillvalue=None):
    """ Collect data into fixed-length chunks, padding the last one with fillvalue.

    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    """
    # n references to the SAME iterator advance it in lockstep
    slots = [iter(iterable)] * n
    return zip_longest(*slots, fillvalue=fillvalue)
474
+
475
+
476
def chunks(iterable, n):
    """
    Yield successive n-sized chunks from ``iterable``.

    Lists are sliced (yielding list slices); any other iterable is consumed
    lazily and yielded as lists of up to ``n`` items.
    """
    if isinstance(iterable, list):
        for start in range(0, len(iterable), n):
            yield iterable[start:start + n]
        return
    it = iter(iterable)
    while chunk := list(itertools.islice(it, n)):
        yield chunk
490
+
491
+
492
def dict_chunks(dict_, n):
    """
    Iterate over the dictionary in groups of at most ``n`` keys.
    """
    keys = iter(dict_)
    for _ in range(0, len(dict_), n):
        group = {}
        for key in itertools.islice(keys, n):
            group[key] = dict_[key]
        yield group
499
+
500
+
501
def my_key_generator(namespace, fn, **kw):
    """
    Customised cache-key generator for dogpile.

    Builds keys of the form ``<namespace>_<fn name><arg1>_<arg2>...``,
    skipping falsy positional arguments.
    """
    fname = fn.__name__

    def generate_key(*arg, **kw):
        parts = [str(s) for s in arg if s]
        return namespace + "_" + fname + "_".join(parts)

    return generate_key
511
+
512
+
513
def construct_surl_DQ2(dsn: str, scope: str, filename: str) -> str:
    """
    Defines relative SURL for new replicas. This method
    contains DQ2 convention. To be used for non-deterministic sites.
    Method imported from DQ2.

    :param dsn: dataset name (dot-separated fields).
    :param scope: the scope (unused by this convention).
    :param filename: the file name appended to the path.
    @return: relative SURL for new replica.
    @rtype: str
    """
    # check how many dots in dsn
    fields = dsn.split('.')
    nfields = len(fields)

    # NOTE(review): str.split always returns at least one element, so the
    # nfields == 0 branch looks unreachable; kept as-is for safety.
    if nfields == 0:
        return '/other/other/%s' % (filename)
    elif nfields == 1:
        stripped_dsn = __strip_dsn(dsn)
        return '/other/%s/%s' % (stripped_dsn, filename)
    elif nfields == 2:
        project = fields[0]
        stripped_dsn = __strip_dsn(dsn)
        return '/%s/%s/%s' % (project, stripped_dsn, filename)
    elif nfields < 5 or re.match('user*|group*', fields[0]):
        # short DSNs, and user/group datasets: project/field2/field3 layout
        project = fields[0]
        f2 = fields[1]
        f3 = fields[2]
        stripped_dsn = __strip_dsn(dsn)
        return '/%s/%s/%s/%s/%s' % (project, f2, f3, stripped_dsn, filename)
    else:
        # long official DSNs: project/datatype/tag layout
        project = fields[0]
        dataset_type = fields[4]
        if nfields == 5:
            tag = 'other'
        else:
            tag = __strip_tag(fields[-1])
        stripped_dsn = __strip_dsn(dsn)
        return '/%s/%s/%s/%s/%s' % (project, dataset_type, tag, stripped_dsn, filename)
550
+
551
+
552
def construct_surl_T0(dsn: str, scope: str, filename: str) -> str:
    """
    Defines relative SURL for new replicas. This method
    contains Tier0 convention. To be used for non-deterministic sites.

    :param dsn: dataset name (dot-separated fields).
    :param scope: the scope (unused by this convention).
    :param filename: the file name appended to the path.
    @return: relative SURL for new replica.
    @rtype: str
    """
    fields = dsn.split('.')
    nfields = len(fields)
    if nfields >= 3:
        return '/%s/%s/%s/%s/%s' % (fields[0], fields[2], fields[1], dsn, filename)
    elif nfields == 1:
        return '/%s/%s/%s/%s/%s' % (fields[0], 'other', 'other', dsn, filename)
    elif nfields == 2:
        # BUGFIX: this branch used fields[2], which does not exist for a
        # two-field DSN and raised IndexError. Substitute 'other' for the
        # missing third field, mirroring the nfields >= 3 layout
        # (position 2 = fields[2] -> 'other', position 3 = fields[1]).
        return '/%s/%s/%s/%s/%s' % (fields[0], 'other', fields[1], dsn, filename)
    elif nfields == 0:
        # unreachable in practice: str.split always returns >= 1 element
        return '/other/other/other/other/%s' % (filename)
570
+
571
+
572
def construct_surl_BelleII(dsn: str, scope: str, filename: str) -> str:
    """
    Defines relative SURL for Belle II specific replicas.
    This method contains the Belle II convention.
    To be used for non-deterministic Belle II sites.
    DSN (or datablock in the Belle II naming) contains /
    """
    # str.split always yields at least one field, so in practice the DSN is
    # simply used as the directory prefix of the file.
    if len(dsn.split("/")) == 0:
        return '/other/%s' % (filename)
    return '%s/%s' % (dsn, filename)
587
+
588
+
589
# Registry of non-deterministic SURL construction algorithms, keyed by name.
_SURL_ALGORITHMS = {}
# Algorithm used when no (known) naming convention is requested.
_DEFAULT_SURL = 'DQ2'
# Whether policy-package SURL algorithms have been loaded yet (done lazily).
_loaded_policy_modules = False
592
+
593
+
594
def register_surl_algorithm(surl_callable, name=None):
    """
    Add a SURL construction algorithm to the registry.

    :param surl_callable: callable(dsn, scope, filename) -> relative SURL
    :param name: registry key; defaults to the callable's __name__
    """
    key = surl_callable.__name__ if name is None else name
    _SURL_ALGORITHMS[key] = surl_callable
598
+
599
+
600
+ register_surl_algorithm(construct_surl_T0, 'T0')
601
+ register_surl_algorithm(construct_surl_DQ2, 'DQ2')
602
+ register_surl_algorithm(construct_surl_BelleII, 'BelleII')
603
+
604
+
605
def construct_surl(dsn: str, scope: str, filename: str, naming_convention: str = None) -> str:
    """
    Applies non-deterministic source url convention to the given replica.

    Dispatches to the algorithm registered under ``naming_convention``,
    falling back to the default when the name is None or unknown.
    Rucio administrators can potentially register additional surl generation
    algorithms, which are not implemented inside this main rucio repository,
    so changing the argument list must be done with caution.
    """
    global _loaded_policy_modules
    if not _loaded_policy_modules:
        # on first call, register any SURL functions from the policy packages
        register_policy_package_algorithms('surl', _SURL_ALGORITHMS)
        _loaded_policy_modules = True

    algorithm = _SURL_ALGORITHMS.get(naming_convention)
    if algorithm is None:
        algorithm = _SURL_ALGORITHMS[_DEFAULT_SURL]
    return algorithm(dsn, scope, filename)
622
+
623
+
624
def __strip_dsn(dsn):
    """
    Drop the _dis, _sub and _frag suffixes for panda datasets from the lfc
    path they will be registered in.
    Method imported from DQ2.
    """
    fields = dsn.split('.')
    last_field = fields[-1]
    try:
        for suffix in ('_dis', '_sub', '_frag'):
            last_field = re.sub('%s.*$' % suffix, '', last_field)
    except IndexError:
        # defensive guard kept from the original DQ2 code
        return dsn
    fields[-1] = last_field
    return '.'.join(fields)
642
+
643
+
644
def __strip_tag(tag):
    """
    Drop the _dis, _sub and _tid suffixes from a dataset tag.
    Method imported from DQ2.
    """
    stripped_tag = tag
    try:
        for suffix in ('_dis', '_sub', '_tid'):
            stripped_tag = re.sub('%s.*$' % suffix, '', stripped_tag)
    except IndexError:
        # defensive guard kept from the original DQ2 code
        return stripped_tag
    return stripped_tag
658
+
659
+
660
def clean_surls(surls):
    """
    Normalise a list of SURLs: strip SRM ports and web-service prefixes as
    well as signed-URL query strings, then return the results sorted.
    """
    cleaned = []
    for surl in surls:
        if surl.startswith('srm'):
            surl = re.sub(':[0-9]+/', '/', surl)
            for prefix in (r'/srm/managerv1\?SFN=', r'/srm/v2/server\?SFN=', r'/srm/managerv2\?SFN='):
                surl = re.sub(prefix, '', surl)
        for marker in ('?GoogleAccessId', '?X-Amz'):
            # signed GCS / S3 URLs: keep only the part before the signature
            if marker in surl:
                surl = surl.split(marker)[0]
        cleaned.append(surl)
    return sorted(cleaned)
675
+
676
+
677
# Registry of scope-extraction algorithms, keyed by name.
_EXTRACT_SCOPE_ALGORITHMS = {}
# Algorithm used when no (known) extraction convention is configured.
_DEFAULT_EXTRACT = 'atlas'
# Whether policy-package scope algorithms have been loaded yet (done lazily).
_loaded_policy_package_scope_algorithms = False
680
+
681
+
682
def extract_scope_atlas(did, scopes):
    """
    ATLAS convention: the scope is the part before ':' when present,
    otherwise the first DSN field ('user.*'/'group.*' keep two fields).
    A single trailing '/' on the name is dropped.
    """
    if did.find(':') > -1:
        parts = did.split(':')
        if len(parts) > 2:
            raise RucioException('Too many colons. Cannot extract scope and name')
        scope, name = parts[0], parts[1]
        if name.endswith('/'):
            name = name[:-1]
        return scope, name
    fields = did.split('.')
    scope = fields[0]
    if did.startswith('user') or did.startswith('group'):
        scope = ".".join(fields[0:2])
    if did.endswith('/'):
        did = did[:-1]
    return scope, did
698
+
699
+
700
def extract_scope_dirac(did, scopes):
    """
    Default DIRAC convention: the scope is the second path element of the
    LFN, or the first one (the VO name) when the split yields only that.
    """
    elements = did.rstrip('/').split('/')
    scope = elements[2] if len(elements) > 2 else elements[1]
    return scope, did
709
+
710
+
711
def extract_scope_belleii(did, scopes):
    """
    Belle II convention: derive the scope from the leading components of the
    absolute /belle/... logical file name.

    NOTE(review): branch order matters — the more specific prefixes must be
    tested before the catch-all returns; do not reorder.

    :param did: the DID (an absolute /belle/... path).
    :param scopes: known scopes; consulted for user.* and group.* scopes.
    :returns: tuple (scope, did); the DID itself is returned unchanged.
    """
    split_did = did.split('/')
    if did.startswith('/belle/mock/'):
        return 'mock', did
    if did.startswith('/belle/MC/'):
        # official Monte-Carlo production areas map to 'mc'
        if did.startswith('/belle/MC/BG') or \
                did.startswith('/belle/MC/build') or \
                did.startswith('/belle/MC/generic') or \
                did.startswith('/belle/MC/log') or \
                did.startswith('/belle/MC/mcprod') or \
                did.startswith('/belle/MC/prerelease') or \
                did.startswith('/belle/MC/release'):
            return 'mc', did
        # working/测试-style MC areas map to 'mc_tmp'
        if did.startswith('/belle/MC/cert') or \
                did.startswith('/belle/MC/dirac') or \
                did.startswith('/belle/MC/dr3') or \
                did.startswith('/belle/MC/fab') or \
                did.startswith('/belle/MC/hideki') or \
                did.startswith('/belle/MC/merge') or \
                did.startswith('/belle/MC/migration') or \
                did.startswith('/belle/MC/skim') or \
                did.startswith('/belle/MC/test'):
            return 'mc_tmp', did
        if len(split_did) > 4:
            if split_did[3].find('fab') > -1 or split_did[3].find('merge') > -1 or split_did[3].find('skim') > -1:
                return 'mc_tmp', did
            if split_did[3].find('release') > -1:
                return 'mc', did
        return 'mc_tmp', did
    if did.startswith('/belle/Raw/'):
        return 'raw', did
    if did.startswith('/belle/hRaw'):
        return 'hraw', did
    if did.startswith('/belle/user/'):
        # /belle/user/<initial>/<username>/... or /belle/user/<username>/...
        if len(split_did) > 4:
            if len(split_did[3]) == 1 and 'user.%s' % (split_did[4]) in scopes:
                return 'user.%s' % split_did[4], did
        if len(split_did) > 3:
            if 'user.%s' % (split_did[3]) in scopes:
                return 'user.%s' % split_did[3], did
        return 'user', did
    if did.startswith('/belle/group/'):
        if len(split_did) > 4:
            if 'group.%s' % (split_did[4]) in scopes:
                return 'group.%s' % split_did[4], did
        return 'group', did
    if did.startswith('/belle/data/') or did.startswith('/belle/Data/'):
        if len(split_did) > 4:
            if split_did[3] in ['fab', 'skim']:  # /belle/Data/fab --> data_tmp
                return 'data_tmp', did
            if split_did[3].find('release') > -1:  # /belle/Data/release --> data
                return 'data', did
        if len(split_did) > 5:
            if split_did[3] in ['proc']:  # /belle/Data/proc
                if split_did[4].find('release') > -1:  # /belle/Data/proc/release*
                    if len(split_did) > 7 and split_did[6] in ['GCR2c', 'prod00000007', 'prod6b', 'proc7b',
                                                               'proc8b', 'Bucket4', 'Bucket6test', 'bucket6',
                                                               'proc9', 'bucket7', 'SKIMDATAx1', 'proc10Valid',
                                                               'proc10', 'SkimP10x1', 'SkimP11x1', 'SkimB9x1',
                                                               'SkimB10x1', 'SkimB11x1']:  # /belle/Data/proc/release*/*/proc10/* --> data_tmp (Old convention)
                        return 'data_tmp', did
                    else:  # /belle/Data/proc/release*/*/proc11/* --> data (New convention)
                        return 'data', did
                if split_did[4].find('fab') > -1:  # /belle/Data/proc/fab* --> data_tmp
                    return 'data_tmp', did
        return 'data_tmp', did
    if did.startswith('/belle/ddm/functional_tests/') or did.startswith('/belle/ddm/tests/') or did.startswith('/belle/test/ddm_test'):
        return 'test', did
    if did.startswith('/belle/BG/'):
        return 'data', did
    if did.startswith('/belle/collection'):
        return 'collection', did
    return 'other', did
784
+
785
+
786
def register_extract_scope_algorithm(extract_callable, name=None):
    """
    Register an extract_scope implementation under the given name.

    :param extract_callable: callable taking (did, scopes) and returning a (scope, name) tuple.
    :param name: key to register the callable under; defaults to the callable's __name__.
    """
    # NOTE: the previous default of `name=[]` was a mutable default that the
    # `is None` check below could never trigger on, and a list is not a valid
    # (hashable) dict key anyway; `None` restores the intended fallback.
    if name is None:
        name = extract_callable.__name__
    _EXTRACT_SCOPE_ALGORITHMS[name] = extract_callable
790
+
791
+
792
+ register_extract_scope_algorithm(extract_scope_atlas, 'atlas')
793
+ register_extract_scope_algorithm(extract_scope_belleii, 'belleii')
794
+ register_extract_scope_algorithm(extract_scope_dirac, 'dirac')
795
+
796
+
797
def extract_scope(did, scopes=None, default_extract=_DEFAULT_EXTRACT):
    """
    Split a DID into (scope, name) using the configured algorithm.

    :param did: the DID to split.
    :param scopes: optional list of known scopes, consulted by some algorithms.
    :param default_extract: algorithm name used when none is configured or the
                            configured one is unknown.
    :returns: a (scope, name) tuple.
    """
    global _loaded_policy_package_scope_algorithms
    # Lazily pull in algorithms contributed by policy packages, once per process.
    if not _loaded_policy_package_scope_algorithms:
        register_policy_package_algorithms('scope', _EXTRACT_SCOPE_ALGORITHMS)
        _loaded_policy_package_scope_algorithms = True
    # The [common] section takes precedence over [policy] for the algorithm name.
    algorithm = config_get('common', 'extract_scope', False, None) or config_get('policy', 'extract_scope', False, None)
    if algorithm is None or algorithm not in _EXTRACT_SCOPE_ALGORITHMS:
        algorithm = default_extract
    return _EXTRACT_SCOPE_ALGORITHMS[algorithm](did=did, scopes=scopes)
806
+
807
+
808
def pid_exists(pid):
    """
    Return True when a process with the given pid exists in the current
    process table. UNIX only.

    :param pid: process id to probe.
    :returns: boolean.
    :raises ValueError: for pid 0, whose meaning is not portable.
    """
    if pid == 0:
        # Per "man 2 kill", signalling PID 0 targets every process in the
        # caller's process group; on some systems 0 is also a real PID, but
        # there is no portable way to tell, so refuse it outright.
        raise ValueError('invalid PID 0')
    if pid < 0:
        return False
    try:
        os.kill(pid, 0)  # signal 0 performs error checking only, sends nothing
    except OSError as err:
        if err.errno == errno.ESRCH:
            # ESRCH: no such process.
            return False
        if err.errno == errno.EPERM:
            # EPERM: the process exists, we just may not signal it.
            return True
        # Per "man 2 kill" only EINVAL, EPERM and ESRCH can occur.
        raise
    return True
836
+
837
+
838
def sizefmt(num, human=True):
    """
    Render a byte count as a human readable string (SI prefixes, base 1000).

    :param num: the number of bytes; None is rendered as '0.0 B'.
    :param human: when False, simply return the integer as a string.
    :returns: formatted string ('Inf' on numeric overflow).
    """
    if num is None:
        return '0.0 B'
    try:
        num = int(num)
        if not human:
            return str(num)
        for prefix in ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z'):
            if abs(num) < 1000.0:
                return "%3.3f %sB" % (num, prefix)
            num /= 1000.0
        # Anything past zetta collapses to yotta.
        return "%.1f %sB" % (num, 'Y')
    except OverflowError:
        return 'Inf'
856
+
857
+
858
def get_tmp_dir():
    """
    Return a per-user path for storing temporary files.

    The base directory is resolved by the tempfile module, which honours
    the TMP, TMPDIR and TEMP environment variables and falls back to /tmp.
    A user-specific component (login name, else numeric uid) is appended
    when it can be determined; otherwise the bare base directory is used.

    :return: A path.
    """
    base_dir = os.path.abspath(tempfile.gettempdir())
    # Prefer the login name, then the numeric uid, then the plain base dir.
    for user_component in (getpass.getuser, lambda: str(os.getuid())):
        try:
            return os.path.join(base_dir, user_component())
        except Exception:
            continue
    return base_dir
884
+
885
+
886
def is_archive(name):
    '''
    Check if a file name is an archive file or not (zip/tar family),
    optionally followed by numeric split-archive suffixes such as ".1".

    :param name: the file name to test (matched case-insensitively).
    :return: A boolean.
    '''
    # The dots inside the multi-part extensions are now escaped: previously
    # "tar.gz" matched any character in place of the dot, so names like
    # "file.taragz" were wrongly classified as archives.
    regexp = r'^.*\.(zip|zipx|tar\.gz|tgz|tar\.Z|tar\.bz2|tbz2)(\.\d+)*$'
    return re.match(regexp, name, re.I) is not None
896
+
897
+
898
class Color:
    # ANSI terminal escape sequences for coloured/styled console output.
    # Prefix text with one of these and append Color.END to reset styling.
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # Reset sequence: restores the terminal's default attributes.
    END = '\033[0m'
909
+
910
+
911
def detect_client_location():
    """
    Detect the network location of the client.

    Normally the client IP is set on the server side (request.remote_addr);
    here we determine the address the host itself would use to reach a
    public network. Connecting a UDP socket towards a Google DNS address
    does not send any traffic — it only makes the kernel choose a route and
    source address. IPv6 is tried first, then IPv4, falling back to
    '0.0.0.0' when neither can be determined.

    The site name is taken from the first defined of the SITE_NAME,
    ATLAS_SITE_NAME and OSG_SITE_NAME environment variables, else the fixed
    string 'ROAMING'. Optional coordinates are read from RUCIO_LATITUDE and
    RUCIO_LONGITUDE.

    :returns: dict with keys 'ip', 'fqdn', 'site', 'latitude', 'longitude'.
    """
    ip = None

    # Use the socket objects as context managers so the probe sockets are
    # always closed (the previous implementation leaked both descriptors).
    try:
        with socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) as s:
            s.connect(("2001:4860:4860:0:0:0:0:8888", 80))
            ip = s.getsockname()[0]
    except Exception:
        pass

    if not ip:
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
                s.connect(("8.8.8.8", 80))
                ip = s.getsockname()[0]
        except Exception:
            pass

    if not ip:
        ip = '0.0.0.0'

    site = os.environ.get('SITE_NAME',
                          os.environ.get('ATLAS_SITE_NAME',
                                         os.environ.get('OSG_SITE_NAME',
                                                        'ROAMING')))

    latitude = os.environ.get('RUCIO_LATITUDE')
    longitude = os.environ.get('RUCIO_LONGITUDE')
    if latitude and longitude:
        try:
            latitude = float(latitude)
            longitude = float(longitude)
        except ValueError:
            # Malformed coordinates degrade to (0, 0) with a console warning.
            latitude = longitude = 0
            print('Client set latitude and longitude are not valid.')
    else:
        latitude = longitude = None

    return {'ip': ip,
            'fqdn': socket.getfqdn(),
            'site': site,
            'latitude': latitude,
            'longitude': longitude}
965
+
966
+
967
def ssh_sign(private_key, message):
    """
    Sign a message with an SSH RSA private key.

    :param private_key: the private key as a PEM string.
    :param message: the payload to sign (str or bytes).
    :return: the signature, base64 encoded, as a string.
    :raises MissingModuleException: when paramiko is unavailable.
    """
    payload = message.encode() if isinstance(message, str) else message
    if not EXTRA_MODULES['paramiko']:
        raise MissingModuleException('The paramiko module is not installed or faulty.')
    key_stream = StringIO(private_key)
    rsa_key = RSAKey.from_private_key(key_stream)
    key_stream.close()
    signature = rsa_key.sign_ssh_data(payload)
    signature.rewind()
    return base64.b64encode(signature.get_remainder()).decode()
987
+
988
+
989
def make_valid_did(lfn_dict):
    """
    Strip the local-bookkeeping `filename` key from an LFN dictionary.

    `rucio upload` (and the RSE manager's upload) records the on-disk file
    name under `filename` alongside the DID information; the DID JSON
    schema does not allow that key, so it must be removed before
    validation. When `name` is absent it is populated from `filename`.
    The input dictionary is never mutated.

    :param lfn_dict: dictionary describing the LFN.
    :returns: a dictionary valid against the DID JSON schema.
    """
    if 'filename' not in lfn_dict:
        return lfn_dict

    cleaned = dict(lfn_dict)
    cleaned.setdefault('name', cleaned['filename'])
    del cleaned['filename']
    return cleaned
1007
+
1008
+
1009
def send_trace(trace, trace_endpoint, user_agent, retries=5):
    """
    Send the given trace to the trace endpoint.

    Traces coming from the pilot are suppressed, and every send failure is
    silently retried up to `retries` times.

    :param trace: the trace dictionary to send
    :param trace_endpoint: the endpoint where the trace should be send
    :param user_agent: the user agent sending the trace
    :param retries: the number of retries if sending fails
    :return: 0 on success, 1 on failure
    """
    if user_agent.startswith('pilot'):
        return 0
    for _attempt in range(retries):
        try:
            requests.post(trace_endpoint + '/traces/', verify=False, data=json.dumps(trace))
            return 0
        except Exception:
            continue
    return 1
1028
+
1029
+
1030
def add_url_query(url, query):
    """
    Merge extra key/value pairs into the query string of a URL.

    :param url: The existing URL
    :param query: A dictionary containing key/value pairs to be added to the URL
    :return: The expanded URL with the new query parameters
    """
    scheme, netloc, path, params, existing_query, fragment = urlparse(url)
    merged = dict(parse_qsl(existing_query))
    merged.update(query)
    return urlunparse((scheme, netloc, path, params, urlencode(merged), fragment))
1044
+
1045
+
1046
def get_bytes_value_from_string(input_string):
    """
    Convert a "<value><unit>" storage string (e.g. '10GB') into bytes.

    Recognised units, case-insensitive, decimal (SI): B, KB, MB, GB, TB, PB.

    :param input_string: String containing a value and an unit
    :return: Integer value representing the value in bytes, or False when the
             string or its unit cannot be parsed
    """
    # Factor per recognised unit; anything else is rejected.
    multipliers = {
        'b': 1,
        'kb': 1000,
        'mb': 1000 ** 2,
        'gb': 1000 ** 3,
        'tb': 1000 ** 4,
        'pb': 1000 ** 5,
    }
    match = re.match(r'^([0-9]+)([A-Za-z]+)$', input_string)
    if not match:
        return False
    factor = multipliers.get(match.group(2).lower())
    if factor is None:
        return False
    return int(match.group(1)) * factor
1074
+
1075
+
1076
def parse_did_filter_from_string(input_string):
    """
    Parse DID filter options in format 'length<3,type=all' from string.

    Supported operators: >=, >, <=, < (meaningful for 'length') and '=' for
    everything else. 'created_after'/'created_before' values are parsed as
    '%Y-%m-%dT%H:%M:%S.%fZ' timestamps; boolean-looking values are
    normalised to '1'/'0'.

    :param input_string: String containing the filter options.
    :return: filter dictionary and type as string.
    :raises InvalidType: when an unknown DID type is requested.
    :raises ValueError: when a length value is not an integer.
    """
    filters = {}
    type_ = 'collection'
    if input_string:
        filter_options = input_string.replace(' ', '').split(',')
        for option in filter_options:
            value = None
            key = None

            # Order matters: check two-character operators before their
            # one-character prefixes so 'length>=3' is not split on '>'.
            if '>=' in option:
                key, value = option.split('>=')
                if key == 'length':
                    key = 'length.gte'
            elif '>' in option:
                key, value = option.split('>')
                if key == 'length':
                    key = 'length.gt'
            elif '<=' in option:
                key, value = option.split('<=')
                if key == 'length':
                    key = 'length.lte'
            elif '<' in option:
                key, value = option.split('<')
                if key == 'length':
                    key = 'length.lt'
            elif '=' in option:
                key, value = option.split('=')
                if key == 'created_after' or key == 'created_before':
                    value = datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ')

            if key == 'type':
                if value.upper() in ['ALL', 'COLLECTION', 'CONTAINER', 'DATASET', 'FILE']:
                    type_ = value.lower()
                else:
                    raise InvalidType('{0} is not a valid type. Valid types are {1}'.format(value, ['ALL', 'COLLECTION', 'CONTAINER', 'DATASET', 'FILE']))
            elif key in ('length.gt', 'length.lt', 'length.gte', 'length.lte', 'length'):
                try:
                    # The converted integer is stored exactly once (a redundant
                    # re-assignment of the raw value used to follow this block).
                    filters[key] = int(value)
                except ValueError:
                    raise ValueError('Length has to be an integer value.')
            elif isinstance(value, str):
                # Normalise textual booleans.
                if value.lower() == 'true':
                    value = '1'
                elif value.lower() == 'false':
                    value = '0'
                filters[key] = value
            else:
                filters[key] = value

    return filters, type_
1134
+
1135
+
1136
def parse_did_filter_from_string_fe(input_string, name='*', type='collection', omit_name=False):
    """
    Parse DID filter string for the filter engine (fe).

    Should adhere to the following conventions:
    - ';' represents the logical OR operator
    - ',' represents the logical AND operator
    - all operators belong to set of (<=, >=, ==, !=, >, <, =)
    - there should be no duplicate key+operator criteria.

    One sided and compound inequalities are supported.

    Sanity checking of input is left to the filter engine.

    :param input_string: String containing the filter options.
    :param name: DID name.
    :param type: The type of the did: all(container, dataset, file), collection(dataset or container), dataset, container.
    :param omit_name: omit addition of name to filters.
    :return: list of dictionaries with each dictionary as a separate OR expression.
    :raises DIDFilterSyntaxError: when a clause cannot be tokenised or an operator is unknown.
    :raises DuplicateCriteriaInDIDFilter: when the same key+operator appears twice in one AND group.
    """
    # lookup table unifying all comprehended operators to a nominal suffix.
    # note that the order matters as the regex engine is eager, e.g. don't want to evaluate '<=' as '<' and '='.
    operators_suffix_LUT = OrderedDict({
        '<=': 'lte',
        '>=': 'gte',
        '==': '',
        '!=': 'ne',
        '>': 'gt',
        '<': 'lt',
        '=': ''
    })

    # lookup table mapping operator opposites, used to reverse compound inequalities.
    operator_opposites_LUT = {
        'lt': 'gt',
        'lte': 'gte'
    }
    operator_opposites_LUT.update({op2: op1 for op1, op2 in operator_opposites_LUT.items()})

    filters = []
    if input_string:
        or_groups = list(filter(None, input_string.split(';')))    # split <input_string> into OR clauses
        for or_group in or_groups:
            or_group = or_group.strip()
            and_groups = list(filter(None, or_group.split(',')))   # split <or_group> into AND clauses
            and_group_filters = {}
            for and_group in and_groups:
                and_group = and_group.strip()
                # tokenise this AND clause using operators as delimiters.
                tokenisation_regex = "({})".format('|'.join(operators_suffix_LUT.keys()))
                and_group_split_by_operator = list(filter(None, re.split(tokenisation_regex, and_group)))
                if len(and_group_split_by_operator) == 3:      # this is a one-sided inequality or expression
                    key, operator, value = [token.strip() for token in and_group_split_by_operator]

                    # substitute input operator with the nominal operator defined by the LUT, <operators_suffix_LUT>.
                    operator_mapped = operators_suffix_LUT.get(operator)

                    filter_key_full = key
                    if operator_mapped is not None:
                        if operator_mapped:
                            filter_key_full = "{}.{}".format(key, operator_mapped)
                    else:
                        # BUGFIX: previously formatted the (None) mapped value
                        # into the message; report the offending operator itself.
                        raise DIDFilterSyntaxError("{} operator not understood.".format(operator))

                    if filter_key_full in and_group_filters:
                        raise DuplicateCriteriaInDIDFilter(filter_key_full)
                    else:
                        and_group_filters[filter_key_full] = value
                elif len(and_group_split_by_operator) == 5:    # this is a compound inequality
                    value1, operator1, key, operator2, value2 = [token.strip() for token in and_group_split_by_operator]

                    # substitute input operator with the nominal operator defined by the LUT, <operators_suffix_LUT>.
                    # operator1 faces the key from the left, so its meaning is reversed via the opposites LUT.
                    operator1_mapped = operator_opposites_LUT.get(operators_suffix_LUT.get(operator1))
                    operator2_mapped = operators_suffix_LUT.get(operator2)

                    filter_key1_full = filter_key2_full = key
                    if operator1_mapped is not None and operator2_mapped is not None:
                        if operator1_mapped:    # ignore '' operator (maps from equals)
                            filter_key1_full = "{}.{}".format(key, operator1_mapped)
                        if operator2_mapped:    # ignore '' operator (maps from equals)
                            filter_key2_full = "{}.{}".format(key, operator2_mapped)
                    else:
                        # BUGFIX: previously referenced <operator_mapped>, a name from the
                        # one-sided branch that may be unbound here (NameError); report the
                        # operator that actually failed to map.
                        bad_operator = operator1 if operator1_mapped is None else operator2
                        raise DIDFilterSyntaxError("{} operator not understood.".format(bad_operator))

                    if filter_key1_full in and_group_filters:
                        raise DuplicateCriteriaInDIDFilter(filter_key1_full)
                    else:
                        and_group_filters[filter_key1_full] = value1
                    if filter_key2_full in and_group_filters:
                        raise DuplicateCriteriaInDIDFilter(filter_key2_full)
                    else:
                        and_group_filters[filter_key2_full] = value2
                else:
                    raise DIDFilterSyntaxError(and_group)

            # add name key to each AND clause if it hasn't already been populated from the filter and <omit_name> not set.
            if not omit_name and 'name' not in and_group_filters:
                and_group_filters['name'] = name

            filters.append(and_group_filters)
    else:
        if not omit_name:
            filters.append({
                'name': name
            })
    return filters, type
1242
+
1243
+
1244
def parse_replicas_from_file(path):
    """
    Parses the output of list_replicas from a json or metalink file
    into a dictionary. Metalink parsing is tried first and if it fails
    it tries to parse json.

    :param path: the path to the input file

    :returns: a list with a dictionary for each file
    :raises MetalinkJsonParsingError: when the file is neither valid metalink nor valid json
    """
    with open(path) as fp:
        try:
            root = ElementTree.parse(fp).getroot()
            return parse_replicas_metalink(root)
        except ElementTree.ParseError as xml_err:
            # The XML parser consumed (part of) the stream; rewind before
            # handing the same file object to the JSON parser, otherwise
            # valid JSON input would always fail to parse here.
            fp.seek(0)
            try:
                return json.load(fp)
            except ValueError as json_err:
                raise MetalinkJsonParsingError(path, xml_err, json_err)
1263
+
1264
+
1265
def parse_replicas_from_string(string):
    """
    Parse the output of list_replicas from a json or metalink string.

    Metalink is attempted first; when the XML parser rejects the input it
    is interpreted as json instead.

    :param string: the string to parse

    :returns: a list with a dictionary for each file
    :raises MetalinkJsonParsingError: when neither format can be parsed
    """
    try:
        metalink_root = ElementTree.fromstring(string)
    except ElementTree.ParseError as xml_err:
        try:
            return json.loads(string)
        except ValueError as json_err:
            raise MetalinkJsonParsingError(string, xml_err, json_err)
    return parse_replicas_metalink(metalink_root)
1283
+
1284
+
1285
def parse_replicas_metalink(root):
    """
    Flatten a metalink XML tree into a list of dictionaries where each
    dictionary describes a file with its replicas.
    Will be called by parse_replicas_from_file and parse_replicas_from_string.

    :param root: root node of the metalink tree

    :returns: a list with a dictionary for each file
    :raises InputValidationError: when a <file> element lacks an <identity> child
    """
    # metalink namespace prefix for all tag lookups
    NS = '{urn:ietf:params:xml:ns:metalink}'
    BOOL_MAP = {'true': True, 'True': True, 'false': False, 'False': False}
    ATTR_RENAME = {'location': 'rse'}

    parsed_files = []
    for file_elem in root.findall(NS + 'file'):
        # The <identity> child carries the DID and is mandatory.
        identity_elem = file_elem.find(NS + 'identity')
        if not ElementTree.iselement(identity_elem):
            raise InputValidationError('Failed to locate identity-tag inside %s' % ElementTree.tostring(file_elem))

        entry = {'did': identity_elem.text,
                 'adler32': None,
                 'md5': None,
                 'sources': []}

        # Optional set of parent DIDs.
        parents = set()
        parents_elem = file_elem.find(NS + 'parents')
        if ElementTree.iselement(parents_elem):
            parents.update(did_elem.text for did_elem in parents_elem.findall(NS + 'did'))
        entry['parent_dids'] = parents

        size_elem = file_elem.find(NS + 'size')
        entry['bytes'] = int(size_elem.text) if ElementTree.iselement(size_elem) else None

        # Checksums keyed by their declared type (e.g. adler32, md5).
        for hash_elem in file_elem.findall(NS + 'hash'):
            algo = hash_elem.get('type')
            if algo:
                entry[algo] = hash_elem.text

        # One source dict per <url>: 'location' is renamed to 'rse' and
        # textual booleans become real booleans; the PFN is the element text.
        for url_elem in file_elem.findall(NS + 'url'):
            source = {ATTR_RENAME.get(attr, attr): BOOL_MAP.get(val, val)
                      for attr, val in url_elem.items()}
            source['pfn'] = url_elem.text
            entry['sources'].append(source)

        parsed_files.append(entry)

    return parsed_files
1340
+
1341
+
1342
def get_thread_with_periodic_running_function(interval, action, graceful_stop):
    """
    Create (but do not start) a thread that invokes ``action`` every
    ``interval`` seconds until ``graceful_stop`` is set.

    :param interval: Interval in seconds when the action function should run.
    :param action: Function, that should run periodically.
    :param graceful_stop: Threading event used to check for graceful stop.
    :returns: the unstarted threading.Thread object.
    """
    def _loop():
        while not graceful_stop.is_set():
            started_at = time.time()
            action()
            # Subtract the action's own runtime to keep a steady period.
            time.sleep(interval - (time.time() - started_at))

    return threading.Thread(target=_loop)
1357
+
1358
+
1359
def run_cmd_process(cmd, timeout=3600):
    """
    Run a shell command with a timeout, killing the whole process group
    when the deadline is exceeded. UNIX only (uses setsid/killpg).

    :param cmd: shell command as a string
    :param timeout: in seconds

    :return: tuple of (errorcode, stdout with any stderr appended)
    """
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               shell=True, preexec_fn=os.setsid, universal_newlines=True)
    try:
        stdout, stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # shell=True spawns a shell with children of its own; terminate the
        # whole process group (created via setsid), escalating to SIGKILL
        # when SIGTERM is not honoured within a short grace period.
        try:
            os.killpg(os.getpgid(process.pid), signal.SIGTERM)
            stdout, stderr = process.communicate(timeout=3)
        except subprocess.TimeoutExpired:
            os.killpg(os.getpgid(process.pid), signal.SIGKILL)
            stdout, stderr = process.communicate()

    stdout = stdout or ''
    stderr = stderr or ''
    if stderr:
        stdout += " Error: " + stderr

    returncode = process.returncode if process else 1
    if returncode != 1 and 'Command time-out' in stdout:
        returncode = 1
    if returncode is None:
        returncode = 0

    return returncode, stdout
1398
+
1399
+
1400
def api_update_return_dict(dictionary, session=None):
    """
    Ensure that rse is in a dictionary returned from core

    Resolves any populated '<prefix>_id' RSE fields to RSE names and maps
    internal account/scope objects to their external representation. The
    caller's dictionary is never mutated: a copy is made lazily, on the
    first change only.

    :param dictionary: The dictionary to edit
    :param session: The DB session to use
    :returns dictionary: The edited dictionary
    """
    if not isinstance(dictionary, dict):
        return dictionary

    # `result is dictionary` doubles as the "not yet copied" flag.
    result = dictionary

    for rse_field in ('rse', 'src_rse', 'source_rse', 'dest_rse', 'destination_rse'):
        id_field = '%s_id' % rse_field
        if result.get(id_field) is not None and rse_field not in result:
            if result is dictionary:
                result = dictionary.copy()
            import rucio.core.rse
            result[rse_field] = rucio.core.rse.get_rse_name(rse_id=result[id_field], session=session)

    if result.get('account') is not None:
        if result is dictionary:
            result = dictionary.copy()
        result['account'] = result['account'].external

    if result.get('scope') is not None:
        if result is dictionary:
            result = dictionary.copy()
        result['scope'] = result['scope'].external

    return result
1436
+
1437
+
1438
def setup_logger(module_name=None, logger_name=None, logger_level=None, verbose=False):
    '''
    Factory method to set logger with handlers.

    The effective level is, in increasing precedence: INFO, DEBUG when
    ``verbose`` is set, then any matching ``forceloglevel<LEVEL>`` option
    from the [logging] config section (later/more severe levels win).

    :param module_name: __name__ of the module that is calling this method
    :param logger_name: name of the logger, typically name of the module.
    :param logger_level: if not given, fetched from config.
    :param verbose: verbose option set in bin/rucio
    '''
    # helper method for cfg check: True when the config option holds a regex
    # matching this module's name.
    # NOTE(review): assumes module_name is a string whenever the option is
    # set in the config — re.match would raise on None; TODO confirm.
    def _force_cfg_log_level(cfg_option):
        cfg_forced_modules = config_get('logging', cfg_option, raise_exception=False, default=None, clean_cached=True,
                                        check_config_table=False)
        if cfg_forced_modules:
            if re.match(str(cfg_forced_modules), module_name):
                return True
        return False

    # creating log: derive the logger name from the last dotted component of
    # the module name, falling back to 'usr' for ad-hoc callers.
    if not logger_name:
        if not module_name:
            logger_name = 'usr'
        else:
            logger_name = module_name.split('.')[-1]
    logger = logging.getLogger(logger_name)

    # extracting the log level (explicit argument wins over these defaults)
    if not logger_level:
        logger_level = logging.INFO
        if verbose:
            logger_level = logging.DEBUG

    # overriding by the config: the last matching forceloglevel* option wins.
    cfg_levels = (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR)
    for level in cfg_levels:
        cfg_opt = 'forceloglevel' + logging.getLevelName(level)
        if _force_cfg_log_level(cfg_opt):
            logger_level = level

    # setting the log level
    logger.setLevel(logger_level)

    # preferred logger handling: attach a StreamHandler whose formatter is
    # chosen per record — colourised by severity unless RUCIO_LOGGING_FORMAT
    # supplies an explicit format string.
    def add_handler(logger):
        hdlr = logging.StreamHandler()

        def emit_decorator(fnc):
            # Wrap the handler's emit() so the formatter can be picked per
            # record (colour depends on the record's level).
            def func(*args):
                if 'RUCIO_LOGGING_FORMAT' not in os.environ:
                    levelno = args[0].levelno
                    format_str = '%(asctime)s\t%(levelname)s\t%(message)s\033[0m'
                    if levelno >= logging.CRITICAL:
                        color = '\033[31;1m'
                    elif levelno >= logging.ERROR:
                        color = '\033[31;1m'
                    elif levelno >= logging.WARNING:
                        color = '\033[33;1m'
                    elif levelno >= logging.INFO:
                        color = '\033[32;1m'
                    elif levelno >= logging.DEBUG:
                        color = '\033[36;1m'
                        # DEBUG records additionally carry the file name.
                        format_str = '%(asctime)s\t%(levelname)s\t%(filename)s\t%(message)s\033[0m'
                    else:
                        color = '\033[0m'
                    formatter = logging.Formatter('{0}{1}'.format(color, format_str))
                else:
                    formatter = logging.Formatter(os.environ['RUCIO_LOGGING_FORMAT'])
                hdlr.setFormatter(formatter)
                return fnc(*args)
            return func
        hdlr.emit = emit_decorator(hdlr.emit)
        logger.addHandler(hdlr)

    # setting handler and formatter only once per logger to avoid duplicate
    # output when setup_logger is called repeatedly for the same name.
    if not logger.handlers:
        add_handler(logger)

    return logger
1515
+
1516
+
1517
def daemon_sleep(start_time, sleep_time, graceful_stop, logger=logging.log):
    """
    Put a daemon to sleep for the remainder of its work cycle.

    Waits until ``sleep_time`` seconds have elapsed since ``start_time``,
    returning early when ``graceful_stop`` is set.

    :param start_time: timestamp at which the cycle started.
    :param sleep_time: desired total cycle length in seconds.
    :param graceful_stop: threading event used to abort the wait.
    :param logger: logging callable, defaults to logging.log.
    """
    remaining = sleep_time - (time.time() - start_time)
    if remaining > 0:
        logger(logging.INFO, 'Sleeping for a while : %s seconds', remaining)
        graceful_stop.wait(remaining)
1524
+
1525
+
1526
def is_client():
    """
    Checks if the function is called from a client or from a server/daemon.

    The RUCIO_CLIENT_MODE environment variable wins when present (any
    non-empty value means client mode). Otherwise the configuration
    decides: a [database] section implies server, else a [client] section
    implies client; with no config file at all, client mode is assumed.

    :returns client_mode: True if is called from a client, False if it is called from a server/daemon
    """
    if 'RUCIO_CLIENT_MODE' in os.environ:
        return bool(os.environ['RUCIO_CLIENT_MODE'])
    try:
        if config_has_section('database'):
            return False
        return bool(config_has_section('client'))
    except RuntimeError:
        # If no configuration file is found the default value should be True
        return True
1550
+
1551
+
1552
class retry:
    """Retry callable object with a configurable number of attempts."""

    def __init__(self, func, *args, **kwargs):
        '''
        :param func: a method that should be executed with retries
        :param args: positional parameters of the func
        :param kwargs: keyword arguments of the func
        '''
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __call__(self, mtries=3, logger=logging.log):
        '''
        Invoke the wrapped callable, swallowing exceptions on all but the
        final attempt (whose exception propagates to the caller).

        :param mtries: maximum number of attempts to execute the function
        :param logger: preferred logger
        '''
        for attempt_no in range(1, mtries):
            try:
                if logger:
                    logger(logging.DEBUG, '{}: Attempt {}'.format(self.func.__name__, attempt_no))
                return self.func(*self.args, **self.kwargs)
            except Exception as error:
                if logger:
                    logger(logging.DEBUG, '{}: Attempt failed {}'.format(self.func.__name__, attempt_no))
                    logger(logging.DEBUG, str(error))
        # Final attempt, outside the try: any exception reaches the caller.
        return self.func(*self.args, **self.kwargs)
1580
+
1581
+
1582
class StoreAndDeprecateWarningAction(argparse.Action):
    '''
    Store action that additionally prints a deprecation warning whenever
    an option string other than ``new_option_string`` was used.
    '''

    def __init__(self, option_strings, new_option_string, dest, **kwargs):
        """
        :param option_strings: all possible argument name strings
        :param new_option_string: the new option string which replaces the old
        :param dest: name of variable to store the value in
        :param kwargs: everything else
        """
        assert new_option_string in option_strings
        super(StoreAndDeprecateWarningAction, self).__init__(
            option_strings=option_strings,
            dest=dest,
            **kwargs)
        self.new_option_string = new_option_string

    def __call__(self, parser, namespace, values, option_string=None):
        if option_string and option_string != self.new_option_string:
            # The logger is typically initialized after argument parsing
            # (to honour verbosity flags), so fall back to a plain print.
            print("Warning: The commandline argument {} is deprecated! Please use {} in the future.".format(option_string, self.new_option_string))
        setattr(namespace, self.dest, values)
1613
+
1614
+
1615
class StoreTrueAndDeprecateWarningAction(argparse._StoreConstAction):
    '''
    Store-true action that additionally prints a deprecation warning
    whenever an option string other than ``new_option_string`` was used.
    '''

    def __init__(self, option_strings, new_option_string, dest,
                 default=False, required=False, help=None):
        """
        :param option_strings: all possible argument name strings
        :param new_option_string: the new option string which replaces the old
        :param dest: name of variable to store the value in
        :param default: value used when the flag is absent
        :param required: whether the flag must be supplied
        :param help: help text for the option
        """
        assert new_option_string in option_strings
        super(StoreTrueAndDeprecateWarningAction, self).__init__(
            option_strings=option_strings,
            dest=dest,
            const=True,
            default=default,
            required=required,
            help=help)
        self.new_option_string = new_option_string

    def __call__(self, parser, namespace, values, option_string=None):
        super(StoreTrueAndDeprecateWarningAction, self).__call__(parser, namespace, values, option_string=option_string)
        if option_string and option_string != self.new_option_string:
            # The logger is typically initialized after argument parsing
            # (to honour verbosity flags), so fall back to a plain print.
            print("Warning: The commandline argument {} is deprecated! Please use {} in the future.".format(option_string, self.new_option_string))
1650
+
1651
+
1652
class PriorityQueue:
    """
    Heap-based [1] priority queue which supports priority update operations

    It is used as a dictionary: pq['element'] = priority
    The element with the highest priority can be accessed with pq.top() or pq.pop(),
    depending on the desire to keep it in the heap or not.

    Implemented as a binary min-heap: comparisons use `<` on priorities, so
    the element at the root (index 0) is the one with the SMALLEST priority value.

    [1] https://en.wikipedia.org/wiki/Heap_(data_structure)
    """
    class ContainerSlot:
        """Bookkeeping for one element: its current index in the heap list and its priority."""
        def __init__(self, position, priority):
            self.pos = position
            self.prio = priority

    def __init__(self):
        # Binary heap of the elements themselves (smallest priority value at index 0).
        self.heap = []
        # element -> ContainerSlot; O(1) lookup of an element's heap position
        # and priority, which is what makes priority updates possible.
        self.container = {}

    def __len__(self):
        """Return the number of elements currently queued."""
        return len(self.heap)

    def __getitem__(self, item):
        """Return the priority of `item` (KeyError if absent)."""
        return self.container[item].prio

    def __setitem__(self, key, value):
        """Insert `key` with priority `value`, or update its priority if already queued."""
        if key in self.container:
            existing_prio = self.container[key].prio
            self.container[key].prio = value
            # Restore the heap invariant in the direction the priority moved.
            if value < existing_prio:
                self._priority_decreased(key)
            elif existing_prio < value:
                self._priority_increased(key)
        else:
            # Append as the last leaf, then sift up to its correct position.
            self.heap.append(key)
            self.container[key] = self.ContainerSlot(position=len(self.heap) - 1, priority=value)
            self._priority_decreased(key)

    def __contains__(self, item):
        return item in self.container

    def top(self):
        """Return (without removing) the element with the smallest priority value."""
        return self.heap[0]

    def pop(self):
        """Remove and return the element with the smallest priority value."""
        item = self.heap[0]
        self.container.pop(item)

        # Move the last leaf into the root slot and sift it down.
        tmp_item = self.heap.pop()
        if self.heap:
            self.heap[0] = tmp_item
            self.container[tmp_item].pos = 0
            self._priority_increased(tmp_item)
        return item

    def _priority_decreased(self, item):
        """Sift `item` up towards the root; return True if the heap was modified."""
        heap_changed = False

        pos = self.container[item].pos
        pos_parent = (pos - 1) // 2
        while pos > 0 and self.container[self.heap[pos]].prio < self.container[self.heap[pos_parent]].prio:
            # Chained assignment: the RHS tuple is built first, then assigned
            # left-to-right, so this swaps heap[pos] <-> heap[pos_parent] and
            # leaves tmp_item = old parent element, parent = the sifted element.
            tmp_item, parent = self.heap[pos], self.heap[pos_parent] = self.heap[pos_parent], self.heap[pos]
            # Swap the recorded positions so the container stays consistent.
            self.container[tmp_item].pos, self.container[parent].pos = self.container[parent].pos, self.container[tmp_item].pos

            pos = pos_parent
            pos_parent = (pos - 1) // 2

            heap_changed = True
        return heap_changed

    def _priority_increased(self, item):
        """Sift `item` down towards the leaves; return True if the heap was modified."""
        heap_changed = False
        heap_len = len(self.heap)
        pos = self.container[item].pos
        pos_child1 = 2 * pos + 1
        pos_child2 = 2 * pos + 2

        heap_restored = False
        while not heap_restored:
            # find minimum between item, child1, and child2
            if pos_child1 < heap_len and self.container[self.heap[pos_child1]].prio < self.container[self.heap[pos]].prio:
                pos_min = pos_child1
            else:
                pos_min = pos
            if pos_child2 < heap_len and self.container[self.heap[pos_child2]].prio < self.container[self.heap[pos_min]].prio:
                pos_min = pos_child2

            if pos_min != pos:
                # Swap with the smaller child; tmp_item is the child element that
                # moved up into `pos` and gets its position fixed immediately.
                # The sifted element's position is recorded once, after the loop.
                _, tmp_item = self.heap[pos_min], self.heap[pos] = self.heap[pos], self.heap[pos_min]
                self.container[tmp_item].pos = pos

                pos = pos_min
                pos_child1 = 2 * pos + 1
                pos_child2 = 2 * pos + 2

                heap_changed = True
            else:
                heap_restored = True

        # Record the final resting position of the sifted element.
        self.container[self.heap[pos]].pos = pos
        return heap_changed
1753
+
1754
+
1755
def register_policy_package_algorithms(algorithm_type, dictionary):
    '''
    Loads all the algorithms of a given type from the policy package(s) and registers them
    :param algorithm_type: the type of algorithm to register (e.g. 'surl', 'lfn2pfn')
    :param dictionary: the dictionary to register them in
    '''
    def _load_from_policy_package(vo=None):
        # Locate the policy package for this VO (environment variable takes
        # precedence over the config file) and register its algorithms.
        import importlib
        try:
            env_name = 'RUCIO_POLICY_PACKAGE' + ('_' + vo.upper() if vo else '')
            package = os.environ.get(env_name)
            if package is None:
                package = config.config_get('policy', 'package' + ('-' + vo if vo else ''))
            check_policy_package_version(package)
            module = importlib.import_module(package)
            if not hasattr(module, 'get_algorithms'):
                return
            all_algorithms = module.get_algorithms()
            if algorithm_type not in all_algorithms:
                return
            algorithms = all_algorithms[algorithm_type]
            if not vo:
                dictionary.update(algorithms)
                return
            # check that the names are correctly prefixed with the VO name
            for name in algorithms.keys():
                if not name.lower().startswith(vo.lower()):
                    raise InvalidAlgorithmName(name, vo)
                dictionary[name] = algorithms[name]
        except (NoOptionError, NoSectionError, ImportError):
            # No package configured or package not importable: nothing to register.
            pass

    from rucio.common import config
    try:
        multivo = config.config_get_bool('common', 'multi_vo')
    except (NoOptionError, NoSectionError):
        multivo = False

    if not multivo:
        # single policy package
        _load_from_policy_package()
        return

    # determine whether on client or server
    if 'RUCIO_CLIENT_MODE' in os.environ:
        client = bool(os.environ['RUCIO_CLIENT_MODE'])
    else:
        client = not config.config_has_section('database') and config.config_has_section('client')

    if client:
        # on client, only register algorithms for selected VO
        vo = os.environ.get('RUCIO_VO')
        if vo is None:
            try:
                vo = config.config_get('client', 'vo')
            except (NoOptionError, NoSectionError):
                vo = 'def'
        _load_from_policy_package(vo)
    else:
        # on server, list all VOs and register their algorithms
        from rucio.core.vo import list_vos
        for vo_entry in list_vos():
            _load_from_policy_package(vo_entry['vo'])
+
1824
+
1825
def check_policy_package_version(package):
    '''
    Checks that the Rucio version supported by the policy package is compatible
    with this version. Raises an exception if not.
    :param package: the fully qualified name of the policy package
    :raises PolicyPackageVersionError: if the package declares supported
        version(s) and the running Rucio version is not among them
    '''
    # NOTE: the docstring previously sat below these imports, so it was a dead
    # string expression rather than the function's __doc__; it now comes first.
    import importlib

    from rucio.version import version_string
    try:
        module = importlib.import_module(package)
    except ImportError:
        # package not found. Will be picked up elsewhere
        return
    supported_version = getattr(module, 'SUPPORTED_VERSION', None)
    if supported_version is None:
        # package is not versioned
        return
    if not isinstance(supported_version, list):
        supported_version = [supported_version]
    # Rucio 1.x packages declare "major.minor"; later releases declare "major" only.
    components = 2 if version_string().startswith("1.") else 1
    current_version = ".".join(version_string().split(".")[:components])
    if current_version not in supported_version:
        raise PolicyPackageVersionError(package)
+
1847
+
1848
class Availability:
    """
    This util class acts as a translator between the availability stored as
    integer and as boolean values.

    `None` represents a missing value. This lets a user update a specific value
    without altering the other ones. If it needs to be evaluated, it will
    correspond to `True`.
    """

    # Class-level defaults; instances shadow these in __init__.
    read = None
    write = None
    delete = None

    def __init__(self, read=None, write=None, delete=None):
        self.read = read
        self.write = write
        self.delete = delete

    def __iter__(self):
        """
        The iterator provides the feature to unpack the values of this class.

        e.g. `read, write, delete = Availability(True, False, True)`

        :returns: An iterator over the values `read`, `write`, `delete`.
        """
        return iter((self.read, self.write, self.delete))

    def __repr__(self):
        return "Availability({}, {}, {})".format(self.read, self.write, self.delete)

    def __eq__(self, other):
        # Return NotImplemented for foreign types so Python can fall back to the
        # reflected comparison (or identity) instead of raising AttributeError.
        if not isinstance(other, Availability):
            return NotImplemented
        return self.read == other.read and self.write == other.write and self.delete == other.delete

    def __hash__(self):
        # `None` and `True` map to the same integer, so e.g. (None, x, y) and
        # (True, x, y) share a hash — a legal collision; they compare unequal.
        return hash(self.integer)

    @classmethod
    def from_integer(cls, n):
        """
        Returns a new Availability instance where the values are set to the
        corresponding bit values in the integer.

        :param n: The integer value to get the availabilities from.
        :returns: The corresponding Availability instance.
        """
        if n is None:
            return cls(None, None, None)

        # Bit 2 = read, bit 1 = write, bit 0 = delete.
        return cls(
            (n >> 2) % 2 == 1,
            (n >> 1) % 2 == 1,
            (n >> 0) % 2 == 1
        )

    @property
    def integer(self):
        """
        Returns the corresponding integer for the instance values. The three
        least-significant bits correspond to the availability values.

        :returns: An integer corresponding to the availability values. `None`
                  gets treated as `True`.
        """
        read_value = (self.read or self.read is None) * 4
        write_value = (self.write or self.write is None) * 2
        delete_value = (self.delete or self.delete is None) * 1

        return read_value + write_value + delete_value
+
1919
+
1920
def retrying(
    retry_on_exception: "Callable[[Exception], bool]",
    wait_fixed: int,
    stop_max_attempt_number: int
) -> "Callable[[Callable[..., T]], Callable[..., T]]":
    """
    Decorator which retries a function multiple times on certain types of exceptions.
    :param retry_on_exception: Function which takes an exception as argument and returns True if we must retry on this exception
    :param wait_fixed: the amount of time to wait in-between two tries
    :param stop_max_attempt_number: maximum number of allowed attempts
    """
    def _decorator(fn):
        @wraps(fn)
        def _wrapper(*args, **kwargs):
            attempts_left = stop_max_attempt_number
            while True:
                attempts_left -= 1
                try:
                    return fn(*args, **kwargs)
                except Exception as error:
                    # Re-raise once the attempt budget is spent, or when the
                    # exception is not one we are asked to retry on.
                    if attempts_left <= 0 or not retry_on_exception(error):
                        raise
                    time.sleep(wait_fixed / 1000.0)
        return _wrapper
    return _decorator