rucio-clients 37.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rucio-clients might be problematic. Click here for more details.

Files changed (104) hide show
  1. rucio/__init__.py +17 -0
  2. rucio/alembicrevision.py +15 -0
  3. rucio/cli/__init__.py +14 -0
  4. rucio/cli/account.py +216 -0
  5. rucio/cli/bin_legacy/__init__.py +13 -0
  6. rucio/cli/bin_legacy/rucio.py +2825 -0
  7. rucio/cli/bin_legacy/rucio_admin.py +2500 -0
  8. rucio/cli/command.py +272 -0
  9. rucio/cli/config.py +72 -0
  10. rucio/cli/did.py +191 -0
  11. rucio/cli/download.py +128 -0
  12. rucio/cli/lifetime_exception.py +33 -0
  13. rucio/cli/replica.py +162 -0
  14. rucio/cli/rse.py +293 -0
  15. rucio/cli/rule.py +158 -0
  16. rucio/cli/scope.py +40 -0
  17. rucio/cli/subscription.py +73 -0
  18. rucio/cli/upload.py +60 -0
  19. rucio/cli/utils.py +226 -0
  20. rucio/client/__init__.py +15 -0
  21. rucio/client/accountclient.py +432 -0
  22. rucio/client/accountlimitclient.py +183 -0
  23. rucio/client/baseclient.py +983 -0
  24. rucio/client/client.py +120 -0
  25. rucio/client/configclient.py +126 -0
  26. rucio/client/credentialclient.py +59 -0
  27. rucio/client/didclient.py +868 -0
  28. rucio/client/diracclient.py +56 -0
  29. rucio/client/downloadclient.py +1783 -0
  30. rucio/client/exportclient.py +44 -0
  31. rucio/client/fileclient.py +50 -0
  32. rucio/client/importclient.py +42 -0
  33. rucio/client/lifetimeclient.py +90 -0
  34. rucio/client/lockclient.py +109 -0
  35. rucio/client/metaconventionsclient.py +140 -0
  36. rucio/client/pingclient.py +44 -0
  37. rucio/client/replicaclient.py +452 -0
  38. rucio/client/requestclient.py +125 -0
  39. rucio/client/richclient.py +317 -0
  40. rucio/client/rseclient.py +746 -0
  41. rucio/client/ruleclient.py +294 -0
  42. rucio/client/scopeclient.py +90 -0
  43. rucio/client/subscriptionclient.py +173 -0
  44. rucio/client/touchclient.py +82 -0
  45. rucio/client/uploadclient.py +969 -0
  46. rucio/common/__init__.py +13 -0
  47. rucio/common/bittorrent.py +234 -0
  48. rucio/common/cache.py +111 -0
  49. rucio/common/checksum.py +168 -0
  50. rucio/common/client.py +122 -0
  51. rucio/common/config.py +788 -0
  52. rucio/common/constants.py +217 -0
  53. rucio/common/constraints.py +17 -0
  54. rucio/common/didtype.py +237 -0
  55. rucio/common/exception.py +1208 -0
  56. rucio/common/extra.py +31 -0
  57. rucio/common/logging.py +420 -0
  58. rucio/common/pcache.py +1409 -0
  59. rucio/common/plugins.py +185 -0
  60. rucio/common/policy.py +93 -0
  61. rucio/common/schema/__init__.py +200 -0
  62. rucio/common/schema/generic.py +416 -0
  63. rucio/common/schema/generic_multi_vo.py +395 -0
  64. rucio/common/stomp_utils.py +423 -0
  65. rucio/common/stopwatch.py +55 -0
  66. rucio/common/test_rucio_server.py +154 -0
  67. rucio/common/types.py +483 -0
  68. rucio/common/utils.py +1688 -0
  69. rucio/rse/__init__.py +96 -0
  70. rucio/rse/protocols/__init__.py +13 -0
  71. rucio/rse/protocols/bittorrent.py +194 -0
  72. rucio/rse/protocols/cache.py +111 -0
  73. rucio/rse/protocols/dummy.py +100 -0
  74. rucio/rse/protocols/gfal.py +708 -0
  75. rucio/rse/protocols/globus.py +243 -0
  76. rucio/rse/protocols/http_cache.py +82 -0
  77. rucio/rse/protocols/mock.py +123 -0
  78. rucio/rse/protocols/ngarc.py +209 -0
  79. rucio/rse/protocols/posix.py +250 -0
  80. rucio/rse/protocols/protocol.py +361 -0
  81. rucio/rse/protocols/rclone.py +365 -0
  82. rucio/rse/protocols/rfio.py +145 -0
  83. rucio/rse/protocols/srm.py +338 -0
  84. rucio/rse/protocols/ssh.py +414 -0
  85. rucio/rse/protocols/storm.py +195 -0
  86. rucio/rse/protocols/webdav.py +594 -0
  87. rucio/rse/protocols/xrootd.py +302 -0
  88. rucio/rse/rsemanager.py +881 -0
  89. rucio/rse/translation.py +260 -0
  90. rucio/vcsversion.py +11 -0
  91. rucio/version.py +45 -0
  92. rucio_clients-37.0.0rc1.data/data/etc/rse-accounts.cfg.template +25 -0
  93. rucio_clients-37.0.0rc1.data/data/etc/rucio.cfg.atlas.client.template +43 -0
  94. rucio_clients-37.0.0rc1.data/data/etc/rucio.cfg.template +241 -0
  95. rucio_clients-37.0.0rc1.data/data/requirements.client.txt +19 -0
  96. rucio_clients-37.0.0rc1.data/data/rucio_client/merge_rucio_configs.py +144 -0
  97. rucio_clients-37.0.0rc1.data/scripts/rucio +133 -0
  98. rucio_clients-37.0.0rc1.data/scripts/rucio-admin +97 -0
  99. rucio_clients-37.0.0rc1.dist-info/METADATA +54 -0
  100. rucio_clients-37.0.0rc1.dist-info/RECORD +104 -0
  101. rucio_clients-37.0.0rc1.dist-info/WHEEL +5 -0
  102. rucio_clients-37.0.0rc1.dist-info/licenses/AUTHORS.rst +100 -0
  103. rucio_clients-37.0.0rc1.dist-info/licenses/LICENSE +201 -0
  104. rucio_clients-37.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,969 @@
1
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import base64
16
+ import copy
17
+ import json
18
+ import logging
19
+ import os
20
+ import os.path
21
+ import random
22
+ import socket
23
+ import time
24
+ from typing import TYPE_CHECKING, Any, Final, Optional, cast
25
+
26
+ from rucio import version
27
+ from rucio.client.client import Client
28
+ from rucio.common.bittorrent import bittorrent_v2_merkle_sha256
29
+ from rucio.common.checksum import GLOBALLY_SUPPORTED_CHECKSUMS, adler32, md5
30
+ from rucio.common.client import detect_client_location
31
+ from rucio.common.config import config_get, config_get_bool, config_get_int
32
+ from rucio.common.constants import RseAttr
33
+ from rucio.common.exception import (
34
+ DataIdentifierAlreadyExists,
35
+ DataIdentifierNotFound,
36
+ FileReplicaAlreadyExists,
37
+ InputValidationError,
38
+ NoFilesUploaded,
39
+ NotAllFilesUploaded,
40
+ ResourceTemporaryUnavailable,
41
+ RSEChecksumUnavailable,
42
+ RSEOperationNotSupported,
43
+ RSEWriteBlocked,
44
+ RucioException,
45
+ ScopeNotFound,
46
+ ServiceUnavailable,
47
+ )
48
+ from rucio.common.utils import execute, generate_uuid, make_valid_did, retry, send_trace
49
+ from rucio.rse import rsemanager as rsemgr
50
+
51
+ if TYPE_CHECKING:
52
+ from collections.abc import Iterable, Mapping
53
+
54
+ from rucio.common.types import AttachDict, DatasetDict, DIDStringDict, FileToUploadDict, FileToUploadWithCollectedAndDatasetInfoDict, FileToUploadWithCollectedInfoDict, LFNDict, LoggerFunction, PathTypeAlias, RSESettingsDict, TraceBaseDict, TraceDict
55
+ from rucio.rse.protocols.protocol import RSEProtocol
56
+
57
+
58
+ class UploadClient:
59
+
60
+ def __init__(
61
+ self,
62
+ _client: Optional[Client] = None,
63
+ logger: Optional["LoggerFunction"] = None,
64
+ tracing: bool = True
65
+ ):
66
+ """
67
+ Initialises the basic settings for an UploadClient object
68
+
69
+ :param _client: - Optional: rucio.client.client.Client object. If None, a new object will be created.
70
+ :param logger: - Optional: logging.Logger object. If None, default logger will be used.
71
+ """
72
+ if not logger:
73
+ self.logger = logging.log
74
+ else:
75
+ self.logger = logger.log
76
+
77
+ self.client: Final[Client] = _client if _client else Client()
78
+ self.client_location = detect_client_location()
79
+ # if token should be used, use only JWT tokens
80
+ self.auth_token: Optional[str] = self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
81
+ self.tracing = tracing
82
+ if not self.tracing:
83
+ logger(logging.DEBUG, 'Tracing is turned off.')
84
+ if self.client.account is None:
85
+ self.logger(logging.DEBUG, 'No account specified, querying rucio.')
86
+ try:
87
+ acc = self.client.whoami()
88
+ if acc is None:
89
+ raise InputValidationError('account not specified and rucio has no account with your identity')
90
+ self.client.account = acc['account']
91
+ except RucioException as e:
92
+ raise InputValidationError('account not specified and problem with rucio: %s' % e)
93
+ self.logger(logging.DEBUG, 'Discovered account as "%s"' % self.client.account)
94
+ self.default_file_scope: Final[str] = 'user.' + self.client.account
95
+ self.rses = {}
96
+ self.rse_expressions = {}
97
+
98
+ self.trace: "TraceBaseDict" = {
99
+ 'hostname': socket.getfqdn(),
100
+ 'account': self.client.account,
101
+ 'eventType': 'upload',
102
+ 'eventVersion': version.RUCIO_VERSION[0],
103
+ 'vo': self.client.vo if self.client.vo != 'def' else None
104
+ }
105
+
106
    def upload(
            self,
            items: "Iterable[FileToUploadDict]",
            summary_file_path: Optional[str] = None,
            traces_copy_out: Optional[list["TraceBaseDict"]] = None,
            ignore_availability: bool = False,
            activity: Optional[str] = None
    ) -> int:
        """
        :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
            path                  - path of the file that will be uploaded
            rse                   - rse expression/name (e.g. 'CERN-PROD_DATADISK') where to upload the file
            did_scope             - Optional: custom did scope (Default: user.<account>)
            did_name              - Optional: custom did name (Default: name of the file)
            dataset_scope         - Optional: custom dataset scope
            dataset_name          - Optional: custom dataset name
            dataset_meta          - Optional: custom metadata for dataset
            impl                  - Optional: name of the protocol implementation to be used to upload this item.
            force_scheme          - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
            pfn                   - Optional: use a given PFN (this sets no_register to True, and no_register becomes mandatory)
            no_register           - Optional: if True, the file will not be registered in the rucio catalogue
            register_after_upload - Optional: if True, the file will be registered after successful upload
            lifetime              - Optional: the lifetime of the file after it was uploaded
            transfer_timeout      - Optional: time after the upload will be aborted
            guid                  - Optional: guid of the file
            recursive             - Optional: if set, parses the folder structure recursively into collections
        :param summary_file_path: Optional: a path where a summary in form of a json file will be stored
        :param traces_copy_out: reference to an external list, where the traces should be uploaded
        :param ignore_availability: ignore the availability of a RSE
        :param activity: the activity set to the rule if no dataset is specified

        :returns: 0 on success

        :raises InputValidationError: if any input arguments are in a wrong format
        :raises RSEWriteBlocked: if a given RSE is not available for writing
        :raises NoFilesUploaded: if no files were successfully uploaded
        :raises NotAllFilesUploaded: if not all files were successfully uploaded
        """
        # helper to get rse from rse_expression:
        def _pick_random_rse(rse_expression: str) -> dict[str, Any]:
            rses = [r['rse'] for r in self.client.list_rses(rse_expression)]  # can raise InvalidRSEExpression
            random.shuffle(rses)
            return rses[0]

        logger = self.logger
        # one uuid groups all traces emitted by this upload() invocation
        self.trace['uuid'] = generate_uuid()

        # check given sources, resolve dirs into files, and collect meta infos
        files = self._collect_and_validate_file_info(items)
        logger(logging.DEBUG, 'Num. of files that upload client is processing: {}'.format(len(files)))

        # check if RSE of every file is available for writing
        # and cache rse settings
        registered_dataset_dids = set()
        registered_file_dids = set()
        rse_expression = None
        for file in files:
            rse_expression = file['rse']
            # resolve the expression once and reuse the same concrete RSE for all files sharing it
            rse = self.rse_expressions.setdefault(rse_expression, _pick_random_rse(rse_expression))

            if not self.rses.get(rse):
                rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse, vo=self.client.vo))
                if not ignore_availability and rse_settings['availability_write'] != 1:
                    raise RSEWriteBlocked('%s is not available for writing. No actions have been taken' % rse)

            dataset_scope = file.get('dataset_scope')
            dataset_name = file.get('dataset_name')
            file['rse'] = rse
            if dataset_scope and dataset_name:
                dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
                file['dataset_did_str'] = dataset_did_str
                registered_dataset_dids.add(dataset_did_str)

            registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))
        # a DID may not denote a file for one item and a dataset for another
        wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
        if len(wrong_dids):
            raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))
        logger(logging.DEBUG, 'Input validation done.')

        # clear this set again to ensure that we only try to register datasets once
        registered_dataset_dids = set()
        num_succeeded = 0
        summary = []
        for file in files:
            basename = file['basename']
            logger(logging.INFO, 'Preparing upload for file %s' % basename)

            no_register = file.get('no_register')
            register_after_upload = file.get('register_after_upload') and not no_register
            pfn = file.get('pfn')
            force_scheme = file.get('force_scheme')
            impl = file.get('impl')
            delete_existing = False

            trace = copy.deepcopy(self.trace)
            # appending trace to list reference, if the reference exists
            if traces_copy_out is not None:
                traces_copy_out.append(trace)

            rse = file['rse']
            trace['scope'] = file['did_scope']
            trace['datasetScope'] = file.get('dataset_scope', '')
            trace['dataset'] = file.get('dataset_name', '')
            trace['remoteSite'] = rse
            trace['filesize'] = file['bytes']

            file_did = {'scope': file['did_scope'], 'name': file['did_name']}
            dataset_did_str = file.get('dataset_did_str')
            rse_settings = self.rses[rse]
            rse_sign_service = rse_settings.get('sign_url', None)
            is_deterministic = rse_settings.get('deterministic', True)
            if not is_deterministic and not pfn:
                logger(logging.ERROR, 'PFN has to be defined for NON-DETERMINISTIC RSE.')
                continue
            if pfn and is_deterministic:
                logger(logging.WARNING, 'Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
                no_register = True

            # resolving local area networks
            domain = 'wan'
            rse_attributes = {}
            try:
                rse_attributes = self.client.list_rse_attributes(rse)
            except:
                # NOTE(review): bare except deliberately treats attribute lookup as best-effort;
                # the upload proceeds over the WAN domain if attributes are unavailable.
                logger(logging.WARNING, 'Attributes of the RSE: %s not available.' % rse)
            if (self.client_location and 'lan' in rse_settings['domain'] and RseAttr.SITE in rse_attributes):
                if self.client_location['site'] == rse_attributes[RseAttr.SITE]:
                    domain = 'lan'
            logger(logging.DEBUG, '{} domain is used for the upload'.format(domain))

            # FIXME:
            # Rewrite preferred_impl selection - also check test_upload.py/test_download.py and fix impl order (see FIXME there)
            #
            # if not impl and not force_scheme:
            #     impl = self.preferred_impl(rse_settings, domain)

            if not no_register and not register_after_upload:
                self._register_file(file, registered_dataset_dids, ignore_availability=ignore_availability, activity=activity)

            # if register_after_upload, file should be overwritten if it is not registered
            # otherwise if file already exists on RSE we're done
            if register_after_upload:
                if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger):  # type: ignore (pfn is str)
                    try:
                        self.client.get_did(file['did_scope'], file['did_name'])
                        logger(logging.INFO, 'File already registered. Skipping upload.')
                        trace['stateReason'] = 'File already exists'
                        continue
                    except DataIdentifierNotFound:
                        # replica exists on storage but not in the catalogue: overwrite the leftover
                        logger(logging.INFO, 'File already exists on RSE. Previous left overs will be overwritten.')
                        delete_existing = True
            elif not is_deterministic and not no_register:
                if rsemgr.exists(rse_settings, pfn, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger):  # type: ignore (pfn is str)
                    logger(logging.INFO, 'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
                    trace['stateReason'] = 'File already exists'
                    continue
                elif rsemgr.exists(rse_settings, file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger):
                    logger(logging.INFO, 'File already exists on RSE with different pfn. Skipping upload.')
                    trace['stateReason'] = 'File already exists'
                    continue
            else:
                if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger):  # type: ignore (pfn is str)
                    logger(logging.INFO, 'File already exists on RSE. Skipping upload')
                    trace['stateReason'] = 'File already exists'
                    continue

            # protocol handling and upload
            protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=force_scheme, domain=domain, impl=impl)
            protocols.reverse()
            success = False
            state_reason = ''
            logger(logging.DEBUG, str(protocols))
            # try each write protocol in order of preference until one succeeds
            while not success and len(protocols):
                protocol = protocols.pop()
                cur_scheme = protocol['scheme']
                logger(logging.INFO, 'Trying upload with %s to %s' % (cur_scheme, rse))
                lfn: "LFNDict" = {
                    'name': file['did_name'],
                    'scope': file['did_scope']
                }
                lfn['filename'] = basename

                for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                    if checksum_name in file:
                        lfn[checksum_name] = file[checksum_name]

                lfn['filesize'] = file['bytes']

                sign_service = None
                if cur_scheme == 'https':
                    sign_service = rse_sign_service

                trace['protocol'] = cur_scheme
                trace['transferStart'] = time.time()
                logger(logging.DEBUG, 'Processing upload with the domain: {}'.format(domain))
                try:
                    pfn = self._upload_item(rse_settings=rse_settings,
                                            rse_attributes=rse_attributes,
                                            lfn=lfn,
                                            source_dir=file['dirname'],
                                            domain=domain,
                                            impl=impl,
                                            force_scheme=cur_scheme,
                                            force_pfn=pfn,
                                            transfer_timeout=file.get('transfer_timeout'),
                                            delete_existing=delete_existing,
                                            sign_service=sign_service)
                    logger(logging.DEBUG, 'Upload done.')
                    success = True
                    file['upload_result'] = {0: True, 1: None, 'success': True, 'pfn': pfn}  # TODO: needs to be removed
                except (ServiceUnavailable, ResourceTemporaryUnavailable, RSEOperationNotSupported, RucioException) as error:
                    logger(logging.WARNING, 'Upload attempt failed')
                    logger(logging.INFO, 'Exception: %s' % str(error), exc_info=True)
                    state_reason = str(error)

            if success:
                trace['transferEnd'] = time.time()
                trace['clientState'] = 'DONE'
                file['state'] = 'A'
                logger(logging.INFO, 'Successfully uploaded file %s' % basename)
                self._send_trace(cast("TraceDict", trace))

                if summary_file_path:
                    summary.append(copy.deepcopy(file))

                registration_succeeded = True
                if not no_register:
                    if register_after_upload:
                        self._register_file(file, registered_dataset_dids, ignore_availability=ignore_availability, activity=activity)
                    else:
                        # file was registered before the transfer; flip its replica state to available
                        replica_for_api = self._convert_file_for_api(file)
                        try:
                            self.client.update_replicas_states(rse, files=[replica_for_api])
                        except Exception as error:
                            registration_succeeded = False
                            logger(logging.ERROR, 'Failed to update replica state for file {}'.format(basename))
                            logger(logging.DEBUG, 'Details: {}'.format(str(error)))

                # add file to dataset if needed
                if dataset_did_str and not no_register:
                    try:
                        self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])  # type: ignore (`dataset_scope` and `dataset_name` always exist if `dataset_did_str`)
                    except Exception as error:
                        registration_succeeded = False
                        logger(logging.ERROR, 'Failed to attach file to the dataset')
                        logger(logging.DEBUG, 'Attaching to dataset {}'.format(str(error)))

                # only report success if the registration operations succeeded as well
                if registration_succeeded:
                    num_succeeded += 1
            else:
                trace['clientState'] = 'FAILED'
                trace['stateReason'] = state_reason
                self._send_trace(cast('TraceDict', trace))
                logger(logging.ERROR, 'Failed to upload file %s' % basename)

        if summary_file_path:
            logger(logging.DEBUG, 'Summary will be available at {}'.format(summary_file_path))
            final_summary = {}
            for file in summary:
                file_scope = file['did_scope']
                file_name = file['did_name']
                file_did_str = '%s:%s' % (file_scope, file_name)
                final_summary[file_did_str] = {'scope': file_scope,
                                               'name': file_name,
                                               'bytes': file['bytes'],
                                               'rse': file['rse'],
                                               'pfn': file['upload_result'].get('pfn', ''),
                                               'guid': file['meta']['guid']}

                for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                    if checksum_name in file:
                        final_summary[file_did_str][checksum_name] = file[checksum_name]

            with open(summary_file_path, 'w') as summary_file:
                json.dump(final_summary, summary_file, sort_keys=True, indent=1)

        if num_succeeded == 0:
            raise NoFilesUploaded()
        elif num_succeeded != len(files):
            raise NotAllFilesUploaded()
        return 0
388
+
389
+ def _add_bittorrent_meta(self, file: "Mapping[str, Any]") -> None:
390
+ pieces_root, pieces_layers, piece_length = bittorrent_v2_merkle_sha256(os.path.join(file['dirname'], file['basename']))
391
+ bittorrent_meta = {
392
+ 'bittorrent_pieces_root': base64.b64encode(pieces_root).decode(),
393
+ 'bittorrent_pieces_layers': base64.b64encode(pieces_layers).decode(),
394
+ 'bittorrent_piece_length': piece_length,
395
+ }
396
+ self.client.set_metadata_bulk(scope=file['did_scope'], name=file['did_name'], meta=bittorrent_meta)
397
+ self.logger(logging.INFO, f"Added bittorrent metadata to file DID {file['did_scope']}:{file['did_name']}")
398
+
399
    def _register_file(
            self,
            file: "Mapping[str, Any]",
            registered_dataset_dids: set[str],
            ignore_availability: bool = False,
            activity: Optional[str] = None
    ) -> None:
        """
        Registers the given file in Rucio. Creates a dataset if
        needed. Registers the file DID and creates the replication
        rule if needed. Adds a replica to the file did.
        (This function is meant to be used as class internal only)

        :param file: dictionary describing the file
        :param registered_dataset_dids: set of dataset dids that were already registered
        :param ignore_availability: ignore the availability of a RSE
        :param activity: the activity set to the rule if no dataset is specified

        :raises DataIdentifierAlreadyExists: if file DID is already registered and the checksums do not match
        :raises InputValidationError: if the parent dataset exists but a lifetime was given for it
        """
        logger = self.logger
        logger(logging.DEBUG, 'Registering file')

        # verification whether the scope exists
        account_scopes = []
        try:
            account_scopes = self.client.list_scopes_for_account(self.client.account)
        except ScopeNotFound:
            pass
        # missing scope is only warned about here; the add_replicas call below will fail if it truly matters
        if account_scopes and file['did_scope'] not in account_scopes:
            logger(logging.WARNING, 'Scope {} not found for the account {}.'.format(file['did_scope'], self.client.account))

        rse = file['rse']
        dataset_did_str = file.get('dataset_did_str')
        # register a dataset if we need to
        if dataset_did_str and dataset_did_str not in registered_dataset_dids:
            # mark as handled up-front so concurrent items in the same upload() don't retry
            registered_dataset_dids.add(dataset_did_str)
            try:
                logger(logging.DEBUG, 'Trying to create dataset: %s' % dataset_did_str)
                self.client.add_dataset(scope=file['dataset_scope'],
                                        name=file['dataset_name'],
                                        meta=file.get('dataset_meta'),
                                        rules=[{'account': self.client.account,
                                                'copies': 1,
                                                'rse_expression': rse,
                                                'grouping': 'DATASET',
                                                'lifetime': file.get('lifetime')}])
                logger(logging.INFO, 'Successfully created dataset %s' % dataset_did_str)
            except DataIdentifierAlreadyExists:
                logger(logging.INFO, 'Dataset %s already exists - no rule will be created' % dataset_did_str)
                # refusing to silently change the lifetime of a pre-existing parent dataset
                if file.get('lifetime') is not None:
                    raise InputValidationError('Dataset %s exists and lifetime %s given. Prohibited to modify parent dataset lifetime.' % (dataset_did_str,
                                                                                                                                          file.get('lifetime')))
        else:
            logger(logging.DEBUG, 'Skipping dataset registration')

        file_scope = file['did_scope']
        file_name = file['did_name']
        file_did = {'scope': file_scope, 'name': file_name}
        replica_for_api = self._convert_file_for_api(file)
        try:
            # if the remote checksum is different this did must not be used
            meta = self.client.get_metadata(file_scope, file_name)
            logger(logging.INFO, 'File DID already exists')
            logger(logging.DEBUG, 'local checksum: %s, remote checksum: %s' % (file['adler32'], meta['adler32']))

            # lstrip('0') normalizes leading zeros so e.g. '0abc1' and 'abc1' compare equal
            if str(meta['adler32']).lstrip('0') != str(file['adler32']).lstrip('0'):
                logger(logging.ERROR, 'Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
                raise DataIdentifierAlreadyExists

            # add file to rse if it is not registered yet
            replicastate = list(self.client.list_replicas([file_did], all_states=True))
            if rse not in replicastate[0]['rses']:
                self.client.add_replicas(rse=rse, files=[replica_for_api])
                logger(logging.INFO, 'Successfully added replica in Rucio catalogue at %s' % rse)
        except DataIdentifierNotFound:
            logger(logging.DEBUG, 'File DID does not exist')
            self.client.add_replicas(rse=rse, files=[replica_for_api])
            if config_get_bool('client', 'register_bittorrent_meta', default=False):
                self._add_bittorrent_meta(file=file)
            logger(logging.INFO, 'Successfully added replica in Rucio catalogue at %s' % rse)
            if not dataset_did_str:
                # only need to add rules for files if no dataset is given
                self.client.add_replication_rule([file_did], copies=1, rse_expression=rse, lifetime=file.get('lifetime'), ignore_availability=ignore_availability, activity=activity)
                logger(logging.INFO, 'Successfully added replication rule at %s' % rse)
484
+
485
+ def _get_file_guid(self, file: "Mapping[str, Any]") -> str:
486
+ """
487
+ Get the guid of a file, trying different strategies
488
+ (This function is meant to be used as class internal only)
489
+
490
+ :param file: dictionary describing the file
491
+
492
+ :returns: the guid
493
+ """
494
+ guid = file.get('guid')
495
+ if not guid and 'pool.root' in file['basename'].lower() and not file.get('no_register'):
496
+ status, output, err = execute('pool_extractFileIdentifier %s' % file['path'])
497
+ if status != 0:
498
+ msg = 'Trying to upload ROOT files but pool_extractFileIdentifier tool can not be found.\n'
499
+ msg += 'Setup your ATHENA environment and try again.'
500
+ raise RucioException(msg)
501
+ try:
502
+ guid = output.splitlines()[-1].split()[0].replace('-', '').lower()
503
+ except Exception:
504
+ raise RucioException('Error extracting GUID from output of pool_extractFileIdentifier')
505
+ elif guid:
506
+ guid = guid.replace('-', '')
507
+ else:
508
+ guid = generate_uuid()
509
+ return guid
510
+
511
+ def _collect_file_info(
512
+ self,
513
+ filepath: "PathTypeAlias",
514
+ item: "FileToUploadDict"
515
+ ) -> "FileToUploadWithCollectedInfoDict":
516
+ """
517
+ Collects infos (e.g. size, checksums, etc.) about the file and
518
+ returns them as a dictionary
519
+ (This function is meant to be used as class internal only)
520
+
521
+ :param filepath: path where the file is stored
522
+ :param item: input options for the given file
523
+
524
+ :returns: a dictionary containing all collected info and the input options
525
+ """
526
+ new_item = copy.deepcopy(item)
527
+ new_item = cast("FileToUploadWithCollectedInfoDict", new_item)
528
+ new_item['path'] = filepath
529
+ new_item['dirname'] = os.path.dirname(filepath)
530
+ new_item['basename'] = os.path.basename(filepath)
531
+
532
+ new_item['bytes'] = os.stat(filepath).st_size
533
+ new_item['adler32'] = adler32(filepath)
534
+ new_item['md5'] = md5(filepath)
535
+ new_item['meta'] = {'guid': self._get_file_guid(new_item)}
536
+ new_item['state'] = 'C'
537
+ if not new_item.get('did_scope'):
538
+ new_item['did_scope'] = self.default_file_scope
539
+ if not new_item.get('did_name'):
540
+ new_item['did_name'] = new_item['basename']
541
+
542
+ return new_item
543
+
544
+ def _collect_and_validate_file_info(self, items: "Iterable[FileToUploadDict]") -> list["FileToUploadWithCollectedInfoDict"]:
545
+ """
546
+ Checks if there are any inconsistencies within the given input
547
+ options and stores the output of _collect_file_info for every file
548
+ (This function is meant to be used as class internal only)
549
+
550
+ :param filepath: list of dictionaries with all input files and options
551
+
552
+ :returns: a list of dictionaries containing all descriptions of the files to upload
553
+
554
+ :raises InputValidationError: if an input option has a wrong format
555
+ """
556
+ logger = self.logger
557
+ files: list["FileToUploadWithCollectedInfoDict"] = []
558
+ for item in items:
559
+ path = item.get('path')
560
+ pfn = item.get('pfn')
561
+ recursive = item.get('recursive')
562
+ if not path:
563
+ logger(logging.WARNING, 'Skipping source entry because the key "path" is missing')
564
+ continue
565
+ if not item.get('rse'):
566
+ logger(logging.WARNING, 'Skipping file %s because no rse was given' % path)
567
+ continue
568
+ if pfn:
569
+ item['force_scheme'] = pfn.split(':')[0]
570
+ impl = item.get('impl')
571
+ if impl:
572
+ impl_split = impl.split('.')
573
+ if len(impl_split) == 1:
574
+ impl = 'rucio.rse.protocols.' + impl + '.Default'
575
+ else:
576
+ impl = 'rucio.rse.protocols.' + impl
577
+ item['impl'] = impl
578
+ if os.path.isdir(path) and not recursive:
579
+ dname, subdirs, fnames = next(os.walk(path))
580
+ for fname in fnames:
581
+ file = self._collect_file_info(os.path.join(dname, fname), item)
582
+ files.append(file)
583
+ if not len(fnames) and not len(subdirs):
584
+ logger(logging.WARNING, 'Skipping %s because it is empty.' % dname)
585
+ elif not len(fnames):
586
+ logger(logging.WARNING, 'Skipping %s because it has no files in it. Subdirectories are not supported.' % dname)
587
+ elif os.path.isdir(path) and recursive:
588
+ files.extend(cast("list[FileToUploadWithCollectedInfoDict]", self._recursive(item)))
589
+ elif os.path.isfile(path) and not recursive:
590
+ file = self._collect_file_info(path, item)
591
+ files.append(file)
592
+ elif os.path.isfile(path) and recursive:
593
+ logger(logging.WARNING, 'Skipping %s because of --recursive flag' % path)
594
+ else:
595
+ logger(logging.WARNING, 'No such file or directory: %s' % path)
596
+
597
+ if not len(files):
598
+ raise InputValidationError('No valid input files given')
599
+
600
+ return files
601
+
602
+ def _convert_file_for_api(self, file: "Mapping[str, Any]") -> dict[str, Any]:
603
+ """
604
+ Creates a new dictionary that contains only the values
605
+ that are needed for the upload with the correct keys
606
+ (This function is meant to be used as class internal only)
607
+
608
+ :param file: dictionary describing a file to upload
609
+
610
+ :returns: dictionary containing not more then the needed values for the upload
611
+ """
612
+ replica = {}
613
+ replica['scope'] = file['did_scope']
614
+ replica['name'] = file['did_name']
615
+ replica['bytes'] = file['bytes']
616
+ replica['adler32'] = file['adler32']
617
+ replica['md5'] = file['md5']
618
+ replica['meta'] = file['meta']
619
+ replica['state'] = file['state']
620
+ pfn = file.get('pfn')
621
+ if pfn:
622
+ replica['pfn'] = pfn
623
+ return replica
624
+
625
    def _upload_item(
            self,
            rse_settings: "RSESettingsDict",
            rse_attributes: dict[str, Any],
            lfn: "LFNDict",
            source_dir: Optional[str] = None,
            domain: str = 'wan',
            impl: Optional[str] = None,
            force_pfn: Optional[str] = None,
            force_scheme: Optional[str] = None,
            transfer_timeout: Optional[int] = None,
            delete_existing: bool = False,
            sign_service: Optional[str] = None
    ) -> Optional[str]:
        """
        Uploads a file to the connected storage.

        The upload goes through a temporary PFN ('<pfn>.rucio.upload') when the
        protocol supports renaming; leftovers of previous attempts are cleaned up
        first, and after a (optionally skipped) stat/checksum verification the
        temporary file is renamed to its final PFN.

        :param rse_settings: dictionary containing the RSE settings
        :param rse_attributes: dictionary containing the RSE attribute key value pairs
        :param lfn: a single dict containing 'scope' and 'name'.
                    Example:
                     {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'}
                    If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
        :param source_dir: path to the local directory including the source files
        :param domain: network domain to use, either 'wan' (default) or 'lan'
        :param impl: optional protocol implementation to use (fully qualified class path)
        :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
        :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
        :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
        :param delete_existing: delete any unregistered file already present at the target PFN before uploading
        :param sign_service: use the given service (e.g. gcs, s3, swift) to sign the URL

        :returns: the final PFN of the uploaded file (may be None if PFN creation failed and no force_pfn was given)

        :raises FileReplicaAlreadyExists: if the target PFN already exists and neither overwrite nor delete_existing applies
        :raises RSEOperationNotSupported: if cleanup of remains or the upload itself fails
        :raises RucioException(msg): general exception with msg for more details.
        """
        logger = self.logger

        # Construct protocol for write operation.
        # IMPORTANT: All upload stat() checks are always done with the write_protocol EXCEPT for cloud resources (signed URL for write cannot be used for read)
        protocol_write = self._create_protocol(rse_settings, 'write', force_scheme=force_scheme, domain=domain, impl=impl)

        # 'filename' (if present) is the on-disk name; 'name' is the Rucio DID name
        base_name = lfn.get('filename', lfn['name'])
        name = lfn.get('name', base_name)
        scope = lfn['scope']

        # Conditional lfn properties
        if 'adler32' not in lfn and 'md5' not in lfn:
            logger(logging.WARNING, 'Missing checksum for file %s:%s' % (lfn['scope'], name))

        # Getting pfn
        pfn = None
        signed_read_pfn = None
        try:
            pfn = list(protocol_write.lfns2pfns(make_valid_did(lfn)).values())[0]
            logger(logging.DEBUG, 'The PFN created from the LFN: {}'.format(pfn))
        except Exception as error:
            # non-fatal: pfn stays None unless force_pfn is given below
            logger(logging.WARNING, 'Failed to create PFN for LFN: %s' % lfn)
            logger(logging.DEBUG, str(error), exc_info=True)
        if force_pfn:
            pfn = force_pfn
            logger(logging.DEBUG, 'The given PFN is used: {}'.format(pfn))

        # Auth. mostly for object stores
        if sign_service:
            # a write-signed URL cannot be used for reading, so keep separate signed PFNs
            protocol_read = self._create_protocol(rse_settings, 'read', domain=domain, impl=impl)
            if pfn is not None:
                signed_read_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'read', pfn)
                pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'write', pfn)

        # Create a name of tmp file if renaming operation is supported
        pfn_tmp = cast("str", '%s.rucio.upload' % pfn if protocol_write.renaming else pfn)
        signed_read_pfn_tmp = '%s.rucio.upload' % signed_read_pfn if protocol_write.renaming else signed_read_pfn

        # Either DID exists or not register_after_upload
        if protocol_write.overwrite is False and delete_existing is False:
            if sign_service:
                # Construct protocol for read ONLY for cloud resources and get signed URL for GET
                if protocol_read.exists(signed_read_pfn):
                    raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))  # wrong exception ?
            elif protocol_write.exists(pfn):
                raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))  # wrong exception ?

            # Removing tmp from earlier attempts
            # NOTE: relies on short-circuiting -- protocol_read is only bound when sign_service is set
            if (not sign_service and protocol_write.exists(pfn_tmp)) or (sign_service and protocol_read.exists(signed_read_pfn_tmp)):
                logger(logging.DEBUG, 'Removing remains of previous upload attempts.')
                try:
                    # Construct protocol for delete operation.
                    protocol_delete = self._create_protocol(rse_settings, 'delete', force_scheme=force_scheme, domain=domain, impl=impl)
                    delete_pfn = '%s.rucio.upload' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
                    if sign_service:
                        delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
                    protocol_delete.delete(delete_pfn)
                    protocol_delete.close()
                except Exception as error:
                    raise RSEOperationNotSupported('Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))

        # Removing not registered files from earlier attempts
        if delete_existing:
            logger(logging.DEBUG, 'Removing not-registered remains of previous upload attempts.')
            try:
                # Construct protocol for delete operation.
                protocol_delete = self._create_protocol(rse_settings, 'delete', force_scheme=force_scheme, domain=domain, impl=impl)
                delete_pfn = '%s' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
                if sign_service:
                    delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
                protocol_delete.delete(delete_pfn)
                protocol_delete.close()
            except Exception as error:
                raise RSEOperationNotSupported('Unable to remove file %s: %s' % (pfn, str(error)))

        # Process the upload of the tmp file
        try:
            retry(protocol_write.put, base_name, pfn_tmp, source_dir, transfer_timeout=transfer_timeout)(mtries=2, logger=logger)
            logger(logging.INFO, 'Successful upload of temporary file. {}'.format(pfn_tmp))
        except Exception as error:
            raise RSEOperationNotSupported(str(error))

        # Is stat after that upload allowed?
        skip_upload_stat = rse_attributes.get(RseAttr.SKIP_UPLOAD_STAT, False)
        self.logger(logging.DEBUG, 'skip_upload_stat=%s', skip_upload_stat)

        # Checksum verification, obsolete, see Gabriele changes.
        if not skip_upload_stat:
            try:
                stats = self._retry_protocol_stat(protocol_write, pfn_tmp)
                if not isinstance(stats, dict):
                    raise RucioException('Could not get protocol.stats for given PFN: %s' % pfn)

                # The checksum and filesize check
                if ('filesize' in stats) and ('filesize' in lfn):
                    self.logger(logging.DEBUG, 'Filesize: Expected=%s Found=%s' % (lfn['filesize'], stats['filesize']))
                    if int(stats['filesize']) != int(lfn['filesize']):
                        raise RucioException('Filesize mismatch. Source: %s Destination: %s' % (lfn['filesize'], stats['filesize']))
                if rse_settings['verify_checksum'] is not False:
                    if ('adler32' in stats) and ('adler32' in lfn):
                        self.logger(logging.DEBUG, 'Checksum: Expected=%s Found=%s' % (lfn['adler32'], stats['adler32']))
                        # lstrip('0') tolerates differing leading-zero padding of the adler32 hex string
                        if str(stats['adler32']).lstrip('0') != str(lfn['adler32']).lstrip('0'):
                            raise RucioException('Checksum mismatch. Source: %s Destination: %s' % (lfn['adler32'], stats['adler32']))

            except Exception as error:
                raise error

        # The upload finished successful and the file can be renamed
        try:
            if protocol_write.renaming:
                logger(logging.DEBUG, 'Renaming file %s to %s' % (pfn_tmp, pfn))
                protocol_write.rename(pfn_tmp, pfn)  # type: ignore (pfn might be None)
        except Exception:
            raise RucioException('Unable to rename the tmp file %s.' % pfn_tmp)

        protocol_write.close()

        return pfn
774
+
775
+ def _retry_protocol_stat(
776
+ self,
777
+ protocol: "RSEProtocol",
778
+ pfn: str
779
+ ) -> dict[str, Any]:
780
+ """
781
+ Try to stat file, on fail try again 1s, 2s, 4s, 8s, 16s, 32s later. Fail is all fail
782
+ :param protocol: The protocol to use to reach this file
783
+ :param pfn: Physical file name of the target for the protocol stat
784
+ """
785
+ retries = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
786
+ for attempt in range(retries):
787
+ try:
788
+ self.logger(logging.DEBUG, 'stat: pfn=%s' % pfn)
789
+ stats = protocol.stat(pfn)
790
+
791
+ if int(stats['filesize']) == 0:
792
+ raise Exception('Filesize came back as 0. Potential storage race condition, need to retry.')
793
+
794
+ return stats
795
+ except RSEChecksumUnavailable as error:
796
+ # The stat succeeded here, but the checksum failed
797
+ raise error
798
+ except Exception as error:
799
+ self.logger(logging.DEBUG, 'stat: unexpected error=%s' % error)
800
+ fail_str = ['The requested service is not available at the moment', 'Permission refused']
801
+ if any(x in str(error) for x in fail_str):
802
+ raise error
803
+ self.logger(logging.DEBUG, 'stat: unknown edge case, retrying in %ss' % 2**attempt)
804
+ time.sleep(2**attempt)
805
+ return protocol.stat(pfn)
806
+
807
+ def _create_protocol(
808
+ self,
809
+ rse_settings: "RSESettingsDict",
810
+ operation: str,
811
+ impl: Optional[str] = None,
812
+ force_scheme: Optional[str] = None,
813
+ domain: str = 'wan'
814
+ ) -> "RSEProtocol":
815
+ """
816
+ Protocol construction.
817
+ :param rse_settings: rse_settings
818
+ :param operation: activity, e.g. read, write, delete etc.
819
+ :param force_scheme: custom scheme
820
+ :param auth_token: Optionally passing JSON Web Token (OIDC) string for authentication
821
+ """
822
+ try:
823
+ protocol = rsemgr.create_protocol(rse_settings, operation, scheme=force_scheme, domain=domain, impl=impl, auth_token=self.auth_token, logger=self.logger)
824
+ protocol.connect()
825
+ except Exception as error:
826
+ self.logger(logging.WARNING, 'Failed to create protocol for operation: %s' % operation)
827
+ self.logger(logging.DEBUG, 'scheme: %s, exception: %s' % (force_scheme, error))
828
+ raise error
829
+ return protocol
830
+
831
+ def _send_trace(self, trace: "TraceDict") -> None:
832
+ """
833
+ Checks if sending trace is allowed and send the trace.
834
+
835
+ :param trace: the trace
836
+ """
837
+ if self.tracing:
838
+ send_trace(trace, self.client.trace_host, self.client.user_agent)
839
+
840
+ def _recursive(self, item: "FileToUploadDict") -> list["FileToUploadWithCollectedAndDatasetInfoDict"]:
841
+ """
842
+ If the --recursive flag is set, it replicates the folder structure recursively into collections
843
+ A folder only can have either other folders inside or files, but not both of them
844
+ - If it has folders, the root folder will be a container
845
+ - If it has files, the root folder will be a dataset
846
+ - If it is empty, it does not create anything
847
+
848
+ :param item: dictionary containing all descriptions of the files to upload
849
+ """
850
+ files: list["FileToUploadWithCollectedAndDatasetInfoDict"] = []
851
+ datasets: list["DatasetDict"] = []
852
+ containers: list["DIDStringDict"] = []
853
+ attach: "Iterable[AttachDict]" = []
854
+ scope = item.get('did_scope')
855
+ if scope is None:
856
+ scope = self.default_file_scope
857
+ rse = item.get('rse')
858
+ path = item.get('path')
859
+ if path and isinstance(path, str):
860
+ if path[-1] == '/':
861
+ path = path[0:-1]
862
+ i = 0
863
+ path = os.path.abspath(path)
864
+ for root, dirs, fnames in os.walk(path):
865
+ if len(dirs) > 0 and len(fnames) > 0 and i == 0:
866
+ self.logger(logging.ERROR, 'A container can only have either collections or files, not both')
867
+ raise InputValidationError('Invalid input folder structure')
868
+ if len(fnames) > 0:
869
+ datasets.append({'scope': scope, 'name': root.split('/')[-1], 'rse': rse})
870
+ self.logger(logging.DEBUG, 'Appended dataset with DID %s:%s' % (scope, path))
871
+ for fname in fnames:
872
+ file = self._collect_file_info(os.path.join(root, fname), item)
873
+ file = cast("FileToUploadWithCollectedAndDatasetInfoDict", file)
874
+ file['dataset_scope'] = scope
875
+ file['dataset_name'] = root.split('/')[-1]
876
+ files.append(file)
877
+ self.logger(logging.DEBUG, 'Appended file with DID %s:%s' % (scope, fname))
878
+ elif len(dirs) > 0:
879
+ containers.append({'scope': scope, 'name': root.split('/')[-1]})
880
+ self.logger(logging.DEBUG, 'Appended container with DID %s:%s' % (scope, path))
881
+ attach.extend([{'scope': scope, 'name': root.split('/')[-1], 'rse': rse, 'did': {'scope': scope, 'name': dir_}} for dir_ in dirs])
882
+ elif len(dirs) == 0 and len(fnames) == 0:
883
+ self.logger(logging.WARNING, 'The folder %s is empty, skipping' % root)
884
+ continue
885
+ i += 1
886
+ # if everything went ok, replicate the folder structure in Rucio storage
887
+ for dataset in datasets:
888
+ try:
889
+ self.client.add_dataset(scope=dataset['scope'], name=dataset['name'], rse=dataset['rse'])
890
+ self.logger(logging.INFO, 'Created dataset with DID %s:%s' % (dataset['scope'], dataset['name']))
891
+ except RucioException as error:
892
+ self.logger(logging.ERROR, error)
893
+ self.logger(logging.ERROR, 'It was not possible to create dataset with DID %s:%s' % (dataset['scope'], dataset['name']))
894
+ for container in containers:
895
+ try:
896
+ self.client.add_container(scope=container['scope'], name=container['name'])
897
+ self.logger(logging.INFO, 'Created container with DID %s:%s' % (container['scope'], container['name']))
898
+ except RucioException as error:
899
+ self.logger(logging.ERROR, error)
900
+ self.logger(logging.ERROR, 'It was not possible to create dataset with DID %s:%s' % (container['scope'], container['name']))
901
+ for att in attach:
902
+ try:
903
+ self.client.attach_dids(scope=att['scope'], name=att['name'], dids=[att['did']])
904
+ self.logger(logging.INFO, 'DIDs attached to collection %s:%s' % (att['scope'], att['name']))
905
+ except RucioException as error:
906
+ self.logger(logging.ERROR, error)
907
+ self.logger(logging.ERROR, 'It was not possible to attach to collection with DID %s:%s' % (att['scope'], att['name']))
908
+ return files
909
+
910
+ def preferred_impl(
911
+ self,
912
+ rse_settings: "RSESettingsDict",
913
+ domain: str
914
+ ) -> Optional[str]:
915
+ """
916
+ Finds the optimum protocol impl preferred by the client and
917
+ supported by the remote RSE.
918
+
919
+ :param rse_settings: dictionary containing the RSE settings
920
+ :param domain: The network domain, either 'wan' (default) or 'lan'
921
+
922
+ :raises RucioException(msg): general exception with msg for more details.
923
+ """
924
+ preferred_protocols = []
925
+ supported_impl = None
926
+
927
+ try:
928
+ preferred_impls = config_get('upload', 'preferred_impl')
929
+ except Exception as error:
930
+ self.logger(logging.INFO, 'No preferred protocol impl in rucio.cfg: %s' % (error))
931
+ pass
932
+ else:
933
+ preferred_impls = list(preferred_impls.split(', '))
934
+ i = 0
935
+ while i < len(preferred_impls):
936
+ impl = preferred_impls[i]
937
+ impl_split = impl.split('.')
938
+ if len(impl_split) == 1:
939
+ preferred_impls[i] = 'rucio.rse.protocols.' + impl + '.Default'
940
+ else:
941
+ preferred_impls[i] = 'rucio.rse.protocols.' + impl
942
+ i += 1
943
+
944
+ preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if protocol['impl'] in preferred_impls]
945
+
946
+ if len(preferred_protocols) > 0:
947
+ preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if protocol not in preferred_protocols]
948
+ else:
949
+ preferred_protocols = reversed(rse_settings['protocols'])
950
+
951
+ for protocol in preferred_protocols:
952
+ if domain not in list(protocol['domains'].keys()):
953
+ self.logger(logging.DEBUG, 'Unsuitable protocol "%s": Domain %s not supported' % (protocol['impl'], domain))
954
+ continue
955
+ if not all(operations in protocol['domains'][domain] for operations in ("read", "write", "delete")):
956
+ self.logger(logging.DEBUG, 'Unsuitable protocol "%s": All operations are not supported' % (protocol['impl']))
957
+ continue
958
+ try:
959
+ supported_protocol = rsemgr.create_protocol(rse_settings, 'write', domain=domain, impl=protocol['impl'], auth_token=self.auth_token, logger=self.logger)
960
+ supported_protocol.connect()
961
+ except Exception as error:
962
+ self.logger(logging.DEBUG, 'Failed to create protocol "%s", exception: %s' % (protocol['impl'], error))
963
+ pass
964
+ else:
965
+ self.logger(logging.INFO, 'Preferred protocol impl supported locally and remotely: %s' % (protocol['impl']))
966
+ supported_impl = protocol['impl']
967
+ break
968
+
969
+ return supported_impl