rucio-clients 37.5.0__py3-none-any.whl → 37.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rucio-clients might be problematic. Click here for more details.

Files changed (46) hide show
  1. rucio/cli/bin_legacy/rucio.py +41 -22
  2. rucio/cli/bin_legacy/rucio_admin.py +1 -1
  3. rucio/cli/did.py +2 -2
  4. rucio/cli/rse.py +2 -3
  5. rucio/cli/rule.py +9 -5
  6. rucio/cli/subscription.py +1 -1
  7. rucio/client/baseclient.py +9 -4
  8. rucio/client/didclient.py +16 -16
  9. rucio/client/downloadclient.py +16 -15
  10. rucio/client/exportclient.py +45 -4
  11. rucio/client/lockclient.py +3 -3
  12. rucio/client/pingclient.py +35 -4
  13. rucio/client/replicaclient.py +2 -2
  14. rucio/client/touchclient.py +3 -2
  15. rucio/client/uploadclient.py +728 -183
  16. rucio/common/cache.py +1 -2
  17. rucio/common/client.py +4 -30
  18. rucio/common/config.py +27 -3
  19. rucio/common/constants.py +5 -1
  20. rucio/common/didtype.py +2 -2
  21. rucio/common/pcache.py +20 -25
  22. rucio/common/plugins.py +12 -19
  23. rucio/common/policy.py +3 -2
  24. rucio/common/schema/__init__.py +11 -8
  25. rucio/common/types.py +7 -5
  26. rucio/common/utils.py +1 -1
  27. rucio/rse/__init__.py +7 -6
  28. rucio/rse/protocols/ngarc.py +2 -2
  29. rucio/rse/protocols/srm.py +1 -1
  30. rucio/rse/protocols/webdav.py +8 -1
  31. rucio/rse/rsemanager.py +5 -4
  32. rucio/rse/translation.py +2 -2
  33. rucio/vcsversion.py +3 -3
  34. {rucio_clients-37.5.0.dist-info → rucio_clients-37.7.0.dist-info}/METADATA +1 -1
  35. {rucio_clients-37.5.0.dist-info → rucio_clients-37.7.0.dist-info}/RECORD +46 -46
  36. {rucio_clients-37.5.0.data → rucio_clients-37.7.0.data}/data/etc/rse-accounts.cfg.template +0 -0
  37. {rucio_clients-37.5.0.data → rucio_clients-37.7.0.data}/data/etc/rucio.cfg.atlas.client.template +0 -0
  38. {rucio_clients-37.5.0.data → rucio_clients-37.7.0.data}/data/etc/rucio.cfg.template +0 -0
  39. {rucio_clients-37.5.0.data → rucio_clients-37.7.0.data}/data/requirements.client.txt +0 -0
  40. {rucio_clients-37.5.0.data → rucio_clients-37.7.0.data}/data/rucio_client/merge_rucio_configs.py +0 -0
  41. {rucio_clients-37.5.0.data → rucio_clients-37.7.0.data}/scripts/rucio +0 -0
  42. {rucio_clients-37.5.0.data → rucio_clients-37.7.0.data}/scripts/rucio-admin +0 -0
  43. {rucio_clients-37.5.0.dist-info → rucio_clients-37.7.0.dist-info}/WHEEL +0 -0
  44. {rucio_clients-37.5.0.dist-info → rucio_clients-37.7.0.dist-info}/licenses/AUTHORS.rst +0 -0
  45. {rucio_clients-37.5.0.dist-info → rucio_clients-37.7.0.dist-info}/licenses/LICENSE +0 -0
  46. {rucio_clients-37.5.0.dist-info → rucio_clients-37.7.0.dist-info}/top_level.txt +0 -0
@@ -29,7 +29,7 @@ from rucio.common.bittorrent import bittorrent_v2_merkle_sha256
29
29
  from rucio.common.checksum import GLOBALLY_SUPPORTED_CHECKSUMS, adler32, md5
30
30
  from rucio.common.client import detect_client_location
31
31
  from rucio.common.config import config_get, config_get_bool, config_get_int
32
- from rucio.common.constants import RseAttr
32
+ from rucio.common.constants import DEFAULT_VO, RseAttr
33
33
  from rucio.common.exception import (
34
34
  DataIdentifierAlreadyExists,
35
35
  DataIdentifierNotFound,
@@ -56,18 +56,32 @@ if TYPE_CHECKING:
56
56
 
57
57
 
58
58
  class UploadClient:
59
-
60
59
  def __init__(
61
- self,
62
- _client: Optional[Client] = None,
63
- logger: Optional["LoggerFunction"] = None,
64
- tracing: bool = True
60
+ self,
61
+ _client: Optional[Client] = None,
62
+ logger: Optional["LoggerFunction"] = None,
63
+ tracing: bool = True
65
64
  ):
66
65
  """
67
- Initialises the basic settings for an UploadClient object
68
-
69
- :param _client: - Optional: rucio.client.client.Client object. If None, a new object will be created.
70
- :param logger: - Optional: logging.Logger object. If None, default logger will be used.
66
+ Initialize the UploadClient with the necessary configuration to manage file uploads.
67
+
68
+ This method is used to create a new UploadClient instance that can upload files. It
69
+ allows the use of an existing Rucio Client, a custom logger, and tracing for debug
70
+ information during the upload process.
71
+
72
+ Parameters
73
+ ----------
74
+ _client
75
+ An existing Rucio `Client` instance to reuse. If not provided, a new one is created.
76
+ logger
77
+ A logger function. If not provided, the default Python logger is used.
78
+ tracing
79
+ Indicates whether to enable tracing to capture upload activity details.
80
+
81
+ Raises
82
+ ------
83
+ InputValidationError
84
+ If the client account is not found or is invalid, preventing upload setup.
71
85
  """
72
86
  if not logger:
73
87
  self.logger = logging.log
@@ -77,7 +91,9 @@ class UploadClient:
77
91
  self.client: Final[Client] = _client if _client else Client()
78
92
  self.client_location = detect_client_location()
79
93
  # if token should be used, use only JWT tokens
80
- self.auth_token: Optional[str] = self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
94
+ self.auth_token: Optional[str] = (
95
+ self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
96
+ )
81
97
  self.tracing = tracing
82
98
  if not self.tracing:
83
99
  logger(logging.DEBUG, 'Tracing is turned off.')
@@ -86,10 +102,14 @@ class UploadClient:
86
102
  try:
87
103
  acc = self.client.whoami()
88
104
  if acc is None:
89
- raise InputValidationError('account not specified and rucio has no account with your identity')
105
+ raise InputValidationError(
106
+ 'Account not specified and rucio has no account with your identity'
107
+ )
90
108
  self.client.account = acc['account']
91
109
  except RucioException as e:
92
- raise InputValidationError('account not specified and problem with rucio: %s' % e)
110
+ raise InputValidationError(
111
+ f'Account not specified and problem with rucio: {e}'
112
+ )
93
113
  self.logger(logging.DEBUG, 'Discovered account as "%s"' % self.client.account)
94
114
  self.default_file_scope: Final[str] = 'user.' + self.client.account
95
115
  self.rses = {}
@@ -100,7 +120,7 @@ class UploadClient:
100
120
  'account': self.client.account,
101
121
  'eventType': 'upload',
102
122
  'eventVersion': version.RUCIO_VERSION[0],
103
- 'vo': self.client.vo if self.client.vo != 'def' else None
123
+ 'vo': self.client.vo if self.client.vo != DEFAULT_VO else None
104
124
  }
105
125
 
106
126
  def upload(
@@ -112,34 +132,205 @@ class UploadClient:
112
132
  activity: Optional[str] = None
113
133
  ) -> int:
114
134
  """
115
- :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
116
- path - path of the file that will be uploaded
117
- rse - rse expression/name (e.g. 'CERN-PROD_DATADISK') where to upload the file
118
- did_scope - Optional: custom did scope (Default: user.<account>)
119
- did_name - Optional: custom did name (Default: name of the file)
120
- dataset_scope - Optional: custom dataset scope
121
- dataset_name - Optional: custom dataset name
122
- dataset_meta - Optional: custom metadata for dataset
123
- impl - Optional: name of the protocol implementation to be used to upload this item.
124
- force_scheme - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
125
- pfn - Optional: use a given PFN (this sets no_register to True, and no_register becomes mandatory)
126
- no_register - Optional: if True, the file will not be registered in the rucio catalogue
127
- register_after_upload - Optional: if True, the file will be registered after successful upload
128
- lifetime - Optional: the lifetime of the file after it was uploaded
129
- transfer_timeout - Optional: time after the upload will be aborted
130
- guid - Optional: guid of the file
131
- recursive - Optional: if set, parses the folder structure recursively into collections
132
- :param summary_file_path: Optional: a path where a summary in form of a json file will be stored
133
- :param traces_copy_out: reference to an external list, where the traces should be uploaded
134
- :param ignore_availability: ignore the availability of a RSE
135
- :param activity: the activity set to the rule if no dataset is specified
136
-
137
- :returns: 0 on success
138
-
139
- :raises InputValidationError: if any input arguments are in a wrong format
140
- :raises RSEWriteBlocked: if a given RSE is not available for writing
141
- :raises NoFilesUploaded: if no files were successfully uploaded
142
- :raises NotAllFilesUploaded: if not all files were successfully uploaded
135
+ Uploads one or more files to an RSE (Rucio Storage Element) and optionally registers them.
136
+
137
+ An overview of this method's performed actions:
138
+
139
+ 1. Collects and validates file info from the passed `items` (directories may
140
+ also be included), ensuring valid paths exist on the local filesystem. If an RSE
141
+ expression is provided, a single RSE is picked at random from it.
142
+
143
+ 2. Checks the RSE's availability for writing (unless `ignore_availability` is True).
144
+
145
+ 3. Optionally registers each file in the Rucio Catalog, handling the DID creation,
146
+ dataset creation/attachment, and replication rules as needed.
147
+
148
+ 4. Uploads the files using the underlying protocol handlers and verifies checksums
149
+ if desired/possible. Partial or failed uploads raise exceptions.
150
+
151
+ 5. (Optional) Produces a JSON summary file at `summary_file_path`, listing the final
152
+ PFNs, checksums, and other info for all successfully uploaded files.
153
+
154
+ Parameters
155
+ ----------
156
+ items
157
+ A sequence of dictionaries, each describing a file to upload (or a
158
+ directory to be scanned). For each item, the supported keys are:
159
+
160
+ * **`path`** (PathTypeAlias, required):
161
+ The local path to the file or directory. If this is a directory and
162
+ `recursive` is True, the directory (and its subdirectories) are traversed.
163
+
164
+ * **`rse`** (str, required):
165
+ The target RSE or an RSE expression where the upload should be placed. If
166
+ an expression is provided (e.g., "tier=1"), one RSE from that expression
167
+ is chosen randomly.
168
+
169
+ * **`did_scope`** (str, not required):
170
+ The Rucio scope in which to register the file DID. Defaults to `user.<account>`.
171
+
172
+ * **`did_name`** (str, not required):
173
+ The logical filename in Rucio. Defaults to the local basename if not provided.
174
+
175
+ * **`lifetime`** (int, not required):
176
+ The lifetime (in seconds) to apply when creating a new replication rule.
177
+ For file uploads without a dataset, a new rule with that lifetime is created
178
+ if the file DID does not already exist in Rucio. For a new dataset, the
179
+ dataset is created with a rule using this lifetime, but if the dataset
180
+ already exists and you specify a lifetime, an error is raised.
181
+
182
+ _**Note:**_ **`lifetime`** is not automatically applied to nested containers
183
+ or datasets in recursive mode.
184
+
185
+ * **`impl`** (str, not required):
186
+ Name of the protocol implementation to be used for uploading this item.
187
+ For example, `"rucio.rse.protocols.gfal.Default"`.
188
+
189
+ * **`pfn`** (str, not required):
190
+ Allows you to explicitly set the Physical File Name (PFN) for the upload,
191
+ determining exactly where the file is placed on the storage. However, for
192
+ deterministic RSEs, specifying a PFN causes the client to skip registering
193
+ the file under the usual deterministic scheme. For non-deterministic RSEs,
194
+ you can still force the file to be registered in the Rucio catalog after
195
+ being uploaded, using `no_register=False` along with `register_after_upload=True`
196
+ (or by manually handling the registration later).
197
+
198
+ * **`force_scheme`** (str, not required):
199
+ Enforces the use of a specific protocol scheme (e.g., davs, https) during
200
+ file uploads. If the selected protocol is not compatible, the upload will
201
+ stop and raise an error instead of falling back to any other scheme.
202
+
203
+ * **`transfer_timeout`** (int, not required):
204
+ A maximum duration (in seconds) to wait for each individual file transfer
205
+ to complete. If the file transfer does not finish before this timeout
206
+ elapses, the operation will be aborted and retried one last time. When
207
+ transfer_timeout is None, no specific timeout is enforced, and the transfer
208
+ may continue until it completes or fails for another reason.
209
+
210
+ * **`guid`** (str, not required):
211
+ If provided, Rucio will use this GUID. If not provided and the file is
212
+ “pool.root” with `no_register` unset, Rucio tries to extract the GUID via
213
+ `pool_extractFileIdentifier`, raising an error if that fails. Otherwise, a
214
+ random GUID will be generated.
215
+
216
+ * **`no_register`** (bool, not required, default=False):
217
+ If set to True, the file is not registered in the Rucio Catalog, i.e., there
218
+ is no DID creation, no replica entry, and no rules. This is appropriate if
219
+ you plan to register the replica or create rules separately.
220
+
221
+ _**Note:**_ If **`recursive`**=True, the method still creates datasets
222
+ and/or containers for the directories when needed.
223
+
224
+ * **`register_after_upload`** (bool, not required, default=False):
225
+ If set to True, the file is uploaded first, and only then is the DID created
226
+ or updated in the Catalog. This can be useful when you want the actual data
227
+ on storage before finalizing the registration. By default (False), the file
228
+ is registered in Rucio before the physical upload if `no_register` is False.
229
+
230
+ * **`recursive`** (bool, not required, default=False):
231
+ If set to `True`, the method treats the specified path as a directory and
232
+ (depending on the combination with other parameters) recursively traverses
233
+ its subdirectories, mapping them into container/dataset hierarchies. Single
234
+ top-level file paths are ignored, but individual files found in subdirectories
235
+ are processed. Empty directories or non-existent paths also produce a warning.
236
+ If `False`, only top-level file paths or the direct child files of the
237
+ given top-level directory are processed (subdirectories are ignored,
238
+ and no container structure is created).
239
+
240
+ * **`dataset_scope`** / **`dataset_name`** (str, not required):
241
+ To register uploaded files into a dataset DID, you need to specify both
242
+ dataset_name and dataset_scope. With no_register=False, the client ensures
243
+ {dataset_scope}:{dataset_name} exists (creating it with a replication rule
244
+ if it doesn't), or simply attaching new files if it does. If the dataset
245
+ already exists and you specify a new lifetime, or if a checksum mismatch
246
+ is detected, registration fails. In non-recursive mode, only files in the
247
+ top-level directory are attached to the dataset and subdirectories are
248
+ skipped with a warning. In recursive mode, the client aims to create
249
+ containers for directories containing only subdirectories and datasets for
250
+ directories containing only files (raising an error if the top-level folder
251
+ mixes files and directories). If the top-level directory has subdirectories,
252
+ the user-supplied dataset_name is effectively ignored at that level (each
253
+ subdirectory becomes its own dataset or container); if there are no
254
+ subdirectories, the entire folder is registered as a single dataset.
255
+
256
+ * **`dataset_meta`** (dict, not required):
257
+ Additional metadata (e.g., `{'project': 'myProject'}`) to attach to the
258
+ newly created dataset when: the dataset does not already exist, `recursive=False`,
259
+ `no_register=False` and both `dataset_scope` and `dataset_name` are provided.
260
+
261
+ _**Note:**_ If multiple files share the same `dataset_scope` and `dataset_name`,
262
+ the created dataset considers only the first item’s dataset_meta.
263
+ summary_file_path
264
+ If specified, a JSON file is created with a summary of each successfully
265
+ uploaded file, including checksum, PFN, scope, and name entries.
266
+ traces_copy_out
267
+ A list reference for collecting the trace dictionaries that Rucio generates
268
+ while iterating over each file. A new trace dictionary is appended to this list
269
+ for each file considered (even those ultimately skipped or already on the RSE).
270
+ ignore_availability
271
+ If set to True, the RSE's "write availability" is not enforced. By default,
272
+ this is False, and an RSE marked as unavailable for writing will raise an error.
273
+ activity
274
+ If you are uploading files without a parent dataset, this string sets the “activity”
275
+ on the replication rule that Rucio creates for each file (e.g., "Analysis"),
276
+ which can affect RSE queue priorities.
277
+
278
+ _**Note:**_ If your files are uploaded into a dataset, the dataset’s replication
279
+ rule does not use this activity parameter.
280
+
281
+ Returns
282
+ -------
283
+ int
284
+ Status code (``0`` if all files were uploaded successfully).
285
+
286
+ Raises
287
+ ------
288
+ NoFilesUploaded
289
+ Raised if none of the requested files could be uploaded.
290
+ NotAllFilesUploaded
291
+ Raised if some files were successfully uploaded, but others failed.
292
+ RSEWriteBlocked
293
+ Raised if `ignore_availability=False` but the chosen RSE does not allow writing.
294
+ InputValidationError
295
+ Raised if mandatory fields are missing, if conflicting DIDs are found,
296
+ or if no valid files remain after input parsing.
297
+
298
+ Examples
299
+ --------
300
+ ??? Example
301
+
302
+ Upload a single local file to the *CERN-PROD* RSE and write a JSON summary to
303
+ ``upload_summary.json``:
304
+
305
+ ```python
306
+ from rucio.client.uploadclient import UploadClient
307
+ upload_client = UploadClient()
308
+ items = [
309
+ {"path": "/data/file1.txt",
310
+ "rse": "CERN-PROD", # target RSE
311
+ "did_scope": "user.alice", # optional; defaults to user.<account>
312
+ "did_name": "file1.txt"} # optional; defaults to basename
313
+ ]
314
+ upload_client.upload(items, summary_file_path="upload_summary.json")
315
+ ```
316
+
317
+ Recursively upload every file found under ``/data/dataset`` into a new
318
+ dataset ``user.alice:mydataset`` on a random RSE that matches the
319
+ expression ``tier=1``; collect per-file *trace* dictionaries for later
320
+ inspection:
321
+
322
+ ```python
323
+ traces: list[TraceBaseDict] = []
324
+ dir_item = {
325
+ "path": "/data/dataset",
326
+ "rse": "tier=1", # RSE expression; one will be chosen
327
+ "recursive": True,
328
+ "dataset_scope": "user.alice",
329
+ "dataset_name": "mydataset",
330
+ "dataset_meta": {"project": "demo"},
331
+ }
332
+ upload_client.upload([dir_item], traces_copy_out=traces)
333
+ ```
143
334
  """
144
335
  # helper to get rse from rse_expression:
145
336
  def _pick_random_rse(rse_expression: str) -> dict[str, Any]:
@@ -198,7 +389,7 @@ class UploadClient:
198
389
  delete_existing = False
199
390
 
200
391
  trace = copy.deepcopy(self.trace)
201
- # appending trace to list reference, if the reference exists
392
+ # appending trace to the list reference if the reference exists
202
393
  if traces_copy_out is not None:
203
394
  traces_copy_out.append(trace)
204
395
 
@@ -218,7 +409,8 @@ class UploadClient:
218
409
  logger(logging.ERROR, 'PFN has to be defined for NON-DETERMINISTIC RSE.')
219
410
  continue
220
411
  if pfn and is_deterministic:
221
- logger(logging.WARNING, 'Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
412
+ logger(logging.WARNING,
413
+ 'Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
222
414
  no_register = True
223
415
 
224
416
  # resolving local area networks
@@ -228,7 +420,7 @@ class UploadClient:
228
420
  rse_attributes = self.client.list_rse_attributes(rse)
229
421
  except:
230
422
  logger(logging.WARNING, 'Attributes of the RSE: %s not available.' % rse)
231
- if (self.client_location and 'lan' in rse_settings['domain'] and RseAttr.SITE in rse_attributes):
423
+ if self.client_location and 'lan' in rse_settings['domain'] and RseAttr.SITE in rse_attributes:
232
424
  if self.client_location['site'] == rse_attributes[RseAttr.SITE]:
233
425
  domain = 'lan'
234
426
  logger(logging.DEBUG, '{} domain is used for the upload'.format(domain))
@@ -240,12 +432,22 @@ class UploadClient:
240
432
  # impl = self.preferred_impl(rse_settings, domain)
241
433
 
242
434
  if not no_register and not register_after_upload:
243
- self._register_file(file, registered_dataset_dids, ignore_availability=ignore_availability, activity=activity)
435
+ self._register_file(file,
436
+ registered_dataset_dids,
437
+ ignore_availability=ignore_availability,
438
+ activity=activity)
244
439
 
245
- # if register_after_upload, file should be overwritten if it is not registered
246
- # otherwise if file already exists on RSE we're done
440
+ # if register_after_upload, the file should be overwritten if it is not registered,
441
+ # otherwise if the file already exists on RSE we're done
247
442
  if register_after_upload:
248
- if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger): # type: ignore (pfn is str)
443
+ if rsemgr.exists(rse_settings,
444
+ pfn if pfn else file_did, # type: ignore (pfn is str)
445
+ domain=domain,
446
+ scheme=force_scheme,
447
+ impl=impl,
448
+ auth_token=self.auth_token,
449
+ vo=self.client.vo,
450
+ logger=logger):
249
451
  try:
250
452
  self.client.get_did(file['did_scope'], file['did_name'])
251
453
  logger(logging.INFO, 'File already registered. Skipping upload.')
@@ -255,22 +457,48 @@ class UploadClient:
255
457
  logger(logging.INFO, 'File already exists on RSE. Previous left overs will be overwritten.')
256
458
  delete_existing = True
257
459
  elif not is_deterministic and not no_register:
258
- if rsemgr.exists(rse_settings, pfn, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger): # type: ignore (pfn is str)
259
- logger(logging.INFO, 'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
460
+ if rsemgr.exists(rse_settings,
461
+ pfn, # type: ignore (pfn is str)
462
+ domain=domain,
463
+ scheme=force_scheme,
464
+ impl=impl,
465
+ auth_token=self.auth_token,
466
+ vo=self.client.vo,
467
+ logger=logger):
468
+ logger(logging.INFO,
469
+ 'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
260
470
  trace['stateReason'] = 'File already exists'
261
471
  continue
262
- elif rsemgr.exists(rse_settings, file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger):
472
+ elif rsemgr.exists(rse_settings,
473
+ file_did,
474
+ domain=domain,
475
+ scheme=force_scheme,
476
+ impl=impl,
477
+ auth_token=self.auth_token,
478
+ vo=self.client.vo,
479
+ logger=logger):
263
480
  logger(logging.INFO, 'File already exists on RSE with different pfn. Skipping upload.')
264
481
  trace['stateReason'] = 'File already exists'
265
482
  continue
266
483
  else:
267
- if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, scheme=force_scheme, impl=impl, auth_token=self.auth_token, vo=self.client.vo, logger=logger): # type: ignore (pfn is str)
484
+ if rsemgr.exists(rse_settings,
485
+ pfn if pfn else file_did, # type: ignore (pfn is str)
486
+ domain=domain,
487
+ scheme=force_scheme,
488
+ impl=impl,
489
+ auth_token=self.auth_token,
490
+ vo=self.client.vo,
491
+ logger=logger):
268
492
  logger(logging.INFO, 'File already exists on RSE. Skipping upload')
269
493
  trace['stateReason'] = 'File already exists'
270
494
  continue
271
495
 
272
496
  # protocol handling and upload
273
- protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=force_scheme, domain=domain, impl=impl)
497
+ protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings,
498
+ operation='write',
499
+ scheme=force_scheme,
500
+ domain=domain,
501
+ impl=impl)
274
502
  protocols.reverse()
275
503
  success = False
276
504
  state_reason = ''
@@ -279,11 +507,9 @@ class UploadClient:
279
507
  protocol = protocols.pop()
280
508
  cur_scheme = protocol['scheme']
281
509
  logger(logging.INFO, 'Trying upload with %s to %s' % (cur_scheme, rse))
282
- lfn: "LFNDict" = {
283
- 'name': file['did_name'],
284
- 'scope': file['did_scope']
285
- }
286
- lfn['filename'] = basename
510
+ lfn: "LFNDict" = {'name': file['did_name'],
511
+ 'scope': file['did_scope'],
512
+ 'filename': basename}
287
513
 
288
514
  for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
289
515
  if checksum_name in file:
@@ -313,7 +539,10 @@ class UploadClient:
313
539
  logger(logging.DEBUG, 'Upload done.')
314
540
  success = True
315
541
  file['upload_result'] = {0: True, 1: None, 'success': True, 'pfn': pfn} # TODO: needs to be removed
316
- except (ServiceUnavailable, ResourceTemporaryUnavailable, RSEOperationNotSupported, RucioException) as error:
542
+ except (ServiceUnavailable,
543
+ ResourceTemporaryUnavailable,
544
+ RSEOperationNotSupported,
545
+ RucioException) as error:
317
546
  logger(logging.WARNING, 'Upload attempt failed')
318
547
  logger(logging.INFO, 'Exception: %s' % str(error), exc_info=True)
319
548
  state_reason = str(error)
@@ -331,7 +560,10 @@ class UploadClient:
331
560
  registration_succeeded = True
332
561
  if not no_register:
333
562
  if register_after_upload:
334
- self._register_file(file, registered_dataset_dids, ignore_availability=ignore_availability, activity=activity)
563
+ self._register_file(file,
564
+ registered_dataset_dids,
565
+ ignore_availability=ignore_availability,
566
+ activity=activity)
335
567
  else:
336
568
  replica_for_api = self._convert_file_for_api(file)
337
569
  try:
@@ -341,10 +573,13 @@ class UploadClient:
341
573
  logger(logging.ERROR, 'Failed to update replica state for file {}'.format(basename))
342
574
  logger(logging.DEBUG, 'Details: {}'.format(str(error)))
343
575
 
344
- # add file to dataset if needed
576
+ # add the file to dataset if needed
345
577
  if dataset_did_str and not no_register:
346
578
  try:
347
- self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did]) # type: ignore (`dataset_scope` and `dataset_name` always exist if `dataset_did_str`)
579
+ self.client.attach_dids(
580
+ file['dataset_scope'], # type: ignore (`dataset_scope` always exists if `dataset_did_str`)
581
+ file['dataset_name'], # type: ignore (`dataset_name` always exists if `dataset_did_str`)
582
+ [file_did])
348
583
  except Exception as error:
349
584
  registration_succeeded = False
350
585
  logger(logging.ERROR, 'Failed to attach file to the dataset')
@@ -386,15 +621,31 @@ class UploadClient:
386
621
  raise NotAllFilesUploaded()
387
622
  return 0
388
623
 
389
- def _add_bittorrent_meta(self, file: "Mapping[str, Any]") -> None:
390
- pieces_root, pieces_layers, piece_length = bittorrent_v2_merkle_sha256(os.path.join(file['dirname'], file['basename']))
624
+ def _add_bittorrent_meta(
625
+ self,
626
+ file: "Mapping[str, Any]"
627
+ ) -> None:
628
+ """
629
+ Add BitTorrent v2 metadata to the file DID.
630
+
631
+ This method calculates the BitTorrent v2 pieces root, layers, and piece length for
632
+ the specified local file, and updates the file DID's metadata with these values.
633
+
634
+ Parameters
635
+ ----------
636
+ file
637
+ A dictionary that must include 'dirname', 'basename', 'did_scope',
638
+ and 'did_name', describing the file path and the associated DID.
639
+ """
640
+ pieces_root, pieces_layers, piece_length = bittorrent_v2_merkle_sha256(
641
+ os.path.join(file['dirname'], file['basename']))
391
642
  bittorrent_meta = {
392
643
  'bittorrent_pieces_root': base64.b64encode(pieces_root).decode(),
393
644
  'bittorrent_pieces_layers': base64.b64encode(pieces_layers).decode(),
394
645
  'bittorrent_piece_length': piece_length,
395
646
  }
396
647
  self.client.set_metadata_bulk(scope=file['did_scope'], name=file['did_name'], meta=bittorrent_meta)
397
- self.logger(logging.INFO, f"Added bittorrent metadata to file DID {file['did_scope']}:{file['did_name']}")
648
+ self.logger(logging.INFO, f"Added BitTorrent metadata to file DID {file['did_scope']}:{file['did_name']}")
398
649
 
399
650
  def _register_file(
400
651
  self,
@@ -404,17 +655,33 @@ class UploadClient:
404
655
  activity: Optional[str] = None
405
656
  ) -> None:
406
657
  """
407
- Registers the given file in Rucio. Creates a dataset if
408
- needed. Registers the file DID and creates the replication
409
- rule if needed. Adds a replica to the file did.
410
- (This function is meant to be used as class internal only)
411
-
412
- :param file: dictionary describing the file
413
- :param registered_dataset_dids: set of dataset dids that were already registered
414
- :param ignore_availability: ignore the availability of a RSE
415
- :param activity: the activity set to the rule if no dataset is specified
416
-
417
- :raises DataIdentifierAlreadyExists: if file DID is already registered and the checksums do not match
658
+ Register a single file DID in Rucio, optionally creating its parent dataset if needed.
659
+
660
+ Ensures that a file is known in the Rucio catalog under the specified scope. If a
661
+ dataset is specified in `file` and it does not yet exist, the method creates it and
662
+ attaches the file to that dataset, applying replication rules as appropriate. If no
663
+ dataset is provided and the file DID does not yet exist in Rucio, the method creates
664
+ a replication rule for the newly added file. If the file DID already exists, no new
665
+ top-level rule is created (the file’s existing rules or attachments remain unchanged).
666
+ Checksums are compared to prevent conflicts if the file is already registered.
667
+
668
+ Parameters
669
+ ----------
670
+ file
671
+ A dictionary containing file information (e.g., 'did_scope', 'did_name', 'adler32', etc.).
672
+ registered_dataset_dids
673
+ A set of dataset DIDs already registered to avoid duplicates.
674
+ ignore_availability
675
+ If True, creates replication rules even when the RSE is marked unavailable.
676
+ activity
677
+ Specifies the transfer activity (e.g., 'User Subscriptions') for the replication rule.
678
+
679
+ Raises
680
+ ------
681
+ InputValidationError
682
+ If a dataset already exists, but the caller attempts to set a new lifetime for it.
683
+ DataIdentifierAlreadyExists
684
+ If the local checksum differs from the remote checksum.
418
685
  """
419
686
  logger = self.logger
420
687
  logger(logging.DEBUG, 'Registering file')
@@ -426,7 +693,8 @@ class UploadClient:
426
693
  except ScopeNotFound:
427
694
  pass
428
695
  if account_scopes and file['did_scope'] not in account_scopes:
429
- logger(logging.WARNING, 'Scope {} not found for the account {}.'.format(file['did_scope'], self.client.account))
696
+ logger(logging.WARNING,
697
+ 'Scope {} not found for the account {}.'.format(file['did_scope'], self.client.account))
430
698
 
431
699
  rse = file['rse']
432
700
  dataset_did_str = file.get('dataset_did_str')
@@ -447,8 +715,8 @@ class UploadClient:
447
715
  except DataIdentifierAlreadyExists:
448
716
  logger(logging.INFO, 'Dataset %s already exists - no rule will be created' % dataset_did_str)
449
717
  if file.get('lifetime') is not None:
450
- raise InputValidationError('Dataset %s exists and lifetime %s given. Prohibited to modify parent dataset lifetime.' % (dataset_did_str,
451
- file.get('lifetime')))
718
+ raise InputValidationError(
719
+ 'Dataset %s exists and lifetime %s given. Prohibited to modify parent dataset lifetime.' % (dataset_did_str, file.get('lifetime')))
452
720
  else:
453
721
  logger(logging.DEBUG, 'Skipping dataset registration')
454
722
 
@@ -457,16 +725,17 @@ class UploadClient:
457
725
  file_did = {'scope': file_scope, 'name': file_name}
458
726
  replica_for_api = self._convert_file_for_api(file)
459
727
  try:
460
- # if the remote checksum is different this did must not be used
728
+ # if the remote checksum is different, this DID must not be used
461
729
  meta = self.client.get_metadata(file_scope, file_name)
462
730
  logger(logging.INFO, 'File DID already exists')
463
731
  logger(logging.DEBUG, 'local checksum: %s, remote checksum: %s' % (file['adler32'], meta['adler32']))
464
732
 
465
733
  if str(meta['adler32']).lstrip('0') != str(file['adler32']).lstrip('0'):
466
- logger(logging.ERROR, 'Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
734
+ logger(logging.ERROR,
735
+ 'Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
467
736
  raise DataIdentifierAlreadyExists
468
737
 
469
- # add file to rse if it is not registered yet
738
+ # add the file to rse if it is not registered yet
470
739
  replicastate = list(self.client.list_replicas([file_did], all_states=True))
471
740
  if rse not in replicastate[0]['rses']:
472
741
  self.client.add_replicas(rse=rse, files=[replica_for_api])
@@ -479,17 +748,51 @@ class UploadClient:
479
748
  logger(logging.INFO, 'Successfully added replica in Rucio catalogue at %s' % rse)
480
749
  if not dataset_did_str:
481
750
  # only need to add rules for files if no dataset is given
482
- self.client.add_replication_rule([file_did], copies=1, rse_expression=rse, lifetime=file.get('lifetime'), ignore_availability=ignore_availability, activity=activity)
751
+ self.client.add_replication_rule([file_did],
752
+ copies=1,
753
+ rse_expression=rse,
754
+ lifetime=file.get('lifetime'),
755
+ ignore_availability=ignore_availability,
756
+ activity=activity)
483
757
  logger(logging.INFO, 'Successfully added replication rule at %s' % rse)
484
758
 
485
- def _get_file_guid(self, file: "Mapping[str, Any]") -> str:
759
+ def _get_file_guid(
760
+ self,
761
+ file: "Mapping[str, Any]"
762
+ ) -> str:
486
763
  """
487
- Get the guid of a file, trying different strategies
488
- (This function is meant to be used as class internal only)
764
+ Returns the unique identifier (GUID) for the given file.
765
+
766
+ If no GUID exists and the filename suggests a ROOT file, it extracts it with
767
+ `pool_extractFileIdentifier`. If a GUID exists, it is returned without dashes.
768
+ Otherwise, a new GUID is generated.
769
+
770
+ Parameters
771
+ ----------
772
+ file
773
+ A dictionary describing the file, expected to include:
774
+
775
+ * **`basename`**:
776
+ The base filename.
489
777
 
490
- :param file: dictionary describing the file
778
+ * **`path`**:
779
+ The path to the file.
491
780
 
492
- :returns: the guid
781
+ * **`guid`** (optional):
782
+ A pre-assigned GUID string.
783
+
784
+ * **`no_register`** (optional):
785
+ If True, skip attempts to derive a GUID for ROOT files.
786
+
787
+ Returns
788
+ -------
789
+ str
790
+ A string containing the file's GUID, stripped of dashes and in lowercase.
791
+
792
+ Raises
793
+ ------
794
+ RucioException
795
+ If GUID extraction using the `pool_extractFileIdentifier` command fails.
493
796
  """
494
797
  guid = file.get('guid')
495
798
  if not guid and 'pool.root' in file['basename'].lower() and not file.get('no_register'):
@@ -514,14 +817,24 @@ class UploadClient:
514
817
  item: "FileToUploadDict"
515
818
  ) -> "FileToUploadWithCollectedInfoDict":
516
819
  """
517
- Collects infos (e.g. size, checksums, etc.) about the file and
518
- returns them as a dictionary
519
- (This function is meant to be used as class internal only)
520
-
521
- :param filepath: path where the file is stored
522
- :param item: input options for the given file
523
-
524
- :returns: a dictionary containing all collected info and the input options
820
+ Collects and returns essential file descriptors (e.g., size, checksums, GUID, etc.).
821
+
822
+ This method computes the file's size, calculates its Adler-32 and MD5 checksums,
823
+ and retrieves the file's GUID. These values, along with other existing fields from
824
+ the input dictionary, are returned in a new dictionary.
825
+
826
+ Parameters
827
+ ----------
828
+ filepath
829
+ The local filesystem path to the file.
830
+ item
831
+ A dictionary containing initial upload parameters (e.g., RSE name, scope) for the
832
+ file. Some of its fields may be updated or augmented in the returned dictionary.
833
+
834
+ Returns
835
+ -------
836
+ "FileToUploadWithCollectedInfoDict"
837
+ A new dictionary enriched with relevant file descriptors.
525
838
  """
526
839
  new_item = copy.deepcopy(item)
527
840
  new_item = cast("FileToUploadWithCollectedInfoDict", new_item)
@@ -541,17 +854,51 @@ class UploadClient:
541
854
 
542
855
  return new_item
543
856
 
544
- def _collect_and_validate_file_info(self, items: "Iterable[FileToUploadDict]") -> list["FileToUploadWithCollectedInfoDict"]:
857
+ def _collect_and_validate_file_info(
858
+ self,
859
+ items: "Iterable[FileToUploadDict]"
860
+ ) -> list["FileToUploadWithCollectedInfoDict"]:
545
861
  """
546
- Checks if there are any inconsistencies within the given input
547
- options and stores the output of _collect_file_info for every file
548
- (This function is meant to be used as class internal only)
549
-
550
- :param filepath: list of dictionaries with all input files and options
551
-
552
- :returns: a list of dictionaries containing all descriptions of the files to upload
553
-
554
- :raises InputValidationError: if an input option has a wrong format
862
+ Collect and verify local file info for upload, optionally registering folders as
863
+ datasets/containers.
864
+
865
+ This method iterates over the provided items, each describing a local path and
866
+ associated upload parameters, checks that each item has a valid path and RSE, and
867
+ computes basic file details such as size and checksums. If the item is a directory
868
+ and `recursive` is set, the method calls `_recursive` to traverse subdirectories,
869
+ creating or attaching them as Rucio datasets or containers.
870
+
871
+ Parameters
872
+ ----------
873
+ items
874
+ An iterable of dictionaries describing files or directories, where each dictionary
875
+ typically has:
876
+
877
+ * **`path`**:
878
+ Local file system path
879
+
880
+ * **`rse`**:
881
+ Name of the RSE destination
882
+
883
+ * **`pfn`** (optional):
884
+ Physical file name (PFN)
885
+
886
+ * **`impl`** (optional):
887
+ Protocol implementation
888
+
889
+ * **`recursive`** (optional):
890
+ Whether to traverse directories recursively
891
+
892
+ Returns
893
+ -------
894
+ list["FileToUploadWithCollectedInfoDict"]
895
+ A list of dictionaries enriched with file descriptors (size, checksums, etc.)
896
+ and ready for further upload processing.
897
+
898
+ Raises
899
+ ------
900
+ InputValidationError
901
+ If no valid files are found.
555
902
  """
556
903
  logger = self.logger
557
904
  files: list["FileToUploadWithCollectedInfoDict"] = []
@@ -583,7 +930,8 @@ class UploadClient:
583
930
  if not len(fnames) and not len(subdirs):
584
931
  logger(logging.WARNING, 'Skipping %s because it is empty.' % dname)
585
932
  elif not len(fnames):
586
- logger(logging.WARNING, 'Skipping %s because it has no files in it. Subdirectories are not supported.' % dname)
933
+ logger(logging.WARNING,
934
+ 'Skipping %s because it has no files in it. Subdirectories are not supported.' % dname)
587
935
  elif os.path.isdir(path) and recursive:
588
936
  files.extend(cast("list[FileToUploadWithCollectedInfoDict]", self._recursive(item)))
589
937
  elif os.path.isfile(path) and not recursive:
@@ -599,15 +947,27 @@ class UploadClient:
599
947
 
600
948
  return files
601
949
 
602
- def _convert_file_for_api(self, file: "Mapping[str, Any]") -> dict[str, Any]:
950
+ def _convert_file_for_api(
951
+ self,
952
+ file: "Mapping[str, Any]"
953
+ ) -> dict[str, Any]:
603
954
  """
604
- Creates a new dictionary that contains only the values
605
- that are needed for the upload with the correct keys
606
- (This function is meant to be used as class internal only)
607
-
608
- :param file: dictionary describing a file to upload
609
-
610
- :returns: dictionary containing not more then the needed values for the upload
955
+ Create a minimal dictionary of file attributes for the Rucio API.
956
+
957
+ This method extracts only the necessary fields from the provided file dictionary,
958
+ producing a new dictionary that is suitable for registering or updating
959
+ a file replica in Rucio.
960
+
961
+ Parameters
962
+ ----------
963
+ file
964
+ A dictionary describing a file, expected to include at least `did_scope`,
965
+ `did_name`, `bytes`, `adler32`, `md5`, `meta`, `state`, and optionally `pfn`.
966
+
967
+ Returns
968
+ -------
969
+ dict[str, Any]
970
+ A dictionary containing only the relevant file attributes for Rucio's REST API.
611
971
  """
612
972
  replica = {}
613
973
  replica['scope'] = file['did_scope']
@@ -637,27 +997,67 @@ class UploadClient:
637
997
  sign_service: Optional[str] = None
638
998
  ) -> Optional[str]:
639
999
  """
640
- Uploads a file to the connected storage.
641
-
642
- :param rse_settings: dictionary containing the RSE settings
643
- :param rse_attributes: dictionary containing the RSE attribute key value pairs
644
- :param lfn: a single dict containing 'scope' and 'name'.
645
- Example:
646
- {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'}
647
- If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
648
- :param source_dir: path to the local directory including the source files
649
- :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
650
- :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
651
- :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
652
- :param sign_service: use the given service (e.g. gcs, s3, swift) to sign the URL
653
-
654
- :raises RucioException(msg): general exception with msg for more details.
1000
+ Perform the actual file transfer to an RSE using the appropriate protocol.
1001
+
1002
+ This method is used once all necessary file information is resolved (logical file
1003
+ name, checksums, etc.). It creates and verifies the physical file name (PFN),
1004
+ optionally removes or overwrites stale replicas, uploads the file (potentially via
1005
+ a temporary PFN suffix), checks its size/checksum consistency, and finalizes it
1006
+ under the expected PFN.
1007
+
1008
+ Parameters
1009
+ ----------
1010
+ rse_settings
1011
+ Dictionary containing the RSE configuration.
1012
+ rse_attributes
1013
+ Additional attributes of the RSE (e.g. 'archive_timeout').
1014
+ lfn
1015
+ An optional dictionary describing the logical file (e.g., {'name': '1_rse_local_put.raw',
1016
+ 'scope': 'user.jdoe', ..}). If the 'filename' key is present, it overrides 'name'
1017
+ in determining the local file name to read from source_dir.
1018
+ source_dir
1019
+ Local source directory path where the file to be uploaded resides.
1020
+ domain
1021
+ Network domain for the upload, commonly 'wan' for wide-area networks.
1022
+ impl
1023
+ Name of the protocol implementation to be enforced (if any).
1024
+ force_pfn
1025
+ If provided, forces the use of this PFN for the file location on the storage
1026
+ (use with care since it can lead to "dark" data).
1027
+ force_scheme
1028
+ If provided, forces the protocol scheme (e.g. 'davs', 'https') to be used.
1029
+ transfer_timeout
1030
+ Timeout (in seconds) for the transfer operation before it fails.
1031
+ delete_existing
1032
+ If True, removes any unregistered or stale file on the storage that matches this PFN.
1033
+ sign_service
1034
+ If set, requests a signed URL from the given service (e.g., gcs, s3, swift).
1035
+
1036
+ Returns
1037
+ -------
1038
+ Optional[str]
1039
+ The final PFN (physical file name) of the successfully uploaded file, or None
1040
+ if creation failed.
1041
+
1042
+ Raises
1043
+ ------
1044
+ FileReplicaAlreadyExists
1045
+ If the target file already exists and overwrite is not allowed.
1046
+ RSEOperationNotSupported
1047
+ If storage-side operations (delete/rename/put) are not supported or fail.
1048
+ RucioException
1049
+ If renaming or other critical operations cannot be completed.
655
1050
  """
1051
+
656
1052
  logger = self.logger
657
1053
 
658
1054
  # Construct protocol for write operation.
659
1055
  # IMPORTANT: All upload stat() checks are always done with the write_protocol EXCEPT for cloud resources (signed URL for write cannot be used for read)
660
- protocol_write = self._create_protocol(rse_settings, 'write', force_scheme=force_scheme, domain=domain, impl=impl)
1056
+ protocol_write = self._create_protocol(rse_settings,
1057
+ 'write',
1058
+ force_scheme=force_scheme,
1059
+ domain=domain,
1060
+ impl=impl)
661
1061
 
662
1062
  base_name = lfn.get('filename', lfn['name'])
663
1063
  name = lfn.get('name', base_name)
@@ -682,44 +1082,58 @@ class UploadClient:
682
1082
 
683
1083
  # Auth. mostly for object stores
684
1084
  if sign_service:
685
- protocol_read = self._create_protocol(rse_settings, 'read', domain=domain, impl=impl)
1085
+ protocol_read = self._create_protocol(rse_settings,
1086
+ 'read',
1087
+ domain=domain,
1088
+ impl=impl)
686
1089
  if pfn is not None:
687
1090
  signed_read_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'read', pfn)
688
1091
  pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'write', pfn)
689
1092
 
690
- # Create a name of tmp file if renaming operation is supported
1093
+ # Create a name of tmp file if the renaming operation is supported
691
1094
  pfn_tmp = cast("str", '%s.rucio.upload' % pfn if protocol_write.renaming else pfn)
692
1095
  signed_read_pfn_tmp = '%s.rucio.upload' % signed_read_pfn if protocol_write.renaming else signed_read_pfn
693
1096
 
694
1097
  # Either DID exists or not register_after_upload
695
1098
  if protocol_write.overwrite is False and delete_existing is False:
696
1099
  if sign_service:
697
- # Construct protocol for read ONLY for cloud resources and get signed URL for GET
1100
+ # Construct protocol for read-ONLY for cloud resources and get signed URL for GET
698
1101
  if protocol_read.exists(signed_read_pfn):
699
- raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception ?
1102
+ raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception?
700
1103
  elif protocol_write.exists(pfn):
701
- raise FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception ?
1104
+ raise FileReplicaAlreadyExists(
1105
+ 'File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn)) # wrong exception?
702
1106
 
703
1107
  # Removing tmp from earlier attempts
704
- if (not sign_service and protocol_write.exists(pfn_tmp)) or (sign_service and protocol_read.exists(signed_read_pfn_tmp)):
1108
+ if (not sign_service and protocol_write.exists(pfn_tmp)) or (
1109
+ sign_service and protocol_read.exists(signed_read_pfn_tmp)):
705
1110
  logger(logging.DEBUG, 'Removing remains of previous upload attempts.')
706
1111
  try:
707
1112
  # Construct protocol for delete operation.
708
- protocol_delete = self._create_protocol(rse_settings, 'delete', force_scheme=force_scheme, domain=domain, impl=impl)
1113
+ protocol_delete = self._create_protocol(rse_settings,
1114
+ 'delete',
1115
+ force_scheme=force_scheme,
1116
+ domain=domain,
1117
+ impl=impl)
709
1118
  delete_pfn = '%s.rucio.upload' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
710
1119
  if sign_service:
711
1120
  delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
712
1121
  protocol_delete.delete(delete_pfn)
713
1122
  protocol_delete.close()
714
1123
  except Exception as error:
715
- raise RSEOperationNotSupported('Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))
1124
+ raise RSEOperationNotSupported(
1125
+ 'Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))
716
1126
 
717
1127
  # Removing not registered files from earlier attempts
718
1128
  if delete_existing:
719
1129
  logger(logging.DEBUG, 'Removing not-registered remains of previous upload attempts.')
720
1130
  try:
721
1131
  # Construct protocol for delete operation.
722
- protocol_delete = self._create_protocol(rse_settings, 'delete', force_scheme=force_scheme, domain=domain, impl=impl)
1132
+ protocol_delete = self._create_protocol(rse_settings,
1133
+ 'delete',
1134
+ force_scheme=force_scheme,
1135
+ domain=domain,
1136
+ impl=impl)
723
1137
  delete_pfn = '%s' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
724
1138
  if sign_service:
725
1139
  delete_pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'delete', delete_pfn)
@@ -730,7 +1144,14 @@ class UploadClient:
730
1144
 
731
1145
  # Process the upload of the tmp file
732
1146
  try:
733
- retry(protocol_write.put, base_name, pfn_tmp, source_dir, transfer_timeout=transfer_timeout)(mtries=2, logger=logger)
1147
+ retry(protocol_write.put,
1148
+ base_name,
1149
+ pfn_tmp,
1150
+ source_dir,
1151
+ transfer_timeout=transfer_timeout)(
1152
+ mtries=2,
1153
+ logger=logger
1154
+ )
734
1155
  logger(logging.INFO, 'Successful upload of temporary file. {}'.format(pfn_tmp))
735
1156
  except Exception as error:
736
1157
  raise RSEOperationNotSupported(str(error))
@@ -750,17 +1171,20 @@ class UploadClient:
750
1171
  if ('filesize' in stats) and ('filesize' in lfn):
751
1172
  self.logger(logging.DEBUG, 'Filesize: Expected=%s Found=%s' % (lfn['filesize'], stats['filesize']))
752
1173
  if int(stats['filesize']) != int(lfn['filesize']):
753
- raise RucioException('Filesize mismatch. Source: %s Destination: %s' % (lfn['filesize'], stats['filesize']))
1174
+ raise RucioException(
1175
+ 'Filesize mismatch. Source: %s Destination: %s' % (lfn['filesize'], stats['filesize']))
754
1176
  if rse_settings['verify_checksum'] is not False:
755
1177
  if ('adler32' in stats) and ('adler32' in lfn):
756
- self.logger(logging.DEBUG, 'Checksum: Expected=%s Found=%s' % (lfn['adler32'], stats['adler32']))
1178
+ self.logger(logging.DEBUG,
1179
+ 'Checksum: Expected=%s Found=%s' % (lfn['adler32'], stats['adler32']))
757
1180
  if str(stats['adler32']).lstrip('0') != str(lfn['adler32']).lstrip('0'):
758
- raise RucioException('Checksum mismatch. Source: %s Destination: %s' % (lfn['adler32'], stats['adler32']))
1181
+ raise RucioException(
1182
+ 'Checksum mismatch. Source: %s Destination: %s' % (lfn['adler32'], stats['adler32']))
759
1183
 
760
1184
  except Exception as error:
761
1185
  raise error
762
1186
 
763
- # The upload finished successful and the file can be renamed
1187
+ # The upload finished successfully and the file can be renamed
764
1188
  try:
765
1189
  if protocol_write.renaming:
766
1190
  logger(logging.DEBUG, 'Renaming file %s to %s' % (pfn_tmp, pfn))
@@ -778,9 +1202,31 @@ class UploadClient:
778
1202
  pfn: str
779
1203
  ) -> dict[str, Any]:
780
1204
  """
781
- Try to stat file, on fail try again 1s, 2s, 4s, 8s, 16s, 32s later. Fail is all fail
782
- :param protocol: The protocol to use to reach this file
783
- :param pfn: Physical file name of the target for the protocol stat
1205
+ Attempt to retrieve file statistics with exponential backoff.
1206
+
1207
+ This method invokes `protocol.stat` a limited number of times, waiting with an
1208
+ exponential backoff between each attempt when an error occurs. After the configured
1209
+ number of retries, the method performs one final `stat` call and returns its result
1210
+ or lets any resulting exception propagate.
1211
+
1212
+ Parameters
1213
+ ----------
1214
+ protocol
1215
+ The RSEProtocol instance to use for retrieving file statistics
1216
+ pfn
1217
+ The physical file name (PFN) to be checked.
1218
+
1219
+ Returns
1220
+ -------
1221
+ dict[str, Any]
1222
+ A dictionary expected to include the filesize and adler32 for the provided pfn.
1223
+
1224
+ Raises
1225
+ ------
1226
+ RSEChecksumUnavailable
1227
+ If the protocol indicates a missing checksum for the file.
1228
+ Exception
1229
+ If the requested service is not available or permissions are not granted.
784
1230
  """
785
1231
  retries = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
786
1232
  for attempt in range(retries):
@@ -800,8 +1246,8 @@ class UploadClient:
800
1246
  fail_str = ['The requested service is not available at the moment', 'Permission refused']
801
1247
  if any(x in str(error) for x in fail_str):
802
1248
  raise error
803
- self.logger(logging.DEBUG, 'stat: unknown edge case, retrying in %ss' % 2**attempt)
804
- time.sleep(2**attempt)
1249
+ self.logger(logging.DEBUG, 'stat: unknown edge case, retrying in %ss' % 2 ** attempt)
1250
+ time.sleep(2 ** attempt)
805
1251
  return protocol.stat(pfn)
806
1252
 
807
1253
  def _create_protocol(
@@ -813,14 +1259,44 @@ class UploadClient:
813
1259
  domain: str = 'wan'
814
1260
  ) -> "RSEProtocol":
815
1261
  """
816
- Protocol construction.
817
- :param rse_settings: rse_settings
818
- :param operation: activity, e.g. read, write, delete etc.
819
- :param force_scheme: custom scheme
820
- :param auth_token: Optionally passing JSON Web Token (OIDC) string for authentication
1262
+ Creates and returns the protocol object for the requested RSE operation.
1263
+
1264
+ Establishes a connection using the specified parameters (scheme, domain, etc.)
1265
+ and returns a protocol instance capable of handling the requested operation.
1266
+
1267
+ Parameters
1268
+ ----------
1269
+ rse_settings
1270
+ The dictionary containing RSE configuration.
1271
+ operation
1272
+ The intended operation, such as 'read', 'write', or 'delete'.
1273
+ impl
1274
+ An optional override for the default protocol implementation.
1275
+ force_scheme
1276
+ If provided, forces the protocol to use this scheme.
1277
+ domain
1278
+ The network domain to be used, defaulting to 'wan'.
1279
+
1280
+ Returns
1281
+ -------
1282
+ "RSEProtocol"
1283
+ The instantiated `RSEProtocol` object.
1284
+
1285
+ Raises
1286
+ ------
1287
+ Exception
1288
+ If the protocol creation or connection attempt fails.
821
1289
  """
822
1290
  try:
823
- protocol = rsemgr.create_protocol(rse_settings, operation, scheme=force_scheme, domain=domain, impl=impl, auth_token=self.auth_token, logger=self.logger)
1291
+ protocol = rsemgr.create_protocol(
1292
+ rse_settings,
1293
+ operation,
1294
+ scheme=force_scheme,
1295
+ domain=domain,
1296
+ impl=impl,
1297
+ auth_token=self.auth_token,
1298
+ logger=self.logger
1299
+ )
824
1300
  protocol.connect()
825
1301
  except Exception as error:
826
1302
  self.logger(logging.WARNING, 'Failed to create protocol for operation: %s' % operation)
@@ -828,24 +1304,69 @@ class UploadClient:
828
1304
  raise error
829
1305
  return protocol
830
1306
 
831
- def _send_trace(self, trace: "TraceDict") -> None:
1307
+ def _send_trace(
1308
+ self,
1309
+ trace: "TraceDict"
1310
+ ) -> None:
832
1311
  """
833
- Checks if sending trace is allowed and send the trace.
1312
+ Sends the trace if tracing is enabled.
834
1313
 
835
- :param trace: the trace
1314
+ If `self.tracing` is True, this method uses Rucio's `send_trace` function to
1315
+ dispatch the provided trace object to Rucio host. Otherwise, it takes no action.
1316
+
1317
+ Parameters
1318
+ ----------
1319
+ trace
1320
+ The trace object to be sent.
836
1321
  """
837
1322
  if self.tracing:
838
1323
  send_trace(trace, self.client.trace_host, self.client.user_agent)
839
1324
 
840
- def _recursive(self, item: "FileToUploadDict") -> list["FileToUploadWithCollectedAndDatasetInfoDict"]:
1325
+ def _recursive(
1326
+ self,
1327
+ item: "FileToUploadDict"
1328
+ ) -> list["FileToUploadWithCollectedAndDatasetInfoDict"]:
841
1329
  """
842
- If the --recursive flag is set, it replicates the folder structure recursively into collections
843
- A folder only can have either other folders inside or files, but not both of them
844
- - If it has folders, the root folder will be a container
845
- - If it has files, the root folder will be a dataset
846
- - If it is empty, it does not create anything
847
-
848
- :param item: dictionary containing all descriptions of the files to upload
1330
+ Recursively inspects a folder and creates corresponding Rucio datasets or containers.
1331
+
1332
+ This method traverses the local path specified in the given dictionary `item` and
1333
+ interprets subfolders as either Rucio containers (if they themselves contain further
1334
+ subfolders) or datasets (if they only contain files). Files within these datasets
1335
+ are gathered into a list with additional upload information. The method also attempts
1336
+ to create and attach these datasets/containers in Rucio, replicating the folder
1337
+ structure.
1338
+
1339
+ Note:
1340
+ ------
1341
+ Currently, this method does not allow the top-level directory to contain both files
1342
+ and subdirectories.
1343
+
1344
+ Parameters
1345
+ ----------
1346
+ item
1347
+ A dictionary describing the local path and upload parameters.
1348
+ It must contain at least:
1349
+
1350
+ * **`rse`**:
1351
+ The target RSE for the upload.
1352
+
1353
+ * **`path`**:
1354
+ The local directory path to inspect.
1355
+
1356
+ * **`did_scope`** (optional):
1357
+ Custom scope for the resulting datasets/containers.
1358
+
1359
+ Returns
1360
+ -------
1361
+ list["FileToUploadWithCollectedAndDatasetInfoDict"]
1362
+ A list of file descriptors enriched with collected file information, each
1363
+ conforming to FileToUploadWithCollectedAndDatasetInfoDict.
1364
+
1365
+ Raises
1366
+ ------
1367
+ InputValidationError
1368
+ If a folder contains both files and subdirectories at its top level (invalid
1369
+ container/dataset structure).
849
1370
  """
850
1371
  files: list["FileToUploadWithCollectedAndDatasetInfoDict"] = []
851
1372
  datasets: list["DatasetDict"] = []
@@ -878,7 +1399,8 @@ class UploadClient:
878
1399
  elif len(dirs) > 0:
879
1400
  containers.append({'scope': scope, 'name': root.split('/')[-1]})
880
1401
  self.logger(logging.DEBUG, 'Appended container with DID %s:%s' % (scope, path))
881
- attach.extend([{'scope': scope, 'name': root.split('/')[-1], 'rse': rse, 'did': {'scope': scope, 'name': dir_}} for dir_ in dirs])
1402
+ attach.extend([{'scope': scope, 'name': root.split('/')[-1], 'rse': rse,
1403
+ 'did': {'scope': scope, 'name': dir_}} for dir_ in dirs])
882
1404
  elif len(dirs) == 0 and len(fnames) == 0:
883
1405
  self.logger(logging.WARNING, 'The folder %s is empty, skipping' % root)
884
1406
  continue
@@ -904,7 +1426,8 @@ class UploadClient:
904
1426
  self.logger(logging.INFO, 'DIDs attached to collection %s:%s' % (att['scope'], att['name']))
905
1427
  except RucioException as error:
906
1428
  self.logger(logging.ERROR, error)
907
- self.logger(logging.ERROR, 'It was not possible to attach to collection with DID %s:%s' % (att['scope'], att['name']))
1429
+ self.logger(logging.ERROR,
1430
+ 'It was not possible to attach to collection with DID %s:%s' % (att['scope'], att['name']))
908
1431
  return files
909
1432
 
910
1433
  def preferred_impl(
@@ -913,13 +1436,29 @@ class UploadClient:
913
1436
  domain: str
914
1437
  ) -> Optional[str]:
915
1438
  """
916
- Finds the optimum protocol impl preferred by the client and
917
- supported by the remote RSE.
918
-
919
- :param rse_settings: dictionary containing the RSE settings
920
- :param domain: The network domain, either 'wan' (default) or 'lan'
921
-
922
- :raises RucioException(msg): general exception with msg for more details.
1439
+ Select a suitable protocol implementation for read, write, and delete operations on
1440
+ the given RSE and domain.
1441
+
1442
+ This method checks the local client configuration (under the `[upload] preferred_impl`
1443
+ setting) and compares it against the list of protocols declared in `rse_settings`.
1444
+ It attempts to find a protocol that supports the required I/O operations (read,
1445
+ write, delete) in the specified domain. If multiple preferred protocols are listed
1446
+ in the config, it iterates in order and returns the first viable match.
1447
+
1448
+ Parameters
1449
+ ----------
1450
+ rse_settings
1451
+ A dictionary describing RSE details, including available protocols and their
1452
+ domains.
1453
+ domain
1454
+ The network domain (e.g., 'lan' or 'wan') in which the protocol must support
1455
+ all operations.
1456
+
1457
+ Returns
1458
+ -------
1459
+ Optional[str]
1460
+ The name of a protocol implementation that can handle read/write/delete
1461
+ for the specified domain, or None if no suitable protocol was found.
923
1462
  """
924
1463
  preferred_protocols = []
925
1464
  supported_impl = None
@@ -941,28 +1480,34 @@ class UploadClient:
941
1480
  preferred_impls[i] = 'rucio.rse.protocols.' + impl
942
1481
  i += 1
943
1482
 
944
- preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if protocol['impl'] in preferred_impls]
1483
+ preferred_protocols = [protocol for protocol in reversed(rse_settings['protocols']) if
1484
+ protocol['impl'] in preferred_impls]
945
1485
 
946
1486
  if len(preferred_protocols) > 0:
947
- preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if protocol not in preferred_protocols]
1487
+ preferred_protocols += [protocol for protocol in reversed(rse_settings['protocols']) if
1488
+ protocol not in preferred_protocols]
948
1489
  else:
949
1490
  preferred_protocols = reversed(rse_settings['protocols'])
950
1491
 
951
1492
  for protocol in preferred_protocols:
952
1493
  if domain not in list(protocol['domains'].keys()):
953
- self.logger(logging.DEBUG, 'Unsuitable protocol "%s": Domain %s not supported' % (protocol['impl'], domain))
1494
+ self.logger(logging.DEBUG,
1495
+ 'Unsuitable protocol "%s": Domain %s not supported' % (protocol['impl'], domain))
954
1496
  continue
955
1497
  if not all(operations in protocol['domains'][domain] for operations in ("read", "write", "delete")):
956
- self.logger(logging.DEBUG, 'Unsuitable protocol "%s": All operations are not supported' % (protocol['impl']))
1498
+ self.logger(logging.DEBUG,
1499
+ 'Unsuitable protocol "%s": All operations are not supported' % (protocol['impl']))
957
1500
  continue
958
1501
  try:
959
- supported_protocol = rsemgr.create_protocol(rse_settings, 'write', domain=domain, impl=protocol['impl'], auth_token=self.auth_token, logger=self.logger)
1502
+ supported_protocol = rsemgr.create_protocol(rse_settings, 'write', domain=domain, impl=protocol['impl'],
1503
+ auth_token=self.auth_token, logger=self.logger)
960
1504
  supported_protocol.connect()
961
1505
  except Exception as error:
962
1506
  self.logger(logging.DEBUG, 'Failed to create protocol "%s", exception: %s' % (protocol['impl'], error))
963
1507
  pass
964
1508
  else:
965
- self.logger(logging.INFO, 'Preferred protocol impl supported locally and remotely: %s' % (protocol['impl']))
1509
+ self.logger(logging.INFO,
1510
+ 'Preferred protocol impl supported locally and remotely: %s' % (protocol['impl']))
966
1511
  supported_impl = protocol['impl']
967
1512
  break
968
1513